@alexkroman1/aai 1.0.6 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. package/.turbo/turbo-build.log +11 -11
  2. package/CHANGELOG.md +22 -0
  3. package/dist/_internal-types-CoDTiBd1.js +61 -0
  4. package/dist/host/_mock-ws.d.ts +0 -24
  5. package/dist/host/runtime-barrel.d.ts +0 -1
  6. package/dist/host/runtime-barrel.js +55 -5
  7. package/dist/host/runtime.d.ts +2 -0
  8. package/dist/host/tool-executor.d.ts +1 -0
  9. package/dist/host/ws-handler.d.ts +2 -0
  10. package/dist/index.d.ts +1 -0
  11. package/dist/index.js +90 -1
  12. package/dist/sdk/allowed-hosts.d.ts +34 -0
  13. package/dist/sdk/manifest-barrel.d.ts +3 -5
  14. package/dist/sdk/manifest-barrel.js +2 -52
  15. package/dist/sdk/manifest.d.ts +2 -0
  16. package/dist/sdk/protocol.d.ts +11 -28
  17. package/dist/sdk/protocol.js +6 -3
  18. package/dist/sdk/types.d.ts +2 -0
  19. package/host/_mock-ws.ts +0 -50
  20. package/host/_test-utils.ts +1 -0
  21. package/host/runtime-barrel.ts +0 -1
  22. package/host/runtime.ts +13 -1
  23. package/host/session-ctx.test.ts +387 -0
  24. package/host/session-fixture-replay.test.ts +2 -10
  25. package/host/session.test.ts +19 -41
  26. package/host/tool-executor.test.ts +36 -0
  27. package/host/tool-executor.ts +4 -0
  28. package/host/ws-handler.ts +3 -0
  29. package/index.ts +1 -0
  30. package/package.json +1 -1
  31. package/sdk/__snapshots__/exports.test.ts.snap +79 -0
  32. package/sdk/__snapshots__/schema-shapes.test.ts.snap +187 -0
  33. package/sdk/_test-matchers.test.ts +75 -0
  34. package/sdk/_test-matchers.ts +73 -0
  35. package/sdk/allowed-hosts.test.ts +236 -0
  36. package/sdk/allowed-hosts.ts +113 -0
  37. package/sdk/exports.test.ts +31 -0
  38. package/sdk/manifest-barrel.ts +13 -7
  39. package/sdk/manifest.test.ts +103 -2
  40. package/sdk/manifest.ts +19 -0
  41. package/sdk/protocol-compat.test.ts +0 -6
  42. package/sdk/protocol-snapshot.test.ts +7 -5
  43. package/sdk/protocol.test.ts +107 -21
  44. package/sdk/protocol.ts +7 -15
  45. package/sdk/schema-alignment.test.ts +1 -27
  46. package/sdk/schema-shapes.test.ts +103 -0
  47. package/sdk/tsconfig.json +1 -1
  48. package/sdk/types.test.ts +56 -1
  49. package/sdk/types.ts +2 -0
  50. package/sdk/ws-upgrade.test.ts +8 -8
  51. package/tsconfig.build.json +8 -1
  52. package/tsconfig.json +1 -1
  53. package/vitest.config.ts +1 -0
  54. package/dist/system-prompt-nik_iavo.js +0 -92
@@ -116,6 +116,11 @@ const ClientEventSchema = z.discriminatedUnion("type", [
116
116
  type: z.literal("error"),
117
117
  code: SessionErrorCodeSchema,
118
118
  message: z.string()
119
+ }),
120
+ z.object({
121
+ type: z.literal("custom_event"),
122
+ event: z.string().min(1),
123
+ data: z.unknown()
119
124
  })
120
125
  ]);
121
126
  /** Zod schema for {@link ReadyConfig}. */
@@ -157,7 +162,5 @@ function buildReadyConfig(s2sConfig) {
157
162
  ttsSampleRate: s2sConfig.outputSampleRate
158
163
  };
159
164
  }
160
- /** Zod schema for {@link TurnConfig}. */
161
- const TurnConfigSchema = z.object({ maxSteps: z.number().int().positive().optional() });
162
165
  //#endregion
163
- export { AUDIO_FORMAT, ClientEventSchema, ClientMessageSchema, KvDelSchema, KvGetSchema, KvRequestSchema, KvSetSchema, MessageEnvelopeSchema, ReadyConfigSchema, ServerMessageSchema, SessionErrorCodeSchema, TurnConfigSchema, buildReadyConfig, lenientParse };
166
+ export { ClientEventSchema, ClientMessageSchema, KvDelSchema, KvGetSchema, KvRequestSchema, KvSetSchema, ReadyConfigSchema, ServerMessageSchema, SessionErrorCodeSchema, buildReadyConfig, lenientParse };
@@ -77,6 +77,8 @@ export type ToolContext<S = Record<string, unknown>> = {
77
77
  messages: readonly Message[];
78
78
  /** Unique identifier for the current session. Useful for correlating logs across concurrent sessions. */
79
79
  sessionId: string;
80
+ /** Push a custom event to the connected browser client. Fire-and-forget. */
81
+ send(event: string, data: unknown): void;
80
82
  };
81
83
  /**
82
84
  * Definition of a custom tool that the agent can invoke.
package/host/_mock-ws.ts CHANGED
@@ -133,53 +133,3 @@ export class MockWebSocket extends EventTarget {
133
133
  return this.sent.filter((d): d is string => typeof d === "string").map((s) => JSON.parse(s));
134
134
  }
135
135
  }
136
-
137
- const g: { WebSocket: unknown } = globalThis;
138
-
139
- /**
140
- * Replace `globalThis.WebSocket` with {@link MockWebSocket} for testing.
141
- *
142
- * Returns a handle that tracks all created mock sockets and can restore the
143
- * original `WebSocket` constructor. Supports the `using` declaration via
144
- * `Symbol.dispose` for automatic cleanup.
145
- *
146
- * @returns An object with `created` array, `lastWs` getter, `restore()`, and `[Symbol.dispose]()`.
147
- *
148
- * @example
149
- * ```ts
150
- * using mock = installMockWebSocket();
151
- * const session = new Session("wss://example.com");
152
- * const ws = mock.lastWs!;
153
- * ws.simulateMessage(JSON.stringify({ type: "ready" }));
154
- * // mock automatically restores WebSocket when disposed
155
- * ```
156
- */
157
- export function installMockWebSocket(): {
158
- restore: () => void;
159
- created: MockWebSocket[];
160
- get lastWs(): MockWebSocket | null;
161
- [Symbol.dispose]: () => void;
162
- } {
163
- const saved = globalThis.WebSocket;
164
- const created: MockWebSocket[] = [];
165
-
166
- g.WebSocket = class extends MockWebSocket {
167
- constructor(url: string | URL, protocols?: string | string[] | Record<string, unknown>) {
168
- super(url, protocols);
169
- created.push(this);
170
- }
171
- };
172
-
173
- return {
174
- created,
175
- get lastWs() {
176
- return created.at(-1) ?? null;
177
- },
178
- restore() {
179
- globalThis.WebSocket = saved;
180
- },
181
- [Symbol.dispose]() {
182
- this.restore();
183
- },
184
- };
185
- }
@@ -25,6 +25,7 @@ export function createMockToolContext(overrides?: Partial<ToolContext>): ToolCon
25
25
  kv: {} as never,
26
26
  messages: [],
27
27
  sessionId: "test-session",
28
+ send: vi.fn(),
28
29
  ...overrides,
29
30
  };
30
31
  }
@@ -16,7 +16,6 @@
16
16
  export * from "./builtin-tools.ts";
17
17
  export * from "./runtime.ts";
18
18
  export * from "./runtime-config.ts";
19
- export * from "./s2s.ts";
20
19
  export * from "./server.ts";
21
20
  export * from "./session.ts";
22
21
  export * from "./session-ctx.ts";
package/host/runtime.ts CHANGED
@@ -36,6 +36,8 @@ export type SessionStartOptions = {
36
36
  onClose?: () => void;
37
37
  /** Called with session ID after session cleanup, for guest state cleanup. */
38
38
  onSessionEnd?: (sessionId: string) => void;
39
+ /** Called with session ID and client sink after session setup. Used by sandbox to route custom events. */
40
+ onSinkCreated?: (sessionId: string, sink: ClientSink) => void;
39
41
  };
40
42
 
41
43
  /**
@@ -160,6 +162,7 @@ export function createRuntime(opts: RuntimeOptions): Runtime {
160
162
  } = opts;
161
163
  const agentConfig = toAgentConfig(agent);
162
164
  const sessions = new Map<string, Session>();
165
+ const sinkMap = new Map<string, ClientSink>();
163
166
  const readyConfig: ReadyConfig = buildReadyConfig(s2sConfig);
164
167
 
165
168
  // When overrides are provided (sandbox mode), skip in-process tool setup
@@ -216,6 +219,7 @@ export function createRuntime(opts: RuntimeOptions): Runtime {
216
219
  executeTool = async (name, args, sessionId, messages) => {
217
220
  const tool = allTools[name];
218
221
  if (!tool) return toolError(`Unknown tool: ${name}`);
222
+ const sink = sinkMap.get(sessionId ?? "");
219
223
  return executeToolCall(name, args, {
220
224
  tool,
221
225
  env: frozenEnv,
@@ -224,6 +228,7 @@ export function createRuntime(opts: RuntimeOptions): Runtime {
224
228
  kv,
225
229
  messages,
226
230
  logger,
231
+ send: sink ? (event, data) => sink.event({ type: "custom_event", event, data }) : undefined,
227
232
  });
228
233
  };
229
234
  }
@@ -235,6 +240,7 @@ export function createRuntime(opts: RuntimeOptions): Runtime {
235
240
  skipGreeting?: boolean;
236
241
  resumeFrom?: string;
237
242
  }): Session {
243
+ sinkMap.set(sessionOpts.id, sessionOpts.client);
238
244
  const apiKey = env.ASSEMBLYAI_API_KEY ?? "";
239
245
  return createS2sSession({
240
246
  id: sessionOpts.id,
@@ -257,6 +263,7 @@ export function createRuntime(opts: RuntimeOptions): Runtime {
257
263
 
258
264
  function startSession(ws: SessionWebSocket, startOpts?: SessionStartOptions): void {
259
265
  const resumeFrom = startOpts?.resumeFrom;
266
+ const userOnSessionEnd = startOpts?.onSessionEnd;
260
267
  wireSessionSocket(ws, {
261
268
  sessions,
262
269
  createSession: (sid, client) =>
@@ -272,7 +279,11 @@ export function createRuntime(opts: RuntimeOptions): Runtime {
272
279
  ...(startOpts?.logContext ? { logContext: startOpts.logContext } : {}),
273
280
  ...(startOpts?.onOpen ? { onOpen: startOpts.onOpen } : {}),
274
281
  ...(startOpts?.onClose ? { onClose: startOpts.onClose } : {}),
275
- ...(startOpts?.onSessionEnd ? { onSessionEnd: startOpts.onSessionEnd } : {}),
282
+ ...(startOpts?.onSinkCreated ? { onSinkCreated: startOpts.onSinkCreated } : {}),
283
+ onSessionEnd: (sid) => {
284
+ sinkMap.delete(sid);
285
+ userOnSessionEnd?.(sid);
286
+ },
276
287
  ...(sessionStartTimeoutMs !== undefined ? { sessionStartTimeoutMs } : {}),
277
288
  ...(resumeFrom ? { resumeFrom } : {}),
278
289
  });
@@ -295,6 +306,7 @@ export function createRuntime(opts: RuntimeOptions): Runtime {
295
306
  );
296
307
  }
297
308
  sessions.clear();
309
+ sinkMap.clear();
298
310
  }
299
311
 
300
312
  return {
@@ -0,0 +1,387 @@
1
+ // Copyright 2025 the AAI authors. MIT license.
2
+
3
+ import { describe, expect, it, vi } from "vitest";
4
+ import { DEFAULT_MAX_HISTORY } from "../sdk/constants.ts";
5
+ import type { Message } from "../sdk/types.ts";
6
+ import { toolError } from "../sdk/utils.ts";
7
+ import { flush, makeClient, makeConfig, silentLogger } from "./_test-utils.ts";
8
+ import { buildCtx } from "./session-ctx.ts";
9
+
10
+ function makeBuildCtxOpts(overrides?: Record<string, unknown>) {
11
+ return {
12
+ id: "session-1",
13
+ agent: "test-agent",
14
+ client: makeClient(),
15
+ agentConfig: makeConfig({ maxSteps: 3 }),
16
+ executeTool: vi.fn(async () => "ok"),
17
+ log: silentLogger,
18
+ ...overrides,
19
+ };
20
+ }
21
+
22
+ describe("buildCtx", () => {
23
+ it("returns ctx with the correct session id", () => {
24
+ const ctx = buildCtx(makeBuildCtxOpts({ id: "my-session" }));
25
+ expect(ctx.id).toBe("my-session");
26
+ });
27
+
28
+ it("returns ctx with the correct agent name", () => {
29
+ const ctx = buildCtx(makeBuildCtxOpts({ agent: "my-agent" }));
30
+ expect(ctx.agent).toBe("my-agent");
31
+ });
32
+
33
+ it("initializes with empty conversation messages", () => {
34
+ const ctx = buildCtx(makeBuildCtxOpts());
35
+ expect(ctx.conversationMessages).toEqual([]);
36
+ });
37
+
38
+ it("initializes with null s2s handle", () => {
39
+ const ctx = buildCtx(makeBuildCtxOpts());
40
+ expect(ctx.s2s).toBeNull();
41
+ });
42
+
43
+ it("initializes with null turnPromise", () => {
44
+ const ctx = buildCtx(makeBuildCtxOpts());
45
+ expect(ctx.turnPromise).toBeNull();
46
+ });
47
+
48
+ it("initializes reply state with empty pendingTools, zero toolCallCount, and null replyId", () => {
49
+ const ctx = buildCtx(makeBuildCtxOpts());
50
+ expect(ctx.reply).toEqual({
51
+ pendingTools: [],
52
+ toolCallCount: 0,
53
+ currentReplyId: null,
54
+ });
55
+ });
56
+
57
+ it("defaults maxHistory to DEFAULT_MAX_HISTORY when not provided", () => {
58
+ const ctx = buildCtx(makeBuildCtxOpts());
59
+ expect(ctx.maxHistory).toBe(DEFAULT_MAX_HISTORY);
60
+ });
61
+
62
+ it("uses custom maxHistory when provided", () => {
63
+ const ctx = buildCtx(makeBuildCtxOpts({ maxHistory: 50 }));
64
+ expect(ctx.maxHistory).toBe(50);
65
+ });
66
+
67
+ it("passes through the agentConfig, executeTool, and log dependencies", () => {
68
+ const config = makeConfig({ maxSteps: 7 });
69
+ const executeTool = vi.fn(async () => "done");
70
+ const ctx = buildCtx(makeBuildCtxOpts({ agentConfig: config, executeTool }));
71
+ expect(ctx.agentConfig).toBe(config);
72
+ expect(ctx.executeTool).toBe(executeTool);
73
+ expect(ctx.log).toBe(silentLogger);
74
+ });
75
+ });
76
+
77
+ describe("consumeToolCallStep", () => {
78
+ it("returns null (success) when tool call is within maxSteps", () => {
79
+ const ctx = buildCtx(makeBuildCtxOpts());
80
+ ctx.beginReply("reply-1");
81
+ const result = ctx.consumeToolCallStep("my-tool", "reply-1");
82
+ expect(result).toBeNull();
83
+ });
84
+
85
+ it("increments toolCallCount on each call", () => {
86
+ const ctx = buildCtx(makeBuildCtxOpts());
87
+ ctx.beginReply("reply-1");
88
+
89
+ ctx.consumeToolCallStep("tool-a", "reply-1");
90
+ expect(ctx.reply.toolCallCount).toBe(1);
91
+
92
+ ctx.consumeToolCallStep("tool-b", "reply-1");
93
+ expect(ctx.reply.toolCallCount).toBe(2);
94
+ });
95
+
96
+ it("allows exactly maxSteps tool calls", () => {
97
+ const ctx = buildCtx(makeBuildCtxOpts({ agentConfig: makeConfig({ maxSteps: 2 }) }));
98
+ ctx.beginReply("reply-1");
99
+
100
+ expect(ctx.consumeToolCallStep("tool-1", "reply-1")).toBeNull();
101
+ expect(ctx.consumeToolCallStep("tool-2", "reply-1")).toBeNull();
102
+ });
103
+
104
+ it("rejects when tool call count exceeds maxSteps", () => {
105
+ const ctx = buildCtx(makeBuildCtxOpts({ agentConfig: makeConfig({ maxSteps: 2 }) }));
106
+ ctx.beginReply("reply-1");
107
+
108
+ ctx.consumeToolCallStep("tool-1", "reply-1");
109
+ ctx.consumeToolCallStep("tool-2", "reply-1");
110
+ const result = ctx.consumeToolCallStep("tool-3", "reply-1");
111
+ expect(result).toBe(toolError("Maximum tool steps reached. Please respond to the user now."));
112
+ });
113
+
114
+ it("logs when maxSteps is exceeded", () => {
115
+ const log = { info: vi.fn(), warn: vi.fn(), error: vi.fn(), debug: vi.fn() };
116
+ const ctx = buildCtx(makeBuildCtxOpts({ agentConfig: makeConfig({ maxSteps: 1 }), log }));
117
+ ctx.beginReply("reply-1");
118
+
119
+ ctx.consumeToolCallStep("tool-1", "reply-1"); // ok
120
+ ctx.consumeToolCallStep("tool-2", "reply-1"); // exceeds
121
+
122
+ expect(log.info).toHaveBeenCalledWith("maxSteps exceeded, refusing tool call", {
123
+ toolCallCount: 2,
124
+ maxSteps: 1,
125
+ });
126
+ });
127
+
128
+ it("rejects with stale replyId (mismatched)", () => {
129
+ const ctx = buildCtx(makeBuildCtxOpts());
130
+ ctx.beginReply("reply-1");
131
+
132
+ const result = ctx.consumeToolCallStep("my-tool", "stale-reply");
133
+ expect(result).toBe(toolError("Reply was interrupted. Discarding stale tool call."));
134
+ });
135
+
136
+ it("rejects when replyId is null", () => {
137
+ const ctx = buildCtx(makeBuildCtxOpts());
138
+ ctx.beginReply("reply-1");
139
+
140
+ const result = ctx.consumeToolCallStep("my-tool", null);
141
+ expect(result).toBe(toolError("Reply was interrupted. Discarding stale tool call."));
142
+ });
143
+
144
+ it("rejects when no reply has been started (currentReplyId is null)", () => {
145
+ const ctx = buildCtx(makeBuildCtxOpts());
146
+ // No beginReply — currentReplyId stays null
147
+ const result = ctx.consumeToolCallStep("my-tool", "some-reply");
148
+ expect(result).toBe(toolError("Reply was interrupted. Discarding stale tool call."));
149
+ });
150
+
151
+ it("allows unlimited tool calls when maxSteps is undefined", () => {
152
+ const ctx = buildCtx(makeBuildCtxOpts({ agentConfig: makeConfig() }));
153
+ ctx.beginReply("reply-1");
154
+
155
+ // makeConfig() without maxSteps leaves it undefined
156
+ for (let i = 0; i < 100; i++) {
157
+ expect(ctx.consumeToolCallStep(`tool-${i}`, "reply-1")).toBeNull();
158
+ }
159
+ });
160
+ });
161
+
162
+ describe("pushMessages", () => {
163
+ it("appends messages to conversationMessages", () => {
164
+ const ctx = buildCtx(makeBuildCtxOpts());
165
+ const msg1: Message = { role: "user", content: "hello" };
166
+ const msg2: Message = { role: "assistant", content: "hi" };
167
+
168
+ ctx.pushMessages(msg1);
169
+ expect(ctx.conversationMessages).toEqual([msg1]);
170
+
171
+ ctx.pushMessages(msg2);
172
+ expect(ctx.conversationMessages).toEqual([msg1, msg2]);
173
+ });
174
+
175
+ it("accepts multiple messages at once", () => {
176
+ const ctx = buildCtx(makeBuildCtxOpts());
177
+ const msg1: Message = { role: "user", content: "a" };
178
+ const msg2: Message = { role: "assistant", content: "b" };
179
+ const msg3: Message = { role: "tool", content: "c" };
180
+
181
+ ctx.pushMessages(msg1, msg2, msg3);
182
+ expect(ctx.conversationMessages).toEqual([msg1, msg2, msg3]);
183
+ });
184
+
185
+ it("trims to maxHistory keeping the most recent messages", () => {
186
+ const ctx = buildCtx(makeBuildCtxOpts({ maxHistory: 3 }));
187
+
188
+ ctx.pushMessages(
189
+ { role: "user", content: "1" },
190
+ { role: "assistant", content: "2" },
191
+ { role: "user", content: "3" },
192
+ );
193
+ expect(ctx.conversationMessages).toHaveLength(3);
194
+
195
+ ctx.pushMessages({ role: "assistant", content: "4" });
196
+ expect(ctx.conversationMessages).toHaveLength(3);
197
+ expect(ctx.conversationMessages.map((m) => m.content)).toEqual(["2", "3", "4"]);
198
+ });
199
+
200
+ it("trims correctly when pushing multiple messages that exceed maxHistory", () => {
201
+ const ctx = buildCtx(makeBuildCtxOpts({ maxHistory: 2 }));
202
+
203
+ ctx.pushMessages(
204
+ { role: "user", content: "a" },
205
+ { role: "assistant", content: "b" },
206
+ { role: "user", content: "c" },
207
+ { role: "assistant", content: "d" },
208
+ );
209
+
210
+ expect(ctx.conversationMessages).toHaveLength(2);
211
+ expect(ctx.conversationMessages.map((m) => m.content)).toEqual(["c", "d"]);
212
+ });
213
+
214
+ it("does not trim when maxHistory is 0 (disabled)", () => {
215
+ const ctx = buildCtx(makeBuildCtxOpts({ maxHistory: 0 }));
216
+
217
+ for (let i = 0; i < 300; i++) {
218
+ ctx.pushMessages({ role: "user", content: `msg-${i}` });
219
+ }
220
+ expect(ctx.conversationMessages).toHaveLength(300);
221
+ });
222
+ });
223
+
224
+ describe("cancelReply", () => {
225
+ it("resets pendingTools and toolCallCount", () => {
226
+ const ctx = buildCtx(makeBuildCtxOpts());
227
+ ctx.beginReply("reply-1");
228
+ ctx.consumeToolCallStep("tool-1", "reply-1");
229
+ ctx.reply.pendingTools.push({ callId: "c1", result: "r1" });
230
+
231
+ expect(ctx.reply.toolCallCount).toBe(1);
232
+ expect(ctx.reply.pendingTools).toHaveLength(1);
233
+
234
+ ctx.cancelReply();
235
+
236
+ expect(ctx.reply.toolCallCount).toBe(0);
237
+ expect(ctx.reply.pendingTools).toEqual([]);
238
+ expect(ctx.reply.currentReplyId).toBeNull();
239
+ });
240
+
241
+ it("allows a new reply to start fresh after cancel", () => {
242
+ const ctx = buildCtx(makeBuildCtxOpts({ agentConfig: makeConfig({ maxSteps: 1 }) }));
243
+ ctx.beginReply("reply-1");
244
+ ctx.consumeToolCallStep("tool-1", "reply-1"); // uses the single step
245
+
246
+ ctx.cancelReply();
247
+ ctx.beginReply("reply-2");
248
+
249
+ // Should succeed because toolCallCount was reset
250
+ const result = ctx.consumeToolCallStep("tool-1", "reply-2");
251
+ expect(result).toBeNull();
252
+ });
253
+ });
254
+
255
+ describe("beginReply", () => {
256
+ it("resets reply state with the given replyId", () => {
257
+ const ctx = buildCtx(makeBuildCtxOpts());
258
+ ctx.beginReply("reply-1");
259
+
260
+ expect(ctx.reply.currentReplyId).toBe("reply-1");
261
+ expect(ctx.reply.pendingTools).toEqual([]);
262
+ expect(ctx.reply.toolCallCount).toBe(0);
263
+ });
264
+
265
+ it("clears turnPromise", () => {
266
+ const ctx = buildCtx(makeBuildCtxOpts());
267
+ ctx.chainTurn(Promise.resolve());
268
+ expect(ctx.turnPromise).not.toBeNull();
269
+
270
+ ctx.beginReply("reply-1");
271
+ expect(ctx.turnPromise).toBeNull();
272
+ });
273
+
274
+ it("resets toolCallCount from a previous reply", () => {
275
+ const ctx = buildCtx(makeBuildCtxOpts({ agentConfig: makeConfig({ maxSteps: 2 }) }));
276
+ ctx.beginReply("reply-1");
277
+ ctx.consumeToolCallStep("tool-a", "reply-1");
278
+ ctx.consumeToolCallStep("tool-b", "reply-1");
279
+ expect(ctx.reply.toolCallCount).toBe(2);
280
+
281
+ ctx.beginReply("reply-2");
282
+ expect(ctx.reply.toolCallCount).toBe(0);
283
+
284
+ // Can now use maxSteps again
285
+ expect(ctx.consumeToolCallStep("tool-a", "reply-2")).toBeNull();
286
+ expect(ctx.consumeToolCallStep("tool-b", "reply-2")).toBeNull();
287
+ });
288
+
289
+ it("invalidates tool calls from the previous reply", () => {
290
+ const ctx = buildCtx(makeBuildCtxOpts());
291
+ ctx.beginReply("reply-1");
292
+ ctx.beginReply("reply-2");
293
+
294
+ // Tool call using old replyId should be rejected
295
+ const result = ctx.consumeToolCallStep("my-tool", "reply-1");
296
+ expect(result).toBe(toolError("Reply was interrupted. Discarding stale tool call."));
297
+ });
298
+ });
299
+
300
+ describe("chainTurn", () => {
301
+ it("sets turnPromise on first call", () => {
302
+ const ctx = buildCtx(makeBuildCtxOpts());
303
+ expect(ctx.turnPromise).toBeNull();
304
+
305
+ ctx.chainTurn(Promise.resolve());
306
+ expect(ctx.turnPromise).not.toBeNull();
307
+ });
308
+
309
+ it("chains promises sequentially", async () => {
310
+ const ctx = buildCtx(makeBuildCtxOpts());
311
+ const order: number[] = [];
312
+
313
+ ctx.chainTurn(
314
+ new Promise<void>((resolve) => {
315
+ queueMicrotask(() => {
316
+ order.push(1);
317
+ resolve();
318
+ });
319
+ }),
320
+ );
321
+
322
+ ctx.chainTurn(
323
+ new Promise<void>((resolve) => {
324
+ queueMicrotask(() => {
325
+ order.push(2);
326
+ resolve();
327
+ });
328
+ }),
329
+ );
330
+
331
+ await ctx.turnPromise;
332
+ await flush();
333
+ expect(order).toEqual([1, 2]);
334
+ });
335
+
336
+ it("continues the chain even if a prior turn rejects", async () => {
337
+ const ctx = buildCtx(makeBuildCtxOpts());
338
+ const order: string[] = [];
339
+
340
+ ctx.chainTurn(
341
+ new Promise<void>((_, reject) => {
342
+ queueMicrotask(() => {
343
+ order.push("fail");
344
+ reject(new Error("boom"));
345
+ });
346
+ }),
347
+ );
348
+
349
+ ctx.chainTurn(
350
+ new Promise<void>((resolve) => {
351
+ queueMicrotask(() => {
352
+ order.push("success");
353
+ resolve();
354
+ });
355
+ }),
356
+ );
357
+
358
+ // The chain uses .then() which means rejection propagates.
359
+ // We need to catch the final promise to avoid unhandled rejection.
360
+ try {
361
+ await ctx.turnPromise;
362
+ } catch {
363
+ // expected
364
+ }
365
+ await flush();
366
+
367
+ expect(order).toContain("fail");
368
+ });
369
+
370
+ it("allows awaiting turnPromise to wait for all chained turns", async () => {
371
+ const ctx = buildCtx(makeBuildCtxOpts());
372
+ let completed = false;
373
+
374
+ ctx.chainTurn(
375
+ new Promise<void>((resolve) => {
376
+ setTimeout(() => {
377
+ completed = true;
378
+ resolve();
379
+ }, 10);
380
+ }),
381
+ );
382
+
383
+ expect(completed).toBe(false);
384
+ await ctx.turnPromise;
385
+ expect(completed).toBe(true);
386
+ });
387
+ });
@@ -83,16 +83,8 @@ describe("fixture replay through session", () => {
83
83
  );
84
84
 
85
85
  // Client received tool_call and tool_call_done events
86
- const toolStart = client.events.find((e) => (e as { type: string }).type === "tool_call") as
87
- | { toolName: string; args: Record<string, unknown> }
88
- | undefined;
89
- expect(toolStart).toBeDefined();
90
- expect(toolStart?.toolName).toBe("get_weather");
91
-
92
- const toolDone = client.events.find((e) => (e as { type: string }).type === "tool_call_done") as
93
- | { result: string }
94
- | undefined;
95
- expect(toolDone).toBeDefined();
86
+ expect(client.events).toContainEvent("tool_call", { toolName: "get_weather" });
87
+ expect(client.events).toContainEvent("tool_call_done");
96
88
 
97
89
  // Tool result was sent back to S2S after replyDone
98
90
  await vi.waitFor(() => expect(mockHandle.sendToolResult).toHaveBeenCalled());