@alexkroman1/aai 1.4.5 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +9 -9
- package/CHANGELOG.md +13 -0
- package/dist/assemblyai-C969QGi4.js +35 -0
- package/dist/cartesia-BfQPOQ7Y.js +37 -0
- package/dist/host/_pipeline-test-fakes.d.ts +3 -1
- package/dist/host/providers/stt/deepgram.d.ts +28 -0
- package/dist/host/providers/tts/cartesia.d.ts +1 -1
- package/dist/host/providers/tts/rime.d.ts +44 -0
- package/dist/host/runtime-barrel.d.ts +4 -2
- package/dist/host/runtime-barrel.js +1432 -1208
- package/dist/host/runtime.d.ts +2 -2
- package/dist/host/s2s.d.ts +16 -16
- package/dist/host/session-core.d.ts +37 -0
- package/dist/host/transports/pipeline-transport.d.ts +48 -0
- package/dist/host/transports/s2s-transport.d.ts +19 -0
- package/dist/host/transports/types.d.ts +45 -0
- package/dist/host/ws-handler.d.ts +14 -10
- package/dist/sdk/protocol.d.ts +6 -5
- package/dist/sdk/providers/llm-barrel.js +1 -1
- package/dist/sdk/providers/stt/deepgram.d.ts +35 -0
- package/dist/sdk/providers/stt-barrel.d.ts +1 -0
- package/dist/sdk/providers/stt-barrel.js +2 -2
- package/dist/sdk/providers/tts/cartesia.d.ts +12 -4
- package/dist/sdk/providers/tts/rime.d.ts +42 -0
- package/dist/sdk/providers/tts-barrel.d.ts +1 -0
- package/dist/sdk/providers/tts-barrel.js +2 -2
- package/host/_pipeline-test-fakes.ts +6 -3
- package/host/_test-utils.ts +209 -128
- package/host/cleanup.test.ts +25 -298
- package/host/integration/pipeline-reference.integration.test.ts +30 -35
- package/host/providers/resolve.ts +10 -2
- package/host/providers/stt/deepgram.test.ts +229 -0
- package/host/providers/stt/deepgram.ts +172 -0
- package/host/providers/tts/cartesia.ts +7 -3
- package/host/providers/tts/rime.test.ts +251 -0
- package/host/providers/tts/rime.ts +322 -0
- package/host/runtime-barrel.ts +4 -2
- package/host/runtime.test.ts +13 -46
- package/host/runtime.ts +131 -23
- package/host/s2s.test.ts +122 -131
- package/host/s2s.ts +44 -52
- package/host/session-core.test.ts +257 -0
- package/host/session-core.ts +262 -0
- package/host/transports/pipeline-transport.test.ts +651 -0
- package/host/transports/pipeline-transport.ts +532 -0
- package/host/{fixture-replay.test.ts → transports/s2s-transport-fixtures.test.ts} +76 -106
- package/host/transports/s2s-transport.test.ts +56 -0
- package/host/transports/s2s-transport.ts +116 -0
- package/host/transports/types.test.ts +22 -0
- package/host/transports/types.ts +51 -0
- package/host/ws-handler.test.ts +324 -242
- package/host/ws-handler.ts +56 -59
- package/package.json +2 -1
- package/sdk/__snapshots__/exports.test.ts.snap +3 -3
- package/sdk/protocol-compat.test.ts +8 -0
- package/sdk/protocol.ts +6 -5
- package/sdk/providers/stt/deepgram.ts +43 -0
- package/sdk/providers/stt-barrel.ts +2 -0
- package/sdk/providers/tts/cartesia.ts +15 -5
- package/sdk/providers/tts/rime.ts +52 -0
- package/sdk/providers/tts-barrel.ts +2 -0
- package/dist/assemblyai-Cxg9eobY.js +0 -18
- package/dist/cartesia-DwDk2tEu.js +0 -10
- package/dist/host/pipeline-session-ctx.d.ts +0 -24
- package/dist/host/pipeline-session.d.ts +0 -52
- package/dist/host/session-ctx.d.ts +0 -73
- package/dist/host/session.d.ts +0 -62
- package/host/pipeline-session-ctx.test.ts +0 -31
- package/host/pipeline-session-ctx.ts +0 -36
- package/host/pipeline-session.test.ts +0 -672
- package/host/pipeline-session.ts +0 -533
- package/host/s2s-fixtures.test.ts +0 -237
- package/host/session-ctx.test.ts +0 -387
- package/host/session-ctx.ts +0 -134
- package/host/session-fixture-replay.test.ts +0 -128
- package/host/session.test.ts +0 -634
- package/host/session.ts +0 -412
- /package/dist/{anthropic-BrUCPKUc.js → anthropic-CcLZygAr.js} +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
// Copyright 2025 the AAI authors. MIT license.
|
|
2
2
|
/**
|
|
3
|
-
* Fixture replay tests with a REAL Runtime.
|
|
3
|
+
* Fixture replay tests with a REAL Runtime — now wired to the transport layer.
|
|
4
4
|
*
|
|
5
5
|
* Replays recorded AssemblyAI S2S messages (from Kokoro TTS audio) through
|
|
6
6
|
* a real agent session — real tool execution, real Zod arg validation, real
|
|
@@ -9,12 +9,16 @@
|
|
|
9
9
|
* This exercises: AgentDef → toAgentConfig → tool schemas → Zod validation
|
|
10
10
|
* → executeToolCall → session orchestration (reply guards, tool buffering,
|
|
11
11
|
* turnPromise chaining, conversation history).
|
|
12
|
+
*
|
|
13
|
+
* Migrated from host/fixture-replay.test.ts (Task 19). Uses createFixtureSession
|
|
14
|
+
* which spies on s2s-transport.ts _internals.connectS2s and fires S2sCallbacks
|
|
15
|
+
* directly — no nanoevents / old S2sEvents system.
|
|
12
16
|
*/
|
|
13
17
|
|
|
14
18
|
import { afterEach, describe, expect, test, vi } from "vitest";
|
|
15
19
|
import { z } from "zod";
|
|
16
|
-
import type { AgentDef } from "
|
|
17
|
-
import { createFixtureSession, flush } from "
|
|
20
|
+
import type { AgentDef } from "../../sdk/types.ts";
|
|
21
|
+
import { createFixtureSession, flush } from "../_test-utils.ts";
|
|
18
22
|
|
|
19
23
|
// ─── Test agents with deterministic tools ────────────────────────────────────
|
|
20
24
|
|
|
@@ -67,7 +71,7 @@ const statefulAgent: AgentDef<{ callCount: number }> = {
|
|
|
67
71
|
|
|
68
72
|
// ─── Tests ───────────────────────────────────────────────────────────────────
|
|
69
73
|
|
|
70
|
-
describe("fixture replay with real executor", () => {
|
|
74
|
+
describe("fixture replay with real executor (transport layer)", () => {
|
|
71
75
|
let cleanup: () => void;
|
|
72
76
|
|
|
73
77
|
afterEach(() => {
|
|
@@ -79,15 +83,15 @@ describe("fixture replay with real executor", () => {
|
|
|
79
83
|
test("tool call fixture: Zod validates args, real tool executes, result sent to S2S", async () => {
|
|
80
84
|
const ctx = createFixtureSession(weatherAgent);
|
|
81
85
|
cleanup = ctx.cleanup;
|
|
82
|
-
await ctx.
|
|
86
|
+
await ctx.start();
|
|
83
87
|
|
|
84
88
|
ctx.replay("tool-call-sequence.json");
|
|
85
89
|
|
|
86
90
|
// Wait for the async tool execution pipeline to complete
|
|
87
|
-
await vi.waitFor(() => expect(ctx.
|
|
91
|
+
await vi.waitFor(() => expect(ctx.fakeHandle.sendToolResult).toHaveBeenCalled());
|
|
88
92
|
|
|
89
93
|
// Verify the real tool was called and produced correct output
|
|
90
|
-
const [callId, resultStr] = vi.mocked(ctx.
|
|
94
|
+
const [callId, resultStr] = vi.mocked(ctx.fakeHandle.sendToolResult).mock.calls[0] as [
|
|
91
95
|
string,
|
|
92
96
|
string,
|
|
93
97
|
];
|
|
@@ -101,34 +105,30 @@ describe("fixture replay with real executor", () => {
|
|
|
101
105
|
test("tool call fixture: client receives tool_call with validated args", async () => {
|
|
102
106
|
const ctx = createFixtureSession(weatherAgent);
|
|
103
107
|
cleanup = ctx.cleanup;
|
|
104
|
-
await ctx.
|
|
108
|
+
await ctx.start();
|
|
105
109
|
|
|
106
110
|
ctx.replay("tool-call-sequence.json");
|
|
107
|
-
await vi.waitFor(() => expect(ctx.
|
|
111
|
+
await vi.waitFor(() => expect(ctx.fakeHandle.sendToolResult).toHaveBeenCalled());
|
|
108
112
|
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
expect(
|
|
113
|
-
expect(toolStart?.args).toEqual({ city: "San Francisco" });
|
|
113
|
+
expect(ctx.client.toolCallEvents.length).toBeGreaterThan(0);
|
|
114
|
+
const toolEvent = ctx.client.toolCallEvents[0];
|
|
115
|
+
expect(toolEvent?.name).toBe("get_weather");
|
|
116
|
+
expect(toolEvent?.args).toEqual({ city: "San Francisco" });
|
|
114
117
|
});
|
|
115
118
|
|
|
116
119
|
test("tool call fixture: conversation history accumulates user + assistant messages", async () => {
|
|
117
120
|
const ctx = createFixtureSession(weatherAgent);
|
|
118
121
|
cleanup = ctx.cleanup;
|
|
119
|
-
await ctx.
|
|
122
|
+
await ctx.start();
|
|
120
123
|
|
|
121
124
|
ctx.replay("tool-call-sequence.json");
|
|
122
|
-
await vi.waitFor(() => expect(ctx.
|
|
125
|
+
await vi.waitFor(() => expect(ctx.fakeHandle.sendToolResult).toHaveBeenCalled());
|
|
123
126
|
await flush();
|
|
124
127
|
|
|
125
128
|
// Client received user transcript
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
);
|
|
129
|
-
expect(turns.length).toBeGreaterThan(0);
|
|
130
|
-
const userText = (turns.at(-1) as { text: string }).text;
|
|
131
|
-
expect(userText.toLowerCase()).toContain("weather");
|
|
129
|
+
expect(ctx.client.userTranscripts.length).toBeGreaterThan(0);
|
|
130
|
+
const lastUserText = ctx.client.userTranscripts.at(-1) ?? "";
|
|
131
|
+
expect(lastUserText.toLowerCase()).toContain("weather");
|
|
132
132
|
});
|
|
133
133
|
|
|
134
134
|
// ── Simple question: no tools, just session lifecycle ──────────────────
|
|
@@ -136,29 +136,25 @@ describe("fixture replay with real executor", () => {
|
|
|
136
136
|
test("simple question fixture: greeting + agent response reach client", async () => {
|
|
137
137
|
const ctx = createFixtureSession(simpleAgent);
|
|
138
138
|
cleanup = ctx.cleanup;
|
|
139
|
-
await ctx.
|
|
139
|
+
await ctx.start();
|
|
140
140
|
|
|
141
141
|
ctx.replay("simple-question-sequence.json");
|
|
142
142
|
await flush();
|
|
143
143
|
|
|
144
|
-
|
|
145
|
-
(e) => (e as { type: string }).type === "agent_transcript",
|
|
146
|
-
);
|
|
147
|
-
expect(chats.length).toBeGreaterThanOrEqual(2); // greeting + answer
|
|
144
|
+
expect(ctx.client.agentTranscripts.length).toBeGreaterThanOrEqual(2); // greeting + answer
|
|
148
145
|
});
|
|
149
146
|
|
|
150
147
|
test("simple question fixture: user speech events forwarded to client", async () => {
|
|
151
148
|
const ctx = createFixtureSession(simpleAgent);
|
|
152
149
|
cleanup = ctx.cleanup;
|
|
153
|
-
await ctx.
|
|
150
|
+
await ctx.start();
|
|
154
151
|
|
|
155
152
|
ctx.replay("simple-question-sequence.json");
|
|
156
153
|
await flush();
|
|
157
154
|
|
|
158
|
-
|
|
159
|
-
expect(
|
|
160
|
-
expect(
|
|
161
|
-
expect(types).toContain("user_transcript");
|
|
155
|
+
expect(ctx.client.speechStartedCount).toBeGreaterThan(0);
|
|
156
|
+
expect(ctx.client.speechStoppedCount).toBeGreaterThan(0);
|
|
157
|
+
expect(ctx.client.userTranscripts.length).toBeGreaterThan(0);
|
|
162
158
|
});
|
|
163
159
|
|
|
164
160
|
// ── Stateful agent: session state persists across tool calls ───────────
|
|
@@ -166,12 +162,12 @@ describe("fixture replay with real executor", () => {
|
|
|
166
162
|
test("stateful agent: tool accesses and mutates session state", async () => {
|
|
167
163
|
const ctx = createFixtureSession(statefulAgent);
|
|
168
164
|
cleanup = ctx.cleanup;
|
|
169
|
-
await ctx.
|
|
165
|
+
await ctx.start();
|
|
170
166
|
|
|
171
167
|
ctx.replay("tool-call-sequence.json");
|
|
172
|
-
await vi.waitFor(() => expect(ctx.
|
|
168
|
+
await vi.waitFor(() => expect(ctx.fakeHandle.sendToolResult).toHaveBeenCalled());
|
|
173
169
|
|
|
174
|
-
const [, resultStr] = vi.mocked(ctx.
|
|
170
|
+
const [, resultStr] = vi.mocked(ctx.fakeHandle.sendToolResult).mock.calls[0] as [
|
|
175
171
|
string,
|
|
176
172
|
string,
|
|
177
173
|
];
|
|
@@ -181,17 +177,16 @@ describe("fixture replay with real executor", () => {
|
|
|
181
177
|
|
|
182
178
|
// ── Greeting only: session lifecycle without user audio ────────────────
|
|
183
179
|
|
|
184
|
-
test("greeting fixture: session setup completes with
|
|
180
|
+
test("greeting fixture: session setup completes with reply_done", async () => {
|
|
185
181
|
const ctx = createFixtureSession(simpleAgent);
|
|
186
182
|
cleanup = ctx.cleanup;
|
|
187
|
-
await ctx.
|
|
183
|
+
await ctx.start();
|
|
188
184
|
|
|
189
185
|
ctx.replay("greeting-session-sequence.json");
|
|
190
186
|
await flush();
|
|
191
187
|
|
|
192
|
-
|
|
193
|
-
expect(
|
|
194
|
-
expect(types).toContain("reply_done");
|
|
188
|
+
expect(ctx.client.agentTranscripts.length).toBeGreaterThan(0);
|
|
189
|
+
expect(ctx.client.replyDoneCount).toBeGreaterThan(0);
|
|
195
190
|
});
|
|
196
191
|
|
|
197
192
|
// ── Tool schemas: real agent produces correct S2S tool schemas ─────────
|
|
@@ -231,13 +226,13 @@ describe("fixture replay with real executor", () => {
|
|
|
231
226
|
|
|
232
227
|
const ctx = createFixtureSession(agent);
|
|
233
228
|
cleanup = ctx.cleanup;
|
|
234
|
-
await ctx.
|
|
229
|
+
await ctx.start();
|
|
235
230
|
|
|
236
231
|
ctx.replay("tool-call-sequence.json");
|
|
237
|
-
await vi.waitFor(() => expect(ctx.
|
|
232
|
+
await vi.waitFor(() => expect(ctx.fakeHandle.sendToolResult).toHaveBeenCalled());
|
|
238
233
|
|
|
239
234
|
// Tool result should contain the error
|
|
240
|
-
const [, resultStr] = vi.mocked(ctx.
|
|
235
|
+
const [, resultStr] = vi.mocked(ctx.fakeHandle.sendToolResult).mock.calls[0] as [
|
|
241
236
|
string,
|
|
242
237
|
string,
|
|
243
238
|
];
|
|
@@ -266,13 +261,13 @@ describe("fixture replay with real executor", () => {
|
|
|
266
261
|
|
|
267
262
|
const ctx = createFixtureSession(agent);
|
|
268
263
|
cleanup = ctx.cleanup;
|
|
269
|
-
await ctx.
|
|
264
|
+
await ctx.start();
|
|
270
265
|
|
|
271
266
|
ctx.replay("tool-call-sequence.json");
|
|
272
|
-
await vi.waitFor(() => expect(ctx.
|
|
267
|
+
await vi.waitFor(() => expect(ctx.fakeHandle.sendToolResult).toHaveBeenCalled());
|
|
273
268
|
|
|
274
269
|
// The result should contain a Zod validation error
|
|
275
|
-
const [, resultStr] = vi.mocked(ctx.
|
|
270
|
+
const [, resultStr] = vi.mocked(ctx.fakeHandle.sendToolResult).mock.calls[0] as [
|
|
276
271
|
string,
|
|
277
272
|
string,
|
|
278
273
|
];
|
|
@@ -304,45 +299,31 @@ describe("fixture replay with real executor", () => {
|
|
|
304
299
|
|
|
305
300
|
const ctx = createFixtureSession(agent);
|
|
306
301
|
cleanup = ctx.cleanup;
|
|
307
|
-
await ctx.
|
|
302
|
+
await ctx.start();
|
|
308
303
|
|
|
309
|
-
const
|
|
304
|
+
const cbs = ctx.mockCallbacks;
|
|
310
305
|
|
|
311
306
|
// Fire an interrupted transcript — should NOT go into conversation history
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
text: "This was interrupted",
|
|
316
|
-
_interrupted: true,
|
|
317
|
-
});
|
|
318
|
-
h._fire("event", { type: "cancelled" });
|
|
307
|
+
cbs.onReplyStarted("r1");
|
|
308
|
+
cbs.onAgentTranscript("This was interrupted", true);
|
|
309
|
+
cbs.onCancelled();
|
|
319
310
|
await flush();
|
|
320
311
|
|
|
321
312
|
// Client sees both agent_transcript and cancelled events
|
|
322
|
-
|
|
323
|
-
expect(
|
|
324
|
-
expect(types).toContain("cancelled");
|
|
313
|
+
expect(ctx.client.agentTranscripts).toContain("This was interrupted");
|
|
314
|
+
expect(ctx.client.cancelledCount).toBeGreaterThan(0);
|
|
325
315
|
|
|
326
316
|
// Fire a non-interrupted transcript — SHOULD go into conversation history
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
text: "This was completed",
|
|
331
|
-
_interrupted: false,
|
|
332
|
-
});
|
|
333
|
-
h._fire("event", { type: "reply_done" });
|
|
317
|
+
cbs.onReplyStarted("r2");
|
|
318
|
+
cbs.onAgentTranscript("This was completed", false);
|
|
319
|
+
cbs.onReplyDone();
|
|
334
320
|
await flush();
|
|
335
321
|
|
|
336
322
|
// Trigger a tool call to inspect conversation history.
|
|
337
|
-
|
|
323
|
+
cbs.onUserTranscript("check");
|
|
338
324
|
await flush();
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
type: "tool_call",
|
|
342
|
-
toolCallId: "c1",
|
|
343
|
-
toolName: "check_history",
|
|
344
|
-
args: { q: "test" },
|
|
345
|
-
});
|
|
325
|
+
cbs.onReplyStarted("r3");
|
|
326
|
+
cbs.onToolCall("c1", "check_history", { q: "test" });
|
|
346
327
|
// Wait for tool to execute (captures messages)
|
|
347
328
|
await vi.waitFor(() => expect(capturedMessages.length).toBeGreaterThan(0));
|
|
348
329
|
|
|
@@ -376,27 +357,27 @@ describe("fixture replay with real executor", () => {
|
|
|
376
357
|
|
|
377
358
|
const ctx = createFixtureSession(agent);
|
|
378
359
|
cleanup = ctx.cleanup;
|
|
379
|
-
await ctx.
|
|
360
|
+
await ctx.start();
|
|
380
361
|
|
|
381
362
|
ctx.replay("tool-call-sequence.json");
|
|
382
|
-
await vi.waitFor(() => expect(ctx.
|
|
363
|
+
await vi.waitFor(() => expect(ctx.fakeHandle.sendToolResult).toHaveBeenCalled());
|
|
383
364
|
|
|
384
365
|
// The tool should have seen the user's weather question in messages
|
|
385
366
|
const userMsgs = capturedMessages.filter((m) => m.role === "user");
|
|
386
367
|
expect(userMsgs.some((m) => m.content.toLowerCase().includes("weather"))).toBe(true);
|
|
387
368
|
});
|
|
388
369
|
|
|
389
|
-
// ── Audio chunks forwarded to client.
|
|
370
|
+
// ── Audio chunks forwarded to client.audio ─────────────────────────────
|
|
390
371
|
|
|
391
|
-
test("reply.audio events forwarded to client.
|
|
372
|
+
test("reply.audio events forwarded to client.audio", async () => {
|
|
392
373
|
const ctx = createFixtureSession(simpleAgent);
|
|
393
374
|
cleanup = ctx.cleanup;
|
|
394
|
-
await ctx.
|
|
375
|
+
await ctx.start();
|
|
395
376
|
|
|
396
|
-
//
|
|
377
|
+
// Fire audio events directly via callbacks (replay skips reply.audio)
|
|
397
378
|
const audioBytes = new Uint8Array([10, 20, 30, 40]);
|
|
398
|
-
ctx.
|
|
399
|
-
ctx.
|
|
379
|
+
ctx.mockCallbacks.onAudio(audioBytes);
|
|
380
|
+
ctx.mockCallbacks.onAudio(new Uint8Array([50, 60]));
|
|
400
381
|
|
|
401
382
|
expect(ctx.client.audioChunks.length).toBe(2);
|
|
402
383
|
expect(Array.from(ctx.client.audioChunks[0] ?? [])).toEqual([10, 20, 30, 40]);
|
|
@@ -422,40 +403,29 @@ describe("fixture replay with real executor", () => {
|
|
|
422
403
|
|
|
423
404
|
const ctx = createFixtureSession(agent);
|
|
424
405
|
cleanup = ctx.cleanup;
|
|
425
|
-
await ctx.
|
|
426
|
-
|
|
427
|
-
const
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
toolCallId: "c1",
|
|
432
|
-
toolName: "get_weather",
|
|
433
|
-
args: { city: "NYC" },
|
|
434
|
-
});
|
|
435
|
-
h._fire("event", {
|
|
436
|
-
type: "tool_call",
|
|
437
|
-
toolCallId: "c2",
|
|
438
|
-
toolName: "get_weather",
|
|
439
|
-
args: { city: "LA" },
|
|
440
|
-
});
|
|
406
|
+
await ctx.start();
|
|
407
|
+
|
|
408
|
+
const cbs = ctx.mockCallbacks;
|
|
409
|
+
cbs.onReplyStarted("r1");
|
|
410
|
+
cbs.onToolCall("c1", "get_weather", { city: "NYC" });
|
|
411
|
+
cbs.onToolCall("c2", "get_weather", { city: "LA" });
|
|
441
412
|
|
|
442
|
-
// Wait for both tool calls to
|
|
413
|
+
// Wait for both tool calls to be dispatched to the client
|
|
443
414
|
await vi.waitFor(() => {
|
|
444
|
-
|
|
445
|
-
expect(starts.length).toBe(2);
|
|
415
|
+
expect(ctx.client.toolCallEvents.length).toBe(2);
|
|
446
416
|
});
|
|
447
417
|
|
|
448
|
-
// Results NOT sent yet —
|
|
449
|
-
expect(ctx.
|
|
418
|
+
// Results NOT sent yet — reply.done hasn't fired
|
|
419
|
+
expect(ctx.fakeHandle.sendToolResult).not.toHaveBeenCalled();
|
|
450
420
|
|
|
451
|
-
// Fire
|
|
452
|
-
|
|
421
|
+
// Fire reply.done — should flush both results
|
|
422
|
+
cbs.onReplyDone();
|
|
453
423
|
await vi.waitFor(() => {
|
|
454
|
-
expect(ctx.
|
|
424
|
+
expect(ctx.fakeHandle.sendToolResult).toHaveBeenCalledTimes(2);
|
|
455
425
|
});
|
|
456
426
|
|
|
457
427
|
// Verify both results are correct
|
|
458
|
-
const calls = vi.mocked(ctx.
|
|
428
|
+
const calls = vi.mocked(ctx.fakeHandle.sendToolResult).mock.calls as [string, string][];
|
|
459
429
|
const results = calls.map(([, r]) => JSON.parse(r));
|
|
460
430
|
expect(results.some((r) => r.city === "NYC")).toBe(true);
|
|
461
431
|
expect(results.some((r) => r.city === "LA")).toBe(true);
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import { describe, expect, test, vi } from "vitest";
|
|
2
|
+
import { createS2sTransport } from "./s2s-transport.ts";
|
|
3
|
+
import type { TransportCallbacks } from "./types.ts";
|
|
4
|
+
|
|
5
|
+
/**
 * Build a complete `TransportCallbacks` object in which every required
 * callback is a fresh vitest mock, so each test gets isolated spies.
 */
function makeCallbacks(): TransportCallbacks {
  const spies: TransportCallbacks = {
    // Reply lifecycle
    onReplyStarted: vi.fn(),
    onReplyDone: vi.fn(),
    onCancelled: vi.fn(),
    // Audio out
    onAudioChunk: vi.fn(),
    onAudioDone: vi.fn(),
    // Transcripts and tools
    onUserTranscript: vi.fn(),
    onAgentTranscript: vi.fn(),
    onToolCall: vi.fn(),
    // Errors and voice activity
    onError: vi.fn(),
    onSpeechStarted: vi.fn(),
    onSpeechStopped: vi.fn(),
  };
  return spies;
}
|
|
20
|
+
|
|
21
|
+
describe("S2sTransport", () => {
  test("start() opens an S2S connection and sends session.update", async () => {
    const sendSpy = vi.fn();
    const closeSpy = vi.fn();

    // Fake socket: a real EventTarget decorated with the few members the
    // transport touches. It starts in readyState 0 and is flipped to 1 below.
    const target = new EventTarget();
    const fakeSocket = Object.assign(target, {
      readyState: 0,
      send: sendSpy,
      close: closeSpy,
      addEventListener: EventTarget.prototype.addEventListener as unknown as (
        type: string,
        listener: EventListener,
      ) => void,
    }) as unknown as import("../s2s.ts").S2sWebSocket;

    // Schedule the "open" event before start() is awaited so the connection
    // attempt can resolve on the next timer tick.
    setTimeout(() => {
      (fakeSocket as unknown as { readyState: number }).readyState = 1;
      target.dispatchEvent(new Event("open"));
    }, 0);

    const transport = createS2sTransport({
      apiKey: "k",
      s2sConfig: { wssUrl: "wss://fake", inputSampleRate: 16_000, outputSampleRate: 24_000 },
      sessionConfig: { systemPrompt: "test", tools: [] },
      toolSchemas: [],
      callbacks: makeCallbacks(),
      sid: "sid-1",
      agent: "a",
      createWebSocket: () => fakeSocket,
    });

    await transport.start();

    // The first frame written to the socket must be the session config.
    expect(sendSpy).toHaveBeenCalled();
    const [firstCall] = sendSpy.mock.calls;
    const firstSend = JSON.parse(firstCall?.[0] as string);
    expect(firstSend.type).toBe("session.update");

    // Stopping the transport closes the underlying socket.
    await transport.stop();
    expect(closeSpy).toHaveBeenCalled();
  });
});
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
// Copyright 2026 the AAI authors. MIT license.
|
|
2
|
+
// S2S transport — wraps connectS2s and forwards typed callbacks into the SessionCore.
|
|
3
|
+
|
|
4
|
+
import type { Logger, S2SConfig } from "../runtime-config.ts";
|
|
5
|
+
import { consoleLogger } from "../runtime-config.ts";
|
|
6
|
+
import {
|
|
7
|
+
type CreateS2sWebSocket,
|
|
8
|
+
connectS2s,
|
|
9
|
+
defaultCreateS2sWebSocket,
|
|
10
|
+
type S2sHandle,
|
|
11
|
+
type S2sSessionConfig,
|
|
12
|
+
type S2sToolSchema,
|
|
13
|
+
} from "../s2s.ts";
|
|
14
|
+
import type { Transport, TransportCallbacks, TransportSessionConfig } from "./types.ts";
|
|
15
|
+
|
|
16
|
+
/**
 * @internal
 * Indirection object around `connectS2s`. `createS2sTransport` calls
 * `_internals.connectS2s`, so unit tests can spy on or replace the property
 * without touching the real connection code.
 */
export const _internals = { connectS2s: connectS2s };
|
|
18
|
+
|
|
19
|
+
/** Everything `createS2sTransport` needs to wire one session to the S2S provider. */
export type S2sTransportOptions = {
  /** Credential handed to `connectS2s`. */
  apiKey: string;
  /** Provider connection settings, forwarded to `connectS2s` as `config`. */
  s2sConfig: S2SConfig;
  /** Initial session config; sent with `updateSession` as soon as the connection is up. */
  sessionConfig: S2sSessionConfig;
  /** Tool schemas for the session. NOTE(review): not read by `createS2sTransport` in this file — confirm whether it is still needed. */
  toolSchemas: S2sToolSchema[];
  /** Sink for the events the transport produces. */
  callbacks: TransportCallbacks;
  /** Session id; forwarded to `connectS2s` and attached to log entries. */
  sid: string;
  /** Agent name; attached to the warning logged when the socket closes mid-reply. */
  agent: string;
  /** WebSocket factory override; falls back to `defaultCreateS2sWebSocket`. */
  createWebSocket?: CreateS2sWebSocket;
  /** Logger override; falls back to `consoleLogger`. */
  logger?: Logger;
};
|
|
30
|
+
|
|
31
|
+
/**
 * Create the speech-to-speech (S2S) transport for a single session.
 *
 * The returned `Transport` wraps a `connectS2s` handle: `start()` opens the
 * connection and sends the initial session config, provider events are
 * forwarded to `opts.callbacks`, and the send/stop methods are no-ops while
 * no handle is open.
 *
 * @param opts - Connection settings, initial session config, callbacks and
 *   optional WebSocket factory / logger overrides.
 * @returns A `Transport` bound to this session.
 */
export function createS2sTransport(opts: S2sTransportOptions): Transport {
  const log = opts.logger ?? consoleLogger;
  const createWs = opts.createWebSocket ?? defaultCreateS2sWebSocket;
  let handle: S2sHandle | null = null;
  // Id of the reply currently in flight; null when idle. Only used to detect
  // a socket close that happens in the middle of a reply.
  let currentReplyId: string | null = null;

  /** Open the provider connection, then push the initial session config. */
  async function start(): Promise<void> {
    handle = await _internals.connectS2s({
      apiKey: opts.apiKey,
      config: opts.s2sConfig,
      createWebSocket: createWs,
      logger: log,
      sid: opts.sid,
      // Every callback is forwarded through an arrow wrapper. Passing
      // `opts.callbacks.onX` directly would detach the method from its
      // object and lose `this` for implementations that rely on it.
      callbacks: {
        onSessionReady: (providerSessionId) => opts.callbacks.onSessionReady?.(providerSessionId),
        onReplyStarted: (replyId) => {
          currentReplyId = replyId;
          opts.callbacks.onReplyStarted(replyId);
        },
        onReplyDone: () => {
          currentReplyId = null;
          opts.callbacks.onReplyDone();
        },
        onCancelled: () => {
          currentReplyId = null;
          opts.callbacks.onCancelled();
        },
        onAudio: (bytes) => opts.callbacks.onAudioChunk(bytes),
        onUserTranscript: (text) => opts.callbacks.onUserTranscript(text),
        onAgentTranscript: (text, interrupted) =>
          opts.callbacks.onAgentTranscript(text, interrupted),
        onToolCall: (callId, name, args) => opts.callbacks.onToolCall(callId, name, args),
        onSpeechStarted: () => opts.callbacks.onSpeechStarted(),
        onSpeechStopped: () => opts.callbacks.onSpeechStopped(),
        onSessionExpired: () => {
          log.info("S2S session expired", { sid: opts.sid });
          handle?.close();
        },
        onError: (err) => opts.callbacks.onError("internal", err.message),
        onClose: (code, reason) => {
          if (currentReplyId !== null) {
            // The socket went away while a reply was still streaming:
            // surface it to the session as a connection error.
            // NOTE(review): a deliberate stop() during an active reply would
            // also reach this branch if the close callback fires — confirm
            // that reporting an error in that case is intended.
            log.warn("S2S closed with active reply", {
              sid: opts.sid,
              agent: opts.agent,
              activeReplyId: currentReplyId,
              code,
              reason,
            });
            opts.callbacks.onError("connection", `S2S closed mid-reply (code=${code})`);
          } else {
            log.info("S2S closed", { code, reason });
          }
        },
      },
    });
    handle.updateSession(opts.sessionConfig);
  }

  /** Close the connection if one is open and drop the handle. */
  async function stop(): Promise<void> {
    handle?.close();
    handle = null;
  }

  return {
    start,
    stop,
    sendUserAudio(bytes) {
      handle?.sendAudio(bytes);
    },
    sendToolResult(callId, result) {
      handle?.sendToolResult(callId, result);
    },
    cancelReply() {
      // AssemblyAI S2S doesn't expose an explicit cancel RPC — reply is
      // cancelled when the user speaks. Our `onCancel` from the client is
      // a best-effort signal.
      currentReplyId = null;
    },
    updateSession(config: TransportSessionConfig) {
      handle?.updateSession({
        systemPrompt: config.systemPrompt,
        tools: (config.tools ?? []) as S2sToolSchema[],
        // Only include `greeting` when the caller actually supplied one.
        ...(config.greeting !== undefined ? { greeting: config.greeting } : {}),
      });
    },
  };
}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import { describe, expect, test } from "vitest";
|
|
2
|
+
import type { Transport, TransportCallbacks } from "./types.ts";
|
|
3
|
+
|
|
4
|
+
describe("Transport types", () => {
  test("file compiles", () => {
    // This is a compile-time smoke test: if a literal with exactly the
    // required members type-checks as `Transport`, the type file is sound.
    // The runtime assertion below is deliberately trivial.
    const minimalTransport: Transport = {
      start: () => Promise.resolve(),
      stop: () => Promise.resolve(),
      // biome-ignore lint/suspicious/noEmptyBlockStatements: intentional no-op stub
      sendUserAudio: () => {},
      // biome-ignore lint/suspicious/noEmptyBlockStatements: intentional no-op stub
      sendToolResult: () => {},
      // biome-ignore lint/suspicious/noEmptyBlockStatements: intentional no-op stub
      cancelReply: () => {},
    };
    expect(minimalTransport).toBeDefined();

    // Reference TransportCallbacks so the type-only import is exercised.
    type _CB = TransportCallbacks;
  });
});
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
// Copyright 2026 the AAI authors. MIT license.
|
|
2
|
+
// Transport strategy — per-session provider wiring (S2S, pipeline, etc.).
|
|
3
|
+
|
|
4
|
+
import type { SessionErrorCode } from "../../sdk/protocol.ts";
|
|
5
|
+
import type { Message } from "../../sdk/types.ts";
|
|
6
|
+
|
|
7
|
+
/**
 * The set of typed hooks a transport calls to report events to the
 * SessionCore — one hook per event kind. The object is supplied when the
 * transport is created, so there is no `emitter.on`-style subscription step.
 */
export type TransportCallbacks = {
  // ── Reply lifecycle ──
  /** A reply identified by `replyId` has started. */
  onReplyStarted(replyId: string): void;
  /** The current reply finished. */
  onReplyDone(): void;
  /** The current reply was cancelled. */
  onCancelled(): void;

  // ── Agent audio ──
  /** A chunk of reply audio bytes is available. */
  onAudioChunk(bytes: Uint8Array): void;
  /** No more audio will follow for the current reply. */
  onAudioDone(): void;

  // ── Transcripts and tools ──
  /** Transcript text for user speech. */
  onUserTranscript(text: string): void;
  /** Transcript text for agent speech; `interrupted` marks a reply that was cut short. */
  onAgentTranscript(text: string, interrupted: boolean): void;
  /** The provider asked for tool `name` to be run with `args`; `callId` identifies the request. */
  onToolCall(callId: string, name: string, args: Record<string, unknown>): void;

  // ── Errors and voice activity ──
  /** The transport hit an error, classified by `code`. */
  onError(code: SessionErrorCode, message: string): void;
  /** User speech started. */
  onSpeechStarted(): void;
  /** User speech stopped. */
  onSpeechStopped(): void;

  // ── Optional ──
  /** The provider session is ready; receives the provider's own session id. */
  onSessionReady?(providerSessionId: string): void;
};
|
|
25
|
+
|
|
26
|
+
/**
 * The smallest session configuration a transport can be handed at
 * construction time. Only `systemPrompt` is mandatory.
 */
export type TransportSessionConfig = {
  /** System prompt for the session. */
  systemPrompt: string;
  /** Optional greeting. */
  greeting?: string;
  /** Optional tool definitions; left untyped here so each transport can interpret them. */
  tools?: unknown[];
  /** Optional prior conversation messages. */
  history?: Message[];
};
|
|
33
|
+
|
|
34
|
+
/**
 * Provider-strategy abstraction for a session. Each strategy supplies its
 * own implementation (see `s2s-transport.ts` and `pipeline-transport.ts`).
 */
export interface Transport {
  /** Opens whatever connections the transport needs and sends the initial session config. */
  start(): Promise<void>;

  /** Flushes, closes and tears everything down. Safe to call more than once. */
  stop(): Promise<void>;

  /** Passes a chunk of user audio on to the provider. */
  sendUserAudio(bytes: Uint8Array): void;

  /** Returns the result of tool call `callId` to the provider's reply stream. */
  sendToolResult(callId: string, result: string): void;

  /** Cancels the reply that is currently in flight (barge-in or client cancel). */
  cancelReply(): void;

  /** Sends the session config again. S2S only; the pipeline transport treats it as a no-op. */
  updateSession?(config: TransportSessionConfig): void;
}
|