@alexkroman1/aai 1.7.0 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (133)
  1. package/.turbo/turbo-build.log +11 -9
  2. package/CHANGELOG.md +16 -0
  3. package/dist/{_internal-types-CrnTi9Ew.js → _internal-types-CfOAbK6V.js} +22 -35
  4. package/dist/constants-y68COEGj.js +29 -0
  5. package/dist/host/_base64.d.ts +2 -0
  6. package/dist/host/_mock-ws.d.ts +0 -61
  7. package/dist/host/_pipeline-test-fakes.d.ts +7 -4
  8. package/dist/host/_run-code.d.ts +0 -25
  9. package/dist/host/_runtime-conformance.d.ts +3 -34
  10. package/dist/host/memory-vector.d.ts +0 -11
  11. package/dist/host/providers/resolve-kv.d.ts +0 -7
  12. package/dist/host/providers/resolve-vector.d.ts +0 -8
  13. package/dist/host/providers/stt/assemblyai.d.ts +0 -14
  14. package/dist/host/providers/stt/deepgram.d.ts +2 -14
  15. package/dist/host/providers/stt/soniox.d.ts +0 -22
  16. package/dist/host/providers/tts/rime.d.ts +10 -31
  17. package/dist/host/runtime-barrel.js +628 -642
  18. package/dist/host/runtime-config.d.ts +9 -6
  19. package/dist/host/runtime.d.ts +3 -0
  20. package/dist/host/to-vercel-tools.d.ts +3 -33
  21. package/dist/host/transports/openai-realtime-transport.d.ts +43 -0
  22. package/dist/host/unstorage-kv.d.ts +0 -26
  23. package/dist/index.js +3 -3
  24. package/dist/openai-realtime-cjPAHMMx.js +10 -0
  25. package/dist/sdk/_internal-types.d.ts +6 -55
  26. package/dist/sdk/allowed-hosts.d.ts +4 -3
  27. package/dist/sdk/constants.d.ts +4 -29
  28. package/dist/sdk/define.d.ts +7 -4
  29. package/dist/sdk/kv.d.ts +13 -37
  30. package/dist/sdk/manifest-barrel.js +1 -1
  31. package/dist/sdk/manifest.d.ts +8 -2
  32. package/dist/sdk/protocol.js +1 -1
  33. package/dist/sdk/providers/s2s/openai-realtime.d.ts +17 -0
  34. package/dist/sdk/providers/s2s-barrel.d.ts +9 -0
  35. package/dist/sdk/providers/s2s-barrel.js +2 -0
  36. package/dist/sdk/providers/tts/rime.d.ts +1 -1
  37. package/dist/sdk/providers.d.ts +6 -2
  38. package/dist/sdk/types.d.ts +7 -1
  39. package/dist/{types-KUgezM6u.js → types-DOWVZhb9.js} +1 -7
  40. package/dist/{ws-upgrade-BeOQ7fXL.js → ws-upgrade-CG8-by1n.js} +2 -3
  41. package/host/_base64.ts +9 -0
  42. package/host/_mock-ws.ts +0 -65
  43. package/host/_pipeline-test-fakes.ts +19 -31
  44. package/host/_run-code.ts +10 -53
  45. package/host/_runtime-conformance.ts +3 -44
  46. package/host/_test-utils.ts +20 -42
  47. package/host/builtin-tools.test.ts +127 -222
  48. package/host/builtin-tools.ts +6 -10
  49. package/host/cleanup.test.ts +30 -73
  50. package/host/integration/pipeline-reference.integration.test.ts +12 -17
  51. package/host/integration.test.ts +0 -7
  52. package/host/memory-vector.test.ts +3 -1
  53. package/host/memory-vector.ts +16 -21
  54. package/host/pinecone-vector.test.ts +14 -17
  55. package/host/pinecone-vector.ts +10 -19
  56. package/host/providers/providers.test-d.ts +5 -3
  57. package/host/providers/resolve-kv.ts +23 -41
  58. package/host/providers/resolve-vector.ts +3 -12
  59. package/host/providers/resolve.test.ts +15 -28
  60. package/host/providers/resolve.ts +24 -24
  61. package/host/providers/stt/assemblyai.test.ts +2 -14
  62. package/host/providers/stt/assemblyai.ts +12 -35
  63. package/host/providers/stt/deepgram.test.ts +23 -83
  64. package/host/providers/stt/deepgram.ts +15 -40
  65. package/host/providers/stt/elevenlabs.test.ts +26 -38
  66. package/host/providers/stt/elevenlabs.ts +10 -9
  67. package/host/providers/stt/soniox.test.ts +35 -85
  68. package/host/providers/stt/soniox.ts +8 -53
  69. package/host/providers/tts/cartesia.test.ts +19 -58
  70. package/host/providers/tts/cartesia.ts +36 -66
  71. package/host/providers/tts/rime.test.ts +12 -38
  72. package/host/providers/tts/rime.ts +23 -86
  73. package/host/runtime-config.test.ts +9 -9
  74. package/host/runtime-config.ts +16 -22
  75. package/host/runtime.test.ts +111 -73
  76. package/host/runtime.ts +138 -86
  77. package/host/s2s.test.ts +92 -191
  78. package/host/s2s.ts +56 -53
  79. package/host/server-shutdown.test.ts +9 -30
  80. package/host/server.test.ts +2 -13
  81. package/host/server.ts +85 -100
  82. package/host/session-core.test.ts +15 -30
  83. package/host/session-core.ts +10 -13
  84. package/host/session-prompt.test.ts +1 -5
  85. package/host/to-vercel-tools.test.ts +53 -72
  86. package/host/to-vercel-tools.ts +9 -39
  87. package/host/tool-executor.test.ts +25 -51
  88. package/host/tool-executor.ts +18 -12
  89. package/host/transports/openai-realtime-transport.test.ts +371 -0
  90. package/host/transports/openai-realtime-transport.ts +319 -0
  91. package/host/transports/pipeline-transport.test.ts +125 -298
  92. package/host/transports/pipeline-transport.ts +20 -68
  93. package/host/transports/s2s-transport-fixtures.test.ts +31 -92
  94. package/host/transports/s2s-transport.test.ts +65 -134
  95. package/host/transports/s2s-transport.ts +15 -43
  96. package/host/transports/types.test.ts +4 -8
  97. package/host/unstorage-kv.test.ts +3 -2
  98. package/host/unstorage-kv.ts +5 -35
  99. package/host/ws-handler.test.ts +72 -176
  100. package/host/ws-handler.ts +6 -12
  101. package/package.json +6 -1
  102. package/sdk/__snapshots__/exports.test.ts.snap +7 -0
  103. package/sdk/__snapshots__/schema-shapes.test.ts.snap +1 -0
  104. package/sdk/_internal-types.test.ts +6 -9
  105. package/sdk/_internal-types.ts +16 -57
  106. package/sdk/_test-matchers.ts +25 -15
  107. package/sdk/allowed-hosts.test.ts +50 -114
  108. package/sdk/allowed-hosts.ts +8 -14
  109. package/sdk/constants.ts +5 -52
  110. package/sdk/define.test.ts +7 -6
  111. package/sdk/define.ts +7 -3
  112. package/sdk/exports.test.ts +6 -1
  113. package/sdk/kv.ts +13 -37
  114. package/sdk/manifest.test-d.ts +5 -0
  115. package/sdk/manifest.test.ts +61 -9
  116. package/sdk/manifest.ts +11 -11
  117. package/sdk/protocol-compat.test.ts +66 -98
  118. package/sdk/protocol-snapshot.test.ts +2 -16
  119. package/sdk/protocol.test.ts +13 -22
  120. package/sdk/providers/s2s/openai-realtime.ts +36 -0
  121. package/sdk/providers/s2s-barrel.ts +12 -0
  122. package/sdk/providers/tts/rime.ts +1 -1
  123. package/sdk/providers.ts +24 -5
  124. package/sdk/schema-alignment.test.ts +25 -73
  125. package/sdk/schema-shapes.test.ts +1 -29
  126. package/sdk/system-prompt.test.ts +0 -1
  127. package/sdk/system-prompt.ts +17 -19
  128. package/sdk/types-inference.test.ts +10 -36
  129. package/sdk/types.ts +7 -0
  130. package/sdk/ws-upgrade.test.ts +24 -23
  131. package/sdk/ws-upgrade.ts +2 -3
  132. package/tsdown.config.ts +8 -11
  133. package/dist/constants-C2nirZUI.js +0 -54
@@ -0,0 +1,371 @@
1
+ // Copyright 2026 the AAI authors. MIT license.
2
+ import { describe, expect, test, vi } from "vitest";
3
+ import { silentLogger } from "../_test-utils.ts";
4
+ import {
5
+ createOpenaiRealtimeTransport,
6
+ type OpenaiRealtimeWebSocket,
7
+ } from "./openai-realtime-transport.ts";
8
+ import type { TransportCallbacks } from "./types.ts";
9
+
10
/**
 * Builds a full `TransportCallbacks` object whose every hook is its own
 * vitest spy, so a test can assert on exactly the callbacks it cares about
 * without the others throwing.
 */
function noopCallbacks(): TransportCallbacks {
  const spies: TransportCallbacks = {
    // Reply lifecycle.
    onReplyStarted: vi.fn(),
    onReplyDone: vi.fn(),
    onCancelled: vi.fn(),
    // Audio output.
    onAudioChunk: vi.fn(),
    onAudioDone: vi.fn(),
    // Transcripts.
    onUserTranscript: vi.fn(),
    onAgentTranscript: vi.fn(),
    // Tools and failures.
    onToolCall: vi.fn(),
    onError: vi.fn(),
    // Voice-activity detection.
    onSpeechStarted: vi.fn(),
    onSpeechStopped: vi.fn(),
  };
  return spies;
}
25
+
26
type Listener = (ev: unknown) => void;

/**
 * In-memory stand-in for the transport's WebSocket.
 *
 * The returned object satisfies `OpenaiRealtimeWebSocket` and adds two test
 * affordances:
 * - `sent`: every string passed to `send`, in order.
 * - `fire(type, ev)`: synchronously invokes the listeners registered for
 *   `type` with `ev`.
 *
 * `close()` synchronously notifies `close` listeners with a normal closure
 * (`code: 1000`). `readyState` is fixed at 1 (open) for the fake's lifetime.
 */
function makeFakeWs() {
  const listeners: Record<string, Listener[]> = {
    open: [],
    message: [],
    close: [],
    error: [],
  };
  const sent: string[] = [];
  const ws: OpenaiRealtimeWebSocket = {
    readyState: 1,
    send(data: string) {
      sent.push(data);
    },
    close() {
      for (const fn of listeners.close ?? []) fn({ code: 1000, reason: "" });
    },
    addEventListener(type: string, fn: Listener) {
      // Create the bucket on demand: pushing into a throwaway `[]` would
      // silently discard listeners for event types that were not pre-seeded.
      let bucket = listeners[type];
      if (bucket === undefined) {
        bucket = [];
        listeners[type] = bucket;
      }
      bucket.push(fn);
    },
  } as OpenaiRealtimeWebSocket;
  return Object.assign(ws, {
    fire(type: "open" | "message" | "close" | "error", ev?: unknown) {
      for (const fn of listeners[type] ?? []) fn(ev);
    },
    sent,
  });
}
55
+
56
/**
 * Creates a transport wired to a fake socket and spy callbacks, calls
 * `start()`, and immediately simulates the socket's `open` event.
 *
 * Returns the fake socket, the spies, the transport, and the `start()`
 * promise (`ready`) for the test to await.
 */
function startedTransport() {
  const cbs = noopCallbacks();
  const fake = makeFakeWs();

  const transport = createOpenaiRealtimeTransport({
    apiKey: "sk",
    options: {},
    sessionConfig: { systemPrompt: "" },
    toolSchemas: [],
    toolChoice: "auto",
    callbacks: cbs,
    sid: "s",
    agent: "a",
    // Always hand back the same fake so the test can drive it.
    createWebSocket: () => fake,
    logger: silentLogger,
  });

  // start() registers its listeners synchronously, so "open" can fire at once.
  const ready = transport.start();
  fake.fire("open");

  return { fake, cbs, transport, ready };
}
75
+
76
// Connection handshake: URL, auth headers, and the initial session.update frame.
describe("openai-realtime-transport: connect and session.update", () => {
  test("opens WS with auth headers and sends session.update on open", async () => {
    const socket = makeFakeWs();
    const socketFactory = vi.fn(() => socket);

    const transport = createOpenaiRealtimeTransport({
      apiKey: "sk-test",
      options: { model: "gpt-realtime", voice: "cedar" },
      sessionConfig: {
        systemPrompt: "Be terse.",
        greeting: "Hi.",
        tools: [],
      },
      toolSchemas: [
        {
          type: "function",
          name: "lookup",
          description: "look up something",
          parameters: { type: "object", properties: {} },
        },
      ],
      toolChoice: "auto",
      callbacks: noopCallbacks(),
      sid: "sid-1",
      agent: "test-agent",
      createWebSocket: socketFactory,
      logger: silentLogger,
    });

    const pending = transport.start();
    socket.fire("open");
    await pending;

    // The factory must receive the model-qualified URL and both headers.
    const expectedHeaders = expect.objectContaining({
      Authorization: "Bearer sk-test",
      "OpenAI-Beta": "realtime=v1",
    });
    expect(socketFactory).toHaveBeenCalledWith(
      "wss://api.openai.com/v1/realtime?model=gpt-realtime",
      expect.objectContaining({ headers: expectedHeaders }),
    );

    // Exactly one frame is sent on open: the session.update.
    expect(socket.sent.length).toBe(1);
    const [payload] = socket.sent;
    if (payload === undefined) throw new Error("expected one send");
    const frame = JSON.parse(payload);
    expect(frame.type).toBe("session.update");

    const session = frame.session;
    expect(session.voice).toBe("cedar");
    expect(session.instructions).toBe("Be terse.");
    expect(session.input_audio_format).toBe("pcm16");
    expect(session.output_audio_format).toBe("pcm16");
    expect(session.modalities).toEqual(["audio", "text"]);
    expect(session.input_audio_transcription).toEqual({ model: "whisper-1" });
    expect(session.turn_detection.type).toBe("server_vad");
    expect(session.tools).toEqual([
      expect.objectContaining({ type: "function", name: "lookup" }),
    ]);
    expect(session.tool_choice).toBe("auto");
  });
});
137
+
138
// Audio path: outbound PCM is base64-wrapped, inbound base64 is decoded.
describe("audio in/out", () => {
  // Delivers one server event to the transport as a JSON text frame.
  const emit = (sock: ReturnType<typeof makeFakeWs>, event: Record<string, unknown>): void => {
    sock.fire("message", { data: JSON.stringify(event) });
  };

  test("sendUserAudio sends input_audio_buffer.append with base64 payload", async () => {
    const { fake, transport, ready } = startedTransport();
    await ready;
    fake.sent.length = 0;

    transport.sendUserAudio(new Uint8Array([1, 2, 3, 4]));

    expect(fake.sent.length).toBe(1);
    const [payload] = fake.sent;
    if (payload === undefined) throw new Error("expected one send");
    const frame = JSON.parse(payload);
    expect(frame.type).toBe("input_audio_buffer.append");
    expect(typeof frame.audio).toBe("string");
    expect(Buffer.from(frame.audio, "base64")).toEqual(Buffer.from([1, 2, 3, 4]));
  });

  test("response.audio.delta calls onAudioChunk with decoded bytes", async () => {
    const { fake, cbs, ready } = startedTransport();
    await ready;

    const encoded = Buffer.from([5, 6, 7, 8]).toString("base64");
    emit(fake, { type: "response.audio.delta", delta: encoded });

    expect(cbs.onAudioChunk).toHaveBeenCalledTimes(1);
    expect(cbs.onAudioChunk).toHaveBeenCalledWith(new Uint8Array([5, 6, 7, 8]));
  });

  test("response.audio.done calls onAudioDone", async () => {
    const { fake, cbs, ready } = startedTransport();
    await ready;

    emit(fake, { type: "response.audio.done" });

    expect(cbs.onAudioDone).toHaveBeenCalledTimes(1);
  });
});
171
+
172
// Server events that map one-to-one (or after buffering) onto callbacks.
describe("VAD, user transcript, reply lifecycle, agent transcript", () => {
  // Delivers one server event to the transport as a JSON text frame.
  const emit = (sock: ReturnType<typeof makeFakeWs>, event: Record<string, unknown>): void => {
    sock.fire("message", { data: JSON.stringify(event) });
  };

  test("speech_started/stopped routed to callbacks", async () => {
    const { fake, cbs, ready } = startedTransport();
    await ready;

    emit(fake, { type: "input_audio_buffer.speech_started" });
    emit(fake, { type: "input_audio_buffer.speech_stopped" });

    expect(cbs.onSpeechStarted).toHaveBeenCalledTimes(1);
    expect(cbs.onSpeechStopped).toHaveBeenCalledTimes(1);
  });

  test("user transcription completed routes to onUserTranscript", async () => {
    const { fake, cbs, ready } = startedTransport();
    await ready;

    emit(fake, {
      type: "conversation.item.input_audio_transcription.completed",
      transcript: "hello world",
    });

    expect(cbs.onUserTranscript).toHaveBeenCalledWith("hello world");
  });

  test("response.created → onReplyStarted; response.done → onReplyDone", async () => {
    const { fake, cbs, ready } = startedTransport();
    await ready;

    emit(fake, { type: "response.created", response: { id: "resp_1" } });
    expect(cbs.onReplyStarted).toHaveBeenCalledWith("resp_1");

    emit(fake, { type: "response.done" });
    expect(cbs.onReplyDone).toHaveBeenCalledTimes(1);
  });

  test("agent transcript: deltas accumulated, emitted on done", async () => {
    const { fake, cbs, ready } = startedTransport();
    await ready;
    const item_id = "item_x";

    for (const delta of ["Hi ", "there."]) {
      emit(fake, { type: "response.audio_transcript.delta", item_id, delta });
    }
    // Nothing is surfaced until the item is finished.
    expect(cbs.onAgentTranscript).not.toHaveBeenCalled();

    emit(fake, { type: "response.audio_transcript.done", item_id });
    expect(cbs.onAgentTranscript).toHaveBeenCalledWith("Hi there.", false);
  });

  test("agent transcript: done with no buffered deltas does not emit", async () => {
    const { fake, cbs, ready } = startedTransport();
    await ready;

    emit(fake, { type: "response.audio_transcript.done", item_id: "empty" });

    expect(cbs.onAgentTranscript).not.toHaveBeenCalled();
  });
});
246
+
247
// Function-call streaming from the server and tool results sent back.
describe("tool calls", () => {
  // Delivers one server event to the transport as a JSON text frame.
  const emit = (sock: ReturnType<typeof makeFakeWs>, event: Record<string, unknown>): void => {
    sock.fire("message", { data: JSON.stringify(event) });
  };

  test("function_call_arguments deltas accumulate; .done emits onToolCall", async () => {
    const { fake, cbs, ready } = startedTransport();
    await ready;
    const item_id = "item_t";

    emit(fake, {
      type: "response.output_item.added",
      item: { id: item_id, type: "function_call", name: "lookup", call_id: "call_1" },
    });
    for (const delta of ['{"q":', '"hi"}']) {
      emit(fake, { type: "response.function_call_arguments.delta", item_id, delta });
    }
    emit(fake, {
      type: "response.function_call_arguments.done",
      item_id,
      call_id: "call_1",
      name: "lookup",
      arguments: '{"q":"hi"}',
    });

    expect(cbs.onToolCall).toHaveBeenCalledWith("call_1", "lookup", { q: "hi" });
  });

  test("done with empty/invalid args still calls onToolCall with {}", async () => {
    const { fake, cbs, ready } = startedTransport();
    await ready;
    const item_id = "item_e";

    emit(fake, {
      type: "response.output_item.added",
      item: { id: item_id, type: "function_call", name: "noop", call_id: "call_e" },
    });
    emit(fake, {
      type: "response.function_call_arguments.done",
      item_id,
      call_id: "call_e",
      name: "noop",
      arguments: "",
    });

    expect(cbs.onToolCall).toHaveBeenCalledWith("call_e", "noop", {});
  });

  test("sendToolResult sends conversation.item.create + response.create", async () => {
    const { fake, transport, ready } = startedTransport();
    await ready;
    fake.sent.length = 0; // drop session.update

    transport.sendToolResult("call_1", '{"ok":true}');

    expect(fake.sent.length).toBe(2);
    const itemFrame = JSON.parse(fake.sent[0] ?? "{}");
    expect(itemFrame.type).toBe("conversation.item.create");
    expect(itemFrame.item.type).toBe("function_call_output");
    expect(itemFrame.item.call_id).toBe("call_1");
    expect(itemFrame.item.output).toBe('{"ok":true}');
    const createFrame = JSON.parse(fake.sent[1] ?? "{}");
    expect(createFrame.type).toBe("response.create");
  });
});
321
+
322
// Cancellation plus server-reported and connection-level failures.
describe("cancel, error, close", () => {
  // Delivers one server event to the transport as a JSON text frame.
  const emit = (sock: ReturnType<typeof makeFakeWs>, event: Record<string, unknown>): void => {
    sock.fire("message", { data: JSON.stringify(event) });
  };

  test("cancelReply sends response.cancel only when a reply is in flight", async () => {
    const { fake, transport, ready } = startedTransport();
    await ready;
    fake.sent.length = 0;

    // No reply yet — cancel should be a no-op
    transport.cancelReply();
    expect(fake.sent.length).toBe(0);

    emit(fake, { type: "response.created", response: { id: "r1" } });
    transport.cancelReply();
    expect(fake.sent.length).toBe(1);
    const cancelFrame = JSON.parse(fake.sent[0] ?? "{}");
    expect(cancelFrame.type).toBe("response.cancel");
  });

  test("cancelReply also fires onCancelled", async () => {
    const { fake, cbs, transport, ready } = startedTransport();
    await ready;

    emit(fake, { type: "response.created", response: { id: "r2" } });
    transport.cancelReply();

    expect(cbs.onCancelled).toHaveBeenCalledTimes(1);
  });

  test("error event routes to onError with internal code", async () => {
    const { fake, cbs, ready } = startedTransport();
    await ready;

    emit(fake, { type: "error", error: { message: "boom" } });

    expect(cbs.onError).toHaveBeenCalledWith("internal", "boom");
  });

  test("error event with missing message uses fallback", async () => {
    const { fake, cbs, ready } = startedTransport();
    await ready;

    emit(fake, { type: "error" });

    expect(cbs.onError).toHaveBeenCalledWith("internal", expect.any(String));
  });

  test("unexpected close routes to onError with connection code", async () => {
    const { fake, cbs, ready } = startedTransport();
    await ready;

    fake.fire("close", { code: 1006, reason: "" });

    expect(cbs.onError).toHaveBeenCalledWith("connection", expect.stringMatching(/closed/i));
  });
});
@@ -0,0 +1,319 @@
1
+ // Copyright 2026 the AAI authors. MIT license.
2
+ // OpenAI Realtime API transport — implements Transport.
3
+
4
+ import type { JSONSchema7 } from "json-schema";
5
+ import WsWebSocket from "ws";
6
+ import { WS_OPEN } from "../../sdk/constants.ts";
7
+ import type { OpenaiRealtimeOptions } from "../../sdk/providers/s2s/openai-realtime.ts";
8
+ import { base64ToUint8, uint8ToBase64 } from "../_base64.ts";
9
+ import type { Logger } from "../runtime-config.ts";
10
+ import { consoleLogger } from "../runtime-config.ts";
11
+ import type { Transport, TransportCallbacks, TransportSessionConfig } from "./types.ts";
12
+
13
+ const DEFAULT_MODEL = "gpt-realtime-2"; // Fallback when `options.model` is not set; sent as the `model` query parameter.
14
+ const DEFAULT_VOICE = "alloy"; // Fallback when `options.voice` is not set; sent as `session.voice`.
15
+ const DEFAULT_URL = "wss://api.openai.com/v1/realtime"; // Fallback when `options.url` is not set; `?model=...` is appended to it.
16
+
17
+ export type OpenaiRealtimeWebSocket = { // The socket members this transport relies on; narrow enough for tests to supply a fake.
18
+ readonly readyState: number; // Compared against WS_OPEN before every send.
19
+ send(data: string): void; // Only JSON text frames are sent by this transport.
20
+ close(): void; // Called by the transport's stop().
21
+ addEventListener(type: "open", fn: () => void): void; // On open the transport sends session.update and resolves start().
22
+ addEventListener(type: "message", fn: (ev: { data: unknown }) => void): void; // `data` is stringified and parsed as JSON.
23
+ addEventListener(type: "close", fn: (ev: { code?: number; reason?: string }) => void): void; // Missing code/reason default to 0 / "".
24
+ addEventListener(type: "error", fn: (ev: { message?: string }) => void): void; // A non-string message falls back to "WebSocket error".
25
+ };
26
+
27
+ export type CreateOpenaiRealtimeWebSocket = ( // Socket factory; injectable so tests can return a fake.
28
+ url: string, // Full endpoint URL including the `model` query parameter.
29
+ opts: { headers: Record<string, string> }, // Request headers; the transport passes Authorization and OpenAI-Beta.
30
+ ) => OpenaiRealtimeWebSocket;
31
+
32
+ // Node's native WebSocket doesn't support custom headers; the `ws` package does.
33
+ export const defaultCreateOpenaiRealtimeWebSocket: CreateOpenaiRealtimeWebSocket = (url, opts) => // Used when no `createWebSocket` option is supplied.
34
+ new WsWebSocket(url, { headers: opts.headers }) as unknown as OpenaiRealtimeWebSocket; // Cast narrows the `ws` client to the members declared in OpenaiRealtimeWebSocket.
35
+
36
+ export type OpenaiRealtimeToolSchema = { // One tool definition; forwarded unchanged as an entry of `session.tools`.
37
+ type: "function"; // Only function tools are representable.
38
+ name: string; // Tool name; the same name is passed to onToolCall when the model calls it.
39
+ description: string; // Free-text description of the tool.
40
+ parameters: JSONSchema7; // JSON Schema for the tool's arguments object.
41
+ };
42
+
43
+ export type OpenaiRealtimeTransportOptions = { // Everything createOpenaiRealtimeTransport needs to build a transport.
44
+ apiKey: string; // Sent as `Authorization: Bearer <apiKey>` on the WebSocket handshake.
45
+ options: OpenaiRealtimeOptions; // Provider options; `model`, `voice` and `url` are read, each with a module default.
46
+ sessionConfig: TransportSessionConfig; // `systemPrompt` becomes `session.instructions`.
47
+ toolSchemas: OpenaiRealtimeToolSchema[]; // Sent as `session.tools`.
48
+ toolChoice: "auto" | "required"; // Sent as `session.tool_choice`.
49
+ callbacks: TransportCallbacks; // Hooks invoked as server events arrive.
50
+ sid: string; // NOTE(review): not read by the factory in this file — presumably a session id kept for parity with other transports; confirm.
51
+ agent: string; // NOTE(review): not read by the factory in this file — presumably the agent name; confirm.
52
+ createWebSocket?: CreateOpenaiRealtimeWebSocket; // Defaults to defaultCreateOpenaiRealtimeWebSocket.
53
+ logger?: Logger; // Defaults to consoleLogger.
54
+ };
55
+
56
/**
 * Creates a `Transport` that speaks the OpenAI Realtime WebSocket protocol.
 *
 * Lifecycle:
 * - `start()` opens the socket, sends a single `session.update` once it is
 *   open, and resolves. It rejects if the socket errors or closes before
 *   opening.
 * - While open, server events are translated into `opts.callbacks` calls
 *   (audio chunks, transcripts, tool calls, reply lifecycle, errors).
 * - `stop()` marks the transport as closing and closes the socket; a close
 *   that happens while closing is logged instead of reported as an error.
 *
 * @param opts API key, provider options, session config, tool schemas,
 *   callbacks, and optional socket factory / logger overrides.
 * @returns A transport exposing `start`, `stop`, `sendUserAudio`,
 *   `sendToolResult` and `cancelReply`.
 */
export function createOpenaiRealtimeTransport(opts: OpenaiRealtimeTransportOptions): Transport {
  const log = opts.logger ?? consoleLogger;
  const createWs = opts.createWebSocket ?? defaultCreateOpenaiRealtimeWebSocket;
  const model = opts.options.model ?? DEFAULT_MODEL;
  const voice = opts.options.voice ?? DEFAULT_VOICE;
  const baseUrl = opts.options.url ?? DEFAULT_URL;

  let ws: OpenaiRealtimeWebSocket | null = null;
  // Set by stop() so the resulting close/error events are not reported as failures.
  let closing = false;
  // Agent transcript text accumulated per output item until its `.done` event.
  const agentTranscriptBuffers = new Map<string, string>();
  type ToolBuffer = { callId: string; name: string; argsBuffer: string };
  // Function-call metadata and streamed argument text, keyed by output item id.
  const toolBuffers = new Map<string, ToolBuffer>();
  // Non-null while a reply is in flight; gates cancelReply().
  let currentResponseId: string | null = null;

  /** Serializes `payload` and sends it; drops it (with a debug log) if the socket is not open. */
  function send(payload: Record<string, unknown>): void {
    if (!ws || ws.readyState !== WS_OPEN) {
      log.debug("OpenAI Realtime send dropped: socket not open", { type: payload.type });
      return;
    }
    ws.send(JSON.stringify(payload));
  }

  /** Sends the session configuration: voice, instructions, audio formats, VAD and tools. */
  function sendSessionUpdate(): void {
    send({
      type: "session.update",
      session: {
        modalities: ["audio", "text"],
        voice,
        instructions: opts.sessionConfig.systemPrompt,
        input_audio_format: "pcm16",
        output_audio_format: "pcm16",
        input_audio_transcription: { model: "whisper-1" },
        turn_detection: { type: "server_vad" },
        tools: opts.toolSchemas,
        tool_choice: opts.toolChoice,
      },
    });
  }

  /**
   * Opens the socket and resolves once it is open and `session.update` has
   * been sent. Rejects if the socket reports an error or closes before it
   * ever opened.
   */
  async function start(): Promise<void> {
    // A custom base URL may already carry a query string; extend it instead of
    // starting a second one.
    const separator = baseUrl.includes("?") ? "&" : "?";
    const url = `${baseUrl}${separator}model=${encodeURIComponent(model)}`;
    log.info("OpenAI Realtime connecting", { url });
    return new Promise((resolve, reject) => {
      const sock = createWs(url, {
        headers: {
          Authorization: `Bearer ${opts.apiKey}`,
          "OpenAI-Beta": "realtime=v1",
        },
      });
      ws = sock;
      let opened = false;

      sock.addEventListener("open", () => {
        opened = true;
        sendSessionUpdate();
        resolve();
      });
      sock.addEventListener("message", (ev) => handleMessage(ev.data));
      sock.addEventListener("close", (ev) => {
        const code = ev.code ?? 0;
        const reason = ev.reason ?? "";
        if (!opened) {
          // The connection never came up. Fail start() so the caller does not
          // wait forever, and do not additionally raise onError: the rejected
          // promise is the report. (reject is a no-op if an earlier error
          // event already rejected.)
          if (closing) {
            log.info("OpenAI Realtime closed before open", { code, reason });
          } else {
            log.warn("OpenAI Realtime closed before open", { code, reason });
          }
          reject(new Error(`OpenAI Realtime closed before open (code=${code})`));
          return;
        }
        handleClose(code, reason);
      });
      sock.addEventListener("error", (ev) => {
        const msg = typeof ev.message === "string" ? ev.message : "WebSocket error";
        if (!opened) {
          reject(new Error(msg));
          return;
        }
        if (closing) {
          log.info("OpenAI Realtime error during close", { error: msg });
          return;
        }
        opts.callbacks.onError("internal", msg);
      });
    });
  }

  /** Returns `v` when it is a string, otherwise the empty string. */
  function asString(v: unknown): string {
    return typeof v === "string" ? v : "";
  }

  /** Decodes a base64 audio delta and forwards the bytes. */
  function handleAudioDelta(obj: Record<string, unknown>): void {
    if (typeof obj.delta === "string") {
      opts.callbacks.onAudioChunk(base64ToUint8(obj.delta));
    }
  }

  /** Forwards a completed user transcription. */
  function handleUserTranscript(obj: Record<string, unknown>): void {
    if (typeof obj.transcript === "string") {
      opts.callbacks.onUserTranscript(obj.transcript);
    }
  }

  /** Records the in-flight response id (empty string if absent) and signals reply start. */
  function handleResponseCreated(obj: Record<string, unknown>): void {
    const resp = obj.response as { id?: unknown } | undefined;
    const id = asString(resp?.id);
    currentResponseId = id;
    opts.callbacks.onReplyStarted(id);
  }

  /** Appends a transcript delta to the buffer of its output item. */
  function handleAgentTranscriptDelta(obj: Record<string, unknown>): void {
    const id = asString(obj.item_id);
    const delta = asString(obj.delta);
    agentTranscriptBuffers.set(id, (agentTranscriptBuffers.get(id) ?? "") + delta);
  }

  /**
   * Emits the finished agent transcript for an output item. Uses the text
   * accumulated from deltas; if none was buffered, falls back to the event's
   * own `transcript` field. Emits nothing when both are empty.
   */
  function handleAgentTranscriptDone(obj: Record<string, unknown>): void {
    const id = asString(obj.item_id);
    const buffered = agentTranscriptBuffers.get(id) ?? "";
    agentTranscriptBuffers.delete(id);
    const text = buffered || asString(obj.transcript);
    if (text) opts.callbacks.onAgentTranscript(text, false);
  }

  /** Discards all partially accumulated transcript and tool-call state. */
  function clearTurnBuffers(): void {
    agentTranscriptBuffers.clear();
    toolBuffers.clear();
  }

  /** Ends the in-flight reply: clears turn state and signals completion. */
  function handleResponseDone(): void {
    currentResponseId = null;
    clearTurnBuffers();
    opts.callbacks.onReplyDone();
  }

  /** Reports a server `error` event as an "internal" error, with a fallback message. */
  function handleErrorEvent(obj: Record<string, unknown>): void {
    const err = obj.error as { message?: unknown } | undefined;
    const message = typeof err?.message === "string" ? err.message : "OpenAI Realtime error";
    clearTurnBuffers();
    opts.callbacks.onError("internal", message);
  }

  /** Starts tracking a function-call output item so its argument deltas can be accumulated. */
  function handleOutputItemAdded(obj: Record<string, unknown>): void {
    const item = obj.item as
      | { id?: string; type?: string; name?: string; call_id?: string }
      | undefined;
    if (item?.type !== "function_call" || !item.id) return;
    toolBuffers.set(item.id, {
      callId: item.call_id ?? "",
      name: item.name ?? "",
      argsBuffer: "",
    });
  }

  /** Appends streamed argument text to a tracked function call; untracked items are ignored. */
  function handleFunctionCallArgsDelta(obj: Record<string, unknown>): void {
    const id = asString(obj.item_id);
    const delta = asString(obj.delta);
    const buf = toolBuffers.get(id);
    if (buf) buf.argsBuffer += delta;
  }

  /**
   * Parses tool arguments. Returns `{}` for empty input, for invalid JSON
   * (logged as a warning), and for JSON that is not a plain object.
   */
  function parseToolArgs(argsStr: string, name: string, callId: string): Record<string, unknown> {
    if (!argsStr) return {};
    try {
      const parsed = JSON.parse(argsStr);
      if (parsed && typeof parsed === "object" && !Array.isArray(parsed)) {
        return parsed as Record<string, unknown>;
      }
    } catch {
      log.warn("OpenAI Realtime: invalid tool args JSON", { name, callId });
    }
    return {};
  }

  /**
   * Finalizes a function call. Each of call id, name and arguments is taken
   * from the `.done` event when present, otherwise from the tracked buffer.
   */
  function handleFunctionCallArgsDone(obj: Record<string, unknown>): void {
    const id = asString(obj.item_id);
    const buf = toolBuffers.get(id);
    toolBuffers.delete(id);
    const callId = asString(obj.call_id) || (buf?.callId ?? "");
    const name = asString(obj.name) || (buf?.name ?? "");
    const argsStr = asString(obj.arguments) || (buf?.argsBuffer ?? "");
    const args = parseToolArgs(argsStr, name, callId);
    opts.callbacks.onToolCall(callId, name, args);
  }

  /** Parses one incoming frame as JSON and dispatches on its `type`; unknown types are ignored. */
  function handleMessage(data: unknown): void {
    let raw: unknown;
    try {
      raw = JSON.parse(String(data));
    } catch {
      log.warn("OpenAI Realtime: invalid JSON");
      return;
    }
    if (typeof raw !== "object" || raw === null) return;
    const obj = raw as Record<string, unknown>;
    switch (obj.type) {
      case "response.audio.delta":
        handleAudioDelta(obj);
        return;
      case "response.audio.done":
        opts.callbacks.onAudioDone();
        return;
      case "input_audio_buffer.speech_started":
        opts.callbacks.onSpeechStarted();
        return;
      case "input_audio_buffer.speech_stopped":
        opts.callbacks.onSpeechStopped();
        return;
      case "conversation.item.input_audio_transcription.completed":
        handleUserTranscript(obj);
        return;
      case "response.created":
        handleResponseCreated(obj);
        return;
      case "response.audio_transcript.delta":
        handleAgentTranscriptDelta(obj);
        return;
      case "response.audio_transcript.done":
        handleAgentTranscriptDone(obj);
        return;
      case "response.done":
        handleResponseDone();
        return;
      case "response.output_item.added":
        handleOutputItemAdded(obj);
        return;
      case "response.function_call_arguments.delta":
        handleFunctionCallArgsDelta(obj);
        return;
      case "response.function_call_arguments.done":
        handleFunctionCallArgsDone(obj);
        return;
      case "error":
        handleErrorEvent(obj);
        return;
      default:
        return;
    }
  }

  /**
   * Handles a close after the socket had opened. Always drops in-flight turn
   * state (nothing more can arrive for it); reports a "connection" error
   * unless the close was requested via stop().
   */
  function handleClose(code: number, reason: string): void {
    currentResponseId = null;
    clearTurnBuffers();
    if (closing) {
      log.info("OpenAI Realtime closed", { code, reason });
      return;
    }
    log.warn("OpenAI Realtime closed unexpectedly", { code, reason });
    opts.callbacks.onError("connection", `OpenAI Realtime closed (code=${code})`);
  }

  /** Marks the transport as closing, closes the socket if any, and forgets it. */
  async function stop(): Promise<void> {
    closing = true;
    ws?.close();
    ws = null;
  }

  return {
    start,
    stop,
    sendUserAudio(bytes) {
      if (!ws || ws.readyState !== WS_OPEN) return;
      // Hand-built JSON for the audio frame: the base64 alphabet needs no JSON escaping.
      ws.send(`{"type":"input_audio_buffer.append","audio":"${uint8ToBase64(bytes)}"}`);
    },
    sendToolResult(callId, result) {
      send({
        type: "conversation.item.create",
        item: { type: "function_call_output", call_id: callId, output: result },
      });
      // Ask the model to continue now that the tool output is in the conversation.
      send({ type: "response.create" });
    },
    cancelReply() {
      // Nothing to cancel unless a response.created has been seen and not yet finished.
      if (currentResponseId === null) return;
      send({ type: "response.cancel" });
      currentResponseId = null;
      clearTurnBuffers();
      opts.callbacks.onCancelled();
    },
  };
}