@kodelyth/voice-call 2026.5.42 → 2026.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111):
  1. package/package.json +16 -4
  2. package/api.ts +0 -16
  3. package/cli-metadata.ts +0 -10
  4. package/config-api.ts +0 -12
  5. package/index.test.ts +0 -1075
  6. package/index.ts +0 -863
  7. package/runtime-api.ts +0 -20
  8. package/runtime-entry.ts +0 -1
  9. package/setup-api.ts +0 -47
  10. package/src/allowlist.test.ts +0 -18
  11. package/src/allowlist.ts +0 -19
  12. package/src/cli.test.ts +0 -12
  13. package/src/cli.ts +0 -866
  14. package/src/config-compat.test.ts +0 -130
  15. package/src/config-compat.ts +0 -227
  16. package/src/config.test.ts +0 -542
  17. package/src/config.ts +0 -883
  18. package/src/core-bridge.ts +0 -14
  19. package/src/deep-merge.test.ts +0 -40
  20. package/src/deep-merge.ts +0 -23
  21. package/src/gateway-continue-operation.ts +0 -200
  22. package/src/http-headers.test.ts +0 -16
  23. package/src/http-headers.ts +0 -15
  24. package/src/manager/context.ts +0 -50
  25. package/src/manager/events.test.ts +0 -578
  26. package/src/manager/events.ts +0 -332
  27. package/src/manager/lifecycle.ts +0 -53
  28. package/src/manager/lookup.test.ts +0 -52
  29. package/src/manager/lookup.ts +0 -35
  30. package/src/manager/outbound.test.ts +0 -629
  31. package/src/manager/outbound.ts +0 -508
  32. package/src/manager/state.ts +0 -48
  33. package/src/manager/store.ts +0 -107
  34. package/src/manager/timers.test.ts +0 -127
  35. package/src/manager/timers.ts +0 -113
  36. package/src/manager/twiml.test.ts +0 -13
  37. package/src/manager/twiml.ts +0 -17
  38. package/src/manager.closed-loop.test.ts +0 -259
  39. package/src/manager.inbound-allowlist.test.ts +0 -183
  40. package/src/manager.notify.test.ts +0 -390
  41. package/src/manager.restore.test.ts +0 -310
  42. package/src/manager.test-harness.ts +0 -127
  43. package/src/manager.ts +0 -441
  44. package/src/media-stream.test.ts +0 -953
  45. package/src/media-stream.ts +0 -876
  46. package/src/providers/base.ts +0 -99
  47. package/src/providers/mock.test.ts +0 -86
  48. package/src/providers/mock.ts +0 -185
  49. package/src/providers/plivo.test.ts +0 -93
  50. package/src/providers/plivo.ts +0 -601
  51. package/src/providers/shared/call-status.test.ts +0 -24
  52. package/src/providers/shared/call-status.ts +0 -24
  53. package/src/providers/shared/guarded-json-api.test.ts +0 -127
  54. package/src/providers/shared/guarded-json-api.ts +0 -49
  55. package/src/providers/telnyx.test.ts +0 -489
  56. package/src/providers/telnyx.ts +0 -419
  57. package/src/providers/twilio/api.test.ts +0 -184
  58. package/src/providers/twilio/api.ts +0 -100
  59. package/src/providers/twilio/twiml-policy.test.ts +0 -84
  60. package/src/providers/twilio/twiml-policy.ts +0 -87
  61. package/src/providers/twilio/webhook.ts +0 -34
  62. package/src/providers/twilio.test.ts +0 -607
  63. package/src/providers/twilio.ts +0 -861
  64. package/src/providers/twilio.types.ts +0 -17
  65. package/src/realtime-agent-context.test.ts +0 -101
  66. package/src/realtime-agent-context.ts +0 -149
  67. package/src/realtime-defaults.ts +0 -3
  68. package/src/realtime-fast-context.test.ts +0 -74
  69. package/src/realtime-fast-context.ts +0 -27
  70. package/src/realtime-transcription.runtime.ts +0 -4
  71. package/src/realtime-voice.runtime.ts +0 -5
  72. package/src/response-generator.test.ts +0 -385
  73. package/src/response-generator.ts +0 -348
  74. package/src/response-model.test.ts +0 -71
  75. package/src/response-model.ts +0 -23
  76. package/src/runtime.test.ts +0 -625
  77. package/src/runtime.ts +0 -528
  78. package/src/telephony-audio.test.ts +0 -61
  79. package/src/telephony-audio.ts +0 -12
  80. package/src/telephony-tts.test.ts +0 -196
  81. package/src/telephony-tts.ts +0 -235
  82. package/src/test-fixtures.ts +0 -82
  83. package/src/tts-provider-voice.test.ts +0 -34
  84. package/src/tts-provider-voice.ts +0 -21
  85. package/src/tunnel.test.ts +0 -173
  86. package/src/tunnel.ts +0 -314
  87. package/src/types.ts +0 -311
  88. package/src/utils.test.ts +0 -17
  89. package/src/utils.ts +0 -14
  90. package/src/voice-mapping.test.ts +0 -32
  91. package/src/voice-mapping.ts +0 -65
  92. package/src/webhook/realtime-audio-pacer.test.ts +0 -146
  93. package/src/webhook/realtime-audio-pacer.ts +0 -204
  94. package/src/webhook/realtime-handler.test.ts +0 -1450
  95. package/src/webhook/realtime-handler.ts +0 -1382
  96. package/src/webhook/stale-call-reaper.test.ts +0 -89
  97. package/src/webhook/stale-call-reaper.ts +0 -38
  98. package/src/webhook/stream-frame-adapter.test.ts +0 -187
  99. package/src/webhook/stream-frame-adapter.ts +0 -219
  100. package/src/webhook/tailscale.test.ts +0 -216
  101. package/src/webhook/tailscale.ts +0 -129
  102. package/src/webhook-exposure.test.ts +0 -33
  103. package/src/webhook-exposure.ts +0 -84
  104. package/src/webhook-security.test.ts +0 -813
  105. package/src/webhook-security.ts +0 -982
  106. package/src/webhook.hangup-once.lifecycle.test.ts +0 -179
  107. package/src/webhook.test.ts +0 -1615
  108. package/src/webhook.ts +0 -933
  109. package/src/webhook.types.ts +0 -5
  110. package/src/websocket-test-support.ts +0 -72
  111. package/tsconfig.json +0 -16
@@ -1,1450 +0,0 @@
1
- import http from "node:http";
2
- import type {
3
- RealtimeVoiceBridge,
4
- RealtimeVoiceProviderPlugin,
5
- RealtimeVoiceToolCallEvent,
6
- } from "klaw/plugin-sdk/realtime-voice";
7
- import { afterEach, describe, expect, it, vi } from "vitest";
8
- import { WebSocket } from "ws";
9
- import type { VoiceCallRealtimeConfig } from "../config.js";
10
- import type { CallManager } from "../manager.js";
11
- import type { VoiceCallProvider } from "../providers/base.js";
12
- import type { CallRecord, NormalizedEvent } from "../types.js";
13
- import { connectWs, startUpgradeWsServer, waitForClose } from "../websocket-test-support.js";
14
- import { RealtimeCallHandler } from "./realtime-handler.js";
15
-
16
- afterEach(() => {
17
- vi.useRealTimers();
18
- });
19
-
20
- function makeRequest(url: string, host = "gateway.ts.net"): http.IncomingMessage {
21
- const req = new http.IncomingMessage(null as never);
22
- req.url = url;
23
- req.method = "POST";
24
- req.headers = host ? { host } : {};
25
- return req;
26
- }
27
-
28
- function makeBridge(overrides: Partial<RealtimeVoiceBridge> = {}): RealtimeVoiceBridge {
29
- return {
30
- connect: async () => {},
31
- sendAudio: () => {},
32
- setMediaTimestamp: () => {},
33
- submitToolResult: vi.fn(),
34
- acknowledgeMark: () => {},
35
- close: () => {},
36
- isConnected: () => true,
37
- triggerGreeting: () => {},
38
- ...overrides,
39
- };
40
- }
41
-
42
- function makeRealtimeProvider(
43
- createBridge: RealtimeVoiceProviderPlugin["createBridge"],
44
- ): RealtimeVoiceProviderPlugin {
45
- return {
46
- id: "openai",
47
- label: "OpenAI",
48
- isConfigured: () => true,
49
- createBridge,
50
- };
51
- }
52
-
53
- function makeHandler(
54
- overrides?: Partial<VoiceCallRealtimeConfig>,
55
- deps?: {
56
- manager?: Partial<CallManager>;
57
- provider?: Partial<VoiceCallProvider>;
58
- realtimeProvider?: RealtimeVoiceProviderPlugin;
59
- },
60
- ) {
61
- const config: VoiceCallRealtimeConfig = {
62
- enabled: true,
63
- streamPath: overrides?.streamPath ?? "/voice/stream/realtime",
64
- instructions: overrides?.instructions ?? "Be helpful.",
65
- toolPolicy: overrides?.toolPolicy ?? "safe-read-only",
66
- consultPolicy: overrides?.consultPolicy ?? "auto",
67
- tools: overrides?.tools ?? [],
68
- fastContext: overrides?.fastContext ?? {
69
- enabled: false,
70
- timeoutMs: 800,
71
- maxResults: 3,
72
- sources: ["memory", "sessions"],
73
- fallbackToConsult: false,
74
- },
75
- agentContext: overrides?.agentContext ?? {
76
- enabled: false,
77
- maxChars: 6000,
78
- includeIdentity: true,
79
- includeSystemPrompt: true,
80
- includeWorkspaceFiles: true,
81
- files: ["SOUL.md", "IDENTITY.md", "USER.md"],
82
- },
83
- providers: overrides?.providers ?? {},
84
- ...(overrides?.provider ? { provider: overrides.provider } : {}),
85
- };
86
- return new RealtimeCallHandler(
87
- config,
88
- {
89
- processEvent: vi.fn(),
90
- getCall: vi.fn(),
91
- getCallByProviderCallId: vi.fn(),
92
- ...deps?.manager,
93
- } as unknown as CallManager,
94
- {
95
- name: "twilio",
96
- verifyWebhook: vi.fn(),
97
- parseWebhookEvent: vi.fn(),
98
- initiateCall: vi.fn(),
99
- hangupCall: vi.fn(),
100
- playTts: vi.fn(),
101
- startListening: vi.fn(),
102
- stopListening: vi.fn(),
103
- getCallStatus: vi.fn(),
104
- ...deps?.provider,
105
- } as unknown as VoiceCallProvider,
106
- deps?.realtimeProvider ?? makeRealtimeProvider(() => makeBridge()),
107
- { apiKey: "test-key" },
108
- "/voice/webhook",
109
- );
110
- }
111
-
112
- const startRealtimeServer = async (
113
- handler: RealtimeCallHandler,
114
- ): Promise<{
115
- url: string;
116
- close: () => Promise<void>;
117
- }> => {
118
- const payload = handler.buildTwiMLPayload(makeRequest("/voice/webhook"));
119
- const match = payload.body.match(/wss:\/\/[^/]+(\/[^"]+)/);
120
- if (!match) {
121
- throw new Error("Failed to extract realtime stream path");
122
- }
123
-
124
- return await startUpgradeWsServer({
125
- urlPath: match[1],
126
- onUpgrade: (request, socket, head) => {
127
- handler.handleWebSocketUpgrade(request, socket, head);
128
- },
129
- });
130
- };
131
-
132
- const startStreamSessionServer = async (
133
- handler: RealtimeCallHandler,
134
- streamUrl: string,
135
- ): Promise<{
136
- url: string;
137
- close: () => Promise<void>;
138
- }> => {
139
- return await startUpgradeWsServer({
140
- urlPath: new URL(streamUrl).pathname,
141
- onUpgrade: (request, socket, head) => {
142
- handler.handleWebSocketUpgrade(request, socket, head);
143
- },
144
- });
145
- };
146
-
147
- async function waitForRealtimeTest(
148
- callback: () => void | Promise<void>,
149
- options: { timeout?: number; interval?: number } = {},
150
- ) {
151
- await vi.waitFor(callback, { interval: 1, ...options });
152
- }
153
-
154
- function requireFirstMockCall(calls: readonly unknown[][], label: string): unknown[] {
155
- const call = calls.at(0);
156
- if (!call) {
157
- throw new Error(`expected ${label} call`);
158
- }
159
- return call;
160
- }
161
-
162
- describe("RealtimeCallHandler path routing", () => {
163
- it("uses the request host and stream path in TwiML", () => {
164
- const handler = makeHandler();
165
- const payload = handler.buildTwiMLPayload(makeRequest("/voice/webhook", "gateway.ts.net"));
166
-
167
- expect(payload.statusCode).toBe(200);
168
- expect(payload.body).toMatch(
169
- /wss:\/\/gateway\.ts\.net\/voice\/stream\/realtime\/[0-9a-f-]{36}/,
170
- );
171
- });
172
-
173
- it("preserves a public path prefix ahead of serve.path", () => {
174
- const handler = makeHandler({ streamPath: "/custom/stream/realtime" });
175
- handler.setPublicUrl("https://public.example/api/voice/webhook");
176
- const payload = handler.buildTwiMLPayload(makeRequest("/voice/webhook", "127.0.0.1:3334"));
177
-
178
- expect(handler.getStreamPathPattern()).toBe("/api/custom/stream/realtime");
179
- expect(payload.body).toMatch(
180
- /wss:\/\/public\.example\/api\/custom\/stream\/realtime\/[0-9a-f-]{36}/,
181
- );
182
- });
183
-
184
- it("normalizes Twilio outbound realtime directions", async () => {
185
- let callbacks:
186
- | {
187
- onReady?: () => void;
188
- }
189
- | undefined;
190
- const createBridge = vi.fn(
191
- (request: Parameters<RealtimeVoiceProviderPlugin["createBridge"]>[0]) => {
192
- callbacks = request;
193
- return makeBridge();
194
- },
195
- );
196
- const processEvent = vi.fn();
197
- const getCallByProviderCallId = vi.fn(
198
- (): CallRecord => ({
199
- callId: "call-1",
200
- providerCallId: "CA-outbound",
201
- provider: "twilio",
202
- direction: "outbound",
203
- state: "ringing",
204
- from: "+15550001234",
205
- to: "+15550009999",
206
- startedAt: Date.now(),
207
- transcript: [],
208
- processedEventIds: [],
209
- metadata: {},
210
- }),
211
- );
212
- const handler = makeHandler(undefined, {
213
- manager: {
214
- processEvent,
215
- getCallByProviderCallId,
216
- },
217
- realtimeProvider: makeRealtimeProvider(createBridge),
218
- });
219
- const payload = handler.buildTwiMLPayload(
220
- makeRequest("/voice/webhook"),
221
- new URLSearchParams({
222
- Direction: "outbound-dial",
223
- From: "+15550001234",
224
- To: "+15550009999",
225
- }),
226
- );
227
- const match = payload.body.match(/wss:\/\/[^/]+(\/[^"]+)/);
228
- if (!match) {
229
- throw new Error("Failed to extract realtime stream path");
230
- }
231
- const server = await startUpgradeWsServer({
232
- urlPath: match[1],
233
- onUpgrade: (request, socket, head) => {
234
- handler.handleWebSocketUpgrade(request, socket, head);
235
- },
236
- });
237
-
238
- try {
239
- const ws = await connectWs(server.url);
240
- try {
241
- ws.send(
242
- JSON.stringify({
243
- event: "start",
244
- start: { streamSid: "MZ-outbound", callSid: "CA-outbound" },
245
- }),
246
- );
247
- await waitForRealtimeTest(() => {
248
- expect(createBridge).toHaveBeenCalled();
249
- });
250
- callbacks?.onReady?.();
251
- const event = requireFirstMockCall(processEvent.mock.calls, "processed event")[0] as
252
- | NormalizedEvent
253
- | undefined;
254
- expect(event?.type).toBe("call.initiated");
255
- if (event?.type !== "call.initiated") {
256
- throw new Error("expected outbound realtime stream to emit call.initiated");
257
- }
258
- expect(event.direction).toBe("outbound");
259
- expect(event.from).toBe("+15550001234");
260
- expect(event.to).toBe("+15550009999");
261
- } finally {
262
- if (ws.readyState !== WebSocket.CLOSED && ws.readyState !== WebSocket.CLOSING) {
263
- ws.close();
264
- }
265
- }
266
- } finally {
267
- await server.close();
268
- }
269
- });
270
-
271
- it("joins Telnyx realtime streams to the token-bound call", async () => {
272
- const processEvent = vi.fn();
273
- const getCall = vi.fn(
274
- (): CallRecord => ({
275
- callId: "call-1",
276
- providerCallId: "v3:call-1",
277
- provider: "telnyx",
278
- direction: "inbound",
279
- state: "answered",
280
- from: "+15550001234",
281
- to: "+15550009999",
282
- startedAt: Date.now(),
283
- transcript: [],
284
- processedEventIds: [],
285
- metadata: { initialMessage: "hello" },
286
- }),
287
- );
288
- const createBridge = vi.fn(() => makeBridge());
289
- const handler = makeHandler(undefined, {
290
- manager: {
291
- processEvent,
292
- getCall,
293
- },
294
- provider: {
295
- name: "telnyx",
296
- },
297
- realtimeProvider: makeRealtimeProvider(createBridge),
298
- });
299
- handler.setPublicUrl("https://public.example/voice/webhook");
300
- const session = handler.issueStreamSession({
301
- providerName: "telnyx",
302
- callId: "call-1",
303
- from: "+15550001234",
304
- to: "+15550009999",
305
- direction: "inbound",
306
- });
307
- const server = await startStreamSessionServer(handler, session.streamUrl);
308
-
309
- try {
310
- const ws = await connectWs(server.url);
311
- try {
312
- ws.send(
313
- JSON.stringify({
314
- event: "start",
315
- stream_id: "stream-1",
316
- start: { call_control_id: "v3:call-1" },
317
- }),
318
- );
319
- await waitForRealtimeTest(() => {
320
- expect(createBridge).toHaveBeenCalled();
321
- });
322
-
323
- const eventTypes = processEvent.mock.calls.map(
324
- ([event]) => (event as NormalizedEvent).type,
325
- );
326
- expect(eventTypes).toEqual(["call.answered"]);
327
- expect((processEvent.mock.calls[0]?.[0] as NormalizedEvent | undefined)?.callId).toBe(
328
- "call-1",
329
- );
330
- } finally {
331
- if (ws.readyState !== WebSocket.CLOSED && ws.readyState !== WebSocket.CLOSING) {
332
- ws.close();
333
- }
334
- }
335
- } finally {
336
- await server.close();
337
- }
338
- });
339
-
340
- it("rejects Telnyx stream starts that do not match the token-bound call", async () => {
341
- const processEvent = vi.fn();
342
- const getCall = vi.fn(
343
- (): CallRecord => ({
344
- callId: "call-1",
345
- providerCallId: "v3:call-1",
346
- provider: "telnyx",
347
- direction: "inbound",
348
- state: "answered",
349
- from: "+15550001234",
350
- to: "+15550009999",
351
- startedAt: Date.now(),
352
- transcript: [],
353
- processedEventIds: [],
354
- metadata: {},
355
- }),
356
- );
357
- const createBridge = vi.fn(() => makeBridge());
358
- const handler = makeHandler(undefined, {
359
- manager: {
360
- processEvent,
361
- getCall,
362
- },
363
- provider: {
364
- name: "telnyx",
365
- },
366
- realtimeProvider: makeRealtimeProvider(createBridge),
367
- });
368
- handler.setPublicUrl("https://public.example/voice/webhook");
369
- const session = handler.issueStreamSession({
370
- providerName: "telnyx",
371
- callId: "call-1",
372
- direction: "inbound",
373
- });
374
- const server = await startStreamSessionServer(handler, session.streamUrl);
375
-
376
- try {
377
- const ws = await connectWs(server.url);
378
- ws.send(
379
- JSON.stringify({
380
- event: "start",
381
- stream_id: "stream-1",
382
- start: { call_control_id: "v3:other" },
383
- }),
384
- );
385
- const close = await waitForClose(ws);
386
-
387
- expect(close.code).toBe(1008);
388
- expect(createBridge).not.toHaveBeenCalled();
389
- expect(processEvent).not.toHaveBeenCalled();
390
- } finally {
391
- await server.close();
392
- }
393
- });
394
-
395
- it("does not emit an outbound realtime greeting without an initial message", async () => {
396
- let callbacks:
397
- | {
398
- onReady?: () => void;
399
- }
400
- | undefined;
401
- const triggerGreeting = vi.fn();
402
- const createBridge = vi.fn(
403
- (request: Parameters<RealtimeVoiceProviderPlugin["createBridge"]>[0]) => {
404
- callbacks = request;
405
- return makeBridge({ triggerGreeting });
406
- },
407
- );
408
- const getCallByProviderCallId = vi.fn(
409
- (): CallRecord => ({
410
- callId: "call-1",
411
- providerCallId: "CA-silent",
412
- provider: "twilio",
413
- direction: "outbound",
414
- state: "ringing",
415
- from: "+15550001234",
416
- to: "+15550009999",
417
- startedAt: Date.now(),
418
- transcript: [],
419
- processedEventIds: [],
420
- metadata: {},
421
- }),
422
- );
423
- const handler = makeHandler(undefined, {
424
- manager: {
425
- getCallByProviderCallId,
426
- },
427
- realtimeProvider: makeRealtimeProvider(createBridge),
428
- });
429
- const server = await startRealtimeServer(handler);
430
-
431
- try {
432
- const ws = await connectWs(server.url);
433
- try {
434
- ws.send(
435
- JSON.stringify({
436
- event: "start",
437
- start: { streamSid: "MZ-silent", callSid: "CA-silent" },
438
- }),
439
- );
440
- await waitForRealtimeTest(() => {
441
- expect(createBridge).toHaveBeenCalled();
442
- });
443
-
444
- callbacks?.onReady?.();
445
-
446
- expect(triggerGreeting).not.toHaveBeenCalled();
447
- } finally {
448
- if (ws.readyState !== WebSocket.CLOSED && ws.readyState !== WebSocket.CLOSING) {
449
- ws.close();
450
- }
451
- }
452
- } finally {
453
- await server.close();
454
- }
455
- });
456
-
457
- it("speaks through the active outbound realtime bridge by call id", async () => {
458
- const triggerGreeting = vi.fn();
459
- const createBridge = vi.fn(() => makeBridge({ triggerGreeting }));
460
- const getCallByProviderCallId = vi.fn(
461
- (): CallRecord => ({
462
- callId: "call-1",
463
- providerCallId: "CA-speak",
464
- provider: "twilio",
465
- direction: "outbound",
466
- state: "ringing",
467
- from: "+15550001234",
468
- to: "+15550009999",
469
- startedAt: Date.now(),
470
- transcript: [],
471
- processedEventIds: [],
472
- metadata: {},
473
- }),
474
- );
475
- const handler = makeHandler(undefined, {
476
- manager: {
477
- getCallByProviderCallId,
478
- },
479
- realtimeProvider: makeRealtimeProvider(createBridge),
480
- });
481
- const server = await startRealtimeServer(handler);
482
-
483
- try {
484
- const ws = await connectWs(server.url);
485
- try {
486
- ws.send(
487
- JSON.stringify({
488
- event: "start",
489
- start: { streamSid: "MZ-speak", callSid: "CA-speak" },
490
- }),
491
- );
492
- await waitForRealtimeTest(() => {
493
- expect(createBridge).toHaveBeenCalled();
494
- });
495
-
496
- expect(handler.speak("call-1", "Say exactly: hello from Meet.")).toEqual({
497
- success: true,
498
- });
499
- expect(triggerGreeting).toHaveBeenCalledWith("Say exactly: hello from Meet.");
500
- } finally {
501
- if (ws.readyState !== WebSocket.CLOSED && ws.readyState !== WebSocket.CLOSING) {
502
- ws.close();
503
- }
504
- }
505
- } finally {
506
- await server.close();
507
- }
508
- });
509
-
510
- it("ends realtime calls when the telephony stream stops", async () => {
511
- let callbacks:
512
- | {
513
- onClose?: (reason: "completed" | "error") => void;
514
- }
515
- | undefined;
516
- const processEvent = vi.fn();
517
- const createBridge = vi.fn(
518
- (request: Parameters<RealtimeVoiceProviderPlugin["createBridge"]>[0]) => {
519
- callbacks = request;
520
- return makeBridge({
521
- close: () => {
522
- callbacks?.onClose?.("completed");
523
- },
524
- });
525
- },
526
- );
527
- const getCallByProviderCallId = vi.fn(
528
- (): CallRecord => ({
529
- callId: "call-1",
530
- providerCallId: "CA-complete",
531
- provider: "twilio",
532
- direction: "inbound",
533
- state: "ringing",
534
- from: "+15550001234",
535
- to: "+15550009999",
536
- startedAt: Date.now(),
537
- transcript: [],
538
- processedEventIds: [],
539
- metadata: {},
540
- }),
541
- );
542
- const handler = makeHandler(undefined, {
543
- manager: {
544
- processEvent,
545
- getCallByProviderCallId,
546
- },
547
- realtimeProvider: makeRealtimeProvider(createBridge),
548
- });
549
- const server = await startRealtimeServer(handler);
550
-
551
- try {
552
- const ws = await connectWs(server.url);
553
- try {
554
- ws.send(
555
- JSON.stringify({
556
- event: "start",
557
- start: { streamSid: "MZ-complete", callSid: "CA-complete" },
558
- }),
559
- );
560
- await waitForRealtimeTest(() => {
561
- expect(createBridge).toHaveBeenCalled();
562
- });
563
-
564
- ws.send(JSON.stringify({ event: "stop" }));
565
-
566
- await waitForRealtimeTest(() => {
567
- const events = processEvent.mock.calls.map(([event]) => event as NormalizedEvent);
568
- const ended = events.find((event) => event.type === "call.ended");
569
- if (ended?.type !== "call.ended") {
570
- throw new Error("expected realtime stop to emit call.ended");
571
- }
572
- expect(ended.callId).toBe("call-1");
573
- expect(ended.providerCallId).toBe("CA-complete");
574
- expect(ended.reason).toBe("completed");
575
- });
576
- } finally {
577
- if (ws.readyState !== WebSocket.CLOSED && ws.readyState !== WebSocket.CLOSING) {
578
- ws.close();
579
- }
580
- }
581
- } finally {
582
- await server.close();
583
- }
584
- });
585
-
586
- it("records common Talk events for realtime telephony sessions", async () => {
587
- let callbacks:
588
- | {
589
- onAudio?: (audio: Buffer) => void;
590
- onEvent?: (event: {
591
- direction: "client" | "server";
592
- type: string;
593
- detail?: string;
594
- }) => void;
595
- onReady?: () => void;
596
- onTranscript?: (role: "user" | "assistant", text: string, isFinal: boolean) => void;
597
- }
598
- | undefined;
599
- const sendAudio = vi.fn();
600
- const call: CallRecord = {
601
- callId: "call-1",
602
- providerCallId: "CA-talk-events",
603
- provider: "twilio",
604
- direction: "inbound",
605
- state: "ringing",
606
- from: "+15550001234",
607
- to: "+15550009999",
608
- startedAt: Date.now(),
609
- transcript: [],
610
- processedEventIds: [],
611
- metadata: {},
612
- };
613
- const createBridge = vi.fn(
614
- (request: Parameters<RealtimeVoiceProviderPlugin["createBridge"]>[0]) => {
615
- callbacks = request;
616
- return makeBridge({ sendAudio });
617
- },
618
- );
619
- const handler = makeHandler(undefined, {
620
- manager: {
621
- getCallByProviderCallId: vi.fn((): CallRecord => call),
622
- },
623
- realtimeProvider: makeRealtimeProvider(createBridge),
624
- });
625
- const server = await startRealtimeServer(handler);
626
-
627
- try {
628
- const ws = await connectWs(server.url);
629
- try {
630
- ws.send(
631
- JSON.stringify({
632
- event: "start",
633
- start: { streamSid: "MZ-talk-events", callSid: "CA-talk-events" },
634
- }),
635
- );
636
- await waitForRealtimeTest(() => {
637
- expect(createBridge).toHaveBeenCalled();
638
- });
639
-
640
- callbacks?.onReady?.();
641
- ws.send(
642
- JSON.stringify({
643
- event: "media",
644
- media: { payload: Buffer.from([0xff, 0xff]).toString("base64") },
645
- }),
646
- );
647
- await waitForRealtimeTest(() => {
648
- expect(sendAudio).toHaveBeenCalledWith(Buffer.from([0xff, 0xff]));
649
- });
650
- callbacks?.onTranscript?.("user", "hello", true);
651
- callbacks?.onAudio?.(Buffer.from([1, 2, 3]));
652
- callbacks?.onTranscript?.("assistant", "hi there", true);
653
- callbacks?.onEvent?.({ direction: "server", type: "response.done" });
654
-
655
- const recent = call.metadata?.recentTalkEvents as
656
- | Array<{
657
- brain: string;
658
- provider: string;
659
- sessionId: string;
660
- transport: string;
661
- type: string;
662
- }>
663
- | undefined;
664
- expect(recent?.map((event) => event.type)).toEqual([
665
- "session.started",
666
- "session.ready",
667
- "turn.started",
668
- "input.audio.delta",
669
- "transcript.done",
670
- "input.audio.committed",
671
- "output.audio.started",
672
- "output.audio.delta",
673
- "output.text.done",
674
- "output.audio.done",
675
- "turn.ended",
676
- ]);
677
- expect(recent?.[0]?.provider).toBe("openai");
678
- expect(recent?.[0]?.sessionId).toBe("voice-call:call-1:realtime");
679
- expect(recent?.[0]?.transport).toBe("gateway-relay");
680
- expect(call.metadata?.lastTalkEventType).toBe("turn.ended");
681
- } finally {
682
- if (ws.readyState !== WebSocket.CLOSED && ws.readyState !== WebSocket.CLOSING) {
683
- ws.close();
684
- }
685
- }
686
- } finally {
687
- await server.close();
688
- }
689
- });
690
-
691
- it("emits barge-in cancellation with a turn before provider speech_started", async () => {
692
- let callbacks:
693
- | {
694
- onAudio?: (audio: Buffer) => void;
695
- }
696
- | undefined;
697
- const sendAudio = vi.fn();
698
- const call: CallRecord = {
699
- callId: "call-1",
700
- providerCallId: "CA-barge-in",
701
- provider: "twilio",
702
- direction: "inbound",
703
- state: "ringing",
704
- from: "+15550001234",
705
- to: "+15550009999",
706
- startedAt: Date.now(),
707
- transcript: [],
708
- processedEventIds: [],
709
- metadata: {},
710
- };
711
- const createBridge = vi.fn(
712
- (request: Parameters<RealtimeVoiceProviderPlugin["createBridge"]>[0]) => {
713
- callbacks = request;
714
- return makeBridge({ sendAudio });
715
- },
716
- );
717
- const handler = makeHandler(undefined, {
718
- manager: {
719
- getCallByProviderCallId: vi.fn((): CallRecord => call),
720
- },
721
- realtimeProvider: makeRealtimeProvider(createBridge),
722
- });
723
- const server = await startRealtimeServer(handler);
724
-
725
- try {
726
- const ws = await connectWs(server.url);
727
- try {
728
- ws.send(
729
- JSON.stringify({
730
- event: "start",
731
- start: { streamSid: "MZ-barge-in", callSid: "CA-barge-in" },
732
- }),
733
- );
734
- await waitForRealtimeTest(() => {
735
- expect(createBridge).toHaveBeenCalled();
736
- });
737
-
738
- callbacks?.onAudio?.(Buffer.from([1, 2, 3]));
739
- const speechPayload = Buffer.alloc(160, 0x00).toString("base64");
740
- ws.send(JSON.stringify({ event: "media", media: { payload: speechPayload } }));
741
- ws.send(JSON.stringify({ event: "media", media: { payload: speechPayload } }));
742
-
743
- await waitForRealtimeTest(() => {
744
- expect(sendAudio).toHaveBeenCalledTimes(2);
745
- });
746
-
747
- const recent = call.metadata?.recentTalkEvents as
748
- | Array<{
749
- turnId?: string;
750
- type: string;
751
- }>
752
- | undefined;
753
- const cancelled = recent?.find((event) => event.type === "turn.cancelled");
754
- if (!cancelled) {
755
- throw new Error("expected barge-in to cancel the active turn");
756
- }
757
- expect(cancelled.turnId).toMatch(/^turn-\d+$/);
758
- expect(recent?.findLast((event) => event.type === "input.audio.delta")?.turnId).not.toBe(
759
- cancelled.turnId,
760
- );
761
- } finally {
762
- if (ws.readyState !== WebSocket.CLOSED && ws.readyState !== WebSocket.CLOSING) {
763
- ws.close();
764
- }
765
- }
766
- } finally {
767
- await server.close();
768
- }
769
- });
770
-
771
- it("submits continuing responses only for realtime agent consult calls", async () => {
772
- let callbacks:
773
- | {
774
- onToolCall?: (event: {
775
- itemId: string;
776
- callId: string;
777
- name: string;
778
- args: unknown;
779
- }) => void;
780
- onTranscript?: (role: "user" | "assistant", text: string, isFinal: boolean) => void;
781
- }
782
- | undefined;
783
- let resolveConsult: ((value: unknown) => void) | undefined;
784
- let receivedPartialTranscript: string | undefined;
785
- const submitToolResult = vi.fn();
786
- const bridge = makeBridge({
787
- supportsToolResultContinuation: true,
788
- submitToolResult,
789
- });
790
- const createBridge = vi.fn(
791
- (request: Parameters<RealtimeVoiceProviderPlugin["createBridge"]>[0]) => {
792
- callbacks = request;
793
- return bridge;
794
- },
795
- );
796
- const getCallByProviderCallId = vi.fn(
797
- (): CallRecord => ({
798
- callId: "call-1",
799
- providerCallId: "CA-tool",
800
- provider: "twilio",
801
- direction: "inbound",
802
- state: "ringing",
803
- from: "+15550001234",
804
- to: "+15550009999",
805
- startedAt: Date.now(),
806
- transcript: [],
807
- processedEventIds: [],
808
- metadata: {},
809
- }),
810
- );
811
- const handler = makeHandler(undefined, {
812
- manager: {
813
- getCallByProviderCallId,
814
- },
815
- realtimeProvider: makeRealtimeProvider(createBridge),
816
- });
817
- handler.registerToolHandler("klaw_agent_consult", (_args, _callId, context) => {
818
- receivedPartialTranscript = context.partialUserTranscript;
819
- return new Promise((resolve) => {
820
- resolveConsult = resolve;
821
- });
822
- });
823
- handler.registerToolHandler("custom_lookup", async () => ({ ok: true }));
824
- const server = await startRealtimeServer(handler);
825
-
826
- try {
827
- const ws = await connectWs(server.url);
828
- try {
829
- ws.send(
830
- JSON.stringify({
831
- event: "start",
832
- start: { streamSid: "MZ-tool", callSid: "CA-tool" },
833
- }),
834
- );
835
- await waitForRealtimeTest(() => {
836
- expect(createBridge).toHaveBeenCalled();
837
- });
838
-
839
- vi.useFakeTimers();
840
- callbacks?.onTranscript?.("user", "Are the basement", false);
841
- callbacks?.onToolCall?.({
842
- itemId: "item-1",
843
- callId: "consult-call",
844
- name: "klaw_agent_consult",
845
- args: { question: "Are the basement lights on?" },
846
- });
847
- await vi.advanceTimersByTimeAsync(350);
848
- await waitForRealtimeTest(() => {
849
- expect(receivedPartialTranscript).toBe("Are the basement");
850
- });
851
-
852
- await waitForRealtimeTest(() => {
853
- const workingCall = submitToolResult.mock.calls.find(
854
- ([callId]) => callId === "consult-call",
855
- );
856
- if (!workingCall) {
857
- throw new Error("expected consult-call tool result");
858
- }
859
- const payload = workingCall[1] as Record<string, unknown> | undefined;
860
- expect(payload?.status).toBe("working");
861
- expect(payload?.tool).toBe("klaw_agent_consult");
862
- expect(typeof payload?.message).toBe("string");
863
- expect(workingCall[2]).toEqual({ willContinue: true });
864
- });
865
- expect(submitToolResult).toHaveBeenCalledTimes(1);
866
-
867
- resolveConsult?.({ text: "The basement lights are on." });
868
-
869
- await waitForRealtimeTest(() => {
870
- expect(submitToolResult).toHaveBeenLastCalledWith(
871
- "consult-call",
872
- {
873
- text: "The basement lights are on.",
874
- },
875
- undefined,
876
- );
877
- });
878
-
879
- submitToolResult.mockClear();
880
- callbacks?.onToolCall?.({
881
- itemId: "item-2",
882
- callId: "custom-call",
883
- name: "custom_lookup",
884
- args: {},
885
- });
886
-
887
- await waitForRealtimeTest(() => {
888
- expect(submitToolResult).toHaveBeenCalledWith("custom-call", { ok: true }, undefined);
889
- });
890
- const customCallResults = submitToolResult.mock.calls.filter(
891
- ([callId]) => callId === "custom-call",
892
- );
893
- expect(customCallResults).toHaveLength(1);
894
- expect(customCallResults[0]?.[2]).toBeUndefined();
895
- } finally {
896
- vi.useRealTimers();
897
- if (ws.readyState !== WebSocket.CLOSED && ws.readyState !== WebSocket.CLOSING) {
898
- ws.close();
899
- }
900
- }
901
- } finally {
902
- await server.close();
903
- }
904
- });
905
-
906
- it("forces an agent consult from final user transcript when consult policy is always", async () => {
907
- let callbacks:
908
- | {
909
- onTranscript?: (role: "user" | "assistant", text: string, isFinal: boolean) => void;
910
- }
911
- | undefined;
912
- const sendUserMessage = vi.fn();
913
- const bridge = makeBridge({ sendUserMessage });
914
- const createBridge = vi.fn(
915
- (request: Parameters<RealtimeVoiceProviderPlugin["createBridge"]>[0]) => {
916
- callbacks = request;
917
- return bridge;
918
- },
919
- );
920
- const handler = makeHandler(
921
- { consultPolicy: "always" },
922
- {
923
- manager: {
924
- getCallByProviderCallId: vi.fn(
925
- (): CallRecord => ({
926
- callId: "call-1",
927
- providerCallId: "CA-force",
928
- provider: "twilio",
929
- direction: "inbound",
930
- state: "ringing",
931
- from: "+15550001234",
932
- to: "+15550009999",
933
- startedAt: Date.now(),
934
- transcript: [],
935
- processedEventIds: [],
936
- metadata: {},
937
- }),
938
- ),
939
- },
940
- realtimeProvider: makeRealtimeProvider(createBridge),
941
- },
942
- );
943
- const consult = vi.fn<
944
- (args: unknown, callId: string, context: Record<string, unknown>) => Promise<{ text: string }>
945
- >(async () => ({ text: "I created the smoke test file." }));
946
- handler.registerToolHandler("klaw_agent_consult", consult);
947
- const server = await startRealtimeServer(handler);
948
-
949
- try {
950
- const ws = await connectWs(server.url);
951
- try {
952
- ws.send(
953
- JSON.stringify({
954
- event: "start",
955
- start: { streamSid: "MZ-force", callSid: "CA-force" },
956
- }),
957
- );
958
- await waitForRealtimeTest(() => {
959
- expect(createBridge).toHaveBeenCalled();
960
- });
961
-
962
- vi.useFakeTimers();
963
- callbacks?.onTranscript?.("user", "Create a smoke test file for me.", true);
964
- await vi.advanceTimersByTimeAsync(200);
965
-
966
- await waitForRealtimeTest(() => {
967
- expect(consult).toHaveBeenCalledTimes(1);
968
- });
969
- const [args, callId, context] = requireFirstMockCall(consult.mock.calls, "consult");
970
- expect(args).toEqual({
971
- question: "Create a smoke test file for me.",
972
- context:
973
- "The realtime provider produced a final user transcript without invoking klaw_agent_consult, so Klaw is forcing the consult because consultPolicy is always.",
974
- });
975
- expect(callId).toBe("call-1");
976
- expect(context).toEqual({});
977
- await waitForRealtimeTest(() => {
978
- expect(sendUserMessage).toHaveBeenCalledTimes(1);
979
- expect(requireFirstMockCall(sendUserMessage.mock.calls, "user message")).toEqual([
980
- "Internal Klaw consult result is ready.\nDo not call tools for this internal result.\nSpeak the following answer to the caller now, briefly and naturally:\nI created the smoke test file.",
981
- ]);
982
- });
983
- } finally {
984
- vi.useRealTimers();
985
- if (ws.readyState !== WebSocket.CLOSED && ws.readyState !== WebSocket.CLOSING) {
986
- ws.close();
987
- }
988
- }
989
- } finally {
990
- await server.close();
991
- }
992
- });
993
-
994
- it("does not carry a final transcript into the next direct voice turn", async () => {
995
- let callbacks:
996
- | {
997
- onTranscript?: (role: "user" | "assistant", text: string, isFinal: boolean) => void;
998
- }
999
- | undefined;
1000
- const processEvent = vi.fn();
1001
- const createBridge = vi.fn(
1002
- (request: Parameters<RealtimeVoiceProviderPlugin["createBridge"]>[0]) => {
1003
- callbacks = request;
1004
- return makeBridge();
1005
- },
1006
- );
1007
- const handler = makeHandler(undefined, {
1008
- manager: {
1009
- processEvent,
1010
- getCallByProviderCallId: vi.fn(
1011
- (): CallRecord => ({
1012
- callId: "call-1",
1013
- providerCallId: "CA-direct-turns",
1014
- provider: "twilio",
1015
- direction: "inbound",
1016
- state: "ringing",
1017
- from: "+15550001234",
1018
- to: "+15550009999",
1019
- startedAt: Date.now(),
1020
- transcript: [],
1021
- processedEventIds: [],
1022
- metadata: {},
1023
- }),
1024
- ),
1025
- },
1026
- realtimeProvider: makeRealtimeProvider(createBridge),
1027
- });
1028
- const server = await startRealtimeServer(handler);
1029
-
1030
- try {
1031
- const ws = await connectWs(server.url);
1032
- try {
1033
- ws.send(
1034
- JSON.stringify({
1035
- event: "start",
1036
- start: { streamSid: "MZ-direct-turns", callSid: "CA-direct-turns" },
1037
- }),
1038
- );
1039
- await waitForRealtimeTest(() => {
1040
- expect(createBridge).toHaveBeenCalled();
1041
- });
1042
-
1043
- callbacks?.onTranscript?.("user", "Hello there.", true);
1044
- callbacks?.onTranscript?.("user", "How are you?", true);
1045
-
1046
- const speechTranscripts = processEvent.mock.calls
1047
- .map(([event]) => event as NormalizedEvent)
1048
- .filter(
1049
- (event): event is Extract<NormalizedEvent, { type: "call.speech" }> =>
1050
- event.type === "call.speech",
1051
- )
1052
- .map((event) => event.transcript);
1053
- expect(speechTranscripts).toContain("Hello there.");
1054
- expect(speechTranscripts).toContain("How are you?");
1055
- expect(speechTranscripts).not.toContain("Hello there. How are you?");
1056
- } finally {
1057
- if (ws.readyState !== WebSocket.CLOSED && ws.readyState !== WebSocket.CLOSING) {
1058
- ws.close();
1059
- }
1060
- }
1061
- } finally {
1062
- await server.close();
1063
- }
1064
- });
1065
-
1066
- it("waits for partial transcript fragments to settle before consulting", async () => {
1067
- let callbacks:
1068
- | {
1069
- onToolCall?: (event: RealtimeVoiceToolCallEvent) => void;
1070
- onTranscript?: (role: "user" | "assistant", text: string, isFinal: boolean) => void;
1071
- }
1072
- | undefined;
1073
- const submitToolResult = vi.fn();
1074
- const bridge = makeBridge({
1075
- supportsToolResultContinuation: true,
1076
- submitToolResult,
1077
- });
1078
- const createBridge = vi.fn(
1079
- (request: Parameters<RealtimeVoiceProviderPlugin["createBridge"]>[0]) => {
1080
- callbacks = request;
1081
- return bridge;
1082
- },
1083
- );
1084
- const handler = makeHandler(undefined, {
1085
- manager: {
1086
- getCallByProviderCallId: vi.fn(
1087
- (): CallRecord => ({
1088
- callId: "call-1",
1089
- providerCallId: "CA-settle",
1090
- provider: "twilio",
1091
- direction: "inbound",
1092
- state: "ringing",
1093
- from: "+15550001234",
1094
- to: "+15550009999",
1095
- startedAt: Date.now(),
1096
- transcript: [],
1097
- processedEventIds: [],
1098
- metadata: {},
1099
- }),
1100
- ),
1101
- },
1102
- realtimeProvider: makeRealtimeProvider(createBridge),
1103
- });
1104
- const consult = vi.fn<
1105
- (args: unknown, callId: string, context: Record<string, unknown>) => Promise<{ text: string }>
1106
- >(async () => ({ text: "I sent it." }));
1107
- handler.registerToolHandler("klaw_agent_consult", consult);
1108
- const server = await startRealtimeServer(handler);
1109
-
1110
- try {
1111
- const ws = await connectWs(server.url);
1112
- try {
1113
- ws.send(
1114
- JSON.stringify({
1115
- event: "start",
1116
- start: { streamSid: "MZ-settle", callSid: "CA-settle" },
1117
- }),
1118
- );
1119
- await waitForRealtimeTest(() => {
1120
- expect(createBridge).toHaveBeenCalled();
1121
- });
1122
-
1123
- vi.useFakeTimers();
1124
- callbacks?.onTranscript?.("user", "Send a Discord", false);
1125
- callbacks?.onToolCall?.({
1126
- itemId: "item-1",
1127
- callId: "consult-call",
1128
- name: "klaw_agent_consult",
1129
- args: { question: "message" },
1130
- });
1131
- await vi.advanceTimersByTimeAsync(50);
1132
- callbacks?.onTranscript?.("user", "message.", false);
1133
- await vi.advanceTimersByTimeAsync(350);
1134
-
1135
- await waitForRealtimeTest(
1136
- () => {
1137
- expect(consult).toHaveBeenCalledTimes(1);
1138
- },
1139
- { timeout: 2_000 },
1140
- );
1141
- const [args, callId, context] = requireFirstMockCall(consult.mock.calls, "consult");
1142
- const consultArgs = args as { question?: string; context?: string } | undefined;
1143
- expect(consultArgs?.question).toBe("Send a Discord message.");
1144
- expect(consultArgs?.context).toBe(
1145
- "Realtime provider supplied a shorter consult question: message",
1146
- );
1147
- expect(callId).toBe("call-1");
1148
- expect(context).toEqual({ partialUserTranscript: "Send a Discord message." });
1149
- await waitForRealtimeTest(() => {
1150
- expect(submitToolResult).toHaveBeenLastCalledWith(
1151
- "consult-call",
1152
- { text: "I sent it." },
1153
- undefined,
1154
- );
1155
- });
1156
- } finally {
1157
- vi.useRealTimers();
1158
- if (ws.readyState !== WebSocket.CLOSED && ws.readyState !== WebSocket.CLOSING) {
1159
- ws.close();
1160
- }
1161
- }
1162
- } finally {
1163
- await server.close();
1164
- }
1165
- });
1166
-
1167
- it("does not force a duplicate consult when the realtime provider calls the consult tool", async () => {
1168
- let callbacks:
1169
- | {
1170
- onToolCall?: (event: RealtimeVoiceToolCallEvent) => void;
1171
- onTranscript?: (role: "user" | "assistant", text: string, isFinal: boolean) => void;
1172
- }
1173
- | undefined;
1174
- const submitToolResult = vi.fn();
1175
- const bridge = makeBridge({
1176
- supportsToolResultContinuation: true,
1177
- submitToolResult,
1178
- });
1179
- const createBridge = vi.fn(
1180
- (request: Parameters<RealtimeVoiceProviderPlugin["createBridge"]>[0]) => {
1181
- callbacks = request;
1182
- return bridge;
1183
- },
1184
- );
1185
- const handler = makeHandler(
1186
- { consultPolicy: "always" },
1187
- {
1188
- manager: {
1189
- getCallByProviderCallId: vi.fn(
1190
- (): CallRecord => ({
1191
- callId: "call-1",
1192
- providerCallId: "CA-native",
1193
- provider: "twilio",
1194
- direction: "inbound",
1195
- state: "ringing",
1196
- from: "+15550001234",
1197
- to: "+15550009999",
1198
- startedAt: Date.now(),
1199
- transcript: [],
1200
- processedEventIds: [],
1201
- metadata: {},
1202
- }),
1203
- ),
1204
- },
1205
- realtimeProvider: makeRealtimeProvider(createBridge),
1206
- },
1207
- );
1208
- const consult = vi.fn(async () => ({ text: "Native consult result." }));
1209
- handler.registerToolHandler("klaw_agent_consult", consult);
1210
- const server = await startRealtimeServer(handler);
1211
-
1212
- try {
1213
- const ws = await connectWs(server.url);
1214
- try {
1215
- ws.send(
1216
- JSON.stringify({
1217
- event: "start",
1218
- start: { streamSid: "MZ-native", callSid: "CA-native" },
1219
- }),
1220
- );
1221
- await waitForRealtimeTest(() => {
1222
- expect(createBridge).toHaveBeenCalled();
1223
- });
1224
-
1225
- vi.useFakeTimers();
1226
- callbacks?.onTranscript?.("user", "Send me a Discord message.", true);
1227
- callbacks?.onToolCall?.({
1228
- itemId: "item-1",
1229
- callId: "consult-call",
1230
- name: "klaw_agent_consult",
1231
- args: { question: "Send me a Discord message." },
1232
- });
1233
-
1234
- await waitForRealtimeTest(() => {
1235
- expect(submitToolResult).toHaveBeenLastCalledWith(
1236
- "consult-call",
1237
- { text: "Native consult result." },
1238
- undefined,
1239
- );
1240
- });
1241
- await vi.advanceTimersByTimeAsync(250);
1242
- expect(consult).toHaveBeenCalledTimes(1);
1243
- } finally {
1244
- vi.useRealTimers();
1245
- if (ws.readyState !== WebSocket.CLOSED && ws.readyState !== WebSocket.CLOSING) {
1246
- ws.close();
1247
- }
1248
- }
1249
- } finally {
1250
- await server.close();
1251
- }
1252
- });
1253
-
1254
- it("does not submit an interim checking result when fast context is enabled", async () => {
1255
- let callbacks:
1256
- | {
1257
- onToolCall?: (event: RealtimeVoiceToolCallEvent) => void;
1258
- }
1259
- | undefined;
1260
- const submitToolResult = vi.fn();
1261
- const bridge = makeBridge({
1262
- supportsToolResultContinuation: true,
1263
- submitToolResult,
1264
- });
1265
- const createBridge = vi.fn(
1266
- (request: Parameters<RealtimeVoiceProviderPlugin["createBridge"]>[0]) => {
1267
- callbacks = request;
1268
- return bridge;
1269
- },
1270
- );
1271
- const handler = makeHandler(
1272
- {
1273
- fastContext: {
1274
- enabled: true,
1275
- timeoutMs: 800,
1276
- maxResults: 3,
1277
- sources: ["memory", "sessions"],
1278
- fallbackToConsult: false,
1279
- },
1280
- },
1281
- {
1282
- manager: {
1283
- getCallByProviderCallId: vi.fn(
1284
- (): CallRecord => ({
1285
- callId: "call-1",
1286
- providerCallId: "CA-fast",
1287
- provider: "twilio",
1288
- direction: "inbound",
1289
- state: "ringing",
1290
- from: "+15550001234",
1291
- to: "+15550009999",
1292
- startedAt: Date.now(),
1293
- transcript: [],
1294
- processedEventIds: [],
1295
- metadata: {},
1296
- }),
1297
- ),
1298
- },
1299
- realtimeProvider: makeRealtimeProvider(createBridge),
1300
- },
1301
- );
1302
- handler.registerToolHandler("klaw_agent_consult", async () => ({ text: "Fast context." }));
1303
- const server = await startRealtimeServer(handler);
1304
-
1305
- try {
1306
- const ws = await connectWs(server.url);
1307
- try {
1308
- ws.send(
1309
- JSON.stringify({
1310
- event: "start",
1311
- start: { streamSid: "MZ-fast", callSid: "CA-fast" },
1312
- }),
1313
- );
1314
- await waitForRealtimeTest(() => {
1315
- expect(createBridge).toHaveBeenCalled();
1316
- });
1317
-
1318
- callbacks?.onToolCall?.({
1319
- itemId: "item-1",
1320
- callId: "consult-call",
1321
- name: "klaw_agent_consult",
1322
- args: { question: "What do you remember?" },
1323
- });
1324
-
1325
- await waitForRealtimeTest(() => {
1326
- expect(submitToolResult).toHaveBeenCalledWith(
1327
- "consult-call",
1328
- { text: "Fast context." },
1329
- undefined,
1330
- );
1331
- });
1332
- expect(submitToolResult).toHaveBeenCalledTimes(1);
1333
- } finally {
1334
- if (ws.readyState !== WebSocket.CLOSED && ws.readyState !== WebSocket.CLOSING) {
1335
- ws.close();
1336
- }
1337
- }
1338
- } finally {
1339
- await server.close();
1340
- }
1341
- });
1342
- });
1343
-
1344
- describe("RealtimeCallHandler websocket hardening", () => {
1345
- it("closes realtime streams when paced outbound audio exceeds the internal queue cap", async () => {
1346
- let sendProviderAudio: ((audio: Buffer) => void) | undefined;
1347
- const createBridge = vi.fn(
1348
- (request: Parameters<RealtimeVoiceProviderPlugin["createBridge"]>[0]) => {
1349
- sendProviderAudio = request.onAudio;
1350
- return makeBridge();
1351
- },
1352
- );
1353
- const handler = makeHandler(undefined, {
1354
- manager: {
1355
- getCallByProviderCallId: vi.fn(
1356
- (): CallRecord => ({
1357
- callId: "call-1",
1358
- providerCallId: "CA-backpressure",
1359
- provider: "twilio",
1360
- direction: "inbound",
1361
- state: "ringing",
1362
- from: "+15550001234",
1363
- to: "+15550009999",
1364
- startedAt: Date.now(),
1365
- transcript: [],
1366
- processedEventIds: [],
1367
- metadata: {},
1368
- }),
1369
- ),
1370
- },
1371
- realtimeProvider: makeRealtimeProvider(createBridge),
1372
- });
1373
- const server = await startRealtimeServer(handler);
1374
-
1375
- try {
1376
- const ws = await connectWs(server.url);
1377
- try {
1378
- ws.send(
1379
- JSON.stringify({
1380
- event: "start",
1381
- start: { streamSid: "MZ-backpressure", callSid: "CA-backpressure" },
1382
- }),
1383
- );
1384
- await waitForRealtimeTest(() => {
1385
- if (!sendProviderAudio) {
1386
- throw new Error("expected realtime provider audio sender");
1387
- }
1388
- });
1389
-
1390
- const providerAudioSender = sendProviderAudio;
1391
- if (!providerAudioSender) {
1392
- throw new Error("expected realtime provider audio sender");
1393
- }
1394
- providerAudioSender(Buffer.alloc(8_000 * 121, 0x7f));
1395
- const closed = await waitForClose(ws);
1396
-
1397
- expect(closed.code).toBe(1013);
1398
- } finally {
1399
- if (ws.readyState !== WebSocket.CLOSED && ws.readyState !== WebSocket.CLOSING) {
1400
- ws.close();
1401
- }
1402
- }
1403
- } finally {
1404
- await server.close();
1405
- }
1406
- });
1407
-
1408
- it("rejects oversized pre-start frames before bridge setup", async () => {
1409
- const createBridge = vi.fn(() => makeBridge());
1410
- const processEvent = vi.fn();
1411
- const getCallByProviderCallId = vi.fn();
1412
- const handler = makeHandler(undefined, {
1413
- manager: {
1414
- processEvent,
1415
- getCallByProviderCallId,
1416
- },
1417
- realtimeProvider: makeRealtimeProvider(createBridge),
1418
- });
1419
- const server = await startRealtimeServer(handler);
1420
-
1421
- try {
1422
- const ws = await connectWs(server.url);
1423
- try {
1424
- ws.send(
1425
- JSON.stringify({
1426
- event: "start",
1427
- start: {
1428
- streamSid: "MZ-oversized",
1429
- callSid: "CA-oversized",
1430
- padding: "A".repeat(300 * 1024),
1431
- },
1432
- }),
1433
- );
1434
-
1435
- const closed = await waitForClose(ws);
1436
-
1437
- expect(closed.code).toBe(1009);
1438
- expect(createBridge).not.toHaveBeenCalled();
1439
- expect(processEvent).not.toHaveBeenCalled();
1440
- expect(getCallByProviderCallId).not.toHaveBeenCalled();
1441
- } finally {
1442
- if (ws.readyState !== WebSocket.CLOSED && ws.readyState !== WebSocket.CLOSING) {
1443
- ws.close();
1444
- }
1445
- }
1446
- } finally {
1447
- await server.close();
1448
- }
1449
- });
1450
- });