@kodelyth/voice-call 2026.5.42 → 2026.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111)
  1. package/package.json +16 -4
  2. package/api.ts +0 -16
  3. package/cli-metadata.ts +0 -10
  4. package/config-api.ts +0 -12
  5. package/index.test.ts +0 -1075
  6. package/index.ts +0 -863
  7. package/runtime-api.ts +0 -20
  8. package/runtime-entry.ts +0 -1
  9. package/setup-api.ts +0 -47
  10. package/src/allowlist.test.ts +0 -18
  11. package/src/allowlist.ts +0 -19
  12. package/src/cli.test.ts +0 -12
  13. package/src/cli.ts +0 -866
  14. package/src/config-compat.test.ts +0 -130
  15. package/src/config-compat.ts +0 -227
  16. package/src/config.test.ts +0 -542
  17. package/src/config.ts +0 -883
  18. package/src/core-bridge.ts +0 -14
  19. package/src/deep-merge.test.ts +0 -40
  20. package/src/deep-merge.ts +0 -23
  21. package/src/gateway-continue-operation.ts +0 -200
  22. package/src/http-headers.test.ts +0 -16
  23. package/src/http-headers.ts +0 -15
  24. package/src/manager/context.ts +0 -50
  25. package/src/manager/events.test.ts +0 -578
  26. package/src/manager/events.ts +0 -332
  27. package/src/manager/lifecycle.ts +0 -53
  28. package/src/manager/lookup.test.ts +0 -52
  29. package/src/manager/lookup.ts +0 -35
  30. package/src/manager/outbound.test.ts +0 -629
  31. package/src/manager/outbound.ts +0 -508
  32. package/src/manager/state.ts +0 -48
  33. package/src/manager/store.ts +0 -107
  34. package/src/manager/timers.test.ts +0 -127
  35. package/src/manager/timers.ts +0 -113
  36. package/src/manager/twiml.test.ts +0 -13
  37. package/src/manager/twiml.ts +0 -17
  38. package/src/manager.closed-loop.test.ts +0 -259
  39. package/src/manager.inbound-allowlist.test.ts +0 -183
  40. package/src/manager.notify.test.ts +0 -390
  41. package/src/manager.restore.test.ts +0 -310
  42. package/src/manager.test-harness.ts +0 -127
  43. package/src/manager.ts +0 -441
  44. package/src/media-stream.test.ts +0 -953
  45. package/src/media-stream.ts +0 -876
  46. package/src/providers/base.ts +0 -99
  47. package/src/providers/mock.test.ts +0 -86
  48. package/src/providers/mock.ts +0 -185
  49. package/src/providers/plivo.test.ts +0 -93
  50. package/src/providers/plivo.ts +0 -601
  51. package/src/providers/shared/call-status.test.ts +0 -24
  52. package/src/providers/shared/call-status.ts +0 -24
  53. package/src/providers/shared/guarded-json-api.test.ts +0 -127
  54. package/src/providers/shared/guarded-json-api.ts +0 -49
  55. package/src/providers/telnyx.test.ts +0 -489
  56. package/src/providers/telnyx.ts +0 -419
  57. package/src/providers/twilio/api.test.ts +0 -184
  58. package/src/providers/twilio/api.ts +0 -100
  59. package/src/providers/twilio/twiml-policy.test.ts +0 -84
  60. package/src/providers/twilio/twiml-policy.ts +0 -87
  61. package/src/providers/twilio/webhook.ts +0 -34
  62. package/src/providers/twilio.test.ts +0 -607
  63. package/src/providers/twilio.ts +0 -861
  64. package/src/providers/twilio.types.ts +0 -17
  65. package/src/realtime-agent-context.test.ts +0 -101
  66. package/src/realtime-agent-context.ts +0 -149
  67. package/src/realtime-defaults.ts +0 -3
  68. package/src/realtime-fast-context.test.ts +0 -74
  69. package/src/realtime-fast-context.ts +0 -27
  70. package/src/realtime-transcription.runtime.ts +0 -4
  71. package/src/realtime-voice.runtime.ts +0 -5
  72. package/src/response-generator.test.ts +0 -385
  73. package/src/response-generator.ts +0 -348
  74. package/src/response-model.test.ts +0 -71
  75. package/src/response-model.ts +0 -23
  76. package/src/runtime.test.ts +0 -625
  77. package/src/runtime.ts +0 -528
  78. package/src/telephony-audio.test.ts +0 -61
  79. package/src/telephony-audio.ts +0 -12
  80. package/src/telephony-tts.test.ts +0 -196
  81. package/src/telephony-tts.ts +0 -235
  82. package/src/test-fixtures.ts +0 -82
  83. package/src/tts-provider-voice.test.ts +0 -34
  84. package/src/tts-provider-voice.ts +0 -21
  85. package/src/tunnel.test.ts +0 -173
  86. package/src/tunnel.ts +0 -314
  87. package/src/types.ts +0 -311
  88. package/src/utils.test.ts +0 -17
  89. package/src/utils.ts +0 -14
  90. package/src/voice-mapping.test.ts +0 -32
  91. package/src/voice-mapping.ts +0 -65
  92. package/src/webhook/realtime-audio-pacer.test.ts +0 -146
  93. package/src/webhook/realtime-audio-pacer.ts +0 -204
  94. package/src/webhook/realtime-handler.test.ts +0 -1450
  95. package/src/webhook/realtime-handler.ts +0 -1382
  96. package/src/webhook/stale-call-reaper.test.ts +0 -89
  97. package/src/webhook/stale-call-reaper.ts +0 -38
  98. package/src/webhook/stream-frame-adapter.test.ts +0 -187
  99. package/src/webhook/stream-frame-adapter.ts +0 -219
  100. package/src/webhook/tailscale.test.ts +0 -216
  101. package/src/webhook/tailscale.ts +0 -129
  102. package/src/webhook-exposure.test.ts +0 -33
  103. package/src/webhook-exposure.ts +0 -84
  104. package/src/webhook-security.test.ts +0 -813
  105. package/src/webhook-security.ts +0 -982
  106. package/src/webhook.hangup-once.lifecycle.test.ts +0 -179
  107. package/src/webhook.test.ts +0 -1615
  108. package/src/webhook.ts +0 -933
  109. package/src/webhook.types.ts +0 -5
  110. package/src/websocket-test-support.ts +0 -72
  111. package/tsconfig.json +0 -16
package/src/types.ts DELETED
@@ -1,311 +0,0 @@
1
- import { z } from "zod";
2
- import type { CallMode } from "./config.js";
3
-
4
- // -----------------------------------------------------------------------------
5
- // Provider Identifiers
6
- // -----------------------------------------------------------------------------
7
-
8
- const ProviderNameSchema = z.enum(["telnyx", "twilio", "plivo", "mock"]);
9
- export type ProviderName = z.infer<typeof ProviderNameSchema>;
10
-
11
- // -----------------------------------------------------------------------------
12
- // Core Call Identifiers
13
- // -----------------------------------------------------------------------------
14
-
15
- /** Internal call identifier (UUID) */
16
- export type CallId = string;
17
-
18
- /** Provider-specific call identifier */
19
- type ProviderCallId = string;
20
-
21
- // -----------------------------------------------------------------------------
22
- // Call Lifecycle States
23
- // -----------------------------------------------------------------------------
24
-
25
- const CallStateSchema = z.enum([
26
- // Non-terminal states
27
- "initiated",
28
- "ringing",
29
- "answered",
30
- "active",
31
- "speaking",
32
- "listening",
33
- // Terminal states
34
- "completed",
35
- "hangup-user",
36
- "hangup-bot",
37
- "timeout",
38
- "error",
39
- "failed",
40
- "no-answer",
41
- "busy",
42
- "voicemail",
43
- ]);
44
- export type CallState = z.infer<typeof CallStateSchema>;
45
-
46
- export const TerminalStates = new Set<CallState>([
47
- "completed",
48
- "hangup-user",
49
- "hangup-bot",
50
- "timeout",
51
- "error",
52
- "failed",
53
- "no-answer",
54
- "busy",
55
- "voicemail",
56
- ]);
57
-
58
- const EndReasonSchema = z.enum([
59
- "completed",
60
- "hangup-user",
61
- "hangup-bot",
62
- "timeout",
63
- "error",
64
- "failed",
65
- "no-answer",
66
- "busy",
67
- "voicemail",
68
- ]);
69
- export type EndReason = z.infer<typeof EndReasonSchema>;
70
-
71
- // -----------------------------------------------------------------------------
72
- // Normalized Call Events
73
- // -----------------------------------------------------------------------------
74
-
75
- const BaseEventSchema = z.object({
76
- id: z.string(),
77
- // Stable provider-derived key for idempotency/replay dedupe.
78
- dedupeKey: z.string().optional(),
79
- callId: z.string(),
80
- providerCallId: z.string().optional(),
81
- timestamp: z.number(),
82
- // Optional per-turn nonce for speech events (Twilio <Gather> replay hardening).
83
- turnToken: z.string().optional(),
84
- // Optional fields for inbound call detection
85
- direction: z.enum(["inbound", "outbound"]).optional(),
86
- from: z.string().optional(),
87
- to: z.string().optional(),
88
- });
89
-
90
- const NormalizedEventSchema = z.discriminatedUnion("type", [
91
- BaseEventSchema.extend({
92
- type: z.literal("call.initiated"),
93
- }),
94
- BaseEventSchema.extend({
95
- type: z.literal("call.ringing"),
96
- }),
97
- BaseEventSchema.extend({
98
- type: z.literal("call.answered"),
99
- }),
100
- BaseEventSchema.extend({
101
- type: z.literal("call.active"),
102
- }),
103
- BaseEventSchema.extend({
104
- type: z.literal("call.speaking"),
105
- text: z.string(),
106
- }),
107
- BaseEventSchema.extend({
108
- type: z.literal("call.speech"),
109
- transcript: z.string(),
110
- isFinal: z.boolean(),
111
- confidence: z.number().min(0).max(1).optional(),
112
- }),
113
- BaseEventSchema.extend({
114
- type: z.literal("call.silence"),
115
- durationMs: z.number(),
116
- }),
117
- BaseEventSchema.extend({
118
- type: z.literal("call.dtmf"),
119
- digits: z.string(),
120
- }),
121
- BaseEventSchema.extend({
122
- type: z.literal("call.ended"),
123
- reason: EndReasonSchema,
124
- }),
125
- BaseEventSchema.extend({
126
- type: z.literal("call.error"),
127
- error: z.string(),
128
- retryable: z.boolean().optional(),
129
- }),
130
- ]);
131
- export type NormalizedEvent = z.infer<typeof NormalizedEventSchema>;
132
-
133
- // -----------------------------------------------------------------------------
134
- // Call Direction
135
- // -----------------------------------------------------------------------------
136
-
137
- const CallDirectionSchema = z.enum(["outbound", "inbound"]);
138
-
139
- // -----------------------------------------------------------------------------
140
- // Call Record
141
- // -----------------------------------------------------------------------------
142
-
143
- const TranscriptEntrySchema = z.object({
144
- timestamp: z.number(),
145
- speaker: z.enum(["bot", "user"]),
146
- text: z.string(),
147
- isFinal: z.boolean().default(true),
148
- });
149
- export type TranscriptEntry = z.infer<typeof TranscriptEntrySchema>;
150
-
151
- export const CallRecordSchema = z.object({
152
- callId: z.string(),
153
- providerCallId: z.string().optional(),
154
- provider: ProviderNameSchema,
155
- direction: CallDirectionSchema,
156
- state: CallStateSchema,
157
- from: z.string(),
158
- to: z.string(),
159
- sessionKey: z.string().optional(),
160
- startedAt: z.number(),
161
- answeredAt: z.number().optional(),
162
- endedAt: z.number().optional(),
163
- endReason: EndReasonSchema.optional(),
164
- transcript: z.array(TranscriptEntrySchema).default([]),
165
- processedEventIds: z.array(z.string()).default([]),
166
- metadata: z.record(z.string(), z.unknown()).optional(),
167
- });
168
- export type CallRecord = z.infer<typeof CallRecordSchema>;
169
-
170
- // -----------------------------------------------------------------------------
171
- // Webhook Types
172
- // -----------------------------------------------------------------------------
173
-
174
- export type WebhookVerificationResult = {
175
- ok: boolean;
176
- reason?: string;
177
- /** Signature is valid, but request was seen before within replay window. */
178
- isReplay?: boolean;
179
- /** Stable key derived from authenticated request material. */
180
- verifiedRequestKey?: string;
181
- };
182
-
183
- export type WebhookParseOptions = {
184
- /** Stable request key from verifyWebhook. */
185
- verifiedRequestKey?: string;
186
- };
187
-
188
- export type WebhookContext = {
189
- headers: Record<string, string | string[] | undefined>;
190
- rawBody: string;
191
- url: string;
192
- method: "GET" | "POST" | "PUT" | "DELETE" | "PATCH";
193
- query?: Record<string, string | string[] | undefined>;
194
- remoteAddress?: string;
195
- };
196
-
197
- export type ProviderWebhookParseResult = {
198
- events: NormalizedEvent[];
199
- providerResponseBody?: string;
200
- providerResponseHeaders?: Record<string, string>;
201
- statusCode?: number;
202
- };
203
-
204
- // -----------------------------------------------------------------------------
205
- // Provider Method Types
206
- // -----------------------------------------------------------------------------
207
-
208
- export type InitiateCallInput = {
209
- callId: CallId;
210
- from: string;
211
- to: string;
212
- webhookUrl: string;
213
- clientState?: Record<string, string>;
214
- /** Inline TwiML to execute without fetching webhook TwiML. */
215
- inlineTwiml?: string;
216
- /** TwiML to serve once before normal webhook-driven call handling resumes. */
217
- preConnectTwiml?: string;
218
- /**
219
- * Optional `wss://` URL the carrier should open for bidirectional Media
220
- * Streaming on call connect. Used by carriers (e.g. Telnyx) that attach
221
- * streaming at dial time. Twilio learns the URL from TwiML so it ignores
222
- * this field.
223
- */
224
- streamUrl?: string;
225
- /** Per-call auth token the carrier echoes back on the WS upgrade. */
226
- streamAuthToken?: string;
227
- };
228
-
229
- export type InitiateCallResult = {
230
- providerCallId: ProviderCallId;
231
- status: "initiated" | "queued";
232
- };
233
-
234
- export type HangupCallInput = {
235
- callId: CallId;
236
- providerCallId: ProviderCallId;
237
- reason: EndReason;
238
- };
239
-
240
- export type AnswerCallInput = {
241
- callId: CallId;
242
- providerCallId: ProviderCallId;
243
- /**
244
- * Optional `wss://` URL the carrier should open for bidirectional Media
245
- * Streaming on answer. Used by carriers (e.g. Telnyx) that attach
246
- * streaming at answer time. Twilio learns the URL from TwiML so it ignores
247
- * this field.
248
- */
249
- streamUrl?: string;
250
- /** Per-call auth token the carrier echoes back on the WS upgrade. */
251
- streamAuthToken?: string;
252
- };
253
-
254
- export type PlayTtsInput = {
255
- callId: CallId;
256
- providerCallId: ProviderCallId;
257
- text: string;
258
- voice?: string;
259
- locale?: string;
260
- };
261
-
262
- export type SendDtmfInput = {
263
- callId: CallId;
264
- providerCallId: ProviderCallId;
265
- digits: string;
266
- };
267
-
268
- export type StartListeningInput = {
269
- callId: CallId;
270
- providerCallId: ProviderCallId;
271
- language?: string;
272
- /** Optional per-turn nonce for provider callbacks (replay hardening). */
273
- turnToken?: string;
274
- };
275
-
276
- export type StopListeningInput = {
277
- callId: CallId;
278
- providerCallId: ProviderCallId;
279
- };
280
-
281
- // -----------------------------------------------------------------------------
282
- // Call Status Verification (used on restart to verify persisted calls)
283
- // -----------------------------------------------------------------------------
284
-
285
- export type GetCallStatusInput = {
286
- providerCallId: ProviderCallId;
287
- };
288
-
289
- export type GetCallStatusResult = {
290
- /** Provider-specific status string (e.g. "completed", "in-progress") */
291
- status: string;
292
- /** True when the provider confirms the call has ended */
293
- isTerminal: boolean;
294
- /** True when the status could not be determined (transient error) */
295
- isUnknown?: boolean;
296
- };
297
-
298
- // -----------------------------------------------------------------------------
299
- // Outbound Call Options
300
- // -----------------------------------------------------------------------------
301
-
302
- export type OutboundCallOptions = {
303
- /** Message to speak when call connects */
304
- message?: string;
305
- /** Call mode (overrides config default) */
306
- mode?: CallMode;
307
- /** DTMF digits to send after the call is connected */
308
- dtmfSequence?: string;
309
- /** Session that initiated the call, used for agent context/delegated message routing */
310
- requesterSessionKey?: string;
311
- };
package/src/utils.test.ts DELETED
@@ -1,17 +0,0 @@
1
- import os from "node:os";
2
- import path from "node:path";
3
- import { describe, expect, it } from "vitest";
4
- import { resolveUserPath } from "./utils.js";
5
-
6
- describe("resolveUserPath", () => {
7
- it("returns trimmed empty input unchanged", () => {
8
- expect(resolveUserPath(" ")).toBe("");
9
- });
10
-
11
- it("expands tildes and resolves relative paths", () => {
12
- expect(resolveUserPath("~/voice-call/config.json")).toBe(
13
- path.resolve(os.homedir(), "voice-call/config.json"),
14
- );
15
- expect(resolveUserPath("./voice-call")).toBe(path.resolve("./voice-call"));
16
- });
17
- });
package/src/utils.ts DELETED
@@ -1,14 +0,0 @@
1
- import os from "node:os";
2
- import path from "node:path";
3
-
4
- export function resolveUserPath(input: string): string {
5
- const trimmed = input.trim();
6
- if (!trimmed) {
7
- return trimmed;
8
- }
9
- if (trimmed.startsWith("~")) {
10
- const expanded = trimmed.replace(/^~(?=$|[\\/])/, os.homedir());
11
- return path.resolve(expanded);
12
- }
13
- return path.resolve(trimmed);
14
- }
package/src/voice-mapping.test.ts DELETED
@@ -1,32 +0,0 @@
1
- import { describe, expect, it } from "vitest";
2
- import {
3
- DEFAULT_POLLY_VOICE,
4
- escapeXml,
5
- getOpenAiVoiceNames,
6
- isOpenAiVoice,
7
- mapVoiceToPolly,
8
- } from "./voice-mapping.js";
9
-
10
- describe("voice mapping", () => {
11
- it("escapes xml-special characters", () => {
12
- expect(escapeXml(`5 < 6 & "quote" 'apostrophe' > 4`)).toBe(
13
- "5 &lt; 6 &amp; &quot;quote&quot; &apos;apostrophe&apos; &gt; 4",
14
- );
15
- });
16
-
17
- it("maps openai voices, passes through provider voices, and falls back to default", () => {
18
- expect(mapVoiceToPolly("alloy")).toBe("Polly.Joanna");
19
- expect(mapVoiceToPolly("ECHO")).toBe("Polly.Matthew");
20
- expect(mapVoiceToPolly("Polly.Brian")).toBe("Polly.Brian");
21
- expect(mapVoiceToPolly("Google.en-US-Standard-C")).toBe("Google.en-US-Standard-C");
22
- expect(mapVoiceToPolly("unknown")).toBe(DEFAULT_POLLY_VOICE);
23
- expect(mapVoiceToPolly(undefined)).toBe(DEFAULT_POLLY_VOICE);
24
- });
25
-
26
- it("detects known openai voices and lists them", () => {
27
- expect(isOpenAiVoice("nova")).toBe(true);
28
- expect(isOpenAiVoice("NOVA")).toBe(true);
29
- expect(isOpenAiVoice("Polly.Joanna")).toBe(false);
30
- expect(getOpenAiVoiceNames()).toEqual(["alloy", "echo", "fable", "onyx", "nova", "shimmer"]);
31
- });
32
- });
package/src/voice-mapping.ts DELETED
@@ -1,65 +0,0 @@
1
- import { normalizeLowercaseStringOrEmpty } from "klaw/plugin-sdk/string-coerce-runtime";
2
-
3
- /**
4
- * Escape XML special characters for TwiML and other XML responses.
5
- */
6
- export function escapeXml(text: string): string {
7
- return text
8
- .replace(/&/g, "&amp;")
9
- .replace(/</g, "&lt;")
10
- .replace(/>/g, "&gt;")
11
- .replace(/"/g, "&quot;")
12
- .replace(/'/g, "&apos;");
13
- }
14
-
15
- /**
16
- * Map of OpenAI voice names to similar Twilio Polly voices.
17
- */
18
- const OPENAI_TO_POLLY_MAP: Record<string, string> = {
19
- alloy: "Polly.Joanna", // neutral, warm
20
- echo: "Polly.Matthew", // male, warm
21
- fable: "Polly.Amy", // British, expressive
22
- onyx: "Polly.Brian", // deep male
23
- nova: "Polly.Salli", // female, friendly
24
- shimmer: "Polly.Kimberly", // female, clear
25
- };
26
-
27
- /**
28
- * Default Polly voice when no mapping is found.
29
- */
30
- export const DEFAULT_POLLY_VOICE = "Polly.Joanna";
31
-
32
- /**
33
- * Map OpenAI voice names to Twilio Polly equivalents.
34
- * Falls through if already a valid Polly/Google voice.
35
- *
36
- * @param voice - OpenAI voice name (alloy, echo, etc.) or Polly voice name
37
- * @returns Polly voice name suitable for Twilio TwiML
38
- */
39
- export function mapVoiceToPolly(voice: string | undefined): string {
40
- if (!voice) {
41
- return DEFAULT_POLLY_VOICE;
42
- }
43
-
44
- // Already a Polly/Google voice - pass through
45
- if (voice.startsWith("Polly.") || voice.startsWith("Google.")) {
46
- return voice;
47
- }
48
-
49
- // Map OpenAI voices to Polly equivalents
50
- return OPENAI_TO_POLLY_MAP[normalizeLowercaseStringOrEmpty(voice)] || DEFAULT_POLLY_VOICE;
51
- }
52
-
53
- /**
54
- * Check if a voice name is a known OpenAI voice.
55
- */
56
- export function isOpenAiVoice(voice: string): boolean {
57
- return normalizeLowercaseStringOrEmpty(voice) in OPENAI_TO_POLLY_MAP;
58
- }
59
-
60
- /**
61
- * Get all supported OpenAI voice names.
62
- */
63
- export function getOpenAiVoiceNames(): string[] {
64
- return Object.keys(OPENAI_TO_POLLY_MAP);
65
- }
package/src/webhook/realtime-audio-pacer.test.ts DELETED
@@ -1,146 +0,0 @@
1
- import { afterEach, describe, expect, it, vi } from "vitest";
2
- import {
3
- RealtimeAudioPacer,
4
- RealtimeMulawSpeechStartDetector,
5
- calculateMulawRms,
6
- type RealtimeAudioSerializer,
7
- } from "./realtime-audio-pacer.js";
8
-
9
- function createTwilioSerializer(streamSid: string): RealtimeAudioSerializer {
10
- return {
11
- media: (payload) => JSON.stringify({ event: "media", streamSid, media: { payload } }),
12
- clear: () => JSON.stringify({ event: "clear", streamSid }),
13
- mark: (name) => JSON.stringify({ event: "mark", streamSid, mark: { name } }),
14
- };
15
- }
16
-
17
- function createTelnyxSerializer(): RealtimeAudioSerializer {
18
- return {
19
- media: (payload) => JSON.stringify({ event: "media", media: { payload } }),
20
- clear: () => JSON.stringify({ event: "clear" }),
21
- mark: (name) => JSON.stringify({ event: "mark", mark: { name } }),
22
- };
23
- }
24
-
25
- describe("RealtimeAudioPacer", () => {
26
- afterEach(() => {
27
- vi.useRealTimers();
28
- });
29
-
30
- it("paces realtime audio as 20ms telephony frames before marks (Twilio shape)", async () => {
31
- vi.useFakeTimers();
32
- const sent: unknown[] = [];
33
- const pacer = new RealtimeAudioPacer({
34
- serializer: createTwilioSerializer("MZ-test"),
35
- send: (message) => {
36
- sent.push(JSON.parse(message));
37
- return true;
38
- },
39
- });
40
-
41
- pacer.sendAudio(Buffer.alloc(320, 0x7f));
42
- pacer.sendMark("audio-1");
43
-
44
- expect(sent).toHaveLength(1);
45
- expect(
46
- Buffer.from((sent[0] as { media: { payload: string } }).media.payload, "base64"),
47
- ).toHaveLength(160);
48
-
49
- await vi.advanceTimersByTimeAsync(20);
50
- expect(sent).toHaveLength(2);
51
- expect(
52
- Buffer.from((sent[1] as { media: { payload: string } }).media.payload, "base64"),
53
- ).toHaveLength(160);
54
-
55
- await vi.advanceTimersByTimeAsync(20);
56
- expect(sent[2]).toEqual({
57
- event: "mark",
58
- streamSid: "MZ-test",
59
- mark: { name: "audio-1" },
60
- });
61
- });
62
-
63
- it("clears queued audio immediately (Twilio shape)", async () => {
64
- vi.useFakeTimers();
65
- const sent: unknown[] = [];
66
- const pacer = new RealtimeAudioPacer({
67
- serializer: createTwilioSerializer("MZ-test"),
68
- send: (message) => {
69
- sent.push(JSON.parse(message));
70
- return true;
71
- },
72
- });
73
-
74
- pacer.sendAudio(Buffer.alloc(480, 0x7f));
75
- pacer.clearAudio();
76
- await vi.advanceTimersByTimeAsync(100);
77
-
78
- expect(sent).toHaveLength(2);
79
- expect(sent[1]).toEqual({ event: "clear", streamSid: "MZ-test" });
80
- });
81
-
82
- it("stops instead of buffering unbounded realtime audio", async () => {
83
- vi.useFakeTimers();
84
- const sent: unknown[] = [];
85
- const onBackpressure = vi.fn();
86
- const pacer = new RealtimeAudioPacer({
87
- serializer: createTwilioSerializer("MZ-test"),
88
- maxQueuedAudioBytes: 320,
89
- onBackpressure,
90
- send: (message) => {
91
- sent.push(JSON.parse(message));
92
- return true;
93
- },
94
- });
95
-
96
- pacer.sendAudio(Buffer.alloc(480, 0x7f));
97
- pacer.sendMark("after-overflow");
98
- await vi.advanceTimersByTimeAsync(100);
99
-
100
- expect(onBackpressure).toHaveBeenCalledOnce();
101
- expect(sent).toStrictEqual([]);
102
- });
103
-
104
- it("paces audio in Telnyx envelope shape (no streamSid)", async () => {
105
- vi.useFakeTimers();
106
- const sent: unknown[] = [];
107
- const pacer = new RealtimeAudioPacer({
108
- serializer: createTelnyxSerializer(),
109
- send: (message) => {
110
- sent.push(JSON.parse(message));
111
- return true;
112
- },
113
- });
114
-
115
- pacer.sendAudio(Buffer.alloc(160, 0x7f));
116
- pacer.clearAudio();
117
- await vi.advanceTimersByTimeAsync(100);
118
-
119
- expect(sent).toEqual([
120
- { event: "media", media: { payload: Buffer.alloc(160, 0x7f).toString("base64") } },
121
- { event: "clear" },
122
- ]);
123
- });
124
- });
125
-
126
- describe("RealtimeMulawSpeechStartDetector", () => {
127
- it("detects a speech start after consecutive loud chunks and resets after quiet", () => {
128
- const detector = new RealtimeMulawSpeechStartDetector({
129
- requiredLoudChunks: 2,
130
- requiredQuietChunks: 2,
131
- rmsThreshold: 0.02,
132
- });
133
- const silence = Buffer.alloc(160, 0xff);
134
- const speech = Buffer.alloc(160, 0x00);
135
-
136
- expect(calculateMulawRms(silence)).toBeLessThan(0.02);
137
- expect(calculateMulawRms(speech)).toBeGreaterThan(0.02);
138
- expect(detector.accept(speech)).toBe(false);
139
- expect(detector.accept(speech)).toBe(true);
140
- expect(detector.accept(speech)).toBe(false);
141
- expect(detector.accept(silence)).toBe(false);
142
- expect(detector.accept(silence)).toBe(false);
143
- expect(detector.accept(speech)).toBe(false);
144
- expect(detector.accept(speech)).toBe(true);
145
- });
146
- });