getpatter 0.4.3 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -10,18 +10,15 @@ import {
10
10
  DEFAULT_PRICING,
11
11
  DeepgramSTT,
12
12
  ElevenLabsConvAIAdapter,
13
- ElevenLabsTTS,
14
13
  EmbeddedServer,
15
14
  LLMLoop,
16
15
  MetricsStore,
17
16
  OpenAILLMProvider,
18
17
  OpenAIRealtimeAdapter,
19
- OpenAITTS,
20
18
  PipelineHookExecutor,
21
19
  RemoteMessageHandler,
22
20
  SentenceChunker,
23
21
  TestSession,
24
- WhisperSTT,
25
22
  calculateRealtimeCost,
26
23
  calculateSttCost,
27
24
  calculateTelephonyCost,
@@ -39,7 +36,7 @@ import {
39
36
  resample16kTo8k,
40
37
  resample24kTo16k,
41
38
  resample8kTo16k
42
- } from "./chunk-35EVXMGB.mjs";
39
+ } from "./chunk-757NVN4L.mjs";
43
40
  import {
44
41
  getLogger,
45
42
  setLogger
@@ -186,49 +183,86 @@ var PatterConnection = class {
186
183
  }
187
184
  };
188
185
 
189
- // src/providers.ts
190
- var STTConfigImpl = class {
191
- provider;
186
+ // src/engines/openai.ts
187
+ var Realtime = class {
188
+ kind = "openai_realtime";
192
189
  apiKey;
193
- language;
194
- constructor(provider, apiKey, language = "en") {
195
- this.provider = provider;
196
- this.apiKey = apiKey;
197
- this.language = language;
198
- }
199
- toDict() {
200
- return { provider: this.provider, api_key: this.apiKey, language: this.language };
190
+ model;
191
+ voice;
192
+ constructor(opts = {}) {
193
+ const key = opts.apiKey ?? process.env.OPENAI_API_KEY;
194
+ if (!key) {
195
+ throw new Error(
196
+ "OpenAI Realtime requires an apiKey. Pass { apiKey: 'sk-...' } or set OPENAI_API_KEY in the environment."
197
+ );
198
+ }
199
+ this.apiKey = key;
200
+ this.model = opts.model ?? "gpt-4o-mini-realtime-preview";
201
+ this.voice = opts.voice ?? "alloy";
201
202
  }
202
203
  };
203
- var TTSConfigImpl = class {
204
- provider;
204
+
205
+ // src/engines/elevenlabs.ts
206
+ var ConvAI = class {
207
+ kind = "elevenlabs_convai";
205
208
  apiKey;
209
+ agentId;
206
210
  voice;
207
- constructor(provider, apiKey, voice = "alloy") {
208
- this.provider = provider;
209
- this.apiKey = apiKey;
210
- this.voice = voice;
211
+ constructor(opts = {}) {
212
+ const key = opts.apiKey ?? process.env.ELEVENLABS_API_KEY;
213
+ const agent = opts.agentId ?? process.env.ELEVENLABS_AGENT_ID;
214
+ if (!key) {
215
+ throw new Error(
216
+ "ElevenLabs ConvAI requires an apiKey. Pass { apiKey: '...' } or set ELEVENLABS_API_KEY in the environment."
217
+ );
218
+ }
219
+ if (!agent) {
220
+ throw new Error(
221
+ "ElevenLabs ConvAI requires an agentId. Pass { agentId: 'agent_...' } or set ELEVENLABS_AGENT_ID in the environment."
222
+ );
223
+ }
224
+ this.apiKey = key;
225
+ this.agentId = agent;
226
+ this.voice = opts.voice;
211
227
  }
212
- toDict() {
213
- return { provider: this.provider, api_key: this.apiKey, voice: this.voice };
228
+ };
229
+
230
+ // src/tunnels/index.ts
231
+ var CloudflareTunnel = class {
232
+ kind = "cloudflare";
233
+ };
234
+ var Static = class {
235
+ kind = "static";
236
+ hostname;
237
+ constructor(opts) {
238
+ if (!opts.hostname) {
239
+ throw new Error("Static tunnel requires a non-empty hostname.");
240
+ }
241
+ this.hostname = opts.hostname;
214
242
  }
215
243
  };
216
- function deepgram(opts) {
217
- return new STTConfigImpl("deepgram", opts.apiKey, opts.language ?? "en");
218
- }
219
- function whisper(opts) {
220
- return new STTConfigImpl("whisper", opts.apiKey, opts.language ?? "en");
221
- }
222
- function elevenlabs(opts) {
223
- return new TTSConfigImpl("elevenlabs", opts.apiKey, opts.voice ?? "rachel");
224
- }
225
- function openaiTts(opts) {
226
- return new TTSConfigImpl("openai", opts.apiKey, opts.voice ?? "alloy");
227
- }
228
244
 
229
245
  // src/client.ts
230
246
  var DEFAULT_BACKEND_URL2 = "wss://api.getpatter.com";
231
247
  var DEFAULT_REST_URL = "https://api.getpatter.com";
248
+ function sttConfigToDict(cfg) {
249
+ const out = {
250
+ provider: cfg.provider,
251
+ api_key: cfg.apiKey,
252
+ language: cfg.language
253
+ };
254
+ if (cfg.options) out.options = { ...cfg.options };
255
+ return out;
256
+ }
257
+ function ttsConfigToDict(cfg) {
258
+ const out = {
259
+ provider: cfg.provider,
260
+ api_key: cfg.apiKey,
261
+ voice: cfg.voice
262
+ };
263
+ if (cfg.options) out.options = { ...cfg.options };
264
+ return out;
265
+ }
232
266
  var Patter = class {
233
267
  apiKey;
234
268
  backendUrl;
@@ -239,20 +273,39 @@ var Patter = class {
239
273
  embeddedServer = null;
240
274
  tunnelHandle = null;
241
275
  constructor(options) {
242
- if ("mode" in options && options.mode === "local") {
276
+ const hasCarrier = "carrier" in options && options.carrier !== void 0;
277
+ const isLocal = "mode" in options && options.mode === "local" || hasCarrier;
278
+ if (isLocal) {
243
279
  const local = options;
244
280
  if (!local.phoneNumber) {
245
281
  throw new Error("Local mode requires phoneNumber");
246
282
  }
247
- if (!local.twilioSid && !local.telnyxKey) {
248
- throw new Error("Local mode requires twilioSid or telnyxKey");
283
+ if (!local.carrier) {
284
+ throw new Error(
285
+ "Local mode requires a `carrier` instance. Pass `carrier: new Twilio({...})` or `carrier: new Telnyx({...})`."
286
+ );
249
287
  }
250
- if (local.twilioSid && !local.twilioToken) {
251
- throw new Error("twilioToken is required when using twilioSid");
288
+ const carrier = local.carrier;
289
+ const tunnel = local.tunnel;
290
+ let tunnelWebhookUrl;
291
+ if (tunnel instanceof Static) {
292
+ if (local.webhookUrl) {
293
+ throw new Error(
294
+ "Cannot use both `tunnel: new StaticTunnel(...)` and `webhookUrl`. Pick one."
295
+ );
296
+ }
297
+ tunnelWebhookUrl = tunnel.hostname;
252
298
  }
253
299
  this.mode = "local";
254
- const normalizedLocal = local.webhookUrl ? { ...local, webhookUrl: local.webhookUrl.replace(/^https?:\/\//, "").replace(/\/$/, "") } : local;
255
- this.localConfig = normalizedLocal;
300
+ const rawWebhook = tunnelWebhookUrl ?? local.webhookUrl;
301
+ const normalizedWebhook = rawWebhook ? rawWebhook.replace(/^https?:\/\//, "").replace(/\/$/, "") : void 0;
302
+ this.localConfig = {
303
+ carrier,
304
+ phoneNumber: local.phoneNumber,
305
+ webhookUrl: normalizedWebhook,
306
+ tunnel: local.tunnel,
307
+ openaiKey: local.openaiKey
308
+ };
256
309
  this.apiKey = "";
257
310
  this.backendUrl = DEFAULT_BACKEND_URL2;
258
311
  this.restUrl = DEFAULT_REST_URL;
@@ -269,25 +322,55 @@ var Patter = class {
269
322
  }
270
323
  // === Local mode ===
271
324
  agent(opts) {
272
- if (opts.provider) {
325
+ let working = { ...opts };
326
+ if (opts.engine) {
327
+ if (opts.provider) {
328
+ throw new Error(
329
+ "Cannot pass both `engine:` and `provider:`. Use one (engine is preferred)."
330
+ );
331
+ }
332
+ const engine = opts.engine;
333
+ if (engine instanceof Realtime) {
334
+ working = {
335
+ ...working,
336
+ provider: "openai_realtime",
337
+ model: working.model ?? engine.model,
338
+ voice: working.voice ?? engine.voice
339
+ };
340
+ if (this.localConfig && !this.localConfig.openaiKey) {
341
+ this.localConfig = { ...this.localConfig, openaiKey: engine.apiKey };
342
+ }
343
+ } else if (engine instanceof ConvAI) {
344
+ working = {
345
+ ...working,
346
+ provider: "elevenlabs_convai",
347
+ voice: working.voice ?? engine.voice
348
+ };
349
+ } else {
350
+ throw new Error(
351
+ "Unknown engine. Expected OpenAIRealtime or ElevenLabsConvAI instance."
352
+ );
353
+ }
354
+ }
355
+ if (working.provider) {
273
356
  const valid = ["openai_realtime", "elevenlabs_convai", "pipeline"];
274
- if (!valid.includes(opts.provider)) {
275
- throw new Error(`provider must be one of: ${valid.join(", ")}. Got: '${opts.provider}'`);
357
+ if (!valid.includes(working.provider)) {
358
+ throw new Error(`provider must be one of: ${valid.join(", ")}. Got: '${working.provider}'`);
276
359
  }
277
360
  }
278
- if (opts.tools) {
279
- if (!Array.isArray(opts.tools)) {
361
+ if (working.tools) {
362
+ if (!Array.isArray(working.tools)) {
280
363
  throw new TypeError("tools must be an array");
281
364
  }
282
- opts.tools.forEach((tool, i) => {
283
- if (!tool.name) throw new Error(`tools[${i}] missing required 'name' field`);
284
- if (!tool.webhookUrl && !tool.handler) throw new Error(`tools[${i}] requires either 'webhookUrl' or 'handler'`);
365
+ working.tools.forEach((tool2, i) => {
366
+ if (!tool2.name) throw new Error(`tools[${i}] missing required 'name' field`);
367
+ if (!tool2.webhookUrl && !tool2.handler) throw new Error(`tools[${i}] requires either 'webhookUrl' or 'handler'`);
285
368
  });
286
369
  }
287
- if (opts.variables !== void 0 && (typeof opts.variables !== "object" || Array.isArray(opts.variables))) {
370
+ if (working.variables !== void 0 && (typeof working.variables !== "object" || Array.isArray(working.variables))) {
288
371
  throw new TypeError("variables must be an object");
289
372
  }
290
- return { ...opts };
373
+ return working;
291
374
  }
292
375
  async serve(opts) {
293
376
  if (this.mode !== "local" || !this.localConfig) {
@@ -310,10 +393,14 @@ var Patter = class {
310
393
  }
311
394
  let webhookUrl = this.localConfig.webhookUrl ?? "";
312
395
  const port = opts.port ?? 8e3;
313
- if (opts.tunnel && webhookUrl) {
396
+ const ctorTunnel = this.localConfig.tunnel;
397
+ const wantsCloudflaredFromServe = opts.tunnel === true;
398
+ const wantsCloudflaredFromCtor = ctorTunnel === true || ctorTunnel instanceof CloudflareTunnel;
399
+ const wantsCloudflared = wantsCloudflaredFromServe || wantsCloudflaredFromCtor;
400
+ if (wantsCloudflared && webhookUrl) {
314
401
  throw new Error("Cannot use both tunnel: true and webhookUrl. Pick one.");
315
402
  }
316
- if (opts.tunnel) {
403
+ if (wantsCloudflared) {
317
404
  const { startTunnel: startTunnel2 } = await import("./tunnel-BL7A7GXW.mjs");
318
405
  this.tunnelHandle = await startTunnel2(port);
319
406
  webhookUrl = this.tunnelHandle.hostname;
@@ -323,17 +410,29 @@ var Patter = class {
323
410
  "No webhookUrl configured. Either:\n - Pass webhookUrl in the Patter constructor\n - Use tunnel: true in serve() to auto-create a tunnel"
324
411
  );
325
412
  }
413
+ const carrier = this.localConfig.carrier;
414
+ const telephonyProvider = carrier.kind === "twilio" ? "twilio" : "telnyx";
415
+ const { autoConfigureCarrier } = await import("./carrier-config-CPG5CROM.mjs");
416
+ await autoConfigureCarrier({
417
+ telephonyProvider,
418
+ twilioSid: carrier.kind === "twilio" ? carrier.accountSid : void 0,
419
+ twilioToken: carrier.kind === "twilio" ? carrier.authToken : void 0,
420
+ telnyxKey: carrier.kind === "telnyx" ? carrier.apiKey : void 0,
421
+ telnyxConnectionId: carrier.kind === "telnyx" ? carrier.connectionId : void 0,
422
+ phoneNumber: this.localConfig.phoneNumber,
423
+ webhookHost: webhookUrl
424
+ });
326
425
  this.embeddedServer = new EmbeddedServer(
327
426
  {
328
- twilioSid: this.localConfig.twilioSid,
329
- twilioToken: this.localConfig.twilioToken,
427
+ twilioSid: carrier.kind === "twilio" ? carrier.accountSid : void 0,
428
+ twilioToken: carrier.kind === "twilio" ? carrier.authToken : void 0,
330
429
  openaiKey: this.localConfig.openaiKey,
331
430
  phoneNumber: this.localConfig.phoneNumber,
332
431
  webhookUrl,
333
- telephonyProvider: this.localConfig.telephonyProvider,
334
- telnyxKey: this.localConfig.telnyxKey,
335
- telnyxConnectionId: this.localConfig.telnyxConnectionId,
336
- telnyxPublicKey: this.localConfig.telnyxPublicKey
432
+ telephonyProvider,
433
+ telnyxKey: carrier.kind === "telnyx" ? carrier.apiKey : void 0,
434
+ telnyxConnectionId: carrier.kind === "telnyx" ? carrier.connectionId : void 0,
435
+ telnyxPublicKey: carrier.kind === "telnyx" ? carrier.publicKey : void 0
337
436
  },
338
437
  opts.agent,
339
438
  opts.onCallStart,
@@ -353,7 +452,7 @@ var Patter = class {
353
452
  if (this.mode !== "local") {
354
453
  throw new Error("test() is only available in local mode");
355
454
  }
356
- const { TestSession: TestSession2 } = await import("./test-mode-RH65MMSP.mjs");
455
+ const { TestSession: TestSession2 } = await import("./test-mode-YFOL2HYH.mjs");
357
456
  const session = new TestSession2();
358
457
  await session.run({
359
458
  agent: opts.agent,
@@ -394,32 +493,51 @@ var Patter = class {
394
493
  if (!this.localConfig) {
395
494
  throw new Error("local config missing");
396
495
  }
397
- const { phoneNumber, webhookUrl, telephonyProvider } = this.localConfig;
398
- if (telephonyProvider === "telnyx") {
399
- const telnyxKey = this.localConfig.telnyxKey ?? "";
400
- const connectionId = this.localConfig.telnyxConnectionId ?? "";
496
+ const { phoneNumber, webhookUrl, carrier } = this.localConfig;
497
+ if (carrier.kind === "telnyx") {
498
+ const telnyxKey = carrier.apiKey;
499
+ const connectionId = carrier.connectionId;
401
500
  const streamUrl = `wss://${webhookUrl}/ws/stream/${encodeURIComponent(localOpts.to)}?caller=${encodeURIComponent(phoneNumber)}&callee=${encodeURIComponent(localOpts.to)}`;
501
+ const telnyxPayload = {
502
+ connection_id: connectionId,
503
+ from: phoneNumber,
504
+ to: localOpts.to,
505
+ stream_url: streamUrl,
506
+ stream_track: "both_tracks"
507
+ };
508
+ if (localOpts.ringTimeout !== void 0) {
509
+ telnyxPayload.timeout_secs = Math.max(1, Math.floor(localOpts.ringTimeout));
510
+ }
402
511
  const response2 = await fetch("https://api.telnyx.com/v2/calls", {
403
512
  method: "POST",
404
513
  headers: {
405
514
  "Content-Type": "application/json",
406
515
  Authorization: `Bearer ${telnyxKey}`
407
516
  },
408
- body: JSON.stringify({
409
- connection_id: connectionId,
410
- from: phoneNumber,
411
- to: localOpts.to,
412
- stream_url: streamUrl,
413
- stream_track: "both_tracks"
414
- })
517
+ body: JSON.stringify(telnyxPayload)
415
518
  });
416
519
  if (!response2.ok) {
417
520
  throw new ProvisionError(`Failed to initiate Telnyx call: ${await response2.text()}`);
418
521
  }
522
+ if (this.embeddedServer) {
523
+ try {
524
+ const body = await response2.clone().json();
525
+ const callId = body.data?.call_control_id;
526
+ if (callId) {
527
+ this.embeddedServer.metricsStore.recordCallInitiated({
528
+ call_id: callId,
529
+ caller: phoneNumber,
530
+ callee: localOpts.to,
531
+ direction: "outbound"
532
+ });
533
+ }
534
+ } catch {
535
+ }
536
+ }
419
537
  return;
420
538
  }
421
- const twilioSid = this.localConfig.twilioSid ?? "";
422
- const twilioToken = this.localConfig.twilioToken ?? "";
539
+ const twilioSid = carrier.accountSid;
540
+ const twilioToken = carrier.authToken;
423
541
  const statusCallbackUrl = `https://${webhookUrl}/webhooks/twilio/status`;
424
542
  const url = `https://api.twilio.com/2010-04-01/Accounts/${twilioSid}/Calls.json`;
425
543
  const params = new URLSearchParams({
@@ -427,13 +545,19 @@ var Patter = class {
427
545
  From: phoneNumber,
428
546
  Url: `https://${webhookUrl}/webhooks/twilio/voice`,
429
547
  StatusCallback: statusCallbackUrl,
430
- StatusCallbackMethod: "POST"
548
+ StatusCallbackMethod: "POST",
549
+ // Full lifecycle so the dashboard sees ringing/no-answer/busy/failed
550
+ // transitions even when media never arrives.
551
+ StatusCallbackEvent: "initiated ringing answered completed"
431
552
  });
432
553
  if (localOpts.machineDetection) {
433
554
  params.append("MachineDetection", "DetectMessageEnd");
434
555
  params.append("AsyncAmd", "true");
435
556
  params.append("AsyncAmdStatusCallback", `https://${webhookUrl}/webhooks/twilio/amd`);
436
557
  }
558
+ if (localOpts.ringTimeout !== void 0) {
559
+ params.append("Timeout", String(Math.max(1, Math.floor(localOpts.ringTimeout))));
560
+ }
437
561
  if (localOpts.voicemailMessage && this.embeddedServer) {
438
562
  this.embeddedServer.voicemailMessage = localOpts.voicemailMessage;
439
563
  }
@@ -448,6 +572,21 @@ var Patter = class {
448
572
  if (!response.ok) {
449
573
  throw new ProvisionError(`Failed to initiate call: ${await response.text()}`);
450
574
  }
575
+ if (this.embeddedServer) {
576
+ try {
577
+ const body = await response.clone().json();
578
+ const callSid = body.sid;
579
+ if (callSid) {
580
+ this.embeddedServer.metricsStore.recordCallInitiated({
581
+ call_id: callSid,
582
+ caller: phoneNumber,
583
+ callee: localOpts.to,
584
+ direction: "outbound"
585
+ });
586
+ }
587
+ } catch {
588
+ }
589
+ }
451
590
  return;
452
591
  }
453
592
  const cloudOpts = options;
@@ -530,61 +669,6 @@ var Patter = class {
530
669
  const data = await response.json();
531
670
  return data.map((c) => ({ id: c.id, direction: c.direction, caller: c.caller, callee: c.callee, startedAt: c.started_at, endedAt: c.ended_at, durationSeconds: c.duration_seconds, status: c.status, transcript: c.transcript }));
532
671
  }
533
- // Provider helpers
534
- static deepgram = deepgram;
535
- static whisper = whisper;
536
- static elevenlabs = elevenlabs;
537
- static openaiTts = openaiTts;
538
- static guardrail(opts) {
539
- return {
540
- name: opts.name,
541
- blockedTerms: opts.blockedTerms,
542
- check: opts.check,
543
- replacement: opts.replacement ?? "I'm sorry, I can't respond to that."
544
- };
545
- }
546
- /**
547
- * Create a tool definition for use with `agent({ tools: [...] })`.
548
- *
549
- * Either `handler` (a function) or `webhookUrl` must be provided.
550
- *
551
- * @param opts.name - Tool name (visible to the LLM).
552
- * @param opts.description - What the tool does (visible to the LLM).
553
- * @param opts.parameters - JSON Schema for tool arguments.
554
- * @param opts.handler - Async function called in-process when the LLM invokes the tool.
555
- * @param opts.webhookUrl - URL to POST to when the LLM invokes the tool.
556
- *
557
- * @example
558
- * ```ts
559
- * phone.agent({
560
- * systemPrompt: 'You are a pizza bot.',
561
- * tools: [
562
- * Patter.tool({
563
- * name: 'check_menu',
564
- * description: 'Check available menu items',
565
- * handler: async (args) => JSON.stringify({ items: ['margherita'] }),
566
- * }),
567
- * ],
568
- * });
569
- * ```
570
- */
571
- static tool(opts) {
572
- if (!opts.handler && !opts.webhookUrl) {
573
- throw new Error("tool() requires either handler or webhookUrl");
574
- }
575
- const t = {
576
- name: opts.name,
577
- description: opts.description ?? "",
578
- parameters: opts.parameters ?? { type: "object", properties: {} }
579
- };
580
- if (opts.handler) {
581
- t.handler = opts.handler;
582
- }
583
- if (opts.webhookUrl) {
584
- t.webhookUrl = opts.webhookUrl;
585
- }
586
- return t;
587
- }
588
672
  // Internal
589
673
  async registerNumber(provider, providerKey, number, providerSecret, country = "US", stt, tts) {
590
674
  const credentials = { api_key: providerKey };
@@ -600,8 +684,8 @@ var Patter = class {
600
684
  provider,
601
685
  provider_credentials: credentials,
602
686
  country,
603
- stt_config: stt?.toDict() ?? null,
604
- tts_config: tts?.toDict() ?? null
687
+ stt_config: stt ? stt.toDict?.() ?? sttConfigToDict(stt) : null,
688
+ tts_config: tts ? tts.toDict?.() ?? ttsConfigToDict(tts) : null
605
689
  })
606
690
  });
607
691
  if (response.status === 409) return;
@@ -674,6 +758,62 @@ function filterForTTS(text) {
674
758
  return filterEmoji(filterMarkdown(text));
675
759
  }
676
760
 
761
+ // src/providers.ts
762
+ var STTConfigImpl = class {
763
+ provider;
764
+ apiKey;
765
+ language;
766
+ options;
767
+ constructor(provider, apiKey, language = "en", options) {
768
+ this.provider = provider;
769
+ this.apiKey = apiKey;
770
+ this.language = language;
771
+ if (options) this.options = options;
772
+ }
773
+ toDict() {
774
+ const out = {
775
+ provider: this.provider,
776
+ api_key: this.apiKey,
777
+ language: this.language
778
+ };
779
+ if (this.options) out.options = { ...this.options };
780
+ return out;
781
+ }
782
+ };
783
+ var TTSConfigImpl = class {
784
+ provider;
785
+ apiKey;
786
+ voice;
787
+ constructor(provider, apiKey, voice = "alloy") {
788
+ this.provider = provider;
789
+ this.apiKey = apiKey;
790
+ this.voice = voice;
791
+ }
792
+ toDict() {
793
+ return { provider: this.provider, api_key: this.apiKey, voice: this.voice };
794
+ }
795
+ };
796
+ function deepgram(opts) {
797
+ const options = {
798
+ model: opts.model ?? "nova-3",
799
+ endpointing_ms: opts.endpointingMs ?? 150,
800
+ utterance_end_ms: opts.utteranceEndMs === null ? null : opts.utteranceEndMs ?? 1e3,
801
+ smart_format: opts.smartFormat ?? true,
802
+ interim_results: opts.interimResults ?? true
803
+ };
804
+ if (opts.vadEvents !== void 0) options.vad_events = opts.vadEvents;
805
+ return new STTConfigImpl("deepgram", opts.apiKey, opts.language ?? "en", options);
806
+ }
807
+ function whisper(opts) {
808
+ return new STTConfigImpl("whisper", opts.apiKey, opts.language ?? "en");
809
+ }
810
+ function elevenlabs(opts) {
811
+ return new TTSConfigImpl("elevenlabs", opts.apiKey, opts.voice ?? "rachel");
812
+ }
813
+ function openaiTts(opts) {
814
+ return new TTSConfigImpl("openai", opts.apiKey, opts.voice ?? "alloy");
815
+ }
816
+
677
817
  // src/fallback-provider.ts
678
818
  var AllProvidersFailedError = class extends Error {
679
819
  constructor(message) {
@@ -719,6 +859,37 @@ var FallbackLLMProvider = class {
719
859
  }
720
860
  }
721
861
  }
862
+ /**
863
+ * Async-friendly disposer. Parity with Python's ``FallbackLLMProvider.aclose()``
864
+ * — safe to call multiple times, returns a resolved Promise once all probe
865
+ * timers are cleared. Prefer this in async contexts so awaiting the
866
+ * shutdown integrates naturally with the owning lifecycle.
867
+ */
868
+ async aclose() {
869
+ this.destroy();
870
+ }
871
+ /**
872
+ * Explicit-resource-management hook so callers can write
873
+ * ``await using fallback = new FallbackLLMProvider([...])`` and have
874
+ * background probe timers cleared automatically when the block exits.
875
+ * Mirrors Python's ``async with FallbackLLMProvider(...)``.
876
+ */
877
+ async [Symbol.asyncDispose]() {
878
+ await this.aclose();
879
+ }
880
+ /**
881
+ * Stream only the text deltas, flattening the chunk envelope. Parity with
882
+ * Python's ``FallbackLLMProvider.complete_stream``. Tool-call and done
883
+ * markers are filtered out so callers can concatenate the yielded strings
884
+ * directly.
885
+ */
886
+ async *completeStream(messages, tools) {
887
+ for await (const chunk of this.stream(messages, tools)) {
888
+ if (chunk.type === "text") {
889
+ yield chunk.content ?? "";
890
+ }
891
+ }
892
+ }
722
893
  // -----------------------------------------------------------------------
723
894
  // LLMProvider implementation
724
895
  // -----------------------------------------------------------------------
@@ -1235,13 +1406,37 @@ function wrapCallback(cb) {
1235
1406
  }
1236
1407
  };
1237
1408
  }
1238
- async function scheduleCron(cron, callback) {
1239
- const cm = await loadCron();
1240
- if (!cm.validate(cron)) {
1241
- throw new Error(`Invalid cron expression: ${cron}`);
1242
- }
1243
- const task = cm.schedule(cron, wrapCallback(callback));
1244
- return makeHandle(`cron-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`, task);
1409
+ function scheduleCron(cron, callback) {
1410
+ let cancelled = false;
1411
+ let task = null;
1412
+ const jobId = `cron-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
1413
+ loadCron().then((cm) => {
1414
+ if (cancelled) return;
1415
+ if (!cm.validate(cron)) {
1416
+ throw new Error(`Invalid cron expression: ${cron}`);
1417
+ }
1418
+ task = cm.schedule(cron, wrapCallback(callback));
1419
+ }).catch((err) => getLogger().error(`scheduleCron failed: ${String(err)}`));
1420
+ return {
1421
+ jobId,
1422
+ cancel() {
1423
+ if (cancelled) return;
1424
+ cancelled = true;
1425
+ if (task) {
1426
+ try {
1427
+ task.stop();
1428
+ } catch {
1429
+ }
1430
+ try {
1431
+ task.destroy?.();
1432
+ } catch {
1433
+ }
1434
+ }
1435
+ },
1436
+ get pending() {
1437
+ return !cancelled;
1438
+ }
1439
+ };
1245
1440
  }
1246
1441
  function scheduleOnce(at, callback) {
1247
1442
  const delayMs = at.getTime() - Date.now();
@@ -1263,8 +1458,18 @@ function scheduleOnce(at, callback) {
1263
1458
  }
1264
1459
  };
1265
1460
  }
1266
- function scheduleInterval(intervalMs, callback) {
1267
- if (intervalMs <= 0) throw new Error("intervalMs must be positive");
1461
+ function scheduleInterval(intervalOrOpts, callback) {
1462
+ let intervalMs;
1463
+ if (typeof intervalOrOpts === "number") {
1464
+ intervalMs = intervalOrOpts;
1465
+ } else if (intervalOrOpts.intervalMs !== void 0) {
1466
+ intervalMs = intervalOrOpts.intervalMs;
1467
+ } else if (intervalOrOpts.seconds !== void 0) {
1468
+ intervalMs = intervalOrOpts.seconds * 1e3;
1469
+ } else {
1470
+ throw new Error("scheduleInterval requires seconds or intervalMs");
1471
+ }
1472
+ if (intervalMs <= 0) throw new Error("interval must be positive");
1268
1473
  let cancelled = false;
1269
1474
  const wrapped = wrapCallback(callback);
1270
1475
  const timer = setInterval(() => {
@@ -1281,85 +1486,378 @@ function scheduleInterval(intervalMs, callback) {
1281
1486
  }
1282
1487
  };
1283
1488
  }
1284
- function makeHandle(jobId, task) {
1285
- let cancelled = false;
1286
- return {
1287
- jobId,
1288
- cancel() {
1289
- if (cancelled) return;
1290
- cancelled = true;
1291
- try {
1292
- task.stop();
1293
- } catch {
1294
- }
1295
- try {
1296
- task.destroy?.();
1297
- } catch {
1298
- }
1299
- },
1300
- get pending() {
1301
- return !cancelled;
1489
+
1490
+ // src/stt/deepgram.ts
1491
+ var STT = class extends DeepgramSTT {
1492
+ constructor(opts = {}) {
1493
+ const key = opts.apiKey ?? process.env.DEEPGRAM_API_KEY;
1494
+ if (!key) {
1495
+ throw new Error(
1496
+ "Deepgram STT requires an apiKey. Pass { apiKey: 'dg_...' } or set DEEPGRAM_API_KEY in the environment."
1497
+ );
1302
1498
  }
1303
- };
1304
- }
1499
+ super(
1500
+ key,
1501
+ opts.language ?? "en",
1502
+ opts.model ?? "nova-3",
1503
+ opts.encoding ?? "linear16",
1504
+ opts.sampleRate ?? 16e3,
1505
+ {
1506
+ endpointingMs: opts.endpointingMs ?? 150,
1507
+ utteranceEndMs: opts.utteranceEndMs === null ? null : opts.utteranceEndMs ?? 1e3,
1508
+ smartFormat: opts.smartFormat ?? true,
1509
+ interimResults: opts.interimResults ?? true,
1510
+ ...opts.vadEvents !== void 0 ? { vadEvents: opts.vadEvents } : {}
1511
+ }
1512
+ );
1513
+ }
1514
+ };
1305
1515
 
1306
- // src/providers/soniox-stt.ts
1307
- import WebSocket3 from "ws";
1308
- var SONIOX_WS_URL = "wss://stt-rt.soniox.com/transcribe-websocket";
1309
- var KEEPALIVE_MESSAGE = '{"type": "keepalive"}';
1310
- var END_TOKEN = "<end>";
1311
- var FINALIZED_TOKEN = "<fin>";
1312
- var KEEPALIVE_INTERVAL_MS = 5e3;
1313
- function isEndToken(token) {
1314
- return token.text === END_TOKEN || token.text === FINALIZED_TOKEN;
1516
+ // src/providers/whisper-stt.ts
1517
+ var OPENAI_TRANSCRIPTION_URL = "https://api.openai.com/v1/audio/transcriptions";
1518
+ var DEFAULT_BUFFER_SIZE = 16e3 * 2;
1519
+ function wrapPcmInWav(pcm, sampleRate = 16e3, channels = 1, bitsPerSample = 16) {
1520
+ const dataSize = pcm.length;
1521
+ const header = Buffer.alloc(44);
1522
+ header.write("RIFF", 0);
1523
+ header.writeUInt32LE(36 + dataSize, 4);
1524
+ header.write("WAVE", 8);
1525
+ header.write("fmt ", 12);
1526
+ header.writeUInt32LE(16, 16);
1527
+ header.writeUInt16LE(1, 20);
1528
+ header.writeUInt16LE(channels, 22);
1529
+ header.writeUInt32LE(sampleRate, 24);
1530
+ header.writeUInt32LE(sampleRate * channels * (bitsPerSample / 8), 28);
1531
+ header.writeUInt16LE(channels * (bitsPerSample / 8), 32);
1532
+ header.writeUInt16LE(bitsPerSample, 34);
1533
+ header.write("data", 36);
1534
+ header.writeUInt32LE(dataSize, 40);
1535
+ return Buffer.concat([header, pcm]);
1315
1536
  }
1316
- var TokenAccumulator = class {
1317
- text = "";
1318
- confSum = 0;
1319
- confCount = 0;
1320
- update(token) {
1321
- if (token.text) {
1322
- this.text += token.text;
1323
- }
1324
- if (typeof token.confidence === "number") {
1325
- this.confSum += token.confidence;
1326
- this.confCount += 1;
1327
- }
1537
+ var WhisperSTT = class _WhisperSTT {
1538
+ apiKey;
1539
+ model;
1540
+ language;
1541
+ bufferSize;
1542
+ buffer = Buffer.alloc(0);
1543
+ callbacks = [];
1544
+ running = false;
1545
+ pendingTranscriptions = [];
1546
+ constructor(apiKey, model = "whisper-1", language, bufferSize = DEFAULT_BUFFER_SIZE) {
1547
+ this.apiKey = apiKey;
1548
+ this.model = model;
1549
+ this.language = language;
1550
+ this.bufferSize = bufferSize;
1328
1551
  }
1329
- get confidence() {
1330
- return this.confCount === 0 ? 0 : this.confSum / this.confCount;
1552
+ /** Factory for Twilio calls — mulaw 8 kHz is transcoded upstream, so we still receive PCM 16-bit. */
1553
+ static forTwilio(apiKey, language = "en", model = "whisper-1") {
1554
+ return new _WhisperSTT(apiKey, model, language);
1331
1555
  }
1332
- reset() {
1333
- this.text = "";
1334
- this.confSum = 0;
1335
- this.confCount = 0;
1556
+ async connect() {
1557
+ this.running = true;
1558
+ this.buffer = Buffer.alloc(0);
1336
1559
  }
1337
- get raw() {
1338
- return { sum: this.confSum, count: this.confCount };
1560
+ sendAudio(audio) {
1561
+ if (!this.running) return;
1562
+ this.buffer = Buffer.concat([this.buffer, audio]);
1563
+ if (this.buffer.length >= this.bufferSize) {
1564
+ const pcm = this.buffer;
1565
+ this.buffer = Buffer.alloc(0);
1566
+ this.trackTranscription(this.transcribeBuffer(pcm));
1567
+ }
1568
+ }
1569
+ trackTranscription(promise) {
1570
+ const wrapped = promise.finally(() => {
1571
+ const idx = this.pendingTranscriptions.indexOf(wrapped);
1572
+ if (idx !== -1) this.pendingTranscriptions.splice(idx, 1);
1573
+ });
1574
+ this.pendingTranscriptions.push(wrapped);
1339
1575
  }
1340
- };
1341
- var SonioxSTT = class _SonioxSTT {
1342
- ws = null;
1343
- callbacks = [];
1344
- final = new TokenAccumulator();
1345
- keepaliveTimer = null;
1346
- apiKey;
1347
- model;
1348
- languageHints;
1349
- languageHintsStrict;
1350
- sampleRate;
1351
- numChannels;
1352
- enableSpeakerDiarization;
1353
- enableLanguageIdentification;
1354
- maxEndpointDelayMs;
1355
- clientReferenceId;
1356
- baseUrl;
1357
- constructor(apiKey, options = {}) {
1358
- if (!apiKey) {
1359
- throw new Error("Soniox apiKey is required");
1576
+ onTranscript(callback) {
1577
+ if (this.callbacks.length >= 10) {
1578
+ getLogger().warn("WhisperSTT: maximum of 10 onTranscript callbacks reached; replacing the last callback.");
1579
+ this.callbacks[this.callbacks.length - 1] = callback;
1580
+ return;
1360
1581
  }
1361
- const maxEndpointDelayMs = options.maxEndpointDelayMs ?? 500;
1362
- if (maxEndpointDelayMs < 500 || maxEndpointDelayMs > 3e3) {
1582
+ this.callbacks.push(callback);
1583
+ }
1584
+ async close() {
1585
+ this.running = false;
1586
+ if (this.buffer.length >= this.bufferSize / 4) {
1587
+ const pcm = this.buffer;
1588
+ this.buffer = Buffer.alloc(0);
1589
+ this.trackTranscription(this.transcribeBuffer(pcm));
1590
+ } else {
1591
+ this.buffer = Buffer.alloc(0);
1592
+ }
1593
+ await Promise.allSettled(this.pendingTranscriptions);
1594
+ this.callbacks = [];
1595
+ }
1596
+ // ------------------------------------------------------------------
1597
+ // Private
1598
+ // ------------------------------------------------------------------
1599
+ async transcribeBuffer(pcm) {
1600
+ const wav = wrapPcmInWav(pcm);
1601
+ const formData = new FormData();
1602
+ formData.append("file", new Blob([wav.buffer.slice(wav.byteOffset, wav.byteOffset + wav.byteLength)], { type: "audio/wav" }), "audio.wav");
1603
+ formData.append("model", this.model);
1604
+ if (this.language) {
1605
+ formData.append("language", this.language);
1606
+ }
1607
+ try {
1608
+ const resp = await fetch(OPENAI_TRANSCRIPTION_URL, {
1609
+ method: "POST",
1610
+ headers: { Authorization: `Bearer ${this.apiKey}` },
1611
+ body: formData,
1612
+ signal: AbortSignal.timeout(15e3)
1613
+ });
1614
+ if (!resp.ok) {
1615
+ const body = await resp.text();
1616
+ getLogger().error(`WhisperSTT transcription error: ${resp.status} ${body}`);
1617
+ return;
1618
+ }
1619
+ const json = await resp.json();
1620
+ const text = (json.text ?? "").trim();
1621
+ if (!text) return;
1622
+ const transcript = {
1623
+ text,
1624
+ isFinal: true,
1625
+ confidence: 1
1626
+ };
1627
+ for (const cb of this.callbacks) {
1628
+ cb(transcript);
1629
+ }
1630
+ } catch (err) {
1631
+ getLogger().error(`WhisperSTT transcription error: ${String(err)}`);
1632
+ }
1633
+ }
1634
+ };
1635
+
1636
+ // src/stt/whisper.ts
1637
+ var STT2 = class extends WhisperSTT {
1638
+ constructor(opts = {}) {
1639
+ const key = opts.apiKey ?? process.env.OPENAI_API_KEY;
1640
+ if (!key) {
1641
+ throw new Error(
1642
+ "Whisper STT requires an apiKey. Pass { apiKey: 'sk-...' } or set OPENAI_API_KEY in the environment."
1643
+ );
1644
+ }
1645
+ super(key, opts.model ?? "whisper-1", opts.language, opts.bufferSize);
1646
+ }
1647
+ };
1648
+
1649
+ // src/providers/cartesia-stt.ts
1650
+ import WebSocket3 from "ws";
1651
+ var DEFAULT_BASE_URL = "https://api.cartesia.ai";
1652
+ var API_VERSION = "2025-04-16";
1653
+ var USER_AGENT = "Patter/1.0 (integration=LiveKit-port; provider=Cartesia)";
1654
+ var KEEPALIVE_INTERVAL_MS = 3e4;
1655
+ var CONNECT_TIMEOUT_MS = 1e4;
1656
+ var MAX_CALLBACKS = 10;
1657
+ var CartesiaSTT = class {
1658
+ constructor(apiKey, options = {}) {
1659
+ this.apiKey = apiKey;
1660
+ this.options = options;
1661
+ if (!apiKey) {
1662
+ throw new Error("CartesiaSTT requires a non-empty apiKey");
1663
+ }
1664
+ }
1665
+ ws = null;
1666
+ callbacks = [];
1667
+ keepaliveTimer = null;
1668
+ /** Cartesia request id — set from the server transcript events. */
1669
+ requestId = "";
1670
+ buildWsUrl() {
1671
+ const opts = this.options;
1672
+ const rawBase = opts.baseUrl ?? DEFAULT_BASE_URL;
1673
+ let base;
1674
+ if (rawBase.startsWith("http://")) {
1675
+ base = `ws://${rawBase.slice("http://".length)}`;
1676
+ } else if (rawBase.startsWith("https://")) {
1677
+ base = `wss://${rawBase.slice("https://".length)}`;
1678
+ } else if (rawBase.startsWith("ws://") || rawBase.startsWith("wss://")) {
1679
+ base = rawBase;
1680
+ } else {
1681
+ base = `wss://${rawBase}`;
1682
+ }
1683
+ const language = opts.language ?? "en";
1684
+ const params = new URLSearchParams({
1685
+ model: opts.model ?? "ink-whisper",
1686
+ sample_rate: String(opts.sampleRate ?? 16e3),
1687
+ encoding: opts.encoding ?? "pcm_s16le",
1688
+ cartesia_version: API_VERSION,
1689
+ api_key: this.apiKey,
1690
+ language
1691
+ });
1692
+ return `${base}/stt/websocket?${params.toString()}`;
1693
+ }
1694
+ async connect() {
1695
+ const url = this.buildWsUrl();
1696
+ this.ws = new WebSocket3(url, {
1697
+ headers: { "User-Agent": USER_AGENT }
1698
+ });
1699
+ await new Promise((resolve, reject) => {
1700
+ const timer = setTimeout(
1701
+ () => reject(new Error("Cartesia STT connect timeout")),
1702
+ CONNECT_TIMEOUT_MS
1703
+ );
1704
+ this.ws.once("open", () => {
1705
+ clearTimeout(timer);
1706
+ resolve();
1707
+ });
1708
+ this.ws.once("error", (err) => {
1709
+ clearTimeout(timer);
1710
+ reject(err);
1711
+ });
1712
+ });
1713
+ this.ws.on("message", (raw) => {
1714
+ let event;
1715
+ try {
1716
+ event = JSON.parse(raw.toString());
1717
+ } catch {
1718
+ return;
1719
+ }
1720
+ this.handleEvent(event);
1721
+ });
1722
+ this.keepaliveTimer = setInterval(() => {
1723
+ if (this.ws && this.ws.readyState === WebSocket3.OPEN) {
1724
+ try {
1725
+ this.ws.ping();
1726
+ } catch {
1727
+ }
1728
+ }
1729
+ }, KEEPALIVE_INTERVAL_MS);
1730
+ }
1731
+ handleEvent(event) {
1732
+ const type = event.type;
1733
+ if (type === "transcript") {
1734
+ const text = (event.text ?? "").trim();
1735
+ const isFinal = Boolean(event.is_final);
1736
+ if (!text && !isFinal) return;
1737
+ if (event.request_id) {
1738
+ this.requestId = event.request_id;
1739
+ }
1740
+ if (!text) return;
1741
+ const confidence = Number(event.probability ?? 1);
1742
+ this.emit({ text, isFinal, confidence });
1743
+ return;
1744
+ }
1745
+ if (type === "error") {
1746
+ getLogger().error(`Cartesia STT error: ${event.message ?? "unknown"}`);
1747
+ return;
1748
+ }
1749
+ }
1750
+ emit(transcript) {
1751
+ for (const cb of this.callbacks) {
1752
+ cb(transcript);
1753
+ }
1754
+ }
1755
+ sendAudio(audio) {
1756
+ if (!this.ws || this.ws.readyState !== WebSocket3.OPEN) return;
1757
+ this.ws.send(audio);
1758
+ }
1759
+ onTranscript(callback) {
1760
+ if (this.callbacks.length >= MAX_CALLBACKS) {
1761
+ getLogger().warn(
1762
+ "CartesiaSTT: maximum of 10 onTranscript callbacks reached; replacing the last callback."
1763
+ );
1764
+ this.callbacks[this.callbacks.length - 1] = callback;
1765
+ return;
1766
+ }
1767
+ this.callbacks.push(callback);
1768
+ }
1769
+ close() {
1770
+ if (this.keepaliveTimer) {
1771
+ clearInterval(this.keepaliveTimer);
1772
+ this.keepaliveTimer = null;
1773
+ }
1774
+ if (this.ws) {
1775
+ try {
1776
+ this.ws.send("finalize");
1777
+ } catch {
1778
+ }
1779
+ this.ws.close();
1780
+ this.ws = null;
1781
+ }
1782
+ }
1783
+ };
1784
+
1785
+ // src/stt/cartesia.ts
1786
+ var STT3 = class extends CartesiaSTT {
1787
+ constructor(opts = {}) {
1788
+ const key = opts.apiKey ?? process.env.CARTESIA_API_KEY;
1789
+ if (!key) {
1790
+ throw new Error(
1791
+ "Cartesia STT requires an apiKey. Pass { apiKey: '...' } or set CARTESIA_API_KEY in the environment."
1792
+ );
1793
+ }
1794
+ super(key, {
1795
+ model: opts.model,
1796
+ language: opts.language,
1797
+ encoding: opts.encoding,
1798
+ sampleRate: opts.sampleRate,
1799
+ baseUrl: opts.baseUrl
1800
+ });
1801
+ }
1802
+ };
1803
+
1804
+ // src/providers/soniox-stt.ts
1805
+ import WebSocket4 from "ws";
1806
+ var SONIOX_WS_URL = "wss://stt-rt.soniox.com/transcribe-websocket";
1807
+ var KEEPALIVE_MESSAGE = '{"type": "keepalive"}';
1808
+ var END_TOKEN = "<end>";
1809
+ var FINALIZED_TOKEN = "<fin>";
1810
+ var KEEPALIVE_INTERVAL_MS2 = 5e3;
1811
+ function isEndToken(token) {
1812
+ return token.text === END_TOKEN || token.text === FINALIZED_TOKEN;
1813
+ }
1814
+ var TokenAccumulator = class {
1815
+ text = "";
1816
+ confSum = 0;
1817
+ confCount = 0;
1818
+ update(token) {
1819
+ if (token.text) {
1820
+ this.text += token.text;
1821
+ }
1822
+ if (typeof token.confidence === "number") {
1823
+ this.confSum += token.confidence;
1824
+ this.confCount += 1;
1825
+ }
1826
+ }
1827
+ get confidence() {
1828
+ return this.confCount === 0 ? 0 : this.confSum / this.confCount;
1829
+ }
1830
+ reset() {
1831
+ this.text = "";
1832
+ this.confSum = 0;
1833
+ this.confCount = 0;
1834
+ }
1835
+ get raw() {
1836
+ return { sum: this.confSum, count: this.confCount };
1837
+ }
1838
+ };
1839
+ var SonioxSTT = class _SonioxSTT {
1840
+ ws = null;
1841
+ callbacks = [];
1842
+ final = new TokenAccumulator();
1843
+ keepaliveTimer = null;
1844
+ apiKey;
1845
+ model;
1846
+ languageHints;
1847
+ languageHintsStrict;
1848
+ sampleRate;
1849
+ numChannels;
1850
+ enableSpeakerDiarization;
1851
+ enableLanguageIdentification;
1852
+ maxEndpointDelayMs;
1853
+ clientReferenceId;
1854
+ baseUrl;
1855
+ constructor(apiKey, options = {}) {
1856
+ if (!apiKey) {
1857
+ throw new Error("Soniox apiKey is required");
1858
+ }
1859
+ const maxEndpointDelayMs = options.maxEndpointDelayMs ?? 500;
1860
+ if (maxEndpointDelayMs < 500 || maxEndpointDelayMs > 3e3) {
1363
1861
  throw new Error("maxEndpointDelayMs must be between 500 and 3000");
1364
1862
  }
1365
1863
  this.apiKey = apiKey;
@@ -1400,7 +1898,7 @@ var SonioxSTT = class _SonioxSTT {
1400
1898
  return config;
1401
1899
  }
1402
1900
  async connect() {
1403
- this.ws = new WebSocket3(this.baseUrl);
1901
+ this.ws = new WebSocket4(this.baseUrl);
1404
1902
  await new Promise((resolve, reject) => {
1405
1903
  const timer = setTimeout(() => reject(new Error("Soniox connect timeout")), 1e4);
1406
1904
  this.ws.once("open", () => {
@@ -1419,13 +1917,13 @@ var SonioxSTT = class _SonioxSTT {
1419
1917
  getLogger().error(`SonioxSTT WebSocket error: ${String(err)}`);
1420
1918
  });
1421
1919
  this.keepaliveTimer = setInterval(() => {
1422
- if (this.ws && this.ws.readyState === WebSocket3.OPEN) {
1920
+ if (this.ws && this.ws.readyState === WebSocket4.OPEN) {
1423
1921
  try {
1424
1922
  this.ws.send(KEEPALIVE_MESSAGE);
1425
1923
  } catch {
1426
1924
  }
1427
1925
  }
1428
- }, KEEPALIVE_INTERVAL_MS);
1926
+ }, KEEPALIVE_INTERVAL_MS2);
1429
1927
  }
1430
1928
  clearKeepalive() {
1431
1929
  if (this.keepaliveTimer) {
@@ -1492,7 +1990,7 @@ var SonioxSTT = class _SonioxSTT {
1492
1990
  }
1493
1991
  }
1494
1992
  sendAudio(audio) {
1495
- if (!this.ws || this.ws.readyState !== WebSocket3.OPEN) return;
1993
+ if (!this.ws || this.ws.readyState !== WebSocket4.OPEN) return;
1496
1994
  if (audio.length === 0) return;
1497
1995
  this.ws.send(audio);
1498
1996
  }
@@ -1522,12 +2020,27 @@ var SonioxSTT = class _SonioxSTT {
1522
2020
  }
1523
2021
  };
1524
2022
 
2023
+ // src/stt/soniox.ts
2024
+ var STT4 = class extends SonioxSTT {
2025
+ constructor(opts = {}) {
2026
+ const key = opts.apiKey ?? process.env.SONIOX_API_KEY;
2027
+ if (!key) {
2028
+ throw new Error(
2029
+ "Soniox STT requires an apiKey. Pass { apiKey: '...' } or set SONIOX_API_KEY in the environment."
2030
+ );
2031
+ }
2032
+ const { apiKey: _ignored, ...rest } = opts;
2033
+ void _ignored;
2034
+ super(key, rest);
2035
+ }
2036
+ };
2037
+
1525
2038
  // src/providers/assemblyai-stt.ts
1526
- import WebSocket4 from "ws";
1527
- var DEFAULT_BASE_URL = "wss://streaming.assemblyai.com";
2039
+ import WebSocket5 from "ws";
2040
+ var DEFAULT_BASE_URL2 = "wss://streaming.assemblyai.com";
1528
2041
  var DEFAULT_MIN_TURN_SILENCE_MS = 100;
1529
- var CONNECT_TIMEOUT_MS = 1e4;
1530
- var MAX_CALLBACKS = 10;
2042
+ var CONNECT_TIMEOUT_MS2 = 1e4;
2043
+ var MAX_CALLBACKS2 = 10;
1531
2044
  var AssemblyAISTT = class _AssemblyAISTT {
1532
2045
  constructor(apiKey, options = {}) {
1533
2046
  this.apiKey = apiKey;
@@ -1582,175 +2095,29 @@ var AssemblyAISTT = class _AssemblyAISTT {
1582
2095
  domain: opts.domain
1583
2096
  };
1584
2097
  const params = new URLSearchParams();
1585
- for (const [key, value] of Object.entries(raw)) {
1586
- if (value === void 0 || value === null) continue;
1587
- if (typeof value === "boolean") {
1588
- params.set(key, value ? "true" : "false");
1589
- } else {
1590
- params.set(key, String(value));
1591
- }
1592
- }
1593
- const base = opts.baseUrl ?? DEFAULT_BASE_URL;
1594
- return `${base}/v3/ws?${params.toString()}`;
1595
- }
1596
- async connect() {
1597
- const url = this.buildUrl();
1598
- this.ws = new WebSocket4(url, {
1599
- headers: {
1600
- Authorization: this.apiKey,
1601
- "Content-Type": "application/json",
1602
- "User-Agent": "Patter/1.0 (integration=LiveKit-port)"
1603
- }
1604
- });
1605
- await new Promise((resolve, reject) => {
1606
- const timer = setTimeout(
1607
- () => reject(new Error("AssemblyAI connect timeout")),
1608
- CONNECT_TIMEOUT_MS
1609
- );
1610
- this.ws.once("open", () => {
1611
- clearTimeout(timer);
1612
- resolve();
1613
- });
1614
- this.ws.once("error", (err) => {
1615
- clearTimeout(timer);
1616
- reject(err);
1617
- });
1618
- });
1619
- this.ws.on("message", (raw) => {
1620
- let event;
1621
- try {
1622
- event = JSON.parse(raw.toString());
1623
- } catch {
1624
- return;
1625
- }
1626
- this.handleEvent(event);
1627
- });
1628
- }
1629
- handleEvent(event) {
1630
- const type = event.type;
1631
- if (type === "Begin") {
1632
- this.sessionId = event.id ?? "";
1633
- this.expiresAt = event.expires_at ?? 0;
1634
- return;
1635
- }
1636
- if (type !== "Turn") {
1637
- return;
1638
- }
1639
- const endOfTurn = Boolean(event.end_of_turn);
1640
- const turnIsFormatted = Boolean(event.turn_is_formatted);
1641
- const words = event.words ?? [];
1642
- const transcriptText = (event.transcript ?? "").trim();
1643
- if (endOfTurn) {
1644
- if (this.options.formatTurns && !turnIsFormatted) return;
1645
- if (!transcriptText) return;
1646
- this.emit({
1647
- text: transcriptText,
1648
- isFinal: true,
1649
- confidence: averageConfidence(words)
1650
- });
1651
- return;
1652
- }
1653
- if (!words.length) return;
1654
- const interim = words.map((w) => (w.text ?? "").trim()).filter(Boolean).join(" ");
1655
- if (!interim) return;
1656
- this.emit({
1657
- text: interim,
1658
- isFinal: false,
1659
- confidence: averageConfidence(words)
1660
- });
1661
- }
1662
- emit(transcript) {
1663
- for (const cb of this.callbacks) {
1664
- cb(transcript);
1665
- }
1666
- }
1667
- sendAudio(audio) {
1668
- if (!this.ws || this.ws.readyState !== WebSocket4.OPEN) return;
1669
- this.ws.send(audio);
1670
- }
1671
- onTranscript(callback) {
1672
- if (this.callbacks.length >= MAX_CALLBACKS) {
1673
- getLogger().warn(
1674
- "AssemblyAISTT: maximum of 10 onTranscript callbacks reached; replacing the last callback."
1675
- );
1676
- this.callbacks[this.callbacks.length - 1] = callback;
1677
- return;
1678
- }
1679
- this.callbacks.push(callback);
1680
- }
1681
- close() {
1682
- if (this.ws) {
1683
- try {
1684
- this.ws.send(JSON.stringify({ type: "Terminate" }));
1685
- } catch {
1686
- }
1687
- this.ws.close();
1688
- this.ws = null;
1689
- }
1690
- }
1691
- };
1692
- function averageConfidence(words) {
1693
- if (!words.length) return 0;
1694
- let total = 0;
1695
- for (const w of words) {
1696
- total += Number(w.confidence ?? 0);
1697
- }
1698
- return total / words.length;
1699
- }
1700
-
1701
- // src/providers/cartesia-stt.ts
1702
- import WebSocket5 from "ws";
1703
- var DEFAULT_BASE_URL2 = "https://api.cartesia.ai";
1704
- var API_VERSION = "2025-04-16";
1705
- var USER_AGENT = "Patter/1.0 (integration=LiveKit-port; provider=Cartesia)";
1706
- var KEEPALIVE_INTERVAL_MS2 = 3e4;
1707
- var CONNECT_TIMEOUT_MS2 = 1e4;
1708
- var MAX_CALLBACKS2 = 10;
1709
- var CartesiaSTT = class {
1710
- constructor(apiKey, options = {}) {
1711
- this.apiKey = apiKey;
1712
- this.options = options;
1713
- if (!apiKey) {
1714
- throw new Error("CartesiaSTT requires a non-empty apiKey");
1715
- }
1716
- }
1717
- ws = null;
1718
- callbacks = [];
1719
- keepaliveTimer = null;
1720
- /** Cartesia request id — set from the server transcript events. */
1721
- requestId = "";
1722
- buildWsUrl() {
1723
- const opts = this.options;
1724
- const rawBase = opts.baseUrl ?? DEFAULT_BASE_URL2;
1725
- let base;
1726
- if (rawBase.startsWith("http://")) {
1727
- base = `ws://${rawBase.slice("http://".length)}`;
1728
- } else if (rawBase.startsWith("https://")) {
1729
- base = `wss://${rawBase.slice("https://".length)}`;
1730
- } else if (rawBase.startsWith("ws://") || rawBase.startsWith("wss://")) {
1731
- base = rawBase;
1732
- } else {
1733
- base = `wss://${rawBase}`;
1734
- }
1735
- const language = opts.language ?? "en";
1736
- const params = new URLSearchParams({
1737
- model: opts.model ?? "ink-whisper",
1738
- sample_rate: String(opts.sampleRate ?? 16e3),
1739
- encoding: opts.encoding ?? "pcm_s16le",
1740
- cartesia_version: API_VERSION,
1741
- api_key: this.apiKey,
1742
- language
1743
- });
1744
- return `${base}/stt/websocket?${params.toString()}`;
2098
+ for (const [key, value] of Object.entries(raw)) {
2099
+ if (value === void 0 || value === null) continue;
2100
+ if (typeof value === "boolean") {
2101
+ params.set(key, value ? "true" : "false");
2102
+ } else {
2103
+ params.set(key, String(value));
2104
+ }
2105
+ }
2106
+ const base = opts.baseUrl ?? DEFAULT_BASE_URL2;
2107
+ return `${base}/v3/ws?${params.toString()}`;
1745
2108
  }
1746
2109
  async connect() {
1747
- const url = this.buildWsUrl();
2110
+ const url = this.buildUrl();
1748
2111
  this.ws = new WebSocket5(url, {
1749
- headers: { "User-Agent": USER_AGENT }
2112
+ headers: {
2113
+ Authorization: this.apiKey,
2114
+ "Content-Type": "application/json",
2115
+ "User-Agent": "Patter/1.0 (integration=LiveKit-port)"
2116
+ }
1750
2117
  });
1751
2118
  await new Promise((resolve, reject) => {
1752
2119
  const timer = setTimeout(
1753
- () => reject(new Error("Cartesia STT connect timeout")),
2120
+ () => reject(new Error("AssemblyAI connect timeout")),
1754
2121
  CONNECT_TIMEOUT_MS2
1755
2122
  );
1756
2123
  this.ws.once("open", () => {
@@ -1771,33 +2138,39 @@ var CartesiaSTT = class {
1771
2138
  }
1772
2139
  this.handleEvent(event);
1773
2140
  });
1774
- this.keepaliveTimer = setInterval(() => {
1775
- if (this.ws && this.ws.readyState === WebSocket5.OPEN) {
1776
- try {
1777
- this.ws.ping();
1778
- } catch {
1779
- }
1780
- }
1781
- }, KEEPALIVE_INTERVAL_MS2);
1782
2141
  }
1783
2142
  handleEvent(event) {
1784
2143
  const type = event.type;
1785
- if (type === "transcript") {
1786
- const text = (event.text ?? "").trim();
1787
- const isFinal = Boolean(event.is_final);
1788
- if (!text && !isFinal) return;
1789
- if (event.request_id) {
1790
- this.requestId = event.request_id;
1791
- }
1792
- if (!text) return;
1793
- const confidence = Number(event.probability ?? 1);
1794
- this.emit({ text, isFinal, confidence });
2144
+ if (type === "Begin") {
2145
+ this.sessionId = event.id ?? "";
2146
+ this.expiresAt = event.expires_at ?? 0;
1795
2147
  return;
1796
2148
  }
1797
- if (type === "error") {
1798
- getLogger().error(`Cartesia STT error: ${event.message ?? "unknown"}`);
2149
+ if (type !== "Turn") {
2150
+ return;
2151
+ }
2152
+ const endOfTurn = Boolean(event.end_of_turn);
2153
+ const turnIsFormatted = Boolean(event.turn_is_formatted);
2154
+ const words = event.words ?? [];
2155
+ const transcriptText = (event.transcript ?? "").trim();
2156
+ if (endOfTurn) {
2157
+ if (this.options.formatTurns && !turnIsFormatted) return;
2158
+ if (!transcriptText) return;
2159
+ this.emit({
2160
+ text: transcriptText,
2161
+ isFinal: true,
2162
+ confidence: averageConfidence(words)
2163
+ });
1799
2164
  return;
1800
2165
  }
2166
+ if (!words.length) return;
2167
+ const interim = words.map((w) => (w.text ?? "").trim()).filter(Boolean).join(" ");
2168
+ if (!interim) return;
2169
+ this.emit({
2170
+ text: interim,
2171
+ isFinal: false,
2172
+ confidence: averageConfidence(words)
2173
+ });
1801
2174
  }
1802
2175
  emit(transcript) {
1803
2176
  for (const cb of this.callbacks) {
@@ -1811,7 +2184,7 @@ var CartesiaSTT = class {
1811
2184
  onTranscript(callback) {
1812
2185
  if (this.callbacks.length >= MAX_CALLBACKS2) {
1813
2186
  getLogger().warn(
1814
- "CartesiaSTT: maximum of 10 onTranscript callbacks reached; replacing the last callback."
2187
+ "AssemblyAISTT: maximum of 10 onTranscript callbacks reached; replacing the last callback."
1815
2188
  );
1816
2189
  this.callbacks[this.callbacks.length - 1] = callback;
1817
2190
  return;
@@ -1819,13 +2192,9 @@ var CartesiaSTT = class {
1819
2192
  this.callbacks.push(callback);
1820
2193
  }
1821
2194
  close() {
1822
- if (this.keepaliveTimer) {
1823
- clearInterval(this.keepaliveTimer);
1824
- this.keepaliveTimer = null;
1825
- }
1826
2195
  if (this.ws) {
1827
2196
  try {
1828
- this.ws.send("finalize");
2197
+ this.ws.send(JSON.stringify({ type: "Terminate" }));
1829
2198
  } catch {
1830
2199
  }
1831
2200
  this.ws.close();
@@ -1833,6 +2202,305 @@ var CartesiaSTT = class {
1833
2202
  }
1834
2203
  }
1835
2204
  };
2205
+ function averageConfidence(words) {
2206
+ if (!words.length) return 0;
2207
+ let total = 0;
2208
+ for (const w of words) {
2209
+ total += Number(w.confidence ?? 0);
2210
+ }
2211
+ return total / words.length;
2212
+ }
2213
+
2214
+ // src/stt/assemblyai.ts
2215
+ var STT5 = class extends AssemblyAISTT {
2216
+ constructor(opts = {}) {
2217
+ const key = opts.apiKey ?? process.env.ASSEMBLYAI_API_KEY;
2218
+ if (!key) {
2219
+ throw new Error(
2220
+ "AssemblyAI STT requires an apiKey. Pass { apiKey: '...' } or set ASSEMBLYAI_API_KEY in the environment."
2221
+ );
2222
+ }
2223
+ const { apiKey: _ignored, ...rest } = opts;
2224
+ void _ignored;
2225
+ super(key, rest);
2226
+ }
2227
+ };
2228
+
2229
+ // src/providers/elevenlabs-tts.ts
2230
+ var ELEVENLABS_BASE_URL = "https://api.elevenlabs.io/v1";
2231
+ var ELEVENLABS_VOICE_ID_BY_NAME = {
2232
+ rachel: "21m00Tcm4TlvDq8ikWAM",
2233
+ drew: "29vD33N1CtxCmqQRPOHJ",
2234
+ clyde: "2EiwWnXFnvU5JabPnv8n",
2235
+ paul: "5Q0t7uMcjvnagumLfvZi",
2236
+ domi: "AZnzlk1XvdvUeBnXmlld",
2237
+ dave: "CYw3kZ02Hs0563khs1Fj",
2238
+ fin: "D38z5RcWu1voky8WS1ja",
2239
+ bella: "EXAVITQu4vr4xnSDxMaL",
2240
+ antoni: "ErXwobaYiN019PkySvjV",
2241
+ thomas: "GBv7mTt0atIp3Br8iCZE",
2242
+ charlie: "IKne3meq5aSn9XLyUdCD",
2243
+ george: "JBFqnCBsd6RMkjVDRZzb",
2244
+ emily: "LcfcDJNUP1GQjkzn1xUU",
2245
+ elli: "MF3mGyEYCl7XYWbV9V6O",
2246
+ callum: "N2lVS1w4EtoT3dr4eOWO",
2247
+ patrick: "ODq5zmih8GrVes37Dizd",
2248
+ harry: "SOYHLrjzK2X1ezoPC6cr",
2249
+ liam: "TX3LPaxmHKxFdv7VOQHJ",
2250
+ dorothy: "ThT5KcBeYPX3keUQqHPh",
2251
+ josh: "TxGEqnHWrfWFTfGW9XjX",
2252
+ arnold: "VR6AewLTigWG4xSOukaG",
2253
+ charlotte: "XB0fDUnXU5powFXDhCwa",
2254
+ matilda: "XrExE9yKIg1WjnnlVkGX",
2255
+ matthew: "Yko7PKHZNXotIFUBG7I9",
2256
+ james: "ZQe5CZNOzWyzPSCn5a3c",
2257
+ joseph: "Zlb1dXrM653N07WRdFW3",
2258
+ jeremy: "bVMeCyTHy58xNoL34h3p",
2259
+ michael: "flq6f7yk4E4fJM5XTYuZ",
2260
+ ethan: "g5CIjZEefAph4nQFvHAz",
2261
+ gigi: "jBpfuIE2acCO8z3wKNLl",
2262
+ freya: "jsCqWAovK2LkecY7zXl4",
2263
+ brian: "nPczCjzI2devNBz1zQrb",
2264
+ grace: "oWAxZDx7w5VEj9dCyTzz",
2265
+ daniel: "onwK4e9ZLuTAKqWW03F9",
2266
+ lily: "pFZP5JQG7iQjIQuC4Bku",
2267
+ serena: "pMsXgVXv3BLzUgSXRplE",
2268
+ adam: "pNInz6obpgDQGcFmaJgB",
2269
+ nicole: "piTKgcLEGmPE4e6mEKli",
2270
+ bill: "pqHfZKP75CvOlQylNhV4",
2271
+ jessie: "t0jbNlBVZ17f02VDIeMI",
2272
+ ryan: "wViXBPUzp2ZZixB1xQuM",
2273
+ sam: "yoZ06aMxZJJ28mfd3POQ",
2274
+ glinda: "z9fAnlkpzviPz146aGWa",
2275
+ giovanni: "zcAOhNBS3c14rBihAFp1",
2276
+ mimi: "zrHiDhphv9ZnVXBqCLjz",
2277
+ alloy: "21m00Tcm4TlvDq8ikWAM"
2278
+ };
2279
+ var VOICE_ID_PATTERN = /^[A-Za-z0-9]{20}$/;
2280
+ function resolveVoiceId(voice) {
2281
+ if (!voice) return voice;
2282
+ if (VOICE_ID_PATTERN.test(voice)) return voice;
2283
+ return ELEVENLABS_VOICE_ID_BY_NAME[voice.toLowerCase()] ?? voice;
2284
+ }
2285
+ var ElevenLabsTTS = class {
2286
+ constructor(apiKey, voiceId = "21m00Tcm4TlvDq8ikWAM", modelId = "eleven_turbo_v2_5", outputFormat = "pcm_16000") {
2287
+ this.apiKey = apiKey;
2288
+ this.modelId = modelId;
2289
+ this.outputFormat = outputFormat;
2290
+ this.voiceId = resolveVoiceId(voiceId);
2291
+ }
2292
+ voiceId;
2293
+ /**
2294
+ * Synthesise text to speech and return the full audio as a single Buffer.
2295
+ *
2296
+ * For large chunks (or when latency matters) call `synthesizeStream` instead.
2297
+ */
2298
+ async synthesize(text) {
2299
+ const chunks = [];
2300
+ for await (const chunk of this.synthesizeStream(text)) {
2301
+ chunks.push(chunk);
2302
+ }
2303
+ return Buffer.concat(chunks);
2304
+ }
2305
+ /**
2306
+ * Synthesise text and yield audio chunks as they arrive (streaming).
2307
+ *
2308
+ * The yielded buffers are raw PCM at 16 kHz (or whatever `outputFormat` is
2309
+ * configured to).
2310
+ */
2311
+ async *synthesizeStream(text) {
2312
+ const url = `${ELEVENLABS_BASE_URL}/text-to-speech/${encodeURIComponent(this.voiceId)}/stream?output_format=${encodeURIComponent(this.outputFormat)}`;
2313
+ const response = await fetch(url, {
2314
+ method: "POST",
2315
+ headers: {
2316
+ "xi-api-key": this.apiKey,
2317
+ "Content-Type": "application/json"
2318
+ },
2319
+ body: JSON.stringify({ text, model_id: this.modelId }),
2320
+ signal: AbortSignal.timeout(3e4)
2321
+ });
2322
+ if (!response.ok) {
2323
+ const body = await response.text();
2324
+ throw new Error(`ElevenLabs TTS error ${response.status}: ${body}`);
2325
+ }
2326
+ if (!response.body) {
2327
+ throw new Error("ElevenLabs TTS: no response body");
2328
+ }
2329
+ const reader = response.body.getReader();
2330
+ try {
2331
+ while (true) {
2332
+ const { done, value } = await reader.read();
2333
+ if (done) break;
2334
+ if (value && value.length > 0) {
2335
+ yield Buffer.from(value);
2336
+ }
2337
+ }
2338
+ } finally {
2339
+ if (typeof reader.cancel === "function") await reader.cancel().catch(() => {
2340
+ });
2341
+ reader.releaseLock();
2342
+ }
2343
+ }
2344
+ };
2345
+
2346
+ // src/tts/elevenlabs.ts
2347
+ var TTS = class extends ElevenLabsTTS {
2348
+ constructor(opts = {}) {
2349
+ const key = opts.apiKey ?? process.env.ELEVENLABS_API_KEY;
2350
+ if (!key) {
2351
+ throw new Error(
2352
+ "ElevenLabs TTS requires an apiKey. Pass { apiKey: '...' } or set ELEVENLABS_API_KEY in the environment."
2353
+ );
2354
+ }
2355
+ super(
2356
+ key,
2357
+ opts.voiceId ?? "21m00Tcm4TlvDq8ikWAM",
2358
+ opts.modelId ?? "eleven_turbo_v2_5",
2359
+ opts.outputFormat ?? "pcm_16000"
2360
+ );
2361
+ }
2362
+ };
2363
+
2364
+ // src/providers/openai-tts.ts
2365
+ var OPENAI_TTS_URL = "https://api.openai.com/v1/audio/speech";
2366
+ var OpenAITTS = class _OpenAITTS {
2367
+ constructor(apiKey, voice = "alloy", model = "tts-1") {
2368
+ this.apiKey = apiKey;
2369
+ this.voice = voice;
2370
+ this.model = model;
2371
+ }
2372
+ /**
2373
+ * Synthesise text to speech and return the full audio as a single Buffer.
2374
+ *
2375
+ * For large chunks (or when latency matters) call `synthesizeStream` instead.
2376
+ */
2377
+ async synthesize(text) {
2378
+ const chunks = [];
2379
+ for await (const chunk of this.synthesizeStream(text)) {
2380
+ chunks.push(chunk);
2381
+ }
2382
+ return Buffer.concat(chunks);
2383
+ }
2384
+ /**
2385
+ * Synthesise text and yield audio chunks as they arrive (streaming).
2386
+ *
2387
+ * OpenAI returns 24 kHz PCM16; each chunk is resampled to 16 kHz before
2388
+ * yielding so the output is ready for telephony pipelines.
2389
+ *
2390
+ * The resampler carries state (buffered samples + odd trailing byte)
2391
+ * between chunks — without that state cross-chunk sample alignment drifts
2392
+ * and the caller hears pops / dropped audio (BUG #23, mirror of the
2393
+ * Python `audioop.ratecv` fix).
2394
+ */
2395
+ async *synthesizeStream(text) {
2396
+ const response = await fetch(OPENAI_TTS_URL, {
2397
+ method: "POST",
2398
+ headers: {
2399
+ "Authorization": `Bearer ${this.apiKey}`,
2400
+ "Content-Type": "application/json"
2401
+ },
2402
+ body: JSON.stringify({
2403
+ model: this.model,
2404
+ input: text,
2405
+ voice: this.voice,
2406
+ response_format: "pcm"
2407
+ }),
2408
+ signal: AbortSignal.timeout(3e4)
2409
+ });
2410
+ if (!response.ok) {
2411
+ const body = await response.text();
2412
+ throw new Error(`OpenAI TTS error ${response.status}: ${body}`);
2413
+ }
2414
+ if (!response.body) {
2415
+ throw new Error("OpenAI TTS: no response body");
2416
+ }
2417
+ const ctx = { carryByte: null, leftover: [] };
2418
+ const reader = response.body.getReader();
2419
+ try {
2420
+ while (true) {
2421
+ const { done, value } = await reader.read();
2422
+ if (done) break;
2423
+ if (value && value.length > 0) {
2424
+ const out = _OpenAITTS.resampleStreaming(Buffer.from(value), ctx);
2425
+ if (out.length > 0) yield out;
2426
+ }
2427
+ }
2428
+ if (ctx.leftover.length > 0) {
2429
+ const tail = Buffer.alloc(ctx.leftover.length * 2);
2430
+ for (let i = 0; i < ctx.leftover.length; i++) {
2431
+ tail.writeInt16LE(ctx.leftover[i], i * 2);
2432
+ }
2433
+ yield tail;
2434
+ }
2435
+ } finally {
2436
+ if (typeof reader.cancel === "function") await reader.cancel().catch(() => {
2437
+ });
2438
+ reader.releaseLock();
2439
+ }
2440
+ }
2441
+ /**
2442
+ * Streaming 24 kHz → 16 kHz resampler (PCM16-LE). Maintains cross-chunk
2443
+ * state so the 3:2 pattern doesn't reset at every network read.
2444
+ */
2445
+ static resampleStreaming(audio, ctx) {
2446
+ let buf;
2447
+ if (ctx.carryByte !== null) {
2448
+ buf = Buffer.concat([Buffer.from([ctx.carryByte]), audio]);
2449
+ ctx.carryByte = null;
2450
+ } else {
2451
+ buf = audio;
2452
+ }
2453
+ if (buf.length % 2 === 1) {
2454
+ ctx.carryByte = buf[buf.length - 1];
2455
+ buf = buf.subarray(0, buf.length - 1);
2456
+ }
2457
+ if (buf.length === 0 && ctx.leftover.length === 0) {
2458
+ return Buffer.alloc(0);
2459
+ }
2460
+ const sampleCount = buf.length / 2;
2461
+ const samples = ctx.leftover.slice();
2462
+ for (let i2 = 0; i2 < sampleCount; i2++) {
2463
+ samples.push(buf.readInt16LE(i2 * 2));
2464
+ }
2465
+ const out = [];
2466
+ let i = 0;
2467
+ while (i + 2 < samples.length) {
2468
+ out.push(samples[i]);
2469
+ out.push(Math.trunc((samples[i + 1] + samples[i + 2]) / 2));
2470
+ i += 3;
2471
+ }
2472
+ ctx.leftover = samples.slice(i);
2473
+ const buffer = Buffer.alloc(out.length * 2);
2474
+ for (let j = 0; j < out.length; j++) {
2475
+ buffer.writeInt16LE(out[j], j * 2);
2476
+ }
2477
+ return buffer;
2478
+ }
2479
+ /** @deprecated use {@link resampleStreaming} with persistent state. */
2480
+ static resample24kTo16k(audio) {
2481
+ const ctx = { carryByte: null, leftover: [] };
2482
+ const out = _OpenAITTS.resampleStreaming(audio, ctx);
2483
+ if (ctx.leftover.length === 0) return out;
2484
+ const tail = Buffer.alloc(ctx.leftover.length * 2);
2485
+ for (let i = 0; i < ctx.leftover.length; i++) {
2486
+ tail.writeInt16LE(ctx.leftover[i], i * 2);
2487
+ }
2488
+ return Buffer.concat([out, tail]);
2489
+ }
2490
+ };
2491
+
2492
+ // src/tts/openai.ts
2493
+ var TTS2 = class extends OpenAITTS {
2494
+ constructor(opts = {}) {
2495
+ const key = opts.apiKey ?? process.env.OPENAI_API_KEY;
2496
+ if (!key) {
2497
+ throw new Error(
2498
+ "OpenAI TTS requires an apiKey. Pass { apiKey: 'sk-...' } or set OPENAI_API_KEY in the environment."
2499
+ );
2500
+ }
2501
+ super(key, opts.voice ?? "alloy", opts.model ?? "tts-1");
2502
+ }
2503
+ };
1836
2504
 
1837
2505
  // src/providers/cartesia-tts.ts
1838
2506
  var CARTESIA_BASE_URL = "https://api.cartesia.ai";
@@ -1932,6 +2600,21 @@ var CartesiaTTS = class {
1932
2600
  }
1933
2601
  };
1934
2602
 
2603
+ // src/tts/cartesia.ts
2604
+ var TTS3 = class extends CartesiaTTS {
2605
+ constructor(opts = {}) {
2606
+ const key = opts.apiKey ?? process.env.CARTESIA_API_KEY;
2607
+ if (!key) {
2608
+ throw new Error(
2609
+ "Cartesia TTS requires an apiKey. Pass { apiKey: '...' } or set CARTESIA_API_KEY in the environment."
2610
+ );
2611
+ }
2612
+ const { apiKey: _ignored, ...rest } = opts;
2613
+ void _ignored;
2614
+ super(key, rest);
2615
+ }
2616
+ };
2617
+
1935
2618
  // src/providers/rime-tts.ts
1936
2619
  var RIME_BASE_URL = "https://users.rime.ai/v1/rime-tts";
1937
2620
  var ARCANA_MODEL_TIMEOUT_MS = 60 * 4 * 1e3;
@@ -2059,6 +2742,21 @@ var RimeTTS = class {
2059
2742
  }
2060
2743
  };
2061
2744
 
2745
+ // src/tts/rime.ts
2746
+ var TTS4 = class extends RimeTTS {
2747
+ constructor(opts = {}) {
2748
+ const key = opts.apiKey ?? process.env.RIME_API_KEY;
2749
+ if (!key) {
2750
+ throw new Error(
2751
+ "Rime TTS requires an apiKey. Pass { apiKey: '...' } or set RIME_API_KEY in the environment."
2752
+ );
2753
+ }
2754
+ const { apiKey: _ignored, ...rest } = opts;
2755
+ void _ignored;
2756
+ super(key, rest);
2757
+ }
2758
+ };
2759
+
2062
2760
  // src/providers/lmnt-tts.ts
2063
2761
  var LMNT_BASE_URL = "https://api.lmnt.com/v1/ai/speech/bytes";
2064
2762
  var LMNTTTS = class {
@@ -2137,6 +2835,119 @@ var LMNTTTS = class {
2137
2835
  }
2138
2836
  };
2139
2837
 
2838
+ // src/tts/lmnt.ts
2839
+ var TTS5 = class extends LMNTTTS {
2840
+ constructor(opts = {}) {
2841
+ const key = opts.apiKey ?? process.env.LMNT_API_KEY;
2842
+ if (!key) {
2843
+ throw new Error(
2844
+ "LMNT TTS requires an apiKey. Pass { apiKey: '...' } or set LMNT_API_KEY in the environment."
2845
+ );
2846
+ }
2847
+ const { apiKey: _ignored, ...rest } = opts;
2848
+ void _ignored;
2849
+ super(key, rest);
2850
+ }
2851
+ };
2852
+
2853
+ // src/carriers/twilio.ts
2854
+ var Carrier = class {
2855
+ kind = "twilio";
2856
+ accountSid;
2857
+ authToken;
2858
+ constructor(opts = {}) {
2859
+ const sid = opts.accountSid ?? process.env.TWILIO_ACCOUNT_SID;
2860
+ const tok = opts.authToken ?? process.env.TWILIO_AUTH_TOKEN;
2861
+ if (!sid) {
2862
+ throw new Error(
2863
+ "Twilio carrier requires accountSid. Pass { accountSid: 'AC...' } or set TWILIO_ACCOUNT_SID in the environment."
2864
+ );
2865
+ }
2866
+ if (!tok) {
2867
+ throw new Error(
2868
+ "Twilio carrier requires authToken. Pass { authToken: '...' } or set TWILIO_AUTH_TOKEN in the environment."
2869
+ );
2870
+ }
2871
+ this.accountSid = sid;
2872
+ this.authToken = tok;
2873
+ }
2874
+ };
2875
+
2876
+ // src/carriers/telnyx.ts
2877
+ var Carrier2 = class {
2878
+ kind = "telnyx";
2879
+ apiKey;
2880
+ connectionId;
2881
+ publicKey;
2882
+ constructor(opts = {}) {
2883
+ const key = opts.apiKey ?? process.env.TELNYX_API_KEY;
2884
+ const conn = opts.connectionId ?? process.env.TELNYX_CONNECTION_ID;
2885
+ const pub = opts.publicKey ?? process.env.TELNYX_PUBLIC_KEY;
2886
+ if (!key) {
2887
+ throw new Error(
2888
+ "Telnyx carrier requires apiKey. Pass { apiKey: '...' } or set TELNYX_API_KEY in the environment."
2889
+ );
2890
+ }
2891
+ if (!conn) {
2892
+ throw new Error(
2893
+ "Telnyx carrier requires connectionId. Pass { connectionId: '...' } or set TELNYX_CONNECTION_ID in the environment."
2894
+ );
2895
+ }
2896
+ this.apiKey = key;
2897
+ this.connectionId = conn;
2898
+ this.publicKey = pub;
2899
+ }
2900
+ };
2901
+
2902
+ // src/public-api.ts
2903
+ var DEFAULT_GUARDRAIL_REPLACEMENT = "I'm sorry, I can't respond to that.";
2904
+ var Guardrail = class {
2905
+ name;
2906
+ blockedTerms;
2907
+ check;
2908
+ replacement;
2909
+ constructor(opts) {
2910
+ if (!opts.name) {
2911
+ throw new Error("Guardrail requires a non-empty name.");
2912
+ }
2913
+ this.name = opts.name;
2914
+ if (opts.blockedTerms) this.blockedTerms = opts.blockedTerms;
2915
+ if (opts.check) this.check = opts.check;
2916
+ this.replacement = opts.replacement ?? DEFAULT_GUARDRAIL_REPLACEMENT;
2917
+ }
2918
+ };
2919
+ function guardrail(opts) {
2920
+ return new Guardrail(opts);
2921
+ }
2922
+ var Tool = class {
2923
+ name;
2924
+ description;
2925
+ parameters;
2926
+ handler;
2927
+ webhookUrl;
2928
+ constructor(opts) {
2929
+ if (!opts.name) {
2930
+ throw new Error("Tool requires a non-empty name.");
2931
+ }
2932
+ const hasHandler = typeof opts.handler === "function";
2933
+ const hasWebhook = typeof opts.webhookUrl === "string" && opts.webhookUrl.length > 0;
2934
+ if (!hasHandler && !hasWebhook) {
2935
+ throw new Error("Tool requires either handler or webhookUrl.");
2936
+ }
2937
+ if (hasHandler && hasWebhook) {
2938
+ throw new Error("Tool accepts handler OR webhookUrl, not both.");
2939
+ }
2940
+ this.name = opts.name;
2941
+ this.description = opts.description ?? "";
2942
+ this.parameters = opts.parameters ?? { type: "object", properties: {} };
2943
+ if (hasHandler) this.handler = opts.handler;
2944
+ if (hasWebhook) this.webhookUrl = opts.webhookUrl;
2945
+ }
2946
+ };
2947
+ function tool(opts) {
2948
+ return new Tool(opts);
2949
+ }
2950
+
2140
2951
  // src/chat-context.ts
2141
2952
  import { randomUUID } from "crypto";
2142
2953
  function generateId() {
@@ -2747,31 +3558,35 @@ function isAudioConfig(value) {
2747
3558
  }
2748
3559
  export {
2749
3560
  AllProvidersFailedError,
2750
- AssemblyAISTT,
3561
+ STT5 as AssemblyAISTT,
2751
3562
  AuthenticationError,
2752
3563
  BackgroundAudioPlayer,
2753
3564
  BuiltinAudioClip,
2754
3565
  CallMetricsAccumulator,
2755
- CartesiaSTT,
2756
- CartesiaTTS,
3566
+ STT3 as CartesiaSTT,
3567
+ TTS3 as CartesiaTTS,
2757
3568
  ChatContext,
3569
+ CloudflareTunnel,
2758
3570
  DEFAULT_MIN_SENTENCE_LEN,
2759
3571
  DEFAULT_PRICING,
2760
3572
  DTMF_EVENTS,
2761
- DeepgramSTT,
3573
+ STT as DeepgramSTT,
3574
+ ConvAI as ElevenLabsConvAI,
2762
3575
  ElevenLabsConvAIAdapter,
2763
- ElevenLabsTTS,
3576
+ TTS as ElevenLabsTTS,
2764
3577
  FallbackLLMProvider,
2765
3578
  GEMINI_DEFAULT_INPUT_SR,
2766
3579
  GEMINI_DEFAULT_OUTPUT_SR,
2767
3580
  GeminiLiveAdapter,
3581
+ Guardrail,
2768
3582
  IVRActivity,
2769
3583
  LLMLoop,
2770
- LMNTTTS,
3584
+ TTS5 as LMNTTTS,
2771
3585
  MetricsStore,
2772
3586
  OpenAILLMProvider,
3587
+ Realtime as OpenAIRealtime,
2773
3588
  OpenAIRealtimeAdapter,
2774
- OpenAITTS,
3589
+ TTS2 as OpenAITTS,
2775
3590
  PartialStreamError,
2776
3591
  Patter,
2777
3592
  PatterConnectionError,
@@ -2779,15 +3594,19 @@ export {
2779
3594
  PipelineHookExecutor,
2780
3595
  ProvisionError,
2781
3596
  RemoteMessageHandler,
2782
- RimeTTS,
3597
+ TTS4 as RimeTTS,
2783
3598
  SentenceChunker,
2784
- SonioxSTT,
3599
+ STT4 as SonioxSTT,
3600
+ Static as StaticTunnel,
3601
+ Carrier2 as Telnyx,
2785
3602
  TestSession,
2786
3603
  TfidfLoopDetector,
3604
+ Tool,
3605
+ Carrier as Twilio,
2787
3606
  ULTRAVOX_DEFAULT_API_BASE,
2788
3607
  ULTRAVOX_DEFAULT_SR,
2789
3608
  UltravoxRealtimeAdapter,
2790
- WhisperSTT,
3609
+ STT2 as WhisperSTT,
2791
3610
  builtinClipPath,
2792
3611
  calculateRealtimeCost,
2793
3612
  calculateSttCost,
@@ -2803,6 +3622,7 @@ export {
2803
3622
  filterMarkdown,
2804
3623
  formatDtmf,
2805
3624
  getLogger,
3625
+ guardrail,
2806
3626
  isRemoteUrl,
2807
3627
  isWebSocketUrl,
2808
3628
  makeAuthMiddleware,
@@ -2824,5 +3644,6 @@ export {
2824
3644
  selectSoundFromList,
2825
3645
  setLogger,
2826
3646
  startTunnel,
3647
+ tool,
2827
3648
  whisper
2828
3649
  };