getpatter 0.4.4 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -10,18 +10,15 @@ import {
10
10
  DEFAULT_PRICING,
11
11
  DeepgramSTT,
12
12
  ElevenLabsConvAIAdapter,
13
- ElevenLabsTTS,
14
13
  EmbeddedServer,
15
14
  LLMLoop,
16
15
  MetricsStore,
17
16
  OpenAILLMProvider,
18
17
  OpenAIRealtimeAdapter,
19
- OpenAITTS,
20
18
  PipelineHookExecutor,
21
19
  RemoteMessageHandler,
22
20
  SentenceChunker,
23
21
  TestSession,
24
- WhisperSTT,
25
22
  calculateRealtimeCost,
26
23
  calculateSttCost,
27
24
  calculateTelephonyCost,
@@ -39,7 +36,7 @@ import {
39
36
  resample16kTo8k,
40
37
  resample24kTo16k,
41
38
  resample8kTo16k
42
- } from "./chunk-O3RQG3NL.mjs";
39
+ } from "./chunk-757NVN4L.mjs";
43
40
  import {
44
41
  getLogger,
45
42
  setLogger
@@ -186,74 +183,64 @@ var PatterConnection = class {
186
183
  }
187
184
  };
188
185
 
189
- // src/providers.ts
190
- var STTConfigImpl = class {
191
- provider;
186
+ // src/engines/openai.ts
187
+ var Realtime = class {
188
+ kind = "openai_realtime";
192
189
  apiKey;
193
- language;
194
- options;
195
- constructor(provider, apiKey, language = "en", options) {
196
- this.provider = provider;
197
- this.apiKey = apiKey;
198
- this.language = language;
199
- if (options) this.options = options;
200
- }
201
- toDict() {
202
- const out = {
203
- provider: this.provider,
204
- api_key: this.apiKey,
205
- language: this.language
206
- };
207
- if (this.options) out.options = { ...this.options };
208
- return out;
190
+ model;
191
+ voice;
192
+ constructor(opts = {}) {
193
+ const key = opts.apiKey ?? process.env.OPENAI_API_KEY;
194
+ if (!key) {
195
+ throw new Error(
196
+ "OpenAI Realtime requires an apiKey. Pass { apiKey: 'sk-...' } or set OPENAI_API_KEY in the environment."
197
+ );
198
+ }
199
+ this.apiKey = key;
200
+ this.model = opts.model ?? "gpt-4o-mini-realtime-preview";
201
+ this.voice = opts.voice ?? "alloy";
209
202
  }
210
203
  };
211
- var TTSConfigImpl = class {
212
- provider;
204
+
205
+ // src/engines/elevenlabs.ts
206
+ var ConvAI = class {
207
+ kind = "elevenlabs_convai";
213
208
  apiKey;
209
+ agentId;
214
210
  voice;
215
- constructor(provider, apiKey, voice = "alloy") {
216
- this.provider = provider;
217
- this.apiKey = apiKey;
218
- this.voice = voice;
211
+ constructor(opts = {}) {
212
+ const key = opts.apiKey ?? process.env.ELEVENLABS_API_KEY;
213
+ const agent = opts.agentId ?? process.env.ELEVENLABS_AGENT_ID;
214
+ if (!key) {
215
+ throw new Error(
216
+ "ElevenLabs ConvAI requires an apiKey. Pass { apiKey: '...' } or set ELEVENLABS_API_KEY in the environment."
217
+ );
218
+ }
219
+ if (!agent) {
220
+ throw new Error(
221
+ "ElevenLabs ConvAI requires an agentId. Pass { agentId: 'agent_...' } or set ELEVENLABS_AGENT_ID in the environment."
222
+ );
223
+ }
224
+ this.apiKey = key;
225
+ this.agentId = agent;
226
+ this.voice = opts.voice;
219
227
  }
220
- toDict() {
221
- return { provider: this.provider, api_key: this.apiKey, voice: this.voice };
228
+ };
229
+
230
+ // src/tunnels/index.ts
231
+ var CloudflareTunnel = class {
232
+ kind = "cloudflare";
233
+ };
234
+ var Static = class {
235
+ kind = "static";
236
+ hostname;
237
+ constructor(opts) {
238
+ if (!opts.hostname) {
239
+ throw new Error("Static tunnel requires a non-empty hostname.");
240
+ }
241
+ this.hostname = opts.hostname;
222
242
  }
223
243
  };
224
- function deepgram(opts) {
225
- const options = {
226
- model: opts.model ?? "nova-3",
227
- endpointing_ms: opts.endpointingMs ?? 150,
228
- utterance_end_ms: opts.utteranceEndMs === null ? null : opts.utteranceEndMs ?? 1e3,
229
- smart_format: opts.smartFormat ?? true,
230
- interim_results: opts.interimResults ?? true
231
- };
232
- if (opts.vadEvents !== void 0) options.vad_events = opts.vadEvents;
233
- return new STTConfigImpl("deepgram", opts.apiKey, opts.language ?? "en", options);
234
- }
235
- function whisper(opts) {
236
- return new STTConfigImpl("whisper", opts.apiKey, opts.language ?? "en");
237
- }
238
- function elevenlabs(opts) {
239
- return new TTSConfigImpl("elevenlabs", opts.apiKey, opts.voice ?? "rachel");
240
- }
241
- function openaiTts(opts) {
242
- return new TTSConfigImpl("openai", opts.apiKey, opts.voice ?? "alloy");
243
- }
244
- function cartesia(opts) {
245
- return new TTSConfigImpl(
246
- "cartesia",
247
- opts.apiKey,
248
- opts.voice ?? "f786b574-daa5-4673-aa0c-cbe3e8534c02"
249
- );
250
- }
251
- function rime(opts) {
252
- return new TTSConfigImpl("rime", opts.apiKey, opts.voice ?? "astra");
253
- }
254
- function lmnt(opts) {
255
- return new TTSConfigImpl("lmnt", opts.apiKey, opts.voice ?? "leah");
256
- }
257
244
 
258
245
  // src/client.ts
259
246
  var DEFAULT_BACKEND_URL2 = "wss://api.getpatter.com";
@@ -286,21 +273,39 @@ var Patter = class {
286
273
  embeddedServer = null;
287
274
  tunnelHandle = null;
288
275
  constructor(options) {
289
- const isLocal = "mode" in options && options.mode === "local" || !("apiKey" in options) && ("twilioSid" in options && options.twilioSid || "telnyxKey" in options && options.telnyxKey);
276
+ const hasCarrier = "carrier" in options && options.carrier !== void 0;
277
+ const isLocal = "mode" in options && options.mode === "local" || hasCarrier;
290
278
  if (isLocal) {
291
279
  const local = options;
292
280
  if (!local.phoneNumber) {
293
281
  throw new Error("Local mode requires phoneNumber");
294
282
  }
295
- if (!local.twilioSid && !local.telnyxKey) {
296
- throw new Error("Local mode requires twilioSid or telnyxKey");
283
+ if (!local.carrier) {
284
+ throw new Error(
285
+ "Local mode requires a `carrier` instance. Pass `carrier: new Twilio({...})` or `carrier: new Telnyx({...})`."
286
+ );
297
287
  }
298
- if (local.twilioSid && !local.twilioToken) {
299
- throw new Error("twilioToken is required when using twilioSid");
288
+ const carrier = local.carrier;
289
+ const tunnel = local.tunnel;
290
+ let tunnelWebhookUrl;
291
+ if (tunnel instanceof Static) {
292
+ if (local.webhookUrl) {
293
+ throw new Error(
294
+ "Cannot use both `tunnel: new StaticTunnel(...)` and `webhookUrl`. Pick one."
295
+ );
296
+ }
297
+ tunnelWebhookUrl = tunnel.hostname;
300
298
  }
301
299
  this.mode = "local";
302
- const normalizedLocal = local.webhookUrl ? { ...local, webhookUrl: local.webhookUrl.replace(/^https?:\/\//, "").replace(/\/$/, "") } : local;
303
- this.localConfig = normalizedLocal;
300
+ const rawWebhook = tunnelWebhookUrl ?? local.webhookUrl;
301
+ const normalizedWebhook = rawWebhook ? rawWebhook.replace(/^https?:\/\//, "").replace(/\/$/, "") : void 0;
302
+ this.localConfig = {
303
+ carrier,
304
+ phoneNumber: local.phoneNumber,
305
+ webhookUrl: normalizedWebhook,
306
+ tunnel: local.tunnel,
307
+ openaiKey: local.openaiKey
308
+ };
304
309
  this.apiKey = "";
305
310
  this.backendUrl = DEFAULT_BACKEND_URL2;
306
311
  this.restUrl = DEFAULT_REST_URL;
@@ -317,25 +322,55 @@ var Patter = class {
317
322
  }
318
323
  // === Local mode ===
319
324
  agent(opts) {
320
- if (opts.provider) {
325
+ let working = { ...opts };
326
+ if (opts.engine) {
327
+ if (opts.provider) {
328
+ throw new Error(
329
+ "Cannot pass both `engine:` and `provider:`. Use one (engine is preferred)."
330
+ );
331
+ }
332
+ const engine = opts.engine;
333
+ if (engine instanceof Realtime) {
334
+ working = {
335
+ ...working,
336
+ provider: "openai_realtime",
337
+ model: working.model ?? engine.model,
338
+ voice: working.voice ?? engine.voice
339
+ };
340
+ if (this.localConfig && !this.localConfig.openaiKey) {
341
+ this.localConfig = { ...this.localConfig, openaiKey: engine.apiKey };
342
+ }
343
+ } else if (engine instanceof ConvAI) {
344
+ working = {
345
+ ...working,
346
+ provider: "elevenlabs_convai",
347
+ voice: working.voice ?? engine.voice
348
+ };
349
+ } else {
350
+ throw new Error(
351
+ "Unknown engine. Expected OpenAIRealtime or ElevenLabsConvAI instance."
352
+ );
353
+ }
354
+ }
355
+ if (working.provider) {
321
356
  const valid = ["openai_realtime", "elevenlabs_convai", "pipeline"];
322
- if (!valid.includes(opts.provider)) {
323
- throw new Error(`provider must be one of: ${valid.join(", ")}. Got: '${opts.provider}'`);
357
+ if (!valid.includes(working.provider)) {
358
+ throw new Error(`provider must be one of: ${valid.join(", ")}. Got: '${working.provider}'`);
324
359
  }
325
360
  }
326
- if (opts.tools) {
327
- if (!Array.isArray(opts.tools)) {
361
+ if (working.tools) {
362
+ if (!Array.isArray(working.tools)) {
328
363
  throw new TypeError("tools must be an array");
329
364
  }
330
- opts.tools.forEach((tool, i) => {
331
- if (!tool.name) throw new Error(`tools[${i}] missing required 'name' field`);
332
- if (!tool.webhookUrl && !tool.handler) throw new Error(`tools[${i}] requires either 'webhookUrl' or 'handler'`);
365
+ working.tools.forEach((tool2, i) => {
366
+ if (!tool2.name) throw new Error(`tools[${i}] missing required 'name' field`);
367
+ if (!tool2.webhookUrl && !tool2.handler) throw new Error(`tools[${i}] requires either 'webhookUrl' or 'handler'`);
333
368
  });
334
369
  }
335
- if (opts.variables !== void 0 && (typeof opts.variables !== "object" || Array.isArray(opts.variables))) {
370
+ if (working.variables !== void 0 && (typeof working.variables !== "object" || Array.isArray(working.variables))) {
336
371
  throw new TypeError("variables must be an object");
337
372
  }
338
- return { ...opts };
373
+ return working;
339
374
  }
340
375
  async serve(opts) {
341
376
  if (this.mode !== "local" || !this.localConfig) {
@@ -358,10 +393,14 @@ var Patter = class {
358
393
  }
359
394
  let webhookUrl = this.localConfig.webhookUrl ?? "";
360
395
  const port = opts.port ?? 8e3;
361
- if (opts.tunnel && webhookUrl) {
396
+ const ctorTunnel = this.localConfig.tunnel;
397
+ const wantsCloudflaredFromServe = opts.tunnel === true;
398
+ const wantsCloudflaredFromCtor = ctorTunnel === true || ctorTunnel instanceof CloudflareTunnel;
399
+ const wantsCloudflared = wantsCloudflaredFromServe || wantsCloudflaredFromCtor;
400
+ if (wantsCloudflared && webhookUrl) {
362
401
  throw new Error("Cannot use both tunnel: true and webhookUrl. Pick one.");
363
402
  }
364
- if (opts.tunnel) {
403
+ if (wantsCloudflared) {
365
404
  const { startTunnel: startTunnel2 } = await import("./tunnel-BL7A7GXW.mjs");
366
405
  this.tunnelHandle = await startTunnel2(port);
367
406
  webhookUrl = this.tunnelHandle.hostname;
@@ -371,17 +410,29 @@ var Patter = class {
371
410
  "No webhookUrl configured. Either:\n - Pass webhookUrl in the Patter constructor\n - Use tunnel: true in serve() to auto-create a tunnel"
372
411
  );
373
412
  }
413
+ const carrier = this.localConfig.carrier;
414
+ const telephonyProvider = carrier.kind === "twilio" ? "twilio" : "telnyx";
415
+ const { autoConfigureCarrier } = await import("./carrier-config-CPG5CROM.mjs");
416
+ await autoConfigureCarrier({
417
+ telephonyProvider,
418
+ twilioSid: carrier.kind === "twilio" ? carrier.accountSid : void 0,
419
+ twilioToken: carrier.kind === "twilio" ? carrier.authToken : void 0,
420
+ telnyxKey: carrier.kind === "telnyx" ? carrier.apiKey : void 0,
421
+ telnyxConnectionId: carrier.kind === "telnyx" ? carrier.connectionId : void 0,
422
+ phoneNumber: this.localConfig.phoneNumber,
423
+ webhookHost: webhookUrl
424
+ });
374
425
  this.embeddedServer = new EmbeddedServer(
375
426
  {
376
- twilioSid: this.localConfig.twilioSid,
377
- twilioToken: this.localConfig.twilioToken,
427
+ twilioSid: carrier.kind === "twilio" ? carrier.accountSid : void 0,
428
+ twilioToken: carrier.kind === "twilio" ? carrier.authToken : void 0,
378
429
  openaiKey: this.localConfig.openaiKey,
379
430
  phoneNumber: this.localConfig.phoneNumber,
380
431
  webhookUrl,
381
- telephonyProvider: this.localConfig.telephonyProvider,
382
- telnyxKey: this.localConfig.telnyxKey,
383
- telnyxConnectionId: this.localConfig.telnyxConnectionId,
384
- telnyxPublicKey: this.localConfig.telnyxPublicKey
432
+ telephonyProvider,
433
+ telnyxKey: carrier.kind === "telnyx" ? carrier.apiKey : void 0,
434
+ telnyxConnectionId: carrier.kind === "telnyx" ? carrier.connectionId : void 0,
435
+ telnyxPublicKey: carrier.kind === "telnyx" ? carrier.publicKey : void 0
385
436
  },
386
437
  opts.agent,
387
438
  opts.onCallStart,
@@ -401,7 +452,7 @@ var Patter = class {
401
452
  if (this.mode !== "local") {
402
453
  throw new Error("test() is only available in local mode");
403
454
  }
404
- const { TestSession: TestSession2 } = await import("./test-mode-ASSLSQU2.mjs");
455
+ const { TestSession: TestSession2 } = await import("./test-mode-YFOL2HYH.mjs");
405
456
  const session = new TestSession2();
406
457
  await session.run({
407
458
  agent: opts.agent,
@@ -442,10 +493,10 @@ var Patter = class {
442
493
  if (!this.localConfig) {
443
494
  throw new Error("local config missing");
444
495
  }
445
- const { phoneNumber, webhookUrl, telephonyProvider } = this.localConfig;
446
- if (telephonyProvider === "telnyx") {
447
- const telnyxKey = this.localConfig.telnyxKey ?? "";
448
- const connectionId = this.localConfig.telnyxConnectionId ?? "";
496
+ const { phoneNumber, webhookUrl, carrier } = this.localConfig;
497
+ if (carrier.kind === "telnyx") {
498
+ const telnyxKey = carrier.apiKey;
499
+ const connectionId = carrier.connectionId;
449
500
  const streamUrl = `wss://${webhookUrl}/ws/stream/${encodeURIComponent(localOpts.to)}?caller=${encodeURIComponent(phoneNumber)}&callee=${encodeURIComponent(localOpts.to)}`;
450
501
  const telnyxPayload = {
451
502
  connection_id: connectionId,
@@ -485,8 +536,8 @@ var Patter = class {
485
536
  }
486
537
  return;
487
538
  }
488
- const twilioSid = this.localConfig.twilioSid ?? "";
489
- const twilioToken = this.localConfig.twilioToken ?? "";
539
+ const twilioSid = carrier.accountSid;
540
+ const twilioToken = carrier.authToken;
490
541
  const statusCallbackUrl = `https://${webhookUrl}/webhooks/twilio/status`;
491
542
  const url = `https://api.twilio.com/2010-04-01/Accounts/${twilioSid}/Calls.json`;
492
543
  const params = new URLSearchParams({
@@ -618,65 +669,6 @@ var Patter = class {
618
669
  const data = await response.json();
619
670
  return data.map((c) => ({ id: c.id, direction: c.direction, caller: c.caller, callee: c.callee, startedAt: c.started_at, endedAt: c.ended_at, durationSeconds: c.duration_seconds, status: c.status, transcript: c.transcript }));
620
671
  }
621
- // Provider helpers — mirror the Python classmethod factories so callers can
622
- // write ``Patter.deepgram({ apiKey })`` without importing the top-level.
623
- static deepgram = deepgram;
624
- static whisper = whisper;
625
- static elevenlabs = elevenlabs;
626
- static openaiTts = openaiTts;
627
- static cartesia = cartesia;
628
- static rime = rime;
629
- static lmnt = lmnt;
630
- static guardrail(opts) {
631
- return {
632
- name: opts.name,
633
- blockedTerms: opts.blockedTerms,
634
- check: opts.check,
635
- replacement: opts.replacement ?? "I'm sorry, I can't respond to that."
636
- };
637
- }
638
- /**
639
- * Create a tool definition for use with `agent({ tools: [...] })`.
640
- *
641
- * Either `handler` (a function) or `webhookUrl` must be provided.
642
- *
643
- * @param opts.name - Tool name (visible to the LLM).
644
- * @param opts.description - What the tool does (visible to the LLM).
645
- * @param opts.parameters - JSON Schema for tool arguments.
646
- * @param opts.handler - Async function called in-process when the LLM invokes the tool.
647
- * @param opts.webhookUrl - URL to POST to when the LLM invokes the tool.
648
- *
649
- * @example
650
- * ```ts
651
- * phone.agent({
652
- * systemPrompt: 'You are a pizza bot.',
653
- * tools: [
654
- * Patter.tool({
655
- * name: 'check_menu',
656
- * description: 'Check available menu items',
657
- * handler: async (args) => JSON.stringify({ items: ['margherita'] }),
658
- * }),
659
- * ],
660
- * });
661
- * ```
662
- */
663
- static tool(opts) {
664
- if (!opts.handler && !opts.webhookUrl) {
665
- throw new Error("tool() requires either handler or webhookUrl");
666
- }
667
- const t = {
668
- name: opts.name,
669
- description: opts.description ?? "",
670
- parameters: opts.parameters ?? { type: "object", properties: {} }
671
- };
672
- if (opts.handler) {
673
- t.handler = opts.handler;
674
- }
675
- if (opts.webhookUrl) {
676
- t.webhookUrl = opts.webhookUrl;
677
- }
678
- return t;
679
- }
680
672
  // Internal
681
673
  async registerNumber(provider, providerKey, number, providerSecret, country = "US", stt, tts) {
682
674
  const credentials = { api_key: providerKey };
@@ -766,6 +758,62 @@ function filterForTTS(text) {
766
758
  return filterEmoji(filterMarkdown(text));
767
759
  }
768
760
 
761
+ // src/providers.ts
762
+ var STTConfigImpl = class {
763
+ provider;
764
+ apiKey;
765
+ language;
766
+ options;
767
+ constructor(provider, apiKey, language = "en", options) {
768
+ this.provider = provider;
769
+ this.apiKey = apiKey;
770
+ this.language = language;
771
+ if (options) this.options = options;
772
+ }
773
+ toDict() {
774
+ const out = {
775
+ provider: this.provider,
776
+ api_key: this.apiKey,
777
+ language: this.language
778
+ };
779
+ if (this.options) out.options = { ...this.options };
780
+ return out;
781
+ }
782
+ };
783
+ var TTSConfigImpl = class {
784
+ provider;
785
+ apiKey;
786
+ voice;
787
+ constructor(provider, apiKey, voice = "alloy") {
788
+ this.provider = provider;
789
+ this.apiKey = apiKey;
790
+ this.voice = voice;
791
+ }
792
+ toDict() {
793
+ return { provider: this.provider, api_key: this.apiKey, voice: this.voice };
794
+ }
795
+ };
796
+ function deepgram(opts) {
797
+ const options = {
798
+ model: opts.model ?? "nova-3",
799
+ endpointing_ms: opts.endpointingMs ?? 150,
800
+ utterance_end_ms: opts.utteranceEndMs === null ? null : opts.utteranceEndMs ?? 1e3,
801
+ smart_format: opts.smartFormat ?? true,
802
+ interim_results: opts.interimResults ?? true
803
+ };
804
+ if (opts.vadEvents !== void 0) options.vad_events = opts.vadEvents;
805
+ return new STTConfigImpl("deepgram", opts.apiKey, opts.language ?? "en", options);
806
+ }
807
+ function whisper(opts) {
808
+ return new STTConfigImpl("whisper", opts.apiKey, opts.language ?? "en");
809
+ }
810
+ function elevenlabs(opts) {
811
+ return new TTSConfigImpl("elevenlabs", opts.apiKey, opts.voice ?? "rachel");
812
+ }
813
+ function openaiTts(opts) {
814
+ return new TTSConfigImpl("openai", opts.apiKey, opts.voice ?? "alloy");
815
+ }
816
+
769
817
  // src/fallback-provider.ts
770
818
  var AllProvidersFailedError = class extends Error {
771
819
  constructor(message) {
@@ -1439,110 +1487,424 @@ function scheduleInterval(intervalOrOpts, callback) {
1439
1487
  };
1440
1488
  }
1441
1489
 
1442
- // src/providers/soniox-stt.ts
1443
- import WebSocket3 from "ws";
1444
- var SONIOX_WS_URL = "wss://stt-rt.soniox.com/transcribe-websocket";
1445
- var KEEPALIVE_MESSAGE = '{"type": "keepalive"}';
1446
- var END_TOKEN = "<end>";
1447
- var FINALIZED_TOKEN = "<fin>";
1448
- var KEEPALIVE_INTERVAL_MS = 5e3;
1449
- function isEndToken(token) {
1450
- return token.text === END_TOKEN || token.text === FINALIZED_TOKEN;
1451
- }
1452
- var TokenAccumulator = class {
1453
- text = "";
1454
- confSum = 0;
1455
- confCount = 0;
1456
- update(token) {
1457
- if (token.text) {
1458
- this.text += token.text;
1459
- }
1460
- if (typeof token.confidence === "number") {
1461
- this.confSum += token.confidence;
1462
- this.confCount += 1;
1490
+ // src/stt/deepgram.ts
1491
+ var STT = class extends DeepgramSTT {
1492
+ constructor(opts = {}) {
1493
+ const key = opts.apiKey ?? process.env.DEEPGRAM_API_KEY;
1494
+ if (!key) {
1495
+ throw new Error(
1496
+ "Deepgram STT requires an apiKey. Pass { apiKey: 'dg_...' } or set DEEPGRAM_API_KEY in the environment."
1497
+ );
1463
1498
  }
1464
- }
1465
- get confidence() {
1466
- return this.confCount === 0 ? 0 : this.confSum / this.confCount;
1467
- }
1468
- reset() {
1469
- this.text = "";
1470
- this.confSum = 0;
1471
- this.confCount = 0;
1472
- }
1473
- get raw() {
1474
- return { sum: this.confSum, count: this.confCount };
1499
+ super(
1500
+ key,
1501
+ opts.language ?? "en",
1502
+ opts.model ?? "nova-3",
1503
+ opts.encoding ?? "linear16",
1504
+ opts.sampleRate ?? 16e3,
1505
+ {
1506
+ endpointingMs: opts.endpointingMs ?? 150,
1507
+ utteranceEndMs: opts.utteranceEndMs === null ? null : opts.utteranceEndMs ?? 1e3,
1508
+ smartFormat: opts.smartFormat ?? true,
1509
+ interimResults: opts.interimResults ?? true,
1510
+ ...opts.vadEvents !== void 0 ? { vadEvents: opts.vadEvents } : {}
1511
+ }
1512
+ );
1475
1513
  }
1476
1514
  };
1477
- var SonioxSTT = class _SonioxSTT {
1478
- ws = null;
1479
- callbacks = [];
1480
- final = new TokenAccumulator();
1481
- keepaliveTimer = null;
1515
+
1516
+ // src/providers/whisper-stt.ts
1517
+ var OPENAI_TRANSCRIPTION_URL = "https://api.openai.com/v1/audio/transcriptions";
1518
+ var DEFAULT_BUFFER_SIZE = 16e3 * 2;
1519
+ function wrapPcmInWav(pcm, sampleRate = 16e3, channels = 1, bitsPerSample = 16) {
1520
+ const dataSize = pcm.length;
1521
+ const header = Buffer.alloc(44);
1522
+ header.write("RIFF", 0);
1523
+ header.writeUInt32LE(36 + dataSize, 4);
1524
+ header.write("WAVE", 8);
1525
+ header.write("fmt ", 12);
1526
+ header.writeUInt32LE(16, 16);
1527
+ header.writeUInt16LE(1, 20);
1528
+ header.writeUInt16LE(channels, 22);
1529
+ header.writeUInt32LE(sampleRate, 24);
1530
+ header.writeUInt32LE(sampleRate * channels * (bitsPerSample / 8), 28);
1531
+ header.writeUInt16LE(channels * (bitsPerSample / 8), 32);
1532
+ header.writeUInt16LE(bitsPerSample, 34);
1533
+ header.write("data", 36);
1534
+ header.writeUInt32LE(dataSize, 40);
1535
+ return Buffer.concat([header, pcm]);
1536
+ }
1537
+ var WhisperSTT = class _WhisperSTT {
1482
1538
  apiKey;
1483
1539
  model;
1484
- languageHints;
1485
- languageHintsStrict;
1486
- sampleRate;
1487
- numChannels;
1488
- enableSpeakerDiarization;
1489
- enableLanguageIdentification;
1490
- maxEndpointDelayMs;
1491
- clientReferenceId;
1492
- baseUrl;
1493
- constructor(apiKey, options = {}) {
1494
- if (!apiKey) {
1495
- throw new Error("Soniox apiKey is required");
1496
- }
1497
- const maxEndpointDelayMs = options.maxEndpointDelayMs ?? 500;
1498
- if (maxEndpointDelayMs < 500 || maxEndpointDelayMs > 3e3) {
1499
- throw new Error("maxEndpointDelayMs must be between 500 and 3000");
1500
- }
1540
+ language;
1541
+ bufferSize;
1542
+ buffer = Buffer.alloc(0);
1543
+ callbacks = [];
1544
+ running = false;
1545
+ pendingTranscriptions = [];
1546
+ constructor(apiKey, model = "whisper-1", language, bufferSize = DEFAULT_BUFFER_SIZE) {
1501
1547
  this.apiKey = apiKey;
1502
- this.model = options.model ?? "stt-rt-v4";
1503
- this.languageHints = options.languageHints;
1504
- this.languageHintsStrict = options.languageHintsStrict ?? false;
1505
- this.sampleRate = options.sampleRate ?? 16e3;
1506
- this.numChannels = options.numChannels ?? 1;
1507
- this.enableSpeakerDiarization = options.enableSpeakerDiarization ?? false;
1508
- this.enableLanguageIdentification = options.enableLanguageIdentification ?? true;
1509
- this.maxEndpointDelayMs = maxEndpointDelayMs;
1510
- this.clientReferenceId = options.clientReferenceId;
1511
- this.baseUrl = options.baseUrl ?? SONIOX_WS_URL;
1512
- }
1513
- /** Factory for Twilio-style 8 kHz linear PCM. */
1514
- static forTwilio(apiKey, languageHints) {
1515
- return new _SonioxSTT(apiKey, { sampleRate: 8e3, languageHints });
1548
+ this.model = model;
1549
+ this.language = language;
1550
+ this.bufferSize = bufferSize;
1516
1551
  }
1517
- buildConfig() {
1518
- const config = {
1519
- api_key: this.apiKey,
1520
- model: this.model,
1521
- audio_format: "pcm_s16le",
1522
- num_channels: this.numChannels,
1523
- sample_rate: this.sampleRate,
1524
- enable_endpoint_detection: true,
1525
- enable_speaker_diarization: this.enableSpeakerDiarization,
1526
- enable_language_identification: this.enableLanguageIdentification,
1527
- max_endpoint_delay_ms: this.maxEndpointDelayMs
1528
- };
1529
- if (this.languageHints) {
1530
- config.language_hints = this.languageHints;
1531
- config.language_hints_strict = this.languageHintsStrict;
1532
- }
1533
- if (this.clientReferenceId) {
1534
- config.client_reference_id = this.clientReferenceId;
1535
- }
1536
- return config;
1552
+ /** Factory for Twilio calls — mulaw 8 kHz is transcoded upstream, so we still receive PCM 16-bit. */
1553
+ static forTwilio(apiKey, language = "en", model = "whisper-1") {
1554
+ return new _WhisperSTT(apiKey, model, language);
1537
1555
  }
1538
1556
  async connect() {
1539
- this.ws = new WebSocket3(this.baseUrl);
1540
- await new Promise((resolve, reject) => {
1541
- const timer = setTimeout(() => reject(new Error("Soniox connect timeout")), 1e4);
1542
- this.ws.once("open", () => {
1543
- clearTimeout(timer);
1544
- resolve();
1545
- });
1557
+ this.running = true;
1558
+ this.buffer = Buffer.alloc(0);
1559
+ }
1560
+ sendAudio(audio) {
1561
+ if (!this.running) return;
1562
+ this.buffer = Buffer.concat([this.buffer, audio]);
1563
+ if (this.buffer.length >= this.bufferSize) {
1564
+ const pcm = this.buffer;
1565
+ this.buffer = Buffer.alloc(0);
1566
+ this.trackTranscription(this.transcribeBuffer(pcm));
1567
+ }
1568
+ }
1569
+ trackTranscription(promise) {
1570
+ const wrapped = promise.finally(() => {
1571
+ const idx = this.pendingTranscriptions.indexOf(wrapped);
1572
+ if (idx !== -1) this.pendingTranscriptions.splice(idx, 1);
1573
+ });
1574
+ this.pendingTranscriptions.push(wrapped);
1575
+ }
1576
+ onTranscript(callback) {
1577
+ if (this.callbacks.length >= 10) {
1578
+ getLogger().warn("WhisperSTT: maximum of 10 onTranscript callbacks reached; replacing the last callback.");
1579
+ this.callbacks[this.callbacks.length - 1] = callback;
1580
+ return;
1581
+ }
1582
+ this.callbacks.push(callback);
1583
+ }
1584
+ async close() {
1585
+ this.running = false;
1586
+ if (this.buffer.length >= this.bufferSize / 4) {
1587
+ const pcm = this.buffer;
1588
+ this.buffer = Buffer.alloc(0);
1589
+ this.trackTranscription(this.transcribeBuffer(pcm));
1590
+ } else {
1591
+ this.buffer = Buffer.alloc(0);
1592
+ }
1593
+ await Promise.allSettled(this.pendingTranscriptions);
1594
+ this.callbacks = [];
1595
+ }
1596
+ // ------------------------------------------------------------------
1597
+ // Private
1598
+ // ------------------------------------------------------------------
1599
+ async transcribeBuffer(pcm) {
1600
+ const wav = wrapPcmInWav(pcm);
1601
+ const formData = new FormData();
1602
+ formData.append("file", new Blob([wav.buffer.slice(wav.byteOffset, wav.byteOffset + wav.byteLength)], { type: "audio/wav" }), "audio.wav");
1603
+ formData.append("model", this.model);
1604
+ if (this.language) {
1605
+ formData.append("language", this.language);
1606
+ }
1607
+ try {
1608
+ const resp = await fetch(OPENAI_TRANSCRIPTION_URL, {
1609
+ method: "POST",
1610
+ headers: { Authorization: `Bearer ${this.apiKey}` },
1611
+ body: formData,
1612
+ signal: AbortSignal.timeout(15e3)
1613
+ });
1614
+ if (!resp.ok) {
1615
+ const body = await resp.text();
1616
+ getLogger().error(`WhisperSTT transcription error: ${resp.status} ${body}`);
1617
+ return;
1618
+ }
1619
+ const json = await resp.json();
1620
+ const text = (json.text ?? "").trim();
1621
+ if (!text) return;
1622
+ const transcript = {
1623
+ text,
1624
+ isFinal: true,
1625
+ confidence: 1
1626
+ };
1627
+ for (const cb of this.callbacks) {
1628
+ cb(transcript);
1629
+ }
1630
+ } catch (err) {
1631
+ getLogger().error(`WhisperSTT transcription error: ${String(err)}`);
1632
+ }
1633
+ }
1634
+ };
1635
+
1636
+ // src/stt/whisper.ts
1637
+ var STT2 = class extends WhisperSTT {
1638
+ constructor(opts = {}) {
1639
+ const key = opts.apiKey ?? process.env.OPENAI_API_KEY;
1640
+ if (!key) {
1641
+ throw new Error(
1642
+ "Whisper STT requires an apiKey. Pass { apiKey: 'sk-...' } or set OPENAI_API_KEY in the environment."
1643
+ );
1644
+ }
1645
+ super(key, opts.model ?? "whisper-1", opts.language, opts.bufferSize);
1646
+ }
1647
+ };
1648
+
1649
+ // src/providers/cartesia-stt.ts
1650
+ import WebSocket3 from "ws";
1651
+ var DEFAULT_BASE_URL = "https://api.cartesia.ai";
1652
+ var API_VERSION = "2025-04-16";
1653
+ var USER_AGENT = "Patter/1.0 (integration=LiveKit-port; provider=Cartesia)";
1654
+ var KEEPALIVE_INTERVAL_MS = 3e4;
1655
+ var CONNECT_TIMEOUT_MS = 1e4;
1656
+ var MAX_CALLBACKS = 10;
1657
+ var CartesiaSTT = class {
1658
+ constructor(apiKey, options = {}) {
1659
+ this.apiKey = apiKey;
1660
+ this.options = options;
1661
+ if (!apiKey) {
1662
+ throw new Error("CartesiaSTT requires a non-empty apiKey");
1663
+ }
1664
+ }
1665
+ ws = null;
1666
+ callbacks = [];
1667
+ keepaliveTimer = null;
1668
+ /** Cartesia request id — set from the server transcript events. */
1669
+ requestId = "";
1670
+ buildWsUrl() {
1671
+ const opts = this.options;
1672
+ const rawBase = opts.baseUrl ?? DEFAULT_BASE_URL;
1673
+ let base;
1674
+ if (rawBase.startsWith("http://")) {
1675
+ base = `ws://${rawBase.slice("http://".length)}`;
1676
+ } else if (rawBase.startsWith("https://")) {
1677
+ base = `wss://${rawBase.slice("https://".length)}`;
1678
+ } else if (rawBase.startsWith("ws://") || rawBase.startsWith("wss://")) {
1679
+ base = rawBase;
1680
+ } else {
1681
+ base = `wss://${rawBase}`;
1682
+ }
1683
+ const language = opts.language ?? "en";
1684
+ const params = new URLSearchParams({
1685
+ model: opts.model ?? "ink-whisper",
1686
+ sample_rate: String(opts.sampleRate ?? 16e3),
1687
+ encoding: opts.encoding ?? "pcm_s16le",
1688
+ cartesia_version: API_VERSION,
1689
+ api_key: this.apiKey,
1690
+ language
1691
+ });
1692
+ return `${base}/stt/websocket?${params.toString()}`;
1693
+ }
1694
+ async connect() {
1695
+ const url = this.buildWsUrl();
1696
+ this.ws = new WebSocket3(url, {
1697
+ headers: { "User-Agent": USER_AGENT }
1698
+ });
1699
+ await new Promise((resolve, reject) => {
1700
+ const timer = setTimeout(
1701
+ () => reject(new Error("Cartesia STT connect timeout")),
1702
+ CONNECT_TIMEOUT_MS
1703
+ );
1704
+ this.ws.once("open", () => {
1705
+ clearTimeout(timer);
1706
+ resolve();
1707
+ });
1708
+ this.ws.once("error", (err) => {
1709
+ clearTimeout(timer);
1710
+ reject(err);
1711
+ });
1712
+ });
1713
+ this.ws.on("message", (raw) => {
1714
+ let event;
1715
+ try {
1716
+ event = JSON.parse(raw.toString());
1717
+ } catch {
1718
+ return;
1719
+ }
1720
+ this.handleEvent(event);
1721
+ });
1722
+ this.keepaliveTimer = setInterval(() => {
1723
+ if (this.ws && this.ws.readyState === WebSocket3.OPEN) {
1724
+ try {
1725
+ this.ws.ping();
1726
+ } catch {
1727
+ }
1728
+ }
1729
+ }, KEEPALIVE_INTERVAL_MS);
1730
+ }
1731
+ handleEvent(event) {
1732
+ const type = event.type;
1733
+ if (type === "transcript") {
1734
+ const text = (event.text ?? "").trim();
1735
+ const isFinal = Boolean(event.is_final);
1736
+ if (!text && !isFinal) return;
1737
+ if (event.request_id) {
1738
+ this.requestId = event.request_id;
1739
+ }
1740
+ if (!text) return;
1741
+ const confidence = Number(event.probability ?? 1);
1742
+ this.emit({ text, isFinal, confidence });
1743
+ return;
1744
+ }
1745
+ if (type === "error") {
1746
+ getLogger().error(`Cartesia STT error: ${event.message ?? "unknown"}`);
1747
+ return;
1748
+ }
1749
+ }
1750
+ emit(transcript) {
1751
+ for (const cb of this.callbacks) {
1752
+ cb(transcript);
1753
+ }
1754
+ }
1755
+ sendAudio(audio) {
1756
+ if (!this.ws || this.ws.readyState !== WebSocket3.OPEN) return;
1757
+ this.ws.send(audio);
1758
+ }
1759
+ onTranscript(callback) {
1760
+ if (this.callbacks.length >= MAX_CALLBACKS) {
1761
+ getLogger().warn(
1762
+ "CartesiaSTT: maximum of 10 onTranscript callbacks reached; replacing the last callback."
1763
+ );
1764
+ this.callbacks[this.callbacks.length - 1] = callback;
1765
+ return;
1766
+ }
1767
+ this.callbacks.push(callback);
1768
+ }
1769
+ close() {
1770
+ if (this.keepaliveTimer) {
1771
+ clearInterval(this.keepaliveTimer);
1772
+ this.keepaliveTimer = null;
1773
+ }
1774
+ if (this.ws) {
1775
+ try {
1776
+ this.ws.send("finalize");
1777
+ } catch {
1778
+ }
1779
+ this.ws.close();
1780
+ this.ws = null;
1781
+ }
1782
+ }
1783
+ };
1784
+
1785
+ // src/stt/cartesia.ts
1786
/**
 * Cartesia speech-to-text provider with an environment fallback for the key.
 *
 * The API key is taken from `opts.apiKey`, falling back to
 * `process.env.CARTESIA_API_KEY`. Only `model`, `language`, `encoding`,
 * `sampleRate` and `baseUrl` are forwarded to `CartesiaSTT`.
 *
 * @param opts - Optional settings; see the forwarded fields above.
 * @throws {Error} when no API key is available from either source.
 */
var STT3 = class extends CartesiaSTT {
  constructor(opts = {}) {
    const resolvedKey = opts.apiKey ?? process.env.CARTESIA_API_KEY;
    if (resolvedKey) {
      const forwarded = {
        model: opts.model,
        language: opts.language,
        encoding: opts.encoding,
        sampleRate: opts.sampleRate,
        baseUrl: opts.baseUrl
      };
      super(resolvedKey, forwarded);
      return;
    }
    throw new Error(
      "Cartesia STT requires an apiKey. Pass { apiKey: '...' } or set CARTESIA_API_KEY in the environment."
    );
  }
};
1803
+
1804
+ // src/providers/soniox-stt.ts
1805
+ import WebSocket4 from "ws";
1806
// Soniox realtime transcription endpoint and protocol constants.
var SONIOX_WS_URL = "wss://stt-rt.soniox.com/transcribe-websocket";
var KEEPALIVE_MESSAGE = '{"type": "keepalive"}';
var END_TOKEN = "<end>";
var FINALIZED_TOKEN = "<fin>";
var KEEPALIVE_INTERVAL_MS2 = 5e3;
/**
 * Report whether a token is one of the two control markers
 * (`END_TOKEN` or `FINALIZED_TOKEN`) rather than transcript text.
 *
 * @param token - Object whose `text` property is compared against the markers.
 * @returns `true` when `token.text` equals either marker.
 */
function isEndToken(token) {
  const markers = [END_TOKEN, FINALIZED_TOKEN];
  return markers.includes(token.text);
}
1814
/**
 * Accumulates token text and a running confidence average.
 *
 * `text` is the concatenation of every token text seen since the last
 * `reset()`; `confSum` / `confCount` track only tokens whose `confidence`
 * is a number.
 */
var TokenAccumulator = class {
  text = "";
  confSum = 0;
  confCount = 0;
  /**
   * Fold one token into the accumulator.
   *
   * @param token - Object with optional `text` and `confidence` fields.
   */
  update(token) {
    if (token.text) {
      this.text = this.text + token.text;
    }
    // Non-numeric confidences are ignored so they cannot skew the mean.
    if (typeof token.confidence !== "number") return;
    this.confSum = this.confSum + token.confidence;
    this.confCount = this.confCount + 1;
  }
  /** Mean of the recorded confidences, or 0 when none were recorded. */
  get confidence() {
    if (this.confCount === 0) return 0;
    return this.confSum / this.confCount;
  }
  /** Clear the accumulated text and confidence statistics. */
  reset() {
    this.confCount = 0;
    this.confSum = 0;
    this.text = "";
  }
  /** Raw confidence statistics as `{ sum, count }`. */
  get raw() {
    const { confSum: sum, confCount: count } = this;
    return { sum, count };
  }
};
1839
+ var SonioxSTT = class _SonioxSTT {
1840
+ ws = null;
1841
+ callbacks = [];
1842
+ final = new TokenAccumulator();
1843
+ keepaliveTimer = null;
1844
+ apiKey;
1845
+ model;
1846
+ languageHints;
1847
+ languageHintsStrict;
1848
+ sampleRate;
1849
+ numChannels;
1850
+ enableSpeakerDiarization;
1851
+ enableLanguageIdentification;
1852
+ maxEndpointDelayMs;
1853
+ clientReferenceId;
1854
+ baseUrl;
1855
+ constructor(apiKey, options = {}) {
1856
+ if (!apiKey) {
1857
+ throw new Error("Soniox apiKey is required");
1858
+ }
1859
+ const maxEndpointDelayMs = options.maxEndpointDelayMs ?? 500;
1860
+ if (maxEndpointDelayMs < 500 || maxEndpointDelayMs > 3e3) {
1861
+ throw new Error("maxEndpointDelayMs must be between 500 and 3000");
1862
+ }
1863
+ this.apiKey = apiKey;
1864
+ this.model = options.model ?? "stt-rt-v4";
1865
+ this.languageHints = options.languageHints;
1866
+ this.languageHintsStrict = options.languageHintsStrict ?? false;
1867
+ this.sampleRate = options.sampleRate ?? 16e3;
1868
+ this.numChannels = options.numChannels ?? 1;
1869
+ this.enableSpeakerDiarization = options.enableSpeakerDiarization ?? false;
1870
+ this.enableLanguageIdentification = options.enableLanguageIdentification ?? true;
1871
+ this.maxEndpointDelayMs = maxEndpointDelayMs;
1872
+ this.clientReferenceId = options.clientReferenceId;
1873
+ this.baseUrl = options.baseUrl ?? SONIOX_WS_URL;
1874
+ }
1875
+ /** Factory for Twilio-style 8 kHz linear PCM. */
1876
+ static forTwilio(apiKey, languageHints) {
1877
+ return new _SonioxSTT(apiKey, { sampleRate: 8e3, languageHints });
1878
+ }
1879
+ buildConfig() {
1880
+ const config = {
1881
+ api_key: this.apiKey,
1882
+ model: this.model,
1883
+ audio_format: "pcm_s16le",
1884
+ num_channels: this.numChannels,
1885
+ sample_rate: this.sampleRate,
1886
+ enable_endpoint_detection: true,
1887
+ enable_speaker_diarization: this.enableSpeakerDiarization,
1888
+ enable_language_identification: this.enableLanguageIdentification,
1889
+ max_endpoint_delay_ms: this.maxEndpointDelayMs
1890
+ };
1891
+ if (this.languageHints) {
1892
+ config.language_hints = this.languageHints;
1893
+ config.language_hints_strict = this.languageHintsStrict;
1894
+ }
1895
+ if (this.clientReferenceId) {
1896
+ config.client_reference_id = this.clientReferenceId;
1897
+ }
1898
+ return config;
1899
+ }
1900
+ async connect() {
1901
+ this.ws = new WebSocket4(this.baseUrl);
1902
+ await new Promise((resolve, reject) => {
1903
+ const timer = setTimeout(() => reject(new Error("Soniox connect timeout")), 1e4);
1904
+ this.ws.once("open", () => {
1905
+ clearTimeout(timer);
1906
+ resolve();
1907
+ });
1546
1908
  this.ws.once("error", (err) => {
1547
1909
  clearTimeout(timer);
1548
1910
  reject(err);
@@ -1555,13 +1917,13 @@ var SonioxSTT = class _SonioxSTT {
1555
1917
  getLogger().error(`SonioxSTT WebSocket error: ${String(err)}`);
1556
1918
  });
1557
1919
  this.keepaliveTimer = setInterval(() => {
1558
- if (this.ws && this.ws.readyState === WebSocket3.OPEN) {
1920
+ if (this.ws && this.ws.readyState === WebSocket4.OPEN) {
1559
1921
  try {
1560
1922
  this.ws.send(KEEPALIVE_MESSAGE);
1561
1923
  } catch {
1562
1924
  }
1563
1925
  }
1564
- }, KEEPALIVE_INTERVAL_MS);
1926
+ }, KEEPALIVE_INTERVAL_MS2);
1565
1927
  }
1566
1928
  clearKeepalive() {
1567
1929
  if (this.keepaliveTimer) {
@@ -1628,7 +1990,7 @@ var SonioxSTT = class _SonioxSTT {
1628
1990
  }
1629
1991
  }
1630
1992
  sendAudio(audio) {
1631
- if (!this.ws || this.ws.readyState !== WebSocket3.OPEN) return;
1993
+ if (!this.ws || this.ws.readyState !== WebSocket4.OPEN) return;
1632
1994
  if (audio.length === 0) return;
1633
1995
  this.ws.send(audio);
1634
1996
  }
@@ -1658,12 +2020,27 @@ var SonioxSTT = class _SonioxSTT {
1658
2020
  }
1659
2021
  };
1660
2022
 
2023
+ // src/stt/soniox.ts
2024
/**
 * Soniox speech-to-text provider with an environment fallback for the key.
 *
 * The API key is taken from `opts.apiKey`, falling back to
 * `process.env.SONIOX_API_KEY`. Every other option is forwarded unchanged
 * to `SonioxSTT`.
 *
 * @param opts - Optional settings; `apiKey` is stripped before forwarding.
 * @throws {Error} when no API key is available from either source.
 */
var STT4 = class extends SonioxSTT {
  constructor(opts = {}) {
    const resolvedKey = opts.apiKey ?? process.env.SONIOX_API_KEY;
    if (!resolvedKey) {
      throw new Error(
        "Soniox STT requires an apiKey. Pass { apiKey: '...' } or set SONIOX_API_KEY in the environment."
      );
    }
    // Copy the options and drop the key so it is passed only positionally.
    const forwarded = { ...opts };
    delete forwarded.apiKey;
    super(resolvedKey, forwarded);
  }
};
2037
+
1661
2038
  // src/providers/assemblyai-stt.ts
1662
- import WebSocket4 from "ws";
1663
- var DEFAULT_BASE_URL = "wss://streaming.assemblyai.com";
2039
+ import WebSocket5 from "ws";
2040
+ var DEFAULT_BASE_URL2 = "wss://streaming.assemblyai.com";
1664
2041
  var DEFAULT_MIN_TURN_SILENCE_MS = 100;
1665
- var CONNECT_TIMEOUT_MS = 1e4;
1666
- var MAX_CALLBACKS = 10;
2042
+ var CONNECT_TIMEOUT_MS2 = 1e4;
2043
+ var MAX_CALLBACKS2 = 10;
1667
2044
  var AssemblyAISTT = class _AssemblyAISTT {
1668
2045
  constructor(apiKey, options = {}) {
1669
2046
  this.apiKey = apiKey;
@@ -1718,175 +2095,29 @@ var AssemblyAISTT = class _AssemblyAISTT {
1718
2095
  domain: opts.domain
1719
2096
  };
1720
2097
  const params = new URLSearchParams();
1721
- for (const [key, value] of Object.entries(raw)) {
1722
- if (value === void 0 || value === null) continue;
1723
- if (typeof value === "boolean") {
1724
- params.set(key, value ? "true" : "false");
1725
- } else {
1726
- params.set(key, String(value));
1727
- }
1728
- }
1729
- const base = opts.baseUrl ?? DEFAULT_BASE_URL;
1730
- return `${base}/v3/ws?${params.toString()}`;
1731
- }
1732
- async connect() {
1733
- const url = this.buildUrl();
1734
- this.ws = new WebSocket4(url, {
1735
- headers: {
1736
- Authorization: this.apiKey,
1737
- "Content-Type": "application/json",
1738
- "User-Agent": "Patter/1.0 (integration=LiveKit-port)"
1739
- }
1740
- });
1741
- await new Promise((resolve, reject) => {
1742
- const timer = setTimeout(
1743
- () => reject(new Error("AssemblyAI connect timeout")),
1744
- CONNECT_TIMEOUT_MS
1745
- );
1746
- this.ws.once("open", () => {
1747
- clearTimeout(timer);
1748
- resolve();
1749
- });
1750
- this.ws.once("error", (err) => {
1751
- clearTimeout(timer);
1752
- reject(err);
1753
- });
1754
- });
1755
- this.ws.on("message", (raw) => {
1756
- let event;
1757
- try {
1758
- event = JSON.parse(raw.toString());
1759
- } catch {
1760
- return;
1761
- }
1762
- this.handleEvent(event);
1763
- });
1764
- }
1765
- handleEvent(event) {
1766
- const type = event.type;
1767
- if (type === "Begin") {
1768
- this.sessionId = event.id ?? "";
1769
- this.expiresAt = event.expires_at ?? 0;
1770
- return;
1771
- }
1772
- if (type !== "Turn") {
1773
- return;
1774
- }
1775
- const endOfTurn = Boolean(event.end_of_turn);
1776
- const turnIsFormatted = Boolean(event.turn_is_formatted);
1777
- const words = event.words ?? [];
1778
- const transcriptText = (event.transcript ?? "").trim();
1779
- if (endOfTurn) {
1780
- if (this.options.formatTurns && !turnIsFormatted) return;
1781
- if (!transcriptText) return;
1782
- this.emit({
1783
- text: transcriptText,
1784
- isFinal: true,
1785
- confidence: averageConfidence(words)
1786
- });
1787
- return;
1788
- }
1789
- if (!words.length) return;
1790
- const interim = words.map((w) => (w.text ?? "").trim()).filter(Boolean).join(" ");
1791
- if (!interim) return;
1792
- this.emit({
1793
- text: interim,
1794
- isFinal: false,
1795
- confidence: averageConfidence(words)
1796
- });
1797
- }
1798
- emit(transcript) {
1799
- for (const cb of this.callbacks) {
1800
- cb(transcript);
1801
- }
1802
- }
1803
- sendAudio(audio) {
1804
- if (!this.ws || this.ws.readyState !== WebSocket4.OPEN) return;
1805
- this.ws.send(audio);
1806
- }
1807
- onTranscript(callback) {
1808
- if (this.callbacks.length >= MAX_CALLBACKS) {
1809
- getLogger().warn(
1810
- "AssemblyAISTT: maximum of 10 onTranscript callbacks reached; replacing the last callback."
1811
- );
1812
- this.callbacks[this.callbacks.length - 1] = callback;
1813
- return;
1814
- }
1815
- this.callbacks.push(callback);
1816
- }
1817
- close() {
1818
- if (this.ws) {
1819
- try {
1820
- this.ws.send(JSON.stringify({ type: "Terminate" }));
1821
- } catch {
1822
- }
1823
- this.ws.close();
1824
- this.ws = null;
1825
- }
1826
- }
1827
- };
1828
- function averageConfidence(words) {
1829
- if (!words.length) return 0;
1830
- let total = 0;
1831
- for (const w of words) {
1832
- total += Number(w.confidence ?? 0);
1833
- }
1834
- return total / words.length;
1835
- }
1836
-
1837
- // src/providers/cartesia-stt.ts
1838
- import WebSocket5 from "ws";
1839
- var DEFAULT_BASE_URL2 = "https://api.cartesia.ai";
1840
- var API_VERSION = "2025-04-16";
1841
- var USER_AGENT = "Patter/1.0 (integration=LiveKit-port; provider=Cartesia)";
1842
- var KEEPALIVE_INTERVAL_MS2 = 3e4;
1843
- var CONNECT_TIMEOUT_MS2 = 1e4;
1844
- var MAX_CALLBACKS2 = 10;
1845
- var CartesiaSTT = class {
1846
- constructor(apiKey, options = {}) {
1847
- this.apiKey = apiKey;
1848
- this.options = options;
1849
- if (!apiKey) {
1850
- throw new Error("CartesiaSTT requires a non-empty apiKey");
1851
- }
1852
- }
1853
- ws = null;
1854
- callbacks = [];
1855
- keepaliveTimer = null;
1856
- /** Cartesia request id — set from the server transcript events. */
1857
- requestId = "";
1858
- buildWsUrl() {
1859
- const opts = this.options;
1860
- const rawBase = opts.baseUrl ?? DEFAULT_BASE_URL2;
1861
- let base;
1862
- if (rawBase.startsWith("http://")) {
1863
- base = `ws://${rawBase.slice("http://".length)}`;
1864
- } else if (rawBase.startsWith("https://")) {
1865
- base = `wss://${rawBase.slice("https://".length)}`;
1866
- } else if (rawBase.startsWith("ws://") || rawBase.startsWith("wss://")) {
1867
- base = rawBase;
1868
- } else {
1869
- base = `wss://${rawBase}`;
1870
- }
1871
- const language = opts.language ?? "en";
1872
- const params = new URLSearchParams({
1873
- model: opts.model ?? "ink-whisper",
1874
- sample_rate: String(opts.sampleRate ?? 16e3),
1875
- encoding: opts.encoding ?? "pcm_s16le",
1876
- cartesia_version: API_VERSION,
1877
- api_key: this.apiKey,
1878
- language
1879
- });
1880
- return `${base}/stt/websocket?${params.toString()}`;
2098
+ for (const [key, value] of Object.entries(raw)) {
2099
+ if (value === void 0 || value === null) continue;
2100
+ if (typeof value === "boolean") {
2101
+ params.set(key, value ? "true" : "false");
2102
+ } else {
2103
+ params.set(key, String(value));
2104
+ }
2105
+ }
2106
+ const base = opts.baseUrl ?? DEFAULT_BASE_URL2;
2107
+ return `${base}/v3/ws?${params.toString()}`;
1881
2108
  }
1882
2109
  async connect() {
1883
- const url = this.buildWsUrl();
2110
+ const url = this.buildUrl();
1884
2111
  this.ws = new WebSocket5(url, {
1885
- headers: { "User-Agent": USER_AGENT }
2112
+ headers: {
2113
+ Authorization: this.apiKey,
2114
+ "Content-Type": "application/json",
2115
+ "User-Agent": "Patter/1.0 (integration=LiveKit-port)"
2116
+ }
1886
2117
  });
1887
2118
  await new Promise((resolve, reject) => {
1888
2119
  const timer = setTimeout(
1889
- () => reject(new Error("Cartesia STT connect timeout")),
2120
+ () => reject(new Error("AssemblyAI connect timeout")),
1890
2121
  CONNECT_TIMEOUT_MS2
1891
2122
  );
1892
2123
  this.ws.once("open", () => {
@@ -1907,33 +2138,39 @@ var CartesiaSTT = class {
1907
2138
  }
1908
2139
  this.handleEvent(event);
1909
2140
  });
1910
- this.keepaliveTimer = setInterval(() => {
1911
- if (this.ws && this.ws.readyState === WebSocket5.OPEN) {
1912
- try {
1913
- this.ws.ping();
1914
- } catch {
1915
- }
1916
- }
1917
- }, KEEPALIVE_INTERVAL_MS2);
1918
2141
  }
1919
2142
  handleEvent(event) {
1920
2143
  const type = event.type;
1921
- if (type === "transcript") {
1922
- const text = (event.text ?? "").trim();
1923
- const isFinal = Boolean(event.is_final);
1924
- if (!text && !isFinal) return;
1925
- if (event.request_id) {
1926
- this.requestId = event.request_id;
1927
- }
1928
- if (!text) return;
1929
- const confidence = Number(event.probability ?? 1);
1930
- this.emit({ text, isFinal, confidence });
2144
+ if (type === "Begin") {
2145
+ this.sessionId = event.id ?? "";
2146
+ this.expiresAt = event.expires_at ?? 0;
1931
2147
  return;
1932
2148
  }
1933
- if (type === "error") {
1934
- getLogger().error(`Cartesia STT error: ${event.message ?? "unknown"}`);
2149
+ if (type !== "Turn") {
2150
+ return;
2151
+ }
2152
+ const endOfTurn = Boolean(event.end_of_turn);
2153
+ const turnIsFormatted = Boolean(event.turn_is_formatted);
2154
+ const words = event.words ?? [];
2155
+ const transcriptText = (event.transcript ?? "").trim();
2156
+ if (endOfTurn) {
2157
+ if (this.options.formatTurns && !turnIsFormatted) return;
2158
+ if (!transcriptText) return;
2159
+ this.emit({
2160
+ text: transcriptText,
2161
+ isFinal: true,
2162
+ confidence: averageConfidence(words)
2163
+ });
1935
2164
  return;
1936
2165
  }
2166
+ if (!words.length) return;
2167
+ const interim = words.map((w) => (w.text ?? "").trim()).filter(Boolean).join(" ");
2168
+ if (!interim) return;
2169
+ this.emit({
2170
+ text: interim,
2171
+ isFinal: false,
2172
+ confidence: averageConfidence(words)
2173
+ });
1937
2174
  }
1938
2175
  emit(transcript) {
1939
2176
  for (const cb of this.callbacks) {
@@ -1947,7 +2184,7 @@ var CartesiaSTT = class {
1947
2184
  onTranscript(callback) {
1948
2185
  if (this.callbacks.length >= MAX_CALLBACKS2) {
1949
2186
  getLogger().warn(
1950
- "CartesiaSTT: maximum of 10 onTranscript callbacks reached; replacing the last callback."
2187
+ "AssemblyAISTT: maximum of 10 onTranscript callbacks reached; replacing the last callback."
1951
2188
  );
1952
2189
  this.callbacks[this.callbacks.length - 1] = callback;
1953
2190
  return;
@@ -1955,13 +2192,9 @@ var CartesiaSTT = class {
1955
2192
  this.callbacks.push(callback);
1956
2193
  }
1957
2194
  close() {
1958
- if (this.keepaliveTimer) {
1959
- clearInterval(this.keepaliveTimer);
1960
- this.keepaliveTimer = null;
1961
- }
1962
2195
  if (this.ws) {
1963
2196
  try {
1964
- this.ws.send("finalize");
2197
+ this.ws.send(JSON.stringify({ type: "Terminate" }));
1965
2198
  } catch {
1966
2199
  }
1967
2200
  this.ws.close();
@@ -1969,6 +2202,305 @@ var CartesiaSTT = class {
1969
2202
  }
1970
2203
  }
1971
2204
  };
2205
/**
 * Arithmetic mean of the `confidence` field over a list of words.
 *
 * A word without a `confidence` value contributes 0 to the sum but still
 * counts toward the divisor.
 *
 * @param words - Array of objects with an optional `confidence` field.
 * @returns The mean confidence, or 0 for an empty list.
 */
function averageConfidence(words) {
  const count = words.length;
  if (!count) return 0;
  let sum = 0;
  for (let idx = 0; idx < count; idx += 1) {
    sum += Number(words[idx].confidence ?? 0);
  }
  return sum / count;
}
2213
+
2214
+ // src/stt/assemblyai.ts
2215
/**
 * AssemblyAI speech-to-text provider with an environment fallback for the key.
 *
 * The API key is taken from `opts.apiKey`, falling back to
 * `process.env.ASSEMBLYAI_API_KEY`. Every other option is forwarded
 * unchanged to `AssemblyAISTT`.
 *
 * @param opts - Optional settings; `apiKey` is stripped before forwarding.
 * @throws {Error} when no API key is available from either source.
 */
var STT5 = class extends AssemblyAISTT {
  constructor(opts = {}) {
    const resolvedKey = opts.apiKey ?? process.env.ASSEMBLYAI_API_KEY;
    if (!resolvedKey) {
      throw new Error(
        "AssemblyAI STT requires an apiKey. Pass { apiKey: '...' } or set ASSEMBLYAI_API_KEY in the environment."
      );
    }
    // Copy the options and drop the key so it is passed only positionally.
    const forwarded = { ...opts };
    delete forwarded.apiKey;
    super(resolvedKey, forwarded);
  }
};
2228
+
2229
+ // src/providers/elevenlabs-tts.ts
2230
+ var ELEVENLABS_BASE_URL = "https://api.elevenlabs.io/v1";
2231
// Lower-case voice name -> ElevenLabs voice ID. "alloy" is an alias that
// maps to the same ID as "rachel".
var ELEVENLABS_VOICE_ID_BY_NAME = {
  rachel: "21m00Tcm4TlvDq8ikWAM",
  drew: "29vD33N1CtxCmqQRPOHJ",
  clyde: "2EiwWnXFnvU5JabPnv8n",
  paul: "5Q0t7uMcjvnagumLfvZi",
  domi: "AZnzlk1XvdvUeBnXmlld",
  dave: "CYw3kZ02Hs0563khs1Fj",
  fin: "D38z5RcWu1voky8WS1ja",
  bella: "EXAVITQu4vr4xnSDxMaL",
  antoni: "ErXwobaYiN019PkySvjV",
  thomas: "GBv7mTt0atIp3Br8iCZE",
  charlie: "IKne3meq5aSn9XLyUdCD",
  george: "JBFqnCBsd6RMkjVDRZzb",
  emily: "LcfcDJNUP1GQjkzn1xUU",
  elli: "MF3mGyEYCl7XYWbV9V6O",
  callum: "N2lVS1w4EtoT3dr4eOWO",
  patrick: "ODq5zmih8GrVes37Dizd",
  harry: "SOYHLrjzK2X1ezoPC6cr",
  liam: "TX3LPaxmHKxFdv7VOQHJ",
  dorothy: "ThT5KcBeYPX3keUQqHPh",
  josh: "TxGEqnHWrfWFTfGW9XjX",
  arnold: "VR6AewLTigWG4xSOukaG",
  charlotte: "XB0fDUnXU5powFXDhCwa",
  matilda: "XrExE9yKIg1WjnnlVkGX",
  matthew: "Yko7PKHZNXotIFUBG7I9",
  james: "ZQe5CZNOzWyzPSCn5a3c",
  joseph: "Zlb1dXrM653N07WRdFW3",
  jeremy: "bVMeCyTHy58xNoL34h3p",
  michael: "flq6f7yk4E4fJM5XTYuZ",
  ethan: "g5CIjZEefAph4nQFvHAz",
  gigi: "jBpfuIE2acCO8z3wKNLl",
  freya: "jsCqWAovK2LkecY7zXl4",
  brian: "nPczCjzI2devNBz1zQrb",
  grace: "oWAxZDx7w5VEj9dCyTzz",
  daniel: "onwK4e9ZLuTAKqWW03F9",
  lily: "pFZP5JQG7iQjIQuC4Bku",
  serena: "pMsXgVXv3BLzUgSXRplE",
  adam: "pNInz6obpgDQGcFmaJgB",
  nicole: "piTKgcLEGmPE4e6mEKli",
  bill: "pqHfZKP75CvOlQylNhV4",
  jessie: "t0jbNlBVZ17f02VDIeMI",
  ryan: "wViXBPUzp2ZZixB1xQuM",
  sam: "yoZ06aMxZJJ28mfd3POQ",
  glinda: "z9fAnlkpzviPz146aGWa",
  giovanni: "zcAOhNBS3c14rBihAFp1",
  mimi: "zrHiDhphv9ZnVXBqCLjz",
  alloy: "21m00Tcm4TlvDq8ikWAM"
};
// A string of exactly 20 alphanumeric characters is treated as a voice ID.
var VOICE_ID_PATTERN = /^[A-Za-z0-9]{20}$/;
/**
 * Translate a voice name into a voice ID.
 *
 * Resolution order:
 *  1. A falsy value is returned unchanged.
 *  2. A string matching `VOICE_ID_PATTERN` is assumed to already be an ID.
 *  3. Otherwise the lower-cased value is looked up in
 *     `ELEVENLABS_VOICE_ID_BY_NAME`; unknown names are returned unchanged.
 *
 * @param voice - Voice name or voice ID.
 * @returns The resolved voice ID, or the input when nothing matches.
 */
function resolveVoiceId(voice) {
  if (!voice) return voice;
  if (VOICE_ID_PATTERN.test(voice)) return voice;
  const name = voice.toLowerCase();
  // Own-property check: a bare index into the object literal would resolve
  // inherited keys such as "constructor" or "tostring" to Object.prototype
  // members (functions) instead of falling through to the raw voice string.
  if (Object.prototype.hasOwnProperty.call(ELEVENLABS_VOICE_ID_BY_NAME, name)) {
    return ELEVENLABS_VOICE_ID_BY_NAME[name];
  }
  return voice;
}
2285
/**
 * ElevenLabs text-to-speech client built on the streaming HTTP endpoint.
 *
 * The voice passed to the constructor is run through `resolveVoiceId`
 * before it is stored on the instance.
 */
var ElevenLabsTTS = class {
  constructor(apiKey, voiceId = "21m00Tcm4TlvDq8ikWAM", modelId = "eleven_turbo_v2_5", outputFormat = "pcm_16000") {
    Object.assign(this, {
      apiKey,
      modelId,
      outputFormat,
      voiceId: resolveVoiceId(voiceId)
    });
  }
  voiceId;
  /**
   * Synthesise `text` and return all audio as one Buffer.
   *
   * This drains `synthesizeStream`; use the stream directly for large
   * inputs or when latency matters.
   */
  async synthesize(text) {
    const parts = [];
    for await (const part of this.synthesizeStream(text)) {
      parts.push(part);
    }
    return Buffer.concat(parts);
  }
  /**
   * Synthesise `text` and yield audio chunks as they arrive.
   *
   * Chunks are passed through in the configured `outputFormat`
   * (`pcm_16000` by default). The request is aborted after 30 seconds.
   *
   * @throws {Error} on a non-OK HTTP response or a missing response body.
   */
  async *synthesizeStream(text) {
    const voiceSegment = encodeURIComponent(this.voiceId);
    const formatParam = encodeURIComponent(this.outputFormat);
    const endpoint = `${ELEVENLABS_BASE_URL}/text-to-speech/${voiceSegment}/stream?output_format=${formatParam}`;
    const headers = {
      "xi-api-key": this.apiKey,
      "Content-Type": "application/json"
    };
    const payload = JSON.stringify({ text, model_id: this.modelId });
    const response = await fetch(endpoint, {
      method: "POST",
      headers,
      body: payload,
      signal: AbortSignal.timeout(3e4)
    });
    if (!response.ok) {
      const detail = await response.text();
      throw new Error(`ElevenLabs TTS error ${response.status}: ${detail}`);
    }
    if (!response.body) {
      throw new Error("ElevenLabs TTS: no response body");
    }
    const reader = response.body.getReader();
    try {
      for (; ; ) {
        const { done, value } = await reader.read();
        if (done) break;
        const hasBytes = Boolean(value) && value.length > 0;
        if (hasBytes) {
          yield Buffer.from(value);
        }
      }
    } finally {
      // Cancel before releasing so an early consumer exit stops the download.
      if (typeof reader.cancel === "function") {
        await reader.cancel().catch(() => {
        });
      }
      reader.releaseLock();
    }
  }
};
2345
+
2346
+ // src/tts/elevenlabs.ts
2347
/**
 * ElevenLabs text-to-speech provider with an environment fallback for the key.
 *
 * The API key is taken from `opts.apiKey`, falling back to
 * `process.env.ELEVENLABS_API_KEY`. `voiceId`, `modelId` and
 * `outputFormat` default to the values shown below when null or undefined.
 *
 * @param opts - Optional `apiKey`, `voiceId`, `modelId`, `outputFormat`.
 * @throws {Error} when no API key is available from either source.
 */
var TTS = class extends ElevenLabsTTS {
  constructor(opts = {}) {
    const resolvedKey = opts.apiKey ?? process.env.ELEVENLABS_API_KEY;
    if (!resolvedKey) {
      throw new Error(
        "ElevenLabs TTS requires an apiKey. Pass { apiKey: '...' } or set ELEVENLABS_API_KEY in the environment."
      );
    }
    const voice = opts.voiceId ?? "21m00Tcm4TlvDq8ikWAM";
    const model = opts.modelId ?? "eleven_turbo_v2_5";
    const format = opts.outputFormat ?? "pcm_16000";
    super(resolvedKey, voice, model, format);
  }
};
2363
+
2364
+ // src/providers/openai-tts.ts
2365
+ var OPENAI_TTS_URL = "https://api.openai.com/v1/audio/speech";
2366
/**
 * OpenAI text-to-speech client that requests raw PCM and resamples it
 * from 24 kHz to 16 kHz while streaming.
 */
var OpenAITTS = class _OpenAITTS {
  constructor(apiKey, voice = "alloy", model = "tts-1") {
    Object.assign(this, { apiKey, voice, model });
  }
  /**
   * Synthesise `text` and return all audio as one Buffer.
   *
   * This drains `synthesizeStream`; use the stream directly for large
   * inputs or when latency matters.
   */
  async synthesize(text) {
    const parts = [];
    for await (const part of this.synthesizeStream(text)) {
      parts.push(part);
    }
    return Buffer.concat(parts);
  }
  /**
   * Synthesise `text` and yield audio chunks as they arrive.
   *
   * OpenAI returns 24 kHz PCM16; each chunk is resampled to 16 kHz before
   * it is yielded so the output is ready for telephony pipelines.
   *
   * One resampler context is shared across all chunks of a request
   * (buffered samples plus an odd trailing byte). Without it, sample
   * alignment drifts between chunks and the caller hears pops or dropped
   * audio (BUG #23, mirror of the Python `audioop.ratecv` fix).
   *
   * @throws {Error} on a non-OK HTTP response or a missing response body.
   */
  async *synthesizeStream(text) {
    const payload = JSON.stringify({
      model: this.model,
      input: text,
      voice: this.voice,
      response_format: "pcm"
    });
    const response = await fetch(OPENAI_TTS_URL, {
      method: "POST",
      headers: {
        "Authorization": `Bearer ${this.apiKey}`,
        "Content-Type": "application/json"
      },
      body: payload,
      signal: AbortSignal.timeout(3e4)
    });
    if (!response.ok) {
      const detail = await response.text();
      throw new Error(`OpenAI TTS error ${response.status}: ${detail}`);
    }
    if (!response.body) {
      throw new Error("OpenAI TTS: no response body");
    }
    const state = { carryByte: null, leftover: [] };
    const reader = response.body.getReader();
    try {
      for (; ; ) {
        const { done, value } = await reader.read();
        if (done) break;
        const hasBytes = Boolean(value) && value.length > 0;
        if (!hasBytes) continue;
        const resampled = _OpenAITTS.resampleStreaming(Buffer.from(value), state);
        if (resampled.length > 0) yield resampled;
      }
      // Flush samples that never completed a group of three, as-is.
      const pending = state.leftover;
      if (pending.length > 0) {
        const tail = Buffer.alloc(pending.length * 2);
        pending.forEach((sample, idx) => {
          tail.writeInt16LE(sample, idx * 2);
        });
        yield tail;
      }
    } finally {
      // Cancel before releasing so an early consumer exit stops the download.
      if (typeof reader.cancel === "function") {
        await reader.cancel().catch(() => {
        });
      }
      reader.releaseLock();
    }
  }
  /**
   * Streaming 24 kHz → 16 kHz resampler (PCM16-LE). State is kept in `ctx`
   * so the 3:2 pattern does not reset at every network read.
   *
   * Every three input samples produce two output samples: the first sample
   * unchanged, then the truncated average of the second and third.
   *
   * @param audio - Buffer of PCM16-LE bytes (may have odd length).
   * @param ctx - Mutable state `{ carryByte, leftover }`; updated in place.
   * @returns Buffer of resampled PCM16-LE bytes (possibly empty).
   */
  static resampleStreaming(audio, ctx) {
    let pcm = audio;
    // Re-attach the dangling byte left over from the previous chunk.
    if (ctx.carryByte !== null) {
      pcm = Buffer.concat([Buffer.from([ctx.carryByte]), audio]);
      ctx.carryByte = null;
    }
    // An odd byte count means the final sample is split across chunks.
    if (pcm.length % 2 === 1) {
      const lastIndex = pcm.length - 1;
      ctx.carryByte = pcm[lastIndex];
      pcm = pcm.subarray(0, lastIndex);
    }
    if (pcm.length === 0 && ctx.leftover.length === 0) {
      return Buffer.alloc(0);
    }
    const pending = [...ctx.leftover];
    for (let offset = 0; offset < pcm.length; offset += 2) {
      pending.push(pcm.readInt16LE(offset));
    }
    const groupCount = Math.floor(pending.length / 3);
    const result = Buffer.alloc(groupCount * 4);
    for (let group = 0; group < groupCount; group++) {
      const first = group * 3;
      const averaged = Math.trunc((pending[first + 1] + pending[first + 2]) / 2);
      result.writeInt16LE(pending[first], group * 4);
      result.writeInt16LE(averaged, group * 4 + 2);
    }
    // Keep the incomplete group for the next call.
    ctx.leftover = pending.slice(groupCount * 3);
    return result;
  }
  /** @deprecated use {@link resampleStreaming} with persistent state. */
  static resample24kTo16k(audio) {
    const state = { carryByte: null, leftover: [] };
    const head = _OpenAITTS.resampleStreaming(audio, state);
    const pending = state.leftover;
    if (pending.length === 0) return head;
    const tail = Buffer.alloc(pending.length * 2);
    pending.forEach((sample, idx) => {
      tail.writeInt16LE(sample, idx * 2);
    });
    return Buffer.concat([head, tail]);
  }
};
2491
+
2492
+ // src/tts/openai.ts
2493
/**
 * OpenAI text-to-speech provider with an environment fallback for the key.
 *
 * The API key is taken from `opts.apiKey`, falling back to
 * `process.env.OPENAI_API_KEY`. `voice` defaults to "alloy" and `model`
 * to "tts-1" when null or undefined.
 *
 * @param opts - Optional `apiKey`, `voice`, `model`.
 * @throws {Error} when no API key is available from either source.
 */
var TTS2 = class extends OpenAITTS {
  constructor(opts = {}) {
    const resolvedKey = opts.apiKey ?? process.env.OPENAI_API_KEY;
    if (!resolvedKey) {
      throw new Error(
        "OpenAI TTS requires an apiKey. Pass { apiKey: 'sk-...' } or set OPENAI_API_KEY in the environment."
      );
    }
    const voice = opts.voice ?? "alloy";
    const model = opts.model ?? "tts-1";
    super(resolvedKey, voice, model);
  }
};
1972
2504
 
1973
2505
  // src/providers/cartesia-tts.ts
1974
2506
  var CARTESIA_BASE_URL = "https://api.cartesia.ai";
@@ -2068,6 +2600,21 @@ var CartesiaTTS = class {
2068
2600
  }
2069
2601
  };
2070
2602
 
2603
+ // src/tts/cartesia.ts
2604
/**
 * Cartesia text-to-speech provider with an environment fallback for the key.
 *
 * The API key is taken from `opts.apiKey`, falling back to
 * `process.env.CARTESIA_API_KEY`. Every other option is forwarded
 * unchanged to `CartesiaTTS`.
 *
 * @param opts - Optional settings; `apiKey` is stripped before forwarding.
 * @throws {Error} when no API key is available from either source.
 */
var TTS3 = class extends CartesiaTTS {
  constructor(opts = {}) {
    const resolvedKey = opts.apiKey ?? process.env.CARTESIA_API_KEY;
    if (!resolvedKey) {
      throw new Error(
        "Cartesia TTS requires an apiKey. Pass { apiKey: '...' } or set CARTESIA_API_KEY in the environment."
      );
    }
    // Copy the options and drop the key so it is passed only positionally.
    const forwarded = { ...opts };
    delete forwarded.apiKey;
    super(resolvedKey, forwarded);
  }
};
2617
+
2071
2618
  // src/providers/rime-tts.ts
2072
2619
  var RIME_BASE_URL = "https://users.rime.ai/v1/rime-tts";
2073
2620
  var ARCANA_MODEL_TIMEOUT_MS = 60 * 4 * 1e3;
@@ -2195,6 +2742,21 @@ var RimeTTS = class {
2195
2742
  }
2196
2743
  };
2197
2744
 
2745
+ // src/tts/rime.ts
2746
/**
 * Rime text-to-speech provider with an environment fallback for the key.
 *
 * The API key is taken from `opts.apiKey`, falling back to
 * `process.env.RIME_API_KEY`. Every other option is forwarded unchanged
 * to `RimeTTS`.
 *
 * @param opts - Optional settings; `apiKey` is stripped before forwarding.
 * @throws {Error} when no API key is available from either source.
 */
var TTS4 = class extends RimeTTS {
  constructor(opts = {}) {
    const resolvedKey = opts.apiKey ?? process.env.RIME_API_KEY;
    if (!resolvedKey) {
      throw new Error(
        "Rime TTS requires an apiKey. Pass { apiKey: '...' } or set RIME_API_KEY in the environment."
      );
    }
    // Copy the options and drop the key so it is passed only positionally.
    const forwarded = { ...opts };
    delete forwarded.apiKey;
    super(resolvedKey, forwarded);
  }
};
2759
+
2198
2760
  // src/providers/lmnt-tts.ts
2199
2761
  var LMNT_BASE_URL = "https://api.lmnt.com/v1/ai/speech/bytes";
2200
2762
  var LMNTTTS = class {
@@ -2273,6 +2835,119 @@ var LMNTTTS = class {
2273
2835
  }
2274
2836
  };
2275
2837
 
2838
+ // src/tts/lmnt.ts
2839
/**
 * LMNT text-to-speech provider with an environment fallback for the key.
 *
 * The API key is taken from `opts.apiKey`, falling back to
 * `process.env.LMNT_API_KEY`. Every other option is forwarded unchanged
 * to `LMNTTTS`.
 *
 * @param opts - Optional settings; `apiKey` is stripped before forwarding.
 * @throws {Error} when no API key is available from either source.
 */
var TTS5 = class extends LMNTTTS {
  constructor(opts = {}) {
    const resolvedKey = opts.apiKey ?? process.env.LMNT_API_KEY;
    if (!resolvedKey) {
      throw new Error(
        "LMNT TTS requires an apiKey. Pass { apiKey: '...' } or set LMNT_API_KEY in the environment."
      );
    }
    // Copy the options and drop the key so it is passed only positionally.
    const forwarded = { ...opts };
    delete forwarded.apiKey;
    super(resolvedKey, forwarded);
  }
};
2852
+
2853
+ // src/carriers/twilio.ts
2854
/**
 * Twilio carrier credentials.
 *
 * `accountSid` and `authToken` come from the options object, falling back
 * to `TWILIO_ACCOUNT_SID` / `TWILIO_AUTH_TOKEN` in the environment.
 *
 * @param opts - Optional `accountSid` and `authToken`.
 * @throws {Error} when either credential cannot be resolved; the account
 *   SID is checked first.
 */
var Carrier = class {
  kind = "twilio";
  accountSid;
  authToken;
  constructor(opts = {}) {
    const env = process.env;
    const resolvedSid = opts.accountSid ?? env.TWILIO_ACCOUNT_SID;
    const resolvedToken = opts.authToken ?? env.TWILIO_AUTH_TOKEN;
    if (!resolvedSid) {
      throw new Error(
        "Twilio carrier requires accountSid. Pass { accountSid: 'AC...' } or set TWILIO_ACCOUNT_SID in the environment."
      );
    }
    if (!resolvedToken) {
      throw new Error(
        "Twilio carrier requires authToken. Pass { authToken: '...' } or set TWILIO_AUTH_TOKEN in the environment."
      );
    }
    Object.assign(this, { accountSid: resolvedSid, authToken: resolvedToken });
  }
};
2875
+
2876
+ // src/carriers/telnyx.ts
2877
/**
 * Telnyx carrier credentials.
 *
 * `apiKey`, `connectionId` and `publicKey` come from the options object,
 * falling back to `TELNYX_API_KEY`, `TELNYX_CONNECTION_ID` and
 * `TELNYX_PUBLIC_KEY` in the environment. `publicKey` is optional.
 *
 * @param opts - Optional `apiKey`, `connectionId`, `publicKey`.
 * @throws {Error} when `apiKey` or `connectionId` cannot be resolved; the
 *   API key is checked first.
 */
var Carrier2 = class {
  kind = "telnyx";
  apiKey;
  connectionId;
  publicKey;
  constructor(opts = {}) {
    const env = process.env;
    const resolvedKey = opts.apiKey ?? env.TELNYX_API_KEY;
    const resolvedConnection = opts.connectionId ?? env.TELNYX_CONNECTION_ID;
    const resolvedPublicKey = opts.publicKey ?? env.TELNYX_PUBLIC_KEY;
    if (!resolvedKey) {
      throw new Error(
        "Telnyx carrier requires apiKey. Pass { apiKey: '...' } or set TELNYX_API_KEY in the environment."
      );
    }
    if (!resolvedConnection) {
      throw new Error(
        "Telnyx carrier requires connectionId. Pass { connectionId: '...' } or set TELNYX_CONNECTION_ID in the environment."
      );
    }
    Object.assign(this, {
      apiKey: resolvedKey,
      connectionId: resolvedConnection,
      publicKey: resolvedPublicKey
    });
  }
};
2901
+
2902
+ // src/public-api.ts
2903
// Replacement text used when a guardrail does not supply its own.
var DEFAULT_GUARDRAIL_REPLACEMENT = "I'm sorry, I can't respond to that.";
/**
 * Declarative guardrail definition.
 *
 * Holds a required `name`, an optional `blockedTerms` value, an optional
 * `check` value, and the `replacement` text (defaulting to
 * `DEFAULT_GUARDRAIL_REPLACEMENT`). This class only stores configuration.
 *
 * @throws {Error} when `opts.name` is missing or empty.
 */
var Guardrail = class {
  name;
  blockedTerms;
  check;
  replacement;
  constructor(opts) {
    const label = opts.name;
    if (!label) {
      throw new Error("Guardrail requires a non-empty name.");
    }
    this.name = label;
    // Optional fields are assigned only when the caller supplied a truthy value.
    if (opts.blockedTerms) {
      this.blockedTerms = opts.blockedTerms;
    }
    if (opts.check) {
      this.check = opts.check;
    }
    this.replacement = opts.replacement ?? DEFAULT_GUARDRAIL_REPLACEMENT;
  }
};
/**
 * Factory shorthand for `new Guardrail(opts)`.
 *
 * @param opts - Same options object accepted by the `Guardrail` constructor.
 * @returns A new `Guardrail` instance.
 */
function guardrail(opts) {
  const instance = new Guardrail(opts);
  return instance;
}
2922
/**
 * Declarative tool definition backed by exactly one of a function handler
 * or a webhook URL.
 *
 * `description` defaults to an empty string and `parameters` to an empty
 * object schema (`{ type: "object", properties: {} }`).
 *
 * @throws {Error} when `name` is missing or empty, when neither `handler`
 *   nor `webhookUrl` is given, or when both are given.
 */
var Tool = class {
  name;
  description;
  parameters;
  handler;
  webhookUrl;
  constructor(opts) {
    if (!opts.name) {
      throw new Error("Tool requires a non-empty name.");
    }
    const handlerGiven = typeof opts.handler === "function";
    // An empty-string webhook URL counts as "not provided".
    const webhookGiven = typeof opts.webhookUrl === "string" && opts.webhookUrl.length > 0;
    if (!(handlerGiven || webhookGiven)) {
      throw new Error("Tool requires either handler or webhookUrl.");
    }
    if (handlerGiven && webhookGiven) {
      throw new Error("Tool accepts handler OR webhookUrl, not both.");
    }
    this.name = opts.name;
    this.description = opts.description ?? "";
    this.parameters = opts.parameters ?? { type: "object", properties: {} };
    if (handlerGiven) {
      this.handler = opts.handler;
    }
    if (webhookGiven) {
      this.webhookUrl = opts.webhookUrl;
    }
  }
};
/**
 * Factory shorthand for `new Tool(opts)`.
 *
 * @param opts - Same options object accepted by the `Tool` constructor.
 * @returns A new `Tool` instance.
 */
function tool(opts) {
  const instance = new Tool(opts);
  return instance;
}
2950
+
2276
2951
  // src/chat-context.ts
2277
2952
  import { randomUUID } from "crypto";
2278
2953
  function generateId() {
@@ -2883,31 +3558,35 @@ function isAudioConfig(value) {
2883
3558
  }
2884
3559
  export {
2885
3560
  AllProvidersFailedError,
2886
- AssemblyAISTT,
3561
+ STT5 as AssemblyAISTT,
2887
3562
  AuthenticationError,
2888
3563
  BackgroundAudioPlayer,
2889
3564
  BuiltinAudioClip,
2890
3565
  CallMetricsAccumulator,
2891
- CartesiaSTT,
2892
- CartesiaTTS,
3566
+ STT3 as CartesiaSTT,
3567
+ TTS3 as CartesiaTTS,
2893
3568
  ChatContext,
3569
+ CloudflareTunnel,
2894
3570
  DEFAULT_MIN_SENTENCE_LEN,
2895
3571
  DEFAULT_PRICING,
2896
3572
  DTMF_EVENTS,
2897
- DeepgramSTT,
3573
+ STT as DeepgramSTT,
3574
+ ConvAI as ElevenLabsConvAI,
2898
3575
  ElevenLabsConvAIAdapter,
2899
- ElevenLabsTTS,
3576
+ TTS as ElevenLabsTTS,
2900
3577
  FallbackLLMProvider,
2901
3578
  GEMINI_DEFAULT_INPUT_SR,
2902
3579
  GEMINI_DEFAULT_OUTPUT_SR,
2903
3580
  GeminiLiveAdapter,
3581
+ Guardrail,
2904
3582
  IVRActivity,
2905
3583
  LLMLoop,
2906
- LMNTTTS,
3584
+ TTS5 as LMNTTTS,
2907
3585
  MetricsStore,
2908
3586
  OpenAILLMProvider,
3587
+ Realtime as OpenAIRealtime,
2909
3588
  OpenAIRealtimeAdapter,
2910
- OpenAITTS,
3589
+ TTS2 as OpenAITTS,
2911
3590
  PartialStreamError,
2912
3591
  Patter,
2913
3592
  PatterConnectionError,
@@ -2915,15 +3594,19 @@ export {
2915
3594
  PipelineHookExecutor,
2916
3595
  ProvisionError,
2917
3596
  RemoteMessageHandler,
2918
- RimeTTS,
3597
+ TTS4 as RimeTTS,
2919
3598
  SentenceChunker,
2920
- SonioxSTT,
3599
+ STT4 as SonioxSTT,
3600
+ Static as StaticTunnel,
3601
+ Carrier2 as Telnyx,
2921
3602
  TestSession,
2922
3603
  TfidfLoopDetector,
3604
+ Tool,
3605
+ Carrier as Twilio,
2923
3606
  ULTRAVOX_DEFAULT_API_BASE,
2924
3607
  ULTRAVOX_DEFAULT_SR,
2925
3608
  UltravoxRealtimeAdapter,
2926
- WhisperSTT,
3609
+ STT2 as WhisperSTT,
2927
3610
  builtinClipPath,
2928
3611
  calculateRealtimeCost,
2929
3612
  calculateSttCost,
@@ -2939,6 +3622,7 @@ export {
2939
3622
  filterMarkdown,
2940
3623
  formatDtmf,
2941
3624
  getLogger,
3625
+ guardrail,
2942
3626
  isRemoteUrl,
2943
3627
  isWebSocketUrl,
2944
3628
  makeAuthMiddleware,
@@ -2960,5 +3644,6 @@ export {
2960
3644
  selectSoundFromList,
2961
3645
  setLogger,
2962
3646
  startTunnel,
3647
+ tool,
2963
3648
  whisper
2964
3649
  };