getpatter 0.5.1 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -3,21 +3,37 @@ import {
3
3
  } from "./chunk-AFUYSNDH.mjs";
4
4
  import {
5
5
  startTunnel
6
- } from "./chunk-AKQFOFLG.mjs";
6
+ } from "./chunk-SEMKNPCD.mjs";
7
7
  import {
8
+ AuthenticationError,
8
9
  CallMetricsAccumulator,
9
10
  DEFAULT_MIN_SENTENCE_LEN,
10
11
  DEFAULT_PRICING,
11
12
  DeepgramSTT,
13
+ DefaultToolExecutor,
12
14
  ElevenLabsConvAIAdapter,
13
15
  EmbeddedServer,
16
+ EventBus,
14
17
  LLMLoop,
15
18
  MetricsStore,
16
19
  OpenAILLMProvider,
17
20
  OpenAIRealtimeAdapter,
21
+ PatterConnectionError,
22
+ PatterError,
23
+ PcmCarry,
18
24
  PipelineHookExecutor,
25
+ ProvisionError,
26
+ RateLimitError,
19
27
  RemoteMessageHandler,
28
+ SPAN_BARGEIN,
29
+ SPAN_CALL,
30
+ SPAN_ENDPOINT,
31
+ SPAN_LLM,
32
+ SPAN_STT,
33
+ SPAN_TOOL,
34
+ SPAN_TTS,
20
35
  SentenceChunker,
36
+ StatefulResampler,
21
37
  TestSession,
22
38
  calculateRealtimeCost,
23
39
  calculateSttCost,
@@ -25,7 +41,12 @@ import {
25
41
  calculateTtsCost,
26
42
  callsToCsv,
27
43
  callsToJson,
44
+ createResampler16kTo8k,
45
+ createResampler24kTo16k,
46
+ createResampler8kTo16k,
47
+ initTracing,
28
48
  isRemoteUrl,
49
+ isTracingEnabled,
29
50
  isWebSocketUrl,
30
51
  makeAuthMiddleware,
31
52
  mergePricing,
@@ -35,153 +56,14 @@ import {
35
56
  pcm16ToMulaw,
36
57
  resample16kTo8k,
37
58
  resample24kTo16k,
38
- resample8kTo16k
39
- } from "./chunk-B6C3KIBG.mjs";
59
+ resample8kTo16k,
60
+ startSpan
61
+ } from "./chunk-FIFIWBL7.mjs";
40
62
  import {
41
63
  getLogger,
42
64
  setLogger
43
- } from "./chunk-FMNRCP5X.mjs";
44
- import "./chunk-OOIUSZB4.mjs";
45
-
46
- // src/connection.ts
47
- import WebSocket from "ws";
48
-
49
- // src/errors.ts
50
- var PatterError = class extends Error {
51
- constructor(message) {
52
- super(message);
53
- this.name = "PatterError";
54
- }
55
- };
56
- var PatterConnectionError = class extends PatterError {
57
- constructor(message) {
58
- super(message);
59
- this.name = "PatterConnectionError";
60
- }
61
- };
62
- var AuthenticationError = class extends PatterError {
63
- constructor(message) {
64
- super(message);
65
- this.name = "AuthenticationError";
66
- }
67
- };
68
- var ProvisionError = class extends PatterError {
69
- constructor(message) {
70
- super(message);
71
- this.name = "ProvisionError";
72
- }
73
- };
74
-
75
- // src/connection.ts
76
- var DEFAULT_BACKEND_URL = "wss://api.getpatter.com";
77
- var PatterConnection = class {
78
- apiKey;
79
- backendUrl;
80
- wsUrl;
81
- ws = null;
82
- onMessage = null;
83
- onCallStart = null;
84
- onCallEnd = null;
85
- constructor(apiKey, backendUrl = DEFAULT_BACKEND_URL) {
86
- this.apiKey = apiKey;
87
- this.backendUrl = backendUrl.replace(/\/+$/, "");
88
- this.wsUrl = `${this.backendUrl}/ws/sdk`;
89
- }
90
- get isConnected() {
91
- return this.ws !== null && this.ws.readyState === WebSocket.OPEN;
92
- }
93
- async connect(options) {
94
- this.onMessage = options.onMessage;
95
- this.onCallStart = options.onCallStart ?? null;
96
- this.onCallEnd = options.onCallEnd ?? null;
97
- return new Promise((resolve, reject) => {
98
- this.ws = new WebSocket(this.wsUrl, {
99
- headers: { "X-API-Key": this.apiKey }
100
- });
101
- const onError = (err) => {
102
- this.ws?.off("error", onError);
103
- reject(new PatterConnectionError(`Failed to connect: ${err.message}`));
104
- };
105
- this.ws.once("open", () => {
106
- this.ws?.off("error", onError);
107
- this.setupListeners();
108
- resolve();
109
- });
110
- this.ws.on("error", onError);
111
- });
112
- }
113
- setupListeners() {
114
- if (!this.ws) return;
115
- this.ws.on("error", (err) => {
116
- getLogger().error(`WebSocket error: ${err.message}`);
117
- });
118
- this.ws.on("message", async (data) => {
119
- const raw = data.toString();
120
- let parsed;
121
- try {
122
- parsed = JSON.parse(raw);
123
- } catch {
124
- return;
125
- }
126
- const msgType = parsed.type;
127
- if (msgType === "message" && this.onMessage) {
128
- const msg = {
129
- text: parsed.text,
130
- callId: parsed.call_id,
131
- caller: parsed.caller ?? ""
132
- };
133
- try {
134
- const response = await this.onMessage(msg);
135
- if (response != null) {
136
- await this.sendResponse(msg.callId, response);
137
- }
138
- } catch {
139
- }
140
- } else if (msgType === "call_start" && this.onCallStart) {
141
- await this.onCallStart(parsed);
142
- } else if (msgType === "call_end" && this.onCallEnd) {
143
- await this.onCallEnd(parsed);
144
- }
145
- });
146
- this.ws.on("close", () => {
147
- this.ws = null;
148
- });
149
- }
150
- async sendResponse(callId, text) {
151
- if (!this.ws) throw new PatterConnectionError("Not connected");
152
- this.ws.send(JSON.stringify({ type: "response", call_id: callId, text }));
153
- }
154
- async requestCall(fromNumber, toNumber, firstMessage = "") {
155
- if (!this.ws) throw new PatterConnectionError("Not connected");
156
- this.ws.send(
157
- JSON.stringify({
158
- type: "call",
159
- from: fromNumber,
160
- to: toNumber,
161
- first_message: firstMessage
162
- })
163
- );
164
- }
165
- async disconnect() {
166
- if (this.ws) {
167
- this.ws.close();
168
- this.ws = null;
169
- }
170
- }
171
- parseMessage(raw) {
172
- try {
173
- const data = JSON.parse(raw);
174
- if (data.type !== "message") return null;
175
- return {
176
- text: data.text,
177
- callId: data.call_id,
178
- caller: data.caller ?? ""
179
- };
180
- } catch {
181
- return null;
182
- }
183
- }
184
- };
65
+ } from "./chunk-VJVDG4V5.mjs";
66
+ import "./chunk-QHHBUCMT.mjs";
185
67
 
186
68
  // src/engines/openai.ts
187
69
  var Realtime = class {
@@ -241,86 +123,77 @@ var Static = class {
241
123
  this.hostname = opts.hostname;
242
124
  }
243
125
  };
126
+ var Ngrok = class {
127
+ kind = "ngrok";
128
+ hostname;
129
+ constructor(opts = {}) {
130
+ this.hostname = opts.hostname ?? "";
131
+ }
132
+ /**
133
+ * Returns the configured hostname or throws if the marker was constructed
134
+ * without one. Patter does not start ngrok itself — the user is expected
135
+ * to either supply a hostname or run ngrok out-of-band.
136
+ */
137
+ start() {
138
+ if (!this.hostname) {
139
+ throw new Error(
140
+ 'Ngrok requires a hostname; pass new Ngrok({ hostname: "abc.ngrok.io" })'
141
+ );
142
+ }
143
+ return this.hostname;
144
+ }
145
+ };
244
146
 
245
147
  // src/client.ts
246
- var DEFAULT_BACKEND_URL2 = "wss://api.getpatter.com";
247
- var DEFAULT_REST_URL = "https://api.getpatter.com";
248
- function sttConfigToDict(cfg) {
249
- const out = {
250
- provider: cfg.provider,
251
- api_key: cfg.apiKey,
252
- language: cfg.language
253
- };
254
- if (cfg.options) out.options = { ...cfg.options };
255
- return out;
256
- }
257
- function ttsConfigToDict(cfg) {
258
- const out = {
259
- provider: cfg.provider,
260
- api_key: cfg.apiKey,
261
- voice: cfg.voice
262
- };
263
- if (cfg.options) out.options = { ...cfg.options };
264
- return out;
265
- }
266
148
  var Patter = class {
267
- apiKey;
268
- backendUrl;
269
- restUrl;
270
- connection;
271
- mode;
272
149
  localConfig;
273
150
  embeddedServer = null;
274
151
  tunnelHandle = null;
152
+ /**
153
+ * Live `MetricsStore` for the embedded server. Returns `null` before
154
+ * `serve()` is called. Exposed so integrations like `PatterTool` can
155
+ * subscribe to per-call lifecycle events (`call_initiated`,
156
+ * `call_start`, `call_end`).
157
+ */
158
+ get metricsStore() {
159
+ return this.embeddedServer?.metricsStore ?? null;
160
+ }
275
161
  constructor(options) {
276
- const hasCarrier = "carrier" in options && options.carrier !== void 0;
277
- const isLocal = "mode" in options && options.mode === "local" || hasCarrier;
278
- if (isLocal) {
279
- const local = options;
280
- if (!local.phoneNumber) {
281
- throw new Error("Local mode requires phoneNumber");
282
- }
283
- if (!local.carrier) {
162
+ if (options.apiKey !== void 0) {
163
+ throw new Error(
164
+ "Patter Cloud is not yet available in this SDK release. Use local mode with `carrier:` and `phoneNumber:`. Cloud mode will return in a future release."
165
+ );
166
+ }
167
+ if (!options.phoneNumber) {
168
+ throw new Error("Local mode requires phoneNumber");
169
+ }
170
+ if (!options.carrier) {
171
+ throw new Error(
172
+ "Local mode requires a `carrier` instance. Pass `carrier: new Twilio({...})` or `carrier: new Telnyx({...})`."
173
+ );
174
+ }
175
+ const carrier = options.carrier;
176
+ const tunnel = options.tunnel;
177
+ let tunnelWebhookUrl;
178
+ if (tunnel instanceof Static) {
179
+ if (options.webhookUrl) {
284
180
  throw new Error(
285
- "Local mode requires a `carrier` instance. Pass `carrier: new Twilio({...})` or `carrier: new Telnyx({...})`."
181
+ "Cannot use both `tunnel: new StaticTunnel(...)` and `webhookUrl`. Pick one."
286
182
  );
287
183
  }
288
- const carrier = local.carrier;
289
- const tunnel = local.tunnel;
290
- let tunnelWebhookUrl;
291
- if (tunnel instanceof Static) {
292
- if (local.webhookUrl) {
293
- throw new Error(
294
- "Cannot use both `tunnel: new StaticTunnel(...)` and `webhookUrl`. Pick one."
295
- );
296
- }
297
- tunnelWebhookUrl = tunnel.hostname;
298
- }
299
- this.mode = "local";
300
- const rawWebhook = tunnelWebhookUrl ?? local.webhookUrl;
301
- const normalizedWebhook = rawWebhook ? rawWebhook.replace(/^https?:\/\//, "").replace(/\/$/, "") : void 0;
302
- this.localConfig = {
303
- carrier,
304
- phoneNumber: local.phoneNumber,
305
- webhookUrl: normalizedWebhook,
306
- tunnel: local.tunnel,
307
- openaiKey: local.openaiKey
308
- };
309
- this.apiKey = "";
310
- this.backendUrl = DEFAULT_BACKEND_URL2;
311
- this.restUrl = DEFAULT_REST_URL;
312
- this.connection = new PatterConnection("", DEFAULT_BACKEND_URL2);
313
- } else {
314
- const cloudOpts = options;
315
- this.mode = "cloud";
316
- this.localConfig = null;
317
- this.apiKey = cloudOpts.apiKey;
318
- this.backendUrl = cloudOpts.backendUrl ?? DEFAULT_BACKEND_URL2;
319
- this.restUrl = cloudOpts.restUrl ?? DEFAULT_REST_URL;
320
- this.connection = new PatterConnection(this.apiKey, this.backendUrl);
321
- }
184
+ tunnelWebhookUrl = tunnel.hostname;
185
+ }
186
+ const rawWebhook = tunnelWebhookUrl ?? options.webhookUrl;
187
+ const normalizedWebhook = rawWebhook ? rawWebhook.replace(/^https?:\/\//, "").replace(/\/$/, "") : void 0;
188
+ this.localConfig = {
189
+ carrier,
190
+ phoneNumber: options.phoneNumber,
191
+ webhookUrl: normalizedWebhook,
192
+ tunnel: options.tunnel,
193
+ openaiKey: options.openaiKey
194
+ };
322
195
  }
323
- // === Local mode ===
196
+ // === Agent definition ===
324
197
  agent(opts) {
325
198
  let working = { ...opts };
326
199
  if (opts.engine) {
@@ -337,7 +210,7 @@ var Patter = class {
337
210
  model: working.model ?? engine.model,
338
211
  voice: working.voice ?? engine.voice
339
212
  };
340
- if (this.localConfig && !this.localConfig.openaiKey) {
213
+ if (!this.localConfig.openaiKey) {
341
214
  this.localConfig = { ...this.localConfig, openaiKey: engine.apiKey };
342
215
  }
343
216
  } else if (engine instanceof ConvAI) {
@@ -387,10 +260,8 @@ var Patter = class {
387
260
  }
388
261
  return working;
389
262
  }
263
+ // === Serve / test / call ===
390
264
  async serve(opts) {
391
- if (this.mode !== "local" || !this.localConfig) {
392
- throw new Error("serve() is only available in local mode");
393
- }
394
265
  if (!opts.agent || typeof opts.agent !== "object") {
395
266
  throw new TypeError("agent is required. Use phone.agent() to create one.");
396
267
  }
@@ -415,10 +286,13 @@ var Patter = class {
415
286
  if (wantsCloudflared && webhookUrl) {
416
287
  throw new Error("Cannot use both tunnel: true and webhookUrl. Pick one.");
417
288
  }
289
+ const { showBanner } = await import("./banner-3GNZ6VQK.mjs");
290
+ showBanner();
418
291
  if (wantsCloudflared) {
419
- const { startTunnel: startTunnel2 } = await import("./tunnel-O7ICMSTP.mjs");
292
+ const { startTunnel: startTunnel2 } = await import("./tunnel-UVR3PPAU.mjs");
420
293
  this.tunnelHandle = await startTunnel2(port);
421
294
  webhookUrl = this.tunnelHandle.hostname;
295
+ this.localConfig = { ...this.localConfig, webhookUrl };
422
296
  }
423
297
  if (!webhookUrl) {
424
298
  throw new Error(
@@ -427,7 +301,7 @@ var Patter = class {
427
301
  }
428
302
  const carrier = this.localConfig.carrier;
429
303
  const telephonyProvider = carrier.kind === "twilio" ? "twilio" : "telnyx";
430
- const { autoConfigureCarrier } = await import("./carrier-config-CPG5CROM.mjs");
304
+ const { autoConfigureCarrier } = await import("./carrier-config-33HQ2W4V.mjs");
431
305
  await autoConfigureCarrier({
432
306
  telephonyProvider,
433
307
  twilioSid: carrier.kind === "twilio" ? carrier.accountSid : void 0,
@@ -464,138 +338,56 @@ var Patter = class {
464
338
  await this.embeddedServer.start(port);
465
339
  }
466
340
  async test(opts) {
467
- if (this.mode !== "local") {
468
- throw new Error("test() is only available in local mode");
469
- }
470
- const { TestSession: TestSession2 } = await import("./test-mode-JZMYE5HY.mjs");
341
+ const { TestSession: TestSession2 } = await import("./test-mode-MVJ3SKG4.mjs");
471
342
  const session = new TestSession2();
472
343
  await session.run({
473
344
  agent: opts.agent,
474
- openaiKey: this.localConfig?.openaiKey,
345
+ openaiKey: this.localConfig.openaiKey,
475
346
  onMessage: typeof opts.onMessage === "function" ? opts.onMessage : void 0,
476
347
  onCallStart: opts.onCallStart,
477
348
  onCallEnd: opts.onCallEnd
478
349
  });
479
350
  }
480
- // === Cloud mode legacy ===
481
- async connect(options) {
482
- if (options.provider && options.providerKey && options.number) {
483
- await this.registerNumber(
484
- options.provider,
485
- options.providerKey,
486
- options.number,
487
- options.providerSecret,
488
- options.country ?? "US",
489
- options.stt,
490
- options.tts
491
- );
492
- }
493
- await this.connection.connect({
494
- onMessage: options.onMessage,
495
- onCallStart: options.onCallStart,
496
- onCallEnd: options.onCallEnd
497
- });
498
- }
499
351
  async call(options) {
500
- if (this.mode === "local") {
501
- const localOpts = options;
502
- if (!localOpts.to) {
503
- throw new Error("'to' phone number is required");
504
- }
505
- if (!localOpts.to.startsWith("+")) {
506
- throw new Error(`'to' must be in E.164 format (e.g., '+1234567890'). Got: '${localOpts.to}'`);
507
- }
508
- if (!this.localConfig) {
509
- throw new Error("local config missing");
510
- }
511
- const { phoneNumber, webhookUrl, carrier } = this.localConfig;
512
- if (carrier.kind === "telnyx") {
513
- const telnyxKey = carrier.apiKey;
514
- const connectionId = carrier.connectionId;
515
- const streamUrl = `wss://${webhookUrl}/ws/stream/${encodeURIComponent(localOpts.to)}?caller=${encodeURIComponent(phoneNumber)}&callee=${encodeURIComponent(localOpts.to)}`;
516
- const telnyxPayload = {
517
- connection_id: connectionId,
518
- from: phoneNumber,
519
- to: localOpts.to,
520
- stream_url: streamUrl,
521
- stream_track: "both_tracks"
522
- };
523
- if (localOpts.ringTimeout !== void 0) {
524
- telnyxPayload.timeout_secs = Math.max(1, Math.floor(localOpts.ringTimeout));
525
- }
526
- const response2 = await fetch("https://api.telnyx.com/v2/calls", {
527
- method: "POST",
528
- headers: {
529
- "Content-Type": "application/json",
530
- Authorization: `Bearer ${telnyxKey}`
531
- },
532
- body: JSON.stringify(telnyxPayload)
533
- });
534
- if (!response2.ok) {
535
- throw new ProvisionError(`Failed to initiate Telnyx call: ${await response2.text()}`);
536
- }
537
- if (this.embeddedServer) {
538
- try {
539
- const body = await response2.clone().json();
540
- const callId = body.data?.call_control_id;
541
- if (callId) {
542
- this.embeddedServer.metricsStore.recordCallInitiated({
543
- call_id: callId,
544
- caller: phoneNumber,
545
- callee: localOpts.to,
546
- direction: "outbound"
547
- });
548
- }
549
- } catch {
550
- }
551
- }
552
- return;
553
- }
554
- const twilioSid = carrier.accountSid;
555
- const twilioToken = carrier.authToken;
556
- const statusCallbackUrl = `https://${webhookUrl}/webhooks/twilio/status`;
557
- const url = `https://api.twilio.com/2010-04-01/Accounts/${twilioSid}/Calls.json`;
558
- const params = new URLSearchParams({
559
- To: localOpts.to,
560
- From: phoneNumber,
561
- Url: `https://${webhookUrl}/webhooks/twilio/voice`,
562
- StatusCallback: statusCallbackUrl,
563
- StatusCallbackMethod: "POST",
564
- // Full lifecycle so the dashboard sees ringing/no-answer/busy/failed
565
- // transitions even when media never arrives.
566
- StatusCallbackEvent: "initiated ringing answered completed"
567
- });
568
- if (localOpts.machineDetection) {
569
- params.append("MachineDetection", "DetectMessageEnd");
570
- params.append("AsyncAmd", "true");
571
- params.append("AsyncAmdStatusCallback", `https://${webhookUrl}/webhooks/twilio/amd`);
572
- }
573
- if (localOpts.ringTimeout !== void 0) {
574
- params.append("Timeout", String(Math.max(1, Math.floor(localOpts.ringTimeout))));
575
- }
576
- if (localOpts.voicemailMessage && this.embeddedServer) {
577
- this.embeddedServer.voicemailMessage = localOpts.voicemailMessage;
352
+ if (!options.to) {
353
+ throw new Error("'to' phone number is required");
354
+ }
355
+ if (!options.to.startsWith("+")) {
356
+ throw new Error(`'to' must be in E.164 format (e.g., '+1234567890'). Got: '${options.to}'`);
357
+ }
358
+ const { phoneNumber, webhookUrl, carrier } = this.localConfig;
359
+ const effectiveRingTimeout = options.ringTimeout === void 0 ? 25 : options.ringTimeout;
360
+ if (carrier.kind === "telnyx") {
361
+ const telnyxKey = carrier.apiKey;
362
+ const connectionId = carrier.connectionId;
363
+ const telnyxPayload = {
364
+ connection_id: connectionId,
365
+ from: phoneNumber,
366
+ to: options.to
367
+ };
368
+ if (effectiveRingTimeout !== null && effectiveRingTimeout !== void 0) {
369
+ telnyxPayload.timeout_secs = Math.max(1, Math.floor(effectiveRingTimeout));
578
370
  }
579
- const response = await fetch(url, {
371
+ const response2 = await fetch("https://api.telnyx.com/v2/calls", {
580
372
  method: "POST",
581
373
  headers: {
582
- "Content-Type": "application/x-www-form-urlencoded",
583
- Authorization: `Basic ${Buffer.from(`${twilioSid}:${twilioToken}`).toString("base64")}`
374
+ "Content-Type": "application/json",
375
+ Authorization: `Bearer ${telnyxKey}`
584
376
  },
585
- body: params.toString()
377
+ body: JSON.stringify(telnyxPayload)
586
378
  });
587
- if (!response.ok) {
588
- throw new ProvisionError(`Failed to initiate call: ${await response.text()}`);
379
+ if (!response2.ok) {
380
+ throw new ProvisionError(`Failed to initiate Telnyx call: ${await response2.text()}`);
589
381
  }
590
382
  if (this.embeddedServer) {
591
383
  try {
592
- const body = await response.clone().json();
593
- const callSid = body.sid;
594
- if (callSid) {
384
+ const body = await response2.clone().json();
385
+ const callId = body.data?.call_control_id;
386
+ if (callId) {
595
387
  this.embeddedServer.metricsStore.recordCallInitiated({
596
- call_id: callSid,
388
+ call_id: callId,
597
389
  caller: phoneNumber,
598
- callee: localOpts.to,
390
+ callee: options.to,
599
391
  direction: "outbound"
600
392
  });
601
393
  }
@@ -604,21 +396,59 @@ var Patter = class {
604
396
  }
605
397
  return;
606
398
  }
607
- const cloudOpts = options;
608
- if (!this.connection.isConnected) {
609
- if (cloudOpts.onMessage) {
610
- await this.connection.connect({ onMessage: cloudOpts.onMessage });
611
- } else {
612
- throw new PatterConnectionError(
613
- "Not connected. Call connect() first or pass onMessage."
614
- );
399
+ const twilioSid = carrier.accountSid;
400
+ const twilioToken = carrier.authToken;
401
+ const statusCallbackUrl = `https://${webhookUrl}/webhooks/twilio/status`;
402
+ const url = `https://api.twilio.com/2010-04-01/Accounts/${twilioSid}/Calls.json`;
403
+ const streamUrl = `wss://${webhookUrl}/ws/stream/outbound`;
404
+ const inlineTwiml = `<?xml version="1.0" encoding="UTF-8"?><Response><Connect><Stream url="${streamUrl}"/></Connect></Response>`;
405
+ const params = new URLSearchParams({
406
+ To: options.to,
407
+ From: phoneNumber,
408
+ Twiml: inlineTwiml,
409
+ StatusCallback: statusCallbackUrl,
410
+ StatusCallbackMethod: "POST",
411
+ // Full lifecycle so the dashboard sees ringing/no-answer/busy/failed
412
+ // transitions even when media never arrives.
413
+ StatusCallbackEvent: "initiated ringing answered completed"
414
+ });
415
+ if (options.machineDetection) {
416
+ params.append("MachineDetection", "DetectMessageEnd");
417
+ params.append("AsyncAmd", "true");
418
+ params.append("AsyncAmdStatusCallback", `https://${webhookUrl}/webhooks/twilio/amd`);
419
+ }
420
+ if (effectiveRingTimeout !== null && effectiveRingTimeout !== void 0) {
421
+ params.append("Timeout", String(Math.max(1, Math.floor(effectiveRingTimeout))));
422
+ }
423
+ if (options.voicemailMessage && this.embeddedServer) {
424
+ this.embeddedServer.voicemailMessage = options.voicemailMessage;
425
+ }
426
+ const response = await fetch(url, {
427
+ method: "POST",
428
+ headers: {
429
+ "Content-Type": "application/x-www-form-urlencoded",
430
+ Authorization: `Basic ${Buffer.from(`${twilioSid}:${twilioToken}`).toString("base64")}`
431
+ },
432
+ body: params.toString()
433
+ });
434
+ if (!response.ok) {
435
+ throw new ProvisionError(`Failed to initiate call: ${await response.text()}`);
436
+ }
437
+ if (this.embeddedServer) {
438
+ try {
439
+ const body = await response.clone().json();
440
+ const callSid = body.sid;
441
+ if (callSid) {
442
+ this.embeddedServer.metricsStore.recordCallInitiated({
443
+ call_id: callSid,
444
+ caller: phoneNumber,
445
+ callee: options.to,
446
+ direction: "outbound"
447
+ });
448
+ }
449
+ } catch {
615
450
  }
616
451
  }
617
- await this.connection.requestCall(
618
- cloudOpts.fromNumber ?? "",
619
- cloudOpts.to,
620
- cloudOpts.firstMessage ?? ""
621
- );
622
452
  }
623
453
  async disconnect() {
624
454
  if (this.tunnelHandle) {
@@ -629,86 +459,6 @@ var Patter = class {
629
459
  await this.embeddedServer.stop();
630
460
  this.embeddedServer = null;
631
461
  }
632
- await this.connection.disconnect();
633
- }
634
- // === Agent Management ===
635
- async createAgent(opts) {
636
- const response = await fetch(`${this.restUrl}/api/agents`, {
637
- method: "POST",
638
- headers: { "Content-Type": "application/json", "X-API-Key": this.apiKey },
639
- body: JSON.stringify({
640
- name: opts.name,
641
- system_prompt: opts.systemPrompt,
642
- model: opts.model ?? "gpt-4o-mini-realtime-preview",
643
- voice: opts.voice ?? "alloy",
644
- voice_provider: opts.voiceProvider ?? "openai",
645
- language: opts.language ?? "en",
646
- first_message: opts.firstMessage ?? null,
647
- tools: opts.tools?.map((t) => ({ name: t.name, description: t.description, parameters: t.parameters, webhook_url: t.webhookUrl })) ?? null
648
- })
649
- });
650
- if (response.status !== 201) throw new ProvisionError(`Failed to create agent: ${await response.text()}`);
651
- const data = await response.json();
652
- return { id: data.id, name: data.name, systemPrompt: data.system_prompt, model: data.model, voice: data.voice, voiceProvider: data.voice_provider, language: data.language, firstMessage: data.first_message, tools: data.tools };
653
- }
654
- async listAgents() {
655
- const response = await fetch(`${this.restUrl}/api/agents`, { headers: { "X-API-Key": this.apiKey } });
656
- if (!response.ok) throw new ProvisionError(`Failed to list agents: ${response.status}`);
657
- const data = await response.json();
658
- return data.map((a) => ({ id: a.id, name: a.name, systemPrompt: a.system_prompt, model: a.model, voice: a.voice, voiceProvider: a.voice_provider, language: a.language, firstMessage: a.first_message, tools: a.tools }));
659
- }
660
- async buyNumber(opts = {}) {
661
- const response = await fetch(`${this.restUrl}/api/numbers/buy`, {
662
- method: "POST",
663
- headers: { "Content-Type": "application/json", "X-API-Key": this.apiKey },
664
- body: JSON.stringify({ country: opts.country ?? "US", provider: opts.provider ?? "twilio" })
665
- });
666
- if (response.status !== 201) throw new ProvisionError(`Failed to buy number: ${await response.text()}`);
667
- const data = await response.json();
668
- return { id: data.id, number: data.number, provider: data.provider, country: data.country, status: data.status, agentId: data.agent_id };
669
- }
670
- async assignAgent(numberId, agentId) {
671
- const response = await fetch(`${this.restUrl}/api/phone-numbers/${numberId}/assign-agent`, {
672
- method: "POST",
673
- headers: { "Content-Type": "application/json", "X-API-Key": this.apiKey },
674
- body: JSON.stringify({ agent_id: agentId })
675
- });
676
- if (response.status !== 200) throw new ProvisionError(`Failed to assign agent: ${await response.text()}`);
677
- }
678
- async listCalls(limit = 50) {
679
- if (!Number.isInteger(limit) || limit < 1 || limit > 1e3) {
680
- throw new RangeError(`limit must be an integer between 1 and 1000, got ${limit}`);
681
- }
682
- const response = await fetch(`${this.restUrl}/api/calls?limit=${limit}`, { headers: { "X-API-Key": this.apiKey } });
683
- if (!response.ok) throw new ProvisionError(`Failed to list calls: ${response.status}`);
684
- const data = await response.json();
685
- return data.map((c) => ({ id: c.id, direction: c.direction, caller: c.caller, callee: c.callee, startedAt: c.started_at, endedAt: c.ended_at, durationSeconds: c.duration_seconds, status: c.status, transcript: c.transcript }));
686
- }
687
- // Internal
688
- async registerNumber(provider, providerKey, number, providerSecret, country = "US", stt, tts) {
689
- const credentials = { api_key: providerKey };
690
- if (providerSecret) credentials.api_secret = providerSecret;
691
- const response = await fetch(`${this.restUrl}/api/phone-numbers`, {
692
- method: "POST",
693
- headers: {
694
- "Content-Type": "application/json",
695
- "X-API-Key": this.apiKey
696
- },
697
- body: JSON.stringify({
698
- number,
699
- provider,
700
- provider_credentials: credentials,
701
- country,
702
- stt_config: stt ? stt.toDict?.() ?? sttConfigToDict(stt) : null,
703
- tts_config: tts ? tts.toDict?.() ?? ttsConfigToDict(tts) : null
704
- })
705
- });
706
- if (response.status === 409) return;
707
- if (response.status !== 201) {
708
- throw new ProvisionError(
709
- `Failed to register number: ${await response.text()}`
710
- );
711
- }
712
462
  }
713
463
  };
714
464
 
@@ -828,6 +578,46 @@ function elevenlabs(opts) {
828
578
  function openaiTts(opts) {
829
579
  return new TTSConfigImpl("openai", opts.apiKey, opts.voice ?? "alloy");
830
580
  }
581
+ function soniox(opts) {
582
+ return new STTConfigImpl("soniox", opts.apiKey, opts.language ?? "en");
583
+ }
584
+ function speechmatics(_opts) {
585
+ throw new Error(
586
+ "speechmatics() is Python-only right now \u2014 the TS Speechmatics adapter has not shipped yet. Use the Python SDK (sdk-py) or pick another STT provider such as deepgram() / assemblyai() / soniox()."
587
+ );
588
+ }
589
+ function assemblyai(opts) {
590
+ return new STTConfigImpl("assemblyai", opts.apiKey, opts.language ?? "en");
591
+ }
592
+ function cartesia(opts) {
593
+ return new TTSConfigImpl(
594
+ "cartesia",
595
+ opts.apiKey,
596
+ opts.voice ?? "f786b574-daa5-4673-aa0c-cbe3e8534c02"
597
+ );
598
+ }
599
+ function rime(opts) {
600
+ return new TTSConfigImpl("rime", opts.apiKey, opts.voice ?? "astra");
601
+ }
602
+ function lmnt(opts) {
603
+ return new TTSConfigImpl("lmnt", opts.apiKey, opts.voice ?? "leah");
604
+ }
605
+ function ultravox(opts) {
606
+ return {
607
+ provider: "ultravox",
608
+ apiKey: opts.apiKey,
609
+ model: opts.model,
610
+ voice: opts.voice
611
+ };
612
+ }
613
+ function geminiLive(opts) {
614
+ return {
615
+ provider: "gemini_live",
616
+ apiKey: opts.apiKey,
617
+ model: opts.model,
618
+ voice: opts.voice
619
+ };
620
+ }
831
621
 
832
622
  // src/fallback-provider.ts
833
623
  var AllProvidersFailedError = class extends Error {
@@ -1026,13 +816,275 @@ var FallbackLLMProvider = class {
1026
816
  }
1027
817
  };
1028
818
 
819
+ // src/integrations/patter-tool.ts
820
+ import { EventEmitter } from "events";
821
+ var PARAMETERS_SCHEMA = {
822
+ type: "object",
823
+ properties: {
824
+ to: {
825
+ type: "string",
826
+ description: 'Destination phone number in E.164 format (e.g. "+15551234567"). Required.'
827
+ },
828
+ goal: {
829
+ type: "string",
830
+ description: "What the agent should accomplish on the call. Becomes the in-call agent's system prompt for this single call."
831
+ },
832
+ first_message: {
833
+ type: "string",
834
+ description: "Optional first message the agent speaks when the callee answers. Defaults to a generic greeting."
835
+ },
836
+ max_duration_sec: {
837
+ type: "integer",
838
+ description: "Hard timeout for the call in seconds. Default 180. The call is force-ended at this deadline whether or not it has resolved.",
839
+ minimum: 5,
840
+ maximum: 1800
841
+ }
842
+ },
843
+ required: ["to"]
844
+ };
845
+ var DEFAULT_NAME = "make_phone_call";
846
+ var DEFAULT_DESCRIPTION = "Place a real outbound phone call. Returns a JSON object with the full transcript, call status, duration in seconds, and cost. Use this when the user asks you to call someone, schedule appointments by phone, or otherwise reach a human via voice.";
847
+ var PatterTool = class _PatterTool {
848
+ name;
849
+ description;
850
+ phone;
851
+ agent;
852
+ maxDurationSec;
853
+ recording;
854
+ started = false;
855
+ /** Resolver for the next `call_initiated` SSE event. Only set inside the
856
+ * dial mutex (`dialQueue`), so two parallel `execute()` calls never share
857
+ * it and never lose a dispatch. */
858
+ pendingDial = null;
859
+ /** Mutex that serializes the dial → call_id capture critical section.
860
+ * Each `execute()` chains a continuation onto this promise so the
861
+ * `pendingDial` slot is owned by exactly one caller at a time. */
862
+ dialQueue = Promise.resolve();
863
+ /** Captured SSE listener so `stop()` can detach it (prevents leaks when
864
+ * the underlying Patter instance outlives this tool). */
865
+ sseListener = null;
866
+ /** Captured Patter metrics store, for cleanup in `stop()`. */
867
+ metricsStoreRef = null;
868
+ /** call_id → pending promise machinery. */
869
+ pending = /* @__PURE__ */ new Map();
870
+ bus = new EventEmitter();
871
+ /** How long to wait for the `call_initiated` SSE before failing the dial. */
872
+ static DIAL_CAPTURE_TIMEOUT_MS = 1e4;
873
+ constructor(opts) {
874
+ if (!opts.phone) {
875
+ throw new Error("PatterTool: `phone` (a Patter instance) is required.");
876
+ }
877
+ this.phone = opts.phone;
878
+ this.agent = opts.agent;
879
+ this.name = opts.name ?? DEFAULT_NAME;
880
+ this.description = opts.description ?? DEFAULT_DESCRIPTION;
881
+ this.maxDurationSec = Math.max(5, Math.min(1800, opts.maxDurationSec ?? 180));
882
+ this.recording = opts.recording ?? false;
883
+ }
884
+ // --- Schema exporters ---------------------------------------------------
885
+ /** OpenAI Chat Completions / Assistants tool spec. */
886
+ openaiSchema() {
887
+ return {
888
+ type: "function",
889
+ function: {
890
+ name: this.name,
891
+ description: this.description,
892
+ parameters: PARAMETERS_SCHEMA
893
+ }
894
+ };
895
+ }
896
+ /** Anthropic Messages API tool spec. */
897
+ anthropicSchema() {
898
+ return {
899
+ name: this.name,
900
+ description: this.description,
901
+ input_schema: PARAMETERS_SCHEMA
902
+ };
903
+ }
904
+ /**
905
+ * Hermes Agent (Nous Research) registry schema. Same JSON-Schema shape as
906
+ * Anthropic's; Hermes consumes it via `registry.register({ schema: ... })`.
907
+ */
908
+ hermesSchema() {
909
+ return {
910
+ name: this.name,
911
+ description: this.description,
912
+ parameters: PARAMETERS_SCHEMA
913
+ };
914
+ }
915
+ // --- Lifecycle ----------------------------------------------------------
916
+ /** Start the underlying Patter server. Idempotent. */
917
+ async start() {
918
+ if (this.started) return;
919
+ if (!this.agent) {
920
+ throw new Error(
921
+ "PatterTool.start: `agent` config is required. Pass `{ stt, llm, tts }` or an `engine` (e.g. OpenAIRealtime) when constructing PatterTool."
922
+ );
923
+ }
924
+ const builtAgent = this.phone.agent(this.agent);
925
+ await this.phone.serve({
926
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
927
+ agent: builtAgent,
928
+ recording: this.recording,
929
+ onCallEnd: this.onCallEndHandler.bind(this)
930
+ });
931
+ const store = this.phone.metricsStore;
932
+ if (!store) {
933
+ throw new Error(
934
+ "PatterTool.start: phone.metricsStore is null after serve() \u2014 is the dashboard disabled?"
935
+ );
936
+ }
937
+ const listener = (event) => {
938
+ if (event.type === "call_initiated" && this.pendingDial) {
939
+ const callId = event.data.call_id || "";
940
+ if (callId) {
941
+ const dispatch = this.pendingDial;
942
+ this.pendingDial = null;
943
+ dispatch(callId);
944
+ }
945
+ }
946
+ };
947
+ store.on("sse", listener);
948
+ this.sseListener = listener;
949
+ this.metricsStoreRef = store;
950
+ this.started = true;
951
+ }
952
+ /** Stop the underlying Patter server (and reject any pending calls). */
953
+ async stop() {
954
+ if (!this.started) return;
955
+ if (this.metricsStoreRef && this.sseListener) {
956
+ this.metricsStoreRef.off("sse", this.sseListener);
957
+ }
958
+ this.sseListener = null;
959
+ this.metricsStoreRef = null;
960
+ this.pendingDial = null;
961
+ for (const [, p] of this.pending) {
962
+ clearTimeout(p.timer);
963
+ p.reject(new Error("PatterTool: shutdown while call pending"));
964
+ }
965
+ this.pending.clear();
966
+ const stoppable = this.phone;
967
+ if (typeof stoppable.stop === "function") {
968
+ await stoppable.stop();
969
+ }
970
+ this.started = false;
971
+ }
972
+ // --- Execution ----------------------------------------------------------
973
+ async execute(args) {
974
+ if (!this.started) await this.start();
975
+ if (!args || typeof args.to !== "string" || !args.to.startsWith("+")) {
976
+ throw new Error('PatterTool.execute: `to` must be an E.164 phone number (e.g. "+15551234567").');
977
+ }
978
+ const timeoutSec = Math.max(
979
+ 5,
980
+ Math.min(1800, args.max_duration_sec ?? this.maxDurationSec)
981
+ );
982
+ const baseAgent = this.agent ?? {};
983
+ const overrideAgent = this.phone.agent({
984
+ ...baseAgent,
985
+ ...args.goal !== void 0 ? { systemPrompt: args.goal } : {},
986
+ ...args.first_message !== void 0 ? { firstMessage: args.first_message } : {}
987
+ });
988
+ const callId = await this.acquireCallId(args.to, overrideAgent);
989
+ return new Promise((resolve, reject) => {
990
+ const timer = setTimeout(() => {
991
+ this.pending.delete(callId);
992
+ reject(new Error(`PatterTool.execute: call ${callId} exceeded ${timeoutSec}s timeout`));
993
+ }, timeoutSec * 1e3);
994
+ this.pending.set(callId, {
995
+ resolve,
996
+ reject,
997
+ timer,
998
+ startedAt: Date.now() / 1e3
999
+ });
1000
+ });
1001
+ }
1002
+ /** Issue the outbound dial under the mutex and return its assigned call_id. */
1003
+ async acquireCallId(to, agent) {
1004
+ let release;
1005
+ const slot = new Promise((r) => {
1006
+ release = r;
1007
+ });
1008
+ const previous = this.dialQueue;
1009
+ this.dialQueue = previous.then(() => slot);
1010
+ await previous;
1011
+ let captureTimer = null;
1012
+ try {
1013
+ const callIdPromise = new Promise((resolve, reject) => {
1014
+ this.pendingDial = resolve;
1015
+ captureTimer = setTimeout(() => {
1016
+ this.pendingDial = null;
1017
+ reject(
1018
+ new Error(
1019
+ `PatterTool.execute: did not observe call_initiated within ${_PatterTool.DIAL_CAPTURE_TIMEOUT_MS}ms`
1020
+ )
1021
+ );
1022
+ }, _PatterTool.DIAL_CAPTURE_TIMEOUT_MS);
1023
+ });
1024
+ await this.phone.call({
1025
+ to,
1026
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
1027
+ agent
1028
+ });
1029
+ const callId = await callIdPromise;
1030
+ if (captureTimer) clearTimeout(captureTimer);
1031
+ return callId;
1032
+ } finally {
1033
+ if (captureTimer) clearTimeout(captureTimer);
1034
+ this.pendingDial = null;
1035
+ release();
1036
+ }
1037
+ }
1038
+ /**
1039
+ * Hermes-style handler: `(args, kwargs) => Promise<string>` returning a JSON
1040
+ * string with either the result envelope or an `{"error": "..."}` payload.
1041
+ * Mirrors the Python `PatterTool.hermes_handler` so cross-SDK adapters share
1042
+ * the same wire contract.
1043
+ */
1044
+ hermesHandler() {
1045
+ return async (args) => {
1046
+ try {
1047
+ const result = await this.execute(args);
1048
+ return JSON.stringify(result);
1049
+ } catch (err) {
1050
+ return JSON.stringify({ error: err instanceof Error ? err.message : String(err) });
1051
+ }
1052
+ };
1053
+ }
1054
+ // --- Internal: onCallEnd dispatcher -------------------------------------
1055
+ async onCallEndHandler(data) {
1056
+ const callId = data.call_id || "";
1057
+ if (!callId) return;
1058
+ const pending = this.pending.get(callId);
1059
+ if (!pending) {
1060
+ this.bus.emit("orphan_end", { call_id: callId, data });
1061
+ return;
1062
+ }
1063
+ clearTimeout(pending.timer);
1064
+ this.pending.delete(callId);
1065
+ const metrics = data.metrics && typeof data.metrics === "object" ? data.metrics : null;
1066
+ const cost = metrics && typeof metrics.cost === "object" && metrics.cost && typeof metrics.cost.total === "number" ? metrics.cost.total : void 0;
1067
+ const duration = typeof metrics?.duration_seconds === "number" ? metrics?.duration_seconds : Math.max(0, Date.now() / 1e3 - pending.startedAt);
1068
+ const transcript = Array.isArray(data.transcript) ? data.transcript : [];
1069
+ const status = data.status || "completed";
1070
+ pending.resolve({
1071
+ call_id: callId,
1072
+ status,
1073
+ duration_seconds: duration,
1074
+ cost_usd: cost,
1075
+ transcript,
1076
+ metrics
1077
+ });
1078
+ }
1079
+ };
1080
+
1029
1081
  // src/providers/gemini-live.ts
1030
1082
  var GEMINI_DEFAULT_INPUT_SR = 16e3;
1031
1083
  var GEMINI_DEFAULT_OUTPUT_SR = 24e3;
1032
1084
  var GeminiLiveAdapter = class {
1033
1085
  constructor(apiKey, options = {}) {
1034
1086
  this.apiKey = apiKey;
1035
- this.model = options.model ?? "gemini-2.0-flash-exp";
1087
+ this.model = options.model ?? "gemini-2.5-flash-native-audio-preview-09-2025";
1036
1088
  this.voice = options.voice ?? "Puck";
1037
1089
  this.instructions = options.instructions ?? "";
1038
1090
  this.language = options.language ?? "en-US";
@@ -1055,18 +1107,27 @@ var GeminiLiveAdapter = class {
1055
1107
  receiveLoop = null;
1056
1108
  handlers = [];
1057
1109
  running = false;
1110
+ /**
1111
+ * Tracks call_id -> function name so tool responses can be sent back with
1112
+ * the correct `name` field (Gemini expects the original function name,
1113
+ * not the call_id).
1114
+ */
1115
+ pendingToolCalls = /* @__PURE__ */ new Map();
1058
1116
  async connect() {
1059
1117
  let genaiModule;
1060
1118
  try {
1061
1119
  const modName = "@google/genai";
1062
1120
  genaiModule = await import(modName);
1063
- } catch (err) {
1121
+ } catch {
1064
1122
  throw new Error(
1065
- "Gemini Live requires the '@google/genai' package. Install with: npm install @google/genai"
1123
+ '\nGemini Live requires the "@google/genai" package, which is not installed.\n\n Install: npm install @google/genai\n\nThis is an optional peer dependency of getpatter \u2014 it is only needed when\nyou use GeminiLive as an agent engine. Other LLM/engine providers do not\nrequire it.\n'
1066
1124
  );
1067
1125
  }
1068
1126
  const { GoogleGenAI } = genaiModule;
1069
- this.client = new GoogleGenAI({ apiKey: this.apiKey });
1127
+ this.client = new GoogleGenAI({
1128
+ apiKey: this.apiKey,
1129
+ httpOptions: { apiVersion: "v1alpha" }
1130
+ });
1070
1131
  const config = {
1071
1132
  responseModalities: ["AUDIO"],
1072
1133
  speechConfig: {
@@ -1123,9 +1184,11 @@ var GeminiLiveAdapter = class {
1123
1184
  async sendFunctionResult(callId, result) {
1124
1185
  if (!this.session) return;
1125
1186
  const sess = this.session;
1187
+ const name = this.pendingToolCalls.get(callId) ?? callId;
1188
+ this.pendingToolCalls.delete(callId);
1126
1189
  await sess.sendToolResponse?.({
1127
1190
  functionResponses: [
1128
- { id: callId, name: callId, response: { result } }
1191
+ { id: callId, name, response: { result } }
1129
1192
  ]
1130
1193
  });
1131
1194
  }
@@ -1169,9 +1232,14 @@ var GeminiLiveAdapter = class {
1169
1232
  if (r.toolCall) {
1170
1233
  for (const fn of r.toolCall.functionCalls ?? []) {
1171
1234
  const args = fn.args ?? {};
1235
+ const callId = fn.id ?? "";
1236
+ const fnName = fn.name ?? "";
1237
+ if (callId && fnName) {
1238
+ this.pendingToolCalls.set(callId, fnName);
1239
+ }
1172
1240
  await this.emit("function_call", {
1173
- call_id: fn.id ?? "",
1174
- name: fn.name ?? "",
1241
+ call_id: callId,
1242
+ name: fnName,
1175
1243
  arguments: typeof args === "string" ? args : JSON.stringify(args)
1176
1244
  });
1177
1245
  }
@@ -1198,11 +1266,12 @@ var GeminiLiveAdapter = class {
1198
1266
  await this.receiveLoop.catch(() => void 0);
1199
1267
  this.receiveLoop = null;
1200
1268
  }
1269
+ this.pendingToolCalls.clear();
1201
1270
  }
1202
1271
  };
1203
1272
 
1204
1273
  // src/providers/ultravox-realtime.ts
1205
- import WebSocket2 from "ws";
1274
+ import WebSocket from "ws";
1206
1275
  var ULTRAVOX_DEFAULT_API_BASE = "https://api.ultravox.ai/api";
1207
1276
  var ULTRAVOX_DEFAULT_SR = 16e3;
1208
1277
  var UltravoxRealtimeAdapter = class {
@@ -1239,7 +1308,6 @@ var UltravoxRealtimeAdapter = class {
1239
1308
  outputSampleRate: this.sampleRate
1240
1309
  }
1241
1310
  },
1242
- firstSpeaker: this.firstMessage ? "FIRST_SPEAKER_AGENT" : "FIRST_SPEAKER_USER",
1243
1311
  recordingEnabled: false
1244
1312
  };
1245
1313
  if (this.voice) body.voice = this.voice;
@@ -1249,6 +1317,8 @@ var UltravoxRealtimeAdapter = class {
1249
1317
  body.initialMessages = [
1250
1318
  { role: "MESSAGE_ROLE_AGENT", text: this.firstMessage }
1251
1319
  ];
1320
+ } else {
1321
+ body.firstSpeaker = "FIRST_SPEAKER_USER";
1252
1322
  }
1253
1323
  if (this.tools?.length) {
1254
1324
  body.selectedTools = this.tools.map((t) => ({
@@ -1273,7 +1343,7 @@ var UltravoxRealtimeAdapter = class {
1273
1343
  }
1274
1344
  const call = await resp.json();
1275
1345
  if (!call.joinUrl) throw new Error("Ultravox response missing joinUrl");
1276
- this.ws = new WebSocket2(call.joinUrl);
1346
+ this.ws = new WebSocket(call.joinUrl);
1277
1347
  await new Promise((resolve, reject) => {
1278
1348
  const ws = this.ws;
1279
1349
  const onOpen = () => {
@@ -1298,14 +1368,16 @@ var UltravoxRealtimeAdapter = class {
1298
1368
  });
1299
1369
  }
1300
1370
  sendAudio(pcm) {
1301
- if (!this.ws || this.ws.readyState !== WebSocket2.OPEN) return;
1371
+ if (!this.ws || this.ws.readyState !== WebSocket.OPEN) return;
1302
1372
  this.ws.send(pcm, { binary: true });
1303
1373
  }
1304
1374
  async sendText(text) {
1305
- this.ws?.send(JSON.stringify({ type: "input_text_message", text }));
1375
+ if (!this.ws || this.ws.readyState !== WebSocket.OPEN) return;
1376
+ this.ws.send(JSON.stringify({ type: "input_text_message", text }));
1306
1377
  }
1307
1378
  async sendFunctionResult(callId, result) {
1308
- this.ws?.send(
1379
+ if (!this.ws || this.ws.readyState !== WebSocket.OPEN) return;
1380
+ this.ws.send(
1309
1381
  JSON.stringify({
1310
1382
  type: "client_tool_result",
1311
1383
  invocationId: callId,
@@ -1315,7 +1387,8 @@ var UltravoxRealtimeAdapter = class {
1315
1387
  );
1316
1388
  }
1317
1389
  cancelResponse() {
1318
- this.ws?.send(JSON.stringify({ type: "playback_clear_buffer" }));
1390
+ if (!this.ws || this.ws.readyState !== WebSocket.OPEN) return;
1391
+ this.ws.send(JSON.stringify({ type: "playback_clear_buffer" }));
1319
1392
  }
1320
1393
  onEvent(handler) {
1321
1394
  this.handlers.push(handler);
@@ -1396,7 +1469,7 @@ async function loadCron() {
1396
1469
  try {
1397
1470
  const imported = await import(
1398
1471
  /* @vite-ignore */
1399
- "./node-cron-373UVDIO.mjs"
1472
+ "./node-cron-6PRPSBG5.mjs"
1400
1473
  );
1401
1474
  cronModule = imported && imported.default ? imported.default : imported;
1402
1475
  return cronModule;
@@ -1504,6 +1577,7 @@ function scheduleInterval(intervalOrOpts, callback) {
1504
1577
 
1505
1578
  // src/stt/deepgram.ts
1506
1579
  var STT = class extends DeepgramSTT {
1580
+ static providerKey = "deepgram";
1507
1581
  constructor(opts = {}) {
1508
1582
  const key = opts.apiKey ?? process.env.DEEPGRAM_API_KEY;
1509
1583
  if (!key) {
@@ -1531,6 +1605,7 @@ var STT = class extends DeepgramSTT {
1531
1605
  // src/providers/whisper-stt.ts
1532
1606
  var OPENAI_TRANSCRIPTION_URL = "https://api.openai.com/v1/audio/transcriptions";
1533
1607
  var DEFAULT_BUFFER_SIZE = 16e3 * 2;
1608
+ var ALLOWED_MODELS = /* @__PURE__ */ new Set(["whisper-1", "gpt-4o-transcribe", "gpt-4o-mini-transcribe"]);
1534
1609
  function wrapPcmInWav(pcm, sampleRate = 16e3, channels = 1, bitsPerSample = 16) {
1535
1610
  const dataSize = pcm.length;
1536
1611
  const header = Buffer.alloc(44);
@@ -1554,33 +1629,63 @@ var WhisperSTT = class _WhisperSTT {
1554
1629
  model;
1555
1630
  language;
1556
1631
  bufferSize;
1557
- buffer = Buffer.alloc(0);
1558
- callbacks = [];
1632
+ responseFormat;
1633
+ // Accumulate chunks in an array and concat once on flush — avoids the
1634
+ // per-``sendAudio`` O(n) ``Buffer.concat([buffer, chunk])`` that quickly
1635
+ // dominates CPU when the phone leg delivers 20 ms frames.
1636
+ chunks = [];
1637
+ bufferedBytes = 0;
1638
+ callbacks = /* @__PURE__ */ new Set();
1559
1639
  running = false;
1560
1640
  pendingTranscriptions = [];
1561
- constructor(apiKey, model = "whisper-1", language, bufferSize = DEFAULT_BUFFER_SIZE) {
1641
+ /**
1642
+ * @param apiKey OpenAI API key.
1643
+ * @param language ISO-639-1 language code (e.g. ``"en"``, ``"it"``). Optional.
1644
+ * @param model One of ``whisper-1``, ``gpt-4o-transcribe``, ``gpt-4o-mini-transcribe``.
1645
+ * @param bufferSize Bytes of PCM16 to buffer before each transcription request.
1646
+ * @param responseFormat ``"json"`` (default) or ``"verbose_json"``.
1647
+ *
1648
+ * Argument order matches the Python SDK's ``WhisperSTT(api_key, language, model, response_format)``
1649
+ * for cross-language parity. Pre-0.5.3 the TS positional order was
1650
+ * ``(apiKey, model, language, bufferSize, responseFormat)`` — callers using
1651
+ * the old order will need to swap ``language`` and ``model``.
1652
+ */
1653
+ constructor(apiKey, language, model = "whisper-1", bufferSize = DEFAULT_BUFFER_SIZE, responseFormat = "json") {
1654
+ if (!ALLOWED_MODELS.has(model)) {
1655
+ throw new Error(
1656
+ `WhisperSTT: unsupported model "${model}". Expected one of ${[...ALLOWED_MODELS].join(", ")}.`
1657
+ );
1658
+ }
1562
1659
  this.apiKey = apiKey;
1563
1660
  this.model = model;
1564
1661
  this.language = language;
1565
1662
  this.bufferSize = bufferSize;
1663
+ this.responseFormat = responseFormat;
1566
1664
  }
1567
1665
  /** Factory for Twilio calls — mulaw 8 kHz is transcoded upstream, so we still receive PCM 16-bit. */
1568
1666
  static forTwilio(apiKey, language = "en", model = "whisper-1") {
1569
- return new _WhisperSTT(apiKey, model, language);
1667
+ return new _WhisperSTT(apiKey, language, model);
1570
1668
  }
1571
1669
  async connect() {
1572
1670
  this.running = true;
1573
- this.buffer = Buffer.alloc(0);
1671
+ this.chunks = [];
1672
+ this.bufferedBytes = 0;
1574
1673
  }
1575
1674
  sendAudio(audio) {
1576
1675
  if (!this.running) return;
1577
- this.buffer = Buffer.concat([this.buffer, audio]);
1578
- if (this.buffer.length >= this.bufferSize) {
1579
- const pcm = this.buffer;
1580
- this.buffer = Buffer.alloc(0);
1676
+ this.chunks.push(audio);
1677
+ this.bufferedBytes += audio.length;
1678
+ if (this.bufferedBytes >= this.bufferSize) {
1679
+ const pcm = this.flushChunks();
1581
1680
  this.trackTranscription(this.transcribeBuffer(pcm));
1582
1681
  }
1583
1682
  }
1683
+ flushChunks() {
1684
+ const pcm = this.chunks.length === 1 ? this.chunks[0] : Buffer.concat(this.chunks, this.bufferedBytes);
1685
+ this.chunks = [];
1686
+ this.bufferedBytes = 0;
1687
+ return pcm;
1688
+ }
1584
1689
  trackTranscription(promise) {
1585
1690
  const wrapped = promise.finally(() => {
1586
1691
  const idx = this.pendingTranscriptions.indexOf(wrapped);
@@ -1588,25 +1693,25 @@ var WhisperSTT = class _WhisperSTT {
1588
1693
  });
1589
1694
  this.pendingTranscriptions.push(wrapped);
1590
1695
  }
1696
+ /**
1697
+ * Register a transcript listener. Unlike the previous implementation
1698
+ * which capped at 10 and silently replaced the last one, we now keep all
1699
+ * registered callbacks in a Set; use {@link offTranscript} to remove one.
1700
+ */
1591
1701
  onTranscript(callback) {
1592
- if (this.callbacks.length >= 10) {
1593
- getLogger().warn("WhisperSTT: maximum of 10 onTranscript callbacks reached; replacing the last callback.");
1594
- this.callbacks[this.callbacks.length - 1] = callback;
1595
- return;
1596
- }
1597
- this.callbacks.push(callback);
1702
+ this.callbacks.add(callback);
1703
+ }
1704
+ offTranscript(callback) {
1705
+ this.callbacks.delete(callback);
1598
1706
  }
1599
1707
  async close() {
1600
1708
  this.running = false;
1601
- if (this.buffer.length >= this.bufferSize / 4) {
1602
- const pcm = this.buffer;
1603
- this.buffer = Buffer.alloc(0);
1709
+ if (this.bufferedBytes > 0) {
1710
+ const pcm = this.flushChunks();
1604
1711
  this.trackTranscription(this.transcribeBuffer(pcm));
1605
- } else {
1606
- this.buffer = Buffer.alloc(0);
1607
1712
  }
1608
1713
  await Promise.allSettled(this.pendingTranscriptions);
1609
- this.callbacks = [];
1714
+ this.callbacks.clear();
1610
1715
  }
1611
1716
  // ------------------------------------------------------------------
1612
1717
  // Private
@@ -1616,6 +1721,7 @@ var WhisperSTT = class _WhisperSTT {
1616
1721
  const formData = new FormData();
1617
1722
  formData.append("file", new Blob([wav.buffer.slice(wav.byteOffset, wav.byteOffset + wav.byteLength)], { type: "audio/wav" }), "audio.wav");
1618
1723
  formData.append("model", this.model);
1724
+ formData.append("response_format", this.responseFormat);
1619
1725
  if (this.language) {
1620
1726
  formData.append("language", this.language);
1621
1727
  }
@@ -1637,7 +1743,7 @@ var WhisperSTT = class _WhisperSTT {
1637
1743
  const transcript = {
1638
1744
  text,
1639
1745
  isFinal: true,
1640
- confidence: 1
1746
+ confidence: extractConfidence(json)
1641
1747
  };
1642
1748
  for (const cb of this.callbacks) {
1643
1749
  cb(transcript);
@@ -1647,9 +1753,23 @@ var WhisperSTT = class _WhisperSTT {
1647
1753
  }
1648
1754
  }
1649
1755
  };
1756
+ function extractConfidence(payload) {
1757
+ const segments = payload.segments;
1758
+ if (!segments || segments.length === 0) return 1;
1759
+ const scores = [];
1760
+ for (const seg of segments) {
1761
+ const logp = seg.avg_logprob;
1762
+ if (typeof logp === "number") {
1763
+ scores.push(Math.max(0, Math.min(1, Math.exp(logp))));
1764
+ }
1765
+ }
1766
+ if (scores.length === 0) return 1;
1767
+ return scores.reduce((a, b) => a + b, 0) / scores.length;
1768
+ }
1650
1769
 
1651
1770
  // src/stt/whisper.ts
1652
1771
  var STT2 = class extends WhisperSTT {
1772
+ static providerKey = "whisper";
1653
1773
  constructor(opts = {}) {
1654
1774
  const key = opts.apiKey ?? process.env.OPENAI_API_KEY;
1655
1775
  if (!key) {
@@ -1657,18 +1777,53 @@ var STT2 = class extends WhisperSTT {
1657
1777
  "Whisper STT requires an apiKey. Pass { apiKey: 'sk-...' } or set OPENAI_API_KEY in the environment."
1658
1778
  );
1659
1779
  }
1660
- super(key, opts.model ?? "whisper-1", opts.language, opts.bufferSize);
1780
+ super(key, opts.language, opts.model ?? "whisper-1", opts.bufferSize, opts.responseFormat ?? "json");
1781
+ }
1782
+ };
1783
+
1784
+ // src/providers/openai-transcribe-stt.ts
1785
+ var ALLOWED_MODELS2 = /* @__PURE__ */ new Set(["gpt-4o-transcribe", "gpt-4o-mini-transcribe"]);
1786
+ var DEFAULT_BUFFER_SIZE2 = 16e3 * 2;
1787
+ var OpenAITranscribeSTT = class extends WhisperSTT {
1788
+ /**
1789
+ * @param apiKey OpenAI API key.
1790
+ * @param language ISO-639-1 language code (e.g. ``"en"``, ``"it"``). Optional.
1791
+ * @param model One of ``gpt-4o-transcribe`` (default), ``gpt-4o-mini-transcribe``.
1792
+ * ``"whisper-1"`` is intentionally rejected here — use ``WhisperSTT`` for that.
1793
+ * @param bufferSize Bytes of PCM16 to buffer before each transcription request.
1794
+ * @param responseFormat ``"json"`` (default) or ``"verbose_json"``.
1795
+ */
1796
+ constructor(apiKey, language, model = "gpt-4o-transcribe", bufferSize = DEFAULT_BUFFER_SIZE2, responseFormat = "json") {
1797
+ if (!ALLOWED_MODELS2.has(model)) {
1798
+ throw new Error(
1799
+ `OpenAITranscribeSTT: unsupported model "${model}". Expected one of ${[...ALLOWED_MODELS2].join(", ")}. For "whisper-1", use WhisperSTT instead.`
1800
+ );
1801
+ }
1802
+ super(apiKey, language, model, bufferSize, responseFormat);
1803
+ }
1804
+ };
1805
+
1806
+ // src/stt/openai-transcribe.ts
1807
+ var STT3 = class extends OpenAITranscribeSTT {
1808
+ static providerKey = "openai_transcribe";
1809
+ constructor(opts = {}) {
1810
+ const key = opts.apiKey ?? process.env.OPENAI_API_KEY;
1811
+ if (!key) {
1812
+ throw new Error(
1813
+ "OpenAI Transcribe STT requires an apiKey. Pass { apiKey: 'sk-...' } or set OPENAI_API_KEY in the environment."
1814
+ );
1815
+ }
1816
+ super(key, opts.language, opts.model ?? "gpt-4o-transcribe", opts.bufferSize, opts.responseFormat ?? "json");
1661
1817
  }
1662
1818
  };
1663
1819
 
1664
1820
  // src/providers/cartesia-stt.ts
1665
- import WebSocket3 from "ws";
1821
+ import WebSocket2 from "ws";
1666
1822
  var DEFAULT_BASE_URL = "https://api.cartesia.ai";
1667
1823
  var API_VERSION = "2025-04-16";
1668
1824
  var USER_AGENT = "Patter/1.0 (integration=LiveKit-port; provider=Cartesia)";
1669
1825
  var KEEPALIVE_INTERVAL_MS = 3e4;
1670
1826
  var CONNECT_TIMEOUT_MS = 1e4;
1671
- var MAX_CALLBACKS = 10;
1672
1827
  var CartesiaSTT = class {
1673
1828
  constructor(apiKey, options = {}) {
1674
1829
  this.apiKey = apiKey;
@@ -1678,10 +1833,13 @@ var CartesiaSTT = class {
1678
1833
  }
1679
1834
  }
1680
1835
  ws = null;
1681
- callbacks = [];
1836
+ callbacks = /* @__PURE__ */ new Set();
1682
1837
  keepaliveTimer = null;
1683
- /** Cartesia request id — set from the server transcript events. */
1684
- requestId = "";
1838
+ /**
1839
+ * Cartesia request id — set from the server transcript events.
1840
+ * `null` until the first transcript event arrives (matches Python's `None`).
1841
+ */
1842
+ requestId = null;
1685
1843
  buildWsUrl() {
1686
1844
  const opts = this.options;
1687
1845
  const rawBase = opts.baseUrl ?? DEFAULT_BASE_URL;
@@ -1708,7 +1866,7 @@ var CartesiaSTT = class {
1708
1866
  }
1709
1867
  async connect() {
1710
1868
  const url = this.buildWsUrl();
1711
- this.ws = new WebSocket3(url, {
1869
+ this.ws = new WebSocket2(url, {
1712
1870
  headers: { "User-Agent": USER_AGENT }
1713
1871
  });
1714
1872
  await new Promise((resolve, reject) => {
@@ -1735,7 +1893,7 @@ var CartesiaSTT = class {
1735
1893
  this.handleEvent(event);
1736
1894
  });
1737
1895
  this.keepaliveTimer = setInterval(() => {
1738
- if (this.ws && this.ws.readyState === WebSocket3.OPEN) {
1896
+ if (this.ws && this.ws.readyState === WebSocket2.OPEN) {
1739
1897
  try {
1740
1898
  this.ws.ping();
1741
1899
  } catch {
@@ -1768,19 +1926,24 @@ var CartesiaSTT = class {
1768
1926
  }
1769
1927
  }
1770
1928
  sendAudio(audio) {
1771
- if (!this.ws || this.ws.readyState !== WebSocket3.OPEN) return;
1929
+ if (!this.ws || this.ws.readyState !== WebSocket2.OPEN) return;
1772
1930
  this.ws.send(audio);
1773
1931
  }
1774
1932
  onTranscript(callback) {
1775
- if (this.callbacks.length >= MAX_CALLBACKS) {
1776
- getLogger().warn(
1777
- "CartesiaSTT: maximum of 10 onTranscript callbacks reached; replacing the last callback."
1778
- );
1779
- this.callbacks[this.callbacks.length - 1] = callback;
1780
- return;
1781
- }
1782
- this.callbacks.push(callback);
1933
+ this.callbacks.add(callback);
1783
1934
  }
1935
+ /** Remove a previously registered transcript callback. */
1936
+ offTranscript(callback) {
1937
+ this.callbacks.delete(callback);
1938
+ }
1939
+ /**
1940
+ * Synchronous best-effort close. Sends `finalize` and closes the socket
1941
+ * without waiting for the server to flush any remaining transcripts.
1942
+ *
1943
+ * Limitation: any transcript events produced between the `finalize` send
1944
+ * and the socket close may be dropped. Callers that need to guarantee all
1945
+ * transcripts are delivered should await :meth:`closeAsync` instead.
1946
+ */
1784
1947
  close() {
1785
1948
  if (this.keepaliveTimer) {
1786
1949
  clearInterval(this.keepaliveTimer);
@@ -1795,10 +1958,53 @@ var CartesiaSTT = class {
1795
1958
  this.ws = null;
1796
1959
  }
1797
1960
  }
1961
+ /**
1962
+ * Graceful close that awaits the `finalize` send and the socket closing
1963
+ * handshake, matching the Python adapter's behavior. Use this when you
1964
+ * need any in-flight transcripts to be flushed before teardown.
1965
+ */
1966
+ async closeAsync() {
1967
+ if (this.keepaliveTimer) {
1968
+ clearInterval(this.keepaliveTimer);
1969
+ this.keepaliveTimer = null;
1970
+ }
1971
+ const ws = this.ws;
1972
+ this.ws = null;
1973
+ if (!ws) return;
1974
+ if (ws.readyState === WebSocket2.OPEN) {
1975
+ try {
1976
+ await new Promise((resolve) => {
1977
+ ws.send("finalize", (err) => {
1978
+ if (err) getLogger().warn(`CartesiaSTT finalize send failed: ${String(err)}`);
1979
+ resolve();
1980
+ });
1981
+ });
1982
+ } catch (err) {
1983
+ getLogger().warn(`CartesiaSTT finalize error: ${String(err)}`);
1984
+ }
1985
+ }
1986
+ if (ws.readyState === WebSocket2.OPEN || ws.readyState === WebSocket2.CONNECTING) {
1987
+ await new Promise((resolve) => {
1988
+ const done = () => {
1989
+ ws.off("close", done);
1990
+ ws.off("error", done);
1991
+ resolve();
1992
+ };
1993
+ ws.once("close", done);
1994
+ ws.once("error", done);
1995
+ try {
1996
+ ws.close();
1997
+ } catch {
1998
+ resolve();
1999
+ }
2000
+ });
2001
+ }
2002
+ }
1798
2003
  };
1799
2004
 
1800
2005
  // src/stt/cartesia.ts
1801
- var STT3 = class extends CartesiaSTT {
2006
+ var STT4 = class extends CartesiaSTT {
2007
+ static providerKey = "cartesia_stt";
1802
2008
  constructor(opts = {}) {
1803
2009
  const key = opts.apiKey ?? process.env.CARTESIA_API_KEY;
1804
2010
  if (!key) {
@@ -1817,7 +2023,7 @@ var STT3 = class extends CartesiaSTT {
1817
2023
  };
1818
2024
 
1819
2025
  // src/providers/soniox-stt.ts
1820
- import WebSocket4 from "ws";
2026
+ import WebSocket3 from "ws";
1821
2027
  var SONIOX_WS_URL = "wss://stt-rt.soniox.com/transcribe-websocket";
1822
2028
  var KEEPALIVE_MESSAGE = '{"type": "keepalive"}';
1823
2029
  var END_TOKEN = "<end>";
@@ -1913,7 +2119,8 @@ var SonioxSTT = class _SonioxSTT {
1913
2119
  return config;
1914
2120
  }
1915
2121
  async connect() {
1916
- this.ws = new WebSocket4(this.baseUrl);
2122
+ this.final.reset();
2123
+ this.ws = new WebSocket3(this.baseUrl);
1917
2124
  await new Promise((resolve, reject) => {
1918
2125
  const timer = setTimeout(() => reject(new Error("Soniox connect timeout")), 1e4);
1919
2126
  this.ws.once("open", () => {
@@ -1932,7 +2139,7 @@ var SonioxSTT = class _SonioxSTT {
1932
2139
  getLogger().error(`SonioxSTT WebSocket error: ${String(err)}`);
1933
2140
  });
1934
2141
  this.keepaliveTimer = setInterval(() => {
1935
- if (this.ws && this.ws.readyState === WebSocket4.OPEN) {
2142
+ if (this.ws && this.ws.readyState === WebSocket3.OPEN) {
1936
2143
  try {
1937
2144
  this.ws.send(KEEPALIVE_MESSAGE);
1938
2145
  } catch {
@@ -2005,7 +2212,7 @@ var SonioxSTT = class _SonioxSTT {
2005
2212
  }
2006
2213
  }
2007
2214
  sendAudio(audio) {
2008
- if (!this.ws || this.ws.readyState !== WebSocket4.OPEN) return;
2215
+ if (!this.ws || this.ws.readyState !== WebSocket3.OPEN) return;
2009
2216
  if (audio.length === 0) return;
2010
2217
  this.ws.send(audio);
2011
2218
  }
@@ -2036,7 +2243,8 @@ var SonioxSTT = class _SonioxSTT {
2036
2243
  };
2037
2244
 
2038
2245
  // src/stt/soniox.ts
2039
- var STT4 = class extends SonioxSTT {
2246
+ var STT5 = class extends SonioxSTT {
2247
+ static providerKey = "soniox";
2040
2248
  constructor(opts = {}) {
2041
2249
  const key = opts.apiKey ?? process.env.SONIOX_API_KEY;
2042
2250
  if (!key) {
@@ -2051,11 +2259,21 @@ var STT4 = class extends SonioxSTT {
2051
2259
  };
2052
2260
 
2053
2261
  // src/providers/assemblyai-stt.ts
2054
- import WebSocket5 from "ws";
2262
+ import WebSocket4 from "ws";
2055
2263
  var DEFAULT_BASE_URL2 = "wss://streaming.assemblyai.com";
2056
- var DEFAULT_MIN_TURN_SILENCE_MS = 100;
2264
+ var DEFAULT_MIN_TURN_SILENCE_MS = 400;
2057
2265
  var CONNECT_TIMEOUT_MS2 = 1e4;
2058
- var MAX_CALLBACKS2 = 10;
2266
+ var TERMINATION_WAIT_TIMEOUT_MS = 500;
2267
+ var MIN_CHUNK_DURATION_MS = 50;
2268
+ var MAX_CHUNK_DURATION_MS = 1e3;
2269
+ var RECONNECT_ERROR_CODES = /* @__PURE__ */ new Set([3005, 3008]);
2270
+ var VALID_DOMAINS = /* @__PURE__ */ new Set(["general", "medical-v1"]);
2271
+ var AssemblyAISTTNotConnectedError = class extends Error {
2272
+ constructor(message = "AssemblyAISTT is not connected") {
2273
+ super(message);
2274
+ this.name = "AssemblyAISTTNotConnectedError";
2275
+ }
2276
+ };
2059
2277
  var AssemblyAISTT = class _AssemblyAISTT {
2060
2278
  constructor(apiKey, options = {}) {
2061
2279
  this.apiKey = apiKey;
@@ -2063,13 +2281,24 @@ var AssemblyAISTT = class _AssemblyAISTT {
2063
2281
  if (!apiKey) {
2064
2282
  throw new Error("AssemblyAISTT requires a non-empty apiKey");
2065
2283
  }
2284
+ if (options.domain !== void 0 && !VALID_DOMAINS.has(options.domain)) {
2285
+ const hint = options.domain === "medical" ? ' \u2014 did you mean "medical-v1"?' : "";
2286
+ throw new Error(
2287
+ `AssemblyAISTT: invalid domain "${options.domain}"; expected one of [${Array.from(
2288
+ VALID_DOMAINS
2289
+ ).map((d) => `"${d}"`).join(", ")}]${hint}`
2290
+ );
2291
+ }
2066
2292
  }
2067
2293
  ws = null;
2068
- callbacks = [];
2294
+ callbacks = /* @__PURE__ */ new Set();
2295
+ closing = false;
2296
+ reconnectAttempts = 0;
2297
+ terminationResolve = null;
2069
2298
  /** AssemblyAI session id — set when the `Begin` message arrives. */
2070
- sessionId = "";
2299
+ sessionId = null;
2071
2300
  /** Unix timestamp when the AssemblyAI session expires. */
2072
- expiresAt = 0;
2301
+ expiresAt = null;
2073
2302
  /** Factory for Twilio calls — mulaw 8 kHz. */
2074
2303
  static forTwilio(apiKey, model = "universal-streaming-english") {
2075
2304
  return new _AssemblyAISTT(apiKey, {
@@ -2104,11 +2333,14 @@ var AssemblyAISTT = class _AssemblyAISTT {
2104
2333
  keyterms_prompt: opts.keytermsPrompt ? JSON.stringify(opts.keytermsPrompt) : void 0,
2105
2334
  language_detection: languageDetection,
2106
2335
  prompt: opts.prompt,
2107
- vad_threshold: opts.vadThreshold,
2336
+ // vad_threshold intentionally omitted — not a valid v3 parameter.
2108
2337
  speaker_labels: opts.speakerLabels,
2109
2338
  max_speakers: opts.maxSpeakers,
2110
2339
  domain: opts.domain
2111
2340
  };
2341
+ if (opts.useQueryToken) {
2342
+ raw.token = this.apiKey;
2343
+ }
2112
2344
  const params = new URLSearchParams();
2113
2345
  for (const [key, value] of Object.entries(raw)) {
2114
2346
  if (value === void 0 || value === null) continue;
@@ -2121,30 +2353,41 @@ var AssemblyAISTT = class _AssemblyAISTT {
2121
2353
  const base = opts.baseUrl ?? DEFAULT_BASE_URL2;
2122
2354
  return `${base}/v3/ws?${params.toString()}`;
2123
2355
  }
2356
+ buildHeaders() {
2357
+ const headers = {
2358
+ "Content-Type": "application/json",
2359
+ "User-Agent": "Patter/1.0"
2360
+ };
2361
+ if (!this.options.useQueryToken) {
2362
+ headers.Authorization = this.apiKey;
2363
+ }
2364
+ return headers;
2365
+ }
2124
2366
  async connect() {
2367
+ this.closing = false;
2125
2368
  const url = this.buildUrl();
2126
- this.ws = new WebSocket5(url, {
2127
- headers: {
2128
- Authorization: this.apiKey,
2129
- "Content-Type": "application/json",
2130
- "User-Agent": "Patter/1.0 (integration=LiveKit-port)"
2131
- }
2132
- });
2369
+ this.ws = new WebSocket4(url, { headers: this.buildHeaders() });
2370
+ await this.awaitOpen(this.ws);
2371
+ this.attachHandlers(this.ws);
2372
+ }
2373
+ async awaitOpen(ws) {
2133
2374
  await new Promise((resolve, reject) => {
2134
2375
  const timer = setTimeout(
2135
2376
  () => reject(new Error("AssemblyAI connect timeout")),
2136
2377
  CONNECT_TIMEOUT_MS2
2137
2378
  );
2138
- this.ws.once("open", () => {
2379
+ ws.once("open", () => {
2139
2380
  clearTimeout(timer);
2140
2381
  resolve();
2141
2382
  });
2142
- this.ws.once("error", (err) => {
2383
+ ws.once("error", (err) => {
2143
2384
  clearTimeout(timer);
2144
2385
  reject(err);
2145
2386
  });
2146
2387
  });
2147
- this.ws.on("message", (raw) => {
2388
+ }
2389
+ attachHandlers(ws) {
2390
+ ws.on("message", (raw) => {
2148
2391
  let event;
2149
2392
  try {
2150
2393
  event = JSON.parse(raw.toString());
@@ -2153,12 +2396,45 @@ var AssemblyAISTT = class _AssemblyAISTT {
2153
2396
  }
2154
2397
  this.handleEvent(event);
2155
2398
  });
2399
+ ws.on("close", (code) => {
2400
+ if (!this.closing && RECONNECT_ERROR_CODES.has(code) && this.reconnectAttempts < 1) {
2401
+ this.reconnectAttempts += 1;
2402
+ getLogger().warn(
2403
+ `AssemblyAISTT: close code ${code} \u2014 attempting single reconnect.`
2404
+ );
2405
+ this.reconnect().catch((err) => {
2406
+ getLogger().error("AssemblyAISTT reconnect failed", err);
2407
+ });
2408
+ }
2409
+ });
2410
+ }
2411
+ async reconnect() {
2412
+ const url = this.buildUrl();
2413
+ this.ws = new WebSocket4(url, { headers: this.buildHeaders() });
2414
+ await this.awaitOpen(this.ws);
2415
+ this.attachHandlers(this.ws);
2156
2416
  }
2157
2417
  handleEvent(event) {
2158
2418
  const type = event.type;
2159
2419
  if (type === "Begin") {
2160
- this.sessionId = event.id ?? "";
2161
- this.expiresAt = event.expires_at ?? 0;
2420
+ this.sessionId = event.id ?? null;
2421
+ this.expiresAt = event.expires_at ?? null;
2422
+ return;
2423
+ }
2424
+ if (type === "Termination") {
2425
+ if (this.terminationResolve) {
2426
+ this.terminationResolve();
2427
+ this.terminationResolve = null;
2428
+ }
2429
+ return;
2430
+ }
2431
+ if (type === "SpeechStarted") {
2432
+ this.emit({
2433
+ text: "",
2434
+ isFinal: false,
2435
+ confidence: 0,
2436
+ eventType: "SpeechStarted"
2437
+ });
2162
2438
  return;
2163
2439
  }
2164
2440
  if (type !== "Turn") {
@@ -2193,28 +2469,89 @@ var AssemblyAISTT = class _AssemblyAISTT {
2193
2469
  }
2194
2470
  }
2195
2471
  sendAudio(audio) {
2196
- if (!this.ws || this.ws.readyState !== WebSocket5.OPEN) return;
2472
+ if (!this.ws || this.ws.readyState !== WebSocket4.OPEN) {
2473
+ throw new AssemblyAISTTNotConnectedError(
2474
+ "AssemblyAISTT.sendAudio: WebSocket is not open"
2475
+ );
2476
+ }
2477
+ const durationMs = this.estimateChunkDurationMs(audio.length);
2478
+ if (durationMs !== null && (durationMs < MIN_CHUNK_DURATION_MS || durationMs > MAX_CHUNK_DURATION_MS)) {
2479
+ getLogger().warn(
2480
+ `AssemblyAISTT: audio chunk duration ${durationMs.toFixed(1)}ms outside 50-1000ms bounds (may trigger error 3007).`
2481
+ );
2482
+ }
2197
2483
  this.ws.send(audio);
2198
2484
  }
2199
- onTranscript(callback) {
2200
- if (this.callbacks.length >= MAX_CALLBACKS2) {
2201
- getLogger().warn(
2202
- "AssemblyAISTT: maximum of 10 onTranscript callbacks reached; replacing the last callback."
2485
+ estimateChunkDurationMs(byteLength) {
2486
+ if (byteLength <= 0) return null;
2487
+ const sampleRate = this.options.sampleRate ?? 16e3;
2488
+ if (sampleRate <= 0) return null;
2489
+ const bytesPerSample = (this.options.encoding ?? "pcm_s16le") === "pcm_s16le" ? 2 : 1;
2490
+ const samples = byteLength / bytesPerSample;
2491
+ return samples / sampleRate * 1e3;
2492
+ }
2493
+ /**
2494
+ * Send an `UpdateConfiguration` frame to change settings mid-stream.
2495
+ * Only defined fields are included.
2496
+ */
2497
+ updateConfiguration(params) {
2498
+ if (!this.ws || this.ws.readyState !== WebSocket4.OPEN) {
2499
+ throw new AssemblyAISTTNotConnectedError(
2500
+ "AssemblyAISTT.updateConfiguration: WebSocket is not open"
2203
2501
  );
2204
- this.callbacks[this.callbacks.length - 1] = callback;
2205
- return;
2206
2502
  }
2207
- this.callbacks.push(callback);
2503
+ const payload = { type: "UpdateConfiguration" };
2504
+ if (params.keytermsPrompt !== void 0) {
2505
+ payload.keyterms_prompt = JSON.stringify(params.keytermsPrompt);
2506
+ }
2507
+ if (params.prompt !== void 0) {
2508
+ payload.prompt = params.prompt;
2509
+ }
2510
+ if (params.minTurnSilence !== void 0) {
2511
+ payload.min_turn_silence = params.minTurnSilence;
2512
+ }
2513
+ if (params.maxTurnSilence !== void 0) {
2514
+ payload.max_turn_silence = params.maxTurnSilence;
2515
+ }
2516
+ this.ws.send(JSON.stringify(payload));
2208
2517
  }
2209
- close() {
2210
- if (this.ws) {
2211
- try {
2212
- this.ws.send(JSON.stringify({ type: "Terminate" }));
2213
- } catch {
2214
- }
2518
+ /** Force the server to finalize the current turn (for barge-in). */
2519
+ forceEndpoint() {
2520
+ if (!this.ws || this.ws.readyState !== WebSocket4.OPEN) {
2521
+ throw new AssemblyAISTTNotConnectedError(
2522
+ "AssemblyAISTT.forceEndpoint: WebSocket is not open"
2523
+ );
2524
+ }
2525
+ this.ws.send(JSON.stringify({ type: "ForceEndpoint" }));
2526
+ }
2527
+ onTranscript(callback) {
2528
+ this.callbacks.add(callback);
2529
+ return () => {
2530
+ this.callbacks.delete(callback);
2531
+ };
2532
+ }
2533
+ async close() {
2534
+ this.closing = true;
2535
+ if (!this.ws) return;
2536
+ try {
2537
+ this.ws.send(JSON.stringify({ type: "Terminate" }));
2538
+ } catch {
2539
+ }
2540
+ await new Promise((resolve) => {
2541
+ const timer = setTimeout(() => {
2542
+ this.terminationResolve = null;
2543
+ resolve();
2544
+ }, TERMINATION_WAIT_TIMEOUT_MS);
2545
+ this.terminationResolve = () => {
2546
+ clearTimeout(timer);
2547
+ resolve();
2548
+ };
2549
+ });
2550
+ try {
2215
2551
  this.ws.close();
2216
- this.ws = null;
2552
+ } catch {
2217
2553
  }
2554
+ this.ws = null;
2218
2555
  }
2219
2556
  };
2220
2557
  function averageConfidence(words) {
@@ -2227,7 +2564,8 @@ function averageConfidence(words) {
2227
2564
  }
2228
2565
 
2229
2566
  // src/stt/assemblyai.ts
2230
- var STT5 = class extends AssemblyAISTT {
2567
+ var STT6 = class extends AssemblyAISTT {
2568
+ static providerKey = "assemblyai";
2231
2569
  constructor(opts = {}) {
2232
2570
  const key = opts.apiKey ?? process.env.ASSEMBLYAI_API_KEY;
2233
2571
  if (!key) {
@@ -2289,7 +2627,8 @@ var ELEVENLABS_VOICE_ID_BY_NAME = {
2289
2627
  glinda: "z9fAnlkpzviPz146aGWa",
2290
2628
  giovanni: "zcAOhNBS3c14rBihAFp1",
2291
2629
  mimi: "zrHiDhphv9ZnVXBqCLjz",
2292
- alloy: "21m00Tcm4TlvDq8ikWAM"
2630
+ sarah: "EXAVITQu4vr4xnSDxMaL",
2631
+ alloy: "EXAVITQu4vr4xnSDxMaL"
2293
2632
  };
2294
2633
  var VOICE_ID_PATTERN = /^[A-Za-z0-9]{20}$/;
2295
2634
  function resolveVoiceId(voice) {
@@ -2297,14 +2636,78 @@ function resolveVoiceId(voice) {
2297
2636
  if (VOICE_ID_PATTERN.test(voice)) return voice;
2298
2637
  return ELEVENLABS_VOICE_ID_BY_NAME[voice.toLowerCase()] ?? voice;
2299
2638
  }
2300
- var ElevenLabsTTS = class {
2301
- constructor(apiKey, voiceId = "21m00Tcm4TlvDq8ikWAM", modelId = "eleven_turbo_v2_5", outputFormat = "pcm_16000") {
2639
+ var ElevenLabsTTS = class _ElevenLabsTTS {
2640
+ apiKey;
2641
+ voiceId;
2642
+ modelId;
2643
+ outputFormat;
2644
+ voiceSettings;
2645
+ languageCode;
2646
+ chunkSize;
2647
+ constructor(apiKey, voiceIdOrOptions = "21m00Tcm4TlvDq8ikWAM", modelId = "eleven_flash_v2_5", outputFormat = "pcm_16000") {
2302
2648
  this.apiKey = apiKey;
2303
- this.modelId = modelId;
2304
- this.outputFormat = outputFormat;
2305
- this.voiceId = resolveVoiceId(voiceId);
2649
+ if (typeof voiceIdOrOptions === "object") {
2650
+ const o = voiceIdOrOptions;
2651
+ this.voiceId = resolveVoiceId(o.voiceId ?? "21m00Tcm4TlvDq8ikWAM");
2652
+ this.modelId = o.modelId ?? "eleven_flash_v2_5";
2653
+ this.outputFormat = o.outputFormat ?? "pcm_16000";
2654
+ this.voiceSettings = o.voiceSettings;
2655
+ this.languageCode = o.languageCode;
2656
+ this.chunkSize = o.chunkSize ?? 4096;
2657
+ } else {
2658
+ this.voiceId = resolveVoiceId(voiceIdOrOptions);
2659
+ this.modelId = modelId;
2660
+ this.outputFormat = outputFormat;
2661
+ this.voiceSettings = void 0;
2662
+ this.languageCode = void 0;
2663
+ this.chunkSize = 4096;
2664
+ }
2665
+ }
2666
+ /**
2667
+ * Construct an instance pre-configured for Twilio Media Streams.
2668
+ *
2669
+ * Sets `outputFormat='ulaw_8000'` so ElevenLabs emits μ-law @ 8 kHz
2670
+ * directly — the exact wire format Twilio's media stream uses — letting
2671
+ * the SDK skip the 16 kHz→8 kHz resample and PCM→μ-law conversion in
2672
+ * `TwilioAudioSender`. Saves ~30–80 ms first-byte and per-frame CPU,
2673
+ * and removes a potential aliasing source.
2674
+ *
2675
+ * `voiceSettings` defaults to a low-bandwidth-friendly profile
2676
+ * (speaker boost off, modest stability) which sounds cleaner at 8 kHz
2677
+ * μ-law than the studio default. Pass an explicit object to override.
2678
+ */
2679
+ static forTwilio(apiKey, options = {}) {
2680
+ const voiceSettings = options.voiceSettings ?? {
2681
+ // Speaker boost adds high-frequency emphasis that aliases ugly over an
2682
+ // 8 kHz μ-law line. Slightly higher stability tames the excursions
2683
+ // that compander quantization noise can amplify.
2684
+ stability: 0.6,
2685
+ similarity_boost: 0.75,
2686
+ use_speaker_boost: false
2687
+ };
2688
+ return new _ElevenLabsTTS(apiKey, {
2689
+ ...options,
2690
+ voiceSettings,
2691
+ outputFormat: "ulaw_8000"
2692
+ });
2693
+ }
2694
+ /**
2695
+ * Construct an instance pre-configured for Telnyx bidirectional media.
2696
+ *
2697
+ * Telnyx's default media-streaming codec is L16 PCM @ 16 kHz, which
2698
+ * matches our default Telnyx handler. We pick `pcm_16000` so the audio
2699
+ * flows end-to-end with zero resampling or transcoding.
2700
+ *
2701
+ * Trade-off: if your Telnyx profile is pinned to PCMU/8000 (μ-law),
2702
+ * construct `ElevenLabsTTS` directly with `outputFormat: 'ulaw_8000'`
2703
+ * — Telnyx supports that natively too.
2704
+ */
2705
+ static forTelnyx(apiKey, options = {}) {
2706
+ return new _ElevenLabsTTS(apiKey, {
2707
+ ...options,
2708
+ outputFormat: "pcm_16000"
2709
+ });
2306
2710
  }
2307
- voiceId;
2308
2711
  /**
2309
2712
  * Synthesise text to speech and return the full audio as a single Buffer.
2310
2713
  *
@@ -2321,22 +2724,29 @@ var ElevenLabsTTS = class {
2321
2724
  * Synthesise text and yield audio chunks as they arrive (streaming).
2322
2725
  *
2323
2726
  * The yielded buffers are raw PCM at 16 kHz (or whatever `outputFormat` is
2324
- * configured to).
2727
+ * configured to). `chunkSize` controls the maximum yield size — 512 is a
2728
+ * good choice for low-latency telephony.
2325
2729
  */
2326
2730
  async *synthesizeStream(text) {
2327
2731
  const url = `${ELEVENLABS_BASE_URL}/text-to-speech/${encodeURIComponent(this.voiceId)}/stream?output_format=${encodeURIComponent(this.outputFormat)}`;
2732
+ const body = {
2733
+ text,
2734
+ model_id: this.modelId
2735
+ };
2736
+ if (this.voiceSettings) body["voice_settings"] = this.voiceSettings;
2737
+ if (this.languageCode) body["language_code"] = this.languageCode;
2328
2738
  const response = await fetch(url, {
2329
2739
  method: "POST",
2330
2740
  headers: {
2331
2741
  "xi-api-key": this.apiKey,
2332
2742
  "Content-Type": "application/json"
2333
2743
  },
2334
- body: JSON.stringify({ text, model_id: this.modelId }),
2744
+ body: JSON.stringify(body),
2335
2745
  signal: AbortSignal.timeout(3e4)
2336
2746
  });
2337
2747
  if (!response.ok) {
2338
- const body = await response.text();
2339
- throw new Error(`ElevenLabs TTS error ${response.status}: ${body}`);
2748
+ const errBody = await response.text();
2749
+ throw new Error(`ElevenLabs TTS error ${response.status}: ${errBody}`);
2340
2750
  }
2341
2751
  if (!response.body) {
2342
2752
  throw new Error("ElevenLabs TTS: no response body");
@@ -2346,8 +2756,10 @@ var ElevenLabsTTS = class {
2346
2756
  while (true) {
2347
2757
  const { done, value } = await reader.read();
2348
2758
  if (done) break;
2349
- if (value && value.length > 0) {
2350
- yield Buffer.from(value);
2759
+ if (!value || value.length === 0) continue;
2760
+ const buf = Buffer.from(value);
2761
+ for (let offset = 0; offset < buf.length; offset += this.chunkSize) {
2762
+ yield buf.subarray(offset, Math.min(offset + this.chunkSize, buf.length));
2351
2763
  }
2352
2764
  }
2353
2765
  } finally {
@@ -2359,30 +2771,50 @@ var ElevenLabsTTS = class {
2359
2771
  };
2360
2772
 
2361
2773
  // src/tts/elevenlabs.ts
2362
- var TTS = class extends ElevenLabsTTS {
2363
- constructor(opts = {}) {
2364
- const key = opts.apiKey ?? process.env.ELEVENLABS_API_KEY;
2365
- if (!key) {
2366
- throw new Error(
2367
- "ElevenLabs TTS requires an apiKey. Pass { apiKey: '...' } or set ELEVENLABS_API_KEY in the environment."
2368
- );
2369
- }
2774
+ function resolveApiKey(apiKey) {
2775
+ const key = apiKey ?? process.env.ELEVENLABS_API_KEY;
2776
+ if (!key) {
2777
+ throw new Error(
2778
+ "ElevenLabs TTS requires an apiKey. Pass { apiKey: '...' } or set ELEVENLABS_API_KEY in the environment."
2779
+ );
2780
+ }
2781
+ return key;
2782
+ }
2783
+ var TTS = class _TTS extends ElevenLabsTTS {
2784
+ static providerKey = "elevenlabs";
2785
+ constructor(opts = {}) {
2370
2786
  super(
2371
- key,
2372
- opts.voiceId ?? "21m00Tcm4TlvDq8ikWAM",
2373
- opts.modelId ?? "eleven_turbo_v2_5",
2787
+ resolveApiKey(opts.apiKey),
2788
+ opts.voiceId ?? "EXAVITQu4vr4xnSDxMaL",
2789
+ opts.modelId ?? "eleven_flash_v2_5",
2374
2790
  opts.outputFormat ?? "pcm_16000"
2375
2791
  );
2376
2792
  }
2793
+ static forTwilio(arg1, arg2) {
2794
+ const opts = typeof arg1 === "string" ? { apiKey: arg1, ...arg2 ?? {} } : arg1 ?? {};
2795
+ return new _TTS({ ...opts, outputFormat: "ulaw_8000" });
2796
+ }
2797
+ static forTelnyx(arg1, arg2) {
2798
+ const opts = typeof arg1 === "string" ? { apiKey: arg1, ...arg2 ?? {} } : arg1 ?? {};
2799
+ return new _TTS({ ...opts, outputFormat: "pcm_16000" });
2800
+ }
2377
2801
  };
2378
2802
 
2379
2803
  // src/providers/openai-tts.ts
2380
2804
  var OPENAI_TTS_URL = "https://api.openai.com/v1/audio/speech";
2805
+ var INSTRUCTIONS_PREFIX = "gpt-4o-mini-tts";
2806
+ var LPF_ALPHA = 0.78;
2381
2807
  var OpenAITTS = class _OpenAITTS {
2382
- constructor(apiKey, voice = "alloy", model = "tts-1") {
2808
+ constructor(apiKey, voice = "alloy", model = "gpt-4o-mini-tts", instructions = null, speed = null, antiAlias = true) {
2383
2809
  this.apiKey = apiKey;
2384
2810
  this.voice = voice;
2385
2811
  this.model = model;
2812
+ this.instructions = instructions;
2813
+ this.speed = speed;
2814
+ this.antiAlias = antiAlias;
2815
+ if (speed !== null && speed !== void 0 && (speed < 0.25 || speed > 4)) {
2816
+ throw new Error("OpenAITTS: speed must be in [0.25, 4.0]");
2817
+ }
2386
2818
  }
2387
2819
  /**
2388
2820
  * Synthesise text to speech and return the full audio as a single Buffer.
@@ -2399,37 +2831,48 @@ var OpenAITTS = class _OpenAITTS {
2399
2831
  /**
2400
2832
  * Synthesise text and yield audio chunks as they arrive (streaming).
2401
2833
  *
2402
- * OpenAI returns 24 kHz PCM16; each chunk is resampled to 16 kHz before
2403
- * yielding so the output is ready for telephony pipelines.
2834
+ * OpenAI returns 24 kHz PCM16; each chunk is lowpass-filtered then
2835
+ * decimated 3:2 to 16 kHz before yielding so the output is ready for
2836
+ * telephony pipelines.
2404
2837
  *
2405
- * The resampler carries state (buffered samples + odd trailing byte)
2406
- * between chunks without that state cross-chunk sample alignment drifts
2407
- * and the caller hears pops / dropped audio (BUG #23, mirror of the
2408
- * Python `audioop.ratecv` fix).
2838
+ * The resampler carries state (filter memory + buffered samples + odd
2839
+ * trailing byte) between chunks so cross-chunk sample alignment and
2840
+ * filter phase don't reset on every network read.
2409
2841
  */
2410
2842
  async *synthesizeStream(text) {
2843
+ const body = {
2844
+ model: this.model,
2845
+ input: text,
2846
+ voice: this.voice,
2847
+ response_format: "pcm"
2848
+ };
2849
+ if (this.instructions !== null && this.model.startsWith(INSTRUCTIONS_PREFIX)) {
2850
+ body.instructions = this.instructions;
2851
+ }
2852
+ if (this.speed !== null) {
2853
+ body.speed = this.speed;
2854
+ }
2411
2855
  const response = await fetch(OPENAI_TTS_URL, {
2412
2856
  method: "POST",
2413
2857
  headers: {
2414
2858
  "Authorization": `Bearer ${this.apiKey}`,
2415
2859
  "Content-Type": "application/json"
2416
2860
  },
2417
- body: JSON.stringify({
2418
- model: this.model,
2419
- input: text,
2420
- voice: this.voice,
2421
- response_format: "pcm"
2422
- }),
2423
- signal: AbortSignal.timeout(3e4)
2861
+ body: JSON.stringify(body)
2424
2862
  });
2425
2863
  if (!response.ok) {
2426
- const body = await response.text();
2427
- throw new Error(`OpenAI TTS error ${response.status}: ${body}`);
2864
+ const errBody = await response.text();
2865
+ throw new Error(`OpenAI TTS error ${response.status}: ${errBody}`);
2428
2866
  }
2429
2867
  if (!response.body) {
2430
2868
  throw new Error("OpenAI TTS: no response body");
2431
2869
  }
2432
- const ctx = { carryByte: null, leftover: [] };
2870
+ const ctx = {
2871
+ carryByte: null,
2872
+ leftover: [],
2873
+ lpfPrev: 0,
2874
+ lpfEnabled: this.antiAlias
2875
+ };
2433
2876
  const reader = response.body.getReader();
2434
2877
  try {
2435
2878
  while (true) {
@@ -2454,8 +2897,14 @@ var OpenAITTS = class _OpenAITTS {
2454
2897
  }
2455
2898
  }
2456
2899
  /**
2457
- * Streaming 24 kHz → 16 kHz resampler (PCM16-LE). Maintains cross-chunk
2458
- * state so the 3:2 pattern doesn't reset at every network read.
2900
+ * Streaming 24 kHz → 16 kHz resampler (PCM16-LE). Applies a single-pole
2901
+ * lowpass ahead of the 3:2 decimation and carries filter + sample state
2902
+ * across chunks so the cadence doesn't reset at every network read.
2903
+ *
2904
+ * ``ctx.lpfEnabled`` (default true on the streaming path, false for the
2905
+ * legacy static helper) controls whether the LPF is engaged — we keep
2906
+ * the helper bit-exact for the downsample-only tests while the real
2907
+ * streaming path gets anti-alias filtering.
2459
2908
  */
2460
2909
  static resampleStreaming(audio, ctx) {
2461
2910
  let buf;
@@ -2474,14 +2923,26 @@ var OpenAITTS = class _OpenAITTS {
2474
2923
  }
2475
2924
  const sampleCount = buf.length / 2;
2476
2925
  const samples = ctx.leftover.slice();
2926
+ const lpf = ctx.lpfEnabled !== false;
2927
+ let y = ctx.lpfPrev;
2477
2928
  for (let i2 = 0; i2 < sampleCount; i2++) {
2478
- samples.push(buf.readInt16LE(i2 * 2));
2929
+ const x = buf.readInt16LE(i2 * 2);
2930
+ if (lpf) {
2931
+ y = LPF_ALPHA * x + (1 - LPF_ALPHA) * y;
2932
+ let s = Math.round(y);
2933
+ if (s > 32767) s = 32767;
2934
+ else if (s < -32768) s = -32768;
2935
+ samples.push(s);
2936
+ } else {
2937
+ samples.push(x);
2938
+ }
2479
2939
  }
2940
+ if (lpf) ctx.lpfPrev = y;
2480
2941
  const out = [];
2481
2942
  let i = 0;
2482
2943
  while (i + 2 < samples.length) {
2483
2944
  out.push(samples[i]);
2484
- out.push(Math.trunc((samples[i + 1] + samples[i + 2]) / 2));
2945
+ out.push(Math.round((samples[i + 1] + samples[i + 2]) / 2));
2485
2946
  i += 3;
2486
2947
  }
2487
2948
  ctx.leftover = samples.slice(i);
@@ -2493,7 +2954,7 @@ var OpenAITTS = class _OpenAITTS {
2493
2954
  }
2494
2955
  /** @deprecated use {@link resampleStreaming} with persistent state. */
2495
2956
  static resample24kTo16k(audio) {
2496
- const ctx = { carryByte: null, leftover: [] };
2957
+ const ctx = { carryByte: null, leftover: [], lpfPrev: 0, lpfEnabled: false };
2497
2958
  const out = _OpenAITTS.resampleStreaming(audio, ctx);
2498
2959
  if (ctx.leftover.length === 0) return out;
2499
2960
  const tail = Buffer.alloc(ctx.leftover.length * 2);
@@ -2506,6 +2967,7 @@ var OpenAITTS = class _OpenAITTS {
2506
2967
 
2507
2968
  // src/tts/openai.ts
2508
2969
  var TTS2 = class extends OpenAITTS {
2970
+ static providerKey = "openai_tts";
2509
2971
  constructor(opts = {}) {
2510
2972
  const key = opts.apiKey ?? process.env.OPENAI_API_KEY;
2511
2973
  if (!key) {
@@ -2513,15 +2975,22 @@ var TTS2 = class extends OpenAITTS {
2513
2975
  "OpenAI TTS requires an apiKey. Pass { apiKey: 'sk-...' } or set OPENAI_API_KEY in the environment."
2514
2976
  );
2515
2977
  }
2516
- super(key, opts.voice ?? "alloy", opts.model ?? "tts-1");
2978
+ super(
2979
+ key,
2980
+ opts.voice ?? "alloy",
2981
+ opts.model ?? "gpt-4o-mini-tts",
2982
+ opts.instructions ?? null,
2983
+ opts.speed ?? null,
2984
+ opts.antiAlias ?? false
2985
+ );
2517
2986
  }
2518
2987
  };
2519
2988
 
2520
2989
  // src/providers/cartesia-tts.ts
2521
2990
  var CARTESIA_BASE_URL = "https://api.cartesia.ai";
2522
- var CARTESIA_API_VERSION = "2024-11-13";
2991
+ var CARTESIA_API_VERSION = "2025-04-16";
2523
2992
  var CARTESIA_DEFAULT_VOICE_ID = "f786b574-daa5-4673-aa0c-cbe3e8534c02";
2524
- var CartesiaTTS = class {
2993
+ var CartesiaTTS = class _CartesiaTTS {
2525
2994
  apiKey;
2526
2995
  model;
2527
2996
  voice;
@@ -2534,7 +3003,7 @@ var CartesiaTTS = class {
2534
3003
  apiVersion;
2535
3004
  constructor(apiKey, opts = {}) {
2536
3005
  this.apiKey = apiKey;
2537
- this.model = opts.model ?? "sonic-2";
3006
+ this.model = opts.model ?? "sonic-3";
2538
3007
  this.voice = opts.voice ?? CARTESIA_DEFAULT_VOICE_ID;
2539
3008
  this.language = opts.language ?? "en";
2540
3009
  this.sampleRate = opts.sampleRate ?? 16e3;
@@ -2544,6 +3013,29 @@ var CartesiaTTS = class {
2544
3013
  this.baseUrl = opts.baseUrl ?? CARTESIA_BASE_URL;
2545
3014
  this.apiVersion = opts.apiVersion ?? CARTESIA_API_VERSION;
2546
3015
  }
3016
+ /**
3017
+ * Construct an instance pre-configured for Twilio Media Streams.
3018
+ *
3019
+ * Sets `sampleRate=8000` so Cartesia emits PCM_S16LE @ 8 kHz directly.
3020
+ * Twilio's media stream uses μ-law @ 8 kHz so the SDK still does the
3021
+ * PCM → μ-law transcode client-side, but the 16 kHz → 8 kHz resample
3022
+ * step is skipped. Saves ~10–30 ms first-byte plus per-frame CPU and
3023
+ * removes a potential aliasing source.
3024
+ */
3025
+ static forTwilio(apiKey, options = {}) {
3026
+ return new _CartesiaTTS(apiKey, { ...options, sampleRate: 8e3 });
3027
+ }
3028
+ /**
3029
+ * Construct an instance pre-configured for Telnyx bidirectional media.
3030
+ *
3031
+ * Sets `sampleRate=16000` to match Telnyx's L16/16000 default codec —
3032
+ * audio flows end-to-end with zero resampling or transcoding. Same as
3033
+ * the bare-constructor default; exists for API symmetry with
3034
+ * {@link CartesiaTTS.forTwilio}.
3035
+ */
3036
+ static forTelnyx(apiKey, options = {}) {
3037
+ return new _CartesiaTTS(apiKey, { ...options, sampleRate: 16e3 });
3038
+ }
2547
3039
  /** Build the JSON payload for the Cartesia bytes endpoint. */
2548
3040
  buildPayload(text) {
2549
3041
  const payload = {
@@ -2616,18 +3108,31 @@ var CartesiaTTS = class {
2616
3108
  };
2617
3109
 
2618
3110
  // src/tts/cartesia.ts
2619
- var TTS3 = class extends CartesiaTTS {
3111
+ function resolveApiKey2(apiKey) {
3112
+ const key = apiKey ?? process.env.CARTESIA_API_KEY;
3113
+ if (!key) {
3114
+ throw new Error(
3115
+ "Cartesia TTS requires an apiKey. Pass { apiKey: '...' } or set CARTESIA_API_KEY in the environment."
3116
+ );
3117
+ }
3118
+ return key;
3119
+ }
3120
+ var TTS3 = class _TTS extends CartesiaTTS {
3121
+ static providerKey = "cartesia_tts";
2620
3122
  constructor(opts = {}) {
2621
- const key = opts.apiKey ?? process.env.CARTESIA_API_KEY;
2622
- if (!key) {
2623
- throw new Error(
2624
- "Cartesia TTS requires an apiKey. Pass { apiKey: '...' } or set CARTESIA_API_KEY in the environment."
2625
- );
2626
- }
3123
+ const key = resolveApiKey2(opts.apiKey);
2627
3124
  const { apiKey: _ignored, ...rest } = opts;
2628
3125
  void _ignored;
2629
3126
  super(key, rest);
2630
3127
  }
3128
+ static forTwilio(arg1, arg2) {
3129
+ const opts = typeof arg1 === "string" ? { apiKey: arg1, ...arg2 ?? {} } : arg1 ?? {};
3130
+ return new _TTS({ ...opts, sampleRate: 8e3 });
3131
+ }
3132
+ static forTelnyx(arg1, arg2) {
3133
+ const opts = typeof arg1 === "string" ? { apiKey: arg1, ...arg2 ?? {} } : arg1 ?? {};
3134
+ return new _TTS({ ...opts, sampleRate: 16e3 });
3135
+ }
2631
3136
  };
2632
3137
 
2633
3138
  // src/providers/rime-tts.ts
@@ -2759,6 +3264,7 @@ var RimeTTS = class {
2759
3264
 
2760
3265
  // src/tts/rime.ts
2761
3266
  var TTS4 = class extends RimeTTS {
3267
+ static providerKey = "rime";
2762
3268
  constructor(opts = {}) {
2763
3269
  const key = opts.apiKey ?? process.env.RIME_API_KEY;
2764
3270
  if (!key) {
@@ -2852,6 +3358,7 @@ var LMNTTTS = class {
2852
3358
 
2853
3359
  // src/tts/lmnt.ts
2854
3360
  var TTS5 = class extends LMNTTTS {
3361
+ static providerKey = "lmnt";
2855
3362
  constructor(opts = {}) {
2856
3363
  const key = opts.apiKey ?? process.env.LMNT_API_KEY;
2857
3364
  if (!key) {
@@ -2867,6 +3374,7 @@ var TTS5 = class extends LMNTTTS {
2867
3374
 
2868
3375
  // src/llm/openai.ts
2869
3376
  var LLM = class extends OpenAILLMProvider {
3377
+ static providerKey = "openai";
2870
3378
  constructor(opts = {}) {
2871
3379
  const key = opts.apiKey ?? process.env.OPENAI_API_KEY;
2872
3380
  if (!key) {
@@ -2874,15 +3382,27 @@ var LLM = class extends OpenAILLMProvider {
2874
3382
  "OpenAI LLM requires an apiKey. Pass { apiKey: 'sk-...' } or set OPENAI_API_KEY."
2875
3383
  );
2876
3384
  }
2877
- super(key, opts.model ?? "gpt-4o-mini");
3385
+ super(key, opts.model ?? "gpt-4o-mini", {
3386
+ temperature: opts.temperature,
3387
+ maxTokens: opts.maxTokens,
3388
+ responseFormat: opts.responseFormat,
3389
+ parallelToolCalls: opts.parallelToolCalls,
3390
+ toolChoice: opts.toolChoice,
3391
+ seed: opts.seed,
3392
+ topP: opts.topP,
3393
+ frequencyPenalty: opts.frequencyPenalty,
3394
+ presencePenalty: opts.presencePenalty,
3395
+ stop: opts.stop
3396
+ });
2878
3397
  }
2879
3398
  };
2880
3399
 
2881
3400
  // src/providers/anthropic-llm.ts
2882
3401
  var DEFAULT_ANTHROPIC_URL = "https://api.anthropic.com/v1/messages";
2883
3402
  var DEFAULT_ANTHROPIC_VERSION = "2023-06-01";
2884
- var DEFAULT_MODEL = "claude-3-5-sonnet-20241022";
3403
+ var DEFAULT_MODEL = "claude-haiku-4-5-20251001";
2885
3404
  var DEFAULT_MAX_TOKENS = 1024;
3405
+ var PROMPT_CACHING_BETA = "prompt-caching-2024-07-31";
2886
3406
  var AnthropicLLMProvider = class {
2887
3407
  apiKey;
2888
3408
  model;
@@ -2890,6 +3410,7 @@ var AnthropicLLMProvider = class {
2890
3410
  temperature;
2891
3411
  url;
2892
3412
  anthropicVersion;
3413
+ promptCaching;
2893
3414
  constructor(options) {
2894
3415
  if (!options.apiKey) {
2895
3416
  throw new Error(
@@ -2902,6 +3423,7 @@ var AnthropicLLMProvider = class {
2902
3423
  this.temperature = options.temperature;
2903
3424
  this.url = options.baseUrl ?? DEFAULT_ANTHROPIC_URL;
2904
3425
  this.anthropicVersion = options.anthropicVersion ?? DEFAULT_ANTHROPIC_VERSION;
3426
+ this.promptCaching = options.promptCaching ?? true;
2905
3427
  }
2906
3428
  async *stream(messages, tools) {
2907
3429
  const { system, messages: anthropicMessages } = toAnthropicMessages(messages);
@@ -2912,16 +3434,44 @@ var AnthropicLLMProvider = class {
2912
3434
  max_tokens: this.maxTokens,
2913
3435
  stream: true
2914
3436
  };
2915
- if (system) body.system = system;
2916
- if (anthropicTools && anthropicTools.length > 0) body.tools = anthropicTools;
3437
+ if (system) {
3438
+ if (this.promptCaching) {
3439
+ const block = {
3440
+ type: "text",
3441
+ text: system,
3442
+ cache_control: { type: "ephemeral" }
3443
+ };
3444
+ body.system = [block];
3445
+ } else {
3446
+ body.system = system;
3447
+ }
3448
+ }
3449
+ if (anthropicTools && anthropicTools.length > 0) {
3450
+ if (this.promptCaching) {
3451
+ const cachedTools = anthropicTools.map(
3452
+ (t) => ({ ...t })
3453
+ );
3454
+ cachedTools[cachedTools.length - 1] = {
3455
+ ...cachedTools[cachedTools.length - 1],
3456
+ cache_control: { type: "ephemeral" }
3457
+ };
3458
+ body.tools = cachedTools;
3459
+ } else {
3460
+ body.tools = anthropicTools;
3461
+ }
3462
+ }
2917
3463
  if (this.temperature !== void 0) body.temperature = this.temperature;
3464
+ const headers = {
3465
+ "Content-Type": "application/json",
3466
+ "x-api-key": this.apiKey,
3467
+ "anthropic-version": this.anthropicVersion
3468
+ };
3469
+ if (this.promptCaching) {
3470
+ headers["anthropic-beta"] = PROMPT_CACHING_BETA;
3471
+ }
2918
3472
  const response = await fetch(this.url, {
2919
3473
  method: "POST",
2920
- headers: {
2921
- "Content-Type": "application/json",
2922
- "x-api-key": this.apiKey,
2923
- "anthropic-version": this.anthropicVersion
2924
- },
3474
+ headers,
2925
3475
  body: JSON.stringify(body),
2926
3476
  signal: AbortSignal.timeout(3e4)
2927
3477
  });
@@ -3066,6 +3616,7 @@ function toAnthropicMessages(messages) {
3066
3616
 
3067
3617
  // src/llm/anthropic.ts
3068
3618
  var LLM2 = class extends AnthropicLLMProvider {
3619
+ static providerKey = "anthropic";
3069
3620
  constructor(opts = {}) {
3070
3621
  const key = opts.apiKey ?? process.env.ANTHROPIC_API_KEY;
3071
3622
  if (!key) {
@@ -3079,11 +3630,15 @@ var LLM2 = class extends AnthropicLLMProvider {
3079
3630
  maxTokens: opts.maxTokens,
3080
3631
  temperature: opts.temperature,
3081
3632
  baseUrl: opts.baseUrl,
3082
- anthropicVersion: opts.anthropicVersion
3633
+ anthropicVersion: opts.anthropicVersion,
3634
+ promptCaching: opts.promptCaching
3083
3635
  });
3084
3636
  }
3085
3637
  };
3086
3638
 
3639
+ // src/version.ts
3640
+ var VERSION = "0.5.3";
3641
+
3087
3642
  // src/providers/groq-llm.ts
3088
3643
  var GROQ_BASE_URL = "https://api.groq.com/openai/v1";
3089
3644
  var DEFAULT_MODEL2 = "llama-3.3-70b-versatile";
@@ -3091,6 +3646,16 @@ var GroqLLMProvider = class {
3091
3646
  apiKey;
3092
3647
  model;
3093
3648
  baseUrl;
3649
+ temperature;
3650
+ maxTokens;
3651
+ responseFormat;
3652
+ parallelToolCalls;
3653
+ toolChoice;
3654
+ seed;
3655
+ topP;
3656
+ frequencyPenalty;
3657
+ presencePenalty;
3658
+ stop;
3094
3659
  constructor(options) {
3095
3660
  if (!options.apiKey) {
3096
3661
  throw new Error(
@@ -3100,19 +3665,43 @@ var GroqLLMProvider = class {
3100
3665
  this.apiKey = options.apiKey;
3101
3666
  this.model = options.model ?? DEFAULT_MODEL2;
3102
3667
  this.baseUrl = options.baseUrl ?? GROQ_BASE_URL;
3668
+ this.temperature = options.temperature;
3669
+ this.maxTokens = options.maxTokens;
3670
+ this.responseFormat = options.responseFormat;
3671
+ this.parallelToolCalls = options.parallelToolCalls;
3672
+ this.toolChoice = options.toolChoice;
3673
+ this.seed = options.seed;
3674
+ this.topP = options.topP;
3675
+ this.frequencyPenalty = options.frequencyPenalty;
3676
+ this.presencePenalty = options.presencePenalty;
3677
+ this.stop = options.stop;
3103
3678
  }
3104
3679
  async *stream(messages, tools) {
3105
3680
  const body = {
3106
3681
  model: this.model,
3107
3682
  messages,
3108
- stream: true
3683
+ stream: true,
3684
+ stream_options: { include_usage: true }
3109
3685
  };
3686
+ if (this.temperature !== void 0) body.temperature = this.temperature;
3687
+ if (this.maxTokens !== void 0) {
3688
+ body.max_completion_tokens = this.maxTokens;
3689
+ }
3690
+ if (this.responseFormat !== void 0) body.response_format = this.responseFormat;
3691
+ if (this.parallelToolCalls !== void 0) body.parallel_tool_calls = this.parallelToolCalls;
3692
+ if (this.toolChoice !== void 0) body.tool_choice = this.toolChoice;
3693
+ if (this.seed !== void 0) body.seed = this.seed;
3694
+ if (this.topP !== void 0) body.top_p = this.topP;
3695
+ if (this.frequencyPenalty !== void 0) body.frequency_penalty = this.frequencyPenalty;
3696
+ if (this.presencePenalty !== void 0) body.presence_penalty = this.presencePenalty;
3697
+ if (this.stop !== void 0) body.stop = this.stop;
3110
3698
  if (tools) body.tools = tools;
3111
3699
  const response = await fetch(`${this.baseUrl}/chat/completions`, {
3112
3700
  method: "POST",
3113
3701
  headers: {
3114
3702
  "Content-Type": "application/json",
3115
- Authorization: `Bearer ${this.apiKey}`
3703
+ Authorization: `Bearer ${this.apiKey}`,
3704
+ "User-Agent": `getpatter/${VERSION}`
3116
3705
  },
3117
3706
  body: JSON.stringify(body),
3118
3707
  signal: AbortSignal.timeout(3e4)
@@ -3147,6 +3736,16 @@ async function* parseOpenAISseStream(response) {
3147
3736
  } catch {
3148
3737
  continue;
3149
3738
  }
3739
+ const usage = chunk.usage ?? chunk.x_groq?.usage;
3740
+ if (usage) {
3741
+ const cached = chunk.usage?.prompt_tokens_details?.cached_tokens ?? 0;
3742
+ yield {
3743
+ type: "usage",
3744
+ inputTokens: usage.prompt_tokens,
3745
+ outputTokens: usage.completion_tokens,
3746
+ cacheReadInputTokens: cached
3747
+ };
3748
+ }
3150
3749
  const delta = chunk.choices?.[0]?.delta;
3151
3750
  if (!delta) continue;
3152
3751
  if (delta.content) {
@@ -3169,6 +3768,7 @@ async function* parseOpenAISseStream(response) {
3169
3768
 
3170
3769
  // src/llm/groq.ts
3171
3770
  var LLM3 = class extends GroqLLMProvider {
3771
+ static providerKey = "groq";
3172
3772
  constructor(opts = {}) {
3173
3773
  const key = opts.apiKey ?? process.env.GROQ_API_KEY;
3174
3774
  if (!key) {
@@ -3179,7 +3779,17 @@ var LLM3 = class extends GroqLLMProvider {
3179
3779
  super({
3180
3780
  apiKey: key,
3181
3781
  model: opts.model,
3182
- baseUrl: opts.baseUrl
3782
+ baseUrl: opts.baseUrl,
3783
+ temperature: opts.temperature,
3784
+ maxTokens: opts.maxTokens,
3785
+ responseFormat: opts.responseFormat,
3786
+ parallelToolCalls: opts.parallelToolCalls,
3787
+ toolChoice: opts.toolChoice,
3788
+ seed: opts.seed,
3789
+ topP: opts.topP,
3790
+ frequencyPenalty: opts.frequencyPenalty,
3791
+ presencePenalty: opts.presencePenalty,
3792
+ stop: opts.stop
3183
3793
  });
3184
3794
  }
3185
3795
  };
@@ -3187,11 +3797,22 @@ var LLM3 = class extends GroqLLMProvider {
3187
3797
  // src/providers/cerebras-llm.ts
3188
3798
  var CEREBRAS_BASE_URL = "https://api.cerebras.ai/v1";
3189
3799
  var DEFAULT_MODEL3 = "llama3.1-8b";
3800
+ var RETRY_BACKOFF_BASE_MS = 500;
3190
3801
  var CerebrasLLMProvider = class {
3191
3802
  apiKey;
3192
3803
  model;
3193
3804
  baseUrl;
3194
3805
  gzipCompression;
3806
+ temperature;
3807
+ maxTokens;
3808
+ responseFormat;
3809
+ parallelToolCalls;
3810
+ toolChoice;
3811
+ seed;
3812
+ topP;
3813
+ frequencyPenalty;
3814
+ presencePenalty;
3815
+ stop;
3195
3816
  constructor(options) {
3196
3817
  if (!options.apiKey) {
3197
3818
  throw new Error(
@@ -3201,18 +3822,43 @@ var CerebrasLLMProvider = class {
3201
3822
  this.apiKey = options.apiKey;
3202
3823
  this.model = options.model ?? DEFAULT_MODEL3;
3203
3824
  this.baseUrl = options.baseUrl ?? CEREBRAS_BASE_URL;
3204
- this.gzipCompression = options.gzipCompression ?? false;
3825
+ this.gzipCompression = options.gzipCompression ?? true;
3826
+ this.temperature = options.temperature;
3827
+ this.maxTokens = options.maxTokens;
3828
+ this.responseFormat = options.responseFormat;
3829
+ this.parallelToolCalls = options.parallelToolCalls;
3830
+ this.toolChoice = options.toolChoice;
3831
+ this.seed = options.seed;
3832
+ this.topP = options.topP;
3833
+ this.frequencyPenalty = options.frequencyPenalty;
3834
+ this.presencePenalty = options.presencePenalty;
3835
+ this.stop = options.stop;
3205
3836
  }
3206
3837
  async *stream(messages, tools) {
3207
3838
  const body = {
3208
3839
  model: this.model,
3209
3840
  messages,
3210
- stream: true
3841
+ stream: true,
3842
+ stream_options: { include_usage: true }
3211
3843
  };
3844
+ if (this.temperature !== void 0) body.temperature = this.temperature;
3845
+ if (this.maxTokens !== void 0) {
3846
+ body.max_completion_tokens = this.maxTokens;
3847
+ }
3848
+ if (this.responseFormat !== void 0) body.response_format = this.responseFormat;
3849
+ if (this.parallelToolCalls !== void 0) body.parallel_tool_calls = this.parallelToolCalls;
3850
+ if (this.toolChoice !== void 0) body.tool_choice = this.toolChoice;
3851
+ if (this.seed !== void 0) body.seed = this.seed;
3852
+ if (this.topP !== void 0) body.top_p = this.topP;
3853
+ if (this.frequencyPenalty !== void 0) body.frequency_penalty = this.frequencyPenalty;
3854
+ if (this.presencePenalty !== void 0) body.presence_penalty = this.presencePenalty;
3855
+ if (this.stop !== void 0) body.stop = this.stop;
3212
3856
  if (tools) body.tools = tools;
3213
3857
  const headers = {
3214
3858
  "Content-Type": "application/json",
3215
- Authorization: `Bearer ${this.apiKey}`
3859
+ Authorization: `Bearer ${this.apiKey}`,
3860
+ // Identify the SDK in upstream logs/rate-limit attribution.
3861
+ "User-Agent": `getpatter/${VERSION}`
3216
3862
  };
3217
3863
  let payload = JSON.stringify(body);
3218
3864
  if (this.gzipCompression) {
@@ -3222,18 +3868,43 @@ var CerebrasLLMProvider = class {
3222
3868
  headers["Content-Encoding"] = "gzip";
3223
3869
  }
3224
3870
  }
3225
- const response = await fetch(`${this.baseUrl}/chat/completions`, {
3226
- method: "POST",
3227
- headers,
3228
- body: payload,
3229
- signal: AbortSignal.timeout(3e4)
3230
- });
3231
- if (!response.ok) {
3232
- const errText = await response.text();
3233
- getLogger().error(`Cerebras API error: ${response.status} ${errText}`);
3234
- return;
3871
+ const maxAttempts = 2;
3872
+ let lastErrText = "";
3873
+ let lastStatus = 0;
3874
+ for (let attempt = 0; attempt < maxAttempts; attempt++) {
3875
+ const response = await fetch(`${this.baseUrl}/chat/completions`, {
3876
+ method: "POST",
3877
+ headers,
3878
+ body: payload,
3879
+ signal: AbortSignal.timeout(3e4)
3880
+ });
3881
+ if (response.ok) {
3882
+ yield* parseOpenAISseStream(response);
3883
+ return;
3884
+ }
3885
+ lastStatus = response.status;
3886
+ lastErrText = await response.text().catch(() => "");
3887
+ const isRetriable = response.status === 429 || response.status >= 500;
3888
+ const isLastAttempt = attempt >= maxAttempts - 1;
3889
+ if (!isRetriable || isLastAttempt) {
3890
+ if (response.status === 404 && lastErrText.includes("model_not_found")) {
3891
+ getLogger().error(
3892
+ `Cerebras: model "${this.model}" not available on your tier. Override via \`new CerebrasLLM({ model: '<id>' })\` and list tier-available ids with \`GET ${this.baseUrl}/models\` (common: llama3.1-8b, qwen-3-235b-a22b-instruct-2507, llama-3.3-70b on paid). Raw response: ${lastErrText}`
3893
+ );
3894
+ } else {
3895
+ getLogger().error(`Cerebras API error: ${response.status} ${lastErrText}`);
3896
+ }
3897
+ return;
3898
+ }
3899
+ const advisoryMs = parseRateLimitResetMs(response.headers);
3900
+ const exponentialMs = RETRY_BACKOFF_BASE_MS * Math.pow(2, attempt);
3901
+ const delayMs = Math.max(advisoryMs, exponentialMs);
3902
+ getLogger().warn(
3903
+ `Cerebras API ${response.status} (attempt ${attempt + 1}/${maxAttempts}); retrying after ${delayMs}ms`
3904
+ );
3905
+ await new Promise((r) => setTimeout(r, delayMs));
3235
3906
  }
3236
- yield* parseOpenAISseStream(response);
3907
+ throw new PatterError(`Cerebras API error ${lastStatus}: ${lastErrText || "request failed"}`);
3237
3908
  }
3238
3909
  };
3239
3910
  async function gzipEncode(data) {
@@ -3260,9 +3931,28 @@ async function gzipEncode(data) {
3260
3931
  }
3261
3932
  return out;
3262
3933
  }
3934
+ function parseRateLimitResetMs(headers) {
3935
+ const candidates = [
3936
+ headers.get("x-ratelimit-reset-tokens-minute"),
3937
+ headers.get("x-ratelimit-reset-requests-minute"),
3938
+ // Some upstreams send the standard ``retry-after`` (seconds).
3939
+ headers.get("retry-after")
3940
+ ];
3941
+ let bestMs = 0;
3942
+ for (const raw of candidates) {
3943
+ if (!raw) continue;
3944
+ const parsed = Number.parseFloat(raw);
3945
+ if (Number.isFinite(parsed) && parsed > 0) {
3946
+ const ms = parsed * 1e3;
3947
+ if (ms > bestMs) bestMs = ms;
3948
+ }
3949
+ }
3950
+ return bestMs;
3951
+ }
3263
3952
 
3264
3953
  // src/llm/cerebras.ts
3265
3954
  var LLM4 = class extends CerebrasLLMProvider {
3955
+ static providerKey = "cerebras";
3266
3956
  constructor(opts = {}) {
3267
3957
  const key = opts.apiKey ?? process.env.CEREBRAS_API_KEY;
3268
3958
  if (!key) {
@@ -3274,7 +3964,17 @@ var LLM4 = class extends CerebrasLLMProvider {
3274
3964
  apiKey: key,
3275
3965
  model: opts.model,
3276
3966
  baseUrl: opts.baseUrl,
3277
- gzipCompression: opts.gzipCompression
3967
+ gzipCompression: opts.gzipCompression,
3968
+ temperature: opts.temperature,
3969
+ maxTokens: opts.maxTokens,
3970
+ responseFormat: opts.responseFormat,
3971
+ parallelToolCalls: opts.parallelToolCalls,
3972
+ toolChoice: opts.toolChoice,
3973
+ seed: opts.seed,
3974
+ topP: opts.topP,
3975
+ frequencyPenalty: opts.frequencyPenalty,
3976
+ presencePenalty: opts.presencePenalty,
3977
+ stop: opts.stop
3278
3978
  });
3279
3979
  }
3280
3980
  };
@@ -3330,6 +4030,7 @@ var GoogleLLMProvider = class {
3330
4030
  const decoder = new TextDecoder();
3331
4031
  let buffer = "";
3332
4032
  let nextIndex = 0;
4033
+ let lastUsage;
3333
4034
  while (true) {
3334
4035
  const { done, value } = await reader.read();
3335
4036
  if (done) break;
@@ -3347,6 +4048,9 @@ var GoogleLLMProvider = class {
3347
4048
  } catch {
3348
4049
  continue;
3349
4050
  }
4051
+ if (payload.usageMetadata) {
4052
+ lastUsage = payload.usageMetadata;
4053
+ }
3350
4054
  const candidate = payload.candidates?.[0];
3351
4055
  const parts = candidate?.content?.parts ?? [];
3352
4056
  for (const part of parts) {
@@ -3369,6 +4073,14 @@ var GoogleLLMProvider = class {
3369
4073
  }
3370
4074
  }
3371
4075
  }
4076
+ if (lastUsage) {
4077
+ yield {
4078
+ type: "usage",
4079
+ inputTokens: lastUsage.promptTokenCount,
4080
+ outputTokens: lastUsage.candidatesTokenCount,
4081
+ cacheReadInputTokens: lastUsage.cachedContentTokenCount ?? 0
4082
+ };
4083
+ }
3372
4084
  yield { type: "done" };
3373
4085
  }
3374
4086
  };
@@ -3458,6 +4170,7 @@ function toGeminiContents(messages) {
3458
4170
 
3459
4171
  // src/llm/google.ts
3460
4172
  var LLM5 = class extends GoogleLLMProvider {
4173
+ static providerKey = "google";
3461
4174
  constructor(opts = {}) {
3462
4175
  const key = opts.apiKey ?? process.env.GEMINI_API_KEY ?? process.env.GOOGLE_API_KEY;
3463
4176
  if (!key) {
@@ -3475,6 +4188,279 @@ var LLM5 = class extends GoogleLLMProvider {
3475
4188
  }
3476
4189
  };
3477
4190
 
4191
+ // src/providers/silero-vad.ts
4192
+ import { createRequire } from "module";
4193
+ import * as fs from "fs";
4194
+ import * as path from "path";
4195
+ import { fileURLToPath } from "url";
4196
+ var SUPPORTED_SAMPLE_RATES = [8e3, 16e3];
4197
/**
 * Best-effort lookup of the directory this module was loaded from.
 *
 * Resolution order:
 *   1. `__dirname`, when the surrounding module system defines it;
 *   2. the directory of `import.meta.url`, when available;
 *   3. `process.cwd()` as a last resort.
 *
 * @returns A directory path string.
 */
function resolveModuleDir() {
  // `typeof` on an undeclared identifier never throws, so this check is safe
  // whether or not the module system provides `__dirname`. Reading it directly
  // (rather than through `new Function`, which evaluates in global scope)
  // lets a module-scoped `__dirname` actually be seen.
  if (typeof __dirname === "string") return __dirname;
  try {
    const url = import.meta.url;
    if (url) return path.dirname(fileURLToPath(url));
  } catch {
    // import.meta.url missing or not a file: URL — fall through to cwd.
  }
  return process.cwd();
}
4210
+ var MODULE_DIR = resolveModuleDir();
4211
/**
 * Locate the bundled `silero_vad.onnx` file.
 *
 * Checks `<MODULE_DIR>/resources` first and `<MODULE_DIR>/../resources`
 * second, returning the first path that exists on disk. When neither exists
 * the first candidate is returned anyway, so the caller always gets a path.
 *
 * @returns Path to the model file (existing, or the primary candidate).
 */
function resolveDefaultModelPath() {
  const primary = path.join(MODULE_DIR, "resources", "silero_vad.onnx");
  const fallback = path.join(MODULE_DIR, "..", "resources", "silero_vad.onnx");
  const found = [primary, fallback].find((candidate) => fs.existsSync(candidate));
  return found ?? primary;
}
4219
+ var DEFAULT_MODEL_PATH = resolveDefaultModelPath();
4220
/**
 * Load the ONNX runtime module used by SileroVAD.
 *
 * Tries a dynamic `import()` of the bundled runtime chunk first. If that
 * fails, tries to `require("onnxruntime-node")` relative to the current
 * working directory's `package.json`.
 *
 * @returns The loaded runtime module.
 * @throws Error with install instructions and both underlying failure
 *   messages when neither attempt succeeds.
 */
async function loadOnnxRuntime() {
  let importFailure;
  try {
    const runtime = await import("./dist-YRCCJQ26.mjs");
    return runtime;
  } catch (err) {
    // Remember why import() failed; reported only if the fallback fails too.
    importFailure = err;
  }
  try {
    const anchor = path.join(process.cwd(), "package.json");
    const requireFromCwd = createRequire(anchor);
    return requireFromCwd("onnxruntime-node");
  } catch (err) {
    const requireDetail = err?.message ?? String(err);
    const importDetail = importFailure?.message ?? String(importFailure);
    throw new Error(
      `
SileroVAD requires the "onnxruntime-node" package, which could not be resolved.

Install: npm install onnxruntime-node

This is an optional peer dependency of getpatter (~210 MB) \u2014 it is only
needed when you use SileroVAD in pipeline mode.

import() failed: ${importDetail}
cwd-require failed: ${requireDetail}
`
    );
  }
}
4249
+ var ExpFilter = class {
4250
+ constructor(alpha) {
4251
+ this.alpha = alpha;
4252
+ if (!(alpha > 0 && alpha <= 1)) {
4253
+ throw new Error("alpha must be in (0, 1].");
4254
+ }
4255
+ }
4256
+ filtered = null;
4257
+ apply(exp, sample) {
4258
+ if (this.filtered === null) {
4259
+ this.filtered = sample;
4260
+ } else {
4261
+ const a = Math.pow(this.alpha, exp);
4262
+ this.filtered = a * this.filtered + (1 - a) * sample;
4263
+ }
4264
+ return this.filtered;
4265
+ }
4266
+ reset() {
4267
+ this.filtered = null;
4268
+ }
4269
+ };
4270
+ var OnnxModel = class {
4271
+ constructor(runtime, session, sampleRate) {
4272
+ this.runtime = runtime;
4273
+ this.session = session;
4274
+ if (!SUPPORTED_SAMPLE_RATES.includes(sampleRate)) {
4275
+ throw new Error("Silero VAD only supports 8KHz and 16KHz sample rates");
4276
+ }
4277
+ this.sampleRate = sampleRate;
4278
+ this.windowSizeSamples = sampleRate === 8e3 ? 256 : 512;
4279
+ this.contextSize = sampleRate === 8e3 ? 32 : 64;
4280
+ this.context = new Float32Array(this.contextSize);
4281
+ this.rnnState = new Float32Array(2 * 1 * 128);
4282
+ this.inputBuffer = new Float32Array(this.contextSize + this.windowSizeSamples);
4283
+ this.sampleRateTensor = BigInt64Array.from([BigInt(sampleRate)]);
4284
+ }
4285
+ sampleRate;
4286
+ windowSizeSamples;
4287
+ contextSize;
4288
+ context;
4289
+ rnnState;
4290
+ inputBuffer;
4291
+ sampleRateTensor;
4292
+ async run(window) {
4293
+ if (window.length !== this.windowSizeSamples) {
4294
+ throw new Error(
4295
+ `window must have exactly ${this.windowSizeSamples} samples, got ${window.length}`
4296
+ );
4297
+ }
4298
+ this.inputBuffer.set(this.context, 0);
4299
+ this.inputBuffer.set(window, this.contextSize);
4300
+ const { Tensor } = this.runtime;
4301
+ const feeds = {
4302
+ input: new Tensor("float32", this.inputBuffer, [1, this.inputBuffer.length]),
4303
+ state: new Tensor("float32", this.rnnState, [2, 1, 128]),
4304
+ sr: new Tensor("int64", this.sampleRateTensor, [])
4305
+ };
4306
+ const results = await this.session.run(feeds);
4307
+ const outputKey = Object.keys(results).find((k) => k !== "stateN") ?? "output";
4308
+ const stateKey = "stateN" in results ? "stateN" : Object.keys(results).find((k) => k !== outputKey);
4309
+ const out = results[outputKey];
4310
+ const newState = stateKey ? results[stateKey] : void 0;
4311
+ if (newState && newState.data instanceof Float32Array) {
4312
+ this.rnnState = Float32Array.from(newState.data);
4313
+ }
4314
+ this.context = this.inputBuffer.slice(-this.contextSize);
4315
+ const data = out.data;
4316
+ return data[0] ?? 0;
4317
+ }
4318
+ };
4319
+ var SileroVAD = class _SileroVAD {
4320
+ constructor(model, opts) {
4321
+ this.model = model;
4322
+ this.opts = opts;
4323
+ }
4324
+ pending = new Float32Array(0);
4325
+ expFilter = new ExpFilter(0.35);
4326
+ pubSpeaking = false;
4327
+ speechThresholdDuration = 0;
4328
+ silenceThresholdDuration = 0;
4329
+ closed = false;
4330
+ /**
4331
+ * Load the Silero VAD model. Defaults match the LiveKit Silero plugin.
4332
+ * Throws if `onnxruntime-node` is not installed.
4333
+ */
4334
+ static async load(options = {}) {
4335
+ const sampleRate = options.sampleRate ?? 16e3;
4336
+ if (!SUPPORTED_SAMPLE_RATES.includes(sampleRate)) {
4337
+ throw new Error("Silero VAD only supports 8KHz and 16KHz sample rates");
4338
+ }
4339
+ const activationThreshold = options.activationThreshold ?? 0.5;
4340
+ const deactivationThreshold = options.deactivationThreshold ?? Math.max(activationThreshold - 0.15, 0.01);
4341
+ if (deactivationThreshold <= 0) {
4342
+ throw new Error("deactivationThreshold must be greater than 0");
4343
+ }
4344
+ const runtime = await loadOnnxRuntime();
4345
+ const modelPath = options.onnxFilePath ?? DEFAULT_MODEL_PATH;
4346
+ const session = await runtime.InferenceSession.create(modelPath, {
4347
+ interOpNumThreads: 1,
4348
+ intraOpNumThreads: 1,
4349
+ executionMode: "sequential",
4350
+ executionProviders: options.forceCpu === false ? void 0 : ["cpu"]
4351
+ });
4352
+ const model = new OnnxModel(runtime, session, sampleRate);
4353
+ return new _SileroVAD(model, {
4354
+ minSpeechDuration: options.minSpeechDuration ?? 0.05,
4355
+ minSilenceDuration: options.minSilenceDuration ?? 0.55,
4356
+ prefixPaddingDuration: options.prefixPaddingDuration ?? 0.5,
4357
+ activationThreshold,
4358
+ deactivationThreshold,
4359
+ sampleRate
4360
+ });
4361
+ }
4362
+ /**
4363
+ * Internal factory used by tests — bypasses onnxruntime-node loading.
4364
+ * @internal
4365
+ */
4366
+ static fromOnnxModel(runtime, session, options) {
4367
+ const model = new OnnxModel(runtime, session, options.sampleRate);
4368
+ return new _SileroVAD(model, options);
4369
+ }
4370
+ get sampleRate() {
4371
+ return this.opts.sampleRate;
4372
+ }
4373
+ /**
4374
+ * Number of int16 PCM samples that must be provided per call to
4375
+ * processFrame for the model to run one inference window.
4376
+ *
4377
+ * Constraint (ported from LiveKit Agents / Silero ONNX spec):
4378
+ * - 16 000 Hz → 512 samples (32 ms)
4379
+ * - 8 000 Hz → 256 samples (32 ms)
4380
+ *
4381
+ * Callers that feed raw audio in fixed-size chunks (e.g. WebSocket frames)
4382
+ * should buffer incoming audio until at least numFramesRequired() int16
4383
+ * samples are available before calling processFrame. The provider
4384
+ * internally buffers partial windows so smaller chunks are also safe, but
4385
+ * passing exactly one window per call minimises heap allocation.
4386
+ */
4387
+ numFramesRequired() {
4388
+ return this.opts.sampleRate === 8e3 ? 256 : 512;
4389
+ }
4390
+ async processFrame(pcmChunk, sampleRate) {
4391
+ if (this.closed) {
4392
+ throw new Error("SileroVAD is closed");
4393
+ }
4394
+ if (sampleRate !== this.opts.sampleRate) {
4395
+ throw new Error(
4396
+ `input sampleRate ${sampleRate} does not match model sampleRate ${this.opts.sampleRate}; resampling is not implemented in the Patter port`
4397
+ );
4398
+ }
4399
+ if (pcmChunk.length === 0) {
4400
+ return null;
4401
+ }
4402
+ const numSamples = Math.floor(pcmChunk.length / 2);
4403
+ if (numSamples === 0) {
4404
+ return null;
4405
+ }
4406
+ const samples = new Float32Array(numSamples);
4407
+ for (let i = 0; i < numSamples; i++) {
4408
+ samples[i] = pcmChunk.readInt16LE(i * 2) / 32767;
4409
+ }
4410
+ const merged = new Float32Array(this.pending.length + samples.length);
4411
+ merged.set(this.pending, 0);
4412
+ merged.set(samples, this.pending.length);
4413
+ this.pending = merged;
4414
+ const windowSize = this.model.windowSizeSamples;
4415
+ let event = null;
4416
+ while (this.pending.length >= windowSize) {
4417
+ const window = this.pending.slice(0, windowSize);
4418
+ this.pending = this.pending.slice(windowSize);
4419
+ const rawP = await this.model.run(window);
4420
+ const p = this.expFilter.apply(1, rawP);
4421
+ const windowDuration = windowSize / this.opts.sampleRate;
4422
+ const transition = this.advanceState(p, windowDuration);
4423
+ if (transition !== null) {
4424
+ event = transition;
4425
+ }
4426
+ }
4427
+ return event;
4428
+ }
4429
+ advanceState(p, windowDuration) {
4430
+ const opts = this.opts;
4431
+ if (p >= opts.activationThreshold || this.pubSpeaking && p > opts.deactivationThreshold) {
4432
+ this.speechThresholdDuration += windowDuration;
4433
+ this.silenceThresholdDuration = 0;
4434
+ if (!this.pubSpeaking) {
4435
+ if (this.speechThresholdDuration >= opts.minSpeechDuration) {
4436
+ this.pubSpeaking = true;
4437
+ return {
4438
+ type: "speech_start",
4439
+ confidence: p,
4440
+ durationMs: this.speechThresholdDuration * 1e3
4441
+ };
4442
+ }
4443
+ }
4444
+ } else {
4445
+ this.silenceThresholdDuration += windowDuration;
4446
+ this.speechThresholdDuration = 0;
4447
+ if (this.pubSpeaking && this.silenceThresholdDuration >= opts.minSilenceDuration) {
4448
+ this.pubSpeaking = false;
4449
+ return {
4450
+ type: "speech_end",
4451
+ confidence: p,
4452
+ durationMs: this.silenceThresholdDuration * 1e3
4453
+ };
4454
+ }
4455
+ }
4456
+ return null;
4457
+ }
4458
+ async close() {
4459
+ if (this.closed) return;
4460
+ this.closed = true;
4461
+ }
4462
+ };
4463
+
3478
4464
  // src/carriers/twilio.ts
3479
4465
  var Carrier = class {
3480
4466
  kind = "twilio";
@@ -3823,7 +4809,7 @@ var DebouncedCall = class {
3823
4809
  this.timer = setTimeout(() => {
3824
4810
  this.timer = null;
3825
4811
  Promise.resolve(this.callback()).catch((err) => {
3826
- console.error("IVR silence callback raised:", err);
4812
+ getLogger().error("IVR silence callback raised:", err);
3827
4813
  });
3828
4814
  }, this.delayMs);
3829
4815
  }
@@ -3879,7 +4865,7 @@ var IVRActivity = class {
3879
4865
  try {
3880
4866
  await this.onLoopDetected();
3881
4867
  } catch (err) {
3882
- console.error("IVR onLoopDetected callback raised:", err);
4868
+ getLogger().error("IVR onLoopDetected callback raised:", err);
3883
4869
  }
3884
4870
  }
3885
4871
  }
@@ -3917,7 +4903,7 @@ var IVRActivity = class {
3917
4903
  try {
3918
4904
  await this.onSilence();
3919
4905
  } catch (err) {
3920
- console.error("IVR onSilence callback raised:", err);
4906
+ getLogger().error("IVR onSilence callback raised:", err);
3921
4907
  }
3922
4908
  }
3923
4909
  }
@@ -3967,9 +4953,9 @@ var IVRActivity = class {
3967
4953
  };
3968
4954
 
3969
4955
  // src/services/background-audio.ts
3970
- import { promises as fs } from "fs";
3971
- import path from "path";
3972
- import { fileURLToPath } from "url";
4956
+ import { promises as fs2 } from "fs";
4957
+ import path2 from "path";
4958
+ import { fileURLToPath as fileURLToPath2 } from "url";
3973
4959
  var BuiltinAudioClip = {
3974
4960
  CITY_AMBIENCE: "city-ambience.ogg",
3975
4961
  FOREST_AMBIENCE: "forest-ambience.ogg",
@@ -3981,8 +4967,8 @@ var BuiltinAudioClip = {
3981
4967
  };
3982
4968
  function builtinClipPath(clip) {
3983
4969
  const meta = typeof import.meta !== "undefined" ? import.meta : void 0;
3984
- const here = meta?.url ? path.dirname(fileURLToPath(meta.url)) : typeof __dirname !== "undefined" ? __dirname : process.cwd();
3985
- return path.resolve(here, "..", "resources", "audio", clip);
4970
+ const here = meta?.url ? path2.dirname(fileURLToPath2(meta.url)) : typeof __dirname !== "undefined" ? __dirname : process.cwd();
4971
+ return path2.resolve(here, "..", "resources", "audio", clip);
3986
4972
  }
3987
4973
  var INT16_MIN = -32768;
3988
4974
  var INT16_MAX = 32767;
@@ -4151,7 +5137,7 @@ var BackgroundAudioPlayer = class {
4151
5137
  return source.decode(source.path);
4152
5138
  case "builtin": {
4153
5139
  const p = builtinClipPath(source.clip);
4154
- const header = await fs.readFile(p, { flag: "r" }).then((buf) => buf.subarray(0, 4));
5140
+ const header = await fs2.readFile(p, { flag: "r" }).then((buf) => buf.subarray(0, 4));
4155
5141
  if (header.toString("ascii") !== "OggS") {
4156
5142
  throw new Error(`Bundled clip ${source.clip} is not a valid Ogg file`);
4157
5143
  }
@@ -4181,15 +5167,264 @@ var BackgroundAudioPlayer = class {
4181
5167
  function isAudioConfig(value) {
4182
5168
  return typeof value === "object" && value !== null && "source" in value && typeof value.source === "object";
4183
5169
  }
5170
+
5171
+ // src/providers/twilio-adapter.ts
5172
+ var TWILIO_API_BASE = "https://api.twilio.com/2010-04-01";
5173
+ var TwilioAdapter = class _TwilioAdapter {
5174
+ accountSid;
5175
+ region;
5176
+ baseUrl;
5177
+ authHeader;
5178
+ constructor(accountSid, authToken, opts = {}) {
5179
+ if (!accountSid) throw new Error("TwilioAdapter: accountSid is required");
5180
+ if (!authToken) throw new Error("TwilioAdapter: authToken is required");
5181
+ this.accountSid = accountSid;
5182
+ this.region = opts.region;
5183
+ this.baseUrl = opts.region ? `https://api.${opts.region}.twilio.com/2010-04-01` : TWILIO_API_BASE;
5184
+ this.authHeader = `Basic ${Buffer.from(`${accountSid}:${authToken}`).toString("base64")}`;
5185
+ }
5186
+ async request(method, path3, body) {
5187
+ const url = `${this.baseUrl}/Accounts/${encodeURIComponent(this.accountSid)}${path3}`;
5188
+ const headers = { Authorization: this.authHeader };
5189
+ if (body) headers["Content-Type"] = "application/x-www-form-urlencoded";
5190
+ const response = await fetch(url, {
5191
+ method,
5192
+ headers,
5193
+ body: body ? body.toString() : void 0,
5194
+ signal: AbortSignal.timeout(3e4)
5195
+ });
5196
+ const text = await response.text();
5197
+ if (!response.ok) {
5198
+ throw new Error(`Twilio ${method} ${path3} failed: ${response.status} ${text}`);
5199
+ }
5200
+ if (!text) return {};
5201
+ try {
5202
+ return JSON.parse(text);
5203
+ } catch (e) {
5204
+ throw new Error(`Twilio returned non-JSON response: ${String(e)}`);
5205
+ }
5206
+ }
5207
+ /**
5208
+ * Provision a local phone number in the given country.
5209
+ *
5210
+ * Lists available local numbers, then purchases the first match.
5211
+ */
5212
+ async provisionNumber(opts) {
5213
+ const country = encodeURIComponent(opts.countryCode);
5214
+ const queryParts = ["PageSize=1"];
5215
+ if (opts.areaCode) queryParts.push(`AreaCode=${encodeURIComponent(opts.areaCode)}`);
5216
+ const path3 = `/AvailablePhoneNumbers/${country}/Local.json?${queryParts.join("&")}`;
5217
+ const available = await this.request("GET", path3);
5218
+ const first = available.available_phone_numbers?.[0]?.phone_number;
5219
+ if (!first) {
5220
+ throw new Error(`TwilioAdapter: no numbers available for country ${opts.countryCode}`);
5221
+ }
5222
+ const body = new URLSearchParams({ PhoneNumber: first });
5223
+ const purchased = await this.request(
5224
+ "POST",
5225
+ "/IncomingPhoneNumbers.json",
5226
+ body
5227
+ );
5228
+ if (!purchased.sid || !purchased.phone_number) {
5229
+ throw new Error("TwilioAdapter: malformed response from IncomingPhoneNumbers.create");
5230
+ }
5231
+ return { phoneNumber: purchased.phone_number, sid: purchased.sid };
5232
+ }
5233
+ /** Update an already-purchased number to point at our voice webhook. */
5234
+ async configureNumber(phoneNumberSid, opts) {
5235
+ if (!phoneNumberSid) throw new Error("TwilioAdapter: phoneNumberSid is required");
5236
+ const body = new URLSearchParams({
5237
+ VoiceUrl: opts.voiceUrl,
5238
+ VoiceMethod: "POST"
5239
+ });
5240
+ if (opts.statusCallback) body.set("StatusCallback", opts.statusCallback);
5241
+ await this.request(
5242
+ "POST",
5243
+ `/IncomingPhoneNumbers/${encodeURIComponent(phoneNumberSid)}.json`,
5244
+ body
5245
+ );
5246
+ }
5247
+ /** Place an outbound call. Returns the Twilio call SID. */
5248
+ async initiateCall(opts) {
5249
+ if (!opts.url && !opts.streamUrl) {
5250
+ throw new Error("TwilioAdapter: initiateCall requires either url or streamUrl");
5251
+ }
5252
+ const body = new URLSearchParams({
5253
+ From: opts.from,
5254
+ To: opts.to
5255
+ });
5256
+ if (opts.url) {
5257
+ body.set("Url", opts.url);
5258
+ } else if (opts.streamUrl) {
5259
+ body.set("Twiml", _TwilioAdapter.generateStreamTwiml(opts.streamUrl));
5260
+ }
5261
+ if (opts.statusCallback) body.set("StatusCallback", opts.statusCallback);
5262
+ if (opts.machineDetection) body.set("MachineDetection", opts.machineDetection);
5263
+ if (opts.extraParams) {
5264
+ for (const [key, value] of Object.entries(opts.extraParams)) {
5265
+ body.set(key, value);
5266
+ }
5267
+ }
5268
+ const call = await this.request("POST", "/Calls.json", body);
5269
+ if (!call.sid) {
5270
+ throw new Error("TwilioAdapter: Calls.create returned no SID");
5271
+ }
5272
+ return { callSid: call.sid };
5273
+ }
5274
+ /**
5275
+ * Build a minimal ``<Response><Connect><Stream url="..."/></Connect></Response>``
5276
+ * TwiML document. Mirrors the Python adapter's ``generate_stream_twiml``.
5277
+ */
5278
+ static generateStreamTwiml(streamUrl) {
5279
+ const escaped = streamUrl.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;").replace(/'/g, "&apos;");
5280
+ return `<?xml version="1.0" encoding="UTF-8"?><Response><Connect><Stream url="${escaped}"/></Connect></Response>`;
5281
+ }
5282
+ /** Force-complete an in-progress call. */
5283
+ async endCall(callSid) {
5284
+ if (!callSid) throw new Error("TwilioAdapter: callSid is required");
5285
+ const body = new URLSearchParams({ Status: "completed" });
5286
+ try {
5287
+ await this.request(
5288
+ "POST",
5289
+ `/Calls/${encodeURIComponent(callSid)}.json`,
5290
+ body
5291
+ );
5292
+ } catch (err) {
5293
+ getLogger().warn(`[TwilioAdapter] endCall failed for ${callSid}: ${String(err)}`);
5294
+ throw err;
5295
+ }
5296
+ }
5297
+ };
5298
+
5299
+ // src/providers/telnyx-adapter.ts
5300
+ import { randomUUID as randomUUID2 } from "crypto";
5301
+ var TELNYX_API_BASE = "https://api.telnyx.com/v2";
5302
+ var TelnyxAdapter = class {
5303
+ apiKey;
5304
+ connectionId;
5305
+ baseUrl = TELNYX_API_BASE;
5306
+ constructor(apiKey, connectionId) {
5307
+ if (!apiKey) throw new Error("TelnyxAdapter: apiKey is required");
5308
+ this.apiKey = apiKey;
5309
+ this.connectionId = connectionId;
5310
+ }
5311
+ async request(method, path3, body) {
5312
+ const url = `${this.baseUrl}${path3}`;
5313
+ const headers = {
5314
+ Authorization: `Bearer ${this.apiKey}`
5315
+ };
5316
+ if (body !== void 0) headers["Content-Type"] = "application/json";
5317
+ const response = await fetch(url, {
5318
+ method,
5319
+ headers,
5320
+ body: body !== void 0 ? JSON.stringify(body) : void 0,
5321
+ signal: AbortSignal.timeout(3e4)
5322
+ });
5323
+ const text = await response.text();
5324
+ if (!response.ok) {
5325
+ throw new Error(`Telnyx ${method} ${path3} failed: ${response.status} ${text}`);
5326
+ }
5327
+ if (!text) return {};
5328
+ try {
5329
+ return JSON.parse(text);
5330
+ } catch (e) {
5331
+ throw new Error(`Telnyx returned non-JSON response: ${String(e)}`);
5332
+ }
5333
+ }
5334
+ /**
5335
+ * Search available numbers for ``countryCode`` and place an order for the
5336
+ * first match. Returns both the reserved E.164 number and the order ID.
5337
+ */
5338
+ async provisionNumber(opts) {
5339
+ const country = encodeURIComponent(opts.countryCode);
5340
+ const searchPath = `/available_phone_numbers?filter[phone_number][country_code]=${country}&filter[limit]=1`;
5341
+ const available = await this.request("GET", searchPath);
5342
+ const chosen = available.data?.[0]?.phone_number;
5343
+ if (!chosen) {
5344
+ throw new Error(`TelnyxAdapter: no numbers available for ${opts.countryCode}`);
5345
+ }
5346
+ const orderBody = {
5347
+ phone_numbers: [{ phone_number: chosen }]
5348
+ };
5349
+ if (this.connectionId) {
5350
+ orderBody.connection_id = this.connectionId;
5351
+ }
5352
+ const order = await this.request(
5353
+ "POST",
5354
+ "/number_orders",
5355
+ orderBody
5356
+ );
5357
+ const orderId = order.data?.id ?? "";
5358
+ return { phoneNumber: chosen, orderId };
5359
+ }
5360
+ /** Attach a number to a Call Control Application. */
5361
+ async configureNumber(phoneNumber, opts) {
5362
+ if (!phoneNumber) throw new Error("TelnyxAdapter: phoneNumber is required");
5363
+ if (!opts.connectionId) throw new Error("TelnyxAdapter: connectionId is required");
5364
+ await this.request(
5365
+ "PATCH",
5366
+ `/phone_numbers/${encodeURIComponent(phoneNumber)}/voice`,
5367
+ { connection_id: opts.connectionId, tech_prefix_enabled: false }
5368
+ );
5369
+ }
5370
+ /**
5371
+ * Place an outbound call on the Call Control Application.
5372
+ *
5373
+ * Note: we intentionally do NOT pass ``stream_url`` here — audio streaming
5374
+ * is configured on the Application itself (or started explicitly via a
5375
+ * ``streaming_start`` command). Passing ``stream_url`` on dial is a
5376
+ * deprecated code path that Telnyx rejects in newer API versions.
5377
+ */
5378
+ async initiateCall(opts) {
5379
+ const connectionId = opts.connectionId ?? this.connectionId;
5380
+ if (!connectionId) {
5381
+ throw new Error("TelnyxAdapter: connectionId must be provided to initiateCall");
5382
+ }
5383
+ const payload = {
5384
+ connection_id: connectionId,
5385
+ from: opts.from,
5386
+ to: opts.to
5387
+ };
5388
+ if (opts.clientState) {
5389
+ payload.client_state = Buffer.from(opts.clientState, "utf-8").toString("base64");
5390
+ }
5391
+ const resp = await this.request("POST", "/calls", payload);
5392
+ const callControlId = resp.data?.call_control_id;
5393
+ if (!callControlId) {
5394
+ throw new Error("TelnyxAdapter: /calls returned no call_control_id");
5395
+ }
5396
+ return { callControlId };
5397
+ }
5398
+ /** Hang up an in-progress call. */
5399
+ async endCall(callControlId, opts = {}) {
5400
+ if (!callControlId) throw new Error("TelnyxAdapter: callControlId is required");
5401
+ const encoded = encodeURIComponent(callControlId);
5402
+ const body = {
5403
+ command_id: opts.commandId ?? randomUUID2()
5404
+ };
5405
+ try {
5406
+ await this.request(
5407
+ "POST",
5408
+ `/calls/${encoded}/actions/hangup`,
5409
+ body
5410
+ );
5411
+ } catch (err) {
5412
+ getLogger().warn(
5413
+ `[TelnyxAdapter] endCall failed for ${callControlId}: ${String(err)}`
5414
+ );
5415
+ throw err;
5416
+ }
5417
+ }
5418
+ };
4184
5419
  export {
4185
5420
  AllProvidersFailedError,
4186
5421
  LLM2 as AnthropicLLM,
4187
- STT5 as AssemblyAISTT,
5422
+ STT6 as AssemblyAISTT,
4188
5423
  AuthenticationError,
4189
5424
  BackgroundAudioPlayer,
4190
5425
  BuiltinAudioClip,
4191
5426
  CallMetricsAccumulator,
4192
- STT3 as CartesiaSTT,
5427
+ STT4 as CartesiaSTT,
4193
5428
  TTS3 as CartesiaTTS,
4194
5429
  LLM4 as CerebrasLLM,
4195
5430
  ChatContext,
@@ -4198,9 +5433,11 @@ export {
4198
5433
  DEFAULT_PRICING,
4199
5434
  DTMF_EVENTS,
4200
5435
  STT as DeepgramSTT,
5436
+ DefaultToolExecutor,
4201
5437
  ConvAI as ElevenLabsConvAI,
4202
5438
  ElevenLabsConvAIAdapter,
4203
5439
  TTS as ElevenLabsTTS,
5440
+ EventBus,
4204
5441
  FallbackLLMProvider,
4205
5442
  GEMINI_DEFAULT_INPUT_SR,
4206
5443
  GEMINI_DEFAULT_OUTPUT_SR,
@@ -4212,31 +5449,48 @@ export {
4212
5449
  LLMLoop,
4213
5450
  TTS5 as LMNTTTS,
4214
5451
  MetricsStore,
5452
+ Ngrok,
4215
5453
  LLM as OpenAILLM,
4216
5454
  OpenAILLMProvider,
4217
5455
  Realtime as OpenAIRealtime,
4218
5456
  OpenAIRealtimeAdapter,
4219
5457
  TTS2 as OpenAITTS,
5458
+ STT3 as OpenAITranscribeSTT,
4220
5459
  PartialStreamError,
4221
5460
  Patter,
4222
5461
  PatterConnectionError,
4223
5462
  PatterError,
5463
+ PatterTool,
5464
+ PcmCarry,
4224
5465
  PipelineHookExecutor,
4225
5466
  ProvisionError,
5467
+ RateLimitError,
4226
5468
  RemoteMessageHandler,
4227
5469
  TTS4 as RimeTTS,
5470
+ SPAN_BARGEIN,
5471
+ SPAN_CALL,
5472
+ SPAN_ENDPOINT,
5473
+ SPAN_LLM,
5474
+ SPAN_STT,
5475
+ SPAN_TOOL,
5476
+ SPAN_TTS,
4228
5477
  SentenceChunker,
4229
- STT4 as SonioxSTT,
5478
+ SileroVAD,
5479
+ STT5 as SonioxSTT,
5480
+ StatefulResampler,
4230
5481
  Static as StaticTunnel,
4231
5482
  Carrier2 as Telnyx,
5483
+ TelnyxAdapter,
4232
5484
  TestSession,
4233
5485
  TfidfLoopDetector,
4234
5486
  Tool,
4235
5487
  Carrier as Twilio,
5488
+ TwilioAdapter,
4236
5489
  ULTRAVOX_DEFAULT_API_BASE,
4237
5490
  ULTRAVOX_DEFAULT_SR,
4238
5491
  UltravoxRealtimeAdapter,
4239
5492
  STT2 as WhisperSTT,
5493
+ assemblyai,
4240
5494
  builtinClipPath,
4241
5495
  calculateRealtimeCost,
4242
5496
  calculateSttCost,
@@ -4244,6 +5498,10 @@ export {
4244
5498
  calculateTtsCost,
4245
5499
  callsToCsv,
4246
5500
  callsToJson,
5501
+ cartesia,
5502
+ createResampler16kTo8k,
5503
+ createResampler24kTo16k,
5504
+ createResampler8kTo16k,
4247
5505
  deepgram,
4248
5506
  defineTool,
4249
5507
  elevenlabs,
@@ -4251,10 +5509,14 @@ export {
4251
5509
  filterForTTS,
4252
5510
  filterMarkdown,
4253
5511
  formatDtmf,
5512
+ geminiLive,
4254
5513
  getLogger,
4255
5514
  guardrail,
5515
+ initTracing,
4256
5516
  isRemoteUrl,
5517
+ isTracingEnabled,
4257
5518
  isWebSocketUrl,
5519
+ lmnt,
4258
5520
  makeAuthMiddleware,
4259
5521
  mergePricing,
4260
5522
  mixPcm,
@@ -4268,12 +5530,17 @@ export {
4268
5530
  resample24kTo16k,
4269
5531
  resample8kTo16k,
4270
5532
  resamplePcm,
5533
+ rime,
4271
5534
  scheduleCron,
4272
5535
  scheduleInterval,
4273
5536
  scheduleOnce,
4274
5537
  selectSoundFromList,
4275
5538
  setLogger,
5539
+ soniox,
5540
+ speechmatics,
5541
+ startSpan,
4276
5542
  startTunnel,
4277
5543
  tool,
5544
+ ultravox,
4278
5545
  whisper
4279
5546
  };