getpatter 0.5.2 → 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -3,21 +3,37 @@ import {
3
3
  } from "./chunk-AFUYSNDH.mjs";
4
4
  import {
5
5
  startTunnel
6
- } from "./chunk-AKQFOFLG.mjs";
6
+ } from "./chunk-SEMKNPCD.mjs";
7
7
  import {
8
+ AuthenticationError,
8
9
  CallMetricsAccumulator,
9
10
  DEFAULT_MIN_SENTENCE_LEN,
10
11
  DEFAULT_PRICING,
11
12
  DeepgramSTT,
13
+ DefaultToolExecutor,
12
14
  ElevenLabsConvAIAdapter,
13
15
  EmbeddedServer,
16
+ EventBus,
14
17
  LLMLoop,
15
18
  MetricsStore,
16
19
  OpenAILLMProvider,
17
20
  OpenAIRealtimeAdapter,
21
+ PatterConnectionError,
22
+ PatterError,
23
+ PcmCarry,
18
24
  PipelineHookExecutor,
25
+ ProvisionError,
26
+ RateLimitError,
19
27
  RemoteMessageHandler,
28
+ SPAN_BARGEIN,
29
+ SPAN_CALL,
30
+ SPAN_ENDPOINT,
31
+ SPAN_LLM,
32
+ SPAN_STT,
33
+ SPAN_TOOL,
34
+ SPAN_TTS,
20
35
  SentenceChunker,
36
+ StatefulResampler,
21
37
  TestSession,
22
38
  calculateRealtimeCost,
23
39
  calculateSttCost,
@@ -25,7 +41,12 @@ import {
25
41
  calculateTtsCost,
26
42
  callsToCsv,
27
43
  callsToJson,
44
+ createResampler16kTo8k,
45
+ createResampler24kTo16k,
46
+ createResampler8kTo16k,
47
+ initTracing,
28
48
  isRemoteUrl,
49
+ isTracingEnabled,
29
50
  isWebSocketUrl,
30
51
  makeAuthMiddleware,
31
52
  mergePricing,
@@ -35,153 +56,14 @@ import {
35
56
  pcm16ToMulaw,
36
57
  resample16kTo8k,
37
58
  resample24kTo16k,
38
- resample8kTo16k
39
- } from "./chunk-7SDDK2AO.mjs";
59
+ resample8kTo16k,
60
+ startSpan
61
+ } from "./chunk-FIFIWBL7.mjs";
40
62
  import {
41
63
  getLogger,
42
64
  setLogger
43
- } from "./chunk-FMNRCP5X.mjs";
44
- import "./chunk-OOIUSZB4.mjs";
45
-
46
- // src/connection.ts
47
- import WebSocket from "ws";
48
-
49
- // src/errors.ts
50
- var PatterError = class extends Error {
51
- constructor(message) {
52
- super(message);
53
- this.name = "PatterError";
54
- }
55
- };
56
- var PatterConnectionError = class extends PatterError {
57
- constructor(message) {
58
- super(message);
59
- this.name = "PatterConnectionError";
60
- }
61
- };
62
- var AuthenticationError = class extends PatterError {
63
- constructor(message) {
64
- super(message);
65
- this.name = "AuthenticationError";
66
- }
67
- };
68
- var ProvisionError = class extends PatterError {
69
- constructor(message) {
70
- super(message);
71
- this.name = "ProvisionError";
72
- }
73
- };
74
-
75
- // src/connection.ts
76
- var DEFAULT_BACKEND_URL = "wss://api.getpatter.com";
77
- var PatterConnection = class {
78
- apiKey;
79
- backendUrl;
80
- wsUrl;
81
- ws = null;
82
- onMessage = null;
83
- onCallStart = null;
84
- onCallEnd = null;
85
- constructor(apiKey, backendUrl = DEFAULT_BACKEND_URL) {
86
- this.apiKey = apiKey;
87
- this.backendUrl = backendUrl.replace(/\/+$/, "");
88
- this.wsUrl = `${this.backendUrl}/ws/sdk`;
89
- }
90
- get isConnected() {
91
- return this.ws !== null && this.ws.readyState === WebSocket.OPEN;
92
- }
93
- async connect(options) {
94
- this.onMessage = options.onMessage;
95
- this.onCallStart = options.onCallStart ?? null;
96
- this.onCallEnd = options.onCallEnd ?? null;
97
- return new Promise((resolve, reject) => {
98
- this.ws = new WebSocket(this.wsUrl, {
99
- headers: { "X-API-Key": this.apiKey }
100
- });
101
- const onError = (err) => {
102
- this.ws?.off("error", onError);
103
- reject(new PatterConnectionError(`Failed to connect: ${err.message}`));
104
- };
105
- this.ws.once("open", () => {
106
- this.ws?.off("error", onError);
107
- this.setupListeners();
108
- resolve();
109
- });
110
- this.ws.on("error", onError);
111
- });
112
- }
113
- setupListeners() {
114
- if (!this.ws) return;
115
- this.ws.on("error", (err) => {
116
- getLogger().error(`WebSocket error: ${err.message}`);
117
- });
118
- this.ws.on("message", async (data) => {
119
- const raw = data.toString();
120
- let parsed;
121
- try {
122
- parsed = JSON.parse(raw);
123
- } catch {
124
- return;
125
- }
126
- const msgType = parsed.type;
127
- if (msgType === "message" && this.onMessage) {
128
- const msg = {
129
- text: parsed.text,
130
- callId: parsed.call_id,
131
- caller: parsed.caller ?? ""
132
- };
133
- try {
134
- const response = await this.onMessage(msg);
135
- if (response != null) {
136
- await this.sendResponse(msg.callId, response);
137
- }
138
- } catch {
139
- }
140
- } else if (msgType === "call_start" && this.onCallStart) {
141
- await this.onCallStart(parsed);
142
- } else if (msgType === "call_end" && this.onCallEnd) {
143
- await this.onCallEnd(parsed);
144
- }
145
- });
146
- this.ws.on("close", () => {
147
- this.ws = null;
148
- });
149
- }
150
- async sendResponse(callId, text) {
151
- if (!this.ws) throw new PatterConnectionError("Not connected");
152
- this.ws.send(JSON.stringify({ type: "response", call_id: callId, text }));
153
- }
154
- async requestCall(fromNumber, toNumber, firstMessage = "") {
155
- if (!this.ws) throw new PatterConnectionError("Not connected");
156
- this.ws.send(
157
- JSON.stringify({
158
- type: "call",
159
- from: fromNumber,
160
- to: toNumber,
161
- first_message: firstMessage
162
- })
163
- );
164
- }
165
- async disconnect() {
166
- if (this.ws) {
167
- this.ws.close();
168
- this.ws = null;
169
- }
170
- }
171
- parseMessage(raw) {
172
- try {
173
- const data = JSON.parse(raw);
174
- if (data.type !== "message") return null;
175
- return {
176
- text: data.text,
177
- callId: data.call_id,
178
- caller: data.caller ?? ""
179
- };
180
- } catch {
181
- return null;
182
- }
183
- }
184
- };
65
+ } from "./chunk-VJVDG4V5.mjs";
66
+ import "./chunk-QHHBUCMT.mjs";
185
67
 
186
68
  // src/engines/openai.ts
187
69
  var Realtime = class {
@@ -241,86 +123,77 @@ var Static = class {
241
123
  this.hostname = opts.hostname;
242
124
  }
243
125
  };
126
+ var Ngrok = class {
127
+ kind = "ngrok";
128
+ hostname;
129
+ constructor(opts = {}) {
130
+ this.hostname = opts.hostname ?? "";
131
+ }
132
+ /**
133
+ * Returns the configured hostname or throws if the marker was constructed
134
+ * without one. Patter does not start ngrok itself — the user is expected
135
+ * to either supply a hostname or run ngrok out-of-band.
136
+ */
137
+ start() {
138
+ if (!this.hostname) {
139
+ throw new Error(
140
+ 'Ngrok requires a hostname; pass new Ngrok({ hostname: "abc.ngrok.io" })'
141
+ );
142
+ }
143
+ return this.hostname;
144
+ }
145
+ };
244
146
 
245
147
  // src/client.ts
246
- var DEFAULT_BACKEND_URL2 = "wss://api.getpatter.com";
247
- var DEFAULT_REST_URL = "https://api.getpatter.com";
248
- function sttConfigToDict(cfg) {
249
- const out = {
250
- provider: cfg.provider,
251
- api_key: cfg.apiKey,
252
- language: cfg.language
253
- };
254
- if (cfg.options) out.options = { ...cfg.options };
255
- return out;
256
- }
257
- function ttsConfigToDict(cfg) {
258
- const out = {
259
- provider: cfg.provider,
260
- api_key: cfg.apiKey,
261
- voice: cfg.voice
262
- };
263
- if (cfg.options) out.options = { ...cfg.options };
264
- return out;
265
- }
266
148
  var Patter = class {
267
- apiKey;
268
- backendUrl;
269
- restUrl;
270
- connection;
271
- mode;
272
149
  localConfig;
273
150
  embeddedServer = null;
274
151
  tunnelHandle = null;
152
+ /**
153
+ * Live `MetricsStore` for the embedded server. Returns `null` before
154
+ * `serve()` is called. Exposed so integrations like `PatterTool` can
155
+ * subscribe to per-call lifecycle events (`call_initiated`,
156
+ * `call_start`, `call_end`).
157
+ */
158
+ get metricsStore() {
159
+ return this.embeddedServer?.metricsStore ?? null;
160
+ }
275
161
  constructor(options) {
276
- const hasCarrier = "carrier" in options && options.carrier !== void 0;
277
- const isLocal = "mode" in options && options.mode === "local" || hasCarrier;
278
- if (isLocal) {
279
- const local = options;
280
- if (!local.phoneNumber) {
281
- throw new Error("Local mode requires phoneNumber");
282
- }
283
- if (!local.carrier) {
162
+ if (options.apiKey !== void 0) {
163
+ throw new Error(
164
+ "Patter Cloud is not yet available in this SDK release. Use local mode with `carrier:` and `phoneNumber:`. Cloud mode will return in a future release."
165
+ );
166
+ }
167
+ if (!options.phoneNumber) {
168
+ throw new Error("Local mode requires phoneNumber");
169
+ }
170
+ if (!options.carrier) {
171
+ throw new Error(
172
+ "Local mode requires a `carrier` instance. Pass `carrier: new Twilio({...})` or `carrier: new Telnyx({...})`."
173
+ );
174
+ }
175
+ const carrier = options.carrier;
176
+ const tunnel = options.tunnel;
177
+ let tunnelWebhookUrl;
178
+ if (tunnel instanceof Static) {
179
+ if (options.webhookUrl) {
284
180
  throw new Error(
285
- "Local mode requires a `carrier` instance. Pass `carrier: new Twilio({...})` or `carrier: new Telnyx({...})`."
181
+ "Cannot use both `tunnel: new StaticTunnel(...)` and `webhookUrl`. Pick one."
286
182
  );
287
183
  }
288
- const carrier = local.carrier;
289
- const tunnel = local.tunnel;
290
- let tunnelWebhookUrl;
291
- if (tunnel instanceof Static) {
292
- if (local.webhookUrl) {
293
- throw new Error(
294
- "Cannot use both `tunnel: new StaticTunnel(...)` and `webhookUrl`. Pick one."
295
- );
296
- }
297
- tunnelWebhookUrl = tunnel.hostname;
298
- }
299
- this.mode = "local";
300
- const rawWebhook = tunnelWebhookUrl ?? local.webhookUrl;
301
- const normalizedWebhook = rawWebhook ? rawWebhook.replace(/^https?:\/\//, "").replace(/\/$/, "") : void 0;
302
- this.localConfig = {
303
- carrier,
304
- phoneNumber: local.phoneNumber,
305
- webhookUrl: normalizedWebhook,
306
- tunnel: local.tunnel,
307
- openaiKey: local.openaiKey
308
- };
309
- this.apiKey = "";
310
- this.backendUrl = DEFAULT_BACKEND_URL2;
311
- this.restUrl = DEFAULT_REST_URL;
312
- this.connection = new PatterConnection("", DEFAULT_BACKEND_URL2);
313
- } else {
314
- const cloudOpts = options;
315
- this.mode = "cloud";
316
- this.localConfig = null;
317
- this.apiKey = cloudOpts.apiKey;
318
- this.backendUrl = cloudOpts.backendUrl ?? DEFAULT_BACKEND_URL2;
319
- this.restUrl = cloudOpts.restUrl ?? DEFAULT_REST_URL;
320
- this.connection = new PatterConnection(this.apiKey, this.backendUrl);
321
- }
184
+ tunnelWebhookUrl = tunnel.hostname;
185
+ }
186
+ const rawWebhook = tunnelWebhookUrl ?? options.webhookUrl;
187
+ const normalizedWebhook = rawWebhook ? rawWebhook.replace(/^https?:\/\//, "").replace(/\/$/, "") : void 0;
188
+ this.localConfig = {
189
+ carrier,
190
+ phoneNumber: options.phoneNumber,
191
+ webhookUrl: normalizedWebhook,
192
+ tunnel: options.tunnel,
193
+ openaiKey: options.openaiKey
194
+ };
322
195
  }
323
- // === Local mode ===
196
+ // === Agent definition ===
324
197
  agent(opts) {
325
198
  let working = { ...opts };
326
199
  if (opts.engine) {
@@ -337,7 +210,7 @@ var Patter = class {
337
210
  model: working.model ?? engine.model,
338
211
  voice: working.voice ?? engine.voice
339
212
  };
340
- if (this.localConfig && !this.localConfig.openaiKey) {
213
+ if (!this.localConfig.openaiKey) {
341
214
  this.localConfig = { ...this.localConfig, openaiKey: engine.apiKey };
342
215
  }
343
216
  } else if (engine instanceof ConvAI) {
@@ -387,10 +260,8 @@ var Patter = class {
387
260
  }
388
261
  return working;
389
262
  }
263
+ // === Serve / test / call ===
390
264
  async serve(opts) {
391
- if (this.mode !== "local" || !this.localConfig) {
392
- throw new Error("serve() is only available in local mode");
393
- }
394
265
  if (!opts.agent || typeof opts.agent !== "object") {
395
266
  throw new TypeError("agent is required. Use phone.agent() to create one.");
396
267
  }
@@ -415,12 +286,13 @@ var Patter = class {
415
286
  if (wantsCloudflared && webhookUrl) {
416
287
  throw new Error("Cannot use both tunnel: true and webhookUrl. Pick one.");
417
288
  }
418
- const { showBanner } = await import("./banner-FLR2HE5Z.mjs");
289
+ const { showBanner } = await import("./banner-3GNZ6VQK.mjs");
419
290
  showBanner();
420
291
  if (wantsCloudflared) {
421
- const { startTunnel: startTunnel2 } = await import("./tunnel-O7ICMSTP.mjs");
292
+ const { startTunnel: startTunnel2 } = await import("./tunnel-UVR3PPAU.mjs");
422
293
  this.tunnelHandle = await startTunnel2(port);
423
294
  webhookUrl = this.tunnelHandle.hostname;
295
+ this.localConfig = { ...this.localConfig, webhookUrl };
424
296
  }
425
297
  if (!webhookUrl) {
426
298
  throw new Error(
@@ -429,7 +301,7 @@ var Patter = class {
429
301
  }
430
302
  const carrier = this.localConfig.carrier;
431
303
  const telephonyProvider = carrier.kind === "twilio" ? "twilio" : "telnyx";
432
- const { autoConfigureCarrier } = await import("./carrier-config-CPG5CROM.mjs");
304
+ const { autoConfigureCarrier } = await import("./carrier-config-33HQ2W4V.mjs");
433
305
  await autoConfigureCarrier({
434
306
  telephonyProvider,
435
307
  twilioSid: carrier.kind === "twilio" ? carrier.accountSid : void 0,
@@ -466,138 +338,56 @@ var Patter = class {
466
338
  await this.embeddedServer.start(port);
467
339
  }
468
340
  async test(opts) {
469
- if (this.mode !== "local") {
470
- throw new Error("test() is only available in local mode");
471
- }
472
- const { TestSession: TestSession2 } = await import("./test-mode-K2TTPRGE.mjs");
341
+ const { TestSession: TestSession2 } = await import("./test-mode-MVJ3SKG4.mjs");
473
342
  const session = new TestSession2();
474
343
  await session.run({
475
344
  agent: opts.agent,
476
- openaiKey: this.localConfig?.openaiKey,
345
+ openaiKey: this.localConfig.openaiKey,
477
346
  onMessage: typeof opts.onMessage === "function" ? opts.onMessage : void 0,
478
347
  onCallStart: opts.onCallStart,
479
348
  onCallEnd: opts.onCallEnd
480
349
  });
481
350
  }
482
- // === Cloud mode legacy ===
483
- async connect(options) {
484
- if (options.provider && options.providerKey && options.number) {
485
- await this.registerNumber(
486
- options.provider,
487
- options.providerKey,
488
- options.number,
489
- options.providerSecret,
490
- options.country ?? "US",
491
- options.stt,
492
- options.tts
493
- );
494
- }
495
- await this.connection.connect({
496
- onMessage: options.onMessage,
497
- onCallStart: options.onCallStart,
498
- onCallEnd: options.onCallEnd
499
- });
500
- }
501
351
  async call(options) {
502
- if (this.mode === "local") {
503
- const localOpts = options;
504
- if (!localOpts.to) {
505
- throw new Error("'to' phone number is required");
506
- }
507
- if (!localOpts.to.startsWith("+")) {
508
- throw new Error(`'to' must be in E.164 format (e.g., '+1234567890'). Got: '${localOpts.to}'`);
509
- }
510
- if (!this.localConfig) {
511
- throw new Error("local config missing");
512
- }
513
- const { phoneNumber, webhookUrl, carrier } = this.localConfig;
514
- if (carrier.kind === "telnyx") {
515
- const telnyxKey = carrier.apiKey;
516
- const connectionId = carrier.connectionId;
517
- const streamUrl = `wss://${webhookUrl}/ws/stream/${encodeURIComponent(localOpts.to)}?caller=${encodeURIComponent(phoneNumber)}&callee=${encodeURIComponent(localOpts.to)}`;
518
- const telnyxPayload = {
519
- connection_id: connectionId,
520
- from: phoneNumber,
521
- to: localOpts.to,
522
- stream_url: streamUrl,
523
- stream_track: "both_tracks"
524
- };
525
- if (localOpts.ringTimeout !== void 0) {
526
- telnyxPayload.timeout_secs = Math.max(1, Math.floor(localOpts.ringTimeout));
527
- }
528
- const response2 = await fetch("https://api.telnyx.com/v2/calls", {
529
- method: "POST",
530
- headers: {
531
- "Content-Type": "application/json",
532
- Authorization: `Bearer ${telnyxKey}`
533
- },
534
- body: JSON.stringify(telnyxPayload)
535
- });
536
- if (!response2.ok) {
537
- throw new ProvisionError(`Failed to initiate Telnyx call: ${await response2.text()}`);
538
- }
539
- if (this.embeddedServer) {
540
- try {
541
- const body = await response2.clone().json();
542
- const callId = body.data?.call_control_id;
543
- if (callId) {
544
- this.embeddedServer.metricsStore.recordCallInitiated({
545
- call_id: callId,
546
- caller: phoneNumber,
547
- callee: localOpts.to,
548
- direction: "outbound"
549
- });
550
- }
551
- } catch {
552
- }
553
- }
554
- return;
555
- }
556
- const twilioSid = carrier.accountSid;
557
- const twilioToken = carrier.authToken;
558
- const statusCallbackUrl = `https://${webhookUrl}/webhooks/twilio/status`;
559
- const url = `https://api.twilio.com/2010-04-01/Accounts/${twilioSid}/Calls.json`;
560
- const params = new URLSearchParams({
561
- To: localOpts.to,
562
- From: phoneNumber,
563
- Url: `https://${webhookUrl}/webhooks/twilio/voice`,
564
- StatusCallback: statusCallbackUrl,
565
- StatusCallbackMethod: "POST",
566
- // Full lifecycle so the dashboard sees ringing/no-answer/busy/failed
567
- // transitions even when media never arrives.
568
- StatusCallbackEvent: "initiated ringing answered completed"
569
- });
570
- if (localOpts.machineDetection) {
571
- params.append("MachineDetection", "DetectMessageEnd");
572
- params.append("AsyncAmd", "true");
573
- params.append("AsyncAmdStatusCallback", `https://${webhookUrl}/webhooks/twilio/amd`);
574
- }
575
- if (localOpts.ringTimeout !== void 0) {
576
- params.append("Timeout", String(Math.max(1, Math.floor(localOpts.ringTimeout))));
577
- }
578
- if (localOpts.voicemailMessage && this.embeddedServer) {
579
- this.embeddedServer.voicemailMessage = localOpts.voicemailMessage;
352
+ if (!options.to) {
353
+ throw new Error("'to' phone number is required");
354
+ }
355
+ if (!options.to.startsWith("+")) {
356
+ throw new Error(`'to' must be in E.164 format (e.g., '+1234567890'). Got: '${options.to}'`);
357
+ }
358
+ const { phoneNumber, webhookUrl, carrier } = this.localConfig;
359
+ const effectiveRingTimeout = options.ringTimeout === void 0 ? 25 : options.ringTimeout;
360
+ if (carrier.kind === "telnyx") {
361
+ const telnyxKey = carrier.apiKey;
362
+ const connectionId = carrier.connectionId;
363
+ const telnyxPayload = {
364
+ connection_id: connectionId,
365
+ from: phoneNumber,
366
+ to: options.to
367
+ };
368
+ if (effectiveRingTimeout !== null && effectiveRingTimeout !== void 0) {
369
+ telnyxPayload.timeout_secs = Math.max(1, Math.floor(effectiveRingTimeout));
580
370
  }
581
- const response = await fetch(url, {
371
+ const response2 = await fetch("https://api.telnyx.com/v2/calls", {
582
372
  method: "POST",
583
373
  headers: {
584
- "Content-Type": "application/x-www-form-urlencoded",
585
- Authorization: `Basic ${Buffer.from(`${twilioSid}:${twilioToken}`).toString("base64")}`
374
+ "Content-Type": "application/json",
375
+ Authorization: `Bearer ${telnyxKey}`
586
376
  },
587
- body: params.toString()
377
+ body: JSON.stringify(telnyxPayload)
588
378
  });
589
- if (!response.ok) {
590
- throw new ProvisionError(`Failed to initiate call: ${await response.text()}`);
379
+ if (!response2.ok) {
380
+ throw new ProvisionError(`Failed to initiate Telnyx call: ${await response2.text()}`);
591
381
  }
592
382
  if (this.embeddedServer) {
593
383
  try {
594
- const body = await response.clone().json();
595
- const callSid = body.sid;
596
- if (callSid) {
384
+ const body = await response2.clone().json();
385
+ const callId = body.data?.call_control_id;
386
+ if (callId) {
597
387
  this.embeddedServer.metricsStore.recordCallInitiated({
598
- call_id: callSid,
388
+ call_id: callId,
599
389
  caller: phoneNumber,
600
- callee: localOpts.to,
390
+ callee: options.to,
601
391
  direction: "outbound"
602
392
  });
603
393
  }
@@ -606,21 +396,59 @@ var Patter = class {
606
396
  }
607
397
  return;
608
398
  }
609
- const cloudOpts = options;
610
- if (!this.connection.isConnected) {
611
- if (cloudOpts.onMessage) {
612
- await this.connection.connect({ onMessage: cloudOpts.onMessage });
613
- } else {
614
- throw new PatterConnectionError(
615
- "Not connected. Call connect() first or pass onMessage."
616
- );
399
+ const twilioSid = carrier.accountSid;
400
+ const twilioToken = carrier.authToken;
401
+ const statusCallbackUrl = `https://${webhookUrl}/webhooks/twilio/status`;
402
+ const url = `https://api.twilio.com/2010-04-01/Accounts/${twilioSid}/Calls.json`;
403
+ const streamUrl = `wss://${webhookUrl}/ws/stream/outbound`;
404
+ const inlineTwiml = `<?xml version="1.0" encoding="UTF-8"?><Response><Connect><Stream url="${streamUrl}"/></Connect></Response>`;
405
+ const params = new URLSearchParams({
406
+ To: options.to,
407
+ From: phoneNumber,
408
+ Twiml: inlineTwiml,
409
+ StatusCallback: statusCallbackUrl,
410
+ StatusCallbackMethod: "POST",
411
+ // Full lifecycle so the dashboard sees ringing/no-answer/busy/failed
412
+ // transitions even when media never arrives.
413
+ StatusCallbackEvent: "initiated ringing answered completed"
414
+ });
415
+ if (options.machineDetection) {
416
+ params.append("MachineDetection", "DetectMessageEnd");
417
+ params.append("AsyncAmd", "true");
418
+ params.append("AsyncAmdStatusCallback", `https://${webhookUrl}/webhooks/twilio/amd`);
419
+ }
420
+ if (effectiveRingTimeout !== null && effectiveRingTimeout !== void 0) {
421
+ params.append("Timeout", String(Math.max(1, Math.floor(effectiveRingTimeout))));
422
+ }
423
+ if (options.voicemailMessage && this.embeddedServer) {
424
+ this.embeddedServer.voicemailMessage = options.voicemailMessage;
425
+ }
426
+ const response = await fetch(url, {
427
+ method: "POST",
428
+ headers: {
429
+ "Content-Type": "application/x-www-form-urlencoded",
430
+ Authorization: `Basic ${Buffer.from(`${twilioSid}:${twilioToken}`).toString("base64")}`
431
+ },
432
+ body: params.toString()
433
+ });
434
+ if (!response.ok) {
435
+ throw new ProvisionError(`Failed to initiate call: ${await response.text()}`);
436
+ }
437
+ if (this.embeddedServer) {
438
+ try {
439
+ const body = await response.clone().json();
440
+ const callSid = body.sid;
441
+ if (callSid) {
442
+ this.embeddedServer.metricsStore.recordCallInitiated({
443
+ call_id: callSid,
444
+ caller: phoneNumber,
445
+ callee: options.to,
446
+ direction: "outbound"
447
+ });
448
+ }
449
+ } catch {
617
450
  }
618
451
  }
619
- await this.connection.requestCall(
620
- cloudOpts.fromNumber ?? "",
621
- cloudOpts.to,
622
- cloudOpts.firstMessage ?? ""
623
- );
624
452
  }
625
453
  async disconnect() {
626
454
  if (this.tunnelHandle) {
@@ -631,86 +459,6 @@ var Patter = class {
631
459
  await this.embeddedServer.stop();
632
460
  this.embeddedServer = null;
633
461
  }
634
- await this.connection.disconnect();
635
- }
636
- // === Agent Management ===
637
- async createAgent(opts) {
638
- const response = await fetch(`${this.restUrl}/api/agents`, {
639
- method: "POST",
640
- headers: { "Content-Type": "application/json", "X-API-Key": this.apiKey },
641
- body: JSON.stringify({
642
- name: opts.name,
643
- system_prompt: opts.systemPrompt,
644
- model: opts.model ?? "gpt-4o-mini-realtime-preview",
645
- voice: opts.voice ?? "alloy",
646
- voice_provider: opts.voiceProvider ?? "openai",
647
- language: opts.language ?? "en",
648
- first_message: opts.firstMessage ?? null,
649
- tools: opts.tools?.map((t) => ({ name: t.name, description: t.description, parameters: t.parameters, webhook_url: t.webhookUrl })) ?? null
650
- })
651
- });
652
- if (response.status !== 201) throw new ProvisionError(`Failed to create agent: ${await response.text()}`);
653
- const data = await response.json();
654
- return { id: data.id, name: data.name, systemPrompt: data.system_prompt, model: data.model, voice: data.voice, voiceProvider: data.voice_provider, language: data.language, firstMessage: data.first_message, tools: data.tools };
655
- }
656
- async listAgents() {
657
- const response = await fetch(`${this.restUrl}/api/agents`, { headers: { "X-API-Key": this.apiKey } });
658
- if (!response.ok) throw new ProvisionError(`Failed to list agents: ${response.status}`);
659
- const data = await response.json();
660
- return data.map((a) => ({ id: a.id, name: a.name, systemPrompt: a.system_prompt, model: a.model, voice: a.voice, voiceProvider: a.voice_provider, language: a.language, firstMessage: a.first_message, tools: a.tools }));
661
- }
662
- async buyNumber(opts = {}) {
663
- const response = await fetch(`${this.restUrl}/api/numbers/buy`, {
664
- method: "POST",
665
- headers: { "Content-Type": "application/json", "X-API-Key": this.apiKey },
666
- body: JSON.stringify({ country: opts.country ?? "US", provider: opts.provider ?? "twilio" })
667
- });
668
- if (response.status !== 201) throw new ProvisionError(`Failed to buy number: ${await response.text()}`);
669
- const data = await response.json();
670
- return { id: data.id, number: data.number, provider: data.provider, country: data.country, status: data.status, agentId: data.agent_id };
671
- }
672
- async assignAgent(numberId, agentId) {
673
- const response = await fetch(`${this.restUrl}/api/phone-numbers/${numberId}/assign-agent`, {
674
- method: "POST",
675
- headers: { "Content-Type": "application/json", "X-API-Key": this.apiKey },
676
- body: JSON.stringify({ agent_id: agentId })
677
- });
678
- if (response.status !== 200) throw new ProvisionError(`Failed to assign agent: ${await response.text()}`);
679
- }
680
- async listCalls(limit = 50) {
681
- if (!Number.isInteger(limit) || limit < 1 || limit > 1e3) {
682
- throw new RangeError(`limit must be an integer between 1 and 1000, got ${limit}`);
683
- }
684
- const response = await fetch(`${this.restUrl}/api/calls?limit=${limit}`, { headers: { "X-API-Key": this.apiKey } });
685
- if (!response.ok) throw new ProvisionError(`Failed to list calls: ${response.status}`);
686
- const data = await response.json();
687
- return data.map((c) => ({ id: c.id, direction: c.direction, caller: c.caller, callee: c.callee, startedAt: c.started_at, endedAt: c.ended_at, durationSeconds: c.duration_seconds, status: c.status, transcript: c.transcript }));
688
- }
689
- // Internal
690
- async registerNumber(provider, providerKey, number, providerSecret, country = "US", stt, tts) {
691
- const credentials = { api_key: providerKey };
692
- if (providerSecret) credentials.api_secret = providerSecret;
693
- const response = await fetch(`${this.restUrl}/api/phone-numbers`, {
694
- method: "POST",
695
- headers: {
696
- "Content-Type": "application/json",
697
- "X-API-Key": this.apiKey
698
- },
699
- body: JSON.stringify({
700
- number,
701
- provider,
702
- provider_credentials: credentials,
703
- country,
704
- stt_config: stt ? stt.toDict?.() ?? sttConfigToDict(stt) : null,
705
- tts_config: tts ? tts.toDict?.() ?? ttsConfigToDict(tts) : null
706
- })
707
- });
708
- if (response.status === 409) return;
709
- if (response.status !== 201) {
710
- throw new ProvisionError(
711
- `Failed to register number: ${await response.text()}`
712
- );
713
- }
714
462
  }
715
463
  };
716
464
 
@@ -830,6 +578,46 @@ function elevenlabs(opts) {
830
578
  function openaiTts(opts) {
831
579
  return new TTSConfigImpl("openai", opts.apiKey, opts.voice ?? "alloy");
832
580
  }
581
+ function soniox(opts) {
582
+ return new STTConfigImpl("soniox", opts.apiKey, opts.language ?? "en");
583
+ }
584
+ function speechmatics(_opts) {
585
+ throw new Error(
586
+ "speechmatics() is Python-only right now \u2014 the TS Speechmatics adapter has not shipped yet. Use the Python SDK (sdk-py) or pick another STT provider such as deepgram() / assemblyai() / soniox()."
587
+ );
588
+ }
589
+ function assemblyai(opts) {
590
+ return new STTConfigImpl("assemblyai", opts.apiKey, opts.language ?? "en");
591
+ }
592
+ function cartesia(opts) {
593
+ return new TTSConfigImpl(
594
+ "cartesia",
595
+ opts.apiKey,
596
+ opts.voice ?? "f786b574-daa5-4673-aa0c-cbe3e8534c02"
597
+ );
598
+ }
599
+ function rime(opts) {
600
+ return new TTSConfigImpl("rime", opts.apiKey, opts.voice ?? "astra");
601
+ }
602
+ function lmnt(opts) {
603
+ return new TTSConfigImpl("lmnt", opts.apiKey, opts.voice ?? "leah");
604
+ }
605
+ function ultravox(opts) {
606
+ return {
607
+ provider: "ultravox",
608
+ apiKey: opts.apiKey,
609
+ model: opts.model,
610
+ voice: opts.voice
611
+ };
612
+ }
613
+ function geminiLive(opts) {
614
+ return {
615
+ provider: "gemini_live",
616
+ apiKey: opts.apiKey,
617
+ model: opts.model,
618
+ voice: opts.voice
619
+ };
620
+ }
833
621
 
834
622
  // src/fallback-provider.ts
835
623
  var AllProvidersFailedError = class extends Error {
@@ -1028,13 +816,275 @@ var FallbackLLMProvider = class {
1028
816
  }
1029
817
  };
1030
818
 
819
+ // src/integrations/patter-tool.ts
820
+ import { EventEmitter } from "events";
821
+ var PARAMETERS_SCHEMA = {
822
+ type: "object",
823
+ properties: {
824
+ to: {
825
+ type: "string",
826
+ description: 'Destination phone number in E.164 format (e.g. "+15551234567"). Required.'
827
+ },
828
+ goal: {
829
+ type: "string",
830
+ description: "What the agent should accomplish on the call. Becomes the in-call agent's system prompt for this single call."
831
+ },
832
+ first_message: {
833
+ type: "string",
834
+ description: "Optional first message the agent speaks when the callee answers. Defaults to a generic greeting."
835
+ },
836
+ max_duration_sec: {
837
+ type: "integer",
838
+ description: "Hard timeout for the call in seconds. Default 180. The call is force-ended at this deadline whether or not it has resolved.",
839
+ minimum: 5,
840
+ maximum: 1800
841
+ }
842
+ },
843
+ required: ["to"]
844
+ };
845
+ var DEFAULT_NAME = "make_phone_call";
846
+ var DEFAULT_DESCRIPTION = "Place a real outbound phone call. Returns a JSON object with the full transcript, call status, duration in seconds, and cost. Use this when the user asks you to call someone, schedule appointments by phone, or otherwise reach a human via voice.";
847
+ var PatterTool = class _PatterTool {
848
+ name;
849
+ description;
850
+ phone;
851
+ agent;
852
+ maxDurationSec;
853
+ recording;
854
+ started = false;
855
+ /** Resolver for the next `call_initiated` SSE event. Only set inside the
856
+ * dial mutex (`dialQueue`), so two parallel `execute()` calls never share
857
+ * it and never lose a dispatch. */
858
+ pendingDial = null;
859
+ /** Mutex that serializes the dial → call_id capture critical section.
860
+ * Each `execute()` chains a continuation onto this promise so the
861
+ * `pendingDial` slot is owned by exactly one caller at a time. */
862
+ dialQueue = Promise.resolve();
863
+ /** Captured SSE listener so `stop()` can detach it (prevents leaks when
864
+ * the underlying Patter instance outlives this tool). */
865
+ sseListener = null;
866
+ /** Captured Patter metrics store, for cleanup in `stop()`. */
867
+ metricsStoreRef = null;
868
+ /** call_id → pending promise machinery. */
869
+ pending = /* @__PURE__ */ new Map();
870
+ bus = new EventEmitter();
871
+ /** How long to wait for the `call_initiated` SSE before failing the dial. */
872
+ static DIAL_CAPTURE_TIMEOUT_MS = 1e4;
873
+ constructor(opts) {
874
+ if (!opts.phone) {
875
+ throw new Error("PatterTool: `phone` (a Patter instance) is required.");
876
+ }
877
+ this.phone = opts.phone;
878
+ this.agent = opts.agent;
879
+ this.name = opts.name ?? DEFAULT_NAME;
880
+ this.description = opts.description ?? DEFAULT_DESCRIPTION;
881
+ this.maxDurationSec = Math.max(5, Math.min(1800, opts.maxDurationSec ?? 180));
882
+ this.recording = opts.recording ?? false;
883
+ }
884
+ // --- Schema exporters ---------------------------------------------------
885
+ /** OpenAI Chat Completions / Assistants tool spec. */
886
+ openaiSchema() {
887
+ return {
888
+ type: "function",
889
+ function: {
890
+ name: this.name,
891
+ description: this.description,
892
+ parameters: PARAMETERS_SCHEMA
893
+ }
894
+ };
895
+ }
896
+ /** Anthropic Messages API tool spec. */
897
+ anthropicSchema() {
898
+ return {
899
+ name: this.name,
900
+ description: this.description,
901
+ input_schema: PARAMETERS_SCHEMA
902
+ };
903
+ }
904
+ /**
905
+ * Hermes Agent (Nous Research) registry schema. Same JSON-Schema shape as
906
+ * Anthropic's; Hermes consumes it via `registry.register({ schema: ... })`.
907
+ */
908
+ hermesSchema() {
909
+ return {
910
+ name: this.name,
911
+ description: this.description,
912
+ parameters: PARAMETERS_SCHEMA
913
+ };
914
+ }
915
+ // --- Lifecycle ----------------------------------------------------------
916
+ /** Start the underlying Patter server. Idempotent. */
917
+ async start() {
918
+ if (this.started) return;
919
+ if (!this.agent) {
920
+ throw new Error(
921
+ "PatterTool.start: `agent` config is required. Pass `{ stt, llm, tts }` or an `engine` (e.g. OpenAIRealtime) when constructing PatterTool."
922
+ );
923
+ }
924
+ const builtAgent = this.phone.agent(this.agent);
925
+ await this.phone.serve({
926
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
927
+ agent: builtAgent,
928
+ recording: this.recording,
929
+ onCallEnd: this.onCallEndHandler.bind(this)
930
+ });
931
+ const store = this.phone.metricsStore;
932
+ if (!store) {
933
+ throw new Error(
934
+ "PatterTool.start: phone.metricsStore is null after serve() \u2014 is the dashboard disabled?"
935
+ );
936
+ }
937
+ const listener = (event) => {
938
+ if (event.type === "call_initiated" && this.pendingDial) {
939
+ const callId = event.data.call_id || "";
940
+ if (callId) {
941
+ const dispatch = this.pendingDial;
942
+ this.pendingDial = null;
943
+ dispatch(callId);
944
+ }
945
+ }
946
+ };
947
+ store.on("sse", listener);
948
+ this.sseListener = listener;
949
+ this.metricsStoreRef = store;
950
+ this.started = true;
951
+ }
952
+ /** Stop the underlying Patter server (and reject any pending calls). */
953
+ async stop() {
954
+ if (!this.started) return;
955
+ if (this.metricsStoreRef && this.sseListener) {
956
+ this.metricsStoreRef.off("sse", this.sseListener);
957
+ }
958
+ this.sseListener = null;
959
+ this.metricsStoreRef = null;
960
+ this.pendingDial = null;
961
+ for (const [, p] of this.pending) {
962
+ clearTimeout(p.timer);
963
+ p.reject(new Error("PatterTool: shutdown while call pending"));
964
+ }
965
+ this.pending.clear();
966
+ const stoppable = this.phone;
967
+ if (typeof stoppable.stop === "function") {
968
+ await stoppable.stop();
969
+ }
970
+ this.started = false;
971
+ }
972
+ // --- Execution ----------------------------------------------------------
973
+ async execute(args) {
974
+ if (!this.started) await this.start();
975
+ if (!args || typeof args.to !== "string" || !args.to.startsWith("+")) {
976
+ throw new Error('PatterTool.execute: `to` must be an E.164 phone number (e.g. "+15551234567").');
977
+ }
978
+ const timeoutSec = Math.max(
979
+ 5,
980
+ Math.min(1800, args.max_duration_sec ?? this.maxDurationSec)
981
+ );
982
+ const baseAgent = this.agent ?? {};
983
+ const overrideAgent = this.phone.agent({
984
+ ...baseAgent,
985
+ ...args.goal !== void 0 ? { systemPrompt: args.goal } : {},
986
+ ...args.first_message !== void 0 ? { firstMessage: args.first_message } : {}
987
+ });
988
+ const callId = await this.acquireCallId(args.to, overrideAgent);
989
+ return new Promise((resolve, reject) => {
990
+ const timer = setTimeout(() => {
991
+ this.pending.delete(callId);
992
+ reject(new Error(`PatterTool.execute: call ${callId} exceeded ${timeoutSec}s timeout`));
993
+ }, timeoutSec * 1e3);
994
+ this.pending.set(callId, {
995
+ resolve,
996
+ reject,
997
+ timer,
998
+ startedAt: Date.now() / 1e3
999
+ });
1000
+ });
1001
+ }
1002
+ /** Issue the outbound dial under the mutex and return its assigned call_id. */
1003
+ async acquireCallId(to, agent) {
1004
+ let release;
1005
+ const slot = new Promise((r) => {
1006
+ release = r;
1007
+ });
1008
+ const previous = this.dialQueue;
1009
+ this.dialQueue = previous.then(() => slot);
1010
+ await previous;
1011
+ let captureTimer = null;
1012
+ try {
1013
+ const callIdPromise = new Promise((resolve, reject) => {
1014
+ this.pendingDial = resolve;
1015
+ captureTimer = setTimeout(() => {
1016
+ this.pendingDial = null;
1017
+ reject(
1018
+ new Error(
1019
+ `PatterTool.execute: did not observe call_initiated within ${_PatterTool.DIAL_CAPTURE_TIMEOUT_MS}ms`
1020
+ )
1021
+ );
1022
+ }, _PatterTool.DIAL_CAPTURE_TIMEOUT_MS);
1023
+ });
1024
+ await this.phone.call({
1025
+ to,
1026
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
1027
+ agent
1028
+ });
1029
+ const callId = await callIdPromise;
1030
+ if (captureTimer) clearTimeout(captureTimer);
1031
+ return callId;
1032
+ } finally {
1033
+ if (captureTimer) clearTimeout(captureTimer);
1034
+ this.pendingDial = null;
1035
+ release();
1036
+ }
1037
+ }
1038
+ /**
1039
+ * Hermes-style handler: `(args, kwargs) => Promise<string>` returning a JSON
1040
+ * string with either the result envelope or an `{"error": "..."}` payload.
1041
+ * Mirrors the Python `PatterTool.hermes_handler` so cross-SDK adapters share
1042
+ * the same wire contract.
1043
+ */
1044
+ hermesHandler() {
1045
+ return async (args) => {
1046
+ try {
1047
+ const result = await this.execute(args);
1048
+ return JSON.stringify(result);
1049
+ } catch (err) {
1050
+ return JSON.stringify({ error: err instanceof Error ? err.message : String(err) });
1051
+ }
1052
+ };
1053
+ }
1054
+ // --- Internal: onCallEnd dispatcher -------------------------------------
1055
+ async onCallEndHandler(data) {
1056
+ const callId = data.call_id || "";
1057
+ if (!callId) return;
1058
+ const pending = this.pending.get(callId);
1059
+ if (!pending) {
1060
+ this.bus.emit("orphan_end", { call_id: callId, data });
1061
+ return;
1062
+ }
1063
+ clearTimeout(pending.timer);
1064
+ this.pending.delete(callId);
1065
+ const metrics = data.metrics && typeof data.metrics === "object" ? data.metrics : null;
1066
+ const cost = metrics && typeof metrics.cost === "object" && metrics.cost && typeof metrics.cost.total === "number" ? metrics.cost.total : void 0;
1067
+ const duration = typeof metrics?.duration_seconds === "number" ? metrics?.duration_seconds : Math.max(0, Date.now() / 1e3 - pending.startedAt);
1068
+ const transcript = Array.isArray(data.transcript) ? data.transcript : [];
1069
+ const status = data.status || "completed";
1070
+ pending.resolve({
1071
+ call_id: callId,
1072
+ status,
1073
+ duration_seconds: duration,
1074
+ cost_usd: cost,
1075
+ transcript,
1076
+ metrics
1077
+ });
1078
+ }
1079
+ };
1080
+
1031
1081
  // src/providers/gemini-live.ts
1032
1082
  var GEMINI_DEFAULT_INPUT_SR = 16e3;
1033
1083
  var GEMINI_DEFAULT_OUTPUT_SR = 24e3;
1034
1084
  var GeminiLiveAdapter = class {
1035
1085
  constructor(apiKey, options = {}) {
1036
1086
  this.apiKey = apiKey;
1037
- this.model = options.model ?? "gemini-2.0-flash-exp";
1087
+ this.model = options.model ?? "gemini-2.5-flash-native-audio-preview-09-2025";
1038
1088
  this.voice = options.voice ?? "Puck";
1039
1089
  this.instructions = options.instructions ?? "";
1040
1090
  this.language = options.language ?? "en-US";
@@ -1057,18 +1107,27 @@ var GeminiLiveAdapter = class {
1057
1107
  receiveLoop = null;
1058
1108
  handlers = [];
1059
1109
  running = false;
1110
+ /**
1111
+ * Tracks call_id -> function name so tool responses can be sent back with
1112
+ * the correct `name` field (Gemini expects the original function name,
1113
+ * not the call_id).
1114
+ */
1115
+ pendingToolCalls = /* @__PURE__ */ new Map();
1060
1116
  async connect() {
1061
1117
  let genaiModule;
1062
1118
  try {
1063
1119
  const modName = "@google/genai";
1064
1120
  genaiModule = await import(modName);
1065
- } catch (err) {
1121
+ } catch {
1066
1122
  throw new Error(
1067
- "Gemini Live requires the '@google/genai' package. Install with: npm install @google/genai"
1123
+ '\nGemini Live requires the "@google/genai" package, which is not installed.\n\n Install: npm install @google/genai\n\nThis is an optional peer dependency of getpatter \u2014 it is only needed when\nyou use GeminiLive as an agent engine. Other LLM/engine providers do not\nrequire it.\n'
1068
1124
  );
1069
1125
  }
1070
1126
  const { GoogleGenAI } = genaiModule;
1071
- this.client = new GoogleGenAI({ apiKey: this.apiKey });
1127
+ this.client = new GoogleGenAI({
1128
+ apiKey: this.apiKey,
1129
+ httpOptions: { apiVersion: "v1alpha" }
1130
+ });
1072
1131
  const config = {
1073
1132
  responseModalities: ["AUDIO"],
1074
1133
  speechConfig: {
@@ -1125,9 +1184,11 @@ var GeminiLiveAdapter = class {
1125
1184
  async sendFunctionResult(callId, result) {
1126
1185
  if (!this.session) return;
1127
1186
  const sess = this.session;
1187
+ const name = this.pendingToolCalls.get(callId) ?? callId;
1188
+ this.pendingToolCalls.delete(callId);
1128
1189
  await sess.sendToolResponse?.({
1129
1190
  functionResponses: [
1130
- { id: callId, name: callId, response: { result } }
1191
+ { id: callId, name, response: { result } }
1131
1192
  ]
1132
1193
  });
1133
1194
  }
@@ -1171,9 +1232,14 @@ var GeminiLiveAdapter = class {
1171
1232
  if (r.toolCall) {
1172
1233
  for (const fn of r.toolCall.functionCalls ?? []) {
1173
1234
  const args = fn.args ?? {};
1235
+ const callId = fn.id ?? "";
1236
+ const fnName = fn.name ?? "";
1237
+ if (callId && fnName) {
1238
+ this.pendingToolCalls.set(callId, fnName);
1239
+ }
1174
1240
  await this.emit("function_call", {
1175
- call_id: fn.id ?? "",
1176
- name: fn.name ?? "",
1241
+ call_id: callId,
1242
+ name: fnName,
1177
1243
  arguments: typeof args === "string" ? args : JSON.stringify(args)
1178
1244
  });
1179
1245
  }
@@ -1200,11 +1266,12 @@ var GeminiLiveAdapter = class {
1200
1266
  await this.receiveLoop.catch(() => void 0);
1201
1267
  this.receiveLoop = null;
1202
1268
  }
1269
+ this.pendingToolCalls.clear();
1203
1270
  }
1204
1271
  };
1205
1272
 
1206
1273
  // src/providers/ultravox-realtime.ts
1207
- import WebSocket2 from "ws";
1274
+ import WebSocket from "ws";
1208
1275
  var ULTRAVOX_DEFAULT_API_BASE = "https://api.ultravox.ai/api";
1209
1276
  var ULTRAVOX_DEFAULT_SR = 16e3;
1210
1277
  var UltravoxRealtimeAdapter = class {
@@ -1241,7 +1308,6 @@ var UltravoxRealtimeAdapter = class {
1241
1308
  outputSampleRate: this.sampleRate
1242
1309
  }
1243
1310
  },
1244
- firstSpeaker: this.firstMessage ? "FIRST_SPEAKER_AGENT" : "FIRST_SPEAKER_USER",
1245
1311
  recordingEnabled: false
1246
1312
  };
1247
1313
  if (this.voice) body.voice = this.voice;
@@ -1251,6 +1317,8 @@ var UltravoxRealtimeAdapter = class {
1251
1317
  body.initialMessages = [
1252
1318
  { role: "MESSAGE_ROLE_AGENT", text: this.firstMessage }
1253
1319
  ];
1320
+ } else {
1321
+ body.firstSpeaker = "FIRST_SPEAKER_USER";
1254
1322
  }
1255
1323
  if (this.tools?.length) {
1256
1324
  body.selectedTools = this.tools.map((t) => ({
@@ -1275,7 +1343,7 @@ var UltravoxRealtimeAdapter = class {
1275
1343
  }
1276
1344
  const call = await resp.json();
1277
1345
  if (!call.joinUrl) throw new Error("Ultravox response missing joinUrl");
1278
- this.ws = new WebSocket2(call.joinUrl);
1346
+ this.ws = new WebSocket(call.joinUrl);
1279
1347
  await new Promise((resolve, reject) => {
1280
1348
  const ws = this.ws;
1281
1349
  const onOpen = () => {
@@ -1300,14 +1368,16 @@ var UltravoxRealtimeAdapter = class {
1300
1368
  });
1301
1369
  }
1302
1370
  sendAudio(pcm) {
1303
- if (!this.ws || this.ws.readyState !== WebSocket2.OPEN) return;
1371
+ if (!this.ws || this.ws.readyState !== WebSocket.OPEN) return;
1304
1372
  this.ws.send(pcm, { binary: true });
1305
1373
  }
1306
1374
  async sendText(text) {
1307
- this.ws?.send(JSON.stringify({ type: "input_text_message", text }));
1375
+ if (!this.ws || this.ws.readyState !== WebSocket.OPEN) return;
1376
+ this.ws.send(JSON.stringify({ type: "input_text_message", text }));
1308
1377
  }
1309
1378
  async sendFunctionResult(callId, result) {
1310
- this.ws?.send(
1379
+ if (!this.ws || this.ws.readyState !== WebSocket.OPEN) return;
1380
+ this.ws.send(
1311
1381
  JSON.stringify({
1312
1382
  type: "client_tool_result",
1313
1383
  invocationId: callId,
@@ -1317,7 +1387,8 @@ var UltravoxRealtimeAdapter = class {
1317
1387
  );
1318
1388
  }
1319
1389
  cancelResponse() {
1320
- this.ws?.send(JSON.stringify({ type: "playback_clear_buffer" }));
1390
+ if (!this.ws || this.ws.readyState !== WebSocket.OPEN) return;
1391
+ this.ws.send(JSON.stringify({ type: "playback_clear_buffer" }));
1321
1392
  }
1322
1393
  onEvent(handler) {
1323
1394
  this.handlers.push(handler);
@@ -1398,7 +1469,7 @@ async function loadCron() {
1398
1469
  try {
1399
1470
  const imported = await import(
1400
1471
  /* @vite-ignore */
1401
- "./node-cron-373UVDIO.mjs"
1472
+ "./node-cron-6PRPSBG5.mjs"
1402
1473
  );
1403
1474
  cronModule = imported && imported.default ? imported.default : imported;
1404
1475
  return cronModule;
@@ -1506,6 +1577,7 @@ function scheduleInterval(intervalOrOpts, callback) {
1506
1577
 
1507
1578
  // src/stt/deepgram.ts
1508
1579
  var STT = class extends DeepgramSTT {
1580
+ static providerKey = "deepgram";
1509
1581
  constructor(opts = {}) {
1510
1582
  const key = opts.apiKey ?? process.env.DEEPGRAM_API_KEY;
1511
1583
  if (!key) {
@@ -1533,6 +1605,7 @@ var STT = class extends DeepgramSTT {
1533
1605
  // src/providers/whisper-stt.ts
1534
1606
  var OPENAI_TRANSCRIPTION_URL = "https://api.openai.com/v1/audio/transcriptions";
1535
1607
  var DEFAULT_BUFFER_SIZE = 16e3 * 2;
1608
+ var ALLOWED_MODELS = /* @__PURE__ */ new Set(["whisper-1", "gpt-4o-transcribe", "gpt-4o-mini-transcribe"]);
1536
1609
  function wrapPcmInWav(pcm, sampleRate = 16e3, channels = 1, bitsPerSample = 16) {
1537
1610
  const dataSize = pcm.length;
1538
1611
  const header = Buffer.alloc(44);
@@ -1556,33 +1629,63 @@ var WhisperSTT = class _WhisperSTT {
1556
1629
  model;
1557
1630
  language;
1558
1631
  bufferSize;
1559
- buffer = Buffer.alloc(0);
1560
- callbacks = [];
1632
+ responseFormat;
1633
+ // Accumulate chunks in an array and concat once on flush — avoids the
1634
+ // per-``sendAudio`` O(n) ``Buffer.concat([buffer, chunk])`` that quickly
1635
+ // dominates CPU when the phone leg delivers 20 ms frames.
1636
+ chunks = [];
1637
+ bufferedBytes = 0;
1638
+ callbacks = /* @__PURE__ */ new Set();
1561
1639
  running = false;
1562
1640
  pendingTranscriptions = [];
1563
- constructor(apiKey, model = "whisper-1", language, bufferSize = DEFAULT_BUFFER_SIZE) {
1641
+ /**
1642
+ * @param apiKey OpenAI API key.
1643
+ * @param language ISO-639-1 language code (e.g. ``"en"``, ``"it"``). Optional.
1644
+ * @param model One of ``whisper-1``, ``gpt-4o-transcribe``, ``gpt-4o-mini-transcribe``.
1645
+ * @param bufferSize Bytes of PCM16 to buffer before each transcription request.
1646
+ * @param responseFormat ``"json"`` (default) or ``"verbose_json"``.
1647
+ *
1648
+ * Argument order matches the Python SDK's ``WhisperSTT(api_key, language, model, response_format)``
1649
+ * for cross-language parity. Pre-0.5.3 the TS positional order was
1650
+ * ``(apiKey, model, language, bufferSize, responseFormat)`` — callers using
1651
+ * the old order will need to swap ``language`` and ``model``.
1652
+ */
1653
+ constructor(apiKey, language, model = "whisper-1", bufferSize = DEFAULT_BUFFER_SIZE, responseFormat = "json") {
1654
+ if (!ALLOWED_MODELS.has(model)) {
1655
+ throw new Error(
1656
+ `WhisperSTT: unsupported model "${model}". Expected one of ${[...ALLOWED_MODELS].join(", ")}.`
1657
+ );
1658
+ }
1564
1659
  this.apiKey = apiKey;
1565
1660
  this.model = model;
1566
1661
  this.language = language;
1567
1662
  this.bufferSize = bufferSize;
1663
+ this.responseFormat = responseFormat;
1568
1664
  }
1569
1665
  /** Factory for Twilio calls — mulaw 8 kHz is transcoded upstream, so we still receive PCM 16-bit. */
1570
1666
  static forTwilio(apiKey, language = "en", model = "whisper-1") {
1571
- return new _WhisperSTT(apiKey, model, language);
1667
+ return new _WhisperSTT(apiKey, language, model);
1572
1668
  }
1573
1669
  async connect() {
1574
1670
  this.running = true;
1575
- this.buffer = Buffer.alloc(0);
1671
+ this.chunks = [];
1672
+ this.bufferedBytes = 0;
1576
1673
  }
1577
1674
  sendAudio(audio) {
1578
1675
  if (!this.running) return;
1579
- this.buffer = Buffer.concat([this.buffer, audio]);
1580
- if (this.buffer.length >= this.bufferSize) {
1581
- const pcm = this.buffer;
1582
- this.buffer = Buffer.alloc(0);
1676
+ this.chunks.push(audio);
1677
+ this.bufferedBytes += audio.length;
1678
+ if (this.bufferedBytes >= this.bufferSize) {
1679
+ const pcm = this.flushChunks();
1583
1680
  this.trackTranscription(this.transcribeBuffer(pcm));
1584
1681
  }
1585
1682
  }
1683
+ flushChunks() {
1684
+ const pcm = this.chunks.length === 1 ? this.chunks[0] : Buffer.concat(this.chunks, this.bufferedBytes);
1685
+ this.chunks = [];
1686
+ this.bufferedBytes = 0;
1687
+ return pcm;
1688
+ }
1586
1689
  trackTranscription(promise) {
1587
1690
  const wrapped = promise.finally(() => {
1588
1691
  const idx = this.pendingTranscriptions.indexOf(wrapped);
@@ -1590,25 +1693,25 @@ var WhisperSTT = class _WhisperSTT {
1590
1693
  });
1591
1694
  this.pendingTranscriptions.push(wrapped);
1592
1695
  }
1696
+ /**
1697
+ * Register a transcript listener. Unlike the previous implementation
1698
+ * which capped at 10 and silently replaced the last one, we now keep all
1699
+ * registered callbacks in a Set; use {@link offTranscript} to remove one.
1700
+ */
1593
1701
  onTranscript(callback) {
1594
- if (this.callbacks.length >= 10) {
1595
- getLogger().warn("WhisperSTT: maximum of 10 onTranscript callbacks reached; replacing the last callback.");
1596
- this.callbacks[this.callbacks.length - 1] = callback;
1597
- return;
1598
- }
1599
- this.callbacks.push(callback);
1702
+ this.callbacks.add(callback);
1703
+ }
1704
+ offTranscript(callback) {
1705
+ this.callbacks.delete(callback);
1600
1706
  }
1601
1707
  async close() {
1602
1708
  this.running = false;
1603
- if (this.buffer.length >= this.bufferSize / 4) {
1604
- const pcm = this.buffer;
1605
- this.buffer = Buffer.alloc(0);
1709
+ if (this.bufferedBytes > 0) {
1710
+ const pcm = this.flushChunks();
1606
1711
  this.trackTranscription(this.transcribeBuffer(pcm));
1607
- } else {
1608
- this.buffer = Buffer.alloc(0);
1609
1712
  }
1610
1713
  await Promise.allSettled(this.pendingTranscriptions);
1611
- this.callbacks = [];
1714
+ this.callbacks.clear();
1612
1715
  }
1613
1716
  // ------------------------------------------------------------------
1614
1717
  // Private
@@ -1618,6 +1721,7 @@ var WhisperSTT = class _WhisperSTT {
1618
1721
  const formData = new FormData();
1619
1722
  formData.append("file", new Blob([wav.buffer.slice(wav.byteOffset, wav.byteOffset + wav.byteLength)], { type: "audio/wav" }), "audio.wav");
1620
1723
  formData.append("model", this.model);
1724
+ formData.append("response_format", this.responseFormat);
1621
1725
  if (this.language) {
1622
1726
  formData.append("language", this.language);
1623
1727
  }
@@ -1639,7 +1743,7 @@ var WhisperSTT = class _WhisperSTT {
1639
1743
  const transcript = {
1640
1744
  text,
1641
1745
  isFinal: true,
1642
- confidence: 1
1746
+ confidence: extractConfidence(json)
1643
1747
  };
1644
1748
  for (const cb of this.callbacks) {
1645
1749
  cb(transcript);
@@ -1649,9 +1753,23 @@ var WhisperSTT = class _WhisperSTT {
1649
1753
  }
1650
1754
  }
1651
1755
  };
1756
+ function extractConfidence(payload) {
1757
+ const segments = payload.segments;
1758
+ if (!segments || segments.length === 0) return 1;
1759
+ const scores = [];
1760
+ for (const seg of segments) {
1761
+ const logp = seg.avg_logprob;
1762
+ if (typeof logp === "number") {
1763
+ scores.push(Math.max(0, Math.min(1, Math.exp(logp))));
1764
+ }
1765
+ }
1766
+ if (scores.length === 0) return 1;
1767
+ return scores.reduce((a, b) => a + b, 0) / scores.length;
1768
+ }
1652
1769
 
1653
1770
  // src/stt/whisper.ts
1654
1771
  var STT2 = class extends WhisperSTT {
1772
+ static providerKey = "whisper";
1655
1773
  constructor(opts = {}) {
1656
1774
  const key = opts.apiKey ?? process.env.OPENAI_API_KEY;
1657
1775
  if (!key) {
@@ -1659,18 +1777,53 @@ var STT2 = class extends WhisperSTT {
1659
1777
  "Whisper STT requires an apiKey. Pass { apiKey: 'sk-...' } or set OPENAI_API_KEY in the environment."
1660
1778
  );
1661
1779
  }
1662
- super(key, opts.model ?? "whisper-1", opts.language, opts.bufferSize);
1780
+ super(key, opts.language, opts.model ?? "whisper-1", opts.bufferSize, opts.responseFormat ?? "json");
1781
+ }
1782
+ };
1783
+
1784
+ // src/providers/openai-transcribe-stt.ts
1785
+ var ALLOWED_MODELS2 = /* @__PURE__ */ new Set(["gpt-4o-transcribe", "gpt-4o-mini-transcribe"]);
1786
+ var DEFAULT_BUFFER_SIZE2 = 16e3 * 2;
1787
+ var OpenAITranscribeSTT = class extends WhisperSTT {
1788
+ /**
1789
+ * @param apiKey OpenAI API key.
1790
+ * @param language ISO-639-1 language code (e.g. ``"en"``, ``"it"``). Optional.
1791
+ * @param model One of ``gpt-4o-transcribe`` (default), ``gpt-4o-mini-transcribe``.
1792
+ * ``"whisper-1"`` is intentionally rejected here — use ``WhisperSTT`` for that.
1793
+ * @param bufferSize Bytes of PCM16 to buffer before each transcription request.
1794
+ * @param responseFormat ``"json"`` (default) or ``"verbose_json"``.
1795
+ */
1796
+ constructor(apiKey, language, model = "gpt-4o-transcribe", bufferSize = DEFAULT_BUFFER_SIZE2, responseFormat = "json") {
1797
+ if (!ALLOWED_MODELS2.has(model)) {
1798
+ throw new Error(
1799
+ `OpenAITranscribeSTT: unsupported model "${model}". Expected one of ${[...ALLOWED_MODELS2].join(", ")}. For "whisper-1", use WhisperSTT instead.`
1800
+ );
1801
+ }
1802
+ super(apiKey, language, model, bufferSize, responseFormat);
1803
+ }
1804
+ };
1805
+
1806
+ // src/stt/openai-transcribe.ts
1807
+ var STT3 = class extends OpenAITranscribeSTT {
1808
+ static providerKey = "openai_transcribe";
1809
+ constructor(opts = {}) {
1810
+ const key = opts.apiKey ?? process.env.OPENAI_API_KEY;
1811
+ if (!key) {
1812
+ throw new Error(
1813
+ "OpenAI Transcribe STT requires an apiKey. Pass { apiKey: 'sk-...' } or set OPENAI_API_KEY in the environment."
1814
+ );
1815
+ }
1816
+ super(key, opts.language, opts.model ?? "gpt-4o-transcribe", opts.bufferSize, opts.responseFormat ?? "json");
1663
1817
  }
1664
1818
  };
1665
1819
 
1666
1820
  // src/providers/cartesia-stt.ts
1667
- import WebSocket3 from "ws";
1821
+ import WebSocket2 from "ws";
1668
1822
  var DEFAULT_BASE_URL = "https://api.cartesia.ai";
1669
1823
  var API_VERSION = "2025-04-16";
1670
1824
  var USER_AGENT = "Patter/1.0 (integration=LiveKit-port; provider=Cartesia)";
1671
1825
  var KEEPALIVE_INTERVAL_MS = 3e4;
1672
1826
  var CONNECT_TIMEOUT_MS = 1e4;
1673
- var MAX_CALLBACKS = 10;
1674
1827
  var CartesiaSTT = class {
1675
1828
  constructor(apiKey, options = {}) {
1676
1829
  this.apiKey = apiKey;
@@ -1680,10 +1833,13 @@ var CartesiaSTT = class {
1680
1833
  }
1681
1834
  }
1682
1835
  ws = null;
1683
- callbacks = [];
1836
+ callbacks = /* @__PURE__ */ new Set();
1684
1837
  keepaliveTimer = null;
1685
- /** Cartesia request id — set from the server transcript events. */
1686
- requestId = "";
1838
+ /**
1839
+ * Cartesia request id — set from the server transcript events.
1840
+ * `null` until the first transcript event arrives (matches Python's `None`).
1841
+ */
1842
+ requestId = null;
1687
1843
  buildWsUrl() {
1688
1844
  const opts = this.options;
1689
1845
  const rawBase = opts.baseUrl ?? DEFAULT_BASE_URL;
@@ -1710,7 +1866,7 @@ var CartesiaSTT = class {
1710
1866
  }
1711
1867
  async connect() {
1712
1868
  const url = this.buildWsUrl();
1713
- this.ws = new WebSocket3(url, {
1869
+ this.ws = new WebSocket2(url, {
1714
1870
  headers: { "User-Agent": USER_AGENT }
1715
1871
  });
1716
1872
  await new Promise((resolve, reject) => {
@@ -1737,7 +1893,7 @@ var CartesiaSTT = class {
1737
1893
  this.handleEvent(event);
1738
1894
  });
1739
1895
  this.keepaliveTimer = setInterval(() => {
1740
- if (this.ws && this.ws.readyState === WebSocket3.OPEN) {
1896
+ if (this.ws && this.ws.readyState === WebSocket2.OPEN) {
1741
1897
  try {
1742
1898
  this.ws.ping();
1743
1899
  } catch {
@@ -1770,19 +1926,24 @@ var CartesiaSTT = class {
1770
1926
  }
1771
1927
  }
1772
1928
  sendAudio(audio) {
1773
- if (!this.ws || this.ws.readyState !== WebSocket3.OPEN) return;
1929
+ if (!this.ws || this.ws.readyState !== WebSocket2.OPEN) return;
1774
1930
  this.ws.send(audio);
1775
1931
  }
1776
1932
  onTranscript(callback) {
1777
- if (this.callbacks.length >= MAX_CALLBACKS) {
1778
- getLogger().warn(
1779
- "CartesiaSTT: maximum of 10 onTranscript callbacks reached; replacing the last callback."
1780
- );
1781
- this.callbacks[this.callbacks.length - 1] = callback;
1782
- return;
1783
- }
1784
- this.callbacks.push(callback);
1933
+ this.callbacks.add(callback);
1785
1934
  }
1935
+ /** Remove a previously registered transcript callback. */
1936
+ offTranscript(callback) {
1937
+ this.callbacks.delete(callback);
1938
+ }
1939
+ /**
1940
+ * Synchronous best-effort close. Sends `finalize` and closes the socket
1941
+ * without waiting for the server to flush any remaining transcripts.
1942
+ *
1943
+ * Limitation: any transcript events produced between the `finalize` send
1944
+ * and the socket close may be dropped. Callers that need to guarantee all
1945
+ * transcripts are delivered should await :meth:`closeAsync` instead.
1946
+ */
1786
1947
  close() {
1787
1948
  if (this.keepaliveTimer) {
1788
1949
  clearInterval(this.keepaliveTimer);
@@ -1797,10 +1958,53 @@ var CartesiaSTT = class {
1797
1958
  this.ws = null;
1798
1959
  }
1799
1960
  }
1961
+ /**
1962
+ * Graceful close that awaits the `finalize` send and the socket closing
1963
+ * handshake, matching the Python adapter's behavior. Use this when you
1964
+ * need any in-flight transcripts to be flushed before teardown.
1965
+ */
1966
+ async closeAsync() {
1967
+ if (this.keepaliveTimer) {
1968
+ clearInterval(this.keepaliveTimer);
1969
+ this.keepaliveTimer = null;
1970
+ }
1971
+ const ws = this.ws;
1972
+ this.ws = null;
1973
+ if (!ws) return;
1974
+ if (ws.readyState === WebSocket2.OPEN) {
1975
+ try {
1976
+ await new Promise((resolve) => {
1977
+ ws.send("finalize", (err) => {
1978
+ if (err) getLogger().warn(`CartesiaSTT finalize send failed: ${String(err)}`);
1979
+ resolve();
1980
+ });
1981
+ });
1982
+ } catch (err) {
1983
+ getLogger().warn(`CartesiaSTT finalize error: ${String(err)}`);
1984
+ }
1985
+ }
1986
+ if (ws.readyState === WebSocket2.OPEN || ws.readyState === WebSocket2.CONNECTING) {
1987
+ await new Promise((resolve) => {
1988
+ const done = () => {
1989
+ ws.off("close", done);
1990
+ ws.off("error", done);
1991
+ resolve();
1992
+ };
1993
+ ws.once("close", done);
1994
+ ws.once("error", done);
1995
+ try {
1996
+ ws.close();
1997
+ } catch {
1998
+ resolve();
1999
+ }
2000
+ });
2001
+ }
2002
+ }
1800
2003
  };
1801
2004
 
1802
2005
  // src/stt/cartesia.ts
1803
- var STT3 = class extends CartesiaSTT {
2006
+ var STT4 = class extends CartesiaSTT {
2007
+ static providerKey = "cartesia_stt";
1804
2008
  constructor(opts = {}) {
1805
2009
  const key = opts.apiKey ?? process.env.CARTESIA_API_KEY;
1806
2010
  if (!key) {
@@ -1819,7 +2023,7 @@ var STT3 = class extends CartesiaSTT {
1819
2023
  };
1820
2024
 
1821
2025
  // src/providers/soniox-stt.ts
1822
- import WebSocket4 from "ws";
2026
+ import WebSocket3 from "ws";
1823
2027
  var SONIOX_WS_URL = "wss://stt-rt.soniox.com/transcribe-websocket";
1824
2028
  var KEEPALIVE_MESSAGE = '{"type": "keepalive"}';
1825
2029
  var END_TOKEN = "<end>";
@@ -1915,7 +2119,8 @@ var SonioxSTT = class _SonioxSTT {
1915
2119
  return config;
1916
2120
  }
1917
2121
  async connect() {
1918
- this.ws = new WebSocket4(this.baseUrl);
2122
+ this.final.reset();
2123
+ this.ws = new WebSocket3(this.baseUrl);
1919
2124
  await new Promise((resolve, reject) => {
1920
2125
  const timer = setTimeout(() => reject(new Error("Soniox connect timeout")), 1e4);
1921
2126
  this.ws.once("open", () => {
@@ -1934,7 +2139,7 @@ var SonioxSTT = class _SonioxSTT {
1934
2139
  getLogger().error(`SonioxSTT WebSocket error: ${String(err)}`);
1935
2140
  });
1936
2141
  this.keepaliveTimer = setInterval(() => {
1937
- if (this.ws && this.ws.readyState === WebSocket4.OPEN) {
2142
+ if (this.ws && this.ws.readyState === WebSocket3.OPEN) {
1938
2143
  try {
1939
2144
  this.ws.send(KEEPALIVE_MESSAGE);
1940
2145
  } catch {
@@ -2007,7 +2212,7 @@ var SonioxSTT = class _SonioxSTT {
2007
2212
  }
2008
2213
  }
2009
2214
  sendAudio(audio) {
2010
- if (!this.ws || this.ws.readyState !== WebSocket4.OPEN) return;
2215
+ if (!this.ws || this.ws.readyState !== WebSocket3.OPEN) return;
2011
2216
  if (audio.length === 0) return;
2012
2217
  this.ws.send(audio);
2013
2218
  }
@@ -2038,7 +2243,8 @@ var SonioxSTT = class _SonioxSTT {
2038
2243
  };
2039
2244
 
2040
2245
  // src/stt/soniox.ts
2041
- var STT4 = class extends SonioxSTT {
2246
+ var STT5 = class extends SonioxSTT {
2247
+ static providerKey = "soniox";
2042
2248
  constructor(opts = {}) {
2043
2249
  const key = opts.apiKey ?? process.env.SONIOX_API_KEY;
2044
2250
  if (!key) {
@@ -2053,11 +2259,21 @@ var STT4 = class extends SonioxSTT {
2053
2259
  };
2054
2260
 
2055
2261
  // src/providers/assemblyai-stt.ts
2056
- import WebSocket5 from "ws";
2262
+ import WebSocket4 from "ws";
2057
2263
  var DEFAULT_BASE_URL2 = "wss://streaming.assemblyai.com";
2058
- var DEFAULT_MIN_TURN_SILENCE_MS = 100;
2264
+ var DEFAULT_MIN_TURN_SILENCE_MS = 400;
2059
2265
  var CONNECT_TIMEOUT_MS2 = 1e4;
2060
- var MAX_CALLBACKS2 = 10;
2266
+ var TERMINATION_WAIT_TIMEOUT_MS = 500;
2267
+ var MIN_CHUNK_DURATION_MS = 50;
2268
+ var MAX_CHUNK_DURATION_MS = 1e3;
2269
+ var RECONNECT_ERROR_CODES = /* @__PURE__ */ new Set([3005, 3008]);
2270
+ var VALID_DOMAINS = /* @__PURE__ */ new Set(["general", "medical-v1"]);
2271
+ var AssemblyAISTTNotConnectedError = class extends Error {
2272
+ constructor(message = "AssemblyAISTT is not connected") {
2273
+ super(message);
2274
+ this.name = "AssemblyAISTTNotConnectedError";
2275
+ }
2276
+ };
2061
2277
  var AssemblyAISTT = class _AssemblyAISTT {
2062
2278
  constructor(apiKey, options = {}) {
2063
2279
  this.apiKey = apiKey;
@@ -2065,13 +2281,24 @@ var AssemblyAISTT = class _AssemblyAISTT {
2065
2281
  if (!apiKey) {
2066
2282
  throw new Error("AssemblyAISTT requires a non-empty apiKey");
2067
2283
  }
2284
+ if (options.domain !== void 0 && !VALID_DOMAINS.has(options.domain)) {
2285
+ const hint = options.domain === "medical" ? ' \u2014 did you mean "medical-v1"?' : "";
2286
+ throw new Error(
2287
+ `AssemblyAISTT: invalid domain "${options.domain}"; expected one of [${Array.from(
2288
+ VALID_DOMAINS
2289
+ ).map((d) => `"${d}"`).join(", ")}]${hint}`
2290
+ );
2291
+ }
2068
2292
  }
2069
2293
  ws = null;
2070
- callbacks = [];
2294
+ callbacks = /* @__PURE__ */ new Set();
2295
+ closing = false;
2296
+ reconnectAttempts = 0;
2297
+ terminationResolve = null;
2071
2298
  /** AssemblyAI session id — set when the `Begin` message arrives. */
2072
- sessionId = "";
2299
+ sessionId = null;
2073
2300
  /** Unix timestamp when the AssemblyAI session expires. */
2074
- expiresAt = 0;
2301
+ expiresAt = null;
2075
2302
  /** Factory for Twilio calls — mulaw 8 kHz. */
2076
2303
  static forTwilio(apiKey, model = "universal-streaming-english") {
2077
2304
  return new _AssemblyAISTT(apiKey, {
@@ -2106,11 +2333,14 @@ var AssemblyAISTT = class _AssemblyAISTT {
2106
2333
  keyterms_prompt: opts.keytermsPrompt ? JSON.stringify(opts.keytermsPrompt) : void 0,
2107
2334
  language_detection: languageDetection,
2108
2335
  prompt: opts.prompt,
2109
- vad_threshold: opts.vadThreshold,
2336
+ // vad_threshold intentionally omitted — not a valid v3 parameter.
2110
2337
  speaker_labels: opts.speakerLabels,
2111
2338
  max_speakers: opts.maxSpeakers,
2112
2339
  domain: opts.domain
2113
2340
  };
2341
+ if (opts.useQueryToken) {
2342
+ raw.token = this.apiKey;
2343
+ }
2114
2344
  const params = new URLSearchParams();
2115
2345
  for (const [key, value] of Object.entries(raw)) {
2116
2346
  if (value === void 0 || value === null) continue;
@@ -2123,30 +2353,41 @@ var AssemblyAISTT = class _AssemblyAISTT {
2123
2353
  const base = opts.baseUrl ?? DEFAULT_BASE_URL2;
2124
2354
  return `${base}/v3/ws?${params.toString()}`;
2125
2355
  }
2356
+ buildHeaders() {
2357
+ const headers = {
2358
+ "Content-Type": "application/json",
2359
+ "User-Agent": "Patter/1.0"
2360
+ };
2361
+ if (!this.options.useQueryToken) {
2362
+ headers.Authorization = this.apiKey;
2363
+ }
2364
+ return headers;
2365
+ }
2126
2366
  async connect() {
2367
+ this.closing = false;
2127
2368
  const url = this.buildUrl();
2128
- this.ws = new WebSocket5(url, {
2129
- headers: {
2130
- Authorization: this.apiKey,
2131
- "Content-Type": "application/json",
2132
- "User-Agent": "Patter/1.0 (integration=LiveKit-port)"
2133
- }
2134
- });
2369
+ this.ws = new WebSocket4(url, { headers: this.buildHeaders() });
2370
+ await this.awaitOpen(this.ws);
2371
+ this.attachHandlers(this.ws);
2372
+ }
2373
+ async awaitOpen(ws) {
2135
2374
  await new Promise((resolve, reject) => {
2136
2375
  const timer = setTimeout(
2137
2376
  () => reject(new Error("AssemblyAI connect timeout")),
2138
2377
  CONNECT_TIMEOUT_MS2
2139
2378
  );
2140
- this.ws.once("open", () => {
2379
+ ws.once("open", () => {
2141
2380
  clearTimeout(timer);
2142
2381
  resolve();
2143
2382
  });
2144
- this.ws.once("error", (err) => {
2383
+ ws.once("error", (err) => {
2145
2384
  clearTimeout(timer);
2146
2385
  reject(err);
2147
2386
  });
2148
2387
  });
2149
- this.ws.on("message", (raw) => {
2388
+ }
2389
+ attachHandlers(ws) {
2390
+ ws.on("message", (raw) => {
2150
2391
  let event;
2151
2392
  try {
2152
2393
  event = JSON.parse(raw.toString());
@@ -2155,12 +2396,45 @@ var AssemblyAISTT = class _AssemblyAISTT {
2155
2396
  }
2156
2397
  this.handleEvent(event);
2157
2398
  });
2399
+ ws.on("close", (code) => {
2400
+ if (!this.closing && RECONNECT_ERROR_CODES.has(code) && this.reconnectAttempts < 1) {
2401
+ this.reconnectAttempts += 1;
2402
+ getLogger().warn(
2403
+ `AssemblyAISTT: close code ${code} \u2014 attempting single reconnect.`
2404
+ );
2405
+ this.reconnect().catch((err) => {
2406
+ getLogger().error("AssemblyAISTT reconnect failed", err);
2407
+ });
2408
+ }
2409
+ });
2410
+ }
2411
+ async reconnect() {
2412
+ const url = this.buildUrl();
2413
+ this.ws = new WebSocket4(url, { headers: this.buildHeaders() });
2414
+ await this.awaitOpen(this.ws);
2415
+ this.attachHandlers(this.ws);
2158
2416
  }
2159
2417
  handleEvent(event) {
2160
2418
  const type = event.type;
2161
2419
  if (type === "Begin") {
2162
- this.sessionId = event.id ?? "";
2163
- this.expiresAt = event.expires_at ?? 0;
2420
+ this.sessionId = event.id ?? null;
2421
+ this.expiresAt = event.expires_at ?? null;
2422
+ return;
2423
+ }
2424
+ if (type === "Termination") {
2425
+ if (this.terminationResolve) {
2426
+ this.terminationResolve();
2427
+ this.terminationResolve = null;
2428
+ }
2429
+ return;
2430
+ }
2431
+ if (type === "SpeechStarted") {
2432
+ this.emit({
2433
+ text: "",
2434
+ isFinal: false,
2435
+ confidence: 0,
2436
+ eventType: "SpeechStarted"
2437
+ });
2164
2438
  return;
2165
2439
  }
2166
2440
  if (type !== "Turn") {
@@ -2195,28 +2469,89 @@ var AssemblyAISTT = class _AssemblyAISTT {
2195
2469
  }
2196
2470
  }
2197
2471
  sendAudio(audio) {
2198
- if (!this.ws || this.ws.readyState !== WebSocket5.OPEN) return;
2472
+ if (!this.ws || this.ws.readyState !== WebSocket4.OPEN) {
2473
+ throw new AssemblyAISTTNotConnectedError(
2474
+ "AssemblyAISTT.sendAudio: WebSocket is not open"
2475
+ );
2476
+ }
2477
+ const durationMs = this.estimateChunkDurationMs(audio.length);
2478
+ if (durationMs !== null && (durationMs < MIN_CHUNK_DURATION_MS || durationMs > MAX_CHUNK_DURATION_MS)) {
2479
+ getLogger().warn(
2480
+ `AssemblyAISTT: audio chunk duration ${durationMs.toFixed(1)}ms outside 50-1000ms bounds (may trigger error 3007).`
2481
+ );
2482
+ }
2199
2483
  this.ws.send(audio);
2200
2484
  }
2201
- onTranscript(callback) {
2202
- if (this.callbacks.length >= MAX_CALLBACKS2) {
2203
- getLogger().warn(
2204
- "AssemblyAISTT: maximum of 10 onTranscript callbacks reached; replacing the last callback."
2485
+ estimateChunkDurationMs(byteLength) {
2486
+ if (byteLength <= 0) return null;
2487
+ const sampleRate = this.options.sampleRate ?? 16e3;
2488
+ if (sampleRate <= 0) return null;
2489
+ const bytesPerSample = (this.options.encoding ?? "pcm_s16le") === "pcm_s16le" ? 2 : 1;
2490
+ const samples = byteLength / bytesPerSample;
2491
+ return samples / sampleRate * 1e3;
2492
+ }
2493
+ /**
2494
+ * Send an `UpdateConfiguration` frame to change settings mid-stream.
2495
+ * Only defined fields are included.
2496
+ */
2497
+ updateConfiguration(params) {
2498
+ if (!this.ws || this.ws.readyState !== WebSocket4.OPEN) {
2499
+ throw new AssemblyAISTTNotConnectedError(
2500
+ "AssemblyAISTT.updateConfiguration: WebSocket is not open"
2205
2501
  );
2206
- this.callbacks[this.callbacks.length - 1] = callback;
2207
- return;
2208
2502
  }
2209
- this.callbacks.push(callback);
2503
+ const payload = { type: "UpdateConfiguration" };
2504
+ if (params.keytermsPrompt !== void 0) {
2505
+ payload.keyterms_prompt = JSON.stringify(params.keytermsPrompt);
2506
+ }
2507
+ if (params.prompt !== void 0) {
2508
+ payload.prompt = params.prompt;
2509
+ }
2510
+ if (params.minTurnSilence !== void 0) {
2511
+ payload.min_turn_silence = params.minTurnSilence;
2512
+ }
2513
+ if (params.maxTurnSilence !== void 0) {
2514
+ payload.max_turn_silence = params.maxTurnSilence;
2515
+ }
2516
+ this.ws.send(JSON.stringify(payload));
2210
2517
  }
2211
- close() {
2212
- if (this.ws) {
2213
- try {
2214
- this.ws.send(JSON.stringify({ type: "Terminate" }));
2215
- } catch {
2216
- }
2518
+ /** Force the server to finalize the current turn (for barge-in). */
2519
+ forceEndpoint() {
2520
+ if (!this.ws || this.ws.readyState !== WebSocket4.OPEN) {
2521
+ throw new AssemblyAISTTNotConnectedError(
2522
+ "AssemblyAISTT.forceEndpoint: WebSocket is not open"
2523
+ );
2524
+ }
2525
+ this.ws.send(JSON.stringify({ type: "ForceEndpoint" }));
2526
+ }
2527
+ onTranscript(callback) {
2528
+ this.callbacks.add(callback);
2529
+ return () => {
2530
+ this.callbacks.delete(callback);
2531
+ };
2532
+ }
2533
+ async close() {
2534
+ this.closing = true;
2535
+ if (!this.ws) return;
2536
+ try {
2537
+ this.ws.send(JSON.stringify({ type: "Terminate" }));
2538
+ } catch {
2539
+ }
2540
+ await new Promise((resolve) => {
2541
+ const timer = setTimeout(() => {
2542
+ this.terminationResolve = null;
2543
+ resolve();
2544
+ }, TERMINATION_WAIT_TIMEOUT_MS);
2545
+ this.terminationResolve = () => {
2546
+ clearTimeout(timer);
2547
+ resolve();
2548
+ };
2549
+ });
2550
+ try {
2217
2551
  this.ws.close();
2218
- this.ws = null;
2552
+ } catch {
2219
2553
  }
2554
+ this.ws = null;
2220
2555
  }
2221
2556
  };
2222
2557
  function averageConfidence(words) {
@@ -2229,7 +2564,8 @@ function averageConfidence(words) {
2229
2564
  }
2230
2565
 
2231
2566
  // src/stt/assemblyai.ts
2232
- var STT5 = class extends AssemblyAISTT {
2567
+ var STT6 = class extends AssemblyAISTT {
2568
+ static providerKey = "assemblyai";
2233
2569
  constructor(opts = {}) {
2234
2570
  const key = opts.apiKey ?? process.env.ASSEMBLYAI_API_KEY;
2235
2571
  if (!key) {
@@ -2300,14 +2636,78 @@ function resolveVoiceId(voice) {
2300
2636
  if (VOICE_ID_PATTERN.test(voice)) return voice;
2301
2637
  return ELEVENLABS_VOICE_ID_BY_NAME[voice.toLowerCase()] ?? voice;
2302
2638
  }
2303
- var ElevenLabsTTS = class {
2304
- constructor(apiKey, voiceId = "EXAVITQu4vr4xnSDxMaL", modelId = "eleven_turbo_v2_5", outputFormat = "pcm_16000") {
2639
+ var ElevenLabsTTS = class _ElevenLabsTTS {
2640
+ apiKey;
2641
+ voiceId;
2642
+ modelId;
2643
+ outputFormat;
2644
+ voiceSettings;
2645
+ languageCode;
2646
+ chunkSize;
2647
+ constructor(apiKey, voiceIdOrOptions = "21m00Tcm4TlvDq8ikWAM", modelId = "eleven_flash_v2_5", outputFormat = "pcm_16000") {
2305
2648
  this.apiKey = apiKey;
2306
- this.modelId = modelId;
2307
- this.outputFormat = outputFormat;
2308
- this.voiceId = resolveVoiceId(voiceId);
2649
+ if (typeof voiceIdOrOptions === "object") {
2650
+ const o = voiceIdOrOptions;
2651
+ this.voiceId = resolveVoiceId(o.voiceId ?? "21m00Tcm4TlvDq8ikWAM");
2652
+ this.modelId = o.modelId ?? "eleven_flash_v2_5";
2653
+ this.outputFormat = o.outputFormat ?? "pcm_16000";
2654
+ this.voiceSettings = o.voiceSettings;
2655
+ this.languageCode = o.languageCode;
2656
+ this.chunkSize = o.chunkSize ?? 4096;
2657
+ } else {
2658
+ this.voiceId = resolveVoiceId(voiceIdOrOptions);
2659
+ this.modelId = modelId;
2660
+ this.outputFormat = outputFormat;
2661
+ this.voiceSettings = void 0;
2662
+ this.languageCode = void 0;
2663
+ this.chunkSize = 4096;
2664
+ }
2665
+ }
2666
+ /**
2667
+ * Construct an instance pre-configured for Twilio Media Streams.
2668
+ *
2669
+ * Sets `outputFormat='ulaw_8000'` so ElevenLabs emits μ-law @ 8 kHz
2670
+ * directly — the exact wire format Twilio's media stream uses — letting
2671
+ * the SDK skip the 16 kHz→8 kHz resample and PCM→μ-law conversion in
2672
+ * `TwilioAudioSender`. Saves ~30–80 ms first-byte and per-frame CPU,
2673
+ * and removes a potential aliasing source.
2674
+ *
2675
+ * `voiceSettings` defaults to a low-bandwidth-friendly profile
2676
+ * (speaker boost off, modest stability) which sounds cleaner at 8 kHz
2677
+ * μ-law than the studio default. Pass an explicit object to override.
2678
+ */
2679
+ static forTwilio(apiKey, options = {}) {
2680
+ const voiceSettings = options.voiceSettings ?? {
2681
+ // Speaker boost adds high-frequency emphasis that aliases ugly over an
2682
+ // 8 kHz μ-law line. Slightly higher stability tames the excursions
2683
+ // that compander quantization noise can amplify.
2684
+ stability: 0.6,
2685
+ similarity_boost: 0.75,
2686
+ use_speaker_boost: false
2687
+ };
2688
+ return new _ElevenLabsTTS(apiKey, {
2689
+ ...options,
2690
+ voiceSettings,
2691
+ outputFormat: "ulaw_8000"
2692
+ });
2693
+ }
2694
+ /**
2695
+ * Construct an instance pre-configured for Telnyx bidirectional media.
2696
+ *
2697
+ * Telnyx's default media-streaming codec is L16 PCM @ 16 kHz, which
2698
+ * matches our default Telnyx handler. We pick `pcm_16000` so the audio
2699
+ * flows end-to-end with zero resampling or transcoding.
2700
+ *
2701
+ * Trade-off: if your Telnyx profile is pinned to PCMU/8000 (μ-law),
2702
+ * construct `ElevenLabsTTS` directly with `outputFormat: 'ulaw_8000'`
2703
+ * — Telnyx supports that natively too.
2704
+ */
2705
+ static forTelnyx(apiKey, options = {}) {
2706
+ return new _ElevenLabsTTS(apiKey, {
2707
+ ...options,
2708
+ outputFormat: "pcm_16000"
2709
+ });
2309
2710
  }
2310
- voiceId;
2311
2711
  /**
2312
2712
  * Synthesise text to speech and return the full audio as a single Buffer.
2313
2713
  *
@@ -2324,22 +2724,29 @@ var ElevenLabsTTS = class {
2324
2724
  * Synthesise text and yield audio chunks as they arrive (streaming).
2325
2725
  *
2326
2726
  * The yielded buffers are raw PCM at 16 kHz (or whatever `outputFormat` is
2327
- * configured to).
2727
+ * configured to). `chunkSize` controls the maximum yield size — 512 is a
2728
+ * good choice for low-latency telephony.
2328
2729
  */
2329
2730
  async *synthesizeStream(text) {
2330
2731
  const url = `${ELEVENLABS_BASE_URL}/text-to-speech/${encodeURIComponent(this.voiceId)}/stream?output_format=${encodeURIComponent(this.outputFormat)}`;
2732
+ const body = {
2733
+ text,
2734
+ model_id: this.modelId
2735
+ };
2736
+ if (this.voiceSettings) body["voice_settings"] = this.voiceSettings;
2737
+ if (this.languageCode) body["language_code"] = this.languageCode;
2331
2738
  const response = await fetch(url, {
2332
2739
  method: "POST",
2333
2740
  headers: {
2334
2741
  "xi-api-key": this.apiKey,
2335
2742
  "Content-Type": "application/json"
2336
2743
  },
2337
- body: JSON.stringify({ text, model_id: this.modelId }),
2744
+ body: JSON.stringify(body),
2338
2745
  signal: AbortSignal.timeout(3e4)
2339
2746
  });
2340
2747
  if (!response.ok) {
2341
- const body = await response.text();
2342
- throw new Error(`ElevenLabs TTS error ${response.status}: ${body}`);
2748
+ const errBody = await response.text();
2749
+ throw new Error(`ElevenLabs TTS error ${response.status}: ${errBody}`);
2343
2750
  }
2344
2751
  if (!response.body) {
2345
2752
  throw new Error("ElevenLabs TTS: no response body");
@@ -2349,8 +2756,10 @@ var ElevenLabsTTS = class {
2349
2756
  while (true) {
2350
2757
  const { done, value } = await reader.read();
2351
2758
  if (done) break;
2352
- if (value && value.length > 0) {
2353
- yield Buffer.from(value);
2759
+ if (!value || value.length === 0) continue;
2760
+ const buf = Buffer.from(value);
2761
+ for (let offset = 0; offset < buf.length; offset += this.chunkSize) {
2762
+ yield buf.subarray(offset, Math.min(offset + this.chunkSize, buf.length));
2354
2763
  }
2355
2764
  }
2356
2765
  } finally {
@@ -2362,30 +2771,50 @@ var ElevenLabsTTS = class {
2362
2771
  };
2363
2772
 
2364
2773
  // src/tts/elevenlabs.ts
2365
- var TTS = class extends ElevenLabsTTS {
2366
- constructor(opts = {}) {
2367
- const key = opts.apiKey ?? process.env.ELEVENLABS_API_KEY;
2368
- if (!key) {
2369
- throw new Error(
2370
- "ElevenLabs TTS requires an apiKey. Pass { apiKey: '...' } or set ELEVENLABS_API_KEY in the environment."
2371
- );
2372
- }
2774
+ function resolveApiKey(apiKey) {
2775
+ const key = apiKey ?? process.env.ELEVENLABS_API_KEY;
2776
+ if (!key) {
2777
+ throw new Error(
2778
+ "ElevenLabs TTS requires an apiKey. Pass { apiKey: '...' } or set ELEVENLABS_API_KEY in the environment."
2779
+ );
2780
+ }
2781
+ return key;
2782
+ }
2783
+ var TTS = class _TTS extends ElevenLabsTTS {
2784
+ static providerKey = "elevenlabs";
2785
+ constructor(opts = {}) {
2373
2786
  super(
2374
- key,
2787
+ resolveApiKey(opts.apiKey),
2375
2788
  opts.voiceId ?? "EXAVITQu4vr4xnSDxMaL",
2376
- opts.modelId ?? "eleven_turbo_v2_5",
2789
+ opts.modelId ?? "eleven_flash_v2_5",
2377
2790
  opts.outputFormat ?? "pcm_16000"
2378
2791
  );
2379
2792
  }
2793
+ static forTwilio(arg1, arg2) {
2794
+ const opts = typeof arg1 === "string" ? { apiKey: arg1, ...arg2 ?? {} } : arg1 ?? {};
2795
+ return new _TTS({ ...opts, outputFormat: "ulaw_8000" });
2796
+ }
2797
+ static forTelnyx(arg1, arg2) {
2798
+ const opts = typeof arg1 === "string" ? { apiKey: arg1, ...arg2 ?? {} } : arg1 ?? {};
2799
+ return new _TTS({ ...opts, outputFormat: "pcm_16000" });
2800
+ }
2380
2801
  };
2381
2802
 
2382
2803
  // src/providers/openai-tts.ts
2383
2804
  var OPENAI_TTS_URL = "https://api.openai.com/v1/audio/speech";
2805
+ var INSTRUCTIONS_PREFIX = "gpt-4o-mini-tts";
2806
+ var LPF_ALPHA = 0.78;
2384
2807
  var OpenAITTS = class _OpenAITTS {
2385
- constructor(apiKey, voice = "alloy", model = "tts-1") {
2808
+ constructor(apiKey, voice = "alloy", model = "gpt-4o-mini-tts", instructions = null, speed = null, antiAlias = true) {
2386
2809
  this.apiKey = apiKey;
2387
2810
  this.voice = voice;
2388
2811
  this.model = model;
2812
+ this.instructions = instructions;
2813
+ this.speed = speed;
2814
+ this.antiAlias = antiAlias;
2815
+ if (speed !== null && speed !== void 0 && (speed < 0.25 || speed > 4)) {
2816
+ throw new Error("OpenAITTS: speed must be in [0.25, 4.0]");
2817
+ }
2389
2818
  }
2390
2819
  /**
2391
2820
  * Synthesise text to speech and return the full audio as a single Buffer.
@@ -2402,37 +2831,48 @@ var OpenAITTS = class _OpenAITTS {
2402
2831
  /**
2403
2832
  * Synthesise text and yield audio chunks as they arrive (streaming).
2404
2833
  *
2405
- * OpenAI returns 24 kHz PCM16; each chunk is resampled to 16 kHz before
2406
- * yielding so the output is ready for telephony pipelines.
2834
+ * OpenAI returns 24 kHz PCM16; each chunk is lowpass-filtered then
2835
+ * decimated 3:2 to 16 kHz before yielding so the output is ready for
2836
+ * telephony pipelines.
2407
2837
  *
2408
- * The resampler carries state (buffered samples + odd trailing byte)
2409
- * between chunks without that state cross-chunk sample alignment drifts
2410
- * and the caller hears pops / dropped audio (BUG #23, mirror of the
2411
- * Python `audioop.ratecv` fix).
2838
+ * The resampler carries state (filter memory + buffered samples + odd
2839
+ * trailing byte) between chunks so cross-chunk sample alignment and
2840
+ * filter phase don't reset on every network read.
2412
2841
  */
2413
2842
  async *synthesizeStream(text) {
2843
+ const body = {
2844
+ model: this.model,
2845
+ input: text,
2846
+ voice: this.voice,
2847
+ response_format: "pcm"
2848
+ };
2849
+ if (this.instructions !== null && this.model.startsWith(INSTRUCTIONS_PREFIX)) {
2850
+ body.instructions = this.instructions;
2851
+ }
2852
+ if (this.speed !== null) {
2853
+ body.speed = this.speed;
2854
+ }
2414
2855
  const response = await fetch(OPENAI_TTS_URL, {
2415
2856
  method: "POST",
2416
2857
  headers: {
2417
2858
  "Authorization": `Bearer ${this.apiKey}`,
2418
2859
  "Content-Type": "application/json"
2419
2860
  },
2420
- body: JSON.stringify({
2421
- model: this.model,
2422
- input: text,
2423
- voice: this.voice,
2424
- response_format: "pcm"
2425
- }),
2426
- signal: AbortSignal.timeout(3e4)
2861
+ body: JSON.stringify(body)
2427
2862
  });
2428
2863
  if (!response.ok) {
2429
- const body = await response.text();
2430
- throw new Error(`OpenAI TTS error ${response.status}: ${body}`);
2864
+ const errBody = await response.text();
2865
+ throw new Error(`OpenAI TTS error ${response.status}: ${errBody}`);
2431
2866
  }
2432
2867
  if (!response.body) {
2433
2868
  throw new Error("OpenAI TTS: no response body");
2434
2869
  }
2435
- const ctx = { carryByte: null, leftover: [] };
2870
+ const ctx = {
2871
+ carryByte: null,
2872
+ leftover: [],
2873
+ lpfPrev: 0,
2874
+ lpfEnabled: this.antiAlias
2875
+ };
2436
2876
  const reader = response.body.getReader();
2437
2877
  try {
2438
2878
  while (true) {
@@ -2457,8 +2897,14 @@ var OpenAITTS = class _OpenAITTS {
2457
2897
  }
2458
2898
  }
2459
2899
  /**
2460
- * Streaming 24 kHz → 16 kHz resampler (PCM16-LE). Maintains cross-chunk
2461
- * state so the 3:2 pattern doesn't reset at every network read.
2900
+ * Streaming 24 kHz → 16 kHz resampler (PCM16-LE). Applies a single-pole
2901
+ * lowpass ahead of the 3:2 decimation and carries filter + sample state
2902
+ * across chunks so the cadence doesn't reset at every network read.
2903
+ *
2904
+ * ``ctx.lpfEnabled`` (default true on the streaming path, false for the
2905
+ * legacy static helper) controls whether the LPF is engaged — we keep
2906
+ * the helper bit-exact for the downsample-only tests while the real
2907
+ * streaming path gets anti-alias filtering.
2462
2908
  */
2463
2909
  static resampleStreaming(audio, ctx) {
2464
2910
  let buf;
@@ -2477,14 +2923,26 @@ var OpenAITTS = class _OpenAITTS {
2477
2923
  }
2478
2924
  const sampleCount = buf.length / 2;
2479
2925
  const samples = ctx.leftover.slice();
2926
+ const lpf = ctx.lpfEnabled !== false;
2927
+ let y = ctx.lpfPrev;
2480
2928
  for (let i2 = 0; i2 < sampleCount; i2++) {
2481
- samples.push(buf.readInt16LE(i2 * 2));
2929
+ const x = buf.readInt16LE(i2 * 2);
2930
+ if (lpf) {
2931
+ y = LPF_ALPHA * x + (1 - LPF_ALPHA) * y;
2932
+ let s = Math.round(y);
2933
+ if (s > 32767) s = 32767;
2934
+ else if (s < -32768) s = -32768;
2935
+ samples.push(s);
2936
+ } else {
2937
+ samples.push(x);
2938
+ }
2482
2939
  }
2940
+ if (lpf) ctx.lpfPrev = y;
2483
2941
  const out = [];
2484
2942
  let i = 0;
2485
2943
  while (i + 2 < samples.length) {
2486
2944
  out.push(samples[i]);
2487
- out.push(Math.trunc((samples[i + 1] + samples[i + 2]) / 2));
2945
+ out.push(Math.round((samples[i + 1] + samples[i + 2]) / 2));
2488
2946
  i += 3;
2489
2947
  }
2490
2948
  ctx.leftover = samples.slice(i);
@@ -2496,7 +2954,7 @@ var OpenAITTS = class _OpenAITTS {
2496
2954
  }
2497
2955
  /** @deprecated use {@link resampleStreaming} with persistent state. */
2498
2956
  static resample24kTo16k(audio) {
2499
- const ctx = { carryByte: null, leftover: [] };
2957
+ const ctx = { carryByte: null, leftover: [], lpfPrev: 0, lpfEnabled: false };
2500
2958
  const out = _OpenAITTS.resampleStreaming(audio, ctx);
2501
2959
  if (ctx.leftover.length === 0) return out;
2502
2960
  const tail = Buffer.alloc(ctx.leftover.length * 2);
@@ -2509,6 +2967,7 @@ var OpenAITTS = class _OpenAITTS {
2509
2967
 
2510
2968
  // src/tts/openai.ts
2511
2969
  var TTS2 = class extends OpenAITTS {
2970
+ static providerKey = "openai_tts";
2512
2971
  constructor(opts = {}) {
2513
2972
  const key = opts.apiKey ?? process.env.OPENAI_API_KEY;
2514
2973
  if (!key) {
@@ -2516,15 +2975,22 @@ var TTS2 = class extends OpenAITTS {
2516
2975
  "OpenAI TTS requires an apiKey. Pass { apiKey: 'sk-...' } or set OPENAI_API_KEY in the environment."
2517
2976
  );
2518
2977
  }
2519
- super(key, opts.voice ?? "alloy", opts.model ?? "tts-1");
2978
+ super(
2979
+ key,
2980
+ opts.voice ?? "alloy",
2981
+ opts.model ?? "gpt-4o-mini-tts",
2982
+ opts.instructions ?? null,
2983
+ opts.speed ?? null,
2984
+ opts.antiAlias ?? false
2985
+ );
2520
2986
  }
2521
2987
  };
2522
2988
 
2523
2989
  // src/providers/cartesia-tts.ts
2524
2990
  var CARTESIA_BASE_URL = "https://api.cartesia.ai";
2525
- var CARTESIA_API_VERSION = "2024-11-13";
2991
+ var CARTESIA_API_VERSION = "2025-04-16";
2526
2992
  var CARTESIA_DEFAULT_VOICE_ID = "f786b574-daa5-4673-aa0c-cbe3e8534c02";
2527
- var CartesiaTTS = class {
2993
+ var CartesiaTTS = class _CartesiaTTS {
2528
2994
  apiKey;
2529
2995
  model;
2530
2996
  voice;
@@ -2537,7 +3003,7 @@ var CartesiaTTS = class {
2537
3003
  apiVersion;
2538
3004
  constructor(apiKey, opts = {}) {
2539
3005
  this.apiKey = apiKey;
2540
- this.model = opts.model ?? "sonic-2";
3006
+ this.model = opts.model ?? "sonic-3";
2541
3007
  this.voice = opts.voice ?? CARTESIA_DEFAULT_VOICE_ID;
2542
3008
  this.language = opts.language ?? "en";
2543
3009
  this.sampleRate = opts.sampleRate ?? 16e3;
@@ -2547,6 +3013,29 @@ var CartesiaTTS = class {
2547
3013
  this.baseUrl = opts.baseUrl ?? CARTESIA_BASE_URL;
2548
3014
  this.apiVersion = opts.apiVersion ?? CARTESIA_API_VERSION;
2549
3015
  }
3016
+ /**
3017
+ * Construct an instance pre-configured for Twilio Media Streams.
3018
+ *
3019
+ * Sets `sampleRate=8000` so Cartesia emits PCM_S16LE @ 8 kHz directly.
3020
+ * Twilio's media stream uses μ-law @ 8 kHz so the SDK still does the
3021
+ * PCM → μ-law transcode client-side, but the 16 kHz → 8 kHz resample
3022
+ * step is skipped. Saves ~10–30 ms first-byte plus per-frame CPU and
3023
+ * removes a potential aliasing source.
3024
+ */
3025
+ static forTwilio(apiKey, options = {}) {
3026
+ return new _CartesiaTTS(apiKey, { ...options, sampleRate: 8e3 });
3027
+ }
3028
+ /**
3029
+ * Construct an instance pre-configured for Telnyx bidirectional media.
3030
+ *
3031
+ * Sets `sampleRate=16000` to match Telnyx's L16/16000 default codec —
3032
+ * audio flows end-to-end with zero resampling or transcoding. Same as
3033
+ * the bare-constructor default; exists for API symmetry with
3034
+ * {@link CartesiaTTS.forTwilio}.
3035
+ */
3036
+ static forTelnyx(apiKey, options = {}) {
3037
+ return new _CartesiaTTS(apiKey, { ...options, sampleRate: 16e3 });
3038
+ }
2550
3039
  /** Build the JSON payload for the Cartesia bytes endpoint. */
2551
3040
  buildPayload(text) {
2552
3041
  const payload = {
@@ -2619,18 +3108,31 @@ var CartesiaTTS = class {
2619
3108
  };
2620
3109
 
2621
3110
  // src/tts/cartesia.ts
2622
- var TTS3 = class extends CartesiaTTS {
3111
+ function resolveApiKey2(apiKey) {
3112
+ const key = apiKey ?? process.env.CARTESIA_API_KEY;
3113
+ if (!key) {
3114
+ throw new Error(
3115
+ "Cartesia TTS requires an apiKey. Pass { apiKey: '...' } or set CARTESIA_API_KEY in the environment."
3116
+ );
3117
+ }
3118
+ return key;
3119
+ }
3120
+ var TTS3 = class _TTS extends CartesiaTTS {
3121
+ static providerKey = "cartesia_tts";
2623
3122
  constructor(opts = {}) {
2624
- const key = opts.apiKey ?? process.env.CARTESIA_API_KEY;
2625
- if (!key) {
2626
- throw new Error(
2627
- "Cartesia TTS requires an apiKey. Pass { apiKey: '...' } or set CARTESIA_API_KEY in the environment."
2628
- );
2629
- }
3123
+ const key = resolveApiKey2(opts.apiKey);
2630
3124
  const { apiKey: _ignored, ...rest } = opts;
2631
3125
  void _ignored;
2632
3126
  super(key, rest);
2633
3127
  }
3128
+ static forTwilio(arg1, arg2) {
3129
+ const opts = typeof arg1 === "string" ? { apiKey: arg1, ...arg2 ?? {} } : arg1 ?? {};
3130
+ return new _TTS({ ...opts, sampleRate: 8e3 });
3131
+ }
3132
+ static forTelnyx(arg1, arg2) {
3133
+ const opts = typeof arg1 === "string" ? { apiKey: arg1, ...arg2 ?? {} } : arg1 ?? {};
3134
+ return new _TTS({ ...opts, sampleRate: 16e3 });
3135
+ }
2634
3136
  };
2635
3137
 
2636
3138
  // src/providers/rime-tts.ts
@@ -2762,6 +3264,7 @@ var RimeTTS = class {
2762
3264
 
2763
3265
  // src/tts/rime.ts
2764
3266
  var TTS4 = class extends RimeTTS {
3267
+ static providerKey = "rime";
2765
3268
  constructor(opts = {}) {
2766
3269
  const key = opts.apiKey ?? process.env.RIME_API_KEY;
2767
3270
  if (!key) {
@@ -2855,6 +3358,7 @@ var LMNTTTS = class {
2855
3358
 
2856
3359
  // src/tts/lmnt.ts
2857
3360
  var TTS5 = class extends LMNTTTS {
3361
+ static providerKey = "lmnt";
2858
3362
  constructor(opts = {}) {
2859
3363
  const key = opts.apiKey ?? process.env.LMNT_API_KEY;
2860
3364
  if (!key) {
@@ -2870,6 +3374,7 @@ var TTS5 = class extends LMNTTTS {
2870
3374
 
2871
3375
  // src/llm/openai.ts
2872
3376
  var LLM = class extends OpenAILLMProvider {
3377
+ static providerKey = "openai";
2873
3378
  constructor(opts = {}) {
2874
3379
  const key = opts.apiKey ?? process.env.OPENAI_API_KEY;
2875
3380
  if (!key) {
@@ -2877,15 +3382,27 @@ var LLM = class extends OpenAILLMProvider {
2877
3382
  "OpenAI LLM requires an apiKey. Pass { apiKey: 'sk-...' } or set OPENAI_API_KEY."
2878
3383
  );
2879
3384
  }
2880
- super(key, opts.model ?? "gpt-4o-mini");
3385
+ super(key, opts.model ?? "gpt-4o-mini", {
3386
+ temperature: opts.temperature,
3387
+ maxTokens: opts.maxTokens,
3388
+ responseFormat: opts.responseFormat,
3389
+ parallelToolCalls: opts.parallelToolCalls,
3390
+ toolChoice: opts.toolChoice,
3391
+ seed: opts.seed,
3392
+ topP: opts.topP,
3393
+ frequencyPenalty: opts.frequencyPenalty,
3394
+ presencePenalty: opts.presencePenalty,
3395
+ stop: opts.stop
3396
+ });
2881
3397
  }
2882
3398
  };
2883
3399
 
2884
3400
  // src/providers/anthropic-llm.ts
2885
3401
  var DEFAULT_ANTHROPIC_URL = "https://api.anthropic.com/v1/messages";
2886
3402
  var DEFAULT_ANTHROPIC_VERSION = "2023-06-01";
2887
- var DEFAULT_MODEL = "claude-3-5-sonnet-20241022";
3403
+ var DEFAULT_MODEL = "claude-haiku-4-5-20251001";
2888
3404
  var DEFAULT_MAX_TOKENS = 1024;
3405
+ var PROMPT_CACHING_BETA = "prompt-caching-2024-07-31";
2889
3406
  var AnthropicLLMProvider = class {
2890
3407
  apiKey;
2891
3408
  model;
@@ -2893,6 +3410,7 @@ var AnthropicLLMProvider = class {
2893
3410
  temperature;
2894
3411
  url;
2895
3412
  anthropicVersion;
3413
+ promptCaching;
2896
3414
  constructor(options) {
2897
3415
  if (!options.apiKey) {
2898
3416
  throw new Error(
@@ -2905,6 +3423,7 @@ var AnthropicLLMProvider = class {
2905
3423
  this.temperature = options.temperature;
2906
3424
  this.url = options.baseUrl ?? DEFAULT_ANTHROPIC_URL;
2907
3425
  this.anthropicVersion = options.anthropicVersion ?? DEFAULT_ANTHROPIC_VERSION;
3426
+ this.promptCaching = options.promptCaching ?? true;
2908
3427
  }
2909
3428
  async *stream(messages, tools) {
2910
3429
  const { system, messages: anthropicMessages } = toAnthropicMessages(messages);
@@ -2915,16 +3434,44 @@ var AnthropicLLMProvider = class {
2915
3434
  max_tokens: this.maxTokens,
2916
3435
  stream: true
2917
3436
  };
2918
- if (system) body.system = system;
2919
- if (anthropicTools && anthropicTools.length > 0) body.tools = anthropicTools;
3437
+ if (system) {
3438
+ if (this.promptCaching) {
3439
+ const block = {
3440
+ type: "text",
3441
+ text: system,
3442
+ cache_control: { type: "ephemeral" }
3443
+ };
3444
+ body.system = [block];
3445
+ } else {
3446
+ body.system = system;
3447
+ }
3448
+ }
3449
+ if (anthropicTools && anthropicTools.length > 0) {
3450
+ if (this.promptCaching) {
3451
+ const cachedTools = anthropicTools.map(
3452
+ (t) => ({ ...t })
3453
+ );
3454
+ cachedTools[cachedTools.length - 1] = {
3455
+ ...cachedTools[cachedTools.length - 1],
3456
+ cache_control: { type: "ephemeral" }
3457
+ };
3458
+ body.tools = cachedTools;
3459
+ } else {
3460
+ body.tools = anthropicTools;
3461
+ }
3462
+ }
2920
3463
  if (this.temperature !== void 0) body.temperature = this.temperature;
3464
+ const headers = {
3465
+ "Content-Type": "application/json",
3466
+ "x-api-key": this.apiKey,
3467
+ "anthropic-version": this.anthropicVersion
3468
+ };
3469
+ if (this.promptCaching) {
3470
+ headers["anthropic-beta"] = PROMPT_CACHING_BETA;
3471
+ }
2921
3472
  const response = await fetch(this.url, {
2922
3473
  method: "POST",
2923
- headers: {
2924
- "Content-Type": "application/json",
2925
- "x-api-key": this.apiKey,
2926
- "anthropic-version": this.anthropicVersion
2927
- },
3474
+ headers,
2928
3475
  body: JSON.stringify(body),
2929
3476
  signal: AbortSignal.timeout(3e4)
2930
3477
  });
@@ -3069,6 +3616,7 @@ function toAnthropicMessages(messages) {
3069
3616
 
3070
3617
  // src/llm/anthropic.ts
3071
3618
  var LLM2 = class extends AnthropicLLMProvider {
3619
+ static providerKey = "anthropic";
3072
3620
  constructor(opts = {}) {
3073
3621
  const key = opts.apiKey ?? process.env.ANTHROPIC_API_KEY;
3074
3622
  if (!key) {
@@ -3082,11 +3630,15 @@ var LLM2 = class extends AnthropicLLMProvider {
3082
3630
  maxTokens: opts.maxTokens,
3083
3631
  temperature: opts.temperature,
3084
3632
  baseUrl: opts.baseUrl,
3085
- anthropicVersion: opts.anthropicVersion
3633
+ anthropicVersion: opts.anthropicVersion,
3634
+ promptCaching: opts.promptCaching
3086
3635
  });
3087
3636
  }
3088
3637
  };
3089
3638
 
3639
+ // src/version.ts
3640
+ var VERSION = "0.5.3";
3641
+
3090
3642
  // src/providers/groq-llm.ts
3091
3643
  var GROQ_BASE_URL = "https://api.groq.com/openai/v1";
3092
3644
  var DEFAULT_MODEL2 = "llama-3.3-70b-versatile";
@@ -3094,6 +3646,16 @@ var GroqLLMProvider = class {
3094
3646
  apiKey;
3095
3647
  model;
3096
3648
  baseUrl;
3649
+ temperature;
3650
+ maxTokens;
3651
+ responseFormat;
3652
+ parallelToolCalls;
3653
+ toolChoice;
3654
+ seed;
3655
+ topP;
3656
+ frequencyPenalty;
3657
+ presencePenalty;
3658
+ stop;
3097
3659
  constructor(options) {
3098
3660
  if (!options.apiKey) {
3099
3661
  throw new Error(
@@ -3103,19 +3665,43 @@ var GroqLLMProvider = class {
3103
3665
  this.apiKey = options.apiKey;
3104
3666
  this.model = options.model ?? DEFAULT_MODEL2;
3105
3667
  this.baseUrl = options.baseUrl ?? GROQ_BASE_URL;
3668
+ this.temperature = options.temperature;
3669
+ this.maxTokens = options.maxTokens;
3670
+ this.responseFormat = options.responseFormat;
3671
+ this.parallelToolCalls = options.parallelToolCalls;
3672
+ this.toolChoice = options.toolChoice;
3673
+ this.seed = options.seed;
3674
+ this.topP = options.topP;
3675
+ this.frequencyPenalty = options.frequencyPenalty;
3676
+ this.presencePenalty = options.presencePenalty;
3677
+ this.stop = options.stop;
3106
3678
  }
3107
3679
  async *stream(messages, tools) {
3108
3680
  const body = {
3109
3681
  model: this.model,
3110
3682
  messages,
3111
- stream: true
3683
+ stream: true,
3684
+ stream_options: { include_usage: true }
3112
3685
  };
3686
+ if (this.temperature !== void 0) body.temperature = this.temperature;
3687
+ if (this.maxTokens !== void 0) {
3688
+ body.max_completion_tokens = this.maxTokens;
3689
+ }
3690
+ if (this.responseFormat !== void 0) body.response_format = this.responseFormat;
3691
+ if (this.parallelToolCalls !== void 0) body.parallel_tool_calls = this.parallelToolCalls;
3692
+ if (this.toolChoice !== void 0) body.tool_choice = this.toolChoice;
3693
+ if (this.seed !== void 0) body.seed = this.seed;
3694
+ if (this.topP !== void 0) body.top_p = this.topP;
3695
+ if (this.frequencyPenalty !== void 0) body.frequency_penalty = this.frequencyPenalty;
3696
+ if (this.presencePenalty !== void 0) body.presence_penalty = this.presencePenalty;
3697
+ if (this.stop !== void 0) body.stop = this.stop;
3113
3698
  if (tools) body.tools = tools;
3114
3699
  const response = await fetch(`${this.baseUrl}/chat/completions`, {
3115
3700
  method: "POST",
3116
3701
  headers: {
3117
3702
  "Content-Type": "application/json",
3118
- Authorization: `Bearer ${this.apiKey}`
3703
+ Authorization: `Bearer ${this.apiKey}`,
3704
+ "User-Agent": `getpatter/${VERSION}`
3119
3705
  },
3120
3706
  body: JSON.stringify(body),
3121
3707
  signal: AbortSignal.timeout(3e4)
@@ -3150,6 +3736,16 @@ async function* parseOpenAISseStream(response) {
3150
3736
  } catch {
3151
3737
  continue;
3152
3738
  }
3739
+ const usage = chunk.usage ?? chunk.x_groq?.usage;
3740
+ if (usage) {
3741
+ const cached = chunk.usage?.prompt_tokens_details?.cached_tokens ?? 0;
3742
+ yield {
3743
+ type: "usage",
3744
+ inputTokens: usage.prompt_tokens,
3745
+ outputTokens: usage.completion_tokens,
3746
+ cacheReadInputTokens: cached
3747
+ };
3748
+ }
3153
3749
  const delta = chunk.choices?.[0]?.delta;
3154
3750
  if (!delta) continue;
3155
3751
  if (delta.content) {
@@ -3172,6 +3768,7 @@ async function* parseOpenAISseStream(response) {
3172
3768
 
3173
3769
  // src/llm/groq.ts
3174
3770
  var LLM3 = class extends GroqLLMProvider {
3771
+ static providerKey = "groq";
3175
3772
  constructor(opts = {}) {
3176
3773
  const key = opts.apiKey ?? process.env.GROQ_API_KEY;
3177
3774
  if (!key) {
@@ -3182,19 +3779,40 @@ var LLM3 = class extends GroqLLMProvider {
3182
3779
  super({
3183
3780
  apiKey: key,
3184
3781
  model: opts.model,
3185
- baseUrl: opts.baseUrl
3782
+ baseUrl: opts.baseUrl,
3783
+ temperature: opts.temperature,
3784
+ maxTokens: opts.maxTokens,
3785
+ responseFormat: opts.responseFormat,
3786
+ parallelToolCalls: opts.parallelToolCalls,
3787
+ toolChoice: opts.toolChoice,
3788
+ seed: opts.seed,
3789
+ topP: opts.topP,
3790
+ frequencyPenalty: opts.frequencyPenalty,
3791
+ presencePenalty: opts.presencePenalty,
3792
+ stop: opts.stop
3186
3793
  });
3187
3794
  }
3188
3795
  };
3189
3796
 
3190
3797
  // src/providers/cerebras-llm.ts
3191
3798
  var CEREBRAS_BASE_URL = "https://api.cerebras.ai/v1";
3192
- var DEFAULT_MODEL3 = "llama3.1-8b";
3799
+ var DEFAULT_MODEL3 = "gpt-oss-120b";
3800
+ var RETRY_BACKOFF_BASE_MS = 500;
3193
3801
  var CerebrasLLMProvider = class {
3194
3802
  apiKey;
3195
3803
  model;
3196
3804
  baseUrl;
3197
3805
  gzipCompression;
3806
+ temperature;
3807
+ maxTokens;
3808
+ responseFormat;
3809
+ parallelToolCalls;
3810
+ toolChoice;
3811
+ seed;
3812
+ topP;
3813
+ frequencyPenalty;
3814
+ presencePenalty;
3815
+ stop;
3198
3816
  constructor(options) {
3199
3817
  if (!options.apiKey) {
3200
3818
  throw new Error(
@@ -3204,18 +3822,43 @@ var CerebrasLLMProvider = class {
3204
3822
  this.apiKey = options.apiKey;
3205
3823
  this.model = options.model ?? DEFAULT_MODEL3;
3206
3824
  this.baseUrl = options.baseUrl ?? CEREBRAS_BASE_URL;
3207
- this.gzipCompression = options.gzipCompression ?? false;
3825
+ this.gzipCompression = options.gzipCompression ?? true;
3826
+ this.temperature = options.temperature;
3827
+ this.maxTokens = options.maxTokens;
3828
+ this.responseFormat = options.responseFormat;
3829
+ this.parallelToolCalls = options.parallelToolCalls;
3830
+ this.toolChoice = options.toolChoice;
3831
+ this.seed = options.seed;
3832
+ this.topP = options.topP;
3833
+ this.frequencyPenalty = options.frequencyPenalty;
3834
+ this.presencePenalty = options.presencePenalty;
3835
+ this.stop = options.stop;
3208
3836
  }
3209
3837
  async *stream(messages, tools) {
3210
3838
  const body = {
3211
3839
  model: this.model,
3212
3840
  messages,
3213
- stream: true
3841
+ stream: true,
3842
+ stream_options: { include_usage: true }
3214
3843
  };
3844
+ if (this.temperature !== void 0) body.temperature = this.temperature;
3845
+ if (this.maxTokens !== void 0) {
3846
+ body.max_completion_tokens = this.maxTokens;
3847
+ }
3848
+ if (this.responseFormat !== void 0) body.response_format = this.responseFormat;
3849
+ if (this.parallelToolCalls !== void 0) body.parallel_tool_calls = this.parallelToolCalls;
3850
+ if (this.toolChoice !== void 0) body.tool_choice = this.toolChoice;
3851
+ if (this.seed !== void 0) body.seed = this.seed;
3852
+ if (this.topP !== void 0) body.top_p = this.topP;
3853
+ if (this.frequencyPenalty !== void 0) body.frequency_penalty = this.frequencyPenalty;
3854
+ if (this.presencePenalty !== void 0) body.presence_penalty = this.presencePenalty;
3855
+ if (this.stop !== void 0) body.stop = this.stop;
3215
3856
  if (tools) body.tools = tools;
3216
3857
  const headers = {
3217
3858
  "Content-Type": "application/json",
3218
- Authorization: `Bearer ${this.apiKey}`
3859
+ Authorization: `Bearer ${this.apiKey}`,
3860
+ // Identify the SDK in upstream logs/rate-limit attribution.
3861
+ "User-Agent": `getpatter/${VERSION}`
3219
3862
  };
3220
3863
  let payload = JSON.stringify(body);
3221
3864
  if (this.gzipCompression) {
@@ -3225,18 +3868,43 @@ var CerebrasLLMProvider = class {
3225
3868
  headers["Content-Encoding"] = "gzip";
3226
3869
  }
3227
3870
  }
3228
- const response = await fetch(`${this.baseUrl}/chat/completions`, {
3229
- method: "POST",
3230
- headers,
3231
- body: payload,
3232
- signal: AbortSignal.timeout(3e4)
3233
- });
3234
- if (!response.ok) {
3235
- const errText = await response.text();
3236
- getLogger().error(`Cerebras API error: ${response.status} ${errText}`);
3237
- return;
3871
+ const maxAttempts = 2;
3872
+ let lastErrText = "";
3873
+ let lastStatus = 0;
3874
+ for (let attempt = 0; attempt < maxAttempts; attempt++) {
3875
+ const response = await fetch(`${this.baseUrl}/chat/completions`, {
3876
+ method: "POST",
3877
+ headers,
3878
+ body: payload,
3879
+ signal: AbortSignal.timeout(3e4)
3880
+ });
3881
+ if (response.ok) {
3882
+ yield* parseOpenAISseStream(response);
3883
+ return;
3884
+ }
3885
+ lastStatus = response.status;
3886
+ lastErrText = await response.text().catch(() => "");
3887
+ const isRetriable = response.status === 429 || response.status >= 500;
3888
+ const isLastAttempt = attempt >= maxAttempts - 1;
3889
+ if (!isRetriable || isLastAttempt) {
3890
+ if (response.status === 404 && lastErrText.includes("model_not_found")) {
3891
+ getLogger().error(
3892
+ `Cerebras: model "${this.model}" not available on your tier. Override via \`new CerebrasLLM({ model: '<id>' })\` and list tier-available ids with \`GET ${this.baseUrl}/models\` (common: llama3.1-8b, qwen-3-235b-a22b-instruct-2507, llama-3.3-70b on paid). Raw response: ${lastErrText}`
3893
+ );
3894
+ } else {
3895
+ getLogger().error(`Cerebras API error: ${response.status} ${lastErrText}`);
3896
+ }
3897
+ return;
3898
+ }
3899
+ const advisoryMs = parseRateLimitResetMs(response.headers);
3900
+ const exponentialMs = RETRY_BACKOFF_BASE_MS * Math.pow(2, attempt);
3901
+ const delayMs = Math.max(advisoryMs, exponentialMs);
3902
+ getLogger().warn(
3903
+ `Cerebras API ${response.status} (attempt ${attempt + 1}/${maxAttempts}); retrying after ${delayMs}ms`
3904
+ );
3905
+ await new Promise((r) => setTimeout(r, delayMs));
3238
3906
  }
3239
- yield* parseOpenAISseStream(response);
3907
+ throw new PatterError(`Cerebras API error ${lastStatus}: ${lastErrText || "request failed"}`);
3240
3908
  }
3241
3909
  };
3242
3910
  async function gzipEncode(data) {
@@ -3263,9 +3931,28 @@ async function gzipEncode(data) {
3263
3931
  }
3264
3932
  return out;
3265
3933
  }
3934
+ function parseRateLimitResetMs(headers) {
3935
+ const candidates = [
3936
+ headers.get("x-ratelimit-reset-tokens-minute"),
3937
+ headers.get("x-ratelimit-reset-requests-minute"),
3938
+ // Some upstreams send the standard ``retry-after`` (seconds).
3939
+ headers.get("retry-after")
3940
+ ];
3941
+ let bestMs = 0;
3942
+ for (const raw of candidates) {
3943
+ if (!raw) continue;
3944
+ const parsed = Number.parseFloat(raw);
3945
+ if (Number.isFinite(parsed) && parsed > 0) {
3946
+ const ms = parsed * 1e3;
3947
+ if (ms > bestMs) bestMs = ms;
3948
+ }
3949
+ }
3950
+ return bestMs;
3951
+ }
3266
3952
 
3267
3953
  // src/llm/cerebras.ts
3268
3954
  var LLM4 = class extends CerebrasLLMProvider {
3955
+ static providerKey = "cerebras";
3269
3956
  constructor(opts = {}) {
3270
3957
  const key = opts.apiKey ?? process.env.CEREBRAS_API_KEY;
3271
3958
  if (!key) {
@@ -3277,7 +3964,17 @@ var LLM4 = class extends CerebrasLLMProvider {
3277
3964
  apiKey: key,
3278
3965
  model: opts.model,
3279
3966
  baseUrl: opts.baseUrl,
3280
- gzipCompression: opts.gzipCompression
3967
+ gzipCompression: opts.gzipCompression,
3968
+ temperature: opts.temperature,
3969
+ maxTokens: opts.maxTokens,
3970
+ responseFormat: opts.responseFormat,
3971
+ parallelToolCalls: opts.parallelToolCalls,
3972
+ toolChoice: opts.toolChoice,
3973
+ seed: opts.seed,
3974
+ topP: opts.topP,
3975
+ frequencyPenalty: opts.frequencyPenalty,
3976
+ presencePenalty: opts.presencePenalty,
3977
+ stop: opts.stop
3281
3978
  });
3282
3979
  }
3283
3980
  };
@@ -3333,6 +4030,7 @@ var GoogleLLMProvider = class {
3333
4030
  const decoder = new TextDecoder();
3334
4031
  let buffer = "";
3335
4032
  let nextIndex = 0;
4033
+ let lastUsage;
3336
4034
  while (true) {
3337
4035
  const { done, value } = await reader.read();
3338
4036
  if (done) break;
@@ -3350,6 +4048,9 @@ var GoogleLLMProvider = class {
3350
4048
  } catch {
3351
4049
  continue;
3352
4050
  }
4051
+ if (payload.usageMetadata) {
4052
+ lastUsage = payload.usageMetadata;
4053
+ }
3353
4054
  const candidate = payload.candidates?.[0];
3354
4055
  const parts = candidate?.content?.parts ?? [];
3355
4056
  for (const part of parts) {
@@ -3372,6 +4073,14 @@ var GoogleLLMProvider = class {
3372
4073
  }
3373
4074
  }
3374
4075
  }
4076
+ if (lastUsage) {
4077
+ yield {
4078
+ type: "usage",
4079
+ inputTokens: lastUsage.promptTokenCount,
4080
+ outputTokens: lastUsage.candidatesTokenCount,
4081
+ cacheReadInputTokens: lastUsage.cachedContentTokenCount ?? 0
4082
+ };
4083
+ }
3375
4084
  yield { type: "done" };
3376
4085
  }
3377
4086
  };
@@ -3461,6 +4170,7 @@ function toGeminiContents(messages) {
3461
4170
 
3462
4171
  // src/llm/google.ts
3463
4172
  var LLM5 = class extends GoogleLLMProvider {
4173
+ static providerKey = "google";
3464
4174
  constructor(opts = {}) {
3465
4175
  const key = opts.apiKey ?? process.env.GEMINI_API_KEY ?? process.env.GOOGLE_API_KEY;
3466
4176
  if (!key) {
@@ -3478,6 +4188,279 @@ var LLM5 = class extends GoogleLLMProvider {
3478
4188
  }
3479
4189
  };
3480
4190
 
4191
+ // src/providers/silero-vad.ts
4192
+ import { createRequire } from "module";
4193
+ import * as fs from "fs";
4194
+ import * as path from "path";
4195
+ import { fileURLToPath } from "url";
4196
+ var SUPPORTED_SAMPLE_RATES = [8e3, 16e3];
4197
+ function resolveModuleDir() {
4198
+ try {
4199
+ const cjsDir = new Function("return typeof __dirname !== 'undefined' ? __dirname : null")();
4200
+ if (typeof cjsDir === "string") return cjsDir;
4201
+ } catch {
4202
+ }
4203
+ try {
4204
+ const url = import.meta.url;
4205
+ if (url) return path.dirname(fileURLToPath(url));
4206
+ } catch {
4207
+ }
4208
+ return process.cwd();
4209
+ }
4210
+ var MODULE_DIR = resolveModuleDir();
4211
+ function resolveDefaultModelPath() {
4212
+ const candidates = [
4213
+ path.join(MODULE_DIR, "resources", "silero_vad.onnx"),
4214
+ path.join(MODULE_DIR, "..", "resources", "silero_vad.onnx")
4215
+ ];
4216
+ for (const c of candidates) if (fs.existsSync(c)) return c;
4217
+ return candidates[0];
4218
+ }
4219
+ var DEFAULT_MODEL_PATH = resolveDefaultModelPath();
4220
+ async function loadOnnxRuntime() {
4221
+ let firstErr;
4222
+ try {
4223
+ const mod = await import("./dist-YRCCJQ26.mjs");
4224
+ return mod;
4225
+ } catch (e) {
4226
+ firstErr = e;
4227
+ }
4228
+ try {
4229
+ const req = createRequire(path.join(process.cwd(), "package.json"));
4230
+ return req("onnxruntime-node");
4231
+ } catch (e) {
4232
+ const detail = e?.message ?? String(e);
4233
+ const original = firstErr?.message ?? String(firstErr);
4234
+ throw new Error(
4235
+ `
4236
+ SileroVAD requires the "onnxruntime-node" package, which could not be resolved.
4237
+
4238
+ Install: npm install onnxruntime-node
4239
+
4240
+ This is an optional peer dependency of getpatter (~210 MB) \u2014 it is only
4241
+ needed when you use SileroVAD in pipeline mode.
4242
+
4243
+ import() failed: ${original}
4244
+ cwd-require failed: ${detail}
4245
+ `
4246
+ );
4247
+ }
4248
+ }
4249
+ var ExpFilter = class {
4250
+ constructor(alpha) {
4251
+ this.alpha = alpha;
4252
+ if (!(alpha > 0 && alpha <= 1)) {
4253
+ throw new Error("alpha must be in (0, 1].");
4254
+ }
4255
+ }
4256
+ filtered = null;
4257
+ apply(exp, sample) {
4258
+ if (this.filtered === null) {
4259
+ this.filtered = sample;
4260
+ } else {
4261
+ const a = Math.pow(this.alpha, exp);
4262
+ this.filtered = a * this.filtered + (1 - a) * sample;
4263
+ }
4264
+ return this.filtered;
4265
+ }
4266
+ reset() {
4267
+ this.filtered = null;
4268
+ }
4269
+ };
4270
+ var OnnxModel = class {
4271
+ constructor(runtime, session, sampleRate) {
4272
+ this.runtime = runtime;
4273
+ this.session = session;
4274
+ if (!SUPPORTED_SAMPLE_RATES.includes(sampleRate)) {
4275
+ throw new Error("Silero VAD only supports 8KHz and 16KHz sample rates");
4276
+ }
4277
+ this.sampleRate = sampleRate;
4278
+ this.windowSizeSamples = sampleRate === 8e3 ? 256 : 512;
4279
+ this.contextSize = sampleRate === 8e3 ? 32 : 64;
4280
+ this.context = new Float32Array(this.contextSize);
4281
+ this.rnnState = new Float32Array(2 * 1 * 128);
4282
+ this.inputBuffer = new Float32Array(this.contextSize + this.windowSizeSamples);
4283
+ this.sampleRateTensor = BigInt64Array.from([BigInt(sampleRate)]);
4284
+ }
4285
+ sampleRate;
4286
+ windowSizeSamples;
4287
+ contextSize;
4288
+ context;
4289
+ rnnState;
4290
+ inputBuffer;
4291
+ sampleRateTensor;
4292
+ async run(window) {
4293
+ if (window.length !== this.windowSizeSamples) {
4294
+ throw new Error(
4295
+ `window must have exactly ${this.windowSizeSamples} samples, got ${window.length}`
4296
+ );
4297
+ }
4298
+ this.inputBuffer.set(this.context, 0);
4299
+ this.inputBuffer.set(window, this.contextSize);
4300
+ const { Tensor } = this.runtime;
4301
+ const feeds = {
4302
+ input: new Tensor("float32", this.inputBuffer, [1, this.inputBuffer.length]),
4303
+ state: new Tensor("float32", this.rnnState, [2, 1, 128]),
4304
+ sr: new Tensor("int64", this.sampleRateTensor, [])
4305
+ };
4306
+ const results = await this.session.run(feeds);
4307
+ const outputKey = Object.keys(results).find((k) => k !== "stateN") ?? "output";
4308
+ const stateKey = "stateN" in results ? "stateN" : Object.keys(results).find((k) => k !== outputKey);
4309
+ const out = results[outputKey];
4310
+ const newState = stateKey ? results[stateKey] : void 0;
4311
+ if (newState && newState.data instanceof Float32Array) {
4312
+ this.rnnState = Float32Array.from(newState.data);
4313
+ }
4314
+ this.context = this.inputBuffer.slice(-this.contextSize);
4315
+ const data = out.data;
4316
+ return data[0] ?? 0;
4317
+ }
4318
+ };
4319
+ var SileroVAD = class _SileroVAD {
4320
+ constructor(model, opts) {
4321
+ this.model = model;
4322
+ this.opts = opts;
4323
+ }
4324
+ pending = new Float32Array(0);
4325
+ expFilter = new ExpFilter(0.35);
4326
+ pubSpeaking = false;
4327
+ speechThresholdDuration = 0;
4328
+ silenceThresholdDuration = 0;
4329
+ closed = false;
4330
+ /**
4331
+ * Load the Silero VAD model. Defaults match the LiveKit Silero plugin.
4332
+ * Throws if `onnxruntime-node` is not installed.
4333
+ */
4334
+ static async load(options = {}) {
4335
+ const sampleRate = options.sampleRate ?? 16e3;
4336
+ if (!SUPPORTED_SAMPLE_RATES.includes(sampleRate)) {
4337
+ throw new Error("Silero VAD only supports 8KHz and 16KHz sample rates");
4338
+ }
4339
+ const activationThreshold = options.activationThreshold ?? 0.5;
4340
+ const deactivationThreshold = options.deactivationThreshold ?? Math.max(activationThreshold - 0.15, 0.01);
4341
+ if (deactivationThreshold <= 0) {
4342
+ throw new Error("deactivationThreshold must be greater than 0");
4343
+ }
4344
+ const runtime = await loadOnnxRuntime();
4345
+ const modelPath = options.onnxFilePath ?? DEFAULT_MODEL_PATH;
4346
+ const session = await runtime.InferenceSession.create(modelPath, {
4347
+ interOpNumThreads: 1,
4348
+ intraOpNumThreads: 1,
4349
+ executionMode: "sequential",
4350
+ executionProviders: options.forceCpu === false ? void 0 : ["cpu"]
4351
+ });
4352
+ const model = new OnnxModel(runtime, session, sampleRate);
4353
+ return new _SileroVAD(model, {
4354
+ minSpeechDuration: options.minSpeechDuration ?? 0.05,
4355
+ minSilenceDuration: options.minSilenceDuration ?? 0.55,
4356
+ prefixPaddingDuration: options.prefixPaddingDuration ?? 0.5,
4357
+ activationThreshold,
4358
+ deactivationThreshold,
4359
+ sampleRate
4360
+ });
4361
+ }
4362
+ /**
4363
+ * Internal factory used by tests — bypasses onnxruntime-node loading.
4364
+ * @internal
4365
+ */
4366
+ static fromOnnxModel(runtime, session, options) {
4367
+ const model = new OnnxModel(runtime, session, options.sampleRate);
4368
+ return new _SileroVAD(model, options);
4369
+ }
4370
+ get sampleRate() {
4371
+ return this.opts.sampleRate;
4372
+ }
4373
+ /**
4374
+ * Number of int16 PCM samples that must be provided per call to
4375
+ * processFrame for the model to run one inference window.
4376
+ *
4377
+ * Constraint (ported from LiveKit Agents / Silero ONNX spec):
4378
+ * - 16 000 Hz → 512 samples (32 ms)
4379
+ * - 8 000 Hz → 256 samples (32 ms)
4380
+ *
4381
+ * Callers that feed raw audio in fixed-size chunks (e.g. WebSocket frames)
4382
+ * should buffer incoming audio until at least numFramesRequired() int16
4383
+ * samples are available before calling processFrame. The provider
4384
+ * internally buffers partial windows so smaller chunks are also safe, but
4385
+ * passing exactly one window per call minimises heap allocation.
4386
+ */
4387
+ numFramesRequired() {
4388
+ return this.opts.sampleRate === 8e3 ? 256 : 512;
4389
+ }
4390
+ async processFrame(pcmChunk, sampleRate) {
4391
+ if (this.closed) {
4392
+ throw new Error("SileroVAD is closed");
4393
+ }
4394
+ if (sampleRate !== this.opts.sampleRate) {
4395
+ throw new Error(
4396
+ `input sampleRate ${sampleRate} does not match model sampleRate ${this.opts.sampleRate}; resampling is not implemented in the Patter port`
4397
+ );
4398
+ }
4399
+ if (pcmChunk.length === 0) {
4400
+ return null;
4401
+ }
4402
+ const numSamples = Math.floor(pcmChunk.length / 2);
4403
+ if (numSamples === 0) {
4404
+ return null;
4405
+ }
4406
+ const samples = new Float32Array(numSamples);
4407
+ for (let i = 0; i < numSamples; i++) {
4408
+ samples[i] = pcmChunk.readInt16LE(i * 2) / 32767;
4409
+ }
4410
+ const merged = new Float32Array(this.pending.length + samples.length);
4411
+ merged.set(this.pending, 0);
4412
+ merged.set(samples, this.pending.length);
4413
+ this.pending = merged;
4414
+ const windowSize = this.model.windowSizeSamples;
4415
+ let event = null;
4416
+ while (this.pending.length >= windowSize) {
4417
+ const window = this.pending.slice(0, windowSize);
4418
+ this.pending = this.pending.slice(windowSize);
4419
+ const rawP = await this.model.run(window);
4420
+ const p = this.expFilter.apply(1, rawP);
4421
+ const windowDuration = windowSize / this.opts.sampleRate;
4422
+ const transition = this.advanceState(p, windowDuration);
4423
+ if (transition !== null) {
4424
+ event = transition;
4425
+ }
4426
+ }
4427
+ return event;
4428
+ }
4429
+ advanceState(p, windowDuration) {
4430
+ const opts = this.opts;
4431
+ if (p >= opts.activationThreshold || this.pubSpeaking && p > opts.deactivationThreshold) {
4432
+ this.speechThresholdDuration += windowDuration;
4433
+ this.silenceThresholdDuration = 0;
4434
+ if (!this.pubSpeaking) {
4435
+ if (this.speechThresholdDuration >= opts.minSpeechDuration) {
4436
+ this.pubSpeaking = true;
4437
+ return {
4438
+ type: "speech_start",
4439
+ confidence: p,
4440
+ durationMs: this.speechThresholdDuration * 1e3
4441
+ };
4442
+ }
4443
+ }
4444
+ } else {
4445
+ this.silenceThresholdDuration += windowDuration;
4446
+ this.speechThresholdDuration = 0;
4447
+ if (this.pubSpeaking && this.silenceThresholdDuration >= opts.minSilenceDuration) {
4448
+ this.pubSpeaking = false;
4449
+ return {
4450
+ type: "speech_end",
4451
+ confidence: p,
4452
+ durationMs: this.silenceThresholdDuration * 1e3
4453
+ };
4454
+ }
4455
+ }
4456
+ return null;
4457
+ }
4458
+ async close() {
4459
+ if (this.closed) return;
4460
+ this.closed = true;
4461
+ }
4462
+ };
4463
+
3481
4464
  // src/carriers/twilio.ts
3482
4465
  var Carrier = class {
3483
4466
  kind = "twilio";
@@ -3826,7 +4809,7 @@ var DebouncedCall = class {
3826
4809
  this.timer = setTimeout(() => {
3827
4810
  this.timer = null;
3828
4811
  Promise.resolve(this.callback()).catch((err) => {
3829
- console.error("IVR silence callback raised:", err);
4812
+ getLogger().error("IVR silence callback raised:", err);
3830
4813
  });
3831
4814
  }, this.delayMs);
3832
4815
  }
@@ -3882,7 +4865,7 @@ var IVRActivity = class {
3882
4865
  try {
3883
4866
  await this.onLoopDetected();
3884
4867
  } catch (err) {
3885
- console.error("IVR onLoopDetected callback raised:", err);
4868
+ getLogger().error("IVR onLoopDetected callback raised:", err);
3886
4869
  }
3887
4870
  }
3888
4871
  }
@@ -3920,7 +4903,7 @@ var IVRActivity = class {
3920
4903
  try {
3921
4904
  await this.onSilence();
3922
4905
  } catch (err) {
3923
- console.error("IVR onSilence callback raised:", err);
4906
+ getLogger().error("IVR onSilence callback raised:", err);
3924
4907
  }
3925
4908
  }
3926
4909
  }
@@ -3970,9 +4953,9 @@ var IVRActivity = class {
3970
4953
  };
3971
4954
 
3972
4955
  // src/services/background-audio.ts
3973
- import { promises as fs } from "fs";
3974
- import path from "path";
3975
- import { fileURLToPath } from "url";
4956
+ import { promises as fs2 } from "fs";
4957
+ import path2 from "path";
4958
+ import { fileURLToPath as fileURLToPath2 } from "url";
3976
4959
  var BuiltinAudioClip = {
3977
4960
  CITY_AMBIENCE: "city-ambience.ogg",
3978
4961
  FOREST_AMBIENCE: "forest-ambience.ogg",
@@ -3984,8 +4967,8 @@ var BuiltinAudioClip = {
3984
4967
  };
3985
4968
  function builtinClipPath(clip) {
3986
4969
  const meta = typeof import.meta !== "undefined" ? import.meta : void 0;
3987
- const here = meta?.url ? path.dirname(fileURLToPath(meta.url)) : typeof __dirname !== "undefined" ? __dirname : process.cwd();
3988
- return path.resolve(here, "..", "resources", "audio", clip);
4970
+ const here = meta?.url ? path2.dirname(fileURLToPath2(meta.url)) : typeof __dirname !== "undefined" ? __dirname : process.cwd();
4971
+ return path2.resolve(here, "..", "resources", "audio", clip);
3989
4972
  }
3990
4973
  var INT16_MIN = -32768;
3991
4974
  var INT16_MAX = 32767;
@@ -4154,7 +5137,7 @@ var BackgroundAudioPlayer = class {
4154
5137
  return source.decode(source.path);
4155
5138
  case "builtin": {
4156
5139
  const p = builtinClipPath(source.clip);
4157
- const header = await fs.readFile(p, { flag: "r" }).then((buf) => buf.subarray(0, 4));
5140
+ const header = await fs2.readFile(p, { flag: "r" }).then((buf) => buf.subarray(0, 4));
4158
5141
  if (header.toString("ascii") !== "OggS") {
4159
5142
  throw new Error(`Bundled clip ${source.clip} is not a valid Ogg file`);
4160
5143
  }
@@ -4184,15 +5167,264 @@ var BackgroundAudioPlayer = class {
4184
5167
  function isAudioConfig(value) {
4185
5168
  return typeof value === "object" && value !== null && "source" in value && typeof value.source === "object";
4186
5169
  }
5170
+
5171
+ // src/providers/twilio-adapter.ts
5172
+ var TWILIO_API_BASE = "https://api.twilio.com/2010-04-01";
5173
+ var TwilioAdapter = class _TwilioAdapter {
5174
+ accountSid;
5175
+ region;
5176
+ baseUrl;
5177
+ authHeader;
5178
+ constructor(accountSid, authToken, opts = {}) {
5179
+ if (!accountSid) throw new Error("TwilioAdapter: accountSid is required");
5180
+ if (!authToken) throw new Error("TwilioAdapter: authToken is required");
5181
+ this.accountSid = accountSid;
5182
+ this.region = opts.region;
5183
+ this.baseUrl = opts.region ? `https://api.${opts.region}.twilio.com/2010-04-01` : TWILIO_API_BASE;
5184
+ this.authHeader = `Basic ${Buffer.from(`${accountSid}:${authToken}`).toString("base64")}`;
5185
+ }
5186
+ async request(method, path3, body) {
5187
+ const url = `${this.baseUrl}/Accounts/${encodeURIComponent(this.accountSid)}${path3}`;
5188
+ const headers = { Authorization: this.authHeader };
5189
+ if (body) headers["Content-Type"] = "application/x-www-form-urlencoded";
5190
+ const response = await fetch(url, {
5191
+ method,
5192
+ headers,
5193
+ body: body ? body.toString() : void 0,
5194
+ signal: AbortSignal.timeout(3e4)
5195
+ });
5196
+ const text = await response.text();
5197
+ if (!response.ok) {
5198
+ throw new Error(`Twilio ${method} ${path3} failed: ${response.status} ${text}`);
5199
+ }
5200
+ if (!text) return {};
5201
+ try {
5202
+ return JSON.parse(text);
5203
+ } catch (e) {
5204
+ throw new Error(`Twilio returned non-JSON response: ${String(e)}`);
5205
+ }
5206
+ }
5207
+ /**
5208
+ * Provision a local phone number in the given country.
5209
+ *
5210
+ * Lists available local numbers, then purchases the first match.
5211
+ */
5212
+ async provisionNumber(opts) {
5213
+ const country = encodeURIComponent(opts.countryCode);
5214
+ const queryParts = ["PageSize=1"];
5215
+ if (opts.areaCode) queryParts.push(`AreaCode=${encodeURIComponent(opts.areaCode)}`);
5216
+ const path3 = `/AvailablePhoneNumbers/${country}/Local.json?${queryParts.join("&")}`;
5217
+ const available = await this.request("GET", path3);
5218
+ const first = available.available_phone_numbers?.[0]?.phone_number;
5219
+ if (!first) {
5220
+ throw new Error(`TwilioAdapter: no numbers available for country ${opts.countryCode}`);
5221
+ }
5222
+ const body = new URLSearchParams({ PhoneNumber: first });
5223
+ const purchased = await this.request(
5224
+ "POST",
5225
+ "/IncomingPhoneNumbers.json",
5226
+ body
5227
+ );
5228
+ if (!purchased.sid || !purchased.phone_number) {
5229
+ throw new Error("TwilioAdapter: malformed response from IncomingPhoneNumbers.create");
5230
+ }
5231
+ return { phoneNumber: purchased.phone_number, sid: purchased.sid };
5232
+ }
5233
+ /** Update an already-purchased number to point at our voice webhook. */
5234
+ async configureNumber(phoneNumberSid, opts) {
5235
+ if (!phoneNumberSid) throw new Error("TwilioAdapter: phoneNumberSid is required");
5236
+ const body = new URLSearchParams({
5237
+ VoiceUrl: opts.voiceUrl,
5238
+ VoiceMethod: "POST"
5239
+ });
5240
+ if (opts.statusCallback) body.set("StatusCallback", opts.statusCallback);
5241
+ await this.request(
5242
+ "POST",
5243
+ `/IncomingPhoneNumbers/${encodeURIComponent(phoneNumberSid)}.json`,
5244
+ body
5245
+ );
5246
+ }
5247
+ /** Place an outbound call. Returns the Twilio call SID. */
5248
+ async initiateCall(opts) {
5249
+ if (!opts.url && !opts.streamUrl) {
5250
+ throw new Error("TwilioAdapter: initiateCall requires either url or streamUrl");
5251
+ }
5252
+ const body = new URLSearchParams({
5253
+ From: opts.from,
5254
+ To: opts.to
5255
+ });
5256
+ if (opts.url) {
5257
+ body.set("Url", opts.url);
5258
+ } else if (opts.streamUrl) {
5259
+ body.set("Twiml", _TwilioAdapter.generateStreamTwiml(opts.streamUrl));
5260
+ }
5261
+ if (opts.statusCallback) body.set("StatusCallback", opts.statusCallback);
5262
+ if (opts.machineDetection) body.set("MachineDetection", opts.machineDetection);
5263
+ if (opts.extraParams) {
5264
+ for (const [key, value] of Object.entries(opts.extraParams)) {
5265
+ body.set(key, value);
5266
+ }
5267
+ }
5268
+ const call = await this.request("POST", "/Calls.json", body);
5269
+ if (!call.sid) {
5270
+ throw new Error("TwilioAdapter: Calls.create returned no SID");
5271
+ }
5272
+ return { callSid: call.sid };
5273
+ }
5274
+ /**
5275
+ * Build a minimal ``<Response><Connect><Stream url="..."/></Connect></Response>``
5276
+ * TwiML document. Mirrors the Python adapter's ``generate_stream_twiml``.
5277
+ */
5278
+ static generateStreamTwiml(streamUrl) {
5279
+ const escaped = streamUrl.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;").replace(/'/g, "&apos;");
5280
+ return `<?xml version="1.0" encoding="UTF-8"?><Response><Connect><Stream url="${escaped}"/></Connect></Response>`;
5281
+ }
5282
+ /** Force-complete an in-progress call. */
5283
+ async endCall(callSid) {
5284
+ if (!callSid) throw new Error("TwilioAdapter: callSid is required");
5285
+ const body = new URLSearchParams({ Status: "completed" });
5286
+ try {
5287
+ await this.request(
5288
+ "POST",
5289
+ `/Calls/${encodeURIComponent(callSid)}.json`,
5290
+ body
5291
+ );
5292
+ } catch (err) {
5293
+ getLogger().warn(`[TwilioAdapter] endCall failed for ${callSid}: ${String(err)}`);
5294
+ throw err;
5295
+ }
5296
+ }
5297
+ };
5298
+
5299
+ // src/providers/telnyx-adapter.ts
5300
+ import { randomUUID as randomUUID2 } from "crypto";
5301
+ var TELNYX_API_BASE = "https://api.telnyx.com/v2";
5302
+ var TelnyxAdapter = class {
5303
+ apiKey;
5304
+ connectionId;
5305
+ baseUrl = TELNYX_API_BASE;
5306
+ constructor(apiKey, connectionId) {
5307
+ if (!apiKey) throw new Error("TelnyxAdapter: apiKey is required");
5308
+ this.apiKey = apiKey;
5309
+ this.connectionId = connectionId;
5310
+ }
5311
+ async request(method, path3, body) {
5312
+ const url = `${this.baseUrl}${path3}`;
5313
+ const headers = {
5314
+ Authorization: `Bearer ${this.apiKey}`
5315
+ };
5316
+ if (body !== void 0) headers["Content-Type"] = "application/json";
5317
+ const response = await fetch(url, {
5318
+ method,
5319
+ headers,
5320
+ body: body !== void 0 ? JSON.stringify(body) : void 0,
5321
+ signal: AbortSignal.timeout(3e4)
5322
+ });
5323
+ const text = await response.text();
5324
+ if (!response.ok) {
5325
+ throw new Error(`Telnyx ${method} ${path3} failed: ${response.status} ${text}`);
5326
+ }
5327
+ if (!text) return {};
5328
+ try {
5329
+ return JSON.parse(text);
5330
+ } catch (e) {
5331
+ throw new Error(`Telnyx returned non-JSON response: ${String(e)}`);
5332
+ }
5333
+ }
5334
+ /**
5335
+ * Search available numbers for ``countryCode`` and place an order for the
5336
+ * first match. Returns both the reserved E.164 number and the order ID.
5337
+ */
5338
+ async provisionNumber(opts) {
5339
+ const country = encodeURIComponent(opts.countryCode);
5340
+ const searchPath = `/available_phone_numbers?filter[phone_number][country_code]=${country}&filter[limit]=1`;
5341
+ const available = await this.request("GET", searchPath);
5342
+ const chosen = available.data?.[0]?.phone_number;
5343
+ if (!chosen) {
5344
+ throw new Error(`TelnyxAdapter: no numbers available for ${opts.countryCode}`);
5345
+ }
5346
+ const orderBody = {
5347
+ phone_numbers: [{ phone_number: chosen }]
5348
+ };
5349
+ if (this.connectionId) {
5350
+ orderBody.connection_id = this.connectionId;
5351
+ }
5352
+ const order = await this.request(
5353
+ "POST",
5354
+ "/number_orders",
5355
+ orderBody
5356
+ );
5357
+ const orderId = order.data?.id ?? "";
5358
+ return { phoneNumber: chosen, orderId };
5359
+ }
5360
+ /** Attach a number to a Call Control Application. */
5361
+ async configureNumber(phoneNumber, opts) {
5362
+ if (!phoneNumber) throw new Error("TelnyxAdapter: phoneNumber is required");
5363
+ if (!opts.connectionId) throw new Error("TelnyxAdapter: connectionId is required");
5364
+ await this.request(
5365
+ "PATCH",
5366
+ `/phone_numbers/${encodeURIComponent(phoneNumber)}/voice`,
5367
+ { connection_id: opts.connectionId, tech_prefix_enabled: false }
5368
+ );
5369
+ }
5370
+ /**
5371
+ * Place an outbound call on the Call Control Application.
5372
+ *
5373
+ * Note: we intentionally do NOT pass ``stream_url`` here — audio streaming
5374
+ * is configured on the Application itself (or started explicitly via a
5375
+ * ``streaming_start`` command). Passing ``stream_url`` on dial is a
5376
+ * deprecated code path that Telnyx rejects in newer API versions.
5377
+ */
5378
+ async initiateCall(opts) {
5379
+ const connectionId = opts.connectionId ?? this.connectionId;
5380
+ if (!connectionId) {
5381
+ throw new Error("TelnyxAdapter: connectionId must be provided to initiateCall");
5382
+ }
5383
+ const payload = {
5384
+ connection_id: connectionId,
5385
+ from: opts.from,
5386
+ to: opts.to
5387
+ };
5388
+ if (opts.clientState) {
5389
+ payload.client_state = Buffer.from(opts.clientState, "utf-8").toString("base64");
5390
+ }
5391
+ const resp = await this.request("POST", "/calls", payload);
5392
+ const callControlId = resp.data?.call_control_id;
5393
+ if (!callControlId) {
5394
+ throw new Error("TelnyxAdapter: /calls returned no call_control_id");
5395
+ }
5396
+ return { callControlId };
5397
+ }
5398
+ /** Hang up an in-progress call. */
5399
+ async endCall(callControlId, opts = {}) {
5400
+ if (!callControlId) throw new Error("TelnyxAdapter: callControlId is required");
5401
+ const encoded = encodeURIComponent(callControlId);
5402
+ const body = {
5403
+ command_id: opts.commandId ?? randomUUID2()
5404
+ };
5405
+ try {
5406
+ await this.request(
5407
+ "POST",
5408
+ `/calls/${encoded}/actions/hangup`,
5409
+ body
5410
+ );
5411
+ } catch (err) {
5412
+ getLogger().warn(
5413
+ `[TelnyxAdapter] endCall failed for ${callControlId}: ${String(err)}`
5414
+ );
5415
+ throw err;
5416
+ }
5417
+ }
5418
+ };
4187
5419
  export {
4188
5420
  AllProvidersFailedError,
4189
5421
  LLM2 as AnthropicLLM,
4190
- STT5 as AssemblyAISTT,
5422
+ STT6 as AssemblyAISTT,
4191
5423
  AuthenticationError,
4192
5424
  BackgroundAudioPlayer,
4193
5425
  BuiltinAudioClip,
4194
5426
  CallMetricsAccumulator,
4195
- STT3 as CartesiaSTT,
5427
+ STT4 as CartesiaSTT,
4196
5428
  TTS3 as CartesiaTTS,
4197
5429
  LLM4 as CerebrasLLM,
4198
5430
  ChatContext,
@@ -4201,9 +5433,11 @@ export {
4201
5433
  DEFAULT_PRICING,
4202
5434
  DTMF_EVENTS,
4203
5435
  STT as DeepgramSTT,
5436
+ DefaultToolExecutor,
4204
5437
  ConvAI as ElevenLabsConvAI,
4205
5438
  ElevenLabsConvAIAdapter,
4206
5439
  TTS as ElevenLabsTTS,
5440
+ EventBus,
4207
5441
  FallbackLLMProvider,
4208
5442
  GEMINI_DEFAULT_INPUT_SR,
4209
5443
  GEMINI_DEFAULT_OUTPUT_SR,
@@ -4215,31 +5449,48 @@ export {
4215
5449
  LLMLoop,
4216
5450
  TTS5 as LMNTTTS,
4217
5451
  MetricsStore,
5452
+ Ngrok,
4218
5453
  LLM as OpenAILLM,
4219
5454
  OpenAILLMProvider,
4220
5455
  Realtime as OpenAIRealtime,
4221
5456
  OpenAIRealtimeAdapter,
4222
5457
  TTS2 as OpenAITTS,
5458
+ STT3 as OpenAITranscribeSTT,
4223
5459
  PartialStreamError,
4224
5460
  Patter,
4225
5461
  PatterConnectionError,
4226
5462
  PatterError,
5463
+ PatterTool,
5464
+ PcmCarry,
4227
5465
  PipelineHookExecutor,
4228
5466
  ProvisionError,
5467
+ RateLimitError,
4229
5468
  RemoteMessageHandler,
4230
5469
  TTS4 as RimeTTS,
5470
+ SPAN_BARGEIN,
5471
+ SPAN_CALL,
5472
+ SPAN_ENDPOINT,
5473
+ SPAN_LLM,
5474
+ SPAN_STT,
5475
+ SPAN_TOOL,
5476
+ SPAN_TTS,
4231
5477
  SentenceChunker,
4232
- STT4 as SonioxSTT,
5478
+ SileroVAD,
5479
+ STT5 as SonioxSTT,
5480
+ StatefulResampler,
4233
5481
  Static as StaticTunnel,
4234
5482
  Carrier2 as Telnyx,
5483
+ TelnyxAdapter,
4235
5484
  TestSession,
4236
5485
  TfidfLoopDetector,
4237
5486
  Tool,
4238
5487
  Carrier as Twilio,
5488
+ TwilioAdapter,
4239
5489
  ULTRAVOX_DEFAULT_API_BASE,
4240
5490
  ULTRAVOX_DEFAULT_SR,
4241
5491
  UltravoxRealtimeAdapter,
4242
5492
  STT2 as WhisperSTT,
5493
+ assemblyai,
4243
5494
  builtinClipPath,
4244
5495
  calculateRealtimeCost,
4245
5496
  calculateSttCost,
@@ -4247,6 +5498,10 @@ export {
4247
5498
  calculateTtsCost,
4248
5499
  callsToCsv,
4249
5500
  callsToJson,
5501
+ cartesia,
5502
+ createResampler16kTo8k,
5503
+ createResampler24kTo16k,
5504
+ createResampler8kTo16k,
4250
5505
  deepgram,
4251
5506
  defineTool,
4252
5507
  elevenlabs,
@@ -4254,10 +5509,14 @@ export {
4254
5509
  filterForTTS,
4255
5510
  filterMarkdown,
4256
5511
  formatDtmf,
5512
+ geminiLive,
4257
5513
  getLogger,
4258
5514
  guardrail,
5515
+ initTracing,
4259
5516
  isRemoteUrl,
5517
+ isTracingEnabled,
4260
5518
  isWebSocketUrl,
5519
+ lmnt,
4261
5520
  makeAuthMiddleware,
4262
5521
  mergePricing,
4263
5522
  mixPcm,
@@ -4271,12 +5530,17 @@ export {
4271
5530
  resample24kTo16k,
4272
5531
  resample8kTo16k,
4273
5532
  resamplePcm,
5533
+ rime,
4274
5534
  scheduleCron,
4275
5535
  scheduleInterval,
4276
5536
  scheduleOnce,
4277
5537
  selectSoundFromList,
4278
5538
  setLogger,
5539
+ soniox,
5540
+ speechmatics,
5541
+ startSpan,
4279
5542
  startTunnel,
4280
5543
  tool,
5544
+ ultravox,
4281
5545
  whisper
4282
5546
  };