getpatter 0.6.0 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -15,6 +15,7 @@ import {
15
15
  LLMLoop,
16
16
  MetricsStore,
17
17
  OpenAILLMProvider,
18
+ OpenAIRealtime2Adapter,
18
19
  OpenAIRealtimeAdapter,
19
20
  PatterConnectionError,
20
21
  PatterError,
@@ -59,7 +60,12 @@ import {
59
60
  resample8kTo16k,
60
61
  resolveLogRoot,
61
62
  startSpan
62
- } from "./chunk-JUQ5WQTQ.mjs";
63
+ } from "./chunk-TEW3NAZJ.mjs";
64
+ import {
65
+ MinWordsStrategy,
66
+ evaluateStrategies,
67
+ resetStrategies
68
+ } from "./chunk-D4424JZR.mjs";
63
69
  import {
64
70
  getLogger,
65
71
  setLogger
@@ -69,7 +75,7 @@ import {
69
75
  } from "./chunk-6GR5MHHQ.mjs";
70
76
  import {
71
77
  SileroVAD
72
- } from "./chunk-X3364LSI.mjs";
78
+ } from "./chunk-RV7APPYE.mjs";
73
79
  import {
74
80
  __dirname,
75
81
  __require,
@@ -106,6 +112,30 @@ var Realtime = class {
106
112
  }
107
113
  };
108
114
 
115
+ // src/engines/openai-2.ts
116
+ init_esm_shims();
/**
 * Engine descriptor for the "openai_realtime_2" engine kind.
 *
 * Holds the configuration chosen by the caller; it performs no I/O itself.
 *
 * Options (all optional):
 *  - `apiKey`: falls back to the `OPENAI_API_KEY` environment variable when
 *    null/undefined.
 *  - `model`: defaults to "gpt-realtime-2".
 *  - `voice`: defaults to "alloy".
 *  - `reasoningEffort`, `inputAudioTranscriptionModel`: stored exactly as
 *    given (undefined when omitted).
 *
 * @throws {Error} when no API key is available from either source.
 */
var Realtime2 = class {
  kind = "openai_realtime_2";
  apiKey;
  model;
  voice;
  reasoningEffort;
  inputAudioTranscriptionModel;
  constructor(opts = {}) {
    const credential = opts.apiKey ?? process.env.OPENAI_API_KEY;
    // An empty-string key is rejected just like a missing one.
    if (!credential) {
      throw new Error(
        "OpenAI Realtime 2 requires an apiKey. Pass { apiKey: 'sk-...' } or set OPENAI_API_KEY in the environment."
      );
    }
    Object.assign(this, {
      apiKey: credential,
      model: opts.model ?? "gpt-realtime-2",
      voice: opts.voice ?? "alloy",
      reasoningEffort: opts.reasoningEffort,
      inputAudioTranscriptionModel: opts.inputAudioTranscriptionModel
    });
  }
};
138
+
109
139
  // src/engines/elevenlabs.ts
110
140
  init_esm_shims();
111
141
  var ConvAI = class {
@@ -520,12 +550,35 @@ function filterUndef(obj) {
520
550
  }
521
551
 
522
552
  // src/client.ts
553
+ var PREWARM_CACHE_MAX = 200;
554
+ var PREWARM_TTL_GRACE_MS = 5e3;
555
+ var PARKED_CONN_TTL_MS = 3e4;
/**
 * Map the `persist` option onto a call to `resolveLogRoot`.
 *
 *  - `false`          -> `null` (no root is resolved)
 *  - `true`           -> `resolveLogRoot("auto")`
 *  - any string       -> `resolveLogRoot(persist)`
 *  - everything else  -> `resolveLogRoot()` with no argument
 *
 * @param persist boolean, string, or anything else (e.g. undefined).
 * @returns `null` for `false`, otherwise whatever `resolveLogRoot` returns.
 */
function resolvePersistRoot(persist) {
  switch (typeof persist) {
    case "boolean":
      return persist ? resolveLogRoot("auto") : null;
    case "string":
      return resolveLogRoot(persist);
    default:
      return resolveLogRoot();
  }
}
/**
 * Best-effort close of every connection held in a parked-connection slot.
 *
 * A slot may carry `stt` and `openaiRealtime` handles (closed via their own
 * `close()`) and a `tts` entry whose socket lives under `.ws`. Missing
 * entries are skipped, and an error thrown while closing one handle is
 * swallowed so the remaining handles are still closed.
 *
 * @param slot object with optional `stt`, `tts` and `openaiRealtime` entries.
 */
function closeParkedConnections(slot) {
  const shutdownByKey = [
    ["stt", (handle) => handle.close()],
    ["tts", (handle) => handle.ws.close()],
    ["openaiRealtime", (handle) => handle.close()]
  ];
  for (const [key, shutdown] of shutdownByKey) {
    const handle = slot[key];
    if (!handle) continue;
    try {
      shutdown(handle);
    } catch {
      // Deliberately ignored: teardown must never throw.
    }
  }
}
529
582
  var Patter = class {
530
583
  localConfig;
531
584
  embeddedServer = null;
@@ -546,6 +599,65 @@ var Patter = class {
546
599
  * ``Cannot use both tunnel: true and webhookUrl``.
547
600
  */
548
601
  tunnelOwnsWebhookUrl = false;
602
+ /**
603
+ * Pre-rendered first-message TTS audio per outbound call_id. Populated
604
+ * by :meth:`call` when ``agent.prewarmFirstMessage`` is true; consumed
605
+ * by the StreamHandler firstMessage emit so the greeting streams
606
+ * instantly on ``start`` instead of paying the 200-700 ms TTS first-byte
607
+ * latency. See ``AgentOptions.prewarmFirstMessage``.
608
+ *
609
+ * Stores raw bytes in the TTS provider's native sample rate; the
610
+ * carrier-side audio sender resamples on emit.
611
+ */
612
+ prewarmAudio = /* @__PURE__ */ new Map();
613
+ /**
614
+ * Call IDs whose prewarm cache slot has already been consumed —
615
+ * either by ``popPrewarmAudio`` (cache hit OR miss on the firstMessage
616
+ * emit path) or by ``recordPrewarmWaste`` (call ended before pickup).
617
+ * The prewarm task checks this set BEFORE writing bytes so a slow
618
+ * synth that finishes after the consumer already polled doesn't
619
+ * orphan bytes in ``prewarmAudio``. See FIX #92 in the parity audit.
620
+ */
621
+ prewarmConsumed = /* @__PURE__ */ new Set();
622
+ /**
623
+ * Background tasks tracked so :meth:`disconnect` can wait on / drop any
624
+ * still-running prewarm-first-message synth before tearing down.
625
+ */
626
+ prewarmTasks = /* @__PURE__ */ new Set();
627
+ /**
628
+ * TTL eviction timers keyed by call_id so :meth:`disconnect` (and
629
+ * normal consumption / waste-record paths) can cancel any pending
630
+ * timer when the slot drains naturally. Without this, the timer
631
+ * would WARN spuriously after the cache was already emptied.
632
+ */
633
+ prewarmTtlTimers = /* @__PURE__ */ new Map();
634
+ /**
635
+ * Pre-opened, fully-handshaked provider WebSockets keyed by
636
+ * carrier-issued call_id. Populated by ``parkProviderConnections``
637
+ * during the carrier ringing window; consumed by the per-call
638
+ * StreamHandler at ``start`` via ``adoptWebSocket(...)`` so STT / TTS
639
+ * / Realtime audio can flow on the first turn without paying the
640
+ * 150-900 ms TLS + WS-upgrade + protocol-handshake round-trip again.
641
+ *
642
+ * Distinct from ``prewarmAudio`` (which holds pre-rendered TTS bytes
643
+ * for the first message); the two features are complementary and
644
+ * orthogonal — both can be active for the same call.
645
+ *
646
+ * Each slot may hold up to three parked connections (STT, TTS,
647
+ * Realtime). Drained by:
648
+ * - {@link popPrewarmedConnections} on the carrier ``start`` event
649
+ * (consumed normally — the handles transfer to the StreamHandler)
650
+ * - {@link recordPrewarmWaste} on call-termination paths (no-answer,
651
+ * busy, failed, canceled, AMD voicemail). Closes parked sockets.
652
+ * - {@link disconnect} on Patter teardown. Closes all parked sockets.
653
+ */
654
+ prewarmedConnections = /* @__PURE__ */ new Map();
655
+ /**
656
+ * TTL eviction handles keyed by call_id for connections that are never
657
+ * adopted (e.g. a carrier that swallows ``start``). Closes the parked
658
+ * sockets so they don't leak past the safety window.
659
+ */
660
+ prewarmedConnTimers = /* @__PURE__ */ new Map();
549
661
  /**
550
662
  * Speech-edge events for turn-taking instrumentation. Public surface: the
551
663
  * seven `on*` proxy accessors below plus the `conversationState` snapshot.
@@ -553,13 +665,15 @@ var Patter = class {
553
665
  * the previous behaviour.
554
666
  *
555
667
  * See `src/_speech-events.ts` for the full event taxonomy and the
556
- * industry-alignment table (LiveKit / Pipecat / OpenAI Realtime).
668
+ * OpenAI Realtime alignment table.
557
669
  */
558
670
  speechEvents = new SpeechEvents();
559
671
  // ---- Speech-edge event callback proxies ------------------------------
560
- // The seven `on*` properties below mirror the public APIs of LiveKit
561
- // Agents, Pipecat and OpenAI Realtime. They proxy to `speechEvents` so
562
- // the dispatcher remains the single source of truth (state + OTel).
672
+ // The seven `on*` properties below follow the canonical voice-agent
673
+ // metric set (user/agent state transitions, turn boundaries, TTFT, audio
674
+ // first-byte) and align with OpenAI Realtime where applicable. They
675
+ // proxy to `speechEvents` so the dispatcher remains the single source of
676
+ // truth (state + OTel).
563
677
  get onUserSpeechStarted() {
564
678
  return this.speechEvents.onUserSpeechStarted;
565
679
  }
@@ -604,8 +718,8 @@ var Patter = class {
604
718
  }
605
719
  /**
606
720
  * Snapshot of the current per-side state of the call.
607
- * Mirrors LiveKit's `user_state_changed` / `agent_state_changed`
608
- * payloads. Read-only and safe to call at any time.
721
+ * Returns the user_state / agent_state payload shape — read-only and
722
+ * safe to call at any time.
609
723
  */
610
724
  get conversationState() {
611
725
  return this.speechEvents.conversationState;
@@ -717,7 +831,7 @@ var Patter = class {
717
831
  );
718
832
  }
719
833
  const engine = opts.engine;
720
- if (engine instanceof Realtime) {
834
+ if (engine instanceof Realtime || engine instanceof Realtime2) {
721
835
  working = {
722
836
  ...working,
723
837
  provider: "openai_realtime",
@@ -735,7 +849,7 @@ var Patter = class {
735
849
  };
736
850
  } else {
737
851
  throw new Error(
738
- "Unknown engine. Expected OpenAIRealtime or ElevenLabsConvAI instance."
852
+ "Unknown engine. Expected OpenAIRealtime, OpenAIRealtime2, or ElevenLabsConvAI instance."
739
853
  );
740
854
  }
741
855
  } else if (!working.provider && (working.stt !== void 0 || working.tts !== void 0 || working.llm !== void 0)) {
@@ -795,6 +909,13 @@ var Patter = class {
795
909
  if (!opts.agent.systemPrompt && opts.agent.provider !== "pipeline") {
796
910
  throw new Error("agent.systemPrompt is required");
797
911
  }
912
+ if (opts.agent.echoCancellation) {
913
+ try {
914
+ await import("./aec-PJJMUM5E.mjs");
915
+ } catch (err) {
916
+ getLogger().debug(`AEC pre-import failed at serve(): ${String(err)}`);
917
+ }
918
+ }
798
919
  if (opts.port !== void 0) {
799
920
  if (typeof opts.port !== "number" || opts.port < 1 || opts.port > 65535) {
800
921
  throw new RangeError(`port must be between 1 and 65535, got ${opts.port}`);
@@ -876,6 +997,9 @@ var Patter = class {
876
997
  opts.dashboard ?? true,
877
998
  opts.dashboardToken ?? ""
878
999
  );
1000
+ this.embeddedServer.popPrewarmAudio = this.popPrewarmAudio;
1001
+ this.embeddedServer.popPrewarmedConnections = this.popPrewarmedConnections;
1002
+ this.embeddedServer.recordPrewarmWaste = this.recordPrewarmWaste;
879
1003
  try {
880
1004
  await this.embeddedServer.start(port);
881
1005
  if (this.tunnelHandle) {
@@ -890,7 +1014,7 @@ var Patter = class {
890
1014
  }
891
1015
  /** Run the agent in interactive terminal-test mode (no real telephony). */
892
1016
  async test(opts) {
893
- const { TestSession: TestSession2 } = await import("./test-mode-Y7YG5LFZ.mjs");
1017
+ const { TestSession: TestSession2 } = await import("./test-mode-WEKKNBLD.mjs");
894
1018
  const session = new TestSession2();
895
1019
  await session.run({
896
1020
  agent: opts.agent,
@@ -900,6 +1024,325 @@ var Patter = class {
900
1024
  onCallEnd: opts.onCallEnd
901
1025
  });
902
1026
  }
/**
 * Take the cached first-message audio for ``callId`` out of the prewarm
 * cache and hand it to the caller.
 *
 * Side effects, in order:
 *  1. ``callId`` is added to ``prewarmConsumed`` whether or not bytes are
 *     cached, so the synth task in ``spawnPrewarmFirstMessage`` drops
 *     bytes that arrive after this call rather than caching them.
 *  2. Any pending TTL eviction timer for ``callId`` is cancelled.
 *  3. The cached buffer, if present, is removed from ``prewarmAudio``.
 *
 * @param callId carrier call identifier used as the cache key.
 * @returns the cached buffer, or ``undefined`` on a cache miss.
 */
popPrewarmAudio = (callId) => {
  const { prewarmConsumed, prewarmTtlTimers, prewarmAudio } = this;
  prewarmConsumed.add(callId);

  const evictionTimer = prewarmTtlTimers.get(callId);
  if (evictionTimer !== undefined) {
    clearTimeout(evictionTimer);
    prewarmTtlTimers.delete(callId);
  }

  const cached = prewarmAudio.get(callId);
  if (cached === undefined) {
    return undefined;
  }
  prewarmAudio.delete(callId);
  return cached;
};
/**
 * Release every prewarm resource held for ``callId`` and warn when a
 * pre-synthesised greeting was produced but never played.
 *
 * Steps:
 *  1. Parked provider connections for the call are always closed first.
 *  2. If the call is already marked consumed, any leftover cached audio is
 *     dropped silently and the method returns — this makes repeated calls
 *     for the same ``callId`` warn at most once.
 *  3. Otherwise the call is marked consumed, its TTL eviction timer is
 *     cancelled, and cached audio (if any) is removed with a WARN that
 *     reports the number of wasted bytes.
 *
 * @param callId carrier call identifier.
 */
recordPrewarmWaste = (callId) => {
  this.closePrewarmedConnections(callId);

  const alreadyHandled = this.prewarmConsumed.has(callId);
  if (alreadyHandled) {
    this.prewarmAudio.delete(callId);
    return;
  }
  this.prewarmConsumed.add(callId);

  const evictionTimer = this.prewarmTtlTimers.get(callId);
  if (evictionTimer !== undefined) {
    clearTimeout(evictionTimer);
    this.prewarmTtlTimers.delete(callId);
  }

  const wasted = this.prewarmAudio.get(callId);
  if (wasted === undefined) {
    return;
  }
  this.prewarmAudio.delete(callId);
  getLogger().warn(
    `Prewarm wasted for call ${callId} \u2014 first-message TTS already paid (~${wasted.byteLength} bytes synthesised) but call ended before pickup.`
  );
};
/**
 * Remove and return the parked-connection slot registered for ``callId``.
 *
 * Ownership of whatever the slot holds passes to the caller: the slot is
 * deleted from ``prewarmedConnections`` and its TTL eviction timer (if one
 * is armed) is cancelled, so nothing here will close those sockets later.
 *
 * @param callId carrier call identifier.
 * @returns the slot object, or ``undefined`` when none is registered.
 */
popPrewarmedConnections = (callId) => {
  const parked = this.prewarmedConnections.get(callId);
  if (parked !== undefined) {
    this.prewarmedConnections.delete(callId);
    const evictionTimer = this.prewarmedConnTimers.get(callId);
    if (evictionTimer !== undefined) {
      clearTimeout(evictionTimer);
      this.prewarmedConnTimers.delete(callId);
    }
  }
  return parked;
};
/**
 * Drop the parked-connection slot for ``callId`` and close its sockets.
 *
 * Invoked by ``recordPrewarmWaste`` so call-termination paths release the
 * parked sockets. A no-op when nothing is parked for the call.
 *
 * @param callId carrier call identifier.
 */
closePrewarmedConnections = (callId) => {
  // Popping unregisters the slot and cancels its TTL timer in one step.
  const parked = this.popPrewarmedConnections(callId);
  if (parked === undefined) {
    return;
  }
  closeParkedConnections(parked);
};
1123
+ /**
1124
+ * Open and park provider WebSockets in parallel with the carrier-side
1125
+ * ``initiateCall``. Unlike :meth:`spawnProviderWarmup` (which closes
1126
+ * the WS after a brief idle), the sockets opened here stay OPEN and
1127
+ * are handed off to the per-call ``StreamHandler`` on ``start``.
1128
+ *
1129
+ * This is the structural fix for first-turn cold-start: on Node's
1130
+ * ``ws`` package, opening + closing a WS does NOT warm TLS for the
1131
+ * next open — every fresh ``new WebSocket()`` re-pays the full
1132
+ * TCP + TLS + HTTP-101 round-trip. By keeping the WS open and
1133
+ * adopting it directly, the live first turn skips the handshake
1134
+ * entirely (saves ~150-900 ms depending on provider).
1135
+ *
1136
+ * Best-effort: each provider's parking task is wrapped in
1137
+ * ``Promise.allSettled`` so a slow or failing endpoint cannot block
1138
+ * the others. Providers without ``openParkedConnection`` contribute
1139
+ * nothing — the call falls through to the cold ``connect()`` path
1140
+ * for that provider.
1141
+ */
1142
+ parkProviderConnections(agent, callId) {
1143
+ const stt = agent.stt;
1144
+ const tts = agent.tts;
1145
+ const sttOpen = typeof stt?.openParkedConnection === "function" ? stt.openParkedConnection.bind(stt) : null;
1146
+ const ttsOpen = typeof tts?.openParkedConnection === "function" ? tts.openParkedConnection.bind(tts) : null;
1147
+ if (!sttOpen && !ttsOpen) return;
1148
+ const slot = {};
1149
+ this.prewarmedConnections.set(callId, slot);
1150
+ const startedAt = Date.now();
1151
+ const tasks = [];
1152
+ if (sttOpen) {
1153
+ tasks.push((async () => {
1154
+ try {
1155
+ const ws = await sttOpen();
1156
+ if (this.prewarmedConnections.get(callId) !== slot) {
1157
+ try {
1158
+ ws.close();
1159
+ } catch {
1160
+ }
1161
+ return;
1162
+ }
1163
+ slot.stt = ws;
1164
+ getLogger().info(
1165
+ `[PREWARM] callId=${callId} provider=stt ms=${Date.now() - startedAt}`
1166
+ );
1167
+ } catch (err) {
1168
+ getLogger().debug(`Park STT failed for ${callId}: ${String(err)}`);
1169
+ }
1170
+ })());
1171
+ }
1172
+ if (ttsOpen) {
1173
+ tasks.push((async () => {
1174
+ try {
1175
+ const parked = await ttsOpen();
1176
+ if (this.prewarmedConnections.get(callId) !== slot) {
1177
+ try {
1178
+ parked.ws.close();
1179
+ } catch {
1180
+ }
1181
+ return;
1182
+ }
1183
+ slot.tts = parked;
1184
+ getLogger().info(
1185
+ `[PREWARM] callId=${callId} provider=tts ms=${Date.now() - startedAt}`
1186
+ );
1187
+ } catch (err) {
1188
+ getLogger().debug(`Park TTS failed for ${callId}: ${String(err)}`);
1189
+ }
1190
+ })());
1191
+ }
1192
+ const task = (async () => {
1193
+ await Promise.allSettled(tasks);
1194
+ })();
1195
+ this.prewarmTasks.add(task);
1196
+ void task.finally(() => {
1197
+ this.prewarmTasks.delete(task);
1198
+ if (!this.prewarmedConnections.has(callId)) return;
1199
+ const handle = setTimeout(() => {
1200
+ this.prewarmedConnTimers.delete(callId);
1201
+ const orphan = this.prewarmedConnections.get(callId);
1202
+ if (orphan === void 0) return;
1203
+ this.prewarmedConnections.delete(callId);
1204
+ closeParkedConnections(orphan);
1205
+ getLogger().warn(
1206
+ `[PREWARM] parked connections evicted by TTL for ${callId} \u2014 call never reached start (~${(PARKED_CONN_TTL_MS / 1e3).toFixed(0)}s).`
1207
+ );
1208
+ }, PARKED_CONN_TTL_MS);
1209
+ handle.unref?.();
1210
+ this.prewarmedConnTimers.set(callId, handle);
1211
+ });
1212
+ }
1213
+ /**
1214
+ * Spawn a fire-and-forget task that warms up STT / TTS / LLM in
1215
+ * parallel with the carrier-side ``initiateCall``.
1216
+ *
1217
+ * Best-effort: each provider's optional ``warmup()`` is wrapped in
1218
+ * ``Promise.allSettled`` so a slow or failing endpoint cannot block
1219
+ * the others. Providers without ``warmup`` contribute nothing.
1220
+ */
1221
+ spawnProviderWarmup(agent) {
1222
+ const targets = [];
1223
+ const collect = (provider, label) => {
1224
+ if (!provider || typeof provider !== "object") return;
1225
+ const fn = provider.warmup;
1226
+ if (typeof fn !== "function") return;
1227
+ targets.push({
1228
+ name: label,
1229
+ fn: fn.bind(provider)
1230
+ });
1231
+ };
1232
+ collect(agent.stt, "stt");
1233
+ collect(agent.tts, "tts");
1234
+ collect(agent.llm, "llm");
1235
+ if (targets.length === 0) return;
1236
+ const task = (async () => {
1237
+ const results = await Promise.allSettled(targets.map((t) => t.fn()));
1238
+ results.forEach((r, i) => {
1239
+ if (r.status === "rejected") {
1240
+ getLogger().debug(
1241
+ `Provider warmup failed (${targets[i].name}): ${String(r.reason)}`
1242
+ );
1243
+ }
1244
+ });
1245
+ })();
1246
+ this.prewarmTasks.add(task);
1247
+ void task.finally(() => this.prewarmTasks.delete(task));
1248
+ }
1249
+ /**
1250
+ * Pre-render ``agent.firstMessage`` to TTS bytes during the ringing
1251
+ * window and stash them in ``prewarmAudio.set(callId, buf)``.
1252
+ *
1253
+ * Skipped silently when ``agent.prewarmFirstMessage`` is false or
1254
+ * when ``agent.tts`` / ``agent.firstMessage`` is missing. The synth
1255
+ * is bounded by ``ringTimeout`` (default 25 s) so a never-answered
1256
+ * call doesn't tie up the TTS connection. On timeout / error the
1257
+ * cache is left empty and the StreamHandler falls back to live TTS.
1258
+ *
1259
+ * **Pipeline mode only.** Realtime / ConvAI provider modes never
1260
+ * consume the prewarm cache (the StreamHandler for those modes runs
1261
+ * its first-message emit through the provider's own audio path).
1262
+ * Spawning the prewarm in those modes pays the TTS bill for nothing
1263
+ * — refused with a warn.
1264
+ *
1265
+ * **Capped at ``PREWARM_CACHE_MAX`` concurrent entries.** Refused
1266
+ * with a warn when the cap is reached (the call still proceeds —
1267
+ * StreamHandler falls back to live TTS).
1268
+ */
1269
+ spawnPrewarmFirstMessage(agent, callId, ringTimeout) {
1270
+ if (!agent.prewarmFirstMessage) return;
1271
+ const providerMode = agent.provider ?? "openai_realtime";
1272
+ if (providerMode !== "pipeline") {
1273
+ getLogger().warn(
1274
+ `agent.prewarmFirstMessage=true is only supported in pipeline mode (provider=${providerMode}); skipping pre-synth to avoid wasted TTS spend.`
1275
+ );
1276
+ return;
1277
+ }
1278
+ const firstMessage = agent.firstMessage ?? "";
1279
+ const tts = agent.tts;
1280
+ if (!firstMessage || !tts) return;
1281
+ if (typeof tts.synthesizeStream !== "function") return;
1282
+ const inFlight = this.prewarmAudio.size + this.prewarmTasks.size;
1283
+ if (inFlight >= PREWARM_CACHE_MAX) {
1284
+ getLogger().warn(
1285
+ `Prewarm cache full (${inFlight}/${PREWARM_CACHE_MAX} in-flight) \u2014 skipping pre-synth for call ${callId}; falling back to live TTS at pickup.`
1286
+ );
1287
+ return;
1288
+ }
1289
+ const timeoutMs = (typeof ringTimeout === "number" ? ringTimeout : 25) * 1e3;
1290
+ const task = (async () => {
1291
+ try {
1292
+ const accumulate = async () => {
1293
+ const chunks = [];
1294
+ for await (const chunk of tts.synthesizeStream(firstMessage)) {
1295
+ const u = chunk;
1296
+ if (Buffer.isBuffer(u)) chunks.push(u);
1297
+ else if (ArrayBuffer.isView(u))
1298
+ chunks.push(Buffer.from(u.buffer, u.byteOffset, u.byteLength));
1299
+ }
1300
+ return Buffer.concat(chunks);
1301
+ };
1302
+ const timer = new Promise(
1303
+ (_resolve, reject) => setTimeout(
1304
+ () => reject(new Error("prewarm-first-message timeout")),
1305
+ timeoutMs
1306
+ ).unref?.()
1307
+ );
1308
+ const buf = await Promise.race([accumulate(), timer]);
1309
+ if (buf.byteLength > 0) {
1310
+ if (this.prewarmConsumed.has(callId)) {
1311
+ getLogger().warn(
1312
+ `Prewarm orphaned for call ${callId} \u2014 synth completed (~${buf.byteLength} bytes) AFTER consumer polled; bytes dropped, TTS bill already paid.`
1313
+ );
1314
+ return;
1315
+ }
1316
+ this.prewarmAudio.set(callId, buf);
1317
+ getLogger().debug(
1318
+ `Prewarm first-message ready for call ${callId} (${buf.byteLength} bytes)`
1319
+ );
1320
+ }
1321
+ } catch (err) {
1322
+ getLogger().debug(
1323
+ `Prewarm first-message failed for call ${callId}: ${String(err)}`
1324
+ );
1325
+ }
1326
+ })();
1327
+ this.prewarmTasks.add(task);
1328
+ void task.finally(() => {
1329
+ this.prewarmTasks.delete(task);
1330
+ if (!this.prewarmAudio.has(callId)) return;
1331
+ const ttlMs = timeoutMs + PREWARM_TTL_GRACE_MS;
1332
+ const handle = setTimeout(() => {
1333
+ this.prewarmTtlTimers.delete(callId);
1334
+ const orphan = this.prewarmAudio.get(callId);
1335
+ if (orphan === void 0) return;
1336
+ this.prewarmAudio.delete(callId);
1337
+ this.prewarmConsumed.add(callId);
1338
+ getLogger().warn(
1339
+ `Prewarm bytes evicted by TTL \u2014 call ${callId} never consumed them (~${orphan.byteLength} bytes synthesised, ${(ttlMs / 1e3).toFixed(1)}s after ringTimeout).`
1340
+ );
1341
+ }, ttlMs);
1342
+ handle.unref?.();
1343
+ this.prewarmTtlTimers.set(callId, handle);
1344
+ });
1345
+ }
903
1346
  /** Place an outbound call via the configured carrier. */
904
1347
  async call(options) {
905
1348
  if (!options.to) {
@@ -914,6 +1357,9 @@ var Patter = class {
914
1357
  if (this.embeddedServer) {
915
1358
  this.embeddedServer.onMachineDetection = options.onMachineDetection;
916
1359
  }
1360
+ if (options.agent.prewarm !== false) {
1361
+ this.spawnProviderWarmup(options.agent);
1362
+ }
917
1363
  if (carrier.kind === "telnyx") {
918
1364
  const telnyxKey = carrier.apiKey;
919
1365
  const connectionId = carrier.connectionId;
@@ -939,19 +1385,24 @@ var Patter = class {
939
1385
  if (!response2.ok) {
940
1386
  throw new ProvisionError(`Failed to initiate Telnyx call: ${await response2.text()}`);
941
1387
  }
942
- if (this.embeddedServer) {
943
- try {
944
- const body = await response2.clone().json();
945
- const callId = body.data?.call_control_id;
946
- if (callId) {
947
- this.embeddedServer.metricsStore.recordCallInitiated({
948
- call_id: callId,
949
- caller: phoneNumber,
950
- callee: options.to,
951
- direction: "outbound"
952
- });
953
- }
954
- } catch {
1388
+ let telnyxCallId;
1389
+ try {
1390
+ const body = await response2.clone().json();
1391
+ telnyxCallId = body.data?.call_control_id;
1392
+ } catch {
1393
+ }
1394
+ if (this.embeddedServer && telnyxCallId) {
1395
+ this.embeddedServer.metricsStore.recordCallInitiated({
1396
+ call_id: telnyxCallId,
1397
+ caller: phoneNumber,
1398
+ callee: options.to,
1399
+ direction: "outbound"
1400
+ });
1401
+ }
1402
+ if (telnyxCallId) {
1403
+ this.spawnPrewarmFirstMessage(options.agent, telnyxCallId, effectiveRingTimeout);
1404
+ if (options.agent.prewarm !== false) {
1405
+ this.parkProviderConnections(options.agent, telnyxCallId);
955
1406
  }
956
1407
  }
957
1408
  return;
@@ -994,25 +1445,31 @@ var Patter = class {
994
1445
  if (!response.ok) {
995
1446
  throw new ProvisionError(`Failed to initiate call: ${await response.text()}`);
996
1447
  }
997
- if (this.embeddedServer) {
998
- try {
999
- const body = await response.clone().json();
1000
- const callSid = body.sid;
1001
- if (callSid) {
1002
- this.embeddedServer.metricsStore.recordCallInitiated({
1003
- call_id: callSid,
1004
- caller: phoneNumber,
1005
- callee: options.to,
1006
- direction: "outbound"
1007
- });
1008
- const notificationsPath = body.subresource_uris?.notifications;
1009
- if (notificationsPath) {
1010
- getLogger().info(
1011
- `Outbound call ${callSid} placed. Twilio notifications: https://api.twilio.com${notificationsPath} (check here if the call drops with no audio).`
1012
- );
1013
- }
1014
- }
1015
- } catch {
1448
+ let twilioCallSid;
1449
+ let twilioNotificationsPath;
1450
+ try {
1451
+ const body = await response.clone().json();
1452
+ twilioCallSid = body.sid;
1453
+ twilioNotificationsPath = body.subresource_uris?.notifications;
1454
+ } catch {
1455
+ }
1456
+ if (this.embeddedServer && twilioCallSid) {
1457
+ this.embeddedServer.metricsStore.recordCallInitiated({
1458
+ call_id: twilioCallSid,
1459
+ caller: phoneNumber,
1460
+ callee: options.to,
1461
+ direction: "outbound"
1462
+ });
1463
+ if (twilioNotificationsPath) {
1464
+ getLogger().info(
1465
+ `Outbound call ${twilioCallSid} placed. Twilio notifications: https://api.twilio.com${twilioNotificationsPath} (check here if the call drops with no audio).`
1466
+ );
1467
+ }
1468
+ }
1469
+ if (twilioCallSid) {
1470
+ this.spawnPrewarmFirstMessage(options.agent, twilioCallSid, effectiveRingTimeout);
1471
+ if (options.agent.prewarm !== false) {
1472
+ this.parkProviderConnections(options.agent, twilioCallSid);
1016
1473
  }
1017
1474
  }
1018
1475
  }
@@ -1020,8 +1477,36 @@ var Patter = class {
1020
1477
  * Stop the embedded server and any running tunnel. Safe to call multiple
1021
1478
  * times. Leaves the instance reusable: a subsequent ``serve()`` works as
1022
1479
  * if the previous lifecycle never happened.
1480
+ *
1481
+ * Also clears any pending TTL eviction timers, awaits in-flight
1482
+ * prewarm-first-message synth tasks (best-effort, with a 1 s safety
1483
+ * timeout), and clears the prewarm cache. Without this a still-running
1484
+ * TTS WS keeps the user billed long after SDK teardown, and stale
1485
+ * entries leak across ``serve`` / ``disconnect`` cycles. See FIX #93.
1023
1486
  */
1024
1487
  async disconnect() {
1488
+ for (const handle of this.prewarmTtlTimers.values()) {
1489
+ clearTimeout(handle);
1490
+ }
1491
+ this.prewarmTtlTimers.clear();
1492
+ if (this.prewarmTasks.size > 0) {
1493
+ const drain = Promise.allSettled(Array.from(this.prewarmTasks));
1494
+ const timer = new Promise(
1495
+ (resolve) => setTimeout(resolve, 1e3).unref?.()
1496
+ );
1497
+ await Promise.race([drain, timer]);
1498
+ }
1499
+ this.prewarmTasks.clear();
1500
+ this.prewarmAudio.clear();
1501
+ this.prewarmConsumed.clear();
1502
+ for (const handle of this.prewarmedConnTimers.values()) {
1503
+ clearTimeout(handle);
1504
+ }
1505
+ this.prewarmedConnTimers.clear();
1506
+ for (const slot of this.prewarmedConnections.values()) {
1507
+ closeParkedConnections(slot);
1508
+ }
1509
+ this.prewarmedConnections.clear();
1025
1510
  if (this.tunnelHandle) {
1026
1511
  this.tunnelHandle.stop();
1027
1512
  this.tunnelHandle = null;
@@ -1072,6 +1557,7 @@ var Patter = class {
1072
1557
  if (!callSid) {
1073
1558
  throw new Error("callSid must be a non-empty string");
1074
1559
  }
1560
+ this.recordPrewarmWaste(callSid);
1075
1561
  const carrier = this.localConfig.carrier;
1076
1562
  if (carrier.kind === "twilio") {
1077
1563
  const auth = Buffer.from(`${carrier.accountSid}:${carrier.authToken}`).toString("base64");
@@ -1107,7 +1593,7 @@ var Patter = class {
1107
1593
  }
1108
1594
  };
1109
1595
  async function waitForTunnelPubliclyReachable(hostname, totalTimeoutMs = 6e4, graceMs = 5e3) {
1110
- const log = getLogger();
1596
+ const log2 = getLogger();
1111
1597
  const { Resolver } = await import("dns/promises");
1112
1598
  const resolver = new Resolver({ timeout: 1500, tries: 1 });
1113
1599
  resolver.setServers(["1.1.1.1", "8.8.8.8"]);
@@ -1119,7 +1605,7 @@ async function waitForTunnelPubliclyReachable(hostname, totalTimeoutMs = 6e4, gr
1119
1605
  try {
1120
1606
  const records = await resolver.resolve4(hostname);
1121
1607
  const first = records[0] ?? "<unknown>";
1122
- log.info(
1608
+ log2.info(
1123
1609
  "Tunnel DNS resolved \u2192 %s (attempt %d); waiting %d ms grace",
1124
1610
  first,
1125
1611
  attempt,
@@ -2333,6 +2819,8 @@ function wrapPcmInWav(pcm, sampleRate = 16e3, channels = 1, bitsPerSample = 16)
2333
2819
  return Buffer.concat([header, pcm]);
2334
2820
  }
2335
2821
  var WhisperSTT = class _WhisperSTT {
2822
+ /** Stable pricing/dashboard key — read by stream-handler/metrics. */
2823
+ static providerKey = "whisper";
2336
2824
  apiKey;
2337
2825
  model;
2338
2826
  language;
@@ -2501,6 +2989,8 @@ init_esm_shims();
2501
2989
  var ALLOWED_MODELS2 = /* @__PURE__ */ new Set(["gpt-4o-transcribe", "gpt-4o-mini-transcribe"]);
2502
2990
  var DEFAULT_BUFFER_SIZE2 = 16e3 * 2;
2503
2991
  var OpenAITranscribeSTT = class extends WhisperSTT {
2992
+ /** Stable pricing/dashboard key — read by stream-handler/metrics. */
2993
+ static providerKey = "openai_transcribe";
2504
2994
  /**
2505
2995
  * @param apiKey OpenAI API key.
2506
2996
  * @param language ISO-639-1 language code (e.g. ``"en"``, ``"it"``). Optional.
@@ -2576,6 +3066,8 @@ var CartesiaSTT = class {
2576
3066
  }
2577
3067
  apiKey;
2578
3068
  options;
3069
+ /** Stable pricing/dashboard key — read by stream-handler/metrics. */
3070
+ static providerKey = "cartesia_stt";
2579
3071
  ws = null;
2580
3072
  callbacks = /* @__PURE__ */ new Set();
2581
3073
  keepaliveTimer = null;
@@ -2584,6 +3076,37 @@ var CartesiaSTT = class {
2584
3076
  * `null` until the first transcript event arrives (matches Python's `None`).
2585
3077
  */
2586
3078
  requestId = null;
3079
+ /**
3080
+ * Open a fresh WebSocket without arming any message / keepalive handlers
3081
+ * and without taking ownership on `this.ws`. Returns the OPEN socket so
3082
+ * the caller (the prewarm pipeline) can park it for later adoption via
3083
+ * `adoptWebSocket`. Bounded by `CONNECT_TIMEOUT_MS`.
3084
+ *
3085
+ * Billing safety: opening + parking the WS does not stream audio
3086
+ * (Cartesia STT bills on streamed audio seconds), so no charge is
3087
+ * incurred. Close the returned WS yourself if it is never adopted.
3088
+ */
3089
+ async openParkedConnection() {
3090
+ const url = this.buildWsUrl();
3091
+ const ws = new WebSocket2(url, {
3092
+ headers: { "User-Agent": USER_AGENT }
3093
+ });
3094
+ await new Promise((resolve, reject) => {
3095
+ const timer = setTimeout(
3096
+ () => reject(new Error("Cartesia STT park connect timeout")),
3097
+ CONNECT_TIMEOUT_MS
3098
+ );
3099
+ ws.once("open", () => {
3100
+ clearTimeout(timer);
3101
+ resolve();
3102
+ });
3103
+ ws.once("error", (err) => {
3104
+ clearTimeout(timer);
3105
+ reject(err);
3106
+ });
3107
+ });
3108
+ return ws;
3109
+ }
2587
3110
  buildWsUrl() {
2588
3111
  const opts = this.options;
2589
3112
  const rawBase = opts.baseUrl ?? DEFAULT_BASE_URL;
@@ -2608,6 +3131,57 @@ var CartesiaSTT = class {
2608
3131
  });
2609
3132
  return `${base}/stt/websocket?${params.toString()}`;
2610
3133
  }
3134
+ /**
3135
+ * Pre-call WebSocket warmup for the Cartesia STT `/stt/websocket` endpoint.
3136
+ *
3137
+ * Opens the WS (DNS + TLS + auth handshake), idles ~250 ms so the
3138
+ * Cartesia edge keeps session state warm, then closes. By the time
3139
+ * `connect()` is invoked at call-pickup the resolver and TLS session
3140
+ * are hot — net wire time saving of 200-500 ms.
3141
+ *
3142
+ * Billing safety: Cartesia STT bills on streamed audio seconds (per
3143
+ * https://docs.cartesia.ai/2025-04-16/api-reference/stt/stt). Opening
3144
+ * + closing the WebSocket without forwarding audio does not consume
3145
+ * billable seconds. Best-effort: failures logged at debug level.
3146
+ */
3147
+ async warmup() {
3148
+ const url = this.buildWsUrl();
3149
+ let ws = null;
3150
+ try {
3151
+ ws = await new Promise((resolve, reject) => {
3152
+ const sock = new WebSocket2(url, {
3153
+ headers: { "User-Agent": USER_AGENT }
3154
+ });
3155
+ const timer = setTimeout(() => {
3156
+ try {
3157
+ sock.close();
3158
+ } catch {
3159
+ }
3160
+ reject(new Error("Cartesia STT warmup connect timeout"));
3161
+ }, 5e3);
3162
+ sock.once("open", () => {
3163
+ clearTimeout(timer);
3164
+ resolve(sock);
3165
+ });
3166
+ sock.once("error", (err) => {
3167
+ clearTimeout(timer);
3168
+ reject(err);
3169
+ });
3170
+ });
3171
+ await new Promise((r) => setTimeout(r, 250));
3172
+ } catch (err) {
3173
+ getLogger().debug(
3174
+ `Cartesia STT warmup failed (best-effort): ${describeWarmupError(err)}`
3175
+ );
3176
+ } finally {
3177
+ if (ws) {
3178
+ try {
3179
+ ws.close();
3180
+ } catch {
3181
+ }
3182
+ }
3183
+ }
3184
+ }
2611
3185
  /** Open the streaming WebSocket and arm message + keepalive handlers. */
2612
3186
  async connect() {
2613
3187
  const url = this.buildWsUrl();
@@ -2628,6 +3202,24 @@ var CartesiaSTT = class {
2628
3202
  reject(err);
2629
3203
  });
2630
3204
  });
3205
+ this.armMessageAndKeepalive();
3206
+ }
3207
+ /**
3208
+ * Adopt a pre-opened, already-OPEN WebSocket produced by the prewarm
3209
+ * pipeline (see `Patter.parkProviderConnections`). Skips the fresh
3210
+ * `new WebSocket()` + handshake — the WS is already through DNS, TLS
3211
+ * and HTTP-101 so audio frames can flow on this turn instead of
3212
+ * paying ~150-400 ms of handshake.
3213
+ *
3214
+ * Caller MUST verify `ws.readyState === OPEN` before calling. If the
3215
+ * parked WS died between park and adopt, fall back to `connect()`.
3216
+ */
3217
+ adoptWebSocket(ws) {
3218
+ this.ws = ws;
3219
+ this.armMessageAndKeepalive();
3220
+ }
3221
+ armMessageAndKeepalive() {
3222
+ if (!this.ws) return;
2631
3223
  this.ws.on("message", (raw) => {
2632
3224
  let event;
2633
3225
  try {
@@ -2675,6 +3267,31 @@ var CartesiaSTT = class {
2675
3267
  if (!this.ws || this.ws.readyState !== WebSocket2.OPEN) return;
2676
3268
  this.ws.send(audio);
2677
3269
  }
3270
+ /**
3271
+ * Force Cartesia to finalise the in-flight utterance immediately.
3272
+ *
3273
+ * Sends a ``finalize`` text frame on the live WebSocket. Cartesia
3274
+ * replies with the final transcript followed by ``flush_done``,
3275
+ * bypassing its conservative internal silence heuristic (which can
3276
+ * wait 2-7 s on PSTN audio before naturally finalising). Wired
3277
+ * into ``StreamHandler`` on the VAD ``speech_end`` event so the
3278
+ * SDK's authoritative end-of-speech detection forces an immediate
3279
+ * STT finalisation — turning Cartesia's natural-pause endpointing
3280
+ * into a deterministic VAD-driven one, parity with the Deepgram
3281
+ * fast-path. No-op when the WS isn't open. Parity with Python
3282
+ * ``CartesiaSTT.finalize``.
3283
+ */
3284
+ async finalize() {
3285
+ if (!this.ws || this.ws.readyState !== WebSocket2.OPEN) return;
3286
+ await new Promise((resolve) => {
3287
+ this.ws.send(CartesiaSTTClientFrame.FINALIZE, (err) => {
3288
+ if (err) {
3289
+ getLogger().debug(`Cartesia finalize send failed: ${String(err)}`);
3290
+ }
3291
+ resolve();
3292
+ });
3293
+ });
3294
+ }
2678
3295
  /** Register a transcript listener. */
2679
3296
  onTranscript(callback) {
2680
3297
  this.callbacks.add(callback);
@@ -2748,6 +3365,17 @@ var CartesiaSTT = class {
2748
3365
  }
2749
3366
  }
2750
3367
  };
3368
/**
 * Produce a short label for a warmup failure, for debug logging.
 *
 * The label is built only from structured fields (`statusCode`, `code`,
 * constructor name, `name`); `err.message` is never included.
 *
 * Precedence:
 *   1. numeric `statusCode`                → "HTTP <statusCode>"
 *   2. numeric `code` in [100, 600)        → "HTTP <code>"
 *   3. constructor name, unless it is "Object" or empty; when it is the
 *      generic "Error", a non-empty `name` is preferred
 *   4. non-empty `name`
 *   5. `typeof err`
 * A string `code` made of A-Z, 0-9 and "_" (e.g. "ECONNREFUSED") is appended
 * as "<label> (<code>)" so socket and DNS failures do not all read "Error".
 *
 * @param {unknown} err Value caught from the warmup attempt.
 * @returns {string} Label as described above.
 */
function describeWarmupError(err) {
  if (typeof err !== "object" || err === null) return typeof err;
  const e = err;
  if (typeof e.statusCode === "number") return `HTTP ${e.statusCode}`;
  if (typeof e.code === "number" && e.code >= 100 && e.code < 600) return `HTTP ${e.code}`;
  const ctor = e.constructor?.name;
  const name = typeof e.name === "string" && e.name !== "" ? e.name : void 0;
  let label;
  if (typeof ctor === "string" && ctor !== "" && ctor !== "Object") {
    // A bare Error whose `name` was customised is better described by that name.
    label = ctor === "Error" && name !== void 0 ? name : ctor;
  } else {
    label = name;
  }
  // Only accept identifier-like codes so arbitrary text never reaches the log.
  const code = typeof e.code === "string" && /^[A-Z][A-Z0-9_]*$/.test(e.code) ? e.code : void 0;
  if (code !== void 0) return `${label ?? "Error"} (${code})`;
  return label ?? typeof err;
}
2751
3379
 
2752
3380
  // src/stt/cartesia.ts
2753
3381
  var STT4 = class extends CartesiaSTT {
@@ -2826,6 +3454,8 @@ var TokenAccumulator = class {
2826
3454
  }
2827
3455
  };
2828
3456
  var SonioxSTT = class _SonioxSTT {
3457
+ /** Stable pricing/dashboard key — read by stream-handler/metrics. */
3458
+ static providerKey = "soniox";
2829
3459
  ws = null;
2830
3460
  callbacks = [];
2831
3461
  final = new TokenAccumulator();
@@ -3103,6 +3733,8 @@ var AssemblyAISTT = class _AssemblyAISTT {
3103
3733
  }
3104
3734
  apiKey;
3105
3735
  options;
3736
+ /** Stable pricing/dashboard key — read by stream-handler/metrics. */
3737
+ static providerKey = "assemblyai";
3106
3738
  ws = null;
3107
3739
  callbacks = /* @__PURE__ */ new Set();
3108
3740
  closing = false;
@@ -3192,6 +3824,62 @@ var AssemblyAISTT = class _AssemblyAISTT {
3192
3824
  }
3193
3825
  return headers;
3194
3826
  }
3827
+ /**
3828
+ * Pre-call WebSocket warmup for the AssemblyAI v3 `/v3/ws` endpoint.
3829
+ *
3830
+ * Opens the WS (DNS + TLS + auth handshake), idles ~250 ms so the
3831
+ * AssemblyAI edge keeps the session state warm, then sends Terminate
3832
+ * and closes. By the time `connect()` is invoked at call-pickup the
3833
+ * resolver and TLS session are hot — net wire time saving of
3834
+ * 200-500 ms.
3835
+ *
3836
+ * Billing safety: AssemblyAI Universal Streaming bills on streamed
3837
+ * audio seconds (per https://www.assemblyai.com/pricing). Opening +
3838
+ * closing the WebSocket without forwarding any audio frames does
3839
+ * not consume billable seconds. Best-effort: failures logged at
3840
+ * debug level.
3841
+ */
3842
+ async warmup() {
3843
+ const url = this.buildUrl();
3844
+ const headers = this.buildHeaders();
3845
+ let ws = null;
3846
+ try {
3847
+ ws = await new Promise((resolve, reject) => {
3848
+ const sock = new WebSocket4(url, { headers });
3849
+ const timer = setTimeout(() => {
3850
+ try {
3851
+ sock.close();
3852
+ } catch {
3853
+ }
3854
+ reject(new Error("AssemblyAI STT warmup connect timeout"));
3855
+ }, 5e3);
3856
+ sock.once("open", () => {
3857
+ clearTimeout(timer);
3858
+ resolve(sock);
3859
+ });
3860
+ sock.once("error", (err) => {
3861
+ clearTimeout(timer);
3862
+ reject(err);
3863
+ });
3864
+ });
3865
+ await new Promise((r) => setTimeout(r, 250));
3866
+ try {
3867
+ ws.send(JSON.stringify({ type: AssemblyAIClientFrame.TERMINATE }));
3868
+ } catch {
3869
+ }
3870
+ } catch (err) {
3871
+ getLogger().debug(
3872
+ `AssemblyAI STT warmup failed (best-effort): ${describeWarmupError2(err)}`
3873
+ );
3874
+ } finally {
3875
+ if (ws) {
3876
+ try {
3877
+ ws.close();
3878
+ } catch {
3879
+ }
3880
+ }
3881
+ }
3882
+ }
3195
3883
  /** Open the streaming WebSocket and arm message handlers. */
3196
3884
  async connect() {
3197
3885
  this.closing = false;
@@ -3420,6 +4108,17 @@ function averageConfidence(words) {
3420
4108
  }
3421
4109
  return total / words.length;
3422
4110
  }
4111
/**
 * Produce a short label for a warmup failure, for debug logging.
 *
 * The label is built only from structured fields (`statusCode`, `code`,
 * constructor name, `name`); `err.message` is never included.
 *
 * Precedence:
 *   1. numeric `statusCode`                → "HTTP <statusCode>"
 *   2. numeric `code` in [100, 600)        → "HTTP <code>"
 *   3. constructor name, unless it is "Object" or empty; when it is the
 *      generic "Error", a non-empty `name` is preferred
 *   4. non-empty `name`
 *   5. `typeof err`
 * A string `code` made of A-Z, 0-9 and "_" (e.g. "ENOTFOUND") is appended
 * as "<label> (<code>)" so socket and DNS failures do not all read "Error".
 *
 * @param {unknown} err Value caught from the warmup attempt.
 * @returns {string} Label as described above.
 */
function describeWarmupError2(err) {
  if (typeof err !== "object" || err === null) return typeof err;
  const e = err;
  if (typeof e.statusCode === "number") return `HTTP ${e.statusCode}`;
  if (typeof e.code === "number" && e.code >= 100 && e.code < 600) return `HTTP ${e.code}`;
  const ctor = e.constructor?.name;
  const name = typeof e.name === "string" && e.name !== "" ? e.name : void 0;
  let label;
  if (typeof ctor === "string" && ctor !== "" && ctor !== "Object") {
    // A bare Error whose `name` was customised is better described by that name.
    label = ctor === "Error" && name !== void 0 ? name : ctor;
  } else {
    label = name;
  }
  // Only accept identifier-like codes so arbitrary text never reaches the log.
  const code = typeof e.code === "string" && /^[A-Z][A-Z0-9_]*$/.test(e.code) ? e.code : void 0;
  if (code !== void 0) return `${label ?? "Error"} (${code})`;
  return label ?? typeof err;
}
3423
4122
 
3424
4123
  // src/stt/assemblyai.ts
3425
4124
  var STT6 = class extends AssemblyAISTT {
@@ -3476,6 +4175,8 @@ var SpeechmaticsServerMessage = {
3476
4175
  ERROR: "Error"
3477
4176
  };
3478
4177
  var SpeechmaticsSTT = class {
4178
+ /** Stable pricing/dashboard key — read by stream-handler/metrics. */
4179
+ static providerKey = "speechmatics";
3479
4180
  ws = null;
3480
4181
  transcriptCallbacks = /* @__PURE__ */ new Set();
3481
4182
  errorCallbacks = /* @__PURE__ */ new Set();
@@ -3864,6 +4565,13 @@ var ElevenLabsOutputFormat = {
3864
4565
  ULAW_8000: "ulaw_8000"
3865
4566
  };
3866
4567
  var ElevenLabsTTS = class _ElevenLabsTTS {
4568
+ // Stable pricing/dashboard key — read by stream-handler / metrics via
4569
+ // ``(agent.tts.constructor as any).providerKey``. Without this the cost
4570
+ // calculator falls back to ``constructor.name`` ("ElevenLabsTTS") which
4571
+ // does NOT match the pricing table key "elevenlabs", silently zeroing
4572
+ // TTS cost for callers that construct the raw REST class directly
4573
+ // (exposed at top level as ``ElevenLabsRestTTS``).
4574
+ static providerKey = "elevenlabs";
3867
4575
  apiKey;
3868
4576
  voiceId;
3869
4577
  modelId;
@@ -4052,7 +4760,7 @@ var ElevenLabsPlanError = class extends ElevenLabsTTSError {
4052
4760
  this.name = "ElevenLabsPlanError";
4053
4761
  }
4054
4762
  };
4055
- var PLAN_REQUIRED_MSG = "ElevenLabs WS streaming requires a Pro plan or higher (the WS endpoint returned `payment_required`). Either upgrade at https://elevenlabs.io/pricing, or use the HTTP `ElevenLabsTTS` class which works on all plans (drop-in API).";
4763
+ var PLAN_REQUIRED_MSG = "ElevenLabs WS streaming requires a Pro plan or higher (the WS endpoint returned `payment_required`). Either upgrade at https://elevenlabs.io/pricing, or use `ElevenLabsRestTTS` for HTTP REST instead which works on all plans (drop-in API).";
4056
4764
  function sanitiseLogStr(value, limit = 200) {
4057
4765
  return String(value).replace(/[\r\n\x00]/g, " ").slice(0, limit);
4058
4766
  }
@@ -4071,6 +4779,19 @@ var ElevenLabsWebSocketTTS = class _ElevenLabsWebSocketTTS {
4071
4779
  inactivityTimeout;
4072
4780
  chunkLengthSchedule;
4073
4781
  chunkSize;
4782
+ /**
4783
+ * Single-slot adoption queue. The prewarm pipeline parks one WS per
4784
+ * outbound call here; the next `synthesizeStream` call consumes it
4785
+ * (skipping `new WebSocket()` and the BOS send) instead of opening
4786
+ * a fresh socket. The slot is consumed exactly once: if a second
4787
+ * `synthesizeStream` runs before the first, only the first benefits.
4788
+ *
4789
+ * We keep this on the adapter (not in a parameter) so the existing
4790
+ * `for await (const chunk of agent.tts.synthesizeStream(...))` call
4791
+ * site in `StreamHandler` continues to work without signature
4792
+ * changes.
4793
+ */
4794
+ adoptedConnection = null;
4074
4795
  /**
4075
4796
  * The wire format requested over the ElevenLabs WS. Initially set from
4076
4797
  * the constructor; ``setTelephonyCarrier`` may auto-flip it to the
@@ -4086,7 +4807,7 @@ var ElevenLabsWebSocketTTS = class _ElevenLabsWebSocketTTS {
4086
4807
  constructor(opts) {
4087
4808
  if (opts.modelId === "eleven_v3") {
4088
4809
  throw new Error(
4089
- "eleven_v3 is not supported by the WebSocket stream-input endpoint \u2014 use the HTTP ElevenLabsTTS class instead."
4810
+ "eleven_v3 is not supported by the WebSocket stream-input endpoint \u2014 use `ElevenLabsRestTTS` for HTTP REST instead."
4090
4811
  );
4091
4812
  }
4092
4813
  this.apiKey = opts.apiKey;
@@ -4148,6 +4869,24 @@ var ElevenLabsWebSocketTTS = class _ElevenLabsWebSocketTTS {
4148
4869
  if (this.languageCode) params.set("language_code", this.languageCode);
4149
4870
  return `${WS_BASE}/${encodeURIComponent(this.voiceId)}/stream-input?${params.toString()}`;
4150
4871
  }
4872
+ /**
4873
+ * Build the protocol-required BOS frame sent on every fresh WS.
4874
+ *
4875
+ * The single-space `{"text": " "}` keep-alive establishes the session
4876
+ * without committing any synthesis (no `flush: true`, no real text).
4877
+ * Production `synthesizeStream()` and `warmup()` share this exact
4878
+ * construction so the upstream worker chooses the same per-session
4879
+ * config in both cases — otherwise the warm session is on a different
4880
+ * worker than the live request, which defeats the warmup goal.
4881
+ */
4882
+ buildBosFrame() {
4883
+ const init = { text: " " };
4884
+ if (this.voiceSettings) init["voice_settings"] = this.voiceSettings;
4885
+ if (!this.autoMode && this.chunkLengthSchedule) {
4886
+ init["generation_config"] = { chunk_length_schedule: this.chunkLengthSchedule };
4887
+ }
4888
+ return init;
4889
+ }
4151
4890
  /**
4152
4891
  * Single-shot synthesis: open WS, send text, yield bytes, close.
4153
4892
  *
@@ -4166,9 +4905,26 @@ var ElevenLabsWebSocketTTS = class _ElevenLabsWebSocketTTS {
4166
4905
  * after flush — auto_mode could otherwise truncate the tail audio).
4167
4906
  */
4168
4907
  async *synthesizeStream(text) {
4169
- const ws = new WebSocket6(this.buildUrl(), {
4170
- headers: { "xi-api-key": this.apiKey }
4171
- });
4908
+ let ws;
4909
+ let bosAlreadySent = false;
4910
+ let adopted = false;
4911
+ const parked = this.adoptedConnection;
4912
+ this.adoptedConnection = null;
4913
+ if (parked && parked.ws.readyState === WebSocket6.OPEN) {
4914
+ ws = parked.ws;
4915
+ bosAlreadySent = parked.bosSent;
4916
+ adopted = true;
4917
+ } else {
4918
+ if (parked) {
4919
+ try {
4920
+ parked.ws.close();
4921
+ } catch {
4922
+ }
4923
+ }
4924
+ ws = new WebSocket6(this.buildUrl(), {
4925
+ headers: { "xi-api-key": this.apiKey }
4926
+ });
4927
+ }
4172
4928
  const queue = [];
4173
4929
  let done = false;
4174
4930
  let pendingError = null;
@@ -4238,28 +4994,27 @@ var ElevenLabsWebSocketTTS = class _ElevenLabsWebSocketTTS {
4238
4994
  };
4239
4995
  ws.on("error", onError);
4240
4996
  try {
4241
- await new Promise((resolve, reject) => {
4242
- connectTimer = setTimeout(
4243
- () => reject(new Error("ElevenLabs WS connect timeout")),
4244
- CONNECT_TIMEOUT_MS4
4245
- );
4246
- ws.once("open", () => {
4247
- if (connectTimer) clearTimeout(connectTimer);
4248
- connectTimer = void 0;
4249
- resolve();
4250
- });
4251
- ws.once("error", (err) => {
4252
- if (connectTimer) clearTimeout(connectTimer);
4253
- connectTimer = void 0;
4254
- reject(err);
4997
+ if (!adopted) {
4998
+ await new Promise((resolve, reject) => {
4999
+ connectTimer = setTimeout(
5000
+ () => reject(new Error("ElevenLabs WS connect timeout")),
5001
+ CONNECT_TIMEOUT_MS4
5002
+ );
5003
+ ws.once("open", () => {
5004
+ if (connectTimer) clearTimeout(connectTimer);
5005
+ connectTimer = void 0;
5006
+ resolve();
5007
+ });
5008
+ ws.once("error", (err) => {
5009
+ if (connectTimer) clearTimeout(connectTimer);
5010
+ connectTimer = void 0;
5011
+ reject(err);
5012
+ });
4255
5013
  });
4256
- });
4257
- const init = { text: " " };
4258
- if (this.voiceSettings) init["voice_settings"] = this.voiceSettings;
4259
- if (!this.autoMode && this.chunkLengthSchedule) {
4260
- init["generation_config"] = { chunk_length_schedule: this.chunkLengthSchedule };
4261
5014
  }
4262
- ws.send(JSON.stringify(init));
5015
+ if (!bosAlreadySent) {
5016
+ ws.send(JSON.stringify(this.buildBosFrame()));
5017
+ }
4263
5018
  ws.send(JSON.stringify({ text: text + " ", flush: true }));
4264
5019
  ws.on("message", onMessage);
4265
5020
  ws.on("close", onClose);
@@ -4305,8 +5060,141 @@ var ElevenLabsWebSocketTTS = class _ElevenLabsWebSocketTTS {
4305
5060
  ws.removeAllListeners();
4306
5061
  }
4307
5062
  }
5063
+ /**
5064
+ * Pre-call WebSocket warmup for the ElevenLabs `/stream-input` endpoint.
5065
+ *
5066
+ * Opens the WS (DNS + TLS + auth handshake), sends the EXACT same BOS
5067
+ * frame the production `synthesizeStream()` path sends — including
5068
+ * `voice_settings` and (when configured) `generation_config` — so
5069
+ * ElevenLabs instantiates the same per-session worker for both
5070
+ * warmup and the live request. If the BOS frames differ, the server
5071
+ * may route warmup and the real call to two different workers, and
5072
+ * the warmed worker is wasted. Idles ~250 ms, then closes. By the
5073
+ * time the first `synthesizeStream()` call lands during the call,
5074
+ * the connection pool has the upstream warm — net wire time saving
5075
+ * of 200-500 ms.
5076
+ *
5077
+ * Billing safety: ElevenLabs bills on synthesised characters
5078
+ * delivered via `audio` frames (per https://elevenlabs.io/pricing).
5079
+ * The keepalive (single-space `text`, no `flush: true`, no real
5080
+ * transcript) is documented as the session-establishment frame and
5081
+ * does NOT generate synthesis. Closing without sending the actual
5082
+ * transcript does not consume billable characters. Best-effort:
5083
+ * failures logged at debug level.
5084
+ */
5085
+ async warmup() {
5086
+ const ws = new WebSocket6(this.buildUrl(), {
5087
+ headers: { "xi-api-key": this.apiKey }
5088
+ });
5089
+ try {
5090
+ await new Promise((resolve, reject) => {
5091
+ const timer = setTimeout(
5092
+ () => reject(new Error("ElevenLabs WS TTS warmup connect timeout")),
5093
+ CONNECT_TIMEOUT_MS4
5094
+ );
5095
+ ws.once("open", () => {
5096
+ clearTimeout(timer);
5097
+ resolve();
5098
+ });
5099
+ ws.once("error", (err) => {
5100
+ clearTimeout(timer);
5101
+ reject(err);
5102
+ });
5103
+ });
5104
+ try {
5105
+ ws.send(JSON.stringify(this.buildBosFrame()));
5106
+ } catch {
5107
+ }
5108
+ await new Promise((r) => setTimeout(r, 250));
5109
+ } catch (err) {
5110
+ getLogger().debug(`ElevenLabs WS TTS warmup failed (best-effort): ${String(err)}`);
5111
+ } finally {
5112
+ try {
5113
+ if (ws.readyState === WebSocket6.OPEN || ws.readyState === WebSocket6.CONNECTING) {
5114
+ ws.close();
5115
+ }
5116
+ } catch {
5117
+ }
5118
+ ws.removeAllListeners();
5119
+ }
5120
+ }
5121
+ /**
5122
+ * Open a fresh WS, send the EXACT BOS frame the live `synthesizeStream`
5123
+ * sends, and return the OPEN socket without closing it. Used by the
5124
+ * prewarm pipeline to park a TTS connection during the carrier ringing
5125
+ * window so the next `synthesizeStream` call can adopt it via
5126
+ * {@link adoptWebSocket} and skip ~400-900 ms of TLS + BOS round-trip.
5127
+ *
5128
+ * Returns a parked-handle the caller stashes; the next
5129
+ * `synthesizeStream` will detect the adoption queue and skip its own
5130
+ * `new WebSocket()` + BOS send.
5131
+ *
5132
+ * Billing safety: BOS is the documented session-establishment frame
5133
+ * (single space `text`, no `flush: true`) and does not generate
5134
+ * synthesis. ElevenLabs bills on `audio` frames received from the
5135
+ * server, not on BOS bytes sent by the client.
5136
+ */
5137
+ async openParkedConnection() {
5138
+ const ws = new WebSocket6(this.buildUrl(), {
5139
+ headers: { "xi-api-key": this.apiKey }
5140
+ });
5141
+ await new Promise((resolve, reject) => {
5142
+ const timer = setTimeout(
5143
+ () => reject(new Error("ElevenLabs WS park connect timeout")),
5144
+ CONNECT_TIMEOUT_MS4
5145
+ );
5146
+ ws.once("open", () => {
5147
+ clearTimeout(timer);
5148
+ resolve();
5149
+ });
5150
+ ws.once("error", (err) => {
5151
+ clearTimeout(timer);
5152
+ reject(err);
5153
+ });
5154
+ });
5155
+ let bosSent = false;
5156
+ try {
5157
+ ws.send(JSON.stringify(this.buildBosFrame()));
5158
+ bosSent = true;
5159
+ } catch {
5160
+ }
5161
+ return { ws, bosSent };
5162
+ }
5163
+ /**
5164
+ * Stash a parked WS handle so the next `synthesizeStream` call adopts
5165
+ * it instead of opening a fresh socket. Caller is responsible for
5166
+ * holding the handle alive until either the live request consumes it
5167
+ * or the call ends (in which case `discardAdoptedConnection()`
5168
+ * cleans it up).
5169
+ */
5170
+ adoptWebSocket(parked) {
5171
+ const prev = this.adoptedConnection;
5172
+ this.adoptedConnection = parked;
5173
+ if (prev && prev !== parked) {
5174
+ try {
5175
+ prev.ws.close();
5176
+ } catch {
5177
+ }
5178
+ }
5179
+ }
5180
+ /**
5181
+ * Drop and close any pending parked WS without consuming it. Used on
5182
+ * call-failure paths so a never-started call does not leak a TTS WS
5183
+ * that ElevenLabs will close after its inactivity timeout anyway.
5184
+ */
5185
+ discardAdoptedConnection() {
5186
+ const parked = this.adoptedConnection;
5187
+ this.adoptedConnection = null;
5188
+ if (parked) {
5189
+ try {
5190
+ parked.ws.close();
5191
+ } catch {
5192
+ }
5193
+ }
5194
+ }
4308
5195
  /** Close any parked (adopted but not yet consumed) WebSocket. Live connections are per-utterance and torn down inside synthesizeStream. */
4309
5196
  async close() {
5197
+ this.discardAdoptedConnection();
4310
5198
  }
4311
5199
  };
4312
5200
  function looksLikeJson(buf) {
@@ -4386,6 +5274,8 @@ var OpenAITTS = class _OpenAITTS {
4386
5274
  speed;
4387
5275
  antiAlias;
4388
5276
  targetSampleRate;
5277
+ /** Stable pricing/dashboard key — read by stream-handler/metrics. */
5278
+ static providerKey = "openai_tts";
4389
5279
  /**
4390
5280
  * Synthesise text to speech and return the full audio as a single Buffer.
4391
5281
  *
@@ -4611,6 +5501,8 @@ var CartesiaTTSVoiceMode = {
4611
5501
  EMBEDDING: "embedding"
4612
5502
  };
4613
5503
  var CartesiaTTS = class _CartesiaTTS {
5504
+ /** Stable pricing/dashboard key — read by stream-handler/metrics. */
5505
+ static providerKey = "cartesia_tts";
4614
5506
  apiKey;
4615
5507
  model;
4616
5508
  voice;
@@ -4685,6 +5577,38 @@ var CartesiaTTS = class _CartesiaTTS {
4685
5577
  }
4686
5578
  return payload;
4687
5579
  }
5580
+ /**
5581
+ * Pre-call HTTP warmup for the Cartesia `/tts/bytes` endpoint.
5582
+ *
5583
+ * Issues a lightweight `GET <baseUrl>/voices` so DNS, TLS, and HTTP/2
5584
+ * are already up by the time the first `synthesizeStream()` POST
5585
+ * lands. Best-effort: 5 s timeout, all exceptions swallowed at
5586
+ * debug level.
5587
+ *
5588
+ * Billing safety: `GET /voices` is a free metadata read on
5589
+ * Cartesia's REST surface (per https://docs.cartesia.ai). It does
5590
+ * not consume synthesis credits. The actual synthesis is billed
5591
+ * only when `POST /tts/bytes` runs with a non-empty `transcript`.
5592
+ *
5593
+ * Note: Cartesia TTS uses the HTTP path (vs the WebSocket variant
5594
+ * Cartesia also exposes) — connection warmup is therefore HTTP-GET
5595
+ * based, not WebSocket pre-handshake. The latency win is smaller
5596
+ * (~50-150 ms vs the ~200-500 ms of a WS prewarm) but still real.
5597
+ */
5598
+ async warmup() {
5599
+ try {
5600
+ await fetch(`${this.baseUrl}/voices`, {
5601
+ method: "GET",
5602
+ headers: {
5603
+ "X-API-Key": this.apiKey,
5604
+ "Cartesia-Version": this.apiVersion
5605
+ },
5606
+ signal: AbortSignal.timeout(5e3)
5607
+ });
5608
+ } catch (err) {
5609
+ getLogger().debug(`Cartesia TTS warmup failed (best-effort): ${String(err)}`);
5610
+ }
5611
+ }
4688
5612
  /** Synthesize text and return the concatenated audio buffer. */
4689
5613
  async synthesize(text) {
4690
5614
  const chunks = [];
@@ -4788,6 +5712,8 @@ function timeoutForModel(model) {
4788
5712
  return MIST_MODEL_TIMEOUT_MS;
4789
5713
  }
4790
5714
  var RimeTTS = class {
5715
+ /** Stable pricing/dashboard key — read by stream-handler/metrics. */
5716
+ static providerKey = "rime";
4791
5717
  apiKey;
4792
5718
  model;
4793
5719
  speaker;
@@ -4943,6 +5869,8 @@ var LMNTSampleRate = {
4943
5869
  HZ_24000: 24e3
4944
5870
  };
4945
5871
  var LMNTTTS = class {
5872
+ /** Stable pricing/dashboard key — read by stream-handler/metrics. */
5873
+ static providerKey = "lmnt";
4946
5874
  apiKey;
4947
5875
  model;
4948
5876
  voice;
@@ -5041,6 +5969,7 @@ init_esm_shims();
5041
5969
  // src/providers/inworld-tts.ts
5042
5970
  init_esm_shims();
5043
5971
  var INWORLD_BASE_URL = "https://api.inworld.ai/tts/v1/voice:stream";
5972
+ var INWORLD_VOICES_URL = "https://api.inworld.ai/tts/v1/voices";
5044
5973
  var InworldModel = {
5045
5974
  TTS_2: "inworld-tts-2",
5046
5975
  TTS_1_5_MAX: "inworld-tts-1.5-max",
@@ -5055,6 +5984,8 @@ var InworldAudioEncoding = {
5055
5984
  MP3: "MP3"
5056
5985
  };
5057
5986
  var InworldTTS = class {
5987
+ /** Stable pricing/dashboard key — read by stream-handler/metrics. */
5988
+ static providerKey = "inworld";
5058
5989
  authToken;
5059
5990
  model;
5060
5991
  voice;
@@ -5099,6 +6030,45 @@ var InworldTTS = class {
5099
6030
  if (this.deliveryMode !== void 0) payload.deliveryMode = this.deliveryMode;
5100
6031
  return payload;
5101
6032
  }
6033
+ /**
6034
+ * Pre-call HTTP warmup for the Inworld TTS API.
6035
+ *
6036
+ * Issues a lightweight `GET /tts/v1/voices` against the API host so
6037
+ * DNS + TLS + HTTP/2 connection are already up by the time the first
6038
+ * `synthesizeStream()` POST lands. Best-effort: 5 s timeout, all
6039
+ * exceptions swallowed at debug level.
6040
+ *
6041
+ * Earlier revisions issued `HEAD` against the streaming endpoint
6042
+ * (`/tts/v1/voice:stream`). That endpoint is POST-only so HEAD
6043
+ * returns `405 Method Not Allowed` — the warmup still completed the
6044
+ * TLS handshake but spammed 405 errors into Inworld's audit logs and
6045
+ * into our own logs. Switching to a documented `GET /tts/v1/voices`
6046
+ * metadata read is a 2xx-clean equivalent.
6047
+ *
6048
+ * Billing safety: `GET /tts/v1/voices` is a free metadata endpoint
6049
+ * (per https://docs.inworld.ai/). It returns the voice catalogue
6050
+ * without invoking the synthesis pipeline. The actual synthesis is
6051
+ * billed only when `POST /tts/v1/voice:stream` runs with a non-empty
6052
+ * `text`.
6053
+ *
6054
+ * Note: Inworld TTS uses the HTTP NDJSON streaming path rather than
6055
+ * a persistent WebSocket — connection warmup is therefore HTTP-based,
6056
+ * not WebSocket pre-handshake. The latency win is smaller (~50-150 ms)
6057
+ * than the WS-based prewarms but still real on cold-start calls.
6058
+ */
6059
+ async warmup() {
6060
+ try {
6061
+ await fetch(INWORLD_VOICES_URL, {
6062
+ method: "GET",
6063
+ headers: {
6064
+ Authorization: `Basic ${this.authToken}`
6065
+ },
6066
+ signal: AbortSignal.timeout(5e3)
6067
+ });
6068
+ } catch (err) {
6069
+ getLogger().debug(`Inworld TTS warmup failed (best-effort): ${String(err)}`);
6070
+ }
6071
+ }
5102
6072
  /** Synthesize text and return the concatenated audio buffer. */
5103
6073
  async synthesize(text) {
5104
6074
  const chunks = [];
@@ -5238,6 +6208,8 @@ var DEFAULT_MODEL = AnthropicModel.CLAUDE_HAIKU_4_5_20251001;
5238
6208
  var DEFAULT_MAX_TOKENS = 1024;
5239
6209
  var PROMPT_CACHING_BETA = "prompt-caching-2024-07-31";
5240
6210
  var AnthropicLLMProvider = class {
6211
+ /** Stable pricing/dashboard key — read by stream-handler/metrics. */
6212
+ static providerKey = "anthropic";
5241
6213
  apiKey;
5242
6214
  model;
5243
6215
  maxTokens;
@@ -5259,6 +6231,27 @@ var AnthropicLLMProvider = class {
5259
6231
  this.anthropicVersion = options.anthropicVersion ?? DEFAULT_ANTHROPIC_VERSION;
5260
6232
  this.promptCaching = options.promptCaching ?? true;
5261
6233
  }
6234
+ /**
6235
+ * Pre-call DNS / TLS warmup for the Anthropic Messages API.
6236
+ * Issues a lightweight ``GET https://api.anthropic.com/v1/models`` so
6237
+ * DNS, TLS and HTTP/2 are already up by the time the first ``messages``
6238
+ * call lands. Best-effort: 5 s timeout, exceptions swallowed at debug.
6239
+ */
6240
+ async warmup() {
6241
+ try {
6242
+ const modelsUrl = this.url.replace(/\/messages\/?$/, "/models");
6243
+ await fetch(modelsUrl, {
6244
+ method: "GET",
6245
+ headers: {
6246
+ "x-api-key": this.apiKey,
6247
+ "anthropic-version": this.anthropicVersion
6248
+ },
6249
+ signal: AbortSignal.timeout(5e3)
6250
+ });
6251
+ } catch (err) {
6252
+ getLogger().debug(`Anthropic LLM warmup failed (best-effort): ${String(err)}`);
6253
+ }
6254
+ }
5262
6255
  /** Stream Patter-format LLM chunks for the given OpenAI-style chat history. */
5263
6256
  async *stream(messages, tools, opts) {
5264
6257
  const { system, messages: anthropicMessages } = toAnthropicMessages(messages);
@@ -5494,6 +6487,8 @@ var GroqModel = {
5494
6487
  };
5495
6488
  var DEFAULT_MODEL2 = GroqModel.LLAMA_3_3_70B_VERSATILE;
5496
6489
  var GroqLLMProvider = class {
6490
+ /** Stable pricing/dashboard key — read by stream-handler/metrics. */
6491
+ static providerKey = "groq";
5497
6492
  apiKey;
5498
6493
  model;
5499
6494
  baseUrl;
@@ -5527,6 +6522,21 @@ var GroqLLMProvider = class {
5527
6522
  this.presencePenalty = options.presencePenalty;
5528
6523
  this.stop = options.stop;
5529
6524
  }
6525
+ /**
6526
+ * Pre-call DNS / TLS warmup for the Groq inference endpoint.
6527
+ * Best-effort: 5 s timeout, all exceptions swallowed at debug level.
6528
+ */
6529
+ async warmup() {
6530
+ try {
6531
+ await fetch(`${this.baseUrl}/models`, {
6532
+ method: "GET",
6533
+ headers: { Authorization: `Bearer ${this.apiKey}` },
6534
+ signal: AbortSignal.timeout(5e3)
6535
+ });
6536
+ } catch (err) {
6537
+ getLogger().debug(`Groq LLM warmup failed (best-effort): ${String(err)}`);
6538
+ }
6539
+ }
5530
6540
  /** Stream Patter-format LLM chunks from the Groq chat completions API. */
5531
6541
  async *stream(messages, tools, opts) {
5532
6542
  const body = {
@@ -5662,6 +6672,8 @@ var CerebrasModel = {
5662
6672
  var DEFAULT_MODEL3 = CerebrasModel.GPT_OSS_120B;
5663
6673
  var RETRY_BACKOFF_BASE_MS = 500;
5664
6674
  var CerebrasLLMProvider = class {
6675
+ /** Stable pricing/dashboard key — read by stream-handler/metrics. */
6676
+ static providerKey = "cerebras";
5665
6677
  apiKey;
5666
6678
  model;
5667
6679
  baseUrl;
@@ -5697,6 +6709,21 @@ var CerebrasLLMProvider = class {
5697
6709
  this.presencePenalty = options.presencePenalty;
5698
6710
  this.stop = options.stop;
5699
6711
  }
6712
+ /**
6713
+ * Pre-call DNS / TLS warmup for the Cerebras inference endpoint.
6714
+ * Best-effort: 5 s timeout, all exceptions swallowed at debug level.
6715
+ */
6716
+ async warmup() {
6717
+ try {
6718
+ await fetch(`${this.baseUrl}/models`, {
6719
+ method: "GET",
6720
+ headers: { Authorization: `Bearer ${this.apiKey}` },
6721
+ signal: AbortSignal.timeout(5e3)
6722
+ });
6723
+ } catch (err) {
6724
+ getLogger().debug(`Cerebras LLM warmup failed (best-effort): ${String(err)}`);
6725
+ }
6726
+ }
5700
6727
  /** Stream Patter-format LLM chunks from the Cerebras chat completions API. */
5701
6728
  async *stream(messages, tools, opts) {
5702
6729
  const body = {
@@ -5859,6 +6886,8 @@ var GoogleModel = {
5859
6886
  var DEFAULT_MODEL4 = GoogleModel.GEMINI_2_5_FLASH;
5860
6887
  var DEFAULT_BASE_URL3 = "https://generativelanguage.googleapis.com/v1beta";
5861
6888
  var GoogleLLMProvider = class {
6889
+ /** Stable pricing/dashboard key — read by stream-handler/metrics. */
6890
+ static providerKey = "google";
5862
6891
  apiKey;
5863
6892
  model;
5864
6893
  baseUrl;
@@ -5876,6 +6905,23 @@ var GoogleLLMProvider = class {
5876
6905
  this.temperature = options.temperature;
5877
6906
  this.maxOutputTokens = options.maxOutputTokens;
5878
6907
  }
6908
+ /**
6909
+ * Pre-call DNS / TLS warmup for the Gemini API.
6910
+ * Issues a lightweight ``GET ${baseUrl}/models?key=...`` so DNS, TLS
6911
+ * and HTTP/2 are already up by the time the first
6912
+ * ``streamGenerateContent`` call lands. Best-effort: 5 s timeout, all
6913
+ * exceptions swallowed at debug level.
6914
+ */
6915
+ async warmup() {
6916
+ try {
6917
+ await fetch(`${this.baseUrl}/models?key=${encodeURIComponent(this.apiKey)}`, {
6918
+ method: "GET",
6919
+ signal: AbortSignal.timeout(5e3)
6920
+ });
6921
+ } catch (err) {
6922
+ getLogger().debug(`Google LLM warmup failed (best-effort): ${String(err)}`);
6923
+ }
6924
+ }
5879
6925
  /** Stream Patter-format LLM chunks from the Gemini SSE endpoint. */
5880
6926
  async *stream(messages, tools, opts) {
5881
6927
  const { systemInstruction, contents } = toGeminiContents(messages);
@@ -6065,6 +7111,186 @@ var LLM5 = class extends GoogleLLMProvider {
6065
7111
  }
6066
7112
  };
6067
7113
 
7114
+ // src/providers/deepfilternet-filter.ts
7115
+ init_esm_shims();
7116
/**
 * Fetch the logger at call time instead of capturing it at module load,
 * so every message goes through whatever `getLogger()` currently returns.
 */
function log() {
  const activeLogger = getLogger();
  return activeLogger;
}

/** Sample rate (Hz) that audio is resampled to before it is fed to the model. */
var DEEPFILTERNET_SR = 48000;
7120
/**
 * Dynamically import the optional `onnxruntime-node` dependency.
 *
 * The failure reason is logged at debug level: callers only see `null`
 * and report "not installed", which would otherwise hide other causes
 * (for example a native binding that fails to load).
 *
 * @returns {Promise<object|null>} The runtime namespace, or `null` when
 *   the import fails for any reason.
 */
async function loadOnnxRuntime() {
  try {
    // Specifier kept in a variable (as in the original) — presumably so
    // bundlers do not try to resolve the optional dependency statically.
    const specifier = "onnxruntime-node";
    const mod = await import(specifier);
    // If the named export is missing on the namespace, fall back to the
    // default export (CommonJS interop), else return the namespace as-is.
    return mod.InferenceSession ? mod : mod.default ?? mod;
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    log().debug(`DeepFilterNetFilter: onnxruntime-node import failed: ${message}`);
    return null;
  }
}
7129
/**
 * Decode signed 16-bit little-endian PCM into floats in [-1, 1).
 *
 * Reads through a `DataView` instead of an `Int16Array` view for two reasons:
 *  - an `Int16Array` over `pcm.buffer` throws `RangeError` when
 *    `pcm.byteOffset` is odd, which happens for sliced Buffers;
 *  - typed arrays use host byte order, whereas the matching encoder
 *    (`float32ToPcm16`) always writes little-endian.
 *
 * A trailing odd byte is ignored.
 *
 * @param {Buffer|Uint8Array} pcm Raw PCM16 bytes.
 * @returns {Float32Array} One float per complete 16-bit sample.
 */
function pcm16ToFloat32(pcm) {
  const sampleCount = Math.floor(pcm.byteLength / 2);
  const view = new DataView(pcm.buffer, pcm.byteOffset, sampleCount * 2);
  const out = new Float32Array(sampleCount);
  for (let i = 0; i < sampleCount; i += 1) {
    out[i] = view.getInt16(i * 2, true) / 32768;
  }
  return out;
}
7137
/**
 * Encode float samples as signed 16-bit little-endian PCM.
 *
 * Each sample is clamped to [-1, 1], scaled by 32767 and rounded.
 *
 * @param {Float32Array|number[]} samples Float samples.
 * @returns {Buffer} Buffer holding two bytes per input sample.
 */
function float32ToPcm16(samples) {
  const sampleCount = samples.length;
  const encoded = Buffer.alloc(sampleCount * 2);
  let byteOffset = 0;
  for (let index = 0; index < sampleCount; index += 1) {
    const limitedHigh = Math.min(1, samples[index]);
    const bounded = Math.max(-1, limitedHigh);
    const quantized = Math.round(bounded * 32767);
    encoded.writeInt16LE(quantized, byteOffset);
    byteOffset += 2;
  }
  return encoded;
}
7145
/**
 * Audio filter that runs PCM16 chunks through a DeepFilterNet ONNX model.
 *
 * The filter degrades to a pass-through (input returned unchanged) when
 * no `modelPath` is configured, when `onnxruntime-node` cannot be
 * loaded, when the model fails to load, or when inference throws.
 *
 * Options:
 *  - `modelPath`: path of the ONNX model handed to `InferenceSession.create`.
 *  - `silenceWarnings`: when strictly `true`, suppresses the one-time
 *    "pass-through" warnings.
 */
var DeepFilterNetFilter = class {
  modelPath;
  silenceWarnings;
  session = null;
  ort = null;
  warned = false;
  closed = false;
  // Set once the runtime import or the model load has failed, so the
  // per-chunk hot path does not retry the import / model load (and
  // re-log the same error) for every audio frame.
  loadFailed = false;
  // Stateful resamplers for src_sr<->48k conversions so chunk-boundary
  // samples are not discarded. Lazy-created and rebuilt on rate change.
  _resamplerSrcRate = null;
  _upsamplerInst = null;
  _downsamplerInst = null;
  constructor(options = {}) {
    this.modelPath = options.modelPath;
    this.silenceWarnings = options.silenceWarnings === true;
  }
  /** Emit a pass-through warning at most once, unless warnings are silenced. */
  warnOnce(message) {
    if (this.warned || this.silenceWarnings) {
      return;
    }
    log().warn(message);
    this.warned = true;
  }
  /**
   * Lazily create the ONNX inference session.
   *
   * @returns {Promise<object|null>} The session, or `null` when the
   *   filter has to run in pass-through mode.
   */
  async ensureSession() {
    if (this.session !== null) {
      return this.session;
    }
    if (this.loadFailed) {
      return null;
    }
    if (!this.modelPath) {
      this.warnOnce(
        "DeepFilterNetFilter: no modelPath provided; audio will pass through unmodified. Provide a DeepFilterNet ONNX model to enable noise suppression."
      );
      return null;
    }
    if (this.ort === null) {
      this.ort = await loadOnnxRuntime();
    }
    if (this.ort === null) {
      this.loadFailed = true;
      this.warnOnce(
        "DeepFilterNetFilter: onnxruntime-node is not installed; audio will pass through unmodified. Run `npm install onnxruntime-node` to enable noise suppression."
      );
      return null;
    }
    try {
      this.session = await this.ort.InferenceSession.create(this.modelPath);
      return this.session;
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      log().error(`DeepFilterNetFilter: failed to load model: ${message}`);
      this.warned = true;
      this.loadFailed = true;
      return null;
    }
  }
  /**
   * Run noise suppression on a PCM16 chunk; pass-through when no model is loaded.
   *
   * @param {Buffer} pcmChunk PCM16 audio bytes.
   * @param {number} sampleRate Sample rate of `pcmChunk` in Hz.
   * @returns {Promise<Buffer>} Enhanced audio at `sampleRate`, or `pcmChunk`
   *   itself when the filter is in pass-through mode or inference fails.
   * @throws {Error} When the filter has already been closed.
   */
  async process(pcmChunk, sampleRate) {
    if (this.closed) {
      throw new Error("DeepFilterNetFilter is closed");
    }
    if (pcmChunk.length === 0) {
      return pcmChunk;
    }
    const session = await this.ensureSession();
    if (session === null || this.ort === null) {
      return pcmChunk;
    }
    try {
      if (this._resamplerSrcRate !== sampleRate) {
        // Source rate changed (or first chunk): build a fresh resampler pair.
        this._resamplerSrcRate = sampleRate;
        this._upsamplerInst = new StatefulResampler({ srcRate: sampleRate, dstRate: DEEPFILTERNET_SR });
        this._downsamplerInst = new StatefulResampler({ srcRate: DEEPFILTERNET_SR, dstRate: sampleRate });
      }
      // Round-trip through float keeps the original normalisation of the
      // chunk (whole samples only, clamped range) before upsampling.
      const samples = pcm16ToFloat32(pcmChunk);
      const pcm16Up = this._upsamplerInst.process(float32ToPcm16(samples));
      const upsampled = pcm16ToFloat32(pcm16Up);
      const inputName = session.inputNames[0];
      const outputName = session.outputNames[0];
      const tensor = new this.ort.Tensor("float32", upsampled, [1, upsampled.length]);
      const results = await session.run({ [inputName]: tensor });
      const output = results[outputName];
      if (!output || !output.data) {
        return pcmChunk;
      }
      const enhanced = output.data instanceof Float32Array ? output.data : new Float32Array(output.data);
      return this._downsamplerInst.process(float32ToPcm16(enhanced));
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      log().error(`DeepFilterNetFilter.process failed: ${message}`);
      return pcmChunk;
    }
  }
  /** Flush resamplers, release the ONNX session, and mark the filter closed. */
  async close() {
    // Flush failures are ignored on purpose: close() must always complete.
    try {
      this._upsamplerInst?.flush();
    } catch {
    }
    try {
      this._downsamplerInst?.flush();
    } catch {
    }
    this._upsamplerInst = null;
    this._downsamplerInst = null;
    if (this.session !== null && typeof this.session.release === "function") {
      try {
        await this.session.release();
      } catch (error) {
        const message = error instanceof Error ? error.message : String(error);
        log().warn(`DeepFilterNetFilter.close: release failed: ${message}`);
      }
    }
    this.session = null;
    this.closed = true;
  }
};
7260
+
7261
+ // src/providers/krisp-filter.ts
7262
+ init_esm_shims();
7263
/** Named sample-rate constants (Hz) exposed alongside the Krisp filter. */
var KrispSampleRate = {
  HZ_8000: 8000,
  HZ_16000: 16000,
  HZ_32000: 32000,
  HZ_44100: 44100,
  HZ_48000: 48000
};
7270
/** Named frame-duration constants (milliseconds) exposed alongside the Krisp filter. */
var KrispFrameDuration = {
  MS_10: 10,
  MS_15: 15,
  MS_20: 20,
  MS_30: 30,
  MS_32: 32
};
7277
/** Error text thrown by `KrispVivaFilter`; assembled line by line for readability. */
var NODE_SDK_UNAVAILABLE_MESSAGE = [
  "Krisp VIVA Filter is not yet available for the Patter TypeScript SDK.",
  "",
  "As of 2026-05, Krisp does not publish an official Node.js (server) SDK. The Patter TypeScript SDK ships only the AudioFilter interface scaffold (this file) for parity with the Python implementation, since Patter runs server-side on a real-time audio stream from the telephony carrier.",
  "",
  "Available paths today:",
  " 1. Use the Python SDK: `from getpatter.providers.krisp_filter import KrispVivaFilter` \u2014 fully implemented, requires `pip install getpatter[krisp]` + `KRISP_VIVA_SDK_LICENSE_KEY` + `KRISP_VIVA_FILTER_MODEL_PATH`.",
  " 2. Use DeepFilterNet on TS: `new DeepFilterNetFilter({ modelPath: '.../DeepFilterNet3.onnx' })` \u2014 community ONNX export, no license needed.",
  "",
  "Browser/React Native (not applicable to Patter server-side, listed for completeness):",
  " - Browser WASM wrappers (various third-party packages) process local microphone capture, not server-received PCM/mulaw audio.",
  " - Mobile client wrappers (iOS/Android, various third-party packages) are likewise client-side only.",
  "",
  "Track Node SDK status:",
  " - https://krisp.ai/developers/",
  " - Patter backlog: task #38 \"Krisp TS port decision\"",
  ""
].join("\n");

/**
 * Placeholder for the Krisp VIVA noise filter.
 *
 * Construction always throws with `NODE_SDK_UNAVAILABLE_MESSAGE`, so no
 * instance can exist at runtime. `process` and `close` are stubs kept so
 * the class has the same method shape as the other audio filters; swap
 * the constructor and both stubs for a real implementation once a Node
 * binding is available.
 */
var KrispVivaFilter = class {
  static providerKey = "krisp_viva";
  constructor(_options = {}) {
    throw new Error(NODE_SDK_UNAVAILABLE_MESSAGE);
  }
  /** Stub: hands the chunk back untouched. */
  async process(pcmChunk, _sampleRate) {
    return pcmChunk;
  }
  /** Stub: nothing to release. */
  async close() {
  }
};
7293
+
6068
7294
  // src/telephony/twilio.ts
6069
7295
  init_esm_shims();
6070
7296
  var Carrier = class {
@@ -7100,6 +8326,8 @@ var TelnyxSTT = class {
7100
8326
  transcriptionEngine;
7101
8327
  sampleRate;
7102
8328
  baseUrl;
8329
+ /** Stable pricing/dashboard key — read by stream-handler/metrics. */
8330
+ static providerKey = "telnyx_stt";
7103
8331
  ws = null;
7104
8332
  callbacks = [];
7105
8333
  headerSent = false;
@@ -7204,6 +8432,8 @@ var TelnyxTTS = class {
7204
8432
  apiKey;
7205
8433
  voice;
7206
8434
  baseUrl;
8435
+ /** Stable pricing/dashboard key — read by stream-handler/metrics. */
8436
+ static providerKey = "telnyx_tts";
7207
8437
  /** Collect every audio chunk into a single Buffer. */
7208
8438
  async synthesize(text) {
7209
8439
  const chunks = [];
@@ -7305,10 +8535,12 @@ export {
7305
8535
  DEFAULT_MIN_SENTENCE_LEN,
7306
8536
  DEFAULT_PRICING,
7307
8537
  DTMF_EVENTS,
8538
+ DeepFilterNetFilter,
7308
8539
  STT as DeepgramSTT,
7309
8540
  DefaultToolExecutor,
7310
8541
  ConvAI as ElevenLabsConvAI,
7311
8542
  ElevenLabsConvAIAdapter,
8543
+ ElevenLabsTTS as ElevenLabsRestTTS,
7312
8544
  TTS as ElevenLabsTTS,
7313
8545
  TTS2 as ElevenLabsWebSocketTTS,
7314
8546
  ErrorCode,
@@ -7322,13 +8554,19 @@ export {
7322
8554
  Guardrail,
7323
8555
  IVRActivity,
7324
8556
  TTS7 as InworldTTS,
8557
+ KrispFrameDuration,
8558
+ KrispSampleRate,
8559
+ KrispVivaFilter,
7325
8560
  LLMLoop,
7326
8561
  TTS6 as LMNTTTS,
7327
8562
  MetricsStore,
8563
+ MinWordsStrategy,
7328
8564
  Ngrok,
7329
8565
  LLM as OpenAILLM,
7330
8566
  OpenAILLMProvider,
7331
8567
  Realtime as OpenAIRealtime,
8568
+ Realtime2 as OpenAIRealtime2,
8569
+ OpenAIRealtime2Adapter,
7332
8570
  OpenAIRealtimeAdapter,
7333
8571
  TTS3 as OpenAITTS,
7334
8572
  STT3 as OpenAITranscribeSTT,
@@ -7395,6 +8633,7 @@ export {
7395
8633
  deepgram,
7396
8634
  defineTool,
7397
8635
  elevenlabs,
8636
+ evaluateStrategies as evaluateBargeInStrategies,
7398
8637
  filterEmoji,
7399
8638
  filterForTTS,
7400
8639
  filterMarkdown,
@@ -7420,6 +8659,7 @@ export {
7420
8659
  resample24kTo16k,
7421
8660
  resample8kTo16k,
7422
8661
  resamplePcm,
8662
+ resetStrategies as resetBargeInStrategies,
7423
8663
  rime,
7424
8664
  scheduleCron,
7425
8665
  scheduleInterval,