getpatter 0.6.3 → 0.6.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -49,7 +49,7 @@ var init_cjs_shims = __esm({
49
49
  });
50
50
 
51
51
  // src/errors.ts
52
- var ErrorCode, PatterError, PatterConnectionError, AuthenticationError, ProvisionError, RateLimitError;
52
+ var ErrorCode, PatterError, PatterConfigError, PatterConnectionError, AuthenticationError, ProvisionError, RateLimitError;
53
53
  var init_errors = __esm({
54
54
  "src/errors.ts"() {
55
55
  "use strict";
@@ -85,6 +85,12 @@ var init_errors = __esm({
85
85
  this.code = options?.code ?? ErrorCode.INTERNAL;
86
86
  }
87
87
  };
88
+ PatterConfigError = class extends PatterError {
89
+ constructor(message, options) {
90
+ super(message, { code: options?.code ?? ErrorCode.CONFIG });
91
+ this.name = "PatterConfigError";
92
+ }
93
+ };
88
94
  PatterConnectionError = class extends PatterError {
89
95
  constructor(message, options) {
90
96
  super(message, { code: options?.code ?? ErrorCode.CONNECTION });
@@ -136,6 +142,45 @@ var init_logger = __esm({
136
142
  });
137
143
 
138
144
  // src/providers/openai-realtime.ts
145
+ function validateRealtimeTurnDetection(td) {
146
+ if (td === void 0) return;
147
+ if (td.type !== void 0 && td.type !== "server_vad" && td.type !== "semantic_vad") {
148
+ throw new Error(
149
+ `RealtimeTurnDetection.type must be 'server_vad' or 'semantic_vad', got ${JSON.stringify(td.type)}`
150
+ );
151
+ }
152
+ if (td.eagerness !== void 0 && td.eagerness !== "low" && td.eagerness !== "medium" && td.eagerness !== "high" && td.eagerness !== "auto") {
153
+ throw new Error(
154
+ `RealtimeTurnDetection.eagerness must be one of low|medium|high|auto, got ${JSON.stringify(td.eagerness)}`
155
+ );
156
+ }
157
+ if (td.eagerness !== void 0 && td.type !== "semantic_vad") {
158
+ throw new Error(
159
+ "RealtimeTurnDetection.eagerness is only valid when type='semantic_vad'"
160
+ );
161
+ }
162
+ }
163
+ function buildTurnDetection(td, opts) {
164
+ validateRealtimeTurnDetection(td);
165
+ let detection;
166
+ if (td?.type === "semantic_vad") {
167
+ detection = { type: "semantic_vad" };
168
+ if (td.eagerness !== void 0) detection.eagerness = td.eagerness;
169
+ } else {
170
+ detection = {
171
+ type: td?.type ?? opts.defaultType,
172
+ threshold: td?.threshold ?? 0.5,
173
+ prefix_padding_ms: td?.prefixPaddingMs ?? 300,
174
+ silence_duration_ms: td?.silenceDurationMs ?? opts.defaultSilenceMs
175
+ };
176
+ }
177
+ if (opts.includeResponseGating) {
178
+ const serverManaged = !(opts.gateResponseOnTranscript ?? false);
179
+ detection.create_response = serverManaged;
180
+ detection.interrupt_response = serverManaged;
181
+ }
182
+ return detection;
183
+ }
139
184
  function estimateAudioMs(chunk, format) {
140
185
  if (chunk.length === 0) return 0;
141
186
  if (format === OpenAIRealtimeAudioFormat.G711_ULAW || format === OpenAIRealtimeAudioFormat.G711_ALAW)
@@ -196,6 +241,7 @@ var init_openai_realtime = __esm({
196
241
  this.tools = tools;
197
242
  this.audioFormat = audioFormat;
198
243
  this.options = options;
244
+ this.gateResponseOnTranscript = options.gateResponseOnTranscript ?? false;
199
245
  }
200
246
  apiKey;
201
247
  model;
@@ -225,6 +271,23 @@ var init_openai_realtime = __esm({
225
271
  // could have produced, which is what the user actually heard.
226
272
  currentResponseFirstAudioAt = null;
227
273
  options;
274
+ // When true, the stream handler waits for the Whisper ``transcript_input``
275
+ // event before requesting the model response (legacy behavior). When false
276
+ // (default) the response is requested on ``speech_stopped`` and the
277
+ // transcript is display-only. Read by the stream handler via
278
+ // ``getGateResponseOnTranscript()``.
279
+ gateResponseOnTranscript;
280
+ /**
281
+ * Whether the stream handler should gate the model response on the Whisper
282
+ * transcript (legacy) or fire it on `speech_stopped` (default, decoupled).
283
+ *
284
+ * `false` (default) — the response is requested on `speech_stopped`,
285
+ * independently of Whisper. `true` — the response is requested only after
286
+ * `transcript_input` passes the hallucination filter.
287
+ */
288
+ getGateResponseOnTranscript() {
289
+ return this.gateResponseOnTranscript;
290
+ }
228
291
  /**
229
292
  * Build the production session.update body. Mirrors the body sent
230
293
  * inside `connect()` so warmup can apply identical configuration to
@@ -236,16 +299,26 @@ var init_openai_realtime = __esm({
236
299
  output_audio_format: this.audioFormat,
237
300
  voice: this.voice,
238
301
  instructions: this.instructions || "You are a helpful voice assistant. Be concise.",
239
- turn_detection: {
240
- type: this.options.vadType ?? OpenAIRealtimeVADType.SERVER_VAD,
241
- threshold: 0.5,
242
- prefix_padding_ms: 300,
243
- silence_duration_ms: this.options.silenceDurationMs ?? 300
244
- },
302
+ // v1 turn_detection carries NO create_response / interrupt_response
303
+ // keys. The v1 server defaults (`create_response: true`,
304
+ // `interrupt_response: true`) ARE the server-managed behaviour we want by
305
+ // default, so omitting them is equivalent to sending `true` — gating
306
+ // disabled here. `gateResponseOnTranscript` is still threaded through for
307
+ // symmetry with the GA builder, but has no wire effect while
308
+ // includeResponseGating is false.
309
+ turn_detection: buildTurnDetection(this.options.turnDetection, {
310
+ defaultType: this.options.vadType ?? OpenAIRealtimeVADType.SERVER_VAD,
311
+ defaultSilenceMs: this.options.silenceDurationMs ?? 300,
312
+ includeResponseGating: false,
313
+ gateResponseOnTranscript: this.gateResponseOnTranscript
314
+ }),
245
315
  input_audio_transcription: {
246
316
  model: this.options.inputAudioTranscriptionModel ?? OpenAITranscriptionModel.WHISPER_1
247
317
  }
248
318
  };
319
+ if (this.options.noiseReduction !== void 0) {
320
+ config2.input_audio_noise_reduction = { type: this.options.noiseReduction };
321
+ }
249
322
  if (this.options.temperature !== void 0) config2.temperature = this.options.temperature;
250
323
  if (this.options.maxResponseOutputTokens !== void 0) {
251
324
  config2.max_response_output_tokens = this.options.maxResponseOutputTokens;
@@ -509,6 +582,10 @@ var init_openai_realtime = __esm({
509
582
  };
510
583
  const timer = setTimeout(() => {
511
584
  cleanup();
585
+ try {
586
+ ws.close();
587
+ } catch {
588
+ }
512
589
  reject(new Error("OpenAI Realtime park connect timeout"));
513
590
  }, 8e3);
514
591
  ws.on("message", onMessage);
@@ -603,20 +680,33 @@ var init_openai_realtime = __esm({
603
680
  dispatch("error", { type: "socket_error", message: err?.message ?? String(err) });
604
681
  });
605
682
  }
606
- /** Truncate the in-flight assistant turn and cancel the active response.
683
+ /** Truncate the in-flight assistant turn's playback offset on the server.
684
+ *
685
+ * Sends ONLY ``conversation.item.truncate`` — no ``response.cancel``. This
686
+ * is the half of barge-in handling that a WebSocket transport MUST always
687
+ * perform: per OpenAI's docs, the GA server auto-truncates on barge-in only
688
+ * over WebRTC / SIP; on the WebSocket transport the client is responsible
689
+ * for telling the server how much of the assistant turn was actually heard.
690
+ * In server-managed mode (``interrupt_response: true``) the server already
691
+ * cancels the response itself, so issuing ``response.cancel`` here would be
692
+ * redundant / rejected — call this method, not {@link cancelResponse}.
607
693
  *
608
694
  * ``audio_end_ms`` MUST reflect what the caller actually heard, not what
609
695
  * the server generated. OpenAI streams audio at 5-10x real-time, so the
610
696
  * byte-derived counter overstates playback whenever the consumer cleared
611
- * its playout buffer (e.g. ``send_clear``) before the audio reached the
697
+ * its playout buffer (e.g. ``sendClear``) before the audio reached the
612
698
  * speaker. We bound the truncate point by wall-clock time since the first
613
699
  * chunk of this response — that's the physical maximum a 1x real-time
614
700
  * playback could have produced. Without this cap, OpenAI keeps the full
615
701
  * generated assistant text on the transcript, and the model replays /
616
702
  * resumes from it on the next turn — manifesting as re-greetings and
617
703
  * mid-sentence fragments after a barge-in storm.
704
+ *
705
+ * No-op when no response is in flight, keeping it idempotent across stale
706
+ * callers. Resets per-response tracking so post-truncate late frames and
707
+ * the next response start clean.
618
708
  */
619
- cancelResponse() {
709
+ truncate() {
620
710
  if (!this.ws) return;
621
711
  if (!this.currentResponseItemId) {
622
712
  return;
@@ -636,11 +726,31 @@ var init_openai_realtime = __esm({
636
726
  } catch (err) {
637
727
  getLogger().debug?.(`conversation.item.truncate failed: ${String(err)}`);
638
728
  }
639
- this.ws.send(JSON.stringify({ type: "response.cancel" }));
640
729
  this.currentResponseItemId = null;
641
730
  this.currentResponseAudioMs = 0;
642
731
  this.currentResponseFirstAudioAt = null;
643
732
  }
733
+ /** Truncate the in-flight assistant turn AND cancel the active response.
734
+ *
735
+ * Sends BOTH ``conversation.item.truncate`` (the played-offset bookkeeping)
736
+ * AND ``response.cancel``. Use this on the LEGACY client-managed barge-in
737
+ * path (``gateResponseOnTranscript`` true → ``interrupt_response: false``,
738
+ * so the server does NOT cancel for us) and for explicit cancels driven by
739
+ * Patter (e.g. on transfer / hangup). In server-managed mode call
740
+ * {@link truncate} instead — the server already cancels the response, and an
741
+ * extra ``response.cancel`` would be redundant / rejected.
742
+ *
743
+ * Truncation bounding semantics are identical to {@link truncate}; see its
744
+ * doc comment for the ``audio_end_ms`` wall-clock cap rationale.
745
+ */
746
+ cancelResponse() {
747
+ if (!this.ws) return;
748
+ if (!this.currentResponseItemId) {
749
+ return;
750
+ }
751
+ this.truncate();
752
+ this.ws.send(JSON.stringify({ type: "response.cancel" }));
753
+ }
644
754
  /** Inject a user text turn and request a new response. */
645
755
  async sendText(text) {
646
756
  this.ws?.send(JSON.stringify({
@@ -685,6 +795,32 @@ var init_openai_realtime = __esm({
685
795
  }
686
796
  }));
687
797
  }
798
+ /**
799
+ * Speak a short reassurance filler WITHOUT injecting a `role:user` turn.
800
+ *
801
+ * Same no-fake-turn shape as {@link sendFirstMessage}: a bare
802
+ * `response.create` carrying explicit `instructions`, so the filler is the
803
+ * assistant's own in-band audio. The reassurance scheduler in the
804
+ * stream-handler routes here instead of {@link sendText} — which would emit
805
+ * a `conversation.item.create` with `role:'user'` and falsely show the
806
+ * caller saying "One moment." in the transcript. Fillers must not imply
807
+ * success or failure.
808
+ *
809
+ * Uses `modalities: ['audio', 'text']` (v1-beta shape). The GA subclass
810
+ * {@link OpenAIRealtime2Adapter} overrides this with `output_modalities`
811
+ * and re-injects `audio.output.voice` so the GA endpoint does not reject
812
+ * the request. Mirrors Python `OpenAIRealtimeAdapter.send_reassurance` in
813
+ * `providers/openai_realtime.py`.
814
+ */
815
+ async sendReassurance(text) {
816
+ this.ws?.send(JSON.stringify({
817
+ type: "response.create",
818
+ response: {
819
+ modalities: ["audio", "text"],
820
+ instructions: `Say exactly this and nothing else: "${text}"`
821
+ }
822
+ }));
823
+ }
688
824
  /** Submit a tool/function-call result and request the next response. */
689
825
  async sendFunctionResult(callId, result) {
690
826
  this.ws?.send(JSON.stringify({
@@ -925,7 +1061,12 @@ var init_transcoding = __esm({
925
1061
  * Resets all state after flushing.
926
1062
  */
927
1063
  flush() {
928
- this.carry.flush();
1064
+ const carryTail = this.carry.flush();
1065
+ if (carryTail.length > 0) {
1066
+ getLogger().warn(
1067
+ "[patter] StatefulResampler.flush: trailing odd byte discarded \u2014 upstream produced odd-length PCM stream"
1068
+ );
1069
+ }
929
1070
  if (this.srcRate === 16e3 && this.dstRate === 8e3 && this.firPendingSample !== null) {
930
1071
  const s = this.firPendingSample;
931
1072
  const tmp = Buffer.alloc(4);
@@ -1165,44 +1306,46 @@ var init_openai_realtime_2 = __esm({
1165
1306
  buildGASessionConfig() {
1166
1307
  const opts = this.options;
1167
1308
  const fmt = { type: "audio/pcm", rate: 24e3 };
1309
+ const audioInput = {
1310
+ format: fmt,
1311
+ transcription: {
1312
+ model: opts.inputAudioTranscriptionModel ?? OpenAITranscriptionModel.WHISPER_1
1313
+ },
1314
+ // Response creation + barge-in cancellation (issue #154 — hand
1315
+ // turn-taking to the server by default):
1316
+ // - DEFAULT (`gateResponseOnTranscript` false → SERVER-MANAGED):
1317
+ // `create_response: true` lets the SERVER auto-create the response
1318
+ // when it commits the user's audio buffer
1319
+ // (`input_audio_buffer.committed`). `interrupt_response: true` lets the
1320
+ // SERVER cancel the in-flight response on its own VAD `speech_started`.
1321
+ // The e2e model replies immediately, in parallel with the Whisper
1322
+ // transcript — no transcript wait (~500 ms reclaimed), no client-side
1323
+ // race. On a WebSocket transport the client STILL must clear the
1324
+ // carrier buffer (`sendClear`) and `conversation.item.truncate` the
1325
+ // played offset on barge-in (the server only auto-truncates on
1326
+ // WebRTC/SIP), but it does NOT send `response.cancel`. Whisper is
1327
+ // display-only — it can never trigger / gate / cancel the response.
1328
+ // - LEGACY (`gateResponseOnTranscript` true → CLIENT-MANAGED opt-out):
1329
+ // `create_response: false` + `interrupt_response: false` so the stream
1330
+ // handler drives `response.create` (after the hallucination filter)
1331
+ // and `response.cancel` (on barge-in) itself. Escape hatch for no-AEC
1332
+ // PSTN self-interruption. Both keys are tied to the same switch inside
1333
+ // `buildTurnDetection`.
1334
+ turn_detection: buildTurnDetection(opts.turnDetection, {
1335
+ defaultType: opts.vadType ?? OpenAIRealtimeVADType.SERVER_VAD,
1336
+ defaultSilenceMs: opts.silenceDurationMs ?? 300,
1337
+ includeResponseGating: true,
1338
+ gateResponseOnTranscript: this.getGateResponseOnTranscript()
1339
+ })
1340
+ };
1341
+ if (opts.noiseReduction !== void 0) {
1342
+ audioInput.noise_reduction = { type: opts.noiseReduction };
1343
+ }
1168
1344
  const config2 = {
1169
1345
  type: "realtime",
1170
1346
  output_modalities: opts.modalities ?? ["audio"],
1171
1347
  audio: {
1172
- input: {
1173
- format: fmt,
1174
- transcription: {
1175
- model: opts.inputAudioTranscriptionModel ?? OpenAITranscriptionModel.WHISPER_1
1176
- },
1177
- // VAD threshold raised back to the OpenAI default (0.5) on
1178
- // 2026-05-22. The earlier 0.1 tuning (motivated by the
1179
- // upsampled telephony-band loss in high frequencies) made the
1180
- // server VAD trigger on the carrier-loopback echo of the
1181
- // agent's OWN outbound audio in PSTN no-AEC scenarios.
1182
- // Combined with the default ``turn_detection.create_response:
1183
- // true``, every phantom ``speech_started`` ended a turn early
1184
- // and auto-created a new response that the agent immediately
1185
- // spoke over, leading to a runaway loop where the first
1186
- // message was repeatedly cut and re-generated.
1187
- turn_detection: {
1188
- type: opts.vadType ?? OpenAIRealtimeVADType.SERVER_VAD,
1189
- threshold: 0.5,
1190
- prefix_padding_ms: 300,
1191
- silence_duration_ms: opts.silenceDurationMs ?? 500,
1192
- // Defer ``response.create`` to the application: when OpenAI's
1193
- // server VAD commits an ``input_audio_buffer.committed`` segment
1194
- // that turns out to be a Whisper hallucination on silence/echo,
1195
- // auto-creating a response would generate a phantom turn (the
1196
- // model reads the hallucinated text as user input). Patter
1197
- // triggers ``response.create`` explicitly in the Realtime
1198
- // stream-handler AFTER validating ``transcript_input`` against
1199
- // the hallucination filter. Pair with ``interrupt_response:
1200
- // false`` so server VAD also leaves in-flight responses alone —
1201
- // barge-in is gated client-side.
1202
- create_response: false,
1203
- interrupt_response: false
1204
- }
1205
- },
1348
+ input: audioInput,
1206
1349
  output: {
1207
1350
  format: fmt,
1208
1351
  voice: this.voice
@@ -1255,14 +1398,7 @@ var init_openai_realtime_2 = __esm({
1255
1398
  if (t && t in GA_TO_V1_EVENT_NAMES) {
1256
1399
  const newType = GA_TO_V1_EVENT_NAMES[t];
1257
1400
  if (t === "response.output_audio.delta" && typeof parsed.delta === "string") {
1258
- const mulaw = this.transcodeOutboundPcm24ToMulaw8Buffer(parsed.delta);
1259
- const FRAME_BYTES = 160;
1260
- if (mulaw.length === 0) return;
1261
- for (let off = 0; off < mulaw.length; off += FRAME_BYTES) {
1262
- const slice = mulaw.subarray(off, Math.min(off + FRAME_BYTES, mulaw.length));
1263
- const frame = { ...parsed, type: newType, delta: slice.toString("base64") };
1264
- handler(Buffer.from(JSON.stringify(frame)), ...rest);
1265
- }
1401
+ this.translateGaAudioDelta(parsed, handler, rest);
1266
1402
  return;
1267
1403
  }
1268
1404
  parsed.type = newType;
@@ -1291,6 +1427,7 @@ var init_openai_realtime_2 = __esm({
1291
1427
  sessionCreated = true;
1292
1428
  ws.send(JSON.stringify({ type: "session.update", session: this.buildGASessionConfig() }));
1293
1429
  } else if (msg.type === "session.updated") {
1430
+ this.warnIfOutputFormatUnexpected(msg);
1294
1431
  cleanup();
1295
1432
  resolve2();
1296
1433
  } else if (msg.type === "error") {
@@ -1396,6 +1533,10 @@ var init_openai_realtime_2 = __esm({
1396
1533
  };
1397
1534
  const timer = setTimeout(() => {
1398
1535
  cleanup();
1536
+ try {
1537
+ ws.close();
1538
+ } catch {
1539
+ }
1399
1540
  reject(new Error("OpenAI Realtime 2 park connect timeout"));
1400
1541
  }, 8e3);
1401
1542
  ws.on("message", onMessage);
@@ -1443,8 +1584,12 @@ var init_openai_realtime_2 = __esm({
1443
1584
  const parsed = JSON.parse(text);
1444
1585
  const t = parsed.type;
1445
1586
  if (t && Object.prototype.hasOwnProperty.call(GA_TO_V1_EVENT_NAMES, t)) {
1587
+ if (t === "response.output_audio.delta" && typeof parsed.delta === "string") {
1588
+ this.translateGaAudioDelta(parsed, handler, rest);
1589
+ return;
1590
+ }
1446
1591
  parsed.type = GA_TO_V1_EVENT_NAMES[t];
1447
- handler(JSON.stringify(parsed), ...rest);
1592
+ handler(Buffer.from(JSON.stringify(parsed)), ...rest);
1448
1593
  return;
1449
1594
  }
1450
1595
  } catch {
@@ -1529,6 +1674,55 @@ var init_openai_realtime_2 = __esm({
1529
1674
  }
1530
1675
  return out;
1531
1676
  }
1677
+ /**
1678
+ * Log-only safety net for issue #154. The GA server echoes the *effective*
1679
+ * session config in `session.updated`; we request `audio/pcm` @ 24 kHz and
1680
+ * transcode PCM24→mulaw8 ourselves (see
1681
+ * `transcodeOutboundPcm24ToMulaw8Buffer`). If a future GA schema change ever
1682
+ * made the server return a different output format, that transcode — which
1683
+ * assumes PCM16-LE @ 24 kHz — would silently corrupt audio, exactly the
1684
+ * v1-beta failure mode #154 fixed. Warn so the drift surfaces in logs instead
1685
+ * of as static. Never gates audio.
1686
+ */
1687
+ warnIfOutputFormatUnexpected(msg) {
1688
+ const fmt = msg?.session?.audio?.output?.format;
1689
+ if (!fmt || typeof fmt !== "object") return;
1690
+ if (fmt.type !== "audio/pcm" || fmt.rate != null && fmt.rate !== 24e3) {
1691
+ getLogger().warn(
1692
+ `OpenAI Realtime 2: server-echoed output format ${JSON.stringify(fmt)} differs from the requested audio/pcm@24000 \u2014 the outbound PCM24\u2192mulaw8 transcode assumes PCM16-LE 24 kHz, so carrier audio may be garbled (issue #154). Informational only; audio is not gated on this.`
1693
+ );
1694
+ }
1695
+ }
1696
+ /**
1697
+ * Shared audio-delta translation helper. Transcodes a GA
1698
+ * `response.output_audio.delta` payload (base64 PCM-16-LE 24 kHz)
1699
+ * into mulaw 8 kHz and splits the result into 160-byte (20 ms) frames,
1700
+ * dispatching one synthetic `response.audio.delta` event per frame.
1701
+ *
1702
+ * Called from BOTH the `connect()` shim and the `adoptWebSocket()` shim
1703
+ * so that warm-path (prewarm/adopted) calls receive identical transcoding
1704
+ * to cold-path calls. Without this, adopted sockets forwarded raw PCM-24
1705
+ * to Twilio/Telnyx, producing garbled or silent audio on every warm call.
1706
+ *
1707
+ * @param parsed - The parsed GA event object (type already checked to be
1708
+ * `response.output_audio.delta` with a string `delta`).
1709
+ * @param handler - The downstream message listener to dispatch each frame to.
1710
+ * @param rest - Extra arguments forwarded from the original `message` event.
1711
+ * @returns `true` if frames were dispatched (caller should return early),
1712
+ * `false` if the resampler is still warming up (zero output bytes).
1713
+ */
1714
+ translateGaAudioDelta(parsed, handler, rest) {
1715
+ const newType = GA_TO_V1_EVENT_NAMES["response.output_audio.delta"];
1716
+ const mulaw = this.transcodeOutboundPcm24ToMulaw8Buffer(parsed.delta);
1717
+ const FRAME_BYTES = 160;
1718
+ if (mulaw.length === 0) return false;
1719
+ for (let off = 0; off < mulaw.length; off += FRAME_BYTES) {
1720
+ const slice = mulaw.subarray(off, Math.min(off + FRAME_BYTES, mulaw.length));
1721
+ const frame = { ...parsed, type: newType, delta: slice.toString("base64") };
1722
+ handler(Buffer.from(JSON.stringify(frame)), ...rest);
1723
+ }
1724
+ return true;
1725
+ }
1532
1726
  /**
1533
1727
  * Base64 PCM-16-LE 24 kHz → Base64 mulaw 8 kHz. Used by the WS
1534
1728
  * translation shim on each `response.output_audio.delta`. The stateful
@@ -1558,6 +1752,34 @@ var init_openai_realtime_2 = __esm({
1558
1752
  }
1559
1753
  this.ws?.send(JSON.stringify({ type: "response.create", response: responseBody }));
1560
1754
  }
1755
+ /**
1756
+ * Speak a short reassurance filler WITHOUT injecting a `role:user` turn.
1757
+ *
1758
+ * GA-shape sibling of {@link sendFirstMessage} (and override of the base v1
1759
+ * {@link OpenAIRealtimeAdapter.sendReassurance}): a bare `response.create`
1760
+ * carrying explicit `instructions` so the filler is the assistant's own
1761
+ * in-band audio. No `conversation.item.create` with `role:"user"` is
1762
+ * emitted, so the transcript shows no phantom caller line. The GA endpoint
1763
+ * rejects `response.modalities` and does not inherit `audio.output.voice`
1764
+ * for an explicit `response.create`, so — exactly as in
1765
+ * {@link sendFirstMessage} — we send `output_modalities` and re-inject the
1766
+ * voice. Fillers must not imply success or failure.
1767
+ *
1768
+ * Mirrors Python `OpenAIRealtime2Adapter.send_reassurance` in
1769
+ * `providers/openai_realtime_2.py`.
1770
+ */
1771
+ async sendReassurance(text) {
1772
+ if (!this.ws) return;
1773
+ const responseBody = {
1774
+ output_modalities: ["audio"],
1775
+ audio: { output: { voice: this.voice } },
1776
+ instructions: `Say exactly this and nothing else: "${text}"`
1777
+ };
1778
+ if (this.options.reasoningEffort !== void 0) {
1779
+ responseBody.reasoning = { effort: this.options.reasoningEffort };
1780
+ }
1781
+ this.ws.send(JSON.stringify({ type: "response.create", response: responseBody }));
1782
+ }
1561
1783
  };
1562
1784
  }
1563
1785
  });
@@ -2345,11 +2567,25 @@ function calculateRealtimeCachedSavings(usage, pricing, model) {
2345
2567
  const rates = resolveProviderRates(pricing.openai_realtime, model);
2346
2568
  if (rates.unit !== "token") return 0;
2347
2569
  const input = usage.input_token_details ?? {};
2348
- const cached2 = input.cached_tokens_details ?? {};
2349
2570
  const cachedAudioRate = rates.cached_audio_input_per_token ?? rates.audio_input_per_token ?? 0;
2350
2571
  const cachedTextRate = rates.cached_text_input_per_token ?? rates.text_input_per_token ?? 0;
2351
- const cachedAudio = Math.min(cached2.audio_tokens ?? 0, input.audio_tokens ?? 0);
2352
- const cachedText = Math.min(cached2.text_tokens ?? 0, input.text_tokens ?? 0);
2572
+ const totalAudio = input.audio_tokens ?? 0;
2573
+ const totalText = input.text_tokens ?? 0;
2574
+ let cachedAudio;
2575
+ let cachedText;
2576
+ const details = input.cached_tokens_details;
2577
+ if (details && (details.audio_tokens !== void 0 || details.text_tokens !== void 0)) {
2578
+ cachedAudio = Math.min(details.audio_tokens ?? 0, totalAudio);
2579
+ cachedText = Math.min(details.text_tokens ?? 0, totalText);
2580
+ } else if (input.cached_tokens && input.cached_tokens > 0) {
2581
+ const totalIn = totalAudio + totalText;
2582
+ const ratio = totalIn > 0 ? input.cached_tokens / totalIn : 0;
2583
+ cachedAudio = Math.min(Math.round(totalAudio * ratio), totalAudio);
2584
+ cachedText = Math.min(Math.round(totalText * ratio), totalText);
2585
+ } else {
2586
+ cachedAudio = 0;
2587
+ cachedText = 0;
2588
+ }
2353
2589
  const fullAudio = cachedAudio * (rates.audio_input_per_token ?? 0);
2354
2590
  const fullText = cachedText * (rates.text_input_per_token ?? 0);
2355
2591
  const discountedAudio = cachedAudio * cachedAudioRate;
@@ -2797,8 +3033,8 @@ function loadTranscriptJsonl(filePath) {
2797
3033
  } catch {
2798
3034
  continue;
2799
3035
  }
2800
- const tsIso = typeof row.ts === "string" ? Date.parse(row.ts) : NaN;
2801
- const tsNumeric = typeof row.timestamp === "number" ? row.timestamp * 1e3 : NaN;
3036
+ const tsIso = typeof row.ts === "string" ? Date.parse(row.ts) / 1e3 : NaN;
3037
+ const tsNumeric = typeof row.timestamp === "number" ? row.timestamp : NaN;
2802
3038
  const timestamp = Number.isFinite(tsIso) ? tsIso : Number.isFinite(tsNumeric) ? tsNumeric : 0;
2803
3039
  const userText = typeof row.user_text === "string" ? row.user_text : "";
2804
3040
  const agentText = typeof row.agent_text === "string" ? row.agent_text : "";
@@ -2956,14 +3192,49 @@ var init_store = __esm({
2956
3192
  } else {
2957
3193
  for (let i = this.calls.length - 1; i >= 0; i--) {
2958
3194
  if (this.calls[i].call_id === callId) {
2959
- this.calls[i].status = status;
2960
- Object.assign(this.calls[i], extra);
3195
+ this.calls[i] = { ...this.calls[i], status, ...extra };
2961
3196
  break;
2962
3197
  }
2963
3198
  }
2964
3199
  }
2965
3200
  this.publish("call_status", { call_id: callId, status, ...extra });
2966
3201
  }
3202
+ /**
3203
+ * Record a single transcript line (user/assistant) as it becomes known.
3204
+ *
3205
+ * FIX-5 (issue #154): the live forward path for the dashboard transcript.
3206
+ * The Realtime stream handler calls this the moment each line is known — the
3207
+ * user line right after the hallucination filter accepts it, the assistant
3208
+ * line when its turn flushes — keyed by the monotonic ``turnIndex`` reserved
3209
+ * at turn-open (``reserveTurnIndex``). Each line is appended to the active
3210
+ * call's ``transcript`` array and broadcast over SSE as a ``transcript_line``
3211
+ * event so the dashboard can render lines as they arrive and re-sort by
3212
+ * ``(turnIndex, user<assistant)`` — making a late-arriving user line land
3213
+ * ABOVE its agent line. ``recordTurn`` de-dups against the lines pushed here
3214
+ * by ``(turnIndex, role)`` so the metrics path never double-pushes the same
3215
+ * text. Parity with Python ``record_transcript_line``.
3216
+ */
3217
+ recordTranscriptLine(data) {
3218
+ const callId = data.call_id || "";
3219
+ const { role, text, turnIndex } = data;
3220
+ if (!callId || role !== "user" && role !== "assistant" || !text) return;
3221
+ const active = this.activeCalls.get(callId);
3222
+ if (active) {
3223
+ if (!active.transcript) active.transcript = [];
3224
+ active.transcript.push({
3225
+ role,
3226
+ text,
3227
+ timestamp: Date.now() / 1e3,
3228
+ turnIndex
3229
+ });
3230
+ }
3231
+ this.publish("transcript_line", {
3232
+ call_id: callId,
3233
+ turnIndex,
3234
+ role,
3235
+ text
3236
+ });
3237
+ }
2967
3238
  /** Append a single conversation turn to an active call and broadcast it via SSE. */
2968
3239
  recordTurn(data) {
2969
3240
  const callId = data.call_id || "";
@@ -2978,14 +3249,19 @@ var init_store = __esm({
2978
3249
  const userText = typeof turnRecord.user_text === "string" ? turnRecord.user_text : "";
2979
3250
  const agentText = typeof turnRecord.agent_text === "string" ? turnRecord.agent_text : "";
2980
3251
  const ts = typeof turnRecord.timestamp === "number" ? turnRecord.timestamp : Date.now() / 1e3;
2981
- if (userText.length > 0) {
2982
- active.transcript.push({ role: "user", text: userText, timestamp: ts });
3252
+ const turnIndex = typeof turnRecord.turn_index === "number" ? turnRecord.turn_index : void 0;
3253
+ const alreadyLive = (role) => turnIndex !== void 0 && (active.transcript ?? []).some(
3254
+ (e) => e.turnIndex === turnIndex && e.role === role
3255
+ );
3256
+ if (userText.length > 0 && !alreadyLive("user")) {
3257
+ active.transcript.push({ role: "user", text: userText, timestamp: ts, turnIndex });
2983
3258
  }
2984
- if (agentText.length > 0 && agentText !== "[interrupted]") {
3259
+ if (agentText.length > 0 && agentText !== "[interrupted]" && !alreadyLive("assistant")) {
2985
3260
  active.transcript.push({
2986
3261
  role: "assistant",
2987
3262
  text: agentText,
2988
- timestamp: ts
3263
+ timestamp: ts,
3264
+ turnIndex
2989
3265
  });
2990
3266
  }
2991
3267
  }
@@ -3058,7 +3334,7 @@ var init_store = __esm({
3058
3334
  getCall(callId) {
3059
3335
  if (this.deletedCallIds.has(callId)) return null;
3060
3336
  for (let i = this.calls.length - 1; i >= 0; i--) {
3061
- if (this.calls[i].call_id === callId) return this.calls[i];
3337
+ if (this.calls[i].call_id === callId) return { ...this.calls[i] };
3062
3338
  }
3063
3339
  return null;
3064
3340
  }
@@ -3100,7 +3376,9 @@ var init_store = __esm({
3100
3376
  }
3101
3377
  if (accepted.length === 0) return [];
3102
3378
  accepted.sort();
3103
- this.persistDeletedIds();
3379
+ this.persistDeletedIds().catch(
3380
+ (err) => getLogger().debug(`MetricsStore.deleteCalls: persistDeletedIds failed: ${String(err)}`)
3381
+ );
3104
3382
  this.publish("calls_deleted", { call_ids: accepted });
3105
3383
  return accepted;
3106
3384
  }
@@ -3112,19 +3390,19 @@ var init_store = __esm({
3112
3390
  getDeletedCallIds() {
3113
3391
  return Array.from(this.deletedCallIds).sort();
3114
3392
  }
3115
- /** Atomically persist the deleted-ids set to disk. Best-effort. */
3116
- persistDeletedIds() {
3393
+ /** Atomically persist the deleted-ids set to disk. Best-effort async. */
3394
+ async persistDeletedIds() {
3117
3395
  if (this.deletedIdsPath === null) return;
3118
3396
  try {
3119
3397
  const dir = path2.dirname(this.deletedIdsPath);
3120
- fs2.mkdirSync(dir, { recursive: true });
3398
+ await fs2.promises.mkdir(dir, { recursive: true });
3121
3399
  const tmp = this.deletedIdsPath + ".tmp";
3122
3400
  const payload = {
3123
3401
  version: 1,
3124
3402
  deleted_call_ids: Array.from(this.deletedCallIds).sort()
3125
3403
  };
3126
- fs2.writeFileSync(tmp, JSON.stringify(payload, null, 2), "utf8");
3127
- fs2.renameSync(tmp, this.deletedIdsPath);
3404
+ await fs2.promises.writeFile(tmp, JSON.stringify(payload, null, 2), "utf8");
3405
+ await fs2.promises.rename(tmp, this.deletedIdsPath);
3128
3406
  } catch (err) {
3129
3407
  getLogger().debug(
3130
3408
  `MetricsStore.persistDeletedIds: ${String(err)}`
@@ -3133,7 +3411,8 @@ var init_store = __esm({
3133
3411
  }
3134
3412
  /** Look up an active call by id (returns undefined if not active or unknown). */
3135
3413
  getActive(callId) {
3136
- return this.activeCalls.get(callId);
3414
+ const rec = this.activeCalls.get(callId);
3415
+ return rec !== void 0 ? { ...rec } : void 0;
3137
3416
  }
3138
3417
  /** Return all currently active (not yet ended) calls. */
3139
3418
  getActiveCalls() {
@@ -3460,8 +3739,8 @@ function mountDashboard(app, store, token = "") {
3460
3739
  res.type("text/html").send(DASHBOARD_HTML);
3461
3740
  });
3462
3741
  app.get("/api/dashboard/calls", auth2, (req, res) => {
3463
- const limit = Math.min(parseInt(req.query.limit || "50", 10) || 50, 1e3);
3464
- const offset = parseInt(req.query.offset || "0", 10) || 0;
3742
+ const limit = Math.min(Math.max(0, parseInt(req.query.limit || "50", 10) || 50), 1e3);
3743
+ const offset = Math.max(0, parseInt(req.query.offset || "0", 10) || 0);
3465
3744
  res.json(store.getCalls(limit, offset));
3466
3745
  });
3467
3746
  app.get("/api/dashboard/calls/:callId", auth2, (req, res) => {
@@ -3551,8 +3830,8 @@ data: ${data}
3551
3830
  function mountApi(app, store, token = "") {
3552
3831
  const auth2 = makeAuthMiddleware(token);
3553
3832
  app.get("/api/v1/calls", auth2, (req, res) => {
3554
- const limit = Math.min(parseInt(req.query.limit || "50", 10) || 50, 1e3);
3555
- const offset = parseInt(req.query.offset || "0", 10) || 0;
3833
+ const limit = Math.min(Math.max(0, parseInt(req.query.limit || "50", 10) || 50), 1e3);
3834
+ const offset = Math.max(0, parseInt(req.query.offset || "0", 10) || 0);
3556
3835
  const calls = store.getCalls(limit, offset);
3557
3836
  res.json({
3558
3837
  data: calls,
@@ -3831,14 +4110,31 @@ var init_remote_message = __esm({
3831
4110
  while (chunks.length > 0) {
3832
4111
  yield chunks.shift();
3833
4112
  }
4113
+ const READ_TIMEOUT_MS = 3e4;
3834
4114
  while (!done && !error2) {
3835
- const text = await new Promise((resolve2) => {
4115
+ const messagePromise = new Promise((resolve2) => {
3836
4116
  if (chunks.length > 0) {
3837
4117
  resolve2(chunks.shift());
3838
4118
  } else {
3839
4119
  resolveNext = resolve2;
3840
4120
  }
3841
4121
  });
4122
+ let timeoutHandle;
4123
+ const timeoutPromise = new Promise((_, reject) => {
4124
+ timeoutHandle = setTimeout(
4125
+ () => reject(new Error("WebSocket read timeout: no frame received within 30 s")),
4126
+ READ_TIMEOUT_MS
4127
+ );
4128
+ });
4129
+ let text;
4130
+ try {
4131
+ text = await Promise.race([messagePromise, timeoutPromise]);
4132
+ } catch (timeoutErr) {
4133
+ resolveNext = null;
4134
+ throw timeoutErr;
4135
+ } finally {
4136
+ clearTimeout(timeoutHandle);
4137
+ }
3842
4138
  if (text === null) break;
3843
4139
  yield text;
3844
4140
  }
@@ -4080,18 +4376,6 @@ var init_deepgram_stt = __esm({
4080
4376
  } catch {
4081
4377
  return;
4082
4378
  }
4083
- const dataType = String(data.type ?? "unknown");
4084
- if (dataType === "Results") {
4085
- const transcript2 = (data.channel?.alternatives?.[0]?.transcript ?? "").trim();
4086
- const isFinal = Boolean(data.is_final);
4087
- const speechFinal2 = Boolean(data.speech_final);
4088
- const fromFinalize = Boolean(data.from_finalize);
4089
- getLogger().info(
4090
- `[DIAG] DG Results text=${JSON.stringify(transcript2.slice(0, 60))} isFinal=${isFinal} speechFinal=${speechFinal2} fromFinalize=${fromFinalize}`
4091
- );
4092
- } else if (dataType !== "Metadata") {
4093
- getLogger().info(`[DIAG] DG event type=${dataType}`);
4094
- }
4095
4379
  if (data.type === "Metadata" && data.request_id) {
4096
4380
  this.requestId = data.request_id;
4097
4381
  return;
@@ -4181,7 +4465,7 @@ var init_deepgram_stt = __esm({
4181
4465
  if (!this.ws || this.ws.readyState !== import_ws4.default.OPEN) {
4182
4466
  this.audioDroppedCount++;
4183
4467
  if (this.audioDroppedCount === 1 || this.audioDroppedCount % 50 === 0) {
4184
- getLogger().info(
4468
+ getLogger().debug(
4185
4469
  `[DIAG] DeepgramSTT.sendAudio dropped (ws state=${this.ws?.readyState ?? "null"}) \u2014 total dropped=${this.audioDroppedCount}`
4186
4470
  );
4187
4471
  }
@@ -4190,7 +4474,7 @@ var init_deepgram_stt = __esm({
4190
4474
  if (audio.length === 0) return;
4191
4475
  this.audioSentCount++;
4192
4476
  if (this.audioSentCount === 1 || this.audioSentCount % 100 === 0) {
4193
- getLogger().info(
4477
+ getLogger().debug(
4194
4478
  `[DIAG] DeepgramSTT.sendAudio: total chunks sent=${this.audioSentCount} (last=${audio.length} bytes)`
4195
4479
  );
4196
4480
  }
@@ -4228,16 +4512,16 @@ var init_deepgram_stt = __esm({
4228
4512
  finalize() {
4229
4513
  const ws = this.ws;
4230
4514
  if (!ws || ws.readyState !== import_ws4.default.OPEN) {
4231
- getLogger().info(
4515
+ getLogger().debug(
4232
4516
  `[DIAG] DeepgramSTT.finalize SKIPPED (ws state=${ws?.readyState ?? "null"})`
4233
4517
  );
4234
4518
  return;
4235
4519
  }
4236
4520
  try {
4237
4521
  ws.send(JSON.stringify({ type: "Finalize" }));
4238
- getLogger().info("[DIAG] DeepgramSTT.finalize sent {type:Finalize}");
4522
+ getLogger().debug("[DIAG] DeepgramSTT.finalize sent {type:Finalize}");
4239
4523
  } catch (err) {
4240
- getLogger().info(`[DIAG] DeepgramSTT.finalize send failed: ${String(err)}`);
4524
+ getLogger().debug(`[DIAG] DeepgramSTT.finalize send failed: ${String(err)}`);
4241
4525
  }
4242
4526
  }
4243
4527
  /** Send Finalize, briefly drain trailing transcripts, then close the socket. */
@@ -4317,6 +4601,7 @@ var init_metrics = __esm({
4317
4601
  _pricing;
4318
4602
  _callStart;
4319
4603
  _turns = [];
4604
+ // mutable internal array; immutable when exposed via TurnMetrics[] → readonly TurnMetrics[]
4320
4605
  // Per-turn timing state
4321
4606
  _turnStart = null;
4322
4607
  _sttComplete = null;
@@ -4403,6 +4688,16 @@ var init_metrics = __esm({
4403
4688
  * (the common cause of missing endpoint signals).
4404
4689
  */
4405
4690
  _endpointSignalMissingCount = 0;
4691
+ /**
4692
+ * Monotonic per-call turn counter. Reserved at turn OPEN
4693
+ * (``onAdapterSpeechStopped`` / ``speech_stopped``) via
4694
+ * ``reserveTurnIndex()`` and threaded through the buffering pipeline into
4695
+ * ``recordTurnComplete`` / ``recordTurnInterrupted`` as ``preReservedIndex``.
4696
+ * This makes ``turn_index`` stable under drops / interrupts (previously it
4697
+ * was assigned at completion as ``this._turns.length``, which shifted when a
4698
+ * turn was dropped). Parity with Python ``_next_turn_index``.
4699
+ */
4700
+ _nextTurnIndex = 0;
4406
4701
  constructor(opts) {
4407
4702
  this.callId = opts.callId;
4408
4703
  this.providerMode = opts.providerMode;
@@ -4451,12 +4746,27 @@ var init_metrics = __esm({
4451
4746
  this._turnUserText = "";
4452
4747
  this._turnSttAudioSeconds = 0;
4453
4748
  this._turnAlreadyClosed = false;
4749
+ this._initialTtfbEmitted = false;
4454
4750
  this._vadStoppedAt = null;
4455
4751
  this._sttFinalAt = null;
4456
4752
  this._turnCommittedAt = null;
4457
4753
  this._onUserTurnCompletedDelayMs = null;
4458
4754
  this._eventBus?.emit("turn_started", { callId: this.callId });
4459
4755
  }
4756
+ /**
4757
+ * Reserve and return the next monotonic turn index.
4758
+ *
4759
+ * Called once per turn at the moment the turn OPENS (Realtime:
4760
+ * ``onAdapterSpeechStopped``). The returned index is threaded through the
4761
+ * buffering pipeline and handed back to ``recordTurnComplete`` /
4762
+ * ``recordTurnInterrupted`` as ``preReservedIndex`` so the emitted
4763
+ * ``turn_index`` matches the live per-line transcript ordering even when a
4764
+ * turn is dropped or interrupted between open and close. Parity with Python
4765
+ * ``reserve_turn_index``.
4766
+ */
4767
+ reserveTurnIndex() {
4768
+ return this._nextTurnIndex++;
4769
+ }
4460
4770
  /**
4461
4771
  * Start a new turn only if no turn is currently open.
4462
4772
  * Use this at inbound-audio ingestion points so the turn timer begins
@@ -4494,6 +4804,7 @@ var init_metrics = __esm({
4494
4804
  anchorUserSpeechStart() {
4495
4805
  if (this._turnCommittedMono !== null) return;
4496
4806
  this._turnStart = hrTimeMs();
4807
+ this._turnAlreadyClosed = false;
4497
4808
  this._endpointSignalAt = null;
4498
4809
  this._vadStoppedAt = null;
4499
4810
  this._sttFinalAt = null;
@@ -4617,11 +4928,14 @@ var init_metrics = __esm({
4617
4928
  * ``user_text=''``. The caller treats ``null`` as "nothing to emit";
4618
4929
  * ``emitTurnMetrics`` is already null-safe.
4619
4930
  */
4620
- recordTurnComplete(agentText) {
4931
+ recordTurnComplete(agentText, preReservedIndex) {
4621
4932
  if (this._turnAlreadyClosed) return null;
4622
4933
  const latency = this._computeTurnLatency();
4623
4934
  const turn = {
4624
- turn_index: this._turns.length,
4935
+ // Use the pre-reserved index (stable across drops/interrupts) when the
4936
+ // caller threaded one through; otherwise fall back to the append
4937
+ // position for back-compat with callers that never reserved.
4938
+ turn_index: preReservedIndex ?? this._turns.length,
4625
4939
  user_text: this._turnUserText,
4626
4940
  agent_text: agentText,
4627
4941
  latency,
@@ -4630,10 +4944,10 @@ var init_metrics = __esm({
4630
4944
  timestamp: Date.now() / 1e3
4631
4945
  };
4632
4946
  this._turns.push(turn);
4633
- this._resetTurnState();
4634
- this._turnAlreadyClosed = true;
4635
4947
  this._eventBus?.emit("turn_ended", { callId: this.callId, turn });
4636
4948
  this._eventBus?.emit("metrics_collected", { callId: this.callId, turn });
4949
+ this._resetTurnState();
4950
+ this._turnAlreadyClosed = true;
4637
4951
  return turn;
4638
4952
  }
4639
4953
  /**
@@ -4645,12 +4959,12 @@ var init_metrics = __esm({
4645
4959
  * a future refactor that reorders the bargein + LLM-unwind paths)
4646
4960
  * from overwriting a turn that the complete path already emitted.
4647
4961
  */
4648
- recordTurnInterrupted() {
4962
+ recordTurnInterrupted(preReservedIndex) {
4649
4963
  if (this._turnStart === null) return null;
4650
4964
  if (this._turnAlreadyClosed) return null;
4651
4965
  const latency = this._computeTurnLatency();
4652
4966
  const turn = {
4653
- turn_index: this._turns.length,
4967
+ turn_index: preReservedIndex ?? this._turns.length,
4654
4968
  user_text: this._turnUserText,
4655
4969
  agent_text: "[interrupted]",
4656
4970
  latency,
@@ -4702,8 +5016,10 @@ var init_metrics = __esm({
4702
5016
  }
4703
5017
  /**
4704
5018
  * Record the delta (ms) between turn-committed and when on_user_turn_completed
4705
- * pipeline hook finished. Stored for inclusion in the next ``emitEouMetrics``
4706
- * call (or an explicit re-emit if desired).
5019
+ * pipeline hook finished. Does NOT re-emit: like Python's
5020
+ * ``record_on_user_turn_completed_delay``, this only stores the value; the
5021
+ * single EOU emission happens on ``recordTurnCommitted`` (3-timestamp guard,
5022
+ * delay defaults to 0 if not yet recorded).
4707
5023
  */
4708
5024
  recordOnUserTurnCompletedDelay(delayMs) {
4709
5025
  this._onUserTurnCompletedDelayMs = delayMs;
@@ -4716,7 +5032,7 @@ var init_metrics = __esm({
4716
5032
  * ``transcriptionDelay`` = turnCommitted − vadStopped (ms)
4717
5033
  * ``onUserTurnCompletedDelay`` = caller-supplied delta (ms) or 0
4718
5034
  */
4719
- /** Emit `EOUMetrics` once VAD-stop, STT-final, and turn-committed timestamps are all known. */
5035
+ /** Emit `EOUMetrics` once the VAD-stop, STT-final, and turn-committed timestamps are all known; the on_user_turn_completed delay defaults to 0 when it has not been recorded. */
4720
5036
  emitEouMetrics() {
4721
5037
  if (this._vadStoppedAt === null || this._sttFinalAt === null || this._turnCommittedAt === null) {
4722
5038
  return;
@@ -5265,10 +5581,13 @@ var init_circuit_breaker = __esm({
5265
5581
  if (s.state === CircuitBreakerState.OPEN) {
5266
5582
  if (this.clock() - s.openedAt >= this.cooldownMs) {
5267
5583
  s.state = CircuitBreakerState.HALF_OPEN;
5584
+ s.probeInFlight = true;
5268
5585
  return true;
5269
5586
  }
5270
5587
  return false;
5271
5588
  }
5589
+ if (s.probeInFlight) return false;
5590
+ s.probeInFlight = true;
5272
5591
  return true;
5273
5592
  }
5274
5593
  /** Mark a successful execution. Resets the breaker to CLOSED. */
@@ -5278,19 +5597,21 @@ var init_circuit_breaker = __esm({
5278
5597
  s.state = CircuitBreakerState.CLOSED;
5279
5598
  s.consecutiveFailures = 0;
5280
5599
  s.openedAt = 0;
5600
+ s.probeInFlight = false;
5281
5601
  }
5282
5602
  /** Mark a failed execution; trips OPEN once threshold is reached. */
5283
5603
  recordFailure(toolName) {
5284
5604
  if (this.threshold <= 0) return;
5285
5605
  let s = this.state.get(toolName);
5286
5606
  if (!s) {
5287
- s = { state: CircuitBreakerState.CLOSED, consecutiveFailures: 0, openedAt: 0 };
5607
+ s = { state: CircuitBreakerState.CLOSED, consecutiveFailures: 0, openedAt: 0, probeInFlight: false };
5288
5608
  this.state.set(toolName, s);
5289
5609
  }
5290
5610
  s.consecutiveFailures += 1;
5291
5611
  if (s.consecutiveFailures >= this.threshold) {
5292
5612
  s.state = CircuitBreakerState.OPEN;
5293
5613
  s.openedAt = this.clock();
5614
+ s.probeInFlight = false;
5294
5615
  }
5295
5616
  }
5296
5617
  /**
@@ -5314,6 +5635,10 @@ var init_circuit_breaker = __esm({
5314
5635
  });
5315
5636
 
5316
5637
  // src/llm-loop.ts
5638
+ function resolveToolTimeoutMs(toolTimeoutMs, defaultMs) {
5639
+ if (toolTimeoutMs === void 0) return defaultMs;
5640
+ return Math.max(100, Math.min(toolTimeoutMs, MAX_TOOL_TIMEOUT_MS));
5641
+ }
5317
5642
  async function invokeHandler(handler, args, callContext, onProgress) {
5318
5643
  const invoked = handler(args, callContext);
5319
5644
  if (invoked && typeof invoked === "object" && typeof invoked[Symbol.asyncIterator] === "function" && typeof invoked.next === "function") {
@@ -5372,7 +5697,7 @@ function mergeAbortSignals(...signals) {
5372
5697
  }
5373
5698
  return controller.signal;
5374
5699
  }
5375
- var DEFAULT_TOOL_MAX_RETRIES, DEFAULT_TOOL_RETRY_DELAY_MS, DEFAULT_TOOL_TIMEOUT_MS, TOOL_MAX_RESPONSE_BYTES, DefaultToolExecutor, OpenAILLMProvider, DEFAULT_PHONE_PREAMBLE, LLMLoop;
5700
+ var DEFAULT_TOOL_MAX_RETRIES, DEFAULT_TOOL_RETRY_DELAY_MS, DEFAULT_TOOL_TIMEOUT_MS, MAX_TOOL_TIMEOUT_MS, TOOL_MAX_RESPONSE_BYTES, ToolTimeoutError, DefaultToolExecutor, OpenAILLMProvider, DEFAULT_PHONE_PREAMBLE, LLMLoop;
5376
5701
  var init_llm_loop = __esm({
5377
5702
  "src/llm-loop.ts"() {
5378
5703
  "use strict";
@@ -5385,7 +5710,14 @@ var init_llm_loop = __esm({
5385
5710
  DEFAULT_TOOL_MAX_RETRIES = 2;
5386
5711
  DEFAULT_TOOL_RETRY_DELAY_MS = 500;
5387
5712
  DEFAULT_TOOL_TIMEOUT_MS = 1e4;
5713
+ MAX_TOOL_TIMEOUT_MS = 3e5;
5388
5714
  TOOL_MAX_RESPONSE_BYTES = 1 * 1024 * 1024;
5715
+ ToolTimeoutError = class extends Error {
5716
+ constructor(message) {
5717
+ super(message);
5718
+ this.name = "ToolTimeoutError";
5719
+ }
5720
+ };
5389
5721
  DefaultToolExecutor = class {
5390
5722
  maxRetries;
5391
5723
  retryDelayMs;
@@ -5411,15 +5743,41 @@ var init_llm_loop = __esm({
5411
5743
  retry_after_ms: cooldown
5412
5744
  });
5413
5745
  }
5746
+ const effectiveTimeoutMs = resolveToolTimeoutMs(
5747
+ toolDef.timeoutMs,
5748
+ this.requestTimeoutMs
5749
+ );
5414
5750
  if (toolDef.handler) {
5415
5751
  const totalAttempts = this.maxRetries + 1;
5416
5752
  let lastErr = null;
5417
5753
  for (let attempt = 0; attempt < totalAttempts; attempt++) {
5754
+ let timeoutTimer;
5418
5755
  try {
5419
- const result = await invokeHandler(toolDef.handler, args, callContext, onProgress);
5756
+ const handlerPromise = invokeHandler(toolDef.handler, args, callContext, onProgress);
5757
+ const result = await Promise.race([
5758
+ handlerPromise,
5759
+ new Promise((_, reject) => {
5760
+ timeoutTimer = setTimeout(
5761
+ () => reject(
5762
+ new ToolTimeoutError(
5763
+ `Tool handler '${toolDef.name}' timed out after ${effectiveTimeoutMs}ms`
5764
+ )
5765
+ ),
5766
+ effectiveTimeoutMs
5767
+ );
5768
+ })
5769
+ ]);
5420
5770
  this.breaker.recordSuccess(toolDef.name);
5421
5771
  return result;
5422
5772
  } catch (e) {
5773
+ if (e instanceof ToolTimeoutError) {
5774
+ getLogger().error(String(e));
5775
+ this.breaker.recordFailure(toolDef.name);
5776
+ return JSON.stringify({
5777
+ error: String(e),
5778
+ fallback: true
5779
+ });
5780
+ }
5423
5781
  lastErr = e;
5424
5782
  if (attempt < totalAttempts - 1) {
5425
5783
  getLogger().warn(
@@ -5427,6 +5785,8 @@ var init_llm_loop = __esm({
5427
5785
  );
5428
5786
  await new Promise((r) => setTimeout(r, backoffDelayMs(this.retryDelayMs, attempt)));
5429
5787
  }
5788
+ } finally {
5789
+ if (timeoutTimer !== void 0) clearTimeout(timeoutTimer);
5430
5790
  }
5431
5791
  }
5432
5792
  this.breaker.recordFailure(toolDef.name);
@@ -5463,7 +5823,10 @@ var init_llm_loop = __esm({
5463
5823
  ...callContext,
5464
5824
  attempt: attempt + 1
5465
5825
  }),
5466
- signal: AbortSignal.timeout(this.requestTimeoutMs)
5826
+ // Use per-tool timeout when set, otherwise fall back to
5827
+ // the executor-level default. Mirrors Python's per-request
5828
+ // ``timeout=`` override on httpx.AsyncClient.post().
5829
+ signal: AbortSignal.timeout(effectiveTimeoutMs)
5467
5830
  });
5468
5831
  if (!resp.ok) throw new Error(`HTTP ${resp.status}`);
5469
5832
  const result = JSON.stringify(await resp.json());
@@ -5589,7 +5952,7 @@ var init_llm_loop = __esm({
5589
5952
  body.tools = tools;
5590
5953
  }
5591
5954
  const signal = mergeAbortSignals(opts?.signal, AbortSignal.timeout(3e4));
5592
- const response = await fetch("https://api.openai.com/v1/chat/completions", {
5955
+ const response = await fetch(`${this.baseUrl}/chat/completions`, {
5593
5956
  method: "POST",
5594
5957
  headers: {
5595
5958
  "Content-Type": "application/json",
@@ -5609,50 +5972,55 @@ var init_llm_loop = __esm({
5609
5972
  if (!reader) return;
5610
5973
  const decoder = new TextDecoder();
5611
5974
  let buffer = "";
5612
- while (true) {
5613
- const { done, value } = await reader.read();
5614
- if (done) break;
5615
- buffer += decoder.decode(value, { stream: true });
5616
- const lines = buffer.split("\n");
5617
- buffer = lines.pop() || "";
5618
- for (const line of lines) {
5619
- const trimmed = line.trim();
5620
- if (!trimmed || !trimmed.startsWith("data: ")) continue;
5621
- const data = trimmed.slice(6);
5622
- if (data === "[DONE]") continue;
5623
- let chunk;
5624
- try {
5625
- chunk = JSON.parse(data);
5626
- } catch {
5627
- continue;
5628
- }
5629
- if (chunk.usage) {
5630
- const cached2 = chunk.usage.prompt_tokens_details?.cached_tokens ?? 0;
5631
- const uncachedInput = Math.max(0, (chunk.usage.prompt_tokens ?? 0) - cached2);
5632
- yield {
5633
- type: "usage",
5634
- inputTokens: uncachedInput,
5635
- outputTokens: chunk.usage.completion_tokens,
5636
- cacheReadInputTokens: cached2
5637
- };
5638
- }
5639
- const delta = chunk.choices?.[0]?.delta;
5640
- if (!delta) continue;
5641
- if (delta.content) {
5642
- yield { type: "text", content: delta.content };
5643
- }
5644
- if (delta.tool_calls) {
5645
- for (const tc of delta.tool_calls) {
5975
+ try {
5976
+ while (true) {
5977
+ const { done, value } = await reader.read();
5978
+ if (done) break;
5979
+ buffer += decoder.decode(value, { stream: true });
5980
+ const lines = buffer.split("\n");
5981
+ buffer = lines.pop() || "";
5982
+ for (const line of lines) {
5983
+ const trimmed = line.trim();
5984
+ if (!trimmed || !trimmed.startsWith("data: ")) continue;
5985
+ const data = trimmed.slice(6);
5986
+ if (data === "[DONE]") continue;
5987
+ let chunk;
5988
+ try {
5989
+ chunk = JSON.parse(data);
5990
+ } catch {
5991
+ continue;
5992
+ }
5993
+ if (chunk.usage) {
5994
+ const cached2 = chunk.usage.prompt_tokens_details?.cached_tokens ?? 0;
5995
+ const uncachedInput = Math.max(0, (chunk.usage.prompt_tokens ?? 0) - cached2);
5646
5996
  yield {
5647
- type: "tool_call",
5648
- index: tc.index,
5649
- id: tc.id,
5650
- name: tc.function?.name,
5651
- arguments: tc.function?.arguments
5997
+ type: "usage",
5998
+ inputTokens: uncachedInput,
5999
+ outputTokens: chunk.usage.completion_tokens,
6000
+ cacheReadInputTokens: cached2
5652
6001
  };
5653
6002
  }
6003
+ const delta = chunk.choices?.[0]?.delta;
6004
+ if (!delta) continue;
6005
+ if (delta.content) {
6006
+ yield { type: "text", content: delta.content };
6007
+ }
6008
+ if (delta.tool_calls) {
6009
+ for (const tc of delta.tool_calls) {
6010
+ yield {
6011
+ type: "tool_call",
6012
+ index: tc.index,
6013
+ id: tc.id,
6014
+ name: tc.function?.name,
6015
+ arguments: tc.function?.arguments
6016
+ };
6017
+ }
6018
+ }
5654
6019
  }
5655
6020
  }
6021
+ } finally {
6022
+ reader.cancel().catch(() => {
6023
+ });
5656
6024
  }
5657
6025
  }
5658
6026
  };
@@ -5763,12 +6131,14 @@ ${systemPrompt}` : DEFAULT_PHONE_PREAMBLE;
5763
6131
  const hasAfterLlmResponse = Boolean(hookExecutor?.hasAfterLlmResponse() && hookCtx);
5764
6132
  const hasAfterLlmChunk = Boolean(hookExecutor?.hasAfterLlmChunk());
5765
6133
  const allEmittedText = [];
6134
+ const callId = callContext.call_id;
6135
+ const streamOpts = typeof callId === "string" && callId.length > 0 ? { ...opts, callId } : opts;
5766
6136
  for (let iter = 0; iter < maxIterations; iter++) {
5767
6137
  const toolCallsAccumulated = /* @__PURE__ */ new Map();
5768
6138
  const textParts = [];
5769
6139
  let hasToolCalls = false;
5770
6140
  let usageChunkReceived = false;
5771
- for await (const chunk of this.provider.stream(messages, this.openaiTools, opts)) {
6141
+ for await (const chunk of this.provider.stream(messages, this.openaiTools, streamOpts)) {
5772
6142
  if (chunk.type === "text" && chunk.content) {
5773
6143
  const content = hasAfterLlmChunk && hookExecutor ? hookExecutor.runAfterLlmChunk(chunk.content) : chunk.content;
5774
6144
  textParts.push(content);
@@ -5786,7 +6156,7 @@ ${systemPrompt}` : DEFAULT_PHONE_PREAMBLE;
5786
6156
  chunk.inputTokens ?? 0,
5787
6157
  chunk.outputTokens ?? 0,
5788
6158
  chunk.cacheReadInputTokens ?? 0,
5789
- chunk.cacheCreationInputTokens ?? 0
6159
+ chunk.cacheWriteInputTokens ?? 0
5790
6160
  );
5791
6161
  } else if (chunk.type === "tool_call") {
5792
6162
  hasToolCalls = true;
@@ -6180,8 +6550,8 @@ function getElementAtPath(obj, path6) {
6180
6550
  }
6181
6551
  function promiseAllObject(promisesObj) {
6182
6552
  const keys = Object.keys(promisesObj);
6183
- const promises = keys.map((key) => promisesObj[key]);
6184
- return Promise.all(promises).then((results) => {
6553
+ const promises2 = keys.map((key) => promisesObj[key]);
6554
+ return Promise.all(promises2).then((results) => {
6185
6555
  const resolvedObj = {};
6186
6556
  for (let i = 0; i < keys.length; i++) {
6187
6557
  resolvedObj[keys[i]] = results[i];
@@ -23972,6 +24342,8 @@ var init_mcp_client = __esm({
23972
24342
  "use strict";
23973
24343
  init_cjs_shims();
23974
24344
  init_logger();
24345
+ init_server();
24346
+ init_version();
23975
24347
  MCPManager = class {
23976
24348
  configs;
23977
24349
  connected = [];
@@ -23997,10 +24369,16 @@ var init_mcp_client = __esm({
23997
24369
  }
23998
24370
  const aggregatedTools = [];
23999
24371
  for (const cfg of this.configs) {
24372
+ try {
24373
+ validateWebhookUrl(cfg.url);
24374
+ } catch (e) {
24375
+ getLogger().error(`MCP server '${cfg.name}' (${cfg.url}) rejected by SSRF guard: ${String(e)}`);
24376
+ continue;
24377
+ }
24000
24378
  const transport = new transportModule.StreamableHTTPClientTransport(new URL(cfg.url), {
24001
24379
  requestInit: { headers: cfg.headers }
24002
24380
  });
24003
- const client = new mcpModule.Client({ name: "patter", version: "0.6.0" });
24381
+ const client = new mcpModule.Client({ name: "patter", version: VERSION });
24004
24382
  try {
24005
24383
  await client.connect(transport);
24006
24384
  } catch (e) {
@@ -24074,6 +24452,276 @@ var init_mcp_client = __esm({
24074
24452
  }
24075
24453
  });
24076
24454
 
24455
+ // src/consult.ts
24456
+ function isLoopbackOrPrivateHost(baseUrl) {
24457
+ let host;
24458
+ try {
24459
+ host = new URL(baseUrl).hostname.toLowerCase();
24460
+ } catch {
24461
+ return false;
24462
+ }
24463
+ if (host.startsWith("[") && host.endsWith("]")) host = host.slice(1, -1);
24464
+ if (host === "localhost" || host === "0.0.0.0" || host === "::1") return true;
24465
+ if (host.endsWith(".local")) return true;
24466
+ if (/^127\./.test(host) || /^10\./.test(host) || /^192\.168\./.test(host)) return true;
24467
+ if (/^169\.254\./.test(host)) return true;
24468
+ const m = host.match(/^172\.(\d+)\./);
24469
+ if (m) {
24470
+ const octet = Number(m[1]);
24471
+ if (octet >= 16 && octet <= 31) return true;
24472
+ }
24473
+ if (host.includes(":") && (/^f[cd][0-9a-f]{2}:/.test(host) || /^fe[89ab][0-9a-f]:/.test(host))) {
24474
+ return true;
24475
+ }
24476
+ return false;
24477
+ }
24478
+ function openclawConsult(agent, opts = {}) {
24479
+ if (!agent || !OPENCLAW_AGENT_RE.test(agent)) {
24480
+ throw new Error(
24481
+ "OpenClaw agent must be a non-empty id of letters, digits, and ._:/- only"
24482
+ );
24483
+ }
24484
+ const baseUrl = opts.baseUrl ?? OPENCLAW_DEFAULT_BASE_URL;
24485
+ const model = agent.includes("/") || agent.includes(":") ? agent : `openclaw/${agent}`;
24486
+ return {
24487
+ openaiCompatible: {
24488
+ baseUrl,
24489
+ model,
24490
+ apiKey: opts.apiKey,
24491
+ apiKeyEnv: OPENCLAW_API_KEY_ENV,
24492
+ sessionHeader: OPENCLAW_SESSION_HEADER
24493
+ },
24494
+ timeoutMs: opts.timeoutMs ?? DEFAULT_TIMEOUT_MS,
24495
+ toolName: opts.toolName ?? DEFAULT_TOOL_NAME,
24496
+ description: opts.description ?? OPENCLAW_DESCRIPTION,
24497
+ reassurance: opts.reassurance ?? OPENCLAW_REASSURANCE,
24498
+ headers: opts.headers,
24499
+ allowLoopback: opts.allowLoopback ?? isLoopbackOrPrivateHost(baseUrl)
24500
+ };
24501
+ }
24502
+ function buildConsultTool(config2) {
24503
+ const hasUrl = config2.url != null;
24504
+ const hasOpenAI = config2.openaiCompatible != null;
24505
+ if (hasUrl === hasOpenAI) {
24506
+ throw new Error("ConsultConfig requires exactly one of url or openaiCompatible");
24507
+ }
24508
+ const timeoutMs = config2.timeoutMs ?? DEFAULT_TIMEOUT_MS;
24509
+ const baseHeaders = {
24510
+ ...config2.headers ?? {},
24511
+ "Content-Type": "application/json"
24512
+ };
24513
+ const handler = hasOpenAI ? buildOpenAIHandler(config2.openaiCompatible, baseHeaders, timeoutMs, config2.allowLoopback ?? false) : buildWebhookHandler(config2.url, baseHeaders, timeoutMs, config2.allowLoopback ?? false);
24514
+ const tool2 = {
24515
+ name: config2.toolName ?? DEFAULT_TOOL_NAME,
24516
+ description: config2.description ?? DEFAULT_DESCRIPTION,
24517
+ parameters: PARAMETERS,
24518
+ handler
24519
+ };
24520
+ return config2.reassurance != null ? { ...tool2, reassurance: config2.reassurance } : tool2;
24521
+ }
24522
+ function buildWebhookHandler(url2, headers, timeoutMs, allowLoopback) {
24523
+ validateWebhookUrl(url2, allowLoopback);
24524
+ return async (args, context) => {
24525
+ const requestText = typeof args?.request === "string" ? args.request : "";
24526
+ const payload = {
24527
+ request: requestText,
24528
+ call_id: context?.call_id ?? "",
24529
+ caller: context?.caller ?? "",
24530
+ callee: context?.callee ?? ""
24531
+ };
24532
+ let body;
24533
+ try {
24534
+ const resp = await fetch(url2, {
24535
+ method: "POST",
24536
+ headers,
24537
+ body: JSON.stringify(payload),
24538
+ signal: AbortSignal.timeout(timeoutMs)
24539
+ });
24540
+ if (!resp.ok) {
24541
+ getLogger().warn(`consult tool: orchestrator returned HTTP ${resp.status}`);
24542
+ return GRACEFUL_FALLBACK;
24543
+ }
24544
+ body = (await resp.text()).slice(0, MAX_RESPONSE_CHARS);
24545
+ } catch (e) {
24546
+ getLogger().warn(
24547
+ `consult tool: orchestrator call failed: ${e instanceof Error ? e.name : "error"}`
24548
+ );
24549
+ return GRACEFUL_FALLBACK;
24550
+ }
24551
+ try {
24552
+ const data = JSON.parse(body);
24553
+ if (data && typeof data === "object" && !Array.isArray(data)) {
24554
+ const obj = data;
24555
+ for (const key of REPLY_KEYS) {
24556
+ if (typeof obj[key] === "string") return obj[key];
24557
+ }
24558
+ }
24559
+ return JSON.stringify(data);
24560
+ } catch {
24561
+ return body;
24562
+ }
24563
+ };
24564
+ }
24565
+ function buildOpenAIHandler(oc, baseHeaders, timeoutMs, allowLoopback) {
24566
+ const endpoint = oc.baseUrl.replace(/\/+$/, "") + "/chat/completions";
24567
+ validateWebhookUrl(endpoint, allowLoopback);
24568
+ const apiKey = oc.apiKey ?? (oc.apiKeyEnv ? process.env[oc.apiKeyEnv] : void 0);
24569
+ const headers = { ...baseHeaders };
24570
+ if (apiKey) headers.Authorization = `Bearer ${apiKey}`;
24571
+ const sessionHeader = oc.sessionHeader;
24572
+ const model = oc.model;
24573
+ return async (args, context) => {
24574
+ const requestText = typeof args?.request === "string" ? args.request : "";
24575
+ const callId = context?.call_id ?? "";
24576
+ const caller = context?.caller ?? "";
24577
+ const callee = context?.callee ?? "";
24578
+ const contextLines = ["You are answering an inbound phone call relayed by a voice agent."];
24579
+ if (caller) contextLines.push(`Caller: ${caller}`);
24580
+ if (callee) contextLines.push(`Line dialed: ${callee}`);
24581
+ contextLines.push(
24582
+ "Reply concisely in a spoken, conversational style \u2014 it is read aloud to the caller."
24583
+ );
24584
+ const reqHeaders = { ...headers };
24585
+ if (sessionHeader && callId) reqHeaders[sessionHeader] = callId;
24586
+ const payload = {
24587
+ model,
24588
+ messages: [
24589
+ { role: "system", content: contextLines.join("\n") },
24590
+ { role: "user", content: requestText }
24591
+ ],
24592
+ stream: false
24593
+ };
24594
+ if (callId) payload.user = callId;
24595
+ try {
24596
+ const resp = await fetch(endpoint, {
24597
+ method: "POST",
24598
+ headers: reqHeaders,
24599
+ body: JSON.stringify(payload),
24600
+ signal: AbortSignal.timeout(timeoutMs)
24601
+ });
24602
+ if (resp.status === 404) {
24603
+ getLogger().warn(
24604
+ "consult tool: OpenAI-compatible endpoint returned 404 \u2014 is it enabled? (OpenClaw: set gateway.http.endpoints.chatCompletions.enabled = true)"
24605
+ );
24606
+ return GRACEFUL_FALLBACK;
24607
+ }
24608
+ if (!resp.ok) {
24609
+ getLogger().warn(`consult tool: openai-compatible returned HTTP ${resp.status}`);
24610
+ return GRACEFUL_FALLBACK;
24611
+ }
24612
+ const data = await resp.json();
24613
+ const content = data?.choices?.[0]?.message?.content;
24614
+ if (typeof content === "string" && content.trim()) {
24615
+ return content.trim().slice(0, MAX_RESPONSE_CHARS);
24616
+ }
24617
+ getLogger().warn("consult tool: response missing choices[0].message.content");
24618
+ return GRACEFUL_FALLBACK;
24619
+ } catch (e) {
24620
+ getLogger().warn(
24621
+ `consult tool: openai-compatible call failed: ${e instanceof Error ? e.name : "error"}`
24622
+ );
24623
+ return GRACEFUL_FALLBACK;
24624
+ }
24625
+ };
24626
+ }
24627
+ function buildPostCallRecord(data, includeTranscript) {
24628
+ const lines = [];
24629
+ const caller = data.caller;
24630
+ const callee = data.callee;
24631
+ if (caller) lines.push(`Caller: ${caller}`);
24632
+ if (callee) lines.push(`Line dialed: ${callee}`);
24633
+ const metrics = data.metrics;
24634
+ const duration3 = metrics?.durationSeconds ?? metrics?.duration_seconds;
24635
+ if (typeof duration3 === "number") lines.push(`Duration: ${Math.round(duration3)}s`);
24636
+ if (includeTranscript) {
24637
+ const entries = data.transcript ?? [];
24638
+ const rendered = entries.filter((e) => e && typeof e === "object").map((e) => `${e.role ?? "?"}: ${e.text ?? ""}`).join("\n");
24639
+ if (rendered) lines.push("Transcript:\n" + rendered.slice(0, POSTCALL_MAX_TRANSCRIPT_CHARS));
24640
+ }
24641
+ return lines.length ? lines.join("\n") : "(no call details available)";
24642
+ }
24643
/**
 * Create a post-call notifier that POSTs a record of a finished call to an
 * OpenAI-compatible `/chat/completions` endpoint.
 *
 * Connection settings are resolved through `openclawConsult(agent, …)` and the
 * final endpoint is checked with `validateWebhookUrl` once, up front; anything
 * those helpers throw propagates to the caller of this factory.
 *
 * @param {*} agent - Forwarded unchanged to `openclawConsult`.
 * @param {object} [opts]
 * @param {string} [opts.baseUrl] - Forwarded to `openclawConsult`.
 * @param {string} [opts.apiKey] - Forwarded to `openclawConsult`.
 * @param {number} [opts.timeoutMs] - Request timeout in ms (default `DEFAULT_TIMEOUT_MS`).
 * @param {boolean} [opts.allowLoopback] - Forwarded to `openclawConsult`.
 * @param {boolean} [opts.includeTranscript=true] - Include the transcript in the record.
 * @param {string} [opts.instruction] - System message (default `POSTCALL_INSTRUCTION`).
 * @returns {(data: object) => Promise<void>} Best-effort callback: failures are
 *   logged at WARN level and never rejected.
 */
function openclawPostCallNotifier(agent, opts = {}) {
  const cfg = openclawConsult(agent, {
    baseUrl: opts.baseUrl,
    apiKey: opts.apiKey,
    timeoutMs: opts.timeoutMs ?? DEFAULT_TIMEOUT_MS,
    allowLoopback: opts.allowLoopback
  });
  const oc = cfg.openaiCompatible;
  // Strip trailing slashes so the joined path never contains "//".
  const endpoint = oc.baseUrl.replace(/\/+$/, "") + "/chat/completions";
  validateWebhookUrl(endpoint, cfg.allowLoopback ?? false);
  // An explicit key wins; otherwise fall back to the configured env variable.
  const apiKey = oc.apiKey ?? (oc.apiKeyEnv ? process.env[oc.apiKeyEnv] : void 0);
  const sessionHeader = oc.sessionHeader;
  const model = oc.model;
  const timeoutMs = cfg.timeoutMs ?? DEFAULT_TIMEOUT_MS;
  const includeTranscript = opts.includeTranscript ?? true;
  const instruction = opts.instruction ?? POSTCALL_INSTRUCTION;
  return async (data) => {
    // Everything — including rendering the record — runs inside the try so a
    // malformed payload is logged like any other failure instead of rejecting.
    try {
      const call = data ?? {};
      const callId = call.call_id ?? "";
      const headers = { "Content-Type": "application/json" };
      if (apiKey) headers.Authorization = `Bearer ${apiKey}`;
      if (sessionHeader && callId) headers[sessionHeader] = callId;
      const payload = {
        model,
        messages: [
          { role: "system", content: instruction },
          { role: "user", content: buildPostCallRecord(call, includeTranscript) }
        ],
        stream: false
      };
      if (callId) payload.user = callId;
      const resp = await fetch(endpoint, {
        method: "POST",
        headers,
        body: JSON.stringify(payload),
        signal: AbortSignal.timeout(timeoutMs)
      });
      if (!resp.ok) {
        getLogger().warn(`openclaw post-call notify: HTTP ${resp.status}`);
      }
    } catch (e) {
      // Only the error name is logged, never the payload.
      getLogger().warn(
        `openclaw post-call notify failed: ${e instanceof Error ? e.name : "error"}`
      );
    }
  };
}
24691
// Module-level bindings for src/consult.ts. They stay `var` so the hoisted
// functions of this module can reference them; values are assigned when the
// init function below runs.
var DEFAULT_TIMEOUT_MS, DEFAULT_TOOL_NAME, DEFAULT_DESCRIPTION, MAX_RESPONSE_CHARS, REPLY_KEYS, GRACEFUL_FALLBACK;
var OPENCLAW_DEFAULT_BASE_URL, OPENCLAW_API_KEY_ENV, OPENCLAW_SESSION_HEADER, OPENCLAW_DESCRIPTION, OPENCLAW_REASSURANCE, OPENCLAW_AGENT_RE;
var PARAMETERS, POSTCALL_INSTRUCTION, POSTCALL_MAX_TRANSCRIPT_CHARS;
var init_consult = __esm({
  "src/consult.ts"() {
    "use strict";
    init_cjs_shims();
    init_logger();
    init_server();

    // Generic consult-tool defaults.
    DEFAULT_TIMEOUT_MS = 30_000;
    DEFAULT_TOOL_NAME = "consult_agent";
    DEFAULT_DESCRIPTION = "Consult your back-office agent for deeper reasoning, fresh information, or actions beyond this call. Use when the caller asks something you cannot answer directly.";
    MAX_RESPONSE_CHARS = 1_000_000;
    REPLY_KEYS = ["reply", "response", "text", "result", "answer", "message"];
    GRACEFUL_FALLBACK = "I wasn't able to reach the system to get that answer right now.";

    // OpenClaw preset.
    OPENCLAW_DEFAULT_BASE_URL = "http://127.0.0.1:18789/v1";
    OPENCLAW_API_KEY_ENV = "OPENCLAW_API_KEY";
    OPENCLAW_SESSION_HEADER = "x-openclaw-session-key";
    OPENCLAW_DESCRIPTION = "Consult your OpenClaw agent for anything account-specific \u2014 appointments, customer records, schedules, or actions in the back-office system. NEVER state an appointment time, customer detail, or schedule fact from your own memory; ALWAYS call this tool for those and read back what it returns.";
    OPENCLAW_REASSURANCE = "Let me check on that for you, one moment.";
    OPENCLAW_AGENT_RE = /^[A-Za-z0-9._:/-]+$/;

    // JSON-schema style parameter description: a single required `request` string.
    PARAMETERS = {
      type: "object",
      properties: {
        request: {
          type: "string",
          description: "The question or task to send to your back-office agent for deeper reasoning, fresh information, or an action beyond this call. State it self-containedly \u2014 the dialog history is not forwarded with the consult."
        }
      },
      required: ["request"]
    };

    // Post-call notifier defaults.
    POSTCALL_INSTRUCTION = "A phone call handled by the voice agent has just ended. Here is the record of the call. Log it and follow up if anything needs action.";
    POSTCALL_MAX_TRANSCRIPT_CHARS = 12_000;
  }
});
24724
+
24077
24725
  // src/sentence-chunker.ts
24078
24726
  function splitSentences(text, minSentenceLen = DEFAULT_MIN_SENTENCE_LEN) {
24079
24727
  const alphabets = "([A-Za-z])";
@@ -26616,6 +27264,8 @@ var init_silero_vad = __esm({
26616
27264
  speechThresholdDuration = 0;
26617
27265
  silenceThresholdDuration = 0;
26618
27266
  closed = false;
27267
+ /** Transitions produced in the current processFrame call but not yet returned. */
27268
+ eventQueue = [];
26619
27269
  /**
26620
27270
  * Load the Silero VAD model.
26621
27271
  * Throws if `onnxruntime-node` is not installed.
@@ -26741,22 +27391,21 @@ var init_silero_vad = __esm({
26741
27391
  );
26742
27392
  }
26743
27393
  if (pcmChunk.length === 0) {
26744
- return null;
27394
+ return this.eventQueue.shift() ?? null;
26745
27395
  }
26746
27396
  const numSamples = Math.floor(pcmChunk.length / 2);
26747
27397
  if (numSamples === 0) {
26748
- return null;
27398
+ return this.eventQueue.shift() ?? null;
26749
27399
  }
26750
27400
  const samples = new Float32Array(numSamples);
26751
27401
  for (let i = 0; i < numSamples; i++) {
26752
- samples[i] = pcmChunk.readInt16LE(i * 2) / 32767;
27402
+ samples[i] = pcmChunk.readInt16LE(i * 2) / 32768;
26753
27403
  }
26754
27404
  const merged = new Float32Array(this.pending.length + samples.length);
26755
27405
  merged.set(this.pending, 0);
26756
27406
  merged.set(samples, this.pending.length);
26757
27407
  this.pending = merged;
26758
27408
  const windowSize = this.model.windowSizeSamples;
26759
- let event = null;
26760
27409
  while (this.pending.length >= windowSize) {
26761
27410
  const window = this.pending.slice(0, windowSize);
26762
27411
  this.pending = this.pending.slice(windowSize);
@@ -26765,10 +27414,10 @@ var init_silero_vad = __esm({
26765
27414
  const windowDuration = windowSize / this.opts.sampleRate;
26766
27415
  const transition = this.advanceState(p, windowDuration);
26767
27416
  if (transition !== null) {
26768
- event = transition;
27417
+ this.eventQueue.push(transition);
26769
27418
  }
26770
27419
  }
26771
- return event;
27420
+ return this.eventQueue.shift() ?? null;
26772
27421
  }
26773
27422
  advanceState(p, windowDuration) {
26774
27423
  const opts = this.opts;
@@ -26823,6 +27472,7 @@ var init_silero_vad = __esm({
26823
27472
  this.pubSpeaking = false;
26824
27473
  this.speechThresholdDuration = 0;
26825
27474
  this.silenceThresholdDuration = 0;
27475
+ this.eventQueue = [];
26826
27476
  this.expFilter.reset();
26827
27477
  this.model.reset();
26828
27478
  }
@@ -27063,6 +27713,13 @@ var init_aec = __esm({
27063
27713
  });
27064
27714
 
27065
27715
  // src/stream-handler.ts
27716
/**
 * Prepend a tool-call preamble block to a system prompt.
 *
 * @param {string} prompt - The resolved system prompt (may be empty).
 * @param {boolean|string|undefined} knob - Falsy: return `prompt` untouched.
 *   A string: use it as the preamble block. Any other truthy value: use
 *   `DEFAULT_TOOL_CALL_PREAMBLE_BLOCK`.
 * @returns {string} The block, a blank line and the prompt; just the block
 *   when the prompt is empty; or the unchanged prompt when `knob` is falsy.
 */
function applyToolCallPreambles(prompt, knob) {
  if (knob) {
    const preamble = typeof knob === "string" ? knob : DEFAULT_TOOL_CALL_PREAMBLE_BLOCK;
    if (!prompt) {
      return preamble;
    }
    return `${preamble}\n\n${prompt}`;
  }
  return prompt;
}
27066
27723
  function checkGuardrails(text, guardrails) {
27067
27724
  if (!guardrails) return null;
27068
27725
  for (const guard of guardrails) {
@@ -27119,6 +27776,13 @@ function augmentWithBuiltinHandoffTools(userTools, callbacks) {
27119
27776
  }
27120
27777
  return out;
27121
27778
  }
27779
/**
 * Decide whether an STT transcript is a known non-speech artefact.
 *
 * The text is trimmed, lower-cased and stripped of trailing punctuation. It is
 * reported as a hallucination when the result is empty, when it is a member of
 * `HALLUCINATIONS`, or when it splits on sentence punctuation into two or more
 * pieces that are all members of `HALLUCINATIONS`.
 *
 * Non-ASCII punctuation is written with explicit escapes (ellipsis U+2026,
 * ideographic full stop U+3002, full-width `!` U+FF01 and `?` U+FF1F) so the
 * CJK sentence enders are handled and cannot be lost to re-encoding.
 *
 * @param {string} text - Raw transcript text.
 * @returns {boolean} `true` when the transcript should be dropped.
 */
function isSttHallucination(text) {
  const trailingPunctuation = /[.,!?;:\u2026\u3002\uFF01\uFF1F\s]+$/u;
  const sentenceBreak = /[.!?\u2026\u3002\uFF01\uFF1F]+/u;
  const stripped = text.trim().toLowerCase().replace(trailingPunctuation, "").trim();
  if (stripped === "") return true;
  if (HALLUCINATIONS.has(stripped)) return true;
  // Multi-sentence input counts only when every sentence is itself an artefact.
  const pieces = stripped
    .split(sentenceBreak)
    .map((piece) => piece.trim())
    .filter((piece) => piece.length > 0);
  return pieces.length > 1 && pieces.every((piece) => HALLUCINATIONS.has(piece));
}
27122
27786
  async function queryDeepgramCost(metricsAcc, deepgramKey, deepgramRequestId) {
27123
27787
  try {
27124
27788
  const projResp = await fetch("https://api.deepgram.com/v1/projects", {
@@ -27149,7 +27813,7 @@ async function queryDeepgramCost(metricsAcc, deepgramKey, deepgramRequestId) {
27149
27813
  } catch {
27150
27814
  }
27151
27815
  }
27152
- var HALLUCINATIONS, StreamHandler;
27816
+ var DEFAULT_TOOL_CALL_PREAMBLE_BLOCK, HALLUCINATIONS, StreamHandler;
27153
27817
  var init_stream_handler = __esm({
27154
27818
  "src/stream-handler.ts"() {
27155
27819
  "use strict";
@@ -27167,39 +27831,96 @@ var init_stream_handler = __esm({
27167
27831
  init_mcp_client();
27168
27832
  init_logger();
27169
27833
  init_server();
27834
+ init_consult();
27170
27835
  init_sentence_chunker();
27171
27836
  init_pipeline_hooks();
27172
27837
  init_event_bus();
27173
27838
  init_tracing();
27839
+ DEFAULT_TOOL_CALL_PREAMBLE_BLOCK = `# Preambles
27840
+
27841
+ Use short preambles only when they help the user understand that work is happening. A preamble is one short spoken update describing the action you are about to take \u2014 not hidden reasoning, and never a claim about the result.
27842
+
27843
+ ## When to use a preamble
27844
+ Use a preamble when:
27845
+ - you are about to call a tool that may take noticeable time;
27846
+ - you need to reason through a multi-step request;
27847
+ - you are checking records, availability, account state, or policy details;
27848
+ - you are preparing an escalation or handoff;
27849
+ - silence would make the assistant feel unresponsive.
27850
+
27851
+ When a preamble is needed, output it immediately before the reasoning or tool call.
27852
+
27853
+ ## When to NOT use a preamble
27854
+ Do not use a preamble when:
27855
+ - the answer is direct and can be given immediately;
27856
+ - the user is only confirming, correcting, or declining something;
27857
+ - the audio is unclear and you need clarification instead;
27858
+ - the tool call is lightweight and the user would not benefit from an update.
27859
+
27860
+ ## Style
27861
+ - Keep it to one short sentence (two only before a high-impact action).
27862
+ - Vary the wording across turns; do not reuse the same opener.
27863
+ - Describe the action, not the internal reasoning.
27864
+ - Never imply success or failure before the tool returns.
27865
+
27866
+ Prefer:
27867
+ - "I'll check that order now."
27868
+ - "I'll look up your appointment details."
27869
+ - "I'll verify that before we make any changes."
27870
+ - "I'll check the policy and then give you the next step."
27871
+ - "I'll pull that up so we can make sure it's the right account."
27872
+
27873
+ Avoid:
27874
+ - "Let me think about that for a second."
27875
+ - "Please wait while I process your request."
27876
+ - "I'm going to use my tools now."
27877
+ - "Hmm..." / "One moment while I process that..."`;
27174
27878
  HALLUCINATIONS = /* @__PURE__ */ new Set([
27175
- "you",
27176
- "thank you",
27177
- "thanks",
27178
- "yeah",
27179
- "yes",
27180
- "no",
27181
- "okay",
27182
- "ok",
27183
- "uh",
27184
- "um",
27185
- "mmm",
27186
- "hmm",
27187
- ".",
27188
- "bye",
27189
- "right",
27190
- "cool",
27191
- // Whisper YouTube-caption hallucinations
27879
+ // Issue #154: the hallucination filter is now DISPLAY-ONLY — it no longer
27880
+ // gates response creation (the server drives the response on
27881
+ // ``input_audio_buffer.committed`` by default). Dropping a phrase here
27882
+ // therefore deletes the user's transcript line (recordSttComplete never
27883
+ // fires → empty user_text → dashboard skips the user line). So this set is
27884
+ // restricted to genuine NON-SPEECH artefacts that Whisper emits on
27885
+ // silence / TTS echo, NOT real conversational words. Standalone words like
27886
+ // 'yes', 'no', 'okay', 'right', 'you', 'thanks' were REMOVED — they are
27887
+ // legitimate user replies and must reach the transcript. Parity with
27888
+ // Python ``_STT_HALLUCINATIONS``.
27889
+ //
27890
+ // Whisper caption / training-set hallucinations. Whisper was trained heavily
27891
+ // on captioned video, so on silence / PSTN echo it falls back to the most
27892
+ // common caption credits + sign-offs. Curated from widely-reported
27893
+ // Whisper-on-silence outputs across the open-source ASR community.
27192
27894
  "thank you for watching",
27193
27895
  "thanks for watching",
27194
27896
  "thank you for watching!",
27195
27897
  "thanks for watching!",
27196
27898
  "thank you so much for watching",
27899
+ "thank you for watching please subscribe",
27900
+ "thanks for watching please subscribe",
27197
27901
  "thanks for listening",
27902
+ "we'll see you next time",
27903
+ "see you next time",
27904
+ "bye bye",
27198
27905
  "please subscribe",
27906
+ "please subscribe to my channel",
27907
+ "don't forget to subscribe",
27908
+ "like and subscribe",
27199
27909
  "subscribe",
27910
+ "subtitles by the amara.org community",
27911
+ "subtitles by the amara org community",
27912
+ "subtitles by",
27913
+ "transcribed by",
27914
+ "transcription by castingwords",
27915
+ "the end",
27916
+ // Music / sound markers.
27200
27917
  "music",
27201
27918
  "[music]",
27919
+ "piano music",
27920
+ "applause",
27921
+ "[applause]",
27202
27922
  "\u266A",
27923
+ // Silence markers.
27203
27924
  "[no audio]",
27204
27925
  "[silence]",
27205
27926
  "[blank_audio]",
@@ -27503,7 +28224,14 @@ var init_stream_handler = __esm({
27503
28224
  * barge-in armed during the audible tail. Tunable via env.
27504
28225
  */
27505
28226
  endSpeakingWithGrace() {
27506
- const grace = Number(process.env.PATTER_TTS_TAIL_GRACE_MS ?? 1500);
28227
+ const rawGrace = process.env.PATTER_TTS_TAIL_GRACE_MS;
28228
+ const parsedGrace = rawGrace !== void 0 ? Number(rawGrace) : NaN;
28229
+ const grace = rawGrace !== void 0 && Number.isFinite(parsedGrace) ? parsedGrace : 1500;
28230
+ if (rawGrace !== void 0 && !Number.isFinite(parsedGrace)) {
28231
+ getLogger().warn(
28232
+ `PATTER_TTS_TAIL_GRACE_MS="${rawGrace}" is not a valid number \u2014 using default 1500ms`
28233
+ );
28234
+ }
27507
28235
  if (grace > 0) {
27508
28236
  const gen = this.speakingGeneration;
27509
28237
  this.clearGraceTimer();
@@ -27597,6 +28325,14 @@ var init_stream_handler = __esm({
27597
28325
  `[DIAG] Flushed ${replayed} pre-barge-in frame(s) (~${replayed * 20} ms) to STT`
27598
28326
  );
27599
28327
  }
28328
+ /**
28329
+ * Per-call resolved tool list. Starts as ``null`` (falls back to
28330
+ * ``deps.agent.tools``). Populated by ``initMcpTools`` when MCP servers
28331
+ * are configured so discovered tools are merged in without mutating the
28332
+ * shared ``AgentOptions`` object. Code that needs the effective tool list
28333
+ * should read ``this.resolvedTools ?? this.deps.agent.tools``.
28334
+ */
28335
+ resolvedTools = null;
27600
28336
  llmLoop = null;
27601
28337
  /**
27602
28338
  * Per-call tool executor — provides retry-with-exponential-backoff and a
@@ -27640,6 +28376,17 @@ var init_stream_handler = __esm({
27640
28376
  userTranscriptPending = false;
27641
28377
  pendingAssistantTurn = null;
27642
28378
  pendingAssistantTimer = null;
28379
+ /**
28380
+ * Reserved monotonic turn index for the in-flight Realtime turn (issue
28381
+ * #154, fix 5/6). Reserved in ``onAdapterSpeechStopped`` via
28382
+ * ``metricsAcc.reserveTurnIndex()`` the moment the turn OPENS, then threaded
28383
+ * through to the live per-line transcript events (``recordTranscriptLine``)
28384
+ * and into ``recordTurnComplete`` / ``recordTurnInterrupted`` so the
28385
+ * dashboard can sort a late-arriving user line ABOVE its agent line by
28386
+ * ``(turnIndex, role)``. ``null`` until the first turn opens. Parity with
28387
+ * Python ``_current_turn_index``.
28388
+ */
28389
+ currentTurnIndex = null;
27643
28390
  /**
27644
28391
  * Hard cap on how long we wait for the user transcript before flushing
27645
28392
  * the buffered assistant turn alone. 3 s covers OpenAI Whisper's typical
@@ -27721,6 +28468,23 @@ var init_stream_handler = __esm({
27721
28468
  * streaming/regular LLM, WebSocket remote, Realtime response_done) so the
27722
28469
  * payload shape lives in one place.
27723
28470
  */
28471
+ /**
28472
+ * Emit a live per-line transcript event to the dashboard store (issue #154,
28473
+ * fix 5). Routed through a single helper so the call shape lives in one
28474
+ * place. ``recordTranscriptLine`` appends the line to the active call's
28475
+ * transcript and publishes a ``transcript_line`` SSE event; the dashboard
28476
+ * sorts by (turnIndex, user<assistant) so a late user line lands above its
28477
+ * agent line. No-op when no turn index has been reserved yet.
28478
+ */
28479
+ emitTranscriptLine(role, text) {
28480
+ if (this.currentTurnIndex === null) return;
28481
+ this.deps.metricsStore.recordTranscriptLine({
28482
+ call_id: this.callId,
28483
+ turnIndex: this.currentTurnIndex,
28484
+ role,
28485
+ text
28486
+ });
28487
+ }
27724
28488
  async emitTurnMetrics(turn) {
27725
28489
  if (turn == null) return;
27726
28490
  this.deps.metricsStore.recordTurn({ call_id: this.callId, turn });
@@ -27827,7 +28591,7 @@ var init_stream_handler = __esm({
27827
28591
  if (customParams.callee && !this.callee) this.callee = customParams.callee;
27828
28592
  const mode = this.deps.agent.engine ? `engine=${this.deps.agent.engine.kind ?? "unknown"}` : "pipeline";
27829
28593
  getLogger().info(
27830
- `Call started: ${callId} (${this.deps.bridge.label}, ${mode}, ${sanitizeLogValue(this.caller || "?")} \u2192 ${sanitizeLogValue(this.callee || "?")})`
28594
+ `Call started: ${callId} (${this.deps.bridge.label}, ${mode}, ${maskPhoneNumber(this.caller || "?")} \u2192 ${maskPhoneNumber(this.callee || "?")})`
27831
28595
  );
27832
28596
  if (Object.keys(customParams).length > 0) {
27833
28597
  getLogger().debug(`Custom params: ${sanitizeLogValue(JSON.stringify(customParams))}`);
@@ -27872,10 +28636,13 @@ var init_stream_handler = __esm({
27872
28636
  const resolvedPrompt = Object.keys(allVars).length > 0 ? this.deps.resolveVariables(this.deps.agent.systemPrompt, allVars) : this.deps.agent.systemPrompt;
27873
28637
  const provider2 = this.deps.agent.provider ?? "openai_realtime";
27874
28638
  await this.initMcpTools();
28639
+ this.injectConsultTool();
27875
28640
  if (provider2 === "pipeline") {
27876
28641
  await this.initPipeline(resolvedPrompt);
27877
28642
  } else {
27878
- await this.initRealtimeAdapter(resolvedPrompt);
28643
+ await this.initRealtimeAdapter(
28644
+ applyToolCallPreambles(resolvedPrompt, this.deps.agent.toolCallPreambles)
28645
+ );
27879
28646
  }
27880
28647
  }
27881
28648
  /**
@@ -27900,10 +28667,25 @@ var init_stream_handler = __esm({
27900
28667
  }
27901
28668
  if (discovered.length === 0) return;
27902
28669
  MCPManager.assertNoConflicts(this.deps.agent.tools, discovered);
27903
- const mutableAgent = this.deps.agent;
27904
- mutableAgent.tools = [...mutableAgent.tools ?? [], ...discovered];
28670
+ this.resolvedTools = [...this.deps.agent.tools ?? [], ...discovered];
27905
28671
  getLogger().info(`MCP: merged ${discovered.length} tool(s) into agent`);
27906
28672
  }
28673
+ /**
28674
+ * Merge the built-in ``consult`` tool into the per-call tool list when
28675
+ * ``agent.consult`` is set, mirroring {@link initMcpTools}: the shared
28676
+ * ``deps.agent`` is NOT mutated; the merged list is stored on
28677
+ * ``this.resolvedTools`` so ``buildAIAdapter`` (Realtime) and the pipeline
28678
+ * ``LLMLoop`` both see it. Idempotent — a no-op if a tool with the same name
28679
+ * is already present.
28680
+ */
28681
+ injectConsultTool() {
28682
+ const consult = this.deps.agent.consult;
28683
+ if (!consult) return;
28684
+ const consultTool = buildConsultTool(consult);
28685
+ const base = this.resolvedTools ?? (this.deps.agent.tools ?? []);
28686
+ if (base.some((t) => t.name === consultTool.name)) return;
28687
+ this.resolvedTools = [...base, consultTool];
28688
+ }
27907
28689
  /** Set the stream SID (Twilio only, called after parsing 'start' event). */
27908
28690
  /** Set the carrier-side stream id (Twilio `streamSid` / Telnyx stream identifier). */
27909
28691
  setStreamSid(sid) {
@@ -27923,8 +28705,12 @@ var init_stream_handler = __esm({
27923
28705
  if (activeVad && !this.vadDisabled) {
27924
28706
  try {
27925
28707
  const vadPromise = activeVad.processFrame(pcm16k, 16e3);
27926
- const timeoutPromise = new Promise((resolve2) => setTimeout(() => resolve2(null), 25));
28708
+ let vadTimeoutId;
28709
+ const timeoutPromise = new Promise((resolve2) => {
28710
+ vadTimeoutId = setTimeout(() => resolve2(null), 25);
28711
+ });
27927
28712
  const evt = await Promise.race([vadPromise, timeoutPromise]);
28713
+ clearTimeout(vadTimeoutId);
27928
28714
  if (evt) {
27929
28715
  getLogger().info(
27930
28716
  `[VAD] ${evt.type} agentSpeaking=${this.isSpeaking}`
@@ -27997,7 +28783,7 @@ var init_stream_handler = __esm({
27997
28783
  if ((this.deps.agent.bargeInThresholdMs ?? 300) === 0) return;
27998
28784
  }
27999
28785
  const hooks = this.deps.agent.hooks;
28000
- if (hooks) {
28786
+ if (hooks?.beforeSendToStt) {
28001
28787
  const hookExecutor = new PipelineHookExecutor(hooks);
28002
28788
  const hookCtx = this.buildHookContext();
28003
28789
  const processed = await hookExecutor.runBeforeSendToStt(pcm16k, hookCtx);
@@ -28423,7 +29209,7 @@ var init_stream_handler = __esm({
28423
29209
  }
28424
29210
  const providerModel = this.deps.agent.llm?.model ?? "";
28425
29211
  const augmentedTools = augmentWithBuiltinHandoffTools(
28426
- this.deps.agent.tools,
29212
+ this.resolvedTools ?? this.deps.agent.tools,
28427
29213
  {
28428
29214
  transferCall: (number4) => this.deps.bridge.transferCall(this.callId, number4),
28429
29215
  endCall: () => this.deps.bridge.endCall(this.callId, this.ws)
@@ -28447,7 +29233,7 @@ var init_stream_handler = __esm({
28447
29233
  let llmModel = this.deps.agent.model || "gpt-4o-mini";
28448
29234
  if (llmModel.includes("realtime")) llmModel = "gpt-4o-mini";
28449
29235
  const augmentedTools = augmentWithBuiltinHandoffTools(
28450
- this.deps.agent.tools,
29236
+ this.resolvedTools ?? this.deps.agent.tools,
28451
29237
  {
28452
29238
  transferCall: (number4) => this.deps.bridge.transferCall(this.callId, number4),
28453
29239
  endCall: () => this.deps.bridge.endCall(this.callId, this.ws)
@@ -28871,6 +29657,14 @@ var init_stream_handler = __esm({
28871
29657
  chunker.reset();
28872
29658
  getLogger().error(`LLM loop error (${label}):`, e);
28873
29659
  this.metricsAcc.recordTurnInterrupted();
29660
+ const fallback = this.deps.agent.llmErrorMessage;
29661
+ if (fallback && !ttsFirstByteSent.value && this.isSpeaking) {
29662
+ try {
29663
+ await this.synthesizeSentence(fallback, hookExecutor, hookCtx, ttsFirstByteSent);
29664
+ } catch (err) {
29665
+ getLogger().error(`llmErrorMessage fallback synthesis failed (${label}):`, err);
29666
+ }
29667
+ }
28874
29668
  }
28875
29669
  }
28876
29670
  this.metricsAcc.recordLlmComplete();
@@ -28971,7 +29765,7 @@ var init_stream_handler = __esm({
28971
29765
  // ---------------------------------------------------------------------------
28972
29766
  async initRealtimeAdapter(resolvedPrompt) {
28973
29767
  const label = this.deps.bridge.label;
28974
- this.adapter = this.deps.buildAIAdapter(resolvedPrompt);
29768
+ this.adapter = this.deps.buildAIAdapter(resolvedPrompt, this.resolvedTools ?? void 0);
28975
29769
  let parked;
28976
29770
  if (typeof this.deps.popPrewarmedConnections === "function") {
28977
29771
  try {
@@ -29044,6 +29838,7 @@ var init_stream_handler = __esm({
29044
29838
  response_done: async (eventData) => this.onAdapterResponseDone(eventData),
29045
29839
  speech_started: async () => this.onAdapterSpeechInterrupt(),
29046
29840
  interruption: async () => this.onAdapterSpeechInterrupt(),
29841
+ error: async (eventData) => this.onAdapterError(eventData),
29047
29842
  function_call: async (eventData) => {
29048
29843
  if (this.adapter instanceof OpenAIRealtimeAdapter) {
29049
29844
  await this.handleFunctionCall(eventData);
@@ -29130,21 +29925,31 @@ var init_stream_handler = __esm({
29130
29925
  if (!this.metricsAcc.turnActive) this.metricsAcc.startTurn();
29131
29926
  this.currentAgentText = "";
29132
29927
  this.responseAudioStarted = false;
29928
+ this.currentTurnIndex = this.metricsAcc.reserveTurnIndex();
29133
29929
  this.userTranscriptPending = true;
29134
29930
  await this.emitUserSpeechEnded();
29135
29931
  }
29136
29932
  async onAdapterTranscriptInput(inputText) {
29137
- const stripped = inputText.trim().toLowerCase();
29138
- if (HALLUCINATIONS.has(stripped) || stripped === "") {
29933
+ if (isSttHallucination(inputText)) {
29139
29934
  getLogger().debug(
29140
29935
  `Realtime transcript_input dropped (likely Whisper hallucination on silence/echo): ${sanitizeLogValue(inputText.slice(0, 60))}`
29141
29936
  );
29142
29937
  this.userTranscriptPending = false;
29938
+ if (this.pendingAssistantTurn !== null) {
29939
+ const buffered = this.pendingAssistantTurn;
29940
+ this.pendingAssistantTurn = null;
29941
+ if (this.pendingAssistantTimer) {
29942
+ clearTimeout(this.pendingAssistantTimer);
29943
+ this.pendingAssistantTimer = null;
29944
+ }
29945
+ await this.flushAssistantTurn(buffered);
29946
+ }
29143
29947
  return;
29144
29948
  }
29145
29949
  getLogger().debug(`User (${this.deps.bridge.label}): ${sanitizeLogValue(inputText)}`);
29146
29950
  this.history.push({ role: "user", text: inputText, timestamp: Date.now() });
29147
- if (this.adapter instanceof OpenAIRealtimeAdapter) {
29951
+ this.emitTranscriptLine("user", inputText);
29952
+ if (this.adapter instanceof OpenAIRealtimeAdapter && this.adapter.getGateResponseOnTranscript()) {
29148
29953
  void this.adapter.requestResponse().catch(
29149
29954
  (err) => getLogger().debug(`Realtime requestResponse failed: ${String(err)}`)
29150
29955
  );
@@ -29191,8 +29996,12 @@ var init_stream_handler = __esm({
29191
29996
  history: [...this.history.entries]
29192
29997
  });
29193
29998
  }
29999
+ const reservedIndex = this.currentTurnIndex;
30000
+ this.emitTranscriptLine("assistant", text);
29194
30001
  this.responseAudioStarted = false;
29195
- await this.emitTurnMetrics(this.metricsAcc.recordTurnComplete(text));
30002
+ await this.emitTurnMetrics(
30003
+ this.metricsAcc.recordTurnComplete(text, reservedIndex ?? void 0)
30004
+ );
29196
30005
  }
29197
30006
  /**
29198
30007
  * Push an assistant turn into history and fire `onTranscript` so host
@@ -29291,7 +30100,9 @@ var init_stream_handler = __esm({
29291
30100
  this.pendingAssistantTimer = null;
29292
30101
  this.userTranscriptPending = false;
29293
30102
  if (buffered !== null) {
29294
- void this.flushAssistantTurn(buffered);
30103
+ this.flushAssistantTurn(buffered).catch(
30104
+ (err) => getLogger().error("flushAssistantTurn (fallback timer) failed:", err)
30105
+ );
29295
30106
  }
29296
30107
  }, _StreamHandler.REALTIME_USER_TRANSCRIPT_WAIT_MS);
29297
30108
  this.responseAudioStarted = false;
@@ -29300,7 +30111,9 @@ var init_stream_handler = __esm({
29300
30111
  await this.flushAssistantTurn(text);
29301
30112
  }
29302
30113
  async onAdapterSpeechInterrupt() {
29303
- if (this.adapter instanceof OpenAIRealtimeAdapter) {
30114
+ const isEngine = this.adapter instanceof OpenAIRealtimeAdapter;
30115
+ const clientManaged = isEngine && this.adapter.getGateResponseOnTranscript();
30116
+ if (clientManaged) {
29304
30117
  const startedAt = this.adapter.currentResponseFirstAudioAt;
29305
30118
  if (startedAt !== null) {
29306
30119
  const elapsedMs = Date.now() - startedAt;
@@ -29313,12 +30126,20 @@ var init_stream_handler = __esm({
29313
30126
  }
29314
30127
  }
29315
30128
  this.deps.bridge.sendClear(this.ws, this.streamSid);
29316
- if (this.adapter instanceof OpenAIRealtimeAdapter) this.adapter.cancelResponse();
30129
+ if (clientManaged) {
30130
+ this.metricsAcc.recordBargeinDetected();
30131
+ this.adapter.cancelResponse();
30132
+ } else if (isEngine) {
30133
+ this.adapter.truncate();
30134
+ }
29317
30135
  this.metricsAcc.recordTurnInterrupted();
29318
30136
  if (this.responseAudioStarted) {
29319
30137
  await this.emitAgentSpeechEnded(true);
29320
30138
  }
29321
30139
  await this.emitUserSpeechStarted();
30140
+ if (clientManaged) {
30141
+ this.metricsAcc.anchorUserSpeechStart();
30142
+ }
29322
30143
  this.currentAgentText = "";
29323
30144
  this.responseAudioStarted = false;
29324
30145
  this.pendingAssistantTurn = null;
@@ -29328,6 +30149,28 @@ var init_stream_handler = __esm({
29328
30149
  }
29329
30150
  this.userTranscriptPending = false;
29330
30151
  }
30152
+ /**
30153
+ * Handle a Realtime ``error`` event (issue #154, fix 4).
30154
+ *
30155
+ * Both Realtime providers dispatch ``('error', …)`` for server-side errors,
30156
+ * non-normal socket closes, and socket errors, but the stream handler
30157
+ * previously had no entry for it in the dispatch table so these were
30158
+ * silently swallowed. We surface them at WARN level with ONLY the error
30159
+ * envelope fields (``type`` / ``code`` / ``message``) — never any audio or
30160
+ * transcript body, to avoid logging PII. The call is NOT terminated: the
30161
+ * provider decides whether to recover, and many of these (e.g. a transient
30162
+ * ``input_audio_buffer_commit_empty``) are non-fatal. Parity with the
30163
+ * Python ``elif ev_type == 'error'`` branches.
30164
+ */
30165
+ async onAdapterError(eventData) {
30166
+ const err = eventData ?? {};
30167
+ const type = typeof err.type === "string" ? err.type : "unknown";
30168
+ const code = typeof err.code === "string" ? err.code : "";
30169
+ const message = typeof err.message === "string" ? err.message : "";
30170
+ getLogger().warn(
30171
+ `Realtime error (${this.deps.bridge.label}) type=${type} code=${code} message=${sanitizeLogValue(message)}`
30172
+ );
30173
+ }
29331
30174
  /**
29332
30175
  * Emit a tool-invocation event into the transcript timeline. Pushes a
29333
30176
  * `role=tool` entry into `history` (so it appears in the dashboard
@@ -29395,7 +30238,8 @@ var init_stream_handler = __esm({
29395
30238
  }
29396
30239
  return;
29397
30240
  }
29398
- const toolDef = this.deps.agent.tools?.find((t) => t.name === fc.name);
30241
+ const effectiveTools = this.resolvedTools ?? this.deps.agent.tools;
30242
+ const toolDef = effectiveTools?.find((t) => t.name === fc.name);
29399
30243
  if (!toolDef) {
29400
30244
  getLogger().warn(`Realtime tool '${fc.name}' not found in agent.tools \u2014 skipping`);
29401
30245
  const result2 = JSON.stringify({ error: `Tool '${fc.name}' not registered`, fallback: true });
@@ -29418,7 +30262,8 @@ var init_stream_handler = __esm({
29418
30262
  if (msg && this.adapter instanceof OpenAIRealtimeAdapter) {
29419
30263
  const realtimeAdapter = this.adapter;
29420
30264
  reassuranceTimer = setTimeout(() => {
29421
- realtimeAdapter.sendText(msg).catch((e) => {
30265
+ const fire = typeof realtimeAdapter.sendReassurance === "function" ? realtimeAdapter.sendReassurance(msg) : realtimeAdapter.sendText(msg);
30266
+ fire.catch((e) => {
29422
30267
  getLogger().warn(`Reassurance message failed for tool '${fc.name}': ${String(e)}`);
29423
30268
  });
29424
30269
  }, afterMs);
@@ -29438,7 +30283,8 @@ var init_stream_handler = __esm({
29438
30283
  parsedArgs,
29439
30284
  {
29440
30285
  call_id: this.callId,
29441
- caller: this.caller
30286
+ caller: this.caller,
30287
+ callee: this.callee
29442
30288
  },
29443
30289
  onProgress
29444
30290
  );
@@ -29588,21 +30434,21 @@ async function appendJsonl(filePath, record2) {
29588
30434
  await import_node_fs2.promises.mkdir(path4.dirname(filePath), { recursive: true });
29589
30435
  await import_node_fs2.promises.appendFile(filePath, JSON.stringify(record2) + "\n", { encoding: "utf8" });
29590
30436
  }
29591
- function rmTree(target) {
30437
+ async function rmTreeAsync(target) {
29592
30438
  try {
29593
- for (const child of fs4.readdirSync(target)) {
30439
+ for (const child of await import_node_fs2.promises.readdir(target)) {
29594
30440
  const childPath = path4.join(target, child);
29595
- const stat = fs4.lstatSync(childPath);
30441
+ const stat = await import_node_fs2.promises.lstat(childPath);
29596
30442
  if (stat.isDirectory()) {
29597
- rmTree(childPath);
30443
+ await rmTreeAsync(childPath);
29598
30444
  } else {
29599
30445
  try {
29600
- fs4.unlinkSync(childPath);
30446
+ await import_node_fs2.promises.unlink(childPath);
29601
30447
  } catch {
29602
30448
  }
29603
30449
  }
29604
30450
  }
29605
- fs4.rmdirSync(target);
30451
+ await import_node_fs2.promises.rmdir(target);
29606
30452
  } catch {
29607
30453
  }
29608
30454
  }
@@ -29684,7 +30530,9 @@ var init_call_log = __esm({
29684
30530
  getLogger().warn(`call_log write failed (${sanitizeLogValue(callId)}): ${sanitizeLogValue(String(err))}`);
29685
30531
  }
29686
30532
  if (crypto5.randomBytes(1)[0] < 5) {
29687
- this.sweepOldDays();
30533
+ void this.sweepOldDays().catch(
30534
+ (e) => getLogger().debug(`call_log sweep failed: ${sanitizeLogValue(String(e))}`)
30535
+ );
29688
30536
  }
29689
30537
  }
29690
30538
  /** Append a single turn record to the call's `transcript.jsonl`. */
@@ -29759,23 +30607,27 @@ var init_call_log = __esm({
29759
30607
  }
29760
30608
  }
29761
30609
  // --- Retention ---------------------------------------------------------
29762
- sweepOldDays() {
30610
+ async sweepOldDays() {
29763
30611
  if (this.root === null) return;
29764
30612
  const days = retentionDays();
29765
30613
  if (days === 0) return;
29766
30614
  const cutoff = Date.now() / 1e3 - days * 86400;
29767
30615
  const callsRoot = path4.join(this.root, "calls");
29768
- if (!fs4.existsSync(callsRoot)) return;
29769
30616
  try {
29770
- for (const yearName of fs4.readdirSync(callsRoot)) {
30617
+ await import_node_fs2.promises.access(callsRoot);
30618
+ } catch {
30619
+ return;
30620
+ }
30621
+ try {
30622
+ for (const yearName of await import_node_fs2.promises.readdir(callsRoot)) {
29771
30623
  if (!/^\d+$/.test(yearName)) continue;
29772
30624
  const yearDir = path4.join(callsRoot, yearName);
29773
- if (!fs4.statSync(yearDir).isDirectory()) continue;
29774
- for (const monthName of fs4.readdirSync(yearDir)) {
30625
+ if (!(await import_node_fs2.promises.stat(yearDir)).isDirectory()) continue;
30626
+ for (const monthName of await import_node_fs2.promises.readdir(yearDir)) {
29775
30627
  if (!/^\d+$/.test(monthName)) continue;
29776
30628
  const monthDir = path4.join(yearDir, monthName);
29777
- if (!fs4.statSync(monthDir).isDirectory()) continue;
29778
- for (const dayName of fs4.readdirSync(monthDir)) {
30629
+ if (!(await import_node_fs2.promises.stat(monthDir)).isDirectory()) continue;
30630
+ for (const dayName of await import_node_fs2.promises.readdir(monthDir)) {
29779
30631
  if (!/^\d+$/.test(dayName)) continue;
29780
30632
  const dayDir = path4.join(monthDir, dayName);
29781
30633
  const y = Number.parseInt(yearName, 10);
@@ -29783,16 +30635,16 @@ var init_call_log = __esm({
29783
30635
  const d = Number.parseInt(dayName, 10);
29784
30636
  const ts = Date.UTC(y, m - 1, d) / 1e3;
29785
30637
  if (ts < cutoff) {
29786
- rmTree(dayDir);
30638
+ await rmTreeAsync(dayDir);
29787
30639
  }
29788
30640
  }
29789
30641
  try {
29790
- if (fs4.readdirSync(monthDir).length === 0) fs4.rmdirSync(monthDir);
30642
+ if ((await import_node_fs2.promises.readdir(monthDir)).length === 0) await import_node_fs2.promises.rmdir(monthDir);
29791
30643
  } catch {
29792
30644
  }
29793
30645
  }
29794
30646
  try {
29795
- if (fs4.readdirSync(yearDir).length === 0) fs4.rmdirSync(yearDir);
30647
+ if ((await import_node_fs2.promises.readdir(yearDir)).length === 0) await import_node_fs2.promises.rmdir(yearDir);
29796
30648
  } catch {
29797
30649
  }
29798
30650
  }
@@ -29833,13 +30685,16 @@ function telnyxHangupOutcome(cause) {
29833
30685
  if (c === "call_rejected" || c === "rejected" || c === "destination_out_of_order") return "failed";
29834
30686
  return null;
29835
30687
  }
29836
- function validateWebhookUrl(url2) {
30688
+ function validateWebhookUrl(url2, allowLoopback = false) {
29837
30689
  const parsed = new URL(url2);
29838
30690
  if (!["http:", "https:"].includes(parsed.protocol)) {
29839
30691
  throw new Error(`Invalid webhook URL scheme: ${parsed.protocol}`);
29840
30692
  }
29841
30693
  const rawHost = parsed.hostname;
29842
30694
  const host = rawHost.replace(/^\[/, "").replace(/\]$/, "").toLowerCase();
30695
+ if (allowLoopback) {
30696
+ return;
30697
+ }
29843
30698
  const BLOCKED_HOSTNAMES = /* @__PURE__ */ new Set([
29844
30699
  "localhost",
29845
30700
  "ip6-localhost",
@@ -29881,6 +30736,34 @@ function validateWebhookUrl(url2) {
29881
30736
  }
29882
30737
  }
29883
30738
  }
30739
+ function extractHost(value) {
30740
+ const trimmed = value.trim();
30741
+ if (!trimmed) return "";
30742
+ let host = trimmed.replace(/^[a-z]+:\/\//i, "").replace(/\/.*$/, "");
30743
+ if (host.startsWith("[")) {
30744
+ return host.slice(1).split("]", 1)[0].toLowerCase();
30745
+ }
30746
+ if (!host.includes("::")) {
30747
+ const lastColon = host.lastIndexOf(":");
30748
+ if (lastColon !== -1 && /^\d+$/.test(host.slice(lastColon + 1))) {
30749
+ host = host.slice(0, lastColon);
30750
+ }
30751
+ }
30752
+ return host.toLowerCase();
30753
+ }
30754
+ function isLoopbackHost(value) {
30755
+ const host = extractHost(value);
30756
+ if (!host) return false;
30757
+ if (host === "localhost" || host === "ip6-localhost" || host === "ip6-loopback") {
30758
+ return true;
30759
+ }
30760
+ if (host === "::1" || host === "::ffff:127.0.0.1") return true;
30761
+ const v4 = /^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/.exec(host);
30762
+ if (v4) {
30763
+ return parseInt(v4[1], 10) === 127;
30764
+ }
30765
+ return false;
30766
+ }
29884
30767
  function validateTelnyxSignature(rawBody, signature, timestamp, publicKey, toleranceSec = 300) {
29885
30768
  try {
29886
30769
  const ts = parseInt(timestamp, 10);
@@ -29944,7 +30827,7 @@ function resolveVariables(template, variables) {
29944
30827
  }
29945
30828
  return result;
29946
30829
  }
29947
- function buildAIAdapter(config2, agent, resolvedPrompt) {
30830
+ function buildAIAdapter(config2, agent, resolvedPrompt, toolsOverride) {
29948
30831
  const engine = agent.engine;
29949
30832
  if (agent.provider === "elevenlabs_convai") {
29950
30833
  if (!engine || engine.kind !== "elevenlabs_convai") {
@@ -29959,12 +30842,24 @@ function buildAIAdapter(config2, agent, resolvedPrompt) {
29959
30842
  agent.firstMessage ?? ""
29960
30843
  );
29961
30844
  }
29962
- const agentTools = agent.tools?.map((t) => ({
29963
- name: t.name,
29964
- description: t.description,
29965
- parameters: t.parameters,
29966
- strict: t.strict
29967
- })) ?? [];
30845
+ const preamblesOn = Boolean(agent.toolCallPreambles);
30846
+ const agentTools = (toolsOverride ?? agent.tools)?.map((t) => {
30847
+ let description = t.description;
30848
+ const reassurance = t.reassurance;
30849
+ const sample = typeof reassurance === "string" ? reassurance : void 0;
30850
+ if (preamblesOn && sample) {
30851
+ description = `${description}
30852
+
30853
+ Preamble sample phrases:
30854
+ - ${sample}`;
30855
+ }
30856
+ return {
30857
+ name: t.name,
30858
+ description,
30859
+ parameters: t.parameters,
30860
+ strict: t.strict
30861
+ };
30862
+ }) ?? [];
29968
30863
  const tools = [...agentTools, TRANSFER_CALL_TOOL, END_CALL_TOOL];
29969
30864
  const isOpenAIEngine = engine && (engine.kind === "openai_realtime" || engine.kind === "openai_realtime_2");
29970
30865
  const openaiKey = isOpenAIEngine ? engine.apiKey : config2.openaiKey ?? "";
@@ -29976,8 +30871,27 @@ function buildAIAdapter(config2, agent, resolvedPrompt) {
29976
30871
  if (engine.inputAudioTranscriptionModel !== void 0) {
29977
30872
  adapterOptions.inputAudioTranscriptionModel = engine.inputAudioTranscriptionModel;
29978
30873
  }
30874
+ if (engine.noiseReduction !== void 0) {
30875
+ adapterOptions.noiseReduction = engine.noiseReduction;
30876
+ }
30877
+ if (engine.turnDetection !== void 0) {
30878
+ adapterOptions.turnDetection = engine.turnDetection;
30879
+ }
30880
+ if (engine.gateResponseOnTranscript !== void 0) {
30881
+ adapterOptions.gateResponseOnTranscript = engine.gateResponseOnTranscript;
30882
+ }
30883
+ }
30884
+ const agentOpts = agent;
30885
+ if (agentOpts.openaiRealtimeNoiseReduction !== void 0) {
30886
+ adapterOptions.noiseReduction = agentOpts.openaiRealtimeNoiseReduction;
30887
+ }
30888
+ if (agentOpts.realtimeTurnDetection !== void 0) {
30889
+ adapterOptions.turnDetection = agentOpts.realtimeTurnDetection;
29979
30890
  }
29980
- const AdapterCtor = engine && engine.kind === "openai_realtime_2" ? OpenAIRealtime2Adapter : OpenAIRealtimeAdapter;
30891
+ if (agentOpts.openaiRealtimeGateResponseOnTranscript !== void 0) {
30892
+ adapterOptions.gateResponseOnTranscript = agentOpts.openaiRealtimeGateResponseOnTranscript;
30893
+ }
30894
+ const AdapterCtor = OpenAIRealtime2Adapter;
29981
30895
  return new AdapterCtor(
29982
30896
  openaiKey,
29983
30897
  agent.model,
@@ -30006,7 +30920,6 @@ var init_server = __esm({
30006
30920
  import_express = __toESM(require("express"));
30007
30921
  import_http = require("http");
30008
30922
  import_ws5 = require("ws");
30009
- init_openai_realtime();
30010
30923
  init_openai_realtime_2();
30011
30924
  init_elevenlabs_convai();
30012
30925
  init_plivo_adapter();
@@ -30069,6 +30982,11 @@ var init_server = __esm({
30069
30982
  getLogger().warn(`TwilioBridge.transferCall rejected: invalid CallSid ${JSON.stringify(callId)}`);
30070
30983
  return;
30071
30984
  }
30985
+ const E164_RE = /^\+[1-9]\d{6,14}$/;
30986
+ if (!E164_RE.test(toNumber)) {
30987
+ getLogger().warn(`TwilioBridge.transferCall rejected: invalid target ${JSON.stringify(toNumber)}`);
30988
+ return;
30989
+ }
30072
30990
  const transferUrl = `https://api.twilio.com/2010-04-01/Accounts/${this.config.twilioSid}/Calls/${callId}.json`;
30073
30991
  await fetch(transferUrl, {
30074
30992
  method: "POST",
@@ -30275,7 +31193,7 @@ var init_server = __esm({
30275
31193
  };
30276
31194
  GRACEFUL_SHUTDOWN_TIMEOUT_MS = 1e4;
30277
31195
  EmbeddedServer = class {
30278
- constructor(config2, agent, onCallStart, onCallEnd, onTranscript, onMessage, recording = false, voicemailMessage = "", onMetrics, pricingOverrides, dashboard = true, dashboardToken = "") {
31196
+ constructor(config2, agent, onCallStart, onCallEnd, onTranscript, onMessage, recording = false, voicemailMessage = "", onMetrics, pricingOverrides, dashboard = true, dashboardToken = "", allowInsecureDashboard = false) {
30279
31197
  this.config = config2;
30280
31198
  this.agent = agent;
30281
31199
  this.onCallStart = onCallStart;
@@ -30287,6 +31205,7 @@ var init_server = __esm({
30287
31205
  this.onMetrics = onMetrics;
30288
31206
  this.dashboard = dashboard;
30289
31207
  this.dashboardToken = dashboardToken;
31208
+ this.allowInsecureDashboard = allowInsecureDashboard;
30290
31209
  this.metricsStore = new MetricsStore();
30291
31210
  this.pricing = mergePricing(pricingOverrides);
30292
31211
  const logRoot = config2.persistRoot === void 0 ? resolveLogRoot() : config2.persistRoot;
@@ -30313,8 +31232,31 @@ var init_server = __esm({
30313
31232
  onMetrics;
30314
31233
  dashboard;
30315
31234
  dashboardToken;
31235
+ allowInsecureDashboard;
30316
31236
  server = null;
30317
31237
  wss = null;
31238
+ /**
31239
+ * Whether the dashboard + ``/api/*`` routes were mounted in ``start()``.
31240
+ * The dashboard is now ALWAYS mounted when enabled (it never 404s): an
31241
+ * exposed, token-less bind is protected with an auto-generated token
31242
+ * rather than refused. This flag is therefore ``true`` whenever the
31243
+ * dashboard is enabled — kept so the startup banner can gate on it.
31244
+ */
31245
+ dashboardMounted = false;
31246
+ /**
31247
+ * The token actually in effect for the dashboard + ``/api/*`` routes,
31248
+ * resolved in ``start()``. One of: the explicit ``dashboardToken`` if set;
31249
+ * a freshly generated UUID when the bind is exposed and
31250
+ * ``allowInsecureDashboard`` is ``false``; or ``''`` (OPEN) for loopback
31251
+ * local dev and for an exposed bind with ``allowInsecureDashboard=true``.
31252
+ * Read by the startup banner (to print the ready URL with ``?token=``) and
31253
+ * by authentic tests (to authenticate).
31254
+ */
31255
+ effectiveDashboardToken = "";
31256
+ /** The token in effect for the dashboard, resolved at ``start()``. Empty string = served OPEN. */
31257
+ get resolvedDashboardToken() {
31258
+ return this.effectiveDashboardToken;
31259
+ }
30318
31260
  twilioTokenWarningLogged = false;
30319
31261
  telnyxSigWarningLogged = false;
30320
31262
  metricsStore;
@@ -30332,12 +31274,14 @@ var init_server = __esm({
30332
31274
  activeConnections = /* @__PURE__ */ new Set();
30333
31275
  activeCallIds = /* @__PURE__ */ new Map();
30334
31276
  /**
30335
- * Per-call AMD result callback set by ``Patter.call()`` for the most
30336
- * recent outbound call. Public so ``client.ts`` can populate it after
30337
- * server start. Cleared after firing once per call to avoid leaking
30338
- * across calls.
31277
+ * Per-call AMD result callbacks keyed by CallSid / call_control_id.
31278
+ * Public so ``client.ts`` can register a callback per outbound call.
31279
+ * The Map slot is deleted after the callback fires once preventing
31280
+ * cross-call misfires when multiple concurrent outbound calls are in
31281
+ * flight (single-slot was a race condition: the last registered callback
31282
+ * would win for every in-flight AMD result).
30339
31283
  */
30340
- onMachineDetection;
31284
+ onMachineDetectionByCallSid = /* @__PURE__ */ new Map();
30341
31285
  /**
30342
31286
  * Pre-warm first-message audio accessor wired by ``Patter.serve()``.
30343
31287
  * The per-call StreamHandler invokes this with its ``callId`` at the
@@ -30458,6 +31402,42 @@ var init_server = __esm({
30458
31402
  this.completions.clear();
30459
31403
  this.amdClass.clear();
30460
31404
  }
31405
+ /**
31406
+ * Decide whether this server is reachable beyond loopback (127.0.0.1).
31407
+ *
31408
+ * The dashboard serves call transcripts and metadata (PII), so before
31409
+ * mounting it unauthenticated we must know whether anyone off-host can
31410
+ * reach the port. Signals (in order):
31411
+ *
31412
+ * (a)+(b) — a public webhook URL. ``client.ts`` resolves
31413
+ * ``config.webhookUrl`` to the live hostname for every serve path:
31414
+ * a cloudflared quick-tunnel host, a {@link StaticTunnel} hostname,
31415
+ * or an explicit ``webhookUrl``. A tunnel directive (signal a) and a
31416
+ * public webhook URL (signal b) therefore both surface here as a
31417
+ * non-loopback, non-private webhook host. This is the case that
31418
+ * matters for tunnels — the whole port (dashboard included) is
31419
+ * published on a public ``*.trycloudflare.com`` URL.
31420
+ *
31421
+ * (c) — an EXPLICIT non-loopback bind override via ``PATTER_BIND_HOST``.
31422
+ * Node's ``http.Server.listen(port, host)`` defaults to 127.0.0.1
31423
+ * here (see ``start()``), so plain local dev is never flagged; only
31424
+ * an operator who set ``PATTER_BIND_HOST`` to e.g. ``0.0.0.0`` is.
31425
+ *
31426
+ * Only loopback webhook hosts (127.0.0.0/8, localhost, ::1) are treated as
31427
+ * not-exposed. RFC1918 / LAN hosts ARE exposure — they are reachable by
31428
+ * other machines on the network — matching the Python SDK's gate.
31429
+ */
31430
+ isExposed() {
31431
+ const bindOverride = process.env.PATTER_BIND_HOST;
31432
+ if (bindOverride && !isLoopbackHost(bindOverride)) {
31433
+ return true;
31434
+ }
31435
+ const host = extractHost(this.config.webhookUrl ?? "");
31436
+ if (host && !isLoopbackHost(host)) {
31437
+ return true;
31438
+ }
31439
+ return false;
31440
+ }
30461
31441
  /** Bind HTTP + WebSocket listeners on `port`, mount carrier webhooks and dashboard routes. */
30462
31442
  async start(port = 8e3) {
30463
31443
  const webhookUrlPattern = /^[a-zA-Z0-9][a-zA-Z0-9.\-]+[a-zA-Z0-9]$/;
@@ -30493,6 +31473,9 @@ var init_server = __esm({
30493
31473
  }
30494
31474
  next();
30495
31475
  });
31476
+ req.on("error", (err) => {
31477
+ next(err);
31478
+ });
30496
31479
  } else {
30497
31480
  next();
30498
31481
  }
@@ -30503,8 +31486,25 @@ var init_server = __esm({
30503
31486
  res.json({ status: "ok", mode: "local" });
30504
31487
  });
30505
31488
  if (this.dashboard) {
30506
- mountDashboard(app, this.metricsStore, this.dashboardToken);
30507
- mountApi(app, this.metricsStore, this.dashboardToken);
31489
+ const exposed = this.isExposed();
31490
+ if (this.dashboardToken) {
31491
+ this.effectiveDashboardToken = this.dashboardToken;
31492
+ } else if (exposed && !this.allowInsecureDashboard) {
31493
+ this.effectiveDashboardToken = import_node_crypto4.default.randomUUID();
31494
+ getLogger().warn(
31495
+ `Dashboard is reachable beyond 127.0.0.1 without a configured token; protecting it with an auto-generated token. Open: http://127.0.0.1:${port}/?token=${this.effectiveDashboardToken} Set dashboardToken for a stable token, or allowInsecureDashboard=true to serve it open.`
31496
+ );
31497
+ } else if (exposed && this.allowInsecureDashboard) {
31498
+ this.effectiveDashboardToken = "";
31499
+ getLogger().warn(
31500
+ "Dashboard served WITHOUT authentication on a publicly-reachable bind (allowInsecureDashboard=true). Call transcripts and metadata are exposed to anyone who can reach this URL."
31501
+ );
31502
+ } else {
31503
+ this.effectiveDashboardToken = "";
31504
+ }
31505
+ mountDashboard(app, this.metricsStore, this.effectiveDashboardToken);
31506
+ mountApi(app, this.metricsStore, this.effectiveDashboardToken);
31507
+ this.dashboardMounted = true;
30508
31508
  }
30509
31509
  app.post("/webhooks/twilio/status", (req, res) => {
30510
31510
  if (this.config.twilioToken) {
@@ -30590,8 +31590,9 @@ var init_server = __esm({
30590
31590
  if (callSid) {
30591
31591
  this.amdClass.set(callSid, classifyTwilioAmd(answeredBy));
30592
31592
  }
30593
- const cb = this.onMachineDetection;
31593
+ const cb = callSid ? this.onMachineDetectionByCallSid.get(callSid) : void 0;
30594
31594
  if (cb && callSid) {
31595
+ this.onMachineDetectionByCallSid.delete(callSid);
30595
31596
  try {
30596
31597
  await cb({
30597
31598
  call_id: callSid,
@@ -30718,8 +31719,9 @@ var init_server = __esm({
30718
31719
  if (amdCallId) {
30719
31720
  this.amdClass.set(amdCallId, classifyTelnyxAmd(amdResult));
30720
31721
  }
30721
- const cbTx = this.onMachineDetection;
31722
+ const cbTx = amdCallId ? this.onMachineDetectionByCallSid.get(amdCallId) : void 0;
30722
31723
  if (cbTx && amdCallId) {
31724
+ this.onMachineDetectionByCallSid.delete(amdCallId);
30723
31725
  try {
30724
31726
  await cbTx({
30725
31727
  call_id: amdCallId,
@@ -30887,8 +31889,13 @@ var init_server = __esm({
30887
31889
  getLogger().info(`AMD result for ${sanitizeLogValue(callUuid)}: ${sanitizeLogValue(amdRaw)}`);
30888
31890
  const classification = classifyPlivoAmd(amdRaw);
30889
31891
  if (callUuid) this.amdClass.set(callUuid, classification);
30890
- const cb = this.onMachineDetection;
31892
+ let cbKey = callUuid && this.onMachineDetectionByCallSid.has(callUuid) ? callUuid : void 0;
31893
+ if (cbKey === void 0 && this.onMachineDetectionByCallSid.size === 1) {
31894
+ cbKey = this.onMachineDetectionByCallSid.keys().next().value;
31895
+ }
31896
+ const cb = cbKey !== void 0 ? this.onMachineDetectionByCallSid.get(cbKey) : void 0;
30891
31897
  if (cb && callUuid) {
31898
+ if (cbKey !== void 0) this.onMachineDetectionByCallSid.delete(cbKey);
30892
31899
  try {
30893
31900
  await cb({
30894
31901
  call_id: callUuid,
@@ -30969,27 +31976,34 @@ var init_server = __esm({
30969
31976
  this.handleTwilioStream(ws, url2);
30970
31977
  }
30971
31978
  });
30972
- await new Promise((resolve2) => {
31979
+ await new Promise((resolve2, reject) => {
30973
31980
  const bindHost = process.env.PATTER_BIND_HOST ?? "127.0.0.1";
31981
+ this.server.once("error", reject);
30974
31982
  this.server.listen(port, bindHost, () => {
31983
+ this.server.off("error", reject);
30975
31984
  getLogger().info(`Server on port ${port}`);
30976
31985
  getLogger().info(`Webhook: https://${this.config.webhookUrl}`);
30977
31986
  getLogger().info(`Phone: ${this.config.phoneNumber}`);
30978
31987
  const model = this.agent.model ?? "";
30979
- if (model && model !== "gpt-4o-mini-realtime-preview" && model.includes("realtime")) {
31988
+ const calibrated = ["gpt-realtime-mini", "gpt-4o-mini-realtime-preview"];
31989
+ if (model && !calibrated.includes(model) && model.includes("realtime")) {
30980
31990
  getLogger().warn(
30981
- `Agent uses "${sanitizeLogValue(model)}" but DEFAULT_PRICING.openai_realtime is calibrated for "gpt-4o-mini-realtime-preview". Pass Patter({ pricing: { openai_realtime: {...} } }) to set rates for this model, otherwise the dashboard cost display will under-report.`
31991
+ `Agent uses "${sanitizeLogValue(model)}" but DEFAULT_PRICING.openai_realtime is calibrated for the default Realtime models (gpt-realtime-mini / gpt-4o-mini-realtime-preview). Pass Patter({ pricing: { openai_realtime: {...} } }) to set rates for this model, otherwise the dashboard cost display will under-report.`
30982
31992
  );
30983
31993
  }
30984
- if (this.dashboard) {
30985
- console.log("\n\u2500\u2500\u2500\u2500 Dashboard \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500");
30986
- getLogger().info(`URL: http://127.0.0.1:${port}/`);
30987
- if (!this.dashboardToken) {
31994
+ if (this.dashboard && this.dashboardMounted) {
31995
+ getLogger().info("\u2500\u2500\u2500\u2500 Dashboard \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500");
31996
+ if (this.effectiveDashboardToken) {
31997
+ getLogger().info(
31998
+ `URL: http://127.0.0.1:${port}/?token=${this.effectiveDashboardToken}`
31999
+ );
32000
+ } else {
32001
+ getLogger().info(`URL: http://127.0.0.1:${port}/`);
30988
32002
  getLogger().warn(
30989
32003
  "Dashboard is enabled without authentication. Set dashboardToken to protect call data. This is safe for local development but should not be exposed on a public network."
30990
32004
  );
30991
32005
  }
30992
- console.log("\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n");
32006
+ getLogger().info("\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500");
30993
32007
  }
30994
32008
  resolve2();
30995
32009
  });
@@ -31065,7 +32079,7 @@ var init_server = __esm({
31065
32079
  onMessage: this.onMessage,
31066
32080
  onMetrics: wrappedMetrics,
31067
32081
  recording: this.recording,
31068
- buildAIAdapter: (resolvedPrompt) => buildAIAdapter(this.config, this.agent, resolvedPrompt),
32082
+ buildAIAdapter: (resolvedPrompt, toolsOverride) => buildAIAdapter(this.config, this.agent, resolvedPrompt, toolsOverride),
31069
32083
  sanitizeVariables,
31070
32084
  resolveVariables,
31071
32085
  popPrewarmAudio: this.popPrewarmAudio,
@@ -31339,17 +32353,18 @@ var init_server = __esm({
31339
32353
  }
31340
32354
  if (this.activeConnections.size > 0) {
31341
32355
  getLogger().info(`Waiting for ${this.activeConnections.size} active connection(s) to close...`);
31342
- await Promise.race([
31343
- new Promise((resolve2) => {
31344
- const checkInterval = setInterval(() => {
31345
- if (this.activeConnections.size === 0) {
31346
- clearInterval(checkInterval);
31347
- resolve2();
31348
- }
31349
- }, 100);
31350
- }),
31351
- new Promise((resolve2) => setTimeout(resolve2, GRACEFUL_SHUTDOWN_TIMEOUT_MS))
31352
- ]);
32356
+ let checkInterval;
32357
+ const drainPromise = new Promise((resolve2) => {
32358
+ checkInterval = setInterval(() => {
32359
+ if (this.activeConnections.size === 0) {
32360
+ clearInterval(checkInterval);
32361
+ resolve2();
32362
+ }
32363
+ }, 100);
32364
+ });
32365
+ const timeoutPromise = new Promise((resolve2) => setTimeout(resolve2, GRACEFUL_SHUTDOWN_TIMEOUT_MS));
32366
+ await Promise.race([drainPromise, timeoutPromise]);
32367
+ clearInterval(checkInterval);
31353
32368
  }
31354
32369
  if (this.activeConnections.size > 0) {
31355
32370
  getLogger().info(`Force-closing ${this.activeConnections.size} remaining connection(s)`);
@@ -31478,6 +32493,9 @@ __export(carrier_config_exports, {
31478
32493
  configureTelnyxNumber: () => configureTelnyxNumber,
31479
32494
  configureTwilioNumber: () => configureTwilioNumber
31480
32495
  });
32496
+ function redactPhone2(n) {
32497
+ return n.slice(0, 3) + "***" + n.slice(-4);
32498
+ }
31481
32499
  async function configureTwilioNumber(accountSid, authToken, phoneNumber, voiceUrl) {
31482
32500
  const auth2 = `Basic ${Buffer.from(`${accountSid}:${authToken}`).toString("base64")}`;
31483
32501
  const listUrl = `${TWILIO_API_BASE}/Accounts/${accountSid}/IncomingPhoneNumbers.json?PhoneNumber=${encodeURIComponent(phoneNumber)}`;
@@ -31493,7 +32511,7 @@ async function configureTwilioNumber(accountSid, authToken, phoneNumber, voiceUr
31493
32511
  const body = await listResp.json();
31494
32512
  const match = body.incoming_phone_numbers?.[0];
31495
32513
  if (!match) {
31496
- throw new Error(`Twilio number ${phoneNumber} not found on account ${accountSid}`);
32514
+ throw new Error(`Twilio number ${redactPhone2(phoneNumber)} not found on account ${accountSid}`);
31497
32515
  }
31498
32516
  const updateUrl = `${TWILIO_API_BASE}/Accounts/${accountSid}/IncomingPhoneNumbers/${match.sid}.json`;
31499
32517
  const form = new URLSearchParams({ VoiceUrl: voiceUrl, VoiceMethod: "POST" });
@@ -31512,17 +32530,20 @@ async function configureTwilioNumber(accountSid, authToken, phoneNumber, voiceUr
31512
32530
  }
31513
32531
  }
31514
32532
  async function configureTelnyxNumber(apiKey, connectionId, phoneNumber) {
31515
- const resp = await fetch(`${TELNYX_API_BASE}/phone_numbers/${encodeURIComponent(phoneNumber)}`, {
31516
- method: "PATCH",
31517
- headers: {
31518
- Authorization: `Bearer ${apiKey}`,
31519
- "Content-Type": "application/json"
31520
- },
31521
- body: JSON.stringify({ connection_id: connectionId })
31522
- });
32533
+ const resp = await fetch(
32534
+ `${TELNYX_API_BASE}/phone_numbers/${encodeURIComponent(phoneNumber)}/voice`,
32535
+ {
32536
+ method: "PATCH",
32537
+ headers: {
32538
+ Authorization: `Bearer ${apiKey}`,
32539
+ "Content-Type": "application/json"
32540
+ },
32541
+ body: JSON.stringify({ connection_id: connectionId, tech_prefix_enabled: false })
32542
+ }
32543
+ );
31523
32544
  if (!resp.ok) {
31524
32545
  throw new Error(
31525
- `Telnyx PATCH /phone_numbers/${phoneNumber} failed: ${resp.status} ${await resp.text()}`
32546
+ `Telnyx PATCH /phone_numbers/${redactPhone2(phoneNumber)}/voice failed: ${resp.status} ${await resp.text()}`
31526
32547
  );
31527
32548
  }
31528
32549
  }
@@ -31572,7 +32593,7 @@ async function autoConfigureCarrier(params) {
31572
32593
  if (provider2 === "telnyx" && params.telnyxKey && params.telnyxConnectionId) {
31573
32594
  try {
31574
32595
  await configureTelnyxNumber(params.telnyxKey, params.telnyxConnectionId, params.phoneNumber);
31575
- log3.info("Telnyx number %s associated with connection %s", params.phoneNumber, params.telnyxConnectionId);
32596
+ log3.info("Telnyx number ***%s associated with connection %s", params.phoneNumber.slice(-4), params.telnyxConnectionId);
31576
32597
  } catch (err) {
31577
32598
  log3.warn("Could not auto-configure Telnyx number: %s", err instanceof Error ? err.message : String(err));
31578
32599
  }
@@ -31722,12 +32743,12 @@ var init_test_mode = __esm({
31722
32743
  }
31723
32744
  continue;
31724
32745
  }
31725
- conversationHistory.push({
31726
- role: "user",
31727
- text: userInput,
31728
- timestamp: Date.now()
31729
- });
31730
32746
  if (onMessage) {
32747
+ conversationHistory.push({
32748
+ role: "user",
32749
+ text: userInput,
32750
+ timestamp: Date.now()
32751
+ });
31731
32752
  try {
31732
32753
  const responseText = await onMessage({
31733
32754
  text: userInput,
@@ -31757,6 +32778,11 @@ var init_test_mode = __esm({
31757
32778
  }
31758
32779
  log3.info("");
31759
32780
  const responseText = parts.join("");
32781
+ conversationHistory.push({
32782
+ role: "user",
32783
+ text: userInput,
32784
+ timestamp: Date.now()
32785
+ });
31760
32786
  if (responseText) {
31761
32787
  conversationHistory.push({
31762
32788
  role: "assistant",
@@ -33189,6 +34215,7 @@ __export(index_exports, {
33189
34215
  GoogleLLM: () => LLM5,
33190
34216
  GroqLLM: () => LLM3,
33191
34217
  Guardrail: () => Guardrail,
34218
+ HermesLLM: () => LLM7,
33192
34219
  IVRActivity: () => IVRActivity,
33193
34220
  InworldTTS: () => TTS7,
33194
34221
  KrispFrameDuration: () => KrispFrameDuration,
@@ -33199,6 +34226,8 @@ __export(index_exports, {
33199
34226
  MetricsStore: () => MetricsStore,
33200
34227
  MinWordsStrategy: () => MinWordsStrategy,
33201
34228
  Ngrok: () => Ngrok,
34229
+ OpenAICompatibleLLM: () => LLM6,
34230
+ OpenAICompatibleLLMProvider: () => OpenAICompatibleLLMProvider,
33202
34231
  OpenAILLM: () => LLM,
33203
34232
  OpenAILLMProvider: () => OpenAILLMProvider,
33204
34233
  OpenAIRealtime: () => Realtime,
@@ -33212,10 +34241,12 @@ __export(index_exports, {
33212
34241
  OpenAITranscribeSTT: () => STT3,
33213
34242
  OpenAITranscriptionModel: () => OpenAITranscriptionModel,
33214
34243
  OpenAIVoice: () => OpenAIVoice,
34244
+ OpenClawLLM: () => LLM8,
33215
34245
  PRICING_LAST_UPDATED: () => PRICING_LAST_UPDATED,
33216
34246
  PRICING_VERSION: () => PRICING_VERSION,
33217
34247
  PartialStreamError: () => PartialStreamError,
33218
34248
  Patter: () => Patter,
34249
+ PatterConfigError: () => PatterConfigError,
33219
34250
  PatterConnectionError: () => PatterConnectionError,
33220
34251
  PatterError: () => PatterError,
33221
34252
  PatterTool: () => PatterTool,
@@ -33303,6 +34334,8 @@ __export(index_exports, {
33303
34334
  mulawToPcm16: () => mulawToPcm16,
33304
34335
  notifyDashboard: () => notifyDashboard,
33305
34336
  openaiTts: () => openaiTts,
34337
+ openclawConsult: () => openclawConsult,
34338
+ openclawPostCallNotifier: () => openclawPostCallNotifier,
33306
34339
  pcm16ToMulaw: () => pcm16ToMulaw,
33307
34340
  resample16kTo8k: () => resample16kTo8k,
33308
34341
  resample24kTo16k: () => resample24kTo16k,
@@ -33333,6 +34366,7 @@ init_server();
33333
34366
 
33334
34367
  // src/engines/openai.ts
33335
34368
  init_cjs_shims();
34369
+ init_openai_realtime();
33336
34370
  var Realtime = class {
33337
34371
  kind = "openai_realtime";
33338
34372
  apiKey;
@@ -33340,6 +34374,9 @@ var Realtime = class {
33340
34374
  voice;
33341
34375
  reasoningEffort;
33342
34376
  inputAudioTranscriptionModel;
34377
+ noiseReduction;
34378
+ turnDetection;
34379
+ gateResponseOnTranscript;
33343
34380
  constructor(opts = {}) {
33344
34381
  const key = opts.apiKey ?? process.env.OPENAI_API_KEY;
33345
34382
  if (!key) {
@@ -33347,16 +34384,26 @@ var Realtime = class {
33347
34384
  "OpenAI Realtime requires an apiKey. Pass { apiKey: 'sk-...' } or set OPENAI_API_KEY in the environment."
33348
34385
  );
33349
34386
  }
34387
+ if (opts.noiseReduction !== void 0 && opts.noiseReduction !== "near_field" && opts.noiseReduction !== "far_field") {
34388
+ throw new Error(
34389
+ `noiseReduction must be 'near_field' or 'far_field', got ${JSON.stringify(opts.noiseReduction)}`
34390
+ );
34391
+ }
34392
+ validateRealtimeTurnDetection(opts.turnDetection);
33350
34393
  this.apiKey = key;
33351
34394
  this.model = opts.model ?? "gpt-realtime-mini";
33352
34395
  this.voice = opts.voice ?? "alloy";
33353
34396
  this.reasoningEffort = opts.reasoningEffort;
33354
34397
  this.inputAudioTranscriptionModel = opts.inputAudioTranscriptionModel;
34398
+ this.noiseReduction = opts.noiseReduction;
34399
+ this.turnDetection = opts.turnDetection;
34400
+ this.gateResponseOnTranscript = opts.gateResponseOnTranscript;
33355
34401
  }
33356
34402
  };
33357
34403
 
33358
34404
  // src/engines/openai-2.ts
33359
34405
  init_cjs_shims();
34406
+ init_openai_realtime();
33360
34407
  var Realtime2 = class {
33361
34408
  kind = "openai_realtime_2";
33362
34409
  apiKey;
@@ -33364,6 +34411,9 @@ var Realtime2 = class {
33364
34411
  voice;
33365
34412
  reasoningEffort;
33366
34413
  inputAudioTranscriptionModel;
34414
+ noiseReduction;
34415
+ turnDetection;
34416
+ gateResponseOnTranscript;
33367
34417
  constructor(opts = {}) {
33368
34418
  const key = opts.apiKey ?? process.env.OPENAI_API_KEY;
33369
34419
  if (!key) {
@@ -33371,11 +34421,20 @@ var Realtime2 = class {
33371
34421
  "OpenAI Realtime 2 requires an apiKey. Pass { apiKey: 'sk-...' } or set OPENAI_API_KEY in the environment."
33372
34422
  );
33373
34423
  }
34424
+ if (opts.noiseReduction !== void 0 && opts.noiseReduction !== "near_field" && opts.noiseReduction !== "far_field") {
34425
+ throw new Error(
34426
+ `noiseReduction must be 'near_field' or 'far_field', got ${JSON.stringify(opts.noiseReduction)}`
34427
+ );
34428
+ }
34429
+ validateRealtimeTurnDetection(opts.turnDetection);
33374
34430
  this.apiKey = key;
33375
34431
  this.model = opts.model ?? "gpt-realtime-2";
33376
34432
  this.voice = opts.voice ?? "alloy";
33377
34433
  this.reasoningEffort = opts.reasoningEffort;
33378
34434
  this.inputAudioTranscriptionModel = opts.inputAudioTranscriptionModel;
34435
+ this.noiseReduction = opts.noiseReduction;
34436
+ this.turnDetection = opts.turnDetection;
34437
+ this.gateResponseOnTranscript = opts.gateResponseOnTranscript;
33379
34438
  }
33380
34439
  };
33381
34440
 
@@ -33809,7 +34868,7 @@ function resolvePersistRoot(persist) {
33809
34868
  if (typeof persist === "string") return resolveLogRoot(persist);
33810
34869
  const envRoot = resolveLogRoot();
33811
34870
  if (envRoot !== null) return envRoot;
33812
- return resolveLogRoot("auto");
34871
+ return null;
33813
34872
  }
33814
34873
  function closeParkedConnections(slot) {
33815
34874
  if (slot.stt) {
@@ -34093,7 +35152,12 @@ var Patter = class {
34093
35152
  ...working,
34094
35153
  provider: "openai_realtime",
34095
35154
  model: working.model ?? engine.model,
34096
- voice: working.voice ?? engine.voice
35155
+ voice: working.voice ?? engine.voice,
35156
+ // Explicit agent() kwargs win over the engine marker value
35157
+ // (same precedence as Python: explicit kwarg > engine > default).
35158
+ openaiRealtimeNoiseReduction: working.openaiRealtimeNoiseReduction ?? engine.noiseReduction,
35159
+ realtimeTurnDetection: working.realtimeTurnDetection ?? engine.turnDetection,
35160
+ openaiRealtimeGateResponseOnTranscript: working.openaiRealtimeGateResponseOnTranscript ?? engine.gateResponseOnTranscript
34097
35161
  };
34098
35162
  if (!this.localConfig.openaiKey) {
34099
35163
  this.localConfig = { ...this.localConfig, openaiKey: engine.apiKey };
@@ -34118,6 +35182,11 @@ var Patter = class {
34118
35182
  throw new Error(`provider must be one of: ${valid.join(", ")}. Got: '${working.provider}'`);
34119
35183
  }
34120
35184
  }
35185
+ if (working.consult && working.provider === "elevenlabs_convai") {
35186
+ getLogger().warn(
35187
+ "consult is set but provider is ElevenLabs ConvAI; the consult tool is only injected in Realtime and Pipeline modes and will be ignored for this agent."
35188
+ );
35189
+ }
34121
35190
  if (working.llm !== void 0) {
34122
35191
  const llm = working.llm;
34123
35192
  if (!llm || typeof llm.stream !== "function") {
@@ -34256,7 +35325,8 @@ var Patter = class {
34256
35325
  opts.onMetrics,
34257
35326
  opts.pricing,
34258
35327
  opts.dashboard ?? true,
34259
- opts.dashboardToken ?? ""
35328
+ opts.dashboardToken ?? "",
35329
+ opts.allowInsecureDashboard ?? false
34260
35330
  );
34261
35331
  this.embeddedServer.popPrewarmAudio = this.popPrewarmAudio;
34262
35332
  this.embeddedServer.popPrewarmedConnections = this.popPrewarmedConnections;
@@ -34668,8 +35738,8 @@ var Patter = class {
34668
35738
  if (!options.to) {
34669
35739
  throw new Error("'to' phone number is required");
34670
35740
  }
34671
- if (!options.to.startsWith("+")) {
34672
- throw new Error(`'to' must be in E.164 format (e.g., '+1234567890'). Got: '${options.to}'`);
35741
+ if (!/^\+[1-9]\d{6,14}$/.test(options.to)) {
35742
+ throw new Error("'to' must be E.164 format (+<country><digits>). Got value with invalid format.");
34673
35743
  }
34674
35744
  if (options.wait && !this.embeddedServer) {
34675
35745
  throw new PatterConnectionError(
@@ -34680,9 +35750,6 @@ var Patter = class {
34680
35750
  let callId = "";
34681
35751
  const effectiveRingTimeout = options.ringTimeout === void 0 ? 25 : options.ringTimeout;
34682
35752
  const wantsAmd = options.machineDetection !== false || Boolean(options.voicemailMessage);
34683
- if (this.embeddedServer) {
34684
- this.embeddedServer.onMachineDetection = options.onMachineDetection;
34685
- }
34686
35753
  if (options.agent.prewarm !== false) {
34687
35754
  this.spawnProviderWarmup(options.agent);
34688
35755
  }
@@ -34727,6 +35794,12 @@ var Patter = class {
34727
35794
  };
34728
35795
  if (this.embeddedServer) {
34729
35796
  this.embeddedServer.metricsStore.recordCallInitiated(initiatedPayload);
35797
+ if (options.onMachineDetection) {
35798
+ this.embeddedServer.onMachineDetectionByCallSid.set(
35799
+ telnyxCallId,
35800
+ options.onMachineDetection
35801
+ );
35802
+ }
34730
35803
  }
34731
35804
  try {
34732
35805
  const { notifyDashboard: notifyDashboard2 } = await Promise.resolve().then(() => (init_persistence(), persistence_exports));
@@ -34792,6 +35865,12 @@ var Patter = class {
34792
35865
  };
34793
35866
  if (this.embeddedServer) {
34794
35867
  this.embeddedServer.metricsStore.recordCallInitiated(initiatedPayload);
35868
+ if (options.onMachineDetection) {
35869
+ this.embeddedServer.onMachineDetectionByCallSid.set(
35870
+ plivoCallId,
35871
+ options.onMachineDetection
35872
+ );
35873
+ }
34795
35874
  }
34796
35875
  try {
34797
35876
  const { notifyDashboard: notifyDashboard2 } = await Promise.resolve().then(() => (init_persistence(), persistence_exports));
@@ -34861,6 +35940,12 @@ var Patter = class {
34861
35940
  };
34862
35941
  if (this.embeddedServer) {
34863
35942
  this.embeddedServer.metricsStore.recordCallInitiated(initiatedPayload);
35943
+ if (options.onMachineDetection) {
35944
+ this.embeddedServer.onMachineDetectionByCallSid.set(
35945
+ twilioCallSid,
35946
+ options.onMachineDetection
35947
+ );
35948
+ }
34864
35949
  if (twilioNotificationsPath) {
34865
35950
  getLogger().info(
34866
35951
  `Outbound call ${twilioCallSid} placed. Twilio notifications: https://api.twilio.com${twilioNotificationsPath} (check here if the call drops with no audio).`
@@ -35144,6 +36229,7 @@ function defineTool(input) {
35144
36229
  }
35145
36230
 
35146
36231
  // src/index.ts
36232
+ init_consult();
35147
36233
  init_logger();
35148
36234
  init_sentence_chunker();
35149
36235
  init_pipeline_hooks();
@@ -35361,8 +36447,8 @@ var FallbackLLMProvider = class {
35361
36447
  * markers are filtered out so callers can concatenate the yielded strings
35362
36448
  * directly.
35363
36449
  */
35364
- async *completeStream(messages, tools) {
35365
- for await (const chunk of this.stream(messages, tools)) {
36450
+ async *completeStream(messages, tools, opts) {
36451
+ for await (const chunk of this.stream(messages, tools, opts)) {
35366
36452
  if (chunk.type === "text") {
35367
36453
  yield chunk.content ?? "";
35368
36454
  }
@@ -35372,14 +36458,15 @@ var FallbackLLMProvider = class {
35372
36458
  // LLMProvider implementation
35373
36459
  // -----------------------------------------------------------------------
35374
36460
  /** Streaming entry point — yields chunks from the first provider that succeeds. */
35375
- async *stream(messages, tools) {
36461
+ async *stream(messages, tools, opts) {
35376
36462
  const errors = [];
35377
36463
  const result = yield* this.tryProviders(
35378
36464
  messages,
35379
36465
  tools,
35380
36466
  /* availableOnly */
35381
36467
  true,
35382
- errors
36468
+ errors,
36469
+ opts
35383
36470
  );
35384
36471
  if (result === "done") return;
35385
36472
  getLogger().warn(
@@ -35390,7 +36477,8 @@ var FallbackLLMProvider = class {
35390
36477
  tools,
35391
36478
  /* availableOnly */
35392
36479
  false,
35393
- errors
36480
+ errors,
36481
+ opts
35394
36482
  );
35395
36483
  if (retryResult === "done") return;
35396
36484
  throw new AllProvidersFailedError(
@@ -35400,7 +36488,7 @@ var FallbackLLMProvider = class {
35400
36488
  // -----------------------------------------------------------------------
35401
36489
  // Internals
35402
36490
  // -----------------------------------------------------------------------
35403
- async *tryProviders(messages, tools, availableOnly, errors) {
36491
+ async *tryProviders(messages, tools, availableOnly, errors, opts) {
35404
36492
  for (let i = 0; i < this.providers.length; i++) {
35405
36493
  if (availableOnly && !this.availability[i]) continue;
35406
36494
  for (let attempt = 0; attempt < this.maxRetryPerProvider; attempt++) {
@@ -35409,7 +36497,7 @@ var FallbackLLMProvider = class {
35409
36497
  `FallbackLLMProvider: trying provider ${i}${attempt > 0 ? ` (retry ${attempt})` : ""}`
35410
36498
  );
35411
36499
  let yieldedTokens = false;
35412
- const gen = this.providers[i].stream(messages, tools);
36500
+ const gen = this.providers[i].stream(messages, tools, opts);
35413
36501
  while (true) {
35414
36502
  let iterResult;
35415
36503
  try {
@@ -35523,7 +36611,7 @@ var PARAMETERS_SCHEMA = {
35523
36611
  required: ["to"]
35524
36612
  };
35525
36613
  var DEFAULT_NAME = "make_phone_call";
35526
- var DEFAULT_DESCRIPTION = "Place a real outbound phone call. Returns a JSON object with the full transcript, call status, duration in seconds, and cost. Use this when the user asks you to call someone, schedule appointments by phone, or otherwise reach a human via voice.";
36614
+ var DEFAULT_DESCRIPTION2 = "Place a real outbound phone call. Returns a JSON object with the full transcript, call status, duration in seconds, and cost. Use this when the user asks you to call someone, schedule appointments by phone, or otherwise reach a human via voice.";
35527
36615
  var PatterTool = class {
35528
36616
  name;
35529
36617
  description;
@@ -35532,6 +36620,11 @@ var PatterTool = class {
35532
36620
  maxDurationSec;
35533
36621
  recording;
35534
36622
  started = false;
36623
+ /** Cached in-progress (or completed) start promise so concurrent execute()
36624
+ * callers all await the same boot sequence instead of each racing into
36625
+ * phone.serve(). Reset to null on failure so callers can retry after a
36626
+ * transient error. */
36627
+ startPromise = null;
35535
36628
  constructor(opts) {
35536
36629
  if (!opts.phone) {
35537
36630
  throw new Error("PatterTool: `phone` (a Patter instance) is required.");
@@ -35539,7 +36632,7 @@ var PatterTool = class {
35539
36632
  this.phone = opts.phone;
35540
36633
  this.agent = opts.agent;
35541
36634
  this.name = opts.name ?? DEFAULT_NAME;
35542
- this.description = opts.description ?? DEFAULT_DESCRIPTION;
36635
+ this.description = opts.description ?? DEFAULT_DESCRIPTION2;
35543
36636
  this.maxDurationSec = Math.max(5, Math.min(1800, opts.maxDurationSec ?? 180));
35544
36637
  this.recording = opts.recording ?? false;
35545
36638
  }
@@ -35583,8 +36676,21 @@ var PatterTool = class {
35583
36676
  * `serve()` provides here. No `onCallEnd` callback is wired: the SDK's own
35584
36677
  * per-callId completion registry resolves the result, so the user's
35585
36678
  * `onCallEnd` slot is left free.
36679
+ *
36680
+ * Idempotent and concurrency-safe: concurrent callers all await the same
36681
+ * in-progress boot instead of each racing into `phone.serve()`.
35586
36682
  */
35587
36683
  async start() {
36684
+ if (this.startPromise) return this.startPromise;
36685
+ this.startPromise = this._doStart();
36686
+ try {
36687
+ await this.startPromise;
36688
+ } catch (err) {
36689
+ this.startPromise = null;
36690
+ throw err;
36691
+ }
36692
+ }
36693
+ async _doStart() {
35588
36694
  if (this.started) return;
35589
36695
  if (!this.agent) {
35590
36696
  throw new Error(
@@ -35610,6 +36716,7 @@ var PatterTool = class {
35610
36716
  }
35611
36717
  }
35612
36718
  this.started = false;
36719
+ this.startPromise = null;
35613
36720
  }
35614
36721
  // --- Execution ----------------------------------------------------------
35615
36722
  /**
@@ -35981,7 +37088,8 @@ var UltravoxRealtimeAdapter = class {
35981
37088
  "X-API-Key": this.apiKey,
35982
37089
  "Content-Type": "application/json"
35983
37090
  },
35984
- body: JSON.stringify(body)
37091
+ body: JSON.stringify(body),
37092
+ signal: AbortSignal.timeout(15e3)
35985
37093
  });
35986
37094
  if (!resp.ok) {
35987
37095
  const text = await resp.text().catch(() => "");
@@ -35992,12 +37100,36 @@ var UltravoxRealtimeAdapter = class {
35992
37100
  this.ws = new import_ws6.default(call.joinUrl);
35993
37101
  await new Promise((resolve2, reject) => {
35994
37102
  const ws = this.ws;
37103
+ let settled = false;
37104
+ const timer = setTimeout(() => {
37105
+ if (settled) return;
37106
+ settled = true;
37107
+ ws.off("open", onOpen);
37108
+ ws.off("error", onError);
37109
+ this.ws = null;
37110
+ try {
37111
+ ws.close();
37112
+ } catch {
37113
+ }
37114
+ reject(new Error("Ultravox WS connect timeout"));
37115
+ }, 15e3);
35995
37116
  const onOpen = () => {
37117
+ if (settled) return;
37118
+ settled = true;
37119
+ clearTimeout(timer);
35996
37120
  ws.off("error", onError);
35997
37121
  resolve2();
35998
37122
  };
35999
37123
  const onError = (err) => {
37124
+ if (settled) return;
37125
+ settled = true;
37126
+ clearTimeout(timer);
36000
37127
  ws.off("open", onOpen);
37128
+ this.ws = null;
37129
+ try {
37130
+ ws.close();
37131
+ } catch {
37132
+ }
36001
37133
  reject(err);
36002
37134
  };
36003
37135
  ws.once("open", onOpen);
@@ -36845,7 +37977,7 @@ var STT = class extends DeepgramSTT {
36845
37977
  {
36846
37978
  endpointingMs: opts.endpointingMs ?? 150,
36847
37979
  utteranceEndMs: opts.utteranceEndMs === null ? null : opts.utteranceEndMs ?? 1e3,
36848
- smartFormat: opts.smartFormat ?? true,
37980
+ smartFormat: opts.smartFormat ?? false,
36849
37981
  interimResults: opts.interimResults ?? true,
36850
37982
  ...opts.vadEvents !== void 0 ? { vadEvents: opts.vadEvents } : {}
36851
37983
  }
@@ -37165,7 +38297,7 @@ var CartesiaSTT = class {
37165
38297
  });
37166
38298
  ws.once("error", (err) => {
37167
38299
  clearTimeout(timer);
37168
- reject(err);
38300
+ reject(new Error(`Cartesia STT park connect failed: ${describeWarmupError(err)}`));
37169
38301
  });
37170
38302
  });
37171
38303
  return ws;
@@ -37521,7 +38653,7 @@ var SonioxSTT = class _SonioxSTT {
37521
38653
  /** Stable pricing/dashboard key — read by stream-handler/metrics. */
37522
38654
  static providerKey = "soniox";
37523
38655
  ws = null;
37524
- callbacks = [];
38656
+ callbacks = /* @__PURE__ */ new Set();
37525
38657
  final = new TokenAccumulator();
37526
38658
  keepaliveTimer = null;
37527
38659
  apiKey;
@@ -37683,16 +38815,13 @@ var SonioxSTT = class _SonioxSTT {
37683
38815
  if (audio.length === 0) return;
37684
38816
  this.ws.send(audio);
37685
38817
  }
37686
- /** Register a transcript listener (max 10 concurrent listeners). */
38818
+ /** Register a transcript listener. */
37687
38819
  onTranscript(callback) {
37688
- if (this.callbacks.length >= 10) {
37689
- getLogger().warn(
37690
- "SonioxSTT: maximum of 10 onTranscript callbacks reached; replacing the last callback."
37691
- );
37692
- this.callbacks[this.callbacks.length - 1] = callback;
37693
- return;
37694
- }
37695
- this.callbacks.push(callback);
38820
+ this.callbacks.add(callback);
38821
+ }
38822
+ /** Unregister a previously registered transcript listener. */
38823
+ offTranscript(callback) {
38824
+ this.callbacks.delete(callback);
37696
38825
  }
37697
38826
  /** Send the empty-frame stream terminator and close the WebSocket. */
37698
38827
  close() {
@@ -37774,12 +38903,6 @@ var VALID_DOMAINS = /* @__PURE__ */ new Set([
37774
38903
  AssemblyAIDomain.GENERAL,
37775
38904
  AssemblyAIDomain.MEDICAL_V1
37776
38905
  ]);
37777
- var AssemblyAISTTNotConnectedError = class extends Error {
37778
- constructor(message = "AssemblyAISTT is not connected") {
37779
- super(message);
37780
- this.name = "AssemblyAISTTNotConnectedError";
37781
- }
37782
- };
37783
38906
  var AssemblyAISTT = class _AssemblyAISTT {
37784
38907
  constructor(apiKey, options = {}) {
37785
38908
  this.apiKey = apiKey;
@@ -38103,9 +39226,10 @@ var AssemblyAISTT = class _AssemblyAISTT {
38103
39226
  */
38104
39227
  updateConfiguration(params) {
38105
39228
  if (!this.ws || this.ws.readyState !== import_ws9.default.OPEN) {
38106
- throw new AssemblyAISTTNotConnectedError(
38107
- "AssemblyAISTT.updateConfiguration: WebSocket is not open"
39229
+ getLogger().debug(
39230
+ "AssemblyAISTT.updateConfiguration: WebSocket is not open \u2014 dropping update (call teardown)."
38108
39231
  );
39232
+ return;
38109
39233
  }
38110
39234
  const payload = {
38111
39235
  type: AssemblyAIClientFrame.UPDATE_CONFIGURATION
@@ -38127,9 +39251,10 @@ var AssemblyAISTT = class _AssemblyAISTT {
38127
39251
  /** Force the server to finalize the current turn (for barge-in). */
38128
39252
  forceEndpoint() {
38129
39253
  if (!this.ws || this.ws.readyState !== import_ws9.default.OPEN) {
38130
- throw new AssemblyAISTTNotConnectedError(
38131
- "AssemblyAISTT.forceEndpoint: WebSocket is not open"
39254
+ getLogger().debug(
39255
+ "AssemblyAISTT.forceEndpoint: WebSocket is not open \u2014 dropping request (call teardown)."
38132
39256
  );
39257
+ return;
38133
39258
  }
38134
39259
  this.ws.send(JSON.stringify({ type: AssemblyAIClientFrame.FORCE_ENDPOINT }));
38135
39260
  }
@@ -38144,6 +39269,14 @@ var AssemblyAISTT = class _AssemblyAISTT {
38144
39269
  async close() {
38145
39270
  this.closing = true;
38146
39271
  if (!this.ws) return;
39272
+ if (this.chunkBufferBytes > 0 && this.ws.readyState === import_ws9.default.OPEN) {
39273
+ try {
39274
+ this.ws.send(Buffer.concat(this.chunkBuffer, this.chunkBufferBytes));
39275
+ } catch {
39276
+ }
39277
+ this.chunkBuffer = [];
39278
+ this.chunkBufferBytes = 0;
39279
+ }
38147
39280
  try {
38148
39281
  this.ws.send(JSON.stringify({ type: AssemblyAIClientFrame.TERMINATE }));
38149
39282
  } catch {
@@ -39350,7 +40483,7 @@ var TTS3 = class extends OpenAITTS {
39350
40483
  opts.model ?? "gpt-4o-mini-tts",
39351
40484
  opts.instructions ?? null,
39352
40485
  opts.speed ?? null,
39353
- opts.antiAlias ?? false
40486
+ opts.antiAlias ?? true
39354
40487
  );
39355
40488
  }
39356
40489
  };
@@ -39525,7 +40658,6 @@ init_cjs_shims();
39525
40658
  init_cjs_shims();
39526
40659
  init_logger();
39527
40660
  var INWORLD_BASE_URL = "https://api.inworld.ai/tts/v1/voice:stream";
39528
- var INWORLD_VOICES_URL = "https://api.inworld.ai/tts/v1/voices";
39529
40661
  var InworldModel = {
39530
40662
  TTS_2: "inworld-tts-2",
39531
40663
  TTS_1_5_MAX: "inworld-tts-1.5-max",
@@ -39614,7 +40746,8 @@ var InworldTTS = class {
39614
40746
  */
39615
40747
  async warmup() {
39616
40748
  try {
39617
- await fetch(INWORLD_VOICES_URL, {
40749
+ const voicesUrl = new URL(this.baseUrl).origin + "/tts/v1/voices";
40750
+ await fetch(voicesUrl, {
39618
40751
  method: "GET",
39619
40752
  headers: {
39620
40753
  Authorization: `Basic ${this.authToken}`
@@ -39874,58 +41007,87 @@ var AnthropicLLMProvider = class {
39874
41007
  const toolIndexByBlock = /* @__PURE__ */ new Map();
39875
41008
  const toolIdByBlock = /* @__PURE__ */ new Map();
39876
41009
  let nextIndex = 0;
39877
- while (true) {
39878
- const { done, value } = await reader.read();
39879
- if (done) break;
39880
- buffer += decoder.decode(value, { stream: true });
39881
- const lines = buffer.split("\n");
39882
- buffer = lines.pop() || "";
39883
- for (const line of lines) {
39884
- const trimmed = line.trim();
39885
- if (!trimmed.startsWith("data: ")) continue;
39886
- const data = trimmed.slice(6);
39887
- if (!data || data === "[DONE]") continue;
39888
- let event;
39889
- try {
39890
- event = JSON.parse(data);
39891
- } catch {
39892
- continue;
39893
- }
39894
- if (event.type === "content_block_start" && event.content_block?.type === "tool_use") {
39895
- const blockIdx = event.index ?? 0;
39896
- const toolId = event.content_block.id ?? "";
39897
- const toolName = event.content_block.name ?? "";
39898
- const patterIndex = nextIndex++;
39899
- toolIndexByBlock.set(blockIdx, patterIndex);
39900
- toolIdByBlock.set(blockIdx, toolId);
39901
- yield {
39902
- type: "tool_call",
39903
- index: patterIndex,
39904
- id: toolId,
39905
- name: toolName,
39906
- arguments: ""
39907
- };
39908
- continue;
39909
- }
39910
- if (event.type === "content_block_delta") {
39911
- if (event.delta?.type === "text_delta" && event.delta.text) {
39912
- yield { type: "text", content: event.delta.text };
41010
+ let inputTokens = 0;
41011
+ let outputTokens = 0;
41012
+ let cacheReadTokens = 0;
41013
+ let cacheWriteTokens = 0;
41014
+ try {
41015
+ while (true) {
41016
+ const { done, value } = await reader.read();
41017
+ if (done) break;
41018
+ buffer += decoder.decode(value, { stream: true });
41019
+ const lines = buffer.split("\n");
41020
+ buffer = lines.pop() || "";
41021
+ for (const line of lines) {
41022
+ const trimmed = line.trim();
41023
+ if (!trimmed.startsWith("data: ")) continue;
41024
+ const data = trimmed.slice(6);
41025
+ if (!data || data === "[DONE]") continue;
41026
+ let event;
41027
+ try {
41028
+ event = JSON.parse(data);
41029
+ } catch {
41030
+ continue;
41031
+ }
41032
+ if (event.type === "message_start" && event.message?.usage) {
41033
+ const u = event.message.usage;
41034
+ if (u.input_tokens) inputTokens = u.input_tokens;
41035
+ if (u.cache_creation_input_tokens) cacheWriteTokens = u.cache_creation_input_tokens;
41036
+ if (u.cache_read_input_tokens) cacheReadTokens = u.cache_read_input_tokens;
41037
+ continue;
41038
+ }
41039
+ if (event.type === "message_delta" && event.usage?.output_tokens) {
41040
+ outputTokens = event.usage.output_tokens;
39913
41041
  continue;
39914
41042
  }
39915
- if (event.delta?.type === "input_json_delta" && event.delta.partial_json) {
41043
+ if (event.type === "content_block_start" && event.content_block?.type === "tool_use") {
39916
41044
  const blockIdx = event.index ?? 0;
39917
- const patterIndex = toolIndexByBlock.get(blockIdx);
39918
- if (patterIndex !== void 0) {
39919
- yield {
39920
- type: "tool_call",
39921
- index: patterIndex,
39922
- id: toolIdByBlock.get(blockIdx),
39923
- arguments: event.delta.partial_json
39924
- };
41045
+ const toolId = event.content_block.id ?? "";
41046
+ const toolName = event.content_block.name ?? "";
41047
+ const patterIndex = nextIndex++;
41048
+ toolIndexByBlock.set(blockIdx, patterIndex);
41049
+ toolIdByBlock.set(blockIdx, toolId);
41050
+ yield {
41051
+ type: "tool_call",
41052
+ index: patterIndex,
41053
+ id: toolId,
41054
+ name: toolName,
41055
+ arguments: ""
41056
+ };
41057
+ continue;
41058
+ }
41059
+ if (event.type === "content_block_delta") {
41060
+ if (event.delta?.type === "text_delta" && event.delta.text) {
41061
+ yield { type: "text", content: event.delta.text };
41062
+ continue;
41063
+ }
41064
+ if (event.delta?.type === "input_json_delta" && event.delta.partial_json) {
41065
+ const blockIdx = event.index ?? 0;
41066
+ const patterIndex = toolIndexByBlock.get(blockIdx);
41067
+ if (patterIndex !== void 0) {
41068
+ yield {
41069
+ type: "tool_call",
41070
+ index: patterIndex,
41071
+ id: toolIdByBlock.get(blockIdx),
41072
+ arguments: event.delta.partial_json
41073
+ };
41074
+ }
39925
41075
  }
39926
41076
  }
39927
41077
  }
39928
41078
  }
41079
+ } finally {
41080
+ reader.cancel().catch(() => {
41081
+ });
41082
+ }
41083
+ if (inputTokens > 0 || outputTokens > 0 || cacheReadTokens > 0 || cacheWriteTokens > 0) {
41084
+ yield {
41085
+ type: "usage",
41086
+ inputTokens,
41087
+ outputTokens,
41088
+ cacheReadInputTokens: cacheReadTokens,
41089
+ cacheWriteInputTokens: cacheWriteTokens
41090
+ };
39929
41091
  }
39930
41092
  yield { type: "done" };
39931
41093
  }
@@ -39985,16 +41147,17 @@ function toAnthropicMessages(messages) {
39985
41147
  }
39986
41148
  if (role === "tool") {
39987
41149
  const contentStr = typeof rawMsg.content === "string" ? rawMsg.content : JSON.stringify(rawMsg.content);
39988
- out.push({
39989
- role: "user",
39990
- content: [
39991
- {
39992
- type: "tool_result",
39993
- tool_use_id: rawMsg.tool_call_id ?? "",
39994
- content: contentStr
39995
- }
39996
- ]
39997
- });
41150
+ const toolResultBlock = {
41151
+ type: "tool_result",
41152
+ tool_use_id: rawMsg.tool_call_id ?? "",
41153
+ content: contentStr
41154
+ };
41155
+ const prev = out.length > 0 ? out[out.length - 1] : void 0;
41156
+ if (prev && prev.role === "user" && Array.isArray(prev.content) && prev.content.length > 0 && prev.content.every((b) => b["type"] === "tool_result")) {
41157
+ prev.content.push(toolResultBlock);
41158
+ } else {
41159
+ out.push({ role: "user", content: [toolResultBlock] });
41160
+ }
39998
41161
  continue;
39999
41162
  }
40000
41163
  }
@@ -40137,50 +41300,55 @@ async function* parseOpenAISseStream(response) {
40137
41300
  if (!reader) return;
40138
41301
  const decoder = new TextDecoder();
40139
41302
  let buffer = "";
40140
- while (true) {
40141
- const { done, value } = await reader.read();
40142
- if (done) break;
40143
- buffer += decoder.decode(value, { stream: true });
40144
- const lines = buffer.split("\n");
40145
- buffer = lines.pop() || "";
40146
- for (const line of lines) {
40147
- const trimmed = line.trim();
40148
- if (!trimmed || !trimmed.startsWith("data: ")) continue;
40149
- const data = trimmed.slice(6);
40150
- if (data === "[DONE]") continue;
40151
- let chunk;
40152
- try {
40153
- chunk = JSON.parse(data);
40154
- } catch {
40155
- continue;
40156
- }
40157
- const usage = chunk.usage ?? chunk.x_groq?.usage;
40158
- if (usage) {
40159
- const cached2 = chunk.usage?.prompt_tokens_details?.cached_tokens ?? 0;
40160
- yield {
40161
- type: "usage",
40162
- inputTokens: usage.prompt_tokens,
40163
- outputTokens: usage.completion_tokens,
40164
- cacheReadInputTokens: cached2
40165
- };
40166
- }
40167
- const delta = chunk.choices?.[0]?.delta;
40168
- if (!delta) continue;
40169
- if (delta.content) {
40170
- yield { type: "text", content: delta.content };
40171
- }
40172
- if (delta.tool_calls) {
40173
- for (const tc of delta.tool_calls) {
41303
+ try {
41304
+ while (true) {
41305
+ const { done, value } = await reader.read();
41306
+ if (done) break;
41307
+ buffer += decoder.decode(value, { stream: true });
41308
+ const lines = buffer.split("\n");
41309
+ buffer = lines.pop() || "";
41310
+ for (const line of lines) {
41311
+ const trimmed = line.trim();
41312
+ if (!trimmed || !trimmed.startsWith("data: ")) continue;
41313
+ const data = trimmed.slice(6);
41314
+ if (data === "[DONE]") continue;
41315
+ let chunk;
41316
+ try {
41317
+ chunk = JSON.parse(data);
41318
+ } catch {
41319
+ continue;
41320
+ }
41321
+ const usage = chunk.usage ?? chunk.x_groq?.usage;
41322
+ if (usage) {
41323
+ const cached2 = chunk.usage?.prompt_tokens_details?.cached_tokens ?? 0;
40174
41324
  yield {
40175
- type: "tool_call",
40176
- index: tc.index,
40177
- id: tc.id,
40178
- name: tc.function?.name,
40179
- arguments: tc.function?.arguments
41325
+ type: "usage",
41326
+ inputTokens: usage.prompt_tokens,
41327
+ outputTokens: usage.completion_tokens,
41328
+ cacheReadInputTokens: cached2
40180
41329
  };
40181
41330
  }
41331
+ const delta = chunk.choices?.[0]?.delta;
41332
+ if (!delta) continue;
41333
+ if (delta.content) {
41334
+ yield { type: "text", content: delta.content };
41335
+ }
41336
+ if (delta.tool_calls) {
41337
+ for (const tc of delta.tool_calls) {
41338
+ yield {
41339
+ type: "tool_call",
41340
+ index: tc.index,
41341
+ id: tc.id,
41342
+ name: tc.function?.name,
41343
+ arguments: tc.function?.arguments
41344
+ };
41345
+ }
41346
+ }
40182
41347
  }
40183
41348
  }
41349
+ } finally {
41350
+ reader.cancel().catch(() => {
41351
+ });
40184
41352
  }
40185
41353
  }
40186
41354
 
@@ -40349,11 +41517,21 @@ var CerebrasLLMProvider = class {
40349
41517
  }
40350
41518
  const advisoryMs = parseRateLimitResetMs(response.headers);
40351
41519
  const exponentialMs = RETRY_BACKOFF_BASE_MS * Math.pow(2, attempt);
40352
- const delayMs = Math.max(advisoryMs, exponentialMs);
41520
+ const delayMs = Math.min(5e3, Math.max(advisoryMs, exponentialMs));
40353
41521
  getLogger().warn(
40354
41522
  `Cerebras API ${response.status} (attempt ${attempt + 1}/${maxAttempts}); retrying after ${delayMs}ms`
40355
41523
  );
40356
- await new Promise((r) => setTimeout(r, delayMs));
41524
+ await new Promise((resolve2, reject) => {
41525
+ const t = setTimeout(resolve2, delayMs);
41526
+ opts?.signal?.addEventListener(
41527
+ "abort",
41528
+ () => {
41529
+ clearTimeout(t);
41530
+ reject(opts.signal.reason);
41531
+ },
41532
+ { once: true }
41533
+ );
41534
+ });
40357
41535
  }
40358
41536
  throw new PatterError(`Cerebras API error ${lastStatus}: ${lastErrText || "request failed"}`);
40359
41537
  }
@@ -40516,47 +41694,52 @@ var GoogleLLMProvider = class {
40516
41694
  let buffer = "";
40517
41695
  let nextIndex = 0;
40518
41696
  let lastUsage;
40519
- while (true) {
40520
- const { done, value } = await reader.read();
40521
- if (done) break;
40522
- buffer += decoder.decode(value, { stream: true });
40523
- const lines = buffer.split("\n");
40524
- buffer = lines.pop() || "";
40525
- for (const line of lines) {
40526
- const trimmed = line.trim();
40527
- if (!trimmed.startsWith("data: ")) continue;
40528
- const data = trimmed.slice(6);
40529
- if (!data) continue;
40530
- let payload;
40531
- try {
40532
- payload = JSON.parse(data);
40533
- } catch {
40534
- continue;
40535
- }
40536
- if (payload.usageMetadata) {
40537
- lastUsage = payload.usageMetadata;
40538
- }
40539
- const candidate = payload.candidates?.[0];
40540
- const parts = candidate?.content?.parts ?? [];
40541
- for (const part of parts) {
40542
- if (part.functionCall) {
40543
- const args = part.functionCall.args ?? {};
40544
- const callId = part.functionCall.id ?? `gemini_call_${nextIndex}`;
40545
- yield {
40546
- type: "tool_call",
40547
- index: nextIndex,
40548
- id: callId,
40549
- name: part.functionCall.name ?? "",
40550
- arguments: JSON.stringify(args)
40551
- };
40552
- nextIndex++;
41697
+ try {
41698
+ while (true) {
41699
+ const { done, value } = await reader.read();
41700
+ if (done) break;
41701
+ buffer += decoder.decode(value, { stream: true });
41702
+ const lines = buffer.split("\n");
41703
+ buffer = lines.pop() || "";
41704
+ for (const line of lines) {
41705
+ const trimmed = line.trim();
41706
+ if (!trimmed.startsWith("data: ")) continue;
41707
+ const data = trimmed.slice(6);
41708
+ if (!data) continue;
41709
+ let payload;
41710
+ try {
41711
+ payload = JSON.parse(data);
41712
+ } catch {
40553
41713
  continue;
40554
41714
  }
40555
- if (part.text) {
40556
- yield { type: "text", content: part.text };
41715
+ if (payload.usageMetadata) {
41716
+ lastUsage = payload.usageMetadata;
41717
+ }
41718
+ const candidate = payload.candidates?.[0];
41719
+ const parts = candidate?.content?.parts ?? [];
41720
+ for (const part of parts) {
41721
+ if (part.functionCall) {
41722
+ const args = part.functionCall.args ?? {};
41723
+ const callId = part.functionCall.id ?? `gemini_call_${nextIndex}`;
41724
+ yield {
41725
+ type: "tool_call",
41726
+ index: nextIndex,
41727
+ id: callId,
41728
+ name: part.functionCall.name ?? "",
41729
+ arguments: JSON.stringify(args)
41730
+ };
41731
+ nextIndex++;
41732
+ continue;
41733
+ }
41734
+ if (part.text) {
41735
+ yield { type: "text", content: part.text };
41736
+ }
40557
41737
  }
40558
41738
  }
40559
41739
  }
41740
+ } finally {
41741
+ reader.cancel().catch(() => {
41742
+ });
40560
41743
  }
40561
41744
  if (lastUsage) {
40562
41745
  yield {
@@ -40650,7 +41833,17 @@ function toGeminiContents(messages) {
40650
41833
  continue;
40651
41834
  }
40652
41835
  }
40653
- return { systemInstruction: systemParts.join("\n\n"), contents };
41836
+ const merged = [];
41837
+ for (const entry of contents) {
41838
+ const prev = merged[merged.length - 1];
41839
+ const isFunctionResponseOnly = (c) => c.role === "user" && c.parts.every((p) => p.functionResponse !== void 0);
41840
+ if (prev && isFunctionResponseOnly(prev) && isFunctionResponseOnly(entry)) {
41841
+ prev.parts.push(...entry.parts);
41842
+ } else {
41843
+ merged.push(entry);
41844
+ }
41845
+ }
41846
+ return { systemInstruction: systemParts.join("\n\n"), contents: merged };
40654
41847
  }
40655
41848
 
40656
41849
  // src/llm/google.ts
@@ -40673,13 +41866,270 @@ var LLM5 = class extends GoogleLLMProvider {
40673
41866
  }
40674
41867
  };
40675
41868
 
41869
+ // src/llm/openai-compatible.ts
41870
+ init_cjs_shims();
41871
+ init_llm_loop();
41872
+ init_errors();
41873
+ init_logger();
41874
+ init_version();
41875
/** Default request timeout, in seconds, used when `options.timeout` is not supplied. */
var DEFAULT_TIMEOUT_S = 60;

/**
 * Streaming LLM provider for any server exposing an OpenAI-style
 * `/chat/completions` endpoint under a configurable `baseUrl`.
 *
 * The provider optionally authenticates with a bearer key, can attach
 * per-call session headers, and can set the `user` body field for session
 * continuity. Presets extend this class and override `providerKey`.
 */
var OpenAICompatibleLLMProvider = class {
  /**
   * Stable pricing/dashboard key — read by stream-handler/metrics. Typed as
   * ``string`` (not the narrowed literal) so the Hermes / OpenClaw presets can
   * override it with their own key while still extending this class.
   */
  static providerKey = "openai_compatible";
  /** Resolved bearer; undefined for keyless gateways. */
  apiKey;
  model;
  baseUrl;
  timeoutMs;
  extraHeaders;
  sessionUserPrefix;
  sessionIdHeader;
  sessionIdPrefix;
  sessionKeyHeader;
  sessionKey;
  temperature;
  maxTokens;
  responseFormat;
  parallelToolCalls;
  toolChoice;
  seed;
  topP;
  frequencyPenalty;
  presencePenalty;
  stop;

  /**
   * @param options Provider configuration. `baseUrl` and `model` are required.
   *   `apiKey` wins over `apiKeyEnv`; when only `apiKeyEnv` is given the key is
   *   read from `process.env`. `timeout` is in seconds (default 60).
   * @throws {Error} When `baseUrl` or `model` is missing/empty.
   */
  constructor(options) {
    if (!options.baseUrl) {
      throw new Error(
        'OpenAICompatibleLLMProvider requires a baseUrl (e.g. "http://127.0.0.1:11434/v1").'
      );
    }
    if (!options.model) {
      throw new Error("OpenAICompatibleLLMProvider requires a model.");
    }
    this.apiKey = options.apiKey ?? (options.apiKeyEnv ? process.env[options.apiKeyEnv] : void 0);
    this.model = options.model;
    this.baseUrl = options.baseUrl;
    // Public option is in seconds; everything internal works in milliseconds.
    this.timeoutMs = (options.timeout ?? DEFAULT_TIMEOUT_S) * 1e3;
    this.extraHeaders = options.extraHeaders;
    this.sessionUserPrefix = options.sessionUserPrefix;
    this.sessionIdHeader = options.sessionIdHeader;
    this.sessionIdPrefix = options.sessionIdPrefix;
    this.sessionKeyHeader = options.sessionKeyHeader;
    this.sessionKey = options.sessionKey;
    this.temperature = options.temperature;
    this.maxTokens = options.maxTokens;
    this.responseFormat = options.responseFormat;
    this.parallelToolCalls = options.parallelToolCalls;
    this.toolChoice = options.toolChoice;
    this.seed = options.seed;
    this.topP = options.topP;
    this.frequencyPenalty = options.frequencyPenalty;
    this.presencePenalty = options.presencePenalty;
    this.stop = options.stop;
  }

  /**
   * Assemble the request headers. ``Content-Type`` and ``User-Agent`` are the
   * defaults; ``extraHeaders`` are spread on top of them, then the
   * ``Authorization`` header is added only when a key is present (keyless
   * gateways omit it).
   *
   * The two session headers are emitted INDEPENDENTLY, each gated on its own
   * config (decoupled from ``sessionUserPrefix`` and from each other):
   *   - ``sessionIdHeader`` (+ ``callId``) → ``` `${sessionIdPrefix}${callId}` ```
   *   - ``sessionKeyHeader`` (+ ``sessionKey``) → the static ``sessionKey`` value.
   * ``sessionKey`` is a credential-grade memory scope and is never logged.
   *
   * @param callId Optional per-call identifier used for the session-id header.
   * @returns Plain object of header name → value.
   */
  buildHeaders(callId) {
    const headers = {
      "Content-Type": "application/json",
      "User-Agent": `getpatter/${VERSION}`,
      ...this.extraHeaders ?? {}
    };
    if (this.apiKey) {
      headers.Authorization = `Bearer ${this.apiKey}`;
    }
    if (this.sessionIdHeader && callId) {
      headers[this.sessionIdHeader] = `${this.sessionIdPrefix ?? ""}${callId}`;
    }
    if (this.sessionKeyHeader && this.sessionKey) {
      headers[this.sessionKeyHeader] = this.sessionKey;
    }
    return headers;
  }

  /**
   * Pre-call DNS / TLS warmup for the configured endpoint. Best-effort:
   * 5 s timeout, all exceptions swallowed at debug level. The ``Authorization``
   * header is only sent when a key is present so the operator-grade bearer is
   * never echoed for keyless gateways (and the key is never logged).
   */
  async warmup() {
    try {
      const headers = {};
      if (this.apiKey) headers.Authorization = `Bearer ${this.apiKey}`;
      // Strip trailing slashes so "http://host/v1/" does not become "//models".
      const root = String(this.baseUrl).replace(/\/+$/, "");
      await fetch(`${root}/models`, {
        method: "GET",
        headers,
        signal: AbortSignal.timeout(5e3)
      });
    } catch (err) {
      getLogger().debug(
        `OpenAI-compatible LLM warmup failed (best-effort): ${String(err)}`
      );
    }
  }

  /**
   * Build the streaming chat-completions request body. Sampling options are
   * only included when they were configured (not ``undefined``). ``user`` is
   * set for session continuity when ``sessionUserPrefix`` is set AND a
   * ``callId`` is available.
   *
   * An empty ``tools`` array is omitted rather than sent: OpenAI-style servers
   * reject ``"tools": []`` as an invalid (too short) array.
   *
   * @param messages Chat messages, passed through unchanged.
   * @param tools Optional tool definitions.
   * @param callId Optional per-call identifier.
   * @returns JSON-serialisable request body.
   */
  buildBody(messages, tools, callId) {
    const body = {
      model: this.model,
      messages,
      stream: true,
      stream_options: { include_usage: true }
    };
    if (this.temperature !== void 0) body.temperature = this.temperature;
    if (this.maxTokens !== void 0) body.max_completion_tokens = this.maxTokens;
    if (this.responseFormat !== void 0) body.response_format = this.responseFormat;
    if (this.parallelToolCalls !== void 0) body.parallel_tool_calls = this.parallelToolCalls;
    if (this.toolChoice !== void 0) body.tool_choice = this.toolChoice;
    if (this.seed !== void 0) body.seed = this.seed;
    if (this.topP !== void 0) body.top_p = this.topP;
    if (this.frequencyPenalty !== void 0) body.frequency_penalty = this.frequencyPenalty;
    if (this.presencePenalty !== void 0) body.presence_penalty = this.presencePenalty;
    if (this.stop !== void 0) body.stop = this.stop;
    const hasTools = Array.isArray(tools) ? tools.length > 0 : Boolean(tools);
    if (hasTools) body.tools = tools;
    if (this.sessionUserPrefix !== void 0 && callId) {
      body.user = `${this.sessionUserPrefix}${callId}`;
    }
    return body;
  }

  /**
   * Stream Patter-format LLM chunks from the configured chat completions API.
   *
   * @param messages Chat messages.
   * @param tools Optional tool definitions.
   * @param opts Optional per-call options; `callId` and `signal` are read.
   * @throws {PatterConnectionError} When the server answers with a non-2xx status.
   */
  async *stream(messages, tools, opts) {
    const callId = opts?.callId;
    const body = this.buildBody(messages, tools, callId);
    // Strip trailing slashes so "http://host/v1/" does not become "//chat/completions".
    const root = String(this.baseUrl).replace(/\/+$/, "");
    const response = await fetch(`${root}/chat/completions`, {
      method: "POST",
      headers: this.buildHeaders(callId),
      body: JSON.stringify(body),
      // Abort on whichever fires first: the caller's signal or the request timeout.
      signal: mergeAbortSignals(opts?.signal, AbortSignal.timeout(this.timeoutMs))
    });
    if (!response.ok) {
      const errText = await response.text();
      getLogger().error(
        `OpenAI-compatible API error: ${response.status} ${errText}`
      );
      // The thrown message carries only the first 200 characters of the body.
      throw new PatterConnectionError(
        `LLM API returned ${response.status}: ${errText.slice(0, 200)}`
      );
    }
    yield* parseOpenAISseStream(response);
  }
};

/** Alias subclass of the provider that keeps the "openai_compatible" provider key. */
var LLM6 = class extends OpenAICompatibleLLMProvider {
  static providerKey = "openai_compatible";
};
42038
+
42039
+ // src/llm/hermes.ts
42040
+ init_cjs_shims();
42041
// Hermes preset defaults: endpoint, model/key environment variables, and the
// session header names/prefixes handed to the OpenAI-compatible base provider.
var BASE_URL = "http://127.0.0.1:8642/v1";
var DEFAULT_MODEL5 = "hermes-agent";
var API_KEY_ENV = "API_SERVER_KEY";
var MODEL_ENV = "API_SERVER_MODEL_NAME";
var SESSION_USER_PREFIX = "patter-call-";
var SESSION_ID_HEADER = "X-Hermes-Session-Id";
var SESSION_ID_PREFIX = "patter-call-";
var SESSION_KEY_HEADER = "X-Hermes-Session-Key";
var DEFAULT_TIMEOUT_S2 = 120;

/**
 * Hermes preset of the OpenAI-compatible provider.
 *
 * The model resolves from `opts.model`, then the `API_SERVER_MODEL_NAME`
 * environment variable, then the built-in default. Session prefix and header
 * names are fixed by the preset; the remaining options are forwarded as given.
 */
var LLM7 = class extends OpenAICompatibleLLMProvider {
  static providerKey = "hermes";

  /**
   * @param opts Optional overrides (`apiKey`, `baseUrl`, `model`, `timeout`,
   *   `sessionKey`, `extraHeaders`, and the sampling options).
   */
  constructor(opts = {}) {
    // Options copied from the caller without modification.
    const forwardedKeys = [
      "apiKey",
      "sessionKey",
      "extraHeaders",
      "temperature",
      "maxTokens",
      "responseFormat",
      "parallelToolCalls",
      "toolChoice",
      "seed",
      "topP",
      "frequencyPenalty",
      "presencePenalty",
      "stop"
    ];
    const forwarded = {};
    for (const key of forwardedKeys) {
      forwarded[key] = opts[key];
    }
    super({
      ...forwarded,
      apiKeyEnv: API_KEY_ENV,
      baseUrl: opts.baseUrl ?? BASE_URL,
      model: opts.model ?? process.env[MODEL_ENV] ?? DEFAULT_MODEL5,
      timeout: opts.timeout ?? DEFAULT_TIMEOUT_S2,
      sessionUserPrefix: SESSION_USER_PREFIX,
      sessionIdHeader: SESSION_ID_HEADER,
      sessionIdPrefix: SESSION_ID_PREFIX,
      sessionKeyHeader: SESSION_KEY_HEADER
    });
  }
};
42080
+
42081
+ // src/llm/openclaw.ts
42082
+ init_cjs_shims();
42083
// OpenClaw preset defaults: endpoint, key environment variable, session header
// name, `user` prefix, timeout (seconds), and the allowed agent-id alphabet.
var BASE_URL2 = "http://127.0.0.1:18789/v1";
var API_KEY_ENV2 = "OPENCLAW_API_KEY";
var SESSION_HEADER = "x-openclaw-session-key";
var SESSION_USER_PREFIX2 = "patter-call-";
var DEFAULT_TIMEOUT_S3 = 120;
var OPENCLAW_AGENT_RE2 = /^[A-Za-z0-9._:/-]+$/;

/**
 * OpenClaw preset of the OpenAI-compatible provider.
 *
 * The agent id becomes the model name: ids that already contain "/" or ":"
 * are used verbatim, anything else is prefixed with "openclaw/".
 */
var LLM8 = class extends OpenAICompatibleLLMProvider {
  static providerKey = "openclaw";

  /**
   * @param opts Preset options; `agent` is required. `apiKey`, `baseUrl`,
   *   `timeout`, `extraHeaders` and the sampling options are optional.
   * @throws {Error} When `agent` is missing, not a string, or contains
   *   characters outside the allowed set.
   */
  constructor(opts) {
    const agent = opts?.agent;
    // RegExp.test() coerces its argument to a string, so a non-string such as
    // the number 42 would pass the pattern and then crash on agent.includes()
    // below. Reject non-strings up front with the descriptive error instead.
    if (typeof agent !== "string" || !agent || !OPENCLAW_AGENT_RE2.test(agent)) {
      throw new Error(
        `Invalid OpenClaw agent id: ${JSON.stringify(agent)}. Allowed characters: letters, digits, dot, underscore, colon, slash, dash.`
      );
    }
    const model = agent.includes("/") || agent.includes(":") ? agent : `openclaw/${agent}`;
    const options = {
      apiKey: opts.apiKey,
      apiKeyEnv: API_KEY_ENV2,
      baseUrl: opts.baseUrl ?? BASE_URL2,
      model,
      timeout: opts.timeout ?? DEFAULT_TIMEOUT_S3,
      sessionUserPrefix: SESSION_USER_PREFIX2,
      // Wire-identical to the prior behaviour: header value is the raw call id
      // (empty prefix), and OpenClaw's gateway also derives the session from
      // the ``user`` field above. No separate memory-scope header.
      sessionIdHeader: SESSION_HEADER,
      sessionIdPrefix: "",
      extraHeaders: opts.extraHeaders,
      temperature: opts.temperature,
      maxTokens: opts.maxTokens,
      responseFormat: opts.responseFormat,
      parallelToolCalls: opts.parallelToolCalls,
      toolChoice: opts.toolChoice,
      seed: opts.seed,
      topP: opts.topP,
      frequencyPenalty: opts.frequencyPenalty,
      presencePenalty: opts.presencePenalty,
      stop: opts.stop
    };
    super(options);
  }
};
42126
+
40676
42127
  // src/index.ts
40677
42128
  init_silero_vad();
40678
42129
 
40679
42130
  // src/providers/deepfilternet-filter.ts
40680
42131
  init_cjs_shims();
40681
42132
  init_logger();
40682
- init_transcoding();
40683
42133
  function log2() {
40684
42134
  return getLogger();
40685
42135
  }
@@ -40709,6 +42159,57 @@ function float32ToPcm16(samples) {
40709
42159
  }
40710
42160
  return out;
40711
42161
  }
42162
/**
 * Streaming linear-interpolation resampler for PCM16-LE mono audio.
 *
 * The fractional read position (`phase`) is carried from one `process()` call
 * to the next, so the output sample grid stays continuous across chunk
 * boundaries. When an output position falls between the last sample of a
 * chunk and the (not yet available) first sample of the next one, the last
 * sample is held rather than interpolated.
 */
var ArbitraryResampler = class {
  srcRate;
  dstRate;
  // Fractional position into the current chunk.
  phase = 0;
  // Last input sample from the previous chunk.
  lastSample = 0;
  hasHistory = false;

  /**
   * @param srcRate Input sample rate; must be positive and finite.
   * @param dstRate Output sample rate; must be positive and finite.
   * @throws {RangeError} When either rate is not a positive finite number.
   *   Without this guard a zero/negative/NaN step makes `process()` either
   *   loop forever (the read position never reaches the end of the chunk) or
   *   fail on an out-of-range buffer read.
   */
  constructor(srcRate, dstRate) {
    // Comparison-based check: NaN, 0, negatives and +/-Infinity all fail it.
    const isUsableRate = (rate) => rate > 0 && rate < Infinity;
    if (!isUsableRate(srcRate) || !isUsableRate(dstRate)) {
      throw new RangeError(
        `ArbitraryResampler requires positive finite sample rates, got srcRate=${srcRate}, dstRate=${dstRate}`
      );
    }
    this.srcRate = srcRate;
    this.dstRate = dstRate;
  }

  /**
   * Process a chunk of PCM16-LE mono audio and return resampled PCM16-LE.
   *
   * @param pcm Buffer of little-endian 16-bit samples; a trailing odd byte is ignored.
   * @returns Buffer of resampled little-endian 16-bit samples (empty when the
   *   chunk holds no complete sample).
   */
  process(pcm) {
    const sampleCount = Math.floor(pcm.length / 2);
    if (sampleCount === 0) return Buffer.alloc(0);
    // Input samples consumed per output sample.
    const step = this.srcRate / this.dstRate;
    const outArr = [];
    let phase = this.phase;
    while (true) {
      const idx = Math.floor(phase);
      if (idx >= sampleCount) break;
      const frac = phase - idx;
      let s0;
      let s1;
      if (idx < 0) {
        // Only reachable if `phase` was set negative from outside: process()
        // itself always leaves it at >= 0. Interpolates from the previous
        // chunk's last sample (or silence when there is no history).
        s0 = this.hasHistory ? this.lastSample : 0;
        s1 = pcm.readInt16LE(0);
      } else {
        s0 = pcm.readInt16LE(idx * 2);
        // No look-ahead past the chunk: hold the final sample.
        s1 = idx + 1 < sampleCount ? pcm.readInt16LE((idx + 1) * 2) : s0;
      }
      const interp = Math.round(s0 + (s1 - s0) * frac);
      // Clamp to the signed 16-bit range before storing.
      outArr.push(Math.max(-32768, Math.min(32767, interp)));
      phase += step;
    }
    this.lastSample = pcm.readInt16LE((sampleCount - 1) * 2);
    this.hasHistory = true;
    // Re-base the read position so it is relative to the start of the next chunk.
    this.phase = phase - sampleCount;
    const out = Buffer.alloc(outArr.length * 2);
    for (let j = 0; j < outArr.length; j++) out.writeInt16LE(outArr[j], j * 2);
    return out;
  }

  /**
   * Reset the carried state (phase and history).
   *
   * @returns Always an empty buffer: this implementation holds no pending output.
   */
  flush() {
    this.phase = 0;
    this.lastSample = 0;
    this.hasHistory = false;
    return Buffer.alloc(0);
  }
};
40712
42213
  var DeepFilterNetFilter = class {
40713
42214
  modelPath;
40714
42215
  silenceWarnings;
@@ -40716,8 +42217,9 @@ var DeepFilterNetFilter = class {
40716
42217
  ort = null;
40717
42218
  warned = false;
40718
42219
  closed = false;
40719
- // Fix 5: stateful resamplers for src_sr↔48k conversions so chunk-boundary
42220
+ // Stateful resamplers for src_sr↔48k conversions so chunk-boundary
40720
42221
  // samples are not discarded. Lazy-created and torn down on rate change.
42222
+ // Uses ArbitraryResampler which supports any integer rate pair.
40721
42223
  _resamplerSrcRate = null;
40722
42224
  _upsamplerInst = null;
40723
42225
  _downsamplerInst = null;
@@ -40775,8 +42277,8 @@ var DeepFilterNetFilter = class {
40775
42277
  try {
40776
42278
  if (this._resamplerSrcRate !== sampleRate) {
40777
42279
  this._resamplerSrcRate = sampleRate;
40778
- this._upsamplerInst = new StatefulResampler({ srcRate: sampleRate, dstRate: DEEPFILTERNET_SR });
40779
- this._downsamplerInst = new StatefulResampler({ srcRate: DEEPFILTERNET_SR, dstRate: sampleRate });
42280
+ this._upsamplerInst = new ArbitraryResampler(sampleRate, DEEPFILTERNET_SR);
42281
+ this._downsamplerInst = new ArbitraryResampler(DEEPFILTERNET_SR, sampleRate);
40780
42282
  }
40781
42283
  const samples = pcm16ToFloat32(pcmChunk);
40782
42284
  const pcm16Up = this._upsamplerInst.process(float32ToPcm16(new Float32Array(samples)));
@@ -40940,6 +42442,17 @@ var Tool = class {
40940
42442
  parameters;
40941
42443
  handler;
40942
42444
  webhookUrl;
42445
+ reassurance;
42446
+ /**
42447
+ * Per-tool execution timeout in milliseconds. `undefined` uses the
42448
+ * executor default (10 000 ms). Mirrors Python `timeout_s`.
42449
+ */
42450
+ timeoutMs;
42451
+ /**
42452
+ * Enable OpenAI strict mode for this tool's function schema. Off by
42453
+ * default. Mirrors Python `strict` on `Tool`.
42454
+ */
42455
+ strict;
40943
42456
  constructor(opts) {
40944
42457
  if (!opts.name) {
40945
42458
  throw new Error("Tool requires a non-empty name.");
@@ -40957,6 +42470,9 @@ var Tool = class {
40957
42470
  this.parameters = opts.parameters ?? { type: "object", properties: {} };
40958
42471
  if (hasHandler) this.handler = opts.handler;
40959
42472
  if (hasWebhook) this.webhookUrl = opts.webhookUrl;
42473
+ if (opts.reassurance !== void 0) this.reassurance = opts.reassurance;
42474
+ if (opts.timeoutMs !== void 0) this.timeoutMs = opts.timeoutMs;
42475
+ if (opts.strict !== void 0) this.strict = opts.strict;
40960
42476
  }
40961
42477
  };
40962
42478
  function tool(opts) {
@@ -41120,7 +42636,6 @@ var ChatContext = class _ChatContext {
41120
42636
  init_cjs_shims();
41121
42637
  init_logger();
41122
42638
  var DTMF_EVENTS = [
41123
- "0",
41124
42639
  "1",
41125
42640
  "2",
41126
42641
  "3",
@@ -41130,6 +42645,7 @@ var DTMF_EVENTS = [
41130
42645
  "7",
41131
42646
  "8",
41132
42647
  "9",
42648
+ "0",
41133
42649
  "*",
41134
42650
  "#",
41135
42651
  "A",
@@ -41809,18 +43325,24 @@ var TelnyxAdapter = class {
41809
43325
  "/number_orders",
41810
43326
  orderBody
41811
43327
  );
41812
- const orderId = order.data?.id ?? "";
43328
+ const orderId = order.data?.id;
43329
+ if (!orderId) throw new Error("TelnyxAdapter: /number_orders returned no order id");
41813
43330
  return { phoneNumber: chosen, orderId };
41814
43331
  }
41815
43332
  /** Attach a number to a Call Control Application. */
41816
43333
  async configureNumber(phoneNumber, opts) {
41817
43334
  if (!phoneNumber) throw new Error("TelnyxAdapter: phoneNumber is required");
41818
43335
  if (!opts.connectionId) throw new Error("TelnyxAdapter: connectionId is required");
41819
- await this.request(
41820
- "PATCH",
41821
- `/phone_numbers/${encodeURIComponent(phoneNumber)}/voice`,
41822
- { connection_id: opts.connectionId, tech_prefix_enabled: false }
41823
- );
43336
+ try {
43337
+ await this.request(
43338
+ "PATCH",
43339
+ `/phone_numbers/${encodeURIComponent(phoneNumber)}/voice`,
43340
+ { connection_id: opts.connectionId, tech_prefix_enabled: false }
43341
+ );
43342
+ } catch (err) {
43343
+ const status = err instanceof Error ? err.message.replace(/\+\d{7,15}/g, "[REDACTED]") : String(err);
43344
+ throw new Error(`TelnyxAdapter: configureNumber failed: ${status}`);
43345
+ }
41824
43346
  }
41825
43347
  /**
41826
43348
  * Place an outbound call on the Call Control Application.
@@ -41928,7 +43450,7 @@ var TelnyxSTT = class {
41928
43450
  /** Stable pricing/dashboard key — read by stream-handler/metrics. */
41929
43451
  static providerKey = "telnyx_stt";
41930
43452
  ws = null;
41931
- callbacks = [];
43453
+ callbacks = /* @__PURE__ */ new Set();
41932
43454
  headerSent = false;
41933
43455
  /** Open the streaming WebSocket and arm message handlers. */
41934
43456
  async connect() {
@@ -41984,14 +43506,13 @@ var TelnyxSTT = class {
41984
43506
  }
41985
43507
  this.ws.send(audio);
41986
43508
  }
41987
- /** Register a transcript listener (max 10 concurrent listeners). */
43509
+ /** Register a transcript listener. */
41988
43510
  onTranscript(callback) {
41989
- if (this.callbacks.length >= 10) {
41990
- getLogger().warn("TelnyxSTT: maximum of 10 onTranscript callbacks reached; replacing the last callback.");
41991
- this.callbacks[this.callbacks.length - 1] = callback;
41992
- return;
41993
- }
41994
- this.callbacks.push(callback);
43511
+ this.callbacks.add(callback);
43512
+ }
43513
+ /** Unregister a previously-registered transcript listener. */
43514
+ offTranscript(callback) {
43515
+ this.callbacks.delete(callback);
41995
43516
  }
41996
43517
  /** Close the streaming WebSocket. */
41997
43518
  close() {
@@ -42002,6 +43523,7 @@ var TelnyxSTT = class {
42002
43523
  }
42003
43524
  this.ws = null;
42004
43525
  }
43526
+ this.headerSent = false;
42005
43527
  }
42006
43528
  };
42007
43529
 
@@ -42023,6 +43545,7 @@ var TelnyxTTSSampleRate = {
42023
43545
  HZ_24000: 24e3
42024
43546
  };
42025
43547
  var DEFAULT_VOICE = TelnyxTTSVoice.NATURAL_HD_ASTRA;
43548
+ var FRAME_TIMEOUT_MS2 = 3e4;
42026
43549
  var TelnyxTTS = class {
42027
43550
  constructor(apiKey, voice = DEFAULT_VOICE, baseUrl = TELNYX_TTS_WS_URL) {
42028
43551
  this.apiKey = apiKey;
@@ -42050,69 +43573,83 @@ var TelnyxTTS = class {
42050
43573
  */
42051
43574
  async *synthesizeStream(text) {
42052
43575
  const url2 = `${this.baseUrl}?voice=${encodeURIComponent(this.voice)}`;
42053
- const ws = new import_ws13.default(url2, {
42054
- headers: { Authorization: `Bearer ${this.apiKey}` }
42055
- });
42056
- await new Promise((resolve2, reject) => {
42057
- const timer = setTimeout(() => reject(new Error("Telnyx TTS connect timeout")), 1e4);
42058
- ws.once("open", () => {
42059
- clearTimeout(timer);
42060
- resolve2();
43576
+ let ws = null;
43577
+ try {
43578
+ let push2 = function(item) {
43579
+ const w = waiters.shift();
43580
+ if (w) {
43581
+ w(item);
43582
+ } else {
43583
+ queue.push(item);
43584
+ }
43585
+ };
43586
+ var push = push2;
43587
+ ws = new import_ws13.default(url2, {
43588
+ headers: { Authorization: `Bearer ${this.apiKey}` }
42061
43589
  });
42062
- ws.once("error", (err) => {
42063
- clearTimeout(timer);
42064
- reject(err);
43590
+ await new Promise((resolve2, reject) => {
43591
+ const timer = setTimeout(() => reject(new Error("Telnyx TTS connect timeout")), 1e4);
43592
+ ws.once("open", () => {
43593
+ clearTimeout(timer);
43594
+ resolve2();
43595
+ });
43596
+ ws.once("error", (err) => {
43597
+ clearTimeout(timer);
43598
+ reject(err);
43599
+ });
42065
43600
  });
42066
- });
42067
- const queue = [];
42068
- const waiters = [];
42069
- function push(item) {
42070
- const w = waiters.shift();
42071
- if (w) {
42072
- w(item);
42073
- } else {
42074
- queue.push(item);
42075
- }
42076
- }
42077
- ws.on("message", (raw) => {
42078
- let data;
42079
- try {
42080
- data = JSON.parse(raw.toString());
42081
- } catch {
42082
- getLogger().warn("TelnyxTTS: received invalid JSON");
42083
- return;
42084
- }
42085
- const audioB64 = data.audio;
42086
- if (!audioB64) return;
42087
- try {
42088
- const audioBytes = Buffer.from(audioB64, "base64");
42089
- if (audioBytes.length > 0) {
42090
- push(audioBytes);
43601
+ const queue = [];
43602
+ const waiters = [];
43603
+ ws.on("message", (raw) => {
43604
+ let data;
43605
+ try {
43606
+ data = JSON.parse(raw.toString());
43607
+ } catch {
43608
+ getLogger().warn("TelnyxTTS: received invalid JSON");
43609
+ return;
42091
43610
  }
42092
- } catch {
42093
- }
42094
- });
42095
- ws.on("close", () => {
42096
- push(null);
42097
- });
42098
- ws.on("error", (err) => {
42099
- push({ error: err instanceof Error ? err : new Error(String(err)) });
42100
- });
42101
- ws.send(JSON.stringify({ text: " " }));
42102
- ws.send(JSON.stringify({ text }));
42103
- ws.send(JSON.stringify({ text: "" }));
42104
- try {
43611
+ const audioB64 = data.audio;
43612
+ if (!audioB64) return;
43613
+ try {
43614
+ const audioBytes = Buffer.from(audioB64, "base64");
43615
+ if (audioBytes.length > 0) {
43616
+ push2(audioBytes);
43617
+ }
43618
+ } catch {
43619
+ }
43620
+ });
43621
+ ws.on("close", () => {
43622
+ push2(null);
43623
+ });
43624
+ ws.on("error", (err) => {
43625
+ push2({ error: err instanceof Error ? err : new Error(String(err)) });
43626
+ });
43627
+ ws.send(JSON.stringify({ text: " " }));
43628
+ ws.send(JSON.stringify({ text }));
43629
+ ws.send(JSON.stringify({ text: "" }));
42105
43630
  while (true) {
42106
- const item = queue.length > 0 ? queue.shift() : await new Promise((resolve2) => waiters.push(resolve2));
43631
+ let frameTimer;
43632
+ const item = queue.length > 0 ? queue.shift() : await Promise.race([
43633
+ new Promise((resolve2) => waiters.push(resolve2)),
43634
+ new Promise((_, reject) => {
43635
+ frameTimer = setTimeout(
43636
+ () => reject(new Error("Telnyx TTS frame timeout")),
43637
+ FRAME_TIMEOUT_MS2
43638
+ );
43639
+ })
43640
+ ]).finally(() => {
43641
+ if (frameTimer !== void 0) clearTimeout(frameTimer);
43642
+ });
42107
43643
  if (item === null) return;
42108
43644
  if (typeof item === "object" && "error" in item) throw item.error;
42109
43645
  yield item;
42110
43646
  }
42111
43647
  } finally {
42112
43648
  try {
42113
- ws.close();
43649
+ ws?.close();
42114
43650
  } catch {
42115
43651
  }
43652
+ ws?.removeAllListeners();
42116
43653
  }
42117
43654
  }
42118
43655
  };
@@ -42160,6 +43697,7 @@ init_event_bus();
42160
43697
  GoogleLLM,
42161
43698
  GroqLLM,
42162
43699
  Guardrail,
43700
+ HermesLLM,
42163
43701
  IVRActivity,
42164
43702
  InworldTTS,
42165
43703
  KrispFrameDuration,
@@ -42170,6 +43708,8 @@ init_event_bus();
42170
43708
  MetricsStore,
42171
43709
  MinWordsStrategy,
42172
43710
  Ngrok,
43711
+ OpenAICompatibleLLM,
43712
+ OpenAICompatibleLLMProvider,
42173
43713
  OpenAILLM,
42174
43714
  OpenAILLMProvider,
42175
43715
  OpenAIRealtime,
@@ -42183,10 +43723,12 @@ init_event_bus();
42183
43723
  OpenAITranscribeSTT,
42184
43724
  OpenAITranscriptionModel,
42185
43725
  OpenAIVoice,
43726
+ OpenClawLLM,
42186
43727
  PRICING_LAST_UPDATED,
42187
43728
  PRICING_VERSION,
42188
43729
  PartialStreamError,
42189
43730
  Patter,
43731
+ PatterConfigError,
42190
43732
  PatterConnectionError,
42191
43733
  PatterError,
42192
43734
  PatterTool,
@@ -42274,6 +43816,8 @@ init_event_bus();
42274
43816
  mulawToPcm16,
42275
43817
  notifyDashboard,
42276
43818
  openaiTts,
43819
+ openclawConsult,
43820
+ openclawPostCallNotifier,
42277
43821
  pcm16ToMulaw,
42278
43822
  resample16kTo8k,
42279
43823
  resample24kTo16k,