getpatter 0.6.3 → 0.6.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -49,7 +49,7 @@ var init_cjs_shims = __esm({
49
49
  });
50
50
 
51
51
  // src/errors.ts
52
- var ErrorCode, PatterError, PatterConnectionError, AuthenticationError, ProvisionError, RateLimitError;
52
+ var ErrorCode, PatterError, PatterConfigError, PatterConnectionError, AuthenticationError, ProvisionError, RateLimitError;
53
53
  var init_errors = __esm({
54
54
  "src/errors.ts"() {
55
55
  "use strict";
@@ -85,6 +85,12 @@ var init_errors = __esm({
85
85
  this.code = options?.code ?? ErrorCode.INTERNAL;
86
86
  }
87
87
  };
88
+ PatterConfigError = class extends PatterError {
89
+ constructor(message, options) {
90
+ super(message, { code: options?.code ?? ErrorCode.CONFIG });
91
+ this.name = "PatterConfigError";
92
+ }
93
+ };
88
94
  PatterConnectionError = class extends PatterError {
89
95
  constructor(message, options) {
90
96
  super(message, { code: options?.code ?? ErrorCode.CONNECTION });
@@ -136,6 +142,45 @@ var init_logger = __esm({
136
142
  });
137
143
 
138
144
  // src/providers/openai-realtime.ts
145
+ function validateRealtimeTurnDetection(td) {
146
+ if (td === void 0) return;
147
+ if (td.type !== void 0 && td.type !== "server_vad" && td.type !== "semantic_vad") {
148
+ throw new Error(
149
+ `RealtimeTurnDetection.type must be 'server_vad' or 'semantic_vad', got ${JSON.stringify(td.type)}`
150
+ );
151
+ }
152
+ if (td.eagerness !== void 0 && td.eagerness !== "low" && td.eagerness !== "medium" && td.eagerness !== "high" && td.eagerness !== "auto") {
153
+ throw new Error(
154
+ `RealtimeTurnDetection.eagerness must be one of low|medium|high|auto, got ${JSON.stringify(td.eagerness)}`
155
+ );
156
+ }
157
+ if (td.eagerness !== void 0 && td.type !== "semantic_vad") {
158
+ throw new Error(
159
+ "RealtimeTurnDetection.eagerness is only valid when type='semantic_vad'"
160
+ );
161
+ }
162
+ }
163
+ function buildTurnDetection(td, opts) {
164
+ validateRealtimeTurnDetection(td);
165
+ let detection;
166
+ if (td?.type === "semantic_vad") {
167
+ detection = { type: "semantic_vad" };
168
+ if (td.eagerness !== void 0) detection.eagerness = td.eagerness;
169
+ } else {
170
+ detection = {
171
+ type: td?.type ?? opts.defaultType,
172
+ threshold: td?.threshold ?? 0.5,
173
+ prefix_padding_ms: td?.prefixPaddingMs ?? 300,
174
+ silence_duration_ms: td?.silenceDurationMs ?? opts.defaultSilenceMs
175
+ };
176
+ }
177
+ if (opts.includeResponseGating) {
178
+ const serverManaged = !(opts.gateResponseOnTranscript ?? false);
179
+ detection.create_response = serverManaged;
180
+ detection.interrupt_response = serverManaged;
181
+ }
182
+ return detection;
183
+ }
139
184
  function estimateAudioMs(chunk, format) {
140
185
  if (chunk.length === 0) return 0;
141
186
  if (format === OpenAIRealtimeAudioFormat.G711_ULAW || format === OpenAIRealtimeAudioFormat.G711_ALAW)
@@ -196,6 +241,7 @@ var init_openai_realtime = __esm({
196
241
  this.tools = tools;
197
242
  this.audioFormat = audioFormat;
198
243
  this.options = options;
244
+ this.gateResponseOnTranscript = options.gateResponseOnTranscript ?? false;
199
245
  }
200
246
  apiKey;
201
247
  model;
@@ -225,6 +271,23 @@ var init_openai_realtime = __esm({
225
271
  // could have produced, which is what the user actually heard.
226
272
  currentResponseFirstAudioAt = null;
227
273
  options;
274
+ // When true, the stream handler waits for the Whisper ``transcript_input``
275
+ // event before requesting the model response (legacy behavior). When false
276
+ // (default) the response is requested on ``speech_stopped`` and the
277
+ // transcript is display-only. Read by the stream handler via
278
+ // ``getGateResponseOnTranscript()``.
279
+ gateResponseOnTranscript;
280
+ /**
281
+ * Whether the stream handler should gate the model response on the Whisper
282
+ * transcript (legacy) or fire it on `speech_stopped` (default, decoupled).
283
+ *
284
+ * `false` (default) — the response is requested on `speech_stopped`,
285
+ * independently of Whisper. `true` — the response is requested only after
286
+ * `transcript_input` passes the hallucination filter.
287
+ */
288
+ getGateResponseOnTranscript() {
289
+ return this.gateResponseOnTranscript;
290
+ }
228
291
  /**
229
292
  * Build the production session.update body. Mirrors the body sent
230
293
  * inside `connect()` so warmup can apply identical configuration to
@@ -236,16 +299,26 @@ var init_openai_realtime = __esm({
236
299
  output_audio_format: this.audioFormat,
237
300
  voice: this.voice,
238
301
  instructions: this.instructions || "You are a helpful voice assistant. Be concise.",
239
- turn_detection: {
240
- type: this.options.vadType ?? OpenAIRealtimeVADType.SERVER_VAD,
241
- threshold: 0.5,
242
- prefix_padding_ms: 300,
243
- silence_duration_ms: this.options.silenceDurationMs ?? 300
244
- },
302
+ // v1 turn_detection carries NO create_response / interrupt_response
303
+ // keys. The v1 server defaults (`create_response: true`,
304
+ // `interrupt_response: true`) ARE the server-managed behaviour we want by
305
+ // default, so omitting them is equivalent to sending `true` — gating
306
+ // disabled here. `gateResponseOnTranscript` is still threaded through for
307
+ // symmetry with the GA builder, but has no wire effect while
308
+ // includeResponseGating is false.
309
+ turn_detection: buildTurnDetection(this.options.turnDetection, {
310
+ defaultType: this.options.vadType ?? OpenAIRealtimeVADType.SERVER_VAD,
311
+ defaultSilenceMs: this.options.silenceDurationMs ?? 300,
312
+ includeResponseGating: false,
313
+ gateResponseOnTranscript: this.gateResponseOnTranscript
314
+ }),
245
315
  input_audio_transcription: {
246
316
  model: this.options.inputAudioTranscriptionModel ?? OpenAITranscriptionModel.WHISPER_1
247
317
  }
248
318
  };
319
+ if (this.options.noiseReduction !== void 0) {
320
+ config2.input_audio_noise_reduction = { type: this.options.noiseReduction };
321
+ }
249
322
  if (this.options.temperature !== void 0) config2.temperature = this.options.temperature;
250
323
  if (this.options.maxResponseOutputTokens !== void 0) {
251
324
  config2.max_response_output_tokens = this.options.maxResponseOutputTokens;
@@ -509,6 +582,10 @@ var init_openai_realtime = __esm({
509
582
  };
510
583
  const timer = setTimeout(() => {
511
584
  cleanup();
585
+ try {
586
+ ws.close();
587
+ } catch {
588
+ }
512
589
  reject(new Error("OpenAI Realtime park connect timeout"));
513
590
  }, 8e3);
514
591
  ws.on("message", onMessage);
@@ -603,20 +680,33 @@ var init_openai_realtime = __esm({
603
680
  dispatch("error", { type: "socket_error", message: err?.message ?? String(err) });
604
681
  });
605
682
  }
606
- /** Truncate the in-flight assistant turn and cancel the active response.
683
+ /** Truncate the in-flight assistant turn's playback offset on the server.
684
+ *
685
+ * Sends ONLY ``conversation.item.truncate`` — no ``response.cancel``. This
686
+ * is the half of barge-in handling that a WebSocket transport MUST always
687
+ * perform: per OpenAI's docs, the GA server auto-truncates on barge-in only
688
+ * over WebRTC / SIP; on the WebSocket transport the client is responsible
689
+ * for telling the server how much of the assistant turn was actually heard.
690
+ * In server-managed mode (``interrupt_response: true``) the server already
691
+ * cancels the response itself, so issuing ``response.cancel`` here would be
692
+ * redundant / rejected — call this method, not {@link cancelResponse}.
607
693
  *
608
694
  * ``audio_end_ms`` MUST reflect what the caller actually heard, not what
609
695
  * the server generated. OpenAI streams audio at 5-10x real-time, so the
610
696
  * byte-derived counter overstates playback whenever the consumer cleared
611
- * its playout buffer (e.g. ``send_clear``) before the audio reached the
697
+ * its playout buffer (e.g. ``sendClear``) before the audio reached the
612
698
  * speaker. We bound the truncate point by wall-clock time since the first
613
699
  * chunk of this response — that's the physical maximum a 1x real-time
614
700
  * playback could have produced. Without this cap, OpenAI keeps the full
615
701
  * generated assistant text on the transcript, and the model replays /
616
702
  * resumes from it on the next turn — manifesting as re-greetings and
617
703
  * mid-sentence fragments after a barge-in storm.
704
+ *
705
+ * No-op when no response is in flight, keeping it idempotent across stale
706
+ * callers. Resets per-response tracking so post-truncate late frames and
707
+ * the next response start clean.
618
708
  */
619
- cancelResponse() {
709
+ truncate() {
620
710
  if (!this.ws) return;
621
711
  if (!this.currentResponseItemId) {
622
712
  return;
@@ -636,11 +726,31 @@ var init_openai_realtime = __esm({
636
726
  } catch (err) {
637
727
  getLogger().debug?.(`conversation.item.truncate failed: ${String(err)}`);
638
728
  }
639
- this.ws.send(JSON.stringify({ type: "response.cancel" }));
640
729
  this.currentResponseItemId = null;
641
730
  this.currentResponseAudioMs = 0;
642
731
  this.currentResponseFirstAudioAt = null;
643
732
  }
733
+ /** Truncate the in-flight assistant turn AND cancel the active response.
734
+ *
735
+ * Sends BOTH ``conversation.item.truncate`` (the played-offset bookkeeping)
736
+ * AND ``response.cancel``. Use this on the LEGACY client-managed barge-in
737
+ * path (``gateResponseOnTranscript`` true → ``interrupt_response: false``,
738
+ * so the server does NOT cancel for us) and for explicit cancels driven by
739
+ * Patter (e.g. on transfer / hangup). In server-managed mode call
740
+ * {@link truncate} instead — the server already cancels the response, and an
741
+ * extra ``response.cancel`` would be redundant / rejected.
742
+ *
743
+ * Truncation bounding semantics are identical to {@link truncate}; see its
744
+ * doc comment for the ``audio_end_ms`` wall-clock cap rationale.
745
+ */
746
+ cancelResponse() {
747
+ if (!this.ws) return;
748
+ if (!this.currentResponseItemId) {
749
+ return;
750
+ }
751
+ this.truncate();
752
+ this.ws.send(JSON.stringify({ type: "response.cancel" }));
753
+ }
644
754
  /** Inject a user text turn and request a new response. */
645
755
  async sendText(text) {
646
756
  this.ws?.send(JSON.stringify({
@@ -685,6 +795,32 @@ var init_openai_realtime = __esm({
685
795
  }
686
796
  }));
687
797
  }
798
+ /**
799
+ * Speak a short reassurance filler WITHOUT injecting a `role:user` turn.
800
+ *
801
+ * Same no-fake-turn shape as {@link sendFirstMessage}: a bare
802
+ * `response.create` carrying explicit `instructions`, so the filler is the
803
+ * assistant's own in-band audio. The reassurance scheduler in the
804
+ * stream-handler routes here instead of {@link sendText} — which would emit
805
+ * a `conversation.item.create` with `role:'user'` and falsely show the
806
+ * caller saying "One moment." in the transcript. Fillers must not imply
807
+ * success or failure.
808
+ *
809
+ * Uses `modalities: ['audio', 'text']` (v1-beta shape). The GA subclass
810
+ * {@link OpenAIRealtime2Adapter} overrides this with `output_modalities`
811
+ * and re-injects `audio.output.voice` so the GA endpoint does not reject
812
+ * the request. Mirrors Python `OpenAIRealtimeAdapter.send_reassurance` in
813
+ * `providers/openai_realtime.py`.
814
+ */
815
+ async sendReassurance(text) {
816
+ this.ws?.send(JSON.stringify({
817
+ type: "response.create",
818
+ response: {
819
+ modalities: ["audio", "text"],
820
+ instructions: `Say exactly this and nothing else: "${text}"`
821
+ }
822
+ }));
823
+ }
688
824
  /** Submit a tool/function-call result and request the next response. */
689
825
  async sendFunctionResult(callId, result) {
690
826
  this.ws?.send(JSON.stringify({
@@ -925,7 +1061,12 @@ var init_transcoding = __esm({
925
1061
  * Resets all state after flushing.
926
1062
  */
927
1063
  flush() {
928
- this.carry.flush();
1064
+ const carryTail = this.carry.flush();
1065
+ if (carryTail.length > 0) {
1066
+ getLogger().warn(
1067
+ "[patter] StatefulResampler.flush: trailing odd byte discarded \u2014 upstream produced odd-length PCM stream"
1068
+ );
1069
+ }
929
1070
  if (this.srcRate === 16e3 && this.dstRate === 8e3 && this.firPendingSample !== null) {
930
1071
  const s = this.firPendingSample;
931
1072
  const tmp = Buffer.alloc(4);
@@ -1165,44 +1306,46 @@ var init_openai_realtime_2 = __esm({
1165
1306
  buildGASessionConfig() {
1166
1307
  const opts = this.options;
1167
1308
  const fmt = { type: "audio/pcm", rate: 24e3 };
1309
+ const audioInput = {
1310
+ format: fmt,
1311
+ transcription: {
1312
+ model: opts.inputAudioTranscriptionModel ?? OpenAITranscriptionModel.WHISPER_1
1313
+ },
1314
+ // Response creation + barge-in cancellation (issue #154 — hand
1315
+ // turn-taking to the server by default):
1316
+ // - DEFAULT (`gateResponseOnTranscript` false → SERVER-MANAGED):
1317
+ // `create_response: true` lets the SERVER auto-create the response
1318
+ // when it commits the user's audio buffer
1319
+ // (`input_audio_buffer.committed`). `interrupt_response: true` lets the
1320
+ // SERVER cancel the in-flight response on its own VAD `speech_started`.
1321
+ // The e2e model replies immediately, in parallel with the Whisper
1322
+ // transcript — no transcript wait (~500 ms reclaimed), no client-side
1323
+ // race. On a WebSocket transport the client STILL must clear the
1324
+ // carrier buffer (`sendClear`) and `conversation.item.truncate` the
1325
+ // played offset on barge-in (the server only auto-truncates on
1326
+ // WebRTC/SIP), but it does NOT send `response.cancel`. Whisper is
1327
+ // display-only — it can never trigger / gate / cancel the response.
1328
+ // - LEGACY (`gateResponseOnTranscript` true → CLIENT-MANAGED opt-out):
1329
+ // `create_response: false` + `interrupt_response: false` so the stream
1330
+ // handler drives `response.create` (after the hallucination filter)
1331
+ // and `response.cancel` (on barge-in) itself. Escape hatch for no-AEC
1332
+ // PSTN self-interruption. Both keys are tied to the same switch inside
1333
+ // `buildTurnDetection`.
1334
+ turn_detection: buildTurnDetection(opts.turnDetection, {
1335
+ defaultType: opts.vadType ?? OpenAIRealtimeVADType.SERVER_VAD,
1336
+ defaultSilenceMs: opts.silenceDurationMs ?? 300,
1337
+ includeResponseGating: true,
1338
+ gateResponseOnTranscript: this.getGateResponseOnTranscript()
1339
+ })
1340
+ };
1341
+ if (opts.noiseReduction !== void 0) {
1342
+ audioInput.noise_reduction = { type: opts.noiseReduction };
1343
+ }
1168
1344
  const config2 = {
1169
1345
  type: "realtime",
1170
1346
  output_modalities: opts.modalities ?? ["audio"],
1171
1347
  audio: {
1172
- input: {
1173
- format: fmt,
1174
- transcription: {
1175
- model: opts.inputAudioTranscriptionModel ?? OpenAITranscriptionModel.WHISPER_1
1176
- },
1177
- // VAD threshold raised back to the OpenAI default (0.5) on
1178
- // 2026-05-22. The earlier 0.1 tuning (motivated by the
1179
- // upsampled telephony-band loss in high frequencies) made the
1180
- // server VAD trigger on the carrier-loopback echo of the
1181
- // agent's OWN outbound audio in PSTN no-AEC scenarios.
1182
- // Combined with the default ``turn_detection.create_response:
1183
- // true``, every phantom ``speech_started`` ended a turn early
1184
- // and auto-created a new response that the agent immediately
1185
- // spoke over, leading to a runaway loop where the first
1186
- // message was repeatedly cut and re-generated.
1187
- turn_detection: {
1188
- type: opts.vadType ?? OpenAIRealtimeVADType.SERVER_VAD,
1189
- threshold: 0.5,
1190
- prefix_padding_ms: 300,
1191
- silence_duration_ms: opts.silenceDurationMs ?? 500,
1192
- // Defer ``response.create`` to the application: when OpenAI's
1193
- // server VAD commits an ``input_audio_buffer.committed`` segment
1194
- // that turns out to be a Whisper hallucination on silence/echo,
1195
- // auto-creating a response would generate a phantom turn (the
1196
- // model reads the hallucinated text as user input). Patter
1197
- // triggers ``response.create`` explicitly in the Realtime
1198
- // stream-handler AFTER validating ``transcript_input`` against
1199
- // the hallucination filter. Pair with ``interrupt_response:
1200
- // false`` so server VAD also leaves in-flight responses alone —
1201
- // barge-in is gated client-side.
1202
- create_response: false,
1203
- interrupt_response: false
1204
- }
1205
- },
1348
+ input: audioInput,
1206
1349
  output: {
1207
1350
  format: fmt,
1208
1351
  voice: this.voice
@@ -1255,14 +1398,7 @@ var init_openai_realtime_2 = __esm({
1255
1398
  if (t && t in GA_TO_V1_EVENT_NAMES) {
1256
1399
  const newType = GA_TO_V1_EVENT_NAMES[t];
1257
1400
  if (t === "response.output_audio.delta" && typeof parsed.delta === "string") {
1258
- const mulaw = this.transcodeOutboundPcm24ToMulaw8Buffer(parsed.delta);
1259
- const FRAME_BYTES = 160;
1260
- if (mulaw.length === 0) return;
1261
- for (let off = 0; off < mulaw.length; off += FRAME_BYTES) {
1262
- const slice = mulaw.subarray(off, Math.min(off + FRAME_BYTES, mulaw.length));
1263
- const frame = { ...parsed, type: newType, delta: slice.toString("base64") };
1264
- handler(Buffer.from(JSON.stringify(frame)), ...rest);
1265
- }
1401
+ this.translateGaAudioDelta(parsed, handler, rest);
1266
1402
  return;
1267
1403
  }
1268
1404
  parsed.type = newType;
@@ -1291,6 +1427,7 @@ var init_openai_realtime_2 = __esm({
1291
1427
  sessionCreated = true;
1292
1428
  ws.send(JSON.stringify({ type: "session.update", session: this.buildGASessionConfig() }));
1293
1429
  } else if (msg.type === "session.updated") {
1430
+ this.warnIfOutputFormatUnexpected(msg);
1294
1431
  cleanup();
1295
1432
  resolve2();
1296
1433
  } else if (msg.type === "error") {
@@ -1396,6 +1533,10 @@ var init_openai_realtime_2 = __esm({
1396
1533
  };
1397
1534
  const timer = setTimeout(() => {
1398
1535
  cleanup();
1536
+ try {
1537
+ ws.close();
1538
+ } catch {
1539
+ }
1399
1540
  reject(new Error("OpenAI Realtime 2 park connect timeout"));
1400
1541
  }, 8e3);
1401
1542
  ws.on("message", onMessage);
@@ -1443,8 +1584,12 @@ var init_openai_realtime_2 = __esm({
1443
1584
  const parsed = JSON.parse(text);
1444
1585
  const t = parsed.type;
1445
1586
  if (t && Object.prototype.hasOwnProperty.call(GA_TO_V1_EVENT_NAMES, t)) {
1587
+ if (t === "response.output_audio.delta" && typeof parsed.delta === "string") {
1588
+ this.translateGaAudioDelta(parsed, handler, rest);
1589
+ return;
1590
+ }
1446
1591
  parsed.type = GA_TO_V1_EVENT_NAMES[t];
1447
- handler(JSON.stringify(parsed), ...rest);
1592
+ handler(Buffer.from(JSON.stringify(parsed)), ...rest);
1448
1593
  return;
1449
1594
  }
1450
1595
  } catch {
@@ -1529,6 +1674,55 @@ var init_openai_realtime_2 = __esm({
1529
1674
  }
1530
1675
  return out;
1531
1676
  }
1677
+ /**
1678
+ * Log-only safety net for issue #154. The GA server echoes the *effective*
1679
+ * session config in `session.updated`; we request `audio/pcm` @ 24 kHz and
1680
+ * transcode PCM24→mulaw8 ourselves (see
1681
+ * `transcodeOutboundPcm24ToMulaw8Buffer`). If a future GA schema change ever
1682
+ * made the server return a different output format, that transcode — which
1683
+ * assumes PCM16-LE @ 24 kHz — would silently corrupt audio, exactly the
1684
+ * v1-beta failure mode #154 fixed. Warn so the drift surfaces in logs instead
1685
+ * of as static. Never gates audio.
1686
+ */
1687
+ warnIfOutputFormatUnexpected(msg) {
1688
+ const fmt = msg?.session?.audio?.output?.format;
1689
+ if (!fmt || typeof fmt !== "object") return;
1690
+ if (fmt.type !== "audio/pcm" || fmt.rate != null && fmt.rate !== 24e3) {
1691
+ getLogger().warn(
1692
+ `OpenAI Realtime 2: server-echoed output format ${JSON.stringify(fmt)} differs from the requested audio/pcm@24000 \u2014 the outbound PCM24\u2192mulaw8 transcode assumes PCM16-LE 24 kHz, so carrier audio may be garbled (issue #154). Informational only; audio is not gated on this.`
1693
+ );
1694
+ }
1695
+ }
1696
+ /**
1697
+ * Shared audio-delta translation helper. Transcodes a GA
1698
+ * `response.output_audio.delta` payload (base64 PCM-16-LE 24 kHz)
1699
+ * into mulaw 8 kHz and splits the result into 160-byte (20 ms) frames,
1700
+ * dispatching one synthetic `response.audio.delta` event per frame.
1701
+ *
1702
+ * Called from BOTH the `connect()` shim and the `adoptWebSocket()` shim
1703
+ * so that warm-path (prewarm/adopted) calls receive identical transcoding
1704
+ * to cold-path calls. Without this, adopted sockets forwarded raw PCM-24
1705
+ * to Twilio/Telnyx, producing garbled or silent audio on every warm call.
1706
+ *
1707
+ * @param parsed - The parsed GA event object (type already checked to be
1708
+ * `response.output_audio.delta` with a string `delta`).
1709
+ * @param handler - The downstream message listener to dispatch each frame to.
1710
+ * @param rest - Extra arguments forwarded from the original `message` event.
1711
+ * @returns `true` if frames were dispatched (caller should return early),
1712
+ * `false` if the resampler is still warming up (zero output bytes).
1713
+ */
1714
+ translateGaAudioDelta(parsed, handler, rest) {
1715
+ const newType = GA_TO_V1_EVENT_NAMES["response.output_audio.delta"];
1716
+ const mulaw = this.transcodeOutboundPcm24ToMulaw8Buffer(parsed.delta);
1717
+ const FRAME_BYTES = 160;
1718
+ if (mulaw.length === 0) return false;
1719
+ for (let off = 0; off < mulaw.length; off += FRAME_BYTES) {
1720
+ const slice = mulaw.subarray(off, Math.min(off + FRAME_BYTES, mulaw.length));
1721
+ const frame = { ...parsed, type: newType, delta: slice.toString("base64") };
1722
+ handler(Buffer.from(JSON.stringify(frame)), ...rest);
1723
+ }
1724
+ return true;
1725
+ }
1532
1726
  /**
1533
1727
  * Base64 PCM-16-LE 24 kHz → Base64 mulaw 8 kHz. Used by the WS
1534
1728
  * translation shim on each `response.output_audio.delta`. The stateful
@@ -1558,6 +1752,34 @@ var init_openai_realtime_2 = __esm({
1558
1752
  }
1559
1753
  this.ws?.send(JSON.stringify({ type: "response.create", response: responseBody }));
1560
1754
  }
1755
+ /**
1756
+ * Speak a short reassurance filler WITHOUT injecting a `role:user` turn.
1757
+ *
1758
+ * GA-shape sibling of {@link sendFirstMessage} (and override of the base v1
1759
+ * {@link OpenAIRealtimeAdapter.sendReassurance}): a bare `response.create`
1760
+ * carrying explicit `instructions` so the filler is the assistant's own
1761
+ * in-band audio. No `conversation.item.create` with `role:"user"` is
1762
+ * emitted, so the transcript shows no phantom caller line. The GA endpoint
1763
+ * rejects `response.modalities` and does not inherit `audio.output.voice`
1764
+ * for an explicit `response.create`, so — exactly as in
1765
+ * {@link sendFirstMessage} — we send `output_modalities` and re-inject the
1766
+ * voice. Fillers must not imply success or failure.
1767
+ *
1768
+ * Mirrors Python `OpenAIRealtime2Adapter.send_reassurance` in
1769
+ * `providers/openai_realtime_2.py`.
1770
+ */
1771
+ async sendReassurance(text) {
1772
+ if (!this.ws) return;
1773
+ const responseBody = {
1774
+ output_modalities: ["audio"],
1775
+ audio: { output: { voice: this.voice } },
1776
+ instructions: `Say exactly this and nothing else: "${text}"`
1777
+ };
1778
+ if (this.options.reasoningEffort !== void 0) {
1779
+ responseBody.reasoning = { effort: this.options.reasoningEffort };
1780
+ }
1781
+ this.ws.send(JSON.stringify({ type: "response.create", response: responseBody }));
1782
+ }
1561
1783
  };
1562
1784
  }
1563
1785
  });
@@ -2345,11 +2567,25 @@ function calculateRealtimeCachedSavings(usage, pricing, model) {
2345
2567
  const rates = resolveProviderRates(pricing.openai_realtime, model);
2346
2568
  if (rates.unit !== "token") return 0;
2347
2569
  const input = usage.input_token_details ?? {};
2348
- const cached2 = input.cached_tokens_details ?? {};
2349
2570
  const cachedAudioRate = rates.cached_audio_input_per_token ?? rates.audio_input_per_token ?? 0;
2350
2571
  const cachedTextRate = rates.cached_text_input_per_token ?? rates.text_input_per_token ?? 0;
2351
- const cachedAudio = Math.min(cached2.audio_tokens ?? 0, input.audio_tokens ?? 0);
2352
- const cachedText = Math.min(cached2.text_tokens ?? 0, input.text_tokens ?? 0);
2572
+ const totalAudio = input.audio_tokens ?? 0;
2573
+ const totalText = input.text_tokens ?? 0;
2574
+ let cachedAudio;
2575
+ let cachedText;
2576
+ const details = input.cached_tokens_details;
2577
+ if (details && (details.audio_tokens !== void 0 || details.text_tokens !== void 0)) {
2578
+ cachedAudio = Math.min(details.audio_tokens ?? 0, totalAudio);
2579
+ cachedText = Math.min(details.text_tokens ?? 0, totalText);
2580
+ } else if (input.cached_tokens && input.cached_tokens > 0) {
2581
+ const totalIn = totalAudio + totalText;
2582
+ const ratio = totalIn > 0 ? input.cached_tokens / totalIn : 0;
2583
+ cachedAudio = Math.min(Math.round(totalAudio * ratio), totalAudio);
2584
+ cachedText = Math.min(Math.round(totalText * ratio), totalText);
2585
+ } else {
2586
+ cachedAudio = 0;
2587
+ cachedText = 0;
2588
+ }
2353
2589
  const fullAudio = cachedAudio * (rates.audio_input_per_token ?? 0);
2354
2590
  const fullText = cachedText * (rates.text_input_per_token ?? 0);
2355
2591
  const discountedAudio = cachedAudio * cachedAudioRate;
@@ -2797,8 +3033,8 @@ function loadTranscriptJsonl(filePath) {
2797
3033
  } catch {
2798
3034
  continue;
2799
3035
  }
2800
- const tsIso = typeof row.ts === "string" ? Date.parse(row.ts) : NaN;
2801
- const tsNumeric = typeof row.timestamp === "number" ? row.timestamp * 1e3 : NaN;
3036
+ const tsIso = typeof row.ts === "string" ? Date.parse(row.ts) / 1e3 : NaN;
3037
+ const tsNumeric = typeof row.timestamp === "number" ? row.timestamp : NaN;
2802
3038
  const timestamp = Number.isFinite(tsIso) ? tsIso : Number.isFinite(tsNumeric) ? tsNumeric : 0;
2803
3039
  const userText = typeof row.user_text === "string" ? row.user_text : "";
2804
3040
  const agentText = typeof row.agent_text === "string" ? row.agent_text : "";
@@ -2956,14 +3192,49 @@ var init_store = __esm({
2956
3192
  } else {
2957
3193
  for (let i = this.calls.length - 1; i >= 0; i--) {
2958
3194
  if (this.calls[i].call_id === callId) {
2959
- this.calls[i].status = status;
2960
- Object.assign(this.calls[i], extra);
3195
+ this.calls[i] = { ...this.calls[i], status, ...extra };
2961
3196
  break;
2962
3197
  }
2963
3198
  }
2964
3199
  }
2965
3200
  this.publish("call_status", { call_id: callId, status, ...extra });
2966
3201
  }
3202
+ /**
3203
+ * Record a single transcript line (user/assistant) as it becomes known.
3204
+ *
3205
+ * FIX-5 (issue #154): the live forward path for the dashboard transcript.
3206
+ * The Realtime stream handler calls this the moment each line is known — the
3207
+ * user line right after the hallucination filter accepts it, the assistant
3208
+ * line when its turn flushes — keyed by the monotonic ``turnIndex`` reserved
3209
+ * at turn-open (``reserveTurnIndex``). Each line is appended to the active
3210
+ * call's ``transcript`` array and broadcast over SSE as a ``transcript_line``
3211
+ * event so the dashboard can render lines as they arrive and re-sort by
3212
+ * ``(turnIndex, user<assistant)`` — making a late-arriving user line land
3213
+ * ABOVE its agent line. ``recordTurn`` de-dups against the lines pushed here
3214
+ * by ``(turnIndex, role)`` so the metrics path never double-pushes the same
3215
+ * text. Parity with Python ``record_transcript_line``.
3216
+ */
3217
+ recordTranscriptLine(data) {
3218
+ const callId = data.call_id || "";
3219
+ const { role, text, turnIndex } = data;
3220
+ if (!callId || role !== "user" && role !== "assistant" || !text) return;
3221
+ const active = this.activeCalls.get(callId);
3222
+ if (active) {
3223
+ if (!active.transcript) active.transcript = [];
3224
+ active.transcript.push({
3225
+ role,
3226
+ text,
3227
+ timestamp: Date.now() / 1e3,
3228
+ turnIndex
3229
+ });
3230
+ }
3231
+ this.publish("transcript_line", {
3232
+ call_id: callId,
3233
+ turnIndex,
3234
+ role,
3235
+ text
3236
+ });
3237
+ }
2967
3238
  /** Append a single conversation turn to an active call and broadcast it via SSE. */
2968
3239
  recordTurn(data) {
2969
3240
  const callId = data.call_id || "";
@@ -2978,14 +3249,19 @@ var init_store = __esm({
2978
3249
  const userText = typeof turnRecord.user_text === "string" ? turnRecord.user_text : "";
2979
3250
  const agentText = typeof turnRecord.agent_text === "string" ? turnRecord.agent_text : "";
2980
3251
  const ts = typeof turnRecord.timestamp === "number" ? turnRecord.timestamp : Date.now() / 1e3;
2981
- if (userText.length > 0) {
2982
- active.transcript.push({ role: "user", text: userText, timestamp: ts });
3252
+ const turnIndex = typeof turnRecord.turn_index === "number" ? turnRecord.turn_index : void 0;
3253
+ const alreadyLive = (role) => turnIndex !== void 0 && (active.transcript ?? []).some(
3254
+ (e) => e.turnIndex === turnIndex && e.role === role
3255
+ );
3256
+ if (userText.length > 0 && !alreadyLive("user")) {
3257
+ active.transcript.push({ role: "user", text: userText, timestamp: ts, turnIndex });
2983
3258
  }
2984
- if (agentText.length > 0 && agentText !== "[interrupted]") {
3259
+ if (agentText.length > 0 && agentText !== "[interrupted]" && !alreadyLive("assistant")) {
2985
3260
  active.transcript.push({
2986
3261
  role: "assistant",
2987
3262
  text: agentText,
2988
- timestamp: ts
3263
+ timestamp: ts,
3264
+ turnIndex
2989
3265
  });
2990
3266
  }
2991
3267
  }
@@ -3058,7 +3334,7 @@ var init_store = __esm({
3058
3334
  getCall(callId) {
3059
3335
  if (this.deletedCallIds.has(callId)) return null;
3060
3336
  for (let i = this.calls.length - 1; i >= 0; i--) {
3061
- if (this.calls[i].call_id === callId) return this.calls[i];
3337
+ if (this.calls[i].call_id === callId) return { ...this.calls[i] };
3062
3338
  }
3063
3339
  return null;
3064
3340
  }
@@ -3100,7 +3376,9 @@ var init_store = __esm({
3100
3376
  }
3101
3377
  if (accepted.length === 0) return [];
3102
3378
  accepted.sort();
3103
- this.persistDeletedIds();
3379
+ this.persistDeletedIds().catch(
3380
+ (err) => getLogger().debug(`MetricsStore.deleteCalls: persistDeletedIds failed: ${String(err)}`)
3381
+ );
3104
3382
  this.publish("calls_deleted", { call_ids: accepted });
3105
3383
  return accepted;
3106
3384
  }
@@ -3112,19 +3390,19 @@ var init_store = __esm({
3112
3390
  getDeletedCallIds() {
3113
3391
  return Array.from(this.deletedCallIds).sort();
3114
3392
  }
3115
- /** Atomically persist the deleted-ids set to disk. Best-effort. */
3116
- persistDeletedIds() {
3393
+ /** Atomically persist the deleted-ids set to disk. Best-effort async. */
3394
+ async persistDeletedIds() {
3117
3395
  if (this.deletedIdsPath === null) return;
3118
3396
  try {
3119
3397
  const dir = path2.dirname(this.deletedIdsPath);
3120
- fs2.mkdirSync(dir, { recursive: true });
3398
+ await fs2.promises.mkdir(dir, { recursive: true });
3121
3399
  const tmp = this.deletedIdsPath + ".tmp";
3122
3400
  const payload = {
3123
3401
  version: 1,
3124
3402
  deleted_call_ids: Array.from(this.deletedCallIds).sort()
3125
3403
  };
3126
- fs2.writeFileSync(tmp, JSON.stringify(payload, null, 2), "utf8");
3127
- fs2.renameSync(tmp, this.deletedIdsPath);
3404
+ await fs2.promises.writeFile(tmp, JSON.stringify(payload, null, 2), "utf8");
3405
+ await fs2.promises.rename(tmp, this.deletedIdsPath);
3128
3406
  } catch (err) {
3129
3407
  getLogger().debug(
3130
3408
  `MetricsStore.persistDeletedIds: ${String(err)}`
@@ -3133,7 +3411,8 @@ var init_store = __esm({
3133
3411
  }
3134
3412
  /** Look up an active call by id (returns undefined if not active or unknown). */
3135
3413
  getActive(callId) {
3136
- return this.activeCalls.get(callId);
3414
+ const rec = this.activeCalls.get(callId);
3415
+ return rec !== void 0 ? { ...rec } : void 0;
3137
3416
  }
3138
3417
  /** Return all currently active (not yet ended) calls. */
3139
3418
  getActiveCalls() {
@@ -3460,8 +3739,8 @@ function mountDashboard(app, store, token = "") {
3460
3739
  res.type("text/html").send(DASHBOARD_HTML);
3461
3740
  });
3462
3741
  app.get("/api/dashboard/calls", auth2, (req, res) => {
3463
- const limit = Math.min(parseInt(req.query.limit || "50", 10) || 50, 1e3);
3464
- const offset = parseInt(req.query.offset || "0", 10) || 0;
3742
+ const limit = Math.min(Math.max(0, parseInt(req.query.limit || "50", 10) || 50), 1e3);
3743
+ const offset = Math.max(0, parseInt(req.query.offset || "0", 10) || 0);
3465
3744
  res.json(store.getCalls(limit, offset));
3466
3745
  });
3467
3746
  app.get("/api/dashboard/calls/:callId", auth2, (req, res) => {
@@ -3551,8 +3830,8 @@ data: ${data}
3551
3830
  function mountApi(app, store, token = "") {
3552
3831
  const auth2 = makeAuthMiddleware(token);
3553
3832
  app.get("/api/v1/calls", auth2, (req, res) => {
3554
- const limit = Math.min(parseInt(req.query.limit || "50", 10) || 50, 1e3);
3555
- const offset = parseInt(req.query.offset || "0", 10) || 0;
3833
+ const limit = Math.min(Math.max(0, parseInt(req.query.limit || "50", 10) || 50), 1e3);
3834
+ const offset = Math.max(0, parseInt(req.query.offset || "0", 10) || 0);
3556
3835
  const calls = store.getCalls(limit, offset);
3557
3836
  res.json({
3558
3837
  data: calls,
@@ -3831,14 +4110,31 @@ var init_remote_message = __esm({
3831
4110
  while (chunks.length > 0) {
3832
4111
  yield chunks.shift();
3833
4112
  }
4113
+ const READ_TIMEOUT_MS = 3e4;
3834
4114
  while (!done && !error2) {
3835
- const text = await new Promise((resolve2) => {
4115
+ const messagePromise = new Promise((resolve2) => {
3836
4116
  if (chunks.length > 0) {
3837
4117
  resolve2(chunks.shift());
3838
4118
  } else {
3839
4119
  resolveNext = resolve2;
3840
4120
  }
3841
4121
  });
4122
+ let timeoutHandle;
4123
+ const timeoutPromise = new Promise((_, reject) => {
4124
+ timeoutHandle = setTimeout(
4125
+ () => reject(new Error("WebSocket read timeout: no frame received within 30 s")),
4126
+ READ_TIMEOUT_MS
4127
+ );
4128
+ });
4129
+ let text;
4130
+ try {
4131
+ text = await Promise.race([messagePromise, timeoutPromise]);
4132
+ } catch (timeoutErr) {
4133
+ resolveNext = null;
4134
+ throw timeoutErr;
4135
+ } finally {
4136
+ clearTimeout(timeoutHandle);
4137
+ }
3842
4138
  if (text === null) break;
3843
4139
  yield text;
3844
4140
  }
@@ -4080,18 +4376,6 @@ var init_deepgram_stt = __esm({
4080
4376
  } catch {
4081
4377
  return;
4082
4378
  }
4083
- const dataType = String(data.type ?? "unknown");
4084
- if (dataType === "Results") {
4085
- const transcript2 = (data.channel?.alternatives?.[0]?.transcript ?? "").trim();
4086
- const isFinal = Boolean(data.is_final);
4087
- const speechFinal2 = Boolean(data.speech_final);
4088
- const fromFinalize = Boolean(data.from_finalize);
4089
- getLogger().info(
4090
- `[DIAG] DG Results text=${JSON.stringify(transcript2.slice(0, 60))} isFinal=${isFinal} speechFinal=${speechFinal2} fromFinalize=${fromFinalize}`
4091
- );
4092
- } else if (dataType !== "Metadata") {
4093
- getLogger().info(`[DIAG] DG event type=${dataType}`);
4094
- }
4095
4379
  if (data.type === "Metadata" && data.request_id) {
4096
4380
  this.requestId = data.request_id;
4097
4381
  return;
@@ -4181,7 +4465,7 @@ var init_deepgram_stt = __esm({
4181
4465
  if (!this.ws || this.ws.readyState !== import_ws4.default.OPEN) {
4182
4466
  this.audioDroppedCount++;
4183
4467
  if (this.audioDroppedCount === 1 || this.audioDroppedCount % 50 === 0) {
4184
- getLogger().info(
4468
+ getLogger().debug(
4185
4469
  `[DIAG] DeepgramSTT.sendAudio dropped (ws state=${this.ws?.readyState ?? "null"}) \u2014 total dropped=${this.audioDroppedCount}`
4186
4470
  );
4187
4471
  }
@@ -4190,7 +4474,7 @@ var init_deepgram_stt = __esm({
4190
4474
  if (audio.length === 0) return;
4191
4475
  this.audioSentCount++;
4192
4476
  if (this.audioSentCount === 1 || this.audioSentCount % 100 === 0) {
4193
- getLogger().info(
4477
+ getLogger().debug(
4194
4478
  `[DIAG] DeepgramSTT.sendAudio: total chunks sent=${this.audioSentCount} (last=${audio.length} bytes)`
4195
4479
  );
4196
4480
  }
@@ -4228,16 +4512,16 @@ var init_deepgram_stt = __esm({
4228
4512
  finalize() {
4229
4513
  const ws = this.ws;
4230
4514
  if (!ws || ws.readyState !== import_ws4.default.OPEN) {
4231
- getLogger().info(
4515
+ getLogger().debug(
4232
4516
  `[DIAG] DeepgramSTT.finalize SKIPPED (ws state=${ws?.readyState ?? "null"})`
4233
4517
  );
4234
4518
  return;
4235
4519
  }
4236
4520
  try {
4237
4521
  ws.send(JSON.stringify({ type: "Finalize" }));
4238
- getLogger().info("[DIAG] DeepgramSTT.finalize sent {type:Finalize}");
4522
+ getLogger().debug("[DIAG] DeepgramSTT.finalize sent {type:Finalize}");
4239
4523
  } catch (err) {
4240
- getLogger().info(`[DIAG] DeepgramSTT.finalize send failed: ${String(err)}`);
4524
+ getLogger().debug(`[DIAG] DeepgramSTT.finalize send failed: ${String(err)}`);
4241
4525
  }
4242
4526
  }
4243
4527
  /** Send Finalize, briefly drain trailing transcripts, then close the socket. */
@@ -4317,6 +4601,7 @@ var init_metrics = __esm({
4317
4601
  _pricing;
4318
4602
  _callStart;
4319
4603
  _turns = [];
4604
+ // mutable internal array; immutable when exposed via TurnMetrics[] → readonly TurnMetrics[]
4320
4605
  // Per-turn timing state
4321
4606
  _turnStart = null;
4322
4607
  _sttComplete = null;
@@ -4403,6 +4688,16 @@ var init_metrics = __esm({
4403
4688
  * (the common cause of missing endpoint signals).
4404
4689
  */
4405
4690
  _endpointSignalMissingCount = 0;
4691
+ /**
4692
+ * Monotonic per-call turn counter. Reserved at turn OPEN
4693
+ * (``onAdapterSpeechStopped`` / ``speech_stopped``) via
4694
+ * ``reserveTurnIndex()`` and threaded through the buffering pipeline into
4695
+ * ``recordTurnComplete`` / ``recordTurnInterrupted`` as ``preReservedIndex``.
4696
+ * This makes ``turn_index`` stable under drops / interrupts (previously it
4697
+ * was assigned at completion as ``this._turns.length``, which shifted when a
4698
+ * turn was dropped). Parity with Python ``_next_turn_index``.
4699
+ */
4700
+ _nextTurnIndex = 0;
4406
4701
  constructor(opts) {
4407
4702
  this.callId = opts.callId;
4408
4703
  this.providerMode = opts.providerMode;
@@ -4451,12 +4746,27 @@ var init_metrics = __esm({
4451
4746
  this._turnUserText = "";
4452
4747
  this._turnSttAudioSeconds = 0;
4453
4748
  this._turnAlreadyClosed = false;
4749
+ this._initialTtfbEmitted = false;
4454
4750
  this._vadStoppedAt = null;
4455
4751
  this._sttFinalAt = null;
4456
4752
  this._turnCommittedAt = null;
4457
4753
  this._onUserTurnCompletedDelayMs = null;
4458
4754
  this._eventBus?.emit("turn_started", { callId: this.callId });
4459
4755
  }
4756
+ /**
4757
+ * Reserve and return the next monotonic turn index.
4758
+ *
4759
+ * Called once per turn at the moment the turn OPENS (Realtime:
4760
+ * ``onAdapterSpeechStopped``). The returned index is threaded through the
4761
+ * buffering pipeline and handed back to ``recordTurnComplete`` /
4762
+ * ``recordTurnInterrupted`` as ``preReservedIndex`` so the emitted
4763
+ * ``turn_index`` matches the live per-line transcript ordering even when a
4764
+ * turn is dropped or interrupted between open and close. Parity with Python
4765
+ * ``reserve_turn_index``.
4766
+ */
4767
+ reserveTurnIndex() {
4768
+ return this._nextTurnIndex++;
4769
+ }
4460
4770
  /**
4461
4771
  * Start a new turn only if no turn is currently open.
4462
4772
  * Use this at inbound-audio ingestion points so the turn timer begins
@@ -4494,6 +4804,7 @@ var init_metrics = __esm({
4494
4804
  anchorUserSpeechStart() {
4495
4805
  if (this._turnCommittedMono !== null) return;
4496
4806
  this._turnStart = hrTimeMs();
4807
+ this._turnAlreadyClosed = false;
4497
4808
  this._endpointSignalAt = null;
4498
4809
  this._vadStoppedAt = null;
4499
4810
  this._sttFinalAt = null;
@@ -4617,11 +4928,14 @@ var init_metrics = __esm({
4617
4928
  * ``user_text=''``. The caller treats ``null`` as "nothing to emit";
4618
4929
  * ``emitTurnMetrics`` is already null-safe.
4619
4930
  */
4620
- recordTurnComplete(agentText) {
4931
+ recordTurnComplete(agentText, preReservedIndex) {
4621
4932
  if (this._turnAlreadyClosed) return null;
4622
4933
  const latency = this._computeTurnLatency();
4623
4934
  const turn = {
4624
- turn_index: this._turns.length,
4935
+ // Use the pre-reserved index (stable across drops/interrupts) when the
4936
+ // caller threaded one through; otherwise fall back to the append
4937
+ // position for back-compat with callers that never reserved.
4938
+ turn_index: preReservedIndex ?? this._turns.length,
4625
4939
  user_text: this._turnUserText,
4626
4940
  agent_text: agentText,
4627
4941
  latency,
@@ -4630,10 +4944,10 @@ var init_metrics = __esm({
4630
4944
  timestamp: Date.now() / 1e3
4631
4945
  };
4632
4946
  this._turns.push(turn);
4633
- this._resetTurnState();
4634
- this._turnAlreadyClosed = true;
4635
4947
  this._eventBus?.emit("turn_ended", { callId: this.callId, turn });
4636
4948
  this._eventBus?.emit("metrics_collected", { callId: this.callId, turn });
4949
+ this._resetTurnState();
4950
+ this._turnAlreadyClosed = true;
4637
4951
  return turn;
4638
4952
  }
4639
4953
  /**
@@ -4645,12 +4959,12 @@ var init_metrics = __esm({
4645
4959
  * a future refactor that reorders the bargein + LLM-unwind paths)
4646
4960
  * from overwriting a turn that the complete path already emitted.
4647
4961
  */
4648
- recordTurnInterrupted() {
4962
+ recordTurnInterrupted(preReservedIndex) {
4649
4963
  if (this._turnStart === null) return null;
4650
4964
  if (this._turnAlreadyClosed) return null;
4651
4965
  const latency = this._computeTurnLatency();
4652
4966
  const turn = {
4653
- turn_index: this._turns.length,
4967
+ turn_index: preReservedIndex ?? this._turns.length,
4654
4968
  user_text: this._turnUserText,
4655
4969
  agent_text: "[interrupted]",
4656
4970
  latency,
@@ -4702,8 +5016,10 @@ var init_metrics = __esm({
4702
5016
  }
4703
5017
  /**
4704
5018
  * Record the delta (ms) between turn-committed and when on_user_turn_completed
4705
- * pipeline hook finished. Stored for inclusion in the next ``emitEouMetrics``
4706
- * call (or an explicit re-emit if desired).
5019
+ * pipeline hook finished. Does NOT re-emit: like Python's
5020
+ * ``record_on_user_turn_completed_delay``, this only stores the value; the
5021
+ * single EOU emission happens on ``recordTurnCommitted`` (3-timestamp guard,
5022
+ * delay defaults to 0 if not yet recorded).
4707
5023
  */
4708
5024
  recordOnUserTurnCompletedDelay(delayMs) {
4709
5025
  this._onUserTurnCompletedDelayMs = delayMs;
@@ -4716,7 +5032,7 @@ var init_metrics = __esm({
4716
5032
  * ``transcriptionDelay`` = turnCommitted − vadStopped (ms)
4717
5033
  * ``onUserTurnCompletedDelay`` = caller-supplied delta (ms) or 0
4718
5034
  */
4719
- /** Emit `EOUMetrics` once VAD-stop, STT-final, and turn-committed timestamps are all known. */
5035
+ /** Emit `EOUMetrics` once VAD-stop, STT-final, turn-committed, and on_user_turn_completed delay are all known. */
4720
5036
  emitEouMetrics() {
4721
5037
  if (this._vadStoppedAt === null || this._sttFinalAt === null || this._turnCommittedAt === null) {
4722
5038
  return;
@@ -5265,10 +5581,13 @@ var init_circuit_breaker = __esm({
5265
5581
  if (s.state === CircuitBreakerState.OPEN) {
5266
5582
  if (this.clock() - s.openedAt >= this.cooldownMs) {
5267
5583
  s.state = CircuitBreakerState.HALF_OPEN;
5584
+ s.probeInFlight = true;
5268
5585
  return true;
5269
5586
  }
5270
5587
  return false;
5271
5588
  }
5589
+ if (s.probeInFlight) return false;
5590
+ s.probeInFlight = true;
5272
5591
  return true;
5273
5592
  }
5274
5593
  /** Mark a successful execution. Resets the breaker to CLOSED. */
@@ -5278,19 +5597,21 @@ var init_circuit_breaker = __esm({
5278
5597
  s.state = CircuitBreakerState.CLOSED;
5279
5598
  s.consecutiveFailures = 0;
5280
5599
  s.openedAt = 0;
5600
+ s.probeInFlight = false;
5281
5601
  }
5282
5602
  /** Mark a failed execution; trips OPEN once threshold is reached. */
5283
5603
  recordFailure(toolName) {
5284
5604
  if (this.threshold <= 0) return;
5285
5605
  let s = this.state.get(toolName);
5286
5606
  if (!s) {
5287
- s = { state: CircuitBreakerState.CLOSED, consecutiveFailures: 0, openedAt: 0 };
5607
+ s = { state: CircuitBreakerState.CLOSED, consecutiveFailures: 0, openedAt: 0, probeInFlight: false };
5288
5608
  this.state.set(toolName, s);
5289
5609
  }
5290
5610
  s.consecutiveFailures += 1;
5291
5611
  if (s.consecutiveFailures >= this.threshold) {
5292
5612
  s.state = CircuitBreakerState.OPEN;
5293
5613
  s.openedAt = this.clock();
5614
+ s.probeInFlight = false;
5294
5615
  }
5295
5616
  }
5296
5617
  /**
@@ -5314,6 +5635,10 @@ var init_circuit_breaker = __esm({
5314
5635
  });
5315
5636
 
5316
5637
  // src/llm-loop.ts
5638
+ function resolveToolTimeoutMs(toolTimeoutMs, defaultMs) {
5639
+ if (toolTimeoutMs === void 0) return defaultMs;
5640
+ return Math.max(100, Math.min(toolTimeoutMs, MAX_TOOL_TIMEOUT_MS));
5641
+ }
5317
5642
  async function invokeHandler(handler, args, callContext, onProgress) {
5318
5643
  const invoked = handler(args, callContext);
5319
5644
  if (invoked && typeof invoked === "object" && typeof invoked[Symbol.asyncIterator] === "function" && typeof invoked.next === "function") {
@@ -5372,7 +5697,7 @@ function mergeAbortSignals(...signals) {
5372
5697
  }
5373
5698
  return controller.signal;
5374
5699
  }
5375
- var DEFAULT_TOOL_MAX_RETRIES, DEFAULT_TOOL_RETRY_DELAY_MS, DEFAULT_TOOL_TIMEOUT_MS, TOOL_MAX_RESPONSE_BYTES, DefaultToolExecutor, OpenAILLMProvider, DEFAULT_PHONE_PREAMBLE, LLMLoop;
5700
+ var DEFAULT_TOOL_MAX_RETRIES, DEFAULT_TOOL_RETRY_DELAY_MS, DEFAULT_TOOL_TIMEOUT_MS, MAX_TOOL_TIMEOUT_MS, TOOL_MAX_RESPONSE_BYTES, ToolTimeoutError, DefaultToolExecutor, OpenAILLMProvider, DEFAULT_PHONE_PREAMBLE, LLMLoop;
5376
5701
  var init_llm_loop = __esm({
5377
5702
  "src/llm-loop.ts"() {
5378
5703
  "use strict";
@@ -5385,7 +5710,14 @@ var init_llm_loop = __esm({
5385
5710
  DEFAULT_TOOL_MAX_RETRIES = 2;
5386
5711
  DEFAULT_TOOL_RETRY_DELAY_MS = 500;
5387
5712
  DEFAULT_TOOL_TIMEOUT_MS = 1e4;
5713
+ MAX_TOOL_TIMEOUT_MS = 3e5;
5388
5714
  TOOL_MAX_RESPONSE_BYTES = 1 * 1024 * 1024;
5715
+ ToolTimeoutError = class extends Error {
5716
+ constructor(message) {
5717
+ super(message);
5718
+ this.name = "ToolTimeoutError";
5719
+ }
5720
+ };
5389
5721
  DefaultToolExecutor = class {
5390
5722
  maxRetries;
5391
5723
  retryDelayMs;
@@ -5411,15 +5743,41 @@ var init_llm_loop = __esm({
5411
5743
  retry_after_ms: cooldown
5412
5744
  });
5413
5745
  }
5746
+ const effectiveTimeoutMs = resolveToolTimeoutMs(
5747
+ toolDef.timeoutMs,
5748
+ this.requestTimeoutMs
5749
+ );
5414
5750
  if (toolDef.handler) {
5415
5751
  const totalAttempts = this.maxRetries + 1;
5416
5752
  let lastErr = null;
5417
5753
  for (let attempt = 0; attempt < totalAttempts; attempt++) {
5754
+ let timeoutTimer;
5418
5755
  try {
5419
- const result = await invokeHandler(toolDef.handler, args, callContext, onProgress);
5756
+ const handlerPromise = invokeHandler(toolDef.handler, args, callContext, onProgress);
5757
+ const result = await Promise.race([
5758
+ handlerPromise,
5759
+ new Promise((_, reject) => {
5760
+ timeoutTimer = setTimeout(
5761
+ () => reject(
5762
+ new ToolTimeoutError(
5763
+ `Tool handler '${toolDef.name}' timed out after ${effectiveTimeoutMs}ms`
5764
+ )
5765
+ ),
5766
+ effectiveTimeoutMs
5767
+ );
5768
+ })
5769
+ ]);
5420
5770
  this.breaker.recordSuccess(toolDef.name);
5421
5771
  return result;
5422
5772
  } catch (e) {
5773
+ if (e instanceof ToolTimeoutError) {
5774
+ getLogger().error(String(e));
5775
+ this.breaker.recordFailure(toolDef.name);
5776
+ return JSON.stringify({
5777
+ error: String(e),
5778
+ fallback: true
5779
+ });
5780
+ }
5423
5781
  lastErr = e;
5424
5782
  if (attempt < totalAttempts - 1) {
5425
5783
  getLogger().warn(
@@ -5427,6 +5785,8 @@ var init_llm_loop = __esm({
5427
5785
  );
5428
5786
  await new Promise((r) => setTimeout(r, backoffDelayMs(this.retryDelayMs, attempt)));
5429
5787
  }
5788
+ } finally {
5789
+ if (timeoutTimer !== void 0) clearTimeout(timeoutTimer);
5430
5790
  }
5431
5791
  }
5432
5792
  this.breaker.recordFailure(toolDef.name);
@@ -5463,7 +5823,10 @@ var init_llm_loop = __esm({
5463
5823
  ...callContext,
5464
5824
  attempt: attempt + 1
5465
5825
  }),
5466
- signal: AbortSignal.timeout(this.requestTimeoutMs)
5826
+ // Use per-tool timeout when set, otherwise fall back to
5827
+ // the executor-level default. Mirrors Python's per-request
5828
+ // ``timeout=`` override on httpx.AsyncClient.post().
5829
+ signal: AbortSignal.timeout(effectiveTimeoutMs)
5467
5830
  });
5468
5831
  if (!resp.ok) throw new Error(`HTTP ${resp.status}`);
5469
5832
  const result = JSON.stringify(await resp.json());
@@ -5589,7 +5952,7 @@ var init_llm_loop = __esm({
5589
5952
  body.tools = tools;
5590
5953
  }
5591
5954
  const signal = mergeAbortSignals(opts?.signal, AbortSignal.timeout(3e4));
5592
- const response = await fetch("https://api.openai.com/v1/chat/completions", {
5955
+ const response = await fetch(`${this.baseUrl}/chat/completions`, {
5593
5956
  method: "POST",
5594
5957
  headers: {
5595
5958
  "Content-Type": "application/json",
@@ -5609,50 +5972,55 @@ var init_llm_loop = __esm({
5609
5972
  if (!reader) return;
5610
5973
  const decoder = new TextDecoder();
5611
5974
  let buffer = "";
5612
- while (true) {
5613
- const { done, value } = await reader.read();
5614
- if (done) break;
5615
- buffer += decoder.decode(value, { stream: true });
5616
- const lines = buffer.split("\n");
5617
- buffer = lines.pop() || "";
5618
- for (const line of lines) {
5619
- const trimmed = line.trim();
5620
- if (!trimmed || !trimmed.startsWith("data: ")) continue;
5621
- const data = trimmed.slice(6);
5622
- if (data === "[DONE]") continue;
5623
- let chunk;
5624
- try {
5625
- chunk = JSON.parse(data);
5626
- } catch {
5627
- continue;
5628
- }
5629
- if (chunk.usage) {
5630
- const cached2 = chunk.usage.prompt_tokens_details?.cached_tokens ?? 0;
5631
- const uncachedInput = Math.max(0, (chunk.usage.prompt_tokens ?? 0) - cached2);
5632
- yield {
5633
- type: "usage",
5634
- inputTokens: uncachedInput,
5635
- outputTokens: chunk.usage.completion_tokens,
5636
- cacheReadInputTokens: cached2
5637
- };
5638
- }
5639
- const delta = chunk.choices?.[0]?.delta;
5640
- if (!delta) continue;
5641
- if (delta.content) {
5642
- yield { type: "text", content: delta.content };
5643
- }
5644
- if (delta.tool_calls) {
5645
- for (const tc of delta.tool_calls) {
5975
+ try {
5976
+ while (true) {
5977
+ const { done, value } = await reader.read();
5978
+ if (done) break;
5979
+ buffer += decoder.decode(value, { stream: true });
5980
+ const lines = buffer.split("\n");
5981
+ buffer = lines.pop() || "";
5982
+ for (const line of lines) {
5983
+ const trimmed = line.trim();
5984
+ if (!trimmed || !trimmed.startsWith("data: ")) continue;
5985
+ const data = trimmed.slice(6);
5986
+ if (data === "[DONE]") continue;
5987
+ let chunk;
5988
+ try {
5989
+ chunk = JSON.parse(data);
5990
+ } catch {
5991
+ continue;
5992
+ }
5993
+ if (chunk.usage) {
5994
+ const cached2 = chunk.usage.prompt_tokens_details?.cached_tokens ?? 0;
5995
+ const uncachedInput = Math.max(0, (chunk.usage.prompt_tokens ?? 0) - cached2);
5646
5996
  yield {
5647
- type: "tool_call",
5648
- index: tc.index,
5649
- id: tc.id,
5650
- name: tc.function?.name,
5651
- arguments: tc.function?.arguments
5997
+ type: "usage",
5998
+ inputTokens: uncachedInput,
5999
+ outputTokens: chunk.usage.completion_tokens,
6000
+ cacheReadInputTokens: cached2
5652
6001
  };
5653
6002
  }
6003
+ const delta = chunk.choices?.[0]?.delta;
6004
+ if (!delta) continue;
6005
+ if (delta.content) {
6006
+ yield { type: "text", content: delta.content };
6007
+ }
6008
+ if (delta.tool_calls) {
6009
+ for (const tc of delta.tool_calls) {
6010
+ yield {
6011
+ type: "tool_call",
6012
+ index: tc.index,
6013
+ id: tc.id,
6014
+ name: tc.function?.name,
6015
+ arguments: tc.function?.arguments
6016
+ };
6017
+ }
6018
+ }
5654
6019
  }
5655
6020
  }
6021
+ } finally {
6022
+ reader.cancel().catch(() => {
6023
+ });
5656
6024
  }
5657
6025
  }
5658
6026
  };
@@ -5786,7 +6154,7 @@ ${systemPrompt}` : DEFAULT_PHONE_PREAMBLE;
5786
6154
  chunk.inputTokens ?? 0,
5787
6155
  chunk.outputTokens ?? 0,
5788
6156
  chunk.cacheReadInputTokens ?? 0,
5789
- chunk.cacheCreationInputTokens ?? 0
6157
+ chunk.cacheWriteInputTokens ?? 0
5790
6158
  );
5791
6159
  } else if (chunk.type === "tool_call") {
5792
6160
  hasToolCalls = true;
@@ -6180,8 +6548,8 @@ function getElementAtPath(obj, path6) {
6180
6548
  }
6181
6549
  function promiseAllObject(promisesObj) {
6182
6550
  const keys = Object.keys(promisesObj);
6183
- const promises = keys.map((key) => promisesObj[key]);
6184
- return Promise.all(promises).then((results) => {
6551
+ const promises2 = keys.map((key) => promisesObj[key]);
6552
+ return Promise.all(promises2).then((results) => {
6185
6553
  const resolvedObj = {};
6186
6554
  for (let i = 0; i < keys.length; i++) {
6187
6555
  resolvedObj[keys[i]] = results[i];
@@ -23972,6 +24340,8 @@ var init_mcp_client = __esm({
23972
24340
  "use strict";
23973
24341
  init_cjs_shims();
23974
24342
  init_logger();
24343
+ init_server();
24344
+ init_version();
23975
24345
  MCPManager = class {
23976
24346
  configs;
23977
24347
  connected = [];
@@ -23997,10 +24367,16 @@ var init_mcp_client = __esm({
23997
24367
  }
23998
24368
  const aggregatedTools = [];
23999
24369
  for (const cfg of this.configs) {
24370
+ try {
24371
+ validateWebhookUrl(cfg.url);
24372
+ } catch (e) {
24373
+ getLogger().error(`MCP server '${cfg.name}' (${cfg.url}) rejected by SSRF guard: ${String(e)}`);
24374
+ continue;
24375
+ }
24000
24376
  const transport = new transportModule.StreamableHTTPClientTransport(new URL(cfg.url), {
24001
24377
  requestInit: { headers: cfg.headers }
24002
24378
  });
24003
- const client = new mcpModule.Client({ name: "patter", version: "0.6.0" });
24379
+ const client = new mcpModule.Client({ name: "patter", version: VERSION });
24004
24380
  try {
24005
24381
  await client.connect(transport);
24006
24382
  } catch (e) {
@@ -24074,6 +24450,276 @@ var init_mcp_client = __esm({
24074
24450
  }
24075
24451
  });
24076
24452
 
24453
+ // src/consult.ts
24454
+ function isLoopbackOrPrivateHost(baseUrl) {
24455
+ let host;
24456
+ try {
24457
+ host = new URL(baseUrl).hostname.toLowerCase();
24458
+ } catch {
24459
+ return false;
24460
+ }
24461
+ if (host.startsWith("[") && host.endsWith("]")) host = host.slice(1, -1);
24462
+ if (host === "localhost" || host === "0.0.0.0" || host === "::1") return true;
24463
+ if (host.endsWith(".local")) return true;
24464
+ if (/^127\./.test(host) || /^10\./.test(host) || /^192\.168\./.test(host)) return true;
24465
+ if (/^169\.254\./.test(host)) return true;
24466
+ const m = host.match(/^172\.(\d+)\./);
24467
+ if (m) {
24468
+ const octet = Number(m[1]);
24469
+ if (octet >= 16 && octet <= 31) return true;
24470
+ }
24471
+ if (host.includes(":") && (/^f[cd][0-9a-f]{2}:/.test(host) || /^fe[89ab][0-9a-f]:/.test(host))) {
24472
+ return true;
24473
+ }
24474
+ return false;
24475
+ }
24476
+ function openclawConsult(agent, opts = {}) {
24477
+ if (!agent || !OPENCLAW_AGENT_RE.test(agent)) {
24478
+ throw new Error(
24479
+ "OpenClaw agent must be a non-empty id of letters, digits, and ._:/- only"
24480
+ );
24481
+ }
24482
+ const baseUrl = opts.baseUrl ?? OPENCLAW_DEFAULT_BASE_URL;
24483
+ const model = agent.includes("/") || agent.includes(":") ? agent : `openclaw/${agent}`;
24484
+ return {
24485
+ openaiCompatible: {
24486
+ baseUrl,
24487
+ model,
24488
+ apiKey: opts.apiKey,
24489
+ apiKeyEnv: OPENCLAW_API_KEY_ENV,
24490
+ sessionHeader: OPENCLAW_SESSION_HEADER
24491
+ },
24492
+ timeoutMs: opts.timeoutMs ?? DEFAULT_TIMEOUT_MS,
24493
+ toolName: opts.toolName ?? DEFAULT_TOOL_NAME,
24494
+ description: opts.description ?? OPENCLAW_DESCRIPTION,
24495
+ reassurance: opts.reassurance ?? OPENCLAW_REASSURANCE,
24496
+ headers: opts.headers,
24497
+ allowLoopback: opts.allowLoopback ?? isLoopbackOrPrivateHost(baseUrl)
24498
+ };
24499
+ }
24500
+ function buildConsultTool(config2) {
24501
+ const hasUrl = config2.url != null;
24502
+ const hasOpenAI = config2.openaiCompatible != null;
24503
+ if (hasUrl === hasOpenAI) {
24504
+ throw new Error("ConsultConfig requires exactly one of url or openaiCompatible");
24505
+ }
24506
+ const timeoutMs = config2.timeoutMs ?? DEFAULT_TIMEOUT_MS;
24507
+ const baseHeaders = {
24508
+ ...config2.headers ?? {},
24509
+ "Content-Type": "application/json"
24510
+ };
24511
+ const handler = hasOpenAI ? buildOpenAIHandler(config2.openaiCompatible, baseHeaders, timeoutMs, config2.allowLoopback ?? false) : buildWebhookHandler(config2.url, baseHeaders, timeoutMs, config2.allowLoopback ?? false);
24512
+ const tool2 = {
24513
+ name: config2.toolName ?? DEFAULT_TOOL_NAME,
24514
+ description: config2.description ?? DEFAULT_DESCRIPTION,
24515
+ parameters: PARAMETERS,
24516
+ handler
24517
+ };
24518
+ return config2.reassurance != null ? { ...tool2, reassurance: config2.reassurance } : tool2;
24519
+ }
24520
+ function buildWebhookHandler(url2, headers, timeoutMs, allowLoopback) {
24521
+ validateWebhookUrl(url2, allowLoopback);
24522
+ return async (args, context) => {
24523
+ const requestText = typeof args?.request === "string" ? args.request : "";
24524
+ const payload = {
24525
+ request: requestText,
24526
+ call_id: context?.call_id ?? "",
24527
+ caller: context?.caller ?? "",
24528
+ callee: context?.callee ?? ""
24529
+ };
24530
+ let body;
24531
+ try {
24532
+ const resp = await fetch(url2, {
24533
+ method: "POST",
24534
+ headers,
24535
+ body: JSON.stringify(payload),
24536
+ signal: AbortSignal.timeout(timeoutMs)
24537
+ });
24538
+ if (!resp.ok) {
24539
+ getLogger().warn(`consult tool: orchestrator returned HTTP ${resp.status}`);
24540
+ return GRACEFUL_FALLBACK;
24541
+ }
24542
+ body = (await resp.text()).slice(0, MAX_RESPONSE_CHARS);
24543
+ } catch (e) {
24544
+ getLogger().warn(
24545
+ `consult tool: orchestrator call failed: ${e instanceof Error ? e.name : "error"}`
24546
+ );
24547
+ return GRACEFUL_FALLBACK;
24548
+ }
24549
+ try {
24550
+ const data = JSON.parse(body);
24551
+ if (data && typeof data === "object" && !Array.isArray(data)) {
24552
+ const obj = data;
24553
+ for (const key of REPLY_KEYS) {
24554
+ if (typeof obj[key] === "string") return obj[key];
24555
+ }
24556
+ }
24557
+ return JSON.stringify(data);
24558
+ } catch {
24559
+ return body;
24560
+ }
24561
+ };
24562
+ }
24563
+ function buildOpenAIHandler(oc, baseHeaders, timeoutMs, allowLoopback) {
24564
+ const endpoint = oc.baseUrl.replace(/\/+$/, "") + "/chat/completions";
24565
+ validateWebhookUrl(endpoint, allowLoopback);
24566
+ const apiKey = oc.apiKey ?? (oc.apiKeyEnv ? process.env[oc.apiKeyEnv] : void 0);
24567
+ const headers = { ...baseHeaders };
24568
+ if (apiKey) headers.Authorization = `Bearer ${apiKey}`;
24569
+ const sessionHeader = oc.sessionHeader;
24570
+ const model = oc.model;
24571
+ return async (args, context) => {
24572
+ const requestText = typeof args?.request === "string" ? args.request : "";
24573
+ const callId = context?.call_id ?? "";
24574
+ const caller = context?.caller ?? "";
24575
+ const callee = context?.callee ?? "";
24576
+ const contextLines = ["You are answering an inbound phone call relayed by a voice agent."];
24577
+ if (caller) contextLines.push(`Caller: ${caller}`);
24578
+ if (callee) contextLines.push(`Line dialed: ${callee}`);
24579
+ contextLines.push(
24580
+ "Reply concisely in a spoken, conversational style \u2014 it is read aloud to the caller."
24581
+ );
24582
+ const reqHeaders = { ...headers };
24583
+ if (sessionHeader && callId) reqHeaders[sessionHeader] = callId;
24584
+ const payload = {
24585
+ model,
24586
+ messages: [
24587
+ { role: "system", content: contextLines.join("\n") },
24588
+ { role: "user", content: requestText }
24589
+ ],
24590
+ stream: false
24591
+ };
24592
+ if (callId) payload.user = callId;
24593
+ try {
24594
+ const resp = await fetch(endpoint, {
24595
+ method: "POST",
24596
+ headers: reqHeaders,
24597
+ body: JSON.stringify(payload),
24598
+ signal: AbortSignal.timeout(timeoutMs)
24599
+ });
24600
+ if (resp.status === 404) {
24601
+ getLogger().warn(
24602
+ "consult tool: OpenAI-compatible endpoint returned 404 \u2014 is it enabled? (OpenClaw: set gateway.http.endpoints.chatCompletions.enabled = true)"
24603
+ );
24604
+ return GRACEFUL_FALLBACK;
24605
+ }
24606
+ if (!resp.ok) {
24607
+ getLogger().warn(`consult tool: openai-compatible returned HTTP ${resp.status}`);
24608
+ return GRACEFUL_FALLBACK;
24609
+ }
24610
+ const data = await resp.json();
24611
+ const content = data?.choices?.[0]?.message?.content;
24612
+ if (typeof content === "string" && content.trim()) {
24613
+ return content.trim().slice(0, MAX_RESPONSE_CHARS);
24614
+ }
24615
+ getLogger().warn("consult tool: response missing choices[0].message.content");
24616
+ return GRACEFUL_FALLBACK;
24617
+ } catch (e) {
24618
+ getLogger().warn(
24619
+ `consult tool: openai-compatible call failed: ${e instanceof Error ? e.name : "error"}`
24620
+ );
24621
+ return GRACEFUL_FALLBACK;
24622
+ }
24623
+ };
24624
+ }
24625
+ function buildPostCallRecord(data, includeTranscript) {
24626
+ const lines = [];
24627
+ const caller = data.caller;
24628
+ const callee = data.callee;
24629
+ if (caller) lines.push(`Caller: ${caller}`);
24630
+ if (callee) lines.push(`Line dialed: ${callee}`);
24631
+ const metrics = data.metrics;
24632
+ const duration3 = metrics?.durationSeconds ?? metrics?.duration_seconds;
24633
+ if (typeof duration3 === "number") lines.push(`Duration: ${Math.round(duration3)}s`);
24634
+ if (includeTranscript) {
24635
+ const entries = data.transcript ?? [];
24636
+ const rendered = entries.filter((e) => e && typeof e === "object").map((e) => `${e.role ?? "?"}: ${e.text ?? ""}`).join("\n");
24637
+ if (rendered) lines.push("Transcript:\n" + rendered.slice(0, POSTCALL_MAX_TRANSCRIPT_CHARS));
24638
+ }
24639
+ return lines.length ? lines.join("\n") : "(no call details available)";
24640
+ }
24641
+ function openclawPostCallNotifier(agent, opts = {}) {
24642
+ const cfg = openclawConsult(agent, {
24643
+ baseUrl: opts.baseUrl,
24644
+ apiKey: opts.apiKey,
24645
+ timeoutMs: opts.timeoutMs ?? DEFAULT_TIMEOUT_MS,
24646
+ allowLoopback: opts.allowLoopback
24647
+ });
24648
+ const oc = cfg.openaiCompatible;
24649
+ const endpoint = oc.baseUrl.replace(/\/+$/, "") + "/chat/completions";
24650
+ validateWebhookUrl(endpoint, cfg.allowLoopback ?? false);
24651
+ const apiKey = oc.apiKey ?? (oc.apiKeyEnv ? process.env[oc.apiKeyEnv] : void 0);
24652
+ const sessionHeader = oc.sessionHeader;
24653
+ const model = oc.model;
24654
+ const timeoutMs = cfg.timeoutMs ?? DEFAULT_TIMEOUT_MS;
24655
+ const includeTranscript = opts.includeTranscript ?? true;
24656
+ const instruction = opts.instruction ?? POSTCALL_INSTRUCTION;
24657
+ return async (data) => {
24658
+ const callId = (data ?? {}).call_id ?? "";
24659
+ const record2 = buildPostCallRecord(data ?? {}, includeTranscript);
24660
+ const headers = { "Content-Type": "application/json" };
24661
+ if (apiKey) headers.Authorization = `Bearer ${apiKey}`;
24662
+ if (sessionHeader && callId) headers[sessionHeader] = callId;
24663
+ const payload = {
24664
+ model,
24665
+ messages: [
24666
+ { role: "system", content: instruction },
24667
+ { role: "user", content: record2 }
24668
+ ],
24669
+ stream: false
24670
+ };
24671
+ if (callId) payload.user = callId;
24672
+ try {
24673
+ const resp = await fetch(endpoint, {
24674
+ method: "POST",
24675
+ headers,
24676
+ body: JSON.stringify(payload),
24677
+ signal: AbortSignal.timeout(timeoutMs)
24678
+ });
24679
+ if (!resp.ok) {
24680
+ getLogger().warn(`openclaw post-call notify: HTTP ${resp.status}`);
24681
+ }
24682
+ } catch (e) {
24683
+ getLogger().warn(
24684
+ `openclaw post-call notify failed: ${e instanceof Error ? e.name : "error"}`
24685
+ );
24686
+ }
24687
+ };
24688
+ }
24689
+ var DEFAULT_TIMEOUT_MS, DEFAULT_TOOL_NAME, DEFAULT_DESCRIPTION, MAX_RESPONSE_CHARS, REPLY_KEYS, GRACEFUL_FALLBACK, OPENCLAW_DEFAULT_BASE_URL, OPENCLAW_API_KEY_ENV, OPENCLAW_SESSION_HEADER, OPENCLAW_DESCRIPTION, OPENCLAW_REASSURANCE, OPENCLAW_AGENT_RE, PARAMETERS, POSTCALL_INSTRUCTION, POSTCALL_MAX_TRANSCRIPT_CHARS;
24690
+ var init_consult = __esm({
24691
+ "src/consult.ts"() {
24692
+ "use strict";
24693
+ init_cjs_shims();
24694
+ init_logger();
24695
+ init_server();
24696
+ DEFAULT_TIMEOUT_MS = 3e4;
24697
+ DEFAULT_TOOL_NAME = "consult_agent";
24698
+ DEFAULT_DESCRIPTION = "Consult your back-office agent for deeper reasoning, fresh information, or actions beyond this call. Use when the caller asks something you cannot answer directly.";
24699
+ MAX_RESPONSE_CHARS = 1e6;
24700
+ REPLY_KEYS = ["reply", "response", "text", "result", "answer", "message"];
24701
+ GRACEFUL_FALLBACK = "I wasn't able to reach the system to get that answer right now.";
24702
+ OPENCLAW_DEFAULT_BASE_URL = "http://127.0.0.1:18789/v1";
24703
+ OPENCLAW_API_KEY_ENV = "OPENCLAW_API_KEY";
24704
+ OPENCLAW_SESSION_HEADER = "x-openclaw-session-key";
24705
+ OPENCLAW_DESCRIPTION = "Consult your OpenClaw agent for anything account-specific \u2014 appointments, customer records, schedules, or actions in the back-office system. NEVER state an appointment time, customer detail, or schedule fact from your own memory; ALWAYS call this tool for those and read back what it returns.";
24706
+ OPENCLAW_REASSURANCE = "Let me check on that for you, one moment.";
24707
+ OPENCLAW_AGENT_RE = /^[A-Za-z0-9._:/-]+$/;
24708
+ PARAMETERS = {
24709
+ type: "object",
24710
+ properties: {
24711
+ request: {
24712
+ type: "string",
24713
+ description: "The question or task to send to your back-office agent for deeper reasoning, fresh information, or an action beyond this call. State it self-containedly \u2014 the dialog history is not forwarded with the consult."
24714
+ }
24715
+ },
24716
+ required: ["request"]
24717
+ };
24718
+ POSTCALL_INSTRUCTION = "A phone call handled by the voice agent has just ended. Here is the record of the call. Log it and follow up if anything needs action.";
24719
+ POSTCALL_MAX_TRANSCRIPT_CHARS = 12e3;
24720
+ }
24721
+ });
24722
+
24077
24723
  // src/sentence-chunker.ts
24078
24724
  function splitSentences(text, minSentenceLen = DEFAULT_MIN_SENTENCE_LEN) {
24079
24725
  const alphabets = "([A-Za-z])";
@@ -26616,6 +27262,8 @@ var init_silero_vad = __esm({
26616
27262
  speechThresholdDuration = 0;
26617
27263
  silenceThresholdDuration = 0;
26618
27264
  closed = false;
27265
+ /** Transitions produced in the current processFrame call but not yet returned. */
27266
+ eventQueue = [];
26619
27267
  /**
26620
27268
  * Load the Silero VAD model.
26621
27269
  * Throws if `onnxruntime-node` is not installed.
@@ -26741,22 +27389,21 @@ var init_silero_vad = __esm({
26741
27389
  );
26742
27390
  }
26743
27391
  if (pcmChunk.length === 0) {
26744
- return null;
27392
+ return this.eventQueue.shift() ?? null;
26745
27393
  }
26746
27394
  const numSamples = Math.floor(pcmChunk.length / 2);
26747
27395
  if (numSamples === 0) {
26748
- return null;
27396
+ return this.eventQueue.shift() ?? null;
26749
27397
  }
26750
27398
  const samples = new Float32Array(numSamples);
26751
27399
  for (let i = 0; i < numSamples; i++) {
26752
- samples[i] = pcmChunk.readInt16LE(i * 2) / 32767;
27400
+ samples[i] = pcmChunk.readInt16LE(i * 2) / 32768;
26753
27401
  }
26754
27402
  const merged = new Float32Array(this.pending.length + samples.length);
26755
27403
  merged.set(this.pending, 0);
26756
27404
  merged.set(samples, this.pending.length);
26757
27405
  this.pending = merged;
26758
27406
  const windowSize = this.model.windowSizeSamples;
26759
- let event = null;
26760
27407
  while (this.pending.length >= windowSize) {
26761
27408
  const window = this.pending.slice(0, windowSize);
26762
27409
  this.pending = this.pending.slice(windowSize);
@@ -26765,10 +27412,10 @@ var init_silero_vad = __esm({
26765
27412
  const windowDuration = windowSize / this.opts.sampleRate;
26766
27413
  const transition = this.advanceState(p, windowDuration);
26767
27414
  if (transition !== null) {
26768
- event = transition;
27415
+ this.eventQueue.push(transition);
26769
27416
  }
26770
27417
  }
26771
- return event;
27418
+ return this.eventQueue.shift() ?? null;
26772
27419
  }
26773
27420
  advanceState(p, windowDuration) {
26774
27421
  const opts = this.opts;
@@ -26823,6 +27470,7 @@ var init_silero_vad = __esm({
26823
27470
  this.pubSpeaking = false;
26824
27471
  this.speechThresholdDuration = 0;
26825
27472
  this.silenceThresholdDuration = 0;
27473
+ this.eventQueue = [];
26826
27474
  this.expFilter.reset();
26827
27475
  this.model.reset();
26828
27476
  }
@@ -27063,6 +27711,13 @@ var init_aec = __esm({
27063
27711
  });
27064
27712
 
27065
27713
  // src/stream-handler.ts
27714
+ function applyToolCallPreambles(prompt, knob) {
27715
+ if (!knob) return prompt;
27716
+ const block = typeof knob === "string" ? knob : DEFAULT_TOOL_CALL_PREAMBLE_BLOCK;
27717
+ return prompt ? `${block}
27718
+
27719
+ ${prompt}` : block;
27720
+ }
27066
27721
  function checkGuardrails(text, guardrails) {
27067
27722
  if (!guardrails) return null;
27068
27723
  for (const guard of guardrails) {
@@ -27119,6 +27774,13 @@ function augmentWithBuiltinHandoffTools(userTools, callbacks) {
27119
27774
  }
27120
27775
  return out;
27121
27776
  }
27777
+ function isSttHallucination(text) {
27778
+ const stripped = text.trim().toLowerCase().replace(/[.,!?;:…。!?\s]+$/u, "").trim();
27779
+ if (stripped === "") return true;
27780
+ if (HALLUCINATIONS.has(stripped)) return true;
27781
+ const pieces = stripped.split(/[.!?…。!?]+/u).map((p) => p.trim()).filter((p) => p.length > 0);
27782
+ return pieces.length > 1 && pieces.every((p) => HALLUCINATIONS.has(p));
27783
+ }
27122
27784
  async function queryDeepgramCost(metricsAcc, deepgramKey, deepgramRequestId) {
27123
27785
  try {
27124
27786
  const projResp = await fetch("https://api.deepgram.com/v1/projects", {
@@ -27149,7 +27811,7 @@ async function queryDeepgramCost(metricsAcc, deepgramKey, deepgramRequestId) {
27149
27811
  } catch {
27150
27812
  }
27151
27813
  }
27152
- var HALLUCINATIONS, StreamHandler;
27814
+ var DEFAULT_TOOL_CALL_PREAMBLE_BLOCK, HALLUCINATIONS, StreamHandler;
27153
27815
  var init_stream_handler = __esm({
27154
27816
  "src/stream-handler.ts"() {
27155
27817
  "use strict";
@@ -27167,39 +27829,96 @@ var init_stream_handler = __esm({
27167
27829
  init_mcp_client();
27168
27830
  init_logger();
27169
27831
  init_server();
27832
+ init_consult();
27170
27833
  init_sentence_chunker();
27171
27834
  init_pipeline_hooks();
27172
27835
  init_event_bus();
27173
27836
  init_tracing();
27837
+ DEFAULT_TOOL_CALL_PREAMBLE_BLOCK = `# Preambles
27838
+
27839
+ Use short preambles only when they help the user understand that work is happening. A preamble is one short spoken update describing the action you are about to take \u2014 not hidden reasoning, and never a claim about the result.
27840
+
27841
+ ## When to use a preamble
27842
+ Use a preamble when:
27843
+ - you are about to call a tool that may take noticeable time;
27844
+ - you need to reason through a multi-step request;
27845
+ - you are checking records, availability, account state, or policy details;
27846
+ - you are preparing an escalation or handoff;
27847
+ - silence would make the assistant feel unresponsive.
27848
+
27849
+ When a preamble is needed, output it immediately before the reasoning or tool call.
27850
+
27851
+ ## When to NOT use a preamble
27852
+ Do not use a preamble when:
27853
+ - the answer is direct and can be given immediately;
27854
+ - the user is only confirming, correcting, or declining something;
27855
+ - the audio is unclear and you need clarification instead;
27856
+ - the tool call is lightweight and the user would not benefit from an update.
27857
+
27858
+ ## Style
27859
+ - Keep it to one short sentence (two only before a high-impact action).
27860
+ - Vary the wording across turns; do not reuse the same opener.
27861
+ - Describe the action, not the internal reasoning.
27862
+ - Never imply success or failure before the tool returns.
27863
+
27864
+ Prefer:
27865
+ - "I'll check that order now."
27866
+ - "I'll look up your appointment details."
27867
+ - "I'll verify that before we make any changes."
27868
+ - "I'll check the policy and then give you the next step."
27869
+ - "I'll pull that up so we can make sure it's the right account."
27870
+
27871
+ Avoid:
27872
+ - "Let me think about that for a second."
27873
+ - "Please wait while I process your request."
27874
+ - "I'm going to use my tools now."
27875
+ - "Hmm..." / "One moment while I process that..."`;
27174
27876
  HALLUCINATIONS = /* @__PURE__ */ new Set([
27175
- "you",
27176
- "thank you",
27177
- "thanks",
27178
- "yeah",
27179
- "yes",
27180
- "no",
27181
- "okay",
27182
- "ok",
27183
- "uh",
27184
- "um",
27185
- "mmm",
27186
- "hmm",
27187
- ".",
27188
- "bye",
27189
- "right",
27190
- "cool",
27191
- // Whisper YouTube-caption hallucinations
27877
+ // Issue #154: the hallucination filter is now DISPLAY-ONLY — it no longer
27878
+ // gates response creation (the server drives the response on
27879
+ // ``input_audio_buffer.committed`` by default). Dropping a phrase here
27880
+ // therefore deletes the user's transcript line (recordSttComplete never
27881
+ // fires → empty user_text → dashboard skips the user line). So this set is
27882
+ // restricted to genuine NON-SPEECH artefacts that Whisper emits on
27883
+ // silence / TTS echo, NOT real conversational words. Standalone words like
27884
+ // 'yes', 'no', 'okay', 'right', 'you', 'thanks' were REMOVED — they are
27885
+ // legitimate user replies and must reach the transcript. Parity with
27886
+ // Python ``_STT_HALLUCINATIONS``.
27887
+ //
27888
+ // Whisper caption / training-set hallucinations. Whisper was trained heavily
27889
+ // on captioned video, so on silence / PSTN echo it falls back to the most
27890
+ // common caption credits + sign-offs. Curated from widely-reported
27891
+ // Whisper-on-silence outputs across the open-source ASR community.
27192
27892
  "thank you for watching",
27193
27893
  "thanks for watching",
27194
27894
  "thank you for watching!",
27195
27895
  "thanks for watching!",
27196
27896
  "thank you so much for watching",
27897
+ "thank you for watching please subscribe",
27898
+ "thanks for watching please subscribe",
27197
27899
  "thanks for listening",
27900
+ "we'll see you next time",
27901
+ "see you next time",
27902
+ "bye bye",
27198
27903
  "please subscribe",
27904
+ "please subscribe to my channel",
27905
+ "don't forget to subscribe",
27906
+ "like and subscribe",
27199
27907
  "subscribe",
27908
+ "subtitles by the amara.org community",
27909
+ "subtitles by the amara org community",
27910
+ "subtitles by",
27911
+ "transcribed by",
27912
+ "transcription by castingwords",
27913
+ "the end",
27914
+ // Music / sound markers.
27200
27915
  "music",
27201
27916
  "[music]",
27917
+ "piano music",
27918
+ "applause",
27919
+ "[applause]",
27202
27920
  "\u266A",
27921
+ // Silence markers.
27203
27922
  "[no audio]",
27204
27923
  "[silence]",
27205
27924
  "[blank_audio]",
@@ -27503,7 +28222,14 @@ var init_stream_handler = __esm({
27503
28222
  * barge-in armed during the audible tail. Tunable via env.
27504
28223
  */
27505
28224
  endSpeakingWithGrace() {
27506
- const grace = Number(process.env.PATTER_TTS_TAIL_GRACE_MS ?? 1500);
28225
+ const rawGrace = process.env.PATTER_TTS_TAIL_GRACE_MS;
28226
+ const parsedGrace = rawGrace !== void 0 ? Number(rawGrace) : NaN;
28227
+ const grace = rawGrace !== void 0 && Number.isFinite(parsedGrace) ? parsedGrace : 1500;
28228
+ if (rawGrace !== void 0 && !Number.isFinite(parsedGrace)) {
28229
+ getLogger().warn(
28230
+ `PATTER_TTS_TAIL_GRACE_MS="${rawGrace}" is not a valid number \u2014 using default 1500ms`
28231
+ );
28232
+ }
27507
28233
  if (grace > 0) {
27508
28234
  const gen = this.speakingGeneration;
27509
28235
  this.clearGraceTimer();
@@ -27597,6 +28323,14 @@ var init_stream_handler = __esm({
27597
28323
  `[DIAG] Flushed ${replayed} pre-barge-in frame(s) (~${replayed * 20} ms) to STT`
27598
28324
  );
27599
28325
  }
28326
+ /**
28327
+ * Per-call resolved tool list. Starts as ``null`` (falls back to
28328
+ * ``deps.agent.tools``). Populated by ``initMcpTools`` when MCP servers
28329
+ * are configured so discovered tools are merged in without mutating the
28330
+ * shared ``AgentOptions`` object. Code that needs the effective tool list
28331
+ * should read ``this.resolvedTools ?? this.deps.agent.tools``.
28332
+ */
28333
+ resolvedTools = null;
27600
28334
  llmLoop = null;
27601
28335
  /**
27602
28336
  * Per-call tool executor — provides retry-with-exponential-backoff and a
@@ -27640,6 +28374,17 @@ var init_stream_handler = __esm({
27640
28374
  userTranscriptPending = false;
27641
28375
  pendingAssistantTurn = null;
27642
28376
  pendingAssistantTimer = null;
28377
+ /**
28378
+ * Reserved monotonic turn index for the in-flight Realtime turn (issue
28379
+ * #154, fix 5/6). Reserved in ``onAdapterSpeechStopped`` via
28380
+ * ``metricsAcc.reserveTurnIndex()`` the moment the turn OPENS, then threaded
28381
+ * through to the live per-line transcript events (``recordTranscriptLine``)
28382
+ * and into ``recordTurnComplete`` / ``recordTurnInterrupted`` so the
28383
+ * dashboard can sort a late-arriving user line ABOVE its agent line by
28384
+ * ``(turnIndex, role)``. ``null`` until the first turn opens. Parity with
28385
+ * Python ``_current_turn_index``.
28386
+ */
28387
+ currentTurnIndex = null;
27643
28388
  /**
27644
28389
  * Hard cap on how long we wait for the user transcript before flushing
27645
28390
  * the buffered assistant turn alone. 3 s covers OpenAI Whisper's typical
@@ -27721,6 +28466,23 @@ var init_stream_handler = __esm({
27721
28466
  * streaming/regular LLM, WebSocket remote, Realtime response_done) so the
27722
28467
  * payload shape lives in one place.
27723
28468
  */
28469
+ /**
28470
+ * Emit a live per-line transcript event to the dashboard store (issue #154,
28471
+ * fix 5). Routed through a single helper so the call shape lives in one
28472
+ * place. ``recordTranscriptLine`` appends the line to the active call's
28473
+ * transcript and publishes a ``transcript_line`` SSE event; the dashboard
28474
+ * sorts by (turnIndex, user<assistant) so a late user line lands above its
28475
+ * agent line. No-op when no turn index has been reserved yet.
28476
+ */
28477
+ emitTranscriptLine(role, text) {
28478
+ if (this.currentTurnIndex === null) return;
28479
+ this.deps.metricsStore.recordTranscriptLine({
28480
+ call_id: this.callId,
28481
+ turnIndex: this.currentTurnIndex,
28482
+ role,
28483
+ text
28484
+ });
28485
+ }
27724
28486
  async emitTurnMetrics(turn) {
27725
28487
  if (turn == null) return;
27726
28488
  this.deps.metricsStore.recordTurn({ call_id: this.callId, turn });
@@ -27827,7 +28589,7 @@ var init_stream_handler = __esm({
27827
28589
  if (customParams.callee && !this.callee) this.callee = customParams.callee;
27828
28590
  const mode = this.deps.agent.engine ? `engine=${this.deps.agent.engine.kind ?? "unknown"}` : "pipeline";
27829
28591
  getLogger().info(
27830
- `Call started: ${callId} (${this.deps.bridge.label}, ${mode}, ${sanitizeLogValue(this.caller || "?")} \u2192 ${sanitizeLogValue(this.callee || "?")})`
28592
+ `Call started: ${callId} (${this.deps.bridge.label}, ${mode}, ${maskPhoneNumber(this.caller || "?")} \u2192 ${maskPhoneNumber(this.callee || "?")})`
27831
28593
  );
27832
28594
  if (Object.keys(customParams).length > 0) {
27833
28595
  getLogger().debug(`Custom params: ${sanitizeLogValue(JSON.stringify(customParams))}`);
@@ -27872,10 +28634,13 @@ var init_stream_handler = __esm({
27872
28634
  const resolvedPrompt = Object.keys(allVars).length > 0 ? this.deps.resolveVariables(this.deps.agent.systemPrompt, allVars) : this.deps.agent.systemPrompt;
27873
28635
  const provider2 = this.deps.agent.provider ?? "openai_realtime";
27874
28636
  await this.initMcpTools();
28637
+ this.injectConsultTool();
27875
28638
  if (provider2 === "pipeline") {
27876
28639
  await this.initPipeline(resolvedPrompt);
27877
28640
  } else {
27878
- await this.initRealtimeAdapter(resolvedPrompt);
28641
+ await this.initRealtimeAdapter(
28642
+ applyToolCallPreambles(resolvedPrompt, this.deps.agent.toolCallPreambles)
28643
+ );
27879
28644
  }
27880
28645
  }
27881
28646
  /**
@@ -27900,10 +28665,25 @@ var init_stream_handler = __esm({
27900
28665
  }
27901
28666
  if (discovered.length === 0) return;
27902
28667
  MCPManager.assertNoConflicts(this.deps.agent.tools, discovered);
27903
- const mutableAgent = this.deps.agent;
27904
- mutableAgent.tools = [...mutableAgent.tools ?? [], ...discovered];
28668
+ this.resolvedTools = [...this.deps.agent.tools ?? [], ...discovered];
27905
28669
  getLogger().info(`MCP: merged ${discovered.length} tool(s) into agent`);
27906
28670
  }
28671
+ /**
28672
+ * Merge the built-in ``consult`` tool into the per-call tool list when
28673
+ * ``agent.consult`` is set, mirroring {@link initMcpTools}: the shared
28674
+ * ``deps.agent`` is NOT mutated; the merged list is stored on
28675
+ * ``this.resolvedTools`` so ``buildAIAdapter`` (Realtime) and the pipeline
28676
+ * ``LLMLoop`` both see it. Idempotent — a no-op if a tool with the same name
28677
+ * is already present.
28678
+ */
28679
+ injectConsultTool() {
28680
+ const consult = this.deps.agent.consult;
28681
+ if (!consult) return;
28682
+ const consultTool = buildConsultTool(consult);
28683
+ const base = this.resolvedTools ?? (this.deps.agent.tools ?? []);
28684
+ if (base.some((t) => t.name === consultTool.name)) return;
28685
+ this.resolvedTools = [...base, consultTool];
28686
+ }
27907
28687
  /** Set the stream SID (Twilio only, called after parsing 'start' event). */
27908
28688
  /** Set the carrier-side stream id (Twilio `streamSid` / Telnyx stream identifier). */
27909
28689
  setStreamSid(sid) {
@@ -27923,8 +28703,12 @@ var init_stream_handler = __esm({
27923
28703
  if (activeVad && !this.vadDisabled) {
27924
28704
  try {
27925
28705
  const vadPromise = activeVad.processFrame(pcm16k, 16e3);
27926
- const timeoutPromise = new Promise((resolve2) => setTimeout(() => resolve2(null), 25));
28706
+ let vadTimeoutId;
28707
+ const timeoutPromise = new Promise((resolve2) => {
28708
+ vadTimeoutId = setTimeout(() => resolve2(null), 25);
28709
+ });
27927
28710
  const evt = await Promise.race([vadPromise, timeoutPromise]);
28711
+ clearTimeout(vadTimeoutId);
27928
28712
  if (evt) {
27929
28713
  getLogger().info(
27930
28714
  `[VAD] ${evt.type} agentSpeaking=${this.isSpeaking}`
@@ -27997,7 +28781,7 @@ var init_stream_handler = __esm({
27997
28781
  if ((this.deps.agent.bargeInThresholdMs ?? 300) === 0) return;
27998
28782
  }
27999
28783
  const hooks = this.deps.agent.hooks;
28000
- if (hooks) {
28784
+ if (hooks?.beforeSendToStt) {
28001
28785
  const hookExecutor = new PipelineHookExecutor(hooks);
28002
28786
  const hookCtx = this.buildHookContext();
28003
28787
  const processed = await hookExecutor.runBeforeSendToStt(pcm16k, hookCtx);
@@ -28423,7 +29207,7 @@ var init_stream_handler = __esm({
28423
29207
  }
28424
29208
  const providerModel = this.deps.agent.llm?.model ?? "";
28425
29209
  const augmentedTools = augmentWithBuiltinHandoffTools(
28426
- this.deps.agent.tools,
29210
+ this.resolvedTools ?? this.deps.agent.tools,
28427
29211
  {
28428
29212
  transferCall: (number4) => this.deps.bridge.transferCall(this.callId, number4),
28429
29213
  endCall: () => this.deps.bridge.endCall(this.callId, this.ws)
@@ -28447,7 +29231,7 @@ var init_stream_handler = __esm({
28447
29231
  let llmModel = this.deps.agent.model || "gpt-4o-mini";
28448
29232
  if (llmModel.includes("realtime")) llmModel = "gpt-4o-mini";
28449
29233
  const augmentedTools = augmentWithBuiltinHandoffTools(
28450
- this.deps.agent.tools,
29234
+ this.resolvedTools ?? this.deps.agent.tools,
28451
29235
  {
28452
29236
  transferCall: (number4) => this.deps.bridge.transferCall(this.callId, number4),
28453
29237
  endCall: () => this.deps.bridge.endCall(this.callId, this.ws)
@@ -28971,7 +29755,7 @@ var init_stream_handler = __esm({
28971
29755
  // ---------------------------------------------------------------------------
28972
29756
  async initRealtimeAdapter(resolvedPrompt) {
28973
29757
  const label = this.deps.bridge.label;
28974
- this.adapter = this.deps.buildAIAdapter(resolvedPrompt);
29758
+ this.adapter = this.deps.buildAIAdapter(resolvedPrompt, this.resolvedTools ?? void 0);
28975
29759
  let parked;
28976
29760
  if (typeof this.deps.popPrewarmedConnections === "function") {
28977
29761
  try {
@@ -29044,6 +29828,7 @@ var init_stream_handler = __esm({
29044
29828
  response_done: async (eventData) => this.onAdapterResponseDone(eventData),
29045
29829
  speech_started: async () => this.onAdapterSpeechInterrupt(),
29046
29830
  interruption: async () => this.onAdapterSpeechInterrupt(),
29831
+ error: async (eventData) => this.onAdapterError(eventData),
29047
29832
  function_call: async (eventData) => {
29048
29833
  if (this.adapter instanceof OpenAIRealtimeAdapter) {
29049
29834
  await this.handleFunctionCall(eventData);
@@ -29130,21 +29915,31 @@ var init_stream_handler = __esm({
29130
29915
  if (!this.metricsAcc.turnActive) this.metricsAcc.startTurn();
29131
29916
  this.currentAgentText = "";
29132
29917
  this.responseAudioStarted = false;
29918
+ this.currentTurnIndex = this.metricsAcc.reserveTurnIndex();
29133
29919
  this.userTranscriptPending = true;
29134
29920
  await this.emitUserSpeechEnded();
29135
29921
  }
29136
29922
  async onAdapterTranscriptInput(inputText) {
29137
- const stripped = inputText.trim().toLowerCase();
29138
- if (HALLUCINATIONS.has(stripped) || stripped === "") {
29923
+ if (isSttHallucination(inputText)) {
29139
29924
  getLogger().debug(
29140
29925
  `Realtime transcript_input dropped (likely Whisper hallucination on silence/echo): ${sanitizeLogValue(inputText.slice(0, 60))}`
29141
29926
  );
29142
29927
  this.userTranscriptPending = false;
29928
+ if (this.pendingAssistantTurn !== null) {
29929
+ const buffered = this.pendingAssistantTurn;
29930
+ this.pendingAssistantTurn = null;
29931
+ if (this.pendingAssistantTimer) {
29932
+ clearTimeout(this.pendingAssistantTimer);
29933
+ this.pendingAssistantTimer = null;
29934
+ }
29935
+ await this.flushAssistantTurn(buffered);
29936
+ }
29143
29937
  return;
29144
29938
  }
29145
29939
  getLogger().debug(`User (${this.deps.bridge.label}): ${sanitizeLogValue(inputText)}`);
29146
29940
  this.history.push({ role: "user", text: inputText, timestamp: Date.now() });
29147
- if (this.adapter instanceof OpenAIRealtimeAdapter) {
29941
+ this.emitTranscriptLine("user", inputText);
29942
+ if (this.adapter instanceof OpenAIRealtimeAdapter && this.adapter.getGateResponseOnTranscript()) {
29148
29943
  void this.adapter.requestResponse().catch(
29149
29944
  (err) => getLogger().debug(`Realtime requestResponse failed: ${String(err)}`)
29150
29945
  );
@@ -29191,8 +29986,12 @@ var init_stream_handler = __esm({
29191
29986
  history: [...this.history.entries]
29192
29987
  });
29193
29988
  }
29989
+ const reservedIndex = this.currentTurnIndex;
29990
+ this.emitTranscriptLine("assistant", text);
29194
29991
  this.responseAudioStarted = false;
29195
- await this.emitTurnMetrics(this.metricsAcc.recordTurnComplete(text));
29992
+ await this.emitTurnMetrics(
29993
+ this.metricsAcc.recordTurnComplete(text, reservedIndex ?? void 0)
29994
+ );
29196
29995
  }
29197
29996
  /**
29198
29997
  * Push an assistant turn into history and fire `onTranscript` so host
@@ -29291,7 +30090,9 @@ var init_stream_handler = __esm({
29291
30090
  this.pendingAssistantTimer = null;
29292
30091
  this.userTranscriptPending = false;
29293
30092
  if (buffered !== null) {
29294
- void this.flushAssistantTurn(buffered);
30093
+ this.flushAssistantTurn(buffered).catch(
30094
+ (err) => getLogger().error("flushAssistantTurn (fallback timer) failed:", err)
30095
+ );
29295
30096
  }
29296
30097
  }, _StreamHandler.REALTIME_USER_TRANSCRIPT_WAIT_MS);
29297
30098
  this.responseAudioStarted = false;
@@ -29300,7 +30101,9 @@ var init_stream_handler = __esm({
29300
30101
  await this.flushAssistantTurn(text);
29301
30102
  }
29302
30103
  async onAdapterSpeechInterrupt() {
29303
- if (this.adapter instanceof OpenAIRealtimeAdapter) {
30104
+ const isEngine = this.adapter instanceof OpenAIRealtimeAdapter;
30105
+ const clientManaged = isEngine && this.adapter.getGateResponseOnTranscript();
30106
+ if (clientManaged) {
29304
30107
  const startedAt = this.adapter.currentResponseFirstAudioAt;
29305
30108
  if (startedAt !== null) {
29306
30109
  const elapsedMs = Date.now() - startedAt;
@@ -29313,12 +30116,20 @@ var init_stream_handler = __esm({
29313
30116
  }
29314
30117
  }
29315
30118
  this.deps.bridge.sendClear(this.ws, this.streamSid);
29316
- if (this.adapter instanceof OpenAIRealtimeAdapter) this.adapter.cancelResponse();
30119
+ if (clientManaged) {
30120
+ this.metricsAcc.recordBargeinDetected();
30121
+ this.adapter.cancelResponse();
30122
+ } else if (isEngine) {
30123
+ this.adapter.truncate();
30124
+ }
29317
30125
  this.metricsAcc.recordTurnInterrupted();
29318
30126
  if (this.responseAudioStarted) {
29319
30127
  await this.emitAgentSpeechEnded(true);
29320
30128
  }
29321
30129
  await this.emitUserSpeechStarted();
30130
+ if (clientManaged) {
30131
+ this.metricsAcc.anchorUserSpeechStart();
30132
+ }
29322
30133
  this.currentAgentText = "";
29323
30134
  this.responseAudioStarted = false;
29324
30135
  this.pendingAssistantTurn = null;
@@ -29328,6 +30139,28 @@ var init_stream_handler = __esm({
29328
30139
  }
29329
30140
  this.userTranscriptPending = false;
29330
30141
  }
30142
+ /**
30143
+ * Handle a Realtime ``error`` event (issue #154, fix 4).
30144
+ *
30145
+ * Both Realtime providers dispatch ``('error', …)`` for server-side errors,
30146
+ * non-normal socket closes, and socket errors, but the stream handler
30147
+ * previously had no entry for it in the dispatch table so these were
30148
+ * silently swallowed. We surface them at WARN level with ONLY the error
30149
+ * envelope fields (``type`` / ``code`` / ``message``) — never any audio or
30150
+ * transcript body, to avoid logging PII. The call is NOT terminated: the
30151
+ * provider decides whether to recover, and many of these (e.g. a transient
30152
+ * ``input_audio_buffer_commit_empty``) are non-fatal. Parity with the
30153
+ * Python ``elif ev_type == 'error'`` branches.
30154
+ */
30155
+ async onAdapterError(eventData) {
30156
+ const err = eventData ?? {};
30157
+ const type = typeof err.type === "string" ? err.type : "unknown";
30158
+ const code = typeof err.code === "string" ? err.code : "";
30159
+ const message = typeof err.message === "string" ? err.message : "";
30160
+ getLogger().warn(
30161
+ `Realtime error (${this.deps.bridge.label}) type=${type} code=${code} message=${sanitizeLogValue(message)}`
30162
+ );
30163
+ }
29331
30164
  /**
29332
30165
  * Emit a tool-invocation event into the transcript timeline. Pushes a
29333
30166
  * `role=tool` entry into `history` (so it appears in the dashboard
@@ -29395,7 +30228,8 @@ var init_stream_handler = __esm({
29395
30228
  }
29396
30229
  return;
29397
30230
  }
29398
- const toolDef = this.deps.agent.tools?.find((t) => t.name === fc.name);
30231
+ const effectiveTools = this.resolvedTools ?? this.deps.agent.tools;
30232
+ const toolDef = effectiveTools?.find((t) => t.name === fc.name);
29399
30233
  if (!toolDef) {
29400
30234
  getLogger().warn(`Realtime tool '${fc.name}' not found in agent.tools \u2014 skipping`);
29401
30235
  const result2 = JSON.stringify({ error: `Tool '${fc.name}' not registered`, fallback: true });
@@ -29418,7 +30252,8 @@ var init_stream_handler = __esm({
29418
30252
  if (msg && this.adapter instanceof OpenAIRealtimeAdapter) {
29419
30253
  const realtimeAdapter = this.adapter;
29420
30254
  reassuranceTimer = setTimeout(() => {
29421
- realtimeAdapter.sendText(msg).catch((e) => {
30255
+ const fire = typeof realtimeAdapter.sendReassurance === "function" ? realtimeAdapter.sendReassurance(msg) : realtimeAdapter.sendText(msg);
30256
+ fire.catch((e) => {
29422
30257
  getLogger().warn(`Reassurance message failed for tool '${fc.name}': ${String(e)}`);
29423
30258
  });
29424
30259
  }, afterMs);
@@ -29438,7 +30273,8 @@ var init_stream_handler = __esm({
29438
30273
  parsedArgs,
29439
30274
  {
29440
30275
  call_id: this.callId,
29441
- caller: this.caller
30276
+ caller: this.caller,
30277
+ callee: this.callee
29442
30278
  },
29443
30279
  onProgress
29444
30280
  );
@@ -29588,21 +30424,21 @@ async function appendJsonl(filePath, record2) {
29588
30424
  await import_node_fs2.promises.mkdir(path4.dirname(filePath), { recursive: true });
29589
30425
  await import_node_fs2.promises.appendFile(filePath, JSON.stringify(record2) + "\n", { encoding: "utf8" });
29590
30426
  }
29591
- function rmTree(target) {
30427
+ async function rmTreeAsync(target) {
29592
30428
  try {
29593
- for (const child of fs4.readdirSync(target)) {
30429
+ for (const child of await import_node_fs2.promises.readdir(target)) {
29594
30430
  const childPath = path4.join(target, child);
29595
- const stat = fs4.lstatSync(childPath);
30431
+ const stat = await import_node_fs2.promises.lstat(childPath);
29596
30432
  if (stat.isDirectory()) {
29597
- rmTree(childPath);
30433
+ await rmTreeAsync(childPath);
29598
30434
  } else {
29599
30435
  try {
29600
- fs4.unlinkSync(childPath);
30436
+ await import_node_fs2.promises.unlink(childPath);
29601
30437
  } catch {
29602
30438
  }
29603
30439
  }
29604
30440
  }
29605
- fs4.rmdirSync(target);
30441
+ await import_node_fs2.promises.rmdir(target);
29606
30442
  } catch {
29607
30443
  }
29608
30444
  }
@@ -29684,7 +30520,9 @@ var init_call_log = __esm({
29684
30520
  getLogger().warn(`call_log write failed (${sanitizeLogValue(callId)}): ${sanitizeLogValue(String(err))}`);
29685
30521
  }
29686
30522
  if (crypto5.randomBytes(1)[0] < 5) {
29687
- this.sweepOldDays();
30523
+ void this.sweepOldDays().catch(
30524
+ (e) => getLogger().debug(`call_log sweep failed: ${sanitizeLogValue(String(e))}`)
30525
+ );
29688
30526
  }
29689
30527
  }
29690
30528
  /** Append a single turn record to the call's `transcript.jsonl`. */
@@ -29759,23 +30597,27 @@ var init_call_log = __esm({
29759
30597
  }
29760
30598
  }
29761
30599
  // --- Retention ---------------------------------------------------------
29762
- sweepOldDays() {
30600
+ async sweepOldDays() {
29763
30601
  if (this.root === null) return;
29764
30602
  const days = retentionDays();
29765
30603
  if (days === 0) return;
29766
30604
  const cutoff = Date.now() / 1e3 - days * 86400;
29767
30605
  const callsRoot = path4.join(this.root, "calls");
29768
- if (!fs4.existsSync(callsRoot)) return;
29769
30606
  try {
29770
- for (const yearName of fs4.readdirSync(callsRoot)) {
30607
+ await import_node_fs2.promises.access(callsRoot);
30608
+ } catch {
30609
+ return;
30610
+ }
30611
+ try {
30612
+ for (const yearName of await import_node_fs2.promises.readdir(callsRoot)) {
29771
30613
  if (!/^\d+$/.test(yearName)) continue;
29772
30614
  const yearDir = path4.join(callsRoot, yearName);
29773
- if (!fs4.statSync(yearDir).isDirectory()) continue;
29774
- for (const monthName of fs4.readdirSync(yearDir)) {
30615
+ if (!(await import_node_fs2.promises.stat(yearDir)).isDirectory()) continue;
30616
+ for (const monthName of await import_node_fs2.promises.readdir(yearDir)) {
29775
30617
  if (!/^\d+$/.test(monthName)) continue;
29776
30618
  const monthDir = path4.join(yearDir, monthName);
29777
- if (!fs4.statSync(monthDir).isDirectory()) continue;
29778
- for (const dayName of fs4.readdirSync(monthDir)) {
30619
+ if (!(await import_node_fs2.promises.stat(monthDir)).isDirectory()) continue;
30620
+ for (const dayName of await import_node_fs2.promises.readdir(monthDir)) {
29779
30621
  if (!/^\d+$/.test(dayName)) continue;
29780
30622
  const dayDir = path4.join(monthDir, dayName);
29781
30623
  const y = Number.parseInt(yearName, 10);
@@ -29783,16 +30625,16 @@ var init_call_log = __esm({
29783
30625
  const d = Number.parseInt(dayName, 10);
29784
30626
  const ts = Date.UTC(y, m - 1, d) / 1e3;
29785
30627
  if (ts < cutoff) {
29786
- rmTree(dayDir);
30628
+ await rmTreeAsync(dayDir);
29787
30629
  }
29788
30630
  }
29789
30631
  try {
29790
- if (fs4.readdirSync(monthDir).length === 0) fs4.rmdirSync(monthDir);
30632
+ if ((await import_node_fs2.promises.readdir(monthDir)).length === 0) await import_node_fs2.promises.rmdir(monthDir);
29791
30633
  } catch {
29792
30634
  }
29793
30635
  }
29794
30636
  try {
29795
- if (fs4.readdirSync(yearDir).length === 0) fs4.rmdirSync(yearDir);
30637
+ if ((await import_node_fs2.promises.readdir(yearDir)).length === 0) await import_node_fs2.promises.rmdir(yearDir);
29796
30638
  } catch {
29797
30639
  }
29798
30640
  }
@@ -29833,13 +30675,16 @@ function telnyxHangupOutcome(cause) {
29833
30675
  if (c === "call_rejected" || c === "rejected" || c === "destination_out_of_order") return "failed";
29834
30676
  return null;
29835
30677
  }
29836
- function validateWebhookUrl(url2) {
30678
+ function validateWebhookUrl(url2, allowLoopback = false) {
29837
30679
  const parsed = new URL(url2);
29838
30680
  if (!["http:", "https:"].includes(parsed.protocol)) {
29839
30681
  throw new Error(`Invalid webhook URL scheme: ${parsed.protocol}`);
29840
30682
  }
29841
30683
  const rawHost = parsed.hostname;
29842
30684
  const host = rawHost.replace(/^\[/, "").replace(/\]$/, "").toLowerCase();
30685
+ if (allowLoopback) {
30686
+ return;
30687
+ }
29843
30688
  const BLOCKED_HOSTNAMES = /* @__PURE__ */ new Set([
29844
30689
  "localhost",
29845
30690
  "ip6-localhost",
@@ -29881,6 +30726,34 @@ function validateWebhookUrl(url2) {
29881
30726
  }
29882
30727
  }
29883
30728
  }
30729
/**
 * Extract the bare, lower-cased host from a URL, a URL authority, or a plain
 * hostname.
 *
 * Accepted shapes: `scheme://host[:port]/path`, `host`, `host:port`,
 * `[v6]`, `[v6]:port`, and un-bracketed IPv6 literals. Userinfo
 * (`user:pass@`), path, query and fragment are discarded.
 *
 * @param {string} value - URL, authority or hostname.
 * @returns {string} The host without brackets or port, lower-cased; `""`
 *   when `value` is empty, whitespace-only or not a string.
 */
function extractHost(value) {
  if (typeof value !== "string") return "";
  const trimmed = value.trim();
  if (!trimmed) return "";
  // Strip the scheme, then everything from the first delimiter that ends the
  // authority component (path, query or fragment).
  let host = trimmed.replace(/^[a-z]+:\/\//i, "").replace(/[\/?#].*$/, "");
  // Drop userinfo so "user:pw@host:443" is not mistaken for a host.
  const at = host.lastIndexOf("@");
  if (at !== -1) {
    host = host.slice(at + 1);
  }
  if (host.startsWith("[")) {
    // Bracketed IPv6 literal: the host is whatever sits between the brackets.
    return host.slice(1).split("]", 1)[0].toLowerCase();
  }
  // Only "name:port" has exactly one colon. Anything with more colons is an
  // un-bracketed IPv6 literal whose last group must not be treated as a port.
  const firstColon = host.indexOf(":");
  if (firstColon !== -1 && firstColon === host.lastIndexOf(":") && /^\d+$/.test(host.slice(firstColon + 1))) {
    host = host.slice(0, firstColon);
  }
  return host.toLowerCase();
}

/**
 * Report whether a URL / authority / hostname refers to the local loopback
 * interface.
 *
 * Recognised: `localhost`, `ip6-localhost`, `ip6-loopback`, `::1` (compressed
 * or fully expanded), any valid dotted-quad in `127.0.0.0/8`, and the
 * IPv4-mapped IPv6 form of such an address (`::ffff:127.x.y.z`).
 *
 * @param {string} value - URL, authority or hostname.
 * @returns {boolean} `true` only for loopback hosts; `false` for everything
 *   else, including empty input and syntactically invalid addresses.
 */
function isLoopbackHost(value) {
  const host = extractHost(value);
  if (!host) return false;
  if (host === "localhost" || host === "ip6-localhost" || host === "ip6-loopback") {
    return true;
  }
  if (host === "::1" || host === "0:0:0:0:0:0:0:1") return true;
  // Unwrap an IPv4-mapped IPv6 literal so it is judged by its IPv4 part.
  const mapped = /^::ffff:(\d{1,3}(?:\.\d{1,3}){3})$/.exec(host);
  const dotted = mapped ? mapped[1] : host;
  const v4 = /^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/.exec(dotted);
  if (!v4) return false;
  const octets = v4.slice(1).map((part) => Number.parseInt(part, 10));
  // Reject out-of-range octets: "127.300.0.1" is not an address at all and
  // must not be classified as loopback.
  return octets[0] === 127 && octets.every((octet) => octet <= 255);
}
29884
30757
  function validateTelnyxSignature(rawBody, signature, timestamp, publicKey, toleranceSec = 300) {
29885
30758
  try {
29886
30759
  const ts = parseInt(timestamp, 10);
@@ -29944,7 +30817,7 @@ function resolveVariables(template, variables) {
29944
30817
  }
29945
30818
  return result;
29946
30819
  }
29947
- function buildAIAdapter(config2, agent, resolvedPrompt) {
30820
+ function buildAIAdapter(config2, agent, resolvedPrompt, toolsOverride) {
29948
30821
  const engine = agent.engine;
29949
30822
  if (agent.provider === "elevenlabs_convai") {
29950
30823
  if (!engine || engine.kind !== "elevenlabs_convai") {
@@ -29959,12 +30832,24 @@ function buildAIAdapter(config2, agent, resolvedPrompt) {
29959
30832
  agent.firstMessage ?? ""
29960
30833
  );
29961
30834
  }
29962
- const agentTools = agent.tools?.map((t) => ({
29963
- name: t.name,
29964
- description: t.description,
29965
- parameters: t.parameters,
29966
- strict: t.strict
29967
- })) ?? [];
30835
+ const preamblesOn = Boolean(agent.toolCallPreambles);
30836
+ const agentTools = (toolsOverride ?? agent.tools)?.map((t) => {
30837
+ let description = t.description;
30838
+ const reassurance = t.reassurance;
30839
+ const sample = typeof reassurance === "string" ? reassurance : void 0;
30840
+ if (preamblesOn && sample) {
30841
+ description = `${description}
30842
+
30843
+ Preamble sample phrases:
30844
+ - ${sample}`;
30845
+ }
30846
+ return {
30847
+ name: t.name,
30848
+ description,
30849
+ parameters: t.parameters,
30850
+ strict: t.strict
30851
+ };
30852
+ }) ?? [];
29968
30853
  const tools = [...agentTools, TRANSFER_CALL_TOOL, END_CALL_TOOL];
29969
30854
  const isOpenAIEngine = engine && (engine.kind === "openai_realtime" || engine.kind === "openai_realtime_2");
29970
30855
  const openaiKey = isOpenAIEngine ? engine.apiKey : config2.openaiKey ?? "";
@@ -29976,8 +30861,27 @@ function buildAIAdapter(config2, agent, resolvedPrompt) {
29976
30861
  if (engine.inputAudioTranscriptionModel !== void 0) {
29977
30862
  adapterOptions.inputAudioTranscriptionModel = engine.inputAudioTranscriptionModel;
29978
30863
  }
30864
+ if (engine.noiseReduction !== void 0) {
30865
+ adapterOptions.noiseReduction = engine.noiseReduction;
30866
+ }
30867
+ if (engine.turnDetection !== void 0) {
30868
+ adapterOptions.turnDetection = engine.turnDetection;
30869
+ }
30870
+ if (engine.gateResponseOnTranscript !== void 0) {
30871
+ adapterOptions.gateResponseOnTranscript = engine.gateResponseOnTranscript;
30872
+ }
30873
+ }
30874
+ const agentOpts = agent;
30875
+ if (agentOpts.openaiRealtimeNoiseReduction !== void 0) {
30876
+ adapterOptions.noiseReduction = agentOpts.openaiRealtimeNoiseReduction;
29979
30877
  }
29980
- const AdapterCtor = engine && engine.kind === "openai_realtime_2" ? OpenAIRealtime2Adapter : OpenAIRealtimeAdapter;
30878
+ if (agentOpts.realtimeTurnDetection !== void 0) {
30879
+ adapterOptions.turnDetection = agentOpts.realtimeTurnDetection;
30880
+ }
30881
+ if (agentOpts.openaiRealtimeGateResponseOnTranscript !== void 0) {
30882
+ adapterOptions.gateResponseOnTranscript = agentOpts.openaiRealtimeGateResponseOnTranscript;
30883
+ }
30884
+ const AdapterCtor = OpenAIRealtime2Adapter;
29981
30885
  return new AdapterCtor(
29982
30886
  openaiKey,
29983
30887
  agent.model,
@@ -30006,7 +30910,6 @@ var init_server = __esm({
30006
30910
  import_express = __toESM(require("express"));
30007
30911
  import_http = require("http");
30008
30912
  import_ws5 = require("ws");
30009
- init_openai_realtime();
30010
30913
  init_openai_realtime_2();
30011
30914
  init_elevenlabs_convai();
30012
30915
  init_plivo_adapter();
@@ -30069,6 +30972,11 @@ var init_server = __esm({
30069
30972
  getLogger().warn(`TwilioBridge.transferCall rejected: invalid CallSid ${JSON.stringify(callId)}`);
30070
30973
  return;
30071
30974
  }
30975
+ const E164_RE = /^\+[1-9]\d{6,14}$/;
30976
+ if (!E164_RE.test(toNumber)) {
30977
+ getLogger().warn(`TwilioBridge.transferCall rejected: invalid target ${JSON.stringify(toNumber)}`);
30978
+ return;
30979
+ }
30072
30980
  const transferUrl = `https://api.twilio.com/2010-04-01/Accounts/${this.config.twilioSid}/Calls/${callId}.json`;
30073
30981
  await fetch(transferUrl, {
30074
30982
  method: "POST",
@@ -30275,7 +31183,7 @@ var init_server = __esm({
30275
31183
  };
30276
31184
  GRACEFUL_SHUTDOWN_TIMEOUT_MS = 1e4;
30277
31185
  EmbeddedServer = class {
30278
- constructor(config2, agent, onCallStart, onCallEnd, onTranscript, onMessage, recording = false, voicemailMessage = "", onMetrics, pricingOverrides, dashboard = true, dashboardToken = "") {
31186
+ constructor(config2, agent, onCallStart, onCallEnd, onTranscript, onMessage, recording = false, voicemailMessage = "", onMetrics, pricingOverrides, dashboard = true, dashboardToken = "", allowInsecureDashboard = false) {
30279
31187
  this.config = config2;
30280
31188
  this.agent = agent;
30281
31189
  this.onCallStart = onCallStart;
@@ -30287,6 +31195,7 @@ var init_server = __esm({
30287
31195
  this.onMetrics = onMetrics;
30288
31196
  this.dashboard = dashboard;
30289
31197
  this.dashboardToken = dashboardToken;
31198
+ this.allowInsecureDashboard = allowInsecureDashboard;
30290
31199
  this.metricsStore = new MetricsStore();
30291
31200
  this.pricing = mergePricing(pricingOverrides);
30292
31201
  const logRoot = config2.persistRoot === void 0 ? resolveLogRoot() : config2.persistRoot;
@@ -30313,8 +31222,31 @@ var init_server = __esm({
30313
31222
  onMetrics;
30314
31223
  dashboard;
30315
31224
  dashboardToken;
31225
+ allowInsecureDashboard;
30316
31226
  server = null;
30317
31227
  wss = null;
31228
+ /**
31229
+ * Whether the dashboard + ``/api/*`` routes were mounted in ``start()``.
31230
+ * The dashboard is now ALWAYS mounted when enabled (it never 404s): an
31231
+ * exposed, token-less bind is protected with an auto-generated token
31232
+ * rather than refused. This flag is therefore ``true`` whenever the
31233
+ * dashboard is enabled — kept so the startup banner can gate on it.
31234
+ */
31235
+ dashboardMounted = false;
31236
+ /**
31237
+ * The token actually in effect for the dashboard + ``/api/*`` routes,
31238
+ * resolved in ``start()``. One of: the explicit ``dashboardToken`` if set;
31239
+ * a freshly generated UUID when the bind is exposed and
31240
+ * ``allowInsecureDashboard`` is ``false``; or ``''`` (OPEN) for loopback
31241
+ * local dev and for an exposed bind with ``allowInsecureDashboard=true``.
31242
+ * Read by the startup banner (to print the ready URL with ``?token=``) and
31243
+ * by authentic tests (to authenticate).
31244
+ */
31245
+ effectiveDashboardToken = "";
31246
+ /** The token in effect for the dashboard, resolved at ``start()``. Empty string = served OPEN. */
31247
+ get resolvedDashboardToken() {
31248
+ return this.effectiveDashboardToken;
31249
+ }
30318
31250
  twilioTokenWarningLogged = false;
30319
31251
  telnyxSigWarningLogged = false;
30320
31252
  metricsStore;
@@ -30332,12 +31264,14 @@ var init_server = __esm({
30332
31264
  activeConnections = /* @__PURE__ */ new Set();
30333
31265
  activeCallIds = /* @__PURE__ */ new Map();
30334
31266
  /**
30335
- * Per-call AMD result callback set by ``Patter.call()`` for the most
30336
- * recent outbound call. Public so ``client.ts`` can populate it after
30337
- * server start. Cleared after firing once per call to avoid leaking
30338
- * across calls.
31267
+ * Per-call AMD result callbacks keyed by CallSid / call_control_id.
31268
+ * Public so ``client.ts`` can register a callback per outbound call.
31269
+ * The Map slot is deleted just before its callback is invoked, preventing
31270
+ * cross-call misfires when multiple concurrent outbound calls are in
31271
+ * flight (single-slot was a race condition: the last registered callback
31272
+ * would win for every in-flight AMD result).
30339
31273
  */
30340
- onMachineDetection;
31274
+ onMachineDetectionByCallSid = /* @__PURE__ */ new Map();
30341
31275
  /**
30342
31276
  * Pre-warm first-message audio accessor wired by ``Patter.serve()``.
30343
31277
  * The per-call StreamHandler invokes this with its ``callId`` at the
@@ -30458,6 +31392,42 @@ var init_server = __esm({
30458
31392
  this.completions.clear();
30459
31393
  this.amdClass.clear();
30460
31394
  }
31395
+ /**
31396
+ * Decide whether this server is reachable beyond loopback (127.0.0.1).
31397
+ *
31398
+ * The dashboard serves call transcripts and metadata (PII), so before
31399
+ * mounting it unauthenticated we must know whether anyone off-host can
31400
+ * reach the port. Signals (in order):
31401
+ *
31402
+ * (a)+(b) — a public webhook URL. ``client.ts`` resolves
31403
+ * ``config.webhookUrl`` to the live hostname for every serve path:
31404
+ * a cloudflared quick-tunnel host, a {@link StaticTunnel} hostname,
31405
+ * or an explicit ``webhookUrl``. A tunnel directive (signal a) and a
31406
+ * public webhook URL (signal b) therefore both surface here as a
31407
+ * non-loopback, non-private webhook host. This is the case that
31408
+ * matters for tunnels — the whole port (dashboard included) is
31409
+ * published on a public ``*.trycloudflare.com`` URL.
31410
+ *
31411
+ * (c) — an EXPLICIT non-loopback bind override via ``PATTER_BIND_HOST``.
31412
+ * Node's ``http.Server.listen(port, host)`` defaults to 127.0.0.1
31413
+ * here (see ``start()``), so plain local dev is never flagged; only
31414
+ * an operator who set ``PATTER_BIND_HOST`` to e.g. ``0.0.0.0`` is.
31415
+ *
31416
+ * Only loopback webhook hosts (127.0.0.0/8, localhost, ::1) are treated as
31417
+ * not-exposed. RFC1918 / LAN hosts ARE exposure — they are reachable by
31418
+ * other machines on the network — matching the Python SDK's gate.
31419
+ */
31420
+ isExposed() {
31421
+ const bindOverride = process.env.PATTER_BIND_HOST;
31422
+ if (bindOverride && !isLoopbackHost(bindOverride)) {
31423
+ return true;
31424
+ }
31425
+ const host = extractHost(this.config.webhookUrl ?? "");
31426
+ if (host && !isLoopbackHost(host)) {
31427
+ return true;
31428
+ }
31429
+ return false;
31430
+ }
30461
31431
  /** Bind HTTP + WebSocket listeners on `port`, mount carrier webhooks and dashboard routes. */
30462
31432
  async start(port = 8e3) {
30463
31433
  const webhookUrlPattern = /^[a-zA-Z0-9][a-zA-Z0-9.\-]+[a-zA-Z0-9]$/;
@@ -30493,6 +31463,9 @@ var init_server = __esm({
30493
31463
  }
30494
31464
  next();
30495
31465
  });
31466
+ req.on("error", (err) => {
31467
+ next(err);
31468
+ });
30496
31469
  } else {
30497
31470
  next();
30498
31471
  }
@@ -30503,8 +31476,25 @@ var init_server = __esm({
30503
31476
  res.json({ status: "ok", mode: "local" });
30504
31477
  });
30505
31478
  if (this.dashboard) {
30506
- mountDashboard(app, this.metricsStore, this.dashboardToken);
30507
- mountApi(app, this.metricsStore, this.dashboardToken);
31479
+ const exposed = this.isExposed();
31480
+ if (this.dashboardToken) {
31481
+ this.effectiveDashboardToken = this.dashboardToken;
31482
+ } else if (exposed && !this.allowInsecureDashboard) {
31483
+ this.effectiveDashboardToken = import_node_crypto4.default.randomUUID();
31484
+ getLogger().warn(
31485
+ `Dashboard is reachable beyond 127.0.0.1 without a configured token; protecting it with an auto-generated token. Open: http://127.0.0.1:${port}/?token=${this.effectiveDashboardToken} Set dashboardToken for a stable token, or allowInsecureDashboard=true to serve it open.`
31486
+ );
31487
+ } else if (exposed && this.allowInsecureDashboard) {
31488
+ this.effectiveDashboardToken = "";
31489
+ getLogger().warn(
31490
+ "Dashboard served WITHOUT authentication on a publicly-reachable bind (allowInsecureDashboard=true). Call transcripts and metadata are exposed to anyone who can reach this URL."
31491
+ );
31492
+ } else {
31493
+ this.effectiveDashboardToken = "";
31494
+ }
31495
+ mountDashboard(app, this.metricsStore, this.effectiveDashboardToken);
31496
+ mountApi(app, this.metricsStore, this.effectiveDashboardToken);
31497
+ this.dashboardMounted = true;
30508
31498
  }
30509
31499
  app.post("/webhooks/twilio/status", (req, res) => {
30510
31500
  if (this.config.twilioToken) {
@@ -30590,8 +31580,9 @@ var init_server = __esm({
30590
31580
  if (callSid) {
30591
31581
  this.amdClass.set(callSid, classifyTwilioAmd(answeredBy));
30592
31582
  }
30593
- const cb = this.onMachineDetection;
31583
+ const cb = callSid ? this.onMachineDetectionByCallSid.get(callSid) : void 0;
30594
31584
  if (cb && callSid) {
31585
+ this.onMachineDetectionByCallSid.delete(callSid);
30595
31586
  try {
30596
31587
  await cb({
30597
31588
  call_id: callSid,
@@ -30718,8 +31709,9 @@ var init_server = __esm({
30718
31709
  if (amdCallId) {
30719
31710
  this.amdClass.set(amdCallId, classifyTelnyxAmd(amdResult));
30720
31711
  }
30721
- const cbTx = this.onMachineDetection;
31712
+ const cbTx = amdCallId ? this.onMachineDetectionByCallSid.get(amdCallId) : void 0;
30722
31713
  if (cbTx && amdCallId) {
31714
+ this.onMachineDetectionByCallSid.delete(amdCallId);
30723
31715
  try {
30724
31716
  await cbTx({
30725
31717
  call_id: amdCallId,
@@ -30887,8 +31879,13 @@ var init_server = __esm({
30887
31879
  getLogger().info(`AMD result for ${sanitizeLogValue(callUuid)}: ${sanitizeLogValue(amdRaw)}`);
30888
31880
  const classification = classifyPlivoAmd(amdRaw);
30889
31881
  if (callUuid) this.amdClass.set(callUuid, classification);
30890
- const cb = this.onMachineDetection;
31882
+ let cbKey = callUuid && this.onMachineDetectionByCallSid.has(callUuid) ? callUuid : void 0;
31883
+ if (cbKey === void 0 && this.onMachineDetectionByCallSid.size === 1) {
31884
+ cbKey = this.onMachineDetectionByCallSid.keys().next().value;
31885
+ }
31886
+ const cb = cbKey !== void 0 ? this.onMachineDetectionByCallSid.get(cbKey) : void 0;
30891
31887
  if (cb && callUuid) {
31888
+ if (cbKey !== void 0) this.onMachineDetectionByCallSid.delete(cbKey);
30892
31889
  try {
30893
31890
  await cb({
30894
31891
  call_id: callUuid,
@@ -30969,27 +31966,34 @@ var init_server = __esm({
30969
31966
  this.handleTwilioStream(ws, url2);
30970
31967
  }
30971
31968
  });
30972
- await new Promise((resolve2) => {
31969
+ await new Promise((resolve2, reject) => {
30973
31970
  const bindHost = process.env.PATTER_BIND_HOST ?? "127.0.0.1";
31971
+ this.server.once("error", reject);
30974
31972
  this.server.listen(port, bindHost, () => {
31973
+ this.server.off("error", reject);
30975
31974
  getLogger().info(`Server on port ${port}`);
30976
31975
  getLogger().info(`Webhook: https://${this.config.webhookUrl}`);
30977
31976
  getLogger().info(`Phone: ${this.config.phoneNumber}`);
30978
31977
  const model = this.agent.model ?? "";
30979
- if (model && model !== "gpt-4o-mini-realtime-preview" && model.includes("realtime")) {
31978
+ const calibrated = ["gpt-realtime-mini", "gpt-4o-mini-realtime-preview"];
31979
+ if (model && !calibrated.includes(model) && model.includes("realtime")) {
30980
31980
  getLogger().warn(
30981
- `Agent uses "${sanitizeLogValue(model)}" but DEFAULT_PRICING.openai_realtime is calibrated for "gpt-4o-mini-realtime-preview". Pass Patter({ pricing: { openai_realtime: {...} } }) to set rates for this model, otherwise the dashboard cost display will under-report.`
31981
+ `Agent uses "${sanitizeLogValue(model)}" but DEFAULT_PRICING.openai_realtime is calibrated for the default Realtime models (gpt-realtime-mini / gpt-4o-mini-realtime-preview). Pass Patter({ pricing: { openai_realtime: {...} } }) to set rates for this model, otherwise the dashboard cost display will under-report.`
30982
31982
  );
30983
31983
  }
30984
- if (this.dashboard) {
30985
- console.log("\n\u2500\u2500\u2500\u2500 Dashboard \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500");
30986
- getLogger().info(`URL: http://127.0.0.1:${port}/`);
30987
- if (!this.dashboardToken) {
31984
+ if (this.dashboard && this.dashboardMounted) {
31985
+ getLogger().info("\u2500\u2500\u2500\u2500 Dashboard \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500");
31986
+ if (this.effectiveDashboardToken) {
31987
+ getLogger().info(
31988
+ `URL: http://127.0.0.1:${port}/?token=${this.effectiveDashboardToken}`
31989
+ );
31990
+ } else {
31991
+ getLogger().info(`URL: http://127.0.0.1:${port}/`);
30988
31992
  getLogger().warn(
30989
31993
  "Dashboard is enabled without authentication. Set dashboardToken to protect call data. This is safe for local development but should not be exposed on a public network."
30990
31994
  );
30991
31995
  }
30992
- console.log("\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n");
31996
+ getLogger().info("\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500");
30993
31997
  }
30994
31998
  resolve2();
30995
31999
  });
@@ -31065,7 +32069,7 @@ var init_server = __esm({
31065
32069
  onMessage: this.onMessage,
31066
32070
  onMetrics: wrappedMetrics,
31067
32071
  recording: this.recording,
31068
- buildAIAdapter: (resolvedPrompt) => buildAIAdapter(this.config, this.agent, resolvedPrompt),
32072
+ buildAIAdapter: (resolvedPrompt, toolsOverride) => buildAIAdapter(this.config, this.agent, resolvedPrompt, toolsOverride),
31069
32073
  sanitizeVariables,
31070
32074
  resolveVariables,
31071
32075
  popPrewarmAudio: this.popPrewarmAudio,
@@ -31339,17 +32343,18 @@ var init_server = __esm({
31339
32343
  }
31340
32344
  if (this.activeConnections.size > 0) {
31341
32345
  getLogger().info(`Waiting for ${this.activeConnections.size} active connection(s) to close...`);
31342
- await Promise.race([
31343
- new Promise((resolve2) => {
31344
- const checkInterval = setInterval(() => {
31345
- if (this.activeConnections.size === 0) {
31346
- clearInterval(checkInterval);
31347
- resolve2();
31348
- }
31349
- }, 100);
31350
- }),
31351
- new Promise((resolve2) => setTimeout(resolve2, GRACEFUL_SHUTDOWN_TIMEOUT_MS))
31352
- ]);
32346
+ let checkInterval;
32347
+ const drainPromise = new Promise((resolve2) => {
32348
+ checkInterval = setInterval(() => {
32349
+ if (this.activeConnections.size === 0) {
32350
+ clearInterval(checkInterval);
32351
+ resolve2();
32352
+ }
32353
+ }, 100);
32354
+ });
32355
+ const timeoutPromise = new Promise((resolve2) => setTimeout(resolve2, GRACEFUL_SHUTDOWN_TIMEOUT_MS));
32356
+ await Promise.race([drainPromise, timeoutPromise]);
32357
+ clearInterval(checkInterval);
31353
32358
  }
31354
32359
  if (this.activeConnections.size > 0) {
31355
32360
  getLogger().info(`Force-closing ${this.activeConnections.size} remaining connection(s)`);
@@ -31478,6 +32483,9 @@ __export(carrier_config_exports, {
31478
32483
  configureTelnyxNumber: () => configureTelnyxNumber,
31479
32484
  configureTwilioNumber: () => configureTwilioNumber
31480
32485
  });
32486
/**
 * Mask a phone number for logs and error messages, keeping the first three
 * and last four characters (e.g. `+14155552671` -> `+14***2671`).
 *
 * Inputs of seven characters or fewer are masked entirely: the kept prefix
 * and suffix would otherwise overlap and reproduce the whole value.
 *
 * @param {string} n - Phone number, normally E.164.
 * @returns {string} The redacted form; `"***"` for short, empty or missing input.
 */
function redactPhone2(n) {
  // Coerce so a missing number cannot throw while an error message is being built.
  const digits = String(n ?? "");
  if (digits.length <= 7) return "***";
  return `${digits.slice(0, 3)}***${digits.slice(-4)}`;
}
31481
32489
  async function configureTwilioNumber(accountSid, authToken, phoneNumber, voiceUrl) {
31482
32490
  const auth2 = `Basic ${Buffer.from(`${accountSid}:${authToken}`).toString("base64")}`;
31483
32491
  const listUrl = `${TWILIO_API_BASE}/Accounts/${accountSid}/IncomingPhoneNumbers.json?PhoneNumber=${encodeURIComponent(phoneNumber)}`;
@@ -31493,7 +32501,7 @@ async function configureTwilioNumber(accountSid, authToken, phoneNumber, voiceUr
31493
32501
  const body = await listResp.json();
31494
32502
  const match = body.incoming_phone_numbers?.[0];
31495
32503
  if (!match) {
31496
- throw new Error(`Twilio number ${phoneNumber} not found on account ${accountSid}`);
32504
+ throw new Error(`Twilio number ${redactPhone2(phoneNumber)} not found on account ${accountSid}`);
31497
32505
  }
31498
32506
  const updateUrl = `${TWILIO_API_BASE}/Accounts/${accountSid}/IncomingPhoneNumbers/${match.sid}.json`;
31499
32507
  const form = new URLSearchParams({ VoiceUrl: voiceUrl, VoiceMethod: "POST" });
@@ -31512,17 +32520,20 @@ async function configureTwilioNumber(accountSid, authToken, phoneNumber, voiceUr
31512
32520
  }
31513
32521
  }
31514
32522
  async function configureTelnyxNumber(apiKey, connectionId, phoneNumber) {
31515
- const resp = await fetch(`${TELNYX_API_BASE}/phone_numbers/${encodeURIComponent(phoneNumber)}`, {
31516
- method: "PATCH",
31517
- headers: {
31518
- Authorization: `Bearer ${apiKey}`,
31519
- "Content-Type": "application/json"
31520
- },
31521
- body: JSON.stringify({ connection_id: connectionId })
31522
- });
32523
+ const resp = await fetch(
32524
+ `${TELNYX_API_BASE}/phone_numbers/${encodeURIComponent(phoneNumber)}/voice`,
32525
+ {
32526
+ method: "PATCH",
32527
+ headers: {
32528
+ Authorization: `Bearer ${apiKey}`,
32529
+ "Content-Type": "application/json"
32530
+ },
32531
+ body: JSON.stringify({ connection_id: connectionId, tech_prefix_enabled: false })
32532
+ }
32533
+ );
31523
32534
  if (!resp.ok) {
31524
32535
  throw new Error(
31525
- `Telnyx PATCH /phone_numbers/${phoneNumber} failed: ${resp.status} ${await resp.text()}`
32536
+ `Telnyx PATCH /phone_numbers/${redactPhone2(phoneNumber)}/voice failed: ${resp.status} ${await resp.text()}`
31526
32537
  );
31527
32538
  }
31528
32539
  }
@@ -31572,7 +32583,7 @@ async function autoConfigureCarrier(params) {
31572
32583
  if (provider2 === "telnyx" && params.telnyxKey && params.telnyxConnectionId) {
31573
32584
  try {
31574
32585
  await configureTelnyxNumber(params.telnyxKey, params.telnyxConnectionId, params.phoneNumber);
31575
- log3.info("Telnyx number %s associated with connection %s", params.phoneNumber, params.telnyxConnectionId);
32586
+ log3.info("Telnyx number ***%s associated with connection %s", params.phoneNumber.slice(-4), params.telnyxConnectionId);
31576
32587
  } catch (err) {
31577
32588
  log3.warn("Could not auto-configure Telnyx number: %s", err instanceof Error ? err.message : String(err));
31578
32589
  }
@@ -31722,12 +32733,12 @@ var init_test_mode = __esm({
31722
32733
  }
31723
32734
  continue;
31724
32735
  }
31725
- conversationHistory.push({
31726
- role: "user",
31727
- text: userInput,
31728
- timestamp: Date.now()
31729
- });
31730
32736
  if (onMessage) {
32737
+ conversationHistory.push({
32738
+ role: "user",
32739
+ text: userInput,
32740
+ timestamp: Date.now()
32741
+ });
31731
32742
  try {
31732
32743
  const responseText = await onMessage({
31733
32744
  text: userInput,
@@ -31757,6 +32768,11 @@ var init_test_mode = __esm({
31757
32768
  }
31758
32769
  log3.info("");
31759
32770
  const responseText = parts.join("");
32771
+ conversationHistory.push({
32772
+ role: "user",
32773
+ text: userInput,
32774
+ timestamp: Date.now()
32775
+ });
31760
32776
  if (responseText) {
31761
32777
  conversationHistory.push({
31762
32778
  role: "assistant",
@@ -33216,6 +34232,7 @@ __export(index_exports, {
33216
34232
  PRICING_VERSION: () => PRICING_VERSION,
33217
34233
  PartialStreamError: () => PartialStreamError,
33218
34234
  Patter: () => Patter,
34235
+ PatterConfigError: () => PatterConfigError,
33219
34236
  PatterConnectionError: () => PatterConnectionError,
33220
34237
  PatterError: () => PatterError,
33221
34238
  PatterTool: () => PatterTool,
@@ -33303,6 +34320,8 @@ __export(index_exports, {
33303
34320
  mulawToPcm16: () => mulawToPcm16,
33304
34321
  notifyDashboard: () => notifyDashboard,
33305
34322
  openaiTts: () => openaiTts,
34323
+ openclawConsult: () => openclawConsult,
34324
+ openclawPostCallNotifier: () => openclawPostCallNotifier,
33306
34325
  pcm16ToMulaw: () => pcm16ToMulaw,
33307
34326
  resample16kTo8k: () => resample16kTo8k,
33308
34327
  resample24kTo16k: () => resample24kTo16k,
@@ -33333,6 +34352,7 @@ init_server();
33333
34352
 
33334
34353
  // src/engines/openai.ts
33335
34354
  init_cjs_shims();
34355
+ init_openai_realtime();
33336
34356
  var Realtime = class {
33337
34357
  kind = "openai_realtime";
33338
34358
  apiKey;
@@ -33340,6 +34360,9 @@ var Realtime = class {
33340
34360
  voice;
33341
34361
  reasoningEffort;
33342
34362
  inputAudioTranscriptionModel;
34363
+ noiseReduction;
34364
+ turnDetection;
34365
+ gateResponseOnTranscript;
33343
34366
  constructor(opts = {}) {
33344
34367
  const key = opts.apiKey ?? process.env.OPENAI_API_KEY;
33345
34368
  if (!key) {
@@ -33347,16 +34370,26 @@ var Realtime = class {
33347
34370
  "OpenAI Realtime requires an apiKey. Pass { apiKey: 'sk-...' } or set OPENAI_API_KEY in the environment."
33348
34371
  );
33349
34372
  }
34373
+ if (opts.noiseReduction !== void 0 && opts.noiseReduction !== "near_field" && opts.noiseReduction !== "far_field") {
34374
+ throw new Error(
34375
+ `noiseReduction must be 'near_field' or 'far_field', got ${JSON.stringify(opts.noiseReduction)}`
34376
+ );
34377
+ }
34378
+ validateRealtimeTurnDetection(opts.turnDetection);
33350
34379
  this.apiKey = key;
33351
34380
  this.model = opts.model ?? "gpt-realtime-mini";
33352
34381
  this.voice = opts.voice ?? "alloy";
33353
34382
  this.reasoningEffort = opts.reasoningEffort;
33354
34383
  this.inputAudioTranscriptionModel = opts.inputAudioTranscriptionModel;
34384
+ this.noiseReduction = opts.noiseReduction;
34385
+ this.turnDetection = opts.turnDetection;
34386
+ this.gateResponseOnTranscript = opts.gateResponseOnTranscript;
33355
34387
  }
33356
34388
  };
33357
34389
 
33358
34390
  // src/engines/openai-2.ts
33359
34391
  init_cjs_shims();
34392
+ init_openai_realtime();
33360
34393
  var Realtime2 = class {
33361
34394
  kind = "openai_realtime_2";
33362
34395
  apiKey;
@@ -33364,6 +34397,9 @@ var Realtime2 = class {
33364
34397
  voice;
33365
34398
  reasoningEffort;
33366
34399
  inputAudioTranscriptionModel;
34400
+ noiseReduction;
34401
+ turnDetection;
34402
+ gateResponseOnTranscript;
33367
34403
  constructor(opts = {}) {
33368
34404
  const key = opts.apiKey ?? process.env.OPENAI_API_KEY;
33369
34405
  if (!key) {
@@ -33371,11 +34407,20 @@ var Realtime2 = class {
33371
34407
  "OpenAI Realtime 2 requires an apiKey. Pass { apiKey: 'sk-...' } or set OPENAI_API_KEY in the environment."
33372
34408
  );
33373
34409
  }
34410
+ if (opts.noiseReduction !== void 0 && opts.noiseReduction !== "near_field" && opts.noiseReduction !== "far_field") {
34411
+ throw new Error(
34412
+ `noiseReduction must be 'near_field' or 'far_field', got ${JSON.stringify(opts.noiseReduction)}`
34413
+ );
34414
+ }
34415
+ validateRealtimeTurnDetection(opts.turnDetection);
33374
34416
  this.apiKey = key;
33375
34417
  this.model = opts.model ?? "gpt-realtime-2";
33376
34418
  this.voice = opts.voice ?? "alloy";
33377
34419
  this.reasoningEffort = opts.reasoningEffort;
33378
34420
  this.inputAudioTranscriptionModel = opts.inputAudioTranscriptionModel;
34421
+ this.noiseReduction = opts.noiseReduction;
34422
+ this.turnDetection = opts.turnDetection;
34423
+ this.gateResponseOnTranscript = opts.gateResponseOnTranscript;
33379
34424
  }
33380
34425
  };
33381
34426
 
@@ -33809,7 +34854,7 @@ function resolvePersistRoot(persist) {
33809
34854
  if (typeof persist === "string") return resolveLogRoot(persist);
33810
34855
  const envRoot = resolveLogRoot();
33811
34856
  if (envRoot !== null) return envRoot;
33812
- return resolveLogRoot("auto");
34857
+ return null;
33813
34858
  }
33814
34859
  function closeParkedConnections(slot) {
33815
34860
  if (slot.stt) {
@@ -34093,7 +35138,12 @@ var Patter = class {
34093
35138
  ...working,
34094
35139
  provider: "openai_realtime",
34095
35140
  model: working.model ?? engine.model,
34096
- voice: working.voice ?? engine.voice
35141
+ voice: working.voice ?? engine.voice,
35142
+ // Explicit agent() kwargs win over the engine marker value
35143
+ // (same precedence as Python: explicit kwarg > engine > default).
35144
+ openaiRealtimeNoiseReduction: working.openaiRealtimeNoiseReduction ?? engine.noiseReduction,
35145
+ realtimeTurnDetection: working.realtimeTurnDetection ?? engine.turnDetection,
35146
+ openaiRealtimeGateResponseOnTranscript: working.openaiRealtimeGateResponseOnTranscript ?? engine.gateResponseOnTranscript
34097
35147
  };
34098
35148
  if (!this.localConfig.openaiKey) {
34099
35149
  this.localConfig = { ...this.localConfig, openaiKey: engine.apiKey };
@@ -34118,6 +35168,11 @@ var Patter = class {
34118
35168
  throw new Error(`provider must be one of: ${valid.join(", ")}. Got: '${working.provider}'`);
34119
35169
  }
34120
35170
  }
35171
+ if (working.consult && working.provider === "elevenlabs_convai") {
35172
+ getLogger().warn(
35173
+ "consult is set but provider is ElevenLabs ConvAI; the consult tool is only injected in Realtime and Pipeline modes and will be ignored for this agent."
35174
+ );
35175
+ }
34121
35176
  if (working.llm !== void 0) {
34122
35177
  const llm = working.llm;
34123
35178
  if (!llm || typeof llm.stream !== "function") {
@@ -34256,7 +35311,8 @@ var Patter = class {
34256
35311
  opts.onMetrics,
34257
35312
  opts.pricing,
34258
35313
  opts.dashboard ?? true,
34259
- opts.dashboardToken ?? ""
35314
+ opts.dashboardToken ?? "",
35315
+ opts.allowInsecureDashboard ?? false
34260
35316
  );
34261
35317
  this.embeddedServer.popPrewarmAudio = this.popPrewarmAudio;
34262
35318
  this.embeddedServer.popPrewarmedConnections = this.popPrewarmedConnections;
@@ -34668,8 +35724,8 @@ var Patter = class {
34668
35724
  if (!options.to) {
34669
35725
  throw new Error("'to' phone number is required");
34670
35726
  }
34671
- if (!options.to.startsWith("+")) {
34672
- throw new Error(`'to' must be in E.164 format (e.g., '+1234567890'). Got: '${options.to}'`);
35727
+ if (!/^\+[1-9]\d{6,14}$/.test(options.to)) {
35728
+ throw new Error("'to' must be E.164 format (+<country><digits>). Got value with invalid format.");
34673
35729
  }
34674
35730
  if (options.wait && !this.embeddedServer) {
34675
35731
  throw new PatterConnectionError(
@@ -34680,9 +35736,6 @@ var Patter = class {
34680
35736
  let callId = "";
34681
35737
  const effectiveRingTimeout = options.ringTimeout === void 0 ? 25 : options.ringTimeout;
34682
35738
  const wantsAmd = options.machineDetection !== false || Boolean(options.voicemailMessage);
34683
- if (this.embeddedServer) {
34684
- this.embeddedServer.onMachineDetection = options.onMachineDetection;
34685
- }
34686
35739
  if (options.agent.prewarm !== false) {
34687
35740
  this.spawnProviderWarmup(options.agent);
34688
35741
  }
@@ -34727,6 +35780,12 @@ var Patter = class {
34727
35780
  };
34728
35781
  if (this.embeddedServer) {
34729
35782
  this.embeddedServer.metricsStore.recordCallInitiated(initiatedPayload);
35783
+ if (options.onMachineDetection) {
35784
+ this.embeddedServer.onMachineDetectionByCallSid.set(
35785
+ telnyxCallId,
35786
+ options.onMachineDetection
35787
+ );
35788
+ }
34730
35789
  }
34731
35790
  try {
34732
35791
  const { notifyDashboard: notifyDashboard2 } = await Promise.resolve().then(() => (init_persistence(), persistence_exports));
@@ -34792,6 +35851,12 @@ var Patter = class {
34792
35851
  };
34793
35852
  if (this.embeddedServer) {
34794
35853
  this.embeddedServer.metricsStore.recordCallInitiated(initiatedPayload);
35854
+ if (options.onMachineDetection) {
35855
+ this.embeddedServer.onMachineDetectionByCallSid.set(
35856
+ plivoCallId,
35857
+ options.onMachineDetection
35858
+ );
35859
+ }
34795
35860
  }
34796
35861
  try {
34797
35862
  const { notifyDashboard: notifyDashboard2 } = await Promise.resolve().then(() => (init_persistence(), persistence_exports));
@@ -34861,6 +35926,12 @@ var Patter = class {
34861
35926
  };
34862
35927
  if (this.embeddedServer) {
34863
35928
  this.embeddedServer.metricsStore.recordCallInitiated(initiatedPayload);
35929
+ if (options.onMachineDetection) {
35930
+ this.embeddedServer.onMachineDetectionByCallSid.set(
35931
+ twilioCallSid,
35932
+ options.onMachineDetection
35933
+ );
35934
+ }
34864
35935
  if (twilioNotificationsPath) {
34865
35936
  getLogger().info(
34866
35937
  `Outbound call ${twilioCallSid} placed. Twilio notifications: https://api.twilio.com${twilioNotificationsPath} (check here if the call drops with no audio).`
@@ -35144,6 +36215,7 @@ function defineTool(input) {
35144
36215
  }
35145
36216
 
35146
36217
  // src/index.ts
36218
+ init_consult();
35147
36219
  init_logger();
35148
36220
  init_sentence_chunker();
35149
36221
  init_pipeline_hooks();
@@ -35361,8 +36433,8 @@ var FallbackLLMProvider = class {
35361
36433
  * markers are filtered out so callers can concatenate the yielded strings
35362
36434
  * directly.
35363
36435
  */
35364
- async *completeStream(messages, tools) {
35365
- for await (const chunk of this.stream(messages, tools)) {
36436
+ async *completeStream(messages, tools, opts) {
36437
+ for await (const chunk of this.stream(messages, tools, opts)) {
35366
36438
  if (chunk.type === "text") {
35367
36439
  yield chunk.content ?? "";
35368
36440
  }
@@ -35372,14 +36444,15 @@ var FallbackLLMProvider = class {
35372
36444
  // LLMProvider implementation
35373
36445
  // -----------------------------------------------------------------------
35374
36446
  /** Streaming entry point — yields chunks from the first provider that succeeds. */
35375
- async *stream(messages, tools) {
36447
+ async *stream(messages, tools, opts) {
35376
36448
  const errors = [];
35377
36449
  const result = yield* this.tryProviders(
35378
36450
  messages,
35379
36451
  tools,
35380
36452
  /* availableOnly */
35381
36453
  true,
35382
- errors
36454
+ errors,
36455
+ opts
35383
36456
  );
35384
36457
  if (result === "done") return;
35385
36458
  getLogger().warn(
@@ -35390,7 +36463,8 @@ var FallbackLLMProvider = class {
35390
36463
  tools,
35391
36464
  /* availableOnly */
35392
36465
  false,
35393
- errors
36466
+ errors,
36467
+ opts
35394
36468
  );
35395
36469
  if (retryResult === "done") return;
35396
36470
  throw new AllProvidersFailedError(
@@ -35400,7 +36474,7 @@ var FallbackLLMProvider = class {
35400
36474
  // -----------------------------------------------------------------------
35401
36475
  // Internals
35402
36476
  // -----------------------------------------------------------------------
35403
- async *tryProviders(messages, tools, availableOnly, errors) {
36477
+ async *tryProviders(messages, tools, availableOnly, errors, opts) {
35404
36478
  for (let i = 0; i < this.providers.length; i++) {
35405
36479
  if (availableOnly && !this.availability[i]) continue;
35406
36480
  for (let attempt = 0; attempt < this.maxRetryPerProvider; attempt++) {
@@ -35409,7 +36483,7 @@ var FallbackLLMProvider = class {
35409
36483
  `FallbackLLMProvider: trying provider ${i}${attempt > 0 ? ` (retry ${attempt})` : ""}`
35410
36484
  );
35411
36485
  let yieldedTokens = false;
35412
- const gen = this.providers[i].stream(messages, tools);
36486
+ const gen = this.providers[i].stream(messages, tools, opts);
35413
36487
  while (true) {
35414
36488
  let iterResult;
35415
36489
  try {
@@ -35523,7 +36597,7 @@ var PARAMETERS_SCHEMA = {
35523
36597
  required: ["to"]
35524
36598
  };
35525
36599
  var DEFAULT_NAME = "make_phone_call";
35526
- var DEFAULT_DESCRIPTION = "Place a real outbound phone call. Returns a JSON object with the full transcript, call status, duration in seconds, and cost. Use this when the user asks you to call someone, schedule appointments by phone, or otherwise reach a human via voice.";
36600
+ var DEFAULT_DESCRIPTION2 = "Place a real outbound phone call. Returns a JSON object with the full transcript, call status, duration in seconds, and cost. Use this when the user asks you to call someone, schedule appointments by phone, or otherwise reach a human via voice.";
35527
36601
  var PatterTool = class {
35528
36602
  name;
35529
36603
  description;
@@ -35532,6 +36606,11 @@ var PatterTool = class {
35532
36606
  maxDurationSec;
35533
36607
  recording;
35534
36608
  started = false;
36609
+ /** Cached in-progress (or completed) start promise so concurrent execute()
36610
+ * callers all await the same boot sequence instead of each racing into
36611
+ * phone.serve(). Reset to null on failure so callers can retry after a
36612
+ * transient error. */
36613
+ startPromise = null;
35535
36614
  constructor(opts) {
35536
36615
  if (!opts.phone) {
35537
36616
  throw new Error("PatterTool: `phone` (a Patter instance) is required.");
@@ -35539,7 +36618,7 @@ var PatterTool = class {
35539
36618
  this.phone = opts.phone;
35540
36619
  this.agent = opts.agent;
35541
36620
  this.name = opts.name ?? DEFAULT_NAME;
35542
- this.description = opts.description ?? DEFAULT_DESCRIPTION;
36621
+ this.description = opts.description ?? DEFAULT_DESCRIPTION2;
35543
36622
  this.maxDurationSec = Math.max(5, Math.min(1800, opts.maxDurationSec ?? 180));
35544
36623
  this.recording = opts.recording ?? false;
35545
36624
  }
@@ -35583,8 +36662,21 @@ var PatterTool = class {
35583
36662
  * `serve()` provides here. No `onCallEnd` callback is wired: the SDK's own
35584
36663
  * per-callId completion registry resolves the result, so the user's
35585
36664
  * `onCallEnd` slot is left free.
36665
+ *
36666
+ * Idempotent and concurrency-safe: concurrent callers all await the same
36667
+ * in-progress boot instead of each racing into `phone.serve()`.
35586
36668
  */
35587
36669
  async start() {
36670
+ if (this.startPromise) return this.startPromise;
36671
+ this.startPromise = this._doStart();
36672
+ try {
36673
+ await this.startPromise;
36674
+ } catch (err) {
36675
+ this.startPromise = null;
36676
+ throw err;
36677
+ }
36678
+ }
36679
+ async _doStart() {
35588
36680
  if (this.started) return;
35589
36681
  if (!this.agent) {
35590
36682
  throw new Error(
@@ -35610,6 +36702,7 @@ var PatterTool = class {
35610
36702
  }
35611
36703
  }
35612
36704
  this.started = false;
36705
+ this.startPromise = null;
35613
36706
  }
35614
36707
  // --- Execution ----------------------------------------------------------
35615
36708
  /**
@@ -35981,7 +37074,8 @@ var UltravoxRealtimeAdapter = class {
35981
37074
  "X-API-Key": this.apiKey,
35982
37075
  "Content-Type": "application/json"
35983
37076
  },
35984
- body: JSON.stringify(body)
37077
+ body: JSON.stringify(body),
37078
+ signal: AbortSignal.timeout(15e3)
35985
37079
  });
35986
37080
  if (!resp.ok) {
35987
37081
  const text = await resp.text().catch(() => "");
@@ -35992,12 +37086,36 @@ var UltravoxRealtimeAdapter = class {
35992
37086
  this.ws = new import_ws6.default(call.joinUrl);
35993
37087
  await new Promise((resolve2, reject) => {
35994
37088
  const ws = this.ws;
37089
+ let settled = false;
37090
+ const timer = setTimeout(() => {
37091
+ if (settled) return;
37092
+ settled = true;
37093
+ ws.off("open", onOpen);
37094
+ ws.off("error", onError);
37095
+ this.ws = null;
37096
+ try {
37097
+ ws.close();
37098
+ } catch {
37099
+ }
37100
+ reject(new Error("Ultravox WS connect timeout"));
37101
+ }, 15e3);
35995
37102
  const onOpen = () => {
37103
+ if (settled) return;
37104
+ settled = true;
37105
+ clearTimeout(timer);
35996
37106
  ws.off("error", onError);
35997
37107
  resolve2();
35998
37108
  };
35999
37109
  const onError = (err) => {
37110
+ if (settled) return;
37111
+ settled = true;
37112
+ clearTimeout(timer);
36000
37113
  ws.off("open", onOpen);
37114
+ this.ws = null;
37115
+ try {
37116
+ ws.close();
37117
+ } catch {
37118
+ }
36001
37119
  reject(err);
36002
37120
  };
36003
37121
  ws.once("open", onOpen);
@@ -36845,7 +37963,7 @@ var STT = class extends DeepgramSTT {
36845
37963
  {
36846
37964
  endpointingMs: opts.endpointingMs ?? 150,
36847
37965
  utteranceEndMs: opts.utteranceEndMs === null ? null : opts.utteranceEndMs ?? 1e3,
36848
- smartFormat: opts.smartFormat ?? true,
37966
+ smartFormat: opts.smartFormat ?? false,
36849
37967
  interimResults: opts.interimResults ?? true,
36850
37968
  ...opts.vadEvents !== void 0 ? { vadEvents: opts.vadEvents } : {}
36851
37969
  }
@@ -37165,7 +38283,7 @@ var CartesiaSTT = class {
37165
38283
  });
37166
38284
  ws.once("error", (err) => {
37167
38285
  clearTimeout(timer);
37168
- reject(err);
38286
+ reject(new Error(`Cartesia STT park connect failed: ${describeWarmupError(err)}`));
37169
38287
  });
37170
38288
  });
37171
38289
  return ws;
@@ -37521,7 +38639,7 @@ var SonioxSTT = class _SonioxSTT {
37521
38639
  /** Stable pricing/dashboard key — read by stream-handler/metrics. */
37522
38640
  static providerKey = "soniox";
37523
38641
  ws = null;
37524
- callbacks = [];
38642
+ callbacks = /* @__PURE__ */ new Set();
37525
38643
  final = new TokenAccumulator();
37526
38644
  keepaliveTimer = null;
37527
38645
  apiKey;
@@ -37683,16 +38801,13 @@ var SonioxSTT = class _SonioxSTT {
37683
38801
  if (audio.length === 0) return;
37684
38802
  this.ws.send(audio);
37685
38803
  }
37686
- /** Register a transcript listener (max 10 concurrent listeners). */
38804
+ /** Register a transcript listener. */
37687
38805
  onTranscript(callback) {
37688
- if (this.callbacks.length >= 10) {
37689
- getLogger().warn(
37690
- "SonioxSTT: maximum of 10 onTranscript callbacks reached; replacing the last callback."
37691
- );
37692
- this.callbacks[this.callbacks.length - 1] = callback;
37693
- return;
37694
- }
37695
- this.callbacks.push(callback);
38806
+ this.callbacks.add(callback);
38807
+ }
38808
+ /** Unregister a previously registered transcript listener. */
38809
+ offTranscript(callback) {
38810
+ this.callbacks.delete(callback);
37696
38811
  }
37697
38812
  /** Send the empty-frame stream terminator and close the WebSocket. */
37698
38813
  close() {
@@ -37774,12 +38889,6 @@ var VALID_DOMAINS = /* @__PURE__ */ new Set([
37774
38889
  AssemblyAIDomain.GENERAL,
37775
38890
  AssemblyAIDomain.MEDICAL_V1
37776
38891
  ]);
37777
- var AssemblyAISTTNotConnectedError = class extends Error {
37778
- constructor(message = "AssemblyAISTT is not connected") {
37779
- super(message);
37780
- this.name = "AssemblyAISTTNotConnectedError";
37781
- }
37782
- };
37783
38892
  var AssemblyAISTT = class _AssemblyAISTT {
37784
38893
  constructor(apiKey, options = {}) {
37785
38894
  this.apiKey = apiKey;
@@ -38103,9 +39212,10 @@ var AssemblyAISTT = class _AssemblyAISTT {
38103
39212
  */
38104
39213
  updateConfiguration(params) {
38105
39214
  if (!this.ws || this.ws.readyState !== import_ws9.default.OPEN) {
38106
- throw new AssemblyAISTTNotConnectedError(
38107
- "AssemblyAISTT.updateConfiguration: WebSocket is not open"
39215
+ getLogger().debug(
39216
+ "AssemblyAISTT.updateConfiguration: WebSocket is not open \u2014 dropping update (call teardown)."
38108
39217
  );
39218
+ return;
38109
39219
  }
38110
39220
  const payload = {
38111
39221
  type: AssemblyAIClientFrame.UPDATE_CONFIGURATION
@@ -38127,9 +39237,10 @@ var AssemblyAISTT = class _AssemblyAISTT {
38127
39237
  /** Force the server to finalize the current turn (for barge-in). */
38128
39238
  forceEndpoint() {
38129
39239
  if (!this.ws || this.ws.readyState !== import_ws9.default.OPEN) {
38130
- throw new AssemblyAISTTNotConnectedError(
38131
- "AssemblyAISTT.forceEndpoint: WebSocket is not open"
39240
+ getLogger().debug(
39241
+ "AssemblyAISTT.forceEndpoint: WebSocket is not open \u2014 dropping request (call teardown)."
38132
39242
  );
39243
+ return;
38133
39244
  }
38134
39245
  this.ws.send(JSON.stringify({ type: AssemblyAIClientFrame.FORCE_ENDPOINT }));
38135
39246
  }
@@ -38144,6 +39255,14 @@ var AssemblyAISTT = class _AssemblyAISTT {
38144
39255
  async close() {
38145
39256
  this.closing = true;
38146
39257
  if (!this.ws) return;
39258
+ if (this.chunkBufferBytes > 0 && this.ws.readyState === import_ws9.default.OPEN) {
39259
+ try {
39260
+ this.ws.send(Buffer.concat(this.chunkBuffer, this.chunkBufferBytes));
39261
+ } catch {
39262
+ }
39263
+ this.chunkBuffer = [];
39264
+ this.chunkBufferBytes = 0;
39265
+ }
38147
39266
  try {
38148
39267
  this.ws.send(JSON.stringify({ type: AssemblyAIClientFrame.TERMINATE }));
38149
39268
  } catch {
@@ -39350,7 +40469,7 @@ var TTS3 = class extends OpenAITTS {
39350
40469
  opts.model ?? "gpt-4o-mini-tts",
39351
40470
  opts.instructions ?? null,
39352
40471
  opts.speed ?? null,
39353
- opts.antiAlias ?? false
40472
+ opts.antiAlias ?? true
39354
40473
  );
39355
40474
  }
39356
40475
  };
@@ -39525,7 +40644,6 @@ init_cjs_shims();
39525
40644
  init_cjs_shims();
39526
40645
  init_logger();
39527
40646
  var INWORLD_BASE_URL = "https://api.inworld.ai/tts/v1/voice:stream";
39528
- var INWORLD_VOICES_URL = "https://api.inworld.ai/tts/v1/voices";
39529
40647
  var InworldModel = {
39530
40648
  TTS_2: "inworld-tts-2",
39531
40649
  TTS_1_5_MAX: "inworld-tts-1.5-max",
@@ -39614,7 +40732,8 @@ var InworldTTS = class {
39614
40732
  */
39615
40733
  async warmup() {
39616
40734
  try {
39617
- await fetch(INWORLD_VOICES_URL, {
40735
+ const voicesUrl = new URL(this.baseUrl).origin + "/tts/v1/voices";
40736
+ await fetch(voicesUrl, {
39618
40737
  method: "GET",
39619
40738
  headers: {
39620
40739
  Authorization: `Basic ${this.authToken}`
@@ -39874,58 +40993,87 @@ var AnthropicLLMProvider = class {
39874
40993
  const toolIndexByBlock = /* @__PURE__ */ new Map();
39875
40994
  const toolIdByBlock = /* @__PURE__ */ new Map();
39876
40995
  let nextIndex = 0;
39877
- while (true) {
39878
- const { done, value } = await reader.read();
39879
- if (done) break;
39880
- buffer += decoder.decode(value, { stream: true });
39881
- const lines = buffer.split("\n");
39882
- buffer = lines.pop() || "";
39883
- for (const line of lines) {
39884
- const trimmed = line.trim();
39885
- if (!trimmed.startsWith("data: ")) continue;
39886
- const data = trimmed.slice(6);
39887
- if (!data || data === "[DONE]") continue;
39888
- let event;
39889
- try {
39890
- event = JSON.parse(data);
39891
- } catch {
39892
- continue;
39893
- }
39894
- if (event.type === "content_block_start" && event.content_block?.type === "tool_use") {
39895
- const blockIdx = event.index ?? 0;
39896
- const toolId = event.content_block.id ?? "";
39897
- const toolName = event.content_block.name ?? "";
39898
- const patterIndex = nextIndex++;
39899
- toolIndexByBlock.set(blockIdx, patterIndex);
39900
- toolIdByBlock.set(blockIdx, toolId);
39901
- yield {
39902
- type: "tool_call",
39903
- index: patterIndex,
39904
- id: toolId,
39905
- name: toolName,
39906
- arguments: ""
39907
- };
39908
- continue;
39909
- }
39910
- if (event.type === "content_block_delta") {
39911
- if (event.delta?.type === "text_delta" && event.delta.text) {
39912
- yield { type: "text", content: event.delta.text };
40996
+ let inputTokens = 0;
40997
+ let outputTokens = 0;
40998
+ let cacheReadTokens = 0;
40999
+ let cacheWriteTokens = 0;
41000
+ try {
41001
+ while (true) {
41002
+ const { done, value } = await reader.read();
41003
+ if (done) break;
41004
+ buffer += decoder.decode(value, { stream: true });
41005
+ const lines = buffer.split("\n");
41006
+ buffer = lines.pop() || "";
41007
+ for (const line of lines) {
41008
+ const trimmed = line.trim();
41009
+ if (!trimmed.startsWith("data: ")) continue;
41010
+ const data = trimmed.slice(6);
41011
+ if (!data || data === "[DONE]") continue;
41012
+ let event;
41013
+ try {
41014
+ event = JSON.parse(data);
41015
+ } catch {
41016
+ continue;
41017
+ }
41018
+ if (event.type === "message_start" && event.message?.usage) {
41019
+ const u = event.message.usage;
41020
+ if (u.input_tokens) inputTokens = u.input_tokens;
41021
+ if (u.cache_creation_input_tokens) cacheWriteTokens = u.cache_creation_input_tokens;
41022
+ if (u.cache_read_input_tokens) cacheReadTokens = u.cache_read_input_tokens;
39913
41023
  continue;
39914
41024
  }
39915
- if (event.delta?.type === "input_json_delta" && event.delta.partial_json) {
41025
+ if (event.type === "message_delta" && event.usage?.output_tokens) {
41026
+ outputTokens = event.usage.output_tokens;
41027
+ continue;
41028
+ }
41029
+ if (event.type === "content_block_start" && event.content_block?.type === "tool_use") {
39916
41030
  const blockIdx = event.index ?? 0;
39917
- const patterIndex = toolIndexByBlock.get(blockIdx);
39918
- if (patterIndex !== void 0) {
39919
- yield {
39920
- type: "tool_call",
39921
- index: patterIndex,
39922
- id: toolIdByBlock.get(blockIdx),
39923
- arguments: event.delta.partial_json
39924
- };
41031
+ const toolId = event.content_block.id ?? "";
41032
+ const toolName = event.content_block.name ?? "";
41033
+ const patterIndex = nextIndex++;
41034
+ toolIndexByBlock.set(blockIdx, patterIndex);
41035
+ toolIdByBlock.set(blockIdx, toolId);
41036
+ yield {
41037
+ type: "tool_call",
41038
+ index: patterIndex,
41039
+ id: toolId,
41040
+ name: toolName,
41041
+ arguments: ""
41042
+ };
41043
+ continue;
41044
+ }
41045
+ if (event.type === "content_block_delta") {
41046
+ if (event.delta?.type === "text_delta" && event.delta.text) {
41047
+ yield { type: "text", content: event.delta.text };
41048
+ continue;
41049
+ }
41050
+ if (event.delta?.type === "input_json_delta" && event.delta.partial_json) {
41051
+ const blockIdx = event.index ?? 0;
41052
+ const patterIndex = toolIndexByBlock.get(blockIdx);
41053
+ if (patterIndex !== void 0) {
41054
+ yield {
41055
+ type: "tool_call",
41056
+ index: patterIndex,
41057
+ id: toolIdByBlock.get(blockIdx),
41058
+ arguments: event.delta.partial_json
41059
+ };
41060
+ }
39925
41061
  }
39926
41062
  }
39927
41063
  }
39928
41064
  }
41065
+ } finally {
41066
+ reader.cancel().catch(() => {
41067
+ });
41068
+ }
41069
+ if (inputTokens > 0 || outputTokens > 0 || cacheReadTokens > 0 || cacheWriteTokens > 0) {
41070
+ yield {
41071
+ type: "usage",
41072
+ inputTokens,
41073
+ outputTokens,
41074
+ cacheReadInputTokens: cacheReadTokens,
41075
+ cacheWriteInputTokens: cacheWriteTokens
41076
+ };
39929
41077
  }
39930
41078
  yield { type: "done" };
39931
41079
  }
@@ -39985,16 +41133,17 @@ function toAnthropicMessages(messages) {
39985
41133
  }
39986
41134
  if (role === "tool") {
39987
41135
  const contentStr = typeof rawMsg.content === "string" ? rawMsg.content : JSON.stringify(rawMsg.content);
39988
- out.push({
39989
- role: "user",
39990
- content: [
39991
- {
39992
- type: "tool_result",
39993
- tool_use_id: rawMsg.tool_call_id ?? "",
39994
- content: contentStr
39995
- }
39996
- ]
39997
- });
41136
+ const toolResultBlock = {
41137
+ type: "tool_result",
41138
+ tool_use_id: rawMsg.tool_call_id ?? "",
41139
+ content: contentStr
41140
+ };
41141
+ const prev = out.length > 0 ? out[out.length - 1] : void 0;
41142
+ if (prev && prev.role === "user" && Array.isArray(prev.content) && prev.content.length > 0 && prev.content.every((b) => b["type"] === "tool_result")) {
41143
+ prev.content.push(toolResultBlock);
41144
+ } else {
41145
+ out.push({ role: "user", content: [toolResultBlock] });
41146
+ }
39998
41147
  continue;
39999
41148
  }
40000
41149
  }
@@ -40137,50 +41286,55 @@ async function* parseOpenAISseStream(response) {
40137
41286
  if (!reader) return;
40138
41287
  const decoder = new TextDecoder();
40139
41288
  let buffer = "";
40140
- while (true) {
40141
- const { done, value } = await reader.read();
40142
- if (done) break;
40143
- buffer += decoder.decode(value, { stream: true });
40144
- const lines = buffer.split("\n");
40145
- buffer = lines.pop() || "";
40146
- for (const line of lines) {
40147
- const trimmed = line.trim();
40148
- if (!trimmed || !trimmed.startsWith("data: ")) continue;
40149
- const data = trimmed.slice(6);
40150
- if (data === "[DONE]") continue;
40151
- let chunk;
40152
- try {
40153
- chunk = JSON.parse(data);
40154
- } catch {
40155
- continue;
40156
- }
40157
- const usage = chunk.usage ?? chunk.x_groq?.usage;
40158
- if (usage) {
40159
- const cached2 = chunk.usage?.prompt_tokens_details?.cached_tokens ?? 0;
40160
- yield {
40161
- type: "usage",
40162
- inputTokens: usage.prompt_tokens,
40163
- outputTokens: usage.completion_tokens,
40164
- cacheReadInputTokens: cached2
40165
- };
40166
- }
40167
- const delta = chunk.choices?.[0]?.delta;
40168
- if (!delta) continue;
40169
- if (delta.content) {
40170
- yield { type: "text", content: delta.content };
40171
- }
40172
- if (delta.tool_calls) {
40173
- for (const tc of delta.tool_calls) {
41289
+ try {
41290
+ while (true) {
41291
+ const { done, value } = await reader.read();
41292
+ if (done) break;
41293
+ buffer += decoder.decode(value, { stream: true });
41294
+ const lines = buffer.split("\n");
41295
+ buffer = lines.pop() || "";
41296
+ for (const line of lines) {
41297
+ const trimmed = line.trim();
41298
+ if (!trimmed || !trimmed.startsWith("data: ")) continue;
41299
+ const data = trimmed.slice(6);
41300
+ if (data === "[DONE]") continue;
41301
+ let chunk;
41302
+ try {
41303
+ chunk = JSON.parse(data);
41304
+ } catch {
41305
+ continue;
41306
+ }
41307
+ const usage = chunk.usage ?? chunk.x_groq?.usage;
41308
+ if (usage) {
41309
+ const cached2 = chunk.usage?.prompt_tokens_details?.cached_tokens ?? 0;
40174
41310
  yield {
40175
- type: "tool_call",
40176
- index: tc.index,
40177
- id: tc.id,
40178
- name: tc.function?.name,
40179
- arguments: tc.function?.arguments
41311
+ type: "usage",
41312
+ inputTokens: usage.prompt_tokens,
41313
+ outputTokens: usage.completion_tokens,
41314
+ cacheReadInputTokens: cached2
40180
41315
  };
40181
41316
  }
41317
+ const delta = chunk.choices?.[0]?.delta;
41318
+ if (!delta) continue;
41319
+ if (delta.content) {
41320
+ yield { type: "text", content: delta.content };
41321
+ }
41322
+ if (delta.tool_calls) {
41323
+ for (const tc of delta.tool_calls) {
41324
+ yield {
41325
+ type: "tool_call",
41326
+ index: tc.index,
41327
+ id: tc.id,
41328
+ name: tc.function?.name,
41329
+ arguments: tc.function?.arguments
41330
+ };
41331
+ }
41332
+ }
40182
41333
  }
40183
41334
  }
41335
+ } finally {
41336
+ reader.cancel().catch(() => {
41337
+ });
40184
41338
  }
40185
41339
  }
40186
41340
 
@@ -40349,11 +41503,21 @@ var CerebrasLLMProvider = class {
40349
41503
  }
40350
41504
  const advisoryMs = parseRateLimitResetMs(response.headers);
40351
41505
  const exponentialMs = RETRY_BACKOFF_BASE_MS * Math.pow(2, attempt);
40352
- const delayMs = Math.max(advisoryMs, exponentialMs);
41506
+ const delayMs = Math.min(5e3, Math.max(advisoryMs, exponentialMs));
40353
41507
  getLogger().warn(
40354
41508
  `Cerebras API ${response.status} (attempt ${attempt + 1}/${maxAttempts}); retrying after ${delayMs}ms`
40355
41509
  );
40356
- await new Promise((r) => setTimeout(r, delayMs));
41510
+ await new Promise((resolve2, reject) => {
41511
+ const t = setTimeout(resolve2, delayMs);
41512
+ opts?.signal?.addEventListener(
41513
+ "abort",
41514
+ () => {
41515
+ clearTimeout(t);
41516
+ reject(opts.signal.reason);
41517
+ },
41518
+ { once: true }
41519
+ );
41520
+ });
40357
41521
  }
40358
41522
  throw new PatterError(`Cerebras API error ${lastStatus}: ${lastErrText || "request failed"}`);
40359
41523
  }
@@ -40516,47 +41680,52 @@ var GoogleLLMProvider = class {
40516
41680
  let buffer = "";
40517
41681
  let nextIndex = 0;
40518
41682
  let lastUsage;
40519
- while (true) {
40520
- const { done, value } = await reader.read();
40521
- if (done) break;
40522
- buffer += decoder.decode(value, { stream: true });
40523
- const lines = buffer.split("\n");
40524
- buffer = lines.pop() || "";
40525
- for (const line of lines) {
40526
- const trimmed = line.trim();
40527
- if (!trimmed.startsWith("data: ")) continue;
40528
- const data = trimmed.slice(6);
40529
- if (!data) continue;
40530
- let payload;
40531
- try {
40532
- payload = JSON.parse(data);
40533
- } catch {
40534
- continue;
40535
- }
40536
- if (payload.usageMetadata) {
40537
- lastUsage = payload.usageMetadata;
40538
- }
40539
- const candidate = payload.candidates?.[0];
40540
- const parts = candidate?.content?.parts ?? [];
40541
- for (const part of parts) {
40542
- if (part.functionCall) {
40543
- const args = part.functionCall.args ?? {};
40544
- const callId = part.functionCall.id ?? `gemini_call_${nextIndex}`;
40545
- yield {
40546
- type: "tool_call",
40547
- index: nextIndex,
40548
- id: callId,
40549
- name: part.functionCall.name ?? "",
40550
- arguments: JSON.stringify(args)
40551
- };
40552
- nextIndex++;
41683
+ try {
41684
+ while (true) {
41685
+ const { done, value } = await reader.read();
41686
+ if (done) break;
41687
+ buffer += decoder.decode(value, { stream: true });
41688
+ const lines = buffer.split("\n");
41689
+ buffer = lines.pop() || "";
41690
+ for (const line of lines) {
41691
+ const trimmed = line.trim();
41692
+ if (!trimmed.startsWith("data: ")) continue;
41693
+ const data = trimmed.slice(6);
41694
+ if (!data) continue;
41695
+ let payload;
41696
+ try {
41697
+ payload = JSON.parse(data);
41698
+ } catch {
40553
41699
  continue;
40554
41700
  }
40555
- if (part.text) {
40556
- yield { type: "text", content: part.text };
41701
+ if (payload.usageMetadata) {
41702
+ lastUsage = payload.usageMetadata;
41703
+ }
41704
+ const candidate = payload.candidates?.[0];
41705
+ const parts = candidate?.content?.parts ?? [];
41706
+ for (const part of parts) {
41707
+ if (part.functionCall) {
41708
+ const args = part.functionCall.args ?? {};
41709
+ const callId = part.functionCall.id ?? `gemini_call_${nextIndex}`;
41710
+ yield {
41711
+ type: "tool_call",
41712
+ index: nextIndex,
41713
+ id: callId,
41714
+ name: part.functionCall.name ?? "",
41715
+ arguments: JSON.stringify(args)
41716
+ };
41717
+ nextIndex++;
41718
+ continue;
41719
+ }
41720
+ if (part.text) {
41721
+ yield { type: "text", content: part.text };
41722
+ }
40557
41723
  }
40558
41724
  }
40559
41725
  }
41726
+ } finally {
41727
+ reader.cancel().catch(() => {
41728
+ });
40560
41729
  }
40561
41730
  if (lastUsage) {
40562
41731
  yield {
@@ -40650,7 +41819,17 @@ function toGeminiContents(messages) {
40650
41819
  continue;
40651
41820
  }
40652
41821
  }
40653
- return { systemInstruction: systemParts.join("\n\n"), contents };
41822
+ const merged = [];
41823
+ for (const entry of contents) {
41824
+ const prev = merged[merged.length - 1];
41825
+ const isFunctionResponseOnly = (c) => c.role === "user" && c.parts.every((p) => p.functionResponse !== void 0);
41826
+ if (prev && isFunctionResponseOnly(prev) && isFunctionResponseOnly(entry)) {
41827
+ prev.parts.push(...entry.parts);
41828
+ } else {
41829
+ merged.push(entry);
41830
+ }
41831
+ }
41832
+ return { systemInstruction: systemParts.join("\n\n"), contents: merged };
40654
41833
  }
40655
41834
 
40656
41835
  // src/llm/google.ts
@@ -40679,7 +41858,6 @@ init_silero_vad();
40679
41858
  // src/providers/deepfilternet-filter.ts
40680
41859
  init_cjs_shims();
40681
41860
  init_logger();
40682
- init_transcoding();
40683
41861
  function log2() {
40684
41862
  return getLogger();
40685
41863
  }
@@ -40709,6 +41887,57 @@ function float32ToPcm16(samples) {
40709
41887
  }
40710
41888
  return out;
40711
41889
  }
41890
+ var ArbitraryResampler = class {
41891
+ srcRate;
41892
+ dstRate;
41893
+ phase = 0;
41894
+ // fractional position into the current chunk
41895
+ lastSample = 0;
41896
+ // last input sample from the previous chunk
41897
+ hasHistory = false;
41898
+ constructor(srcRate, dstRate) {
41899
+ this.srcRate = srcRate;
41900
+ this.dstRate = dstRate;
41901
+ }
41902
+ /** Process a chunk of PCM16-LE mono audio and return resampled PCM16-LE. */
41903
+ process(pcm) {
41904
+ const sampleCount = Math.floor(pcm.length / 2);
41905
+ if (sampleCount === 0) return Buffer.alloc(0);
41906
+ const step = this.srcRate / this.dstRate;
41907
+ const outArr = [];
41908
+ let phase = this.phase;
41909
+ while (true) {
41910
+ const idx = Math.floor(phase);
41911
+ if (idx >= sampleCount) break;
41912
+ const frac = phase - idx;
41913
+ let s0;
41914
+ let s1;
41915
+ if (idx < 0) {
41916
+ s0 = this.hasHistory ? this.lastSample : 0;
41917
+ s1 = pcm.readInt16LE(0);
41918
+ } else {
41919
+ s0 = pcm.readInt16LE(idx * 2);
41920
+ s1 = idx + 1 < sampleCount ? pcm.readInt16LE((idx + 1) * 2) : s0;
41921
+ }
41922
+ const interp = Math.round(s0 + (s1 - s0) * frac);
41923
+ outArr.push(Math.max(-32768, Math.min(32767, interp)));
41924
+ phase += step;
41925
+ }
41926
+ this.lastSample = pcm.readInt16LE((sampleCount - 1) * 2);
41927
+ this.hasHistory = true;
41928
+ this.phase = phase - sampleCount;
41929
+ const out = Buffer.alloc(outArr.length * 2);
41930
+ for (let j = 0; j < outArr.length; j++) out.writeInt16LE(outArr[j], j * 2);
41931
+ return out;
41932
+ }
41933
+ /** Flush any buffered state and reset. Returns any remaining tail output. */
41934
+ flush() {
41935
+ this.phase = 0;
41936
+ this.lastSample = 0;
41937
+ this.hasHistory = false;
41938
+ return Buffer.alloc(0);
41939
+ }
41940
+ };
40712
41941
  var DeepFilterNetFilter = class {
40713
41942
  modelPath;
40714
41943
  silenceWarnings;
@@ -40716,8 +41945,9 @@ var DeepFilterNetFilter = class {
40716
41945
  ort = null;
40717
41946
  warned = false;
40718
41947
  closed = false;
40719
- // Fix 5: stateful resamplers for src_sr↔48k conversions so chunk-boundary
41948
+ // Stateful resamplers for src_sr↔48k conversions so chunk-boundary
40720
41949
  // samples are not discarded. Lazy-created and torn down on rate change.
41950
+ // Uses ArbitraryResampler which supports any integer rate pair.
40721
41951
  _resamplerSrcRate = null;
40722
41952
  _upsamplerInst = null;
40723
41953
  _downsamplerInst = null;
@@ -40775,8 +42005,8 @@ var DeepFilterNetFilter = class {
40775
42005
  try {
40776
42006
  if (this._resamplerSrcRate !== sampleRate) {
40777
42007
  this._resamplerSrcRate = sampleRate;
40778
- this._upsamplerInst = new StatefulResampler({ srcRate: sampleRate, dstRate: DEEPFILTERNET_SR });
40779
- this._downsamplerInst = new StatefulResampler({ srcRate: DEEPFILTERNET_SR, dstRate: sampleRate });
42008
+ this._upsamplerInst = new ArbitraryResampler(sampleRate, DEEPFILTERNET_SR);
42009
+ this._downsamplerInst = new ArbitraryResampler(DEEPFILTERNET_SR, sampleRate);
40780
42010
  }
40781
42011
  const samples = pcm16ToFloat32(pcmChunk);
40782
42012
  const pcm16Up = this._upsamplerInst.process(float32ToPcm16(new Float32Array(samples)));
@@ -40940,6 +42170,17 @@ var Tool = class {
40940
42170
  parameters;
40941
42171
  handler;
40942
42172
  webhookUrl;
42173
+ reassurance;
42174
+ /**
42175
+ * Per-tool execution timeout in milliseconds. `undefined` uses the
42176
+ * executor default (10 000 ms). Mirrors Python `timeout_s`.
42177
+ */
42178
+ timeoutMs;
42179
+ /**
42180
+ * Enable OpenAI strict mode for this tool's function schema. Off by
42181
+ * default. Mirrors Python `strict` on `Tool`.
42182
+ */
42183
+ strict;
40943
42184
  constructor(opts) {
40944
42185
  if (!opts.name) {
40945
42186
  throw new Error("Tool requires a non-empty name.");
@@ -40957,6 +42198,9 @@ var Tool = class {
40957
42198
  this.parameters = opts.parameters ?? { type: "object", properties: {} };
40958
42199
  if (hasHandler) this.handler = opts.handler;
40959
42200
  if (hasWebhook) this.webhookUrl = opts.webhookUrl;
42201
+ if (opts.reassurance !== void 0) this.reassurance = opts.reassurance;
42202
+ if (opts.timeoutMs !== void 0) this.timeoutMs = opts.timeoutMs;
42203
+ if (opts.strict !== void 0) this.strict = opts.strict;
40960
42204
  }
40961
42205
  };
40962
42206
  function tool(opts) {
@@ -41120,7 +42364,6 @@ var ChatContext = class _ChatContext {
41120
42364
  init_cjs_shims();
41121
42365
  init_logger();
41122
42366
  var DTMF_EVENTS = [
41123
- "0",
41124
42367
  "1",
41125
42368
  "2",
41126
42369
  "3",
@@ -41130,6 +42373,7 @@ var DTMF_EVENTS = [
41130
42373
  "7",
41131
42374
  "8",
41132
42375
  "9",
42376
+ "0",
41133
42377
  "*",
41134
42378
  "#",
41135
42379
  "A",
@@ -41809,18 +43053,24 @@ var TelnyxAdapter = class {
41809
43053
  "/number_orders",
41810
43054
  orderBody
41811
43055
  );
41812
- const orderId = order.data?.id ?? "";
43056
+ const orderId = order.data?.id;
43057
+ if (!orderId) throw new Error("TelnyxAdapter: /number_orders returned no order id");
41813
43058
  return { phoneNumber: chosen, orderId };
41814
43059
  }
41815
43060
  /** Attach a number to a Call Control Application. */
41816
43061
  async configureNumber(phoneNumber, opts) {
41817
43062
  if (!phoneNumber) throw new Error("TelnyxAdapter: phoneNumber is required");
41818
43063
  if (!opts.connectionId) throw new Error("TelnyxAdapter: connectionId is required");
41819
- await this.request(
41820
- "PATCH",
41821
- `/phone_numbers/${encodeURIComponent(phoneNumber)}/voice`,
41822
- { connection_id: opts.connectionId, tech_prefix_enabled: false }
41823
- );
43064
+ try {
43065
+ await this.request(
43066
+ "PATCH",
43067
+ `/phone_numbers/${encodeURIComponent(phoneNumber)}/voice`,
43068
+ { connection_id: opts.connectionId, tech_prefix_enabled: false }
43069
+ );
43070
+ } catch (err) {
43071
+ const status = err instanceof Error ? err.message.replace(/\+\d{7,15}/g, "[REDACTED]") : String(err);
43072
+ throw new Error(`TelnyxAdapter: configureNumber failed: ${status}`);
43073
+ }
41824
43074
  }
41825
43075
  /**
41826
43076
  * Place an outbound call on the Call Control Application.
@@ -41928,7 +43178,7 @@ var TelnyxSTT = class {
41928
43178
  /** Stable pricing/dashboard key — read by stream-handler/metrics. */
41929
43179
  static providerKey = "telnyx_stt";
41930
43180
  ws = null;
41931
- callbacks = [];
43181
+ callbacks = /* @__PURE__ */ new Set();
41932
43182
  headerSent = false;
41933
43183
  /** Open the streaming WebSocket and arm message handlers. */
41934
43184
  async connect() {
@@ -41984,14 +43234,13 @@ var TelnyxSTT = class {
41984
43234
  }
41985
43235
  this.ws.send(audio);
41986
43236
  }
41987
- /** Register a transcript listener (max 10 concurrent listeners). */
43237
+ /** Register a transcript listener. */
41988
43238
  onTranscript(callback) {
41989
- if (this.callbacks.length >= 10) {
41990
- getLogger().warn("TelnyxSTT: maximum of 10 onTranscript callbacks reached; replacing the last callback.");
41991
- this.callbacks[this.callbacks.length - 1] = callback;
41992
- return;
41993
- }
41994
- this.callbacks.push(callback);
43239
+ this.callbacks.add(callback);
43240
+ }
43241
+ /** Unregister a previously-registered transcript listener. */
43242
+ offTranscript(callback) {
43243
+ this.callbacks.delete(callback);
41995
43244
  }
41996
43245
  /** Close the streaming WebSocket. */
41997
43246
  close() {
@@ -42002,6 +43251,7 @@ var TelnyxSTT = class {
42002
43251
  }
42003
43252
  this.ws = null;
42004
43253
  }
43254
+ this.headerSent = false;
42005
43255
  }
42006
43256
  };
42007
43257
 
@@ -42023,6 +43273,7 @@ var TelnyxTTSSampleRate = {
42023
43273
  HZ_24000: 24e3
42024
43274
  };
42025
43275
  var DEFAULT_VOICE = TelnyxTTSVoice.NATURAL_HD_ASTRA;
43276
+ var FRAME_TIMEOUT_MS2 = 3e4;
42026
43277
  var TelnyxTTS = class {
42027
43278
  constructor(apiKey, voice = DEFAULT_VOICE, baseUrl = TELNYX_TTS_WS_URL) {
42028
43279
  this.apiKey = apiKey;
@@ -42050,69 +43301,83 @@ var TelnyxTTS = class {
42050
43301
  */
42051
43302
  async *synthesizeStream(text) {
42052
43303
  const url2 = `${this.baseUrl}?voice=${encodeURIComponent(this.voice)}`;
42053
- const ws = new import_ws13.default(url2, {
42054
- headers: { Authorization: `Bearer ${this.apiKey}` }
42055
- });
42056
- await new Promise((resolve2, reject) => {
42057
- const timer = setTimeout(() => reject(new Error("Telnyx TTS connect timeout")), 1e4);
42058
- ws.once("open", () => {
42059
- clearTimeout(timer);
42060
- resolve2();
43304
+ let ws = null;
43305
+ try {
43306
+ let push2 = function(item) {
43307
+ const w = waiters.shift();
43308
+ if (w) {
43309
+ w(item);
43310
+ } else {
43311
+ queue.push(item);
43312
+ }
43313
+ };
43314
+ var push = push2;
43315
+ ws = new import_ws13.default(url2, {
43316
+ headers: { Authorization: `Bearer ${this.apiKey}` }
42061
43317
  });
42062
- ws.once("error", (err) => {
42063
- clearTimeout(timer);
42064
- reject(err);
43318
+ await new Promise((resolve2, reject) => {
43319
+ const timer = setTimeout(() => reject(new Error("Telnyx TTS connect timeout")), 1e4);
43320
+ ws.once("open", () => {
43321
+ clearTimeout(timer);
43322
+ resolve2();
43323
+ });
43324
+ ws.once("error", (err) => {
43325
+ clearTimeout(timer);
43326
+ reject(err);
43327
+ });
42065
43328
  });
42066
- });
42067
- const queue = [];
42068
- const waiters = [];
42069
- function push(item) {
42070
- const w = waiters.shift();
42071
- if (w) {
42072
- w(item);
42073
- } else {
42074
- queue.push(item);
42075
- }
42076
- }
42077
- ws.on("message", (raw) => {
42078
- let data;
42079
- try {
42080
- data = JSON.parse(raw.toString());
42081
- } catch {
42082
- getLogger().warn("TelnyxTTS: received invalid JSON");
42083
- return;
42084
- }
42085
- const audioB64 = data.audio;
42086
- if (!audioB64) return;
42087
- try {
42088
- const audioBytes = Buffer.from(audioB64, "base64");
42089
- if (audioBytes.length > 0) {
42090
- push(audioBytes);
43329
+ const queue = [];
43330
+ const waiters = [];
43331
+ ws.on("message", (raw) => {
43332
+ let data;
43333
+ try {
43334
+ data = JSON.parse(raw.toString());
43335
+ } catch {
43336
+ getLogger().warn("TelnyxTTS: received invalid JSON");
43337
+ return;
42091
43338
  }
42092
- } catch {
42093
- }
42094
- });
42095
- ws.on("close", () => {
42096
- push(null);
42097
- });
42098
- ws.on("error", (err) => {
42099
- push({ error: err instanceof Error ? err : new Error(String(err)) });
42100
- });
42101
- ws.send(JSON.stringify({ text: " " }));
42102
- ws.send(JSON.stringify({ text }));
42103
- ws.send(JSON.stringify({ text: "" }));
42104
- try {
43339
+ const audioB64 = data.audio;
43340
+ if (!audioB64) return;
43341
+ try {
43342
+ const audioBytes = Buffer.from(audioB64, "base64");
43343
+ if (audioBytes.length > 0) {
43344
+ push2(audioBytes);
43345
+ }
43346
+ } catch {
43347
+ }
43348
+ });
43349
+ ws.on("close", () => {
43350
+ push2(null);
43351
+ });
43352
+ ws.on("error", (err) => {
43353
+ push2({ error: err instanceof Error ? err : new Error(String(err)) });
43354
+ });
43355
+ ws.send(JSON.stringify({ text: " " }));
43356
+ ws.send(JSON.stringify({ text }));
43357
+ ws.send(JSON.stringify({ text: "" }));
42105
43358
  while (true) {
42106
- const item = queue.length > 0 ? queue.shift() : await new Promise((resolve2) => waiters.push(resolve2));
43359
+ let frameTimer;
43360
+ const item = queue.length > 0 ? queue.shift() : await Promise.race([
43361
+ new Promise((resolve2) => waiters.push(resolve2)),
43362
+ new Promise((_, reject) => {
43363
+ frameTimer = setTimeout(
43364
+ () => reject(new Error("Telnyx TTS frame timeout")),
43365
+ FRAME_TIMEOUT_MS2
43366
+ );
43367
+ })
43368
+ ]).finally(() => {
43369
+ if (frameTimer !== void 0) clearTimeout(frameTimer);
43370
+ });
42107
43371
  if (item === null) return;
42108
43372
  if (typeof item === "object" && "error" in item) throw item.error;
42109
43373
  yield item;
42110
43374
  }
42111
43375
  } finally {
42112
43376
  try {
42113
- ws.close();
43377
+ ws?.close();
42114
43378
  } catch {
42115
43379
  }
43380
+ ws?.removeAllListeners();
42116
43381
  }
42117
43382
  }
42118
43383
  };
@@ -42187,6 +43452,7 @@ init_event_bus();
42187
43452
  PRICING_VERSION,
42188
43453
  PartialStreamError,
42189
43454
  Patter,
43455
+ PatterConfigError,
42190
43456
  PatterConnectionError,
42191
43457
  PatterError,
42192
43458
  PatterTool,
@@ -42274,6 +43540,8 @@ init_event_bus();
42274
43540
  mulawToPcm16,
42275
43541
  notifyDashboard,
42276
43542
  openaiTts,
43543
+ openclawConsult,
43544
+ openclawPostCallNotifier,
42277
43545
  pcm16ToMulaw,
42278
43546
  resample16kTo8k,
42279
43547
  resample24kTo16k,