getpatter 0.6.3 → 0.6.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,7 +5,7 @@ import {
5
5
  createResampler8kTo16k,
6
6
  mulawToPcm16,
7
7
  pcm16ToMulaw
8
- } from "./chunk-CL2U3YET.mjs";
8
+ } from "./chunk-BO227NTF.mjs";
9
9
  import {
10
10
  getLogger
11
11
  } from "./chunk-MVOQFAEO.mjs";
@@ -1039,11 +1039,25 @@ function calculateRealtimeCachedSavings(usage, pricing, model) {
1039
1039
  const rates = resolveProviderRates(pricing.openai_realtime, model);
1040
1040
  if (rates.unit !== "token") return 0;
1041
1041
  const input = usage.input_token_details ?? {};
1042
- const cached = input.cached_tokens_details ?? {};
1043
1042
  const cachedAudioRate = rates.cached_audio_input_per_token ?? rates.audio_input_per_token ?? 0;
1044
1043
  const cachedTextRate = rates.cached_text_input_per_token ?? rates.text_input_per_token ?? 0;
1045
- const cachedAudio = Math.min(cached.audio_tokens ?? 0, input.audio_tokens ?? 0);
1046
- const cachedText = Math.min(cached.text_tokens ?? 0, input.text_tokens ?? 0);
1044
+ const totalAudio = input.audio_tokens ?? 0;
1045
+ const totalText = input.text_tokens ?? 0;
1046
+ let cachedAudio;
1047
+ let cachedText;
1048
+ const details = input.cached_tokens_details;
1049
+ if (details && (details.audio_tokens !== void 0 || details.text_tokens !== void 0)) {
1050
+ cachedAudio = Math.min(details.audio_tokens ?? 0, totalAudio);
1051
+ cachedText = Math.min(details.text_tokens ?? 0, totalText);
1052
+ } else if (input.cached_tokens && input.cached_tokens > 0) {
1053
+ const totalIn = totalAudio + totalText;
1054
+ const ratio = totalIn > 0 ? input.cached_tokens / totalIn : 0;
1055
+ cachedAudio = Math.min(Math.round(totalAudio * ratio), totalAudio);
1056
+ cachedText = Math.min(Math.round(totalText * ratio), totalText);
1057
+ } else {
1058
+ cachedAudio = 0;
1059
+ cachedText = 0;
1060
+ }
1047
1061
  const fullAudio = cachedAudio * (rates.audio_input_per_token ?? 0);
1048
1062
  const fullText = cachedText * (rates.text_input_per_token ?? 0);
1049
1063
  const discountedAudio = cachedAudio * cachedAudioRate;
@@ -1290,14 +1304,49 @@ var MetricsStore = class extends EventEmitter {
1290
1304
  } else {
1291
1305
  for (let i = this.calls.length - 1; i >= 0; i--) {
1292
1306
  if (this.calls[i].call_id === callId) {
1293
- this.calls[i].status = status;
1294
- Object.assign(this.calls[i], extra);
1307
+ this.calls[i] = { ...this.calls[i], status, ...extra };
1295
1308
  break;
1296
1309
  }
1297
1310
  }
1298
1311
  }
1299
1312
  this.publish("call_status", { call_id: callId, status, ...extra });
1300
1313
  }
1314
+ /**
1315
+ * Record a single transcript line (user/assistant) as it becomes known.
1316
+ *
1317
+ * FIX-5 (issue #154): the live forward path for the dashboard transcript.
1318
+ * The Realtime stream handler calls this the moment each line is known — the
1319
+ * user line right after the hallucination filter accepts it, the assistant
1320
+ * line when its turn flushes — keyed by the monotonic ``turnIndex`` reserved
1321
+ * at turn-open (``reserveTurnIndex``). Each line is appended to the active
1322
+ * call's ``transcript`` array and broadcast over SSE as a ``transcript_line``
1323
+ * event so the dashboard can render lines as they arrive and re-sort by
1324
+ * ``(turnIndex, user<assistant)`` — making a late-arriving user line land
1325
+ * ABOVE its agent line. ``recordTurn`` de-dups against the lines pushed here
1326
+ * by ``(turnIndex, role)`` so the metrics path never double-pushes the same
1327
+ * text. Parity with Python ``record_transcript_line``.
1328
+ */
1329
+ recordTranscriptLine(data) {
1330
+ const callId = data.call_id || "";
1331
+ const { role, text, turnIndex } = data;
1332
+ if (!callId || role !== "user" && role !== "assistant" || !text) return;
1333
+ const active = this.activeCalls.get(callId);
1334
+ if (active) {
1335
+ if (!active.transcript) active.transcript = [];
1336
+ active.transcript.push({
1337
+ role,
1338
+ text,
1339
+ timestamp: Date.now() / 1e3,
1340
+ turnIndex
1341
+ });
1342
+ }
1343
+ this.publish("transcript_line", {
1344
+ call_id: callId,
1345
+ turnIndex,
1346
+ role,
1347
+ text
1348
+ });
1349
+ }
1301
1350
  /** Append a single conversation turn to an active call and broadcast it via SSE. */
1302
1351
  recordTurn(data) {
1303
1352
  const callId = data.call_id || "";
@@ -1312,14 +1361,19 @@ var MetricsStore = class extends EventEmitter {
1312
1361
  const userText = typeof turnRecord.user_text === "string" ? turnRecord.user_text : "";
1313
1362
  const agentText = typeof turnRecord.agent_text === "string" ? turnRecord.agent_text : "";
1314
1363
  const ts = typeof turnRecord.timestamp === "number" ? turnRecord.timestamp : Date.now() / 1e3;
1315
- if (userText.length > 0) {
1316
- active.transcript.push({ role: "user", text: userText, timestamp: ts });
1364
+ const turnIndex = typeof turnRecord.turn_index === "number" ? turnRecord.turn_index : void 0;
1365
+ const alreadyLive = (role) => turnIndex !== void 0 && (active.transcript ?? []).some(
1366
+ (e) => e.turnIndex === turnIndex && e.role === role
1367
+ );
1368
+ if (userText.length > 0 && !alreadyLive("user")) {
1369
+ active.transcript.push({ role: "user", text: userText, timestamp: ts, turnIndex });
1317
1370
  }
1318
- if (agentText.length > 0 && agentText !== "[interrupted]") {
1371
+ if (agentText.length > 0 && agentText !== "[interrupted]" && !alreadyLive("assistant")) {
1319
1372
  active.transcript.push({
1320
1373
  role: "assistant",
1321
1374
  text: agentText,
1322
- timestamp: ts
1375
+ timestamp: ts,
1376
+ turnIndex
1323
1377
  });
1324
1378
  }
1325
1379
  }
@@ -1392,7 +1446,7 @@ var MetricsStore = class extends EventEmitter {
1392
1446
  getCall(callId) {
1393
1447
  if (this.deletedCallIds.has(callId)) return null;
1394
1448
  for (let i = this.calls.length - 1; i >= 0; i--) {
1395
- if (this.calls[i].call_id === callId) return this.calls[i];
1449
+ if (this.calls[i].call_id === callId) return { ...this.calls[i] };
1396
1450
  }
1397
1451
  return null;
1398
1452
  }
@@ -1434,7 +1488,9 @@ var MetricsStore = class extends EventEmitter {
1434
1488
  }
1435
1489
  if (accepted.length === 0) return [];
1436
1490
  accepted.sort();
1437
- this.persistDeletedIds();
1491
+ this.persistDeletedIds().catch(
1492
+ (err) => getLogger().debug(`MetricsStore.deleteCalls: persistDeletedIds failed: ${String(err)}`)
1493
+ );
1438
1494
  this.publish("calls_deleted", { call_ids: accepted });
1439
1495
  return accepted;
1440
1496
  }
@@ -1446,19 +1502,19 @@ var MetricsStore = class extends EventEmitter {
1446
1502
  getDeletedCallIds() {
1447
1503
  return Array.from(this.deletedCallIds).sort();
1448
1504
  }
1449
- /** Atomically persist the deleted-ids set to disk. Best-effort. */
1450
- persistDeletedIds() {
1505
+ /** Atomically persist the deleted-ids set to disk. Best-effort async. */
1506
+ async persistDeletedIds() {
1451
1507
  if (this.deletedIdsPath === null) return;
1452
1508
  try {
1453
1509
  const dir = path2.dirname(this.deletedIdsPath);
1454
- fs2.mkdirSync(dir, { recursive: true });
1510
+ await fs2.promises.mkdir(dir, { recursive: true });
1455
1511
  const tmp = this.deletedIdsPath + ".tmp";
1456
1512
  const payload = {
1457
1513
  version: 1,
1458
1514
  deleted_call_ids: Array.from(this.deletedCallIds).sort()
1459
1515
  };
1460
- fs2.writeFileSync(tmp, JSON.stringify(payload, null, 2), "utf8");
1461
- fs2.renameSync(tmp, this.deletedIdsPath);
1516
+ await fs2.promises.writeFile(tmp, JSON.stringify(payload, null, 2), "utf8");
1517
+ await fs2.promises.rename(tmp, this.deletedIdsPath);
1462
1518
  } catch (err) {
1463
1519
  getLogger().debug(
1464
1520
  `MetricsStore.persistDeletedIds: ${String(err)}`
@@ -1467,7 +1523,8 @@ var MetricsStore = class extends EventEmitter {
1467
1523
  }
1468
1524
  /** Look up an active call by id (returns undefined if not active or unknown). */
1469
1525
  getActive(callId) {
1470
- return this.activeCalls.get(callId);
1526
+ const rec = this.activeCalls.get(callId);
1527
+ return rec !== void 0 ? { ...rec } : void 0;
1471
1528
  }
1472
1529
  /** Return all currently active (not yet ended) calls. */
1473
1530
  getActiveCalls() {
@@ -1712,8 +1769,8 @@ function loadTranscriptJsonl(filePath) {
1712
1769
  } catch {
1713
1770
  continue;
1714
1771
  }
1715
- const tsIso = typeof row.ts === "string" ? Date.parse(row.ts) : NaN;
1716
- const tsNumeric = typeof row.timestamp === "number" ? row.timestamp * 1e3 : NaN;
1772
+ const tsIso = typeof row.ts === "string" ? Date.parse(row.ts) / 1e3 : NaN;
1773
+ const tsNumeric = typeof row.timestamp === "number" ? row.timestamp : NaN;
1717
1774
  const timestamp = Number.isFinite(tsIso) ? tsIso : Number.isFinite(tsNumeric) ? tsNumeric : 0;
1718
1775
  const userText = typeof row.user_text === "string" ? row.user_text : "";
1719
1776
  const agentText = typeof row.agent_text === "string" ? row.agent_text : "";
@@ -1870,8 +1927,8 @@ function mountDashboard(app, store, token = "") {
1870
1927
  res.type("text/html").send(DASHBOARD_HTML);
1871
1928
  });
1872
1929
  app.get("/api/dashboard/calls", auth, (req, res) => {
1873
- const limit = Math.min(parseInt(req.query.limit || "50", 10) || 50, 1e3);
1874
- const offset = parseInt(req.query.offset || "0", 10) || 0;
1930
+ const limit = Math.min(Math.max(0, parseInt(req.query.limit || "50", 10) || 50), 1e3);
1931
+ const offset = Math.max(0, parseInt(req.query.offset || "0", 10) || 0);
1875
1932
  res.json(store.getCalls(limit, offset));
1876
1933
  });
1877
1934
  app.get("/api/dashboard/calls/:callId", auth, (req, res) => {
@@ -1961,8 +2018,8 @@ data: ${data}
1961
2018
  function mountApi(app, store, token = "") {
1962
2019
  const auth = makeAuthMiddleware(token);
1963
2020
  app.get("/api/v1/calls", auth, (req, res) => {
1964
- const limit = Math.min(parseInt(req.query.limit || "50", 10) || 50, 1e3);
1965
- const offset = parseInt(req.query.offset || "0", 10) || 0;
2021
+ const limit = Math.min(Math.max(0, parseInt(req.query.limit || "50", 10) || 50), 1e3);
2022
+ const offset = Math.max(0, parseInt(req.query.offset || "0", 10) || 0);
1966
2023
  const calls = store.getCalls(limit, offset);
1967
2024
  res.json({
1968
2025
  data: calls,
@@ -2219,14 +2276,31 @@ var RemoteMessageHandler = class {
2219
2276
  while (chunks.length > 0) {
2220
2277
  yield chunks.shift();
2221
2278
  }
2279
+ const READ_TIMEOUT_MS = 3e4;
2222
2280
  while (!done && !error) {
2223
- const text = await new Promise((resolve2) => {
2281
+ const messagePromise = new Promise((resolve2) => {
2224
2282
  if (chunks.length > 0) {
2225
2283
  resolve2(chunks.shift());
2226
2284
  } else {
2227
2285
  resolveNext = resolve2;
2228
2286
  }
2229
2287
  });
2288
+ let timeoutHandle;
2289
+ const timeoutPromise = new Promise((_, reject) => {
2290
+ timeoutHandle = setTimeout(
2291
+ () => reject(new Error("WebSocket read timeout: no frame received within 30 s")),
2292
+ READ_TIMEOUT_MS
2293
+ );
2294
+ });
2295
+ let text;
2296
+ try {
2297
+ text = await Promise.race([messagePromise, timeoutPromise]);
2298
+ } catch (timeoutErr) {
2299
+ resolveNext = null;
2300
+ throw timeoutErr;
2301
+ } finally {
2302
+ clearTimeout(timeoutHandle);
2303
+ }
2230
2304
  if (text === null) break;
2231
2305
  yield text;
2232
2306
  }
@@ -2286,6 +2360,12 @@ var PatterError = class extends Error {
2286
2360
  this.code = options?.code ?? ErrorCode.INTERNAL;
2287
2361
  }
2288
2362
  };
/**
 * ``PatterError`` subclass named "PatterConfigError". Its error code is
 * ``options.code`` when provided, otherwise ``ErrorCode.CONFIG``.
 */
var PatterConfigError = class extends PatterError {
  constructor(message, options) {
    const code = options?.code ?? ErrorCode.CONFIG;
    super(message, { code });
    this.name = "PatterConfigError";
  }
};
2289
2369
  var PatterConnectionError = class extends PatterError {
2290
2370
  constructor(message, options) {
2291
2371
  super(message, { code: options?.code ?? ErrorCode.CONNECTION });
@@ -2530,18 +2610,6 @@ var DeepgramSTT = class _DeepgramSTT {
2530
2610
  } catch {
2531
2611
  return;
2532
2612
  }
2533
- const dataType = String(data.type ?? "unknown");
2534
- if (dataType === "Results") {
2535
- const transcript2 = (data.channel?.alternatives?.[0]?.transcript ?? "").trim();
2536
- const isFinal = Boolean(data.is_final);
2537
- const speechFinal2 = Boolean(data.speech_final);
2538
- const fromFinalize = Boolean(data.from_finalize);
2539
- getLogger().info(
2540
- `[DIAG] DG Results text=${JSON.stringify(transcript2.slice(0, 60))} isFinal=${isFinal} speechFinal=${speechFinal2} fromFinalize=${fromFinalize}`
2541
- );
2542
- } else if (dataType !== "Metadata") {
2543
- getLogger().info(`[DIAG] DG event type=${dataType}`);
2544
- }
2545
2613
  if (data.type === "Metadata" && data.request_id) {
2546
2614
  this.requestId = data.request_id;
2547
2615
  return;
@@ -2631,7 +2699,7 @@ var DeepgramSTT = class _DeepgramSTT {
2631
2699
  if (!this.ws || this.ws.readyState !== WebSocket2.OPEN) {
2632
2700
  this.audioDroppedCount++;
2633
2701
  if (this.audioDroppedCount === 1 || this.audioDroppedCount % 50 === 0) {
2634
- getLogger().info(
2702
+ getLogger().debug(
2635
2703
  `[DIAG] DeepgramSTT.sendAudio dropped (ws state=${this.ws?.readyState ?? "null"}) \u2014 total dropped=${this.audioDroppedCount}`
2636
2704
  );
2637
2705
  }
@@ -2640,7 +2708,7 @@ var DeepgramSTT = class _DeepgramSTT {
2640
2708
  if (audio.length === 0) return;
2641
2709
  this.audioSentCount++;
2642
2710
  if (this.audioSentCount === 1 || this.audioSentCount % 100 === 0) {
2643
- getLogger().info(
2711
+ getLogger().debug(
2644
2712
  `[DIAG] DeepgramSTT.sendAudio: total chunks sent=${this.audioSentCount} (last=${audio.length} bytes)`
2645
2713
  );
2646
2714
  }
@@ -2678,16 +2746,16 @@ var DeepgramSTT = class _DeepgramSTT {
2678
2746
  finalize() {
2679
2747
  const ws = this.ws;
2680
2748
  if (!ws || ws.readyState !== WebSocket2.OPEN) {
2681
- getLogger().info(
2749
+ getLogger().debug(
2682
2750
  `[DIAG] DeepgramSTT.finalize SKIPPED (ws state=${ws?.readyState ?? "null"})`
2683
2751
  );
2684
2752
  return;
2685
2753
  }
2686
2754
  try {
2687
2755
  ws.send(JSON.stringify({ type: "Finalize" }));
2688
- getLogger().info("[DIAG] DeepgramSTT.finalize sent {type:Finalize}");
2756
+ getLogger().debug("[DIAG] DeepgramSTT.finalize sent {type:Finalize}");
2689
2757
  } catch (err) {
2690
- getLogger().info(`[DIAG] DeepgramSTT.finalize send failed: ${String(err)}`);
2758
+ getLogger().debug(`[DIAG] DeepgramSTT.finalize send failed: ${String(err)}`);
2691
2759
  }
2692
2760
  }
2693
2761
  /** Send Finalize, briefly drain trailing transcripts, then close the socket. */
@@ -2760,6 +2828,7 @@ var CallMetricsAccumulator = class {
2760
2828
  _pricing;
2761
2829
  _callStart;
2762
2830
  _turns = [];
2831
+ // mutable internal array; immutable when exposed via TurnMetrics[] → readonly TurnMetrics[]
2763
2832
  // Per-turn timing state
2764
2833
  _turnStart = null;
2765
2834
  _sttComplete = null;
@@ -2846,6 +2915,16 @@ var CallMetricsAccumulator = class {
2846
2915
  * (the common cause of missing endpoint signals).
2847
2916
  */
2848
2917
  _endpointSignalMissingCount = 0;
2918
+ /**
2919
+ * Monotonic per-call turn counter. Reserved at turn OPEN
2920
+ * (``onAdapterSpeechStopped`` / ``speech_stopped``) via
2921
+ * ``reserveTurnIndex()`` and threaded through the buffering pipeline into
2922
+ * ``recordTurnComplete`` / ``recordTurnInterrupted`` as ``preReservedIndex``.
2923
+ * This makes ``turn_index`` stable under drops / interrupts (previously it
2924
+ * was assigned at completion as ``this._turns.length``, which shifted when a
2925
+ * turn was dropped). Parity with Python ``_next_turn_index``.
2926
+ */
2927
+ _nextTurnIndex = 0;
2849
2928
  constructor(opts) {
2850
2929
  this.callId = opts.callId;
2851
2930
  this.providerMode = opts.providerMode;
@@ -2894,12 +2973,27 @@ var CallMetricsAccumulator = class {
2894
2973
  this._turnUserText = "";
2895
2974
  this._turnSttAudioSeconds = 0;
2896
2975
  this._turnAlreadyClosed = false;
2976
+ this._initialTtfbEmitted = false;
2897
2977
  this._vadStoppedAt = null;
2898
2978
  this._sttFinalAt = null;
2899
2979
  this._turnCommittedAt = null;
2900
2980
  this._onUserTurnCompletedDelayMs = null;
2901
2981
  this._eventBus?.emit("turn_started", { callId: this.callId });
2902
2982
  }
2983
+ /**
2984
+ * Reserve and return the next monotonic turn index.
2985
+ *
2986
+ * Called once per turn at the moment the turn OPENS (Realtime:
2987
+ * ``onAdapterSpeechStopped``). The returned index is threaded through the
2988
+ * buffering pipeline and handed back to ``recordTurnComplete`` /
2989
+ * ``recordTurnInterrupted`` as ``preReservedIndex`` so the emitted
2990
+ * ``turn_index`` matches the live per-line transcript ordering even when a
2991
+ * turn is dropped or interrupted between open and close. Parity with Python
2992
+ * ``reserve_turn_index``.
2993
+ */
2994
+ reserveTurnIndex() {
2995
+ return this._nextTurnIndex++;
2996
+ }
2903
2997
  /**
2904
2998
  * Start a new turn only if no turn is currently open.
2905
2999
  * Use this at inbound-audio ingestion points so the turn timer begins
@@ -2937,6 +3031,7 @@ var CallMetricsAccumulator = class {
2937
3031
  anchorUserSpeechStart() {
2938
3032
  if (this._turnCommittedMono !== null) return;
2939
3033
  this._turnStart = hrTimeMs();
3034
+ this._turnAlreadyClosed = false;
2940
3035
  this._endpointSignalAt = null;
2941
3036
  this._vadStoppedAt = null;
2942
3037
  this._sttFinalAt = null;
@@ -3060,11 +3155,14 @@ var CallMetricsAccumulator = class {
3060
3155
  * ``user_text=''``. The caller treats ``null`` as "nothing to emit";
3061
3156
  * ``emitTurnMetrics`` is already null-safe.
3062
3157
  */
3063
- recordTurnComplete(agentText) {
3158
+ recordTurnComplete(agentText, preReservedIndex) {
3064
3159
  if (this._turnAlreadyClosed) return null;
3065
3160
  const latency = this._computeTurnLatency();
3066
3161
  const turn = {
3067
- turn_index: this._turns.length,
3162
+ // Use the pre-reserved index (stable across drops/interrupts) when the
3163
+ // caller threaded one through; otherwise fall back to the append
3164
+ // position for back-compat with callers that never reserved.
3165
+ turn_index: preReservedIndex ?? this._turns.length,
3068
3166
  user_text: this._turnUserText,
3069
3167
  agent_text: agentText,
3070
3168
  latency,
@@ -3073,10 +3171,10 @@ var CallMetricsAccumulator = class {
3073
3171
  timestamp: Date.now() / 1e3
3074
3172
  };
3075
3173
  this._turns.push(turn);
3076
- this._resetTurnState();
3077
- this._turnAlreadyClosed = true;
3078
3174
  this._eventBus?.emit("turn_ended", { callId: this.callId, turn });
3079
3175
  this._eventBus?.emit("metrics_collected", { callId: this.callId, turn });
3176
+ this._resetTurnState();
3177
+ this._turnAlreadyClosed = true;
3080
3178
  return turn;
3081
3179
  }
3082
3180
  /**
@@ -3088,12 +3186,12 @@ var CallMetricsAccumulator = class {
3088
3186
  * a future refactor that reorders the bargein + LLM-unwind paths)
3089
3187
  * from overwriting a turn that the complete path already emitted.
3090
3188
  */
3091
- recordTurnInterrupted() {
3189
+ recordTurnInterrupted(preReservedIndex) {
3092
3190
  if (this._turnStart === null) return null;
3093
3191
  if (this._turnAlreadyClosed) return null;
3094
3192
  const latency = this._computeTurnLatency();
3095
3193
  const turn = {
3096
- turn_index: this._turns.length,
3194
+ turn_index: preReservedIndex ?? this._turns.length,
3097
3195
  user_text: this._turnUserText,
3098
3196
  agent_text: "[interrupted]",
3099
3197
  latency,
@@ -3145,8 +3243,10 @@ var CallMetricsAccumulator = class {
3145
3243
  }
3146
3244
  /**
3147
3245
  * Record the delta (ms) between turn-committed and when on_user_turn_completed
3148
- * pipeline hook finished. Stored for inclusion in the next ``emitEouMetrics``
3149
- * call (or an explicit re-emit if desired).
3246
+ * pipeline hook finished. Does NOT re-emit: like Python's
3247
+ * ``record_on_user_turn_completed_delay``, this only stores the value; the
3248
+ * single EOU emission happens on ``recordTurnCommitted`` (3-timestamp guard,
3249
+ * delay defaults to 0 if not yet recorded).
3150
3250
  */
3151
3251
  recordOnUserTurnCompletedDelay(delayMs) {
3152
3252
  this._onUserTurnCompletedDelayMs = delayMs;
@@ -3159,7 +3259,7 @@ var CallMetricsAccumulator = class {
3159
3259
  * ``transcriptionDelay`` = turnCommitted − vadStopped (ms)
3160
3260
  * ``onUserTurnCompletedDelay`` = caller-supplied delta (ms) or 0
3161
3261
  */
3162
- /** Emit `EOUMetrics` once VAD-stop, STT-final, and turn-committed timestamps are all known. */
3262
+ /** Emit `EOUMetrics` once VAD-stop, STT-final, turn-committed, and on_user_turn_completed delay are all known. */
3163
3263
  emitEouMetrics() {
3164
3264
  if (this._vadStoppedAt === null || this._sttFinalAt === null || this._turnCommittedAt === null) {
3165
3265
  return;
@@ -3575,10 +3675,16 @@ var MCPManager = class {
3575
3675
  }
3576
3676
  const aggregatedTools = [];
3577
3677
  for (const cfg of this.configs) {
3678
+ try {
3679
+ validateWebhookUrl(cfg.url);
3680
+ } catch (e) {
3681
+ getLogger().error(`MCP server '${cfg.name}' (${cfg.url}) rejected by SSRF guard: ${String(e)}`);
3682
+ continue;
3683
+ }
3578
3684
  const transport = new transportModule.StreamableHTTPClientTransport(new URL(cfg.url), {
3579
3685
  requestInit: { headers: cfg.headers }
3580
3686
  });
3581
- const client = new mcpModule.Client({ name: "patter", version: "0.6.0" });
3687
+ const client = new mcpModule.Client({ name: "patter", version: VERSION });
3582
3688
  try {
3583
3689
  await client.connect(transport);
3584
3690
  } catch (e) {
@@ -3650,6 +3756,268 @@ var MCPManager = class {
3650
3756
  }
3651
3757
  };
3652
3758
 
3759
+ // src/consult.ts
3760
+ init_esm_shims();
3761
// Fallback request timeout in milliseconds, used when a config/options object supplies no timeoutMs.
+ var DEFAULT_TIMEOUT_MS = 3e4;
3762
// Default tool name and description used when the config does not override them.
+ var DEFAULT_TOOL_NAME = "consult_agent";
3763
+ var DEFAULT_DESCRIPTION = "Consult your back-office agent for deeper reasoning, fresh information, or actions beyond this call. Use when the caller asks something you cannot answer directly.";
3764
// Upper bound (in characters) applied with slice() to text returned by the consulted endpoint.
+ var MAX_RESPONSE_CHARS = 1e6;
3765
// Keys probed, in this order, on a JSON-object webhook reply; the first string value wins.
+ var REPLY_KEYS = ["reply", "response", "text", "result", "answer", "message"];
3766
// Text returned by the tool handlers when the upstream call fails or yields no usable reply.
+ var GRACEFUL_FALLBACK = "I wasn't able to reach the system to get that answer right now.";
3767
// OpenClaw preset values consumed by openclawConsult().
+ var OPENCLAW_DEFAULT_BASE_URL = "http://127.0.0.1:18789/v1";
3768
// Name of the environment variable read for the API key when none is passed explicitly.
+ var OPENCLAW_API_KEY_ENV = "OPENCLAW_API_KEY";
3769
// Request header that carries the call id when one is available.
+ var OPENCLAW_SESSION_HEADER = "x-openclaw-session-key";
3770
+ var OPENCLAW_DESCRIPTION = "Consult your OpenClaw agent for anything account-specific \u2014 appointments, customer records, schedules, or actions in the back-office system. NEVER state an appointment time, customer detail, or schedule fact from your own memory; ALWAYS call this tool for those and read back what it returns.";
3771
+ var OPENCLAW_REASSURANCE = "Let me check on that for you, one moment.";
3772
// Allowed shape of an OpenClaw agent id: letters, digits, and . _ : / - only.
+ var OPENCLAW_AGENT_RE = /^[A-Za-z0-9._:/-]+$/;
3773
// Parameter schema attached to the consult tool: a single required string field, `request`.
+ var PARAMETERS = {
3774
+ type: "object",
3775
+ properties: {
3776
+ request: {
3777
+ type: "string",
3778
+ description: "The question or task to send to your back-office agent for deeper reasoning, fresh information, or an action beyond this call. State it self-containedly \u2014 the dialog history is not forwarded with the consult."
3779
+ }
3780
+ },
3781
+ required: ["request"]
3782
+ };
/**
 * Report whether the host of `baseUrl` is a loopback, link-local, or
 * private-network address.
 *
 * Recognised as local/private:
 * - `localhost`, `0.0.0.0`, `::1`, and any `*.local` name;
 * - IPv4 literals in 127/8, 10/8, 192.168/16, 169.254/16 and 172.16/12;
 * - IPv6 literals in fc00::/7 (unique local) and fe80::/10 (link-local).
 *
 * IPv4 ranges are only matched against a complete dotted-quad literal. The
 * URL parser normalises every IPv4 spelling to that form, so a DNS name that
 * merely starts with a private-looking prefix (e.g. `10.example.com`) is NOT
 * treated as private. This matters because the result is used as the default
 * for `allowLoopback`, which relaxes the outbound URL guard.
 *
 * @param {string} baseUrl - absolute URL whose hostname is inspected.
 * @returns {boolean} true for a local/private host; false otherwise, including
 *   when `baseUrl` cannot be parsed.
 */
function isLoopbackOrPrivateHost(baseUrl) {
  let host;
  try {
    host = new URL(baseUrl).hostname.toLowerCase();
  } catch {
    return false;
  }
  // URL#hostname keeps the square brackets around IPv6 literals.
  if (host.startsWith("[") && host.endsWith("]")) host = host.slice(1, -1);
  if (host === "localhost" || host === "0.0.0.0" || host === "::1") return true;
  if (host.endsWith(".local")) return true;

  const quad = /^(\d{1,3})\.(\d{1,3})\.\d{1,3}\.\d{1,3}$/.exec(host);
  if (quad) {
    const first = Number(quad[1]);
    const second = Number(quad[2]);
    if (first === 127 || first === 10) return true;
    if (first === 192 && second === 168) return true;
    if (first === 169 && second === 254) return true;
    return first === 172 && second >= 16 && second <= 31;
  }

  if (host.includes(":") && (/^f[cd][0-9a-f]{2}:/.test(host) || /^fe[89ab][0-9a-f]:/.test(host))) {
    return true;
  }
  return false;
}
/**
 * Build a consult configuration preset for an OpenClaw agent.
 *
 * @param agent - agent id; must be non-empty and match ``OPENCLAW_AGENT_RE``.
 *   An id containing "/" or ":" is used verbatim as the model name; otherwise
 *   it is prefixed with ``openclaw/``.
 * @param opts - optional overrides: ``baseUrl``, ``apiKey``, ``timeoutMs``,
 *   ``toolName``, ``description``, ``reassurance``, ``headers``,
 *   ``allowLoopback``. When ``allowLoopback`` is not given it defaults to
 *   ``isLoopbackOrPrivateHost(baseUrl)``.
 * @returns a config object with an ``openaiCompatible`` section plus the
 *   resolved tool settings.
 * @throws Error when ``agent`` is empty or contains disallowed characters.
 */
function openclawConsult(agent, opts = {}) {
  const agentIsValid = Boolean(agent) && OPENCLAW_AGENT_RE.test(agent);
  if (!agentIsValid) {
    throw new Error(
      "OpenClaw agent must be a non-empty id of letters, digits, and ._:/- only"
    );
  }
  const baseUrl = opts.baseUrl ?? OPENCLAW_DEFAULT_BASE_URL;
  const alreadyQualified = agent.includes("/") || agent.includes(":");
  const model = alreadyQualified ? agent : `openclaw/${agent}`;
  const openaiCompatible = {
    baseUrl,
    model,
    apiKey: opts.apiKey,
    apiKeyEnv: OPENCLAW_API_KEY_ENV,
    sessionHeader: OPENCLAW_SESSION_HEADER
  };
  return {
    openaiCompatible,
    timeoutMs: opts.timeoutMs ?? DEFAULT_TIMEOUT_MS,
    toolName: opts.toolName ?? DEFAULT_TOOL_NAME,
    description: opts.description ?? OPENCLAW_DESCRIPTION,
    reassurance: opts.reassurance ?? OPENCLAW_REASSURANCE,
    headers: opts.headers,
    allowLoopback: opts.allowLoopback ?? isLoopbackOrPrivateHost(baseUrl)
  };
}
/**
 * Turn a consult configuration into a tool definition.
 *
 * Exactly one of ``config.url`` (webhook transport) or
 * ``config.openaiCompatible`` (chat-completions transport) must be set.
 * Caller-supplied ``config.headers`` are merged first and ``Content-Type`` is
 * then forced to ``application/json``.
 *
 * @returns ``{ name, description, parameters, handler }``, plus
 *   ``reassurance`` when the config provides one.
 * @throws Error when both or neither transport is configured.
 */
function buildConsultTool(config) {
  const usesWebhook = config.url != null;
  const usesOpenAI = config.openaiCompatible != null;
  if (usesWebhook === usesOpenAI) {
    throw new Error("ConsultConfig requires exactly one of url or openaiCompatible");
  }
  const timeoutMs = config.timeoutMs ?? DEFAULT_TIMEOUT_MS;
  const baseHeaders = {
    ...config.headers ?? {},
    "Content-Type": "application/json"
  };
  let handler;
  if (usesOpenAI) {
    handler = buildOpenAIHandler(config.openaiCompatible, baseHeaders, timeoutMs, config.allowLoopback ?? false);
  } else {
    handler = buildWebhookHandler(config.url, baseHeaders, timeoutMs, config.allowLoopback ?? false);
  }
  const tool = {
    name: config.toolName ?? DEFAULT_TOOL_NAME,
    description: config.description ?? DEFAULT_DESCRIPTION,
    parameters: PARAMETERS,
    handler
  };
  if (config.reassurance == null) return tool;
  return { ...tool, reassurance: config.reassurance };
}
/**
 * Create the tool handler for the plain-webhook consult transport.
 *
 * ``url`` is checked with ``validateWebhookUrl`` once, up front. The returned
 * async handler POSTs ``{ request, call_id, caller, callee }`` as JSON and
 * resolves to a string:
 * - ``GRACEFUL_FALLBACK`` on a non-2xx status or any fetch/timeout error
 *   (only the error's ``name`` is logged);
 * - for a JSON-object reply, the first string value found under
 *   ``REPLY_KEYS``;
 * - for any other valid JSON, its re-serialised form;
 * - otherwise the raw body text (capped at ``MAX_RESPONSE_CHARS``).
 */
function buildWebhookHandler(url, headers, timeoutMs, allowLoopback) {
  validateWebhookUrl(url, allowLoopback);
  return async (args, context) => {
    const payload = {
      request: typeof args?.request === "string" ? args.request : "",
      call_id: context?.call_id ?? "",
      caller: context?.caller ?? "",
      callee: context?.callee ?? ""
    };
    let rawReply;
    try {
      const response = await fetch(url, {
        method: "POST",
        headers,
        body: JSON.stringify(payload),
        signal: AbortSignal.timeout(timeoutMs)
      });
      if (!response.ok) {
        getLogger().warn(`consult tool: orchestrator returned HTTP ${response.status}`);
        return GRACEFUL_FALLBACK;
      }
      const fullText = await response.text();
      rawReply = fullText.slice(0, MAX_RESPONSE_CHARS);
    } catch (err) {
      const errName = err instanceof Error ? err.name : "error";
      getLogger().warn(
        `consult tool: orchestrator call failed: ${errName}`
      );
      return GRACEFUL_FALLBACK;
    }
    try {
      const parsed = JSON.parse(rawReply);
      const isRecord = parsed !== null && typeof parsed === "object" && !Array.isArray(parsed);
      if (isRecord) {
        const replyKey = REPLY_KEYS.find((key) => typeof parsed[key] === "string");
        if (replyKey !== void 0) return parsed[replyKey];
      }
      return JSON.stringify(parsed);
    } catch {
      // Not JSON: hand the text back unchanged.
      return rawReply;
    }
  };
}
/**
 * Create the tool handler for the OpenAI-compatible chat-completions
 * transport.
 *
 * The endpoint is ``<baseUrl>/chat/completions`` (trailing slashes on
 * ``baseUrl`` removed) and is checked with ``validateWebhookUrl`` once, up
 * front. The API key comes from ``oc.apiKey`` or, failing that, from the
 * environment variable named by ``oc.apiKeyEnv``; when present it is sent as
 * a Bearer token.
 *
 * The returned async handler sends a system message (call context) and a user
 * message (the request text), non-streaming. When a call id is known it is
 * sent both as the ``user`` field and in the ``oc.sessionHeader`` header. It
 * resolves to the trimmed ``choices[0].message.content`` (capped at
 * ``MAX_RESPONSE_CHARS``), or to ``GRACEFUL_FALLBACK`` on a 404, any other
 * non-2xx status, a missing/blank content field, or a fetch/timeout error.
 */
function buildOpenAIHandler(oc, baseHeaders, timeoutMs, allowLoopback) {
  const endpoint = `${oc.baseUrl.replace(/\/+$/, "")}/chat/completions`;
  validateWebhookUrl(endpoint, allowLoopback);
  const apiKey = oc.apiKey ?? (oc.apiKeyEnv ? process.env[oc.apiKeyEnv] : void 0);
  const headers = apiKey ? { ...baseHeaders, Authorization: `Bearer ${apiKey}` } : { ...baseHeaders };
  const { sessionHeader, model } = oc;
  return async (args, context) => {
    const requestText = typeof args?.request === "string" ? args.request : "";
    const callId = context?.call_id ?? "";
    const caller = context?.caller ?? "";
    const callee = context?.callee ?? "";
    const systemPrompt = [
      "You are answering an inbound phone call relayed by a voice agent.",
      caller && `Caller: ${caller}`,
      callee && `Line dialed: ${callee}`,
      "Reply concisely in a spoken, conversational style \u2014 it is read aloud to the caller."
    ].filter(Boolean).join("\n");
    const reqHeaders = sessionHeader && callId ? { ...headers, [sessionHeader]: callId } : { ...headers };
    const payload = {
      model,
      messages: [
        { role: "system", content: systemPrompt },
        { role: "user", content: requestText }
      ],
      stream: false,
      ...(callId ? { user: callId } : {})
    };
    try {
      const response = await fetch(endpoint, {
        method: "POST",
        headers: reqHeaders,
        body: JSON.stringify(payload),
        signal: AbortSignal.timeout(timeoutMs)
      });
      if (response.status === 404) {
        getLogger().warn(
          "consult tool: OpenAI-compatible endpoint returned 404 \u2014 is it enabled? (OpenClaw: set gateway.http.endpoints.chatCompletions.enabled = true)"
        );
        return GRACEFUL_FALLBACK;
      }
      if (!response.ok) {
        getLogger().warn(`consult tool: openai-compatible returned HTTP ${response.status}`);
        return GRACEFUL_FALLBACK;
      }
      const data = await response.json();
      const content = data?.choices?.[0]?.message?.content;
      const spoken = typeof content === "string" ? content.trim() : "";
      if (spoken) {
        return spoken.slice(0, MAX_RESPONSE_CHARS);
      }
      getLogger().warn("consult tool: response missing choices[0].message.content");
      return GRACEFUL_FALLBACK;
    } catch (err) {
      const errName = err instanceof Error ? err.name : "error";
      getLogger().warn(
        `consult tool: openai-compatible call failed: ${errName}`
      );
      return GRACEFUL_FALLBACK;
    }
  };
}
// Default system instruction sent with the post-call record.
var POSTCALL_INSTRUCTION = "A phone call handled by the voice agent has just ended. Here is the record of the call. Log it and follow up if anything needs action.";
// Maximum number of characters of rendered transcript included in the record.
var POSTCALL_MAX_TRANSCRIPT_CHARS = 12e3;
/**
 * Render a plain-text summary of a finished call.
 *
 * Emits, one per line and only when available: ``Caller``, ``Line dialed``,
 * ``Duration`` (rounded seconds, read from ``metrics.durationSeconds`` or
 * ``metrics.duration_seconds``) and, when ``includeTranscript`` is truthy,
 * a ``Transcript:`` section with one ``role: text`` line per entry, truncated
 * to ``POSTCALL_MAX_TRANSCRIPT_CHARS`` characters.
 *
 * Tolerates malformed input rather than throwing, because the caller invokes
 * this outside its own try/catch: a nullish ``data`` is treated as empty, a
 * ``transcript`` that is not an array is ignored, non-object transcript
 * entries are skipped, and a non-finite duration is omitted.
 *
 * @param data - call payload (``caller``, ``callee``, ``metrics``, ``transcript``).
 * @param includeTranscript - whether to append the transcript section.
 * @returns the rendered record, or "(no call details available)" when nothing
 *   could be rendered.
 */
function buildPostCallRecord(data, includeTranscript) {
  const record = data ?? {};
  const lines = [];
  const { caller, callee, metrics } = record;
  if (caller) lines.push(`Caller: ${caller}`);
  if (callee) lines.push(`Line dialed: ${callee}`);
  const duration = metrics?.durationSeconds ?? metrics?.duration_seconds;
  // Number.isFinite rejects non-numbers as well as NaN/Infinity.
  if (Number.isFinite(duration)) lines.push(`Duration: ${Math.round(duration)}s`);
  if (includeTranscript) {
    const entries = Array.isArray(record.transcript) ? record.transcript : [];
    const rendered = entries
      .filter((e) => e && typeof e === "object")
      .map((e) => `${e.role ?? "?"}: ${e.text ?? ""}`)
      .join("\n");
    if (rendered) lines.push("Transcript:\n" + rendered.slice(0, POSTCALL_MAX_TRANSCRIPT_CHARS));
  }
  return lines.length ? lines.join("\n") : "(no call details available)";
}
/**
 * Build a post-call hook that reports a finished call to an OpenClaw agent.
 *
 * Connection settings are resolved through ``openclawConsult`` (so the same
 * agent-id validation, default base URL and loopback default apply), and the
 * resulting ``<baseUrl>/chat/completions`` endpoint is checked with
 * ``validateWebhookUrl`` once, up front.
 *
 * @param agent - OpenClaw agent id.
 * @param opts - optional ``baseUrl``, ``apiKey``, ``timeoutMs``,
 *   ``allowLoopback``, ``includeTranscript`` (default true) and
 *   ``instruction`` (default ``POSTCALL_INSTRUCTION``).
 * @returns an async function taking the call data. It POSTs the instruction
 *   as the system message and the rendered call record as the user message.
 *   HTTP and network failures are logged as warnings and not rethrown.
 */
function openclawPostCallNotifier(agent, opts = {}) {
  const consultCfg = openclawConsult(agent, {
    baseUrl: opts.baseUrl,
    apiKey: opts.apiKey,
    timeoutMs: opts.timeoutMs ?? DEFAULT_TIMEOUT_MS,
    allowLoopback: opts.allowLoopback
  });
  const target = consultCfg.openaiCompatible;
  const endpoint = `${target.baseUrl.replace(/\/+$/, "")}/chat/completions`;
  validateWebhookUrl(endpoint, consultCfg.allowLoopback ?? false);
  const apiKey = target.apiKey ?? (target.apiKeyEnv ? process.env[target.apiKeyEnv] : void 0);
  const { sessionHeader, model } = target;
  const timeoutMs = consultCfg.timeoutMs ?? DEFAULT_TIMEOUT_MS;
  const includeTranscript = opts.includeTranscript ?? true;
  const instruction = opts.instruction ?? POSTCALL_INSTRUCTION;
  return async (data) => {
    const callData = data ?? {};
    const callId = callData.call_id ?? "";
    const record = buildPostCallRecord(callData, includeTranscript);
    const headers = {
      "Content-Type": "application/json",
      ...(apiKey ? { Authorization: `Bearer ${apiKey}` } : {}),
      ...(sessionHeader && callId ? { [sessionHeader]: callId } : {})
    };
    const payload = {
      model,
      messages: [
        { role: "system", content: instruction },
        { role: "user", content: record }
      ],
      stream: false,
      ...(callId ? { user: callId } : {})
    };
    try {
      const response = await fetch(endpoint, {
        method: "POST",
        headers,
        body: JSON.stringify(payload),
        signal: AbortSignal.timeout(timeoutMs)
      });
      if (!response.ok) {
        getLogger().warn(`openclaw post-call notify: HTTP ${response.status}`);
      }
    } catch (err) {
      const errName = err instanceof Error ? err.name : "error";
      getLogger().warn(
        `openclaw post-call notify failed: ${errName}`
      );
    }
  };
}
4020
+
3653
4021
  // src/sentence-chunker.ts
3654
4022
  init_esm_shims();
3655
4023
  var DEFAULT_MIN_SENTENCE_LEN = 20;
@@ -4351,6 +4719,52 @@ async function withSpan(name, attrs, fn) {
4351
4719
  }
4352
4720
 
4353
4721
  // src/stream-handler.ts
4722
+ var DEFAULT_TOOL_CALL_PREAMBLE_BLOCK = `# Preambles
4723
+
4724
+ Use short preambles only when they help the user understand that work is happening. A preamble is one short spoken update describing the action you are about to take \u2014 not hidden reasoning, and never a claim about the result.
4725
+
4726
+ ## When to use a preamble
4727
+ Use a preamble when:
4728
+ - you are about to call a tool that may take noticeable time;
4729
+ - you need to reason through a multi-step request;
4730
+ - you are checking records, availability, account state, or policy details;
4731
+ - you are preparing an escalation or handoff;
4732
+ - silence would make the assistant feel unresponsive.
4733
+
4734
+ When a preamble is needed, output it immediately before the reasoning or tool call.
4735
+
4736
+ ## When to NOT use a preamble
4737
+ Do not use a preamble when:
4738
+ - the answer is direct and can be given immediately;
4739
+ - the user is only confirming, correcting, or declining something;
4740
+ - the audio is unclear and you need clarification instead;
4741
+ - the tool call is lightweight and the user would not benefit from an update.
4742
+
4743
+ ## Style
4744
+ - Keep it to one short sentence (two only before a high-impact action).
4745
+ - Vary the wording across turns; do not reuse the same opener.
4746
+ - Describe the action, not the internal reasoning.
4747
+ - Never imply success or failure before the tool returns.
4748
+
4749
+ Prefer:
4750
+ - "I'll check that order now."
4751
+ - "I'll look up your appointment details."
4752
+ - "I'll verify that before we make any changes."
4753
+ - "I'll check the policy and then give you the next step."
4754
+ - "I'll pull that up so we can make sure it's the right account."
4755
+
4756
+ Avoid:
4757
+ - "Let me think about that for a second."
4758
+ - "Please wait while I process your request."
4759
+ - "I'm going to use my tools now."
4760
+ - "Hmm..." / "One moment while I process that..."`;
4761
/**
 * Prepend a tool-call preamble block to a system prompt.
 *
 * @param prompt The system prompt; may be empty.
 * @param knob Feature switch. Falsy leaves `prompt` untouched. A non-empty
 *   string is used verbatim as the block. Any other truthy value selects
 *   `DEFAULT_TOOL_CALL_PREAMBLE_BLOCK`.
 * @returns `prompt` unchanged when the knob is off; otherwise the block,
 *   a blank line, then the prompt, or just the block when `prompt` is empty.
 */
function applyToolCallPreambles(prompt, knob) {
  if (!knob) {
    return prompt;
  }
  // Read the default lazily so a caller-supplied string never touches it.
  const preamble = typeof knob !== "string" ? DEFAULT_TOOL_CALL_PREAMBLE_BLOCK : knob;
  if (!prompt) {
    return preamble;
  }
  return `${preamble}\n\n${prompt}`;
}
4354
4768
  function checkGuardrails(text, guardrails) {
4355
4769
  if (!guardrails) return null;
4356
4770
  for (const guard of guardrails) {
@@ -4408,39 +4822,63 @@ function augmentWithBuiltinHandoffTools(userTools, callbacks) {
4408
4822
  return out;
4409
4823
  }
4410
4824
  var HALLUCINATIONS = /* @__PURE__ */ new Set([
4411
- "you",
4412
- "thank you",
4413
- "thanks",
4414
- "yeah",
4415
- "yes",
4416
- "no",
4417
- "okay",
4418
- "ok",
4419
- "uh",
4420
- "um",
4421
- "mmm",
4422
- "hmm",
4423
- ".",
4424
- "bye",
4425
- "right",
4426
- "cool",
4427
- // Whisper YouTube-caption hallucinations
4825
+ // Issue #154: the hallucination filter is now DISPLAY-ONLY — it no longer
4826
+ // gates response creation (the server drives the response on
4827
+ // ``input_audio_buffer.committed`` by default). Dropping a phrase here
4828
+ // therefore deletes the user's transcript line (recordSttComplete never
4829
+ // fires → empty user_text → dashboard skips the user line). So this set is
4830
+ // restricted to genuine NON-SPEECH artefacts that Whisper emits on
4831
+ // silence / TTS echo, NOT real conversational words. Standalone words like
4832
+ // 'yes', 'no', 'okay', 'right', 'you', 'thanks' were REMOVED — they are
4833
+ // legitimate user replies and must reach the transcript. Parity with
4834
+ // Python ``_STT_HALLUCINATIONS``.
4835
+ //
4836
+ // Whisper caption / training-set hallucinations. Whisper was trained heavily
4837
+ // on captioned video, so on silence / PSTN echo it falls back to the most
4838
+ // common caption credits + sign-offs. Curated from widely-reported
4839
+ // Whisper-on-silence outputs across the open-source ASR community.
4428
4840
  "thank you for watching",
4429
4841
  "thanks for watching",
4430
4842
  "thank you for watching!",
4431
4843
  "thanks for watching!",
4432
4844
  "thank you so much for watching",
4845
+ "thank you for watching please subscribe",
4846
+ "thanks for watching please subscribe",
4433
4847
  "thanks for listening",
4848
+ "we'll see you next time",
4849
+ "see you next time",
4850
+ "bye bye",
4434
4851
  "please subscribe",
4852
+ "please subscribe to my channel",
4853
+ "don't forget to subscribe",
4854
+ "like and subscribe",
4435
4855
  "subscribe",
4856
+ "subtitles by the amara.org community",
4857
+ "subtitles by the amara org community",
4858
+ "subtitles by",
4859
+ "transcribed by",
4860
+ "transcription by castingwords",
4861
+ "the end",
4862
+ // Music / sound markers.
4436
4863
  "music",
4437
4864
  "[music]",
4865
+ "piano music",
4866
+ "applause",
4867
+ "[applause]",
4438
4868
  "\u266A",
4869
+ // Silence markers.
4439
4870
  "[no audio]",
4440
4871
  "[silence]",
4441
4872
  "[blank_audio]",
4442
4873
  "(silence)"
4443
4874
  ]);
4875
/**
 * Decide whether a user transcript should be treated as a speech-to-text
 * artefact rather than real speech.
 *
 * The text is trimmed, lower-cased and stripped of trailing punctuation and
 * whitespace, then judged as follows:
 *  - nothing left: artefact;
 *  - the remainder is an entry of `HALLUCINATIONS`: artefact;
 *  - the remainder splits on sentence terminators into two or more non-empty
 *    pieces and every piece is an entry of `HALLUCINATIONS`: artefact.
 *
 * Sentence terminators include the CJK forms (ideographic full stop and the
 * fullwidth exclamation / question marks) alongside the ASCII ones. All
 * non-ASCII characters are written as escapes so the patterns survive
 * re-encoding of the file.
 *
 * @param text Raw transcript text (must be a string).
 * @returns `true` when the text should be dropped as an artefact.
 */
function isSttHallucination(text) {
  const stripped = text
    .trim()
    .toLowerCase()
    .replace(/[.,!?;:\u2026\u3002\uFF01\uFF1F\s]+$/u, "")
    .trim();
  if (stripped === "") return true;
  if (HALLUCINATIONS.has(stripped)) return true;
  const pieces = stripped
    .split(/[.!?\u2026\u3002\uFF01\uFF1F]+/u)
    .map((p) => p.trim())
    .filter((p) => p.length > 0);
  // A single unmatched piece was already rejected by the set lookup above;
  // only a run of several known artefacts counts here.
  return pieces.length > 1 && pieces.every((p) => HALLUCINATIONS.has(p));
}
4444
4882
  var StreamHandler = class _StreamHandler {
4445
4883
  deps;
4446
4884
  ws;
@@ -4739,7 +5177,14 @@ var StreamHandler = class _StreamHandler {
4739
5177
  * barge-in armed during the audible tail. Tunable via env.
4740
5178
  */
4741
5179
  endSpeakingWithGrace() {
4742
- const grace = Number(process.env.PATTER_TTS_TAIL_GRACE_MS ?? 1500);
5180
+ const rawGrace = process.env.PATTER_TTS_TAIL_GRACE_MS;
5181
+ const parsedGrace = rawGrace !== void 0 ? Number(rawGrace) : NaN;
5182
+ const grace = rawGrace !== void 0 && Number.isFinite(parsedGrace) ? parsedGrace : 1500;
5183
+ if (rawGrace !== void 0 && !Number.isFinite(parsedGrace)) {
5184
+ getLogger().warn(
5185
+ `PATTER_TTS_TAIL_GRACE_MS="${rawGrace}" is not a valid number \u2014 using default 1500ms`
5186
+ );
5187
+ }
4743
5188
  if (grace > 0) {
4744
5189
  const gen = this.speakingGeneration;
4745
5190
  this.clearGraceTimer();
@@ -4833,6 +5278,14 @@ var StreamHandler = class _StreamHandler {
4833
5278
  `[DIAG] Flushed ${replayed} pre-barge-in frame(s) (~${replayed * 20} ms) to STT`
4834
5279
  );
4835
5280
  }
5281
+ /**
5282
+ * Per-call resolved tool list. Starts as ``null`` (falls back to
5283
+ * ``deps.agent.tools``). Populated by ``initMcpTools`` when MCP servers
5284
+ * are configured so discovered tools are merged in without mutating the
5285
+ * shared ``AgentOptions`` object. Code that needs the effective tool list
5286
+ * should read ``this.resolvedTools ?? this.deps.agent.tools``.
5287
+ */
5288
+ resolvedTools = null;
4836
5289
  llmLoop = null;
4837
5290
  /**
4838
5291
  * Per-call tool executor — provides retry-with-exponential-backoff and a
@@ -4876,6 +5329,17 @@ var StreamHandler = class _StreamHandler {
4876
5329
  userTranscriptPending = false;
4877
5330
  pendingAssistantTurn = null;
4878
5331
  pendingAssistantTimer = null;
5332
+ /**
5333
+ * Reserved monotonic turn index for the in-flight Realtime turn (issue
5334
+ * #154, fix 5/6). Reserved in ``onAdapterSpeechStopped`` via
5335
+ * ``metricsAcc.reserveTurnIndex()`` the moment the turn OPENS, then threaded
5336
+ * through to the live per-line transcript events (``recordTranscriptLine``)
5337
+ * and into ``recordTurnComplete`` / ``recordTurnInterrupted`` so the
5338
+ * dashboard can sort a late-arriving user line ABOVE its agent line by
5339
+ * ``(turnIndex, role)``. ``null`` until the first turn opens. Parity with
5340
+ * Python ``_current_turn_index``.
5341
+ */
5342
+ currentTurnIndex = null;
4879
5343
  /**
4880
5344
  * Hard cap on how long we wait for the user transcript before flushing
4881
5345
  * the buffered assistant turn alone. 3 s covers OpenAI Whisper's typical
@@ -4957,6 +5421,23 @@ var StreamHandler = class _StreamHandler {
4957
5421
  * streaming/regular LLM, WebSocket remote, Realtime response_done) so the
4958
5422
  * payload shape lives in one place.
4959
5423
  */
5424
+ /**
5425
+ * Emit a live per-line transcript event to the dashboard store (issue #154,
5426
+ * fix 5). Routed through a single helper so the call shape lives in one
5427
+ * place. ``recordTranscriptLine`` appends the line to the active call's
5428
+ * transcript and publishes a ``transcript_line`` SSE event; the dashboard
5429
+ * sorts by (turnIndex, user<assistant) so a late user line lands above its
5430
+ * agent line. No-op when no turn index has been reserved yet.
5431
+ */
5432
+ emitTranscriptLine(role, text) {
5433
+ if (this.currentTurnIndex === null) return;
5434
+ this.deps.metricsStore.recordTranscriptLine({
5435
+ call_id: this.callId,
5436
+ turnIndex: this.currentTurnIndex,
5437
+ role,
5438
+ text
5439
+ });
5440
+ }
4960
5441
  async emitTurnMetrics(turn) {
4961
5442
  if (turn == null) return;
4962
5443
  this.deps.metricsStore.recordTurn({ call_id: this.callId, turn });
@@ -5063,7 +5544,7 @@ var StreamHandler = class _StreamHandler {
5063
5544
  if (customParams.callee && !this.callee) this.callee = customParams.callee;
5064
5545
  const mode = this.deps.agent.engine ? `engine=${this.deps.agent.engine.kind ?? "unknown"}` : "pipeline";
5065
5546
  getLogger().info(
5066
- `Call started: ${callId} (${this.deps.bridge.label}, ${mode}, ${sanitizeLogValue(this.caller || "?")} \u2192 ${sanitizeLogValue(this.callee || "?")})`
5547
+ `Call started: ${callId} (${this.deps.bridge.label}, ${mode}, ${maskPhoneNumber(this.caller || "?")} \u2192 ${maskPhoneNumber(this.callee || "?")})`
5067
5548
  );
5068
5549
  if (Object.keys(customParams).length > 0) {
5069
5550
  getLogger().debug(`Custom params: ${sanitizeLogValue(JSON.stringify(customParams))}`);
@@ -5108,10 +5589,13 @@ var StreamHandler = class _StreamHandler {
5108
5589
  const resolvedPrompt = Object.keys(allVars).length > 0 ? this.deps.resolveVariables(this.deps.agent.systemPrompt, allVars) : this.deps.agent.systemPrompt;
5109
5590
  const provider2 = this.deps.agent.provider ?? "openai_realtime";
5110
5591
  await this.initMcpTools();
5592
+ this.injectConsultTool();
5111
5593
  if (provider2 === "pipeline") {
5112
5594
  await this.initPipeline(resolvedPrompt);
5113
5595
  } else {
5114
- await this.initRealtimeAdapter(resolvedPrompt);
5596
+ await this.initRealtimeAdapter(
5597
+ applyToolCallPreambles(resolvedPrompt, this.deps.agent.toolCallPreambles)
5598
+ );
5115
5599
  }
5116
5600
  }
5117
5601
  /**
@@ -5136,10 +5620,25 @@ var StreamHandler = class _StreamHandler {
5136
5620
  }
5137
5621
  if (discovered.length === 0) return;
5138
5622
  MCPManager.assertNoConflicts(this.deps.agent.tools, discovered);
5139
- const mutableAgent = this.deps.agent;
5140
- mutableAgent.tools = [...mutableAgent.tools ?? [], ...discovered];
5623
+ this.resolvedTools = [...this.deps.agent.tools ?? [], ...discovered];
5141
5624
  getLogger().info(`MCP: merged ${discovered.length} tool(s) into agent`);
5142
5625
  }
5626
+ /**
5627
+ * Merge the built-in ``consult`` tool into the per-call tool list when
5628
+ * ``agent.consult`` is set, mirroring {@link initMcpTools}: the shared
5629
+ * ``deps.agent`` is NOT mutated; the merged list is stored on
5630
+ * ``this.resolvedTools`` so ``buildAIAdapter`` (Realtime) and the pipeline
5631
+ * ``LLMLoop`` both see it. Idempotent — a no-op if a tool with the same name
5632
+ * is already present.
5633
+ */
5634
+ injectConsultTool() {
5635
+ const consult = this.deps.agent.consult;
5636
+ if (!consult) return;
5637
+ const consultTool = buildConsultTool(consult);
5638
+ const base = this.resolvedTools ?? (this.deps.agent.tools ?? []);
5639
+ if (base.some((t) => t.name === consultTool.name)) return;
5640
+ this.resolvedTools = [...base, consultTool];
5641
+ }
5143
5642
  /** Set the stream SID (Twilio only, called after parsing 'start' event). */
5144
5643
  /** Set the carrier-side stream id (Twilio `streamSid` / Telnyx stream identifier). */
5145
5644
  setStreamSid(sid) {
@@ -5159,8 +5658,12 @@ var StreamHandler = class _StreamHandler {
5159
5658
  if (activeVad && !this.vadDisabled) {
5160
5659
  try {
5161
5660
  const vadPromise = activeVad.processFrame(pcm16k, 16e3);
5162
- const timeoutPromise = new Promise((resolve2) => setTimeout(() => resolve2(null), 25));
5661
+ let vadTimeoutId;
5662
+ const timeoutPromise = new Promise((resolve2) => {
5663
+ vadTimeoutId = setTimeout(() => resolve2(null), 25);
5664
+ });
5163
5665
  const evt = await Promise.race([vadPromise, timeoutPromise]);
5666
+ clearTimeout(vadTimeoutId);
5164
5667
  if (evt) {
5165
5668
  getLogger().info(
5166
5669
  `[VAD] ${evt.type} agentSpeaking=${this.isSpeaking}`
@@ -5233,7 +5736,7 @@ var StreamHandler = class _StreamHandler {
5233
5736
  if ((this.deps.agent.bargeInThresholdMs ?? 300) === 0) return;
5234
5737
  }
5235
5738
  const hooks = this.deps.agent.hooks;
5236
- if (hooks) {
5739
+ if (hooks?.beforeSendToStt) {
5237
5740
  const hookExecutor = new PipelineHookExecutor(hooks);
5238
5741
  const hookCtx = this.buildHookContext();
5239
5742
  const processed = await hookExecutor.runBeforeSendToStt(pcm16k, hookCtx);
@@ -5504,7 +6007,7 @@ var StreamHandler = class _StreamHandler {
5504
6007
  }
5505
6008
  if (!this.deps.agent.vad) {
5506
6009
  try {
5507
- const { SileroVAD } = await import("./silero-vad-LNDFGIY7.mjs");
6010
+ const { SileroVAD } = await import("./silero-vad-RGF5HCIR.mjs");
5508
6011
  this.autoVad = await SileroVAD.forPhoneCall();
5509
6012
  getLogger().info(
5510
6013
  `auto-VAD enabled (SileroVAD, phone preset). Pass agent.vad=\u2026 to override.`
@@ -5659,7 +6162,7 @@ var StreamHandler = class _StreamHandler {
5659
6162
  }
5660
6163
  const providerModel = this.deps.agent.llm?.model ?? "";
5661
6164
  const augmentedTools = augmentWithBuiltinHandoffTools(
5662
- this.deps.agent.tools,
6165
+ this.resolvedTools ?? this.deps.agent.tools,
5663
6166
  {
5664
6167
  transferCall: (number) => this.deps.bridge.transferCall(this.callId, number),
5665
6168
  endCall: () => this.deps.bridge.endCall(this.callId, this.ws)
@@ -5683,7 +6186,7 @@ var StreamHandler = class _StreamHandler {
5683
6186
  let llmModel = this.deps.agent.model || "gpt-4o-mini";
5684
6187
  if (llmModel.includes("realtime")) llmModel = "gpt-4o-mini";
5685
6188
  const augmentedTools = augmentWithBuiltinHandoffTools(
5686
- this.deps.agent.tools,
6189
+ this.resolvedTools ?? this.deps.agent.tools,
5687
6190
  {
5688
6191
  transferCall: (number) => this.deps.bridge.transferCall(this.callId, number),
5689
6192
  endCall: () => this.deps.bridge.endCall(this.callId, this.ws)
@@ -6107,6 +6610,14 @@ var StreamHandler = class _StreamHandler {
6107
6610
  chunker.reset();
6108
6611
  getLogger().error(`LLM loop error (${label}):`, e);
6109
6612
  this.metricsAcc.recordTurnInterrupted();
6613
+ const fallback = this.deps.agent.llmErrorMessage;
6614
+ if (fallback && !ttsFirstByteSent.value && this.isSpeaking) {
6615
+ try {
6616
+ await this.synthesizeSentence(fallback, hookExecutor, hookCtx, ttsFirstByteSent);
6617
+ } catch (err) {
6618
+ getLogger().error(`llmErrorMessage fallback synthesis failed (${label}):`, err);
6619
+ }
6620
+ }
6110
6621
  }
6111
6622
  }
6112
6623
  this.metricsAcc.recordLlmComplete();
@@ -6207,7 +6718,7 @@ var StreamHandler = class _StreamHandler {
6207
6718
  // ---------------------------------------------------------------------------
6208
6719
  async initRealtimeAdapter(resolvedPrompt) {
6209
6720
  const label = this.deps.bridge.label;
6210
- this.adapter = this.deps.buildAIAdapter(resolvedPrompt);
6721
+ this.adapter = this.deps.buildAIAdapter(resolvedPrompt, this.resolvedTools ?? void 0);
6211
6722
  let parked;
6212
6723
  if (typeof this.deps.popPrewarmedConnections === "function") {
6213
6724
  try {
@@ -6280,6 +6791,7 @@ var StreamHandler = class _StreamHandler {
6280
6791
  response_done: async (eventData) => this.onAdapterResponseDone(eventData),
6281
6792
  speech_started: async () => this.onAdapterSpeechInterrupt(),
6282
6793
  interruption: async () => this.onAdapterSpeechInterrupt(),
6794
+ error: async (eventData) => this.onAdapterError(eventData),
6283
6795
  function_call: async (eventData) => {
6284
6796
  if (this.adapter instanceof OpenAIRealtimeAdapter) {
6285
6797
  await this.handleFunctionCall(eventData);
@@ -6366,21 +6878,31 @@ var StreamHandler = class _StreamHandler {
6366
6878
  if (!this.metricsAcc.turnActive) this.metricsAcc.startTurn();
6367
6879
  this.currentAgentText = "";
6368
6880
  this.responseAudioStarted = false;
6881
+ this.currentTurnIndex = this.metricsAcc.reserveTurnIndex();
6369
6882
  this.userTranscriptPending = true;
6370
6883
  await this.emitUserSpeechEnded();
6371
6884
  }
6372
6885
  async onAdapterTranscriptInput(inputText) {
6373
- const stripped = inputText.trim().toLowerCase();
6374
- if (HALLUCINATIONS.has(stripped) || stripped === "") {
6886
+ if (isSttHallucination(inputText)) {
6375
6887
  getLogger().debug(
6376
6888
  `Realtime transcript_input dropped (likely Whisper hallucination on silence/echo): ${sanitizeLogValue(inputText.slice(0, 60))}`
6377
6889
  );
6378
6890
  this.userTranscriptPending = false;
6891
+ if (this.pendingAssistantTurn !== null) {
6892
+ const buffered = this.pendingAssistantTurn;
6893
+ this.pendingAssistantTurn = null;
6894
+ if (this.pendingAssistantTimer) {
6895
+ clearTimeout(this.pendingAssistantTimer);
6896
+ this.pendingAssistantTimer = null;
6897
+ }
6898
+ await this.flushAssistantTurn(buffered);
6899
+ }
6379
6900
  return;
6380
6901
  }
6381
6902
  getLogger().debug(`User (${this.deps.bridge.label}): ${sanitizeLogValue(inputText)}`);
6382
6903
  this.history.push({ role: "user", text: inputText, timestamp: Date.now() });
6383
- if (this.adapter instanceof OpenAIRealtimeAdapter) {
6904
+ this.emitTranscriptLine("user", inputText);
6905
+ if (this.adapter instanceof OpenAIRealtimeAdapter && this.adapter.getGateResponseOnTranscript()) {
6384
6906
  void this.adapter.requestResponse().catch(
6385
6907
  (err) => getLogger().debug(`Realtime requestResponse failed: ${String(err)}`)
6386
6908
  );
@@ -6427,8 +6949,12 @@ var StreamHandler = class _StreamHandler {
6427
6949
  history: [...this.history.entries]
6428
6950
  });
6429
6951
  }
6952
+ const reservedIndex = this.currentTurnIndex;
6953
+ this.emitTranscriptLine("assistant", text);
6430
6954
  this.responseAudioStarted = false;
6431
- await this.emitTurnMetrics(this.metricsAcc.recordTurnComplete(text));
6955
+ await this.emitTurnMetrics(
6956
+ this.metricsAcc.recordTurnComplete(text, reservedIndex ?? void 0)
6957
+ );
6432
6958
  }
6433
6959
  /**
6434
6960
  * Push an assistant turn into history and fire `onTranscript` so host
@@ -6527,7 +7053,9 @@ var StreamHandler = class _StreamHandler {
6527
7053
  this.pendingAssistantTimer = null;
6528
7054
  this.userTranscriptPending = false;
6529
7055
  if (buffered !== null) {
6530
- void this.flushAssistantTurn(buffered);
7056
+ this.flushAssistantTurn(buffered).catch(
7057
+ (err) => getLogger().error("flushAssistantTurn (fallback timer) failed:", err)
7058
+ );
6531
7059
  }
6532
7060
  }, _StreamHandler.REALTIME_USER_TRANSCRIPT_WAIT_MS);
6533
7061
  this.responseAudioStarted = false;
@@ -6536,7 +7064,9 @@ var StreamHandler = class _StreamHandler {
6536
7064
  await this.flushAssistantTurn(text);
6537
7065
  }
6538
7066
  async onAdapterSpeechInterrupt() {
6539
- if (this.adapter instanceof OpenAIRealtimeAdapter) {
7067
+ const isEngine = this.adapter instanceof OpenAIRealtimeAdapter;
7068
+ const clientManaged = isEngine && this.adapter.getGateResponseOnTranscript();
7069
+ if (clientManaged) {
6540
7070
  const startedAt = this.adapter.currentResponseFirstAudioAt;
6541
7071
  if (startedAt !== null) {
6542
7072
  const elapsedMs = Date.now() - startedAt;
@@ -6549,12 +7079,20 @@ var StreamHandler = class _StreamHandler {
6549
7079
  }
6550
7080
  }
6551
7081
  this.deps.bridge.sendClear(this.ws, this.streamSid);
6552
- if (this.adapter instanceof OpenAIRealtimeAdapter) this.adapter.cancelResponse();
7082
+ if (clientManaged) {
7083
+ this.metricsAcc.recordBargeinDetected();
7084
+ this.adapter.cancelResponse();
7085
+ } else if (isEngine) {
7086
+ this.adapter.truncate();
7087
+ }
6553
7088
  this.metricsAcc.recordTurnInterrupted();
6554
7089
  if (this.responseAudioStarted) {
6555
7090
  await this.emitAgentSpeechEnded(true);
6556
7091
  }
6557
7092
  await this.emitUserSpeechStarted();
7093
+ if (clientManaged) {
7094
+ this.metricsAcc.anchorUserSpeechStart();
7095
+ }
6558
7096
  this.currentAgentText = "";
6559
7097
  this.responseAudioStarted = false;
6560
7098
  this.pendingAssistantTurn = null;
@@ -6564,6 +7102,28 @@ var StreamHandler = class _StreamHandler {
6564
7102
  }
6565
7103
  this.userTranscriptPending = false;
6566
7104
  }
7105
+ /**
7106
+ * Handle a Realtime ``error`` event (issue #154, fix 4).
7107
+ *
7108
+ * Both Realtime providers dispatch ``('error', …)`` for server-side errors,
7109
+ * non-normal socket closes, and socket errors, but the stream handler
7110
+ * previously had no entry for it in the dispatch table so these were
7111
+ * silently swallowed. We surface them at WARN level with ONLY the error
7112
+ * envelope fields (``type`` / ``code`` / ``message``) — never any audio or
7113
+ * transcript body, to avoid logging PII. The call is NOT terminated: the
7114
+ * provider decides whether to recover, and many of these (e.g. a transient
7115
+ * ``input_audio_buffer_commit_empty``) are non-fatal. Parity with the
7116
+ * Python ``elif ev_type == 'error'`` branches.
7117
+ */
7118
+ async onAdapterError(eventData) {
7119
+ const err = eventData ?? {};
7120
+ const type = typeof err.type === "string" ? err.type : "unknown";
7121
+ const code = typeof err.code === "string" ? err.code : "";
7122
+ const message = typeof err.message === "string" ? err.message : "";
7123
+ getLogger().warn(
7124
+ `Realtime error (${this.deps.bridge.label}) type=${type} code=${code} message=${sanitizeLogValue(message)}`
7125
+ );
7126
+ }
6567
7127
  /**
6568
7128
  * Emit a tool-invocation event into the transcript timeline. Pushes a
6569
7129
  * `role=tool` entry into `history` (so it appears in the dashboard
@@ -6631,7 +7191,8 @@ var StreamHandler = class _StreamHandler {
6631
7191
  }
6632
7192
  return;
6633
7193
  }
6634
- const toolDef = this.deps.agent.tools?.find((t) => t.name === fc.name);
7194
+ const effectiveTools = this.resolvedTools ?? this.deps.agent.tools;
7195
+ const toolDef = effectiveTools?.find((t) => t.name === fc.name);
6635
7196
  if (!toolDef) {
6636
7197
  getLogger().warn(`Realtime tool '${fc.name}' not found in agent.tools \u2014 skipping`);
6637
7198
  const result2 = JSON.stringify({ error: `Tool '${fc.name}' not registered`, fallback: true });
@@ -6654,7 +7215,8 @@ var StreamHandler = class _StreamHandler {
6654
7215
  if (msg && this.adapter instanceof OpenAIRealtimeAdapter) {
6655
7216
  const realtimeAdapter = this.adapter;
6656
7217
  reassuranceTimer = setTimeout(() => {
6657
- realtimeAdapter.sendText(msg).catch((e) => {
7218
+ const fire = typeof realtimeAdapter.sendReassurance === "function" ? realtimeAdapter.sendReassurance(msg) : realtimeAdapter.sendText(msg);
7219
+ fire.catch((e) => {
6658
7220
  getLogger().warn(`Reassurance message failed for tool '${fc.name}': ${String(e)}`);
6659
7221
  });
6660
7222
  }, afterMs);
@@ -6674,7 +7236,8 @@ var StreamHandler = class _StreamHandler {
6674
7236
  parsedArgs,
6675
7237
  {
6676
7238
  call_id: this.callId,
6677
- caller: this.caller
7239
+ caller: this.caller,
7240
+ callee: this.callee
6678
7241
  },
6679
7242
  onProgress
6680
7243
  );
@@ -6924,7 +7487,9 @@ var CallLogger = class {
6924
7487
  getLogger().warn(`call_log write failed (${sanitizeLogValue(callId)}): ${sanitizeLogValue(String(err))}`);
6925
7488
  }
6926
7489
  if (crypto4.randomBytes(1)[0] < 5) {
6927
- this.sweepOldDays();
7490
+ void this.sweepOldDays().catch(
7491
+ (e) => getLogger().debug(`call_log sweep failed: ${sanitizeLogValue(String(e))}`)
7492
+ );
6928
7493
  }
6929
7494
  }
6930
7495
  /** Append a single turn record to the call's `transcript.jsonl`. */
@@ -6999,23 +7564,27 @@ var CallLogger = class {
6999
7564
  }
7000
7565
  }
7001
7566
  // --- Retention ---------------------------------------------------------
7002
- sweepOldDays() {
7567
+ async sweepOldDays() {
7003
7568
  if (this.root === null) return;
7004
7569
  const days = retentionDays();
7005
7570
  if (days === 0) return;
7006
7571
  const cutoff = Date.now() / 1e3 - days * 86400;
7007
7572
  const callsRoot = path3.join(this.root, "calls");
7008
- if (!fs3.existsSync(callsRoot)) return;
7009
7573
  try {
7010
- for (const yearName of fs3.readdirSync(callsRoot)) {
7574
+ await fsp.access(callsRoot);
7575
+ } catch {
7576
+ return;
7577
+ }
7578
+ try {
7579
+ for (const yearName of await fsp.readdir(callsRoot)) {
7011
7580
  if (!/^\d+$/.test(yearName)) continue;
7012
7581
  const yearDir = path3.join(callsRoot, yearName);
7013
- if (!fs3.statSync(yearDir).isDirectory()) continue;
7014
- for (const monthName of fs3.readdirSync(yearDir)) {
7582
+ if (!(await fsp.stat(yearDir)).isDirectory()) continue;
7583
+ for (const monthName of await fsp.readdir(yearDir)) {
7015
7584
  if (!/^\d+$/.test(monthName)) continue;
7016
7585
  const monthDir = path3.join(yearDir, monthName);
7017
- if (!fs3.statSync(monthDir).isDirectory()) continue;
7018
- for (const dayName of fs3.readdirSync(monthDir)) {
7586
+ if (!(await fsp.stat(monthDir)).isDirectory()) continue;
7587
+ for (const dayName of await fsp.readdir(monthDir)) {
7019
7588
  if (!/^\d+$/.test(dayName)) continue;
7020
7589
  const dayDir = path3.join(monthDir, dayName);
7021
7590
  const y = Number.parseInt(yearName, 10);
@@ -7023,16 +7592,16 @@ var CallLogger = class {
7023
7592
  const d = Number.parseInt(dayName, 10);
7024
7593
  const ts = Date.UTC(y, m - 1, d) / 1e3;
7025
7594
  if (ts < cutoff) {
7026
- rmTree(dayDir);
7595
+ await rmTreeAsync(dayDir);
7027
7596
  }
7028
7597
  }
7029
7598
  try {
7030
- if (fs3.readdirSync(monthDir).length === 0) fs3.rmdirSync(monthDir);
7599
+ if ((await fsp.readdir(monthDir)).length === 0) await fsp.rmdir(monthDir);
7031
7600
  } catch {
7032
7601
  }
7033
7602
  }
7034
7603
  try {
7035
- if (fs3.readdirSync(yearDir).length === 0) fs3.rmdirSync(yearDir);
7604
+ if ((await fsp.readdir(yearDir)).length === 0) await fsp.rmdir(yearDir);
7036
7605
  } catch {
7037
7606
  }
7038
7607
  }
@@ -7041,21 +7610,21 @@ var CallLogger = class {
7041
7610
  }
7042
7611
  }
7043
7612
  };
7044
/**
 * Best-effort recursive removal of `target` and everything beneath it.
 *
 * Delegates to `fsp.rm` with `recursive` and `force` instead of walking the
 * tree by hand. `force` makes a missing target a no-op, and symbolic links
 * are removed as links rather than followed.
 *
 * This helper never rejects: retention sweeping is opportunistic, so any
 * filesystem error is dropped and the next sweep simply tries again.
 *
 * @param target Path of the directory tree to delete.
 * @returns A promise that always resolves to `undefined`.
 */
async function rmTreeAsync(target) {
  try {
    await fsp.rm(target, { recursive: true, force: true });
  } catch {
    // Deliberately ignored: see the best-effort note above.
  }
}
@@ -7116,13 +7685,16 @@ function telnyxHangupOutcome(cause) {
7116
7685
  if (c === "call_rejected" || c === "rejected" || c === "destination_out_of_order") return "failed";
7117
7686
  return null;
7118
7687
  }
7119
- function validateWebhookUrl(url) {
7688
+ function validateWebhookUrl(url, allowLoopback = false) {
7120
7689
  const parsed = new URL(url);
7121
7690
  if (!["http:", "https:"].includes(parsed.protocol)) {
7122
7691
  throw new Error(`Invalid webhook URL scheme: ${parsed.protocol}`);
7123
7692
  }
7124
7693
  const rawHost = parsed.hostname;
7125
7694
  const host = rawHost.replace(/^\[/, "").replace(/\]$/, "").toLowerCase();
7695
+ if (allowLoopback) {
7696
+ return;
7697
+ }
7126
7698
  const BLOCKED_HOSTNAMES = /* @__PURE__ */ new Set([
7127
7699
  "localhost",
7128
7700
  "ip6-localhost",
@@ -7164,6 +7736,34 @@ function validateWebhookUrl(url) {
7164
7736
  }
7165
7737
  }
7166
7738
  }
7739
/**
 * Reduce a host-like string to its lower-cased host.
 *
 * Accepted shapes: a bare host, `host:port`, a bracketed IPv6 literal with an
 * optional port (`[::1]:8080`), or a full URL. The following are removed, in
 * order:
 *  1. a leading `scheme://` (any RFC 3986 scheme, not just letters);
 *  2. everything from the first `/`, `?` or `#`;
 *  3. any `userinfo@` prefix, so credentials can never be mistaken for
 *     part of the host;
 *  4. brackets around an IPv6 literal, or a trailing numeric `:port`.
 *
 * An unbracketed value containing `::` is assumed to be a bare IPv6 address
 * and is returned without port stripping.
 *
 * @param value Host, host:port or URL (must be a string).
 * @returns The lower-cased host, or "" when `value` is blank.
 */
function extractHost(value) {
  const trimmed = value.trim();
  if (!trimmed) return "";
  let host = trimmed
    .replace(/^[a-z][a-z0-9+.-]*:\/\//i, "")
    .replace(/[/?#].*$/, "");
  // Userinfo ends at the LAST "@" of the authority.
  const at = host.lastIndexOf("@");
  if (at !== -1) {
    host = host.slice(at + 1);
  }
  if (host.startsWith("[")) {
    return host.slice(1).split("]", 1)[0].toLowerCase();
  }
  if (!host.includes("::")) {
    const lastColon = host.lastIndexOf(":");
    if (lastColon !== -1 && /^\d+$/.test(host.slice(lastColon + 1))) {
      host = host.slice(0, lastColon);
    }
  }
  return host.toLowerCase();
}
7754
/**
 * Report whether a host-like string names the local machine's loopback
 * interface.
 *
 * The value is first reduced to a bare host with `extractHost`. It counts as
 * loopback when it is:
 *  - one of the names `localhost`, `ip6-localhost`, `ip6-loopback`;
 *  - the IPv6 loopback `::1`;
 *  - a dotted-quad IPv4 address in 127.0.0.0/8, either plain or written as
 *    an IPv4-mapped IPv6 address (`::ffff:127.x.y.z`).
 *
 * Every octet must be in 0-255, so malformed strings such as `127.999.0.1`
 * are not accepted as loopback.
 *
 * @param value Host, host:port or URL.
 * @returns `true` only for the loopback forms listed above.
 */
function isLoopbackHost(value) {
  const host = extractHost(value);
  if (!host) return false;
  if (host === "localhost" || host === "ip6-localhost" || host === "ip6-loopback") {
    return true;
  }
  if (host === "::1") return true;
  // Unwrap an IPv4-mapped IPv6 literal so the range check below covers it too.
  const mapped = /^::ffff:(.+)$/.exec(host);
  const v4 = /^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/.exec(mapped ? mapped[1] : host);
  if (!v4) return false;
  const octets = v4.slice(1).map((part) => Number.parseInt(part, 10));
  return octets[0] === 127 && octets.every((octet) => octet <= 255);
}
7167
7767
  function validateTelnyxSignature(rawBody, signature, timestamp, publicKey, toleranceSec = 300) {
7168
7768
  try {
7169
7769
  const ts = parseInt(timestamp, 10);
@@ -7227,7 +7827,7 @@ function resolveVariables(template, variables) {
7227
7827
  }
7228
7828
  return result;
7229
7829
  }
7230
- function buildAIAdapter(config, agent, resolvedPrompt) {
7830
+ function buildAIAdapter(config, agent, resolvedPrompt, toolsOverride) {
7231
7831
  const engine = agent.engine;
7232
7832
  if (agent.provider === "elevenlabs_convai") {
7233
7833
  if (!engine || engine.kind !== "elevenlabs_convai") {
@@ -7242,12 +7842,24 @@ function buildAIAdapter(config, agent, resolvedPrompt) {
7242
7842
  agent.firstMessage ?? ""
7243
7843
  );
7244
7844
  }
7245
- const agentTools = agent.tools?.map((t) => ({
7246
- name: t.name,
7247
- description: t.description,
7248
- parameters: t.parameters,
7249
- strict: t.strict
7250
- })) ?? [];
7845
+ const preamblesOn = Boolean(agent.toolCallPreambles);
7846
+ const agentTools = (toolsOverride ?? agent.tools)?.map((t) => {
7847
+ let description = t.description;
7848
+ const reassurance = t.reassurance;
7849
+ const sample = typeof reassurance === "string" ? reassurance : void 0;
7850
+ if (preamblesOn && sample) {
7851
+ description = `${description}
7852
+
7853
+ Preamble sample phrases:
7854
+ - ${sample}`;
7855
+ }
7856
+ return {
7857
+ name: t.name,
7858
+ description,
7859
+ parameters: t.parameters,
7860
+ strict: t.strict
7861
+ };
7862
+ }) ?? [];
7251
7863
  const tools = [...agentTools, TRANSFER_CALL_TOOL, END_CALL_TOOL];
7252
7864
  const isOpenAIEngine = engine && (engine.kind === "openai_realtime" || engine.kind === "openai_realtime_2");
7253
7865
  const openaiKey = isOpenAIEngine ? engine.apiKey : config.openaiKey ?? "";
@@ -7259,8 +7871,27 @@ function buildAIAdapter(config, agent, resolvedPrompt) {
7259
7871
  if (engine.inputAudioTranscriptionModel !== void 0) {
7260
7872
  adapterOptions.inputAudioTranscriptionModel = engine.inputAudioTranscriptionModel;
7261
7873
  }
7874
+ if (engine.noiseReduction !== void 0) {
7875
+ adapterOptions.noiseReduction = engine.noiseReduction;
7876
+ }
7877
+ if (engine.turnDetection !== void 0) {
7878
+ adapterOptions.turnDetection = engine.turnDetection;
7879
+ }
7880
+ if (engine.gateResponseOnTranscript !== void 0) {
7881
+ adapterOptions.gateResponseOnTranscript = engine.gateResponseOnTranscript;
7882
+ }
7883
+ }
7884
+ const agentOpts = agent;
7885
+ if (agentOpts.openaiRealtimeNoiseReduction !== void 0) {
7886
+ adapterOptions.noiseReduction = agentOpts.openaiRealtimeNoiseReduction;
7887
+ }
7888
+ if (agentOpts.realtimeTurnDetection !== void 0) {
7889
+ adapterOptions.turnDetection = agentOpts.realtimeTurnDetection;
7262
7890
  }
7263
- const AdapterCtor = engine && engine.kind === "openai_realtime_2" ? OpenAIRealtime2Adapter : OpenAIRealtimeAdapter;
7891
+ if (agentOpts.openaiRealtimeGateResponseOnTranscript !== void 0) {
7892
+ adapterOptions.gateResponseOnTranscript = agentOpts.openaiRealtimeGateResponseOnTranscript;
7893
+ }
7894
+ const AdapterCtor = OpenAIRealtime2Adapter;
7264
7895
  return new AdapterCtor(
7265
7896
  openaiKey,
7266
7897
  agent.model,
@@ -7294,6 +7925,11 @@ var TwilioBridge = class {
7294
7925
  getLogger().warn(`TwilioBridge.transferCall rejected: invalid CallSid ${JSON.stringify(callId)}`);
7295
7926
  return;
7296
7927
  }
7928
+ const E164_RE = /^\+[1-9]\d{6,14}$/;
7929
+ if (!E164_RE.test(toNumber)) {
7930
+ getLogger().warn(`TwilioBridge.transferCall rejected: invalid target ${JSON.stringify(toNumber)}`);
7931
+ return;
7932
+ }
7297
7933
  const transferUrl = `https://api.twilio.com/2010-04-01/Accounts/${this.config.twilioSid}/Calls/${callId}.json`;
7298
7934
  await fetch(transferUrl, {
7299
7935
  method: "POST",
@@ -7509,7 +8145,7 @@ var TelnyxBridge = class {
7509
8145
  };
7510
8146
  var GRACEFUL_SHUTDOWN_TIMEOUT_MS = 1e4;
7511
8147
  var EmbeddedServer = class {
7512
- constructor(config, agent, onCallStart, onCallEnd, onTranscript, onMessage, recording = false, voicemailMessage = "", onMetrics, pricingOverrides, dashboard = true, dashboardToken = "") {
8148
+ constructor(config, agent, onCallStart, onCallEnd, onTranscript, onMessage, recording = false, voicemailMessage = "", onMetrics, pricingOverrides, dashboard = true, dashboardToken = "", allowInsecureDashboard = false) {
7513
8149
  this.config = config;
7514
8150
  this.agent = agent;
7515
8151
  this.onCallStart = onCallStart;
@@ -7521,6 +8157,7 @@ var EmbeddedServer = class {
7521
8157
  this.onMetrics = onMetrics;
7522
8158
  this.dashboard = dashboard;
7523
8159
  this.dashboardToken = dashboardToken;
8160
+ this.allowInsecureDashboard = allowInsecureDashboard;
7524
8161
  this.metricsStore = new MetricsStore();
7525
8162
  this.pricing = mergePricing(pricingOverrides);
7526
8163
  const logRoot = config.persistRoot === void 0 ? resolveLogRoot() : config.persistRoot;
@@ -7547,8 +8184,31 @@ var EmbeddedServer = class {
7547
8184
  onMetrics;
7548
8185
  dashboard;
7549
8186
  dashboardToken;
8187
+ allowInsecureDashboard;
7550
8188
  server = null;
7551
8189
  wss = null;
8190
+ /**
8191
+ * Whether the dashboard + ``/api/*`` routes were mounted in ``start()``.
8192
+ * The dashboard is now ALWAYS mounted when enabled (it never 404s): an
8193
+ * exposed, token-less bind is protected with an auto-generated token
8194
+ * rather than refused. This flag is therefore ``true`` whenever the
8195
+ * dashboard is enabled — kept so the startup banner can gate on it.
8196
+ */
8197
+ dashboardMounted = false;
8198
+ /**
8199
+ * The token actually in effect for the dashboard + ``/api/*`` routes,
8200
+ * resolved in ``start()``. One of: the explicit ``dashboardToken`` if set;
8201
+ * a freshly generated UUID when the bind is exposed and
8202
+ * ``allowInsecureDashboard`` is ``false``; or ``''`` (OPEN) for loopback
8203
+ * local dev and for an exposed bind with ``allowInsecureDashboard=true``.
8204
+ * Read by the startup banner (to print the ready URL with ``?token=``) and
8205
+ * by authentic tests (to authenticate).
8206
+ */
8207
+ effectiveDashboardToken = "";
8208
+ /** The token in effect for the dashboard, resolved at ``start()``. Empty string = served OPEN. */
8209
+ get resolvedDashboardToken() {
8210
+ return this.effectiveDashboardToken;
8211
+ }
7552
8212
  twilioTokenWarningLogged = false;
7553
8213
  telnyxSigWarningLogged = false;
7554
8214
  metricsStore;
@@ -7566,12 +8226,14 @@ var EmbeddedServer = class {
7566
8226
  activeConnections = /* @__PURE__ */ new Set();
7567
8227
  activeCallIds = /* @__PURE__ */ new Map();
7568
8228
  /**
7569
- * Per-call AMD result callback set by ``Patter.call()`` for the most
7570
- * recent outbound call. Public so ``client.ts`` can populate it after
7571
- * server start. Cleared after firing once per call to avoid leaking
7572
- * across calls.
8229
+ * Per-call AMD result callbacks keyed by CallSid / call_control_id.
8230
+ * Public so ``client.ts`` can register a callback per outbound call.
8231
+ * The Map slot is deleted after the callback fires once, preventing
8232
+ * cross-call misfires when multiple concurrent outbound calls are in
8233
+ * flight (single-slot was a race condition: the last registered callback
8234
+ * would win for every in-flight AMD result).
7573
8235
  */
7574
- onMachineDetection;
8236
+ onMachineDetectionByCallSid = /* @__PURE__ */ new Map();
7575
8237
  /**
7576
8238
  * Pre-warm first-message audio accessor wired by ``Patter.serve()``.
7577
8239
  * The per-call StreamHandler invokes this with its ``callId`` at the
@@ -7692,6 +8354,42 @@ var EmbeddedServer = class {
7692
8354
  this.completions.clear();
7693
8355
  this.amdClass.clear();
7694
8356
  }
8357
+ /**
8358
+ * Decide whether this server is reachable beyond loopback (127.0.0.1).
8359
+ *
8360
+ * The dashboard serves call transcripts and metadata (PII), so before
8361
+ * mounting it unauthenticated we must know whether anyone off-host can
8362
+ * reach the port. Signals (in order):
8363
+ *
8364
+ * (a)+(b) — a public webhook URL. ``client.ts`` resolves
8365
+ * ``config.webhookUrl`` to the live hostname for every serve path:
8366
+ * a cloudflared quick-tunnel host, a {@link StaticTunnel} hostname,
8367
+ * or an explicit ``webhookUrl``. A tunnel directive (signal a) and a
8368
+ * public webhook URL (signal b) therefore both surface here as a
8369
+ * non-loopback, non-private webhook host. This is the case that
8370
+ * matters for tunnels — the whole port (dashboard included) is
8371
+ * published on a public ``*.trycloudflare.com`` URL.
8372
+ *
8373
+ * (c) — an EXPLICIT non-loopback bind override via ``PATTER_BIND_HOST``.
8374
+ * Node's ``http.Server.listen(port, host)`` defaults to 127.0.0.1
8375
+ * here (see ``start()``), so plain local dev is never flagged; only
8376
+ * an operator who set ``PATTER_BIND_HOST`` to e.g. ``0.0.0.0`` is.
8377
+ *
8378
+ * Only loopback webhook hosts (127.0.0.0/8, localhost, ::1) are treated as
8379
+ * not-exposed. RFC1918 / LAN hosts ARE exposure — they are reachable by
8380
+ * other machines on the network — matching the Python SDK's gate.
8381
+ */
8382
+ isExposed() {
8383
+ const bindOverride = process.env.PATTER_BIND_HOST;
8384
+ if (bindOverride && !isLoopbackHost(bindOverride)) {
8385
+ return true;
8386
+ }
8387
+ const host = extractHost(this.config.webhookUrl ?? "");
8388
+ if (host && !isLoopbackHost(host)) {
8389
+ return true;
8390
+ }
8391
+ return false;
8392
+ }
7695
8393
  /** Bind HTTP + WebSocket listeners on `port`, mount carrier webhooks and dashboard routes. */
7696
8394
  async start(port = 8e3) {
7697
8395
  const webhookUrlPattern = /^[a-zA-Z0-9][a-zA-Z0-9.\-]+[a-zA-Z0-9]$/;
@@ -7727,6 +8425,9 @@ var EmbeddedServer = class {
7727
8425
  }
7728
8426
  next();
7729
8427
  });
8428
+ req.on("error", (err) => {
8429
+ next(err);
8430
+ });
7730
8431
  } else {
7731
8432
  next();
7732
8433
  }
@@ -7737,8 +8438,25 @@ var EmbeddedServer = class {
7737
8438
  res.json({ status: "ok", mode: "local" });
7738
8439
  });
7739
8440
  if (this.dashboard) {
7740
- mountDashboard(app, this.metricsStore, this.dashboardToken);
7741
- mountApi(app, this.metricsStore, this.dashboardToken);
8441
+ const exposed = this.isExposed();
8442
+ if (this.dashboardToken) {
8443
+ this.effectiveDashboardToken = this.dashboardToken;
8444
+ } else if (exposed && !this.allowInsecureDashboard) {
8445
+ this.effectiveDashboardToken = crypto5.randomUUID();
8446
+ getLogger().warn(
8447
+ `Dashboard is reachable beyond 127.0.0.1 without a configured token; protecting it with an auto-generated token. Open: http://127.0.0.1:${port}/?token=${this.effectiveDashboardToken} Set dashboardToken for a stable token, or allowInsecureDashboard=true to serve it open.`
8448
+ );
8449
+ } else if (exposed && this.allowInsecureDashboard) {
8450
+ this.effectiveDashboardToken = "";
8451
+ getLogger().warn(
8452
+ "Dashboard served WITHOUT authentication on a publicly-reachable bind (allowInsecureDashboard=true). Call transcripts and metadata are exposed to anyone who can reach this URL."
8453
+ );
8454
+ } else {
8455
+ this.effectiveDashboardToken = "";
8456
+ }
8457
+ mountDashboard(app, this.metricsStore, this.effectiveDashboardToken);
8458
+ mountApi(app, this.metricsStore, this.effectiveDashboardToken);
8459
+ this.dashboardMounted = true;
7742
8460
  }
7743
8461
  app.post("/webhooks/twilio/status", (req, res) => {
7744
8462
  if (this.config.twilioToken) {
@@ -7824,8 +8542,9 @@ var EmbeddedServer = class {
7824
8542
  if (callSid) {
7825
8543
  this.amdClass.set(callSid, classifyTwilioAmd(answeredBy));
7826
8544
  }
7827
- const cb = this.onMachineDetection;
8545
+ const cb = callSid ? this.onMachineDetectionByCallSid.get(callSid) : void 0;
7828
8546
  if (cb && callSid) {
8547
+ this.onMachineDetectionByCallSid.delete(callSid);
7829
8548
  try {
7830
8549
  await cb({
7831
8550
  call_id: callSid,
@@ -7952,8 +8671,9 @@ var EmbeddedServer = class {
7952
8671
  if (amdCallId) {
7953
8672
  this.amdClass.set(amdCallId, classifyTelnyxAmd(amdResult));
7954
8673
  }
7955
- const cbTx = this.onMachineDetection;
8674
+ const cbTx = amdCallId ? this.onMachineDetectionByCallSid.get(amdCallId) : void 0;
7956
8675
  if (cbTx && amdCallId) {
8676
+ this.onMachineDetectionByCallSid.delete(amdCallId);
7957
8677
  try {
7958
8678
  await cbTx({
7959
8679
  call_id: amdCallId,
@@ -8121,8 +8841,13 @@ var EmbeddedServer = class {
8121
8841
  getLogger().info(`AMD result for ${sanitizeLogValue(callUuid)}: ${sanitizeLogValue(amdRaw)}`);
8122
8842
  const classification = classifyPlivoAmd(amdRaw);
8123
8843
  if (callUuid) this.amdClass.set(callUuid, classification);
8124
- const cb = this.onMachineDetection;
8844
+ let cbKey = callUuid && this.onMachineDetectionByCallSid.has(callUuid) ? callUuid : void 0;
8845
+ if (cbKey === void 0 && this.onMachineDetectionByCallSid.size === 1) {
8846
+ cbKey = this.onMachineDetectionByCallSid.keys().next().value;
8847
+ }
8848
+ const cb = cbKey !== void 0 ? this.onMachineDetectionByCallSid.get(cbKey) : void 0;
8125
8849
  if (cb && callUuid) {
8850
+ if (cbKey !== void 0) this.onMachineDetectionByCallSid.delete(cbKey);
8126
8851
  try {
8127
8852
  await cb({
8128
8853
  call_id: callUuid,
@@ -8203,27 +8928,34 @@ var EmbeddedServer = class {
8203
8928
  this.handleTwilioStream(ws, url);
8204
8929
  }
8205
8930
  });
8206
- await new Promise((resolve2) => {
8931
+ await new Promise((resolve2, reject) => {
8207
8932
  const bindHost = process.env.PATTER_BIND_HOST ?? "127.0.0.1";
8933
+ this.server.once("error", reject);
8208
8934
  this.server.listen(port, bindHost, () => {
8935
+ this.server.off("error", reject);
8209
8936
  getLogger().info(`Server on port ${port}`);
8210
8937
  getLogger().info(`Webhook: https://${this.config.webhookUrl}`);
8211
8938
  getLogger().info(`Phone: ${this.config.phoneNumber}`);
8212
8939
  const model = this.agent.model ?? "";
8213
- if (model && model !== "gpt-4o-mini-realtime-preview" && model.includes("realtime")) {
8940
+ const calibrated = ["gpt-realtime-mini", "gpt-4o-mini-realtime-preview"];
8941
+ if (model && !calibrated.includes(model) && model.includes("realtime")) {
8214
8942
  getLogger().warn(
8215
- `Agent uses "${sanitizeLogValue(model)}" but DEFAULT_PRICING.openai_realtime is calibrated for "gpt-4o-mini-realtime-preview". Pass Patter({ pricing: { openai_realtime: {...} } }) to set rates for this model, otherwise the dashboard cost display will under-report.`
8943
+ `Agent uses "${sanitizeLogValue(model)}" but DEFAULT_PRICING.openai_realtime is calibrated for the default Realtime models (gpt-realtime-mini / gpt-4o-mini-realtime-preview). Pass Patter({ pricing: { openai_realtime: {...} } }) to set rates for this model, otherwise the dashboard cost display will under-report.`
8216
8944
  );
8217
8945
  }
8218
- if (this.dashboard) {
8219
- console.log("\n\u2500\u2500\u2500\u2500 Dashboard \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500");
8220
- getLogger().info(`URL: http://127.0.0.1:${port}/`);
8221
- if (!this.dashboardToken) {
8946
+ if (this.dashboard && this.dashboardMounted) {
8947
+ getLogger().info("\u2500\u2500\u2500\u2500 Dashboard \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500");
8948
+ if (this.effectiveDashboardToken) {
8949
+ getLogger().info(
8950
+ `URL: http://127.0.0.1:${port}/?token=${this.effectiveDashboardToken}`
8951
+ );
8952
+ } else {
8953
+ getLogger().info(`URL: http://127.0.0.1:${port}/`);
8222
8954
  getLogger().warn(
8223
8955
  "Dashboard is enabled without authentication. Set dashboardToken to protect call data. This is safe for local development but should not be exposed on a public network."
8224
8956
  );
8225
8957
  }
8226
- console.log("\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n");
8958
+ getLogger().info("\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500");
8227
8959
  }
8228
8960
  resolve2();
8229
8961
  });
@@ -8299,7 +9031,7 @@ var EmbeddedServer = class {
8299
9031
  onMessage: this.onMessage,
8300
9032
  onMetrics: wrappedMetrics,
8301
9033
  recording: this.recording,
8302
- buildAIAdapter: (resolvedPrompt) => buildAIAdapter(this.config, this.agent, resolvedPrompt),
9034
+ buildAIAdapter: (resolvedPrompt, toolsOverride) => buildAIAdapter(this.config, this.agent, resolvedPrompt, toolsOverride),
8303
9035
  sanitizeVariables,
8304
9036
  resolveVariables,
8305
9037
  popPrewarmAudio: this.popPrewarmAudio,
@@ -8573,17 +9305,18 @@ var EmbeddedServer = class {
8573
9305
  }
8574
9306
  if (this.activeConnections.size > 0) {
8575
9307
  getLogger().info(`Waiting for ${this.activeConnections.size} active connection(s) to close...`);
8576
- await Promise.race([
8577
- new Promise((resolve2) => {
8578
- const checkInterval = setInterval(() => {
8579
- if (this.activeConnections.size === 0) {
8580
- clearInterval(checkInterval);
8581
- resolve2();
8582
- }
8583
- }, 100);
8584
- }),
8585
- new Promise((resolve2) => setTimeout(resolve2, GRACEFUL_SHUTDOWN_TIMEOUT_MS))
8586
- ]);
9308
+ let checkInterval;
9309
+ const drainPromise = new Promise((resolve2) => {
9310
+ checkInterval = setInterval(() => {
9311
+ if (this.activeConnections.size === 0) {
9312
+ clearInterval(checkInterval);
9313
+ resolve2();
9314
+ }
9315
+ }, 100);
9316
+ });
9317
+ const timeoutPromise = new Promise((resolve2) => setTimeout(resolve2, GRACEFUL_SHUTDOWN_TIMEOUT_MS));
9318
+ await Promise.race([drainPromise, timeoutPromise]);
9319
+ clearInterval(checkInterval);
8587
9320
  }
8588
9321
  if (this.activeConnections.size > 0) {
8589
9322
  getLogger().info(`Force-closing ${this.activeConnections.size} remaining connection(s)`);
@@ -8630,10 +9363,13 @@ var CircuitBreakerRegistry = class {
8630
9363
  if (s.state === CircuitBreakerState.OPEN) {
8631
9364
  if (this.clock() - s.openedAt >= this.cooldownMs) {
8632
9365
  s.state = CircuitBreakerState.HALF_OPEN;
9366
+ s.probeInFlight = true;
8633
9367
  return true;
8634
9368
  }
8635
9369
  return false;
8636
9370
  }
9371
+ if (s.probeInFlight) return false;
9372
+ s.probeInFlight = true;
8637
9373
  return true;
8638
9374
  }
8639
9375
  /** Mark a successful execution. Resets the breaker to CLOSED. */
@@ -8643,19 +9379,21 @@ var CircuitBreakerRegistry = class {
8643
9379
  s.state = CircuitBreakerState.CLOSED;
8644
9380
  s.consecutiveFailures = 0;
8645
9381
  s.openedAt = 0;
9382
+ s.probeInFlight = false;
8646
9383
  }
8647
9384
  /** Mark a failed execution; trips OPEN once threshold is reached. */
8648
9385
  recordFailure(toolName) {
8649
9386
  if (this.threshold <= 0) return;
8650
9387
  let s = this.state.get(toolName);
8651
9388
  if (!s) {
8652
- s = { state: CircuitBreakerState.CLOSED, consecutiveFailures: 0, openedAt: 0 };
9389
+ s = { state: CircuitBreakerState.CLOSED, consecutiveFailures: 0, openedAt: 0, probeInFlight: false };
8653
9390
  this.state.set(toolName, s);
8654
9391
  }
8655
9392
  s.consecutiveFailures += 1;
8656
9393
  if (s.consecutiveFailures >= this.threshold) {
8657
9394
  s.state = CircuitBreakerState.OPEN;
8658
9395
  s.openedAt = this.clock();
9396
+ s.probeInFlight = false;
8659
9397
  }
8660
9398
  }
8661
9399
  /**
@@ -8680,7 +9418,18 @@ var CircuitBreakerRegistry = class {
8680
9418
  var DEFAULT_TOOL_MAX_RETRIES = 2;
8681
9419
  var DEFAULT_TOOL_RETRY_DELAY_MS = 500;
8682
9420
  var DEFAULT_TOOL_TIMEOUT_MS = 1e4;
9421
+ var MAX_TOOL_TIMEOUT_MS = 3e5;
8683
9422
  var TOOL_MAX_RESPONSE_BYTES = 1 * 1024 * 1024;
9423
/**
 * Error used to signal that a tool handler did not settle within its
 * timeout. The executor checks for it with ``instanceof`` so a timeout
 * can be handled differently from an ordinary handler failure.
 */
var ToolTimeoutError = class extends Error {
  // Own property so String(err) and logs show the specific error type.
  name = "ToolTimeoutError";
  constructor(message) {
    super(message);
  }
};
9429
/**
 * Resolve the timeout (in milliseconds) to use for one tool invocation.
 *
 * @param {number | null | undefined} toolTimeoutMs - Per-tool override.
 *   ``undefined``/``null`` (unset) and values that coerce to ``NaN`` fall
 *   back to ``defaultMs``.
 * @param {number} defaultMs - Executor-level default, returned unchanged
 *   when no usable override is given.
 * @returns {number} ``defaultMs``, or the override clamped to
 *   ``[100, MAX_TOOL_TIMEOUT_MS]``.
 */
function resolveToolTimeoutMs(toolTimeoutMs, defaultMs) {
  if (toolTimeoutMs === undefined || toolTimeoutMs === null) return defaultMs;
  const requestedMs = Number(toolTimeoutMs);
  // NaN would survive Math.min/Math.max and is not a usable delay for the
  // timers this value feeds, so treat it like "unset".
  if (Number.isNaN(requestedMs)) return defaultMs;
  return Math.max(100, Math.min(requestedMs, MAX_TOOL_TIMEOUT_MS));
}
8684
9433
  async function invokeHandler(handler, args, callContext, onProgress) {
8685
9434
  const invoked = handler(args, callContext);
8686
9435
  if (invoked && typeof invoked === "object" && typeof invoked[Symbol.asyncIterator] === "function" && typeof invoked.next === "function") {
@@ -8740,15 +9489,41 @@ var DefaultToolExecutor = class {
8740
9489
  retry_after_ms: cooldown
8741
9490
  });
8742
9491
  }
9492
+ const effectiveTimeoutMs = resolveToolTimeoutMs(
9493
+ toolDef.timeoutMs,
9494
+ this.requestTimeoutMs
9495
+ );
8743
9496
  if (toolDef.handler) {
8744
9497
  const totalAttempts = this.maxRetries + 1;
8745
9498
  let lastErr = null;
8746
9499
  for (let attempt = 0; attempt < totalAttempts; attempt++) {
9500
+ let timeoutTimer;
8747
9501
  try {
8748
- const result = await invokeHandler(toolDef.handler, args, callContext, onProgress);
9502
+ const handlerPromise = invokeHandler(toolDef.handler, args, callContext, onProgress);
9503
+ const result = await Promise.race([
9504
+ handlerPromise,
9505
+ new Promise((_, reject) => {
9506
+ timeoutTimer = setTimeout(
9507
+ () => reject(
9508
+ new ToolTimeoutError(
9509
+ `Tool handler '${toolDef.name}' timed out after ${effectiveTimeoutMs}ms`
9510
+ )
9511
+ ),
9512
+ effectiveTimeoutMs
9513
+ );
9514
+ })
9515
+ ]);
8749
9516
  this.breaker.recordSuccess(toolDef.name);
8750
9517
  return result;
8751
9518
  } catch (e) {
9519
+ if (e instanceof ToolTimeoutError) {
9520
+ getLogger().error(String(e));
9521
+ this.breaker.recordFailure(toolDef.name);
9522
+ return JSON.stringify({
9523
+ error: String(e),
9524
+ fallback: true
9525
+ });
9526
+ }
8752
9527
  lastErr = e;
8753
9528
  if (attempt < totalAttempts - 1) {
8754
9529
  getLogger().warn(
@@ -8756,6 +9531,8 @@ var DefaultToolExecutor = class {
8756
9531
  );
8757
9532
  await new Promise((r) => setTimeout(r, backoffDelayMs(this.retryDelayMs, attempt)));
8758
9533
  }
9534
+ } finally {
9535
+ if (timeoutTimer !== void 0) clearTimeout(timeoutTimer);
8759
9536
  }
8760
9537
  }
8761
9538
  this.breaker.recordFailure(toolDef.name);
@@ -8792,7 +9569,10 @@ var DefaultToolExecutor = class {
8792
9569
  ...callContext,
8793
9570
  attempt: attempt + 1
8794
9571
  }),
8795
- signal: AbortSignal.timeout(this.requestTimeoutMs)
9572
+ // Use per-tool timeout when set, otherwise fall back to
9573
+ // the executor-level default. Mirrors Python's per-request
9574
+ // ``timeout=`` override on httpx.AsyncClient.post().
9575
+ signal: AbortSignal.timeout(effectiveTimeoutMs)
8796
9576
  });
8797
9577
  if (!resp.ok) throw new Error(`HTTP ${resp.status}`);
8798
9578
  const result = JSON.stringify(await resp.json());
@@ -8942,7 +9722,7 @@ var OpenAILLMProvider = class {
8942
9722
  body.tools = tools;
8943
9723
  }
8944
9724
  const signal = mergeAbortSignals(opts?.signal, AbortSignal.timeout(3e4));
8945
- const response = await fetch("https://api.openai.com/v1/chat/completions", {
9725
+ const response = await fetch(`${this.baseUrl}/chat/completions`, {
8946
9726
  method: "POST",
8947
9727
  headers: {
8948
9728
  "Content-Type": "application/json",
@@ -8962,50 +9742,55 @@ var OpenAILLMProvider = class {
8962
9742
  if (!reader) return;
8963
9743
  const decoder = new TextDecoder();
8964
9744
  let buffer = "";
8965
- while (true) {
8966
- const { done, value } = await reader.read();
8967
- if (done) break;
8968
- buffer += decoder.decode(value, { stream: true });
8969
- const lines = buffer.split("\n");
8970
- buffer = lines.pop() || "";
8971
- for (const line of lines) {
8972
- const trimmed = line.trim();
8973
- if (!trimmed || !trimmed.startsWith("data: ")) continue;
8974
- const data = trimmed.slice(6);
8975
- if (data === "[DONE]") continue;
8976
- let chunk;
8977
- try {
8978
- chunk = JSON.parse(data);
8979
- } catch {
8980
- continue;
8981
- }
8982
- if (chunk.usage) {
8983
- const cached = chunk.usage.prompt_tokens_details?.cached_tokens ?? 0;
8984
- const uncachedInput = Math.max(0, (chunk.usage.prompt_tokens ?? 0) - cached);
8985
- yield {
8986
- type: "usage",
8987
- inputTokens: uncachedInput,
8988
- outputTokens: chunk.usage.completion_tokens,
8989
- cacheReadInputTokens: cached
8990
- };
8991
- }
8992
- const delta = chunk.choices?.[0]?.delta;
8993
- if (!delta) continue;
8994
- if (delta.content) {
8995
- yield { type: "text", content: delta.content };
8996
- }
8997
- if (delta.tool_calls) {
8998
- for (const tc of delta.tool_calls) {
9745
+ try {
9746
+ while (true) {
9747
+ const { done, value } = await reader.read();
9748
+ if (done) break;
9749
+ buffer += decoder.decode(value, { stream: true });
9750
+ const lines = buffer.split("\n");
9751
+ buffer = lines.pop() || "";
9752
+ for (const line of lines) {
9753
+ const trimmed = line.trim();
9754
+ if (!trimmed || !trimmed.startsWith("data: ")) continue;
9755
+ const data = trimmed.slice(6);
9756
+ if (data === "[DONE]") continue;
9757
+ let chunk;
9758
+ try {
9759
+ chunk = JSON.parse(data);
9760
+ } catch {
9761
+ continue;
9762
+ }
9763
+ if (chunk.usage) {
9764
+ const cached = chunk.usage.prompt_tokens_details?.cached_tokens ?? 0;
9765
+ const uncachedInput = Math.max(0, (chunk.usage.prompt_tokens ?? 0) - cached);
8999
9766
  yield {
9000
- type: "tool_call",
9001
- index: tc.index,
9002
- id: tc.id,
9003
- name: tc.function?.name,
9004
- arguments: tc.function?.arguments
9767
+ type: "usage",
9768
+ inputTokens: uncachedInput,
9769
+ outputTokens: chunk.usage.completion_tokens,
9770
+ cacheReadInputTokens: cached
9005
9771
  };
9006
9772
  }
9773
+ const delta = chunk.choices?.[0]?.delta;
9774
+ if (!delta) continue;
9775
+ if (delta.content) {
9776
+ yield { type: "text", content: delta.content };
9777
+ }
9778
+ if (delta.tool_calls) {
9779
+ for (const tc of delta.tool_calls) {
9780
+ yield {
9781
+ type: "tool_call",
9782
+ index: tc.index,
9783
+ id: tc.id,
9784
+ name: tc.function?.name,
9785
+ arguments: tc.function?.arguments
9786
+ };
9787
+ }
9788
+ }
9007
9789
  }
9008
9790
  }
9791
+ } finally {
9792
+ reader.cancel().catch(() => {
9793
+ });
9009
9794
  }
9010
9795
  }
9011
9796
  };
@@ -9116,12 +9901,14 @@ ${systemPrompt}` : DEFAULT_PHONE_PREAMBLE;
9116
9901
  const hasAfterLlmResponse = Boolean(hookExecutor?.hasAfterLlmResponse() && hookCtx);
9117
9902
  const hasAfterLlmChunk = Boolean(hookExecutor?.hasAfterLlmChunk());
9118
9903
  const allEmittedText = [];
9904
+ const callId = callContext.call_id;
9905
+ const streamOpts = typeof callId === "string" && callId.length > 0 ? { ...opts, callId } : opts;
9119
9906
  for (let iter = 0; iter < maxIterations; iter++) {
9120
9907
  const toolCallsAccumulated = /* @__PURE__ */ new Map();
9121
9908
  const textParts = [];
9122
9909
  let hasToolCalls = false;
9123
9910
  let usageChunkReceived = false;
9124
- for await (const chunk of this.provider.stream(messages, this.openaiTools, opts)) {
9911
+ for await (const chunk of this.provider.stream(messages, this.openaiTools, streamOpts)) {
9125
9912
  if (chunk.type === "text" && chunk.content) {
9126
9913
  const content = hasAfterLlmChunk && hookExecutor ? hookExecutor.runAfterLlmChunk(chunk.content) : chunk.content;
9127
9914
  textParts.push(content);
@@ -9139,7 +9926,7 @@ ${systemPrompt}` : DEFAULT_PHONE_PREAMBLE;
9139
9926
  chunk.inputTokens ?? 0,
9140
9927
  chunk.outputTokens ?? 0,
9141
9928
  chunk.cacheReadInputTokens ?? 0,
9142
- chunk.cacheCreationInputTokens ?? 0
9929
+ chunk.cacheWriteInputTokens ?? 0
9143
9930
  );
9144
9931
  } else if (chunk.type === "tool_call") {
9145
9932
  hasToolCalls = true;
@@ -9368,12 +10155,12 @@ var TestSession = class {
9368
10155
  }
9369
10156
  continue;
9370
10157
  }
9371
- conversationHistory.push({
9372
- role: "user",
9373
- text: userInput,
9374
- timestamp: Date.now()
9375
- });
9376
10158
  if (onMessage) {
10159
+ conversationHistory.push({
10160
+ role: "user",
10161
+ text: userInput,
10162
+ timestamp: Date.now()
10163
+ });
9377
10164
  try {
9378
10165
  const responseText = await onMessage({
9379
10166
  text: userInput,
@@ -9403,6 +10190,11 @@ var TestSession = class {
9403
10190
  }
9404
10191
  log.info("");
9405
10192
  const responseText = parts.join("");
10193
+ conversationHistory.push({
10194
+ role: "user",
10195
+ text: userInput,
10196
+ timestamp: Date.now()
10197
+ });
9406
10198
  if (responseText) {
9407
10199
  conversationHistory.push({
9408
10200
  role: "assistant",
@@ -9434,6 +10226,7 @@ var TestSession = class {
9434
10226
  export {
9435
10227
  ErrorCode,
9436
10228
  PatterError,
10229
+ PatterConfigError,
9437
10230
  PatterConnectionError,
9438
10231
  AuthenticationError,
9439
10232
  ProvisionError,
@@ -9477,6 +10270,8 @@ export {
9477
10270
  mergeAbortSignals,
9478
10271
  OpenAILLMProvider,
9479
10272
  LLMLoop,
10273
+ openclawConsult,
10274
+ openclawPostCallNotifier,
9480
10275
  DEFAULT_MIN_SENTENCE_LEN,
9481
10276
  SentenceChunker,
9482
10277
  PipelineHookExecutor,