getpatter 0.5.3 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,23 +1,66 @@
1
1
  import {
2
2
  getLogger
3
- } from "./chunk-VJVDG4V5.mjs";
3
+ } from "./chunk-MVOQFAEO.mjs";
4
4
  import {
5
- __require
6
- } from "./chunk-QHHBUCMT.mjs";
5
+ __dirname,
6
+ __require,
7
+ init_esm_shims
8
+ } from "./chunk-N565J3CF.mjs";
7
9
 
8
10
  // src/test-mode.ts
11
+ init_esm_shims();
9
12
  import { createInterface } from "readline";
10
13
 
14
+ // src/llm-loop.ts
15
+ init_esm_shims();
16
+
11
17
  // src/server.ts
18
+ init_esm_shims();
12
19
  import crypto4 from "crypto";
13
20
  import express from "express";
14
21
  import { createServer } from "http";
15
22
  import { WebSocketServer } from "ws";
16
23
 
17
24
  // src/providers/openai-realtime.ts
25
+ init_esm_shims();
18
26
  import WebSocket from "ws";
27
+ var OpenAIRealtimeAudioFormat = {
28
+ G711_ULAW: "g711_ulaw",
29
+ G711_ALAW: "g711_alaw",
30
+ PCM16: "pcm16"
31
+ };
32
+ var OpenAIRealtimeModel = {
33
+ GPT_REALTIME: "gpt-realtime",
34
+ GPT_REALTIME_2: "gpt-realtime-2",
35
+ GPT_REALTIME_MINI: "gpt-realtime-mini",
36
+ GPT_4O_REALTIME_PREVIEW: "gpt-4o-realtime-preview",
37
+ GPT_4O_MINI_REALTIME_PREVIEW: "gpt-4o-mini-realtime-preview"
38
+ };
39
+ var OpenAIVoice = {
40
+ ALLOY: "alloy",
41
+ ASH: "ash",
42
+ BALLAD: "ballad",
43
+ CORAL: "coral",
44
+ ECHO: "echo",
45
+ FABLE: "fable",
46
+ NOVA: "nova",
47
+ ONYX: "onyx",
48
+ SAGE: "sage",
49
+ SHIMMER: "shimmer",
50
+ VERSE: "verse"
51
+ };
52
+ var OpenAITranscriptionModel = {
53
+ WHISPER_1: "whisper-1",
54
+ GPT_4O_TRANSCRIBE: "gpt-4o-transcribe",
55
+ GPT_4O_MINI_TRANSCRIBE: "gpt-4o-mini-transcribe",
56
+ GPT_REALTIME_WHISPER: "gpt-realtime-whisper"
57
+ };
58
+ var OpenAIRealtimeVADType = {
59
+ SERVER_VAD: "server_vad",
60
+ SEMANTIC_VAD: "semantic_vad"
61
+ };
19
62
  var OpenAIRealtimeAdapter = class {
20
- constructor(apiKey, model = "gpt-realtime-mini", voice = "alloy", instructions = "", tools, audioFormat = "g711_ulaw", options = {}) {
63
+ constructor(apiKey, model = OpenAIRealtimeModel.GPT_REALTIME_MINI, voice = OpenAIVoice.ALLOY, instructions = "", tools, audioFormat = OpenAIRealtimeAudioFormat.G711_ULAW, options = {}) {
21
64
  this.apiKey = apiKey;
22
65
  this.model = model;
23
66
  this.voice = voice;
@@ -26,6 +69,12 @@ var OpenAIRealtimeAdapter = class {
26
69
  this.audioFormat = audioFormat;
27
70
  this.options = options;
28
71
  }
72
+ apiKey;
73
+ model;
74
+ voice;
75
+ instructions;
76
+ tools;
77
+ audioFormat;
29
78
  ws = null;
30
79
  eventCallbacks = /* @__PURE__ */ new Set();
31
80
  messageListenerAttached = false;
@@ -34,7 +83,17 @@ var OpenAIRealtimeAdapter = class {
34
83
  // barge-in (see ``cancelResponse``) — matches the Python adapter.
35
84
  currentResponseItemId = null;
36
85
  currentResponseAudioMs = 0;
86
+ // Wall-clock timestamp (Date.now()) of the first ``response.audio.delta``
87
+ // received since the current response item started. ``cancelResponse``
88
+ // uses this to bound ``audio_end_ms`` to what the caller could plausibly
89
+ // have heard — generated audio frequently arrives 5-10x real-time, so
90
+ // ``audio_end_ms`` driven purely by the per-chunk byte counter overshoots
91
+ // reality and leaves phantom assistant text on the conversation. The
92
+ // wall-clock cap corresponds to the maximum playback that real-time TTS
93
+ // could have produced, which is what the user actually heard.
94
+ currentResponseFirstAudioAt = null;
37
95
  options;
96
+ /** Open the Realtime WebSocket and apply the session configuration. */
38
97
  async connect() {
39
98
  const url = `wss://api.openai.com/v1/realtime?model=${encodeURIComponent(this.model)}`;
40
99
  this.ws = new WebSocket(url, {
@@ -63,12 +122,14 @@ var OpenAIRealtimeAdapter = class {
63
122
  voice: this.voice,
64
123
  instructions: this.instructions || "You are a helpful voice assistant. Be concise.",
65
124
  turn_detection: {
66
- type: this.options.vadType ?? "server_vad",
125
+ type: this.options.vadType ?? OpenAIRealtimeVADType.SERVER_VAD,
67
126
  threshold: 0.5,
68
127
  prefix_padding_ms: 300,
69
128
  silence_duration_ms: this.options.silenceDurationMs ?? 300
70
129
  },
71
- input_audio_transcription: { model: this.options.inputAudioTranscriptionModel ?? "whisper-1" }
130
+ input_audio_transcription: {
131
+ model: this.options.inputAudioTranscriptionModel ?? OpenAITranscriptionModel.WHISPER_1
132
+ }
72
133
  };
73
134
  if (this.options.temperature !== void 0) config.temperature = this.options.temperature;
74
135
  if (this.options.maxResponseOutputTokens !== void 0) {
@@ -76,13 +137,22 @@ var OpenAIRealtimeAdapter = class {
76
137
  }
77
138
  if (this.options.modalities !== void 0) config.modalities = this.options.modalities;
78
139
  if (this.options.toolChoice !== void 0) config.tool_choice = this.options.toolChoice;
140
+ if (this.options.reasoningEffort !== void 0) {
141
+ config.reasoning = { effort: this.options.reasoningEffort };
142
+ }
79
143
  if (this.tools?.length) {
80
- config.tools = this.tools.map((t) => ({
81
- type: "function",
82
- name: t.name,
83
- description: t.description,
84
- parameters: t.parameters
85
- }));
144
+ config.tools = this.tools.map((t) => {
145
+ const def = {
146
+ type: "function",
147
+ name: t.name,
148
+ description: t.description,
149
+ parameters: t.parameters
150
+ };
151
+ if (t.strict === true) {
152
+ def.strict = true;
153
+ }
154
+ return def;
155
+ });
86
156
  }
87
157
  ws.send(JSON.stringify({ type: "session.update", session: config }));
88
158
  } else if (msg.type === "session.updated") {
@@ -124,6 +194,7 @@ var OpenAIRealtimeAdapter = class {
124
194
  }, 2e4);
125
195
  this.ensureMessageListener();
126
196
  }
197
+ /** Append a base64-encoded audio chunk to the realtime input buffer. */
127
198
  sendAudio(mulawAudio) {
128
199
  if (!this.ws || this.ws.readyState !== WebSocket.OPEN) return;
129
200
  this.ws.send(JSON.stringify({ type: "input_audio_buffer.append", audio: mulawAudio.toString("base64") }));
@@ -140,6 +211,7 @@ var OpenAIRealtimeAdapter = class {
140
211
  this.eventCallbacks.add(callback);
141
212
  this.ensureMessageListener();
142
213
  }
214
+ /** Remove a previously registered {@link onEvent} callback. */
143
215
  offEvent(callback) {
144
216
  this.eventCallbacks.delete(callback);
145
217
  }
@@ -166,6 +238,9 @@ var OpenAIRealtimeAdapter = class {
166
238
  if (t === "response.audio.delta") {
167
239
  const buf = Buffer.from(data.delta ?? "", "base64");
168
240
  this.currentResponseAudioMs += estimateAudioMs(buf, this.audioFormat);
241
+ if (this.currentResponseFirstAudioAt === null) {
242
+ this.currentResponseFirstAudioAt = Date.now();
243
+ }
169
244
  dispatch("audio", buf);
170
245
  } else if (t === "response.audio_transcript.delta") {
171
246
  dispatch("transcript_output", data.delta);
@@ -174,6 +249,7 @@ var OpenAIRealtimeAdapter = class {
174
249
  if (itemId) {
175
250
  this.currentResponseItemId = itemId;
176
251
  this.currentResponseAudioMs = 0;
252
+ this.currentResponseFirstAudioAt = null;
177
253
  }
178
254
  } else if (t === "input_audio_buffer.speech_started") {
179
255
  dispatch("speech_started", null);
@@ -186,6 +262,7 @@ var OpenAIRealtimeAdapter = class {
186
262
  } else if (t === "response.done") {
187
263
  this.currentResponseItemId = null;
188
264
  this.currentResponseAudioMs = 0;
265
+ this.currentResponseFirstAudioAt = null;
189
266
  dispatch("response_done", data.response ?? null);
190
267
  } else if (t === "error") {
191
268
  dispatch("error", data.error);
@@ -204,22 +281,44 @@ var OpenAIRealtimeAdapter = class {
204
281
  dispatch("error", { type: "socket_error", message: err?.message ?? String(err) });
205
282
  });
206
283
  }
284
+ /** Truncate the in-flight assistant turn and cancel the active response.
285
+ *
286
+ * ``audio_end_ms`` MUST reflect what the caller actually heard, not what
287
+ * the server generated. OpenAI streams audio at 5-10x real-time, so the
288
+ * byte-derived counter overstates playback whenever the consumer cleared
289
+ * its playout buffer (e.g. ``send_clear``) before the audio reached the
290
+ * speaker. We bound the truncate point by wall-clock time since the first
291
+ * chunk of this response — that's the physical maximum a 1x real-time
292
+ * playback could have produced. Without this cap, OpenAI keeps the full
293
+ * generated assistant text on the transcript, and the model replays /
294
+ * resumes from it on the next turn — manifesting as re-greetings and
295
+ * mid-sentence fragments after a barge-in storm.
296
+ */
207
297
  cancelResponse() {
208
298
  if (!this.ws) return;
209
299
  if (this.currentResponseItemId) {
300
+ let audioEndMs = this.currentResponseAudioMs;
301
+ if (this.currentResponseFirstAudioAt !== null) {
302
+ const elapsedMs = Date.now() - this.currentResponseFirstAudioAt;
303
+ audioEndMs = Math.min(audioEndMs, Math.max(elapsedMs, 0));
304
+ }
210
305
  try {
211
306
  this.ws.send(JSON.stringify({
212
307
  type: "conversation.item.truncate",
213
308
  item_id: this.currentResponseItemId,
214
309
  content_index: 0,
215
- audio_end_ms: this.currentResponseAudioMs
310
+ audio_end_ms: audioEndMs
216
311
  }));
217
312
  } catch (err) {
218
313
  getLogger().debug?.(`conversation.item.truncate failed: ${String(err)}`);
219
314
  }
220
315
  }
221
316
  this.ws.send(JSON.stringify({ type: "response.cancel" }));
317
+ this.currentResponseItemId = null;
318
+ this.currentResponseAudioMs = 0;
319
+ this.currentResponseFirstAudioAt = null;
222
320
  }
321
+ /** Inject a user text turn and request a new response. */
223
322
  async sendText(text) {
224
323
  this.ws?.send(JSON.stringify({
225
324
  type: "conversation.item.create",
@@ -227,6 +326,30 @@ var OpenAIRealtimeAdapter = class {
227
326
  }));
228
327
  this.ws?.send(JSON.stringify({ type: "response.create" }));
229
328
  }
329
+ /**
330
+ * Make the AI speak ``text`` as its opening line.
331
+ *
332
+ * Triggers ``response.create`` with explicit ``instructions`` that force
333
+ * the model to render ``text`` verbatim as its first audio utterance.
334
+ * This is the correct semantics for ``Agent.firstMessage`` per its
335
+ * docstring ("What the AI says when the callee answers").
336
+ *
337
+ * Without this, ``sendText(firstMessage)`` would inject ``text`` as
338
+ * ``role: user`` and the AI would *reply* to its own greeting, producing
339
+ * role-confused openings (e.g. a receptionist agent responding "I'd like
340
+ * to schedule a haircut" because it took its own first_message as a
341
+ * customer cue).
342
+ */
343
+ async sendFirstMessage(text) {
344
+ this.ws?.send(JSON.stringify({
345
+ type: "response.create",
346
+ response: {
347
+ modalities: ["audio", "text"],
348
+ instructions: `Say exactly the following sentence as your first turn and nothing else: "${text}"`
349
+ }
350
+ }));
351
+ }
352
+ /** Submit a tool/function-call result and request the next response. */
230
353
  async sendFunctionResult(callId, result) {
231
354
  this.ws?.send(JSON.stringify({
232
355
  type: "conversation.item.create",
@@ -234,6 +357,7 @@ var OpenAIRealtimeAdapter = class {
234
357
  }));
235
358
  this.ws?.send(JSON.stringify({ type: "response.create" }));
236
359
  }
360
+ /** Stop the heartbeat, drop listeners, and close the Realtime WebSocket. */
237
361
  close() {
238
362
  if (this.heartbeat) {
239
363
  clearInterval(this.heartbeat);
@@ -247,14 +371,16 @@ var OpenAIRealtimeAdapter = class {
247
371
  };
248
372
  function estimateAudioMs(chunk, format) {
249
373
  if (chunk.length === 0) return 0;
250
- if (format === "g711_ulaw" || format === "g711_alaw") return Math.floor(chunk.length / 8);
251
- if (format === "pcm16") {
374
+ if (format === OpenAIRealtimeAudioFormat.G711_ULAW || format === OpenAIRealtimeAudioFormat.G711_ALAW)
375
+ return Math.floor(chunk.length / 8);
376
+ if (format === OpenAIRealtimeAudioFormat.PCM16) {
252
377
  return Math.floor(chunk.length / 48);
253
378
  }
254
379
  return 0;
255
380
  }
256
381
 
257
382
  // src/providers/elevenlabs-convai.ts
383
+ init_esm_shims();
258
384
  import WebSocket2 from "ws";
259
385
  var ELEVENLABS_CONVAI_URL = "wss://api.elevenlabs.io/v1/convai/conversation";
260
386
  var ELEVENLABS_SIGNED_URL = "https://api.elevenlabs.io/v1/convai/conversation/get-signed-url";
@@ -366,6 +492,7 @@ var ElevenLabsConvAIAdapter = class _ElevenLabsConvAIAdapter {
366
492
  }
367
493
  return data.signed_url;
368
494
  }
495
+ /** Open the ConvAI WebSocket and send the conversation init payload. */
369
496
  async connect() {
370
497
  let wsUrl;
371
498
  let wsOptions;
@@ -533,6 +660,7 @@ var ElevenLabsConvAIAdapter = class _ElevenLabsConvAIAdapter {
533
660
  return;
534
661
  }
535
662
  }
663
+ /** Send a caller-side audio chunk to ConvAI as a base64 `user_audio_chunk`. */
536
664
  sendAudio(audioBytes) {
537
665
  if (!this.ws || this.ws.readyState !== WebSocket2.OPEN) return;
538
666
  this.ws.send(
@@ -541,9 +669,11 @@ var ElevenLabsConvAIAdapter = class _ElevenLabsConvAIAdapter {
541
669
  })
542
670
  );
543
671
  }
672
+ /** Register the event callback that receives ConvAI server messages. */
544
673
  onEvent(callback) {
545
674
  this.eventCallback = callback;
546
675
  }
676
+ /** Close the ConvAI WebSocket and release the event callback. */
547
677
  async close() {
548
678
  this.clearSilenceTimer();
549
679
  if (!this.ws) {
@@ -582,6 +712,7 @@ var ElevenLabsConvAIAdapter = class _ElevenLabsConvAIAdapter {
582
712
  };
583
713
 
584
714
  // src/provider-factory.ts
715
+ init_esm_shims();
585
716
  async function createSTT(agent) {
586
717
  return agent.stt ?? null;
587
718
  }
@@ -590,44 +721,166 @@ async function createTTS(agent) {
590
721
  }
591
722
 
592
723
  // src/pricing.ts
724
+ init_esm_shims();
725
+ var PricingUnit = {
726
+ MINUTE: "minute",
727
+ THOUSAND_CHARS: "1k_chars",
728
+ TOKEN: "token"
729
+ };
730
+ function resolveProviderRates(providerConfig, model) {
731
+ if (!providerConfig) return { unit: "" };
732
+ const { models, ...base } = providerConfig;
733
+ if (!model || !models) return { ...base };
734
+ let override = models[model];
735
+ if (!override) {
736
+ let bestKey = "";
737
+ for (const key of Object.keys(models)) {
738
+ if (model.startsWith(key) && key.length > bestKey.length) {
739
+ bestKey = key;
740
+ }
741
+ }
742
+ if (bestKey) override = models[bestKey];
743
+ }
744
+ if (override) {
745
+ return { ...base, ...override };
746
+ }
747
+ return { ...base };
748
+ }
593
749
  var DEFAULT_PRICING = {
594
- // STT — per minute of audio processed
595
- // Deepgram Nova-3 streaming (monolingual) — the default model Patter ships.
596
- // The previous $0.0043/min was the batch rate; streaming is $0.0077/min per
597
- // deepgram.com/pricing. For multilingual Nova-3 ($0.0092/min) override.
598
- deepgram: { unit: "minute", price: 77e-4 },
599
- whisper: { unit: "minute", price: 6e-3 },
750
+ // STT — per minute of audio processed.
751
+ deepgram: {
752
+ unit: PricingUnit.MINUTE,
753
+ // Default = Nova-3 streaming monolingual ($0.0077/min). Previous $0.0043
754
+ // was the batch rate; streaming is ~80% more expensive.
755
+ price: 77e-4,
756
+ models: {
757
+ "nova-3": { price: 77e-4 },
758
+ "nova-3-multilingual": { price: 92e-4 },
759
+ "nova-2": { price: 58e-4 },
760
+ nova: { price: 43e-4 },
761
+ "whisper-large": { price: 48e-4 },
762
+ "whisper-medium": { price: 48e-4 }
763
+ }
764
+ },
765
+ whisper: {
766
+ unit: PricingUnit.MINUTE,
767
+ // Default = whisper-1 REST ($0.006/min).
768
+ price: 6e-3,
769
+ models: {
770
+ "whisper-1": { price: 6e-3 },
771
+ "gpt-4o-transcribe": { price: 6e-3 },
772
+ "gpt-4o-mini-transcribe": { price: 3e-3 },
773
+ // Streaming Whisper variant for Realtime sessions.
774
+ "gpt-realtime-whisper": { price: 0.017 }
775
+ }
776
+ },
777
+ // OpenAI standalone transcription endpoint (separate provider_key from
778
+ // ``whisper`` so the dashboard can distinguish them).
779
+ openai_transcribe: {
780
+ unit: PricingUnit.MINUTE,
781
+ price: 6e-3,
782
+ models: {
783
+ "gpt-4o-transcribe": { price: 6e-3 },
784
+ "gpt-4o-mini-transcribe": { price: 3e-3 },
785
+ "whisper-1": { price: 6e-3 }
786
+ }
787
+ },
600
788
  // AssemblyAI Universal-Streaming — $0.15/hr = $0.0025/min
601
- assemblyai: { unit: "minute", price: 25e-4 },
789
+ assemblyai: { unit: PricingUnit.MINUTE, price: 25e-4 },
602
790
  // Cartesia ink-whisper streaming STT — ~$0.15/hr on usage plans
603
- cartesia_stt: { unit: "minute", price: 25e-4 },
791
+ cartesia_stt: { unit: PricingUnit.MINUTE, price: 25e-4 },
604
792
  // Soniox real-time STT — $0.12/hr = $0.002/min
605
- soniox: { unit: "minute", price: 2e-3 },
793
+ soniox: { unit: PricingUnit.MINUTE, price: 2e-3 },
606
794
  // Speechmatics Pro tier — $0.24/hr = $0.0040/min (new users land here).
607
795
  // Previous $0.0173 default reflected a legacy Standard tier that was
608
796
  // retired; users were being over-billed ~4.3x.
609
- speechmatics: { unit: "minute", price: 4e-3 },
797
+ speechmatics: { unit: PricingUnit.MINUTE, price: 4e-3 },
610
798
  // TTS — per 1,000 characters synthesized.
611
- // ElevenLabs default model is eleven_flash_v2_5 billed at $0.06/1k via the
612
- // direct API. The previous $0.18 matched only the Creator plan overage.
613
- elevenlabs: { unit: "1k_chars", price: 0.06 },
614
- openai_tts: { unit: "1k_chars", price: 0.015 },
615
- openai_tts_hd: { unit: "1k_chars", price: 0.03 },
616
- // Cartesia Sonic TTS — ~1 credit/char, effective $0.030/1k chars on usage plans
617
- cartesia_tts: { unit: "1k_chars", price: 0.03 },
618
- // Rime mist v2 — $0.030/1k chars pay-as-you-go
619
- rime: { unit: "1k_chars", price: 0.03 },
620
- // LMNT aurora/blizzard — $0.050/1k chars Indie overage
621
- lmnt: { unit: "1k_chars", price: 0.05 },
622
- // OpenAI Realtime — per token.
623
- // Calibrated for gpt-4o-mini-realtime-preview (the Patter default):
624
- // audio input $10 / M -> 0.00001 per token
625
- // audio output $20 / M -> 0.00002 per token
626
- // text input $0.60/ M -> 0.0000006 per token
627
- // text output $2.40/ M -> 0.0000024 per token
628
- // For gpt-4o-realtime-preview multiply by ~10, for gpt-realtime by ~3.
799
+ elevenlabs: {
800
+ unit: PricingUnit.THOUSAND_CHARS,
801
+ // Default = eleven_flash_v2_5 (Patter's default model) at $0.06/1k.
802
+ price: 0.06,
803
+ models: {
804
+ eleven_flash_v2_5: { price: 0.06 },
805
+ eleven_turbo_v2_5: { price: 0.05 },
806
+ eleven_multilingual_v2: { price: 0.18 },
807
+ eleven_monolingual_v1: { price: 0.18 },
808
+ eleven_v3: { price: 0.3 }
809
+ }
810
+ },
811
+ // ElevenLabs WebSocket streaming TTS shares pricing with REST.
812
+ elevenlabs_ws: {
813
+ unit: PricingUnit.THOUSAND_CHARS,
814
+ price: 0.06,
815
+ models: {
816
+ eleven_flash_v2_5: { price: 0.06 },
817
+ eleven_turbo_v2_5: { price: 0.05 },
818
+ eleven_multilingual_v2: { price: 0.18 },
819
+ eleven_v3: { price: 0.3 }
820
+ }
821
+ },
822
+ openai_tts: {
823
+ unit: PricingUnit.THOUSAND_CHARS,
824
+ // Default = tts-1 ($0.015/1k chars).
825
+ price: 0.015,
826
+ models: {
827
+ "tts-1": { price: 0.015 },
828
+ "tts-1-hd": { price: 0.03 },
829
+ // gpt-4o-mini-tts is billed by tokens upstream but published per
830
+ // 1k chars equivalent here for parity with the rest of the table.
831
+ "gpt-4o-mini-tts": { price: 0.012 }
832
+ }
833
+ },
834
+ // Legacy alias preserved for backward compat with users who set
835
+ // provider_key="openai_tts_hd" in their own adapters.
836
+ openai_tts_hd: { unit: PricingUnit.THOUSAND_CHARS, price: 0.03 },
837
+ cartesia_tts: {
838
+ unit: PricingUnit.THOUSAND_CHARS,
839
+ // Default = Sonic-2 (current Cartesia flagship) at ~$0.030/1k chars.
840
+ price: 0.03,
841
+ models: {
842
+ "sonic-2": { price: 0.03 },
843
+ "sonic-1": { price: 0.03 },
844
+ "sonic-english": { price: 0.03 },
845
+ "sonic-multilingual": { price: 0.03 }
846
+ }
847
+ },
848
+ rime: {
849
+ unit: PricingUnit.THOUSAND_CHARS,
850
+ // Default = mistv2 ($0.030/1k chars).
851
+ price: 0.03,
852
+ models: {
853
+ mistv2: { price: 0.03 },
854
+ mist: { price: 0.03 },
855
+ arcana: { price: 0.04 }
856
+ }
857
+ },
858
+ lmnt: {
859
+ unit: PricingUnit.THOUSAND_CHARS,
860
+ // Default = aurora ($0.050/1k chars).
861
+ price: 0.05,
862
+ models: {
863
+ aurora: { price: 0.05 },
864
+ blizzard: { price: 0.05 }
865
+ }
866
+ },
867
+ inworld: {
868
+ unit: PricingUnit.THOUSAND_CHARS,
869
+ // Default = inworld-tts-2 (placeholder rate — verify against tier).
870
+ price: 0.02,
871
+ models: {
872
+ "inworld-tts-2": { price: 0.02 },
873
+ "inworld-tts-1.5-max": { price: 0.025 },
874
+ "inworld-tts-1.5": { price: 0.025 }
875
+ }
876
+ },
877
+ // OpenAI Realtime — per token. Provider defaults match
878
+ // gpt-realtime-mini / gpt-4o-mini-realtime-preview (Patter's default).
879
+ // Per-model overrides under ``models`` are auto-resolved when the
880
+ // realtime adapter's model is threaded through ``calculateRealtimeCost``.
629
881
  openai_realtime: {
630
- unit: "token",
882
+ unit: PricingUnit.TOKEN,
883
+ // Default rates: gpt-realtime-mini / gpt-4o-mini-realtime-preview
631
884
  audio_input_per_token: 1e-5,
632
885
  audio_output_per_token: 2e-5,
633
886
  text_input_per_token: 6e-7,
@@ -636,47 +889,119 @@ var DEFAULT_PRICING = {
636
889
  // text cached $0.06/M = 10% of full. OpenAI bills the cached portion of
637
890
  // input_token_details.audio_tokens / text_tokens at these reduced rates.
638
891
  cached_audio_input_per_token: 3e-7,
639
- cached_text_input_per_token: 6e-8
892
+ cached_text_input_per_token: 6e-8,
893
+ models: {
894
+ // gpt-realtime (GA, August 2025): audio in $32/M, audio out $64/M,
895
+ // text in $4/M, text out $16/M, cached $0.40/M (audio + text).
896
+ "gpt-realtime": {
897
+ audio_input_per_token: 32e-6,
898
+ audio_output_per_token: 64e-6,
899
+ text_input_per_token: 4e-6,
900
+ text_output_per_token: 16e-6,
901
+ cached_audio_input_per_token: 4e-7,
902
+ cached_text_input_per_token: 4e-7
903
+ },
904
+ // gpt-realtime-2 (most-capable): audio in $32/M, audio out $64/M,
905
+ // text in $4/M, text out $24/M, cached $0.40/M (audio + text).
906
+ "gpt-realtime-2": {
907
+ audio_input_per_token: 32e-6,
908
+ audio_output_per_token: 64e-6,
909
+ text_input_per_token: 4e-6,
910
+ text_output_per_token: 24e-6,
911
+ cached_audio_input_per_token: 4e-7,
912
+ cached_text_input_per_token: 4e-7
913
+ },
914
+ // gpt-realtime-mini and gpt-4o-mini-realtime-preview share the
915
+ // provider defaults. Listed explicitly so tooling can introspect.
916
+ "gpt-realtime-mini": {
917
+ audio_input_per_token: 1e-5,
918
+ audio_output_per_token: 2e-5,
919
+ text_input_per_token: 6e-7,
920
+ text_output_per_token: 24e-7,
921
+ cached_audio_input_per_token: 3e-7,
922
+ cached_text_input_per_token: 6e-8
923
+ },
924
+ "gpt-4o-mini-realtime-preview": {
925
+ audio_input_per_token: 1e-5,
926
+ audio_output_per_token: 2e-5,
927
+ text_input_per_token: 6e-7,
928
+ text_output_per_token: 24e-7,
929
+ cached_audio_input_per_token: 3e-7,
930
+ cached_text_input_per_token: 6e-8
931
+ },
932
+ // gpt-4o-realtime-preview (legacy preview, ~10x mini for audio):
933
+ // audio in $100/M, audio out $200/M, text in $5/M, text out $20/M.
934
+ "gpt-4o-realtime-preview": {
935
+ audio_input_per_token: 1e-4,
936
+ audio_output_per_token: 2e-4,
937
+ text_input_per_token: 5e-6,
938
+ text_output_per_token: 2e-5,
939
+ cached_audio_input_per_token: 2e-6,
940
+ cached_text_input_per_token: 25e-7
941
+ }
942
+ }
640
943
  },
641
944
  // Telephony — per minute of call duration.
642
945
  // twilio default = US inbound local (the 99% case for voice agents receiving
643
946
  // calls on a local number). For US toll-free inbound ($0.022/min) or US
644
947
  // outbound local ($0.0140/min), override via Patter({ pricing: { twilio: {...} } }).
645
- twilio: { unit: "minute", price: 85e-4 },
646
- telnyx: { unit: "minute", price: 7e-3 }
948
+ twilio: { unit: PricingUnit.MINUTE, price: 85e-4 },
949
+ telnyx: { unit: PricingUnit.MINUTE, price: 7e-3 }
647
950
  };
951
+ function cloneProviderEntry(entry) {
952
+ const out = { ...entry };
953
+ if (entry.models) {
954
+ const models = {};
955
+ for (const [mk, mv] of Object.entries(entry.models)) {
956
+ models[mk] = { ...mv };
957
+ }
958
+ out.models = models;
959
+ }
960
+ return out;
961
+ }
648
962
  function mergePricing(overrides) {
649
963
  const merged = {};
650
964
  for (const [k, v] of Object.entries(DEFAULT_PRICING)) {
651
- merged[k] = { ...v };
965
+ merged[k] = cloneProviderEntry(v);
652
966
  }
653
967
  if (!overrides) return merged;
654
968
  for (const [provider2, values] of Object.entries(overrides)) {
655
- if (merged[provider2]) {
656
- merged[provider2] = { ...merged[provider2], ...values };
657
- } else {
658
- merged[provider2] = { ...values };
969
+ if (!merged[provider2]) {
970
+ merged[provider2] = cloneProviderEntry(values);
971
+ continue;
972
+ }
973
+ const target = merged[provider2];
974
+ for (const [k, v] of Object.entries(values)) {
975
+ if (k === "models" && v && typeof v === "object" && target.models && typeof target.models === "object") {
976
+ const mergedModels = { ...target.models };
977
+ for (const [mk, mv] of Object.entries(v)) {
978
+ mergedModels[mk] = { ...mv };
979
+ }
980
+ target.models = mergedModels;
981
+ } else {
982
+ target[k] = v;
983
+ }
659
984
  }
660
985
  }
661
986
  return merged;
662
987
  }
663
- function calculateSttCost(provider2, audioSeconds, pricing) {
664
- const config = pricing[provider2];
665
- if (!config || config.unit !== "minute") return 0;
666
- return audioSeconds / 60 * (config.price ?? 0);
988
+ function calculateSttCost(provider2, audioSeconds, pricing, model) {
989
+ const rates = resolveProviderRates(pricing[provider2], model);
990
+ if (rates.unit !== "minute") return 0;
991
+ return audioSeconds / 60 * (rates.price ?? 0);
667
992
  }
668
- function calculateTtsCost(provider2, characterCount, pricing) {
669
- const config = pricing[provider2];
670
- if (!config || config.unit !== "1k_chars") return 0;
671
- return characterCount / 1e3 * (config.price ?? 0);
993
+ function calculateTtsCost(provider2, characterCount, pricing, model) {
994
+ const rates = resolveProviderRates(pricing[provider2], model);
995
+ if (rates.unit !== "1k_chars") return 0;
996
+ return characterCount / 1e3 * (rates.price ?? 0);
672
997
  }
673
- function calculateRealtimeCost(usage, pricing) {
674
- const config = pricing.openai_realtime;
675
- if (!config || config.unit !== "token") return 0;
998
+ function calculateRealtimeCost(usage, pricing, model) {
999
+ const rates = resolveProviderRates(pricing.openai_realtime, model);
1000
+ if (rates.unit !== "token") return 0;
676
1001
  const input = usage.input_token_details ?? {};
677
1002
  const output = usage.output_token_details ?? {};
678
- const cachedAudioRate = config.cached_audio_input_per_token ?? config.audio_input_per_token ?? 0;
679
- const cachedTextRate = config.cached_text_input_per_token ?? config.text_input_per_token ?? 0;
1003
+ const cachedAudioRate = rates.cached_audio_input_per_token ?? rates.audio_input_per_token ?? 0;
1004
+ const cachedTextRate = rates.cached_text_input_per_token ?? rates.text_input_per_token ?? 0;
680
1005
  const totalAudioIn = input.audio_tokens ?? 0;
681
1006
  const totalTextIn = input.text_tokens ?? 0;
682
1007
  let cachedAudioIn;
@@ -695,25 +1020,25 @@ function calculateRealtimeCost(usage, pricing) {
695
1020
  cachedTextIn = 0;
696
1021
  }
697
1022
  let cost = 0;
698
- cost += (totalAudioIn - cachedAudioIn) * (config.audio_input_per_token ?? 0);
1023
+ cost += (totalAudioIn - cachedAudioIn) * (rates.audio_input_per_token ?? 0);
699
1024
  cost += cachedAudioIn * cachedAudioRate;
700
- cost += (totalTextIn - cachedTextIn) * (config.text_input_per_token ?? 0);
1025
+ cost += (totalTextIn - cachedTextIn) * (rates.text_input_per_token ?? 0);
701
1026
  cost += cachedTextIn * cachedTextRate;
702
- cost += (output.audio_tokens ?? 0) * (config.audio_output_per_token ?? 0);
703
- cost += (output.text_tokens ?? 0) * (config.text_output_per_token ?? 0);
1027
+ cost += (output.audio_tokens ?? 0) * (rates.audio_output_per_token ?? 0);
1028
+ cost += (output.text_tokens ?? 0) * (rates.text_output_per_token ?? 0);
704
1029
  return Math.max(0, cost);
705
1030
  }
706
- function calculateRealtimeCachedSavings(usage, pricing) {
707
- const config = pricing.openai_realtime;
708
- if (!config || config.unit !== "token") return 0;
1031
+ function calculateRealtimeCachedSavings(usage, pricing, model) {
1032
+ const rates = resolveProviderRates(pricing.openai_realtime, model);
1033
+ if (rates.unit !== "token") return 0;
709
1034
  const input = usage.input_token_details ?? {};
710
1035
  const cached = input.cached_tokens_details ?? {};
711
- const cachedAudioRate = config.cached_audio_input_per_token ?? config.audio_input_per_token ?? 0;
712
- const cachedTextRate = config.cached_text_input_per_token ?? config.text_input_per_token ?? 0;
1036
+ const cachedAudioRate = rates.cached_audio_input_per_token ?? rates.audio_input_per_token ?? 0;
1037
+ const cachedTextRate = rates.cached_text_input_per_token ?? rates.text_input_per_token ?? 0;
713
1038
  const cachedAudio = Math.min(cached.audio_tokens ?? 0, input.audio_tokens ?? 0);
714
1039
  const cachedText = Math.min(cached.text_tokens ?? 0, input.text_tokens ?? 0);
715
- const fullAudio = cachedAudio * (config.audio_input_per_token ?? 0);
716
- const fullText = cachedText * (config.text_input_per_token ?? 0);
1040
+ const fullAudio = cachedAudio * (rates.audio_input_per_token ?? 0);
1041
+ const fullText = cachedText * (rates.text_input_per_token ?? 0);
717
1042
  const discountedAudio = cachedAudio * cachedAudioRate;
718
1043
  const discountedText = cachedText * cachedTextRate;
719
1044
  return Math.max(0, fullAudio + fullText - (discountedAudio + discountedText));
@@ -745,14 +1070,32 @@ var llmPricing = {
745
1070
  "gemini-live-2.5-flash-native-audio": { input: 0.3, output: 2.5 }
746
1071
  },
747
1072
  groq: {
1073
+ // Rates as of 2026-05-08; verify against groq.com/pricing.
1074
+ // ``llama-3.3-70b-versatile`` is the Patter default for Groq. The
1075
+ // remaining models are reachable via ``model: "..."`` and were silently
1076
+ // billing $0 before this entry was added (silent under-billing).
748
1077
  "llama-3.3-70b-versatile": { input: 0.59, output: 0.79 },
749
- "llama-3.1-8b-instant": { input: 0.05, output: 0.08 }
1078
+ "llama-3.1-8b-instant": { input: 0.05, output: 0.08 },
1079
+ "llama-3.3-70b-specdec": { input: 0.59, output: 0.99 },
1080
+ "llama3-70b-8192": { input: 0.59, output: 0.79 },
1081
+ "llama3-8b-8192": { input: 0.05, output: 0.08 },
1082
+ "mixtral-8x7b-32768": { input: 0.27, output: 0.27 },
1083
+ "gemma2-9b-it": { input: 0.2, output: 0.2 }
750
1084
  },
751
1085
  cerebras: {
1086
+ // Rates as of 2026-05-08; verify against cerebras.net/inference.
1087
+ // ``gpt-oss-120b`` is the Patter default for Cerebras (set in 0.5.4).
1088
+ // On WSE-3 hardware every model size saturates the downstream TTS
1089
+ // consumption rate (~150-300 tok/sec), so the 120B price stays in line
1090
+ // with the 70B tier rather than scaling with weight count.
1091
+ "gpt-oss-120b": { input: 0.85, output: 1.2 },
1092
+ "llama3.1-8b": { input: 0.1, output: 0.2 },
752
1093
  "llama-3.3-70b": { input: 0.85, output: 1.2 },
753
- "qwen-3-32b": { input: 0.4, output: 0.8 }
1094
+ "qwen-3-32b": { input: 0.4, output: 0.8 },
1095
+ "qwen-3-235b-a22b-instruct-2507": { input: 1, output: 1.5 },
1096
+ "zai-glm-4.7": { input: 0.85, output: 1.2 }
754
1097
  },
755
- // OpenAI Chat Completions (non-Realtime) — mirrors sdk-py pricing table.
1098
+ // OpenAI Chat Completions (non-Realtime) — mirrors the Python SDK pricing table.
756
1099
  // Rates are per 1M tokens (USD), cache_read = cached input rate.
757
1100
  openai: {
758
1101
  "gpt-4o": { input: 2.5, output: 10, cache_read: 1.25 },
@@ -792,6 +1135,7 @@ function calculateTelephonyCost(provider2, durationSeconds, pricing) {
792
1135
  }
793
1136
 
794
1137
  // src/dashboard/store.ts
1138
+ init_esm_shims();
795
1139
  import { EventEmitter } from "events";
796
1140
  import * as fs from "fs";
797
1141
  import * as path from "path";
@@ -812,6 +1156,7 @@ var MetricsStore = class extends EventEmitter {
812
1156
  publish(eventType, data) {
813
1157
  this.emit("sse", { type: eventType, data });
814
1158
  }
1159
+ /** Mark a call as in-progress (creates the row if it does not yet exist). */
815
1160
  recordCallStart(data) {
816
1161
  const callId = data.call_id || "";
817
1162
  if (!callId) return;
@@ -909,6 +1254,7 @@ var MetricsStore = class extends EventEmitter {
909
1254
  }
910
1255
  this.publish("call_status", { call_id: callId, status, ...extra });
911
1256
  }
1257
+ /** Append a single conversation turn to an active call and broadcast it via SSE. */
912
1258
  recordTurn(data) {
913
1259
  const callId = data.call_id || "";
914
1260
  const turn = data.turn;
@@ -920,6 +1266,7 @@ var MetricsStore = class extends EventEmitter {
920
1266
  }
921
1267
  this.publish("turn_complete", { call_id: callId, turn });
922
1268
  }
1269
+ /** Move a call from active to completed and persist its final metrics. */
923
1270
  recordCallEnd(data, metrics) {
924
1271
  const callId = data.call_id || "";
925
1272
  if (!callId) return;
@@ -947,10 +1294,12 @@ var MetricsStore = class extends EventEmitter {
947
1294
  metrics: entry.metrics ?? null
948
1295
  });
949
1296
  }
1297
+ /** Return up to `limit` completed calls in newest-first order, skipping the first `offset`. */
950
1298
  getCalls(limit = 50, offset = 0) {
951
1299
  const ordered = [...this.calls].reverse(); // reverse a copy so this.calls itself is not reordered
952
1300
  return ordered.slice(offset, offset + limit);
953
1301
  }
1302
+ /** Look up a completed call by id (newest match wins). */
954
1303
  getCall(callId) {
955
1304
  for (let i = this.calls.length - 1; i >= 0; i--) {
956
1305
  if (this.calls[i].call_id === callId) return this.calls[i];
@@ -961,9 +1310,11 @@ var MetricsStore = class extends EventEmitter {
961
1310
  getActive(callId) { // Look up an in-progress call by id in this.activeCalls.
962
1311
  return this.activeCalls.get(callId); // presumably undefined when the id is not active — confirm activeCalls is a Map
963
1312
  }
1313
+ /** Return a freshly built array of all currently active (not yet ended) calls. */
964
1314
  getActiveCalls() {
965
1315
  return Array.from(this.activeCalls.values());
966
1316
  }
1317
+ /** Compute summary statistics across the buffered call history. */
967
1318
  getAggregates() {
968
1319
  const totalCalls = this.calls.length;
969
1320
  if (totalCalls === 0) {
@@ -1015,6 +1366,7 @@ var MetricsStore = class extends EventEmitter {
1015
1366
  active_calls: this.activeCalls.size
1016
1367
  };
1017
1368
  }
1369
+ /** Return calls whose `started_at` falls within `[fromTs, toTs]` (Unix seconds). */
1018
1370
  getCallsInRange(fromTs = 0, toTs = 0) {
1019
1371
  return this.calls.filter((call) => {
1020
1372
  const started = call.started_at || 0;
@@ -1023,6 +1375,7 @@ var MetricsStore = class extends EventEmitter {
1023
1375
  return true;
1024
1376
  });
1025
1377
  }
1378
+ /** Getter: number of completed calls currently in the ring buffer (`this.calls.length`). */
1026
1379
  get callCount() {
1027
1380
  return this.calls.length;
1028
1381
  }
@@ -1123,7 +1476,11 @@ function parseTimestamp(raw) {
1123
1476
  return null;
1124
1477
  }
1125
1478
 
1479
+ // src/dashboard/routes.ts
1480
+ init_esm_shims();
1481
+
1126
1482
  // src/dashboard/auth.ts
1483
+ init_esm_shims();
1127
1484
  import crypto from "crypto";
1128
1485
  function timingSafeCompare(a, b) {
1129
1486
  const aBuf = Buffer.from(a);
@@ -1156,6 +1513,7 @@ function makeAuthMiddleware(token = "") {
1156
1513
  }
1157
1514
 
1158
1515
  // src/dashboard/export.ts
1516
+ init_esm_shims();
1159
1517
  function callsToCsv(calls) {
1160
1518
  const header = [
1161
1519
  "call_id",
@@ -1213,630 +1571,33 @@ function csvEscape(value) {
1213
1571
  }
1214
1572
 
1215
1573
  // src/dashboard/ui.ts
1216
- var DASHBOARD_HTML = `<!DOCTYPE html>
1217
- <html lang="en">
1218
- <head>
1219
- <meta charset="utf-8">
1220
- <meta name="viewport" content="width=device-width, initial-scale=1">
1221
- <title>Patter | Dashboard</title>
1222
- <link rel="icon" type="image/svg+xml" href="data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 1188 1773' fill='none'%3E%3Cstyle%3Epath%7Bstroke:%2309090b%7D@media(prefers-color-scheme:dark)%7Bpath%7Bstroke:%23e4e4e7%7D%7D%3C/style%3E%3Cpath d='M25 561L245 694M25 561V818M245 694V951M25 961V1218M25 1357V1614M245 1489V1747M245 1093V1351M942 823V1080M1161 955V1213M1162 555V812M942 422V679M669 585V843L787 913M942 25V282M1162 158V415M25 818L245 951M244 1094L464 962M25 961L143 890M244 1352L464 1219M942 823L1162 956M942 679L1162 812M721 811L942 679M669 842L724 809M669 586L724 553M1041 883L1162 812M245 1747L1161 1213M244 1490L942 1080M25 1357L142 1289M518 1071L942 823M721 555L942 422M942 422L1162 556M942 282L1162 415M942 25L1162 158M942 1080L1161 1213M25 1218L245 1351M25 961L245 1094M464 962L519 929M464 1219L519 1186V928L403 859M25 1357L245 1490M25 1614L245 1747M25 561L942 25M244 694L941 282M1043 484L1162 415M245 951L668 704' stroke-width='50' stroke-linecap='round'/%3E%3C/svg%3E">
1223
- <link rel="preconnect" href="https://fonts.googleapis.com">
1224
- <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
1225
- <link href="https://fonts.googleapis.com/css2?family=Instrument+Sans:wght@400;500;600;700&family=JetBrains+Mono:wght@400;500&display=swap" rel="stylesheet">
1226
- <style>
1227
- :root {
1228
- --bg: #fdfcfc;
1229
- --fg: #09090b;
1230
- --card: #ffffff;
1231
- --primary: #18181b;
1232
- --primary-fg: #fafafa;
1233
- --secondary: #f4f4f5;
1234
- --muted: #71717b;
1235
- --border: #e4e4e7;
1236
- --border-d: #d4d4d8;
1237
- --green: #22c55e;
1238
- --red: #ef4444;
1239
- --blue: #3b82f6;
1240
- --purple: #a78bfa;
1241
- --orange: #fb923c;
1242
- --yellow: #eab308;
1243
- --radius: 12px;
1244
- --font: 'Instrument Sans', ui-sans-serif, system-ui, sans-serif;
1245
- --mono: 'JetBrains Mono', ui-monospace, SFMono-Regular, Menlo, monospace;
1246
- --header-bg: #fff;
1247
- --assistant-bubble: #f0eeff;
1248
- }
1249
- @media (prefers-color-scheme: dark) {
1250
- :root {
1251
- --bg: #151518;
1252
- --fg: #e4e4e7;
1253
- --card: #1c1c21;
1254
- --primary: #e4e4e7;
1255
- --primary-fg: #18181b;
1256
- --secondary: #232329;
1257
- --muted: #8b8b95;
1258
- --border: #2c2c33;
1259
- --border-d: #3a3a44;
1260
- --green: #34d399;
1261
- --red: #f87171;
1262
- --blue: #60a5fa;
1263
- --purple: #c4b5fd;
1264
- --orange: #fdba74;
1265
- --yellow: #fbbf24;
1266
- --header-bg: #1a1a1f;
1267
- --assistant-bubble: #252230;
1268
- }
1269
- }
1270
- * { margin:0; padding:0; box-sizing:border-box; }
1271
- html { -webkit-font-smoothing: antialiased; }
1272
- body {
1273
- font-family: var(--font);
1274
- font-size: 15px;
1275
- line-height: 1.6;
1276
- color: var(--fg);
1277
- background: var(--bg);
1278
- min-height: 100vh;
1279
- }
1280
-
1281
- /* Header */
1282
- header {
1283
- position: sticky; top: 0; z-index: 100;
1284
- background: var(--header-bg);
1285
- border-bottom: 1px solid var(--border);
1286
- padding: 0 24px;
1287
- height: 56px;
1288
- display: flex; align-items: center; gap: 14px;
1289
- }
1290
- .logo {
1291
- display: flex; align-items: center; gap: 10px;
1292
- font-weight: 700; font-size: 18px; letter-spacing: -0.02em;
1293
- text-decoration: none; color: var(--fg);
1294
- }
1295
- .logo svg { width: 22px; height: 22px; }
1296
- .header-sep {
1297
- width: 1px; height: 20px; background: var(--border-d); margin: 0 2px;
1298
- }
1299
- .header-title {
1300
- font-size: 14px; font-weight: 500; color: var(--muted);
1301
- }
1302
- .badge-beta {
1303
- font-size: 10px; font-weight: 600; letter-spacing: 0.5px;
1304
- color: #e67e22; background: rgba(230,126,34,0.1);
1305
- border: 1px solid rgba(230,126,34,0.25);
1306
- padding: 2px 8px; border-radius: 100px; text-transform: uppercase;
1307
- }
1308
- .status {
1309
- margin-left: auto; font-size: 13px; color: var(--muted);
1310
- display: flex; align-items: center; gap: 6px;
1311
- }
1312
- .dot {
1313
- width: 7px; height: 7px; border-radius: 50%;
1314
- background: var(--green); display: inline-block;
1315
- }
1316
-
1317
- /* Layout */
1318
- .container { max-width: 1200px; margin: 0 auto; padding: 24px; }
1319
-
1320
- /* Stat cards */
1321
- .cards {
1322
- display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
1323
- gap: 14px; margin-bottom: 28px;
1324
- }
1325
- .card {
1326
- background: var(--card);
1327
- border: 1px solid var(--border);
1328
- border-radius: var(--radius);
1329
- padding: 18px 20px;
1330
- }
1331
- .card .label {
1332
- font-size: 12px; color: var(--muted);
1333
- text-transform: uppercase; letter-spacing: 0.5px; font-weight: 500;
1334
- }
1335
- .card .value {
1336
- font-size: 28px; font-weight: 700; margin-top: 4px;
1337
- font-family: var(--mono); letter-spacing: -0.02em;
1338
- }
1339
- .card .sub { font-size: 12px; color: var(--muted); margin-top: 2px; }
1340
-
1341
- /* Tabs */
1342
- .nav-tabs {
1343
- display: flex; gap: 0; margin-bottom: 16px;
1344
- border-bottom: 1px solid var(--border);
1345
- }
1346
- .nav-tab {
1347
- padding: 10px 20px; font-size: 13px; font-weight: 500;
1348
- color: var(--muted); cursor: pointer;
1349
- border: none; background: none;
1350
- border-bottom: 2px solid transparent;
1351
- margin-bottom: -1px; font-family: var(--font);
1352
- transition: color .15s;
1353
- }
1354
- .nav-tab:hover { color: var(--fg); }
1355
- .nav-tab.active { color: var(--fg); border-bottom-color: var(--primary); }
1356
-
1357
- .tab-content { display: none; }
1358
- .tab-content.active { display: block; }
1359
-
1360
- /* Tables */
1361
- table {
1362
- width: 100%; border-collapse: collapse;
1363
- background: var(--card);
1364
- border: 1px solid var(--border);
1365
- border-radius: var(--radius);
1366
- overflow: hidden;
1367
- }
1368
- th {
1369
- text-align: left; font-size: 11px; text-transform: uppercase;
1370
- color: var(--muted); padding: 12px 16px;
1371
- border-bottom: 1px solid var(--border);
1372
- letter-spacing: 0.5px; font-weight: 600;
1373
- background: var(--secondary);
1374
- }
1375
- td {
1376
- padding: 12px 16px; border-bottom: 1px solid var(--border);
1377
- font-size: 13px;
1378
- }
1379
- tr:last-child td { border-bottom: none; }
1380
- tr.clickable { cursor: pointer; transition: background .1s; }
1381
- tr.clickable:hover { background: var(--secondary); }
1382
-
1383
- code {
1384
- font-family: var(--mono); font-size: 12px;
1385
- background: var(--secondary); padding: 2px 6px;
1386
- border-radius: 4px;
1387
- }
1388
-
1389
- /* Badges */
1390
- .badge {
1391
- display: inline-block; padding: 3px 10px; border-radius: 100px;
1392
- font-size: 11px; font-weight: 600;
1393
- }
1394
- .badge-active { background: rgba(34,197,94,0.1); color: #16a34a; }
1395
- .badge-ended { background: var(--secondary); color: var(--muted); }
1396
- .badge-pipeline { background: rgba(167,139,250,0.1); color: #7c3aed; }
1397
- .badge-realtime { background: rgba(59,130,246,0.1); color: #2563eb; }
1398
-
1399
- .cost { color: #16a34a; font-family: var(--mono); font-size: 13px; }
1400
- .latency { color: #ca8a04; font-family: var(--mono); font-size: 13px; }
1401
- @media (prefers-color-scheme: dark) {
1402
- .cost { color: var(--green); }
1403
- .latency { color: var(--yellow); }
1404
- code { background: var(--secondary); color: var(--fg); }
1405
- }
1406
- .empty {
1407
- text-align: center; padding: 48px; color: var(--muted);
1408
- font-size: 14px;
1409
- }
1410
-
1411
- /* Modal */
1412
- .modal-overlay {
1413
- display: none; position: fixed; inset: 0;
1414
- background: rgba(0,0,0,0.4); backdrop-filter: blur(6px);
1415
- z-index: 200;
1416
- justify-content: center; align-items: flex-start;
1417
- padding: 48px 20px; overflow-y: auto;
1418
- }
1419
- .modal-overlay.open { display: flex; }
1420
- .modal {
1421
- background: var(--card);
1422
- border: 1px solid var(--border);
1423
- border-radius: 16px;
1424
- max-width: 820px; width: 100%;
1425
- padding: 0;
1426
- box-shadow: 0 24px 64px rgba(0,0,0,0.12), 0 0 0 1px rgba(0,0,0,0.03);
1427
- overflow: hidden;
1428
- }
1429
- .modal-header {
1430
- display: flex; justify-content: space-between; align-items: center;
1431
- padding: 20px 28px;
1432
- border-bottom: 1px solid var(--border);
1433
- background: var(--bg);
1434
- }
1435
- .modal-header h2 { font-size: 15px; font-weight: 600; display: flex; align-items: center; gap: 10px; }
1436
- .modal-close {
1437
- background: none; border: 1px solid var(--border);
1438
- color: var(--muted); width: 30px; height: 30px;
1439
- border-radius: 8px; font-size: 16px; cursor: pointer;
1440
- display: flex; align-items: center; justify-content: center;
1441
- transition: all .15s;
1442
- }
1443
- .modal-close:hover { background: var(--secondary); color: var(--fg); }
1444
- .modal-body { padding: 24px 28px; }
1445
-
1446
- .detail-grid {
1447
- display: grid; grid-template-columns: 1fr 1fr;
1448
- gap: 14px; margin-bottom: 20px;
1449
- }
1450
- .detail-card {
1451
- background: var(--bg);
1452
- border: 1px solid var(--border);
1453
- border-radius: var(--radius); padding: 16px 18px;
1454
- }
1455
- .detail-card h3 {
1456
- font-size: 11px; color: var(--muted);
1457
- text-transform: uppercase; letter-spacing: 0.5px;
1458
- margin-bottom: 10px; font-weight: 600;
1459
- }
1460
- .detail-row {
1461
- display: flex; justify-content: space-between; align-items: baseline;
1462
- font-size: 13px; padding: 5px 0;
1463
- }
1464
- .detail-row .k { color: var(--muted); font-weight: 500; }
1465
- .detail-row span:last-child { font-weight: 500; text-align: right; }
1466
- .detail-row .mono { font-family: var(--mono); font-size: 12px; }
1467
- .detail-sep {
1468
- border-top: 1px solid var(--border); padding-top: 8px; margin-top: 6px;
1469
- }
1470
-
1471
- .transcript-box {
1472
- border: 1px solid var(--border);
1473
- border-radius: var(--radius);
1474
- padding: 16px; max-height: 340px; overflow-y: auto;
1475
- background: var(--bg);
1476
- }
1477
- .transcript-box .msg {
1478
- padding: 8px 12px; border-radius: 10px; font-size: 13px;
1479
- max-width: 85%; margin-bottom: 6px; line-height: 1.5;
1480
- }
1481
- .transcript-box .msg.user {
1482
- background: var(--secondary); margin-left: auto;
1483
- border-bottom-right-radius: 4px;
1484
- }
1485
- .transcript-box .msg.assistant {
1486
- background: var(--assistant-bubble); margin-right: auto;
1487
- border-bottom-left-radius: 4px;
1488
- }
1489
- .transcript-box .role {
1490
- font-weight: 600; font-size: 11px; text-transform: uppercase;
1491
- letter-spacing: 0.3px; display: block; margin-bottom: 2px;
1492
- }
1493
- .transcript-box .msg.user .role { color: var(--blue); }
1494
- .transcript-box .msg.assistant .role { color: #7c3aed; }
1495
-
1496
- /* Turn bars */
1497
- .turns-table { margin-top: 16px; }
1498
- .turns-table table { border: 1px solid var(--border); }
1499
- .bar-container { display: flex; height: 14px; border-radius: 4px; overflow: hidden; min-width: 120px; }
1500
- .bar-stt { background: var(--blue); }
1501
- .bar-llm { background: var(--purple); }
1502
- .bar-tts { background: var(--orange); }
1503
- </style>
1504
- </head>
1505
- <body>
1506
- <header>
1507
- <a href="/" class="logo">
1508
- <svg viewBox="0 0 1188 1773" fill="none" xmlns="http://www.w3.org/2000/svg">
1509
- <path d="M25 561L245 694M25 561V818M245 694V951M25 961V1218M25 1357V1614M245 1489V1747M245 1093V1351M942 823V1080M1161 955V1213M1162 555V812M942 422V679M669 585V843L787 913M942 25V282M1162 158V415M25 818L245 951M244 1094L464 962M25 961L143 890M244 1352L464 1219M942 823L1162 956M942 679L1162 812M721 811L942 679M669 842L724 809M669 586L724 553M1041 883L1162 812M245 1747L1161 1213M244 1490L942 1080M25 1357L142 1289M518 1071L942 823M721 555L942 422M942 422L1162 556M942 282L1162 415M942 25L1162 158M942 1080L1161 1213M25 1218L245 1351M25 961L245 1094M464 962L519 929M464 1219L519 1186V928L403 859M25 1357L245 1490M25 1614L245 1747M25 561L942 25M244 694L941 282M1043 484L1162 415M245 951L668 704" stroke="currentColor" stroke-width="50" stroke-linecap="round"/>
1510
- </svg>
1511
- Patter
1512
- </a>
1513
- <div class="header-sep"></div>
1514
- <span class="header-title">Dashboard</span>
1515
- <span class="badge-beta">Beta</span>
1516
- <div class="status"><span class="dot"></span> <span id="status-text">Listening</span></div>
1517
- </header>
1518
-
1519
- <div class="container">
1520
- <div class="cards">
1521
- <div class="card">
1522
- <div class="label">Total Calls</div>
1523
- <div class="value" id="stat-total">0</div>
1524
- <div class="sub"><span id="stat-active">0</span> active</div>
1525
- </div>
1526
- <div class="card">
1527
- <div class="label">Total Cost</div>
1528
- <div class="value cost" id="stat-cost">$0.00</div>
1529
- <div class="sub" id="stat-cost-breakdown">-</div>
1530
- </div>
1531
- <div class="card">
1532
- <div class="label">Avg Duration</div>
1533
- <div class="value" id="stat-duration">0s</div>
1534
- </div>
1535
- <div class="card">
1536
- <div class="label">Avg Latency</div>
1537
- <div class="value latency" id="stat-latency">0ms</div>
1538
- <div class="sub">end-to-end response</div>
1539
- </div>
1540
- </div>
1541
-
1542
- <div class="nav-tabs">
1543
- <button class="nav-tab active" data-tab="calls">Calls</button>
1544
- <button class="nav-tab" data-tab="active">Active</button>
1545
- </div>
1546
-
1547
- <div class="tab-content active" id="tab-calls">
1548
- <div class="section">
1549
- <table id="calls-table">
1550
- <thead>
1551
- <tr>
1552
- <th>Call ID</th><th>Direction</th><th>From / To</th>
1553
- <th>Duration</th><th>Mode</th><th>Cost</th><th>Avg Latency</th><th>Turns</th>
1554
- </tr>
1555
- </thead>
1556
- <tbody id="calls-body">
1557
- <tr><td colspan="8" class="empty">No calls yet. Waiting for incoming calls...</td></tr>
1558
- </tbody>
1559
- </table>
1560
- </div>
1561
- </div>
1562
-
1563
- <div class="tab-content" id="tab-active">
1564
- <div class="section">
1565
- <table>
1566
- <thead>
1567
- <tr><th>Call ID</th><th>Caller</th><th>Callee</th><th>Direction</th><th>Duration</th><th>Turns</th></tr>
1568
- </thead>
1569
- <tbody id="active-body">
1570
- <tr><td colspan="6" class="empty">No active calls</td></tr>
1571
- </tbody>
1572
- </table>
1573
- </div>
1574
- </div>
1575
- </div>
1576
-
1577
- <div class="modal-overlay" id="modal">
1578
- <div class="modal">
1579
- <div class="modal-header">
1580
- <h2 id="modal-title">Call Detail</h2>
1581
- <button class="modal-close" onclick="closeModal()">&times;</button>
1582
- </div>
1583
- <div class="modal-body" id="modal-body"></div>
1584
- </div>
1585
- </div>
1586
-
1587
- <script>
1588
- var _$ = function(s) { return document.querySelector(s); };
1589
- var _$$ = function(s) { return document.querySelectorAll(s); };
1590
-
1591
- _$$('.nav-tab').forEach(function(tab) {
1592
- tab.addEventListener('click', function() {
1593
- _$$('.nav-tab').forEach(function(t) { t.classList.remove('active'); });
1594
- _$$('.tab-content').forEach(function(t) { t.classList.remove('active'); });
1595
- tab.classList.add('active');
1596
- document.querySelector('#tab-'+tab.dataset.tab).classList.add('active');
1597
- });
1598
- });
1599
-
1600
- function esc(s) {
1601
- if (!s) return '';
1602
- return String(s).replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;').replace(/"/g,'&quot;').replace(/'/g,'&#39;');
1603
- }
1604
- function fmtCost(v) { return v >= 0.01 ? '$'+v.toFixed(4) : v > 0 ? '$'+v.toFixed(6) : '$0.00'; }
1605
- function fmtMs(v) { return v != null && v >= 0 ? Math.round(v)+'ms' : '-'; }
1606
- function fmtDur(s) {
1607
- if (s == null || s < 0) return '-';
1608
- if (s < 60) return Math.round(s)+'s';
1609
- return Math.floor(s/60)+'m '+Math.round(s%60)+'s';
1610
- }
1611
- function shortId(id) { return id ? esc(id.length > 16 ? id.slice(0,8)+'...'+id.slice(-4) : id) : '-'; }
1612
-
1613
- function fetchJSON(url) {
1614
- return fetch(url).then(function(r) { return r.json(); });
1615
- }
1616
-
1617
- function refreshAggregates() {
1618
- return fetchJSON('/api/dashboard/aggregates').then(function(d) {
1619
- _$('#stat-total').textContent = d.total_calls;
1620
- _$('#stat-active').textContent = d.active_calls;
1621
- _$('#stat-cost').textContent = fmtCost(d.total_cost);
1622
- var cb = d.cost_breakdown;
1623
- _$('#stat-cost-breakdown').textContent =
1624
- 'STT '+fmtCost(cb.stt)+' | LLM '+fmtCost(cb.llm)+' | TTS '+fmtCost(cb.tts)+' | Tel '+fmtCost(cb.telephony);
1625
- _$('#stat-duration').textContent = fmtDur(d.avg_duration);
1626
- _$('#stat-latency').textContent = fmtMs(d.avg_latency_ms);
1627
- });
1628
- }
1629
-
1630
- function refreshCalls() {
1631
- return fetchJSON('/api/dashboard/calls?limit=50').then(function(calls) {
1632
- var body = _$('#calls-body');
1633
- if (!calls.length) {
1634
- body.innerHTML = '<tr><td colspan="8" class="empty">No calls yet. Waiting for incoming calls...</td></tr>';
1635
- return;
1636
- }
1637
- body.innerHTML = calls.map(function(c) {
1638
- var m = c.metrics || {};
1639
- var cost = m.cost || {};
1640
- var lat = m.latency_avg || {};
1641
- var mode = m.provider_mode || '-';
1642
- var turns = m.turns ? m.turns.length : 0;
1643
- var modeClass = mode === 'pipeline' ? 'badge-pipeline' : 'badge-realtime';
1644
- return '<tr class="clickable" onclick="showCall(\\''+esc(c.call_id)+'\\')">'+
1645
- '<td><code>'+shortId(c.call_id)+'</code></td>'+
1646
- '<td>'+(esc(c.direction) || '-')+'</td>'+
1647
- '<td>'+(esc(c.caller) || '-')+' &rarr; '+(esc(c.callee) || '-')+'</td>'+
1648
- '<td>'+fmtDur(m.duration_seconds)+'</td>'+
1649
- '<td><span class="badge '+modeClass+'">'+esc(mode)+'</span></td>'+
1650
- '<td class="cost">'+fmtCost(cost.total || 0)+'</td>'+
1651
- '<td class="latency">'+fmtMs(lat.total_ms || 0)+'</td>'+
1652
- '<td>'+turns+'</td></tr>';
1653
- }).join('');
1654
- });
1655
- }
1656
-
1657
- function refreshActive() {
1658
- return fetchJSON('/api/dashboard/active').then(function(active) {
1659
- var body = _$('#active-body');
1660
- if (!active.length) {
1661
- body.innerHTML = '<tr><td colspan="6" class="empty">No active calls</td></tr>';
1662
- return;
1663
- }
1664
- var now = Date.now() / 1000;
1665
- body.innerHTML = active.map(function(c) {
1666
- var dur = c.started_at ? Math.round(now - c.started_at) : 0;
1667
- var turns = c.turns ? c.turns.length : 0;
1668
- return '<tr>'+
1669
- '<td><code>'+shortId(c.call_id)+'</code></td>'+
1670
- '<td>'+(esc(c.caller) || '-')+'</td>'+
1671
- '<td>'+(esc(c.callee) || '-')+'</td>'+
1672
- '<td>'+(esc(c.direction) || '-')+'</td>'+
1673
- '<td data-started="'+(c.started_at || 0)+'">'+fmtDur(dur)+'</td>'+
1674
- '<td>'+turns+'</td></tr>';
1675
- }).join('');
1676
- });
1677
- }
1678
-
1679
- function showCall(callId) {
1680
- fetchJSON('/api/dashboard/calls/'+encodeURIComponent(callId)).then(function(c) {
1681
- if (c.error) return;
1682
- var m = c.metrics || {};
1683
- var cost = m.cost || {};
1684
- var latAvg = m.latency_avg || {};
1685
- var latP95 = m.latency_p95 || {};
1686
- var turns = m.turns || [];
1687
-
1688
- var modeLabel = (m.provider_mode || '').replace(/_/g, ' ');
1689
- var modeBadgeClass = (m.provider_mode || '').indexOf('pipeline') !== -1 ? 'badge-pipeline' : 'badge-realtime';
1690
- _$('#modal-title').innerHTML = 'Call <code>'+shortId(c.call_id)+'</code> <span class="badge '+modeBadgeClass+'" style="font-size:10px">'+esc(modeLabel)+'</span>';
1691
-
1692
- var isRealtime = (m.provider_mode || '').indexOf('realtime') !== -1;
1693
-
1694
- var html = '<div class="detail-grid">'+
1695
- '<div class="detail-card">'+
1696
- '<h3>Overview</h3>'+
1697
- '<div class="detail-row"><span class="k">Direction</span><span>'+(esc(c.direction) || '-')+'</span></div>'+
1698
- '<div class="detail-row"><span class="k">From</span><span class="mono">'+(esc(c.caller) || '-')+'</span></div>'+
1699
- '<div class="detail-row"><span class="k">To</span><span class="mono">'+(esc(c.callee) || '-')+'</span></div>'+
1700
- '<div class="detail-row"><span class="k">Duration</span><span style="font-weight:600">'+fmtDur(m.duration_seconds)+'</span></div>'+
1701
- (isRealtime ? '' :
1702
- '<div class="detail-row"><span class="k">STT</span><span>'+(esc(m.stt_provider) || '-')+'</span></div>'+
1703
- '<div class="detail-row"><span class="k">TTS</span><span>'+(esc(m.tts_provider) || '-')+'</span></div>'+
1704
- '<div class="detail-row"><span class="k">LLM</span><span>'+(esc(m.llm_provider) || '-')+'</span></div>'
1705
- )+
1706
- '<div class="detail-row"><span class="k">Telephony</span><span>'+(esc(m.telephony_provider) || '-')+'</span></div>'+
1707
- '</div>'+
1708
- '<div class="detail-card">'+
1709
- '<h3>Cost</h3>'+
1710
- (isRealtime ?
1711
- '<div class="detail-row"><span class="k">OpenAI</span><span class="cost">'+fmtCost(cost.llm || 0)+'</span></div>' :
1712
- '<div class="detail-row"><span class="k">STT</span><span class="cost">'+fmtCost(cost.stt || 0)+'</span></div>'+
1713
- '<div class="detail-row"><span class="k">LLM</span><span class="cost">'+fmtCost(cost.llm || 0)+'</span></div>'+
1714
- '<div class="detail-row"><span class="k">TTS</span><span class="cost">'+fmtCost(cost.tts || 0)+'</span></div>'
1715
- )+
1716
- '<div class="detail-row"><span class="k">Telephony</span><span class="cost">'+fmtCost(cost.telephony || 0)+'</span></div>'+
1717
- '<div class="detail-row detail-sep">'+
1718
- '<span class="k" style="font-weight:600">Total</span><span class="cost" style="font-weight:700;font-size:14px">'+fmtCost(cost.total || 0)+'</span>'+
1719
- '</div>'+
1720
- '<h3 style="margin-top:16px">Latency <span style="font-weight:400;text-transform:none;letter-spacing:0;color:var(--muted)">(avg / p95)</span></h3>'+
1721
- (isRealtime ? '' :
1722
- '<div class="detail-row"><span class="k">STT</span><span class="latency">'+fmtMs(latAvg.stt_ms)+' / '+fmtMs(latP95.stt_ms)+'</span></div>'+
1723
- '<div class="detail-row"><span class="k">LLM</span><span class="latency">'+fmtMs(latAvg.llm_ms)+' / '+fmtMs(latP95.llm_ms)+'</span></div>'+
1724
- '<div class="detail-row"><span class="k">TTS</span><span class="latency">'+fmtMs(latAvg.tts_ms)+' / '+fmtMs(latP95.tts_ms)+'</span></div>'
1725
- )+
1726
- '<div class="detail-row"><span class="k">'+(isRealtime ? 'End-to-end' : 'Total')+'</span><span class="latency" style="font-weight:700;font-size:14px">'+fmtMs(latAvg.total_ms)+' / '+fmtMs(latP95.total_ms)+'</span></div>'+
1727
- '</div></div>';
1728
-
1729
- if (turns.length) {
1730
- var maxMs = Math.max.apply(null, turns.map(function(t) {
1731
- var l = t.latency || {};
1732
- return (l.stt_ms||0) + (l.llm_ms||0) + (l.tts_ms||0) + (l.total_ms||0);
1733
- }).concat([1]));
1734
- html += '<div class="detail-card turns-table"><h3>Turns ('+turns.length+')</h3>'+
1735
- '<table><thead><tr><th>#</th><th>User</th><th>Agent</th><th>Latency</th><th>Breakdown</th></tr></thead><tbody>';
1736
- turns.forEach(function(t, i) {
1737
- var l = t.latency || {};
1738
- var total = l.total_ms || ((l.stt_ms||0) + (l.llm_ms||0) + (l.tts_ms||0));
1739
- var scale = total > 0 ? 120 / maxMs : 0;
1740
- var sttW = (l.stt_ms||0) * scale;
1741
- var llmW = (l.llm_ms||0) * scale;
1742
- var ttsW = (l.tts_ms||0) * scale;
1743
- var totalW = total > 0 && sttW === 0 && llmW === 0 && ttsW === 0 ? total * scale : 0;
1744
- html += '<tr>'+
1745
- '<td>'+(t.turn_index !== undefined ? t.turn_index : i)+'</td>'+
1746
- '<td title="'+esc(t.user_text||'')+'">'+esc((t.user_text||'').slice(0,40))+((t.user_text||'').length>40?'...':'')+'</td>'+
1747
- '<td title="'+esc(t.agent_text||'')+'">'+esc((t.agent_text||'').slice(0,40))+((t.agent_text||'').length>40?'...':'')+'</td>'+
1748
- '<td class="latency">'+fmtMs(total)+'</td>'+
1749
- '<td><div class="bar-container">'+
1750
- (sttW > 0 ? '<div class="bar-stt" style="width:'+sttW+'px" title="STT '+fmtMs(l.stt_ms)+'"></div>' : '')+
1751
- (llmW > 0 ? '<div class="bar-llm" style="width:'+llmW+'px" title="LLM '+fmtMs(l.llm_ms)+'"></div>' : '')+
1752
- (ttsW > 0 ? '<div class="bar-tts" style="width:'+ttsW+'px" title="TTS '+fmtMs(l.tts_ms)+'"></div>' : '')+
1753
- (totalW > 0 ? '<div class="bar-llm" style="width:'+totalW+'px" title="Total '+fmtMs(total)+'"></div>' : '')+
1754
- '</div></td></tr>';
1755
- });
1756
- html += '</tbody></table>'+
1757
- '<div style="margin-top:10px;font-size:11px;color:var(--muted)">'+
1758
- (isRealtime ?
1759
- '<span style="color:var(--purple)">&#9632;</span> End-to-end' :
1760
- '<span style="color:var(--blue)">&#9632;</span> STT &nbsp;'+
1761
- '<span style="color:var(--purple)">&#9632;</span> LLM &nbsp;'+
1762
- '<span style="color:var(--orange)">&#9632;</span> TTS'
1763
- )+
1764
- '</div></div>';
1765
- }
1766
-
1767
- var transcript = c.transcript || [];
1768
- if (transcript.length) {
1769
- html += '<div class="detail-card" style="margin-top:16px"><h3>Transcript</h3><div class="transcript-box">';
1770
- transcript.forEach(function(msg) {
1771
- var role = esc(msg.role || 'unknown');
1772
- html += '<div class="msg '+role+'"><span class="role">'+role+'</span>'+esc(msg.text || '')+'</div>';
1773
- });
1774
- html += '</div></div>';
1574
+ init_esm_shims();
1575
+ import { readFileSync as readFileSync2 } from "fs";
1576
+ import { join as join2, dirname } from "path";
1577
+ var FALLBACK_HTML = `<!doctype html>
1578
+ <html><head><meta charset="utf-8"><title>Patter dashboard</title></head>
1579
+ <body style="font-family:ui-sans-serif,system-ui;padding:2rem;color:#1a1a1a">
1580
+ <h1>Dashboard asset missing</h1>
1581
+ <p>The bundled <code>ui.html</code> was not found alongside this module.
1582
+ Run <code>cd dashboard-app &amp;&amp; npm run build &amp;&amp; npm run sync</code>
1583
+ from the repo root to regenerate it.</p>
1584
+ </body></html>`; // Placeholder page returned by loadDashboardHtml() when no ui.html candidate can be read.
1585
+ function loadDashboardHtml() { // Returns the dashboard page HTML: the first readable ui.html candidate, else FALLBACK_HTML.
1586
+ const here = typeof __dirname !== "undefined" ? __dirname : dirname("."); // base directory; uses dirname(".") when __dirname is not defined
1587
+ const candidates = [ // probed in this order; the first successful read wins
1588
+ join2(here, "ui.html"),
1589
+ join2(here, "dashboard", "ui.html"),
1590
+ join2(here, "..", "dashboard", "ui.html")
1591
+ ];
1592
+ for (const path3 of candidates) {
1593
+ try {
1594
+ return readFileSync2(path3, "utf8");
1595
+ } catch { // deliberate best-effort: a failed read just moves on to the next candidate
1775
1596
  }
1776
-
1777
- _$('#modal-body').innerHTML = html;
1778
- _$('#modal').classList.add('open');
1779
- });
1780
- }
1781
-
1782
- function closeModal() { _$('#modal').classList.remove('open'); }
1783
- _$('#modal').addEventListener('click', function(e) { if (e.target === _$('#modal')) closeModal(); });
1784
- document.addEventListener('keydown', function(e) { if (e.key === 'Escape') closeModal(); });
1785
-
1786
- function refresh() {
1787
- return Promise.all([refreshAggregates(), refreshCalls(), refreshActive()]).then(function() {
1788
- _$('#status-text').textContent = 'Listening';
1789
- }).catch(function() {
1790
- _$('#status-text').textContent = 'Connection error';
1791
- });
1792
- }
1793
-
1794
- refresh();
1795
-
1796
- // Update active call durations every second
1797
- setInterval(function() {
1798
- var cells = document.querySelectorAll('#active-body td[data-started]');
1799
- if (!cells.length) return;
1800
- var now = Date.now() / 1000;
1801
- cells.forEach(function(td) {
1802
- var started = parseFloat(td.getAttribute('data-started'));
1803
- if (started) td.textContent = fmtDur(Math.round(now - started));
1804
- });
1805
- }, 1000);
1806
-
1807
- if (typeof EventSource !== 'undefined') {
1808
- var sseUrl = '/api/dashboard/events';
1809
- var sseBackoff = 1000;
1810
- var sseFailures = 0;
1811
- var SSE_MAX_BACKOFF = 30000;
1812
- var SSE_MAX_FAILURES = 5;
1813
-
1814
- function connectSSE() {
1815
- var es = new EventSource(sseUrl);
1816
- function onEvent() { sseBackoff = 1000; sseFailures = 0; }
1817
- es.addEventListener('call_start', function() { onEvent(); refresh(); });
1818
- es.addEventListener('turn_complete', function() { onEvent(); refreshAggregates(); });
1819
- es.addEventListener('call_end', function() { onEvent(); refresh(); });
1820
- es.onerror = function() {
1821
- es.close();
1822
- sseFailures++;
1823
- if (sseFailures >= SSE_MAX_FAILURES) {
1824
- _$('#status-text').textContent = 'Polling';
1825
- setInterval(refresh, 5000);
1826
- return;
1827
- }
1828
- _$('#status-text').textContent = 'Reconnecting...';
1829
- setTimeout(connectSSE, sseBackoff);
1830
- sseBackoff = Math.min(sseBackoff * 2, SSE_MAX_BACKOFF);
1831
- };
1832
1597
  }
1833
- connectSSE();
1834
- } else {
1835
- setInterval(refresh, 3000);
1598
+ return FALLBACK_HTML; // no candidate path could be read
1836
1599
  }
1837
- </script>
1838
- </body>
1839
- </html>`;
1600
+ var DASHBOARD_HTML = loadDashboardHtml(); // Resolved once when this statement runs, via a synchronous file read; holds the fallback page if ui.html is missing.
1840
1601
 
1841
1602
  // src/dashboard/routes.ts
1842
1603
  function mountDashboard(app, store, token = "") {
@@ -1996,6 +1757,7 @@ function mountApi(app, store, token = "") {
1996
1757
  }
1997
1758
 
1998
1759
  // src/remote-message.ts
1760
+ init_esm_shims();
1999
1761
  import crypto2 from "crypto";
2000
1762
  var MAX_RESPONSE_BYTES = 64 * 1024;
2001
1763
  function validateWebSocketUrl(url) {
@@ -2200,43 +1962,99 @@ function isWebSocketUrl(url) {
2200
1962
  return url.startsWith("ws://") || url.startsWith("wss://");
2201
1963
  }
2202
1964
 
1965
+ // src/stream-handler.ts
1966
+ init_esm_shims();
1967
+
2203
1968
  // src/providers/deepgram-stt.ts
1969
+ init_esm_shims();
2204
1970
  import WebSocket3 from "ws";
2205
1971
 
2206
1972
  // src/errors.ts
1973
+ init_esm_shims();
1974
+ var ErrorCode = {
1975
+ /** Invalid constructor args, missing required env var, frozen-config violation. */
1976
+ CONFIG: "CONFIG",
1977
+ /** WebSocket connect failure, HTTP 5xx from provider, network error. */
1978
+ CONNECTION: "CONNECTION",
1979
+ /** Provider rejected our credentials (HTTP 401/403, invalid signature). */
1980
+ AUTH: "AUTH",
1981
+ /** Provider response, voicemail post, or other awaited operation timed out. */
1982
+ TIMEOUT: "TIMEOUT",
1983
+ /** Provider returned HTTP 429. */
1984
+ RATE_LIMIT: "RATE_LIMIT",
1985
+ /** Twilio / Telnyx webhook signature verification failed. */
1986
+ WEBHOOK_VERIFICATION: "WEBHOOK_VERIFICATION",
1987
+ /** Caller passed a malformed phone number, tool arg, etc. */
1988
+ INPUT_VALIDATION: "INPUT_VALIDATION",
1989
+ /** Generic catch-all for unexpected upstream provider failures. */
1990
+ PROVIDER_ERROR: "PROVIDER_ERROR",
1991
+ /** Phone number provisioning, webhook configuration, or carrier setup failed. */
1992
+ PROVISION: "PROVISION",
1993
+ /** Assertion failed / unexpected internal state. Likely a Patter bug. */
1994
+ INTERNAL: "INTERNAL"
1995
+ };
2207
1996
  var PatterError = class extends Error {
2208
- constructor(message) {
1997
+ /** Stable, machine-readable error code. Subclasses set the default. */
1998
+ code;
1999
+ constructor(message, options) {
2209
2000
  super(message);
2210
2001
  this.name = "PatterError";
2002
+ this.code = options?.code ?? ErrorCode.INTERNAL;
2211
2003
  }
2212
2004
  };
2213
2005
  var PatterConnectionError = class extends PatterError {
2214
- constructor(message) {
2215
- super(message);
2006
+ constructor(message, options) {
2007
+ super(message, { code: options?.code ?? ErrorCode.CONNECTION });
2216
2008
  this.name = "PatterConnectionError";
2217
2009
  }
2218
2010
  };
2219
2011
  var AuthenticationError = class extends PatterError {
2220
- constructor(message) {
2221
- super(message);
2012
+ constructor(message, options) {
2013
+ super(message, { code: options?.code ?? ErrorCode.AUTH });
2222
2014
  this.name = "AuthenticationError";
2223
2015
  }
2224
2016
  };
2225
2017
  var ProvisionError = class extends PatterError {
2226
- constructor(message) {
2227
- super(message);
2018
+ constructor(message, options) {
2019
+ super(message, { code: options?.code ?? ErrorCode.PROVISION });
2228
2020
  this.name = "ProvisionError";
2229
2021
  }
2230
2022
  };
2231
2023
  var RateLimitError = class extends PatterConnectionError {
2232
- constructor(message) {
2233
- super(message);
2024
+ constructor(message, options) {
2025
+ super(message, { code: options?.code ?? ErrorCode.RATE_LIMIT });
2234
2026
  this.name = "RateLimitError";
2235
2027
  }
2236
2028
  };
2237
2029
 
2238
2030
  // src/providers/deepgram-stt.ts
2239
2031
  var DEEPGRAM_WS_URL = "wss://api.deepgram.com/v1/listen";
2032
+ var DeepgramModel = {
2033
+ NOVA_3: "nova-3",
2034
+ NOVA_2: "nova-2",
2035
+ NOVA_2_PHONECALL: "nova-2-phonecall",
2036
+ NOVA_2_GENERAL: "nova-2-general",
2037
+ NOVA_2_MEETING: "nova-2-meeting",
2038
+ NOVA: "nova",
2039
+ ENHANCED: "enhanced",
2040
+ BASE: "base"
2041
+ };
2042
+ var DeepgramEncoding = {
2043
+ LINEAR16: "linear16",
2044
+ MULAW: "mulaw",
2045
+ ALAW: "alaw",
2046
+ OPUS: "opus",
2047
+ FLAC: "flac",
2048
+ AMR_NB: "amr-nb",
2049
+ AMR_WB: "amr-wb"
2050
+ };
2051
+ var DeepgramSampleRate = {
2052
+ HZ_8000: 8e3,
2053
+ HZ_16000: 16e3,
2054
+ HZ_24000: 24e3,
2055
+ HZ_44100: 44100,
2056
+ HZ_48000: 48e3
2057
+ };
2240
2058
  var KEEPALIVE_INTERVAL_MS = 4e3;
2241
2059
  var FINALIZE_DRAIN_MS = 100;
2242
2060
  var CLOSE_LATENCY_BUDGET_MS = 500;
@@ -2264,9 +2082,9 @@ var DeepgramSTT = class _DeepgramSTT {
2264
2082
  this.apiKey = apiKey;
2265
2083
  const opts = typeof languageOrOptions === "object" && languageOrOptions !== null ? languageOrOptions : options ?? {};
2266
2084
  this.language = (typeof languageOrOptions === "string" ? languageOrOptions : opts.language) ?? "en";
2267
- this.model = model ?? opts.model ?? "nova-3";
2268
- this.encoding = encoding ?? opts.encoding ?? "linear16";
2269
- this.sampleRate = sampleRate ?? opts.sampleRate ?? 16e3;
2085
+ this.model = model ?? opts.model ?? DeepgramModel.NOVA_3;
2086
+ this.encoding = encoding ?? opts.encoding ?? DeepgramEncoding.LINEAR16;
2087
+ this.sampleRate = sampleRate ?? opts.sampleRate ?? DeepgramSampleRate.HZ_16000;
2270
2088
  this.endpointingMs = opts.endpointingMs ?? 150;
2271
2089
  this.utteranceEndMs = opts.utteranceEndMs === null ? null : opts.utteranceEndMs ?? 1e3;
2272
2090
  this.smartFormat = opts.smartFormat ?? false;
@@ -2274,8 +2092,15 @@ var DeepgramSTT = class _DeepgramSTT {
2274
2092
  this.vadEvents = opts.vadEvents ?? true;
2275
2093
  }
2276
2094
  /** Factory for Twilio calls — mulaw 8 kHz. Forwards tuning options through. */
2277
- static forTwilio(apiKey, language = "en", model = "nova-3", options = {}) {
2278
- return new _DeepgramSTT(apiKey, language, model, "mulaw", 8e3, options);
2095
+ static forTwilio(apiKey, language = "en", model = DeepgramModel.NOVA_3, options = {}) {
2096
+ return new _DeepgramSTT(
2097
+ apiKey,
2098
+ language,
2099
+ model,
2100
+ DeepgramEncoding.MULAW,
2101
+ DeepgramSampleRate.HZ_8000,
2102
+ options
2103
+ );
2279
2104
  }
2280
2105
  buildUrl() {
2281
2106
  const params = new URLSearchParams({
@@ -2295,6 +2120,7 @@ var DeepgramSTT = class _DeepgramSTT {
2295
2120
  }
2296
2121
  return `${DEEPGRAM_WS_URL}?${params.toString()}`;
2297
2122
  }
2123
+ /** Open the streaming WebSocket and arm message + keepalive handlers. */
2298
2124
  async connect() {
2299
2125
  await this.openSocket();
2300
2126
  this.running = true;
@@ -2360,6 +2186,18 @@ var DeepgramSTT = class _DeepgramSTT {
2360
2186
  } catch {
2361
2187
  return;
2362
2188
  }
2189
+ const dataType = String(data.type ?? "unknown");
2190
+ if (dataType === "Results") {
2191
+ const transcript2 = (data.channel?.alternatives?.[0]?.transcript ?? "").trim();
2192
+ const isFinal = Boolean(data.is_final);
2193
+ const speechFinal2 = Boolean(data.speech_final);
2194
+ const fromFinalize = Boolean(data.from_finalize);
2195
+ getLogger().info(
2196
+ `[DIAG] DG Results text=${JSON.stringify(transcript2.slice(0, 60))} isFinal=${isFinal} speechFinal=${speechFinal2} fromFinalize=${fromFinalize}`
2197
+ );
2198
+ } else if (dataType !== "Metadata") {
2199
+ getLogger().info(`[DIAG] DG event type=${dataType}`);
2200
+ }
2363
2201
  if (data.type === "Metadata" && data.request_id) {
2364
2202
  this.requestId = data.request_id;
2365
2203
  return;
@@ -2444,23 +2282,71 @@ var DeepgramSTT = class _DeepgramSTT {
2444
2282
  this.running = false;
2445
2283
  }
2446
2284
  }
2285
+ /** Send a binary audio chunk to Deepgram for transcription. */
2447
2286
  sendAudio(audio) {
2448
- if (!this.ws || this.ws.readyState !== WebSocket3.OPEN) return;
2287
+ if (!this.ws || this.ws.readyState !== WebSocket3.OPEN) {
2288
+ this.audioDroppedCount++;
2289
+ if (this.audioDroppedCount === 1 || this.audioDroppedCount % 50 === 0) {
2290
+ getLogger().info(
2291
+ `[DIAG] DeepgramSTT.sendAudio dropped (ws state=${this.ws?.readyState ?? "null"}) \u2014 total dropped=${this.audioDroppedCount}`
2292
+ );
2293
+ }
2294
+ return;
2295
+ }
2449
2296
  if (audio.length === 0) return;
2297
+ this.audioSentCount++;
2298
+ if (this.audioSentCount === 1 || this.audioSentCount % 100 === 0) {
2299
+ getLogger().info(
2300
+ `[DIAG] DeepgramSTT.sendAudio: total chunks sent=${this.audioSentCount} (last=${audio.length} bytes)`
2301
+ );
2302
+ }
2450
2303
  this.ws.send(audio);
2451
2304
  }
2305
+ audioSentCount = 0;
2306
+ audioDroppedCount = 0;
2307
+ /** Register a transcript listener. */
2452
2308
  onTranscript(callback) {
2453
2309
  this.transcriptCallbacks.add(callback);
2454
2310
  }
2311
+ /** Remove a previously registered transcript listener. */
2455
2312
  offTranscript(callback) {
2456
2313
  this.transcriptCallbacks.delete(callback);
2457
2314
  }
2315
+ /** Register an error listener for socket / API failures. */
2458
2316
  onError(callback) {
2459
2317
  this.errorCallbacks.add(callback);
2460
2318
  }
2319
+ /** Remove a previously registered error listener. */
2461
2320
  offError(callback) {
2462
2321
  this.errorCallbacks.delete(callback);
2463
2322
  }
2323
+ /**
2324
+ * Force Deepgram to immediately emit a final ``Results`` frame for the
2325
+ * in-flight utterance, rather than waiting for its own endpoint
2326
+ * heuristic (utterance_end_ms ~1 s + natural-pause endpointing).
2327
+ * Called by the SDK on VAD ``speech_end`` and after barge-in cancel —
2328
+ * both moments where the SDK already knows the user has stopped
2329
+ * speaking and waiting for Deepgram's own endpointing only adds
2330
+ * dead air.
2331
+ *
2332
+ * Idempotent: safe to call when the socket is closed/closing.
2333
+ */
2334
+ finalize() {
2335
+ const ws = this.ws;
2336
+ if (!ws || ws.readyState !== WebSocket3.OPEN) {
2337
+ getLogger().info(
2338
+ `[DIAG] DeepgramSTT.finalize SKIPPED (ws state=${ws?.readyState ?? "null"})`
2339
+ );
2340
+ return;
2341
+ }
2342
+ try {
2343
+ ws.send(JSON.stringify({ type: "Finalize" }));
2344
+ getLogger().info("[DIAG] DeepgramSTT.finalize sent {type:Finalize}");
2345
+ } catch (err) {
2346
+ getLogger().info(`[DIAG] DeepgramSTT.finalize send failed: ${String(err)}`);
2347
+ }
2348
+ }
2349
+ /** Send Finalize, briefly drain trailing transcripts, then close the socket. */
2464
2350
  close() {
2465
2351
  this.running = false;
2466
2352
  this.clearKeepalive();
@@ -2492,6 +2378,7 @@ var DeepgramSTT = class _DeepgramSTT {
2492
2378
  };
2493
2379
 
2494
2380
  // src/metrics.ts
2381
+ init_esm_shims();
2495
2382
  function round(value, decimals) {
2496
2383
  const factor = 10 ** decimals;
2497
2384
  return Math.round(value * factor) / factor;
@@ -2518,6 +2405,14 @@ var CallMetricsAccumulator = class {
2518
2405
  sttProvider;
2519
2406
  ttsProvider;
2520
2407
  llmProvider;
2408
+ /**
2409
+ * Model identifiers for per-model rate resolution (see pricing.ts). Empty
2410
+ * string means "not known" → cost calc falls back to provider defaults,
2411
+ * matching pre-2026.3 behaviour.
2412
+ */
2413
+ sttModel;
2414
+ ttsModel;
2415
+ realtimeModel;
2521
2416
  _pricing;
2522
2417
  _callStart;
2523
2418
  _turns = [];
@@ -2579,6 +2474,9 @@ var CallMetricsAccumulator = class {
2579
2474
  this.sttProvider = opts.sttProvider ?? "";
2580
2475
  this.ttsProvider = opts.ttsProvider ?? "";
2581
2476
  this.llmProvider = opts.llmProvider ?? "";
2477
+ this.sttModel = opts.sttModel ?? "";
2478
+ this.ttsModel = opts.ttsModel ?? "";
2479
+ this.realtimeModel = opts.realtimeModel ?? "";
2582
2480
  this._pricing = mergePricing(opts.pricing);
2583
2481
  this._callStart = hrTimeMs();
2584
2482
  this._eventBus = opts.eventBus;
@@ -2601,6 +2499,7 @@ var CallMetricsAccumulator = class {
2601
2499
  get turnActive() {
2602
2500
  return this._turnStart !== null;
2603
2501
  }
2502
+ /** Begin a new turn — stamps the turn start timestamp and resets per-turn state. */
2604
2503
  startTurn() {
2605
2504
  this._turnStart = hrTimeMs();
2606
2505
  this._sttComplete = null;
@@ -2631,6 +2530,7 @@ var CallMetricsAccumulator = class {
2631
2530
  this.startTurn();
2632
2531
  }
2633
2532
  }
2533
+ /** Stamp end-of-STT, capture the user's transcript, and accrue billed STT seconds. */
2634
2534
  recordSttComplete(text, audioSeconds = 0) {
2635
2535
  this._sttComplete = hrTimeMs();
2636
2536
  this._sttFinalAt = this._sttComplete;
@@ -2640,11 +2540,30 @@ var CallMetricsAccumulator = class {
2640
2540
  this._turnUserText = text;
2641
2541
  this._turnSttAudioSeconds = audioSeconds;
2642
2542
  this._totalSttAudioSeconds += audioSeconds;
2543
+ if (this._eventBus) {
2544
+ const valueSec = this._turnStart !== null ? (this._sttComplete - this._turnStart) / 1e3 : 0;
2545
+ const payload = {
2546
+ timestamp: Date.now() / 1e3,
2547
+ processor: "stt",
2548
+ model: null,
2549
+ value: valueSec
2550
+ };
2551
+ this._eventBus.emit("stt_metrics", payload);
2552
+ }
2643
2553
  }
2644
2554
  /** Record the timestamp of the first LLM token (TTFT). No-op after first call. */
2645
2555
  recordLlmFirstToken() {
2646
2556
  if (this._llmFirstToken === null) {
2647
2557
  this._llmFirstToken = hrTimeMs();
2558
+ if (this._eventBus && this._sttComplete !== null && (!this._reportOnlyInitialTtfb || !this._initialTtfbEmitted)) {
2559
+ const payload = {
2560
+ timestamp: Date.now() / 1e3,
2561
+ processor: "llm",
2562
+ model: null,
2563
+ value: (this._llmFirstToken - this._sttComplete) / 1e3
2564
+ };
2565
+ this._eventBus.emit("llm_metrics", payload);
2566
+ }
2648
2567
  }
2649
2568
  }
2650
2569
  /**
@@ -2658,9 +2577,11 @@ var CallMetricsAccumulator = class {
2658
2577
  this._llmFirstSentenceComplete = hrTimeMs();
2659
2578
  }
2660
2579
  }
2580
+ /** Stamp end-of-LLM (last token received). */
2661
2581
  recordLlmComplete() {
2662
2582
  this._llmComplete = hrTimeMs();
2663
2583
  }
2584
+ /** Stamp first TTS audio byte sent on the wire (used to compute TTS TTFB). */
2664
2585
  recordTtsFirstByte() {
2665
2586
  if (this._ttsFirstByte === null) {
2666
2587
  this._ttsFirstByte = hrTimeMs();
@@ -2669,7 +2590,20 @@ var CallMetricsAccumulator = class {
2669
2590
  return;
2670
2591
  }
2671
2592
  this._initialTtfbEmitted = true;
2593
+ if (this._eventBus && this._ttsFirstByte !== null) {
2594
+ const ttsRef = this._llmFirstSentenceComplete !== null ? this._llmFirstSentenceComplete : this._llmComplete;
2595
+ if (ttsRef !== null) {
2596
+ const payload = {
2597
+ timestamp: Date.now() / 1e3,
2598
+ processor: "tts",
2599
+ model: null,
2600
+ value: (this._ttsFirstByte - ttsRef) / 1e3
2601
+ };
2602
+ this._eventBus.emit("tts_metrics", payload);
2603
+ }
2604
+ }
2672
2605
  }
2606
+ /** Record final TTS text length and stamp the last-byte timestamp. */
2673
2607
  recordTtsComplete(text) {
2674
2608
  this._totalTtsCharacters += text.length;
2675
2609
  if (this._ttsLastByte === null) {
@@ -2700,6 +2634,7 @@ var CallMetricsAccumulator = class {
2700
2634
  recordTtsStopped(ts) {
2701
2635
  this._bargeinStoppedAt = ts ?? hrTimeMs();
2702
2636
  }
2637
+ /** Close the current turn cleanly and append a `TurnMetrics` record. */
2703
2638
  recordTurnComplete(agentText) {
2704
2639
  const latency = this._computeTurnLatency();
2705
2640
  const turn = {
@@ -2717,6 +2652,7 @@ var CallMetricsAccumulator = class {
2717
2652
  this._eventBus?.emit("metrics_collected", { callId: this.callId, turn });
2718
2653
  return turn;
2719
2654
  }
2655
+ /** Close the current turn as interrupted (barge-in) and return the recorded metrics. */
2720
2656
  recordTurnInterrupted() {
2721
2657
  if (this._turnStart === null) return null;
2722
2658
  const latency = this._computeTurnLatency();
@@ -2782,6 +2718,7 @@ var CallMetricsAccumulator = class {
2782
2718
  * ``transcriptionDelay`` = turnCommitted − vadStopped (ms)
2783
2719
  * ``onUserTurnCompletedDelay`` = caller-supplied delta (ms) or 0
2784
2720
  */
2721
+ /** Emit `EOUMetrics` once VAD-stop, STT-final, and turn-committed timestamps are all known. */
2785
2722
  emitEouMetrics() {
2786
2723
  if (this._vadStoppedAt === null || this._sttFinalAt === null || this._turnCommittedAt === null) {
2787
2724
  return;
@@ -2832,16 +2769,32 @@ var CallMetricsAccumulator = class {
2832
2769
  this._eventBus?.emit("interruption", payload);
2833
2770
  }
2834
2771
  // ---- Usage tracking ----
2772
+ /** Accumulate inbound STT audio bytes for cost calculation when seconds are unknown. */
2835
2773
  addSttAudioBytes(byteCount) {
2836
2774
  this._sttByteCount += byteCount;
2837
2775
  }
2838
- recordRealtimeUsage(usage) {
2839
- this._totalRealtimeCost += calculateRealtimeCost(usage, this._pricing);
2840
- this._totalRealtimeCachedSavings += calculateRealtimeCachedSavings(usage, this._pricing);
2776
+ /**
2777
+ * Record an OpenAI Realtime usage payload and roll up its cost + cached-savings.
2778
+ *
2779
+ * `model` allows the cost calc to pick the per-model rate (e.g.
2780
+ * `gpt-realtime-2`). Defaults to whatever was supplied at construction
2781
+ * time (`this.realtimeModel`); pass an explicit value to override per-call
2782
+ * (the `response.done` payload carries the model used).
2783
+ */
2784
+ recordRealtimeUsage(usage, model) {
2785
+ const resolvedModel = model || this.realtimeModel || null;
2786
+ this._totalRealtimeCost += calculateRealtimeCost(usage, this._pricing, resolvedModel);
2787
+ this._totalRealtimeCachedSavings += calculateRealtimeCachedSavings(
2788
+ usage,
2789
+ this._pricing,
2790
+ resolvedModel
2791
+ );
2841
2792
  }
2793
+ /** Override the carrier-billed telephony cost (e.g. exact value reported via Twilio API). */
2842
2794
  setActualTelephonyCost(cost) {
2843
2795
  this._actualTelephonyCost = cost;
2844
2796
  }
2797
+ /** Override the provider-billed STT cost when an exact figure is available. */
2845
2798
  setActualSttCost(cost) {
2846
2799
  this._actualSttCost = cost;
2847
2800
  }
@@ -2869,6 +2822,7 @@ var CallMetricsAccumulator = class {
2869
2822
  );
2870
2823
  }
2871
2824
  // ---- Finalize ----
2825
+ /** Finalize the call: flush any in-flight turn, compute aggregates, and return `CallMetrics`. */
2872
2826
  endCall() {
2873
2827
  const duration = (hrTimeMs() - this._callStart) / 1e3;
2874
2828
  if (this.turnActive) {
@@ -2902,6 +2856,7 @@ var CallMetricsAccumulator = class {
2902
2856
  this._eventBus?.emit("call_ended", { callId: this.callId, metrics });
2903
2857
  return metrics;
2904
2858
  }
2859
+ /** Return the cost breakdown for the call so far without ending it. */
2905
2860
  getCostSoFar() {
2906
2861
  const duration = (hrTimeMs() - this._callStart) / 1e3;
2907
2862
  return this._computeCost(duration);
@@ -2962,6 +2917,10 @@ var CallMetricsAccumulator = class {
2962
2917
  if (ttsTotalRef !== null && this._ttsLastByte !== null) {
2963
2918
  tts_total_ms = Math.max(0, this._ttsLastByte - ttsTotalRef);
2964
2919
  }
2920
+ let agent_response_ms;
2921
+ if (endpoint_ms !== void 0 && llm_ttft_ms !== void 0 && tts_ms > 0) {
2922
+ agent_response_ms = round(endpoint_ms + llm_ttft_ms + tts_ms, 1);
2923
+ }
2965
2924
  return {
2966
2925
  stt_ms: round(stt_ms, 1),
2967
2926
  llm_ms: round(llm_ms, 1),
@@ -2971,7 +2930,8 @@ var CallMetricsAccumulator = class {
2971
2930
  total_ms: round(total_ms, 1),
2972
2931
  ...endpoint_ms !== void 0 ? { endpoint_ms: round(endpoint_ms, 1) } : {},
2973
2932
  ...bargein_ms !== void 0 ? { bargein_ms: round(bargein_ms, 1) } : {},
2974
- ...tts_total_ms !== void 0 ? { tts_total_ms: round(tts_total_ms, 1) } : {}
2933
+ ...tts_total_ms !== void 0 ? { tts_total_ms: round(tts_total_ms, 1) } : {},
2934
+ ...agent_response_ms !== void 0 ? { agent_response_ms } : {}
2975
2935
  };
2976
2936
  }
2977
2937
  _computeCost(durationSeconds) {
@@ -2987,8 +2947,18 @@ var CallMetricsAccumulator = class {
2987
2947
  tts = 0;
2988
2948
  llm = 0;
2989
2949
  } else {
2990
- stt = this._actualSttCost !== null ? this._actualSttCost : calculateSttCost(this.sttProvider, this._totalSttAudioSeconds, this._pricing);
2991
- tts = calculateTtsCost(this.ttsProvider, this._totalTtsCharacters, this._pricing);
2950
+ stt = this._actualSttCost !== null ? this._actualSttCost : calculateSttCost(
2951
+ this.sttProvider,
2952
+ this._totalSttAudioSeconds,
2953
+ this._pricing,
2954
+ this.sttModel || null
2955
+ );
2956
+ tts = calculateTtsCost(
2957
+ this.ttsProvider,
2958
+ this._totalTtsCharacters,
2959
+ this._pricing,
2960
+ this.ttsModel || null
2961
+ );
2992
2962
  llm = this._totalLlmCost;
2993
2963
  }
2994
2964
  const telephony = this._actualTelephonyCost !== null ? this._actualTelephonyCost : calculateTelephonyCost(this.telephonyProvider, durationSeconds, this._pricing);
@@ -3074,7 +3044,8 @@ var CallMetricsAccumulator = class {
3074
3044
  }
3075
3045
  };
3076
3046
 
3077
- // src/transcoding.ts
3047
+ // src/audio/transcoding.ts
3048
+ init_esm_shims();
3078
3049
  var MULAW_TO_PCM16_TABLE = (() => {
3079
3050
  const table = new Int16Array(256);
3080
3051
  for (let i = 0; i < 256; i++) {
@@ -3189,9 +3160,9 @@ var StatefulResampler = class {
3189
3160
  throw new Error("StatefulResampler: only mono (channels=1) is supported");
3190
3161
  }
3191
3162
  const key = `${this.srcRate}->${this.dstRate}`;
3192
- if (key !== "16000->8000" && key !== "8000->16000" && key !== "24000->16000") {
3163
+ if (key !== "16000->8000" && key !== "8000->16000" && key !== "24000->16000" && key !== "24000->8000") {
3193
3164
  throw new Error(
3194
- `StatefulResampler: unsupported conversion ${key}. Supported: 16000->8000, 8000->16000, 24000->16000`
3165
+ `StatefulResampler: unsupported conversion ${key}. Supported: 16000->8000, 8000->16000, 24000->16000, 24000->8000`
3195
3166
  );
3196
3167
  }
3197
3168
  }
@@ -3211,6 +3182,9 @@ var StatefulResampler = class {
3211
3182
  if (this.srcRate === 8e3 && this.dstRate === 16e3) {
3212
3183
  return this._upsample8kTo16k(aligned);
3213
3184
  }
3185
+ if (this.srcRate === 24e3 && this.dstRate === 8e3) {
3186
+ return this._resample24kTo8k(aligned);
3187
+ }
3214
3188
  return this._resample24kTo16k(aligned);
3215
3189
  }
3216
3190
  /**
@@ -3356,7 +3330,7 @@ var StatefulResampler = class {
3356
3330
  return outBuf;
3357
3331
  }
3358
3332
  // ---------------------------------------------------------------------------
3359
- // Private: 24 kHz → 16 kHz
3333
+ // Private: 24 kHz → 16 kHz / 8 kHz
3360
3334
  // ---------------------------------------------------------------------------
3361
3335
  /**
3362
3336
  * 3:2 linear-interpolation decimator (ratio srcRate/dstRate = 1.5).
@@ -3367,6 +3341,14 @@ var StatefulResampler = class {
3367
3341
  * handled using `resample24Last`.
3368
3342
  */
3369
3343
  _resample24kTo16k(buf) {
3344
+ return this._resample24kStep(buf, 24e3 / 16e3);
3345
+ }
3346
+ /** 3:1 decimation — collapses the 24k→16k→8k chain into a single step. */
3347
+ _resample24kTo8k(buf) {
3348
+ return this._resample24kStep(buf, 24e3 / 8e3);
3349
+ }
3350
+ /** Shared phase-stepping resampler used by 24→16 (step 1.5) and 24→8 (step 3). */
3351
+ _resample24kStep(buf, step) {
3370
3352
  const sampleCount = buf.length >> 1;
3371
3353
  if (sampleCount === 0) return Buffer.alloc(0);
3372
3354
  const outArr = [];
@@ -3386,7 +3368,7 @@ var StatefulResampler = class {
3386
3368
  }
3387
3369
  const interp = Math.round(s0 + (s1 - s0) * frac);
3388
3370
  outArr.push(Math.max(-32768, Math.min(32767, interp)));
3389
- phase += 24e3 / 16e3;
3371
+ phase += step;
3390
3372
  }
3391
3373
  this.resample24Last = buf.readInt16LE((sampleCount - 1) * 2);
3392
3374
  this.resample24HasHistory = true;
@@ -3405,6 +3387,9 @@ function createResampler8kTo16k() {
3405
3387
  function createResampler24kTo16k() {
3406
3388
  return new StatefulResampler({ srcRate: 24e3, dstRate: 16e3 });
3407
3389
  }
3390
+ function createResampler24kTo8k() {
3391
+ return new StatefulResampler({ srcRate: 24e3, dstRate: 8e3 });
3392
+ }
3408
3393
  var _warnedResample8kTo16k = false;
3409
3394
  var _warnedResample16kTo8k = false;
3410
3395
  var _warnedResample24kTo16k = false;
@@ -3458,6 +3443,7 @@ function resample24kTo16k(pcm24k) {
3458
3443
  }
3459
3444
 
3460
3445
  // src/handler-utils.ts
3446
+ init_esm_shims();
3461
3447
  function createHistoryManager(maxSize) {
3462
3448
  const entries = [];
3463
3449
  const push = (entry) => {
@@ -3467,59 +3453,239 @@ function createHistoryManager(maxSize) {
3467
3453
  const getHistory = () => [...entries];
3468
3454
  return { push, getHistory, entries };
3469
3455
  }
3470
- async function executeToolWebhook(webhookUrl, toolName, parsedArgs, context, label = "") {
3471
- try {
3472
- validateWebhookUrl(webhookUrl);
3473
- } catch (e) {
3474
- const tag = label ? ` (${label})` : "";
3475
- getLogger().error(`Tool webhook URL rejected${tag}: ${String(e)}`);
3476
- return JSON.stringify({ error: String(e), fallback: true });
3456
+
3457
+ // src/tools/mcp-client.ts
3458
+ init_esm_shims();
3459
+ function resolveConfig(input, index) {
3460
+ if (typeof input === "string") {
3461
+ return { url: input, headers: {}, name: `mcp[${index}]` };
3462
+ }
3463
+ if (!input.url) {
3464
+ throw new Error(`mcpServers[${index}]: missing required 'url' field`);
3465
+ }
3466
+ return {
3467
+ url: input.url,
3468
+ headers: input.headers ?? {},
3469
+ name: input.name ?? `mcp[${index}]`
3470
+ };
3471
+ }
3472
+ var MCPManager = class {
3473
+ configs;
3474
+ connected = [];
3475
+ constructor(servers) {
3476
+ this.configs = (servers ?? []).map((s, i) => resolveConfig(s, i));
3477
+ }
3478
+ get hasServers() {
3479
+ return this.configs.length > 0;
3477
3480
  }
3478
- let result = "";
3479
- for (let attempt = 0; attempt < 3; attempt++) {
3481
+ /** Connect to every configured server and discover their tools.
3482
+ * Returns the discovered tools wrapped as Patter ``ToolDefinition``s. */
3483
+ async connect() {
3484
+ if (this.configs.length === 0) return [];
3485
+ let mcpModule;
3486
+ let transportModule;
3480
3487
  try {
3481
- const resp = await fetch(webhookUrl, {
3482
- method: "POST",
3483
- headers: { "Content-Type": "application/json" },
3484
- body: JSON.stringify({
3485
- tool: toolName,
3486
- arguments: parsedArgs,
3487
- call_id: context.callId,
3488
- caller: context.caller,
3489
- attempt: attempt + 1
3490
- }),
3491
- signal: AbortSignal.timeout(1e4)
3488
+ mcpModule = await import("./client-2GJVZT42.mjs");
3489
+ transportModule = await import("./streamableHttp-WKNGHDVO.mjs");
3490
+ } catch (e) {
3491
+ throw new Error(
3492
+ `mcpServers configured but \`@modelcontextprotocol/sdk\` is not installed. Run \`npm install @modelcontextprotocol/sdk\` to enable MCP support. (import error: ${String(e)})`
3493
+ );
3494
+ }
3495
+ const aggregatedTools = [];
3496
+ for (const cfg of this.configs) {
3497
+ const transport = new transportModule.StreamableHTTPClientTransport(new URL(cfg.url), {
3498
+ requestInit: { headers: cfg.headers }
3492
3499
  });
3493
- if (!resp.ok) throw new Error(`HTTP ${resp.status}`);
3494
- result = JSON.stringify(await resp.json());
3495
- const MAX_RESPONSE_BYTES2 = 1 * 1024 * 1024;
3496
- if (result.length > MAX_RESPONSE_BYTES2) {
3497
- const tag = label ? ` (${label})` : "";
3498
- getLogger().warn(`Tool webhook response too large: ${result.length} bytes (max ${MAX_RESPONSE_BYTES2})${tag}`);
3499
- return JSON.stringify({ error: `Webhook response too large: ${result.length} bytes (max ${MAX_RESPONSE_BYTES2})`, fallback: true });
3500
+ const client = new mcpModule.Client({ name: "patter", version: "0.6.0" });
3501
+ try {
3502
+ await client.connect(transport);
3503
+ } catch (e) {
3504
+ getLogger().error(`MCP server '${cfg.name}' (${cfg.url}) connect failed: ${String(e)}`);
3505
+ try {
3506
+ await transport.close?.();
3507
+ } catch {
3508
+ }
3509
+ continue;
3500
3510
  }
3501
- return result;
3502
- } catch (e) {
3503
- if (attempt < 2) {
3504
- const tag = label ? ` (${label})` : "";
3505
- getLogger().info(`Tool webhook retry ${attempt + 1}${tag}: ${String(e)}`);
3506
- await new Promise((r) => setTimeout(r, 500));
3507
- } else {
3508
- result = JSON.stringify({ error: `Tool failed after 3 attempts: ${String(e)}`, fallback: true });
3511
+ this.connected.push({ config: cfg, client, transport });
3512
+ let listed;
3513
+ try {
3514
+ listed = await client.listTools();
3515
+ } catch (e) {
3516
+ getLogger().error(`MCP server '${cfg.name}' tools/list failed: ${String(e)}`);
3517
+ continue;
3509
3518
  }
3519
+ const tools = Array.isArray(listed?.tools) ? listed.tools : [];
3520
+ for (const t of tools) {
3521
+ if (!t?.name) continue;
3522
+ aggregatedTools.push({
3523
+ name: t.name,
3524
+ description: t.description ?? "",
3525
+ parameters: t.inputSchema ?? { type: "object", properties: {} },
3526
+ handler: async (args) => {
3527
+ const callResult = await client.callTool({
3528
+ name: t.name,
3529
+ arguments: args
3530
+ });
3531
+ const text = (callResult.content ?? []).map((c) => c.type === "text" ? c.text ?? "" : JSON.stringify(c)).join("\n");
3532
+ if (callResult.isError) {
3533
+ return JSON.stringify({ error: text || "MCP tool error", fallback: true });
3534
+ }
3535
+ return text || "{}";
3536
+ }
3537
+ });
3538
+ }
3539
+ getLogger().info(`MCP server '${cfg.name}' registered ${tools.length} tool(s)`);
3510
3540
  }
3541
+ return aggregatedTools;
3511
3542
  }
3512
- return result;
3513
- }
3543
+ /** Validate no tool name collides between MCP-discovered and
3544
+ * user-supplied tools. Throws on conflict so the user fixes it. */
3545
+ static assertNoConflicts(userTools, mcpTools) {
3546
+ if (!userTools || userTools.length === 0 || mcpTools.length === 0) return;
3547
+ const userNames = new Set(userTools.map((t) => t.name));
3548
+ for (const mcp of mcpTools) {
3549
+ if (userNames.has(mcp.name)) {
3550
+ throw new Error(
3551
+ `MCP tool '${mcp.name}' collides with a user-supplied tool of the same name. Rename one of them or remove the duplicate from agent.tools.`
3552
+ );
3553
+ }
3554
+ }
3555
+ }
3556
+ /** Close every open MCP connection. Idempotent; logs but does not
3557
+ * throw on individual failures (we don't want a flaky shutdown to
3558
+ * derail the call-end teardown). */
3559
+ async close() {
3560
+ const conns = this.connected;
3561
+ this.connected = [];
3562
+ for (const conn of conns) {
3563
+ try {
3564
+ await conn.client.close?.();
3565
+ } catch (e) {
3566
+ getLogger().debug(`MCP server '${conn.config.name}' close error (ignored): ${String(e)}`);
3567
+ }
3568
+ }
3569
+ }
3570
+ };
3514
3571
 
3515
3572
  // src/sentence-chunker.ts
3573
+ init_esm_shims();
3516
3574
  var DEFAULT_MIN_SENTENCE_LEN = 20;
3517
- var DEFAULT_MIN_WORDS_FOR_SHORT_FLUSH = 2;
3518
- var SENTENCE_TERMINATORS = ".!?\u3002\uFF01\uFF1F";
3575
+ var DEFAULT_MIN_WORDS_FOR_SHORT_FLUSH = 1;
3576
+ var HONORIFICS_EN = [
3577
+ "Mr",
3578
+ "St",
3579
+ "Mrs",
3580
+ "Ms",
3581
+ "Dr",
3582
+ "Prof",
3583
+ "Gen",
3584
+ "Sen",
3585
+ "Rep",
3586
+ "Lt",
3587
+ "Cpt",
3588
+ "Capt",
3589
+ "Col",
3590
+ "Cmdr",
3591
+ "Adm"
3592
+ ];
3593
+ var HONORIFICS_IT = [
3594
+ "Sig",
3595
+ "Sgr",
3596
+ "Dott",
3597
+ "Prof",
3598
+ "Avv",
3599
+ "Ing",
3600
+ "Geom",
3601
+ "Rag",
3602
+ "Arch",
3603
+ "On",
3604
+ "Egr",
3605
+ "Spett",
3606
+ "Gent",
3607
+ "Ill"
3608
+ ];
3609
+ var HONORIFICS_ES = [
3610
+ "Sr",
3611
+ "Sra",
3612
+ "Sres",
3613
+ "Sras",
3614
+ "Srta",
3615
+ "Srtas",
3616
+ "Dr",
3617
+ "Dra",
3618
+ "Dres",
3619
+ "Lic",
3620
+ "Licda",
3621
+ "Ing",
3622
+ "Prof",
3623
+ "Profa",
3624
+ "Arq",
3625
+ "Mtro",
3626
+ "Mtra"
3627
+ ];
3628
+ var HONORIFICS_DE = [
3629
+ "Hr",
3630
+ "Fr",
3631
+ "Frl",
3632
+ "Dr",
3633
+ "Prof",
3634
+ "Dipl",
3635
+ "Mag"
3636
+ ];
3637
+ var HONORIFICS_FR = [
3638
+ "Mme",
3639
+ "Mmes",
3640
+ "Mlle",
3641
+ "Mlles",
3642
+ "MM",
3643
+ "Dr",
3644
+ "Pr",
3645
+ "Mgr",
3646
+ "Me"
3647
+ ];
3648
+ var HONORIFICS_PT = [
3649
+ "Sr",
3650
+ "Sra",
3651
+ "Srs",
3652
+ "Sras",
3653
+ "Srta",
3654
+ "Srtas",
3655
+ "Dr",
3656
+ "Dra",
3657
+ "Eng",
3658
+ "Enga",
3659
+ "Prof",
3660
+ "Profa"
3661
+ ];
3662
+ var HONORIFICS_BY_LANGUAGE = {
3663
+ en: HONORIFICS_EN,
3664
+ it: HONORIFICS_IT,
3665
+ es: HONORIFICS_ES,
3666
+ de: HONORIFICS_DE,
3667
+ fr: HONORIFICS_FR,
3668
+ pt: HONORIFICS_PT
3669
+ };
3670
+ var HONORIFICS_ALL = Array.from(
3671
+ new Set(Object.values(HONORIFICS_BY_LANGUAGE).flat())
3672
+ ).sort((a, b) => b.length - a.length || a.localeCompare(b));
3673
+ var SENTENCE_TERMINATORS = ".!?\u2026;\u3002\uFF01\uFF1F\uFF1B\uFF0E\uFF61";
3674
+ var UNAMBIGUOUS_NON_LATIN_TERMINATORS = "\u0964\u0965\u061F\u061B\u06D4\u060F\u0589\u1367\u1362\u17D4\u17D5\u104B\u0F0E\u0F0F";
3675
+ var TERMINATOR_REGEX_CLASS = Array.from(
3676
+ new Set(SENTENCE_TERMINATORS + UNAMBIGUOUS_NON_LATIN_TERMINATORS)
3677
+ ).map((c) => c.replace(/[\\^$.|?*+()[\]{}]/g, "\\$&")).sort().join("");
3678
+ var SOFT_TERMINATORS = ",\u2014\u2013";
3679
+ var DEFAULT_AGGRESSIVE_FIRST_MIN_LEN = 40;
3680
+ var CURRENCY_SYMBOLS = "$\u20AC\xA3\xA5\u20B9\u20A9";
3681
+ var HONORIFICS_REGEX_ALT = HONORIFICS_ALL.map(
3682
+ (p) => p.replace(/[\\^$.|?*+()[\]{}]/g, "\\$&")
3683
+ ).join("|");
3684
+ var HONORIFICS_SET = new Set(HONORIFICS_ALL);
3519
3685
  function splitSentences(text, minSentenceLen = DEFAULT_MIN_SENTENCE_LEN) {
3520
3686
  const alphabets = "([A-Za-z])";
3521
- const prefixes = "(Mr|St|Mrs|Ms|Dr)[.]";
3522
- const suffixes = "(Inc|Ltd|Jr|Sr|Co)";
3687
+ const prefixes = `(${HONORIFICS_REGEX_ALT})[.]`;
3688
+ const suffixes = "(Inc|Ltd|Jr|Sr|Co|ecc|cit|cap|sez|art|pag|fig|tab|cfr|vol|ed|vs|etc|No|Vol|pp|cf|ca|op|Mt|Hwy|Rt|Pl|Ave|Blvd|Sq)";
3523
3689
  const starters = "(Mr|Mrs|Ms|Dr|Prof|Capt|Cpt|Lt|He\\s|She\\s|It\\s|They\\s|Their\\s|Our\\s|We\\s|But\\s|However\\s|That\\s|This\\s|Wherever)";
3524
3690
  const acronyms = "([A-Z][.][A-Z][.](?:[A-Z][.])?)";
3525
3691
  const websites = "[.](com|net|org|io|gov|edu|me)";
@@ -3543,14 +3709,20 @@ function splitSentences(text, minSentenceLen = DEFAULT_MIN_SENTENCE_LEN) {
3543
3709
  new RegExp(alphabets + "[.]" + alphabets + "[.]", "g"),
3544
3710
  "$1<prd>$2<prd>"
3545
3711
  );
3546
- text = text.replace(new RegExp(" " + suffixes + "[.] " + starters, "g"), " $1<stop> $2");
3712
+ text = text.replace(new RegExp(" " + suffixes + "[.] " + starters, "g"), " $1.<stop> $2");
3547
3713
  text = text.replace(new RegExp(" " + suffixes + "[.]", "g"), " $1<prd>");
3548
3714
  text = text.replace(new RegExp(" " + alphabets + "[.]", "g"), " $1<prd>");
3549
- text = text.replace(/([.!?\u3002\uff01\uff1f])(["\u201d])/g, "$1$2<stop>");
3550
- text = text.replace(/([.!?\u3002\uff01\uff1f])(?!["\u201d])/g, "$1<stop>");
3551
- text = text.replace(/<prd>/g, ".");
3552
- const splitted = text.split("<stop>");
3553
- text = text.replace(/<stop>/g, "");
3715
+ text = text.replace(
3716
+ new RegExp(`([${TERMINATOR_REGEX_CLASS}])(["\u201D])`, "g"),
3717
+ "$1$2<stop>"
3718
+ );
3719
+ text = text.replace(
3720
+ new RegExp(`([${TERMINATOR_REGEX_CLASS}])(?!["\u201D])`, "g"),
3721
+ "$1<stop>"
3722
+ );
3723
+ text = text.replace(/<prd>/g, ".");
3724
+ const splitted = text.split("<stop>");
3725
+ text = text.replace(/<stop>/g, "");
3554
3726
  const sentences = [];
3555
3727
  let buff = "";
3556
3728
  let startPos = 0;
@@ -3575,9 +3747,16 @@ var SentenceChunker = class {
3575
3747
  buffer = "";
3576
3748
  minSentenceLen;
3577
3749
  minWordsForShortFlush;
3750
+ aggressiveFirstMinLen;
3751
+ aggressiveFirstFlush;
3752
+ language;
3753
+ isFirstFlush = true;
3578
3754
  constructor(options) {
3579
3755
  this.minSentenceLen = options?.minSentenceLen ?? DEFAULT_MIN_SENTENCE_LEN;
3580
3756
  this.minWordsForShortFlush = options?.minWordsForShortFlush ?? DEFAULT_MIN_WORDS_FOR_SHORT_FLUSH;
3757
+ this.aggressiveFirstMinLen = options?.aggressiveFirstMinLen ?? DEFAULT_AGGRESSIVE_FIRST_MIN_LEN;
3758
+ this.language = (options?.language ?? "en").toLowerCase();
3759
+ this.aggressiveFirstFlush = (options?.aggressiveFirstFlush ?? false) && !this.language.startsWith("it");
3581
3760
  }
3582
3761
  /**
3583
3762
  * Feed a token. Returns zero or more complete sentences.
@@ -3588,13 +3767,21 @@ var SentenceChunker = class {
3588
3767
  * sentence, all but the last (potentially incomplete) are emitted.
3589
3768
  * - **Short-flush path** — when the buffer is shorter than `minSentenceLen`
3590
3769
  * but ends with a sentence terminator AND has at least
3591
- * `minWordsForShortFlush` whitespace-separated words, emit it
3592
- * immediately. This drops TTS TTFB on short greetings like `"Hi there!"`
3593
- * while keeping single-word utterances (`"Sì."`) buffered until
3594
- * `flush()`.
3770
+ * `minWordsForShortFlush` whitespace-separated words (default 1 — a
3771
+ * single-word reply like `"Yes."` flushes immediately for low TTS
3772
+ * TTFB). Acronym ("U.S.") and decimal ("f(x) = 2.") guards still block
3773
+ * dangerous cases. Bump `minWordsForShortFlush` to 2+ to keep
3774
+ * single-word utterances buffered until `flush()`.
3595
3775
  */
3596
3776
  push(token) {
3597
3777
  this.buffer += token;
3778
+ if (this.aggressiveFirstFlush && this.isFirstFlush) {
3779
+ const flushed = this.maybeAggressiveFirstFlush();
3780
+ if (flushed !== null) {
3781
+ this.isFirstFlush = false;
3782
+ return [flushed];
3783
+ }
3784
+ }
3598
3785
  if (this.buffer.length < this.minSentenceLen) {
3599
3786
  return this.maybeShortFlush();
3600
3787
  }
@@ -3615,16 +3802,19 @@ var SentenceChunker = class {
3615
3802
  *
3616
3803
  * A buffer qualifies when **all** of these hold:
3617
3804
  * 1. Last non-whitespace char is a sentence terminator.
3618
- * 2. Word count is at least `minWordsForShortFlush` (default 2 keeps
3619
- * single-word "Sì." / "Yes." buffered until `flush()`).
3805
+ * 2. Word count is at least `minWordsForShortFlush` (default 1, so
3806
+ * single-word replies like `"Yes."` flush immediately).
3620
3807
  * 3. The buffer contains exactly one terminator (the trailing one).
3621
3808
  * Multiple terminators mean we may be mid-stream of a longer merged
3622
3809
  * utterance like `"Hey! Hi! Hello! This is a sentence."` — let the
3623
3810
  * standard path keep merging.
3624
3811
  * 4. The char immediately before the terminator is NOT a digit (avoids
3625
3812
  * decimal mid-stream like `"f(x) = x * 2."` flushing before `54`).
3626
- * 5. The char immediately before the terminator is NOT an uppercase
3627
- * ASCII letter (avoids acronym patterns like `"U.S."` / `"U."`).
3813
+ * 5. The trailing word is NOT a short ASCII all-caps acronym of 1-3 chars
3814
+ * (`"U."` / `"U.S."` / `"USA."`).
3815
+ * 6. The trailing word is NOT a known honorific from any of the
3816
+ * per-language `HONORIFICS_*` constants (`"Mr."`, `"Sr."`, `"Dr."`,
3817
+ * `"Hr."`, `"Mme."`, ...).
3628
3818
  */
3629
3819
  maybeShortFlush() {
3630
3820
  const stripped = this.buffer.replace(/\s+$/, "");
@@ -3640,29 +3830,109 @@ var SentenceChunker = class {
3640
3830
  if (wordCount < this.minWordsForShortFlush) return [];
3641
3831
  if (stripped.length >= 2) {
3642
3832
  const prev = stripped[stripped.length - 2];
3643
- if (/\d/.test(prev) || /[A-Z]/.test(prev)) return [];
3833
+ if (/\d/.test(prev)) return [];
3834
+ const terminator = stripped[stripped.length - 1];
3835
+ if (terminator === ".") {
3836
+ const stripTerm = stripped.replace(
3837
+ new RegExp(`[${TERMINATOR_REGEX_CLASS}]+$`),
3838
+ ""
3839
+ );
3840
+ const tokens = stripTerm.split(/\s+/).filter((w) => w.length > 0);
3841
+ const lastWord = tokens.length > 0 ? tokens[tokens.length - 1] : "";
3842
+ if (/^[A-Z]{1,3}$/.test(lastWord)) return [];
3843
+ if (HONORIFICS_SET.has(lastWord)) return [];
3844
+ }
3644
3845
  }
3645
3846
  this.buffer = "";
3646
3847
  return [stripped];
3647
3848
  }
3849
+ /**
3850
+ * Try to flush the first clause of the response on a soft punctuation
3851
+ * boundary (comma / em-dash / en-dash) to minimise TTFA.
3852
+ *
3853
+ * Returns the flushed clause text (with terminator) or `null` if no safe
3854
+ * boundary is found. All of these guards must pass:
3855
+ *
3856
+ * 1. **Min length** — buffer ≥ `aggressiveFirstMinLen` (default 40).
3857
+ * 2. **Trailing terminator** — last non-whitespace char in `SOFT_TERMINATORS`.
3858
+ * 3. **Decimal/thousands guard** — refuse if comma is between two digits
3859
+ * or surrounded by digit-thousands grouping.
3860
+ * 4. **Currency guard** — refuse if a currency symbol appears in the
3861
+ * preceding 8 characters.
3862
+ * 5. **Balanced delimiter** — refuse if open parens/brackets/braces or
3863
+ * unmatched double-quotes still pending.
3864
+ * 6. **Ellipsis** — refuse if buffer ends with `...` or `…`.
3865
+ * 7. **Sub-token ambiguity** — only fire when at least one trailing char
3866
+ * after the terminator has arrived.
3867
+ */
3868
+ maybeAggressiveFirstFlush() {
3869
+ const rstripped = this.buffer.replace(/\s+$/, "");
3870
+ if (rstripped.length < this.aggressiveFirstMinLen) return null;
3871
+ const lastChar = rstripped[rstripped.length - 1] ?? "";
3872
+ if (!SOFT_TERMINATORS.includes(lastChar)) return null;
3873
+ const pos = rstripped.length - 1;
3874
+ if (pos + 1 >= this.buffer.length) return null;
3875
+ const nextChar = this.buffer[pos + 1] ?? "";
3876
+ if (lastChar === ",") {
3877
+ const prevChar = pos >= 1 ? rstripped[pos - 1] ?? "" : "";
3878
+ if (/\d/.test(prevChar) && /\d/.test(nextChar)) return null;
3879
+ const tail = rstripped.slice(Math.max(0, pos - 6), pos);
3880
+ if (/\d/.test(prevChar) && tail.includes(",") && /\d/.test(tail)) {
3881
+ return null;
3882
+ }
3883
+ }
3884
+ const snippet = rstripped.slice(Math.max(0, pos - 8), pos);
3885
+ for (const c of CURRENCY_SYMBOLS) {
3886
+ if (snippet.includes(c)) return null;
3887
+ }
3888
+ const opens = (rstripped.match(/[([{]/g) ?? []).length;
3889
+ const closes = (rstripped.match(/[)\]}]/g) ?? []).length;
3890
+ if (opens > closes) return null;
3891
+ const dquoteCount = (rstripped.match(/"/g) ?? []).length;
3892
+ if (dquoteCount % 2 !== 0) return null;
3893
+ if (rstripped.endsWith("...") || rstripped.endsWith("\u2026")) return null;
3894
+ if (lastChar === "," && nextChar === '"') return null;
3895
+ const flushed = rstripped;
3896
+ this.buffer = this.buffer.slice(rstripped.length).replace(/^\s+/, "");
3897
+ return flushed;
3898
+ }
3648
3899
  /** Flush remaining buffer as final sentence(s). Call at end of stream. */
3649
3900
  flush() {
3650
3901
  const remaining = this.buffer.trim();
3651
3902
  this.buffer = "";
3903
+ this.isFirstFlush = true;
3652
3904
  if (!remaining) return [];
3653
3905
  return [remaining];
3654
3906
  }
3655
3907
  /** Discard buffered text. Call on interrupt. */
3656
3908
  reset() {
3657
3909
  this.buffer = "";
3910
+ this.isFirstFlush = true;
3658
3911
  }
3659
3912
  };
3660
3913
 
3661
3914
  // src/pipeline-hooks.ts
3915
+ init_esm_shims();
3916
+ var legacyAfterLlmWarned = false;
3917
+ function normaliseAfterLlm(hook) {
3918
+ if (hook === void 0) return void 0;
3919
+ if (typeof hook === "function") {
3920
+ if (!legacyAfterLlmWarned) {
3921
+ legacyAfterLlmWarned = true;
3922
+ getLogger().warn(
3923
+ "[patter] afterLlm: (text, ctx) => string is deprecated; pass an object with { onResponse } instead. The legacy form maps to onResponse and blocks streaming TTS. Will be removed in v0.7.0."
3924
+ );
3925
+ }
3926
+ return { onResponse: hook };
3927
+ }
3928
+ return hook;
3929
+ }
3662
3930
  var PipelineHookExecutor = class {
3663
3931
  hooks;
3932
+ afterLlm;
3664
3933
  constructor(hooks) {
3665
3934
  this.hooks = hooks;
3935
+ this.afterLlm = normaliseAfterLlm(hooks?.afterLlm);
3666
3936
  }
3667
3937
  /**
3668
3938
  * Run beforeSendToStt hook. Returns null to drop the audio chunk.
@@ -3708,26 +3978,87 @@ var PipelineHookExecutor = class {
3708
3978
  }
3709
3979
  }
3710
3980
  /**
3711
- * Run afterLlm hook. Returns a possibly-modified assistant text.
3712
- * Returning ``null`` from the hook means "keep the original".
3713
- * Fail-open: on exception, the original text passes through.
3981
+ * Tier 1 — per-token sync transform. Returns the (possibly transformed)
3982
+ * chunk. Fail-open: on exception or non-string return, the original chunk
3983
+ * passes through unchanged. Must be cheap (~0 ms budget).
3714
3984
  */
3715
- async runAfterLlm(text, ctx) {
3716
- if (!this.hooks?.afterLlm) return text;
3985
+ runAfterLlmChunk(chunk) {
3986
+ if (!this.afterLlm?.onChunk) return chunk;
3717
3987
  try {
3718
- const result = await this.hooks.afterLlm(text, ctx);
3988
+ const result = this.afterLlm.onChunk(chunk);
3989
+ return typeof result === "string" ? result : chunk;
3990
+ } catch (e) {
3991
+ getLogger().error("Pipeline hook afterLlm.onChunk threw:", e);
3992
+ return chunk;
3993
+ }
3994
+ }
3995
+ /**
3996
+ * Tier 2 — per-sentence rewrite. Returns rewritten sentence text, the
3997
+ * original sentence (if hook returned `null`), or `null` to drop the
3998
+ * sentence entirely (empty string is treated as drop). Fail-open.
3999
+ */
4000
+ async runAfterLlmSentence(sentence, ctx) {
4001
+ if (!this.afterLlm?.onSentence) return sentence;
4002
+ try {
4003
+ const result = await this.afterLlm.onSentence(sentence, ctx);
4004
+ if (result === null) return sentence;
4005
+ if (result === "") return null;
4006
+ return result;
4007
+ } catch (e) {
4008
+ getLogger().error("Pipeline hook afterLlm.onSentence threw:", e);
4009
+ return sentence;
4010
+ }
4011
+ }
4012
+ /**
4013
+ * Tier 3 — per-response rewrite. Returns the (possibly rewritten) full
4014
+ * response text. Triggered after the LLM stream completes. Caller is
4015
+ * responsible for buffering tokens before invocation. Fail-open.
4016
+ */
4017
+ async runAfterLlmResponse(text, ctx) {
4018
+ if (!this.afterLlm?.onResponse) return text;
4019
+ try {
4020
+ const result = await this.afterLlm.onResponse(text, ctx);
3719
4021
  return result ?? text;
3720
4022
  } catch (e) {
3721
- getLogger().error("Pipeline hook afterLlm threw:", e);
4023
+ getLogger().error("Pipeline hook afterLlm.onResponse threw:", e);
3722
4024
  return text;
3723
4025
  }
3724
4026
  }
3725
4027
  /**
3726
- * Whether ``afterLlm`` is configured. Used by the LLM loop to decide
3727
- * whether to buffer streaming tokens before yielding them.
4028
+ * Backward-compatible alias for `runAfterLlmResponse`. Existing call sites
4029
+ * in the LLM loop continue to work unchanged.
4030
+ *
4031
+ * @deprecated Use `runAfterLlmResponse` directly.
4032
+ */
4033
+ async runAfterLlm(text, ctx) {
4034
+ return this.runAfterLlmResponse(text, ctx);
4035
+ }
4036
+ /**
4037
+ * Whether a per-response (tier 3) `onResponse` transform is configured.
4038
+ * The LLM loop uses this to decide whether to buffer streaming tokens
4039
+ * before yielding them. Per-token (tier 1) and per-sentence (tier 2)
4040
+ * transforms do NOT require buffering.
4041
+ */
4042
+ hasAfterLlmResponse() {
4043
+ return Boolean(this.afterLlm?.onResponse);
4044
+ }
4045
+ /** Whether a per-sentence (tier 2) transform is configured. */
4046
+ hasAfterLlmSentence() {
4047
+ return Boolean(this.afterLlm?.onSentence);
4048
+ }
4049
+ /** Whether a per-token (tier 1) transform is configured. */
4050
+ hasAfterLlmChunk() {
4051
+ return Boolean(this.afterLlm?.onChunk);
4052
+ }
4053
+ /**
4054
+ * Backward-compatible alias for `hasAfterLlmResponse`. The legacy callable
4055
+ * form maps to `onResponse`, so this preserves the original semantic for
4056
+ * existing call sites.
4057
+ *
4058
+ * @deprecated Use `hasAfterLlmResponse` directly.
3728
4059
  */
3729
4060
  hasAfterLlm() {
3730
- return Boolean(this.hooks?.afterLlm);
4061
+ return this.hasAfterLlmResponse();
3731
4062
  }
3732
4063
  /**
3733
4064
  * Run beforeSynthesize hook. Returns null if hook vetoes TTS for this sentence.
@@ -3758,6 +4089,7 @@ var PipelineHookExecutor = class {
3758
4089
  };
3759
4090
 
3760
4091
  // src/observability/event-bus.ts
4092
+ init_esm_shims();
3761
4093
  var EventBus = class {
3762
4094
  listeners = /* @__PURE__ */ new Map();
3763
4095
  /**
@@ -3784,17 +4116,18 @@ var EventBus = class {
3784
4116
  const res = cb(payload);
3785
4117
  if (res && typeof res.catch === "function") {
3786
4118
  res.catch(
3787
- (e) => getLogger().warn(`[EventBus] listener for "${event}" rejected:`, e)
4119
+ (e) => getLogger().error(`[EventBus] listener for "${event}" rejected:`, e)
3788
4120
  );
3789
4121
  }
3790
4122
  } catch (e) {
3791
- getLogger().warn(`[EventBus] listener for "${event}" threw:`, e);
4123
+ getLogger().error(`[EventBus] listener for "${event}" threw:`, e);
3792
4124
  }
3793
4125
  }
3794
4126
  }
3795
4127
  };
3796
4128
 
3797
4129
  // src/observability/tracing.ts
4130
+ init_esm_shims();
3798
4131
  var ENV_FLAG = "PATTER_OTEL_ENABLED";
3799
4132
  var SERVICE_NAME = "patter";
3800
4133
  var SPAN_CALL = "getpatter.call";
@@ -3982,7 +4315,7 @@ var HALLUCINATIONS = /* @__PURE__ */ new Set([
3982
4315
  "right",
3983
4316
  "cool"
3984
4317
  ]);
3985
- var StreamHandler = class {
4318
+ var StreamHandler = class _StreamHandler {
3986
4319
  deps;
3987
4320
  ws;
3988
4321
  caller;
@@ -3994,8 +4327,50 @@ var StreamHandler = class {
3994
4327
  stt = null;
3995
4328
  tts = null;
3996
4329
  isSpeaking = false;
4330
+ /**
4331
+ * Ring buffer of inbound PCM16 16 kHz frames captured while the agent
4332
+ * is speaking and the self-hearing guard is dropping audio. On
4333
+ * barge-in we flush this buffer to STT so Deepgram (or any other
4334
+ * streaming STT) receives the user's first ~500 ms of speech — which
4335
+ * would otherwise be lost while the VAD's `minSpeechDuration` window
4336
+ * accumulated and fired `speech_start`. Each frame is 20 ms × 32 bytes/ms
4337
+ * (16 kHz × 16-bit mono) ≈ 640 bytes.
4338
+ *
4339
+ * Capped to ``INBOUND_AUDIO_RING_FRAMES`` to recover only the
4340
+ * VAD-missed leading edge of the user's speech (default 250 ms,
4341
+ * matching SileroVAD ``minSpeechDuration``). Earlier values up to
4342
+ * 600 ms were including ~350 ms of pre-speech silence/agent-bleed in
4343
+ * the replay; on PSTN (where AEC is a no-op) Deepgram trained on
4344
+ * English happily transcribes that bleed as English garbage
4345
+ * (``"The same as Edgar,"``, ``"Permadees."``) and commits it to
4346
+ * the LLM as a phantom user transcript. See BUGS.md 2026-05-05
4347
+ * post-barge-in bleed-transcription entry.
4348
+ */
4349
+ inboundAudioRing = [];
4350
+ static INBOUND_AUDIO_RING_FRAMES = 13;
4351
+ /**
4352
+ * Cached LLM provider tag used by speech-event payloads. Mirrors the
4353
+ * value passed to the metrics accumulator at construction time so the
4354
+ * speech-edge events report the same provider classification as
4355
+ * dashboard / pricing rows.
4356
+ */
4357
+ llmProviderTag = "openai";
3997
4358
  /** Set to true after a VAD error to suppress log spam for the rest of the call. */
3998
4359
  vadDisabled = false;
4360
+ /**
4361
+ * Auto-loaded SileroVAD when ``agent.vad`` is undefined. Populated by
4362
+ * ``initPipeline`` and queried alongside ``agent.vad`` on every audio frame.
4363
+ * Stays null when ``onnxruntime-node`` is not installed — the pipeline
4364
+ * then falls back to the STT-endpoint heuristic (legacy behaviour).
4365
+ */
4366
+ autoVad = null;
4367
+ /**
4368
+ * Acoustic echo canceller (NLMS adaptive filter). Lazily instantiated in
4369
+ * ``initPipeline`` when ``agent.echoCancellation`` is true. ``null``
4370
+ * otherwise — the mic path stays a pure pass-through for handset /
4371
+ * headset deployments that don't have TTS bleed.
4372
+ */
4373
+ aec = null;
3999
4374
  /**
4000
4375
  * Monotonic counter incremented on every TTS-start. The grace timer
4001
4376
  * scheduled by ``endSpeakingWithGrace`` only flips ``isSpeaking=false``
@@ -4004,20 +4379,97 @@ var StreamHandler = class {
4004
4379
  * own ``isSpeaking=true``.
4005
4380
  */
4006
4381
  speakingGeneration = 0;
4382
+ /**
4383
+ * Wall-clock timestamp (ms since epoch) when the current TTS turn
4384
+ * started — captured by ``beginSpeaking`` and cleared by
4385
+ * ``cancelSpeaking`` / the grace flip. Used to gate barge-in: we
4386
+ * suppress the cancel for the first
4387
+ * ``MIN_AGENT_SPEAKING_MS_BEFORE_BARGE_IN_AEC`` of every turn (when AEC
4388
+ * is on) so the AEC filter has time to converge — otherwise residual
4389
+ * TTS bleed in the mic stream looks like user speech to VAD and
4390
+ * triggers an immediate self-cancellation of the agent's first
4391
+ * sentence.
4392
+ */
4393
+ speakingStartedAt = null;
4394
+ /**
4395
+ * Minimum wall-clock duration (ms) the agent must have been speaking
4396
+ * before barge-in is allowed to fire when AEC is active. Covers the
4397
+ * AEC warmup window (~500 ms) plus a safety margin so residual bleed
4398
+ * during the convergence period does not self-trigger barge-in.
4399
+ */
4400
+ static MIN_AGENT_SPEAKING_MS_BEFORE_BARGE_IN_AEC = 1e3;
4401
+ /**
4402
+ * Same as the AEC variant but for deployments where AEC is OFF
4403
+ * (default on PSTN — Twilio/Telnyx). Without an adaptive filter to
4404
+ * converge, the only justification for a gate is anti-flicker on
4405
+ * micro-events (cough, click). A short 250 ms window keeps real-user
4406
+ * barge-in responsive while still filtering tiny noise spikes.
4407
+ */
4408
+ static MIN_AGENT_SPEAKING_MS_BEFORE_BARGE_IN_NO_AEC = 250;
4007
4409
  /** Handle for the pending grace-period timer, so it can be cleared on cleanup. */
4008
4410
  graceTimer = null;
4009
- /** Mark the start of a TTS span. Use instead of setting isSpeaking directly. */
4010
- beginSpeaking() {
4411
+ /**
4412
+ * AbortController for the current LLM streaming consumption. Aborted by
4413
+ * ``cancelSpeaking`` so the in-flight LLM stream stops generating tokens
4414
+ * we will never speak — saves provider cost and frees the connection
4415
+ * earlier. Mirrors Python ``_llm_cancel_event``.
4416
+ */
4417
+ llmAbort = null;
4418
+ /**
4419
+ * Wall-clock timestamp of the most recent ``cancelSpeaking`` call, or
4420
+ * ``null`` if no cancel has fired since the call started. Used by
4421
+ * ``beginSpeaking`` to enforce a short post-cancel drain window so the
4422
+ * remote PSTN player finishes flushing the previous turn's in-flight
4423
+ * audio before the next TTS chunk lands on top of it. Without this,
4424
+ * the first sentence of a post-barge-in turn audibly overlaps with
4425
+ * the tail of the cancelled turn (~50-200 ms of doubled audio).
4426
+ */
4427
+ lastCancelAt = null;
4428
+ /**
4429
+ * Minimum drain window (ms) between a ``cancelSpeaking`` and the next
4430
+ * ``beginSpeaking``. 150 ms covers a typical PSTN jitter buffer drain
4431
+ * + Twilio Media Stream clear propagation. Lower values risk audio
4432
+ * overlap on the first chunk; higher values increase the perceived
4433
+ * "agent ack" latency after a barge-in. 150 ms is the smallest value
4434
+ * that consistently eliminated the overlap during 0.6.0 acceptance.
4435
+ */
4436
+ static POST_CANCEL_DRAIN_MS = 150;
4437
+ /**
4438
+ * Mark the start of a TTS span. Use instead of setting isSpeaking
4439
+ * directly. Awaits the post-cancel drain window before flipping state
4440
+ * so the remote player has time to flush the cancelled turn's tail.
4441
+ */
4442
+ async beginSpeaking() {
4443
+ if (this.lastCancelAt !== null) {
4444
+ const elapsed = Date.now() - this.lastCancelAt;
4445
+ const remaining = _StreamHandler.POST_CANCEL_DRAIN_MS - elapsed;
4446
+ if (remaining > 0) {
4447
+ await new Promise((r) => setTimeout(r, remaining));
4448
+ }
4449
+ }
4011
4450
  this.speakingGeneration++;
4012
4451
  this.isSpeaking = true;
4452
+ this.speakingStartedAt = Date.now();
4453
+ this.inboundAudioRing = [];
4013
4454
  }
4014
4455
  /**
4015
4456
  * Atomically end speaking AND invalidate any pending grace timer.
4016
4457
  * Use instead of ``this.isSpeaking = false`` at barge-in sites.
4458
+ *
4459
+ * Also aborts the in-flight LLM stream (if any) so the provider stops
4460
+ * billing tokens we will never speak.
4017
4461
  */
4018
4462
  cancelSpeaking() {
4019
4463
  this.speakingGeneration++;
4020
4464
  this.isSpeaking = false;
4465
+ this.speakingStartedAt = null;
4466
+ this.lastCancelAt = Date.now();
4467
+ if (this.llmAbort !== null) {
4468
+ try {
4469
+ this.llmAbort.abort();
4470
+ } catch {
4471
+ }
4472
+ }
4021
4473
  }
4022
4474
  /** Cancel and clear the pending grace timer, if any. */
4023
4475
  clearGraceTimer() {
@@ -4040,18 +4492,102 @@ var StreamHandler = class {
4040
4492
  this.clearGraceTimer();
4041
4493
  this.graceTimer = setTimeout(() => {
4042
4494
  this.graceTimer = null;
4043
- if (this.speakingGeneration === gen) this.isSpeaking = false;
4495
+ if (this.speakingGeneration === gen) {
4496
+ this.isSpeaking = false;
4497
+ this.speakingStartedAt = null;
4498
+ }
4044
4499
  }, grace);
4045
4500
  } else {
4046
4501
  this.isSpeaking = false;
4502
+ this.speakingStartedAt = null;
4047
4503
  }
4048
4504
  }
4505
+ /**
4506
+ * Whether barge-in is allowed to fire right now. Gate length depends
4507
+ * on whether AEC is active: 1 s with AEC (covers filter warmup),
4508
+ * 250 ms without (anti-flicker only — keeps PSTN barge-in responsive).
4509
+ */
4510
+ canBargeIn() {
4511
+ if (this.speakingStartedAt === null) return true;
4512
+ const elapsed = Date.now() - this.speakingStartedAt;
4513
+ const gate = this.aec ? _StreamHandler.MIN_AGENT_SPEAKING_MS_BEFORE_BARGE_IN_AEC : _StreamHandler.MIN_AGENT_SPEAKING_MS_BEFORE_BARGE_IN_NO_AEC;
4514
+ return elapsed >= gate;
4515
+ }
4516
+ /**
4517
+ * Replay the audio captured by the self-hearing guard right before a
4518
+ * confirmed barge-in. VAD's ``minSpeechDuration`` window (default
4519
+ * 250 ms) means ``speech_start`` fires only AFTER the user has been
4520
+ * talking for that long; without this replay STT sees only the tail
4521
+ * of the user's interruption and produces "the line is breaking up"
4522
+ * partial transcripts. We deliberately do NOT call this on natural
4523
+ * turn end — see the comment in ``endSpeakingWithGrace`` for why.
4524
+ */
4525
+ flushInboundAudioRing() {
4526
+ if (!this.stt || this.inboundAudioRing.length === 0) return;
4527
+ const replayed = this.inboundAudioRing.length;
4528
+ for (const buf of this.inboundAudioRing) {
4529
+ try {
4530
+ this.stt.sendAudio(buf);
4531
+ } catch (err) {
4532
+ getLogger().debug(`sendAudio replay failed: ${String(err)}`);
4533
+ }
4534
+ }
4535
+ this.inboundAudioRing = [];
4536
+ getLogger().info(
4537
+ `[DIAG] Flushed ${replayed} pre-barge-in frame(s) (~${replayed * 20} ms) to STT`
4538
+ );
4539
+ }
4049
4540
  llmLoop = null;
4541
+ /**
4542
+ * Per-call tool executor — provides retry-with-exponential-backoff and a
4543
+ * per-tool circuit breaker for Realtime function calls. Pipeline mode
4544
+ * uses its own executor inside ``LLMLoop``; this one is dedicated to
4545
+ * the Realtime path so a flaky downstream (DB outage, vendor rate
4546
+ * limit) returns a structured ``{ error, fallback: true }`` instead of
4547
+ * hanging the model on retries that will keep failing.
4548
+ */
4549
+ toolExecutor = new DefaultToolExecutor();
4550
+ /**
4551
+ * MCP server connection manager — populated lazily in
4552
+ * ``initMcpTools()`` when the agent declares ``mcpServers``. Holds
4553
+ * the open MCP client connections for the lifetime of the call so
4554
+ * we can dispatch ``tools/call`` without re-handshaking on every
4555
+ * function invocation. Cleared in ``fireCallEnd``.
4556
+ */
4557
+ mcpManager = null;
4050
4558
  chunkCount = 0;
4051
4559
  callEndFired = false;
4052
4560
  sttClosed = false;
4053
4561
  currentAgentText = "";
4054
4562
  responseAudioStarted = false;
4563
+ /**
4564
+ * Realtime turn ordering buffer. OpenAI Realtime emits
4565
+ * `input_audio_transcription.completed` (user transcript) AFTER
4566
+ * `response.done` (assistant complete) because Whisper transcription
4567
+ * runs in parallel with — and slower than — model response. Without
4568
+ * this buffer the pushed `history` order is [assistant, user, ...]
4569
+ * which renders out-of-order in the dashboard.
4570
+ *
4571
+ * Behaviour:
4572
+ * - `onAdapterSpeechStopped` flips `userTranscriptPending = true`
4573
+ * - `onAdapterResponseDone` checks the flag; if set, stashes the
4574
+ * assistant text + a fallback timer
4575
+ * - `onAdapterTranscriptInput` clears the flag, pushes user, then
4576
+ * flushes any pending assistant turn
4577
+ * - The fallback timer flushes the assistant alone if the user
4578
+ * transcript never arrives (silence misclassified as speech, etc.)
4579
+ */
4580
+ userTranscriptPending = false;
4581
+ pendingAssistantTurn = null;
4582
+ pendingAssistantTimer = null;
4583
+ /**
4584
+ * Hard cap on how long we wait for the user transcript before flushing
4585
+ * the buffered assistant turn alone. 3 s covers OpenAI Whisper's typical
4586
+ * 200-800 ms post-response delay with substantial headroom for slow
4587
+ * cellular audio uploads. Beyond this we accept the order will look
4588
+ * "assistant-only" rather than block the call's transcript display.
4589
+ */
4590
+ static REALTIME_USER_TRANSCRIPT_WAIT_MS = 3e3;
4055
4591
  maxDurationTimer = null;
4056
4592
  transcriptProcessing = false;
4057
4593
  transcriptQueue = [];
@@ -4080,9 +4616,12 @@ var StreamHandler = class {
4080
4616
  this.history = createHistoryManager(200);
4081
4617
  const sttKey = deps.agent.stt?.constructor?.providerKey;
4082
4618
  const sttProviderName = deps.agent.stt ? sttKey ?? deps.agent.stt.constructor?.name ?? "custom" : void 0;
4619
+ const sttModelName = String((deps.agent.stt?.model ?? "") || "");
4083
4620
  const ttsKey = deps.agent.tts?.constructor?.providerKey;
4084
4621
  const ttsProviderName = deps.agent.tts ? ttsKey ?? deps.agent.tts.constructor?.name ?? "custom" : void 0;
4622
+ const ttsModelName = String((deps.agent.tts?.model ?? "") || "");
4085
4623
  const providerMode = deps.agent.provider ?? "openai_realtime";
4624
+ const realtimeModelName = providerMode === "openai_realtime" ? String((deps.agent.model ?? "") || "") || "gpt-realtime-mini" : "";
4086
4625
  const llmKey = deps.agent.llm?.constructor?.providerKey;
4087
4626
  let llmProviderName;
4088
4627
  if (deps.agent.llm) {
@@ -4095,6 +4634,7 @@ var StreamHandler = class {
4095
4634
  } else {
4096
4635
  llmProviderName = providerMode === "openai_realtime" ? "openai_realtime" : "openai";
4097
4636
  }
4637
+ this.llmProviderTag = llmProviderName;
4098
4638
  this._eventBus = new EventBus();
4099
4639
  this.metricsAcc = new CallMetricsAccumulator({
4100
4640
  callId: "",
@@ -4103,6 +4643,9 @@ var StreamHandler = class {
4103
4643
  sttProvider: sttProviderName,
4104
4644
  ttsProvider: ttsProviderName,
4105
4645
  llmProvider: llmProviderName,
4646
+ sttModel: sttModelName,
4647
+ ttsModel: ttsModelName,
4648
+ realtimeModel: realtimeModelName,
4106
4649
  pricing: deps.pricing,
4107
4650
  eventBus: this._eventBus,
4108
4651
  reportOnlyInitialTtfb: deps.reportOnlyInitialTtfb ?? false
@@ -4213,6 +4756,7 @@ var StreamHandler = class {
4213
4756
  * @param callId Call SID (Twilio) or call_control_id (Telnyx)
4214
4757
  * @param customParams TwiML custom parameters (Twilio only, empty for Telnyx)
4215
4758
  */
4759
+ /** Initialize per-call state, build the AI adapter, and dispatch the `onCallStart` callback. */
4216
4760
  async handleCallStart(callId, customParams = {}) {
4217
4761
  this.callId = callId;
4218
4762
  this.metricsAcc.callId = callId;
@@ -4239,7 +4783,7 @@ var StreamHandler = class {
4239
4783
  }
4240
4784
  }, MAX_CALL_DURATION_MS);
4241
4785
  try {
4242
- const { notifyDashboard } = await import("./persistence-LQBYQPQQ.mjs");
4786
+ const { notifyDashboard } = await import("./persistence-LVIAHESK.mjs");
4243
4787
  notifyDashboard({
4244
4788
  call_id: callId,
4245
4789
  caller: this.caller,
@@ -4264,25 +4808,58 @@ var StreamHandler = class {
4264
4808
  const allVars = { ...agentVars, ...safeCustomParams };
4265
4809
  const resolvedPrompt = Object.keys(allVars).length > 0 ? this.deps.resolveVariables(this.deps.agent.systemPrompt, allVars) : this.deps.agent.systemPrompt;
4266
4810
  const provider2 = this.deps.agent.provider ?? "openai_realtime";
4811
+ await this.initMcpTools();
4267
4812
  if (provider2 === "pipeline") {
4268
4813
  await this.initPipeline(resolvedPrompt);
4269
4814
  } else {
4270
4815
  await this.initRealtimeAdapter(resolvedPrompt);
4271
4816
  }
4272
4817
  }
4818
+ /**
4819
+ * Connect to every configured MCP server, discover their tools via
4820
+ * ``tools/list``, and merge them into ``agent.tools`` before the
4821
+ * adapter is built. The synthetic handlers dispatch back through the
4822
+ * MCP client so ``DefaultToolExecutor`` can invoke them like any
4823
+ * other handler-tool. No-op when ``agent.mcpServers`` is empty or the
4824
+ * optional ``@modelcontextprotocol/sdk`` is not installed.
4825
+ */
4826
+ async initMcpTools() {
4827
+ const servers = this.deps.agent.mcpServers;
4828
+ if (!servers || servers.length === 0) return;
4829
+ this.mcpManager = new MCPManager(servers);
4830
+ let discovered;
4831
+ try {
4832
+ discovered = await this.mcpManager.connect();
4833
+ } catch (e) {
4834
+ getLogger().error(`MCP connect failed (continuing without MCP tools): ${String(e)}`);
4835
+ this.mcpManager = null;
4836
+ return;
4837
+ }
4838
+ if (discovered.length === 0) return;
4839
+ MCPManager.assertNoConflicts(this.deps.agent.tools, discovered);
4840
+ const mutableAgent = this.deps.agent;
4841
+ mutableAgent.tools = [...mutableAgent.tools ?? [], ...discovered];
4842
+ getLogger().info(`MCP: merged ${discovered.length} tool(s) into agent`);
4843
+ }
4273
4844
  /** Set the stream SID (Twilio only, called after parsing 'start' event). */
4845
+ /** Set the carrier-side stream id (Twilio `streamSid` / Telnyx stream identifier). */
4274
4846
  setStreamSid(sid) {
4275
4847
  this.streamSid = sid;
4276
4848
  }
4277
4849
  /** Handle an incoming audio chunk (already decoded from base64). */
4850
+ /** Forward inbound audio bytes to the AI adapter and (in pipeline mode) the STT provider. */
4278
4851
  async handleAudio(audioBuffer) {
4279
4852
  const provider2 = this.deps.agent.provider ?? "openai_realtime";
4280
4853
  if (provider2 === "pipeline" && this.stt) {
4281
4854
  const pcm8k = mulawToPcm16(audioBuffer);
4282
- const pcm16k = this.inboundResampler.process(pcm8k);
4283
- if (this.deps.agent.vad && !this.vadDisabled) {
4855
+ let pcm16k = this.inboundResampler.process(pcm8k);
4856
+ if (this.aec) {
4857
+ pcm16k = this.aec.processNearEnd(pcm16k);
4858
+ }
4859
+ const activeVad = this.deps.agent.vad ?? this.autoVad;
4860
+ if (activeVad && !this.vadDisabled) {
4284
4861
  try {
4285
- const vadPromise = this.deps.agent.vad.processFrame(pcm16k, 16e3);
4862
+ const vadPromise = activeVad.processFrame(pcm16k, 16e3);
4286
4863
  const timeoutPromise = new Promise((resolve) => setTimeout(() => resolve(null), 25));
4287
4864
  const evt = await Promise.race([vadPromise, timeoutPromise]);
4288
4865
  if (evt) {
@@ -4291,7 +4868,11 @@ var StreamHandler = class {
4291
4868
  );
4292
4869
  }
4293
4870
  if (evt?.type === "speech_start") {
4294
- if (this.isSpeaking) {
4871
+ if (this.isSpeaking && !this.canBargeIn()) {
4872
+ getLogger().info(
4873
+ `[VAD] speech_start suppressed (agent speaking < gate, aec=${this.aec ? "on" : "off"})`
4874
+ );
4875
+ } else if (this.isSpeaking) {
4295
4876
  getLogger().info("[VAD] speech_start during TTS \u2192 BARGE-IN");
4296
4877
  this.metricsAcc.recordOverlapStart();
4297
4878
  this.metricsAcc.recordBargeinDetected();
@@ -4303,6 +4884,7 @@ var StreamHandler = class {
4303
4884
  } catch (err) {
4304
4885
  getLogger().debug(`sendClear during VAD barge-in failed: ${String(err)}`);
4305
4886
  }
4887
+ this.flushInboundAudioRing();
4306
4888
  this.metricsAcc.recordTtsStopped();
4307
4889
  this.metricsAcc.recordTurnInterrupted();
4308
4890
  this.metricsAcc.recordOverlapEnd(true);
@@ -4316,6 +4898,16 @@ var StreamHandler = class {
4316
4898
  this.metricsAcc.startTurnIfIdle();
4317
4899
  } else if (evt?.type === "speech_end") {
4318
4900
  this.metricsAcc.recordVadStop();
4901
+ try {
4902
+ const ret = this.stt?.finalize?.();
4903
+ if (ret instanceof Promise) {
4904
+ ret.catch(
4905
+ (err) => getLogger().debug(`STT finalize threw: ${String(err)}`)
4906
+ );
4907
+ }
4908
+ } catch (err) {
4909
+ getLogger().debug(`STT finalize threw: ${String(err)}`);
4910
+ }
4319
4911
  }
4320
4912
  } catch (err) {
4321
4913
  this.vadDisabled = true;
@@ -4323,7 +4915,13 @@ var StreamHandler = class {
4323
4915
  }
4324
4916
  }
4325
4917
  if (this.isSpeaking) {
4326
- if (this.deps.agent.vad) return;
4918
+ if (this.deps.agent.vad ?? this.autoVad) {
4919
+ this.inboundAudioRing.push(pcm16k);
4920
+ if (this.inboundAudioRing.length > _StreamHandler.INBOUND_AUDIO_RING_FRAMES) {
4921
+ this.inboundAudioRing.shift();
4922
+ }
4923
+ return;
4924
+ }
4327
4925
  if ((this.deps.agent.bargeInThresholdMs ?? 300) === 0) return;
4328
4926
  }
4329
4927
  const hooks = this.deps.agent.hooks;
@@ -4349,6 +4947,7 @@ var StreamHandler = class {
4349
4947
  }
4350
4948
  }
4351
4949
  /** Handle a DTMF keypress event (Twilio only). */
4950
+ /** Handle an inbound DTMF tone from the caller. */
4352
4951
  async handleDtmf(digit) {
4353
4952
  getLogger().debug(`DTMF: ${digit}`);
4354
4953
  if (this.adapter instanceof OpenAIRealtimeAdapter) {
@@ -4371,12 +4970,14 @@ var StreamHandler = class {
4371
4970
  * ``twilio_handler.py``: ``audio_sender.on_mark_confirmed(mark_name)`` +
4372
4971
  * ``handler.on_mark(mark_name)``.
4373
4972
  */
4973
+ /** Handle a Twilio Media Streams `mark` event acknowledging audio playback boundaries. */
4374
4974
  async onMark(markName) {
4375
4975
  if (markName) {
4376
4976
  this.lastConfirmedMark = markName;
4377
4977
  }
4378
4978
  }
4379
4979
  /** Handle call stop / stream end. */
4980
+ /** Handle a carrier-emitted `stop` event signalling the call has ended. */
4380
4981
  async handleStop() {
4381
4982
  this.clearGraceTimer();
4382
4983
  this.flushResamplers();
@@ -4388,6 +4989,7 @@ var StreamHandler = class {
4388
4989
  await this.fireCallEnd();
4389
4990
  }
4390
4991
  /** Handle WebSocket close event. */
4992
+ /** Tear down adapter, STT/TTS, and per-call state when the carrier WebSocket closes. */
4391
4993
  async handleWsClose() {
4392
4994
  this.clearGraceTimer();
4393
4995
  this.flushResamplers();
@@ -4422,7 +5024,7 @@ var StreamHandler = class {
4422
5024
  * (server.ts) requests ``stream_bidirectional_codec=PCMU`` at 8 kHz. So
4423
5025
  * the wire format for both providers is mulaw 8 kHz; we resample 16 kHz
4424
5026
  * PCM16 → 8 kHz then encode to mulaw. Mirrors the Python pipeline path
4425
- * (sdk-py/getpatter/handlers/telnyx_handler.py::TelnyxAudioSender).
5027
+ * (libraries/python/getpatter/handlers/telnyx_handler.py::TelnyxAudioSender).
4426
5028
  *
4427
5029
  * Maintains a 1-byte carry across calls so unaligned HTTP chunks from
4428
5030
  * streaming TTS providers never byte-swap the PCM16 samples downstream.
@@ -4451,12 +5053,61 @@ var StreamHandler = class {
4451
5053
  const label = this.deps.bridge.label;
4452
5054
  this.stt = await this.deps.bridge.createStt(this.deps.agent);
4453
5055
  this.tts = await createTTS(this.deps.agent);
5056
+ if (this.tts) {
5057
+ const carrierAware = this.tts;
5058
+ if (typeof carrierAware.setTelephonyCarrier === "function") {
5059
+ try {
5060
+ carrierAware.setTelephonyCarrier(this.deps.bridge.telephonyProvider);
5061
+ } catch (e) {
5062
+ getLogger().debug(`TTS setTelephonyCarrier failed (${label}): ${String(e)}`);
5063
+ }
5064
+ }
5065
+ }
4454
5066
  if (!this.stt) {
4455
5067
  getLogger().debug(`Pipeline mode (${label}): no STT configured`);
4456
5068
  }
4457
5069
  if (!this.tts) {
4458
5070
  getLogger().debug(`Pipeline mode (${label}): no TTS configured`);
4459
5071
  }
5072
+ if (!this.deps.agent.vad) {
5073
+ try {
5074
+ const { SileroVAD } = await import("./silero-vad-YLCXT5GQ.mjs");
5075
+ this.autoVad = await SileroVAD.forPhoneCall();
5076
+ getLogger().info(
5077
+ `auto-VAD enabled (SileroVAD, phone preset). Pass agent.vad=\u2026 to override.`
5078
+ );
5079
+ } catch (e) {
5080
+ const msg = e?.message ?? String(e);
5081
+ if (/Cannot find module|onnxruntime-node/i.test(msg)) {
5082
+ getLogger().info(
5083
+ "auto-VAD unavailable: onnxruntime-node not installed. Run `npm install onnxruntime-node@~1.18.0` for fast barge-in."
5084
+ );
5085
+ } else {
5086
+ getLogger().warn(
5087
+ `auto-VAD load failed (${msg}); falling back to STT-endpoint heuristic`
5088
+ );
5089
+ }
5090
+ }
5091
+ }
5092
+ if (this.deps.agent.echoCancellation) {
5093
+ const carrier = this.deps.bridge.telephonyProvider;
5094
+ if (carrier === "twilio" || carrier === "telnyx") {
5095
+ getLogger().warn(
5096
+ `echoCancellation: true on ${carrier} (PSTN). Server-side NLMS cannot model PSTN's ~250\u20131500 ms round-trip echo with a 32 ms filter window \u2014 it will silently no-op. Best practice: keep echoCancellation: false; rely on the carrier + caller device's built-in echo suppression and Patter's self-hearing guard. Enable AEC only for browser/native deployments where the SDK owns the audio path end-to-end.`
5097
+ );
5098
+ }
5099
+ try {
5100
+ const { NlmsEchoCanceller } = await import("./aec-PJJMUM5E.mjs");
5101
+ this.aec = new NlmsEchoCanceller({ sampleRate: 16e3 });
5102
+ getLogger().info(
5103
+ "echo cancellation enabled (NLMS, 512 taps + 0.5 s warmup \u03BC=0.5); filter converges within ~250 ms of TTS playback in low-latency loops."
5104
+ );
5105
+ } catch (e) {
5106
+ getLogger().warn(
5107
+ `echo cancellation requested but failed to load: ${String(e)}; falling back to pass-through.`
5108
+ );
5109
+ }
5110
+ }
4460
5111
  try {
4461
5112
  if (this.stt) await this.stt.connect();
4462
5113
  getLogger().debug(`Pipeline mode (${label}): STT + TTS connected`);
@@ -4470,13 +5121,19 @@ var StreamHandler = class {
4470
5121
  }
4471
5122
  if (this.deps.agent.firstMessage && !this.deps.onMessage && this.tts) {
4472
5123
  this.metricsAcc.startTurn();
5124
+ await this.beginSpeaking();
4473
5125
  let firstChunkSent = false;
4474
5126
  this.resetTtsCarry();
4475
5127
  try {
4476
5128
  for await (const chunk of this.tts.synthesizeStream(this.deps.agent.firstMessage)) {
5129
+ if (!this.isSpeaking) break;
4477
5130
  if (!firstChunkSent) {
4478
5131
  firstChunkSent = true;
4479
5132
  this.metricsAcc.recordTtsFirstByte();
5133
+ await this.emitAudioOut();
5134
+ }
5135
+ if (this.aec) {
5136
+ this.aec.pushFarEnd(chunk);
4480
5137
  }
4481
5138
  const encoded = this.encodePipelineAudio(chunk);
4482
5139
  this.deps.bridge.sendAudio(this.ws, encoded, this.streamSid);
@@ -4485,6 +5142,7 @@ var StreamHandler = class {
4485
5142
  getLogger().error(`First message TTS error (${label}):`, e);
4486
5143
  } finally {
4487
5144
  this.resetTtsCarry();
5145
+ this.endSpeakingWithGrace();
4488
5146
  }
4489
5147
  if (firstChunkSent) {
4490
5148
  await this.emitTurnMetrics(this.metricsAcc.recordTurnComplete(this.deps.agent.firstMessage));
@@ -4505,9 +5163,11 @@ var StreamHandler = class {
4505
5163
  // propagate so calculateLlmCost can match the price row
4506
5164
  resolvedPrompt,
4507
5165
  this.deps.agent.tools,
4508
- this.deps.agent.llm
5166
+ this.deps.agent.llm,
5167
+ this.deps.agent.disablePhonePreamble ?? false
4509
5168
  );
4510
5169
  this.llmLoop.setEventBus(this._eventBus);
5170
+ this.llmLoop.setOnToolCall((n, a, r) => this.recordToolCall(n, a, r));
4511
5171
  const llmLabel = this.deps.agent.llm.constructor?.name ?? "custom";
4512
5172
  getLogger().debug(`Built-in LLM loop active (pipeline, ${label}, llm=${llmLabel})`);
4513
5173
  } else if (!this.deps.onMessage && this.deps.config.openaiKey) {
@@ -4517,9 +5177,12 @@ var StreamHandler = class {
4517
5177
  this.deps.config.openaiKey,
4518
5178
  llmModel,
4519
5179
  resolvedPrompt,
4520
- this.deps.agent.tools
5180
+ this.deps.agent.tools,
5181
+ void 0,
5182
+ this.deps.agent.disablePhonePreamble ?? false
4521
5183
  );
4522
5184
  this.llmLoop.setEventBus(this._eventBus);
5185
+ this.llmLoop.setOnToolCall((n, a, r) => this.recordToolCall(n, a, r));
4523
5186
  getLogger().debug(`Built-in LLM loop active (pipeline, ${label})`);
4524
5187
  }
4525
5188
  if (this.stt) {
@@ -4555,9 +5218,14 @@ var StreamHandler = class {
4555
5218
  if (!this.isSpeaking) break;
4556
5219
  const processedAudio = await hookExecutor.runAfterSynthesize(chunk, processedText, hookCtx);
4557
5220
  if (processedAudio === null) continue;
5221
+ if (!this.isSpeaking) break;
4558
5222
  if (!ttsFirstByteSent.value) {
4559
5223
  ttsFirstByteSent.value = true;
4560
5224
  this.metricsAcc.recordTtsFirstByte();
5225
+ await this.emitAudioOut();
5226
+ }
5227
+ if (this.aec) {
5228
+ this.aec.pushFarEnd(processedAudio);
4561
5229
  }
4562
5230
  const encoded = this.encodePipelineAudio(processedAudio);
4563
5231
  this.deps.bridge.sendAudio(this.ws, encoded, this.streamSid);
@@ -4583,6 +5251,9 @@ var StreamHandler = class {
4583
5251
  }
4584
5252
  }
4585
5253
  async processTranscript(transcript) {
5254
+ getLogger().info(
5255
+ `[DIAG] processTranscript text=${JSON.stringify((transcript.text ?? "").slice(0, 60))} isFinal=${transcript.isFinal} speechFinal=${transcript.speechFinal} isSpeaking=${this.isSpeaking}`
5256
+ );
4586
5257
  let interrupted = this.handleBargeIn(transcript);
4587
5258
  if (transcript.text) {
4588
5259
  this.metricsAcc.startTurnIfIdle();
@@ -4593,6 +5264,9 @@ var StreamHandler = class {
4593
5264
  if (!transcript.isFinal || !transcript.text) return;
4594
5265
  if (!this.commitTranscript(transcript.text)) return;
4595
5266
  const label = this.deps.bridge.label;
5267
+ getLogger().info(
5268
+ `[DIAG] processTranscript COMMITTED \u2192 LLM (${label} pipeline): ${sanitizeLogValue(transcript.text.slice(0, 80))}`
5269
+ );
4596
5270
  getLogger().debug(`User (${label} pipeline): ${sanitizeLogValue(transcript.text)}`);
4597
5271
  this.metricsAcc.startTurnIfIdle();
4598
5272
  this.metricsAcc.recordSttComplete(transcript.text);
@@ -4672,7 +5346,7 @@ var StreamHandler = class {
4672
5346
  }
4673
5347
  if (!responseText) return;
4674
5348
  if (this.llmLoop) {
4675
- this.history.push({ role: "assistant", text: responseText, timestamp: Date.now() });
5349
+ await this.emitAssistantTranscript(responseText);
4676
5350
  this.metricsAcc.recordTtsComplete(responseText);
4677
5351
  } else {
4678
5352
  interrupted = await this.runRegularLlm(responseText, hookExecutor, hookCtx) || interrupted;
@@ -4690,6 +5364,12 @@ var StreamHandler = class {
4690
5364
  */
4691
5365
  handleBargeIn(transcript) {
4692
5366
  if (!transcript.text || !this.isSpeaking) return false;
5367
+ if (!this.canBargeIn()) {
5368
+ getLogger().info(
5369
+ `Barge-in transcript suppressed (agent speaking < gate, aec=${this.aec ? "on" : "off"})`
5370
+ );
5371
+ return false;
5372
+ }
4693
5373
  getLogger().debug(
4694
5374
  `Barge-in: caller spoke over agent (${sanitizeLogValue(transcript.text.slice(0, 40))})`
4695
5375
  );
@@ -4755,16 +5435,26 @@ var StreamHandler = class {
4755
5435
  async runPipelineLlm(filteredTranscript, hookExecutor, hookCtx) {
4756
5436
  const label = this.deps.bridge.label;
4757
5437
  const callCtx = { call_id: this.callId, caller: this.caller, callee: this.callee };
4758
- const chunker = new SentenceChunker();
5438
+ const chunker = new SentenceChunker({
5439
+ aggressiveFirstFlush: this.deps.agent.aggressiveFirstFlush ?? false,
5440
+ language: this.deps.agent.language
5441
+ });
4759
5442
  const allParts = [];
4760
5443
  const ttsFirstByteSent = { value: false };
4761
- this.beginSpeaking();
5444
+ await this.beginSpeaking();
5445
+ this.llmAbort = new AbortController();
5446
+ const llmSignal = this.llmAbort.signal;
4762
5447
  let llmError = false;
4763
5448
  const llmSpan = startSpan(SPAN_LLM, { "patter.call.id": this.callId });
4764
5449
  const guardAndSpeak = async (sentence, isFirst) => {
4765
5450
  if (isFirst) this.metricsAcc.recordLlmFirstSentenceComplete();
4766
5451
  const guard = checkGuardrails(sentence, this.deps.agent.guardrails);
4767
- const sentenceText = guard ? guard.replacement ?? "I'm sorry, I can't respond to that." : sentence;
5452
+ let sentenceText = guard ? guard.replacement ?? "I'm sorry, I can't respond to that." : sentence;
5453
+ if (hookExecutor.hasAfterLlmSentence()) {
5454
+ const transformed = await hookExecutor.runAfterLlmSentence(sentenceText, hookCtx);
5455
+ if (transformed === null) return;
5456
+ sentenceText = transformed;
5457
+ }
4768
5458
  await this.synthesizeSentence(sentenceText, hookExecutor, hookCtx, ttsFirstByteSent);
4769
5459
  };
4770
5460
  let firstSentenceEmitted = false;
@@ -4776,22 +5466,28 @@ var StreamHandler = class {
4776
5466
  callCtx,
4777
5467
  this.metricsAcc,
4778
5468
  hookExecutor,
4779
- hookCtx
5469
+ hookCtx,
5470
+ { signal: llmSignal }
4780
5471
  )) {
5472
+ if (llmSignal.aborted) break;
4781
5473
  this.metricsAcc.recordLlmFirstToken();
5474
+ await this.emitLlmFirstToken();
4782
5475
  allParts.push(token);
4783
5476
  for (const sentence of chunker.push(token)) {
4784
5477
  if (!this.isSpeaking) break;
4785
5478
  await guardAndSpeak(sentence, !firstSentenceEmitted);
4786
5479
  firstSentenceEmitted = true;
4787
5480
  }
4788
- if (!this.isSpeaking) break;
5481
+ if (!this.isSpeaking || llmSignal.aborted) break;
4789
5482
  }
4790
5483
  } catch (e) {
4791
- llmError = true;
4792
- chunker.reset();
4793
- getLogger().error(`LLM loop error (${label}):`, e);
4794
- this.metricsAcc.recordTurnInterrupted();
5484
+ const isAbort = e?.name === "AbortError" || llmSignal.aborted;
5485
+ if (!isAbort) {
5486
+ llmError = true;
5487
+ chunker.reset();
5488
+ getLogger().error(`LLM loop error (${label}):`, e);
5489
+ this.metricsAcc.recordTurnInterrupted();
5490
+ }
4795
5491
  }
4796
5492
  this.metricsAcc.recordLlmComplete();
4797
5493
  if (!llmError && this.isSpeaking) {
@@ -4803,6 +5499,7 @@ var StreamHandler = class {
4803
5499
  }
4804
5500
  } finally {
4805
5501
  this.endSpeakingWithGrace();
5502
+ this.llmAbort = null;
4806
5503
  try {
4807
5504
  llmSpan.end();
4808
5505
  } catch {
@@ -4823,11 +5520,11 @@ var StreamHandler = class {
4823
5520
  text = guard.replacement ?? "I'm sorry, I can't respond to that.";
4824
5521
  }
4825
5522
  this.metricsAcc.recordLlmComplete();
4826
- this.history.push({ role: "assistant", text, timestamp: Date.now() });
5523
+ await this.emitAssistantTranscript(text);
4827
5524
  const chunker = new SentenceChunker();
4828
5525
  const sentences = [...chunker.push(text), ...chunker.flush()];
4829
5526
  const ttsFirstByteSent = { value: false };
4830
- this.beginSpeaking();
5527
+ await this.beginSpeaking();
4831
5528
  let interrupted = false;
4832
5529
  try {
4833
5530
  for (const sentence of sentences) {
@@ -4835,7 +5532,13 @@ var StreamHandler = class {
4835
5532
  interrupted = true;
4836
5533
  break;
4837
5534
  }
4838
- await this.synthesizeSentence(sentence, hookExecutor, hookCtx, ttsFirstByteSent);
5535
+ let sentenceText = sentence;
5536
+ if (hookExecutor.hasAfterLlmSentence()) {
5537
+ const transformed = await hookExecutor.runAfterLlmSentence(sentenceText, hookCtx);
5538
+ if (transformed === null) continue;
5539
+ sentenceText = transformed;
5540
+ }
5541
+ await this.synthesizeSentence(sentenceText, hookExecutor, hookCtx, ttsFirstByteSent);
4839
5542
  }
4840
5543
  } finally {
4841
5544
  this.endSpeakingWithGrace();
@@ -4848,7 +5551,7 @@ var StreamHandler = class {
4848
5551
  const onMessage = this.deps.onMessage;
4849
5552
  const parts = [];
4850
5553
  this.metricsAcc.recordLlmComplete();
4851
- this.beginSpeaking();
5554
+ await this.beginSpeaking();
4852
5555
  let wsTtsStarted = false;
4853
5556
  try {
4854
5557
  for await (const chunk of this.deps.remoteHandler.callWebSocket(onMessage, msgData)) {
@@ -4860,6 +5563,7 @@ var StreamHandler = class {
4860
5563
  if (!wsTtsStarted) {
4861
5564
  wsTtsStarted = true;
4862
5565
  this.metricsAcc.recordTtsFirstByte();
5566
+ await this.emitAudioOut();
4863
5567
  }
4864
5568
  const encoded = this.encodePipelineAudio(audioChunk);
4865
5569
  this.deps.bridge.sendAudio(this.ws, encoded, this.streamSid);
@@ -4875,7 +5579,7 @@ var StreamHandler = class {
4875
5579
  const responseText = parts.join("");
4876
5580
  this.metricsAcc.recordTtsComplete(responseText);
4877
5581
  await this.emitTurnMetrics(this.metricsAcc.recordTurnComplete(responseText));
4878
- if (responseText) this.history.push({ role: "assistant", text: responseText, timestamp: Date.now() });
5582
+ if (responseText) await this.emitAssistantTranscript(responseText);
4879
5583
  }
4880
5584
  // ---------------------------------------------------------------------------
4881
5585
  // Private: OpenAI Realtime / ElevenLabs ConvAI mode
@@ -4897,7 +5601,8 @@ var StreamHandler = class {
4897
5601
  if (this.deps.agent.firstMessage) {
4898
5602
  this.metricsAcc.startTurn();
4899
5603
  if (this.adapter instanceof OpenAIRealtimeAdapter) {
4900
- await this.adapter.sendText(this.deps.agent.firstMessage);
5604
+ const sender = typeof this.adapter.sendFirstMessage === "function" ? this.adapter.sendFirstMessage.bind(this.adapter) : this.adapter.sendText.bind(this.adapter);
5605
+ await sender(this.deps.agent.firstMessage);
4901
5606
  }
4902
5607
  }
4903
5608
  this.adapter.onEvent(async (type, eventData) => {
@@ -4927,21 +5632,87 @@ var StreamHandler = class {
4927
5632
  }
4928
5633
  }
4929
5634
  };
5635
+ // ---- Speech-event helpers ------------------------------------------
5636
+ // No-op when the deps don't include a SpeechEvents dispatcher. Tracks
5637
+ // wall-clock for `speech_duration_ms` payloads.
5638
+ userSpeechStartMs = null;
5639
+ agentTurnStartMs = null;
5640
+ async emitUserSpeechStarted() {
5641
+ if (!this.deps.speechEvents) return;
5642
+ this.userSpeechStartMs = Date.now();
5643
+ await this.deps.speechEvents.fireUserSpeechStarted();
5644
+ }
5645
+ async emitUserSpeechEnded() {
5646
+ if (!this.deps.speechEvents) return;
5647
+ const duration = this.userSpeechStartMs !== null ? Math.max(0, Date.now() - this.userSpeechStartMs) : 0;
5648
+ this.userSpeechStartMs = null;
5649
+ await this.deps.speechEvents.fireUserSpeechEnded({
5650
+ speechDurationMs: duration
5651
+ });
5652
+ }
5653
+ async emitUserSpeechEos(transcriptSoFar) {
5654
+ if (!this.deps.speechEvents) return;
5655
+ await this.deps.speechEvents.fireUserSpeechEos({
5656
+ trigger: "vad_silence",
5657
+ transcriptSoFar
5658
+ });
5659
+ }
5660
+ async emitAgentSpeechStarted() {
5661
+ if (!this.deps.speechEvents) return;
5662
+ this.agentTurnStartMs = Date.now();
5663
+ const ttsKey = this.deps.agent.tts?.constructor?.providerKey;
5664
+ await this.deps.speechEvents.fireAgentSpeechStarted({
5665
+ ttsProvider: ttsKey,
5666
+ engine: this.deps.agent.provider ?? "openai_realtime"
5667
+ });
5668
+ }
5669
+ async emitAgentSpeechEnded(interrupted) {
5670
+ if (!this.deps.speechEvents) return;
5671
+ if (this.agentTurnStartMs === null) return;
5672
+ const duration = Math.max(0, Date.now() - this.agentTurnStartMs);
5673
+ this.agentTurnStartMs = null;
5674
+ await this.deps.speechEvents.fireAgentSpeechEnded({
5675
+ speechDurationMs: duration,
5676
+ interrupted
5677
+ });
5678
+ }
5679
+ /** Fire the per-turn LLM TTFT marker. Idempotent in the dispatcher
5680
+ * — guarded by `firstTokenForTurn` on the SpeechEvents instance. */
5681
+ async emitLlmFirstToken() {
5682
+ if (!this.deps.speechEvents) return;
5683
+ await this.deps.speechEvents.fireLlmFirstToken({
5684
+ llmProvider: this.llmProviderTag,
5685
+ model: this.deps.agent.model ?? ""
5686
+ });
5687
+ }
5688
+ /** Fire the per-turn first-TTS-audio marker. Idempotent in the
5689
+ * dispatcher — guarded by `firstAudioForTurn`. The provider tag falls
5690
+ * back to the engine name for Realtime / ConvAI (no separate TTS). */
5691
+ async emitAudioOut() {
5692
+ if (!this.deps.speechEvents) return;
5693
+ const ttsKey = this.deps.agent.tts?.constructor?.providerKey;
5694
+ const provider2 = ttsKey ?? this.deps.agent.provider ?? "openai_realtime";
5695
+ await this.deps.speechEvents.fireAudioOut({ ttsProvider: provider2 });
5696
+ }
4930
5697
  async onAdapterAudio(eventData) {
4931
5698
  if (!this.responseAudioStarted) {
4932
5699
  this.responseAudioStarted = true;
4933
5700
  if (this.metricsAcc.turnActive === false) this.metricsAcc.startTurn();
4934
5701
  this.metricsAcc.recordTtsFirstByte();
5702
+ await this.emitAgentSpeechStarted();
5703
+ await this.emitAudioOut();
4935
5704
  }
4936
5705
  const outAudio = eventData;
4937
5706
  this.deps.bridge.sendAudio(this.ws, outAudio.toString("base64"), this.streamSid);
4938
5707
  this.chunkCount++;
4939
5708
  this.deps.bridge.sendMark(this.ws, `audio_${this.chunkCount}`, this.streamSid);
4940
5709
  }
4941
- onAdapterSpeechStopped() {
5710
+ async onAdapterSpeechStopped() {
4942
5711
  if (!this.metricsAcc.turnActive) this.metricsAcc.startTurn();
4943
5712
  this.currentAgentText = "";
4944
5713
  this.responseAudioStarted = false;
5714
+ this.userTranscriptPending = true;
5715
+ await this.emitUserSpeechEnded();
4945
5716
  }
4946
5717
  async onAdapterTranscriptInput(inputText) {
4947
5718
  getLogger().debug(`User (${this.deps.bridge.label}): ${sanitizeLogValue(inputText)}`);
@@ -4951,6 +5722,7 @@ var StreamHandler = class {
4951
5722
  this.currentAgentText = "";
4952
5723
  this.responseAudioStarted = false;
4953
5724
  }
5725
+ await this.emitUserSpeechEos(inputText);
4954
5726
  this.metricsAcc.recordSttComplete(inputText);
4955
5727
  if (this.deps.onTranscript) {
4956
5728
  await this.deps.onTranscript({
@@ -4960,9 +5732,97 @@ var StreamHandler = class {
4960
5732
  history: [...this.history.entries]
4961
5733
  });
4962
5734
  }
5735
+ this.userTranscriptPending = false;
5736
+ if (this.pendingAssistantTurn !== null) {
5737
+ const buffered = this.pendingAssistantTurn;
5738
+ this.pendingAssistantTurn = null;
5739
+ if (this.pendingAssistantTimer) {
5740
+ clearTimeout(this.pendingAssistantTimer);
5741
+ this.pendingAssistantTimer = null;
5742
+ }
5743
+ await this.flushAssistantTurn(buffered);
5744
+ }
5745
+ }
5746
+ /**
5747
+ * Push an assistant turn into history, fire `onTranscript`, and emit
5748
+ * turn-complete metrics. Shared between the immediate path (no user
5749
+ * transcript pending) and the buffered path (flushed after user
5750
+ * transcript arrives or fallback timer fires).
5751
+ */
5752
+ async flushAssistantTurn(text) {
5753
+ this.history.push({ role: "assistant", text, timestamp: Date.now() });
5754
+ if (this.deps.onTranscript) {
5755
+ await this.deps.onTranscript({
5756
+ role: "assistant",
5757
+ text,
5758
+ call_id: this.callId,
5759
+ history: [...this.history.entries]
5760
+ });
5761
+ }
5762
+ this.responseAudioStarted = false;
5763
+ await this.emitTurnMetrics(this.metricsAcc.recordTurnComplete(text));
5764
+ }
5765
+ /**
5766
+ * Push an assistant turn into history and fire `onTranscript` so host
5767
+ * applications observe pipeline-mode replies the same way they observe
5768
+ * realtime-mode replies. Mirrors `_emit_assistant_transcript` in the
5769
+ * Python SDK and parallels `flushAssistantTurn` (realtime path).
5770
+ * Caller is responsible for filtering empty strings.
5771
+ */
5772
+ async emitAssistantTranscript(text) {
5773
+ this.history.push({ role: "assistant", text, timestamp: Date.now() });
5774
+ if (this.deps.onTranscript) {
5775
+ await this.deps.onTranscript({
5776
+ role: "assistant",
5777
+ text,
5778
+ call_id: this.callId,
5779
+ history: [...this.history.entries]
5780
+ });
5781
+ }
5782
+ }
5783
+ /**
5784
+ * Surface a tool invocation from pipeline mode into the transcript
5785
+ * timeline. Emits TWO events: one for the call (`name(argsJson)`) and
5786
+ * one for the result (`name(...) → result`, truncated to 200 chars).
5787
+ * Mirrors realtime mode's two `emitToolEvent` calls in
5788
+ * `handleFunctionCall`. Wired as the `LLMLoop` `onToolCall` observer.
5789
+ */
5790
+ async recordToolCall(name, args, result) {
5791
+ let argsText;
5792
+ try {
5793
+ argsText = JSON.stringify(args ?? {});
5794
+ } catch {
5795
+ argsText = "{}";
5796
+ }
5797
+ const callText = `${name}(${argsText})`;
5798
+ this.history.push({ role: "tool", text: callText, timestamp: Date.now() });
5799
+ if (this.deps.onTranscript) {
5800
+ await this.deps.onTranscript({
5801
+ role: "tool",
5802
+ text: callText,
5803
+ call_id: this.callId,
5804
+ tool_name: name,
5805
+ tool_args: args ?? {},
5806
+ tool_result: null
5807
+ });
5808
+ }
5809
+ const displayed = result.length > 200 ? result.slice(0, 200) + "\u2026" : result;
5810
+ const resText = `${name}(...) \u2192 ${displayed}`;
5811
+ this.history.push({ role: "tool", text: resText, timestamp: Date.now() });
5812
+ if (this.deps.onTranscript) {
5813
+ await this.deps.onTranscript({
5814
+ role: "tool",
5815
+ text: resText,
5816
+ call_id: this.callId,
5817
+ tool_name: name,
5818
+ tool_args: args ?? {},
5819
+ tool_result: result
5820
+ });
5821
+ }
4963
5822
  }
4964
5823
  async onAdapterTranscriptOutput(outputText) {
4965
5824
  if (!outputText) return;
5825
+ await this.emitLlmFirstToken();
4966
5826
  const triggered = checkGuardrails(outputText, this.deps.agent.guardrails);
4967
5827
  if (triggered) {
4968
5828
  getLogger().debug(`Guardrail '${triggered.name}' triggered`);
@@ -4976,24 +5836,75 @@ var StreamHandler = class {
4976
5836
  async onAdapterResponseDone(responseData) {
4977
5837
  if (responseData) {
4978
5838
  const usage = responseData.usage;
4979
- if (usage) this.metricsAcc.recordRealtimeUsage(usage);
5839
+ if (usage) {
5840
+ const turnModel = typeof responseData.model === "string" ? responseData.model : null;
5841
+ this.metricsAcc.recordRealtimeUsage(usage, turnModel);
5842
+ }
4980
5843
  }
4981
- if (this.currentAgentText) {
4982
- this.history.push({ role: "assistant", text: this.currentAgentText, timestamp: Date.now() });
4983
- this.responseAudioStarted = false;
4984
- await this.emitTurnMetrics(this.metricsAcc.recordTurnComplete(this.currentAgentText));
4985
- this.currentAgentText = "";
4986
- } else {
5844
+ if (!this.currentAgentText) {
4987
5845
  this.metricsAcc.recordTurnInterrupted();
4988
5846
  this.responseAudioStarted = false;
5847
+ await this.emitAgentSpeechEnded(true);
5848
+ return;
4989
5849
  }
5850
+ await this.emitAgentSpeechEnded(false);
5851
+ const text = this.currentAgentText;
5852
+ this.currentAgentText = "";
5853
+ if (this.userTranscriptPending) {
5854
+ this.pendingAssistantTurn = text;
5855
+ if (this.pendingAssistantTimer) clearTimeout(this.pendingAssistantTimer);
5856
+ this.pendingAssistantTimer = setTimeout(() => {
5857
+ const buffered = this.pendingAssistantTurn;
5858
+ this.pendingAssistantTurn = null;
5859
+ this.pendingAssistantTimer = null;
5860
+ this.userTranscriptPending = false;
5861
+ if (buffered !== null) {
5862
+ void this.flushAssistantTurn(buffered);
5863
+ }
5864
+ }, _StreamHandler.REALTIME_USER_TRANSCRIPT_WAIT_MS);
5865
+ this.responseAudioStarted = false;
5866
+ return;
5867
+ }
5868
+ await this.flushAssistantTurn(text);
4990
5869
  }
4991
- onAdapterSpeechInterrupt() {
5870
+ async onAdapterSpeechInterrupt() {
4992
5871
  this.deps.bridge.sendClear(this.ws, this.streamSid);
4993
5872
  if (this.adapter instanceof OpenAIRealtimeAdapter) this.adapter.cancelResponse();
4994
5873
  this.metricsAcc.recordTurnInterrupted();
5874
+ if (this.responseAudioStarted) {
5875
+ await this.emitAgentSpeechEnded(true);
5876
+ }
5877
+ await this.emitUserSpeechStarted();
4995
5878
  this.currentAgentText = "";
4996
5879
  this.responseAudioStarted = false;
5880
+ this.pendingAssistantTurn = null;
5881
+ if (this.pendingAssistantTimer) {
5882
+ clearTimeout(this.pendingAssistantTimer);
5883
+ this.pendingAssistantTimer = null;
5884
+ }
5885
+ this.userTranscriptPending = false;
5886
+ }
5887
+ /**
5888
+ * Emit a tool-invocation event into the transcript timeline. Pushes a
5889
+ * `role=tool` entry into `history` (so it appears in the dashboard
5890
+ * transcript next to user/assistant turns) AND fires `onTranscript` so
5891
+ * the host application can log / persist / render it. `result` is
5892
+ * truncated for log readability — the full payload is in history.
5893
+ */
5894
+ async emitToolEvent(name, args, result) {
5895
+ const argsText = JSON.stringify(args);
5896
+ const text = result === null ? `${name}(${argsText})` : `${name}(${argsText}) \u2192 ${result.length > 200 ? result.slice(0, 200) + "\u2026" : result}`;
5897
+ this.history.push({ role: "tool", text, timestamp: Date.now() });
5898
+ if (this.deps.onTranscript) {
5899
+ await this.deps.onTranscript({
5900
+ role: "tool",
5901
+ text,
5902
+ call_id: this.callId,
5903
+ tool_name: name,
5904
+ tool_args: args,
5905
+ tool_result: result
5906
+ });
5907
+ }
4997
5908
  }
4998
5909
  async handleFunctionCall(fc) {
4999
5910
  const adapter = this.adapter;
@@ -5007,11 +5918,15 @@ var StreamHandler = class {
5007
5918
  const transferTo = transferArgs.number ?? "";
5008
5919
  if (!isValidE164(transferTo)) {
5009
5920
  getLogger().warn(`transfer_call rejected (${this.deps.bridge.label}): invalid number ${JSON.stringify(transferTo)}`);
5010
- await adapter.sendFunctionResult(fc.call_id, JSON.stringify({ error: "Invalid phone number format", status: "rejected" }));
5921
+ const rejection = JSON.stringify({ error: "Invalid phone number format", status: "rejected" });
5922
+ await adapter.sendFunctionResult(fc.call_id, rejection);
5923
+ await this.emitToolEvent("transfer_call", transferArgs, rejection);
5011
5924
  return;
5012
5925
  }
5013
5926
  getLogger().debug(`Transferring call to ${transferTo}`);
5014
- await adapter.sendFunctionResult(fc.call_id, JSON.stringify({ status: "transferring", to: transferTo }));
5927
+ const result2 = JSON.stringify({ status: "transferring", to: transferTo });
5928
+ await adapter.sendFunctionResult(fc.call_id, result2);
5929
+ await this.emitToolEvent("transfer_call", transferArgs, result2);
5015
5930
  await this.deps.bridge.transferCall(this.callId, transferTo);
5016
5931
  if (this.deps.onTranscript) {
5017
5932
  await this.deps.onTranscript({ role: "system", text: `Call transferred to ${transferTo}`, call_id: this.callId });
@@ -5027,7 +5942,9 @@ var StreamHandler = class {
5027
5942
  }
5028
5943
  const reason = endArgs.reason ?? "conversation_complete";
5029
5944
  getLogger().debug(`Ending call (${this.deps.bridge.label}): ${reason}`);
5030
- await adapter.sendFunctionResult(fc.call_id, JSON.stringify({ status: "ending", reason }));
5945
+ const result2 = JSON.stringify({ status: "ending", reason });
5946
+ await adapter.sendFunctionResult(fc.call_id, result2);
5947
+ await this.emitToolEvent("end_call", endArgs, result2);
5031
5948
  await this.deps.bridge.endCall(this.callId, this.ws);
5032
5949
  if (this.deps.onTranscript) {
5033
5950
  await this.deps.onTranscript({ role: "system", text: `Call ended: ${reason}`, call_id: this.callId });
@@ -5035,22 +5952,57 @@ var StreamHandler = class {
5035
5952
  return;
5036
5953
  }
5037
5954
  const toolDef = this.deps.agent.tools?.find((t) => t.name === fc.name);
5038
- if (toolDef?.webhookUrl) {
5039
- let parsedArgs;
5955
+ if (!toolDef) {
5956
+ getLogger().warn(`Realtime tool '${fc.name}' not found in agent.tools \u2014 skipping`);
5957
+ const result2 = JSON.stringify({ error: `Tool '${fc.name}' not registered`, fallback: true });
5958
+ await adapter.sendFunctionResult(fc.call_id, result2);
5959
+ await this.emitToolEvent(fc.name, {}, result2);
5960
+ return;
5961
+ }
5962
+ let parsedArgs;
5963
+ try {
5964
+ parsedArgs = JSON.parse(fc.arguments || "{}");
5965
+ } catch {
5966
+ parsedArgs = {};
5967
+ }
5968
+ await this.emitToolEvent(fc.name, parsedArgs, null);
5969
+ const reassurance = toolDef.reassurance;
5970
+ let reassuranceTimer = null;
5971
+ if (reassurance) {
5972
+ const msg = typeof reassurance === "string" ? reassurance : reassurance.message;
5973
+ const afterMs = typeof reassurance === "string" ? 1500 : reassurance.afterMs ?? 1500;
5974
+ if (msg && this.adapter instanceof OpenAIRealtimeAdapter) {
5975
+ const realtimeAdapter = this.adapter;
5976
+ reassuranceTimer = setTimeout(() => {
5977
+ realtimeAdapter.sendText(msg).catch((e) => {
5978
+ getLogger().warn(`Reassurance message failed for tool '${fc.name}': ${String(e)}`);
5979
+ });
5980
+ }, afterMs);
5981
+ }
5982
+ }
5983
+ const onProgress = this.adapter instanceof OpenAIRealtimeAdapter ? async (text) => {
5040
5984
  try {
5041
- parsedArgs = JSON.parse(fc.arguments || "{}");
5042
- } catch {
5043
- parsedArgs = {};
5985
+ await this.adapter.sendText(text);
5986
+ } catch (e) {
5987
+ getLogger().warn(`Tool progress message failed for '${fc.name}': ${String(e)}`);
5044
5988
  }
5045
- const result = await executeToolWebhook(
5046
- toolDef.webhookUrl,
5047
- fc.name,
5989
+ } : void 0;
5990
+ let result;
5991
+ try {
5992
+ result = await this.toolExecutor.execute(
5993
+ toolDef,
5048
5994
  parsedArgs,
5049
- { callId: this.callId, caller: this.caller },
5050
- this.deps.bridge.label === "Twilio" ? "" : this.deps.bridge.label
5995
+ {
5996
+ call_id: this.callId,
5997
+ caller: this.caller
5998
+ },
5999
+ onProgress
5051
6000
  );
5052
- await adapter.sendFunctionResult(fc.call_id, result);
6001
+ } finally {
6002
+ if (reassuranceTimer) clearTimeout(reassuranceTimer);
5053
6003
  }
6004
+ await adapter.sendFunctionResult(fc.call_id, result);
6005
+ await this.emitToolEvent(fc.name, parsedArgs, result);
5054
6006
  }
5055
6007
  // ---------------------------------------------------------------------------
5056
6008
  // Private: call end / metrics finalization
@@ -5062,6 +6014,25 @@ var StreamHandler = class {
5062
6014
  clearTimeout(this.maxDurationTimer);
5063
6015
  this.maxDurationTimer = null;
5064
6016
  }
6017
+ if (this.pendingAssistantTimer) {
6018
+ clearTimeout(this.pendingAssistantTimer);
6019
+ this.pendingAssistantTimer = null;
6020
+ }
6021
+ if (this.pendingAssistantTurn !== null) {
6022
+ const buffered = this.pendingAssistantTurn;
6023
+ this.pendingAssistantTurn = null;
6024
+ try {
6025
+ await this.flushAssistantTurn(buffered);
6026
+ } catch {
6027
+ }
6028
+ }
6029
+ if (this.mcpManager) {
6030
+ try {
6031
+ await this.mcpManager.close();
6032
+ } catch {
6033
+ }
6034
+ this.mcpManager = null;
6035
+ }
5065
6036
  await this.deps.bridge.queryTelephonyCost(this.metricsAcc, this.callId);
5066
6037
  if (this.stt instanceof DeepgramSTT && this.stt.requestId) {
5067
6038
  const dgKey = this.stt.apiKey;
@@ -5088,7 +6059,7 @@ var StreamHandler = class {
5088
6059
  finalMetrics
5089
6060
  );
5090
6061
  try {
5091
- const { notifyDashboard } = await import("./persistence-LQBYQPQQ.mjs");
6062
+ const { notifyDashboard } = await import("./persistence-LVIAHESK.mjs");
5092
6063
  notifyDashboard(callEndData);
5093
6064
  } catch {
5094
6065
  }
@@ -5129,6 +6100,7 @@ async function queryDeepgramCost(metricsAcc, deepgramKey, deepgramRequestId) {
5129
6100
  }
5130
6101
 
5131
6102
  // src/services/call-log.ts
6103
+ init_esm_shims();
5132
6104
  import * as crypto3 from "crypto";
5133
6105
  import * as fs2 from "fs";
5134
6106
  import { promises as fsp } from "fs";
@@ -5226,6 +6198,7 @@ var CallLogger = class {
5226
6198
  this.root = null;
5227
6199
  }
5228
6200
  }
6201
+ /** True when a log root was configured and is writable. */
5229
6202
  get enabled() {
5230
6203
  return this.root !== null;
5231
6204
  }
@@ -5239,6 +6212,7 @@ var CallLogger = class {
5239
6212
  const safeId = sanitizeLogValue(callId, 64).replace(/\//g, "_") || "unknown";
5240
6213
  return path2.join(this.root, "calls", year, month, day, safeId);
5241
6214
  }
6215
+ /** Write the initial `metadata.json` for a new call. */
5242
6216
  async logCallStart(callId, input = {}) {
5243
6217
  if (!this.enabled) return;
5244
6218
  const startedAt = Date.now() / 1e3;
@@ -5271,6 +6245,7 @@ var CallLogger = class {
5271
6245
  this.sweepOldDays();
5272
6246
  }
5273
6247
  }
6248
+ /** Append a single turn record to the call's `transcript.jsonl`. */
5274
6249
  async logTurn(callId, turn) {
5275
6250
  if (!this.enabled) return;
5276
6251
  const dir = this.callDir(callId);
@@ -5288,6 +6263,7 @@ var CallLogger = class {
5288
6263
  );
5289
6264
  }
5290
6265
  }
6266
+ /** Append an operational event (tool_call, barge_in, error, …) to `events.jsonl`. */
5291
6267
  async logEvent(callId, eventType, payload = {}) {
5292
6268
  if (!this.enabled) return;
5293
6269
  const dir = this.callDir(callId);
@@ -5306,6 +6282,7 @@ var CallLogger = class {
5306
6282
  );
5307
6283
  }
5308
6284
  }
6285
+ /** Merge end-of-call fields into the existing `metadata.json`. */
5309
6286
  async logCallEnd(callId, input = {}) {
5310
6287
  if (!this.enabled) return;
5311
6288
  const dir = this.callDir(callId);
@@ -5432,6 +6409,18 @@ var END_CALL_TOOL = {
5432
6409
  function xmlEscape(s) {
5433
6410
  return s.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;").replace(/'/g, "&apos;");
5434
6411
  }
6412
/**
 * Collapse the raw `AnsweredBy` value from a Twilio AMD webhook into one of
 * four coarse classifications: "human", "machine", "fax" or "unknown".
 *
 * Every value prefixed with `machine_` maps to "machine".
 *
 * @param answeredBy Raw `AnsweredBy` form field. Anything that is not a
 *   string (missing field, repeated form parameter parsed as an array, …)
 *   is classified as "unknown" instead of throwing.
 * @returns "human" | "machine" | "fax" | "unknown"
 */
function classifyTwilioAmd(answeredBy) {
  // Webhook bodies are untrusted input: guard before calling string methods.
  if (typeof answeredBy !== "string") return "unknown";
  if (answeredBy === "human") return "human";
  if (answeredBy.startsWith("machine_")) return "machine";
  if (answeredBy === "fax") return "fax";
  return "unknown";
}
6418
/**
 * Collapse a raw Telnyx AMD result into one of four coarse classifications.
 *
 * Both "machine" and "machine_detected" map to "machine"; any value that is
 * not explicitly recognised maps to "unknown".
 *
 * @param result Raw AMD result value taken from the Telnyx webhook.
 * @returns "human" | "machine" | "fax" | "unknown"
 */
function classifyTelnyxAmd(result) {
  switch (result) {
    case "human":
      return "human";
    case "machine":
    case "machine_detected":
      return "machine";
    case "fax":
      return "fax";
    default:
      return "unknown";
  }
}
5435
6424
  function validateWebhookUrl(url) {
5436
6425
  const parsed = new URL(url);
5437
6426
  if (!["http:", "https:"].includes(parsed.protocol)) {
@@ -5561,22 +6550,35 @@ function buildAIAdapter(config, agent, resolvedPrompt) {
5561
6550
  const agentTools = agent.tools?.map((t) => ({
5562
6551
  name: t.name,
5563
6552
  description: t.description,
5564
- parameters: t.parameters
6553
+ parameters: t.parameters,
6554
+ strict: t.strict
5565
6555
  })) ?? [];
5566
6556
  const tools = [...agentTools, TRANSFER_CALL_TOOL, END_CALL_TOOL];
5567
6557
  const openaiKey = engine && engine.kind === "openai_realtime" ? engine.apiKey : config.openaiKey ?? "";
6558
+ const adapterOptions = {};
6559
+ if (engine && engine.kind === "openai_realtime") {
6560
+ if (engine.reasoningEffort !== void 0) {
6561
+ adapterOptions.reasoningEffort = engine.reasoningEffort;
6562
+ }
6563
+ if (engine.inputAudioTranscriptionModel !== void 0) {
6564
+ adapterOptions.inputAudioTranscriptionModel = engine.inputAudioTranscriptionModel;
6565
+ }
6566
+ }
5568
6567
  return new OpenAIRealtimeAdapter(
5569
6568
  openaiKey,
5570
6569
  agent.model,
5571
6570
  agent.voice,
5572
6571
  resolvedPrompt ?? agent.systemPrompt,
5573
- tools
6572
+ tools,
6573
+ void 0,
6574
+ adapterOptions
5574
6575
  );
5575
6576
  }
5576
6577
  var TwilioBridge = class {
5577
6578
  constructor(config) {
5578
6579
  this.config = config;
5579
6580
  }
6581
+ config;
5580
6582
  label = "Twilio";
5581
6583
  telephonyProvider = "twilio";
5582
6584
  sendAudio(ws, audioBase64, streamSid) {
@@ -5649,7 +6651,10 @@ var TwilioBridge = class {
5649
6651
  getLogger().info(`Twilio actual cost: $${Math.abs(parseFloat(data.price))}`);
5650
6652
  }
5651
6653
  }
5652
- } catch {
6654
+ } catch (err) {
6655
+ getLogger().debug(
6656
+ `queryTelephonyCost(twilio) failed: ${err?.message ?? err}`
6657
+ );
5653
6658
  }
5654
6659
  }
5655
6660
  }
@@ -5669,6 +6674,7 @@ var TelnyxBridge = class {
5669
6674
  constructor(config) {
5670
6675
  this.config = config;
5671
6676
  }
6677
+ config;
5672
6678
  label = "Telnyx";
5673
6679
  telephonyProvider = "telnyx";
5674
6680
  sendAudio(ws, audioBase64, _streamSid) {
@@ -5790,7 +6796,10 @@ var TelnyxBridge = class {
5790
6796
  getLogger().info(`Telnyx actual cost: $${Math.abs(parseFloat(amount))}`);
5791
6797
  }
5792
6798
  }
5793
- } catch {
6799
+ } catch (err) {
6800
+ getLogger().debug(
6801
+ `queryTelephonyCost(telnyx) failed: ${err?.message ?? err}`
6802
+ );
5794
6803
  }
5795
6804
  }
5796
6805
  }
@@ -5811,7 +6820,8 @@ var EmbeddedServer = class {
5811
6820
  this.dashboardToken = dashboardToken;
5812
6821
  this.metricsStore = new MetricsStore();
5813
6822
  this.pricing = mergePricing(pricingOverrides);
5814
- const logRoot = resolveLogRoot();
6823
+ const logRoot = config.persistRoot === void 0 ? resolveLogRoot() : config.persistRoot;
6824
+ this.callLogger = new CallLogger(logRoot);
5815
6825
  if (logRoot) {
5816
6826
  try {
5817
6827
  const restored = this.metricsStore.hydrate(logRoot);
@@ -5823,6 +6833,17 @@ var EmbeddedServer = class {
5823
6833
  }
5824
6834
  }
5825
6835
  }
6836
+ config;
6837
+ agent;
6838
+ onCallStart;
6839
+ onCallEnd;
6840
+ onTranscript;
6841
+ onMessage;
6842
+ recording;
6843
+ voicemailMessage;
6844
+ onMetrics;
6845
+ dashboard;
6846
+ dashboardToken;
5826
6847
  server = null;
5827
6848
  wss = null;
5828
6849
  twilioTokenWarningLogged = false;
@@ -5830,11 +6851,25 @@ var EmbeddedServer = class {
5830
6851
  metricsStore;
5831
6852
  pricing;
5832
6853
  remoteHandler = new RemoteMessageHandler();
5833
- /** Opt-in per-call filesystem logger (set via PATTER_LOG_DIR). */
5834
- callLogger = new CallLogger(resolveLogRoot());
6854
+ /**
6855
+ * Opt-in per-call filesystem logger. Path is resolved by ``client.ts``
6856
+ * from the public ``LocalOptions.persist`` option (with the legacy
6857
+ * ``PATTER_LOG_DIR`` env var as fallback). Initialised in the ctor
6858
+ * because ``resolveLogRoot`` cannot see ``this.config`` from a field
6859
+ * default expression.
6860
+ */
6861
+ callLogger;
5835
6862
  /** Active WebSocket connections tracked for graceful shutdown. */
5836
6863
  activeConnections = /* @__PURE__ */ new Set();
5837
6864
  activeCallIds = /* @__PURE__ */ new Map();
6865
+ /**
6866
+ * Per-call AMD result callback set by ``Patter.call()`` for the most
6867
+ * recent outbound call. Public so ``client.ts`` can populate it after
6868
+ * server start. Cleared after firing once per call to avoid leaking
6869
+ * across calls.
6870
+ */
6871
+ onMachineDetection;
6872
+ /** Bind HTTP + WebSocket listeners on `port`, mount carrier webhooks and dashboard routes. */
5838
6873
  async start(port = 8e3) {
5839
6874
  const webhookUrlPattern = /^[a-zA-Z0-9][a-zA-Z0-9.\-]+[a-zA-Z0-9]$/;
5840
6875
  if (!webhookUrlPattern.test(this.config.webhookUrl)) {
@@ -5950,6 +6985,20 @@ var EmbeddedServer = class {
5950
6985
  const answeredBy = body["AnsweredBy"] ?? "";
5951
6986
  const callSid = body["CallSid"] ?? "";
5952
6987
  getLogger().info(`AMD result for ${sanitizeLogValue(callSid)}: ${sanitizeLogValue(answeredBy)}`);
6988
+ const cb = this.onMachineDetection;
6989
+ if (cb && callSid) {
6990
+ try {
6991
+ await cb({
6992
+ call_id: callSid,
6993
+ carrier: "twilio",
6994
+ classification: classifyTwilioAmd(answeredBy),
6995
+ raw: answeredBy,
6996
+ detected_at: Date.now() / 1e3
6997
+ });
6998
+ } catch (err) {
6999
+ getLogger().warn(`onMachineDetection callback threw: ${sanitizeLogValue(String(err))}`);
7000
+ }
7001
+ }
5953
7002
  if ((answeredBy === "machine_end_beep" || answeredBy === "machine_end_silence") && this.voicemailMessage && this.config.twilioSid && this.config.twilioToken) {
5954
7003
  if (!validateTwilioSid(callSid)) {
5955
7004
  getLogger().warn(`AMD webhook rejected: invalid CallSid ${JSON.stringify(sanitizeLogValue(callSid))}`);
@@ -5965,7 +7014,8 @@ var EmbeddedServer = class {
5965
7014
  "Content-Type": "application/x-www-form-urlencoded",
5966
7015
  "Authorization": `Basic ${Buffer.from(`${this.config.twilioSid}:${this.config.twilioToken}`).toString("base64")}`
5967
7016
  },
5968
- body: new URLSearchParams({ Twiml: twiml }).toString()
7017
+ body: new URLSearchParams({ Twiml: twiml }).toString(),
7018
+ signal: AbortSignal.timeout(1e4)
5969
7019
  });
5970
7020
  if (vmResp.ok) {
5971
7021
  getLogger().info(`Voicemail dropped for ${sanitizeLogValue(callSid)}`);
@@ -6053,6 +7103,20 @@ var EmbeddedServer = class {
6053
7103
  getLogger().info(
6054
7104
  `Telnyx AMD result for ${sanitizeLogValue(amdCallId)}: ${sanitizeLogValue(amdResult)}`
6055
7105
  );
7106
+ const cbTx = this.onMachineDetection;
7107
+ if (cbTx && amdCallId) {
7108
+ try {
7109
+ await cbTx({
7110
+ call_id: amdCallId,
7111
+ carrier: "telnyx",
7112
+ classification: classifyTelnyxAmd(amdResult),
7113
+ raw: amdResult,
7114
+ detected_at: Date.now() / 1e3
7115
+ });
7116
+ } catch (err) {
7117
+ getLogger().warn(`onMachineDetection callback threw: ${sanitizeLogValue(String(err))}`);
7118
+ }
7119
+ }
6056
7120
  if (amdCallId && (amdResult === "machine" || amdResult === "machine_detected")) {
6057
7121
  await this.handleTelnyxAmdVoicemail(amdCallId);
6058
7122
  }
@@ -6147,7 +7211,8 @@ var EmbeddedServer = class {
6147
7211
  }
6148
7212
  });
6149
7213
  await new Promise((resolve) => {
6150
- this.server.listen(port, "127.0.0.1", () => {
7214
+ const bindHost = process.env.PATTER_BIND_HOST ?? "127.0.0.1";
7215
+ this.server.listen(port, bindHost, () => {
6151
7216
  getLogger().info(`Server on port ${port}`);
6152
7217
  getLogger().info(`Webhook: https://${this.config.webhookUrl}`);
6153
7218
  getLogger().info(`Phone: ${this.config.phoneNumber}`);
@@ -6477,30 +7542,168 @@ var EmbeddedServer = class {
6477
7542
  }
6478
7543
  };
6479
7544
 
7545
+ // src/tools/circuit-breaker.ts
7546
+ init_esm_shims();
7547
/** The three states a per-tool breaker can be in. */
var CircuitBreakerState = {
  CLOSED: "closed",
  OPEN: "open",
  HALF_OPEN: "half_open"
};
/** Consecutive failures needed to trip a breaker when no override is given. */
var DEFAULT_FAILURE_THRESHOLD = 5;
/** How long (ms) a tripped breaker rejects calls when no override is given. */
var DEFAULT_COOLDOWN_MS = 3e4;
/**
 * Keeps one circuit breaker per tool name.
 *
 * A breaker starts CLOSED, trips to OPEN once `failureThreshold` consecutive
 * failures have been recorded, and moves to HALF_OPEN the first time
 * `allow()` is asked after `cooldownMs` has elapsed. A recorded success
 * resets it to CLOSED. A threshold of zero or less disables the mechanism.
 */
var CircuitBreakerRegistry = class {
  threshold;
  cooldownMs;
  state = /* @__PURE__ */ new Map();
  /** Time source in ms; injectable so tests can be deterministic. */
  clock;
  /**
   * @param opts Optional `{ failureThreshold, cooldownMs }` overrides.
   * @param clock Function returning the current time in ms (default `Date.now`).
   */
  constructor(opts = {}, clock = Date.now) {
    const { failureThreshold, cooldownMs } = opts;
    this.threshold = failureThreshold ?? DEFAULT_FAILURE_THRESHOLD;
    this.cooldownMs = cooldownMs ?? DEFAULT_COOLDOWN_MS;
    this.clock = clock;
  }
  /**
   * Whether `toolName` may run right now. Has a side effect: an OPEN breaker
   * whose cooldown has elapsed is switched to HALF_OPEN.
   */
  allow(toolName) {
    if (this.threshold <= 0) return true;
    const entry = this.state.get(toolName);
    // Unknown, CLOSED and HALF_OPEN breakers all let the call through.
    if (!entry || entry.state !== CircuitBreakerState.OPEN) return true;
    const cooledDown = this.clock() - entry.openedAt >= this.cooldownMs;
    if (cooledDown) {
      entry.state = CircuitBreakerState.HALF_OPEN;
    }
    return cooledDown;
  }
  /** Record a success: an existing breaker is reset to CLOSED with zero failures. */
  recordSuccess(toolName) {
    const entry = this.state.get(toolName);
    if (!entry) return;
    Object.assign(entry, {
      state: CircuitBreakerState.CLOSED,
      consecutiveFailures: 0,
      openedAt: 0
    });
  }
  /** Record a failure; the breaker goes OPEN once the threshold is reached. */
  recordFailure(toolName) {
    if (this.threshold <= 0) return;
    let entry = this.state.get(toolName);
    if (!entry) {
      entry = { state: CircuitBreakerState.CLOSED, consecutiveFailures: 0, openedAt: 0 };
      this.state.set(toolName, entry);
    }
    entry.consecutiveFailures += 1;
    const tripped = entry.consecutiveFailures >= this.threshold;
    if (tripped) {
      entry.state = CircuitBreakerState.OPEN;
      entry.openedAt = this.clock();
    }
  }
  /**
   * Milliseconds left before an OPEN breaker becomes eligible for HALF_OPEN.
   * Returns 0 for any breaker that is not currently OPEN (including unknown
   * tool names).
   */
  timeUntilHalfOpen(toolName) {
    const entry = this.state.get(toolName);
    if (!entry || entry.state !== CircuitBreakerState.OPEN) return 0;
    const sinceOpened = this.clock() - entry.openedAt;
    return Math.max(0, this.cooldownMs - sinceOpened);
  }
  /** Shallow copy of the breaker record for `toolName`, or `null` if none exists. */
  snapshot(toolName) {
    const entry = this.state.get(toolName);
    if (!entry) return null;
    return { ...entry };
  }
};
7619
+
6480
7620
  // src/llm-loop.ts
6481
7621
  var DEFAULT_TOOL_MAX_RETRIES = 2;
6482
7622
  var DEFAULT_TOOL_RETRY_DELAY_MS = 500;
6483
7623
  var DEFAULT_TOOL_TIMEOUT_MS = 1e4;
6484
7624
  var TOOL_MAX_RESPONSE_BYTES = 1 * 1024 * 1024;
7625
/**
 * Invoke a tool handler and reduce its outcome to a single result value.
 *
 * Two handler shapes are supported:
 *  - a function returning a value or promise: the awaited value is returned
 *    as-is;
 *  - an async generator: it is driven to completion. Yielded items are
 *    interpreted as
 *      `{ progress: string }` -> forwarded to `onProgress`,
 *      `{ result: string }`   -> remembered as the latest result,
 *      any other non-null value -> forwarded to `onProgress` (strings
 *      verbatim, everything else JSON-stringified).
 *    The final value is the generator's string return value if non-empty,
 *    else the last `{ result }` yielded, else `"{}"`.
 *
 * If iteration is abandoned early (for example because `onProgress`
 * rejects), the generator is closed via `return()` so that its `finally`
 * blocks run; the original error still propagates to the caller.
 *
 * @param handler Tool handler, called as `handler(args, callContext)`.
 * @param args Parsed tool arguments.
 * @param callContext Per-call context object passed through to the handler.
 * @param onProgress Optional async callback receiving progress text.
 * @returns The handler's result.
 */
async function invokeHandler(handler, args, callContext, onProgress) {
  const invoked = handler(args, callContext);
  const isAsyncGenerator = invoked && typeof invoked === "object" && typeof invoked[Symbol.asyncIterator] === "function" && typeof invoked.next === "function";
  if (!isAsyncGenerator) {
    return await invoked;
  }
  let lastResult = "";
  let exhausted = false;
  try {
    while (true) {
      const step = await invoked.next();
      if (step.done) {
        exhausted = true;
        const ret = typeof step.value === "string" ? step.value : "";
        return ret || lastResult || "{}";
      }
      const yielded = step.value;
      if (yielded && typeof yielded === "object") {
        if (typeof yielded.progress === "string") {
          if (onProgress) await onProgress(yielded.progress);
          continue;
        }
        if (typeof yielded.result === "string") {
          lastResult = yielded.result;
          continue;
        }
      }
      if (onProgress && yielded != null) {
        const text = typeof yielded === "string" ? yielded : JSON.stringify(yielded);
        await onProgress(text);
      }
    }
  } finally {
    // A manual `next()` loop does not auto-close the generator the way
    // `for await` does, so close it explicitly on any early exit.
    if (!exhausted && typeof invoked.return === "function") {
      try {
        await invoked.return(void 0);
      } catch {
        // Best effort: a cleanup failure must not mask the original error.
      }
    }
  }
}
7654
/**
 * Delay before the next retry: exponential backoff with a small random jitter.
 *
 * The exponential part is `baseMs * 2^attempt`, capped at 5000 ms. Up to
 * 60 ms of jitter is added after the cap, and the sum is rounded to an
 * integer number of milliseconds.
 *
 * @param baseMs Base delay in milliseconds.
 * @param attempt Zero-based index of the attempt that just failed.
 * @returns Delay in whole milliseconds.
 */
function backoffDelayMs(baseMs, attempt) {
  const maxBackoffMs = 5e3;
  const maxJitterMs = 60;
  const uncapped = baseMs * 2 ** attempt;
  const capped = Math.min(maxBackoffMs, uncapped);
  const jitter = Math.random() * maxJitterMs;
  return Math.round(capped + jitter);
}
6485
7659
  var DefaultToolExecutor = class {
6486
7660
  maxRetries;
6487
7661
  retryDelayMs;
6488
7662
  requestTimeoutMs;
7663
+ breaker;
6489
7664
  constructor(opts = {}) {
6490
7665
  this.maxRetries = opts.maxRetries ?? DEFAULT_TOOL_MAX_RETRIES;
6491
7666
  this.retryDelayMs = opts.retryDelayMs ?? DEFAULT_TOOL_RETRY_DELAY_MS;
6492
7667
  this.requestTimeoutMs = opts.requestTimeoutMs ?? DEFAULT_TOOL_TIMEOUT_MS;
6493
- }
6494
- async execute(toolDef, args, callContext) {
7668
+ this.breaker = new CircuitBreakerRegistry(opts.circuitBreaker ?? {});
7669
+ }
7670
+ /** Expose the breaker for tests + dashboard observability. */
7671
+ get circuitBreaker() {
7672
+ return this.breaker;
7673
+ }
7674
+ async execute(toolDef, args, callContext, onProgress) {
7675
+ if (!this.breaker.allow(toolDef.name)) {
7676
+ const cooldown = this.breaker.timeUntilHalfOpen(toolDef.name);
7677
+ return JSON.stringify({
7678
+ error: `Tool '${toolDef.name}' is temporarily unavailable (circuit open).`,
7679
+ fallback: true,
7680
+ circuit_state: "open",
7681
+ retry_after_ms: cooldown
7682
+ });
7683
+ }
6495
7684
  if (toolDef.handler) {
6496
- try {
6497
- return await toolDef.handler(args, callContext);
6498
- } catch (e) {
6499
- return JSON.stringify({
6500
- error: `Tool handler error: ${String(e)}`,
6501
- fallback: true
6502
- });
7685
+ const totalAttempts = this.maxRetries + 1;
7686
+ let lastErr = null;
7687
+ for (let attempt = 0; attempt < totalAttempts; attempt++) {
7688
+ try {
7689
+ const result = await invokeHandler(toolDef.handler, args, callContext, onProgress);
7690
+ this.breaker.recordSuccess(toolDef.name);
7691
+ return result;
7692
+ } catch (e) {
7693
+ lastErr = e;
7694
+ if (attempt < totalAttempts - 1) {
7695
+ getLogger().warn(
7696
+ `Tool handler '${toolDef.name}' failed (attempt ${attempt + 1}/${totalAttempts}), retrying: ${String(e)}`
7697
+ );
7698
+ await new Promise((r) => setTimeout(r, backoffDelayMs(this.retryDelayMs, attempt)));
7699
+ }
7700
+ }
6503
7701
  }
7702
+ this.breaker.recordFailure(toolDef.name);
7703
+ return JSON.stringify({
7704
+ error: `Tool handler error after ${totalAttempts} attempts: ${String(lastErr)}`,
7705
+ fallback: true
7706
+ });
6504
7707
  }
6505
7708
  if (toolDef.webhookUrl) {
6506
7709
  try {
@@ -6535,20 +7738,23 @@ var DefaultToolExecutor = class {
6535
7738
  if (!resp.ok) throw new Error(`HTTP ${resp.status}`);
6536
7739
  const result = JSON.stringify(await resp.json());
6537
7740
  if (result.length > TOOL_MAX_RESPONSE_BYTES) {
7741
+ this.breaker.recordFailure(toolDef.name);
6538
7742
  return JSON.stringify({
6539
7743
  error: `Webhook response too large: ${result.length} bytes (max ${TOOL_MAX_RESPONSE_BYTES})`,
6540
7744
  fallback: true
6541
7745
  });
6542
7746
  }
7747
+ this.breaker.recordSuccess(toolDef.name);
6543
7748
  return result;
6544
7749
  } catch (e) {
6545
7750
  if (attempt < totalAttempts - 1) {
6546
7751
  getLogger().warn(
6547
- `Tool webhook '${toolDef.name}' failed (attempt ${attempt + 1}), retrying: ${String(e)}`
7752
+ `Tool webhook '${toolDef.name}' failed (attempt ${attempt + 1}/${totalAttempts}), retrying: ${String(e)}`
6548
7753
  );
6549
- await new Promise((r) => setTimeout(r, this.retryDelayMs));
7754
+ await new Promise((r) => setTimeout(r, backoffDelayMs(this.retryDelayMs, attempt)));
6550
7755
  } else {
6551
7756
  span.recordException(e);
7757
+ this.breaker.recordFailure(toolDef.name);
6552
7758
  return JSON.stringify({
6553
7759
  error: `Tool failed after ${totalAttempts} attempts: ${String(e)}`,
6554
7760
  fallback: true
@@ -6569,6 +7775,30 @@ var DefaultToolExecutor = class {
6569
7775
  });
6570
7776
  }
6571
7777
  };
7778
/**
 * Combine several optional `AbortSignal`s into one that aborts as soon as any
 * input aborts, carrying that input's `reason`.
 *
 * `null`/`undefined` entries are ignored. If exactly one signal remains it is
 * returned unchanged. Where the runtime provides `AbortSignal.any` it is
 * used; otherwise an `AbortController`-based fallback is built.
 *
 * The fallback attaches no listeners when an input is already aborted, and
 * detaches its listeners from every input once any of them fires, so
 * longer-lived signals do not accumulate dead callbacks.
 *
 * @param signals Signals to merge; entries may be `null` or `undefined`.
 * @returns A signal that aborts when the first input aborts.
 */
function mergeAbortSignals(...signals) {
  const filtered = signals.filter(
    (s) => s != null
  );
  if (filtered.length === 1) return filtered[0];
  if (typeof AbortSignal.any === "function") {
    return AbortSignal.any(
      filtered
    );
  }
  const controller = new AbortController();
  // Check for an already-aborted input before touching any listener lists.
  const alreadyAborted = filtered.find((sig) => sig.aborted);
  if (alreadyAborted) {
    controller.abort(alreadyAborted.reason);
    return controller.signal;
  }
  const detachers = [];
  const detachAll = () => {
    for (const detach of detachers.splice(0)) detach();
  };
  for (const sig of filtered) {
    const onAbort = () => {
      // First abort wins; drop the listeners still registered on the others.
      detachAll();
      controller.abort(sig.reason);
    };
    sig.addEventListener("abort", onAbort, { once: true });
    detachers.push(() => sig.removeEventListener("abort", onAbort));
  }
  return controller.signal;
}
6572
7802
  var OpenAILLMProvider = class {
6573
7803
  apiKey;
6574
7804
  model;
@@ -6596,7 +7826,8 @@ var OpenAILLMProvider = class {
6596
7826
  this.presencePenalty = sampling.presencePenalty;
6597
7827
  this.stop = sampling.stop;
6598
7828
  }
6599
- async *stream(messages, tools) {
7829
+ /** Stream OpenAI Chat Completions chunks for the given messages/tools. */
7830
+ async *stream(messages, tools, opts) {
6600
7831
  const body = {
6601
7832
  model: this.model,
6602
7833
  messages,
@@ -6620,6 +7851,7 @@ var OpenAILLMProvider = class {
6620
7851
  if (tools) {
6621
7852
  body.tools = tools;
6622
7853
  }
7854
+ const signal = mergeAbortSignals(opts?.signal, AbortSignal.timeout(3e4));
6623
7855
  const response = await fetch("https://api.openai.com/v1/chat/completions", {
6624
7856
  method: "POST",
6625
7857
  headers: {
@@ -6627,12 +7859,14 @@ var OpenAILLMProvider = class {
6627
7859
  "Authorization": `Bearer ${this.apiKey}`
6628
7860
  },
6629
7861
  body: JSON.stringify(body),
6630
- signal: AbortSignal.timeout(3e4)
7862
+ signal
6631
7863
  });
6632
7864
  if (!response.ok) {
6633
7865
  const errText = await response.text();
6634
7866
  getLogger().error(`LLM API error: ${response.status} ${errText}`);
6635
- return;
7867
+ throw new PatterConnectionError(
7868
+ `LLM API returned ${response.status}: ${errText.slice(0, 200)}`
7869
+ );
6636
7870
  }
6637
7871
  const reader = response.body?.getReader();
6638
7872
  if (!reader) return;
@@ -6685,6 +7919,7 @@ var OpenAILLMProvider = class {
6685
7919
  }
6686
7920
  }
6687
7921
  };
7922
+ var DEFAULT_PHONE_PREAMBLE = "You are speaking on a live phone call. Respond concisely. Do not use markdown, headers, bullet lists, code fences, or emojis. Spell out numbers, currencies, dates, and units in natural spoken language. Keep replies under 2 sentences unless the caller asks for detail.";
6688
7923
  var LLMLoop = class {
6689
7924
  provider;
6690
7925
  systemPrompt;
@@ -6696,9 +7931,20 @@ var LLMLoop = class {
6696
7931
  // Fix 10: track provider/model so usage chunks can be attributed for billing.
6697
7932
  _providerName;
6698
7933
  _modelName;
6699
- constructor(apiKey, model, systemPrompt, tools, llmProvider) {
7934
+ // Optional async observer fired after a successful tool execution so
7935
+ // the host SDK (StreamHandler in pipeline mode) can surface tool calls
7936
+ // into the transcript timeline / `onTranscript` callback. Mirrors the
7937
+ // Python `on_tool_call` parameter on `LLMLoop.__init__`.
7938
+ onToolCall;
7939
+ constructor(apiKey, model, systemPrompt, tools, llmProvider, disablePhonePreamble = false) {
6700
7940
  this.provider = llmProvider ?? new OpenAILLMProvider(apiKey, model);
6701
- this.systemPrompt = systemPrompt;
7941
+ if (disablePhonePreamble) {
7942
+ this.systemPrompt = systemPrompt;
7943
+ } else {
7944
+ this.systemPrompt = systemPrompt ? `${DEFAULT_PHONE_PREAMBLE}
7945
+
7946
+ ${systemPrompt}` : DEFAULT_PHONE_PREAMBLE;
7947
+ }
6702
7948
  if (llmProvider) {
6703
7949
  const key = llmProvider.constructor?.providerKey;
6704
7950
  if (key) {
@@ -6745,6 +7991,16 @@ var LLMLoop = class {
6745
7991
  setEventBus(bus) {
6746
7992
  this.eventBus = bus;
6747
7993
  }
7994
+ /**
7995
+ * Set or replace the post-tool-execution observer. The callback is
7996
+ * awaited after every successful tool execution with
7997
+ * `(name, args, result)`. Pass `undefined` to disable. Mirrors the
7998
+ * Python `LLMLoop.set_on_tool_call` setter so callers (e.g. the
7999
+ * pipeline `StreamHandler`) can wire the loop after construction.
8000
+ */
8001
+ setOnToolCall(callback) {
8002
+ this.onToolCall = callback;
8003
+ }
6748
8004
  /**
6749
8005
  * Stream LLM response tokens, handling tool calls automatically.
6750
8006
  * Yields text tokens as they arrive from the LLM.
@@ -6753,7 +8009,7 @@ var LLMLoop = class {
6753
8009
  * from the provider are forwarded to {@link LlmUsageRecorder.recordLlmUsage}
6754
8010
  * so token costs are included in the call cost breakdown (fix 10).
6755
8011
  */
6756
- async *run(userText, history, callContext, metrics, hookExecutor, hookCtx) {
8012
+ async *run(userText, history, callContext, metrics, hookExecutor, hookCtx, opts) {
6757
8013
  let messages = this.buildMessages(history, userText);
6758
8014
  const maxIterations = 10;
6759
8015
  if (hookExecutor && hookCtx) {
@@ -6762,20 +8018,22 @@ var LLMLoop = class {
6762
8018
  hookCtx
6763
8019
  );
6764
8020
  }
6765
- const hasAfterLlm = Boolean(hookExecutor?.hasAfterLlm() && hookCtx);
8021
+ const hasAfterLlmResponse = Boolean(hookExecutor?.hasAfterLlmResponse() && hookCtx);
8022
+ const hasAfterLlmChunk = Boolean(hookExecutor?.hasAfterLlmChunk());
6766
8023
  const allEmittedText = [];
6767
8024
  for (let iter = 0; iter < maxIterations; iter++) {
6768
8025
  const toolCallsAccumulated = /* @__PURE__ */ new Map();
6769
8026
  const textParts = [];
6770
8027
  let hasToolCalls = false;
6771
- for await (const chunk of this.provider.stream(messages, this.openaiTools)) {
8028
+ for await (const chunk of this.provider.stream(messages, this.openaiTools, opts)) {
6772
8029
  if (chunk.type === "text" && chunk.content) {
6773
- textParts.push(chunk.content);
6774
- this.eventBus?.emit("llm_chunk", { text: chunk.content, iteration: iter });
6775
- if (hasAfterLlm) {
6776
- allEmittedText.push(chunk.content);
8030
+ const content = hasAfterLlmChunk && hookExecutor ? hookExecutor.runAfterLlmChunk(chunk.content) : chunk.content;
8031
+ textParts.push(content);
8032
+ this.eventBus?.emit("llm_chunk", { text: content, iteration: iter });
8033
+ if (hasAfterLlmResponse) {
8034
+ allEmittedText.push(content);
6777
8035
  } else {
6778
- yield chunk.content;
8036
+ yield content;
6779
8037
  }
6780
8038
  } else if (chunk.type === "usage") {
6781
8039
  metrics?.recordLlmUsage(
@@ -6804,9 +8062,9 @@ var LLMLoop = class {
6804
8062
  }
6805
8063
  }
6806
8064
  if (!hasToolCalls) {
6807
- if (hasAfterLlm && hookExecutor && hookCtx) {
8065
+ if (hasAfterLlmResponse && hookExecutor && hookCtx) {
6808
8066
  const finalText = allEmittedText.join("");
6809
- const rewritten = await hookExecutor.runAfterLlm(finalText, hookCtx);
8067
+ const rewritten = await hookExecutor.runAfterLlmResponse(finalText, hookCtx);
6810
8068
  if (rewritten) yield rewritten;
6811
8069
  }
6812
8070
  return;
@@ -6840,6 +8098,15 @@ var LLMLoop = class {
6840
8098
  tool_call_id: tcData.id,
6841
8099
  content: result
6842
8100
  });
8101
+ if (this.onToolCall) {
8102
+ try {
8103
+ await this.onToolCall(toolName, args, result);
8104
+ } catch (err) {
8105
+ getLogger().error(
8106
+ `onToolCall observer failed for tool '${toolName}': ${String(err)}`
8107
+ );
8108
+ }
8109
+ }
6843
8110
  }
6844
8111
  }
6845
8112
  getLogger().warn(`LLM loop hit max iterations (${maxIterations})`);
@@ -6868,6 +8135,7 @@ var LLMLoop = class {
6868
8135
 
6869
8136
  // src/test-mode.ts
6870
8137
  var TestSession = class {
8138
+ /** Run a REPL-style session that loops user input through the agent's LLM/onMessage handler. */
6871
8139
  async run(opts) {
6872
8140
  const { agent, openaiKey, onMessage, onCallStart, onCallEnd } = opts;
6873
8141
  const callId = `test_${Date.now().toString(36)}${Math.random().toString(36).slice(2, 8)}`;
@@ -6918,7 +8186,9 @@ var TestSession = class {
6918
8186
  openaiKey,
6919
8187
  llmModel,
6920
8188
  resolvedPrompt,
6921
- agent.tools
8189
+ agent.tools,
8190
+ void 0,
8191
+ agent.disablePhonePreamble ?? false
6922
8192
  );
6923
8193
  }
6924
8194
  let ended = false;
@@ -7036,6 +8306,7 @@ var TestSession = class {
7036
8306
  };
7037
8307
 
7038
8308
  export {
8309
+ ErrorCode,
7039
8310
  PatterError,
7040
8311
  PatterConnectionError,
7041
8312
  AuthenticationError,
@@ -7067,6 +8338,7 @@ export {
7067
8338
  createResampler16kTo8k,
7068
8339
  createResampler8kTo16k,
7069
8340
  createResampler24kTo16k,
8341
+ createResampler24kTo8k,
7070
8342
  resample8kTo16k,
7071
8343
  resample16kTo8k,
7072
8344
  resample24kTo16k,
@@ -7081,12 +8353,14 @@ export {
7081
8353
  isTracingEnabled,
7082
8354
  startSpan,
7083
8355
  DefaultToolExecutor,
8356
+ mergeAbortSignals,
7084
8357
  OpenAILLMProvider,
7085
8358
  LLMLoop,
7086
8359
  DEFAULT_MIN_SENTENCE_LEN,
7087
8360
  SentenceChunker,
7088
8361
  PipelineHookExecutor,
7089
8362
  EventBus,
8363
+ resolveLogRoot,
7090
8364
  EmbeddedServer,
7091
8365
  TestSession
7092
8366
  };