getpatter 0.5.2 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,12 +1,15 @@
1
1
  import {
2
2
  getLogger
3
- } from "./chunk-FMNRCP5X.mjs";
3
+ } from "./chunk-VJVDG4V5.mjs";
4
+ import {
5
+ __require
6
+ } from "./chunk-QHHBUCMT.mjs";
4
7
 
5
8
  // src/test-mode.ts
6
9
  import { createInterface } from "readline";
7
10
 
8
11
  // src/server.ts
9
- import crypto3 from "crypto";
12
+ import crypto4 from "crypto";
10
13
  import express from "express";
11
14
  import { createServer } from "http";
12
15
  import { WebSocketServer } from "ws";
@@ -14,14 +17,24 @@ import { WebSocketServer } from "ws";
14
17
  // src/providers/openai-realtime.ts
15
18
  import WebSocket from "ws";
16
19
  var OpenAIRealtimeAdapter = class {
17
- constructor(apiKey, model = "gpt-4o-mini-realtime-preview", voice = "alloy", instructions = "", tools) {
20
+ constructor(apiKey, model = "gpt-realtime-mini", voice = "alloy", instructions = "", tools, audioFormat = "g711_ulaw", options = {}) {
18
21
  this.apiKey = apiKey;
19
22
  this.model = model;
20
23
  this.voice = voice;
21
24
  this.instructions = instructions;
22
25
  this.tools = tools;
26
+ this.audioFormat = audioFormat;
27
+ this.options = options;
23
28
  }
24
29
  ws = null;
30
+ eventCallbacks = /* @__PURE__ */ new Set();
31
+ messageListenerAttached = false;
32
+ heartbeat = null;
33
+ // Track the in-flight assistant item id so we can truncate cleanly on
34
+ // barge-in (see ``cancelResponse``) — matches the Python adapter.
35
+ currentResponseItemId = null;
36
+ currentResponseAudioMs = 0;
37
+ options;
25
38
  async connect() {
26
39
  const url = `wss://api.openai.com/v1/realtime?model=${encodeURIComponent(this.model)}`;
27
40
  this.ws = new WebSocket(url, {
@@ -45,13 +58,24 @@ var OpenAIRealtimeAdapter = class {
45
58
  if (msg.type === "session.created" && !sessionCreated) {
46
59
  sessionCreated = true;
47
60
  const config = {
48
- input_audio_format: "g711_ulaw",
49
- output_audio_format: "g711_ulaw",
61
+ input_audio_format: this.audioFormat,
62
+ output_audio_format: this.audioFormat,
50
63
  voice: this.voice,
51
64
  instructions: this.instructions || "You are a helpful voice assistant. Be concise.",
52
- turn_detection: { type: "server_vad", threshold: 0.5, prefix_padding_ms: 300, silence_duration_ms: 500 },
53
- input_audio_transcription: { model: "whisper-1" }
65
+ turn_detection: {
66
+ type: this.options.vadType ?? "server_vad",
67
+ threshold: 0.5,
68
+ prefix_padding_ms: 300,
69
+ silence_duration_ms: this.options.silenceDurationMs ?? 300
70
+ },
71
+ input_audio_transcription: { model: this.options.inputAudioTranscriptionModel ?? "whisper-1" }
54
72
  };
73
+ if (this.options.temperature !== void 0) config.temperature = this.options.temperature;
74
+ if (this.options.maxResponseOutputTokens !== void 0) {
75
+ config.max_response_output_tokens = this.options.maxResponseOutputTokens;
76
+ }
77
+ if (this.options.modalities !== void 0) config.modalities = this.options.modalities;
78
+ if (this.options.toolChoice !== void 0) config.tool_choice = this.options.toolChoice;
55
79
  if (this.tools?.length) {
56
80
  config.tools = this.tools.map((t) => ({
57
81
  type: "function",
@@ -92,19 +116,45 @@ var OpenAIRealtimeAdapter = class {
92
116
  ws.on("message", onSetupMessage);
93
117
  ws.on("error", onSetupError);
94
118
  });
119
+ this.heartbeat = setInterval(() => {
120
+ try {
121
+ this.ws?.ping();
122
+ } catch {
123
+ }
124
+ }, 2e4);
125
+ this.ensureMessageListener();
95
126
  }
96
127
  sendAudio(mulawAudio) {
97
128
  if (!this.ws || this.ws.readyState !== WebSocket.OPEN) return;
98
129
  this.ws.send(JSON.stringify({ type: "input_audio_buffer.append", audio: mulawAudio.toString("base64") }));
99
130
  }
131
+ /**
132
+ * Register a listener for parsed realtime events.
133
+ *
134
+ * Previously every call attached a new ``ws.on('message')`` handler,
135
+ * which leaked listeners across retries and multi-consumer hooks. We now
136
+ * route all traffic through a single persistent handler that fans out to
137
+ * a Set of callbacks. Use {@link offEvent} to remove one.
138
+ */
100
139
  onEvent(callback) {
101
- if (!this.ws) return;
102
- const safeInvoke = (type, data) => {
103
- void Promise.resolve(callback(type, data)).catch(
104
- (err) => getLogger().error("onEvent callback error:", err)
105
- );
140
+ this.eventCallbacks.add(callback);
141
+ this.ensureMessageListener();
142
+ }
143
+ offEvent(callback) {
144
+ this.eventCallbacks.delete(callback);
145
+ }
146
+ ensureMessageListener() {
147
+ if (this.messageListenerAttached || !this.ws) return;
148
+ this.messageListenerAttached = true;
149
+ const ws = this.ws;
150
+ const dispatch = (type, payload) => {
151
+ for (const cb of this.eventCallbacks) {
152
+ void Promise.resolve(cb(type, payload)).catch(
153
+ (err) => getLogger().error("onEvent callback error:", err)
154
+ );
155
+ }
106
156
  };
107
- this.ws.on("message", (raw) => {
157
+ ws.on("message", (raw) => {
108
158
  let data;
109
159
  try {
110
160
  data = JSON.parse(raw.toString());
@@ -114,24 +164,61 @@ var OpenAIRealtimeAdapter = class {
114
164
  }
115
165
  const t = data.type;
116
166
  if (t === "response.audio.delta") {
117
- safeInvoke("audio", Buffer.from(data.delta ?? "", "base64"));
167
+ const buf = Buffer.from(data.delta ?? "", "base64");
168
+ this.currentResponseAudioMs += estimateAudioMs(buf, this.audioFormat);
169
+ dispatch("audio", buf);
118
170
  } else if (t === "response.audio_transcript.delta") {
119
- safeInvoke("transcript_output", data.delta);
171
+ dispatch("transcript_output", data.delta);
172
+ } else if (t === "response.content_part.added" || t === "response.output_item.added") {
173
+ const itemId = data.item?.id ?? data.item_id ?? null;
174
+ if (itemId) {
175
+ this.currentResponseItemId = itemId;
176
+ this.currentResponseAudioMs = 0;
177
+ }
120
178
  } else if (t === "input_audio_buffer.speech_started") {
121
- safeInvoke("speech_started", null);
179
+ dispatch("speech_started", null);
180
+ } else if (t === "input_audio_buffer.speech_stopped") {
181
+ dispatch("speech_stopped", null);
122
182
  } else if (t === "conversation.item.input_audio_transcription.completed") {
123
- safeInvoke("transcript_input", data.transcript);
183
+ dispatch("transcript_input", data.transcript);
124
184
  } else if (t === "response.function_call_arguments.done") {
125
- safeInvoke("function_call", { call_id: data.call_id, name: data.name, arguments: data.arguments });
185
+ dispatch("function_call", { call_id: data.call_id, name: data.name, arguments: data.arguments });
126
186
  } else if (t === "response.done") {
127
- safeInvoke("response_done", data.response ?? null);
187
+ this.currentResponseItemId = null;
188
+ this.currentResponseAudioMs = 0;
189
+ dispatch("response_done", data.response ?? null);
128
190
  } else if (t === "error") {
129
- safeInvoke("error", data.error);
191
+ dispatch("error", data.error);
192
+ }
193
+ });
194
+ ws.on("close", (code, reason) => {
195
+ if (code !== 1e3) {
196
+ dispatch("error", {
197
+ type: "connection_closed",
198
+ code,
199
+ reason: reason?.toString() ?? ""
200
+ });
130
201
  }
131
202
  });
203
+ ws.on("error", (err) => {
204
+ dispatch("error", { type: "socket_error", message: err?.message ?? String(err) });
205
+ });
132
206
  }
133
207
  cancelResponse() {
134
- this.ws?.send(JSON.stringify({ type: "response.cancel" }));
208
+ if (!this.ws) return;
209
+ if (this.currentResponseItemId) {
210
+ try {
211
+ this.ws.send(JSON.stringify({
212
+ type: "conversation.item.truncate",
213
+ item_id: this.currentResponseItemId,
214
+ content_index: 0,
215
+ audio_end_ms: this.currentResponseAudioMs
216
+ }));
217
+ } catch (err) {
218
+ getLogger().debug?.(`conversation.item.truncate failed: ${String(err)}`);
219
+ }
220
+ }
221
+ this.ws.send(JSON.stringify({ type: "response.cancel" }));
135
222
  }
136
223
  async sendText(text) {
137
224
  this.ws?.send(JSON.stringify({
@@ -148,28 +235,148 @@ var OpenAIRealtimeAdapter = class {
148
235
  this.ws?.send(JSON.stringify({ type: "response.create" }));
149
236
  }
150
237
  close() {
238
+ if (this.heartbeat) {
239
+ clearInterval(this.heartbeat);
240
+ this.heartbeat = null;
241
+ }
242
+ this.eventCallbacks.clear();
243
+ this.messageListenerAttached = false;
151
244
  this.ws?.close();
152
245
  this.ws = null;
153
246
  }
154
247
  };
248
+ function estimateAudioMs(chunk, format) {
249
+ if (chunk.length === 0) return 0;
250
+ if (format === "g711_ulaw" || format === "g711_alaw") return Math.floor(chunk.length / 8);
251
+ if (format === "pcm16") {
252
+ return Math.floor(chunk.length / 48);
253
+ }
254
+ return 0;
255
+ }
155
256
 
156
257
  // src/providers/elevenlabs-convai.ts
157
258
  import WebSocket2 from "ws";
158
259
  var ELEVENLABS_CONVAI_URL = "wss://api.elevenlabs.io/v1/convai/conversation";
159
- var ElevenLabsConvAIAdapter = class {
160
- constructor(apiKey, agentId = "", voiceId = "EXAVITQu4vr4xnSDxMaL", _modelId = "eleven_turbo_v2_5", _language = "en", firstMessage = "") {
161
- this.apiKey = apiKey;
162
- this.agentId = agentId;
163
- this.voiceId = voiceId;
164
- this.firstMessage = firstMessage;
165
- }
260
+ var ELEVENLABS_SIGNED_URL = "https://api.elevenlabs.io/v1/convai/conversation/get-signed-url";
261
+ var AGENT_SILENCE_MS = 500;
262
+ var ElevenLabsConvAIAdapter = class _ElevenLabsConvAIAdapter {
166
263
  ws = null;
167
264
  eventCallback = null;
168
- async connect() {
169
- const url = this.agentId ? `${ELEVENLABS_CONVAI_URL}?agent_id=${encodeURIComponent(this.agentId)}` : ELEVENLABS_CONVAI_URL;
170
- this.ws = new WebSocket2(url, {
171
- headers: { "xi-api-key": this.apiKey }
265
+ apiKey;
266
+ agentId;
267
+ voiceId;
268
+ // Exposed for parity with Python SDK (`self.model_id`). ConvAI does not
269
+ // accept a client-side model override today, but we preserve the value so
270
+ // callers can introspect it and we can ship the override the day the
271
+ // server exposes it.
272
+ modelId;
273
+ language;
274
+ firstMessage;
275
+ // Exposed publicly so the stream handler can detect μ-law negotiation
276
+ // (``"ulaw_8000"``) and skip resampling / transcoding on the audio path.
277
+ outputAudioFormat;
278
+ inputAudioFormat;
279
+ useSignedUrl;
280
+ // Populated from `conversation_initiation_metadata`.
281
+ conversationId = null;
282
+ agentOutputAudioFormat = null;
283
+ userInputAudioFormat = null;
284
+ agentSpeaking = false;
285
+ silenceTimer = null;
286
+ closePromise = null;
287
+ constructor(apiKeyOrOptions, agentId = "", voiceId = "EXAVITQu4vr4xnSDxMaL", firstMessage = "") {
288
+ if (typeof apiKeyOrOptions === "object") {
289
+ const o = apiKeyOrOptions;
290
+ this.apiKey = o.apiKey;
291
+ this.agentId = o.agentId ?? "";
292
+ this.voiceId = o.voiceId ?? "EXAVITQu4vr4xnSDxMaL";
293
+ this.modelId = o.modelId ?? "eleven_flash_v2_5";
294
+ this.language = o.language ?? "it";
295
+ this.firstMessage = o.firstMessage ?? "";
296
+ this.outputAudioFormat = o.outputAudioFormat;
297
+ this.inputAudioFormat = o.inputAudioFormat;
298
+ this.useSignedUrl = o.useSignedUrl ?? false;
299
+ } else {
300
+ this.apiKey = apiKeyOrOptions;
301
+ this.agentId = agentId;
302
+ this.voiceId = voiceId;
303
+ this.modelId = "eleven_flash_v2_5";
304
+ this.language = "it";
305
+ this.firstMessage = firstMessage;
306
+ this.outputAudioFormat = void 0;
307
+ this.inputAudioFormat = void 0;
308
+ this.useSignedUrl = false;
309
+ }
310
+ }
311
+ // ------------------------------------------------------------------
312
+ // Telephony factories
313
+ // ------------------------------------------------------------------
314
+ /**
315
+ * Build an adapter pre-configured for Twilio Media Streams.
316
+ *
317
+ * Negotiates `ulaw_8000` for both `outputAudioFormat` and
318
+ * `inputAudioFormat`, matching Twilio's μ-law @ 8 kHz wire format. The
319
+ * SDK's stream handler detects this and skips the 8 kHz → 16 kHz inbound
320
+ * resample and the 16 kHz → 8 kHz / PCM → μ-law outbound transcode.
321
+ * Saves ~30–80 ms first-byte plus per-frame CPU on every turn.
322
+ */
323
+ static forTwilio(apiKey, agentId, options = {}) {
324
+ return new _ElevenLabsConvAIAdapter({
325
+ ...options,
326
+ apiKey,
327
+ agentId,
328
+ outputAudioFormat: "ulaw_8000",
329
+ inputAudioFormat: "ulaw_8000"
330
+ });
331
+ }
332
+ /**
333
+ * Build an adapter pre-configured for Telnyx bidirectional media.
334
+ *
335
+ * Telnyx negotiates PCMU @ 8 kHz when `streaming_start` sets
336
+ * `stream_bidirectional_codec=PCMU` (the SDK default). Picking
337
+ * `ulaw_8000` on both ConvAI directions removes every transcode on the
338
+ * audio path — same optimization as `forTwilio`.
339
+ */
340
+ static forTelnyx(apiKey, agentId, options = {}) {
341
+ return new _ElevenLabsConvAIAdapter({
342
+ ...options,
343
+ apiKey,
344
+ agentId,
345
+ outputAudioFormat: "ulaw_8000",
346
+ inputAudioFormat: "ulaw_8000"
172
347
  });
348
+ }
349
+ async fetchSignedUrl() {
350
+ if (!this.agentId) {
351
+ throw new Error("useSignedUrl=true requires agentId");
352
+ }
353
+ const url = `${ELEVENLABS_SIGNED_URL}?agent_id=${encodeURIComponent(this.agentId)}`;
354
+ const resp = await fetch(url, {
355
+ method: "GET",
356
+ headers: { "xi-api-key": this.apiKey },
357
+ signal: AbortSignal.timeout(15e3)
358
+ });
359
+ if (!resp.ok) {
360
+ const body = await resp.text();
361
+ throw new Error(`ElevenLabs signed-url error ${resp.status}: ${body}`);
362
+ }
363
+ const data = await resp.json();
364
+ if (!data.signed_url) {
365
+ throw new Error("ElevenLabs signed-url response missing 'signed_url'");
366
+ }
367
+ return data.signed_url;
368
+ }
369
+ async connect() {
370
+ let wsUrl;
371
+ let wsOptions;
372
+ if (this.useSignedUrl) {
373
+ wsUrl = await this.fetchSignedUrl();
374
+ wsOptions = void 0;
375
+ } else {
376
+ wsUrl = this.agentId ? `${ELEVENLABS_CONVAI_URL}?agent_id=${encodeURIComponent(this.agentId)}` : ELEVENLABS_CONVAI_URL;
377
+ wsOptions = { headers: { "xi-api-key": this.apiKey } };
378
+ }
379
+ this.ws = new WebSocket2(wsUrl, wsOptions);
173
380
  await new Promise((resolve, reject) => {
174
381
  const timeout = setTimeout(
175
382
  () => reject(new Error("ElevenLabs ConvAI connect timeout")),
@@ -177,17 +384,22 @@ var ElevenLabsConvAIAdapter = class {
177
384
  );
178
385
  this.ws.once("open", () => {
179
386
  clearTimeout(timeout);
387
+ const agentCfg = {};
388
+ if (this.firstMessage) agentCfg["first_message"] = this.firstMessage;
389
+ if (this.language) agentCfg["language"] = this.language;
390
+ const override = {
391
+ tts: this.outputAudioFormat ? { voice_id: this.voiceId, output_format: this.outputAudioFormat } : { voice_id: this.voiceId }
392
+ };
393
+ if (this.inputAudioFormat) {
394
+ override["asr"] = { input_format: this.inputAudioFormat };
395
+ }
396
+ if (Object.keys(agentCfg).length > 0) {
397
+ override["agent"] = agentCfg;
398
+ }
180
399
  const config = {
181
400
  type: "conversation_initiation_client_data",
182
- conversation_config_override: {
183
- tts: { voice_id: this.voiceId }
184
- }
401
+ conversation_config_override: override
185
402
  };
186
- if (this.firstMessage) {
187
- config["conversation_config_override"]["agent"] = {
188
- first_message: this.firstMessage
189
- };
190
- }
191
403
  this.ws.send(JSON.stringify(config));
192
404
  resolve();
193
405
  });
@@ -196,54 +408,176 @@ var ElevenLabsConvAIAdapter = class {
196
408
  reject(err);
197
409
  });
198
410
  });
411
+ this.ws.on("error", (err) => {
412
+ getLogger().error("ElevenLabs ConvAI WS error:", err);
413
+ this.safeInvoke("error", err instanceof Error ? err.message : String(err));
414
+ });
415
+ this.ws.on("close", (code, reason) => {
416
+ this.clearSilenceTimer();
417
+ this.safeInvoke("close", {
418
+ code,
419
+ reason: reason?.toString() ?? ""
420
+ });
421
+ });
199
422
  this.ws.on("message", (raw) => {
200
- const cb = this.eventCallback;
201
- if (!cb) return;
202
- const safeInvoke = (type, data) => {
203
- void Promise.resolve(cb(type, data)).catch(
204
- (err) => getLogger().error("onEvent callback error:", err)
205
- );
206
- };
207
423
  let parsed;
208
424
  try {
209
425
  parsed = JSON.parse(raw.toString());
210
426
  } catch {
211
427
  return;
212
428
  }
213
- const msgType = parsed["type"];
214
- if (msgType === "audio") {
215
- const audioB64 = parsed["audio"];
216
- if (audioB64) {
217
- safeInvoke("audio", Buffer.from(audioB64, "base64"));
218
- }
219
- } else if (msgType === "user_transcript") {
220
- safeInvoke("transcript_input", parsed["text"] ?? "");
221
- } else if (msgType === "agent_response") {
222
- safeInvoke("transcript_output", parsed["text"] ?? "");
223
- safeInvoke("response_done", null);
224
- } else if (msgType === "interruption") {
225
- safeInvoke("interruption", null);
226
- } else if (msgType === "error") {
227
- safeInvoke("error", parsed);
228
- }
429
+ this.handleMessage(parsed);
229
430
  });
230
431
  }
432
+ safeInvoke(type, data) {
433
+ const cb = this.eventCallback;
434
+ if (!cb) return;
435
+ void Promise.resolve(cb(type, data)).catch(
436
+ (err) => getLogger().error("onEvent callback error:", err)
437
+ );
438
+ }
439
+ respondToPing(eventId, delayMs) {
440
+ const send = () => {
441
+ if (!this.ws || this.ws.readyState !== WebSocket2.OPEN) return;
442
+ try {
443
+ this.ws.send(JSON.stringify({ type: "pong", event_id: eventId }));
444
+ } catch (err) {
445
+ getLogger().warn("ElevenLabs ConvAI pong send failed:", err);
446
+ }
447
+ };
448
+ if (delayMs && delayMs > 0) {
449
+ setTimeout(send, delayMs);
450
+ } else {
451
+ send();
452
+ }
453
+ }
454
+ clearSilenceTimer() {
455
+ if (this.silenceTimer) {
456
+ clearTimeout(this.silenceTimer);
457
+ this.silenceTimer = null;
458
+ }
459
+ }
460
+ finalizeAgentTurn() {
461
+ this.clearSilenceTimer();
462
+ if (this.agentSpeaking) {
463
+ this.agentSpeaking = false;
464
+ this.safeInvoke("response_done", null);
465
+ }
466
+ }
467
+ scheduleSilenceDone() {
468
+ this.clearSilenceTimer();
469
+ this.silenceTimer = setTimeout(() => {
470
+ if (this.agentSpeaking) {
471
+ this.agentSpeaking = false;
472
+ this.safeInvoke("response_done", null);
473
+ }
474
+ }, AGENT_SILENCE_MS);
475
+ }
476
+ handleMessage(parsed) {
477
+ const msgType = parsed["type"];
478
+ if (msgType === "ping") {
479
+ const pingPayload = parsed["ping_event"] ?? parsed["ping"] ?? {};
480
+ const eventId = pingPayload["event_id"] ?? parsed["event_id"];
481
+ const pingMs = pingPayload["ping_ms"] ?? 0;
482
+ this.respondToPing(eventId, pingMs);
483
+ return;
484
+ }
485
+ if (msgType === "conversation_initiation_metadata") {
486
+ const meta = parsed["conversation_initiation_metadata_event"] ?? parsed;
487
+ this.conversationId = meta["conversation_id"] ?? this.conversationId;
488
+ this.agentOutputAudioFormat = meta["agent_output_audio_format"] ?? this.agentOutputAudioFormat;
489
+ this.userInputAudioFormat = meta["user_input_audio_format"] ?? this.userInputAudioFormat;
490
+ this.finalizeAgentTurn();
491
+ return;
492
+ }
493
+ if (msgType === "audio") {
494
+ const audioEvt = parsed["audio_event"];
495
+ let audioB64;
496
+ if (audioEvt) {
497
+ audioB64 = audioEvt["audio_base_64"] ?? audioEvt["audio"];
498
+ }
499
+ if (!audioB64) {
500
+ audioB64 = parsed["audio"];
501
+ }
502
+ if (audioB64) {
503
+ this.agentSpeaking = true;
504
+ this.safeInvoke("audio", Buffer.from(audioB64, "base64"));
505
+ this.scheduleSilenceDone();
506
+ }
507
+ return;
508
+ }
509
+ if (msgType === "user_transcript") {
510
+ const evt = parsed["user_transcription_event"] ?? parsed;
511
+ const text = evt["user_transcript"] ?? evt["text"] ?? "";
512
+ this.finalizeAgentTurn();
513
+ this.safeInvoke("transcript_input", text);
514
+ return;
515
+ }
516
+ if (msgType === "agent_response") {
517
+ const evt = parsed["agent_response_event"] ?? parsed;
518
+ const text = evt["agent_response"] ?? evt["text"] ?? "";
519
+ this.safeInvoke("transcript_output", text);
520
+ this.agentSpeaking = true;
521
+ this.safeInvoke("response_start", { text });
522
+ return;
523
+ }
524
+ if (msgType === "interruption") {
525
+ this.finalizeAgentTurn();
526
+ this.safeInvoke("interruption", null);
527
+ return;
528
+ }
529
+ if (msgType === "error") {
530
+ const errText = parsed["message"] ?? parsed["error"] ?? JSON.stringify(parsed);
531
+ getLogger().error("ElevenLabs ConvAI error:", errText);
532
+ this.safeInvoke("error", errText);
533
+ return;
534
+ }
535
+ }
231
536
  sendAudio(audioBytes) {
232
537
  if (!this.ws || this.ws.readyState !== WebSocket2.OPEN) return;
233
538
  this.ws.send(
234
539
  JSON.stringify({
235
- type: "audio",
236
- audio: audioBytes.toString("base64")
540
+ user_audio_chunk: audioBytes.toString("base64")
237
541
  })
238
542
  );
239
543
  }
240
544
  onEvent(callback) {
241
545
  this.eventCallback = callback;
242
546
  }
243
- close() {
244
- this.ws?.close();
245
- this.ws = null;
246
- this.eventCallback = null;
547
+ async close() {
548
+ this.clearSilenceTimer();
549
+ if (!this.ws) {
550
+ this.eventCallback = null;
551
+ return;
552
+ }
553
+ if (this.closePromise) {
554
+ await this.closePromise;
555
+ return;
556
+ }
557
+ const ws = this.ws;
558
+ this.closePromise = new Promise((resolve) => {
559
+ if (ws.readyState === WebSocket2.CLOSED || ws.readyState === WebSocket2.CLOSING) {
560
+ resolve();
561
+ return;
562
+ }
563
+ const done = () => {
564
+ resolve();
565
+ };
566
+ ws.once("close", done);
567
+ ws.once("error", done);
568
+ try {
569
+ ws.close();
570
+ } catch {
571
+ resolve();
572
+ }
573
+ });
574
+ try {
575
+ await this.closePromise;
576
+ } finally {
577
+ this.ws = null;
578
+ this.eventCallback = null;
579
+ this.closePromise = null;
580
+ }
247
581
  }
248
582
  };
249
583
 
@@ -258,21 +592,57 @@ async function createTTS(agent) {
258
592
  // src/pricing.ts
259
593
  var DEFAULT_PRICING = {
260
594
  // STT — per minute of audio processed
261
- deepgram: { unit: "minute", price: 43e-4 },
595
+ // Deepgram Nova-3 streaming (monolingual) — the default model Patter ships.
596
+ // The previous $0.0043/min was the batch rate; streaming is $0.0077/min per
597
+ // deepgram.com/pricing. For multilingual Nova-3 ($0.0092/min) override.
598
+ deepgram: { unit: "minute", price: 77e-4 },
262
599
  whisper: { unit: "minute", price: 6e-3 },
263
- // TTS — per 1,000 characters synthesized
264
- elevenlabs: { unit: "1k_chars", price: 0.18 },
600
+ // AssemblyAI Universal-Streaming — $0.15/hr = $0.0025/min
601
+ assemblyai: { unit: "minute", price: 25e-4 },
602
+ // Cartesia ink-whisper streaming STT — ~$0.15/hr on usage plans
603
+ cartesia_stt: { unit: "minute", price: 25e-4 },
604
+ // Soniox real-time STT — $0.12/hr = $0.002/min
605
+ soniox: { unit: "minute", price: 2e-3 },
606
+ // Speechmatics Pro tier — $0.24/hr = $0.0040/min (new users land here).
607
+ // Previous $0.0173 default reflected a legacy Standard tier that was
608
+ // retired; users were being over-billed ~4.3x.
609
+ speechmatics: { unit: "minute", price: 4e-3 },
610
+ // TTS — per 1,000 characters synthesized.
611
+ // ElevenLabs default model is eleven_flash_v2_5 billed at $0.06/1k via the
612
+ // direct API. The previous $0.18 matched only the Creator plan overage.
613
+ elevenlabs: { unit: "1k_chars", price: 0.06 },
265
614
  openai_tts: { unit: "1k_chars", price: 0.015 },
266
- // OpenAI Realtime — per token
615
+ openai_tts_hd: { unit: "1k_chars", price: 0.03 },
616
+ // Cartesia Sonic TTS — ~1 credit/char, effective $0.030/1k chars on usage plans
617
+ cartesia_tts: { unit: "1k_chars", price: 0.03 },
618
+ // Rime mist v2 — $0.030/1k chars pay-as-you-go
619
+ rime: { unit: "1k_chars", price: 0.03 },
620
+ // LMNT aurora/blizzard — $0.050/1k chars Indie overage
621
+ lmnt: { unit: "1k_chars", price: 0.05 },
622
+ // OpenAI Realtime — per token.
623
+ // Calibrated for gpt-4o-mini-realtime-preview (the Patter default):
624
+ // audio input $10 / M -> 0.00001 per token
625
+ // audio output $20 / M -> 0.00002 per token
626
+ // text input $0.60/ M -> 0.0000006 per token
627
+ // text output $2.40/ M -> 0.0000024 per token
628
+ // For gpt-4o-realtime-preview multiply by ~10, for gpt-realtime by ~3.
267
629
  openai_realtime: {
268
630
  unit: "token",
269
- audio_input_per_token: 1e-4,
270
- audio_output_per_token: 4e-4,
271
- text_input_per_token: 5e-6,
272
- text_output_per_token: 2e-5
631
+ audio_input_per_token: 1e-5,
632
+ audio_output_per_token: 2e-5,
633
+ text_input_per_token: 6e-7,
634
+ text_output_per_token: 24e-7,
635
+ // Prompt caching rates (official): audio cached $0.30/M ~= 3% of full,
636
+ // text cached $0.06/M = 10% of full. OpenAI bills the cached portion of
637
+ // input_token_details.audio_tokens / text_tokens at these reduced rates.
638
+ cached_audio_input_per_token: 3e-7,
639
+ cached_text_input_per_token: 6e-8
273
640
  },
274
- // Telephony — per minute of call duration
275
- twilio: { unit: "minute", price: 0.013 },
641
+ // Telephony — per minute of call duration.
642
+ // twilio default = US inbound local (the 99% case for voice agents receiving
643
+ // calls on a local number). For US toll-free inbound ($0.022/min) or US
644
+ // outbound local ($0.0140/min), override via Patter({ pricing: { twilio: {...} } }).
645
+ twilio: { unit: "minute", price: 85e-4 },
276
646
  telnyx: { unit: "minute", price: 7e-3 }
277
647
  };
278
648
  function mergePricing(overrides) {
@@ -281,22 +651,22 @@ function mergePricing(overrides) {
281
651
  merged[k] = { ...v };
282
652
  }
283
653
  if (!overrides) return merged;
284
- for (const [provider, values] of Object.entries(overrides)) {
285
- if (merged[provider]) {
286
- merged[provider] = { ...merged[provider], ...values };
654
+ for (const [provider2, values] of Object.entries(overrides)) {
655
+ if (merged[provider2]) {
656
+ merged[provider2] = { ...merged[provider2], ...values };
287
657
  } else {
288
- merged[provider] = { unit: "minute", ...values };
658
+ merged[provider2] = { ...values };
289
659
  }
290
660
  }
291
661
  return merged;
292
662
  }
293
- function calculateSttCost(provider, audioSeconds, pricing) {
294
- const config = pricing[provider];
663
+ function calculateSttCost(provider2, audioSeconds, pricing) {
664
+ const config = pricing[provider2];
295
665
  if (!config || config.unit !== "minute") return 0;
296
666
  return audioSeconds / 60 * (config.price ?? 0);
297
667
  }
298
- function calculateTtsCost(provider, characterCount, pricing) {
299
- const config = pricing[provider];
668
+ function calculateTtsCost(provider2, characterCount, pricing) {
669
+ const config = pricing[provider2];
300
670
  if (!config || config.unit !== "1k_chars") return 0;
301
671
  return characterCount / 1e3 * (config.price ?? 0);
302
672
  }
@@ -305,21 +675,126 @@ function calculateRealtimeCost(usage, pricing) {
305
675
  if (!config || config.unit !== "token") return 0;
306
676
  const input = usage.input_token_details ?? {};
307
677
  const output = usage.output_token_details ?? {};
678
+ const cachedAudioRate = config.cached_audio_input_per_token ?? config.audio_input_per_token ?? 0;
679
+ const cachedTextRate = config.cached_text_input_per_token ?? config.text_input_per_token ?? 0;
680
+ const totalAudioIn = input.audio_tokens ?? 0;
681
+ const totalTextIn = input.text_tokens ?? 0;
682
+ let cachedAudioIn;
683
+ let cachedTextIn;
684
+ const details = input.cached_tokens_details;
685
+ if (details && (details.audio_tokens !== void 0 || details.text_tokens !== void 0)) {
686
+ cachedAudioIn = Math.min(details.audio_tokens ?? 0, totalAudioIn);
687
+ cachedTextIn = Math.min(details.text_tokens ?? 0, totalTextIn);
688
+ } else if (input.cached_tokens && input.cached_tokens > 0) {
689
+ const totalIn = totalAudioIn + totalTextIn;
690
+ const ratio = totalIn > 0 ? input.cached_tokens / totalIn : 0;
691
+ cachedAudioIn = Math.min(Math.round(totalAudioIn * ratio), totalAudioIn);
692
+ cachedTextIn = Math.min(Math.round(totalTextIn * ratio), totalTextIn);
693
+ } else {
694
+ cachedAudioIn = 0;
695
+ cachedTextIn = 0;
696
+ }
308
697
  let cost = 0;
309
- cost += (input.audio_tokens ?? 0) * (config.audio_input_per_token ?? 0);
310
- cost += (input.text_tokens ?? 0) * (config.text_input_per_token ?? 0);
698
+ cost += (totalAudioIn - cachedAudioIn) * (config.audio_input_per_token ?? 0);
699
+ cost += cachedAudioIn * cachedAudioRate;
700
+ cost += (totalTextIn - cachedTextIn) * (config.text_input_per_token ?? 0);
701
+ cost += cachedTextIn * cachedTextRate;
311
702
  cost += (output.audio_tokens ?? 0) * (config.audio_output_per_token ?? 0);
312
703
  cost += (output.text_tokens ?? 0) * (config.text_output_per_token ?? 0);
313
- return cost;
704
+ return Math.max(0, cost);
705
+ }
706
+ function calculateRealtimeCachedSavings(usage, pricing) {
707
+ const config = pricing.openai_realtime;
708
+ if (!config || config.unit !== "token") return 0;
709
+ const input = usage.input_token_details ?? {};
710
+ const cached = input.cached_tokens_details ?? {};
711
+ const cachedAudioRate = config.cached_audio_input_per_token ?? config.audio_input_per_token ?? 0;
712
+ const cachedTextRate = config.cached_text_input_per_token ?? config.text_input_per_token ?? 0;
713
+ const cachedAudio = Math.min(cached.audio_tokens ?? 0, input.audio_tokens ?? 0);
714
+ const cachedText = Math.min(cached.text_tokens ?? 0, input.text_tokens ?? 0);
715
+ const fullAudio = cachedAudio * (config.audio_input_per_token ?? 0);
716
+ const fullText = cachedText * (config.text_input_per_token ?? 0);
717
+ const discountedAudio = cachedAudio * cachedAudioRate;
718
+ const discountedText = cachedText * cachedTextRate;
719
+ return Math.max(0, fullAudio + fullText - (discountedAudio + discountedText));
720
+ }
721
+ var llmPricing = {
722
+ anthropic: {
723
+ "claude-opus-4-7": {
724
+ input: 15,
725
+ output: 75,
726
+ cache_read: 1.5,
727
+ cache_write: 18.75
728
+ },
729
+ "claude-sonnet-4-6": {
730
+ input: 3,
731
+ output: 15,
732
+ cache_read: 0.3,
733
+ cache_write: 3.75
734
+ },
735
+ "claude-haiku-4-5": {
736
+ input: 1,
737
+ output: 5,
738
+ cache_read: 0.1,
739
+ cache_write: 1.25
740
+ }
741
+ },
742
+ google: {
743
+ "gemini-2.5-pro": { input: 1.25, output: 10 },
744
+ "gemini-2.5-flash": { input: 0.3, output: 2.5 },
745
+ "gemini-live-2.5-flash-native-audio": { input: 0.3, output: 2.5 }
746
+ },
747
+ groq: {
748
+ "llama-3.3-70b-versatile": { input: 0.59, output: 0.79 },
749
+ "llama-3.1-8b-instant": { input: 0.05, output: 0.08 }
750
+ },
751
+ cerebras: {
752
+ "llama-3.3-70b": { input: 0.85, output: 1.2 },
753
+ "qwen-3-32b": { input: 0.4, output: 0.8 }
754
+ },
755
+ // OpenAI Chat Completions (non-Realtime) — mirrors sdk-py pricing table.
756
+ // Rates are per 1M tokens (USD), cache_read = cached input rate.
757
+ openai: {
758
+ "gpt-4o": { input: 2.5, output: 10, cache_read: 1.25 },
759
+ "gpt-4o-mini": { input: 0.15, output: 0.6, cache_read: 0.075 },
760
+ "gpt-4.1": { input: 3, output: 12, cache_read: 0.75 },
761
+ "gpt-4.1-mini": { input: 0.8, output: 3.2, cache_read: 0.2 },
762
+ "o3": { input: 2, output: 8, cache_read: 0.5 },
763
+ "o4-mini": { input: 1.1, output: 4.4, cache_read: 0.275 }
764
+ }
765
+ };
766
+ function calculateLlmCost(provider2, model, inputTokens, outputTokens, cacheReadTokens = 0, cacheWriteTokens = 0) {
767
+ const providerTable = llmPricing[provider2];
768
+ if (!providerTable) return 0;
769
+ let rates = providerTable[model];
770
+ if (!rates) {
771
+ let bestKey = "";
772
+ for (const key of Object.keys(providerTable)) {
773
+ if (model.startsWith(key) && key.length > bestKey.length) {
774
+ bestKey = key;
775
+ }
776
+ }
777
+ if (bestKey) rates = providerTable[bestKey];
778
+ }
779
+ if (!rates) return 0;
780
+ let cost = 0;
781
+ cost += inputTokens / 1e6 * (rates.input ?? 0);
782
+ cost += outputTokens / 1e6 * (rates.output ?? 0);
783
+ cost += cacheReadTokens / 1e6 * (rates.cache_read ?? 0);
784
+ cost += cacheWriteTokens / 1e6 * (rates.cache_write ?? 0);
785
+ return Math.max(0, cost);
314
786
  }
315
- function calculateTelephonyCost(provider, durationSeconds, pricing) {
316
- const config = pricing[provider];
787
+ function calculateTelephonyCost(provider2, durationSeconds, pricing) {
788
+ const config = pricing[provider2];
317
789
  if (!config || config.unit !== "minute") return 0;
318
- return durationSeconds / 60 * (config.price ?? 0);
790
+ const minutes = provider2 === "twilio" ? Math.ceil(durationSeconds / 60) : durationSeconds / 60;
791
+ return minutes * (config.price ?? 0);
319
792
  }
320
793
 
321
794
  // src/dashboard/store.ts
322
795
  import { EventEmitter } from "events";
796
+ import * as fs from "fs";
797
+ import * as path from "path";
323
798
  var MetricsStore = class extends EventEmitter {
324
799
  maxCalls;
325
800
  calls = [];
@@ -482,6 +957,10 @@ var MetricsStore = class extends EventEmitter {
482
957
  }
483
958
  return null;
484
959
  }
960
+ /** Look up an active call by id (returns undefined if not active or unknown). */
961
+ getActive(callId) {
962
+ return this.activeCalls.get(callId);
963
+ }
485
964
  getActiveCalls() {
486
965
  return Array.from(this.activeCalls.values());
487
966
  }
@@ -547,7 +1026,102 @@ var MetricsStore = class extends EventEmitter {
547
1026
  get callCount() {
548
1027
  return this.calls.length;
549
1028
  }
1029
+ /**
1030
+ * Rebuild the in-memory call list from `metadata.json` files written by
1031
+ * `CallLogger` under `<logRoot>/calls/YYYY/MM/DD/<call_id>/`. Idempotent:
1032
+ * call_ids already in the store are skipped. Errors per file are logged
1033
+ * and swallowed so a single corrupt entry doesn't block hydration.
1034
+ *
1035
+ * Returns the number of calls newly added to the store.
1036
+ *
1037
+ * Safe to call before any traffic; intended to run once at server startup.
1038
+ */
1039
+ hydrate(logRoot) {
1040
+ if (!logRoot) return 0;
1041
+ const callsRoot = path.join(logRoot, "calls");
1042
+ if (!fs.existsSync(callsRoot)) return 0;
1043
+ const collected = [];
1044
+ const seen = new Set(this.calls.map((c) => c.call_id));
1045
+ const walk = (dir, depth) => {
1046
+ let entries;
1047
+ try {
1048
+ entries = fs.readdirSync(dir, { withFileTypes: true });
1049
+ } catch {
1050
+ return;
1051
+ }
1052
+ for (const entry of entries) {
1053
+ const childPath = path.join(dir, entry.name);
1054
+ if (depth < 3) {
1055
+ if (entry.isDirectory() && /^\d+$/.test(entry.name)) {
1056
+ walk(childPath, depth + 1);
1057
+ }
1058
+ continue;
1059
+ }
1060
+ if (!entry.isDirectory()) continue;
1061
+ const metadataPath = path.join(childPath, "metadata.json");
1062
+ if (!fs.existsSync(metadataPath)) continue;
1063
+ try {
1064
+ const raw = fs.readFileSync(metadataPath, "utf8");
1065
+ const meta = JSON.parse(raw);
1066
+ const callId = meta.call_id || entry.name;
1067
+ if (!callId || seen.has(callId)) continue;
1068
+ const record = metadataToCallRecord(callId, meta);
1069
+ if (record === null) {
1070
+ getLogger().debug(
1071
+ `MetricsStore.hydrate: skipping ${metadataPath}: unparseable started_at`
1072
+ );
1073
+ continue;
1074
+ }
1075
+ collected.push(record);
1076
+ seen.add(callId);
1077
+ } catch (err) {
1078
+ getLogger().debug(
1079
+ `MetricsStore.hydrate: skipping ${metadataPath}: ${String(err)}`
1080
+ );
1081
+ }
1082
+ }
1083
+ };
1084
+ walk(callsRoot, 0);
1085
+ collected.sort((a, b) => (a.started_at || 0) - (b.started_at || 0));
1086
+ for (const rec of collected) {
1087
+ if (this.calls.some((c) => c.call_id === rec.call_id)) continue;
1088
+ this.calls.push(rec);
1089
+ if (this.calls.length > this.maxCalls) {
1090
+ this.calls = this.calls.slice(-this.maxCalls);
1091
+ }
1092
+ }
1093
+ return collected.length;
1094
+ }
550
1095
  };
1096
/**
 * Convert a parsed `metadata.json` object into a call record.
 *
 * @param callId Identifier to store as `call_id`.
 * @param meta Parsed metadata object.
 * @returns The call record, or `null` when `meta.started_at` cannot be parsed.
 */
function metadataToCallRecord(callId, meta) {
  const started = parseTimestamp(meta.started_at);
  if (started === null) return null;
  const ended = parseTimestamp(meta.ended_at);
  const hasMetrics = meta.metrics && typeof meta.metrics === "object";
  return {
    call_id: callId,
    caller: meta.caller || "",
    callee: meta.callee || "",
    direction: meta.direction || "inbound",
    started_at: started,
    // An unparseable or missing end time is stored as undefined, not null.
    ended_at: ended ?? void 0,
    status: meta.status || "completed",
    metrics: hasMetrics ? meta.metrics : null,
    transcript: Array.isArray(meta.transcript) ? meta.transcript : []
  };
}
1115
/**
 * Normalise a raw timestamp value.
 *
 * Finite numbers are returned unchanged. Strings are run through `Date.parse`
 * and the resulting milliseconds are divided by 1000. Anything else, or a
 * value that fails to parse, gives `null`.
 *
 * @param raw Number, date string, or any other value.
 * @returns A finite number, or `null`.
 */
function parseTimestamp(raw) {
  switch (typeof raw) {
    case "number":
      return Number.isFinite(raw) ? raw : null;
    case "string": {
      const millis = Date.parse(raw);
      return Number.isFinite(millis) ? millis / 1e3 : null;
    }
    default:
      return null;
  }
}
551
1125
 
552
1126
  // src/dashboard/auth.ts
553
1127
  import crypto from "crypto";
@@ -1628,10 +2202,52 @@ function isWebSocketUrl(url) {
1628
2202
 
1629
2203
  // src/providers/deepgram-stt.ts
1630
2204
  import WebSocket3 from "ws";
2205
+
2206
+ // src/errors.ts
2207
/**
 * Base class for errors raised by this SDK.
 *
 * Every class in the hierarchy accepts the standard optional `options`
 * argument of `Error` (e.g. `{ cause }`) and forwards it, so a low-level
 * failure can be wrapped without losing the original error.
 */
var PatterError = class extends Error {
  constructor(message, options) {
    super(message, options);
    this.name = "PatterError";
  }
};
/** Connection-level failure. */
var PatterConnectionError = class extends PatterError {
  constructor(message, options) {
    super(message, options);
    this.name = "PatterConnectionError";
  }
};
/** Authentication failure. */
var AuthenticationError = class extends PatterError {
  constructor(message, options) {
    super(message, options);
    this.name = "AuthenticationError";
  }
};
/** Provisioning failure. */
var ProvisionError = class extends PatterError {
  constructor(message, options) {
    super(message, options);
    this.name = "ProvisionError";
  }
};
/** Rate-limit failure; a kind of connection error. */
var RateLimitError = class extends PatterConnectionError {
  constructor(message, options) {
    super(message, options);
    this.name = "RateLimitError";
  }
};
2237
+
2238
+ // src/providers/deepgram-stt.ts
1631
2239
  var DEEPGRAM_WS_URL = "wss://api.deepgram.com/v1/listen";
2240
+ var KEEPALIVE_INTERVAL_MS = 4e3;
2241
+ var FINALIZE_DRAIN_MS = 100;
2242
+ var CLOSE_LATENCY_BUDGET_MS = 500;
2243
+ var RECONNECT_CLOSE_CODES = /* @__PURE__ */ new Set([1006, 1011]);
1632
2244
  var DeepgramSTT = class _DeepgramSTT {
1633
2245
  ws = null;
1634
- callbacks = [];
2246
+ transcriptCallbacks = /* @__PURE__ */ new Set();
2247
+ errorCallbacks = /* @__PURE__ */ new Set();
2248
+ keepaliveTimer = null;
2249
+ running = false;
2250
+ reconnectAttempted = false;
1635
2251
  /** Request ID from Deepgram — used to query actual cost post-call. */
1636
2252
  requestId = "";
1637
2253
  apiKey;
@@ -1653,7 +2269,7 @@ var DeepgramSTT = class _DeepgramSTT {
1653
2269
  this.sampleRate = sampleRate ?? opts.sampleRate ?? 16e3;
1654
2270
  this.endpointingMs = opts.endpointingMs ?? 150;
1655
2271
  this.utteranceEndMs = opts.utteranceEndMs === null ? null : opts.utteranceEndMs ?? 1e3;
1656
- this.smartFormat = opts.smartFormat ?? true;
2272
+ this.smartFormat = opts.smartFormat ?? false;
1657
2273
  this.interimResults = opts.interimResults ?? true;
1658
2274
  this.vadEvents = opts.vadEvents ?? true;
1659
2275
  }
@@ -1661,7 +2277,7 @@ var DeepgramSTT = class _DeepgramSTT {
1661
2277
  static forTwilio(apiKey, language = "en", model = "nova-3", options = {}) {
1662
2278
  return new _DeepgramSTT(apiKey, language, model, "mulaw", 8e3, options);
1663
2279
  }
1664
- async connect() {
2280
+ buildUrl() {
1665
2281
  const params = new URLSearchParams({
1666
2282
  model: this.model,
1667
2283
  language: this.language,
@@ -1677,72 +2293,204 @@ var DeepgramSTT = class _DeepgramSTT {
1677
2293
  if (this.utteranceEndMs !== null) {
1678
2294
  params.set("utterance_end_ms", String(Math.max(this.utteranceEndMs, 1e3)));
1679
2295
  }
1680
- const url = `${DEEPGRAM_WS_URL}?${params.toString()}`;
1681
- this.ws = new WebSocket3(url, {
2296
+ return `${DEEPGRAM_WS_URL}?${params.toString()}`;
2297
+ }
2298
+ async connect() {
2299
+ await this.openSocket();
2300
+ this.running = true;
2301
+ this.reconnectAttempted = false;
2302
+ }
2303
+ async openSocket() {
2304
+ const url = this.buildUrl();
2305
+ const ws = new WebSocket3(url, {
1682
2306
  headers: { Authorization: `Token ${this.apiKey}` }
1683
2307
  });
2308
+ this.ws = ws;
1684
2309
  await new Promise((resolve, reject) => {
1685
- const timer = setTimeout(() => reject(new Error("Deepgram connect timeout")), 1e4);
1686
- this.ws.once("open", () => {
1687
- clearTimeout(timer);
1688
- resolve();
1689
- });
1690
- this.ws.once("error", (err) => {
2310
+ let settled = false;
2311
+ const settle = (fn) => {
2312
+ if (settled) return;
2313
+ settled = true;
1691
2314
  clearTimeout(timer);
1692
- reject(err);
2315
+ fn();
2316
+ };
2317
+ const timer = setTimeout(
2318
+ () => settle(() => reject(new PatterConnectionError("Deepgram connect timeout"))),
2319
+ 1e4
2320
+ );
2321
+ ws.once("open", () => settle(resolve));
2322
+ ws.once("error", (err) => settle(() => reject(err)));
2323
+ ws.once("unexpected-response", (_req, res) => {
2324
+ const status = res?.statusCode ?? 0;
2325
+ settle(() => {
2326
+ if (status === 401 || status === 403) {
2327
+ reject(new AuthenticationError(`Deepgram rejected the API key (HTTP ${status}).`));
2328
+ return;
2329
+ }
2330
+ if (status === 429) {
2331
+ reject(new RateLimitError("Deepgram rate limit exceeded (HTTP 429)."));
2332
+ return;
2333
+ }
2334
+ reject(new PatterConnectionError(`Deepgram WebSocket upgrade failed (HTTP ${status}).`));
2335
+ });
1693
2336
  });
1694
2337
  });
1695
- this.ws.on("message", (raw) => {
1696
- let data;
1697
- try {
1698
- data = JSON.parse(raw.toString());
1699
- } catch {
1700
- return;
1701
- }
1702
- if (data.type === "Metadata" && data.request_id) {
1703
- this.requestId = data.request_id;
1704
- return;
2338
+ ws.on("message", (raw) => this.handleMessage(raw.toString()));
2339
+ ws.on("close", (code, reason) => this.handleClose(code, reason.toString()));
2340
+ ws.on("error", (err) => this.handleError(err));
2341
+ this.keepaliveTimer = setInterval(() => {
2342
+ if (this.ws && this.ws.readyState === WebSocket3.OPEN) {
2343
+ try {
2344
+ this.ws.send(JSON.stringify({ type: "KeepAlive" }));
2345
+ } catch {
2346
+ }
1705
2347
  }
1706
- if (data.type !== "Results") return;
1707
- const alternatives = data.channel?.alternatives ?? [];
1708
- if (!alternatives.length) return;
1709
- const best = alternatives[0];
1710
- const text = (best.transcript ?? "").trim();
1711
- if (!text) return;
1712
- const transcript = {
1713
- text,
1714
- isFinal: Boolean(data.is_final) || Boolean(data.speech_final),
1715
- confidence: best.confidence ?? 0
1716
- };
1717
- for (const cb of this.callbacks) {
2348
+ }, KEEPALIVE_INTERVAL_MS);
2349
+ }
2350
+ clearKeepalive() {
2351
+ if (this.keepaliveTimer) {
2352
+ clearInterval(this.keepaliveTimer);
2353
+ this.keepaliveTimer = null;
2354
+ }
2355
+ }
2356
+ handleMessage(raw) {
2357
+ let data;
2358
+ try {
2359
+ data = JSON.parse(raw);
2360
+ } catch {
2361
+ return;
2362
+ }
2363
+ if (data.type === "Metadata" && data.request_id) {
2364
+ this.requestId = data.request_id;
2365
+ return;
2366
+ }
2367
+ if (data.type === "SpeechStarted") {
2368
+ this.emitTranscript({
2369
+ text: "",
2370
+ isFinal: false,
2371
+ confidence: 0,
2372
+ eventType: "SpeechStarted",
2373
+ requestId: this.requestId || void 0
2374
+ });
2375
+ return;
2376
+ }
2377
+ if (data.type === "UtteranceEnd") {
2378
+ this.emitTranscript({
2379
+ text: "",
2380
+ isFinal: true,
2381
+ confidence: 0,
2382
+ eventType: "UtteranceEnd",
2383
+ requestId: this.requestId || void 0
2384
+ });
2385
+ return;
2386
+ }
2387
+ if (data.type !== "Results") return;
2388
+ const alternatives = data.channel?.alternatives ?? [];
2389
+ if (!alternatives.length) return;
2390
+ const best = alternatives[0];
2391
+ const text = (best.transcript ?? "").trim();
2392
+ if (!text) return;
2393
+ const speechFinal = Boolean(data.speech_final);
2394
+ const transcript = {
2395
+ text,
2396
+ isFinal: Boolean(data.is_final) || speechFinal,
2397
+ confidence: best.confidence ?? 0,
2398
+ speechFinal,
2399
+ fromFinalize: Boolean(data.from_finalize),
2400
+ requestId: this.requestId || void 0,
2401
+ words: best.words,
2402
+ eventType: "Results"
2403
+ };
2404
+ this.emitTranscript(transcript);
2405
+ }
2406
+ emitTranscript(transcript) {
2407
+ for (const cb of this.transcriptCallbacks) {
2408
+ try {
1718
2409
  cb(transcript);
2410
+ } catch (err) {
2411
+ getLogger().error(`DeepgramSTT transcript callback threw: ${String(err)}`);
1719
2412
  }
1720
- });
2413
+ }
2414
+ }
2415
+ emitError(err) {
2416
+ for (const cb of this.errorCallbacks) {
2417
+ try {
2418
+ cb(err);
2419
+ } catch (cbErr) {
2420
+ getLogger().error(`DeepgramSTT error callback threw: ${String(cbErr)}`);
2421
+ }
2422
+ }
2423
+ }
2424
+ handleError(err) {
2425
+ getLogger().error(`DeepgramSTT WebSocket error: ${err.message}`);
2426
+ this.emitError(err);
2427
+ }
2428
+ handleClose(code, reason) {
2429
+ this.clearKeepalive();
2430
+ if (!this.running) {
2431
+ return;
2432
+ }
2433
+ const closeError = new PatterConnectionError(
2434
+ `Deepgram WebSocket closed (code=${code}${reason ? `, reason=${reason}` : ""}).`
2435
+ );
2436
+ this.emitError(closeError);
2437
+ if (RECONNECT_CLOSE_CODES.has(code) && !this.reconnectAttempted) {
2438
+ this.reconnectAttempted = true;
2439
+ this.openSocket().catch((err) => {
2440
+ this.running = false;
2441
+ this.emitError(err instanceof Error ? err : new Error(String(err)));
2442
+ });
2443
+ } else {
2444
+ this.running = false;
2445
+ }
1721
2446
  }
1722
2447
  sendAudio(audio) {
1723
2448
  if (!this.ws || this.ws.readyState !== WebSocket3.OPEN) return;
2449
+ if (audio.length === 0) return;
1724
2450
  this.ws.send(audio);
1725
2451
  }
1726
2452
  onTranscript(callback) {
1727
- if (this.callbacks.length >= 10) {
1728
- getLogger().warn("DeepgramSTT: maximum of 10 onTranscript callbacks reached; replacing the last callback.");
1729
- this.callbacks[this.callbacks.length - 1] = callback;
1730
- return;
1731
- }
1732
- this.callbacks.push(callback);
2453
+ this.transcriptCallbacks.add(callback);
1733
2454
  }
1734
- close() {
1735
- if (this.ws) {
1736
- try {
1737
- this.ws.send(JSON.stringify({ type: "CloseStream" }));
1738
- } catch {
1739
- }
1740
- this.ws.close();
1741
- this.ws = null;
1742
- }
2455
+ offTranscript(callback) {
2456
+ this.transcriptCallbacks.delete(callback);
1743
2457
  }
1744
- };
1745
-
2458
+ onError(callback) {
2459
+ this.errorCallbacks.add(callback);
2460
+ }
2461
+ offError(callback) {
2462
+ this.errorCallbacks.delete(callback);
2463
+ }
2464
+ close() {
2465
+ this.running = false;
2466
+ this.clearKeepalive();
2467
+ const ws = this.ws;
2468
+ if (!ws) return;
2469
+ this.ws = null;
2470
+ const sendSafe = (payload) => {
2471
+ if (ws.readyState === WebSocket3.OPEN) {
2472
+ try {
2473
+ ws.send(payload);
2474
+ } catch {
2475
+ }
2476
+ }
2477
+ };
2478
+ const finishClose = () => {
2479
+ sendSafe(JSON.stringify({ type: "CloseStream" }));
2480
+ try {
2481
+ ws.close();
2482
+ } catch {
2483
+ }
2484
+ };
2485
+ if (ws.readyState !== WebSocket3.OPEN) {
2486
+ finishClose();
2487
+ return;
2488
+ }
2489
+ sendSafe(JSON.stringify({ type: "Finalize" }));
2490
+ setTimeout(finishClose, Math.min(FINALIZE_DRAIN_MS, CLOSE_LATENCY_BUDGET_MS));
2491
+ }
2492
+ };
2493
+
1746
2494
  // src/metrics.ts
1747
2495
  function round(value, decimals) {
1748
2496
  const factor = 10 ** decimals;
@@ -1752,11 +2500,16 @@ function hrTimeMs() {
1752
2500
  const [sec, ns] = process.hrtime();
1753
2501
  return sec * 1e3 + ns / 1e6;
1754
2502
  }
1755
/**
 * Percentile of a list of numbers using linear interpolation between the two
 * closest ranks.
 *
 * @param values Samples; the array is not modified.
 * @param p Fraction in [0, 1] (0.95 for p95). Values outside that range are
 *   clamped, so the result is always one of the samples or lies between two.
 * @returns The interpolated percentile, or 0 for an empty list.
 */
function percentile(values, p) {
  if (values.length === 0) return 0;
  const sorted = [...values].sort((a, b) => a - b);
  if (sorted.length === 1) return sorted[0];
  // Without the clamp, p > 1 or p < 0 would index outside the array.
  const fraction = Math.min(1, Math.max(0, p));
  const rank = fraction * (sorted.length - 1);
  const lo = Math.floor(rank);
  const hi = Math.ceil(rank);
  if (lo === hi) return sorted[lo];
  const frac = rank - lo;
  return sorted[lo] + (sorted[hi] - sorted[lo]) * frac;
}
1761
2514
  var CallMetricsAccumulator = class {
1762
2515
  callId;
@@ -1771,19 +2524,54 @@ var CallMetricsAccumulator = class {
1771
2524
  // Per-turn timing state
1772
2525
  _turnStart = null;
1773
2526
  _sttComplete = null;
2527
+ _llmFirstToken = null;
2528
+ _llmFirstSentenceComplete = null;
1774
2529
  _llmComplete = null;
1775
2530
  _ttsFirstByte = null;
2531
+ /** Last TTS audio byte sent (hrTimeMs). Stamped by ``recordTtsComplete`` /
2532
+ * ``recordTtsCompleteTs``. Used to compute ``tts_total_ms``. */
2533
+ _ttsLastByte = null;
2534
+ /** Endpoint signal (hrTimeMs) — VAD stop or STT speech_final, whichever
2535
+ * fires first. Used to compute ``endpoint_ms``. */
2536
+ _endpointSignalAt = null;
2537
+ /** Monotonic stamp of LLM dispatch (paired with ``_endpointSignalAt``). */
2538
+ _turnCommittedMono = null;
2539
+ /** Barge-in detected timestamp (hrTimeMs). */
2540
+ _bargeinDetectedAt = null;
2541
+ /** TTS-stopped timestamp after barge-in (hrTimeMs). */
2542
+ _bargeinStoppedAt = null;
1776
2543
  _turnUserText = "";
1777
2544
  _turnSttAudioSeconds = 0;
1778
2545
  // Cumulative usage counters
1779
2546
  _totalSttAudioSeconds = 0;
1780
2547
  _totalTtsCharacters = 0;
1781
2548
  _totalRealtimeCost = 0;
2549
+ _totalRealtimeCachedSavings = 0;
1782
2550
  _sttByteCount = 0;
1783
2551
  _sttSampleRate = 16e3;
1784
2552
  _sttBytesPerSample = 2;
1785
2553
  _actualTelephonyCost = null;
1786
2554
  _actualSttCost = null;
2555
+ // Fix 10: accumulated LLM token cost for non-Realtime pipeline mode.
2556
+ _totalLlmCost = 0;
2557
+ // ---- EventBus integration (item 3) ----
2558
+ _eventBus;
2559
+ // ---- EOUMetrics — 4 timestamps (item 4) ----
2560
+ /** Timestamp (hrTimeMs) when VAD emitted speech_end. */
2561
+ _vadStoppedAt = null;
2562
+ /** Timestamp (hrTimeMs) when STT emitted its final transcript. */
2563
+ _sttFinalAt = null;
2564
+ /** Timestamp (hrTimeMs) when the transcript was committed to the LLM. */
2565
+ _turnCommittedAt = null;
2566
+ /** Delta (ms) from turn-committed to on_user_turn_completed hook done. */
2567
+ _onUserTurnCompletedDelayMs = null;
2568
+ // ---- InterruptionMetrics — simplified no-ML (item 5) ----
2569
+ _numInterruptions = 0;
2570
+ _numBackchannels = 0;
2571
+ _overlapStartedAt = null;
2572
+ // ---- report_only_initial_ttfb (item 6) ----
2573
+ _reportOnlyInitialTtfb;
2574
+ _initialTtfbEmitted = false;
1787
2575
  constructor(opts) {
1788
2576
  this.callId = opts.callId;
1789
2577
  this.providerMode = opts.providerMode;
@@ -1793,6 +2581,15 @@ var CallMetricsAccumulator = class {
1793
2581
  this.llmProvider = opts.llmProvider ?? "";
1794
2582
  this._pricing = mergePricing(opts.pricing);
1795
2583
  this._callStart = hrTimeMs();
2584
+ this._eventBus = opts.eventBus;
2585
+ this._reportOnlyInitialTtfb = opts.reportOnlyInitialTtfb ?? false;
2586
+ }
2587
+ /**
2588
+ * Attach (or replace) an EventBus after construction.
2589
+ * Useful when the bus is created after the accumulator (e.g. in tests).
2590
+ */
2591
+ attachEventBus(bus) {
2592
+ this._eventBus = bus;
1796
2593
  }
1797
2594
  /** Configure audio format for STT byte-to-seconds conversion. */
1798
2595
  configureSttFormat(sampleRate = 16e3, bytesPerSample = 2) {
@@ -1807,17 +2604,60 @@ var CallMetricsAccumulator = class {
1807
2604
  startTurn() {
1808
2605
  this._turnStart = hrTimeMs();
1809
2606
  this._sttComplete = null;
2607
+ this._llmFirstToken = null;
2608
+ this._llmFirstSentenceComplete = null;
1810
2609
  this._llmComplete = null;
1811
2610
  this._ttsFirstByte = null;
2611
+ this._ttsLastByte = null;
2612
+ this._endpointSignalAt = null;
2613
+ this._turnCommittedMono = null;
2614
+ this._bargeinDetectedAt = null;
2615
+ this._bargeinStoppedAt = null;
1812
2616
  this._turnUserText = "";
1813
2617
  this._turnSttAudioSeconds = 0;
2618
+ this._vadStoppedAt = null;
2619
+ this._sttFinalAt = null;
2620
+ this._turnCommittedAt = null;
2621
+ this._onUserTurnCompletedDelayMs = null;
2622
+ this._eventBus?.emit("turn_started", { callId: this.callId });
2623
+ }
2624
+ /**
2625
+ * Start a new turn only if no turn is currently open.
2626
+ * Use this at inbound-audio ingestion points so the turn timer begins
2627
+ * on the first audio byte rather than just before recordSttComplete().
2628
+ */
2629
+ startTurnIfIdle() {
2630
+ if (this._turnStart === null) {
2631
+ this.startTurn();
2632
+ }
1814
2633
  }
1815
2634
  recordSttComplete(text, audioSeconds = 0) {
1816
2635
  this._sttComplete = hrTimeMs();
2636
+ this._sttFinalAt = this._sttComplete;
2637
+ if (this._endpointSignalAt === null) {
2638
+ this._endpointSignalAt = this._sttComplete;
2639
+ }
1817
2640
  this._turnUserText = text;
1818
2641
  this._turnSttAudioSeconds = audioSeconds;
1819
2642
  this._totalSttAudioSeconds += audioSeconds;
1820
2643
  }
2644
+ /** Record the timestamp of the first LLM token (TTFT). No-op after first call. */
2645
+ recordLlmFirstToken() {
2646
+ if (this._llmFirstToken === null) {
2647
+ this._llmFirstToken = hrTimeMs();
2648
+ }
2649
+ }
2650
+ /**
2651
+ * Record when the sentence chunker emits the first complete sentence.
2652
+ * Used as the TTS span start so tts_ms reflects true TTS-provider latency
2653
+ * rather than the gap from llm_complete (which fires after the full response).
2654
+ * No-op after first call.
2655
+ */
2656
+ recordLlmFirstSentenceComplete() {
2657
+ if (this._llmFirstSentenceComplete === null) {
2658
+ this._llmFirstSentenceComplete = hrTimeMs();
2659
+ }
2660
+ }
1821
2661
  recordLlmComplete() {
1822
2662
  this._llmComplete = hrTimeMs();
1823
2663
  }
@@ -1825,9 +2665,40 @@ var CallMetricsAccumulator = class {
1825
2665
  if (this._ttsFirstByte === null) {
1826
2666
  this._ttsFirstByte = hrTimeMs();
1827
2667
  }
2668
+ if (this._reportOnlyInitialTtfb && this._initialTtfbEmitted) {
2669
+ return;
2670
+ }
2671
+ this._initialTtfbEmitted = true;
1828
2672
  }
1829
2673
  recordTtsComplete(text) {
1830
2674
  this._totalTtsCharacters += text.length;
2675
+ if (this._ttsLastByte === null) {
2676
+ this._ttsLastByte = hrTimeMs();
2677
+ }
2678
+ }
2679
+ /**
2680
+ * Capture the timestamp when the last TTS audio byte was sent on the wire.
2681
+ * Useful when the caller wants to record the timing without bumping the
2682
+ * character counter (e.g. interrupted turns where audio actually went out
2683
+ * but synthesis was truncated).
2684
+ */
2685
+ recordTtsCompleteTs(ts) {
2686
+ this._ttsLastByte = ts ?? hrTimeMs();
2687
+ }
2688
+ /**
2689
+ * Mark the moment a user interrupt (barge-in) was detected. Pairs with
2690
+ * ``recordTtsStopped`` to compute ``bargein_ms``.
2691
+ */
2692
+ recordBargeinDetected(ts) {
2693
+ this._bargeinDetectedAt = ts ?? hrTimeMs();
2694
+ }
2695
+ /**
2696
+ * Mark the moment TTS playback was actually halted after a barge-in. Call
2697
+ * this *after* ``sendClear`` returns. Pairs with ``recordBargeinDetected``
2698
+ * to compute ``bargein_ms``.
2699
+ */
2700
+ recordTtsStopped(ts) {
2701
+ this._bargeinStoppedAt = ts ?? hrTimeMs();
1831
2702
  }
1832
2703
  recordTurnComplete(agentText) {
1833
2704
  const latency = this._computeTurnLatency();
@@ -1842,6 +2713,8 @@ var CallMetricsAccumulator = class {
1842
2713
  };
1843
2714
  this._turns.push(turn);
1844
2715
  this._resetTurnState();
2716
+ this._eventBus?.emit("turn_ended", { callId: this.callId, turn });
2717
+ this._eventBus?.emit("metrics_collected", { callId: this.callId, turn });
1845
2718
  return turn;
1846
2719
  }
1847
2720
  recordTurnInterrupted() {
@@ -1860,12 +2733,111 @@ var CallMetricsAccumulator = class {
1860
2733
  this._resetTurnState();
1861
2734
  return turn;
1862
2735
  }
2736
+ // ---- EOU metrics (item 4) ----
2737
+ /**
2738
+ * Record the moment VAD emitted speech_end for the current utterance.
2739
+ * @param ts Optional override timestamp in hrTimeMs units (defaults to now).
2740
+ */
2741
+ recordVadStop(ts) {
2742
+ this._vadStoppedAt = ts ?? hrTimeMs();
2743
+ if (this._endpointSignalAt === null) {
2744
+ this._endpointSignalAt = this._vadStoppedAt;
2745
+ }
2746
+ }
2747
+ /**
2748
+ * Record the moment the STT provider delivered its final transcript.
2749
+ * Aliased to the same instant as recordSttComplete() when called from
2750
+ * the standard pipeline; can be called independently for custom pipelines.
2751
+ * @param ts Optional override timestamp in hrTimeMs units.
2752
+ */
2753
+ recordSttFinalTimestamp(ts) {
2754
+ this._sttFinalAt = ts ?? hrTimeMs();
2755
+ if (this._endpointSignalAt === null) {
2756
+ this._endpointSignalAt = this._sttFinalAt;
2757
+ }
2758
+ }
2759
+ /**
2760
+ * Record the moment the transcript was committed to the LLM (turn start).
2761
+ * After this call, ``emitEouMetrics()`` can produce a complete EOUMetrics payload.
2762
+ * @param ts Optional override timestamp in hrTimeMs units.
2763
+ */
2764
+ recordTurnCommitted(ts) {
2765
+ this._turnCommittedAt = ts ?? hrTimeMs();
2766
+ this._turnCommittedMono = hrTimeMs();
2767
+ this.emitEouMetrics();
2768
+ }
2769
+ /**
2770
+ * Record the delta (ms) between turn-committed and when on_user_turn_completed
2771
+ * pipeline hook finished. Stored for inclusion in the next ``emitEouMetrics``
2772
+ * call (or an explicit re-emit if desired).
2773
+ */
2774
+ recordOnUserTurnCompletedDelay(delayMs) {
2775
+ this._onUserTurnCompletedDelayMs = delayMs;
2776
+ }
2777
+ /**
2778
+ * Compute and emit EOUMetrics when all three prerequisite timestamps are
2779
+ * available (VAD stop, STT final, turn committed).
2780
+ *
2781
+ * ``endOfUtteranceDelay`` = sttFinal − vadStopped (ms)
2782
+ * ``transcriptionDelay`` = turnCommitted − vadStopped (ms)
2783
+ * ``onUserTurnCompletedDelay`` = caller-supplied delta (ms) or 0
2784
+ */
2785
+ emitEouMetrics() {
2786
+ if (this._vadStoppedAt === null || this._sttFinalAt === null || this._turnCommittedAt === null) {
2787
+ return;
2788
+ }
2789
+ const payload = {
2790
+ timestamp: Date.now() / 1e3,
2791
+ endOfUtteranceDelay: Math.max(0, this._sttFinalAt - this._vadStoppedAt),
2792
+ transcriptionDelay: Math.max(0, this._turnCommittedAt - this._vadStoppedAt),
2793
+ onUserTurnCompletedDelay: this._onUserTurnCompletedDelayMs ?? 0
2794
+ };
2795
+ this._eventBus?.emit("eou_metrics", payload);
2796
+ }
2797
+ // ---- InterruptionMetrics (item 5) ----
2798
+ /**
2799
+ * Record that a caller utterance started overlapping with agent speech.
2800
+ * Call this when VAD detects speech_start during TTS playback.
2801
+ * @param ts Optional override timestamp in hrTimeMs units.
2802
+ */
2803
+ recordOverlapStart(ts) {
2804
+ this._overlapStartedAt = ts ?? hrTimeMs();
2805
+ }
2806
+ /**
2807
+ * Record that the overlap ended. Emits ``InterruptionMetrics`` via the
2808
+ * event bus.
2809
+ *
2810
+ * @param wasInterruption true → barge-in (increments ``numInterruptions``),
2811
+ * false → backchannel (increments ``numBackchannels``).
2812
+ * @param ts Optional override timestamp in hrTimeMs units.
2813
+ */
2814
+ recordOverlapEnd(wasInterruption, ts) {
2815
+ const now = ts ?? hrTimeMs();
2816
+ const detectionDelay = this._overlapStartedAt !== null ? Math.max(0, now - this._overlapStartedAt) : 0;
2817
+ this._overlapStartedAt = null;
2818
+ if (wasInterruption) {
2819
+ this._numInterruptions++;
2820
+ } else {
2821
+ this._numBackchannels++;
2822
+ }
2823
+ const payload = {
2824
+ timestamp: Date.now() / 1e3,
2825
+ // Simplified: totalDuration == detectionDelay (no ML prediction window)
2826
+ totalDuration: detectionDelay,
2827
+ predictionDuration: 0,
2828
+ detectionDelay,
2829
+ numInterruptions: this._numInterruptions,
2830
+ numBackchannels: this._numBackchannels
2831
+ };
2832
+ this._eventBus?.emit("interruption", payload);
2833
+ }
1863
2834
  // ---- Usage tracking ----
1864
2835
  addSttAudioBytes(byteCount) {
1865
2836
  this._sttByteCount += byteCount;
1866
2837
  }
1867
2838
  recordRealtimeUsage(usage) {
1868
2839
  this._totalRealtimeCost += calculateRealtimeCost(usage, this._pricing);
2840
+ this._totalRealtimeCachedSavings += calculateRealtimeCachedSavings(usage, this._pricing);
1869
2841
  }
1870
2842
  setActualTelephonyCost(cost) {
1871
2843
  this._actualTelephonyCost = cost;
@@ -1873,28 +2845,62 @@ var CallMetricsAccumulator = class {
1873
2845
  setActualSttCost(cost) {
1874
2846
  this._actualSttCost = cost;
1875
2847
  }
2848
+ /**
2849
+ * Accumulate LLM token cost for pipeline mode (non-Realtime).
2850
+ *
2851
+ * Called by LLMLoop.run() when a usage chunk arrives from the provider.
2852
+ * Mirrors Python's CallMetricsAccumulator.record_llm_usage().
2853
+ *
2854
+ * @param provider LLM provider key (e.g. 'openai', 'anthropic')
2855
+ * @param model Model name (e.g. 'gpt-4o-mini')
2856
+ * @param inputTokens Total input tokens (includes cached)
2857
+ * @param outputTokens Total output tokens
2858
+ * @param cacheReadTokens Cached input tokens (subtracted from input before billing full rate)
2859
+ * @param cacheWriteTokens Cache write tokens (billed at cache_write rate if present)
2860
+ */
2861
+ recordLlmUsage(provider2, model, inputTokens, outputTokens, cacheReadTokens = 0, cacheWriteTokens = 0) {
2862
+ this._totalLlmCost += calculateLlmCost(
2863
+ provider2,
2864
+ model,
2865
+ inputTokens,
2866
+ outputTokens,
2867
+ cacheReadTokens,
2868
+ cacheWriteTokens
2869
+ );
2870
+ }
1876
2871
  // ---- Finalize ----
1877
2872
  endCall() {
1878
2873
  const duration = (hrTimeMs() - this._callStart) / 1e3;
2874
+ if (this.turnActive) {
2875
+ this.recordTurnInterrupted();
2876
+ }
1879
2877
  if (this._totalSttAudioSeconds === 0 && this._sttByteCount > 0) {
1880
2878
  this._totalSttAudioSeconds = this._sttByteCount / (this._sttSampleRate * this._sttBytesPerSample);
1881
2879
  }
1882
2880
  const cost = this._computeCost(duration);
1883
2881
  const latencyAvg = this._computeAverageLatency();
1884
- const latencyP95 = this._computeP95Latency();
1885
- return {
2882
+ const latencyP50 = this._computePercentileLatency(0.5);
2883
+ const latencyP90 = this._computePercentileLatency(0.9);
2884
+ const latencyP95 = this._computePercentileLatency(0.95);
2885
+ const latencyP99 = this._computePercentileLatency(0.99);
2886
+ const metrics = {
1886
2887
  call_id: this.callId,
1887
2888
  duration_seconds: round(duration, 2),
1888
2889
  turns: [...this._turns],
1889
2890
  cost,
1890
2891
  latency_avg: latencyAvg,
2892
+ latency_p50: latencyP50,
2893
+ latency_p90: latencyP90,
1891
2894
  latency_p95: latencyP95,
2895
+ latency_p99: latencyP99,
1892
2896
  provider_mode: this.providerMode,
1893
2897
  stt_provider: this.sttProvider,
1894
2898
  tts_provider: this.ttsProvider,
1895
2899
  llm_provider: this.llmProvider,
1896
2900
  telephony_provider: this.telephonyProvider
1897
2901
  };
2902
+ this._eventBus?.emit("call_ended", { callId: this.callId, metrics });
2903
+ return metrics;
1898
2904
  }
1899
2905
  getCostSoFar() {
1900
2906
  const duration = (hrTimeMs() - this._callStart) / 1e3;
@@ -1904,36 +2910,68 @@ var CallMetricsAccumulator = class {
1904
2910
  _resetTurnState() {
1905
2911
  this._turnStart = null;
1906
2912
  this._sttComplete = null;
2913
+ this._llmFirstToken = null;
2914
+ this._llmFirstSentenceComplete = null;
1907
2915
  this._llmComplete = null;
1908
2916
  this._ttsFirstByte = null;
2917
+ this._ttsLastByte = null;
2918
+ this._endpointSignalAt = null;
2919
+ this._turnCommittedMono = null;
2920
+ this._bargeinDetectedAt = null;
2921
+ this._bargeinStoppedAt = null;
1909
2922
  this._turnUserText = "";
1910
2923
  this._turnSttAudioSeconds = 0;
1911
2924
  }
1912
2925
  _computeTurnLatency() {
1913
2926
  let stt_ms = 0;
1914
2927
  let llm_ms = 0;
2928
+ let llm_ttft_ms;
2929
+ let llm_total_ms;
1915
2930
  let tts_ms = 0;
1916
2931
  let total_ms = 0;
2932
+ let endpoint_ms;
2933
+ let bargein_ms;
2934
+ let tts_total_ms;
1917
2935
  if (this._turnStart !== null && this._sttComplete !== null) {
1918
2936
  stt_ms = this._sttComplete - this._turnStart;
1919
2937
  }
1920
- if (this._sttComplete !== null && this._llmComplete !== null) {
2938
+ if (this._sttComplete !== null && this._llmFirstToken !== null) {
2939
+ llm_ttft_ms = Math.max(0, this._llmFirstToken - this._sttComplete);
2940
+ llm_ms = llm_ttft_ms;
2941
+ } else if (this._sttComplete !== null && this._llmComplete !== null) {
1921
2942
  llm_ms = this._llmComplete - this._sttComplete;
1922
2943
  }
1923
- if (this._llmComplete !== null && this._ttsFirstByte !== null) {
1924
- tts_ms = this._ttsFirstByte - this._llmComplete;
2944
+ if (this._sttComplete !== null && this._llmComplete !== null) {
2945
+ llm_total_ms = Math.max(0, this._llmComplete - this._sttComplete);
2946
+ }
2947
+ const ttsSpanStart = this._llmFirstSentenceComplete ?? this._llmComplete;
2948
+ if (ttsSpanStart !== null && this._ttsFirstByte !== null) {
2949
+ tts_ms = this._ttsFirstByte - ttsSpanStart;
2950
+ if (tts_ms < 0) tts_ms = 0;
1925
2951
  }
1926
2952
  if (this._turnStart !== null && this._ttsFirstByte !== null) {
1927
2953
  total_ms = this._ttsFirstByte - this._turnStart;
1928
2954
  }
1929
- if (total_ms > 0 && stt_ms === 0 && llm_ms === 0 && tts_ms === 0) {
1930
- llm_ms = total_ms;
2955
+ if (this._endpointSignalAt !== null && this._turnCommittedMono !== null) {
2956
+ endpoint_ms = Math.max(0, this._turnCommittedMono - this._endpointSignalAt);
2957
+ }
2958
+ if (this._bargeinDetectedAt !== null && this._bargeinStoppedAt !== null) {
2959
+ bargein_ms = Math.max(0, this._bargeinStoppedAt - this._bargeinDetectedAt);
2960
+ }
2961
+ const ttsTotalRef = this._llmFirstToken ?? this._llmFirstSentenceComplete ?? this._llmComplete;
2962
+ if (ttsTotalRef !== null && this._ttsLastByte !== null) {
2963
+ tts_total_ms = Math.max(0, this._ttsLastByte - ttsTotalRef);
1931
2964
  }
1932
2965
  return {
1933
2966
  stt_ms: round(stt_ms, 1),
1934
2967
  llm_ms: round(llm_ms, 1),
2968
+ ...llm_ttft_ms !== void 0 ? { llm_ttft_ms: round(llm_ttft_ms, 1) } : {},
2969
+ ...llm_total_ms !== void 0 ? { llm_total_ms: round(llm_total_ms, 1) } : {},
1935
2970
  tts_ms: round(tts_ms, 1),
1936
- total_ms: round(total_ms, 1)
2971
+ total_ms: round(total_ms, 1),
2972
+ ...endpoint_ms !== void 0 ? { endpoint_ms: round(endpoint_ms, 1) } : {},
2973
+ ...bargein_ms !== void 0 ? { bargein_ms: round(bargein_ms, 1) } : {},
2974
+ ...tts_total_ms !== void 0 ? { tts_total_ms: round(tts_total_ms, 1) } : {}
1937
2975
  };
1938
2976
  }
1939
2977
  _computeCost(durationSeconds) {
@@ -1951,7 +2989,7 @@ var CallMetricsAccumulator = class {
1951
2989
  } else {
1952
2990
  stt = this._actualSttCost !== null ? this._actualSttCost : calculateSttCost(this.sttProvider, this._totalSttAudioSeconds, this._pricing);
1953
2991
  tts = calculateTtsCost(this.ttsProvider, this._totalTtsCharacters, this._pricing);
1954
- llm = 0;
2992
+ llm = this._totalLlmCost;
1955
2993
  }
1956
2994
  const telephony = this._actualTelephonyCost !== null ? this._actualTelephonyCost : calculateTelephonyCost(this.telephonyProvider, durationSeconds, this._pricing);
1957
2995
  const total = stt + tts + llm + telephony;
@@ -1960,30 +2998,78 @@ var CallMetricsAccumulator = class {
1960
2998
  tts: round(tts, 6),
1961
2999
  llm: round(llm, 6),
1962
3000
  telephony: round(telephony, 6),
1963
- total: round(total, 6)
3001
+ total: round(total, 6),
3002
+ // Always emit (default 0) for parity with Python dataclass where
3003
+ // llm_cached_savings is a required field with default 0.0.
3004
+ llm_cached_savings: round(Math.max(0, this._totalRealtimeCachedSavings), 6)
1964
3005
  };
1965
3006
  }
3007
+ /**
3008
+ * Turns eligible for latency statistics.
3009
+ *
3010
+ * Excludes turns marked ``[interrupted]`` (barge-in, cancelled replacements)
3011
+ * because their recorded latency either reflects partial state or zero —
3012
+ * including them would drag every p95/avg bucket toward meaningless numbers.
3013
+ */
3014
+ _completedTurns() {
3015
+ return this._turns.filter(
3016
+ (t) => t.agent_text !== "[interrupted]" && t.latency.total_ms > 0
3017
+ );
3018
+ }
1966
3019
  _computeAverageLatency() {
1967
- if (this._turns.length === 0) {
3020
+ const turns = this._completedTurns();
3021
+ if (turns.length === 0) {
1968
3022
  return { stt_ms: 0, llm_ms: 0, tts_ms: 0, total_ms: 0 };
1969
3023
  }
1970
- const n = this._turns.length;
3024
+ const n = turns.length;
3025
+ const ttftValues = turns.map((t) => t.latency.llm_ttft_ms ?? 0).filter((v) => v > 0);
3026
+ const ttftAvg = ttftValues.length > 0 ? round(ttftValues.reduce((s, v) => s + v, 0) / ttftValues.length, 1) : void 0;
3027
+ const optAvg = (key) => {
3028
+ const vals = turns.map((t) => t.latency[key]).filter((v) => typeof v === "number" && v > 0);
3029
+ return vals.length > 0 ? round(vals.reduce((s, v) => s + v, 0) / vals.length, 1) : void 0;
3030
+ };
3031
+ const llmTotalAvg = optAvg("llm_total_ms");
3032
+ const endpointAvg = optAvg("endpoint_ms");
3033
+ const bargeinAvg = optAvg("bargein_ms");
3034
+ const ttsTotalAvg = optAvg("tts_total_ms");
1971
3035
  return {
1972
- stt_ms: round(this._turns.reduce((s, t) => s + t.latency.stt_ms, 0) / n, 1),
1973
- llm_ms: round(this._turns.reduce((s, t) => s + t.latency.llm_ms, 0) / n, 1),
1974
- tts_ms: round(this._turns.reduce((s, t) => s + t.latency.tts_ms, 0) / n, 1),
1975
- total_ms: round(this._turns.reduce((s, t) => s + t.latency.total_ms, 0) / n, 1)
3036
+ stt_ms: round(turns.reduce((s, t) => s + t.latency.stt_ms, 0) / n, 1),
3037
+ llm_ms: round(turns.reduce((s, t) => s + t.latency.llm_ms, 0) / n, 1),
3038
+ ...ttftAvg !== void 0 ? { llm_ttft_ms: ttftAvg } : {},
3039
+ ...llmTotalAvg !== void 0 ? { llm_total_ms: llmTotalAvg } : {},
3040
+ tts_ms: round(turns.reduce((s, t) => s + t.latency.tts_ms, 0) / n, 1),
3041
+ total_ms: round(turns.reduce((s, t) => s + t.latency.total_ms, 0) / n, 1),
3042
+ ...endpointAvg !== void 0 ? { endpoint_ms: endpointAvg } : {},
3043
+ ...bargeinAvg !== void 0 ? { bargein_ms: bargeinAvg } : {},
3044
+ ...ttsTotalAvg !== void 0 ? { tts_total_ms: ttsTotalAvg } : {}
1976
3045
  };
1977
3046
  }
1978
- _computeP95Latency() {
1979
- if (this._turns.length === 0) {
3047
+ _computePercentileLatency(p) {
3048
+ const turns = this._completedTurns();
3049
+ if (turns.length === 0) {
1980
3050
  return { stt_ms: 0, llm_ms: 0, tts_ms: 0, total_ms: 0 };
1981
3051
  }
3052
+ const nonZero = (vals) => vals.filter((v) => v > 0);
3053
+ const ttftSamples = nonZero(turns.map((t) => t.latency.llm_ttft_ms ?? 0));
3054
+ const ttftP = ttftSamples.length > 0 ? round(percentile(ttftSamples, p), 1) : void 0;
3055
+ const optPct = (key) => {
3056
+ const vals = turns.map((t) => t.latency[key]).filter((v) => typeof v === "number" && v > 0);
3057
+ return vals.length > 0 ? round(percentile(vals, p), 1) : void 0;
3058
+ };
3059
+ const llmTotalP = optPct("llm_total_ms");
3060
+ const endpointP = optPct("endpoint_ms");
3061
+ const bargeinP = optPct("bargein_ms");
3062
+ const ttsTotalP = optPct("tts_total_ms");
1982
3063
  return {
1983
- stt_ms: round(p95(this._turns.map((t) => t.latency.stt_ms)), 1),
1984
- llm_ms: round(p95(this._turns.map((t) => t.latency.llm_ms)), 1),
1985
- tts_ms: round(p95(this._turns.map((t) => t.latency.tts_ms)), 1),
1986
- total_ms: round(p95(this._turns.map((t) => t.latency.total_ms)), 1)
3064
+ stt_ms: round(percentile(nonZero(turns.map((t) => t.latency.stt_ms)), p), 1),
3065
+ llm_ms: round(percentile(nonZero(turns.map((t) => t.latency.llm_ms)), p), 1),
3066
+ ...ttftP !== void 0 ? { llm_ttft_ms: ttftP } : {},
3067
+ ...llmTotalP !== void 0 ? { llm_total_ms: llmTotalP } : {},
3068
+ tts_ms: round(percentile(nonZero(turns.map((t) => t.latency.tts_ms)), p), 1),
3069
+ total_ms: round(percentile(nonZero(turns.map((t) => t.latency.total_ms)), p), 1),
3070
+ ...endpointP !== void 0 ? { endpoint_ms: endpointP } : {},
3071
+ ...bargeinP !== void 0 ? { bargein_ms: bargeinP } : {},
3072
+ ...ttsTotalP !== void 0 ? { tts_total_ms: ttsTotalP } : {}
1987
3073
  };
1988
3074
  }
1989
3075
  };
@@ -2038,40 +3124,335 @@ function pcm16ToMulaw(pcmData) {
2038
3124
  }
2039
3125
  return out;
2040
3126
  }
3127
+ var PcmCarry = class {
3128
+ pending = null;
3129
+ /**
3130
+ * Prepend any carried odd byte, return the even-length prefix, and stash
3131
+ * any new trailing odd byte for the next call.
3132
+ *
3133
+ * Returns a zero-length buffer when no complete sample is yet available.
3134
+ */
3135
+ push(chunk) {
3136
+ const combined = this.pending !== null ? Buffer.concat([this.pending, chunk]) : chunk;
3137
+ this.pending = null;
3138
+ const alignedLen = combined.length & ~1;
3139
+ if (alignedLen < combined.length) {
3140
+ this.pending = combined.subarray(alignedLen);
3141
+ }
3142
+ return combined.subarray(0, alignedLen);
3143
+ }
3144
+ /**
3145
+ * Return any pending byte as a 1-byte buffer (rare in practice — only if
3146
+ * the entire stream had an odd byte count), then reset internal state.
3147
+ */
3148
+ flush() {
3149
+ if (this.pending === null) return Buffer.alloc(0);
3150
+ const out = this.pending;
3151
+ this.pending = null;
3152
+ return out;
3153
+ }
3154
+ /** Reset carry state without flushing. */
3155
+ reset() {
3156
+ this.pending = null;
3157
+ }
3158
+ };
3159
+ var StatefulResampler = class {
3160
+ srcRate;
3161
+ dstRate;
3162
+ // 16k→8k: 5-tap FIR state.
3163
+ // Extended sample buffer carries the 2 history samples that precede the
3164
+ // current chunk AND any "pending" input sample that did not yet generate
3165
+ // output (i.e. the odd sample when the chunk had an odd sample count).
3166
+ // `firPhase` = 0 means the next output is at input position 0 of the
3167
+ // current chunk; 1 means it starts at input position 1 (because the
3168
+ // previous chunk ended on an even-output boundary).
3169
+ firHistory = new Int16Array(2);
3170
+ // [s_{-2}, s_{-1}]
3171
+ firHistoryValid = false;
3172
+ // Pending sample carried from odd-count chunks (not the byte carry —
3173
+ // this is a complete Int16 sample that becomes the first input for the
3174
+ // next call).
3175
+ firPendingSample = null;
3176
+ // 8k→16k: last input sample deferred across chunk boundaries.
3177
+ upsampleLast = 0;
3178
+ upsampleHasHistory = false;
3179
+ // 24k→16k: fractional phase and last input sample across chunks.
3180
+ resample24Last = 0;
3181
+ resample24Phase = 0;
3182
+ resample24HasHistory = false;
3183
+ // Odd-byte alignment carry.
3184
+ carry = new PcmCarry();
3185
+ constructor(opts) {
3186
+ this.srcRate = opts.srcRate;
3187
+ this.dstRate = opts.dstRate;
3188
+ if (opts.channels !== void 0 && opts.channels !== 1) {
3189
+ throw new Error("StatefulResampler: only mono (channels=1) is supported");
3190
+ }
3191
+ const key = `${this.srcRate}->${this.dstRate}`;
3192
+ if (key !== "16000->8000" && key !== "8000->16000" && key !== "24000->16000") {
3193
+ throw new Error(
3194
+ `StatefulResampler: unsupported conversion ${key}. Supported: 16000->8000, 8000->16000, 24000->16000`
3195
+ );
3196
+ }
3197
+ }
3198
+ /**
3199
+ * Process a chunk of PCM16-LE samples.
3200
+ *
3201
+ * Handles odd-byte inputs via an internal carry buffer. Returns an even-byte-
3202
+ * aligned output buffer; may return a zero-length buffer if not enough
3203
+ * aligned input is available yet.
3204
+ */
3205
+ process(pcm) {
3206
+ const aligned = this.carry.push(pcm);
3207
+ if (aligned.length === 0) return Buffer.alloc(0);
3208
+ if (this.srcRate === 16e3 && this.dstRate === 8e3) {
3209
+ return this._downsample16kTo8k(aligned);
3210
+ }
3211
+ if (this.srcRate === 8e3 && this.dstRate === 16e3) {
3212
+ return this._upsample8kTo16k(aligned);
3213
+ }
3214
+ return this._resample24kTo16k(aligned);
3215
+ }
3216
+ /**
3217
+ * Flush internal state and return any remaining output samples.
3218
+ *
3219
+ * For 8k→16k: the deferred last sample is emitted duplicated (matching
3220
+ * the stateless helper's end-of-stream behaviour).
3221
+ * For 16k→8k: any pending odd sample is processed with edge-replication.
3222
+ * Resets all state after flushing.
3223
+ */
3224
+ flush() {
3225
+ this.carry.flush();
3226
+ if (this.srcRate === 16e3 && this.dstRate === 8e3 && this.firPendingSample !== null) {
3227
+ const s = this.firPendingSample;
3228
+ const tmp = Buffer.alloc(4);
3229
+ tmp.writeInt16LE(s, 0);
3230
+ tmp.writeInt16LE(s, 2);
3231
+ const out = this._downsample16kTo8k(tmp);
3232
+ this.firPendingSample = null;
3233
+ return out;
3234
+ }
3235
+ if (this.srcRate === 8e3 && this.dstRate === 16e3 && this.upsampleHasHistory) {
3236
+ const out = Buffer.alloc(4);
3237
+ out.writeInt16LE(this.upsampleLast, 0);
3238
+ out.writeInt16LE(this.upsampleLast, 2);
3239
+ this.upsampleHasHistory = false;
3240
+ this.upsampleLast = 0;
3241
+ return out;
3242
+ }
3243
+ return Buffer.alloc(0);
3244
+ }
3245
+ /** Reset all carried state (e.g. at call boundaries). */
3246
+ reset() {
3247
+ this.firHistory = new Int16Array(2);
3248
+ this.firHistoryValid = false;
3249
+ this.firPendingSample = null;
3250
+ this.upsampleLast = 0;
3251
+ this.upsampleHasHistory = false;
3252
+ this.resample24Last = 0;
3253
+ this.resample24Phase = 0;
3254
+ this.resample24HasHistory = false;
3255
+ this.carry.reset();
3256
+ }
3257
+ // ---------------------------------------------------------------------------
3258
+ // Private: 16 kHz → 8 kHz
3259
+ // ---------------------------------------------------------------------------
3260
+ /**
3261
+ * 2:1 decimation with a 5-tap binomial FIR anti-alias filter.
3262
+ *
3263
+ * FIR coefficients: [1, 4, 6, 4, 1] / 16 (cutoff ~Fs/4 = 4 kHz).
3264
+ *
3265
+ * Cross-chunk state:
3266
+ * - `firHistory[0]` = s_{-2}, `firHistory[1]` = s_{-1} relative to the
3267
+ * virtual stream (seeded to first-sample on the very first call).
3268
+ * - `firPendingSample` = a lone input sample carried from a chunk whose
3269
+ * sample count was odd; it will become the first input of the next chunk.
3270
+ *
3271
+ * Decimation: outputs are at even positions (0, 2, 4 …) in the virtual
3272
+ * extended stream, so every 2 input samples yield 1 output. An odd-sample-
3273
+ * count chunk leaves 1 sample in `firPendingSample`; the next chunk
3274
+ * prepends it so the output cadence is unbroken.
3275
+ */
3276
+ _downsample16kTo8k(buf) {
3277
+ const newSampleCount = buf.length >> 1;
3278
+ const hasPending = this.firPendingSample !== null;
3279
+ const totalInput = newSampleCount + (hasPending ? 1 : 0);
3280
+ const input = new Int16Array(totalInput);
3281
+ if (hasPending) {
3282
+ input[0] = this.firPendingSample;
3283
+ for (let j = 0; j < newSampleCount; j++) input[j + 1] = buf.readInt16LE(j * 2);
3284
+ } else {
3285
+ for (let j = 0; j < newSampleCount; j++) input[j] = buf.readInt16LE(j * 2);
3286
+ }
3287
+ this.firPendingSample = null;
3288
+ if (totalInput === 0) return Buffer.alloc(0);
3289
+ if (!this.firHistoryValid) {
3290
+ this.firHistory[0] = input[0];
3291
+ this.firHistory[1] = input[0];
3292
+ this.firHistoryValid = true;
3293
+ }
3294
+ const extended = new Int16Array(totalInput + 2);
3295
+ extended[0] = this.firHistory[0];
3296
+ extended[1] = this.firHistory[1];
3297
+ for (let j = 0; j < totalInput; j++) extended[j + 2] = input[j];
3298
+ const outSamples = totalInput >> 1;
3299
+ const out = Buffer.alloc(outSamples * 2);
3300
+ for (let i = 0; i < outSamples; i++) {
3301
+ const c = 2 + i * 2;
3302
+ const sM2 = extended[c - 2];
3303
+ const sM1 = extended[c - 1];
3304
+ const s0 = extended[c];
3305
+ const sP1 = c + 1 < extended.length ? extended[c + 1] : extended[extended.length - 1];
3306
+ const sP2 = c + 2 < extended.length ? extended[c + 2] : extended[extended.length - 1];
3307
+ const filtered = sM2 + 4 * sM1 + 6 * s0 + 4 * sP1 + sP2 + 8 >> 4;
3308
+ out.writeInt16LE(Math.max(-32768, Math.min(32767, filtered)), i * 2);
3309
+ }
3310
+ if (totalInput % 2 === 1) {
3311
+ this.firPendingSample = input[totalInput - 1];
3312
+ }
3313
+ if (totalInput >= 2) {
3314
+ this.firHistory[0] = input[totalInput - 2];
3315
+ this.firHistory[1] = input[totalInput - 1];
3316
+ } else {
3317
+ this.firHistory[0] = this.firHistory[1];
3318
+ this.firHistory[1] = input[0];
3319
+ }
3320
+ return out;
3321
+ }
3322
+ // ---------------------------------------------------------------------------
3323
+ // Private: 8 kHz → 16 kHz
3324
+ // ---------------------------------------------------------------------------
3325
+ /**
3326
+ * 1:2 linear-interpolation upsampler.
3327
+ *
3328
+ * For the first chunk (no history): emits 2*(N-1) samples and defers the
3329
+ * last sample. For subsequent chunks (with history): emits the deferred
3330
+ * sample + its interpolated midpoint THEN 2*(N-1) samples from the new
3331
+ * chunk, deferring the new last sample. Total across K chunks + flush =
3332
+ * 2*total_input_samples (correct output length).
3333
+ *
3334
+ * Call flush() after the final chunk to emit the last deferred sample
3335
+ * pair (self-duplicate at end of stream).
3336
+ */
3337
+ _upsample8kTo16k(buf) {
3338
+ const sampleCount = buf.length >> 1;
3339
+ if (sampleCount === 0) return Buffer.alloc(0);
3340
+ const outArr = [];
3341
+ if (this.upsampleHasHistory) {
3342
+ const next = buf.readInt16LE(0);
3343
+ outArr.push(this.upsampleLast);
3344
+ outArr.push(Math.round((this.upsampleLast + next) / 2));
3345
+ }
3346
+ for (let i = 0; i < sampleCount - 1; i++) {
3347
+ const s0 = buf.readInt16LE(i * 2);
3348
+ const s1 = buf.readInt16LE((i + 1) * 2);
3349
+ outArr.push(s0);
3350
+ outArr.push(Math.round((s0 + s1) / 2));
3351
+ }
3352
+ this.upsampleLast = buf.readInt16LE((sampleCount - 1) * 2);
3353
+ this.upsampleHasHistory = true;
3354
+ const outBuf = Buffer.alloc(outArr.length * 2);
3355
+ for (let j = 0; j < outArr.length; j++) outBuf.writeInt16LE(outArr[j], j * 2);
3356
+ return outBuf;
3357
+ }
3358
+ // ---------------------------------------------------------------------------
3359
+ // Private: 24 kHz → 16 kHz
3360
+ // ---------------------------------------------------------------------------
3361
+ /**
3362
+ * 3:2 linear-interpolation decimator (ratio srcRate/dstRate = 1.5).
3363
+ *
3364
+ * `resample24Phase` tracks the fractional input position of the next output
3365
+ * sample relative to the START of the next chunk. Negative phase means the
3366
+ * next output straddles the previous/current chunk boundary; those are
3367
+ * handled using `resample24Last`.
3368
+ */
3369
+ _resample24kTo16k(buf) {
3370
+ const sampleCount = buf.length >> 1;
3371
+ if (sampleCount === 0) return Buffer.alloc(0);
3372
+ const outArr = [];
3373
+ let phase = this.resample24Phase;
3374
+ while (true) {
3375
+ const idx = Math.floor(phase);
3376
+ if (idx >= sampleCount) break;
3377
+ const frac = phase - idx;
3378
+ let s0;
3379
+ let s1;
3380
+ if (idx < 0) {
3381
+ s0 = this.resample24HasHistory ? this.resample24Last : 0;
3382
+ s1 = buf.readInt16LE(0);
3383
+ } else {
3384
+ s0 = buf.readInt16LE(idx * 2);
3385
+ s1 = idx + 1 < sampleCount ? buf.readInt16LE((idx + 1) * 2) : s0;
3386
+ }
3387
+ const interp = Math.round(s0 + (s1 - s0) * frac);
3388
+ outArr.push(Math.max(-32768, Math.min(32767, interp)));
3389
+ phase += 24e3 / 16e3;
3390
+ }
3391
+ this.resample24Last = buf.readInt16LE((sampleCount - 1) * 2);
3392
+ this.resample24HasHistory = true;
3393
+ this.resample24Phase = phase - sampleCount;
3394
+ const outBuf = Buffer.alloc(outArr.length * 2);
3395
+ for (let j = 0; j < outArr.length; j++) outBuf.writeInt16LE(outArr[j], j * 2);
3396
+ return outBuf;
3397
+ }
3398
+ };
3399
+ function createResampler16kTo8k() {
3400
+ return new StatefulResampler({ srcRate: 16e3, dstRate: 8e3 });
3401
+ }
3402
+ function createResampler8kTo16k() {
3403
+ return new StatefulResampler({ srcRate: 8e3, dstRate: 16e3 });
3404
+ }
3405
+ function createResampler24kTo16k() {
3406
+ return new StatefulResampler({ srcRate: 24e3, dstRate: 16e3 });
3407
+ }
3408
+ var _warnedResample8kTo16k = false;
3409
+ var _warnedResample16kTo8k = false;
3410
+ var _warnedResample24kTo16k = false;
2041
3411
  function resample8kTo16k(pcm8k) {
2042
- if (pcm8k.length === 0) return Buffer.alloc(0);
2043
- const sampleCount = Math.floor(pcm8k.length / 2);
2044
- const out = Buffer.alloc(sampleCount * 2 * 2);
2045
- for (let i = 0; i < sampleCount; i++) {
2046
- const current = pcm8k.readInt16LE(i * 2);
2047
- const next = i + 1 < sampleCount ? pcm8k.readInt16LE((i + 1) * 2) : current;
2048
- const interpolated = Math.round((current + next) / 2);
2049
- out.writeInt16LE(current, i * 4);
2050
- out.writeInt16LE(interpolated, i * 4 + 2);
3412
+ if (!_warnedResample8kTo16k) {
3413
+ _warnedResample8kTo16k = true;
3414
+ getLogger().warn(
3415
+ "[patter] resample8kTo16k() is deprecated. Use createResampler8kTo16k() (StatefulResampler) to eliminate chunk-boundary discontinuities."
3416
+ );
2051
3417
  }
2052
- return out;
3418
+ if (pcm8k.length === 0) return Buffer.alloc(0);
3419
+ const r = createResampler8kTo16k();
3420
+ const main = r.process(pcm8k);
3421
+ const tail = r.flush();
3422
+ return tail.length > 0 ? Buffer.concat([main, tail]) : main;
2053
3423
  }
2054
3424
  function resample16kTo8k(pcm16k) {
2055
- if (pcm16k.length === 0) return Buffer.alloc(0);
2056
- const sampleCount = Math.floor(pcm16k.length / 2);
2057
- const outSamples = Math.floor(sampleCount / 2);
2058
- const out = Buffer.alloc(outSamples * 2);
2059
- for (let i = 0; i < outSamples; i++) {
2060
- const sample = pcm16k.readInt16LE(i * 2 * 2);
2061
- out.writeInt16LE(sample, i * 2);
3425
+ if (!_warnedResample16kTo8k) {
3426
+ _warnedResample16kTo8k = true;
3427
+ getLogger().warn(
3428
+ "[patter] resample16kTo8k() is deprecated. Use createResampler16kTo8k() (StatefulResampler) to eliminate chunk-boundary discontinuities."
3429
+ );
2062
3430
  }
2063
- return out;
3431
+ if (pcm16k.length === 0) return Buffer.alloc(0);
3432
+ const r = createResampler16kTo8k();
3433
+ const out = r.process(pcm16k);
3434
+ const tail = r.flush();
3435
+ return tail.length > 0 ? Buffer.concat([out, tail]) : out;
2064
3436
  }
2065
3437
  function resample24kTo16k(pcm24k) {
3438
+ if (!_warnedResample24kTo16k) {
3439
+ _warnedResample24kTo16k = true;
3440
+ getLogger().warn(
3441
+ "[patter] resample24kTo16k() is deprecated. Use createResampler24kTo16k() (StatefulResampler) or OpenAITTS.resampleStreaming for anti-aliased resampling."
3442
+ );
3443
+ }
2066
3444
  if (pcm24k.length === 0) return Buffer.alloc(0);
2067
3445
  const sampleCount = Math.floor(pcm24k.length / 2);
2068
3446
  const outSamples = Math.floor(sampleCount * 2 / 3);
2069
3447
  const out = Buffer.alloc(outSamples * 2);
2070
- let outIdx = 0;
2071
- for (let i = 0; i < sampleCount && outIdx < outSamples; i++) {
2072
- if (i % 3 === 2) continue;
2073
- out.writeInt16LE(pcm24k.readInt16LE(i * 2), outIdx * 2);
2074
- outIdx++;
3448
+ for (let i = 0; i < outSamples; i++) {
3449
+ const pos = i * 1.5;
3450
+ const idx = Math.floor(pos);
3451
+ const frac = pos - idx;
3452
+ const s0 = pcm24k.readInt16LE(idx * 2);
3453
+ const s1 = idx + 1 < sampleCount ? pcm24k.readInt16LE((idx + 1) * 2) : s0;
3454
+ const interp = Math.round(s0 + (s1 - s0) * frac);
3455
+ out.writeInt16LE(Math.max(-32768, Math.min(32767, interp)), i * 2);
2075
3456
  }
2076
3457
  return out;
2077
3458
  }
@@ -2133,6 +3514,8 @@ async function executeToolWebhook(webhookUrl, toolName, parsedArgs, context, lab
2133
3514
 
2134
3515
  // src/sentence-chunker.ts
2135
3516
  var DEFAULT_MIN_SENTENCE_LEN = 20;
3517
+ var DEFAULT_MIN_WORDS_FOR_SHORT_FLUSH = 2;
3518
+ var SENTENCE_TERMINATORS = ".!?\u3002\uFF01\uFF1F";
2136
3519
  function splitSentences(text, minSentenceLen = DEFAULT_MIN_SENTENCE_LEN) {
2137
3520
  const alphabets = "([A-Za-z])";
2138
3521
  const prefixes = "(Mr|St|Mrs|Ms|Dr)[.]";
@@ -2191,14 +3574,29 @@ function splitSentences(text, minSentenceLen = DEFAULT_MIN_SENTENCE_LEN) {
2191
3574
  var SentenceChunker = class {
2192
3575
  buffer = "";
2193
3576
  minSentenceLen;
3577
+ minWordsForShortFlush;
2194
3578
  constructor(options) {
2195
3579
  this.minSentenceLen = options?.minSentenceLen ?? DEFAULT_MIN_SENTENCE_LEN;
3580
+ this.minWordsForShortFlush = options?.minWordsForShortFlush ?? DEFAULT_MIN_WORDS_FOR_SHORT_FLUSH;
2196
3581
  }
2197
- /** Feed a token. Returns zero or more complete sentences. */
3582
+ /**
3583
+ * Feed a token. Returns zero or more complete sentences.
3584
+ *
3585
+ * Two emission paths:
3586
+ * - **Standard path** — when the buffer is at least `minSentenceLen`
3587
+ * characters long and the regex tokenizer reports more than one
3588
+ * sentence, all but the last (potentially incomplete) are emitted.
3589
+ * - **Short-flush path** — when the buffer is shorter than `minSentenceLen`
3590
+ * but ends with a sentence terminator AND has at least
3591
+ * `minWordsForShortFlush` whitespace-separated words, emit it
3592
+ * immediately. This drops TTS TTFB on short greetings like `"Hi there!"`
3593
+ * while keeping single-word utterances (`"Sì."`) buffered until
3594
+ * `flush()`.
3595
+ */
2198
3596
  push(token) {
2199
3597
  this.buffer += token;
2200
3598
  if (this.buffer.length < this.minSentenceLen) {
2201
- return [];
3599
+ return this.maybeShortFlush();
2202
3600
  }
2203
3601
  const sentences = splitSentences(this.buffer, this.minSentenceLen);
2204
3602
  if (sentences.length <= 1) {
@@ -2212,6 +3610,41 @@ var SentenceChunker = class {
2212
3610
  this.buffer = sentences[sentences.length - 1]?.[0] ?? "";
2213
3611
  return result;
2214
3612
  }
3613
+ /**
3614
+ * Emit the buffer when it's a short, complete single-sentence utterance.
3615
+ *
3616
+ * A buffer qualifies when **all** of these hold:
3617
+ * 1. Last non-whitespace char is a sentence terminator.
3618
+ * 2. Word count is at least `minWordsForShortFlush` (default 2 — keeps
3619
+ * single-word "Sì." / "Yes." buffered until `flush()`).
3620
+ * 3. The buffer contains exactly one terminator (the trailing one).
3621
+ * Multiple terminators mean we may be mid-stream of a longer merged
3622
+ * utterance like `"Hey! Hi! Hello! This is a sentence."` — let the
3623
+ * standard path keep merging.
3624
+ * 4. The char immediately before the terminator is NOT a digit (avoids
3625
+ * decimal mid-stream like `"f(x) = x * 2."` flushing before `54`).
3626
+ * 5. The char immediately before the terminator is NOT an uppercase
3627
+ * ASCII letter (avoids acronym patterns like `"U.S."` / `"U."`).
3628
+ */
3629
+ maybeShortFlush() {
3630
+ const stripped = this.buffer.replace(/\s+$/, "");
3631
+ if (!stripped) return [];
3632
+ const last = stripped[stripped.length - 1];
3633
+ if (!SENTENCE_TERMINATORS.includes(last)) return [];
3634
+ let terminatorCount = 0;
3635
+ for (const c of stripped) {
3636
+ if (SENTENCE_TERMINATORS.includes(c)) terminatorCount++;
3637
+ }
3638
+ if (terminatorCount !== 1) return [];
3639
+ const wordCount = stripped.split(/\s+/).filter((w) => w.length > 0).length;
3640
+ if (wordCount < this.minWordsForShortFlush) return [];
3641
+ if (stripped.length >= 2) {
3642
+ const prev = stripped[stripped.length - 2];
3643
+ if (/\d/.test(prev) || /[A-Z]/.test(prev)) return [];
3644
+ }
3645
+ this.buffer = "";
3646
+ return [stripped];
3647
+ }
2215
3648
  /** Flush remaining buffer as final sentence(s). Call at end of stream. */
2216
3649
  flush() {
2217
3650
  const remaining = this.buffer.trim();
@@ -2258,6 +3691,44 @@ var PipelineHookExecutor = class {
2258
3691
  return transcript;
2259
3692
  }
2260
3693
  }
3694
+ /**
3695
+ * Run beforeLlm hook. Returns a possibly-modified messages list.
3696
+ * Returning ``null`` from the hook means "keep the original" — the LLM
3697
+ * call is too important to be silently vetoed.
3698
+ * Fail-open: on exception, the original messages pass through.
3699
+ */
3700
+ async runBeforeLlm(messages, ctx) {
3701
+ if (!this.hooks?.beforeLlm) return messages;
3702
+ try {
3703
+ const result = await this.hooks.beforeLlm(messages, ctx);
3704
+ return result ?? messages;
3705
+ } catch (e) {
3706
+ getLogger().error("Pipeline hook beforeLlm threw:", e);
3707
+ return messages;
3708
+ }
3709
+ }
3710
+ /**
3711
+ * Run afterLlm hook. Returns a possibly-modified assistant text.
3712
+ * Returning ``null`` from the hook means "keep the original".
3713
+ * Fail-open: on exception, the original text passes through.
3714
+ */
3715
+ async runAfterLlm(text, ctx) {
3716
+ if (!this.hooks?.afterLlm) return text;
3717
+ try {
3718
+ const result = await this.hooks.afterLlm(text, ctx);
3719
+ return result ?? text;
3720
+ } catch (e) {
3721
+ getLogger().error("Pipeline hook afterLlm threw:", e);
3722
+ return text;
3723
+ }
3724
+ }
3725
+ /**
3726
+ * Whether ``afterLlm`` is configured. Used by the LLM loop to decide
3727
+ * whether to buffer streaming tokens before yielding them.
3728
+ */
3729
+ hasAfterLlm() {
3730
+ return Boolean(this.hooks?.afterLlm);
3731
+ }
2261
3732
  /**
2262
3733
  * Run beforeSynthesize hook. Returns null if hook vetoes TTS for this sentence.
2263
3734
  * If no hook is defined, returns the text unchanged.
@@ -2286,73 +3757,453 @@ var PipelineHookExecutor = class {
2286
3757
  }
2287
3758
  };
2288
3759
 
2289
- // src/stream-handler.ts
2290
- function checkGuardrails(text, guardrails) {
2291
- if (!guardrails) return null;
2292
- for (const guard of guardrails) {
2293
- let blocked = false;
2294
- if (guard.blockedTerms) {
2295
- blocked = guard.blockedTerms.some((term) => text.toLowerCase().includes(term.toLowerCase()));
3760
+ // src/observability/event-bus.ts
3761
var EventBus = class {
  /** Event name -> Set of listener callbacks registered for it. */
  listeners = new Map();

  /**
   * Register ``cb`` for ``event``.
   *
   * @returns a function that removes this listener again (it returns the
   *   boolean result of ``Set.prototype.delete``).
   */
  on(event, cb) {
    if (!this.listeners.has(event)) {
      this.listeners.set(event, new Set());
    }
    const subscribers = this.listeners.get(event);
    subscribers.add(cb);
    return () => subscribers.delete(cb);
  }

  /**
   * Deliver ``payload`` to every listener of ``event``, synchronously and in
   * registration order. Iteration runs over a snapshot, so listeners may
   * subscribe/unsubscribe while being notified. A listener that throws, or
   * whose returned promise rejects, is logged as a warning and never
   * interrupts the remaining listeners or the caller.
   */
  emit(event, payload) {
    const subscribers = this.listeners.get(event);
    if (!subscribers) return;
    Array.from(subscribers).forEach((listener) => {
      try {
        const outcome = listener(payload);
        const isThenableWithCatch = outcome && typeof outcome.catch === "function";
        if (isThenableWithCatch) {
          // Async listener: fire-and-forget, only surface the rejection in logs.
          outcome.catch(
            (e) => getLogger().warn(`[EventBus] listener for "${event}" rejected:`, e)
          );
        }
      } catch (e) {
        getLogger().warn(`[EventBus] listener for "${event}" threw:`, e);
      }
    });
  }
};
3796
+
3797
+ // src/observability/tracing.ts
3798
// Environment variable that opts the process in to OpenTelemetry tracing.
var ENV_FLAG = "PATTER_OTEL_ENABLED";
// Default service / tracer name when the caller does not supply one.
var SERVICE_NAME = "patter";

// Span names used across the call pipeline.
var SPAN_CALL = "getpatter.call";
var SPAN_STT = "getpatter.stt";
var SPAN_LLM = "getpatter.llm";
var SPAN_TTS = "getpatter.tts";
var SPAN_TOOL = "getpatter.tool";
var SPAN_ENDPOINT = "getpatter.endpoint";
var SPAN_BARGEIN = "getpatter.bargein";

// Module-level tracing state, mutated by tryLoadOtel() / initTracing().
var otel = null; // cached @opentelemetry/api module once loaded
var initialized = false; // initTracing() has already run
var tracerAvailable = false; // result of the last initTracing() run
var provider = null; // tracer provider from trySetupSdk(), or null
3811
/**
 * Lazily load ``@opentelemetry/api`` and cache it in the module-level ``otel``.
 *
 * @returns the loaded module, or ``null`` when requiring it throws (a failed
 *   load is not cached, so the next call tries again).
 */
function tryLoadOtel() {
  if (otel === null) {
    try {
      otel = __require("@opentelemetry/api");
    } catch {
      // Optional dependency: absence is reported to the caller as null.
      return null;
    }
  }
  return otel;
}
2304
- function sanitizeLogValue(v, maxLen = 200) {
2305
- const cleaned = v.replace(/[\x00-\x1f\x7f]/g, "");
2306
- return cleaned.length > maxLen ? cleaned.slice(0, maxLen) + "..." : cleaned;
3821
/**
 * Best-effort wiring of the OpenTelemetry SDK: a NodeTracerProvider with a
 * BatchSpanProcessor that exports over OTLP/HTTP.
 *
 * @param options - reads ``serviceName``, ``resourceAttributes`` and
 *   ``otlpEndpoint``; the endpoint falls back to ``OTEL_EXPORTER_OTLP_ENDPOINT``
 * @param api - the loaded ``@opentelemetry/api`` module
 * @returns the provider instance, or ``null`` when any step throws (the reason
 *   is logged at debug level)
 */
function trySetupSdk(options, api) {
  try {
    // Packages are required in this order; whichever throws first determines
    // the message logged by the catch below.
    const nodeSdk = __require("@opentelemetry/sdk-trace-node");
    const baseSdk = __require("@opentelemetry/sdk-trace-base");
    const otlpPkg = __require("@opentelemetry/exporter-trace-otlp-http");

    const resourceAttributes = {
      "service.name": options.serviceName ?? SERVICE_NAME,
      ...options.resourceAttributes ?? {}
    };
    const tracerProvider = new nodeSdk.NodeTracerProvider({
      resource: { attributes: resourceAttributes }
    });

    const baseUrl = options.otlpEndpoint ?? process.env.OTEL_EXPORTER_OTLP_ENDPOINT ?? void 0;
    let exporterConfig = void 0;
    if (baseUrl) {
      // Drop one trailing slash before appending the traces path.
      exporterConfig = { url: `${baseUrl.replace(/\/$/, "")}/v1/traces` };
    }
    const spanExporter = new otlpPkg.OTLPTraceExporter(exporterConfig);

    // The processor is built unconditionally, then attached only if the
    // provider exposes addSpanProcessor / register.
    const batchProcessor = new baseSdk.BatchSpanProcessor(spanExporter);
    tracerProvider.addSpanProcessor?.(batchProcessor);
    tracerProvider.register?.();
    try {
      api.trace.setGlobalTracerProvider?.(tracerProvider);
    } catch {
      // Failure to set the global provider is deliberately ignored.
    }
    return tracerProvider;
  } catch (e) {
    getLogger().debug(
      `[observability] OTel SDK wire-up skipped: ${String(e?.message ?? e)}`
    );
    return null;
  }
}
2308
- function isValidE164(number) {
2309
- return /^\+[1-9]\d{6,14}$/.test(number);
3854
/**
 * Read the tracing opt-in flag from the environment.
 *
 * @returns ``true`` when the variable named by ``ENV_FLAG`` is "1", "true" or
 *   "yes" (case-insensitive); ``false`` otherwise, including when unset.
 */
function envFlagEnabled() {
  const normalized = (process.env[ENV_FLAG] ?? "").toLowerCase();
  return ["1", "true", "yes"].includes(normalized);
}
2311
- var StreamHandler = class {
2312
- deps;
2313
- ws;
2314
- caller;
2315
- callee;
2316
- // Mutable call state
2317
- streamSid = "";
2318
- callId = "";
2319
- adapter = null;
2320
- stt = null;
2321
- tts = null;
2322
- isSpeaking = false;
2323
- llmLoop = null;
2324
- chunkCount = 0;
2325
- callEndFired = false;
3858
/**
 * One-shot tracing initialisation. Later calls return the outcome of the
 * first call without redoing any work.
 *
 * Tracing is enabled only when the env flag is set AND ``@opentelemetry/api``
 * can be loaded. SDK wiring is best-effort: if it fails, tracing still counts
 * as enabled and the log line reports ``exporter=noop``.
 *
 * @param options - forwarded to ``trySetupSdk``; ``serviceName`` is also used
 *   in the log line
 * @returns whether tracing is available
 */
function initTracing(options = {}) {
  if (initialized) return tracerAvailable;
  initialized = true;

  const flagOn = envFlagEnabled();
  const api = flagOn ? tryLoadOtel() : null;
  if (!flagOn || !api) {
    if (flagOn) {
      // Opted in, but the API package could not be loaded.
      getLogger().warn(
        `${ENV_FLAG}=1 but @opentelemetry/api is not installed. Install with: npm install @opentelemetry/api @opentelemetry/sdk-trace-node @opentelemetry/sdk-trace-base @opentelemetry/exporter-trace-otlp-http`
      );
    }
    tracerAvailable = false;
    return false;
  }

  provider = trySetupSdk(options, api);
  tracerAvailable = true;
  const serviceName = options.serviceName ?? SERVICE_NAME;
  const exporterLabel = provider ? "otlp-http" : "noop";
  getLogger().info(
    `[observability] Patter OTel tracing enabled (service=${serviceName}, exporter=${exporterLabel})`
  );
  return true;
}
3881
/**
 * Tracing is live only when initialisation succeeded and the env flag is
 * still set at the time of the call.
 */
function isTracingEnabled() {
  if (!tracerAvailable) return tracerAvailable;
  return envFlagEnabled();
}
3884
/** Span stand-in whose methods all do nothing; used when tracing is off. */
var NoopSpan = class {
  /** Ignores the attribute. */
  setAttribute(_key, _value) {
    // intentionally empty
  }

  /** Ignores the exception. */
  recordException(_exception) {
    // intentionally empty
  }

  /** Nothing to close. */
  end() {
    // intentionally empty
  }
};
// Shared instance handed out wherever a real span cannot be created.
var NOOP_SPAN = new NoopSpan();
3893
/**
 * Thin wrapper around an underlying span object. Every call is forwarded
 * inside a try/catch, so an exception thrown by the wrapped span never
 * reaches the caller.
 */
var RealSpan = class {
  /** The wrapped span object. */
  span;

  constructor(span) {
    this.span = span;
  }

  /** Forward an attribute to the wrapped span; errors are suppressed. */
  setAttribute(key, value) {
    try {
      this.span.setAttribute(key, value);
    } catch {
      // suppressed on purpose
    }
  }

  /** Forward an exception record to the wrapped span; errors are suppressed. */
  recordException(exception) {
    try {
      this.span.recordException(exception);
    } catch {
      // suppressed on purpose
    }
  }

  /** End the wrapped span; errors are suppressed. */
  end() {
    try {
      this.span.end();
    } catch {
      // suppressed on purpose
    }
  }
};
3917
/**
 * Start a span called ``name`` on the ``SERVICE_NAME`` tracer.
 *
 * @param name - span name
 * @param attrs - optional initial attributes; omitted when falsy
 * @returns a ``RealSpan`` wrapper, or the shared ``NOOP_SPAN`` when tracing is
 *   disabled, the OTel API is not loaded, or span creation throws
 */
function startSpan(name, attrs) {
  if (isTracingEnabled() && otel) {
    try {
      const tracer = otel.trace.getTracer(SERVICE_NAME);
      const spanOptions = attrs ? { attributes: attrs } : void 0;
      return new RealSpan(tracer.startSpan(name, spanOptions));
    } catch {
      // fall through to the no-op span
    }
  }
  return NOOP_SPAN;
}
3927
/**
 * Run ``fn`` inside a span. An exception from ``fn`` is recorded on the span
 * and rethrown unchanged; the span is ended on both paths.
 *
 * @param name - span name
 * @param attrs - optional initial span attributes
 * @param fn - callback receiving the span; may be sync or async
 * @returns whatever ``fn`` resolves to
 */
async function withSpan(name, attrs, fn) {
  const activeSpan = startSpan(name, attrs);
  try {
    const result = await fn(activeSpan);
    return result;
  } catch (error) {
    activeSpan.recordException(error);
    throw error;
  } finally {
    activeSpan.end();
  }
}
3938
+
3939
+ // src/stream-handler.ts
3940
/**
 * Find the first guardrail that blocks ``text``.
 *
 * A guard blocks when any of its ``blockedTerms`` occurs in the text
 * (case-insensitive substring match) or, failing that, when its ``check``
 * callback returns a truthy value.
 *
 * @param text - candidate text to screen
 * @param guardrails - optional list of guards, evaluated in order
 * @returns the first blocking guard object, or ``null`` when nothing blocks
 *   (including when ``guardrails`` is missing)
 */
function checkGuardrails(text, guardrails) {
  if (!guardrails) return null;
  // Lower-cased text is computed lazily and at most once, instead of once per
  // blocked term of every guard.
  let loweredText;
  for (const guard of guardrails) {
    let blocked = false;
    if (guard.blockedTerms) {
      blocked = guard.blockedTerms.some((term) => {
        loweredText ??= text.toLowerCase();
        return loweredText.includes(term.toLowerCase());
      });
    }
    if (!blocked && guard.check) {
      blocked = guard.check(text);
    }
    if (blocked) return guard;
  }
  return null;
}
3954
/**
 * Make a value safe to embed in a single log line: strip ASCII control
 * characters (0x00-0x1f and 0x7f) and cap the length.
 *
 * Non-string input is coerced with ``String()`` (``null``/``undefined``
 * become the empty string) so this logging helper never throws on an
 * unexpected value.
 *
 * @param v - value to sanitise
 * @param maxLen - maximum number of characters kept before "..." is appended
 * @returns the cleaned, possibly truncated string
 */
function sanitizeLogValue(v, maxLen = 200) {
  const cleaned = String(v ?? "").replace(/[\x00-\x1f\x7f]/g, "");
  return cleaned.length > maxLen ? `${cleaned.slice(0, maxLen)}...` : cleaned;
}
3958
/**
 * Mask a phone number for logging, keeping only its last four characters.
 *
 * @param number - phone number (any value; stringified when truthy)
 * @returns ``***`` followed by the last four characters, or just ``***`` when
 *   the value is falsy or has four characters or fewer
 */
function maskPhoneNumber(number) {
  const text = number ? String(number) : "";
  return text.length > 4 ? `***${text.slice(-4)}` : "***";
}
3964
/**
 * Check that ``number`` looks like an E.164 phone number: a leading ``+``,
 * a first digit 1-9, then 6 to 14 further digits.
 *
 * @param number - candidate value (coerced to string by the regex test)
 * @returns ``true`` when the whole value matches the pattern
 */
function isValidE164(number) {
  const e164Pattern = /^\+[1-9]\d{6,14}$/;
  return e164Pattern.test(number);
}
3967
// Short filler utterances held in a Set for membership checks (lower-case
// entries; the name suggests they are treated as likely STT hallucinations
// by the transcript-handling code).
var HALLUCINATIONS = new Set([
  "you", "thank you", "thanks",
  "yeah", "yes", "no",
  "okay", "ok",
  "uh", "um", "mmm", "hmm",
  ".",
  "bye", "right", "cool"
]);
3985
+ var StreamHandler = class {
3986
+ deps;
3987
+ ws;
3988
+ caller;
3989
+ callee;
3990
+ // Mutable call state
3991
+ streamSid = "";
3992
+ callId = "";
3993
+ adapter = null;
3994
+ stt = null;
3995
+ tts = null;
3996
+ isSpeaking = false;
3997
+ /** Set to true after a VAD error to suppress log spam for the rest of the call. */
3998
+ vadDisabled = false;
3999
+ /**
4000
+ * Monotonic counter incremented on every TTS-start. The grace timer
4001
+ * scheduled by ``endSpeakingWithGrace`` only flips ``isSpeaking=false``
4002
+ * if the counter still matches its capture — a new turn that started in
4003
+ * the meantime invalidates the obsolete timer instead of clobbering its
4004
+ * own ``isSpeaking=true``.
4005
+ */
4006
+ speakingGeneration = 0;
4007
+ /** Handle for the pending grace-period timer, so it can be cleared on cleanup. */
4008
+ graceTimer = null;
4009
+ /** Mark the start of a TTS span. Use instead of setting isSpeaking directly. */
4010
+ beginSpeaking() {
4011
+ this.speakingGeneration++;
4012
+ this.isSpeaking = true;
4013
+ }
4014
+ /**
4015
+ * Atomically end speaking AND invalidate any pending grace timer.
4016
+ * Use instead of ``this.isSpeaking = false`` at barge-in sites.
4017
+ */
4018
+ cancelSpeaking() {
4019
+ this.speakingGeneration++;
4020
+ this.isSpeaking = false;
4021
+ }
4022
+ /** Cancel and clear the pending grace timer, if any. */
4023
+ clearGraceTimer() {
4024
+ if (this.graceTimer !== null) {
4025
+ clearTimeout(this.graceTimer);
4026
+ this.graceTimer = null;
4027
+ }
4028
+ }
4029
+ /**
4030
+ * Mark the agent as no longer producing TTS, honoring a grace period that
4031
+ * approximates the carrier's playback buffer. The user may still hear the
4032
+ * agent for ~1 s after we finish pushing audio (Twilio buffers ~1500 ms);
4033
+ * keeping isSpeaking=true through that window keeps the VAD-driven
4034
+ * barge-in armed during the audible tail. Tunable via env.
4035
+ */
4036
+ endSpeakingWithGrace() {
4037
+ const grace = Number(process.env.PATTER_TTS_TAIL_GRACE_MS ?? 1500);
4038
+ if (grace > 0) {
4039
+ const gen = this.speakingGeneration;
4040
+ this.clearGraceTimer();
4041
+ this.graceTimer = setTimeout(() => {
4042
+ this.graceTimer = null;
4043
+ if (this.speakingGeneration === gen) this.isSpeaking = false;
4044
+ }, grace);
4045
+ } else {
4046
+ this.isSpeaking = false;
4047
+ }
4048
+ }
4049
+ llmLoop = null;
4050
+ chunkCount = 0;
4051
+ callEndFired = false;
2326
4052
  sttClosed = false;
2327
4053
  currentAgentText = "";
2328
4054
  responseAudioStarted = false;
2329
4055
  maxDurationTimer = null;
2330
4056
  transcriptProcessing = false;
2331
4057
  transcriptQueue = [];
2332
- // BUG #22 throttle statemirror Python impl.
4058
+ // Throttle state for back-to-back STT finals see ``commitTranscript``.
2333
4059
  lastCommitText = "";
2334
4060
  lastCommitAt = 0;
4061
+ // PCM16 byte-alignment carry for TTS streaming (pipeline mode).
4062
+ // HTTP streams from ElevenLabs / OpenAI / Cartesia can yield chunks of any
4063
+ // size, including odd byte counts. Silently dropping the trailing odd byte
4064
+ // misaligns every subsequent int16 sample in the stream (hi/lo bytes get
4065
+ // swapped), producing a voice drowned in loud hiss. We buffer the odd byte
4066
+ // across chunks so resample/mulaw encoding always sees aligned int16 frames.
4067
+ ttsByteCarry = null;
4068
+ // Per-session stateful resamplers eliminate chunk-boundary discontinuities.
4069
+ // Created lazily on first use; reset() on call end.
4070
+ inboundResampler = createResampler8kTo16k();
4071
+ outboundResampler = createResampler16kTo8k();
2335
4072
  history;
2336
4073
  metricsAcc;
4074
+ _eventBus;
2337
4075
  constructor(deps, ws, caller, callee) {
2338
4076
  this.deps = deps;
2339
4077
  this.ws = ws;
2340
4078
  this.caller = caller;
2341
4079
  this.callee = callee;
2342
4080
  this.history = createHistoryManager(200);
2343
- const sttProviderName = deps.agent.stt ? deps.agent.stt.constructor?.name ?? "custom" : void 0;
2344
- const ttsProviderName = deps.agent.tts ? deps.agent.tts.constructor?.name ?? "custom" : void 0;
4081
+ const sttKey = deps.agent.stt?.constructor?.providerKey;
4082
+ const sttProviderName = deps.agent.stt ? sttKey ?? deps.agent.stt.constructor?.name ?? "custom" : void 0;
4083
+ const ttsKey = deps.agent.tts?.constructor?.providerKey;
4084
+ const ttsProviderName = deps.agent.tts ? ttsKey ?? deps.agent.tts.constructor?.name ?? "custom" : void 0;
2345
4085
  const providerMode = deps.agent.provider ?? "openai_realtime";
4086
+ const llmKey = deps.agent.llm?.constructor?.providerKey;
4087
+ let llmProviderName;
4088
+ if (deps.agent.llm) {
4089
+ if (llmKey) {
4090
+ llmProviderName = llmKey;
4091
+ } else {
4092
+ const stripped = (deps.agent.llm.constructor?.name ?? "custom").replace(/LLMProvider$/i, "").replace(/LLM$/i, "").replace(/Provider$/i, "").toLowerCase();
4093
+ llmProviderName = stripped || "custom";
4094
+ }
4095
+ } else {
4096
+ llmProviderName = providerMode === "openai_realtime" ? "openai_realtime" : "openai";
4097
+ }
4098
+ this._eventBus = new EventBus();
2346
4099
  this.metricsAcc = new CallMetricsAccumulator({
2347
4100
  callId: "",
2348
4101
  providerMode,
2349
4102
  telephonyProvider: deps.bridge.telephonyProvider,
2350
4103
  sttProvider: sttProviderName,
2351
4104
  ttsProvider: ttsProviderName,
2352
- pricing: deps.pricing
4105
+ llmProvider: llmProviderName,
4106
+ pricing: deps.pricing,
4107
+ eventBus: this._eventBus,
4108
+ reportOnlyInitialTtfb: deps.reportOnlyInitialTtfb ?? false
2353
4109
  });
2354
4110
  getLogger().debug(`WebSocket connection opened (${deps.bridge.label})`);
2355
4111
  }
4112
+ /**
4113
+ * Record a completed turn in the dashboard store and fire the user-supplied
4114
+ * ``onMetrics`` callback. Centralises the 4 emit sites (firstMessage, pipeline
4115
+ * streaming/regular LLM, WebSocket remote, Realtime response_done) so the
4116
+ * payload shape lives in one place.
4117
+ */
4118
+ async emitTurnMetrics(turn) {
4119
+ if (turn == null) return;
4120
+ this.deps.metricsStore.recordTurn({ call_id: this.callId, turn });
4121
+ if (!this.deps.onMetrics) return;
4122
+ const turnMetrics = turn;
4123
+ const llm_ttft_ms = turnMetrics?.latency?.llm_ttft_ms;
4124
+ await this.deps.onMetrics({
4125
+ call_id: this.callId,
4126
+ turn,
4127
+ ...llm_ttft_ms !== void 0 ? { llm_ttft_ms } : {},
4128
+ cost_so_far: this.metricsAcc.getCostSoFar()
4129
+ });
4130
+ }
4131
+ /** Reset the TTS odd-byte carry — call at every TTS stream entry/exit. */
4132
+ resetTtsCarry() {
4133
+ this.ttsByteCarry = null;
4134
+ }
4135
+ /**
4136
+ * Flush both stateful resamplers and any TTS byte carry on call close.
4137
+ * Emits tail bytes through the telephony bridge so the last ~20 ms of audio
4138
+ * is not silently clipped on hangup. No-op if the WebSocket is already gone.
4139
+ */
4140
+ flushResamplers() {
4141
+ try {
4142
+ const inTail = this.inboundResampler.flush();
4143
+ if (inTail.length > 0 && this.stt) {
4144
+ this.stt.sendAudio(inTail);
4145
+ }
4146
+ } catch {
4147
+ }
4148
+ try {
4149
+ const outTail = this.outboundResampler.flush();
4150
+ if (outTail.length > 0 && this.ws.readyState === this.ws.OPEN) {
4151
+ const mulaw = pcm16ToMulaw(outTail);
4152
+ this.deps.bridge.sendAudio(this.ws, mulaw.toString("base64"), this.streamSid);
4153
+ }
4154
+ } catch {
4155
+ }
4156
+ this.ttsByteCarry = null;
4157
+ }
4158
+ /**
4159
+ * Start call recording when configured. Currently Twilio-only — bridges may
4160
+ * expose ``startRecording`` for parity when we add other carriers.
4161
+ */
4162
+ async startRecordingIfRequested(callId) {
4163
+ const { recording, config } = this.deps;
4164
+ if (!recording || !config.twilioSid || !config.twilioToken || !callId) return;
4165
+ if (!validateTwilioSid(callId)) {
4166
+ getLogger().warn(`Recording skipped: invalid Twilio CallSid format ${JSON.stringify(callId)}`);
4167
+ return;
4168
+ }
4169
+ try {
4170
+ const recUrl = `https://api.twilio.com/2010-04-01/Accounts/${config.twilioSid}/Calls/${callId}/Recordings.json`;
4171
+ const recResp = await fetch(recUrl, {
4172
+ method: "POST",
4173
+ headers: {
4174
+ "Authorization": `Basic ${Buffer.from(`${config.twilioSid}:${config.twilioToken}`).toString("base64")}`
4175
+ }
4176
+ });
4177
+ if (recResp.ok) {
4178
+ getLogger().debug(`Recording started for ${callId}`);
4179
+ } else {
4180
+ getLogger().warn(`could not start recording: ${await recResp.text()}`);
4181
+ }
4182
+ } catch (e) {
4183
+ getLogger().warn(`could not start recording: ${String(e)}`);
4184
+ }
4185
+ }
4186
+ // ---------------------------------------------------------------------------
4187
+ // Public: observer API
4188
+ // ---------------------------------------------------------------------------
4189
+ /**
4190
+ * Subscribe to a Patter event on the per-call EventBus.
4191
+ *
4192
+ * The most common use-case is 'metrics_collected' — fired after every
4193
+ * completed turn with the TurnMetrics payload.
4194
+ *
4195
+ * Returns an unsubscribe function; call it to stop receiving events.
4196
+ *
4197
+ * @example
4198
+ * const off = handler.addObserver((payload) => {
4199
+ * console.log('turn metrics:', payload);
4200
+ * });
4201
+ * // later:
4202
+ * off();
4203
+ */
4204
+ addObserver(cb, event = "metrics_collected") {
4205
+ return this._eventBus.on(event, cb);
4206
+ }
2356
4207
  // ---------------------------------------------------------------------------
2357
4208
  // Public: called by the provider-specific parsers in server.ts
2358
4209
  // ---------------------------------------------------------------------------
@@ -2377,8 +4228,7 @@ var StreamHandler = class {
2377
4228
  this.deps.metricsStore.recordCallStart({
2378
4229
  call_id: callId,
2379
4230
  caller: this.caller,
2380
- callee: this.callee,
2381
- direction: "inbound"
4231
+ callee: this.callee
2382
4232
  });
2383
4233
  const MAX_CALL_DURATION_MS = 60 * 60 * 1e3;
2384
4234
  this.maxDurationTimer = setTimeout(async () => {
@@ -2389,52 +4239,32 @@ var StreamHandler = class {
2389
4239
  }
2390
4240
  }, MAX_CALL_DURATION_MS);
2391
4241
  try {
2392
- const { notifyDashboard } = await import("./persistence-CYIGNHSU.mjs");
4242
+ const { notifyDashboard } = await import("./persistence-LQBYQPQQ.mjs");
2393
4243
  notifyDashboard({
2394
4244
  call_id: callId,
2395
4245
  caller: this.caller,
2396
- callee: this.callee,
2397
- direction: "inbound"
4246
+ callee: this.callee
2398
4247
  });
2399
4248
  } catch {
2400
4249
  }
2401
4250
  if (this.deps.onCallStart) {
4251
+ const direction = this.deps.metricsStore.getActive(callId)?.direction ?? "inbound";
2402
4252
  await this.deps.onCallStart({
2403
4253
  call_id: callId,
2404
4254
  caller: this.caller,
2405
4255
  callee: this.callee,
2406
- direction: "inbound",
4256
+ direction,
4257
+ telephony_provider: this.deps.bridge.telephonyProvider,
2407
4258
  ...Object.keys(customParams).length > 0 ? { custom_params: customParams } : {}
2408
4259
  });
2409
4260
  }
2410
- if (this.deps.recording && this.deps.config.twilioSid && this.deps.config.twilioToken && callId) {
2411
- if (!validateTwilioSid(callId)) {
2412
- getLogger().warn(`Recording skipped: invalid Twilio CallSid format ${JSON.stringify(callId)}`);
2413
- } else {
2414
- try {
2415
- const recUrl = `https://api.twilio.com/2010-04-01/Accounts/${this.deps.config.twilioSid}/Calls/${callId}/Recordings.json`;
2416
- const recResp = await fetch(recUrl, {
2417
- method: "POST",
2418
- headers: {
2419
- "Authorization": `Basic ${Buffer.from(`${this.deps.config.twilioSid}:${this.deps.config.twilioToken}`).toString("base64")}`
2420
- }
2421
- });
2422
- if (recResp.ok) {
2423
- getLogger().debug(`Recording started for ${callId}`);
2424
- } else {
2425
- getLogger().warn(`could not start recording: ${await recResp.text()}`);
2426
- }
2427
- } catch (e) {
2428
- getLogger().warn(`could not start recording: ${String(e)}`);
2429
- }
2430
- }
2431
- }
4261
+ await this.startRecordingIfRequested(callId);
2432
4262
  const agentVars = this.deps.sanitizeVariables(this.deps.agent.variables ?? {});
2433
4263
  const safeCustomParams = this.deps.sanitizeVariables(customParams);
2434
4264
  const allVars = { ...agentVars, ...safeCustomParams };
2435
4265
  const resolvedPrompt = Object.keys(allVars).length > 0 ? this.deps.resolveVariables(this.deps.agent.systemPrompt, allVars) : this.deps.agent.systemPrompt;
2436
- const provider = this.deps.agent.provider ?? "openai_realtime";
2437
- if (provider === "pipeline") {
4266
+ const provider2 = this.deps.agent.provider ?? "openai_realtime";
4267
+ if (provider2 === "pipeline") {
2438
4268
  await this.initPipeline(resolvedPrompt);
2439
4269
  } else {
2440
4270
  await this.initRealtimeAdapter(resolvedPrompt);
@@ -2446,13 +4276,56 @@ var StreamHandler = class {
2446
4276
  }
2447
4277
  /** Handle an incoming audio chunk (already decoded from base64). */
2448
4278
  async handleAudio(audioBuffer) {
2449
- const provider = this.deps.agent.provider ?? "openai_realtime";
2450
- if (provider === "pipeline" && this.stt) {
2451
- if (this.isSpeaking && (this.deps.agent.bargeInThresholdMs ?? 300) === 0) {
2452
- return;
2453
- }
4279
+ const provider2 = this.deps.agent.provider ?? "openai_realtime";
4280
+ if (provider2 === "pipeline" && this.stt) {
2454
4281
  const pcm8k = mulawToPcm16(audioBuffer);
2455
- const pcm16k = resample8kTo16k(pcm8k);
4282
+ const pcm16k = this.inboundResampler.process(pcm8k);
4283
+ if (this.deps.agent.vad && !this.vadDisabled) {
4284
+ try {
4285
+ const vadPromise = this.deps.agent.vad.processFrame(pcm16k, 16e3);
4286
+ const timeoutPromise = new Promise((resolve) => setTimeout(() => resolve(null), 25));
4287
+ const evt = await Promise.race([vadPromise, timeoutPromise]);
4288
+ if (evt) {
4289
+ getLogger().info(
4290
+ `[VAD] ${evt.type} agentSpeaking=${this.isSpeaking}`
4291
+ );
4292
+ }
4293
+ if (evt?.type === "speech_start") {
4294
+ if (this.isSpeaking) {
4295
+ getLogger().info("[VAD] speech_start during TTS \u2192 BARGE-IN");
4296
+ this.metricsAcc.recordOverlapStart();
4297
+ this.metricsAcc.recordBargeinDetected();
4298
+ const bargeinSpan = startSpan(SPAN_BARGEIN, { "patter.call.id": this.callId });
4299
+ try {
4300
+ this.cancelSpeaking();
4301
+ try {
4302
+ this.deps.bridge.sendClear(this.ws, this.streamSid);
4303
+ } catch (err) {
4304
+ getLogger().debug(`sendClear during VAD barge-in failed: ${String(err)}`);
4305
+ }
4306
+ this.metricsAcc.recordTtsStopped();
4307
+ this.metricsAcc.recordTurnInterrupted();
4308
+ this.metricsAcc.recordOverlapEnd(true);
4309
+ } finally {
4310
+ try {
4311
+ bargeinSpan.end();
4312
+ } catch {
4313
+ }
4314
+ }
4315
+ }
4316
+ this.metricsAcc.startTurnIfIdle();
4317
+ } else if (evt?.type === "speech_end") {
4318
+ this.metricsAcc.recordVadStop();
4319
+ }
4320
+ } catch (err) {
4321
+ this.vadDisabled = true;
4322
+ getLogger().warn(`VAD processFrame failed \u2014 disabling VAD for this call: ${String(err)}`);
4323
+ }
4324
+ }
4325
+ if (this.isSpeaking) {
4326
+ if (this.deps.agent.vad) return;
4327
+ if ((this.deps.agent.bargeInThresholdMs ?? 300) === 0) return;
4328
+ }
2456
4329
  const hooks = this.deps.agent.hooks;
2457
4330
  if (hooks) {
2458
4331
  const hookExecutor = new PipelineHookExecutor(hooks);
@@ -2460,13 +4333,15 @@ var StreamHandler = class {
2460
4333
  const processed = await hookExecutor.runBeforeSendToStt(pcm16k, hookCtx);
2461
4334
  if (processed === null) return;
2462
4335
  this.stt.sendAudio(processed);
4336
+ this.metricsAcc.addSttAudioBytes(processed.length);
2463
4337
  } else {
2464
4338
  this.stt.sendAudio(pcm16k);
4339
+ this.metricsAcc.addSttAudioBytes(pcm16k.length);
2465
4340
  }
2466
4341
  } else if (this.adapter) {
2467
- if (this.adapter instanceof ElevenLabsConvAIAdapter && this.deps.bridge.telephonyProvider === "twilio") {
4342
+ if (this.adapter instanceof ElevenLabsConvAIAdapter && this.deps.bridge.telephonyProvider === "twilio" && this.adapter.inputAudioFormat !== "ulaw_8000") {
2468
4343
  const pcm8k = mulawToPcm16(audioBuffer);
2469
- const pcm16k = resample8kTo16k(pcm8k);
4344
+ const pcm16k = this.inboundResampler.process(pcm8k);
2470
4345
  this.adapter.sendAudio(pcm16k);
2471
4346
  } else {
2472
4347
  this.adapter.sendAudio(audioBuffer);
@@ -2483,8 +4358,28 @@ var StreamHandler = class {
2483
4358
  await this.deps.onTranscript({ role: "user", text: `[DTMF: ${digit}]`, call_id: this.callId });
2484
4359
  }
2485
4360
  }
4361
+ /**
4362
+ * Last mark name Twilio has confirmed playback of. Mirrors the Python
4363
+ * ``TwilioAudioSender.last_confirmed_mark`` field — barge-in heuristics
4364
+ * compare this against the latest sent mark to decide whether the agent's
4365
+ * audio has actually reached the caller yet.
4366
+ */
4367
+ lastConfirmedMark = "";
4368
+ /**
4369
+ * Handle a Twilio ``mark`` event acknowledging that a previously sent
4370
+ * audio chunk has been played out. Mirrors Python's
4371
+ * ``twilio_handler.py``: ``audio_sender.on_mark_confirmed(mark_name)`` +
4372
+ * ``handler.on_mark(mark_name)``.
4373
+ */
4374
+ async onMark(markName) {
4375
+ if (markName) {
4376
+ this.lastConfirmedMark = markName;
4377
+ }
4378
+ }
2486
4379
  /** Handle call stop / stream end. */
2487
4380
  async handleStop() {
4381
+ this.clearGraceTimer();
4382
+ this.flushResamplers();
2488
4383
  await this.closeSttOnce();
2489
4384
  try {
2490
4385
  this.adapter?.close();
@@ -2494,6 +4389,8 @@ var StreamHandler = class {
2494
4389
  }
2495
4390
  /** Handle WebSocket close event. */
2496
4391
  async handleWsClose() {
4392
+ this.clearGraceTimer();
4393
+ this.flushResamplers();
2497
4394
  await this.closeSttOnce();
2498
4395
  try {
2499
4396
  this.adapter?.close();
@@ -2519,15 +4416,33 @@ var StreamHandler = class {
2519
4416
  // ---------------------------------------------------------------------------
2520
4417
  /**
2521
4418
  * Encode a PCM 16kHz audio chunk for the telephony provider.
2522
- * Twilio requires mulaw 8kHz; Telnyx accepts PCM 16kHz natively.
4419
+ *
4420
+ * Both Twilio and Telnyx negotiate PCMU (mulaw) 8 kHz on the bidirectional
4421
+ * media stream — Twilio always, and Telnyx because ``streaming_start``
4422
+ * (server.ts) requests ``stream_bidirectional_codec=PCMU`` at 8 kHz. So
4423
+ * the wire format for both providers is mulaw 8 kHz; we resample 16 kHz
4424
+ * PCM16 → 8 kHz then encode to mulaw. Mirrors the Python pipeline path
4425
+ * (sdk-py/getpatter/handlers/telnyx_handler.py::TelnyxAudioSender).
4426
+ *
4427
+ * Maintains a 1-byte carry across calls so unaligned HTTP chunks from
4428
+ * streaming TTS providers never byte-swap the PCM16 samples downstream.
2523
4429
  */
2524
4430
  encodePipelineAudio(pcm16k) {
2525
- if (this.deps.bridge.telephonyProvider === "twilio") {
2526
- const pcm8k = resample16kTo8k(pcm16k);
2527
- const mulaw = pcm16ToMulaw(pcm8k);
2528
- return mulaw.toString("base64");
2529
- }
2530
- return pcm16k.toString("base64");
4431
+ const aligned = this.alignPcm16(pcm16k);
4432
+ if (aligned.length === 0) return "";
4433
+ const pcm8k = this.outboundResampler.process(aligned);
4434
+ const mulaw = pcm16ToMulaw(pcm8k);
4435
+ return mulaw.toString("base64");
4436
+ }
4437
+ /**
4438
+ * Prepend any carry byte from the previous chunk, return the even-length
4439
+ * portion, and stash the final odd byte (if any) for the next call.
4440
+ */
4441
+ alignPcm16(chunk) {
4442
+ const combined = this.ttsByteCarry ? Buffer.concat([this.ttsByteCarry, chunk]) : chunk;
4443
+ const alignedLen = combined.length & ~1;
4444
+ this.ttsByteCarry = alignedLen < combined.length ? combined.subarray(alignedLen) : null;
4445
+ return combined.subarray(0, alignedLen);
2531
4446
  }
2532
4447
  // ---------------------------------------------------------------------------
2533
4448
  // Private: Pipeline mode
@@ -2556,6 +4471,7 @@ var StreamHandler = class {
2556
4471
  if (this.deps.agent.firstMessage && !this.deps.onMessage && this.tts) {
2557
4472
  this.metricsAcc.startTurn();
2558
4473
  let firstChunkSent = false;
4474
+ this.resetTtsCarry();
2559
4475
  try {
2560
4476
  for await (const chunk of this.tts.synthesizeStream(this.deps.agent.firstMessage)) {
2561
4477
  if (!firstChunkSent) {
@@ -2567,13 +4483,11 @@ var StreamHandler = class {
2567
4483
  }
2568
4484
  } catch (e) {
2569
4485
  getLogger().error(`First message TTS error (${label}):`, e);
4486
+ } finally {
4487
+ this.resetTtsCarry();
2570
4488
  }
2571
4489
  if (firstChunkSent) {
2572
- const turn = this.metricsAcc.recordTurnComplete(this.deps.agent.firstMessage);
2573
- if (turn) {
2574
- this.deps.metricsStore.recordTurn({ call_id: this.callId, turn });
2575
- if (this.deps.onMetrics) await this.deps.onMetrics({ call_id: this.callId, turn });
2576
- }
4490
+ await this.emitTurnMetrics(this.metricsAcc.recordTurnComplete(this.deps.agent.firstMessage));
2577
4491
  this.history.push({ role: "assistant", text: this.deps.agent.firstMessage, timestamp: Date.now() });
2578
4492
  }
2579
4493
  }
@@ -2583,15 +4497,17 @@ var StreamHandler = class {
2583
4497
  "Cannot pass both agent({ llm }) and serve({ onMessage }). Pick one \u2014 `llm` for built-in LLMs, `onMessage` for custom logic."
2584
4498
  );
2585
4499
  }
4500
+ const providerModel = this.deps.agent.llm?.model ?? "";
2586
4501
  this.llmLoop = new LLMLoop(
2587
4502
  "",
2588
4503
  // apiKey unused when llmProvider is supplied
2589
- "",
2590
- // model unused when llmProvider is supplied
4504
+ providerModel,
4505
+ // propagate so calculateLlmCost can match the price row
2591
4506
  resolvedPrompt,
2592
4507
  this.deps.agent.tools,
2593
4508
  this.deps.agent.llm
2594
4509
  );
4510
+ this.llmLoop.setEventBus(this._eventBus);
2595
4511
  const llmLabel = this.deps.agent.llm.constructor?.name ?? "custom";
2596
4512
  getLogger().debug(`Built-in LLM loop active (pipeline, ${label}, llm=${llmLabel})`);
2597
4513
  } else if (!this.deps.onMessage && this.deps.config.openaiKey) {
@@ -2603,6 +4519,7 @@ var StreamHandler = class {
2603
4519
  resolvedPrompt,
2604
4520
  this.deps.agent.tools
2605
4521
  );
4522
+ this.llmLoop.setEventBus(this._eventBus);
2606
4523
  getLogger().debug(`Built-in LLM loop active (pipeline, ${label})`);
2607
4524
  }
2608
4525
  if (this.stt) {
@@ -2632,6 +4549,7 @@ var StreamHandler = class {
2632
4549
  }
2633
4550
  const processedText = await hookExecutor.runBeforeSynthesize(transformed, hookCtx);
2634
4551
  if (processedText === null) return;
4552
+ this.resetTtsCarry();
2635
4553
  try {
2636
4554
  for await (const chunk of this.tts.synthesizeStream(processedText)) {
2637
4555
  if (!this.isSpeaking) break;
@@ -2646,6 +4564,8 @@ var StreamHandler = class {
2646
4564
  }
2647
4565
  } catch (e) {
2648
4566
  getLogger().error(`TTS streaming error (${this.deps.bridge.label}):`, e);
4567
+ } finally {
4568
+ this.resetTtsCarry();
2649
4569
  }
2650
4570
  }
2651
4571
  /** Handle a final transcript from STT in pipeline mode. */
@@ -2663,63 +4583,30 @@ var StreamHandler = class {
2663
4583
  }
2664
4584
  }
2665
4585
  async processTranscript(transcript) {
2666
- if (transcript.text && this.isSpeaking) {
2667
- getLogger().debug(
2668
- `Barge-in: caller spoke over agent (${sanitizeLogValue(transcript.text.slice(0, 40))})`
2669
- );
2670
- this.isSpeaking = false;
2671
- try {
2672
- this.deps.bridge.sendClear(this.ws, this.streamSid);
2673
- } catch (err) {
2674
- getLogger().debug(`sendClear during barge-in failed: ${String(err)}`);
2675
- }
2676
- this.metricsAcc.recordTurnInterrupted();
4586
+ let interrupted = this.handleBargeIn(transcript);
4587
+ if (transcript.text) {
4588
+ this.metricsAcc.startTurnIfIdle();
2677
4589
  }
2678
- if (!transcript.isFinal || !transcript.text) return;
2679
- const now = Date.now();
2680
- const normalised = transcript.text.trim().toLowerCase();
2681
- const stripped = normalised.replace(/[.,!?;: ]+$/, "").trim();
2682
- const sinceLastMs = now - this.lastCommitAt;
2683
- const HALLUCINATIONS = /* @__PURE__ */ new Set([
2684
- "you",
2685
- "thank you",
2686
- "thanks",
2687
- "yeah",
2688
- "yes",
2689
- "no",
2690
- "okay",
2691
- "ok",
2692
- "uh",
2693
- "um",
2694
- "mmm",
2695
- "hmm",
2696
- ".",
2697
- "bye",
2698
- "right",
2699
- "cool"
2700
- ]);
2701
- if (HALLUCINATIONS.has(stripped) || stripped === "") {
2702
- getLogger().debug(`Dropped likely STT hallucination: ${sanitizeLogValue(normalised.slice(0, 40))}`);
2703
- return;
2704
- }
2705
- if (sinceLastMs < 2e3 && normalised === this.lastCommitText) {
2706
- getLogger().debug(
2707
- `Dropped duplicate final transcript (${(sinceLastMs / 1e3).toFixed(1)}s since last): ${sanitizeLogValue(normalised.slice(0, 40))}`
2708
- );
2709
- return;
4590
+ if (transcript.speechFinal) {
4591
+ this.metricsAcc.recordVadStop();
2710
4592
  }
2711
- if (sinceLastMs < 500) {
2712
- getLogger().debug(
2713
- `Dropped back-to-back final transcript (${(sinceLastMs / 1e3).toFixed(2)}s since last): ${sanitizeLogValue(normalised.slice(0, 40))}`
2714
- );
2715
- return;
2716
- }
2717
- this.lastCommitText = normalised;
2718
- this.lastCommitAt = now;
4593
+ if (!transcript.isFinal || !transcript.text) return;
4594
+ if (!this.commitTranscript(transcript.text)) return;
2719
4595
  const label = this.deps.bridge.label;
2720
4596
  getLogger().debug(`User (${label} pipeline): ${sanitizeLogValue(transcript.text)}`);
2721
- this.metricsAcc.startTurn();
4597
+ this.metricsAcc.startTurnIfIdle();
2722
4598
  this.metricsAcc.recordSttComplete(transcript.text);
4599
+ this.metricsAcc.recordSttFinalTimestamp();
4600
+ const endpointSpan = startSpan(SPAN_ENDPOINT, { "patter.call.id": this.callId });
4601
+ let endpointSpanClosed = false;
4602
+ const closeEndpointSpan = () => {
4603
+ if (endpointSpanClosed) return;
4604
+ endpointSpanClosed = true;
4605
+ try {
4606
+ endpointSpan.end();
4607
+ } catch {
4608
+ }
4609
+ };
2723
4610
  if (this.deps.onTranscript) {
2724
4611
  await this.deps.onTranscript({
2725
4612
  role: "user",
@@ -2734,10 +4621,14 @@ var StreamHandler = class {
2734
4621
  if (filteredTranscript === null) {
2735
4622
  getLogger().debug(`afterTranscribe hook vetoed turn (${label})`);
2736
4623
  this.metricsAcc.recordTurnInterrupted();
4624
+ closeEndpointSpan();
2737
4625
  return;
2738
4626
  }
2739
4627
  this.history.push({ role: "user", text: filteredTranscript, timestamp: Date.now() });
2740
4628
  let responseText = "";
4629
+ this.metricsAcc.recordOnUserTurnCompletedDelay(0);
4630
+ this.metricsAcc.recordTurnCommitted();
4631
+ closeEndpointSpan();
2741
4632
  if (this.deps.onMessage && typeof this.deps.onMessage === "function") {
2742
4633
  try {
2743
4634
  responseText = await this.deps.onMessage({
@@ -2767,104 +4658,203 @@ var StreamHandler = class {
2767
4658
  if (isWebSocketUrl(this.deps.onMessage)) {
2768
4659
  await this.handleWebSocketResponse(msgData);
2769
4660
  return;
2770
- } else {
2771
- try {
2772
- responseText = await this.deps.remoteHandler.callWebhook(this.deps.onMessage, msgData);
2773
- } catch (e) {
2774
- getLogger().error(`Webhook remote error (${label}):`, e);
2775
- return;
2776
- }
2777
4661
  }
2778
- } else if (this.llmLoop) {
2779
- const callCtx = { call_id: this.callId, caller: this.caller, callee: this.callee };
2780
- const chunker = new SentenceChunker();
2781
- const allParts = [];
2782
- const ttsFirstByteSent = { value: false };
2783
- this.isSpeaking = true;
2784
- let llmError = false;
2785
4662
  try {
2786
- try {
2787
- for await (const token of this.llmLoop.run(filteredTranscript, this.history.entries, callCtx)) {
2788
- allParts.push(token);
2789
- const sentences = chunker.push(token);
2790
- for (const sentence of sentences) {
2791
- if (!this.isSpeaking) break;
2792
- const guard = checkGuardrails(sentence, this.deps.agent.guardrails);
2793
- const sentenceText = guard ? guard.replacement ?? "I'm sorry, I can't respond to that." : sentence;
2794
- await this.synthesizeSentence(sentenceText, hookExecutor, hookCtx, ttsFirstByteSent);
2795
- }
2796
- if (!this.isSpeaking) break;
2797
- }
2798
- } catch (e) {
2799
- llmError = true;
2800
- chunker.reset();
2801
- getLogger().error(`LLM loop error (${label}):`, e);
2802
- }
2803
- this.metricsAcc.recordLlmComplete();
2804
- if (!llmError && this.isSpeaking) {
2805
- for (const sentence of chunker.flush()) {
2806
- if (!this.isSpeaking) break;
2807
- const guard = checkGuardrails(sentence, this.deps.agent.guardrails);
2808
- const sentenceText = guard ? guard.replacement ?? "I'm sorry, I can't respond to that." : sentence;
2809
- await this.synthesizeSentence(sentenceText, hookExecutor, hookCtx, ttsFirstByteSent);
2810
- }
2811
- }
2812
- } finally {
2813
- this.isSpeaking = false;
4663
+ responseText = await this.deps.remoteHandler.callWebhook(this.deps.onMessage, msgData);
4664
+ } catch (e) {
4665
+ getLogger().error(`Webhook remote error (${label}):`, e);
4666
+ return;
2814
4667
  }
2815
- responseText = allParts.join("");
4668
+ } else if (this.llmLoop) {
4669
+ responseText = await this.runPipelineLlm(filteredTranscript, hookExecutor, hookCtx);
2816
4670
  } else {
2817
4671
  return;
2818
4672
  }
2819
4673
  if (!responseText) return;
2820
- if (!this.llmLoop) {
2821
- const guard = checkGuardrails(responseText, this.deps.agent.guardrails);
2822
- if (guard) {
2823
- getLogger().debug(`Guardrail '${guard.name}' triggered (pipeline)`);
2824
- responseText = guard.replacement ?? "I'm sorry, I can't respond to that.";
2825
- }
2826
- this.metricsAcc.recordLlmComplete();
4674
+ if (this.llmLoop) {
2827
4675
  this.history.push({ role: "assistant", text: responseText, timestamp: Date.now() });
2828
- const chunker = new SentenceChunker();
2829
- const sentences = [...chunker.push(responseText), ...chunker.flush()];
2830
- const ttsFirstByteSent = { value: false };
2831
- let interrupted = false;
2832
- this.isSpeaking = true;
4676
+ this.metricsAcc.recordTtsComplete(responseText);
4677
+ } else {
4678
+ interrupted = await this.runRegularLlm(responseText, hookExecutor, hookCtx) || interrupted;
4679
+ responseText = this.history.entries[this.history.entries.length - 1]?.text ?? responseText;
4680
+ }
4681
+ if (!interrupted) {
4682
+ await this.emitTurnMetrics(this.metricsAcc.recordTurnComplete(responseText));
4683
+ }
4684
+ }
4685
+ /**
4686
+ * Barge-in: caller spoke over in-flight TTS. Flip ``isSpeaking`` so the
4687
+ * sentence loop exits on its next check, clear downstream audio buffers,
4688
+ * record the interruption, and return ``true`` so the caller skips the
4689
+ * turn-complete record.
4690
+ */
4691
+ handleBargeIn(transcript) {
4692
+ if (!transcript.text || !this.isSpeaking) return false;
4693
+ getLogger().debug(
4694
+ `Barge-in: caller spoke over agent (${sanitizeLogValue(transcript.text.slice(0, 40))})`
4695
+ );
4696
+ this.metricsAcc.recordOverlapStart();
4697
+ this.metricsAcc.recordBargeinDetected();
4698
+ const bargeinSpan = startSpan(SPAN_BARGEIN, { "patter.call.id": this.callId });
4699
+ try {
4700
+ this.cancelSpeaking();
4701
+ try {
4702
+ this.deps.bridge.sendClear(this.ws, this.streamSid);
4703
+ } catch (err) {
4704
+ getLogger().debug(`sendClear during barge-in failed: ${String(err)}`);
4705
+ }
4706
+ this.metricsAcc.recordTtsStopped();
4707
+ this.metricsAcc.recordTurnInterrupted();
4708
+ this.metricsAcc.recordOverlapEnd(true);
4709
+ } finally {
2833
4710
  try {
2834
- for (const sentence of sentences) {
2835
- if (!this.isSpeaking) {
2836
- interrupted = true;
2837
- break;
4711
+ bargeinSpan.end();
4712
+ } catch {
4713
+ }
4714
+ }
4715
+ return true;
4716
+ }
4717
+ /**
4718
+ * Dedup + throttle + hallucination filter for final STT transcripts.
4719
+ * Mirrors ``PipelineStreamHandler._stt_loop`` on the Python side.
4720
+ * Returns ``true`` when the transcript should be committed to a turn,
4721
+ * ``false`` when it must be dropped. Drop reasons:
4722
+ * - text matches common short hallucinations ("you", "thanks", ...)
4723
+ * - duplicate final within 2 s of previous commit
4724
+ * - back-to-back finals under 500 ms (too tight to be real utterances)
4725
+ */
4726
+ commitTranscript(text) {
4727
+ const now = Date.now();
4728
+ const normalised = text.trim().toLowerCase();
4729
+ const stripped = normalised.replace(/[.,!?;: ]+$/, "").trim();
4730
+ const sinceLastMs = now - this.lastCommitAt;
4731
+ if (HALLUCINATIONS.has(stripped) || stripped === "") {
4732
+ getLogger().debug(`Dropped likely STT hallucination: ${sanitizeLogValue(normalised.slice(0, 40))}`);
4733
+ return false;
4734
+ }
4735
+ if (sinceLastMs < 2e3 && normalised === this.lastCommitText) {
4736
+ getLogger().debug(
4737
+ `Dropped duplicate final transcript (${(sinceLastMs / 1e3).toFixed(1)}s since last): ${sanitizeLogValue(normalised.slice(0, 40))}`
4738
+ );
4739
+ return false;
4740
+ }
4741
+ if (sinceLastMs < 500) {
4742
+ getLogger().debug(
4743
+ `Dropped back-to-back final transcript (${(sinceLastMs / 1e3).toFixed(2)}s since last): ${sanitizeLogValue(normalised.slice(0, 40))}`
4744
+ );
4745
+ return false;
4746
+ }
4747
+ this.lastCommitText = normalised;
4748
+ this.lastCommitAt = now;
4749
+ return true;
4750
+ }
4751
+ /**
4752
+ * Streaming built-in LLM path with sentence chunking and per-sentence
4753
+ * guardrails/TTS. Returns the concatenated response text.
4754
+ */
4755
+ async runPipelineLlm(filteredTranscript, hookExecutor, hookCtx) {
4756
+ const label = this.deps.bridge.label;
4757
+ const callCtx = { call_id: this.callId, caller: this.caller, callee: this.callee };
4758
+ const chunker = new SentenceChunker();
4759
+ const allParts = [];
4760
+ const ttsFirstByteSent = { value: false };
4761
+ this.beginSpeaking();
4762
+ let llmError = false;
4763
+ const llmSpan = startSpan(SPAN_LLM, { "patter.call.id": this.callId });
4764
+ const guardAndSpeak = async (sentence, isFirst) => {
4765
+ if (isFirst) this.metricsAcc.recordLlmFirstSentenceComplete();
4766
+ const guard = checkGuardrails(sentence, this.deps.agent.guardrails);
4767
+ const sentenceText = guard ? guard.replacement ?? "I'm sorry, I can't respond to that." : sentence;
4768
+ await this.synthesizeSentence(sentenceText, hookExecutor, hookCtx, ttsFirstByteSent);
4769
+ };
4770
+ let firstSentenceEmitted = false;
4771
+ try {
4772
+ try {
4773
+ for await (const token of this.llmLoop.run(
4774
+ filteredTranscript,
4775
+ this.history.entries,
4776
+ callCtx,
4777
+ this.metricsAcc,
4778
+ hookExecutor,
4779
+ hookCtx
4780
+ )) {
4781
+ this.metricsAcc.recordLlmFirstToken();
4782
+ allParts.push(token);
4783
+ for (const sentence of chunker.push(token)) {
4784
+ if (!this.isSpeaking) break;
4785
+ await guardAndSpeak(sentence, !firstSentenceEmitted);
4786
+ firstSentenceEmitted = true;
2838
4787
  }
2839
- await this.synthesizeSentence(sentence, hookExecutor, hookCtx, ttsFirstByteSent);
4788
+ if (!this.isSpeaking) break;
2840
4789
  }
2841
- } finally {
2842
- this.isSpeaking = false;
4790
+ } catch (e) {
4791
+ llmError = true;
4792
+ chunker.reset();
4793
+ getLogger().error(`LLM loop error (${label}):`, e);
4794
+ this.metricsAcc.recordTurnInterrupted();
2843
4795
  }
2844
- if (!interrupted) {
2845
- this.metricsAcc.recordTtsComplete(responseText);
4796
+ this.metricsAcc.recordLlmComplete();
4797
+ if (!llmError && this.isSpeaking) {
4798
+ for (const sentence of chunker.flush()) {
4799
+ if (!this.isSpeaking) break;
4800
+ await guardAndSpeak(sentence, !firstSentenceEmitted);
4801
+ firstSentenceEmitted = true;
4802
+ }
2846
4803
  }
2847
- } else {
2848
- this.history.push({ role: "assistant", text: responseText, timestamp: Date.now() });
2849
- this.metricsAcc.recordTtsComplete(responseText);
4804
+ } finally {
4805
+ this.endSpeakingWithGrace();
4806
+ try {
4807
+ llmSpan.end();
4808
+ } catch {
4809
+ }
4810
+ }
4811
+ return allParts.join("");
4812
+ }
4813
+ /**
4814
+ * Non-streaming path (onMessage function / webhook): apply output guardrails,
4815
+ * push to history, sentence-chunk the text, synthesize. Returns ``true`` if
4816
+ * TTS was interrupted mid-flight so the caller can skip turn-complete.
4817
+ */
4818
+ async runRegularLlm(responseText, hookExecutor, hookCtx) {
4819
+ const guard = checkGuardrails(responseText, this.deps.agent.guardrails);
4820
+ let text = responseText;
4821
+ if (guard) {
4822
+ getLogger().debug(`Guardrail '${guard.name}' triggered (pipeline)`);
4823
+ text = guard.replacement ?? "I'm sorry, I can't respond to that.";
2850
4824
  }
2851
- const turn = this.metricsAcc.recordTurnComplete(responseText);
2852
- if (turn) {
2853
- this.deps.metricsStore.recordTurn({ call_id: this.callId, turn });
2854
- if (this.deps.onMetrics) await this.deps.onMetrics({ call_id: this.callId, turn });
4825
+ this.metricsAcc.recordLlmComplete();
4826
+ this.history.push({ role: "assistant", text, timestamp: Date.now() });
4827
+ const chunker = new SentenceChunker();
4828
+ const sentences = [...chunker.push(text), ...chunker.flush()];
4829
+ const ttsFirstByteSent = { value: false };
4830
+ this.beginSpeaking();
4831
+ let interrupted = false;
4832
+ try {
4833
+ for (const sentence of sentences) {
4834
+ if (!this.isSpeaking) {
4835
+ interrupted = true;
4836
+ break;
4837
+ }
4838
+ await this.synthesizeSentence(sentence, hookExecutor, hookCtx, ttsFirstByteSent);
4839
+ }
4840
+ } finally {
4841
+ this.endSpeakingWithGrace();
2855
4842
  }
4843
+ if (!interrupted) this.metricsAcc.recordTtsComplete(text);
4844
+ return interrupted;
2856
4845
  }
2857
4846
  /** Handle streaming WebSocket remote response with TTS. */
2858
4847
  async handleWebSocketResponse(msgData) {
2859
4848
  const onMessage = this.deps.onMessage;
2860
4849
  const parts = [];
2861
4850
  this.metricsAcc.recordLlmComplete();
2862
- this.isSpeaking = true;
4851
+ this.beginSpeaking();
2863
4852
  let wsTtsStarted = false;
2864
4853
  try {
2865
4854
  for await (const chunk of this.deps.remoteHandler.callWebSocket(onMessage, msgData)) {
2866
4855
  parts.push(chunk);
2867
4856
  if (this.tts) {
4857
+ this.resetTtsCarry();
2868
4858
  for await (const audioChunk of this.tts.synthesizeStream(chunk)) {
2869
4859
  if (!this.isSpeaking) break;
2870
4860
  if (!wsTtsStarted) {
@@ -2879,15 +4869,12 @@ var StreamHandler = class {
2879
4869
  } catch (e) {
2880
4870
  getLogger().error(`WebSocket remote error (${this.deps.bridge.label}):`, e);
2881
4871
  } finally {
2882
- this.isSpeaking = false;
4872
+ this.endSpeakingWithGrace();
4873
+ this.resetTtsCarry();
2883
4874
  }
2884
4875
  const responseText = parts.join("");
2885
4876
  this.metricsAcc.recordTtsComplete(responseText);
2886
- const turn = this.metricsAcc.recordTurnComplete(responseText);
2887
- if (turn) {
2888
- this.deps.metricsStore.recordTurn({ call_id: this.callId, turn });
2889
- if (this.deps.onMetrics) await this.deps.onMetrics({ call_id: this.callId, turn });
2890
- }
4877
+ await this.emitTurnMetrics(this.metricsAcc.recordTurnComplete(responseText));
2891
4878
  if (responseText) this.history.push({ role: "assistant", text: responseText, timestamp: Date.now() });
2892
4879
  }
2893
4880
  // ---------------------------------------------------------------------------
@@ -2917,91 +4904,97 @@ var StreamHandler = class {
2917
4904
  try {
2918
4905
  await this.handleAdapterEvent(type, eventData);
2919
4906
  } catch (err) {
2920
- getLogger().error(`Adapter event handler error (${label}):`, err);
2921
- }
2922
- });
2923
- }
2924
- async handleAdapterEvent(type, eventData) {
2925
- if (type === "audio") {
2926
- if (!this.responseAudioStarted) {
2927
- this.responseAudioStarted = true;
2928
- if (this.metricsAcc.turnActive === false) {
2929
- this.metricsAcc.startTurn();
2930
- }
2931
- this.metricsAcc.recordTtsFirstByte();
2932
- }
2933
- let outAudio = eventData;
2934
- if (this.deps.bridge.telephonyProvider === "telnyx") {
2935
- outAudio = resample8kTo16k(mulawToPcm16(outAudio));
2936
- }
2937
- const encoded = outAudio.toString("base64");
2938
- this.deps.bridge.sendAudio(this.ws, encoded, this.streamSid);
2939
- this.chunkCount++;
2940
- this.deps.bridge.sendMark(this.ws, `audio_${this.chunkCount}`, this.streamSid);
2941
- } else if (type === "transcript_input") {
2942
- const inputText = eventData;
2943
- getLogger().debug(`User (${this.deps.bridge.label}): ${sanitizeLogValue(inputText)}`);
2944
- this.history.push({ role: "user", text: inputText, timestamp: Date.now() });
2945
- this.metricsAcc.startTurn();
2946
- this.currentAgentText = "";
2947
- this.responseAudioStarted = false;
2948
- if (this.deps.onTranscript) {
2949
- await this.deps.onTranscript({
2950
- role: "user",
2951
- text: inputText,
2952
- call_id: this.callId,
2953
- history: [...this.history.entries]
2954
- });
2955
- }
2956
- } else if (type === "transcript_output") {
2957
- const outputText = eventData;
2958
- if (outputText) {
2959
- const triggered = checkGuardrails(outputText, this.deps.agent.guardrails);
2960
- if (triggered) {
2961
- getLogger().debug(`Guardrail '${triggered.name}' triggered`);
2962
- if (this.adapter instanceof OpenAIRealtimeAdapter) {
2963
- this.adapter.cancelResponse();
2964
- await this.adapter.sendText(triggered.replacement ?? "I'm sorry, I can't respond to that.");
2965
- }
2966
- }
2967
- this.currentAgentText += outputText;
2968
- }
2969
- } else if (type === "response_done") {
2970
- const responseData = eventData;
2971
- if (responseData) {
2972
- const usage = responseData.usage;
2973
- if (usage) {
2974
- this.metricsAcc.recordRealtimeUsage(usage);
2975
- }
2976
- }
2977
- if (this.currentAgentText) {
2978
- this.history.push({ role: "assistant", text: this.currentAgentText, timestamp: Date.now() });
2979
- const turn = this.metricsAcc.recordTurnComplete(this.currentAgentText);
2980
- this.responseAudioStarted = false;
2981
- if (this.deps.onMetrics) {
2982
- await this.deps.onMetrics({
2983
- call_id: this.callId,
2984
- turn
2985
- });
2986
- }
2987
- this.deps.metricsStore.recordTurn({ call_id: this.callId, turn });
2988
- this.currentAgentText = "";
2989
- } else {
2990
- this.metricsAcc.recordTurnInterrupted();
2991
- this.responseAudioStarted = false;
4907
+ getLogger().error(`Adapter event handler error (${label}):`, err);
4908
+ }
4909
+ });
4910
+ }
4911
+ async handleAdapterEvent(type, eventData) {
4912
+ const handler = this.adapterEventHandlers[type];
4913
+ if (handler) await handler(eventData);
4914
+ }
4915
+ /** Event-type handler dispatch table for the Realtime adapter. */
4916
+ adapterEventHandlers = {
4917
+ audio: async (eventData) => this.onAdapterAudio(eventData),
4918
+ speech_stopped: async () => this.onAdapterSpeechStopped(),
4919
+ transcript_input: async (eventData) => this.onAdapterTranscriptInput(eventData),
4920
+ transcript_output: async (eventData) => this.onAdapterTranscriptOutput(eventData),
4921
+ response_done: async (eventData) => this.onAdapterResponseDone(eventData),
4922
+ speech_started: async () => this.onAdapterSpeechInterrupt(),
4923
+ interruption: async () => this.onAdapterSpeechInterrupt(),
4924
+ function_call: async (eventData) => {
4925
+ if (this.adapter instanceof OpenAIRealtimeAdapter) {
4926
+ await this.handleFunctionCall(eventData);
2992
4927
  }
2993
- } else if (type === "speech_started" || type === "interruption") {
2994
- this.deps.bridge.sendClear(this.ws, this.streamSid);
4928
+ }
4929
+ };
4930
+ async onAdapterAudio(eventData) {
4931
+ if (!this.responseAudioStarted) {
4932
+ this.responseAudioStarted = true;
4933
+ if (this.metricsAcc.turnActive === false) this.metricsAcc.startTurn();
4934
+ this.metricsAcc.recordTtsFirstByte();
4935
+ }
4936
+ const outAudio = eventData;
4937
+ this.deps.bridge.sendAudio(this.ws, outAudio.toString("base64"), this.streamSid);
4938
+ this.chunkCount++;
4939
+ this.deps.bridge.sendMark(this.ws, `audio_${this.chunkCount}`, this.streamSid);
4940
+ }
4941
+ onAdapterSpeechStopped() {
4942
+ if (!this.metricsAcc.turnActive) this.metricsAcc.startTurn();
4943
+ this.currentAgentText = "";
4944
+ this.responseAudioStarted = false;
4945
+ }
4946
+ async onAdapterTranscriptInput(inputText) {
4947
+ getLogger().debug(`User (${this.deps.bridge.label}): ${sanitizeLogValue(inputText)}`);
4948
+ this.history.push({ role: "user", text: inputText, timestamp: Date.now() });
4949
+ if (!this.metricsAcc.turnActive) {
4950
+ this.metricsAcc.startTurn();
4951
+ this.currentAgentText = "";
4952
+ this.responseAudioStarted = false;
4953
+ }
4954
+ this.metricsAcc.recordSttComplete(inputText);
4955
+ if (this.deps.onTranscript) {
4956
+ await this.deps.onTranscript({
4957
+ role: "user",
4958
+ text: inputText,
4959
+ call_id: this.callId,
4960
+ history: [...this.history.entries]
4961
+ });
4962
+ }
4963
+ }
4964
+ async onAdapterTranscriptOutput(outputText) {
4965
+ if (!outputText) return;
4966
+ const triggered = checkGuardrails(outputText, this.deps.agent.guardrails);
4967
+ if (triggered) {
4968
+ getLogger().debug(`Guardrail '${triggered.name}' triggered`);
2995
4969
  if (this.adapter instanceof OpenAIRealtimeAdapter) {
2996
4970
  this.adapter.cancelResponse();
4971
+ await this.adapter.sendText(triggered.replacement ?? "I'm sorry, I can't respond to that.");
2997
4972
  }
2998
- this.metricsAcc.recordTurnInterrupted();
4973
+ }
4974
+ this.currentAgentText += outputText;
4975
+ }
4976
+ async onAdapterResponseDone(responseData) {
4977
+ if (responseData) {
4978
+ const usage = responseData.usage;
4979
+ if (usage) this.metricsAcc.recordRealtimeUsage(usage);
4980
+ }
4981
+ if (this.currentAgentText) {
4982
+ this.history.push({ role: "assistant", text: this.currentAgentText, timestamp: Date.now() });
4983
+ this.responseAudioStarted = false;
4984
+ await this.emitTurnMetrics(this.metricsAcc.recordTurnComplete(this.currentAgentText));
2999
4985
  this.currentAgentText = "";
4986
+ } else {
4987
+ this.metricsAcc.recordTurnInterrupted();
3000
4988
  this.responseAudioStarted = false;
3001
- } else if (type === "function_call" && this.adapter instanceof OpenAIRealtimeAdapter) {
3002
- await this.handleFunctionCall(eventData);
3003
4989
  }
3004
4990
  }
4991
+ onAdapterSpeechInterrupt() {
4992
+ this.deps.bridge.sendClear(this.ws, this.streamSid);
4993
+ if (this.adapter instanceof OpenAIRealtimeAdapter) this.adapter.cancelResponse();
4994
+ this.metricsAcc.recordTurnInterrupted();
4995
+ this.currentAgentText = "";
4996
+ this.responseAudioStarted = false;
4997
+ }
3005
4998
  async handleFunctionCall(fc) {
3006
4999
  const adapter = this.adapter;
3007
5000
  if (fc.name === "transfer_call") {
@@ -3095,7 +5088,7 @@ var StreamHandler = class {
3095
5088
  finalMetrics
3096
5089
  );
3097
5090
  try {
3098
- const { notifyDashboard } = await import("./persistence-CYIGNHSU.mjs");
5091
+ const { notifyDashboard } = await import("./persistence-LQBYQPQQ.mjs");
3099
5092
  notifyDashboard(callEndData);
3100
5093
  } catch {
3101
5094
  }
@@ -3135,6 +5128,279 @@ async function queryDeepgramCost(metricsAcc, deepgramKey, deepgramRequestId) {
3135
5128
  }
3136
5129
  }
3137
5130
 
5131
+ // src/services/call-log.ts
5132
+ import * as crypto3 from "crypto";
5133
+ import * as fs2 from "fs";
5134
+ import { promises as fsp } from "fs";
5135
+ import * as os from "os";
5136
+ import * as path2 from "path";
5137
+ var SCHEMA_VERSION = "1.0";
5138
+ var DEFAULT_RETENTION_DAYS = 30;
5139
+ function xdgDataHome() {
5140
+ return process.env.XDG_DATA_HOME || path2.join(os.homedir(), ".local", "share");
5141
+ }
5142
+ function platformDefaultRoot() {
5143
+ if (process.platform === "darwin") {
5144
+ return path2.join(os.homedir(), "Library", "Application Support", "patter");
5145
+ }
5146
+ if (process.platform === "win32") {
5147
+ const localAppData = process.env.LOCALAPPDATA;
5148
+ if (localAppData) return path2.join(localAppData, "patter");
5149
+ return path2.join(os.homedir(), "AppData", "Local", "patter");
5150
+ }
5151
+ return path2.join(xdgDataHome(), "patter");
5152
+ }
5153
+ function resolveLogRoot(explicit) {
5154
+ const value = explicit ?? process.env.PATTER_LOG_DIR;
5155
+ if (!value) return null;
5156
+ if (value.trim().toLowerCase() === "auto") return platformDefaultRoot();
5157
+ if (value.startsWith("~")) return path2.join(os.homedir(), value.slice(1));
5158
+ return value;
5159
+ }
5160
+ function retentionDays() {
5161
+ const raw = process.env.PATTER_LOG_RETENTION_DAYS;
5162
+ if (raw === void 0) return DEFAULT_RETENTION_DAYS;
5163
+ const parsed = Number.parseInt(raw, 10);
5164
+ if (Number.isNaN(parsed)) return DEFAULT_RETENTION_DAYS;
5165
+ return Math.max(0, parsed);
5166
+ }
5167
+ function redactMode() {
5168
+ const raw = (process.env.PATTER_LOG_REDACT_PHONE || "mask").trim().toLowerCase();
5169
+ if (raw === "full" || raw === "mask" || raw === "hash_only") return raw;
5170
+ return "mask";
5171
+ }
5172
+ function redactPhone(raw) {
5173
+ if (!raw) return "";
5174
+ const mode = redactMode();
5175
+ if (mode === "full") return raw;
5176
+ if (mode === "hash_only") {
5177
+ return "sha256:" + crypto3.createHash("sha256").update(raw, "utf8").digest("hex").slice(0, 16);
5178
+ }
5179
+ return maskPhoneNumber(raw);
5180
+ }
5181
+ function utcIso(tsSeconds) {
5182
+ const ms = tsSeconds !== void 0 ? tsSeconds * 1e3 : Date.now();
5183
+ return new Date(ms).toISOString();
5184
+ }
5185
+ async function atomicWriteJson(filePath, payload) {
5186
+ const dir = path2.dirname(filePath);
5187
+ await fsp.mkdir(dir, { recursive: true });
5188
+ const tmp = path2.join(dir, `.tmp.${process.pid}.${crypto3.randomBytes(4).toString("hex")}.json`);
5189
+ try {
5190
+ const handle = await fsp.open(tmp, "w");
5191
+ try {
5192
+ await handle.writeFile(JSON.stringify(payload, null, 2) + "\n", { encoding: "utf8" });
5193
+ await handle.sync();
5194
+ } finally {
5195
+ await handle.close();
5196
+ }
5197
+ await fsp.rename(tmp, filePath);
5198
+ } catch (err) {
5199
+ try {
5200
+ await fsp.unlink(tmp);
5201
+ } catch {
5202
+ }
5203
+ throw err;
5204
+ }
5205
+ }
5206
+ async function appendJsonl(filePath, record) {
5207
+ await fsp.mkdir(path2.dirname(filePath), { recursive: true });
5208
+ await fsp.appendFile(filePath, JSON.stringify(record) + "\n", { encoding: "utf8" });
5209
+ }
5210
+ var CallLogger = class {
5211
+ root;
5212
+ constructor(root) {
5213
+ if (!root) {
5214
+ this.root = null;
5215
+ return;
5216
+ }
5217
+ const resolved = root.startsWith("~") ? path2.join(os.homedir(), root.slice(1)) : root;
5218
+ try {
5219
+ fs2.mkdirSync(resolved, { recursive: true });
5220
+ this.root = resolved;
5221
+ getLogger().info(`Call logs: ${resolved}`);
5222
+ } catch (err) {
5223
+ getLogger().warn(
5224
+ `Could not create call log root ${resolved}: ${sanitizeLogValue(String(err))}`
5225
+ );
5226
+ this.root = null;
5227
+ }
5228
+ }
5229
+ get enabled() {
5230
+ return this.root !== null;
5231
+ }
5232
+ callDir(callId, startedAtSeconds) {
5233
+ if (this.root === null) return null;
5234
+ const ms = startedAtSeconds !== void 0 ? startedAtSeconds * 1e3 : Date.now();
5235
+ const dt = new Date(ms);
5236
+ const year = String(dt.getUTCFullYear()).padStart(4, "0");
5237
+ const month = String(dt.getUTCMonth() + 1).padStart(2, "0");
5238
+ const day = String(dt.getUTCDate()).padStart(2, "0");
5239
+ const safeId = sanitizeLogValue(callId, 64).replace(/\//g, "_") || "unknown";
5240
+ return path2.join(this.root, "calls", year, month, day, safeId);
5241
+ }
5242
+ async logCallStart(callId, input = {}) {
5243
+ if (!this.enabled) return;
5244
+ const startedAt = Date.now() / 1e3;
5245
+ const dir = this.callDir(callId, startedAt);
5246
+ if (dir === null) return;
5247
+ const metadata = {
5248
+ schema_version: SCHEMA_VERSION,
5249
+ call_id: callId,
5250
+ trace_id: input.traceId ?? null,
5251
+ started_at: utcIso(startedAt),
5252
+ ended_at: null,
5253
+ duration_ms: null,
5254
+ status: "in_progress",
5255
+ caller: redactPhone(input.caller ?? ""),
5256
+ callee: redactPhone(input.callee ?? ""),
5257
+ telephony_provider: input.telephonyProvider ?? "",
5258
+ provider_mode: input.providerMode ?? "",
5259
+ agent: input.agent ?? {},
5260
+ turns: 0,
5261
+ cost: null,
5262
+ latency: null,
5263
+ error: null
5264
+ };
5265
+ try {
5266
+ await atomicWriteJson(path2.join(dir, "metadata.json"), metadata);
5267
+ } catch (err) {
5268
+ getLogger().warn(`call_log write failed (${sanitizeLogValue(callId)}): ${sanitizeLogValue(String(err))}`);
5269
+ }
5270
+ if (crypto3.randomBytes(1)[0] < 5) {
5271
+ this.sweepOldDays();
5272
+ }
5273
+ }
5274
+ async logTurn(callId, turn) {
5275
+ if (!this.enabled) return;
5276
+ const dir = this.callDir(callId);
5277
+ if (dir === null) return;
5278
+ const record = {
5279
+ schema_version: SCHEMA_VERSION,
5280
+ ts: utcIso(typeof turn.timestamp === "number" ? turn.timestamp : void 0),
5281
+ ...turn
5282
+ };
5283
+ try {
5284
+ await appendJsonl(path2.join(dir, "transcript.jsonl"), record);
5285
+ } catch (err) {
5286
+ getLogger().warn(
5287
+ `call_log turn write failed (${sanitizeLogValue(callId)}): ${sanitizeLogValue(String(err))}`
5288
+ );
5289
+ }
5290
+ }
5291
+ async logEvent(callId, eventType, payload = {}) {
5292
+ if (!this.enabled) return;
5293
+ const dir = this.callDir(callId);
5294
+ if (dir === null) return;
5295
+ const record = {
5296
+ schema_version: SCHEMA_VERSION,
5297
+ ts: utcIso(),
5298
+ type: eventType,
5299
+ data: payload
5300
+ };
5301
+ try {
5302
+ await appendJsonl(path2.join(dir, "events.jsonl"), record);
5303
+ } catch (err) {
5304
+ getLogger().warn(
5305
+ `call_log event write failed (${sanitizeLogValue(callId)}): ${sanitizeLogValue(String(err))}`
5306
+ );
5307
+ }
5308
+ }
5309
+ async logCallEnd(callId, input = {}) {
5310
+ if (!this.enabled) return;
5311
+ const dir = this.callDir(callId);
5312
+ if (dir === null) return;
5313
+ const metadataPath = path2.join(dir, "metadata.json");
5314
+ let existing = {};
5315
+ try {
5316
+ existing = JSON.parse(await fsp.readFile(metadataPath, "utf8"));
5317
+ } catch {
5318
+ existing = {
5319
+ schema_version: SCHEMA_VERSION,
5320
+ call_id: callId,
5321
+ started_at: null
5322
+ };
5323
+ }
5324
+ const merged = {
5325
+ ...existing,
5326
+ ended_at: utcIso(),
5327
+ duration_ms: input.durationSeconds !== void 0 ? Math.round(input.durationSeconds * 1e3 * 10) / 10 : null,
5328
+ status: input.status ?? "completed",
5329
+ turns: input.turns ?? null,
5330
+ cost: input.cost ?? null,
5331
+ latency: input.latency ?? null,
5332
+ error: input.error ?? null
5333
+ };
5334
+ try {
5335
+ await atomicWriteJson(metadataPath, merged);
5336
+ } catch (err) {
5337
+ getLogger().warn(
5338
+ `call_log finalize failed (${sanitizeLogValue(callId)}): ${sanitizeLogValue(String(err))}`
5339
+ );
5340
+ }
5341
+ }
5342
+ // --- Retention ---------------------------------------------------------
5343
+ sweepOldDays() {
5344
+ if (this.root === null) return;
5345
+ const days = retentionDays();
5346
+ if (days === 0) return;
5347
+ const cutoff = Date.now() / 1e3 - days * 86400;
5348
+ const callsRoot = path2.join(this.root, "calls");
5349
+ if (!fs2.existsSync(callsRoot)) return;
5350
+ try {
5351
+ for (const yearName of fs2.readdirSync(callsRoot)) {
5352
+ if (!/^\d+$/.test(yearName)) continue;
5353
+ const yearDir = path2.join(callsRoot, yearName);
5354
+ if (!fs2.statSync(yearDir).isDirectory()) continue;
5355
+ for (const monthName of fs2.readdirSync(yearDir)) {
5356
+ if (!/^\d+$/.test(monthName)) continue;
5357
+ const monthDir = path2.join(yearDir, monthName);
5358
+ if (!fs2.statSync(monthDir).isDirectory()) continue;
5359
+ for (const dayName of fs2.readdirSync(monthDir)) {
5360
+ if (!/^\d+$/.test(dayName)) continue;
5361
+ const dayDir = path2.join(monthDir, dayName);
5362
+ const y = Number.parseInt(yearName, 10);
5363
+ const m = Number.parseInt(monthName, 10);
5364
+ const d = Number.parseInt(dayName, 10);
5365
+ const ts = Date.UTC(y, m - 1, d) / 1e3;
5366
+ if (ts < cutoff) {
5367
+ rmTree(dayDir);
5368
+ }
5369
+ }
5370
+ try {
5371
+ if (fs2.readdirSync(monthDir).length === 0) fs2.rmdirSync(monthDir);
5372
+ } catch {
5373
+ }
5374
+ }
5375
+ try {
5376
+ if (fs2.readdirSync(yearDir).length === 0) fs2.rmdirSync(yearDir);
5377
+ } catch {
5378
+ }
5379
+ }
5380
+ } catch (err) {
5381
+ getLogger().debug(`call_log sweep failed: ${sanitizeLogValue(String(err))}`);
5382
+ }
5383
+ }
5384
+ };
5385
+ function rmTree(target) {
5386
+ try {
5387
+ for (const child of fs2.readdirSync(target)) {
5388
+ const childPath = path2.join(target, child);
5389
+ const stat = fs2.lstatSync(childPath);
5390
+ if (stat.isDirectory()) {
5391
+ rmTree(childPath);
5392
+ } else {
5393
+ try {
5394
+ fs2.unlinkSync(childPath);
5395
+ } catch {
5396
+ }
5397
+ }
5398
+ }
5399
+ fs2.rmdirSync(target);
5400
+ } catch {
5401
+ }
5402
+ }
5403
+
3138
5404
  // src/server.ts
3139
5405
  var TRANSFER_CALL_TOOL = {
3140
5406
  name: "transfer_call",
@@ -3171,37 +5437,76 @@ function validateWebhookUrl(url) {
3171
5437
  if (!["http:", "https:"].includes(parsed.protocol)) {
3172
5438
  throw new Error(`Invalid webhook URL scheme: ${parsed.protocol}`);
3173
5439
  }
3174
- const hostname = parsed.hostname;
3175
- const blocked = [
3176
- /^127\./,
3177
- /^10\./,
3178
- /^172\.(1[6-9]|2\d|3[01])\./,
3179
- /^192\.168\./,
3180
- /^169\.254\./,
3181
- /^0\./,
3182
- /^::1$/,
3183
- /^localhost$/i,
3184
- /^metadata\.google\.internal$/i
3185
- ];
3186
- if (blocked.some((re) => re.test(hostname))) {
3187
- throw new Error(`Webhook URL blocked: ${hostname} is a private/internal address`);
5440
+ const rawHost = parsed.hostname;
5441
+ const host = rawHost.replace(/^\[/, "").replace(/\]$/, "").toLowerCase();
5442
+ const BLOCKED_HOSTNAMES = /* @__PURE__ */ new Set([
5443
+ "localhost",
5444
+ "ip6-localhost",
5445
+ "ip6-loopback",
5446
+ "metadata",
5447
+ "metadata.google.internal",
5448
+ "metadata.azure.com"
5449
+ ]);
5450
+ if (BLOCKED_HOSTNAMES.has(host)) {
5451
+ throw new Error(`Webhook URL blocked: ${rawHost} is a private/internal address`);
5452
+ }
5453
+ const IPV4_RE = /^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/;
5454
+ const v4 = IPV4_RE.exec(host);
5455
+ if (v4) {
5456
+ const oct = v4.slice(1, 5).map((s) => parseInt(s, 10));
5457
+ if (oct.some((n) => n < 0 || n > 255)) {
5458
+ throw new Error(`Webhook URL blocked: ${rawHost} is not a valid IPv4 address`);
5459
+ }
5460
+ const [a, b] = oct;
5461
+ if (a === 0 || // 0.0.0.0/8 (any 0.x)
5462
+ a === 10 || // 10.0.0.0/8
5463
+ a === 127 || // 127.0.0.0/8 loopback
5464
+ a === 169 && b === 254 || // 169.254.0.0/16 link-local
5465
+ a === 172 && b >= 16 && b <= 31 || // 172.16.0.0/12
5466
+ a === 192 && b === 168) {
5467
+ throw new Error(`Webhook URL blocked: ${rawHost} is a private/internal address`);
5468
+ }
5469
+ return;
5470
+ }
5471
+ if (host.includes(":")) {
5472
+ if (host === "::1" || host === "::") {
5473
+ throw new Error(`Webhook URL blocked: ${rawHost} is a private/internal address`);
5474
+ }
5475
+ if (/^fc[0-9a-f]{0,2}:/.test(host) || /^fd[0-9a-f]{0,2}:/.test(host)) {
5476
+ throw new Error(`Webhook URL blocked: ${rawHost} is a private/internal address`);
5477
+ }
5478
+ if (/^fe[89ab][0-9a-f]?:/.test(host)) {
5479
+ throw new Error(`Webhook URL blocked: ${rawHost} is a private/internal address`);
5480
+ }
3188
5481
  }
3189
5482
  }
3190
5483
  function validateTelnyxSignature(rawBody, signature, timestamp, publicKey, toleranceSec = 300) {
3191
5484
  try {
3192
5485
  const ts = parseInt(timestamp, 10);
3193
5486
  if (!Number.isFinite(ts)) return false;
3194
- const ageMs = Date.now() - ts;
5487
+ const tsMs = ts < 1e12 ? ts * 1e3 : ts;
5488
+ const ageMs = Date.now() - tsMs;
3195
5489
  if (ageMs < 0 || ageMs > toleranceSec * 1e3) return false;
3196
5490
  const payload = `${timestamp}|${rawBody}`;
3197
5491
  const keyBuffer = Buffer.from(publicKey, "base64");
3198
- const sigBuffer = Buffer.from(signature, "base64");
3199
- const keyObject = crypto3.createPublicKey({
5492
+ const keyObject = crypto4.createPublicKey({
3200
5493
  key: keyBuffer,
3201
5494
  format: "der",
3202
5495
  type: "spki"
3203
5496
  });
3204
- return crypto3.verify(null, Buffer.from(payload), keyObject, sigBuffer);
5497
+ for (const rawSig of signature.split(",")) {
5498
+ const trimmed = rawSig.trim();
5499
+ if (!trimmed) continue;
5500
+ try {
5501
+ const sigBuffer = Buffer.from(trimmed, "base64");
5502
+ if (crypto4.verify(null, Buffer.from(payload), keyObject, sigBuffer)) {
5503
+ return true;
5504
+ }
5505
+ } catch {
5506
+ continue;
5507
+ }
5508
+ }
5509
+ return false;
3205
5510
  } catch {
3206
5511
  return false;
3207
5512
  }
@@ -3211,9 +5516,12 @@ function validateTwilioSid(sid, prefix = "CA") {
3211
5516
  }
3212
5517
  function validateTwilioSignature(url, params, signature, authToken) {
3213
5518
  const data = url + Object.keys(params).sort().reduce((acc, key) => acc + key + (params[key] ?? ""), "");
3214
- const expected = crypto3.createHmac("sha1", authToken).update(data).digest("base64");
5519
+ const expected = crypto4.createHmac("sha1", authToken).update(data).digest("base64");
3215
5520
  try {
3216
- return crypto3.timingSafeEqual(Buffer.from(signature), Buffer.from(expected));
5521
+ const sigBuf = Buffer.from(signature);
5522
+ const expBuf = Buffer.from(expected);
5523
+ if (sigBuf.length !== expBuf.length) return false;
5524
+ return crypto4.timingSafeEqual(sigBuf, expBuf);
3217
5525
  } catch {
3218
5526
  return false;
3219
5527
  }
@@ -3247,8 +5555,6 @@ function buildAIAdapter(config, agent, resolvedPrompt) {
3247
5555
  engine.apiKey,
3248
5556
  engine.agentId,
3249
5557
  agent.voice ?? "EXAVITQu4vr4xnSDxMaL",
3250
- "eleven_turbo_v2_5",
3251
- agent.language ?? "en",
3252
5558
  agent.firstMessage ?? ""
3253
5559
  );
3254
5560
  }
@@ -3353,7 +5659,7 @@ function isValidTelnyxTransferTarget(target) {
3353
5659
  if (/^\+[1-9]\d{6,14}$/.test(target)) return true;
3354
5660
  return /^sips?:[^\s@]+(@[^\s]+)?$/i.test(target);
3355
5661
  }
3356
- var TELNYX_DTMF_ALLOWED = new Set("0123456789*#ABCDabcd");
5662
+ var TELNYX_DTMF_ALLOWED = new Set("0123456789*#ABCDabcdwW");
3357
5663
  var TELNYX_DTMF_DURATION_MS = 250;
3358
5664
  async function sleep(ms) {
3359
5665
  if (ms <= 0) return;
@@ -3379,7 +5685,7 @@ var TelnyxBridge = class {
3379
5685
  return;
3380
5686
  }
3381
5687
  const telnyxKey = this.config.telnyxKey ?? "";
3382
- await fetch(`https://api.telnyx.com/v2/calls/${callId}/actions/transfer`, {
5688
+ await fetch(`https://api.telnyx.com/v2/calls/${encodeURIComponent(callId)}/actions/transfer`, {
3383
5689
  method: "POST",
3384
5690
  headers: { "Content-Type": "application/json", "Authorization": `Bearer ${telnyxKey}` },
3385
5691
  body: JSON.stringify({ to: toNumber })
@@ -3403,7 +5709,7 @@ var TelnyxBridge = class {
3403
5709
  }
3404
5710
  const duration = Math.max(100, Math.min(500, TELNYX_DTMF_DURATION_MS));
3405
5711
  for (let i = 0; i < filtered.length; i += 1) {
3406
- await fetch(`https://api.telnyx.com/v2/calls/${callId}/actions/send_dtmf`, {
5712
+ await fetch(`https://api.telnyx.com/v2/calls/${encodeURIComponent(callId)}/actions/send_dtmf`, {
3407
5713
  method: "POST",
3408
5714
  headers: { "Content-Type": "application/json", "Authorization": `Bearer ${telnyxKey}` },
3409
5715
  body: JSON.stringify({ digits: filtered[i], duration_millis: duration })
@@ -3418,7 +5724,7 @@ var TelnyxBridge = class {
3418
5724
  const telnyxKey = this.config.telnyxKey ?? "";
3419
5725
  if (!telnyxKey || !callId) return;
3420
5726
  try {
3421
- const resp = await fetch(`https://api.telnyx.com/v2/calls/${callId}/actions/record_start`, {
5727
+ const resp = await fetch(`https://api.telnyx.com/v2/calls/${encodeURIComponent(callId)}/actions/record_start`, {
3422
5728
  method: "POST",
3423
5729
  headers: { "Content-Type": "application/json", "Authorization": `Bearer ${telnyxKey}` },
3424
5730
  body: JSON.stringify({ format: "mp3", channels: "single" })
@@ -3436,7 +5742,7 @@ var TelnyxBridge = class {
3436
5742
  const telnyxKey = this.config.telnyxKey ?? "";
3437
5743
  if (!telnyxKey || !callId) return;
3438
5744
  try {
3439
- const resp = await fetch(`https://api.telnyx.com/v2/calls/${callId}/actions/record_stop`, {
5745
+ const resp = await fetch(`https://api.telnyx.com/v2/calls/${encodeURIComponent(callId)}/actions/record_stop`, {
3440
5746
  method: "POST",
3441
5747
  headers: { "Content-Type": "application/json", "Authorization": `Bearer ${telnyxKey}` },
3442
5748
  body: JSON.stringify({})
@@ -3450,11 +5756,11 @@ var TelnyxBridge = class {
3450
5756
  getLogger().warn(`Telnyx record_stop error: ${String(e)}`);
3451
5757
  }
3452
5758
  }
3453
- async endCall(callId, ws) {
5759
+ async endCall(callId, _ws) {
3454
5760
  const telnyxKey = this.config.telnyxKey ?? "";
3455
5761
  if (callId && telnyxKey) {
3456
5762
  try {
3457
- await fetch(`https://api.telnyx.com/v2/calls/${callId}/actions/hangup`, {
5763
+ await fetch(`https://api.telnyx.com/v2/calls/${encodeURIComponent(callId)}/actions/hangup`, {
3458
5764
  method: "POST",
3459
5765
  headers: { "Content-Type": "application/json", "Authorization": `Bearer ${telnyxKey}` },
3460
5766
  body: JSON.stringify({})
@@ -3462,7 +5768,6 @@ var TelnyxBridge = class {
3462
5768
  } catch {
3463
5769
  }
3464
5770
  }
3465
- ws.close();
3466
5771
  }
3467
5772
  createStt(agent) {
3468
5773
  return createSTT(agent);
@@ -3471,7 +5776,7 @@ var TelnyxBridge = class {
3471
5776
  if (this.config.telnyxKey && callId) {
3472
5777
  try {
3473
5778
  const resp = await fetch(
3474
- `https://api.telnyx.com/v2/calls/${callId}`,
5779
+ `https://api.telnyx.com/v2/calls/${encodeURIComponent(callId)}`,
3475
5780
  {
3476
5781
  headers: { "Authorization": `Bearer ${this.config.telnyxKey}` },
3477
5782
  signal: AbortSignal.timeout(5e3)
@@ -3506,6 +5811,17 @@ var EmbeddedServer = class {
3506
5811
  this.dashboardToken = dashboardToken;
3507
5812
  this.metricsStore = new MetricsStore();
3508
5813
  this.pricing = mergePricing(pricingOverrides);
5814
+ const logRoot = resolveLogRoot();
5815
+ if (logRoot) {
5816
+ try {
5817
+ const restored = this.metricsStore.hydrate(logRoot);
5818
+ if (restored > 0) {
5819
+ getLogger().info(`Dashboard hydrated ${restored} call(s) from ${logRoot}`);
5820
+ }
5821
+ } catch (err) {
5822
+ getLogger().warn(`Dashboard hydration failed: ${String(err)}`);
5823
+ }
5824
+ }
3509
5825
  }
3510
5826
  server = null;
3511
5827
  wss = null;
@@ -3514,6 +5830,8 @@ var EmbeddedServer = class {
3514
5830
  metricsStore;
3515
5831
  pricing;
3516
5832
  remoteHandler = new RemoteMessageHandler();
5833
+ /** Opt-in per-call filesystem logger (set via PATTER_LOG_DIR). */
5834
+ callLogger = new CallLogger(resolveLogRoot());
3517
5835
  /** Active WebSocket connections tracked for graceful shutdown. */
3518
5836
  activeConnections = /* @__PURE__ */ new Set();
3519
5837
  activeCallIds = /* @__PURE__ */ new Map();
@@ -3522,6 +5840,18 @@ var EmbeddedServer = class {
3522
5840
  if (!webhookUrlPattern.test(this.config.webhookUrl)) {
3523
5841
  throw new Error(`Invalid webhookUrl: must be a hostname with no protocol prefix or path (got: '${this.config.webhookUrl}')`);
3524
5842
  }
5843
+ if (this.config.requireSignature !== false) {
5844
+ if (this.config.telephonyProvider === "twilio" && !this.config.twilioToken) {
5845
+ getLogger().warn(
5846
+ "Twilio webhook enforcement ACTIVE but twilioToken is empty \u2014 webhooks will 503. Set requireSignature=false for local dev."
5847
+ );
5848
+ }
5849
+ if (this.config.telephonyProvider === "telnyx" && !this.config.telnyxPublicKey) {
5850
+ getLogger().warn(
5851
+ "Telnyx webhook enforcement ACTIVE but telnyxPublicKey is empty \u2014 webhooks will 503. Set requireSignature=false for local dev."
5852
+ );
5853
+ }
5854
+ }
3525
5855
  const app = express();
3526
5856
  app.use((req, _res, next) => {
3527
5857
  if (req.path === "/webhooks/telnyx/voice") {
@@ -3561,6 +5891,10 @@ var EmbeddedServer = class {
3561
5891
  res.status(403).send("Invalid signature");
3562
5892
  return;
3563
5893
  }
5894
+ } else if (this.config.requireSignature !== false) {
5895
+ getLogger().error("Twilio webhook rejected: twilioToken not configured and requireSignature is not false");
5896
+ res.status(503).send("Webhook signature required");
5897
+ return;
3564
5898
  }
3565
5899
  const body = req.body;
3566
5900
  const callSid = sanitizeLogValue(body["CallSid"] ?? "");
@@ -3586,6 +5920,10 @@ var EmbeddedServer = class {
3586
5920
  res.status(403).send("Invalid signature");
3587
5921
  return;
3588
5922
  }
5923
+ } else if (this.config.requireSignature !== false) {
5924
+ getLogger().error("Twilio webhook rejected: twilioToken not configured and requireSignature is not false");
5925
+ res.status(503).send("Webhook signature required");
5926
+ return;
3589
5927
  }
3590
5928
  const body = req.body;
3591
5929
  const recordingSid = sanitizeLogValue(body["RecordingSid"] ?? "");
@@ -3603,6 +5941,10 @@ var EmbeddedServer = class {
3603
5941
  res.status(403).send("Invalid signature");
3604
5942
  return;
3605
5943
  }
5944
+ } else if (this.config.requireSignature !== false) {
5945
+ getLogger().error("Twilio webhook rejected: twilioToken not configured and requireSignature is not false");
5946
+ res.status(503).send("Webhook signature required");
5947
+ return;
3606
5948
  }
3607
5949
  const body = req.body;
3608
5950
  const answeredBy = body["AnsweredBy"] ?? "";
@@ -3645,6 +5987,10 @@ var EmbeddedServer = class {
3645
5987
  res.status(403).send("Invalid signature");
3646
5988
  return;
3647
5989
  }
5990
+ } else if (this.config.requireSignature !== false) {
5991
+ getLogger().error("Twilio webhook rejected: twilioToken not configured and requireSignature is not false");
5992
+ res.status(503).send("Webhook signature required");
5993
+ return;
3648
5994
  } else if (!this.twilioTokenWarningLogged) {
3649
5995
  this.twilioTokenWarningLogged = true;
3650
5996
  getLogger().warn("Twilio webhook signature validation disabled \u2014 set twilioToken for production");
@@ -3671,6 +6017,9 @@ var EmbeddedServer = class {
3671
6017
  getLogger().warn("Telnyx webhook rejected: invalid or missing Ed25519 signature");
3672
6018
  return res.status(403).send("Invalid signature");
3673
6019
  }
6020
+ } else if (this.config.requireSignature !== false) {
6021
+ getLogger().error("Telnyx webhook rejected: telnyxPublicKey not configured and requireSignature is not false");
6022
+ return res.status(503).send("Webhook signature required");
3674
6023
  } else if (!this.telnyxSigWarningLogged) {
3675
6024
  this.telnyxSigWarningLogged = true;
3676
6025
  getLogger().warn("Telnyx webhook signature verification is disabled. Set telnyxPublicKey in LocalOptions for production use.");
@@ -3698,6 +6047,17 @@ var EmbeddedServer = class {
3698
6047
  }
3699
6048
  return res.status(200).send();
3700
6049
  }
6050
+ if (eventType === "call.machine.detection.ended") {
6051
+ const amdCallId = payload.call_control_id ?? "";
6052
+ const amdResult = String(payload.result ?? "");
6053
+ getLogger().info(
6054
+ `Telnyx AMD result for ${sanitizeLogValue(amdCallId)}: ${sanitizeLogValue(amdResult)}`
6055
+ );
6056
+ if (amdCallId && (amdResult === "machine" || amdResult === "machine_detected")) {
6057
+ await this.handleTelnyxAmdVoicemail(amdCallId);
6058
+ }
6059
+ return res.status(200).send();
6060
+ }
3701
6061
  const callControlId = payload.call_control_id ?? "";
3702
6062
  if (!callControlId) {
3703
6063
  getLogger().warn("Telnyx webhook rejected: missing call_control_id");
@@ -3715,27 +6075,18 @@ var EmbeddedServer = class {
3715
6075
  };
3716
6076
  try {
3717
6077
  if (eventType === "call.initiated") {
3718
- getLogger().info(`Telnyx call.initiated ${callControlId} \u2014 answering`);
3719
- const resp = await fetch(`${apiBase}/calls/${encodeURIComponent(callControlId)}/actions/answer`, {
3720
- method: "POST",
3721
- headers: authHeaders,
3722
- body: JSON.stringify({}),
3723
- signal: AbortSignal.timeout(1e4)
3724
- });
3725
- if (!resp.ok) {
3726
- getLogger().warn(`Telnyx answer failed: ${resp.status} ${(await resp.text()).slice(0, 200)}`);
3727
- }
3728
- } else if (eventType === "call.answered") {
3729
6078
  const caller = payload.from ?? "";
3730
6079
  const callee = payload.to ?? "";
3731
6080
  const streamUrl = `wss://${this.config.webhookUrl}/ws/stream/${encodeURIComponent(callControlId)}?caller=${encodeURIComponent(caller)}&callee=${encodeURIComponent(callee)}`;
3732
- getLogger().info(`Telnyx call.answered ${callControlId} \u2014 starting stream`);
3733
- const resp = await fetch(`${apiBase}/calls/${encodeURIComponent(callControlId)}/actions/streaming_start`, {
6081
+ getLogger().info(`Telnyx call.initiated ${callControlId} \u2014 answering with inline stream`);
6082
+ const resp = await fetch(`${apiBase}/calls/${encodeURIComponent(callControlId)}/actions/answer`, {
3734
6083
  method: "POST",
3735
6084
  headers: authHeaders,
3736
6085
  body: JSON.stringify({
3737
6086
  stream_url: streamUrl,
3738
- stream_track: "both_tracks",
6087
+ // ``inbound_track`` halves WS upstream bandwidth — outbound
6088
+ // echo was always filtered downstream anyway.
6089
+ stream_track: "inbound_track",
3739
6090
  stream_bidirectional_mode: "rtp",
3740
6091
  stream_bidirectional_codec: "PCMU",
3741
6092
  stream_bidirectional_sampling_rate: 8e3,
@@ -3744,8 +6095,10 @@ var EmbeddedServer = class {
3744
6095
  signal: AbortSignal.timeout(1e4)
3745
6096
  });
3746
6097
  if (!resp.ok) {
3747
- getLogger().warn(`Telnyx streaming_start failed: ${resp.status} ${(await resp.text()).slice(0, 200)}`);
6098
+ getLogger().warn(`Telnyx answer failed: ${resp.status} ${(await resp.text()).slice(0, 200)}`);
3748
6099
  }
6100
+ } else if (eventType === "call.answered") {
6101
+ getLogger().debug(`Telnyx call.answered ${callControlId} \u2014 stream already active (inline)`);
3749
6102
  } else {
3750
6103
  getLogger().debug(`Telnyx event ignored: ${eventType}`);
3751
6104
  }
@@ -3798,6 +6151,12 @@ var EmbeddedServer = class {
3798
6151
  getLogger().info(`Server on port ${port}`);
3799
6152
  getLogger().info(`Webhook: https://${this.config.webhookUrl}`);
3800
6153
  getLogger().info(`Phone: ${this.config.phoneNumber}`);
6154
+ const model = this.agent.model ?? "";
6155
+ if (model && model !== "gpt-4o-mini-realtime-preview" && model.includes("realtime")) {
6156
+ getLogger().warn(
6157
+ `Agent uses "${sanitizeLogValue(model)}" but DEFAULT_PRICING.openai_realtime is calibrated for "gpt-4o-mini-realtime-preview". Pass Patter({ pricing: { openai_realtime: {...} } }) to set rates for this model, otherwise the dashboard cost display will under-report.`
6158
+ );
6159
+ }
3801
6160
  if (this.dashboard) {
3802
6161
  console.log("\n\u2500\u2500\u2500\u2500 Dashboard \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500");
3803
6162
  getLogger().info(`URL: http://127.0.0.1:${port}/`);
@@ -3812,11 +6171,63 @@ var EmbeddedServer = class {
3812
6171
  });
3813
6172
  });
3814
6173
  }
6174
+ /**
6175
+ * Handle a Telnyx ``call.machine.detection.ended`` event when AMD returns
6176
+ * ``machine``: speak the configured voicemail message via ``actions/speak``
6177
+ * then hang up via ``actions/hangup``. Mirrors the Python
6178
+ * ``handle_amd_result`` helper.
6179
+ */
6180
+ async handleTelnyxAmdVoicemail(callControlId) {
6181
+ const telnyxKey = this.config.telnyxKey ?? "";
6182
+ if (!callControlId || !telnyxKey || !this.voicemailMessage) {
6183
+ return;
6184
+ }
6185
+ const encoded = encodeURIComponent(callControlId);
6186
+ const headers = {
6187
+ "Content-Type": "application/json",
6188
+ Authorization: `Bearer ${telnyxKey}`
6189
+ };
6190
+ const estimatedMs = Math.min(
6191
+ 3e4,
6192
+ Math.ceil(this.voicemailMessage.length / 14 * 1e3) + 1500
6193
+ );
6194
+ try {
6195
+ const speakResp = await fetch(
6196
+ `https://api.telnyx.com/v2/calls/${encoded}/actions/speak`,
6197
+ {
6198
+ method: "POST",
6199
+ headers,
6200
+ body: JSON.stringify({
6201
+ payload: this.voicemailMessage,
6202
+ voice: "female",
6203
+ language: "en-US"
6204
+ }),
6205
+ signal: AbortSignal.timeout(1e4)
6206
+ }
6207
+ );
6208
+ if (!speakResp.ok) {
6209
+ getLogger().warn(
6210
+ `Telnyx voicemail speak failed: ${speakResp.status} ${(await speakResp.text()).slice(0, 200)}`
6211
+ );
6212
+ }
6213
+ await new Promise((resolve) => setTimeout(resolve, estimatedMs));
6214
+ await fetch(`https://api.telnyx.com/v2/calls/${encoded}/actions/hangup`, {
6215
+ method: "POST",
6216
+ headers,
6217
+ body: JSON.stringify({}),
6218
+ signal: AbortSignal.timeout(1e4)
6219
+ });
6220
+ getLogger().info(`Voicemail dropped for Telnyx call ${sanitizeLogValue(callControlId)}`);
6221
+ } catch (e) {
6222
+ getLogger().warn(`Could not drop voicemail (Telnyx): ${String(e)}`);
6223
+ }
6224
+ }
3815
6225
  // ---------------------------------------------------------------------------
3816
6226
  // Stream handler helpers
3817
6227
  // ---------------------------------------------------------------------------
3818
6228
  /** Build the shared StreamHandlerDeps for the current server configuration. */
3819
6229
  buildStreamHandlerDeps(bridge) {
6230
+ const [wrappedStart, wrappedMetrics, wrappedEnd] = this.wrapLoggingCallbacks(bridge);
3820
6231
  return {
3821
6232
  config: this.config,
3822
6233
  agent: this.agent,
@@ -3824,17 +6235,84 @@ var EmbeddedServer = class {
3824
6235
  metricsStore: this.metricsStore,
3825
6236
  pricing: this.pricing,
3826
6237
  remoteHandler: this.remoteHandler,
3827
- onCallStart: this.onCallStart,
3828
- onCallEnd: this.onCallEnd,
6238
+ onCallStart: wrappedStart,
6239
+ onCallEnd: wrappedEnd,
3829
6240
  onTranscript: this.onTranscript,
3830
6241
  onMessage: this.onMessage,
3831
- onMetrics: this.onMetrics,
6242
+ onMetrics: wrappedMetrics,
3832
6243
  recording: this.recording,
3833
6244
  buildAIAdapter: (resolvedPrompt) => buildAIAdapter(this.config, this.agent, resolvedPrompt),
3834
6245
  sanitizeVariables,
3835
6246
  resolveVariables
3836
6247
  };
3837
6248
  }
6249
+ /**
6250
+ * Wrap user-supplied call lifecycle callbacks with CallLogger side-effects.
6251
+ * When PATTER_LOG_DIR is unset, the logger is disabled and the returned
6252
+ * wrappers degrade to just calling the user callbacks (still wrapped so
6253
+ * the logger stays consistent with future configuration changes).
6254
+ */
6255
+ wrapLoggingCallbacks(bridge) {
6256
+ const logger = this.callLogger;
6257
+ const agent = this.agent;
6258
+ const userStart = this.onCallStart;
6259
+ const userMetrics = this.onMetrics;
6260
+ const userEnd = this.onCallEnd;
6261
+ const agentSnapshot = () => {
6262
+ const snap = {
6263
+ provider: agent.provider,
6264
+ model: agent.model,
6265
+ voice: agent.voice,
6266
+ language: agent.language
6267
+ };
6268
+ if (agent.stt && agent.tts && !("engine" in agent && agent.engine)) {
6269
+ snap.mode = "pipeline";
6270
+ }
6271
+ return Object.fromEntries(Object.entries(snap).filter(([, v]) => v !== void 0));
6272
+ };
6273
+ const wrappedStart = async (data) => {
6274
+ if (logger.enabled) {
6275
+ const callId = typeof data.call_id === "string" ? data.call_id : "";
6276
+ void logger.logCallStart(callId, {
6277
+ caller: typeof data.caller === "string" ? data.caller : "",
6278
+ callee: typeof data.callee === "string" ? data.callee : "",
6279
+ telephonyProvider: bridge.telephonyProvider,
6280
+ providerMode: agent.provider ?? "",
6281
+ agent: agentSnapshot()
6282
+ }).catch((err) => getLogger().error(`call_log start error: ${String(err)}`));
6283
+ }
6284
+ if (userStart) await userStart(data);
6285
+ };
6286
+ const wrappedMetrics = async (data) => {
6287
+ if (logger.enabled) {
6288
+ const callId = typeof data.call_id === "string" ? data.call_id : "";
6289
+ const turn = data.turn;
6290
+ if (turn && typeof turn === "object") {
6291
+ void logger.logTurn(callId, turn).catch((err) => getLogger().error(`call_log turn error: ${String(err)}`));
6292
+ }
6293
+ }
6294
+ if (userMetrics) await userMetrics(data);
6295
+ };
6296
+ const wrappedEnd = async (data) => {
6297
+ if (logger.enabled) {
6298
+ const callId = typeof data.call_id === "string" ? data.call_id : "";
6299
+ const metricsObj = data.metrics ?? null;
6300
+ const latency = metricsObj ? {
6301
+ p50_ms: metricsObj.latency_p50?.total_ms ?? null,
6302
+ p95_ms: metricsObj.latency_p95?.total_ms ?? null,
6303
+ p99_ms: metricsObj.latency_p99?.total_ms ?? null
6304
+ } : null;
6305
+ void logger.logCallEnd(callId, {
6306
+ durationSeconds: metricsObj?.duration_seconds,
6307
+ turns: metricsObj?.turns?.length,
6308
+ cost: metricsObj?.cost ?? null,
6309
+ latency
6310
+ }).catch((err) => getLogger().error(`call_log end error: ${String(err)}`));
6311
+ }
6312
+ if (userEnd) await userEnd(data);
6313
+ };
6314
+ return [wrappedStart, wrappedMetrics, wrappedEnd];
6315
+ }
3838
6316
  // ---------------------------------------------------------------------------
3839
6317
  // Twilio WebSocket message parser (thin layer)
3840
6318
  // ---------------------------------------------------------------------------
@@ -3863,6 +6341,8 @@ var EmbeddedServer = class {
3863
6341
  const payload = data.media?.payload ?? "";
3864
6342
  handler.handleAudio(Buffer.from(payload, "base64"));
3865
6343
  } else if (event === "mark") {
6344
+ const markName = String(data.mark?.name ?? "");
6345
+ if (markName) await handler.onMark(markName);
3866
6346
  } else if (event === "dtmf") {
3867
6347
  const digit = data.dtmf?.digit ?? "";
3868
6348
  await handler.handleDtmf(digit);
@@ -3998,19 +6478,145 @@ var EmbeddedServer = class {
3998
6478
  };
3999
6479
 
4000
6480
  // src/llm-loop.ts
6481
var DEFAULT_TOOL_MAX_RETRIES = 2;
var DEFAULT_TOOL_RETRY_DELAY_MS = 500;
var DEFAULT_TOOL_TIMEOUT_MS = 1e4;
var TOOL_MAX_RESPONSE_BYTES = 1 * 1024 * 1024;
/**
 * Default executor for LLM tool calls.
 *
 * A tool runs through its in-process ``handler`` when one is defined,
 * otherwise through an HTTP POST to its ``webhookUrl``. ``execute`` never
 * throws: every failure is returned as a JSON string of the form
 * ``{ error, fallback? }``.
 */
var DefaultToolExecutor = class {
  maxRetries;
  retryDelayMs;
  requestTimeoutMs;
  /**
   * @param {{maxRetries?: number, retryDelayMs?: number, requestTimeoutMs?: number}} [opts]
   *   Webhook retry count (in addition to the first attempt), delay between
   *   attempts, and per-request timeout, both in milliseconds.
   */
  constructor(opts = {}) {
    this.maxRetries = opts.maxRetries ?? DEFAULT_TOOL_MAX_RETRIES;
    this.retryDelayMs = opts.retryDelayMs ?? DEFAULT_TOOL_RETRY_DELAY_MS;
    this.requestTimeoutMs = opts.requestTimeoutMs ?? DEFAULT_TOOL_TIMEOUT_MS;
  }
  /**
   * Run one tool call.
   *
   * @param toolDef Tool definition; reads ``name``, ``handler`` and ``webhookUrl``.
   * @param args Arguments the model supplied for the tool.
   * @param callContext Per-call context; passed to the handler, or spread
   *   into the webhook request body.
   * @returns {Promise<string>} The handler's return value, the webhook's
   *   JSON response re-serialised as a string, or a JSON error string.
   */
  async execute(toolDef, args, callContext) {
    if (toolDef.handler) {
      try {
        return await toolDef.handler(args, callContext);
      } catch (e) {
        return JSON.stringify({
          error: `Tool handler error: ${String(e)}`,
          fallback: true
        });
      }
    }
    if (toolDef.webhookUrl) {
      try {
        validateWebhookUrl(toolDef.webhookUrl);
      } catch (e) {
        return JSON.stringify({ error: `Tool webhook URL rejected: ${String(e)}` });
      }
      const callId = typeof callContext.call_id === "string" ? callContext.call_id : "";
      return await withSpan(
        SPAN_TOOL,
        {
          "patter.tool.name": toolDef.name,
          "patter.tool.transport": "webhook",
          "patter.call.id": callId
        },
        async (span) => {
          const totalAttempts = this.maxRetries + 1;
          for (let attempt = 0; attempt < totalAttempts; attempt++) {
            span.setAttribute("patter.tool.attempt", attempt + 1);
            try {
              const resp = await fetch(toolDef.webhookUrl, {
                method: "POST",
                headers: { "Content-Type": "application/json" },
                body: JSON.stringify({
                  tool: toolDef.name,
                  arguments: args,
                  ...callContext,
                  attempt: attempt + 1
                }),
                signal: AbortSignal.timeout(this.requestTimeoutMs)
              });
              if (!resp.ok) throw new Error(`HTTP ${resp.status}`);
              const result = JSON.stringify(await resp.json());
              // Measure encoded bytes, not UTF-16 code units: string.length
              // under-counts multi-byte characters against a byte limit.
              const resultBytes = Buffer.byteLength(result, "utf8");
              if (resultBytes > TOOL_MAX_RESPONSE_BYTES) {
                return JSON.stringify({
                  error: `Webhook response too large: ${resultBytes} bytes (max ${TOOL_MAX_RESPONSE_BYTES})`,
                  fallback: true
                });
              }
              return result;
            } catch (e) {
              if (attempt < totalAttempts - 1) {
                getLogger().warn(
                  `Tool webhook '${toolDef.name}' failed (attempt ${attempt + 1}), retrying: ${String(e)}`
                );
                await new Promise((r) => setTimeout(r, this.retryDelayMs));
              } else {
                span.recordException(e);
                return JSON.stringify({
                  error: `Tool failed after ${totalAttempts} attempts: ${String(e)}`,
                  fallback: true
                });
              }
            }
          }
          // Only reachable when totalAttempts is not positive (negative maxRetries).
          return JSON.stringify({
            error: `Tool '${toolDef.name}' exited retry loop unexpectedly`,
            fallback: true
          });
        }
      );
    }
    return JSON.stringify({
      error: `No handler or webhookUrl for tool '${toolDef.name}'`,
      fallback: true
    });
  }
};
4001
6572
  var OpenAILLMProvider = class {
4002
6573
  apiKey;
4003
6574
  model;
4004
- constructor(apiKey, model) {
6575
+ temperature;
6576
+ maxTokens;
6577
+ responseFormat;
6578
+ parallelToolCalls;
6579
+ toolChoice;
6580
+ seed;
6581
+ topP;
6582
+ frequencyPenalty;
6583
+ presencePenalty;
6584
+ stop;
6585
+ constructor(apiKey, model, sampling = {}) {
4005
6586
  this.apiKey = apiKey;
4006
6587
  this.model = model;
6588
+ this.temperature = sampling.temperature;
6589
+ this.maxTokens = sampling.maxTokens;
6590
+ this.responseFormat = sampling.responseFormat;
6591
+ this.parallelToolCalls = sampling.parallelToolCalls;
6592
+ this.toolChoice = sampling.toolChoice;
6593
+ this.seed = sampling.seed;
6594
+ this.topP = sampling.topP;
6595
+ this.frequencyPenalty = sampling.frequencyPenalty;
6596
+ this.presencePenalty = sampling.presencePenalty;
6597
+ this.stop = sampling.stop;
4007
6598
  }
4008
6599
  async *stream(messages, tools) {
4009
6600
  const body = {
4010
6601
  model: this.model,
4011
6602
  messages,
4012
- stream: true
6603
+ stream: true,
6604
+ // Ask OpenAI to include a final usage chunk so we can attribute token
6605
+ // cost. Without this the dashboard shows LLM cost = 0 for OpenAI.
6606
+ stream_options: { include_usage: true }
4013
6607
  };
6608
+ if (this.temperature !== void 0) body.temperature = this.temperature;
6609
+ if (this.maxTokens !== void 0) {
6610
+ body.max_completion_tokens = this.maxTokens;
6611
+ }
6612
+ if (this.responseFormat !== void 0) body.response_format = this.responseFormat;
6613
+ if (this.parallelToolCalls !== void 0) body.parallel_tool_calls = this.parallelToolCalls;
6614
+ if (this.toolChoice !== void 0) body.tool_choice = this.toolChoice;
6615
+ if (this.seed !== void 0) body.seed = this.seed;
6616
+ if (this.topP !== void 0) body.top_p = this.topP;
6617
+ if (this.frequencyPenalty !== void 0) body.frequency_penalty = this.frequencyPenalty;
6618
+ if (this.presencePenalty !== void 0) body.presence_penalty = this.presencePenalty;
6619
+ if (this.stop !== void 0) body.stop = this.stop;
4014
6620
  if (tools) {
4015
6621
  body.tools = tools;
4016
6622
  }
@@ -4049,6 +6655,16 @@ var OpenAILLMProvider = class {
4049
6655
  } catch {
4050
6656
  continue;
4051
6657
  }
6658
+ if (chunk.usage) {
6659
+ const cached = chunk.usage.prompt_tokens_details?.cached_tokens ?? 0;
6660
+ const uncachedInput = Math.max(0, (chunk.usage.prompt_tokens ?? 0) - cached);
6661
+ yield {
6662
+ type: "usage",
6663
+ inputTokens: uncachedInput,
6664
+ outputTokens: chunk.usage.completion_tokens,
6665
+ cacheReadInputTokens: cached
6666
+ };
6667
+ }
4052
6668
  const delta = chunk.choices?.[0]?.delta;
4053
6669
  if (!delta) continue;
4054
6670
  if (delta.content) {
@@ -4075,10 +6691,28 @@ var LLMLoop = class {
4075
6691
  tools;
4076
6692
  openaiTools;
4077
6693
  toolMap;
6694
+ toolExecutor;
6695
+ eventBus;
6696
+ // Fix 10: track provider/model so usage chunks can be attributed for billing.
6697
+ _providerName;
6698
+ _modelName;
4078
6699
  constructor(apiKey, model, systemPrompt, tools, llmProvider) {
4079
6700
  this.provider = llmProvider ?? new OpenAILLMProvider(apiKey, model);
4080
6701
  this.systemPrompt = systemPrompt;
6702
+ if (llmProvider) {
6703
+ const key = llmProvider.constructor?.providerKey;
6704
+ if (key) {
6705
+ this._providerName = key;
6706
+ } else {
6707
+ const stripped = (llmProvider.constructor?.name ?? "custom").replace(/LLMProvider$/i, "").replace(/LLM$/i, "").replace(/Provider$/i, "").toLowerCase();
6708
+ this._providerName = stripped || "custom";
6709
+ }
6710
+ } else {
6711
+ this._providerName = "openai";
6712
+ }
6713
+ this._modelName = model;
4081
6714
  this.tools = tools ?? null;
6715
+ this.toolExecutor = new DefaultToolExecutor();
4082
6716
  this.toolMap = /* @__PURE__ */ new Map();
4083
6717
  this.openaiTools = null;
4084
6718
  if (this.tools && this.tools.length > 0) {
@@ -4096,13 +6730,40 @@ var LLMLoop = class {
4096
6730
  }
4097
6731
  }
4098
6732
  }
6733
+ /**
6734
+ * Swap in a custom tool executor (e.g. different retry policy, metrics
6735
+ * wrapping, tenant-aware fan-out). The default is ``DefaultToolExecutor``.
6736
+ */
6737
+ setToolExecutor(executor) {
6738
+ this.toolExecutor = executor;
6739
+ }
6740
+ /**
6741
+ * Wire an :class:`EventBus` so the loop emits ``llm_chunk`` per text
6742
+ * token and ``tool_call_started`` the first time each tool-call index
6743
+ * appears. Set to ``undefined`` to disable.
6744
+ */
6745
+ setEventBus(bus) {
6746
+ this.eventBus = bus;
6747
+ }
4099
6748
  /**
4100
6749
  * Stream LLM response tokens, handling tool calls automatically.
4101
6750
  * Yields text tokens as they arrive from the LLM.
6751
+ *
6752
+ * @param metrics Optional usage recorder — when provided, usage chunks
6753
+ * from the provider are forwarded to {@link LlmUsageRecorder.recordLlmUsage}
6754
+ * so token costs are included in the call cost breakdown (fix 10).
4102
6755
  */
4103
- async *run(userText, history, callContext) {
4104
- const messages = this.buildMessages(history, userText);
6756
+ async *run(userText, history, callContext, metrics, hookExecutor, hookCtx) {
6757
+ let messages = this.buildMessages(history, userText);
4105
6758
  const maxIterations = 10;
6759
+ if (hookExecutor && hookCtx) {
6760
+ messages = await hookExecutor.runBeforeLlm(
6761
+ messages,
6762
+ hookCtx
6763
+ );
6764
+ }
6765
+ const hasAfterLlm = Boolean(hookExecutor?.hasAfterLlm() && hookCtx);
6766
+ const allEmittedText = [];
4106
6767
  for (let iter = 0; iter < maxIterations; iter++) {
4107
6768
  const toolCallsAccumulated = /* @__PURE__ */ new Map();
4108
6769
  const textParts = [];
@@ -4110,12 +6771,31 @@ var LLMLoop = class {
4110
6771
  for await (const chunk of this.provider.stream(messages, this.openaiTools)) {
4111
6772
  if (chunk.type === "text" && chunk.content) {
4112
6773
  textParts.push(chunk.content);
4113
- yield chunk.content;
6774
+ this.eventBus?.emit("llm_chunk", { text: chunk.content, iteration: iter });
6775
+ if (hasAfterLlm) {
6776
+ allEmittedText.push(chunk.content);
6777
+ } else {
6778
+ yield chunk.content;
6779
+ }
6780
+ } else if (chunk.type === "usage") {
6781
+ metrics?.recordLlmUsage(
6782
+ this._providerName,
6783
+ this._modelName,
6784
+ chunk.inputTokens ?? 0,
6785
+ chunk.outputTokens ?? 0,
6786
+ chunk.cacheReadInputTokens ?? 0,
6787
+ chunk.cacheCreationInputTokens ?? 0
6788
+ );
4114
6789
  } else if (chunk.type === "tool_call") {
4115
6790
  hasToolCalls = true;
4116
6791
  const idx = chunk.index ?? 0;
4117
6792
  if (!toolCallsAccumulated.has(idx)) {
4118
6793
  toolCallsAccumulated.set(idx, { id: "", name: "", arguments: "" });
6794
+ this.eventBus?.emit("tool_call_started", {
6795
+ index: idx,
6796
+ name: chunk.name ?? "",
6797
+ args: chunk.arguments ?? ""
6798
+ });
4119
6799
  }
4120
6800
  const acc = toolCallsAccumulated.get(idx);
4121
6801
  if (chunk.id) acc.id = chunk.id;
@@ -4123,7 +6803,14 @@ var LLMLoop = class {
4123
6803
  if (chunk.arguments) acc.arguments += chunk.arguments;
4124
6804
  }
4125
6805
  }
4126
- if (!hasToolCalls) return;
6806
+ if (!hasToolCalls) {
6807
+ if (hasAfterLlm && hookExecutor && hookCtx) {
6808
+ const finalText = allEmittedText.join("");
6809
+ const rewritten = await hookExecutor.runAfterLlm(finalText, hookCtx);
6810
+ if (rewritten) yield rewritten;
6811
+ }
6812
+ return;
6813
+ }
4127
6814
  const assistantMsg = {
4128
6815
  role: "assistant",
4129
6816
  content: textParts.join("") || null,
@@ -4162,49 +6849,7 @@ var LLMLoop = class {
4162
6849
  if (!toolDef) {
4163
6850
  return JSON.stringify({ error: `Unknown tool: ${toolName}` });
4164
6851
  }
4165
- if (toolDef.handler) {
4166
- try {
4167
- return await toolDef.handler(args, callContext);
4168
- } catch (e) {
4169
- return JSON.stringify({ error: `Tool handler error: ${String(e)}` });
4170
- }
4171
- }
4172
- if (toolDef.webhookUrl) {
4173
- try {
4174
- validateWebhookUrl(toolDef.webhookUrl);
4175
- } catch (e) {
4176
- return JSON.stringify({ error: `Tool webhook URL rejected: ${String(e)}` });
4177
- }
4178
- for (let attempt = 0; attempt < 3; attempt++) {
4179
- try {
4180
- const resp = await fetch(toolDef.webhookUrl, {
4181
- method: "POST",
4182
- headers: { "Content-Type": "application/json" },
4183
- body: JSON.stringify({
4184
- tool: toolName,
4185
- arguments: args,
4186
- ...callContext,
4187
- attempt: attempt + 1
4188
- }),
4189
- signal: AbortSignal.timeout(1e4)
4190
- });
4191
- if (!resp.ok) throw new Error(`HTTP ${resp.status}`);
4192
- const result = JSON.stringify(await resp.json());
4193
- const MAX_RESPONSE_BYTES2 = 1 * 1024 * 1024;
4194
- if (result.length > MAX_RESPONSE_BYTES2) {
4195
- return JSON.stringify({ error: `Webhook response too large: ${result.length} bytes (max ${MAX_RESPONSE_BYTES2})`, fallback: true });
4196
- }
4197
- return result;
4198
- } catch (e) {
4199
- if (attempt < 2) {
4200
- await new Promise((r) => setTimeout(r, 500));
4201
- } else {
4202
- return JSON.stringify({ error: `Tool failed after 3 attempts: ${String(e)}` });
4203
- }
4204
- }
4205
- }
4206
- }
4207
- return JSON.stringify({ error: `No handler or webhookUrl for tool '${toolName}'` });
6852
+ return this.toolExecutor.execute(toolDef, args, callContext);
4208
6853
  }
4209
6854
  buildMessages(history, userText) {
4210
6855
  const messages = [
@@ -4391,6 +7036,11 @@ var TestSession = class {
4391
7036
  };
4392
7037
 
4393
7038
  export {
7039
+ PatterError,
7040
+ PatterConnectionError,
7041
+ AuthenticationError,
7042
+ ProvisionError,
7043
+ RateLimitError,
4394
7044
  OpenAIRealtimeAdapter,
4395
7045
  ElevenLabsConvAIAdapter,
4396
7046
  DEFAULT_PRICING,
@@ -4412,14 +7062,31 @@ export {
4412
7062
  CallMetricsAccumulator,
4413
7063
  mulawToPcm16,
4414
7064
  pcm16ToMulaw,
7065
+ PcmCarry,
7066
+ StatefulResampler,
7067
+ createResampler16kTo8k,
7068
+ createResampler8kTo16k,
7069
+ createResampler24kTo16k,
4415
7070
  resample8kTo16k,
4416
7071
  resample16kTo8k,
4417
7072
  resample24kTo16k,
7073
+ SPAN_CALL,
7074
+ SPAN_STT,
7075
+ SPAN_LLM,
7076
+ SPAN_TTS,
7077
+ SPAN_TOOL,
7078
+ SPAN_ENDPOINT,
7079
+ SPAN_BARGEIN,
7080
+ initTracing,
7081
+ isTracingEnabled,
7082
+ startSpan,
7083
+ DefaultToolExecutor,
4418
7084
  OpenAILLMProvider,
4419
7085
  LLMLoop,
4420
7086
  DEFAULT_MIN_SENTENCE_LEN,
4421
7087
  SentenceChunker,
4422
7088
  PipelineHookExecutor,
7089
+ EventBus,
4423
7090
  EmbeddedServer,
4424
7091
  TestSession
4425
7092
  };