getpatter 0.5.3 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/README.md +5 -2
- package/dist/aec-PJJMUM5E.mjs +228 -0
- package/dist/{banner-3GNZ6VQK.mjs → banner-UYW6UM3J.mjs} +4 -1
- package/dist/{carrier-config-33HQ2W4V.mjs → carrier-config-4ZKVYAWV.mjs} +5 -2
- package/dist/{chunk-AFUYSNDH.mjs → chunk-6GR5MHHQ.mjs} +9 -0
- package/dist/chunk-CYLJVT5G.mjs +7031 -0
- package/dist/{chunk-FIFIWBL7.mjs → chunk-JUQ5WQTQ.mjs} +2157 -883
- package/dist/{chunk-VJVDG4V5.mjs → chunk-MVOQFAEO.mjs} +5 -0
- package/dist/chunk-N565J3CF.mjs +69 -0
- package/dist/chunk-X3364LSI.mjs +363 -0
- package/dist/{chunk-SEMKNPCD.mjs → chunk-XS45BAQL.mjs} +5 -1
- package/dist/cli.js +32 -621
- package/dist/client-2GJVZT42.mjs +8935 -0
- package/dist/dashboard/ui.html +63 -0
- package/dist/{dist-YRCCJQ26.mjs → dist-RYMPCILF.mjs} +28 -2
- package/dist/index.d.mts +2199 -240
- package/dist/index.d.ts +2199 -240
- package/dist/index.js +28942 -7073
- package/dist/index.mjs +2337 -447
- package/dist/{node-cron-6PRPSBG5.mjs → node-cron-JFWQQRBU.mjs} +23 -2
- package/dist/persistence-LVIAHESK.mjs +7 -0
- package/dist/silero-vad-YLCXT5GQ.mjs +7 -0
- package/dist/streamableHttp-WKNGHDVO.mjs +1496 -0
- package/dist/test-mode-Y7YG5LFZ.mjs +8 -0
- package/dist/tunnel-43CHWPVQ.mjs +8 -0
- package/package.json +7 -7
- package/src/dashboard/ui.html +63 -0
- package/dist/chunk-QHHBUCMT.mjs +0 -25
- package/dist/persistence-LQBYQPQQ.mjs +0 -7
- package/dist/test-mode-MVJ3SKG4.mjs +0 -8
- package/dist/tunnel-UVR3PPAU.mjs +0 -8
|
@@ -1,23 +1,66 @@
|
|
|
1
1
|
import {
|
|
2
2
|
getLogger
|
|
3
|
-
} from "./chunk-VJVDG4V5.mjs";
|
|
3
|
+
} from "./chunk-MVOQFAEO.mjs";
|
|
4
4
|
import {
|
|
5
|
-
|
|
6
|
-
|
|
5
|
+
__dirname,
|
|
6
|
+
__require,
|
|
7
|
+
init_esm_shims
|
|
8
|
+
} from "./chunk-N565J3CF.mjs";
|
|
7
9
|
|
|
8
10
|
// src/test-mode.ts
|
|
11
|
+
init_esm_shims();
|
|
9
12
|
import { createInterface } from "readline";
|
|
10
13
|
|
|
14
|
+
// src/llm-loop.ts
|
|
15
|
+
init_esm_shims();
|
|
16
|
+
|
|
11
17
|
// src/server.ts
|
|
18
|
+
init_esm_shims();
|
|
12
19
|
import crypto4 from "crypto";
|
|
13
20
|
import express from "express";
|
|
14
21
|
import { createServer } from "http";
|
|
15
22
|
import { WebSocketServer } from "ws";
|
|
16
23
|
|
|
17
24
|
// src/providers/openai-realtime.ts
|
|
25
|
+
init_esm_shims();
|
|
18
26
|
import WebSocket from "ws";
|
|
27
|
+
/**
 * Audio format identifiers understood by the OpenAI Realtime adapter.
 *
 * ``G711_ULAW`` is the default for the adapter's ``audioFormat`` constructor
 * argument. Frozen so the shared table cannot be mutated at runtime.
 */
var OpenAIRealtimeAudioFormat = Object.freeze({
  G711_ULAW: "g711_ulaw",
  G711_ALAW: "g711_alaw",
  PCM16: "pcm16"
});
|
|
32
|
+
/**
 * Model identifiers selectable for the OpenAI Realtime adapter.
 *
 * ``GPT_REALTIME_MINI`` is the default for the adapter's ``model``
 * constructor argument. Frozen so the shared table cannot be mutated at
 * runtime.
 */
var OpenAIRealtimeModel = Object.freeze({
  GPT_REALTIME: "gpt-realtime",
  GPT_REALTIME_2: "gpt-realtime-2",
  GPT_REALTIME_MINI: "gpt-realtime-mini",
  GPT_4O_REALTIME_PREVIEW: "gpt-4o-realtime-preview",
  GPT_4O_MINI_REALTIME_PREVIEW: "gpt-4o-mini-realtime-preview"
});
|
|
39
|
+
/**
 * Voice identifiers selectable for the OpenAI Realtime adapter.
 *
 * ``ALLOY`` is the default for the adapter's ``voice`` constructor argument.
 * Frozen so the shared table cannot be mutated at runtime.
 */
var OpenAIVoice = Object.freeze({
  ALLOY: "alloy",
  ASH: "ash",
  BALLAD: "ballad",
  CORAL: "coral",
  ECHO: "echo",
  FABLE: "fable",
  NOVA: "nova",
  ONYX: "onyx",
  SAGE: "sage",
  SHIMMER: "shimmer",
  VERSE: "verse"
});
|
|
52
|
+
/**
 * Transcription model identifiers for the Realtime session's
 * ``input_audio_transcription.model`` field.
 *
 * ``WHISPER_1`` is the fallback when ``options.inputAudioTranscriptionModel``
 * is not set. Frozen so the shared table cannot be mutated at runtime.
 */
var OpenAITranscriptionModel = Object.freeze({
  WHISPER_1: "whisper-1",
  GPT_4O_TRANSCRIBE: "gpt-4o-transcribe",
  GPT_4O_MINI_TRANSCRIBE: "gpt-4o-mini-transcribe",
  GPT_REALTIME_WHISPER: "gpt-realtime-whisper"
});
|
|
58
|
+
/**
 * Turn-detection (VAD) modes for the Realtime session's
 * ``turn_detection.type`` field.
 *
 * ``SERVER_VAD`` is the fallback when ``options.vadType`` is not set.
 * Frozen so the shared table cannot be mutated at runtime.
 */
var OpenAIRealtimeVADType = Object.freeze({
  SERVER_VAD: "server_vad",
  SEMANTIC_VAD: "semantic_vad"
});
|
|
19
62
|
var OpenAIRealtimeAdapter = class {
|
|
20
|
-
constructor(apiKey, model =
|
|
63
|
+
constructor(apiKey, model = OpenAIRealtimeModel.GPT_REALTIME_MINI, voice = OpenAIVoice.ALLOY, instructions = "", tools, audioFormat = OpenAIRealtimeAudioFormat.G711_ULAW, options = {}) {
|
|
21
64
|
this.apiKey = apiKey;
|
|
22
65
|
this.model = model;
|
|
23
66
|
this.voice = voice;
|
|
@@ -26,6 +69,12 @@ var OpenAIRealtimeAdapter = class {
|
|
|
26
69
|
this.audioFormat = audioFormat;
|
|
27
70
|
this.options = options;
|
|
28
71
|
}
|
|
72
|
+
apiKey;
|
|
73
|
+
model;
|
|
74
|
+
voice;
|
|
75
|
+
instructions;
|
|
76
|
+
tools;
|
|
77
|
+
audioFormat;
|
|
29
78
|
ws = null;
|
|
30
79
|
eventCallbacks = /* @__PURE__ */ new Set();
|
|
31
80
|
messageListenerAttached = false;
|
|
@@ -34,7 +83,17 @@ var OpenAIRealtimeAdapter = class {
|
|
|
34
83
|
// barge-in (see ``cancelResponse``) — matches the Python adapter.
|
|
35
84
|
currentResponseItemId = null;
|
|
36
85
|
currentResponseAudioMs = 0;
|
|
86
|
+
// Wall-clock timestamp (Date.now()) of the first ``response.audio.delta``
|
|
87
|
+
// received since the current response item started. ``cancelResponse``
|
|
88
|
+
// uses this to bound ``audio_end_ms`` to what the caller could plausibly
|
|
89
|
+
// have heard — generated audio frequently arrives 5-10x real-time, so
|
|
90
|
+
// ``audio_end_ms`` driven purely by the per-chunk byte counter overshoots
|
|
91
|
+
// reality and leaves phantom assistant text on the conversation. The
|
|
92
|
+
// wall-clock cap corresponds to the maximum playback that real-time TTS
|
|
93
|
+
// could have produced, which is what the user actually heard.
|
|
94
|
+
currentResponseFirstAudioAt = null;
|
|
37
95
|
options;
|
|
96
|
+
/** Open the Realtime WebSocket and apply the session configuration. */
|
|
38
97
|
async connect() {
|
|
39
98
|
const url = `wss://api.openai.com/v1/realtime?model=${encodeURIComponent(this.model)}`;
|
|
40
99
|
this.ws = new WebSocket(url, {
|
|
@@ -63,12 +122,14 @@ var OpenAIRealtimeAdapter = class {
|
|
|
63
122
|
voice: this.voice,
|
|
64
123
|
instructions: this.instructions || "You are a helpful voice assistant. Be concise.",
|
|
65
124
|
turn_detection: {
|
|
66
|
-
type: this.options.vadType ??
|
|
125
|
+
type: this.options.vadType ?? OpenAIRealtimeVADType.SERVER_VAD,
|
|
67
126
|
threshold: 0.5,
|
|
68
127
|
prefix_padding_ms: 300,
|
|
69
128
|
silence_duration_ms: this.options.silenceDurationMs ?? 300
|
|
70
129
|
},
|
|
71
|
-
input_audio_transcription: {
|
|
130
|
+
input_audio_transcription: {
|
|
131
|
+
model: this.options.inputAudioTranscriptionModel ?? OpenAITranscriptionModel.WHISPER_1
|
|
132
|
+
}
|
|
72
133
|
};
|
|
73
134
|
if (this.options.temperature !== void 0) config.temperature = this.options.temperature;
|
|
74
135
|
if (this.options.maxResponseOutputTokens !== void 0) {
|
|
@@ -76,13 +137,22 @@ var OpenAIRealtimeAdapter = class {
|
|
|
76
137
|
}
|
|
77
138
|
if (this.options.modalities !== void 0) config.modalities = this.options.modalities;
|
|
78
139
|
if (this.options.toolChoice !== void 0) config.tool_choice = this.options.toolChoice;
|
|
140
|
+
if (this.options.reasoningEffort !== void 0) {
|
|
141
|
+
config.reasoning = { effort: this.options.reasoningEffort };
|
|
142
|
+
}
|
|
79
143
|
if (this.tools?.length) {
|
|
80
|
-
config.tools = this.tools.map((t) =>
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
144
|
+
config.tools = this.tools.map((t) => {
|
|
145
|
+
const def = {
|
|
146
|
+
type: "function",
|
|
147
|
+
name: t.name,
|
|
148
|
+
description: t.description,
|
|
149
|
+
parameters: t.parameters
|
|
150
|
+
};
|
|
151
|
+
if (t.strict === true) {
|
|
152
|
+
def.strict = true;
|
|
153
|
+
}
|
|
154
|
+
return def;
|
|
155
|
+
});
|
|
86
156
|
}
|
|
87
157
|
ws.send(JSON.stringify({ type: "session.update", session: config }));
|
|
88
158
|
} else if (msg.type === "session.updated") {
|
|
@@ -124,6 +194,7 @@ var OpenAIRealtimeAdapter = class {
|
|
|
124
194
|
}, 2e4);
|
|
125
195
|
this.ensureMessageListener();
|
|
126
196
|
}
|
|
197
|
+
/** Append a base64-encoded audio chunk to the realtime input buffer. */
|
|
127
198
|
sendAudio(mulawAudio) {
|
|
128
199
|
if (!this.ws || this.ws.readyState !== WebSocket.OPEN) return;
|
|
129
200
|
this.ws.send(JSON.stringify({ type: "input_audio_buffer.append", audio: mulawAudio.toString("base64") }));
|
|
@@ -140,6 +211,7 @@ var OpenAIRealtimeAdapter = class {
|
|
|
140
211
|
this.eventCallbacks.add(callback);
|
|
141
212
|
this.ensureMessageListener();
|
|
142
213
|
}
|
|
214
|
+
/** Remove a previously registered {@link onEvent} callback. */
|
|
143
215
|
offEvent(callback) {
|
|
144
216
|
this.eventCallbacks.delete(callback);
|
|
145
217
|
}
|
|
@@ -166,6 +238,9 @@ var OpenAIRealtimeAdapter = class {
|
|
|
166
238
|
if (t === "response.audio.delta") {
|
|
167
239
|
const buf = Buffer.from(data.delta ?? "", "base64");
|
|
168
240
|
this.currentResponseAudioMs += estimateAudioMs(buf, this.audioFormat);
|
|
241
|
+
if (this.currentResponseFirstAudioAt === null) {
|
|
242
|
+
this.currentResponseFirstAudioAt = Date.now();
|
|
243
|
+
}
|
|
169
244
|
dispatch("audio", buf);
|
|
170
245
|
} else if (t === "response.audio_transcript.delta") {
|
|
171
246
|
dispatch("transcript_output", data.delta);
|
|
@@ -174,6 +249,7 @@ var OpenAIRealtimeAdapter = class {
|
|
|
174
249
|
if (itemId) {
|
|
175
250
|
this.currentResponseItemId = itemId;
|
|
176
251
|
this.currentResponseAudioMs = 0;
|
|
252
|
+
this.currentResponseFirstAudioAt = null;
|
|
177
253
|
}
|
|
178
254
|
} else if (t === "input_audio_buffer.speech_started") {
|
|
179
255
|
dispatch("speech_started", null);
|
|
@@ -186,6 +262,7 @@ var OpenAIRealtimeAdapter = class {
|
|
|
186
262
|
} else if (t === "response.done") {
|
|
187
263
|
this.currentResponseItemId = null;
|
|
188
264
|
this.currentResponseAudioMs = 0;
|
|
265
|
+
this.currentResponseFirstAudioAt = null;
|
|
189
266
|
dispatch("response_done", data.response ?? null);
|
|
190
267
|
} else if (t === "error") {
|
|
191
268
|
dispatch("error", data.error);
|
|
@@ -204,22 +281,44 @@ var OpenAIRealtimeAdapter = class {
|
|
|
204
281
|
dispatch("error", { type: "socket_error", message: err?.message ?? String(err) });
|
|
205
282
|
});
|
|
206
283
|
}
|
|
284
|
+
/** Truncate the in-flight assistant turn and cancel the active response.
|
|
285
|
+
*
|
|
286
|
+
* ``audio_end_ms`` MUST reflect what the caller actually heard, not what
|
|
287
|
+
* the server generated. OpenAI streams audio at 5-10x real-time, so the
|
|
288
|
+
* byte-derived counter overstates playback whenever the consumer cleared
|
|
289
|
+
* its playout buffer (e.g. ``send_clear``) before the audio reached the
|
|
290
|
+
* speaker. We bound the truncate point by wall-clock time since the first
|
|
291
|
+
* chunk of this response — that's the physical maximum a 1x real-time
|
|
292
|
+
* playback could have produced. Without this cap, OpenAI keeps the full
|
|
293
|
+
* generated assistant text on the transcript, and the model replays /
|
|
294
|
+
* resumes from it on the next turn — manifesting as re-greetings and
|
|
295
|
+
* mid-sentence fragments after a barge-in storm.
|
|
296
|
+
*/
|
|
207
297
|
cancelResponse() {
|
|
208
298
|
if (!this.ws) return;
|
|
209
299
|
if (this.currentResponseItemId) {
|
|
300
|
+
let audioEndMs = this.currentResponseAudioMs;
|
|
301
|
+
if (this.currentResponseFirstAudioAt !== null) {
|
|
302
|
+
const elapsedMs = Date.now() - this.currentResponseFirstAudioAt;
|
|
303
|
+
audioEndMs = Math.min(audioEndMs, Math.max(elapsedMs, 0));
|
|
304
|
+
}
|
|
210
305
|
try {
|
|
211
306
|
this.ws.send(JSON.stringify({
|
|
212
307
|
type: "conversation.item.truncate",
|
|
213
308
|
item_id: this.currentResponseItemId,
|
|
214
309
|
content_index: 0,
|
|
215
|
-
audio_end_ms:
|
|
310
|
+
audio_end_ms: audioEndMs
|
|
216
311
|
}));
|
|
217
312
|
} catch (err) {
|
|
218
313
|
getLogger().debug?.(`conversation.item.truncate failed: ${String(err)}`);
|
|
219
314
|
}
|
|
220
315
|
}
|
|
221
316
|
this.ws.send(JSON.stringify({ type: "response.cancel" }));
|
|
317
|
+
this.currentResponseItemId = null;
|
|
318
|
+
this.currentResponseAudioMs = 0;
|
|
319
|
+
this.currentResponseFirstAudioAt = null;
|
|
222
320
|
}
|
|
321
|
+
/** Inject a user text turn and request a new response. */
|
|
223
322
|
async sendText(text) {
|
|
224
323
|
this.ws?.send(JSON.stringify({
|
|
225
324
|
type: "conversation.item.create",
|
|
@@ -227,6 +326,30 @@ var OpenAIRealtimeAdapter = class {
|
|
|
227
326
|
}));
|
|
228
327
|
this.ws?.send(JSON.stringify({ type: "response.create" }));
|
|
229
328
|
}
|
|
329
|
+
/**
|
|
330
|
+
* Make the AI speak ``text`` as its opening line.
|
|
331
|
+
*
|
|
332
|
+
* Triggers ``response.create`` with explicit ``instructions`` that force
|
|
333
|
+
* the model to render ``text`` verbatim as its first audio utterance.
|
|
334
|
+
* This is the correct semantics for ``Agent.firstMessage`` per its
|
|
335
|
+
* docstring ("What the AI says when the callee answers").
|
|
336
|
+
*
|
|
337
|
+
* Without this, ``sendText(firstMessage)`` would inject ``text`` as
|
|
338
|
+
* ``role: user`` and the AI would *reply* to its own greeting, producing
|
|
339
|
+
* role-confused openings (e.g. a receptionist agent responding "I'd like
|
|
340
|
+
* to schedule a haircut" because it took its own first_message as a
|
|
341
|
+
* customer cue).
|
|
342
|
+
*/
|
|
343
|
+
async sendFirstMessage(text) {
|
|
344
|
+
this.ws?.send(JSON.stringify({
|
|
345
|
+
type: "response.create",
|
|
346
|
+
response: {
|
|
347
|
+
modalities: ["audio", "text"],
|
|
348
|
+
instructions: `Say exactly the following sentence as your first turn and nothing else: "${text}"`
|
|
349
|
+
}
|
|
350
|
+
}));
|
|
351
|
+
}
|
|
352
|
+
/** Submit a tool/function-call result and request the next response. */
|
|
230
353
|
async sendFunctionResult(callId, result) {
|
|
231
354
|
this.ws?.send(JSON.stringify({
|
|
232
355
|
type: "conversation.item.create",
|
|
@@ -234,6 +357,7 @@ var OpenAIRealtimeAdapter = class {
|
|
|
234
357
|
}));
|
|
235
358
|
this.ws?.send(JSON.stringify({ type: "response.create" }));
|
|
236
359
|
}
|
|
360
|
+
/** Stop the heartbeat, drop listeners, and close the Realtime WebSocket. */
|
|
237
361
|
close() {
|
|
238
362
|
if (this.heartbeat) {
|
|
239
363
|
clearInterval(this.heartbeat);
|
|
@@ -247,14 +371,16 @@ var OpenAIRealtimeAdapter = class {
|
|
|
247
371
|
};
|
|
248
372
|
/**
 * Estimate how many whole milliseconds of audio a chunk contains.
 *
 * G.711 (mu-law / A-law) is counted at 8 bytes per millisecond and PCM16 at
 * 48 bytes per millisecond; the result is floored per chunk.
 * NOTE(review): the divisors presumably correspond to 8 kHz one-byte samples
 * and 24 kHz 16-bit mono respectively — confirm against the session config.
 *
 * @param {{ length: number }} chunk - Decoded audio bytes (e.g. a Buffer).
 * @param {string} format - One of the ``OpenAIRealtimeAudioFormat`` values.
 * @returns {number} Estimated duration in ms; 0 for empty chunks or
 *   unrecognised formats.
 */
function estimateAudioMs(chunk, format) {
  if (chunk.length === 0) return 0;
  if (format === OpenAIRealtimeAudioFormat.G711_ULAW || format === OpenAIRealtimeAudioFormat.G711_ALAW) {
    return Math.floor(chunk.length / 8);
  }
  if (format === OpenAIRealtimeAudioFormat.PCM16) {
    return Math.floor(chunk.length / 48);
  }
  return 0;
}
|
|
256
381
|
|
|
257
382
|
// src/providers/elevenlabs-convai.ts
|
|
383
|
+
init_esm_shims();
|
|
258
384
|
import WebSocket2 from "ws";
|
|
259
385
|
var ELEVENLABS_CONVAI_URL = "wss://api.elevenlabs.io/v1/convai/conversation";
|
|
260
386
|
var ELEVENLABS_SIGNED_URL = "https://api.elevenlabs.io/v1/convai/conversation/get-signed-url";
|
|
@@ -366,6 +492,7 @@ var ElevenLabsConvAIAdapter = class _ElevenLabsConvAIAdapter {
|
|
|
366
492
|
}
|
|
367
493
|
return data.signed_url;
|
|
368
494
|
}
|
|
495
|
+
/** Open the ConvAI WebSocket and send the conversation init payload. */
|
|
369
496
|
async connect() {
|
|
370
497
|
let wsUrl;
|
|
371
498
|
let wsOptions;
|
|
@@ -533,6 +660,7 @@ var ElevenLabsConvAIAdapter = class _ElevenLabsConvAIAdapter {
|
|
|
533
660
|
return;
|
|
534
661
|
}
|
|
535
662
|
}
|
|
663
|
+
/** Send a caller-side audio chunk to ConvAI as a base64 `user_audio_chunk`. */
|
|
536
664
|
sendAudio(audioBytes) {
|
|
537
665
|
if (!this.ws || this.ws.readyState !== WebSocket2.OPEN) return;
|
|
538
666
|
this.ws.send(
|
|
@@ -541,9 +669,11 @@ var ElevenLabsConvAIAdapter = class _ElevenLabsConvAIAdapter {
|
|
|
541
669
|
})
|
|
542
670
|
);
|
|
543
671
|
}
|
|
672
|
+
/** Register the event callback that receives ConvAI server messages. */
|
|
544
673
|
onEvent(callback) {
|
|
545
674
|
this.eventCallback = callback;
|
|
546
675
|
}
|
|
676
|
+
/** Close the ConvAI WebSocket and release the event callback. */
|
|
547
677
|
async close() {
|
|
548
678
|
this.clearSilenceTimer();
|
|
549
679
|
if (!this.ws) {
|
|
@@ -582,6 +712,7 @@ var ElevenLabsConvAIAdapter = class _ElevenLabsConvAIAdapter {
|
|
|
582
712
|
};
|
|
583
713
|
|
|
584
714
|
// src/provider-factory.ts
|
|
715
|
+
init_esm_shims();
|
|
585
716
|
/**
 * Resolve the speech-to-text provider configured on an agent.
 *
 * @param {{ stt?: unknown }} agent - Agent definition.
 * @returns {Promise<unknown>} The agent's ``stt`` value, or ``null`` when it
 *   is ``null``/``undefined``.
 */
async function createSTT(agent) {
  return agent.stt ?? null;
}
|
|
@@ -590,44 +721,166 @@ async function createTTS(agent) {
|
|
|
590
721
|
}
|
|
591
722
|
|
|
592
723
|
// src/pricing.ts
|
|
724
|
+
init_esm_shims();
|
|
725
|
+
/**
 * Billing units used by the ``unit`` field of pricing entries.
 *
 * The cost helpers compare against these string values (``"minute"`` for
 * STT/telephony, ``"1k_chars"`` for TTS, ``"token"`` for realtime). Frozen so
 * the shared table cannot be mutated at runtime.
 */
var PricingUnit = Object.freeze({
  MINUTE: "minute",
  THOUSAND_CHARS: "1k_chars",
  TOKEN: "token"
});
|
|
730
|
+
/**
 * Flatten a provider pricing entry into the rates that apply to one model.
 *
 * Resolution order:
 *   1. an exact own-key match in ``providerConfig.models``;
 *   2. otherwise the longest ``models`` key that is a prefix of ``model``
 *      (so a dated/suffixed model id inherits its family's rates);
 *   3. otherwise the provider-level defaults.
 * Model-level fields override provider-level fields; the ``models`` map
 * itself is never included in the result.
 *
 * @param {object | null | undefined} providerConfig - Pricing entry, optionally
 *   carrying a ``models`` map of per-model overrides.
 * @param {string} [model] - Model identifier to resolve.
 * @returns {object} A fresh object of rates; ``{ unit: "" }`` when the
 *   provider has no pricing entry.
 */
function resolveProviderRates(providerConfig, model) {
  if (!providerConfig) return { unit: "" };
  const { models, ...base } = providerConfig;
  // A non-string model cannot be matched by key or prefix; use the defaults.
  if (!model || typeof model !== "string" || !models) return { ...base };
  // Own-property check: names such as "constructor" must not resolve to
  // members inherited from Object.prototype.
  let override = Object.hasOwn(models, model) ? models[model] : void 0;
  if (!override) {
    let bestKey = "";
    for (const key of Object.keys(models)) {
      if (model.startsWith(key) && key.length > bestKey.length) {
        bestKey = key;
      }
    }
    if (bestKey) override = models[bestKey];
  }
  return override ? { ...base, ...override } : { ...base };
}
|
|
593
749
|
var DEFAULT_PRICING = {
|
|
594
|
-
// STT — per minute of audio processed
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
750
|
+
// STT — per minute of audio processed.
|
|
751
|
+
deepgram: {
|
|
752
|
+
unit: PricingUnit.MINUTE,
|
|
753
|
+
// Default = Nova-3 streaming monolingual ($0.0077/min). Previous $0.0043
|
|
754
|
+
// was the batch rate; streaming is ~80% more expensive.
|
|
755
|
+
price: 77e-4,
|
|
756
|
+
models: {
|
|
757
|
+
"nova-3": { price: 77e-4 },
|
|
758
|
+
"nova-3-multilingual": { price: 92e-4 },
|
|
759
|
+
"nova-2": { price: 58e-4 },
|
|
760
|
+
nova: { price: 43e-4 },
|
|
761
|
+
"whisper-large": { price: 48e-4 },
|
|
762
|
+
"whisper-medium": { price: 48e-4 }
|
|
763
|
+
}
|
|
764
|
+
},
|
|
765
|
+
whisper: {
|
|
766
|
+
unit: PricingUnit.MINUTE,
|
|
767
|
+
// Default = whisper-1 REST ($0.006/min).
|
|
768
|
+
price: 6e-3,
|
|
769
|
+
models: {
|
|
770
|
+
"whisper-1": { price: 6e-3 },
|
|
771
|
+
"gpt-4o-transcribe": { price: 6e-3 },
|
|
772
|
+
"gpt-4o-mini-transcribe": { price: 3e-3 },
|
|
773
|
+
// Streaming Whisper variant for Realtime sessions.
|
|
774
|
+
"gpt-realtime-whisper": { price: 0.017 }
|
|
775
|
+
}
|
|
776
|
+
},
|
|
777
|
+
// OpenAI standalone transcription endpoint (separate provider_key from
|
|
778
|
+
// ``whisper`` so the dashboard can distinguish them).
|
|
779
|
+
openai_transcribe: {
|
|
780
|
+
unit: PricingUnit.MINUTE,
|
|
781
|
+
price: 6e-3,
|
|
782
|
+
models: {
|
|
783
|
+
"gpt-4o-transcribe": { price: 6e-3 },
|
|
784
|
+
"gpt-4o-mini-transcribe": { price: 3e-3 },
|
|
785
|
+
"whisper-1": { price: 6e-3 }
|
|
786
|
+
}
|
|
787
|
+
},
|
|
600
788
|
// AssemblyAI Universal-Streaming — $0.15/hr = $0.0025/min
|
|
601
|
-
assemblyai: { unit:
|
|
789
|
+
assemblyai: { unit: PricingUnit.MINUTE, price: 25e-4 },
|
|
602
790
|
// Cartesia ink-whisper streaming STT — ~$0.15/hr on usage plans
|
|
603
|
-
cartesia_stt: { unit:
|
|
791
|
+
cartesia_stt: { unit: PricingUnit.MINUTE, price: 25e-4 },
|
|
604
792
|
// Soniox real-time STT — $0.12/hr = $0.002/min
|
|
605
|
-
soniox: { unit:
|
|
793
|
+
soniox: { unit: PricingUnit.MINUTE, price: 2e-3 },
|
|
606
794
|
// Speechmatics Pro tier — $0.24/hr = $0.0040/min (new users land here).
|
|
607
795
|
// Previous $0.0173 default reflected a legacy Standard tier that was
|
|
608
796
|
// retired; users were being over-billed ~4.3x.
|
|
609
|
-
speechmatics: { unit:
|
|
797
|
+
speechmatics: { unit: PricingUnit.MINUTE, price: 4e-3 },
|
|
610
798
|
// TTS — per 1,000 characters synthesized.
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
//
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
799
|
+
elevenlabs: {
|
|
800
|
+
unit: PricingUnit.THOUSAND_CHARS,
|
|
801
|
+
// Default = eleven_flash_v2_5 (Patter's default model) at $0.06/1k.
|
|
802
|
+
price: 0.06,
|
|
803
|
+
models: {
|
|
804
|
+
eleven_flash_v2_5: { price: 0.06 },
|
|
805
|
+
eleven_turbo_v2_5: { price: 0.05 },
|
|
806
|
+
eleven_multilingual_v2: { price: 0.18 },
|
|
807
|
+
eleven_monolingual_v1: { price: 0.18 },
|
|
808
|
+
eleven_v3: { price: 0.3 }
|
|
809
|
+
}
|
|
810
|
+
},
|
|
811
|
+
// ElevenLabs WebSocket streaming TTS shares pricing with REST.
|
|
812
|
+
elevenlabs_ws: {
|
|
813
|
+
unit: PricingUnit.THOUSAND_CHARS,
|
|
814
|
+
price: 0.06,
|
|
815
|
+
models: {
|
|
816
|
+
eleven_flash_v2_5: { price: 0.06 },
|
|
817
|
+
eleven_turbo_v2_5: { price: 0.05 },
|
|
818
|
+
eleven_multilingual_v2: { price: 0.18 },
|
|
819
|
+
eleven_v3: { price: 0.3 }
|
|
820
|
+
}
|
|
821
|
+
},
|
|
822
|
+
openai_tts: {
|
|
823
|
+
unit: PricingUnit.THOUSAND_CHARS,
|
|
824
|
+
// Default = tts-1 ($0.015/1k chars).
|
|
825
|
+
price: 0.015,
|
|
826
|
+
models: {
|
|
827
|
+
"tts-1": { price: 0.015 },
|
|
828
|
+
"tts-1-hd": { price: 0.03 },
|
|
829
|
+
// gpt-4o-mini-tts is billed by tokens upstream but published per
|
|
830
|
+
// 1k chars equivalent here for parity with the rest of the table.
|
|
831
|
+
"gpt-4o-mini-tts": { price: 0.012 }
|
|
832
|
+
}
|
|
833
|
+
},
|
|
834
|
+
// Legacy alias preserved for backward compat with users who set
|
|
835
|
+
// provider_key="openai_tts_hd" in their own adapters.
|
|
836
|
+
openai_tts_hd: { unit: PricingUnit.THOUSAND_CHARS, price: 0.03 },
|
|
837
|
+
cartesia_tts: {
|
|
838
|
+
unit: PricingUnit.THOUSAND_CHARS,
|
|
839
|
+
// Default = Sonic-2 (current Cartesia flagship) at ~$0.030/1k chars.
|
|
840
|
+
price: 0.03,
|
|
841
|
+
models: {
|
|
842
|
+
"sonic-2": { price: 0.03 },
|
|
843
|
+
"sonic-1": { price: 0.03 },
|
|
844
|
+
"sonic-english": { price: 0.03 },
|
|
845
|
+
"sonic-multilingual": { price: 0.03 }
|
|
846
|
+
}
|
|
847
|
+
},
|
|
848
|
+
rime: {
|
|
849
|
+
unit: PricingUnit.THOUSAND_CHARS,
|
|
850
|
+
// Default = mistv2 ($0.030/1k chars).
|
|
851
|
+
price: 0.03,
|
|
852
|
+
models: {
|
|
853
|
+
mistv2: { price: 0.03 },
|
|
854
|
+
mist: { price: 0.03 },
|
|
855
|
+
arcana: { price: 0.04 }
|
|
856
|
+
}
|
|
857
|
+
},
|
|
858
|
+
lmnt: {
|
|
859
|
+
unit: PricingUnit.THOUSAND_CHARS,
|
|
860
|
+
// Default = aurora ($0.050/1k chars).
|
|
861
|
+
price: 0.05,
|
|
862
|
+
models: {
|
|
863
|
+
aurora: { price: 0.05 },
|
|
864
|
+
blizzard: { price: 0.05 }
|
|
865
|
+
}
|
|
866
|
+
},
|
|
867
|
+
inworld: {
|
|
868
|
+
unit: PricingUnit.THOUSAND_CHARS,
|
|
869
|
+
// Default = inworld-tts-2 (placeholder rate — verify against tier).
|
|
870
|
+
price: 0.02,
|
|
871
|
+
models: {
|
|
872
|
+
"inworld-tts-2": { price: 0.02 },
|
|
873
|
+
"inworld-tts-1.5-max": { price: 0.025 },
|
|
874
|
+
"inworld-tts-1.5": { price: 0.025 }
|
|
875
|
+
}
|
|
876
|
+
},
|
|
877
|
+
// OpenAI Realtime — per token. Provider defaults match
|
|
878
|
+
// gpt-realtime-mini / gpt-4o-mini-realtime-preview (Patter's default).
|
|
879
|
+
// Per-model overrides under ``models`` are auto-resolved when the
|
|
880
|
+
// realtime adapter's model is threaded through ``calculateRealtimeCost``.
|
|
629
881
|
openai_realtime: {
|
|
630
|
-
unit:
|
|
882
|
+
unit: PricingUnit.TOKEN,
|
|
883
|
+
// Default rates: gpt-realtime-mini / gpt-4o-mini-realtime-preview
|
|
631
884
|
audio_input_per_token: 1e-5,
|
|
632
885
|
audio_output_per_token: 2e-5,
|
|
633
886
|
text_input_per_token: 6e-7,
|
|
@@ -636,47 +889,119 @@ var DEFAULT_PRICING = {
|
|
|
636
889
|
// text cached $0.06/M = 10% of full. OpenAI bills the cached portion of
|
|
637
890
|
// input_token_details.audio_tokens / text_tokens at these reduced rates.
|
|
638
891
|
cached_audio_input_per_token: 3e-7,
|
|
639
|
-
cached_text_input_per_token: 6e-8
|
|
892
|
+
cached_text_input_per_token: 6e-8,
|
|
893
|
+
models: {
|
|
894
|
+
// gpt-realtime (GA, August 2025): audio in $32/M, audio out $64/M,
|
|
895
|
+
// text in $4/M, text out $16/M, cached $0.40/M (audio + text).
|
|
896
|
+
"gpt-realtime": {
|
|
897
|
+
audio_input_per_token: 32e-6,
|
|
898
|
+
audio_output_per_token: 64e-6,
|
|
899
|
+
text_input_per_token: 4e-6,
|
|
900
|
+
text_output_per_token: 16e-6,
|
|
901
|
+
cached_audio_input_per_token: 4e-7,
|
|
902
|
+
cached_text_input_per_token: 4e-7
|
|
903
|
+
},
|
|
904
|
+
// gpt-realtime-2 (most-capable): audio in $32/M, audio out $64/M,
|
|
905
|
+
// text in $4/M, text out $24/M, cached $0.40/M (audio + text).
|
|
906
|
+
"gpt-realtime-2": {
|
|
907
|
+
audio_input_per_token: 32e-6,
|
|
908
|
+
audio_output_per_token: 64e-6,
|
|
909
|
+
text_input_per_token: 4e-6,
|
|
910
|
+
text_output_per_token: 24e-6,
|
|
911
|
+
cached_audio_input_per_token: 4e-7,
|
|
912
|
+
cached_text_input_per_token: 4e-7
|
|
913
|
+
},
|
|
914
|
+
// gpt-realtime-mini and gpt-4o-mini-realtime-preview share the
|
|
915
|
+
// provider defaults. Listed explicitly so tooling can introspect.
|
|
916
|
+
"gpt-realtime-mini": {
|
|
917
|
+
audio_input_per_token: 1e-5,
|
|
918
|
+
audio_output_per_token: 2e-5,
|
|
919
|
+
text_input_per_token: 6e-7,
|
|
920
|
+
text_output_per_token: 24e-7,
|
|
921
|
+
cached_audio_input_per_token: 3e-7,
|
|
922
|
+
cached_text_input_per_token: 6e-8
|
|
923
|
+
},
|
|
924
|
+
"gpt-4o-mini-realtime-preview": {
|
|
925
|
+
audio_input_per_token: 1e-5,
|
|
926
|
+
audio_output_per_token: 2e-5,
|
|
927
|
+
text_input_per_token: 6e-7,
|
|
928
|
+
text_output_per_token: 24e-7,
|
|
929
|
+
cached_audio_input_per_token: 3e-7,
|
|
930
|
+
cached_text_input_per_token: 6e-8
|
|
931
|
+
},
|
|
932
|
+
// gpt-4o-realtime-preview (legacy preview, ~10x mini for audio):
|
|
933
|
+
// audio in $100/M, audio out $200/M, text in $5/M, text out $20/M.
|
|
934
|
+
"gpt-4o-realtime-preview": {
|
|
935
|
+
audio_input_per_token: 1e-4,
|
|
936
|
+
audio_output_per_token: 2e-4,
|
|
937
|
+
text_input_per_token: 5e-6,
|
|
938
|
+
text_output_per_token: 2e-5,
|
|
939
|
+
cached_audio_input_per_token: 2e-6,
|
|
940
|
+
cached_text_input_per_token: 25e-7
|
|
941
|
+
}
|
|
942
|
+
}
|
|
640
943
|
},
|
|
641
944
|
// Telephony — per minute of call duration.
|
|
642
945
|
// twilio default = US inbound local (the 99% case for voice agents receiving
|
|
643
946
|
// calls on a local number). For US toll-free inbound ($0.022/min) or US
|
|
644
947
|
// outbound local ($0.0140/min), override via Patter({ pricing: { twilio: {...} } }).
|
|
645
|
-
twilio: { unit:
|
|
646
|
-
telnyx: { unit:
|
|
948
|
+
twilio: { unit: PricingUnit.MINUTE, price: 85e-4 },
|
|
949
|
+
telnyx: { unit: PricingUnit.MINUTE, price: 7e-3 }
|
|
647
950
|
};
|
|
951
|
+
/**
 * Copy a provider pricing entry so the copy can be modified without touching
 * the source.
 *
 * Top-level fields are shallow-copied; when a ``models`` map is present, the
 * map and each per-model rate object are copied as well.
 *
 * @param {object | null | undefined} entry - Pricing entry to copy.
 * @returns {object} The copy (an empty object for a nullish entry).
 */
function cloneProviderEntry(entry) {
  const out = { ...entry };
  if (entry?.models) {
    // Object.fromEntries defines plain own properties, so a model key such as
    // "__proto__" cannot re-parent the map.
    out.models = Object.fromEntries(
      Object.entries(entry.models).map(([modelKey, rates]) => [modelKey, { ...rates }])
    );
  }
  return out;
}
|
|
648
962
|
/**
 * Build the effective pricing table: a copy of ``DEFAULT_PRICING`` with the
 * caller's overrides layered on top.
 *
 * - Providers absent from the defaults are added as copies of the override.
 * - For known providers, each override field replaces the default field,
 *   except ``models``: override models are added to (or replace entries in)
 *   the provider's model map, leaving other default models in place.
 * Neither ``DEFAULT_PRICING`` nor ``overrides`` is mutated or aliased for the
 * provider entries and model maps handled here.
 *
 * @param {object} [overrides] - Map of provider key to partial pricing entry.
 * @returns {object} A new pricing table.
 */
function mergePricing(overrides) {
  const merged = {};
  for (const [providerKey, defaults] of Object.entries(DEFAULT_PRICING)) {
    merged[providerKey] = cloneProviderEntry(defaults);
  }
  if (!overrides) return merged;
  for (const [provider2, values] of Object.entries(overrides)) {
    // Own-property check: a provider keyed e.g. "toString" must be treated as
    // new, not matched against a member inherited from Object.prototype.
    if (!Object.hasOwn(merged, provider2)) {
      merged[provider2] = cloneProviderEntry(values);
      continue;
    }
    const target = merged[provider2];
    for (const [field, value] of Object.entries(values)) {
      if (field === "models" && value && typeof value === "object") {
        // Start from the default model map when there is one; always copy the
        // override's rate objects so the caller's input is never aliased.
        const mergedModels = target.models && typeof target.models === "object" ? { ...target.models } : {};
        for (const [modelKey, rates] of Object.entries(value)) {
          mergedModels[modelKey] = { ...rates };
        }
        target.models = mergedModels;
      } else {
        target[field] = value;
      }
    }
  }
  return merged;
}
|
|
663
|
-
function calculateSttCost(provider2, audioSeconds, pricing) {
|
|
664
|
-
const
|
|
665
|
-
if (
|
|
666
|
-
return audioSeconds / 60 * (
|
|
988
|
+
/**
 * Cost of speech-to-text for a given amount of processed audio.
 *
 * @param {string} provider2 - Provider key into ``pricing``.
 * @param {number} audioSeconds - Seconds of audio processed.
 * @param {object} pricing - Pricing table keyed by provider.
 * @param {string} [model] - Optional model id used to pick per-model rates.
 * @returns {number} ``audioSeconds / 60 * price``; 0 when the resolved entry
 *   is not billed per minute (a missing ``price`` is treated as 0).
 */
function calculateSttCost(provider2, audioSeconds, pricing, model) {
  const rates = resolveProviderRates(pricing[provider2], model);
  if (rates.unit !== "minute") return 0;
  return audioSeconds / 60 * (rates.price ?? 0);
}
|
|
668
|
-
function calculateTtsCost(provider2, characterCount, pricing) {
|
|
669
|
-
const
|
|
670
|
-
if (
|
|
671
|
-
return characterCount / 1e3 * (
|
|
993
|
+
/**
 * Cost of text-to-speech for a given number of synthesized characters.
 *
 * @param {string} provider2 - Provider key into ``pricing``.
 * @param {number} characterCount - Characters synthesized.
 * @param {object} pricing - Pricing table keyed by provider.
 * @param {string} [model] - Optional model id used to pick per-model rates.
 * @returns {number} ``characterCount / 1000 * price``; 0 when the resolved
 *   entry is not billed per 1k characters (a missing ``price`` is treated
 *   as 0).
 */
function calculateTtsCost(provider2, characterCount, pricing, model) {
  const rates = resolveProviderRates(pricing[provider2], model);
  if (rates.unit !== "1k_chars") return 0;
  return characterCount / 1e3 * (rates.price ?? 0);
}
|
|
673
|
-
function calculateRealtimeCost(usage, pricing) {
|
|
674
|
-
const
|
|
675
|
-
if (
|
|
998
|
+
function calculateRealtimeCost(usage, pricing, model) {
|
|
999
|
+
const rates = resolveProviderRates(pricing.openai_realtime, model);
|
|
1000
|
+
if (rates.unit !== "token") return 0;
|
|
676
1001
|
const input = usage.input_token_details ?? {};
|
|
677
1002
|
const output = usage.output_token_details ?? {};
|
|
678
|
-
const cachedAudioRate =
|
|
679
|
-
const cachedTextRate =
|
|
1003
|
+
const cachedAudioRate = rates.cached_audio_input_per_token ?? rates.audio_input_per_token ?? 0;
|
|
1004
|
+
const cachedTextRate = rates.cached_text_input_per_token ?? rates.text_input_per_token ?? 0;
|
|
680
1005
|
const totalAudioIn = input.audio_tokens ?? 0;
|
|
681
1006
|
const totalTextIn = input.text_tokens ?? 0;
|
|
682
1007
|
let cachedAudioIn;
|
|
@@ -695,25 +1020,25 @@ function calculateRealtimeCost(usage, pricing) {
|
|
|
695
1020
|
cachedTextIn = 0;
|
|
696
1021
|
}
|
|
697
1022
|
let cost = 0;
|
|
698
|
-
cost += (totalAudioIn - cachedAudioIn) * (
|
|
1023
|
+
cost += (totalAudioIn - cachedAudioIn) * (rates.audio_input_per_token ?? 0);
|
|
699
1024
|
cost += cachedAudioIn * cachedAudioRate;
|
|
700
|
-
cost += (totalTextIn - cachedTextIn) * (
|
|
1025
|
+
cost += (totalTextIn - cachedTextIn) * (rates.text_input_per_token ?? 0);
|
|
701
1026
|
cost += cachedTextIn * cachedTextRate;
|
|
702
|
-
cost += (output.audio_tokens ?? 0) * (
|
|
703
|
-
cost += (output.text_tokens ?? 0) * (
|
|
1027
|
+
cost += (output.audio_tokens ?? 0) * (rates.audio_output_per_token ?? 0);
|
|
1028
|
+
cost += (output.text_tokens ?? 0) * (rates.text_output_per_token ?? 0);
|
|
704
1029
|
return Math.max(0, cost);
|
|
705
1030
|
}
|
|
706
|
-
function calculateRealtimeCachedSavings(usage, pricing) {
|
|
707
|
-
const
|
|
708
|
-
if (
|
|
1031
|
+
function calculateRealtimeCachedSavings(usage, pricing, model) {
|
|
1032
|
+
const rates = resolveProviderRates(pricing.openai_realtime, model);
|
|
1033
|
+
if (rates.unit !== "token") return 0;
|
|
709
1034
|
const input = usage.input_token_details ?? {};
|
|
710
1035
|
const cached = input.cached_tokens_details ?? {};
|
|
711
|
-
const cachedAudioRate =
|
|
712
|
-
const cachedTextRate =
|
|
1036
|
+
const cachedAudioRate = rates.cached_audio_input_per_token ?? rates.audio_input_per_token ?? 0;
|
|
1037
|
+
const cachedTextRate = rates.cached_text_input_per_token ?? rates.text_input_per_token ?? 0;
|
|
713
1038
|
const cachedAudio = Math.min(cached.audio_tokens ?? 0, input.audio_tokens ?? 0);
|
|
714
1039
|
const cachedText = Math.min(cached.text_tokens ?? 0, input.text_tokens ?? 0);
|
|
715
|
-
const fullAudio = cachedAudio * (
|
|
716
|
-
const fullText = cachedText * (
|
|
1040
|
+
const fullAudio = cachedAudio * (rates.audio_input_per_token ?? 0);
|
|
1041
|
+
const fullText = cachedText * (rates.text_input_per_token ?? 0);
|
|
717
1042
|
const discountedAudio = cachedAudio * cachedAudioRate;
|
|
718
1043
|
const discountedText = cachedText * cachedTextRate;
|
|
719
1044
|
return Math.max(0, fullAudio + fullText - (discountedAudio + discountedText));
|
|
@@ -745,14 +1070,32 @@ var llmPricing = {
|
|
|
745
1070
|
"gemini-live-2.5-flash-native-audio": { input: 0.3, output: 2.5 }
|
|
746
1071
|
},
|
|
747
1072
|
groq: {
|
|
1073
|
+
// Rates as of 2026-05-08; verify against groq.com/pricing.
|
|
1074
|
+
// ``llama-3.3-70b-versatile`` is the Patter default for Groq. The
|
|
1075
|
+
// remaining models are reachable via ``model: "..."`` and were silently
|
|
1076
|
+
// billing $0 before this entry was added (silent under-billing).
|
|
748
1077
|
"llama-3.3-70b-versatile": { input: 0.59, output: 0.79 },
|
|
749
|
-
"llama-3.1-8b-instant": { input: 0.05, output: 0.08 }
|
|
1078
|
+
"llama-3.1-8b-instant": { input: 0.05, output: 0.08 },
|
|
1079
|
+
"llama-3.3-70b-specdec": { input: 0.59, output: 0.99 },
|
|
1080
|
+
"llama3-70b-8192": { input: 0.59, output: 0.79 },
|
|
1081
|
+
"llama3-8b-8192": { input: 0.05, output: 0.08 },
|
|
1082
|
+
"mixtral-8x7b-32768": { input: 0.27, output: 0.27 },
|
|
1083
|
+
"gemma2-9b-it": { input: 0.2, output: 0.2 }
|
|
750
1084
|
},
|
|
751
1085
|
cerebras: {
|
|
1086
|
+
// Rates as of 2026-05-08; verify against cerebras.net/inference.
|
|
1087
|
+
// ``gpt-oss-120b`` is the Patter default for Cerebras (set in 0.5.4).
|
|
1088
|
+
// On WSE-3 hardware every model size saturates the downstream TTS
|
|
1089
|
+
// consumption rate (~150-300 tok/sec), so the 120B price stays in line
|
|
1090
|
+
// with the 70B tier rather than scaling with weight count.
|
|
1091
|
+
"gpt-oss-120b": { input: 0.85, output: 1.2 },
|
|
1092
|
+
"llama3.1-8b": { input: 0.1, output: 0.2 },
|
|
752
1093
|
"llama-3.3-70b": { input: 0.85, output: 1.2 },
|
|
753
|
-
"qwen-3-32b": { input: 0.4, output: 0.8 }
|
|
1094
|
+
"qwen-3-32b": { input: 0.4, output: 0.8 },
|
|
1095
|
+
"qwen-3-235b-a22b-instruct-2507": { input: 1, output: 1.5 },
|
|
1096
|
+
"zai-glm-4.7": { input: 0.85, output: 1.2 }
|
|
754
1097
|
},
|
|
755
|
-
// OpenAI Chat Completions (non-Realtime) — mirrors
|
|
1098
|
+
// OpenAI Chat Completions (non-Realtime) — mirrors the Python SDK pricing table.
|
|
756
1099
|
// Rates are per 1M tokens (USD), cache_read = cached input rate.
|
|
757
1100
|
openai: {
|
|
758
1101
|
"gpt-4o": { input: 2.5, output: 10, cache_read: 1.25 },
|
|
@@ -792,6 +1135,7 @@ function calculateTelephonyCost(provider2, durationSeconds, pricing) {
|
|
|
792
1135
|
}
|
|
793
1136
|
|
|
794
1137
|
// src/dashboard/store.ts
|
|
1138
|
+
init_esm_shims();
|
|
795
1139
|
import { EventEmitter } from "events";
|
|
796
1140
|
import * as fs from "fs";
|
|
797
1141
|
import * as path from "path";
|
|
@@ -812,6 +1156,7 @@ var MetricsStore = class extends EventEmitter {
|
|
|
812
1156
|
publish(eventType, data) {
|
|
813
1157
|
this.emit("sse", { type: eventType, data });
|
|
814
1158
|
}
|
|
1159
|
+
/** Mark a call as in-progress (creates the row if it does not yet exist). */
|
|
815
1160
|
recordCallStart(data) {
|
|
816
1161
|
const callId = data.call_id || "";
|
|
817
1162
|
if (!callId) return;
|
|
@@ -909,6 +1254,7 @@ var MetricsStore = class extends EventEmitter {
|
|
|
909
1254
|
}
|
|
910
1255
|
this.publish("call_status", { call_id: callId, status, ...extra });
|
|
911
1256
|
}
|
|
1257
|
+
/** Append a single conversation turn to an active call and broadcast it via SSE. */
|
|
912
1258
|
recordTurn(data) {
|
|
913
1259
|
const callId = data.call_id || "";
|
|
914
1260
|
const turn = data.turn;
|
|
@@ -920,6 +1266,7 @@ var MetricsStore = class extends EventEmitter {
|
|
|
920
1266
|
}
|
|
921
1267
|
this.publish("turn_complete", { call_id: callId, turn });
|
|
922
1268
|
}
|
|
1269
|
+
/** Move a call from active to completed and persist its final metrics. */
|
|
923
1270
|
recordCallEnd(data, metrics) {
|
|
924
1271
|
const callId = data.call_id || "";
|
|
925
1272
|
if (!callId) return;
|
|
@@ -947,10 +1294,12 @@ var MetricsStore = class extends EventEmitter {
|
|
|
947
1294
|
metrics: entry.metrics ?? null
|
|
948
1295
|
});
|
|
949
1296
|
}
|
|
1297
|
+
/** Return a window of completed calls in newest-first order. */
|
|
950
1298
|
getCalls(limit = 50, offset = 0) {
|
|
951
1299
|
const ordered = [...this.calls].reverse();
|
|
952
1300
|
return ordered.slice(offset, offset + limit);
|
|
953
1301
|
}
|
|
1302
|
+
/** Look up a completed call by id (newest match wins). */
|
|
954
1303
|
getCall(callId) {
|
|
955
1304
|
for (let i = this.calls.length - 1; i >= 0; i--) {
|
|
956
1305
|
if (this.calls[i].call_id === callId) return this.calls[i];
|
|
@@ -961,9 +1310,11 @@ var MetricsStore = class extends EventEmitter {
|
|
|
961
1310
|
getActive(callId) {
|
|
962
1311
|
return this.activeCalls.get(callId);
|
|
963
1312
|
}
|
|
1313
|
+
/** Return all currently active (not yet ended) calls. */
|
|
964
1314
|
getActiveCalls() {
|
|
965
1315
|
return Array.from(this.activeCalls.values());
|
|
966
1316
|
}
|
|
1317
|
+
/** Compute summary statistics across the buffered call history. */
|
|
967
1318
|
getAggregates() {
|
|
968
1319
|
const totalCalls = this.calls.length;
|
|
969
1320
|
if (totalCalls === 0) {
|
|
@@ -1015,6 +1366,7 @@ var MetricsStore = class extends EventEmitter {
|
|
|
1015
1366
|
active_calls: this.activeCalls.size
|
|
1016
1367
|
};
|
|
1017
1368
|
}
|
|
1369
|
+
/** Return calls whose `started_at` falls within `[fromTs, toTs]` (Unix seconds). */
|
|
1018
1370
|
getCallsInRange(fromTs = 0, toTs = 0) {
|
|
1019
1371
|
return this.calls.filter((call) => {
|
|
1020
1372
|
const started = call.started_at || 0;
|
|
@@ -1023,6 +1375,7 @@ var MetricsStore = class extends EventEmitter {
|
|
|
1023
1375
|
return true;
|
|
1024
1376
|
});
|
|
1025
1377
|
}
|
|
1378
|
+
/** Number of completed calls currently in the ring buffer. */
|
|
1026
1379
|
get callCount() {
|
|
1027
1380
|
return this.calls.length;
|
|
1028
1381
|
}
|
|
@@ -1123,7 +1476,11 @@ function parseTimestamp(raw) {
|
|
|
1123
1476
|
return null;
|
|
1124
1477
|
}
|
|
1125
1478
|
|
|
1479
|
+
// src/dashboard/routes.ts
|
|
1480
|
+
init_esm_shims();
|
|
1481
|
+
|
|
1126
1482
|
// src/dashboard/auth.ts
|
|
1483
|
+
init_esm_shims();
|
|
1127
1484
|
import crypto from "crypto";
|
|
1128
1485
|
function timingSafeCompare(a, b) {
|
|
1129
1486
|
const aBuf = Buffer.from(a);
|
|
@@ -1156,6 +1513,7 @@ function makeAuthMiddleware(token = "") {
|
|
|
1156
1513
|
}
|
|
1157
1514
|
|
|
1158
1515
|
// src/dashboard/export.ts
|
|
1516
|
+
init_esm_shims();
|
|
1159
1517
|
function callsToCsv(calls) {
|
|
1160
1518
|
const header = [
|
|
1161
1519
|
"call_id",
|
|
@@ -1213,630 +1571,33 @@ function csvEscape(value) {
|
|
|
1213
1571
|
}
|
|
1214
1572
|
|
|
1215
1573
|
// src/dashboard/ui.ts
|
|
1216
|
-
|
|
1217
|
-
|
|
1218
|
-
|
|
1219
|
-
|
|
1220
|
-
<meta
|
|
1221
|
-
<
|
|
1222
|
-
<
|
|
1223
|
-
<
|
|
1224
|
-
<
|
|
1225
|
-
|
|
1226
|
-
|
|
1227
|
-
|
|
1228
|
-
|
|
1229
|
-
|
|
1230
|
-
|
|
1231
|
-
|
|
1232
|
-
|
|
1233
|
-
|
|
1234
|
-
|
|
1235
|
-
|
|
1236
|
-
|
|
1237
|
-
|
|
1238
|
-
--red: #ef4444;
|
|
1239
|
-
--blue: #3b82f6;
|
|
1240
|
-
--purple: #a78bfa;
|
|
1241
|
-
--orange: #fb923c;
|
|
1242
|
-
--yellow: #eab308;
|
|
1243
|
-
--radius: 12px;
|
|
1244
|
-
--font: 'Instrument Sans', ui-sans-serif, system-ui, sans-serif;
|
|
1245
|
-
--mono: 'JetBrains Mono', ui-monospace, SFMono-Regular, Menlo, monospace;
|
|
1246
|
-
--header-bg: #fff;
|
|
1247
|
-
--assistant-bubble: #f0eeff;
|
|
1248
|
-
}
|
|
1249
|
-
@media (prefers-color-scheme: dark) {
|
|
1250
|
-
:root {
|
|
1251
|
-
--bg: #151518;
|
|
1252
|
-
--fg: #e4e4e7;
|
|
1253
|
-
--card: #1c1c21;
|
|
1254
|
-
--primary: #e4e4e7;
|
|
1255
|
-
--primary-fg: #18181b;
|
|
1256
|
-
--secondary: #232329;
|
|
1257
|
-
--muted: #8b8b95;
|
|
1258
|
-
--border: #2c2c33;
|
|
1259
|
-
--border-d: #3a3a44;
|
|
1260
|
-
--green: #34d399;
|
|
1261
|
-
--red: #f87171;
|
|
1262
|
-
--blue: #60a5fa;
|
|
1263
|
-
--purple: #c4b5fd;
|
|
1264
|
-
--orange: #fdba74;
|
|
1265
|
-
--yellow: #fbbf24;
|
|
1266
|
-
--header-bg: #1a1a1f;
|
|
1267
|
-
--assistant-bubble: #252230;
|
|
1268
|
-
}
|
|
1269
|
-
}
|
|
1270
|
-
* { margin:0; padding:0; box-sizing:border-box; }
|
|
1271
|
-
html { -webkit-font-smoothing: antialiased; }
|
|
1272
|
-
body {
|
|
1273
|
-
font-family: var(--font);
|
|
1274
|
-
font-size: 15px;
|
|
1275
|
-
line-height: 1.6;
|
|
1276
|
-
color: var(--fg);
|
|
1277
|
-
background: var(--bg);
|
|
1278
|
-
min-height: 100vh;
|
|
1279
|
-
}
|
|
1280
|
-
|
|
1281
|
-
/* Header */
|
|
1282
|
-
header {
|
|
1283
|
-
position: sticky; top: 0; z-index: 100;
|
|
1284
|
-
background: var(--header-bg);
|
|
1285
|
-
border-bottom: 1px solid var(--border);
|
|
1286
|
-
padding: 0 24px;
|
|
1287
|
-
height: 56px;
|
|
1288
|
-
display: flex; align-items: center; gap: 14px;
|
|
1289
|
-
}
|
|
1290
|
-
.logo {
|
|
1291
|
-
display: flex; align-items: center; gap: 10px;
|
|
1292
|
-
font-weight: 700; font-size: 18px; letter-spacing: -0.02em;
|
|
1293
|
-
text-decoration: none; color: var(--fg);
|
|
1294
|
-
}
|
|
1295
|
-
.logo svg { width: 22px; height: 22px; }
|
|
1296
|
-
.header-sep {
|
|
1297
|
-
width: 1px; height: 20px; background: var(--border-d); margin: 0 2px;
|
|
1298
|
-
}
|
|
1299
|
-
.header-title {
|
|
1300
|
-
font-size: 14px; font-weight: 500; color: var(--muted);
|
|
1301
|
-
}
|
|
1302
|
-
.badge-beta {
|
|
1303
|
-
font-size: 10px; font-weight: 600; letter-spacing: 0.5px;
|
|
1304
|
-
color: #e67e22; background: rgba(230,126,34,0.1);
|
|
1305
|
-
border: 1px solid rgba(230,126,34,0.25);
|
|
1306
|
-
padding: 2px 8px; border-radius: 100px; text-transform: uppercase;
|
|
1307
|
-
}
|
|
1308
|
-
.status {
|
|
1309
|
-
margin-left: auto; font-size: 13px; color: var(--muted);
|
|
1310
|
-
display: flex; align-items: center; gap: 6px;
|
|
1311
|
-
}
|
|
1312
|
-
.dot {
|
|
1313
|
-
width: 7px; height: 7px; border-radius: 50%;
|
|
1314
|
-
background: var(--green); display: inline-block;
|
|
1315
|
-
}
|
|
1316
|
-
|
|
1317
|
-
/* Layout */
|
|
1318
|
-
.container { max-width: 1200px; margin: 0 auto; padding: 24px; }
|
|
1319
|
-
|
|
1320
|
-
/* Stat cards */
|
|
1321
|
-
.cards {
|
|
1322
|
-
display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
|
|
1323
|
-
gap: 14px; margin-bottom: 28px;
|
|
1324
|
-
}
|
|
1325
|
-
.card {
|
|
1326
|
-
background: var(--card);
|
|
1327
|
-
border: 1px solid var(--border);
|
|
1328
|
-
border-radius: var(--radius);
|
|
1329
|
-
padding: 18px 20px;
|
|
1330
|
-
}
|
|
1331
|
-
.card .label {
|
|
1332
|
-
font-size: 12px; color: var(--muted);
|
|
1333
|
-
text-transform: uppercase; letter-spacing: 0.5px; font-weight: 500;
|
|
1334
|
-
}
|
|
1335
|
-
.card .value {
|
|
1336
|
-
font-size: 28px; font-weight: 700; margin-top: 4px;
|
|
1337
|
-
font-family: var(--mono); letter-spacing: -0.02em;
|
|
1338
|
-
}
|
|
1339
|
-
.card .sub { font-size: 12px; color: var(--muted); margin-top: 2px; }
|
|
1340
|
-
|
|
1341
|
-
/* Tabs */
|
|
1342
|
-
.nav-tabs {
|
|
1343
|
-
display: flex; gap: 0; margin-bottom: 16px;
|
|
1344
|
-
border-bottom: 1px solid var(--border);
|
|
1345
|
-
}
|
|
1346
|
-
.nav-tab {
|
|
1347
|
-
padding: 10px 20px; font-size: 13px; font-weight: 500;
|
|
1348
|
-
color: var(--muted); cursor: pointer;
|
|
1349
|
-
border: none; background: none;
|
|
1350
|
-
border-bottom: 2px solid transparent;
|
|
1351
|
-
margin-bottom: -1px; font-family: var(--font);
|
|
1352
|
-
transition: color .15s;
|
|
1353
|
-
}
|
|
1354
|
-
.nav-tab:hover { color: var(--fg); }
|
|
1355
|
-
.nav-tab.active { color: var(--fg); border-bottom-color: var(--primary); }
|
|
1356
|
-
|
|
1357
|
-
.tab-content { display: none; }
|
|
1358
|
-
.tab-content.active { display: block; }
|
|
1359
|
-
|
|
1360
|
-
/* Tables */
|
|
1361
|
-
table {
|
|
1362
|
-
width: 100%; border-collapse: collapse;
|
|
1363
|
-
background: var(--card);
|
|
1364
|
-
border: 1px solid var(--border);
|
|
1365
|
-
border-radius: var(--radius);
|
|
1366
|
-
overflow: hidden;
|
|
1367
|
-
}
|
|
1368
|
-
th {
|
|
1369
|
-
text-align: left; font-size: 11px; text-transform: uppercase;
|
|
1370
|
-
color: var(--muted); padding: 12px 16px;
|
|
1371
|
-
border-bottom: 1px solid var(--border);
|
|
1372
|
-
letter-spacing: 0.5px; font-weight: 600;
|
|
1373
|
-
background: var(--secondary);
|
|
1374
|
-
}
|
|
1375
|
-
td {
|
|
1376
|
-
padding: 12px 16px; border-bottom: 1px solid var(--border);
|
|
1377
|
-
font-size: 13px;
|
|
1378
|
-
}
|
|
1379
|
-
tr:last-child td { border-bottom: none; }
|
|
1380
|
-
tr.clickable { cursor: pointer; transition: background .1s; }
|
|
1381
|
-
tr.clickable:hover { background: var(--secondary); }
|
|
1382
|
-
|
|
1383
|
-
code {
|
|
1384
|
-
font-family: var(--mono); font-size: 12px;
|
|
1385
|
-
background: var(--secondary); padding: 2px 6px;
|
|
1386
|
-
border-radius: 4px;
|
|
1387
|
-
}
|
|
1388
|
-
|
|
1389
|
-
/* Badges */
|
|
1390
|
-
.badge {
|
|
1391
|
-
display: inline-block; padding: 3px 10px; border-radius: 100px;
|
|
1392
|
-
font-size: 11px; font-weight: 600;
|
|
1393
|
-
}
|
|
1394
|
-
.badge-active { background: rgba(34,197,94,0.1); color: #16a34a; }
|
|
1395
|
-
.badge-ended { background: var(--secondary); color: var(--muted); }
|
|
1396
|
-
.badge-pipeline { background: rgba(167,139,250,0.1); color: #7c3aed; }
|
|
1397
|
-
.badge-realtime { background: rgba(59,130,246,0.1); color: #2563eb; }
|
|
1398
|
-
|
|
1399
|
-
.cost { color: #16a34a; font-family: var(--mono); font-size: 13px; }
|
|
1400
|
-
.latency { color: #ca8a04; font-family: var(--mono); font-size: 13px; }
|
|
1401
|
-
@media (prefers-color-scheme: dark) {
|
|
1402
|
-
.cost { color: var(--green); }
|
|
1403
|
-
.latency { color: var(--yellow); }
|
|
1404
|
-
code { background: var(--secondary); color: var(--fg); }
|
|
1405
|
-
}
|
|
1406
|
-
.empty {
|
|
1407
|
-
text-align: center; padding: 48px; color: var(--muted);
|
|
1408
|
-
font-size: 14px;
|
|
1409
|
-
}
|
|
1410
|
-
|
|
1411
|
-
/* Modal */
|
|
1412
|
-
.modal-overlay {
|
|
1413
|
-
display: none; position: fixed; inset: 0;
|
|
1414
|
-
background: rgba(0,0,0,0.4); backdrop-filter: blur(6px);
|
|
1415
|
-
z-index: 200;
|
|
1416
|
-
justify-content: center; align-items: flex-start;
|
|
1417
|
-
padding: 48px 20px; overflow-y: auto;
|
|
1418
|
-
}
|
|
1419
|
-
.modal-overlay.open { display: flex; }
|
|
1420
|
-
.modal {
|
|
1421
|
-
background: var(--card);
|
|
1422
|
-
border: 1px solid var(--border);
|
|
1423
|
-
border-radius: 16px;
|
|
1424
|
-
max-width: 820px; width: 100%;
|
|
1425
|
-
padding: 0;
|
|
1426
|
-
box-shadow: 0 24px 64px rgba(0,0,0,0.12), 0 0 0 1px rgba(0,0,0,0.03);
|
|
1427
|
-
overflow: hidden;
|
|
1428
|
-
}
|
|
1429
|
-
.modal-header {
|
|
1430
|
-
display: flex; justify-content: space-between; align-items: center;
|
|
1431
|
-
padding: 20px 28px;
|
|
1432
|
-
border-bottom: 1px solid var(--border);
|
|
1433
|
-
background: var(--bg);
|
|
1434
|
-
}
|
|
1435
|
-
.modal-header h2 { font-size: 15px; font-weight: 600; display: flex; align-items: center; gap: 10px; }
|
|
1436
|
-
.modal-close {
|
|
1437
|
-
background: none; border: 1px solid var(--border);
|
|
1438
|
-
color: var(--muted); width: 30px; height: 30px;
|
|
1439
|
-
border-radius: 8px; font-size: 16px; cursor: pointer;
|
|
1440
|
-
display: flex; align-items: center; justify-content: center;
|
|
1441
|
-
transition: all .15s;
|
|
1442
|
-
}
|
|
1443
|
-
.modal-close:hover { background: var(--secondary); color: var(--fg); }
|
|
1444
|
-
.modal-body { padding: 24px 28px; }
|
|
1445
|
-
|
|
1446
|
-
.detail-grid {
|
|
1447
|
-
display: grid; grid-template-columns: 1fr 1fr;
|
|
1448
|
-
gap: 14px; margin-bottom: 20px;
|
|
1449
|
-
}
|
|
1450
|
-
.detail-card {
|
|
1451
|
-
background: var(--bg);
|
|
1452
|
-
border: 1px solid var(--border);
|
|
1453
|
-
border-radius: var(--radius); padding: 16px 18px;
|
|
1454
|
-
}
|
|
1455
|
-
.detail-card h3 {
|
|
1456
|
-
font-size: 11px; color: var(--muted);
|
|
1457
|
-
text-transform: uppercase; letter-spacing: 0.5px;
|
|
1458
|
-
margin-bottom: 10px; font-weight: 600;
|
|
1459
|
-
}
|
|
1460
|
-
.detail-row {
|
|
1461
|
-
display: flex; justify-content: space-between; align-items: baseline;
|
|
1462
|
-
font-size: 13px; padding: 5px 0;
|
|
1463
|
-
}
|
|
1464
|
-
.detail-row .k { color: var(--muted); font-weight: 500; }
|
|
1465
|
-
.detail-row span:last-child { font-weight: 500; text-align: right; }
|
|
1466
|
-
.detail-row .mono { font-family: var(--mono); font-size: 12px; }
|
|
1467
|
-
.detail-sep {
|
|
1468
|
-
border-top: 1px solid var(--border); padding-top: 8px; margin-top: 6px;
|
|
1469
|
-
}
|
|
1470
|
-
|
|
1471
|
-
.transcript-box {
|
|
1472
|
-
border: 1px solid var(--border);
|
|
1473
|
-
border-radius: var(--radius);
|
|
1474
|
-
padding: 16px; max-height: 340px; overflow-y: auto;
|
|
1475
|
-
background: var(--bg);
|
|
1476
|
-
}
|
|
1477
|
-
.transcript-box .msg {
|
|
1478
|
-
padding: 8px 12px; border-radius: 10px; font-size: 13px;
|
|
1479
|
-
max-width: 85%; margin-bottom: 6px; line-height: 1.5;
|
|
1480
|
-
}
|
|
1481
|
-
.transcript-box .msg.user {
|
|
1482
|
-
background: var(--secondary); margin-left: auto;
|
|
1483
|
-
border-bottom-right-radius: 4px;
|
|
1484
|
-
}
|
|
1485
|
-
.transcript-box .msg.assistant {
|
|
1486
|
-
background: var(--assistant-bubble); margin-right: auto;
|
|
1487
|
-
border-bottom-left-radius: 4px;
|
|
1488
|
-
}
|
|
1489
|
-
.transcript-box .role {
|
|
1490
|
-
font-weight: 600; font-size: 11px; text-transform: uppercase;
|
|
1491
|
-
letter-spacing: 0.3px; display: block; margin-bottom: 2px;
|
|
1492
|
-
}
|
|
1493
|
-
.transcript-box .msg.user .role { color: var(--blue); }
|
|
1494
|
-
.transcript-box .msg.assistant .role { color: #7c3aed; }
|
|
1495
|
-
|
|
1496
|
-
/* Turn bars */
|
|
1497
|
-
.turns-table { margin-top: 16px; }
|
|
1498
|
-
.turns-table table { border: 1px solid var(--border); }
|
|
1499
|
-
.bar-container { display: flex; height: 14px; border-radius: 4px; overflow: hidden; min-width: 120px; }
|
|
1500
|
-
.bar-stt { background: var(--blue); }
|
|
1501
|
-
.bar-llm { background: var(--purple); }
|
|
1502
|
-
.bar-tts { background: var(--orange); }
|
|
1503
|
-
</style>
|
|
1504
|
-
</head>
|
|
1505
|
-
<body>
|
|
1506
|
-
<header>
|
|
1507
|
-
<a href="/" class="logo">
|
|
1508
|
-
<svg viewBox="0 0 1188 1773" fill="none" xmlns="http://www.w3.org/2000/svg">
|
|
1509
|
-
<path d="M25 561L245 694M25 561V818M245 694V951M25 961V1218M25 1357V1614M245 1489V1747M245 1093V1351M942 823V1080M1161 955V1213M1162 555V812M942 422V679M669 585V843L787 913M942 25V282M1162 158V415M25 818L245 951M244 1094L464 962M25 961L143 890M244 1352L464 1219M942 823L1162 956M942 679L1162 812M721 811L942 679M669 842L724 809M669 586L724 553M1041 883L1162 812M245 1747L1161 1213M244 1490L942 1080M25 1357L142 1289M518 1071L942 823M721 555L942 422M942 422L1162 556M942 282L1162 415M942 25L1162 158M942 1080L1161 1213M25 1218L245 1351M25 961L245 1094M464 962L519 929M464 1219L519 1186V928L403 859M25 1357L245 1490M25 1614L245 1747M25 561L942 25M244 694L941 282M1043 484L1162 415M245 951L668 704" stroke="currentColor" stroke-width="50" stroke-linecap="round"/>
|
|
1510
|
-
</svg>
|
|
1511
|
-
Patter
|
|
1512
|
-
</a>
|
|
1513
|
-
<div class="header-sep"></div>
|
|
1514
|
-
<span class="header-title">Dashboard</span>
|
|
1515
|
-
<span class="badge-beta">Beta</span>
|
|
1516
|
-
<div class="status"><span class="dot"></span> <span id="status-text">Listening</span></div>
|
|
1517
|
-
</header>
|
|
1518
|
-
|
|
1519
|
-
<div class="container">
|
|
1520
|
-
<div class="cards">
|
|
1521
|
-
<div class="card">
|
|
1522
|
-
<div class="label">Total Calls</div>
|
|
1523
|
-
<div class="value" id="stat-total">0</div>
|
|
1524
|
-
<div class="sub"><span id="stat-active">0</span> active</div>
|
|
1525
|
-
</div>
|
|
1526
|
-
<div class="card">
|
|
1527
|
-
<div class="label">Total Cost</div>
|
|
1528
|
-
<div class="value cost" id="stat-cost">$0.00</div>
|
|
1529
|
-
<div class="sub" id="stat-cost-breakdown">-</div>
|
|
1530
|
-
</div>
|
|
1531
|
-
<div class="card">
|
|
1532
|
-
<div class="label">Avg Duration</div>
|
|
1533
|
-
<div class="value" id="stat-duration">0s</div>
|
|
1534
|
-
</div>
|
|
1535
|
-
<div class="card">
|
|
1536
|
-
<div class="label">Avg Latency</div>
|
|
1537
|
-
<div class="value latency" id="stat-latency">0ms</div>
|
|
1538
|
-
<div class="sub">end-to-end response</div>
|
|
1539
|
-
</div>
|
|
1540
|
-
</div>
|
|
1541
|
-
|
|
1542
|
-
<div class="nav-tabs">
|
|
1543
|
-
<button class="nav-tab active" data-tab="calls">Calls</button>
|
|
1544
|
-
<button class="nav-tab" data-tab="active">Active</button>
|
|
1545
|
-
</div>
|
|
1546
|
-
|
|
1547
|
-
<div class="tab-content active" id="tab-calls">
|
|
1548
|
-
<div class="section">
|
|
1549
|
-
<table id="calls-table">
|
|
1550
|
-
<thead>
|
|
1551
|
-
<tr>
|
|
1552
|
-
<th>Call ID</th><th>Direction</th><th>From / To</th>
|
|
1553
|
-
<th>Duration</th><th>Mode</th><th>Cost</th><th>Avg Latency</th><th>Turns</th>
|
|
1554
|
-
</tr>
|
|
1555
|
-
</thead>
|
|
1556
|
-
<tbody id="calls-body">
|
|
1557
|
-
<tr><td colspan="8" class="empty">No calls yet. Waiting for incoming calls...</td></tr>
|
|
1558
|
-
</tbody>
|
|
1559
|
-
</table>
|
|
1560
|
-
</div>
|
|
1561
|
-
</div>
|
|
1562
|
-
|
|
1563
|
-
<div class="tab-content" id="tab-active">
|
|
1564
|
-
<div class="section">
|
|
1565
|
-
<table>
|
|
1566
|
-
<thead>
|
|
1567
|
-
<tr><th>Call ID</th><th>Caller</th><th>Callee</th><th>Direction</th><th>Duration</th><th>Turns</th></tr>
|
|
1568
|
-
</thead>
|
|
1569
|
-
<tbody id="active-body">
|
|
1570
|
-
<tr><td colspan="6" class="empty">No active calls</td></tr>
|
|
1571
|
-
</tbody>
|
|
1572
|
-
</table>
|
|
1573
|
-
</div>
|
|
1574
|
-
</div>
|
|
1575
|
-
</div>
|
|
1576
|
-
|
|
1577
|
-
<div class="modal-overlay" id="modal">
|
|
1578
|
-
<div class="modal">
|
|
1579
|
-
<div class="modal-header">
|
|
1580
|
-
<h2 id="modal-title">Call Detail</h2>
|
|
1581
|
-
<button class="modal-close" onclick="closeModal()">×</button>
|
|
1582
|
-
</div>
|
|
1583
|
-
<div class="modal-body" id="modal-body"></div>
|
|
1584
|
-
</div>
|
|
1585
|
-
</div>
|
|
1586
|
-
|
|
1587
|
-
<script>
|
|
1588
|
-
var _$ = function(s) { return document.querySelector(s); };
|
|
1589
|
-
var _$$ = function(s) { return document.querySelectorAll(s); };
|
|
1590
|
-
|
|
1591
|
-
_$$('.nav-tab').forEach(function(tab) {
|
|
1592
|
-
tab.addEventListener('click', function() {
|
|
1593
|
-
_$$('.nav-tab').forEach(function(t) { t.classList.remove('active'); });
|
|
1594
|
-
_$$('.tab-content').forEach(function(t) { t.classList.remove('active'); });
|
|
1595
|
-
tab.classList.add('active');
|
|
1596
|
-
document.querySelector('#tab-'+tab.dataset.tab).classList.add('active');
|
|
1597
|
-
});
|
|
1598
|
-
});
|
|
1599
|
-
|
|
1600
|
-
function esc(s) {
|
|
1601
|
-
if (!s) return '';
|
|
1602
|
-
return String(s).replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;').replace(/"/g,'&quot;').replace(/'/g,'&#39;');
|
|
1603
|
-
}
|
|
1604
|
-
function fmtCost(v) { return v >= 0.01 ? '$'+v.toFixed(4) : v > 0 ? '$'+v.toFixed(6) : '$0.00'; }
|
|
1605
|
-
function fmtMs(v) { return v != null && v >= 0 ? Math.round(v)+'ms' : '-'; }
|
|
1606
|
-
function fmtDur(s) {
|
|
1607
|
-
if (s == null || s < 0) return '-';
|
|
1608
|
-
if (s < 60) return Math.round(s)+'s';
|
|
1609
|
-
return Math.floor(s/60)+'m '+Math.round(s%60)+'s';
|
|
1610
|
-
}
|
|
1611
|
-
function shortId(id) { return id ? esc(id.length > 16 ? id.slice(0,8)+'...'+id.slice(-4) : id) : '-'; }
|
|
1612
|
-
|
|
1613
|
-
function fetchJSON(url) {
|
|
1614
|
-
return fetch(url).then(function(r) { return r.json(); });
|
|
1615
|
-
}
|
|
1616
|
-
|
|
1617
|
-
function refreshAggregates() {
|
|
1618
|
-
return fetchJSON('/api/dashboard/aggregates').then(function(d) {
|
|
1619
|
-
_$('#stat-total').textContent = d.total_calls;
|
|
1620
|
-
_$('#stat-active').textContent = d.active_calls;
|
|
1621
|
-
_$('#stat-cost').textContent = fmtCost(d.total_cost);
|
|
1622
|
-
var cb = d.cost_breakdown;
|
|
1623
|
-
_$('#stat-cost-breakdown').textContent =
|
|
1624
|
-
'STT '+fmtCost(cb.stt)+' | LLM '+fmtCost(cb.llm)+' | TTS '+fmtCost(cb.tts)+' | Tel '+fmtCost(cb.telephony);
|
|
1625
|
-
_$('#stat-duration').textContent = fmtDur(d.avg_duration);
|
|
1626
|
-
_$('#stat-latency').textContent = fmtMs(d.avg_latency_ms);
|
|
1627
|
-
});
|
|
1628
|
-
}
|
|
1629
|
-
|
|
1630
|
-
function refreshCalls() {
|
|
1631
|
-
return fetchJSON('/api/dashboard/calls?limit=50').then(function(calls) {
|
|
1632
|
-
var body = _$('#calls-body');
|
|
1633
|
-
if (!calls.length) {
|
|
1634
|
-
body.innerHTML = '<tr><td colspan="8" class="empty">No calls yet. Waiting for incoming calls...</td></tr>';
|
|
1635
|
-
return;
|
|
1636
|
-
}
|
|
1637
|
-
body.innerHTML = calls.map(function(c) {
|
|
1638
|
-
var m = c.metrics || {};
|
|
1639
|
-
var cost = m.cost || {};
|
|
1640
|
-
var lat = m.latency_avg || {};
|
|
1641
|
-
var mode = m.provider_mode || '-';
|
|
1642
|
-
var turns = m.turns ? m.turns.length : 0;
|
|
1643
|
-
var modeClass = mode === 'pipeline' ? 'badge-pipeline' : 'badge-realtime';
|
|
1644
|
-
return '<tr class="clickable" onclick="showCall(\\''+esc(c.call_id)+'\\')">'+
|
|
1645
|
-
'<td><code>'+shortId(c.call_id)+'</code></td>'+
|
|
1646
|
-
'<td>'+(esc(c.direction) || '-')+'</td>'+
|
|
1647
|
-
'<td>'+(esc(c.caller) || '-')+' → '+(esc(c.callee) || '-')+'</td>'+
|
|
1648
|
-
'<td>'+fmtDur(m.duration_seconds)+'</td>'+
|
|
1649
|
-
'<td><span class="badge '+modeClass+'">'+esc(mode)+'</span></td>'+
|
|
1650
|
-
'<td class="cost">'+fmtCost(cost.total || 0)+'</td>'+
|
|
1651
|
-
'<td class="latency">'+fmtMs(lat.total_ms || 0)+'</td>'+
|
|
1652
|
-
'<td>'+turns+'</td></tr>';
|
|
1653
|
-
}).join('');
|
|
1654
|
-
});
|
|
1655
|
-
}
|
|
1656
|
-
|
|
1657
|
-
function refreshActive() {
|
|
1658
|
-
return fetchJSON('/api/dashboard/active').then(function(active) {
|
|
1659
|
-
var body = _$('#active-body');
|
|
1660
|
-
if (!active.length) {
|
|
1661
|
-
body.innerHTML = '<tr><td colspan="6" class="empty">No active calls</td></tr>';
|
|
1662
|
-
return;
|
|
1663
|
-
}
|
|
1664
|
-
var now = Date.now() / 1000;
|
|
1665
|
-
body.innerHTML = active.map(function(c) {
|
|
1666
|
-
var dur = c.started_at ? Math.round(now - c.started_at) : 0;
|
|
1667
|
-
var turns = c.turns ? c.turns.length : 0;
|
|
1668
|
-
return '<tr>'+
|
|
1669
|
-
'<td><code>'+shortId(c.call_id)+'</code></td>'+
|
|
1670
|
-
'<td>'+(esc(c.caller) || '-')+'</td>'+
|
|
1671
|
-
'<td>'+(esc(c.callee) || '-')+'</td>'+
|
|
1672
|
-
'<td>'+(esc(c.direction) || '-')+'</td>'+
|
|
1673
|
-
'<td data-started="'+(c.started_at || 0)+'">'+fmtDur(dur)+'</td>'+
|
|
1674
|
-
'<td>'+turns+'</td></tr>';
|
|
1675
|
-
}).join('');
|
|
1676
|
-
});
|
|
1677
|
-
}
|
|
1678
|
-
|
|
1679
|
-
function showCall(callId) {
|
|
1680
|
-
fetchJSON('/api/dashboard/calls/'+encodeURIComponent(callId)).then(function(c) {
|
|
1681
|
-
if (c.error) return;
|
|
1682
|
-
var m = c.metrics || {};
|
|
1683
|
-
var cost = m.cost || {};
|
|
1684
|
-
var latAvg = m.latency_avg || {};
|
|
1685
|
-
var latP95 = m.latency_p95 || {};
|
|
1686
|
-
var turns = m.turns || [];
|
|
1687
|
-
|
|
1688
|
-
var modeLabel = (m.provider_mode || '').replace(/_/g, ' ');
|
|
1689
|
-
var modeBadgeClass = (m.provider_mode || '').indexOf('pipeline') !== -1 ? 'badge-pipeline' : 'badge-realtime';
|
|
1690
|
-
_$('#modal-title').innerHTML = 'Call <code>'+shortId(c.call_id)+'</code> <span class="badge '+modeBadgeClass+'" style="font-size:10px">'+esc(modeLabel)+'</span>';
|
|
1691
|
-
|
|
1692
|
-
var isRealtime = (m.provider_mode || '').indexOf('realtime') !== -1;
|
|
1693
|
-
|
|
1694
|
-
var html = '<div class="detail-grid">'+
|
|
1695
|
-
'<div class="detail-card">'+
|
|
1696
|
-
'<h3>Overview</h3>'+
|
|
1697
|
-
'<div class="detail-row"><span class="k">Direction</span><span>'+(esc(c.direction) || '-')+'</span></div>'+
|
|
1698
|
-
'<div class="detail-row"><span class="k">From</span><span class="mono">'+(esc(c.caller) || '-')+'</span></div>'+
|
|
1699
|
-
'<div class="detail-row"><span class="k">To</span><span class="mono">'+(esc(c.callee) || '-')+'</span></div>'+
|
|
1700
|
-
'<div class="detail-row"><span class="k">Duration</span><span style="font-weight:600">'+fmtDur(m.duration_seconds)+'</span></div>'+
|
|
1701
|
-
(isRealtime ? '' :
|
|
1702
|
-
'<div class="detail-row"><span class="k">STT</span><span>'+(esc(m.stt_provider) || '-')+'</span></div>'+
|
|
1703
|
-
'<div class="detail-row"><span class="k">TTS</span><span>'+(esc(m.tts_provider) || '-')+'</span></div>'+
|
|
1704
|
-
'<div class="detail-row"><span class="k">LLM</span><span>'+(esc(m.llm_provider) || '-')+'</span></div>'
|
|
1705
|
-
)+
|
|
1706
|
-
'<div class="detail-row"><span class="k">Telephony</span><span>'+(esc(m.telephony_provider) || '-')+'</span></div>'+
|
|
1707
|
-
'</div>'+
|
|
1708
|
-
'<div class="detail-card">'+
|
|
1709
|
-
'<h3>Cost</h3>'+
|
|
1710
|
-
(isRealtime ?
|
|
1711
|
-
'<div class="detail-row"><span class="k">OpenAI</span><span class="cost">'+fmtCost(cost.llm || 0)+'</span></div>' :
|
|
1712
|
-
'<div class="detail-row"><span class="k">STT</span><span class="cost">'+fmtCost(cost.stt || 0)+'</span></div>'+
|
|
1713
|
-
'<div class="detail-row"><span class="k">LLM</span><span class="cost">'+fmtCost(cost.llm || 0)+'</span></div>'+
|
|
1714
|
-
'<div class="detail-row"><span class="k">TTS</span><span class="cost">'+fmtCost(cost.tts || 0)+'</span></div>'
|
|
1715
|
-
)+
|
|
1716
|
-
'<div class="detail-row"><span class="k">Telephony</span><span class="cost">'+fmtCost(cost.telephony || 0)+'</span></div>'+
|
|
1717
|
-
'<div class="detail-row detail-sep">'+
|
|
1718
|
-
'<span class="k" style="font-weight:600">Total</span><span class="cost" style="font-weight:700;font-size:14px">'+fmtCost(cost.total || 0)+'</span>'+
|
|
1719
|
-
'</div>'+
|
|
1720
|
-
'<h3 style="margin-top:16px">Latency <span style="font-weight:400;text-transform:none;letter-spacing:0;color:var(--muted)">(avg / p95)</span></h3>'+
|
|
1721
|
-
(isRealtime ? '' :
|
|
1722
|
-
'<div class="detail-row"><span class="k">STT</span><span class="latency">'+fmtMs(latAvg.stt_ms)+' / '+fmtMs(latP95.stt_ms)+'</span></div>'+
|
|
1723
|
-
'<div class="detail-row"><span class="k">LLM</span><span class="latency">'+fmtMs(latAvg.llm_ms)+' / '+fmtMs(latP95.llm_ms)+'</span></div>'+
|
|
1724
|
-
'<div class="detail-row"><span class="k">TTS</span><span class="latency">'+fmtMs(latAvg.tts_ms)+' / '+fmtMs(latP95.tts_ms)+'</span></div>'
|
|
1725
|
-
)+
|
|
1726
|
-
'<div class="detail-row"><span class="k">'+(isRealtime ? 'End-to-end' : 'Total')+'</span><span class="latency" style="font-weight:700;font-size:14px">'+fmtMs(latAvg.total_ms)+' / '+fmtMs(latP95.total_ms)+'</span></div>'+
|
|
1727
|
-
'</div></div>';
|
|
1728
|
-
|
|
1729
|
-
if (turns.length) {
|
|
1730
|
-
var maxMs = Math.max.apply(null, turns.map(function(t) {
|
|
1731
|
-
var l = t.latency || {};
|
|
1732
|
-
return (l.stt_ms||0) + (l.llm_ms||0) + (l.tts_ms||0) + (l.total_ms||0);
|
|
1733
|
-
}).concat([1]));
|
|
1734
|
-
html += '<div class="detail-card turns-table"><h3>Turns ('+turns.length+')</h3>'+
|
|
1735
|
-
'<table><thead><tr><th>#</th><th>User</th><th>Agent</th><th>Latency</th><th>Breakdown</th></tr></thead><tbody>';
|
|
1736
|
-
turns.forEach(function(t, i) {
|
|
1737
|
-
var l = t.latency || {};
|
|
1738
|
-
var total = l.total_ms || ((l.stt_ms||0) + (l.llm_ms||0) + (l.tts_ms||0));
|
|
1739
|
-
var scale = total > 0 ? 120 / maxMs : 0;
|
|
1740
|
-
var sttW = (l.stt_ms||0) * scale;
|
|
1741
|
-
var llmW = (l.llm_ms||0) * scale;
|
|
1742
|
-
var ttsW = (l.tts_ms||0) * scale;
|
|
1743
|
-
var totalW = total > 0 && sttW === 0 && llmW === 0 && ttsW === 0 ? total * scale : 0;
|
|
1744
|
-
html += '<tr>'+
|
|
1745
|
-
'<td>'+(t.turn_index !== undefined ? t.turn_index : i)+'</td>'+
|
|
1746
|
-
'<td title="'+esc(t.user_text||'')+'">'+esc((t.user_text||'').slice(0,40))+((t.user_text||'').length>40?'...':'')+'</td>'+
|
|
1747
|
-
'<td title="'+esc(t.agent_text||'')+'">'+esc((t.agent_text||'').slice(0,40))+((t.agent_text||'').length>40?'...':'')+'</td>'+
|
|
1748
|
-
'<td class="latency">'+fmtMs(total)+'</td>'+
|
|
1749
|
-
'<td><div class="bar-container">'+
|
|
1750
|
-
(sttW > 0 ? '<div class="bar-stt" style="width:'+sttW+'px" title="STT '+fmtMs(l.stt_ms)+'"></div>' : '')+
|
|
1751
|
-
(llmW > 0 ? '<div class="bar-llm" style="width:'+llmW+'px" title="LLM '+fmtMs(l.llm_ms)+'"></div>' : '')+
|
|
1752
|
-
(ttsW > 0 ? '<div class="bar-tts" style="width:'+ttsW+'px" title="TTS '+fmtMs(l.tts_ms)+'"></div>' : '')+
|
|
1753
|
-
(totalW > 0 ? '<div class="bar-llm" style="width:'+totalW+'px" title="Total '+fmtMs(total)+'"></div>' : '')+
|
|
1754
|
-
'</div></td></tr>';
|
|
1755
|
-
});
|
|
1756
|
-
html += '</tbody></table>'+
|
|
1757
|
-
'<div style="margin-top:10px;font-size:11px;color:var(--muted)">'+
|
|
1758
|
-
(isRealtime ?
|
|
1759
|
-
'<span style="color:var(--purple)">■</span> End-to-end' :
|
|
1760
|
-
'<span style="color:var(--blue)">■</span> STT '+
|
|
1761
|
-
'<span style="color:var(--purple)">■</span> LLM '+
|
|
1762
|
-
'<span style="color:var(--orange)">■</span> TTS'
|
|
1763
|
-
)+
|
|
1764
|
-
'</div></div>';
|
|
1765
|
-
}
|
|
1766
|
-
|
|
1767
|
-
var transcript = c.transcript || [];
|
|
1768
|
-
if (transcript.length) {
|
|
1769
|
-
html += '<div class="detail-card" style="margin-top:16px"><h3>Transcript</h3><div class="transcript-box">';
|
|
1770
|
-
transcript.forEach(function(msg) {
|
|
1771
|
-
var role = esc(msg.role || 'unknown');
|
|
1772
|
-
html += '<div class="msg '+role+'"><span class="role">'+role+'</span>'+esc(msg.text || '')+'</div>';
|
|
1773
|
-
});
|
|
1774
|
-
html += '</div></div>';
|
|
1574
|
+
init_esm_shims();
|
|
1575
|
+
import { readFileSync as readFileSync2 } from "fs";
|
|
1576
|
+
import { join as join2, dirname } from "path";
|
|
1577
|
+
var FALLBACK_HTML = `<!doctype html>
|
|
1578
|
+
<html><head><meta charset="utf-8"><title>Patter dashboard</title></head>
|
|
1579
|
+
<body style="font-family:ui-sans-serif,system-ui;padding:2rem;color:#1a1a1a">
|
|
1580
|
+
<h1>Dashboard asset missing</h1>
|
|
1581
|
+
<p>The bundled <code>ui.html</code> was not found alongside this module.
|
|
1582
|
+
Run <code>cd dashboard-app && npm run build && npm run sync</code>
|
|
1583
|
+
from the repo root to regenerate it.</p>
|
|
1584
|
+
</body></html>`;
|
|
1585
|
+
function loadDashboardHtml() {
|
|
1586
|
+
const here = typeof __dirname !== "undefined" ? __dirname : dirname(".");
|
|
1587
|
+
const candidates = [
|
|
1588
|
+
join2(here, "ui.html"),
|
|
1589
|
+
join2(here, "dashboard", "ui.html"),
|
|
1590
|
+
join2(here, "..", "dashboard", "ui.html")
|
|
1591
|
+
];
|
|
1592
|
+
for (const path3 of candidates) {
|
|
1593
|
+
try {
|
|
1594
|
+
return readFileSync2(path3, "utf8");
|
|
1595
|
+
} catch {
|
|
1775
1596
|
}
|
|
1776
|
-
|
|
1777
|
-
_$('#modal-body').innerHTML = html;
|
|
1778
|
-
_$('#modal').classList.add('open');
|
|
1779
|
-
});
|
|
1780
|
-
}
|
|
1781
|
-
|
|
1782
|
-
function closeModal() { _$('#modal').classList.remove('open'); }
|
|
1783
|
-
_$('#modal').addEventListener('click', function(e) { if (e.target === _$('#modal')) closeModal(); });
|
|
1784
|
-
document.addEventListener('keydown', function(e) { if (e.key === 'Escape') closeModal(); });
|
|
1785
|
-
|
|
1786
|
-
function refresh() {
|
|
1787
|
-
return Promise.all([refreshAggregates(), refreshCalls(), refreshActive()]).then(function() {
|
|
1788
|
-
_$('#status-text').textContent = 'Listening';
|
|
1789
|
-
}).catch(function() {
|
|
1790
|
-
_$('#status-text').textContent = 'Connection error';
|
|
1791
|
-
});
|
|
1792
|
-
}
|
|
1793
|
-
|
|
1794
|
-
refresh();
|
|
1795
|
-
|
|
1796
|
-
// Update active call durations every second
|
|
1797
|
-
setInterval(function() {
|
|
1798
|
-
var cells = document.querySelectorAll('#active-body td[data-started]');
|
|
1799
|
-
if (!cells.length) return;
|
|
1800
|
-
var now = Date.now() / 1000;
|
|
1801
|
-
cells.forEach(function(td) {
|
|
1802
|
-
var started = parseFloat(td.getAttribute('data-started'));
|
|
1803
|
-
if (started) td.textContent = fmtDur(Math.round(now - started));
|
|
1804
|
-
});
|
|
1805
|
-
}, 1000);
|
|
1806
|
-
|
|
1807
|
-
if (typeof EventSource !== 'undefined') {
|
|
1808
|
-
var sseUrl = '/api/dashboard/events';
|
|
1809
|
-
var sseBackoff = 1000;
|
|
1810
|
-
var sseFailures = 0;
|
|
1811
|
-
var SSE_MAX_BACKOFF = 30000;
|
|
1812
|
-
var SSE_MAX_FAILURES = 5;
|
|
1813
|
-
|
|
1814
|
-
function connectSSE() {
|
|
1815
|
-
var es = new EventSource(sseUrl);
|
|
1816
|
-
function onEvent() { sseBackoff = 1000; sseFailures = 0; }
|
|
1817
|
-
es.addEventListener('call_start', function() { onEvent(); refresh(); });
|
|
1818
|
-
es.addEventListener('turn_complete', function() { onEvent(); refreshAggregates(); });
|
|
1819
|
-
es.addEventListener('call_end', function() { onEvent(); refresh(); });
|
|
1820
|
-
es.onerror = function() {
|
|
1821
|
-
es.close();
|
|
1822
|
-
sseFailures++;
|
|
1823
|
-
if (sseFailures >= SSE_MAX_FAILURES) {
|
|
1824
|
-
_$('#status-text').textContent = 'Polling';
|
|
1825
|
-
setInterval(refresh, 5000);
|
|
1826
|
-
return;
|
|
1827
|
-
}
|
|
1828
|
-
_$('#status-text').textContent = 'Reconnecting...';
|
|
1829
|
-
setTimeout(connectSSE, sseBackoff);
|
|
1830
|
-
sseBackoff = Math.min(sseBackoff * 2, SSE_MAX_BACKOFF);
|
|
1831
|
-
};
|
|
1832
1597
|
}
|
|
1833
|
-
|
|
1834
|
-
} else {
|
|
1835
|
-
setInterval(refresh, 3000);
|
|
1598
|
+
return FALLBACK_HTML;
|
|
1836
1599
|
}
|
|
1837
|
-
|
|
1838
|
-
</body>
|
|
1839
|
-
</html>`;
|
|
1600
|
+
var DASHBOARD_HTML = loadDashboardHtml();
|
|
1840
1601
|
|
|
1841
1602
|
// src/dashboard/routes.ts
|
|
1842
1603
|
function mountDashboard(app, store, token = "") {
|
|
@@ -1996,6 +1757,7 @@ function mountApi(app, store, token = "") {
|
|
|
1996
1757
|
}
|
|
1997
1758
|
|
|
1998
1759
|
// src/remote-message.ts
|
|
1760
|
+
init_esm_shims();
|
|
1999
1761
|
import crypto2 from "crypto";
|
|
2000
1762
|
var MAX_RESPONSE_BYTES = 64 * 1024;
|
|
2001
1763
|
function validateWebSocketUrl(url) {
|
|
@@ -2200,43 +1962,99 @@ function isWebSocketUrl(url) {
|
|
|
2200
1962
|
return url.startsWith("ws://") || url.startsWith("wss://");
|
|
2201
1963
|
}
|
|
2202
1964
|
|
|
1965
|
+
// src/stream-handler.ts
|
|
1966
|
+
init_esm_shims();
|
|
1967
|
+
|
|
2203
1968
|
// src/providers/deepgram-stt.ts
|
|
1969
|
+
init_esm_shims();
|
|
2204
1970
|
import WebSocket3 from "ws";
|
|
2205
1971
|
|
|
2206
1972
|
// src/errors.ts
|
|
1973
|
+
init_esm_shims();
|
|
1974
|
+
var ErrorCode = {
|
|
1975
|
+
/** Invalid constructor args, missing required env var, frozen-config violation. */
|
|
1976
|
+
CONFIG: "CONFIG",
|
|
1977
|
+
/** WebSocket connect failure, HTTP 5xx from provider, network error. */
|
|
1978
|
+
CONNECTION: "CONNECTION",
|
|
1979
|
+
/** Provider rejected our credentials (HTTP 401/403, invalid signature). */
|
|
1980
|
+
AUTH: "AUTH",
|
|
1981
|
+
/** Provider response, voicemail post, or other awaited operation timed out. */
|
|
1982
|
+
TIMEOUT: "TIMEOUT",
|
|
1983
|
+
/** Provider returned HTTP 429. */
|
|
1984
|
+
RATE_LIMIT: "RATE_LIMIT",
|
|
1985
|
+
/** Twilio / Telnyx webhook signature verification failed. */
|
|
1986
|
+
WEBHOOK_VERIFICATION: "WEBHOOK_VERIFICATION",
|
|
1987
|
+
/** Caller passed a malformed phone number, tool arg, etc. */
|
|
1988
|
+
INPUT_VALIDATION: "INPUT_VALIDATION",
|
|
1989
|
+
/** Generic catch-all for unexpected upstream provider failures. */
|
|
1990
|
+
PROVIDER_ERROR: "PROVIDER_ERROR",
|
|
1991
|
+
/** Phone number provisioning, webhook configuration, or carrier setup failed. */
|
|
1992
|
+
PROVISION: "PROVISION",
|
|
1993
|
+
/** Assertion failed / unexpected internal state. Likely a Patter bug. */
|
|
1994
|
+
INTERNAL: "INTERNAL"
|
|
1995
|
+
};
|
|
2207
1996
|
var PatterError = class extends Error {
|
|
2208
|
-
|
|
1997
|
+
/** Stable, machine-readable error code. Subclasses set the default. */
|
|
1998
|
+
code;
|
|
1999
|
+
constructor(message, options) {
|
|
2209
2000
|
super(message);
|
|
2210
2001
|
this.name = "PatterError";
|
|
2002
|
+
this.code = options?.code ?? ErrorCode.INTERNAL;
|
|
2211
2003
|
}
|
|
2212
2004
|
};
|
|
2213
2005
|
var PatterConnectionError = class extends PatterError {
|
|
2214
|
-
constructor(message) {
|
|
2215
|
-
super(message);
|
|
2006
|
+
constructor(message, options) {
|
|
2007
|
+
super(message, { code: options?.code ?? ErrorCode.CONNECTION });
|
|
2216
2008
|
this.name = "PatterConnectionError";
|
|
2217
2009
|
}
|
|
2218
2010
|
};
|
|
2219
2011
|
var AuthenticationError = class extends PatterError {
|
|
2220
|
-
constructor(message) {
|
|
2221
|
-
super(message);
|
|
2012
|
+
constructor(message, options) {
|
|
2013
|
+
super(message, { code: options?.code ?? ErrorCode.AUTH });
|
|
2222
2014
|
this.name = "AuthenticationError";
|
|
2223
2015
|
}
|
|
2224
2016
|
};
|
|
2225
2017
|
var ProvisionError = class extends PatterError {
|
|
2226
|
-
constructor(message) {
|
|
2227
|
-
super(message);
|
|
2018
|
+
constructor(message, options) {
|
|
2019
|
+
super(message, { code: options?.code ?? ErrorCode.PROVISION });
|
|
2228
2020
|
this.name = "ProvisionError";
|
|
2229
2021
|
}
|
|
2230
2022
|
};
|
|
2231
2023
|
var RateLimitError = class extends PatterConnectionError {
|
|
2232
|
-
constructor(message) {
|
|
2233
|
-
super(message);
|
|
2024
|
+
constructor(message, options) {
|
|
2025
|
+
super(message, { code: options?.code ?? ErrorCode.RATE_LIMIT });
|
|
2234
2026
|
this.name = "RateLimitError";
|
|
2235
2027
|
}
|
|
2236
2028
|
};
|
|
2237
2029
|
|
|
2238
2030
|
// src/providers/deepgram-stt.ts
|
|
2239
2031
|
var DEEPGRAM_WS_URL = "wss://api.deepgram.com/v1/listen";
|
|
2032
|
+
var DeepgramModel = {
|
|
2033
|
+
NOVA_3: "nova-3",
|
|
2034
|
+
NOVA_2: "nova-2",
|
|
2035
|
+
NOVA_2_PHONECALL: "nova-2-phonecall",
|
|
2036
|
+
NOVA_2_GENERAL: "nova-2-general",
|
|
2037
|
+
NOVA_2_MEETING: "nova-2-meeting",
|
|
2038
|
+
NOVA: "nova",
|
|
2039
|
+
ENHANCED: "enhanced",
|
|
2040
|
+
BASE: "base"
|
|
2041
|
+
};
|
|
2042
|
+
var DeepgramEncoding = {
|
|
2043
|
+
LINEAR16: "linear16",
|
|
2044
|
+
MULAW: "mulaw",
|
|
2045
|
+
ALAW: "alaw",
|
|
2046
|
+
OPUS: "opus",
|
|
2047
|
+
FLAC: "flac",
|
|
2048
|
+
AMR_NB: "amr-nb",
|
|
2049
|
+
AMR_WB: "amr-wb"
|
|
2050
|
+
};
|
|
2051
|
+
var DeepgramSampleRate = {
|
|
2052
|
+
HZ_8000: 8e3,
|
|
2053
|
+
HZ_16000: 16e3,
|
|
2054
|
+
HZ_24000: 24e3,
|
|
2055
|
+
HZ_44100: 44100,
|
|
2056
|
+
HZ_48000: 48e3
|
|
2057
|
+
};
|
|
2240
2058
|
var KEEPALIVE_INTERVAL_MS = 4e3;
|
|
2241
2059
|
var FINALIZE_DRAIN_MS = 100;
|
|
2242
2060
|
var CLOSE_LATENCY_BUDGET_MS = 500;
|
|
@@ -2264,9 +2082,9 @@ var DeepgramSTT = class _DeepgramSTT {
|
|
|
2264
2082
|
this.apiKey = apiKey;
|
|
2265
2083
|
const opts = typeof languageOrOptions === "object" && languageOrOptions !== null ? languageOrOptions : options ?? {};
|
|
2266
2084
|
this.language = (typeof languageOrOptions === "string" ? languageOrOptions : opts.language) ?? "en";
|
|
2267
|
-
this.model = model ?? opts.model ??
|
|
2268
|
-
this.encoding = encoding ?? opts.encoding ??
|
|
2269
|
-
this.sampleRate = sampleRate ?? opts.sampleRate ??
|
|
2085
|
+
this.model = model ?? opts.model ?? DeepgramModel.NOVA_3;
|
|
2086
|
+
this.encoding = encoding ?? opts.encoding ?? DeepgramEncoding.LINEAR16;
|
|
2087
|
+
this.sampleRate = sampleRate ?? opts.sampleRate ?? DeepgramSampleRate.HZ_16000;
|
|
2270
2088
|
this.endpointingMs = opts.endpointingMs ?? 150;
|
|
2271
2089
|
this.utteranceEndMs = opts.utteranceEndMs === null ? null : opts.utteranceEndMs ?? 1e3;
|
|
2272
2090
|
this.smartFormat = opts.smartFormat ?? false;
|
|
@@ -2274,8 +2092,15 @@ var DeepgramSTT = class _DeepgramSTT {
|
|
|
2274
2092
|
this.vadEvents = opts.vadEvents ?? true;
|
|
2275
2093
|
}
|
|
2276
2094
|
/** Factory for Twilio calls — mulaw 8 kHz. Forwards tuning options through. */
|
|
2277
|
-
static forTwilio(apiKey, language = "en", model =
|
|
2278
|
-
return new _DeepgramSTT(
|
|
2095
|
+
static forTwilio(apiKey, language = "en", model = DeepgramModel.NOVA_3, options = {}) {
|
|
2096
|
+
return new _DeepgramSTT(
|
|
2097
|
+
apiKey,
|
|
2098
|
+
language,
|
|
2099
|
+
model,
|
|
2100
|
+
DeepgramEncoding.MULAW,
|
|
2101
|
+
DeepgramSampleRate.HZ_8000,
|
|
2102
|
+
options
|
|
2103
|
+
);
|
|
2279
2104
|
}
|
|
2280
2105
|
buildUrl() {
|
|
2281
2106
|
const params = new URLSearchParams({
|
|
@@ -2295,6 +2120,7 @@ var DeepgramSTT = class _DeepgramSTT {
|
|
|
2295
2120
|
}
|
|
2296
2121
|
return `${DEEPGRAM_WS_URL}?${params.toString()}`;
|
|
2297
2122
|
}
|
|
2123
|
+
/** Open the streaming WebSocket and arm message + keepalive handlers. */
|
|
2298
2124
|
async connect() {
|
|
2299
2125
|
await this.openSocket();
|
|
2300
2126
|
this.running = true;
|
|
@@ -2360,6 +2186,18 @@ var DeepgramSTT = class _DeepgramSTT {
|
|
|
2360
2186
|
} catch {
|
|
2361
2187
|
return;
|
|
2362
2188
|
}
|
|
2189
|
+
const dataType = String(data.type ?? "unknown");
|
|
2190
|
+
if (dataType === "Results") {
|
|
2191
|
+
const transcript2 = (data.channel?.alternatives?.[0]?.transcript ?? "").trim();
|
|
2192
|
+
const isFinal = Boolean(data.is_final);
|
|
2193
|
+
const speechFinal2 = Boolean(data.speech_final);
|
|
2194
|
+
const fromFinalize = Boolean(data.from_finalize);
|
|
2195
|
+
getLogger().info(
|
|
2196
|
+
`[DIAG] DG Results text=${JSON.stringify(transcript2.slice(0, 60))} isFinal=${isFinal} speechFinal=${speechFinal2} fromFinalize=${fromFinalize}`
|
|
2197
|
+
);
|
|
2198
|
+
} else if (dataType !== "Metadata") {
|
|
2199
|
+
getLogger().info(`[DIAG] DG event type=${dataType}`);
|
|
2200
|
+
}
|
|
2363
2201
|
if (data.type === "Metadata" && data.request_id) {
|
|
2364
2202
|
this.requestId = data.request_id;
|
|
2365
2203
|
return;
|
|
@@ -2444,23 +2282,71 @@ var DeepgramSTT = class _DeepgramSTT {
|
|
|
2444
2282
|
this.running = false;
|
|
2445
2283
|
}
|
|
2446
2284
|
}
|
|
2285
|
+
/** Send a binary audio chunk to Deepgram for transcription. */
|
|
2447
2286
|
sendAudio(audio) {
|
|
2448
|
-
if (!this.ws || this.ws.readyState !== WebSocket3.OPEN)
|
|
2287
|
+
if (!this.ws || this.ws.readyState !== WebSocket3.OPEN) {
|
|
2288
|
+
this.audioDroppedCount++;
|
|
2289
|
+
if (this.audioDroppedCount === 1 || this.audioDroppedCount % 50 === 0) {
|
|
2290
|
+
getLogger().info(
|
|
2291
|
+
`[DIAG] DeepgramSTT.sendAudio dropped (ws state=${this.ws?.readyState ?? "null"}) \u2014 total dropped=${this.audioDroppedCount}`
|
|
2292
|
+
);
|
|
2293
|
+
}
|
|
2294
|
+
return;
|
|
2295
|
+
}
|
|
2449
2296
|
if (audio.length === 0) return;
|
|
2297
|
+
this.audioSentCount++;
|
|
2298
|
+
if (this.audioSentCount === 1 || this.audioSentCount % 100 === 0) {
|
|
2299
|
+
getLogger().info(
|
|
2300
|
+
`[DIAG] DeepgramSTT.sendAudio: total chunks sent=${this.audioSentCount} (last=${audio.length} bytes)`
|
|
2301
|
+
);
|
|
2302
|
+
}
|
|
2450
2303
|
this.ws.send(audio);
|
|
2451
2304
|
}
|
|
2305
|
+
audioSentCount = 0;
|
|
2306
|
+
audioDroppedCount = 0;
|
|
2307
|
+
/** Register a transcript listener. */
|
|
2452
2308
|
onTranscript(callback) {
|
|
2453
2309
|
this.transcriptCallbacks.add(callback);
|
|
2454
2310
|
}
|
|
2311
|
+
/** Remove a previously registered transcript listener. */
|
|
2455
2312
|
offTranscript(callback) {
|
|
2456
2313
|
this.transcriptCallbacks.delete(callback);
|
|
2457
2314
|
}
|
|
2315
|
+
/** Register an error listener for socket / API failures. */
|
|
2458
2316
|
onError(callback) {
|
|
2459
2317
|
this.errorCallbacks.add(callback);
|
|
2460
2318
|
}
|
|
2319
|
+
/** Remove a previously registered error listener. */
|
|
2461
2320
|
offError(callback) {
|
|
2462
2321
|
this.errorCallbacks.delete(callback);
|
|
2463
2322
|
}
|
|
2323
|
+
/**
|
|
2324
|
+
* Force Deepgram to immediately emit a final ``Results`` frame for the
|
|
2325
|
+
* in-flight utterance, rather than waiting for its own endpoint
|
|
2326
|
+
* heuristic (utterance_end_ms ~1 s + natural-pause endpointing).
|
|
2327
|
+
* Called by the SDK on VAD ``speech_end`` and after barge-in cancel —
|
|
2328
|
+
* both moments where the SDK already knows the user has stopped
|
|
2329
|
+
* speaking and waiting for Deepgram's own endpointing only adds
|
|
2330
|
+
* dead air.
|
|
2331
|
+
*
|
|
2332
|
+
* Idempotent: safe to call when the socket is closed/closing.
|
|
2333
|
+
*/
|
|
2334
|
+
finalize() {
|
|
2335
|
+
const ws = this.ws;
|
|
2336
|
+
if (!ws || ws.readyState !== WebSocket3.OPEN) {
|
|
2337
|
+
getLogger().info(
|
|
2338
|
+
`[DIAG] DeepgramSTT.finalize SKIPPED (ws state=${ws?.readyState ?? "null"})`
|
|
2339
|
+
);
|
|
2340
|
+
return;
|
|
2341
|
+
}
|
|
2342
|
+
try {
|
|
2343
|
+
ws.send(JSON.stringify({ type: "Finalize" }));
|
|
2344
|
+
getLogger().info("[DIAG] DeepgramSTT.finalize sent {type:Finalize}");
|
|
2345
|
+
} catch (err) {
|
|
2346
|
+
getLogger().info(`[DIAG] DeepgramSTT.finalize send failed: ${String(err)}`);
|
|
2347
|
+
}
|
|
2348
|
+
}
|
|
2349
|
+
/** Send Finalize, briefly drain trailing transcripts, then close the socket. */
|
|
2464
2350
|
close() {
|
|
2465
2351
|
this.running = false;
|
|
2466
2352
|
this.clearKeepalive();
|
|
@@ -2492,6 +2378,7 @@ var DeepgramSTT = class _DeepgramSTT {
|
|
|
2492
2378
|
};
|
|
2493
2379
|
|
|
2494
2380
|
// src/metrics.ts
|
|
2381
|
+
init_esm_shims();
|
|
2495
2382
|
function round(value, decimals) {
|
|
2496
2383
|
const factor = 10 ** decimals;
|
|
2497
2384
|
return Math.round(value * factor) / factor;
|
|
@@ -2518,6 +2405,14 @@ var CallMetricsAccumulator = class {
|
|
|
2518
2405
|
sttProvider;
|
|
2519
2406
|
ttsProvider;
|
|
2520
2407
|
llmProvider;
|
|
2408
|
+
/**
|
|
2409
|
+
* Model identifiers for per-model rate resolution (see pricing.ts). Empty
|
|
2410
|
+
* string means "not known" → cost calc falls back to provider defaults,
|
|
2411
|
+
* matching pre-2026.3 behaviour.
|
|
2412
|
+
*/
|
|
2413
|
+
sttModel;
|
|
2414
|
+
ttsModel;
|
|
2415
|
+
realtimeModel;
|
|
2521
2416
|
_pricing;
|
|
2522
2417
|
_callStart;
|
|
2523
2418
|
_turns = [];
|
|
@@ -2579,6 +2474,9 @@ var CallMetricsAccumulator = class {
|
|
|
2579
2474
|
this.sttProvider = opts.sttProvider ?? "";
|
|
2580
2475
|
this.ttsProvider = opts.ttsProvider ?? "";
|
|
2581
2476
|
this.llmProvider = opts.llmProvider ?? "";
|
|
2477
|
+
this.sttModel = opts.sttModel ?? "";
|
|
2478
|
+
this.ttsModel = opts.ttsModel ?? "";
|
|
2479
|
+
this.realtimeModel = opts.realtimeModel ?? "";
|
|
2582
2480
|
this._pricing = mergePricing(opts.pricing);
|
|
2583
2481
|
this._callStart = hrTimeMs();
|
|
2584
2482
|
this._eventBus = opts.eventBus;
|
|
@@ -2601,6 +2499,7 @@ var CallMetricsAccumulator = class {
|
|
|
2601
2499
|
get turnActive() {
|
|
2602
2500
|
return this._turnStart !== null;
|
|
2603
2501
|
}
|
|
2502
|
+
/** Begin a new turn — stamps the turn start timestamp and resets per-turn state. */
|
|
2604
2503
|
startTurn() {
|
|
2605
2504
|
this._turnStart = hrTimeMs();
|
|
2606
2505
|
this._sttComplete = null;
|
|
@@ -2631,6 +2530,7 @@ var CallMetricsAccumulator = class {
|
|
|
2631
2530
|
this.startTurn();
|
|
2632
2531
|
}
|
|
2633
2532
|
}
|
|
2533
|
+
/** Stamp end-of-STT, capture the user's transcript, and accrue billed STT seconds. */
|
|
2634
2534
|
recordSttComplete(text, audioSeconds = 0) {
|
|
2635
2535
|
this._sttComplete = hrTimeMs();
|
|
2636
2536
|
this._sttFinalAt = this._sttComplete;
|
|
@@ -2640,11 +2540,30 @@ var CallMetricsAccumulator = class {
|
|
|
2640
2540
|
this._turnUserText = text;
|
|
2641
2541
|
this._turnSttAudioSeconds = audioSeconds;
|
|
2642
2542
|
this._totalSttAudioSeconds += audioSeconds;
|
|
2543
|
+
if (this._eventBus) {
|
|
2544
|
+
const valueSec = this._turnStart !== null ? (this._sttComplete - this._turnStart) / 1e3 : 0;
|
|
2545
|
+
const payload = {
|
|
2546
|
+
timestamp: Date.now() / 1e3,
|
|
2547
|
+
processor: "stt",
|
|
2548
|
+
model: null,
|
|
2549
|
+
value: valueSec
|
|
2550
|
+
};
|
|
2551
|
+
this._eventBus.emit("stt_metrics", payload);
|
|
2552
|
+
}
|
|
2643
2553
|
}
|
|
2644
2554
|
/** Record the timestamp of the first LLM token (TTFT). No-op after first call. */
|
|
2645
2555
|
recordLlmFirstToken() {
|
|
2646
2556
|
if (this._llmFirstToken === null) {
|
|
2647
2557
|
this._llmFirstToken = hrTimeMs();
|
|
2558
|
+
if (this._eventBus && this._sttComplete !== null && (!this._reportOnlyInitialTtfb || !this._initialTtfbEmitted)) {
|
|
2559
|
+
const payload = {
|
|
2560
|
+
timestamp: Date.now() / 1e3,
|
|
2561
|
+
processor: "llm",
|
|
2562
|
+
model: null,
|
|
2563
|
+
value: (this._llmFirstToken - this._sttComplete) / 1e3
|
|
2564
|
+
};
|
|
2565
|
+
this._eventBus.emit("llm_metrics", payload);
|
|
2566
|
+
}
|
|
2648
2567
|
}
|
|
2649
2568
|
}
|
|
2650
2569
|
/**
|
|
@@ -2658,9 +2577,11 @@ var CallMetricsAccumulator = class {
|
|
|
2658
2577
|
this._llmFirstSentenceComplete = hrTimeMs();
|
|
2659
2578
|
}
|
|
2660
2579
|
}
|
|
2580
|
+
/** Stamp end-of-LLM (last token received). */
|
|
2661
2581
|
recordLlmComplete() {
|
|
2662
2582
|
this._llmComplete = hrTimeMs();
|
|
2663
2583
|
}
|
|
2584
|
+
/** Stamp first TTS audio byte sent on the wire (used to compute TTS TTFB). */
|
|
2664
2585
|
recordTtsFirstByte() {
|
|
2665
2586
|
if (this._ttsFirstByte === null) {
|
|
2666
2587
|
this._ttsFirstByte = hrTimeMs();
|
|
@@ -2669,7 +2590,20 @@ var CallMetricsAccumulator = class {
|
|
|
2669
2590
|
return;
|
|
2670
2591
|
}
|
|
2671
2592
|
this._initialTtfbEmitted = true;
|
|
2593
|
+
if (this._eventBus && this._ttsFirstByte !== null) {
|
|
2594
|
+
const ttsRef = this._llmFirstSentenceComplete !== null ? this._llmFirstSentenceComplete : this._llmComplete;
|
|
2595
|
+
if (ttsRef !== null) {
|
|
2596
|
+
const payload = {
|
|
2597
|
+
timestamp: Date.now() / 1e3,
|
|
2598
|
+
processor: "tts",
|
|
2599
|
+
model: null,
|
|
2600
|
+
value: (this._ttsFirstByte - ttsRef) / 1e3
|
|
2601
|
+
};
|
|
2602
|
+
this._eventBus.emit("tts_metrics", payload);
|
|
2603
|
+
}
|
|
2604
|
+
}
|
|
2672
2605
|
}
|
|
2606
|
+
/** Record final TTS text length and stamp the last-byte timestamp. */
|
|
2673
2607
|
recordTtsComplete(text) {
|
|
2674
2608
|
this._totalTtsCharacters += text.length;
|
|
2675
2609
|
if (this._ttsLastByte === null) {
|
|
@@ -2700,6 +2634,7 @@ var CallMetricsAccumulator = class {
|
|
|
2700
2634
|
recordTtsStopped(ts) {
|
|
2701
2635
|
this._bargeinStoppedAt = ts ?? hrTimeMs();
|
|
2702
2636
|
}
|
|
2637
|
+
/** Close the current turn cleanly and append a `TurnMetrics` record. */
|
|
2703
2638
|
recordTurnComplete(agentText) {
|
|
2704
2639
|
const latency = this._computeTurnLatency();
|
|
2705
2640
|
const turn = {
|
|
@@ -2717,6 +2652,7 @@ var CallMetricsAccumulator = class {
|
|
|
2717
2652
|
this._eventBus?.emit("metrics_collected", { callId: this.callId, turn });
|
|
2718
2653
|
return turn;
|
|
2719
2654
|
}
|
|
2655
|
+
/** Close the current turn as interrupted (barge-in) and return the recorded metrics. */
|
|
2720
2656
|
recordTurnInterrupted() {
|
|
2721
2657
|
if (this._turnStart === null) return null;
|
|
2722
2658
|
const latency = this._computeTurnLatency();
|
|
@@ -2782,6 +2718,7 @@ var CallMetricsAccumulator = class {
|
|
|
2782
2718
|
* ``transcriptionDelay`` = turnCommitted − vadStopped (ms)
|
|
2783
2719
|
* ``onUserTurnCompletedDelay`` = caller-supplied delta (ms) or 0
|
|
2784
2720
|
*/
|
|
2721
|
+
/** Emit `EOUMetrics` once VAD-stop, STT-final, and turn-committed timestamps are all known. */
|
|
2785
2722
|
emitEouMetrics() {
|
|
2786
2723
|
if (this._vadStoppedAt === null || this._sttFinalAt === null || this._turnCommittedAt === null) {
|
|
2787
2724
|
return;
|
|
@@ -2832,16 +2769,32 @@ var CallMetricsAccumulator = class {
|
|
|
2832
2769
|
this._eventBus?.emit("interruption", payload);
|
|
2833
2770
|
}
|
|
2834
2771
|
// ---- Usage tracking ----
|
|
2772
|
+
/** Accumulate inbound STT audio bytes for cost calculation when seconds are unknown. */
|
|
2835
2773
|
addSttAudioBytes(byteCount) {
|
|
2836
2774
|
this._sttByteCount += byteCount;
|
|
2837
2775
|
}
|
|
2838
|
-
|
|
2839
|
-
|
|
2840
|
-
|
|
2776
|
+
/**
|
|
2777
|
+
* Record an OpenAI Realtime usage payload and roll up its cost + cached-savings.
|
|
2778
|
+
*
|
|
2779
|
+
* `model` allows the cost calc to pick the per-model rate (e.g.
|
|
2780
|
+
* `gpt-realtime-2`). Defaults to whatever was supplied at construction
|
|
2781
|
+
* time (`this.realtimeModel`); pass an explicit value to override per-call
|
|
2782
|
+
* (the `response.done` payload carries the model used).
|
|
2783
|
+
*/
|
|
2784
|
+
recordRealtimeUsage(usage, model) {
|
|
2785
|
+
const resolvedModel = model || this.realtimeModel || null;
|
|
2786
|
+
this._totalRealtimeCost += calculateRealtimeCost(usage, this._pricing, resolvedModel);
|
|
2787
|
+
this._totalRealtimeCachedSavings += calculateRealtimeCachedSavings(
|
|
2788
|
+
usage,
|
|
2789
|
+
this._pricing,
|
|
2790
|
+
resolvedModel
|
|
2791
|
+
);
|
|
2841
2792
|
}
|
|
2793
|
+
/** Override the carrier-billed telephony cost (e.g. exact value reported via Twilio API). */
|
|
2842
2794
|
setActualTelephonyCost(cost) {
|
|
2843
2795
|
this._actualTelephonyCost = cost;
|
|
2844
2796
|
}
|
|
2797
|
+
/** Override the provider-billed STT cost when an exact figure is available. */
|
|
2845
2798
|
setActualSttCost(cost) {
|
|
2846
2799
|
this._actualSttCost = cost;
|
|
2847
2800
|
}
|
|
@@ -2869,6 +2822,7 @@ var CallMetricsAccumulator = class {
|
|
|
2869
2822
|
);
|
|
2870
2823
|
}
|
|
2871
2824
|
// ---- Finalize ----
|
|
2825
|
+
/** Finalize the call: flush any in-flight turn, compute aggregates, and return `CallMetrics`. */
|
|
2872
2826
|
endCall() {
|
|
2873
2827
|
const duration = (hrTimeMs() - this._callStart) / 1e3;
|
|
2874
2828
|
if (this.turnActive) {
|
|
@@ -2902,6 +2856,7 @@ var CallMetricsAccumulator = class {
|
|
|
2902
2856
|
this._eventBus?.emit("call_ended", { callId: this.callId, metrics });
|
|
2903
2857
|
return metrics;
|
|
2904
2858
|
}
|
|
2859
|
+
/** Return the cost breakdown for the call so far without ending it. */
|
|
2905
2860
|
getCostSoFar() {
|
|
2906
2861
|
const duration = (hrTimeMs() - this._callStart) / 1e3;
|
|
2907
2862
|
return this._computeCost(duration);
|
|
@@ -2962,6 +2917,10 @@ var CallMetricsAccumulator = class {
|
|
|
2962
2917
|
if (ttsTotalRef !== null && this._ttsLastByte !== null) {
|
|
2963
2918
|
tts_total_ms = Math.max(0, this._ttsLastByte - ttsTotalRef);
|
|
2964
2919
|
}
|
|
2920
|
+
let agent_response_ms;
|
|
2921
|
+
if (endpoint_ms !== void 0 && llm_ttft_ms !== void 0 && tts_ms > 0) {
|
|
2922
|
+
agent_response_ms = round(endpoint_ms + llm_ttft_ms + tts_ms, 1);
|
|
2923
|
+
}
|
|
2965
2924
|
return {
|
|
2966
2925
|
stt_ms: round(stt_ms, 1),
|
|
2967
2926
|
llm_ms: round(llm_ms, 1),
|
|
@@ -2971,7 +2930,8 @@ var CallMetricsAccumulator = class {
|
|
|
2971
2930
|
total_ms: round(total_ms, 1),
|
|
2972
2931
|
...endpoint_ms !== void 0 ? { endpoint_ms: round(endpoint_ms, 1) } : {},
|
|
2973
2932
|
...bargein_ms !== void 0 ? { bargein_ms: round(bargein_ms, 1) } : {},
|
|
2974
|
-
...tts_total_ms !== void 0 ? { tts_total_ms: round(tts_total_ms, 1) } : {}
|
|
2933
|
+
...tts_total_ms !== void 0 ? { tts_total_ms: round(tts_total_ms, 1) } : {},
|
|
2934
|
+
...agent_response_ms !== void 0 ? { agent_response_ms } : {}
|
|
2975
2935
|
};
|
|
2976
2936
|
}
|
|
2977
2937
|
_computeCost(durationSeconds) {
|
|
@@ -2987,8 +2947,18 @@ var CallMetricsAccumulator = class {
|
|
|
2987
2947
|
tts = 0;
|
|
2988
2948
|
llm = 0;
|
|
2989
2949
|
} else {
|
|
2990
|
-
stt = this._actualSttCost !== null ? this._actualSttCost : calculateSttCost(
|
|
2991
|
-
|
|
2950
|
+
stt = this._actualSttCost !== null ? this._actualSttCost : calculateSttCost(
|
|
2951
|
+
this.sttProvider,
|
|
2952
|
+
this._totalSttAudioSeconds,
|
|
2953
|
+
this._pricing,
|
|
2954
|
+
this.sttModel || null
|
|
2955
|
+
);
|
|
2956
|
+
tts = calculateTtsCost(
|
|
2957
|
+
this.ttsProvider,
|
|
2958
|
+
this._totalTtsCharacters,
|
|
2959
|
+
this._pricing,
|
|
2960
|
+
this.ttsModel || null
|
|
2961
|
+
);
|
|
2992
2962
|
llm = this._totalLlmCost;
|
|
2993
2963
|
}
|
|
2994
2964
|
const telephony = this._actualTelephonyCost !== null ? this._actualTelephonyCost : calculateTelephonyCost(this.telephonyProvider, durationSeconds, this._pricing);
|
|
@@ -3074,7 +3044,8 @@ var CallMetricsAccumulator = class {
|
|
|
3074
3044
|
}
|
|
3075
3045
|
};
|
|
3076
3046
|
|
|
3077
|
-
// src/transcoding.ts
|
|
3047
|
+
// src/audio/transcoding.ts
|
|
3048
|
+
init_esm_shims();
|
|
3078
3049
|
var MULAW_TO_PCM16_TABLE = (() => {
|
|
3079
3050
|
const table = new Int16Array(256);
|
|
3080
3051
|
for (let i = 0; i < 256; i++) {
|
|
@@ -3189,9 +3160,9 @@ var StatefulResampler = class {
|
|
|
3189
3160
|
throw new Error("StatefulResampler: only mono (channels=1) is supported");
|
|
3190
3161
|
}
|
|
3191
3162
|
const key = `${this.srcRate}->${this.dstRate}`;
|
|
3192
|
-
if (key !== "16000->8000" && key !== "8000->16000" && key !== "24000->16000") {
|
|
3163
|
+
if (key !== "16000->8000" && key !== "8000->16000" && key !== "24000->16000" && key !== "24000->8000") {
|
|
3193
3164
|
throw new Error(
|
|
3194
|
-
`StatefulResampler: unsupported conversion ${key}. Supported: 16000->8000, 8000->16000, 24000->16000`
|
|
3165
|
+
`StatefulResampler: unsupported conversion ${key}. Supported: 16000->8000, 8000->16000, 24000->16000, 24000->8000`
|
|
3195
3166
|
);
|
|
3196
3167
|
}
|
|
3197
3168
|
}
|
|
@@ -3211,6 +3182,9 @@ var StatefulResampler = class {
|
|
|
3211
3182
|
if (this.srcRate === 8e3 && this.dstRate === 16e3) {
|
|
3212
3183
|
return this._upsample8kTo16k(aligned);
|
|
3213
3184
|
}
|
|
3185
|
+
if (this.srcRate === 24e3 && this.dstRate === 8e3) {
|
|
3186
|
+
return this._resample24kTo8k(aligned);
|
|
3187
|
+
}
|
|
3214
3188
|
return this._resample24kTo16k(aligned);
|
|
3215
3189
|
}
|
|
3216
3190
|
/**
|
|
@@ -3356,7 +3330,7 @@ var StatefulResampler = class {
|
|
|
3356
3330
|
return outBuf;
|
|
3357
3331
|
}
|
|
3358
3332
|
// ---------------------------------------------------------------------------
|
|
3359
|
-
// Private: 24 kHz → 16 kHz
|
|
3333
|
+
// Private: 24 kHz → 16 kHz / 8 kHz
|
|
3360
3334
|
// ---------------------------------------------------------------------------
|
|
3361
3335
|
/**
|
|
3362
3336
|
* 3:2 linear-interpolation decimator (ratio srcRate/dstRate = 1.5).
|
|
@@ -3367,6 +3341,14 @@ var StatefulResampler = class {
|
|
|
3367
3341
|
* handled using `resample24Last`.
|
|
3368
3342
|
*/
|
|
3369
3343
|
_resample24kTo16k(buf) {
|
|
3344
|
+
return this._resample24kStep(buf, 24e3 / 16e3);
|
|
3345
|
+
}
|
|
3346
|
+
/** 3:1 decimation — collapses the 24k→16k→8k chain into a single step. */
|
|
3347
|
+
_resample24kTo8k(buf) {
|
|
3348
|
+
return this._resample24kStep(buf, 24e3 / 8e3);
|
|
3349
|
+
}
|
|
3350
|
+
/** Shared phase-stepping resampler used by 24→16 (step 1.5) and 24→8 (step 3). */
|
|
3351
|
+
_resample24kStep(buf, step) {
|
|
3370
3352
|
const sampleCount = buf.length >> 1;
|
|
3371
3353
|
if (sampleCount === 0) return Buffer.alloc(0);
|
|
3372
3354
|
const outArr = [];
|
|
@@ -3386,7 +3368,7 @@ var StatefulResampler = class {
|
|
|
3386
3368
|
}
|
|
3387
3369
|
const interp = Math.round(s0 + (s1 - s0) * frac);
|
|
3388
3370
|
outArr.push(Math.max(-32768, Math.min(32767, interp)));
|
|
3389
|
-
phase +=
|
|
3371
|
+
phase += step;
|
|
3390
3372
|
}
|
|
3391
3373
|
this.resample24Last = buf.readInt16LE((sampleCount - 1) * 2);
|
|
3392
3374
|
this.resample24HasHistory = true;
|
|
@@ -3405,6 +3387,9 @@ function createResampler8kTo16k() {
|
|
|
3405
3387
|
/**
 * Build a stateful resampler configured for 24 kHz input and 16 kHz output.
 *
 * @returns {StatefulResampler} A fresh resampler instance.
 */
function createResampler24kTo16k() {
  const rates = { srcRate: 24e3, dstRate: 16e3 };
  return new StatefulResampler(rates);
}
|
|
3390
|
+
/**
 * Build a stateful resampler configured for 24 kHz input and 8 kHz output.
 *
 * @returns {StatefulResampler} A fresh resampler instance.
 */
function createResampler24kTo8k() {
  const rates = { srcRate: 24e3, dstRate: 8e3 };
  return new StatefulResampler(rates);
}
|
|
3408
3393
|
var _warnedResample8kTo16k = false;
|
|
3409
3394
|
var _warnedResample16kTo8k = false;
|
|
3410
3395
|
var _warnedResample24kTo16k = false;
|
|
@@ -3458,6 +3443,7 @@ function resample24kTo16k(pcm24k) {
|
|
|
3458
3443
|
}
|
|
3459
3444
|
|
|
3460
3445
|
// src/handler-utils.ts
|
|
3446
|
+
init_esm_shims();
|
|
3461
3447
|
function createHistoryManager(maxSize) {
|
|
3462
3448
|
const entries = [];
|
|
3463
3449
|
const push = (entry) => {
|
|
@@ -3467,59 +3453,239 @@ function createHistoryManager(maxSize) {
|
|
|
3467
3453
|
const getHistory = () => [...entries];
|
|
3468
3454
|
return { push, getHistory, entries };
|
|
3469
3455
|
}
|
|
3470
|
-
|
|
3471
|
-
|
|
3472
|
-
|
|
3473
|
-
|
|
3474
|
-
|
|
3475
|
-
|
|
3476
|
-
|
|
3456
|
+
|
|
3457
|
+
// src/tools/mcp-client.ts
|
|
3458
|
+
init_esm_shims();
|
|
3459
|
+
/**
 * Normalise one `mcpServers` entry into a `{ url, headers, name }` record.
 *
 * @param {string | { url: string, headers?: object, name?: string }} input
 *   Either a bare server URL or a config object.
 * @param {number} index Position of the entry in the `mcpServers` array; used
 *   for the default name (`mcp[<index>]`) and in error messages.
 * @returns {{ url: string, headers: object, name: string }} The normalised config.
 * @throws {Error} When the entry is neither a string nor an object, or when an
 *   object entry has no `url`.
 */
function resolveConfig(input, index) {
  if (typeof input === "string") {
    return { url: input, headers: {}, name: `mcp[${index}]` };
  }
  // `null`, `undefined`, numbers, ... would otherwise blow up on `input.url`
  // with an opaque TypeError that does not say which entry is at fault.
  if (input === null || typeof input !== "object") {
    throw new Error(
      `mcpServers[${index}]: expected a URL string or a config object with a 'url' field`
    );
  }
  if (!input.url) {
    throw new Error(`mcpServers[${index}]: missing required 'url' field`);
  }
  return {
    url: input.url,
    headers: input.headers ?? {},
    name: input.name ?? `mcp[${index}]`
  };
}
|
|
3472
|
+
/**
 * Connects to the configured MCP servers over streamable HTTP, lists their
 * tools and wraps each one as a `{ name, description, parameters, handler }`
 * tool definition whose handler forwards to the server's `callTool`.
 */
var MCPManager = class {
  /** Normalised `{ url, headers, name }` records, one per configured server. */
  configs;
  /** Open `{ config, client, transport }` connections; emptied by `close()`. */
  connected = [];
  /**
   * @param {Array<string | object>} [servers] Server URLs or config objects.
   *   Each entry is normalised through `resolveConfig`, which throws on an
   *   entry without a `url`.
   */
  constructor(servers) {
    this.configs = (servers ?? []).map((s, i) => resolveConfig(s, i));
  }
  /** `true` when at least one server is configured. */
  get hasServers() {
    return this.configs.length > 0;
  }
  /**
   * Connect to every configured server and discover their tools.
   *
   * Failures are isolated per server: a malformed URL, a failed connect or a
   * failed `tools/list` is logged and that server is skipped, so the
   * remaining servers still contribute their tools.
   *
   * @returns {Promise<Array<object>>} The discovered tools wrapped as tool
   *   definitions (empty when no server is configured).
   * @throws {Error} When the MCP client / transport modules cannot be imported.
   */
  async connect() {
    if (this.configs.length === 0) return [];
    let mcpModule;
    let transportModule;
    try {
      mcpModule = await import("./client-2GJVZT42.mjs");
      transportModule = await import("./streamableHttp-WKNGHDVO.mjs");
    } catch (e) {
      throw new Error(
        `mcpServers configured but \`@modelcontextprotocol/sdk\` is not installed. Run \`npm install @modelcontextprotocol/sdk\` to enable MCP support. (import error: ${String(e)})`,
        { cause: e }
      );
    }
    const aggregatedTools = [];
    for (const cfg of this.configs) {
      const client = new mcpModule.Client({ name: "patter", version: "0.6.0" });
      let transport;
      try {
        // `new URL` throws on a malformed URL; keeping it inside the try
        // makes a bad entry fail like any other connect error instead of
        // aborting discovery for every other server.
        transport = new transportModule.StreamableHTTPClientTransport(new URL(cfg.url), {
          requestInit: { headers: cfg.headers }
        });
        await client.connect(transport);
      } catch (e) {
        getLogger().error(`MCP server '${cfg.name}' (${cfg.url}) connect failed: ${String(e)}`);
        try {
          await transport?.close?.();
        } catch {
          // Best-effort cleanup of a transport that never connected.
        }
        continue;
      }
      // Tracked before listing tools so `close()` still tears the connection
      // down if `tools/list` fails below.
      this.connected.push({ config: cfg, client, transport });
      let listed;
      try {
        listed = await client.listTools();
      } catch (e) {
        getLogger().error(`MCP server '${cfg.name}' tools/list failed: ${String(e)}`);
        continue;
      }
      const tools = Array.isArray(listed?.tools) ? listed.tools : [];
      for (const t of tools) {
        if (!t?.name) continue;
        aggregatedTools.push({
          name: t.name,
          description: t.description ?? "",
          parameters: t.inputSchema ?? { type: "object", properties: {} },
          handler: async (args) => {
            const callResult = await client.callTool({
              name: t.name,
              arguments: args
            });
            // Text parts are used verbatim; any other content part is
            // serialised as JSON so nothing is silently dropped.
            const text = (callResult.content ?? []).map((c) => c.type === "text" ? c.text ?? "" : JSON.stringify(c)).join("\n");
            if (callResult.isError) {
              return JSON.stringify({ error: text || "MCP tool error", fallback: true });
            }
            return text || "{}";
          }
        });
      }
      getLogger().info(`MCP server '${cfg.name}' registered ${tools.length} tool(s)`);
    }
    return aggregatedTools;
  }
  /**
   * Validate that no tool name collides between MCP-discovered and
   * user-supplied tools.
   *
   * @param {Array<{ name: string }>} [userTools] Tools supplied by the user.
   * @param {Array<{ name: string }>} [mcpTools] Tools returned by `connect()`.
   * @throws {Error} On the first name present in both lists.
   */
  static assertNoConflicts(userTools, mcpTools) {
    if (!userTools || userTools.length === 0) return;
    if (!mcpTools || mcpTools.length === 0) return;
    const userNames = new Set(userTools.map((t) => t.name));
    for (const mcp of mcpTools) {
      if (userNames.has(mcp.name)) {
        throw new Error(
          `MCP tool '${mcp.name}' collides with a user-supplied tool of the same name. Rename one of them or remove the duplicate from agent.tools.`
        );
      }
    }
  }
  /**
   * Close every open MCP connection. Idempotent: the connection list is
   * swapped out first, so a second call finds nothing to close. Individual
   * close failures are logged at debug level and never thrown.
   */
  async close() {
    const conns = this.connected;
    this.connected = [];
    for (const conn of conns) {
      try {
        await conn.client.close?.();
      } catch (e) {
        getLogger().debug(`MCP server '${conn.config.name}' close error (ignored): ${String(e)}`);
      }
    }
  }
};
|
|
3514
3571
|
|
|
3515
3572
|
// src/sentence-chunker.ts
|
|
3573
|
+
init_esm_shims();
|
|
3516
3574
|
var DEFAULT_MIN_SENTENCE_LEN = 20;
|
|
3517
|
-
var DEFAULT_MIN_WORDS_FOR_SHORT_FLUSH =
|
|
3518
|
-
var
|
|
3575
|
+
var DEFAULT_MIN_WORDS_FOR_SHORT_FLUSH = 1;
|
|
3576
|
+
var HONORIFICS_EN = [
|
|
3577
|
+
"Mr",
|
|
3578
|
+
"St",
|
|
3579
|
+
"Mrs",
|
|
3580
|
+
"Ms",
|
|
3581
|
+
"Dr",
|
|
3582
|
+
"Prof",
|
|
3583
|
+
"Gen",
|
|
3584
|
+
"Sen",
|
|
3585
|
+
"Rep",
|
|
3586
|
+
"Lt",
|
|
3587
|
+
"Cpt",
|
|
3588
|
+
"Capt",
|
|
3589
|
+
"Col",
|
|
3590
|
+
"Cmdr",
|
|
3591
|
+
"Adm"
|
|
3592
|
+
];
|
|
3593
|
+
var HONORIFICS_IT = [
|
|
3594
|
+
"Sig",
|
|
3595
|
+
"Sgr",
|
|
3596
|
+
"Dott",
|
|
3597
|
+
"Prof",
|
|
3598
|
+
"Avv",
|
|
3599
|
+
"Ing",
|
|
3600
|
+
"Geom",
|
|
3601
|
+
"Rag",
|
|
3602
|
+
"Arch",
|
|
3603
|
+
"On",
|
|
3604
|
+
"Egr",
|
|
3605
|
+
"Spett",
|
|
3606
|
+
"Gent",
|
|
3607
|
+
"Ill"
|
|
3608
|
+
];
|
|
3609
|
+
var HONORIFICS_ES = [
|
|
3610
|
+
"Sr",
|
|
3611
|
+
"Sra",
|
|
3612
|
+
"Sres",
|
|
3613
|
+
"Sras",
|
|
3614
|
+
"Srta",
|
|
3615
|
+
"Srtas",
|
|
3616
|
+
"Dr",
|
|
3617
|
+
"Dra",
|
|
3618
|
+
"Dres",
|
|
3619
|
+
"Lic",
|
|
3620
|
+
"Licda",
|
|
3621
|
+
"Ing",
|
|
3622
|
+
"Prof",
|
|
3623
|
+
"Profa",
|
|
3624
|
+
"Arq",
|
|
3625
|
+
"Mtro",
|
|
3626
|
+
"Mtra"
|
|
3627
|
+
];
|
|
3628
|
+
var HONORIFICS_DE = [
|
|
3629
|
+
"Hr",
|
|
3630
|
+
"Fr",
|
|
3631
|
+
"Frl",
|
|
3632
|
+
"Dr",
|
|
3633
|
+
"Prof",
|
|
3634
|
+
"Dipl",
|
|
3635
|
+
"Mag"
|
|
3636
|
+
];
|
|
3637
|
+
var HONORIFICS_FR = [
|
|
3638
|
+
"Mme",
|
|
3639
|
+
"Mmes",
|
|
3640
|
+
"Mlle",
|
|
3641
|
+
"Mlles",
|
|
3642
|
+
"MM",
|
|
3643
|
+
"Dr",
|
|
3644
|
+
"Pr",
|
|
3645
|
+
"Mgr",
|
|
3646
|
+
"Me"
|
|
3647
|
+
];
|
|
3648
|
+
var HONORIFICS_PT = [
|
|
3649
|
+
"Sr",
|
|
3650
|
+
"Sra",
|
|
3651
|
+
"Srs",
|
|
3652
|
+
"Sras",
|
|
3653
|
+
"Srta",
|
|
3654
|
+
"Srtas",
|
|
3655
|
+
"Dr",
|
|
3656
|
+
"Dra",
|
|
3657
|
+
"Eng",
|
|
3658
|
+
"Enga",
|
|
3659
|
+
"Prof",
|
|
3660
|
+
"Profa"
|
|
3661
|
+
];
|
|
3662
|
+
var HONORIFICS_BY_LANGUAGE = {
|
|
3663
|
+
en: HONORIFICS_EN,
|
|
3664
|
+
it: HONORIFICS_IT,
|
|
3665
|
+
es: HONORIFICS_ES,
|
|
3666
|
+
de: HONORIFICS_DE,
|
|
3667
|
+
fr: HONORIFICS_FR,
|
|
3668
|
+
pt: HONORIFICS_PT
|
|
3669
|
+
};
|
|
3670
|
+
var HONORIFICS_ALL = Array.from(
|
|
3671
|
+
new Set(Object.values(HONORIFICS_BY_LANGUAGE).flat())
|
|
3672
|
+
).sort((a, b) => b.length - a.length || a.localeCompare(b));
|
|
3673
|
+
var SENTENCE_TERMINATORS = ".!?\u2026;\u3002\uFF01\uFF1F\uFF1B\uFF0E\uFF61";
|
|
3674
|
+
var UNAMBIGUOUS_NON_LATIN_TERMINATORS = "\u0964\u0965\u061F\u061B\u06D4\u060F\u0589\u1367\u1362\u17D4\u17D5\u104B\u0F0E\u0F0F";
|
|
3675
|
+
var TERMINATOR_REGEX_CLASS = Array.from(
|
|
3676
|
+
new Set(SENTENCE_TERMINATORS + UNAMBIGUOUS_NON_LATIN_TERMINATORS)
|
|
3677
|
+
).map((c) => c.replace(/[\\^$.|?*+()[\]{}]/g, "\\$&")).sort().join("");
|
|
3678
|
+
var SOFT_TERMINATORS = ",\u2014\u2013";
|
|
3679
|
+
var DEFAULT_AGGRESSIVE_FIRST_MIN_LEN = 40;
|
|
3680
|
+
var CURRENCY_SYMBOLS = "$\u20AC\xA3\xA5\u20B9\u20A9";
|
|
3681
|
+
var HONORIFICS_REGEX_ALT = HONORIFICS_ALL.map(
|
|
3682
|
+
(p) => p.replace(/[\\^$.|?*+()[\]{}]/g, "\\$&")
|
|
3683
|
+
).join("|");
|
|
3684
|
+
var HONORIFICS_SET = new Set(HONORIFICS_ALL);
|
|
3519
3685
|
function splitSentences(text, minSentenceLen = DEFAULT_MIN_SENTENCE_LEN) {
|
|
3520
3686
|
const alphabets = "([A-Za-z])";
|
|
3521
|
-
const prefixes =
|
|
3522
|
-
const suffixes = "(Inc|Ltd|Jr|Sr|Co)";
|
|
3687
|
+
const prefixes = `(${HONORIFICS_REGEX_ALT})[.]`;
|
|
3688
|
+
const suffixes = "(Inc|Ltd|Jr|Sr|Co|ecc|cit|cap|sez|art|pag|fig|tab|cfr|vol|ed|vs|etc|No|Vol|pp|cf|ca|op|Mt|Hwy|Rt|Pl|Ave|Blvd|Sq)";
|
|
3523
3689
|
const starters = "(Mr|Mrs|Ms|Dr|Prof|Capt|Cpt|Lt|He\\s|She\\s|It\\s|They\\s|Their\\s|Our\\s|We\\s|But\\s|However\\s|That\\s|This\\s|Wherever)";
|
|
3524
3690
|
const acronyms = "([A-Z][.][A-Z][.](?:[A-Z][.])?)";
|
|
3525
3691
|
const websites = "[.](com|net|org|io|gov|edu|me)";
|
|
@@ -3543,14 +3709,20 @@ function splitSentences(text, minSentenceLen = DEFAULT_MIN_SENTENCE_LEN) {
|
|
|
3543
3709
|
new RegExp(alphabets + "[.]" + alphabets + "[.]", "g"),
|
|
3544
3710
|
"$1<prd>$2<prd>"
|
|
3545
3711
|
);
|
|
3546
|
-
text = text.replace(new RegExp(" " + suffixes + "[.] " + starters, "g"), " $1
|
|
3712
|
+
text = text.replace(new RegExp(" " + suffixes + "[.] " + starters, "g"), " $1.<stop> $2");
|
|
3547
3713
|
text = text.replace(new RegExp(" " + suffixes + "[.]", "g"), " $1<prd>");
|
|
3548
3714
|
text = text.replace(new RegExp(" " + alphabets + "[.]", "g"), " $1<prd>");
|
|
3549
|
-
text = text.replace(
|
|
3550
|
-
|
|
3551
|
-
|
|
3552
|
-
|
|
3553
|
-
text = text.replace(
|
|
3715
|
+
text = text.replace(
|
|
3716
|
+
new RegExp(`([${TERMINATOR_REGEX_CLASS}])(["\u201D])`, "g"),
|
|
3717
|
+
"$1$2<stop>"
|
|
3718
|
+
);
|
|
3719
|
+
text = text.replace(
|
|
3720
|
+
new RegExp(`([${TERMINATOR_REGEX_CLASS}])(?!["\u201D])`, "g"),
|
|
3721
|
+
"$1<stop>"
|
|
3722
|
+
);
|
|
3723
|
+
text = text.replace(/<prd>/g, ".");
|
|
3724
|
+
const splitted = text.split("<stop>");
|
|
3725
|
+
text = text.replace(/<stop>/g, "");
|
|
3554
3726
|
const sentences = [];
|
|
3555
3727
|
let buff = "";
|
|
3556
3728
|
let startPos = 0;
|
|
@@ -3575,9 +3747,16 @@ var SentenceChunker = class {
|
|
|
3575
3747
|
buffer = "";
|
|
3576
3748
|
minSentenceLen;
|
|
3577
3749
|
minWordsForShortFlush;
|
|
3750
|
+
aggressiveFirstMinLen;
|
|
3751
|
+
aggressiveFirstFlush;
|
|
3752
|
+
language;
|
|
3753
|
+
isFirstFlush = true;
|
|
3578
3754
|
constructor(options) {
|
|
3579
3755
|
this.minSentenceLen = options?.minSentenceLen ?? DEFAULT_MIN_SENTENCE_LEN;
|
|
3580
3756
|
this.minWordsForShortFlush = options?.minWordsForShortFlush ?? DEFAULT_MIN_WORDS_FOR_SHORT_FLUSH;
|
|
3757
|
+
this.aggressiveFirstMinLen = options?.aggressiveFirstMinLen ?? DEFAULT_AGGRESSIVE_FIRST_MIN_LEN;
|
|
3758
|
+
this.language = (options?.language ?? "en").toLowerCase();
|
|
3759
|
+
this.aggressiveFirstFlush = (options?.aggressiveFirstFlush ?? false) && !this.language.startsWith("it");
|
|
3581
3760
|
}
|
|
3582
3761
|
/**
|
|
3583
3762
|
* Feed a token. Returns zero or more complete sentences.
|
|
@@ -3588,13 +3767,21 @@ var SentenceChunker = class {
|
|
|
3588
3767
|
* sentence, all but the last (potentially incomplete) are emitted.
|
|
3589
3768
|
* - **Short-flush path** — when the buffer is shorter than `minSentenceLen`
|
|
3590
3769
|
* but ends with a sentence terminator AND has at least
|
|
3591
|
-
* `minWordsForShortFlush` whitespace-separated words
|
|
3592
|
-
*
|
|
3593
|
-
*
|
|
3594
|
-
* `
|
|
3770
|
+
* `minWordsForShortFlush` whitespace-separated words (default 1 — a
|
|
3771
|
+
* single-word reply like `"Yes."` flushes immediately for low TTS
|
|
3772
|
+
* TTFB). Acronym ("U.S.") and decimal ("f(x) = 2.") guards still block
|
|
3773
|
+
* dangerous cases. Bump `minWordsForShortFlush` to 2+ to keep
|
|
3774
|
+
* single-word utterances buffered until `flush()`.
|
|
3595
3775
|
*/
|
|
3596
3776
|
push(token) {
|
|
3597
3777
|
this.buffer += token;
|
|
3778
|
+
if (this.aggressiveFirstFlush && this.isFirstFlush) {
|
|
3779
|
+
const flushed = this.maybeAggressiveFirstFlush();
|
|
3780
|
+
if (flushed !== null) {
|
|
3781
|
+
this.isFirstFlush = false;
|
|
3782
|
+
return [flushed];
|
|
3783
|
+
}
|
|
3784
|
+
}
|
|
3598
3785
|
if (this.buffer.length < this.minSentenceLen) {
|
|
3599
3786
|
return this.maybeShortFlush();
|
|
3600
3787
|
}
|
|
@@ -3615,16 +3802,19 @@ var SentenceChunker = class {
|
|
|
3615
3802
|
*
|
|
3616
3803
|
* A buffer qualifies when **all** of these hold:
|
|
3617
3804
|
* 1. Last non-whitespace char is a sentence terminator.
|
|
3618
|
-
* 2. Word count is at least `minWordsForShortFlush` (default
|
|
3619
|
-
* single-word
|
|
3805
|
+
* 2. Word count is at least `minWordsForShortFlush` (default 1 —
|
|
3806
|
+
* single-word replies like `"Yes."` flush immediately).
|
|
3620
3807
|
* 3. The buffer contains exactly one terminator (the trailing one).
|
|
3621
3808
|
* Multiple terminators mean we may be mid-stream of a longer merged
|
|
3622
3809
|
* utterance like `"Hey! Hi! Hello! This is a sentence."` — let the
|
|
3623
3810
|
* standard path keep merging.
|
|
3624
3811
|
* 4. The char immediately before the terminator is NOT a digit (avoids
|
|
3625
3812
|
* decimal mid-stream like `"f(x) = x * 2."` flushing before `54`).
|
|
3626
|
-
* 5. The
|
|
3627
|
-
*
|
|
3813
|
+
* 5. The trailing word is NOT a short ASCII all-caps acronym of 1-3 chars
|
|
3814
|
+
* (`"U."` / `"U.S."` / `"USA."`).
|
|
3815
|
+
* 6. The trailing word is NOT a known honorific from any of the
|
|
3816
|
+
* per-language `HONORIFICS_*` constants (`"Mr."`, `"Sr."`, `"Dr."`,
|
|
3817
|
+
* `"Hr."`, `"Mme."`, ...).
|
|
3628
3818
|
*/
|
|
3629
3819
|
maybeShortFlush() {
|
|
3630
3820
|
const stripped = this.buffer.replace(/\s+$/, "");
|
|
@@ -3640,29 +3830,109 @@ var SentenceChunker = class {
|
|
|
3640
3830
|
if (wordCount < this.minWordsForShortFlush) return [];
|
|
3641
3831
|
if (stripped.length >= 2) {
|
|
3642
3832
|
const prev = stripped[stripped.length - 2];
|
|
3643
|
-
if (/\d/.test(prev)
|
|
3833
|
+
if (/\d/.test(prev)) return [];
|
|
3834
|
+
const terminator = stripped[stripped.length - 1];
|
|
3835
|
+
if (terminator === ".") {
|
|
3836
|
+
const stripTerm = stripped.replace(
|
|
3837
|
+
new RegExp(`[${TERMINATOR_REGEX_CLASS}]+$`),
|
|
3838
|
+
""
|
|
3839
|
+
);
|
|
3840
|
+
const tokens = stripTerm.split(/\s+/).filter((w) => w.length > 0);
|
|
3841
|
+
const lastWord = tokens.length > 0 ? tokens[tokens.length - 1] : "";
|
|
3842
|
+
if (/^[A-Z]{1,3}$/.test(lastWord)) return [];
|
|
3843
|
+
if (HONORIFICS_SET.has(lastWord)) return [];
|
|
3844
|
+
}
|
|
3644
3845
|
}
|
|
3645
3846
|
this.buffer = "";
|
|
3646
3847
|
return [stripped];
|
|
3647
3848
|
}
|
|
3849
|
+
/**
|
|
3850
|
+
* Try to flush the first clause of the response on a soft punctuation
|
|
3851
|
+
* boundary (comma / em-dash / en-dash) to minimise TTFA.
|
|
3852
|
+
*
|
|
3853
|
+
* Returns the flushed clause text (with terminator) or `null` if no safe
|
|
3854
|
+
* boundary is found. All of these guards must pass:
|
|
3855
|
+
*
|
|
3856
|
+
* 1. **Min length** — buffer ≥ `aggressiveFirstMinLen` (default 40).
|
|
3857
|
+
* 2. **Trailing terminator** — last non-whitespace char in `SOFT_TERMINATORS`.
|
|
3858
|
+
* 3. **Decimal/thousands guard** — refuse if comma is between two digits
|
|
3859
|
+
* or surrounded by digit-thousands grouping.
|
|
3860
|
+
* 4. **Currency guard** — refuse if a currency symbol appears in the
|
|
3861
|
+
* preceding 8 characters.
|
|
3862
|
+
* 5. **Balanced delimiter** — refuse if open parens/brackets/braces or
|
|
3863
|
+
* unmatched double-quotes still pending.
|
|
3864
|
+
* 6. **Ellipsis** — refuse if buffer ends with `...` or `…`.
|
|
3865
|
+
* 7. **Sub-token ambiguity** — only fire when at least one trailing char
|
|
3866
|
+
* after the terminator has arrived.
|
|
3867
|
+
*/
|
|
3868
|
+
maybeAggressiveFirstFlush() {
|
|
3869
|
+
const rstripped = this.buffer.replace(/\s+$/, "");
|
|
3870
|
+
if (rstripped.length < this.aggressiveFirstMinLen) return null;
|
|
3871
|
+
const lastChar = rstripped[rstripped.length - 1] ?? "";
|
|
3872
|
+
if (!SOFT_TERMINATORS.includes(lastChar)) return null;
|
|
3873
|
+
const pos = rstripped.length - 1;
|
|
3874
|
+
if (pos + 1 >= this.buffer.length) return null;
|
|
3875
|
+
const nextChar = this.buffer[pos + 1] ?? "";
|
|
3876
|
+
if (lastChar === ",") {
|
|
3877
|
+
const prevChar = pos >= 1 ? rstripped[pos - 1] ?? "" : "";
|
|
3878
|
+
if (/\d/.test(prevChar) && /\d/.test(nextChar)) return null;
|
|
3879
|
+
const tail = rstripped.slice(Math.max(0, pos - 6), pos);
|
|
3880
|
+
if (/\d/.test(prevChar) && tail.includes(",") && /\d/.test(tail)) {
|
|
3881
|
+
return null;
|
|
3882
|
+
}
|
|
3883
|
+
}
|
|
3884
|
+
const snippet = rstripped.slice(Math.max(0, pos - 8), pos);
|
|
3885
|
+
for (const c of CURRENCY_SYMBOLS) {
|
|
3886
|
+
if (snippet.includes(c)) return null;
|
|
3887
|
+
}
|
|
3888
|
+
const opens = (rstripped.match(/[([{]/g) ?? []).length;
|
|
3889
|
+
const closes = (rstripped.match(/[)\]}]/g) ?? []).length;
|
|
3890
|
+
if (opens > closes) return null;
|
|
3891
|
+
const dquoteCount = (rstripped.match(/"/g) ?? []).length;
|
|
3892
|
+
if (dquoteCount % 2 !== 0) return null;
|
|
3893
|
+
if (rstripped.endsWith("...") || rstripped.endsWith("\u2026")) return null;
|
|
3894
|
+
if (lastChar === "," && nextChar === '"') return null;
|
|
3895
|
+
const flushed = rstripped;
|
|
3896
|
+
this.buffer = this.buffer.slice(rstripped.length).replace(/^\s+/, "");
|
|
3897
|
+
return flushed;
|
|
3898
|
+
}
|
|
3648
3899
|
/** Flush remaining buffer as final sentence(s). Call at end of stream. */
|
|
3649
3900
|
flush() {
|
|
3650
3901
|
const remaining = this.buffer.trim();
|
|
3651
3902
|
this.buffer = "";
|
|
3903
|
+
this.isFirstFlush = true;
|
|
3652
3904
|
if (!remaining) return [];
|
|
3653
3905
|
return [remaining];
|
|
3654
3906
|
}
|
|
3655
3907
|
/** Discard buffered text. Call on interrupt. */
|
|
3656
3908
|
reset() {
|
|
3657
3909
|
this.buffer = "";
|
|
3910
|
+
this.isFirstFlush = true;
|
|
3658
3911
|
}
|
|
3659
3912
|
};
|
|
3660
3913
|
|
|
3661
3914
|
// src/pipeline-hooks.ts
|
|
3915
|
+
init_esm_shims();
|
|
3916
|
+
var legacyAfterLlmWarned = false;
|
|
3917
|
+
/**
 * Normalise the `afterLlm` hook into its object form.
 *
 * @param {Function | object | undefined} hook The configured `afterLlm` value.
 * @returns {object | undefined} `{ onResponse: hook }` for the legacy callable
 *   form; any non-function value (including `undefined`) is returned as-is.
 */
function normaliseAfterLlm(hook) {
  // Anything that is not a bare function passes through untouched.
  if (typeof hook !== "function") return hook;
  // The deprecation warning is emitted only once per process.
  if (!legacyAfterLlmWarned) {
    legacyAfterLlmWarned = true;
    getLogger().warn(
      "[patter] afterLlm: (text, ctx) => string is deprecated; pass an object with { onResponse } instead. The legacy form maps to onResponse and blocks streaming TTS. Will be removed in v0.7.0."
    );
  }
  return { onResponse: hook };
}
|
|
3662
3930
|
var PipelineHookExecutor = class {
|
|
3663
3931
|
hooks;
|
|
3932
|
+
afterLlm;
|
|
3664
3933
|
constructor(hooks) {
|
|
3665
3934
|
this.hooks = hooks;
|
|
3935
|
+
this.afterLlm = normaliseAfterLlm(hooks?.afterLlm);
|
|
3666
3936
|
}
|
|
3667
3937
|
/**
|
|
3668
3938
|
* Run beforeSendToStt hook. Returns null to drop the audio chunk.
|
|
@@ -3708,26 +3978,87 @@ var PipelineHookExecutor = class {
|
|
|
3708
3978
|
}
|
|
3709
3979
|
}
|
|
3710
3980
|
/**
|
|
3711
|
-
*
|
|
3712
|
-
*
|
|
3713
|
-
*
|
|
3981
|
+
* Tier 1 — per-token sync transform. Returns the (possibly transformed)
|
|
3982
|
+
* chunk. Fail-open: on exception or non-string return, the original chunk
|
|
3983
|
+
* passes through unchanged. Must be cheap (~0 ms budget).
|
|
3714
3984
|
*/
|
|
3715
|
-
|
|
3716
|
-
if (!this.
|
|
3985
|
+
runAfterLlmChunk(chunk) {
|
|
3986
|
+
if (!this.afterLlm?.onChunk) return chunk;
|
|
3717
3987
|
try {
|
|
3718
|
-
const result =
|
|
3988
|
+
const result = this.afterLlm.onChunk(chunk);
|
|
3989
|
+
return typeof result === "string" ? result : chunk;
|
|
3990
|
+
} catch (e) {
|
|
3991
|
+
getLogger().error("Pipeline hook afterLlm.onChunk threw:", e);
|
|
3992
|
+
return chunk;
|
|
3993
|
+
}
|
|
3994
|
+
}
|
|
3995
|
+
/**
|
|
3996
|
+
* Tier 2 — per-sentence rewrite. Returns rewritten sentence text, the
|
|
3997
|
+
* original sentence (if hook returned `null`), or `null` to drop the
|
|
3998
|
+
* sentence entirely (empty string is treated as drop). Fail-open.
|
|
3999
|
+
*/
|
|
4000
|
+
async runAfterLlmSentence(sentence, ctx) {
|
|
4001
|
+
if (!this.afterLlm?.onSentence) return sentence;
|
|
4002
|
+
try {
|
|
4003
|
+
const result = await this.afterLlm.onSentence(sentence, ctx);
|
|
4004
|
+
if (result === null) return sentence;
|
|
4005
|
+
if (result === "") return null;
|
|
4006
|
+
return result;
|
|
4007
|
+
} catch (e) {
|
|
4008
|
+
getLogger().error("Pipeline hook afterLlm.onSentence threw:", e);
|
|
4009
|
+
return sentence;
|
|
4010
|
+
}
|
|
4011
|
+
}
|
|
4012
|
+
/**
|
|
4013
|
+
* Tier 3 — per-response rewrite. Returns the (possibly rewritten) full
|
|
4014
|
+
* response text. Triggered after the LLM stream completes. Caller is
|
|
4015
|
+
* responsible for buffering tokens before invocation. Fail-open.
|
|
4016
|
+
*/
|
|
4017
|
+
async runAfterLlmResponse(text, ctx) {
|
|
4018
|
+
if (!this.afterLlm?.onResponse) return text;
|
|
4019
|
+
try {
|
|
4020
|
+
const result = await this.afterLlm.onResponse(text, ctx);
|
|
3719
4021
|
return result ?? text;
|
|
3720
4022
|
} catch (e) {
|
|
3721
|
-
getLogger().error("Pipeline hook afterLlm threw:", e);
|
|
4023
|
+
getLogger().error("Pipeline hook afterLlm.onResponse threw:", e);
|
|
3722
4024
|
return text;
|
|
3723
4025
|
}
|
|
3724
4026
|
}
|
|
3725
4027
|
/**
|
|
3726
|
-
*
|
|
3727
|
-
*
|
|
4028
|
+
* Backward-compatible alias for `runAfterLlmResponse`. Existing call sites
|
|
4029
|
+
* in the LLM loop continue to work unchanged.
|
|
4030
|
+
*
|
|
4031
|
+
* @deprecated Use `runAfterLlmResponse` directly.
|
|
4032
|
+
*/
|
|
4033
|
+
async runAfterLlm(text, ctx) {
|
|
4034
|
+
return this.runAfterLlmResponse(text, ctx);
|
|
4035
|
+
}
|
|
4036
|
+
/**
|
|
4037
|
+
* Whether a per-response (tier 3) `onResponse` transform is configured.
|
|
4038
|
+
* The LLM loop uses this to decide whether to buffer streaming tokens
|
|
4039
|
+
* before yielding them. Per-token (tier 1) and per-sentence (tier 2)
|
|
4040
|
+
* transforms do NOT require buffering.
|
|
4041
|
+
*/
|
|
4042
|
+
hasAfterLlmResponse() {
|
|
4043
|
+
return Boolean(this.afterLlm?.onResponse);
|
|
4044
|
+
}
|
|
4045
|
+
/** Whether a per-sentence (tier 2) transform is configured. */
|
|
4046
|
+
hasAfterLlmSentence() {
|
|
4047
|
+
return Boolean(this.afterLlm?.onSentence);
|
|
4048
|
+
}
|
|
4049
|
+
/** Whether a per-token (tier 1) transform is configured. */
|
|
4050
|
+
hasAfterLlmChunk() {
|
|
4051
|
+
return Boolean(this.afterLlm?.onChunk);
|
|
4052
|
+
}
|
|
4053
|
+
/**
|
|
4054
|
+
* Backward-compatible alias for `hasAfterLlmResponse`. The legacy callable
|
|
4055
|
+
* form maps to `onResponse`, so this preserves the original semantic for
|
|
4056
|
+
* existing call sites.
|
|
4057
|
+
*
|
|
4058
|
+
* @deprecated Use `hasAfterLlmResponse` directly.
|
|
3728
4059
|
*/
|
|
3729
4060
|
hasAfterLlm() {
|
|
3730
|
-
return
|
|
4061
|
+
return this.hasAfterLlmResponse();
|
|
3731
4062
|
}
|
|
3732
4063
|
/**
|
|
3733
4064
|
* Run beforeSynthesize hook. Returns null if hook vetoes TTS for this sentence.
|
|
@@ -3758,6 +4089,7 @@ var PipelineHookExecutor = class {
|
|
|
3758
4089
|
};
|
|
3759
4090
|
|
|
3760
4091
|
// src/observability/event-bus.ts
|
|
4092
|
+
init_esm_shims();
|
|
3761
4093
|
var EventBus = class {
|
|
3762
4094
|
listeners = /* @__PURE__ */ new Map();
|
|
3763
4095
|
/**
|
|
@@ -3784,17 +4116,18 @@ var EventBus = class {
|
|
|
3784
4116
|
const res = cb(payload);
|
|
3785
4117
|
if (res && typeof res.catch === "function") {
|
|
3786
4118
|
res.catch(
|
|
3787
|
-
(e) => getLogger().
|
|
4119
|
+
(e) => getLogger().error(`[EventBus] listener for "${event}" rejected:`, e)
|
|
3788
4120
|
);
|
|
3789
4121
|
}
|
|
3790
4122
|
} catch (e) {
|
|
3791
|
-
getLogger().
|
|
4123
|
+
getLogger().error(`[EventBus] listener for "${event}" threw:`, e);
|
|
3792
4124
|
}
|
|
3793
4125
|
}
|
|
3794
4126
|
}
|
|
3795
4127
|
};
|
|
3796
4128
|
|
|
3797
4129
|
// src/observability/tracing.ts
|
|
4130
|
+
init_esm_shims();
|
|
3798
4131
|
var ENV_FLAG = "PATTER_OTEL_ENABLED";
|
|
3799
4132
|
var SERVICE_NAME = "patter";
|
|
3800
4133
|
var SPAN_CALL = "getpatter.call";
|
|
@@ -3982,7 +4315,7 @@ var HALLUCINATIONS = /* @__PURE__ */ new Set([
|
|
|
3982
4315
|
"right",
|
|
3983
4316
|
"cool"
|
|
3984
4317
|
]);
|
|
3985
|
-
var StreamHandler = class {
|
|
4318
|
+
var StreamHandler = class _StreamHandler {
|
|
3986
4319
|
deps;
|
|
3987
4320
|
ws;
|
|
3988
4321
|
caller;
|
|
@@ -3994,8 +4327,50 @@ var StreamHandler = class {
|
|
|
3994
4327
|
stt = null;
|
|
3995
4328
|
tts = null;
|
|
3996
4329
|
isSpeaking = false;
|
|
4330
|
+
/**
|
|
4331
|
+
* Ring buffer of inbound PCM16 16 kHz frames captured while the agent
|
|
4332
|
+
* is speaking and the self-hearing guard is dropping audio. On
|
|
4333
|
+
* barge-in we flush this buffer to STT so Deepgram (or any other
|
|
4334
|
+
* streaming STT) receives the user's first ~500 ms of speech — which
|
|
4335
|
+
* would otherwise be lost while the VAD's `minSpeechDuration` window
|
|
4336
|
+
* accumulated and fired `speech_start`. Each frame is 20 ms × 32 bytes/ms
|
|
4337
|
+
* (16 kHz × 16-bit mono) ≈ 640 bytes.
|
|
4338
|
+
*
|
|
4339
|
+
* Capped to ``INBOUND_AUDIO_RING_FRAMES`` to recover only the
|
|
4340
|
+
* VAD-missed leading edge of the user's speech (default 250 ms,
|
|
4341
|
+
* matching SileroVAD ``minSpeechDuration``). Earlier values up to
|
|
4342
|
+
* 600 ms were including ~350 ms of pre-speech silence/agent-bleed in
|
|
4343
|
+
* the replay; on PSTN (where AEC is a no-op) Deepgram trained on
|
|
4344
|
+
* English happily transcribes that bleed as English garbage
|
|
4345
|
+
* (``"The same as Edgar,"``, ``"Permadees."``) and commits it to
|
|
4346
|
+
* the LLM as a phantom user transcript. See BUGS.md 2026-05-05
|
|
4347
|
+
* post-barge-in bleed-transcription entry.
|
|
4348
|
+
*/
|
|
4349
|
+
inboundAudioRing = [];
|
|
4350
|
+
static INBOUND_AUDIO_RING_FRAMES = 13;
|
|
4351
|
+
/**
|
|
4352
|
+
* Cached LLM provider tag used by speech-event payloads. Mirrors the
|
|
4353
|
+
* value passed to the metrics accumulator at construction time so the
|
|
4354
|
+
* speech-edge events report the same provider classification as
|
|
4355
|
+
* dashboard / pricing rows.
|
|
4356
|
+
*/
|
|
4357
|
+
llmProviderTag = "openai";
|
|
3997
4358
|
/** Set to true after a VAD error to suppress log spam for the rest of the call. */
|
|
3998
4359
|
vadDisabled = false;
|
|
4360
|
+
/**
|
|
4361
|
+
* Auto-loaded SileroVAD when ``agent.vad`` is undefined. Populated by
|
|
4362
|
+
* ``initPipeline`` and queried alongside ``agent.vad`` on every audio frame.
|
|
4363
|
+
* Stays null when ``onnxruntime-node`` is not installed — the pipeline
|
|
4364
|
+
* then falls back to the STT-endpoint heuristic (legacy behaviour).
|
|
4365
|
+
*/
|
|
4366
|
+
autoVad = null;
|
|
4367
|
+
/**
|
|
4368
|
+
* Acoustic echo canceller (NLMS adaptive filter). Lazily instantiated in
|
|
4369
|
+
* ``initPipeline`` when ``agent.echoCancellation`` is true. ``null``
|
|
4370
|
+
* otherwise — the mic path stays a pure pass-through for handset /
|
|
4371
|
+
* headset deployments that don't have TTS bleed.
|
|
4372
|
+
*/
|
|
4373
|
+
aec = null;
|
|
3999
4374
|
/**
|
|
4000
4375
|
* Monotonic counter incremented on every TTS-start. The grace timer
|
|
4001
4376
|
* scheduled by ``endSpeakingWithGrace`` only flips ``isSpeaking=false``
|
|
@@ -4004,20 +4379,97 @@ var StreamHandler = class {
|
|
|
4004
4379
|
* own ``isSpeaking=true``.
|
|
4005
4380
|
*/
|
|
4006
4381
|
speakingGeneration = 0;
|
|
4382
|
+
/**
|
|
4383
|
+
* Wall-clock timestamp (ms since epoch) when the current TTS turn
|
|
4384
|
+
* started — captured by ``beginSpeaking`` and cleared by
|
|
4385
|
+
* ``cancelSpeaking`` / the grace flip. Used to gate barge-in: we
|
|
4386
|
+
* suppress the cancel for the first
|
|
4387
|
+
* ``MIN_AGENT_SPEAKING_MS_BEFORE_BARGE_IN_AEC`` of every turn (when AEC
|
|
4388
|
+
* is on) so the AEC filter has time to converge — otherwise residual
|
|
4389
|
+
* TTS bleed in the mic stream looks like user speech to VAD and
|
|
4390
|
+
* triggers an immediate self-cancellation of the agent's first
|
|
4391
|
+
* sentence.
|
|
4392
|
+
*/
|
|
4393
|
+
speakingStartedAt = null;
|
|
4394
|
+
/**
|
|
4395
|
+
* Minimum wall-clock duration (ms) the agent must have been speaking
|
|
4396
|
+
* before barge-in is allowed to fire when AEC is active. Covers the
|
|
4397
|
+
* AEC warmup window (~500 ms) plus a safety margin so residual bleed
|
|
4398
|
+
* during the convergence period does not self-trigger barge-in.
|
|
4399
|
+
*/
|
|
4400
|
+
static MIN_AGENT_SPEAKING_MS_BEFORE_BARGE_IN_AEC = 1e3;
|
|
4401
|
+
/**
|
|
4402
|
+
* Same as the AEC variant but for deployments where AEC is OFF
|
|
4403
|
+
* (default on PSTN — Twilio/Telnyx). Without an adaptive filter to
|
|
4404
|
+
* converge, the only justification for a gate is anti-flicker on
|
|
4405
|
+
* micro-events (cough, click). A short 250 ms window keeps real-user
|
|
4406
|
+
* barge-in responsive while still filtering tiny noise spikes.
|
|
4407
|
+
*/
|
|
4408
|
+
static MIN_AGENT_SPEAKING_MS_BEFORE_BARGE_IN_NO_AEC = 250;
|
|
4007
4409
|
/** Handle for the pending grace-period timer, so it can be cleared on cleanup. */
|
|
4008
4410
|
graceTimer = null;
|
|
4009
|
-
/**
|
|
4010
|
-
|
|
4411
|
+
/**
|
|
4412
|
+
* AbortController for the current LLM streaming consumption. Aborted by
|
|
4413
|
+
* ``cancelSpeaking`` so the in-flight LLM stream stops generating tokens
|
|
4414
|
+
* we will never speak — saves provider cost and frees the connection
|
|
4415
|
+
* earlier. Mirrors Python ``_llm_cancel_event``.
|
|
4416
|
+
*/
|
|
4417
|
+
llmAbort = null;
|
|
4418
|
+
/**
|
|
4419
|
+
* Wall-clock timestamp of the most recent ``cancelSpeaking`` call, or
|
|
4420
|
+
* ``null`` if no cancel has fired since the call started. Used by
|
|
4421
|
+
* ``beginSpeaking`` to enforce a short post-cancel drain window so the
|
|
4422
|
+
* remote PSTN player finishes flushing the previous turn's in-flight
|
|
4423
|
+
* audio before the next TTS chunk lands on top of it. Without this,
|
|
4424
|
+
* the first sentence of a post-barge-in turn audibly overlaps with
|
|
4425
|
+
* the tail of the cancelled turn (~50-200 ms of doubled audio).
|
|
4426
|
+
*/
|
|
4427
|
+
lastCancelAt = null;
|
|
4428
|
+
/**
|
|
4429
|
+
* Minimum drain window (ms) between a ``cancelSpeaking`` and the next
|
|
4430
|
+
* ``beginSpeaking``. 150 ms covers a typical PSTN jitter buffer drain
|
|
4431
|
+
* + Twilio Media Stream clear propagation. Lower values risk audio
|
|
4432
|
+
* overlap on the first chunk; higher values increase the perceived
|
|
4433
|
+
* "agent ack" latency after a barge-in. 150 ms is the smallest value
|
|
4434
|
+
* that consistently eliminated the overlap during 0.6.0 acceptance.
|
|
4435
|
+
*/
|
|
4436
|
+
static POST_CANCEL_DRAIN_MS = 150;
|
|
4437
|
+
/**
|
|
4438
|
+
* Mark the start of a TTS span. Use instead of setting isSpeaking
|
|
4439
|
+
* directly. Awaits the post-cancel drain window before flipping state
|
|
4440
|
+
* so the remote player has time to flush the cancelled turn's tail.
|
|
4441
|
+
*/
|
|
4442
|
+
async beginSpeaking() {
|
|
4443
|
+
if (this.lastCancelAt !== null) {
|
|
4444
|
+
const elapsed = Date.now() - this.lastCancelAt;
|
|
4445
|
+
const remaining = _StreamHandler.POST_CANCEL_DRAIN_MS - elapsed;
|
|
4446
|
+
if (remaining > 0) {
|
|
4447
|
+
await new Promise((r) => setTimeout(r, remaining));
|
|
4448
|
+
}
|
|
4449
|
+
}
|
|
4011
4450
|
this.speakingGeneration++;
|
|
4012
4451
|
this.isSpeaking = true;
|
|
4452
|
+
this.speakingStartedAt = Date.now();
|
|
4453
|
+
this.inboundAudioRing = [];
|
|
4013
4454
|
}
|
|
4014
4455
|
/**
|
|
4015
4456
|
* Atomically end speaking AND invalidate any pending grace timer.
|
|
4016
4457
|
* Use instead of ``this.isSpeaking = false`` at barge-in sites.
|
|
4458
|
+
*
|
|
4459
|
+
* Also aborts the in-flight LLM stream (if any) so the provider stops
|
|
4460
|
+
* billing tokens we will never speak.
|
|
4017
4461
|
*/
|
|
4018
4462
|
cancelSpeaking() {
|
|
4019
4463
|
this.speakingGeneration++;
|
|
4020
4464
|
this.isSpeaking = false;
|
|
4465
|
+
this.speakingStartedAt = null;
|
|
4466
|
+
this.lastCancelAt = Date.now();
|
|
4467
|
+
if (this.llmAbort !== null) {
|
|
4468
|
+
try {
|
|
4469
|
+
this.llmAbort.abort();
|
|
4470
|
+
} catch {
|
|
4471
|
+
}
|
|
4472
|
+
}
|
|
4021
4473
|
}
|
|
4022
4474
|
/** Cancel and clear the pending grace timer, if any. */
|
|
4023
4475
|
clearGraceTimer() {
|
|
@@ -4040,18 +4492,102 @@ var StreamHandler = class {
|
|
|
4040
4492
|
this.clearGraceTimer();
|
|
4041
4493
|
this.graceTimer = setTimeout(() => {
|
|
4042
4494
|
this.graceTimer = null;
|
|
4043
|
-
if (this.speakingGeneration === gen)
|
|
4495
|
+
if (this.speakingGeneration === gen) {
|
|
4496
|
+
this.isSpeaking = false;
|
|
4497
|
+
this.speakingStartedAt = null;
|
|
4498
|
+
}
|
|
4044
4499
|
}, grace);
|
|
4045
4500
|
} else {
|
|
4046
4501
|
this.isSpeaking = false;
|
|
4502
|
+
this.speakingStartedAt = null;
|
|
4047
4503
|
}
|
|
4048
4504
|
}
|
|
4505
|
+
/**
|
|
4506
|
+
* Whether barge-in is allowed to fire right now. Gate length depends
|
|
4507
|
+
* on whether AEC is active: 1 s with AEC (covers filter warmup),
|
|
4508
|
+
* 250 ms without (anti-flicker only — keeps PSTN barge-in responsive).
|
|
4509
|
+
*/
|
|
4510
|
+
canBargeIn() {
|
|
4511
|
+
if (this.speakingStartedAt === null) return true;
|
|
4512
|
+
const elapsed = Date.now() - this.speakingStartedAt;
|
|
4513
|
+
const gate = this.aec ? _StreamHandler.MIN_AGENT_SPEAKING_MS_BEFORE_BARGE_IN_AEC : _StreamHandler.MIN_AGENT_SPEAKING_MS_BEFORE_BARGE_IN_NO_AEC;
|
|
4514
|
+
return elapsed >= gate;
|
|
4515
|
+
}
|
|
4516
|
+
/**
|
|
4517
|
+
* Replay the audio captured by the self-hearing guard right before a
|
|
4518
|
+
* confirmed barge-in. VAD's ``minSpeechDuration`` window (default
|
|
4519
|
+
* 250 ms) means ``speech_start`` fires only AFTER the user has been
|
|
4520
|
+
* talking for that long; without this replay STT sees only the tail
|
|
4521
|
+
* of the user's interruption and produces "the line is breaking up"
|
|
4522
|
+
* partial transcripts. We deliberately do NOT call this on natural
|
|
4523
|
+
* turn end — see the comment in ``endSpeakingWithGrace`` for why.
|
|
4524
|
+
*/
|
|
4525
|
+
flushInboundAudioRing() {
|
|
4526
|
+
if (!this.stt || this.inboundAudioRing.length === 0) return;
|
|
4527
|
+
const replayed = this.inboundAudioRing.length;
|
|
4528
|
+
for (const buf of this.inboundAudioRing) {
|
|
4529
|
+
try {
|
|
4530
|
+
this.stt.sendAudio(buf);
|
|
4531
|
+
} catch (err) {
|
|
4532
|
+
getLogger().debug(`sendAudio replay failed: ${String(err)}`);
|
|
4533
|
+
}
|
|
4534
|
+
}
|
|
4535
|
+
this.inboundAudioRing = [];
|
|
4536
|
+
getLogger().info(
|
|
4537
|
+
`[DIAG] Flushed ${replayed} pre-barge-in frame(s) (~${replayed * 20} ms) to STT`
|
|
4538
|
+
);
|
|
4539
|
+
}
|
|
4049
4540
|
llmLoop = null;
|
|
4541
|
+
/**
|
|
4542
|
+
* Per-call tool executor — provides retry-with-exponential-backoff and a
|
|
4543
|
+
* per-tool circuit breaker for Realtime function calls. Pipeline mode
|
|
4544
|
+
* uses its own executor inside ``LLMLoop``; this one is dedicated to
|
|
4545
|
+
* the Realtime path so a flaky downstream (DB outage, vendor rate
|
|
4546
|
+
* limit) returns a structured ``{ error, fallback: true }`` instead of
|
|
4547
|
+
* hanging the model on retries that will keep failing.
|
|
4548
|
+
*/
|
|
4549
|
+
toolExecutor = new DefaultToolExecutor();
|
|
4550
|
+
/**
|
|
4551
|
+
* MCP server connection manager — populated lazily in
|
|
4552
|
+
* ``initMcpTools()`` when the agent declares ``mcpServers``. Holds
|
|
4553
|
+
* the open MCP client connections for the lifetime of the call so
|
|
4554
|
+
* we can dispatch ``tools/call`` without re-handshaking on every
|
|
4555
|
+
* function invocation. Cleared in ``fireCallEnd``.
|
|
4556
|
+
*/
|
|
4557
|
+
mcpManager = null;
|
|
4050
4558
|
chunkCount = 0;
|
|
4051
4559
|
callEndFired = false;
|
|
4052
4560
|
sttClosed = false;
|
|
4053
4561
|
currentAgentText = "";
|
|
4054
4562
|
responseAudioStarted = false;
|
|
4563
|
+
/**
|
|
4564
|
+
* Realtime turn ordering buffer. OpenAI Realtime emits
|
|
4565
|
+
* `input_audio_transcription.completed` (user transcript) AFTER
|
|
4566
|
+
* `response.done` (assistant complete) because Whisper transcription
|
|
4567
|
+
* runs in parallel with — and slower than — model response. Without
|
|
4568
|
+
* this buffer the pushed `history` order is [assistant, user, ...]
|
|
4569
|
+
* which renders out-of-order in the dashboard.
|
|
4570
|
+
*
|
|
4571
|
+
* Behaviour:
|
|
4572
|
+
* - `onAdapterSpeechStopped` flips `userTranscriptPending = true`
|
|
4573
|
+
* - `onAdapterResponseDone` checks the flag; if set, stashes the
|
|
4574
|
+
* assistant text + a fallback timer
|
|
4575
|
+
* - `onAdapterTranscriptInput` clears the flag, pushes user, then
|
|
4576
|
+
* flushes any pending assistant turn
|
|
4577
|
+
* - The fallback timer flushes the assistant alone if the user
|
|
4578
|
+
* transcript never arrives (silence misclassified as speech, etc.)
|
|
4579
|
+
*/
|
|
4580
|
+
userTranscriptPending = false;
|
|
4581
|
+
pendingAssistantTurn = null;
|
|
4582
|
+
pendingAssistantTimer = null;
|
|
4583
|
+
/**
|
|
4584
|
+
* Hard cap on how long we wait for the user transcript before flushing
|
|
4585
|
+
* the buffered assistant turn alone. 3 s covers OpenAI Whisper's typical
|
|
4586
|
+
* 200-800 ms post-response delay with substantial headroom for slow
|
|
4587
|
+
* cellular audio uploads. Beyond this we accept the order will look
|
|
4588
|
+
* "assistant-only" rather than block the call's transcript display.
|
|
4589
|
+
*/
|
|
4590
|
+
static REALTIME_USER_TRANSCRIPT_WAIT_MS = 3e3;
|
|
4055
4591
|
maxDurationTimer = null;
|
|
4056
4592
|
transcriptProcessing = false;
|
|
4057
4593
|
transcriptQueue = [];
|
|
@@ -4080,9 +4616,12 @@ var StreamHandler = class {
|
|
|
4080
4616
|
this.history = createHistoryManager(200);
|
|
4081
4617
|
const sttKey = deps.agent.stt?.constructor?.providerKey;
|
|
4082
4618
|
const sttProviderName = deps.agent.stt ? sttKey ?? deps.agent.stt.constructor?.name ?? "custom" : void 0;
|
|
4619
|
+
const sttModelName = String((deps.agent.stt?.model ?? "") || "");
|
|
4083
4620
|
const ttsKey = deps.agent.tts?.constructor?.providerKey;
|
|
4084
4621
|
const ttsProviderName = deps.agent.tts ? ttsKey ?? deps.agent.tts.constructor?.name ?? "custom" : void 0;
|
|
4622
|
+
const ttsModelName = String((deps.agent.tts?.model ?? "") || "");
|
|
4085
4623
|
const providerMode = deps.agent.provider ?? "openai_realtime";
|
|
4624
|
+
const realtimeModelName = providerMode === "openai_realtime" ? String((deps.agent.model ?? "") || "") || "gpt-realtime-mini" : "";
|
|
4086
4625
|
const llmKey = deps.agent.llm?.constructor?.providerKey;
|
|
4087
4626
|
let llmProviderName;
|
|
4088
4627
|
if (deps.agent.llm) {
|
|
@@ -4095,6 +4634,7 @@ var StreamHandler = class {
|
|
|
4095
4634
|
} else {
|
|
4096
4635
|
llmProviderName = providerMode === "openai_realtime" ? "openai_realtime" : "openai";
|
|
4097
4636
|
}
|
|
4637
|
+
this.llmProviderTag = llmProviderName;
|
|
4098
4638
|
this._eventBus = new EventBus();
|
|
4099
4639
|
this.metricsAcc = new CallMetricsAccumulator({
|
|
4100
4640
|
callId: "",
|
|
@@ -4103,6 +4643,9 @@ var StreamHandler = class {
|
|
|
4103
4643
|
sttProvider: sttProviderName,
|
|
4104
4644
|
ttsProvider: ttsProviderName,
|
|
4105
4645
|
llmProvider: llmProviderName,
|
|
4646
|
+
sttModel: sttModelName,
|
|
4647
|
+
ttsModel: ttsModelName,
|
|
4648
|
+
realtimeModel: realtimeModelName,
|
|
4106
4649
|
pricing: deps.pricing,
|
|
4107
4650
|
eventBus: this._eventBus,
|
|
4108
4651
|
reportOnlyInitialTtfb: deps.reportOnlyInitialTtfb ?? false
|
|
@@ -4213,6 +4756,7 @@ var StreamHandler = class {
|
|
|
4213
4756
|
* @param callId Call SID (Twilio) or call_control_id (Telnyx)
|
|
4214
4757
|
* @param customParams TwiML custom parameters (Twilio only, empty for Telnyx)
|
|
4215
4758
|
*/
|
|
4759
|
+
/** Initialize per-call state, build the AI adapter, and dispatch the `onCallStart` callback. */
|
|
4216
4760
|
async handleCallStart(callId, customParams = {}) {
|
|
4217
4761
|
this.callId = callId;
|
|
4218
4762
|
this.metricsAcc.callId = callId;
|
|
@@ -4239,7 +4783,7 @@ var StreamHandler = class {
|
|
|
4239
4783
|
}
|
|
4240
4784
|
}, MAX_CALL_DURATION_MS);
|
|
4241
4785
|
try {
|
|
4242
|
-
const { notifyDashboard } = await import("./persistence-
|
|
4786
|
+
const { notifyDashboard } = await import("./persistence-LVIAHESK.mjs");
|
|
4243
4787
|
notifyDashboard({
|
|
4244
4788
|
call_id: callId,
|
|
4245
4789
|
caller: this.caller,
|
|
@@ -4264,25 +4808,58 @@ var StreamHandler = class {
|
|
|
4264
4808
|
const allVars = { ...agentVars, ...safeCustomParams };
|
|
4265
4809
|
const resolvedPrompt = Object.keys(allVars).length > 0 ? this.deps.resolveVariables(this.deps.agent.systemPrompt, allVars) : this.deps.agent.systemPrompt;
|
|
4266
4810
|
const provider2 = this.deps.agent.provider ?? "openai_realtime";
|
|
4811
|
+
await this.initMcpTools();
|
|
4267
4812
|
if (provider2 === "pipeline") {
|
|
4268
4813
|
await this.initPipeline(resolvedPrompt);
|
|
4269
4814
|
} else {
|
|
4270
4815
|
await this.initRealtimeAdapter(resolvedPrompt);
|
|
4271
4816
|
}
|
|
4272
4817
|
}
|
|
4818
|
+
/**
|
|
4819
|
+
* Connect to every configured MCP server, discover their tools via
|
|
4820
|
+
* ``tools/list``, and merge them into ``agent.tools`` before the
|
|
4821
|
+
* adapter is built. The synthetic handlers dispatch back through the
|
|
4822
|
+
* MCP client so ``DefaultToolExecutor`` can invoke them like any
|
|
4823
|
+
* other handler-tool. No-op when ``agent.mcpServers`` is empty or the
|
|
4824
|
+
* optional ``@modelcontextprotocol/sdk`` is not installed.
|
|
4825
|
+
*/
|
|
4826
|
+
async initMcpTools() {
|
|
4827
|
+
const servers = this.deps.agent.mcpServers;
|
|
4828
|
+
if (!servers || servers.length === 0) return;
|
|
4829
|
+
this.mcpManager = new MCPManager(servers);
|
|
4830
|
+
let discovered;
|
|
4831
|
+
try {
|
|
4832
|
+
discovered = await this.mcpManager.connect();
|
|
4833
|
+
} catch (e) {
|
|
4834
|
+
getLogger().error(`MCP connect failed (continuing without MCP tools): ${String(e)}`);
|
|
4835
|
+
this.mcpManager = null;
|
|
4836
|
+
return;
|
|
4837
|
+
}
|
|
4838
|
+
if (discovered.length === 0) return;
|
|
4839
|
+
MCPManager.assertNoConflicts(this.deps.agent.tools, discovered);
|
|
4840
|
+
const mutableAgent = this.deps.agent;
|
|
4841
|
+
mutableAgent.tools = [...mutableAgent.tools ?? [], ...discovered];
|
|
4842
|
+
getLogger().info(`MCP: merged ${discovered.length} tool(s) into agent`);
|
|
4843
|
+
}
|
|
4273
4844
|
/** Set the stream SID (Twilio only, called after parsing 'start' event). */
|
|
4845
|
+
/** Set the carrier-side stream id (Twilio `streamSid` / Telnyx stream identifier). */
|
|
4274
4846
|
setStreamSid(sid) {
|
|
4275
4847
|
this.streamSid = sid;
|
|
4276
4848
|
}
|
|
4277
4849
|
/** Handle an incoming audio chunk (already decoded from base64). */
|
|
4850
|
+
/** Forward inbound audio bytes to the AI adapter and (in pipeline mode) the STT provider. */
|
|
4278
4851
|
async handleAudio(audioBuffer) {
|
|
4279
4852
|
const provider2 = this.deps.agent.provider ?? "openai_realtime";
|
|
4280
4853
|
if (provider2 === "pipeline" && this.stt) {
|
|
4281
4854
|
const pcm8k = mulawToPcm16(audioBuffer);
|
|
4282
|
-
|
|
4283
|
-
if (this.
|
|
4855
|
+
let pcm16k = this.inboundResampler.process(pcm8k);
|
|
4856
|
+
if (this.aec) {
|
|
4857
|
+
pcm16k = this.aec.processNearEnd(pcm16k);
|
|
4858
|
+
}
|
|
4859
|
+
const activeVad = this.deps.agent.vad ?? this.autoVad;
|
|
4860
|
+
if (activeVad && !this.vadDisabled) {
|
|
4284
4861
|
try {
|
|
4285
|
-
const vadPromise =
|
|
4862
|
+
const vadPromise = activeVad.processFrame(pcm16k, 16e3);
|
|
4286
4863
|
const timeoutPromise = new Promise((resolve) => setTimeout(() => resolve(null), 25));
|
|
4287
4864
|
const evt = await Promise.race([vadPromise, timeoutPromise]);
|
|
4288
4865
|
if (evt) {
|
|
@@ -4291,7 +4868,11 @@ var StreamHandler = class {
|
|
|
4291
4868
|
);
|
|
4292
4869
|
}
|
|
4293
4870
|
if (evt?.type === "speech_start") {
|
|
4294
|
-
if (this.isSpeaking) {
|
|
4871
|
+
if (this.isSpeaking && !this.canBargeIn()) {
|
|
4872
|
+
getLogger().info(
|
|
4873
|
+
`[VAD] speech_start suppressed (agent speaking < gate, aec=${this.aec ? "on" : "off"})`
|
|
4874
|
+
);
|
|
4875
|
+
} else if (this.isSpeaking) {
|
|
4295
4876
|
getLogger().info("[VAD] speech_start during TTS \u2192 BARGE-IN");
|
|
4296
4877
|
this.metricsAcc.recordOverlapStart();
|
|
4297
4878
|
this.metricsAcc.recordBargeinDetected();
|
|
@@ -4303,6 +4884,7 @@ var StreamHandler = class {
|
|
|
4303
4884
|
} catch (err) {
|
|
4304
4885
|
getLogger().debug(`sendClear during VAD barge-in failed: ${String(err)}`);
|
|
4305
4886
|
}
|
|
4887
|
+
this.flushInboundAudioRing();
|
|
4306
4888
|
this.metricsAcc.recordTtsStopped();
|
|
4307
4889
|
this.metricsAcc.recordTurnInterrupted();
|
|
4308
4890
|
this.metricsAcc.recordOverlapEnd(true);
|
|
@@ -4316,6 +4898,16 @@ var StreamHandler = class {
|
|
|
4316
4898
|
this.metricsAcc.startTurnIfIdle();
|
|
4317
4899
|
} else if (evt?.type === "speech_end") {
|
|
4318
4900
|
this.metricsAcc.recordVadStop();
|
|
4901
|
+
try {
|
|
4902
|
+
const ret = this.stt?.finalize?.();
|
|
4903
|
+
if (ret instanceof Promise) {
|
|
4904
|
+
ret.catch(
|
|
4905
|
+
(err) => getLogger().debug(`STT finalize threw: ${String(err)}`)
|
|
4906
|
+
);
|
|
4907
|
+
}
|
|
4908
|
+
} catch (err) {
|
|
4909
|
+
getLogger().debug(`STT finalize threw: ${String(err)}`);
|
|
4910
|
+
}
|
|
4319
4911
|
}
|
|
4320
4912
|
} catch (err) {
|
|
4321
4913
|
this.vadDisabled = true;
|
|
@@ -4323,7 +4915,13 @@ var StreamHandler = class {
|
|
|
4323
4915
|
}
|
|
4324
4916
|
}
|
|
4325
4917
|
if (this.isSpeaking) {
|
|
4326
|
-
if (this.deps.agent.vad)
|
|
4918
|
+
if (this.deps.agent.vad ?? this.autoVad) {
|
|
4919
|
+
this.inboundAudioRing.push(pcm16k);
|
|
4920
|
+
if (this.inboundAudioRing.length > _StreamHandler.INBOUND_AUDIO_RING_FRAMES) {
|
|
4921
|
+
this.inboundAudioRing.shift();
|
|
4922
|
+
}
|
|
4923
|
+
return;
|
|
4924
|
+
}
|
|
4327
4925
|
if ((this.deps.agent.bargeInThresholdMs ?? 300) === 0) return;
|
|
4328
4926
|
}
|
|
4329
4927
|
const hooks = this.deps.agent.hooks;
|
|
@@ -4349,6 +4947,7 @@ var StreamHandler = class {
|
|
|
4349
4947
|
}
|
|
4350
4948
|
}
|
|
4351
4949
|
/** Handle a DTMF keypress event (Twilio only). */
|
|
4950
|
+
/** Handle an inbound DTMF tone from the caller. */
|
|
4352
4951
|
async handleDtmf(digit) {
|
|
4353
4952
|
getLogger().debug(`DTMF: ${digit}`);
|
|
4354
4953
|
if (this.adapter instanceof OpenAIRealtimeAdapter) {
|
|
@@ -4371,12 +4970,14 @@ var StreamHandler = class {
|
|
|
4371
4970
|
* ``twilio_handler.py``: ``audio_sender.on_mark_confirmed(mark_name)`` +
|
|
4372
4971
|
* ``handler.on_mark(mark_name)``.
|
|
4373
4972
|
*/
|
|
4973
|
+
/** Handle a Twilio Media Streams `mark` event acknowledging audio playback boundaries. */
|
|
4374
4974
|
async onMark(markName) {
|
|
4375
4975
|
if (markName) {
|
|
4376
4976
|
this.lastConfirmedMark = markName;
|
|
4377
4977
|
}
|
|
4378
4978
|
}
|
|
4379
4979
|
/** Handle call stop / stream end. */
|
|
4980
|
+
/** Handle a carrier-emitted `stop` event signalling the call has ended. */
|
|
4380
4981
|
async handleStop() {
|
|
4381
4982
|
this.clearGraceTimer();
|
|
4382
4983
|
this.flushResamplers();
|
|
@@ -4388,6 +4989,7 @@ var StreamHandler = class {
|
|
|
4388
4989
|
await this.fireCallEnd();
|
|
4389
4990
|
}
|
|
4390
4991
|
/** Handle WebSocket close event. */
|
|
4992
|
+
/** Tear down adapter, STT/TTS, and per-call state when the carrier WebSocket closes. */
|
|
4391
4993
|
async handleWsClose() {
|
|
4392
4994
|
this.clearGraceTimer();
|
|
4393
4995
|
this.flushResamplers();
|
|
@@ -4422,7 +5024,7 @@ var StreamHandler = class {
|
|
|
4422
5024
|
* (server.ts) requests ``stream_bidirectional_codec=PCMU`` at 8 kHz. So
|
|
4423
5025
|
* the wire format for both providers is mulaw 8 kHz; we resample 16 kHz
|
|
4424
5026
|
* PCM16 → 8 kHz then encode to mulaw. Mirrors the Python pipeline path
|
|
4425
|
-
* (
|
|
5027
|
+
* (libraries/python/getpatter/handlers/telnyx_handler.py::TelnyxAudioSender).
|
|
4426
5028
|
*
|
|
4427
5029
|
* Maintains a 1-byte carry across calls so unaligned HTTP chunks from
|
|
4428
5030
|
* streaming TTS providers never byte-swap the PCM16 samples downstream.
|
|
@@ -4451,12 +5053,61 @@ var StreamHandler = class {
|
|
|
4451
5053
|
const label = this.deps.bridge.label;
|
|
4452
5054
|
this.stt = await this.deps.bridge.createStt(this.deps.agent);
|
|
4453
5055
|
this.tts = await createTTS(this.deps.agent);
|
|
5056
|
+
if (this.tts) {
|
|
5057
|
+
const carrierAware = this.tts;
|
|
5058
|
+
if (typeof carrierAware.setTelephonyCarrier === "function") {
|
|
5059
|
+
try {
|
|
5060
|
+
carrierAware.setTelephonyCarrier(this.deps.bridge.telephonyProvider);
|
|
5061
|
+
} catch (e) {
|
|
5062
|
+
getLogger().debug(`TTS setTelephonyCarrier failed (${label}): ${String(e)}`);
|
|
5063
|
+
}
|
|
5064
|
+
}
|
|
5065
|
+
}
|
|
4454
5066
|
if (!this.stt) {
|
|
4455
5067
|
getLogger().debug(`Pipeline mode (${label}): no STT configured`);
|
|
4456
5068
|
}
|
|
4457
5069
|
if (!this.tts) {
|
|
4458
5070
|
getLogger().debug(`Pipeline mode (${label}): no TTS configured`);
|
|
4459
5071
|
}
|
|
5072
|
+
if (!this.deps.agent.vad) {
|
|
5073
|
+
try {
|
|
5074
|
+
const { SileroVAD } = await import("./silero-vad-YLCXT5GQ.mjs");
|
|
5075
|
+
this.autoVad = await SileroVAD.forPhoneCall();
|
|
5076
|
+
getLogger().info(
|
|
5077
|
+
`auto-VAD enabled (SileroVAD, phone preset). Pass agent.vad=\u2026 to override.`
|
|
5078
|
+
);
|
|
5079
|
+
} catch (e) {
|
|
5080
|
+
const msg = e?.message ?? String(e);
|
|
5081
|
+
if (/Cannot find module|onnxruntime-node/i.test(msg)) {
|
|
5082
|
+
getLogger().info(
|
|
5083
|
+
"auto-VAD unavailable: onnxruntime-node not installed. Run `npm install onnxruntime-node@~1.18.0` for fast barge-in."
|
|
5084
|
+
);
|
|
5085
|
+
} else {
|
|
5086
|
+
getLogger().warn(
|
|
5087
|
+
`auto-VAD load failed (${msg}); falling back to STT-endpoint heuristic`
|
|
5088
|
+
);
|
|
5089
|
+
}
|
|
5090
|
+
}
|
|
5091
|
+
}
|
|
5092
|
+
if (this.deps.agent.echoCancellation) {
|
|
5093
|
+
const carrier = this.deps.bridge.telephonyProvider;
|
|
5094
|
+
if (carrier === "twilio" || carrier === "telnyx") {
|
|
5095
|
+
getLogger().warn(
|
|
5096
|
+
`echoCancellation: true on ${carrier} (PSTN). Server-side NLMS cannot model PSTN's ~250\u20131500 ms round-trip echo with a 32 ms filter window \u2014 it will silently no-op. Best practice: keep echoCancellation: false; rely on the carrier + caller device's built-in echo suppression and Patter's self-hearing guard. Enable AEC only for browser/native deployments where the SDK owns the audio path end-to-end.`
|
|
5097
|
+
);
|
|
5098
|
+
}
|
|
5099
|
+
try {
|
|
5100
|
+
const { NlmsEchoCanceller } = await import("./aec-PJJMUM5E.mjs");
|
|
5101
|
+
this.aec = new NlmsEchoCanceller({ sampleRate: 16e3 });
|
|
5102
|
+
getLogger().info(
|
|
5103
|
+
"echo cancellation enabled (NLMS, 512 taps + 0.5 s warmup \u03BC=0.5); filter converges within ~250 ms of TTS playback in low-latency loops."
|
|
5104
|
+
);
|
|
5105
|
+
} catch (e) {
|
|
5106
|
+
getLogger().warn(
|
|
5107
|
+
`echo cancellation requested but failed to load: ${String(e)}; falling back to pass-through.`
|
|
5108
|
+
);
|
|
5109
|
+
}
|
|
5110
|
+
}
|
|
4460
5111
|
try {
|
|
4461
5112
|
if (this.stt) await this.stt.connect();
|
|
4462
5113
|
getLogger().debug(`Pipeline mode (${label}): STT + TTS connected`);
|
|
@@ -4470,13 +5121,19 @@ var StreamHandler = class {
|
|
|
4470
5121
|
}
|
|
4471
5122
|
if (this.deps.agent.firstMessage && !this.deps.onMessage && this.tts) {
|
|
4472
5123
|
this.metricsAcc.startTurn();
|
|
5124
|
+
await this.beginSpeaking();
|
|
4473
5125
|
let firstChunkSent = false;
|
|
4474
5126
|
this.resetTtsCarry();
|
|
4475
5127
|
try {
|
|
4476
5128
|
for await (const chunk of this.tts.synthesizeStream(this.deps.agent.firstMessage)) {
|
|
5129
|
+
if (!this.isSpeaking) break;
|
|
4477
5130
|
if (!firstChunkSent) {
|
|
4478
5131
|
firstChunkSent = true;
|
|
4479
5132
|
this.metricsAcc.recordTtsFirstByte();
|
|
5133
|
+
await this.emitAudioOut();
|
|
5134
|
+
}
|
|
5135
|
+
if (this.aec) {
|
|
5136
|
+
this.aec.pushFarEnd(chunk);
|
|
4480
5137
|
}
|
|
4481
5138
|
const encoded = this.encodePipelineAudio(chunk);
|
|
4482
5139
|
this.deps.bridge.sendAudio(this.ws, encoded, this.streamSid);
|
|
@@ -4485,6 +5142,7 @@ var StreamHandler = class {
|
|
|
4485
5142
|
getLogger().error(`First message TTS error (${label}):`, e);
|
|
4486
5143
|
} finally {
|
|
4487
5144
|
this.resetTtsCarry();
|
|
5145
|
+
this.endSpeakingWithGrace();
|
|
4488
5146
|
}
|
|
4489
5147
|
if (firstChunkSent) {
|
|
4490
5148
|
await this.emitTurnMetrics(this.metricsAcc.recordTurnComplete(this.deps.agent.firstMessage));
|
|
@@ -4505,9 +5163,11 @@ var StreamHandler = class {
|
|
|
4505
5163
|
// propagate so calculateLlmCost can match the price row
|
|
4506
5164
|
resolvedPrompt,
|
|
4507
5165
|
this.deps.agent.tools,
|
|
4508
|
-
this.deps.agent.llm
|
|
5166
|
+
this.deps.agent.llm,
|
|
5167
|
+
this.deps.agent.disablePhonePreamble ?? false
|
|
4509
5168
|
);
|
|
4510
5169
|
this.llmLoop.setEventBus(this._eventBus);
|
|
5170
|
+
this.llmLoop.setOnToolCall((n, a, r) => this.recordToolCall(n, a, r));
|
|
4511
5171
|
const llmLabel = this.deps.agent.llm.constructor?.name ?? "custom";
|
|
4512
5172
|
getLogger().debug(`Built-in LLM loop active (pipeline, ${label}, llm=${llmLabel})`);
|
|
4513
5173
|
} else if (!this.deps.onMessage && this.deps.config.openaiKey) {
|
|
@@ -4517,9 +5177,12 @@ var StreamHandler = class {
|
|
|
4517
5177
|
this.deps.config.openaiKey,
|
|
4518
5178
|
llmModel,
|
|
4519
5179
|
resolvedPrompt,
|
|
4520
|
-
this.deps.agent.tools
|
|
5180
|
+
this.deps.agent.tools,
|
|
5181
|
+
void 0,
|
|
5182
|
+
this.deps.agent.disablePhonePreamble ?? false
|
|
4521
5183
|
);
|
|
4522
5184
|
this.llmLoop.setEventBus(this._eventBus);
|
|
5185
|
+
this.llmLoop.setOnToolCall((n, a, r) => this.recordToolCall(n, a, r));
|
|
4523
5186
|
getLogger().debug(`Built-in LLM loop active (pipeline, ${label})`);
|
|
4524
5187
|
}
|
|
4525
5188
|
if (this.stt) {
|
|
@@ -4555,9 +5218,14 @@ var StreamHandler = class {
|
|
|
4555
5218
|
if (!this.isSpeaking) break;
|
|
4556
5219
|
const processedAudio = await hookExecutor.runAfterSynthesize(chunk, processedText, hookCtx);
|
|
4557
5220
|
if (processedAudio === null) continue;
|
|
5221
|
+
if (!this.isSpeaking) break;
|
|
4558
5222
|
if (!ttsFirstByteSent.value) {
|
|
4559
5223
|
ttsFirstByteSent.value = true;
|
|
4560
5224
|
this.metricsAcc.recordTtsFirstByte();
|
|
5225
|
+
await this.emitAudioOut();
|
|
5226
|
+
}
|
|
5227
|
+
if (this.aec) {
|
|
5228
|
+
this.aec.pushFarEnd(processedAudio);
|
|
4561
5229
|
}
|
|
4562
5230
|
const encoded = this.encodePipelineAudio(processedAudio);
|
|
4563
5231
|
this.deps.bridge.sendAudio(this.ws, encoded, this.streamSid);
|
|
@@ -4583,6 +5251,9 @@ var StreamHandler = class {
|
|
|
4583
5251
|
}
|
|
4584
5252
|
}
|
|
4585
5253
|
async processTranscript(transcript) {
|
|
5254
|
+
getLogger().info(
|
|
5255
|
+
`[DIAG] processTranscript text=${JSON.stringify((transcript.text ?? "").slice(0, 60))} isFinal=${transcript.isFinal} speechFinal=${transcript.speechFinal} isSpeaking=${this.isSpeaking}`
|
|
5256
|
+
);
|
|
4586
5257
|
let interrupted = this.handleBargeIn(transcript);
|
|
4587
5258
|
if (transcript.text) {
|
|
4588
5259
|
this.metricsAcc.startTurnIfIdle();
|
|
@@ -4593,6 +5264,9 @@ var StreamHandler = class {
|
|
|
4593
5264
|
if (!transcript.isFinal || !transcript.text) return;
|
|
4594
5265
|
if (!this.commitTranscript(transcript.text)) return;
|
|
4595
5266
|
const label = this.deps.bridge.label;
|
|
5267
|
+
getLogger().info(
|
|
5268
|
+
`[DIAG] processTranscript COMMITTED \u2192 LLM (${label} pipeline): ${sanitizeLogValue(transcript.text.slice(0, 80))}`
|
|
5269
|
+
);
|
|
4596
5270
|
getLogger().debug(`User (${label} pipeline): ${sanitizeLogValue(transcript.text)}`);
|
|
4597
5271
|
this.metricsAcc.startTurnIfIdle();
|
|
4598
5272
|
this.metricsAcc.recordSttComplete(transcript.text);
|
|
@@ -4672,7 +5346,7 @@ var StreamHandler = class {
|
|
|
4672
5346
|
}
|
|
4673
5347
|
if (!responseText) return;
|
|
4674
5348
|
if (this.llmLoop) {
|
|
4675
|
-
this.
|
|
5349
|
+
await this.emitAssistantTranscript(responseText);
|
|
4676
5350
|
this.metricsAcc.recordTtsComplete(responseText);
|
|
4677
5351
|
} else {
|
|
4678
5352
|
interrupted = await this.runRegularLlm(responseText, hookExecutor, hookCtx) || interrupted;
|
|
@@ -4690,6 +5364,12 @@ var StreamHandler = class {
|
|
|
4690
5364
|
*/
|
|
4691
5365
|
handleBargeIn(transcript) {
|
|
4692
5366
|
if (!transcript.text || !this.isSpeaking) return false;
|
|
5367
|
+
if (!this.canBargeIn()) {
|
|
5368
|
+
getLogger().info(
|
|
5369
|
+
`Barge-in transcript suppressed (agent speaking < gate, aec=${this.aec ? "on" : "off"})`
|
|
5370
|
+
);
|
|
5371
|
+
return false;
|
|
5372
|
+
}
|
|
4693
5373
|
getLogger().debug(
|
|
4694
5374
|
`Barge-in: caller spoke over agent (${sanitizeLogValue(transcript.text.slice(0, 40))})`
|
|
4695
5375
|
);
|
|
@@ -4755,16 +5435,26 @@ var StreamHandler = class {
|
|
|
4755
5435
|
async runPipelineLlm(filteredTranscript, hookExecutor, hookCtx) {
|
|
4756
5436
|
const label = this.deps.bridge.label;
|
|
4757
5437
|
const callCtx = { call_id: this.callId, caller: this.caller, callee: this.callee };
|
|
4758
|
-
const chunker = new SentenceChunker(
|
|
5438
|
+
const chunker = new SentenceChunker({
|
|
5439
|
+
aggressiveFirstFlush: this.deps.agent.aggressiveFirstFlush ?? false,
|
|
5440
|
+
language: this.deps.agent.language
|
|
5441
|
+
});
|
|
4759
5442
|
const allParts = [];
|
|
4760
5443
|
const ttsFirstByteSent = { value: false };
|
|
4761
|
-
this.beginSpeaking();
|
|
5444
|
+
await this.beginSpeaking();
|
|
5445
|
+
this.llmAbort = new AbortController();
|
|
5446
|
+
const llmSignal = this.llmAbort.signal;
|
|
4762
5447
|
let llmError = false;
|
|
4763
5448
|
const llmSpan = startSpan(SPAN_LLM, { "patter.call.id": this.callId });
|
|
4764
5449
|
const guardAndSpeak = async (sentence, isFirst) => {
|
|
4765
5450
|
if (isFirst) this.metricsAcc.recordLlmFirstSentenceComplete();
|
|
4766
5451
|
const guard = checkGuardrails(sentence, this.deps.agent.guardrails);
|
|
4767
|
-
|
|
5452
|
+
let sentenceText = guard ? guard.replacement ?? "I'm sorry, I can't respond to that." : sentence;
|
|
5453
|
+
if (hookExecutor.hasAfterLlmSentence()) {
|
|
5454
|
+
const transformed = await hookExecutor.runAfterLlmSentence(sentenceText, hookCtx);
|
|
5455
|
+
if (transformed === null) return;
|
|
5456
|
+
sentenceText = transformed;
|
|
5457
|
+
}
|
|
4768
5458
|
await this.synthesizeSentence(sentenceText, hookExecutor, hookCtx, ttsFirstByteSent);
|
|
4769
5459
|
};
|
|
4770
5460
|
let firstSentenceEmitted = false;
|
|
@@ -4776,22 +5466,28 @@ var StreamHandler = class {
|
|
|
4776
5466
|
callCtx,
|
|
4777
5467
|
this.metricsAcc,
|
|
4778
5468
|
hookExecutor,
|
|
4779
|
-
hookCtx
|
|
5469
|
+
hookCtx,
|
|
5470
|
+
{ signal: llmSignal }
|
|
4780
5471
|
)) {
|
|
5472
|
+
if (llmSignal.aborted) break;
|
|
4781
5473
|
this.metricsAcc.recordLlmFirstToken();
|
|
5474
|
+
await this.emitLlmFirstToken();
|
|
4782
5475
|
allParts.push(token);
|
|
4783
5476
|
for (const sentence of chunker.push(token)) {
|
|
4784
5477
|
if (!this.isSpeaking) break;
|
|
4785
5478
|
await guardAndSpeak(sentence, !firstSentenceEmitted);
|
|
4786
5479
|
firstSentenceEmitted = true;
|
|
4787
5480
|
}
|
|
4788
|
-
if (!this.isSpeaking) break;
|
|
5481
|
+
if (!this.isSpeaking || llmSignal.aborted) break;
|
|
4789
5482
|
}
|
|
4790
5483
|
} catch (e) {
|
|
4791
|
-
|
|
4792
|
-
|
|
4793
|
-
|
|
4794
|
-
|
|
5484
|
+
const isAbort = e?.name === "AbortError" || llmSignal.aborted;
|
|
5485
|
+
if (!isAbort) {
|
|
5486
|
+
llmError = true;
|
|
5487
|
+
chunker.reset();
|
|
5488
|
+
getLogger().error(`LLM loop error (${label}):`, e);
|
|
5489
|
+
this.metricsAcc.recordTurnInterrupted();
|
|
5490
|
+
}
|
|
4795
5491
|
}
|
|
4796
5492
|
this.metricsAcc.recordLlmComplete();
|
|
4797
5493
|
if (!llmError && this.isSpeaking) {
|
|
@@ -4803,6 +5499,7 @@ var StreamHandler = class {
|
|
|
4803
5499
|
}
|
|
4804
5500
|
} finally {
|
|
4805
5501
|
this.endSpeakingWithGrace();
|
|
5502
|
+
this.llmAbort = null;
|
|
4806
5503
|
try {
|
|
4807
5504
|
llmSpan.end();
|
|
4808
5505
|
} catch {
|
|
@@ -4823,11 +5520,11 @@ var StreamHandler = class {
|
|
|
4823
5520
|
text = guard.replacement ?? "I'm sorry, I can't respond to that.";
|
|
4824
5521
|
}
|
|
4825
5522
|
this.metricsAcc.recordLlmComplete();
|
|
4826
|
-
this.
|
|
5523
|
+
await this.emitAssistantTranscript(text);
|
|
4827
5524
|
const chunker = new SentenceChunker();
|
|
4828
5525
|
const sentences = [...chunker.push(text), ...chunker.flush()];
|
|
4829
5526
|
const ttsFirstByteSent = { value: false };
|
|
4830
|
-
this.beginSpeaking();
|
|
5527
|
+
await this.beginSpeaking();
|
|
4831
5528
|
let interrupted = false;
|
|
4832
5529
|
try {
|
|
4833
5530
|
for (const sentence of sentences) {
|
|
@@ -4835,7 +5532,13 @@ var StreamHandler = class {
|
|
|
4835
5532
|
interrupted = true;
|
|
4836
5533
|
break;
|
|
4837
5534
|
}
|
|
4838
|
-
|
|
5535
|
+
let sentenceText = sentence;
|
|
5536
|
+
if (hookExecutor.hasAfterLlmSentence()) {
|
|
5537
|
+
const transformed = await hookExecutor.runAfterLlmSentence(sentenceText, hookCtx);
|
|
5538
|
+
if (transformed === null) continue;
|
|
5539
|
+
sentenceText = transformed;
|
|
5540
|
+
}
|
|
5541
|
+
await this.synthesizeSentence(sentenceText, hookExecutor, hookCtx, ttsFirstByteSent);
|
|
4839
5542
|
}
|
|
4840
5543
|
} finally {
|
|
4841
5544
|
this.endSpeakingWithGrace();
|
|
@@ -4848,7 +5551,7 @@ var StreamHandler = class {
|
|
|
4848
5551
|
const onMessage = this.deps.onMessage;
|
|
4849
5552
|
const parts = [];
|
|
4850
5553
|
this.metricsAcc.recordLlmComplete();
|
|
4851
|
-
this.beginSpeaking();
|
|
5554
|
+
await this.beginSpeaking();
|
|
4852
5555
|
let wsTtsStarted = false;
|
|
4853
5556
|
try {
|
|
4854
5557
|
for await (const chunk of this.deps.remoteHandler.callWebSocket(onMessage, msgData)) {
|
|
@@ -4860,6 +5563,7 @@ var StreamHandler = class {
|
|
|
4860
5563
|
if (!wsTtsStarted) {
|
|
4861
5564
|
wsTtsStarted = true;
|
|
4862
5565
|
this.metricsAcc.recordTtsFirstByte();
|
|
5566
|
+
await this.emitAudioOut();
|
|
4863
5567
|
}
|
|
4864
5568
|
const encoded = this.encodePipelineAudio(audioChunk);
|
|
4865
5569
|
this.deps.bridge.sendAudio(this.ws, encoded, this.streamSid);
|
|
@@ -4875,7 +5579,7 @@ var StreamHandler = class {
|
|
|
4875
5579
|
const responseText = parts.join("");
|
|
4876
5580
|
this.metricsAcc.recordTtsComplete(responseText);
|
|
4877
5581
|
await this.emitTurnMetrics(this.metricsAcc.recordTurnComplete(responseText));
|
|
4878
|
-
if (responseText) this.
|
|
5582
|
+
if (responseText) await this.emitAssistantTranscript(responseText);
|
|
4879
5583
|
}
|
|
4880
5584
|
// ---------------------------------------------------------------------------
|
|
4881
5585
|
// Private: OpenAI Realtime / ElevenLabs ConvAI mode
|
|
@@ -4897,7 +5601,8 @@ var StreamHandler = class {
|
|
|
4897
5601
|
if (this.deps.agent.firstMessage) {
|
|
4898
5602
|
this.metricsAcc.startTurn();
|
|
4899
5603
|
if (this.adapter instanceof OpenAIRealtimeAdapter) {
|
|
4900
|
-
|
|
5604
|
+
const sender = typeof this.adapter.sendFirstMessage === "function" ? this.adapter.sendFirstMessage.bind(this.adapter) : this.adapter.sendText.bind(this.adapter);
|
|
5605
|
+
await sender(this.deps.agent.firstMessage);
|
|
4901
5606
|
}
|
|
4902
5607
|
}
|
|
4903
5608
|
this.adapter.onEvent(async (type, eventData) => {
|
|
@@ -4927,21 +5632,87 @@ var StreamHandler = class {
|
|
|
4927
5632
|
}
|
|
4928
5633
|
}
|
|
4929
5634
|
};
|
|
5635
|
+
// ---- Speech-event helpers ------------------------------------------
|
|
5636
|
+
// No-op when the deps don't include a SpeechEvents dispatcher. Tracks
|
|
5637
|
+
// wall-clock for `speech_duration_ms` payloads.
|
|
5638
|
+
userSpeechStartMs = null;
|
|
5639
|
+
agentTurnStartMs = null;
|
|
5640
|
+
async emitUserSpeechStarted() {
|
|
5641
|
+
if (!this.deps.speechEvents) return;
|
|
5642
|
+
this.userSpeechStartMs = Date.now();
|
|
5643
|
+
await this.deps.speechEvents.fireUserSpeechStarted();
|
|
5644
|
+
}
|
|
5645
|
+
async emitUserSpeechEnded() {
|
|
5646
|
+
if (!this.deps.speechEvents) return;
|
|
5647
|
+
const duration = this.userSpeechStartMs !== null ? Math.max(0, Date.now() - this.userSpeechStartMs) : 0;
|
|
5648
|
+
this.userSpeechStartMs = null;
|
|
5649
|
+
await this.deps.speechEvents.fireUserSpeechEnded({
|
|
5650
|
+
speechDurationMs: duration
|
|
5651
|
+
});
|
|
5652
|
+
}
|
|
5653
|
+
async emitUserSpeechEos(transcriptSoFar) {
|
|
5654
|
+
if (!this.deps.speechEvents) return;
|
|
5655
|
+
await this.deps.speechEvents.fireUserSpeechEos({
|
|
5656
|
+
trigger: "vad_silence",
|
|
5657
|
+
transcriptSoFar
|
|
5658
|
+
});
|
|
5659
|
+
}
|
|
5660
|
+
async emitAgentSpeechStarted() {
|
|
5661
|
+
if (!this.deps.speechEvents) return;
|
|
5662
|
+
this.agentTurnStartMs = Date.now();
|
|
5663
|
+
const ttsKey = this.deps.agent.tts?.constructor?.providerKey;
|
|
5664
|
+
await this.deps.speechEvents.fireAgentSpeechStarted({
|
|
5665
|
+
ttsProvider: ttsKey,
|
|
5666
|
+
engine: this.deps.agent.provider ?? "openai_realtime"
|
|
5667
|
+
});
|
|
5668
|
+
}
|
|
5669
|
+
async emitAgentSpeechEnded(interrupted) {
|
|
5670
|
+
if (!this.deps.speechEvents) return;
|
|
5671
|
+
if (this.agentTurnStartMs === null) return;
|
|
5672
|
+
const duration = Math.max(0, Date.now() - this.agentTurnStartMs);
|
|
5673
|
+
this.agentTurnStartMs = null;
|
|
5674
|
+
await this.deps.speechEvents.fireAgentSpeechEnded({
|
|
5675
|
+
speechDurationMs: duration,
|
|
5676
|
+
interrupted
|
|
5677
|
+
});
|
|
5678
|
+
}
|
|
5679
|
+
/** Fire the per-turn LLM TTFT marker. Idempotent in the dispatcher
|
|
5680
|
+
* — guarded by `firstTokenForTurn` on the SpeechEvents instance. */
|
|
5681
|
+
async emitLlmFirstToken() {
|
|
5682
|
+
if (!this.deps.speechEvents) return;
|
|
5683
|
+
await this.deps.speechEvents.fireLlmFirstToken({
|
|
5684
|
+
llmProvider: this.llmProviderTag,
|
|
5685
|
+
model: this.deps.agent.model ?? ""
|
|
5686
|
+
});
|
|
5687
|
+
}
|
|
5688
|
+
/** Fire the per-turn first-TTS-audio marker. Idempotent in the
|
|
5689
|
+
* dispatcher — guarded by `firstAudioForTurn`. The provider tag falls
|
|
5690
|
+
* back to the engine name for Realtime / ConvAI (no separate TTS). */
|
|
5691
|
+
async emitAudioOut() {
|
|
5692
|
+
if (!this.deps.speechEvents) return;
|
|
5693
|
+
const ttsKey = this.deps.agent.tts?.constructor?.providerKey;
|
|
5694
|
+
const provider2 = ttsKey ?? this.deps.agent.provider ?? "openai_realtime";
|
|
5695
|
+
await this.deps.speechEvents.fireAudioOut({ ttsProvider: provider2 });
|
|
5696
|
+
}
|
|
4930
5697
|
async onAdapterAudio(eventData) {
|
|
4931
5698
|
if (!this.responseAudioStarted) {
|
|
4932
5699
|
this.responseAudioStarted = true;
|
|
4933
5700
|
if (this.metricsAcc.turnActive === false) this.metricsAcc.startTurn();
|
|
4934
5701
|
this.metricsAcc.recordTtsFirstByte();
|
|
5702
|
+
await this.emitAgentSpeechStarted();
|
|
5703
|
+
await this.emitAudioOut();
|
|
4935
5704
|
}
|
|
4936
5705
|
const outAudio = eventData;
|
|
4937
5706
|
this.deps.bridge.sendAudio(this.ws, outAudio.toString("base64"), this.streamSid);
|
|
4938
5707
|
this.chunkCount++;
|
|
4939
5708
|
this.deps.bridge.sendMark(this.ws, `audio_${this.chunkCount}`, this.streamSid);
|
|
4940
5709
|
}
|
|
4941
|
-
onAdapterSpeechStopped() {
|
|
5710
|
+
async onAdapterSpeechStopped() {
|
|
4942
5711
|
if (!this.metricsAcc.turnActive) this.metricsAcc.startTurn();
|
|
4943
5712
|
this.currentAgentText = "";
|
|
4944
5713
|
this.responseAudioStarted = false;
|
|
5714
|
+
this.userTranscriptPending = true;
|
|
5715
|
+
await this.emitUserSpeechEnded();
|
|
4945
5716
|
}
|
|
4946
5717
|
async onAdapterTranscriptInput(inputText) {
|
|
4947
5718
|
getLogger().debug(`User (${this.deps.bridge.label}): ${sanitizeLogValue(inputText)}`);
|
|
@@ -4951,6 +5722,7 @@ var StreamHandler = class {
|
|
|
4951
5722
|
this.currentAgentText = "";
|
|
4952
5723
|
this.responseAudioStarted = false;
|
|
4953
5724
|
}
|
|
5725
|
+
await this.emitUserSpeechEos(inputText);
|
|
4954
5726
|
this.metricsAcc.recordSttComplete(inputText);
|
|
4955
5727
|
if (this.deps.onTranscript) {
|
|
4956
5728
|
await this.deps.onTranscript({
|
|
@@ -4960,9 +5732,97 @@ var StreamHandler = class {
|
|
|
4960
5732
|
history: [...this.history.entries]
|
|
4961
5733
|
});
|
|
4962
5734
|
}
|
|
5735
|
+
this.userTranscriptPending = false;
|
|
5736
|
+
if (this.pendingAssistantTurn !== null) {
|
|
5737
|
+
const buffered = this.pendingAssistantTurn;
|
|
5738
|
+
this.pendingAssistantTurn = null;
|
|
5739
|
+
if (this.pendingAssistantTimer) {
|
|
5740
|
+
clearTimeout(this.pendingAssistantTimer);
|
|
5741
|
+
this.pendingAssistantTimer = null;
|
|
5742
|
+
}
|
|
5743
|
+
await this.flushAssistantTurn(buffered);
|
|
5744
|
+
}
|
|
5745
|
+
}
|
|
5746
|
+
/**
|
|
5747
|
+
* Push an assistant turn into history, fire `onTranscript`, and emit
|
|
5748
|
+
* turn-complete metrics. Shared between the immediate path (no user
|
|
5749
|
+
* transcript pending) and the buffered path (flushed after user
|
|
5750
|
+
* transcript arrives or fallback timer fires).
|
|
5751
|
+
*/
|
|
5752
|
+
async flushAssistantTurn(text) {
|
|
5753
|
+
this.history.push({ role: "assistant", text, timestamp: Date.now() });
|
|
5754
|
+
if (this.deps.onTranscript) {
|
|
5755
|
+
await this.deps.onTranscript({
|
|
5756
|
+
role: "assistant",
|
|
5757
|
+
text,
|
|
5758
|
+
call_id: this.callId,
|
|
5759
|
+
history: [...this.history.entries]
|
|
5760
|
+
});
|
|
5761
|
+
}
|
|
5762
|
+
this.responseAudioStarted = false;
|
|
5763
|
+
await this.emitTurnMetrics(this.metricsAcc.recordTurnComplete(text));
|
|
5764
|
+
}
|
|
5765
|
+
/**
|
|
5766
|
+
* Push an assistant turn into history and fire `onTranscript` so host
|
|
5767
|
+
* applications observe pipeline-mode replies the same way they observe
|
|
5768
|
+
* realtime-mode replies. Mirrors `_emit_assistant_transcript` in the
|
|
5769
|
+
* Python SDK and parallels `flushAssistantTurn` (realtime path).
|
|
5770
|
+
* Caller is responsible for filtering empty strings.
|
|
5771
|
+
*/
|
|
5772
|
+
async emitAssistantTranscript(text) {
|
|
5773
|
+
this.history.push({ role: "assistant", text, timestamp: Date.now() });
|
|
5774
|
+
if (this.deps.onTranscript) {
|
|
5775
|
+
await this.deps.onTranscript({
|
|
5776
|
+
role: "assistant",
|
|
5777
|
+
text,
|
|
5778
|
+
call_id: this.callId,
|
|
5779
|
+
history: [...this.history.entries]
|
|
5780
|
+
});
|
|
5781
|
+
}
|
|
5782
|
+
}
|
|
5783
|
+
/**
|
|
5784
|
+
* Surface a tool invocation from pipeline mode into the transcript
|
|
5785
|
+
* timeline. Emits TWO events: one for the call (`name(argsJson)`) and
|
|
5786
|
+
* one for the result (`name(...) → result`, truncated to 200 chars).
|
|
5787
|
+
* Mirrors realtime mode's two `emitToolEvent` calls in
|
|
5788
|
+
* `handleFunctionCall`. Wired as the `LLMLoop` `onToolCall` observer.
|
|
5789
|
+
*/
|
|
5790
|
+
async recordToolCall(name, args, result) {
|
|
5791
|
+
let argsText;
|
|
5792
|
+
try {
|
|
5793
|
+
argsText = JSON.stringify(args ?? {});
|
|
5794
|
+
} catch {
|
|
5795
|
+
argsText = "{}";
|
|
5796
|
+
}
|
|
5797
|
+
const callText = `${name}(${argsText})`;
|
|
5798
|
+
this.history.push({ role: "tool", text: callText, timestamp: Date.now() });
|
|
5799
|
+
if (this.deps.onTranscript) {
|
|
5800
|
+
await this.deps.onTranscript({
|
|
5801
|
+
role: "tool",
|
|
5802
|
+
text: callText,
|
|
5803
|
+
call_id: this.callId,
|
|
5804
|
+
tool_name: name,
|
|
5805
|
+
tool_args: args ?? {},
|
|
5806
|
+
tool_result: null
|
|
5807
|
+
});
|
|
5808
|
+
}
|
|
5809
|
+
const displayed = result.length > 200 ? result.slice(0, 200) + "\u2026" : result;
|
|
5810
|
+
const resText = `${name}(...) \u2192 ${displayed}`;
|
|
5811
|
+
this.history.push({ role: "tool", text: resText, timestamp: Date.now() });
|
|
5812
|
+
if (this.deps.onTranscript) {
|
|
5813
|
+
await this.deps.onTranscript({
|
|
5814
|
+
role: "tool",
|
|
5815
|
+
text: resText,
|
|
5816
|
+
call_id: this.callId,
|
|
5817
|
+
tool_name: name,
|
|
5818
|
+
tool_args: args ?? {},
|
|
5819
|
+
tool_result: result
|
|
5820
|
+
});
|
|
5821
|
+
}
|
|
4963
5822
|
}
|
|
4964
5823
|
async onAdapterTranscriptOutput(outputText) {
|
|
4965
5824
|
if (!outputText) return;
|
|
5825
|
+
await this.emitLlmFirstToken();
|
|
4966
5826
|
const triggered = checkGuardrails(outputText, this.deps.agent.guardrails);
|
|
4967
5827
|
if (triggered) {
|
|
4968
5828
|
getLogger().debug(`Guardrail '${triggered.name}' triggered`);
|
|
@@ -4976,24 +5836,75 @@ var StreamHandler = class {
|
|
|
4976
5836
|
async onAdapterResponseDone(responseData) {
|
|
4977
5837
|
if (responseData) {
|
|
4978
5838
|
const usage = responseData.usage;
|
|
4979
|
-
if (usage)
|
|
5839
|
+
if (usage) {
|
|
5840
|
+
const turnModel = typeof responseData.model === "string" ? responseData.model : null;
|
|
5841
|
+
this.metricsAcc.recordRealtimeUsage(usage, turnModel);
|
|
5842
|
+
}
|
|
4980
5843
|
}
|
|
4981
|
-
if (this.currentAgentText) {
|
|
4982
|
-
this.history.push({ role: "assistant", text: this.currentAgentText, timestamp: Date.now() });
|
|
4983
|
-
this.responseAudioStarted = false;
|
|
4984
|
-
await this.emitTurnMetrics(this.metricsAcc.recordTurnComplete(this.currentAgentText));
|
|
4985
|
-
this.currentAgentText = "";
|
|
4986
|
-
} else {
|
|
5844
|
+
if (!this.currentAgentText) {
|
|
4987
5845
|
this.metricsAcc.recordTurnInterrupted();
|
|
4988
5846
|
this.responseAudioStarted = false;
|
|
5847
|
+
await this.emitAgentSpeechEnded(true);
|
|
5848
|
+
return;
|
|
4989
5849
|
}
|
|
5850
|
+
await this.emitAgentSpeechEnded(false);
|
|
5851
|
+
const text = this.currentAgentText;
|
|
5852
|
+
this.currentAgentText = "";
|
|
5853
|
+
if (this.userTranscriptPending) {
|
|
5854
|
+
this.pendingAssistantTurn = text;
|
|
5855
|
+
if (this.pendingAssistantTimer) clearTimeout(this.pendingAssistantTimer);
|
|
5856
|
+
this.pendingAssistantTimer = setTimeout(() => {
|
|
5857
|
+
const buffered = this.pendingAssistantTurn;
|
|
5858
|
+
this.pendingAssistantTurn = null;
|
|
5859
|
+
this.pendingAssistantTimer = null;
|
|
5860
|
+
this.userTranscriptPending = false;
|
|
5861
|
+
if (buffered !== null) {
|
|
5862
|
+
void this.flushAssistantTurn(buffered);
|
|
5863
|
+
}
|
|
5864
|
+
}, _StreamHandler.REALTIME_USER_TRANSCRIPT_WAIT_MS);
|
|
5865
|
+
this.responseAudioStarted = false;
|
|
5866
|
+
return;
|
|
5867
|
+
}
|
|
5868
|
+
await this.flushAssistantTurn(text);
|
|
4990
5869
|
}
|
|
4991
|
-
onAdapterSpeechInterrupt() {
|
|
5870
|
+
async onAdapterSpeechInterrupt() {
|
|
4992
5871
|
this.deps.bridge.sendClear(this.ws, this.streamSid);
|
|
4993
5872
|
if (this.adapter instanceof OpenAIRealtimeAdapter) this.adapter.cancelResponse();
|
|
4994
5873
|
this.metricsAcc.recordTurnInterrupted();
|
|
5874
|
+
if (this.responseAudioStarted) {
|
|
5875
|
+
await this.emitAgentSpeechEnded(true);
|
|
5876
|
+
}
|
|
5877
|
+
await this.emitUserSpeechStarted();
|
|
4995
5878
|
this.currentAgentText = "";
|
|
4996
5879
|
this.responseAudioStarted = false;
|
|
5880
|
+
this.pendingAssistantTurn = null;
|
|
5881
|
+
if (this.pendingAssistantTimer) {
|
|
5882
|
+
clearTimeout(this.pendingAssistantTimer);
|
|
5883
|
+
this.pendingAssistantTimer = null;
|
|
5884
|
+
}
|
|
5885
|
+
this.userTranscriptPending = false;
|
|
5886
|
+
}
|
|
5887
|
+
/**
|
|
5888
|
+
* Emit a tool-invocation event into the transcript timeline. Pushes a
|
|
5889
|
+
* `role=tool` entry into `history` (so it appears in the dashboard
|
|
5890
|
+
* transcript next to user/assistant turns) AND fires `onTranscript` so
|
|
5891
|
+
* the host application can log / persist / render it. `result` is
|
|
5892
|
+
* truncated for log readability — the full payload is in history.
|
|
5893
|
+
*/
|
|
5894
|
+
async emitToolEvent(name, args, result) {
|
|
5895
|
+
const argsText = JSON.stringify(args);
|
|
5896
|
+
const text = result === null ? `${name}(${argsText})` : `${name}(${argsText}) \u2192 ${result.length > 200 ? result.slice(0, 200) + "\u2026" : result}`;
|
|
5897
|
+
this.history.push({ role: "tool", text, timestamp: Date.now() });
|
|
5898
|
+
if (this.deps.onTranscript) {
|
|
5899
|
+
await this.deps.onTranscript({
|
|
5900
|
+
role: "tool",
|
|
5901
|
+
text,
|
|
5902
|
+
call_id: this.callId,
|
|
5903
|
+
tool_name: name,
|
|
5904
|
+
tool_args: args,
|
|
5905
|
+
tool_result: result
|
|
5906
|
+
});
|
|
5907
|
+
}
|
|
4997
5908
|
}
|
|
4998
5909
|
async handleFunctionCall(fc) {
|
|
4999
5910
|
const adapter = this.adapter;
|
|
@@ -5007,11 +5918,15 @@ var StreamHandler = class {
|
|
|
5007
5918
|
const transferTo = transferArgs.number ?? "";
|
|
5008
5919
|
if (!isValidE164(transferTo)) {
|
|
5009
5920
|
getLogger().warn(`transfer_call rejected (${this.deps.bridge.label}): invalid number ${JSON.stringify(transferTo)}`);
|
|
5010
|
-
|
|
5921
|
+
const rejection = JSON.stringify({ error: "Invalid phone number format", status: "rejected" });
|
|
5922
|
+
await adapter.sendFunctionResult(fc.call_id, rejection);
|
|
5923
|
+
await this.emitToolEvent("transfer_call", transferArgs, rejection);
|
|
5011
5924
|
return;
|
|
5012
5925
|
}
|
|
5013
5926
|
getLogger().debug(`Transferring call to ${transferTo}`);
|
|
5014
|
-
|
|
5927
|
+
const result2 = JSON.stringify({ status: "transferring", to: transferTo });
|
|
5928
|
+
await adapter.sendFunctionResult(fc.call_id, result2);
|
|
5929
|
+
await this.emitToolEvent("transfer_call", transferArgs, result2);
|
|
5015
5930
|
await this.deps.bridge.transferCall(this.callId, transferTo);
|
|
5016
5931
|
if (this.deps.onTranscript) {
|
|
5017
5932
|
await this.deps.onTranscript({ role: "system", text: `Call transferred to ${transferTo}`, call_id: this.callId });
|
|
@@ -5027,7 +5942,9 @@ var StreamHandler = class {
|
|
|
5027
5942
|
}
|
|
5028
5943
|
const reason = endArgs.reason ?? "conversation_complete";
|
|
5029
5944
|
getLogger().debug(`Ending call (${this.deps.bridge.label}): ${reason}`);
|
|
5030
|
-
|
|
5945
|
+
const result2 = JSON.stringify({ status: "ending", reason });
|
|
5946
|
+
await adapter.sendFunctionResult(fc.call_id, result2);
|
|
5947
|
+
await this.emitToolEvent("end_call", endArgs, result2);
|
|
5031
5948
|
await this.deps.bridge.endCall(this.callId, this.ws);
|
|
5032
5949
|
if (this.deps.onTranscript) {
|
|
5033
5950
|
await this.deps.onTranscript({ role: "system", text: `Call ended: ${reason}`, call_id: this.callId });
|
|
@@ -5035,22 +5952,57 @@ var StreamHandler = class {
|
|
|
5035
5952
|
return;
|
|
5036
5953
|
}
|
|
5037
5954
|
const toolDef = this.deps.agent.tools?.find((t) => t.name === fc.name);
|
|
5038
|
-
if (toolDef
|
|
5039
|
-
|
|
5955
|
+
if (!toolDef) {
|
|
5956
|
+
getLogger().warn(`Realtime tool '${fc.name}' not found in agent.tools \u2014 skipping`);
|
|
5957
|
+
const result2 = JSON.stringify({ error: `Tool '${fc.name}' not registered`, fallback: true });
|
|
5958
|
+
await adapter.sendFunctionResult(fc.call_id, result2);
|
|
5959
|
+
await this.emitToolEvent(fc.name, {}, result2);
|
|
5960
|
+
return;
|
|
5961
|
+
}
|
|
5962
|
+
let parsedArgs;
|
|
5963
|
+
try {
|
|
5964
|
+
parsedArgs = JSON.parse(fc.arguments || "{}");
|
|
5965
|
+
} catch {
|
|
5966
|
+
parsedArgs = {};
|
|
5967
|
+
}
|
|
5968
|
+
await this.emitToolEvent(fc.name, parsedArgs, null);
|
|
5969
|
+
const reassurance = toolDef.reassurance;
|
|
5970
|
+
let reassuranceTimer = null;
|
|
5971
|
+
if (reassurance) {
|
|
5972
|
+
const msg = typeof reassurance === "string" ? reassurance : reassurance.message;
|
|
5973
|
+
const afterMs = typeof reassurance === "string" ? 1500 : reassurance.afterMs ?? 1500;
|
|
5974
|
+
if (msg && this.adapter instanceof OpenAIRealtimeAdapter) {
|
|
5975
|
+
const realtimeAdapter = this.adapter;
|
|
5976
|
+
reassuranceTimer = setTimeout(() => {
|
|
5977
|
+
realtimeAdapter.sendText(msg).catch((e) => {
|
|
5978
|
+
getLogger().warn(`Reassurance message failed for tool '${fc.name}': ${String(e)}`);
|
|
5979
|
+
});
|
|
5980
|
+
}, afterMs);
|
|
5981
|
+
}
|
|
5982
|
+
}
|
|
5983
|
+
const onProgress = this.adapter instanceof OpenAIRealtimeAdapter ? async (text) => {
|
|
5040
5984
|
try {
|
|
5041
|
-
|
|
5042
|
-
} catch {
|
|
5043
|
-
|
|
5985
|
+
await this.adapter.sendText(text);
|
|
5986
|
+
} catch (e) {
|
|
5987
|
+
getLogger().warn(`Tool progress message failed for '${fc.name}': ${String(e)}`);
|
|
5044
5988
|
}
|
|
5045
|
-
|
|
5046
|
-
|
|
5047
|
-
|
|
5989
|
+
} : void 0;
|
|
5990
|
+
let result;
|
|
5991
|
+
try {
|
|
5992
|
+
result = await this.toolExecutor.execute(
|
|
5993
|
+
toolDef,
|
|
5048
5994
|
parsedArgs,
|
|
5049
|
-
{
|
|
5050
|
-
|
|
5995
|
+
{
|
|
5996
|
+
call_id: this.callId,
|
|
5997
|
+
caller: this.caller
|
|
5998
|
+
},
|
|
5999
|
+
onProgress
|
|
5051
6000
|
);
|
|
5052
|
-
|
|
6001
|
+
} finally {
|
|
6002
|
+
if (reassuranceTimer) clearTimeout(reassuranceTimer);
|
|
5053
6003
|
}
|
|
6004
|
+
await adapter.sendFunctionResult(fc.call_id, result);
|
|
6005
|
+
await this.emitToolEvent(fc.name, parsedArgs, result);
|
|
5054
6006
|
}
|
|
5055
6007
|
// ---------------------------------------------------------------------------
|
|
5056
6008
|
// Private: call end / metrics finalization
|
|
@@ -5062,6 +6014,25 @@ var StreamHandler = class {
|
|
|
5062
6014
|
clearTimeout(this.maxDurationTimer);
|
|
5063
6015
|
this.maxDurationTimer = null;
|
|
5064
6016
|
}
|
|
6017
|
+
if (this.pendingAssistantTimer) {
|
|
6018
|
+
clearTimeout(this.pendingAssistantTimer);
|
|
6019
|
+
this.pendingAssistantTimer = null;
|
|
6020
|
+
}
|
|
6021
|
+
if (this.pendingAssistantTurn !== null) {
|
|
6022
|
+
const buffered = this.pendingAssistantTurn;
|
|
6023
|
+
this.pendingAssistantTurn = null;
|
|
6024
|
+
try {
|
|
6025
|
+
await this.flushAssistantTurn(buffered);
|
|
6026
|
+
} catch {
|
|
6027
|
+
}
|
|
6028
|
+
}
|
|
6029
|
+
if (this.mcpManager) {
|
|
6030
|
+
try {
|
|
6031
|
+
await this.mcpManager.close();
|
|
6032
|
+
} catch {
|
|
6033
|
+
}
|
|
6034
|
+
this.mcpManager = null;
|
|
6035
|
+
}
|
|
5065
6036
|
await this.deps.bridge.queryTelephonyCost(this.metricsAcc, this.callId);
|
|
5066
6037
|
if (this.stt instanceof DeepgramSTT && this.stt.requestId) {
|
|
5067
6038
|
const dgKey = this.stt.apiKey;
|
|
@@ -5088,7 +6059,7 @@ var StreamHandler = class {
|
|
|
5088
6059
|
finalMetrics
|
|
5089
6060
|
);
|
|
5090
6061
|
try {
|
|
5091
|
-
const { notifyDashboard } = await import("./persistence-LQBYQPQQ.mjs");
|
|
6062
|
+
const { notifyDashboard } = await import("./persistence-LVIAHESK.mjs");
|
|
5092
6063
|
notifyDashboard(callEndData);
|
|
5093
6064
|
} catch {
|
|
5094
6065
|
}
|
|
@@ -5129,6 +6100,7 @@ async function queryDeepgramCost(metricsAcc, deepgramKey, deepgramRequestId) {
|
|
|
5129
6100
|
}
|
|
5130
6101
|
|
|
5131
6102
|
// src/services/call-log.ts
|
|
6103
|
+
init_esm_shims();
|
|
5132
6104
|
import * as crypto3 from "crypto";
|
|
5133
6105
|
import * as fs2 from "fs";
|
|
5134
6106
|
import { promises as fsp } from "fs";
|
|
@@ -5226,6 +6198,7 @@ var CallLogger = class {
|
|
|
5226
6198
|
this.root = null;
|
|
5227
6199
|
}
|
|
5228
6200
|
}
|
|
6201
|
+
/** True when a log root was configured and is writable. */
|
|
5229
6202
|
get enabled() {
|
|
5230
6203
|
return this.root !== null;
|
|
5231
6204
|
}
|
|
@@ -5239,6 +6212,7 @@ var CallLogger = class {
|
|
|
5239
6212
|
const safeId = sanitizeLogValue(callId, 64).replace(/\//g, "_") || "unknown";
|
|
5240
6213
|
return path2.join(this.root, "calls", year, month, day, safeId);
|
|
5241
6214
|
}
|
|
6215
|
+
/** Write the initial `metadata.json` for a new call. */
|
|
5242
6216
|
async logCallStart(callId, input = {}) {
|
|
5243
6217
|
if (!this.enabled) return;
|
|
5244
6218
|
const startedAt = Date.now() / 1e3;
|
|
@@ -5271,6 +6245,7 @@ var CallLogger = class {
|
|
|
5271
6245
|
this.sweepOldDays();
|
|
5272
6246
|
}
|
|
5273
6247
|
}
|
|
6248
|
+
/** Append a single turn record to the call's `transcript.jsonl`. */
|
|
5274
6249
|
async logTurn(callId, turn) {
|
|
5275
6250
|
if (!this.enabled) return;
|
|
5276
6251
|
const dir = this.callDir(callId);
|
|
@@ -5288,6 +6263,7 @@ var CallLogger = class {
|
|
|
5288
6263
|
);
|
|
5289
6264
|
}
|
|
5290
6265
|
}
|
|
6266
|
+
/** Append an operational event (tool_call, barge_in, error, …) to `events.jsonl`. */
|
|
5291
6267
|
async logEvent(callId, eventType, payload = {}) {
|
|
5292
6268
|
if (!this.enabled) return;
|
|
5293
6269
|
const dir = this.callDir(callId);
|
|
@@ -5306,6 +6282,7 @@ var CallLogger = class {
|
|
|
5306
6282
|
);
|
|
5307
6283
|
}
|
|
5308
6284
|
}
|
|
6285
|
+
/** Merge end-of-call fields into the existing `metadata.json`. */
|
|
5309
6286
|
async logCallEnd(callId, input = {}) {
|
|
5310
6287
|
if (!this.enabled) return;
|
|
5311
6288
|
const dir = this.callDir(callId);
|
|
@@ -5432,6 +6409,18 @@ var END_CALL_TOOL = {
|
|
|
5432
6409
|
function xmlEscape(s) {
|
|
5433
6410
|
return s.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;").replace(/'/g, "&apos;");
|
|
5434
6411
|
}
|
|
6412
|
+
function classifyTwilioAmd(answeredBy) {
|
|
6413
|
+
if (answeredBy === "human") return "human";
|
|
6414
|
+
if (answeredBy.startsWith("machine_")) return "machine";
|
|
6415
|
+
if (answeredBy === "fax") return "fax";
|
|
6416
|
+
return "unknown";
|
|
6417
|
+
}
|
|
6418
|
+
function classifyTelnyxAmd(result) {
|
|
6419
|
+
if (result === "human") return "human";
|
|
6420
|
+
if (result === "machine" || result === "machine_detected") return "machine";
|
|
6421
|
+
if (result === "fax") return "fax";
|
|
6422
|
+
return "unknown";
|
|
6423
|
+
}
|
|
5435
6424
|
function validateWebhookUrl(url) {
|
|
5436
6425
|
const parsed = new URL(url);
|
|
5437
6426
|
if (!["http:", "https:"].includes(parsed.protocol)) {
|
|
@@ -5561,22 +6550,35 @@ function buildAIAdapter(config, agent, resolvedPrompt) {
|
|
|
5561
6550
|
const agentTools = agent.tools?.map((t) => ({
|
|
5562
6551
|
name: t.name,
|
|
5563
6552
|
description: t.description,
|
|
5564
|
-
parameters: t.parameters
|
|
6553
|
+
parameters: t.parameters,
|
|
6554
|
+
strict: t.strict
|
|
5565
6555
|
})) ?? [];
|
|
5566
6556
|
const tools = [...agentTools, TRANSFER_CALL_TOOL, END_CALL_TOOL];
|
|
5567
6557
|
const openaiKey = engine && engine.kind === "openai_realtime" ? engine.apiKey : config.openaiKey ?? "";
|
|
6558
|
+
const adapterOptions = {};
|
|
6559
|
+
if (engine && engine.kind === "openai_realtime") {
|
|
6560
|
+
if (engine.reasoningEffort !== void 0) {
|
|
6561
|
+
adapterOptions.reasoningEffort = engine.reasoningEffort;
|
|
6562
|
+
}
|
|
6563
|
+
if (engine.inputAudioTranscriptionModel !== void 0) {
|
|
6564
|
+
adapterOptions.inputAudioTranscriptionModel = engine.inputAudioTranscriptionModel;
|
|
6565
|
+
}
|
|
6566
|
+
}
|
|
5568
6567
|
return new OpenAIRealtimeAdapter(
|
|
5569
6568
|
openaiKey,
|
|
5570
6569
|
agent.model,
|
|
5571
6570
|
agent.voice,
|
|
5572
6571
|
resolvedPrompt ?? agent.systemPrompt,
|
|
5573
|
-
tools
|
|
6572
|
+
tools,
|
|
6573
|
+
void 0,
|
|
6574
|
+
adapterOptions
|
|
5574
6575
|
);
|
|
5575
6576
|
}
|
|
5576
6577
|
var TwilioBridge = class {
|
|
5577
6578
|
constructor(config) {
|
|
5578
6579
|
this.config = config;
|
|
5579
6580
|
}
|
|
6581
|
+
config;
|
|
5580
6582
|
label = "Twilio";
|
|
5581
6583
|
telephonyProvider = "twilio";
|
|
5582
6584
|
sendAudio(ws, audioBase64, streamSid) {
|
|
@@ -5649,7 +6651,10 @@ var TwilioBridge = class {
|
|
|
5649
6651
|
getLogger().info(`Twilio actual cost: $${Math.abs(parseFloat(data.price))}`);
|
|
5650
6652
|
}
|
|
5651
6653
|
}
|
|
5652
|
-
} catch {
|
|
6654
|
+
} catch (err) {
|
|
6655
|
+
getLogger().debug(
|
|
6656
|
+
`queryTelephonyCost(twilio) failed: ${err?.message ?? err}`
|
|
6657
|
+
);
|
|
5653
6658
|
}
|
|
5654
6659
|
}
|
|
5655
6660
|
}
|
|
@@ -5669,6 +6674,7 @@ var TelnyxBridge = class {
|
|
|
5669
6674
|
constructor(config) {
|
|
5670
6675
|
this.config = config;
|
|
5671
6676
|
}
|
|
6677
|
+
config;
|
|
5672
6678
|
label = "Telnyx";
|
|
5673
6679
|
telephonyProvider = "telnyx";
|
|
5674
6680
|
sendAudio(ws, audioBase64, _streamSid) {
|
|
@@ -5790,7 +6796,10 @@ var TelnyxBridge = class {
|
|
|
5790
6796
|
getLogger().info(`Telnyx actual cost: $${Math.abs(parseFloat(amount))}`);
|
|
5791
6797
|
}
|
|
5792
6798
|
}
|
|
5793
|
-
} catch {
|
|
6799
|
+
} catch (err) {
|
|
6800
|
+
getLogger().debug(
|
|
6801
|
+
`queryTelephonyCost(telnyx) failed: ${err?.message ?? err}`
|
|
6802
|
+
);
|
|
5794
6803
|
}
|
|
5795
6804
|
}
|
|
5796
6805
|
}
|
|
@@ -5811,7 +6820,8 @@ var EmbeddedServer = class {
|
|
|
5811
6820
|
this.dashboardToken = dashboardToken;
|
|
5812
6821
|
this.metricsStore = new MetricsStore();
|
|
5813
6822
|
this.pricing = mergePricing(pricingOverrides);
|
|
5814
|
-
const logRoot = resolveLogRoot();
|
|
6823
|
+
const logRoot = config.persistRoot === void 0 ? resolveLogRoot() : config.persistRoot;
|
|
6824
|
+
this.callLogger = new CallLogger(logRoot);
|
|
5815
6825
|
if (logRoot) {
|
|
5816
6826
|
try {
|
|
5817
6827
|
const restored = this.metricsStore.hydrate(logRoot);
|
|
@@ -5823,6 +6833,17 @@ var EmbeddedServer = class {
|
|
|
5823
6833
|
}
|
|
5824
6834
|
}
|
|
5825
6835
|
}
|
|
6836
|
+
config;
|
|
6837
|
+
agent;
|
|
6838
|
+
onCallStart;
|
|
6839
|
+
onCallEnd;
|
|
6840
|
+
onTranscript;
|
|
6841
|
+
onMessage;
|
|
6842
|
+
recording;
|
|
6843
|
+
voicemailMessage;
|
|
6844
|
+
onMetrics;
|
|
6845
|
+
dashboard;
|
|
6846
|
+
dashboardToken;
|
|
5826
6847
|
server = null;
|
|
5827
6848
|
wss = null;
|
|
5828
6849
|
twilioTokenWarningLogged = false;
|
|
@@ -5830,11 +6851,25 @@ var EmbeddedServer = class {
|
|
|
5830
6851
|
metricsStore;
|
|
5831
6852
|
pricing;
|
|
5832
6853
|
remoteHandler = new RemoteMessageHandler();
|
|
5833
|
-
/**
|
|
5834
|
-
|
|
6854
|
+
/**
|
|
6855
|
+
* Opt-in per-call filesystem logger. Path is resolved by ``client.ts``
|
|
6856
|
+
* from the public ``LocalOptions.persist`` option (with the legacy
|
|
6857
|
+
* ``PATTER_LOG_DIR`` env var as fallback). Initialised in the ctor
|
|
6858
|
+
* because ``resolveLogRoot`` cannot see ``this.config`` from a field
|
|
6859
|
+
* default expression.
|
|
6860
|
+
*/
|
|
6861
|
+
callLogger;
|
|
5835
6862
|
/** Active WebSocket connections tracked for graceful shutdown. */
|
|
5836
6863
|
activeConnections = /* @__PURE__ */ new Set();
|
|
5837
6864
|
activeCallIds = /* @__PURE__ */ new Map();
|
|
6865
|
+
/**
|
|
6866
|
+
* Per-call AMD result callback set by ``Patter.call()`` for the most
|
|
6867
|
+
* recent outbound call. Public so ``client.ts`` can populate it after
|
|
6868
|
+
* server start. Cleared after firing once per call to avoid leaking
|
|
6869
|
+
* across calls.
|
|
6870
|
+
*/
|
|
6871
|
+
onMachineDetection;
|
|
6872
|
+
/** Bind HTTP + WebSocket listeners on `port`, mount carrier webhooks and dashboard routes. */
|
|
5838
6873
|
async start(port = 8e3) {
|
|
5839
6874
|
const webhookUrlPattern = /^[a-zA-Z0-9][a-zA-Z0-9.\-]+[a-zA-Z0-9]$/;
|
|
5840
6875
|
if (!webhookUrlPattern.test(this.config.webhookUrl)) {
|
|
@@ -5950,6 +6985,20 @@ var EmbeddedServer = class {
|
|
|
5950
6985
|
const answeredBy = body["AnsweredBy"] ?? "";
|
|
5951
6986
|
const callSid = body["CallSid"] ?? "";
|
|
5952
6987
|
getLogger().info(`AMD result for ${sanitizeLogValue(callSid)}: ${sanitizeLogValue(answeredBy)}`);
|
|
6988
|
+
const cb = this.onMachineDetection;
|
|
6989
|
+
if (cb && callSid) {
|
|
6990
|
+
try {
|
|
6991
|
+
await cb({
|
|
6992
|
+
call_id: callSid,
|
|
6993
|
+
carrier: "twilio",
|
|
6994
|
+
classification: classifyTwilioAmd(answeredBy),
|
|
6995
|
+
raw: answeredBy,
|
|
6996
|
+
detected_at: Date.now() / 1e3
|
|
6997
|
+
});
|
|
6998
|
+
} catch (err) {
|
|
6999
|
+
getLogger().warn(`onMachineDetection callback threw: ${sanitizeLogValue(String(err))}`);
|
|
7000
|
+
}
|
|
7001
|
+
}
|
|
5953
7002
|
if ((answeredBy === "machine_end_beep" || answeredBy === "machine_end_silence") && this.voicemailMessage && this.config.twilioSid && this.config.twilioToken) {
|
|
5954
7003
|
if (!validateTwilioSid(callSid)) {
|
|
5955
7004
|
getLogger().warn(`AMD webhook rejected: invalid CallSid ${JSON.stringify(sanitizeLogValue(callSid))}`);
|
|
@@ -5965,7 +7014,8 @@ var EmbeddedServer = class {
|
|
|
5965
7014
|
"Content-Type": "application/x-www-form-urlencoded",
|
|
5966
7015
|
"Authorization": `Basic ${Buffer.from(`${this.config.twilioSid}:${this.config.twilioToken}`).toString("base64")}`
|
|
5967
7016
|
},
|
|
5968
|
-
body: new URLSearchParams({ Twiml: twiml }).toString()
|
|
7017
|
+
body: new URLSearchParams({ Twiml: twiml }).toString(),
|
|
7018
|
+
signal: AbortSignal.timeout(1e4)
|
|
5969
7019
|
});
|
|
5970
7020
|
if (vmResp.ok) {
|
|
5971
7021
|
getLogger().info(`Voicemail dropped for ${sanitizeLogValue(callSid)}`);
|
|
@@ -6053,6 +7103,20 @@ var EmbeddedServer = class {
|
|
|
6053
7103
|
getLogger().info(
|
|
6054
7104
|
`Telnyx AMD result for ${sanitizeLogValue(amdCallId)}: ${sanitizeLogValue(amdResult)}`
|
|
6055
7105
|
);
|
|
7106
|
+
const cbTx = this.onMachineDetection;
|
|
7107
|
+
if (cbTx && amdCallId) {
|
|
7108
|
+
try {
|
|
7109
|
+
await cbTx({
|
|
7110
|
+
call_id: amdCallId,
|
|
7111
|
+
carrier: "telnyx",
|
|
7112
|
+
classification: classifyTelnyxAmd(amdResult),
|
|
7113
|
+
raw: amdResult,
|
|
7114
|
+
detected_at: Date.now() / 1e3
|
|
7115
|
+
});
|
|
7116
|
+
} catch (err) {
|
|
7117
|
+
getLogger().warn(`onMachineDetection callback threw: ${sanitizeLogValue(String(err))}`);
|
|
7118
|
+
}
|
|
7119
|
+
}
|
|
6056
7120
|
if (amdCallId && (amdResult === "machine" || amdResult === "machine_detected")) {
|
|
6057
7121
|
await this.handleTelnyxAmdVoicemail(amdCallId);
|
|
6058
7122
|
}
|
|
@@ -6147,7 +7211,8 @@ var EmbeddedServer = class {
|
|
|
6147
7211
|
}
|
|
6148
7212
|
});
|
|
6149
7213
|
await new Promise((resolve) => {
|
|
6150
|
-
|
|
7214
|
+
const bindHost = process.env.PATTER_BIND_HOST ?? "127.0.0.1";
|
|
7215
|
+
this.server.listen(port, bindHost, () => {
|
|
6151
7216
|
getLogger().info(`Server on port ${port}`);
|
|
6152
7217
|
getLogger().info(`Webhook: https://${this.config.webhookUrl}`);
|
|
6153
7218
|
getLogger().info(`Phone: ${this.config.phoneNumber}`);
|
|
@@ -6477,30 +7542,168 @@ var EmbeddedServer = class {
|
|
|
6477
7542
|
}
|
|
6478
7543
|
};
|
|
6479
7544
|
|
|
7545
|
+
// src/tools/circuit-breaker.ts
|
|
7546
|
+
init_esm_shims();
|
|
7547
|
+
/**
 * Lifecycle states of a per-tool circuit breaker.
 * Frozen so the shared state names cannot be reassigned at runtime.
 */
var CircuitBreakerState = Object.freeze({
  CLOSED: "closed",
  OPEN: "open",
  HALF_OPEN: "half_open"
});
/** Consecutive failures that trip a breaker when no threshold is supplied. */
var DEFAULT_FAILURE_THRESHOLD = 5;
/** Milliseconds a breaker stays OPEN before a retry is allowed, when no cooldown is supplied. */
var DEFAULT_COOLDOWN_MS = 3e4;
/**
 * Tracks one circuit breaker per tool name.
 *
 * A breaker starts CLOSED, trips to OPEN once `failureThreshold` consecutive
 * failures have been recorded, and moves to HALF_OPEN on the first `allow()`
 * call made after `cooldownMs` has elapsed. A recorded success resets it to
 * CLOSED. A threshold of `0` (or less) disables the breaker entirely.
 */
var CircuitBreakerRegistry = class {
  threshold;
  cooldownMs;
  /** Map of tool name -> `{ state, consecutiveFailures, openedAt }`. */
  state = /* @__PURE__ */ new Map();
  /** Inject for deterministic tests; defaults to ``Date.now()``. */
  clock;
  /**
   * @param {{ failureThreshold?: number, cooldownMs?: number } | null} [opts]
   *   Breaker tuning; `null`/`undefined` fields fall back to the defaults.
   * @param {(() => number) | null} [clock] Millisecond clock source.
   */
  constructor(opts = {}, clock = Date.now) {
    // Default parameters only cover `undefined`; tolerate an explicit `null`
    // for either argument instead of throwing on property access / call.
    const options = opts ?? {};
    this.threshold = options.failureThreshold ?? DEFAULT_FAILURE_THRESHOLD;
    this.cooldownMs = options.cooldownMs ?? DEFAULT_COOLDOWN_MS;
    this.clock = clock ?? Date.now;
  }
  /** Returns ``true`` when this tool is currently allowed to run. */
  allow(toolName) {
    if (this.threshold <= 0) return true;
    const entry = this.state.get(toolName);
    if (!entry) return true;
    if (entry.state !== CircuitBreakerState.OPEN) return true;
    if (this.clock() - entry.openedAt >= this.cooldownMs) {
      // Cooldown elapsed: let callers through again; the next recorded
      // outcome decides whether the breaker closes or re-opens.
      entry.state = CircuitBreakerState.HALF_OPEN;
      return true;
    }
    return false;
  }
  /** Mark a successful execution. Resets the breaker to CLOSED. */
  recordSuccess(toolName) {
    const entry = this.state.get(toolName);
    if (!entry) return;
    entry.state = CircuitBreakerState.CLOSED;
    entry.consecutiveFailures = 0;
    entry.openedAt = 0;
  }
  /** Mark a failed execution; trips OPEN once threshold is reached. */
  recordFailure(toolName) {
    if (this.threshold <= 0) return;
    let entry = this.state.get(toolName);
    if (!entry) {
      entry = { state: CircuitBreakerState.CLOSED, consecutiveFailures: 0, openedAt: 0 };
      this.state.set(toolName, entry);
    }
    entry.consecutiveFailures += 1;
    if (entry.consecutiveFailures >= this.threshold) {
      // Also reached from HALF_OPEN: the counter is still at/above the
      // threshold, so a failed retry re-opens and restarts the cooldown.
      entry.state = CircuitBreakerState.OPEN;
      entry.openedAt = this.clock();
    }
  }
  /**
   * Time until the breaker transitions OPEN → HALF_OPEN, in ms. Returns
   * ``0`` when the breaker is currently allowing calls. Useful for
   * tests and the structured rejection JSON.
   */
  timeUntilHalfOpen(toolName) {
    const entry = this.state.get(toolName);
    if (!entry || entry.state !== CircuitBreakerState.OPEN) return 0;
    const elapsed = this.clock() - entry.openedAt;
    return Math.max(0, this.cooldownMs - elapsed);
  }
  /** Snapshot (shallow copy) for debugging / metrics; `null` when the tool has no recorded state. */
  snapshot(toolName) {
    const entry = this.state.get(toolName);
    return entry ? { ...entry } : null;
  }
};
|
|
7619
|
+
|
|
6480
7620
|
// src/llm-loop.ts
|
|
6481
7621
|
var DEFAULT_TOOL_MAX_RETRIES = 2;
|
|
6482
7622
|
var DEFAULT_TOOL_RETRY_DELAY_MS = 500;
|
|
6483
7623
|
var DEFAULT_TOOL_TIMEOUT_MS = 1e4;
|
|
6484
7624
|
var TOOL_MAX_RESPONSE_BYTES = 1 * 1024 * 1024;
|
|
7625
|
+
/**
 * Invoke a tool handler and resolve to its final string result.
 *
 * Two handler shapes are supported:
 *  - a plain (sync or async) function: its awaited return value is returned as-is;
 *  - an async generator: each yielded value is interpreted as
 *      `{ progress: string }` -> forwarded to `onProgress`,
 *      `{ result: string }`   -> remembered as the latest result,
 *      anything else non-null -> forwarded to `onProgress` (strings verbatim,
 *                                other values JSON-stringified).
 *    The generator's string return value wins; otherwise the last yielded
 *    `result` is used, and `"{}"` when neither exists.
 *
 * @param {Function} handler Tool handler, called as `handler(args, callContext)`.
 * @param {*} args Arguments forwarded to the handler.
 * @param {*} callContext Call context forwarded to the handler.
 * @param {((text: string) => unknown)=} onProgress Optional progress sink; awaited per update.
 * @returns {Promise<*>} The handler's result.
 * @throws Re-throws whatever the handler or `onProgress` throws.
 */
async function invokeHandler(handler, args, callContext, onProgress) {
  const invoked = handler(args, callContext);
  const isAsyncGenerator = invoked && typeof invoked === "object" && typeof invoked[Symbol.asyncIterator] === "function" && typeof invoked.next === "function";
  if (!isAsyncGenerator) {
    return await invoked;
  }
  let lastResult = "";
  let finished = false;
  try {
    while (true) {
      const step = await invoked.next();
      if (step.done) {
        finished = true;
        const ret = typeof step.value === "string" ? step.value : "";
        return ret || lastResult || "{}";
      }
      const yielded = step.value;
      if (yielded && typeof yielded === "object") {
        if (typeof yielded.progress === "string") {
          if (onProgress) await onProgress(yielded.progress);
          continue;
        }
        if (typeof yielded.result === "string") {
          lastResult = yielded.result;
          continue;
        }
      }
      if (onProgress && yielded != null) {
        const text = typeof yielded === "string" ? yielded : JSON.stringify(yielded);
        await onProgress(text);
      }
    }
  } finally {
    // A manual `.next()` loop does not close the iterator on early exit the
    // way `for await` does. If we leave before completion (e.g. `onProgress`
    // threw), ask the generator to finish so its `finally` cleanup runs.
    if (!finished && typeof invoked.return === "function") {
      try {
        await invoked.return();
      } catch {
        // Best-effort cleanup: never mask the error that caused the early exit.
      }
    }
  }
}
|
|
7654
|
+
/**
 * Compute an exponential backoff delay with a small random jitter.
 *
 * The delay doubles with every attempt (`baseMs * 2^attempt`), is capped at
 * `capMs`, and then has up to `jitterMs` of random jitter added on top, so the
 * returned value can exceed the cap by at most `jitterMs`.
 *
 * @param {number} baseMs Delay for the first retry (attempt 0), in ms.
 * @param {number} attempt Zero-based attempt index.
 * @param {number} [capMs=5000] Upper bound for the exponential part, in ms.
 * @param {number} [jitterMs=60] Maximum random jitter added to the delay, in ms.
 * @returns {number} Delay in whole milliseconds.
 */
function backoffDelayMs(baseMs, attempt, capMs = 5e3, jitterMs = 60) {
  const exponential = Math.min(capMs, baseMs * 2 ** attempt);
  return Math.round(exponential + Math.random() * jitterMs);
}
|
|
6485
7659
|
var DefaultToolExecutor = class {
|
|
6486
7660
|
maxRetries;
|
|
6487
7661
|
retryDelayMs;
|
|
6488
7662
|
requestTimeoutMs;
|
|
7663
|
+
breaker;
|
|
6489
7664
|
constructor(opts = {}) {
|
|
6490
7665
|
this.maxRetries = opts.maxRetries ?? DEFAULT_TOOL_MAX_RETRIES;
|
|
6491
7666
|
this.retryDelayMs = opts.retryDelayMs ?? DEFAULT_TOOL_RETRY_DELAY_MS;
|
|
6492
7667
|
this.requestTimeoutMs = opts.requestTimeoutMs ?? DEFAULT_TOOL_TIMEOUT_MS;
|
|
6493
|
-
|
|
6494
|
-
|
|
7668
|
+
this.breaker = new CircuitBreakerRegistry(opts.circuitBreaker ?? {});
|
|
7669
|
+
}
|
|
7670
|
+
/** Expose the breaker for tests + dashboard observability. */
|
|
7671
|
+
get circuitBreaker() {
|
|
7672
|
+
return this.breaker;
|
|
7673
|
+
}
|
|
7674
|
+
async execute(toolDef, args, callContext, onProgress) {
|
|
7675
|
+
if (!this.breaker.allow(toolDef.name)) {
|
|
7676
|
+
const cooldown = this.breaker.timeUntilHalfOpen(toolDef.name);
|
|
7677
|
+
return JSON.stringify({
|
|
7678
|
+
error: `Tool '${toolDef.name}' is temporarily unavailable (circuit open).`,
|
|
7679
|
+
fallback: true,
|
|
7680
|
+
circuit_state: "open",
|
|
7681
|
+
retry_after_ms: cooldown
|
|
7682
|
+
});
|
|
7683
|
+
}
|
|
6495
7684
|
if (toolDef.handler) {
|
|
6496
|
-
|
|
6497
|
-
|
|
6498
|
-
|
|
6499
|
-
|
|
6500
|
-
|
|
6501
|
-
|
|
6502
|
-
|
|
7685
|
+
const totalAttempts = this.maxRetries + 1;
|
|
7686
|
+
let lastErr = null;
|
|
7687
|
+
for (let attempt = 0; attempt < totalAttempts; attempt++) {
|
|
7688
|
+
try {
|
|
7689
|
+
const result = await invokeHandler(toolDef.handler, args, callContext, onProgress);
|
|
7690
|
+
this.breaker.recordSuccess(toolDef.name);
|
|
7691
|
+
return result;
|
|
7692
|
+
} catch (e) {
|
|
7693
|
+
lastErr = e;
|
|
7694
|
+
if (attempt < totalAttempts - 1) {
|
|
7695
|
+
getLogger().warn(
|
|
7696
|
+
`Tool handler '${toolDef.name}' failed (attempt ${attempt + 1}/${totalAttempts}), retrying: ${String(e)}`
|
|
7697
|
+
);
|
|
7698
|
+
await new Promise((r) => setTimeout(r, backoffDelayMs(this.retryDelayMs, attempt)));
|
|
7699
|
+
}
|
|
7700
|
+
}
|
|
6503
7701
|
}
|
|
7702
|
+
this.breaker.recordFailure(toolDef.name);
|
|
7703
|
+
return JSON.stringify({
|
|
7704
|
+
error: `Tool handler error after ${totalAttempts} attempts: ${String(lastErr)}`,
|
|
7705
|
+
fallback: true
|
|
7706
|
+
});
|
|
6504
7707
|
}
|
|
6505
7708
|
if (toolDef.webhookUrl) {
|
|
6506
7709
|
try {
|
|
@@ -6535,20 +7738,23 @@ var DefaultToolExecutor = class {
|
|
|
6535
7738
|
if (!resp.ok) throw new Error(`HTTP ${resp.status}`);
|
|
6536
7739
|
const result = JSON.stringify(await resp.json());
|
|
6537
7740
|
if (result.length > TOOL_MAX_RESPONSE_BYTES) {
|
|
7741
|
+
this.breaker.recordFailure(toolDef.name);
|
|
6538
7742
|
return JSON.stringify({
|
|
6539
7743
|
error: `Webhook response too large: ${result.length} bytes (max ${TOOL_MAX_RESPONSE_BYTES})`,
|
|
6540
7744
|
fallback: true
|
|
6541
7745
|
});
|
|
6542
7746
|
}
|
|
7747
|
+
this.breaker.recordSuccess(toolDef.name);
|
|
6543
7748
|
return result;
|
|
6544
7749
|
} catch (e) {
|
|
6545
7750
|
if (attempt < totalAttempts - 1) {
|
|
6546
7751
|
getLogger().warn(
|
|
6547
|
-
`Tool webhook '${toolDef.name}' failed (attempt ${attempt + 1}), retrying: ${String(e)}`
|
|
7752
|
+
`Tool webhook '${toolDef.name}' failed (attempt ${attempt + 1}/${totalAttempts}), retrying: ${String(e)}`
|
|
6548
7753
|
);
|
|
6549
|
-
await new Promise((r) => setTimeout(r, this.retryDelayMs));
|
|
7754
|
+
await new Promise((r) => setTimeout(r, backoffDelayMs(this.retryDelayMs, attempt)));
|
|
6550
7755
|
} else {
|
|
6551
7756
|
span.recordException(e);
|
|
7757
|
+
this.breaker.recordFailure(toolDef.name);
|
|
6552
7758
|
return JSON.stringify({
|
|
6553
7759
|
error: `Tool failed after ${totalAttempts} attempts: ${String(e)}`,
|
|
6554
7760
|
fallback: true
|
|
@@ -6569,6 +7775,30 @@ var DefaultToolExecutor = class {
|
|
|
6569
7775
|
});
|
|
6570
7776
|
}
|
|
6571
7777
|
};
|
|
7778
|
+
/**
 * Combine several optional abort signals into one.
 *
 * `null`/`undefined` entries are ignored. When exactly one signal remains it
 * is returned unchanged; otherwise the result aborts as soon as any source
 * signal aborts, carrying that signal's `reason`. Uses the native
 * `AbortSignal.any` when available and a manual fallback otherwise.
 *
 * @param {...(AbortSignal | null | undefined)} signals Signals to merge.
 * @returns {AbortSignal} The single remaining signal, or a merged signal.
 */
function mergeAbortSignals(...signals) {
  const filtered = signals.filter((s) => s != null);
  if (filtered.length === 1) return filtered[0];
  if (typeof AbortSignal.any === "function") {
    return AbortSignal.any(filtered);
  }
  const controller = new AbortController();
  // Undo callbacks for every listener attached below. Once the merged signal
  // has aborted the remaining listeners are useless; removing them keeps
  // long-lived source signals from accumulating one listener per merge.
  const detachers = [];
  const detachAll = () => {
    for (const detach of detachers.splice(0)) detach();
  };
  for (const sig of filtered) {
    if (sig.aborted) {
      detachAll();
      controller.abort(sig.reason);
      return controller.signal;
    }
    const onAbort = () => {
      detachAll();
      controller.abort(sig.reason);
    };
    sig.addEventListener("abort", onAbort, { once: true });
    detachers.push(() => sig.removeEventListener("abort", onAbort));
  }
  return controller.signal;
}
|
|
6572
7802
|
var OpenAILLMProvider = class {
|
|
6573
7803
|
apiKey;
|
|
6574
7804
|
model;
|
|
@@ -6596,7 +7826,8 @@ var OpenAILLMProvider = class {
|
|
|
6596
7826
|
this.presencePenalty = sampling.presencePenalty;
|
|
6597
7827
|
this.stop = sampling.stop;
|
|
6598
7828
|
}
|
|
6599
|
-
|
|
7829
|
+
/** Stream OpenAI Chat Completions chunks for the given messages/tools. */
|
|
7830
|
+
async *stream(messages, tools, opts) {
|
|
6600
7831
|
const body = {
|
|
6601
7832
|
model: this.model,
|
|
6602
7833
|
messages,
|
|
@@ -6620,6 +7851,7 @@ var OpenAILLMProvider = class {
|
|
|
6620
7851
|
if (tools) {
|
|
6621
7852
|
body.tools = tools;
|
|
6622
7853
|
}
|
|
7854
|
+
const signal = mergeAbortSignals(opts?.signal, AbortSignal.timeout(3e4));
|
|
6623
7855
|
const response = await fetch("https://api.openai.com/v1/chat/completions", {
|
|
6624
7856
|
method: "POST",
|
|
6625
7857
|
headers: {
|
|
@@ -6627,12 +7859,14 @@ var OpenAILLMProvider = class {
|
|
|
6627
7859
|
"Authorization": `Bearer ${this.apiKey}`
|
|
6628
7860
|
},
|
|
6629
7861
|
body: JSON.stringify(body),
|
|
6630
|
-
signal
|
|
7862
|
+
signal
|
|
6631
7863
|
});
|
|
6632
7864
|
if (!response.ok) {
|
|
6633
7865
|
const errText = await response.text();
|
|
6634
7866
|
getLogger().error(`LLM API error: ${response.status} ${errText}`);
|
|
6635
|
-
|
|
7867
|
+
throw new PatterConnectionError(
|
|
7868
|
+
`LLM API returned ${response.status}: ${errText.slice(0, 200)}`
|
|
7869
|
+
);
|
|
6636
7870
|
}
|
|
6637
7871
|
const reader = response.body?.getReader();
|
|
6638
7872
|
if (!reader) return;
|
|
@@ -6685,6 +7919,7 @@ var OpenAILLMProvider = class {
|
|
|
6685
7919
|
}
|
|
6686
7920
|
}
|
|
6687
7921
|
};
|
|
7922
|
+
var DEFAULT_PHONE_PREAMBLE = "You are speaking on a live phone call. Respond concisely. Do not use markdown, headers, bullet lists, code fences, or emojis. Spell out numbers, currencies, dates, and units in natural spoken language. Keep replies under 2 sentences unless the caller asks for detail.";
|
|
6688
7923
|
var LLMLoop = class {
|
|
6689
7924
|
provider;
|
|
6690
7925
|
systemPrompt;
|
|
@@ -6696,9 +7931,20 @@ var LLMLoop = class {
|
|
|
6696
7931
|
// Fix 10: track provider/model so usage chunks can be attributed for billing.
|
|
6697
7932
|
_providerName;
|
|
6698
7933
|
_modelName;
|
|
6699
|
-
|
|
7934
|
+
// Optional async observer fired after a successful tool execution so
|
|
7935
|
+
// the host SDK (StreamHandler in pipeline mode) can surface tool calls
|
|
7936
|
+
// into the transcript timeline / `onTranscript` callback. Mirrors the
|
|
7937
|
+
// Python `on_tool_call` parameter on `LLMLoop.__init__`.
|
|
7938
|
+
onToolCall;
|
|
7939
|
+
constructor(apiKey, model, systemPrompt, tools, llmProvider, disablePhonePreamble = false) {
|
|
6700
7940
|
this.provider = llmProvider ?? new OpenAILLMProvider(apiKey, model);
|
|
6701
|
-
|
|
7941
|
+
if (disablePhonePreamble) {
|
|
7942
|
+
this.systemPrompt = systemPrompt;
|
|
7943
|
+
} else {
|
|
7944
|
+
this.systemPrompt = systemPrompt ? `${DEFAULT_PHONE_PREAMBLE}
|
|
7945
|
+
|
|
7946
|
+
${systemPrompt}` : DEFAULT_PHONE_PREAMBLE;
|
|
7947
|
+
}
|
|
6702
7948
|
if (llmProvider) {
|
|
6703
7949
|
const key = llmProvider.constructor?.providerKey;
|
|
6704
7950
|
if (key) {
|
|
@@ -6745,6 +7991,16 @@ var LLMLoop = class {
|
|
|
6745
7991
|
setEventBus(bus) {
|
|
6746
7992
|
this.eventBus = bus;
|
|
6747
7993
|
}
|
|
7994
|
+
/**
|
|
7995
|
+
* Set or replace the post-tool-execution observer. The callback is
|
|
7996
|
+
* awaited after every successful tool execution with
|
|
7997
|
+
* `(name, args, result)`. Pass `undefined` to disable. Mirrors the
|
|
7998
|
+
* Python `LLMLoop.set_on_tool_call` setter so callers (e.g. the
|
|
7999
|
+
* pipeline `StreamHandler`) can wire the loop after construction.
|
|
8000
|
+
*/
|
|
8001
|
+
setOnToolCall(callback) {
|
|
8002
|
+
this.onToolCall = callback;
|
|
8003
|
+
}
|
|
6748
8004
|
/**
|
|
6749
8005
|
* Stream LLM response tokens, handling tool calls automatically.
|
|
6750
8006
|
* Yields text tokens as they arrive from the LLM.
|
|
@@ -6753,7 +8009,7 @@ var LLMLoop = class {
|
|
|
6753
8009
|
* from the provider are forwarded to {@link LlmUsageRecorder.recordLlmUsage}
|
|
6754
8010
|
* so token costs are included in the call cost breakdown (fix 10).
|
|
6755
8011
|
*/
|
|
6756
|
-
async *run(userText, history, callContext, metrics, hookExecutor, hookCtx) {
|
|
8012
|
+
async *run(userText, history, callContext, metrics, hookExecutor, hookCtx, opts) {
|
|
6757
8013
|
let messages = this.buildMessages(history, userText);
|
|
6758
8014
|
const maxIterations = 10;
|
|
6759
8015
|
if (hookExecutor && hookCtx) {
|
|
@@ -6762,20 +8018,22 @@ var LLMLoop = class {
|
|
|
6762
8018
|
hookCtx
|
|
6763
8019
|
);
|
|
6764
8020
|
}
|
|
6765
|
-
const
|
|
8021
|
+
const hasAfterLlmResponse = Boolean(hookExecutor?.hasAfterLlmResponse() && hookCtx);
|
|
8022
|
+
const hasAfterLlmChunk = Boolean(hookExecutor?.hasAfterLlmChunk());
|
|
6766
8023
|
const allEmittedText = [];
|
|
6767
8024
|
for (let iter = 0; iter < maxIterations; iter++) {
|
|
6768
8025
|
const toolCallsAccumulated = /* @__PURE__ */ new Map();
|
|
6769
8026
|
const textParts = [];
|
|
6770
8027
|
let hasToolCalls = false;
|
|
6771
|
-
for await (const chunk of this.provider.stream(messages, this.openaiTools)) {
|
|
8028
|
+
for await (const chunk of this.provider.stream(messages, this.openaiTools, opts)) {
|
|
6772
8029
|
if (chunk.type === "text" && chunk.content) {
|
|
6773
|
-
|
|
6774
|
-
|
|
6775
|
-
|
|
6776
|
-
|
|
8030
|
+
const content = hasAfterLlmChunk && hookExecutor ? hookExecutor.runAfterLlmChunk(chunk.content) : chunk.content;
|
|
8031
|
+
textParts.push(content);
|
|
8032
|
+
this.eventBus?.emit("llm_chunk", { text: content, iteration: iter });
|
|
8033
|
+
if (hasAfterLlmResponse) {
|
|
8034
|
+
allEmittedText.push(content);
|
|
6777
8035
|
} else {
|
|
6778
|
-
yield
|
|
8036
|
+
yield content;
|
|
6779
8037
|
}
|
|
6780
8038
|
} else if (chunk.type === "usage") {
|
|
6781
8039
|
metrics?.recordLlmUsage(
|
|
@@ -6804,9 +8062,9 @@ var LLMLoop = class {
|
|
|
6804
8062
|
}
|
|
6805
8063
|
}
|
|
6806
8064
|
if (!hasToolCalls) {
|
|
6807
|
-
if (
|
|
8065
|
+
if (hasAfterLlmResponse && hookExecutor && hookCtx) {
|
|
6808
8066
|
const finalText = allEmittedText.join("");
|
|
6809
|
-
const rewritten = await hookExecutor.
|
|
8067
|
+
const rewritten = await hookExecutor.runAfterLlmResponse(finalText, hookCtx);
|
|
6810
8068
|
if (rewritten) yield rewritten;
|
|
6811
8069
|
}
|
|
6812
8070
|
return;
|
|
@@ -6840,6 +8098,15 @@ var LLMLoop = class {
|
|
|
6840
8098
|
tool_call_id: tcData.id,
|
|
6841
8099
|
content: result
|
|
6842
8100
|
});
|
|
8101
|
+
if (this.onToolCall) {
|
|
8102
|
+
try {
|
|
8103
|
+
await this.onToolCall(toolName, args, result);
|
|
8104
|
+
} catch (err) {
|
|
8105
|
+
getLogger().error(
|
|
8106
|
+
`onToolCall observer failed for tool '${toolName}': ${String(err)}`
|
|
8107
|
+
);
|
|
8108
|
+
}
|
|
8109
|
+
}
|
|
6843
8110
|
}
|
|
6844
8111
|
}
|
|
6845
8112
|
getLogger().warn(`LLM loop hit max iterations (${maxIterations})`);
|
|
@@ -6868,6 +8135,7 @@ var LLMLoop = class {
|
|
|
6868
8135
|
|
|
6869
8136
|
// src/test-mode.ts
|
|
6870
8137
|
var TestSession = class {
|
|
8138
|
+
/** Run a REPL-style session that loops user input through the agent's LLM/onMessage handler. */
|
|
6871
8139
|
async run(opts) {
|
|
6872
8140
|
const { agent, openaiKey, onMessage, onCallStart, onCallEnd } = opts;
|
|
6873
8141
|
const callId = `test_${Date.now().toString(36)}${Math.random().toString(36).slice(2, 8)}`;
|
|
@@ -6918,7 +8186,9 @@ var TestSession = class {
|
|
|
6918
8186
|
openaiKey,
|
|
6919
8187
|
llmModel,
|
|
6920
8188
|
resolvedPrompt,
|
|
6921
|
-
agent.tools
|
|
8189
|
+
agent.tools,
|
|
8190
|
+
void 0,
|
|
8191
|
+
agent.disablePhonePreamble ?? false
|
|
6922
8192
|
);
|
|
6923
8193
|
}
|
|
6924
8194
|
let ended = false;
|
|
@@ -7036,6 +8306,7 @@ var TestSession = class {
|
|
|
7036
8306
|
};
|
|
7037
8307
|
|
|
7038
8308
|
export {
|
|
8309
|
+
ErrorCode,
|
|
7039
8310
|
PatterError,
|
|
7040
8311
|
PatterConnectionError,
|
|
7041
8312
|
AuthenticationError,
|
|
@@ -7067,6 +8338,7 @@ export {
|
|
|
7067
8338
|
createResampler16kTo8k,
|
|
7068
8339
|
createResampler8kTo16k,
|
|
7069
8340
|
createResampler24kTo16k,
|
|
8341
|
+
createResampler24kTo8k,
|
|
7070
8342
|
resample8kTo16k,
|
|
7071
8343
|
resample16kTo8k,
|
|
7072
8344
|
resample24kTo16k,
|
|
@@ -7081,12 +8353,14 @@ export {
|
|
|
7081
8353
|
isTracingEnabled,
|
|
7082
8354
|
startSpan,
|
|
7083
8355
|
DefaultToolExecutor,
|
|
8356
|
+
mergeAbortSignals,
|
|
7084
8357
|
OpenAILLMProvider,
|
|
7085
8358
|
LLMLoop,
|
|
7086
8359
|
DEFAULT_MIN_SENTENCE_LEN,
|
|
7087
8360
|
SentenceChunker,
|
|
7088
8361
|
PipelineHookExecutor,
|
|
7089
8362
|
EventBus,
|
|
8363
|
+
resolveLogRoot,
|
|
7090
8364
|
EmbeddedServer,
|
|
7091
8365
|
TestSession
|
|
7092
8366
|
};
|