getpatter 0.6.0 → 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/barge-in-strategies-X6ARMGIQ.mjs +12 -0
- package/dist/chunk-CL2U3YET.mjs +1429 -0
- package/dist/chunk-D4424JZR.mjs +71 -0
- package/dist/{chunk-JUQ5WQTQ.mjs → chunk-LE63CSOB.mjs} +1424 -969
- package/dist/{chunk-X3364LSI.mjs → chunk-R2T4JABZ.mjs} +49 -2
- package/dist/cli.js +315 -37
- package/dist/dashboard/ui.html +13 -13
- package/dist/index.d.mts +2136 -709
- package/dist/index.d.ts +2136 -709
- package/dist/index.js +5674 -2233
- package/dist/index.mjs +2338 -915
- package/dist/openai-realtime-2-CNFARP25.mjs +8 -0
- package/dist/{silero-vad-YLCXT5GQ.mjs → silero-vad-LNDFGIY7.mjs} +1 -1
- package/dist/{test-mode-Y7YG5LFZ.mjs → test-mode-RS57BDM6.mjs} +2 -1
- package/package.json +1 -1
- package/src/dashboard/ui.html +13 -13
|
@@ -1,3 +1,11 @@
|
|
|
1
|
+
import {
|
|
2
|
+
OpenAIRealtime2Adapter,
|
|
3
|
+
OpenAIRealtimeAdapter,
|
|
4
|
+
createResampler16kTo8k,
|
|
5
|
+
createResampler8kTo16k,
|
|
6
|
+
mulawToPcm16,
|
|
7
|
+
pcm16ToMulaw
|
|
8
|
+
} from "./chunk-CL2U3YET.mjs";
|
|
1
9
|
import {
|
|
2
10
|
getLogger
|
|
3
11
|
} from "./chunk-MVOQFAEO.mjs";
|
|
@@ -21,367 +29,9 @@ import express from "express";
|
|
|
21
29
|
import { createServer } from "http";
|
|
22
30
|
import { WebSocketServer } from "ws";
|
|
23
31
|
|
|
24
|
-
// src/providers/openai-realtime.ts
|
|
25
|
-
init_esm_shims();
|
|
26
|
-
import WebSocket from "ws";
|
|
27
|
-
var OpenAIRealtimeAudioFormat = {
|
|
28
|
-
G711_ULAW: "g711_ulaw",
|
|
29
|
-
G711_ALAW: "g711_alaw",
|
|
30
|
-
PCM16: "pcm16"
|
|
31
|
-
};
|
|
32
|
-
var OpenAIRealtimeModel = {
|
|
33
|
-
GPT_REALTIME: "gpt-realtime",
|
|
34
|
-
GPT_REALTIME_2: "gpt-realtime-2",
|
|
35
|
-
GPT_REALTIME_MINI: "gpt-realtime-mini",
|
|
36
|
-
GPT_4O_REALTIME_PREVIEW: "gpt-4o-realtime-preview",
|
|
37
|
-
GPT_4O_MINI_REALTIME_PREVIEW: "gpt-4o-mini-realtime-preview"
|
|
38
|
-
};
|
|
39
|
-
var OpenAIVoice = {
|
|
40
|
-
ALLOY: "alloy",
|
|
41
|
-
ASH: "ash",
|
|
42
|
-
BALLAD: "ballad",
|
|
43
|
-
CORAL: "coral",
|
|
44
|
-
ECHO: "echo",
|
|
45
|
-
FABLE: "fable",
|
|
46
|
-
NOVA: "nova",
|
|
47
|
-
ONYX: "onyx",
|
|
48
|
-
SAGE: "sage",
|
|
49
|
-
SHIMMER: "shimmer",
|
|
50
|
-
VERSE: "verse"
|
|
51
|
-
};
|
|
52
|
-
var OpenAITranscriptionModel = {
|
|
53
|
-
WHISPER_1: "whisper-1",
|
|
54
|
-
GPT_4O_TRANSCRIBE: "gpt-4o-transcribe",
|
|
55
|
-
GPT_4O_MINI_TRANSCRIBE: "gpt-4o-mini-transcribe",
|
|
56
|
-
GPT_REALTIME_WHISPER: "gpt-realtime-whisper"
|
|
57
|
-
};
|
|
58
|
-
var OpenAIRealtimeVADType = {
|
|
59
|
-
SERVER_VAD: "server_vad",
|
|
60
|
-
SEMANTIC_VAD: "semantic_vad"
|
|
61
|
-
};
|
|
62
|
-
var OpenAIRealtimeAdapter = class {
|
|
63
|
-
constructor(apiKey, model = OpenAIRealtimeModel.GPT_REALTIME_MINI, voice = OpenAIVoice.ALLOY, instructions = "", tools, audioFormat = OpenAIRealtimeAudioFormat.G711_ULAW, options = {}) {
|
|
64
|
-
this.apiKey = apiKey;
|
|
65
|
-
this.model = model;
|
|
66
|
-
this.voice = voice;
|
|
67
|
-
this.instructions = instructions;
|
|
68
|
-
this.tools = tools;
|
|
69
|
-
this.audioFormat = audioFormat;
|
|
70
|
-
this.options = options;
|
|
71
|
-
}
|
|
72
|
-
apiKey;
|
|
73
|
-
model;
|
|
74
|
-
voice;
|
|
75
|
-
instructions;
|
|
76
|
-
tools;
|
|
77
|
-
audioFormat;
|
|
78
|
-
ws = null;
|
|
79
|
-
eventCallbacks = /* @__PURE__ */ new Set();
|
|
80
|
-
messageListenerAttached = false;
|
|
81
|
-
heartbeat = null;
|
|
82
|
-
// Track the in-flight assistant item id so we can truncate cleanly on
|
|
83
|
-
// barge-in (see ``cancelResponse``) — matches the Python adapter.
|
|
84
|
-
currentResponseItemId = null;
|
|
85
|
-
currentResponseAudioMs = 0;
|
|
86
|
-
// Wall-clock timestamp (Date.now()) of the first ``response.audio.delta``
|
|
87
|
-
// received since the current response item started. ``cancelResponse``
|
|
88
|
-
// uses this to bound ``audio_end_ms`` to what the caller could plausibly
|
|
89
|
-
// have heard — generated audio frequently arrives 5-10x real-time, so
|
|
90
|
-
// ``audio_end_ms`` driven purely by the per-chunk byte counter overshoots
|
|
91
|
-
// reality and leaves phantom assistant text on the conversation. The
|
|
92
|
-
// wall-clock cap corresponds to the maximum playback that real-time TTS
|
|
93
|
-
// could have produced, which is what the user actually heard.
|
|
94
|
-
currentResponseFirstAudioAt = null;
|
|
95
|
-
options;
|
|
96
|
-
/** Open the Realtime WebSocket and apply the session configuration. */
|
|
97
|
-
async connect() {
|
|
98
|
-
const url = `wss://api.openai.com/v1/realtime?model=${encodeURIComponent(this.model)}`;
|
|
99
|
-
this.ws = new WebSocket(url, {
|
|
100
|
-
headers: {
|
|
101
|
-
Authorization: `Bearer ${this.apiKey}`,
|
|
102
|
-
"OpenAI-Beta": "realtime=v1"
|
|
103
|
-
}
|
|
104
|
-
});
|
|
105
|
-
await new Promise((resolve, reject) => {
|
|
106
|
-
let sessionCreated = false;
|
|
107
|
-
let settled = false;
|
|
108
|
-
const ws = this.ws;
|
|
109
|
-
const onSetupMessage = (raw) => {
|
|
110
|
-
let msg;
|
|
111
|
-
try {
|
|
112
|
-
msg = JSON.parse(raw.toString());
|
|
113
|
-
} catch (e) {
|
|
114
|
-
getLogger().warn(`OpenAI Realtime: failed to parse message: ${String(e)}`);
|
|
115
|
-
return;
|
|
116
|
-
}
|
|
117
|
-
if (msg.type === "session.created" && !sessionCreated) {
|
|
118
|
-
sessionCreated = true;
|
|
119
|
-
const config = {
|
|
120
|
-
input_audio_format: this.audioFormat,
|
|
121
|
-
output_audio_format: this.audioFormat,
|
|
122
|
-
voice: this.voice,
|
|
123
|
-
instructions: this.instructions || "You are a helpful voice assistant. Be concise.",
|
|
124
|
-
turn_detection: {
|
|
125
|
-
type: this.options.vadType ?? OpenAIRealtimeVADType.SERVER_VAD,
|
|
126
|
-
threshold: 0.5,
|
|
127
|
-
prefix_padding_ms: 300,
|
|
128
|
-
silence_duration_ms: this.options.silenceDurationMs ?? 300
|
|
129
|
-
},
|
|
130
|
-
input_audio_transcription: {
|
|
131
|
-
model: this.options.inputAudioTranscriptionModel ?? OpenAITranscriptionModel.WHISPER_1
|
|
132
|
-
}
|
|
133
|
-
};
|
|
134
|
-
if (this.options.temperature !== void 0) config.temperature = this.options.temperature;
|
|
135
|
-
if (this.options.maxResponseOutputTokens !== void 0) {
|
|
136
|
-
config.max_response_output_tokens = this.options.maxResponseOutputTokens;
|
|
137
|
-
}
|
|
138
|
-
if (this.options.modalities !== void 0) config.modalities = this.options.modalities;
|
|
139
|
-
if (this.options.toolChoice !== void 0) config.tool_choice = this.options.toolChoice;
|
|
140
|
-
if (this.options.reasoningEffort !== void 0) {
|
|
141
|
-
config.reasoning = { effort: this.options.reasoningEffort };
|
|
142
|
-
}
|
|
143
|
-
if (this.tools?.length) {
|
|
144
|
-
config.tools = this.tools.map((t) => {
|
|
145
|
-
const def = {
|
|
146
|
-
type: "function",
|
|
147
|
-
name: t.name,
|
|
148
|
-
description: t.description,
|
|
149
|
-
parameters: t.parameters
|
|
150
|
-
};
|
|
151
|
-
if (t.strict === true) {
|
|
152
|
-
def.strict = true;
|
|
153
|
-
}
|
|
154
|
-
return def;
|
|
155
|
-
});
|
|
156
|
-
}
|
|
157
|
-
ws.send(JSON.stringify({ type: "session.update", session: config }));
|
|
158
|
-
} else if (msg.type === "session.updated") {
|
|
159
|
-
cleanup();
|
|
160
|
-
resolve();
|
|
161
|
-
}
|
|
162
|
-
};
|
|
163
|
-
const onSetupError = (err) => {
|
|
164
|
-
cleanup();
|
|
165
|
-
try {
|
|
166
|
-
ws.close();
|
|
167
|
-
} catch {
|
|
168
|
-
}
|
|
169
|
-
reject(err);
|
|
170
|
-
};
|
|
171
|
-
const cleanup = () => {
|
|
172
|
-
if (settled) return;
|
|
173
|
-
settled = true;
|
|
174
|
-
clearTimeout(timer);
|
|
175
|
-
ws.off("message", onSetupMessage);
|
|
176
|
-
ws.off("error", onSetupError);
|
|
177
|
-
};
|
|
178
|
-
const timer = setTimeout(() => {
|
|
179
|
-
cleanup();
|
|
180
|
-
try {
|
|
181
|
-
ws.close();
|
|
182
|
-
} catch {
|
|
183
|
-
}
|
|
184
|
-
reject(new Error("OpenAI Realtime connect timeout"));
|
|
185
|
-
}, 15e3);
|
|
186
|
-
ws.on("message", onSetupMessage);
|
|
187
|
-
ws.on("error", onSetupError);
|
|
188
|
-
});
|
|
189
|
-
this.heartbeat = setInterval(() => {
|
|
190
|
-
try {
|
|
191
|
-
this.ws?.ping();
|
|
192
|
-
} catch {
|
|
193
|
-
}
|
|
194
|
-
}, 2e4);
|
|
195
|
-
this.ensureMessageListener();
|
|
196
|
-
}
|
|
197
|
-
/** Append a base64-encoded audio chunk to the realtime input buffer. */
|
|
198
|
-
sendAudio(mulawAudio) {
|
|
199
|
-
if (!this.ws || this.ws.readyState !== WebSocket.OPEN) return;
|
|
200
|
-
this.ws.send(JSON.stringify({ type: "input_audio_buffer.append", audio: mulawAudio.toString("base64") }));
|
|
201
|
-
}
|
|
202
|
-
/**
|
|
203
|
-
* Register a listener for parsed realtime events.
|
|
204
|
-
*
|
|
205
|
-
* Previously every call attached a new ``ws.on('message')`` handler,
|
|
206
|
-
* which leaked listeners across retries and multi-consumer hooks. We now
|
|
207
|
-
* route all traffic through a single persistent handler that fans out to
|
|
208
|
-
* a Set of callbacks. Use {@link offEvent} to remove one.
|
|
209
|
-
*/
|
|
210
|
-
onEvent(callback) {
|
|
211
|
-
this.eventCallbacks.add(callback);
|
|
212
|
-
this.ensureMessageListener();
|
|
213
|
-
}
|
|
214
|
-
/** Remove a previously registered {@link onEvent} callback. */
|
|
215
|
-
offEvent(callback) {
|
|
216
|
-
this.eventCallbacks.delete(callback);
|
|
217
|
-
}
|
|
218
|
-
ensureMessageListener() {
|
|
219
|
-
if (this.messageListenerAttached || !this.ws) return;
|
|
220
|
-
this.messageListenerAttached = true;
|
|
221
|
-
const ws = this.ws;
|
|
222
|
-
const dispatch = (type, payload) => {
|
|
223
|
-
for (const cb of this.eventCallbacks) {
|
|
224
|
-
void Promise.resolve(cb(type, payload)).catch(
|
|
225
|
-
(err) => getLogger().error("onEvent callback error:", err)
|
|
226
|
-
);
|
|
227
|
-
}
|
|
228
|
-
};
|
|
229
|
-
ws.on("message", (raw) => {
|
|
230
|
-
let data;
|
|
231
|
-
try {
|
|
232
|
-
data = JSON.parse(raw.toString());
|
|
233
|
-
} catch (e) {
|
|
234
|
-
getLogger().warn(`OpenAI Realtime: failed to parse event message: ${String(e)}`);
|
|
235
|
-
return;
|
|
236
|
-
}
|
|
237
|
-
const t = data.type;
|
|
238
|
-
if (t === "response.audio.delta") {
|
|
239
|
-
const buf = Buffer.from(data.delta ?? "", "base64");
|
|
240
|
-
this.currentResponseAudioMs += estimateAudioMs(buf, this.audioFormat);
|
|
241
|
-
if (this.currentResponseFirstAudioAt === null) {
|
|
242
|
-
this.currentResponseFirstAudioAt = Date.now();
|
|
243
|
-
}
|
|
244
|
-
dispatch("audio", buf);
|
|
245
|
-
} else if (t === "response.audio_transcript.delta") {
|
|
246
|
-
dispatch("transcript_output", data.delta);
|
|
247
|
-
} else if (t === "response.content_part.added" || t === "response.output_item.added") {
|
|
248
|
-
const itemId = data.item?.id ?? data.item_id ?? null;
|
|
249
|
-
if (itemId) {
|
|
250
|
-
this.currentResponseItemId = itemId;
|
|
251
|
-
this.currentResponseAudioMs = 0;
|
|
252
|
-
this.currentResponseFirstAudioAt = null;
|
|
253
|
-
}
|
|
254
|
-
} else if (t === "input_audio_buffer.speech_started") {
|
|
255
|
-
dispatch("speech_started", null);
|
|
256
|
-
} else if (t === "input_audio_buffer.speech_stopped") {
|
|
257
|
-
dispatch("speech_stopped", null);
|
|
258
|
-
} else if (t === "conversation.item.input_audio_transcription.completed") {
|
|
259
|
-
dispatch("transcript_input", data.transcript);
|
|
260
|
-
} else if (t === "response.function_call_arguments.done") {
|
|
261
|
-
dispatch("function_call", { call_id: data.call_id, name: data.name, arguments: data.arguments });
|
|
262
|
-
} else if (t === "response.done") {
|
|
263
|
-
this.currentResponseItemId = null;
|
|
264
|
-
this.currentResponseAudioMs = 0;
|
|
265
|
-
this.currentResponseFirstAudioAt = null;
|
|
266
|
-
dispatch("response_done", data.response ?? null);
|
|
267
|
-
} else if (t === "error") {
|
|
268
|
-
dispatch("error", data.error);
|
|
269
|
-
}
|
|
270
|
-
});
|
|
271
|
-
ws.on("close", (code, reason) => {
|
|
272
|
-
if (code !== 1e3) {
|
|
273
|
-
dispatch("error", {
|
|
274
|
-
type: "connection_closed",
|
|
275
|
-
code,
|
|
276
|
-
reason: reason?.toString() ?? ""
|
|
277
|
-
});
|
|
278
|
-
}
|
|
279
|
-
});
|
|
280
|
-
ws.on("error", (err) => {
|
|
281
|
-
dispatch("error", { type: "socket_error", message: err?.message ?? String(err) });
|
|
282
|
-
});
|
|
283
|
-
}
|
|
284
|
-
/** Truncate the in-flight assistant turn and cancel the active response.
|
|
285
|
-
*
|
|
286
|
-
* ``audio_end_ms`` MUST reflect what the caller actually heard, not what
|
|
287
|
-
* the server generated. OpenAI streams audio at 5-10x real-time, so the
|
|
288
|
-
* byte-derived counter overstates playback whenever the consumer cleared
|
|
289
|
-
* its playout buffer (e.g. ``send_clear``) before the audio reached the
|
|
290
|
-
* speaker. We bound the truncate point by wall-clock time since the first
|
|
291
|
-
* chunk of this response — that's the physical maximum a 1x real-time
|
|
292
|
-
* playback could have produced. Without this cap, OpenAI keeps the full
|
|
293
|
-
* generated assistant text on the transcript, and the model replays /
|
|
294
|
-
* resumes from it on the next turn — manifesting as re-greetings and
|
|
295
|
-
* mid-sentence fragments after a barge-in storm.
|
|
296
|
-
*/
|
|
297
|
-
cancelResponse() {
|
|
298
|
-
if (!this.ws) return;
|
|
299
|
-
if (this.currentResponseItemId) {
|
|
300
|
-
let audioEndMs = this.currentResponseAudioMs;
|
|
301
|
-
if (this.currentResponseFirstAudioAt !== null) {
|
|
302
|
-
const elapsedMs = Date.now() - this.currentResponseFirstAudioAt;
|
|
303
|
-
audioEndMs = Math.min(audioEndMs, Math.max(elapsedMs, 0));
|
|
304
|
-
}
|
|
305
|
-
try {
|
|
306
|
-
this.ws.send(JSON.stringify({
|
|
307
|
-
type: "conversation.item.truncate",
|
|
308
|
-
item_id: this.currentResponseItemId,
|
|
309
|
-
content_index: 0,
|
|
310
|
-
audio_end_ms: audioEndMs
|
|
311
|
-
}));
|
|
312
|
-
} catch (err) {
|
|
313
|
-
getLogger().debug?.(`conversation.item.truncate failed: ${String(err)}`);
|
|
314
|
-
}
|
|
315
|
-
}
|
|
316
|
-
this.ws.send(JSON.stringify({ type: "response.cancel" }));
|
|
317
|
-
this.currentResponseItemId = null;
|
|
318
|
-
this.currentResponseAudioMs = 0;
|
|
319
|
-
this.currentResponseFirstAudioAt = null;
|
|
320
|
-
}
|
|
321
|
-
/** Inject a user text turn and request a new response. */
|
|
322
|
-
async sendText(text) {
|
|
323
|
-
this.ws?.send(JSON.stringify({
|
|
324
|
-
type: "conversation.item.create",
|
|
325
|
-
item: { type: "message", role: "user", content: [{ type: "input_text", text }] }
|
|
326
|
-
}));
|
|
327
|
-
this.ws?.send(JSON.stringify({ type: "response.create" }));
|
|
328
|
-
}
|
|
329
|
-
/**
|
|
330
|
-
* Make the AI speak ``text`` as its opening line.
|
|
331
|
-
*
|
|
332
|
-
* Triggers ``response.create`` with explicit ``instructions`` that force
|
|
333
|
-
* the model to render ``text`` verbatim as its first audio utterance.
|
|
334
|
-
* This is the correct semantics for ``Agent.firstMessage`` per its
|
|
335
|
-
* docstring ("What the AI says when the callee answers").
|
|
336
|
-
*
|
|
337
|
-
* Without this, ``sendText(firstMessage)`` would inject ``text`` as
|
|
338
|
-
* ``role: user`` and the AI would *reply* to its own greeting, producing
|
|
339
|
-
* role-confused openings (e.g. a receptionist agent responding "I'd like
|
|
340
|
-
* to schedule a haircut" because it took its own first_message as a
|
|
341
|
-
* customer cue).
|
|
342
|
-
*/
|
|
343
|
-
async sendFirstMessage(text) {
|
|
344
|
-
this.ws?.send(JSON.stringify({
|
|
345
|
-
type: "response.create",
|
|
346
|
-
response: {
|
|
347
|
-
modalities: ["audio", "text"],
|
|
348
|
-
instructions: `Say exactly the following sentence as your first turn and nothing else: "${text}"`
|
|
349
|
-
}
|
|
350
|
-
}));
|
|
351
|
-
}
|
|
352
|
-
/** Submit a tool/function-call result and request the next response. */
|
|
353
|
-
async sendFunctionResult(callId, result) {
|
|
354
|
-
this.ws?.send(JSON.stringify({
|
|
355
|
-
type: "conversation.item.create",
|
|
356
|
-
item: { type: "function_call_output", call_id: callId, output: result }
|
|
357
|
-
}));
|
|
358
|
-
this.ws?.send(JSON.stringify({ type: "response.create" }));
|
|
359
|
-
}
|
|
360
|
-
/** Stop the heartbeat, drop listeners, and close the Realtime WebSocket. */
|
|
361
|
-
close() {
|
|
362
|
-
if (this.heartbeat) {
|
|
363
|
-
clearInterval(this.heartbeat);
|
|
364
|
-
this.heartbeat = null;
|
|
365
|
-
}
|
|
366
|
-
this.eventCallbacks.clear();
|
|
367
|
-
this.messageListenerAttached = false;
|
|
368
|
-
this.ws?.close();
|
|
369
|
-
this.ws = null;
|
|
370
|
-
}
|
|
371
|
-
};
|
|
372
|
-
function estimateAudioMs(chunk, format) {
|
|
373
|
-
if (chunk.length === 0) return 0;
|
|
374
|
-
if (format === OpenAIRealtimeAudioFormat.G711_ULAW || format === OpenAIRealtimeAudioFormat.G711_ALAW)
|
|
375
|
-
return Math.floor(chunk.length / 8);
|
|
376
|
-
if (format === OpenAIRealtimeAudioFormat.PCM16) {
|
|
377
|
-
return Math.floor(chunk.length / 48);
|
|
378
|
-
}
|
|
379
|
-
return 0;
|
|
380
|
-
}
|
|
381
|
-
|
|
382
32
|
// src/providers/elevenlabs-convai.ts
|
|
383
33
|
init_esm_shims();
|
|
384
|
-
import
|
|
34
|
+
import WebSocket from "ws";
|
|
385
35
|
var ELEVENLABS_CONVAI_URL = "wss://api.elevenlabs.io/v1/convai/conversation";
|
|
386
36
|
var ELEVENLABS_SIGNED_URL = "https://api.elevenlabs.io/v1/convai/conversation/get-signed-url";
|
|
387
37
|
var AGENT_SILENCE_MS = 500;
|
|
@@ -503,8 +153,8 @@ var ElevenLabsConvAIAdapter = class _ElevenLabsConvAIAdapter {
|
|
|
503
153
|
wsUrl = this.agentId ? `${ELEVENLABS_CONVAI_URL}?agent_id=${encodeURIComponent(this.agentId)}` : ELEVENLABS_CONVAI_URL;
|
|
504
154
|
wsOptions = { headers: { "xi-api-key": this.apiKey } };
|
|
505
155
|
}
|
|
506
|
-
this.ws = new
|
|
507
|
-
await new Promise((
|
|
156
|
+
this.ws = new WebSocket(wsUrl, wsOptions);
|
|
157
|
+
await new Promise((resolve2, reject) => {
|
|
508
158
|
const timeout = setTimeout(
|
|
509
159
|
() => reject(new Error("ElevenLabs ConvAI connect timeout")),
|
|
510
160
|
15e3
|
|
@@ -528,7 +178,7 @@ var ElevenLabsConvAIAdapter = class _ElevenLabsConvAIAdapter {
|
|
|
528
178
|
conversation_config_override: override
|
|
529
179
|
};
|
|
530
180
|
this.ws.send(JSON.stringify(config));
|
|
531
|
-
|
|
181
|
+
resolve2();
|
|
532
182
|
});
|
|
533
183
|
this.ws.once("error", (err) => {
|
|
534
184
|
clearTimeout(timeout);
|
|
@@ -565,7 +215,7 @@ var ElevenLabsConvAIAdapter = class _ElevenLabsConvAIAdapter {
|
|
|
565
215
|
}
|
|
566
216
|
respondToPing(eventId, delayMs) {
|
|
567
217
|
const send = () => {
|
|
568
|
-
if (!this.ws || this.ws.readyState !==
|
|
218
|
+
if (!this.ws || this.ws.readyState !== WebSocket.OPEN) return;
|
|
569
219
|
try {
|
|
570
220
|
this.ws.send(JSON.stringify({ type: "pong", event_id: eventId }));
|
|
571
221
|
} catch (err) {
|
|
@@ -662,7 +312,7 @@ var ElevenLabsConvAIAdapter = class _ElevenLabsConvAIAdapter {
|
|
|
662
312
|
}
|
|
663
313
|
/** Send a caller-side audio chunk to ConvAI as a base64 `user_audio_chunk`. */
|
|
664
314
|
sendAudio(audioBytes) {
|
|
665
|
-
if (!this.ws || this.ws.readyState !==
|
|
315
|
+
if (!this.ws || this.ws.readyState !== WebSocket.OPEN) return;
|
|
666
316
|
this.ws.send(
|
|
667
317
|
JSON.stringify({
|
|
668
318
|
user_audio_chunk: audioBytes.toString("base64")
|
|
@@ -685,20 +335,20 @@ var ElevenLabsConvAIAdapter = class _ElevenLabsConvAIAdapter {
|
|
|
685
335
|
return;
|
|
686
336
|
}
|
|
687
337
|
const ws = this.ws;
|
|
688
|
-
this.closePromise = new Promise((
|
|
689
|
-
if (ws.readyState ===
|
|
690
|
-
|
|
338
|
+
this.closePromise = new Promise((resolve2) => {
|
|
339
|
+
if (ws.readyState === WebSocket.CLOSED || ws.readyState === WebSocket.CLOSING) {
|
|
340
|
+
resolve2();
|
|
691
341
|
return;
|
|
692
342
|
}
|
|
693
343
|
const done = () => {
|
|
694
|
-
|
|
344
|
+
resolve2();
|
|
695
345
|
};
|
|
696
346
|
ws.once("close", done);
|
|
697
347
|
ws.once("error", done);
|
|
698
348
|
try {
|
|
699
349
|
ws.close();
|
|
700
350
|
} catch {
|
|
701
|
-
|
|
351
|
+
resolve2();
|
|
702
352
|
}
|
|
703
353
|
});
|
|
704
354
|
try {
|
|
@@ -722,6 +372,8 @@ async function createTTS(agent) {
|
|
|
722
372
|
|
|
723
373
|
// src/pricing.ts
|
|
724
374
|
init_esm_shims();
|
|
375
|
+
var PRICING_VERSION = "2026.3";
|
|
376
|
+
var PRICING_LAST_UPDATED = "2026-05-08";
|
|
725
377
|
var PricingUnit = {
|
|
726
378
|
MINUTE: "minute",
|
|
727
379
|
THOUSAND_CHARS: "1k_chars",
|
|
@@ -750,14 +402,26 @@ var DEFAULT_PRICING = {
|
|
|
750
402
|
// STT — per minute of audio processed.
|
|
751
403
|
deepgram: {
|
|
752
404
|
unit: PricingUnit.MINUTE,
|
|
753
|
-
// Default = Nova-3 streaming monolingual ($0.
|
|
754
|
-
//
|
|
755
|
-
|
|
405
|
+
// Default = Nova-3 streaming monolingual ($0.0048/min, current Pay-
|
|
406
|
+
// As-You-Go promotional rate). Source: https://deepgram.com/pricing
|
|
407
|
+
// (verified 2026-05-11). The promo replaces the standard $0.0077/min
|
|
408
|
+
// quoted at Nova-3 launch and is the rate customers actually pay
|
|
409
|
+
// today; revisit when Deepgram removes the "Limited-time promotional
|
|
410
|
+
// rates on streaming" banner.
|
|
411
|
+
price: 48e-4,
|
|
756
412
|
models: {
|
|
757
|
-
|
|
758
|
-
"nova-3
|
|
413
|
+
// Nova-3 family — current flagship.
|
|
414
|
+
"nova-3": { price: 48e-4 },
|
|
415
|
+
"nova-3-multilingual": { price: 58e-4 },
|
|
416
|
+
// Flux family — new event-driven turn-taking STT (2026 launch).
|
|
417
|
+
flux: { price: 65e-4 },
|
|
418
|
+
"flux-english": { price: 65e-4 },
|
|
419
|
+
"flux-multilingual": { price: 78e-4 },
|
|
420
|
+
// Legacy Nova-2 / Nova-1 — still supported but no longer featured on
|
|
421
|
+
// the public pricing page; rates kept as last verified.
|
|
759
422
|
"nova-2": { price: 58e-4 },
|
|
760
423
|
nova: { price: 43e-4 },
|
|
424
|
+
// Whisper Cloud via Deepgram — separate tier.
|
|
761
425
|
"whisper-large": { price: 48e-4 },
|
|
762
426
|
"whisper-medium": { price: 48e-4 }
|
|
763
427
|
}
|
|
@@ -796,27 +460,30 @@ var DEFAULT_PRICING = {
|
|
|
796
460
|
// retired; users were being over-billed ~4.3x.
|
|
797
461
|
speechmatics: { unit: PricingUnit.MINUTE, price: 4e-3 },
|
|
798
462
|
// TTS — per 1,000 characters synthesized.
|
|
463
|
+
// Source: https://elevenlabs.io/pricing/api (verified 2026-05-11). The
|
|
464
|
+
// per-1K-character API/overage rate is flat across all plan tiers (Free
|
|
465
|
+
// through Business); only the included character bundle varies by plan.
|
|
799
466
|
elevenlabs: {
|
|
800
467
|
unit: PricingUnit.THOUSAND_CHARS,
|
|
801
|
-
// Default = eleven_flash_v2_5 (Patter's default model) at $0.
|
|
802
|
-
price: 0.
|
|
468
|
+
// Default = eleven_flash_v2_5 (Patter's default model) at $0.05/1k.
|
|
469
|
+
price: 0.05,
|
|
803
470
|
models: {
|
|
804
|
-
eleven_flash_v2_5: { price: 0.
|
|
471
|
+
eleven_flash_v2_5: { price: 0.05 },
|
|
805
472
|
eleven_turbo_v2_5: { price: 0.05 },
|
|
806
|
-
eleven_multilingual_v2: { price: 0.
|
|
807
|
-
eleven_monolingual_v1: { price: 0.
|
|
808
|
-
eleven_v3: { price: 0.
|
|
473
|
+
eleven_multilingual_v2: { price: 0.1 },
|
|
474
|
+
eleven_monolingual_v1: { price: 0.1 },
|
|
475
|
+
eleven_v3: { price: 0.1 }
|
|
809
476
|
}
|
|
810
477
|
},
|
|
811
478
|
// ElevenLabs WebSocket streaming TTS shares pricing with REST.
|
|
812
479
|
elevenlabs_ws: {
|
|
813
480
|
unit: PricingUnit.THOUSAND_CHARS,
|
|
814
|
-
price: 0.
|
|
481
|
+
price: 0.05,
|
|
815
482
|
models: {
|
|
816
|
-
eleven_flash_v2_5: { price: 0.
|
|
483
|
+
eleven_flash_v2_5: { price: 0.05 },
|
|
817
484
|
eleven_turbo_v2_5: { price: 0.05 },
|
|
818
|
-
eleven_multilingual_v2: { price: 0.
|
|
819
|
-
eleven_v3: { price: 0.
|
|
485
|
+
eleven_multilingual_v2: { price: 0.1 },
|
|
486
|
+
eleven_v3: { price: 0.1 }
|
|
820
487
|
}
|
|
821
488
|
},
|
|
822
489
|
openai_tts: {
|
|
@@ -946,7 +613,24 @@ var DEFAULT_PRICING = {
|
|
|
946
613
|
// calls on a local number). For US toll-free inbound ($0.022/min) or US
|
|
947
614
|
// outbound local ($0.0140/min), override via Patter({ pricing: { twilio: {...} } }).
|
|
948
615
|
twilio: { unit: PricingUnit.MINUTE, price: 85e-4 },
|
|
949
|
-
|
|
616
|
+
// Telnyx — direction-aware rates as of 2026-05-11.
|
|
617
|
+
// Sources:
|
|
618
|
+
// https://telnyx.com/pricing/elastic-sip
|
|
619
|
+
// https://telnyx.com/pricing/voice-api
|
|
620
|
+
// US inbound (DID / local termination, Pay-As-You-Go): $0.0035/min
|
|
621
|
+
// US outbound (Pay-As-You-Go, mid-range of $0.005-$0.009): $0.007/min
|
|
622
|
+
// Billing granularity is per-MINUTE (Telnyx rounds partial minutes up
|
|
623
|
+
// on the invoice; prior internal docs incorrectly claimed per-second).
|
|
624
|
+
// The legacy ``telnyx`` key is preserved at the outbound rate as a
|
|
625
|
+
// safe fallback for users who override ``pricing: { telnyx: {...} }``
|
|
626
|
+
// without knowing the direction; the metrics layer currently uses
|
|
627
|
+
// this flat key (direction is not threaded through to
|
|
628
|
+
// ``calculateTelephonyCost``). Direction-aware billing can be enabled
|
|
629
|
+
// by override-only: ``new Patter({ pricing: { telnyx: { unit: 'minute',
|
|
630
|
+
// price: 0.0035 } } })`` to bill all inbound at the lower rate.
|
|
631
|
+
telnyx: { unit: PricingUnit.MINUTE, price: 7e-3 },
|
|
632
|
+
telnyx_inbound: { unit: PricingUnit.MINUTE, price: 35e-4 },
|
|
633
|
+
telnyx_outbound: { unit: PricingUnit.MINUTE, price: 7e-3 }
|
|
950
634
|
};
|
|
951
635
|
function cloneProviderEntry(entry) {
|
|
952
636
|
const out = { ...entry };
|
|
@@ -1083,16 +767,18 @@ var llmPricing = {
|
|
|
1083
767
|
"gemma2-9b-it": { input: 0.2, output: 0.2 }
|
|
1084
768
|
},
|
|
1085
769
|
cerebras: {
|
|
1086
|
-
// Rates as of 2026-05-
|
|
1087
|
-
// ``
|
|
1088
|
-
//
|
|
1089
|
-
//
|
|
1090
|
-
// with the
|
|
1091
|
-
|
|
1092
|
-
"
|
|
770
|
+
// Rates as of 2026-05-11 verified against the canonical per-model docs
|
|
771
|
+
// pages at ``https://inference-docs.cerebras.ai/models/<model>``. The
|
|
772
|
+
// previous 2026-05-08 update overcharged across the board (gpt-oss-120b
|
|
773
|
+
// 2.4x input, qwen-3-235b 1.67x input) because it conflated the launch
|
|
774
|
+
// blog quotes with the "Exploration pricing" banner now shown on each
|
|
775
|
+
// model page. Parity with libraries/python/getpatter/pricing.py.
|
|
776
|
+
"gpt-oss-120b": { input: 0.35, output: 0.75 },
|
|
777
|
+
"llama3.1-8b": { input: 0.1, output: 0.1 },
|
|
1093
778
|
"llama-3.3-70b": { input: 0.85, output: 1.2 },
|
|
1094
779
|
"qwen-3-32b": { input: 0.4, output: 0.8 },
|
|
1095
|
-
"qwen-3-235b-a22b-instruct-2507": { input:
|
|
780
|
+
"qwen-3-235b-a22b-instruct-2507": { input: 0.6, output: 1.2 },
|
|
781
|
+
"qwen-3-coder-480b": { input: 2, output: 2 },
|
|
1096
782
|
"zai-glm-4.7": { input: 0.85, output: 1.2 }
|
|
1097
783
|
},
|
|
1098
784
|
// OpenAI Chat Completions (non-Realtime) — mirrors the Python SDK pricing table.
|
|
@@ -1137,12 +823,45 @@ function calculateTelephonyCost(provider2, durationSeconds, pricing) {
|
|
|
1137
823
|
// src/dashboard/store.ts
|
|
1138
824
|
init_esm_shims();
|
|
1139
825
|
import { EventEmitter } from "events";
|
|
826
|
+
import * as fs2 from "fs";
|
|
827
|
+
import * as path2 from "path";
|
|
828
|
+
|
|
829
|
+
// src/version.ts
|
|
830
|
+
init_esm_shims();
|
|
1140
831
|
import * as fs from "fs";
|
|
1141
832
|
import * as path from "path";
|
|
833
|
+
function readVersion() {
|
|
834
|
+
try {
|
|
835
|
+
const pkgPath = path.resolve(__dirname, "..", "package.json");
|
|
836
|
+
const pkg = JSON.parse(fs.readFileSync(pkgPath, "utf8"));
|
|
837
|
+
return typeof pkg.version === "string" && pkg.version.length > 0 ? pkg.version : "";
|
|
838
|
+
} catch {
|
|
839
|
+
return "";
|
|
840
|
+
}
|
|
841
|
+
}
|
|
842
|
+
var VERSION = readVersion();
|
|
843
|
+
|
|
844
|
+
// src/dashboard/store.ts
|
|
845
|
+
function sdkVersion() {
|
|
846
|
+
return VERSION;
|
|
847
|
+
}
|
|
1142
848
|
var MetricsStore = class extends EventEmitter {
|
|
1143
849
|
maxCalls;
|
|
1144
850
|
calls = [];
|
|
1145
851
|
activeCalls = /* @__PURE__ */ new Map();
|
|
852
|
+
/**
|
|
853
|
+
* User-driven soft delete: call_ids the operator removed from the
|
|
854
|
+
* dashboard view. The on-disk artefacts written by ``CallLogger``
|
|
855
|
+
* (``metadata.json``, ``transcript.jsonl``) are intentionally NOT
|
|
856
|
+
* touched — they serve as the durable backup. All read paths
|
|
857
|
+
* (``getCalls`` / ``getCall`` / ``getAggregates`` / ``getCallsInRange``
|
|
858
|
+
* / ``hydrate``) filter against this set so the call is invisible
|
|
859
|
+
* to the UI and excluded from rolling metrics. Populated from
|
|
860
|
+
* ``<logRoot>/.deleted_call_ids.json`` on hydrate so deletions
|
|
861
|
+
* survive a process restart. Parity with Python.
|
|
862
|
+
*/
|
|
863
|
+
deletedCallIds = /* @__PURE__ */ new Set();
|
|
864
|
+
deletedIdsPath = null;
|
|
1146
865
|
/**
|
|
1147
866
|
* Accepts either a numeric ``maxCalls`` (legacy positional — matches the
|
|
1148
867
|
* original TS API) or an options object ``{ maxCalls }`` to align with the
|
|
@@ -1235,6 +954,8 @@ var MetricsStore = class extends EventEmitter {
|
|
|
1235
954
|
ended_at: Date.now() / 1e3,
|
|
1236
955
|
status,
|
|
1237
956
|
metrics: null,
|
|
957
|
+
...active.turns && active.turns.length > 0 ? { turns: active.turns } : {},
|
|
958
|
+
...active.transcript && active.transcript.length > 0 ? { transcript: active.transcript } : {},
|
|
1238
959
|
...extra
|
|
1239
960
|
};
|
|
1240
961
|
this.activeCalls.delete(callId);
|
|
@@ -1263,6 +984,21 @@ var MetricsStore = class extends EventEmitter {
|
|
|
1263
984
|
if (active) {
|
|
1264
985
|
if (!active.turns) active.turns = [];
|
|
1265
986
|
active.turns.push(turn);
|
|
987
|
+
if (!active.transcript) active.transcript = [];
|
|
988
|
+
const turnRecord = turn;
|
|
989
|
+
const userText = typeof turnRecord.user_text === "string" ? turnRecord.user_text : "";
|
|
990
|
+
const agentText = typeof turnRecord.agent_text === "string" ? turnRecord.agent_text : "";
|
|
991
|
+
const ts = typeof turnRecord.timestamp === "number" ? turnRecord.timestamp : Date.now() / 1e3;
|
|
992
|
+
if (userText.length > 0) {
|
|
993
|
+
active.transcript.push({ role: "user", text: userText, timestamp: ts });
|
|
994
|
+
}
|
|
995
|
+
if (agentText.length > 0 && agentText !== "[interrupted]") {
|
|
996
|
+
active.transcript.push({
|
|
997
|
+
role: "assistant",
|
|
998
|
+
text: agentText,
|
|
999
|
+
timestamp: ts
|
|
1000
|
+
});
|
|
1001
|
+
}
|
|
1266
1002
|
}
|
|
1267
1003
|
this.publish("turn_complete", { call_id: callId, turn });
|
|
1268
1004
|
}
|
|
@@ -1272,40 +1008,140 @@ var MetricsStore = class extends EventEmitter {
|
|
|
1272
1008
|
if (!callId) return;
|
|
1273
1009
|
const active = this.activeCalls.get(callId);
|
|
1274
1010
|
this.activeCalls.delete(callId);
|
|
1275
|
-
|
|
1276
|
-
|
|
1011
|
+
let existingIdx = -1;
|
|
1012
|
+
if (active === void 0) {
|
|
1013
|
+
for (let i = this.calls.length - 1; i >= 0; i--) {
|
|
1014
|
+
if (this.calls[i].call_id === callId) {
|
|
1015
|
+
existingIdx = i;
|
|
1016
|
+
break;
|
|
1017
|
+
}
|
|
1018
|
+
}
|
|
1019
|
+
}
|
|
1020
|
+
const existing = existingIdx >= 0 ? this.calls[existingIdx] : void 0;
|
|
1021
|
+
const priorStatus = active?.status ?? existing?.status;
|
|
1022
|
+
const resolvedStatus = priorStatus && priorStatus !== "in-progress" ? priorStatus : "completed";
|
|
1023
|
+
const dataTranscript = data.transcript;
|
|
1024
|
+
const resolvedTranscript = dataTranscript && dataTranscript.length > 0 ? dataTranscript : active?.transcript && active.transcript.length > 0 ? active.transcript : existing?.transcript && existing.transcript.length > 0 ? existing.transcript : [];
|
|
1025
|
+
const resolvedTurns = active?.turns && active.turns.length > 0 ? active.turns : existing?.turns && existing.turns.length > 0 ? existing.turns : void 0;
|
|
1277
1026
|
const entry = {
|
|
1278
1027
|
call_id: callId,
|
|
1279
|
-
caller: data.caller || active?.caller || "",
|
|
1280
|
-
callee: data.callee || active?.callee || "",
|
|
1281
|
-
direction: active?.direction || data.direction || "inbound",
|
|
1282
|
-
started_at: active?.started_at || 0,
|
|
1028
|
+
caller: data.caller || active?.caller || existing?.caller || "",
|
|
1029
|
+
callee: data.callee || active?.callee || existing?.callee || "",
|
|
1030
|
+
direction: active?.direction || existing?.direction || data.direction || "inbound",
|
|
1031
|
+
started_at: active?.started_at || existing?.started_at || 0,
|
|
1283
1032
|
ended_at: Date.now() / 1e3,
|
|
1284
|
-
transcript:
|
|
1033
|
+
transcript: resolvedTranscript,
|
|
1034
|
+
...resolvedTurns ? { turns: resolvedTurns } : {},
|
|
1285
1035
|
status: resolvedStatus,
|
|
1286
|
-
metrics: metrics ?? null
|
|
1036
|
+
metrics: metrics ?? existing?.metrics ?? null
|
|
1287
1037
|
};
|
|
1288
|
-
|
|
1289
|
-
|
|
1290
|
-
|
|
1038
|
+
if (existingIdx >= 0) {
|
|
1039
|
+
this.calls[existingIdx] = entry;
|
|
1040
|
+
} else {
|
|
1041
|
+
this.calls.push(entry);
|
|
1042
|
+
if (this.calls.length > this.maxCalls) {
|
|
1043
|
+
this.calls = this.calls.slice(-this.maxCalls);
|
|
1044
|
+
}
|
|
1291
1045
|
}
|
|
1292
1046
|
this.publish("call_end", {
|
|
1293
1047
|
call_id: callId,
|
|
1294
1048
|
metrics: entry.metrics ?? null
|
|
1295
1049
|
});
|
|
1296
1050
|
}
|
|
1297
|
-
/**
|
|
1051
|
+
/**
|
|
1052
|
+
* Return a window of completed calls in newest-first order.
|
|
1053
|
+
*
|
|
1054
|
+
* Soft-deleted call_ids (see ``deleteCalls``) are filtered out so the
|
|
1055
|
+
* dashboard never re-shows a row the user removed. The on-disk
|
|
1056
|
+
* artefacts are intentionally preserved as a backup.
|
|
1057
|
+
*/
|
|
1298
1058
|
getCalls(limit = 50, offset = 0) {
|
|
1299
|
-
const
|
|
1059
|
+
const visible = this.calls.filter((c) => !this.deletedCallIds.has(c.call_id));
|
|
1060
|
+
const ordered = visible.reverse();
|
|
1300
1061
|
return ordered.slice(offset, offset + limit);
|
|
1301
1062
|
}
|
|
1302
|
-
/**
|
|
1063
|
+
/**
|
|
1064
|
+
* Look up a completed call by id (newest match wins).
|
|
1065
|
+
*
|
|
1066
|
+
* Soft-deleted call_ids resolve to ``null`` so the SPA's detail pane
|
|
1067
|
+
* cannot render a row the user removed.
|
|
1068
|
+
*/
|
|
1303
1069
|
getCall(callId) {
|
|
1070
|
+
if (this.deletedCallIds.has(callId)) return null;
|
|
1304
1071
|
for (let i = this.calls.length - 1; i >= 0; i--) {
|
|
1305
1072
|
if (this.calls[i].call_id === callId) return this.calls[i];
|
|
1306
1073
|
}
|
|
1307
1074
|
return null;
|
|
1308
1075
|
}
|
|
1076
|
+
/**
|
|
1077
|
+
* Soft-delete one or more calls from the dashboard view.
|
|
1078
|
+
*
|
|
1079
|
+
* Adds each ``call_id`` to an in-memory set. Subsequent reads via
|
|
1080
|
+
* ``getCalls`` / ``getCall`` / ``getAggregates`` / ``getCallsInRange``
|
|
1081
|
+
* exclude the deleted ids, so rolling metrics (avg latency, total
|
|
1082
|
+
* spend) are recomputed without them. The on-disk
|
|
1083
|
+
* ``metadata.json`` / ``transcript.jsonl`` files written by
|
|
1084
|
+
* ``CallLogger`` are NOT touched — they serve as a durable backup
|
|
1085
|
+
* the operator can audit outside the dashboard.
|
|
1086
|
+
*
|
|
1087
|
+
* Active calls are never deletable. A call_id that is currently
|
|
1088
|
+
* in ``activeCalls`` is silently skipped so a mid-call delete
|
|
1089
|
+
* from the UI cannot orphan the live transcript pane.
|
|
1090
|
+
*
|
|
1091
|
+
* Persisted to ``<logRoot>/.deleted_call_ids.json`` (best-effort)
|
|
1092
|
+
* when ``hydrate()`` has been called with a log root. Parity with
|
|
1093
|
+
* Python ``delete_calls``.
|
|
1094
|
+
*
|
|
1095
|
+
* @returns The list of call_ids actually accepted as deleted.
|
|
1096
|
+
*/
|
|
1097
|
+
deleteCalls(callIds) {
|
|
1098
|
+
const ids = /* @__PURE__ */ new Set();
|
|
1099
|
+
for (const cid of callIds || []) {
|
|
1100
|
+
if (typeof cid === "string" && cid && !this.activeCalls.has(cid)) {
|
|
1101
|
+
ids.add(cid);
|
|
1102
|
+
}
|
|
1103
|
+
}
|
|
1104
|
+
if (ids.size === 0) return [];
|
|
1105
|
+
const accepted = [];
|
|
1106
|
+
for (const cid of ids) {
|
|
1107
|
+
if (!this.deletedCallIds.has(cid)) {
|
|
1108
|
+
this.deletedCallIds.add(cid);
|
|
1109
|
+
accepted.push(cid);
|
|
1110
|
+
}
|
|
1111
|
+
}
|
|
1112
|
+
if (accepted.length === 0) return [];
|
|
1113
|
+
accepted.sort();
|
|
1114
|
+
this.persistDeletedIds();
|
|
1115
|
+
this.publish("calls_deleted", { call_ids: accepted });
|
|
1116
|
+
return accepted;
|
|
1117
|
+
}
|
|
1118
|
+
/** Whether ``callId`` was soft-deleted from the dashboard. */
|
|
1119
|
+
isDeleted(callId) {
|
|
1120
|
+
return this.deletedCallIds.has(callId);
|
|
1121
|
+
}
|
|
1122
|
+
/** Snapshot of soft-deleted call_ids (sorted). */
|
|
1123
|
+
getDeletedCallIds() {
|
|
1124
|
+
return Array.from(this.deletedCallIds).sort();
|
|
1125
|
+
}
|
|
1126
|
+
/** Atomically persist the deleted-ids set to disk. Best-effort. */
|
|
1127
|
+
persistDeletedIds() {
|
|
1128
|
+
if (this.deletedIdsPath === null) return;
|
|
1129
|
+
try {
|
|
1130
|
+
const dir = path2.dirname(this.deletedIdsPath);
|
|
1131
|
+
fs2.mkdirSync(dir, { recursive: true });
|
|
1132
|
+
const tmp = this.deletedIdsPath + ".tmp";
|
|
1133
|
+
const payload = {
|
|
1134
|
+
version: 1,
|
|
1135
|
+
deleted_call_ids: Array.from(this.deletedCallIds).sort()
|
|
1136
|
+
};
|
|
1137
|
+
fs2.writeFileSync(tmp, JSON.stringify(payload, null, 2), "utf8");
|
|
1138
|
+
fs2.renameSync(tmp, this.deletedIdsPath);
|
|
1139
|
+
} catch (err) {
|
|
1140
|
+
getLogger().debug(
|
|
1141
|
+
`MetricsStore.persistDeletedIds: ${String(err)}`
|
|
1142
|
+
);
|
|
1143
|
+
}
|
|
1144
|
+
}
|
|
1309
1145
|
/** Look up an active call by id (returns undefined if not active or unknown). */
|
|
1310
1146
|
getActive(callId) {
|
|
1311
1147
|
return this.activeCalls.get(callId);
|
|
@@ -1314,9 +1150,17 @@ var MetricsStore = class extends EventEmitter {
|
|
|
1314
1150
|
getActiveCalls() {
|
|
1315
1151
|
return Array.from(this.activeCalls.values());
|
|
1316
1152
|
}
|
|
1317
|
-
/**
|
|
1153
|
+
/**
|
|
1154
|
+
* Compute summary statistics across the buffered call history.
|
|
1155
|
+
*
|
|
1156
|
+
* Soft-deleted calls are excluded so rolling metrics (avg latency,
|
|
1157
|
+
* total spend) match exactly what the operator sees in the call list.
|
|
1158
|
+
*/
|
|
1318
1159
|
getAggregates() {
|
|
1319
|
-
const
|
|
1160
|
+
const visible = this.calls.filter(
|
|
1161
|
+
(c) => !this.deletedCallIds.has(c.call_id)
|
|
1162
|
+
);
|
|
1163
|
+
const totalCalls = visible.length;
|
|
1320
1164
|
if (totalCalls === 0) {
|
|
1321
1165
|
return {
|
|
1322
1166
|
total_calls: 0,
|
|
@@ -1324,7 +1168,8 @@ var MetricsStore = class extends EventEmitter {
|
|
|
1324
1168
|
avg_duration: 0,
|
|
1325
1169
|
avg_latency_ms: 0,
|
|
1326
1170
|
cost_breakdown: { stt: 0, tts: 0, llm: 0, telephony: 0 },
|
|
1327
|
-
active_calls: this.activeCalls.size
|
|
1171
|
+
active_calls: this.activeCalls.size,
|
|
1172
|
+
sdk_version: sdkVersion()
|
|
1328
1173
|
};
|
|
1329
1174
|
}
|
|
1330
1175
|
let totalCost = 0;
|
|
@@ -1335,7 +1180,7 @@ var MetricsStore = class extends EventEmitter {
|
|
|
1335
1180
|
let costTts = 0;
|
|
1336
1181
|
let costLlm = 0;
|
|
1337
1182
|
let costTel = 0;
|
|
1338
|
-
for (const call of
|
|
1183
|
+
for (const call of visible) {
|
|
1339
1184
|
const m = call.metrics;
|
|
1340
1185
|
if (!m) continue;
|
|
1341
1186
|
const cost = m.cost || {};
|
|
@@ -1346,7 +1191,7 @@ var MetricsStore = class extends EventEmitter {
|
|
|
1346
1191
|
costTel += cost.telephony || 0;
|
|
1347
1192
|
totalDuration += m.duration_seconds || 0;
|
|
1348
1193
|
const avgLat = m.latency_avg || {};
|
|
1349
|
-
const tMs = avgLat.total_ms || 0;
|
|
1194
|
+
const tMs = avgLat.agent_response_ms || avgLat.total_ms || 0;
|
|
1350
1195
|
if (tMs > 0) {
|
|
1351
1196
|
totalLatency += tMs;
|
|
1352
1197
|
latencyCount++;
|
|
@@ -1363,21 +1208,30 @@ var MetricsStore = class extends EventEmitter {
|
|
|
1363
1208
|
llm: Math.round(costLlm * 1e6) / 1e6,
|
|
1364
1209
|
telephony: Math.round(costTel * 1e6) / 1e6
|
|
1365
1210
|
},
|
|
1366
|
-
active_calls: this.activeCalls.size
|
|
1211
|
+
active_calls: this.activeCalls.size,
|
|
1212
|
+
sdk_version: sdkVersion()
|
|
1367
1213
|
};
|
|
1368
1214
|
}
|
|
1369
|
-
/**
|
|
1215
|
+
/**
|
|
1216
|
+
* Return calls whose `started_at` falls within `[fromTs, toTs]` (Unix
|
|
1217
|
+
* seconds). Soft-deleted calls are filtered out.
|
|
1218
|
+
*/
|
|
1370
1219
|
getCallsInRange(fromTs = 0, toTs = 0) {
|
|
1371
1220
|
return this.calls.filter((call) => {
|
|
1221
|
+
if (this.deletedCallIds.has(call.call_id)) return false;
|
|
1372
1222
|
const started = call.started_at || 0;
|
|
1373
1223
|
if (fromTs && started < fromTs) return false;
|
|
1374
1224
|
if (toTs && started > toTs) return false;
|
|
1375
1225
|
return true;
|
|
1376
1226
|
});
|
|
1377
1227
|
}
|
|
1378
|
-
/** Number of completed calls currently in the ring buffer. */
|
|
1228
|
+
/** Number of completed (non-deleted) calls currently in the ring buffer. */
|
|
1379
1229
|
get callCount() {
|
|
1380
|
-
|
|
1230
|
+
let n = 0;
|
|
1231
|
+
for (const c of this.calls) {
|
|
1232
|
+
if (!this.deletedCallIds.has(c.call_id)) n++;
|
|
1233
|
+
}
|
|
1234
|
+
return n;
|
|
1381
1235
|
}
|
|
1382
1236
|
/**
|
|
1383
1237
|
* Rebuild the in-memory call list from `metadata.json` files written by
|
|
@@ -1391,19 +1245,37 @@ var MetricsStore = class extends EventEmitter {
|
|
|
1391
1245
|
*/
|
|
1392
1246
|
hydrate(logRoot) {
|
|
1393
1247
|
if (!logRoot) return 0;
|
|
1394
|
-
const
|
|
1395
|
-
|
|
1248
|
+
const deletedIdsPath = path2.join(logRoot, ".deleted_call_ids.json");
|
|
1249
|
+
this.deletedIdsPath = deletedIdsPath;
|
|
1250
|
+
if (fs2.existsSync(deletedIdsPath)) {
|
|
1251
|
+
try {
|
|
1252
|
+
const raw = fs2.readFileSync(deletedIdsPath, "utf8");
|
|
1253
|
+
const payload = JSON.parse(raw);
|
|
1254
|
+
const arr = Array.isArray(payload.deleted_call_ids) ? payload.deleted_call_ids : [];
|
|
1255
|
+
for (const cid of arr) {
|
|
1256
|
+
if (typeof cid === "string" && cid.length > 0) {
|
|
1257
|
+
this.deletedCallIds.add(cid);
|
|
1258
|
+
}
|
|
1259
|
+
}
|
|
1260
|
+
} catch (err) {
|
|
1261
|
+
getLogger().debug(
|
|
1262
|
+
`MetricsStore.hydrate: skipping ${deletedIdsPath}: ${String(err)}`
|
|
1263
|
+
);
|
|
1264
|
+
}
|
|
1265
|
+
}
|
|
1266
|
+
const callsRoot = path2.join(logRoot, "calls");
|
|
1267
|
+
if (!fs2.existsSync(callsRoot)) return 0;
|
|
1396
1268
|
const collected = [];
|
|
1397
1269
|
const seen = new Set(this.calls.map((c) => c.call_id));
|
|
1398
1270
|
const walk = (dir, depth) => {
|
|
1399
1271
|
let entries;
|
|
1400
1272
|
try {
|
|
1401
|
-
entries =
|
|
1273
|
+
entries = fs2.readdirSync(dir, { withFileTypes: true });
|
|
1402
1274
|
} catch {
|
|
1403
1275
|
return;
|
|
1404
1276
|
}
|
|
1405
1277
|
for (const entry of entries) {
|
|
1406
|
-
const childPath =
|
|
1278
|
+
const childPath = path2.join(dir, entry.name);
|
|
1407
1279
|
if (depth < 3) {
|
|
1408
1280
|
if (entry.isDirectory() && /^\d+$/.test(entry.name)) {
|
|
1409
1281
|
walk(childPath, depth + 1);
|
|
@@ -1411,10 +1283,10 @@ var MetricsStore = class extends EventEmitter {
|
|
|
1411
1283
|
continue;
|
|
1412
1284
|
}
|
|
1413
1285
|
if (!entry.isDirectory()) continue;
|
|
1414
|
-
const metadataPath =
|
|
1415
|
-
if (!
|
|
1286
|
+
const metadataPath = path2.join(childPath, "metadata.json");
|
|
1287
|
+
if (!fs2.existsSync(metadataPath)) continue;
|
|
1416
1288
|
try {
|
|
1417
|
-
const raw =
|
|
1289
|
+
const raw = fs2.readFileSync(metadataPath, "utf8");
|
|
1418
1290
|
const meta = JSON.parse(raw);
|
|
1419
1291
|
const callId = meta.call_id || entry.name;
|
|
1420
1292
|
if (!callId || seen.has(callId)) continue;
|
|
@@ -1425,6 +1297,12 @@ var MetricsStore = class extends EventEmitter {
|
|
|
1425
1297
|
);
|
|
1426
1298
|
continue;
|
|
1427
1299
|
}
|
|
1300
|
+
if (!record.transcript || record.transcript.length === 0) {
|
|
1301
|
+
const fromJsonl = loadTranscriptJsonl(
|
|
1302
|
+
path2.join(childPath, "transcript.jsonl")
|
|
1303
|
+
);
|
|
1304
|
+
if (fromJsonl.length > 0) record.transcript = fromJsonl;
|
|
1305
|
+
}
|
|
1428
1306
|
collected.push(record);
|
|
1429
1307
|
seen.add(callId);
|
|
1430
1308
|
} catch (err) {
|
|
@@ -1446,12 +1324,45 @@ var MetricsStore = class extends EventEmitter {
|
|
|
1446
1324
|
return collected.length;
|
|
1447
1325
|
}
|
|
1448
1326
|
};
|
|
1327
|
+
function metricsFromTopLevel(meta) {
|
|
1328
|
+
const cost = meta.cost && typeof meta.cost === "object" ? meta.cost : null;
|
|
1329
|
+
const latency = meta.latency && typeof meta.latency === "object" ? meta.latency : null;
|
|
1330
|
+
const durationMs = meta.duration_ms;
|
|
1331
|
+
const telephony = meta.telephony_provider;
|
|
1332
|
+
if (cost === null && latency === null && durationMs == null && !telephony) {
|
|
1333
|
+
return null;
|
|
1334
|
+
}
|
|
1335
|
+
const out = {};
|
|
1336
|
+
if (cost !== null) out.cost = cost;
|
|
1337
|
+
if (latency !== null) {
|
|
1338
|
+
const fullAvg = latency.avg && typeof latency.avg === "object" ? latency.avg : null;
|
|
1339
|
+
const fullP50 = latency.p50 && typeof latency.p50 === "object" ? latency.p50 : null;
|
|
1340
|
+
const fullP95 = latency.p95 && typeof latency.p95 === "object" ? latency.p95 : null;
|
|
1341
|
+
const fullP99 = latency.p99 && typeof latency.p99 === "object" ? latency.p99 : null;
|
|
1342
|
+
if (fullAvg) out.latency_avg = fullAvg;
|
|
1343
|
+
if (fullP50) out.latency_p50 = fullP50;
|
|
1344
|
+
if (fullP95) out.latency_p95 = fullP95;
|
|
1345
|
+
if (fullP99) out.latency_p99 = fullP99;
|
|
1346
|
+
if (!fullAvg && !fullP50 && !fullP95) {
|
|
1347
|
+
const totalMs = typeof latency.p95_ms === "number" && latency.p95_ms || typeof latency.p50_ms === "number" && latency.p50_ms || 0;
|
|
1348
|
+
out.latency_avg = { total_ms: totalMs };
|
|
1349
|
+
}
|
|
1350
|
+
out.latency = latency;
|
|
1351
|
+
}
|
|
1352
|
+
if (typeof durationMs === "number" && durationMs > 0) {
|
|
1353
|
+
out.duration_seconds = durationMs / 1e3;
|
|
1354
|
+
}
|
|
1355
|
+
if (typeof telephony === "string" && telephony) {
|
|
1356
|
+
out.telephony_provider = telephony;
|
|
1357
|
+
}
|
|
1358
|
+
return Object.keys(out).length > 0 ? out : null;
|
|
1359
|
+
}
|
|
1449
1360
|
function metadataToCallRecord(callId, meta) {
|
|
1450
1361
|
const startedAt = parseTimestamp(meta.started_at);
|
|
1451
1362
|
if (startedAt === null) return null;
|
|
1452
1363
|
const endedAt = parseTimestamp(meta.ended_at);
|
|
1453
1364
|
const status = meta.status || "completed";
|
|
1454
|
-
const metrics = meta.metrics && typeof meta.metrics === "object" ? meta.metrics :
|
|
1365
|
+
const metrics = meta.metrics && typeof meta.metrics === "object" ? meta.metrics : metricsFromTopLevel(meta);
|
|
1455
1366
|
const transcript = Array.isArray(meta.transcript) ? meta.transcript : [];
|
|
1456
1367
|
return {
|
|
1457
1368
|
call_id: callId,
|
|
@@ -1465,6 +1376,36 @@ function metadataToCallRecord(callId, meta) {
|
|
|
1465
1376
|
transcript
|
|
1466
1377
|
};
|
|
1467
1378
|
}
|
|
1379
|
+
function loadTranscriptJsonl(filePath) {
|
|
1380
|
+
try {
|
|
1381
|
+
if (!fs2.existsSync(filePath)) return [];
|
|
1382
|
+
const raw = fs2.readFileSync(filePath, "utf8");
|
|
1383
|
+
const lines = raw.split("\n").filter((l) => l.trim().length > 0);
|
|
1384
|
+
const out = [];
|
|
1385
|
+
for (const line of lines) {
|
|
1386
|
+
let row;
|
|
1387
|
+
try {
|
|
1388
|
+
row = JSON.parse(line);
|
|
1389
|
+
} catch {
|
|
1390
|
+
continue;
|
|
1391
|
+
}
|
|
1392
|
+
const tsIso = typeof row.ts === "string" ? Date.parse(row.ts) : NaN;
|
|
1393
|
+
const tsNumeric = typeof row.timestamp === "number" ? row.timestamp * 1e3 : NaN;
|
|
1394
|
+
const timestamp = Number.isFinite(tsIso) ? tsIso : Number.isFinite(tsNumeric) ? tsNumeric : 0;
|
|
1395
|
+
const userText = typeof row.user_text === "string" ? row.user_text : "";
|
|
1396
|
+
const agentText = typeof row.agent_text === "string" ? row.agent_text : "";
|
|
1397
|
+
if (userText.length > 0) {
|
|
1398
|
+
out.push({ role: "user", text: userText, timestamp });
|
|
1399
|
+
}
|
|
1400
|
+
if (agentText.length > 0 && agentText !== "[interrupted]") {
|
|
1401
|
+
out.push({ role: "assistant", text: agentText, timestamp });
|
|
1402
|
+
}
|
|
1403
|
+
}
|
|
1404
|
+
return out;
|
|
1405
|
+
} catch {
|
|
1406
|
+
return [];
|
|
1407
|
+
}
|
|
1408
|
+
}
|
|
1468
1409
|
function parseTimestamp(raw) {
|
|
1469
1410
|
if (typeof raw === "number") {
|
|
1470
1411
|
return Number.isFinite(raw) ? raw : null;
|
|
@@ -1572,8 +1513,8 @@ function csvEscape(value) {
|
|
|
1572
1513
|
|
|
1573
1514
|
// src/dashboard/ui.ts
|
|
1574
1515
|
init_esm_shims();
|
|
1575
|
-
import { readFileSync as
|
|
1576
|
-
import { join as join2, dirname } from "path";
|
|
1516
|
+
import { readFileSync as readFileSync3 } from "fs";
|
|
1517
|
+
import { join as join2, dirname as dirname2 } from "path";
|
|
1577
1518
|
var FALLBACK_HTML = `<!doctype html>
|
|
1578
1519
|
<html><head><meta charset="utf-8"><title>Patter dashboard</title></head>
|
|
1579
1520
|
<body style="font-family:ui-sans-serif,system-ui;padding:2rem;color:#1a1a1a">
|
|
@@ -1583,15 +1524,15 @@ Run <code>cd dashboard-app && npm run build && npm run sync</cod
|
|
|
1583
1524
|
from the repo root to regenerate it.</p>
|
|
1584
1525
|
</body></html>`;
|
|
1585
1526
|
function loadDashboardHtml() {
|
|
1586
|
-
const here = typeof __dirname !== "undefined" ? __dirname :
|
|
1527
|
+
const here = typeof __dirname !== "undefined" ? __dirname : dirname2(".");
|
|
1587
1528
|
const candidates = [
|
|
1588
1529
|
join2(here, "ui.html"),
|
|
1589
1530
|
join2(here, "dashboard", "ui.html"),
|
|
1590
1531
|
join2(here, "..", "dashboard", "ui.html")
|
|
1591
1532
|
];
|
|
1592
|
-
for (const
|
|
1533
|
+
for (const path4 of candidates) {
|
|
1593
1534
|
try {
|
|
1594
|
-
return
|
|
1535
|
+
return readFileSync3(path4, "utf8");
|
|
1595
1536
|
} catch {
|
|
1596
1537
|
}
|
|
1597
1538
|
}
|
|
@@ -1611,7 +1552,8 @@ function mountDashboard(app, store, token = "") {
|
|
|
1611
1552
|
res.json(store.getCalls(limit, offset));
|
|
1612
1553
|
});
|
|
1613
1554
|
app.get("/api/dashboard/calls/:callId", auth, (req, res) => {
|
|
1614
|
-
const
|
|
1555
|
+
const callId = String(req.params.callId);
|
|
1556
|
+
const call = store.getCall(callId) ?? store.getActive(callId);
|
|
1615
1557
|
if (!call) {
|
|
1616
1558
|
res.status(404).json({ error: "Not found" });
|
|
1617
1559
|
return;
|
|
@@ -1624,6 +1566,24 @@ function mountDashboard(app, store, token = "") {
|
|
|
1624
1566
|
app.get("/api/dashboard/aggregates", auth, (_req, res) => {
|
|
1625
1567
|
res.json(store.getAggregates());
|
|
1626
1568
|
});
|
|
1569
|
+
app.delete("/api/dashboard/calls/:callId", auth, (req, res) => {
|
|
1570
|
+
const callId = String(req.params.callId);
|
|
1571
|
+
const accepted = store.deleteCalls([callId]);
|
|
1572
|
+
res.json({ deleted: accepted, count: accepted.length });
|
|
1573
|
+
});
|
|
1574
|
+
app.post("/api/dashboard/calls/delete", auth, (req, res) => {
|
|
1575
|
+
const body = req.body ?? {};
|
|
1576
|
+
const raw = body.call_ids;
|
|
1577
|
+
if (!Array.isArray(raw)) {
|
|
1578
|
+
res.status(400).json({ error: "Expected JSON body { 'call_ids': [...] }" });
|
|
1579
|
+
return;
|
|
1580
|
+
}
|
|
1581
|
+
const ids = raw.filter(
|
|
1582
|
+
(cid) => typeof cid === "string" && cid.length > 0
|
|
1583
|
+
);
|
|
1584
|
+
const accepted = store.deleteCalls(ids);
|
|
1585
|
+
res.json({ deleted: accepted, count: accepted.length });
|
|
1586
|
+
});
|
|
1627
1587
|
app.get("/api/dashboard/events", auth, (req, res) => {
|
|
1628
1588
|
res.writeHead(200, {
|
|
1629
1589
|
"Content-Type": "text/event-stream",
|
|
@@ -1696,7 +1656,8 @@ function mountApi(app, store, token = "") {
|
|
|
1696
1656
|
res.json({ data: active, count: active.length });
|
|
1697
1657
|
});
|
|
1698
1658
|
app.get("/api/v1/calls/:callId", auth, (req, res) => {
|
|
1699
|
-
const
|
|
1659
|
+
const callId = String(req.params.callId);
|
|
1660
|
+
const call = store.getCall(callId) ?? store.getActive(callId);
|
|
1700
1661
|
if (!call) {
|
|
1701
1662
|
res.status(404).json({ error: "Call not found" });
|
|
1702
1663
|
return;
|
|
@@ -1868,8 +1829,8 @@ var RemoteMessageHandler = class {
|
|
|
1868
1829
|
"WebSocket URL uses unencrypted ws:// \u2014 call transcripts and phone numbers will be sent in plaintext. Use wss:// in production."
|
|
1869
1830
|
);
|
|
1870
1831
|
}
|
|
1871
|
-
const { WebSocket:
|
|
1872
|
-
const ws = new
|
|
1832
|
+
const { WebSocket: WebSocket3 } = await import("ws");
|
|
1833
|
+
const ws = new WebSocket3(url);
|
|
1873
1834
|
const chunks = [];
|
|
1874
1835
|
let done = false;
|
|
1875
1836
|
let error = null;
|
|
@@ -1923,10 +1884,10 @@ var RemoteMessageHandler = class {
|
|
|
1923
1884
|
}
|
|
1924
1885
|
});
|
|
1925
1886
|
try {
|
|
1926
|
-
await new Promise((
|
|
1887
|
+
await new Promise((resolve2, reject) => {
|
|
1927
1888
|
ws.on("open", () => {
|
|
1928
1889
|
ws.send(JSON.stringify(data));
|
|
1929
|
-
|
|
1890
|
+
resolve2();
|
|
1930
1891
|
});
|
|
1931
1892
|
ws.on("error", (err) => {
|
|
1932
1893
|
reject(err);
|
|
@@ -1936,11 +1897,11 @@ var RemoteMessageHandler = class {
|
|
|
1936
1897
|
yield chunks.shift();
|
|
1937
1898
|
}
|
|
1938
1899
|
while (!done && !error) {
|
|
1939
|
-
const text = await new Promise((
|
|
1900
|
+
const text = await new Promise((resolve2) => {
|
|
1940
1901
|
if (chunks.length > 0) {
|
|
1941
|
-
|
|
1902
|
+
resolve2(chunks.shift());
|
|
1942
1903
|
} else {
|
|
1943
|
-
resolveNext =
|
|
1904
|
+
resolveNext = resolve2;
|
|
1944
1905
|
}
|
|
1945
1906
|
});
|
|
1946
1907
|
if (text === null) break;
|
|
@@ -1967,7 +1928,7 @@ init_esm_shims();
|
|
|
1967
1928
|
|
|
1968
1929
|
// src/providers/deepgram-stt.ts
|
|
1969
1930
|
init_esm_shims();
|
|
1970
|
-
import
|
|
1931
|
+
import WebSocket2 from "ws";
|
|
1971
1932
|
|
|
1972
1933
|
// src/errors.ts
|
|
1973
1934
|
init_esm_shims();
|
|
@@ -2060,6 +2021,8 @@ var FINALIZE_DRAIN_MS = 100;
|
|
|
2060
2021
|
var CLOSE_LATENCY_BUDGET_MS = 500;
|
|
2061
2022
|
var RECONNECT_CLOSE_CODES = /* @__PURE__ */ new Set([1006, 1011]);
|
|
2062
2023
|
var DeepgramSTT = class _DeepgramSTT {
|
|
2024
|
+
/** Stable pricing/dashboard key — read by stream-handler/metrics. */
|
|
2025
|
+
static providerKey = "deepgram";
|
|
2063
2026
|
ws = null;
|
|
2064
2027
|
transcriptCallbacks = /* @__PURE__ */ new Set();
|
|
2065
2028
|
errorCallbacks = /* @__PURE__ */ new Set();
|
|
@@ -2120,6 +2083,64 @@ var DeepgramSTT = class _DeepgramSTT {
|
|
|
2120
2083
|
}
|
|
2121
2084
|
return `${DEEPGRAM_WS_URL}?${params.toString()}`;
|
|
2122
2085
|
}
|
|
2086
|
+
/**
|
|
2087
|
+
* Pre-call WebSocket warmup for the Deepgram `/v1/listen` endpoint.
|
|
2088
|
+
*
|
|
2089
|
+
* Opens the WS (full DNS + TLS + auth handshake), idles ~250 ms so the
|
|
2090
|
+
* provider edge keeps the session warm in its routing table, then
|
|
2091
|
+
* closes cleanly. By the time `connect()` is invoked at call-pickup
|
|
2092
|
+
* the DNS resolver is hot, the TCP+TLS session is in the connection
|
|
2093
|
+
* pool, and recent WS auth is still warm at Deepgram's edge — net
|
|
2094
|
+
* wire time saving of 200-500 ms vs a cold WS open.
|
|
2095
|
+
*
|
|
2096
|
+
* Billing safety: Deepgram bills on streamed audio seconds (per
|
|
2097
|
+
* https://deepgram.com/pricing). Opening + closing the WebSocket
|
|
2098
|
+
* without sending any audio frames does not consume billable seconds.
|
|
2099
|
+
* Best-effort: any failure is logged at debug level and never raised.
|
|
2100
|
+
*/
|
|
2101
|
+
async warmup() {
|
|
2102
|
+
const params = new URLSearchParams({
|
|
2103
|
+
model: this.model,
|
|
2104
|
+
language: this.language,
|
|
2105
|
+
encoding: this.encoding,
|
|
2106
|
+
sample_rate: String(this.sampleRate),
|
|
2107
|
+
channels: "1"
|
|
2108
|
+
});
|
|
2109
|
+
const url = `${DEEPGRAM_WS_URL}?${params.toString()}`;
|
|
2110
|
+
let ws = null;
|
|
2111
|
+
try {
|
|
2112
|
+
ws = await new Promise((resolve2, reject) => {
|
|
2113
|
+
const sock = new WebSocket2(url, {
|
|
2114
|
+
headers: { Authorization: `Token ${this.apiKey}` }
|
|
2115
|
+
});
|
|
2116
|
+
const timer = setTimeout(() => {
|
|
2117
|
+
try {
|
|
2118
|
+
sock.close();
|
|
2119
|
+
} catch {
|
|
2120
|
+
}
|
|
2121
|
+
reject(new Error("Deepgram STT warmup connect timeout"));
|
|
2122
|
+
}, 5e3);
|
|
2123
|
+
sock.once("open", () => {
|
|
2124
|
+
clearTimeout(timer);
|
|
2125
|
+
resolve2(sock);
|
|
2126
|
+
});
|
|
2127
|
+
sock.once("error", (err) => {
|
|
2128
|
+
clearTimeout(timer);
|
|
2129
|
+
reject(err);
|
|
2130
|
+
});
|
|
2131
|
+
});
|
|
2132
|
+
await new Promise((r) => setTimeout(r, 250));
|
|
2133
|
+
} catch (err) {
|
|
2134
|
+
getLogger().debug(`Deepgram STT warmup failed (best-effort): ${String(err)}`);
|
|
2135
|
+
} finally {
|
|
2136
|
+
if (ws) {
|
|
2137
|
+
try {
|
|
2138
|
+
ws.close();
|
|
2139
|
+
} catch {
|
|
2140
|
+
}
|
|
2141
|
+
}
|
|
2142
|
+
}
|
|
2143
|
+
}
|
|
2123
2144
|
/** Open the streaming WebSocket and arm message + keepalive handlers. */
|
|
2124
2145
|
async connect() {
|
|
2125
2146
|
await this.openSocket();
|
|
@@ -2128,11 +2149,11 @@ var DeepgramSTT = class _DeepgramSTT {
|
|
|
2128
2149
|
}
|
|
2129
2150
|
async openSocket() {
|
|
2130
2151
|
const url = this.buildUrl();
|
|
2131
|
-
const ws = new
|
|
2152
|
+
const ws = new WebSocket2(url, {
|
|
2132
2153
|
headers: { Authorization: `Token ${this.apiKey}` }
|
|
2133
2154
|
});
|
|
2134
2155
|
this.ws = ws;
|
|
2135
|
-
await new Promise((
|
|
2156
|
+
await new Promise((resolve2, reject) => {
|
|
2136
2157
|
let settled = false;
|
|
2137
2158
|
const settle = (fn) => {
|
|
2138
2159
|
if (settled) return;
|
|
@@ -2144,7 +2165,7 @@ var DeepgramSTT = class _DeepgramSTT {
|
|
|
2144
2165
|
() => settle(() => reject(new PatterConnectionError("Deepgram connect timeout"))),
|
|
2145
2166
|
1e4
|
|
2146
2167
|
);
|
|
2147
|
-
ws.once("open", () => settle(
|
|
2168
|
+
ws.once("open", () => settle(resolve2));
|
|
2148
2169
|
ws.once("error", (err) => settle(() => reject(err)));
|
|
2149
2170
|
ws.once("unexpected-response", (_req, res) => {
|
|
2150
2171
|
const status = res?.statusCode ?? 0;
|
|
@@ -2165,7 +2186,7 @@ var DeepgramSTT = class _DeepgramSTT {
|
|
|
2165
2186
|
ws.on("close", (code, reason) => this.handleClose(code, reason.toString()));
|
|
2166
2187
|
ws.on("error", (err) => this.handleError(err));
|
|
2167
2188
|
this.keepaliveTimer = setInterval(() => {
|
|
2168
|
-
if (this.ws && this.ws.readyState ===
|
|
2189
|
+
if (this.ws && this.ws.readyState === WebSocket2.OPEN) {
|
|
2169
2190
|
try {
|
|
2170
2191
|
this.ws.send(JSON.stringify({ type: "KeepAlive" }));
|
|
2171
2192
|
} catch {
|
|
@@ -2284,7 +2305,7 @@ var DeepgramSTT = class _DeepgramSTT {
|
|
|
2284
2305
|
}
|
|
2285
2306
|
/** Send a binary audio chunk to Deepgram for transcription. */
|
|
2286
2307
|
sendAudio(audio) {
|
|
2287
|
-
if (!this.ws || this.ws.readyState !==
|
|
2308
|
+
if (!this.ws || this.ws.readyState !== WebSocket2.OPEN) {
|
|
2288
2309
|
this.audioDroppedCount++;
|
|
2289
2310
|
if (this.audioDroppedCount === 1 || this.audioDroppedCount % 50 === 0) {
|
|
2290
2311
|
getLogger().info(
|
|
@@ -2333,7 +2354,7 @@ var DeepgramSTT = class _DeepgramSTT {
|
|
|
2333
2354
|
*/
|
|
2334
2355
|
finalize() {
|
|
2335
2356
|
const ws = this.ws;
|
|
2336
|
-
if (!ws || ws.readyState !==
|
|
2357
|
+
if (!ws || ws.readyState !== WebSocket2.OPEN) {
|
|
2337
2358
|
getLogger().info(
|
|
2338
2359
|
`[DIAG] DeepgramSTT.finalize SKIPPED (ws state=${ws?.readyState ?? "null"})`
|
|
2339
2360
|
);
|
|
@@ -2354,7 +2375,7 @@ var DeepgramSTT = class _DeepgramSTT {
|
|
|
2354
2375
|
if (!ws) return;
|
|
2355
2376
|
this.ws = null;
|
|
2356
2377
|
const sendSafe = (payload) => {
|
|
2357
|
-
if (ws.readyState ===
|
|
2378
|
+
if (ws.readyState === WebSocket2.OPEN) {
|
|
2358
2379
|
try {
|
|
2359
2380
|
ws.send(payload);
|
|
2360
2381
|
} catch {
|
|
@@ -2368,7 +2389,7 @@ var DeepgramSTT = class _DeepgramSTT {
|
|
|
2368
2389
|
} catch {
|
|
2369
2390
|
}
|
|
2370
2391
|
};
|
|
2371
|
-
if (ws.readyState !==
|
|
2392
|
+
if (ws.readyState !== WebSocket2.OPEN) {
|
|
2372
2393
|
finishClose();
|
|
2373
2394
|
return;
|
|
2374
2395
|
}
|
|
@@ -2437,6 +2458,21 @@ var CallMetricsAccumulator = class {
|
|
|
2437
2458
|
_bargeinStoppedAt = null;
|
|
2438
2459
|
_turnUserText = "";
|
|
2439
2460
|
_turnSttAudioSeconds = 0;
|
|
2461
|
+
/**
|
|
2462
|
+
* Guard against the recordTurnInterrupted / recordTurnComplete race.
|
|
2463
|
+
*
|
|
2464
|
+
* A VAD-path barge-in fires ``recordTurnInterrupted`` synchronously
|
|
2465
|
+
* inside ``handleAudioAsync`` while the in-flight pipeline LLM stream
|
|
2466
|
+
* keeps unwinding on its own task. When the LLM stream eventually
|
|
2467
|
+
* exits, the existing pipeline path falls through to
|
|
2468
|
+
* ``recordTurnComplete``, which would push a second turn for the same
|
|
2469
|
+
* logical exchange (this time carrying ``user_text=''`` because the
|
|
2470
|
+
* field was already reset). ``_turnAlreadyClosed`` is flipped by
|
|
2471
|
+
* ``recordTurnInterrupted`` and read by ``recordTurnComplete`` so the
|
|
2472
|
+
* late ``recordTurnComplete`` becomes a no-op until the next
|
|
2473
|
+
* ``startTurn`` re-arms the accumulator.
|
|
2474
|
+
*/
|
|
2475
|
+
_turnAlreadyClosed = false;
|
|
2440
2476
|
// Cumulative usage counters
|
|
2441
2477
|
_totalSttAudioSeconds = 0;
|
|
2442
2478
|
_totalTtsCharacters = 0;
|
|
@@ -2449,6 +2485,10 @@ var CallMetricsAccumulator = class {
|
|
|
2449
2485
|
_actualSttCost = null;
|
|
2450
2486
|
// Fix 10: accumulated LLM token cost for non-Realtime pipeline mode.
|
|
2451
2487
|
_totalLlmCost = 0;
|
|
2488
|
+
// Last LLM model identifier from a recordLlmUsage call — emitted on
|
|
2489
|
+
// CallMetrics.llm_model so the dashboard cost panel can display
|
|
2490
|
+
// "Cerebras gpt-oss-120b" instead of just "Cerebras".
|
|
2491
|
+
_llmModel = "";
|
|
2452
2492
|
// ---- EventBus integration (item 3) ----
|
|
2453
2493
|
_eventBus;
|
|
2454
2494
|
// ---- EOUMetrics — 4 timestamps (item 4) ----
|
|
@@ -2467,6 +2507,22 @@ var CallMetricsAccumulator = class {
|
|
|
2467
2507
|
// ---- report_only_initial_ttfb (item 6) ----
|
|
2468
2508
|
_reportOnlyInitialTtfb;
|
|
2469
2509
|
_initialTtfbEmitted = false;
|
|
2510
|
+
// ---- Barge-in anchor hygiene ----
|
|
2511
|
+
/**
|
|
2512
|
+
* Last barge-in detection timestamp (hrTimeMs). Used by
|
|
2513
|
+
* ``_computeTurnLatency`` to gate endpoint_ms / stt_ms emission on turns
|
|
2514
|
+
* that started immediately after a barge-in — those turns have unreliable
|
|
2515
|
+
* VAD/STT anchors and would otherwise pollute the p95 distribution with
|
|
2516
|
+
* synthetic 6+ second spikes.
|
|
2517
|
+
*/
|
|
2518
|
+
_lastBargeinAt = null;
|
|
2519
|
+
/**
|
|
2520
|
+
* Count of turns where ``recordSttComplete`` fired but no legitimate VAD
|
|
2521
|
+
* ``speech_end`` had stamped ``_endpointSignalAt``. Exposed via metrics so
|
|
2522
|
+
* we can spot environments where PSTN packet loss is dropping VAD stops
|
|
2523
|
+
* (the common cause of missing endpoint signals).
|
|
2524
|
+
*/
|
|
2525
|
+
_endpointSignalMissingCount = 0;
|
|
2470
2526
|
constructor(opts) {
|
|
2471
2527
|
this.callId = opts.callId;
|
|
2472
2528
|
this.providerMode = opts.providerMode;
|
|
@@ -2514,6 +2570,7 @@ var CallMetricsAccumulator = class {
|
|
|
2514
2570
|
this._bargeinStoppedAt = null;
|
|
2515
2571
|
this._turnUserText = "";
|
|
2516
2572
|
this._turnSttAudioSeconds = 0;
|
|
2573
|
+
this._turnAlreadyClosed = false;
|
|
2517
2574
|
this._vadStoppedAt = null;
|
|
2518
2575
|
this._sttFinalAt = null;
|
|
2519
2576
|
this._turnCommittedAt = null;
|
|
@@ -2530,12 +2587,46 @@ var CallMetricsAccumulator = class {
|
|
|
2530
2587
|
this.startTurn();
|
|
2531
2588
|
}
|
|
2532
2589
|
}
|
|
2590
|
+
/**
|
|
2591
|
+
* Anchor the current turn at a legitimate VAD ``speech_start`` event.
|
|
2592
|
+
*
|
|
2593
|
+
* Industry-standard pattern: every VAD ``speech_start`` that fires while the agent
|
|
2594
|
+
* is NOT in the suppressed warmup window re-anchors the turn timer to
|
|
2595
|
+
* the wall-clock moment the user actually started speaking. Re-anchors:
|
|
2596
|
+
*
|
|
2597
|
+
* * ``_turnStart`` — fixes the case where a phantom ``speech_start``
|
|
2598
|
+
* during agent TTS or a partial transcript from the previous user
|
|
2599
|
+
* attempt already stamped the field. Without this, the legitimate
|
|
2600
|
+
* user-speech ``speech_start`` no-op'd and ``user_speech_duration_ms``
|
|
2601
|
+
* inflated from ~1 s to 5-7 s (the original "I waited 7 seconds"
|
|
2602
|
+
* dashboard symptom).
|
|
2603
|
+
* * ``_endpointSignalAt``, ``_vadStoppedAt``, ``_sttFinalAt`` — any
|
|
2604
|
+
* stale anchor from a rejected barge-in / dropped final transcript
|
|
2605
|
+
* on the same uncommitted turn is cleared, so the next
|
|
2606
|
+
* ``recordVadStop`` / ``recordSttFinalTimestamp`` stamps fresh.
|
|
2607
|
+
* * ``_sttComplete``, ``_llmFirstToken``, ``_initialTtfbEmitted`` — same
|
|
2608
|
+
* rationale for the downstream pipeline timestamps.
|
|
2609
|
+
*
|
|
2610
|
+
* No-op once the turn is committed (``_turnCommittedMono`` set): a
|
|
2611
|
+
* VAD ``speech_start`` after commit belongs to the NEXT turn's
|
|
2612
|
+
* barge-in path, handled by ``recordTurnInterrupted`` instead.
|
|
2613
|
+
*/
|
|
2614
|
+
anchorUserSpeechStart() {
|
|
2615
|
+
if (this._turnCommittedMono !== null) return;
|
|
2616
|
+
this._turnStart = hrTimeMs();
|
|
2617
|
+
this._endpointSignalAt = null;
|
|
2618
|
+
this._vadStoppedAt = null;
|
|
2619
|
+
this._sttFinalAt = null;
|
|
2620
|
+
this._sttComplete = null;
|
|
2621
|
+
this._llmFirstToken = null;
|
|
2622
|
+
this._initialTtfbEmitted = false;
|
|
2623
|
+
}
|
|
2533
2624
|
/** Stamp end-of-STT, capture the user's transcript, and accrue billed STT seconds. */
|
|
2534
2625
|
recordSttComplete(text, audioSeconds = 0) {
|
|
2535
2626
|
this._sttComplete = hrTimeMs();
|
|
2536
2627
|
this._sttFinalAt = this._sttComplete;
|
|
2537
2628
|
if (this._endpointSignalAt === null) {
|
|
2538
|
-
this.
|
|
2629
|
+
this._endpointSignalMissingCount++;
|
|
2539
2630
|
}
|
|
2540
2631
|
this._turnUserText = text;
|
|
2541
2632
|
this._turnSttAudioSeconds = audioSeconds;
|
|
@@ -2624,7 +2715,9 @@ var CallMetricsAccumulator = class {
|
|
|
2624
2715
|
* ``recordTtsStopped`` to compute ``bargein_ms``.
|
|
2625
2716
|
*/
|
|
2626
2717
|
recordBargeinDetected(ts) {
|
|
2627
|
-
|
|
2718
|
+
const t = ts ?? hrTimeMs();
|
|
2719
|
+
this._bargeinDetectedAt = t;
|
|
2720
|
+
this._lastBargeinAt = t;
|
|
2628
2721
|
}
|
|
2629
2722
|
/**
|
|
2630
2723
|
* Mark the moment TTS playback was actually halted after a barge-in. Call
|
|
@@ -2634,8 +2727,18 @@ var CallMetricsAccumulator = class {
|
|
|
2634
2727
|
recordTtsStopped(ts) {
|
|
2635
2728
|
this._bargeinStoppedAt = ts ?? hrTimeMs();
|
|
2636
2729
|
}
|
|
2637
|
-
/**
|
|
2730
|
+
/**
|
|
2731
|
+
* Close the current turn cleanly and append a `TurnMetrics` record.
|
|
2732
|
+
*
|
|
2733
|
+
* Returns ``null`` when ``recordTurnInterrupted`` has already closed
|
|
2734
|
+
* the current turn — this protects against the VAD-barge-in /
|
|
2735
|
+
* pipeline-LLM race where both paths try to finalise the same logical
|
|
2736
|
+
* turn and the second would otherwise push a phantom entry with
|
|
2737
|
+
* ``user_text=''``. The caller treats ``null`` as "nothing to emit";
|
|
2738
|
+
* ``emitTurnMetrics`` is already null-safe.
|
|
2739
|
+
*/
|
|
2638
2740
|
recordTurnComplete(agentText) {
|
|
2741
|
+
if (this._turnAlreadyClosed) return null;
|
|
2639
2742
|
const latency = this._computeTurnLatency();
|
|
2640
2743
|
const turn = {
|
|
2641
2744
|
turn_index: this._turns.length,
|
|
@@ -2648,13 +2751,23 @@ var CallMetricsAccumulator = class {
|
|
|
2648
2751
|
};
|
|
2649
2752
|
this._turns.push(turn);
|
|
2650
2753
|
this._resetTurnState();
|
|
2754
|
+
this._turnAlreadyClosed = true;
|
|
2651
2755
|
this._eventBus?.emit("turn_ended", { callId: this.callId, turn });
|
|
2652
2756
|
this._eventBus?.emit("metrics_collected", { callId: this.callId, turn });
|
|
2653
2757
|
return turn;
|
|
2654
2758
|
}
|
|
2655
|
-
/**
|
|
2759
|
+
/**
|
|
2760
|
+
* Close the current turn as interrupted (barge-in) and return the
|
|
2761
|
+
* recorded metrics. Returns ``null`` when no turn is open, OR when
|
|
2762
|
+
* ``recordTurnComplete`` has already finalised the current turn —
|
|
2763
|
+
* bidirectional parity with the guard at the top of
|
|
2764
|
+
* ``recordTurnComplete``. Prevents an out-of-order interruption (e.g.
|
|
2765
|
+
* a future refactor that reorders the bargein + LLM-unwind paths)
|
|
2766
|
+
* from overwriting a turn that the complete path already emitted.
|
|
2767
|
+
*/
|
|
2656
2768
|
recordTurnInterrupted() {
|
|
2657
2769
|
if (this._turnStart === null) return null;
|
|
2770
|
+
if (this._turnAlreadyClosed) return null;
|
|
2658
2771
|
const latency = this._computeTurnLatency();
|
|
2659
2772
|
const turn = {
|
|
2660
2773
|
turn_index: this._turns.length,
|
|
@@ -2666,7 +2779,12 @@ var CallMetricsAccumulator = class {
|
|
|
2666
2779
|
timestamp: Date.now() / 1e3
|
|
2667
2780
|
};
|
|
2668
2781
|
this._turns.push(turn);
|
|
2782
|
+
this._eventBus?.emit("turn_ended", { callId: this.callId, turn });
|
|
2783
|
+
this._eventBus?.emit("metrics_collected", { callId: this.callId, turn });
|
|
2669
2784
|
this._resetTurnState();
|
|
2785
|
+
this._turnAlreadyClosed = true;
|
|
2786
|
+
this._turnCommittedMono = null;
|
|
2787
|
+
this._endpointSignalAt = null;
|
|
2670
2788
|
return turn;
|
|
2671
2789
|
}
|
|
2672
2790
|
// ---- EOU metrics (item 4) ----
|
|
@@ -2812,6 +2930,7 @@ var CallMetricsAccumulator = class {
|
|
|
2812
2930
|
* @param cacheWriteTokens Cache write tokens (billed at cache_write rate if present)
|
|
2813
2931
|
*/
|
|
2814
2932
|
recordLlmUsage(provider2, model, inputTokens, outputTokens, cacheReadTokens = 0, cacheWriteTokens = 0) {
|
|
2933
|
+
this._llmModel = model;
|
|
2815
2934
|
this._totalLlmCost += calculateLlmCost(
|
|
2816
2935
|
provider2,
|
|
2817
2936
|
model,
|
|
@@ -2851,7 +2970,10 @@ var CallMetricsAccumulator = class {
|
|
|
2851
2970
|
stt_provider: this.sttProvider,
|
|
2852
2971
|
tts_provider: this.ttsProvider,
|
|
2853
2972
|
llm_provider: this.llmProvider,
|
|
2854
|
-
telephony_provider: this.telephonyProvider
|
|
2973
|
+
telephony_provider: this.telephonyProvider,
|
|
2974
|
+
stt_model: this.sttModel,
|
|
2975
|
+
tts_model: this.ttsModel,
|
|
2976
|
+
llm_model: this._llmModel
|
|
2855
2977
|
};
|
|
2856
2978
|
this._eventBus?.emit("call_ended", { callId: this.callId, metrics });
|
|
2857
2979
|
return metrics;
|
|
@@ -2861,6 +2983,15 @@ var CallMetricsAccumulator = class {
|
|
|
2861
2983
|
const duration = (hrTimeMs() - this._callStart) / 1e3;
|
|
2862
2984
|
return this._computeCost(duration);
|
|
2863
2985
|
}
|
|
2986
|
+
/**
|
|
2987
|
+
* Number of turns where recordSttComplete fired without a prior legitimate
|
|
2988
|
+
* VAD speech_end. Surfaced for diagnostics — a non-zero value points at
|
|
2989
|
+
* dropped VAD stops (commonly PSTN packet loss), which is why we stopped
|
|
2990
|
+
* faking _endpointSignalAt from _sttComplete in 0.6.x.
|
|
2991
|
+
*/
|
|
2992
|
+
get endpointSignalMissingCount() {
|
|
2993
|
+
return this._endpointSignalMissingCount;
|
|
2994
|
+
}
|
|
2864
2995
|
// ---- Internal ----
|
|
2865
2996
|
_resetTurnState() {
|
|
2866
2997
|
this._turnStart = null;
|
|
@@ -2876,6 +3007,7 @@ var CallMetricsAccumulator = class {
|
|
|
2876
3007
|
this._bargeinStoppedAt = null;
|
|
2877
3008
|
this._turnUserText = "";
|
|
2878
3009
|
this._turnSttAudioSeconds = 0;
|
|
3010
|
+
this._initialTtfbEmitted = false;
|
|
2879
3011
|
}
|
|
2880
3012
|
_computeTurnLatency() {
|
|
2881
3013
|
let stt_ms = 0;
|
|
@@ -2887,8 +3019,19 @@ var CallMetricsAccumulator = class {
|
|
|
2887
3019
|
let endpoint_ms;
|
|
2888
3020
|
let bargein_ms;
|
|
2889
3021
|
let tts_total_ms;
|
|
2890
|
-
|
|
2891
|
-
|
|
3022
|
+
let user_speech_duration_ms;
|
|
3023
|
+
const postBargein = this._lastBargeinAt !== null && this._turnStart !== null && Math.abs(this._turnStart - this._lastBargeinAt) <= 100;
|
|
3024
|
+
if (this._sttComplete !== null) {
|
|
3025
|
+
const anchor = this._endpointSignalAt ?? this._turnStart;
|
|
3026
|
+
if (anchor !== null) {
|
|
3027
|
+
stt_ms = Math.max(0, this._sttComplete - anchor);
|
|
3028
|
+
}
|
|
3029
|
+
}
|
|
3030
|
+
if (this._turnStart !== null && this._endpointSignalAt !== null) {
|
|
3031
|
+
user_speech_duration_ms = Math.max(
|
|
3032
|
+
0,
|
|
3033
|
+
this._endpointSignalAt - this._turnStart
|
|
3034
|
+
);
|
|
2892
3035
|
}
|
|
2893
3036
|
if (this._sttComplete !== null && this._llmFirstToken !== null) {
|
|
2894
3037
|
llm_ttft_ms = Math.max(0, this._llmFirstToken - this._sttComplete);
|
|
@@ -2921,9 +3064,14 @@ var CallMetricsAccumulator = class {
|
|
|
2921
3064
|
if (endpoint_ms !== void 0 && llm_ttft_ms !== void 0 && tts_ms > 0) {
|
|
2922
3065
|
agent_response_ms = round(endpoint_ms + llm_ttft_ms + tts_ms, 1);
|
|
2923
3066
|
}
|
|
3067
|
+
if (postBargein) {
|
|
3068
|
+
stt_ms = 0;
|
|
3069
|
+
endpoint_ms = void 0;
|
|
3070
|
+
}
|
|
2924
3071
|
return {
|
|
2925
3072
|
stt_ms: round(stt_ms, 1),
|
|
2926
3073
|
llm_ms: round(llm_ms, 1),
|
|
3074
|
+
...user_speech_duration_ms !== void 0 ? { user_speech_duration_ms: round(user_speech_duration_ms, 1) } : {},
|
|
2927
3075
|
...llm_ttft_ms !== void 0 ? { llm_ttft_ms: round(llm_ttft_ms, 1) } : {},
|
|
2928
3076
|
...llm_total_ms !== void 0 ? { llm_total_ms: round(llm_total_ms, 1) } : {},
|
|
2929
3077
|
tts_ms: round(tts_ms, 1),
|
|
@@ -3002,6 +3150,8 @@ var CallMetricsAccumulator = class {
|
|
|
3002
3150
|
const endpointAvg = optAvg("endpoint_ms");
|
|
3003
3151
|
const bargeinAvg = optAvg("bargein_ms");
|
|
3004
3152
|
const ttsTotalAvg = optAvg("tts_total_ms");
|
|
3153
|
+
const userSpeechAvg = optAvg("user_speech_duration_ms");
|
|
3154
|
+
const agentResponseAvg = optAvg("agent_response_ms");
|
|
3005
3155
|
return {
|
|
3006
3156
|
stt_ms: round(turns.reduce((s, t) => s + t.latency.stt_ms, 0) / n, 1),
|
|
3007
3157
|
llm_ms: round(turns.reduce((s, t) => s + t.latency.llm_ms, 0) / n, 1),
|
|
@@ -3011,7 +3161,9 @@ var CallMetricsAccumulator = class {
|
|
|
3011
3161
|
total_ms: round(turns.reduce((s, t) => s + t.latency.total_ms, 0) / n, 1),
|
|
3012
3162
|
...endpointAvg !== void 0 ? { endpoint_ms: endpointAvg } : {},
|
|
3013
3163
|
...bargeinAvg !== void 0 ? { bargein_ms: bargeinAvg } : {},
|
|
3014
|
-
...ttsTotalAvg !== void 0 ? { tts_total_ms: ttsTotalAvg } : {}
|
|
3164
|
+
...ttsTotalAvg !== void 0 ? { tts_total_ms: ttsTotalAvg } : {},
|
|
3165
|
+
...userSpeechAvg !== void 0 ? { user_speech_duration_ms: userSpeechAvg } : {},
|
|
3166
|
+
...agentResponseAvg !== void 0 ? { agent_response_ms: agentResponseAvg } : {}
|
|
3015
3167
|
};
|
|
3016
3168
|
}
|
|
3017
3169
|
_computePercentileLatency(p) {
|
|
@@ -3030,6 +3182,8 @@ var CallMetricsAccumulator = class {
|
|
|
3030
3182
|
const endpointP = optPct("endpoint_ms");
|
|
3031
3183
|
const bargeinP = optPct("bargein_ms");
|
|
3032
3184
|
const ttsTotalP = optPct("tts_total_ms");
|
|
3185
|
+
const userSpeechP = optPct("user_speech_duration_ms");
|
|
3186
|
+
const agentResponseP = optPct("agent_response_ms");
|
|
3033
3187
|
return {
|
|
3034
3188
|
stt_ms: round(percentile(nonZero(turns.map((t) => t.latency.stt_ms)), p), 1),
|
|
3035
3189
|
llm_ms: round(percentile(nonZero(turns.map((t) => t.latency.llm_ms)), p), 1),
|
|
@@ -3039,409 +3193,13 @@ var CallMetricsAccumulator = class {
|
|
|
3039
3193
|
total_ms: round(percentile(nonZero(turns.map((t) => t.latency.total_ms)), p), 1),
|
|
3040
3194
|
...endpointP !== void 0 ? { endpoint_ms: endpointP } : {},
|
|
3041
3195
|
...bargeinP !== void 0 ? { bargein_ms: bargeinP } : {},
|
|
3042
|
-
...ttsTotalP !== void 0 ? { tts_total_ms: ttsTotalP } : {}
|
|
3196
|
+
...ttsTotalP !== void 0 ? { tts_total_ms: ttsTotalP } : {},
|
|
3197
|
+
...userSpeechP !== void 0 ? { user_speech_duration_ms: userSpeechP } : {},
|
|
3198
|
+
...agentResponseP !== void 0 ? { agent_response_ms: agentResponseP } : {}
|
|
3043
3199
|
};
|
|
3044
3200
|
}
|
|
3045
3201
|
};
|
|
3046
3202
|
|
|
3047
|
-
// src/audio/transcoding.ts
|
|
3048
|
-
init_esm_shims();
|
|
3049
|
-
var MULAW_TO_PCM16_TABLE = (() => {
|
|
3050
|
-
const table = new Int16Array(256);
|
|
3051
|
-
for (let i = 0; i < 256; i++) {
|
|
3052
|
-
const mu = ~i & 255;
|
|
3053
|
-
const sign = mu & 128 ? -1 : 1;
|
|
3054
|
-
const exponent = mu >> 4 & 7;
|
|
3055
|
-
const mantissa = mu & 15;
|
|
3056
|
-
const magnitude = (mantissa << 1 | 33) << exponent + 2;
|
|
3057
|
-
table[i] = sign * (magnitude - 132);
|
|
3058
|
-
}
|
|
3059
|
-
return table;
|
|
3060
|
-
})();
|
|
3061
|
-
var PCM16_TO_MULAW_TABLE = (() => {
|
|
3062
|
-
const BIAS = 132;
|
|
3063
|
-
const CLIP = 32635;
|
|
3064
|
-
const table = new Uint8Array(65536);
|
|
3065
|
-
for (let i = 0; i < 65536; i++) {
|
|
3066
|
-
let sample = i >= 32768 ? i - 65536 : i;
|
|
3067
|
-
const sign = sample < 0 ? 128 : 0;
|
|
3068
|
-
if (sample < 0) sample = -sample;
|
|
3069
|
-
if (sample > CLIP) sample = CLIP;
|
|
3070
|
-
sample += BIAS;
|
|
3071
|
-
let exponent = 7;
|
|
3072
|
-
const exponentMask = 16384;
|
|
3073
|
-
for (let shift = exponentMask; shift > 0 && (sample & shift) === 0; shift >>= 1) {
|
|
3074
|
-
exponent--;
|
|
3075
|
-
}
|
|
3076
|
-
const mantissa = sample >> exponent + 3 & 15;
|
|
3077
|
-
const mulaw = ~(sign | exponent << 4 | mantissa) & 255;
|
|
3078
|
-
table[i] = mulaw;
|
|
3079
|
-
}
|
|
3080
|
-
return table;
|
|
3081
|
-
})();
|
|
3082
|
-
function mulawToPcm16(mulawData) {
|
|
3083
|
-
const out = Buffer.alloc(mulawData.length * 2);
|
|
3084
|
-
for (let i = 0; i < mulawData.length; i++) {
|
|
3085
|
-
out.writeInt16LE(MULAW_TO_PCM16_TABLE[mulawData[i]], i * 2);
|
|
3086
|
-
}
|
|
3087
|
-
return out;
|
|
3088
|
-
}
|
|
3089
|
-
function pcm16ToMulaw(pcmData) {
|
|
3090
|
-
const sampleCount = Math.floor(pcmData.length / 2);
|
|
3091
|
-
const out = Buffer.alloc(sampleCount);
|
|
3092
|
-
for (let i = 0; i < sampleCount; i++) {
|
|
3093
|
-
const sample = pcmData.readInt16LE(i * 2);
|
|
3094
|
-
out[i] = PCM16_TO_MULAW_TABLE[sample + 65536 & 65535];
|
|
3095
|
-
}
|
|
3096
|
-
return out;
|
|
3097
|
-
}
|
|
3098
|
-
var PcmCarry = class {
|
|
3099
|
-
pending = null;
|
|
3100
|
-
/**
|
|
3101
|
-
* Prepend any carried odd byte, return the even-length prefix, and stash
|
|
3102
|
-
* any new trailing odd byte for the next call.
|
|
3103
|
-
*
|
|
3104
|
-
* Returns a zero-length buffer when no complete sample is yet available.
|
|
3105
|
-
*/
|
|
3106
|
-
push(chunk) {
|
|
3107
|
-
const combined = this.pending !== null ? Buffer.concat([this.pending, chunk]) : chunk;
|
|
3108
|
-
this.pending = null;
|
|
3109
|
-
const alignedLen = combined.length & ~1;
|
|
3110
|
-
if (alignedLen < combined.length) {
|
|
3111
|
-
this.pending = combined.subarray(alignedLen);
|
|
3112
|
-
}
|
|
3113
|
-
return combined.subarray(0, alignedLen);
|
|
3114
|
-
}
|
|
3115
|
-
/**
|
|
3116
|
-
* Return any pending byte as a 1-byte buffer (rare in practice — only if
|
|
3117
|
-
* the entire stream had an odd byte count), then reset internal state.
|
|
3118
|
-
*/
|
|
3119
|
-
flush() {
|
|
3120
|
-
if (this.pending === null) return Buffer.alloc(0);
|
|
3121
|
-
const out = this.pending;
|
|
3122
|
-
this.pending = null;
|
|
3123
|
-
return out;
|
|
3124
|
-
}
|
|
3125
|
-
/** Reset carry state without flushing. */
|
|
3126
|
-
reset() {
|
|
3127
|
-
this.pending = null;
|
|
3128
|
-
}
|
|
3129
|
-
};
|
|
3130
|
-
var StatefulResampler = class {
|
|
3131
|
-
srcRate;
|
|
3132
|
-
dstRate;
|
|
3133
|
-
// 16k→8k: 5-tap FIR state.
|
|
3134
|
-
// Extended sample buffer carries the 2 history samples that precede the
|
|
3135
|
-
// current chunk AND any "pending" input sample that did not yet generate
|
|
3136
|
-
// output (i.e. the odd sample when the chunk had an odd sample count).
|
|
3137
|
-
// `firPhase` = 0 means the next output is at input position 0 of the
|
|
3138
|
-
// current chunk; 1 means it starts at input position 1 (because the
|
|
3139
|
-
// previous chunk ended on an even-output boundary).
|
|
3140
|
-
firHistory = new Int16Array(2);
|
|
3141
|
-
// [s_{-2}, s_{-1}]
|
|
3142
|
-
firHistoryValid = false;
|
|
3143
|
-
// Pending sample carried from odd-count chunks (not the byte carry —
|
|
3144
|
-
// this is a complete Int16 sample that becomes the first input for the
|
|
3145
|
-
// next call).
|
|
3146
|
-
firPendingSample = null;
|
|
3147
|
-
// 8k→16k: last input sample deferred across chunk boundaries.
|
|
3148
|
-
upsampleLast = 0;
|
|
3149
|
-
upsampleHasHistory = false;
|
|
3150
|
-
// 24k→16k: fractional phase and last input sample across chunks.
|
|
3151
|
-
resample24Last = 0;
|
|
3152
|
-
resample24Phase = 0;
|
|
3153
|
-
resample24HasHistory = false;
|
|
3154
|
-
// Odd-byte alignment carry.
|
|
3155
|
-
carry = new PcmCarry();
|
|
3156
|
-
constructor(opts) {
|
|
3157
|
-
this.srcRate = opts.srcRate;
|
|
3158
|
-
this.dstRate = opts.dstRate;
|
|
3159
|
-
if (opts.channels !== void 0 && opts.channels !== 1) {
|
|
3160
|
-
throw new Error("StatefulResampler: only mono (channels=1) is supported");
|
|
3161
|
-
}
|
|
3162
|
-
const key = `${this.srcRate}->${this.dstRate}`;
|
|
3163
|
-
if (key !== "16000->8000" && key !== "8000->16000" && key !== "24000->16000" && key !== "24000->8000") {
|
|
3164
|
-
throw new Error(
|
|
3165
|
-
`StatefulResampler: unsupported conversion ${key}. Supported: 16000->8000, 8000->16000, 24000->16000, 24000->8000`
|
|
3166
|
-
);
|
|
3167
|
-
}
|
|
3168
|
-
}
|
|
3169
|
-
/**
|
|
3170
|
-
* Process a chunk of PCM16-LE samples.
|
|
3171
|
-
*
|
|
3172
|
-
* Handles odd-byte inputs via an internal carry buffer. Returns an even-byte-
|
|
3173
|
-
* aligned output buffer; may return a zero-length buffer if not enough
|
|
3174
|
-
* aligned input is available yet.
|
|
3175
|
-
*/
|
|
3176
|
-
process(pcm) {
|
|
3177
|
-
const aligned = this.carry.push(pcm);
|
|
3178
|
-
if (aligned.length === 0) return Buffer.alloc(0);
|
|
3179
|
-
if (this.srcRate === 16e3 && this.dstRate === 8e3) {
|
|
3180
|
-
return this._downsample16kTo8k(aligned);
|
|
3181
|
-
}
|
|
3182
|
-
if (this.srcRate === 8e3 && this.dstRate === 16e3) {
|
|
3183
|
-
return this._upsample8kTo16k(aligned);
|
|
3184
|
-
}
|
|
3185
|
-
if (this.srcRate === 24e3 && this.dstRate === 8e3) {
|
|
3186
|
-
return this._resample24kTo8k(aligned);
|
|
3187
|
-
}
|
|
3188
|
-
return this._resample24kTo16k(aligned);
|
|
3189
|
-
}
|
|
3190
|
-
/**
|
|
3191
|
-
* Flush internal state and return any remaining output samples.
|
|
3192
|
-
*
|
|
3193
|
-
* For 8k→16k: the deferred last sample is emitted duplicated (matching
|
|
3194
|
-
* the stateless helper's end-of-stream behaviour).
|
|
3195
|
-
* For 16k→8k: any pending odd sample is processed with edge-replication.
|
|
3196
|
-
* Resets all state after flushing.
|
|
3197
|
-
*/
|
|
3198
|
-
flush() {
|
|
3199
|
-
this.carry.flush();
|
|
3200
|
-
if (this.srcRate === 16e3 && this.dstRate === 8e3 && this.firPendingSample !== null) {
|
|
3201
|
-
const s = this.firPendingSample;
|
|
3202
|
-
const tmp = Buffer.alloc(4);
|
|
3203
|
-
tmp.writeInt16LE(s, 0);
|
|
3204
|
-
tmp.writeInt16LE(s, 2);
|
|
3205
|
-
const out = this._downsample16kTo8k(tmp);
|
|
3206
|
-
this.firPendingSample = null;
|
|
3207
|
-
return out;
|
|
3208
|
-
}
|
|
3209
|
-
if (this.srcRate === 8e3 && this.dstRate === 16e3 && this.upsampleHasHistory) {
|
|
3210
|
-
const out = Buffer.alloc(4);
|
|
3211
|
-
out.writeInt16LE(this.upsampleLast, 0);
|
|
3212
|
-
out.writeInt16LE(this.upsampleLast, 2);
|
|
3213
|
-
this.upsampleHasHistory = false;
|
|
3214
|
-
this.upsampleLast = 0;
|
|
3215
|
-
return out;
|
|
3216
|
-
}
|
|
3217
|
-
return Buffer.alloc(0);
|
|
3218
|
-
}
|
|
3219
|
-
/** Reset all carried state (e.g. at call boundaries). */
|
|
3220
|
-
reset() {
|
|
3221
|
-
this.firHistory = new Int16Array(2);
|
|
3222
|
-
this.firHistoryValid = false;
|
|
3223
|
-
this.firPendingSample = null;
|
|
3224
|
-
this.upsampleLast = 0;
|
|
3225
|
-
this.upsampleHasHistory = false;
|
|
3226
|
-
this.resample24Last = 0;
|
|
3227
|
-
this.resample24Phase = 0;
|
|
3228
|
-
this.resample24HasHistory = false;
|
|
3229
|
-
this.carry.reset();
|
|
3230
|
-
}
|
|
3231
|
-
// ---------------------------------------------------------------------------
|
|
3232
|
-
// Private: 16 kHz → 8 kHz
|
|
3233
|
-
// ---------------------------------------------------------------------------
|
|
3234
|
-
/**
|
|
3235
|
-
* 2:1 decimation with a 5-tap binomial FIR anti-alias filter.
|
|
3236
|
-
*
|
|
3237
|
-
* FIR coefficients: [1, 4, 6, 4, 1] / 16 (cutoff ~Fs/4 = 4 kHz).
|
|
3238
|
-
*
|
|
3239
|
-
* Cross-chunk state:
|
|
3240
|
-
* - `firHistory[0]` = s_{-2}, `firHistory[1]` = s_{-1} relative to the
|
|
3241
|
-
* virtual stream (seeded to first-sample on the very first call).
|
|
3242
|
-
* - `firPendingSample` = a lone input sample carried from a chunk whose
|
|
3243
|
-
* sample count was odd; it will become the first input of the next chunk.
|
|
3244
|
-
*
|
|
3245
|
-
* Decimation: outputs are at even positions (0, 2, 4 …) in the virtual
|
|
3246
|
-
* extended stream, so every 2 input samples yield 1 output. An odd-sample-
|
|
3247
|
-
* count chunk leaves 1 sample in `firPendingSample`; the next chunk
|
|
3248
|
-
* prepends it so the output cadence is unbroken.
|
|
3249
|
-
*/
|
|
3250
|
-
_downsample16kTo8k(buf) {
|
|
3251
|
-
const newSampleCount = buf.length >> 1;
|
|
3252
|
-
const hasPending = this.firPendingSample !== null;
|
|
3253
|
-
const totalInput = newSampleCount + (hasPending ? 1 : 0);
|
|
3254
|
-
const input = new Int16Array(totalInput);
|
|
3255
|
-
if (hasPending) {
|
|
3256
|
-
input[0] = this.firPendingSample;
|
|
3257
|
-
for (let j = 0; j < newSampleCount; j++) input[j + 1] = buf.readInt16LE(j * 2);
|
|
3258
|
-
} else {
|
|
3259
|
-
for (let j = 0; j < newSampleCount; j++) input[j] = buf.readInt16LE(j * 2);
|
|
3260
|
-
}
|
|
3261
|
-
this.firPendingSample = null;
|
|
3262
|
-
if (totalInput === 0) return Buffer.alloc(0);
|
|
3263
|
-
if (!this.firHistoryValid) {
|
|
3264
|
-
this.firHistory[0] = input[0];
|
|
3265
|
-
this.firHistory[1] = input[0];
|
|
3266
|
-
this.firHistoryValid = true;
|
|
3267
|
-
}
|
|
3268
|
-
const extended = new Int16Array(totalInput + 2);
|
|
3269
|
-
extended[0] = this.firHistory[0];
|
|
3270
|
-
extended[1] = this.firHistory[1];
|
|
3271
|
-
for (let j = 0; j < totalInput; j++) extended[j + 2] = input[j];
|
|
3272
|
-
const outSamples = totalInput >> 1;
|
|
3273
|
-
const out = Buffer.alloc(outSamples * 2);
|
|
3274
|
-
for (let i = 0; i < outSamples; i++) {
|
|
3275
|
-
const c = 2 + i * 2;
|
|
3276
|
-
const sM2 = extended[c - 2];
|
|
3277
|
-
const sM1 = extended[c - 1];
|
|
3278
|
-
const s0 = extended[c];
|
|
3279
|
-
const sP1 = c + 1 < extended.length ? extended[c + 1] : extended[extended.length - 1];
|
|
3280
|
-
const sP2 = c + 2 < extended.length ? extended[c + 2] : extended[extended.length - 1];
|
|
3281
|
-
const filtered = sM2 + 4 * sM1 + 6 * s0 + 4 * sP1 + sP2 + 8 >> 4;
|
|
3282
|
-
out.writeInt16LE(Math.max(-32768, Math.min(32767, filtered)), i * 2);
|
|
3283
|
-
}
|
|
3284
|
-
if (totalInput % 2 === 1) {
|
|
3285
|
-
this.firPendingSample = input[totalInput - 1];
|
|
3286
|
-
}
|
|
3287
|
-
if (totalInput >= 2) {
|
|
3288
|
-
this.firHistory[0] = input[totalInput - 2];
|
|
3289
|
-
this.firHistory[1] = input[totalInput - 1];
|
|
3290
|
-
} else {
|
|
3291
|
-
this.firHistory[0] = this.firHistory[1];
|
|
3292
|
-
this.firHistory[1] = input[0];
|
|
3293
|
-
}
|
|
3294
|
-
return out;
|
|
3295
|
-
}
|
|
3296
|
-
// ---------------------------------------------------------------------------
|
|
3297
|
-
// Private: 8 kHz → 16 kHz
|
|
3298
|
-
// ---------------------------------------------------------------------------
|
|
3299
|
-
/**
|
|
3300
|
-
* 1:2 linear-interpolation upsampler.
|
|
3301
|
-
*
|
|
3302
|
-
* For the first chunk (no history): emits 2*(N-1) samples and defers the
|
|
3303
|
-
* last sample. For subsequent chunks (with history): emits the deferred
|
|
3304
|
-
* sample + its interpolated midpoint THEN 2*(N-1) samples from the new
|
|
3305
|
-
* chunk, deferring the new last sample. Total across K chunks + flush =
|
|
3306
|
-
* 2*total_input_samples (correct output length).
|
|
3307
|
-
*
|
|
3308
|
-
* Call flush() after the final chunk to emit the last deferred sample
|
|
3309
|
-
* pair (self-duplicate at end of stream).
|
|
3310
|
-
*/
|
|
3311
|
-
_upsample8kTo16k(buf) {
|
|
3312
|
-
const sampleCount = buf.length >> 1;
|
|
3313
|
-
if (sampleCount === 0) return Buffer.alloc(0);
|
|
3314
|
-
const outArr = [];
|
|
3315
|
-
if (this.upsampleHasHistory) {
|
|
3316
|
-
const next = buf.readInt16LE(0);
|
|
3317
|
-
outArr.push(this.upsampleLast);
|
|
3318
|
-
outArr.push(Math.round((this.upsampleLast + next) / 2));
|
|
3319
|
-
}
|
|
3320
|
-
for (let i = 0; i < sampleCount - 1; i++) {
|
|
3321
|
-
const s0 = buf.readInt16LE(i * 2);
|
|
3322
|
-
const s1 = buf.readInt16LE((i + 1) * 2);
|
|
3323
|
-
outArr.push(s0);
|
|
3324
|
-
outArr.push(Math.round((s0 + s1) / 2));
|
|
3325
|
-
}
|
|
3326
|
-
this.upsampleLast = buf.readInt16LE((sampleCount - 1) * 2);
|
|
3327
|
-
this.upsampleHasHistory = true;
|
|
3328
|
-
const outBuf = Buffer.alloc(outArr.length * 2);
|
|
3329
|
-
for (let j = 0; j < outArr.length; j++) outBuf.writeInt16LE(outArr[j], j * 2);
|
|
3330
|
-
return outBuf;
|
|
3331
|
-
}
|
|
3332
|
-
// ---------------------------------------------------------------------------
|
|
3333
|
-
// Private: 24 kHz → 16 kHz / 8 kHz
|
|
3334
|
-
// ---------------------------------------------------------------------------
|
|
3335
|
-
/**
|
|
3336
|
-
* 3:2 linear-interpolation decimator (ratio srcRate/dstRate = 1.5).
|
|
3337
|
-
*
|
|
3338
|
-
* `resample24Phase` tracks the fractional input position of the next output
|
|
3339
|
-
* sample relative to the START of the next chunk. Negative phase means the
|
|
3340
|
-
* next output straddles the previous/current chunk boundary; those are
|
|
3341
|
-
* handled using `resample24Last`.
|
|
3342
|
-
*/
|
|
3343
|
-
_resample24kTo16k(buf) {
|
|
3344
|
-
return this._resample24kStep(buf, 24e3 / 16e3);
|
|
3345
|
-
}
|
|
3346
|
-
/** 3:1 decimation — collapses the 24k→16k→8k chain into a single step. */
|
|
3347
|
-
_resample24kTo8k(buf) {
|
|
3348
|
-
return this._resample24kStep(buf, 24e3 / 8e3);
|
|
3349
|
-
}
|
|
3350
|
-
/** Shared phase-stepping resampler used by 24→16 (step 1.5) and 24→8 (step 3). */
|
|
3351
|
-
_resample24kStep(buf, step) {
|
|
3352
|
-
const sampleCount = buf.length >> 1;
|
|
3353
|
-
if (sampleCount === 0) return Buffer.alloc(0);
|
|
3354
|
-
const outArr = [];
|
|
3355
|
-
let phase = this.resample24Phase;
|
|
3356
|
-
while (true) {
|
|
3357
|
-
const idx = Math.floor(phase);
|
|
3358
|
-
if (idx >= sampleCount) break;
|
|
3359
|
-
const frac = phase - idx;
|
|
3360
|
-
let s0;
|
|
3361
|
-
let s1;
|
|
3362
|
-
if (idx < 0) {
|
|
3363
|
-
s0 = this.resample24HasHistory ? this.resample24Last : 0;
|
|
3364
|
-
s1 = buf.readInt16LE(0);
|
|
3365
|
-
} else {
|
|
3366
|
-
s0 = buf.readInt16LE(idx * 2);
|
|
3367
|
-
s1 = idx + 1 < sampleCount ? buf.readInt16LE((idx + 1) * 2) : s0;
|
|
3368
|
-
}
|
|
3369
|
-
const interp = Math.round(s0 + (s1 - s0) * frac);
|
|
3370
|
-
outArr.push(Math.max(-32768, Math.min(32767, interp)));
|
|
3371
|
-
phase += step;
|
|
3372
|
-
}
|
|
3373
|
-
this.resample24Last = buf.readInt16LE((sampleCount - 1) * 2);
|
|
3374
|
-
this.resample24HasHistory = true;
|
|
3375
|
-
this.resample24Phase = phase - sampleCount;
|
|
3376
|
-
const outBuf = Buffer.alloc(outArr.length * 2);
|
|
3377
|
-
for (let j = 0; j < outArr.length; j++) outBuf.writeInt16LE(outArr[j], j * 2);
|
|
3378
|
-
return outBuf;
|
|
3379
|
-
}
|
|
3380
|
-
};
|
|
3381
|
-
function createResampler16kTo8k() {
|
|
3382
|
-
return new StatefulResampler({ srcRate: 16e3, dstRate: 8e3 });
|
|
3383
|
-
}
|
|
3384
|
-
function createResampler8kTo16k() {
|
|
3385
|
-
return new StatefulResampler({ srcRate: 8e3, dstRate: 16e3 });
|
|
3386
|
-
}
|
|
3387
|
-
function createResampler24kTo16k() {
|
|
3388
|
-
return new StatefulResampler({ srcRate: 24e3, dstRate: 16e3 });
|
|
3389
|
-
}
|
|
3390
|
-
function createResampler24kTo8k() {
|
|
3391
|
-
return new StatefulResampler({ srcRate: 24e3, dstRate: 8e3 });
|
|
3392
|
-
}
|
|
3393
|
-
var _warnedResample8kTo16k = false;
|
|
3394
|
-
var _warnedResample16kTo8k = false;
|
|
3395
|
-
var _warnedResample24kTo16k = false;
|
|
3396
|
-
function resample8kTo16k(pcm8k) {
|
|
3397
|
-
if (!_warnedResample8kTo16k) {
|
|
3398
|
-
_warnedResample8kTo16k = true;
|
|
3399
|
-
getLogger().warn(
|
|
3400
|
-
"[patter] resample8kTo16k() is deprecated. Use createResampler8kTo16k() (StatefulResampler) to eliminate chunk-boundary discontinuities."
|
|
3401
|
-
);
|
|
3402
|
-
}
|
|
3403
|
-
if (pcm8k.length === 0) return Buffer.alloc(0);
|
|
3404
|
-
const r = createResampler8kTo16k();
|
|
3405
|
-
const main = r.process(pcm8k);
|
|
3406
|
-
const tail = r.flush();
|
|
3407
|
-
return tail.length > 0 ? Buffer.concat([main, tail]) : main;
|
|
3408
|
-
}
|
|
3409
|
-
function resample16kTo8k(pcm16k) {
|
|
3410
|
-
if (!_warnedResample16kTo8k) {
|
|
3411
|
-
_warnedResample16kTo8k = true;
|
|
3412
|
-
getLogger().warn(
|
|
3413
|
-
"[patter] resample16kTo8k() is deprecated. Use createResampler16kTo8k() (StatefulResampler) to eliminate chunk-boundary discontinuities."
|
|
3414
|
-
);
|
|
3415
|
-
}
|
|
3416
|
-
if (pcm16k.length === 0) return Buffer.alloc(0);
|
|
3417
|
-
const r = createResampler16kTo8k();
|
|
3418
|
-
const out = r.process(pcm16k);
|
|
3419
|
-
const tail = r.flush();
|
|
3420
|
-
return tail.length > 0 ? Buffer.concat([out, tail]) : out;
|
|
3421
|
-
}
|
|
3422
|
-
function resample24kTo16k(pcm24k) {
|
|
3423
|
-
if (!_warnedResample24kTo16k) {
|
|
3424
|
-
_warnedResample24kTo16k = true;
|
|
3425
|
-
getLogger().warn(
|
|
3426
|
-
"[patter] resample24kTo16k() is deprecated. Use createResampler24kTo16k() (StatefulResampler) or OpenAITTS.resampleStreaming for anti-aliased resampling."
|
|
3427
|
-
);
|
|
3428
|
-
}
|
|
3429
|
-
if (pcm24k.length === 0) return Buffer.alloc(0);
|
|
3430
|
-
const sampleCount = Math.floor(pcm24k.length / 2);
|
|
3431
|
-
const outSamples = Math.floor(sampleCount * 2 / 3);
|
|
3432
|
-
const out = Buffer.alloc(outSamples * 2);
|
|
3433
|
-
for (let i = 0; i < outSamples; i++) {
|
|
3434
|
-
const pos = i * 1.5;
|
|
3435
|
-
const idx = Math.floor(pos);
|
|
3436
|
-
const frac = pos - idx;
|
|
3437
|
-
const s0 = pcm24k.readInt16LE(idx * 2);
|
|
3438
|
-
const s1 = idx + 1 < sampleCount ? pcm24k.readInt16LE((idx + 1) * 2) : s0;
|
|
3439
|
-
const interp = Math.round(s0 + (s1 - s0) * frac);
|
|
3440
|
-
out.writeInt16LE(Math.max(-32768, Math.min(32767, interp)), i * 2);
|
|
3441
|
-
}
|
|
3442
|
-
return out;
|
|
3443
|
-
}
|
|
3444
|
-
|
|
3445
3203
|
// src/handler-utils.ts
|
|
3446
3204
|
init_esm_shims();
|
|
3447
3205
|
function createHistoryManager(maxSize) {
|
|
@@ -4313,7 +4071,23 @@ var HALLUCINATIONS = /* @__PURE__ */ new Set([
|
|
|
4313
4071
|
".",
|
|
4314
4072
|
"bye",
|
|
4315
4073
|
"right",
|
|
4316
|
-
"cool"
|
|
4074
|
+
"cool",
|
|
4075
|
+
// Whisper YouTube-caption hallucinations
|
|
4076
|
+
"thank you for watching",
|
|
4077
|
+
"thanks for watching",
|
|
4078
|
+
"thank you for watching!",
|
|
4079
|
+
"thanks for watching!",
|
|
4080
|
+
"thank you so much for watching",
|
|
4081
|
+
"thanks for listening",
|
|
4082
|
+
"please subscribe",
|
|
4083
|
+
"subscribe",
|
|
4084
|
+
"music",
|
|
4085
|
+
"[music]",
|
|
4086
|
+
"\u266A",
|
|
4087
|
+
"[no audio]",
|
|
4088
|
+
"[silence]",
|
|
4089
|
+
"[blank_audio]",
|
|
4090
|
+
"(silence)"
|
|
4317
4091
|
]);
|
|
4318
4092
|
var StreamHandler = class _StreamHandler {
|
|
4319
4093
|
deps;
|
|
@@ -4391,6 +4165,43 @@ var StreamHandler = class _StreamHandler {
|
|
|
4391
4165
|
* sentence.
|
|
4392
4166
|
*/
|
|
4393
4167
|
speakingStartedAt = null;
|
|
4168
|
+
/**
|
|
4169
|
+
* Wall-clock (ms) when the FIRST TTS audio chunk actually reached the
|
|
4170
|
+
* carrier wire — set in ``markFirstAudioSent`` after ``bridge.sendAudio``
|
|
4171
|
+
* succeeds, cleared by ``beginSpeaking`` / ``cancelSpeaking``. The barge-in
|
|
4172
|
+
* gate measures elapsed from this instant, NOT from ``speakingStartedAt``,
|
|
4173
|
+
* because ElevenLabs (and other cloud TTS) take 200-700 ms to emit the
|
|
4174
|
+
* first byte. A gate anchored to ``beginSpeaking`` would expire on
|
|
4175
|
+
* background noise before any audio went out, exit the TTS loop on
|
|
4176
|
+
* ``isSpeaking=false``, and silently cut the agent's first turn.
|
|
4177
|
+
*/
|
|
4178
|
+
firstAudioSentAt = null;
|
|
4179
|
+
/**
|
|
4180
|
+
* Optional barge-in confirmation strategies. With an empty array the
|
|
4181
|
+
* SDK falls back to the legacy "cancel on first VAD speech_start"
|
|
4182
|
+
* behaviour. With one or more strategies, a VAD speech_start during
|
|
4183
|
+
* TTS marks the barge-in as *pending* — TTS keeps streaming naturally
|
|
4184
|
+
* — and the strategies are consulted on every STT transcript via
|
|
4185
|
+
* ``handleBargeIn``. The first strategy that returns ``true`` cancels
|
|
4186
|
+
* the agent; if none confirm within ``bargeInConfirmMs`` the pending
|
|
4187
|
+
* state is dropped and the agent finishes its sentence.
|
|
4188
|
+
*/
|
|
4189
|
+
bargeInStrategies;
|
|
4190
|
+
/** Pending-barge-in confirmation timeout in milliseconds. */
|
|
4191
|
+
bargeInConfirmMs;
|
|
4192
|
+
/** Wall-clock (ms) when the current pending barge-in started, or
|
|
4193
|
+
* ``null`` if no barge-in is pending. */
|
|
4194
|
+
bargeInPendingSince = null;
|
|
4195
|
+
/** Timer that fires the pending-barge-in timeout. */
|
|
4196
|
+
bargeInPendingTimer = null;
|
|
4197
|
+
/**
|
|
4198
|
+
* Set to true when a VAD ``speech_start`` was suppressed by the
|
|
4199
|
+
* anti-echo gate during the current agent turn. Cleared on
|
|
4200
|
+
* ``beginSpeaking`` and ``cancelSpeaking``. When the turn ends
|
|
4201
|
+
* naturally (grace timer), the inbound audio ring is flushed to STT
|
|
4202
|
+
* so the user's speech is not silently discarded.
|
|
4203
|
+
*/
|
|
4204
|
+
suppressedSpeechPending = false;
|
|
4394
4205
|
/**
|
|
4395
4206
|
* Minimum wall-clock duration (ms) the agent must have been speaking
|
|
4396
4207
|
* before barge-in is allowed to fire when AEC is active. Covers the
|
|
@@ -4402,10 +4213,17 @@ var StreamHandler = class _StreamHandler {
|
|
|
4402
4213
|
* Same as the AEC variant but for deployments where AEC is OFF
|
|
4403
4214
|
* (default on PSTN — Twilio/Telnyx). Without an adaptive filter to
|
|
4404
4215
|
* converge, the only justification for a gate is anti-flicker on
|
|
4405
|
-
* micro-events (cough, click).
|
|
4406
|
-
*
|
|
4216
|
+
* micro-events (cough, click). Raised 100 → 500 ms on 2026-05-19
|
|
4217
|
+
* after the 0.6.2 acceptance run showed a phantom VAD speech_start
|
|
4218
|
+
* firing on the very first inbound frame (~500 ms into the call,
|
|
4219
|
+
* which is past a 100 ms gate). The phantom barge-in cancelled the
|
|
4220
|
+
* prewarmed firstMessage, the user heard a clipped (graffiante)
|
|
4221
|
+
* audio fragment, and the SDK left ``_turnAlreadyClosed=true`` so
|
|
4222
|
+
* subsequent ``recordTurnComplete`` calls were no-ops. 500 ms
|
|
4223
|
+
* filters those phantoms while still letting a real interruption
|
|
4224
|
+
* land within half a second of agent onset.
|
|
4407
4225
|
*/
|
|
4408
|
-
static MIN_AGENT_SPEAKING_MS_BEFORE_BARGE_IN_NO_AEC =
|
|
4226
|
+
static MIN_AGENT_SPEAKING_MS_BEFORE_BARGE_IN_NO_AEC = 500;
|
|
4409
4227
|
/** Handle for the pending grace-period timer, so it can be cleared on cleanup. */
|
|
4410
4228
|
graceTimer = null;
|
|
4411
4229
|
/**
|
|
@@ -4425,6 +4243,32 @@ var StreamHandler = class _StreamHandler {
|
|
|
4425
4243
|
* the tail of the cancelled turn (~50-200 ms of doubled audio).
|
|
4426
4244
|
*/
|
|
4427
4245
|
lastCancelAt = null;
|
|
4246
|
+
/**
|
|
4247
|
+
* Promise queue tracking outstanding Twilio marks the SDK has sent but
|
|
4248
|
+
* not yet seen echoed back. Used by the firstMessage send loop to bound
|
|
4249
|
+
* the depth of audio queued at the carrier — without this the loop
|
|
4250
|
+
* pushes the entire TTS stream into Twilio's WebSocket in one burst,
|
|
4251
|
+
* and a sendClear issued mid-buffer races against several seconds of
|
|
4252
|
+
* already-queued media frames (BUG #128). The window depth is
|
|
4253
|
+
* ``FIRST_MESSAGE_MARK_WINDOW``; ``onMark`` drains entries as Twilio
|
|
4254
|
+
* confirms playback, ``cancelSpeaking`` resolves every pending entry so
|
|
4255
|
+
* any awaiter exits immediately. Telnyx never populates this queue
|
|
4256
|
+
* (Telnyx's media-stream protocol has no mark concept — the loop
|
|
4257
|
+
* falls back to time-based pacing on that carrier).
|
|
4258
|
+
*/
|
|
4259
|
+
pendingMarks = [];
|
|
4260
|
+
/**
|
|
4261
|
+
* Monotonic counter for first-message mark names. Distinct from
|
|
4262
|
+
* ``chunkCount`` (which the Realtime path uses) so the two paths can
|
|
4263
|
+
* coexist without name collisions even when firstMessage finishes while
|
|
4264
|
+
* a Realtime turn is still streaming.
|
|
4265
|
+
*/
|
|
4266
|
+
// firstMessageMarkCounter / FIRST_MESSAGE_MARK_WINDOW /
|
|
4267
|
+
// MARK_AWAIT_TIMEOUT_MS were retired with the move to the Twilio-FIFO-
|
|
4268
|
+
// trusts model (sendPacedFirstMessageBytes no longer emits marks).
|
|
4269
|
+
// Marks are still consumed via ``onMark`` for any adapter that wants
|
|
4270
|
+
// to round-trip one, but the firstMessage path no longer back-pressures
|
|
4271
|
+
// on them.
|
|
4428
4272
|
/**
|
|
4429
4273
|
* Minimum drain window (ms) between a ``cancelSpeaking`` and the next
|
|
4430
4274
|
* ``beginSpeaking``. 150 ms covers a typical PSTN jitter buffer drain
|
|
@@ -4439,7 +4283,7 @@ var StreamHandler = class _StreamHandler {
|
|
|
4439
4283
|
* directly. Awaits the post-cancel drain window before flipping state
|
|
4440
4284
|
* so the remote player has time to flush the cancelled turn's tail.
|
|
4441
4285
|
*/
|
|
4442
|
-
async beginSpeaking() {
|
|
4286
|
+
async beginSpeaking(isFirstMessage = false) {
|
|
4443
4287
|
if (this.lastCancelAt !== null) {
|
|
4444
4288
|
const elapsed = Date.now() - this.lastCancelAt;
|
|
4445
4289
|
const remaining = _StreamHandler.POST_CANCEL_DRAIN_MS - elapsed;
|
|
@@ -4450,7 +4294,23 @@ var StreamHandler = class _StreamHandler {
|
|
|
4450
4294
|
this.speakingGeneration++;
|
|
4451
4295
|
this.isSpeaking = true;
|
|
4452
4296
|
this.speakingStartedAt = Date.now();
|
|
4297
|
+
this.suppressedSpeechPending = false;
|
|
4298
|
+
void isFirstMessage;
|
|
4299
|
+
this.firstAudioSentAt = Date.now();
|
|
4453
4300
|
this.inboundAudioRing = [];
|
|
4301
|
+
this.resetVad();
|
|
4302
|
+
}
|
|
4303
|
+
/**
|
|
4304
|
+
* Record that the first TTS audio chunk of the current turn has hit the
|
|
4305
|
+
* carrier wire. Idempotent within a turn — only the first call sets the
|
|
4306
|
+
* timestamp; later chunks are no-ops. Must be invoked AFTER the underlying
|
|
4307
|
+
* ``bridge.sendAudio`` resolves so the gate is anchored to "audio actually
|
|
4308
|
+
* went out", not "we asked the carrier to send it".
|
|
4309
|
+
*/
|
|
4310
|
+
markFirstAudioSent() {
|
|
4311
|
+
if (this.firstAudioSentAt === null) {
|
|
4312
|
+
this.firstAudioSentAt = Date.now();
|
|
4313
|
+
}
|
|
4454
4314
|
}
|
|
4455
4315
|
/**
|
|
4456
4316
|
* Atomically end speaking AND invalidate any pending grace timer.
|
|
@@ -4463,14 +4323,55 @@ var StreamHandler = class _StreamHandler {
|
|
|
4463
4323
|
this.speakingGeneration++;
|
|
4464
4324
|
this.isSpeaking = false;
|
|
4465
4325
|
this.speakingStartedAt = null;
|
|
4326
|
+
this.firstAudioSentAt = null;
|
|
4466
4327
|
this.lastCancelAt = Date.now();
|
|
4328
|
+
this.suppressedSpeechPending = false;
|
|
4329
|
+
this.drainPendingMarks();
|
|
4467
4330
|
if (this.llmAbort !== null) {
|
|
4468
4331
|
try {
|
|
4469
4332
|
this.llmAbort.abort();
|
|
4470
4333
|
} catch {
|
|
4471
4334
|
}
|
|
4472
4335
|
}
|
|
4336
|
+
const ttsCancelable = this.tts;
|
|
4337
|
+
if (typeof ttsCancelable?.cancelActiveStream === "function") {
|
|
4338
|
+
try {
|
|
4339
|
+
ttsCancelable.cancelActiveStream();
|
|
4340
|
+
} catch (err) {
|
|
4341
|
+
getLogger().debug(`TTS cancelActiveStream raised: ${String(err)}`);
|
|
4342
|
+
}
|
|
4343
|
+
}
|
|
4344
|
+
}
|
|
4345
|
+
/**
|
|
4346
|
+
* Resolve every entry in ``pendingMarks`` and empty the queue. Idempotent
|
|
4347
|
+
* — safe to call from ``cancelSpeaking`` and again from the grace path
|
|
4348
|
+
* without leaking pending promises.
|
|
4349
|
+
*/
|
|
4350
|
+
drainPendingMarks() {
|
|
4351
|
+
if (this.pendingMarks.length === 0) return;
|
|
4352
|
+
for (const entry of this.pendingMarks) {
|
|
4353
|
+
try {
|
|
4354
|
+
entry.resolve();
|
|
4355
|
+
} catch {
|
|
4356
|
+
}
|
|
4357
|
+
}
|
|
4358
|
+
this.pendingMarks.length = 0;
|
|
4473
4359
|
}
|
|
4360
|
+
// Mark-based back-pressure (sendMarkAwaitable / waitForMarkWindow)
|
|
4361
|
+
// was removed when sendPacedFirstMessageBytes switched to the
|
|
4362
|
+
// Twilio-FIFO-trusts model — see that method's doc comment for
|
|
4363
|
+
// rationale. ``pendingMarks`` and ``onMark`` are still kept so an
|
|
4364
|
+
// adapter that wants to round-trip a mark for some other purpose can
|
|
4365
|
+
// still do so without breaking the firstMessage path.
|
|
4366
|
+
/**
|
|
4367
|
+
* Bytes-per-millisecond for a 16 kHz PCM16 mono stream. Used by
|
|
4368
|
+
* ``sendPacedFirstMessageBytes`` to translate chunk size into a
|
|
4369
|
+
* playout-duration sleep so we never deliver faster than the carrier
|
|
4370
|
+
* can decode + play out (which manifested as severe crackling on the
|
|
4371
|
+
* HTTP-TTS path with client-side resampling). 16000 samples/sec × 2
|
|
4372
|
+
* bytes/sample = 32 bytes/ms.
|
|
4373
|
+
*/
|
|
4374
|
+
static PCM16_16K_BYTES_PER_MS = 32;
|
|
4474
4375
|
/** Cancel and clear the pending grace timer, if any. */
|
|
4475
4376
|
clearGraceTimer() {
|
|
4476
4377
|
if (this.graceTimer !== null) {
|
|
@@ -4495,11 +4396,53 @@ var StreamHandler = class _StreamHandler {
|
|
|
4495
4396
|
if (this.speakingGeneration === gen) {
|
|
4496
4397
|
this.isSpeaking = false;
|
|
4497
4398
|
this.speakingStartedAt = null;
|
|
4399
|
+
this.firstAudioSentAt = null;
|
|
4400
|
+
this.clearPendingBargeIn();
|
|
4401
|
+
void this.resetBargeInStrategies();
|
|
4402
|
+
if (this.suppressedSpeechPending) {
|
|
4403
|
+
this.suppressedSpeechPending = false;
|
|
4404
|
+
this.flushInboundAudioRing();
|
|
4405
|
+
}
|
|
4406
|
+
this.resetVad();
|
|
4498
4407
|
}
|
|
4499
4408
|
}, grace);
|
|
4500
4409
|
} else {
|
|
4501
4410
|
this.isSpeaking = false;
|
|
4502
4411
|
this.speakingStartedAt = null;
|
|
4412
|
+
this.firstAudioSentAt = null;
|
|
4413
|
+
this.clearPendingBargeIn();
|
|
4414
|
+
void this.resetBargeInStrategies();
|
|
4415
|
+
if (this.suppressedSpeechPending) {
|
|
4416
|
+
this.suppressedSpeechPending = false;
|
|
4417
|
+
this.flushInboundAudioRing();
|
|
4418
|
+
}
|
|
4419
|
+
this.resetVad();
|
|
4420
|
+
}
|
|
4421
|
+
}
|
|
4422
|
+
async resetBargeInStrategies() {
|
|
4423
|
+
if (this.bargeInStrategies.length === 0) return;
|
|
4424
|
+
const { resetStrategies } = await import("./barge-in-strategies-X6ARMGIQ.mjs");
|
|
4425
|
+
await resetStrategies(this.bargeInStrategies);
|
|
4426
|
+
}
|
|
4427
|
+
/**
|
|
4428
|
+
* Reset the active VAD provider's per-utterance state. No-op when the
|
|
4429
|
+
* provider does not implement the optional ``reset()`` hook. Safe to call
|
|
4430
|
+
* from any context — failures are swallowed and the VAD is disabled for
|
|
4431
|
+
* the rest of the call so a flaky reset can never silently kill barge-in
|
|
4432
|
+
* for every subsequent turn.
|
|
4433
|
+
*/
|
|
4434
|
+
resetVad() {
|
|
4435
|
+
const activeVad = this.deps.agent.vad ?? this.autoVad;
|
|
4436
|
+
if (!activeVad || this.vadDisabled) return;
|
|
4437
|
+
try {
|
|
4438
|
+
const ret = activeVad.reset?.();
|
|
4439
|
+
if (ret instanceof Promise) {
|
|
4440
|
+
ret.catch((err) => {
|
|
4441
|
+
getLogger().debug(`VAD reset threw: ${String(err)}`);
|
|
4442
|
+
});
|
|
4443
|
+
}
|
|
4444
|
+
} catch (err) {
|
|
4445
|
+
getLogger().debug(`VAD reset threw: ${String(err)}`);
|
|
4503
4446
|
}
|
|
4504
4447
|
}
|
|
4505
4448
|
/**
|
|
@@ -4509,7 +4452,8 @@ var StreamHandler = class _StreamHandler {
|
|
|
4509
4452
|
*/
|
|
4510
4453
|
canBargeIn() {
|
|
4511
4454
|
if (this.speakingStartedAt === null) return true;
|
|
4512
|
-
|
|
4455
|
+
if (this.firstAudioSentAt === null) return false;
|
|
4456
|
+
const elapsed = Date.now() - this.firstAudioSentAt;
|
|
4513
4457
|
const gate = this.aec ? _StreamHandler.MIN_AGENT_SPEAKING_MS_BEFORE_BARGE_IN_AEC : _StreamHandler.MIN_AGENT_SPEAKING_MS_BEFORE_BARGE_IN_NO_AEC;
|
|
4514
4458
|
return elapsed >= gate;
|
|
4515
4459
|
}
|
|
@@ -4613,6 +4557,9 @@ var StreamHandler = class _StreamHandler {
|
|
|
4613
4557
|
this.ws = ws;
|
|
4614
4558
|
this.caller = caller;
|
|
4615
4559
|
this.callee = callee;
|
|
4560
|
+
this.bargeInStrategies = (deps.agent.bargeInStrategies ?? []).slice();
|
|
4561
|
+
const confirmMs = deps.agent.bargeInConfirmMs;
|
|
4562
|
+
this.bargeInConfirmMs = typeof confirmMs === "number" && Number.isFinite(confirmMs) && confirmMs > 0 ? confirmMs : 1500;
|
|
4616
4563
|
this.history = createHistoryManager(200);
|
|
4617
4564
|
const sttKey = deps.agent.stt?.constructor?.providerKey;
|
|
4618
4565
|
const sttProviderName = deps.agent.stt ? sttKey ?? deps.agent.stt.constructor?.name ?? "custom" : void 0;
|
|
@@ -4860,7 +4807,7 @@ var StreamHandler = class _StreamHandler {
|
|
|
4860
4807
|
if (activeVad && !this.vadDisabled) {
|
|
4861
4808
|
try {
|
|
4862
4809
|
const vadPromise = activeVad.processFrame(pcm16k, 16e3);
|
|
4863
|
-
const timeoutPromise = new Promise((
|
|
4810
|
+
const timeoutPromise = new Promise((resolve2) => setTimeout(() => resolve2(null), 25));
|
|
4864
4811
|
const evt = await Promise.race([vadPromise, timeoutPromise]);
|
|
4865
4812
|
if (evt) {
|
|
4866
4813
|
getLogger().info(
|
|
@@ -4868,11 +4815,18 @@ var StreamHandler = class _StreamHandler {
|
|
|
4868
4815
|
);
|
|
4869
4816
|
}
|
|
4870
4817
|
if (evt?.type === "speech_start") {
|
|
4871
|
-
|
|
4818
|
+
const phantomSuppressed = this.isSpeaking && !this.canBargeIn();
|
|
4819
|
+
if (phantomSuppressed) {
|
|
4872
4820
|
getLogger().info(
|
|
4873
4821
|
`[VAD] speech_start suppressed (agent speaking < gate, aec=${this.aec ? "on" : "off"})`
|
|
4874
4822
|
);
|
|
4823
|
+
this.suppressedSpeechPending = true;
|
|
4875
4824
|
} else if (this.isSpeaking) {
|
|
4825
|
+
if (this.bargeInStrategies.length > 0) {
|
|
4826
|
+
this.startPendingBargeIn();
|
|
4827
|
+
this.metricsAcc.anchorUserSpeechStart();
|
|
4828
|
+
return;
|
|
4829
|
+
}
|
|
4876
4830
|
getLogger().info("[VAD] speech_start during TTS \u2192 BARGE-IN");
|
|
4877
4831
|
this.metricsAcc.recordOverlapStart();
|
|
4878
4832
|
this.metricsAcc.recordBargeinDetected();
|
|
@@ -4895,7 +4849,9 @@ var StreamHandler = class _StreamHandler {
|
|
|
4895
4849
|
}
|
|
4896
4850
|
}
|
|
4897
4851
|
}
|
|
4898
|
-
|
|
4852
|
+
if (!phantomSuppressed) {
|
|
4853
|
+
this.metricsAcc.anchorUserSpeechStart();
|
|
4854
|
+
}
|
|
4899
4855
|
} else if (evt?.type === "speech_end") {
|
|
4900
4856
|
this.metricsAcc.recordVadStop();
|
|
4901
4857
|
try {
|
|
@@ -4972,13 +4928,36 @@ var StreamHandler = class _StreamHandler {
|
|
|
4972
4928
|
*/
|
|
4973
4929
|
/** Handle a Twilio Media Streams `mark` event acknowledging audio playback boundaries. */
|
|
4974
4930
|
async onMark(markName) {
|
|
4975
|
-
if (markName)
|
|
4976
|
-
|
|
4931
|
+
if (!markName) return;
|
|
4932
|
+
const idx = this.pendingMarks.findIndex((m) => m.name === markName);
|
|
4933
|
+
if (idx < 0) return;
|
|
4934
|
+
this.lastConfirmedMark = markName;
|
|
4935
|
+
const resolved = this.pendingMarks.splice(0, idx + 1);
|
|
4936
|
+
for (const entry of resolved) {
|
|
4937
|
+
try {
|
|
4938
|
+
entry.resolve();
|
|
4939
|
+
} catch {
|
|
4940
|
+
}
|
|
4977
4941
|
}
|
|
4978
4942
|
}
|
|
4979
4943
|
/** Handle call stop / stream end. */
|
|
4980
4944
|
/** Handle a carrier-emitted `stop` event signalling the call has ended. */
|
|
4981
4945
|
async handleStop() {
|
|
4946
|
+
if (this.llmAbort !== null) {
|
|
4947
|
+
try {
|
|
4948
|
+
this.llmAbort.abort();
|
|
4949
|
+
} catch {
|
|
4950
|
+
}
|
|
4951
|
+
}
|
|
4952
|
+
const ttsCancelable = this.tts;
|
|
4953
|
+
if (typeof ttsCancelable?.cancelActiveStream === "function") {
|
|
4954
|
+
try {
|
|
4955
|
+
ttsCancelable.cancelActiveStream();
|
|
4956
|
+
} catch {
|
|
4957
|
+
}
|
|
4958
|
+
}
|
|
4959
|
+
this.clearPendingBargeIn();
|
|
4960
|
+
this.drainPendingMarks();
|
|
4982
4961
|
this.clearGraceTimer();
|
|
4983
4962
|
this.flushResamplers();
|
|
4984
4963
|
await this.closeSttOnce();
|
|
@@ -4991,6 +4970,21 @@ var StreamHandler = class _StreamHandler {
|
|
|
4991
4970
|
/** Handle WebSocket close event. */
|
|
4992
4971
|
/** Tear down adapter, STT/TTS, and per-call state when the carrier WebSocket closes. */
|
|
4993
4972
|
async handleWsClose() {
|
|
4973
|
+
if (this.llmAbort !== null) {
|
|
4974
|
+
try {
|
|
4975
|
+
this.llmAbort.abort();
|
|
4976
|
+
} catch {
|
|
4977
|
+
}
|
|
4978
|
+
}
|
|
4979
|
+
const ttsCancelable = this.tts;
|
|
4980
|
+
if (typeof ttsCancelable?.cancelActiveStream === "function") {
|
|
4981
|
+
try {
|
|
4982
|
+
ttsCancelable.cancelActiveStream();
|
|
4983
|
+
} catch {
|
|
4984
|
+
}
|
|
4985
|
+
}
|
|
4986
|
+
this.clearPendingBargeIn();
|
|
4987
|
+
this.drainPendingMarks();
|
|
4994
4988
|
this.clearGraceTimer();
|
|
4995
4989
|
this.flushResamplers();
|
|
4996
4990
|
await this.closeSttOnce();
|
|
@@ -5029,13 +5023,39 @@ var StreamHandler = class _StreamHandler {
|
|
|
5029
5023
|
* Maintains a 1-byte carry across calls so unaligned HTTP chunks from
|
|
5030
5024
|
* streaming TTS providers never byte-swap the PCM16 samples downstream.
|
|
5031
5025
|
*/
|
|
5032
|
-
encodePipelineAudio(
|
|
5033
|
-
|
|
5026
|
+
encodePipelineAudio(audioChunk) {
|
|
5027
|
+
if (this.ttsOutputFormatNativeForCarrier === true) {
|
|
5028
|
+
return audioChunk.toString("base64");
|
|
5029
|
+
}
|
|
5030
|
+
const aligned = this.alignPcm16(audioChunk);
|
|
5034
5031
|
if (aligned.length === 0) return "";
|
|
5035
5032
|
const pcm8k = this.outboundResampler.process(aligned);
|
|
5036
5033
|
const mulaw = pcm16ToMulaw(pcm8k);
|
|
5037
5034
|
return mulaw.toString("base64");
|
|
5038
5035
|
}
|
|
5036
|
+
/**
|
|
5037
|
+
* Cached result of ``isTtsOutputFormatNativeForCarrier()`` — settled
|
|
5038
|
+
* once at ``initPipeline`` time after ``setTelephonyCarrier`` has run
|
|
5039
|
+
* on the TTS adapter. Stable for the call lifetime: changes to the
|
|
5040
|
+
* adapter's output format mid-call would NOT flip this. ``true`` means
|
|
5041
|
+
* ``encodePipelineAudio`` can take the bypass path.
|
|
5042
|
+
*/
|
|
5043
|
+
ttsOutputFormatNativeForCarrier = false;
|
|
5044
|
+
/**
|
|
5045
|
+
* Probe whether the TTS adapter is configured to emit bytes already in
|
|
5046
|
+
* the carrier's wire codec. Currently: Twilio expects ``ulaw_8000``,
|
|
5047
|
+
* Telnyx expects ``pcm_16000`` (no client transcode in either case if
|
|
5048
|
+
* matched). Anything else takes the resample-and-encode path.
|
|
5049
|
+
*/
|
|
5050
|
+
isTtsOutputFormatNativeForCarrier() {
|
|
5051
|
+
if (!this.tts) return false;
|
|
5052
|
+
const fmt = this.tts.outputFormat;
|
|
5053
|
+
if (typeof fmt !== "string") return false;
|
|
5054
|
+
const carrier = this.deps.bridge.telephonyProvider;
|
|
5055
|
+
if (carrier === "twilio") return fmt === "ulaw_8000";
|
|
5056
|
+
if (carrier === "telnyx") return fmt === "pcm_16000";
|
|
5057
|
+
return false;
|
|
5058
|
+
}
|
|
5039
5059
|
/**
|
|
5040
5060
|
* Prepend any carry byte from the previous chunk, return the even-length
|
|
5041
5061
|
* portion, and stash the final odd byte (if any) for the next call.
|
|
@@ -5046,6 +5066,60 @@ var StreamHandler = class _StreamHandler {
|
|
|
5046
5066
|
this.ttsByteCarry = alignedLen < combined.length ? combined.subarray(alignedLen) : null;
|
|
5047
5067
|
return combined.subarray(0, alignedLen);
|
|
5048
5068
|
}
|
|
5069
|
+
/**
|
|
5070
|
+
* Stream a cached firstMessage buffer in pacing-friendly chunks.
|
|
5071
|
+
*
|
|
5072
|
+
* Splits ``prewarmBytes`` into 20 ms slices (matching Twilio's PSTN
|
|
5073
|
+
* frame quantum) and
|
|
5074
|
+
* forwards each through ``deps.bridge.sendAudio`` exactly like the
|
|
5075
|
+
* live TTS path does — preserving Twilio mark/clear granularity. A
|
|
5076
|
+
* single multi-second sendAudio call would push the whole intro into
|
|
5077
|
+
* the carrier in one go and a ``sendClear`` issued mid-buffer would
|
|
5078
|
+
* have nothing to clear ("agent keeps talking after barge-in" UX bug
|
|
5079
|
+
* on the very first turn).
|
|
5080
|
+
*
|
|
5081
|
+
* Returns ``true`` when at least one chunk hit the wire — the caller
|
|
5082
|
+
* uses that to decide whether to record TTS-first-byte / turn-complete
|
|
5083
|
+
* metrics.
|
|
5084
|
+
*/
|
|
5085
|
+
async streamPrewarmBytes(prewarmBytes) {
|
|
5086
|
+
return this.sendPacedFirstMessageBytes(prewarmBytes);
|
|
5087
|
+
}
|
|
5088
|
+
/**
|
|
5089
|
+
* Iterate ``bytes`` in 20 ms slices (Twilio PSTN frame quantum) and
|
|
5090
|
+
* forward each via ``deps.bridge.sendAudio`` with mark-gated pacing
|
|
5091
|
+
* (Twilio) or playout-time-based pacing (Telnyx). Caps the carrier-
|
|
5092
|
+
* side buffer at ``FIRST_MESSAGE_MARK_WINDOW`` chunks so a barge-in's
|
|
5093
|
+
* ``sendClear`` has ~120 ms (Twilio) or zero (Telnyx, immediately
|
|
5094
|
+
* after the latest sleep) of audio to flush.
|
|
5095
|
+
*
|
|
5096
|
+
* Bails immediately when ``isSpeaking`` flips to false — both via the
|
|
5097
|
+
* loop's pre-iter check and via ``drainPendingMarks`` (called from
|
|
5098
|
+
* ``cancelSpeaking``) which unblocks any in-flight ``waitForMarkWindow``.
|
|
5099
|
+
*
|
|
5100
|
+
* Returns ``true`` when at least one chunk hit the wire — the caller
|
|
5101
|
+
* uses that to decide whether to record TTS-first-byte / turn-complete
|
|
5102
|
+
* metrics. See BUG #128 for the regression this fix targets.
|
|
5103
|
+
*/
|
|
5104
|
+
async sendPacedFirstMessageBytes(bytes) {
|
|
5105
|
+
if (this.pendingMarks.length > 0) this.drainPendingMarks();
|
|
5106
|
+
let firstChunkSent = false;
|
|
5107
|
+
const PSTN_FRAME_MS = 20;
|
|
5108
|
+
const bytesPerMs = this.ttsOutputFormatNativeForCarrier ? 8 : _StreamHandler.PCM16_16K_BYTES_PER_MS;
|
|
5109
|
+
const sliceBytes = bytesPerMs * PSTN_FRAME_MS;
|
|
5110
|
+
for (let i = 0; i < bytes.length; i += sliceBytes) {
|
|
5111
|
+
if (!this.isSpeaking) break;
|
|
5112
|
+
const chunk = bytes.subarray(i, i + sliceBytes);
|
|
5113
|
+
if (!firstChunkSent) firstChunkSent = true;
|
|
5114
|
+
if (this.aec && !this.ttsOutputFormatNativeForCarrier) {
|
|
5115
|
+
this.aec.pushFarEnd(chunk);
|
|
5116
|
+
}
|
|
5117
|
+
const encoded = this.encodePipelineAudio(chunk);
|
|
5118
|
+
this.deps.bridge.sendAudio(this.ws, encoded, this.streamSid);
|
|
5119
|
+
this.markFirstAudioSent();
|
|
5120
|
+
}
|
|
5121
|
+
return firstChunkSent;
|
|
5122
|
+
}
|
|
5049
5123
|
// ---------------------------------------------------------------------------
|
|
5050
5124
|
// Private: Pipeline mode
|
|
5051
5125
|
// ---------------------------------------------------------------------------
|
|
@@ -5062,6 +5136,12 @@ var StreamHandler = class _StreamHandler {
|
|
|
5062
5136
|
getLogger().debug(`TTS setTelephonyCarrier failed (${label}): ${String(e)}`);
|
|
5063
5137
|
}
|
|
5064
5138
|
}
|
|
5139
|
+
this.ttsOutputFormatNativeForCarrier = this.isTtsOutputFormatNativeForCarrier();
|
|
5140
|
+
if (this.ttsOutputFormatNativeForCarrier) {
|
|
5141
|
+
getLogger().debug(
|
|
5142
|
+
`TTS outputFormat matches ${this.deps.bridge.telephonyProvider} wire codec \u2014 bypassing client-side transcode`
|
|
5143
|
+
);
|
|
5144
|
+
}
|
|
5065
5145
|
}
|
|
5066
5146
|
if (!this.stt) {
|
|
5067
5147
|
getLogger().debug(`Pipeline mode (${label}): no STT configured`);
|
|
@@ -5071,7 +5151,7 @@ var StreamHandler = class _StreamHandler {
|
|
|
5071
5151
|
}
|
|
5072
5152
|
if (!this.deps.agent.vad) {
|
|
5073
5153
|
try {
|
|
5074
|
-
const { SileroVAD } = await import("./silero-vad-
|
|
5154
|
+
const { SileroVAD } = await import("./silero-vad-LNDFGIY7.mjs");
|
|
5075
5155
|
this.autoVad = await SileroVAD.forPhoneCall();
|
|
5076
5156
|
getLogger().info(
|
|
5077
5157
|
`auto-VAD enabled (SileroVAD, phone preset). Pass agent.vad=\u2026 to override.`
|
|
@@ -5108,35 +5188,106 @@ var StreamHandler = class _StreamHandler {
|
|
|
5108
5188
|
);
|
|
5109
5189
|
}
|
|
5110
5190
|
}
|
|
5111
|
-
|
|
5112
|
-
|
|
5113
|
-
getLogger().debug(`Pipeline mode (${label}): STT + TTS connected`);
|
|
5114
|
-
} catch (e) {
|
|
5115
|
-
getLogger().error(`Pipeline connect FAILED (${label}):`, e);
|
|
5191
|
+
let parked;
|
|
5192
|
+
if (this.deps.popPrewarmedConnections) {
|
|
5116
5193
|
try {
|
|
5117
|
-
|
|
5118
|
-
} catch {
|
|
5194
|
+
parked = this.deps.popPrewarmedConnections(this.callId);
|
|
5195
|
+
} catch (err) {
|
|
5196
|
+
getLogger().debug(`popPrewarmedConnections raised: ${String(err)}`);
|
|
5197
|
+
}
|
|
5198
|
+
}
|
|
5199
|
+
const parkedTts = parked?.tts;
|
|
5200
|
+
if (parkedTts && this.tts) {
|
|
5201
|
+
const ttsAny = this.tts;
|
|
5202
|
+
if (typeof ttsAny.adoptWebSocket === "function" && parkedTts.ws.readyState === 1) {
|
|
5203
|
+
try {
|
|
5204
|
+
ttsAny.adoptWebSocket(parkedTts);
|
|
5205
|
+
getLogger().info(`[CONNECT] callId=${this.callId} provider=tts source=adopted ms=0`);
|
|
5206
|
+
} catch (err) {
|
|
5207
|
+
getLogger().debug(`TTS adoptWebSocket failed: ${String(err)}; falling back`);
|
|
5208
|
+
try {
|
|
5209
|
+
parkedTts.ws.close();
|
|
5210
|
+
} catch {
|
|
5211
|
+
}
|
|
5212
|
+
}
|
|
5213
|
+
} else {
|
|
5214
|
+
try {
|
|
5215
|
+
parkedTts.ws.close();
|
|
5216
|
+
} catch {
|
|
5217
|
+
}
|
|
5218
|
+
}
|
|
5219
|
+
}
|
|
5220
|
+
let sttConnectPromise = null;
|
|
5221
|
+
if (this.stt) {
|
|
5222
|
+
const sttAny = this.stt;
|
|
5223
|
+
const sttStarted = Date.now();
|
|
5224
|
+
if (parked?.stt && typeof sttAny.adoptWebSocket === "function" && parked.stt.readyState === 1) {
|
|
5225
|
+
try {
|
|
5226
|
+
sttAny.adoptWebSocket(parked.stt);
|
|
5227
|
+
getLogger().info(
|
|
5228
|
+
`[CONNECT] callId=${this.callId} provider=stt source=adopted ms=${Date.now() - sttStarted}`
|
|
5229
|
+
);
|
|
5230
|
+
sttConnectPromise = Promise.resolve();
|
|
5231
|
+
} catch (err) {
|
|
5232
|
+
getLogger().debug(`STT adoptWebSocket failed: ${String(err)}; falling back`);
|
|
5233
|
+
try {
|
|
5234
|
+
parked.stt.close();
|
|
5235
|
+
} catch {
|
|
5236
|
+
}
|
|
5237
|
+
sttConnectPromise = (async () => {
|
|
5238
|
+
await this.stt.connect();
|
|
5239
|
+
getLogger().info(
|
|
5240
|
+
`[CONNECT] callId=${this.callId} provider=stt source=fresh ms=${Date.now() - sttStarted}`
|
|
5241
|
+
);
|
|
5242
|
+
})();
|
|
5243
|
+
}
|
|
5244
|
+
} else {
|
|
5245
|
+
if (parked?.stt) {
|
|
5246
|
+
try {
|
|
5247
|
+
parked.stt.close();
|
|
5248
|
+
} catch {
|
|
5249
|
+
}
|
|
5250
|
+
}
|
|
5251
|
+
sttConnectPromise = (async () => {
|
|
5252
|
+
await this.stt.connect();
|
|
5253
|
+
getLogger().info(
|
|
5254
|
+
`[CONNECT] callId=${this.callId} provider=stt source=fresh ms=${Date.now() - sttStarted}`
|
|
5255
|
+
);
|
|
5256
|
+
})();
|
|
5119
5257
|
}
|
|
5120
|
-
return;
|
|
5121
5258
|
}
|
|
5259
|
+
getLogger().debug(`Pipeline mode (${label}): STT connect kicked off`);
|
|
5122
5260
|
if (this.deps.agent.firstMessage && !this.deps.onMessage && this.tts) {
|
|
5123
5261
|
this.metricsAcc.startTurn();
|
|
5124
|
-
await this.beginSpeaking();
|
|
5262
|
+
await this.beginSpeaking(true);
|
|
5125
5263
|
let firstChunkSent = false;
|
|
5126
5264
|
this.resetTtsCarry();
|
|
5265
|
+
let prewarmBytes;
|
|
5266
|
+
if (this.deps.popPrewarmAudio) {
|
|
5267
|
+
try {
|
|
5268
|
+
prewarmBytes = this.deps.popPrewarmAudio(this.callId);
|
|
5269
|
+
} catch (err) {
|
|
5270
|
+
getLogger().debug(`popPrewarmAudio raised: ${String(err)}`);
|
|
5271
|
+
}
|
|
5272
|
+
}
|
|
5127
5273
|
try {
|
|
5128
|
-
|
|
5129
|
-
|
|
5130
|
-
|
|
5131
|
-
|
|
5132
|
-
|
|
5133
|
-
|
|
5134
|
-
|
|
5135
|
-
|
|
5136
|
-
|
|
5274
|
+
if (prewarmBytes) {
|
|
5275
|
+
this.metricsAcc.recordTtsFirstByte();
|
|
5276
|
+
await this.emitAudioOut();
|
|
5277
|
+
firstChunkSent = await this.streamPrewarmBytes(prewarmBytes);
|
|
5278
|
+
} else {
|
|
5279
|
+
for await (const chunk of this.tts.synthesizeStream(this.deps.agent.firstMessage)) {
|
|
5280
|
+
if (!this.isSpeaking) break;
|
|
5281
|
+
if (!firstChunkSent) {
|
|
5282
|
+
firstChunkSent = true;
|
|
5283
|
+
this.metricsAcc.recordTtsFirstByte();
|
|
5284
|
+
await this.emitAudioOut();
|
|
5285
|
+
}
|
|
5286
|
+
if (this.aec) this.aec.pushFarEnd(chunk);
|
|
5287
|
+
const encoded = this.encodePipelineAudio(chunk);
|
|
5288
|
+
this.deps.bridge.sendAudio(this.ws, encoded, this.streamSid);
|
|
5289
|
+
this.markFirstAudioSent();
|
|
5137
5290
|
}
|
|
5138
|
-
const encoded = this.encodePipelineAudio(chunk);
|
|
5139
|
-
this.deps.bridge.sendAudio(this.ws, encoded, this.streamSid);
|
|
5140
5291
|
}
|
|
5141
5292
|
} catch (e) {
|
|
5142
5293
|
getLogger().error(`First message TTS error (${label}):`, e);
|
|
@@ -5145,6 +5296,7 @@ var StreamHandler = class _StreamHandler {
|
|
|
5145
5296
|
this.endSpeakingWithGrace();
|
|
5146
5297
|
}
|
|
5147
5298
|
if (firstChunkSent) {
|
|
5299
|
+
this.metricsAcc.recordTtsComplete(this.deps.agent.firstMessage);
|
|
5148
5300
|
await this.emitTurnMetrics(this.metricsAcc.recordTurnComplete(this.deps.agent.firstMessage));
|
|
5149
5301
|
this.history.push({ role: "assistant", text: this.deps.agent.firstMessage, timestamp: Date.now() });
|
|
5150
5302
|
}
|
|
@@ -5186,6 +5338,18 @@ var StreamHandler = class _StreamHandler {
|
|
|
5186
5338
|
getLogger().debug(`Built-in LLM loop active (pipeline, ${label})`);
|
|
5187
5339
|
}
|
|
5188
5340
|
if (this.stt) {
|
|
5341
|
+
if (sttConnectPromise) {
|
|
5342
|
+
try {
|
|
5343
|
+
await sttConnectPromise;
|
|
5344
|
+
} catch (e) {
|
|
5345
|
+
getLogger().error(`STT connect FAILED (${label}):`, e);
|
|
5346
|
+
try {
|
|
5347
|
+
await this.deps.bridge.endCall(this.callId, this.ws);
|
|
5348
|
+
} catch {
|
|
5349
|
+
}
|
|
5350
|
+
return;
|
|
5351
|
+
}
|
|
5352
|
+
}
|
|
5189
5353
|
this.stt.onTranscript(async (transcript) => {
|
|
5190
5354
|
await this.handleTranscript(transcript);
|
|
5191
5355
|
});
|
|
@@ -5229,6 +5393,7 @@ var StreamHandler = class _StreamHandler {
|
|
|
5229
5393
|
}
|
|
5230
5394
|
const encoded = this.encodePipelineAudio(processedAudio);
|
|
5231
5395
|
this.deps.bridge.sendAudio(this.ws, encoded, this.streamSid);
|
|
5396
|
+
this.markFirstAudioSent();
|
|
5232
5397
|
}
|
|
5233
5398
|
} catch (e) {
|
|
5234
5399
|
getLogger().error(`TTS streaming error (${this.deps.bridge.label}):`, e);
|
|
@@ -5262,7 +5427,10 @@ var StreamHandler = class _StreamHandler {
|
|
|
5262
5427
|
this.metricsAcc.recordVadStop();
|
|
5263
5428
|
}
|
|
5264
5429
|
if (!transcript.isFinal || !transcript.text) return;
|
|
5265
|
-
if (!this.commitTranscript(transcript.text))
|
|
5430
|
+
if (!this.commitTranscript(transcript.text)) {
|
|
5431
|
+
this.metricsAcc.anchorUserSpeechStart();
|
|
5432
|
+
return;
|
|
5433
|
+
}
|
|
5266
5434
|
const label = this.deps.bridge.label;
|
|
5267
5435
|
getLogger().info(
|
|
5268
5436
|
`[DIAG] processTranscript COMMITTED \u2192 LLM (${label} pipeline): ${sanitizeLogValue(transcript.text.slice(0, 80))}`
|
|
@@ -5342,6 +5510,9 @@ var StreamHandler = class _StreamHandler {
|
|
|
5342
5510
|
} else if (this.llmLoop) {
|
|
5343
5511
|
responseText = await this.runPipelineLlm(filteredTranscript, hookExecutor, hookCtx);
|
|
5344
5512
|
} else {
|
|
5513
|
+
getLogger().warn(
|
|
5514
|
+
`Pipeline (${label}) has no llm/onMessage handler \u2014 transcript "${sanitizeLogValue(filteredTranscript.slice(0, 60))}" dropped. Check that agent.llm or onMessage is configured.`
|
|
5515
|
+
);
|
|
5345
5516
|
return;
|
|
5346
5517
|
}
|
|
5347
5518
|
if (!responseText) return;
|
|
@@ -5362,7 +5533,7 @@ var StreamHandler = class _StreamHandler {
|
|
|
5362
5533
|
* record the interruption, and return ``true`` so the caller skips the
|
|
5363
5534
|
* turn-complete record.
|
|
5364
5535
|
*/
|
|
5365
|
-
|
|
5536
|
+
async handleBargeInAsync(transcript) {
|
|
5366
5537
|
if (!transcript.text || !this.isSpeaking) return false;
|
|
5367
5538
|
if (!this.canBargeIn()) {
|
|
5368
5539
|
getLogger().info(
|
|
@@ -5370,10 +5541,67 @@ var StreamHandler = class _StreamHandler {
|
|
|
5370
5541
|
);
|
|
5371
5542
|
return false;
|
|
5372
5543
|
}
|
|
5544
|
+
if (this.bargeInStrategies.length > 0) {
|
|
5545
|
+
const { evaluateStrategies } = await import("./barge-in-strategies-X6ARMGIQ.mjs");
|
|
5546
|
+
const confirmed = await evaluateStrategies(this.bargeInStrategies, {
|
|
5547
|
+
transcript: transcript.text,
|
|
5548
|
+
isInterim: transcript.isFinal === false,
|
|
5549
|
+
agentSpeaking: this.isSpeaking
|
|
5550
|
+
});
|
|
5551
|
+
if (!confirmed) {
|
|
5552
|
+
getLogger().debug(
|
|
5553
|
+
`Barge-in NOT confirmed by any strategy (${sanitizeLogValue(
|
|
5554
|
+
transcript.text.slice(0, 40)
|
|
5555
|
+
)}); agent continues talking`
|
|
5556
|
+
);
|
|
5557
|
+
return false;
|
|
5558
|
+
}
|
|
5559
|
+
getLogger().info(
|
|
5560
|
+
`Barge-in confirmed by strategy on transcript ${sanitizeLogValue(
|
|
5561
|
+
transcript.text.slice(0, 40)
|
|
5562
|
+
)}`
|
|
5563
|
+
);
|
|
5564
|
+
}
|
|
5565
|
+
this.runBargeInCancel(transcript.text);
|
|
5566
|
+
return true;
|
|
5567
|
+
}
|
|
5568
|
+
/**
|
|
5569
|
+
* Synchronous wrapper that callers in legacy code paths can keep using.
|
|
5570
|
+
* When ``bargeInStrategies`` is empty the work is fully synchronous and
|
|
5571
|
+
* the result is correct. With strategies the call is dispatched as a
|
|
5572
|
+
* floating promise — non-confirmed transcripts simply skip the cancel
|
|
5573
|
+
* and the legacy boolean return is meaningless under that opt-in path.
|
|
5574
|
+
*/
|
|
5575
|
+
handleBargeIn(transcript) {
|
|
5576
|
+
if (!transcript.text || !this.isSpeaking) return false;
|
|
5577
|
+
if (this.bargeInStrategies.length === 0) {
|
|
5578
|
+
if (!this.canBargeIn()) {
|
|
5579
|
+
getLogger().info(
|
|
5580
|
+
`Barge-in transcript suppressed (agent speaking < gate, aec=${this.aec ? "on" : "off"})`
|
|
5581
|
+
);
|
|
5582
|
+
return false;
|
|
5583
|
+
}
|
|
5584
|
+
this.runBargeInCancel(transcript.text);
|
|
5585
|
+
return true;
|
|
5586
|
+
}
|
|
5587
|
+
void this.handleBargeInAsync(transcript).catch(
|
|
5588
|
+
(err) => getLogger().debug(`handleBargeInAsync threw: ${String(err)}`)
|
|
5589
|
+
);
|
|
5590
|
+
return false;
|
|
5591
|
+
}
|
|
5592
|
+
/**
|
|
5593
|
+
* Run the cancel/flush sequence for a confirmed barge-in. Shared by
|
|
5594
|
+
* the legacy synchronous path and the strategy-confirmed async path.
|
|
5595
|
+
*/
|
|
5596
|
+
runBargeInCancel(transcriptText) {
|
|
5597
|
+
const hadPending = this.bargeInPendingSince !== null;
|
|
5598
|
+
this.clearPendingBargeIn();
|
|
5373
5599
|
getLogger().debug(
|
|
5374
|
-
`Barge-in: caller spoke over agent (${sanitizeLogValue(
|
|
5600
|
+
`Barge-in: caller spoke over agent (${sanitizeLogValue(transcriptText.slice(0, 40))})`
|
|
5375
5601
|
);
|
|
5376
|
-
|
|
5602
|
+
if (!hadPending) {
|
|
5603
|
+
this.metricsAcc.recordOverlapStart();
|
|
5604
|
+
}
|
|
5377
5605
|
this.metricsAcc.recordBargeinDetected();
|
|
5378
5606
|
const bargeinSpan = startSpan(SPAN_BARGEIN, { "patter.call.id": this.callId });
|
|
5379
5607
|
try {
|
|
@@ -5385,6 +5613,7 @@ var StreamHandler = class _StreamHandler {
|
|
|
5385
5613
|
}
|
|
5386
5614
|
this.metricsAcc.recordTtsStopped();
|
|
5387
5615
|
this.metricsAcc.recordTurnInterrupted();
|
|
5616
|
+
this.metricsAcc.anchorUserSpeechStart();
|
|
5388
5617
|
this.metricsAcc.recordOverlapEnd(true);
|
|
5389
5618
|
} finally {
|
|
5390
5619
|
try {
|
|
@@ -5392,7 +5621,34 @@ var StreamHandler = class _StreamHandler {
|
|
|
5392
5621
|
} catch {
|
|
5393
5622
|
}
|
|
5394
5623
|
}
|
|
5395
|
-
|
|
5624
|
+
}
|
|
5625
|
+
/** Mark a VAD-detected barge-in as pending (no cancel yet). */
|
|
5626
|
+
startPendingBargeIn() {
|
|
5627
|
+
if (this.bargeInPendingSince !== null) return;
|
|
5628
|
+
this.bargeInPendingSince = Date.now();
|
|
5629
|
+
this.metricsAcc.recordOverlapStart();
|
|
5630
|
+
getLogger().info(
|
|
5631
|
+
"Barge-in PENDING (VAD speech_start during TTS); awaiting strategy confirmation"
|
|
5632
|
+
);
|
|
5633
|
+
this.bargeInPendingTimer = setTimeout(() => {
|
|
5634
|
+
if (this.bargeInPendingSince === null) return;
|
|
5635
|
+
getLogger().info(
|
|
5636
|
+
`Pending barge-in timed out after ${this.bargeInConfirmMs}ms; agent resumes (no strategy confirmed)`
|
|
5637
|
+
);
|
|
5638
|
+
this.metricsAcc.recordOverlapEnd(false);
|
|
5639
|
+
this.metricsAcc.anchorUserSpeechStart();
|
|
5640
|
+
this.bargeInPendingSince = null;
|
|
5641
|
+
this.bargeInPendingTimer = null;
|
|
5642
|
+
}, this.bargeInConfirmMs);
|
|
5643
|
+
}
|
|
5644
|
+
/** Drop pending state without cancelling — used on confirm and on
|
|
5645
|
+
* agent stop. Idempotent. */
|
|
5646
|
+
clearPendingBargeIn() {
|
|
5647
|
+
if (this.bargeInPendingTimer !== null) {
|
|
5648
|
+
clearTimeout(this.bargeInPendingTimer);
|
|
5649
|
+
this.bargeInPendingTimer = null;
|
|
5650
|
+
}
|
|
5651
|
+
this.bargeInPendingSince = null;
|
|
5396
5652
|
}
|
|
5397
5653
|
/**
|
|
5398
5654
|
* Dedup + throttle + hallucination filter for final STT transcripts.
|
|
@@ -5567,6 +5823,7 @@ var StreamHandler = class _StreamHandler {
|
|
|
5567
5823
|
}
|
|
5568
5824
|
const encoded = this.encodePipelineAudio(audioChunk);
|
|
5569
5825
|
this.deps.bridge.sendAudio(this.ws, encoded, this.streamSid);
|
|
5826
|
+
this.markFirstAudioSent();
|
|
5570
5827
|
}
|
|
5571
5828
|
}
|
|
5572
5829
|
}
|
|
@@ -5587,16 +5844,49 @@ var StreamHandler = class _StreamHandler {
|
|
|
5587
5844
|
async initRealtimeAdapter(resolvedPrompt) {
|
|
5588
5845
|
const label = this.deps.bridge.label;
|
|
5589
5846
|
this.adapter = this.deps.buildAIAdapter(resolvedPrompt);
|
|
5590
|
-
|
|
5591
|
-
|
|
5592
|
-
getLogger().debug(`AI adapter connected (${label})`);
|
|
5593
|
-
} catch (e) {
|
|
5594
|
-
getLogger().error(`AI adapter connect FAILED (${label}):`, e);
|
|
5847
|
+
let parked;
|
|
5848
|
+
if (typeof this.deps.popPrewarmedConnections === "function") {
|
|
5595
5849
|
try {
|
|
5596
|
-
|
|
5597
|
-
} catch {
|
|
5850
|
+
parked = this.deps.popPrewarmedConnections(this.callId);
|
|
5851
|
+
} catch (err) {
|
|
5852
|
+
getLogger().debug(`popPrewarmedConnections raised: ${String(err)}`);
|
|
5853
|
+
}
|
|
5854
|
+
}
|
|
5855
|
+
const parkedRealtimeWs = parked?.openaiRealtime;
|
|
5856
|
+
let adoptOk = false;
|
|
5857
|
+
if (parkedRealtimeWs !== void 0) {
|
|
5858
|
+
const adapterAny = this.adapter;
|
|
5859
|
+
const wsAlive = parkedRealtimeWs.readyState === 1;
|
|
5860
|
+
if (typeof adapterAny?.adoptWebSocket === "function" && wsAlive) {
|
|
5861
|
+
try {
|
|
5862
|
+
adapterAny.adoptWebSocket(parkedRealtimeWs);
|
|
5863
|
+
getLogger().info(
|
|
5864
|
+
`[CONNECT] callId=${this.callId} provider=openai_realtime source=adopted ms=0`
|
|
5865
|
+
);
|
|
5866
|
+
adoptOk = true;
|
|
5867
|
+
} catch (err) {
|
|
5868
|
+
getLogger().debug(`Realtime adoptWebSocket failed: ${String(err)}; falling back`);
|
|
5869
|
+
}
|
|
5870
|
+
}
|
|
5871
|
+
if (!adoptOk) {
|
|
5872
|
+
try {
|
|
5873
|
+
parkedRealtimeWs.close();
|
|
5874
|
+
} catch {
|
|
5875
|
+
}
|
|
5876
|
+
}
|
|
5877
|
+
}
|
|
5878
|
+
if (!adoptOk) {
|
|
5879
|
+
try {
|
|
5880
|
+
await this.adapter.connect();
|
|
5881
|
+
getLogger().debug(`AI adapter connected (${label})`);
|
|
5882
|
+
} catch (e) {
|
|
5883
|
+
getLogger().error(`AI adapter connect FAILED (${label}):`, e);
|
|
5884
|
+
try {
|
|
5885
|
+
await this.deps.bridge.endCall(this.callId, this.ws);
|
|
5886
|
+
} catch {
|
|
5887
|
+
}
|
|
5888
|
+
return;
|
|
5598
5889
|
}
|
|
5599
|
-
return;
|
|
5600
5890
|
}
|
|
5601
5891
|
if (this.deps.agent.firstMessage) {
|
|
5602
5892
|
this.metricsAcc.startTurn();
|
|
@@ -5704,6 +5994,7 @@ var StreamHandler = class _StreamHandler {
|
|
|
5704
5994
|
}
|
|
5705
5995
|
const outAudio = eventData;
|
|
5706
5996
|
this.deps.bridge.sendAudio(this.ws, outAudio.toString("base64"), this.streamSid);
|
|
5997
|
+
this.markFirstAudioSent();
|
|
5707
5998
|
this.chunkCount++;
|
|
5708
5999
|
this.deps.bridge.sendMark(this.ws, `audio_${this.chunkCount}`, this.streamSid);
|
|
5709
6000
|
}
|
|
@@ -5715,8 +6006,21 @@ var StreamHandler = class _StreamHandler {
|
|
|
5715
6006
|
await this.emitUserSpeechEnded();
|
|
5716
6007
|
}
|
|
5717
6008
|
async onAdapterTranscriptInput(inputText) {
|
|
6009
|
+
const stripped = inputText.trim().toLowerCase();
|
|
6010
|
+
if (HALLUCINATIONS.has(stripped) || stripped === "") {
|
|
6011
|
+
getLogger().debug(
|
|
6012
|
+
`Realtime transcript_input dropped (likely Whisper hallucination on silence/echo): ${sanitizeLogValue(inputText.slice(0, 60))}`
|
|
6013
|
+
);
|
|
6014
|
+
this.userTranscriptPending = false;
|
|
6015
|
+
return;
|
|
6016
|
+
}
|
|
5718
6017
|
getLogger().debug(`User (${this.deps.bridge.label}): ${sanitizeLogValue(inputText)}`);
|
|
5719
6018
|
this.history.push({ role: "user", text: inputText, timestamp: Date.now() });
|
|
6019
|
+
if (this.adapter instanceof OpenAIRealtimeAdapter) {
|
|
6020
|
+
void this.adapter.requestResponse().catch(
|
|
6021
|
+
(err) => getLogger().debug(`Realtime requestResponse failed: ${String(err)}`)
|
|
6022
|
+
);
|
|
6023
|
+
}
|
|
5720
6024
|
if (!this.metricsAcc.turnActive) {
|
|
5721
6025
|
this.metricsAcc.startTurn();
|
|
5722
6026
|
this.currentAgentText = "";
|
|
@@ -5868,6 +6172,18 @@ var StreamHandler = class _StreamHandler {
|
|
|
5868
6172
|
await this.flushAssistantTurn(text);
|
|
5869
6173
|
}
|
|
5870
6174
|
async onAdapterSpeechInterrupt() {
|
|
6175
|
+
if (this.adapter instanceof OpenAIRealtimeAdapter) {
|
|
6176
|
+
const startedAt = this.adapter.currentResponseFirstAudioAt;
|
|
6177
|
+
if (startedAt !== null) {
|
|
6178
|
+
const elapsedMs = Date.now() - startedAt;
|
|
6179
|
+
if (elapsedMs < _StreamHandler.MIN_AGENT_SPEAKING_MS_BEFORE_BARGE_IN_NO_AEC) {
|
|
6180
|
+
getLogger().info(
|
|
6181
|
+
`Realtime barge-in suppressed (response < gate, ${elapsedMs}ms)`
|
|
6182
|
+
);
|
|
6183
|
+
return;
|
|
6184
|
+
}
|
|
6185
|
+
}
|
|
6186
|
+
}
|
|
5871
6187
|
this.deps.bridge.sendClear(this.ws, this.streamSid);
|
|
5872
6188
|
if (this.adapter instanceof OpenAIRealtimeAdapter) this.adapter.cancelResponse();
|
|
5873
6189
|
this.metricsAcc.recordTurnInterrupted();
|
|
@@ -6050,9 +6366,10 @@ var StreamHandler = class _StreamHandler {
|
|
|
6050
6366
|
metrics: finalMetrics
|
|
6051
6367
|
};
|
|
6052
6368
|
const cost = finalMetrics.cost?.total ?? 0;
|
|
6053
|
-
const
|
|
6369
|
+
const p95Obj = finalMetrics.latency_p95;
|
|
6370
|
+
const latencyP95 = p95Obj?.agent_response_ms ?? p95Obj?.total_ms ?? 0;
|
|
6054
6371
|
getLogger().info(
|
|
6055
|
-
`Call ended: ${this.callId} (${finalMetrics.duration_seconds.toFixed(1)}s, ${finalMetrics.turns.length} turns, cost=$${cost.toFixed(4)}, p95=${Math.round(latencyP95)}ms)`
|
|
6372
|
+
`Call ended: ${this.callId} (${finalMetrics.duration_seconds.toFixed(1)}s, ${finalMetrics.turns.length} turns, cost=$${cost.toFixed(4)}, p95 wait=${Math.round(latencyP95)}ms)`
|
|
6056
6373
|
);
|
|
6057
6374
|
this.deps.metricsStore.recordCallEnd(
|
|
6058
6375
|
callEndData,
|
|
@@ -6102,31 +6419,31 @@ async function queryDeepgramCost(metricsAcc, deepgramKey, deepgramRequestId) {
|
|
|
6102
6419
|
// src/services/call-log.ts
|
|
6103
6420
|
init_esm_shims();
|
|
6104
6421
|
import * as crypto3 from "crypto";
|
|
6105
|
-
import * as
|
|
6422
|
+
import * as fs3 from "fs";
|
|
6106
6423
|
import { promises as fsp } from "fs";
|
|
6107
6424
|
import * as os from "os";
|
|
6108
|
-
import * as
|
|
6425
|
+
import * as path3 from "path";
|
|
6109
6426
|
var SCHEMA_VERSION = "1.0";
|
|
6110
6427
|
var DEFAULT_RETENTION_DAYS = 30;
|
|
6111
6428
|
function xdgDataHome() {
|
|
6112
|
-
return process.env.XDG_DATA_HOME ||
|
|
6429
|
+
return process.env.XDG_DATA_HOME || path3.join(os.homedir(), ".local", "share");
|
|
6113
6430
|
}
|
|
6114
6431
|
function platformDefaultRoot() {
|
|
6115
6432
|
if (process.platform === "darwin") {
|
|
6116
|
-
return
|
|
6433
|
+
return path3.join(os.homedir(), "Library", "Application Support", "patter");
|
|
6117
6434
|
}
|
|
6118
6435
|
if (process.platform === "win32") {
|
|
6119
6436
|
const localAppData = process.env.LOCALAPPDATA;
|
|
6120
|
-
if (localAppData) return
|
|
6121
|
-
return
|
|
6437
|
+
if (localAppData) return path3.join(localAppData, "patter");
|
|
6438
|
+
return path3.join(os.homedir(), "AppData", "Local", "patter");
|
|
6122
6439
|
}
|
|
6123
|
-
return
|
|
6440
|
+
return path3.join(xdgDataHome(), "patter");
|
|
6124
6441
|
}
|
|
6125
6442
|
function resolveLogRoot(explicit) {
|
|
6126
6443
|
const value = explicit ?? process.env.PATTER_LOG_DIR;
|
|
6127
6444
|
if (!value) return null;
|
|
6128
6445
|
if (value.trim().toLowerCase() === "auto") return platformDefaultRoot();
|
|
6129
|
-
if (value.startsWith("~")) return
|
|
6446
|
+
if (value.startsWith("~")) return path3.join(os.homedir(), value.slice(1));
|
|
6130
6447
|
return value;
|
|
6131
6448
|
}
|
|
6132
6449
|
function retentionDays() {
|
|
@@ -6137,9 +6454,9 @@ function retentionDays() {
|
|
|
6137
6454
|
return Math.max(0, parsed);
|
|
6138
6455
|
}
|
|
6139
6456
|
function redactMode() {
|
|
6140
|
-
const raw = (process.env.PATTER_LOG_REDACT_PHONE || "
|
|
6457
|
+
const raw = (process.env.PATTER_LOG_REDACT_PHONE || "full").trim().toLowerCase();
|
|
6141
6458
|
if (raw === "full" || raw === "mask" || raw === "hash_only") return raw;
|
|
6142
|
-
return "
|
|
6459
|
+
return "full";
|
|
6143
6460
|
}
|
|
6144
6461
|
function redactPhone(raw) {
|
|
6145
6462
|
if (!raw) return "";
|
|
@@ -6155,9 +6472,9 @@ function utcIso(tsSeconds) {
|
|
|
6155
6472
|
return new Date(ms).toISOString();
|
|
6156
6473
|
}
|
|
6157
6474
|
async function atomicWriteJson(filePath, payload) {
|
|
6158
|
-
const dir =
|
|
6475
|
+
const dir = path3.dirname(filePath);
|
|
6159
6476
|
await fsp.mkdir(dir, { recursive: true });
|
|
6160
|
-
const tmp =
|
|
6477
|
+
const tmp = path3.join(dir, `.tmp.${process.pid}.${crypto3.randomBytes(4).toString("hex")}.json`);
|
|
6161
6478
|
try {
|
|
6162
6479
|
const handle = await fsp.open(tmp, "w");
|
|
6163
6480
|
try {
|
|
@@ -6176,7 +6493,7 @@ async function atomicWriteJson(filePath, payload) {
|
|
|
6176
6493
|
}
|
|
6177
6494
|
}
|
|
6178
6495
|
async function appendJsonl(filePath, record) {
|
|
6179
|
-
await fsp.mkdir(
|
|
6496
|
+
await fsp.mkdir(path3.dirname(filePath), { recursive: true });
|
|
6180
6497
|
await fsp.appendFile(filePath, JSON.stringify(record) + "\n", { encoding: "utf8" });
|
|
6181
6498
|
}
|
|
6182
6499
|
var CallLogger = class {
|
|
@@ -6186,9 +6503,9 @@ var CallLogger = class {
|
|
|
6186
6503
|
this.root = null;
|
|
6187
6504
|
return;
|
|
6188
6505
|
}
|
|
6189
|
-
const resolved = root.startsWith("~") ?
|
|
6506
|
+
const resolved = root.startsWith("~") ? path3.join(os.homedir(), root.slice(1)) : root;
|
|
6190
6507
|
try {
|
|
6191
|
-
|
|
6508
|
+
fs3.mkdirSync(resolved, { recursive: true });
|
|
6192
6509
|
this.root = resolved;
|
|
6193
6510
|
getLogger().info(`Call logs: ${resolved}`);
|
|
6194
6511
|
} catch (err) {
|
|
@@ -6210,7 +6527,7 @@ var CallLogger = class {
|
|
|
6210
6527
|
const month = String(dt.getUTCMonth() + 1).padStart(2, "0");
|
|
6211
6528
|
const day = String(dt.getUTCDate()).padStart(2, "0");
|
|
6212
6529
|
const safeId = sanitizeLogValue(callId, 64).replace(/\//g, "_") || "unknown";
|
|
6213
|
-
return
|
|
6530
|
+
return path3.join(this.root, "calls", year, month, day, safeId);
|
|
6214
6531
|
}
|
|
6215
6532
|
/** Write the initial `metadata.json` for a new call. */
|
|
6216
6533
|
async logCallStart(callId, input = {}) {
|
|
@@ -6228,6 +6545,7 @@ var CallLogger = class {
|
|
|
6228
6545
|
status: "in_progress",
|
|
6229
6546
|
caller: redactPhone(input.caller ?? ""),
|
|
6230
6547
|
callee: redactPhone(input.callee ?? ""),
|
|
6548
|
+
direction: input.direction || "inbound",
|
|
6231
6549
|
telephony_provider: input.telephonyProvider ?? "",
|
|
6232
6550
|
provider_mode: input.providerMode ?? "",
|
|
6233
6551
|
agent: input.agent ?? {},
|
|
@@ -6237,7 +6555,7 @@ var CallLogger = class {
|
|
|
6237
6555
|
error: null
|
|
6238
6556
|
};
|
|
6239
6557
|
try {
|
|
6240
|
-
await atomicWriteJson(
|
|
6558
|
+
await atomicWriteJson(path3.join(dir, "metadata.json"), metadata);
|
|
6241
6559
|
} catch (err) {
|
|
6242
6560
|
getLogger().warn(`call_log write failed (${sanitizeLogValue(callId)}): ${sanitizeLogValue(String(err))}`);
|
|
6243
6561
|
}
|
|
@@ -6256,7 +6574,7 @@ var CallLogger = class {
|
|
|
6256
6574
|
...turn
|
|
6257
6575
|
};
|
|
6258
6576
|
try {
|
|
6259
|
-
await appendJsonl(
|
|
6577
|
+
await appendJsonl(path3.join(dir, "transcript.jsonl"), record);
|
|
6260
6578
|
} catch (err) {
|
|
6261
6579
|
getLogger().warn(
|
|
6262
6580
|
`call_log turn write failed (${sanitizeLogValue(callId)}): ${sanitizeLogValue(String(err))}`
|
|
@@ -6275,7 +6593,7 @@ var CallLogger = class {
|
|
|
6275
6593
|
data: payload
|
|
6276
6594
|
};
|
|
6277
6595
|
try {
|
|
6278
|
-
await appendJsonl(
|
|
6596
|
+
await appendJsonl(path3.join(dir, "events.jsonl"), record);
|
|
6279
6597
|
} catch (err) {
|
|
6280
6598
|
getLogger().warn(
|
|
6281
6599
|
`call_log event write failed (${sanitizeLogValue(callId)}): ${sanitizeLogValue(String(err))}`
|
|
@@ -6287,7 +6605,7 @@ var CallLogger = class {
|
|
|
6287
6605
|
if (!this.enabled) return;
|
|
6288
6606
|
const dir = this.callDir(callId);
|
|
6289
6607
|
if (dir === null) return;
|
|
6290
|
-
const metadataPath =
|
|
6608
|
+
const metadataPath = path3.join(dir, "metadata.json");
|
|
6291
6609
|
let existing = {};
|
|
6292
6610
|
try {
|
|
6293
6611
|
existing = JSON.parse(await fsp.readFile(metadataPath, "utf8"));
|
|
@@ -6322,20 +6640,20 @@ var CallLogger = class {
|
|
|
6322
6640
|
const days = retentionDays();
|
|
6323
6641
|
if (days === 0) return;
|
|
6324
6642
|
const cutoff = Date.now() / 1e3 - days * 86400;
|
|
6325
|
-
const callsRoot =
|
|
6326
|
-
if (!
|
|
6643
|
+
const callsRoot = path3.join(this.root, "calls");
|
|
6644
|
+
if (!fs3.existsSync(callsRoot)) return;
|
|
6327
6645
|
try {
|
|
6328
|
-
for (const yearName of
|
|
6646
|
+
for (const yearName of fs3.readdirSync(callsRoot)) {
|
|
6329
6647
|
if (!/^\d+$/.test(yearName)) continue;
|
|
6330
|
-
const yearDir =
|
|
6331
|
-
if (!
|
|
6332
|
-
for (const monthName of
|
|
6648
|
+
const yearDir = path3.join(callsRoot, yearName);
|
|
6649
|
+
if (!fs3.statSync(yearDir).isDirectory()) continue;
|
|
6650
|
+
for (const monthName of fs3.readdirSync(yearDir)) {
|
|
6333
6651
|
if (!/^\d+$/.test(monthName)) continue;
|
|
6334
|
-
const monthDir =
|
|
6335
|
-
if (!
|
|
6336
|
-
for (const dayName of
|
|
6652
|
+
const monthDir = path3.join(yearDir, monthName);
|
|
6653
|
+
if (!fs3.statSync(monthDir).isDirectory()) continue;
|
|
6654
|
+
for (const dayName of fs3.readdirSync(monthDir)) {
|
|
6337
6655
|
if (!/^\d+$/.test(dayName)) continue;
|
|
6338
|
-
const dayDir =
|
|
6656
|
+
const dayDir = path3.join(monthDir, dayName);
|
|
6339
6657
|
const y = Number.parseInt(yearName, 10);
|
|
6340
6658
|
const m = Number.parseInt(monthName, 10);
|
|
6341
6659
|
const d = Number.parseInt(dayName, 10);
|
|
@@ -6345,12 +6663,12 @@ var CallLogger = class {
|
|
|
6345
6663
|
}
|
|
6346
6664
|
}
|
|
6347
6665
|
try {
|
|
6348
|
-
if (
|
|
6666
|
+
if (fs3.readdirSync(monthDir).length === 0) fs3.rmdirSync(monthDir);
|
|
6349
6667
|
} catch {
|
|
6350
6668
|
}
|
|
6351
6669
|
}
|
|
6352
6670
|
try {
|
|
6353
|
-
if (
|
|
6671
|
+
if (fs3.readdirSync(yearDir).length === 0) fs3.rmdirSync(yearDir);
|
|
6354
6672
|
} catch {
|
|
6355
6673
|
}
|
|
6356
6674
|
}
|
|
@@ -6361,19 +6679,19 @@ var CallLogger = class {
|
|
|
6361
6679
|
};
|
|
6362
6680
|
function rmTree(target) {
|
|
6363
6681
|
try {
|
|
6364
|
-
for (const child of
|
|
6365
|
-
const childPath =
|
|
6366
|
-
const stat =
|
|
6682
|
+
for (const child of fs3.readdirSync(target)) {
|
|
6683
|
+
const childPath = path3.join(target, child);
|
|
6684
|
+
const stat = fs3.lstatSync(childPath);
|
|
6367
6685
|
if (stat.isDirectory()) {
|
|
6368
6686
|
rmTree(childPath);
|
|
6369
6687
|
} else {
|
|
6370
6688
|
try {
|
|
6371
|
-
|
|
6689
|
+
fs3.unlinkSync(childPath);
|
|
6372
6690
|
} catch {
|
|
6373
6691
|
}
|
|
6374
6692
|
}
|
|
6375
6693
|
}
|
|
6376
|
-
|
|
6694
|
+
fs3.rmdirSync(target);
|
|
6377
6695
|
} catch {
|
|
6378
6696
|
}
|
|
6379
6697
|
}
|
|
@@ -6554,9 +6872,10 @@ function buildAIAdapter(config, agent, resolvedPrompt) {
|
|
|
6554
6872
|
strict: t.strict
|
|
6555
6873
|
})) ?? [];
|
|
6556
6874
|
const tools = [...agentTools, TRANSFER_CALL_TOOL, END_CALL_TOOL];
|
|
6557
|
-
const
|
|
6875
|
+
const isOpenAIEngine = engine && (engine.kind === "openai_realtime" || engine.kind === "openai_realtime_2");
|
|
6876
|
+
const openaiKey = isOpenAIEngine ? engine.apiKey : config.openaiKey ?? "";
|
|
6558
6877
|
const adapterOptions = {};
|
|
6559
|
-
if (
|
|
6878
|
+
if (isOpenAIEngine) {
|
|
6560
6879
|
if (engine.reasoningEffort !== void 0) {
|
|
6561
6880
|
adapterOptions.reasoningEffort = engine.reasoningEffort;
|
|
6562
6881
|
}
|
|
@@ -6564,7 +6883,8 @@ function buildAIAdapter(config, agent, resolvedPrompt) {
|
|
|
6564
6883
|
adapterOptions.inputAudioTranscriptionModel = engine.inputAudioTranscriptionModel;
|
|
6565
6884
|
}
|
|
6566
6885
|
}
|
|
6567
|
-
|
|
6886
|
+
const AdapterCtor = engine && engine.kind === "openai_realtime_2" ? OpenAIRealtime2Adapter : OpenAIRealtimeAdapter;
|
|
6887
|
+
return new AdapterCtor(
|
|
6568
6888
|
openaiKey,
|
|
6569
6889
|
agent.model,
|
|
6570
6890
|
agent.voice,
|
|
@@ -6668,7 +6988,7 @@ var TELNYX_DTMF_ALLOWED = new Set("0123456789*#ABCDabcdwW");
|
|
|
6668
6988
|
var TELNYX_DTMF_DURATION_MS = 250;
|
|
6669
6989
|
async function sleep(ms) {
|
|
6670
6990
|
if (ms <= 0) return;
|
|
6671
|
-
await new Promise((
|
|
6991
|
+
await new Promise((resolve2) => setTimeout(resolve2, ms));
|
|
6672
6992
|
}
|
|
6673
6993
|
var TelnyxBridge = class {
|
|
6674
6994
|
constructor(config) {
|
|
@@ -6869,6 +7189,33 @@ var EmbeddedServer = class {
|
|
|
6869
7189
|
* across calls.
|
|
6870
7190
|
*/
|
|
6871
7191
|
onMachineDetection;
|
|
7192
|
+
/**
|
|
7193
|
+
* Pre-warm first-message audio accessor wired by ``Patter.serve()``.
|
|
7194
|
+
* The per-call StreamHandler invokes this with its ``callId`` at the
|
|
7195
|
+
* start of the firstMessage emit; a defined return is sent verbatim
|
|
7196
|
+
* in place of running TTS again. ``undefined`` means "no prewarm
|
|
7197
|
+
* cache for this call — fall back to live synthesis". Default is a
|
|
7198
|
+
* no-op so callers that instantiate ``EmbeddedServer`` directly
|
|
7199
|
+
* (tests) work without further setup.
|
|
7200
|
+
*/
|
|
7201
|
+
popPrewarmAudio = () => void 0;
|
|
7202
|
+
/**
|
|
7203
|
+
* Pre-warmed provider WebSocket accessor wired by ``Patter.serve()``.
|
|
7204
|
+
* The per-call StreamHandler invokes this with its ``callId`` at
|
|
7205
|
+
* pipeline init; defined returns hand off pre-opened STT / TTS /
|
|
7206
|
+
* Realtime sockets so the live first turn skips the cold-handshake.
|
|
7207
|
+
* Default is a no-op for direct ``EmbeddedServer`` callers.
|
|
7208
|
+
*/
|
|
7209
|
+
popPrewarmedConnections = () => void 0;
|
|
7210
|
+
/**
|
|
7211
|
+
* Prewarm waste recorder wired by ``Patter.serve()``. Invoked from
|
|
7212
|
+
* the Twilio status callback (no-answer / busy / failed / canceled)
|
|
7213
|
+
* and the Telnyx call.hangup / AMD-machine handlers so the cache
|
|
7214
|
+
* entry is evicted when the call terminates before the media stream
|
|
7215
|
+
* starts. Default is a no-op so direct ``EmbeddedServer`` callers
|
|
7216
|
+
* (tests) work without further setup. See FIX #91.
|
|
7217
|
+
*/
|
|
7218
|
+
recordPrewarmWaste = () => void 0;
|
|
6872
7219
|
/** Bind HTTP + WebSocket listeners on `port`, mount carrier webhooks and dashboard routes. */
|
|
6873
7220
|
async start(port = 8e3) {
|
|
6874
7221
|
const webhookUrlPattern = /^[a-zA-Z0-9][a-zA-Z0-9.\-]+[a-zA-Z0-9]$/;
|
|
@@ -6944,6 +7291,13 @@ var EmbeddedServer = class {
|
|
|
6944
7291
|
if (!Number.isNaN(parsed)) extra.duration_seconds = parsed;
|
|
6945
7292
|
this.metricsStore.updateCallStatus(callSid, callStatus, extra);
|
|
6946
7293
|
}
|
|
7294
|
+
if (callSid && (callStatus === "no-answer" || callStatus === "busy" || callStatus === "failed" || callStatus === "canceled")) {
|
|
7295
|
+
try {
|
|
7296
|
+
this.recordPrewarmWaste(callSid);
|
|
7297
|
+
} catch (err) {
|
|
7298
|
+
getLogger().debug(`recordPrewarmWaste threw: ${String(err)}`);
|
|
7299
|
+
}
|
|
7300
|
+
}
|
|
6947
7301
|
res.status(204).send();
|
|
6948
7302
|
});
|
|
6949
7303
|
app.post("/webhooks/twilio/recording", (req, res) => {
|
|
@@ -6999,6 +7353,13 @@ var EmbeddedServer = class {
|
|
|
6999
7353
|
getLogger().warn(`onMachineDetection callback threw: ${sanitizeLogValue(String(err))}`);
|
|
7000
7354
|
}
|
|
7001
7355
|
}
|
|
7356
|
+
if ((answeredBy === "machine_end_beep" || answeredBy === "machine_end_silence") && callSid) {
|
|
7357
|
+
try {
|
|
7358
|
+
this.recordPrewarmWaste(callSid);
|
|
7359
|
+
} catch (err) {
|
|
7360
|
+
getLogger().debug(`recordPrewarmWaste threw: ${String(err)}`);
|
|
7361
|
+
}
|
|
7362
|
+
}
|
|
7002
7363
|
if ((answeredBy === "machine_end_beep" || answeredBy === "machine_end_silence") && this.voicemailMessage && this.config.twilioSid && this.config.twilioToken) {
|
|
7003
7364
|
if (!validateTwilioSid(callSid)) {
|
|
7004
7365
|
getLogger().warn(`AMD webhook rejected: invalid CallSid ${JSON.stringify(sanitizeLogValue(callSid))}`);
|
|
@@ -7119,6 +7480,26 @@ var EmbeddedServer = class {
|
|
|
7119
7480
|
}
|
|
7120
7481
|
if (amdCallId && (amdResult === "machine" || amdResult === "machine_detected")) {
|
|
7121
7482
|
await this.handleTelnyxAmdVoicemail(amdCallId);
|
|
7483
|
+
try {
|
|
7484
|
+
this.recordPrewarmWaste(amdCallId);
|
|
7485
|
+
} catch (err) {
|
|
7486
|
+
getLogger().debug(`recordPrewarmWaste threw: ${String(err)}`);
|
|
7487
|
+
}
|
|
7488
|
+
}
|
|
7489
|
+
return res.status(200).send();
|
|
7490
|
+
}
|
|
7491
|
+
if (eventType === "call.hangup") {
|
|
7492
|
+
const hangupCallId = payload.call_control_id ?? "";
|
|
7493
|
+
const hangupCause = String(payload.hangup_cause ?? "");
|
|
7494
|
+
getLogger().info(
|
|
7495
|
+
`Telnyx call.hangup for ${sanitizeLogValue(hangupCallId)} (cause=${sanitizeLogValue(hangupCause)})`
|
|
7496
|
+
);
|
|
7497
|
+
if (hangupCallId) {
|
|
7498
|
+
try {
|
|
7499
|
+
this.recordPrewarmWaste(hangupCallId);
|
|
7500
|
+
} catch (err) {
|
|
7501
|
+
getLogger().debug(`recordPrewarmWaste threw: ${String(err)}`);
|
|
7502
|
+
}
|
|
7122
7503
|
}
|
|
7123
7504
|
return res.status(200).send();
|
|
7124
7505
|
}
|
|
@@ -7210,7 +7591,7 @@ var EmbeddedServer = class {
|
|
|
7210
7591
|
this.handleTwilioStream(ws, url);
|
|
7211
7592
|
}
|
|
7212
7593
|
});
|
|
7213
|
-
await new Promise((
|
|
7594
|
+
await new Promise((resolve2) => {
|
|
7214
7595
|
const bindHost = process.env.PATTER_BIND_HOST ?? "127.0.0.1";
|
|
7215
7596
|
this.server.listen(port, bindHost, () => {
|
|
7216
7597
|
getLogger().info(`Server on port ${port}`);
|
|
@@ -7232,7 +7613,7 @@ var EmbeddedServer = class {
|
|
|
7232
7613
|
}
|
|
7233
7614
|
console.log("\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n");
|
|
7234
7615
|
}
|
|
7235
|
-
|
|
7616
|
+
resolve2();
|
|
7236
7617
|
});
|
|
7237
7618
|
});
|
|
7238
7619
|
}
|
|
@@ -7275,7 +7656,7 @@ var EmbeddedServer = class {
|
|
|
7275
7656
|
`Telnyx voicemail speak failed: ${speakResp.status} ${(await speakResp.text()).slice(0, 200)}`
|
|
7276
7657
|
);
|
|
7277
7658
|
}
|
|
7278
|
-
await new Promise((
|
|
7659
|
+
await new Promise((resolve2) => setTimeout(resolve2, estimatedMs));
|
|
7279
7660
|
await fetch(`https://api.telnyx.com/v2/calls/${encoded}/actions/hangup`, {
|
|
7280
7661
|
method: "POST",
|
|
7281
7662
|
headers,
|
|
@@ -7308,7 +7689,9 @@ var EmbeddedServer = class {
|
|
|
7308
7689
|
recording: this.recording,
|
|
7309
7690
|
buildAIAdapter: (resolvedPrompt) => buildAIAdapter(this.config, this.agent, resolvedPrompt),
|
|
7310
7691
|
sanitizeVariables,
|
|
7311
|
-
resolveVariables
|
|
7692
|
+
resolveVariables,
|
|
7693
|
+
popPrewarmAudio: this.popPrewarmAudio,
|
|
7694
|
+
popPrewarmedConnections: this.popPrewarmedConnections
|
|
7312
7695
|
};
|
|
7313
7696
|
}
|
|
7314
7697
|
/**
|
|
@@ -7335,12 +7718,20 @@ var EmbeddedServer = class {
|
|
|
7335
7718
|
}
|
|
7336
7719
|
return Object.fromEntries(Object.entries(snap).filter(([, v]) => v !== void 0));
|
|
7337
7720
|
};
|
|
7721
|
+
const store = this.metricsStore;
|
|
7338
7722
|
const wrappedStart = async (data) => {
|
|
7339
7723
|
if (logger.enabled) {
|
|
7340
7724
|
const callId = typeof data.call_id === "string" ? data.call_id : "";
|
|
7725
|
+
const dataCaller = typeof data.caller === "string" ? data.caller : "";
|
|
7726
|
+
const dataCallee = typeof data.callee === "string" ? data.callee : "";
|
|
7727
|
+
const active = callId ? store.getActive(callId) : void 0;
|
|
7728
|
+
const resolvedCaller = dataCaller || active?.caller || "";
|
|
7729
|
+
const resolvedCallee = dataCallee || active?.callee || "";
|
|
7730
|
+
const resolvedDirection = (typeof data.direction === "string" ? data.direction : "") || active?.direction || "inbound";
|
|
7341
7731
|
void logger.logCallStart(callId, {
|
|
7342
|
-
caller:
|
|
7343
|
-
callee:
|
|
7732
|
+
caller: resolvedCaller,
|
|
7733
|
+
callee: resolvedCallee,
|
|
7734
|
+
direction: resolvedDirection,
|
|
7344
7735
|
telephonyProvider: bridge.telephonyProvider,
|
|
7345
7736
|
providerMode: agent.provider ?? "",
|
|
7346
7737
|
agent: agentSnapshot()
|
|
@@ -7365,7 +7756,11 @@ var EmbeddedServer = class {
|
|
|
7365
7756
|
const latency = metricsObj ? {
|
|
7366
7757
|
p50_ms: metricsObj.latency_p50?.total_ms ?? null,
|
|
7367
7758
|
p95_ms: metricsObj.latency_p95?.total_ms ?? null,
|
|
7368
|
-
p99_ms: metricsObj.latency_p99?.total_ms ?? null
|
|
7759
|
+
p99_ms: metricsObj.latency_p99?.total_ms ?? null,
|
|
7760
|
+
avg: metricsObj.latency_avg ?? null,
|
|
7761
|
+
p50: metricsObj.latency_p50 ?? null,
|
|
7762
|
+
p95: metricsObj.latency_p95 ?? null,
|
|
7763
|
+
p99: metricsObj.latency_p99 ?? null
|
|
7369
7764
|
} : null;
|
|
7370
7765
|
void logger.logCallEnd(callId, {
|
|
7371
7766
|
durationSeconds: metricsObj?.duration_seconds,
|
|
@@ -7494,8 +7889,8 @@ var EmbeddedServer = class {
|
|
|
7494
7889
|
*/
|
|
7495
7890
|
async stop() {
|
|
7496
7891
|
if (!this.server) return;
|
|
7497
|
-
const httpClosePromise = new Promise((
|
|
7498
|
-
this.server.close(() =>
|
|
7892
|
+
const httpClosePromise = new Promise((resolve2) => {
|
|
7893
|
+
this.server.close(() => resolve2());
|
|
7499
7894
|
});
|
|
7500
7895
|
const isTelnyx = this.config.telephonyProvider === "telnyx";
|
|
7501
7896
|
for (const [ws, callId] of this.activeCallIds) {
|
|
@@ -7515,15 +7910,15 @@ var EmbeddedServer = class {
|
|
|
7515
7910
|
if (this.activeConnections.size > 0) {
|
|
7516
7911
|
getLogger().info(`Waiting for ${this.activeConnections.size} active connection(s) to close...`);
|
|
7517
7912
|
await Promise.race([
|
|
7518
|
-
new Promise((
|
|
7913
|
+
new Promise((resolve2) => {
|
|
7519
7914
|
const checkInterval = setInterval(() => {
|
|
7520
7915
|
if (this.activeConnections.size === 0) {
|
|
7521
7916
|
clearInterval(checkInterval);
|
|
7522
|
-
|
|
7917
|
+
resolve2();
|
|
7523
7918
|
}
|
|
7524
7919
|
}, 100);
|
|
7525
7920
|
}),
|
|
7526
|
-
new Promise((
|
|
7921
|
+
new Promise((resolve2) => setTimeout(resolve2, GRACEFUL_SHUTDOWN_TIMEOUT_MS))
|
|
7527
7922
|
]);
|
|
7528
7923
|
}
|
|
7529
7924
|
if (this.activeConnections.size > 0) {
|
|
@@ -7800,6 +8195,8 @@ function mergeAbortSignals(...signals) {
|
|
|
7800
8195
|
return controller.signal;
|
|
7801
8196
|
}
|
|
7802
8197
|
var OpenAILLMProvider = class {
|
|
8198
|
+
/** Stable pricing/dashboard key — read by stream-handler/metrics. */
|
|
8199
|
+
static providerKey = "openai";
|
|
7803
8200
|
apiKey;
|
|
7804
8201
|
model;
|
|
7805
8202
|
temperature;
|
|
@@ -7826,6 +8223,35 @@ var OpenAILLMProvider = class {
|
|
|
7826
8223
|
this.presencePenalty = sampling.presencePenalty;
|
|
7827
8224
|
this.stop = sampling.stop;
|
|
7828
8225
|
}
|
|
8226
|
+
/** Subclasses (Cerebras, Groq) override this with their own host. */
|
|
8227
|
+
get baseUrl() {
|
|
8228
|
+
return "https://api.openai.com/v1";
|
|
8229
|
+
}
|
|
8230
|
+
/**
|
|
8231
|
+
* Pre-call DNS / TLS / HTTP-keepalive warmup.
|
|
8232
|
+
*
|
|
8233
|
+
* Issues a lightweight ``GET ${baseUrl}/models`` so DNS, TLS and HTTP/2
|
|
8234
|
+
* are already up by the time the first ``chat.completions`` call lands.
|
|
8235
|
+
* Best-effort: 5 s timeout, all exceptions swallowed at debug level.
|
|
8236
|
+
*
|
|
8237
|
+
* Note: an HTTPS GET warms DNS + TLS + connection pool but does NOT
|
|
8238
|
+
* warm the inference path itself; for true inference warmup a real
|
|
8239
|
+
* low-token request is needed, left as a follow-up. STT / TTS providers ship concrete
|
|
8240
|
+
* WebSocket-based prewarms (Cartesia / Deepgram / AssemblyAI for STT;
|
|
8241
|
+
* ElevenLabs WS for TTS) which save 200-500 ms each — those dominate
|
|
8242
|
+
* the cold-start latency budget.
|
|
8243
|
+
*/
|
|
8244
|
+
async warmup() {
|
|
8245
|
+
try {
|
|
8246
|
+
await fetch(`${this.baseUrl}/models`, {
|
|
8247
|
+
method: "GET",
|
|
8248
|
+
headers: { Authorization: `Bearer ${this.apiKey}` },
|
|
8249
|
+
signal: AbortSignal.timeout(5e3)
|
|
8250
|
+
});
|
|
8251
|
+
} catch (err) {
|
|
8252
|
+
getLogger().debug(`LLM warmup failed (best-effort): ${String(err)}`);
|
|
8253
|
+
}
|
|
8254
|
+
}
|
|
7829
8255
|
/** Stream OpenAI Chat Completions chunks for the given messages/tools. */
|
|
7830
8256
|
async *stream(messages, tools, opts) {
|
|
7831
8257
|
const body = {
|
|
@@ -7931,6 +8357,11 @@ var LLMLoop = class {
|
|
|
7931
8357
|
// Fix 10: track provider/model so usage chunks can be attributed for billing.
|
|
7932
8358
|
_providerName;
|
|
7933
8359
|
_modelName;
|
|
8360
|
+
// Diagnostics for the char/4 fallback billing path (see iterate loop).
|
|
8361
|
+
// Counted per-LLMLoop instance (i.e. per call). Surfaced only via logs
|
|
8362
|
+
// — keeps recordLlmUsage's public signature unchanged. Parity with Python.
|
|
8363
|
+
_usageMissingCount = 0;
|
|
8364
|
+
_loggedUsageFallback = false;
|
|
7934
8365
|
// Optional async observer fired after a successful tool execution so
|
|
7935
8366
|
// the host SDK (StreamHandler in pipeline mode) can surface tool calls
|
|
7936
8367
|
// into the transcript timeline / `onTranscript` callback. Mirrors the
|
|
@@ -8025,6 +8456,7 @@ ${systemPrompt}` : DEFAULT_PHONE_PREAMBLE;
|
|
|
8025
8456
|
const toolCallsAccumulated = /* @__PURE__ */ new Map();
|
|
8026
8457
|
const textParts = [];
|
|
8027
8458
|
let hasToolCalls = false;
|
|
8459
|
+
let usageChunkReceived = false;
|
|
8028
8460
|
for await (const chunk of this.provider.stream(messages, this.openaiTools, opts)) {
|
|
8029
8461
|
if (chunk.type === "text" && chunk.content) {
|
|
8030
8462
|
const content = hasAfterLlmChunk && hookExecutor ? hookExecutor.runAfterLlmChunk(chunk.content) : chunk.content;
|
|
@@ -8036,6 +8468,7 @@ ${systemPrompt}` : DEFAULT_PHONE_PREAMBLE;
|
|
|
8036
8468
|
yield content;
|
|
8037
8469
|
}
|
|
8038
8470
|
} else if (chunk.type === "usage") {
|
|
8471
|
+
usageChunkReceived = true;
|
|
8039
8472
|
metrics?.recordLlmUsage(
|
|
8040
8473
|
this._providerName,
|
|
8041
8474
|
this._modelName,
|
|
@@ -8061,6 +8494,35 @@ ${systemPrompt}` : DEFAULT_PHONE_PREAMBLE;
|
|
|
8061
8494
|
if (chunk.arguments) acc.arguments += chunk.arguments;
|
|
8062
8495
|
}
|
|
8063
8496
|
}
|
|
8497
|
+
if (!usageChunkReceived && metrics) {
|
|
8498
|
+
let inputChars = 0;
|
|
8499
|
+
for (const m of messages) {
|
|
8500
|
+
const c = m.content;
|
|
8501
|
+
if (typeof c === "string") inputChars += c.length;
|
|
8502
|
+
}
|
|
8503
|
+
const outputChars = textParts.reduce((s, p) => s + p.length, 0);
|
|
8504
|
+
const estimatedInput = Math.max(1, Math.floor(inputChars / 4));
|
|
8505
|
+
const estimatedOutput = Math.max(1, Math.floor(outputChars / 4));
|
|
8506
|
+
metrics.recordLlmUsage(
|
|
8507
|
+
this._providerName,
|
|
8508
|
+
this._modelName,
|
|
8509
|
+
estimatedInput,
|
|
8510
|
+
estimatedOutput,
|
|
8511
|
+
0,
|
|
8512
|
+
0
|
|
8513
|
+
);
|
|
8514
|
+
this._usageMissingCount += 1;
|
|
8515
|
+
if (!this._loggedUsageFallback) {
|
|
8516
|
+
this._loggedUsageFallback = true;
|
|
8517
|
+
getLogger().info(
|
|
8518
|
+
`llm_usage_fallback provider=${this._providerName} model=${this._modelName} input_chars=${inputChars} output_chars=${outputChars} est_input_tokens=${estimatedInput} est_output_tokens=${estimatedOutput}`
|
|
8519
|
+
);
|
|
8520
|
+
} else {
|
|
8521
|
+
getLogger().debug(
|
|
8522
|
+
`llm_usage_fallback provider=${this._providerName} model=${this._modelName} iteration=${iter} input_chars=${inputChars} output_chars=${outputChars} est_input_tokens=${estimatedInput} est_output_tokens=${estimatedOutput} total_missing=${this._usageMissingCount}`
|
|
8523
|
+
);
|
|
8524
|
+
}
|
|
8525
|
+
}
|
|
8064
8526
|
if (!hasToolCalls) {
|
|
8065
8527
|
if (hasAfterLlmResponse && hookExecutor && hookCtx) {
|
|
8066
8528
|
const finalText = allEmittedText.join("");
|
|
@@ -8213,7 +8675,7 @@ var TestSession = class {
|
|
|
8213
8675
|
input: process.stdin,
|
|
8214
8676
|
output: process.stdout
|
|
8215
8677
|
});
|
|
8216
|
-
const askQuestion = (prompt) => new Promise((
|
|
8678
|
+
const askQuestion = (prompt) => new Promise((resolve2) => rl.question(prompt, resolve2));
|
|
8217
8679
|
try {
|
|
8218
8680
|
while (!ended) {
|
|
8219
8681
|
let userInput;
|
|
@@ -8312,14 +8774,17 @@ export {
|
|
|
8312
8774
|
AuthenticationError,
|
|
8313
8775
|
ProvisionError,
|
|
8314
8776
|
RateLimitError,
|
|
8315
|
-
OpenAIRealtimeAdapter,
|
|
8316
8777
|
ElevenLabsConvAIAdapter,
|
|
8778
|
+
PRICING_VERSION,
|
|
8779
|
+
PRICING_LAST_UPDATED,
|
|
8780
|
+
PricingUnit,
|
|
8317
8781
|
DEFAULT_PRICING,
|
|
8318
8782
|
mergePricing,
|
|
8319
8783
|
calculateSttCost,
|
|
8320
8784
|
calculateTtsCost,
|
|
8321
8785
|
calculateRealtimeCost,
|
|
8322
8786
|
calculateTelephonyCost,
|
|
8787
|
+
VERSION,
|
|
8323
8788
|
MetricsStore,
|
|
8324
8789
|
makeAuthMiddleware,
|
|
8325
8790
|
callsToCsv,
|
|
@@ -8329,19 +8794,9 @@ export {
|
|
|
8329
8794
|
RemoteMessageHandler,
|
|
8330
8795
|
isRemoteUrl,
|
|
8331
8796
|
isWebSocketUrl,
|
|
8797
|
+
DeepgramModel,
|
|
8332
8798
|
DeepgramSTT,
|
|
8333
8799
|
CallMetricsAccumulator,
|
|
8334
|
-
mulawToPcm16,
|
|
8335
|
-
pcm16ToMulaw,
|
|
8336
|
-
PcmCarry,
|
|
8337
|
-
StatefulResampler,
|
|
8338
|
-
createResampler16kTo8k,
|
|
8339
|
-
createResampler8kTo16k,
|
|
8340
|
-
createResampler24kTo16k,
|
|
8341
|
-
createResampler24kTo8k,
|
|
8342
|
-
resample8kTo16k,
|
|
8343
|
-
resample16kTo8k,
|
|
8344
|
-
resample24kTo16k,
|
|
8345
8800
|
SPAN_CALL,
|
|
8346
8801
|
SPAN_STT,
|
|
8347
8802
|
SPAN_LLM,
|