getpatter 0.5.1 → 0.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -5
- package/dist/banner-3GNZ6VQK.mjs +19 -0
- package/dist/{carrier-config-CPG5CROM.mjs → carrier-config-33HQ2W4V.mjs} +2 -2
- package/dist/{chunk-B6C3KIBG.mjs → chunk-FIFIWBL7.mjs} +3226 -569
- package/dist/chunk-QHHBUCMT.mjs +25 -0
- package/dist/{chunk-AKQFOFLG.mjs → chunk-SEMKNPCD.mjs} +7 -2
- package/dist/{chunk-FMNRCP5X.mjs → chunk-VJVDG4V5.mjs} +1 -1
- package/dist/cli.js +133 -15
- package/dist/dist-YRCCJQ26.mjs +1631 -0
- package/dist/index.d.mts +2000 -289
- package/dist/index.d.ts +2000 -289
- package/dist/index.js +8019 -1984
- package/dist/index.mjs +1885 -618
- package/dist/node-cron-6PRPSBG5.mjs +1348 -0
- package/dist/onnxruntime_binding-4Q2WV26X.node +0 -0
- package/dist/onnxruntime_binding-5PVQ7RFC.node +0 -0
- package/dist/onnxruntime_binding-FNOPH2XG.node +0 -0
- package/dist/onnxruntime_binding-HSGOY4IT.node +0 -0
- package/dist/onnxruntime_binding-OY2N3XIT.node +0 -0
- package/dist/onnxruntime_binding-ZPEJPBCV.node +0 -0
- package/dist/{persistence-CYIGNHSU.mjs → persistence-LQBYQPQQ.mjs} +1 -1
- package/dist/test-mode-MVJ3SKG4.mjs +8 -0
- package/dist/tunnel-UVR3PPAU.mjs +8 -0
- package/package.json +10 -3
- package/dist/chunk-OOIUSZB4.mjs +0 -37
- package/dist/node-cron-373UVDIO.mjs +0 -935
- package/dist/test-mode-JZMYE5HY.mjs +0 -8
- package/dist/tunnel-O7ICMSTP.mjs +0 -8
|
@@ -1,12 +1,15 @@
|
|
|
1
1
|
import {
|
|
2
2
|
getLogger
|
|
3
|
-
} from "./chunk-
|
|
3
|
+
} from "./chunk-VJVDG4V5.mjs";
|
|
4
|
+
import {
|
|
5
|
+
__require
|
|
6
|
+
} from "./chunk-QHHBUCMT.mjs";
|
|
4
7
|
|
|
5
8
|
// src/test-mode.ts
|
|
6
9
|
import { createInterface } from "readline";
|
|
7
10
|
|
|
8
11
|
// src/server.ts
|
|
9
|
-
import
|
|
12
|
+
import crypto4 from "crypto";
|
|
10
13
|
import express from "express";
|
|
11
14
|
import { createServer } from "http";
|
|
12
15
|
import { WebSocketServer } from "ws";
|
|
@@ -14,14 +17,24 @@ import { WebSocketServer } from "ws";
|
|
|
14
17
|
// src/providers/openai-realtime.ts
|
|
15
18
|
import WebSocket from "ws";
|
|
16
19
|
var OpenAIRealtimeAdapter = class {
|
|
17
|
-
constructor(apiKey, model = "gpt-
|
|
20
|
+
constructor(apiKey, model = "gpt-realtime-mini", voice = "alloy", instructions = "", tools, audioFormat = "g711_ulaw", options = {}) {
|
|
18
21
|
this.apiKey = apiKey;
|
|
19
22
|
this.model = model;
|
|
20
23
|
this.voice = voice;
|
|
21
24
|
this.instructions = instructions;
|
|
22
25
|
this.tools = tools;
|
|
26
|
+
this.audioFormat = audioFormat;
|
|
27
|
+
this.options = options;
|
|
23
28
|
}
|
|
24
29
|
ws = null;
|
|
30
|
+
eventCallbacks = /* @__PURE__ */ new Set();
|
|
31
|
+
messageListenerAttached = false;
|
|
32
|
+
heartbeat = null;
|
|
33
|
+
// Track the in-flight assistant item id so we can truncate cleanly on
|
|
34
|
+
// barge-in (see ``cancelResponse``) — matches the Python adapter.
|
|
35
|
+
currentResponseItemId = null;
|
|
36
|
+
currentResponseAudioMs = 0;
|
|
37
|
+
options;
|
|
25
38
|
async connect() {
|
|
26
39
|
const url = `wss://api.openai.com/v1/realtime?model=${encodeURIComponent(this.model)}`;
|
|
27
40
|
this.ws = new WebSocket(url, {
|
|
@@ -45,13 +58,24 @@ var OpenAIRealtimeAdapter = class {
|
|
|
45
58
|
if (msg.type === "session.created" && !sessionCreated) {
|
|
46
59
|
sessionCreated = true;
|
|
47
60
|
const config = {
|
|
48
|
-
input_audio_format:
|
|
49
|
-
output_audio_format:
|
|
61
|
+
input_audio_format: this.audioFormat,
|
|
62
|
+
output_audio_format: this.audioFormat,
|
|
50
63
|
voice: this.voice,
|
|
51
64
|
instructions: this.instructions || "You are a helpful voice assistant. Be concise.",
|
|
52
|
-
turn_detection: {
|
|
53
|
-
|
|
65
|
+
turn_detection: {
|
|
66
|
+
type: this.options.vadType ?? "server_vad",
|
|
67
|
+
threshold: 0.5,
|
|
68
|
+
prefix_padding_ms: 300,
|
|
69
|
+
silence_duration_ms: this.options.silenceDurationMs ?? 300
|
|
70
|
+
},
|
|
71
|
+
input_audio_transcription: { model: this.options.inputAudioTranscriptionModel ?? "whisper-1" }
|
|
54
72
|
};
|
|
73
|
+
if (this.options.temperature !== void 0) config.temperature = this.options.temperature;
|
|
74
|
+
if (this.options.maxResponseOutputTokens !== void 0) {
|
|
75
|
+
config.max_response_output_tokens = this.options.maxResponseOutputTokens;
|
|
76
|
+
}
|
|
77
|
+
if (this.options.modalities !== void 0) config.modalities = this.options.modalities;
|
|
78
|
+
if (this.options.toolChoice !== void 0) config.tool_choice = this.options.toolChoice;
|
|
55
79
|
if (this.tools?.length) {
|
|
56
80
|
config.tools = this.tools.map((t) => ({
|
|
57
81
|
type: "function",
|
|
@@ -92,19 +116,45 @@ var OpenAIRealtimeAdapter = class {
|
|
|
92
116
|
ws.on("message", onSetupMessage);
|
|
93
117
|
ws.on("error", onSetupError);
|
|
94
118
|
});
|
|
119
|
+
this.heartbeat = setInterval(() => {
|
|
120
|
+
try {
|
|
121
|
+
this.ws?.ping();
|
|
122
|
+
} catch {
|
|
123
|
+
}
|
|
124
|
+
}, 2e4);
|
|
125
|
+
this.ensureMessageListener();
|
|
95
126
|
}
|
|
96
127
|
sendAudio(mulawAudio) {
|
|
97
128
|
if (!this.ws || this.ws.readyState !== WebSocket.OPEN) return;
|
|
98
129
|
this.ws.send(JSON.stringify({ type: "input_audio_buffer.append", audio: mulawAudio.toString("base64") }));
|
|
99
130
|
}
|
|
131
|
+
/**
|
|
132
|
+
* Register a listener for parsed realtime events.
|
|
133
|
+
*
|
|
134
|
+
* Previously every call attached a new ``ws.on('message')`` handler,
|
|
135
|
+
* which leaked listeners across retries and multi-consumer hooks. We now
|
|
136
|
+
* route all traffic through a single persistent handler that fans out to
|
|
137
|
+
* a Set of callbacks. Use {@link offEvent} to remove one.
|
|
138
|
+
*/
|
|
100
139
|
onEvent(callback) {
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
140
|
+
this.eventCallbacks.add(callback);
|
|
141
|
+
this.ensureMessageListener();
|
|
142
|
+
}
|
|
143
|
+
offEvent(callback) {
|
|
144
|
+
this.eventCallbacks.delete(callback);
|
|
145
|
+
}
|
|
146
|
+
ensureMessageListener() {
|
|
147
|
+
if (this.messageListenerAttached || !this.ws) return;
|
|
148
|
+
this.messageListenerAttached = true;
|
|
149
|
+
const ws = this.ws;
|
|
150
|
+
const dispatch = (type, payload) => {
|
|
151
|
+
for (const cb of this.eventCallbacks) {
|
|
152
|
+
void Promise.resolve(cb(type, payload)).catch(
|
|
153
|
+
(err) => getLogger().error("onEvent callback error:", err)
|
|
154
|
+
);
|
|
155
|
+
}
|
|
106
156
|
};
|
|
107
|
-
|
|
157
|
+
ws.on("message", (raw) => {
|
|
108
158
|
let data;
|
|
109
159
|
try {
|
|
110
160
|
data = JSON.parse(raw.toString());
|
|
@@ -114,24 +164,61 @@ var OpenAIRealtimeAdapter = class {
|
|
|
114
164
|
}
|
|
115
165
|
const t = data.type;
|
|
116
166
|
if (t === "response.audio.delta") {
|
|
117
|
-
|
|
167
|
+
const buf = Buffer.from(data.delta ?? "", "base64");
|
|
168
|
+
this.currentResponseAudioMs += estimateAudioMs(buf, this.audioFormat);
|
|
169
|
+
dispatch("audio", buf);
|
|
118
170
|
} else if (t === "response.audio_transcript.delta") {
|
|
119
|
-
|
|
171
|
+
dispatch("transcript_output", data.delta);
|
|
172
|
+
} else if (t === "response.content_part.added" || t === "response.output_item.added") {
|
|
173
|
+
const itemId = data.item?.id ?? data.item_id ?? null;
|
|
174
|
+
if (itemId) {
|
|
175
|
+
this.currentResponseItemId = itemId;
|
|
176
|
+
this.currentResponseAudioMs = 0;
|
|
177
|
+
}
|
|
120
178
|
} else if (t === "input_audio_buffer.speech_started") {
|
|
121
|
-
|
|
179
|
+
dispatch("speech_started", null);
|
|
180
|
+
} else if (t === "input_audio_buffer.speech_stopped") {
|
|
181
|
+
dispatch("speech_stopped", null);
|
|
122
182
|
} else if (t === "conversation.item.input_audio_transcription.completed") {
|
|
123
|
-
|
|
183
|
+
dispatch("transcript_input", data.transcript);
|
|
124
184
|
} else if (t === "response.function_call_arguments.done") {
|
|
125
|
-
|
|
185
|
+
dispatch("function_call", { call_id: data.call_id, name: data.name, arguments: data.arguments });
|
|
126
186
|
} else if (t === "response.done") {
|
|
127
|
-
|
|
187
|
+
this.currentResponseItemId = null;
|
|
188
|
+
this.currentResponseAudioMs = 0;
|
|
189
|
+
dispatch("response_done", data.response ?? null);
|
|
128
190
|
} else if (t === "error") {
|
|
129
|
-
|
|
191
|
+
dispatch("error", data.error);
|
|
192
|
+
}
|
|
193
|
+
});
|
|
194
|
+
ws.on("close", (code, reason) => {
|
|
195
|
+
if (code !== 1e3) {
|
|
196
|
+
dispatch("error", {
|
|
197
|
+
type: "connection_closed",
|
|
198
|
+
code,
|
|
199
|
+
reason: reason?.toString() ?? ""
|
|
200
|
+
});
|
|
130
201
|
}
|
|
131
202
|
});
|
|
203
|
+
ws.on("error", (err) => {
|
|
204
|
+
dispatch("error", { type: "socket_error", message: err?.message ?? String(err) });
|
|
205
|
+
});
|
|
132
206
|
}
|
|
133
207
|
cancelResponse() {
|
|
134
|
-
this.ws
|
|
208
|
+
if (!this.ws) return;
|
|
209
|
+
if (this.currentResponseItemId) {
|
|
210
|
+
try {
|
|
211
|
+
this.ws.send(JSON.stringify({
|
|
212
|
+
type: "conversation.item.truncate",
|
|
213
|
+
item_id: this.currentResponseItemId,
|
|
214
|
+
content_index: 0,
|
|
215
|
+
audio_end_ms: this.currentResponseAudioMs
|
|
216
|
+
}));
|
|
217
|
+
} catch (err) {
|
|
218
|
+
getLogger().debug?.(`conversation.item.truncate failed: ${String(err)}`);
|
|
219
|
+
}
|
|
220
|
+
}
|
|
221
|
+
this.ws.send(JSON.stringify({ type: "response.cancel" }));
|
|
135
222
|
}
|
|
136
223
|
async sendText(text) {
|
|
137
224
|
this.ws?.send(JSON.stringify({
|
|
@@ -148,28 +235,148 @@ var OpenAIRealtimeAdapter = class {
|
|
|
148
235
|
this.ws?.send(JSON.stringify({ type: "response.create" }));
|
|
149
236
|
}
|
|
150
237
|
close() {
|
|
238
|
+
if (this.heartbeat) {
|
|
239
|
+
clearInterval(this.heartbeat);
|
|
240
|
+
this.heartbeat = null;
|
|
241
|
+
}
|
|
242
|
+
this.eventCallbacks.clear();
|
|
243
|
+
this.messageListenerAttached = false;
|
|
151
244
|
this.ws?.close();
|
|
152
245
|
this.ws = null;
|
|
153
246
|
}
|
|
154
247
|
};
|
|
248
|
+
/**
 * Estimate how many whole milliseconds of audio a raw chunk represents.
 *
 * Only the chunk's `length` is inspected; the payload itself is never read.
 *
 * @param {{ length: number }} chunk - Audio payload (a Buffer in practice).
 * @param {string} format - One of "g711_ulaw", "g711_alaw" or "pcm16".
 * @returns {number} Duration in milliseconds, rounded down. Returns 0 for an
 *   empty chunk or for any format not listed above.
 */
function estimateAudioMs(chunk, format) {
  const byteCount = chunk.length;
  if (byteCount === 0) return 0;
  switch (format) {
    case "g711_ulaw":
    case "g711_alaw":
      // 8 bytes per millisecond — presumably 8 kHz at one byte per sample.
      return Math.floor(byteCount / 8);
    case "pcm16":
      // 48 bytes per millisecond — presumably 24 kHz, 16-bit mono; confirm
      // against the provider's PCM sample rate.
      return Math.floor(byteCount / 48);
    default:
      return 0;
  }
}
|
|
155
256
|
|
|
156
257
|
// src/providers/elevenlabs-convai.ts
|
|
157
258
|
import WebSocket2 from "ws";
|
|
158
259
|
var ELEVENLABS_CONVAI_URL = "wss://api.elevenlabs.io/v1/convai/conversation";
|
|
159
|
-
var
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
this.agentId = agentId;
|
|
163
|
-
this.voiceId = voiceId;
|
|
164
|
-
this.firstMessage = firstMessage;
|
|
165
|
-
}
|
|
260
|
+
var ELEVENLABS_SIGNED_URL = "https://api.elevenlabs.io/v1/convai/conversation/get-signed-url";
|
|
261
|
+
var AGENT_SILENCE_MS = 500;
|
|
262
|
+
var ElevenLabsConvAIAdapter = class _ElevenLabsConvAIAdapter {
|
|
166
263
|
ws = null;
|
|
167
264
|
eventCallback = null;
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
265
|
+
apiKey;
|
|
266
|
+
agentId;
|
|
267
|
+
voiceId;
|
|
268
|
+
// Exposed for parity with Python SDK (`self.model_id`). ConvAI does not
|
|
269
|
+
// accept a client-side model override today, but we preserve the value so
|
|
270
|
+
// callers can introspect it and we can ship the override the day the
|
|
271
|
+
// server exposes it.
|
|
272
|
+
modelId;
|
|
273
|
+
language;
|
|
274
|
+
firstMessage;
|
|
275
|
+
// Exposed publicly so the stream handler can detect μ-law negotiation
|
|
276
|
+
// (``"ulaw_8000"``) and skip resampling / transcoding on the audio path.
|
|
277
|
+
outputAudioFormat;
|
|
278
|
+
inputAudioFormat;
|
|
279
|
+
useSignedUrl;
|
|
280
|
+
// Populated from `conversation_initiation_metadata`.
|
|
281
|
+
conversationId = null;
|
|
282
|
+
agentOutputAudioFormat = null;
|
|
283
|
+
userInputAudioFormat = null;
|
|
284
|
+
agentSpeaking = false;
|
|
285
|
+
silenceTimer = null;
|
|
286
|
+
closePromise = null;
|
|
287
|
+
constructor(apiKeyOrOptions, agentId = "", voiceId = "EXAVITQu4vr4xnSDxMaL", firstMessage = "") {
|
|
288
|
+
if (typeof apiKeyOrOptions === "object") {
|
|
289
|
+
const o = apiKeyOrOptions;
|
|
290
|
+
this.apiKey = o.apiKey;
|
|
291
|
+
this.agentId = o.agentId ?? "";
|
|
292
|
+
this.voiceId = o.voiceId ?? "EXAVITQu4vr4xnSDxMaL";
|
|
293
|
+
this.modelId = o.modelId ?? "eleven_flash_v2_5";
|
|
294
|
+
this.language = o.language ?? "it";
|
|
295
|
+
this.firstMessage = o.firstMessage ?? "";
|
|
296
|
+
this.outputAudioFormat = o.outputAudioFormat;
|
|
297
|
+
this.inputAudioFormat = o.inputAudioFormat;
|
|
298
|
+
this.useSignedUrl = o.useSignedUrl ?? false;
|
|
299
|
+
} else {
|
|
300
|
+
this.apiKey = apiKeyOrOptions;
|
|
301
|
+
this.agentId = agentId;
|
|
302
|
+
this.voiceId = voiceId;
|
|
303
|
+
this.modelId = "eleven_flash_v2_5";
|
|
304
|
+
this.language = "it";
|
|
305
|
+
this.firstMessage = firstMessage;
|
|
306
|
+
this.outputAudioFormat = void 0;
|
|
307
|
+
this.inputAudioFormat = void 0;
|
|
308
|
+
this.useSignedUrl = false;
|
|
309
|
+
}
|
|
310
|
+
}
|
|
311
|
+
// ------------------------------------------------------------------
|
|
312
|
+
// Telephony factories
|
|
313
|
+
// ------------------------------------------------------------------
|
|
314
|
+
/**
|
|
315
|
+
* Build an adapter pre-configured for Twilio Media Streams.
|
|
316
|
+
*
|
|
317
|
+
* Negotiates `ulaw_8000` for both `outputAudioFormat` and
|
|
318
|
+
* `inputAudioFormat`, matching Twilio's μ-law @ 8 kHz wire format. The
|
|
319
|
+
* SDK's stream handler detects this and skips the 8 kHz → 16 kHz inbound
|
|
320
|
+
* resample and the 16 kHz → 8 kHz / PCM → μ-law outbound transcode.
|
|
321
|
+
* Saves ~30–80 ms first-byte plus per-frame CPU on every turn.
|
|
322
|
+
*/
|
|
323
|
+
static forTwilio(apiKey, agentId, options = {}) {
|
|
324
|
+
return new _ElevenLabsConvAIAdapter({
|
|
325
|
+
...options,
|
|
326
|
+
apiKey,
|
|
327
|
+
agentId,
|
|
328
|
+
outputAudioFormat: "ulaw_8000",
|
|
329
|
+
inputAudioFormat: "ulaw_8000"
|
|
330
|
+
});
|
|
331
|
+
}
|
|
332
|
+
/**
|
|
333
|
+
* Build an adapter pre-configured for Telnyx bidirectional media.
|
|
334
|
+
*
|
|
335
|
+
* Telnyx negotiates PCMU @ 8 kHz when `streaming_start` sets
|
|
336
|
+
* `stream_bidirectional_codec=PCMU` (the SDK default). Picking
|
|
337
|
+
* `ulaw_8000` on both ConvAI directions removes every transcode on the
|
|
338
|
+
* audio path — same optimization as `forTwilio`.
|
|
339
|
+
*/
|
|
340
|
+
static forTelnyx(apiKey, agentId, options = {}) {
|
|
341
|
+
return new _ElevenLabsConvAIAdapter({
|
|
342
|
+
...options,
|
|
343
|
+
apiKey,
|
|
344
|
+
agentId,
|
|
345
|
+
outputAudioFormat: "ulaw_8000",
|
|
346
|
+
inputAudioFormat: "ulaw_8000"
|
|
172
347
|
});
|
|
348
|
+
}
|
|
349
|
+
async fetchSignedUrl() {
|
|
350
|
+
if (!this.agentId) {
|
|
351
|
+
throw new Error("useSignedUrl=true requires agentId");
|
|
352
|
+
}
|
|
353
|
+
const url = `${ELEVENLABS_SIGNED_URL}?agent_id=${encodeURIComponent(this.agentId)}`;
|
|
354
|
+
const resp = await fetch(url, {
|
|
355
|
+
method: "GET",
|
|
356
|
+
headers: { "xi-api-key": this.apiKey },
|
|
357
|
+
signal: AbortSignal.timeout(15e3)
|
|
358
|
+
});
|
|
359
|
+
if (!resp.ok) {
|
|
360
|
+
const body = await resp.text();
|
|
361
|
+
throw new Error(`ElevenLabs signed-url error ${resp.status}: ${body}`);
|
|
362
|
+
}
|
|
363
|
+
const data = await resp.json();
|
|
364
|
+
if (!data.signed_url) {
|
|
365
|
+
throw new Error("ElevenLabs signed-url response missing 'signed_url'");
|
|
366
|
+
}
|
|
367
|
+
return data.signed_url;
|
|
368
|
+
}
|
|
369
|
+
async connect() {
|
|
370
|
+
let wsUrl;
|
|
371
|
+
let wsOptions;
|
|
372
|
+
if (this.useSignedUrl) {
|
|
373
|
+
wsUrl = await this.fetchSignedUrl();
|
|
374
|
+
wsOptions = void 0;
|
|
375
|
+
} else {
|
|
376
|
+
wsUrl = this.agentId ? `${ELEVENLABS_CONVAI_URL}?agent_id=${encodeURIComponent(this.agentId)}` : ELEVENLABS_CONVAI_URL;
|
|
377
|
+
wsOptions = { headers: { "xi-api-key": this.apiKey } };
|
|
378
|
+
}
|
|
379
|
+
this.ws = new WebSocket2(wsUrl, wsOptions);
|
|
173
380
|
await new Promise((resolve, reject) => {
|
|
174
381
|
const timeout = setTimeout(
|
|
175
382
|
() => reject(new Error("ElevenLabs ConvAI connect timeout")),
|
|
@@ -177,17 +384,22 @@ var ElevenLabsConvAIAdapter = class {
|
|
|
177
384
|
);
|
|
178
385
|
this.ws.once("open", () => {
|
|
179
386
|
clearTimeout(timeout);
|
|
387
|
+
const agentCfg = {};
|
|
388
|
+
if (this.firstMessage) agentCfg["first_message"] = this.firstMessage;
|
|
389
|
+
if (this.language) agentCfg["language"] = this.language;
|
|
390
|
+
const override = {
|
|
391
|
+
tts: this.outputAudioFormat ? { voice_id: this.voiceId, output_format: this.outputAudioFormat } : { voice_id: this.voiceId }
|
|
392
|
+
};
|
|
393
|
+
if (this.inputAudioFormat) {
|
|
394
|
+
override["asr"] = { input_format: this.inputAudioFormat };
|
|
395
|
+
}
|
|
396
|
+
if (Object.keys(agentCfg).length > 0) {
|
|
397
|
+
override["agent"] = agentCfg;
|
|
398
|
+
}
|
|
180
399
|
const config = {
|
|
181
400
|
type: "conversation_initiation_client_data",
|
|
182
|
-
conversation_config_override:
|
|
183
|
-
tts: { voice_id: this.voiceId }
|
|
184
|
-
}
|
|
401
|
+
conversation_config_override: override
|
|
185
402
|
};
|
|
186
|
-
if (this.firstMessage) {
|
|
187
|
-
config["conversation_config_override"]["agent"] = {
|
|
188
|
-
first_message: this.firstMessage
|
|
189
|
-
};
|
|
190
|
-
}
|
|
191
403
|
this.ws.send(JSON.stringify(config));
|
|
192
404
|
resolve();
|
|
193
405
|
});
|
|
@@ -196,54 +408,176 @@ var ElevenLabsConvAIAdapter = class {
|
|
|
196
408
|
reject(err);
|
|
197
409
|
});
|
|
198
410
|
});
|
|
411
|
+
this.ws.on("error", (err) => {
|
|
412
|
+
getLogger().error("ElevenLabs ConvAI WS error:", err);
|
|
413
|
+
this.safeInvoke("error", err instanceof Error ? err.message : String(err));
|
|
414
|
+
});
|
|
415
|
+
this.ws.on("close", (code, reason) => {
|
|
416
|
+
this.clearSilenceTimer();
|
|
417
|
+
this.safeInvoke("close", {
|
|
418
|
+
code,
|
|
419
|
+
reason: reason?.toString() ?? ""
|
|
420
|
+
});
|
|
421
|
+
});
|
|
199
422
|
this.ws.on("message", (raw) => {
|
|
200
|
-
const cb = this.eventCallback;
|
|
201
|
-
if (!cb) return;
|
|
202
|
-
const safeInvoke = (type, data) => {
|
|
203
|
-
void Promise.resolve(cb(type, data)).catch(
|
|
204
|
-
(err) => getLogger().error("onEvent callback error:", err)
|
|
205
|
-
);
|
|
206
|
-
};
|
|
207
423
|
let parsed;
|
|
208
424
|
try {
|
|
209
425
|
parsed = JSON.parse(raw.toString());
|
|
210
426
|
} catch {
|
|
211
427
|
return;
|
|
212
428
|
}
|
|
213
|
-
|
|
214
|
-
if (msgType === "audio") {
|
|
215
|
-
const audioB64 = parsed["audio"];
|
|
216
|
-
if (audioB64) {
|
|
217
|
-
safeInvoke("audio", Buffer.from(audioB64, "base64"));
|
|
218
|
-
}
|
|
219
|
-
} else if (msgType === "user_transcript") {
|
|
220
|
-
safeInvoke("transcript_input", parsed["text"] ?? "");
|
|
221
|
-
} else if (msgType === "agent_response") {
|
|
222
|
-
safeInvoke("transcript_output", parsed["text"] ?? "");
|
|
223
|
-
safeInvoke("response_done", null);
|
|
224
|
-
} else if (msgType === "interruption") {
|
|
225
|
-
safeInvoke("interruption", null);
|
|
226
|
-
} else if (msgType === "error") {
|
|
227
|
-
safeInvoke("error", parsed);
|
|
228
|
-
}
|
|
429
|
+
this.handleMessage(parsed);
|
|
229
430
|
});
|
|
230
431
|
}
|
|
432
|
+
safeInvoke(type, data) {
|
|
433
|
+
const cb = this.eventCallback;
|
|
434
|
+
if (!cb) return;
|
|
435
|
+
void Promise.resolve(cb(type, data)).catch(
|
|
436
|
+
(err) => getLogger().error("onEvent callback error:", err)
|
|
437
|
+
);
|
|
438
|
+
}
|
|
439
|
+
respondToPing(eventId, delayMs) {
|
|
440
|
+
const send = () => {
|
|
441
|
+
if (!this.ws || this.ws.readyState !== WebSocket2.OPEN) return;
|
|
442
|
+
try {
|
|
443
|
+
this.ws.send(JSON.stringify({ type: "pong", event_id: eventId }));
|
|
444
|
+
} catch (err) {
|
|
445
|
+
getLogger().warn("ElevenLabs ConvAI pong send failed:", err);
|
|
446
|
+
}
|
|
447
|
+
};
|
|
448
|
+
if (delayMs && delayMs > 0) {
|
|
449
|
+
setTimeout(send, delayMs);
|
|
450
|
+
} else {
|
|
451
|
+
send();
|
|
452
|
+
}
|
|
453
|
+
}
|
|
454
|
+
clearSilenceTimer() {
|
|
455
|
+
if (this.silenceTimer) {
|
|
456
|
+
clearTimeout(this.silenceTimer);
|
|
457
|
+
this.silenceTimer = null;
|
|
458
|
+
}
|
|
459
|
+
}
|
|
460
|
+
finalizeAgentTurn() {
|
|
461
|
+
this.clearSilenceTimer();
|
|
462
|
+
if (this.agentSpeaking) {
|
|
463
|
+
this.agentSpeaking = false;
|
|
464
|
+
this.safeInvoke("response_done", null);
|
|
465
|
+
}
|
|
466
|
+
}
|
|
467
|
+
scheduleSilenceDone() {
|
|
468
|
+
this.clearSilenceTimer();
|
|
469
|
+
this.silenceTimer = setTimeout(() => {
|
|
470
|
+
if (this.agentSpeaking) {
|
|
471
|
+
this.agentSpeaking = false;
|
|
472
|
+
this.safeInvoke("response_done", null);
|
|
473
|
+
}
|
|
474
|
+
}, AGENT_SILENCE_MS);
|
|
475
|
+
}
|
|
476
|
+
handleMessage(parsed) {
|
|
477
|
+
const msgType = parsed["type"];
|
|
478
|
+
if (msgType === "ping") {
|
|
479
|
+
const pingPayload = parsed["ping_event"] ?? parsed["ping"] ?? {};
|
|
480
|
+
const eventId = pingPayload["event_id"] ?? parsed["event_id"];
|
|
481
|
+
const pingMs = pingPayload["ping_ms"] ?? 0;
|
|
482
|
+
this.respondToPing(eventId, pingMs);
|
|
483
|
+
return;
|
|
484
|
+
}
|
|
485
|
+
if (msgType === "conversation_initiation_metadata") {
|
|
486
|
+
const meta = parsed["conversation_initiation_metadata_event"] ?? parsed;
|
|
487
|
+
this.conversationId = meta["conversation_id"] ?? this.conversationId;
|
|
488
|
+
this.agentOutputAudioFormat = meta["agent_output_audio_format"] ?? this.agentOutputAudioFormat;
|
|
489
|
+
this.userInputAudioFormat = meta["user_input_audio_format"] ?? this.userInputAudioFormat;
|
|
490
|
+
this.finalizeAgentTurn();
|
|
491
|
+
return;
|
|
492
|
+
}
|
|
493
|
+
if (msgType === "audio") {
|
|
494
|
+
const audioEvt = parsed["audio_event"];
|
|
495
|
+
let audioB64;
|
|
496
|
+
if (audioEvt) {
|
|
497
|
+
audioB64 = audioEvt["audio_base_64"] ?? audioEvt["audio"];
|
|
498
|
+
}
|
|
499
|
+
if (!audioB64) {
|
|
500
|
+
audioB64 = parsed["audio"];
|
|
501
|
+
}
|
|
502
|
+
if (audioB64) {
|
|
503
|
+
this.agentSpeaking = true;
|
|
504
|
+
this.safeInvoke("audio", Buffer.from(audioB64, "base64"));
|
|
505
|
+
this.scheduleSilenceDone();
|
|
506
|
+
}
|
|
507
|
+
return;
|
|
508
|
+
}
|
|
509
|
+
if (msgType === "user_transcript") {
|
|
510
|
+
const evt = parsed["user_transcription_event"] ?? parsed;
|
|
511
|
+
const text = evt["user_transcript"] ?? evt["text"] ?? "";
|
|
512
|
+
this.finalizeAgentTurn();
|
|
513
|
+
this.safeInvoke("transcript_input", text);
|
|
514
|
+
return;
|
|
515
|
+
}
|
|
516
|
+
if (msgType === "agent_response") {
|
|
517
|
+
const evt = parsed["agent_response_event"] ?? parsed;
|
|
518
|
+
const text = evt["agent_response"] ?? evt["text"] ?? "";
|
|
519
|
+
this.safeInvoke("transcript_output", text);
|
|
520
|
+
this.agentSpeaking = true;
|
|
521
|
+
this.safeInvoke("response_start", { text });
|
|
522
|
+
return;
|
|
523
|
+
}
|
|
524
|
+
if (msgType === "interruption") {
|
|
525
|
+
this.finalizeAgentTurn();
|
|
526
|
+
this.safeInvoke("interruption", null);
|
|
527
|
+
return;
|
|
528
|
+
}
|
|
529
|
+
if (msgType === "error") {
|
|
530
|
+
const errText = parsed["message"] ?? parsed["error"] ?? JSON.stringify(parsed);
|
|
531
|
+
getLogger().error("ElevenLabs ConvAI error:", errText);
|
|
532
|
+
this.safeInvoke("error", errText);
|
|
533
|
+
return;
|
|
534
|
+
}
|
|
535
|
+
}
|
|
231
536
|
sendAudio(audioBytes) {
|
|
232
537
|
if (!this.ws || this.ws.readyState !== WebSocket2.OPEN) return;
|
|
233
538
|
this.ws.send(
|
|
234
539
|
JSON.stringify({
|
|
235
|
-
|
|
236
|
-
audio: audioBytes.toString("base64")
|
|
540
|
+
user_audio_chunk: audioBytes.toString("base64")
|
|
237
541
|
})
|
|
238
542
|
);
|
|
239
543
|
}
|
|
240
544
|
onEvent(callback) {
|
|
241
545
|
this.eventCallback = callback;
|
|
242
546
|
}
|
|
243
|
-
close() {
|
|
244
|
-
this.
|
|
245
|
-
this.ws
|
|
246
|
-
|
|
547
|
+
async close() {
|
|
548
|
+
this.clearSilenceTimer();
|
|
549
|
+
if (!this.ws) {
|
|
550
|
+
this.eventCallback = null;
|
|
551
|
+
return;
|
|
552
|
+
}
|
|
553
|
+
if (this.closePromise) {
|
|
554
|
+
await this.closePromise;
|
|
555
|
+
return;
|
|
556
|
+
}
|
|
557
|
+
const ws = this.ws;
|
|
558
|
+
this.closePromise = new Promise((resolve) => {
|
|
559
|
+
if (ws.readyState === WebSocket2.CLOSED || ws.readyState === WebSocket2.CLOSING) {
|
|
560
|
+
resolve();
|
|
561
|
+
return;
|
|
562
|
+
}
|
|
563
|
+
const done = () => {
|
|
564
|
+
resolve();
|
|
565
|
+
};
|
|
566
|
+
ws.once("close", done);
|
|
567
|
+
ws.once("error", done);
|
|
568
|
+
try {
|
|
569
|
+
ws.close();
|
|
570
|
+
} catch {
|
|
571
|
+
resolve();
|
|
572
|
+
}
|
|
573
|
+
});
|
|
574
|
+
try {
|
|
575
|
+
await this.closePromise;
|
|
576
|
+
} finally {
|
|
577
|
+
this.ws = null;
|
|
578
|
+
this.eventCallback = null;
|
|
579
|
+
this.closePromise = null;
|
|
580
|
+
}
|
|
247
581
|
}
|
|
248
582
|
};
|
|
249
583
|
|
|
@@ -258,21 +592,57 @@ async function createTTS(agent) {
|
|
|
258
592
|
// src/pricing.ts
|
|
259
593
|
// Built-in price table, keyed by provider id. Every entry carries a `unit`
// ("minute", "1k_chars" or "token") that tells the cost helpers how to bill.
var DEFAULT_PRICING = {
  // ---- Speech-to-text: USD per minute of audio processed ----
  // Deepgram Nova-3 streaming, monolingual: $0.0077/min (the earlier
  // $0.0043/min figure was the batch rate). Multilingual Nova-3 is
  // $0.0092/min — supply an override for it.
  deepgram: { unit: "minute", price: 0.0077 },
  whisper: { unit: "minute", price: 0.006 },
  // AssemblyAI Universal-Streaming: $0.15/hr, i.e. $0.0025/min.
  assemblyai: { unit: "minute", price: 0.0025 },
  // Cartesia ink-whisper streaming STT: roughly $0.15/hr on usage plans.
  cartesia_stt: { unit: "minute", price: 0.0025 },
  // Soniox real-time STT: $0.12/hr, i.e. $0.002/min.
  soniox: { unit: "minute", price: 0.002 },
  // Speechmatics Pro tier: $0.24/hr, i.e. $0.0040/min. The earlier $0.0173
  // default came from a retired Standard tier and overstated cost ~4.3x.
  speechmatics: { unit: "minute", price: 0.004 },

  // ---- Text-to-speech: USD per 1,000 characters synthesized ----
  // ElevenLabs eleven_flash_v2_5 via the direct API: $0.06/1k. The earlier
  // $0.18 only matched Creator-plan overage.
  elevenlabs: { unit: "1k_chars", price: 0.06 },
  openai_tts: { unit: "1k_chars", price: 0.015 },
  openai_tts_hd: { unit: "1k_chars", price: 0.03 },
  // Cartesia Sonic TTS: ~1 credit/char, about $0.030/1k on usage plans.
  cartesia_tts: { unit: "1k_chars", price: 0.03 },
  // Rime mist v2: $0.030/1k chars pay-as-you-go.
  rime: { unit: "1k_chars", price: 0.03 },
  // LMNT aurora/blizzard: $0.050/1k chars Indie overage.
  lmnt: { unit: "1k_chars", price: 0.05 },

  // ---- OpenAI Realtime: USD per token ----
  // Calibrated for gpt-4o-mini-realtime-preview:
  //   audio in  $10   / M tokens
  //   audio out $20   / M tokens
  //   text in   $0.60 / M tokens
  //   text out  $2.40 / M tokens
  // Scale by ~10 for gpt-4o-realtime-preview and by ~3 for gpt-realtime.
  // NOTE(review): OpenAIRealtimeAdapter now defaults to "gpt-realtime-mini";
  // confirm these rates are the intended ones for that model.
  openai_realtime: {
    unit: "token",
    audio_input_per_token: 0.00001,
    audio_output_per_token: 0.00002,
    text_input_per_token: 0.0000006,
    text_output_per_token: 0.0000024,
    // Cached-prompt rates: audio $0.30/M (~3% of full), text $0.06/M (10% of
    // full). They apply to the cached share of the input audio/text tokens.
    cached_audio_input_per_token: 0.0000003,
    cached_text_input_per_token: 0.00000006
  },

  // ---- Telephony: USD per minute of call duration ----
  // Twilio default is US inbound local. US toll-free inbound ($0.022/min) and
  // US outbound local ($0.0140/min) need an override, e.g.
  // Patter({ pricing: { twilio: {...} } }).
  twilio: { unit: "minute", price: 0.0085 },
  telnyx: { unit: "minute", price: 0.007 }
};
|
|
278
648
|
function mergePricing(overrides) {
|
|
@@ -281,22 +651,22 @@ function mergePricing(overrides) {
|
|
|
281
651
|
merged[k] = { ...v };
|
|
282
652
|
}
|
|
283
653
|
if (!overrides) return merged;
|
|
284
|
-
for (const [
|
|
285
|
-
if (merged[
|
|
286
|
-
merged[
|
|
654
|
+
for (const [provider2, values] of Object.entries(overrides)) {
|
|
655
|
+
if (merged[provider2]) {
|
|
656
|
+
merged[provider2] = { ...merged[provider2], ...values };
|
|
287
657
|
} else {
|
|
288
|
-
merged[
|
|
658
|
+
merged[provider2] = { ...values };
|
|
289
659
|
}
|
|
290
660
|
}
|
|
291
661
|
return merged;
|
|
292
662
|
}
|
|
293
|
-
function calculateSttCost(
|
|
294
|
-
const config = pricing[
|
|
663
|
+
/**
 * Price a stretch of transcribed audio for one speech-to-text provider.
 *
 * @param {string} provider2 - Key to look up in `pricing`.
 * @param {number} audioSeconds - Amount of audio processed, in seconds.
 * @param {Record<string, { unit?: string, price?: number }>} pricing - Price table.
 * @returns {number} `audioSeconds / 60` times the per-minute price. Returns 0
 *   when the provider has no entry, the entry is not billed per "minute", or
 *   the entry has no price.
 */
function calculateSttCost(provider2, audioSeconds, pricing) {
  const entry = pricing[provider2];
  const billedPerMinute = Boolean(entry) && entry.unit === "minute";
  if (!billedPerMinute) return 0;
  const ratePerMinute = entry.price ?? 0;
  return audioSeconds / 60 * ratePerMinute;
}
|
|
298
|
-
function calculateTtsCost(
|
|
299
|
-
const config = pricing[
|
|
668
|
+
/**
 * Price a run of synthesized text for one text-to-speech provider.
 *
 * @param {string} provider2 - Key to look up in `pricing`.
 * @param {number} characterCount - Number of characters synthesized.
 * @param {Record<string, { unit?: string, price?: number }>} pricing - Price table.
 * @returns {number} `characterCount / 1000` times the per-1k-character price.
 *   Returns 0 when the provider has no entry, the entry is not billed per
 *   "1k_chars", or the entry has no price.
 */
function calculateTtsCost(provider2, characterCount, pricing) {
  const entry = pricing[provider2];
  const billedPerThousandChars = Boolean(entry) && entry.unit === "1k_chars";
  if (!billedPerThousandChars) return 0;
  const ratePerThousand = entry.price ?? 0;
  return characterCount / 1e3 * ratePerThousand;
}
|
|
@@ -305,21 +675,126 @@ function calculateRealtimeCost(usage, pricing) {
|
|
|
305
675
|
if (!config || config.unit !== "token") return 0;
|
|
306
676
|
const input = usage.input_token_details ?? {};
|
|
307
677
|
const output = usage.output_token_details ?? {};
|
|
678
|
+
const cachedAudioRate = config.cached_audio_input_per_token ?? config.audio_input_per_token ?? 0;
|
|
679
|
+
const cachedTextRate = config.cached_text_input_per_token ?? config.text_input_per_token ?? 0;
|
|
680
|
+
const totalAudioIn = input.audio_tokens ?? 0;
|
|
681
|
+
const totalTextIn = input.text_tokens ?? 0;
|
|
682
|
+
let cachedAudioIn;
|
|
683
|
+
let cachedTextIn;
|
|
684
|
+
const details = input.cached_tokens_details;
|
|
685
|
+
if (details && (details.audio_tokens !== void 0 || details.text_tokens !== void 0)) {
|
|
686
|
+
cachedAudioIn = Math.min(details.audio_tokens ?? 0, totalAudioIn);
|
|
687
|
+
cachedTextIn = Math.min(details.text_tokens ?? 0, totalTextIn);
|
|
688
|
+
} else if (input.cached_tokens && input.cached_tokens > 0) {
|
|
689
|
+
const totalIn = totalAudioIn + totalTextIn;
|
|
690
|
+
const ratio = totalIn > 0 ? input.cached_tokens / totalIn : 0;
|
|
691
|
+
cachedAudioIn = Math.min(Math.round(totalAudioIn * ratio), totalAudioIn);
|
|
692
|
+
cachedTextIn = Math.min(Math.round(totalTextIn * ratio), totalTextIn);
|
|
693
|
+
} else {
|
|
694
|
+
cachedAudioIn = 0;
|
|
695
|
+
cachedTextIn = 0;
|
|
696
|
+
}
|
|
308
697
|
let cost = 0;
|
|
309
|
-
cost += (
|
|
310
|
-
cost +=
|
|
698
|
+
cost += (totalAudioIn - cachedAudioIn) * (config.audio_input_per_token ?? 0);
|
|
699
|
+
cost += cachedAudioIn * cachedAudioRate;
|
|
700
|
+
cost += (totalTextIn - cachedTextIn) * (config.text_input_per_token ?? 0);
|
|
701
|
+
cost += cachedTextIn * cachedTextRate;
|
|
311
702
|
cost += (output.audio_tokens ?? 0) * (config.audio_output_per_token ?? 0);
|
|
312
703
|
cost += (output.text_tokens ?? 0) * (config.text_output_per_token ?? 0);
|
|
313
|
-
return cost;
|
|
704
|
+
return Math.max(0, cost);
|
|
705
|
+
}
|
|
706
|
+
function calculateRealtimeCachedSavings(usage, pricing) {
|
|
707
|
+
const config = pricing.openai_realtime;
|
|
708
|
+
if (!config || config.unit !== "token") return 0;
|
|
709
|
+
const input = usage.input_token_details ?? {};
|
|
710
|
+
const cached = input.cached_tokens_details ?? {};
|
|
711
|
+
const cachedAudioRate = config.cached_audio_input_per_token ?? config.audio_input_per_token ?? 0;
|
|
712
|
+
const cachedTextRate = config.cached_text_input_per_token ?? config.text_input_per_token ?? 0;
|
|
713
|
+
const cachedAudio = Math.min(cached.audio_tokens ?? 0, input.audio_tokens ?? 0);
|
|
714
|
+
const cachedText = Math.min(cached.text_tokens ?? 0, input.text_tokens ?? 0);
|
|
715
|
+
const fullAudio = cachedAudio * (config.audio_input_per_token ?? 0);
|
|
716
|
+
const fullText = cachedText * (config.text_input_per_token ?? 0);
|
|
717
|
+
const discountedAudio = cachedAudio * cachedAudioRate;
|
|
718
|
+
const discountedText = cachedText * cachedTextRate;
|
|
719
|
+
return Math.max(0, fullAudio + fullText - (discountedAudio + discountedText));
|
|
720
|
+
}
|
|
721
|
+
var llmPricing = {
|
|
722
|
+
anthropic: {
|
|
723
|
+
"claude-opus-4-7": {
|
|
724
|
+
input: 15,
|
|
725
|
+
output: 75,
|
|
726
|
+
cache_read: 1.5,
|
|
727
|
+
cache_write: 18.75
|
|
728
|
+
},
|
|
729
|
+
"claude-sonnet-4-6": {
|
|
730
|
+
input: 3,
|
|
731
|
+
output: 15,
|
|
732
|
+
cache_read: 0.3,
|
|
733
|
+
cache_write: 3.75
|
|
734
|
+
},
|
|
735
|
+
"claude-haiku-4-5": {
|
|
736
|
+
input: 1,
|
|
737
|
+
output: 5,
|
|
738
|
+
cache_read: 0.1,
|
|
739
|
+
cache_write: 1.25
|
|
740
|
+
}
|
|
741
|
+
},
|
|
742
|
+
google: {
|
|
743
|
+
"gemini-2.5-pro": { input: 1.25, output: 10 },
|
|
744
|
+
"gemini-2.5-flash": { input: 0.3, output: 2.5 },
|
|
745
|
+
"gemini-live-2.5-flash-native-audio": { input: 0.3, output: 2.5 }
|
|
746
|
+
},
|
|
747
|
+
groq: {
|
|
748
|
+
"llama-3.3-70b-versatile": { input: 0.59, output: 0.79 },
|
|
749
|
+
"llama-3.1-8b-instant": { input: 0.05, output: 0.08 }
|
|
750
|
+
},
|
|
751
|
+
cerebras: {
|
|
752
|
+
"llama-3.3-70b": { input: 0.85, output: 1.2 },
|
|
753
|
+
"qwen-3-32b": { input: 0.4, output: 0.8 }
|
|
754
|
+
},
|
|
755
|
+
// OpenAI Chat Completions (non-Realtime) — mirrors sdk-py pricing table.
|
|
756
|
+
// Rates are per 1M tokens (USD), cache_read = cached input rate.
|
|
757
|
+
openai: {
|
|
758
|
+
"gpt-4o": { input: 2.5, output: 10, cache_read: 1.25 },
|
|
759
|
+
"gpt-4o-mini": { input: 0.15, output: 0.6, cache_read: 0.075 },
|
|
760
|
+
"gpt-4.1": { input: 3, output: 12, cache_read: 0.75 },
|
|
761
|
+
"gpt-4.1-mini": { input: 0.8, output: 3.2, cache_read: 0.2 },
|
|
762
|
+
"o3": { input: 2, output: 8, cache_read: 0.5 },
|
|
763
|
+
"o4-mini": { input: 1.1, output: 4.4, cache_read: 0.275 }
|
|
764
|
+
}
|
|
765
|
+
};
|
|
766
|
+
function calculateLlmCost(provider2, model, inputTokens, outputTokens, cacheReadTokens = 0, cacheWriteTokens = 0) {
|
|
767
|
+
const providerTable = llmPricing[provider2];
|
|
768
|
+
if (!providerTable) return 0;
|
|
769
|
+
let rates = providerTable[model];
|
|
770
|
+
if (!rates) {
|
|
771
|
+
let bestKey = "";
|
|
772
|
+
for (const key of Object.keys(providerTable)) {
|
|
773
|
+
if (model.startsWith(key) && key.length > bestKey.length) {
|
|
774
|
+
bestKey = key;
|
|
775
|
+
}
|
|
776
|
+
}
|
|
777
|
+
if (bestKey) rates = providerTable[bestKey];
|
|
778
|
+
}
|
|
779
|
+
if (!rates) return 0;
|
|
780
|
+
let cost = 0;
|
|
781
|
+
cost += inputTokens / 1e6 * (rates.input ?? 0);
|
|
782
|
+
cost += outputTokens / 1e6 * (rates.output ?? 0);
|
|
783
|
+
cost += cacheReadTokens / 1e6 * (rates.cache_read ?? 0);
|
|
784
|
+
cost += cacheWriteTokens / 1e6 * (rates.cache_write ?? 0);
|
|
785
|
+
return Math.max(0, cost);
|
|
314
786
|
}
|
|
315
|
-
function calculateTelephonyCost(
|
|
316
|
-
const config = pricing[
|
|
787
|
+
function calculateTelephonyCost(provider2, durationSeconds, pricing) {
|
|
788
|
+
const config = pricing[provider2];
|
|
317
789
|
if (!config || config.unit !== "minute") return 0;
|
|
318
|
-
|
|
790
|
+
const minutes = provider2 === "twilio" ? Math.ceil(durationSeconds / 60) : durationSeconds / 60;
|
|
791
|
+
return minutes * (config.price ?? 0);
|
|
319
792
|
}
|
|
320
793
|
|
|
321
794
|
// src/dashboard/store.ts
|
|
322
795
|
import { EventEmitter } from "events";
|
|
796
|
+
import * as fs from "fs";
|
|
797
|
+
import * as path from "path";
|
|
323
798
|
var MetricsStore = class extends EventEmitter {
|
|
324
799
|
maxCalls;
|
|
325
800
|
calls = [];
|
|
@@ -482,6 +957,10 @@ var MetricsStore = class extends EventEmitter {
|
|
|
482
957
|
}
|
|
483
958
|
return null;
|
|
484
959
|
}
|
|
960
|
+
/** Look up an active call by id (returns undefined if not active or unknown). */
|
|
961
|
+
getActive(callId) {
|
|
962
|
+
return this.activeCalls.get(callId);
|
|
963
|
+
}
|
|
485
964
|
getActiveCalls() {
|
|
486
965
|
return Array.from(this.activeCalls.values());
|
|
487
966
|
}
|
|
@@ -547,7 +1026,102 @@ var MetricsStore = class extends EventEmitter {
|
|
|
547
1026
|
get callCount() {
|
|
548
1027
|
return this.calls.length;
|
|
549
1028
|
}
|
|
1029
|
+
/**
|
|
1030
|
+
* Rebuild the in-memory call list from `metadata.json` files written by
|
|
1031
|
+
* `CallLogger` under `<logRoot>/calls/YYYY/MM/DD/<call_id>/`. Idempotent:
|
|
1032
|
+
* call_ids already in the store are skipped. Errors per file are logged
|
|
1033
|
+
* and swallowed so a single corrupt entry doesn't block hydration.
|
|
1034
|
+
*
|
|
1035
|
+
* Returns the number of calls newly added to the store.
|
|
1036
|
+
*
|
|
1037
|
+
* Safe to call before any traffic; intended to run once at server startup.
|
|
1038
|
+
*/
|
|
1039
|
+
hydrate(logRoot) {
|
|
1040
|
+
if (!logRoot) return 0;
|
|
1041
|
+
const callsRoot = path.join(logRoot, "calls");
|
|
1042
|
+
if (!fs.existsSync(callsRoot)) return 0;
|
|
1043
|
+
const collected = [];
|
|
1044
|
+
const seen = new Set(this.calls.map((c) => c.call_id));
|
|
1045
|
+
const walk = (dir, depth) => {
|
|
1046
|
+
let entries;
|
|
1047
|
+
try {
|
|
1048
|
+
entries = fs.readdirSync(dir, { withFileTypes: true });
|
|
1049
|
+
} catch {
|
|
1050
|
+
return;
|
|
1051
|
+
}
|
|
1052
|
+
for (const entry of entries) {
|
|
1053
|
+
const childPath = path.join(dir, entry.name);
|
|
1054
|
+
if (depth < 3) {
|
|
1055
|
+
if (entry.isDirectory() && /^\d+$/.test(entry.name)) {
|
|
1056
|
+
walk(childPath, depth + 1);
|
|
1057
|
+
}
|
|
1058
|
+
continue;
|
|
1059
|
+
}
|
|
1060
|
+
if (!entry.isDirectory()) continue;
|
|
1061
|
+
const metadataPath = path.join(childPath, "metadata.json");
|
|
1062
|
+
if (!fs.existsSync(metadataPath)) continue;
|
|
1063
|
+
try {
|
|
1064
|
+
const raw = fs.readFileSync(metadataPath, "utf8");
|
|
1065
|
+
const meta = JSON.parse(raw);
|
|
1066
|
+
const callId = meta.call_id || entry.name;
|
|
1067
|
+
if (!callId || seen.has(callId)) continue;
|
|
1068
|
+
const record = metadataToCallRecord(callId, meta);
|
|
1069
|
+
if (record === null) {
|
|
1070
|
+
getLogger().debug(
|
|
1071
|
+
`MetricsStore.hydrate: skipping ${metadataPath}: unparseable started_at`
|
|
1072
|
+
);
|
|
1073
|
+
continue;
|
|
1074
|
+
}
|
|
1075
|
+
collected.push(record);
|
|
1076
|
+
seen.add(callId);
|
|
1077
|
+
} catch (err) {
|
|
1078
|
+
getLogger().debug(
|
|
1079
|
+
`MetricsStore.hydrate: skipping ${metadataPath}: ${String(err)}`
|
|
1080
|
+
);
|
|
1081
|
+
}
|
|
1082
|
+
}
|
|
1083
|
+
};
|
|
1084
|
+
walk(callsRoot, 0);
|
|
1085
|
+
collected.sort((a, b) => (a.started_at || 0) - (b.started_at || 0));
|
|
1086
|
+
for (const rec of collected) {
|
|
1087
|
+
if (this.calls.some((c) => c.call_id === rec.call_id)) continue;
|
|
1088
|
+
this.calls.push(rec);
|
|
1089
|
+
if (this.calls.length > this.maxCalls) {
|
|
1090
|
+
this.calls = this.calls.slice(-this.maxCalls);
|
|
1091
|
+
}
|
|
1092
|
+
}
|
|
1093
|
+
return collected.length;
|
|
1094
|
+
}
|
|
550
1095
|
};
|
|
1096
|
+
function metadataToCallRecord(callId, meta) {
|
|
1097
|
+
const startedAt = parseTimestamp(meta.started_at);
|
|
1098
|
+
if (startedAt === null) return null;
|
|
1099
|
+
const endedAt = parseTimestamp(meta.ended_at);
|
|
1100
|
+
const status = meta.status || "completed";
|
|
1101
|
+
const metrics = meta.metrics && typeof meta.metrics === "object" ? meta.metrics : null;
|
|
1102
|
+
const transcript = Array.isArray(meta.transcript) ? meta.transcript : [];
|
|
1103
|
+
return {
|
|
1104
|
+
call_id: callId,
|
|
1105
|
+
caller: meta.caller || "",
|
|
1106
|
+
callee: meta.callee || "",
|
|
1107
|
+
direction: meta.direction || "inbound",
|
|
1108
|
+
started_at: startedAt,
|
|
1109
|
+
ended_at: endedAt ?? void 0,
|
|
1110
|
+
status,
|
|
1111
|
+
metrics,
|
|
1112
|
+
transcript
|
|
1113
|
+
};
|
|
1114
|
+
}
|
|
1115
|
+
function parseTimestamp(raw) {
|
|
1116
|
+
if (typeof raw === "number") {
|
|
1117
|
+
return Number.isFinite(raw) ? raw : null;
|
|
1118
|
+
}
|
|
1119
|
+
if (typeof raw === "string") {
|
|
1120
|
+
const ms = Date.parse(raw);
|
|
1121
|
+
return Number.isFinite(ms) ? ms / 1e3 : null;
|
|
1122
|
+
}
|
|
1123
|
+
return null;
|
|
1124
|
+
}
|
|
551
1125
|
|
|
552
1126
|
// src/dashboard/auth.ts
|
|
553
1127
|
import crypto from "crypto";
|
|
@@ -1628,10 +2202,52 @@ function isWebSocketUrl(url) {
|
|
|
1628
2202
|
|
|
1629
2203
|
// src/providers/deepgram-stt.ts
|
|
1630
2204
|
import WebSocket3 from "ws";
|
|
2205
|
+
|
|
2206
|
+
// src/errors.ts
|
|
2207
|
+
var PatterError = class extends Error {
|
|
2208
|
+
constructor(message) {
|
|
2209
|
+
super(message);
|
|
2210
|
+
this.name = "PatterError";
|
|
2211
|
+
}
|
|
2212
|
+
};
|
|
2213
|
+
var PatterConnectionError = class extends PatterError {
|
|
2214
|
+
constructor(message) {
|
|
2215
|
+
super(message);
|
|
2216
|
+
this.name = "PatterConnectionError";
|
|
2217
|
+
}
|
|
2218
|
+
};
|
|
2219
|
+
var AuthenticationError = class extends PatterError {
|
|
2220
|
+
constructor(message) {
|
|
2221
|
+
super(message);
|
|
2222
|
+
this.name = "AuthenticationError";
|
|
2223
|
+
}
|
|
2224
|
+
};
|
|
2225
|
+
var ProvisionError = class extends PatterError {
|
|
2226
|
+
constructor(message) {
|
|
2227
|
+
super(message);
|
|
2228
|
+
this.name = "ProvisionError";
|
|
2229
|
+
}
|
|
2230
|
+
};
|
|
2231
|
+
var RateLimitError = class extends PatterConnectionError {
|
|
2232
|
+
constructor(message) {
|
|
2233
|
+
super(message);
|
|
2234
|
+
this.name = "RateLimitError";
|
|
2235
|
+
}
|
|
2236
|
+
};
|
|
2237
|
+
|
|
2238
|
+
// src/providers/deepgram-stt.ts
|
|
1631
2239
|
var DEEPGRAM_WS_URL = "wss://api.deepgram.com/v1/listen";
|
|
2240
|
+
var KEEPALIVE_INTERVAL_MS = 4e3;
|
|
2241
|
+
var FINALIZE_DRAIN_MS = 100;
|
|
2242
|
+
var CLOSE_LATENCY_BUDGET_MS = 500;
|
|
2243
|
+
var RECONNECT_CLOSE_CODES = /* @__PURE__ */ new Set([1006, 1011]);
|
|
1632
2244
|
var DeepgramSTT = class _DeepgramSTT {
|
|
1633
2245
|
ws = null;
|
|
1634
|
-
|
|
2246
|
+
transcriptCallbacks = /* @__PURE__ */ new Set();
|
|
2247
|
+
errorCallbacks = /* @__PURE__ */ new Set();
|
|
2248
|
+
keepaliveTimer = null;
|
|
2249
|
+
running = false;
|
|
2250
|
+
reconnectAttempted = false;
|
|
1635
2251
|
/** Request ID from Deepgram — used to query actual cost post-call. */
|
|
1636
2252
|
requestId = "";
|
|
1637
2253
|
apiKey;
|
|
@@ -1653,7 +2269,7 @@ var DeepgramSTT = class _DeepgramSTT {
|
|
|
1653
2269
|
this.sampleRate = sampleRate ?? opts.sampleRate ?? 16e3;
|
|
1654
2270
|
this.endpointingMs = opts.endpointingMs ?? 150;
|
|
1655
2271
|
this.utteranceEndMs = opts.utteranceEndMs === null ? null : opts.utteranceEndMs ?? 1e3;
|
|
1656
|
-
this.smartFormat = opts.smartFormat ??
|
|
2272
|
+
this.smartFormat = opts.smartFormat ?? false;
|
|
1657
2273
|
this.interimResults = opts.interimResults ?? true;
|
|
1658
2274
|
this.vadEvents = opts.vadEvents ?? true;
|
|
1659
2275
|
}
|
|
@@ -1661,7 +2277,7 @@ var DeepgramSTT = class _DeepgramSTT {
|
|
|
1661
2277
|
static forTwilio(apiKey, language = "en", model = "nova-3", options = {}) {
|
|
1662
2278
|
return new _DeepgramSTT(apiKey, language, model, "mulaw", 8e3, options);
|
|
1663
2279
|
}
|
|
1664
|
-
|
|
2280
|
+
buildUrl() {
|
|
1665
2281
|
const params = new URLSearchParams({
|
|
1666
2282
|
model: this.model,
|
|
1667
2283
|
language: this.language,
|
|
@@ -1677,69 +2293,201 @@ var DeepgramSTT = class _DeepgramSTT {
|
|
|
1677
2293
|
if (this.utteranceEndMs !== null) {
|
|
1678
2294
|
params.set("utterance_end_ms", String(Math.max(this.utteranceEndMs, 1e3)));
|
|
1679
2295
|
}
|
|
1680
|
-
|
|
1681
|
-
|
|
2296
|
+
return `${DEEPGRAM_WS_URL}?${params.toString()}`;
|
|
2297
|
+
}
|
|
2298
|
+
async connect() {
|
|
2299
|
+
await this.openSocket();
|
|
2300
|
+
this.running = true;
|
|
2301
|
+
this.reconnectAttempted = false;
|
|
2302
|
+
}
|
|
2303
|
+
async openSocket() {
|
|
2304
|
+
const url = this.buildUrl();
|
|
2305
|
+
const ws = new WebSocket3(url, {
|
|
1682
2306
|
headers: { Authorization: `Token ${this.apiKey}` }
|
|
1683
2307
|
});
|
|
2308
|
+
this.ws = ws;
|
|
1684
2309
|
await new Promise((resolve, reject) => {
|
|
1685
|
-
|
|
1686
|
-
|
|
1687
|
-
|
|
1688
|
-
|
|
1689
|
-
});
|
|
1690
|
-
this.ws.once("error", (err) => {
|
|
2310
|
+
let settled = false;
|
|
2311
|
+
const settle = (fn) => {
|
|
2312
|
+
if (settled) return;
|
|
2313
|
+
settled = true;
|
|
1691
2314
|
clearTimeout(timer);
|
|
1692
|
-
|
|
2315
|
+
fn();
|
|
2316
|
+
};
|
|
2317
|
+
const timer = setTimeout(
|
|
2318
|
+
() => settle(() => reject(new PatterConnectionError("Deepgram connect timeout"))),
|
|
2319
|
+
1e4
|
|
2320
|
+
);
|
|
2321
|
+
ws.once("open", () => settle(resolve));
|
|
2322
|
+
ws.once("error", (err) => settle(() => reject(err)));
|
|
2323
|
+
ws.once("unexpected-response", (_req, res) => {
|
|
2324
|
+
const status = res?.statusCode ?? 0;
|
|
2325
|
+
settle(() => {
|
|
2326
|
+
if (status === 401 || status === 403) {
|
|
2327
|
+
reject(new AuthenticationError(`Deepgram rejected the API key (HTTP ${status}).`));
|
|
2328
|
+
return;
|
|
2329
|
+
}
|
|
2330
|
+
if (status === 429) {
|
|
2331
|
+
reject(new RateLimitError("Deepgram rate limit exceeded (HTTP 429)."));
|
|
2332
|
+
return;
|
|
2333
|
+
}
|
|
2334
|
+
reject(new PatterConnectionError(`Deepgram WebSocket upgrade failed (HTTP ${status}).`));
|
|
2335
|
+
});
|
|
1693
2336
|
});
|
|
1694
2337
|
});
|
|
1695
|
-
|
|
1696
|
-
|
|
1697
|
-
|
|
1698
|
-
|
|
1699
|
-
|
|
1700
|
-
|
|
1701
|
-
|
|
1702
|
-
|
|
1703
|
-
|
|
1704
|
-
return;
|
|
2338
|
+
ws.on("message", (raw) => this.handleMessage(raw.toString()));
|
|
2339
|
+
ws.on("close", (code, reason) => this.handleClose(code, reason.toString()));
|
|
2340
|
+
ws.on("error", (err) => this.handleError(err));
|
|
2341
|
+
this.keepaliveTimer = setInterval(() => {
|
|
2342
|
+
if (this.ws && this.ws.readyState === WebSocket3.OPEN) {
|
|
2343
|
+
try {
|
|
2344
|
+
this.ws.send(JSON.stringify({ type: "KeepAlive" }));
|
|
2345
|
+
} catch {
|
|
2346
|
+
}
|
|
1705
2347
|
}
|
|
1706
|
-
|
|
1707
|
-
|
|
1708
|
-
|
|
1709
|
-
|
|
1710
|
-
|
|
1711
|
-
|
|
1712
|
-
|
|
1713
|
-
|
|
1714
|
-
|
|
1715
|
-
|
|
1716
|
-
|
|
1717
|
-
|
|
2348
|
+
}, KEEPALIVE_INTERVAL_MS);
|
|
2349
|
+
}
|
|
2350
|
+
clearKeepalive() {
|
|
2351
|
+
if (this.keepaliveTimer) {
|
|
2352
|
+
clearInterval(this.keepaliveTimer);
|
|
2353
|
+
this.keepaliveTimer = null;
|
|
2354
|
+
}
|
|
2355
|
+
}
|
|
2356
|
+
handleMessage(raw) {
|
|
2357
|
+
let data;
|
|
2358
|
+
try {
|
|
2359
|
+
data = JSON.parse(raw);
|
|
2360
|
+
} catch {
|
|
2361
|
+
return;
|
|
2362
|
+
}
|
|
2363
|
+
if (data.type === "Metadata" && data.request_id) {
|
|
2364
|
+
this.requestId = data.request_id;
|
|
2365
|
+
return;
|
|
2366
|
+
}
|
|
2367
|
+
if (data.type === "SpeechStarted") {
|
|
2368
|
+
this.emitTranscript({
|
|
2369
|
+
text: "",
|
|
2370
|
+
isFinal: false,
|
|
2371
|
+
confidence: 0,
|
|
2372
|
+
eventType: "SpeechStarted",
|
|
2373
|
+
requestId: this.requestId || void 0
|
|
2374
|
+
});
|
|
2375
|
+
return;
|
|
2376
|
+
}
|
|
2377
|
+
if (data.type === "UtteranceEnd") {
|
|
2378
|
+
this.emitTranscript({
|
|
2379
|
+
text: "",
|
|
2380
|
+
isFinal: true,
|
|
2381
|
+
confidence: 0,
|
|
2382
|
+
eventType: "UtteranceEnd",
|
|
2383
|
+
requestId: this.requestId || void 0
|
|
2384
|
+
});
|
|
2385
|
+
return;
|
|
2386
|
+
}
|
|
2387
|
+
if (data.type !== "Results") return;
|
|
2388
|
+
const alternatives = data.channel?.alternatives ?? [];
|
|
2389
|
+
if (!alternatives.length) return;
|
|
2390
|
+
const best = alternatives[0];
|
|
2391
|
+
const text = (best.transcript ?? "").trim();
|
|
2392
|
+
if (!text) return;
|
|
2393
|
+
const speechFinal = Boolean(data.speech_final);
|
|
2394
|
+
const transcript = {
|
|
2395
|
+
text,
|
|
2396
|
+
isFinal: Boolean(data.is_final) || speechFinal,
|
|
2397
|
+
confidence: best.confidence ?? 0,
|
|
2398
|
+
speechFinal,
|
|
2399
|
+
fromFinalize: Boolean(data.from_finalize),
|
|
2400
|
+
requestId: this.requestId || void 0,
|
|
2401
|
+
words: best.words,
|
|
2402
|
+
eventType: "Results"
|
|
2403
|
+
};
|
|
2404
|
+
this.emitTranscript(transcript);
|
|
2405
|
+
}
|
|
2406
|
+
emitTranscript(transcript) {
|
|
2407
|
+
for (const cb of this.transcriptCallbacks) {
|
|
2408
|
+
try {
|
|
1718
2409
|
cb(transcript);
|
|
2410
|
+
} catch (err) {
|
|
2411
|
+
getLogger().error(`DeepgramSTT transcript callback threw: ${String(err)}`);
|
|
1719
2412
|
}
|
|
1720
|
-
}
|
|
2413
|
+
}
|
|
2414
|
+
}
|
|
2415
|
+
emitError(err) {
|
|
2416
|
+
for (const cb of this.errorCallbacks) {
|
|
2417
|
+
try {
|
|
2418
|
+
cb(err);
|
|
2419
|
+
} catch (cbErr) {
|
|
2420
|
+
getLogger().error(`DeepgramSTT error callback threw: ${String(cbErr)}`);
|
|
2421
|
+
}
|
|
2422
|
+
}
|
|
2423
|
+
}
|
|
2424
|
+
handleError(err) {
|
|
2425
|
+
getLogger().error(`DeepgramSTT WebSocket error: ${err.message}`);
|
|
2426
|
+
this.emitError(err);
|
|
2427
|
+
}
|
|
2428
|
+
handleClose(code, reason) {
|
|
2429
|
+
this.clearKeepalive();
|
|
2430
|
+
if (!this.running) {
|
|
2431
|
+
return;
|
|
2432
|
+
}
|
|
2433
|
+
const closeError = new PatterConnectionError(
|
|
2434
|
+
`Deepgram WebSocket closed (code=${code}${reason ? `, reason=${reason}` : ""}).`
|
|
2435
|
+
);
|
|
2436
|
+
this.emitError(closeError);
|
|
2437
|
+
if (RECONNECT_CLOSE_CODES.has(code) && !this.reconnectAttempted) {
|
|
2438
|
+
this.reconnectAttempted = true;
|
|
2439
|
+
this.openSocket().catch((err) => {
|
|
2440
|
+
this.running = false;
|
|
2441
|
+
this.emitError(err instanceof Error ? err : new Error(String(err)));
|
|
2442
|
+
});
|
|
2443
|
+
} else {
|
|
2444
|
+
this.running = false;
|
|
2445
|
+
}
|
|
1721
2446
|
}
|
|
1722
2447
|
sendAudio(audio) {
|
|
1723
2448
|
if (!this.ws || this.ws.readyState !== WebSocket3.OPEN) return;
|
|
2449
|
+
if (audio.length === 0) return;
|
|
1724
2450
|
this.ws.send(audio);
|
|
1725
2451
|
}
|
|
1726
2452
|
onTranscript(callback) {
|
|
1727
|
-
|
|
1728
|
-
|
|
1729
|
-
|
|
1730
|
-
|
|
1731
|
-
|
|
1732
|
-
|
|
2453
|
+
this.transcriptCallbacks.add(callback);
|
|
2454
|
+
}
|
|
2455
|
+
offTranscript(callback) {
|
|
2456
|
+
this.transcriptCallbacks.delete(callback);
|
|
2457
|
+
}
|
|
2458
|
+
onError(callback) {
|
|
2459
|
+
this.errorCallbacks.add(callback);
|
|
2460
|
+
}
|
|
2461
|
+
offError(callback) {
|
|
2462
|
+
this.errorCallbacks.delete(callback);
|
|
1733
2463
|
}
|
|
1734
2464
|
close() {
|
|
1735
|
-
|
|
1736
|
-
|
|
1737
|
-
|
|
1738
|
-
|
|
2465
|
+
this.running = false;
|
|
2466
|
+
this.clearKeepalive();
|
|
2467
|
+
const ws = this.ws;
|
|
2468
|
+
if (!ws) return;
|
|
2469
|
+
this.ws = null;
|
|
2470
|
+
const sendSafe = (payload) => {
|
|
2471
|
+
if (ws.readyState === WebSocket3.OPEN) {
|
|
2472
|
+
try {
|
|
2473
|
+
ws.send(payload);
|
|
2474
|
+
} catch {
|
|
2475
|
+
}
|
|
1739
2476
|
}
|
|
1740
|
-
|
|
1741
|
-
|
|
2477
|
+
};
|
|
2478
|
+
const finishClose = () => {
|
|
2479
|
+
sendSafe(JSON.stringify({ type: "CloseStream" }));
|
|
2480
|
+
try {
|
|
2481
|
+
ws.close();
|
|
2482
|
+
} catch {
|
|
2483
|
+
}
|
|
2484
|
+
};
|
|
2485
|
+
if (ws.readyState !== WebSocket3.OPEN) {
|
|
2486
|
+
finishClose();
|
|
2487
|
+
return;
|
|
1742
2488
|
}
|
|
2489
|
+
sendSafe(JSON.stringify({ type: "Finalize" }));
|
|
2490
|
+
setTimeout(finishClose, Math.min(FINALIZE_DRAIN_MS, CLOSE_LATENCY_BUDGET_MS));
|
|
1743
2491
|
}
|
|
1744
2492
|
};
|
|
1745
2493
|
|
|
@@ -1752,11 +2500,16 @@ function hrTimeMs() {
|
|
|
1752
2500
|
const [sec, ns] = process.hrtime();
|
|
1753
2501
|
return sec * 1e3 + ns / 1e6;
|
|
1754
2502
|
}
|
|
1755
|
-
function
|
|
2503
|
+
function percentile(values, p) {
|
|
1756
2504
|
if (values.length === 0) return 0;
|
|
1757
2505
|
const sorted = [...values].sort((a, b) => a - b);
|
|
1758
|
-
|
|
1759
|
-
|
|
2506
|
+
if (sorted.length === 1) return sorted[0];
|
|
2507
|
+
const rank = p * (sorted.length - 1);
|
|
2508
|
+
const lo = Math.floor(rank);
|
|
2509
|
+
const hi = Math.ceil(rank);
|
|
2510
|
+
if (lo === hi) return sorted[lo];
|
|
2511
|
+
const frac = rank - lo;
|
|
2512
|
+
return sorted[lo] + (sorted[hi] - sorted[lo]) * frac;
|
|
1760
2513
|
}
|
|
1761
2514
|
var CallMetricsAccumulator = class {
|
|
1762
2515
|
callId;
|
|
@@ -1771,19 +2524,54 @@ var CallMetricsAccumulator = class {
|
|
|
1771
2524
|
// Per-turn timing state
|
|
1772
2525
|
_turnStart = null;
|
|
1773
2526
|
_sttComplete = null;
|
|
2527
|
+
_llmFirstToken = null;
|
|
2528
|
+
_llmFirstSentenceComplete = null;
|
|
1774
2529
|
_llmComplete = null;
|
|
1775
2530
|
_ttsFirstByte = null;
|
|
2531
|
+
/** Last TTS audio byte sent (hrTimeMs). Stamped by ``recordTtsComplete`` /
|
|
2532
|
+
* ``recordTtsCompleteTs``. Used to compute ``tts_total_ms``. */
|
|
2533
|
+
_ttsLastByte = null;
|
|
2534
|
+
/** Endpoint signal (hrTimeMs) — VAD stop or STT speech_final, whichever
|
|
2535
|
+
* fires first. Used to compute ``endpoint_ms``. */
|
|
2536
|
+
_endpointSignalAt = null;
|
|
2537
|
+
/** Monotonic stamp of LLM dispatch (paired with ``_endpointSignalAt``). */
|
|
2538
|
+
_turnCommittedMono = null;
|
|
2539
|
+
/** Barge-in detected timestamp (hrTimeMs). */
|
|
2540
|
+
_bargeinDetectedAt = null;
|
|
2541
|
+
/** TTS-stopped timestamp after barge-in (hrTimeMs). */
|
|
2542
|
+
_bargeinStoppedAt = null;
|
|
1776
2543
|
_turnUserText = "";
|
|
1777
2544
|
_turnSttAudioSeconds = 0;
|
|
1778
2545
|
// Cumulative usage counters
|
|
1779
2546
|
_totalSttAudioSeconds = 0;
|
|
1780
2547
|
_totalTtsCharacters = 0;
|
|
1781
2548
|
_totalRealtimeCost = 0;
|
|
2549
|
+
_totalRealtimeCachedSavings = 0;
|
|
1782
2550
|
_sttByteCount = 0;
|
|
1783
2551
|
_sttSampleRate = 16e3;
|
|
1784
2552
|
_sttBytesPerSample = 2;
|
|
1785
2553
|
_actualTelephonyCost = null;
|
|
1786
2554
|
_actualSttCost = null;
|
|
2555
|
+
// Fix 10: accumulated LLM token cost for non-Realtime pipeline mode.
|
|
2556
|
+
_totalLlmCost = 0;
|
|
2557
|
+
// ---- EventBus integration (item 3) ----
|
|
2558
|
+
_eventBus;
|
|
2559
|
+
// ---- EOUMetrics — 4 timestamps (item 4) ----
|
|
2560
|
+
/** Timestamp (hrTimeMs) when VAD emitted speech_end. */
|
|
2561
|
+
_vadStoppedAt = null;
|
|
2562
|
+
/** Timestamp (hrTimeMs) when STT emitted its final transcript. */
|
|
2563
|
+
_sttFinalAt = null;
|
|
2564
|
+
/** Timestamp (hrTimeMs) when the transcript was committed to the LLM. */
|
|
2565
|
+
_turnCommittedAt = null;
|
|
2566
|
+
/** Delta (ms) from turn-committed to on_user_turn_completed hook done. */
|
|
2567
|
+
_onUserTurnCompletedDelayMs = null;
|
|
2568
|
+
// ---- InterruptionMetrics — simplified no-ML (item 5) ----
|
|
2569
|
+
_numInterruptions = 0;
|
|
2570
|
+
_numBackchannels = 0;
|
|
2571
|
+
_overlapStartedAt = null;
|
|
2572
|
+
// ---- report_only_initial_ttfb (item 6) ----
|
|
2573
|
+
_reportOnlyInitialTtfb;
|
|
2574
|
+
_initialTtfbEmitted = false;
|
|
1787
2575
|
constructor(opts) {
|
|
1788
2576
|
this.callId = opts.callId;
|
|
1789
2577
|
this.providerMode = opts.providerMode;
|
|
@@ -1793,6 +2581,15 @@ var CallMetricsAccumulator = class {
|
|
|
1793
2581
|
this.llmProvider = opts.llmProvider ?? "";
|
|
1794
2582
|
this._pricing = mergePricing(opts.pricing);
|
|
1795
2583
|
this._callStart = hrTimeMs();
|
|
2584
|
+
this._eventBus = opts.eventBus;
|
|
2585
|
+
this._reportOnlyInitialTtfb = opts.reportOnlyInitialTtfb ?? false;
|
|
2586
|
+
}
|
|
2587
|
+
/**
|
|
2588
|
+
* Attach (or replace) an EventBus after construction.
|
|
2589
|
+
* Useful when the bus is created after the accumulator (e.g. in tests).
|
|
2590
|
+
*/
|
|
2591
|
+
attachEventBus(bus) {
|
|
2592
|
+
this._eventBus = bus;
|
|
1796
2593
|
}
|
|
1797
2594
|
/** Configure audio format for STT byte-to-seconds conversion. */
|
|
1798
2595
|
configureSttFormat(sampleRate = 16e3, bytesPerSample = 2) {
|
|
@@ -1807,17 +2604,60 @@ var CallMetricsAccumulator = class {
|
|
|
1807
2604
|
startTurn() {
|
|
1808
2605
|
this._turnStart = hrTimeMs();
|
|
1809
2606
|
this._sttComplete = null;
|
|
2607
|
+
this._llmFirstToken = null;
|
|
2608
|
+
this._llmFirstSentenceComplete = null;
|
|
1810
2609
|
this._llmComplete = null;
|
|
1811
2610
|
this._ttsFirstByte = null;
|
|
2611
|
+
this._ttsLastByte = null;
|
|
2612
|
+
this._endpointSignalAt = null;
|
|
2613
|
+
this._turnCommittedMono = null;
|
|
2614
|
+
this._bargeinDetectedAt = null;
|
|
2615
|
+
this._bargeinStoppedAt = null;
|
|
1812
2616
|
this._turnUserText = "";
|
|
1813
2617
|
this._turnSttAudioSeconds = 0;
|
|
2618
|
+
this._vadStoppedAt = null;
|
|
2619
|
+
this._sttFinalAt = null;
|
|
2620
|
+
this._turnCommittedAt = null;
|
|
2621
|
+
this._onUserTurnCompletedDelayMs = null;
|
|
2622
|
+
this._eventBus?.emit("turn_started", { callId: this.callId });
|
|
2623
|
+
}
|
|
2624
|
+
/**
|
|
2625
|
+
* Start a new turn only if no turn is currently open.
|
|
2626
|
+
* Use this at inbound-audio ingestion points so the turn timer begins
|
|
2627
|
+
* on the first audio byte rather than just before recordSttComplete().
|
|
2628
|
+
*/
|
|
2629
|
+
startTurnIfIdle() {
|
|
2630
|
+
if (this._turnStart === null) {
|
|
2631
|
+
this.startTurn();
|
|
2632
|
+
}
|
|
1814
2633
|
}
|
|
1815
2634
|
recordSttComplete(text, audioSeconds = 0) {
|
|
1816
2635
|
this._sttComplete = hrTimeMs();
|
|
2636
|
+
this._sttFinalAt = this._sttComplete;
|
|
2637
|
+
if (this._endpointSignalAt === null) {
|
|
2638
|
+
this._endpointSignalAt = this._sttComplete;
|
|
2639
|
+
}
|
|
1817
2640
|
this._turnUserText = text;
|
|
1818
2641
|
this._turnSttAudioSeconds = audioSeconds;
|
|
1819
2642
|
this._totalSttAudioSeconds += audioSeconds;
|
|
1820
2643
|
}
|
|
2644
|
+
/** Record the timestamp of the first LLM token (TTFT). No-op after first call. */
|
|
2645
|
+
recordLlmFirstToken() {
|
|
2646
|
+
if (this._llmFirstToken === null) {
|
|
2647
|
+
this._llmFirstToken = hrTimeMs();
|
|
2648
|
+
}
|
|
2649
|
+
}
|
|
2650
|
+
/**
|
|
2651
|
+
* Record when the sentence chunker emits the first complete sentence.
|
|
2652
|
+
* Used as the TTS span start so tts_ms reflects true TTS-provider latency
|
|
2653
|
+
* rather than the gap from llm_complete (which fires after the full response).
|
|
2654
|
+
* No-op after first call.
|
|
2655
|
+
*/
|
|
2656
|
+
recordLlmFirstSentenceComplete() {
|
|
2657
|
+
if (this._llmFirstSentenceComplete === null) {
|
|
2658
|
+
this._llmFirstSentenceComplete = hrTimeMs();
|
|
2659
|
+
}
|
|
2660
|
+
}
|
|
1821
2661
|
recordLlmComplete() {
|
|
1822
2662
|
this._llmComplete = hrTimeMs();
|
|
1823
2663
|
}
|
|
@@ -1825,9 +2665,40 @@ var CallMetricsAccumulator = class {
|
|
|
1825
2665
|
if (this._ttsFirstByte === null) {
|
|
1826
2666
|
this._ttsFirstByte = hrTimeMs();
|
|
1827
2667
|
}
|
|
2668
|
+
if (this._reportOnlyInitialTtfb && this._initialTtfbEmitted) {
|
|
2669
|
+
return;
|
|
2670
|
+
}
|
|
2671
|
+
this._initialTtfbEmitted = true;
|
|
1828
2672
|
}
|
|
1829
2673
|
recordTtsComplete(text) {
|
|
1830
2674
|
this._totalTtsCharacters += text.length;
|
|
2675
|
+
if (this._ttsLastByte === null) {
|
|
2676
|
+
this._ttsLastByte = hrTimeMs();
|
|
2677
|
+
}
|
|
2678
|
+
}
|
|
2679
|
+
/**
|
|
2680
|
+
* Capture the timestamp when the last TTS audio byte was sent on the wire.
|
|
2681
|
+
* Useful when the caller wants to record the timing without bumping the
|
|
2682
|
+
* character counter (e.g. interrupted turns where audio actually went out
|
|
2683
|
+
* but synthesis was truncated).
|
|
2684
|
+
*/
|
|
2685
|
+
recordTtsCompleteTs(ts) {
|
|
2686
|
+
this._ttsLastByte = ts ?? hrTimeMs();
|
|
2687
|
+
}
|
|
2688
|
+
/**
|
|
2689
|
+
* Mark the moment a user interrupt (barge-in) was detected. Pairs with
|
|
2690
|
+
* ``recordTtsStopped`` to compute ``bargein_ms``.
|
|
2691
|
+
*/
|
|
2692
|
+
recordBargeinDetected(ts) {
|
|
2693
|
+
this._bargeinDetectedAt = ts ?? hrTimeMs();
|
|
2694
|
+
}
|
|
2695
|
+
/**
|
|
2696
|
+
* Mark the moment TTS playback was actually halted after a barge-in. Call
|
|
2697
|
+
* this *after* ``sendClear`` returns. Pairs with ``recordBargeinDetected``
|
|
2698
|
+
* to compute ``bargein_ms``.
|
|
2699
|
+
*/
|
|
2700
|
+
recordTtsStopped(ts) {
|
|
2701
|
+
this._bargeinStoppedAt = ts ?? hrTimeMs();
|
|
1831
2702
|
}
|
|
1832
2703
|
recordTurnComplete(agentText) {
|
|
1833
2704
|
const latency = this._computeTurnLatency();
|
|
@@ -1842,6 +2713,8 @@ var CallMetricsAccumulator = class {
|
|
|
1842
2713
|
};
|
|
1843
2714
|
this._turns.push(turn);
|
|
1844
2715
|
this._resetTurnState();
|
|
2716
|
+
this._eventBus?.emit("turn_ended", { callId: this.callId, turn });
|
|
2717
|
+
this._eventBus?.emit("metrics_collected", { callId: this.callId, turn });
|
|
1845
2718
|
return turn;
|
|
1846
2719
|
}
|
|
1847
2720
|
recordTurnInterrupted() {
|
|
@@ -1860,12 +2733,111 @@ var CallMetricsAccumulator = class {
|
|
|
1860
2733
|
this._resetTurnState();
|
|
1861
2734
|
return turn;
|
|
1862
2735
|
}
|
|
2736
|
+
// ---- EOU metrics (item 4) ----
|
|
2737
|
+
/**
|
|
2738
|
+
* Record the moment VAD emitted speech_end for the current utterance.
|
|
2739
|
+
* @param ts Optional override timestamp in hrTimeMs units (defaults to now).
|
|
2740
|
+
*/
|
|
2741
|
+
recordVadStop(ts) {
|
|
2742
|
+
this._vadStoppedAt = ts ?? hrTimeMs();
|
|
2743
|
+
if (this._endpointSignalAt === null) {
|
|
2744
|
+
this._endpointSignalAt = this._vadStoppedAt;
|
|
2745
|
+
}
|
|
2746
|
+
}
|
|
2747
|
+
/**
|
|
2748
|
+
* Record the moment the STT provider delivered its final transcript.
|
|
2749
|
+
* Aliased to the same instant as recordSttComplete() when called from
|
|
2750
|
+
* the standard pipeline; can be called independently for custom pipelines.
|
|
2751
|
+
* @param ts Optional override timestamp in hrTimeMs units.
|
|
2752
|
+
*/
|
|
2753
|
+
recordSttFinalTimestamp(ts) {
|
|
2754
|
+
this._sttFinalAt = ts ?? hrTimeMs();
|
|
2755
|
+
if (this._endpointSignalAt === null) {
|
|
2756
|
+
this._endpointSignalAt = this._sttFinalAt;
|
|
2757
|
+
}
|
|
2758
|
+
}
|
|
2759
|
+
/**
|
|
2760
|
+
* Record the moment the transcript was committed to the LLM (turn start).
|
|
2761
|
+
* After this call, ``emitEouMetrics()`` can produce a complete EOUMetrics payload.
|
|
2762
|
+
* @param ts Optional override timestamp in hrTimeMs units.
|
|
2763
|
+
*/
|
|
2764
|
+
recordTurnCommitted(ts) {
|
|
2765
|
+
this._turnCommittedAt = ts ?? hrTimeMs();
|
|
2766
|
+
this._turnCommittedMono = hrTimeMs();
|
|
2767
|
+
this.emitEouMetrics();
|
|
2768
|
+
}
|
|
2769
|
+
/**
|
|
2770
|
+
* Record the delta (ms) between turn-committed and when on_user_turn_completed
|
|
2771
|
+
* pipeline hook finished. Stored for inclusion in the next ``emitEouMetrics``
|
|
2772
|
+
* call (or an explicit re-emit if desired).
|
|
2773
|
+
*/
|
|
2774
|
+
recordOnUserTurnCompletedDelay(delayMs) {
|
|
2775
|
+
this._onUserTurnCompletedDelayMs = delayMs;
|
|
2776
|
+
}
|
|
2777
|
+
/**
|
|
2778
|
+
* Compute and emit EOUMetrics when all three prerequisite timestamps are
|
|
2779
|
+
* available (VAD stop, STT final, turn committed).
|
|
2780
|
+
*
|
|
2781
|
+
* ``endOfUtteranceDelay`` = sttFinal − vadStopped (ms)
|
|
2782
|
+
* ``transcriptionDelay`` = turnCommitted − vadStopped (ms)
|
|
2783
|
+
* ``onUserTurnCompletedDelay`` = caller-supplied delta (ms) or 0
|
|
2784
|
+
*/
|
|
2785
|
+
emitEouMetrics() {
|
|
2786
|
+
if (this._vadStoppedAt === null || this._sttFinalAt === null || this._turnCommittedAt === null) {
|
|
2787
|
+
return;
|
|
2788
|
+
}
|
|
2789
|
+
const payload = {
|
|
2790
|
+
timestamp: Date.now() / 1e3,
|
|
2791
|
+
endOfUtteranceDelay: Math.max(0, this._sttFinalAt - this._vadStoppedAt),
|
|
2792
|
+
transcriptionDelay: Math.max(0, this._turnCommittedAt - this._vadStoppedAt),
|
|
2793
|
+
onUserTurnCompletedDelay: this._onUserTurnCompletedDelayMs ?? 0
|
|
2794
|
+
};
|
|
2795
|
+
this._eventBus?.emit("eou_metrics", payload);
|
|
2796
|
+
}
|
|
2797
|
+
// ---- InterruptionMetrics (item 5) ----
|
|
2798
|
+
/**
|
|
2799
|
+
* Record that a caller utterance started overlapping with agent speech.
|
|
2800
|
+
* Call this when VAD detects speech_start during TTS playback.
|
|
2801
|
+
* @param ts Optional override timestamp in hrTimeMs units.
|
|
2802
|
+
*/
|
|
2803
|
+
recordOverlapStart(ts) {
|
|
2804
|
+
this._overlapStartedAt = ts ?? hrTimeMs();
|
|
2805
|
+
}
|
|
2806
|
+
/**
|
|
2807
|
+
* Record that the overlap ended. Emits ``InterruptionMetrics`` via the
|
|
2808
|
+
* event bus.
|
|
2809
|
+
*
|
|
2810
|
+
* @param wasInterruption true → barge-in (increments ``numInterruptions``),
|
|
2811
|
+
* false → backchannel (increments ``numBackchannels``).
|
|
2812
|
+
* @param ts Optional override timestamp in hrTimeMs units.
|
|
2813
|
+
*/
|
|
2814
|
+
recordOverlapEnd(wasInterruption, ts) {
|
|
2815
|
+
const now = ts ?? hrTimeMs();
|
|
2816
|
+
const detectionDelay = this._overlapStartedAt !== null ? Math.max(0, now - this._overlapStartedAt) : 0;
|
|
2817
|
+
this._overlapStartedAt = null;
|
|
2818
|
+
if (wasInterruption) {
|
|
2819
|
+
this._numInterruptions++;
|
|
2820
|
+
} else {
|
|
2821
|
+
this._numBackchannels++;
|
|
2822
|
+
}
|
|
2823
|
+
const payload = {
|
|
2824
|
+
timestamp: Date.now() / 1e3,
|
|
2825
|
+
// Simplified: totalDuration == detectionDelay (no ML prediction window)
|
|
2826
|
+
totalDuration: detectionDelay,
|
|
2827
|
+
predictionDuration: 0,
|
|
2828
|
+
detectionDelay,
|
|
2829
|
+
numInterruptions: this._numInterruptions,
|
|
2830
|
+
numBackchannels: this._numBackchannels
|
|
2831
|
+
};
|
|
2832
|
+
this._eventBus?.emit("interruption", payload);
|
|
2833
|
+
}
|
|
1863
2834
|
// ---- Usage tracking ----
|
|
1864
2835
|
addSttAudioBytes(byteCount) {
|
|
1865
2836
|
this._sttByteCount += byteCount;
|
|
1866
2837
|
}
|
|
1867
2838
|
recordRealtimeUsage(usage) {
|
|
1868
2839
|
this._totalRealtimeCost += calculateRealtimeCost(usage, this._pricing);
|
|
2840
|
+
this._totalRealtimeCachedSavings += calculateRealtimeCachedSavings(usage, this._pricing);
|
|
1869
2841
|
}
|
|
1870
2842
|
setActualTelephonyCost(cost) {
|
|
1871
2843
|
this._actualTelephonyCost = cost;
|
|
@@ -1873,28 +2845,62 @@ var CallMetricsAccumulator = class {
|
|
|
1873
2845
|
setActualSttCost(cost) {
|
|
1874
2846
|
this._actualSttCost = cost;
|
|
1875
2847
|
}
|
|
2848
|
+
/**
|
|
2849
|
+
* Accumulate LLM token cost for pipeline mode (non-Realtime).
|
|
2850
|
+
*
|
|
2851
|
+
* Called by LLMLoop.run() when a usage chunk arrives from the provider.
|
|
2852
|
+
* Mirrors Python's CallMetricsAccumulator.record_llm_usage().
|
|
2853
|
+
*
|
|
2854
|
+
* @param provider LLM provider key (e.g. 'openai', 'anthropic')
|
|
2855
|
+
* @param model Model name (e.g. 'gpt-4o-mini')
|
|
2856
|
+
* @param inputTokens Total input tokens (includes cached)
|
|
2857
|
+
* @param outputTokens Total output tokens
|
|
2858
|
+
* @param cacheReadTokens Cached input tokens (subtracted from input before billing full rate)
|
|
2859
|
+
* @param cacheWriteTokens Cache write tokens (billed at cache_write rate if present)
|
|
2860
|
+
*/
|
|
2861
|
+
recordLlmUsage(provider2, model, inputTokens, outputTokens, cacheReadTokens = 0, cacheWriteTokens = 0) {
|
|
2862
|
+
this._totalLlmCost += calculateLlmCost(
|
|
2863
|
+
provider2,
|
|
2864
|
+
model,
|
|
2865
|
+
inputTokens,
|
|
2866
|
+
outputTokens,
|
|
2867
|
+
cacheReadTokens,
|
|
2868
|
+
cacheWriteTokens
|
|
2869
|
+
);
|
|
2870
|
+
}
|
|
1876
2871
|
// ---- Finalize ----
|
|
1877
2872
|
endCall() {
|
|
1878
2873
|
const duration = (hrTimeMs() - this._callStart) / 1e3;
|
|
2874
|
+
if (this.turnActive) {
|
|
2875
|
+
this.recordTurnInterrupted();
|
|
2876
|
+
}
|
|
1879
2877
|
if (this._totalSttAudioSeconds === 0 && this._sttByteCount > 0) {
|
|
1880
2878
|
this._totalSttAudioSeconds = this._sttByteCount / (this._sttSampleRate * this._sttBytesPerSample);
|
|
1881
2879
|
}
|
|
1882
2880
|
const cost = this._computeCost(duration);
|
|
1883
2881
|
const latencyAvg = this._computeAverageLatency();
|
|
1884
|
-
const
|
|
1885
|
-
|
|
2882
|
+
const latencyP50 = this._computePercentileLatency(0.5);
|
|
2883
|
+
const latencyP90 = this._computePercentileLatency(0.9);
|
|
2884
|
+
const latencyP95 = this._computePercentileLatency(0.95);
|
|
2885
|
+
const latencyP99 = this._computePercentileLatency(0.99);
|
|
2886
|
+
const metrics = {
|
|
1886
2887
|
call_id: this.callId,
|
|
1887
2888
|
duration_seconds: round(duration, 2),
|
|
1888
2889
|
turns: [...this._turns],
|
|
1889
2890
|
cost,
|
|
1890
2891
|
latency_avg: latencyAvg,
|
|
2892
|
+
latency_p50: latencyP50,
|
|
2893
|
+
latency_p90: latencyP90,
|
|
1891
2894
|
latency_p95: latencyP95,
|
|
2895
|
+
latency_p99: latencyP99,
|
|
1892
2896
|
provider_mode: this.providerMode,
|
|
1893
2897
|
stt_provider: this.sttProvider,
|
|
1894
2898
|
tts_provider: this.ttsProvider,
|
|
1895
2899
|
llm_provider: this.llmProvider,
|
|
1896
2900
|
telephony_provider: this.telephonyProvider
|
|
1897
2901
|
};
|
|
2902
|
+
this._eventBus?.emit("call_ended", { callId: this.callId, metrics });
|
|
2903
|
+
return metrics;
|
|
1898
2904
|
}
|
|
1899
2905
|
getCostSoFar() {
|
|
1900
2906
|
const duration = (hrTimeMs() - this._callStart) / 1e3;
|
|
@@ -1904,36 +2910,68 @@ var CallMetricsAccumulator = class {
|
|
|
1904
2910
|
_resetTurnState() {
|
|
1905
2911
|
this._turnStart = null;
|
|
1906
2912
|
this._sttComplete = null;
|
|
2913
|
+
this._llmFirstToken = null;
|
|
2914
|
+
this._llmFirstSentenceComplete = null;
|
|
1907
2915
|
this._llmComplete = null;
|
|
1908
2916
|
this._ttsFirstByte = null;
|
|
2917
|
+
this._ttsLastByte = null;
|
|
2918
|
+
this._endpointSignalAt = null;
|
|
2919
|
+
this._turnCommittedMono = null;
|
|
2920
|
+
this._bargeinDetectedAt = null;
|
|
2921
|
+
this._bargeinStoppedAt = null;
|
|
1909
2922
|
this._turnUserText = "";
|
|
1910
2923
|
this._turnSttAudioSeconds = 0;
|
|
1911
2924
|
}
|
|
1912
2925
|
_computeTurnLatency() {
|
|
1913
2926
|
let stt_ms = 0;
|
|
1914
2927
|
let llm_ms = 0;
|
|
2928
|
+
let llm_ttft_ms;
|
|
2929
|
+
let llm_total_ms;
|
|
1915
2930
|
let tts_ms = 0;
|
|
1916
2931
|
let total_ms = 0;
|
|
2932
|
+
let endpoint_ms;
|
|
2933
|
+
let bargein_ms;
|
|
2934
|
+
let tts_total_ms;
|
|
1917
2935
|
if (this._turnStart !== null && this._sttComplete !== null) {
|
|
1918
2936
|
stt_ms = this._sttComplete - this._turnStart;
|
|
1919
2937
|
}
|
|
1920
|
-
if (this._sttComplete !== null && this.
|
|
2938
|
+
if (this._sttComplete !== null && this._llmFirstToken !== null) {
|
|
2939
|
+
llm_ttft_ms = Math.max(0, this._llmFirstToken - this._sttComplete);
|
|
2940
|
+
llm_ms = llm_ttft_ms;
|
|
2941
|
+
} else if (this._sttComplete !== null && this._llmComplete !== null) {
|
|
1921
2942
|
llm_ms = this._llmComplete - this._sttComplete;
|
|
1922
2943
|
}
|
|
1923
|
-
if (this.
|
|
1924
|
-
|
|
2944
|
+
if (this._sttComplete !== null && this._llmComplete !== null) {
|
|
2945
|
+
llm_total_ms = Math.max(0, this._llmComplete - this._sttComplete);
|
|
2946
|
+
}
|
|
2947
|
+
const ttsSpanStart = this._llmFirstSentenceComplete ?? this._llmComplete;
|
|
2948
|
+
if (ttsSpanStart !== null && this._ttsFirstByte !== null) {
|
|
2949
|
+
tts_ms = this._ttsFirstByte - ttsSpanStart;
|
|
2950
|
+
if (tts_ms < 0) tts_ms = 0;
|
|
1925
2951
|
}
|
|
1926
2952
|
if (this._turnStart !== null && this._ttsFirstByte !== null) {
|
|
1927
2953
|
total_ms = this._ttsFirstByte - this._turnStart;
|
|
1928
2954
|
}
|
|
1929
|
-
if (
|
|
1930
|
-
|
|
2955
|
+
if (this._endpointSignalAt !== null && this._turnCommittedMono !== null) {
|
|
2956
|
+
endpoint_ms = Math.max(0, this._turnCommittedMono - this._endpointSignalAt);
|
|
2957
|
+
}
|
|
2958
|
+
if (this._bargeinDetectedAt !== null && this._bargeinStoppedAt !== null) {
|
|
2959
|
+
bargein_ms = Math.max(0, this._bargeinStoppedAt - this._bargeinDetectedAt);
|
|
2960
|
+
}
|
|
2961
|
+
const ttsTotalRef = this._llmFirstToken ?? this._llmFirstSentenceComplete ?? this._llmComplete;
|
|
2962
|
+
if (ttsTotalRef !== null && this._ttsLastByte !== null) {
|
|
2963
|
+
tts_total_ms = Math.max(0, this._ttsLastByte - ttsTotalRef);
|
|
1931
2964
|
}
|
|
1932
2965
|
return {
|
|
1933
2966
|
stt_ms: round(stt_ms, 1),
|
|
1934
2967
|
llm_ms: round(llm_ms, 1),
|
|
2968
|
+
...llm_ttft_ms !== void 0 ? { llm_ttft_ms: round(llm_ttft_ms, 1) } : {},
|
|
2969
|
+
...llm_total_ms !== void 0 ? { llm_total_ms: round(llm_total_ms, 1) } : {},
|
|
1935
2970
|
tts_ms: round(tts_ms, 1),
|
|
1936
|
-
total_ms: round(total_ms, 1)
|
|
2971
|
+
total_ms: round(total_ms, 1),
|
|
2972
|
+
...endpoint_ms !== void 0 ? { endpoint_ms: round(endpoint_ms, 1) } : {},
|
|
2973
|
+
...bargein_ms !== void 0 ? { bargein_ms: round(bargein_ms, 1) } : {},
|
|
2974
|
+
...tts_total_ms !== void 0 ? { tts_total_ms: round(tts_total_ms, 1) } : {}
|
|
1937
2975
|
};
|
|
1938
2976
|
}
|
|
1939
2977
|
_computeCost(durationSeconds) {
|
|
@@ -1951,7 +2989,7 @@ var CallMetricsAccumulator = class {
|
|
|
1951
2989
|
} else {
|
|
1952
2990
|
stt = this._actualSttCost !== null ? this._actualSttCost : calculateSttCost(this.sttProvider, this._totalSttAudioSeconds, this._pricing);
|
|
1953
2991
|
tts = calculateTtsCost(this.ttsProvider, this._totalTtsCharacters, this._pricing);
|
|
1954
|
-
llm =
|
|
2992
|
+
llm = this._totalLlmCost;
|
|
1955
2993
|
}
|
|
1956
2994
|
const telephony = this._actualTelephonyCost !== null ? this._actualTelephonyCost : calculateTelephonyCost(this.telephonyProvider, durationSeconds, this._pricing);
|
|
1957
2995
|
const total = stt + tts + llm + telephony;
|
|
@@ -1960,30 +2998,78 @@ var CallMetricsAccumulator = class {
|
|
|
1960
2998
|
tts: round(tts, 6),
|
|
1961
2999
|
llm: round(llm, 6),
|
|
1962
3000
|
telephony: round(telephony, 6),
|
|
1963
|
-
total: round(total, 6)
|
|
3001
|
+
total: round(total, 6),
|
|
3002
|
+
// Always emit (default 0) for parity with Python dataclass where
|
|
3003
|
+
// llm_cached_savings is a required field with default 0.0.
|
|
3004
|
+
llm_cached_savings: round(Math.max(0, this._totalRealtimeCachedSavings), 6)
|
|
1964
3005
|
};
|
|
1965
3006
|
}
|
|
3007
|
+
/**
|
|
3008
|
+
* Turns eligible for latency statistics.
|
|
3009
|
+
*
|
|
3010
|
+
* Excludes turns marked ``[interrupted]`` (barge-in, cancelled replacements)
|
|
3011
|
+
* because their recorded latency either reflects partial state or zero —
|
|
3012
|
+
* including them would drag every p95/avg bucket toward meaningless numbers.
|
|
3013
|
+
*/
|
|
3014
|
+
_completedTurns() {
|
|
3015
|
+
return this._turns.filter(
|
|
3016
|
+
(t) => t.agent_text !== "[interrupted]" && t.latency.total_ms > 0
|
|
3017
|
+
);
|
|
3018
|
+
}
|
|
1966
3019
|
_computeAverageLatency() {
|
|
1967
|
-
|
|
3020
|
+
const turns = this._completedTurns();
|
|
3021
|
+
if (turns.length === 0) {
|
|
1968
3022
|
return { stt_ms: 0, llm_ms: 0, tts_ms: 0, total_ms: 0 };
|
|
1969
3023
|
}
|
|
1970
|
-
const n =
|
|
3024
|
+
const n = turns.length;
|
|
3025
|
+
const ttftValues = turns.map((t) => t.latency.llm_ttft_ms ?? 0).filter((v) => v > 0);
|
|
3026
|
+
const ttftAvg = ttftValues.length > 0 ? round(ttftValues.reduce((s, v) => s + v, 0) / ttftValues.length, 1) : void 0;
|
|
3027
|
+
const optAvg = (key) => {
|
|
3028
|
+
const vals = turns.map((t) => t.latency[key]).filter((v) => typeof v === "number" && v > 0);
|
|
3029
|
+
return vals.length > 0 ? round(vals.reduce((s, v) => s + v, 0) / vals.length, 1) : void 0;
|
|
3030
|
+
};
|
|
3031
|
+
const llmTotalAvg = optAvg("llm_total_ms");
|
|
3032
|
+
const endpointAvg = optAvg("endpoint_ms");
|
|
3033
|
+
const bargeinAvg = optAvg("bargein_ms");
|
|
3034
|
+
const ttsTotalAvg = optAvg("tts_total_ms");
|
|
1971
3035
|
return {
|
|
1972
|
-
stt_ms: round(
|
|
1973
|
-
llm_ms: round(
|
|
1974
|
-
|
|
1975
|
-
|
|
3036
|
+
stt_ms: round(turns.reduce((s, t) => s + t.latency.stt_ms, 0) / n, 1),
|
|
3037
|
+
llm_ms: round(turns.reduce((s, t) => s + t.latency.llm_ms, 0) / n, 1),
|
|
3038
|
+
...ttftAvg !== void 0 ? { llm_ttft_ms: ttftAvg } : {},
|
|
3039
|
+
...llmTotalAvg !== void 0 ? { llm_total_ms: llmTotalAvg } : {},
|
|
3040
|
+
tts_ms: round(turns.reduce((s, t) => s + t.latency.tts_ms, 0) / n, 1),
|
|
3041
|
+
total_ms: round(turns.reduce((s, t) => s + t.latency.total_ms, 0) / n, 1),
|
|
3042
|
+
...endpointAvg !== void 0 ? { endpoint_ms: endpointAvg } : {},
|
|
3043
|
+
...bargeinAvg !== void 0 ? { bargein_ms: bargeinAvg } : {},
|
|
3044
|
+
...ttsTotalAvg !== void 0 ? { tts_total_ms: ttsTotalAvg } : {}
|
|
1976
3045
|
};
|
|
1977
3046
|
}
|
|
1978
|
-
|
|
1979
|
-
|
|
3047
|
+
_computePercentileLatency(p) {
|
|
3048
|
+
const turns = this._completedTurns();
|
|
3049
|
+
if (turns.length === 0) {
|
|
1980
3050
|
return { stt_ms: 0, llm_ms: 0, tts_ms: 0, total_ms: 0 };
|
|
1981
3051
|
}
|
|
3052
|
+
const nonZero = (vals) => vals.filter((v) => v > 0);
|
|
3053
|
+
const ttftSamples = nonZero(turns.map((t) => t.latency.llm_ttft_ms ?? 0));
|
|
3054
|
+
const ttftP = ttftSamples.length > 0 ? round(percentile(ttftSamples, p), 1) : void 0;
|
|
3055
|
+
const optPct = (key) => {
|
|
3056
|
+
const vals = turns.map((t) => t.latency[key]).filter((v) => typeof v === "number" && v > 0);
|
|
3057
|
+
return vals.length > 0 ? round(percentile(vals, p), 1) : void 0;
|
|
3058
|
+
};
|
|
3059
|
+
const llmTotalP = optPct("llm_total_ms");
|
|
3060
|
+
const endpointP = optPct("endpoint_ms");
|
|
3061
|
+
const bargeinP = optPct("bargein_ms");
|
|
3062
|
+
const ttsTotalP = optPct("tts_total_ms");
|
|
1982
3063
|
return {
|
|
1983
|
-
stt_ms: round(
|
|
1984
|
-
llm_ms: round(
|
|
1985
|
-
|
|
1986
|
-
|
|
3064
|
+
stt_ms: round(percentile(nonZero(turns.map((t) => t.latency.stt_ms)), p), 1),
|
|
3065
|
+
llm_ms: round(percentile(nonZero(turns.map((t) => t.latency.llm_ms)), p), 1),
|
|
3066
|
+
...ttftP !== void 0 ? { llm_ttft_ms: ttftP } : {},
|
|
3067
|
+
...llmTotalP !== void 0 ? { llm_total_ms: llmTotalP } : {},
|
|
3068
|
+
tts_ms: round(percentile(nonZero(turns.map((t) => t.latency.tts_ms)), p), 1),
|
|
3069
|
+
total_ms: round(percentile(nonZero(turns.map((t) => t.latency.total_ms)), p), 1),
|
|
3070
|
+
...endpointP !== void 0 ? { endpoint_ms: endpointP } : {},
|
|
3071
|
+
...bargeinP !== void 0 ? { bargein_ms: bargeinP } : {},
|
|
3072
|
+
...ttsTotalP !== void 0 ? { tts_total_ms: ttsTotalP } : {}
|
|
1987
3073
|
};
|
|
1988
3074
|
}
|
|
1989
3075
|
};
|
|
@@ -2038,40 +3124,335 @@ function pcm16ToMulaw(pcmData) {
|
|
|
2038
3124
|
}
|
|
2039
3125
|
return out;
|
|
2040
3126
|
}
|
|
3127
|
+
var PcmCarry = class {
|
|
3128
|
+
pending = null;
|
|
3129
|
+
/**
|
|
3130
|
+
* Prepend any carried odd byte, return the even-length prefix, and stash
|
|
3131
|
+
* any new trailing odd byte for the next call.
|
|
3132
|
+
*
|
|
3133
|
+
* Returns a zero-length buffer when no complete sample is yet available.
|
|
3134
|
+
*/
|
|
3135
|
+
push(chunk) {
|
|
3136
|
+
const combined = this.pending !== null ? Buffer.concat([this.pending, chunk]) : chunk;
|
|
3137
|
+
this.pending = null;
|
|
3138
|
+
const alignedLen = combined.length & ~1;
|
|
3139
|
+
if (alignedLen < combined.length) {
|
|
3140
|
+
this.pending = combined.subarray(alignedLen);
|
|
3141
|
+
}
|
|
3142
|
+
return combined.subarray(0, alignedLen);
|
|
3143
|
+
}
|
|
3144
|
+
/**
|
|
3145
|
+
* Return any pending byte as a 1-byte buffer (rare in practice — only if
|
|
3146
|
+
* the entire stream had an odd byte count), then reset internal state.
|
|
3147
|
+
*/
|
|
3148
|
+
flush() {
|
|
3149
|
+
if (this.pending === null) return Buffer.alloc(0);
|
|
3150
|
+
const out = this.pending;
|
|
3151
|
+
this.pending = null;
|
|
3152
|
+
return out;
|
|
3153
|
+
}
|
|
3154
|
+
/** Reset carry state without flushing. */
|
|
3155
|
+
reset() {
|
|
3156
|
+
this.pending = null;
|
|
3157
|
+
}
|
|
3158
|
+
};
|
|
3159
|
+
var StatefulResampler = class {
|
|
3160
|
+
srcRate;
|
|
3161
|
+
dstRate;
|
|
3162
|
+
// 16k→8k: 5-tap FIR state.
|
|
3163
|
+
// Extended sample buffer carries the 2 history samples that precede the
|
|
3164
|
+
// current chunk AND any "pending" input sample that did not yet generate
|
|
3165
|
+
// output (i.e. the odd sample when the chunk had an odd sample count).
|
|
3166
|
+
// No separate phase field is kept: outputs sit at even positions of the
|
|
3167
|
+
// carried stream, and `firPendingSample` (below) holds the odd leftover
|
|
3168
|
+
// sample so the 2:1 output cadence continues across chunk boundaries.
|
|
3169
|
+
firHistory = new Int16Array(2);
|
|
3170
|
+
// [s_{-2}, s_{-1}]
|
|
3171
|
+
firHistoryValid = false;
|
|
3172
|
+
// Pending sample carried from odd-count chunks (not the byte carry —
|
|
3173
|
+
// this is a complete Int16 sample that becomes the first input for the
|
|
3174
|
+
// next call).
|
|
3175
|
+
firPendingSample = null;
|
|
3176
|
+
// 8k→16k: last input sample deferred across chunk boundaries.
|
|
3177
|
+
upsampleLast = 0;
|
|
3178
|
+
upsampleHasHistory = false;
|
|
3179
|
+
// 24k→16k: fractional phase and last input sample across chunks.
|
|
3180
|
+
resample24Last = 0;
|
|
3181
|
+
resample24Phase = 0;
|
|
3182
|
+
resample24HasHistory = false;
|
|
3183
|
+
// Odd-byte alignment carry.
|
|
3184
|
+
carry = new PcmCarry();
|
|
3185
|
+
constructor(opts) {
|
|
3186
|
+
this.srcRate = opts.srcRate;
|
|
3187
|
+
this.dstRate = opts.dstRate;
|
|
3188
|
+
if (opts.channels !== void 0 && opts.channels !== 1) {
|
|
3189
|
+
throw new Error("StatefulResampler: only mono (channels=1) is supported");
|
|
3190
|
+
}
|
|
3191
|
+
const key = `${this.srcRate}->${this.dstRate}`;
|
|
3192
|
+
if (key !== "16000->8000" && key !== "8000->16000" && key !== "24000->16000") {
|
|
3193
|
+
throw new Error(
|
|
3194
|
+
`StatefulResampler: unsupported conversion ${key}. Supported: 16000->8000, 8000->16000, 24000->16000`
|
|
3195
|
+
);
|
|
3196
|
+
}
|
|
3197
|
+
}
|
|
3198
|
+
/**
|
|
3199
|
+
* Process a chunk of PCM16-LE samples.
|
|
3200
|
+
*
|
|
3201
|
+
* Handles odd-byte inputs via an internal carry buffer. Returns an even-byte-
|
|
3202
|
+
* aligned output buffer; may return a zero-length buffer if not enough
|
|
3203
|
+
* aligned input is available yet.
|
|
3204
|
+
*/
|
|
3205
|
+
process(pcm) {
|
|
3206
|
+
const aligned = this.carry.push(pcm);
|
|
3207
|
+
if (aligned.length === 0) return Buffer.alloc(0);
|
|
3208
|
+
if (this.srcRate === 16e3 && this.dstRate === 8e3) {
|
|
3209
|
+
return this._downsample16kTo8k(aligned);
|
|
3210
|
+
}
|
|
3211
|
+
if (this.srcRate === 8e3 && this.dstRate === 16e3) {
|
|
3212
|
+
return this._upsample8kTo16k(aligned);
|
|
3213
|
+
}
|
|
3214
|
+
return this._resample24kTo16k(aligned);
|
|
3215
|
+
}
|
|
3216
|
+
/**
|
|
3217
|
+
* Flush internal state and return any remaining output samples.
|
|
3218
|
+
*
|
|
3219
|
+
* For 8k→16k: the deferred last sample is emitted duplicated (matching
|
|
3220
|
+
* the stateless helper's end-of-stream behaviour).
|
|
3221
|
+
* For 16k→8k: any pending odd sample is processed with edge-replication.
|
|
3222
|
+
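* Clears the byte carry and the pending (16k→8k) or deferred (8k→16k) sample; FIR history and the 24k→16k phase are only cleared by reset().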
|
|
3223
|
+
*/
|
|
3224
|
+
flush() {
|
|
3225
|
+
this.carry.flush();
|
|
3226
|
+
if (this.srcRate === 16e3 && this.dstRate === 8e3 && this.firPendingSample !== null) {
|
|
3227
|
+
const s = this.firPendingSample;
|
|
3228
|
+
const tmp = Buffer.alloc(4);
|
|
3229
|
+
tmp.writeInt16LE(s, 0);
|
|
3230
|
+
tmp.writeInt16LE(s, 2);
|
|
3231
|
+
const out = this._downsample16kTo8k(tmp);
|
|
3232
|
+
this.firPendingSample = null;
|
|
3233
|
+
return out;
|
|
3234
|
+
}
|
|
3235
|
+
if (this.srcRate === 8e3 && this.dstRate === 16e3 && this.upsampleHasHistory) {
|
|
3236
|
+
const out = Buffer.alloc(4);
|
|
3237
|
+
out.writeInt16LE(this.upsampleLast, 0);
|
|
3238
|
+
out.writeInt16LE(this.upsampleLast, 2);
|
|
3239
|
+
this.upsampleHasHistory = false;
|
|
3240
|
+
this.upsampleLast = 0;
|
|
3241
|
+
return out;
|
|
3242
|
+
}
|
|
3243
|
+
return Buffer.alloc(0);
|
|
3244
|
+
}
|
|
3245
|
+
/** Reset all carried state (e.g. at call boundaries). */
|
|
3246
|
+
reset() {
|
|
3247
|
+
this.firHistory = new Int16Array(2);
|
|
3248
|
+
this.firHistoryValid = false;
|
|
3249
|
+
this.firPendingSample = null;
|
|
3250
|
+
this.upsampleLast = 0;
|
|
3251
|
+
this.upsampleHasHistory = false;
|
|
3252
|
+
this.resample24Last = 0;
|
|
3253
|
+
this.resample24Phase = 0;
|
|
3254
|
+
this.resample24HasHistory = false;
|
|
3255
|
+
this.carry.reset();
|
|
3256
|
+
}
|
|
3257
|
+
// ---------------------------------------------------------------------------
|
|
3258
|
+
// Private: 16 kHz → 8 kHz
|
|
3259
|
+
// ---------------------------------------------------------------------------
|
|
3260
|
+
/**
|
|
3261
|
+
* 2:1 decimation with a 5-tap binomial FIR anti-alias filter.
|
|
3262
|
+
*
|
|
3263
|
+
* FIR coefficients: [1, 4, 6, 4, 1] / 16 (cutoff ~Fs/4 = 4 kHz).
|
|
3264
|
+
*
|
|
3265
|
+
* Cross-chunk state:
|
|
3266
|
+
* - `firHistory[0]` = s_{-2}, `firHistory[1]` = s_{-1} relative to the
|
|
3267
|
+
* virtual stream (seeded to first-sample on the very first call).
|
|
3268
|
+
* - `firPendingSample` = a lone input sample carried from a chunk whose
|
|
3269
|
+
* sample count was odd; it will become the first input of the next chunk.
|
|
3270
|
+
*
|
|
3271
|
+
* Decimation: outputs are at even positions (0, 2, 4 …) in the virtual
|
|
3272
|
+
* extended stream, so every 2 input samples yield 1 output. An odd-sample-
|
|
3273
|
+
* count chunk leaves 1 sample in `firPendingSample`; the next chunk
|
|
3274
|
+
* prepends it so the output cadence is unbroken.
|
|
3275
|
+
*/
|
|
3276
|
+
_downsample16kTo8k(buf) {
|
|
3277
|
+
const newSampleCount = buf.length >> 1;
|
|
3278
|
+
const hasPending = this.firPendingSample !== null;
|
|
3279
|
+
const totalInput = newSampleCount + (hasPending ? 1 : 0);
|
|
3280
|
+
const input = new Int16Array(totalInput);
|
|
3281
|
+
if (hasPending) {
|
|
3282
|
+
input[0] = this.firPendingSample;
|
|
3283
|
+
for (let j = 0; j < newSampleCount; j++) input[j + 1] = buf.readInt16LE(j * 2);
|
|
3284
|
+
} else {
|
|
3285
|
+
for (let j = 0; j < newSampleCount; j++) input[j] = buf.readInt16LE(j * 2);
|
|
3286
|
+
}
|
|
3287
|
+
this.firPendingSample = null;
|
|
3288
|
+
if (totalInput === 0) return Buffer.alloc(0);
|
|
3289
|
+
if (!this.firHistoryValid) {
|
|
3290
|
+
this.firHistory[0] = input[0];
|
|
3291
|
+
this.firHistory[1] = input[0];
|
|
3292
|
+
this.firHistoryValid = true;
|
|
3293
|
+
}
|
|
3294
|
+
const extended = new Int16Array(totalInput + 2);
|
|
3295
|
+
extended[0] = this.firHistory[0];
|
|
3296
|
+
extended[1] = this.firHistory[1];
|
|
3297
|
+
for (let j = 0; j < totalInput; j++) extended[j + 2] = input[j];
|
|
3298
|
+
const outSamples = totalInput >> 1;
|
|
3299
|
+
const out = Buffer.alloc(outSamples * 2);
|
|
3300
|
+
for (let i = 0; i < outSamples; i++) {
|
|
3301
|
+
const c = 2 + i * 2;
|
|
3302
|
+
const sM2 = extended[c - 2];
|
|
3303
|
+
const sM1 = extended[c - 1];
|
|
3304
|
+
const s0 = extended[c];
|
|
3305
|
+
const sP1 = c + 1 < extended.length ? extended[c + 1] : extended[extended.length - 1];
|
|
3306
|
+
const sP2 = c + 2 < extended.length ? extended[c + 2] : extended[extended.length - 1];
|
|
3307
|
+
const filtered = sM2 + 4 * sM1 + 6 * s0 + 4 * sP1 + sP2 + 8 >> 4;
|
|
3308
|
+
out.writeInt16LE(Math.max(-32768, Math.min(32767, filtered)), i * 2);
|
|
3309
|
+
}
|
|
3310
|
+
if (totalInput % 2 === 1) {
|
|
3311
|
+
this.firPendingSample = input[totalInput - 1];
|
|
3312
|
+
}
|
|
3313
|
+
if (totalInput >= 2) {
|
|
3314
|
+
this.firHistory[0] = input[totalInput - 2];
|
|
3315
|
+
this.firHistory[1] = input[totalInput - 1];
|
|
3316
|
+
} else {
|
|
3317
|
+
this.firHistory[0] = this.firHistory[1];
|
|
3318
|
+
this.firHistory[1] = input[0];
|
|
3319
|
+
}
|
|
3320
|
+
return out;
|
|
3321
|
+
}
|
|
3322
|
+
// ---------------------------------------------------------------------------
|
|
3323
|
+
// Private: 8 kHz → 16 kHz
|
|
3324
|
+
// ---------------------------------------------------------------------------
|
|
3325
|
+
/**
|
|
3326
|
+
* 1:2 linear-interpolation upsampler.
|
|
3327
|
+
*
|
|
3328
|
+
* For the first chunk (no history): emits 2*(N-1) samples and defers the
|
|
3329
|
+
* last sample. For subsequent chunks (with history): emits the deferred
|
|
3330
|
+
* sample + its interpolated midpoint THEN 2*(N-1) samples from the new
|
|
3331
|
+
* chunk, deferring the new last sample. Total across K chunks + flush =
|
|
3332
|
+
* 2*total_input_samples (correct output length).
|
|
3333
|
+
*
|
|
3334
|
+
* Call flush() after the final chunk to emit the last deferred sample
|
|
3335
|
+
* pair (self-duplicate at end of stream).
|
|
3336
|
+
*/
|
|
3337
|
+
_upsample8kTo16k(buf) {
|
|
3338
|
+
const sampleCount = buf.length >> 1;
|
|
3339
|
+
if (sampleCount === 0) return Buffer.alloc(0);
|
|
3340
|
+
const outArr = [];
|
|
3341
|
+
if (this.upsampleHasHistory) {
|
|
3342
|
+
const next = buf.readInt16LE(0);
|
|
3343
|
+
outArr.push(this.upsampleLast);
|
|
3344
|
+
outArr.push(Math.round((this.upsampleLast + next) / 2));
|
|
3345
|
+
}
|
|
3346
|
+
for (let i = 0; i < sampleCount - 1; i++) {
|
|
3347
|
+
const s0 = buf.readInt16LE(i * 2);
|
|
3348
|
+
const s1 = buf.readInt16LE((i + 1) * 2);
|
|
3349
|
+
outArr.push(s0);
|
|
3350
|
+
outArr.push(Math.round((s0 + s1) / 2));
|
|
3351
|
+
}
|
|
3352
|
+
this.upsampleLast = buf.readInt16LE((sampleCount - 1) * 2);
|
|
3353
|
+
this.upsampleHasHistory = true;
|
|
3354
|
+
const outBuf = Buffer.alloc(outArr.length * 2);
|
|
3355
|
+
for (let j = 0; j < outArr.length; j++) outBuf.writeInt16LE(outArr[j], j * 2);
|
|
3356
|
+
return outBuf;
|
|
3357
|
+
}
|
|
3358
|
+
// ---------------------------------------------------------------------------
|
|
3359
|
+
// Private: 24 kHz → 16 kHz
|
|
3360
|
+
// ---------------------------------------------------------------------------
|
|
3361
|
+
/**
|
|
3362
|
+
* 3:2 linear-interpolation decimator (ratio srcRate/dstRate = 1.5).
|
|
3363
|
+
*
|
|
3364
|
+
* `resample24Phase` tracks the fractional input position of the next output
|
|
3365
|
+
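* sample relative to the START of the next chunk. A negative phase would mean
|
|
3366
|
+
* the next output straddles the previous/current chunk boundary (handled via
|
|
3367
|
+
* `resample24Last`). NOTE(review): the loop exits only once floor(phase) >= sampleCount, so the stored phase is never negative and that branch looks unreachable — confirm.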
|
|
3368
|
+
*/
|
|
3369
|
+
_resample24kTo16k(buf) {
|
|
3370
|
+
const sampleCount = buf.length >> 1;
|
|
3371
|
+
if (sampleCount === 0) return Buffer.alloc(0);
|
|
3372
|
+
const outArr = [];
|
|
3373
|
+
let phase = this.resample24Phase;
|
|
3374
|
+
while (true) {
|
|
3375
|
+
const idx = Math.floor(phase);
|
|
3376
|
+
if (idx >= sampleCount) break;
|
|
3377
|
+
const frac = phase - idx;
|
|
3378
|
+
let s0;
|
|
3379
|
+
let s1;
|
|
3380
|
+
if (idx < 0) {
|
|
3381
|
+
s0 = this.resample24HasHistory ? this.resample24Last : 0;
|
|
3382
|
+
s1 = buf.readInt16LE(0);
|
|
3383
|
+
} else {
|
|
3384
|
+
s0 = buf.readInt16LE(idx * 2);
|
|
3385
|
+
s1 = idx + 1 < sampleCount ? buf.readInt16LE((idx + 1) * 2) : s0;
|
|
3386
|
+
}
|
|
3387
|
+
const interp = Math.round(s0 + (s1 - s0) * frac);
|
|
3388
|
+
outArr.push(Math.max(-32768, Math.min(32767, interp)));
|
|
3389
|
+
phase += 24e3 / 16e3;
|
|
3390
|
+
}
|
|
3391
|
+
this.resample24Last = buf.readInt16LE((sampleCount - 1) * 2);
|
|
3392
|
+
this.resample24HasHistory = true;
|
|
3393
|
+
this.resample24Phase = phase - sampleCount;
|
|
3394
|
+
const outBuf = Buffer.alloc(outArr.length * 2);
|
|
3395
|
+
for (let j = 0; j < outArr.length; j++) outBuf.writeInt16LE(outArr[j], j * 2);
|
|
3396
|
+
return outBuf;
|
|
3397
|
+
}
|
|
3398
|
+
};
|
|
3399
|
+
function createResampler16kTo8k() {
|
|
3400
|
+
return new StatefulResampler({ srcRate: 16e3, dstRate: 8e3 });
|
|
3401
|
+
}
|
|
3402
|
+
function createResampler8kTo16k() {
|
|
3403
|
+
return new StatefulResampler({ srcRate: 8e3, dstRate: 16e3 });
|
|
3404
|
+
}
|
|
3405
|
+
function createResampler24kTo16k() {
|
|
3406
|
+
return new StatefulResampler({ srcRate: 24e3, dstRate: 16e3 });
|
|
3407
|
+
}
|
|
3408
|
+
var _warnedResample8kTo16k = false;
|
|
3409
|
+
var _warnedResample16kTo8k = false;
|
|
3410
|
+
var _warnedResample24kTo16k = false;
|
|
2041
3411
|
function resample8kTo16k(pcm8k) {
|
|
2042
|
-
if (
|
|
2043
|
-
|
|
2044
|
-
|
|
2045
|
-
|
|
2046
|
-
|
|
2047
|
-
const next = i + 1 < sampleCount ? pcm8k.readInt16LE((i + 1) * 2) : current;
|
|
2048
|
-
const interpolated = Math.round((current + next) / 2);
|
|
2049
|
-
out.writeInt16LE(current, i * 4);
|
|
2050
|
-
out.writeInt16LE(interpolated, i * 4 + 2);
|
|
3412
|
+
if (!_warnedResample8kTo16k) {
|
|
3413
|
+
_warnedResample8kTo16k = true;
|
|
3414
|
+
getLogger().warn(
|
|
3415
|
+
"[patter] resample8kTo16k() is deprecated. Use createResampler8kTo16k() (StatefulResampler) to eliminate chunk-boundary discontinuities."
|
|
3416
|
+
);
|
|
2051
3417
|
}
|
|
2052
|
-
return
|
|
3418
|
+
if (pcm8k.length === 0) return Buffer.alloc(0);
|
|
3419
|
+
const r = createResampler8kTo16k();
|
|
3420
|
+
const main = r.process(pcm8k);
|
|
3421
|
+
const tail = r.flush();
|
|
3422
|
+
return tail.length > 0 ? Buffer.concat([main, tail]) : main;
|
|
2053
3423
|
}
|
|
2054
3424
|
function resample16kTo8k(pcm16k) {
|
|
2055
|
-
if (
|
|
2056
|
-
|
|
2057
|
-
|
|
2058
|
-
|
|
2059
|
-
|
|
2060
|
-
const sample = pcm16k.readInt16LE(i * 2 * 2);
|
|
2061
|
-
out.writeInt16LE(sample, i * 2);
|
|
3425
|
+
if (!_warnedResample16kTo8k) {
|
|
3426
|
+
_warnedResample16kTo8k = true;
|
|
3427
|
+
getLogger().warn(
|
|
3428
|
+
"[patter] resample16kTo8k() is deprecated. Use createResampler16kTo8k() (StatefulResampler) to eliminate chunk-boundary discontinuities."
|
|
3429
|
+
);
|
|
2062
3430
|
}
|
|
2063
|
-
return
|
|
3431
|
+
if (pcm16k.length === 0) return Buffer.alloc(0);
|
|
3432
|
+
const r = createResampler16kTo8k();
|
|
3433
|
+
const out = r.process(pcm16k);
|
|
3434
|
+
const tail = r.flush();
|
|
3435
|
+
return tail.length > 0 ? Buffer.concat([out, tail]) : out;
|
|
2064
3436
|
}
|
|
2065
3437
|
function resample24kTo16k(pcm24k) {
|
|
3438
|
+
if (!_warnedResample24kTo16k) {
|
|
3439
|
+
_warnedResample24kTo16k = true;
|
|
3440
|
+
getLogger().warn(
|
|
3441
|
+
"[patter] resample24kTo16k() is deprecated. Use createResampler24kTo16k() (StatefulResampler) or OpenAITTS.resampleStreaming for anti-aliased resampling."
|
|
3442
|
+
);
|
|
3443
|
+
}
|
|
2066
3444
|
if (pcm24k.length === 0) return Buffer.alloc(0);
|
|
2067
3445
|
const sampleCount = Math.floor(pcm24k.length / 2);
|
|
2068
3446
|
const outSamples = Math.floor(sampleCount * 2 / 3);
|
|
2069
3447
|
const out = Buffer.alloc(outSamples * 2);
|
|
2070
|
-
let
|
|
2071
|
-
|
|
2072
|
-
|
|
2073
|
-
|
|
2074
|
-
|
|
3448
|
+
for (let i = 0; i < outSamples; i++) {
|
|
3449
|
+
const pos = i * 1.5;
|
|
3450
|
+
const idx = Math.floor(pos);
|
|
3451
|
+
const frac = pos - idx;
|
|
3452
|
+
const s0 = pcm24k.readInt16LE(idx * 2);
|
|
3453
|
+
const s1 = idx + 1 < sampleCount ? pcm24k.readInt16LE((idx + 1) * 2) : s0;
|
|
3454
|
+
const interp = Math.round(s0 + (s1 - s0) * frac);
|
|
3455
|
+
out.writeInt16LE(Math.max(-32768, Math.min(32767, interp)), i * 2);
|
|
2075
3456
|
}
|
|
2076
3457
|
return out;
|
|
2077
3458
|
}
|
|
@@ -2133,6 +3514,8 @@ async function executeToolWebhook(webhookUrl, toolName, parsedArgs, context, lab
|
|
|
2133
3514
|
|
|
2134
3515
|
// src/sentence-chunker.ts
|
|
2135
3516
|
var DEFAULT_MIN_SENTENCE_LEN = 20;
|
|
3517
|
+
var DEFAULT_MIN_WORDS_FOR_SHORT_FLUSH = 2;
|
|
3518
|
+
var SENTENCE_TERMINATORS = ".!?\u3002\uFF01\uFF1F";
|
|
2136
3519
|
function splitSentences(text, minSentenceLen = DEFAULT_MIN_SENTENCE_LEN) {
|
|
2137
3520
|
const alphabets = "([A-Za-z])";
|
|
2138
3521
|
const prefixes = "(Mr|St|Mrs|Ms|Dr)[.]";
|
|
@@ -2191,14 +3574,29 @@ function splitSentences(text, minSentenceLen = DEFAULT_MIN_SENTENCE_LEN) {
|
|
|
2191
3574
|
var SentenceChunker = class {
|
|
2192
3575
|
buffer = "";
|
|
2193
3576
|
minSentenceLen;
|
|
3577
|
+
minWordsForShortFlush;
|
|
2194
3578
|
constructor(options) {
|
|
2195
3579
|
this.minSentenceLen = options?.minSentenceLen ?? DEFAULT_MIN_SENTENCE_LEN;
|
|
3580
|
+
this.minWordsForShortFlush = options?.minWordsForShortFlush ?? DEFAULT_MIN_WORDS_FOR_SHORT_FLUSH;
|
|
2196
3581
|
}
|
|
2197
|
-
/**
|
|
3582
|
+
/**
|
|
3583
|
+
* Feed a token. Returns zero or more complete sentences.
|
|
3584
|
+
*
|
|
3585
|
+
* Two emission paths:
|
|
3586
|
+
* - **Standard path** — when the buffer is at least `minSentenceLen`
|
|
3587
|
+
* characters long and the regex tokenizer reports more than one
|
|
3588
|
+
* sentence, all but the last (potentially incomplete) are emitted.
|
|
3589
|
+
* - **Short-flush path** — when the buffer is shorter than `minSentenceLen`
|
|
3590
|
+
* but ends with a sentence terminator AND has at least
|
|
3591
|
+
* `minWordsForShortFlush` whitespace-separated words, emit it
|
|
3592
|
+
* immediately. This drops TTS TTFB on short greetings like `"Hi there!"`
|
|
3593
|
+
* while keeping single-word utterances (`"Sì."`) buffered until
|
|
3594
|
+
* `flush()`.
|
|
3595
|
+
*/
|
|
2198
3596
|
push(token) {
|
|
2199
3597
|
this.buffer += token;
|
|
2200
3598
|
if (this.buffer.length < this.minSentenceLen) {
|
|
2201
|
-
return
|
|
3599
|
+
return this.maybeShortFlush();
|
|
2202
3600
|
}
|
|
2203
3601
|
const sentences = splitSentences(this.buffer, this.minSentenceLen);
|
|
2204
3602
|
if (sentences.length <= 1) {
|
|
@@ -2212,6 +3610,41 @@ var SentenceChunker = class {
|
|
|
2212
3610
|
this.buffer = sentences[sentences.length - 1]?.[0] ?? "";
|
|
2213
3611
|
return result;
|
|
2214
3612
|
}
|
|
3613
|
+
/**
|
|
3614
|
+
* Emit the buffer when it's a short, complete single-sentence utterance.
|
|
3615
|
+
*
|
|
3616
|
+
* A buffer qualifies when **all** of these hold:
|
|
3617
|
+
* 1. Last non-whitespace char is a sentence terminator.
|
|
3618
|
+
* 2. Word count is at least `minWordsForShortFlush` (default 2 — keeps
|
|
3619
|
+
* single-word "Sì." / "Yes." buffered until `flush()`).
|
|
3620
|
+
* 3. The buffer contains exactly one terminator (the trailing one).
|
|
3621
|
+
* Multiple terminators mean we may be mid-stream of a longer merged
|
|
3622
|
+
* utterance like `"Hey! Hi! Hello! This is a sentence."` — let the
|
|
3623
|
+
* standard path keep merging.
|
|
3624
|
+
* 4. The char immediately before the terminator is NOT a digit (avoids
|
|
3625
|
+
* decimal mid-stream like `"f(x) = x * 2."` flushing before `54`).
|
|
3626
|
+
* 5. The char immediately before the terminator is NOT an uppercase
|
|
3627
|
+
* ASCII letter (avoids acronym patterns like `"U.S."` / `"U."`).
|
|
3628
|
+
*/
|
|
3629
|
+
maybeShortFlush() {
|
|
3630
|
+
const stripped = this.buffer.replace(/\s+$/, "");
|
|
3631
|
+
if (!stripped) return [];
|
|
3632
|
+
const last = stripped[stripped.length - 1];
|
|
3633
|
+
if (!SENTENCE_TERMINATORS.includes(last)) return [];
|
|
3634
|
+
let terminatorCount = 0;
|
|
3635
|
+
for (const c of stripped) {
|
|
3636
|
+
if (SENTENCE_TERMINATORS.includes(c)) terminatorCount++;
|
|
3637
|
+
}
|
|
3638
|
+
if (terminatorCount !== 1) return [];
|
|
3639
|
+
const wordCount = stripped.split(/\s+/).filter((w) => w.length > 0).length;
|
|
3640
|
+
if (wordCount < this.minWordsForShortFlush) return [];
|
|
3641
|
+
if (stripped.length >= 2) {
|
|
3642
|
+
const prev = stripped[stripped.length - 2];
|
|
3643
|
+
if (/\d/.test(prev) || /[A-Z]/.test(prev)) return [];
|
|
3644
|
+
}
|
|
3645
|
+
this.buffer = "";
|
|
3646
|
+
return [stripped];
|
|
3647
|
+
}
|
|
2215
3648
|
/** Flush remaining buffer as final sentence(s). Call at end of stream. */
|
|
2216
3649
|
flush() {
|
|
2217
3650
|
const remaining = this.buffer.trim();
|
|
@@ -2258,6 +3691,44 @@ var PipelineHookExecutor = class {
|
|
|
2258
3691
|
return transcript;
|
|
2259
3692
|
}
|
|
2260
3693
|
}
|
|
3694
|
+
/**
|
|
3695
|
+
* Run beforeLlm hook. Returns a possibly-modified messages list.
|
|
3696
|
+
* Returning ``null`` from the hook means "keep the original" — the LLM
|
|
3697
|
+
* call is too important to be silently vetoed.
|
|
3698
|
+
* Fail-open: on exception, the original messages pass through.
|
|
3699
|
+
*/
|
|
3700
|
+
async runBeforeLlm(messages, ctx) {
|
|
3701
|
+
if (!this.hooks?.beforeLlm) return messages;
|
|
3702
|
+
try {
|
|
3703
|
+
const result = await this.hooks.beforeLlm(messages, ctx);
|
|
3704
|
+
return result ?? messages;
|
|
3705
|
+
} catch (e) {
|
|
3706
|
+
getLogger().error("Pipeline hook beforeLlm threw:", e);
|
|
3707
|
+
return messages;
|
|
3708
|
+
}
|
|
3709
|
+
}
|
|
3710
|
+
/**
|
|
3711
|
+
* Run afterLlm hook. Returns a possibly-modified assistant text.
|
|
3712
|
+
* Returning ``null`` from the hook means "keep the original".
|
|
3713
|
+
* Fail-open: on exception, the original text passes through.
|
|
3714
|
+
*/
|
|
3715
|
+
async runAfterLlm(text, ctx) {
|
|
3716
|
+
if (!this.hooks?.afterLlm) return text;
|
|
3717
|
+
try {
|
|
3718
|
+
const result = await this.hooks.afterLlm(text, ctx);
|
|
3719
|
+
return result ?? text;
|
|
3720
|
+
} catch (e) {
|
|
3721
|
+
getLogger().error("Pipeline hook afterLlm threw:", e);
|
|
3722
|
+
return text;
|
|
3723
|
+
}
|
|
3724
|
+
}
|
|
3725
|
+
/**
|
|
3726
|
+
* Whether ``afterLlm`` is configured. Used by the LLM loop to decide
|
|
3727
|
+
* whether to buffer streaming tokens before yielding them.
|
|
3728
|
+
*/
|
|
3729
|
+
hasAfterLlm() {
|
|
3730
|
+
return Boolean(this.hooks?.afterLlm);
|
|
3731
|
+
}
|
|
2261
3732
|
/**
|
|
2262
3733
|
* Run beforeSynthesize hook. Returns null if hook vetoes TTS for this sentence.
|
|
2263
3734
|
* If no hook is defined, returns the text unchanged.
|
|
@@ -2286,6 +3757,185 @@ var PipelineHookExecutor = class {
|
|
|
2286
3757
|
}
|
|
2287
3758
|
};
|
|
2288
3759
|
|
|
3760
|
+
// src/observability/event-bus.ts
|
|
3761
|
+
/**
 * Minimal synchronous publish/subscribe hub.
 *
 * Listeners are stored per event name in insertion order. A misbehaving
 * listener (sync throw or rejected promise) is logged and never interrupts
 * delivery to the remaining listeners or the emitting code path.
 */
var EventBus = class {
  /** event name -> Set of listener callbacks */
  listeners = /* @__PURE__ */ new Map();
  /**
   * Subscribe to an event type.
   *
   * @param event Event name.
   * @param {Function} cb Listener invoked with the emitted payload.
   * @returns {() => boolean} Unsubscribe function; returns `true` when the
   *   listener was still registered, `false` on repeated calls.
   */
  on(event, cb) {
    let set = this.listeners.get(event);
    if (!set) {
      set = /* @__PURE__ */ new Set();
      this.listeners.set(event, set);
    }
    set.add(cb);
    return () => {
      const removed = set.delete(cb);
      // Drop the empty bucket so event names with no listeners do not pile up
      // in the map. The identity check protects a newer bucket that may have
      // been created for the same event after this one was discarded.
      if (set.size === 0 && this.listeners.get(event) === set) {
        this.listeners.delete(event);
      }
      return removed;
    };
  }
  /**
   * Emit an event synchronously. Async listeners are fire-and-forget with
   * rejection logging so a badly-behaved observer never stalls the call path.
   *
   * @param event Event name.
   * @param payload Value handed to every listener.
   */
  emit(event, payload) {
    const set = this.listeners.get(event);
    if (!set) return;
    // String() keeps the log line safe for Symbol event names, which would
    // throw inside a template literal.
    const label = String(event);
    // Iterate over a snapshot so listeners may (un)subscribe while being called.
    for (const cb of [...set]) {
      try {
        const res = cb(payload);
        if (res && typeof res.catch === "function") {
          res.catch(
            (e) => getLogger().warn(`[EventBus] listener for "${label}" rejected:`, e)
          );
        }
      } catch (e) {
        getLogger().warn(`[EventBus] listener for "${label}" threw:`, e);
      }
    }
  }
};
|
|
3796
|
+
|
|
3797
|
+
// src/observability/tracing.ts
|
|
3798
|
+
var ENV_FLAG = "PATTER_OTEL_ENABLED";
|
|
3799
|
+
var SERVICE_NAME = "patter";
|
|
3800
|
+
var SPAN_CALL = "getpatter.call";
|
|
3801
|
+
var SPAN_STT = "getpatter.stt";
|
|
3802
|
+
var SPAN_LLM = "getpatter.llm";
|
|
3803
|
+
var SPAN_TTS = "getpatter.tts";
|
|
3804
|
+
var SPAN_TOOL = "getpatter.tool";
|
|
3805
|
+
var SPAN_ENDPOINT = "getpatter.endpoint";
|
|
3806
|
+
var SPAN_BARGEIN = "getpatter.bargein";
|
|
3807
|
+
var otel = null;
|
|
3808
|
+
var initialized = false;
|
|
3809
|
+
var tracerAvailable = false;
|
|
3810
|
+
var provider = null;
|
|
3811
|
+
/**
 * Lazily load `@opentelemetry/api` and cache it in the module-level `otel`.
 *
 * A failed load is not cached, so a later call tries again.
 *
 * @returns The loaded API module, or `null` when it cannot be required.
 */
function tryLoadOtel() {
  if (otel === null) {
    try {
      otel = __require("@opentelemetry/api");
    } catch {
      // Optional dependency is absent: report "not available" to the caller.
      return null;
    }
  }
  return otel;
}
|
|
3821
|
+
/**
 * Best-effort wiring of the OpenTelemetry Node SDK with an OTLP/HTTP exporter.
 *
 * Requires the three optional SDK packages, builds a tracer provider tagged
 * with `service.name` (plus any `options.resourceAttributes`), attaches a
 * batch span processor and registers the provider globally. Any failure is
 * logged at debug level and reported as `null`.
 *
 * @param options `serviceName`, `resourceAttributes` and `otlpEndpoint` are
 *   read; the endpoint falls back to `OTEL_EXPORTER_OTLP_ENDPOINT`.
 * @param api The already-loaded `@opentelemetry/api` module.
 * @returns The configured provider instance, or `null` if setup failed.
 */
function trySetupSdk(options, api) {
  try {
    const traceNodePkg = __require("@opentelemetry/sdk-trace-node");
    const traceBasePkg = __require("@opentelemetry/sdk-trace-base");
    const otlpPkg = __require("@opentelemetry/exporter-trace-otlp-http");

    const resourceAttributes = {
      "service.name": options.serviceName ?? SERVICE_NAME,
      ...options.resourceAttributes ?? {}
    };
    const tracerProvider = new traceNodePkg.NodeTracerProvider({
      resource: { attributes: resourceAttributes }
    });

    const baseUrl = options.otlpEndpoint ?? process.env.OTEL_EXPORTER_OTLP_ENDPOINT ?? void 0;
    let exporterConfig;
    if (baseUrl) {
      // Strip one trailing slash before appending the traces path.
      exporterConfig = { url: `${baseUrl.replace(/\/$/, "")}/v1/traces` };
    }
    const spanExporter = new otlpPkg.OTLPTraceExporter(exporterConfig);
    const batchProcessor = new traceBasePkg.BatchSpanProcessor(spanExporter);

    // NOTE(review): both calls are optional. If the installed SDK exposes no
    // `addSpanProcessor`, the processor is silently never attached. Confirm
    // against the supported SDK versions.
    tracerProvider.addSpanProcessor?.(batchProcessor);
    tracerProvider.register?.();
    try {
      api.trace.setGlobalTracerProvider?.(tracerProvider);
    } catch {
      // Deliberately ignored: a failure here must not undo the setup above.
    }
    return tracerProvider;
  } catch (err) {
    getLogger().debug(
      `[observability] OTel SDK wire-up skipped: ${String(err?.message ?? err)}`
    );
    return null;
  }
}
|
|
3854
|
+
/**
 * Read the tracing opt-in flag from the environment variable named by
 * `ENV_FLAG`.
 *
 * The value is compared case-insensitively after trimming surrounding
 * whitespace, so values such as `"true\n"` or `" 1 "` (easily produced by
 * .env files and shell quoting) are still honoured.
 *
 * @returns {boolean} `true` for `1`, `true` or `yes`; `false` otherwise,
 *   including when the variable is unset or empty.
 */
function envFlagEnabled() {
  const raw = (process.env[ENV_FLAG] ?? "").trim().toLowerCase();
  return raw === "1" || raw === "true" || raw === "yes";
}
|
|
3858
|
+
/**
 * One-time tracing initialisation.
 *
 * Only the first call does any work; later calls return the cached outcome,
 * even if the environment flag has changed in the meantime. Tracing is
 * enabled only when the env flag is on and `@opentelemetry/api` can be
 * loaded. The SDK/exporter wiring is attempted on top of that but is not
 * required: without it the function still reports success and logs
 * `exporter=noop`.
 *
 * @param options Forwarded to `trySetupSdk`; `serviceName` is also used in
 *   the log line.
 * @returns {boolean} Whether tracing is available.
 */
function initTracing(options = {}) {
  if (initialized) return tracerAvailable;
  initialized = true;

  const flagOn = envFlagEnabled();
  const api = flagOn ? tryLoadOtel() : null;
  if (!api) {
    if (flagOn) {
      // The user opted in but the optional dependency is missing.
      getLogger().warn(
        `${ENV_FLAG}=1 but @opentelemetry/api is not installed. Install with: npm install @opentelemetry/api @opentelemetry/sdk-trace-node @opentelemetry/sdk-trace-base @opentelemetry/exporter-trace-otlp-http`
      );
    }
    tracerAvailable = false;
    return false;
  }

  provider = trySetupSdk(options, api);
  tracerAvailable = true;
  const serviceName = options.serviceName ?? SERVICE_NAME;
  getLogger().info(
    `[observability] Patter OTel tracing enabled (service=${serviceName}${provider ? ", exporter=otlp-http" : ", exporter=noop"})`
  );
  return true;
}
|
|
3881
|
+
/**
 * Tracing is live only when initialisation succeeded and the environment flag
 * is still set. The flag is re-read on every call, so clearing it at runtime
 * turns span creation off.
 *
 * @returns {boolean}
 */
function isTracingEnabled() {
  if (!tracerAvailable) return tracerAvailable;
  return envFlagEnabled();
}
|
|
3884
|
+
/**
 * Span stand-in used when tracing is disabled or unavailable. It exposes the
 * same three methods as RealSpan and does nothing in each of them, so call
 * sites never need to branch on whether tracing is active.
 */
var NoopSpan = class {
  /** Ignore the attribute. */
  setAttribute(_key, _value) {
  }
  /** Ignore the exception. */
  recordException(_exception) {
  }
  /** Nothing to end. */
  end() {
  }
};
// Shared instance; NoopSpan holds no state, so one object serves every caller.
var NOOP_SPAN = new NoopSpan();
|
|
3893
|
+
/**
 * Thin wrapper around a tracer-created span. Every operation is best-effort:
 * an exception thrown by the underlying span is swallowed so instrumentation
 * can never break the code being traced.
 */
var RealSpan = class {
  /** The wrapped span object. */
  span;
  constructor(span) {
    this.span = span;
  }
  /** Run a span operation, discarding any error it throws. */
  #quietly(operation) {
    try {
      operation();
    } catch {
      // Instrumentation failures are intentionally ignored.
    }
  }
  /** Set a single attribute on the wrapped span. */
  setAttribute(key, value) {
    this.#quietly(() => this.span.setAttribute(key, value));
  }
  /** Record an exception on the wrapped span. */
  recordException(exception) {
    this.#quietly(() => this.span.recordException(exception));
  }
  /** End the wrapped span. */
  end() {
    this.#quietly(() => this.span.end());
  }
};
|
|
3917
|
+
/**
 * Start a span, degrading to the shared no-op span whenever tracing is off,
 * the OTel API has not been loaded, or the tracer throws.
 *
 * @param {string} name Span name.
 * @param {object} [attrs] Initial attributes; omitted from the call when falsy.
 * @returns A RealSpan wrapping the started span, or NOOP_SPAN.
 */
function startSpan(name, attrs) {
  if (!(isTracingEnabled() && otel)) return NOOP_SPAN;
  let result = NOOP_SPAN;
  try {
    const spanOptions = attrs ? { attributes: attrs } : void 0;
    const started = otel.trace.getTracer(SERVICE_NAME).startSpan(name, spanOptions);
    result = new RealSpan(started);
  } catch {
    // Tracer failure: keep the no-op span so callers are unaffected.
  }
  return result;
}
|
|
3927
|
+
/**
 * Run `fn` inside a span. The span is always ended; if `fn` throws or
 * rejects, the error is recorded on the span and then re-thrown unchanged.
 *
 * @param {string} name Span name.
 * @param {object} [attrs] Initial span attributes.
 * @param {(span: object) => any} fn Work to execute; receives the span.
 * @returns The value `fn` resolves to.
 */
async function withSpan(name, attrs, fn) {
  const activeSpan = startSpan(name, attrs);
  try {
    const outcome = await fn(activeSpan);
    return outcome;
  } catch (failure) {
    activeSpan.recordException(failure);
    throw failure;
  } finally {
    activeSpan.end();
  }
}
|
|
3938
|
+
|
|
2289
3939
|
// src/stream-handler.ts
|
|
2290
3940
|
function checkGuardrails(text, guardrails) {
|
|
2291
3941
|
if (!guardrails) return null;
|
|
@@ -2305,9 +3955,33 @@ function sanitizeLogValue(v, maxLen = 200) {
|
|
|
2305
3955
|
const cleaned = v.replace(/[\x00-\x1f\x7f]/g, "");
|
|
2306
3956
|
return cleaned.length > maxLen ? cleaned.slice(0, maxLen) + "..." : cleaned;
|
|
2307
3957
|
}
|
|
3958
|
+
/**
 * Mask a phone number for logging, revealing at most its last four characters.
 *
 * @param number Value to mask; it is stringified first.
 * @returns {string} `"***"` followed by the last four characters, or just
 *   `"***"` when the value is falsy or four characters or fewer.
 */
function maskPhoneNumber(number) {
  const text = number ? String(number) : "";
  return text.length > 4 ? `***${text.slice(-4)}` : "***";
}
|
|
2308
3964
|
/**
 * Check that a string has E.164 shape: a leading "+", a first digit 1-9, then
 * 6 to 14 further digits (7 to 15 digits in total) and nothing else.
 *
 * @param {string} number Candidate phone number.
 * @returns {boolean}
 */
function isValidE164(number) {
  return /^\+[1-9]\d{6,14}$/.test(number);
}
|
|
3967
|
+
// Lower-case short utterances and filler words, plus a lone ".".
// NOTE(review): presumably transcripts an STT engine tends to invent on
// silence or noise, filtered by the stream handler. The consumer is outside
// this view, so confirm before relying on that.
var HALLUCINATIONS = /* @__PURE__ */ new Set([
  "you",
  "thank you",
  "thanks",
  "yeah",
  "yes",
  "no",
  "okay",
  "ok",
  "uh",
  "um",
  "mmm",
  "hmm",
  ".",
  "bye",
  "right",
  "cool"
]);
|
|
2311
3985
|
var StreamHandler = class {
|
|
2312
3986
|
deps;
|
|
2313
3987
|
ws;
|
|
@@ -2320,6 +3994,58 @@ var StreamHandler = class {
|
|
|
2320
3994
|
stt = null;
|
|
2321
3995
|
tts = null;
|
|
2322
3996
|
isSpeaking = false;
|
|
3997
|
+
/** Set to true after a VAD error to suppress log spam for the rest of the call. */
|
|
3998
|
+
vadDisabled = false;
|
|
3999
|
+
/**
|
|
4000
|
+
* Monotonic counter incremented on every TTS-start. The grace timer
|
|
4001
|
+
* scheduled by ``endSpeakingWithGrace`` only flips ``isSpeaking=false``
|
|
4002
|
+
* if the counter still matches its capture — a new turn that started in
|
|
4003
|
+
* the meantime invalidates the obsolete timer instead of clobbering its
|
|
4004
|
+
* own ``isSpeaking=true``.
|
|
4005
|
+
*/
|
|
4006
|
+
speakingGeneration = 0;
|
|
4007
|
+
/** Handle for the pending grace-period timer, so it can be cleared on cleanup. */
|
|
4008
|
+
graceTimer = null;
|
|
4009
|
+
/** Mark the start of a TTS span. Use instead of setting isSpeaking directly. */
|
|
4010
|
+
beginSpeaking() {
|
|
4011
|
+
this.speakingGeneration++;
|
|
4012
|
+
this.isSpeaking = true;
|
|
4013
|
+
}
|
|
4014
|
+
/**
|
|
4015
|
+
* Atomically end speaking AND invalidate any pending grace timer.
|
|
4016
|
+
* Use instead of ``this.isSpeaking = false`` at barge-in sites.
|
|
4017
|
+
*/
|
|
4018
|
+
cancelSpeaking() {
|
|
4019
|
+
this.speakingGeneration++;
|
|
4020
|
+
this.isSpeaking = false;
|
|
4021
|
+
}
|
|
4022
|
+
/** Cancel and clear the pending grace timer, if any. */
|
|
4023
|
+
clearGraceTimer() {
|
|
4024
|
+
if (this.graceTimer !== null) {
|
|
4025
|
+
clearTimeout(this.graceTimer);
|
|
4026
|
+
this.graceTimer = null;
|
|
4027
|
+
}
|
|
4028
|
+
}
|
|
4029
|
+
/**
|
|
4030
|
+
* Mark the agent as no longer producing TTS, honoring a grace period that
|
|
4031
|
+
* approximates the carrier's playback buffer. The user may still hear the
|
|
4032
|
+
* agent for ~1 s after we finish pushing audio (Twilio buffers ~1500 ms);
|
|
4033
|
+
* keeping isSpeaking=true through that window keeps the VAD-driven
|
|
4034
|
+
* barge-in armed during the audible tail. Tunable via env.
|
|
4035
|
+
*/
|
|
4036
|
+
endSpeakingWithGrace() {
|
|
4037
|
+
const grace = Number(process.env.PATTER_TTS_TAIL_GRACE_MS ?? 1500);
|
|
4038
|
+
if (grace > 0) {
|
|
4039
|
+
const gen = this.speakingGeneration;
|
|
4040
|
+
this.clearGraceTimer();
|
|
4041
|
+
this.graceTimer = setTimeout(() => {
|
|
4042
|
+
this.graceTimer = null;
|
|
4043
|
+
if (this.speakingGeneration === gen) this.isSpeaking = false;
|
|
4044
|
+
}, grace);
|
|
4045
|
+
} else {
|
|
4046
|
+
this.isSpeaking = false;
|
|
4047
|
+
}
|
|
4048
|
+
}
|
|
2323
4049
|
llmLoop = null;
|
|
2324
4050
|
chunkCount = 0;
|
|
2325
4051
|
callEndFired = false;
|
|
@@ -2329,30 +4055,155 @@ var StreamHandler = class {
|
|
|
2329
4055
|
maxDurationTimer = null;
|
|
2330
4056
|
transcriptProcessing = false;
|
|
2331
4057
|
transcriptQueue = [];
|
|
2332
|
-
//
|
|
4058
|
+
// Throttle state for back-to-back STT finals — see ``commitTranscript``.
|
|
2333
4059
|
lastCommitText = "";
|
|
2334
4060
|
lastCommitAt = 0;
|
|
4061
|
+
// PCM16 byte-alignment carry for TTS streaming (pipeline mode).
|
|
4062
|
+
// HTTP streams from ElevenLabs / OpenAI / Cartesia can yield chunks of any
|
|
4063
|
+
// size, including odd byte counts. Silently dropping the trailing odd byte
|
|
4064
|
+
// misaligns every subsequent int16 sample in the stream (hi/lo bytes get
|
|
4065
|
+
// swapped), producing a voice drowned in loud hiss. We buffer the odd byte
|
|
4066
|
+
// across chunks so resample/mulaw encoding always sees aligned int16 frames.
|
|
4067
|
+
ttsByteCarry = null;
|
|
4068
|
+
// Per-session stateful resamplers eliminate chunk-boundary discontinuities.
|
|
4069
|
+
// Created lazily on first use; reset() on call end.
|
|
4070
|
+
inboundResampler = createResampler8kTo16k();
|
|
4071
|
+
outboundResampler = createResampler16kTo8k();
|
|
2335
4072
|
history;
|
|
2336
4073
|
metricsAcc;
|
|
4074
|
+
_eventBus;
|
|
2337
4075
|
constructor(deps, ws, caller, callee) {
|
|
2338
4076
|
this.deps = deps;
|
|
2339
4077
|
this.ws = ws;
|
|
2340
4078
|
this.caller = caller;
|
|
2341
4079
|
this.callee = callee;
|
|
2342
4080
|
this.history = createHistoryManager(200);
|
|
2343
|
-
const
|
|
2344
|
-
const
|
|
4081
|
+
const sttKey = deps.agent.stt?.constructor?.providerKey;
|
|
4082
|
+
const sttProviderName = deps.agent.stt ? sttKey ?? deps.agent.stt.constructor?.name ?? "custom" : void 0;
|
|
4083
|
+
const ttsKey = deps.agent.tts?.constructor?.providerKey;
|
|
4084
|
+
const ttsProviderName = deps.agent.tts ? ttsKey ?? deps.agent.tts.constructor?.name ?? "custom" : void 0;
|
|
2345
4085
|
const providerMode = deps.agent.provider ?? "openai_realtime";
|
|
4086
|
+
const llmKey = deps.agent.llm?.constructor?.providerKey;
|
|
4087
|
+
let llmProviderName;
|
|
4088
|
+
if (deps.agent.llm) {
|
|
4089
|
+
if (llmKey) {
|
|
4090
|
+
llmProviderName = llmKey;
|
|
4091
|
+
} else {
|
|
4092
|
+
const stripped = (deps.agent.llm.constructor?.name ?? "custom").replace(/LLMProvider$/i, "").replace(/LLM$/i, "").replace(/Provider$/i, "").toLowerCase();
|
|
4093
|
+
llmProviderName = stripped || "custom";
|
|
4094
|
+
}
|
|
4095
|
+
} else {
|
|
4096
|
+
llmProviderName = providerMode === "openai_realtime" ? "openai_realtime" : "openai";
|
|
4097
|
+
}
|
|
4098
|
+
this._eventBus = new EventBus();
|
|
2346
4099
|
this.metricsAcc = new CallMetricsAccumulator({
|
|
2347
4100
|
callId: "",
|
|
2348
4101
|
providerMode,
|
|
2349
4102
|
telephonyProvider: deps.bridge.telephonyProvider,
|
|
2350
4103
|
sttProvider: sttProviderName,
|
|
2351
4104
|
ttsProvider: ttsProviderName,
|
|
2352
|
-
|
|
4105
|
+
llmProvider: llmProviderName,
|
|
4106
|
+
pricing: deps.pricing,
|
|
4107
|
+
eventBus: this._eventBus,
|
|
4108
|
+
reportOnlyInitialTtfb: deps.reportOnlyInitialTtfb ?? false
|
|
2353
4109
|
});
|
|
2354
4110
|
getLogger().debug(`WebSocket connection opened (${deps.bridge.label})`);
|
|
2355
4111
|
}
|
|
4112
|
+
/**
|
|
4113
|
+
* Record a completed turn in the dashboard store and fire the user-supplied
|
|
4114
|
+
* ``onMetrics`` callback. Centralises the 4 emit sites (firstMessage, pipeline
|
|
4115
|
+
* streaming/regular LLM, WebSocket remote, Realtime response_done) so the
|
|
4116
|
+
* payload shape lives in one place.
|
|
4117
|
+
*/
|
|
4118
|
+
async emitTurnMetrics(turn) {
|
|
4119
|
+
if (turn == null) return;
|
|
4120
|
+
this.deps.metricsStore.recordTurn({ call_id: this.callId, turn });
|
|
4121
|
+
if (!this.deps.onMetrics) return;
|
|
4122
|
+
const turnMetrics = turn;
|
|
4123
|
+
const llm_ttft_ms = turnMetrics?.latency?.llm_ttft_ms;
|
|
4124
|
+
await this.deps.onMetrics({
|
|
4125
|
+
call_id: this.callId,
|
|
4126
|
+
turn,
|
|
4127
|
+
...llm_ttft_ms !== void 0 ? { llm_ttft_ms } : {},
|
|
4128
|
+
cost_so_far: this.metricsAcc.getCostSoFar()
|
|
4129
|
+
});
|
|
4130
|
+
}
|
|
4131
|
+
/**
 * Reset the TTS odd-byte carry. Call at every TTS stream entry and exit so a
 * dangling byte held over from one stream is never prepended to the next.
 */
resetTtsCarry() {
  this.ttsByteCarry = null;
}
|
|
4135
|
+
/**
|
|
4136
|
+
* Flush both stateful resamplers and any TTS byte carry on call close.
|
|
4137
|
+
* Emits tail bytes through the telephony bridge so the last ~20 ms of audio
|
|
4138
|
+
* is not silently clipped on hangup. No-op if the WebSocket is already gone.
|
|
4139
|
+
*/
|
|
4140
|
+
flushResamplers() {
|
|
4141
|
+
try {
|
|
4142
|
+
const inTail = this.inboundResampler.flush();
|
|
4143
|
+
if (inTail.length > 0 && this.stt) {
|
|
4144
|
+
this.stt.sendAudio(inTail);
|
|
4145
|
+
}
|
|
4146
|
+
} catch {
|
|
4147
|
+
}
|
|
4148
|
+
try {
|
|
4149
|
+
const outTail = this.outboundResampler.flush();
|
|
4150
|
+
if (outTail.length > 0 && this.ws.readyState === this.ws.OPEN) {
|
|
4151
|
+
const mulaw = pcm16ToMulaw(outTail);
|
|
4152
|
+
this.deps.bridge.sendAudio(this.ws, mulaw.toString("base64"), this.streamSid);
|
|
4153
|
+
}
|
|
4154
|
+
} catch {
|
|
4155
|
+
}
|
|
4156
|
+
this.ttsByteCarry = null;
|
|
4157
|
+
}
|
|
4158
|
+
/**
|
|
4159
|
+
* Start call recording when configured. Currently Twilio-only — bridges may
|
|
4160
|
+
* expose ``startRecording`` for parity when we add other carriers.
|
|
4161
|
+
*/
|
|
4162
|
+
async startRecordingIfRequested(callId) {
|
|
4163
|
+
const { recording, config } = this.deps;
|
|
4164
|
+
if (!recording || !config.twilioSid || !config.twilioToken || !callId) return;
|
|
4165
|
+
if (!validateTwilioSid(callId)) {
|
|
4166
|
+
getLogger().warn(`Recording skipped: invalid Twilio CallSid format ${JSON.stringify(callId)}`);
|
|
4167
|
+
return;
|
|
4168
|
+
}
|
|
4169
|
+
try {
|
|
4170
|
+
const recUrl = `https://api.twilio.com/2010-04-01/Accounts/${config.twilioSid}/Calls/${callId}/Recordings.json`;
|
|
4171
|
+
const recResp = await fetch(recUrl, {
|
|
4172
|
+
method: "POST",
|
|
4173
|
+
headers: {
|
|
4174
|
+
"Authorization": `Basic ${Buffer.from(`${config.twilioSid}:${config.twilioToken}`).toString("base64")}`
|
|
4175
|
+
}
|
|
4176
|
+
});
|
|
4177
|
+
if (recResp.ok) {
|
|
4178
|
+
getLogger().debug(`Recording started for ${callId}`);
|
|
4179
|
+
} else {
|
|
4180
|
+
getLogger().warn(`could not start recording: ${await recResp.text()}`);
|
|
4181
|
+
}
|
|
4182
|
+
} catch (e) {
|
|
4183
|
+
getLogger().warn(`could not start recording: ${String(e)}`);
|
|
4184
|
+
}
|
|
4185
|
+
}
|
|
4186
|
+
// ---------------------------------------------------------------------------
|
|
4187
|
+
// Public: observer API
|
|
4188
|
+
// ---------------------------------------------------------------------------
|
|
4189
|
+
/**
 * Subscribe to an event on this call's private EventBus.
 *
 * The default event is 'metrics_collected', which the original note
 * describes as fired after every completed turn with the TurnMetrics
 * payload. The emitter lives outside this method.
 *
 * @param {Function} cb Listener invoked with the event payload.
 * @param {string} [event="metrics_collected"] Event name to listen for.
 * @returns {Function} Unsubscribe function; call it to stop receiving events.
 *
 * @example
 * const off = handler.addObserver((payload) => {
 *   console.log('turn metrics:', payload);
 * });
 * // later:
 * off();
 */
addObserver(cb, event = "metrics_collected") {
  return this._eventBus.on(event, cb);
}
|
|
2356
4207
|
// ---------------------------------------------------------------------------
|
|
2357
4208
|
// Public: called by the provider-specific parsers in server.ts
|
|
2358
4209
|
// ---------------------------------------------------------------------------
|
|
@@ -2377,8 +4228,7 @@ var StreamHandler = class {
|
|
|
2377
4228
|
this.deps.metricsStore.recordCallStart({
|
|
2378
4229
|
call_id: callId,
|
|
2379
4230
|
caller: this.caller,
|
|
2380
|
-
callee: this.callee
|
|
2381
|
-
direction: "inbound"
|
|
4231
|
+
callee: this.callee
|
|
2382
4232
|
});
|
|
2383
4233
|
const MAX_CALL_DURATION_MS = 60 * 60 * 1e3;
|
|
2384
4234
|
this.maxDurationTimer = setTimeout(async () => {
|
|
@@ -2389,52 +4239,32 @@ var StreamHandler = class {
|
|
|
2389
4239
|
}
|
|
2390
4240
|
}, MAX_CALL_DURATION_MS);
|
|
2391
4241
|
try {
|
|
2392
|
-
const { notifyDashboard } = await import("./persistence-
|
|
4242
|
+
const { notifyDashboard } = await import("./persistence-LQBYQPQQ.mjs");
|
|
2393
4243
|
notifyDashboard({
|
|
2394
4244
|
call_id: callId,
|
|
2395
4245
|
caller: this.caller,
|
|
2396
|
-
callee: this.callee
|
|
2397
|
-
direction: "inbound"
|
|
4246
|
+
callee: this.callee
|
|
2398
4247
|
});
|
|
2399
4248
|
} catch {
|
|
2400
4249
|
}
|
|
2401
4250
|
if (this.deps.onCallStart) {
|
|
4251
|
+
const direction = this.deps.metricsStore.getActive(callId)?.direction ?? "inbound";
|
|
2402
4252
|
await this.deps.onCallStart({
|
|
2403
4253
|
call_id: callId,
|
|
2404
4254
|
caller: this.caller,
|
|
2405
4255
|
callee: this.callee,
|
|
2406
|
-
direction
|
|
4256
|
+
direction,
|
|
4257
|
+
telephony_provider: this.deps.bridge.telephonyProvider,
|
|
2407
4258
|
...Object.keys(customParams).length > 0 ? { custom_params: customParams } : {}
|
|
2408
4259
|
});
|
|
2409
4260
|
}
|
|
2410
|
-
|
|
2411
|
-
if (!validateTwilioSid(callId)) {
|
|
2412
|
-
getLogger().warn(`Recording skipped: invalid Twilio CallSid format ${JSON.stringify(callId)}`);
|
|
2413
|
-
} else {
|
|
2414
|
-
try {
|
|
2415
|
-
const recUrl = `https://api.twilio.com/2010-04-01/Accounts/${this.deps.config.twilioSid}/Calls/${callId}/Recordings.json`;
|
|
2416
|
-
const recResp = await fetch(recUrl, {
|
|
2417
|
-
method: "POST",
|
|
2418
|
-
headers: {
|
|
2419
|
-
"Authorization": `Basic ${Buffer.from(`${this.deps.config.twilioSid}:${this.deps.config.twilioToken}`).toString("base64")}`
|
|
2420
|
-
}
|
|
2421
|
-
});
|
|
2422
|
-
if (recResp.ok) {
|
|
2423
|
-
getLogger().debug(`Recording started for ${callId}`);
|
|
2424
|
-
} else {
|
|
2425
|
-
getLogger().warn(`could not start recording: ${await recResp.text()}`);
|
|
2426
|
-
}
|
|
2427
|
-
} catch (e) {
|
|
2428
|
-
getLogger().warn(`could not start recording: ${String(e)}`);
|
|
2429
|
-
}
|
|
2430
|
-
}
|
|
2431
|
-
}
|
|
4261
|
+
await this.startRecordingIfRequested(callId);
|
|
2432
4262
|
const agentVars = this.deps.sanitizeVariables(this.deps.agent.variables ?? {});
|
|
2433
4263
|
const safeCustomParams = this.deps.sanitizeVariables(customParams);
|
|
2434
4264
|
const allVars = { ...agentVars, ...safeCustomParams };
|
|
2435
4265
|
const resolvedPrompt = Object.keys(allVars).length > 0 ? this.deps.resolveVariables(this.deps.agent.systemPrompt, allVars) : this.deps.agent.systemPrompt;
|
|
2436
|
-
const
|
|
2437
|
-
if (
|
|
4266
|
+
const provider2 = this.deps.agent.provider ?? "openai_realtime";
|
|
4267
|
+
if (provider2 === "pipeline") {
|
|
2438
4268
|
await this.initPipeline(resolvedPrompt);
|
|
2439
4269
|
} else {
|
|
2440
4270
|
await this.initRealtimeAdapter(resolvedPrompt);
|
|
@@ -2446,13 +4276,56 @@ var StreamHandler = class {
|
|
|
2446
4276
|
}
|
|
2447
4277
|
/** Handle an incoming audio chunk (already decoded from base64). */
|
|
2448
4278
|
async handleAudio(audioBuffer) {
|
|
2449
|
-
const
|
|
2450
|
-
if (
|
|
2451
|
-
if (this.isSpeaking && (this.deps.agent.bargeInThresholdMs ?? 300) === 0) {
|
|
2452
|
-
return;
|
|
2453
|
-
}
|
|
4279
|
+
const provider2 = this.deps.agent.provider ?? "openai_realtime";
|
|
4280
|
+
if (provider2 === "pipeline" && this.stt) {
|
|
2454
4281
|
const pcm8k = mulawToPcm16(audioBuffer);
|
|
2455
|
-
const pcm16k =
|
|
4282
|
+
const pcm16k = this.inboundResampler.process(pcm8k);
|
|
4283
|
+
if (this.deps.agent.vad && !this.vadDisabled) {
|
|
4284
|
+
try {
|
|
4285
|
+
const vadPromise = this.deps.agent.vad.processFrame(pcm16k, 16e3);
|
|
4286
|
+
const timeoutPromise = new Promise((resolve) => setTimeout(() => resolve(null), 25));
|
|
4287
|
+
const evt = await Promise.race([vadPromise, timeoutPromise]);
|
|
4288
|
+
if (evt) {
|
|
4289
|
+
getLogger().info(
|
|
4290
|
+
`[VAD] ${evt.type} agentSpeaking=${this.isSpeaking}`
|
|
4291
|
+
);
|
|
4292
|
+
}
|
|
4293
|
+
if (evt?.type === "speech_start") {
|
|
4294
|
+
if (this.isSpeaking) {
|
|
4295
|
+
getLogger().info("[VAD] speech_start during TTS \u2192 BARGE-IN");
|
|
4296
|
+
this.metricsAcc.recordOverlapStart();
|
|
4297
|
+
this.metricsAcc.recordBargeinDetected();
|
|
4298
|
+
const bargeinSpan = startSpan(SPAN_BARGEIN, { "patter.call.id": this.callId });
|
|
4299
|
+
try {
|
|
4300
|
+
this.cancelSpeaking();
|
|
4301
|
+
try {
|
|
4302
|
+
this.deps.bridge.sendClear(this.ws, this.streamSid);
|
|
4303
|
+
} catch (err) {
|
|
4304
|
+
getLogger().debug(`sendClear during VAD barge-in failed: ${String(err)}`);
|
|
4305
|
+
}
|
|
4306
|
+
this.metricsAcc.recordTtsStopped();
|
|
4307
|
+
this.metricsAcc.recordTurnInterrupted();
|
|
4308
|
+
this.metricsAcc.recordOverlapEnd(true);
|
|
4309
|
+
} finally {
|
|
4310
|
+
try {
|
|
4311
|
+
bargeinSpan.end();
|
|
4312
|
+
} catch {
|
|
4313
|
+
}
|
|
4314
|
+
}
|
|
4315
|
+
}
|
|
4316
|
+
this.metricsAcc.startTurnIfIdle();
|
|
4317
|
+
} else if (evt?.type === "speech_end") {
|
|
4318
|
+
this.metricsAcc.recordVadStop();
|
|
4319
|
+
}
|
|
4320
|
+
} catch (err) {
|
|
4321
|
+
this.vadDisabled = true;
|
|
4322
|
+
getLogger().warn(`VAD processFrame failed \u2014 disabling VAD for this call: ${String(err)}`);
|
|
4323
|
+
}
|
|
4324
|
+
}
|
|
4325
|
+
if (this.isSpeaking) {
|
|
4326
|
+
if (this.deps.agent.vad) return;
|
|
4327
|
+
if ((this.deps.agent.bargeInThresholdMs ?? 300) === 0) return;
|
|
4328
|
+
}
|
|
2456
4329
|
const hooks = this.deps.agent.hooks;
|
|
2457
4330
|
if (hooks) {
|
|
2458
4331
|
const hookExecutor = new PipelineHookExecutor(hooks);
|
|
@@ -2460,13 +4333,15 @@ var StreamHandler = class {
|
|
|
2460
4333
|
const processed = await hookExecutor.runBeforeSendToStt(pcm16k, hookCtx);
|
|
2461
4334
|
if (processed === null) return;
|
|
2462
4335
|
this.stt.sendAudio(processed);
|
|
4336
|
+
this.metricsAcc.addSttAudioBytes(processed.length);
|
|
2463
4337
|
} else {
|
|
2464
4338
|
this.stt.sendAudio(pcm16k);
|
|
4339
|
+
this.metricsAcc.addSttAudioBytes(pcm16k.length);
|
|
2465
4340
|
}
|
|
2466
4341
|
} else if (this.adapter) {
|
|
2467
|
-
if (this.adapter instanceof ElevenLabsConvAIAdapter && this.deps.bridge.telephonyProvider === "twilio") {
|
|
4342
|
+
if (this.adapter instanceof ElevenLabsConvAIAdapter && this.deps.bridge.telephonyProvider === "twilio" && this.adapter.inputAudioFormat !== "ulaw_8000") {
|
|
2468
4343
|
const pcm8k = mulawToPcm16(audioBuffer);
|
|
2469
|
-
const pcm16k =
|
|
4344
|
+
const pcm16k = this.inboundResampler.process(pcm8k);
|
|
2470
4345
|
this.adapter.sendAudio(pcm16k);
|
|
2471
4346
|
} else {
|
|
2472
4347
|
this.adapter.sendAudio(audioBuffer);
|
|
@@ -2483,8 +4358,28 @@ var StreamHandler = class {
|
|
|
2483
4358
|
await this.deps.onTranscript({ role: "user", text: `[DTMF: ${digit}]`, call_id: this.callId });
|
|
2484
4359
|
}
|
|
2485
4360
|
}
|
|
4361
|
+
/**
|
|
4362
|
+
* Last mark name Twilio has confirmed playback of. Mirrors the Python
|
|
4363
|
+
* ``TwilioAudioSender.last_confirmed_mark`` field — barge-in heuristics
|
|
4364
|
+
* compare this against the latest sent mark to decide whether the agent's
|
|
4365
|
+
* audio has actually reached the caller yet.
|
|
4366
|
+
*/
|
|
4367
|
+
lastConfirmedMark = "";
|
|
4368
|
+
/**
|
|
4369
|
+
* Handle a Twilio ``mark`` event acknowledging that a previously sent
|
|
4370
|
+
* audio chunk has been played out. Mirrors Python's
|
|
4371
|
+
* ``twilio_handler.py``: ``audio_sender.on_mark_confirmed(mark_name)`` +
|
|
4372
|
+
* ``handler.on_mark(mark_name)``.
|
|
4373
|
+
*/
|
|
4374
|
+
async onMark(markName) {
|
|
4375
|
+
if (markName) {
|
|
4376
|
+
this.lastConfirmedMark = markName;
|
|
4377
|
+
}
|
|
4378
|
+
}
|
|
2486
4379
|
/** Handle call stop / stream end. */
|
|
2487
4380
|
async handleStop() {
|
|
4381
|
+
this.clearGraceTimer();
|
|
4382
|
+
this.flushResamplers();
|
|
2488
4383
|
await this.closeSttOnce();
|
|
2489
4384
|
try {
|
|
2490
4385
|
this.adapter?.close();
|
|
@@ -2494,6 +4389,8 @@ var StreamHandler = class {
|
|
|
2494
4389
|
}
|
|
2495
4390
|
/** Handle WebSocket close event. */
|
|
2496
4391
|
async handleWsClose() {
|
|
4392
|
+
this.clearGraceTimer();
|
|
4393
|
+
this.flushResamplers();
|
|
2497
4394
|
await this.closeSttOnce();
|
|
2498
4395
|
try {
|
|
2499
4396
|
this.adapter?.close();
|
|
@@ -2519,15 +4416,33 @@ var StreamHandler = class {
|
|
|
2519
4416
|
// ---------------------------------------------------------------------------
|
|
2520
4417
|
/**
|
|
2521
4418
|
* Encode a PCM 16kHz audio chunk for the telephony provider.
|
|
2522
|
-
*
|
|
4419
|
+
*
|
|
4420
|
+
* Both Twilio and Telnyx negotiate PCMU (mulaw) 8 kHz on the bidirectional
|
|
4421
|
+
* media stream — Twilio always, and Telnyx because ``streaming_start``
|
|
4422
|
+
* (server.ts) requests ``stream_bidirectional_codec=PCMU`` at 8 kHz. So
|
|
4423
|
+
* the wire format for both providers is mulaw 8 kHz; we resample 16 kHz
|
|
4424
|
+
* PCM16 → 8 kHz then encode to mulaw. Mirrors the Python pipeline path
|
|
4425
|
+
* (sdk-py/getpatter/handlers/telnyx_handler.py::TelnyxAudioSender).
|
|
4426
|
+
*
|
|
4427
|
+
* Maintains a 1-byte carry across calls so unaligned HTTP chunks from
|
|
4428
|
+
* streaming TTS providers never byte-swap the PCM16 samples downstream.
|
|
2523
4429
|
*/
|
|
2524
4430
|
encodePipelineAudio(pcm16k) {
|
|
2525
|
-
|
|
2526
|
-
|
|
2527
|
-
|
|
2528
|
-
|
|
2529
|
-
|
|
2530
|
-
|
|
4431
|
+
const aligned = this.alignPcm16(pcm16k);
|
|
4432
|
+
if (aligned.length === 0) return "";
|
|
4433
|
+
const pcm8k = this.outboundResampler.process(aligned);
|
|
4434
|
+
const mulaw = pcm16ToMulaw(pcm8k);
|
|
4435
|
+
return mulaw.toString("base64");
|
|
4436
|
+
}
|
|
4437
|
+
/**
|
|
4438
|
+
* Prepend any carry byte from the previous chunk, return the even-length
|
|
4439
|
+
* portion, and stash the final odd byte (if any) for the next call.
|
|
4440
|
+
*/
|
|
4441
|
+
alignPcm16(chunk) {
|
|
4442
|
+
const combined = this.ttsByteCarry ? Buffer.concat([this.ttsByteCarry, chunk]) : chunk;
|
|
4443
|
+
const alignedLen = combined.length & ~1;
|
|
4444
|
+
this.ttsByteCarry = alignedLen < combined.length ? combined.subarray(alignedLen) : null;
|
|
4445
|
+
return combined.subarray(0, alignedLen);
|
|
2531
4446
|
}
|
|
2532
4447
|
// ---------------------------------------------------------------------------
|
|
2533
4448
|
// Private: Pipeline mode
|
|
@@ -2556,6 +4471,7 @@ var StreamHandler = class {
|
|
|
2556
4471
|
if (this.deps.agent.firstMessage && !this.deps.onMessage && this.tts) {
|
|
2557
4472
|
this.metricsAcc.startTurn();
|
|
2558
4473
|
let firstChunkSent = false;
|
|
4474
|
+
this.resetTtsCarry();
|
|
2559
4475
|
try {
|
|
2560
4476
|
for await (const chunk of this.tts.synthesizeStream(this.deps.agent.firstMessage)) {
|
|
2561
4477
|
if (!firstChunkSent) {
|
|
@@ -2567,13 +4483,11 @@ var StreamHandler = class {
|
|
|
2567
4483
|
}
|
|
2568
4484
|
} catch (e) {
|
|
2569
4485
|
getLogger().error(`First message TTS error (${label}):`, e);
|
|
4486
|
+
} finally {
|
|
4487
|
+
this.resetTtsCarry();
|
|
2570
4488
|
}
|
|
2571
4489
|
if (firstChunkSent) {
|
|
2572
|
-
|
|
2573
|
-
if (turn) {
|
|
2574
|
-
this.deps.metricsStore.recordTurn({ call_id: this.callId, turn });
|
|
2575
|
-
if (this.deps.onMetrics) await this.deps.onMetrics({ call_id: this.callId, turn });
|
|
2576
|
-
}
|
|
4490
|
+
await this.emitTurnMetrics(this.metricsAcc.recordTurnComplete(this.deps.agent.firstMessage));
|
|
2577
4491
|
this.history.push({ role: "assistant", text: this.deps.agent.firstMessage, timestamp: Date.now() });
|
|
2578
4492
|
}
|
|
2579
4493
|
}
|
|
@@ -2583,15 +4497,17 @@ var StreamHandler = class {
|
|
|
2583
4497
|
"Cannot pass both agent({ llm }) and serve({ onMessage }). Pick one \u2014 `llm` for built-in LLMs, `onMessage` for custom logic."
|
|
2584
4498
|
);
|
|
2585
4499
|
}
|
|
4500
|
+
const providerModel = this.deps.agent.llm?.model ?? "";
|
|
2586
4501
|
this.llmLoop = new LLMLoop(
|
|
2587
4502
|
"",
|
|
2588
4503
|
// apiKey unused when llmProvider is supplied
|
|
2589
|
-
|
|
2590
|
-
//
|
|
4504
|
+
providerModel,
|
|
4505
|
+
// propagate so calculateLlmCost can match the price row
|
|
2591
4506
|
resolvedPrompt,
|
|
2592
4507
|
this.deps.agent.tools,
|
|
2593
4508
|
this.deps.agent.llm
|
|
2594
4509
|
);
|
|
4510
|
+
this.llmLoop.setEventBus(this._eventBus);
|
|
2595
4511
|
const llmLabel = this.deps.agent.llm.constructor?.name ?? "custom";
|
|
2596
4512
|
getLogger().debug(`Built-in LLM loop active (pipeline, ${label}, llm=${llmLabel})`);
|
|
2597
4513
|
} else if (!this.deps.onMessage && this.deps.config.openaiKey) {
|
|
@@ -2603,6 +4519,7 @@ var StreamHandler = class {
|
|
|
2603
4519
|
resolvedPrompt,
|
|
2604
4520
|
this.deps.agent.tools
|
|
2605
4521
|
);
|
|
4522
|
+
this.llmLoop.setEventBus(this._eventBus);
|
|
2606
4523
|
getLogger().debug(`Built-in LLM loop active (pipeline, ${label})`);
|
|
2607
4524
|
}
|
|
2608
4525
|
if (this.stt) {
|
|
@@ -2632,6 +4549,7 @@ var StreamHandler = class {
|
|
|
2632
4549
|
}
|
|
2633
4550
|
const processedText = await hookExecutor.runBeforeSynthesize(transformed, hookCtx);
|
|
2634
4551
|
if (processedText === null) return;
|
|
4552
|
+
this.resetTtsCarry();
|
|
2635
4553
|
try {
|
|
2636
4554
|
for await (const chunk of this.tts.synthesizeStream(processedText)) {
|
|
2637
4555
|
if (!this.isSpeaking) break;
|
|
@@ -2646,6 +4564,8 @@ var StreamHandler = class {
|
|
|
2646
4564
|
}
|
|
2647
4565
|
} catch (e) {
|
|
2648
4566
|
getLogger().error(`TTS streaming error (${this.deps.bridge.label}):`, e);
|
|
4567
|
+
} finally {
|
|
4568
|
+
this.resetTtsCarry();
|
|
2649
4569
|
}
|
|
2650
4570
|
}
|
|
2651
4571
|
/** Handle a final transcript from STT in pipeline mode. */
|
|
@@ -2663,63 +4583,30 @@ var StreamHandler = class {
|
|
|
2663
4583
|
}
|
|
2664
4584
|
}
|
|
2665
4585
|
async processTranscript(transcript) {
|
|
2666
|
-
|
|
2667
|
-
|
|
2668
|
-
|
|
2669
|
-
);
|
|
2670
|
-
this.isSpeaking = false;
|
|
2671
|
-
try {
|
|
2672
|
-
this.deps.bridge.sendClear(this.ws, this.streamSid);
|
|
2673
|
-
} catch (err) {
|
|
2674
|
-
getLogger().debug(`sendClear during barge-in failed: ${String(err)}`);
|
|
2675
|
-
}
|
|
2676
|
-
this.metricsAcc.recordTurnInterrupted();
|
|
4586
|
+
let interrupted = this.handleBargeIn(transcript);
|
|
4587
|
+
if (transcript.text) {
|
|
4588
|
+
this.metricsAcc.startTurnIfIdle();
|
|
2677
4589
|
}
|
|
2678
|
-
if (
|
|
2679
|
-
|
|
2680
|
-
const normalised = transcript.text.trim().toLowerCase();
|
|
2681
|
-
const stripped = normalised.replace(/[.,!?;: ]+$/, "").trim();
|
|
2682
|
-
const sinceLastMs = now - this.lastCommitAt;
|
|
2683
|
-
const HALLUCINATIONS = /* @__PURE__ */ new Set([
|
|
2684
|
-
"you",
|
|
2685
|
-
"thank you",
|
|
2686
|
-
"thanks",
|
|
2687
|
-
"yeah",
|
|
2688
|
-
"yes",
|
|
2689
|
-
"no",
|
|
2690
|
-
"okay",
|
|
2691
|
-
"ok",
|
|
2692
|
-
"uh",
|
|
2693
|
-
"um",
|
|
2694
|
-
"mmm",
|
|
2695
|
-
"hmm",
|
|
2696
|
-
".",
|
|
2697
|
-
"bye",
|
|
2698
|
-
"right",
|
|
2699
|
-
"cool"
|
|
2700
|
-
]);
|
|
2701
|
-
if (HALLUCINATIONS.has(stripped) || stripped === "") {
|
|
2702
|
-
getLogger().debug(`Dropped likely STT hallucination: ${sanitizeLogValue(normalised.slice(0, 40))}`);
|
|
2703
|
-
return;
|
|
4590
|
+
if (transcript.speechFinal) {
|
|
4591
|
+
this.metricsAcc.recordVadStop();
|
|
2704
4592
|
}
|
|
2705
|
-
if (
|
|
2706
|
-
|
|
2707
|
-
`Dropped duplicate final transcript (${(sinceLastMs / 1e3).toFixed(1)}s since last): ${sanitizeLogValue(normalised.slice(0, 40))}`
|
|
2708
|
-
);
|
|
2709
|
-
return;
|
|
2710
|
-
}
|
|
2711
|
-
if (sinceLastMs < 500) {
|
|
2712
|
-
getLogger().debug(
|
|
2713
|
-
`Dropped back-to-back final transcript (${(sinceLastMs / 1e3).toFixed(2)}s since last): ${sanitizeLogValue(normalised.slice(0, 40))}`
|
|
2714
|
-
);
|
|
2715
|
-
return;
|
|
2716
|
-
}
|
|
2717
|
-
this.lastCommitText = normalised;
|
|
2718
|
-
this.lastCommitAt = now;
|
|
4593
|
+
if (!transcript.isFinal || !transcript.text) return;
|
|
4594
|
+
if (!this.commitTranscript(transcript.text)) return;
|
|
2719
4595
|
const label = this.deps.bridge.label;
|
|
2720
4596
|
getLogger().debug(`User (${label} pipeline): ${sanitizeLogValue(transcript.text)}`);
|
|
2721
|
-
this.metricsAcc.
|
|
4597
|
+
this.metricsAcc.startTurnIfIdle();
|
|
2722
4598
|
this.metricsAcc.recordSttComplete(transcript.text);
|
|
4599
|
+
this.metricsAcc.recordSttFinalTimestamp();
|
|
4600
|
+
const endpointSpan = startSpan(SPAN_ENDPOINT, { "patter.call.id": this.callId });
|
|
4601
|
+
let endpointSpanClosed = false;
|
|
4602
|
+
const closeEndpointSpan = () => {
|
|
4603
|
+
if (endpointSpanClosed) return;
|
|
4604
|
+
endpointSpanClosed = true;
|
|
4605
|
+
try {
|
|
4606
|
+
endpointSpan.end();
|
|
4607
|
+
} catch {
|
|
4608
|
+
}
|
|
4609
|
+
};
|
|
2723
4610
|
if (this.deps.onTranscript) {
|
|
2724
4611
|
await this.deps.onTranscript({
|
|
2725
4612
|
role: "user",
|
|
@@ -2734,10 +4621,14 @@ var StreamHandler = class {
|
|
|
2734
4621
|
if (filteredTranscript === null) {
|
|
2735
4622
|
getLogger().debug(`afterTranscribe hook vetoed turn (${label})`);
|
|
2736
4623
|
this.metricsAcc.recordTurnInterrupted();
|
|
4624
|
+
closeEndpointSpan();
|
|
2737
4625
|
return;
|
|
2738
4626
|
}
|
|
2739
4627
|
this.history.push({ role: "user", text: filteredTranscript, timestamp: Date.now() });
|
|
2740
4628
|
let responseText = "";
|
|
4629
|
+
this.metricsAcc.recordOnUserTurnCompletedDelay(0);
|
|
4630
|
+
this.metricsAcc.recordTurnCommitted();
|
|
4631
|
+
closeEndpointSpan();
|
|
2741
4632
|
if (this.deps.onMessage && typeof this.deps.onMessage === "function") {
|
|
2742
4633
|
try {
|
|
2743
4634
|
responseText = await this.deps.onMessage({
|
|
@@ -2767,104 +4658,203 @@ var StreamHandler = class {
|
|
|
2767
4658
|
if (isWebSocketUrl(this.deps.onMessage)) {
|
|
2768
4659
|
await this.handleWebSocketResponse(msgData);
|
|
2769
4660
|
return;
|
|
2770
|
-
} else {
|
|
2771
|
-
try {
|
|
2772
|
-
responseText = await this.deps.remoteHandler.callWebhook(this.deps.onMessage, msgData);
|
|
2773
|
-
} catch (e) {
|
|
2774
|
-
getLogger().error(`Webhook remote error (${label}):`, e);
|
|
2775
|
-
return;
|
|
2776
|
-
}
|
|
2777
4661
|
}
|
|
2778
|
-
} else if (this.llmLoop) {
|
|
2779
|
-
const callCtx = { call_id: this.callId, caller: this.caller, callee: this.callee };
|
|
2780
|
-
const chunker = new SentenceChunker();
|
|
2781
|
-
const allParts = [];
|
|
2782
|
-
const ttsFirstByteSent = { value: false };
|
|
2783
|
-
this.isSpeaking = true;
|
|
2784
|
-
let llmError = false;
|
|
2785
4662
|
try {
|
|
2786
|
-
|
|
2787
|
-
|
|
2788
|
-
|
|
2789
|
-
|
|
2790
|
-
for (const sentence of sentences) {
|
|
2791
|
-
if (!this.isSpeaking) break;
|
|
2792
|
-
const guard = checkGuardrails(sentence, this.deps.agent.guardrails);
|
|
2793
|
-
const sentenceText = guard ? guard.replacement ?? "I'm sorry, I can't respond to that." : sentence;
|
|
2794
|
-
await this.synthesizeSentence(sentenceText, hookExecutor, hookCtx, ttsFirstByteSent);
|
|
2795
|
-
}
|
|
2796
|
-
if (!this.isSpeaking) break;
|
|
2797
|
-
}
|
|
2798
|
-
} catch (e) {
|
|
2799
|
-
llmError = true;
|
|
2800
|
-
chunker.reset();
|
|
2801
|
-
getLogger().error(`LLM loop error (${label}):`, e);
|
|
2802
|
-
}
|
|
2803
|
-
this.metricsAcc.recordLlmComplete();
|
|
2804
|
-
if (!llmError && this.isSpeaking) {
|
|
2805
|
-
for (const sentence of chunker.flush()) {
|
|
2806
|
-
if (!this.isSpeaking) break;
|
|
2807
|
-
const guard = checkGuardrails(sentence, this.deps.agent.guardrails);
|
|
2808
|
-
const sentenceText = guard ? guard.replacement ?? "I'm sorry, I can't respond to that." : sentence;
|
|
2809
|
-
await this.synthesizeSentence(sentenceText, hookExecutor, hookCtx, ttsFirstByteSent);
|
|
2810
|
-
}
|
|
2811
|
-
}
|
|
2812
|
-
} finally {
|
|
2813
|
-
this.isSpeaking = false;
|
|
4663
|
+
responseText = await this.deps.remoteHandler.callWebhook(this.deps.onMessage, msgData);
|
|
4664
|
+
} catch (e) {
|
|
4665
|
+
getLogger().error(`Webhook remote error (${label}):`, e);
|
|
4666
|
+
return;
|
|
2814
4667
|
}
|
|
2815
|
-
|
|
4668
|
+
} else if (this.llmLoop) {
|
|
4669
|
+
responseText = await this.runPipelineLlm(filteredTranscript, hookExecutor, hookCtx);
|
|
2816
4670
|
} else {
|
|
2817
4671
|
return;
|
|
2818
4672
|
}
|
|
2819
4673
|
if (!responseText) return;
|
|
2820
|
-
if (
|
|
2821
|
-
const guard = checkGuardrails(responseText, this.deps.agent.guardrails);
|
|
2822
|
-
if (guard) {
|
|
2823
|
-
getLogger().debug(`Guardrail '${guard.name}' triggered (pipeline)`);
|
|
2824
|
-
responseText = guard.replacement ?? "I'm sorry, I can't respond to that.";
|
|
2825
|
-
}
|
|
2826
|
-
this.metricsAcc.recordLlmComplete();
|
|
4674
|
+
if (this.llmLoop) {
|
|
2827
4675
|
this.history.push({ role: "assistant", text: responseText, timestamp: Date.now() });
|
|
2828
|
-
|
|
2829
|
-
|
|
2830
|
-
|
|
2831
|
-
|
|
2832
|
-
|
|
4676
|
+
this.metricsAcc.recordTtsComplete(responseText);
|
|
4677
|
+
} else {
|
|
4678
|
+
interrupted = await this.runRegularLlm(responseText, hookExecutor, hookCtx) || interrupted;
|
|
4679
|
+
responseText = this.history.entries[this.history.entries.length - 1]?.text ?? responseText;
|
|
4680
|
+
}
|
|
4681
|
+
if (!interrupted) {
|
|
4682
|
+
await this.emitTurnMetrics(this.metricsAcc.recordTurnComplete(responseText));
|
|
4683
|
+
}
|
|
4684
|
+
}
|
|
4685
|
+
/**
|
|
4686
|
+
* Barge-in: caller spoke over in-flight TTS. Flip ``isSpeaking`` so the
|
|
4687
|
+
* sentence loop exits on its next check, clear downstream audio buffers,
|
|
4688
|
+
* record the interruption, and return ``true`` so the caller skips the
|
|
4689
|
+
* turn-complete record.
|
|
4690
|
+
*/
|
|
4691
|
+
handleBargeIn(transcript) {
|
|
4692
|
+
if (!transcript.text || !this.isSpeaking) return false;
|
|
4693
|
+
getLogger().debug(
|
|
4694
|
+
`Barge-in: caller spoke over agent (${sanitizeLogValue(transcript.text.slice(0, 40))})`
|
|
4695
|
+
);
|
|
4696
|
+
this.metricsAcc.recordOverlapStart();
|
|
4697
|
+
this.metricsAcc.recordBargeinDetected();
|
|
4698
|
+
const bargeinSpan = startSpan(SPAN_BARGEIN, { "patter.call.id": this.callId });
|
|
4699
|
+
try {
|
|
4700
|
+
this.cancelSpeaking();
|
|
4701
|
+
try {
|
|
4702
|
+
this.deps.bridge.sendClear(this.ws, this.streamSid);
|
|
4703
|
+
} catch (err) {
|
|
4704
|
+
getLogger().debug(`sendClear during barge-in failed: ${String(err)}`);
|
|
4705
|
+
}
|
|
4706
|
+
this.metricsAcc.recordTtsStopped();
|
|
4707
|
+
this.metricsAcc.recordTurnInterrupted();
|
|
4708
|
+
this.metricsAcc.recordOverlapEnd(true);
|
|
4709
|
+
} finally {
|
|
2833
4710
|
try {
|
|
2834
|
-
|
|
2835
|
-
|
|
2836
|
-
|
|
2837
|
-
|
|
4711
|
+
bargeinSpan.end();
|
|
4712
|
+
} catch {
|
|
4713
|
+
}
|
|
4714
|
+
}
|
|
4715
|
+
return true;
|
|
4716
|
+
}
|
|
4717
|
+
/**
|
|
4718
|
+
* Dedup + throttle + hallucination filter for final STT transcripts.
|
|
4719
|
+
* Mirrors ``PipelineStreamHandler._stt_loop`` on the Python side.
|
|
4720
|
+
* Returns ``true`` when the transcript should be committed to a turn,
|
|
4721
|
+
* ``false`` when it must be dropped. Drop reasons:
|
|
4722
|
+
* - text matches common short hallucinations ("you", "thanks", ...)
|
|
4723
|
+
* - duplicate final within 2 s of previous commit
|
|
4724
|
+
* - back-to-back finals under 500 ms (too tight to be real utterances)
|
|
4725
|
+
*/
|
|
4726
|
+
commitTranscript(text) {
|
|
4727
|
+
const now = Date.now();
|
|
4728
|
+
const normalised = text.trim().toLowerCase();
|
|
4729
|
+
const stripped = normalised.replace(/[.,!?;: ]+$/, "").trim();
|
|
4730
|
+
const sinceLastMs = now - this.lastCommitAt;
|
|
4731
|
+
if (HALLUCINATIONS.has(stripped) || stripped === "") {
|
|
4732
|
+
getLogger().debug(`Dropped likely STT hallucination: ${sanitizeLogValue(normalised.slice(0, 40))}`);
|
|
4733
|
+
return false;
|
|
4734
|
+
}
|
|
4735
|
+
if (sinceLastMs < 2e3 && normalised === this.lastCommitText) {
|
|
4736
|
+
getLogger().debug(
|
|
4737
|
+
`Dropped duplicate final transcript (${(sinceLastMs / 1e3).toFixed(1)}s since last): ${sanitizeLogValue(normalised.slice(0, 40))}`
|
|
4738
|
+
);
|
|
4739
|
+
return false;
|
|
4740
|
+
}
|
|
4741
|
+
if (sinceLastMs < 500) {
|
|
4742
|
+
getLogger().debug(
|
|
4743
|
+
`Dropped back-to-back final transcript (${(sinceLastMs / 1e3).toFixed(2)}s since last): ${sanitizeLogValue(normalised.slice(0, 40))}`
|
|
4744
|
+
);
|
|
4745
|
+
return false;
|
|
4746
|
+
}
|
|
4747
|
+
this.lastCommitText = normalised;
|
|
4748
|
+
this.lastCommitAt = now;
|
|
4749
|
+
return true;
|
|
4750
|
+
}
|
|
4751
|
+
/**
|
|
4752
|
+
* Streaming built-in LLM path with sentence chunking and per-sentence
|
|
4753
|
+
* guardrails/TTS. Returns the concatenated response text.
|
|
4754
|
+
*/
|
|
4755
|
+
async runPipelineLlm(filteredTranscript, hookExecutor, hookCtx) {
|
|
4756
|
+
const label = this.deps.bridge.label;
|
|
4757
|
+
const callCtx = { call_id: this.callId, caller: this.caller, callee: this.callee };
|
|
4758
|
+
const chunker = new SentenceChunker();
|
|
4759
|
+
const allParts = [];
|
|
4760
|
+
const ttsFirstByteSent = { value: false };
|
|
4761
|
+
this.beginSpeaking();
|
|
4762
|
+
let llmError = false;
|
|
4763
|
+
const llmSpan = startSpan(SPAN_LLM, { "patter.call.id": this.callId });
|
|
4764
|
+
const guardAndSpeak = async (sentence, isFirst) => {
|
|
4765
|
+
if (isFirst) this.metricsAcc.recordLlmFirstSentenceComplete();
|
|
4766
|
+
const guard = checkGuardrails(sentence, this.deps.agent.guardrails);
|
|
4767
|
+
const sentenceText = guard ? guard.replacement ?? "I'm sorry, I can't respond to that." : sentence;
|
|
4768
|
+
await this.synthesizeSentence(sentenceText, hookExecutor, hookCtx, ttsFirstByteSent);
|
|
4769
|
+
};
|
|
4770
|
+
let firstSentenceEmitted = false;
|
|
4771
|
+
try {
|
|
4772
|
+
try {
|
|
4773
|
+
for await (const token of this.llmLoop.run(
|
|
4774
|
+
filteredTranscript,
|
|
4775
|
+
this.history.entries,
|
|
4776
|
+
callCtx,
|
|
4777
|
+
this.metricsAcc,
|
|
4778
|
+
hookExecutor,
|
|
4779
|
+
hookCtx
|
|
4780
|
+
)) {
|
|
4781
|
+
this.metricsAcc.recordLlmFirstToken();
|
|
4782
|
+
allParts.push(token);
|
|
4783
|
+
for (const sentence of chunker.push(token)) {
|
|
4784
|
+
if (!this.isSpeaking) break;
|
|
4785
|
+
await guardAndSpeak(sentence, !firstSentenceEmitted);
|
|
4786
|
+
firstSentenceEmitted = true;
|
|
2838
4787
|
}
|
|
2839
|
-
|
|
4788
|
+
if (!this.isSpeaking) break;
|
|
2840
4789
|
}
|
|
2841
|
-
}
|
|
2842
|
-
|
|
4790
|
+
} catch (e) {
|
|
4791
|
+
llmError = true;
|
|
4792
|
+
chunker.reset();
|
|
4793
|
+
getLogger().error(`LLM loop error (${label}):`, e);
|
|
4794
|
+
this.metricsAcc.recordTurnInterrupted();
|
|
2843
4795
|
}
|
|
2844
|
-
|
|
2845
|
-
|
|
4796
|
+
this.metricsAcc.recordLlmComplete();
|
|
4797
|
+
if (!llmError && this.isSpeaking) {
|
|
4798
|
+
for (const sentence of chunker.flush()) {
|
|
4799
|
+
if (!this.isSpeaking) break;
|
|
4800
|
+
await guardAndSpeak(sentence, !firstSentenceEmitted);
|
|
4801
|
+
firstSentenceEmitted = true;
|
|
4802
|
+
}
|
|
2846
4803
|
}
|
|
2847
|
-
}
|
|
2848
|
-
this.
|
|
2849
|
-
|
|
4804
|
+
} finally {
|
|
4805
|
+
this.endSpeakingWithGrace();
|
|
4806
|
+
try {
|
|
4807
|
+
llmSpan.end();
|
|
4808
|
+
} catch {
|
|
4809
|
+
}
|
|
4810
|
+
}
|
|
4811
|
+
return allParts.join("");
|
|
4812
|
+
}
|
|
4813
|
+
/**
|
|
4814
|
+
* Non-streaming path (onMessage function / webhook): apply output guardrails,
|
|
4815
|
+
* push to history, sentence-chunk the text, synthesize. Returns ``true`` if
|
|
4816
|
+
* TTS was interrupted mid-flight so the caller can skip turn-complete.
|
|
4817
|
+
*/
|
|
4818
|
+
async runRegularLlm(responseText, hookExecutor, hookCtx) {
|
|
4819
|
+
const guard = checkGuardrails(responseText, this.deps.agent.guardrails);
|
|
4820
|
+
let text = responseText;
|
|
4821
|
+
if (guard) {
|
|
4822
|
+
getLogger().debug(`Guardrail '${guard.name}' triggered (pipeline)`);
|
|
4823
|
+
text = guard.replacement ?? "I'm sorry, I can't respond to that.";
|
|
2850
4824
|
}
|
|
2851
|
-
|
|
2852
|
-
|
|
2853
|
-
|
|
2854
|
-
|
|
4825
|
+
this.metricsAcc.recordLlmComplete();
|
|
4826
|
+
this.history.push({ role: "assistant", text, timestamp: Date.now() });
|
|
4827
|
+
const chunker = new SentenceChunker();
|
|
4828
|
+
const sentences = [...chunker.push(text), ...chunker.flush()];
|
|
4829
|
+
const ttsFirstByteSent = { value: false };
|
|
4830
|
+
this.beginSpeaking();
|
|
4831
|
+
let interrupted = false;
|
|
4832
|
+
try {
|
|
4833
|
+
for (const sentence of sentences) {
|
|
4834
|
+
if (!this.isSpeaking) {
|
|
4835
|
+
interrupted = true;
|
|
4836
|
+
break;
|
|
4837
|
+
}
|
|
4838
|
+
await this.synthesizeSentence(sentence, hookExecutor, hookCtx, ttsFirstByteSent);
|
|
4839
|
+
}
|
|
4840
|
+
} finally {
|
|
4841
|
+
this.endSpeakingWithGrace();
|
|
2855
4842
|
}
|
|
4843
|
+
if (!interrupted) this.metricsAcc.recordTtsComplete(text);
|
|
4844
|
+
return interrupted;
|
|
2856
4845
|
}
|
|
2857
4846
|
/** Handle streaming WebSocket remote response with TTS. */
|
|
2858
4847
|
async handleWebSocketResponse(msgData) {
|
|
2859
4848
|
const onMessage = this.deps.onMessage;
|
|
2860
4849
|
const parts = [];
|
|
2861
4850
|
this.metricsAcc.recordLlmComplete();
|
|
2862
|
-
this.
|
|
4851
|
+
this.beginSpeaking();
|
|
2863
4852
|
let wsTtsStarted = false;
|
|
2864
4853
|
try {
|
|
2865
4854
|
for await (const chunk of this.deps.remoteHandler.callWebSocket(onMessage, msgData)) {
|
|
2866
4855
|
parts.push(chunk);
|
|
2867
4856
|
if (this.tts) {
|
|
4857
|
+
this.resetTtsCarry();
|
|
2868
4858
|
for await (const audioChunk of this.tts.synthesizeStream(chunk)) {
|
|
2869
4859
|
if (!this.isSpeaking) break;
|
|
2870
4860
|
if (!wsTtsStarted) {
|
|
@@ -2879,15 +4869,12 @@ var StreamHandler = class {
|
|
|
2879
4869
|
} catch (e) {
|
|
2880
4870
|
getLogger().error(`WebSocket remote error (${this.deps.bridge.label}):`, e);
|
|
2881
4871
|
} finally {
|
|
2882
|
-
this.
|
|
4872
|
+
this.endSpeakingWithGrace();
|
|
4873
|
+
this.resetTtsCarry();
|
|
2883
4874
|
}
|
|
2884
4875
|
const responseText = parts.join("");
|
|
2885
4876
|
this.metricsAcc.recordTtsComplete(responseText);
|
|
2886
|
-
|
|
2887
|
-
if (turn) {
|
|
2888
|
-
this.deps.metricsStore.recordTurn({ call_id: this.callId, turn });
|
|
2889
|
-
if (this.deps.onMetrics) await this.deps.onMetrics({ call_id: this.callId, turn });
|
|
2890
|
-
}
|
|
4877
|
+
await this.emitTurnMetrics(this.metricsAcc.recordTurnComplete(responseText));
|
|
2891
4878
|
if (responseText) this.history.push({ role: "assistant", text: responseText, timestamp: Date.now() });
|
|
2892
4879
|
}
|
|
2893
4880
|
// ---------------------------------------------------------------------------
|
|
@@ -2922,86 +4909,92 @@ var StreamHandler = class {
|
|
|
2922
4909
|
});
|
|
2923
4910
|
}
|
|
2924
4911
|
async handleAdapterEvent(type, eventData) {
|
|
2925
|
-
|
|
2926
|
-
|
|
2927
|
-
|
|
2928
|
-
|
|
2929
|
-
|
|
2930
|
-
|
|
2931
|
-
|
|
2932
|
-
|
|
2933
|
-
|
|
2934
|
-
|
|
2935
|
-
|
|
2936
|
-
|
|
2937
|
-
|
|
2938
|
-
|
|
2939
|
-
|
|
2940
|
-
this.deps.bridge.sendMark(this.ws, `audio_${this.chunkCount}`, this.streamSid);
|
|
2941
|
-
} else if (type === "transcript_input") {
|
|
2942
|
-
const inputText = eventData;
|
|
2943
|
-
getLogger().debug(`User (${this.deps.bridge.label}): ${sanitizeLogValue(inputText)}`);
|
|
2944
|
-
this.history.push({ role: "user", text: inputText, timestamp: Date.now() });
|
|
2945
|
-
this.metricsAcc.startTurn();
|
|
2946
|
-
this.currentAgentText = "";
|
|
2947
|
-
this.responseAudioStarted = false;
|
|
2948
|
-
if (this.deps.onTranscript) {
|
|
2949
|
-
await this.deps.onTranscript({
|
|
2950
|
-
role: "user",
|
|
2951
|
-
text: inputText,
|
|
2952
|
-
call_id: this.callId,
|
|
2953
|
-
history: [...this.history.entries]
|
|
2954
|
-
});
|
|
2955
|
-
}
|
|
2956
|
-
} else if (type === "transcript_output") {
|
|
2957
|
-
const outputText = eventData;
|
|
2958
|
-
if (outputText) {
|
|
2959
|
-
const triggered = checkGuardrails(outputText, this.deps.agent.guardrails);
|
|
2960
|
-
if (triggered) {
|
|
2961
|
-
getLogger().debug(`Guardrail '${triggered.name}' triggered`);
|
|
2962
|
-
if (this.adapter instanceof OpenAIRealtimeAdapter) {
|
|
2963
|
-
this.adapter.cancelResponse();
|
|
2964
|
-
await this.adapter.sendText(triggered.replacement ?? "I'm sorry, I can't respond to that.");
|
|
2965
|
-
}
|
|
2966
|
-
}
|
|
2967
|
-
this.currentAgentText += outputText;
|
|
2968
|
-
}
|
|
2969
|
-
} else if (type === "response_done") {
|
|
2970
|
-
const responseData = eventData;
|
|
2971
|
-
if (responseData) {
|
|
2972
|
-
const usage = responseData.usage;
|
|
2973
|
-
if (usage) {
|
|
2974
|
-
this.metricsAcc.recordRealtimeUsage(usage);
|
|
2975
|
-
}
|
|
2976
|
-
}
|
|
2977
|
-
if (this.currentAgentText) {
|
|
2978
|
-
this.history.push({ role: "assistant", text: this.currentAgentText, timestamp: Date.now() });
|
|
2979
|
-
const turn = this.metricsAcc.recordTurnComplete(this.currentAgentText);
|
|
2980
|
-
this.responseAudioStarted = false;
|
|
2981
|
-
if (this.deps.onMetrics) {
|
|
2982
|
-
await this.deps.onMetrics({
|
|
2983
|
-
call_id: this.callId,
|
|
2984
|
-
turn
|
|
2985
|
-
});
|
|
2986
|
-
}
|
|
2987
|
-
this.deps.metricsStore.recordTurn({ call_id: this.callId, turn });
|
|
2988
|
-
this.currentAgentText = "";
|
|
2989
|
-
} else {
|
|
2990
|
-
this.metricsAcc.recordTurnInterrupted();
|
|
2991
|
-
this.responseAudioStarted = false;
|
|
4912
|
+
const handler = this.adapterEventHandlers[type];
|
|
4913
|
+
if (handler) await handler(eventData);
|
|
4914
|
+
}
|
|
4915
|
+
/** Event-type → handler dispatch table for the Realtime adapter. */
|
|
4916
|
+
adapterEventHandlers = {
|
|
4917
|
+
audio: async (eventData) => this.onAdapterAudio(eventData),
|
|
4918
|
+
speech_stopped: async () => this.onAdapterSpeechStopped(),
|
|
4919
|
+
transcript_input: async (eventData) => this.onAdapterTranscriptInput(eventData),
|
|
4920
|
+
transcript_output: async (eventData) => this.onAdapterTranscriptOutput(eventData),
|
|
4921
|
+
response_done: async (eventData) => this.onAdapterResponseDone(eventData),
|
|
4922
|
+
speech_started: async () => this.onAdapterSpeechInterrupt(),
|
|
4923
|
+
interruption: async () => this.onAdapterSpeechInterrupt(),
|
|
4924
|
+
function_call: async (eventData) => {
|
|
4925
|
+
if (this.adapter instanceof OpenAIRealtimeAdapter) {
|
|
4926
|
+
await this.handleFunctionCall(eventData);
|
|
2992
4927
|
}
|
|
2993
|
-
}
|
|
2994
|
-
|
|
4928
|
+
}
|
|
4929
|
+
};
|
|
4930
|
+
async onAdapterAudio(eventData) {
|
|
4931
|
+
if (!this.responseAudioStarted) {
|
|
4932
|
+
this.responseAudioStarted = true;
|
|
4933
|
+
if (this.metricsAcc.turnActive === false) this.metricsAcc.startTurn();
|
|
4934
|
+
this.metricsAcc.recordTtsFirstByte();
|
|
4935
|
+
}
|
|
4936
|
+
const outAudio = eventData;
|
|
4937
|
+
this.deps.bridge.sendAudio(this.ws, outAudio.toString("base64"), this.streamSid);
|
|
4938
|
+
this.chunkCount++;
|
|
4939
|
+
this.deps.bridge.sendMark(this.ws, `audio_${this.chunkCount}`, this.streamSid);
|
|
4940
|
+
}
|
|
4941
|
+
onAdapterSpeechStopped() {
|
|
4942
|
+
if (!this.metricsAcc.turnActive) this.metricsAcc.startTurn();
|
|
4943
|
+
this.currentAgentText = "";
|
|
4944
|
+
this.responseAudioStarted = false;
|
|
4945
|
+
}
|
|
4946
|
+
async onAdapterTranscriptInput(inputText) {
|
|
4947
|
+
getLogger().debug(`User (${this.deps.bridge.label}): ${sanitizeLogValue(inputText)}`);
|
|
4948
|
+
this.history.push({ role: "user", text: inputText, timestamp: Date.now() });
|
|
4949
|
+
if (!this.metricsAcc.turnActive) {
|
|
4950
|
+
this.metricsAcc.startTurn();
|
|
4951
|
+
this.currentAgentText = "";
|
|
4952
|
+
this.responseAudioStarted = false;
|
|
4953
|
+
}
|
|
4954
|
+
this.metricsAcc.recordSttComplete(inputText);
|
|
4955
|
+
if (this.deps.onTranscript) {
|
|
4956
|
+
await this.deps.onTranscript({
|
|
4957
|
+
role: "user",
|
|
4958
|
+
text: inputText,
|
|
4959
|
+
call_id: this.callId,
|
|
4960
|
+
history: [...this.history.entries]
|
|
4961
|
+
});
|
|
4962
|
+
}
|
|
4963
|
+
}
|
|
4964
|
+
async onAdapterTranscriptOutput(outputText) {
|
|
4965
|
+
if (!outputText) return;
|
|
4966
|
+
const triggered = checkGuardrails(outputText, this.deps.agent.guardrails);
|
|
4967
|
+
if (triggered) {
|
|
4968
|
+
getLogger().debug(`Guardrail '${triggered.name}' triggered`);
|
|
2995
4969
|
if (this.adapter instanceof OpenAIRealtimeAdapter) {
|
|
2996
4970
|
this.adapter.cancelResponse();
|
|
4971
|
+
await this.adapter.sendText(triggered.replacement ?? "I'm sorry, I can't respond to that.");
|
|
2997
4972
|
}
|
|
2998
|
-
|
|
4973
|
+
}
|
|
4974
|
+
this.currentAgentText += outputText;
|
|
4975
|
+
}
|
|
4976
|
+
async onAdapterResponseDone(responseData) {
|
|
4977
|
+
if (responseData) {
|
|
4978
|
+
const usage = responseData.usage;
|
|
4979
|
+
if (usage) this.metricsAcc.recordRealtimeUsage(usage);
|
|
4980
|
+
}
|
|
4981
|
+
if (this.currentAgentText) {
|
|
4982
|
+
this.history.push({ role: "assistant", text: this.currentAgentText, timestamp: Date.now() });
|
|
4983
|
+
this.responseAudioStarted = false;
|
|
4984
|
+
await this.emitTurnMetrics(this.metricsAcc.recordTurnComplete(this.currentAgentText));
|
|
2999
4985
|
this.currentAgentText = "";
|
|
4986
|
+
} else {
|
|
4987
|
+
this.metricsAcc.recordTurnInterrupted();
|
|
3000
4988
|
this.responseAudioStarted = false;
|
|
3001
|
-
} else if (type === "function_call" && this.adapter instanceof OpenAIRealtimeAdapter) {
|
|
3002
|
-
await this.handleFunctionCall(eventData);
|
|
3003
4989
|
}
|
|
3004
4990
|
}
|
|
4991
|
+
onAdapterSpeechInterrupt() {
|
|
4992
|
+
this.deps.bridge.sendClear(this.ws, this.streamSid);
|
|
4993
|
+
if (this.adapter instanceof OpenAIRealtimeAdapter) this.adapter.cancelResponse();
|
|
4994
|
+
this.metricsAcc.recordTurnInterrupted();
|
|
4995
|
+
this.currentAgentText = "";
|
|
4996
|
+
this.responseAudioStarted = false;
|
|
4997
|
+
}
|
|
3005
4998
|
async handleFunctionCall(fc) {
|
|
3006
4999
|
const adapter = this.adapter;
|
|
3007
5000
|
if (fc.name === "transfer_call") {
|
|
@@ -3095,7 +5088,7 @@ var StreamHandler = class {
|
|
|
3095
5088
|
finalMetrics
|
|
3096
5089
|
);
|
|
3097
5090
|
try {
|
|
3098
|
-
const { notifyDashboard } = await import("./persistence-
|
|
5091
|
+
const { notifyDashboard } = await import("./persistence-LQBYQPQQ.mjs");
|
|
3099
5092
|
notifyDashboard(callEndData);
|
|
3100
5093
|
} catch {
|
|
3101
5094
|
}
|
|
@@ -3135,6 +5128,279 @@ async function queryDeepgramCost(metricsAcc, deepgramKey, deepgramRequestId) {
|
|
|
3135
5128
|
}
|
|
3136
5129
|
}
|
|
3137
5130
|
|
|
5131
|
+
// src/services/call-log.ts
|
|
5132
|
+
import * as crypto3 from "crypto";
|
|
5133
|
+
import * as fs2 from "fs";
|
|
5134
|
+
import { promises as fsp } from "fs";
|
|
5135
|
+
import * as os from "os";
|
|
5136
|
+
import * as path2 from "path";
|
|
5137
|
+
var SCHEMA_VERSION = "1.0";
|
|
5138
|
+
var DEFAULT_RETENTION_DAYS = 30;
|
|
5139
|
+
/**
 * Resolve the XDG data directory for the current user.
 *
 * Uses `$XDG_DATA_HOME` when it is set to an absolute path. The XDG Base
 * Directory specification requires relative values to be treated as invalid
 * and ignored, so unset, empty or relative values all fall back to
 * `~/.local/share`.
 *
 * @returns {string} Absolute path of the data home directory.
 */
function xdgDataHome() {
  const configured = process.env.XDG_DATA_HOME;
  if (configured && path2.isAbsolute(configured)) return configured;
  return path2.join(os.homedir(), ".local", "share");
}
|
|
5142
|
+
/**
 * Pick the per-platform default directory for call logs.
 *
 * - macOS: `~/Library/Application Support/patter`
 * - Windows: `%LOCALAPPDATA%\patter`, or `~/AppData/Local/patter` when the
 *   variable is unset or empty
 * - everything else: `<xdgDataHome()>/patter`
 *
 * @returns {string} Default log root for the running platform.
 */
function platformDefaultRoot() {
  switch (process.platform) {
    case "darwin":
      return path2.join(os.homedir(), "Library", "Application Support", "patter");
    case "win32": {
      const localAppData = process.env.LOCALAPPDATA;
      const base = localAppData ? localAppData : path2.join(os.homedir(), "AppData", "Local");
      return path2.join(base, "patter");
    }
    default:
      return path2.join(xdgDataHome(), "patter");
  }
}
|
|
5153
|
+
/**
 * Work out where call logs should be written.
 *
 * The explicit argument wins over the `PATTER_LOG_DIR` environment variable.
 * An empty or missing value disables logging (`null`). The literal `auto`
 * (case-insensitive, surrounding whitespace ignored) selects the platform
 * default root.
 *
 * Only `~`, `~/...` and `~\...` are expanded to the current user's home
 * directory. A value such as `~alice/logs` names another user's home, which
 * cannot be resolved here, so it is returned unchanged rather than being
 * rewritten to `<home>/alice/logs`.
 *
 * @param {string} [explicit] - Caller-supplied root; overrides the env var.
 * @returns {string|null} Resolved log root, or `null` when logging is off.
 */
function resolveLogRoot(explicit) {
  const value = explicit ?? process.env.PATTER_LOG_DIR;
  if (!value) return null;
  if (value.trim().toLowerCase() === "auto") return platformDefaultRoot();
  const isHomeRelative = value === "~" || value.startsWith("~/") || value.startsWith("~\\");
  if (isHomeRelative) return path2.join(os.homedir(), value.slice(1));
  return value;
}
|
|
5160
|
+
/**
 * Read the call-log retention window from `PATTER_LOG_RETENTION_DAYS`.
 *
 * An unset variable or one that does not start with an integer yields
 * `DEFAULT_RETENTION_DAYS`. Negative values are clamped to 0.
 *
 * @returns {number} Non-negative number of days.
 */
function retentionDays() {
  const { PATTER_LOG_RETENTION_DAYS: configured } = process.env;
  if (configured === void 0) return DEFAULT_RETENTION_DAYS;
  const days = Number.parseInt(configured, 10);
  return Number.isNaN(days) ? DEFAULT_RETENTION_DAYS : Math.max(0, days);
}
|
|
5167
|
+
/**
 * Read the phone-number redaction mode from `PATTER_LOG_REDACT_PHONE`.
 *
 * Accepted values (case-insensitive, surrounding whitespace ignored) are
 * `full`, `mask` and `hash_only`. Anything else, including an unset or empty
 * variable, resolves to `mask`.
 *
 * @returns {"full"|"mask"|"hash_only"} Normalised redaction mode.
 */
function redactMode() {
  const configured = process.env.PATTER_LOG_REDACT_PHONE || "mask";
  const normalized = configured.trim().toLowerCase();
  switch (normalized) {
    case "full":
    case "mask":
    case "hash_only":
      return normalized;
    default:
      return "mask";
  }
}
|
|
5172
|
+
/**
 * Redact a phone number according to the configured redaction mode.
 *
 * - `full`: the number is returned untouched.
 * - `hash_only`: `sha256:` followed by the first 16 hex characters of the
 *   SHA-256 digest of the raw value.
 * - otherwise: delegated to `maskPhoneNumber`.
 *
 * NOTE(review): the hash is unsalted and truncated; given the small phone
 * number space it may be reversible by brute force — confirm this is
 * acceptable for the deployment.
 *
 * @param {string} [raw] - Phone number as received; falsy values yield "".
 * @returns {string} Redacted representation.
 */
function redactPhone(raw) {
  if (!raw) return "";
  switch (redactMode()) {
    case "full":
      return raw;
    case "hash_only": {
      const digest = crypto3.createHash("sha256").update(raw, "utf8").digest("hex");
      return "sha256:" + digest.slice(0, 16);
    }
    default:
      return maskPhoneNumber(raw);
  }
}
|
|
5181
|
+
/**
 * Format a Unix timestamp (in seconds) as an ISO-8601 UTC string.
 *
 * When `tsSeconds` is omitted the current time is used. A value that does
 * not map to a representable Date (NaN, +/-Infinity, or far outside the Date
 * range) also falls back to the current time instead of letting
 * `toISOString()` throw a RangeError, because callers build log records
 * outside their own try/catch blocks.
 *
 * @param {number} [tsSeconds] - Seconds since the Unix epoch.
 * @returns {string} Timestamp such as `2023-11-14T22:13:20.000Z`.
 */
function utcIso(tsSeconds) {
  if (tsSeconds !== void 0) {
    const stamp = new Date(tsSeconds * 1e3);
    // An Invalid Date reports NaN from getTime().
    if (!Number.isNaN(stamp.getTime())) return stamp.toISOString();
  }
  return new Date().toISOString();
}
|
|
5185
|
+
/**
 * Write `payload` as pretty-printed JSON to `filePath` via a temporary file.
 *
 * The parent directory is created if needed. The content is written to a
 * uniquely named `.tmp.<pid>.<random>.json` sibling, synced, closed, and then
 * renamed over the destination. On any failure the temporary file is removed
 * (best effort) and the original error is rethrown.
 *
 * @param {string} filePath - Destination file.
 * @param {unknown} payload - Value serialised with `JSON.stringify(_, null, 2)`.
 * @returns {Promise<void>}
 * @throws Whatever the underlying filesystem operation rejected with.
 */
async function atomicWriteJson(filePath, payload) {
  const targetDir = path2.dirname(filePath);
  await fsp.mkdir(targetDir, { recursive: true });

  const suffix = crypto3.randomBytes(4).toString("hex");
  const stagingPath = path2.join(targetDir, `.tmp.${process.pid}.${suffix}.json`);
  // Cleanup is best effort: the staging file may never have been created.
  const discardStaging = () => fsp.unlink(stagingPath).catch(() => {});

  try {
    const staging = await fsp.open(stagingPath, "w");
    try {
      const serialized = JSON.stringify(payload, null, 2) + "\n";
      await staging.writeFile(serialized, { encoding: "utf8" });
      await staging.sync();
    } finally {
      await staging.close();
    }
    await fsp.rename(stagingPath, filePath);
  } catch (failure) {
    await discardStaging();
    throw failure;
  }
}
|
|
5206
|
+
/**
 * Append one record to a JSON Lines file, creating parent directories first.
 *
 * @param {string} filePath - Target `.jsonl` file.
 * @param {unknown} record - Value serialised on a single line.
 * @returns {Promise<void>}
 */
async function appendJsonl(filePath, record) {
  const parentDir = path2.dirname(filePath);
  await fsp.mkdir(parentDir, { recursive: true });
  const line = JSON.stringify(record) + "\n";
  await fsp.appendFile(filePath, line, { encoding: "utf8" });
}
|
|
5210
|
+
/**
 * Opt-in, best-effort per-call filesystem logger.
 *
 * Layout: `<root>/calls/YYYY/MM/DD/<callId>/` containing `metadata.json`,
 * `transcript.jsonl` and `events.jsonl`. Write failures are logged as
 * warnings and never thrown to the caller.
 *
 * The directory chosen in `logCallStart` is remembered for the lifetime of
 * the call, so later turns, events and the final metadata update land in the
 * same folder even when the call crosses a UTC day boundary.
 */
var CallLogger = class {
  /** Resolved log root, or `null` when logging is disabled. */
  root;
  /** callId -> directory chosen at call start; entries are dropped in logCallEnd. */
  activeCallDirs = /* @__PURE__ */ new Map();

  /**
   * @param {string|null} [root] - Log root. Falsy disables logging. `~`,
   *   `~/...` and `~\...` are expanded to the current user's home directory.
   *   If the directory cannot be created a warning is logged and logging is
   *   disabled.
   */
  constructor(root) {
    if (!root) {
      this.root = null;
      return;
    }
    // Only the current user's home is expanded; "~name/..." is left untouched.
    const isHomeRelative = root === "~" || root.startsWith("~/") || root.startsWith("~\\");
    const resolved = isHomeRelative ? path2.join(os.homedir(), root.slice(1)) : root;
    try {
      fs2.mkdirSync(resolved, { recursive: true });
      this.root = resolved;
      getLogger().info(`Call logs: ${resolved}`);
    } catch (err) {
      getLogger().warn(
        `Could not create call log root ${resolved}: ${sanitizeLogValue(String(err))}`
      );
      this.root = null;
    }
  }

  /** True when a log root was resolved and created successfully. */
  get enabled() {
    return this.root !== null;
  }

  /**
   * Compute the directory for a call from its id and start time.
   *
   * @param {string} callId - Provider call identifier.
   * @param {number} [startedAtSeconds] - Unix seconds; defaults to now.
   * @returns {string|null} Directory path, or `null` when logging is disabled.
   */
  callDir(callId, startedAtSeconds) {
    if (this.root === null) return null;
    const ms = startedAtSeconds !== void 0 ? startedAtSeconds * 1e3 : Date.now();
    const dt = new Date(ms);
    const year = String(dt.getUTCFullYear()).padStart(4, "0");
    const month = String(dt.getUTCMonth() + 1).padStart(2, "0");
    const day = String(dt.getUTCDate()).padStart(2, "0");
    // Call ids can originate from webhook payloads: neutralise both path
    // separators and refuse "." / ".." so the id cannot leave the day folder.
    const cleaned = sanitizeLogValue(callId, 64).replace(/[\\/]/g, "_");
    const safeId = cleaned === "" || cleaned === "." || cleaned === ".." ? "unknown" : cleaned;
    return path2.join(this.root, "calls", year, month, day, safeId);
  }

  /**
   * Directory for follow-up writes: the one chosen at call start when known,
   * otherwise a directory derived from the current time.
   *
   * @param {string} callId
   * @returns {string|null}
   */
  resolveCallDir(callId) {
    return this.activeCallDirs.get(callId) ?? this.callDir(callId);
  }

  /**
   * Remember the directory of a started call. The map is capped so calls
   * that never reach `logCallEnd` cannot grow it without bound.
   *
   * @param {string} callId
   * @param {string} dir
   */
  trackCallDir(callId, dir) {
    const MAX_TRACKED_CALLS = 1024;
    // Re-insert so a restarted call id becomes the newest entry.
    this.activeCallDirs.delete(callId);
    this.activeCallDirs.set(callId, dir);
    if (this.activeCallDirs.size > MAX_TRACKED_CALLS) {
      const oldest = this.activeCallDirs.keys().next().value;
      this.activeCallDirs.delete(oldest);
    }
  }

  /**
   * Write the initial `metadata.json` for a call (status `in_progress`).
   * Roughly 2% of invocations (5 in 256) also trigger a retention sweep.
   *
   * @param {string} callId
   * @param {object} [input] - Optional `traceId`, `caller`, `callee`,
   *   `telephonyProvider`, `providerMode`, `agent`.
   * @returns {Promise<void>}
   */
  async logCallStart(callId, input = {}) {
    if (!this.enabled) return;
    const startedAt = Date.now() / 1e3;
    const dir = this.callDir(callId, startedAt);
    if (dir === null) return;
    this.trackCallDir(callId, dir);
    const metadata = {
      schema_version: SCHEMA_VERSION,
      call_id: callId,
      trace_id: input.traceId ?? null,
      started_at: utcIso(startedAt),
      ended_at: null,
      duration_ms: null,
      status: "in_progress",
      caller: redactPhone(input.caller ?? ""),
      callee: redactPhone(input.callee ?? ""),
      telephony_provider: input.telephonyProvider ?? "",
      provider_mode: input.providerMode ?? "",
      agent: input.agent ?? {},
      turns: 0,
      cost: null,
      latency: null,
      error: null
    };
    try {
      await atomicWriteJson(path2.join(dir, "metadata.json"), metadata);
    } catch (err) {
      getLogger().warn(`call_log write failed (${sanitizeLogValue(callId)}): ${sanitizeLogValue(String(err))}`);
    }
    if (crypto3.randomBytes(1)[0] < 5) {
      this.sweepOldDays();
    }
  }

  /**
   * Append one turn to the call's `transcript.jsonl`.
   *
   * @param {string} callId
   * @param {object} turn - Spread into the record; a numeric `timestamp`
   *   is passed to `utcIso` to produce the `ts` field.
   * @returns {Promise<void>}
   */
  async logTurn(callId, turn) {
    if (!this.enabled) return;
    const dir = this.resolveCallDir(callId);
    if (dir === null) return;
    const record = {
      schema_version: SCHEMA_VERSION,
      ts: utcIso(typeof turn.timestamp === "number" ? turn.timestamp : void 0),
      ...turn
    };
    try {
      await appendJsonl(path2.join(dir, "transcript.jsonl"), record);
    } catch (err) {
      getLogger().warn(
        `call_log turn write failed (${sanitizeLogValue(callId)}): ${sanitizeLogValue(String(err))}`
      );
    }
  }

  /**
   * Append one event to the call's `events.jsonl`.
   *
   * @param {string} callId
   * @param {string} eventType - Stored as `type`.
   * @param {object} [payload] - Stored as `data`.
   * @returns {Promise<void>}
   */
  async logEvent(callId, eventType, payload = {}) {
    if (!this.enabled) return;
    const dir = this.resolveCallDir(callId);
    if (dir === null) return;
    const record = {
      schema_version: SCHEMA_VERSION,
      ts: utcIso(),
      type: eventType,
      data: payload
    };
    try {
      await appendJsonl(path2.join(dir, "events.jsonl"), record);
    } catch (err) {
      getLogger().warn(
        `call_log event write failed (${sanitizeLogValue(callId)}): ${sanitizeLogValue(String(err))}`
      );
    }
  }

  /**
   * Finalise `metadata.json`: merge the end-of-call fields over whatever was
   * written at call start (or over a minimal stub when that file is missing
   * or unreadable), then forget the call's remembered directory.
   *
   * @param {string} callId
   * @param {object} [input] - Optional `durationSeconds`, `status`, `turns`,
   *   `cost`, `latency`, `error`.
   * @returns {Promise<void>}
   */
  async logCallEnd(callId, input = {}) {
    if (!this.enabled) return;
    const dir = this.resolveCallDir(callId);
    if (dir === null) return;
    const metadataPath = path2.join(dir, "metadata.json");
    let existing = {};
    try {
      existing = JSON.parse(await fsp.readFile(metadataPath, "utf8"));
    } catch {
      // No usable start record: fall back to a minimal stub.
      existing = {
        schema_version: SCHEMA_VERSION,
        call_id: callId,
        started_at: null
      };
    }
    const merged = {
      ...existing,
      ended_at: utcIso(),
      duration_ms: input.durationSeconds !== void 0 ? Math.round(input.durationSeconds * 1e3 * 10) / 10 : null,
      status: input.status ?? "completed",
      turns: input.turns ?? null,
      cost: input.cost ?? null,
      latency: input.latency ?? null,
      error: input.error ?? null
    };
    try {
      await atomicWriteJson(metadataPath, merged);
    } catch (err) {
      getLogger().warn(
        `call_log finalize failed (${sanitizeLogValue(callId)}): ${sanitizeLogValue(String(err))}`
      );
    } finally {
      this.activeCallDirs.delete(callId);
    }
  }

  // --- Retention ---------------------------------------------------------

  /**
   * Delete day directories older than the retention window and prune month
   * and year directories that became empty. A retention of 0 days disables
   * the sweep. Runs synchronously; failures are logged at debug level.
   */
  sweepOldDays() {
    if (this.root === null) return;
    const days = retentionDays();
    if (days === 0) return;
    const cutoff = Date.now() / 1e3 - days * 86400;
    const callsRoot = path2.join(this.root, "calls");
    if (!fs2.existsSync(callsRoot)) return;
    try {
      for (const yearName of fs2.readdirSync(callsRoot)) {
        if (!/^\d+$/.test(yearName)) continue;
        const yearDir = path2.join(callsRoot, yearName);
        if (!fs2.statSync(yearDir).isDirectory()) continue;
        for (const monthName of fs2.readdirSync(yearDir)) {
          if (!/^\d+$/.test(monthName)) continue;
          const monthDir = path2.join(yearDir, monthName);
          if (!fs2.statSync(monthDir).isDirectory()) continue;
          for (const dayName of fs2.readdirSync(monthDir)) {
            if (!/^\d+$/.test(dayName)) continue;
            const dayDir = path2.join(monthDir, dayName);
            const y = Number.parseInt(yearName, 10);
            const m = Number.parseInt(monthName, 10);
            const d = Number.parseInt(dayName, 10);
            // Compare the start of the folder's UTC day against the cutoff.
            const ts = Date.UTC(y, m - 1, d) / 1e3;
            if (ts < cutoff) {
              rmTree(dayDir);
            }
          }
          try {
            if (fs2.readdirSync(monthDir).length === 0) fs2.rmdirSync(monthDir);
          } catch {
            // Pruning empty parents is best effort.
          }
        }
        try {
          if (fs2.readdirSync(yearDir).length === 0) fs2.rmdirSync(yearDir);
        } catch {
          // Pruning empty parents is best effort.
        }
      }
    } catch (err) {
      getLogger().debug(`call_log sweep failed: ${sanitizeLogValue(String(err))}`);
    }
  }
};
|
|
5385
|
+
/**
 * Best-effort recursive removal of `target`.
 *
 * Delegates to `fs.rmSync` with `recursive` and `force`, so a missing path
 * is not an error and symbolic links are removed rather than followed.
 * Any failure is swallowed on purpose: retention cleanup must never break
 * call handling, and a later sweep can try again.
 *
 * @param {string} target - Path to delete.
 */
function rmTree(target) {
  try {
    fs2.rmSync(target, { recursive: true, force: true });
  } catch {
    // Deliberately ignored; see the function comment.
  }
}
|
|
5403
|
+
|
|
3138
5404
|
// src/server.ts
|
|
3139
5405
|
var TRANSFER_CALL_TOOL = {
|
|
3140
5406
|
name: "transfer_call",
|
|
@@ -3171,37 +5437,76 @@ function validateWebhookUrl(url) {
|
|
|
3171
5437
|
if (!["http:", "https:"].includes(parsed.protocol)) {
|
|
3172
5438
|
throw new Error(`Invalid webhook URL scheme: ${parsed.protocol}`);
|
|
3173
5439
|
}
|
|
3174
|
-
const
|
|
3175
|
-
const
|
|
3176
|
-
|
|
3177
|
-
|
|
3178
|
-
|
|
3179
|
-
|
|
3180
|
-
|
|
3181
|
-
|
|
3182
|
-
|
|
3183
|
-
|
|
3184
|
-
|
|
3185
|
-
|
|
3186
|
-
|
|
3187
|
-
|
|
5440
|
+
const rawHost = parsed.hostname;
|
|
5441
|
+
const host = rawHost.replace(/^\[/, "").replace(/\]$/, "").toLowerCase();
|
|
5442
|
+
const BLOCKED_HOSTNAMES = /* @__PURE__ */ new Set([
|
|
5443
|
+
"localhost",
|
|
5444
|
+
"ip6-localhost",
|
|
5445
|
+
"ip6-loopback",
|
|
5446
|
+
"metadata",
|
|
5447
|
+
"metadata.google.internal",
|
|
5448
|
+
"metadata.azure.com"
|
|
5449
|
+
]);
|
|
5450
|
+
if (BLOCKED_HOSTNAMES.has(host)) {
|
|
5451
|
+
throw new Error(`Webhook URL blocked: ${rawHost} is a private/internal address`);
|
|
5452
|
+
}
|
|
5453
|
+
const IPV4_RE = /^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/;
|
|
5454
|
+
const v4 = IPV4_RE.exec(host);
|
|
5455
|
+
if (v4) {
|
|
5456
|
+
const oct = v4.slice(1, 5).map((s) => parseInt(s, 10));
|
|
5457
|
+
if (oct.some((n) => n < 0 || n > 255)) {
|
|
5458
|
+
throw new Error(`Webhook URL blocked: ${rawHost} is not a valid IPv4 address`);
|
|
5459
|
+
}
|
|
5460
|
+
const [a, b] = oct;
|
|
5461
|
+
if (a === 0 || // 0.0.0.0/8 (any 0.x)
|
|
5462
|
+
a === 10 || // 10.0.0.0/8
|
|
5463
|
+
a === 127 || // 127.0.0.0/8 loopback
|
|
5464
|
+
a === 169 && b === 254 || // 169.254.0.0/16 link-local
|
|
5465
|
+
a === 172 && b >= 16 && b <= 31 || // 172.16.0.0/12
|
|
5466
|
+
a === 192 && b === 168) {
|
|
5467
|
+
throw new Error(`Webhook URL blocked: ${rawHost} is a private/internal address`);
|
|
5468
|
+
}
|
|
5469
|
+
return;
|
|
5470
|
+
}
|
|
5471
|
+
if (host.includes(":")) {
|
|
5472
|
+
if (host === "::1" || host === "::") {
|
|
5473
|
+
throw new Error(`Webhook URL blocked: ${rawHost} is a private/internal address`);
|
|
5474
|
+
}
|
|
5475
|
+
if (/^fc[0-9a-f]{0,2}:/.test(host) || /^fd[0-9a-f]{0,2}:/.test(host)) {
|
|
5476
|
+
throw new Error(`Webhook URL blocked: ${rawHost} is a private/internal address`);
|
|
5477
|
+
}
|
|
5478
|
+
if (/^fe[89ab][0-9a-f]?:/.test(host)) {
|
|
5479
|
+
throw new Error(`Webhook URL blocked: ${rawHost} is a private/internal address`);
|
|
5480
|
+
}
|
|
3188
5481
|
}
|
|
3189
5482
|
}
|
|
3190
5483
|
function validateTelnyxSignature(rawBody, signature, timestamp, publicKey, toleranceSec = 300) {
|
|
3191
5484
|
try {
|
|
3192
5485
|
const ts = parseInt(timestamp, 10);
|
|
3193
5486
|
if (!Number.isFinite(ts)) return false;
|
|
3194
|
-
const
|
|
5487
|
+
const tsMs = ts < 1e12 ? ts * 1e3 : ts;
|
|
5488
|
+
const ageMs = Date.now() - tsMs;
|
|
3195
5489
|
if (ageMs < 0 || ageMs > toleranceSec * 1e3) return false;
|
|
3196
5490
|
const payload = `${timestamp}|${rawBody}`;
|
|
3197
5491
|
const keyBuffer = Buffer.from(publicKey, "base64");
|
|
3198
|
-
const
|
|
3199
|
-
const keyObject = crypto3.createPublicKey({
|
|
5492
|
+
const keyObject = crypto4.createPublicKey({
|
|
3200
5493
|
key: keyBuffer,
|
|
3201
5494
|
format: "der",
|
|
3202
5495
|
type: "spki"
|
|
3203
5496
|
});
|
|
3204
|
-
|
|
5497
|
+
for (const rawSig of signature.split(",")) {
|
|
5498
|
+
const trimmed = rawSig.trim();
|
|
5499
|
+
if (!trimmed) continue;
|
|
5500
|
+
try {
|
|
5501
|
+
const sigBuffer = Buffer.from(trimmed, "base64");
|
|
5502
|
+
if (crypto4.verify(null, Buffer.from(payload), keyObject, sigBuffer)) {
|
|
5503
|
+
return true;
|
|
5504
|
+
}
|
|
5505
|
+
} catch {
|
|
5506
|
+
continue;
|
|
5507
|
+
}
|
|
5508
|
+
}
|
|
5509
|
+
return false;
|
|
3205
5510
|
} catch {
|
|
3206
5511
|
return false;
|
|
3207
5512
|
}
|
|
@@ -3211,9 +5516,12 @@ function validateTwilioSid(sid, prefix = "CA") {
|
|
|
3211
5516
|
}
|
|
3212
5517
|
function validateTwilioSignature(url, params, signature, authToken) {
|
|
3213
5518
|
const data = url + Object.keys(params).sort().reduce((acc, key) => acc + key + (params[key] ?? ""), "");
|
|
3214
|
-
const expected =
|
|
5519
|
+
const expected = crypto4.createHmac("sha1", authToken).update(data).digest("base64");
|
|
3215
5520
|
try {
|
|
3216
|
-
|
|
5521
|
+
const sigBuf = Buffer.from(signature);
|
|
5522
|
+
const expBuf = Buffer.from(expected);
|
|
5523
|
+
if (sigBuf.length !== expBuf.length) return false;
|
|
5524
|
+
return crypto4.timingSafeEqual(sigBuf, expBuf);
|
|
3217
5525
|
} catch {
|
|
3218
5526
|
return false;
|
|
3219
5527
|
}
|
|
@@ -3246,9 +5554,7 @@ function buildAIAdapter(config, agent, resolvedPrompt) {
|
|
|
3246
5554
|
return new ElevenLabsConvAIAdapter(
|
|
3247
5555
|
engine.apiKey,
|
|
3248
5556
|
engine.agentId,
|
|
3249
|
-
agent.voice ?? "
|
|
3250
|
-
"eleven_turbo_v2_5",
|
|
3251
|
-
agent.language ?? "en",
|
|
5557
|
+
agent.voice ?? "EXAVITQu4vr4xnSDxMaL",
|
|
3252
5558
|
agent.firstMessage ?? ""
|
|
3253
5559
|
);
|
|
3254
5560
|
}
|
|
@@ -3353,7 +5659,7 @@ function isValidTelnyxTransferTarget(target) {
|
|
|
3353
5659
|
if (/^\+[1-9]\d{6,14}$/.test(target)) return true;
|
|
3354
5660
|
return /^sips?:[^\s@]+(@[^\s]+)?$/i.test(target);
|
|
3355
5661
|
}
|
|
3356
|
-
var TELNYX_DTMF_ALLOWED = new Set("0123456789*#
|
|
5662
|
+
var TELNYX_DTMF_ALLOWED = new Set("0123456789*#ABCDabcdwW");
|
|
3357
5663
|
var TELNYX_DTMF_DURATION_MS = 250;
|
|
3358
5664
|
async function sleep(ms) {
|
|
3359
5665
|
if (ms <= 0) return;
|
|
@@ -3379,7 +5685,7 @@ var TelnyxBridge = class {
|
|
|
3379
5685
|
return;
|
|
3380
5686
|
}
|
|
3381
5687
|
const telnyxKey = this.config.telnyxKey ?? "";
|
|
3382
|
-
await fetch(`https://api.telnyx.com/v2/calls/${callId}/actions/transfer`, {
|
|
5688
|
+
await fetch(`https://api.telnyx.com/v2/calls/${encodeURIComponent(callId)}/actions/transfer`, {
|
|
3383
5689
|
method: "POST",
|
|
3384
5690
|
headers: { "Content-Type": "application/json", "Authorization": `Bearer ${telnyxKey}` },
|
|
3385
5691
|
body: JSON.stringify({ to: toNumber })
|
|
@@ -3403,7 +5709,7 @@ var TelnyxBridge = class {
|
|
|
3403
5709
|
}
|
|
3404
5710
|
const duration = Math.max(100, Math.min(500, TELNYX_DTMF_DURATION_MS));
|
|
3405
5711
|
for (let i = 0; i < filtered.length; i += 1) {
|
|
3406
|
-
await fetch(`https://api.telnyx.com/v2/calls/${callId}/actions/send_dtmf`, {
|
|
5712
|
+
await fetch(`https://api.telnyx.com/v2/calls/${encodeURIComponent(callId)}/actions/send_dtmf`, {
|
|
3407
5713
|
method: "POST",
|
|
3408
5714
|
headers: { "Content-Type": "application/json", "Authorization": `Bearer ${telnyxKey}` },
|
|
3409
5715
|
body: JSON.stringify({ digits: filtered[i], duration_millis: duration })
|
|
@@ -3418,7 +5724,7 @@ var TelnyxBridge = class {
|
|
|
3418
5724
|
const telnyxKey = this.config.telnyxKey ?? "";
|
|
3419
5725
|
if (!telnyxKey || !callId) return;
|
|
3420
5726
|
try {
|
|
3421
|
-
const resp = await fetch(`https://api.telnyx.com/v2/calls/${callId}/actions/record_start`, {
|
|
5727
|
+
const resp = await fetch(`https://api.telnyx.com/v2/calls/${encodeURIComponent(callId)}/actions/record_start`, {
|
|
3422
5728
|
method: "POST",
|
|
3423
5729
|
headers: { "Content-Type": "application/json", "Authorization": `Bearer ${telnyxKey}` },
|
|
3424
5730
|
body: JSON.stringify({ format: "mp3", channels: "single" })
|
|
@@ -3436,7 +5742,7 @@ var TelnyxBridge = class {
|
|
|
3436
5742
|
const telnyxKey = this.config.telnyxKey ?? "";
|
|
3437
5743
|
if (!telnyxKey || !callId) return;
|
|
3438
5744
|
try {
|
|
3439
|
-
const resp = await fetch(`https://api.telnyx.com/v2/calls/${callId}/actions/record_stop`, {
|
|
5745
|
+
const resp = await fetch(`https://api.telnyx.com/v2/calls/${encodeURIComponent(callId)}/actions/record_stop`, {
|
|
3440
5746
|
method: "POST",
|
|
3441
5747
|
headers: { "Content-Type": "application/json", "Authorization": `Bearer ${telnyxKey}` },
|
|
3442
5748
|
body: JSON.stringify({})
|
|
@@ -3450,11 +5756,11 @@ var TelnyxBridge = class {
|
|
|
3450
5756
|
getLogger().warn(`Telnyx record_stop error: ${String(e)}`);
|
|
3451
5757
|
}
|
|
3452
5758
|
}
|
|
3453
|
-
async endCall(callId,
|
|
5759
|
+
async endCall(callId, _ws) {
|
|
3454
5760
|
const telnyxKey = this.config.telnyxKey ?? "";
|
|
3455
5761
|
if (callId && telnyxKey) {
|
|
3456
5762
|
try {
|
|
3457
|
-
await fetch(`https://api.telnyx.com/v2/calls/${callId}/actions/hangup`, {
|
|
5763
|
+
await fetch(`https://api.telnyx.com/v2/calls/${encodeURIComponent(callId)}/actions/hangup`, {
|
|
3458
5764
|
method: "POST",
|
|
3459
5765
|
headers: { "Content-Type": "application/json", "Authorization": `Bearer ${telnyxKey}` },
|
|
3460
5766
|
body: JSON.stringify({})
|
|
@@ -3462,7 +5768,6 @@ var TelnyxBridge = class {
|
|
|
3462
5768
|
} catch {
|
|
3463
5769
|
}
|
|
3464
5770
|
}
|
|
3465
|
-
ws.close();
|
|
3466
5771
|
}
|
|
3467
5772
|
createStt(agent) {
|
|
3468
5773
|
return createSTT(agent);
|
|
@@ -3471,7 +5776,7 @@ var TelnyxBridge = class {
|
|
|
3471
5776
|
if (this.config.telnyxKey && callId) {
|
|
3472
5777
|
try {
|
|
3473
5778
|
const resp = await fetch(
|
|
3474
|
-
`https://api.telnyx.com/v2/calls/${callId}`,
|
|
5779
|
+
`https://api.telnyx.com/v2/calls/${encodeURIComponent(callId)}`,
|
|
3475
5780
|
{
|
|
3476
5781
|
headers: { "Authorization": `Bearer ${this.config.telnyxKey}` },
|
|
3477
5782
|
signal: AbortSignal.timeout(5e3)
|
|
@@ -3506,6 +5811,17 @@ var EmbeddedServer = class {
|
|
|
3506
5811
|
this.dashboardToken = dashboardToken;
|
|
3507
5812
|
this.metricsStore = new MetricsStore();
|
|
3508
5813
|
this.pricing = mergePricing(pricingOverrides);
|
|
5814
|
+
const logRoot = resolveLogRoot();
|
|
5815
|
+
if (logRoot) {
|
|
5816
|
+
try {
|
|
5817
|
+
const restored = this.metricsStore.hydrate(logRoot);
|
|
5818
|
+
if (restored > 0) {
|
|
5819
|
+
getLogger().info(`Dashboard hydrated ${restored} call(s) from ${logRoot}`);
|
|
5820
|
+
}
|
|
5821
|
+
} catch (err) {
|
|
5822
|
+
getLogger().warn(`Dashboard hydration failed: ${String(err)}`);
|
|
5823
|
+
}
|
|
5824
|
+
}
|
|
3509
5825
|
}
|
|
3510
5826
|
server = null;
|
|
3511
5827
|
wss = null;
|
|
@@ -3514,6 +5830,8 @@ var EmbeddedServer = class {
|
|
|
3514
5830
|
metricsStore;
|
|
3515
5831
|
pricing;
|
|
3516
5832
|
remoteHandler = new RemoteMessageHandler();
|
|
5833
|
+
/** Opt-in per-call filesystem logger (set via PATTER_LOG_DIR). */
|
|
5834
|
+
callLogger = new CallLogger(resolveLogRoot());
|
|
3517
5835
|
/** Active WebSocket connections tracked for graceful shutdown. */
|
|
3518
5836
|
activeConnections = /* @__PURE__ */ new Set();
|
|
3519
5837
|
activeCallIds = /* @__PURE__ */ new Map();
|
|
@@ -3522,6 +5840,18 @@ var EmbeddedServer = class {
|
|
|
3522
5840
|
if (!webhookUrlPattern.test(this.config.webhookUrl)) {
|
|
3523
5841
|
throw new Error(`Invalid webhookUrl: must be a hostname with no protocol prefix or path (got: '${this.config.webhookUrl}')`);
|
|
3524
5842
|
}
|
|
5843
|
+
if (this.config.requireSignature !== false) {
|
|
5844
|
+
if (this.config.telephonyProvider === "twilio" && !this.config.twilioToken) {
|
|
5845
|
+
getLogger().warn(
|
|
5846
|
+
"Twilio webhook enforcement ACTIVE but twilioToken is empty \u2014 webhooks will 503. Set requireSignature=false for local dev."
|
|
5847
|
+
);
|
|
5848
|
+
}
|
|
5849
|
+
if (this.config.telephonyProvider === "telnyx" && !this.config.telnyxPublicKey) {
|
|
5850
|
+
getLogger().warn(
|
|
5851
|
+
"Telnyx webhook enforcement ACTIVE but telnyxPublicKey is empty \u2014 webhooks will 503. Set requireSignature=false for local dev."
|
|
5852
|
+
);
|
|
5853
|
+
}
|
|
5854
|
+
}
|
|
3525
5855
|
const app = express();
|
|
3526
5856
|
app.use((req, _res, next) => {
|
|
3527
5857
|
if (req.path === "/webhooks/telnyx/voice") {
|
|
@@ -3549,14 +5879,8 @@ var EmbeddedServer = class {
|
|
|
3549
5879
|
res.json({ status: "ok", mode: "local" });
|
|
3550
5880
|
});
|
|
3551
5881
|
if (this.dashboard) {
|
|
3552
|
-
if (!this.dashboardToken) {
|
|
3553
|
-
getLogger().warn(
|
|
3554
|
-
"Dashboard is enabled without authentication. Set dashboardToken to protect call data. This is safe for local development but should not be exposed on a public network."
|
|
3555
|
-
);
|
|
3556
|
-
}
|
|
3557
5882
|
mountDashboard(app, this.metricsStore, this.dashboardToken);
|
|
3558
5883
|
mountApi(app, this.metricsStore, this.dashboardToken);
|
|
3559
|
-
getLogger().info("Dashboard: http://127.0.0.1:" + port + "/");
|
|
3560
5884
|
}
|
|
3561
5885
|
app.post("/webhooks/twilio/status", (req, res) => {
|
|
3562
5886
|
if (this.config.twilioToken) {
|
|
@@ -3567,6 +5891,10 @@ var EmbeddedServer = class {
|
|
|
3567
5891
|
res.status(403).send("Invalid signature");
|
|
3568
5892
|
return;
|
|
3569
5893
|
}
|
|
5894
|
+
} else if (this.config.requireSignature !== false) {
|
|
5895
|
+
getLogger().error("Twilio webhook rejected: twilioToken not configured and requireSignature is not false");
|
|
5896
|
+
res.status(503).send("Webhook signature required");
|
|
5897
|
+
return;
|
|
3570
5898
|
}
|
|
3571
5899
|
const body = req.body;
|
|
3572
5900
|
const callSid = sanitizeLogValue(body["CallSid"] ?? "");
|
|
@@ -3592,6 +5920,10 @@ var EmbeddedServer = class {
|
|
|
3592
5920
|
res.status(403).send("Invalid signature");
|
|
3593
5921
|
return;
|
|
3594
5922
|
}
|
|
5923
|
+
} else if (this.config.requireSignature !== false) {
|
|
5924
|
+
getLogger().error("Twilio webhook rejected: twilioToken not configured and requireSignature is not false");
|
|
5925
|
+
res.status(503).send("Webhook signature required");
|
|
5926
|
+
return;
|
|
3595
5927
|
}
|
|
3596
5928
|
const body = req.body;
|
|
3597
5929
|
const recordingSid = sanitizeLogValue(body["RecordingSid"] ?? "");
|
|
@@ -3609,6 +5941,10 @@ var EmbeddedServer = class {
|
|
|
3609
5941
|
res.status(403).send("Invalid signature");
|
|
3610
5942
|
return;
|
|
3611
5943
|
}
|
|
5944
|
+
} else if (this.config.requireSignature !== false) {
|
|
5945
|
+
getLogger().error("Twilio webhook rejected: twilioToken not configured and requireSignature is not false");
|
|
5946
|
+
res.status(503).send("Webhook signature required");
|
|
5947
|
+
return;
|
|
3612
5948
|
}
|
|
3613
5949
|
const body = req.body;
|
|
3614
5950
|
const answeredBy = body["AnsweredBy"] ?? "";
|
|
@@ -3651,6 +5987,10 @@ var EmbeddedServer = class {
|
|
|
3651
5987
|
res.status(403).send("Invalid signature");
|
|
3652
5988
|
return;
|
|
3653
5989
|
}
|
|
5990
|
+
} else if (this.config.requireSignature !== false) {
|
|
5991
|
+
getLogger().error("Twilio webhook rejected: twilioToken not configured and requireSignature is not false");
|
|
5992
|
+
res.status(503).send("Webhook signature required");
|
|
5993
|
+
return;
|
|
3654
5994
|
} else if (!this.twilioTokenWarningLogged) {
|
|
3655
5995
|
this.twilioTokenWarningLogged = true;
|
|
3656
5996
|
getLogger().warn("Twilio webhook signature validation disabled \u2014 set twilioToken for production");
|
|
@@ -3677,6 +6017,9 @@ var EmbeddedServer = class {
|
|
|
3677
6017
|
getLogger().warn("Telnyx webhook rejected: invalid or missing Ed25519 signature");
|
|
3678
6018
|
return res.status(403).send("Invalid signature");
|
|
3679
6019
|
}
|
|
6020
|
+
} else if (this.config.requireSignature !== false) {
|
|
6021
|
+
getLogger().error("Telnyx webhook rejected: telnyxPublicKey not configured and requireSignature is not false");
|
|
6022
|
+
return res.status(503).send("Webhook signature required");
|
|
3680
6023
|
} else if (!this.telnyxSigWarningLogged) {
|
|
3681
6024
|
this.telnyxSigWarningLogged = true;
|
|
3682
6025
|
getLogger().warn("Telnyx webhook signature verification is disabled. Set telnyxPublicKey in LocalOptions for production use.");
|
|
@@ -3704,6 +6047,17 @@ var EmbeddedServer = class {
|
|
|
3704
6047
|
}
|
|
3705
6048
|
return res.status(200).send();
|
|
3706
6049
|
}
|
|
6050
|
+
if (eventType === "call.machine.detection.ended") {
|
|
6051
|
+
const amdCallId = payload.call_control_id ?? "";
|
|
6052
|
+
const amdResult = String(payload.result ?? "");
|
|
6053
|
+
getLogger().info(
|
|
6054
|
+
`Telnyx AMD result for ${sanitizeLogValue(amdCallId)}: ${sanitizeLogValue(amdResult)}`
|
|
6055
|
+
);
|
|
6056
|
+
if (amdCallId && (amdResult === "machine" || amdResult === "machine_detected")) {
|
|
6057
|
+
await this.handleTelnyxAmdVoicemail(amdCallId);
|
|
6058
|
+
}
|
|
6059
|
+
return res.status(200).send();
|
|
6060
|
+
}
|
|
3707
6061
|
const callControlId = payload.call_control_id ?? "";
|
|
3708
6062
|
if (!callControlId) {
|
|
3709
6063
|
getLogger().warn("Telnyx webhook rejected: missing call_control_id");
|
|
@@ -3721,27 +6075,18 @@ var EmbeddedServer = class {
|
|
|
3721
6075
|
};
|
|
3722
6076
|
try {
|
|
3723
6077
|
if (eventType === "call.initiated") {
|
|
3724
|
-
getLogger().info(`Telnyx call.initiated ${callControlId} \u2014 answering`);
|
|
3725
|
-
const resp = await fetch(`${apiBase}/calls/${encodeURIComponent(callControlId)}/actions/answer`, {
|
|
3726
|
-
method: "POST",
|
|
3727
|
-
headers: authHeaders,
|
|
3728
|
-
body: JSON.stringify({}),
|
|
3729
|
-
signal: AbortSignal.timeout(1e4)
|
|
3730
|
-
});
|
|
3731
|
-
if (!resp.ok) {
|
|
3732
|
-
getLogger().warn(`Telnyx answer failed: ${resp.status} ${(await resp.text()).slice(0, 200)}`);
|
|
3733
|
-
}
|
|
3734
|
-
} else if (eventType === "call.answered") {
|
|
3735
6078
|
const caller = payload.from ?? "";
|
|
3736
6079
|
const callee = payload.to ?? "";
|
|
3737
6080
|
const streamUrl = `wss://${this.config.webhookUrl}/ws/stream/${encodeURIComponent(callControlId)}?caller=${encodeURIComponent(caller)}&callee=${encodeURIComponent(callee)}`;
|
|
3738
|
-
getLogger().info(`Telnyx call.
|
|
3739
|
-
const resp = await fetch(`${apiBase}/calls/${encodeURIComponent(callControlId)}/actions/
|
|
6081
|
+
getLogger().info(`Telnyx call.initiated ${callControlId} \u2014 answering with inline stream`);
|
|
6082
|
+
const resp = await fetch(`${apiBase}/calls/${encodeURIComponent(callControlId)}/actions/answer`, {
|
|
3740
6083
|
method: "POST",
|
|
3741
6084
|
headers: authHeaders,
|
|
3742
6085
|
body: JSON.stringify({
|
|
3743
6086
|
stream_url: streamUrl,
|
|
3744
|
-
|
|
6087
|
+
// ``inbound_track`` halves WS upstream bandwidth — outbound
|
|
6088
|
+
// echo was always filtered downstream anyway.
|
|
6089
|
+
stream_track: "inbound_track",
|
|
3745
6090
|
stream_bidirectional_mode: "rtp",
|
|
3746
6091
|
stream_bidirectional_codec: "PCMU",
|
|
3747
6092
|
stream_bidirectional_sampling_rate: 8e3,
|
|
@@ -3750,8 +6095,10 @@ var EmbeddedServer = class {
|
|
|
3750
6095
|
signal: AbortSignal.timeout(1e4)
|
|
3751
6096
|
});
|
|
3752
6097
|
if (!resp.ok) {
|
|
3753
|
-
getLogger().warn(`Telnyx
|
|
6098
|
+
getLogger().warn(`Telnyx answer failed: ${resp.status} ${(await resp.text()).slice(0, 200)}`);
|
|
3754
6099
|
}
|
|
6100
|
+
} else if (eventType === "call.answered") {
|
|
6101
|
+
getLogger().debug(`Telnyx call.answered ${callControlId} \u2014 stream already active (inline)`);
|
|
3755
6102
|
} else {
|
|
3756
6103
|
getLogger().debug(`Telnyx event ignored: ${eventType}`);
|
|
3757
6104
|
}
|
|
@@ -3773,7 +6120,6 @@ var EmbeddedServer = class {
|
|
|
3773
6120
|
socket.destroy();
|
|
3774
6121
|
return;
|
|
3775
6122
|
}
|
|
3776
|
-
getLogger().info(`Upgrade request: ${req.url}`);
|
|
3777
6123
|
this.wss.handleUpgrade(req, socket, head, (ws) => {
|
|
3778
6124
|
wsConnectionsByIp.set(remoteIp, (wsConnectionsByIp.get(remoteIp) ?? 0) + 1);
|
|
3779
6125
|
ws.once("close", () => {
|
|
@@ -3789,7 +6135,6 @@ var EmbeddedServer = class {
|
|
|
3789
6135
|
});
|
|
3790
6136
|
this.wss.on("connection", (ws, req) => {
|
|
3791
6137
|
const url = new URL(req.url ?? "", `http://localhost`);
|
|
3792
|
-
getLogger().info(`WebSocket connected: ${req.url}`);
|
|
3793
6138
|
this.activeConnections.add(ws);
|
|
3794
6139
|
ws.once("close", () => {
|
|
3795
6140
|
this.activeConnections.delete(ws);
|
|
@@ -3803,28 +6148,86 @@ var EmbeddedServer = class {
|
|
|
3803
6148
|
});
|
|
3804
6149
|
await new Promise((resolve) => {
|
|
3805
6150
|
this.server.listen(port, "127.0.0.1", () => {
|
|
3806
|
-
getLogger().info(`
|
|
3807
|
-
\u2588\u2588\u2588\u2588\u2588\u2588\u2557 \u2588\u2588\u2588\u2588\u2588\u2557 \u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2557\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2557\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2557\u2588\u2588\u2588\u2588\u2588\u2588\u2557
|
|
3808
|
-
\u2588\u2588\u2554\u2550\u2550\u2588\u2588\u2557\u2588\u2588\u2554\u2550\u2550\u2588\u2588\u2557\u255A\u2550\u2550\u2588\u2588\u2554\u2550\u2550\u255D\u255A\u2550\u2550\u2588\u2588\u2554\u2550\u2550\u255D\u2588\u2588\u2554\u2550\u2550\u2550\u2550\u255D\u2588\u2588\u2554\u2550\u2550\u2588\u2588\u2557
|
|
3809
|
-
\u2588\u2588\u2588\u2588\u2588\u2588\u2554\u255D\u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2551 \u2588\u2588\u2551 \u2588\u2588\u2551 \u2588\u2588\u2588\u2588\u2588\u2557 \u2588\u2588\u2588\u2588\u2588\u2588\u2554\u255D
|
|
3810
|
-
\u2588\u2588\u2554\u2550\u2550\u2550\u255D \u2588\u2588\u2554\u2550\u2550\u2588\u2588\u2551 \u2588\u2588\u2551 \u2588\u2588\u2551 \u2588\u2588\u2554\u2550\u2550\u255D \u2588\u2588\u2554\u2550\u2550\u2588\u2588\u2557
|
|
3811
|
-
\u2588\u2588\u2551 \u2588\u2588\u2551 \u2588\u2588\u2551 \u2588\u2588\u2551 \u2588\u2588\u2551 \u2588\u2588\u2588\u2588\u2588\u2588\u2588\u2557\u2588\u2588\u2551 \u2588\u2588\u2551
|
|
3812
|
-
\u255A\u2550\u255D \u255A\u2550\u255D \u255A\u2550\u255D \u255A\u2550\u255D \u255A\u2550\u255D \u255A\u2550\u2550\u2550\u2550\u2550\u2550\u255D\u255A\u2550\u255D \u255A\u2550\u255D
|
|
3813
|
-
|
|
3814
|
-
Connect AI agents to phone numbers in 4 lines of code
|
|
3815
|
-
`);
|
|
3816
6151
|
getLogger().info(`Server on port ${port}`);
|
|
3817
6152
|
getLogger().info(`Webhook: https://${this.config.webhookUrl}`);
|
|
3818
|
-
getLogger().info(`Phone:
|
|
6153
|
+
getLogger().info(`Phone: ${this.config.phoneNumber}`);
|
|
6154
|
+
const model = this.agent.model ?? "";
|
|
6155
|
+
if (model && model !== "gpt-4o-mini-realtime-preview" && model.includes("realtime")) {
|
|
6156
|
+
getLogger().warn(
|
|
6157
|
+
`Agent uses "${sanitizeLogValue(model)}" but DEFAULT_PRICING.openai_realtime is calibrated for "gpt-4o-mini-realtime-preview". Pass Patter({ pricing: { openai_realtime: {...} } }) to set rates for this model, otherwise the dashboard cost display will under-report.`
|
|
6158
|
+
);
|
|
6159
|
+
}
|
|
6160
|
+
if (this.dashboard) {
|
|
6161
|
+
console.log("\n\u2500\u2500\u2500\u2500 Dashboard \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500");
|
|
6162
|
+
getLogger().info(`URL: http://127.0.0.1:${port}/`);
|
|
6163
|
+
if (!this.dashboardToken) {
|
|
6164
|
+
getLogger().warn(
|
|
6165
|
+
"Dashboard is enabled without authentication. Set dashboardToken to protect call data. This is safe for local development but should not be exposed on a public network."
|
|
6166
|
+
);
|
|
6167
|
+
}
|
|
6168
|
+
console.log("\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n");
|
|
6169
|
+
}
|
|
3819
6170
|
resolve();
|
|
3820
6171
|
});
|
|
3821
6172
|
});
|
|
3822
6173
|
}
|
|
6174
|
+
/**
|
|
6175
|
+
* Handle a Telnyx ``call.machine.detection.ended`` event when AMD returns
|
|
6176
|
+
* ``machine``: speak the configured voicemail message via ``actions/speak``
|
|
6177
|
+
* then hang up via ``actions/hangup``. Mirrors the Python
|
|
6178
|
+
* ``handle_amd_result`` helper.
|
|
6179
|
+
*/
|
|
6180
|
+
async handleTelnyxAmdVoicemail(callControlId) {
|
|
6181
|
+
const telnyxKey = this.config.telnyxKey ?? "";
|
|
6182
|
+
if (!callControlId || !telnyxKey || !this.voicemailMessage) {
|
|
6183
|
+
return;
|
|
6184
|
+
}
|
|
6185
|
+
const encoded = encodeURIComponent(callControlId);
|
|
6186
|
+
const headers = {
|
|
6187
|
+
"Content-Type": "application/json",
|
|
6188
|
+
Authorization: `Bearer ${telnyxKey}`
|
|
6189
|
+
};
|
|
6190
|
+
const estimatedMs = Math.min(
|
|
6191
|
+
3e4,
|
|
6192
|
+
Math.ceil(this.voicemailMessage.length / 14 * 1e3) + 1500
|
|
6193
|
+
);
|
|
6194
|
+
try {
|
|
6195
|
+
const speakResp = await fetch(
|
|
6196
|
+
`https://api.telnyx.com/v2/calls/${encoded}/actions/speak`,
|
|
6197
|
+
{
|
|
6198
|
+
method: "POST",
|
|
6199
|
+
headers,
|
|
6200
|
+
body: JSON.stringify({
|
|
6201
|
+
payload: this.voicemailMessage,
|
|
6202
|
+
voice: "female",
|
|
6203
|
+
language: "en-US"
|
|
6204
|
+
}),
|
|
6205
|
+
signal: AbortSignal.timeout(1e4)
|
|
6206
|
+
}
|
|
6207
|
+
);
|
|
6208
|
+
if (!speakResp.ok) {
|
|
6209
|
+
getLogger().warn(
|
|
6210
|
+
`Telnyx voicemail speak failed: ${speakResp.status} ${(await speakResp.text()).slice(0, 200)}`
|
|
6211
|
+
);
|
|
6212
|
+
}
|
|
6213
|
+
await new Promise((resolve) => setTimeout(resolve, estimatedMs));
|
|
6214
|
+
await fetch(`https://api.telnyx.com/v2/calls/${encoded}/actions/hangup`, {
|
|
6215
|
+
method: "POST",
|
|
6216
|
+
headers,
|
|
6217
|
+
body: JSON.stringify({}),
|
|
6218
|
+
signal: AbortSignal.timeout(1e4)
|
|
6219
|
+
});
|
|
6220
|
+
getLogger().info(`Voicemail dropped for Telnyx call ${sanitizeLogValue(callControlId)}`);
|
|
6221
|
+
} catch (e) {
|
|
6222
|
+
getLogger().warn(`Could not drop voicemail (Telnyx): ${String(e)}`);
|
|
6223
|
+
}
|
|
6224
|
+
}
|
|
3823
6225
|
// ---------------------------------------------------------------------------
|
|
3824
6226
|
// Stream handler helpers
|
|
3825
6227
|
// ---------------------------------------------------------------------------
|
|
3826
6228
|
/** Build the shared StreamHandlerDeps for the current server configuration. */
|
|
3827
6229
|
buildStreamHandlerDeps(bridge) {
|
|
6230
|
+
const [wrappedStart, wrappedMetrics, wrappedEnd] = this.wrapLoggingCallbacks(bridge);
|
|
3828
6231
|
return {
|
|
3829
6232
|
config: this.config,
|
|
3830
6233
|
agent: this.agent,
|
|
@@ -3832,17 +6235,84 @@ Connect AI agents to phone numbers in 4 lines of code
|
|
|
3832
6235
|
metricsStore: this.metricsStore,
|
|
3833
6236
|
pricing: this.pricing,
|
|
3834
6237
|
remoteHandler: this.remoteHandler,
|
|
3835
|
-
onCallStart:
|
|
3836
|
-
onCallEnd:
|
|
6238
|
+
onCallStart: wrappedStart,
|
|
6239
|
+
onCallEnd: wrappedEnd,
|
|
3837
6240
|
onTranscript: this.onTranscript,
|
|
3838
6241
|
onMessage: this.onMessage,
|
|
3839
|
-
onMetrics:
|
|
6242
|
+
onMetrics: wrappedMetrics,
|
|
3840
6243
|
recording: this.recording,
|
|
3841
6244
|
buildAIAdapter: (resolvedPrompt) => buildAIAdapter(this.config, this.agent, resolvedPrompt),
|
|
3842
6245
|
sanitizeVariables,
|
|
3843
6246
|
resolveVariables
|
|
3844
6247
|
};
|
|
3845
6248
|
}
|
|
6249
|
+
/**
|
|
6250
|
+
* Wrap user-supplied call lifecycle callbacks with CallLogger side-effects.
|
|
6251
|
+
* When PATTER_LOG_DIR is unset, the logger is disabled and the returned
|
|
6252
|
+
* wrappers degrade to just calling the user callbacks (still wrapped so
|
|
6253
|
+
* the logger stays consistent with future configuration changes).
|
|
6254
|
+
*/
|
|
6255
|
+
wrapLoggingCallbacks(bridge) {
|
|
6256
|
+
const logger = this.callLogger;
|
|
6257
|
+
const agent = this.agent;
|
|
6258
|
+
const userStart = this.onCallStart;
|
|
6259
|
+
const userMetrics = this.onMetrics;
|
|
6260
|
+
const userEnd = this.onCallEnd;
|
|
6261
|
+
const agentSnapshot = () => {
|
|
6262
|
+
const snap = {
|
|
6263
|
+
provider: agent.provider,
|
|
6264
|
+
model: agent.model,
|
|
6265
|
+
voice: agent.voice,
|
|
6266
|
+
language: agent.language
|
|
6267
|
+
};
|
|
6268
|
+
if (agent.stt && agent.tts && !("engine" in agent && agent.engine)) {
|
|
6269
|
+
snap.mode = "pipeline";
|
|
6270
|
+
}
|
|
6271
|
+
return Object.fromEntries(Object.entries(snap).filter(([, v]) => v !== void 0));
|
|
6272
|
+
};
|
|
6273
|
+
const wrappedStart = async (data) => {
|
|
6274
|
+
if (logger.enabled) {
|
|
6275
|
+
const callId = typeof data.call_id === "string" ? data.call_id : "";
|
|
6276
|
+
void logger.logCallStart(callId, {
|
|
6277
|
+
caller: typeof data.caller === "string" ? data.caller : "",
|
|
6278
|
+
callee: typeof data.callee === "string" ? data.callee : "",
|
|
6279
|
+
telephonyProvider: bridge.telephonyProvider,
|
|
6280
|
+
providerMode: agent.provider ?? "",
|
|
6281
|
+
agent: agentSnapshot()
|
|
6282
|
+
}).catch((err) => getLogger().error(`call_log start error: ${String(err)}`));
|
|
6283
|
+
}
|
|
6284
|
+
if (userStart) await userStart(data);
|
|
6285
|
+
};
|
|
6286
|
+
const wrappedMetrics = async (data) => {
|
|
6287
|
+
if (logger.enabled) {
|
|
6288
|
+
const callId = typeof data.call_id === "string" ? data.call_id : "";
|
|
6289
|
+
const turn = data.turn;
|
|
6290
|
+
if (turn && typeof turn === "object") {
|
|
6291
|
+
void logger.logTurn(callId, turn).catch((err) => getLogger().error(`call_log turn error: ${String(err)}`));
|
|
6292
|
+
}
|
|
6293
|
+
}
|
|
6294
|
+
if (userMetrics) await userMetrics(data);
|
|
6295
|
+
};
|
|
6296
|
+
const wrappedEnd = async (data) => {
|
|
6297
|
+
if (logger.enabled) {
|
|
6298
|
+
const callId = typeof data.call_id === "string" ? data.call_id : "";
|
|
6299
|
+
const metricsObj = data.metrics ?? null;
|
|
6300
|
+
const latency = metricsObj ? {
|
|
6301
|
+
p50_ms: metricsObj.latency_p50?.total_ms ?? null,
|
|
6302
|
+
p95_ms: metricsObj.latency_p95?.total_ms ?? null,
|
|
6303
|
+
p99_ms: metricsObj.latency_p99?.total_ms ?? null
|
|
6304
|
+
} : null;
|
|
6305
|
+
void logger.logCallEnd(callId, {
|
|
6306
|
+
durationSeconds: metricsObj?.duration_seconds,
|
|
6307
|
+
turns: metricsObj?.turns?.length,
|
|
6308
|
+
cost: metricsObj?.cost ?? null,
|
|
6309
|
+
latency
|
|
6310
|
+
}).catch((err) => getLogger().error(`call_log end error: ${String(err)}`));
|
|
6311
|
+
}
|
|
6312
|
+
if (userEnd) await userEnd(data);
|
|
6313
|
+
};
|
|
6314
|
+
return [wrappedStart, wrappedMetrics, wrappedEnd];
|
|
6315
|
+
}
|
|
3846
6316
|
// ---------------------------------------------------------------------------
|
|
3847
6317
|
// Twilio WebSocket message parser (thin layer)
|
|
3848
6318
|
// ---------------------------------------------------------------------------
|
|
@@ -3861,7 +6331,6 @@ Connect AI agents to phone numbers in 4 lines of code
|
|
|
3861
6331
|
return;
|
|
3862
6332
|
}
|
|
3863
6333
|
const event = data.event;
|
|
3864
|
-
getLogger().info(`WS event: ${event}`);
|
|
3865
6334
|
if (event === "start") {
|
|
3866
6335
|
handler.setStreamSid(data.streamSid ?? "");
|
|
3867
6336
|
const callSid = data.start?.callSid ?? "";
|
|
@@ -3872,6 +6341,8 @@ Connect AI agents to phone numbers in 4 lines of code
|
|
|
3872
6341
|
const payload = data.media?.payload ?? "";
|
|
3873
6342
|
handler.handleAudio(Buffer.from(payload, "base64"));
|
|
3874
6343
|
} else if (event === "mark") {
|
|
6344
|
+
const markName = String(data.mark?.name ?? "");
|
|
6345
|
+
if (markName) await handler.onMark(markName);
|
|
3875
6346
|
} else if (event === "dtmf") {
|
|
3876
6347
|
const digit = data.dtmf?.digit ?? "";
|
|
3877
6348
|
await handler.handleDtmf(digit);
|
|
@@ -3907,7 +6378,6 @@ Connect AI agents to phone numbers in 4 lines of code
|
|
|
3907
6378
|
}
|
|
3908
6379
|
const event = data.event ?? "";
|
|
3909
6380
|
if (event === "connected") return;
|
|
3910
|
-
getLogger().info(`Telnyx event: ${event}`);
|
|
3911
6381
|
if (event === "start" && !streamStarted) {
|
|
3912
6382
|
streamStarted = true;
|
|
3913
6383
|
const callControlId = data.start?.call_control_id ?? "";
|
|
@@ -4008,19 +6478,145 @@ Connect AI agents to phone numbers in 4 lines of code
|
|
|
4008
6478
|
};
|
|
4009
6479
|
|
|
4010
6480
|
// src/llm-loop.ts
|
|
6481
|
+
var DEFAULT_TOOL_MAX_RETRIES = 2;
|
|
6482
|
+
var DEFAULT_TOOL_RETRY_DELAY_MS = 500;
|
|
6483
|
+
var DEFAULT_TOOL_TIMEOUT_MS = 1e4;
|
|
6484
|
+
var TOOL_MAX_RESPONSE_BYTES = 1 * 1024 * 1024;
|
|
6485
|
+
var DefaultToolExecutor = class {
|
|
6486
|
+
maxRetries;
|
|
6487
|
+
retryDelayMs;
|
|
6488
|
+
requestTimeoutMs;
|
|
6489
|
+
constructor(opts = {}) {
|
|
6490
|
+
this.maxRetries = opts.maxRetries ?? DEFAULT_TOOL_MAX_RETRIES;
|
|
6491
|
+
this.retryDelayMs = opts.retryDelayMs ?? DEFAULT_TOOL_RETRY_DELAY_MS;
|
|
6492
|
+
this.requestTimeoutMs = opts.requestTimeoutMs ?? DEFAULT_TOOL_TIMEOUT_MS;
|
|
6493
|
+
}
|
|
6494
|
+
async execute(toolDef, args, callContext) {
|
|
6495
|
+
if (toolDef.handler) {
|
|
6496
|
+
try {
|
|
6497
|
+
return await toolDef.handler(args, callContext);
|
|
6498
|
+
} catch (e) {
|
|
6499
|
+
return JSON.stringify({
|
|
6500
|
+
error: `Tool handler error: ${String(e)}`,
|
|
6501
|
+
fallback: true
|
|
6502
|
+
});
|
|
6503
|
+
}
|
|
6504
|
+
}
|
|
6505
|
+
if (toolDef.webhookUrl) {
|
|
6506
|
+
try {
|
|
6507
|
+
validateWebhookUrl(toolDef.webhookUrl);
|
|
6508
|
+
} catch (e) {
|
|
6509
|
+
return JSON.stringify({ error: `Tool webhook URL rejected: ${String(e)}` });
|
|
6510
|
+
}
|
|
6511
|
+
const callId = typeof callContext.call_id === "string" ? callContext.call_id : "";
|
|
6512
|
+
return await withSpan(
|
|
6513
|
+
SPAN_TOOL,
|
|
6514
|
+
{
|
|
6515
|
+
"patter.tool.name": toolDef.name,
|
|
6516
|
+
"patter.tool.transport": "webhook",
|
|
6517
|
+
"patter.call.id": callId
|
|
6518
|
+
},
|
|
6519
|
+
async (span) => {
|
|
6520
|
+
const totalAttempts = this.maxRetries + 1;
|
|
6521
|
+
for (let attempt = 0; attempt < totalAttempts; attempt++) {
|
|
6522
|
+
span.setAttribute("patter.tool.attempt", attempt + 1);
|
|
6523
|
+
try {
|
|
6524
|
+
const resp = await fetch(toolDef.webhookUrl, {
|
|
6525
|
+
method: "POST",
|
|
6526
|
+
headers: { "Content-Type": "application/json" },
|
|
6527
|
+
body: JSON.stringify({
|
|
6528
|
+
tool: toolDef.name,
|
|
6529
|
+
arguments: args,
|
|
6530
|
+
...callContext,
|
|
6531
|
+
attempt: attempt + 1
|
|
6532
|
+
}),
|
|
6533
|
+
signal: AbortSignal.timeout(this.requestTimeoutMs)
|
|
6534
|
+
});
|
|
6535
|
+
if (!resp.ok) throw new Error(`HTTP ${resp.status}`);
|
|
6536
|
+
const result = JSON.stringify(await resp.json());
|
|
6537
|
+
if (result.length > TOOL_MAX_RESPONSE_BYTES) {
|
|
6538
|
+
return JSON.stringify({
|
|
6539
|
+
error: `Webhook response too large: ${result.length} bytes (max ${TOOL_MAX_RESPONSE_BYTES})`,
|
|
6540
|
+
fallback: true
|
|
6541
|
+
});
|
|
6542
|
+
}
|
|
6543
|
+
return result;
|
|
6544
|
+
} catch (e) {
|
|
6545
|
+
if (attempt < totalAttempts - 1) {
|
|
6546
|
+
getLogger().warn(
|
|
6547
|
+
`Tool webhook '${toolDef.name}' failed (attempt ${attempt + 1}), retrying: ${String(e)}`
|
|
6548
|
+
);
|
|
6549
|
+
await new Promise((r) => setTimeout(r, this.retryDelayMs));
|
|
6550
|
+
} else {
|
|
6551
|
+
span.recordException(e);
|
|
6552
|
+
return JSON.stringify({
|
|
6553
|
+
error: `Tool failed after ${totalAttempts} attempts: ${String(e)}`,
|
|
6554
|
+
fallback: true
|
|
6555
|
+
});
|
|
6556
|
+
}
|
|
6557
|
+
}
|
|
6558
|
+
}
|
|
6559
|
+
return JSON.stringify({
|
|
6560
|
+
error: `Tool '${toolDef.name}' exited retry loop unexpectedly`,
|
|
6561
|
+
fallback: true
|
|
6562
|
+
});
|
|
6563
|
+
}
|
|
6564
|
+
);
|
|
6565
|
+
}
|
|
6566
|
+
return JSON.stringify({
|
|
6567
|
+
error: `No handler or webhookUrl for tool '${toolDef.name}'`,
|
|
6568
|
+
fallback: true
|
|
6569
|
+
});
|
|
6570
|
+
}
|
|
6571
|
+
};
|
|
4011
6572
|
var OpenAILLMProvider = class {
|
|
4012
6573
|
apiKey;
|
|
4013
6574
|
model;
|
|
4014
|
-
|
|
6575
|
+
temperature;
|
|
6576
|
+
maxTokens;
|
|
6577
|
+
responseFormat;
|
|
6578
|
+
parallelToolCalls;
|
|
6579
|
+
toolChoice;
|
|
6580
|
+
seed;
|
|
6581
|
+
topP;
|
|
6582
|
+
frequencyPenalty;
|
|
6583
|
+
presencePenalty;
|
|
6584
|
+
stop;
|
|
6585
|
+
constructor(apiKey, model, sampling = {}) {
|
|
4015
6586
|
this.apiKey = apiKey;
|
|
4016
6587
|
this.model = model;
|
|
6588
|
+
this.temperature = sampling.temperature;
|
|
6589
|
+
this.maxTokens = sampling.maxTokens;
|
|
6590
|
+
this.responseFormat = sampling.responseFormat;
|
|
6591
|
+
this.parallelToolCalls = sampling.parallelToolCalls;
|
|
6592
|
+
this.toolChoice = sampling.toolChoice;
|
|
6593
|
+
this.seed = sampling.seed;
|
|
6594
|
+
this.topP = sampling.topP;
|
|
6595
|
+
this.frequencyPenalty = sampling.frequencyPenalty;
|
|
6596
|
+
this.presencePenalty = sampling.presencePenalty;
|
|
6597
|
+
this.stop = sampling.stop;
|
|
4017
6598
|
}
|
|
4018
6599
|
async *stream(messages, tools) {
|
|
4019
6600
|
const body = {
|
|
4020
6601
|
model: this.model,
|
|
4021
6602
|
messages,
|
|
4022
|
-
stream: true
|
|
6603
|
+
stream: true,
|
|
6604
|
+
// Ask OpenAI to include a final usage chunk so we can attribute token
|
|
6605
|
+
// cost. Without this the dashboard shows LLM cost = 0 for OpenAI.
|
|
6606
|
+
stream_options: { include_usage: true }
|
|
4023
6607
|
};
|
|
6608
|
+
if (this.temperature !== void 0) body.temperature = this.temperature;
|
|
6609
|
+
if (this.maxTokens !== void 0) {
|
|
6610
|
+
body.max_completion_tokens = this.maxTokens;
|
|
6611
|
+
}
|
|
6612
|
+
if (this.responseFormat !== void 0) body.response_format = this.responseFormat;
|
|
6613
|
+
if (this.parallelToolCalls !== void 0) body.parallel_tool_calls = this.parallelToolCalls;
|
|
6614
|
+
if (this.toolChoice !== void 0) body.tool_choice = this.toolChoice;
|
|
6615
|
+
if (this.seed !== void 0) body.seed = this.seed;
|
|
6616
|
+
if (this.topP !== void 0) body.top_p = this.topP;
|
|
6617
|
+
if (this.frequencyPenalty !== void 0) body.frequency_penalty = this.frequencyPenalty;
|
|
6618
|
+
if (this.presencePenalty !== void 0) body.presence_penalty = this.presencePenalty;
|
|
6619
|
+
if (this.stop !== void 0) body.stop = this.stop;
|
|
4024
6620
|
if (tools) {
|
|
4025
6621
|
body.tools = tools;
|
|
4026
6622
|
}
|
|
@@ -4059,6 +6655,16 @@ var OpenAILLMProvider = class {
|
|
|
4059
6655
|
} catch {
|
|
4060
6656
|
continue;
|
|
4061
6657
|
}
|
|
6658
|
+
if (chunk.usage) {
|
|
6659
|
+
const cached = chunk.usage.prompt_tokens_details?.cached_tokens ?? 0;
|
|
6660
|
+
const uncachedInput = Math.max(0, (chunk.usage.prompt_tokens ?? 0) - cached);
|
|
6661
|
+
yield {
|
|
6662
|
+
type: "usage",
|
|
6663
|
+
inputTokens: uncachedInput,
|
|
6664
|
+
outputTokens: chunk.usage.completion_tokens,
|
|
6665
|
+
cacheReadInputTokens: cached
|
|
6666
|
+
};
|
|
6667
|
+
}
|
|
4062
6668
|
const delta = chunk.choices?.[0]?.delta;
|
|
4063
6669
|
if (!delta) continue;
|
|
4064
6670
|
if (delta.content) {
|
|
@@ -4085,10 +6691,28 @@ var LLMLoop = class {
|
|
|
4085
6691
|
tools;
|
|
4086
6692
|
openaiTools;
|
|
4087
6693
|
toolMap;
|
|
6694
|
+
toolExecutor;
|
|
6695
|
+
eventBus;
|
|
6696
|
+
// Fix 10: track provider/model so usage chunks can be attributed for billing.
|
|
6697
|
+
_providerName;
|
|
6698
|
+
_modelName;
|
|
4088
6699
|
constructor(apiKey, model, systemPrompt, tools, llmProvider) {
|
|
4089
6700
|
this.provider = llmProvider ?? new OpenAILLMProvider(apiKey, model);
|
|
4090
6701
|
this.systemPrompt = systemPrompt;
|
|
6702
|
+
if (llmProvider) {
|
|
6703
|
+
const key = llmProvider.constructor?.providerKey;
|
|
6704
|
+
if (key) {
|
|
6705
|
+
this._providerName = key;
|
|
6706
|
+
} else {
|
|
6707
|
+
const stripped = (llmProvider.constructor?.name ?? "custom").replace(/LLMProvider$/i, "").replace(/LLM$/i, "").replace(/Provider$/i, "").toLowerCase();
|
|
6708
|
+
this._providerName = stripped || "custom";
|
|
6709
|
+
}
|
|
6710
|
+
} else {
|
|
6711
|
+
this._providerName = "openai";
|
|
6712
|
+
}
|
|
6713
|
+
this._modelName = model;
|
|
4091
6714
|
this.tools = tools ?? null;
|
|
6715
|
+
this.toolExecutor = new DefaultToolExecutor();
|
|
4092
6716
|
this.toolMap = /* @__PURE__ */ new Map();
|
|
4093
6717
|
this.openaiTools = null;
|
|
4094
6718
|
if (this.tools && this.tools.length > 0) {
|
|
@@ -4106,13 +6730,40 @@ var LLMLoop = class {
|
|
|
4106
6730
|
}
|
|
4107
6731
|
}
|
|
4108
6732
|
}
|
|
6733
|
+
/**
|
|
6734
|
+
* Swap in a custom tool executor (e.g. different retry policy, metrics
|
|
6735
|
+
* wrapping, tenant-aware fan-out). The default is ``DefaultToolExecutor``.
|
|
6736
|
+
*/
|
|
6737
|
+
setToolExecutor(executor) {
|
|
6738
|
+
this.toolExecutor = executor;
|
|
6739
|
+
}
|
|
6740
|
+
/**
|
|
6741
|
+
* Wire an :class:`EventBus` so the loop emits ``llm_chunk`` per text
|
|
6742
|
+
* token and ``tool_call_started`` the first time each tool-call index
|
|
6743
|
+
* appears. Set to ``undefined`` to disable.
|
|
6744
|
+
*/
|
|
6745
|
+
setEventBus(bus) {
|
|
6746
|
+
this.eventBus = bus;
|
|
6747
|
+
}
|
|
4109
6748
|
/**
|
|
4110
6749
|
* Stream LLM response tokens, handling tool calls automatically.
|
|
4111
6750
|
* Yields text tokens as they arrive from the LLM.
|
|
6751
|
+
*
|
|
6752
|
+
* @param metrics Optional usage recorder — when provided, usage chunks
|
|
6753
|
+
* from the provider are forwarded to {@link LlmUsageRecorder.recordLlmUsage}
|
|
6754
|
+
* so token costs are included in the call cost breakdown (fix 10).
|
|
4112
6755
|
*/
|
|
4113
|
-
async *run(userText, history, callContext) {
|
|
4114
|
-
|
|
6756
|
+
async *run(userText, history, callContext, metrics, hookExecutor, hookCtx) {
|
|
6757
|
+
let messages = this.buildMessages(history, userText);
|
|
4115
6758
|
const maxIterations = 10;
|
|
6759
|
+
if (hookExecutor && hookCtx) {
|
|
6760
|
+
messages = await hookExecutor.runBeforeLlm(
|
|
6761
|
+
messages,
|
|
6762
|
+
hookCtx
|
|
6763
|
+
);
|
|
6764
|
+
}
|
|
6765
|
+
const hasAfterLlm = Boolean(hookExecutor?.hasAfterLlm() && hookCtx);
|
|
6766
|
+
const allEmittedText = [];
|
|
4116
6767
|
for (let iter = 0; iter < maxIterations; iter++) {
|
|
4117
6768
|
const toolCallsAccumulated = /* @__PURE__ */ new Map();
|
|
4118
6769
|
const textParts = [];
|
|
@@ -4120,12 +6771,31 @@ var LLMLoop = class {
|
|
|
4120
6771
|
for await (const chunk of this.provider.stream(messages, this.openaiTools)) {
|
|
4121
6772
|
if (chunk.type === "text" && chunk.content) {
|
|
4122
6773
|
textParts.push(chunk.content);
|
|
4123
|
-
|
|
6774
|
+
this.eventBus?.emit("llm_chunk", { text: chunk.content, iteration: iter });
|
|
6775
|
+
if (hasAfterLlm) {
|
|
6776
|
+
allEmittedText.push(chunk.content);
|
|
6777
|
+
} else {
|
|
6778
|
+
yield chunk.content;
|
|
6779
|
+
}
|
|
6780
|
+
} else if (chunk.type === "usage") {
|
|
6781
|
+
metrics?.recordLlmUsage(
|
|
6782
|
+
this._providerName,
|
|
6783
|
+
this._modelName,
|
|
6784
|
+
chunk.inputTokens ?? 0,
|
|
6785
|
+
chunk.outputTokens ?? 0,
|
|
6786
|
+
chunk.cacheReadInputTokens ?? 0,
|
|
6787
|
+
chunk.cacheCreationInputTokens ?? 0
|
|
6788
|
+
);
|
|
4124
6789
|
} else if (chunk.type === "tool_call") {
|
|
4125
6790
|
hasToolCalls = true;
|
|
4126
6791
|
const idx = chunk.index ?? 0;
|
|
4127
6792
|
if (!toolCallsAccumulated.has(idx)) {
|
|
4128
6793
|
toolCallsAccumulated.set(idx, { id: "", name: "", arguments: "" });
|
|
6794
|
+
this.eventBus?.emit("tool_call_started", {
|
|
6795
|
+
index: idx,
|
|
6796
|
+
name: chunk.name ?? "",
|
|
6797
|
+
args: chunk.arguments ?? ""
|
|
6798
|
+
});
|
|
4129
6799
|
}
|
|
4130
6800
|
const acc = toolCallsAccumulated.get(idx);
|
|
4131
6801
|
if (chunk.id) acc.id = chunk.id;
|
|
@@ -4133,7 +6803,14 @@ var LLMLoop = class {
|
|
|
4133
6803
|
if (chunk.arguments) acc.arguments += chunk.arguments;
|
|
4134
6804
|
}
|
|
4135
6805
|
}
|
|
4136
|
-
if (!hasToolCalls)
|
|
6806
|
+
if (!hasToolCalls) {
|
|
6807
|
+
if (hasAfterLlm && hookExecutor && hookCtx) {
|
|
6808
|
+
const finalText = allEmittedText.join("");
|
|
6809
|
+
const rewritten = await hookExecutor.runAfterLlm(finalText, hookCtx);
|
|
6810
|
+
if (rewritten) yield rewritten;
|
|
6811
|
+
}
|
|
6812
|
+
return;
|
|
6813
|
+
}
|
|
4137
6814
|
const assistantMsg = {
|
|
4138
6815
|
role: "assistant",
|
|
4139
6816
|
content: textParts.join("") || null,
|
|
@@ -4172,49 +6849,7 @@ var LLMLoop = class {
|
|
|
4172
6849
|
if (!toolDef) {
|
|
4173
6850
|
return JSON.stringify({ error: `Unknown tool: ${toolName}` });
|
|
4174
6851
|
}
|
|
4175
|
-
|
|
4176
|
-
try {
|
|
4177
|
-
return await toolDef.handler(args, callContext);
|
|
4178
|
-
} catch (e) {
|
|
4179
|
-
return JSON.stringify({ error: `Tool handler error: ${String(e)}` });
|
|
4180
|
-
}
|
|
4181
|
-
}
|
|
4182
|
-
if (toolDef.webhookUrl) {
|
|
4183
|
-
try {
|
|
4184
|
-
validateWebhookUrl(toolDef.webhookUrl);
|
|
4185
|
-
} catch (e) {
|
|
4186
|
-
return JSON.stringify({ error: `Tool webhook URL rejected: ${String(e)}` });
|
|
4187
|
-
}
|
|
4188
|
-
for (let attempt = 0; attempt < 3; attempt++) {
|
|
4189
|
-
try {
|
|
4190
|
-
const resp = await fetch(toolDef.webhookUrl, {
|
|
4191
|
-
method: "POST",
|
|
4192
|
-
headers: { "Content-Type": "application/json" },
|
|
4193
|
-
body: JSON.stringify({
|
|
4194
|
-
tool: toolName,
|
|
4195
|
-
arguments: args,
|
|
4196
|
-
...callContext,
|
|
4197
|
-
attempt: attempt + 1
|
|
4198
|
-
}),
|
|
4199
|
-
signal: AbortSignal.timeout(1e4)
|
|
4200
|
-
});
|
|
4201
|
-
if (!resp.ok) throw new Error(`HTTP ${resp.status}`);
|
|
4202
|
-
const result = JSON.stringify(await resp.json());
|
|
4203
|
-
const MAX_RESPONSE_BYTES2 = 1 * 1024 * 1024;
|
|
4204
|
-
if (result.length > MAX_RESPONSE_BYTES2) {
|
|
4205
|
-
return JSON.stringify({ error: `Webhook response too large: ${result.length} bytes (max ${MAX_RESPONSE_BYTES2})`, fallback: true });
|
|
4206
|
-
}
|
|
4207
|
-
return result;
|
|
4208
|
-
} catch (e) {
|
|
4209
|
-
if (attempt < 2) {
|
|
4210
|
-
await new Promise((r) => setTimeout(r, 500));
|
|
4211
|
-
} else {
|
|
4212
|
-
return JSON.stringify({ error: `Tool failed after 3 attempts: ${String(e)}` });
|
|
4213
|
-
}
|
|
4214
|
-
}
|
|
4215
|
-
}
|
|
4216
|
-
}
|
|
4217
|
-
return JSON.stringify({ error: `No handler or webhookUrl for tool '${toolName}'` });
|
|
6852
|
+
return this.toolExecutor.execute(toolDef, args, callContext);
|
|
4218
6853
|
}
|
|
4219
6854
|
buildMessages(history, userText) {
|
|
4220
6855
|
const messages = [
|
|
@@ -4401,6 +7036,11 @@ var TestSession = class {
|
|
|
4401
7036
|
};
|
|
4402
7037
|
|
|
4403
7038
|
export {
|
|
7039
|
+
PatterError,
|
|
7040
|
+
PatterConnectionError,
|
|
7041
|
+
AuthenticationError,
|
|
7042
|
+
ProvisionError,
|
|
7043
|
+
RateLimitError,
|
|
4404
7044
|
OpenAIRealtimeAdapter,
|
|
4405
7045
|
ElevenLabsConvAIAdapter,
|
|
4406
7046
|
DEFAULT_PRICING,
|
|
@@ -4422,14 +7062,31 @@ export {
|
|
|
4422
7062
|
CallMetricsAccumulator,
|
|
4423
7063
|
mulawToPcm16,
|
|
4424
7064
|
pcm16ToMulaw,
|
|
7065
|
+
PcmCarry,
|
|
7066
|
+
StatefulResampler,
|
|
7067
|
+
createResampler16kTo8k,
|
|
7068
|
+
createResampler8kTo16k,
|
|
7069
|
+
createResampler24kTo16k,
|
|
4425
7070
|
resample8kTo16k,
|
|
4426
7071
|
resample16kTo8k,
|
|
4427
7072
|
resample24kTo16k,
|
|
7073
|
+
SPAN_CALL,
|
|
7074
|
+
SPAN_STT,
|
|
7075
|
+
SPAN_LLM,
|
|
7076
|
+
SPAN_TTS,
|
|
7077
|
+
SPAN_TOOL,
|
|
7078
|
+
SPAN_ENDPOINT,
|
|
7079
|
+
SPAN_BARGEIN,
|
|
7080
|
+
initTracing,
|
|
7081
|
+
isTracingEnabled,
|
|
7082
|
+
startSpan,
|
|
7083
|
+
DefaultToolExecutor,
|
|
4428
7084
|
OpenAILLMProvider,
|
|
4429
7085
|
LLMLoop,
|
|
4430
7086
|
DEFAULT_MIN_SENTENCE_LEN,
|
|
4431
7087
|
SentenceChunker,
|
|
4432
7088
|
PipelineHookExecutor,
|
|
7089
|
+
EventBus,
|
|
4433
7090
|
EmbeddedServer,
|
|
4434
7091
|
TestSession
|
|
4435
7092
|
};
|