getpatter 0.6.1 → 0.6.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/{carrier-config-4ZKVYAWV.mjs → carrier-config-3WDQXP5J.mjs} +43 -1
- package/dist/chunk-CL2U3YET.mjs +1429 -0
- package/dist/{chunk-RV7APPYE.mjs → chunk-R2T4JABZ.mjs} +13 -0
- package/dist/{chunk-TEW3NAZJ.mjs → chunk-Z6W5XFWS.mjs} +1071 -1520
- package/dist/cli.js +48 -23
- package/dist/dashboard/ui.html +8 -8
- package/dist/index.d.mts +3912 -3428
- package/dist/index.d.ts +3912 -3428
- package/dist/index.js +2507 -1159
- package/dist/index.mjs +1175 -875
- package/dist/openai-realtime-2-CNFARP25.mjs +8 -0
- package/dist/{silero-vad-NSEXI4XS.mjs → silero-vad-LNDFGIY7.mjs} +1 -1
- package/dist/{test-mode-WEKKNBLD.mjs → test-mode-MDBQ4ECE.mjs} +2 -1
- package/package.json +1 -1
- package/src/dashboard/ui.html +8 -8
|
@@ -1,3 +1,11 @@
|
|
|
1
|
+
import {
|
|
2
|
+
OpenAIRealtime2Adapter,
|
|
3
|
+
OpenAIRealtimeAdapter,
|
|
4
|
+
createResampler16kTo8k,
|
|
5
|
+
createResampler8kTo16k,
|
|
6
|
+
mulawToPcm16,
|
|
7
|
+
pcm16ToMulaw
|
|
8
|
+
} from "./chunk-CL2U3YET.mjs";
|
|
1
9
|
import {
|
|
2
10
|
getLogger
|
|
3
11
|
} from "./chunk-MVOQFAEO.mjs";
|
|
@@ -16,1264 +24,14 @@ init_esm_shims();
|
|
|
16
24
|
|
|
17
25
|
// src/server.ts
|
|
18
26
|
init_esm_shims();
|
|
19
|
-
import
|
|
27
|
+
import crypto5 from "crypto";
|
|
20
28
|
import express from "express";
|
|
21
29
|
import { createServer } from "http";
|
|
22
30
|
import { WebSocketServer } from "ws";
|
|
23
31
|
|
|
24
|
-
// src/providers/openai-realtime.ts
|
|
25
|
-
init_esm_shims();
|
|
26
|
-
import WebSocket from "ws";
|
|
27
|
-
/**
 * Audio encodings understood by `OpenAIRealtimeAdapter`.
 *
 * The selected value is sent verbatim as both `input_audio_format` and
 * `output_audio_format` in the adapter's `session.update` body.
 * Frozen so a stray assignment cannot silently change the wire value for
 * every adapter in the process.
 */
var OpenAIRealtimeAudioFormat = Object.freeze({
  G711_ULAW: "g711_ulaw",
  G711_ALAW: "g711_alaw",
  PCM16: "pcm16"
});
|
|
32
|
-
/**
 * Model identifiers accepted by `OpenAIRealtimeAdapter`.
 *
 * The chosen value is URL-encoded into the `model` query parameter of the
 * Realtime WebSocket URL. Frozen so the shared table cannot be mutated at
 * runtime.
 */
var OpenAIRealtimeModel = Object.freeze({
  GPT_REALTIME: "gpt-realtime",
  GPT_REALTIME_2: "gpt-realtime-2",
  GPT_REALTIME_MINI: "gpt-realtime-mini",
  GPT_4O_REALTIME_PREVIEW: "gpt-4o-realtime-preview",
  GPT_4O_MINI_REALTIME_PREVIEW: "gpt-4o-mini-realtime-preview"
});
|
|
39
|
-
/**
 * Voice names that can be passed to `OpenAIRealtimeAdapter`.
 *
 * The chosen value is sent as the `voice` field of the `session.update`
 * body. Frozen so the shared table cannot be mutated at runtime.
 */
var OpenAIVoice = Object.freeze({
  ALLOY: "alloy",
  ASH: "ash",
  BALLAD: "ballad",
  CORAL: "coral",
  ECHO: "echo",
  FABLE: "fable",
  NOVA: "nova",
  ONYX: "onyx",
  SAGE: "sage",
  SHIMMER: "shimmer",
  VERSE: "verse"
});
|
|
52
|
-
/**
 * Input-audio transcription models selectable through the adapter's
 * `inputAudioTranscriptionModel` option.
 *
 * The chosen value is sent as `input_audio_transcription.model` in the
 * `session.update` body. Frozen so the shared table cannot be mutated at
 * runtime.
 */
var OpenAITranscriptionModel = Object.freeze({
  WHISPER_1: "whisper-1",
  GPT_4O_TRANSCRIBE: "gpt-4o-transcribe",
  GPT_4O_MINI_TRANSCRIBE: "gpt-4o-mini-transcribe",
  GPT_REALTIME_WHISPER: "gpt-realtime-whisper"
});
|
|
58
|
-
/**
 * Turn-detection modes selectable through the adapter's `vadType` option.
 *
 * The chosen value is sent as `turn_detection.type` in the `session.update`
 * body. Frozen so the shared table cannot be mutated at runtime.
 */
var OpenAIRealtimeVADType = Object.freeze({
  SERVER_VAD: "server_vad",
  SEMANTIC_VAD: "semantic_vad"
});
|
|
62
|
-
/**
 * Close a Realtime socket on a setup-failure path without crashing.
 *
 * The setup handlers detach their own `error` listener before closing. A
 * socket that is closed while still connecting can emit one more `error`
 * event, and an `error` event with no listener is thrown by the emitter.
 * A no-op sink is therefore attached first. Best-effort: never throws.
 */
function closeRealtimeSocketQuietly(ws) {
  try {
    ws.on("error", () => {
    });
    ws.close();
  } catch {
    // Best-effort teardown; the caller is already reporting the real failure.
  }
}
/**
 * Client for the OpenAI Realtime WebSocket API.
 *
 * Owns one WebSocket, applies the session configuration, fans parsed server
 * events out to registered callbacks, and tracks the in-flight assistant
 * item so a barge-in can truncate it accurately.
 */
var OpenAIRealtimeAdapter = class {
  /**
   * @param apiKey OpenAI API key, sent as a Bearer token.
   * @param model Realtime model id (URL-encoded into the socket URL).
   * @param voice Voice name sent in the session config.
   * @param instructions System instructions; a generic default is used when empty.
   * @param tools Optional tool definitions (`name`, `description`, `parameters`, `strict`).
   * @param audioFormat Encoding used for both input and output audio.
   * @param options Optional tuning: `vadType`, `silenceDurationMs`,
   *   `inputAudioTranscriptionModel`, `temperature`, `maxResponseOutputTokens`,
   *   `modalities`, `toolChoice`, `reasoningEffort`.
   */
  constructor(apiKey, model = OpenAIRealtimeModel.GPT_REALTIME_MINI, voice = OpenAIVoice.ALLOY, instructions = "", tools, audioFormat = OpenAIRealtimeAudioFormat.G711_ULAW, options = {}) {
    this.apiKey = apiKey;
    this.model = model;
    this.voice = voice;
    this.instructions = instructions;
    this.tools = tools;
    this.audioFormat = audioFormat;
    this.options = options;
  }
  apiKey;
  model;
  voice;
  instructions;
  tools;
  audioFormat;
  // Fields exposed `protected` (not `private`) so a subclass can implement
  // alternate transports — e.g. `OpenAIRealtime2Adapter` overrides
  // `connect()` to speak the GA Realtime API while reusing the rest of
  // the runtime (audio dispatch, barge-in, heartbeat).
  ws = null;
  eventCallbacks = /* @__PURE__ */ new Set();
  messageListenerAttached = false;
  heartbeat = null;
  // Track the in-flight assistant item id so we can truncate cleanly on
  // barge-in (see ``cancelResponse``) — matches the Python adapter.
  currentResponseItemId = null;
  currentResponseAudioMs = 0;
  // Wall-clock timestamp (Date.now()) of the first ``response.audio.delta``
  // received since the current response item started. ``cancelResponse``
  // uses this to bound ``audio_end_ms`` to what the caller could plausibly
  // have heard — generated audio frequently arrives 5-10x real-time, so
  // ``audio_end_ms`` driven purely by the per-chunk byte counter overshoots
  // reality and leaves phantom assistant text on the conversation. The
  // wall-clock cap corresponds to the maximum playback that real-time TTS
  // could have produced, which is what the user actually heard.
  currentResponseFirstAudioAt = null;
  options;
  /**
   * Build the production session.update body. Mirrors the body sent
   * inside `connect()` so warmup can apply identical configuration to
   * the upstream session and prime it without billing.
   *
   * @returns The `session` object for a `session.update` event. Optional
   *   fields are included only when the matching option is defined.
   */
  buildSessionConfig() {
    const config = {
      input_audio_format: this.audioFormat,
      output_audio_format: this.audioFormat,
      voice: this.voice,
      instructions: this.instructions || "You are a helpful voice assistant. Be concise.",
      turn_detection: {
        type: this.options.vadType ?? OpenAIRealtimeVADType.SERVER_VAD,
        threshold: 0.5,
        prefix_padding_ms: 300,
        silence_duration_ms: this.options.silenceDurationMs ?? 300
      },
      input_audio_transcription: {
        model: this.options.inputAudioTranscriptionModel ?? OpenAITranscriptionModel.WHISPER_1
      }
    };
    if (this.options.temperature !== void 0) config.temperature = this.options.temperature;
    if (this.options.maxResponseOutputTokens !== void 0) {
      config.max_response_output_tokens = this.options.maxResponseOutputTokens;
    }
    if (this.options.modalities !== void 0) config.modalities = this.options.modalities;
    if (this.options.toolChoice !== void 0) config.tool_choice = this.options.toolChoice;
    if (this.options.reasoningEffort !== void 0) {
      config.reasoning = { effort: this.options.reasoningEffort };
    }
    if (this.tools?.length) {
      config.tools = this.tools.map((t) => {
        const def = {
          type: "function",
          name: t.name,
          description: t.description,
          parameters: t.parameters
        };
        // `strict` is only forwarded when explicitly enabled.
        if (t.strict === true) {
          def.strict = true;
        }
        return def;
      });
    }
    return config;
  }
  /**
   * Pre-call WebSocket warmup for the OpenAI Realtime endpoint.
   *
   * The canonical session-only warm step on the Realtime API: open the
   * WS, wait for `session.created`, send a single `session.update`
   * containing the same fields that the production `connect()` path
   * applies (`input_audio_format`, `output_audio_format`, `voice`,
   * `instructions`, `turn_detection`, `input_audio_transcription`,
   * plus any opt-in fields populated on the adapter), wait for the
   * matching `session.updated` ack, then close cleanly. This primes
   * the per-session state on the OpenAI side — DNS + TLS + auth
   * handshake + initial config exchange — without ever invoking the
   * model.
   *
   * Earlier revisions sent `response.create` with
   * `{"response": {"generate": false}}` to prime the inference path.
   * That field is NOT in the OpenAI Realtime API schema; the server
   * either ignores it (and bills tokens for a real model response) or
   * rejects the request with `invalid_request_error`. Both behaviours
   * are billing-unsafe or a no-op beyond TLS warm. The
   * `session.update` flow is documented and side-effect-free.
   *
   * Billing safety: `session.update` only mutates session
   * configuration. It does NOT invoke the model, does NOT consume any
   * audio buffer, and does NOT trigger token generation, so no
   * per-token cost is accrued. Best-effort: failures are logged at
   * debug level and never raised.
   */
  async warmup() {
    const url = `wss://api.openai.com/v1/realtime?model=${encodeURIComponent(this.model)}`;
    let ws = null;
    try {
      ws = await new Promise((resolve, reject) => {
        const sock = new WebSocket(url, {
          headers: {
            Authorization: `Bearer ${this.apiKey}`,
            "OpenAI-Beta": "realtime=v1"
          }
        });
        const timer = setTimeout(() => {
          try {
            sock.close();
          } catch {
            // Best-effort: the timeout rejection below is the real signal.
          }
          reject(new Error("OpenAI Realtime warmup connect timeout"));
        }, 5e3);
        sock.once("open", () => {
          clearTimeout(timer);
          resolve(sock);
        });
        sock.once("error", (err) => {
          clearTimeout(timer);
          reject(err);
        });
      });
      const sessionCreated = await new Promise((resolve) => {
        const onMsg = (raw) => {
          try {
            const data = JSON.parse(raw.toString());
            if (data.type === "session.created") {
              clearTimeout(timer);
              ws.off("message", onMsg);
              resolve(true);
            }
          } catch {
            // Unparseable frames are irrelevant to the warmup handshake.
          }
        };
        const timer = setTimeout(() => {
          // Detach on timeout too so the listener never outlives this wait.
          ws.off("message", onMsg);
          resolve(false);
        }, 2e3);
        ws.on("message", onMsg);
      });
      if (!sessionCreated) return;
      try {
        ws.send(JSON.stringify({ type: "session.update", session: this.buildSessionConfig() }));
      } catch {
        return;
      }
      await new Promise((resolve) => {
        const onMsg = (raw) => {
          try {
            const data = JSON.parse(raw.toString());
            if (data.type === "session.updated") {
              clearTimeout(timer);
              ws.off("message", onMsg);
              resolve();
            }
          } catch {
            // Unparseable frames are irrelevant to the warmup handshake.
          }
        };
        const timer = setTimeout(() => {
          ws.off("message", onMsg);
          resolve();
        }, 1500);
        ws.on("message", onMsg);
      });
    } catch (err) {
      getLogger().debug(`OpenAI Realtime warmup failed (best-effort): ${String(err)}`);
    } finally {
      if (ws) {
        try {
          ws.close();
        } catch {
          // Best-effort close of a warmup-only socket.
        }
      }
    }
  }
  /**
   * Open the Realtime WebSocket and apply the session configuration.
   *
   * Resolves once the server acknowledges the config with
   * `session.updated`. Rejects on a socket error, on a failed
   * `session.update` send, or after 15 s without an ack; in every failure
   * case the socket is closed before rejecting.
   */
  async connect() {
    const url = `wss://api.openai.com/v1/realtime?model=${encodeURIComponent(this.model)}`;
    this.ws = new WebSocket(url, {
      headers: {
        Authorization: `Bearer ${this.apiKey}`,
        "OpenAI-Beta": "realtime=v1"
      }
    });
    await new Promise((resolve, reject) => {
      let sessionCreated = false;
      let settled = false;
      const ws = this.ws;
      const cleanup = () => {
        if (settled) return;
        settled = true;
        clearTimeout(timer);
        ws.off("message", onSetupMessage);
        ws.off("error", onSetupError);
      };
      // Single failure path: detach setup listeners, close safely, reject.
      const fail = (err) => {
        cleanup();
        closeRealtimeSocketQuietly(ws);
        reject(err);
      };
      const onSetupMessage = (raw) => {
        let msg;
        try {
          msg = JSON.parse(raw.toString());
        } catch (e) {
          getLogger().warn(`OpenAI Realtime: failed to parse message: ${String(e)}`);
          return;
        }
        if (msg.type === "session.created" && !sessionCreated) {
          sessionCreated = true;
          try {
            ws.send(JSON.stringify({ type: "session.update", session: this.buildSessionConfig() }));
          } catch (err) {
            // A throw inside an event handler would otherwise escape the
            // socket's emitter instead of rejecting connect().
            fail(err instanceof Error ? err : new Error(String(err)));
          }
        } else if (msg.type === "session.updated") {
          cleanup();
          resolve();
        }
      };
      const onSetupError = (err) => {
        fail(err);
      };
      const timer = setTimeout(() => {
        fail(new Error("OpenAI Realtime connect timeout"));
      }, 15e3);
      ws.on("message", onSetupMessage);
      ws.on("error", onSetupError);
    });
    this.armHeartbeatAndListener();
  }
  /**
   * Adopt a pre-opened, already-`session.updated` Realtime WebSocket
   * produced by the prewarm pipeline (see `Patter.parkProviderConnections`).
   * Skips the fresh `new WebSocket()` + `session.created` /
   * `session.update` round-trip — saves ~250-450 ms on first turn.
   *
   * Caller MUST verify `ws.readyState === OPEN` before calling and MUST
   * have already received `session.updated` on the parked socket. If
   * the parked WS died between park and adopt, fall back to `connect()`.
   */
  adoptWebSocket(ws) {
    this.ws = ws;
    this.armHeartbeatAndListener();
  }
  /**
   * Start the 20 s ping heartbeat and attach the shared event listener.
   *
   * Any heartbeat armed by an earlier `connect()` / `adoptWebSocket()` is
   * cleared first so repeated calls never leave an orphaned interval.
   */
  armHeartbeatAndListener() {
    if (this.heartbeat) {
      clearInterval(this.heartbeat);
    }
    this.heartbeat = setInterval(() => {
      try {
        this.ws?.ping();
      } catch {
        // A failed ping is surfaced by the socket's own close/error events.
      }
    }, 2e4);
    this.ensureMessageListener();
  }
  /**
   * Open a fresh Realtime WS, exchange `session.created` /
   * `session.update` / `session.updated` (so the upstream session is
   * fully primed), and return the OPEN socket WITHOUT arming the
   * heartbeat / message listener. Used by the prewarm pipeline to park
   * a Realtime connection during ringing; the live consumer adopts it
   * via {@link adoptWebSocket}.
   *
   * Bounded by 8 s. Throws on timeout / handshake failure — callers
   * (the prewarm pipeline) treat any error as a cache miss and the
   * call falls through to the cold `connect()` path. The socket is
   * closed on every failure path, since the caller never receives a
   * handle it could close itself.
   *
   * Billing safety: `session.update` does not invoke the model. No
   * tokens are billed.
   */
  async openParkedConnection() {
    const url = `wss://api.openai.com/v1/realtime?model=${encodeURIComponent(this.model)}`;
    const ws = new WebSocket(url, {
      headers: {
        Authorization: `Bearer ${this.apiKey}`,
        "OpenAI-Beta": "realtime=v1"
      }
    });
    await new Promise((resolve, reject) => {
      let sessionCreated = false;
      let settled = false;
      const cleanup = () => {
        if (settled) return;
        settled = true;
        clearTimeout(timer);
        ws.off("message", onMessage);
        ws.off("error", onError);
      };
      const fail = (err) => {
        cleanup();
        closeRealtimeSocketQuietly(ws);
        reject(err);
      };
      const onMessage = (raw) => {
        let msg;
        try {
          msg = JSON.parse(raw.toString());
        } catch {
          return;
        }
        if (msg.type === "session.created" && !sessionCreated) {
          sessionCreated = true;
          try {
            ws.send(JSON.stringify({ type: "session.update", session: this.buildSessionConfig() }));
          } catch (err) {
            fail(err instanceof Error ? err : new Error(String(err)));
          }
        } else if (msg.type === "session.updated") {
          cleanup();
          resolve();
        }
      };
      const onError = (err) => {
        fail(err);
      };
      const timer = setTimeout(() => {
        fail(new Error("OpenAI Realtime park connect timeout"));
      }, 8e3);
      ws.on("message", onMessage);
      ws.on("error", onError);
    });
    return ws;
  }
  /** Append a base64-encoded audio chunk to the realtime input buffer. */
  sendAudio(mulawAudio) {
    if (!this.ws || this.ws.readyState !== WebSocket.OPEN) return;
    this.ws.send(JSON.stringify({ type: "input_audio_buffer.append", audio: mulawAudio.toString("base64") }));
  }
  /**
   * Register a listener for parsed realtime events.
   *
   * Previously every call attached a new ``ws.on('message')`` handler,
   * which leaked listeners across retries and multi-consumer hooks. We now
   * route all traffic through a single persistent handler that fans out to
   * a Set of callbacks. Use {@link offEvent} to remove one.
   */
  onEvent(callback) {
    this.eventCallbacks.add(callback);
    this.ensureMessageListener();
  }
  /** Remove a previously registered {@link onEvent} callback. */
  offEvent(callback) {
    this.eventCallbacks.delete(callback);
  }
  /**
   * Attach the single `message` / `close` / `error` handler set to the
   * current socket (no-op when already attached or when no socket exists).
   *
   * Server events are translated to adapter events: `audio`,
   * `transcript_output`, `speech_started`, `speech_stopped`,
   * `transcript_input`, `function_call`, `response_done`, `error`.
   */
  ensureMessageListener() {
    if (this.messageListenerAttached || !this.ws) return;
    this.messageListenerAttached = true;
    const ws = this.ws;
    const dispatch = (type, payload) => {
      for (const cb of this.eventCallbacks) {
        let outcome;
        try {
          outcome = cb(type, payload);
        } catch (err) {
          // A synchronous throw must not starve the remaining callbacks or
          // escape into the socket's emitter.
          getLogger().error("onEvent callback error:", err);
          continue;
        }
        void Promise.resolve(outcome).catch(
          (err) => getLogger().error("onEvent callback error:", err)
        );
      }
    };
    ws.on("message", (raw) => {
      let data;
      try {
        data = JSON.parse(raw.toString());
      } catch (e) {
        getLogger().warn(`OpenAI Realtime: failed to parse event message: ${String(e)}`);
        return;
      }
      const t = data.type;
      if (t === "response.audio.delta") {
        const buf = Buffer.from(data.delta ?? "", "base64");
        this.currentResponseAudioMs += estimateAudioMs(buf, this.audioFormat);
        if (this.currentResponseFirstAudioAt === null) {
          this.currentResponseFirstAudioAt = Date.now();
        }
        dispatch("audio", buf);
      } else if (t === "response.audio_transcript.delta") {
        dispatch("transcript_output", data.delta);
      } else if (t === "response.content_part.added" || t === "response.output_item.added") {
        // A new assistant item restarts the truncate bookkeeping.
        const itemId = data.item?.id ?? data.item_id ?? null;
        if (itemId) {
          this.currentResponseItemId = itemId;
          this.currentResponseAudioMs = 0;
          this.currentResponseFirstAudioAt = null;
        }
      } else if (t === "input_audio_buffer.speech_started") {
        dispatch("speech_started", null);
      } else if (t === "input_audio_buffer.speech_stopped") {
        dispatch("speech_stopped", null);
      } else if (t === "conversation.item.input_audio_transcription.completed") {
        dispatch("transcript_input", data.transcript);
      } else if (t === "response.function_call_arguments.done") {
        dispatch("function_call", { call_id: data.call_id, name: data.name, arguments: data.arguments });
      } else if (t === "response.done") {
        this.currentResponseItemId = null;
        this.currentResponseAudioMs = 0;
        this.currentResponseFirstAudioAt = null;
        dispatch("response_done", data.response ?? null);
      } else if (t === "error") {
        dispatch("error", data.error);
      }
    });
    ws.on("close", (code, reason) => {
      // 1000 is a normal closure; anything else is reported as an error.
      if (code !== 1e3) {
        dispatch("error", {
          type: "connection_closed",
          code,
          reason: reason?.toString() ?? ""
        });
      }
    });
    ws.on("error", (err) => {
      dispatch("error", { type: "socket_error", message: err?.message ?? String(err) });
    });
  }
  /** Truncate the in-flight assistant turn and cancel the active response.
   *
   * ``audio_end_ms`` MUST reflect what the caller actually heard, not what
   * the server generated. OpenAI streams audio at 5-10x real-time, so the
   * byte-derived counter overstates playback whenever the consumer cleared
   * its playout buffer (e.g. ``send_clear``) before the audio reached the
   * speaker. We bound the truncate point by wall-clock time since the first
   * chunk of this response — that's the physical maximum a 1x real-time
   * playback could have produced. Without this cap, OpenAI keeps the full
   * generated assistant text on the transcript, and the model replays /
   * resumes from it on the next turn — manifesting as re-greetings and
   * mid-sentence fragments after a barge-in storm.
   *
   * The per-response tracking state is reset even when the
   * ``response.cancel`` send throws, so a failed cancel cannot leave a
   * stale item id behind for the next barge-in. The send error itself
   * still propagates to the caller.
   */
  cancelResponse() {
    if (!this.ws) return;
    try {
      if (this.currentResponseItemId) {
        let audioEndMs = this.currentResponseAudioMs;
        if (this.currentResponseFirstAudioAt !== null) {
          const elapsedMs = Date.now() - this.currentResponseFirstAudioAt;
          audioEndMs = Math.min(audioEndMs, Math.max(elapsedMs, 0));
        }
        try {
          this.ws.send(JSON.stringify({
            type: "conversation.item.truncate",
            item_id: this.currentResponseItemId,
            content_index: 0,
            audio_end_ms: audioEndMs
          }));
        } catch (err) {
          getLogger().debug?.(`conversation.item.truncate failed: ${String(err)}`);
        }
      }
      this.ws.send(JSON.stringify({ type: "response.cancel" }));
    } finally {
      this.currentResponseItemId = null;
      this.currentResponseAudioMs = 0;
      this.currentResponseFirstAudioAt = null;
    }
  }
  /** Inject a user text turn and request a new response. */
  async sendText(text) {
    this.ws?.send(JSON.stringify({
      type: "conversation.item.create",
      item: { type: "message", role: "user", content: [{ type: "input_text", text }] }
    }));
    this.ws?.send(JSON.stringify({ type: "response.create" }));
  }
  /**
   * Make the AI speak ``text`` as its opening line.
   *
   * Triggers ``response.create`` with explicit ``instructions`` that force
   * the model to render ``text`` verbatim as its first audio utterance.
   * This is the correct semantics for ``Agent.firstMessage`` per its
   * docstring ("What the AI says when the callee answers").
   *
   * Without this, ``sendText(firstMessage)`` would inject ``text`` as
   * ``role: user`` and the AI would *reply* to its own greeting, producing
   * role-confused openings (e.g. a receptionist agent responding "I'd like
   * to schedule a haircut" because it took its own first_message as a
   * customer cue).
   */
  async sendFirstMessage(text) {
    this.ws?.send(JSON.stringify({
      type: "response.create",
      response: {
        modalities: ["audio", "text"],
        instructions: `Say exactly the following sentence as your first turn and nothing else: "${text}"`
      }
    }));
  }
  /** Submit a tool/function-call result and request the next response. */
  async sendFunctionResult(callId, result) {
    this.ws?.send(JSON.stringify({
      type: "conversation.item.create",
      item: { type: "function_call_output", call_id: callId, output: result }
    }));
    this.ws?.send(JSON.stringify({ type: "response.create" }));
  }
  /** Stop the heartbeat, drop listeners, and close the Realtime WebSocket. */
  close() {
    if (this.heartbeat) {
      clearInterval(this.heartbeat);
      this.heartbeat = null;
    }
    this.eventCallbacks.clear();
    this.messageListenerAttached = false;
    this.ws?.close();
    this.ws = null;
  }
};
|
|
569
|
-
/**
 * Estimate the playback duration of an audio chunk, in whole milliseconds.
 *
 * G.711 (µ-law / A-law) is counted at 8 bytes per millisecond and PCM16 at
 * 48 bytes per millisecond (consistent with 8 kHz 1-byte samples and
 * 24 kHz 16-bit mono respectively — confirm against the session's actual
 * sample rate). Partial milliseconds are dropped.
 *
 * @param chunk Raw audio bytes; only `length` is read.
 * @param format One of the `OpenAIRealtimeAudioFormat` values.
 * @returns Duration in ms, or 0 for an empty chunk or unrecognized format.
 */
function estimateAudioMs(chunk, format) {
  const byteCount = chunk.length;
  if (byteCount === 0) return 0;
  switch (format) {
    case OpenAIRealtimeAudioFormat.G711_ULAW:
    case OpenAIRealtimeAudioFormat.G711_ALAW:
      return Math.floor(byteCount / 8);
    case OpenAIRealtimeAudioFormat.PCM16:
      return Math.floor(byteCount / 48);
    default:
      return 0;
  }
}
|
|
578
|
-
|
|
579
|
-
// src/providers/openai-realtime-2.ts
|
|
580
|
-
init_esm_shims();
|
|
581
|
-
import WebSocket2 from "ws";
|
|
582
|
-
|
|
583
|
-
// src/audio/transcoding.ts
|
|
584
|
-
init_esm_shims();
|
|
585
|
-
var MULAW_TO_PCM16_TABLE = (() => {
|
|
586
|
-
const table = new Int16Array(256);
|
|
587
|
-
for (let i = 0; i < 256; i++) {
|
|
588
|
-
const mu = ~i & 255;
|
|
589
|
-
const sign = mu & 128 ? -1 : 1;
|
|
590
|
-
const exponent = mu >> 4 & 7;
|
|
591
|
-
const mantissa = mu & 15;
|
|
592
|
-
const magnitude = (mantissa << 1 | 33) << exponent + 2;
|
|
593
|
-
table[i] = sign * (magnitude - 132);
|
|
594
|
-
}
|
|
595
|
-
return table;
|
|
596
|
-
})();
|
|
597
|
-
/**
 * 65536-entry lookup table mapping every 16-bit PCM sample to its µ-law
 * code byte. The table is indexed by the sample's unsigned 16-bit pattern
 * (`sample & 0xffff`).
 *
 * Encoding: take the magnitude, clip to 32635, add a bias of 132, then
 * derive the segment (exponent) from the highest set bit and the mantissa
 * from the four bits below it. The assembled byte is bit-inverted.
 */
var PCM16_TO_MULAW_TABLE = (() => {
  const BIAS = 0x84;
  const CLIP = 32635;
  const lut = new Uint8Array(0x10000);
  for (let pcm = -32768; pcm <= 32767; pcm += 1) {
    const signBit = pcm < 0 ? 0x80 : 0;
    const biased = Math.min(Math.abs(pcm), CLIP) + BIAS;
    // `biased` is always within [132, 32767], so its highest set bit lies
    // between bit 7 and bit 14; that bit position minus 7 is the segment.
    const exponent = 24 - Math.clz32(biased);
    const mantissa = (biased >> (exponent + 3)) & 0x0f;
    lut[pcm & 0xffff] = ~(signBit | (exponent << 4) | mantissa) & 0xff;
  }
  return lut;
})();
|
|
618
|
-
/**
 * Decode µ-law bytes into 16-bit little-endian PCM.
 *
 * @param mulawData Byte sequence of µ-law codes (one byte per sample).
 * @returns A new Buffer twice as long as the input, holding one Int16LE
 *   sample per input byte, looked up in `MULAW_TO_PCM16_TABLE`.
 */
function mulawToPcm16(mulawData) {
  const sampleCount = mulawData.length;
  const pcm = Buffer.alloc(sampleCount * 2);
  let writeAt = 0;
  for (let idx = 0; idx < sampleCount; idx += 1) {
    const decoded = MULAW_TO_PCM16_TABLE[mulawData[idx]];
    pcm.writeInt16LE(decoded, writeAt);
    writeAt += 2;
  }
  return pcm;
}
|
|
625
|
-
/**
 * Encode 16-bit little-endian PCM into µ-law bytes.
 *
 * @param pcmData Buffer of Int16LE samples. A trailing odd byte is ignored.
 * @returns A new Buffer with one µ-law byte per complete input sample,
 *   looked up in `PCM16_TO_MULAW_TABLE`.
 */
function pcm16ToMulaw(pcmData) {
  const wholeSamples = (pcmData.length - (pcmData.length % 2)) / 2;
  const encoded = Buffer.alloc(wholeSamples);
  for (let n = 0, offset = 0; n < wholeSamples; n += 1, offset += 2) {
    // The table is indexed by the sample's unsigned 16-bit pattern, which
    // is exactly what an unsigned read of the same two bytes yields.
    encoded[n] = PCM16_TO_MULAW_TABLE[pcmData.readUInt16LE(offset)];
  }
  return encoded;
}
|
|
634
|
-
/**
 * Byte-alignment helper for streamed 16-bit PCM.
 *
 * Chunks may split a 2-byte sample across a boundary. `push` returns only
 * whole samples and carries a trailing odd byte over to the next call.
 */
var PcmCarry = class {
  // The single carried byte (a 1-byte Buffer owned by this instance), or
  // null when the stream is currently sample-aligned.
  pending = null;
  /**
   * Prepend any carried odd byte, return the even-length prefix, and stash
   * any new trailing odd byte for the next call.
   *
   * The carried byte is copied, never kept as a view into `chunk`. A caller
   * that reuses or overwrites its buffer after `push` returns therefore
   * cannot corrupt the carry, and the carry does not pin the whole chunk in
   * memory.
   *
   * Returns a zero-length buffer when no complete sample is yet available.
   */
  push(chunk) {
    const combined = this.pending !== null ? Buffer.concat([this.pending, chunk]) : chunk;
    this.pending = null;
    const alignedLen = combined.length & ~1;
    if (alignedLen < combined.length) {
      this.pending = Buffer.from(combined.subarray(alignedLen));
    }
    return combined.subarray(0, alignedLen);
  }
  /**
   * Return any pending byte as a 1-byte buffer (rare in practice — only if
   * the entire stream had an odd byte count), then reset internal state.
   */
  flush() {
    if (this.pending === null) return Buffer.alloc(0);
    const out = this.pending;
    this.pending = null;
    return out;
  }
  /** Reset carry state without flushing. */
  reset() {
    this.pending = null;
  }
};
|
|
666
|
-
var StatefulResampler = class {
|
|
667
|
-
srcRate;
|
|
668
|
-
dstRate;
|
|
669
|
-
// 16k→8k: 5-tap FIR state.
|
|
670
|
-
// Extended sample buffer carries the 2 history samples that precede the
|
|
671
|
-
// current chunk AND any "pending" input sample that did not yet generate
|
|
672
|
-
// output (i.e. the odd sample when the chunk had an odd sample count).
|
|
673
|
-
// `firPhase` = 0 means the next output is at input position 0 of the
|
|
674
|
-
// current chunk; 1 means it starts at input position 1 (because the
|
|
675
|
-
// previous chunk ended on an even-output boundary).
|
|
676
|
-
firHistory = new Int16Array(2);
|
|
677
|
-
// [s_{-2}, s_{-1}]
|
|
678
|
-
firHistoryValid = false;
|
|
679
|
-
// Pending sample carried from odd-count chunks (not the byte carry —
|
|
680
|
-
// this is a complete Int16 sample that becomes the first input for the
|
|
681
|
-
// next call).
|
|
682
|
-
firPendingSample = null;
|
|
683
|
-
// 8k→16k: last input sample deferred across chunk boundaries.
|
|
684
|
-
upsampleLast = 0;
|
|
685
|
-
upsampleHasHistory = false;
|
|
686
|
-
// 24k→16k: fractional phase and last input sample across chunks.
|
|
687
|
-
resample24Last = 0;
|
|
688
|
-
resample24Phase = 0;
|
|
689
|
-
resample24HasHistory = false;
|
|
690
|
-
// Odd-byte alignment carry.
|
|
691
|
-
carry = new PcmCarry();
|
|
692
|
-
constructor(opts) {
|
|
693
|
-
this.srcRate = opts.srcRate;
|
|
694
|
-
this.dstRate = opts.dstRate;
|
|
695
|
-
if (opts.channels !== void 0 && opts.channels !== 1) {
|
|
696
|
-
throw new Error("StatefulResampler: only mono (channels=1) is supported");
|
|
697
|
-
}
|
|
698
|
-
const key = `${this.srcRate}->${this.dstRate}`;
|
|
699
|
-
if (key !== "16000->8000" && key !== "8000->16000" && key !== "24000->16000" && key !== "24000->8000") {
|
|
700
|
-
throw new Error(
|
|
701
|
-
`StatefulResampler: unsupported conversion ${key}. Supported: 16000->8000, 8000->16000, 24000->16000, 24000->8000`
|
|
702
|
-
);
|
|
703
|
-
}
|
|
704
|
-
}
|
|
705
|
-
/**
|
|
706
|
-
* Process a chunk of PCM16-LE samples.
|
|
707
|
-
*
|
|
708
|
-
* Handles odd-byte inputs via an internal carry buffer. Returns an even-byte-
|
|
709
|
-
* aligned output buffer; may return a zero-length buffer if not enough
|
|
710
|
-
* aligned input is available yet.
|
|
711
|
-
*/
|
|
712
|
-
process(pcm) {
|
|
713
|
-
const aligned = this.carry.push(pcm);
|
|
714
|
-
if (aligned.length === 0) return Buffer.alloc(0);
|
|
715
|
-
if (this.srcRate === 16e3 && this.dstRate === 8e3) {
|
|
716
|
-
return this._downsample16kTo8k(aligned);
|
|
717
|
-
}
|
|
718
|
-
if (this.srcRate === 8e3 && this.dstRate === 16e3) {
|
|
719
|
-
return this._upsample8kTo16k(aligned);
|
|
720
|
-
}
|
|
721
|
-
if (this.srcRate === 24e3 && this.dstRate === 8e3) {
|
|
722
|
-
return this._resample24kTo8k(aligned);
|
|
723
|
-
}
|
|
724
|
-
return this._resample24kTo16k(aligned);
|
|
725
|
-
}
|
|
726
|
-
/**
|
|
727
|
-
* Flush internal state and return any remaining output samples.
|
|
728
|
-
*
|
|
729
|
-
* For 8k→16k: the deferred last sample is emitted duplicated (matching
|
|
730
|
-
* the stateless helper's end-of-stream behaviour).
|
|
731
|
-
* For 16k→8k: any pending odd sample is processed with edge-replication.
|
|
732
|
-
* Resets all state after flushing.
|
|
733
|
-
*/
|
|
734
|
-
flush() {
|
|
735
|
-
this.carry.flush();
|
|
736
|
-
if (this.srcRate === 16e3 && this.dstRate === 8e3 && this.firPendingSample !== null) {
|
|
737
|
-
const s = this.firPendingSample;
|
|
738
|
-
const tmp = Buffer.alloc(4);
|
|
739
|
-
tmp.writeInt16LE(s, 0);
|
|
740
|
-
tmp.writeInt16LE(s, 2);
|
|
741
|
-
const out = this._downsample16kTo8k(tmp);
|
|
742
|
-
this.firPendingSample = null;
|
|
743
|
-
return out;
|
|
744
|
-
}
|
|
745
|
-
if (this.srcRate === 8e3 && this.dstRate === 16e3 && this.upsampleHasHistory) {
|
|
746
|
-
const out = Buffer.alloc(4);
|
|
747
|
-
out.writeInt16LE(this.upsampleLast, 0);
|
|
748
|
-
out.writeInt16LE(this.upsampleLast, 2);
|
|
749
|
-
this.upsampleHasHistory = false;
|
|
750
|
-
this.upsampleLast = 0;
|
|
751
|
-
return out;
|
|
752
|
-
}
|
|
753
|
-
return Buffer.alloc(0);
|
|
754
|
-
}
|
|
755
|
-
/** Reset all carried state (e.g. at call boundaries). */
|
|
756
|
-
reset() {
|
|
757
|
-
this.firHistory = new Int16Array(2);
|
|
758
|
-
this.firHistoryValid = false;
|
|
759
|
-
this.firPendingSample = null;
|
|
760
|
-
this.upsampleLast = 0;
|
|
761
|
-
this.upsampleHasHistory = false;
|
|
762
|
-
this.resample24Last = 0;
|
|
763
|
-
this.resample24Phase = 0;
|
|
764
|
-
this.resample24HasHistory = false;
|
|
765
|
-
this.carry.reset();
|
|
766
|
-
}
|
|
767
|
-
// ---------------------------------------------------------------------------
|
|
768
|
-
// Private: 16 kHz → 8 kHz
|
|
769
|
-
// ---------------------------------------------------------------------------
|
|
770
|
-
/**
|
|
771
|
-
* 2:1 decimation with a 5-tap binomial FIR anti-alias filter.
|
|
772
|
-
*
|
|
773
|
-
* FIR coefficients: [1, 4, 6, 4, 1] / 16 (cutoff ~Fs/4 = 4 kHz).
|
|
774
|
-
*
|
|
775
|
-
* Cross-chunk state:
|
|
776
|
-
* - `firHistory[0]` = s_{-2}, `firHistory[1]` = s_{-1} relative to the
|
|
777
|
-
* virtual stream (seeded to first-sample on the very first call).
|
|
778
|
-
* - `firPendingSample` = a lone input sample carried from a chunk whose
|
|
779
|
-
* sample count was odd; it will become the first input of the next chunk.
|
|
780
|
-
*
|
|
781
|
-
* Decimation: outputs are at even positions (0, 2, 4 …) in the virtual
|
|
782
|
-
* extended stream, so every 2 input samples yield 1 output. An odd-sample-
|
|
783
|
-
* count chunk leaves 1 sample in `firPendingSample`; the next chunk
|
|
784
|
-
* prepends it so the output cadence is unbroken.
|
|
785
|
-
*/
|
|
786
|
-
_downsample16kTo8k(buf) {
|
|
787
|
-
const newSampleCount = buf.length >> 1;
|
|
788
|
-
const hasPending = this.firPendingSample !== null;
|
|
789
|
-
const totalInput = newSampleCount + (hasPending ? 1 : 0);
|
|
790
|
-
const input = new Int16Array(totalInput);
|
|
791
|
-
if (hasPending) {
|
|
792
|
-
input[0] = this.firPendingSample;
|
|
793
|
-
for (let j = 0; j < newSampleCount; j++) input[j + 1] = buf.readInt16LE(j * 2);
|
|
794
|
-
} else {
|
|
795
|
-
for (let j = 0; j < newSampleCount; j++) input[j] = buf.readInt16LE(j * 2);
|
|
796
|
-
}
|
|
797
|
-
this.firPendingSample = null;
|
|
798
|
-
if (totalInput === 0) return Buffer.alloc(0);
|
|
799
|
-
if (!this.firHistoryValid) {
|
|
800
|
-
this.firHistory[0] = 0;
|
|
801
|
-
this.firHistory[1] = 0;
|
|
802
|
-
this.firHistoryValid = true;
|
|
803
|
-
}
|
|
804
|
-
const extended = new Int16Array(totalInput + 2);
|
|
805
|
-
extended[0] = this.firHistory[0];
|
|
806
|
-
extended[1] = this.firHistory[1];
|
|
807
|
-
for (let j = 0; j < totalInput; j++) extended[j + 2] = input[j];
|
|
808
|
-
const outSamples = totalInput >> 1;
|
|
809
|
-
const out = Buffer.alloc(outSamples * 2);
|
|
810
|
-
for (let i = 0; i < outSamples; i++) {
|
|
811
|
-
const c = 2 + i * 2;
|
|
812
|
-
const sM2 = extended[c - 2];
|
|
813
|
-
const sM1 = extended[c - 1];
|
|
814
|
-
const s0 = extended[c];
|
|
815
|
-
const sP1 = c + 1 < extended.length ? extended[c + 1] : extended[extended.length - 1];
|
|
816
|
-
const sP2 = c + 2 < extended.length ? extended[c + 2] : extended[extended.length - 1];
|
|
817
|
-
const filtered = sM2 + 4 * sM1 + 6 * s0 + 4 * sP1 + sP2 + 8 >> 4;
|
|
818
|
-
out.writeInt16LE(Math.max(-32768, Math.min(32767, filtered)), i * 2);
|
|
819
|
-
}
|
|
820
|
-
if (totalInput % 2 === 1) {
|
|
821
|
-
this.firPendingSample = input[totalInput - 1];
|
|
822
|
-
}
|
|
823
|
-
if (totalInput >= 2) {
|
|
824
|
-
this.firHistory[0] = input[totalInput - 2];
|
|
825
|
-
this.firHistory[1] = input[totalInput - 1];
|
|
826
|
-
} else {
|
|
827
|
-
this.firHistory[0] = this.firHistory[1];
|
|
828
|
-
this.firHistory[1] = input[0];
|
|
829
|
-
}
|
|
830
|
-
return out;
|
|
831
|
-
}
|
|
832
|
-
// ---------------------------------------------------------------------------
|
|
833
|
-
// Private: 8 kHz → 16 kHz
|
|
834
|
-
// ---------------------------------------------------------------------------
|
|
835
|
-
/**
|
|
836
|
-
* 1:2 linear-interpolation upsampler.
|
|
837
|
-
*
|
|
838
|
-
* For the first chunk (no history): emits 2*(N-1) samples and defers the
|
|
839
|
-
* last sample. For subsequent chunks (with history): emits the deferred
|
|
840
|
-
* sample + its interpolated midpoint THEN 2*(N-1) samples from the new
|
|
841
|
-
* chunk, deferring the new last sample. Total across K chunks + flush =
|
|
842
|
-
* 2*total_input_samples (correct output length).
|
|
843
|
-
*
|
|
844
|
-
* Call flush() after the final chunk to emit the last deferred sample
|
|
845
|
-
* pair (self-duplicate at end of stream).
|
|
846
|
-
*/
|
|
847
|
-
_upsample8kTo16k(buf) {
|
|
848
|
-
const sampleCount = buf.length >> 1;
|
|
849
|
-
if (sampleCount === 0) return Buffer.alloc(0);
|
|
850
|
-
const outArr = [];
|
|
851
|
-
if (this.upsampleHasHistory) {
|
|
852
|
-
const next = buf.readInt16LE(0);
|
|
853
|
-
outArr.push(this.upsampleLast);
|
|
854
|
-
outArr.push(Math.round((this.upsampleLast + next) / 2));
|
|
855
|
-
}
|
|
856
|
-
for (let i = 0; i < sampleCount - 1; i++) {
|
|
857
|
-
const s0 = buf.readInt16LE(i * 2);
|
|
858
|
-
const s1 = buf.readInt16LE((i + 1) * 2);
|
|
859
|
-
outArr.push(s0);
|
|
860
|
-
outArr.push(Math.round((s0 + s1) / 2));
|
|
861
|
-
}
|
|
862
|
-
this.upsampleLast = buf.readInt16LE((sampleCount - 1) * 2);
|
|
863
|
-
this.upsampleHasHistory = true;
|
|
864
|
-
const outBuf = Buffer.alloc(outArr.length * 2);
|
|
865
|
-
for (let j = 0; j < outArr.length; j++) outBuf.writeInt16LE(outArr[j], j * 2);
|
|
866
|
-
return outBuf;
|
|
867
|
-
}
|
|
868
|
-
// ---------------------------------------------------------------------------
|
|
869
|
-
// Private: 24 kHz → 16 kHz / 8 kHz
|
|
870
|
-
// ---------------------------------------------------------------------------
|
|
871
|
-
/**
|
|
872
|
-
* 3:2 linear-interpolation decimator (ratio srcRate/dstRate = 1.5).
|
|
873
|
-
*
|
|
874
|
-
* `resample24Phase` tracks the fractional input position of the next output
|
|
875
|
-
* sample relative to the START of the next chunk. Negative phase means the
|
|
876
|
-
* next output straddles the previous/current chunk boundary; those are
|
|
877
|
-
* handled using `resample24Last`.
|
|
878
|
-
*/
|
|
879
|
-
_resample24kTo16k(buf) {
|
|
880
|
-
return this._resample24kStep(buf, 24e3 / 16e3);
|
|
881
|
-
}
|
|
882
|
-
/** 3:1 decimation — collapses the 24k→16k→8k chain into a single step. */
|
|
883
|
-
_resample24kTo8k(buf) {
|
|
884
|
-
return this._resample24kStep(buf, 24e3 / 8e3);
|
|
885
|
-
}
|
|
886
|
-
/** Shared phase-stepping resampler used by 24→16 (step 1.5) and 24→8 (step 3). */
|
|
887
|
-
_resample24kStep(buf, step) {
|
|
888
|
-
const sampleCount = buf.length >> 1;
|
|
889
|
-
if (sampleCount === 0) return Buffer.alloc(0);
|
|
890
|
-
const outArr = [];
|
|
891
|
-
let phase = this.resample24Phase;
|
|
892
|
-
while (true) {
|
|
893
|
-
const idx = Math.floor(phase);
|
|
894
|
-
if (idx >= sampleCount) break;
|
|
895
|
-
const frac = phase - idx;
|
|
896
|
-
let s0;
|
|
897
|
-
let s1;
|
|
898
|
-
if (idx < 0) {
|
|
899
|
-
s0 = this.resample24HasHistory ? this.resample24Last : 0;
|
|
900
|
-
s1 = buf.readInt16LE(0);
|
|
901
|
-
} else {
|
|
902
|
-
s0 = buf.readInt16LE(idx * 2);
|
|
903
|
-
s1 = idx + 1 < sampleCount ? buf.readInt16LE((idx + 1) * 2) : s0;
|
|
904
|
-
}
|
|
905
|
-
const interp = Math.round(s0 + (s1 - s0) * frac);
|
|
906
|
-
outArr.push(Math.max(-32768, Math.min(32767, interp)));
|
|
907
|
-
phase += step;
|
|
908
|
-
}
|
|
909
|
-
this.resample24Last = buf.readInt16LE((sampleCount - 1) * 2);
|
|
910
|
-
this.resample24HasHistory = true;
|
|
911
|
-
this.resample24Phase = phase - sampleCount;
|
|
912
|
-
const outBuf = Buffer.alloc(outArr.length * 2);
|
|
913
|
-
for (let j = 0; j < outArr.length; j++) outBuf.writeInt16LE(outArr[j], j * 2);
|
|
914
|
-
return outBuf;
|
|
915
|
-
}
|
|
916
|
-
};
|
|
917
|
-
function createResampler16kTo8k() {
|
|
918
|
-
return new StatefulResampler({ srcRate: 16e3, dstRate: 8e3 });
|
|
919
|
-
}
|
|
920
|
-
function createResampler8kTo16k() {
|
|
921
|
-
return new StatefulResampler({ srcRate: 8e3, dstRate: 16e3 });
|
|
922
|
-
}
|
|
923
|
-
function createResampler24kTo16k() {
|
|
924
|
-
return new StatefulResampler({ srcRate: 24e3, dstRate: 16e3 });
|
|
925
|
-
}
|
|
926
|
-
function createResampler24kTo8k() {
|
|
927
|
-
return new StatefulResampler({ srcRate: 24e3, dstRate: 8e3 });
|
|
928
|
-
}
|
|
929
|
-
var _warnedResample8kTo16k = false;
|
|
930
|
-
var _warnedResample16kTo8k = false;
|
|
931
|
-
var _warnedResample24kTo16k = false;
|
|
932
|
-
function resample8kTo16k(pcm8k) {
|
|
933
|
-
if (!_warnedResample8kTo16k) {
|
|
934
|
-
_warnedResample8kTo16k = true;
|
|
935
|
-
getLogger().warn(
|
|
936
|
-
"[patter] resample8kTo16k() is deprecated. Use createResampler8kTo16k() (StatefulResampler) to eliminate chunk-boundary discontinuities."
|
|
937
|
-
);
|
|
938
|
-
}
|
|
939
|
-
if (pcm8k.length === 0) return Buffer.alloc(0);
|
|
940
|
-
const r = createResampler8kTo16k();
|
|
941
|
-
const main = r.process(pcm8k);
|
|
942
|
-
const tail = r.flush();
|
|
943
|
-
return tail.length > 0 ? Buffer.concat([main, tail]) : main;
|
|
944
|
-
}
|
|
945
|
-
function resample16kTo8k(pcm16k) {
|
|
946
|
-
if (!_warnedResample16kTo8k) {
|
|
947
|
-
_warnedResample16kTo8k = true;
|
|
948
|
-
getLogger().warn(
|
|
949
|
-
"[patter] resample16kTo8k() is deprecated. Use createResampler16kTo8k() (StatefulResampler) to eliminate chunk-boundary discontinuities."
|
|
950
|
-
);
|
|
951
|
-
}
|
|
952
|
-
if (pcm16k.length === 0) return Buffer.alloc(0);
|
|
953
|
-
const r = createResampler16kTo8k();
|
|
954
|
-
const out = r.process(pcm16k);
|
|
955
|
-
const tail = r.flush();
|
|
956
|
-
return tail.length > 0 ? Buffer.concat([out, tail]) : out;
|
|
957
|
-
}
|
|
958
|
-
function resample24kTo16k(pcm24k) {
|
|
959
|
-
if (!_warnedResample24kTo16k) {
|
|
960
|
-
_warnedResample24kTo16k = true;
|
|
961
|
-
getLogger().warn(
|
|
962
|
-
"[patter] resample24kTo16k() is deprecated. Use createResampler24kTo16k() (StatefulResampler) or OpenAITTS.resampleStreaming for anti-aliased resampling."
|
|
963
|
-
);
|
|
964
|
-
}
|
|
965
|
-
if (pcm24k.length === 0) return Buffer.alloc(0);
|
|
966
|
-
const sampleCount = Math.floor(pcm24k.length / 2);
|
|
967
|
-
const outSamples = Math.floor(sampleCount * 2 / 3);
|
|
968
|
-
const out = Buffer.alloc(outSamples * 2);
|
|
969
|
-
for (let i = 0; i < outSamples; i++) {
|
|
970
|
-
const pos = i * 1.5;
|
|
971
|
-
const idx = Math.floor(pos);
|
|
972
|
-
const frac = pos - idx;
|
|
973
|
-
const s0 = pcm24k.readInt16LE(idx * 2);
|
|
974
|
-
const s1 = idx + 1 < sampleCount ? pcm24k.readInt16LE((idx + 1) * 2) : s0;
|
|
975
|
-
const interp = Math.round(s0 + (s1 - s0) * frac);
|
|
976
|
-
out.writeInt16LE(Math.max(-32768, Math.min(32767, interp)), i * 2);
|
|
977
|
-
}
|
|
978
|
-
return out;
|
|
979
|
-
}
|
|
980
|
-
|
|
981
|
-
// src/providers/openai-realtime-2.ts
|
|
982
|
-
var GA_TO_V1_EVENT_NAMES = {
|
|
983
|
-
"response.output_audio.delta": "response.audio.delta",
|
|
984
|
-
"response.output_audio.done": "response.audio.done",
|
|
985
|
-
"response.output_audio_transcript.delta": "response.audio_transcript.delta",
|
|
986
|
-
"response.output_audio_transcript.done": "response.audio_transcript.done"
|
|
987
|
-
};
|
|
988
|
-
var OpenAIRealtime2Adapter = class extends OpenAIRealtimeAdapter {
  /** Two-stage outbound resampler for 24 kHz → 8 kHz. Created lazily on
   *  the first audio frame so each Realtime session has its own state.
   *
   *  We chain `24k → 16k → 8k` instead of using the direct `24k → 8k`
   *  variant of {@link StatefulResampler}: the direct path is a 3:1
   *  decimation with linear interpolation only — no anti-alias filter
   *  — so any energy above 4 kHz in the source aliases down into the
   *  audible band and is heard as raspy/scratchy artefacts on speech.
   *  `gpt-realtime-2` outputs voice with significant content above
   *  4 kHz. The second stage (16k → 8k) uses a 5-tap FIR anti-alias
   *  filter which removes the offending band before decimation, and
   *  empirically (see commit message) the chain produces audibly
   *  cleaner output. The 24k → 16k step is still pure linear-interp
   *  but the inputs to it stay below the Nyquist of the 16 kHz stage,
   *  so it doesn't introduce new artefacts.
   */
  outboundResampler24To16 = null;
  outboundResampler16To8 = null;
  /** Last 8 kHz input sample carried across chunk boundaries for the
   *  direct 3× linear upsample (see `transcodeInboundMulaw8ToPcm24`).
   *  The carry guarantees the very first output of each chunk
   *  interpolates from the *real* preceding sample, not from the chunk's
   *  own first sample replicated — without it every 20 ms Twilio frame
   *  boundary becomes a small DC step that the GA server VAD interprets
   *  as constant low-energy noise, which never crosses the speech
   *  threshold. */
  inbound8kCarry = null;
  /**
   * Build the GA-shape `session.update` payload: nested `audio.{input,output}`,
   * `output_modalities`, and `type: "realtime"`. Both audio directions use
   * PCM at 24 kHz; optional fields are only included when set in options.
   */
  buildGASessionConfig() {
    const opts = this.options;
    const fmt = { type: "audio/pcm", rate: 24e3 };
    const config = {
      type: "realtime",
      output_modalities: opts.modalities ?? ["audio"],
      audio: {
        input: {
          format: fmt,
          transcription: {
            model: opts.inputAudioTranscriptionModel ?? OpenAITranscriptionModel.WHISPER_1
          },
          // Lower threshold (0.1 vs the 0.5 default) because the inbound
          // audio is telephony-band (8 kHz) linearly upsampled to 24 kHz —
          // the upper 4-12 kHz band is interpolation, not real harmonics,
          // and the GA server VAD's default tuning was calibrated against
          // studio-quality 24 kHz audio. A more permissive threshold
          // recovers reliable speech detection on phone-band input.
          turn_detection: {
            type: opts.vadType ?? OpenAIRealtimeVADType.SERVER_VAD,
            threshold: 0.1,
            prefix_padding_ms: 300,
            silence_duration_ms: opts.silenceDurationMs ?? 500
          }
        },
        output: {
          format: fmt,
          voice: this.voice
        }
      },
      instructions: this.instructions || "You are a helpful voice assistant. Be concise."
    };
    if (opts.temperature !== void 0) config.temperature = opts.temperature;
    if (opts.maxResponseOutputTokens !== void 0) {
      config.max_output_tokens = opts.maxResponseOutputTokens;
    }
    if (opts.toolChoice !== void 0) config.tool_choice = opts.toolChoice;
    if (opts.reasoningEffort !== void 0) {
      config.reasoning = { effort: opts.reasoningEffort };
    }
    if (this.tools?.length) {
      config.tools = this.tools.map((t) => {
        const def = {
          type: "function",
          name: t.name,
          description: t.description,
          parameters: t.parameters
        };
        if (t.strict === true) def.strict = true;
        return def;
      });
    }
    return config;
  }
  /**
   * Open the Realtime WebSocket against the GA endpoint and apply the GA
   * session configuration. Header `OpenAI-Beta: realtime=v1` is OMITTED
   * (the GA endpoint rejects it). Wire shape uses nested `audio.{input,
   * output}` + `output_modalities` + `session.type === "realtime"`.
   *
   * `ws.on("message", …)` is wrapped so listeners receive v1-named events
   * with mulaw 8 kHz audio split into 160-byte frames. `ws.off` and
   * `ws.removeListener` are wrapped symmetrically so a handler registered
   * through the shim can actually be removed again.
   *
   * Resolves once `session.updated` arrives; rejects on a setup `error`
   * event, a socket error, or after 15 s without completion.
   */
  async connect() {
    const url = `wss://api.openai.com/v1/realtime?model=${encodeURIComponent(this.model)}`;
    this.ws = new WebSocket2(url, {
      headers: { Authorization: `Bearer ${this.apiKey}` }
    });
    const ws = this.ws;
    const originalOn = ws.on.bind(ws);
    const originalOff = ws.off.bind(ws);
    const originalRemoveListener = ws.removeListener.bind(ws);
    // handler → wrapper actually registered on the emitter, so removal works.
    const wrappedByHandler = new WeakMap();
    const FRAME_BYTES = 160;
    // Returns null when the message should be passed through untouched, or a
    // list of replacement payloads (empty = drop: resampler has no output yet).
    const translate = (raw) => {
      const text = typeof raw === "string" ? raw : raw.toString();
      const parsed = JSON.parse(text);
      const gaType = parsed.type;
      if (!gaType || !Object.hasOwn(GA_TO_V1_EVENT_NAMES, gaType)) return null;
      const v1Type = GA_TO_V1_EVENT_NAMES[gaType];
      if (gaType === "response.output_audio.delta" && typeof parsed.delta === "string") {
        const mulaw = this.transcodeOutboundPcm24ToMulaw8Buffer(parsed.delta);
        const frames = [];
        for (let off = 0; off < mulaw.length; off += FRAME_BYTES) {
          const slice = mulaw.subarray(off, Math.min(off + FRAME_BYTES, mulaw.length));
          const frame = { ...parsed, type: v1Type, delta: slice.toString("base64") };
          frames.push(Buffer.from(JSON.stringify(frame)));
        }
        return frames;
      }
      parsed.type = v1Type;
      return [Buffer.from(JSON.stringify(parsed))];
    };
    ws.on = (event, handler) => {
      if (event !== "message") return originalOn(event, handler);
      const wrapped = (raw, ...rest) => {
        let frames;
        try {
          frames = translate(raw);
        } catch {
          // Unparseable or untranslatable payload: hand it over untouched.
          frames = null;
        }
        if (frames === null) {
          handler(raw, ...rest);
          return;
        }
        for (const frame of frames) {
          // The handler runs outside the translation try-block so a failure
          // in it is reported once and never triggers a second dispatch of
          // the same message in its raw form.
          try {
            handler(frame, ...rest);
          } catch (e) {
            getLogger().warn(`OpenAI Realtime 2: message handler failed: ${String(e)}`);
          }
        }
      };
      wrappedByHandler.set(handler, wrapped);
      return originalOn(event, wrapped);
    };
    const unwrap = (event, handler) => event === "message" ? wrappedByHandler.get(handler) ?? handler : handler;
    ws.off = (event, handler) => originalOff(event, unwrap(event, handler));
    ws.removeListener = (event, handler) => originalRemoveListener(event, unwrap(event, handler));
    await new Promise((resolve, reject) => {
      let sessionCreated = false;
      let settled = false;
      const onSetupMessage = (raw) => {
        // Once setup has settled, later events (including server `error`
        // events) belong to the session listener and must not close the socket.
        if (settled) return;
        let msg;
        try {
          msg = JSON.parse(raw.toString());
        } catch (e) {
          getLogger().warn(`OpenAI Realtime 2: failed to parse message: ${String(e)}`);
          return;
        }
        if (msg.type === "session.created" && !sessionCreated) {
          sessionCreated = true;
          ws.send(JSON.stringify({ type: "session.update", session: this.buildGASessionConfig() }));
        } else if (msg.type === "session.updated") {
          cleanup();
          resolve();
        } else if (msg.type === "error") {
          cleanup();
          try {
            ws.close();
          } catch {
            // Best-effort close; the rejection below carries the real error.
          }
          reject(new Error(`OpenAI Realtime 2 setup error: ${msg.error?.message ?? JSON.stringify(msg)}`));
        }
      };
      const onSetupError = (err) => {
        if (settled) return;
        cleanup();
        try {
          ws.close();
        } catch {
          // Best-effort close; the rejection below carries the real error.
        }
        reject(err);
      };
      const cleanup = () => {
        if (settled) return;
        settled = true;
        clearTimeout(timer);
        ws.off("message", onSetupMessage);
        ws.off("error", onSetupError);
      };
      const timer = setTimeout(() => {
        cleanup();
        try {
          ws.close();
        } catch {
          // Best-effort close; the rejection below carries the real error.
        }
        reject(new Error("OpenAI Realtime 2 connect timeout"));
      }, 15e3);
      ws.on("message", onSetupMessage);
      ws.on("error", onSetupError);
    });
    this.armHeartbeatAndListener();
  }
  /**
   * Override the parent `sendAudio` to transcode inbound carrier audio
   * (mulaw 8 kHz from Twilio/Telnyx) into PCM-16 24 kHz before sending
   * `input_audio_buffer.append`. The GA server's audio engine ignores
   * mulaw frames (commit returns "buffer only has 0.00ms of audio") even
   * though it accepts `audio/pcmu` at the protocol level.
   *
   * Silently does nothing when the socket is missing or not open.
   */
  sendAudio(mulawAudio) {
    if (!this.ws || this.ws.readyState !== this.ws.OPEN) return;
    const pcm24k = this.transcodeInboundMulaw8ToPcm24(mulawAudio);
    this.ws.send(JSON.stringify({
      type: "input_audio_buffer.append",
      audio: pcm24k.toString("base64")
    }));
  }
  /**
   * mulaw 8 kHz Buffer → PCM-16-LE 24 kHz Buffer.
   *
   * Direct 3× linear-interpolation upsample with a one-sample carry
   * across chunk boundaries. Each decoded sample is first multiplied by a
   * fixed gain of 2 and clamped to the int16 range. For every consecutive
   * pair of 8 kHz samples `(s_a, s_b)` we emit three 24 kHz samples:
   *
   *     out_0 = s_a
   *     out_1 = 2/3·s_a + 1/3·s_b
   *     out_2 = 1/3·s_a + 2/3·s_b
   *
   * The carry stores the last 8 kHz sample of the chunk so the next
   * chunk can start by pairing `(carry, firstNewSample)` — that keeps the
   * steady-state rate exact (N input samples → 3·N output samples) and
   * eliminates the chunk-boundary DC step that confused the GA server
   * VAD. The first chunk has no carry, so it yields 3·(N-1) samples: the
   * stream as a whole runs 3 output samples (125 µs) short, which is well
   * below the GA VAD's 300 ms prefix-padding window.
   *
   * @param {Buffer} mulaw mulaw-encoded 8 kHz audio.
   * @returns {Buffer} PCM-16-LE 24 kHz audio (zero-length if no pair is available yet).
   */
  transcodeInboundMulaw8ToPcm24(mulaw) {
    const pcm8 = mulawToPcm16(mulaw);
    const samples8 = pcm8.length / 2;
    if (samples8 === 0) return Buffer.alloc(0);
    const GAIN = 2;
    const inputs = [];
    if (this.inbound8kCarry !== null) inputs.push(this.inbound8kCarry);
    for (let i = 0; i < samples8; i++) {
      const raw = pcm8.readInt16LE(i * 2) * GAIN;
      inputs.push(Math.max(-32768, Math.min(32767, raw)));
    }
    // The carry is stored post-gain, so it is not amplified a second time.
    this.inbound8kCarry = inputs[inputs.length - 1];
    const numPairs = inputs.length - 1;
    if (numPairs <= 0) return Buffer.alloc(0);
    const out = Buffer.allocUnsafe(numPairs * 3 * 2);
    for (let i = 0; i < numPairs; i++) {
      const s0 = inputs[i];
      const s1 = inputs[i + 1];
      out.writeInt16LE(s0, i * 6);
      out.writeInt16LE(Math.round((s0 * 2 + s1) / 3), i * 6 + 2);
      out.writeInt16LE(Math.round((s0 + s1 * 2) / 3), i * 6 + 4);
    }
    return out;
  }
  /**
   * Base64 PCM-16-LE 24 kHz → mulaw 8 kHz Buffer. Used by the WS
   * translation shim on each `response.output_audio.delta`. The stateful
   * resamplers (24k → 16k, then 16k → 8k) are created lazily and reused
   * across all deltas in this session so their state carries across chunk
   * boundaries — without that, every chunk boundary produces a click.
   *
   * @param {string} deltaB64 base64-encoded PCM-16-LE 24 kHz audio.
   * @returns {Buffer} mulaw 8 kHz audio (zero-length if the resamplers produced no output).
   */
  transcodeOutboundPcm24ToMulaw8Buffer(deltaB64) {
    if (!this.outboundResampler24To16) {
      this.outboundResampler24To16 = new StatefulResampler({ srcRate: 24e3, dstRate: 16e3 });
      this.outboundResampler16To8 = new StatefulResampler({ srcRate: 16e3, dstRate: 8e3 });
    }
    const pcm24 = Buffer.from(deltaB64, "base64");
    const pcm16 = this.outboundResampler24To16.process(pcm24);
    const pcm8 = this.outboundResampler16To8.process(pcm16);
    if (pcm8.length === 0) return Buffer.alloc(0);
    return pcm16ToMulaw(pcm8);
  }
  /**
   * GA-API variant of {@link OpenAIRealtimeAdapter.sendFirstMessage}. Two
   * differences from the v1 path:
   *
   *   1. The v1 implementation sends `response.modalities` which the GA
   *      endpoint rejects with `Unknown parameter: 'response.modalities'`.
   *      Use `output_modalities` to match the GA `session.update` shape.
   *
   *   2. The GA `response.create` does NOT inherit `audio.output.voice`
   *      from the session — it falls back to the server-side default
   *      (`marin`, female) when the field is omitted on the response
   *      itself. Session-level `voice: "alloy"` only affects subsequent
   *      server-VAD-triggered responses, NOT this explicit
   *      `response.create`. We re-inject the configured voice here so the
   *      first-message voice matches the rest of the call.
   */
  async sendFirstMessage(text) {
    this.ws?.send(JSON.stringify({
      type: "response.create",
      response: {
        output_modalities: ["audio"],
        audio: { output: { voice: this.voice } },
        reasoning: { effort: "minimal" },
        instructions: `Say exactly the following sentence as your first turn and nothing else: "${text}"`
      }
    }));
  }
};
|
|
1273
|
-
|
|
1274
32
|
// src/providers/elevenlabs-convai.ts
|
|
1275
33
|
init_esm_shims();
|
|
1276
|
-
import
|
|
34
|
+
import WebSocket from "ws";
|
|
1277
35
|
var ELEVENLABS_CONVAI_URL = "wss://api.elevenlabs.io/v1/convai/conversation";
|
|
1278
36
|
var ELEVENLABS_SIGNED_URL = "https://api.elevenlabs.io/v1/convai/conversation/get-signed-url";
|
|
1279
37
|
var AGENT_SILENCE_MS = 500;
|
|
@@ -1395,8 +153,8 @@ var ElevenLabsConvAIAdapter = class _ElevenLabsConvAIAdapter {
|
|
|
1395
153
|
wsUrl = this.agentId ? `${ELEVENLABS_CONVAI_URL}?agent_id=${encodeURIComponent(this.agentId)}` : ELEVENLABS_CONVAI_URL;
|
|
1396
154
|
wsOptions = { headers: { "xi-api-key": this.apiKey } };
|
|
1397
155
|
}
|
|
1398
|
-
this.ws = new
|
|
1399
|
-
await new Promise((
|
|
156
|
+
this.ws = new WebSocket(wsUrl, wsOptions);
|
|
157
|
+
await new Promise((resolve2, reject) => {
|
|
1400
158
|
const timeout = setTimeout(
|
|
1401
159
|
() => reject(new Error("ElevenLabs ConvAI connect timeout")),
|
|
1402
160
|
15e3
|
|
@@ -1420,7 +178,7 @@ var ElevenLabsConvAIAdapter = class _ElevenLabsConvAIAdapter {
|
|
|
1420
178
|
conversation_config_override: override
|
|
1421
179
|
};
|
|
1422
180
|
this.ws.send(JSON.stringify(config));
|
|
1423
|
-
|
|
181
|
+
resolve2();
|
|
1424
182
|
});
|
|
1425
183
|
this.ws.once("error", (err) => {
|
|
1426
184
|
clearTimeout(timeout);
|
|
@@ -1457,7 +215,7 @@ var ElevenLabsConvAIAdapter = class _ElevenLabsConvAIAdapter {
|
|
|
1457
215
|
}
|
|
1458
216
|
respondToPing(eventId, delayMs) {
|
|
1459
217
|
const send = () => {
|
|
1460
|
-
if (!this.ws || this.ws.readyState !==
|
|
218
|
+
if (!this.ws || this.ws.readyState !== WebSocket.OPEN) return;
|
|
1461
219
|
try {
|
|
1462
220
|
this.ws.send(JSON.stringify({ type: "pong", event_id: eventId }));
|
|
1463
221
|
} catch (err) {
|
|
@@ -1554,7 +312,7 @@ var ElevenLabsConvAIAdapter = class _ElevenLabsConvAIAdapter {
|
|
|
1554
312
|
}
|
|
1555
313
|
/** Send a caller-side audio chunk to ConvAI as a base64 `user_audio_chunk`. */
|
|
1556
314
|
sendAudio(audioBytes) {
|
|
1557
|
-
if (!this.ws || this.ws.readyState !==
|
|
315
|
+
if (!this.ws || this.ws.readyState !== WebSocket.OPEN) return;
|
|
1558
316
|
this.ws.send(
|
|
1559
317
|
JSON.stringify({
|
|
1560
318
|
user_audio_chunk: audioBytes.toString("base64")
|
|
@@ -1577,20 +335,20 @@ var ElevenLabsConvAIAdapter = class _ElevenLabsConvAIAdapter {
|
|
|
1577
335
|
return;
|
|
1578
336
|
}
|
|
1579
337
|
const ws = this.ws;
|
|
1580
|
-
this.closePromise = new Promise((
|
|
1581
|
-
if (ws.readyState ===
|
|
1582
|
-
|
|
338
|
+
this.closePromise = new Promise((resolve2) => {
|
|
339
|
+
if (ws.readyState === WebSocket.CLOSED || ws.readyState === WebSocket.CLOSING) {
|
|
340
|
+
resolve2();
|
|
1583
341
|
return;
|
|
1584
342
|
}
|
|
1585
343
|
const done = () => {
|
|
1586
|
-
|
|
344
|
+
resolve2();
|
|
1587
345
|
};
|
|
1588
346
|
ws.once("close", done);
|
|
1589
347
|
ws.once("error", done);
|
|
1590
348
|
try {
|
|
1591
349
|
ws.close();
|
|
1592
350
|
} catch {
|
|
1593
|
-
|
|
351
|
+
resolve2();
|
|
1594
352
|
}
|
|
1595
353
|
});
|
|
1596
354
|
try {
|
|
@@ -1603,6 +361,157 @@ var ElevenLabsConvAIAdapter = class _ElevenLabsConvAIAdapter {
|
|
|
1603
361
|
}
|
|
1604
362
|
};
|
|
1605
363
|
|
|
364
|
+
// src/providers/plivo-adapter.ts
|
|
365
|
+
init_esm_shims();
|
|
366
|
+
var PLIVO_API_BASE = "https://api.plivo.com/v1";
|
|
367
|
+
/**
 * Best-effort voicemail drop on a live Plivo call: speak `voicemailMessage`
 * via the call's Speak endpoint, wait roughly long enough for it to play
 * (60 ms per character, capped at 30 s), then hang the call up.
 *
 * Never throws — every failure is logged as a warning. Returns immediately
 * when any argument is missing/empty.
 *
 * @param {string} callUuid Plivo call UUID.
 * @param {string} voicemailMessage Text to speak into the voicemail.
 * @param {string} authId Plivo auth id (also the account path segment).
 * @param {string} authToken Plivo auth token.
 * @returns {Promise<void>}
 */
async function dropPlivoVoicemail(callUuid, voicemailMessage, authId, authToken) {
  if (!callUuid || !voicemailMessage || !authId || !authToken) return;
  const auth = `Basic ${Buffer.from(`${authId}:${authToken}`).toString("base64")}`;
  const base = `${PLIVO_API_BASE}/Account/${encodeURIComponent(authId)}/Call/${encodeURIComponent(callUuid)}`;
  try {
    const speak = await fetch(`${base}/Speak/`, {
      method: "POST",
      headers: { "Content-Type": "application/x-www-form-urlencoded", Authorization: auth },
      body: new URLSearchParams({ text: voicemailMessage }).toString(),
      signal: AbortSignal.timeout(1e4)
    });
    if (!speak.ok) {
      getLogger().warn(
        `Plivo voicemail Speak failed (${speak.status}): ${(await speak.text()).slice(0, 200)}`
      );
      return;
    }
    // Give the TTS time to play before hanging up.
    await new Promise(
      (r) => setTimeout(r, Math.min(3e4, voicemailMessage.length * 60))
    );
    // Bound the hang-up request like the Speak request so a stalled
    // connection cannot keep this promise pending indefinitely.
    const hangup = await fetch(`${base}/`, {
      method: "DELETE",
      headers: { Authorization: auth },
      signal: AbortSignal.timeout(1e4)
    });
    // 404 means the call has already ended, which is the outcome we wanted.
    if (!hangup.ok && hangup.status !== 404) {
      getLogger().warn(
        `Plivo voicemail hangup failed (${hangup.status}): ${(await hangup.text()).slice(0, 200)}`
      );
      return;
    }
    getLogger().info(`Voicemail dropped for ${callUuid}`);
  } catch (e) {
    getLogger().warn(`Could not drop voicemail: ${String(e)}`);
  }
}
|
|
393
|
+
/**
 * Escape the five XML special characters so `s` can be embedded safely in
 * Plivo answer XML, as element text or inside a quoted attribute value.
 *
 * Done in a single pass so an ampersand introduced by one replacement can
 * never be escaped a second time.
 *
 * @param {string} s Raw text.
 * @returns {string} Text with `&`, `<`, `>`, `"` and `'` replaced by entities.
 */
function xmlEscapePlivo(s) {
  const entities = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&apos;"
  };
  return s.replace(/[&<>"']/g, (ch) => entities[ch]);
}
|
|
396
|
+
/**
 * Minimal REST client for the Plivo account API: number provisioning,
 * inbound answer-URL configuration, outbound dialing, hangup, and answer-XML
 * generation.
 */
var PlivoAdapter = class {
  authId;
  baseUrl;
  authHeader;
  /**
   * @param {string} authId - Plivo Auth ID; also forms the account path segment.
   * @param {string} authToken - Plivo Auth Token, used for HTTP Basic auth.
   * @throws {Error} When either credential is missing or empty.
   */
  constructor(authId, authToken) {
    if (!authId) throw new Error("PlivoAdapter: authId is required");
    if (!authToken) throw new Error("PlivoAdapter: authToken is required");
    this.authId = authId;
    this.baseUrl = `${PLIVO_API_BASE}/Account/${encodeURIComponent(authId)}`;
    this.authHeader = `Basic ${Buffer.from(`${authId}:${authToken}`).toString("base64")}`;
  }
  /**
   * Issue an authenticated request against the account base URL.
   *
   * A 404 is NOT raised as an error (hangup relies on that), so callers for
   * which "not found" is a failure must inspect the returned `status`.
   *
   * @param {string} method - HTTP verb.
   * @param {string} path4 - Path appended to the account base URL.
   * @param {object} [jsonBody] - Optional JSON payload.
   * @returns {Promise<{status: number, data: object}>} HTTP status and the
   *   parsed JSON body (`{}` when the body is empty or not valid JSON).
   * @throws {Error} On any non-2xx response other than 404.
   */
  async request(method, path4, jsonBody) {
    const headers = { Authorization: this.authHeader };
    if (jsonBody !== void 0) headers["Content-Type"] = "application/json";
    const response = await fetch(`${this.baseUrl}${path4}`, {
      method,
      headers,
      body: jsonBody !== void 0 ? JSON.stringify(jsonBody) : void 0,
      signal: AbortSignal.timeout(3e4)
    });
    const text = await response.text();
    if (!response.ok && response.status !== 404) {
      throw new Error(`Plivo ${method} ${path4} failed: ${response.status} ${text}`);
    }
    let data = {};
    if (text) {
      try {
        data = JSON.parse(text);
      } catch {
        // Non-JSON body: deliberately tolerated, the caller sees `{}`.
      }
    }
    return { status: response.status, data };
  }
  /**
   * Search and rent an available Plivo number for the given ISO country.
   *
   * @param {string} countryIso - ISO country code to search in.
   * @returns {Promise<string>} The rented number.
   * @throws {Error} When no number is available, or when the rent request
   *   itself comes back 404 (which `request` does not raise on its own).
   */
  async provisionNumber(countryIso) {
    const { data } = await this.request(
      "GET",
      `/PhoneNumber/?country_iso=${encodeURIComponent(countryIso)}&limit=1`
    );
    const number = data.objects?.[0]?.number;
    if (!number) throw new Error(`PlivoAdapter: no numbers available for ${countryIso}`);
    const rent = await this.request("POST", `/PhoneNumber/${encodeURIComponent(number)}/`);
    if (rent.status === 404) {
      // Without this check a number that was never rented would be returned.
      throw new Error(`PlivoAdapter: rent failed for ${number}: 404 (number no longer available)`);
    }
    return number;
  }
  /**
   * Point the inbound answer flow for ``number`` at ``answerUrl`` by creating
   * a Plivo Application named ``patter-inbound`` and linking the number to
   * it. A new Application is requested on every call; no lookup of an
   * existing one is performed here. Most production deployments
   * pre-configure this in the Plivo console; this mirrors Twilio's
   * ``configureNumber`` auto-setup convenience.
   *
   * Best-effort: a missing ``app_id`` or a 404 on the link step is logged as
   * a warning rather than thrown.
   *
   * @param {string} number - Number to link.
   * @param {string} answerUrl - URL Plivo should POST to when a call arrives.
   * @returns {Promise<void>}
   */
  async configureNumber(number, answerUrl) {
    const { data } = await this.request("POST", "/Application/", {
      app_name: "patter-inbound",
      answer_url: answerUrl,
      answer_method: "POST"
    });
    if (!data.app_id) {
      getLogger().warn("Plivo Application create returned no app_id");
      return;
    }
    const link = await this.request("POST", `/Number/${encodeURIComponent(number)}/`, { app_id: data.app_id });
    if (link.status === 404) {
      getLogger().warn("Plivo number link returned 404; inbound answer URL was not applied");
    }
  }
  /**
   * Place an outbound Plivo call routed through ``answerUrl``. Returns Plivo's
   * ``request_uuid``. The WSS URL travels inside the answer XML, not as a dial
   * parameter — mirroring the Python adapter.
   *
   * @param {object} opts - `from`, `to`, `answerUrl`, plus optional
   *   `ringTimeout` (seconds), `machineDetection` and `machineDetectionUrl`.
   * @returns {Promise<{requestUuid: string}>} Empty string when Plivo returns none.
   */
  async initiateCall(opts) {
    const payload = {
      from: opts.from,
      to: opts.to,
      answer_url: opts.answerUrl,
      answer_method: "POST"
    };
    if (opts.ringTimeout != null) {
      const ringSeconds = Number(opts.ringTimeout);
      // A non-finite value would serialize as `null`; omit the field instead.
      if (Number.isFinite(ringSeconds)) payload.ring_timeout = Math.max(1, Math.floor(ringSeconds));
    }
    if (opts.machineDetection) {
      payload.machine_detection = "true";
      payload.machine_detection_time = 5e3;
      if (opts.machineDetectionUrl) {
        payload.machine_detection_url = opts.machineDetectionUrl;
        payload.machine_detection_method = "POST";
      }
    }
    const { data } = await this.request("POST", "/Call/", payload);
    return { requestUuid: data.request_uuid ?? "" };
  }
  /**
   * Hang up an active Plivo call by CallUUID. 204 and 404 are both success.
   *
   * @param {string} callUuid - Call to terminate.
   * @throws {Error} When `callUuid` is empty, or the request fails (logged, then rethrown).
   */
  async endCall(callUuid) {
    if (!callUuid) throw new Error("PlivoAdapter: callUuid is required");
    try {
      await this.request("DELETE", `/Call/${encodeURIComponent(callUuid)}/`);
    } catch (err) {
      getLogger().warn(`[PlivoAdapter] endCall failed for ${callUuid}: ${String(err)}`);
      throw err;
    }
  }
  /**
   * Build the Plivo answer XML. Unlike Twilio (``url=`` attribute), Plivo's
   * ``<Stream>`` takes the WSS URL as its **text content**. ``bidirectional``
   * enables two-way audio; ``keepCallAlive`` keeps the leg up for the lifetime
   * of the WebSocket. ``extraHeaders`` (comma-separated ``key=value``) is
   * delivered back on the WS ``start`` frame as a caller/callee fallback.
   *
   * Mirrors the Python adapter's ``generate_stream_xml``.
   *
   * @param {string} streamUrl - WebSocket URL placed as the element text.
   * @param {string} [contentType] - Value of the `contentType` attribute.
   * @param {Object<string, string>} [extraHeaders] - Joined as `k=v,k=v`.
   * @returns {string} The `<Response>` document.
   */
  static generateStreamXml(streamUrl, contentType = "audio/x-mulaw;rate=8000", extraHeaders) {
    let attrs = `bidirectional="true" keepCallAlive="true" contentType="${xmlEscapePlivo(contentType)}"`;
    if (extraHeaders) {
      const joined = Object.entries(extraHeaders).map(([k, v]) => `${k}=${v}`).join(",");
      attrs += ` extraHeaders="${xmlEscapePlivo(joined)}"`;
    }
    return `<Response><Stream ${attrs}>${xmlEscapePlivo(streamUrl)}</Stream></Response>`;
  }
};
|
|
510
|
+
|
|
511
|
+
// src/telephony/plivo.ts
|
|
512
|
+
init_esm_shims();
|
|
513
|
+
import crypto from "crypto";
|
|
514
|
+
|
|
1606
515
|
// src/provider-factory.ts
|
|
1607
516
|
init_esm_shims();
|
|
1608
517
|
async function createSTT(agent) {
|
|
@@ -1612,8 +521,172 @@ async function createTTS(agent) {
|
|
|
1612
521
|
return agent.tts ?? null;
|
|
1613
522
|
}
|
|
1614
523
|
|
|
524
|
+
// src/telephony/plivo.ts
|
|
525
|
+
/**
 * Credential holder describing a Plivo carrier. Each credential comes from
 * the options object first and falls back to the matching `PLIVO_*`
 * environment variable.
 */
var Carrier = class {
  kind = "plivo";
  authId;
  authToken;
  /**
   * @param {{authId?: string, authToken?: string}} [opts] - Explicit credentials.
   * @throws {Error} When the Auth ID or the Auth Token cannot be resolved.
   */
  constructor(opts = {}) {
    const env = process.env;
    // Resolve both credentials up front, then validate in authId → authToken order.
    const resolved = {
      authId: opts.authId ?? env.PLIVO_AUTH_ID,
      authToken: opts.authToken ?? env.PLIVO_AUTH_TOKEN
    };
    if (!resolved.authId) {
      throw new Error(
        "Plivo carrier requires authId. Pass { authId: 'MA...' } or set PLIVO_AUTH_ID in the environment."
      );
    }
    if (!resolved.authToken) {
      throw new Error(
        "Plivo carrier requires authToken. Pass { authToken: '...' } or set PLIVO_AUTH_TOKEN in the environment."
      );
    }
    Object.assign(this, resolved);
  }
};
|
|
546
|
+
/**
 * Normalize a Plivo answering-machine-detection result into one of
 * `"human"`, `"machine"`, `"fax"` or `"unknown"`.
 *
 * Matching is case-insensitive and ignores surrounding whitespace. Any value
 * beginning with `machine` counts as a machine, as do the literal values
 * `answering_machine`, `amd` and `true`.
 *
 * @param {string | null | undefined} result - Raw AMD value from Plivo.
 * @returns {"human" | "machine" | "fax" | "unknown"}
 */
function classifyPlivoAmd(result) {
  const normalized = (result || "").trim().toLowerCase();
  switch (normalized) {
    case "human":
    case "person":
      return "human";
    case "answering_machine":
    case "amd":
    case "true":
      return "machine";
    case "fax":
      return "fax";
    default:
      return normalized.startsWith("machine") ? "machine" : "unknown";
  }
}
|
|
555
|
+
/**
 * Verify a Plivo webhook signature.
 *
 * The signed string is `url`, followed for POST requests by every
 * `key + value` pair of `params` in sorted key order, then `"."` and the
 * nonce. It is HMAC-SHA256'd with the auth token and base64-encoded. The
 * signature header may carry several comma-separated candidates; a single
 * match is sufficient. Each candidate is compared in constant time.
 *
 * @param {string} url - Full webhook URL as seen by Plivo.
 * @param {string} nonce - Nonce header value.
 * @param {string} signature - Signature header value (comma-separated list allowed).
 * @param {string} authToken - Plivo Auth Token used as the HMAC key.
 * @param {Object<string, string>} [params] - POST parameters.
 * @param {string} [method] - HTTP method; params are only signed for `"POST"`.
 * @returns {boolean} `true` when any candidate matches, otherwise `false`.
 */
function validatePlivoSignature(url, nonce, signature, authToken, params, method = "POST") {
  if (!(signature && nonce && authToken)) return false;
  // Only POST bodies contribute to the signed string.
  const signedKeys = method === "POST" && params ? Object.keys(params).sort() : [];
  const suffix = signedKeys.map((key) => `${key}${params[key]}`).join("");
  const payload = `${url}${suffix}.${nonce}`;
  const digest = crypto.createHmac("sha256", authToken).update(payload).digest("base64");
  const expectedBytes = Buffer.from(digest);
  return signature.split(",").some((candidate) => {
    const value = candidate.trim();
    if (!value) return false;
    try {
      const candidateBytes = Buffer.from(value);
      // Length gate first: timingSafeEqual throws on unequal lengths.
      return candidateBytes.length === expectedBytes.length && crypto.timingSafeEqual(candidateBytes, expectedBytes);
    } catch {
      return false;
    }
  });
}
|
|
579
|
+
// Characters accepted by `PlivoBridge.sendDtmf`; anything else is dropped.
var PLIVO_DTMF_ALLOWED = new Set("0123456789*#ABCDabcdwW");
/**
 * Carrier bridge for Plivo: frames audio and control events for the Plivo
 * media-stream WebSocket, and drives the Plivo REST API for transfer,
 * recording, hangup and post-call cost lookup.
 */
var PlivoBridge = class {
  /**
   * @param {object} config - Bridge configuration; this class reads
   *   `plivoAuthId`, `plivoAuthToken` and `webhookUrl` from it.
   */
  constructor(config) {
    this.config = config;
    const authId = config.plivoAuthId ?? "";
    const authToken = config.plivoAuthToken ?? "";
    this.authHeader = `Basic ${Buffer.from(`${authId}:${authToken}`).toString("base64")}`;
    this.apiBase = `https://api.plivo.com/v1/Account/${encodeURIComponent(authId)}`;
  }
  config;
  label = "Plivo";
  telephonyProvider = "plivo";
  inputWireFormat = "ulaw_8000";
  authHeader;
  apiBase;
  /** Send one base64 µ-law audio chunk to the caller as a `playAudio` event. */
  sendAudio(ws, audioBase64, _streamSid) {
    ws.send(
      JSON.stringify({
        event: "playAudio",
        media: { contentType: "audio/x-mulaw", sampleRate: 8e3, payload: audioBase64 }
      })
    );
  }
  /** Send a named `checkpoint` event for the given stream. */
  sendMark(ws, markName, streamSid) {
    ws.send(JSON.stringify({ event: "checkpoint", streamId: streamSid, name: markName }));
  }
  /** Send a `clearAudio` event for the given stream. */
  sendClear(ws, streamSid) {
    ws.send(JSON.stringify({ event: "clearAudio", streamId: streamSid }));
  }
  /**
   * Redirect the A-leg of a live call to the transfer webhook, which receives
   * the target number as its `to` query parameter.
   *
   * Skipped (with a warning where noted in the body) when the target is not
   * E.164, credentials or `callId` are missing, or no `webhookUrl` is
   * configured. A non-2xx response is logged as a warning and is no longer
   * reported as a successful transfer. Network errors propagate to the caller.
   *
   * @param {string} callId - Plivo CallUUID.
   * @param {string} toNumber - E.164 transfer target.
   */
  async transferCall(callId, toNumber) {
    if (!/^\+[1-9]\d{6,14}$/.test(toNumber)) {
      getLogger().warn(`PlivoBridge.transferCall rejected: invalid target ${JSON.stringify(toNumber)}`);
      return;
    }
    if (!this.config.plivoAuthId || !this.config.plivoAuthToken || !callId) return;
    if (!this.config.webhookUrl) {
      getLogger().warn("PlivoBridge.transferCall skipped: no webhookUrl for aleg_url");
      return;
    }
    const alegUrl = `https://${this.config.webhookUrl}/webhooks/plivo/transfer?to=${encodeURIComponent(toNumber)}`;
    const resp = await fetch(`${this.apiBase}/Call/${encodeURIComponent(callId)}/`, {
      method: "POST",
      headers: { "Content-Type": "application/json", Authorization: this.authHeader },
      body: JSON.stringify({ legs: "aleg", aleg_url: alegUrl, aleg_method: "GET" })
    });
    if (!resp.ok) {
      // Previously the success line below was logged even when Plivo refused.
      getLogger().warn(`Plivo transfer failed (${resp.status}): ${(await resp.text()).slice(0, 200)}`);
      return;
    }
    getLogger().info(`Call transferred to ${toNumber}`);
  }
  /**
   * Send DTMF digits over the media WebSocket. Characters outside
   * `PLIVO_DTMF_ALLOWED` are dropped; if none remain a warning is logged and
   * nothing is sent.
   */
  async sendDtmf(ws, _callId, digits, _delayMs) {
    const filtered = Array.from(digits ?? "").filter((d) => PLIVO_DTMF_ALLOWED.has(d)).join("");
    if (!filtered) {
      getLogger().warn(`PlivoBridge.sendDtmf: no valid digits in ${JSON.stringify(digits)}`);
      return;
    }
    ws.send(JSON.stringify({ event: "sendDTMF", dtmf: filtered }));
  }
  /** Start recording the call. Best-effort: failures are logged, never thrown. */
  async startRecording(callId) {
    if (!this.config.plivoAuthId || !this.config.plivoAuthToken || !callId) return;
    try {
      const resp = await fetch(`${this.apiBase}/Call/${encodeURIComponent(callId)}/Record/`, {
        method: "POST",
        headers: { Authorization: this.authHeader }
      });
      if (!resp.ok) {
        getLogger().warn(`Plivo record start failed (${resp.status}): ${(await resp.text()).slice(0, 200)}`);
      } else {
        getLogger().info("Plivo recording started");
      }
    } catch (e) {
      getLogger().warn(`Plivo record start error: ${String(e)}`);
    }
  }
  /**
   * Hang up the call. Best-effort: a 404 is treated as already ended, other
   * non-2xx statuses are logged as warnings, and request errors are logged
   * at debug level instead of being silently discarded.
   */
  async endCall(callId, _ws) {
    if (!this.config.plivoAuthId || !this.config.plivoAuthToken || !callId) return;
    try {
      const resp = await fetch(`${this.apiBase}/Call/${encodeURIComponent(callId)}/`, {
        method: "DELETE",
        headers: { Authorization: this.authHeader }
      });
      if (!resp.ok && resp.status !== 404) {
        getLogger().warn(`Plivo hangup returned ${resp.status}`);
      }
    } catch (err) {
      getLogger().debug(`Plivo hangup request failed: ${err?.message ?? err}`);
    }
  }
  /** Create the speech-to-text provider for `agent` via `createSTT`. */
  createStt(agent) {
    return createSTT(agent);
  }
  /**
   * Fetch the call record and, when it carries a parsable `total_amount`,
   * report its absolute value as the actual telephony cost.
   *
   * An unparsable amount is ignored rather than forwarded as `NaN`. Failures
   * are logged at debug level and never thrown.
   *
   * @param {{setActualTelephonyCost(cost: number): void}} metricsAcc
   * @param {string} callId - Plivo CallUUID.
   */
  async queryTelephonyCost(metricsAcc, callId) {
    if (!this.config.plivoAuthId || !this.config.plivoAuthToken || !callId) return;
    try {
      const resp = await fetch(`${this.apiBase}/Call/${encodeURIComponent(callId)}/`, {
        headers: { Authorization: this.authHeader },
        signal: AbortSignal.timeout(5e3)
      });
      if (resp.ok) {
        const data = await resp.json();
        const amount = Number.parseFloat(data.total_amount);
        if (Number.isFinite(amount)) {
          metricsAcc.setActualTelephonyCost(Math.abs(amount));
          getLogger().info(`Plivo actual cost: $${data.total_amount}`);
        }
      }
    } catch (err) {
      getLogger().debug(`queryTelephonyCost(plivo) failed: ${err?.message ?? err}`);
    }
  }
};
|
|
685
|
+
|
|
1615
686
|
// src/pricing.ts
|
|
1616
687
|
init_esm_shims();
|
|
688
|
+
var PRICING_VERSION = "2026.3";
|
|
689
|
+
var PRICING_LAST_UPDATED = "2026-05-08";
|
|
1617
690
|
var PricingUnit = {
|
|
1618
691
|
MINUTE: "minute",
|
|
1619
692
|
THOUSAND_CHARS: "1k_chars",
|
|
@@ -1852,7 +925,7 @@ var DEFAULT_PRICING = {
|
|
|
1852
925
|
// twilio default = US inbound local (the 99% case for voice agents receiving
|
|
1853
926
|
// calls on a local number). For US toll-free inbound ($0.022/min) or US
|
|
1854
927
|
// outbound local ($0.0140/min), override via Patter({ pricing: { twilio: {...} } }).
|
|
1855
|
-
twilio: { unit: PricingUnit.MINUTE, price: 85e-4 },
|
|
928
|
+
twilio: { unit: PricingUnit.MINUTE, price: 85e-4, roundUp: true },
|
|
1856
929
|
// Telnyx — direction-aware rates as of 2026-05-11.
|
|
1857
930
|
// Sources:
|
|
1858
931
|
// https://telnyx.com/pricing/elastic-sip
|
|
@@ -1870,7 +943,17 @@ var DEFAULT_PRICING = {
|
|
|
1870
943
|
// price: 0.0035 } } })`` to bill all inbound at the lower rate.
|
|
1871
944
|
telnyx: { unit: PricingUnit.MINUTE, price: 7e-3 },
|
|
1872
945
|
telnyx_inbound: { unit: PricingUnit.MINUTE, price: 35e-4 },
|
|
1873
|
-
telnyx_outbound: { unit: PricingUnit.MINUTE, price: 7e-3 }
|
|
946
|
+
telnyx_outbound: { unit: PricingUnit.MINUTE, price: 7e-3 },
|
|
947
|
+
// Plivo — official US pay-as-you-go voice rates (per minute; Plivo rounds
|
|
948
|
+
// partial minutes up like Twilio). Source: https://www.plivo.com/voice/pricing/
|
|
949
|
+
// US local inbound: $0.0055/min
|
|
950
|
+
// US local outbound: $0.0115/min
|
|
951
|
+
// US toll-free inbound: $0.0180/min (override via new Patter({ pricing }))
|
|
952
|
+
// The flat ``plivo`` key defaults to inbound local; the billed amount is
|
|
953
|
+
// also reconciled post-call from the Plivo CDR (``total_amount``).
|
|
954
|
+
plivo: { unit: PricingUnit.MINUTE, price: 55e-4, roundUp: true },
|
|
955
|
+
plivo_inbound: { unit: PricingUnit.MINUTE, price: 55e-4, roundUp: true },
|
|
956
|
+
plivo_outbound: { unit: PricingUnit.MINUTE, price: 0.0115, roundUp: true }
|
|
1874
957
|
};
|
|
1875
958
|
function cloneProviderEntry(entry) {
|
|
1876
959
|
const out = { ...entry };
|
|
@@ -2056,15 +1139,35 @@ function calculateLlmCost(provider2, model, inputTokens, outputTokens, cacheRead
|
|
|
2056
1139
|
/**
 * Compute the telephony cost of a call from per-minute pricing.
 *
 * @param {string} provider2 - Key into `pricing` (e.g. a carrier name).
 * @param {number} durationSeconds - Call duration in seconds.
 * @param {Object<string, {unit?: string, price?: number, roundUp?: boolean}>} pricing
 *   Pricing table. Only entries whose `unit` is `"minute"` are billable here.
 * @returns {number} Billed minutes times the per-minute price; `0` when the
 *   provider has no per-minute entry. With `roundUp`, partial minutes are
 *   billed as whole minutes.
 */
function calculateTelephonyCost(provider2, durationSeconds, pricing) {
  const entry = pricing[provider2];
  if (entry?.unit !== "minute") return 0;
  const elapsedMinutes = durationSeconds / 60;
  const billedMinutes = entry.roundUp ? Math.ceil(elapsedMinutes) : elapsedMinutes;
  const perMinute = entry.price ?? 0;
  return billedMinutes * perMinute;
}
|
|
2062
1145
|
|
|
2063
1146
|
// src/dashboard/store.ts
|
|
2064
1147
|
init_esm_shims();
|
|
2065
1148
|
import { EventEmitter } from "events";
|
|
1149
|
+
import * as fs2 from "fs";
|
|
1150
|
+
import * as path2 from "path";
|
|
1151
|
+
|
|
1152
|
+
// src/version.ts
|
|
1153
|
+
init_esm_shims();
|
|
2066
1154
|
import * as fs from "fs";
|
|
2067
1155
|
import * as path from "path";
|
|
1156
|
+
/**
 * Read the package version from the `package.json` one directory above this
 * module's directory.
 *
 * @returns {string} The `version` field when it is a non-empty string;
 *   otherwise `""`. Any failure to locate, read or parse the file also
 *   yields `""`.
 */
function readVersion() {
  try {
    const manifestPath = path.resolve(__dirname, "..", "package.json");
    const { version } = JSON.parse(fs.readFileSync(manifestPath, "utf8"));
    if (typeof version === "string" && version.length > 0) return version;
    return "";
  } catch {
    return "";
  }
}
|
|
1165
|
+
var VERSION = readVersion();
|
|
1166
|
+
|
|
1167
|
+
// src/dashboard/store.ts
|
|
1168
|
+
/**
 * Accessor for the module-level `VERSION` value.
 *
 * @returns {string} The current value of `VERSION`.
 */
function sdkVersion() {
  const currentVersion = VERSION;
  return currentVersion;
}
|
|
2068
1171
|
var MetricsStore = class extends EventEmitter {
|
|
2069
1172
|
maxCalls;
|
|
2070
1173
|
calls = [];
|
|
@@ -2347,15 +1450,15 @@ var MetricsStore = class extends EventEmitter {
|
|
|
2347
1450
|
persistDeletedIds() {
|
|
2348
1451
|
if (this.deletedIdsPath === null) return;
|
|
2349
1452
|
try {
|
|
2350
|
-
const dir =
|
|
2351
|
-
|
|
1453
|
+
const dir = path2.dirname(this.deletedIdsPath);
|
|
1454
|
+
fs2.mkdirSync(dir, { recursive: true });
|
|
2352
1455
|
const tmp = this.deletedIdsPath + ".tmp";
|
|
2353
1456
|
const payload = {
|
|
2354
1457
|
version: 1,
|
|
2355
1458
|
deleted_call_ids: Array.from(this.deletedCallIds).sort()
|
|
2356
1459
|
};
|
|
2357
|
-
|
|
2358
|
-
|
|
1460
|
+
fs2.writeFileSync(tmp, JSON.stringify(payload, null, 2), "utf8");
|
|
1461
|
+
fs2.renameSync(tmp, this.deletedIdsPath);
|
|
2359
1462
|
} catch (err) {
|
|
2360
1463
|
getLogger().debug(
|
|
2361
1464
|
`MetricsStore.persistDeletedIds: ${String(err)}`
|
|
@@ -2388,7 +1491,8 @@ var MetricsStore = class extends EventEmitter {
|
|
|
2388
1491
|
avg_duration: 0,
|
|
2389
1492
|
avg_latency_ms: 0,
|
|
2390
1493
|
cost_breakdown: { stt: 0, tts: 0, llm: 0, telephony: 0 },
|
|
2391
|
-
active_calls: this.activeCalls.size
|
|
1494
|
+
active_calls: this.activeCalls.size,
|
|
1495
|
+
sdk_version: sdkVersion()
|
|
2392
1496
|
};
|
|
2393
1497
|
}
|
|
2394
1498
|
let totalCost = 0;
|
|
@@ -2427,7 +1531,8 @@ var MetricsStore = class extends EventEmitter {
|
|
|
2427
1531
|
llm: Math.round(costLlm * 1e6) / 1e6,
|
|
2428
1532
|
telephony: Math.round(costTel * 1e6) / 1e6
|
|
2429
1533
|
},
|
|
2430
|
-
active_calls: this.activeCalls.size
|
|
1534
|
+
active_calls: this.activeCalls.size,
|
|
1535
|
+
sdk_version: sdkVersion()
|
|
2431
1536
|
};
|
|
2432
1537
|
}
|
|
2433
1538
|
/**
|
|
@@ -2463,11 +1568,11 @@ var MetricsStore = class extends EventEmitter {
|
|
|
2463
1568
|
*/
|
|
2464
1569
|
hydrate(logRoot) {
|
|
2465
1570
|
if (!logRoot) return 0;
|
|
2466
|
-
const deletedIdsPath =
|
|
1571
|
+
const deletedIdsPath = path2.join(logRoot, ".deleted_call_ids.json");
|
|
2467
1572
|
this.deletedIdsPath = deletedIdsPath;
|
|
2468
|
-
if (
|
|
1573
|
+
if (fs2.existsSync(deletedIdsPath)) {
|
|
2469
1574
|
try {
|
|
2470
|
-
const raw =
|
|
1575
|
+
const raw = fs2.readFileSync(deletedIdsPath, "utf8");
|
|
2471
1576
|
const payload = JSON.parse(raw);
|
|
2472
1577
|
const arr = Array.isArray(payload.deleted_call_ids) ? payload.deleted_call_ids : [];
|
|
2473
1578
|
for (const cid of arr) {
|
|
@@ -2481,19 +1586,19 @@ var MetricsStore = class extends EventEmitter {
|
|
|
2481
1586
|
);
|
|
2482
1587
|
}
|
|
2483
1588
|
}
|
|
2484
|
-
const callsRoot =
|
|
2485
|
-
if (!
|
|
1589
|
+
const callsRoot = path2.join(logRoot, "calls");
|
|
1590
|
+
if (!fs2.existsSync(callsRoot)) return 0;
|
|
2486
1591
|
const collected = [];
|
|
2487
1592
|
const seen = new Set(this.calls.map((c) => c.call_id));
|
|
2488
1593
|
const walk = (dir, depth) => {
|
|
2489
1594
|
let entries;
|
|
2490
1595
|
try {
|
|
2491
|
-
entries =
|
|
1596
|
+
entries = fs2.readdirSync(dir, { withFileTypes: true });
|
|
2492
1597
|
} catch {
|
|
2493
1598
|
return;
|
|
2494
1599
|
}
|
|
2495
1600
|
for (const entry of entries) {
|
|
2496
|
-
const childPath =
|
|
1601
|
+
const childPath = path2.join(dir, entry.name);
|
|
2497
1602
|
if (depth < 3) {
|
|
2498
1603
|
if (entry.isDirectory() && /^\d+$/.test(entry.name)) {
|
|
2499
1604
|
walk(childPath, depth + 1);
|
|
@@ -2501,10 +1606,10 @@ var MetricsStore = class extends EventEmitter {
|
|
|
2501
1606
|
continue;
|
|
2502
1607
|
}
|
|
2503
1608
|
if (!entry.isDirectory()) continue;
|
|
2504
|
-
const metadataPath =
|
|
2505
|
-
if (!
|
|
1609
|
+
const metadataPath = path2.join(childPath, "metadata.json");
|
|
1610
|
+
if (!fs2.existsSync(metadataPath)) continue;
|
|
2506
1611
|
try {
|
|
2507
|
-
const raw =
|
|
1612
|
+
const raw = fs2.readFileSync(metadataPath, "utf8");
|
|
2508
1613
|
const meta = JSON.parse(raw);
|
|
2509
1614
|
const callId = meta.call_id || entry.name;
|
|
2510
1615
|
if (!callId || seen.has(callId)) continue;
|
|
@@ -2517,7 +1622,7 @@ var MetricsStore = class extends EventEmitter {
|
|
|
2517
1622
|
}
|
|
2518
1623
|
if (!record.transcript || record.transcript.length === 0) {
|
|
2519
1624
|
const fromJsonl = loadTranscriptJsonl(
|
|
2520
|
-
|
|
1625
|
+
path2.join(childPath, "transcript.jsonl")
|
|
2521
1626
|
);
|
|
2522
1627
|
if (fromJsonl.length > 0) record.transcript = fromJsonl;
|
|
2523
1628
|
}
|
|
@@ -2596,8 +1701,8 @@ function metadataToCallRecord(callId, meta) {
|
|
|
2596
1701
|
}
|
|
2597
1702
|
function loadTranscriptJsonl(filePath) {
|
|
2598
1703
|
try {
|
|
2599
|
-
if (!
|
|
2600
|
-
const raw =
|
|
1704
|
+
if (!fs2.existsSync(filePath)) return [];
|
|
1705
|
+
const raw = fs2.readFileSync(filePath, "utf8");
|
|
2601
1706
|
const lines = raw.split("\n").filter((l) => l.trim().length > 0);
|
|
2602
1707
|
const out = [];
|
|
2603
1708
|
for (const line of lines) {
|
|
@@ -2640,15 +1745,15 @@ init_esm_shims();
|
|
|
2640
1745
|
|
|
2641
1746
|
// src/dashboard/auth.ts
|
|
2642
1747
|
init_esm_shims();
|
|
2643
|
-
import
|
|
1748
|
+
import crypto2 from "crypto";
|
|
2644
1749
|
/**
 * Compare two strings in constant time.
 *
 * When the encoded lengths differ, a throwaway self-comparison is still
 * performed before returning `false`, so the mismatch path also does
 * comparison work.
 *
 * @param {string} a - First value.
 * @param {string} b - Second value.
 * @returns {boolean} `true` only when both values encode to identical bytes.
 */
function timingSafeCompare(a, b) {
  const left = Buffer.from(a);
  const right = Buffer.from(b);
  if (left.length === right.length) {
    return crypto2.timingSafeEqual(left, right);
  }
  // timingSafeEqual requires equal lengths, so burn a comparison against self.
  crypto2.timingSafeEqual(left, left);
  return false;
}
|
|
2653
1758
|
function makeAuthMiddleware(token = "") {
|
|
2654
1759
|
return (req, res, next) => {
|
|
@@ -2731,7 +1836,7 @@ function csvEscape(value) {
|
|
|
2731
1836
|
|
|
2732
1837
|
// src/dashboard/ui.ts
|
|
2733
1838
|
init_esm_shims();
|
|
2734
|
-
import { readFileSync as
|
|
1839
|
+
import { readFileSync as readFileSync3 } from "fs";
|
|
2735
1840
|
import { join as join2, dirname as dirname2 } from "path";
|
|
2736
1841
|
var FALLBACK_HTML = `<!doctype html>
|
|
2737
1842
|
<html><head><meta charset="utf-8"><title>Patter dashboard</title></head>
|
|
@@ -2748,9 +1853,9 @@ function loadDashboardHtml() {
|
|
|
2748
1853
|
join2(here, "dashboard", "ui.html"),
|
|
2749
1854
|
join2(here, "..", "dashboard", "ui.html")
|
|
2750
1855
|
];
|
|
2751
|
-
for (const
|
|
1856
|
+
for (const path4 of candidates) {
|
|
2752
1857
|
try {
|
|
2753
|
-
return
|
|
1858
|
+
return readFileSync3(path4, "utf8");
|
|
2754
1859
|
} catch {
|
|
2755
1860
|
}
|
|
2756
1861
|
}
|
|
@@ -2937,7 +2042,7 @@ function mountApi(app, store, token = "") {
|
|
|
2937
2042
|
|
|
2938
2043
|
// src/remote-message.ts
|
|
2939
2044
|
init_esm_shims();
|
|
2940
|
-
import
|
|
2045
|
+
import crypto3 from "crypto";
|
|
2941
2046
|
var MAX_RESPONSE_BYTES = 64 * 1024;
|
|
2942
2047
|
function validateWebSocketUrl(url) {
|
|
2943
2048
|
let translated = url;
|
|
@@ -2965,7 +2070,7 @@ var RemoteMessageHandler = class {
|
|
|
2965
2070
|
if (!this.webhookSecret) {
|
|
2966
2071
|
throw new Error("Cannot sign without a webhookSecret");
|
|
2967
2072
|
}
|
|
2968
|
-
return
|
|
2073
|
+
return crypto3.createHmac("sha256", this.webhookSecret).update(body).digest("hex");
|
|
2969
2074
|
}
|
|
2970
2075
|
/**
|
|
2971
2076
|
* Release resources held by this handler.
|
|
@@ -3047,8 +2152,8 @@ var RemoteMessageHandler = class {
|
|
|
3047
2152
|
"WebSocket URL uses unencrypted ws:// \u2014 call transcripts and phone numbers will be sent in plaintext. Use wss:// in production."
|
|
3048
2153
|
);
|
|
3049
2154
|
}
|
|
3050
|
-
const { WebSocket:
|
|
3051
|
-
const ws = new
|
|
2155
|
+
const { WebSocket: WebSocket3 } = await import("ws");
|
|
2156
|
+
const ws = new WebSocket3(url);
|
|
3052
2157
|
const chunks = [];
|
|
3053
2158
|
let done = false;
|
|
3054
2159
|
let error = null;
|
|
@@ -3102,10 +2207,10 @@ var RemoteMessageHandler = class {
|
|
|
3102
2207
|
}
|
|
3103
2208
|
});
|
|
3104
2209
|
try {
|
|
3105
|
-
await new Promise((
|
|
2210
|
+
await new Promise((resolve2, reject) => {
|
|
3106
2211
|
ws.on("open", () => {
|
|
3107
2212
|
ws.send(JSON.stringify(data));
|
|
3108
|
-
|
|
2213
|
+
resolve2();
|
|
3109
2214
|
});
|
|
3110
2215
|
ws.on("error", (err) => {
|
|
3111
2216
|
reject(err);
|
|
@@ -3115,11 +2220,11 @@ var RemoteMessageHandler = class {
|
|
|
3115
2220
|
yield chunks.shift();
|
|
3116
2221
|
}
|
|
3117
2222
|
while (!done && !error) {
|
|
3118
|
-
const text = await new Promise((
|
|
2223
|
+
const text = await new Promise((resolve2) => {
|
|
3119
2224
|
if (chunks.length > 0) {
|
|
3120
|
-
|
|
2225
|
+
resolve2(chunks.shift());
|
|
3121
2226
|
} else {
|
|
3122
|
-
resolveNext =
|
|
2227
|
+
resolveNext = resolve2;
|
|
3123
2228
|
}
|
|
3124
2229
|
});
|
|
3125
2230
|
if (text === null) break;
|
|
@@ -3146,7 +2251,7 @@ init_esm_shims();
|
|
|
3146
2251
|
|
|
3147
2252
|
// src/providers/deepgram-stt.ts
|
|
3148
2253
|
init_esm_shims();
|
|
3149
|
-
import
|
|
2254
|
+
import WebSocket2 from "ws";
|
|
3150
2255
|
|
|
3151
2256
|
// src/errors.ts
|
|
3152
2257
|
init_esm_shims();
|
|
@@ -3327,8 +2432,8 @@ var DeepgramSTT = class _DeepgramSTT {
|
|
|
3327
2432
|
const url = `${DEEPGRAM_WS_URL}?${params.toString()}`;
|
|
3328
2433
|
let ws = null;
|
|
3329
2434
|
try {
|
|
3330
|
-
ws = await new Promise((
|
|
3331
|
-
const sock = new
|
|
2435
|
+
ws = await new Promise((resolve2, reject) => {
|
|
2436
|
+
const sock = new WebSocket2(url, {
|
|
3332
2437
|
headers: { Authorization: `Token ${this.apiKey}` }
|
|
3333
2438
|
});
|
|
3334
2439
|
const timer = setTimeout(() => {
|
|
@@ -3340,7 +2445,7 @@ var DeepgramSTT = class _DeepgramSTT {
|
|
|
3340
2445
|
}, 5e3);
|
|
3341
2446
|
sock.once("open", () => {
|
|
3342
2447
|
clearTimeout(timer);
|
|
3343
|
-
|
|
2448
|
+
resolve2(sock);
|
|
3344
2449
|
});
|
|
3345
2450
|
sock.once("error", (err) => {
|
|
3346
2451
|
clearTimeout(timer);
|
|
@@ -3367,11 +2472,11 @@ var DeepgramSTT = class _DeepgramSTT {
|
|
|
3367
2472
|
}
|
|
3368
2473
|
async openSocket() {
|
|
3369
2474
|
const url = this.buildUrl();
|
|
3370
|
-
const ws = new
|
|
2475
|
+
const ws = new WebSocket2(url, {
|
|
3371
2476
|
headers: { Authorization: `Token ${this.apiKey}` }
|
|
3372
2477
|
});
|
|
3373
2478
|
this.ws = ws;
|
|
3374
|
-
await new Promise((
|
|
2479
|
+
await new Promise((resolve2, reject) => {
|
|
3375
2480
|
let settled = false;
|
|
3376
2481
|
const settle = (fn) => {
|
|
3377
2482
|
if (settled) return;
|
|
@@ -3383,7 +2488,7 @@ var DeepgramSTT = class _DeepgramSTT {
|
|
|
3383
2488
|
() => settle(() => reject(new PatterConnectionError("Deepgram connect timeout"))),
|
|
3384
2489
|
1e4
|
|
3385
2490
|
);
|
|
3386
|
-
ws.once("open", () => settle(
|
|
2491
|
+
ws.once("open", () => settle(resolve2));
|
|
3387
2492
|
ws.once("error", (err) => settle(() => reject(err)));
|
|
3388
2493
|
ws.once("unexpected-response", (_req, res) => {
|
|
3389
2494
|
const status = res?.statusCode ?? 0;
|
|
@@ -3404,7 +2509,7 @@ var DeepgramSTT = class _DeepgramSTT {
|
|
|
3404
2509
|
ws.on("close", (code, reason) => this.handleClose(code, reason.toString()));
|
|
3405
2510
|
ws.on("error", (err) => this.handleError(err));
|
|
3406
2511
|
this.keepaliveTimer = setInterval(() => {
|
|
3407
|
-
if (this.ws && this.ws.readyState ===
|
|
2512
|
+
if (this.ws && this.ws.readyState === WebSocket2.OPEN) {
|
|
3408
2513
|
try {
|
|
3409
2514
|
this.ws.send(JSON.stringify({ type: "KeepAlive" }));
|
|
3410
2515
|
} catch {
|
|
@@ -3523,7 +2628,7 @@ var DeepgramSTT = class _DeepgramSTT {
|
|
|
3523
2628
|
}
|
|
3524
2629
|
/** Send a binary audio chunk to Deepgram for transcription. */
|
|
3525
2630
|
sendAudio(audio) {
|
|
3526
|
-
if (!this.ws || this.ws.readyState !==
|
|
2631
|
+
if (!this.ws || this.ws.readyState !== WebSocket2.OPEN) {
|
|
3527
2632
|
this.audioDroppedCount++;
|
|
3528
2633
|
if (this.audioDroppedCount === 1 || this.audioDroppedCount % 50 === 0) {
|
|
3529
2634
|
getLogger().info(
|
|
@@ -3572,7 +2677,7 @@ var DeepgramSTT = class _DeepgramSTT {
|
|
|
3572
2677
|
*/
|
|
3573
2678
|
finalize() {
|
|
3574
2679
|
const ws = this.ws;
|
|
3575
|
-
if (!ws || ws.readyState !==
|
|
2680
|
+
if (!ws || ws.readyState !== WebSocket2.OPEN) {
|
|
3576
2681
|
getLogger().info(
|
|
3577
2682
|
`[DIAG] DeepgramSTT.finalize SKIPPED (ws state=${ws?.readyState ?? "null"})`
|
|
3578
2683
|
);
|
|
@@ -3593,7 +2698,7 @@ var DeepgramSTT = class _DeepgramSTT {
|
|
|
3593
2698
|
if (!ws) return;
|
|
3594
2699
|
this.ws = null;
|
|
3595
2700
|
const sendSafe = (payload) => {
|
|
3596
|
-
if (ws.readyState ===
|
|
2701
|
+
if (ws.readyState === WebSocket2.OPEN) {
|
|
3597
2702
|
try {
|
|
3598
2703
|
ws.send(payload);
|
|
3599
2704
|
} catch {
|
|
@@ -3607,7 +2712,7 @@ var DeepgramSTT = class _DeepgramSTT {
|
|
|
3607
2712
|
} catch {
|
|
3608
2713
|
}
|
|
3609
2714
|
};
|
|
3610
|
-
if (ws.readyState !==
|
|
2715
|
+
if (ws.readyState !== WebSocket2.OPEN) {
|
|
3611
2716
|
finishClose();
|
|
3612
2717
|
return;
|
|
3613
2718
|
}
|
|
@@ -3676,6 +2781,21 @@ var CallMetricsAccumulator = class {
|
|
|
3676
2781
|
_bargeinStoppedAt = null;
|
|
3677
2782
|
_turnUserText = "";
|
|
3678
2783
|
_turnSttAudioSeconds = 0;
|
|
2784
|
+
/**
|
|
2785
|
+
* Guard against the recordTurnInterrupted / recordTurnComplete race.
|
|
2786
|
+
*
|
|
2787
|
+
* A VAD-path barge-in fires ``recordTurnInterrupted`` synchronously
|
|
2788
|
+
* inside ``handleAudioAsync`` while the in-flight pipeline LLM stream
|
|
2789
|
+
* keeps unwinding on its own task. When the LLM stream eventually
|
|
2790
|
+
* exits, the existing pipeline path falls through to
|
|
2791
|
+
* ``recordTurnComplete``, which would push a second turn for the same
|
|
2792
|
+
* logical exchange (this time carrying ``user_text=''`` because the
|
|
2793
|
+
* field was already reset). ``_turnAlreadyClosed`` is flipped by
|
|
2794
|
+
* ``recordTurnInterrupted`` and read by ``recordTurnComplete`` so the
|
|
2795
|
+
* late ``recordTurnComplete`` becomes a no-op until the next
|
|
2796
|
+
* ``startTurn`` re-arms the accumulator.
|
|
2797
|
+
*/
|
|
2798
|
+
_turnAlreadyClosed = false;
|
|
3679
2799
|
// Cumulative usage counters
|
|
3680
2800
|
_totalSttAudioSeconds = 0;
|
|
3681
2801
|
_totalTtsCharacters = 0;
|
|
@@ -3773,6 +2893,7 @@ var CallMetricsAccumulator = class {
|
|
|
3773
2893
|
this._bargeinStoppedAt = null;
|
|
3774
2894
|
this._turnUserText = "";
|
|
3775
2895
|
this._turnSttAudioSeconds = 0;
|
|
2896
|
+
this._turnAlreadyClosed = false;
|
|
3776
2897
|
this._vadStoppedAt = null;
|
|
3777
2898
|
this._sttFinalAt = null;
|
|
3778
2899
|
this._turnCommittedAt = null;
|
|
@@ -3929,8 +3050,18 @@ var CallMetricsAccumulator = class {
|
|
|
3929
3050
|
recordTtsStopped(ts) {
|
|
3930
3051
|
this._bargeinStoppedAt = ts ?? hrTimeMs();
|
|
3931
3052
|
}
|
|
3932
|
-
/**
|
|
3053
|
+
/**
|
|
3054
|
+
* Close the current turn cleanly and append a `TurnMetrics` record.
|
|
3055
|
+
*
|
|
3056
|
+
* Returns ``null`` when ``recordTurnInterrupted`` has already closed
|
|
3057
|
+
* the current turn — this protects against the VAD-barge-in /
|
|
3058
|
+
* pipeline-LLM race where both paths try to finalise the same logical
|
|
3059
|
+
* turn and the second would otherwise push a phantom entry with
|
|
3060
|
+
* ``user_text=''``. The caller treats ``null`` as "nothing to emit";
|
|
3061
|
+
* ``emitTurnMetrics`` is already null-safe.
|
|
3062
|
+
*/
|
|
3933
3063
|
recordTurnComplete(agentText) {
|
|
3064
|
+
if (this._turnAlreadyClosed) return null;
|
|
3934
3065
|
const latency = this._computeTurnLatency();
|
|
3935
3066
|
const turn = {
|
|
3936
3067
|
turn_index: this._turns.length,
|
|
@@ -3943,13 +3074,23 @@ var CallMetricsAccumulator = class {
|
|
|
3943
3074
|
};
|
|
3944
3075
|
this._turns.push(turn);
|
|
3945
3076
|
this._resetTurnState();
|
|
3077
|
+
this._turnAlreadyClosed = true;
|
|
3946
3078
|
this._eventBus?.emit("turn_ended", { callId: this.callId, turn });
|
|
3947
3079
|
this._eventBus?.emit("metrics_collected", { callId: this.callId, turn });
|
|
3948
3080
|
return turn;
|
|
3949
3081
|
}
|
|
3950
|
-
/**
|
|
3082
|
+
/**
|
|
3083
|
+
* Close the current turn as interrupted (barge-in) and return the
|
|
3084
|
+
* recorded metrics. Returns ``null`` when no turn is open, OR when
|
|
3085
|
+
* ``recordTurnComplete`` has already finalised the current turn —
|
|
3086
|
+
* bidirectional parity with the guard at the top of
|
|
3087
|
+
* ``recordTurnComplete``. Prevents an out-of-order interruption (e.g.
|
|
3088
|
+
* a future refactor that reorders the bargein + LLM-unwind paths)
|
|
3089
|
+
* from overwriting a turn that the complete path already emitted.
|
|
3090
|
+
*/
|
|
3951
3091
|
recordTurnInterrupted() {
|
|
3952
3092
|
if (this._turnStart === null) return null;
|
|
3093
|
+
if (this._turnAlreadyClosed) return null;
|
|
3953
3094
|
const latency = this._computeTurnLatency();
|
|
3954
3095
|
const turn = {
|
|
3955
3096
|
turn_index: this._turns.length,
|
|
@@ -3964,6 +3105,7 @@ var CallMetricsAccumulator = class {
|
|
|
3964
3105
|
this._eventBus?.emit("turn_ended", { callId: this.callId, turn });
|
|
3965
3106
|
this._eventBus?.emit("metrics_collected", { callId: this.callId, turn });
|
|
3966
3107
|
this._resetTurnState();
|
|
3108
|
+
this._turnAlreadyClosed = true;
|
|
3967
3109
|
this._turnCommittedMono = null;
|
|
3968
3110
|
this._endpointSignalAt = null;
|
|
3969
3111
|
return turn;
|
|
@@ -5236,6 +4378,35 @@ function maskPhoneNumber(number) {
|
|
|
5236
4378
|
function isValidE164(number) {
|
|
5237
4379
|
return /^\+[1-9]\d{6,14}$/.test(number);
|
|
5238
4380
|
}
|
|
4381
|
+
function augmentWithBuiltinHandoffTools(userTools, callbacks) {
|
|
4382
|
+
const out = [...userTools ?? []];
|
|
4383
|
+
if (callbacks.transferCall) {
|
|
4384
|
+
const transferCall = callbacks.transferCall;
|
|
4385
|
+
out.push({
|
|
4386
|
+
...TRANSFER_CALL_TOOL,
|
|
4387
|
+
handler: async (args) => {
|
|
4388
|
+
const number = typeof args.number === "string" ? args.number : "";
|
|
4389
|
+
if (!isValidE164(number)) {
|
|
4390
|
+
return JSON.stringify({ error: "Invalid phone number format", status: "rejected" });
|
|
4391
|
+
}
|
|
4392
|
+
await transferCall(number);
|
|
4393
|
+
return JSON.stringify({ status: "transferring", to: number });
|
|
4394
|
+
}
|
|
4395
|
+
});
|
|
4396
|
+
}
|
|
4397
|
+
if (callbacks.endCall) {
|
|
4398
|
+
const endCall = callbacks.endCall;
|
|
4399
|
+
out.push({
|
|
4400
|
+
...END_CALL_TOOL,
|
|
4401
|
+
handler: async (args) => {
|
|
4402
|
+
const reason = typeof args.reason === "string" ? args.reason : "conversation_complete";
|
|
4403
|
+
await endCall(reason);
|
|
4404
|
+
return JSON.stringify({ status: "ending", reason });
|
|
4405
|
+
}
|
|
4406
|
+
});
|
|
4407
|
+
}
|
|
4408
|
+
return out;
|
|
4409
|
+
}
|
|
5239
4410
|
var HALLUCINATIONS = /* @__PURE__ */ new Set([
|
|
5240
4411
|
"you",
|
|
5241
4412
|
"thank you",
|
|
@@ -5252,7 +4423,23 @@ var HALLUCINATIONS = /* @__PURE__ */ new Set([
|
|
|
5252
4423
|
".",
|
|
5253
4424
|
"bye",
|
|
5254
4425
|
"right",
|
|
5255
|
-
"cool"
|
|
4426
|
+
"cool",
|
|
4427
|
+
// Whisper YouTube-caption hallucinations
|
|
4428
|
+
"thank you for watching",
|
|
4429
|
+
"thanks for watching",
|
|
4430
|
+
"thank you for watching!",
|
|
4431
|
+
"thanks for watching!",
|
|
4432
|
+
"thank you so much for watching",
|
|
4433
|
+
"thanks for listening",
|
|
4434
|
+
"please subscribe",
|
|
4435
|
+
"subscribe",
|
|
4436
|
+
"music",
|
|
4437
|
+
"[music]",
|
|
4438
|
+
"\u266A",
|
|
4439
|
+
"[no audio]",
|
|
4440
|
+
"[silence]",
|
|
4441
|
+
"[blank_audio]",
|
|
4442
|
+
"(silence)"
|
|
5256
4443
|
]);
|
|
5257
4444
|
var StreamHandler = class _StreamHandler {
|
|
5258
4445
|
deps;
|
|
@@ -5378,13 +4565,17 @@ var StreamHandler = class _StreamHandler {
|
|
|
5378
4565
|
* Same as the AEC variant but for deployments where AEC is OFF
|
|
5379
4566
|
* (default on PSTN — Twilio/Telnyx). Without an adaptive filter to
|
|
5380
4567
|
* converge, the only justification for a gate is anti-flicker on
|
|
5381
|
-
* micro-events (cough, click). 100
|
|
5382
|
-
*
|
|
5383
|
-
* the
|
|
5384
|
-
*
|
|
5385
|
-
*
|
|
5386
|
-
|
|
5387
|
-
|
|
4568
|
+
* micro-events (cough, click). Raised 100 → 500 ms on 2026-05-19
|
|
4569
|
+
* after the 0.6.2 acceptance run showed a phantom VAD speech_start
|
|
4570
|
+
* firing on the very first inbound frame (~500 ms into the call,
|
|
4571
|
+
* which is past a 100 ms gate). The phantom barge-in cancelled the
|
|
4572
|
+
* prewarmed firstMessage, the user heard a clipped (graffiante)
|
|
4573
|
+
* audio fragment, and the SDK left ``_turnAlreadyClosed=true`` so
|
|
4574
|
+
* subsequent ``recordTurnComplete`` calls were no-ops. 500 ms
|
|
4575
|
+
* filters those phantoms while still letting a real interruption
|
|
4576
|
+
* land within half a second of agent onset.
|
|
4577
|
+
*/
|
|
4578
|
+
static MIN_AGENT_SPEAKING_MS_BEFORE_BARGE_IN_NO_AEC = 500;
|
|
5388
4579
|
/** Handle for the pending grace-period timer, so it can be cleared on cleanup. */
|
|
5389
4580
|
graceTimer = null;
|
|
5390
4581
|
/**
|
|
@@ -5424,30 +4615,12 @@ var StreamHandler = class _StreamHandler {
|
|
|
5424
4615
|
* coexist without name collisions even when firstMessage finishes while
|
|
5425
4616
|
* a Realtime turn is still streaming.
|
|
5426
4617
|
*/
|
|
5427
|
-
firstMessageMarkCounter
|
|
5428
|
-
|
|
5429
|
-
|
|
5430
|
-
|
|
5431
|
-
|
|
5432
|
-
|
|
5433
|
-
* — vs. ~2-5 s with the previous burst-send code, which was the
|
|
5434
|
-
* root cause of "firstMessage non interrompibile". Higher values
|
|
5435
|
-
* smooth playback under jittery RTT (each mark echo adds ~150-250 ms
|
|
5436
|
-
* RTT on PSTN) at the cost of longer barge-in latency; lower values
|
|
5437
|
-
* risk under-buffering. 3 hit the smallest barge-in cap without
|
|
5438
|
-
* audible gaps in 2026-05 acceptance.
|
|
5439
|
-
*/
|
|
5440
|
-
static FIRST_MESSAGE_MARK_WINDOW = 3;
|
|
5441
|
-
/**
|
|
5442
|
-
* Per-chunk soft timeout (ms) while awaiting a mark echo. Twilio's
|
|
5443
|
-
* mark echoes typically arrive within 100-250 ms of audio playback.
|
|
5444
|
-
* Capping at 500 ms guards against carriers (or test doubles) that
|
|
5445
|
-
* never echo — without it a stalled echo would deadlock the loop and
|
|
5446
|
-
* the agent would freeze mid-utterance. On timeout we drop the
|
|
5447
|
-
* waiter from the queue and continue: playout may glitch by one
|
|
5448
|
-
* chunk but the call stays alive.
|
|
5449
|
-
*/
|
|
5450
|
-
static MARK_AWAIT_TIMEOUT_MS = 500;
|
|
4618
|
+
// firstMessageMarkCounter / FIRST_MESSAGE_MARK_WINDOW /
|
|
4619
|
+
// MARK_AWAIT_TIMEOUT_MS were retired with the move to the Twilio-FIFO-
|
|
4620
|
+
// trusts model (sendPacedFirstMessageBytes no longer emits marks).
|
|
4621
|
+
// Marks are still consumed via ``onMark`` for any adapter that wants
|
|
4622
|
+
// to round-trip one, but the firstMessage path no longer back-pressures
|
|
4623
|
+
// on them.
|
|
5451
4624
|
/**
|
|
5452
4625
|
* Minimum drain window (ms) between a ``cancelSpeaking`` and the next
|
|
5453
4626
|
* ``beginSpeaking``. 150 ms covers a typical PSTN jitter buffer drain
|
|
@@ -5512,6 +4685,14 @@ var StreamHandler = class _StreamHandler {
|
|
|
5512
4685
|
} catch {
|
|
5513
4686
|
}
|
|
5514
4687
|
}
|
|
4688
|
+
const ttsCancelable = this.tts;
|
|
4689
|
+
if (typeof ttsCancelable?.cancelActiveStream === "function") {
|
|
4690
|
+
try {
|
|
4691
|
+
ttsCancelable.cancelActiveStream();
|
|
4692
|
+
} catch (err) {
|
|
4693
|
+
getLogger().debug(`TTS cancelActiveStream raised: ${String(err)}`);
|
|
4694
|
+
}
|
|
4695
|
+
}
|
|
5515
4696
|
}
|
|
5516
4697
|
/**
|
|
5517
4698
|
* Resolve every entry in ``pendingMarks`` and empty the queue. Idempotent
|
|
@@ -5528,56 +4709,19 @@ var StreamHandler = class _StreamHandler {
|
|
|
5528
4709
|
}
|
|
5529
4710
|
this.pendingMarks.length = 0;
|
|
5530
4711
|
}
|
|
4712
|
+
// Mark-based back-pressure (sendMarkAwaitable / waitForMarkWindow)
|
|
4713
|
+
// was removed when sendPacedFirstMessageBytes switched to the
|
|
4714
|
+
// Twilio-FIFO-trusts model — see that method's doc comment for
|
|
4715
|
+
// rationale. ``pendingMarks`` and ``onMark`` are still kept so an
|
|
4716
|
+
// adapter that wants to round-trip a mark for some other purpose can
|
|
4717
|
+
// still do so without breaking the firstMessage path.
|
|
5531
4718
|
/**
|
|
5532
|
-
*
|
|
5533
|
-
*
|
|
5534
|
-
*
|
|
5535
|
-
*
|
|
5536
|
-
*
|
|
5537
|
-
|
|
5538
|
-
sendMarkAwaitable() {
|
|
5539
|
-
if (this.deps.bridge.telephonyProvider !== "twilio") return null;
|
|
5540
|
-
this.firstMessageMarkCounter += 1;
|
|
5541
|
-
const markName = `fm_${this.firstMessageMarkCounter}`;
|
|
5542
|
-
let resolve;
|
|
5543
|
-
const promise = new Promise((r) => {
|
|
5544
|
-
resolve = r;
|
|
5545
|
-
});
|
|
5546
|
-
this.pendingMarks.push({ name: markName, resolve, promise });
|
|
5547
|
-
try {
|
|
5548
|
-
this.deps.bridge.sendMark(this.ws, markName, this.streamSid);
|
|
5549
|
-
} catch (err) {
|
|
5550
|
-
getLogger().debug(`sendMark failed (${markName}): ${String(err)}`);
|
|
5551
|
-
const idx = this.pendingMarks.findIndex((m) => m.name === markName);
|
|
5552
|
-
if (idx >= 0) this.pendingMarks.splice(idx, 1);
|
|
5553
|
-
return Promise.resolve();
|
|
5554
|
-
}
|
|
5555
|
-
return promise;
|
|
5556
|
-
}
|
|
5557
|
-
/**
|
|
5558
|
-
* If the in-flight mark queue is at or above ``FIRST_MESSAGE_MARK_WINDOW``
|
|
5559
|
-
* entries, wait for the oldest entry to clear (mark echoed, agent
|
|
5560
|
-
* cancelled, or per-mark timeout). Repeats until the queue depth is
|
|
5561
|
-
* within the window — under high RTT the carrier may have several
|
|
5562
|
-
* marks queued and we want every loop iteration to be naturally back-
|
|
5563
|
-
* pressured by playback.
|
|
5564
|
-
*/
|
|
5565
|
-
async waitForMarkWindow() {
|
|
5566
|
-
while (this.isSpeaking && this.pendingMarks.length >= _StreamHandler.FIRST_MESSAGE_MARK_WINDOW) {
|
|
5567
|
-
const oldest = this.pendingMarks[0];
|
|
5568
|
-
const timeout = new Promise(
|
|
5569
|
-
(resolve) => setTimeout(resolve, _StreamHandler.MARK_AWAIT_TIMEOUT_MS)
|
|
5570
|
-
);
|
|
5571
|
-
await Promise.race([oldest.promise, timeout]);
|
|
5572
|
-
if (this.pendingMarks[0] === oldest) {
|
|
5573
|
-
this.pendingMarks.shift();
|
|
5574
|
-
}
|
|
5575
|
-
}
|
|
5576
|
-
}
|
|
5577
|
-
/**
|
|
5578
|
-
* Bytes-per-millisecond for a 16 kHz PCM16 mono stream. Used by the
|
|
5579
|
-
* non-Twilio firstMessage pacing path to translate chunk size into a
|
|
5580
|
-
* playout-duration sleep. 16000 samples/sec × 2 bytes = 32 bytes/ms.
|
|
4719
|
+
* Bytes-per-millisecond for a 16 kHz PCM16 mono stream. Used by
|
|
4720
|
+
* ``sendPacedFirstMessageBytes`` to translate chunk size into a
|
|
4721
|
+
* playout-duration sleep so we never deliver faster than the carrier
|
|
4722
|
+
* can decode + play out (which manifested as severe crackling on the
|
|
4723
|
+
* HTTP-TTS path with client-side resampling). 16000 samples/sec × 2
|
|
4724
|
+
* bytes/sample = 32 bytes/ms.
|
|
5581
4725
|
*/
|
|
5582
4726
|
static PCM16_16K_BYTES_PER_MS = 32;
|
|
5583
4727
|
/** Cancel and clear the pending grace timer, if any. */
|
|
@@ -5854,8 +4998,8 @@ var StreamHandler = class _StreamHandler {
|
|
|
5854
4998
|
this.ttsByteCarry = null;
|
|
5855
4999
|
}
|
|
5856
5000
|
/**
|
|
5857
|
-
* Start call recording when configured.
|
|
5858
|
-
*
|
|
5001
|
+
* Start call recording when configured. Bridges expose
|
|
5002
|
+
* ``startRecording`` for carrier parity (Twilio and Telnyx supported).
|
|
5859
5003
|
*/
|
|
5860
5004
|
async startRecordingIfRequested(callId) {
|
|
5861
5005
|
const { recording, config } = this.deps;
|
|
@@ -6015,7 +5159,7 @@ var StreamHandler = class _StreamHandler {
|
|
|
6015
5159
|
if (activeVad && !this.vadDisabled) {
|
|
6016
5160
|
try {
|
|
6017
5161
|
const vadPromise = activeVad.processFrame(pcm16k, 16e3);
|
|
6018
|
-
const timeoutPromise = new Promise((
|
|
5162
|
+
const timeoutPromise = new Promise((resolve2) => setTimeout(() => resolve2(null), 25));
|
|
6019
5163
|
const evt = await Promise.race([vadPromise, timeoutPromise]);
|
|
6020
5164
|
if (evt) {
|
|
6021
5165
|
getLogger().info(
|
|
@@ -6101,7 +5245,7 @@ var StreamHandler = class _StreamHandler {
|
|
|
6101
5245
|
this.metricsAcc.addSttAudioBytes(pcm16k.length);
|
|
6102
5246
|
}
|
|
6103
5247
|
} else if (this.adapter) {
|
|
6104
|
-
if (this.adapter instanceof ElevenLabsConvAIAdapter && this.deps.bridge.
|
|
5248
|
+
if (this.adapter instanceof ElevenLabsConvAIAdapter && this.deps.bridge.inputWireFormat === "ulaw_8000" && this.adapter.inputAudioFormat !== "ulaw_8000") {
|
|
6105
5249
|
const pcm8k = mulawToPcm16(audioBuffer);
|
|
6106
5250
|
const pcm16k = this.inboundResampler.process(pcm8k);
|
|
6107
5251
|
this.adapter.sendAudio(pcm16k);
|
|
@@ -6151,9 +5295,21 @@ var StreamHandler = class _StreamHandler {
|
|
|
6151
5295
|
/** Handle call stop / stream end. */
|
|
6152
5296
|
/** Handle a carrier-emitted `stop` event signalling the call has ended. */
|
|
6153
5297
|
async handleStop() {
|
|
5298
|
+
if (this.llmAbort !== null) {
|
|
5299
|
+
try {
|
|
5300
|
+
this.llmAbort.abort();
|
|
5301
|
+
} catch {
|
|
5302
|
+
}
|
|
5303
|
+
}
|
|
5304
|
+
const ttsCancelable = this.tts;
|
|
5305
|
+
if (typeof ttsCancelable?.cancelActiveStream === "function") {
|
|
5306
|
+
try {
|
|
5307
|
+
ttsCancelable.cancelActiveStream();
|
|
5308
|
+
} catch {
|
|
5309
|
+
}
|
|
5310
|
+
}
|
|
6154
5311
|
this.clearPendingBargeIn();
|
|
6155
5312
|
this.drainPendingMarks();
|
|
6156
|
-
this.firstMessageMarkCounter = 0;
|
|
6157
5313
|
this.clearGraceTimer();
|
|
6158
5314
|
this.flushResamplers();
|
|
6159
5315
|
await this.closeSttOnce();
|
|
@@ -6166,9 +5322,21 @@ var StreamHandler = class _StreamHandler {
|
|
|
6166
5322
|
/** Handle WebSocket close event. */
|
|
6167
5323
|
/** Tear down adapter, STT/TTS, and per-call state when the carrier WebSocket closes. */
|
|
6168
5324
|
async handleWsClose() {
|
|
5325
|
+
if (this.llmAbort !== null) {
|
|
5326
|
+
try {
|
|
5327
|
+
this.llmAbort.abort();
|
|
5328
|
+
} catch {
|
|
5329
|
+
}
|
|
5330
|
+
}
|
|
5331
|
+
const ttsCancelable = this.tts;
|
|
5332
|
+
if (typeof ttsCancelable?.cancelActiveStream === "function") {
|
|
5333
|
+
try {
|
|
5334
|
+
ttsCancelable.cancelActiveStream();
|
|
5335
|
+
} catch {
|
|
5336
|
+
}
|
|
5337
|
+
}
|
|
6169
5338
|
this.clearPendingBargeIn();
|
|
6170
5339
|
this.drainPendingMarks();
|
|
6171
|
-
this.firstMessageMarkCounter = 0;
|
|
6172
5340
|
this.clearGraceTimer();
|
|
6173
5341
|
this.flushResamplers();
|
|
6174
5342
|
await this.closeSttOnce();
|
|
@@ -6207,13 +5375,40 @@ var StreamHandler = class _StreamHandler {
|
|
|
6207
5375
|
* Maintains a 1-byte carry across calls so unaligned HTTP chunks from
|
|
6208
5376
|
* streaming TTS providers never byte-swap the PCM16 samples downstream.
|
|
6209
5377
|
*/
|
|
6210
|
-
encodePipelineAudio(
|
|
6211
|
-
|
|
5378
|
+
encodePipelineAudio(audioChunk) {
|
|
5379
|
+
if (this.ttsOutputFormatNativeForCarrier === true) {
|
|
5380
|
+
return audioChunk.toString("base64");
|
|
5381
|
+
}
|
|
5382
|
+
const aligned = this.alignPcm16(audioChunk);
|
|
6212
5383
|
if (aligned.length === 0) return "";
|
|
6213
5384
|
const pcm8k = this.outboundResampler.process(aligned);
|
|
6214
5385
|
const mulaw = pcm16ToMulaw(pcm8k);
|
|
6215
5386
|
return mulaw.toString("base64");
|
|
6216
5387
|
}
|
|
5388
|
+
/**
|
|
5389
|
+
* Cached result of ``isTtsOutputFormatNativeForCarrier()`` — settled
|
|
5390
|
+
* once at ``initPipeline`` time after ``setTelephonyCarrier`` has run
|
|
5391
|
+
* on the TTS adapter. Stable for the call lifetime: changes to the
|
|
5392
|
+
* adapter's output format mid-call would NOT flip this. ``true`` means
|
|
5393
|
+
* ``encodePipelineAudio`` can take the bypass path.
|
|
5394
|
+
*/
|
|
5395
|
+
ttsOutputFormatNativeForCarrier = false;
|
|
5396
|
+
/**
|
|
5397
|
+
* Probe whether the TTS adapter is configured to emit bytes already in
|
|
5398
|
+
* the carrier's wire codec. Currently: Twilio expects ``ulaw_8000``,
|
|
5399
|
+
* Telnyx expects ``pcm_16000`` (no client transcode in either case if
|
|
5400
|
+
* matched). Anything else takes the resample-and-encode path.
|
|
5401
|
+
*/
|
|
5402
|
+
isTtsOutputFormatNativeForCarrier() {
|
|
5403
|
+
if (!this.tts) return false;
|
|
5404
|
+
const fmt = this.tts.outputFormat;
|
|
5405
|
+
if (typeof fmt !== "string") return false;
|
|
5406
|
+
const carrier = this.deps.bridge.telephonyProvider;
|
|
5407
|
+
if (carrier === "twilio") return fmt === "ulaw_8000";
|
|
5408
|
+
if (carrier === "telnyx") return fmt === "pcm_16000";
|
|
5409
|
+
if (carrier === "plivo") return fmt === "ulaw_8000";
|
|
5410
|
+
return false;
|
|
5411
|
+
}
|
|
6217
5412
|
/**
|
|
6218
5413
|
* Prepend any carry byte from the previous chunk, return the even-length
|
|
6219
5414
|
* portion, and stash the final odd byte (if any) for the next call.
|
|
@@ -6224,17 +5419,11 @@ var StreamHandler = class _StreamHandler {
|
|
|
6224
5419
|
this.ttsByteCarry = alignedLen < combined.length ? combined.subarray(alignedLen) : null;
|
|
6225
5420
|
return combined.subarray(0, alignedLen);
|
|
6226
5421
|
}
|
|
6227
|
-
/**
|
|
6228
|
-
* 40 ms @ 16 kHz mono PCM16 = 1280 bytes. Sized to mirror the smallest
|
|
6229
|
-
* live-TTS chunk boundary so cancel granularity (mark/clear bookkeeping)
|
|
6230
|
-
* is identical regardless of whether the firstMessage came from the
|
|
6231
|
-
* prewarm cache or a live ``tts.synthesizeStream`` stream.
|
|
6232
|
-
*/
|
|
6233
|
-
static PREWARM_CHUNK_BYTES = 1280;
|
|
6234
5422
|
/**
|
|
6235
5423
|
* Stream a cached firstMessage buffer in pacing-friendly chunks.
|
|
6236
5424
|
*
|
|
6237
|
-
* Splits ``prewarmBytes`` into
|
|
5425
|
+
* Splits ``prewarmBytes`` into 20 ms slices (matching Twilio's PSTN
|
|
5426
|
+
* frame quantum) and
|
|
6238
5427
|
* forwards each through ``deps.bridge.sendAudio`` exactly like the
|
|
6239
5428
|
* live TTS path does — preserving Twilio mark/clear granularity. A
|
|
6240
5429
|
* single multi-second sendAudio call would push the whole intro into
|
|
@@ -6250,7 +5439,7 @@ var StreamHandler = class _StreamHandler {
|
|
|
6250
5439
|
return this.sendPacedFirstMessageBytes(prewarmBytes);
|
|
6251
5440
|
}
|
|
6252
5441
|
/**
|
|
6253
|
-
* Iterate ``bytes``
|
|
5442
|
+
* Iterate ``bytes`` in 20 ms slices (Twilio PSTN frame quantum) and
|
|
6254
5443
|
* forward each via ``deps.bridge.sendAudio`` with mark-gated pacing
|
|
6255
5444
|
* (Twilio) or playout-time-based pacing (Telnyx). Caps the carrier-
|
|
6256
5445
|
* side buffer at ``FIRST_MESSAGE_MARK_WINDOW`` chunks so a barge-in's
|
|
@@ -6267,30 +5456,20 @@ var StreamHandler = class _StreamHandler {
|
|
|
6267
5456
|
*/
|
|
6268
5457
|
async sendPacedFirstMessageBytes(bytes) {
|
|
6269
5458
|
if (this.pendingMarks.length > 0) this.drainPendingMarks();
|
|
6270
|
-
this.firstMessageMarkCounter = 0;
|
|
6271
5459
|
let firstChunkSent = false;
|
|
6272
|
-
|
|
6273
|
-
|
|
6274
|
-
|
|
6275
|
-
|
|
5460
|
+
const PSTN_FRAME_MS = 20;
|
|
5461
|
+
const bytesPerMs = this.ttsOutputFormatNativeForCarrier ? 8 : _StreamHandler.PCM16_16K_BYTES_PER_MS;
|
|
5462
|
+
const sliceBytes = bytesPerMs * PSTN_FRAME_MS;
|
|
5463
|
+
for (let i = 0; i < bytes.length; i += sliceBytes) {
|
|
6276
5464
|
if (!this.isSpeaking) break;
|
|
6277
|
-
const chunk = bytes.subarray(i, i +
|
|
5465
|
+
const chunk = bytes.subarray(i, i + sliceBytes);
|
|
6278
5466
|
if (!firstChunkSent) firstChunkSent = true;
|
|
6279
|
-
if (this.aec
|
|
5467
|
+
if (this.aec && !this.ttsOutputFormatNativeForCarrier) {
|
|
5468
|
+
this.aec.pushFarEnd(chunk);
|
|
5469
|
+
}
|
|
6280
5470
|
const encoded = this.encodePipelineAudio(chunk);
|
|
6281
5471
|
this.deps.bridge.sendAudio(this.ws, encoded, this.streamSid);
|
|
6282
5472
|
this.markFirstAudioSent();
|
|
6283
|
-
const markPromise = this.sendMarkAwaitable();
|
|
6284
|
-
if (!initialFillComplete && this.pendingMarks.length >= _StreamHandler.FIRST_MESSAGE_MARK_WINDOW) {
|
|
6285
|
-
initialFillComplete = true;
|
|
6286
|
-
}
|
|
6287
|
-
if (markPromise === null || initialFillComplete) {
|
|
6288
|
-
const playoutMs = Math.max(
|
|
6289
|
-
1,
|
|
6290
|
-
Math.floor(chunk.length / _StreamHandler.PCM16_16K_BYTES_PER_MS)
|
|
6291
|
-
);
|
|
6292
|
-
await new Promise((resolve) => setTimeout(resolve, playoutMs));
|
|
6293
|
-
}
|
|
6294
5473
|
}
|
|
6295
5474
|
return firstChunkSent;
|
|
6296
5475
|
}
|
|
@@ -6310,6 +5489,12 @@ var StreamHandler = class _StreamHandler {
|
|
|
6310
5489
|
getLogger().debug(`TTS setTelephonyCarrier failed (${label}): ${String(e)}`);
|
|
6311
5490
|
}
|
|
6312
5491
|
}
|
|
5492
|
+
this.ttsOutputFormatNativeForCarrier = this.isTtsOutputFormatNativeForCarrier();
|
|
5493
|
+
if (this.ttsOutputFormatNativeForCarrier) {
|
|
5494
|
+
getLogger().debug(
|
|
5495
|
+
`TTS outputFormat matches ${this.deps.bridge.telephonyProvider} wire codec \u2014 bypassing client-side transcode`
|
|
5496
|
+
);
|
|
5497
|
+
}
|
|
6313
5498
|
}
|
|
6314
5499
|
if (!this.stt) {
|
|
6315
5500
|
getLogger().debug(`Pipeline mode (${label}): no STT configured`);
|
|
@@ -6319,7 +5504,7 @@ var StreamHandler = class _StreamHandler {
|
|
|
6319
5504
|
}
|
|
6320
5505
|
if (!this.deps.agent.vad) {
|
|
6321
5506
|
try {
|
|
6322
|
-
const { SileroVAD } = await import("./silero-vad-
|
|
5507
|
+
const { SileroVAD } = await import("./silero-vad-LNDFGIY7.mjs");
|
|
6323
5508
|
this.autoVad = await SileroVAD.forPhoneCall();
|
|
6324
5509
|
getLogger().info(
|
|
6325
5510
|
`auto-VAD enabled (SileroVAD, phone preset). Pass agent.vad=\u2026 to override.`
|
|
@@ -6338,12 +5523,9 @@ var StreamHandler = class _StreamHandler {
|
|
|
6338
5523
|
}
|
|
6339
5524
|
}
|
|
6340
5525
|
if (this.deps.agent.echoCancellation) {
|
|
6341
|
-
|
|
6342
|
-
|
|
6343
|
-
|
|
6344
|
-
`echoCancellation: true on ${carrier} (PSTN). Server-side NLMS cannot model PSTN's ~250\u20131500 ms round-trip echo with a 32 ms filter window \u2014 it will silently no-op. Best practice: keep echoCancellation: false; rely on the carrier + caller device's built-in echo suppression and Patter's self-hearing guard. Enable AEC only for browser/native deployments where the SDK owns the audio path end-to-end.`
|
|
6345
|
-
);
|
|
6346
|
-
}
|
|
5526
|
+
getLogger().warn(
|
|
5527
|
+
`echoCancellation: true on ${this.deps.bridge.telephonyProvider} (PSTN). Server-side NLMS cannot model PSTN's ~250\u20131500 ms round-trip echo with a 32 ms filter window \u2014 it will silently no-op. Best practice: keep echoCancellation: false; rely on the carrier + caller device's built-in echo suppression and Patter's self-hearing guard. Enable AEC only for browser/native deployments where the SDK owns the audio path end-to-end.`
|
|
5528
|
+
);
|
|
6347
5529
|
try {
|
|
6348
5530
|
const { NlmsEchoCanceller } = await import("./aec-PJJMUM5E.mjs");
|
|
6349
5531
|
this.aec = new NlmsEchoCanceller({ sampleRate: 16e3 });
|
|
@@ -6476,13 +5658,20 @@ var StreamHandler = class _StreamHandler {
|
|
|
6476
5658
|
);
|
|
6477
5659
|
}
|
|
6478
5660
|
const providerModel = this.deps.agent.llm?.model ?? "";
|
|
5661
|
+
const augmentedTools = augmentWithBuiltinHandoffTools(
|
|
5662
|
+
this.deps.agent.tools,
|
|
5663
|
+
{
|
|
5664
|
+
transferCall: (number) => this.deps.bridge.transferCall(this.callId, number),
|
|
5665
|
+
endCall: () => this.deps.bridge.endCall(this.callId, this.ws)
|
|
5666
|
+
}
|
|
5667
|
+
);
|
|
6479
5668
|
this.llmLoop = new LLMLoop(
|
|
6480
5669
|
"",
|
|
6481
5670
|
// apiKey unused when llmProvider is supplied
|
|
6482
5671
|
providerModel,
|
|
6483
5672
|
// propagate so calculateLlmCost can match the price row
|
|
6484
5673
|
resolvedPrompt,
|
|
6485
|
-
|
|
5674
|
+
augmentedTools,
|
|
6486
5675
|
this.deps.agent.llm,
|
|
6487
5676
|
this.deps.agent.disablePhonePreamble ?? false
|
|
6488
5677
|
);
|
|
@@ -6493,11 +5682,18 @@ var StreamHandler = class _StreamHandler {
|
|
|
6493
5682
|
} else if (!this.deps.onMessage && this.deps.config.openaiKey) {
|
|
6494
5683
|
let llmModel = this.deps.agent.model || "gpt-4o-mini";
|
|
6495
5684
|
if (llmModel.includes("realtime")) llmModel = "gpt-4o-mini";
|
|
5685
|
+
const augmentedTools = augmentWithBuiltinHandoffTools(
|
|
5686
|
+
this.deps.agent.tools,
|
|
5687
|
+
{
|
|
5688
|
+
transferCall: (number) => this.deps.bridge.transferCall(this.callId, number),
|
|
5689
|
+
endCall: () => this.deps.bridge.endCall(this.callId, this.ws)
|
|
5690
|
+
}
|
|
5691
|
+
);
|
|
6496
5692
|
this.llmLoop = new LLMLoop(
|
|
6497
5693
|
this.deps.config.openaiKey,
|
|
6498
5694
|
llmModel,
|
|
6499
5695
|
resolvedPrompt,
|
|
6500
|
-
|
|
5696
|
+
augmentedTools,
|
|
6501
5697
|
void 0,
|
|
6502
5698
|
this.deps.agent.disablePhonePreamble ?? false
|
|
6503
5699
|
);
|
|
@@ -7012,16 +6208,49 @@ var StreamHandler = class _StreamHandler {
|
|
|
7012
6208
|
async initRealtimeAdapter(resolvedPrompt) {
|
|
7013
6209
|
const label = this.deps.bridge.label;
|
|
7014
6210
|
this.adapter = this.deps.buildAIAdapter(resolvedPrompt);
|
|
7015
|
-
|
|
7016
|
-
|
|
7017
|
-
getLogger().debug(`AI adapter connected (${label})`);
|
|
7018
|
-
} catch (e) {
|
|
7019
|
-
getLogger().error(`AI adapter connect FAILED (${label}):`, e);
|
|
6211
|
+
let parked;
|
|
6212
|
+
if (typeof this.deps.popPrewarmedConnections === "function") {
|
|
7020
6213
|
try {
|
|
7021
|
-
|
|
7022
|
-
} catch {
|
|
6214
|
+
parked = this.deps.popPrewarmedConnections(this.callId);
|
|
6215
|
+
} catch (err) {
|
|
6216
|
+
getLogger().debug(`popPrewarmedConnections raised: ${String(err)}`);
|
|
6217
|
+
}
|
|
6218
|
+
}
|
|
6219
|
+
const parkedRealtimeWs = parked?.openaiRealtime;
|
|
6220
|
+
let adoptOk = false;
|
|
6221
|
+
if (parkedRealtimeWs !== void 0) {
|
|
6222
|
+
const adapterAny = this.adapter;
|
|
6223
|
+
const wsAlive = parkedRealtimeWs.readyState === 1;
|
|
6224
|
+
if (typeof adapterAny?.adoptWebSocket === "function" && wsAlive) {
|
|
6225
|
+
try {
|
|
6226
|
+
adapterAny.adoptWebSocket(parkedRealtimeWs);
|
|
6227
|
+
getLogger().info(
|
|
6228
|
+
`[CONNECT] callId=${this.callId} provider=openai_realtime source=adopted ms=0`
|
|
6229
|
+
);
|
|
6230
|
+
adoptOk = true;
|
|
6231
|
+
} catch (err) {
|
|
6232
|
+
getLogger().debug(`Realtime adoptWebSocket failed: ${String(err)}; falling back`);
|
|
6233
|
+
}
|
|
6234
|
+
}
|
|
6235
|
+
if (!adoptOk) {
|
|
6236
|
+
try {
|
|
6237
|
+
parkedRealtimeWs.close();
|
|
6238
|
+
} catch {
|
|
6239
|
+
}
|
|
6240
|
+
}
|
|
6241
|
+
}
|
|
6242
|
+
if (!adoptOk) {
|
|
6243
|
+
try {
|
|
6244
|
+
await this.adapter.connect();
|
|
6245
|
+
getLogger().debug(`AI adapter connected (${label})`);
|
|
6246
|
+
} catch (e) {
|
|
6247
|
+
getLogger().error(`AI adapter connect FAILED (${label}):`, e);
|
|
6248
|
+
try {
|
|
6249
|
+
await this.deps.bridge.endCall(this.callId, this.ws);
|
|
6250
|
+
} catch {
|
|
6251
|
+
}
|
|
6252
|
+
return;
|
|
7023
6253
|
}
|
|
7024
|
-
return;
|
|
7025
6254
|
}
|
|
7026
6255
|
if (this.deps.agent.firstMessage) {
|
|
7027
6256
|
this.metricsAcc.startTurn();
|
|
@@ -7141,8 +6370,21 @@ var StreamHandler = class _StreamHandler {
|
|
|
7141
6370
|
await this.emitUserSpeechEnded();
|
|
7142
6371
|
}
|
|
7143
6372
|
async onAdapterTranscriptInput(inputText) {
|
|
6373
|
+
const stripped = inputText.trim().toLowerCase();
|
|
6374
|
+
if (HALLUCINATIONS.has(stripped) || stripped === "") {
|
|
6375
|
+
getLogger().debug(
|
|
6376
|
+
`Realtime transcript_input dropped (likely Whisper hallucination on silence/echo): ${sanitizeLogValue(inputText.slice(0, 60))}`
|
|
6377
|
+
);
|
|
6378
|
+
this.userTranscriptPending = false;
|
|
6379
|
+
return;
|
|
6380
|
+
}
|
|
7144
6381
|
getLogger().debug(`User (${this.deps.bridge.label}): ${sanitizeLogValue(inputText)}`);
|
|
7145
6382
|
this.history.push({ role: "user", text: inputText, timestamp: Date.now() });
|
|
6383
|
+
if (this.adapter instanceof OpenAIRealtimeAdapter) {
|
|
6384
|
+
void this.adapter.requestResponse().catch(
|
|
6385
|
+
(err) => getLogger().debug(`Realtime requestResponse failed: ${String(err)}`)
|
|
6386
|
+
);
|
|
6387
|
+
}
|
|
7146
6388
|
if (!this.metricsAcc.turnActive) {
|
|
7147
6389
|
this.metricsAcc.startTurn();
|
|
7148
6390
|
this.currentAgentText = "";
|
|
@@ -7294,6 +6536,18 @@ var StreamHandler = class _StreamHandler {
|
|
|
7294
6536
|
await this.flushAssistantTurn(text);
|
|
7295
6537
|
}
|
|
7296
6538
|
async onAdapterSpeechInterrupt() {
|
|
6539
|
+
if (this.adapter instanceof OpenAIRealtimeAdapter) {
|
|
6540
|
+
const startedAt = this.adapter.currentResponseFirstAudioAt;
|
|
6541
|
+
if (startedAt !== null) {
|
|
6542
|
+
const elapsedMs = Date.now() - startedAt;
|
|
6543
|
+
if (elapsedMs < _StreamHandler.MIN_AGENT_SPEAKING_MS_BEFORE_BARGE_IN_NO_AEC) {
|
|
6544
|
+
getLogger().info(
|
|
6545
|
+
`Realtime barge-in suppressed (response < gate, ${elapsedMs}ms)`
|
|
6546
|
+
);
|
|
6547
|
+
return;
|
|
6548
|
+
}
|
|
6549
|
+
}
|
|
6550
|
+
}
|
|
7297
6551
|
this.deps.bridge.sendClear(this.ws, this.streamSid);
|
|
7298
6552
|
if (this.adapter instanceof OpenAIRealtimeAdapter) this.adapter.cancelResponse();
|
|
7299
6553
|
this.metricsAcc.recordTurnInterrupted();
|
|
@@ -7528,32 +6782,32 @@ async function queryDeepgramCost(metricsAcc, deepgramKey, deepgramRequestId) {
|
|
|
7528
6782
|
|
|
7529
6783
|
// src/services/call-log.ts
|
|
7530
6784
|
init_esm_shims();
|
|
7531
|
-
import * as
|
|
7532
|
-
import * as
|
|
6785
|
+
import * as crypto4 from "crypto";
|
|
6786
|
+
import * as fs3 from "fs";
|
|
7533
6787
|
import { promises as fsp } from "fs";
|
|
7534
6788
|
import * as os from "os";
|
|
7535
|
-
import * as
|
|
6789
|
+
import * as path3 from "path";
|
|
7536
6790
|
var SCHEMA_VERSION = "1.0";
|
|
7537
6791
|
var DEFAULT_RETENTION_DAYS = 30;
|
|
7538
6792
|
function xdgDataHome() {
|
|
7539
|
-
return process.env.XDG_DATA_HOME ||
|
|
6793
|
+
return process.env.XDG_DATA_HOME || path3.join(os.homedir(), ".local", "share");
|
|
7540
6794
|
}
|
|
7541
6795
|
function platformDefaultRoot() {
|
|
7542
6796
|
if (process.platform === "darwin") {
|
|
7543
|
-
return
|
|
6797
|
+
return path3.join(os.homedir(), "Library", "Application Support", "patter");
|
|
7544
6798
|
}
|
|
7545
6799
|
if (process.platform === "win32") {
|
|
7546
6800
|
const localAppData = process.env.LOCALAPPDATA;
|
|
7547
|
-
if (localAppData) return
|
|
7548
|
-
return
|
|
6801
|
+
if (localAppData) return path3.join(localAppData, "patter");
|
|
6802
|
+
return path3.join(os.homedir(), "AppData", "Local", "patter");
|
|
7549
6803
|
}
|
|
7550
|
-
return
|
|
6804
|
+
return path3.join(xdgDataHome(), "patter");
|
|
7551
6805
|
}
|
|
7552
6806
|
function resolveLogRoot(explicit) {
|
|
7553
6807
|
const value = explicit ?? process.env.PATTER_LOG_DIR;
|
|
7554
6808
|
if (!value) return null;
|
|
7555
6809
|
if (value.trim().toLowerCase() === "auto") return platformDefaultRoot();
|
|
7556
|
-
if (value.startsWith("~")) return
|
|
6810
|
+
if (value.startsWith("~")) return path3.join(os.homedir(), value.slice(1));
|
|
7557
6811
|
return value;
|
|
7558
6812
|
}
|
|
7559
6813
|
function retentionDays() {
|
|
@@ -7564,16 +6818,16 @@ function retentionDays() {
|
|
|
7564
6818
|
return Math.max(0, parsed);
|
|
7565
6819
|
}
|
|
7566
6820
|
function redactMode() {
|
|
7567
|
-
const raw = (process.env.PATTER_LOG_REDACT_PHONE || "
|
|
6821
|
+
const raw = (process.env.PATTER_LOG_REDACT_PHONE || "full").trim().toLowerCase();
|
|
7568
6822
|
if (raw === "full" || raw === "mask" || raw === "hash_only") return raw;
|
|
7569
|
-
return "
|
|
6823
|
+
return "full";
|
|
7570
6824
|
}
|
|
7571
6825
|
function redactPhone(raw) {
|
|
7572
6826
|
if (!raw) return "";
|
|
7573
6827
|
const mode = redactMode();
|
|
7574
6828
|
if (mode === "full") return raw;
|
|
7575
6829
|
if (mode === "hash_only") {
|
|
7576
|
-
return "sha256:" +
|
|
6830
|
+
return "sha256:" + crypto4.createHash("sha256").update(raw, "utf8").digest("hex").slice(0, 16);
|
|
7577
6831
|
}
|
|
7578
6832
|
return maskPhoneNumber(raw);
|
|
7579
6833
|
}
|
|
@@ -7582,9 +6836,9 @@ function utcIso(tsSeconds) {
|
|
|
7582
6836
|
return new Date(ms).toISOString();
|
|
7583
6837
|
}
|
|
7584
6838
|
async function atomicWriteJson(filePath, payload) {
|
|
7585
|
-
const dir =
|
|
6839
|
+
const dir = path3.dirname(filePath);
|
|
7586
6840
|
await fsp.mkdir(dir, { recursive: true });
|
|
7587
|
-
const tmp =
|
|
6841
|
+
const tmp = path3.join(dir, `.tmp.${process.pid}.${crypto4.randomBytes(4).toString("hex")}.json`);
|
|
7588
6842
|
try {
|
|
7589
6843
|
const handle = await fsp.open(tmp, "w");
|
|
7590
6844
|
try {
|
|
@@ -7603,7 +6857,7 @@ async function atomicWriteJson(filePath, payload) {
|
|
|
7603
6857
|
}
|
|
7604
6858
|
}
|
|
7605
6859
|
async function appendJsonl(filePath, record) {
|
|
7606
|
-
await fsp.mkdir(
|
|
6860
|
+
await fsp.mkdir(path3.dirname(filePath), { recursive: true });
|
|
7607
6861
|
await fsp.appendFile(filePath, JSON.stringify(record) + "\n", { encoding: "utf8" });
|
|
7608
6862
|
}
|
|
7609
6863
|
var CallLogger = class {
|
|
@@ -7613,9 +6867,9 @@ var CallLogger = class {
|
|
|
7613
6867
|
this.root = null;
|
|
7614
6868
|
return;
|
|
7615
6869
|
}
|
|
7616
|
-
const resolved = root.startsWith("~") ?
|
|
6870
|
+
const resolved = root.startsWith("~") ? path3.join(os.homedir(), root.slice(1)) : root;
|
|
7617
6871
|
try {
|
|
7618
|
-
|
|
6872
|
+
fs3.mkdirSync(resolved, { recursive: true });
|
|
7619
6873
|
this.root = resolved;
|
|
7620
6874
|
getLogger().info(`Call logs: ${resolved}`);
|
|
7621
6875
|
} catch (err) {
|
|
@@ -7637,7 +6891,7 @@ var CallLogger = class {
|
|
|
7637
6891
|
const month = String(dt.getUTCMonth() + 1).padStart(2, "0");
|
|
7638
6892
|
const day = String(dt.getUTCDate()).padStart(2, "0");
|
|
7639
6893
|
const safeId = sanitizeLogValue(callId, 64).replace(/\//g, "_") || "unknown";
|
|
7640
|
-
return
|
|
6894
|
+
return path3.join(this.root, "calls", year, month, day, safeId);
|
|
7641
6895
|
}
|
|
7642
6896
|
/** Write the initial `metadata.json` for a new call. */
|
|
7643
6897
|
async logCallStart(callId, input = {}) {
|
|
@@ -7655,6 +6909,7 @@ var CallLogger = class {
|
|
|
7655
6909
|
status: "in_progress",
|
|
7656
6910
|
caller: redactPhone(input.caller ?? ""),
|
|
7657
6911
|
callee: redactPhone(input.callee ?? ""),
|
|
6912
|
+
direction: input.direction || "inbound",
|
|
7658
6913
|
telephony_provider: input.telephonyProvider ?? "",
|
|
7659
6914
|
provider_mode: input.providerMode ?? "",
|
|
7660
6915
|
agent: input.agent ?? {},
|
|
@@ -7664,11 +6919,11 @@ var CallLogger = class {
|
|
|
7664
6919
|
error: null
|
|
7665
6920
|
};
|
|
7666
6921
|
try {
|
|
7667
|
-
await atomicWriteJson(
|
|
6922
|
+
await atomicWriteJson(path3.join(dir, "metadata.json"), metadata);
|
|
7668
6923
|
} catch (err) {
|
|
7669
6924
|
getLogger().warn(`call_log write failed (${sanitizeLogValue(callId)}): ${sanitizeLogValue(String(err))}`);
|
|
7670
6925
|
}
|
|
7671
|
-
if (
|
|
6926
|
+
if (crypto4.randomBytes(1)[0] < 5) {
|
|
7672
6927
|
this.sweepOldDays();
|
|
7673
6928
|
}
|
|
7674
6929
|
}
|
|
@@ -7683,7 +6938,7 @@ var CallLogger = class {
|
|
|
7683
6938
|
...turn
|
|
7684
6939
|
};
|
|
7685
6940
|
try {
|
|
7686
|
-
await appendJsonl(
|
|
6941
|
+
await appendJsonl(path3.join(dir, "transcript.jsonl"), record);
|
|
7687
6942
|
} catch (err) {
|
|
7688
6943
|
getLogger().warn(
|
|
7689
6944
|
`call_log turn write failed (${sanitizeLogValue(callId)}): ${sanitizeLogValue(String(err))}`
|
|
@@ -7702,7 +6957,7 @@ var CallLogger = class {
|
|
|
7702
6957
|
data: payload
|
|
7703
6958
|
};
|
|
7704
6959
|
try {
|
|
7705
|
-
await appendJsonl(
|
|
6960
|
+
await appendJsonl(path3.join(dir, "events.jsonl"), record);
|
|
7706
6961
|
} catch (err) {
|
|
7707
6962
|
getLogger().warn(
|
|
7708
6963
|
`call_log event write failed (${sanitizeLogValue(callId)}): ${sanitizeLogValue(String(err))}`
|
|
@@ -7714,7 +6969,7 @@ var CallLogger = class {
|
|
|
7714
6969
|
if (!this.enabled) return;
|
|
7715
6970
|
const dir = this.callDir(callId);
|
|
7716
6971
|
if (dir === null) return;
|
|
7717
|
-
const metadataPath =
|
|
6972
|
+
const metadataPath = path3.join(dir, "metadata.json");
|
|
7718
6973
|
let existing = {};
|
|
7719
6974
|
try {
|
|
7720
6975
|
existing = JSON.parse(await fsp.readFile(metadataPath, "utf8"));
|
|
@@ -7749,20 +7004,20 @@ var CallLogger = class {
|
|
|
7749
7004
|
const days = retentionDays();
|
|
7750
7005
|
if (days === 0) return;
|
|
7751
7006
|
const cutoff = Date.now() / 1e3 - days * 86400;
|
|
7752
|
-
const callsRoot =
|
|
7753
|
-
if (!
|
|
7007
|
+
const callsRoot = path3.join(this.root, "calls");
|
|
7008
|
+
if (!fs3.existsSync(callsRoot)) return;
|
|
7754
7009
|
try {
|
|
7755
|
-
for (const yearName of
|
|
7010
|
+
for (const yearName of fs3.readdirSync(callsRoot)) {
|
|
7756
7011
|
if (!/^\d+$/.test(yearName)) continue;
|
|
7757
|
-
const yearDir =
|
|
7758
|
-
if (!
|
|
7759
|
-
for (const monthName of
|
|
7012
|
+
const yearDir = path3.join(callsRoot, yearName);
|
|
7013
|
+
if (!fs3.statSync(yearDir).isDirectory()) continue;
|
|
7014
|
+
for (const monthName of fs3.readdirSync(yearDir)) {
|
|
7760
7015
|
if (!/^\d+$/.test(monthName)) continue;
|
|
7761
|
-
const monthDir =
|
|
7762
|
-
if (!
|
|
7763
|
-
for (const dayName of
|
|
7016
|
+
const monthDir = path3.join(yearDir, monthName);
|
|
7017
|
+
if (!fs3.statSync(monthDir).isDirectory()) continue;
|
|
7018
|
+
for (const dayName of fs3.readdirSync(monthDir)) {
|
|
7764
7019
|
if (!/^\d+$/.test(dayName)) continue;
|
|
7765
|
-
const dayDir =
|
|
7020
|
+
const dayDir = path3.join(monthDir, dayName);
|
|
7766
7021
|
const y = Number.parseInt(yearName, 10);
|
|
7767
7022
|
const m = Number.parseInt(monthName, 10);
|
|
7768
7023
|
const d = Number.parseInt(dayName, 10);
|
|
@@ -7772,12 +7027,12 @@ var CallLogger = class {
|
|
|
7772
7027
|
}
|
|
7773
7028
|
}
|
|
7774
7029
|
try {
|
|
7775
|
-
if (
|
|
7030
|
+
if (fs3.readdirSync(monthDir).length === 0) fs3.rmdirSync(monthDir);
|
|
7776
7031
|
} catch {
|
|
7777
7032
|
}
|
|
7778
7033
|
}
|
|
7779
7034
|
try {
|
|
7780
|
-
if (
|
|
7035
|
+
if (fs3.readdirSync(yearDir).length === 0) fs3.rmdirSync(yearDir);
|
|
7781
7036
|
} catch {
|
|
7782
7037
|
}
|
|
7783
7038
|
}
|
|
@@ -7788,19 +7043,19 @@ var CallLogger = class {
|
|
|
7788
7043
|
};
|
|
7789
7044
|
function rmTree(target) {
|
|
7790
7045
|
try {
|
|
7791
|
-
for (const child of
|
|
7792
|
-
const childPath =
|
|
7793
|
-
const stat =
|
|
7046
|
+
for (const child of fs3.readdirSync(target)) {
|
|
7047
|
+
const childPath = path3.join(target, child);
|
|
7048
|
+
const stat = fs3.lstatSync(childPath);
|
|
7794
7049
|
if (stat.isDirectory()) {
|
|
7795
7050
|
rmTree(childPath);
|
|
7796
7051
|
} else {
|
|
7797
7052
|
try {
|
|
7798
|
-
|
|
7053
|
+
fs3.unlinkSync(childPath);
|
|
7799
7054
|
} catch {
|
|
7800
7055
|
}
|
|
7801
7056
|
}
|
|
7802
7057
|
}
|
|
7803
|
-
|
|
7058
|
+
fs3.rmdirSync(target);
|
|
7804
7059
|
} catch {
|
|
7805
7060
|
}
|
|
7806
7061
|
}
|
|
@@ -7848,6 +7103,19 @@ function classifyTelnyxAmd(result) {
|
|
|
7848
7103
|
if (result === "fax") return "fax";
|
|
7849
7104
|
return "unknown";
|
|
7850
7105
|
}
|
|
7106
|
+
function twilioStatusToOutcome(callStatus) {
|
|
7107
|
+
const s = (callStatus || "").toLowerCase();
|
|
7108
|
+
if (s === "no-answer") return "no_answer";
|
|
7109
|
+
if (s === "busy") return "busy";
|
|
7110
|
+
return "failed";
|
|
7111
|
+
}
|
|
7112
|
+
function telnyxHangupOutcome(cause) {
|
|
7113
|
+
const c = (cause || "").toLowerCase();
|
|
7114
|
+
if (c === "no_answer" || c === "timeout" || c === "no_user_response") return "no_answer";
|
|
7115
|
+
if (c === "user_busy" || c === "busy") return "busy";
|
|
7116
|
+
if (c === "call_rejected" || c === "rejected" || c === "destination_out_of_order") return "failed";
|
|
7117
|
+
return null;
|
|
7118
|
+
}
|
|
7851
7119
|
function validateWebhookUrl(url) {
|
|
7852
7120
|
const parsed = new URL(url);
|
|
7853
7121
|
if (!["http:", "https:"].includes(parsed.protocol)) {
|
|
@@ -7905,7 +7173,7 @@ function validateTelnyxSignature(rawBody, signature, timestamp, publicKey, toler
|
|
|
7905
7173
|
if (ageMs < 0 || ageMs > toleranceSec * 1e3) return false;
|
|
7906
7174
|
const payload = `${timestamp}|${rawBody}`;
|
|
7907
7175
|
const keyBuffer = Buffer.from(publicKey, "base64");
|
|
7908
|
-
const keyObject =
|
|
7176
|
+
const keyObject = crypto5.createPublicKey({
|
|
7909
7177
|
key: keyBuffer,
|
|
7910
7178
|
format: "der",
|
|
7911
7179
|
type: "spki"
|
|
@@ -7915,7 +7183,7 @@ function validateTelnyxSignature(rawBody, signature, timestamp, publicKey, toler
|
|
|
7915
7183
|
if (!trimmed) continue;
|
|
7916
7184
|
try {
|
|
7917
7185
|
const sigBuffer = Buffer.from(trimmed, "base64");
|
|
7918
|
-
if (
|
|
7186
|
+
if (crypto5.verify(null, Buffer.from(payload), keyObject, sigBuffer)) {
|
|
7919
7187
|
return true;
|
|
7920
7188
|
}
|
|
7921
7189
|
} catch {
|
|
@@ -7932,12 +7200,12 @@ function validateTwilioSid(sid, prefix = "CA") {
|
|
|
7932
7200
|
}
|
|
7933
7201
|
function validateTwilioSignature(url, params, signature, authToken) {
|
|
7934
7202
|
const data = url + Object.keys(params).sort().reduce((acc, key) => acc + key + (params[key] ?? ""), "");
|
|
7935
|
-
const expected =
|
|
7203
|
+
const expected = crypto5.createHmac("sha1", authToken).update(data).digest("base64");
|
|
7936
7204
|
try {
|
|
7937
7205
|
const sigBuf = Buffer.from(signature);
|
|
7938
7206
|
const expBuf = Buffer.from(expected);
|
|
7939
7207
|
if (sigBuf.length !== expBuf.length) return false;
|
|
7940
|
-
return
|
|
7208
|
+
return crypto5.timingSafeEqual(sigBuf, expBuf);
|
|
7941
7209
|
} catch {
|
|
7942
7210
|
return false;
|
|
7943
7211
|
}
|
|
@@ -8010,6 +7278,7 @@ var TwilioBridge = class {
|
|
|
8010
7278
|
config;
|
|
8011
7279
|
label = "Twilio";
|
|
8012
7280
|
telephonyProvider = "twilio";
|
|
7281
|
+
inputWireFormat = "ulaw_8000";
|
|
8013
7282
|
sendAudio(ws, audioBase64, streamSid) {
|
|
8014
7283
|
ws.send(JSON.stringify({ event: "media", streamSid, media: { payload: audioBase64 } }));
|
|
8015
7284
|
}
|
|
@@ -8097,7 +7366,7 @@ var TELNYX_DTMF_ALLOWED = new Set("0123456789*#ABCDabcdwW");
|
|
|
8097
7366
|
var TELNYX_DTMF_DURATION_MS = 250;
|
|
8098
7367
|
async function sleep(ms) {
|
|
8099
7368
|
if (ms <= 0) return;
|
|
8100
|
-
await new Promise((
|
|
7369
|
+
await new Promise((resolve2) => setTimeout(resolve2, ms));
|
|
8101
7370
|
}
|
|
8102
7371
|
var TelnyxBridge = class {
|
|
8103
7372
|
constructor(config) {
|
|
@@ -8106,6 +7375,11 @@ var TelnyxBridge = class {
|
|
|
8106
7375
|
config;
|
|
8107
7376
|
label = "Telnyx";
|
|
8108
7377
|
telephonyProvider = "telnyx";
|
|
7378
|
+
// ``streaming_start`` negotiates PCMU bidirectional by default — keeping
|
|
7379
|
+
// ``ulaw_8000`` here matches what TwilioBridge does and keeps the stream
|
|
7380
|
+
// handler's input-transcode branch in the right shape. If a deployment
|
|
7381
|
+
// overrides the negotiation to L16, this should flip to ``pcm_16000``.
|
|
7382
|
+
inputWireFormat = "ulaw_8000";
|
|
8109
7383
|
sendAudio(ws, audioBase64, _streamSid) {
|
|
8110
7384
|
ws.send(JSON.stringify({ event: "media", media: { payload: audioBase64 } }));
|
|
8111
7385
|
}
|
|
@@ -8127,7 +7401,7 @@ var TelnyxBridge = class {
|
|
|
8127
7401
|
});
|
|
8128
7402
|
getLogger().info(`Telnyx call transferred to ${toNumber}`);
|
|
8129
7403
|
}
|
|
8130
|
-
async sendDtmf(callId, digits, delayMs) {
|
|
7404
|
+
async sendDtmf(_ws, callId, digits, delayMs) {
|
|
8131
7405
|
if (!digits) {
|
|
8132
7406
|
getLogger().warn("TelnyxBridge.sendDtmf called with empty digits");
|
|
8133
7407
|
return;
|
|
@@ -8325,6 +7599,99 @@ var EmbeddedServer = class {
|
|
|
8325
7599
|
* (tests) work without further setup. See FIX #91.
|
|
8326
7600
|
*/
|
|
8327
7601
|
recordPrewarmWaste = () => void 0;
|
|
7602
|
+
/**
|
|
7603
|
+
* Per-callId completion deferreds for ``Patter.call({ wait: true })``.
|
|
7604
|
+
* Resolved by the FIRST terminal signal: the Twilio/Telnyx status callback
|
|
7605
|
+
* for no-media outcomes (no-answer / busy / failed), or ``onCallEnd`` for a
|
|
7606
|
+
* connected call (answered / voicemail). The AMD classification is recorded
|
|
7607
|
+
* per callId so the connected-call path can distinguish ``answered`` from
|
|
7608
|
+
* ``voicemail``. This is what lets ``call({ wait: true })`` resolve to a
|
|
7609
|
+
* structured {@link CallResult} without the caller hand-wiring ``onCallEnd``
|
|
7610
|
+
* to a promise. Public so ``client.ts`` can register/await + fail in-flight
|
|
7611
|
+
* waiters on ``disconnect()``. Mirrors Python's ``EmbeddedServer._completions``.
|
|
7612
|
+
*/
|
|
7613
|
+
completions = /* @__PURE__ */ new Map();
|
|
7614
|
+
/** AMD classification recorded per callId, used by the connected-call path. */
|
|
7615
|
+
amdClass = /* @__PURE__ */ new Map();
|
|
7616
|
+
// === Outbound completion registry (call({ wait: true })) ===
|
|
7617
|
+
/**
|
|
7618
|
+
* Register (or return) a completion promise for an outbound call.
|
|
7619
|
+
*
|
|
7620
|
+
* Called by ``Patter.call({ wait: true })`` immediately after the carrier
|
|
7621
|
+
* accepts the dial — the promise resolves to a {@link CallResult} once a
|
|
7622
|
+
* terminal signal arrives. Idempotent: returns the existing pending promise
|
|
7623
|
+
* if one is already registered for ``callId``. Mirrors Python's
|
|
7624
|
+
* ``register_completion``.
|
|
7625
|
+
*/
|
|
7626
|
+
registerCompletion(callId) {
|
|
7627
|
+
const existing = this.completions.get(callId);
|
|
7628
|
+
if (existing && !existing.done) {
|
|
7629
|
+
return existing.promise;
|
|
7630
|
+
}
|
|
7631
|
+
let resolve2;
|
|
7632
|
+
let reject;
|
|
7633
|
+
const promise = new Promise((res, rej) => {
|
|
7634
|
+
resolve2 = res;
|
|
7635
|
+
reject = rej;
|
|
7636
|
+
});
|
|
7637
|
+
this.completions.set(callId, { promise, resolve: resolve2, reject, done: false });
|
|
7638
|
+
return promise;
|
|
7639
|
+
}
|
|
7640
|
+
/** Drop a registered completion (e.g. on a backstop timeout) without resolving it. */
|
|
7641
|
+
deleteCompletion(callId) {
|
|
7642
|
+
this.completions.delete(callId);
|
|
7643
|
+
this.amdClass.delete(callId);
|
|
7644
|
+
}
|
|
7645
|
+
/**
|
|
7646
|
+
* Resolve a pending completion with a {@link CallResult}.
|
|
7647
|
+
*
|
|
7648
|
+
* No-op when no completion is registered for ``callId`` (the common case —
|
|
7649
|
+
* most calls are placed without ``wait: true``) or it is already done.
|
|
7650
|
+
* Builds the result from the ``onCallEnd`` payload when ``data`` is provided
|
|
7651
|
+
* (connected calls carry transcript + {@link CallMetrics}); no-media
|
|
7652
|
+
* outcomes pass ``data`` undefined and yield an empty transcript / no cost.
|
|
7653
|
+
* Mirrors Python's ``_resolve_completion``.
|
|
7654
|
+
*/
|
|
7655
|
+
resolveCompletion(callId, args) {
|
|
7656
|
+
const entry = this.completions.get(callId);
|
|
7657
|
+
if (!entry || entry.done) return;
|
|
7658
|
+
const data = args.data;
|
|
7659
|
+
const metrics = data?.metrics ?? null;
|
|
7660
|
+
const cost = metrics?.cost ?? null;
|
|
7661
|
+
const durationRaw = metrics?.duration_seconds;
|
|
7662
|
+
const duration = typeof durationRaw === "number" ? durationRaw : 0;
|
|
7663
|
+
const transcriptRaw = data?.transcript;
|
|
7664
|
+
const transcript = Array.isArray(transcriptRaw) ? transcriptRaw : [];
|
|
7665
|
+
const result = {
|
|
7666
|
+
callId,
|
|
7667
|
+
outcome: args.outcome,
|
|
7668
|
+
status: args.status,
|
|
7669
|
+
durationSeconds: duration,
|
|
7670
|
+
transcript,
|
|
7671
|
+
cost,
|
|
7672
|
+
metrics
|
|
7673
|
+
};
|
|
7674
|
+
entry.done = true;
|
|
7675
|
+
entry.resolve(result);
|
|
7676
|
+
this.completions.delete(callId);
|
|
7677
|
+
this.amdClass.delete(callId);
|
|
7678
|
+
}
|
|
7679
|
+
/**
|
|
7680
|
+
* Fail every in-flight completion with ``error``. Called by
|
|
7681
|
+
* ``Patter.disconnect()`` so a ``call({ wait: true })`` awaiter does not
|
|
7682
|
+
* hang until its backstop timeout once the server is gone. Mirrors the
|
|
7683
|
+
* Python ``disconnect()`` change that fails in-flight ``wait=True`` awaiters.
|
|
7684
|
+
*/
|
|
7685
|
+
failPendingCompletions(error) {
|
|
7686
|
+
for (const entry of this.completions.values()) {
|
|
7687
|
+
if (!entry.done) {
|
|
7688
|
+
entry.done = true;
|
|
7689
|
+
entry.reject(error);
|
|
7690
|
+
}
|
|
7691
|
+
}
|
|
7692
|
+
this.completions.clear();
|
|
7693
|
+
this.amdClass.clear();
|
|
7694
|
+
}
|
|
8328
7695
|
/** Bind HTTP + WebSocket listeners on `port`, mount carrier webhooks and dashboard routes. */
|
|
8329
7696
|
async start(port = 8e3) {
|
|
8330
7697
|
const webhookUrlPattern = /^[a-zA-Z0-9][a-zA-Z0-9.\-]+[a-zA-Z0-9]$/;
|
|
@@ -8388,8 +7755,10 @@ var EmbeddedServer = class {
|
|
|
8388
7755
|
return;
|
|
8389
7756
|
}
|
|
8390
7757
|
const body = req.body;
|
|
8391
|
-
const
|
|
8392
|
-
const
|
|
7758
|
+
const rawCallSid = body["CallSid"] ?? "";
|
|
7759
|
+
const rawCallStatus = body["CallStatus"] ?? "";
|
|
7760
|
+
const callSid = sanitizeLogValue(rawCallSid);
|
|
7761
|
+
const callStatus = sanitizeLogValue(rawCallStatus);
|
|
8393
7762
|
const duration = body["CallDuration"] ?? body["Duration"] ?? "";
|
|
8394
7763
|
getLogger().info(
|
|
8395
7764
|
`Twilio status ${callStatus} for call ${callSid} (duration=${duration})`
|
|
@@ -8406,6 +7775,10 @@ var EmbeddedServer = class {
|
|
|
8406
7775
|
} catch (err) {
|
|
8407
7776
|
getLogger().debug(`recordPrewarmWaste threw: ${String(err)}`);
|
|
8408
7777
|
}
|
|
7778
|
+
this.resolveCompletion(rawCallSid, {
|
|
7779
|
+
outcome: twilioStatusToOutcome(rawCallStatus),
|
|
7780
|
+
status: rawCallStatus
|
|
7781
|
+
});
|
|
8409
7782
|
}
|
|
8410
7783
|
res.status(204).send();
|
|
8411
7784
|
});
|
|
@@ -8448,6 +7821,9 @@ var EmbeddedServer = class {
|
|
|
8448
7821
|
const answeredBy = body["AnsweredBy"] ?? "";
|
|
8449
7822
|
const callSid = body["CallSid"] ?? "";
|
|
8450
7823
|
getLogger().info(`AMD result for ${sanitizeLogValue(callSid)}: ${sanitizeLogValue(answeredBy)}`);
|
|
7824
|
+
if (callSid) {
|
|
7825
|
+
this.amdClass.set(callSid, classifyTwilioAmd(answeredBy));
|
|
7826
|
+
}
|
|
8451
7827
|
const cb = this.onMachineDetection;
|
|
8452
7828
|
if (cb && callSid) {
|
|
8453
7829
|
try {
|
|
@@ -8573,6 +7949,9 @@ var EmbeddedServer = class {
|
|
|
8573
7949
|
getLogger().info(
|
|
8574
7950
|
`Telnyx AMD result for ${sanitizeLogValue(amdCallId)}: ${sanitizeLogValue(amdResult)}`
|
|
8575
7951
|
);
|
|
7952
|
+
if (amdCallId) {
|
|
7953
|
+
this.amdClass.set(amdCallId, classifyTelnyxAmd(amdResult));
|
|
7954
|
+
}
|
|
8576
7955
|
const cbTx = this.onMachineDetection;
|
|
8577
7956
|
if (cbTx && amdCallId) {
|
|
8578
7957
|
try {
|
|
@@ -8609,6 +7988,13 @@ var EmbeddedServer = class {
|
|
|
8609
7988
|
} catch (err) {
|
|
8610
7989
|
getLogger().debug(`recordPrewarmWaste threw: ${String(err)}`);
|
|
8611
7990
|
}
|
|
7991
|
+
const noMediaOutcome = telnyxHangupOutcome(hangupCause);
|
|
7992
|
+
if (noMediaOutcome !== null) {
|
|
7993
|
+
this.resolveCompletion(hangupCallId, {
|
|
7994
|
+
outcome: noMediaOutcome,
|
|
7995
|
+
status: hangupCause
|
|
7996
|
+
});
|
|
7997
|
+
}
|
|
8612
7998
|
}
|
|
8613
7999
|
return res.status(200).send();
|
|
8614
8000
|
}
|
|
@@ -8661,6 +8047,121 @@ var EmbeddedServer = class {
|
|
|
8661
8047
|
}
|
|
8662
8048
|
return res.status(200).send();
|
|
8663
8049
|
});
|
|
8050
|
+
const validatePlivoRequest = (req, res) => {
|
|
8051
|
+
const authToken = this.config.plivoAuthToken;
|
|
8052
|
+
if (!authToken) {
|
|
8053
|
+
if (this.config.requireSignature !== false) {
|
|
8054
|
+
getLogger().error(
|
|
8055
|
+
"Plivo webhook rejected: plivoAuthToken not configured and requireSignature is not false"
|
|
8056
|
+
);
|
|
8057
|
+
res.status(503).send("Webhook signature required");
|
|
8058
|
+
return false;
|
|
8059
|
+
}
|
|
8060
|
+
return true;
|
|
8061
|
+
}
|
|
8062
|
+
const method = req.method.toUpperCase();
|
|
8063
|
+
const params = method === "POST" && req.body && typeof req.body === "object" ? Object.fromEntries(
|
|
8064
|
+
Object.entries(req.body).map(([k, v]) => [k, String(v)])
|
|
8065
|
+
) : {};
|
|
8066
|
+
const signature = req.headers["x-plivo-signature-v3"] || "";
|
|
8067
|
+
const nonce = req.headers["x-plivo-signature-v3-nonce"] || "";
|
|
8068
|
+
const url = `https://${this.config.webhookUrl}${req.originalUrl}`;
|
|
8069
|
+
if (!validatePlivoSignature(url, nonce, signature, authToken, params, method)) {
|
|
8070
|
+
getLogger().warn("Plivo webhook rejected: invalid or missing V3 signature");
|
|
8071
|
+
res.status(403).send("Invalid signature");
|
|
8072
|
+
return false;
|
|
8073
|
+
}
|
|
8074
|
+
return true;
|
|
8075
|
+
};
|
|
8076
|
+
app.post("/webhooks/plivo/voice", (req, res) => {
|
|
8077
|
+
if (!validatePlivoRequest(req, res)) return;
|
|
8078
|
+
const body = req.body ?? {};
|
|
8079
|
+
const callUuid = body["CallUUID"] ?? "";
|
|
8080
|
+
const caller = body["From"] ?? "";
|
|
8081
|
+
const callee = body["To"] ?? "";
|
|
8082
|
+
const qs = `?caller=${encodeURIComponent(caller)}&callee=${encodeURIComponent(callee)}`;
|
|
8083
|
+
const streamUrl = `wss://${this.config.webhookUrl}/ws/plivo/stream/${callUuid || "outbound"}${qs}`;
|
|
8084
|
+
const xml = PlivoAdapter.generateStreamXml(streamUrl, "audio/x-mulaw;rate=8000", {
|
|
8085
|
+
"X-PH-caller": caller,
|
|
8086
|
+
"X-PH-callee": callee
|
|
8087
|
+
});
|
|
8088
|
+
res.type("text/xml").send(xml);
|
|
8089
|
+
});
|
|
8090
|
+
app.post("/webhooks/plivo/status", (req, res) => {
|
|
8091
|
+
if (!validatePlivoRequest(req, res)) return;
|
|
8092
|
+
const body = req.body ?? {};
|
|
8093
|
+
const callUuid = body["CallUUID"] ?? "";
|
|
8094
|
+
const callStatus = body["CallStatus"] ?? body["Status"] ?? "";
|
|
8095
|
+
const duration = body["Duration"] ?? body["BillDuration"] ?? "";
|
|
8096
|
+
getLogger().info(
|
|
8097
|
+
`Plivo status ${sanitizeLogValue(callStatus)} for call ${sanitizeLogValue(callUuid)} (duration=${duration})`
|
|
8098
|
+
);
|
|
8099
|
+
if (callUuid && callStatus) {
|
|
8100
|
+
const extra = {};
|
|
8101
|
+
const parsed = parseFloat(duration);
|
|
8102
|
+
if (!Number.isNaN(parsed)) extra.duration_seconds = parsed;
|
|
8103
|
+
this.metricsStore.updateCallStatus(callUuid, callStatus, extra);
|
|
8104
|
+
}
|
|
8105
|
+
if (callUuid && ["no-answer", "busy", "failed", "timeout", "cancel"].includes(callStatus)) {
|
|
8106
|
+
try {
|
|
8107
|
+
this.recordPrewarmWaste(callUuid);
|
|
8108
|
+
} catch (err) {
|
|
8109
|
+
getLogger().debug(`recordPrewarmWaste threw: ${String(err)}`);
|
|
8110
|
+
}
|
|
8111
|
+
const outcome = callStatus === "no-answer" || callStatus === "timeout" ? "no_answer" : callStatus === "busy" ? "busy" : "failed";
|
|
8112
|
+
this.resolveCompletion(callUuid, { outcome, status: callStatus });
|
|
8113
|
+
}
|
|
8114
|
+
res.status(200).send();
|
|
8115
|
+
});
|
|
8116
|
+
app.post("/webhooks/plivo/amd", async (req, res) => {
|
|
8117
|
+
if (!validatePlivoRequest(req, res)) return;
|
|
8118
|
+
const body = req.body ?? {};
|
|
8119
|
+
const callUuid = body["CallUUID"] ?? "";
|
|
8120
|
+
const amdRaw = body["Machine"] || body["MachineDetection"] || body["AnsweredBy"] || body["CallStatus"] || "";
|
|
8121
|
+
getLogger().info(`AMD result for ${sanitizeLogValue(callUuid)}: ${sanitizeLogValue(amdRaw)}`);
|
|
8122
|
+
const classification = classifyPlivoAmd(amdRaw);
|
|
8123
|
+
if (callUuid) this.amdClass.set(callUuid, classification);
|
|
8124
|
+
const cb = this.onMachineDetection;
|
|
8125
|
+
if (cb && callUuid) {
|
|
8126
|
+
try {
|
|
8127
|
+
await cb({
|
|
8128
|
+
call_id: callUuid,
|
|
8129
|
+
carrier: "plivo",
|
|
8130
|
+
classification,
|
|
8131
|
+
raw: amdRaw,
|
|
8132
|
+
detected_at: Date.now() / 1e3
|
|
8133
|
+
});
|
|
8134
|
+
} catch (err) {
|
|
8135
|
+
getLogger().warn(`onMachineDetection callback threw: ${sanitizeLogValue(String(err))}`);
|
|
8136
|
+
}
|
|
8137
|
+
}
|
|
8138
|
+
if (classification === "machine" && callUuid) {
|
|
8139
|
+
try {
|
|
8140
|
+
this.recordPrewarmWaste(callUuid);
|
|
8141
|
+
} catch (err) {
|
|
8142
|
+
getLogger().debug(`recordPrewarmWaste threw: ${String(err)}`);
|
|
8143
|
+
}
|
|
8144
|
+
if (this.voicemailMessage && this.config.plivoAuthId && this.config.plivoAuthToken) {
|
|
8145
|
+
await dropPlivoVoicemail(
|
|
8146
|
+
callUuid,
|
|
8147
|
+
this.voicemailMessage,
|
|
8148
|
+
this.config.plivoAuthId,
|
|
8149
|
+
this.config.plivoAuthToken
|
|
8150
|
+
);
|
|
8151
|
+
}
|
|
8152
|
+
}
|
|
8153
|
+
res.status(200).send();
|
|
8154
|
+
});
|
|
8155
|
+
app.all("/webhooks/plivo/transfer", (req, res) => {
|
|
8156
|
+
if (!validatePlivoRequest(req, res)) return;
|
|
8157
|
+
const to = String(req.query.to ?? "");
|
|
8158
|
+
if (!to || !/^\+[1-9]\d{6,14}$/.test(to)) {
|
|
8159
|
+
getLogger().warn(`Plivo transfer XML: invalid target ${JSON.stringify(to)}`);
|
|
8160
|
+
res.type("text/xml").send("<Response><Hangup/></Response>");
|
|
8161
|
+
return;
|
|
8162
|
+
}
|
|
8163
|
+
res.type("text/xml").send(`<Response><Dial><Number>${xmlEscape(to)}</Number></Dial></Response>`);
|
|
8164
|
+
});
|
|
8664
8165
|
this.server = createServer(app);
|
|
8665
8166
|
this.wss = new WebSocketServer({ noServer: true });
|
|
8666
8167
|
const MAX_WS_PER_IP = 10;
|
|
@@ -8693,14 +8194,16 @@ var EmbeddedServer = class {
|
|
|
8693
8194
|
ws.once("close", () => {
|
|
8694
8195
|
this.activeConnections.delete(ws);
|
|
8695
8196
|
});
|
|
8696
|
-
const
|
|
8697
|
-
if (
|
|
8197
|
+
const provider2 = this.config.telephonyProvider;
|
|
8198
|
+
if (provider2 === "telnyx") {
|
|
8698
8199
|
this.handleTelnyxStream(ws, url);
|
|
8200
|
+
} else if (provider2 === "plivo") {
|
|
8201
|
+
this.handlePlivoStream(ws, url);
|
|
8699
8202
|
} else {
|
|
8700
8203
|
this.handleTwilioStream(ws, url);
|
|
8701
8204
|
}
|
|
8702
8205
|
});
|
|
8703
|
-
await new Promise((
|
|
8206
|
+
await new Promise((resolve2) => {
|
|
8704
8207
|
const bindHost = process.env.PATTER_BIND_HOST ?? "127.0.0.1";
|
|
8705
8208
|
this.server.listen(port, bindHost, () => {
|
|
8706
8209
|
getLogger().info(`Server on port ${port}`);
|
|
@@ -8722,7 +8225,7 @@ var EmbeddedServer = class {
|
|
|
8722
8225
|
}
|
|
8723
8226
|
console.log("\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\n");
|
|
8724
8227
|
}
|
|
8725
|
-
|
|
8228
|
+
resolve2();
|
|
8726
8229
|
});
|
|
8727
8230
|
});
|
|
8728
8231
|
}
|
|
@@ -8765,7 +8268,7 @@ var EmbeddedServer = class {
|
|
|
8765
8268
|
`Telnyx voicemail speak failed: ${speakResp.status} ${(await speakResp.text()).slice(0, 200)}`
|
|
8766
8269
|
);
|
|
8767
8270
|
}
|
|
8768
|
-
await new Promise((
|
|
8271
|
+
await new Promise((resolve2) => setTimeout(resolve2, estimatedMs));
|
|
8769
8272
|
await fetch(`https://api.telnyx.com/v2/calls/${encoded}/actions/hangup`, {
|
|
8770
8273
|
method: "POST",
|
|
8771
8274
|
headers,
|
|
@@ -8836,9 +8339,11 @@ var EmbeddedServer = class {
|
|
|
8836
8339
|
const active = callId ? store.getActive(callId) : void 0;
|
|
8837
8340
|
const resolvedCaller = dataCaller || active?.caller || "";
|
|
8838
8341
|
const resolvedCallee = dataCallee || active?.callee || "";
|
|
8342
|
+
const resolvedDirection = (typeof data.direction === "string" ? data.direction : "") || active?.direction || "inbound";
|
|
8839
8343
|
void logger.logCallStart(callId, {
|
|
8840
8344
|
caller: resolvedCaller,
|
|
8841
8345
|
callee: resolvedCallee,
|
|
8346
|
+
direction: resolvedDirection,
|
|
8842
8347
|
telephonyProvider: bridge.telephonyProvider,
|
|
8843
8348
|
providerMode: agent.provider ?? "",
|
|
8844
8349
|
agent: agentSnapshot()
|
|
@@ -8877,6 +8382,12 @@ var EmbeddedServer = class {
|
|
|
8877
8382
|
}).catch((err) => getLogger().error(`call_log end error: ${String(err)}`));
|
|
8878
8383
|
}
|
|
8879
8384
|
if (userEnd) await userEnd(data);
|
|
8385
|
+
const cid = typeof data.call_id === "string" ? data.call_id : "";
|
|
8386
|
+
if (cid) {
|
|
8387
|
+
const cls = this.amdClass.get(cid);
|
|
8388
|
+
const outcome = cls === "machine" ? "voicemail" : "answered";
|
|
8389
|
+
this.resolveCompletion(cid, { outcome, status: "completed", data });
|
|
8390
|
+
}
|
|
8880
8391
|
};
|
|
8881
8392
|
return [wrappedStart, wrappedMetrics, wrappedEnd];
|
|
8882
8393
|
}
|
|
@@ -8983,6 +8494,52 @@ var EmbeddedServer = class {
|
|
|
8983
8494
|
});
|
|
8984
8495
|
}
|
|
8985
8496
|
// ---------------------------------------------------------------------------
|
|
8497
|
+
// Plivo WebSocket message parser (thin layer)
|
|
8498
|
+
// ---------------------------------------------------------------------------
|
|
8499
|
+
handlePlivoStream(ws, url) {
|
|
8500
|
+
const caller = url.searchParams.get("caller") ?? "";
|
|
8501
|
+
const callee = url.searchParams.get("callee") ?? "";
|
|
8502
|
+
const bridge = new PlivoBridge(this.config);
|
|
8503
|
+
const handler = new StreamHandler(this.buildStreamHandlerDeps(bridge), ws, caller, callee);
|
|
8504
|
+
ws.on("message", async (raw) => {
|
|
8505
|
+
try {
|
|
8506
|
+
let data;
|
|
8507
|
+
try {
|
|
8508
|
+
data = JSON.parse(raw.toString());
|
|
8509
|
+
} catch (e) {
|
|
8510
|
+
getLogger().error("Failed to parse Plivo WS message:", e);
|
|
8511
|
+
return;
|
|
8512
|
+
}
|
|
8513
|
+
const event = data.event ?? "";
|
|
8514
|
+
if (event === "start") {
|
|
8515
|
+
handler.setStreamSid(data.start?.streamId ?? "");
|
|
8516
|
+
const callId = data.start?.callId ?? "";
|
|
8517
|
+
if (callId) this.activeCallIds.set(ws, callId);
|
|
8518
|
+
await handler.handleCallStart(callId);
|
|
8519
|
+
} else if (event === "media") {
|
|
8520
|
+
const payload = data.media?.payload ?? "";
|
|
8521
|
+
if (payload) handler.handleAudio(Buffer.from(payload, "base64"));
|
|
8522
|
+
} else if (event === "playedStream") {
|
|
8523
|
+
const markName = String(data.name ?? "");
|
|
8524
|
+
if (markName) await handler.onMark(markName);
|
|
8525
|
+
} else if (event === "dtmf") {
|
|
8526
|
+
const digit = String(data.dtmf?.digit ?? "").trim();
|
|
8527
|
+
if (digit) await handler.handleDtmf(digit);
|
|
8528
|
+
} else if (event === "playFailed" || event === "error") {
|
|
8529
|
+
getLogger().warn(`Plivo ${event}: ${data.reason ?? "unknown"}`);
|
|
8530
|
+
} else if (event === "stop") {
|
|
8531
|
+
await handler.handleStop();
|
|
8532
|
+
}
|
|
8533
|
+
} catch (err) {
|
|
8534
|
+
getLogger().error("Stream handler error (Plivo):", err);
|
|
8535
|
+
}
|
|
8536
|
+
});
|
|
8537
|
+
ws.on("close", async () => {
|
|
8538
|
+
this.activeCallIds.delete(ws);
|
|
8539
|
+
await handler.handleWsClose();
|
|
8540
|
+
});
|
|
8541
|
+
}
|
|
8542
|
+
// ---------------------------------------------------------------------------
|
|
8986
8543
|
// Graceful shutdown
|
|
8987
8544
|
// ---------------------------------------------------------------------------
|
|
8988
8545
|
/**
|
|
@@ -8996,13 +8553,13 @@ var EmbeddedServer = class {
|
|
|
8996
8553
|
*/
|
|
8997
8554
|
async stop() {
|
|
8998
8555
|
if (!this.server) return;
|
|
8999
|
-
const httpClosePromise = new Promise((
|
|
9000
|
-
this.server.close(() =>
|
|
8556
|
+
const httpClosePromise = new Promise((resolve2) => {
|
|
8557
|
+
this.server.close(() => resolve2());
|
|
9001
8558
|
});
|
|
9002
|
-
const
|
|
8559
|
+
const provider2 = this.config.telephonyProvider;
|
|
9003
8560
|
for (const [ws, callId] of this.activeCallIds) {
|
|
9004
8561
|
try {
|
|
9005
|
-
const bridge =
|
|
8562
|
+
const bridge = provider2 === "telnyx" ? new TelnyxBridge(this.config) : provider2 === "plivo" ? new PlivoBridge(this.config) : new TwilioBridge(this.config);
|
|
9006
8563
|
await bridge.endCall(callId, ws);
|
|
9007
8564
|
} catch {
|
|
9008
8565
|
}
|
|
@@ -9017,15 +8574,15 @@ var EmbeddedServer = class {
|
|
|
9017
8574
|
if (this.activeConnections.size > 0) {
|
|
9018
8575
|
getLogger().info(`Waiting for ${this.activeConnections.size} active connection(s) to close...`);
|
|
9019
8576
|
await Promise.race([
|
|
9020
|
-
new Promise((
|
|
8577
|
+
new Promise((resolve2) => {
|
|
9021
8578
|
const checkInterval = setInterval(() => {
|
|
9022
8579
|
if (this.activeConnections.size === 0) {
|
|
9023
8580
|
clearInterval(checkInterval);
|
|
9024
|
-
|
|
8581
|
+
resolve2();
|
|
9025
8582
|
}
|
|
9026
8583
|
}, 100);
|
|
9027
8584
|
}),
|
|
9028
|
-
new Promise((
|
|
8585
|
+
new Promise((resolve2) => setTimeout(resolve2, GRACEFUL_SHUTDOWN_TIMEOUT_MS))
|
|
9029
8586
|
]);
|
|
9030
8587
|
}
|
|
9031
8588
|
if (this.activeConnections.size > 0) {
|
|
@@ -9782,7 +9339,7 @@ var TestSession = class {
|
|
|
9782
9339
|
input: process.stdin,
|
|
9783
9340
|
output: process.stdout
|
|
9784
9341
|
});
|
|
9785
|
-
const askQuestion = (prompt) => new Promise((
|
|
9342
|
+
const askQuestion = (prompt) => new Promise((resolve2) => rl.question(prompt, resolve2));
|
|
9786
9343
|
try {
|
|
9787
9344
|
while (!ended) {
|
|
9788
9345
|
let userInput;
|
|
@@ -9881,26 +9438,19 @@ export {
|
|
|
9881
9438
|
AuthenticationError,
|
|
9882
9439
|
ProvisionError,
|
|
9883
9440
|
RateLimitError,
|
|
9884
|
-
OpenAIRealtimeAdapter,
|
|
9885
|
-
mulawToPcm16,
|
|
9886
|
-
pcm16ToMulaw,
|
|
9887
|
-
PcmCarry,
|
|
9888
|
-
StatefulResampler,
|
|
9889
|
-
createResampler16kTo8k,
|
|
9890
|
-
createResampler8kTo16k,
|
|
9891
|
-
createResampler24kTo16k,
|
|
9892
|
-
createResampler24kTo8k,
|
|
9893
|
-
resample8kTo16k,
|
|
9894
|
-
resample16kTo8k,
|
|
9895
|
-
resample24kTo16k,
|
|
9896
|
-
OpenAIRealtime2Adapter,
|
|
9897
9441
|
ElevenLabsConvAIAdapter,
|
|
9442
|
+
PlivoAdapter,
|
|
9443
|
+
Carrier,
|
|
9444
|
+
PRICING_VERSION,
|
|
9445
|
+
PRICING_LAST_UPDATED,
|
|
9446
|
+
PricingUnit,
|
|
9898
9447
|
DEFAULT_PRICING,
|
|
9899
9448
|
mergePricing,
|
|
9900
9449
|
calculateSttCost,
|
|
9901
9450
|
calculateTtsCost,
|
|
9902
9451
|
calculateRealtimeCost,
|
|
9903
9452
|
calculateTelephonyCost,
|
|
9453
|
+
VERSION,
|
|
9904
9454
|
MetricsStore,
|
|
9905
9455
|
makeAuthMiddleware,
|
|
9906
9456
|
callsToCsv,
|
|
@@ -9910,6 +9460,7 @@ export {
|
|
|
9910
9460
|
RemoteMessageHandler,
|
|
9911
9461
|
isRemoteUrl,
|
|
9912
9462
|
isWebSocketUrl,
|
|
9463
|
+
DeepgramModel,
|
|
9913
9464
|
DeepgramSTT,
|
|
9914
9465
|
CallMetricsAccumulator,
|
|
9915
9466
|
SPAN_CALL,
|