getpatter 0.6.3 → 0.6.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -4
- package/dist/{carrier-config-3WDQXP5J.mjs → carrier-config-7YGNRBPO.mjs} +17 -11
- package/dist/{chunk-R2T4JABZ.mjs → chunk-3VVATR6A.mjs} +8 -6
- package/dist/{chunk-Z6W5XFWS.mjs → chunk-7IIV3BY4.mjs} +981 -196
- package/dist/{chunk-CL2U3YET.mjs → chunk-BO227NTF.mjs} +271 -54
- package/dist/cli.js +63 -20
- package/dist/dashboard/ui.html +10 -10
- package/dist/index.d.mts +867 -187
- package/dist/index.d.ts +867 -187
- package/dist/index.js +1785 -517
- package/dist/index.mjs +501 -250
- package/dist/{openai-realtime-2-CNFARP25.mjs → openai-realtime-2-L5EKAAUH.mjs} +1 -1
- package/dist/{silero-vad-LNDFGIY7.mjs → silero-vad-RGF5HCIR.mjs} +1 -1
- package/dist/{test-mode-MDBQ4ECE.mjs → test-mode-4QLLWYVV.mjs} +2 -2
- package/package.json +2 -1
- package/src/dashboard/ui.html +10 -10
package/dist/index.js
CHANGED
|
@@ -49,7 +49,7 @@ var init_cjs_shims = __esm({
|
|
|
49
49
|
});
|
|
50
50
|
|
|
51
51
|
// src/errors.ts
|
|
52
|
-
var ErrorCode, PatterError, PatterConnectionError, AuthenticationError, ProvisionError, RateLimitError;
|
|
52
|
+
var ErrorCode, PatterError, PatterConfigError, PatterConnectionError, AuthenticationError, ProvisionError, RateLimitError;
|
|
53
53
|
var init_errors = __esm({
|
|
54
54
|
"src/errors.ts"() {
|
|
55
55
|
"use strict";
|
|
@@ -85,6 +85,12 @@ var init_errors = __esm({
|
|
|
85
85
|
this.code = options?.code ?? ErrorCode.INTERNAL;
|
|
86
86
|
}
|
|
87
87
|
};
|
|
88
|
+
/**
 * Error type for configuration problems.
 *
 * Extends PatterError and forwards only the error code to it: the caller's
 * `options.code` when given, otherwise `ErrorCode.CONFIG`.
 */
PatterConfigError = class extends PatterError {
  constructor(message, options) {
    // Resolve the code up front; nothing here touches `this` before super().
    const code = options?.code ?? ErrorCode.CONFIG;
    super(message, { code });
    this.name = "PatterConfigError";
  }
};
|
|
88
94
|
PatterConnectionError = class extends PatterError {
|
|
89
95
|
constructor(message, options) {
|
|
90
96
|
super(message, { code: options?.code ?? ErrorCode.CONNECTION });
|
|
@@ -136,6 +142,45 @@ var init_logger = __esm({
|
|
|
136
142
|
});
|
|
137
143
|
|
|
138
144
|
// src/providers/openai-realtime.ts
|
|
145
|
+
/**
 * Validate a caller-supplied Realtime turn-detection config.
 *
 * A missing config (`undefined` or `null`) is accepted as-is. Otherwise:
 *  - `type`, when set, must be `'server_vad'` or `'semantic_vad'`;
 *  - `eagerness`, when set, must be one of low|medium|high|auto;
 *  - `eagerness` may only be set together with `type: 'semantic_vad'`.
 *
 * @param td - Turn-detection options to check (may be undefined or null).
 * @throws {Error} When `type` or `eagerness` holds an unsupported value, or
 *   when `eagerness` is set without `type: 'semantic_vad'`.
 */
function validateRealtimeTurnDetection(td) {
  // `== null` matches both undefined and null. buildTurnDetection reads the
  // same value through `td?.`, so a null config must not crash here either.
  if (td == null) return;
  const allowedTypes = ["server_vad", "semantic_vad"];
  const allowedEagerness = ["low", "medium", "high", "auto"];
  if (td.type !== void 0 && !allowedTypes.includes(td.type)) {
    throw new Error(
      `RealtimeTurnDetection.type must be 'server_vad' or 'semantic_vad', got ${JSON.stringify(td.type)}`
    );
  }
  if (td.eagerness !== void 0 && !allowedEagerness.includes(td.eagerness)) {
    throw new Error(
      `RealtimeTurnDetection.eagerness must be one of low|medium|high|auto, got ${JSON.stringify(td.eagerness)}`
    );
  }
  // A valid eagerness value is still rejected unless semantic VAD is selected.
  if (td.eagerness !== void 0 && td.type !== "semantic_vad") {
    throw new Error(
      "RealtimeTurnDetection.eagerness is only valid when type='semantic_vad'"
    );
  }
}
|
|
163
|
+
/**
 * Build the wire-format `turn_detection` object for a session config.
 *
 * The config is validated first. For `semantic_vad` only `type` (plus
 * `eagerness`, when set) is emitted; for every other case the threshold /
 * padding / silence fields are emitted, falling back to 0.5, 300 and
 * `opts.defaultSilenceMs` respectively, and to `opts.defaultType` for `type`.
 *
 * @param td - Optional caller-supplied turn-detection options.
 * @param opts - Builder options: `defaultType`, `defaultSilenceMs`,
 *   `includeResponseGating` and `gateResponseOnTranscript`.
 * @returns The turn-detection object to embed in the session config.
 */
function buildTurnDetection(td, opts) {
  validateRealtimeTurnDetection(td);
  const isSemantic = td?.type === "semantic_vad";
  const detection = isSemantic
    ? { type: "semantic_vad" }
    : {
        type: td?.type ?? opts.defaultType,
        threshold: td?.threshold ?? 0.5,
        prefix_padding_ms: td?.prefixPaddingMs ?? 300,
        silence_duration_ms: td?.silenceDurationMs ?? opts.defaultSilenceMs
      };
  if (isSemantic && td.eagerness !== void 0) {
    detection.eagerness = td.eagerness;
  }
  if (!opts.includeResponseGating) {
    return detection;
  }
  // Both gating keys follow one switch: true unless the caller opted into
  // gating the response on the transcript.
  const serverManaged = !opts.gateResponseOnTranscript;
  detection.create_response = serverManaged;
  detection.interrupt_response = serverManaged;
  return detection;
}
|
|
139
184
|
function estimateAudioMs(chunk, format) {
|
|
140
185
|
if (chunk.length === 0) return 0;
|
|
141
186
|
if (format === OpenAIRealtimeAudioFormat.G711_ULAW || format === OpenAIRealtimeAudioFormat.G711_ALAW)
|
|
@@ -196,6 +241,7 @@ var init_openai_realtime = __esm({
|
|
|
196
241
|
this.tools = tools;
|
|
197
242
|
this.audioFormat = audioFormat;
|
|
198
243
|
this.options = options;
|
|
244
|
+
this.gateResponseOnTranscript = options.gateResponseOnTranscript ?? false;
|
|
199
245
|
}
|
|
200
246
|
apiKey;
|
|
201
247
|
model;
|
|
@@ -225,6 +271,23 @@ var init_openai_realtime = __esm({
|
|
|
225
271
|
// could have produced, which is what the user actually heard.
|
|
226
272
|
currentResponseFirstAudioAt = null;
|
|
227
273
|
options;
|
|
274
|
+
// When true, the stream handler waits for the Whisper ``transcript_input``
|
|
275
|
+
// event before requesting the model response (legacy behavior). When false
|
|
276
|
+
// (default) the response is requested on ``speech_stopped`` and the
|
|
277
|
+
// transcript is display-only. Read by the stream handler via
|
|
278
|
+
// ``getGateResponseOnTranscript()``.
|
|
279
|
+
gateResponseOnTranscript;
|
|
280
|
+
/**
|
|
281
|
+
* Whether the stream handler should gate the model response on the Whisper
|
|
282
|
+
* transcript (legacy) or fire it on `speech_stopped` (default, decoupled).
|
|
283
|
+
*
|
|
284
|
+
* `false` (default) — the response is requested on `speech_stopped`,
|
|
285
|
+
* independently of Whisper. `true` — the response is requested only after
|
|
286
|
+
* `transcript_input` passes the hallucination filter.
|
|
287
|
+
*/
|
|
288
|
+
getGateResponseOnTranscript() {
|
|
289
|
+
return this.gateResponseOnTranscript;
|
|
290
|
+
}
|
|
228
291
|
/**
|
|
229
292
|
* Build the production session.update body. Mirrors the body sent
|
|
230
293
|
* inside `connect()` so warmup can apply identical configuration to
|
|
@@ -236,16 +299,26 @@ var init_openai_realtime = __esm({
|
|
|
236
299
|
output_audio_format: this.audioFormat,
|
|
237
300
|
voice: this.voice,
|
|
238
301
|
instructions: this.instructions || "You are a helpful voice assistant. Be concise.",
|
|
239
|
-
turn_detection
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
302
|
+
// v1 turn_detection carries NO create_response / interrupt_response
|
|
303
|
+
// keys. The v1 server defaults (`create_response: true`,
|
|
304
|
+
// `interrupt_response: true`) ARE the server-managed behaviour we want by
|
|
305
|
+
// default, so omitting them is equivalent to sending `true` — gating
|
|
306
|
+
// disabled here. `gateResponseOnTranscript` is still threaded through for
|
|
307
|
+
// symmetry with the GA builder, but has no wire effect while
|
|
308
|
+
// includeResponseGating is false.
|
|
309
|
+
turn_detection: buildTurnDetection(this.options.turnDetection, {
|
|
310
|
+
defaultType: this.options.vadType ?? OpenAIRealtimeVADType.SERVER_VAD,
|
|
311
|
+
defaultSilenceMs: this.options.silenceDurationMs ?? 300,
|
|
312
|
+
includeResponseGating: false,
|
|
313
|
+
gateResponseOnTranscript: this.gateResponseOnTranscript
|
|
314
|
+
}),
|
|
245
315
|
input_audio_transcription: {
|
|
246
316
|
model: this.options.inputAudioTranscriptionModel ?? OpenAITranscriptionModel.WHISPER_1
|
|
247
317
|
}
|
|
248
318
|
};
|
|
319
|
+
if (this.options.noiseReduction !== void 0) {
|
|
320
|
+
config2.input_audio_noise_reduction = { type: this.options.noiseReduction };
|
|
321
|
+
}
|
|
249
322
|
if (this.options.temperature !== void 0) config2.temperature = this.options.temperature;
|
|
250
323
|
if (this.options.maxResponseOutputTokens !== void 0) {
|
|
251
324
|
config2.max_response_output_tokens = this.options.maxResponseOutputTokens;
|
|
@@ -509,6 +582,10 @@ var init_openai_realtime = __esm({
|
|
|
509
582
|
};
|
|
510
583
|
const timer = setTimeout(() => {
|
|
511
584
|
cleanup();
|
|
585
|
+
try {
|
|
586
|
+
ws.close();
|
|
587
|
+
} catch {
|
|
588
|
+
}
|
|
512
589
|
reject(new Error("OpenAI Realtime park connect timeout"));
|
|
513
590
|
}, 8e3);
|
|
514
591
|
ws.on("message", onMessage);
|
|
@@ -603,20 +680,33 @@ var init_openai_realtime = __esm({
|
|
|
603
680
|
dispatch("error", { type: "socket_error", message: err?.message ?? String(err) });
|
|
604
681
|
});
|
|
605
682
|
}
|
|
606
|
-
/** Truncate the in-flight assistant turn
|
|
683
|
+
/** Truncate the in-flight assistant turn's playback offset on the server.
|
|
684
|
+
*
|
|
685
|
+
* Sends ONLY ``conversation.item.truncate`` — no ``response.cancel``. This
|
|
686
|
+
* is the half of barge-in handling that a WebSocket transport MUST always
|
|
687
|
+
* perform: per OpenAI's docs, the GA server auto-truncates on barge-in only
|
|
688
|
+
* over WebRTC / SIP; on the WebSocket transport the client is responsible
|
|
689
|
+
* for telling the server how much of the assistant turn was actually heard.
|
|
690
|
+
* In server-managed mode (``interrupt_response: true``) the server already
|
|
691
|
+
* cancels the response itself, so issuing ``response.cancel`` here would be
|
|
692
|
+
* redundant / rejected — call this method, not {@link cancelResponse}.
|
|
607
693
|
*
|
|
608
694
|
* ``audio_end_ms`` MUST reflect what the caller actually heard, not what
|
|
609
695
|
* the server generated. OpenAI streams audio at 5-10x real-time, so the
|
|
610
696
|
* byte-derived counter overstates playback whenever the consumer cleared
|
|
611
|
-
* its playout buffer (e.g. ``
|
|
697
|
+
* its playout buffer (e.g. ``sendClear``) before the audio reached the
|
|
612
698
|
* speaker. We bound the truncate point by wall-clock time since the first
|
|
613
699
|
* chunk of this response — that's the physical maximum a 1x real-time
|
|
614
700
|
* playback could have produced. Without this cap, OpenAI keeps the full
|
|
615
701
|
* generated assistant text on the transcript, and the model replays /
|
|
616
702
|
* resumes from it on the next turn — manifesting as re-greetings and
|
|
617
703
|
* mid-sentence fragments after a barge-in storm.
|
|
704
|
+
*
|
|
705
|
+
* No-op when no response is in flight, keeping it idempotent across stale
|
|
706
|
+
* callers. Resets per-response tracking so post-truncate late frames and
|
|
707
|
+
* the next response start clean.
|
|
618
708
|
*/
|
|
619
|
-
|
|
709
|
+
truncate() {
|
|
620
710
|
if (!this.ws) return;
|
|
621
711
|
if (!this.currentResponseItemId) {
|
|
622
712
|
return;
|
|
@@ -636,11 +726,31 @@ var init_openai_realtime = __esm({
|
|
|
636
726
|
} catch (err) {
|
|
637
727
|
getLogger().debug?.(`conversation.item.truncate failed: ${String(err)}`);
|
|
638
728
|
}
|
|
639
|
-
this.ws.send(JSON.stringify({ type: "response.cancel" }));
|
|
640
729
|
this.currentResponseItemId = null;
|
|
641
730
|
this.currentResponseAudioMs = 0;
|
|
642
731
|
this.currentResponseFirstAudioAt = null;
|
|
643
732
|
}
|
|
733
|
+
/** Truncate the in-flight assistant turn AND cancel the active response.
|
|
734
|
+
*
|
|
735
|
+
* Sends BOTH ``conversation.item.truncate`` (the played-offset bookkeeping)
|
|
736
|
+
* AND ``response.cancel``. Use this on the LEGACY client-managed barge-in
|
|
737
|
+
* path (``gateResponseOnTranscript`` true → ``interrupt_response: false``,
|
|
738
|
+
* so the server does NOT cancel for us) and for explicit cancels driven by
|
|
739
|
+
* Patter (e.g. on transfer / hangup). In server-managed mode call
|
|
740
|
+
* {@link truncate} instead — the server already cancels the response, and an
|
|
741
|
+
* extra ``response.cancel`` would be redundant / rejected.
|
|
742
|
+
*
|
|
743
|
+
* Truncation bounding semantics are identical to {@link truncate}; see its
|
|
744
|
+
* doc comment for the ``audio_end_ms`` wall-clock cap rationale.
|
|
745
|
+
*/
|
|
746
|
+
cancelResponse() {
|
|
747
|
+
if (!this.ws) return;
|
|
748
|
+
if (!this.currentResponseItemId) {
|
|
749
|
+
return;
|
|
750
|
+
}
|
|
751
|
+
this.truncate();
|
|
752
|
+
this.ws.send(JSON.stringify({ type: "response.cancel" }));
|
|
753
|
+
}
|
|
644
754
|
/** Inject a user text turn and request a new response. */
|
|
645
755
|
async sendText(text) {
|
|
646
756
|
this.ws?.send(JSON.stringify({
|
|
@@ -685,6 +795,32 @@ var init_openai_realtime = __esm({
|
|
|
685
795
|
}
|
|
686
796
|
}));
|
|
687
797
|
}
|
|
798
|
+
/**
|
|
799
|
+
* Speak a short reassurance filler WITHOUT injecting a `role:user` turn.
|
|
800
|
+
*
|
|
801
|
+
* Same no-fake-turn shape as {@link sendFirstMessage}: a bare
|
|
802
|
+
* `response.create` carrying explicit `instructions`, so the filler is the
|
|
803
|
+
* assistant's own in-band audio. The reassurance scheduler in the
|
|
804
|
+
* stream-handler routes here instead of {@link sendText} — which would emit
|
|
805
|
+
* a `conversation.item.create` with `role:'user'` and falsely show the
|
|
806
|
+
* caller saying "One moment." in the transcript. Fillers must not imply
|
|
807
|
+
* success or failure.
|
|
808
|
+
*
|
|
809
|
+
* Uses `modalities: ['audio', 'text']` (v1-beta shape). The GA subclass
|
|
810
|
+
* {@link OpenAIRealtime2Adapter} overrides this with `output_modalities`
|
|
811
|
+
* and re-injects `audio.output.voice` so the GA endpoint does not reject
|
|
812
|
+
* the request. Mirrors Python `OpenAIRealtimeAdapter.send_reassurance` in
|
|
813
|
+
* `providers/openai_realtime.py`.
|
|
814
|
+
*/
|
|
815
|
+
async sendReassurance(text) {
|
|
816
|
+
this.ws?.send(JSON.stringify({
|
|
817
|
+
type: "response.create",
|
|
818
|
+
response: {
|
|
819
|
+
modalities: ["audio", "text"],
|
|
820
|
+
instructions: `Say exactly this and nothing else: "${text}"`
|
|
821
|
+
}
|
|
822
|
+
}));
|
|
823
|
+
}
|
|
688
824
|
/** Submit a tool/function-call result and request the next response. */
|
|
689
825
|
async sendFunctionResult(callId, result) {
|
|
690
826
|
this.ws?.send(JSON.stringify({
|
|
@@ -925,7 +1061,12 @@ var init_transcoding = __esm({
|
|
|
925
1061
|
* Resets all state after flushing.
|
|
926
1062
|
*/
|
|
927
1063
|
flush() {
|
|
928
|
-
this.carry.flush();
|
|
1064
|
+
const carryTail = this.carry.flush();
|
|
1065
|
+
if (carryTail.length > 0) {
|
|
1066
|
+
getLogger().warn(
|
|
1067
|
+
"[patter] StatefulResampler.flush: trailing odd byte discarded \u2014 upstream produced odd-length PCM stream"
|
|
1068
|
+
);
|
|
1069
|
+
}
|
|
929
1070
|
if (this.srcRate === 16e3 && this.dstRate === 8e3 && this.firPendingSample !== null) {
|
|
930
1071
|
const s = this.firPendingSample;
|
|
931
1072
|
const tmp = Buffer.alloc(4);
|
|
@@ -1165,44 +1306,46 @@ var init_openai_realtime_2 = __esm({
|
|
|
1165
1306
|
buildGASessionConfig() {
|
|
1166
1307
|
const opts = this.options;
|
|
1167
1308
|
const fmt = { type: "audio/pcm", rate: 24e3 };
|
|
1309
|
+
const audioInput = {
|
|
1310
|
+
format: fmt,
|
|
1311
|
+
transcription: {
|
|
1312
|
+
model: opts.inputAudioTranscriptionModel ?? OpenAITranscriptionModel.WHISPER_1
|
|
1313
|
+
},
|
|
1314
|
+
// Response creation + barge-in cancellation (issue #154 — hand
|
|
1315
|
+
// turn-taking to the server by default):
|
|
1316
|
+
// - DEFAULT (`gateResponseOnTranscript` false → SERVER-MANAGED):
|
|
1317
|
+
// `create_response: true` lets the SERVER auto-create the response
|
|
1318
|
+
// when it commits the user's audio buffer
|
|
1319
|
+
// (`input_audio_buffer.committed`). `interrupt_response: true` lets the
|
|
1320
|
+
// SERVER cancel the in-flight response on its own VAD `speech_started`.
|
|
1321
|
+
// The e2e model replies immediately, in parallel with the Whisper
|
|
1322
|
+
// transcript — no transcript wait (~500 ms reclaimed), no client-side
|
|
1323
|
+
// race. On a WebSocket transport the client STILL must clear the
|
|
1324
|
+
// carrier buffer (`sendClear`) and `conversation.item.truncate` the
|
|
1325
|
+
// played offset on barge-in (the server only auto-truncates on
|
|
1326
|
+
// WebRTC/SIP), but it does NOT send `response.cancel`. Whisper is
|
|
1327
|
+
// display-only — it can never trigger / gate / cancel the response.
|
|
1328
|
+
// - LEGACY (`gateResponseOnTranscript` true → CLIENT-MANAGED opt-out):
|
|
1329
|
+
// `create_response: false` + `interrupt_response: false` so the stream
|
|
1330
|
+
// handler drives `response.create` (after the hallucination filter)
|
|
1331
|
+
// and `response.cancel` (on barge-in) itself. Escape hatch for no-AEC
|
|
1332
|
+
// PSTN self-interruption. Both keys are tied to the same switch inside
|
|
1333
|
+
// `buildTurnDetection`.
|
|
1334
|
+
turn_detection: buildTurnDetection(opts.turnDetection, {
|
|
1335
|
+
defaultType: opts.vadType ?? OpenAIRealtimeVADType.SERVER_VAD,
|
|
1336
|
+
defaultSilenceMs: opts.silenceDurationMs ?? 300,
|
|
1337
|
+
includeResponseGating: true,
|
|
1338
|
+
gateResponseOnTranscript: this.getGateResponseOnTranscript()
|
|
1339
|
+
})
|
|
1340
|
+
};
|
|
1341
|
+
if (opts.noiseReduction !== void 0) {
|
|
1342
|
+
audioInput.noise_reduction = { type: opts.noiseReduction };
|
|
1343
|
+
}
|
|
1168
1344
|
const config2 = {
|
|
1169
1345
|
type: "realtime",
|
|
1170
1346
|
output_modalities: opts.modalities ?? ["audio"],
|
|
1171
1347
|
audio: {
|
|
1172
|
-
input:
|
|
1173
|
-
format: fmt,
|
|
1174
|
-
transcription: {
|
|
1175
|
-
model: opts.inputAudioTranscriptionModel ?? OpenAITranscriptionModel.WHISPER_1
|
|
1176
|
-
},
|
|
1177
|
-
// VAD threshold raised back to the OpenAI default (0.5) on
|
|
1178
|
-
// 2026-05-22. The earlier 0.1 tuning (motivated by the
|
|
1179
|
-
// upsampled telephony-band loss in high frequencies) made the
|
|
1180
|
-
// server VAD trigger on the carrier-loopback echo of the
|
|
1181
|
-
// agent's OWN outbound audio in PSTN no-AEC scenarios.
|
|
1182
|
-
// Combined with the default ``turn_detection.create_response:
|
|
1183
|
-
// true``, every phantom ``speech_started`` ended a turn early
|
|
1184
|
-
// and auto-created a new response that the agent immediately
|
|
1185
|
-
// spoke over, leading to a runaway loop where the first
|
|
1186
|
-
// message was repeatedly cut and re-generated.
|
|
1187
|
-
turn_detection: {
|
|
1188
|
-
type: opts.vadType ?? OpenAIRealtimeVADType.SERVER_VAD,
|
|
1189
|
-
threshold: 0.5,
|
|
1190
|
-
prefix_padding_ms: 300,
|
|
1191
|
-
silence_duration_ms: opts.silenceDurationMs ?? 500,
|
|
1192
|
-
// Defer ``response.create`` to the application: when OpenAI's
|
|
1193
|
-
// server VAD commits an ``input_audio_buffer.committed`` segment
|
|
1194
|
-
// that turns out to be a Whisper hallucination on silence/echo,
|
|
1195
|
-
// auto-creating a response would generate a phantom turn (the
|
|
1196
|
-
// model reads the hallucinated text as user input). Patter
|
|
1197
|
-
// triggers ``response.create`` explicitly in the Realtime
|
|
1198
|
-
// stream-handler AFTER validating ``transcript_input`` against
|
|
1199
|
-
// the hallucination filter. Pair with ``interrupt_response:
|
|
1200
|
-
// false`` so server VAD also leaves in-flight responses alone —
|
|
1201
|
-
// barge-in is gated client-side.
|
|
1202
|
-
create_response: false,
|
|
1203
|
-
interrupt_response: false
|
|
1204
|
-
}
|
|
1205
|
-
},
|
|
1348
|
+
input: audioInput,
|
|
1206
1349
|
output: {
|
|
1207
1350
|
format: fmt,
|
|
1208
1351
|
voice: this.voice
|
|
@@ -1255,14 +1398,7 @@ var init_openai_realtime_2 = __esm({
|
|
|
1255
1398
|
if (t && t in GA_TO_V1_EVENT_NAMES) {
|
|
1256
1399
|
const newType = GA_TO_V1_EVENT_NAMES[t];
|
|
1257
1400
|
if (t === "response.output_audio.delta" && typeof parsed.delta === "string") {
|
|
1258
|
-
|
|
1259
|
-
const FRAME_BYTES = 160;
|
|
1260
|
-
if (mulaw.length === 0) return;
|
|
1261
|
-
for (let off = 0; off < mulaw.length; off += FRAME_BYTES) {
|
|
1262
|
-
const slice = mulaw.subarray(off, Math.min(off + FRAME_BYTES, mulaw.length));
|
|
1263
|
-
const frame = { ...parsed, type: newType, delta: slice.toString("base64") };
|
|
1264
|
-
handler(Buffer.from(JSON.stringify(frame)), ...rest);
|
|
1265
|
-
}
|
|
1401
|
+
this.translateGaAudioDelta(parsed, handler, rest);
|
|
1266
1402
|
return;
|
|
1267
1403
|
}
|
|
1268
1404
|
parsed.type = newType;
|
|
@@ -1291,6 +1427,7 @@ var init_openai_realtime_2 = __esm({
|
|
|
1291
1427
|
sessionCreated = true;
|
|
1292
1428
|
ws.send(JSON.stringify({ type: "session.update", session: this.buildGASessionConfig() }));
|
|
1293
1429
|
} else if (msg.type === "session.updated") {
|
|
1430
|
+
this.warnIfOutputFormatUnexpected(msg);
|
|
1294
1431
|
cleanup();
|
|
1295
1432
|
resolve2();
|
|
1296
1433
|
} else if (msg.type === "error") {
|
|
@@ -1396,6 +1533,10 @@ var init_openai_realtime_2 = __esm({
|
|
|
1396
1533
|
};
|
|
1397
1534
|
const timer = setTimeout(() => {
|
|
1398
1535
|
cleanup();
|
|
1536
|
+
try {
|
|
1537
|
+
ws.close();
|
|
1538
|
+
} catch {
|
|
1539
|
+
}
|
|
1399
1540
|
reject(new Error("OpenAI Realtime 2 park connect timeout"));
|
|
1400
1541
|
}, 8e3);
|
|
1401
1542
|
ws.on("message", onMessage);
|
|
@@ -1443,8 +1584,12 @@ var init_openai_realtime_2 = __esm({
|
|
|
1443
1584
|
const parsed = JSON.parse(text);
|
|
1444
1585
|
const t = parsed.type;
|
|
1445
1586
|
if (t && Object.prototype.hasOwnProperty.call(GA_TO_V1_EVENT_NAMES, t)) {
|
|
1587
|
+
if (t === "response.output_audio.delta" && typeof parsed.delta === "string") {
|
|
1588
|
+
this.translateGaAudioDelta(parsed, handler, rest);
|
|
1589
|
+
return;
|
|
1590
|
+
}
|
|
1446
1591
|
parsed.type = GA_TO_V1_EVENT_NAMES[t];
|
|
1447
|
-
handler(JSON.stringify(parsed), ...rest);
|
|
1592
|
+
handler(Buffer.from(JSON.stringify(parsed)), ...rest);
|
|
1448
1593
|
return;
|
|
1449
1594
|
}
|
|
1450
1595
|
} catch {
|
|
@@ -1529,6 +1674,55 @@ var init_openai_realtime_2 = __esm({
|
|
|
1529
1674
|
}
|
|
1530
1675
|
return out;
|
|
1531
1676
|
}
|
|
1677
|
+
/**
|
|
1678
|
+
* Log-only safety net for issue #154. The GA server echoes the *effective*
|
|
1679
|
+
* session config in `session.updated`; we request `audio/pcm` @ 24 kHz and
|
|
1680
|
+
* transcode PCM24→mulaw8 ourselves (see
|
|
1681
|
+
* `transcodeOutboundPcm24ToMulaw8Buffer`). If a future GA schema change ever
|
|
1682
|
+
* made the server return a different output format, that transcode — which
|
|
1683
|
+
* assumes PCM16-LE @ 24 kHz — would silently corrupt audio, exactly the
|
|
1684
|
+
* v1-beta failure mode #154 fixed. Warn so the drift surfaces in logs instead
|
|
1685
|
+
* of as static. Never gates audio.
|
|
1686
|
+
*/
|
|
1687
|
+
warnIfOutputFormatUnexpected(msg) {
|
|
1688
|
+
const fmt = msg?.session?.audio?.output?.format;
|
|
1689
|
+
if (!fmt || typeof fmt !== "object") return;
|
|
1690
|
+
if (fmt.type !== "audio/pcm" || fmt.rate != null && fmt.rate !== 24e3) {
|
|
1691
|
+
getLogger().warn(
|
|
1692
|
+
`OpenAI Realtime 2: server-echoed output format ${JSON.stringify(fmt)} differs from the requested audio/pcm@24000 \u2014 the outbound PCM24\u2192mulaw8 transcode assumes PCM16-LE 24 kHz, so carrier audio may be garbled (issue #154). Informational only; audio is not gated on this.`
|
|
1693
|
+
);
|
|
1694
|
+
}
|
|
1695
|
+
}
|
|
1696
|
+
/**
|
|
1697
|
+
* Shared audio-delta translation helper. Transcodes a GA
|
|
1698
|
+
* `response.output_audio.delta` payload (base64 PCM-16-LE 24 kHz)
|
|
1699
|
+
* into mulaw 8 kHz and splits the result into 160-byte (20 ms) frames,
|
|
1700
|
+
* dispatching one synthetic `response.audio.delta` event per frame.
|
|
1701
|
+
*
|
|
1702
|
+
* Called from BOTH the `connect()` shim and the `adoptWebSocket()` shim
|
|
1703
|
+
* so that warm-path (prewarm/adopted) calls receive identical transcoding
|
|
1704
|
+
* to cold-path calls. Without this, adopted sockets forwarded raw PCM-24
|
|
1705
|
+
* to Twilio/Telnyx, producing garbled or silent audio on every warm call.
|
|
1706
|
+
*
|
|
1707
|
+
* @param parsed - The parsed GA event object (type already checked to be
|
|
1708
|
+
* `response.output_audio.delta` with a string `delta`).
|
|
1709
|
+
* @param handler - The downstream message listener to dispatch each frame to.
|
|
1710
|
+
* @param rest - Extra arguments forwarded from the original `message` event.
|
|
1711
|
+
* @returns `true` if frames were dispatched (caller should return early),
|
|
1712
|
+
* `false` if the resampler is still warming up (zero output bytes).
|
|
1713
|
+
*/
|
|
1714
|
+
translateGaAudioDelta(parsed, handler, rest) {
|
|
1715
|
+
const newType = GA_TO_V1_EVENT_NAMES["response.output_audio.delta"];
|
|
1716
|
+
const mulaw = this.transcodeOutboundPcm24ToMulaw8Buffer(parsed.delta);
|
|
1717
|
+
const FRAME_BYTES = 160;
|
|
1718
|
+
if (mulaw.length === 0) return false;
|
|
1719
|
+
for (let off = 0; off < mulaw.length; off += FRAME_BYTES) {
|
|
1720
|
+
const slice = mulaw.subarray(off, Math.min(off + FRAME_BYTES, mulaw.length));
|
|
1721
|
+
const frame = { ...parsed, type: newType, delta: slice.toString("base64") };
|
|
1722
|
+
handler(Buffer.from(JSON.stringify(frame)), ...rest);
|
|
1723
|
+
}
|
|
1724
|
+
return true;
|
|
1725
|
+
}
|
|
1532
1726
|
/**
|
|
1533
1727
|
* Base64 PCM-16-LE 24 kHz → Base64 mulaw 8 kHz. Used by the WS
|
|
1534
1728
|
* translation shim on each `response.output_audio.delta`. The stateful
|
|
@@ -1558,6 +1752,34 @@ var init_openai_realtime_2 = __esm({
|
|
|
1558
1752
|
}
|
|
1559
1753
|
this.ws?.send(JSON.stringify({ type: "response.create", response: responseBody }));
|
|
1560
1754
|
}
|
|
1755
|
+
/**
|
|
1756
|
+
* Speak a short reassurance filler WITHOUT injecting a `role:user` turn.
|
|
1757
|
+
*
|
|
1758
|
+
* GA-shape sibling of {@link sendFirstMessage} (and override of the base v1
|
|
1759
|
+
* {@link OpenAIRealtimeAdapter.sendReassurance}): a bare `response.create`
|
|
1760
|
+
* carrying explicit `instructions` so the filler is the assistant's own
|
|
1761
|
+
* in-band audio. No `conversation.item.create` with `role:"user"` is
|
|
1762
|
+
* emitted, so the transcript shows no phantom caller line. The GA endpoint
|
|
1763
|
+
* rejects `response.modalities` and does not inherit `audio.output.voice`
|
|
1764
|
+
* for an explicit `response.create`, so — exactly as in
|
|
1765
|
+
* {@link sendFirstMessage} — we send `output_modalities` and re-inject the
|
|
1766
|
+
* voice. Fillers must not imply success or failure.
|
|
1767
|
+
*
|
|
1768
|
+
* Mirrors Python `OpenAIRealtime2Adapter.send_reassurance` in
|
|
1769
|
+
* `providers/openai_realtime_2.py`.
|
|
1770
|
+
*/
|
|
1771
|
+
async sendReassurance(text) {
|
|
1772
|
+
if (!this.ws) return;
|
|
1773
|
+
const responseBody = {
|
|
1774
|
+
output_modalities: ["audio"],
|
|
1775
|
+
audio: { output: { voice: this.voice } },
|
|
1776
|
+
instructions: `Say exactly this and nothing else: "${text}"`
|
|
1777
|
+
};
|
|
1778
|
+
if (this.options.reasoningEffort !== void 0) {
|
|
1779
|
+
responseBody.reasoning = { effort: this.options.reasoningEffort };
|
|
1780
|
+
}
|
|
1781
|
+
this.ws.send(JSON.stringify({ type: "response.create", response: responseBody }));
|
|
1782
|
+
}
|
|
1561
1783
|
};
|
|
1562
1784
|
}
|
|
1563
1785
|
});
|
|
@@ -2345,11 +2567,25 @@ function calculateRealtimeCachedSavings(usage, pricing, model) {
|
|
|
2345
2567
|
const rates = resolveProviderRates(pricing.openai_realtime, model);
|
|
2346
2568
|
if (rates.unit !== "token") return 0;
|
|
2347
2569
|
const input = usage.input_token_details ?? {};
|
|
2348
|
-
const cached2 = input.cached_tokens_details ?? {};
|
|
2349
2570
|
const cachedAudioRate = rates.cached_audio_input_per_token ?? rates.audio_input_per_token ?? 0;
|
|
2350
2571
|
const cachedTextRate = rates.cached_text_input_per_token ?? rates.text_input_per_token ?? 0;
|
|
2351
|
-
const
|
|
2352
|
-
const
|
|
2572
|
+
const totalAudio = input.audio_tokens ?? 0;
|
|
2573
|
+
const totalText = input.text_tokens ?? 0;
|
|
2574
|
+
let cachedAudio;
|
|
2575
|
+
let cachedText;
|
|
2576
|
+
const details = input.cached_tokens_details;
|
|
2577
|
+
if (details && (details.audio_tokens !== void 0 || details.text_tokens !== void 0)) {
|
|
2578
|
+
cachedAudio = Math.min(details.audio_tokens ?? 0, totalAudio);
|
|
2579
|
+
cachedText = Math.min(details.text_tokens ?? 0, totalText);
|
|
2580
|
+
} else if (input.cached_tokens && input.cached_tokens > 0) {
|
|
2581
|
+
const totalIn = totalAudio + totalText;
|
|
2582
|
+
const ratio = totalIn > 0 ? input.cached_tokens / totalIn : 0;
|
|
2583
|
+
cachedAudio = Math.min(Math.round(totalAudio * ratio), totalAudio);
|
|
2584
|
+
cachedText = Math.min(Math.round(totalText * ratio), totalText);
|
|
2585
|
+
} else {
|
|
2586
|
+
cachedAudio = 0;
|
|
2587
|
+
cachedText = 0;
|
|
2588
|
+
}
|
|
2353
2589
|
const fullAudio = cachedAudio * (rates.audio_input_per_token ?? 0);
|
|
2354
2590
|
const fullText = cachedText * (rates.text_input_per_token ?? 0);
|
|
2355
2591
|
const discountedAudio = cachedAudio * cachedAudioRate;
|
|
@@ -2797,8 +3033,8 @@ function loadTranscriptJsonl(filePath) {
|
|
|
2797
3033
|
} catch {
|
|
2798
3034
|
continue;
|
|
2799
3035
|
}
|
|
2800
|
-
const tsIso = typeof row.ts === "string" ? Date.parse(row.ts) : NaN;
|
|
2801
|
-
const tsNumeric = typeof row.timestamp === "number" ? row.timestamp
|
|
3036
|
+
const tsIso = typeof row.ts === "string" ? Date.parse(row.ts) / 1e3 : NaN;
|
|
3037
|
+
const tsNumeric = typeof row.timestamp === "number" ? row.timestamp : NaN;
|
|
2802
3038
|
const timestamp = Number.isFinite(tsIso) ? tsIso : Number.isFinite(tsNumeric) ? tsNumeric : 0;
|
|
2803
3039
|
const userText = typeof row.user_text === "string" ? row.user_text : "";
|
|
2804
3040
|
const agentText = typeof row.agent_text === "string" ? row.agent_text : "";
|
|
@@ -2956,14 +3192,49 @@ var init_store = __esm({
|
|
|
2956
3192
|
} else {
|
|
2957
3193
|
for (let i = this.calls.length - 1; i >= 0; i--) {
|
|
2958
3194
|
if (this.calls[i].call_id === callId) {
|
|
2959
|
-
this.calls[i].status
|
|
2960
|
-
Object.assign(this.calls[i], extra);
|
|
3195
|
+
this.calls[i] = { ...this.calls[i], status, ...extra };
|
|
2961
3196
|
break;
|
|
2962
3197
|
}
|
|
2963
3198
|
}
|
|
2964
3199
|
}
|
|
2965
3200
|
this.publish("call_status", { call_id: callId, status, ...extra });
|
|
2966
3201
|
}
|
|
3202
|
+
/**
|
|
3203
|
+
* Record a single transcript line (user/assistant) as it becomes known.
|
|
3204
|
+
*
|
|
3205
|
+
* FIX-5 (issue #154): the live forward path for the dashboard transcript.
|
|
3206
|
+
* The Realtime stream handler calls this the moment each line is known — the
|
|
3207
|
+
* user line right after the hallucination filter accepts it, the assistant
|
|
3208
|
+
* line when its turn flushes — keyed by the monotonic ``turnIndex`` reserved
|
|
3209
|
+
* at turn-open (``reserveTurnIndex``). Each line is appended to the active
|
|
3210
|
+
* call's ``transcript`` array and broadcast over SSE as a ``transcript_line``
|
|
3211
|
+
* event so the dashboard can render lines as they arrive and re-sort by
|
|
3212
|
+
* ``(turnIndex, user<assistant)`` — making a late-arriving user line land
|
|
3213
|
+
* ABOVE its agent line. ``recordTurn`` de-dups against the lines pushed here
|
|
3214
|
+
* by ``(turnIndex, role)`` so the metrics path never double-pushes the same
|
|
3215
|
+
* text. Parity with Python ``record_transcript_line``.
|
|
3216
|
+
*/
|
|
3217
|
+
recordTranscriptLine(data) {
|
|
3218
|
+
const callId = data.call_id || "";
|
|
3219
|
+
const { role, text, turnIndex } = data;
|
|
3220
|
+
if (!callId || role !== "user" && role !== "assistant" || !text) return;
|
|
3221
|
+
const active = this.activeCalls.get(callId);
|
|
3222
|
+
if (active) {
|
|
3223
|
+
if (!active.transcript) active.transcript = [];
|
|
3224
|
+
active.transcript.push({
|
|
3225
|
+
role,
|
|
3226
|
+
text,
|
|
3227
|
+
timestamp: Date.now() / 1e3,
|
|
3228
|
+
turnIndex
|
|
3229
|
+
});
|
|
3230
|
+
}
|
|
3231
|
+
this.publish("transcript_line", {
|
|
3232
|
+
call_id: callId,
|
|
3233
|
+
turnIndex,
|
|
3234
|
+
role,
|
|
3235
|
+
text
|
|
3236
|
+
});
|
|
3237
|
+
}
|
|
2967
3238
|
/** Append a single conversation turn to an active call and broadcast it via SSE. */
|
|
2968
3239
|
recordTurn(data) {
|
|
2969
3240
|
const callId = data.call_id || "";
|
|
@@ -2978,14 +3249,19 @@ var init_store = __esm({
|
|
|
2978
3249
|
const userText = typeof turnRecord.user_text === "string" ? turnRecord.user_text : "";
|
|
2979
3250
|
const agentText = typeof turnRecord.agent_text === "string" ? turnRecord.agent_text : "";
|
|
2980
3251
|
const ts = typeof turnRecord.timestamp === "number" ? turnRecord.timestamp : Date.now() / 1e3;
|
|
2981
|
-
|
|
2982
|
-
|
|
3252
|
+
const turnIndex = typeof turnRecord.turn_index === "number" ? turnRecord.turn_index : void 0;
|
|
3253
|
+
const alreadyLive = (role) => turnIndex !== void 0 && (active.transcript ?? []).some(
|
|
3254
|
+
(e) => e.turnIndex === turnIndex && e.role === role
|
|
3255
|
+
);
|
|
3256
|
+
if (userText.length > 0 && !alreadyLive("user")) {
|
|
3257
|
+
active.transcript.push({ role: "user", text: userText, timestamp: ts, turnIndex });
|
|
2983
3258
|
}
|
|
2984
|
-
if (agentText.length > 0 && agentText !== "[interrupted]") {
|
|
3259
|
+
if (agentText.length > 0 && agentText !== "[interrupted]" && !alreadyLive("assistant")) {
|
|
2985
3260
|
active.transcript.push({
|
|
2986
3261
|
role: "assistant",
|
|
2987
3262
|
text: agentText,
|
|
2988
|
-
timestamp: ts
|
|
3263
|
+
timestamp: ts,
|
|
3264
|
+
turnIndex
|
|
2989
3265
|
});
|
|
2990
3266
|
}
|
|
2991
3267
|
}
|
|
@@ -3058,7 +3334,7 @@ var init_store = __esm({
|
|
|
3058
3334
|
getCall(callId) {
|
|
3059
3335
|
if (this.deletedCallIds.has(callId)) return null;
|
|
3060
3336
|
for (let i = this.calls.length - 1; i >= 0; i--) {
|
|
3061
|
-
if (this.calls[i].call_id === callId) return this.calls[i];
|
|
3337
|
+
if (this.calls[i].call_id === callId) return { ...this.calls[i] };
|
|
3062
3338
|
}
|
|
3063
3339
|
return null;
|
|
3064
3340
|
}
|
|
@@ -3100,7 +3376,9 @@ var init_store = __esm({
|
|
|
3100
3376
|
}
|
|
3101
3377
|
if (accepted.length === 0) return [];
|
|
3102
3378
|
accepted.sort();
|
|
3103
|
-
this.persistDeletedIds()
|
|
3379
|
+
this.persistDeletedIds().catch(
|
|
3380
|
+
(err) => getLogger().debug(`MetricsStore.deleteCalls: persistDeletedIds failed: ${String(err)}`)
|
|
3381
|
+
);
|
|
3104
3382
|
this.publish("calls_deleted", { call_ids: accepted });
|
|
3105
3383
|
return accepted;
|
|
3106
3384
|
}
|
|
@@ -3112,19 +3390,19 @@ var init_store = __esm({
|
|
|
3112
3390
|
getDeletedCallIds() {
|
|
3113
3391
|
return Array.from(this.deletedCallIds).sort();
|
|
3114
3392
|
}
|
|
3115
|
-
/** Atomically persist the deleted-ids set to disk. Best-effort. */
|
|
3116
|
-
persistDeletedIds() {
|
|
3393
|
+
/** Atomically persist the deleted-ids set to disk. Best-effort async. */
|
|
3394
|
+
async persistDeletedIds() {
|
|
3117
3395
|
if (this.deletedIdsPath === null) return;
|
|
3118
3396
|
try {
|
|
3119
3397
|
const dir = path2.dirname(this.deletedIdsPath);
|
|
3120
|
-
fs2.
|
|
3398
|
+
await fs2.promises.mkdir(dir, { recursive: true });
|
|
3121
3399
|
const tmp = this.deletedIdsPath + ".tmp";
|
|
3122
3400
|
const payload = {
|
|
3123
3401
|
version: 1,
|
|
3124
3402
|
deleted_call_ids: Array.from(this.deletedCallIds).sort()
|
|
3125
3403
|
};
|
|
3126
|
-
fs2.
|
|
3127
|
-
fs2.
|
|
3404
|
+
await fs2.promises.writeFile(tmp, JSON.stringify(payload, null, 2), "utf8");
|
|
3405
|
+
await fs2.promises.rename(tmp, this.deletedIdsPath);
|
|
3128
3406
|
} catch (err) {
|
|
3129
3407
|
getLogger().debug(
|
|
3130
3408
|
`MetricsStore.persistDeletedIds: ${String(err)}`
|
|
@@ -3133,7 +3411,8 @@ var init_store = __esm({
|
|
|
3133
3411
|
}
|
|
3134
3412
|
/** Look up an active call by id (returns undefined if not active or unknown). */
|
|
3135
3413
|
getActive(callId) {
|
|
3136
|
-
|
|
3414
|
+
const rec = this.activeCalls.get(callId);
|
|
3415
|
+
return rec !== void 0 ? { ...rec } : void 0;
|
|
3137
3416
|
}
|
|
3138
3417
|
/** Return all currently active (not yet ended) calls. */
|
|
3139
3418
|
getActiveCalls() {
|
|
@@ -3460,8 +3739,8 @@ function mountDashboard(app, store, token = "") {
|
|
|
3460
3739
|
res.type("text/html").send(DASHBOARD_HTML);
|
|
3461
3740
|
});
|
|
3462
3741
|
app.get("/api/dashboard/calls", auth2, (req, res) => {
|
|
3463
|
-
const limit = Math.min(parseInt(req.query.limit || "50", 10) || 50, 1e3);
|
|
3464
|
-
const offset = parseInt(req.query.offset || "0", 10) || 0;
|
|
3742
|
+
const limit = Math.min(Math.max(0, parseInt(req.query.limit || "50", 10) || 50), 1e3);
|
|
3743
|
+
const offset = Math.max(0, parseInt(req.query.offset || "0", 10) || 0);
|
|
3465
3744
|
res.json(store.getCalls(limit, offset));
|
|
3466
3745
|
});
|
|
3467
3746
|
app.get("/api/dashboard/calls/:callId", auth2, (req, res) => {
|
|
@@ -3551,8 +3830,8 @@ data: ${data}
|
|
|
3551
3830
|
function mountApi(app, store, token = "") {
|
|
3552
3831
|
const auth2 = makeAuthMiddleware(token);
|
|
3553
3832
|
app.get("/api/v1/calls", auth2, (req, res) => {
|
|
3554
|
-
const limit = Math.min(parseInt(req.query.limit || "50", 10) || 50, 1e3);
|
|
3555
|
-
const offset = parseInt(req.query.offset || "0", 10) || 0;
|
|
3833
|
+
const limit = Math.min(Math.max(0, parseInt(req.query.limit || "50", 10) || 50), 1e3);
|
|
3834
|
+
const offset = Math.max(0, parseInt(req.query.offset || "0", 10) || 0);
|
|
3556
3835
|
const calls = store.getCalls(limit, offset);
|
|
3557
3836
|
res.json({
|
|
3558
3837
|
data: calls,
|
|
@@ -3831,14 +4110,31 @@ var init_remote_message = __esm({
|
|
|
3831
4110
|
while (chunks.length > 0) {
|
|
3832
4111
|
yield chunks.shift();
|
|
3833
4112
|
}
|
|
4113
|
+
const READ_TIMEOUT_MS = 3e4;
|
|
3834
4114
|
while (!done && !error2) {
|
|
3835
|
-
const
|
|
4115
|
+
const messagePromise = new Promise((resolve2) => {
|
|
3836
4116
|
if (chunks.length > 0) {
|
|
3837
4117
|
resolve2(chunks.shift());
|
|
3838
4118
|
} else {
|
|
3839
4119
|
resolveNext = resolve2;
|
|
3840
4120
|
}
|
|
3841
4121
|
});
|
|
4122
|
+
let timeoutHandle;
|
|
4123
|
+
const timeoutPromise = new Promise((_, reject) => {
|
|
4124
|
+
timeoutHandle = setTimeout(
|
|
4125
|
+
() => reject(new Error("WebSocket read timeout: no frame received within 30 s")),
|
|
4126
|
+
READ_TIMEOUT_MS
|
|
4127
|
+
);
|
|
4128
|
+
});
|
|
4129
|
+
let text;
|
|
4130
|
+
try {
|
|
4131
|
+
text = await Promise.race([messagePromise, timeoutPromise]);
|
|
4132
|
+
} catch (timeoutErr) {
|
|
4133
|
+
resolveNext = null;
|
|
4134
|
+
throw timeoutErr;
|
|
4135
|
+
} finally {
|
|
4136
|
+
clearTimeout(timeoutHandle);
|
|
4137
|
+
}
|
|
3842
4138
|
if (text === null) break;
|
|
3843
4139
|
yield text;
|
|
3844
4140
|
}
|
|
@@ -4080,18 +4376,6 @@ var init_deepgram_stt = __esm({
|
|
|
4080
4376
|
} catch {
|
|
4081
4377
|
return;
|
|
4082
4378
|
}
|
|
4083
|
-
const dataType = String(data.type ?? "unknown");
|
|
4084
|
-
if (dataType === "Results") {
|
|
4085
|
-
const transcript2 = (data.channel?.alternatives?.[0]?.transcript ?? "").trim();
|
|
4086
|
-
const isFinal = Boolean(data.is_final);
|
|
4087
|
-
const speechFinal2 = Boolean(data.speech_final);
|
|
4088
|
-
const fromFinalize = Boolean(data.from_finalize);
|
|
4089
|
-
getLogger().info(
|
|
4090
|
-
`[DIAG] DG Results text=${JSON.stringify(transcript2.slice(0, 60))} isFinal=${isFinal} speechFinal=${speechFinal2} fromFinalize=${fromFinalize}`
|
|
4091
|
-
);
|
|
4092
|
-
} else if (dataType !== "Metadata") {
|
|
4093
|
-
getLogger().info(`[DIAG] DG event type=${dataType}`);
|
|
4094
|
-
}
|
|
4095
4379
|
if (data.type === "Metadata" && data.request_id) {
|
|
4096
4380
|
this.requestId = data.request_id;
|
|
4097
4381
|
return;
|
|
@@ -4181,7 +4465,7 @@ var init_deepgram_stt = __esm({
|
|
|
4181
4465
|
if (!this.ws || this.ws.readyState !== import_ws4.default.OPEN) {
|
|
4182
4466
|
this.audioDroppedCount++;
|
|
4183
4467
|
if (this.audioDroppedCount === 1 || this.audioDroppedCount % 50 === 0) {
|
|
4184
|
-
getLogger().
|
|
4468
|
+
getLogger().debug(
|
|
4185
4469
|
`[DIAG] DeepgramSTT.sendAudio dropped (ws state=${this.ws?.readyState ?? "null"}) \u2014 total dropped=${this.audioDroppedCount}`
|
|
4186
4470
|
);
|
|
4187
4471
|
}
|
|
@@ -4190,7 +4474,7 @@ var init_deepgram_stt = __esm({
|
|
|
4190
4474
|
if (audio.length === 0) return;
|
|
4191
4475
|
this.audioSentCount++;
|
|
4192
4476
|
if (this.audioSentCount === 1 || this.audioSentCount % 100 === 0) {
|
|
4193
|
-
getLogger().
|
|
4477
|
+
getLogger().debug(
|
|
4194
4478
|
`[DIAG] DeepgramSTT.sendAudio: total chunks sent=${this.audioSentCount} (last=${audio.length} bytes)`
|
|
4195
4479
|
);
|
|
4196
4480
|
}
|
|
@@ -4228,16 +4512,16 @@ var init_deepgram_stt = __esm({
|
|
|
4228
4512
|
finalize() {
|
|
4229
4513
|
const ws = this.ws;
|
|
4230
4514
|
if (!ws || ws.readyState !== import_ws4.default.OPEN) {
|
|
4231
|
-
getLogger().
|
|
4515
|
+
getLogger().debug(
|
|
4232
4516
|
`[DIAG] DeepgramSTT.finalize SKIPPED (ws state=${ws?.readyState ?? "null"})`
|
|
4233
4517
|
);
|
|
4234
4518
|
return;
|
|
4235
4519
|
}
|
|
4236
4520
|
try {
|
|
4237
4521
|
ws.send(JSON.stringify({ type: "Finalize" }));
|
|
4238
|
-
getLogger().
|
|
4522
|
+
getLogger().debug("[DIAG] DeepgramSTT.finalize sent {type:Finalize}");
|
|
4239
4523
|
} catch (err) {
|
|
4240
|
-
getLogger().
|
|
4524
|
+
getLogger().debug(`[DIAG] DeepgramSTT.finalize send failed: ${String(err)}`);
|
|
4241
4525
|
}
|
|
4242
4526
|
}
|
|
4243
4527
|
/** Send Finalize, briefly drain trailing transcripts, then close the socket. */
|
|
@@ -4317,6 +4601,7 @@ var init_metrics = __esm({
|
|
|
4317
4601
|
_pricing;
|
|
4318
4602
|
_callStart;
|
|
4319
4603
|
_turns = [];
|
|
4604
|
+
// mutable internal array; immutable when exposed via TurnMetrics[] → readonly TurnMetrics[]
|
|
4320
4605
|
// Per-turn timing state
|
|
4321
4606
|
_turnStart = null;
|
|
4322
4607
|
_sttComplete = null;
|
|
@@ -4403,6 +4688,16 @@ var init_metrics = __esm({
|
|
|
4403
4688
|
* (the common cause of missing endpoint signals).
|
|
4404
4689
|
*/
|
|
4405
4690
|
_endpointSignalMissingCount = 0;
|
|
4691
|
+
/**
|
|
4692
|
+
* Monotonic per-call turn counter. Reserved at turn OPEN
|
|
4693
|
+
* (``onAdapterSpeechStopped`` / ``speech_stopped``) via
|
|
4694
|
+
* ``reserveTurnIndex()`` and threaded through the buffering pipeline into
|
|
4695
|
+
* ``recordTurnComplete`` / ``recordTurnInterrupted`` as ``preReservedIndex``.
|
|
4696
|
+
* This makes ``turn_index`` stable under drops / interrupts (previously it
|
|
4697
|
+
* was assigned at completion as ``this._turns.length``, which shifted when a
|
|
4698
|
+
* turn was dropped). Parity with Python ``_next_turn_index``.
|
|
4699
|
+
*/
|
|
4700
|
+
_nextTurnIndex = 0;
|
|
4406
4701
|
constructor(opts) {
|
|
4407
4702
|
this.callId = opts.callId;
|
|
4408
4703
|
this.providerMode = opts.providerMode;
|
|
@@ -4451,12 +4746,27 @@ var init_metrics = __esm({
|
|
|
4451
4746
|
this._turnUserText = "";
|
|
4452
4747
|
this._turnSttAudioSeconds = 0;
|
|
4453
4748
|
this._turnAlreadyClosed = false;
|
|
4749
|
+
this._initialTtfbEmitted = false;
|
|
4454
4750
|
this._vadStoppedAt = null;
|
|
4455
4751
|
this._sttFinalAt = null;
|
|
4456
4752
|
this._turnCommittedAt = null;
|
|
4457
4753
|
this._onUserTurnCompletedDelayMs = null;
|
|
4458
4754
|
this._eventBus?.emit("turn_started", { callId: this.callId });
|
|
4459
4755
|
}
|
|
4756
|
+
/**
|
|
4757
|
+
* Reserve and return the next monotonic turn index.
|
|
4758
|
+
*
|
|
4759
|
+
* Called once per turn at the moment the turn OPENS (Realtime:
|
|
4760
|
+
* ``onAdapterSpeechStopped``). The returned index is threaded through the
|
|
4761
|
+
* buffering pipeline and handed back to ``recordTurnComplete`` /
|
|
4762
|
+
* ``recordTurnInterrupted`` as ``preReservedIndex`` so the emitted
|
|
4763
|
+
* ``turn_index`` matches the live per-line transcript ordering even when a
|
|
4764
|
+
* turn is dropped or interrupted between open and close. Parity with Python
|
|
4765
|
+
* ``reserve_turn_index``.
|
|
4766
|
+
*/
|
|
4767
|
+
reserveTurnIndex() {
|
|
4768
|
+
return this._nextTurnIndex++;
|
|
4769
|
+
}
|
|
4460
4770
|
/**
|
|
4461
4771
|
* Start a new turn only if no turn is currently open.
|
|
4462
4772
|
* Use this at inbound-audio ingestion points so the turn timer begins
|
|
@@ -4494,6 +4804,7 @@ var init_metrics = __esm({
|
|
|
4494
4804
|
anchorUserSpeechStart() {
|
|
4495
4805
|
if (this._turnCommittedMono !== null) return;
|
|
4496
4806
|
this._turnStart = hrTimeMs();
|
|
4807
|
+
this._turnAlreadyClosed = false;
|
|
4497
4808
|
this._endpointSignalAt = null;
|
|
4498
4809
|
this._vadStoppedAt = null;
|
|
4499
4810
|
this._sttFinalAt = null;
|
|
@@ -4617,11 +4928,14 @@ var init_metrics = __esm({
|
|
|
4617
4928
|
* ``user_text=''``. The caller treats ``null`` as "nothing to emit";
|
|
4618
4929
|
* ``emitTurnMetrics`` is already null-safe.
|
|
4619
4930
|
*/
|
|
4620
|
-
recordTurnComplete(agentText) {
|
|
4931
|
+
recordTurnComplete(agentText, preReservedIndex) {
|
|
4621
4932
|
if (this._turnAlreadyClosed) return null;
|
|
4622
4933
|
const latency = this._computeTurnLatency();
|
|
4623
4934
|
const turn = {
|
|
4624
|
-
|
|
4935
|
+
// Use the pre-reserved index (stable across drops/interrupts) when the
|
|
4936
|
+
// caller threaded one through; otherwise fall back to the append
|
|
4937
|
+
// position for back-compat with callers that never reserved.
|
|
4938
|
+
turn_index: preReservedIndex ?? this._turns.length,
|
|
4625
4939
|
user_text: this._turnUserText,
|
|
4626
4940
|
agent_text: agentText,
|
|
4627
4941
|
latency,
|
|
@@ -4630,10 +4944,10 @@ var init_metrics = __esm({
|
|
|
4630
4944
|
timestamp: Date.now() / 1e3
|
|
4631
4945
|
};
|
|
4632
4946
|
this._turns.push(turn);
|
|
4633
|
-
this._resetTurnState();
|
|
4634
|
-
this._turnAlreadyClosed = true;
|
|
4635
4947
|
this._eventBus?.emit("turn_ended", { callId: this.callId, turn });
|
|
4636
4948
|
this._eventBus?.emit("metrics_collected", { callId: this.callId, turn });
|
|
4949
|
+
this._resetTurnState();
|
|
4950
|
+
this._turnAlreadyClosed = true;
|
|
4637
4951
|
return turn;
|
|
4638
4952
|
}
|
|
4639
4953
|
/**
|
|
@@ -4645,12 +4959,12 @@ var init_metrics = __esm({
|
|
|
4645
4959
|
* a future refactor that reorders the bargein + LLM-unwind paths)
|
|
4646
4960
|
* from overwriting a turn that the complete path already emitted.
|
|
4647
4961
|
*/
|
|
4648
|
-
recordTurnInterrupted() {
|
|
4962
|
+
recordTurnInterrupted(preReservedIndex) {
|
|
4649
4963
|
if (this._turnStart === null) return null;
|
|
4650
4964
|
if (this._turnAlreadyClosed) return null;
|
|
4651
4965
|
const latency = this._computeTurnLatency();
|
|
4652
4966
|
const turn = {
|
|
4653
|
-
turn_index: this._turns.length,
|
|
4967
|
+
turn_index: preReservedIndex ?? this._turns.length,
|
|
4654
4968
|
user_text: this._turnUserText,
|
|
4655
4969
|
agent_text: "[interrupted]",
|
|
4656
4970
|
latency,
|
|
@@ -4702,8 +5016,10 @@ var init_metrics = __esm({
|
|
|
4702
5016
|
}
|
|
4703
5017
|
/**
|
|
4704
5018
|
* Record the delta (ms) between turn-committed and when on_user_turn_completed
|
|
4705
|
-
* pipeline hook finished.
|
|
4706
|
-
*
|
|
5019
|
+
* pipeline hook finished. Does NOT re-emit: like Python's
|
|
5020
|
+
* ``record_on_user_turn_completed_delay``, this only stores the value; the
|
|
5021
|
+
* single EOU emission happens on ``recordTurnCommitted`` (3-timestamp guard,
|
|
5022
|
+
* delay defaults to 0 if not yet recorded).
|
|
4707
5023
|
*/
|
|
4708
5024
|
recordOnUserTurnCompletedDelay(delayMs) {
|
|
4709
5025
|
this._onUserTurnCompletedDelayMs = delayMs;
|
|
@@ -4716,7 +5032,7 @@ var init_metrics = __esm({
|
|
|
4716
5032
|
* ``transcriptionDelay`` = turnCommitted − vadStopped (ms)
|
|
4717
5033
|
* ``onUserTurnCompletedDelay`` = caller-supplied delta (ms) or 0
|
|
4718
5034
|
*/
|
|
4719
|
-
/** Emit `EOUMetrics` once VAD-stop, STT-final,
|
|
5035
|
+
/** Emit `EOUMetrics` once VAD-stop, STT-final, turn-committed, and on_user_turn_completed delay are all known. */
|
|
4720
5036
|
emitEouMetrics() {
|
|
4721
5037
|
if (this._vadStoppedAt === null || this._sttFinalAt === null || this._turnCommittedAt === null) {
|
|
4722
5038
|
return;
|
|
@@ -5265,10 +5581,13 @@ var init_circuit_breaker = __esm({
|
|
|
5265
5581
|
if (s.state === CircuitBreakerState.OPEN) {
|
|
5266
5582
|
if (this.clock() - s.openedAt >= this.cooldownMs) {
|
|
5267
5583
|
s.state = CircuitBreakerState.HALF_OPEN;
|
|
5584
|
+
s.probeInFlight = true;
|
|
5268
5585
|
return true;
|
|
5269
5586
|
}
|
|
5270
5587
|
return false;
|
|
5271
5588
|
}
|
|
5589
|
+
if (s.probeInFlight) return false;
|
|
5590
|
+
s.probeInFlight = true;
|
|
5272
5591
|
return true;
|
|
5273
5592
|
}
|
|
5274
5593
|
/** Mark a successful execution. Resets the breaker to CLOSED. */
|
|
@@ -5278,19 +5597,21 @@ var init_circuit_breaker = __esm({
|
|
|
5278
5597
|
s.state = CircuitBreakerState.CLOSED;
|
|
5279
5598
|
s.consecutiveFailures = 0;
|
|
5280
5599
|
s.openedAt = 0;
|
|
5600
|
+
s.probeInFlight = false;
|
|
5281
5601
|
}
|
|
5282
5602
|
/** Mark a failed execution; trips OPEN once threshold is reached. */
|
|
5283
5603
|
recordFailure(toolName) {
|
|
5284
5604
|
if (this.threshold <= 0) return;
|
|
5285
5605
|
let s = this.state.get(toolName);
|
|
5286
5606
|
if (!s) {
|
|
5287
|
-
s = { state: CircuitBreakerState.CLOSED, consecutiveFailures: 0, openedAt: 0 };
|
|
5607
|
+
s = { state: CircuitBreakerState.CLOSED, consecutiveFailures: 0, openedAt: 0, probeInFlight: false };
|
|
5288
5608
|
this.state.set(toolName, s);
|
|
5289
5609
|
}
|
|
5290
5610
|
s.consecutiveFailures += 1;
|
|
5291
5611
|
if (s.consecutiveFailures >= this.threshold) {
|
|
5292
5612
|
s.state = CircuitBreakerState.OPEN;
|
|
5293
5613
|
s.openedAt = this.clock();
|
|
5614
|
+
s.probeInFlight = false;
|
|
5294
5615
|
}
|
|
5295
5616
|
}
|
|
5296
5617
|
/**
|
|
@@ -5314,6 +5635,10 @@ var init_circuit_breaker = __esm({
|
|
|
5314
5635
|
});
|
|
5315
5636
|
|
|
5316
5637
|
// src/llm-loop.ts
|
|
5638
|
+
/**
 * Resolve the timeout (ms) to apply to a single tool invocation.
 *
 * @param toolTimeoutMs Per-tool override; may be absent.
 * @param defaultMs Executor-level default, returned unchanged when there is
 *   no usable override.
 * @returns ``defaultMs`` when the override is ``undefined``, ``null`` or not
 *   a number; otherwise the override clamped to
 *   ``[100, MAX_TOOL_TIMEOUT_MS]``.
 */
function resolveToolTimeoutMs(toolTimeoutMs, defaultMs) {
  if (toolTimeoutMs == null) return defaultMs;
  const requested = Number(toolTimeoutMs);
  // NaN would survive Math.min/Math.max and reach setTimeout (which then
  // fires almost immediately) or AbortSignal.timeout (which throws), so an
  // unparseable override falls back to the default instead.
  if (Number.isNaN(requested)) return defaultMs;
  return Math.max(100, Math.min(requested, MAX_TOOL_TIMEOUT_MS));
}
|
|
5317
5642
|
async function invokeHandler(handler, args, callContext, onProgress) {
|
|
5318
5643
|
const invoked = handler(args, callContext);
|
|
5319
5644
|
if (invoked && typeof invoked === "object" && typeof invoked[Symbol.asyncIterator] === "function" && typeof invoked.next === "function") {
|
|
@@ -5372,7 +5697,7 @@ function mergeAbortSignals(...signals) {
|
|
|
5372
5697
|
}
|
|
5373
5698
|
return controller.signal;
|
|
5374
5699
|
}
|
|
5375
|
-
var DEFAULT_TOOL_MAX_RETRIES, DEFAULT_TOOL_RETRY_DELAY_MS, DEFAULT_TOOL_TIMEOUT_MS, TOOL_MAX_RESPONSE_BYTES, DefaultToolExecutor, OpenAILLMProvider, DEFAULT_PHONE_PREAMBLE, LLMLoop;
|
|
5700
|
+
var DEFAULT_TOOL_MAX_RETRIES, DEFAULT_TOOL_RETRY_DELAY_MS, DEFAULT_TOOL_TIMEOUT_MS, MAX_TOOL_TIMEOUT_MS, TOOL_MAX_RESPONSE_BYTES, ToolTimeoutError, DefaultToolExecutor, OpenAILLMProvider, DEFAULT_PHONE_PREAMBLE, LLMLoop;
|
|
5376
5701
|
var init_llm_loop = __esm({
|
|
5377
5702
|
"src/llm-loop.ts"() {
|
|
5378
5703
|
"use strict";
|
|
@@ -5385,7 +5710,14 @@ var init_llm_loop = __esm({
|
|
|
5385
5710
|
DEFAULT_TOOL_MAX_RETRIES = 2;
|
|
5386
5711
|
DEFAULT_TOOL_RETRY_DELAY_MS = 500;
|
|
5387
5712
|
DEFAULT_TOOL_TIMEOUT_MS = 1e4;
|
|
5713
|
+
MAX_TOOL_TIMEOUT_MS = 3e5;
|
|
5388
5714
|
TOOL_MAX_RESPONSE_BYTES = 1 * 1024 * 1024;
|
|
5715
|
+
// Raised when a tool handler does not settle within its timeout. A distinct
// class lets the executor tell timeouts apart from ordinary handler errors
// with `instanceof`.
ToolTimeoutError = class extends Error {
  // Own `name` so `String(err)` reads "ToolTimeoutError: <message>".
  name = "ToolTimeoutError";
  constructor(message) {
    super(message);
  }
};
|
|
5389
5721
|
DefaultToolExecutor = class {
|
|
5390
5722
|
maxRetries;
|
|
5391
5723
|
retryDelayMs;
|
|
@@ -5411,15 +5743,41 @@ var init_llm_loop = __esm({
|
|
|
5411
5743
|
retry_after_ms: cooldown
|
|
5412
5744
|
});
|
|
5413
5745
|
}
|
|
5746
|
+
const effectiveTimeoutMs = resolveToolTimeoutMs(
|
|
5747
|
+
toolDef.timeoutMs,
|
|
5748
|
+
this.requestTimeoutMs
|
|
5749
|
+
);
|
|
5414
5750
|
if (toolDef.handler) {
|
|
5415
5751
|
const totalAttempts = this.maxRetries + 1;
|
|
5416
5752
|
let lastErr = null;
|
|
5417
5753
|
for (let attempt = 0; attempt < totalAttempts; attempt++) {
|
|
5754
|
+
let timeoutTimer;
|
|
5418
5755
|
try {
|
|
5419
|
-
const
|
|
5756
|
+
const handlerPromise = invokeHandler(toolDef.handler, args, callContext, onProgress);
|
|
5757
|
+
const result = await Promise.race([
|
|
5758
|
+
handlerPromise,
|
|
5759
|
+
new Promise((_, reject) => {
|
|
5760
|
+
timeoutTimer = setTimeout(
|
|
5761
|
+
() => reject(
|
|
5762
|
+
new ToolTimeoutError(
|
|
5763
|
+
`Tool handler '${toolDef.name}' timed out after ${effectiveTimeoutMs}ms`
|
|
5764
|
+
)
|
|
5765
|
+
),
|
|
5766
|
+
effectiveTimeoutMs
|
|
5767
|
+
);
|
|
5768
|
+
})
|
|
5769
|
+
]);
|
|
5420
5770
|
this.breaker.recordSuccess(toolDef.name);
|
|
5421
5771
|
return result;
|
|
5422
5772
|
} catch (e) {
|
|
5773
|
+
if (e instanceof ToolTimeoutError) {
|
|
5774
|
+
getLogger().error(String(e));
|
|
5775
|
+
this.breaker.recordFailure(toolDef.name);
|
|
5776
|
+
return JSON.stringify({
|
|
5777
|
+
error: String(e),
|
|
5778
|
+
fallback: true
|
|
5779
|
+
});
|
|
5780
|
+
}
|
|
5423
5781
|
lastErr = e;
|
|
5424
5782
|
if (attempt < totalAttempts - 1) {
|
|
5425
5783
|
getLogger().warn(
|
|
@@ -5427,6 +5785,8 @@ var init_llm_loop = __esm({
|
|
|
5427
5785
|
);
|
|
5428
5786
|
await new Promise((r) => setTimeout(r, backoffDelayMs(this.retryDelayMs, attempt)));
|
|
5429
5787
|
}
|
|
5788
|
+
} finally {
|
|
5789
|
+
if (timeoutTimer !== void 0) clearTimeout(timeoutTimer);
|
|
5430
5790
|
}
|
|
5431
5791
|
}
|
|
5432
5792
|
this.breaker.recordFailure(toolDef.name);
|
|
@@ -5463,7 +5823,10 @@ var init_llm_loop = __esm({
|
|
|
5463
5823
|
...callContext,
|
|
5464
5824
|
attempt: attempt + 1
|
|
5465
5825
|
}),
|
|
5466
|
-
|
|
5826
|
+
// Use per-tool timeout when set, otherwise fall back to
|
|
5827
|
+
// the executor-level default. Mirrors Python's per-request
|
|
5828
|
+
// ``timeout=`` override on httpx.AsyncClient.post().
|
|
5829
|
+
signal: AbortSignal.timeout(effectiveTimeoutMs)
|
|
5467
5830
|
});
|
|
5468
5831
|
if (!resp.ok) throw new Error(`HTTP ${resp.status}`);
|
|
5469
5832
|
const result = JSON.stringify(await resp.json());
|
|
@@ -5589,7 +5952,7 @@ var init_llm_loop = __esm({
|
|
|
5589
5952
|
body.tools = tools;
|
|
5590
5953
|
}
|
|
5591
5954
|
const signal = mergeAbortSignals(opts?.signal, AbortSignal.timeout(3e4));
|
|
5592
|
-
const response = await fetch(
|
|
5955
|
+
const response = await fetch(`${this.baseUrl}/chat/completions`, {
|
|
5593
5956
|
method: "POST",
|
|
5594
5957
|
headers: {
|
|
5595
5958
|
"Content-Type": "application/json",
|
|
@@ -5609,50 +5972,55 @@ var init_llm_loop = __esm({
|
|
|
5609
5972
|
if (!reader) return;
|
|
5610
5973
|
const decoder = new TextDecoder();
|
|
5611
5974
|
let buffer = "";
|
|
5612
|
-
|
|
5613
|
-
|
|
5614
|
-
|
|
5615
|
-
|
|
5616
|
-
|
|
5617
|
-
|
|
5618
|
-
|
|
5619
|
-
const
|
|
5620
|
-
|
|
5621
|
-
|
|
5622
|
-
|
|
5623
|
-
|
|
5624
|
-
|
|
5625
|
-
|
|
5626
|
-
|
|
5627
|
-
|
|
5628
|
-
|
|
5629
|
-
|
|
5630
|
-
|
|
5631
|
-
|
|
5632
|
-
|
|
5633
|
-
type: "usage",
|
|
5634
|
-
inputTokens: uncachedInput,
|
|
5635
|
-
outputTokens: chunk.usage.completion_tokens,
|
|
5636
|
-
cacheReadInputTokens: cached2
|
|
5637
|
-
};
|
|
5638
|
-
}
|
|
5639
|
-
const delta = chunk.choices?.[0]?.delta;
|
|
5640
|
-
if (!delta) continue;
|
|
5641
|
-
if (delta.content) {
|
|
5642
|
-
yield { type: "text", content: delta.content };
|
|
5643
|
-
}
|
|
5644
|
-
if (delta.tool_calls) {
|
|
5645
|
-
for (const tc of delta.tool_calls) {
|
|
5975
|
+
try {
|
|
5976
|
+
while (true) {
|
|
5977
|
+
const { done, value } = await reader.read();
|
|
5978
|
+
if (done) break;
|
|
5979
|
+
buffer += decoder.decode(value, { stream: true });
|
|
5980
|
+
const lines = buffer.split("\n");
|
|
5981
|
+
buffer = lines.pop() || "";
|
|
5982
|
+
for (const line of lines) {
|
|
5983
|
+
const trimmed = line.trim();
|
|
5984
|
+
if (!trimmed || !trimmed.startsWith("data: ")) continue;
|
|
5985
|
+
const data = trimmed.slice(6);
|
|
5986
|
+
if (data === "[DONE]") continue;
|
|
5987
|
+
let chunk;
|
|
5988
|
+
try {
|
|
5989
|
+
chunk = JSON.parse(data);
|
|
5990
|
+
} catch {
|
|
5991
|
+
continue;
|
|
5992
|
+
}
|
|
5993
|
+
if (chunk.usage) {
|
|
5994
|
+
const cached2 = chunk.usage.prompt_tokens_details?.cached_tokens ?? 0;
|
|
5995
|
+
const uncachedInput = Math.max(0, (chunk.usage.prompt_tokens ?? 0) - cached2);
|
|
5646
5996
|
yield {
|
|
5647
|
-
type: "
|
|
5648
|
-
|
|
5649
|
-
|
|
5650
|
-
|
|
5651
|
-
arguments: tc.function?.arguments
|
|
5997
|
+
type: "usage",
|
|
5998
|
+
inputTokens: uncachedInput,
|
|
5999
|
+
outputTokens: chunk.usage.completion_tokens,
|
|
6000
|
+
cacheReadInputTokens: cached2
|
|
5652
6001
|
};
|
|
5653
6002
|
}
|
|
6003
|
+
const delta = chunk.choices?.[0]?.delta;
|
|
6004
|
+
if (!delta) continue;
|
|
6005
|
+
if (delta.content) {
|
|
6006
|
+
yield { type: "text", content: delta.content };
|
|
6007
|
+
}
|
|
6008
|
+
if (delta.tool_calls) {
|
|
6009
|
+
for (const tc of delta.tool_calls) {
|
|
6010
|
+
yield {
|
|
6011
|
+
type: "tool_call",
|
|
6012
|
+
index: tc.index,
|
|
6013
|
+
id: tc.id,
|
|
6014
|
+
name: tc.function?.name,
|
|
6015
|
+
arguments: tc.function?.arguments
|
|
6016
|
+
};
|
|
6017
|
+
}
|
|
6018
|
+
}
|
|
5654
6019
|
}
|
|
5655
6020
|
}
|
|
6021
|
+
} finally {
|
|
6022
|
+
reader.cancel().catch(() => {
|
|
6023
|
+
});
|
|
5656
6024
|
}
|
|
5657
6025
|
}
|
|
5658
6026
|
};
|
|
@@ -5786,7 +6154,7 @@ ${systemPrompt}` : DEFAULT_PHONE_PREAMBLE;
|
|
|
5786
6154
|
chunk.inputTokens ?? 0,
|
|
5787
6155
|
chunk.outputTokens ?? 0,
|
|
5788
6156
|
chunk.cacheReadInputTokens ?? 0,
|
|
5789
|
-
chunk.
|
|
6157
|
+
chunk.cacheWriteInputTokens ?? 0
|
|
5790
6158
|
);
|
|
5791
6159
|
} else if (chunk.type === "tool_call") {
|
|
5792
6160
|
hasToolCalls = true;
|
|
@@ -6180,8 +6548,8 @@ function getElementAtPath(obj, path6) {
|
|
|
6180
6548
|
}
|
|
6181
6549
|
function promiseAllObject(promisesObj) {
|
|
6182
6550
|
const keys = Object.keys(promisesObj);
|
|
6183
|
-
const
|
|
6184
|
-
return Promise.all(
|
|
6551
|
+
const promises2 = keys.map((key) => promisesObj[key]);
|
|
6552
|
+
return Promise.all(promises2).then((results) => {
|
|
6185
6553
|
const resolvedObj = {};
|
|
6186
6554
|
for (let i = 0; i < keys.length; i++) {
|
|
6187
6555
|
resolvedObj[keys[i]] = results[i];
|
|
@@ -23972,6 +24340,8 @@ var init_mcp_client = __esm({
|
|
|
23972
24340
|
"use strict";
|
|
23973
24341
|
init_cjs_shims();
|
|
23974
24342
|
init_logger();
|
|
24343
|
+
init_server();
|
|
24344
|
+
init_version();
|
|
23975
24345
|
MCPManager = class {
|
|
23976
24346
|
configs;
|
|
23977
24347
|
connected = [];
|
|
@@ -23997,10 +24367,16 @@ var init_mcp_client = __esm({
|
|
|
23997
24367
|
}
|
|
23998
24368
|
const aggregatedTools = [];
|
|
23999
24369
|
for (const cfg of this.configs) {
|
|
24370
|
+
try {
|
|
24371
|
+
validateWebhookUrl(cfg.url);
|
|
24372
|
+
} catch (e) {
|
|
24373
|
+
getLogger().error(`MCP server '${cfg.name}' (${cfg.url}) rejected by SSRF guard: ${String(e)}`);
|
|
24374
|
+
continue;
|
|
24375
|
+
}
|
|
24000
24376
|
const transport = new transportModule.StreamableHTTPClientTransport(new URL(cfg.url), {
|
|
24001
24377
|
requestInit: { headers: cfg.headers }
|
|
24002
24378
|
});
|
|
24003
|
-
const client = new mcpModule.Client({ name: "patter", version:
|
|
24379
|
+
const client = new mcpModule.Client({ name: "patter", version: VERSION });
|
|
24004
24380
|
try {
|
|
24005
24381
|
await client.connect(transport);
|
|
24006
24382
|
} catch (e) {
|
|
@@ -24074,6 +24450,276 @@ var init_mcp_client = __esm({
|
|
|
24074
24450
|
}
|
|
24075
24451
|
});
|
|
24076
24452
|
|
|
24453
|
+
// src/consult.ts
|
|
24454
|
+
/**
 * Report whether a base URL points at a loopback, link-local or private host.
 *
 * Recognised hosts: ``localhost``, ``0.0.0.0``, ``::1``, ``*.local``, the
 * IPv4 ranges 127/8, 10/8, 172.16/12, 192.168/16 and 169.254/16, and IPv6
 * addresses starting ``fc``/``fd`` or ``fe8``–``feb``.
 *
 * IPv4 range checks apply only when the host is a dotted-quad literal, so a
 * public DNS name that merely starts with a private-looking label (for
 * example ``10.example.com``) is not treated as private.
 *
 * @param baseUrl Absolute URL string.
 * @returns ``true`` for a loopback/private host; ``false`` otherwise,
 *   including when ``baseUrl`` cannot be parsed.
 */
function isLoopbackOrPrivateHost(baseUrl) {
  let host;
  try {
    host = new URL(baseUrl).hostname.toLowerCase();
  } catch {
    return false;
  }
  // URL.hostname keeps the brackets around IPv6 literals.
  if (host.startsWith("[") && host.endsWith("]")) host = host.slice(1, -1);
  if (host === "localhost" || host === "0.0.0.0" || host === "::1") return true;
  if (host.endsWith(".local")) return true;

  // For http(s) URLs the URL parser has already normalised any IPv4 host to
  // dotted-quad form, so a strict four-octet match is enough here.
  const quad = /^(\d{1,3})\.(\d{1,3})\.\d{1,3}\.\d{1,3}$/.exec(host);
  if (quad) {
    const first = Number(quad[1]);
    const second = Number(quad[2]);
    if (first === 127 || first === 10) return true;
    if (first === 192 && second === 168) return true;
    if (first === 169 && second === 254) return true;
    return first === 172 && second >= 16 && second <= 31;
  }

  // IPv6 literal starting fc/fd or fe8–feb.
  return host.includes(":") && (/^f[cd][0-9a-f]{2}:/.test(host) || /^fe[89ab][0-9a-f]:/.test(host));
}
|
|
24476
|
+
/**
 * Build a consult configuration that targets an OpenClaw agent through its
 * OpenAI-compatible endpoint.
 *
 * @param agent Agent id. Must be non-empty and match ``OPENCLAW_AGENT_RE``.
 *   An id containing "/" or ":" is used as the model name verbatim; any other
 *   id is prefixed with ``openclaw/``.
 * @param opts Optional overrides: ``baseUrl``, ``apiKey``, ``timeoutMs``,
 *   ``toolName``, ``description``, ``reassurance``, ``headers``,
 *   ``allowLoopback``. When ``allowLoopback`` is not given it defaults to
 *   whether ``baseUrl`` points at a loopback/private host.
 * @returns A consult config object with an ``openaiCompatible`` target.
 * @throws Error when ``agent`` is empty or fails the id pattern.
 */
function openclawConsult(agent, opts = {}) {
  const agentIsValid = Boolean(agent) && OPENCLAW_AGENT_RE.test(agent);
  if (!agentIsValid) {
    throw new Error(
      "OpenClaw agent must be a non-empty id of letters, digits, and ._:/- only"
    );
  }

  const baseUrl = opts.baseUrl ?? OPENCLAW_DEFAULT_BASE_URL;
  const alreadyQualified = agent.includes("/") || agent.includes(":");
  const model = alreadyQualified ? agent : `openclaw/${agent}`;

  const openaiCompatible = {
    baseUrl,
    model,
    apiKey: opts.apiKey,
    apiKeyEnv: OPENCLAW_API_KEY_ENV,
    sessionHeader: OPENCLAW_SESSION_HEADER
  };

  return {
    openaiCompatible,
    timeoutMs: opts.timeoutMs ?? DEFAULT_TIMEOUT_MS,
    toolName: opts.toolName ?? DEFAULT_TOOL_NAME,
    description: opts.description ?? OPENCLAW_DESCRIPTION,
    reassurance: opts.reassurance ?? OPENCLAW_REASSURANCE,
    headers: opts.headers,
    allowLoopback: opts.allowLoopback ?? isLoopbackOrPrivateHost(baseUrl)
  };
}
|
|
24500
|
+
/**
 * Turn a consult configuration into a tool definition.
 *
 * @param config2 Consult config. Exactly one of ``url`` (plain webhook) or
 *   ``openaiCompatible`` (chat-completions endpoint) must be set. Optional
 *   fields: ``timeoutMs``, ``headers``, ``toolName``, ``description``,
 *   ``reassurance``, ``allowLoopback``.
 * @returns ``{ name, description, parameters, handler }``, plus
 *   ``reassurance`` when the config provides one.
 * @throws Error when both or neither of ``url`` / ``openaiCompatible`` are
 *   set. The handler builders called here also run URL validation, which may
 *   throw.
 */
function buildConsultTool(config2) {
  const hasUrl = config2.url != null;
  const hasOpenAI = config2.openaiCompatible != null;
  // Both set or both missing: either way the target is ambiguous.
  if (hasUrl === hasOpenAI) {
    throw new Error("ConsultConfig requires exactly one of url or openaiCompatible");
  }

  const timeoutMs = config2.timeoutMs ?? DEFAULT_TIMEOUT_MS;
  // Content-Type is spread last so caller-supplied headers cannot override it.
  const baseHeaders = {
    ...config2.headers ?? {},
    "Content-Type": "application/json"
  };

  let handler;
  if (hasOpenAI) {
    handler = buildOpenAIHandler(config2.openaiCompatible, baseHeaders, timeoutMs, config2.allowLoopback ?? false);
  } else {
    handler = buildWebhookHandler(config2.url, baseHeaders, timeoutMs, config2.allowLoopback ?? false);
  }

  const tool2 = {
    name: config2.toolName ?? DEFAULT_TOOL_NAME,
    description: config2.description ?? DEFAULT_DESCRIPTION,
    parameters: PARAMETERS,
    handler
  };
  if (config2.reassurance == null) return tool2;
  return { ...tool2, reassurance: config2.reassurance };
}
|
|
24520
|
+
/**
 * Create the tool handler that forwards a consult request to a webhook.
 *
 * The URL is validated once, when the handler is built. Each invocation
 * POSTs ``{ request, call_id, caller, callee }`` as JSON and converts the
 * response to a string:
 *   - non-2xx status or any fetch/read error → ``GRACEFUL_FALLBACK``;
 *   - JSON object with a string under one of ``REPLY_KEYS`` → that string
 *     (first matching key wins);
 *   - any other valid JSON → its re-serialised form;
 *   - non-JSON body → the raw text, cut to ``MAX_RESPONSE_CHARS``.
 *
 * @param url2 Webhook URL.
 * @param headers Headers sent with every request.
 * @param timeoutMs Per-request timeout in milliseconds.
 * @param allowLoopback Passed through to ``validateWebhookUrl``.
 * @returns Async handler ``(args, context) => string``.
 */
function buildWebhookHandler(url2, headers, timeoutMs, allowLoopback) {
  validateWebhookUrl(url2, allowLoopback);
  return async (args, context) => {
    const payload = {
      request: typeof args?.request === "string" ? args.request : "",
      call_id: context?.call_id ?? "",
      caller: context?.caller ?? "",
      callee: context?.callee ?? ""
    };

    let body;
    try {
      const resp = await fetch(url2, {
        method: "POST",
        headers,
        body: JSON.stringify(payload),
        signal: AbortSignal.timeout(timeoutMs)
      });
      if (!resp.ok) {
        getLogger().warn(`consult tool: orchestrator returned HTTP ${resp.status}`);
        return GRACEFUL_FALLBACK;
      }
      const raw = await resp.text();
      body = raw.slice(0, MAX_RESPONSE_CHARS);
    } catch (e) {
      // Log only the error name, not its message.
      const label = e instanceof Error ? e.name : "error";
      getLogger().warn(
        `consult tool: orchestrator call failed: ${label}`
      );
      return GRACEFUL_FALLBACK;
    }

    try {
      const parsed = JSON.parse(body);
      const isRecord = parsed !== null && typeof parsed === "object" && !Array.isArray(parsed);
      if (isRecord) {
        for (const key of REPLY_KEYS) {
          const candidate = parsed[key];
          if (typeof candidate === "string") return candidate;
        }
      }
      return JSON.stringify(parsed);
    } catch {
      // Body was not valid JSON (possibly because of truncation); return the text as-is.
      return body;
    }
  };
}
|
|
24563
|
+
/**
 * Build the tool handler that relays a consult request to an
 * OpenAI-compatible `/chat/completions` endpoint.
 *
 * The endpoint is derived from `oc.baseUrl` (trailing slashes removed) and
 * validated once. The API key comes from `oc.apiKey`, else from the
 * environment variable named by `oc.apiKeyEnv`. The returned handler resolves
 * to the trimmed, length-capped assistant message, or GRACEFUL_FALLBACK on any
 * HTTP error, transport error, or response without usable content.
 *
 * @param {object} oc - OpenAI-compatible settings (baseUrl, apiKey, apiKeyEnv, sessionHeader, model).
 * @param {object} baseHeaders - Headers copied into every request.
 * @param {number} timeoutMs - Per-request timeout in milliseconds.
 * @param {boolean} allowLoopback - Forwarded to validateWebhookUrl.
 * @returns {(args: object, context: object) => Promise<string>}
 */
function buildOpenAIHandler(oc, baseHeaders, timeoutMs, allowLoopback) {
  const endpoint = oc.baseUrl.replace(/\/+$/, "") + "/chat/completions";
  validateWebhookUrl(endpoint, allowLoopback);

  const apiKey = oc.apiKey ?? (oc.apiKeyEnv ? process.env[oc.apiKeyEnv] : void 0);
  const headers = { ...baseHeaders };
  if (apiKey) headers.Authorization = `Bearer ${apiKey}`;
  const { sessionHeader, model } = oc;

  return async (args, context) => {
    const requestText = typeof args?.request === "string" ? args.request : "";
    const callId = context?.call_id ?? "";
    const caller = context?.caller ?? "";
    const callee = context?.callee ?? "";

    const systemPrompt = [
      "You are answering an inbound phone call relayed by a voice agent.",
      ...(caller ? [`Caller: ${caller}`] : []),
      ...(callee ? [`Line dialed: ${callee}`] : []),
      "Reply concisely in a spoken, conversational style \u2014 it is read aloud to the caller."
    ].join("\n");

    // The call id doubles as the session header value and the `user` field.
    const reqHeaders = { ...headers };
    if (sessionHeader && callId) reqHeaders[sessionHeader] = callId;
    const payload = {
      model,
      messages: [
        { role: "system", content: systemPrompt },
        { role: "user", content: requestText }
      ],
      stream: false,
      ...(callId ? { user: callId } : {})
    };

    try {
      const resp = await fetch(endpoint, {
        method: "POST",
        headers: reqHeaders,
        body: JSON.stringify(payload),
        signal: AbortSignal.timeout(timeoutMs)
      });
      if (!resp.ok) {
        // 404 gets a dedicated hint because it usually means the endpoint is disabled.
        getLogger().warn(
          resp.status === 404
            ? "consult tool: OpenAI-compatible endpoint returned 404 \u2014 is it enabled? (OpenClaw: set gateway.http.endpoints.chatCompletions.enabled = true)"
            : `consult tool: openai-compatible returned HTTP ${resp.status}`
        );
        return GRACEFUL_FALLBACK;
      }
      const data = await resp.json();
      const content = data?.choices?.[0]?.message?.content;
      if (typeof content !== "string" || !content.trim()) {
        getLogger().warn("consult tool: response missing choices[0].message.content");
        return GRACEFUL_FALLBACK;
      }
      return content.trim().slice(0, MAX_RESPONSE_CHARS);
    } catch (e) {
      getLogger().warn(
        `consult tool: openai-compatible call failed: ${e instanceof Error ? e.name : "error"}`
      );
      return GRACEFUL_FALLBACK;
    }
  };
}
|
|
24625
|
+
/**
 * Render a plain-text summary of a finished call.
 *
 * Emits one line each for the caller, the dialed line and the rounded
 * duration when present, optionally followed by a "Transcript:" section.
 * Malformed input is tolerated rather than thrown on: a null/undefined
 * `data`, a non-array `transcript`, and a non-finite duration are all
 * treated as "not available".
 *
 * @param {object|null|undefined} data - Call payload (caller, callee, metrics, transcript).
 * @param {boolean} includeTranscript - Whether to append the rendered transcript.
 * @returns {string} The summary, or "(no call details available)" when nothing could be rendered.
 */
function buildPostCallRecord(data, includeTranscript) {
  const record = data ?? {};
  const lines = [];
  const { caller, callee, metrics } = record;
  if (caller) lines.push(`Caller: ${caller}`);
  if (callee) lines.push(`Line dialed: ${callee}`);

  // Accept both camelCase and snake_case metric names.
  const duration = metrics?.durationSeconds ?? metrics?.duration_seconds;
  // Number.isFinite also rejects NaN/Infinity, which would render as "NaNs".
  if (typeof duration === "number" && Number.isFinite(duration)) {
    lines.push(`Duration: ${Math.round(duration)}s`);
  }

  if (includeTranscript) {
    // A transcript that is not an array (string, object, ...) is ignored instead of throwing.
    const entries = Array.isArray(record.transcript) ? record.transcript : [];
    const rendered = entries
      .filter((e) => e && typeof e === "object")
      .map((e) => `${e.role ?? "?"}: ${e.text ?? ""}`)
      .join("\n");
    if (rendered) {
      lines.push("Transcript:\n" + rendered.slice(0, POSTCALL_MAX_TRANSCRIPT_CHARS));
    }
  }
  return lines.length ? lines.join("\n") : "(no call details available)";
}
|
|
24641
|
+
/**
 * Build a best-effort post-call notifier that reports a finished call to an
 * OpenClaw agent over its OpenAI-compatible `/chat/completions` endpoint.
 *
 * Connection settings are resolved once through `openclawConsult` and the
 * endpoint is validated up front, so configuration problems surface when the
 * notifier is created. The returned async function never rejects: record
 * construction, transport errors and non-2xx responses are all logged as
 * warnings only.
 *
 * @param {string} agent - OpenClaw agent identifier, forwarded to openclawConsult.
 * @param {object} [opts] - Optional overrides: baseUrl, apiKey, timeoutMs,
 *   allowLoopback, includeTranscript (default true), instruction.
 * @returns {(data: object) => Promise<void>} Notifier to invoke with the call payload.
 */
function openclawPostCallNotifier(agent, opts = {}) {
  const cfg = openclawConsult(agent, {
    baseUrl: opts.baseUrl,
    apiKey: opts.apiKey,
    timeoutMs: opts.timeoutMs ?? DEFAULT_TIMEOUT_MS,
    allowLoopback: opts.allowLoopback
  });
  const oc = cfg.openaiCompatible;
  const endpoint = oc.baseUrl.replace(/\/+$/, "") + "/chat/completions";
  validateWebhookUrl(endpoint, cfg.allowLoopback ?? false);
  const apiKey = oc.apiKey ?? (oc.apiKeyEnv ? process.env[oc.apiKeyEnv] : void 0);
  const sessionHeader = oc.sessionHeader;
  const model = oc.model;
  const timeoutMs = cfg.timeoutMs ?? DEFAULT_TIMEOUT_MS;
  const includeTranscript = opts.includeTranscript ?? true;
  const instruction = opts.instruction ?? POSTCALL_INSTRUCTION;
  return async (data) => {
    // Everything, including building the record, runs inside the try so a
    // malformed call payload is logged like any other failure instead of
    // rejecting the returned promise.
    try {
      const callId = data?.call_id ?? "";
      const record2 = buildPostCallRecord(data ?? {}, includeTranscript);
      const headers = { "Content-Type": "application/json" };
      if (apiKey) headers.Authorization = `Bearer ${apiKey}`;
      if (sessionHeader && callId) headers[sessionHeader] = callId;
      const payload = {
        model,
        messages: [
          { role: "system", content: instruction },
          { role: "user", content: record2 }
        ],
        stream: false
      };
      if (callId) payload.user = callId;
      const resp = await fetch(endpoint, {
        method: "POST",
        headers,
        body: JSON.stringify(payload),
        signal: AbortSignal.timeout(timeoutMs)
      });
      if (!resp.ok) {
        getLogger().warn(`openclaw post-call notify: HTTP ${resp.status}`);
      }
    } catch (e) {
      getLogger().warn(
        `openclaw post-call notify failed: ${e instanceof Error ? e.name : "error"}`
      );
    }
  };
}
|
|
24689
|
+
// Module-level bindings for src/consult.ts. They stay undefined until
// init_consult() runs, so they must remain hoisted `var` declarations.
var DEFAULT_TIMEOUT_MS,
  DEFAULT_TOOL_NAME,
  DEFAULT_DESCRIPTION,
  MAX_RESPONSE_CHARS,
  REPLY_KEYS,
  GRACEFUL_FALLBACK,
  OPENCLAW_DEFAULT_BASE_URL,
  OPENCLAW_API_KEY_ENV,
  OPENCLAW_SESSION_HEADER,
  OPENCLAW_DESCRIPTION,
  OPENCLAW_REASSURANCE,
  OPENCLAW_AGENT_RE,
  PARAMETERS,
  POSTCALL_INSTRUCTION,
  POSTCALL_MAX_TRANSCRIPT_CHARS;
var init_consult = __esm({
  "src/consult.ts"() {
    "use strict";
    init_cjs_shims();
    init_logger();
    init_server();

    // Generic consult-tool defaults.
    DEFAULT_TIMEOUT_MS = 30000;
    DEFAULT_TOOL_NAME = "consult_agent";
    DEFAULT_DESCRIPTION = "Consult your back-office agent for deeper reasoning, fresh information, or actions beyond this call. Use when the caller asks something you cannot answer directly.";
    // Upper bound applied when slicing response text.
    MAX_RESPONSE_CHARS = 1000000;
    // Keys probed, in this order, for a string reply in a JSON-object response.
    REPLY_KEYS = ["reply", "response", "text", "result", "answer", "message"];
    GRACEFUL_FALLBACK = "I wasn't able to reach the system to get that answer right now.";

    // OpenClaw preset values.
    OPENCLAW_DEFAULT_BASE_URL = "http://127.0.0.1:18789/v1";
    OPENCLAW_API_KEY_ENV = "OPENCLAW_API_KEY";
    OPENCLAW_SESSION_HEADER = "x-openclaw-session-key";
    OPENCLAW_DESCRIPTION = "Consult your OpenClaw agent for anything account-specific \u2014 appointments, customer records, schedules, or actions in the back-office system. NEVER state an appointment time, customer detail, or schedule fact from your own memory; ALWAYS call this tool for those and read back what it returns.";
    OPENCLAW_REASSURANCE = "Let me check on that for you, one moment.";
    OPENCLAW_AGENT_RE = /^[A-Za-z0-9._:/-]+$/;

    // JSON-schema style parameter description for the consult tool.
    PARAMETERS = {
      type: "object",
      properties: {
        request: {
          type: "string",
          description: "The question or task to send to your back-office agent for deeper reasoning, fresh information, or an action beyond this call. State it self-containedly \u2014 the dialog history is not forwarded with the consult."
        }
      },
      required: ["request"]
    };

    // Post-call notifier defaults.
    POSTCALL_INSTRUCTION = "A phone call handled by the voice agent has just ended. Here is the record of the call. Log it and follow up if anything needs action.";
    POSTCALL_MAX_TRANSCRIPT_CHARS = 12000;
  }
});
|
|
24722
|
+
|
|
24077
24723
|
// src/sentence-chunker.ts
|
|
24078
24724
|
function splitSentences(text, minSentenceLen = DEFAULT_MIN_SENTENCE_LEN) {
|
|
24079
24725
|
const alphabets = "([A-Za-z])";
|
|
@@ -26616,6 +27262,8 @@ var init_silero_vad = __esm({
|
|
|
26616
27262
|
speechThresholdDuration = 0;
|
|
26617
27263
|
silenceThresholdDuration = 0;
|
|
26618
27264
|
closed = false;
|
|
27265
|
+
/** Transitions produced in the current processFrame call but not yet returned. */
|
|
27266
|
+
eventQueue = [];
|
|
26619
27267
|
/**
|
|
26620
27268
|
* Load the Silero VAD model.
|
|
26621
27269
|
* Throws if `onnxruntime-node` is not installed.
|
|
@@ -26741,22 +27389,21 @@ var init_silero_vad = __esm({
|
|
|
26741
27389
|
);
|
|
26742
27390
|
}
|
|
26743
27391
|
if (pcmChunk.length === 0) {
|
|
26744
|
-
return null;
|
|
27392
|
+
return this.eventQueue.shift() ?? null;
|
|
26745
27393
|
}
|
|
26746
27394
|
const numSamples = Math.floor(pcmChunk.length / 2);
|
|
26747
27395
|
if (numSamples === 0) {
|
|
26748
|
-
return null;
|
|
27396
|
+
return this.eventQueue.shift() ?? null;
|
|
26749
27397
|
}
|
|
26750
27398
|
const samples = new Float32Array(numSamples);
|
|
26751
27399
|
for (let i = 0; i < numSamples; i++) {
|
|
26752
|
-
samples[i] = pcmChunk.readInt16LE(i * 2) /
|
|
27400
|
+
samples[i] = pcmChunk.readInt16LE(i * 2) / 32768;
|
|
26753
27401
|
}
|
|
26754
27402
|
const merged = new Float32Array(this.pending.length + samples.length);
|
|
26755
27403
|
merged.set(this.pending, 0);
|
|
26756
27404
|
merged.set(samples, this.pending.length);
|
|
26757
27405
|
this.pending = merged;
|
|
26758
27406
|
const windowSize = this.model.windowSizeSamples;
|
|
26759
|
-
let event = null;
|
|
26760
27407
|
while (this.pending.length >= windowSize) {
|
|
26761
27408
|
const window = this.pending.slice(0, windowSize);
|
|
26762
27409
|
this.pending = this.pending.slice(windowSize);
|
|
@@ -26765,10 +27412,10 @@ var init_silero_vad = __esm({
|
|
|
26765
27412
|
const windowDuration = windowSize / this.opts.sampleRate;
|
|
26766
27413
|
const transition = this.advanceState(p, windowDuration);
|
|
26767
27414
|
if (transition !== null) {
|
|
26768
|
-
|
|
27415
|
+
this.eventQueue.push(transition);
|
|
26769
27416
|
}
|
|
26770
27417
|
}
|
|
26771
|
-
return
|
|
27418
|
+
return this.eventQueue.shift() ?? null;
|
|
26772
27419
|
}
|
|
26773
27420
|
advanceState(p, windowDuration) {
|
|
26774
27421
|
const opts = this.opts;
|
|
@@ -26823,6 +27470,7 @@ var init_silero_vad = __esm({
|
|
|
26823
27470
|
this.pubSpeaking = false;
|
|
26824
27471
|
this.speechThresholdDuration = 0;
|
|
26825
27472
|
this.silenceThresholdDuration = 0;
|
|
27473
|
+
this.eventQueue = [];
|
|
26826
27474
|
this.expFilter.reset();
|
|
26827
27475
|
this.model.reset();
|
|
26828
27476
|
}
|
|
@@ -27063,6 +27711,13 @@ var init_aec = __esm({
|
|
|
27063
27711
|
});
|
|
27064
27712
|
|
|
27065
27713
|
// src/stream-handler.ts
|
|
27714
|
+
/**
 * Prepend the tool-call preamble guidance to a system prompt.
 *
 * @param {string} prompt - The resolved system prompt (may be empty).
 * @param {boolean|string|undefined} knob - Falsy: leave the prompt untouched.
 *   A string: use it as the preamble block. Any other truthy value: use
 *   DEFAULT_TOOL_CALL_PREAMBLE_BLOCK.
 * @returns {string} The block, a blank line, then the prompt; just the block
 *   when the prompt is empty; or the prompt unchanged when the knob is off.
 */
function applyToolCallPreambles(prompt, knob) {
  if (!knob) {
    return prompt;
  }
  let preamble = DEFAULT_TOOL_CALL_PREAMBLE_BLOCK;
  if (typeof knob === "string") {
    preamble = knob;
  }
  if (!prompt) {
    return preamble;
  }
  return [preamble, prompt].join("\n\n");
}
|
|
27066
27721
|
function checkGuardrails(text, guardrails) {
|
|
27067
27722
|
if (!guardrails) return null;
|
|
27068
27723
|
for (const guard of guardrails) {
|
|
@@ -27119,6 +27774,13 @@ function augmentWithBuiltinHandoffTools(userTools, callbacks) {
|
|
|
27119
27774
|
}
|
|
27120
27775
|
return out;
|
|
27121
27776
|
}
|
|
27777
|
+
/**
 * Decide whether an STT transcript matches a known hallucination phrase.
 *
 * The text is trimmed, lower-cased and stripped of trailing punctuation and
 * whitespace. It counts as a hallucination when the result is empty, is a
 * member of HALLUCINATIONS, or splits on sentence punctuation into two or
 * more non-empty pieces that are all members of HALLUCINATIONS.
 *
 * @param {string} text - Raw transcript text.
 * @returns {boolean} True when the transcript should be treated as a hallucination.
 */
function isSttHallucination(text) {
  const normalized = text
    .trim()
    .toLowerCase()
    .replace(/[.,!?;:…。!?\s]+$/u, "")
    .trim();
  if (normalized === "" || HALLUCINATIONS.has(normalized)) {
    return true;
  }

  // Break into sentence-like fragments, discarding empty ones.
  const fragments = [];
  for (const chunk of normalized.split(/[.!?…。!?]+/u)) {
    const fragment = chunk.trim();
    if (fragment.length > 0) fragments.push(fragment);
  }
  // A single fragment was already checked as the whole phrase above.
  if (fragments.length <= 1) {
    return false;
  }
  for (const fragment of fragments) {
    if (!HALLUCINATIONS.has(fragment)) return false;
  }
  return true;
}
|
|
27122
27784
|
async function queryDeepgramCost(metricsAcc, deepgramKey, deepgramRequestId) {
|
|
27123
27785
|
try {
|
|
27124
27786
|
const projResp = await fetch("https://api.deepgram.com/v1/projects", {
|
|
@@ -27149,7 +27811,7 @@ async function queryDeepgramCost(metricsAcc, deepgramKey, deepgramRequestId) {
|
|
|
27149
27811
|
} catch {
|
|
27150
27812
|
}
|
|
27151
27813
|
}
|
|
27152
|
-
var HALLUCINATIONS, StreamHandler;
|
|
27814
|
+
var DEFAULT_TOOL_CALL_PREAMBLE_BLOCK, HALLUCINATIONS, StreamHandler;
|
|
27153
27815
|
var init_stream_handler = __esm({
|
|
27154
27816
|
"src/stream-handler.ts"() {
|
|
27155
27817
|
"use strict";
|
|
@@ -27167,39 +27829,96 @@ var init_stream_handler = __esm({
|
|
|
27167
27829
|
init_mcp_client();
|
|
27168
27830
|
init_logger();
|
|
27169
27831
|
init_server();
|
|
27832
|
+
init_consult();
|
|
27170
27833
|
init_sentence_chunker();
|
|
27171
27834
|
init_pipeline_hooks();
|
|
27172
27835
|
init_event_bus();
|
|
27173
27836
|
init_tracing();
|
|
27837
|
+
DEFAULT_TOOL_CALL_PREAMBLE_BLOCK = `# Preambles
|
|
27838
|
+
|
|
27839
|
+
Use short preambles only when they help the user understand that work is happening. A preamble is one short spoken update describing the action you are about to take \u2014 not hidden reasoning, and never a claim about the result.
|
|
27840
|
+
|
|
27841
|
+
## When to use a preamble
|
|
27842
|
+
Use a preamble when:
|
|
27843
|
+
- you are about to call a tool that may take noticeable time;
|
|
27844
|
+
- you need to reason through a multi-step request;
|
|
27845
|
+
- you are checking records, availability, account state, or policy details;
|
|
27846
|
+
- you are preparing an escalation or handoff;
|
|
27847
|
+
- silence would make the assistant feel unresponsive.
|
|
27848
|
+
|
|
27849
|
+
When a preamble is needed, output it immediately before the reasoning or tool call.
|
|
27850
|
+
|
|
27851
|
+
## When to NOT use a preamble
|
|
27852
|
+
Do not use a preamble when:
|
|
27853
|
+
- the answer is direct and can be given immediately;
|
|
27854
|
+
- the user is only confirming, correcting, or declining something;
|
|
27855
|
+
- the audio is unclear and you need clarification instead;
|
|
27856
|
+
- the tool call is lightweight and the user would not benefit from an update.
|
|
27857
|
+
|
|
27858
|
+
## Style
|
|
27859
|
+
- Keep it to one short sentence (two only before a high-impact action).
|
|
27860
|
+
- Vary the wording across turns; do not reuse the same opener.
|
|
27861
|
+
- Describe the action, not the internal reasoning.
|
|
27862
|
+
- Never imply success or failure before the tool returns.
|
|
27863
|
+
|
|
27864
|
+
Prefer:
|
|
27865
|
+
- "I'll check that order now."
|
|
27866
|
+
- "I'll look up your appointment details."
|
|
27867
|
+
- "I'll verify that before we make any changes."
|
|
27868
|
+
- "I'll check the policy and then give you the next step."
|
|
27869
|
+
- "I'll pull that up so we can make sure it's the right account."
|
|
27870
|
+
|
|
27871
|
+
Avoid:
|
|
27872
|
+
- "Let me think about that for a second."
|
|
27873
|
+
- "Please wait while I process your request."
|
|
27874
|
+
- "I'm going to use my tools now."
|
|
27875
|
+
- "Hmm..." / "One moment while I process that..."`;
|
|
27174
27876
|
HALLUCINATIONS = /* @__PURE__ */ new Set([
|
|
27175
|
-
|
|
27176
|
-
|
|
27177
|
-
|
|
27178
|
-
|
|
27179
|
-
|
|
27180
|
-
|
|
27181
|
-
|
|
27182
|
-
|
|
27183
|
-
|
|
27184
|
-
|
|
27185
|
-
|
|
27186
|
-
|
|
27187
|
-
|
|
27188
|
-
|
|
27189
|
-
|
|
27190
|
-
"cool",
|
|
27191
|
-
// Whisper YouTube-caption hallucinations
|
|
27877
|
+
// Issue #154: the hallucination filter is now DISPLAY-ONLY — it no longer
|
|
27878
|
+
// gates response creation (the server drives the response on
|
|
27879
|
+
// ``input_audio_buffer.committed`` by default). Dropping a phrase here
|
|
27880
|
+
// therefore deletes the user's transcript line (recordSttComplete never
|
|
27881
|
+
// fires → empty user_text → dashboard skips the user line). So this set is
|
|
27882
|
+
// restricted to genuine NON-SPEECH artefacts that Whisper emits on
|
|
27883
|
+
// silence / TTS echo, NOT real conversational words. Standalone words like
|
|
27884
|
+
// 'yes', 'no', 'okay', 'right', 'you', 'thanks' were REMOVED — they are
|
|
27885
|
+
// legitimate user replies and must reach the transcript. Parity with
|
|
27886
|
+
// Python ``_STT_HALLUCINATIONS``.
|
|
27887
|
+
//
|
|
27888
|
+
// Whisper caption / training-set hallucinations. Whisper was trained heavily
|
|
27889
|
+
// on captioned video, so on silence / PSTN echo it falls back to the most
|
|
27890
|
+
// common caption credits + sign-offs. Curated from widely-reported
|
|
27891
|
+
// Whisper-on-silence outputs across the open-source ASR community.
|
|
27192
27892
|
"thank you for watching",
|
|
27193
27893
|
"thanks for watching",
|
|
27194
27894
|
"thank you for watching!",
|
|
27195
27895
|
"thanks for watching!",
|
|
27196
27896
|
"thank you so much for watching",
|
|
27897
|
+
"thank you for watching please subscribe",
|
|
27898
|
+
"thanks for watching please subscribe",
|
|
27197
27899
|
"thanks for listening",
|
|
27900
|
+
"we'll see you next time",
|
|
27901
|
+
"see you next time",
|
|
27902
|
+
"bye bye",
|
|
27198
27903
|
"please subscribe",
|
|
27904
|
+
"please subscribe to my channel",
|
|
27905
|
+
"don't forget to subscribe",
|
|
27906
|
+
"like and subscribe",
|
|
27199
27907
|
"subscribe",
|
|
27908
|
+
"subtitles by the amara.org community",
|
|
27909
|
+
"subtitles by the amara org community",
|
|
27910
|
+
"subtitles by",
|
|
27911
|
+
"transcribed by",
|
|
27912
|
+
"transcription by castingwords",
|
|
27913
|
+
"the end",
|
|
27914
|
+
// Music / sound markers.
|
|
27200
27915
|
"music",
|
|
27201
27916
|
"[music]",
|
|
27917
|
+
"piano music",
|
|
27918
|
+
"applause",
|
|
27919
|
+
"[applause]",
|
|
27202
27920
|
"\u266A",
|
|
27921
|
+
// Silence markers.
|
|
27203
27922
|
"[no audio]",
|
|
27204
27923
|
"[silence]",
|
|
27205
27924
|
"[blank_audio]",
|
|
@@ -27503,7 +28222,14 @@ var init_stream_handler = __esm({
|
|
|
27503
28222
|
* barge-in armed during the audible tail. Tunable via env.
|
|
27504
28223
|
*/
|
|
27505
28224
|
endSpeakingWithGrace() {
|
|
27506
|
-
const
|
|
28225
|
+
const rawGrace = process.env.PATTER_TTS_TAIL_GRACE_MS;
|
|
28226
|
+
const parsedGrace = rawGrace !== void 0 ? Number(rawGrace) : NaN;
|
|
28227
|
+
const grace = rawGrace !== void 0 && Number.isFinite(parsedGrace) ? parsedGrace : 1500;
|
|
28228
|
+
if (rawGrace !== void 0 && !Number.isFinite(parsedGrace)) {
|
|
28229
|
+
getLogger().warn(
|
|
28230
|
+
`PATTER_TTS_TAIL_GRACE_MS="${rawGrace}" is not a valid number \u2014 using default 1500ms`
|
|
28231
|
+
);
|
|
28232
|
+
}
|
|
27507
28233
|
if (grace > 0) {
|
|
27508
28234
|
const gen = this.speakingGeneration;
|
|
27509
28235
|
this.clearGraceTimer();
|
|
@@ -27597,6 +28323,14 @@ var init_stream_handler = __esm({
|
|
|
27597
28323
|
`[DIAG] Flushed ${replayed} pre-barge-in frame(s) (~${replayed * 20} ms) to STT`
|
|
27598
28324
|
);
|
|
27599
28325
|
}
|
|
28326
|
+
/**
|
|
28327
|
+
* Per-call resolved tool list. Starts as ``null`` (falls back to
|
|
28328
|
+
* ``deps.agent.tools``). Populated by ``initMcpTools`` when MCP servers
|
|
28329
|
+
* are configured so discovered tools are merged in without mutating the
|
|
28330
|
+
* shared ``AgentOptions`` object. Code that needs the effective tool list
|
|
28331
|
+
* should read ``this.resolvedTools ?? this.deps.agent.tools``.
|
|
28332
|
+
*/
|
|
28333
|
+
resolvedTools = null;
|
|
27600
28334
|
llmLoop = null;
|
|
27601
28335
|
/**
|
|
27602
28336
|
* Per-call tool executor — provides retry-with-exponential-backoff and a
|
|
@@ -27640,6 +28374,17 @@ var init_stream_handler = __esm({
|
|
|
27640
28374
|
userTranscriptPending = false;
|
|
27641
28375
|
pendingAssistantTurn = null;
|
|
27642
28376
|
pendingAssistantTimer = null;
|
|
28377
|
+
/**
|
|
28378
|
+
* Reserved monotonic turn index for the in-flight Realtime turn (issue
|
|
28379
|
+
* #154, fix 5/6). Reserved in ``onAdapterSpeechStopped`` via
|
|
28380
|
+
* ``metricsAcc.reserveTurnIndex()`` the moment the turn OPENS, then threaded
|
|
28381
|
+
* through to the live per-line transcript events (``recordTranscriptLine``)
|
|
28382
|
+
* and into ``recordTurnComplete`` / ``recordTurnInterrupted`` so the
|
|
28383
|
+
* dashboard can sort a late-arriving user line ABOVE its agent line by
|
|
28384
|
+
* ``(turnIndex, role)``. ``null`` until the first turn opens. Parity with
|
|
28385
|
+
* Python ``_current_turn_index``.
|
|
28386
|
+
*/
|
|
28387
|
+
currentTurnIndex = null;
|
|
27643
28388
|
/**
|
|
27644
28389
|
* Hard cap on how long we wait for the user transcript before flushing
|
|
27645
28390
|
* the buffered assistant turn alone. 3 s covers OpenAI Whisper's typical
|
|
@@ -27721,6 +28466,23 @@ var init_stream_handler = __esm({
|
|
|
27721
28466
|
* streaming/regular LLM, WebSocket remote, Realtime response_done) so the
|
|
27722
28467
|
* payload shape lives in one place.
|
|
27723
28468
|
*/
|
|
28469
|
+
/**
|
|
28470
|
+
* Emit a live per-line transcript event to the dashboard store (issue #154,
|
|
28471
|
+
* fix 5). Routed through a single helper so the call shape lives in one
|
|
28472
|
+
* place. ``recordTranscriptLine`` appends the line to the active call's
|
|
28473
|
+
* transcript and publishes a ``transcript_line`` SSE event; the dashboard
|
|
28474
|
+
* sorts by (turnIndex, user<assistant) so a late user line lands above its
|
|
28475
|
+
* agent line. No-op when no turn index has been reserved yet.
|
|
28476
|
+
*/
|
|
28477
|
+
emitTranscriptLine(role, text) {
|
|
28478
|
+
if (this.currentTurnIndex === null) return;
|
|
28479
|
+
this.deps.metricsStore.recordTranscriptLine({
|
|
28480
|
+
call_id: this.callId,
|
|
28481
|
+
turnIndex: this.currentTurnIndex,
|
|
28482
|
+
role,
|
|
28483
|
+
text
|
|
28484
|
+
});
|
|
28485
|
+
}
|
|
27724
28486
|
async emitTurnMetrics(turn) {
|
|
27725
28487
|
if (turn == null) return;
|
|
27726
28488
|
this.deps.metricsStore.recordTurn({ call_id: this.callId, turn });
|
|
@@ -27827,7 +28589,7 @@ var init_stream_handler = __esm({
|
|
|
27827
28589
|
if (customParams.callee && !this.callee) this.callee = customParams.callee;
|
|
27828
28590
|
const mode = this.deps.agent.engine ? `engine=${this.deps.agent.engine.kind ?? "unknown"}` : "pipeline";
|
|
27829
28591
|
getLogger().info(
|
|
27830
|
-
`Call started: ${callId} (${this.deps.bridge.label}, ${mode}, ${
|
|
28592
|
+
`Call started: ${callId} (${this.deps.bridge.label}, ${mode}, ${maskPhoneNumber(this.caller || "?")} \u2192 ${maskPhoneNumber(this.callee || "?")})`
|
|
27831
28593
|
);
|
|
27832
28594
|
if (Object.keys(customParams).length > 0) {
|
|
27833
28595
|
getLogger().debug(`Custom params: ${sanitizeLogValue(JSON.stringify(customParams))}`);
|
|
@@ -27872,10 +28634,13 @@ var init_stream_handler = __esm({
|
|
|
27872
28634
|
const resolvedPrompt = Object.keys(allVars).length > 0 ? this.deps.resolveVariables(this.deps.agent.systemPrompt, allVars) : this.deps.agent.systemPrompt;
|
|
27873
28635
|
const provider2 = this.deps.agent.provider ?? "openai_realtime";
|
|
27874
28636
|
await this.initMcpTools();
|
|
28637
|
+
this.injectConsultTool();
|
|
27875
28638
|
if (provider2 === "pipeline") {
|
|
27876
28639
|
await this.initPipeline(resolvedPrompt);
|
|
27877
28640
|
} else {
|
|
27878
|
-
await this.initRealtimeAdapter(
|
|
28641
|
+
await this.initRealtimeAdapter(
|
|
28642
|
+
applyToolCallPreambles(resolvedPrompt, this.deps.agent.toolCallPreambles)
|
|
28643
|
+
);
|
|
27879
28644
|
}
|
|
27880
28645
|
}
|
|
27881
28646
|
/**
|
|
@@ -27900,10 +28665,25 @@ var init_stream_handler = __esm({
|
|
|
27900
28665
|
}
|
|
27901
28666
|
if (discovered.length === 0) return;
|
|
27902
28667
|
MCPManager.assertNoConflicts(this.deps.agent.tools, discovered);
|
|
27903
|
-
|
|
27904
|
-
mutableAgent.tools = [...mutableAgent.tools ?? [], ...discovered];
|
|
28668
|
+
this.resolvedTools = [...this.deps.agent.tools ?? [], ...discovered];
|
|
27905
28669
|
getLogger().info(`MCP: merged ${discovered.length} tool(s) into agent`);
|
|
27906
28670
|
}
|
|
28671
|
+
/**
|
|
28672
|
+
* Merge the built-in ``consult`` tool into the per-call tool list when
|
|
28673
|
+
* ``agent.consult`` is set, mirroring {@link initMcpTools}: the shared
|
|
28674
|
+
* ``deps.agent`` is NOT mutated; the merged list is stored on
|
|
28675
|
+
* ``this.resolvedTools`` so ``buildAIAdapter`` (Realtime) and the pipeline
|
|
28676
|
+
* ``LLMLoop`` both see it. Idempotent — a no-op if a tool with the same name
|
|
28677
|
+
* is already present.
|
|
28678
|
+
*/
|
|
28679
|
+
injectConsultTool() {
|
|
28680
|
+
const consult = this.deps.agent.consult;
|
|
28681
|
+
if (!consult) return;
|
|
28682
|
+
const consultTool = buildConsultTool(consult);
|
|
28683
|
+
const base = this.resolvedTools ?? (this.deps.agent.tools ?? []);
|
|
28684
|
+
if (base.some((t) => t.name === consultTool.name)) return;
|
|
28685
|
+
this.resolvedTools = [...base, consultTool];
|
|
28686
|
+
}
|
|
27907
28687
|
/** Set the stream SID (Twilio only, called after parsing 'start' event). */
|
|
27908
28688
|
/** Set the carrier-side stream id (Twilio `streamSid` / Telnyx stream identifier). */
|
|
27909
28689
|
setStreamSid(sid) {
|
|
@@ -27923,8 +28703,12 @@ var init_stream_handler = __esm({
|
|
|
27923
28703
|
if (activeVad && !this.vadDisabled) {
|
|
27924
28704
|
try {
|
|
27925
28705
|
const vadPromise = activeVad.processFrame(pcm16k, 16e3);
|
|
27926
|
-
|
|
28706
|
+
let vadTimeoutId;
|
|
28707
|
+
const timeoutPromise = new Promise((resolve2) => {
|
|
28708
|
+
vadTimeoutId = setTimeout(() => resolve2(null), 25);
|
|
28709
|
+
});
|
|
27927
28710
|
const evt = await Promise.race([vadPromise, timeoutPromise]);
|
|
28711
|
+
clearTimeout(vadTimeoutId);
|
|
27928
28712
|
if (evt) {
|
|
27929
28713
|
getLogger().info(
|
|
27930
28714
|
`[VAD] ${evt.type} agentSpeaking=${this.isSpeaking}`
|
|
@@ -27997,7 +28781,7 @@ var init_stream_handler = __esm({
|
|
|
27997
28781
|
if ((this.deps.agent.bargeInThresholdMs ?? 300) === 0) return;
|
|
27998
28782
|
}
|
|
27999
28783
|
const hooks = this.deps.agent.hooks;
|
|
28000
|
-
if (hooks) {
|
|
28784
|
+
if (hooks?.beforeSendToStt) {
|
|
28001
28785
|
const hookExecutor = new PipelineHookExecutor(hooks);
|
|
28002
28786
|
const hookCtx = this.buildHookContext();
|
|
28003
28787
|
const processed = await hookExecutor.runBeforeSendToStt(pcm16k, hookCtx);
|
|
@@ -28423,7 +29207,7 @@ var init_stream_handler = __esm({
|
|
|
28423
29207
|
}
|
|
28424
29208
|
const providerModel = this.deps.agent.llm?.model ?? "";
|
|
28425
29209
|
const augmentedTools = augmentWithBuiltinHandoffTools(
|
|
28426
|
-
this.deps.agent.tools,
|
|
29210
|
+
this.resolvedTools ?? this.deps.agent.tools,
|
|
28427
29211
|
{
|
|
28428
29212
|
transferCall: (number4) => this.deps.bridge.transferCall(this.callId, number4),
|
|
28429
29213
|
endCall: () => this.deps.bridge.endCall(this.callId, this.ws)
|
|
@@ -28447,7 +29231,7 @@ var init_stream_handler = __esm({
|
|
|
28447
29231
|
let llmModel = this.deps.agent.model || "gpt-4o-mini";
|
|
28448
29232
|
if (llmModel.includes("realtime")) llmModel = "gpt-4o-mini";
|
|
28449
29233
|
const augmentedTools = augmentWithBuiltinHandoffTools(
|
|
28450
|
-
this.deps.agent.tools,
|
|
29234
|
+
this.resolvedTools ?? this.deps.agent.tools,
|
|
28451
29235
|
{
|
|
28452
29236
|
transferCall: (number4) => this.deps.bridge.transferCall(this.callId, number4),
|
|
28453
29237
|
endCall: () => this.deps.bridge.endCall(this.callId, this.ws)
|
|
@@ -28971,7 +29755,7 @@ var init_stream_handler = __esm({
|
|
|
28971
29755
|
// ---------------------------------------------------------------------------
|
|
28972
29756
|
async initRealtimeAdapter(resolvedPrompt) {
|
|
28973
29757
|
const label = this.deps.bridge.label;
|
|
28974
|
-
this.adapter = this.deps.buildAIAdapter(resolvedPrompt);
|
|
29758
|
+
this.adapter = this.deps.buildAIAdapter(resolvedPrompt, this.resolvedTools ?? void 0);
|
|
28975
29759
|
let parked;
|
|
28976
29760
|
if (typeof this.deps.popPrewarmedConnections === "function") {
|
|
28977
29761
|
try {
|
|
@@ -29044,6 +29828,7 @@ var init_stream_handler = __esm({
|
|
|
29044
29828
|
response_done: async (eventData) => this.onAdapterResponseDone(eventData),
|
|
29045
29829
|
speech_started: async () => this.onAdapterSpeechInterrupt(),
|
|
29046
29830
|
interruption: async () => this.onAdapterSpeechInterrupt(),
|
|
29831
|
+
error: async (eventData) => this.onAdapterError(eventData),
|
|
29047
29832
|
function_call: async (eventData) => {
|
|
29048
29833
|
if (this.adapter instanceof OpenAIRealtimeAdapter) {
|
|
29049
29834
|
await this.handleFunctionCall(eventData);
|
|
@@ -29130,21 +29915,31 @@ var init_stream_handler = __esm({
|
|
|
29130
29915
|
if (!this.metricsAcc.turnActive) this.metricsAcc.startTurn();
|
|
29131
29916
|
this.currentAgentText = "";
|
|
29132
29917
|
this.responseAudioStarted = false;
|
|
29918
|
+
this.currentTurnIndex = this.metricsAcc.reserveTurnIndex();
|
|
29133
29919
|
this.userTranscriptPending = true;
|
|
29134
29920
|
await this.emitUserSpeechEnded();
|
|
29135
29921
|
}
|
|
29136
29922
|
async onAdapterTranscriptInput(inputText) {
|
|
29137
|
-
|
|
29138
|
-
if (HALLUCINATIONS.has(stripped) || stripped === "") {
|
|
29923
|
+
if (isSttHallucination(inputText)) {
|
|
29139
29924
|
getLogger().debug(
|
|
29140
29925
|
`Realtime transcript_input dropped (likely Whisper hallucination on silence/echo): ${sanitizeLogValue(inputText.slice(0, 60))}`
|
|
29141
29926
|
);
|
|
29142
29927
|
this.userTranscriptPending = false;
|
|
29928
|
+
if (this.pendingAssistantTurn !== null) {
|
|
29929
|
+
const buffered = this.pendingAssistantTurn;
|
|
29930
|
+
this.pendingAssistantTurn = null;
|
|
29931
|
+
if (this.pendingAssistantTimer) {
|
|
29932
|
+
clearTimeout(this.pendingAssistantTimer);
|
|
29933
|
+
this.pendingAssistantTimer = null;
|
|
29934
|
+
}
|
|
29935
|
+
await this.flushAssistantTurn(buffered);
|
|
29936
|
+
}
|
|
29143
29937
|
return;
|
|
29144
29938
|
}
|
|
29145
29939
|
getLogger().debug(`User (${this.deps.bridge.label}): ${sanitizeLogValue(inputText)}`);
|
|
29146
29940
|
this.history.push({ role: "user", text: inputText, timestamp: Date.now() });
|
|
29147
|
-
|
|
29941
|
+
this.emitTranscriptLine("user", inputText);
|
|
29942
|
+
if (this.adapter instanceof OpenAIRealtimeAdapter && this.adapter.getGateResponseOnTranscript()) {
|
|
29148
29943
|
void this.adapter.requestResponse().catch(
|
|
29149
29944
|
(err) => getLogger().debug(`Realtime requestResponse failed: ${String(err)}`)
|
|
29150
29945
|
);
|
|
@@ -29191,8 +29986,12 @@ var init_stream_handler = __esm({
|
|
|
29191
29986
|
history: [...this.history.entries]
|
|
29192
29987
|
});
|
|
29193
29988
|
}
|
|
29989
|
+
const reservedIndex = this.currentTurnIndex;
|
|
29990
|
+
this.emitTranscriptLine("assistant", text);
|
|
29194
29991
|
this.responseAudioStarted = false;
|
|
29195
|
-
await this.emitTurnMetrics(
|
|
29992
|
+
await this.emitTurnMetrics(
|
|
29993
|
+
this.metricsAcc.recordTurnComplete(text, reservedIndex ?? void 0)
|
|
29994
|
+
);
|
|
29196
29995
|
}
|
|
29197
29996
|
/**
|
|
29198
29997
|
* Push an assistant turn into history and fire `onTranscript` so host
|
|
@@ -29291,7 +30090,9 @@ var init_stream_handler = __esm({
|
|
|
29291
30090
|
this.pendingAssistantTimer = null;
|
|
29292
30091
|
this.userTranscriptPending = false;
|
|
29293
30092
|
if (buffered !== null) {
|
|
29294
|
-
|
|
30093
|
+
this.flushAssistantTurn(buffered).catch(
|
|
30094
|
+
(err) => getLogger().error("flushAssistantTurn (fallback timer) failed:", err)
|
|
30095
|
+
);
|
|
29295
30096
|
}
|
|
29296
30097
|
}, _StreamHandler.REALTIME_USER_TRANSCRIPT_WAIT_MS);
|
|
29297
30098
|
this.responseAudioStarted = false;
|
|
@@ -29300,7 +30101,9 @@ var init_stream_handler = __esm({
|
|
|
29300
30101
|
await this.flushAssistantTurn(text);
|
|
29301
30102
|
}
|
|
29302
30103
|
async onAdapterSpeechInterrupt() {
|
|
29303
|
-
|
|
30104
|
+
const isEngine = this.adapter instanceof OpenAIRealtimeAdapter;
|
|
30105
|
+
const clientManaged = isEngine && this.adapter.getGateResponseOnTranscript();
|
|
30106
|
+
if (clientManaged) {
|
|
29304
30107
|
const startedAt = this.adapter.currentResponseFirstAudioAt;
|
|
29305
30108
|
if (startedAt !== null) {
|
|
29306
30109
|
const elapsedMs = Date.now() - startedAt;
|
|
@@ -29313,12 +30116,20 @@ var init_stream_handler = __esm({
|
|
|
29313
30116
|
}
|
|
29314
30117
|
}
|
|
29315
30118
|
this.deps.bridge.sendClear(this.ws, this.streamSid);
|
|
29316
|
-
if (
|
|
30119
|
+
if (clientManaged) {
|
|
30120
|
+
this.metricsAcc.recordBargeinDetected();
|
|
30121
|
+
this.adapter.cancelResponse();
|
|
30122
|
+
} else if (isEngine) {
|
|
30123
|
+
this.adapter.truncate();
|
|
30124
|
+
}
|
|
29317
30125
|
this.metricsAcc.recordTurnInterrupted();
|
|
29318
30126
|
if (this.responseAudioStarted) {
|
|
29319
30127
|
await this.emitAgentSpeechEnded(true);
|
|
29320
30128
|
}
|
|
29321
30129
|
await this.emitUserSpeechStarted();
|
|
30130
|
+
if (clientManaged) {
|
|
30131
|
+
this.metricsAcc.anchorUserSpeechStart();
|
|
30132
|
+
}
|
|
29322
30133
|
this.currentAgentText = "";
|
|
29323
30134
|
this.responseAudioStarted = false;
|
|
29324
30135
|
this.pendingAssistantTurn = null;
|
|
@@ -29328,6 +30139,28 @@ var init_stream_handler = __esm({
|
|
|
29328
30139
|
}
|
|
29329
30140
|
this.userTranscriptPending = false;
|
|
29330
30141
|
}
|
|
30142
|
+
/**
|
|
30143
|
+
* Handle a Realtime ``error`` event (issue #154, fix 4).
|
|
30144
|
+
*
|
|
30145
|
+
* Both Realtime providers dispatch ``('error', …)`` for server-side errors,
|
|
30146
|
+
* non-normal socket closes, and socket errors, but the stream handler
|
|
30147
|
+
* previously had no entry for it in the dispatch table so these were
|
|
30148
|
+
* silently swallowed. We surface them at WARN level with ONLY the error
|
|
30149
|
+
* envelope fields (``type`` / ``code`` / ``message``) — never any audio or
|
|
30150
|
+
* transcript body, to avoid logging PII. The call is NOT terminated: the
|
|
30151
|
+
* provider decides whether to recover, and many of these (e.g. a transient
|
|
30152
|
+
* ``input_audio_buffer_commit_empty``) are non-fatal. Parity with the
|
|
30153
|
+
* Python ``elif ev_type == 'error'`` branches.
|
|
30154
|
+
*/
|
|
30155
|
+
async onAdapterError(eventData) {
|
|
30156
|
+
const err = eventData ?? {};
|
|
30157
|
+
const type = typeof err.type === "string" ? err.type : "unknown";
|
|
30158
|
+
const code = typeof err.code === "string" ? err.code : "";
|
|
30159
|
+
const message = typeof err.message === "string" ? err.message : "";
|
|
30160
|
+
getLogger().warn(
|
|
30161
|
+
`Realtime error (${this.deps.bridge.label}) type=${type} code=${code} message=${sanitizeLogValue(message)}`
|
|
30162
|
+
);
|
|
30163
|
+
}
|
|
29331
30164
|
/**
|
|
29332
30165
|
* Emit a tool-invocation event into the transcript timeline. Pushes a
|
|
29333
30166
|
* `role=tool` entry into `history` (so it appears in the dashboard
|
|
@@ -29395,7 +30228,8 @@ var init_stream_handler = __esm({
|
|
|
29395
30228
|
}
|
|
29396
30229
|
return;
|
|
29397
30230
|
}
|
|
29398
|
-
const
|
|
30231
|
+
const effectiveTools = this.resolvedTools ?? this.deps.agent.tools;
|
|
30232
|
+
const toolDef = effectiveTools?.find((t) => t.name === fc.name);
|
|
29399
30233
|
if (!toolDef) {
|
|
29400
30234
|
getLogger().warn(`Realtime tool '${fc.name}' not found in agent.tools \u2014 skipping`);
|
|
29401
30235
|
const result2 = JSON.stringify({ error: `Tool '${fc.name}' not registered`, fallback: true });
|
|
@@ -29418,7 +30252,8 @@ var init_stream_handler = __esm({
|
|
|
29418
30252
|
if (msg && this.adapter instanceof OpenAIRealtimeAdapter) {
|
|
29419
30253
|
const realtimeAdapter = this.adapter;
|
|
29420
30254
|
reassuranceTimer = setTimeout(() => {
|
|
29421
|
-
realtimeAdapter.
|
|
30255
|
+
const fire = typeof realtimeAdapter.sendReassurance === "function" ? realtimeAdapter.sendReassurance(msg) : realtimeAdapter.sendText(msg);
|
|
30256
|
+
fire.catch((e) => {
|
|
29422
30257
|
getLogger().warn(`Reassurance message failed for tool '${fc.name}': ${String(e)}`);
|
|
29423
30258
|
});
|
|
29424
30259
|
}, afterMs);
|
|
@@ -29438,7 +30273,8 @@ var init_stream_handler = __esm({
|
|
|
29438
30273
|
parsedArgs,
|
|
29439
30274
|
{
|
|
29440
30275
|
call_id: this.callId,
|
|
29441
|
-
caller: this.caller
|
|
30276
|
+
caller: this.caller,
|
|
30277
|
+
callee: this.callee
|
|
29442
30278
|
},
|
|
29443
30279
|
onProgress
|
|
29444
30280
|
);
|
|
@@ -29588,21 +30424,21 @@ async function appendJsonl(filePath, record2) {
|
|
|
29588
30424
|
await import_node_fs2.promises.mkdir(path4.dirname(filePath), { recursive: true });
|
|
29589
30425
|
await import_node_fs2.promises.appendFile(filePath, JSON.stringify(record2) + "\n", { encoding: "utf8" });
|
|
29590
30426
|
}
|
|
29591
|
-
function
|
|
30427
|
+
async function rmTreeAsync(target) {
|
|
29592
30428
|
try {
|
|
29593
|
-
for (const child of
|
|
30429
|
+
for (const child of await import_node_fs2.promises.readdir(target)) {
|
|
29594
30430
|
const childPath = path4.join(target, child);
|
|
29595
|
-
const stat =
|
|
30431
|
+
const stat = await import_node_fs2.promises.lstat(childPath);
|
|
29596
30432
|
if (stat.isDirectory()) {
|
|
29597
|
-
|
|
30433
|
+
await rmTreeAsync(childPath);
|
|
29598
30434
|
} else {
|
|
29599
30435
|
try {
|
|
29600
|
-
|
|
30436
|
+
await import_node_fs2.promises.unlink(childPath);
|
|
29601
30437
|
} catch {
|
|
29602
30438
|
}
|
|
29603
30439
|
}
|
|
29604
30440
|
}
|
|
29605
|
-
|
|
30441
|
+
await import_node_fs2.promises.rmdir(target);
|
|
29606
30442
|
} catch {
|
|
29607
30443
|
}
|
|
29608
30444
|
}
|
|
@@ -29684,7 +30520,9 @@ var init_call_log = __esm({
|
|
|
29684
30520
|
getLogger().warn(`call_log write failed (${sanitizeLogValue(callId)}): ${sanitizeLogValue(String(err))}`);
|
|
29685
30521
|
}
|
|
29686
30522
|
if (crypto5.randomBytes(1)[0] < 5) {
|
|
29687
|
-
this.sweepOldDays()
|
|
30523
|
+
void this.sweepOldDays().catch(
|
|
30524
|
+
(e) => getLogger().debug(`call_log sweep failed: ${sanitizeLogValue(String(e))}`)
|
|
30525
|
+
);
|
|
29688
30526
|
}
|
|
29689
30527
|
}
|
|
29690
30528
|
/** Append a single turn record to the call's `transcript.jsonl`. */
|
|
@@ -29759,23 +30597,27 @@ var init_call_log = __esm({
|
|
|
29759
30597
|
}
|
|
29760
30598
|
}
|
|
29761
30599
|
// --- Retention ---------------------------------------------------------
|
|
29762
|
-
sweepOldDays() {
|
|
30600
|
+
async sweepOldDays() {
|
|
29763
30601
|
if (this.root === null) return;
|
|
29764
30602
|
const days = retentionDays();
|
|
29765
30603
|
if (days === 0) return;
|
|
29766
30604
|
const cutoff = Date.now() / 1e3 - days * 86400;
|
|
29767
30605
|
const callsRoot = path4.join(this.root, "calls");
|
|
29768
|
-
if (!fs4.existsSync(callsRoot)) return;
|
|
29769
30606
|
try {
|
|
29770
|
-
|
|
30607
|
+
await import_node_fs2.promises.access(callsRoot);
|
|
30608
|
+
} catch {
|
|
30609
|
+
return;
|
|
30610
|
+
}
|
|
30611
|
+
try {
|
|
30612
|
+
for (const yearName of await import_node_fs2.promises.readdir(callsRoot)) {
|
|
29771
30613
|
if (!/^\d+$/.test(yearName)) continue;
|
|
29772
30614
|
const yearDir = path4.join(callsRoot, yearName);
|
|
29773
|
-
if (!
|
|
29774
|
-
for (const monthName of
|
|
30615
|
+
if (!(await import_node_fs2.promises.stat(yearDir)).isDirectory()) continue;
|
|
30616
|
+
for (const monthName of await import_node_fs2.promises.readdir(yearDir)) {
|
|
29775
30617
|
if (!/^\d+$/.test(monthName)) continue;
|
|
29776
30618
|
const monthDir = path4.join(yearDir, monthName);
|
|
29777
|
-
if (!
|
|
29778
|
-
for (const dayName of
|
|
30619
|
+
if (!(await import_node_fs2.promises.stat(monthDir)).isDirectory()) continue;
|
|
30620
|
+
for (const dayName of await import_node_fs2.promises.readdir(monthDir)) {
|
|
29779
30621
|
if (!/^\d+$/.test(dayName)) continue;
|
|
29780
30622
|
const dayDir = path4.join(monthDir, dayName);
|
|
29781
30623
|
const y = Number.parseInt(yearName, 10);
|
|
@@ -29783,16 +30625,16 @@ var init_call_log = __esm({
|
|
|
29783
30625
|
const d = Number.parseInt(dayName, 10);
|
|
29784
30626
|
const ts = Date.UTC(y, m - 1, d) / 1e3;
|
|
29785
30627
|
if (ts < cutoff) {
|
|
29786
|
-
|
|
30628
|
+
await rmTreeAsync(dayDir);
|
|
29787
30629
|
}
|
|
29788
30630
|
}
|
|
29789
30631
|
try {
|
|
29790
|
-
if (
|
|
30632
|
+
if ((await import_node_fs2.promises.readdir(monthDir)).length === 0) await import_node_fs2.promises.rmdir(monthDir);
|
|
29791
30633
|
} catch {
|
|
29792
30634
|
}
|
|
29793
30635
|
}
|
|
29794
30636
|
try {
|
|
29795
|
-
if (
|
|
30637
|
+
if ((await import_node_fs2.promises.readdir(yearDir)).length === 0) await import_node_fs2.promises.rmdir(yearDir);
|
|
29796
30638
|
} catch {
|
|
29797
30639
|
}
|
|
29798
30640
|
}
|
|
@@ -29833,13 +30675,16 @@ function telnyxHangupOutcome(cause) {
|
|
|
29833
30675
|
if (c === "call_rejected" || c === "rejected" || c === "destination_out_of_order") return "failed";
|
|
29834
30676
|
return null;
|
|
29835
30677
|
}
|
|
29836
|
-
function validateWebhookUrl(url2) {
|
|
30678
|
+
function validateWebhookUrl(url2, allowLoopback = false) {
|
|
29837
30679
|
const parsed = new URL(url2);
|
|
29838
30680
|
if (!["http:", "https:"].includes(parsed.protocol)) {
|
|
29839
30681
|
throw new Error(`Invalid webhook URL scheme: ${parsed.protocol}`);
|
|
29840
30682
|
}
|
|
29841
30683
|
const rawHost = parsed.hostname;
|
|
29842
30684
|
const host = rawHost.replace(/^\[/, "").replace(/\]$/, "").toLowerCase();
|
|
30685
|
+
if (allowLoopback) {
|
|
30686
|
+
return;
|
|
30687
|
+
}
|
|
29843
30688
|
const BLOCKED_HOSTNAMES = /* @__PURE__ */ new Set([
|
|
29844
30689
|
"localhost",
|
|
29845
30690
|
"ip6-localhost",
|
|
@@ -29881,6 +30726,34 @@ function validateWebhookUrl(url2) {
|
|
|
29881
30726
|
}
|
|
29882
30727
|
}
|
|
29883
30728
|
}
|
|
30729
|
+
function extractHost(value) {
|
|
30730
|
+
const trimmed = value.trim();
|
|
30731
|
+
if (!trimmed) return "";
|
|
30732
|
+
let host = trimmed.replace(/^[a-z]+:\/\//i, "").replace(/\/.*$/, "");
|
|
30733
|
+
if (host.startsWith("[")) {
|
|
30734
|
+
return host.slice(1).split("]", 1)[0].toLowerCase();
|
|
30735
|
+
}
|
|
30736
|
+
if (!host.includes("::")) {
|
|
30737
|
+
const lastColon = host.lastIndexOf(":");
|
|
30738
|
+
if (lastColon !== -1 && /^\d+$/.test(host.slice(lastColon + 1))) {
|
|
30739
|
+
host = host.slice(0, lastColon);
|
|
30740
|
+
}
|
|
30741
|
+
}
|
|
30742
|
+
return host.toLowerCase();
|
|
30743
|
+
}
|
|
30744
|
+
function isLoopbackHost(value) {
|
|
30745
|
+
const host = extractHost(value);
|
|
30746
|
+
if (!host) return false;
|
|
30747
|
+
if (host === "localhost" || host === "ip6-localhost" || host === "ip6-loopback") {
|
|
30748
|
+
return true;
|
|
30749
|
+
}
|
|
30750
|
+
if (host === "::1" || host === "::ffff:127.0.0.1") return true;
|
|
30751
|
+
const v4 = /^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/.exec(host);
|
|
30752
|
+
if (v4) {
|
|
30753
|
+
return parseInt(v4[1], 10) === 127;
|
|
30754
|
+
}
|
|
30755
|
+
return false;
|
|
30756
|
+
}
|
|
29884
30757
|
function validateTelnyxSignature(rawBody, signature, timestamp, publicKey, toleranceSec = 300) {
|
|
29885
30758
|
try {
|
|
29886
30759
|
const ts = parseInt(timestamp, 10);
|
|
@@ -29944,7 +30817,7 @@ function resolveVariables(template, variables) {
|
|
|
29944
30817
|
}
|
|
29945
30818
|
return result;
|
|
29946
30819
|
}
|
|
29947
|
-
function buildAIAdapter(config2, agent, resolvedPrompt) {
|
|
30820
|
+
function buildAIAdapter(config2, agent, resolvedPrompt, toolsOverride) {
|
|
29948
30821
|
const engine = agent.engine;
|
|
29949
30822
|
if (agent.provider === "elevenlabs_convai") {
|
|
29950
30823
|
if (!engine || engine.kind !== "elevenlabs_convai") {
|
|
@@ -29959,12 +30832,24 @@ function buildAIAdapter(config2, agent, resolvedPrompt) {
|
|
|
29959
30832
|
agent.firstMessage ?? ""
|
|
29960
30833
|
);
|
|
29961
30834
|
}
|
|
29962
|
-
const
|
|
29963
|
-
|
|
29964
|
-
description
|
|
29965
|
-
|
|
29966
|
-
|
|
29967
|
-
|
|
30835
|
+
const preamblesOn = Boolean(agent.toolCallPreambles);
|
|
30836
|
+
const agentTools = (toolsOverride ?? agent.tools)?.map((t) => {
|
|
30837
|
+
let description = t.description;
|
|
30838
|
+
const reassurance = t.reassurance;
|
|
30839
|
+
const sample = typeof reassurance === "string" ? reassurance : void 0;
|
|
30840
|
+
if (preamblesOn && sample) {
|
|
30841
|
+
description = `${description}
|
|
30842
|
+
|
|
30843
|
+
Preamble sample phrases:
|
|
30844
|
+
- ${sample}`;
|
|
30845
|
+
}
|
|
30846
|
+
return {
|
|
30847
|
+
name: t.name,
|
|
30848
|
+
description,
|
|
30849
|
+
parameters: t.parameters,
|
|
30850
|
+
strict: t.strict
|
|
30851
|
+
};
|
|
30852
|
+
}) ?? [];
|
|
29968
30853
|
const tools = [...agentTools, TRANSFER_CALL_TOOL, END_CALL_TOOL];
|
|
29969
30854
|
const isOpenAIEngine = engine && (engine.kind === "openai_realtime" || engine.kind === "openai_realtime_2");
|
|
29970
30855
|
const openaiKey = isOpenAIEngine ? engine.apiKey : config2.openaiKey ?? "";
|
|
@@ -29976,8 +30861,27 @@ function buildAIAdapter(config2, agent, resolvedPrompt) {
|
|
|
29976
30861
|
if (engine.inputAudioTranscriptionModel !== void 0) {
|
|
29977
30862
|
adapterOptions.inputAudioTranscriptionModel = engine.inputAudioTranscriptionModel;
|
|
29978
30863
|
}
|
|
30864
|
+
if (engine.noiseReduction !== void 0) {
|
|
30865
|
+
adapterOptions.noiseReduction = engine.noiseReduction;
|
|
30866
|
+
}
|
|
30867
|
+
if (engine.turnDetection !== void 0) {
|
|
30868
|
+
adapterOptions.turnDetection = engine.turnDetection;
|
|
30869
|
+
}
|
|
30870
|
+
if (engine.gateResponseOnTranscript !== void 0) {
|
|
30871
|
+
adapterOptions.gateResponseOnTranscript = engine.gateResponseOnTranscript;
|
|
30872
|
+
}
|
|
30873
|
+
}
|
|
30874
|
+
const agentOpts = agent;
|
|
30875
|
+
if (agentOpts.openaiRealtimeNoiseReduction !== void 0) {
|
|
30876
|
+
adapterOptions.noiseReduction = agentOpts.openaiRealtimeNoiseReduction;
|
|
29979
30877
|
}
|
|
29980
|
-
|
|
30878
|
+
if (agentOpts.realtimeTurnDetection !== void 0) {
|
|
30879
|
+
adapterOptions.turnDetection = agentOpts.realtimeTurnDetection;
|
|
30880
|
+
}
|
|
30881
|
+
if (agentOpts.openaiRealtimeGateResponseOnTranscript !== void 0) {
|
|
30882
|
+
adapterOptions.gateResponseOnTranscript = agentOpts.openaiRealtimeGateResponseOnTranscript;
|
|
30883
|
+
}
|
|
30884
|
+
const AdapterCtor = OpenAIRealtime2Adapter;
|
|
29981
30885
|
return new AdapterCtor(
|
|
29982
30886
|
openaiKey,
|
|
29983
30887
|
agent.model,
|
|
@@ -30006,7 +30910,6 @@ var init_server = __esm({
|
|
|
30006
30910
|
import_express = __toESM(require("express"));
|
|
30007
30911
|
import_http = require("http");
|
|
30008
30912
|
import_ws5 = require("ws");
|
|
30009
|
-
init_openai_realtime();
|
|
30010
30913
|
init_openai_realtime_2();
|
|
30011
30914
|
init_elevenlabs_convai();
|
|
30012
30915
|
init_plivo_adapter();
|
|
@@ -30069,6 +30972,11 @@ var init_server = __esm({
|
|
|
30069
30972
|
getLogger().warn(`TwilioBridge.transferCall rejected: invalid CallSid ${JSON.stringify(callId)}`);
|
|
30070
30973
|
return;
|
|
30071
30974
|
}
|
|
30975
|
+
const E164_RE = /^\+[1-9]\d{6,14}$/;
|
|
30976
|
+
if (!E164_RE.test(toNumber)) {
|
|
30977
|
+
getLogger().warn(`TwilioBridge.transferCall rejected: invalid target ${JSON.stringify(toNumber)}`);
|
|
30978
|
+
return;
|
|
30979
|
+
}
|
|
30072
30980
|
const transferUrl = `https://api.twilio.com/2010-04-01/Accounts/${this.config.twilioSid}/Calls/${callId}.json`;
|
|
30073
30981
|
await fetch(transferUrl, {
|
|
30074
30982
|
method: "POST",
|
|
@@ -30275,7 +31183,7 @@ var init_server = __esm({
|
|
|
30275
31183
|
};
|
|
30276
31184
|
GRACEFUL_SHUTDOWN_TIMEOUT_MS = 1e4;
|
|
30277
31185
|
EmbeddedServer = class {
|
|
30278
|
-
constructor(config2, agent, onCallStart, onCallEnd, onTranscript, onMessage, recording = false, voicemailMessage = "", onMetrics, pricingOverrides, dashboard = true, dashboardToken = "") {
|
|
31186
|
+
constructor(config2, agent, onCallStart, onCallEnd, onTranscript, onMessage, recording = false, voicemailMessage = "", onMetrics, pricingOverrides, dashboard = true, dashboardToken = "", allowInsecureDashboard = false) {
|
|
30279
31187
|
this.config = config2;
|
|
30280
31188
|
this.agent = agent;
|
|
30281
31189
|
this.onCallStart = onCallStart;
|
|
@@ -30287,6 +31195,7 @@ var init_server = __esm({
|
|
|
30287
31195
|
this.onMetrics = onMetrics;
|
|
30288
31196
|
this.dashboard = dashboard;
|
|
30289
31197
|
this.dashboardToken = dashboardToken;
|
|
31198
|
+
this.allowInsecureDashboard = allowInsecureDashboard;
|
|
30290
31199
|
this.metricsStore = new MetricsStore();
|
|
30291
31200
|
this.pricing = mergePricing(pricingOverrides);
|
|
30292
31201
|
const logRoot = config2.persistRoot === void 0 ? resolveLogRoot() : config2.persistRoot;
|
|
@@ -30313,8 +31222,31 @@ var init_server = __esm({
|
|
|
30313
31222
|
onMetrics;
|
|
30314
31223
|
dashboard;
|
|
30315
31224
|
dashboardToken;
|
|
31225
|
+
allowInsecureDashboard;
|
|
30316
31226
|
server = null;
|
|
30317
31227
|
wss = null;
|
|
31228
|
+
/**
|
|
31229
|
+
* Whether the dashboard + ``/api/*`` routes were mounted in ``start()``.
|
|
31230
|
+
* The dashboard is now ALWAYS mounted when enabled (it never 404s): an
|
|
31231
|
+
* exposed, token-less bind is protected with an auto-generated token
|
|
31232
|
+
* rather than refused. This flag is therefore ``true`` whenever the
|
|
31233
|
+
* dashboard is enabled — kept so the startup banner can gate on it.
|
|
31234
|
+
*/
|
|
31235
|
+
dashboardMounted = false;
|
|
31236
|
+
/**
|
|
31237
|
+
* The token actually in effect for the dashboard + ``/api/*`` routes,
|
|
31238
|
+
* resolved in ``start()``. One of: the explicit ``dashboardToken`` if set;
|
|
31239
|
+
* a freshly generated UUID when the bind is exposed and
|
|
31240
|
+
* ``allowInsecureDashboard`` is ``false``; or ``''`` (OPEN) for loopback
|
|
31241
|
+
* local dev and for an exposed bind with ``allowInsecureDashboard=true``.
|
|
31242
|
+
* Read by the startup banner (to print the ready URL with ``?token=``) and
|
|
31243
|
+
* by authentic tests (to authenticate).
|
|
31244
|
+
*/
|
|
31245
|
+
effectiveDashboardToken = "";
|
|
31246
|
+
/** The token in effect for the dashboard, resolved at ``start()``. Empty string = served OPEN. */
|
|
31247
|
+
get resolvedDashboardToken() {
|
|
31248
|
+
return this.effectiveDashboardToken;
|
|
31249
|
+
}
|
|
30318
31250
|
twilioTokenWarningLogged = false;
|
|
30319
31251
|
telnyxSigWarningLogged = false;
|
|
30320
31252
|
metricsStore;
|
|
@@ -30332,12 +31264,14 @@ var init_server = __esm({
|
|
|
30332
31264
|
activeConnections = /* @__PURE__ */ new Set();
|
|
30333
31265
|
activeCallIds = /* @__PURE__ */ new Map();
|
|
30334
31266
|
/**
|
|
30335
|
-
* Per-call AMD result
|
|
30336
|
-
*
|
|
30337
|
-
*
|
|
30338
|
-
*
|
|
31267
|
+
* Per-call AMD result callbacks keyed by CallSid / call_control_id.
|
|
31268
|
+
* Public so ``client.ts`` can register a callback per outbound call.
|
|
31269
|
+
* The Map slot is deleted after the callback fires once — preventing
|
|
31270
|
+
* cross-call misfires when multiple concurrent outbound calls are in
|
|
31271
|
+
* flight (single-slot was a race condition: the last registered callback
|
|
31272
|
+
* would win for every in-flight AMD result).
|
|
30339
31273
|
*/
|
|
30340
|
-
|
|
31274
|
+
onMachineDetectionByCallSid = /* @__PURE__ */ new Map();
|
|
30341
31275
|
/**
|
|
30342
31276
|
* Pre-warm first-message audio accessor wired by ``Patter.serve()``.
|
|
30343
31277
|
* The per-call StreamHandler invokes this with its ``callId`` at the
|
|
@@ -30458,6 +31392,42 @@ var init_server = __esm({
|
|
|
30458
31392
|
this.completions.clear();
|
|
30459
31393
|
this.amdClass.clear();
|
|
30460
31394
|
}
|
|
31395
|
+
/**
|
|
31396
|
+
* Decide whether this server is reachable beyond loopback (127.0.0.1).
|
|
31397
|
+
*
|
|
31398
|
+
* The dashboard serves call transcripts and metadata (PII), so before
|
|
31399
|
+
* mounting it unauthenticated we must know whether anyone off-host can
|
|
31400
|
+
* reach the port. Signals (in order):
|
|
31401
|
+
*
|
|
31402
|
+
* (a)+(b) — a public webhook URL. ``client.ts`` resolves
|
|
31403
|
+
* ``config.webhookUrl`` to the live hostname for every serve path:
|
|
31404
|
+
* a cloudflared quick-tunnel host, a {@link StaticTunnel} hostname,
|
|
31405
|
+
* or an explicit ``webhookUrl``. A tunnel directive (signal a) and a
|
|
31406
|
+
* public webhook URL (signal b) therefore both surface here as a
|
|
31407
|
+
* non-loopback, non-private webhook host. This is the case that
|
|
31408
|
+
* matters for tunnels — the whole port (dashboard included) is
|
|
31409
|
+
* published on a public ``*.trycloudflare.com`` URL.
|
|
31410
|
+
*
|
|
31411
|
+
* (c) — an EXPLICIT non-loopback bind override via ``PATTER_BIND_HOST``.
|
|
31412
|
+
* Node's ``http.Server.listen(port, host)`` defaults to 127.0.0.1
|
|
31413
|
+
* here (see ``start()``), so plain local dev is never flagged; only
|
|
31414
|
+
* an operator who set ``PATTER_BIND_HOST`` to e.g. ``0.0.0.0`` is.
|
|
31415
|
+
*
|
|
31416
|
+
* Only loopback webhook hosts (127.0.0.0/8, localhost, ::1) are treated as
|
|
31417
|
+
* not-exposed. RFC1918 / LAN hosts ARE exposure — they are reachable by
|
|
31418
|
+
* other machines on the network — matching the Python SDK's gate.
|
|
31419
|
+
*/
|
|
31420
|
+
isExposed() {
|
|
31421
|
+
const bindOverride = process.env.PATTER_BIND_HOST;
|
|
31422
|
+
if (bindOverride && !isLoopbackHost(bindOverride)) {
|
|
31423
|
+
return true;
|
|
31424
|
+
}
|
|
31425
|
+
const host = extractHost(this.config.webhookUrl ?? "");
|
|
31426
|
+
if (host && !isLoopbackHost(host)) {
|
|
31427
|
+
return true;
|
|
31428
|
+
}
|
|
31429
|
+
return false;
|
|
31430
|
+
}
|
|
30461
31431
|
/** Bind HTTP + WebSocket listeners on `port`, mount carrier webhooks and dashboard routes. */
|
|
30462
31432
|
async start(port = 8e3) {
|
|
30463
31433
|
const webhookUrlPattern = /^[a-zA-Z0-9][a-zA-Z0-9.\-]+[a-zA-Z0-9]$/;
|
|
@@ -30493,6 +31463,9 @@ var init_server = __esm({
|
|
|
30493
31463
|
}
|
|
30494
31464
|
next();
|
|
30495
31465
|
});
|
|
31466
|
+
req.on("error", (err) => {
|
|
31467
|
+
next(err);
|
|
31468
|
+
});
|
|
30496
31469
|
} else {
|
|
30497
31470
|
next();
|
|
30498
31471
|
}
|
|
@@ -30503,8 +31476,25 @@ var init_server = __esm({
|
|
|
30503
31476
|
res.json({ status: "ok", mode: "local" });
|
|
30504
31477
|
});
|
|
30505
31478
|
if (this.dashboard) {
|
|
30506
|
-
|
|
30507
|
-
|
|
31479
|
+
const exposed = this.isExposed();
|
|
31480
|
+
if (this.dashboardToken) {
|
|
31481
|
+
this.effectiveDashboardToken = this.dashboardToken;
|
|
31482
|
+
} else if (exposed && !this.allowInsecureDashboard) {
|
|
31483
|
+
this.effectiveDashboardToken = import_node_crypto4.default.randomUUID();
|
|
31484
|
+
getLogger().warn(
|
|
31485
|
+
`Dashboard is reachable beyond 127.0.0.1 without a configured token; protecting it with an auto-generated token. Open: http://127.0.0.1:${port}/?token=${this.effectiveDashboardToken} Set dashboardToken for a stable token, or allowInsecureDashboard=true to serve it open.`
|
|
31486
|
+
);
|
|
31487
|
+
} else if (exposed && this.allowInsecureDashboard) {
|
|
31488
|
+
this.effectiveDashboardToken = "";
|
|
31489
|
+
getLogger().warn(
|
|
31490
|
+
"Dashboard served WITHOUT authentication on a publicly-reachable bind (allowInsecureDashboard=true). Call transcripts and metadata are exposed to anyone who can reach this URL."
|
|
31491
|
+
);
|
|
31492
|
+
} else {
|
|
31493
|
+
this.effectiveDashboardToken = "";
|
|
31494
|
+
}
|
|
31495
|
+
mountDashboard(app, this.metricsStore, this.effectiveDashboardToken);
|
|
31496
|
+
mountApi(app, this.metricsStore, this.effectiveDashboardToken);
|
|
31497
|
+
this.dashboardMounted = true;
|
|
30508
31498
|
}
|
|
30509
31499
|
app.post("/webhooks/twilio/status", (req, res) => {
|
|
30510
31500
|
if (this.config.twilioToken) {
|
|
@@ -30590,8 +31580,9 @@ var init_server = __esm({
|
|
|
30590
31580
|
if (callSid) {
|
|
30591
31581
|
this.amdClass.set(callSid, classifyTwilioAmd(answeredBy));
|
|
30592
31582
|
}
|
|
30593
|
-
const cb = this.
|
|
31583
|
+
const cb = callSid ? this.onMachineDetectionByCallSid.get(callSid) : void 0;
|
|
30594
31584
|
if (cb && callSid) {
|
|
31585
|
+
this.onMachineDetectionByCallSid.delete(callSid);
|
|
30595
31586
|
try {
|
|
30596
31587
|
await cb({
|
|
30597
31588
|
call_id: callSid,
|
|
@@ -30718,8 +31709,9 @@ var init_server = __esm({
|
|
|
30718
31709
|
if (amdCallId) {
|
|
30719
31710
|
this.amdClass.set(amdCallId, classifyTelnyxAmd(amdResult));
|
|
30720
31711
|
}
|
|
30721
|
-
const cbTx = this.
|
|
31712
|
+
const cbTx = amdCallId ? this.onMachineDetectionByCallSid.get(amdCallId) : void 0;
|
|
30722
31713
|
if (cbTx && amdCallId) {
|
|
31714
|
+
this.onMachineDetectionByCallSid.delete(amdCallId);
|
|
30723
31715
|
try {
|
|
30724
31716
|
await cbTx({
|
|
30725
31717
|
call_id: amdCallId,
|
|
@@ -30887,8 +31879,13 @@ var init_server = __esm({
|
|
|
30887
31879
|
getLogger().info(`AMD result for ${sanitizeLogValue(callUuid)}: ${sanitizeLogValue(amdRaw)}`);
|
|
30888
31880
|
const classification = classifyPlivoAmd(amdRaw);
|
|
30889
31881
|
if (callUuid) this.amdClass.set(callUuid, classification);
|
|
30890
|
-
|
|
31882
|
+
let cbKey = callUuid && this.onMachineDetectionByCallSid.has(callUuid) ? callUuid : void 0;
|
|
31883
|
+
if (cbKey === void 0 && this.onMachineDetectionByCallSid.size === 1) {
|
|
31884
|
+
cbKey = this.onMachineDetectionByCallSid.keys().next().value;
|
|
31885
|
+
}
|
|
31886
|
+
const cb = cbKey !== void 0 ? this.onMachineDetectionByCallSid.get(cbKey) : void 0;
|
|
30891
31887
|
if (cb && callUuid) {
|
|
31888
|
+
if (cbKey !== void 0) this.onMachineDetectionByCallSid.delete(cbKey);
|
|
30892
31889
|
try {
|
|
30893
31890
|
await cb({
|
|
30894
31891
|
call_id: callUuid,
|
|
@@ -30969,27 +31966,34 @@ var init_server = __esm({
|
|
|
30969
31966
|
this.handleTwilioStream(ws, url2);
|
|
30970
31967
|
}
|
|
30971
31968
|
});
|
|
30972
|
-
await new Promise((resolve2) => {
|
|
31969
|
+
await new Promise((resolve2, reject) => {
|
|
30973
31970
|
const bindHost = process.env.PATTER_BIND_HOST ?? "127.0.0.1";
|
|
31971
|
+
this.server.once("error", reject);
|
|
30974
31972
|
this.server.listen(port, bindHost, () => {
|
|
31973
|
+
this.server.off("error", reject);
|
|
30975
31974
|
getLogger().info(`Server on port ${port}`);
|
|
30976
31975
|
getLogger().info(`Webhook: https://${this.config.webhookUrl}`);
|
|
30977
31976
|
getLogger().info(`Phone: ${this.config.phoneNumber}`);
|
|
30978
31977
|
const model = this.agent.model ?? "";
|
|
30979
|
-
|
|
31978
|
+
const calibrated = ["gpt-realtime-mini", "gpt-4o-mini-realtime-preview"];
|
|
31979
|
+
if (model && !calibrated.includes(model) && model.includes("realtime")) {
|
|
30980
31980
|
getLogger().warn(
|
|
30981
|
-
`Agent uses "${sanitizeLogValue(model)}" but DEFAULT_PRICING.openai_realtime is calibrated for
|
|
31981
|
+
`Agent uses "${sanitizeLogValue(model)}" but DEFAULT_PRICING.openai_realtime is calibrated for the default Realtime models (gpt-realtime-mini / gpt-4o-mini-realtime-preview). Pass Patter({ pricing: { openai_realtime: {...} } }) to set rates for this model, otherwise the dashboard cost display will under-report.`
|
|
30982
31982
|
);
|
|
30983
31983
|
}
|
|
30984
|
-
if (this.dashboard) {
|
|
30985
|
-
|
|
30986
|
-
|
|
30987
|
-
|
|
31984
|
+
if (this.dashboard && this.dashboardMounted) {
|
|
31985
|
+
getLogger().info("\u2500\u2500\u2500\u2500 Dashboard \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500");
|
|
31986
|
+
if (this.effectiveDashboardToken) {
|
|
31987
|
+
getLogger().info(
|
|
31988
|
+
`URL: http://127.0.0.1:${port}/?token=${this.effectiveDashboardToken}`
|
|
31989
|
+
);
|
|
31990
|
+
} else {
|
|
31991
|
+
getLogger().info(`URL: http://127.0.0.1:${port}/`);
|
|
30988
31992
|
getLogger().warn(
|
|
30989
31993
|
"Dashboard is enabled without authentication. Set dashboardToken to protect call data. This is safe for local development but should not be exposed on a public network."
|
|
30990
31994
|
);
|
|
30991
31995
|
}
|
|
30992
|
-
|
|
31996
|
+
getLogger().info("\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500");
|
|
30993
31997
|
}
|
|
30994
31998
|
resolve2();
|
|
30995
31999
|
});
|
|
@@ -31065,7 +32069,7 @@ var init_server = __esm({
|
|
|
31065
32069
|
onMessage: this.onMessage,
|
|
31066
32070
|
onMetrics: wrappedMetrics,
|
|
31067
32071
|
recording: this.recording,
|
|
31068
|
-
buildAIAdapter: (resolvedPrompt) => buildAIAdapter(this.config, this.agent, resolvedPrompt),
|
|
32072
|
+
buildAIAdapter: (resolvedPrompt, toolsOverride) => buildAIAdapter(this.config, this.agent, resolvedPrompt, toolsOverride),
|
|
31069
32073
|
sanitizeVariables,
|
|
31070
32074
|
resolveVariables,
|
|
31071
32075
|
popPrewarmAudio: this.popPrewarmAudio,
|
|
@@ -31339,17 +32343,18 @@ var init_server = __esm({
|
|
|
31339
32343
|
}
|
|
31340
32344
|
if (this.activeConnections.size > 0) {
|
|
31341
32345
|
getLogger().info(`Waiting for ${this.activeConnections.size} active connection(s) to close...`);
|
|
31342
|
-
|
|
31343
|
-
|
|
31344
|
-
|
|
31345
|
-
|
|
31346
|
-
|
|
31347
|
-
|
|
31348
|
-
|
|
31349
|
-
|
|
31350
|
-
|
|
31351
|
-
|
|
31352
|
-
]);
|
|
32346
|
+
let checkInterval;
|
|
32347
|
+
const drainPromise = new Promise((resolve2) => {
|
|
32348
|
+
checkInterval = setInterval(() => {
|
|
32349
|
+
if (this.activeConnections.size === 0) {
|
|
32350
|
+
clearInterval(checkInterval);
|
|
32351
|
+
resolve2();
|
|
32352
|
+
}
|
|
32353
|
+
}, 100);
|
|
32354
|
+
});
|
|
32355
|
+
const timeoutPromise = new Promise((resolve2) => setTimeout(resolve2, GRACEFUL_SHUTDOWN_TIMEOUT_MS));
|
|
32356
|
+
await Promise.race([drainPromise, timeoutPromise]);
|
|
32357
|
+
clearInterval(checkInterval);
|
|
31353
32358
|
}
|
|
31354
32359
|
if (this.activeConnections.size > 0) {
|
|
31355
32360
|
getLogger().info(`Force-closing ${this.activeConnections.size} remaining connection(s)`);
|
|
@@ -31478,6 +32483,9 @@ __export(carrier_config_exports, {
|
|
|
31478
32483
|
configureTelnyxNumber: () => configureTelnyxNumber,
|
|
31479
32484
|
configureTwilioNumber: () => configureTwilioNumber
|
|
31480
32485
|
});
|
|
32486
|
+
function redactPhone2(n) {
|
|
32487
|
+
return n.slice(0, 3) + "***" + n.slice(-4);
|
|
32488
|
+
}
|
|
31481
32489
|
async function configureTwilioNumber(accountSid, authToken, phoneNumber, voiceUrl) {
|
|
31482
32490
|
const auth2 = `Basic ${Buffer.from(`${accountSid}:${authToken}`).toString("base64")}`;
|
|
31483
32491
|
const listUrl = `${TWILIO_API_BASE}/Accounts/${accountSid}/IncomingPhoneNumbers.json?PhoneNumber=${encodeURIComponent(phoneNumber)}`;
|
|
@@ -31493,7 +32501,7 @@ async function configureTwilioNumber(accountSid, authToken, phoneNumber, voiceUr
|
|
|
31493
32501
|
const body = await listResp.json();
|
|
31494
32502
|
const match = body.incoming_phone_numbers?.[0];
|
|
31495
32503
|
if (!match) {
|
|
31496
|
-
throw new Error(`Twilio number ${phoneNumber} not found on account ${accountSid}`);
|
|
32504
|
+
throw new Error(`Twilio number ${redactPhone2(phoneNumber)} not found on account ${accountSid}`);
|
|
31497
32505
|
}
|
|
31498
32506
|
const updateUrl = `${TWILIO_API_BASE}/Accounts/${accountSid}/IncomingPhoneNumbers/${match.sid}.json`;
|
|
31499
32507
|
const form = new URLSearchParams({ VoiceUrl: voiceUrl, VoiceMethod: "POST" });
|
|
@@ -31512,17 +32520,20 @@ async function configureTwilioNumber(accountSid, authToken, phoneNumber, voiceUr
|
|
|
31512
32520
|
}
|
|
31513
32521
|
}
|
|
31514
32522
|
async function configureTelnyxNumber(apiKey, connectionId, phoneNumber) {
|
|
31515
|
-
const resp = await fetch(
|
|
31516
|
-
|
|
31517
|
-
|
|
31518
|
-
|
|
31519
|
-
|
|
31520
|
-
|
|
31521
|
-
|
|
31522
|
-
|
|
32523
|
+
const resp = await fetch(
|
|
32524
|
+
`${TELNYX_API_BASE}/phone_numbers/${encodeURIComponent(phoneNumber)}/voice`,
|
|
32525
|
+
{
|
|
32526
|
+
method: "PATCH",
|
|
32527
|
+
headers: {
|
|
32528
|
+
Authorization: `Bearer ${apiKey}`,
|
|
32529
|
+
"Content-Type": "application/json"
|
|
32530
|
+
},
|
|
32531
|
+
body: JSON.stringify({ connection_id: connectionId, tech_prefix_enabled: false })
|
|
32532
|
+
}
|
|
32533
|
+
);
|
|
31523
32534
|
if (!resp.ok) {
|
|
31524
32535
|
throw new Error(
|
|
31525
|
-
`Telnyx PATCH /phone_numbers/${phoneNumber} failed: ${resp.status} ${await resp.text()}`
|
|
32536
|
+
`Telnyx PATCH /phone_numbers/${redactPhone2(phoneNumber)}/voice failed: ${resp.status} ${await resp.text()}`
|
|
31526
32537
|
);
|
|
31527
32538
|
}
|
|
31528
32539
|
}
|
|
@@ -31572,7 +32583,7 @@ async function autoConfigureCarrier(params) {
|
|
|
31572
32583
|
if (provider2 === "telnyx" && params.telnyxKey && params.telnyxConnectionId) {
|
|
31573
32584
|
try {
|
|
31574
32585
|
await configureTelnyxNumber(params.telnyxKey, params.telnyxConnectionId, params.phoneNumber);
|
|
31575
|
-
log3.info("Telnyx number
|
|
32586
|
+
log3.info("Telnyx number ***%s associated with connection %s", params.phoneNumber.slice(-4), params.telnyxConnectionId);
|
|
31576
32587
|
} catch (err) {
|
|
31577
32588
|
log3.warn("Could not auto-configure Telnyx number: %s", err instanceof Error ? err.message : String(err));
|
|
31578
32589
|
}
|
|
@@ -31722,12 +32733,12 @@ var init_test_mode = __esm({
|
|
|
31722
32733
|
}
|
|
31723
32734
|
continue;
|
|
31724
32735
|
}
|
|
31725
|
-
conversationHistory.push({
|
|
31726
|
-
role: "user",
|
|
31727
|
-
text: userInput,
|
|
31728
|
-
timestamp: Date.now()
|
|
31729
|
-
});
|
|
31730
32736
|
if (onMessage) {
|
|
32737
|
+
conversationHistory.push({
|
|
32738
|
+
role: "user",
|
|
32739
|
+
text: userInput,
|
|
32740
|
+
timestamp: Date.now()
|
|
32741
|
+
});
|
|
31731
32742
|
try {
|
|
31732
32743
|
const responseText = await onMessage({
|
|
31733
32744
|
text: userInput,
|
|
@@ -31757,6 +32768,11 @@ var init_test_mode = __esm({
|
|
|
31757
32768
|
}
|
|
31758
32769
|
log3.info("");
|
|
31759
32770
|
const responseText = parts.join("");
|
|
32771
|
+
conversationHistory.push({
|
|
32772
|
+
role: "user",
|
|
32773
|
+
text: userInput,
|
|
32774
|
+
timestamp: Date.now()
|
|
32775
|
+
});
|
|
31760
32776
|
if (responseText) {
|
|
31761
32777
|
conversationHistory.push({
|
|
31762
32778
|
role: "assistant",
|
|
@@ -33216,6 +34232,7 @@ __export(index_exports, {
|
|
|
33216
34232
|
PRICING_VERSION: () => PRICING_VERSION,
|
|
33217
34233
|
PartialStreamError: () => PartialStreamError,
|
|
33218
34234
|
Patter: () => Patter,
|
|
34235
|
+
PatterConfigError: () => PatterConfigError,
|
|
33219
34236
|
PatterConnectionError: () => PatterConnectionError,
|
|
33220
34237
|
PatterError: () => PatterError,
|
|
33221
34238
|
PatterTool: () => PatterTool,
|
|
@@ -33303,6 +34320,8 @@ __export(index_exports, {
|
|
|
33303
34320
|
mulawToPcm16: () => mulawToPcm16,
|
|
33304
34321
|
notifyDashboard: () => notifyDashboard,
|
|
33305
34322
|
openaiTts: () => openaiTts,
|
|
34323
|
+
openclawConsult: () => openclawConsult,
|
|
34324
|
+
openclawPostCallNotifier: () => openclawPostCallNotifier,
|
|
33306
34325
|
pcm16ToMulaw: () => pcm16ToMulaw,
|
|
33307
34326
|
resample16kTo8k: () => resample16kTo8k,
|
|
33308
34327
|
resample24kTo16k: () => resample24kTo16k,
|
|
@@ -33333,6 +34352,7 @@ init_server();
|
|
|
33333
34352
|
|
|
33334
34353
|
// src/engines/openai.ts
|
|
33335
34354
|
init_cjs_shims();
|
|
34355
|
+
init_openai_realtime();
|
|
33336
34356
|
var Realtime = class {
|
|
33337
34357
|
kind = "openai_realtime";
|
|
33338
34358
|
apiKey;
|
|
@@ -33340,6 +34360,9 @@ var Realtime = class {
|
|
|
33340
34360
|
voice;
|
|
33341
34361
|
reasoningEffort;
|
|
33342
34362
|
inputAudioTranscriptionModel;
|
|
34363
|
+
noiseReduction;
|
|
34364
|
+
turnDetection;
|
|
34365
|
+
gateResponseOnTranscript;
|
|
33343
34366
|
constructor(opts = {}) {
|
|
33344
34367
|
const key = opts.apiKey ?? process.env.OPENAI_API_KEY;
|
|
33345
34368
|
if (!key) {
|
|
@@ -33347,16 +34370,26 @@ var Realtime = class {
|
|
|
33347
34370
|
"OpenAI Realtime requires an apiKey. Pass { apiKey: 'sk-...' } or set OPENAI_API_KEY in the environment."
|
|
33348
34371
|
);
|
|
33349
34372
|
}
|
|
34373
|
+
if (opts.noiseReduction !== void 0 && opts.noiseReduction !== "near_field" && opts.noiseReduction !== "far_field") {
|
|
34374
|
+
throw new Error(
|
|
34375
|
+
`noiseReduction must be 'near_field' or 'far_field', got ${JSON.stringify(opts.noiseReduction)}`
|
|
34376
|
+
);
|
|
34377
|
+
}
|
|
34378
|
+
validateRealtimeTurnDetection(opts.turnDetection);
|
|
33350
34379
|
this.apiKey = key;
|
|
33351
34380
|
this.model = opts.model ?? "gpt-realtime-mini";
|
|
33352
34381
|
this.voice = opts.voice ?? "alloy";
|
|
33353
34382
|
this.reasoningEffort = opts.reasoningEffort;
|
|
33354
34383
|
this.inputAudioTranscriptionModel = opts.inputAudioTranscriptionModel;
|
|
34384
|
+
this.noiseReduction = opts.noiseReduction;
|
|
34385
|
+
this.turnDetection = opts.turnDetection;
|
|
34386
|
+
this.gateResponseOnTranscript = opts.gateResponseOnTranscript;
|
|
33355
34387
|
}
|
|
33356
34388
|
};
|
|
33357
34389
|
|
|
33358
34390
|
// src/engines/openai-2.ts
|
|
33359
34391
|
init_cjs_shims();
|
|
34392
|
+
init_openai_realtime();
|
|
33360
34393
|
var Realtime2 = class {
|
|
33361
34394
|
kind = "openai_realtime_2";
|
|
33362
34395
|
apiKey;
|
|
@@ -33364,6 +34397,9 @@ var Realtime2 = class {
|
|
|
33364
34397
|
voice;
|
|
33365
34398
|
reasoningEffort;
|
|
33366
34399
|
inputAudioTranscriptionModel;
|
|
34400
|
+
noiseReduction;
|
|
34401
|
+
turnDetection;
|
|
34402
|
+
gateResponseOnTranscript;
|
|
33367
34403
|
constructor(opts = {}) {
|
|
33368
34404
|
const key = opts.apiKey ?? process.env.OPENAI_API_KEY;
|
|
33369
34405
|
if (!key) {
|
|
@@ -33371,11 +34407,20 @@ var Realtime2 = class {
|
|
|
33371
34407
|
"OpenAI Realtime 2 requires an apiKey. Pass { apiKey: 'sk-...' } or set OPENAI_API_KEY in the environment."
|
|
33372
34408
|
);
|
|
33373
34409
|
}
|
|
34410
|
+
if (opts.noiseReduction !== void 0 && opts.noiseReduction !== "near_field" && opts.noiseReduction !== "far_field") {
|
|
34411
|
+
throw new Error(
|
|
34412
|
+
`noiseReduction must be 'near_field' or 'far_field', got ${JSON.stringify(opts.noiseReduction)}`
|
|
34413
|
+
);
|
|
34414
|
+
}
|
|
34415
|
+
validateRealtimeTurnDetection(opts.turnDetection);
|
|
33374
34416
|
this.apiKey = key;
|
|
33375
34417
|
this.model = opts.model ?? "gpt-realtime-2";
|
|
33376
34418
|
this.voice = opts.voice ?? "alloy";
|
|
33377
34419
|
this.reasoningEffort = opts.reasoningEffort;
|
|
33378
34420
|
this.inputAudioTranscriptionModel = opts.inputAudioTranscriptionModel;
|
|
34421
|
+
this.noiseReduction = opts.noiseReduction;
|
|
34422
|
+
this.turnDetection = opts.turnDetection;
|
|
34423
|
+
this.gateResponseOnTranscript = opts.gateResponseOnTranscript;
|
|
33379
34424
|
}
|
|
33380
34425
|
};
|
|
33381
34426
|
|
|
@@ -33809,7 +34854,7 @@ function resolvePersistRoot(persist) {
|
|
|
33809
34854
|
if (typeof persist === "string") return resolveLogRoot(persist);
|
|
33810
34855
|
const envRoot = resolveLogRoot();
|
|
33811
34856
|
if (envRoot !== null) return envRoot;
|
|
33812
|
-
return
|
|
34857
|
+
return null;
|
|
33813
34858
|
}
|
|
33814
34859
|
function closeParkedConnections(slot) {
|
|
33815
34860
|
if (slot.stt) {
|
|
@@ -34093,7 +35138,12 @@ var Patter = class {
|
|
|
34093
35138
|
...working,
|
|
34094
35139
|
provider: "openai_realtime",
|
|
34095
35140
|
model: working.model ?? engine.model,
|
|
34096
|
-
voice: working.voice ?? engine.voice
|
|
35141
|
+
voice: working.voice ?? engine.voice,
|
|
35142
|
+
// Explicit agent() kwargs win over the engine marker value
|
|
35143
|
+
// (same precedence as Python: explicit kwarg > engine > default).
|
|
35144
|
+
openaiRealtimeNoiseReduction: working.openaiRealtimeNoiseReduction ?? engine.noiseReduction,
|
|
35145
|
+
realtimeTurnDetection: working.realtimeTurnDetection ?? engine.turnDetection,
|
|
35146
|
+
openaiRealtimeGateResponseOnTranscript: working.openaiRealtimeGateResponseOnTranscript ?? engine.gateResponseOnTranscript
|
|
34097
35147
|
};
|
|
34098
35148
|
if (!this.localConfig.openaiKey) {
|
|
34099
35149
|
this.localConfig = { ...this.localConfig, openaiKey: engine.apiKey };
|
|
@@ -34118,6 +35168,11 @@ var Patter = class {
|
|
|
34118
35168
|
throw new Error(`provider must be one of: ${valid.join(", ")}. Got: '${working.provider}'`);
|
|
34119
35169
|
}
|
|
34120
35170
|
}
|
|
35171
|
+
if (working.consult && working.provider === "elevenlabs_convai") {
|
|
35172
|
+
getLogger().warn(
|
|
35173
|
+
"consult is set but provider is ElevenLabs ConvAI; the consult tool is only injected in Realtime and Pipeline modes and will be ignored for this agent."
|
|
35174
|
+
);
|
|
35175
|
+
}
|
|
34121
35176
|
if (working.llm !== void 0) {
|
|
34122
35177
|
const llm = working.llm;
|
|
34123
35178
|
if (!llm || typeof llm.stream !== "function") {
|
|
@@ -34256,7 +35311,8 @@ var Patter = class {
|
|
|
34256
35311
|
opts.onMetrics,
|
|
34257
35312
|
opts.pricing,
|
|
34258
35313
|
opts.dashboard ?? true,
|
|
34259
|
-
opts.dashboardToken ?? ""
|
|
35314
|
+
opts.dashboardToken ?? "",
|
|
35315
|
+
opts.allowInsecureDashboard ?? false
|
|
34260
35316
|
);
|
|
34261
35317
|
this.embeddedServer.popPrewarmAudio = this.popPrewarmAudio;
|
|
34262
35318
|
this.embeddedServer.popPrewarmedConnections = this.popPrewarmedConnections;
|
|
@@ -34668,8 +35724,8 @@ var Patter = class {
|
|
|
34668
35724
|
if (!options.to) {
|
|
34669
35725
|
throw new Error("'to' phone number is required");
|
|
34670
35726
|
}
|
|
34671
|
-
if (
|
|
34672
|
-
throw new Error(
|
|
35727
|
+
if (!/^\+[1-9]\d{6,14}$/.test(options.to)) {
|
|
35728
|
+
throw new Error("'to' must be E.164 format (+<country><digits>). Got value with invalid format.");
|
|
34673
35729
|
}
|
|
34674
35730
|
if (options.wait && !this.embeddedServer) {
|
|
34675
35731
|
throw new PatterConnectionError(
|
|
@@ -34680,9 +35736,6 @@ var Patter = class {
|
|
|
34680
35736
|
let callId = "";
|
|
34681
35737
|
const effectiveRingTimeout = options.ringTimeout === void 0 ? 25 : options.ringTimeout;
|
|
34682
35738
|
const wantsAmd = options.machineDetection !== false || Boolean(options.voicemailMessage);
|
|
34683
|
-
if (this.embeddedServer) {
|
|
34684
|
-
this.embeddedServer.onMachineDetection = options.onMachineDetection;
|
|
34685
|
-
}
|
|
34686
35739
|
if (options.agent.prewarm !== false) {
|
|
34687
35740
|
this.spawnProviderWarmup(options.agent);
|
|
34688
35741
|
}
|
|
@@ -34727,6 +35780,12 @@ var Patter = class {
|
|
|
34727
35780
|
};
|
|
34728
35781
|
if (this.embeddedServer) {
|
|
34729
35782
|
this.embeddedServer.metricsStore.recordCallInitiated(initiatedPayload);
|
|
35783
|
+
if (options.onMachineDetection) {
|
|
35784
|
+
this.embeddedServer.onMachineDetectionByCallSid.set(
|
|
35785
|
+
telnyxCallId,
|
|
35786
|
+
options.onMachineDetection
|
|
35787
|
+
);
|
|
35788
|
+
}
|
|
34730
35789
|
}
|
|
34731
35790
|
try {
|
|
34732
35791
|
const { notifyDashboard: notifyDashboard2 } = await Promise.resolve().then(() => (init_persistence(), persistence_exports));
|
|
@@ -34792,6 +35851,12 @@ var Patter = class {
|
|
|
34792
35851
|
};
|
|
34793
35852
|
if (this.embeddedServer) {
|
|
34794
35853
|
this.embeddedServer.metricsStore.recordCallInitiated(initiatedPayload);
|
|
35854
|
+
if (options.onMachineDetection) {
|
|
35855
|
+
this.embeddedServer.onMachineDetectionByCallSid.set(
|
|
35856
|
+
plivoCallId,
|
|
35857
|
+
options.onMachineDetection
|
|
35858
|
+
);
|
|
35859
|
+
}
|
|
34795
35860
|
}
|
|
34796
35861
|
try {
|
|
34797
35862
|
const { notifyDashboard: notifyDashboard2 } = await Promise.resolve().then(() => (init_persistence(), persistence_exports));
|
|
@@ -34861,6 +35926,12 @@ var Patter = class {
|
|
|
34861
35926
|
};
|
|
34862
35927
|
if (this.embeddedServer) {
|
|
34863
35928
|
this.embeddedServer.metricsStore.recordCallInitiated(initiatedPayload);
|
|
35929
|
+
if (options.onMachineDetection) {
|
|
35930
|
+
this.embeddedServer.onMachineDetectionByCallSid.set(
|
|
35931
|
+
twilioCallSid,
|
|
35932
|
+
options.onMachineDetection
|
|
35933
|
+
);
|
|
35934
|
+
}
|
|
34864
35935
|
if (twilioNotificationsPath) {
|
|
34865
35936
|
getLogger().info(
|
|
34866
35937
|
`Outbound call ${twilioCallSid} placed. Twilio notifications: https://api.twilio.com${twilioNotificationsPath} (check here if the call drops with no audio).`
|
|
@@ -35144,6 +36215,7 @@ function defineTool(input) {
|
|
|
35144
36215
|
}
|
|
35145
36216
|
|
|
35146
36217
|
// src/index.ts
|
|
36218
|
+
init_consult();
|
|
35147
36219
|
init_logger();
|
|
35148
36220
|
init_sentence_chunker();
|
|
35149
36221
|
init_pipeline_hooks();
|
|
@@ -35361,8 +36433,8 @@ var FallbackLLMProvider = class {
|
|
|
35361
36433
|
* markers are filtered out so callers can concatenate the yielded strings
|
|
35362
36434
|
* directly.
|
|
35363
36435
|
*/
|
|
35364
|
-
async *completeStream(messages, tools) {
|
|
35365
|
-
for await (const chunk of this.stream(messages, tools)) {
|
|
36436
|
+
async *completeStream(messages, tools, opts) {
|
|
36437
|
+
for await (const chunk of this.stream(messages, tools, opts)) {
|
|
35366
36438
|
if (chunk.type === "text") {
|
|
35367
36439
|
yield chunk.content ?? "";
|
|
35368
36440
|
}
|
|
@@ -35372,14 +36444,15 @@ var FallbackLLMProvider = class {
|
|
|
35372
36444
|
// LLMProvider implementation
|
|
35373
36445
|
// -----------------------------------------------------------------------
|
|
35374
36446
|
/** Streaming entry point — yields chunks from the first provider that succeeds. */
|
|
35375
|
-
async *stream(messages, tools) {
|
|
36447
|
+
async *stream(messages, tools, opts) {
|
|
35376
36448
|
const errors = [];
|
|
35377
36449
|
const result = yield* this.tryProviders(
|
|
35378
36450
|
messages,
|
|
35379
36451
|
tools,
|
|
35380
36452
|
/* availableOnly */
|
|
35381
36453
|
true,
|
|
35382
|
-
errors
|
|
36454
|
+
errors,
|
|
36455
|
+
opts
|
|
35383
36456
|
);
|
|
35384
36457
|
if (result === "done") return;
|
|
35385
36458
|
getLogger().warn(
|
|
@@ -35390,7 +36463,8 @@ var FallbackLLMProvider = class {
|
|
|
35390
36463
|
tools,
|
|
35391
36464
|
/* availableOnly */
|
|
35392
36465
|
false,
|
|
35393
|
-
errors
|
|
36466
|
+
errors,
|
|
36467
|
+
opts
|
|
35394
36468
|
);
|
|
35395
36469
|
if (retryResult === "done") return;
|
|
35396
36470
|
throw new AllProvidersFailedError(
|
|
@@ -35400,7 +36474,7 @@ var FallbackLLMProvider = class {
|
|
|
35400
36474
|
// -----------------------------------------------------------------------
|
|
35401
36475
|
// Internals
|
|
35402
36476
|
// -----------------------------------------------------------------------
|
|
35403
|
-
async *tryProviders(messages, tools, availableOnly, errors) {
|
|
36477
|
+
async *tryProviders(messages, tools, availableOnly, errors, opts) {
|
|
35404
36478
|
for (let i = 0; i < this.providers.length; i++) {
|
|
35405
36479
|
if (availableOnly && !this.availability[i]) continue;
|
|
35406
36480
|
for (let attempt = 0; attempt < this.maxRetryPerProvider; attempt++) {
|
|
@@ -35409,7 +36483,7 @@ var FallbackLLMProvider = class {
|
|
|
35409
36483
|
`FallbackLLMProvider: trying provider ${i}${attempt > 0 ? ` (retry ${attempt})` : ""}`
|
|
35410
36484
|
);
|
|
35411
36485
|
let yieldedTokens = false;
|
|
35412
|
-
const gen = this.providers[i].stream(messages, tools);
|
|
36486
|
+
const gen = this.providers[i].stream(messages, tools, opts);
|
|
35413
36487
|
while (true) {
|
|
35414
36488
|
let iterResult;
|
|
35415
36489
|
try {
|
|
@@ -35523,7 +36597,7 @@ var PARAMETERS_SCHEMA = {
|
|
|
35523
36597
|
required: ["to"]
|
|
35524
36598
|
};
|
|
35525
36599
|
var DEFAULT_NAME = "make_phone_call";
|
|
35526
|
-
var
|
|
36600
|
+
var DEFAULT_DESCRIPTION2 = "Place a real outbound phone call. Returns a JSON object with the full transcript, call status, duration in seconds, and cost. Use this when the user asks you to call someone, schedule appointments by phone, or otherwise reach a human via voice.";
|
|
35527
36601
|
var PatterTool = class {
|
|
35528
36602
|
name;
|
|
35529
36603
|
description;
|
|
@@ -35532,6 +36606,11 @@ var PatterTool = class {
|
|
|
35532
36606
|
maxDurationSec;
|
|
35533
36607
|
recording;
|
|
35534
36608
|
started = false;
|
|
36609
|
+
/** Cached in-progress (or completed) start promise so concurrent execute()
|
|
36610
|
+
* callers all await the same boot sequence instead of each racing into
|
|
36611
|
+
* phone.serve(). Reset to null on failure so callers can retry after a
|
|
36612
|
+
* transient error. */
|
|
36613
|
+
startPromise = null;
|
|
35535
36614
|
constructor(opts) {
|
|
35536
36615
|
if (!opts.phone) {
|
|
35537
36616
|
throw new Error("PatterTool: `phone` (a Patter instance) is required.");
|
|
@@ -35539,7 +36618,7 @@ var PatterTool = class {
|
|
|
35539
36618
|
this.phone = opts.phone;
|
|
35540
36619
|
this.agent = opts.agent;
|
|
35541
36620
|
this.name = opts.name ?? DEFAULT_NAME;
|
|
35542
|
-
this.description = opts.description ??
|
|
36621
|
+
this.description = opts.description ?? DEFAULT_DESCRIPTION2;
|
|
35543
36622
|
this.maxDurationSec = Math.max(5, Math.min(1800, opts.maxDurationSec ?? 180));
|
|
35544
36623
|
this.recording = opts.recording ?? false;
|
|
35545
36624
|
}
|
|
@@ -35583,8 +36662,21 @@ var PatterTool = class {
|
|
|
35583
36662
|
* `serve()` provides here. No `onCallEnd` callback is wired: the SDK's own
|
|
35584
36663
|
* per-callId completion registry resolves the result, so the user's
|
|
35585
36664
|
* `onCallEnd` slot is left free.
|
|
36665
|
+
*
|
|
36666
|
+
* Idempotent and concurrency-safe: concurrent callers all await the same
|
|
36667
|
+
* in-progress boot instead of each racing into `phone.serve()`.
|
|
35586
36668
|
*/
|
|
35587
36669
|
async start() {
|
|
36670
|
+
if (this.startPromise) return this.startPromise;
|
|
36671
|
+
this.startPromise = this._doStart();
|
|
36672
|
+
try {
|
|
36673
|
+
await this.startPromise;
|
|
36674
|
+
} catch (err) {
|
|
36675
|
+
this.startPromise = null;
|
|
36676
|
+
throw err;
|
|
36677
|
+
}
|
|
36678
|
+
}
|
|
36679
|
+
async _doStart() {
|
|
35588
36680
|
if (this.started) return;
|
|
35589
36681
|
if (!this.agent) {
|
|
35590
36682
|
throw new Error(
|
|
@@ -35610,6 +36702,7 @@ var PatterTool = class {
|
|
|
35610
36702
|
}
|
|
35611
36703
|
}
|
|
35612
36704
|
this.started = false;
|
|
36705
|
+
this.startPromise = null;
|
|
35613
36706
|
}
|
|
35614
36707
|
// --- Execution ----------------------------------------------------------
|
|
35615
36708
|
/**
|
|
@@ -35981,7 +37074,8 @@ var UltravoxRealtimeAdapter = class {
|
|
|
35981
37074
|
"X-API-Key": this.apiKey,
|
|
35982
37075
|
"Content-Type": "application/json"
|
|
35983
37076
|
},
|
|
35984
|
-
body: JSON.stringify(body)
|
|
37077
|
+
body: JSON.stringify(body),
|
|
37078
|
+
signal: AbortSignal.timeout(15e3)
|
|
35985
37079
|
});
|
|
35986
37080
|
if (!resp.ok) {
|
|
35987
37081
|
const text = await resp.text().catch(() => "");
|
|
@@ -35992,12 +37086,36 @@ var UltravoxRealtimeAdapter = class {
|
|
|
35992
37086
|
this.ws = new import_ws6.default(call.joinUrl);
|
|
35993
37087
|
await new Promise((resolve2, reject) => {
|
|
35994
37088
|
const ws = this.ws;
|
|
37089
|
+
let settled = false;
|
|
37090
|
+
const timer = setTimeout(() => {
|
|
37091
|
+
if (settled) return;
|
|
37092
|
+
settled = true;
|
|
37093
|
+
ws.off("open", onOpen);
|
|
37094
|
+
ws.off("error", onError);
|
|
37095
|
+
this.ws = null;
|
|
37096
|
+
try {
|
|
37097
|
+
ws.close();
|
|
37098
|
+
} catch {
|
|
37099
|
+
}
|
|
37100
|
+
reject(new Error("Ultravox WS connect timeout"));
|
|
37101
|
+
}, 15e3);
|
|
35995
37102
|
const onOpen = () => {
|
|
37103
|
+
if (settled) return;
|
|
37104
|
+
settled = true;
|
|
37105
|
+
clearTimeout(timer);
|
|
35996
37106
|
ws.off("error", onError);
|
|
35997
37107
|
resolve2();
|
|
35998
37108
|
};
|
|
35999
37109
|
const onError = (err) => {
|
|
37110
|
+
if (settled) return;
|
|
37111
|
+
settled = true;
|
|
37112
|
+
clearTimeout(timer);
|
|
36000
37113
|
ws.off("open", onOpen);
|
|
37114
|
+
this.ws = null;
|
|
37115
|
+
try {
|
|
37116
|
+
ws.close();
|
|
37117
|
+
} catch {
|
|
37118
|
+
}
|
|
36001
37119
|
reject(err);
|
|
36002
37120
|
};
|
|
36003
37121
|
ws.once("open", onOpen);
|
|
@@ -36845,7 +37963,7 @@ var STT = class extends DeepgramSTT {
|
|
|
36845
37963
|
{
|
|
36846
37964
|
endpointingMs: opts.endpointingMs ?? 150,
|
|
36847
37965
|
utteranceEndMs: opts.utteranceEndMs === null ? null : opts.utteranceEndMs ?? 1e3,
|
|
36848
|
-
smartFormat: opts.smartFormat ??
|
|
37966
|
+
smartFormat: opts.smartFormat ?? false,
|
|
36849
37967
|
interimResults: opts.interimResults ?? true,
|
|
36850
37968
|
...opts.vadEvents !== void 0 ? { vadEvents: opts.vadEvents } : {}
|
|
36851
37969
|
}
|
|
@@ -37165,7 +38283,7 @@ var CartesiaSTT = class {
|
|
|
37165
38283
|
});
|
|
37166
38284
|
ws.once("error", (err) => {
|
|
37167
38285
|
clearTimeout(timer);
|
|
37168
|
-
reject(err);
|
|
38286
|
+
reject(new Error(`Cartesia STT park connect failed: ${describeWarmupError(err)}`));
|
|
37169
38287
|
});
|
|
37170
38288
|
});
|
|
37171
38289
|
return ws;
|
|
@@ -37521,7 +38639,7 @@ var SonioxSTT = class _SonioxSTT {
|
|
|
37521
38639
|
/** Stable pricing/dashboard key — read by stream-handler/metrics. */
|
|
37522
38640
|
static providerKey = "soniox";
|
|
37523
38641
|
ws = null;
|
|
37524
|
-
callbacks =
|
|
38642
|
+
callbacks = /* @__PURE__ */ new Set();
|
|
37525
38643
|
final = new TokenAccumulator();
|
|
37526
38644
|
keepaliveTimer = null;
|
|
37527
38645
|
apiKey;
|
|
@@ -37683,16 +38801,13 @@ var SonioxSTT = class _SonioxSTT {
|
|
|
37683
38801
|
if (audio.length === 0) return;
|
|
37684
38802
|
this.ws.send(audio);
|
|
37685
38803
|
}
|
|
37686
|
-
/** Register a transcript listener
|
|
38804
|
+
/** Register a transcript listener. */
|
|
37687
38805
|
onTranscript(callback) {
|
|
37688
|
-
|
|
37689
|
-
|
|
37690
|
-
|
|
37691
|
-
|
|
37692
|
-
|
|
37693
|
-
return;
|
|
37694
|
-
}
|
|
37695
|
-
this.callbacks.push(callback);
|
|
38806
|
+
this.callbacks.add(callback);
|
|
38807
|
+
}
|
|
38808
|
+
/** Unregister a previously registered transcript listener. */
|
|
38809
|
+
offTranscript(callback) {
|
|
38810
|
+
this.callbacks.delete(callback);
|
|
37696
38811
|
}
|
|
37697
38812
|
/** Send the empty-frame stream terminator and close the WebSocket. */
|
|
37698
38813
|
close() {
|
|
@@ -37774,12 +38889,6 @@ var VALID_DOMAINS = /* @__PURE__ */ new Set([
|
|
|
37774
38889
|
AssemblyAIDomain.GENERAL,
|
|
37775
38890
|
AssemblyAIDomain.MEDICAL_V1
|
|
37776
38891
|
]);
|
|
37777
|
-
var AssemblyAISTTNotConnectedError = class extends Error {
|
|
37778
|
-
constructor(message = "AssemblyAISTT is not connected") {
|
|
37779
|
-
super(message);
|
|
37780
|
-
this.name = "AssemblyAISTTNotConnectedError";
|
|
37781
|
-
}
|
|
37782
|
-
};
|
|
37783
38892
|
var AssemblyAISTT = class _AssemblyAISTT {
|
|
37784
38893
|
constructor(apiKey, options = {}) {
|
|
37785
38894
|
this.apiKey = apiKey;
|
|
@@ -38103,9 +39212,10 @@ var AssemblyAISTT = class _AssemblyAISTT {
|
|
|
38103
39212
|
*/
|
|
38104
39213
|
updateConfiguration(params) {
|
|
38105
39214
|
if (!this.ws || this.ws.readyState !== import_ws9.default.OPEN) {
|
|
38106
|
-
|
|
38107
|
-
"AssemblyAISTT.updateConfiguration: WebSocket is not open"
|
|
39215
|
+
getLogger().debug(
|
|
39216
|
+
"AssemblyAISTT.updateConfiguration: WebSocket is not open \u2014 dropping update (call teardown)."
|
|
38108
39217
|
);
|
|
39218
|
+
return;
|
|
38109
39219
|
}
|
|
38110
39220
|
const payload = {
|
|
38111
39221
|
type: AssemblyAIClientFrame.UPDATE_CONFIGURATION
|
|
@@ -38127,9 +39237,10 @@ var AssemblyAISTT = class _AssemblyAISTT {
|
|
|
38127
39237
|
/** Force the server to finalize the current turn (for barge-in). */
|
|
38128
39238
|
forceEndpoint() {
|
|
38129
39239
|
if (!this.ws || this.ws.readyState !== import_ws9.default.OPEN) {
|
|
38130
|
-
|
|
38131
|
-
"AssemblyAISTT.forceEndpoint: WebSocket is not open"
|
|
39240
|
+
getLogger().debug(
|
|
39241
|
+
"AssemblyAISTT.forceEndpoint: WebSocket is not open \u2014 dropping request (call teardown)."
|
|
38132
39242
|
);
|
|
39243
|
+
return;
|
|
38133
39244
|
}
|
|
38134
39245
|
this.ws.send(JSON.stringify({ type: AssemblyAIClientFrame.FORCE_ENDPOINT }));
|
|
38135
39246
|
}
|
|
@@ -38144,6 +39255,14 @@ var AssemblyAISTT = class _AssemblyAISTT {
|
|
|
38144
39255
|
async close() {
|
|
38145
39256
|
this.closing = true;
|
|
38146
39257
|
if (!this.ws) return;
|
|
39258
|
+
if (this.chunkBufferBytes > 0 && this.ws.readyState === import_ws9.default.OPEN) {
|
|
39259
|
+
try {
|
|
39260
|
+
this.ws.send(Buffer.concat(this.chunkBuffer, this.chunkBufferBytes));
|
|
39261
|
+
} catch {
|
|
39262
|
+
}
|
|
39263
|
+
this.chunkBuffer = [];
|
|
39264
|
+
this.chunkBufferBytes = 0;
|
|
39265
|
+
}
|
|
38147
39266
|
try {
|
|
38148
39267
|
this.ws.send(JSON.stringify({ type: AssemblyAIClientFrame.TERMINATE }));
|
|
38149
39268
|
} catch {
|
|
@@ -39350,7 +40469,7 @@ var TTS3 = class extends OpenAITTS {
|
|
|
39350
40469
|
opts.model ?? "gpt-4o-mini-tts",
|
|
39351
40470
|
opts.instructions ?? null,
|
|
39352
40471
|
opts.speed ?? null,
|
|
39353
|
-
opts.antiAlias ??
|
|
40472
|
+
opts.antiAlias ?? true
|
|
39354
40473
|
);
|
|
39355
40474
|
}
|
|
39356
40475
|
};
|
|
@@ -39525,7 +40644,6 @@ init_cjs_shims();
|
|
|
39525
40644
|
init_cjs_shims();
|
|
39526
40645
|
init_logger();
|
|
39527
40646
|
var INWORLD_BASE_URL = "https://api.inworld.ai/tts/v1/voice:stream";
|
|
39528
|
-
var INWORLD_VOICES_URL = "https://api.inworld.ai/tts/v1/voices";
|
|
39529
40647
|
var InworldModel = {
|
|
39530
40648
|
TTS_2: "inworld-tts-2",
|
|
39531
40649
|
TTS_1_5_MAX: "inworld-tts-1.5-max",
|
|
@@ -39614,7 +40732,8 @@ var InworldTTS = class {
|
|
|
39614
40732
|
*/
|
|
39615
40733
|
async warmup() {
|
|
39616
40734
|
try {
|
|
39617
|
-
|
|
40735
|
+
const voicesUrl = new URL(this.baseUrl).origin + "/tts/v1/voices";
|
|
40736
|
+
await fetch(voicesUrl, {
|
|
39618
40737
|
method: "GET",
|
|
39619
40738
|
headers: {
|
|
39620
40739
|
Authorization: `Basic ${this.authToken}`
|
|
@@ -39874,58 +40993,87 @@ var AnthropicLLMProvider = class {
|
|
|
39874
40993
|
const toolIndexByBlock = /* @__PURE__ */ new Map();
|
|
39875
40994
|
const toolIdByBlock = /* @__PURE__ */ new Map();
|
|
39876
40995
|
let nextIndex = 0;
|
|
39877
|
-
|
|
39878
|
-
|
|
39879
|
-
|
|
39880
|
-
|
|
39881
|
-
|
|
39882
|
-
|
|
39883
|
-
|
|
39884
|
-
|
|
39885
|
-
|
|
39886
|
-
const
|
|
39887
|
-
|
|
39888
|
-
|
|
39889
|
-
|
|
39890
|
-
|
|
39891
|
-
|
|
39892
|
-
continue;
|
|
39893
|
-
|
|
39894
|
-
|
|
39895
|
-
|
|
39896
|
-
|
|
39897
|
-
|
|
39898
|
-
|
|
39899
|
-
|
|
39900
|
-
|
|
39901
|
-
|
|
39902
|
-
|
|
39903
|
-
|
|
39904
|
-
id: toolId,
|
|
39905
|
-
name: toolName,
|
|
39906
|
-
arguments: ""
|
|
39907
|
-
};
|
|
39908
|
-
continue;
|
|
39909
|
-
}
|
|
39910
|
-
if (event.type === "content_block_delta") {
|
|
39911
|
-
if (event.delta?.type === "text_delta" && event.delta.text) {
|
|
39912
|
-
yield { type: "text", content: event.delta.text };
|
|
40996
|
+
let inputTokens = 0;
|
|
40997
|
+
let outputTokens = 0;
|
|
40998
|
+
let cacheReadTokens = 0;
|
|
40999
|
+
let cacheWriteTokens = 0;
|
|
41000
|
+
try {
|
|
41001
|
+
while (true) {
|
|
41002
|
+
const { done, value } = await reader.read();
|
|
41003
|
+
if (done) break;
|
|
41004
|
+
buffer += decoder.decode(value, { stream: true });
|
|
41005
|
+
const lines = buffer.split("\n");
|
|
41006
|
+
buffer = lines.pop() || "";
|
|
41007
|
+
for (const line of lines) {
|
|
41008
|
+
const trimmed = line.trim();
|
|
41009
|
+
if (!trimmed.startsWith("data: ")) continue;
|
|
41010
|
+
const data = trimmed.slice(6);
|
|
41011
|
+
if (!data || data === "[DONE]") continue;
|
|
41012
|
+
let event;
|
|
41013
|
+
try {
|
|
41014
|
+
event = JSON.parse(data);
|
|
41015
|
+
} catch {
|
|
41016
|
+
continue;
|
|
41017
|
+
}
|
|
41018
|
+
if (event.type === "message_start" && event.message?.usage) {
|
|
41019
|
+
const u = event.message.usage;
|
|
41020
|
+
if (u.input_tokens) inputTokens = u.input_tokens;
|
|
41021
|
+
if (u.cache_creation_input_tokens) cacheWriteTokens = u.cache_creation_input_tokens;
|
|
41022
|
+
if (u.cache_read_input_tokens) cacheReadTokens = u.cache_read_input_tokens;
|
|
39913
41023
|
continue;
|
|
39914
41024
|
}
|
|
39915
|
-
if (event.
|
|
41025
|
+
if (event.type === "message_delta" && event.usage?.output_tokens) {
|
|
41026
|
+
outputTokens = event.usage.output_tokens;
|
|
41027
|
+
continue;
|
|
41028
|
+
}
|
|
41029
|
+
if (event.type === "content_block_start" && event.content_block?.type === "tool_use") {
|
|
39916
41030
|
const blockIdx = event.index ?? 0;
|
|
39917
|
-
const
|
|
39918
|
-
|
|
39919
|
-
|
|
39920
|
-
|
|
39921
|
-
|
|
39922
|
-
|
|
39923
|
-
|
|
39924
|
-
|
|
41031
|
+
const toolId = event.content_block.id ?? "";
|
|
41032
|
+
const toolName = event.content_block.name ?? "";
|
|
41033
|
+
const patterIndex = nextIndex++;
|
|
41034
|
+
toolIndexByBlock.set(blockIdx, patterIndex);
|
|
41035
|
+
toolIdByBlock.set(blockIdx, toolId);
|
|
41036
|
+
yield {
|
|
41037
|
+
type: "tool_call",
|
|
41038
|
+
index: patterIndex,
|
|
41039
|
+
id: toolId,
|
|
41040
|
+
name: toolName,
|
|
41041
|
+
arguments: ""
|
|
41042
|
+
};
|
|
41043
|
+
continue;
|
|
41044
|
+
}
|
|
41045
|
+
if (event.type === "content_block_delta") {
|
|
41046
|
+
if (event.delta?.type === "text_delta" && event.delta.text) {
|
|
41047
|
+
yield { type: "text", content: event.delta.text };
|
|
41048
|
+
continue;
|
|
41049
|
+
}
|
|
41050
|
+
if (event.delta?.type === "input_json_delta" && event.delta.partial_json) {
|
|
41051
|
+
const blockIdx = event.index ?? 0;
|
|
41052
|
+
const patterIndex = toolIndexByBlock.get(blockIdx);
|
|
41053
|
+
if (patterIndex !== void 0) {
|
|
41054
|
+
yield {
|
|
41055
|
+
type: "tool_call",
|
|
41056
|
+
index: patterIndex,
|
|
41057
|
+
id: toolIdByBlock.get(blockIdx),
|
|
41058
|
+
arguments: event.delta.partial_json
|
|
41059
|
+
};
|
|
41060
|
+
}
|
|
39925
41061
|
}
|
|
39926
41062
|
}
|
|
39927
41063
|
}
|
|
39928
41064
|
}
|
|
41065
|
+
} finally {
|
|
41066
|
+
reader.cancel().catch(() => {
|
|
41067
|
+
});
|
|
41068
|
+
}
|
|
41069
|
+
if (inputTokens > 0 || outputTokens > 0 || cacheReadTokens > 0 || cacheWriteTokens > 0) {
|
|
41070
|
+
yield {
|
|
41071
|
+
type: "usage",
|
|
41072
|
+
inputTokens,
|
|
41073
|
+
outputTokens,
|
|
41074
|
+
cacheReadInputTokens: cacheReadTokens,
|
|
41075
|
+
cacheWriteInputTokens: cacheWriteTokens
|
|
41076
|
+
};
|
|
39929
41077
|
}
|
|
39930
41078
|
yield { type: "done" };
|
|
39931
41079
|
}
|
|
@@ -39985,16 +41133,17 @@ function toAnthropicMessages(messages) {
|
|
|
39985
41133
|
}
|
|
39986
41134
|
if (role === "tool") {
|
|
39987
41135
|
const contentStr = typeof rawMsg.content === "string" ? rawMsg.content : JSON.stringify(rawMsg.content);
|
|
39988
|
-
|
|
39989
|
-
|
|
39990
|
-
|
|
39991
|
-
|
|
39992
|
-
|
|
39993
|
-
|
|
39994
|
-
|
|
39995
|
-
|
|
39996
|
-
|
|
39997
|
-
|
|
41136
|
+
const toolResultBlock = {
|
|
41137
|
+
type: "tool_result",
|
|
41138
|
+
tool_use_id: rawMsg.tool_call_id ?? "",
|
|
41139
|
+
content: contentStr
|
|
41140
|
+
};
|
|
41141
|
+
const prev = out.length > 0 ? out[out.length - 1] : void 0;
|
|
41142
|
+
if (prev && prev.role === "user" && Array.isArray(prev.content) && prev.content.length > 0 && prev.content.every((b) => b["type"] === "tool_result")) {
|
|
41143
|
+
prev.content.push(toolResultBlock);
|
|
41144
|
+
} else {
|
|
41145
|
+
out.push({ role: "user", content: [toolResultBlock] });
|
|
41146
|
+
}
|
|
39998
41147
|
continue;
|
|
39999
41148
|
}
|
|
40000
41149
|
}
|
|
@@ -40137,50 +41286,55 @@ async function* parseOpenAISseStream(response) {
|
|
|
40137
41286
|
if (!reader) return;
|
|
40138
41287
|
const decoder = new TextDecoder();
|
|
40139
41288
|
let buffer = "";
|
|
40140
|
-
|
|
40141
|
-
|
|
40142
|
-
|
|
40143
|
-
|
|
40144
|
-
|
|
40145
|
-
|
|
40146
|
-
|
|
40147
|
-
const
|
|
40148
|
-
|
|
40149
|
-
|
|
40150
|
-
|
|
40151
|
-
|
|
40152
|
-
|
|
40153
|
-
|
|
40154
|
-
|
|
40155
|
-
|
|
40156
|
-
|
|
40157
|
-
|
|
40158
|
-
|
|
40159
|
-
|
|
40160
|
-
|
|
40161
|
-
type: "usage",
|
|
40162
|
-
inputTokens: usage.prompt_tokens,
|
|
40163
|
-
outputTokens: usage.completion_tokens,
|
|
40164
|
-
cacheReadInputTokens: cached2
|
|
40165
|
-
};
|
|
40166
|
-
}
|
|
40167
|
-
const delta = chunk.choices?.[0]?.delta;
|
|
40168
|
-
if (!delta) continue;
|
|
40169
|
-
if (delta.content) {
|
|
40170
|
-
yield { type: "text", content: delta.content };
|
|
40171
|
-
}
|
|
40172
|
-
if (delta.tool_calls) {
|
|
40173
|
-
for (const tc of delta.tool_calls) {
|
|
41289
|
+
try {
|
|
41290
|
+
while (true) {
|
|
41291
|
+
const { done, value } = await reader.read();
|
|
41292
|
+
if (done) break;
|
|
41293
|
+
buffer += decoder.decode(value, { stream: true });
|
|
41294
|
+
const lines = buffer.split("\n");
|
|
41295
|
+
buffer = lines.pop() || "";
|
|
41296
|
+
for (const line of lines) {
|
|
41297
|
+
const trimmed = line.trim();
|
|
41298
|
+
if (!trimmed || !trimmed.startsWith("data: ")) continue;
|
|
41299
|
+
const data = trimmed.slice(6);
|
|
41300
|
+
if (data === "[DONE]") continue;
|
|
41301
|
+
let chunk;
|
|
41302
|
+
try {
|
|
41303
|
+
chunk = JSON.parse(data);
|
|
41304
|
+
} catch {
|
|
41305
|
+
continue;
|
|
41306
|
+
}
|
|
41307
|
+
const usage = chunk.usage ?? chunk.x_groq?.usage;
|
|
41308
|
+
if (usage) {
|
|
41309
|
+
const cached2 = chunk.usage?.prompt_tokens_details?.cached_tokens ?? 0;
|
|
40174
41310
|
yield {
|
|
40175
|
-
type: "
|
|
40176
|
-
|
|
40177
|
-
|
|
40178
|
-
|
|
40179
|
-
arguments: tc.function?.arguments
|
|
41311
|
+
type: "usage",
|
|
41312
|
+
inputTokens: usage.prompt_tokens,
|
|
41313
|
+
outputTokens: usage.completion_tokens,
|
|
41314
|
+
cacheReadInputTokens: cached2
|
|
40180
41315
|
};
|
|
40181
41316
|
}
|
|
41317
|
+
const delta = chunk.choices?.[0]?.delta;
|
|
41318
|
+
if (!delta) continue;
|
|
41319
|
+
if (delta.content) {
|
|
41320
|
+
yield { type: "text", content: delta.content };
|
|
41321
|
+
}
|
|
41322
|
+
if (delta.tool_calls) {
|
|
41323
|
+
for (const tc of delta.tool_calls) {
|
|
41324
|
+
yield {
|
|
41325
|
+
type: "tool_call",
|
|
41326
|
+
index: tc.index,
|
|
41327
|
+
id: tc.id,
|
|
41328
|
+
name: tc.function?.name,
|
|
41329
|
+
arguments: tc.function?.arguments
|
|
41330
|
+
};
|
|
41331
|
+
}
|
|
41332
|
+
}
|
|
40182
41333
|
}
|
|
40183
41334
|
}
|
|
41335
|
+
} finally {
|
|
41336
|
+
reader.cancel().catch(() => {
|
|
41337
|
+
});
|
|
40184
41338
|
}
|
|
40185
41339
|
}
|
|
40186
41340
|
|
|
@@ -40349,11 +41503,21 @@ var CerebrasLLMProvider = class {
|
|
|
40349
41503
|
}
|
|
40350
41504
|
const advisoryMs = parseRateLimitResetMs(response.headers);
|
|
40351
41505
|
const exponentialMs = RETRY_BACKOFF_BASE_MS * Math.pow(2, attempt);
|
|
40352
|
-
const delayMs = Math.max(advisoryMs, exponentialMs);
|
|
41506
|
+
const delayMs = Math.min(5e3, Math.max(advisoryMs, exponentialMs));
|
|
40353
41507
|
getLogger().warn(
|
|
40354
41508
|
`Cerebras API ${response.status} (attempt ${attempt + 1}/${maxAttempts}); retrying after ${delayMs}ms`
|
|
40355
41509
|
);
|
|
40356
|
-
await new Promise((
|
|
41510
|
+
await new Promise((resolve2, reject) => {
|
|
41511
|
+
const t = setTimeout(resolve2, delayMs);
|
|
41512
|
+
opts?.signal?.addEventListener(
|
|
41513
|
+
"abort",
|
|
41514
|
+
() => {
|
|
41515
|
+
clearTimeout(t);
|
|
41516
|
+
reject(opts.signal.reason);
|
|
41517
|
+
},
|
|
41518
|
+
{ once: true }
|
|
41519
|
+
);
|
|
41520
|
+
});
|
|
40357
41521
|
}
|
|
40358
41522
|
throw new PatterError(`Cerebras API error ${lastStatus}: ${lastErrText || "request failed"}`);
|
|
40359
41523
|
}
|
|
@@ -40516,47 +41680,52 @@ var GoogleLLMProvider = class {
|
|
|
40516
41680
|
let buffer = "";
|
|
40517
41681
|
let nextIndex = 0;
|
|
40518
41682
|
let lastUsage;
|
|
40519
|
-
|
|
40520
|
-
|
|
40521
|
-
|
|
40522
|
-
|
|
40523
|
-
|
|
40524
|
-
|
|
40525
|
-
|
|
40526
|
-
const
|
|
40527
|
-
|
|
40528
|
-
|
|
40529
|
-
|
|
40530
|
-
|
|
40531
|
-
|
|
40532
|
-
|
|
40533
|
-
|
|
40534
|
-
|
|
40535
|
-
}
|
|
40536
|
-
if (payload.usageMetadata) {
|
|
40537
|
-
lastUsage = payload.usageMetadata;
|
|
40538
|
-
}
|
|
40539
|
-
const candidate = payload.candidates?.[0];
|
|
40540
|
-
const parts = candidate?.content?.parts ?? [];
|
|
40541
|
-
for (const part of parts) {
|
|
40542
|
-
if (part.functionCall) {
|
|
40543
|
-
const args = part.functionCall.args ?? {};
|
|
40544
|
-
const callId = part.functionCall.id ?? `gemini_call_${nextIndex}`;
|
|
40545
|
-
yield {
|
|
40546
|
-
type: "tool_call",
|
|
40547
|
-
index: nextIndex,
|
|
40548
|
-
id: callId,
|
|
40549
|
-
name: part.functionCall.name ?? "",
|
|
40550
|
-
arguments: JSON.stringify(args)
|
|
40551
|
-
};
|
|
40552
|
-
nextIndex++;
|
|
41683
|
+
try {
|
|
41684
|
+
while (true) {
|
|
41685
|
+
const { done, value } = await reader.read();
|
|
41686
|
+
if (done) break;
|
|
41687
|
+
buffer += decoder.decode(value, { stream: true });
|
|
41688
|
+
const lines = buffer.split("\n");
|
|
41689
|
+
buffer = lines.pop() || "";
|
|
41690
|
+
for (const line of lines) {
|
|
41691
|
+
const trimmed = line.trim();
|
|
41692
|
+
if (!trimmed.startsWith("data: ")) continue;
|
|
41693
|
+
const data = trimmed.slice(6);
|
|
41694
|
+
if (!data) continue;
|
|
41695
|
+
let payload;
|
|
41696
|
+
try {
|
|
41697
|
+
payload = JSON.parse(data);
|
|
41698
|
+
} catch {
|
|
40553
41699
|
continue;
|
|
40554
41700
|
}
|
|
40555
|
-
if (
|
|
40556
|
-
|
|
41701
|
+
if (payload.usageMetadata) {
|
|
41702
|
+
lastUsage = payload.usageMetadata;
|
|
41703
|
+
}
|
|
41704
|
+
const candidate = payload.candidates?.[0];
|
|
41705
|
+
const parts = candidate?.content?.parts ?? [];
|
|
41706
|
+
for (const part of parts) {
|
|
41707
|
+
if (part.functionCall) {
|
|
41708
|
+
const args = part.functionCall.args ?? {};
|
|
41709
|
+
const callId = part.functionCall.id ?? `gemini_call_${nextIndex}`;
|
|
41710
|
+
yield {
|
|
41711
|
+
type: "tool_call",
|
|
41712
|
+
index: nextIndex,
|
|
41713
|
+
id: callId,
|
|
41714
|
+
name: part.functionCall.name ?? "",
|
|
41715
|
+
arguments: JSON.stringify(args)
|
|
41716
|
+
};
|
|
41717
|
+
nextIndex++;
|
|
41718
|
+
continue;
|
|
41719
|
+
}
|
|
41720
|
+
if (part.text) {
|
|
41721
|
+
yield { type: "text", content: part.text };
|
|
41722
|
+
}
|
|
40557
41723
|
}
|
|
40558
41724
|
}
|
|
40559
41725
|
}
|
|
41726
|
+
} finally {
|
|
41727
|
+
reader.cancel().catch(() => {
|
|
41728
|
+
});
|
|
40560
41729
|
}
|
|
40561
41730
|
if (lastUsage) {
|
|
40562
41731
|
yield {
|
|
@@ -40650,7 +41819,17 @@ function toGeminiContents(messages) {
|
|
|
40650
41819
|
continue;
|
|
40651
41820
|
}
|
|
40652
41821
|
}
|
|
40653
|
-
|
|
41822
|
+
const merged = [];
|
|
41823
|
+
for (const entry of contents) {
|
|
41824
|
+
const prev = merged[merged.length - 1];
|
|
41825
|
+
const isFunctionResponseOnly = (c) => c.role === "user" && c.parts.every((p) => p.functionResponse !== void 0);
|
|
41826
|
+
if (prev && isFunctionResponseOnly(prev) && isFunctionResponseOnly(entry)) {
|
|
41827
|
+
prev.parts.push(...entry.parts);
|
|
41828
|
+
} else {
|
|
41829
|
+
merged.push(entry);
|
|
41830
|
+
}
|
|
41831
|
+
}
|
|
41832
|
+
return { systemInstruction: systemParts.join("\n\n"), contents: merged };
|
|
40654
41833
|
}
|
|
40655
41834
|
|
|
40656
41835
|
// src/llm/google.ts
|
|
@@ -40679,7 +41858,6 @@ init_silero_vad();
|
|
|
40679
41858
|
// src/providers/deepfilternet-filter.ts
|
|
40680
41859
|
init_cjs_shims();
|
|
40681
41860
|
init_logger();
|
|
40682
|
-
init_transcoding();
|
|
40683
41861
|
function log2() {
|
|
40684
41862
|
return getLogger();
|
|
40685
41863
|
}
|
|
@@ -40709,6 +41887,57 @@ function float32ToPcm16(samples) {
|
|
|
40709
41887
|
}
|
|
40710
41888
|
return out;
|
|
40711
41889
|
}
|
|
41890
|
+
/**
 * Streaming linear-interpolation resampler for PCM16-LE mono audio.
 *
 * The read position is carried from one `process()` call to the next so that
 * consecutive chunks form one continuous stream. The position is tracked as an
 * exact integer numerator ("ticks", where one input sample equals `dstRate`
 * ticks) instead of an accumulated floating-point phase. For integer sample
 * rates this makes the number of output samples per chunk fully deterministic
 * (e.g. 160 samples at 8 kHz always yield 960 samples at 48 kHz) because no
 * rounding error can build up across thousands of `+= step` additions.
 */
var ArbitraryResampler = class {
  srcRate;
  dstRate;
  // Fractional position (in input samples) at which the next chunk starts.
  // Always equal to `phaseTicks / dstRate`.
  phase = 0;
  // Exact form of `phase`: position in units of 1/dstRate input samples.
  phaseTicks = 0;
  // Last input sample of the most recently processed chunk.
  lastSample = 0;
  // True once at least one non-empty chunk has been processed.
  hasHistory = false;
  /**
   * @param {number} srcRate Input sample rate in Hz (positive, finite).
   * @param {number} dstRate Output sample rate in Hz (positive, finite).
   * @throws {RangeError} When either rate is not a positive finite number;
   *   such a rate would otherwise make `process()` loop without terminating.
   */
  constructor(srcRate, dstRate) {
    const valid = (rate) => Number.isFinite(rate) && rate > 0;
    if (!valid(srcRate) || !valid(dstRate)) {
      throw new RangeError(
        `ArbitraryResampler: sample rates must be positive finite numbers (got ${srcRate} -> ${dstRate})`
      );
    }
    this.srcRate = srcRate;
    this.dstRate = dstRate;
  }
  /**
   * Process a chunk of PCM16-LE mono audio and return resampled PCM16-LE.
   *
   * A trailing odd byte (half a sample) is ignored. Output positions that fall
   * between the last input sample of the chunk and the (not yet available)
   * first sample of the next chunk repeat the last sample.
   *
   * @param {Buffer} pcm Input audio, 2 bytes per sample, little-endian.
   * @returns {Buffer} Resampled audio, 2 bytes per sample, little-endian.
   */
  process(pcm) {
    const sampleCount = Math.floor(pcm.length / 2);
    if (sampleCount === 0) return Buffer.alloc(0);
    const { srcRate, dstRate } = this;
    // End of this chunk expressed in ticks.
    const limitTicks = sampleCount * dstRate;
    let ticks = this.phaseTicks;
    // Upper bound on the number of outputs; +1 leaves headroom for
    // non-integer rates where the arithmetic below is not exact.
    const capacity = Math.max(0, Math.ceil((limitTicks - ticks) / srcRate)) + 1;
    const out = Buffer.alloc(capacity * 2);
    const lastIdx = sampleCount - 1;
    let produced = 0;
    while (ticks < limitTicks) {
      const rem = ticks % dstRate;
      // (ticks - rem) is an exact multiple of dstRate for integer rates.
      const idx = Math.min((ticks - rem) / dstRate, lastIdx);
      const frac = rem / dstRate;
      const s0 = pcm.readInt16LE(idx * 2);
      const s1 = idx < lastIdx ? pcm.readInt16LE((idx + 1) * 2) : s0;
      const interp = Math.round(s0 + (s1 - s0) * frac);
      out.writeInt16LE(Math.max(-32768, Math.min(32767, interp)), produced * 2);
      produced++;
      ticks += srcRate;
    }
    this.lastSample = pcm.readInt16LE(lastIdx * 2);
    this.hasHistory = true;
    // Re-base the position onto the start of the next chunk.
    this.phaseTicks = ticks - limitTicks;
    this.phase = this.phaseTicks / dstRate;
    return out.subarray(0, produced * 2);
  }
  /** Flush any buffered state and reset. Returns any remaining tail output. */
  flush() {
    this.phase = 0;
    this.phaseTicks = 0;
    this.lastSample = 0;
    this.hasHistory = false;
    // Nothing is buffered between calls, so there is never a tail to emit.
    return Buffer.alloc(0);
  }
};
|
|
40712
41941
|
var DeepFilterNetFilter = class {
|
|
40713
41942
|
modelPath;
|
|
40714
41943
|
silenceWarnings;
|
|
@@ -40716,8 +41945,9 @@ var DeepFilterNetFilter = class {
|
|
|
40716
41945
|
ort = null;
|
|
40717
41946
|
warned = false;
|
|
40718
41947
|
closed = false;
|
|
40719
|
-
//
|
|
41948
|
+
// Stateful resamplers for src_sr↔48k conversions so chunk-boundary
|
|
40720
41949
|
// samples are not discarded. Lazy-created and torn down on rate change.
|
|
41950
|
+
// Uses ArbitraryResampler which supports any integer rate pair.
|
|
40721
41951
|
_resamplerSrcRate = null;
|
|
40722
41952
|
_upsamplerInst = null;
|
|
40723
41953
|
_downsamplerInst = null;
|
|
@@ -40775,8 +42005,8 @@ var DeepFilterNetFilter = class {
|
|
|
40775
42005
|
try {
|
|
40776
42006
|
if (this._resamplerSrcRate !== sampleRate) {
|
|
40777
42007
|
this._resamplerSrcRate = sampleRate;
|
|
40778
|
-
this._upsamplerInst = new
|
|
40779
|
-
this._downsamplerInst = new
|
|
42008
|
+
this._upsamplerInst = new ArbitraryResampler(sampleRate, DEEPFILTERNET_SR);
|
|
42009
|
+
this._downsamplerInst = new ArbitraryResampler(DEEPFILTERNET_SR, sampleRate);
|
|
40780
42010
|
}
|
|
40781
42011
|
const samples = pcm16ToFloat32(pcmChunk);
|
|
40782
42012
|
const pcm16Up = this._upsamplerInst.process(float32ToPcm16(new Float32Array(samples)));
|
|
@@ -40940,6 +42170,17 @@ var Tool = class {
|
|
|
40940
42170
|
parameters;
|
|
40941
42171
|
handler;
|
|
40942
42172
|
webhookUrl;
|
|
42173
|
+
reassurance;
|
|
42174
|
+
/**
|
|
42175
|
+
* Per-tool execution timeout in milliseconds. `undefined` uses the
|
|
42176
|
+
* executor default (10 000 ms). Mirrors Python `timeout_s`.
|
|
42177
|
+
*/
|
|
42178
|
+
timeoutMs;
|
|
42179
|
+
/**
|
|
42180
|
+
* Enable OpenAI strict mode for this tool's function schema. Off by
|
|
42181
|
+
* default. Mirrors Python `strict` on `Tool`.
|
|
42182
|
+
*/
|
|
42183
|
+
strict;
|
|
40943
42184
|
constructor(opts) {
|
|
40944
42185
|
if (!opts.name) {
|
|
40945
42186
|
throw new Error("Tool requires a non-empty name.");
|
|
@@ -40957,6 +42198,9 @@ var Tool = class {
|
|
|
40957
42198
|
this.parameters = opts.parameters ?? { type: "object", properties: {} };
|
|
40958
42199
|
if (hasHandler) this.handler = opts.handler;
|
|
40959
42200
|
if (hasWebhook) this.webhookUrl = opts.webhookUrl;
|
|
42201
|
+
if (opts.reassurance !== void 0) this.reassurance = opts.reassurance;
|
|
42202
|
+
if (opts.timeoutMs !== void 0) this.timeoutMs = opts.timeoutMs;
|
|
42203
|
+
if (opts.strict !== void 0) this.strict = opts.strict;
|
|
40960
42204
|
}
|
|
40961
42205
|
};
|
|
40962
42206
|
function tool(opts) {
|
|
@@ -41120,7 +42364,6 @@ var ChatContext = class _ChatContext {
|
|
|
41120
42364
|
init_cjs_shims();
|
|
41121
42365
|
init_logger();
|
|
41122
42366
|
var DTMF_EVENTS = [
|
|
41123
|
-
"0",
|
|
41124
42367
|
"1",
|
|
41125
42368
|
"2",
|
|
41126
42369
|
"3",
|
|
@@ -41130,6 +42373,7 @@ var DTMF_EVENTS = [
|
|
|
41130
42373
|
"7",
|
|
41131
42374
|
"8",
|
|
41132
42375
|
"9",
|
|
42376
|
+
"0",
|
|
41133
42377
|
"*",
|
|
41134
42378
|
"#",
|
|
41135
42379
|
"A",
|
|
@@ -41809,18 +43053,24 @@ var TelnyxAdapter = class {
|
|
|
41809
43053
|
"/number_orders",
|
|
41810
43054
|
orderBody
|
|
41811
43055
|
);
|
|
41812
|
-
const orderId = order.data?.id
|
|
43056
|
+
const orderId = order.data?.id;
|
|
43057
|
+
if (!orderId) throw new Error("TelnyxAdapter: /number_orders returned no order id");
|
|
41813
43058
|
return { phoneNumber: chosen, orderId };
|
|
41814
43059
|
}
|
|
41815
43060
|
/** Attach a number to a Call Control Application. */
|
|
41816
43061
|
async configureNumber(phoneNumber, opts) {
|
|
41817
43062
|
if (!phoneNumber) throw new Error("TelnyxAdapter: phoneNumber is required");
|
|
41818
43063
|
if (!opts.connectionId) throw new Error("TelnyxAdapter: connectionId is required");
|
|
41819
|
-
|
|
41820
|
-
|
|
41821
|
-
|
|
41822
|
-
|
|
41823
|
-
|
|
43064
|
+
try {
|
|
43065
|
+
await this.request(
|
|
43066
|
+
"PATCH",
|
|
43067
|
+
`/phone_numbers/${encodeURIComponent(phoneNumber)}/voice`,
|
|
43068
|
+
{ connection_id: opts.connectionId, tech_prefix_enabled: false }
|
|
43069
|
+
);
|
|
43070
|
+
} catch (err) {
|
|
43071
|
+
const status = err instanceof Error ? err.message.replace(/\+\d{7,15}/g, "[REDACTED]") : String(err);
|
|
43072
|
+
throw new Error(`TelnyxAdapter: configureNumber failed: ${status}`);
|
|
43073
|
+
}
|
|
41824
43074
|
}
|
|
41825
43075
|
/**
|
|
41826
43076
|
* Place an outbound call on the Call Control Application.
|
|
@@ -41928,7 +43178,7 @@ var TelnyxSTT = class {
|
|
|
41928
43178
|
/** Stable pricing/dashboard key — read by stream-handler/metrics. */
|
|
41929
43179
|
static providerKey = "telnyx_stt";
|
|
41930
43180
|
ws = null;
|
|
41931
|
-
callbacks =
|
|
43181
|
+
callbacks = /* @__PURE__ */ new Set();
|
|
41932
43182
|
headerSent = false;
|
|
41933
43183
|
/** Open the streaming WebSocket and arm message handlers. */
|
|
41934
43184
|
async connect() {
|
|
@@ -41984,14 +43234,13 @@ var TelnyxSTT = class {
|
|
|
41984
43234
|
}
|
|
41985
43235
|
this.ws.send(audio);
|
|
41986
43236
|
}
|
|
41987
|
-
/** Register a transcript listener
|
|
43237
|
+
/** Register a transcript listener. */
|
|
41988
43238
|
onTranscript(callback) {
|
|
41989
|
-
|
|
41990
|
-
|
|
41991
|
-
|
|
41992
|
-
|
|
41993
|
-
|
|
41994
|
-
this.callbacks.push(callback);
|
|
43239
|
+
this.callbacks.add(callback);
|
|
43240
|
+
}
|
|
43241
|
+
/** Unregister a previously-registered transcript listener. */
|
|
43242
|
+
offTranscript(callback) {
|
|
43243
|
+
this.callbacks.delete(callback);
|
|
41995
43244
|
}
|
|
41996
43245
|
/** Close the streaming WebSocket. */
|
|
41997
43246
|
close() {
|
|
@@ -42002,6 +43251,7 @@ var TelnyxSTT = class {
|
|
|
42002
43251
|
}
|
|
42003
43252
|
this.ws = null;
|
|
42004
43253
|
}
|
|
43254
|
+
this.headerSent = false;
|
|
42005
43255
|
}
|
|
42006
43256
|
};
|
|
42007
43257
|
|
|
@@ -42023,6 +43273,7 @@ var TelnyxTTSSampleRate = {
|
|
|
42023
43273
|
HZ_24000: 24e3
|
|
42024
43274
|
};
|
|
42025
43275
|
var DEFAULT_VOICE = TelnyxTTSVoice.NATURAL_HD_ASTRA;
|
|
43276
|
+
var FRAME_TIMEOUT_MS2 = 3e4;
|
|
42026
43277
|
var TelnyxTTS = class {
|
|
42027
43278
|
constructor(apiKey, voice = DEFAULT_VOICE, baseUrl = TELNYX_TTS_WS_URL) {
|
|
42028
43279
|
this.apiKey = apiKey;
|
|
@@ -42050,69 +43301,83 @@ var TelnyxTTS = class {
|
|
|
42050
43301
|
*/
|
|
42051
43302
|
async *synthesizeStream(text) {
|
|
42052
43303
|
const url2 = `${this.baseUrl}?voice=${encodeURIComponent(this.voice)}`;
|
|
42053
|
-
|
|
42054
|
-
|
|
42055
|
-
|
|
42056
|
-
|
|
42057
|
-
|
|
42058
|
-
|
|
42059
|
-
|
|
42060
|
-
|
|
43304
|
+
let ws = null;
|
|
43305
|
+
try {
|
|
43306
|
+
let push2 = function(item) {
|
|
43307
|
+
const w = waiters.shift();
|
|
43308
|
+
if (w) {
|
|
43309
|
+
w(item);
|
|
43310
|
+
} else {
|
|
43311
|
+
queue.push(item);
|
|
43312
|
+
}
|
|
43313
|
+
};
|
|
43314
|
+
var push = push2;
|
|
43315
|
+
ws = new import_ws13.default(url2, {
|
|
43316
|
+
headers: { Authorization: `Bearer ${this.apiKey}` }
|
|
42061
43317
|
});
|
|
42062
|
-
|
|
42063
|
-
|
|
42064
|
-
|
|
43318
|
+
await new Promise((resolve2, reject) => {
|
|
43319
|
+
const timer = setTimeout(() => reject(new Error("Telnyx TTS connect timeout")), 1e4);
|
|
43320
|
+
ws.once("open", () => {
|
|
43321
|
+
clearTimeout(timer);
|
|
43322
|
+
resolve2();
|
|
43323
|
+
});
|
|
43324
|
+
ws.once("error", (err) => {
|
|
43325
|
+
clearTimeout(timer);
|
|
43326
|
+
reject(err);
|
|
43327
|
+
});
|
|
42065
43328
|
});
|
|
42066
|
-
|
|
42067
|
-
|
|
42068
|
-
|
|
42069
|
-
|
|
42070
|
-
|
|
42071
|
-
|
|
42072
|
-
|
|
42073
|
-
|
|
42074
|
-
|
|
42075
|
-
}
|
|
42076
|
-
}
|
|
42077
|
-
ws.on("message", (raw) => {
|
|
42078
|
-
let data;
|
|
42079
|
-
try {
|
|
42080
|
-
data = JSON.parse(raw.toString());
|
|
42081
|
-
} catch {
|
|
42082
|
-
getLogger().warn("TelnyxTTS: received invalid JSON");
|
|
42083
|
-
return;
|
|
42084
|
-
}
|
|
42085
|
-
const audioB64 = data.audio;
|
|
42086
|
-
if (!audioB64) return;
|
|
42087
|
-
try {
|
|
42088
|
-
const audioBytes = Buffer.from(audioB64, "base64");
|
|
42089
|
-
if (audioBytes.length > 0) {
|
|
42090
|
-
push(audioBytes);
|
|
43329
|
+
const queue = [];
|
|
43330
|
+
const waiters = [];
|
|
43331
|
+
ws.on("message", (raw) => {
|
|
43332
|
+
let data;
|
|
43333
|
+
try {
|
|
43334
|
+
data = JSON.parse(raw.toString());
|
|
43335
|
+
} catch {
|
|
43336
|
+
getLogger().warn("TelnyxTTS: received invalid JSON");
|
|
43337
|
+
return;
|
|
42091
43338
|
}
|
|
42092
|
-
|
|
42093
|
-
|
|
42094
|
-
|
|
42095
|
-
|
|
42096
|
-
|
|
42097
|
-
|
|
42098
|
-
|
|
42099
|
-
|
|
42100
|
-
|
|
42101
|
-
|
|
42102
|
-
|
|
42103
|
-
|
|
42104
|
-
|
|
43339
|
+
const audioB64 = data.audio;
|
|
43340
|
+
if (!audioB64) return;
|
|
43341
|
+
try {
|
|
43342
|
+
const audioBytes = Buffer.from(audioB64, "base64");
|
|
43343
|
+
if (audioBytes.length > 0) {
|
|
43344
|
+
push2(audioBytes);
|
|
43345
|
+
}
|
|
43346
|
+
} catch {
|
|
43347
|
+
}
|
|
43348
|
+
});
|
|
43349
|
+
ws.on("close", () => {
|
|
43350
|
+
push2(null);
|
|
43351
|
+
});
|
|
43352
|
+
ws.on("error", (err) => {
|
|
43353
|
+
push2({ error: err instanceof Error ? err : new Error(String(err)) });
|
|
43354
|
+
});
|
|
43355
|
+
ws.send(JSON.stringify({ text: " " }));
|
|
43356
|
+
ws.send(JSON.stringify({ text }));
|
|
43357
|
+
ws.send(JSON.stringify({ text: "" }));
|
|
42105
43358
|
while (true) {
|
|
42106
|
-
|
|
43359
|
+
let frameTimer;
|
|
43360
|
+
const item = queue.length > 0 ? queue.shift() : await Promise.race([
|
|
43361
|
+
new Promise((resolve2) => waiters.push(resolve2)),
|
|
43362
|
+
new Promise((_, reject) => {
|
|
43363
|
+
frameTimer = setTimeout(
|
|
43364
|
+
() => reject(new Error("Telnyx TTS frame timeout")),
|
|
43365
|
+
FRAME_TIMEOUT_MS2
|
|
43366
|
+
);
|
|
43367
|
+
})
|
|
43368
|
+
]).finally(() => {
|
|
43369
|
+
if (frameTimer !== void 0) clearTimeout(frameTimer);
|
|
43370
|
+
});
|
|
42107
43371
|
if (item === null) return;
|
|
42108
43372
|
if (typeof item === "object" && "error" in item) throw item.error;
|
|
42109
43373
|
yield item;
|
|
42110
43374
|
}
|
|
42111
43375
|
} finally {
|
|
42112
43376
|
try {
|
|
42113
|
-
ws
|
|
43377
|
+
ws?.close();
|
|
42114
43378
|
} catch {
|
|
42115
43379
|
}
|
|
43380
|
+
ws?.removeAllListeners();
|
|
42116
43381
|
}
|
|
42117
43382
|
}
|
|
42118
43383
|
};
|
|
@@ -42187,6 +43452,7 @@ init_event_bus();
|
|
|
42187
43452
|
PRICING_VERSION,
|
|
42188
43453
|
PartialStreamError,
|
|
42189
43454
|
Patter,
|
|
43455
|
+
PatterConfigError,
|
|
42190
43456
|
PatterConnectionError,
|
|
42191
43457
|
PatterError,
|
|
42192
43458
|
PatterTool,
|
|
@@ -42274,6 +43540,8 @@ init_event_bus();
|
|
|
42274
43540
|
mulawToPcm16,
|
|
42275
43541
|
notifyDashboard,
|
|
42276
43542
|
openaiTts,
|
|
43543
|
+
openclawConsult,
|
|
43544
|
+
openclawPostCallNotifier,
|
|
42277
43545
|
pcm16ToMulaw,
|
|
42278
43546
|
resample16kTo8k,
|
|
42279
43547
|
resample24kTo16k,
|