getpatter 0.6.3 → 0.6.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -4
- package/dist/{carrier-config-3WDQXP5J.mjs → carrier-config-7YGNRBPO.mjs} +17 -11
- package/dist/{chunk-R2T4JABZ.mjs → chunk-3VVATR6A.mjs} +8 -6
- package/dist/{chunk-CL2U3YET.mjs → chunk-BO227NTF.mjs} +271 -54
- package/dist/{chunk-Z6W5XFWS.mjs → chunk-CRPJLVHB.mjs} +992 -197
- package/dist/cli.js +63 -20
- package/dist/dashboard/ui.html +10 -10
- package/dist/index.d.mts +1250 -192
- package/dist/index.d.ts +1250 -192
- package/dist/index.js +2062 -518
- package/dist/index.mjs +759 -250
- package/dist/{openai-realtime-2-CNFARP25.mjs → openai-realtime-2-L5EKAAUH.mjs} +1 -1
- package/dist/{silero-vad-LNDFGIY7.mjs → silero-vad-RGF5HCIR.mjs} +1 -1
- package/dist/{test-mode-MDBQ4ECE.mjs → test-mode-HGHI2AUV.mjs} +2 -2
- package/package.json +2 -1
- package/src/dashboard/ui.html +10 -10
package/dist/index.js
CHANGED
|
@@ -49,7 +49,7 @@ var init_cjs_shims = __esm({
|
|
|
49
49
|
});
|
|
50
50
|
|
|
51
51
|
// src/errors.ts
|
|
52
|
-
var ErrorCode, PatterError, PatterConnectionError, AuthenticationError, ProvisionError, RateLimitError;
|
|
52
|
+
var ErrorCode, PatterError, PatterConfigError, PatterConnectionError, AuthenticationError, ProvisionError, RateLimitError;
|
|
53
53
|
var init_errors = __esm({
|
|
54
54
|
"src/errors.ts"() {
|
|
55
55
|
"use strict";
|
|
@@ -85,6 +85,12 @@ var init_errors = __esm({
|
|
|
85
85
|
this.code = options?.code ?? ErrorCode.INTERNAL;
|
|
86
86
|
}
|
|
87
87
|
};
|
|
88
|
+
PatterConfigError = class extends PatterError {
|
|
89
|
+
constructor(message, options) {
|
|
90
|
+
super(message, { code: options?.code ?? ErrorCode.CONFIG });
|
|
91
|
+
this.name = "PatterConfigError";
|
|
92
|
+
}
|
|
93
|
+
};
|
|
88
94
|
PatterConnectionError = class extends PatterError {
|
|
89
95
|
constructor(message, options) {
|
|
90
96
|
super(message, { code: options?.code ?? ErrorCode.CONNECTION });
|
|
@@ -136,6 +142,45 @@ var init_logger = __esm({
|
|
|
136
142
|
});
|
|
137
143
|
|
|
138
144
|
// src/providers/openai-realtime.ts
|
|
145
|
+
function validateRealtimeTurnDetection(td) {
|
|
146
|
+
if (td === void 0) return;
|
|
147
|
+
if (td.type !== void 0 && td.type !== "server_vad" && td.type !== "semantic_vad") {
|
|
148
|
+
throw new Error(
|
|
149
|
+
`RealtimeTurnDetection.type must be 'server_vad' or 'semantic_vad', got ${JSON.stringify(td.type)}`
|
|
150
|
+
);
|
|
151
|
+
}
|
|
152
|
+
if (td.eagerness !== void 0 && td.eagerness !== "low" && td.eagerness !== "medium" && td.eagerness !== "high" && td.eagerness !== "auto") {
|
|
153
|
+
throw new Error(
|
|
154
|
+
`RealtimeTurnDetection.eagerness must be one of low|medium|high|auto, got ${JSON.stringify(td.eagerness)}`
|
|
155
|
+
);
|
|
156
|
+
}
|
|
157
|
+
if (td.eagerness !== void 0 && td.type !== "semantic_vad") {
|
|
158
|
+
throw new Error(
|
|
159
|
+
"RealtimeTurnDetection.eagerness is only valid when type='semantic_vad'"
|
|
160
|
+
);
|
|
161
|
+
}
|
|
162
|
+
}
|
|
163
|
+
function buildTurnDetection(td, opts) {
|
|
164
|
+
validateRealtimeTurnDetection(td);
|
|
165
|
+
let detection;
|
|
166
|
+
if (td?.type === "semantic_vad") {
|
|
167
|
+
detection = { type: "semantic_vad" };
|
|
168
|
+
if (td.eagerness !== void 0) detection.eagerness = td.eagerness;
|
|
169
|
+
} else {
|
|
170
|
+
detection = {
|
|
171
|
+
type: td?.type ?? opts.defaultType,
|
|
172
|
+
threshold: td?.threshold ?? 0.5,
|
|
173
|
+
prefix_padding_ms: td?.prefixPaddingMs ?? 300,
|
|
174
|
+
silence_duration_ms: td?.silenceDurationMs ?? opts.defaultSilenceMs
|
|
175
|
+
};
|
|
176
|
+
}
|
|
177
|
+
if (opts.includeResponseGating) {
|
|
178
|
+
const serverManaged = !(opts.gateResponseOnTranscript ?? false);
|
|
179
|
+
detection.create_response = serverManaged;
|
|
180
|
+
detection.interrupt_response = serverManaged;
|
|
181
|
+
}
|
|
182
|
+
return detection;
|
|
183
|
+
}
|
|
139
184
|
function estimateAudioMs(chunk, format) {
|
|
140
185
|
if (chunk.length === 0) return 0;
|
|
141
186
|
if (format === OpenAIRealtimeAudioFormat.G711_ULAW || format === OpenAIRealtimeAudioFormat.G711_ALAW)
|
|
@@ -196,6 +241,7 @@ var init_openai_realtime = __esm({
|
|
|
196
241
|
this.tools = tools;
|
|
197
242
|
this.audioFormat = audioFormat;
|
|
198
243
|
this.options = options;
|
|
244
|
+
this.gateResponseOnTranscript = options.gateResponseOnTranscript ?? false;
|
|
199
245
|
}
|
|
200
246
|
apiKey;
|
|
201
247
|
model;
|
|
@@ -225,6 +271,23 @@ var init_openai_realtime = __esm({
|
|
|
225
271
|
// could have produced, which is what the user actually heard.
|
|
226
272
|
currentResponseFirstAudioAt = null;
|
|
227
273
|
options;
|
|
274
|
+
// When true, the stream handler waits for the Whisper ``transcript_input``
|
|
275
|
+
// event before requesting the model response (legacy behavior). When false
|
|
276
|
+
// (default) the response is requested on ``speech_stopped`` and the
|
|
277
|
+
// transcript is display-only. Read by the stream handler via
|
|
278
|
+
// ``getGateResponseOnTranscript()``.
|
|
279
|
+
gateResponseOnTranscript;
|
|
280
|
+
/**
|
|
281
|
+
* Whether the stream handler should gate the model response on the Whisper
|
|
282
|
+
* transcript (legacy) or fire it on `speech_stopped` (default, decoupled).
|
|
283
|
+
*
|
|
284
|
+
* `false` (default) — the response is requested on `speech_stopped`,
|
|
285
|
+
* independently of Whisper. `true` — the response is requested only after
|
|
286
|
+
* `transcript_input` passes the hallucination filter.
|
|
287
|
+
*/
|
|
288
|
+
getGateResponseOnTranscript() {
|
|
289
|
+
return this.gateResponseOnTranscript;
|
|
290
|
+
}
|
|
228
291
|
/**
|
|
229
292
|
* Build the production session.update body. Mirrors the body sent
|
|
230
293
|
* inside `connect()` so warmup can apply identical configuration to
|
|
@@ -236,16 +299,26 @@ var init_openai_realtime = __esm({
|
|
|
236
299
|
output_audio_format: this.audioFormat,
|
|
237
300
|
voice: this.voice,
|
|
238
301
|
instructions: this.instructions || "You are a helpful voice assistant. Be concise.",
|
|
239
|
-
turn_detection
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
302
|
+
// v1 turn_detection carries NO create_response / interrupt_response
|
|
303
|
+
// keys. The v1 server defaults (`create_response: true`,
|
|
304
|
+
// `interrupt_response: true`) ARE the server-managed behaviour we want by
|
|
305
|
+
// default, so omitting them is equivalent to sending `true` — gating
|
|
306
|
+
// disabled here. `gateResponseOnTranscript` is still threaded through for
|
|
307
|
+
// symmetry with the GA builder, but has no wire effect while
|
|
308
|
+
// includeResponseGating is false.
|
|
309
|
+
turn_detection: buildTurnDetection(this.options.turnDetection, {
|
|
310
|
+
defaultType: this.options.vadType ?? OpenAIRealtimeVADType.SERVER_VAD,
|
|
311
|
+
defaultSilenceMs: this.options.silenceDurationMs ?? 300,
|
|
312
|
+
includeResponseGating: false,
|
|
313
|
+
gateResponseOnTranscript: this.gateResponseOnTranscript
|
|
314
|
+
}),
|
|
245
315
|
input_audio_transcription: {
|
|
246
316
|
model: this.options.inputAudioTranscriptionModel ?? OpenAITranscriptionModel.WHISPER_1
|
|
247
317
|
}
|
|
248
318
|
};
|
|
319
|
+
if (this.options.noiseReduction !== void 0) {
|
|
320
|
+
config2.input_audio_noise_reduction = { type: this.options.noiseReduction };
|
|
321
|
+
}
|
|
249
322
|
if (this.options.temperature !== void 0) config2.temperature = this.options.temperature;
|
|
250
323
|
if (this.options.maxResponseOutputTokens !== void 0) {
|
|
251
324
|
config2.max_response_output_tokens = this.options.maxResponseOutputTokens;
|
|
@@ -509,6 +582,10 @@ var init_openai_realtime = __esm({
|
|
|
509
582
|
};
|
|
510
583
|
const timer = setTimeout(() => {
|
|
511
584
|
cleanup();
|
|
585
|
+
try {
|
|
586
|
+
ws.close();
|
|
587
|
+
} catch {
|
|
588
|
+
}
|
|
512
589
|
reject(new Error("OpenAI Realtime park connect timeout"));
|
|
513
590
|
}, 8e3);
|
|
514
591
|
ws.on("message", onMessage);
|
|
@@ -603,20 +680,33 @@ var init_openai_realtime = __esm({
|
|
|
603
680
|
dispatch("error", { type: "socket_error", message: err?.message ?? String(err) });
|
|
604
681
|
});
|
|
605
682
|
}
|
|
606
|
-
/** Truncate the in-flight assistant turn
|
|
683
|
+
/** Truncate the in-flight assistant turn's playback offset on the server.
|
|
684
|
+
*
|
|
685
|
+
* Sends ONLY ``conversation.item.truncate`` — no ``response.cancel``. This
|
|
686
|
+
* is the half of barge-in handling that a WebSocket transport MUST always
|
|
687
|
+
* perform: per OpenAI's docs, the GA server auto-truncates on barge-in only
|
|
688
|
+
* over WebRTC / SIP; on the WebSocket transport the client is responsible
|
|
689
|
+
* for telling the server how much of the assistant turn was actually heard.
|
|
690
|
+
* In server-managed mode (``interrupt_response: true``) the server already
|
|
691
|
+
* cancels the response itself, so issuing ``response.cancel`` here would be
|
|
692
|
+
* redundant / rejected — call this method, not {@link cancelResponse}.
|
|
607
693
|
*
|
|
608
694
|
* ``audio_end_ms`` MUST reflect what the caller actually heard, not what
|
|
609
695
|
* the server generated. OpenAI streams audio at 5-10x real-time, so the
|
|
610
696
|
* byte-derived counter overstates playback whenever the consumer cleared
|
|
611
|
-
* its playout buffer (e.g. ``
|
|
697
|
+
* its playout buffer (e.g. ``sendClear``) before the audio reached the
|
|
612
698
|
* speaker. We bound the truncate point by wall-clock time since the first
|
|
613
699
|
* chunk of this response — that's the physical maximum a 1x real-time
|
|
614
700
|
* playback could have produced. Without this cap, OpenAI keeps the full
|
|
615
701
|
* generated assistant text on the transcript, and the model replays /
|
|
616
702
|
* resumes from it on the next turn — manifesting as re-greetings and
|
|
617
703
|
* mid-sentence fragments after a barge-in storm.
|
|
704
|
+
*
|
|
705
|
+
* No-op when no response is in flight, keeping it idempotent across stale
|
|
706
|
+
* callers. Resets per-response tracking so post-truncate late frames and
|
|
707
|
+
* the next response start clean.
|
|
618
708
|
*/
|
|
619
|
-
|
|
709
|
+
truncate() {
|
|
620
710
|
if (!this.ws) return;
|
|
621
711
|
if (!this.currentResponseItemId) {
|
|
622
712
|
return;
|
|
@@ -636,11 +726,31 @@ var init_openai_realtime = __esm({
|
|
|
636
726
|
} catch (err) {
|
|
637
727
|
getLogger().debug?.(`conversation.item.truncate failed: ${String(err)}`);
|
|
638
728
|
}
|
|
639
|
-
this.ws.send(JSON.stringify({ type: "response.cancel" }));
|
|
640
729
|
this.currentResponseItemId = null;
|
|
641
730
|
this.currentResponseAudioMs = 0;
|
|
642
731
|
this.currentResponseFirstAudioAt = null;
|
|
643
732
|
}
|
|
733
|
+
/** Truncate the in-flight assistant turn AND cancel the active response.
|
|
734
|
+
*
|
|
735
|
+
* Sends BOTH ``conversation.item.truncate`` (the played-offset bookkeeping)
|
|
736
|
+
* AND ``response.cancel``. Use this on the LEGACY client-managed barge-in
|
|
737
|
+
* path (``gateResponseOnTranscript`` true → ``interrupt_response: false``,
|
|
738
|
+
* so the server does NOT cancel for us) and for explicit cancels driven by
|
|
739
|
+
* Patter (e.g. on transfer / hangup). In server-managed mode call
|
|
740
|
+
* {@link truncate} instead — the server already cancels the response, and an
|
|
741
|
+
* extra ``response.cancel`` would be redundant / rejected.
|
|
742
|
+
*
|
|
743
|
+
* Truncation bounding semantics are identical to {@link truncate}; see its
|
|
744
|
+
* doc comment for the ``audio_end_ms`` wall-clock cap rationale.
|
|
745
|
+
*/
|
|
746
|
+
cancelResponse() {
|
|
747
|
+
if (!this.ws) return;
|
|
748
|
+
if (!this.currentResponseItemId) {
|
|
749
|
+
return;
|
|
750
|
+
}
|
|
751
|
+
this.truncate();
|
|
752
|
+
this.ws.send(JSON.stringify({ type: "response.cancel" }));
|
|
753
|
+
}
|
|
644
754
|
/** Inject a user text turn and request a new response. */
|
|
645
755
|
async sendText(text) {
|
|
646
756
|
this.ws?.send(JSON.stringify({
|
|
@@ -685,6 +795,32 @@ var init_openai_realtime = __esm({
|
|
|
685
795
|
}
|
|
686
796
|
}));
|
|
687
797
|
}
|
|
798
|
+
/**
|
|
799
|
+
* Speak a short reassurance filler WITHOUT injecting a `role:user` turn.
|
|
800
|
+
*
|
|
801
|
+
* Same no-fake-turn shape as {@link sendFirstMessage}: a bare
|
|
802
|
+
* `response.create` carrying explicit `instructions`, so the filler is the
|
|
803
|
+
* assistant's own in-band audio. The reassurance scheduler in the
|
|
804
|
+
* stream-handler routes here instead of {@link sendText} — which would emit
|
|
805
|
+
* a `conversation.item.create` with `role:'user'` and falsely show the
|
|
806
|
+
* caller saying "One moment." in the transcript. Fillers must not imply
|
|
807
|
+
* success or failure.
|
|
808
|
+
*
|
|
809
|
+
* Uses `modalities: ['audio', 'text']` (v1-beta shape). The GA subclass
|
|
810
|
+
* {@link OpenAIRealtime2Adapter} overrides this with `output_modalities`
|
|
811
|
+
* and re-injects `audio.output.voice` so the GA endpoint does not reject
|
|
812
|
+
* the request. Mirrors Python `OpenAIRealtimeAdapter.send_reassurance` in
|
|
813
|
+
* `providers/openai_realtime.py`.
|
|
814
|
+
*/
|
|
815
|
+
async sendReassurance(text) {
|
|
816
|
+
this.ws?.send(JSON.stringify({
|
|
817
|
+
type: "response.create",
|
|
818
|
+
response: {
|
|
819
|
+
modalities: ["audio", "text"],
|
|
820
|
+
instructions: `Say exactly this and nothing else: "${text}"`
|
|
821
|
+
}
|
|
822
|
+
}));
|
|
823
|
+
}
|
|
688
824
|
/** Submit a tool/function-call result and request the next response. */
|
|
689
825
|
async sendFunctionResult(callId, result) {
|
|
690
826
|
this.ws?.send(JSON.stringify({
|
|
@@ -925,7 +1061,12 @@ var init_transcoding = __esm({
|
|
|
925
1061
|
* Resets all state after flushing.
|
|
926
1062
|
*/
|
|
927
1063
|
flush() {
|
|
928
|
-
this.carry.flush();
|
|
1064
|
+
const carryTail = this.carry.flush();
|
|
1065
|
+
if (carryTail.length > 0) {
|
|
1066
|
+
getLogger().warn(
|
|
1067
|
+
"[patter] StatefulResampler.flush: trailing odd byte discarded \u2014 upstream produced odd-length PCM stream"
|
|
1068
|
+
);
|
|
1069
|
+
}
|
|
929
1070
|
if (this.srcRate === 16e3 && this.dstRate === 8e3 && this.firPendingSample !== null) {
|
|
930
1071
|
const s = this.firPendingSample;
|
|
931
1072
|
const tmp = Buffer.alloc(4);
|
|
@@ -1165,44 +1306,46 @@ var init_openai_realtime_2 = __esm({
|
|
|
1165
1306
|
buildGASessionConfig() {
|
|
1166
1307
|
const opts = this.options;
|
|
1167
1308
|
const fmt = { type: "audio/pcm", rate: 24e3 };
|
|
1309
|
+
const audioInput = {
|
|
1310
|
+
format: fmt,
|
|
1311
|
+
transcription: {
|
|
1312
|
+
model: opts.inputAudioTranscriptionModel ?? OpenAITranscriptionModel.WHISPER_1
|
|
1313
|
+
},
|
|
1314
|
+
// Response creation + barge-in cancellation (issue #154 — hand
|
|
1315
|
+
// turn-taking to the server by default):
|
|
1316
|
+
// - DEFAULT (`gateResponseOnTranscript` false → SERVER-MANAGED):
|
|
1317
|
+
// `create_response: true` lets the SERVER auto-create the response
|
|
1318
|
+
// when it commits the user's audio buffer
|
|
1319
|
+
// (`input_audio_buffer.committed`). `interrupt_response: true` lets the
|
|
1320
|
+
// SERVER cancel the in-flight response on its own VAD `speech_started`.
|
|
1321
|
+
// The e2e model replies immediately, in parallel with the Whisper
|
|
1322
|
+
// transcript — no transcript wait (~500 ms reclaimed), no client-side
|
|
1323
|
+
// race. On a WebSocket transport the client STILL must clear the
|
|
1324
|
+
// carrier buffer (`sendClear`) and `conversation.item.truncate` the
|
|
1325
|
+
// played offset on barge-in (the server only auto-truncates on
|
|
1326
|
+
// WebRTC/SIP), but it does NOT send `response.cancel`. Whisper is
|
|
1327
|
+
// display-only — it can never trigger / gate / cancel the response.
|
|
1328
|
+
// - LEGACY (`gateResponseOnTranscript` true → CLIENT-MANAGED opt-out):
|
|
1329
|
+
// `create_response: false` + `interrupt_response: false` so the stream
|
|
1330
|
+
// handler drives `response.create` (after the hallucination filter)
|
|
1331
|
+
// and `response.cancel` (on barge-in) itself. Escape hatch for no-AEC
|
|
1332
|
+
// PSTN self-interruption. Both keys are tied to the same switch inside
|
|
1333
|
+
// `buildTurnDetection`.
|
|
1334
|
+
turn_detection: buildTurnDetection(opts.turnDetection, {
|
|
1335
|
+
defaultType: opts.vadType ?? OpenAIRealtimeVADType.SERVER_VAD,
|
|
1336
|
+
defaultSilenceMs: opts.silenceDurationMs ?? 300,
|
|
1337
|
+
includeResponseGating: true,
|
|
1338
|
+
gateResponseOnTranscript: this.getGateResponseOnTranscript()
|
|
1339
|
+
})
|
|
1340
|
+
};
|
|
1341
|
+
if (opts.noiseReduction !== void 0) {
|
|
1342
|
+
audioInput.noise_reduction = { type: opts.noiseReduction };
|
|
1343
|
+
}
|
|
1168
1344
|
const config2 = {
|
|
1169
1345
|
type: "realtime",
|
|
1170
1346
|
output_modalities: opts.modalities ?? ["audio"],
|
|
1171
1347
|
audio: {
|
|
1172
|
-
input:
|
|
1173
|
-
format: fmt,
|
|
1174
|
-
transcription: {
|
|
1175
|
-
model: opts.inputAudioTranscriptionModel ?? OpenAITranscriptionModel.WHISPER_1
|
|
1176
|
-
},
|
|
1177
|
-
// VAD threshold raised back to the OpenAI default (0.5) on
|
|
1178
|
-
// 2026-05-22. The earlier 0.1 tuning (motivated by the
|
|
1179
|
-
// upsampled telephony-band loss in high frequencies) made the
|
|
1180
|
-
// server VAD trigger on the carrier-loopback echo of the
|
|
1181
|
-
// agent's OWN outbound audio in PSTN no-AEC scenarios.
|
|
1182
|
-
// Combined with the default ``turn_detection.create_response:
|
|
1183
|
-
// true``, every phantom ``speech_started`` ended a turn early
|
|
1184
|
-
// and auto-created a new response that the agent immediately
|
|
1185
|
-
// spoke over, leading to a runaway loop where the first
|
|
1186
|
-
// message was repeatedly cut and re-generated.
|
|
1187
|
-
turn_detection: {
|
|
1188
|
-
type: opts.vadType ?? OpenAIRealtimeVADType.SERVER_VAD,
|
|
1189
|
-
threshold: 0.5,
|
|
1190
|
-
prefix_padding_ms: 300,
|
|
1191
|
-
silence_duration_ms: opts.silenceDurationMs ?? 500,
|
|
1192
|
-
// Defer ``response.create`` to the application: when OpenAI's
|
|
1193
|
-
// server VAD commits an ``input_audio_buffer.committed`` segment
|
|
1194
|
-
// that turns out to be a Whisper hallucination on silence/echo,
|
|
1195
|
-
// auto-creating a response would generate a phantom turn (the
|
|
1196
|
-
// model reads the hallucinated text as user input). Patter
|
|
1197
|
-
// triggers ``response.create`` explicitly in the Realtime
|
|
1198
|
-
// stream-handler AFTER validating ``transcript_input`` against
|
|
1199
|
-
// the hallucination filter. Pair with ``interrupt_response:
|
|
1200
|
-
// false`` so server VAD also leaves in-flight responses alone —
|
|
1201
|
-
// barge-in is gated client-side.
|
|
1202
|
-
create_response: false,
|
|
1203
|
-
interrupt_response: false
|
|
1204
|
-
}
|
|
1205
|
-
},
|
|
1348
|
+
input: audioInput,
|
|
1206
1349
|
output: {
|
|
1207
1350
|
format: fmt,
|
|
1208
1351
|
voice: this.voice
|
|
@@ -1255,14 +1398,7 @@ var init_openai_realtime_2 = __esm({
|
|
|
1255
1398
|
if (t && t in GA_TO_V1_EVENT_NAMES) {
|
|
1256
1399
|
const newType = GA_TO_V1_EVENT_NAMES[t];
|
|
1257
1400
|
if (t === "response.output_audio.delta" && typeof parsed.delta === "string") {
|
|
1258
|
-
|
|
1259
|
-
const FRAME_BYTES = 160;
|
|
1260
|
-
if (mulaw.length === 0) return;
|
|
1261
|
-
for (let off = 0; off < mulaw.length; off += FRAME_BYTES) {
|
|
1262
|
-
const slice = mulaw.subarray(off, Math.min(off + FRAME_BYTES, mulaw.length));
|
|
1263
|
-
const frame = { ...parsed, type: newType, delta: slice.toString("base64") };
|
|
1264
|
-
handler(Buffer.from(JSON.stringify(frame)), ...rest);
|
|
1265
|
-
}
|
|
1401
|
+
this.translateGaAudioDelta(parsed, handler, rest);
|
|
1266
1402
|
return;
|
|
1267
1403
|
}
|
|
1268
1404
|
parsed.type = newType;
|
|
@@ -1291,6 +1427,7 @@ var init_openai_realtime_2 = __esm({
|
|
|
1291
1427
|
sessionCreated = true;
|
|
1292
1428
|
ws.send(JSON.stringify({ type: "session.update", session: this.buildGASessionConfig() }));
|
|
1293
1429
|
} else if (msg.type === "session.updated") {
|
|
1430
|
+
this.warnIfOutputFormatUnexpected(msg);
|
|
1294
1431
|
cleanup();
|
|
1295
1432
|
resolve2();
|
|
1296
1433
|
} else if (msg.type === "error") {
|
|
@@ -1396,6 +1533,10 @@ var init_openai_realtime_2 = __esm({
|
|
|
1396
1533
|
};
|
|
1397
1534
|
const timer = setTimeout(() => {
|
|
1398
1535
|
cleanup();
|
|
1536
|
+
try {
|
|
1537
|
+
ws.close();
|
|
1538
|
+
} catch {
|
|
1539
|
+
}
|
|
1399
1540
|
reject(new Error("OpenAI Realtime 2 park connect timeout"));
|
|
1400
1541
|
}, 8e3);
|
|
1401
1542
|
ws.on("message", onMessage);
|
|
@@ -1443,8 +1584,12 @@ var init_openai_realtime_2 = __esm({
|
|
|
1443
1584
|
const parsed = JSON.parse(text);
|
|
1444
1585
|
const t = parsed.type;
|
|
1445
1586
|
if (t && Object.prototype.hasOwnProperty.call(GA_TO_V1_EVENT_NAMES, t)) {
|
|
1587
|
+
if (t === "response.output_audio.delta" && typeof parsed.delta === "string") {
|
|
1588
|
+
this.translateGaAudioDelta(parsed, handler, rest);
|
|
1589
|
+
return;
|
|
1590
|
+
}
|
|
1446
1591
|
parsed.type = GA_TO_V1_EVENT_NAMES[t];
|
|
1447
|
-
handler(JSON.stringify(parsed), ...rest);
|
|
1592
|
+
handler(Buffer.from(JSON.stringify(parsed)), ...rest);
|
|
1448
1593
|
return;
|
|
1449
1594
|
}
|
|
1450
1595
|
} catch {
|
|
@@ -1529,6 +1674,55 @@ var init_openai_realtime_2 = __esm({
|
|
|
1529
1674
|
}
|
|
1530
1675
|
return out;
|
|
1531
1676
|
}
|
|
1677
|
+
/**
|
|
1678
|
+
* Log-only safety net for issue #154. The GA server echoes the *effective*
|
|
1679
|
+
* session config in `session.updated`; we request `audio/pcm` @ 24 kHz and
|
|
1680
|
+
* transcode PCM24→mulaw8 ourselves (see
|
|
1681
|
+
* `transcodeOutboundPcm24ToMulaw8Buffer`). If a future GA schema change ever
|
|
1682
|
+
* made the server return a different output format, that transcode — which
|
|
1683
|
+
* assumes PCM16-LE @ 24 kHz — would silently corrupt audio, exactly the
|
|
1684
|
+
* v1-beta failure mode #154 fixed. Warn so the drift surfaces in logs instead
|
|
1685
|
+
* of as static. Never gates audio.
|
|
1686
|
+
*/
|
|
1687
|
+
warnIfOutputFormatUnexpected(msg) {
|
|
1688
|
+
const fmt = msg?.session?.audio?.output?.format;
|
|
1689
|
+
if (!fmt || typeof fmt !== "object") return;
|
|
1690
|
+
if (fmt.type !== "audio/pcm" || fmt.rate != null && fmt.rate !== 24e3) {
|
|
1691
|
+
getLogger().warn(
|
|
1692
|
+
`OpenAI Realtime 2: server-echoed output format ${JSON.stringify(fmt)} differs from the requested audio/pcm@24000 \u2014 the outbound PCM24\u2192mulaw8 transcode assumes PCM16-LE 24 kHz, so carrier audio may be garbled (issue #154). Informational only; audio is not gated on this.`
|
|
1693
|
+
);
|
|
1694
|
+
}
|
|
1695
|
+
}
|
|
1696
|
+
/**
|
|
1697
|
+
* Shared audio-delta translation helper. Transcodes a GA
|
|
1698
|
+
* `response.output_audio.delta` payload (base64 PCM-16-LE 24 kHz)
|
|
1699
|
+
* into mulaw 8 kHz and splits the result into 160-byte (20 ms) frames,
|
|
1700
|
+
* dispatching one synthetic `response.audio.delta` event per frame.
|
|
1701
|
+
*
|
|
1702
|
+
* Called from BOTH the `connect()` shim and the `adoptWebSocket()` shim
|
|
1703
|
+
* so that warm-path (prewarm/adopted) calls receive identical transcoding
|
|
1704
|
+
* to cold-path calls. Without this, adopted sockets forwarded raw PCM-24
|
|
1705
|
+
* to Twilio/Telnyx, producing garbled or silent audio on every warm call.
|
|
1706
|
+
*
|
|
1707
|
+
* @param parsed - The parsed GA event object (type already checked to be
|
|
1708
|
+
* `response.output_audio.delta` with a string `delta`).
|
|
1709
|
+
* @param handler - The downstream message listener to dispatch each frame to.
|
|
1710
|
+
* @param rest - Extra arguments forwarded from the original `message` event.
|
|
1711
|
+
* @returns `true` if frames were dispatched (caller should return early),
|
|
1712
|
+
* `false` if the resampler is still warming up (zero output bytes).
|
|
1713
|
+
*/
|
|
1714
|
+
translateGaAudioDelta(parsed, handler, rest) {
|
|
1715
|
+
const newType = GA_TO_V1_EVENT_NAMES["response.output_audio.delta"];
|
|
1716
|
+
const mulaw = this.transcodeOutboundPcm24ToMulaw8Buffer(parsed.delta);
|
|
1717
|
+
const FRAME_BYTES = 160;
|
|
1718
|
+
if (mulaw.length === 0) return false;
|
|
1719
|
+
for (let off = 0; off < mulaw.length; off += FRAME_BYTES) {
|
|
1720
|
+
const slice = mulaw.subarray(off, Math.min(off + FRAME_BYTES, mulaw.length));
|
|
1721
|
+
const frame = { ...parsed, type: newType, delta: slice.toString("base64") };
|
|
1722
|
+
handler(Buffer.from(JSON.stringify(frame)), ...rest);
|
|
1723
|
+
}
|
|
1724
|
+
return true;
|
|
1725
|
+
}
|
|
1532
1726
|
/**
|
|
1533
1727
|
* Base64 PCM-16-LE 24 kHz → Base64 mulaw 8 kHz. Used by the WS
|
|
1534
1728
|
* translation shim on each `response.output_audio.delta`. The stateful
|
|
@@ -1558,6 +1752,34 @@ var init_openai_realtime_2 = __esm({
|
|
|
1558
1752
|
}
|
|
1559
1753
|
this.ws?.send(JSON.stringify({ type: "response.create", response: responseBody }));
|
|
1560
1754
|
}
|
|
1755
|
+
/**
|
|
1756
|
+
* Speak a short reassurance filler WITHOUT injecting a `role:user` turn.
|
|
1757
|
+
*
|
|
1758
|
+
* GA-shape sibling of {@link sendFirstMessage} (and override of the base v1
|
|
1759
|
+
* {@link OpenAIRealtimeAdapter.sendReassurance}): a bare `response.create`
|
|
1760
|
+
* carrying explicit `instructions` so the filler is the assistant's own
|
|
1761
|
+
* in-band audio. No `conversation.item.create` with `role:"user"` is
|
|
1762
|
+
* emitted, so the transcript shows no phantom caller line. The GA endpoint
|
|
1763
|
+
* rejects `response.modalities` and does not inherit `audio.output.voice`
|
|
1764
|
+
* for an explicit `response.create`, so — exactly as in
|
|
1765
|
+
* {@link sendFirstMessage} — we send `output_modalities` and re-inject the
|
|
1766
|
+
* voice. Fillers must not imply success or failure.
|
|
1767
|
+
*
|
|
1768
|
+
* Mirrors Python `OpenAIRealtime2Adapter.send_reassurance` in
|
|
1769
|
+
* `providers/openai_realtime_2.py`.
|
|
1770
|
+
*/
|
|
1771
|
+
async sendReassurance(text) {
|
|
1772
|
+
if (!this.ws) return;
|
|
1773
|
+
const responseBody = {
|
|
1774
|
+
output_modalities: ["audio"],
|
|
1775
|
+
audio: { output: { voice: this.voice } },
|
|
1776
|
+
instructions: `Say exactly this and nothing else: "${text}"`
|
|
1777
|
+
};
|
|
1778
|
+
if (this.options.reasoningEffort !== void 0) {
|
|
1779
|
+
responseBody.reasoning = { effort: this.options.reasoningEffort };
|
|
1780
|
+
}
|
|
1781
|
+
this.ws.send(JSON.stringify({ type: "response.create", response: responseBody }));
|
|
1782
|
+
}
|
|
1561
1783
|
};
|
|
1562
1784
|
}
|
|
1563
1785
|
});
|
|
@@ -2345,11 +2567,25 @@ function calculateRealtimeCachedSavings(usage, pricing, model) {
|
|
|
2345
2567
|
const rates = resolveProviderRates(pricing.openai_realtime, model);
|
|
2346
2568
|
if (rates.unit !== "token") return 0;
|
|
2347
2569
|
const input = usage.input_token_details ?? {};
|
|
2348
|
-
const cached2 = input.cached_tokens_details ?? {};
|
|
2349
2570
|
const cachedAudioRate = rates.cached_audio_input_per_token ?? rates.audio_input_per_token ?? 0;
|
|
2350
2571
|
const cachedTextRate = rates.cached_text_input_per_token ?? rates.text_input_per_token ?? 0;
|
|
2351
|
-
const
|
|
2352
|
-
const
|
|
2572
|
+
const totalAudio = input.audio_tokens ?? 0;
|
|
2573
|
+
const totalText = input.text_tokens ?? 0;
|
|
2574
|
+
let cachedAudio;
|
|
2575
|
+
let cachedText;
|
|
2576
|
+
const details = input.cached_tokens_details;
|
|
2577
|
+
if (details && (details.audio_tokens !== void 0 || details.text_tokens !== void 0)) {
|
|
2578
|
+
cachedAudio = Math.min(details.audio_tokens ?? 0, totalAudio);
|
|
2579
|
+
cachedText = Math.min(details.text_tokens ?? 0, totalText);
|
|
2580
|
+
} else if (input.cached_tokens && input.cached_tokens > 0) {
|
|
2581
|
+
const totalIn = totalAudio + totalText;
|
|
2582
|
+
const ratio = totalIn > 0 ? input.cached_tokens / totalIn : 0;
|
|
2583
|
+
cachedAudio = Math.min(Math.round(totalAudio * ratio), totalAudio);
|
|
2584
|
+
cachedText = Math.min(Math.round(totalText * ratio), totalText);
|
|
2585
|
+
} else {
|
|
2586
|
+
cachedAudio = 0;
|
|
2587
|
+
cachedText = 0;
|
|
2588
|
+
}
|
|
2353
2589
|
const fullAudio = cachedAudio * (rates.audio_input_per_token ?? 0);
|
|
2354
2590
|
const fullText = cachedText * (rates.text_input_per_token ?? 0);
|
|
2355
2591
|
const discountedAudio = cachedAudio * cachedAudioRate;
|
|
@@ -2797,8 +3033,8 @@ function loadTranscriptJsonl(filePath) {
|
|
|
2797
3033
|
} catch {
|
|
2798
3034
|
continue;
|
|
2799
3035
|
}
|
|
2800
|
-
const tsIso = typeof row.ts === "string" ? Date.parse(row.ts) : NaN;
|
|
2801
|
-
const tsNumeric = typeof row.timestamp === "number" ? row.timestamp
|
|
3036
|
+
const tsIso = typeof row.ts === "string" ? Date.parse(row.ts) / 1e3 : NaN;
|
|
3037
|
+
const tsNumeric = typeof row.timestamp === "number" ? row.timestamp : NaN;
|
|
2802
3038
|
const timestamp = Number.isFinite(tsIso) ? tsIso : Number.isFinite(tsNumeric) ? tsNumeric : 0;
|
|
2803
3039
|
const userText = typeof row.user_text === "string" ? row.user_text : "";
|
|
2804
3040
|
const agentText = typeof row.agent_text === "string" ? row.agent_text : "";
|
|
@@ -2956,14 +3192,49 @@ var init_store = __esm({
|
|
|
2956
3192
|
} else {
|
|
2957
3193
|
for (let i = this.calls.length - 1; i >= 0; i--) {
|
|
2958
3194
|
if (this.calls[i].call_id === callId) {
|
|
2959
|
-
this.calls[i].status
|
|
2960
|
-
Object.assign(this.calls[i], extra);
|
|
3195
|
+
this.calls[i] = { ...this.calls[i], status, ...extra };
|
|
2961
3196
|
break;
|
|
2962
3197
|
}
|
|
2963
3198
|
}
|
|
2964
3199
|
}
|
|
2965
3200
|
this.publish("call_status", { call_id: callId, status, ...extra });
|
|
2966
3201
|
}
|
|
3202
|
+
/**
|
|
3203
|
+
* Record a single transcript line (user/assistant) as it becomes known.
|
|
3204
|
+
*
|
|
3205
|
+
* FIX-5 (issue #154): the live forward path for the dashboard transcript.
|
|
3206
|
+
* The Realtime stream handler calls this the moment each line is known — the
|
|
3207
|
+
* user line right after the hallucination filter accepts it, the assistant
|
|
3208
|
+
* line when its turn flushes — keyed by the monotonic ``turnIndex`` reserved
|
|
3209
|
+
* at turn-open (``reserveTurnIndex``). Each line is appended to the active
|
|
3210
|
+
* call's ``transcript`` array and broadcast over SSE as a ``transcript_line``
|
|
3211
|
+
* event so the dashboard can render lines as they arrive and re-sort by
|
|
3212
|
+
* ``(turnIndex, user<assistant)`` — making a late-arriving user line land
|
|
3213
|
+
* ABOVE its agent line. ``recordTurn`` de-dups against the lines pushed here
|
|
3214
|
+
* by ``(turnIndex, role)`` so the metrics path never double-pushes the same
|
|
3215
|
+
* text. Parity with Python ``record_transcript_line``.
|
|
3216
|
+
*/
|
|
3217
|
+
recordTranscriptLine(data) {
|
|
3218
|
+
const callId = data.call_id || "";
|
|
3219
|
+
const { role, text, turnIndex } = data;
|
|
3220
|
+
if (!callId || role !== "user" && role !== "assistant" || !text) return;
|
|
3221
|
+
const active = this.activeCalls.get(callId);
|
|
3222
|
+
if (active) {
|
|
3223
|
+
if (!active.transcript) active.transcript = [];
|
|
3224
|
+
active.transcript.push({
|
|
3225
|
+
role,
|
|
3226
|
+
text,
|
|
3227
|
+
timestamp: Date.now() / 1e3,
|
|
3228
|
+
turnIndex
|
|
3229
|
+
});
|
|
3230
|
+
}
|
|
3231
|
+
this.publish("transcript_line", {
|
|
3232
|
+
call_id: callId,
|
|
3233
|
+
turnIndex,
|
|
3234
|
+
role,
|
|
3235
|
+
text
|
|
3236
|
+
});
|
|
3237
|
+
}
|
|
2967
3238
|
/** Append a single conversation turn to an active call and broadcast it via SSE. */
|
|
2968
3239
|
recordTurn(data) {
|
|
2969
3240
|
const callId = data.call_id || "";
|
|
@@ -2978,14 +3249,19 @@ var init_store = __esm({
|
|
|
2978
3249
|
const userText = typeof turnRecord.user_text === "string" ? turnRecord.user_text : "";
|
|
2979
3250
|
const agentText = typeof turnRecord.agent_text === "string" ? turnRecord.agent_text : "";
|
|
2980
3251
|
const ts = typeof turnRecord.timestamp === "number" ? turnRecord.timestamp : Date.now() / 1e3;
|
|
2981
|
-
|
|
2982
|
-
|
|
3252
|
+
const turnIndex = typeof turnRecord.turn_index === "number" ? turnRecord.turn_index : void 0;
|
|
3253
|
+
const alreadyLive = (role) => turnIndex !== void 0 && (active.transcript ?? []).some(
|
|
3254
|
+
(e) => e.turnIndex === turnIndex && e.role === role
|
|
3255
|
+
);
|
|
3256
|
+
if (userText.length > 0 && !alreadyLive("user")) {
|
|
3257
|
+
active.transcript.push({ role: "user", text: userText, timestamp: ts, turnIndex });
|
|
2983
3258
|
}
|
|
2984
|
-
if (agentText.length > 0 && agentText !== "[interrupted]") {
|
|
3259
|
+
if (agentText.length > 0 && agentText !== "[interrupted]" && !alreadyLive("assistant")) {
|
|
2985
3260
|
active.transcript.push({
|
|
2986
3261
|
role: "assistant",
|
|
2987
3262
|
text: agentText,
|
|
2988
|
-
timestamp: ts
|
|
3263
|
+
timestamp: ts,
|
|
3264
|
+
turnIndex
|
|
2989
3265
|
});
|
|
2990
3266
|
}
|
|
2991
3267
|
}
|
|
@@ -3058,7 +3334,7 @@ var init_store = __esm({
|
|
|
3058
3334
|
getCall(callId) {
|
|
3059
3335
|
if (this.deletedCallIds.has(callId)) return null;
|
|
3060
3336
|
for (let i = this.calls.length - 1; i >= 0; i--) {
|
|
3061
|
-
if (this.calls[i].call_id === callId) return this.calls[i];
|
|
3337
|
+
if (this.calls[i].call_id === callId) return { ...this.calls[i] };
|
|
3062
3338
|
}
|
|
3063
3339
|
return null;
|
|
3064
3340
|
}
|
|
@@ -3100,7 +3376,9 @@ var init_store = __esm({
|
|
|
3100
3376
|
}
|
|
3101
3377
|
if (accepted.length === 0) return [];
|
|
3102
3378
|
accepted.sort();
|
|
3103
|
-
this.persistDeletedIds()
|
|
3379
|
+
this.persistDeletedIds().catch(
|
|
3380
|
+
(err) => getLogger().debug(`MetricsStore.deleteCalls: persistDeletedIds failed: ${String(err)}`)
|
|
3381
|
+
);
|
|
3104
3382
|
this.publish("calls_deleted", { call_ids: accepted });
|
|
3105
3383
|
return accepted;
|
|
3106
3384
|
}
|
|
@@ -3112,19 +3390,19 @@ var init_store = __esm({
|
|
|
3112
3390
|
getDeletedCallIds() {
|
|
3113
3391
|
return Array.from(this.deletedCallIds).sort();
|
|
3114
3392
|
}
|
|
3115
|
-
/** Atomically persist the deleted-ids set to disk. Best-effort. */
|
|
3116
|
-
persistDeletedIds() {
|
|
3393
|
+
/** Atomically persist the deleted-ids set to disk. Best-effort async. */
|
|
3394
|
+
async persistDeletedIds() {
|
|
3117
3395
|
if (this.deletedIdsPath === null) return;
|
|
3118
3396
|
try {
|
|
3119
3397
|
const dir = path2.dirname(this.deletedIdsPath);
|
|
3120
|
-
fs2.
|
|
3398
|
+
await fs2.promises.mkdir(dir, { recursive: true });
|
|
3121
3399
|
const tmp = this.deletedIdsPath + ".tmp";
|
|
3122
3400
|
const payload = {
|
|
3123
3401
|
version: 1,
|
|
3124
3402
|
deleted_call_ids: Array.from(this.deletedCallIds).sort()
|
|
3125
3403
|
};
|
|
3126
|
-
fs2.
|
|
3127
|
-
fs2.
|
|
3404
|
+
await fs2.promises.writeFile(tmp, JSON.stringify(payload, null, 2), "utf8");
|
|
3405
|
+
await fs2.promises.rename(tmp, this.deletedIdsPath);
|
|
3128
3406
|
} catch (err) {
|
|
3129
3407
|
getLogger().debug(
|
|
3130
3408
|
`MetricsStore.persistDeletedIds: ${String(err)}`
|
|
@@ -3133,7 +3411,8 @@ var init_store = __esm({
|
|
|
3133
3411
|
}
|
|
3134
3412
|
/** Look up an active call by id (returns undefined if not active or unknown). */
|
|
3135
3413
|
getActive(callId) {
|
|
3136
|
-
|
|
3414
|
+
const rec = this.activeCalls.get(callId);
|
|
3415
|
+
return rec !== void 0 ? { ...rec } : void 0;
|
|
3137
3416
|
}
|
|
3138
3417
|
/** Return all currently active (not yet ended) calls. */
|
|
3139
3418
|
getActiveCalls() {
|
|
@@ -3460,8 +3739,8 @@ function mountDashboard(app, store, token = "") {
|
|
|
3460
3739
|
res.type("text/html").send(DASHBOARD_HTML);
|
|
3461
3740
|
});
|
|
3462
3741
|
app.get("/api/dashboard/calls", auth2, (req, res) => {
|
|
3463
|
-
const limit = Math.min(parseInt(req.query.limit || "50", 10) || 50, 1e3);
|
|
3464
|
-
const offset = parseInt(req.query.offset || "0", 10) || 0;
|
|
3742
|
+
const limit = Math.min(Math.max(0, parseInt(req.query.limit || "50", 10) || 50), 1e3);
|
|
3743
|
+
const offset = Math.max(0, parseInt(req.query.offset || "0", 10) || 0);
|
|
3465
3744
|
res.json(store.getCalls(limit, offset));
|
|
3466
3745
|
});
|
|
3467
3746
|
app.get("/api/dashboard/calls/:callId", auth2, (req, res) => {
|
|
@@ -3551,8 +3830,8 @@ data: ${data}
|
|
|
3551
3830
|
function mountApi(app, store, token = "") {
|
|
3552
3831
|
const auth2 = makeAuthMiddleware(token);
|
|
3553
3832
|
app.get("/api/v1/calls", auth2, (req, res) => {
|
|
3554
|
-
const limit = Math.min(parseInt(req.query.limit || "50", 10) || 50, 1e3);
|
|
3555
|
-
const offset = parseInt(req.query.offset || "0", 10) || 0;
|
|
3833
|
+
const limit = Math.min(Math.max(0, parseInt(req.query.limit || "50", 10) || 50), 1e3);
|
|
3834
|
+
const offset = Math.max(0, parseInt(req.query.offset || "0", 10) || 0);
|
|
3556
3835
|
const calls = store.getCalls(limit, offset);
|
|
3557
3836
|
res.json({
|
|
3558
3837
|
data: calls,
|
|
@@ -3831,14 +4110,31 @@ var init_remote_message = __esm({
|
|
|
3831
4110
|
while (chunks.length > 0) {
|
|
3832
4111
|
yield chunks.shift();
|
|
3833
4112
|
}
|
|
4113
|
+
const READ_TIMEOUT_MS = 3e4;
|
|
3834
4114
|
while (!done && !error2) {
|
|
3835
|
-
const
|
|
4115
|
+
const messagePromise = new Promise((resolve2) => {
|
|
3836
4116
|
if (chunks.length > 0) {
|
|
3837
4117
|
resolve2(chunks.shift());
|
|
3838
4118
|
} else {
|
|
3839
4119
|
resolveNext = resolve2;
|
|
3840
4120
|
}
|
|
3841
4121
|
});
|
|
4122
|
+
let timeoutHandle;
|
|
4123
|
+
const timeoutPromise = new Promise((_, reject) => {
|
|
4124
|
+
timeoutHandle = setTimeout(
|
|
4125
|
+
() => reject(new Error("WebSocket read timeout: no frame received within 30 s")),
|
|
4126
|
+
READ_TIMEOUT_MS
|
|
4127
|
+
);
|
|
4128
|
+
});
|
|
4129
|
+
let text;
|
|
4130
|
+
try {
|
|
4131
|
+
text = await Promise.race([messagePromise, timeoutPromise]);
|
|
4132
|
+
} catch (timeoutErr) {
|
|
4133
|
+
resolveNext = null;
|
|
4134
|
+
throw timeoutErr;
|
|
4135
|
+
} finally {
|
|
4136
|
+
clearTimeout(timeoutHandle);
|
|
4137
|
+
}
|
|
3842
4138
|
if (text === null) break;
|
|
3843
4139
|
yield text;
|
|
3844
4140
|
}
|
|
@@ -4080,18 +4376,6 @@ var init_deepgram_stt = __esm({
|
|
|
4080
4376
|
} catch {
|
|
4081
4377
|
return;
|
|
4082
4378
|
}
|
|
4083
|
-
const dataType = String(data.type ?? "unknown");
|
|
4084
|
-
if (dataType === "Results") {
|
|
4085
|
-
const transcript2 = (data.channel?.alternatives?.[0]?.transcript ?? "").trim();
|
|
4086
|
-
const isFinal = Boolean(data.is_final);
|
|
4087
|
-
const speechFinal2 = Boolean(data.speech_final);
|
|
4088
|
-
const fromFinalize = Boolean(data.from_finalize);
|
|
4089
|
-
getLogger().info(
|
|
4090
|
-
`[DIAG] DG Results text=${JSON.stringify(transcript2.slice(0, 60))} isFinal=${isFinal} speechFinal=${speechFinal2} fromFinalize=${fromFinalize}`
|
|
4091
|
-
);
|
|
4092
|
-
} else if (dataType !== "Metadata") {
|
|
4093
|
-
getLogger().info(`[DIAG] DG event type=${dataType}`);
|
|
4094
|
-
}
|
|
4095
4379
|
if (data.type === "Metadata" && data.request_id) {
|
|
4096
4380
|
this.requestId = data.request_id;
|
|
4097
4381
|
return;
|
|
@@ -4181,7 +4465,7 @@ var init_deepgram_stt = __esm({
|
|
|
4181
4465
|
if (!this.ws || this.ws.readyState !== import_ws4.default.OPEN) {
|
|
4182
4466
|
this.audioDroppedCount++;
|
|
4183
4467
|
if (this.audioDroppedCount === 1 || this.audioDroppedCount % 50 === 0) {
|
|
4184
|
-
getLogger().
|
|
4468
|
+
getLogger().debug(
|
|
4185
4469
|
`[DIAG] DeepgramSTT.sendAudio dropped (ws state=${this.ws?.readyState ?? "null"}) \u2014 total dropped=${this.audioDroppedCount}`
|
|
4186
4470
|
);
|
|
4187
4471
|
}
|
|
@@ -4190,7 +4474,7 @@ var init_deepgram_stt = __esm({
|
|
|
4190
4474
|
if (audio.length === 0) return;
|
|
4191
4475
|
this.audioSentCount++;
|
|
4192
4476
|
if (this.audioSentCount === 1 || this.audioSentCount % 100 === 0) {
|
|
4193
|
-
getLogger().
|
|
4477
|
+
getLogger().debug(
|
|
4194
4478
|
`[DIAG] DeepgramSTT.sendAudio: total chunks sent=${this.audioSentCount} (last=${audio.length} bytes)`
|
|
4195
4479
|
);
|
|
4196
4480
|
}
|
|
@@ -4228,16 +4512,16 @@ var init_deepgram_stt = __esm({
|
|
|
4228
4512
|
finalize() {
|
|
4229
4513
|
const ws = this.ws;
|
|
4230
4514
|
if (!ws || ws.readyState !== import_ws4.default.OPEN) {
|
|
4231
|
-
getLogger().
|
|
4515
|
+
getLogger().debug(
|
|
4232
4516
|
`[DIAG] DeepgramSTT.finalize SKIPPED (ws state=${ws?.readyState ?? "null"})`
|
|
4233
4517
|
);
|
|
4234
4518
|
return;
|
|
4235
4519
|
}
|
|
4236
4520
|
try {
|
|
4237
4521
|
ws.send(JSON.stringify({ type: "Finalize" }));
|
|
4238
|
-
getLogger().
|
|
4522
|
+
getLogger().debug("[DIAG] DeepgramSTT.finalize sent {type:Finalize}");
|
|
4239
4523
|
} catch (err) {
|
|
4240
|
-
getLogger().
|
|
4524
|
+
getLogger().debug(`[DIAG] DeepgramSTT.finalize send failed: ${String(err)}`);
|
|
4241
4525
|
}
|
|
4242
4526
|
}
|
|
4243
4527
|
/** Send Finalize, briefly drain trailing transcripts, then close the socket. */
|
|
@@ -4317,6 +4601,7 @@ var init_metrics = __esm({
|
|
|
4317
4601
|
_pricing;
|
|
4318
4602
|
_callStart;
|
|
4319
4603
|
_turns = [];
|
|
4604
|
+
// mutable internal array; immutable when exposed via TurnMetrics[] → readonly TurnMetrics[]
|
|
4320
4605
|
// Per-turn timing state
|
|
4321
4606
|
_turnStart = null;
|
|
4322
4607
|
_sttComplete = null;
|
|
@@ -4403,6 +4688,16 @@ var init_metrics = __esm({
|
|
|
4403
4688
|
* (the common cause of missing endpoint signals).
|
|
4404
4689
|
*/
|
|
4405
4690
|
_endpointSignalMissingCount = 0;
|
|
4691
|
+
/**
|
|
4692
|
+
* Monotonic per-call turn counter. Reserved at turn OPEN
|
|
4693
|
+
* (``onAdapterSpeechStopped`` / ``speech_stopped``) via
|
|
4694
|
+
* ``reserveTurnIndex()`` and threaded through the buffering pipeline into
|
|
4695
|
+
* ``recordTurnComplete`` / ``recordTurnInterrupted`` as ``preReservedIndex``.
|
|
4696
|
+
* This makes ``turn_index`` stable under drops / interrupts (previously it
|
|
4697
|
+
* was assigned at completion as ``this._turns.length``, which shifted when a
|
|
4698
|
+
* turn was dropped). Parity with Python ``_next_turn_index``.
|
|
4699
|
+
*/
|
|
4700
|
+
_nextTurnIndex = 0;
|
|
4406
4701
|
constructor(opts) {
|
|
4407
4702
|
this.callId = opts.callId;
|
|
4408
4703
|
this.providerMode = opts.providerMode;
|
|
@@ -4451,12 +4746,27 @@ var init_metrics = __esm({
|
|
|
4451
4746
|
this._turnUserText = "";
|
|
4452
4747
|
this._turnSttAudioSeconds = 0;
|
|
4453
4748
|
this._turnAlreadyClosed = false;
|
|
4749
|
+
this._initialTtfbEmitted = false;
|
|
4454
4750
|
this._vadStoppedAt = null;
|
|
4455
4751
|
this._sttFinalAt = null;
|
|
4456
4752
|
this._turnCommittedAt = null;
|
|
4457
4753
|
this._onUserTurnCompletedDelayMs = null;
|
|
4458
4754
|
this._eventBus?.emit("turn_started", { callId: this.callId });
|
|
4459
4755
|
}
|
|
4756
|
+
/**
|
|
4757
|
+
* Reserve and return the next monotonic turn index.
|
|
4758
|
+
*
|
|
4759
|
+
* Called once per turn at the moment the turn OPENS (Realtime:
|
|
4760
|
+
* ``onAdapterSpeechStopped``). The returned index is threaded through the
|
|
4761
|
+
* buffering pipeline and handed back to ``recordTurnComplete`` /
|
|
4762
|
+
* ``recordTurnInterrupted`` as ``preReservedIndex`` so the emitted
|
|
4763
|
+
* ``turn_index`` matches the live per-line transcript ordering even when a
|
|
4764
|
+
* turn is dropped or interrupted between open and close. Parity with Python
|
|
4765
|
+
* ``reserve_turn_index``.
|
|
4766
|
+
*/
|
|
4767
|
+
reserveTurnIndex() {
|
|
4768
|
+
return this._nextTurnIndex++;
|
|
4769
|
+
}
|
|
4460
4770
|
/**
|
|
4461
4771
|
* Start a new turn only if no turn is currently open.
|
|
4462
4772
|
* Use this at inbound-audio ingestion points so the turn timer begins
|
|
@@ -4494,6 +4804,7 @@ var init_metrics = __esm({
|
|
|
4494
4804
|
anchorUserSpeechStart() {
|
|
4495
4805
|
if (this._turnCommittedMono !== null) return;
|
|
4496
4806
|
this._turnStart = hrTimeMs();
|
|
4807
|
+
this._turnAlreadyClosed = false;
|
|
4497
4808
|
this._endpointSignalAt = null;
|
|
4498
4809
|
this._vadStoppedAt = null;
|
|
4499
4810
|
this._sttFinalAt = null;
|
|
@@ -4617,11 +4928,14 @@ var init_metrics = __esm({
|
|
|
4617
4928
|
* ``user_text=''``. The caller treats ``null`` as "nothing to emit";
|
|
4618
4929
|
* ``emitTurnMetrics`` is already null-safe.
|
|
4619
4930
|
*/
|
|
4620
|
-
recordTurnComplete(agentText) {
|
|
4931
|
+
recordTurnComplete(agentText, preReservedIndex) {
|
|
4621
4932
|
if (this._turnAlreadyClosed) return null;
|
|
4622
4933
|
const latency = this._computeTurnLatency();
|
|
4623
4934
|
const turn = {
|
|
4624
|
-
|
|
4935
|
+
// Use the pre-reserved index (stable across drops/interrupts) when the
|
|
4936
|
+
// caller threaded one through; otherwise fall back to the append
|
|
4937
|
+
// position for back-compat with callers that never reserved.
|
|
4938
|
+
turn_index: preReservedIndex ?? this._turns.length,
|
|
4625
4939
|
user_text: this._turnUserText,
|
|
4626
4940
|
agent_text: agentText,
|
|
4627
4941
|
latency,
|
|
@@ -4630,10 +4944,10 @@ var init_metrics = __esm({
|
|
|
4630
4944
|
timestamp: Date.now() / 1e3
|
|
4631
4945
|
};
|
|
4632
4946
|
this._turns.push(turn);
|
|
4633
|
-
this._resetTurnState();
|
|
4634
|
-
this._turnAlreadyClosed = true;
|
|
4635
4947
|
this._eventBus?.emit("turn_ended", { callId: this.callId, turn });
|
|
4636
4948
|
this._eventBus?.emit("metrics_collected", { callId: this.callId, turn });
|
|
4949
|
+
this._resetTurnState();
|
|
4950
|
+
this._turnAlreadyClosed = true;
|
|
4637
4951
|
return turn;
|
|
4638
4952
|
}
|
|
4639
4953
|
/**
|
|
@@ -4645,12 +4959,12 @@ var init_metrics = __esm({
|
|
|
4645
4959
|
* a future refactor that reorders the bargein + LLM-unwind paths)
|
|
4646
4960
|
* from overwriting a turn that the complete path already emitted.
|
|
4647
4961
|
*/
|
|
4648
|
-
recordTurnInterrupted() {
|
|
4962
|
+
recordTurnInterrupted(preReservedIndex) {
|
|
4649
4963
|
if (this._turnStart === null) return null;
|
|
4650
4964
|
if (this._turnAlreadyClosed) return null;
|
|
4651
4965
|
const latency = this._computeTurnLatency();
|
|
4652
4966
|
const turn = {
|
|
4653
|
-
turn_index: this._turns.length,
|
|
4967
|
+
turn_index: preReservedIndex ?? this._turns.length,
|
|
4654
4968
|
user_text: this._turnUserText,
|
|
4655
4969
|
agent_text: "[interrupted]",
|
|
4656
4970
|
latency,
|
|
@@ -4702,8 +5016,10 @@ var init_metrics = __esm({
|
|
|
4702
5016
|
}
|
|
4703
5017
|
/**
|
|
4704
5018
|
* Record the delta (ms) between turn-committed and when on_user_turn_completed
|
|
4705
|
-
* pipeline hook finished.
|
|
4706
|
-
*
|
|
5019
|
+
* pipeline hook finished. Does NOT re-emit: like Python's
|
|
5020
|
+
* ``record_on_user_turn_completed_delay``, this only stores the value; the
|
|
5021
|
+
* single EOU emission happens on ``recordTurnCommitted`` (3-timestamp guard,
|
|
5022
|
+
* delay defaults to 0 if not yet recorded).
|
|
4707
5023
|
*/
|
|
4708
5024
|
recordOnUserTurnCompletedDelay(delayMs) {
|
|
4709
5025
|
this._onUserTurnCompletedDelayMs = delayMs;
|
|
@@ -4716,7 +5032,7 @@ var init_metrics = __esm({
|
|
|
4716
5032
|
* ``transcriptionDelay`` = turnCommitted − vadStopped (ms)
|
|
4717
5033
|
* ``onUserTurnCompletedDelay`` = caller-supplied delta (ms) or 0
|
|
4718
5034
|
*/
|
|
4719
|
-
/** Emit `EOUMetrics` once VAD-stop, STT-final,
|
|
5035
|
+
/** Emit `EOUMetrics` once VAD-stop, STT-final, turn-committed, and on_user_turn_completed delay are all known. */
|
|
4720
5036
|
emitEouMetrics() {
|
|
4721
5037
|
if (this._vadStoppedAt === null || this._sttFinalAt === null || this._turnCommittedAt === null) {
|
|
4722
5038
|
return;
|
|
@@ -5265,10 +5581,13 @@ var init_circuit_breaker = __esm({
|
|
|
5265
5581
|
if (s.state === CircuitBreakerState.OPEN) {
|
|
5266
5582
|
if (this.clock() - s.openedAt >= this.cooldownMs) {
|
|
5267
5583
|
s.state = CircuitBreakerState.HALF_OPEN;
|
|
5584
|
+
s.probeInFlight = true;
|
|
5268
5585
|
return true;
|
|
5269
5586
|
}
|
|
5270
5587
|
return false;
|
|
5271
5588
|
}
|
|
5589
|
+
if (s.probeInFlight) return false;
|
|
5590
|
+
s.probeInFlight = true;
|
|
5272
5591
|
return true;
|
|
5273
5592
|
}
|
|
5274
5593
|
/** Mark a successful execution. Resets the breaker to CLOSED. */
|
|
@@ -5278,19 +5597,21 @@ var init_circuit_breaker = __esm({
|
|
|
5278
5597
|
s.state = CircuitBreakerState.CLOSED;
|
|
5279
5598
|
s.consecutiveFailures = 0;
|
|
5280
5599
|
s.openedAt = 0;
|
|
5600
|
+
s.probeInFlight = false;
|
|
5281
5601
|
}
|
|
5282
5602
|
/** Mark a failed execution; trips OPEN once threshold is reached. */
|
|
5283
5603
|
recordFailure(toolName) {
|
|
5284
5604
|
if (this.threshold <= 0) return;
|
|
5285
5605
|
let s = this.state.get(toolName);
|
|
5286
5606
|
if (!s) {
|
|
5287
|
-
s = { state: CircuitBreakerState.CLOSED, consecutiveFailures: 0, openedAt: 0 };
|
|
5607
|
+
s = { state: CircuitBreakerState.CLOSED, consecutiveFailures: 0, openedAt: 0, probeInFlight: false };
|
|
5288
5608
|
this.state.set(toolName, s);
|
|
5289
5609
|
}
|
|
5290
5610
|
s.consecutiveFailures += 1;
|
|
5291
5611
|
if (s.consecutiveFailures >= this.threshold) {
|
|
5292
5612
|
s.state = CircuitBreakerState.OPEN;
|
|
5293
5613
|
s.openedAt = this.clock();
|
|
5614
|
+
s.probeInFlight = false;
|
|
5294
5615
|
}
|
|
5295
5616
|
}
|
|
5296
5617
|
/**
|
|
@@ -5314,6 +5635,10 @@ var init_circuit_breaker = __esm({
|
|
|
5314
5635
|
});
|
|
5315
5636
|
|
|
5316
5637
|
// src/llm-loop.ts
|
|
5638
|
+
function resolveToolTimeoutMs(toolTimeoutMs, defaultMs) {
|
|
5639
|
+
if (toolTimeoutMs === void 0) return defaultMs;
|
|
5640
|
+
return Math.max(100, Math.min(toolTimeoutMs, MAX_TOOL_TIMEOUT_MS));
|
|
5641
|
+
}
|
|
5317
5642
|
async function invokeHandler(handler, args, callContext, onProgress) {
|
|
5318
5643
|
const invoked = handler(args, callContext);
|
|
5319
5644
|
if (invoked && typeof invoked === "object" && typeof invoked[Symbol.asyncIterator] === "function" && typeof invoked.next === "function") {
|
|
@@ -5372,7 +5697,7 @@ function mergeAbortSignals(...signals) {
|
|
|
5372
5697
|
}
|
|
5373
5698
|
return controller.signal;
|
|
5374
5699
|
}
|
|
5375
|
-
var DEFAULT_TOOL_MAX_RETRIES, DEFAULT_TOOL_RETRY_DELAY_MS, DEFAULT_TOOL_TIMEOUT_MS, TOOL_MAX_RESPONSE_BYTES, DefaultToolExecutor, OpenAILLMProvider, DEFAULT_PHONE_PREAMBLE, LLMLoop;
|
|
5700
|
+
var DEFAULT_TOOL_MAX_RETRIES, DEFAULT_TOOL_RETRY_DELAY_MS, DEFAULT_TOOL_TIMEOUT_MS, MAX_TOOL_TIMEOUT_MS, TOOL_MAX_RESPONSE_BYTES, ToolTimeoutError, DefaultToolExecutor, OpenAILLMProvider, DEFAULT_PHONE_PREAMBLE, LLMLoop;
|
|
5376
5701
|
var init_llm_loop = __esm({
|
|
5377
5702
|
"src/llm-loop.ts"() {
|
|
5378
5703
|
"use strict";
|
|
@@ -5385,7 +5710,14 @@ var init_llm_loop = __esm({
|
|
|
5385
5710
|
DEFAULT_TOOL_MAX_RETRIES = 2;
|
|
5386
5711
|
DEFAULT_TOOL_RETRY_DELAY_MS = 500;
|
|
5387
5712
|
DEFAULT_TOOL_TIMEOUT_MS = 1e4;
|
|
5713
|
+
MAX_TOOL_TIMEOUT_MS = 3e5;
|
|
5388
5714
|
TOOL_MAX_RESPONSE_BYTES = 1 * 1024 * 1024;
|
|
5715
|
+
ToolTimeoutError = class extends Error {
|
|
5716
|
+
constructor(message) {
|
|
5717
|
+
super(message);
|
|
5718
|
+
this.name = "ToolTimeoutError";
|
|
5719
|
+
}
|
|
5720
|
+
};
|
|
5389
5721
|
DefaultToolExecutor = class {
|
|
5390
5722
|
maxRetries;
|
|
5391
5723
|
retryDelayMs;
|
|
@@ -5411,15 +5743,41 @@ var init_llm_loop = __esm({
|
|
|
5411
5743
|
retry_after_ms: cooldown
|
|
5412
5744
|
});
|
|
5413
5745
|
}
|
|
5746
|
+
const effectiveTimeoutMs = resolveToolTimeoutMs(
|
|
5747
|
+
toolDef.timeoutMs,
|
|
5748
|
+
this.requestTimeoutMs
|
|
5749
|
+
);
|
|
5414
5750
|
if (toolDef.handler) {
|
|
5415
5751
|
const totalAttempts = this.maxRetries + 1;
|
|
5416
5752
|
let lastErr = null;
|
|
5417
5753
|
for (let attempt = 0; attempt < totalAttempts; attempt++) {
|
|
5754
|
+
let timeoutTimer;
|
|
5418
5755
|
try {
|
|
5419
|
-
const
|
|
5756
|
+
const handlerPromise = invokeHandler(toolDef.handler, args, callContext, onProgress);
|
|
5757
|
+
const result = await Promise.race([
|
|
5758
|
+
handlerPromise,
|
|
5759
|
+
new Promise((_, reject) => {
|
|
5760
|
+
timeoutTimer = setTimeout(
|
|
5761
|
+
() => reject(
|
|
5762
|
+
new ToolTimeoutError(
|
|
5763
|
+
`Tool handler '${toolDef.name}' timed out after ${effectiveTimeoutMs}ms`
|
|
5764
|
+
)
|
|
5765
|
+
),
|
|
5766
|
+
effectiveTimeoutMs
|
|
5767
|
+
);
|
|
5768
|
+
})
|
|
5769
|
+
]);
|
|
5420
5770
|
this.breaker.recordSuccess(toolDef.name);
|
|
5421
5771
|
return result;
|
|
5422
5772
|
} catch (e) {
|
|
5773
|
+
if (e instanceof ToolTimeoutError) {
|
|
5774
|
+
getLogger().error(String(e));
|
|
5775
|
+
this.breaker.recordFailure(toolDef.name);
|
|
5776
|
+
return JSON.stringify({
|
|
5777
|
+
error: String(e),
|
|
5778
|
+
fallback: true
|
|
5779
|
+
});
|
|
5780
|
+
}
|
|
5423
5781
|
lastErr = e;
|
|
5424
5782
|
if (attempt < totalAttempts - 1) {
|
|
5425
5783
|
getLogger().warn(
|
|
@@ -5427,6 +5785,8 @@ var init_llm_loop = __esm({
|
|
|
5427
5785
|
);
|
|
5428
5786
|
await new Promise((r) => setTimeout(r, backoffDelayMs(this.retryDelayMs, attempt)));
|
|
5429
5787
|
}
|
|
5788
|
+
} finally {
|
|
5789
|
+
if (timeoutTimer !== void 0) clearTimeout(timeoutTimer);
|
|
5430
5790
|
}
|
|
5431
5791
|
}
|
|
5432
5792
|
this.breaker.recordFailure(toolDef.name);
|
|
@@ -5463,7 +5823,10 @@ var init_llm_loop = __esm({
|
|
|
5463
5823
|
...callContext,
|
|
5464
5824
|
attempt: attempt + 1
|
|
5465
5825
|
}),
|
|
5466
|
-
|
|
5826
|
+
// Use per-tool timeout when set, otherwise fall back to
|
|
5827
|
+
// the executor-level default. Mirrors Python's per-request
|
|
5828
|
+
// ``timeout=`` override on httpx.AsyncClient.post().
|
|
5829
|
+
signal: AbortSignal.timeout(effectiveTimeoutMs)
|
|
5467
5830
|
});
|
|
5468
5831
|
if (!resp.ok) throw new Error(`HTTP ${resp.status}`);
|
|
5469
5832
|
const result = JSON.stringify(await resp.json());
|
|
@@ -5589,7 +5952,7 @@ var init_llm_loop = __esm({
|
|
|
5589
5952
|
body.tools = tools;
|
|
5590
5953
|
}
|
|
5591
5954
|
const signal = mergeAbortSignals(opts?.signal, AbortSignal.timeout(3e4));
|
|
5592
|
-
const response = await fetch(
|
|
5955
|
+
const response = await fetch(`${this.baseUrl}/chat/completions`, {
|
|
5593
5956
|
method: "POST",
|
|
5594
5957
|
headers: {
|
|
5595
5958
|
"Content-Type": "application/json",
|
|
@@ -5609,50 +5972,55 @@ var init_llm_loop = __esm({
|
|
|
5609
5972
|
if (!reader) return;
|
|
5610
5973
|
const decoder = new TextDecoder();
|
|
5611
5974
|
let buffer = "";
|
|
5612
|
-
|
|
5613
|
-
|
|
5614
|
-
|
|
5615
|
-
|
|
5616
|
-
|
|
5617
|
-
|
|
5618
|
-
|
|
5619
|
-
const
|
|
5620
|
-
|
|
5621
|
-
|
|
5622
|
-
|
|
5623
|
-
|
|
5624
|
-
|
|
5625
|
-
|
|
5626
|
-
|
|
5627
|
-
|
|
5628
|
-
|
|
5629
|
-
|
|
5630
|
-
|
|
5631
|
-
|
|
5632
|
-
|
|
5633
|
-
type: "usage",
|
|
5634
|
-
inputTokens: uncachedInput,
|
|
5635
|
-
outputTokens: chunk.usage.completion_tokens,
|
|
5636
|
-
cacheReadInputTokens: cached2
|
|
5637
|
-
};
|
|
5638
|
-
}
|
|
5639
|
-
const delta = chunk.choices?.[0]?.delta;
|
|
5640
|
-
if (!delta) continue;
|
|
5641
|
-
if (delta.content) {
|
|
5642
|
-
yield { type: "text", content: delta.content };
|
|
5643
|
-
}
|
|
5644
|
-
if (delta.tool_calls) {
|
|
5645
|
-
for (const tc of delta.tool_calls) {
|
|
5975
|
+
try {
|
|
5976
|
+
while (true) {
|
|
5977
|
+
const { done, value } = await reader.read();
|
|
5978
|
+
if (done) break;
|
|
5979
|
+
buffer += decoder.decode(value, { stream: true });
|
|
5980
|
+
const lines = buffer.split("\n");
|
|
5981
|
+
buffer = lines.pop() || "";
|
|
5982
|
+
for (const line of lines) {
|
|
5983
|
+
const trimmed = line.trim();
|
|
5984
|
+
if (!trimmed || !trimmed.startsWith("data: ")) continue;
|
|
5985
|
+
const data = trimmed.slice(6);
|
|
5986
|
+
if (data === "[DONE]") continue;
|
|
5987
|
+
let chunk;
|
|
5988
|
+
try {
|
|
5989
|
+
chunk = JSON.parse(data);
|
|
5990
|
+
} catch {
|
|
5991
|
+
continue;
|
|
5992
|
+
}
|
|
5993
|
+
if (chunk.usage) {
|
|
5994
|
+
const cached2 = chunk.usage.prompt_tokens_details?.cached_tokens ?? 0;
|
|
5995
|
+
const uncachedInput = Math.max(0, (chunk.usage.prompt_tokens ?? 0) - cached2);
|
|
5646
5996
|
yield {
|
|
5647
|
-
type: "
|
|
5648
|
-
|
|
5649
|
-
|
|
5650
|
-
|
|
5651
|
-
arguments: tc.function?.arguments
|
|
5997
|
+
type: "usage",
|
|
5998
|
+
inputTokens: uncachedInput,
|
|
5999
|
+
outputTokens: chunk.usage.completion_tokens,
|
|
6000
|
+
cacheReadInputTokens: cached2
|
|
5652
6001
|
};
|
|
5653
6002
|
}
|
|
6003
|
+
const delta = chunk.choices?.[0]?.delta;
|
|
6004
|
+
if (!delta) continue;
|
|
6005
|
+
if (delta.content) {
|
|
6006
|
+
yield { type: "text", content: delta.content };
|
|
6007
|
+
}
|
|
6008
|
+
if (delta.tool_calls) {
|
|
6009
|
+
for (const tc of delta.tool_calls) {
|
|
6010
|
+
yield {
|
|
6011
|
+
type: "tool_call",
|
|
6012
|
+
index: tc.index,
|
|
6013
|
+
id: tc.id,
|
|
6014
|
+
name: tc.function?.name,
|
|
6015
|
+
arguments: tc.function?.arguments
|
|
6016
|
+
};
|
|
6017
|
+
}
|
|
6018
|
+
}
|
|
5654
6019
|
}
|
|
5655
6020
|
}
|
|
6021
|
+
} finally {
|
|
6022
|
+
reader.cancel().catch(() => {
|
|
6023
|
+
});
|
|
5656
6024
|
}
|
|
5657
6025
|
}
|
|
5658
6026
|
};
|
|
@@ -5763,12 +6131,14 @@ ${systemPrompt}` : DEFAULT_PHONE_PREAMBLE;
|
|
|
5763
6131
|
const hasAfterLlmResponse = Boolean(hookExecutor?.hasAfterLlmResponse() && hookCtx);
|
|
5764
6132
|
const hasAfterLlmChunk = Boolean(hookExecutor?.hasAfterLlmChunk());
|
|
5765
6133
|
const allEmittedText = [];
|
|
6134
|
+
const callId = callContext.call_id;
|
|
6135
|
+
const streamOpts = typeof callId === "string" && callId.length > 0 ? { ...opts, callId } : opts;
|
|
5766
6136
|
for (let iter = 0; iter < maxIterations; iter++) {
|
|
5767
6137
|
const toolCallsAccumulated = /* @__PURE__ */ new Map();
|
|
5768
6138
|
const textParts = [];
|
|
5769
6139
|
let hasToolCalls = false;
|
|
5770
6140
|
let usageChunkReceived = false;
|
|
5771
|
-
for await (const chunk of this.provider.stream(messages, this.openaiTools,
|
|
6141
|
+
for await (const chunk of this.provider.stream(messages, this.openaiTools, streamOpts)) {
|
|
5772
6142
|
if (chunk.type === "text" && chunk.content) {
|
|
5773
6143
|
const content = hasAfterLlmChunk && hookExecutor ? hookExecutor.runAfterLlmChunk(chunk.content) : chunk.content;
|
|
5774
6144
|
textParts.push(content);
|
|
@@ -5786,7 +6156,7 @@ ${systemPrompt}` : DEFAULT_PHONE_PREAMBLE;
|
|
|
5786
6156
|
chunk.inputTokens ?? 0,
|
|
5787
6157
|
chunk.outputTokens ?? 0,
|
|
5788
6158
|
chunk.cacheReadInputTokens ?? 0,
|
|
5789
|
-
chunk.
|
|
6159
|
+
chunk.cacheWriteInputTokens ?? 0
|
|
5790
6160
|
);
|
|
5791
6161
|
} else if (chunk.type === "tool_call") {
|
|
5792
6162
|
hasToolCalls = true;
|
|
@@ -6180,8 +6550,8 @@ function getElementAtPath(obj, path6) {
|
|
|
6180
6550
|
}
|
|
6181
6551
|
function promiseAllObject(promisesObj) {
|
|
6182
6552
|
const keys = Object.keys(promisesObj);
|
|
6183
|
-
const
|
|
6184
|
-
return Promise.all(
|
|
6553
|
+
const promises2 = keys.map((key) => promisesObj[key]);
|
|
6554
|
+
return Promise.all(promises2).then((results) => {
|
|
6185
6555
|
const resolvedObj = {};
|
|
6186
6556
|
for (let i = 0; i < keys.length; i++) {
|
|
6187
6557
|
resolvedObj[keys[i]] = results[i];
|
|
@@ -23972,6 +24342,8 @@ var init_mcp_client = __esm({
|
|
|
23972
24342
|
"use strict";
|
|
23973
24343
|
init_cjs_shims();
|
|
23974
24344
|
init_logger();
|
|
24345
|
+
init_server();
|
|
24346
|
+
init_version();
|
|
23975
24347
|
MCPManager = class {
|
|
23976
24348
|
configs;
|
|
23977
24349
|
connected = [];
|
|
@@ -23997,10 +24369,16 @@ var init_mcp_client = __esm({
|
|
|
23997
24369
|
}
|
|
23998
24370
|
const aggregatedTools = [];
|
|
23999
24371
|
for (const cfg of this.configs) {
|
|
24372
|
+
try {
|
|
24373
|
+
validateWebhookUrl(cfg.url);
|
|
24374
|
+
} catch (e) {
|
|
24375
|
+
getLogger().error(`MCP server '${cfg.name}' (${cfg.url}) rejected by SSRF guard: ${String(e)}`);
|
|
24376
|
+
continue;
|
|
24377
|
+
}
|
|
24000
24378
|
const transport = new transportModule.StreamableHTTPClientTransport(new URL(cfg.url), {
|
|
24001
24379
|
requestInit: { headers: cfg.headers }
|
|
24002
24380
|
});
|
|
24003
|
-
const client = new mcpModule.Client({ name: "patter", version:
|
|
24381
|
+
const client = new mcpModule.Client({ name: "patter", version: VERSION });
|
|
24004
24382
|
try {
|
|
24005
24383
|
await client.connect(transport);
|
|
24006
24384
|
} catch (e) {
|
|
@@ -24074,6 +24452,276 @@ var init_mcp_client = __esm({
|
|
|
24074
24452
|
}
|
|
24075
24453
|
});
|
|
24076
24454
|
|
|
24455
|
+
// src/consult.ts
|
|
24456
|
+
function isLoopbackOrPrivateHost(baseUrl) {
|
|
24457
|
+
let host;
|
|
24458
|
+
try {
|
|
24459
|
+
host = new URL(baseUrl).hostname.toLowerCase();
|
|
24460
|
+
} catch {
|
|
24461
|
+
return false;
|
|
24462
|
+
}
|
|
24463
|
+
if (host.startsWith("[") && host.endsWith("]")) host = host.slice(1, -1);
|
|
24464
|
+
if (host === "localhost" || host === "0.0.0.0" || host === "::1") return true;
|
|
24465
|
+
if (host.endsWith(".local")) return true;
|
|
24466
|
+
if (/^127\./.test(host) || /^10\./.test(host) || /^192\.168\./.test(host)) return true;
|
|
24467
|
+
if (/^169\.254\./.test(host)) return true;
|
|
24468
|
+
const m = host.match(/^172\.(\d+)\./);
|
|
24469
|
+
if (m) {
|
|
24470
|
+
const octet = Number(m[1]);
|
|
24471
|
+
if (octet >= 16 && octet <= 31) return true;
|
|
24472
|
+
}
|
|
24473
|
+
if (host.includes(":") && (/^f[cd][0-9a-f]{2}:/.test(host) || /^fe[89ab][0-9a-f]:/.test(host))) {
|
|
24474
|
+
return true;
|
|
24475
|
+
}
|
|
24476
|
+
return false;
|
|
24477
|
+
}
|
|
24478
|
+
function openclawConsult(agent, opts = {}) {
|
|
24479
|
+
if (!agent || !OPENCLAW_AGENT_RE.test(agent)) {
|
|
24480
|
+
throw new Error(
|
|
24481
|
+
"OpenClaw agent must be a non-empty id of letters, digits, and ._:/- only"
|
|
24482
|
+
);
|
|
24483
|
+
}
|
|
24484
|
+
const baseUrl = opts.baseUrl ?? OPENCLAW_DEFAULT_BASE_URL;
|
|
24485
|
+
const model = agent.includes("/") || agent.includes(":") ? agent : `openclaw/${agent}`;
|
|
24486
|
+
return {
|
|
24487
|
+
openaiCompatible: {
|
|
24488
|
+
baseUrl,
|
|
24489
|
+
model,
|
|
24490
|
+
apiKey: opts.apiKey,
|
|
24491
|
+
apiKeyEnv: OPENCLAW_API_KEY_ENV,
|
|
24492
|
+
sessionHeader: OPENCLAW_SESSION_HEADER
|
|
24493
|
+
},
|
|
24494
|
+
timeoutMs: opts.timeoutMs ?? DEFAULT_TIMEOUT_MS,
|
|
24495
|
+
toolName: opts.toolName ?? DEFAULT_TOOL_NAME,
|
|
24496
|
+
description: opts.description ?? OPENCLAW_DESCRIPTION,
|
|
24497
|
+
reassurance: opts.reassurance ?? OPENCLAW_REASSURANCE,
|
|
24498
|
+
headers: opts.headers,
|
|
24499
|
+
allowLoopback: opts.allowLoopback ?? isLoopbackOrPrivateHost(baseUrl)
|
|
24500
|
+
};
|
|
24501
|
+
}
|
|
24502
|
+
function buildConsultTool(config2) {
|
|
24503
|
+
const hasUrl = config2.url != null;
|
|
24504
|
+
const hasOpenAI = config2.openaiCompatible != null;
|
|
24505
|
+
if (hasUrl === hasOpenAI) {
|
|
24506
|
+
throw new Error("ConsultConfig requires exactly one of url or openaiCompatible");
|
|
24507
|
+
}
|
|
24508
|
+
const timeoutMs = config2.timeoutMs ?? DEFAULT_TIMEOUT_MS;
|
|
24509
|
+
const baseHeaders = {
|
|
24510
|
+
...config2.headers ?? {},
|
|
24511
|
+
"Content-Type": "application/json"
|
|
24512
|
+
};
|
|
24513
|
+
const handler = hasOpenAI ? buildOpenAIHandler(config2.openaiCompatible, baseHeaders, timeoutMs, config2.allowLoopback ?? false) : buildWebhookHandler(config2.url, baseHeaders, timeoutMs, config2.allowLoopback ?? false);
|
|
24514
|
+
const tool2 = {
|
|
24515
|
+
name: config2.toolName ?? DEFAULT_TOOL_NAME,
|
|
24516
|
+
description: config2.description ?? DEFAULT_DESCRIPTION,
|
|
24517
|
+
parameters: PARAMETERS,
|
|
24518
|
+
handler
|
|
24519
|
+
};
|
|
24520
|
+
return config2.reassurance != null ? { ...tool2, reassurance: config2.reassurance } : tool2;
|
|
24521
|
+
}
|
|
24522
|
+
/**
 * Create the consult-tool handler that forwards a request to a plain webhook.
 *
 * The URL is validated once, when the handler is built. Each invocation POSTs
 * `{ request, call_id, caller, callee }` as JSON and converts the response to
 * a string for the voice agent:
 *   - a JSON object with a string under one of REPLY_KEYS -> that string;
 *   - any other valid JSON -> its re-serialised form;
 *   - non-JSON text -> the (truncated) raw body.
 * Transport failures and non-2xx statuses never throw; they are logged and
 * GRACEFUL_FALLBACK is returned.
 *
 * @param url2 Webhook URL to POST to.
 * @param headers Headers sent with every request.
 * @param timeoutMs Per-request timeout in milliseconds.
 * @param allowLoopback Passed through to `validateWebhookUrl`.
 * @returns Async `(args, context) => string` tool handler.
 */
function buildWebhookHandler(url2, headers, timeoutMs, allowLoopback) {
  validateWebhookUrl(url2, allowLoopback);
  return async (args, context) => {
    const payload = {
      request: typeof args?.request === "string" ? args.request : "",
      call_id: context?.call_id ?? "",
      caller: context?.caller ?? "",
      callee: context?.callee ?? ""
    };
    let body;
    try {
      const resp = await fetch(url2, {
        method: "POST",
        headers,
        body: JSON.stringify(payload),
        signal: AbortSignal.timeout(timeoutMs)
      });
      if (!resp.ok) {
        getLogger().warn(`consult tool: orchestrator returned HTTP ${resp.status}`);
        // The error body is never read; cancel it so the connection is
        // released right away instead of lingering until garbage collection.
        await resp.body?.cancel().catch(() => {
        });
        return GRACEFUL_FALLBACK;
      }
      body = (await resp.text()).slice(0, MAX_RESPONSE_CHARS);
    } catch (e) {
      // Only the error name is logged, never the message or payload.
      getLogger().warn(
        `consult tool: orchestrator call failed: ${e instanceof Error ? e.name : "error"}`
      );
      return GRACEFUL_FALLBACK;
    }
    try {
      const data = JSON.parse(body);
      if (data && typeof data === "object" && !Array.isArray(data)) {
        for (const key of REPLY_KEYS) {
          if (typeof data[key] === "string") return data[key];
        }
      }
      return JSON.stringify(data);
    } catch {
      // Not JSON (or truncated mid-document): hand back the text as-is.
      return body;
    }
  };
}
|
|
24565
|
+
/**
 * Create the consult-tool handler that talks to an OpenAI-compatible
 * `/chat/completions` endpoint.
 *
 * The endpoint URL, API key (explicit `oc.apiKey`, else the environment
 * variable named by `oc.apiKeyEnv`) and static headers are resolved once, when
 * the handler is built. Each invocation sends a two-message, non-streaming
 * chat request: a system message describing the call (caller / dialed line
 * when known) and the user's request text. When a call id is present it is
 * sent both as the `user` field and under `oc.sessionHeader`.
 *
 * Failures never throw: non-2xx statuses, a reply without
 * `choices[0].message.content`, and transport errors are logged and
 * GRACEFUL_FALLBACK is returned.
 *
 * @param oc OpenAI-compatible settings (`baseUrl`, `model`, optional `apiKey`,
 *   `apiKeyEnv`, `sessionHeader`).
 * @param baseHeaders Headers applied to every request.
 * @param timeoutMs Per-request timeout in milliseconds.
 * @param allowLoopback Passed through to `validateWebhookUrl`.
 * @returns Async `(args, context) => string` tool handler.
 */
function buildOpenAIHandler(oc, baseHeaders, timeoutMs, allowLoopback) {
  // Trailing slashes are dropped so "…/v1/" and "…/v1" give the same endpoint.
  const endpoint = oc.baseUrl.replace(/\/+$/, "") + "/chat/completions";
  validateWebhookUrl(endpoint, allowLoopback);
  const apiKey = oc.apiKey ?? (oc.apiKeyEnv ? process.env[oc.apiKeyEnv] : void 0);
  const headers = { ...baseHeaders };
  if (apiKey) headers.Authorization = `Bearer ${apiKey}`;
  const sessionHeader = oc.sessionHeader;
  const model = oc.model;
  return async (args, context) => {
    const requestText = typeof args?.request === "string" ? args.request : "";
    const callId = context?.call_id ?? "";
    const caller = context?.caller ?? "";
    const callee = context?.callee ?? "";
    const contextLines = ["You are answering an inbound phone call relayed by a voice agent."];
    if (caller) contextLines.push(`Caller: ${caller}`);
    if (callee) contextLines.push(`Line dialed: ${callee}`);
    contextLines.push(
      "Reply concisely in a spoken, conversational style \u2014 it is read aloud to the caller."
    );
    // Copy per request so the session header never leaks between calls.
    const reqHeaders = { ...headers };
    if (sessionHeader && callId) reqHeaders[sessionHeader] = callId;
    const payload = {
      model,
      messages: [
        { role: "system", content: contextLines.join("\n") },
        { role: "user", content: requestText }
      ],
      stream: false
    };
    if (callId) payload.user = callId;
    try {
      const resp = await fetch(endpoint, {
        method: "POST",
        headers: reqHeaders,
        body: JSON.stringify(payload),
        signal: AbortSignal.timeout(timeoutMs)
      });
      if (!resp.ok) {
        if (resp.status === 404) {
          getLogger().warn(
            "consult tool: OpenAI-compatible endpoint returned 404 \u2014 is it enabled? (OpenClaw: set gateway.http.endpoints.chatCompletions.enabled = true)"
          );
        } else {
          getLogger().warn(`consult tool: openai-compatible returned HTTP ${resp.status}`);
        }
        // The error body is never read; cancel it so the connection is
        // released right away instead of lingering until garbage collection.
        await resp.body?.cancel().catch(() => {
        });
        return GRACEFUL_FALLBACK;
      }
      const data = await resp.json();
      const content = data?.choices?.[0]?.message?.content;
      if (typeof content === "string" && content.trim()) {
        return content.trim().slice(0, MAX_RESPONSE_CHARS);
      }
      getLogger().warn("consult tool: response missing choices[0].message.content");
      return GRACEFUL_FALLBACK;
    } catch (e) {
      // Only the error name is logged, never the message or payload.
      getLogger().warn(
        `consult tool: openai-compatible call failed: ${e instanceof Error ? e.name : "error"}`
      );
      return GRACEFUL_FALLBACK;
    }
  };
}
|
|
24627
|
+
/**
 * Render a finished call as a plain-text record, one fact per line.
 *
 * Lines are emitted only for data that is present: caller, dialed line,
 * rounded duration (from `metrics.durationSeconds` or
 * `metrics.duration_seconds`) and, when requested, the transcript capped at
 * POSTCALL_MAX_TRANSCRIPT_CHARS characters.
 *
 * Malformed input is tolerated rather than thrown on: a `transcript` that is
 * not an array is treated as empty, and a non-finite duration is omitted
 * instead of being rendered as "NaNs" / "Infinitys".
 *
 * @param data Call-end payload (`caller`, `callee`, `metrics`, `transcript`).
 * @param includeTranscript Whether to append the transcript section.
 * @returns The record text, or "(no call details available)" when empty.
 */
function buildPostCallRecord(data, includeTranscript) {
  const lines = [];
  const caller = data.caller;
  const callee = data.callee;
  if (caller) lines.push(`Caller: ${caller}`);
  if (callee) lines.push(`Line dialed: ${callee}`);
  const metrics = data.metrics;
  const duration3 = metrics?.durationSeconds ?? metrics?.duration_seconds;
  // Number.isFinite is false for non-numbers as well as NaN / +-Infinity.
  if (Number.isFinite(duration3)) lines.push(`Duration: ${Math.round(duration3)}s`);
  if (includeTranscript) {
    const entries = Array.isArray(data.transcript) ? data.transcript : [];
    const rendered = entries
      .filter((e) => e && typeof e === "object")
      .map((e) => `${e.role ?? "?"}: ${e.text ?? ""}`)
      .join("\n");
    if (rendered) lines.push("Transcript:\n" + rendered.slice(0, POSTCALL_MAX_TRANSCRIPT_CHARS));
  }
  return lines.length ? lines.join("\n") : "(no call details available)";
}
|
|
24643
|
+
/**
 * Build a call-end callback that reports the finished call to an OpenClaw
 * agent through its OpenAI-compatible `/chat/completions` endpoint.
 *
 * Connection settings are resolved through `openclawConsult` and the endpoint
 * is validated once, at construction time. The returned callback is
 * best-effort: every failure, including a malformed call payload, is logged
 * as a warning and never propagates to the caller.
 *
 * @param agent OpenClaw agent identifier, forwarded to `openclawConsult`.
 * @param opts Optional settings: `baseUrl`, `apiKey`, `timeoutMs`,
 *   `allowLoopback`, `includeTranscript` (default true) and `instruction`
 *   (default POSTCALL_INSTRUCTION).
 * @returns Async `(data) => void` notifier.
 */
function openclawPostCallNotifier(agent, opts = {}) {
  const cfg = openclawConsult(agent, {
    baseUrl: opts.baseUrl,
    apiKey: opts.apiKey,
    timeoutMs: opts.timeoutMs ?? DEFAULT_TIMEOUT_MS,
    allowLoopback: opts.allowLoopback
  });
  const oc = cfg.openaiCompatible;
  // Trailing slashes are dropped so "…/v1/" and "…/v1" give the same endpoint.
  const endpoint = oc.baseUrl.replace(/\/+$/, "") + "/chat/completions";
  validateWebhookUrl(endpoint, cfg.allowLoopback ?? false);
  const apiKey = oc.apiKey ?? (oc.apiKeyEnv ? process.env[oc.apiKeyEnv] : void 0);
  const sessionHeader = oc.sessionHeader;
  const model = oc.model;
  const timeoutMs = cfg.timeoutMs ?? DEFAULT_TIMEOUT_MS;
  const includeTranscript = opts.includeTranscript ?? true;
  const instruction = opts.instruction ?? POSTCALL_INSTRUCTION;
  return async (data) => {
    try {
      const callId = (data ?? {}).call_id ?? "";
      // Built inside the try so a malformed payload cannot reject the notifier.
      const record2 = buildPostCallRecord(data ?? {}, includeTranscript);
      const headers = { "Content-Type": "application/json" };
      if (apiKey) headers.Authorization = `Bearer ${apiKey}`;
      if (sessionHeader && callId) headers[sessionHeader] = callId;
      const payload = {
        model,
        messages: [
          { role: "system", content: instruction },
          { role: "user", content: record2 }
        ],
        stream: false
      };
      if (callId) payload.user = callId;
      const resp = await fetch(endpoint, {
        method: "POST",
        headers,
        body: JSON.stringify(payload),
        signal: AbortSignal.timeout(timeoutMs)
      });
      if (!resp.ok) {
        getLogger().warn(`openclaw post-call notify: HTTP ${resp.status}`);
      }
      // The reply content is unused; cancel the body so the connection is
      // released right away instead of lingering until garbage collection.
      await resp.body?.cancel().catch(() => {
      });
    } catch (e) {
      // Only the error name is logged, never the message or payload.
      getLogger().warn(
        `openclaw post-call notify failed: ${e instanceof Error ? e.name : "error"}`
      );
    }
  };
}
|
|
24691
|
+
// Module-level bindings for src/consult.ts; values are assigned lazily by
// init_consult() below.
var DEFAULT_TIMEOUT_MS,
  DEFAULT_TOOL_NAME,
  DEFAULT_DESCRIPTION,
  MAX_RESPONSE_CHARS,
  REPLY_KEYS,
  GRACEFUL_FALLBACK,
  OPENCLAW_DEFAULT_BASE_URL,
  OPENCLAW_API_KEY_ENV,
  OPENCLAW_SESSION_HEADER,
  OPENCLAW_DESCRIPTION,
  OPENCLAW_REASSURANCE,
  OPENCLAW_AGENT_RE,
  PARAMETERS,
  POSTCALL_INSTRUCTION,
  POSTCALL_MAX_TRANSCRIPT_CHARS;
var init_consult = __esm({
  "src/consult.ts"() {
    "use strict";
    init_cjs_shims();
    init_logger();
    init_server();

    // --- Generic consult tool defaults ---
    DEFAULT_TIMEOUT_MS = 30000;
    DEFAULT_TOOL_NAME = "consult_agent";
    DEFAULT_DESCRIPTION = "Consult your back-office agent for deeper reasoning, fresh information, or actions beyond this call. Use when the caller asks something you cannot answer directly.";
    // Upper bound (in characters) applied to replies handed back to the agent.
    MAX_RESPONSE_CHARS = 1000000;
    // JSON keys probed, in order, for the reply text of a webhook response.
    REPLY_KEYS = ["reply", "response", "text", "result", "answer", "message"];
    // Returned to the model whenever the back-office call fails.
    GRACEFUL_FALLBACK = "I wasn't able to reach the system to get that answer right now.";

    // --- OpenClaw preset ---
    OPENCLAW_DEFAULT_BASE_URL = "http://127.0.0.1:18789/v1";
    OPENCLAW_API_KEY_ENV = "OPENCLAW_API_KEY";
    OPENCLAW_SESSION_HEADER = "x-openclaw-session-key";
    OPENCLAW_DESCRIPTION = "Consult your OpenClaw agent for anything account-specific \u2014 appointments, customer records, schedules, or actions in the back-office system. NEVER state an appointment time, customer detail, or schedule fact from your own memory; ALWAYS call this tool for those and read back what it returns.";
    OPENCLAW_REASSURANCE = "Let me check on that for you, one moment.";
    OPENCLAW_AGENT_RE = /^[A-Za-z0-9._:/-]+$/;

    // JSON-schema for the single `request` argument of the consult tool.
    PARAMETERS = {
      type: "object",
      properties: {
        request: {
          type: "string",
          description: "The question or task to send to your back-office agent for deeper reasoning, fresh information, or an action beyond this call. State it self-containedly \u2014 the dialog history is not forwarded with the consult."
        }
      },
      required: ["request"]
    };

    // --- Post-call notifier ---
    POSTCALL_INSTRUCTION = "A phone call handled by the voice agent has just ended. Here is the record of the call. Log it and follow up if anything needs action.";
    POSTCALL_MAX_TRANSCRIPT_CHARS = 12000;
  }
});
|
|
24724
|
+
|
|
24077
24725
|
// src/sentence-chunker.ts
|
|
24078
24726
|
function splitSentences(text, minSentenceLen = DEFAULT_MIN_SENTENCE_LEN) {
|
|
24079
24727
|
const alphabets = "([A-Za-z])";
|
|
@@ -26616,6 +27264,8 @@ var init_silero_vad = __esm({
|
|
|
26616
27264
|
speechThresholdDuration = 0;
|
|
26617
27265
|
silenceThresholdDuration = 0;
|
|
26618
27266
|
closed = false;
|
|
27267
|
+
/** Transitions produced in the current processFrame call but not yet returned. */
|
|
27268
|
+
eventQueue = [];
|
|
26619
27269
|
/**
|
|
26620
27270
|
* Load the Silero VAD model.
|
|
26621
27271
|
* Throws if `onnxruntime-node` is not installed.
|
|
@@ -26741,22 +27391,21 @@ var init_silero_vad = __esm({
|
|
|
26741
27391
|
);
|
|
26742
27392
|
}
|
|
26743
27393
|
if (pcmChunk.length === 0) {
|
|
26744
|
-
return null;
|
|
27394
|
+
return this.eventQueue.shift() ?? null;
|
|
26745
27395
|
}
|
|
26746
27396
|
const numSamples = Math.floor(pcmChunk.length / 2);
|
|
26747
27397
|
if (numSamples === 0) {
|
|
26748
|
-
return null;
|
|
27398
|
+
return this.eventQueue.shift() ?? null;
|
|
26749
27399
|
}
|
|
26750
27400
|
const samples = new Float32Array(numSamples);
|
|
26751
27401
|
for (let i = 0; i < numSamples; i++) {
|
|
26752
|
-
samples[i] = pcmChunk.readInt16LE(i * 2) /
|
|
27402
|
+
samples[i] = pcmChunk.readInt16LE(i * 2) / 32768;
|
|
26753
27403
|
}
|
|
26754
27404
|
const merged = new Float32Array(this.pending.length + samples.length);
|
|
26755
27405
|
merged.set(this.pending, 0);
|
|
26756
27406
|
merged.set(samples, this.pending.length);
|
|
26757
27407
|
this.pending = merged;
|
|
26758
27408
|
const windowSize = this.model.windowSizeSamples;
|
|
26759
|
-
let event = null;
|
|
26760
27409
|
while (this.pending.length >= windowSize) {
|
|
26761
27410
|
const window = this.pending.slice(0, windowSize);
|
|
26762
27411
|
this.pending = this.pending.slice(windowSize);
|
|
@@ -26765,10 +27414,10 @@ var init_silero_vad = __esm({
|
|
|
26765
27414
|
const windowDuration = windowSize / this.opts.sampleRate;
|
|
26766
27415
|
const transition = this.advanceState(p, windowDuration);
|
|
26767
27416
|
if (transition !== null) {
|
|
26768
|
-
|
|
27417
|
+
this.eventQueue.push(transition);
|
|
26769
27418
|
}
|
|
26770
27419
|
}
|
|
26771
|
-
return
|
|
27420
|
+
return this.eventQueue.shift() ?? null;
|
|
26772
27421
|
}
|
|
26773
27422
|
advanceState(p, windowDuration) {
|
|
26774
27423
|
const opts = this.opts;
|
|
@@ -26823,6 +27472,7 @@ var init_silero_vad = __esm({
|
|
|
26823
27472
|
this.pubSpeaking = false;
|
|
26824
27473
|
this.speechThresholdDuration = 0;
|
|
26825
27474
|
this.silenceThresholdDuration = 0;
|
|
27475
|
+
this.eventQueue = [];
|
|
26826
27476
|
this.expFilter.reset();
|
|
26827
27477
|
this.model.reset();
|
|
26828
27478
|
}
|
|
@@ -27063,6 +27713,13 @@ var init_aec = __esm({
|
|
|
27063
27713
|
});
|
|
27064
27714
|
|
|
27065
27715
|
// src/stream-handler.ts
|
|
27716
|
+
/**
 * Prepend tool-call preamble guidance to a system prompt.
 *
 * @param prompt The resolved system prompt (may be empty).
 * @param knob Falsy leaves the prompt untouched; a string is used verbatim as
 *   the guidance block; any other truthy value selects
 *   DEFAULT_TOOL_CALL_PREAMBLE_BLOCK.
 * @returns The prompt with the block and a blank line in front of it, or just
 *   the block when the prompt is empty.
 */
function applyToolCallPreambles(prompt, knob) {
  if (!knob) {
    return prompt;
  }
  let block = DEFAULT_TOOL_CALL_PREAMBLE_BLOCK;
  if (typeof knob === "string") {
    block = knob;
  }
  if (!prompt) {
    return block;
  }
  return `${block}\n\n${prompt}`;
}
|
|
27066
27723
|
function checkGuardrails(text, guardrails) {
|
|
27067
27724
|
if (!guardrails) return null;
|
|
27068
27725
|
for (const guard of guardrails) {
|
|
@@ -27119,6 +27776,13 @@ function augmentWithBuiltinHandoffTools(userTools, callbacks) {
|
|
|
27119
27776
|
}
|
|
27120
27777
|
return out;
|
|
27121
27778
|
}
|
|
27779
|
+
/**
 * Decide whether a transcript is a known speech-to-text artefact rather than
 * real user speech.
 *
 * The text is trimmed, lower-cased and stripped of trailing punctuation, then
 * treated as a hallucination when it is empty, is an exact HALLUCINATIONS
 * entry, or is made up of two or more sentences that are each a
 * HALLUCINATIONS entry.
 *
 * Punctuation is written with explicit escapes so the classes cover the
 * ellipsis (U+2026), the ideographic full stop (U+3002) and the fullwidth
 * "!" / "?" (U+FF01 / U+FF1F), whatever the file's encoding.
 *
 * @param text Raw transcript text.
 * @returns True when the transcript should be dropped.
 */
function isSttHallucination(text) {
  const stripped = text
    .trim()
    .toLowerCase()
    .replace(/[.,!?;:\u2026\u3002\uFF01\uFF1F\s]+$/u, "")
    .trim();
  if (stripped === "") return true;
  if (HALLUCINATIONS.has(stripped)) return true;
  const pieces = stripped
    .split(/[.!?\u2026\u3002\uFF01\uFF1F]+/u)
    .map((p) => p.trim())
    .filter((p) => p.length > 0);
  // A single unmatched sentence was already rejected by the exact lookup above.
  return pieces.length > 1 && pieces.every((p) => HALLUCINATIONS.has(p));
}
|
|
27122
27786
|
async function queryDeepgramCost(metricsAcc, deepgramKey, deepgramRequestId) {
|
|
27123
27787
|
try {
|
|
27124
27788
|
const projResp = await fetch("https://api.deepgram.com/v1/projects", {
|
|
@@ -27149,7 +27813,7 @@ async function queryDeepgramCost(metricsAcc, deepgramKey, deepgramRequestId) {
|
|
|
27149
27813
|
} catch {
|
|
27150
27814
|
}
|
|
27151
27815
|
}
|
|
27152
|
-
var HALLUCINATIONS, StreamHandler;
|
|
27816
|
+
var DEFAULT_TOOL_CALL_PREAMBLE_BLOCK, HALLUCINATIONS, StreamHandler;
|
|
27153
27817
|
var init_stream_handler = __esm({
|
|
27154
27818
|
"src/stream-handler.ts"() {
|
|
27155
27819
|
"use strict";
|
|
@@ -27167,39 +27831,96 @@ var init_stream_handler = __esm({
|
|
|
27167
27831
|
init_mcp_client();
|
|
27168
27832
|
init_logger();
|
|
27169
27833
|
init_server();
|
|
27834
|
+
init_consult();
|
|
27170
27835
|
init_sentence_chunker();
|
|
27171
27836
|
init_pipeline_hooks();
|
|
27172
27837
|
init_event_bus();
|
|
27173
27838
|
init_tracing();
|
|
27839
|
+
DEFAULT_TOOL_CALL_PREAMBLE_BLOCK = `# Preambles
|
|
27840
|
+
|
|
27841
|
+
Use short preambles only when they help the user understand that work is happening. A preamble is one short spoken update describing the action you are about to take \u2014 not hidden reasoning, and never a claim about the result.
|
|
27842
|
+
|
|
27843
|
+
## When to use a preamble
|
|
27844
|
+
Use a preamble when:
|
|
27845
|
+
- you are about to call a tool that may take noticeable time;
|
|
27846
|
+
- you need to reason through a multi-step request;
|
|
27847
|
+
- you are checking records, availability, account state, or policy details;
|
|
27848
|
+
- you are preparing an escalation or handoff;
|
|
27849
|
+
- silence would make the assistant feel unresponsive.
|
|
27850
|
+
|
|
27851
|
+
When a preamble is needed, output it immediately before the reasoning or tool call.
|
|
27852
|
+
|
|
27853
|
+
## When to NOT use a preamble
|
|
27854
|
+
Do not use a preamble when:
|
|
27855
|
+
- the answer is direct and can be given immediately;
|
|
27856
|
+
- the user is only confirming, correcting, or declining something;
|
|
27857
|
+
- the audio is unclear and you need clarification instead;
|
|
27858
|
+
- the tool call is lightweight and the user would not benefit from an update.
|
|
27859
|
+
|
|
27860
|
+
## Style
|
|
27861
|
+
- Keep it to one short sentence (two only before a high-impact action).
|
|
27862
|
+
- Vary the wording across turns; do not reuse the same opener.
|
|
27863
|
+
- Describe the action, not the internal reasoning.
|
|
27864
|
+
- Never imply success or failure before the tool returns.
|
|
27865
|
+
|
|
27866
|
+
Prefer:
|
|
27867
|
+
- "I'll check that order now."
|
|
27868
|
+
- "I'll look up your appointment details."
|
|
27869
|
+
- "I'll verify that before we make any changes."
|
|
27870
|
+
- "I'll check the policy and then give you the next step."
|
|
27871
|
+
- "I'll pull that up so we can make sure it's the right account."
|
|
27872
|
+
|
|
27873
|
+
Avoid:
|
|
27874
|
+
- "Let me think about that for a second."
|
|
27875
|
+
- "Please wait while I process your request."
|
|
27876
|
+
- "I'm going to use my tools now."
|
|
27877
|
+
- "Hmm..." / "One moment while I process that..."`;
|
|
27174
27878
|
HALLUCINATIONS = /* @__PURE__ */ new Set([
|
|
27175
|
-
|
|
27176
|
-
|
|
27177
|
-
|
|
27178
|
-
|
|
27179
|
-
|
|
27180
|
-
|
|
27181
|
-
|
|
27182
|
-
|
|
27183
|
-
|
|
27184
|
-
|
|
27185
|
-
|
|
27186
|
-
|
|
27187
|
-
|
|
27188
|
-
|
|
27189
|
-
|
|
27190
|
-
"cool",
|
|
27191
|
-
// Whisper YouTube-caption hallucinations
|
|
27879
|
+
// Issue #154: the hallucination filter is now DISPLAY-ONLY — it no longer
|
|
27880
|
+
// gates response creation (the server drives the response on
|
|
27881
|
+
// ``input_audio_buffer.committed`` by default). Dropping a phrase here
|
|
27882
|
+
// therefore deletes the user's transcript line (recordSttComplete never
|
|
27883
|
+
// fires → empty user_text → dashboard skips the user line). So this set is
|
|
27884
|
+
// restricted to genuine NON-SPEECH artefacts that Whisper emits on
|
|
27885
|
+
// silence / TTS echo, NOT real conversational words. Standalone words like
|
|
27886
|
+
// 'yes', 'no', 'okay', 'right', 'you', 'thanks' were REMOVED — they are
|
|
27887
|
+
// legitimate user replies and must reach the transcript. Parity with
|
|
27888
|
+
// Python ``_STT_HALLUCINATIONS``.
|
|
27889
|
+
//
|
|
27890
|
+
// Whisper caption / training-set hallucinations. Whisper was trained heavily
|
|
27891
|
+
// on captioned video, so on silence / PSTN echo it falls back to the most
|
|
27892
|
+
// common caption credits + sign-offs. Curated from widely-reported
|
|
27893
|
+
// Whisper-on-silence outputs across the open-source ASR community.
|
|
27192
27894
|
"thank you for watching",
|
|
27193
27895
|
"thanks for watching",
|
|
27194
27896
|
"thank you for watching!",
|
|
27195
27897
|
"thanks for watching!",
|
|
27196
27898
|
"thank you so much for watching",
|
|
27899
|
+
"thank you for watching please subscribe",
|
|
27900
|
+
"thanks for watching please subscribe",
|
|
27197
27901
|
"thanks for listening",
|
|
27902
|
+
"we'll see you next time",
|
|
27903
|
+
"see you next time",
|
|
27904
|
+
"bye bye",
|
|
27198
27905
|
"please subscribe",
|
|
27906
|
+
"please subscribe to my channel",
|
|
27907
|
+
"don't forget to subscribe",
|
|
27908
|
+
"like and subscribe",
|
|
27199
27909
|
"subscribe",
|
|
27910
|
+
"subtitles by the amara.org community",
|
|
27911
|
+
"subtitles by the amara org community",
|
|
27912
|
+
"subtitles by",
|
|
27913
|
+
"transcribed by",
|
|
27914
|
+
"transcription by castingwords",
|
|
27915
|
+
"the end",
|
|
27916
|
+
// Music / sound markers.
|
|
27200
27917
|
"music",
|
|
27201
27918
|
"[music]",
|
|
27919
|
+
"piano music",
|
|
27920
|
+
"applause",
|
|
27921
|
+
"[applause]",
|
|
27202
27922
|
"\u266A",
|
|
27923
|
+
// Silence markers.
|
|
27203
27924
|
"[no audio]",
|
|
27204
27925
|
"[silence]",
|
|
27205
27926
|
"[blank_audio]",
|
|
@@ -27503,7 +28224,14 @@ var init_stream_handler = __esm({
|
|
|
27503
28224
|
* barge-in armed during the audible tail. Tunable via env.
|
|
27504
28225
|
*/
|
|
27505
28226
|
endSpeakingWithGrace() {
|
|
27506
|
-
const
|
|
28227
|
+
const rawGrace = process.env.PATTER_TTS_TAIL_GRACE_MS;
|
|
28228
|
+
const parsedGrace = rawGrace !== void 0 ? Number(rawGrace) : NaN;
|
|
28229
|
+
const grace = rawGrace !== void 0 && Number.isFinite(parsedGrace) ? parsedGrace : 1500;
|
|
28230
|
+
if (rawGrace !== void 0 && !Number.isFinite(parsedGrace)) {
|
|
28231
|
+
getLogger().warn(
|
|
28232
|
+
`PATTER_TTS_TAIL_GRACE_MS="${rawGrace}" is not a valid number \u2014 using default 1500ms`
|
|
28233
|
+
);
|
|
28234
|
+
}
|
|
27507
28235
|
if (grace > 0) {
|
|
27508
28236
|
const gen = this.speakingGeneration;
|
|
27509
28237
|
this.clearGraceTimer();
|
|
@@ -27597,6 +28325,14 @@ var init_stream_handler = __esm({
|
|
|
27597
28325
|
`[DIAG] Flushed ${replayed} pre-barge-in frame(s) (~${replayed * 20} ms) to STT`
|
|
27598
28326
|
);
|
|
27599
28327
|
}
|
|
28328
|
+
/**
|
|
28329
|
+
* Per-call resolved tool list. Starts as ``null`` (falls back to
|
|
28330
|
+
* ``deps.agent.tools``). Populated by ``initMcpTools`` when MCP servers
|
|
28331
|
+
* are configured so discovered tools are merged in without mutating the
|
|
28332
|
+
* shared ``AgentOptions`` object. Code that needs the effective tool list
|
|
28333
|
+
* should read ``this.resolvedTools ?? this.deps.agent.tools``.
|
|
28334
|
+
*/
|
|
28335
|
+
resolvedTools = null;
|
|
27600
28336
|
llmLoop = null;
|
|
27601
28337
|
/**
|
|
27602
28338
|
* Per-call tool executor — provides retry-with-exponential-backoff and a
|
|
@@ -27640,6 +28376,17 @@ var init_stream_handler = __esm({
|
|
|
27640
28376
|
userTranscriptPending = false;
|
|
27641
28377
|
pendingAssistantTurn = null;
|
|
27642
28378
|
pendingAssistantTimer = null;
|
|
28379
|
+
/**
|
|
28380
|
+
* Reserved monotonic turn index for the in-flight Realtime turn (issue
|
|
28381
|
+
* #154, fix 5/6). Reserved in ``onAdapterSpeechStopped`` via
|
|
28382
|
+
* ``metricsAcc.reserveTurnIndex()`` the moment the turn OPENS, then threaded
|
|
28383
|
+
* through to the live per-line transcript events (``recordTranscriptLine``)
|
|
28384
|
+
* and into ``recordTurnComplete`` / ``recordTurnInterrupted`` so the
|
|
28385
|
+
* dashboard can sort a late-arriving user line ABOVE its agent line by
|
|
28386
|
+
* ``(turnIndex, role)``. ``null`` until the first turn opens. Parity with
|
|
28387
|
+
* Python ``_current_turn_index``.
|
|
28388
|
+
*/
|
|
28389
|
+
currentTurnIndex = null;
|
|
27643
28390
|
/**
|
|
27644
28391
|
* Hard cap on how long we wait for the user transcript before flushing
|
|
27645
28392
|
* the buffered assistant turn alone. 3 s covers OpenAI Whisper's typical
|
|
@@ -27721,6 +28468,23 @@ var init_stream_handler = __esm({
|
|
|
27721
28468
|
* streaming/regular LLM, WebSocket remote, Realtime response_done) so the
|
|
27722
28469
|
* payload shape lives in one place.
|
|
27723
28470
|
*/
|
|
28471
|
+
/**
|
|
28472
|
+
* Emit a live per-line transcript event to the dashboard store (issue #154,
|
|
28473
|
+
* fix 5). Routed through a single helper so the call shape lives in one
|
|
28474
|
+
* place. ``recordTranscriptLine`` appends the line to the active call's
|
|
28475
|
+
* transcript and publishes a ``transcript_line`` SSE event; the dashboard
|
|
28476
|
+
* sorts by (turnIndex, user<assistant) so a late user line lands above its
|
|
28477
|
+
* agent line. No-op when no turn index has been reserved yet.
|
|
28478
|
+
*/
|
|
28479
|
+
emitTranscriptLine(role, text) {
|
|
28480
|
+
if (this.currentTurnIndex === null) return;
|
|
28481
|
+
this.deps.metricsStore.recordTranscriptLine({
|
|
28482
|
+
call_id: this.callId,
|
|
28483
|
+
turnIndex: this.currentTurnIndex,
|
|
28484
|
+
role,
|
|
28485
|
+
text
|
|
28486
|
+
});
|
|
28487
|
+
}
|
|
27724
28488
|
async emitTurnMetrics(turn) {
|
|
27725
28489
|
if (turn == null) return;
|
|
27726
28490
|
this.deps.metricsStore.recordTurn({ call_id: this.callId, turn });
|
|
@@ -27827,7 +28591,7 @@ var init_stream_handler = __esm({
|
|
|
27827
28591
|
if (customParams.callee && !this.callee) this.callee = customParams.callee;
|
|
27828
28592
|
const mode = this.deps.agent.engine ? `engine=${this.deps.agent.engine.kind ?? "unknown"}` : "pipeline";
|
|
27829
28593
|
getLogger().info(
|
|
27830
|
-
`Call started: ${callId} (${this.deps.bridge.label}, ${mode}, ${
|
|
28594
|
+
`Call started: ${callId} (${this.deps.bridge.label}, ${mode}, ${maskPhoneNumber(this.caller || "?")} \u2192 ${maskPhoneNumber(this.callee || "?")})`
|
|
27831
28595
|
);
|
|
27832
28596
|
if (Object.keys(customParams).length > 0) {
|
|
27833
28597
|
getLogger().debug(`Custom params: ${sanitizeLogValue(JSON.stringify(customParams))}`);
|
|
@@ -27872,10 +28636,13 @@ var init_stream_handler = __esm({
|
|
|
27872
28636
|
const resolvedPrompt = Object.keys(allVars).length > 0 ? this.deps.resolveVariables(this.deps.agent.systemPrompt, allVars) : this.deps.agent.systemPrompt;
|
|
27873
28637
|
const provider2 = this.deps.agent.provider ?? "openai_realtime";
|
|
27874
28638
|
await this.initMcpTools();
|
|
28639
|
+
this.injectConsultTool();
|
|
27875
28640
|
if (provider2 === "pipeline") {
|
|
27876
28641
|
await this.initPipeline(resolvedPrompt);
|
|
27877
28642
|
} else {
|
|
27878
|
-
await this.initRealtimeAdapter(
|
|
28643
|
+
await this.initRealtimeAdapter(
|
|
28644
|
+
applyToolCallPreambles(resolvedPrompt, this.deps.agent.toolCallPreambles)
|
|
28645
|
+
);
|
|
27879
28646
|
}
|
|
27880
28647
|
}
|
|
27881
28648
|
/**
|
|
@@ -27900,10 +28667,25 @@ var init_stream_handler = __esm({
|
|
|
27900
28667
|
}
|
|
27901
28668
|
if (discovered.length === 0) return;
|
|
27902
28669
|
MCPManager.assertNoConflicts(this.deps.agent.tools, discovered);
|
|
27903
|
-
|
|
27904
|
-
mutableAgent.tools = [...mutableAgent.tools ?? [], ...discovered];
|
|
28670
|
+
this.resolvedTools = [...this.deps.agent.tools ?? [], ...discovered];
|
|
27905
28671
|
getLogger().info(`MCP: merged ${discovered.length} tool(s) into agent`);
|
|
27906
28672
|
}
|
|
28673
|
+
/**
|
|
28674
|
+
* Merge the built-in ``consult`` tool into the per-call tool list when
|
|
28675
|
+
* ``agent.consult`` is set, mirroring {@link initMcpTools}: the shared
|
|
28676
|
+
* ``deps.agent`` is NOT mutated; the merged list is stored on
|
|
28677
|
+
* ``this.resolvedTools`` so ``buildAIAdapter`` (Realtime) and the pipeline
|
|
28678
|
+
* ``LLMLoop`` both see it. Idempotent — a no-op if a tool with the same name
|
|
28679
|
+
* is already present.
|
|
28680
|
+
*/
|
|
28681
|
+
injectConsultTool() {
|
|
28682
|
+
const consult = this.deps.agent.consult;
|
|
28683
|
+
if (!consult) return;
|
|
28684
|
+
const consultTool = buildConsultTool(consult);
|
|
28685
|
+
const base = this.resolvedTools ?? (this.deps.agent.tools ?? []);
|
|
28686
|
+
if (base.some((t) => t.name === consultTool.name)) return;
|
|
28687
|
+
this.resolvedTools = [...base, consultTool];
|
|
28688
|
+
}
|
|
27907
28689
|
/** Set the stream SID (Twilio only, called after parsing 'start' event). */
|
|
27908
28690
|
/** Set the carrier-side stream id (Twilio `streamSid` / Telnyx stream identifier). */
|
|
27909
28691
|
setStreamSid(sid) {
|
|
@@ -27923,8 +28705,12 @@ var init_stream_handler = __esm({
|
|
|
27923
28705
|
if (activeVad && !this.vadDisabled) {
|
|
27924
28706
|
try {
|
|
27925
28707
|
const vadPromise = activeVad.processFrame(pcm16k, 16e3);
|
|
27926
|
-
|
|
28708
|
+
let vadTimeoutId;
|
|
28709
|
+
const timeoutPromise = new Promise((resolve2) => {
|
|
28710
|
+
vadTimeoutId = setTimeout(() => resolve2(null), 25);
|
|
28711
|
+
});
|
|
27927
28712
|
const evt = await Promise.race([vadPromise, timeoutPromise]);
|
|
28713
|
+
clearTimeout(vadTimeoutId);
|
|
27928
28714
|
if (evt) {
|
|
27929
28715
|
getLogger().info(
|
|
27930
28716
|
`[VAD] ${evt.type} agentSpeaking=${this.isSpeaking}`
|
|
@@ -27997,7 +28783,7 @@ var init_stream_handler = __esm({
|
|
|
27997
28783
|
if ((this.deps.agent.bargeInThresholdMs ?? 300) === 0) return;
|
|
27998
28784
|
}
|
|
27999
28785
|
const hooks = this.deps.agent.hooks;
|
|
28000
|
-
if (hooks) {
|
|
28786
|
+
if (hooks?.beforeSendToStt) {
|
|
28001
28787
|
const hookExecutor = new PipelineHookExecutor(hooks);
|
|
28002
28788
|
const hookCtx = this.buildHookContext();
|
|
28003
28789
|
const processed = await hookExecutor.runBeforeSendToStt(pcm16k, hookCtx);
|
|
@@ -28423,7 +29209,7 @@ var init_stream_handler = __esm({
|
|
|
28423
29209
|
}
|
|
28424
29210
|
const providerModel = this.deps.agent.llm?.model ?? "";
|
|
28425
29211
|
const augmentedTools = augmentWithBuiltinHandoffTools(
|
|
28426
|
-
this.deps.agent.tools,
|
|
29212
|
+
this.resolvedTools ?? this.deps.agent.tools,
|
|
28427
29213
|
{
|
|
28428
29214
|
transferCall: (number4) => this.deps.bridge.transferCall(this.callId, number4),
|
|
28429
29215
|
endCall: () => this.deps.bridge.endCall(this.callId, this.ws)
|
|
@@ -28447,7 +29233,7 @@ var init_stream_handler = __esm({
|
|
|
28447
29233
|
let llmModel = this.deps.agent.model || "gpt-4o-mini";
|
|
28448
29234
|
if (llmModel.includes("realtime")) llmModel = "gpt-4o-mini";
|
|
28449
29235
|
const augmentedTools = augmentWithBuiltinHandoffTools(
|
|
28450
|
-
this.deps.agent.tools,
|
|
29236
|
+
this.resolvedTools ?? this.deps.agent.tools,
|
|
28451
29237
|
{
|
|
28452
29238
|
transferCall: (number4) => this.deps.bridge.transferCall(this.callId, number4),
|
|
28453
29239
|
endCall: () => this.deps.bridge.endCall(this.callId, this.ws)
|
|
@@ -28871,6 +29657,14 @@ var init_stream_handler = __esm({
|
|
|
28871
29657
|
chunker.reset();
|
|
28872
29658
|
getLogger().error(`LLM loop error (${label}):`, e);
|
|
28873
29659
|
this.metricsAcc.recordTurnInterrupted();
|
|
29660
|
+
const fallback = this.deps.agent.llmErrorMessage;
|
|
29661
|
+
if (fallback && !ttsFirstByteSent.value && this.isSpeaking) {
|
|
29662
|
+
try {
|
|
29663
|
+
await this.synthesizeSentence(fallback, hookExecutor, hookCtx, ttsFirstByteSent);
|
|
29664
|
+
} catch (err) {
|
|
29665
|
+
getLogger().error(`llmErrorMessage fallback synthesis failed (${label}):`, err);
|
|
29666
|
+
}
|
|
29667
|
+
}
|
|
28874
29668
|
}
|
|
28875
29669
|
}
|
|
28876
29670
|
this.metricsAcc.recordLlmComplete();
|
|
@@ -28971,7 +29765,7 @@ var init_stream_handler = __esm({
|
|
|
28971
29765
|
// ---------------------------------------------------------------------------
|
|
28972
29766
|
async initRealtimeAdapter(resolvedPrompt) {
|
|
28973
29767
|
const label = this.deps.bridge.label;
|
|
28974
|
-
this.adapter = this.deps.buildAIAdapter(resolvedPrompt);
|
|
29768
|
+
this.adapter = this.deps.buildAIAdapter(resolvedPrompt, this.resolvedTools ?? void 0);
|
|
28975
29769
|
let parked;
|
|
28976
29770
|
if (typeof this.deps.popPrewarmedConnections === "function") {
|
|
28977
29771
|
try {
|
|
@@ -29044,6 +29838,7 @@ var init_stream_handler = __esm({
|
|
|
29044
29838
|
response_done: async (eventData) => this.onAdapterResponseDone(eventData),
|
|
29045
29839
|
speech_started: async () => this.onAdapterSpeechInterrupt(),
|
|
29046
29840
|
interruption: async () => this.onAdapterSpeechInterrupt(),
|
|
29841
|
+
error: async (eventData) => this.onAdapterError(eventData),
|
|
29047
29842
|
function_call: async (eventData) => {
|
|
29048
29843
|
if (this.adapter instanceof OpenAIRealtimeAdapter) {
|
|
29049
29844
|
await this.handleFunctionCall(eventData);
|
|
@@ -29130,21 +29925,31 @@ var init_stream_handler = __esm({
|
|
|
29130
29925
|
if (!this.metricsAcc.turnActive) this.metricsAcc.startTurn();
|
|
29131
29926
|
this.currentAgentText = "";
|
|
29132
29927
|
this.responseAudioStarted = false;
|
|
29928
|
+
this.currentTurnIndex = this.metricsAcc.reserveTurnIndex();
|
|
29133
29929
|
this.userTranscriptPending = true;
|
|
29134
29930
|
await this.emitUserSpeechEnded();
|
|
29135
29931
|
}
|
|
29136
29932
|
async onAdapterTranscriptInput(inputText) {
|
|
29137
|
-
|
|
29138
|
-
if (HALLUCINATIONS.has(stripped) || stripped === "") {
|
|
29933
|
+
if (isSttHallucination(inputText)) {
|
|
29139
29934
|
getLogger().debug(
|
|
29140
29935
|
`Realtime transcript_input dropped (likely Whisper hallucination on silence/echo): ${sanitizeLogValue(inputText.slice(0, 60))}`
|
|
29141
29936
|
);
|
|
29142
29937
|
this.userTranscriptPending = false;
|
|
29938
|
+
if (this.pendingAssistantTurn !== null) {
|
|
29939
|
+
const buffered = this.pendingAssistantTurn;
|
|
29940
|
+
this.pendingAssistantTurn = null;
|
|
29941
|
+
if (this.pendingAssistantTimer) {
|
|
29942
|
+
clearTimeout(this.pendingAssistantTimer);
|
|
29943
|
+
this.pendingAssistantTimer = null;
|
|
29944
|
+
}
|
|
29945
|
+
await this.flushAssistantTurn(buffered);
|
|
29946
|
+
}
|
|
29143
29947
|
return;
|
|
29144
29948
|
}
|
|
29145
29949
|
getLogger().debug(`User (${this.deps.bridge.label}): ${sanitizeLogValue(inputText)}`);
|
|
29146
29950
|
this.history.push({ role: "user", text: inputText, timestamp: Date.now() });
|
|
29147
|
-
|
|
29951
|
+
this.emitTranscriptLine("user", inputText);
|
|
29952
|
+
if (this.adapter instanceof OpenAIRealtimeAdapter && this.adapter.getGateResponseOnTranscript()) {
|
|
29148
29953
|
void this.adapter.requestResponse().catch(
|
|
29149
29954
|
(err) => getLogger().debug(`Realtime requestResponse failed: ${String(err)}`)
|
|
29150
29955
|
);
|
|
@@ -29191,8 +29996,12 @@ var init_stream_handler = __esm({
|
|
|
29191
29996
|
history: [...this.history.entries]
|
|
29192
29997
|
});
|
|
29193
29998
|
}
|
|
29999
|
+
const reservedIndex = this.currentTurnIndex;
|
|
30000
|
+
this.emitTranscriptLine("assistant", text);
|
|
29194
30001
|
this.responseAudioStarted = false;
|
|
29195
|
-
await this.emitTurnMetrics(
|
|
30002
|
+
await this.emitTurnMetrics(
|
|
30003
|
+
this.metricsAcc.recordTurnComplete(text, reservedIndex ?? void 0)
|
|
30004
|
+
);
|
|
29196
30005
|
}
|
|
29197
30006
|
/**
|
|
29198
30007
|
* Push an assistant turn into history and fire `onTranscript` so host
|
|
@@ -29291,7 +30100,9 @@ var init_stream_handler = __esm({
|
|
|
29291
30100
|
this.pendingAssistantTimer = null;
|
|
29292
30101
|
this.userTranscriptPending = false;
|
|
29293
30102
|
if (buffered !== null) {
|
|
29294
|
-
|
|
30103
|
+
this.flushAssistantTurn(buffered).catch(
|
|
30104
|
+
(err) => getLogger().error("flushAssistantTurn (fallback timer) failed:", err)
|
|
30105
|
+
);
|
|
29295
30106
|
}
|
|
29296
30107
|
}, _StreamHandler.REALTIME_USER_TRANSCRIPT_WAIT_MS);
|
|
29297
30108
|
this.responseAudioStarted = false;
|
|
@@ -29300,7 +30111,9 @@ var init_stream_handler = __esm({
|
|
|
29300
30111
|
await this.flushAssistantTurn(text);
|
|
29301
30112
|
}
|
|
29302
30113
|
async onAdapterSpeechInterrupt() {
|
|
29303
|
-
|
|
30114
|
+
const isEngine = this.adapter instanceof OpenAIRealtimeAdapter;
|
|
30115
|
+
const clientManaged = isEngine && this.adapter.getGateResponseOnTranscript();
|
|
30116
|
+
if (clientManaged) {
|
|
29304
30117
|
const startedAt = this.adapter.currentResponseFirstAudioAt;
|
|
29305
30118
|
if (startedAt !== null) {
|
|
29306
30119
|
const elapsedMs = Date.now() - startedAt;
|
|
@@ -29313,12 +30126,20 @@ var init_stream_handler = __esm({
|
|
|
29313
30126
|
}
|
|
29314
30127
|
}
|
|
29315
30128
|
this.deps.bridge.sendClear(this.ws, this.streamSid);
|
|
29316
|
-
if (
|
|
30129
|
+
if (clientManaged) {
|
|
30130
|
+
this.metricsAcc.recordBargeinDetected();
|
|
30131
|
+
this.adapter.cancelResponse();
|
|
30132
|
+
} else if (isEngine) {
|
|
30133
|
+
this.adapter.truncate();
|
|
30134
|
+
}
|
|
29317
30135
|
this.metricsAcc.recordTurnInterrupted();
|
|
29318
30136
|
if (this.responseAudioStarted) {
|
|
29319
30137
|
await this.emitAgentSpeechEnded(true);
|
|
29320
30138
|
}
|
|
29321
30139
|
await this.emitUserSpeechStarted();
|
|
30140
|
+
if (clientManaged) {
|
|
30141
|
+
this.metricsAcc.anchorUserSpeechStart();
|
|
30142
|
+
}
|
|
29322
30143
|
this.currentAgentText = "";
|
|
29323
30144
|
this.responseAudioStarted = false;
|
|
29324
30145
|
this.pendingAssistantTurn = null;
|
|
@@ -29328,6 +30149,28 @@ var init_stream_handler = __esm({
|
|
|
29328
30149
|
}
|
|
29329
30150
|
this.userTranscriptPending = false;
|
|
29330
30151
|
}
|
|
30152
|
+
/**
|
|
30153
|
+
* Handle a Realtime ``error`` event (issue #154, fix 4).
|
|
30154
|
+
*
|
|
30155
|
+
* Both Realtime providers dispatch ``('error', …)`` for server-side errors,
|
|
30156
|
+
* non-normal socket closes, and socket errors, but the stream handler
|
|
30157
|
+
* previously had no entry for it in the dispatch table so these were
|
|
30158
|
+
* silently swallowed. We surface them at WARN level with ONLY the error
|
|
30159
|
+
* envelope fields (``type`` / ``code`` / ``message``) — never any audio or
|
|
30160
|
+
* transcript body, to avoid logging PII. The call is NOT terminated: the
|
|
30161
|
+
* provider decides whether to recover, and many of these (e.g. a transient
|
|
30162
|
+
* ``input_audio_buffer_commit_empty``) are non-fatal. Parity with the
|
|
30163
|
+
* Python ``elif ev_type == 'error'`` branches.
|
|
30164
|
+
*/
|
|
30165
|
+
async onAdapterError(eventData) {
|
|
30166
|
+
const err = eventData ?? {};
|
|
30167
|
+
const type = typeof err.type === "string" ? err.type : "unknown";
|
|
30168
|
+
const code = typeof err.code === "string" ? err.code : "";
|
|
30169
|
+
const message = typeof err.message === "string" ? err.message : "";
|
|
30170
|
+
getLogger().warn(
|
|
30171
|
+
`Realtime error (${this.deps.bridge.label}) type=${type} code=${code} message=${sanitizeLogValue(message)}`
|
|
30172
|
+
);
|
|
30173
|
+
}
|
|
29331
30174
|
/**
|
|
29332
30175
|
* Emit a tool-invocation event into the transcript timeline. Pushes a
|
|
29333
30176
|
* `role=tool` entry into `history` (so it appears in the dashboard
|
|
@@ -29395,7 +30238,8 @@ var init_stream_handler = __esm({
|
|
|
29395
30238
|
}
|
|
29396
30239
|
return;
|
|
29397
30240
|
}
|
|
29398
|
-
const
|
|
30241
|
+
const effectiveTools = this.resolvedTools ?? this.deps.agent.tools;
|
|
30242
|
+
const toolDef = effectiveTools?.find((t) => t.name === fc.name);
|
|
29399
30243
|
if (!toolDef) {
|
|
29400
30244
|
getLogger().warn(`Realtime tool '${fc.name}' not found in agent.tools \u2014 skipping`);
|
|
29401
30245
|
const result2 = JSON.stringify({ error: `Tool '${fc.name}' not registered`, fallback: true });
|
|
@@ -29418,7 +30262,8 @@ var init_stream_handler = __esm({
|
|
|
29418
30262
|
if (msg && this.adapter instanceof OpenAIRealtimeAdapter) {
|
|
29419
30263
|
const realtimeAdapter = this.adapter;
|
|
29420
30264
|
reassuranceTimer = setTimeout(() => {
|
|
29421
|
-
realtimeAdapter.
|
|
30265
|
+
const fire = typeof realtimeAdapter.sendReassurance === "function" ? realtimeAdapter.sendReassurance(msg) : realtimeAdapter.sendText(msg);
|
|
30266
|
+
fire.catch((e) => {
|
|
29422
30267
|
getLogger().warn(`Reassurance message failed for tool '${fc.name}': ${String(e)}`);
|
|
29423
30268
|
});
|
|
29424
30269
|
}, afterMs);
|
|
@@ -29438,7 +30283,8 @@ var init_stream_handler = __esm({
|
|
|
29438
30283
|
parsedArgs,
|
|
29439
30284
|
{
|
|
29440
30285
|
call_id: this.callId,
|
|
29441
|
-
caller: this.caller
|
|
30286
|
+
caller: this.caller,
|
|
30287
|
+
callee: this.callee
|
|
29442
30288
|
},
|
|
29443
30289
|
onProgress
|
|
29444
30290
|
);
|
|
@@ -29588,21 +30434,21 @@ async function appendJsonl(filePath, record2) {
|
|
|
29588
30434
|
await import_node_fs2.promises.mkdir(path4.dirname(filePath), { recursive: true });
|
|
29589
30435
|
await import_node_fs2.promises.appendFile(filePath, JSON.stringify(record2) + "\n", { encoding: "utf8" });
|
|
29590
30436
|
}
|
|
29591
|
-
function
|
|
30437
|
+
/**
 * Best-effort recursive removal of the directory `target`.
 *
 * Every filesystem step is isolated so that one failure (for example a
 * child that disappears between being listed and being removed) does not
 * stop the remaining siblings from being deleted. Errors are intentionally
 * swallowed: the function never rejects, and anything that could not be
 * removed is simply left on disk.
 *
 * @param {string} target Path of the directory to remove.
 * @returns {Promise<void>} Resolves when the attempt has finished.
 */
async function rmTreeAsync(target) {
  let entries;
  try {
    // Dirent objects carry the entry type, so no separate lstat() call (and
    // no extra failure point) is needed per child. A Dirent describes the
    // entry itself, so a symlink is unlinked below rather than traversed.
    entries = await import_node_fs2.promises.readdir(target, { withFileTypes: true });
  } catch {
    // Missing, unreadable, or not a directory: nothing to do.
    return;
  }
  for (const entry of entries) {
    const childPath = path4.join(target, entry.name);
    if (entry.isDirectory()) {
      await rmTreeAsync(childPath);
      continue;
    }
    try {
      await import_node_fs2.promises.unlink(childPath);
    } catch {
      // Best effort: keep going with the remaining children.
    }
  }
  try {
    await import_node_fs2.promises.rmdir(target);
  } catch {
    // Directory not empty (a child could not be removed) or already gone.
  }
}
|
|
@@ -29684,7 +30530,9 @@ var init_call_log = __esm({
|
|
|
29684
30530
|
getLogger().warn(`call_log write failed (${sanitizeLogValue(callId)}): ${sanitizeLogValue(String(err))}`);
|
|
29685
30531
|
}
|
|
29686
30532
|
if (crypto5.randomBytes(1)[0] < 5) {
|
|
29687
|
-
this.sweepOldDays()
|
|
30533
|
+
void this.sweepOldDays().catch(
|
|
30534
|
+
(e) => getLogger().debug(`call_log sweep failed: ${sanitizeLogValue(String(e))}`)
|
|
30535
|
+
);
|
|
29688
30536
|
}
|
|
29689
30537
|
}
|
|
29690
30538
|
/** Append a single turn record to the call's `transcript.jsonl`. */
|
|
@@ -29759,23 +30607,27 @@ var init_call_log = __esm({
|
|
|
29759
30607
|
}
|
|
29760
30608
|
}
|
|
29761
30609
|
// --- Retention ---------------------------------------------------------
|
|
29762
|
-
sweepOldDays() {
|
|
30610
|
+
async sweepOldDays() {
|
|
29763
30611
|
if (this.root === null) return;
|
|
29764
30612
|
const days = retentionDays();
|
|
29765
30613
|
if (days === 0) return;
|
|
29766
30614
|
const cutoff = Date.now() / 1e3 - days * 86400;
|
|
29767
30615
|
const callsRoot = path4.join(this.root, "calls");
|
|
29768
|
-
if (!fs4.existsSync(callsRoot)) return;
|
|
29769
30616
|
try {
|
|
29770
|
-
|
|
30617
|
+
await import_node_fs2.promises.access(callsRoot);
|
|
30618
|
+
} catch {
|
|
30619
|
+
return;
|
|
30620
|
+
}
|
|
30621
|
+
try {
|
|
30622
|
+
for (const yearName of await import_node_fs2.promises.readdir(callsRoot)) {
|
|
29771
30623
|
if (!/^\d+$/.test(yearName)) continue;
|
|
29772
30624
|
const yearDir = path4.join(callsRoot, yearName);
|
|
29773
|
-
if (!
|
|
29774
|
-
for (const monthName of
|
|
30625
|
+
if (!(await import_node_fs2.promises.stat(yearDir)).isDirectory()) continue;
|
|
30626
|
+
for (const monthName of await import_node_fs2.promises.readdir(yearDir)) {
|
|
29775
30627
|
if (!/^\d+$/.test(monthName)) continue;
|
|
29776
30628
|
const monthDir = path4.join(yearDir, monthName);
|
|
29777
|
-
if (!
|
|
29778
|
-
for (const dayName of
|
|
30629
|
+
if (!(await import_node_fs2.promises.stat(monthDir)).isDirectory()) continue;
|
|
30630
|
+
for (const dayName of await import_node_fs2.promises.readdir(monthDir)) {
|
|
29779
30631
|
if (!/^\d+$/.test(dayName)) continue;
|
|
29780
30632
|
const dayDir = path4.join(monthDir, dayName);
|
|
29781
30633
|
const y = Number.parseInt(yearName, 10);
|
|
@@ -29783,16 +30635,16 @@ var init_call_log = __esm({
|
|
|
29783
30635
|
const d = Number.parseInt(dayName, 10);
|
|
29784
30636
|
const ts = Date.UTC(y, m - 1, d) / 1e3;
|
|
29785
30637
|
if (ts < cutoff) {
|
|
29786
|
-
|
|
30638
|
+
await rmTreeAsync(dayDir);
|
|
29787
30639
|
}
|
|
29788
30640
|
}
|
|
29789
30641
|
try {
|
|
29790
|
-
if (
|
|
30642
|
+
if ((await import_node_fs2.promises.readdir(monthDir)).length === 0) await import_node_fs2.promises.rmdir(monthDir);
|
|
29791
30643
|
} catch {
|
|
29792
30644
|
}
|
|
29793
30645
|
}
|
|
29794
30646
|
try {
|
|
29795
|
-
if (
|
|
30647
|
+
if ((await import_node_fs2.promises.readdir(yearDir)).length === 0) await import_node_fs2.promises.rmdir(yearDir);
|
|
29796
30648
|
} catch {
|
|
29797
30649
|
}
|
|
29798
30650
|
}
|
|
@@ -29833,13 +30685,16 @@ function telnyxHangupOutcome(cause) {
|
|
|
29833
30685
|
if (c === "call_rejected" || c === "rejected" || c === "destination_out_of_order") return "failed";
|
|
29834
30686
|
return null;
|
|
29835
30687
|
}
|
|
29836
|
-
function validateWebhookUrl(url2) {
|
|
30688
|
+
function validateWebhookUrl(url2, allowLoopback = false) {
|
|
29837
30689
|
const parsed = new URL(url2);
|
|
29838
30690
|
if (!["http:", "https:"].includes(parsed.protocol)) {
|
|
29839
30691
|
throw new Error(`Invalid webhook URL scheme: ${parsed.protocol}`);
|
|
29840
30692
|
}
|
|
29841
30693
|
const rawHost = parsed.hostname;
|
|
29842
30694
|
const host = rawHost.replace(/^\[/, "").replace(/\]$/, "").toLowerCase();
|
|
30695
|
+
if (allowLoopback) {
|
|
30696
|
+
return;
|
|
30697
|
+
}
|
|
29843
30698
|
const BLOCKED_HOSTNAMES = /* @__PURE__ */ new Set([
|
|
29844
30699
|
"localhost",
|
|
29845
30700
|
"ip6-localhost",
|
|
@@ -29881,6 +30736,34 @@ function validateWebhookUrl(url2) {
|
|
|
29881
30736
|
}
|
|
29882
30737
|
}
|
|
29883
30738
|
}
|
|
30739
|
+
/**
 * Reduce a URL, a `host:port` pair, or a bare host to its lower-cased host.
 *
 * Steps: trim, drop a leading `scheme://`, drop everything from the first
 * `/`, unwrap a bracketed IPv6 literal (`[::1]:8000` -> `::1`), and strip a
 * trailing numeric `:port`.
 *
 * A port is stripped only when the string contains exactly one colon. Two
 * or more colons mean an unbracketed IPv6 literal, whose last group must
 * not be mistaken for a port (e.g. `0:0:0:0:0:0:0:1` is returned intact).
 *
 * @param {string} value URL, `host[:port]`, or bare host.
 * @returns {string} Lower-cased host, or `""` when `value` is blank.
 */
function extractHost(value) {
  const trimmed = value.trim();
  if (!trimmed) return "";
  let host = trimmed.replace(/^[a-z]+:\/\//i, "").replace(/\/.*$/, "");
  if (host.startsWith("[")) {
    // Bracketed IPv6: keep what is inside the brackets, ignore any `:port`.
    return host.slice(1).split("]", 1)[0].toLowerCase();
  }
  const firstColon = host.indexOf(":");
  const lastColon = host.lastIndexOf(":");
  const hasSingleColon = firstColon !== -1 && firstColon === lastColon;
  if (hasSingleColon && /^\d+$/.test(host.slice(lastColon + 1))) {
    host = host.slice(0, lastColon);
  }
  return host.toLowerCase();
}
|
|
30754
|
+
/**
 * Report whether `value` names a loopback host.
 *
 * The input is first normalized with `extractHost`. Recognized as loopback:
 * the names `localhost`, `ip6-localhost`, `ip6-loopback`; the IPv6 forms
 * `::1` and `::ffff:127.0.0.1`; and any dotted-quad IPv4 address in
 * 127.0.0.0/8. Every octet must be in 0..255, so strings such as
 * `127.300.1.1` are not accepted merely because they start with `127`.
 *
 * @param {string} value URL, `host[:port]`, or bare host.
 * @returns {boolean} `true` only for the loopback forms listed above.
 */
function isLoopbackHost(value) {
  const host = extractHost(value);
  if (!host) return false;
  if (host === "localhost" || host === "ip6-localhost" || host === "ip6-loopback") {
    return true;
  }
  if (host === "::1" || host === "::ffff:127.0.0.1") return true;
  const v4 = /^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/.exec(host);
  if (!v4) return false;
  const octets = v4.slice(1).map((part) => Number.parseInt(part, 10));
  return octets[0] === 127 && octets.every((octet) => octet <= 255);
}
|
|
29884
30767
|
function validateTelnyxSignature(rawBody, signature, timestamp, publicKey, toleranceSec = 300) {
|
|
29885
30768
|
try {
|
|
29886
30769
|
const ts = parseInt(timestamp, 10);
|
|
@@ -29944,7 +30827,7 @@ function resolveVariables(template, variables) {
|
|
|
29944
30827
|
}
|
|
29945
30828
|
return result;
|
|
29946
30829
|
}
|
|
29947
|
-
function buildAIAdapter(config2, agent, resolvedPrompt) {
|
|
30830
|
+
function buildAIAdapter(config2, agent, resolvedPrompt, toolsOverride) {
|
|
29948
30831
|
const engine = agent.engine;
|
|
29949
30832
|
if (agent.provider === "elevenlabs_convai") {
|
|
29950
30833
|
if (!engine || engine.kind !== "elevenlabs_convai") {
|
|
@@ -29959,12 +30842,24 @@ function buildAIAdapter(config2, agent, resolvedPrompt) {
|
|
|
29959
30842
|
agent.firstMessage ?? ""
|
|
29960
30843
|
);
|
|
29961
30844
|
}
|
|
29962
|
-
const
|
|
29963
|
-
|
|
29964
|
-
description
|
|
29965
|
-
|
|
29966
|
-
|
|
29967
|
-
|
|
30845
|
+
const preamblesOn = Boolean(agent.toolCallPreambles);
|
|
30846
|
+
const agentTools = (toolsOverride ?? agent.tools)?.map((t) => {
|
|
30847
|
+
let description = t.description;
|
|
30848
|
+
const reassurance = t.reassurance;
|
|
30849
|
+
const sample = typeof reassurance === "string" ? reassurance : void 0;
|
|
30850
|
+
if (preamblesOn && sample) {
|
|
30851
|
+
description = `${description}
|
|
30852
|
+
|
|
30853
|
+
Preamble sample phrases:
|
|
30854
|
+
- ${sample}`;
|
|
30855
|
+
}
|
|
30856
|
+
return {
|
|
30857
|
+
name: t.name,
|
|
30858
|
+
description,
|
|
30859
|
+
parameters: t.parameters,
|
|
30860
|
+
strict: t.strict
|
|
30861
|
+
};
|
|
30862
|
+
}) ?? [];
|
|
29968
30863
|
const tools = [...agentTools, TRANSFER_CALL_TOOL, END_CALL_TOOL];
|
|
29969
30864
|
const isOpenAIEngine = engine && (engine.kind === "openai_realtime" || engine.kind === "openai_realtime_2");
|
|
29970
30865
|
const openaiKey = isOpenAIEngine ? engine.apiKey : config2.openaiKey ?? "";
|
|
@@ -29976,8 +30871,27 @@ function buildAIAdapter(config2, agent, resolvedPrompt) {
|
|
|
29976
30871
|
if (engine.inputAudioTranscriptionModel !== void 0) {
|
|
29977
30872
|
adapterOptions.inputAudioTranscriptionModel = engine.inputAudioTranscriptionModel;
|
|
29978
30873
|
}
|
|
30874
|
+
if (engine.noiseReduction !== void 0) {
|
|
30875
|
+
adapterOptions.noiseReduction = engine.noiseReduction;
|
|
30876
|
+
}
|
|
30877
|
+
if (engine.turnDetection !== void 0) {
|
|
30878
|
+
adapterOptions.turnDetection = engine.turnDetection;
|
|
30879
|
+
}
|
|
30880
|
+
if (engine.gateResponseOnTranscript !== void 0) {
|
|
30881
|
+
adapterOptions.gateResponseOnTranscript = engine.gateResponseOnTranscript;
|
|
30882
|
+
}
|
|
30883
|
+
}
|
|
30884
|
+
const agentOpts = agent;
|
|
30885
|
+
if (agentOpts.openaiRealtimeNoiseReduction !== void 0) {
|
|
30886
|
+
adapterOptions.noiseReduction = agentOpts.openaiRealtimeNoiseReduction;
|
|
30887
|
+
}
|
|
30888
|
+
if (agentOpts.realtimeTurnDetection !== void 0) {
|
|
30889
|
+
adapterOptions.turnDetection = agentOpts.realtimeTurnDetection;
|
|
29979
30890
|
}
|
|
29980
|
-
|
|
30891
|
+
if (agentOpts.openaiRealtimeGateResponseOnTranscript !== void 0) {
|
|
30892
|
+
adapterOptions.gateResponseOnTranscript = agentOpts.openaiRealtimeGateResponseOnTranscript;
|
|
30893
|
+
}
|
|
30894
|
+
const AdapterCtor = OpenAIRealtime2Adapter;
|
|
29981
30895
|
return new AdapterCtor(
|
|
29982
30896
|
openaiKey,
|
|
29983
30897
|
agent.model,
|
|
@@ -30006,7 +30920,6 @@ var init_server = __esm({
|
|
|
30006
30920
|
import_express = __toESM(require("express"));
|
|
30007
30921
|
import_http = require("http");
|
|
30008
30922
|
import_ws5 = require("ws");
|
|
30009
|
-
init_openai_realtime();
|
|
30010
30923
|
init_openai_realtime_2();
|
|
30011
30924
|
init_elevenlabs_convai();
|
|
30012
30925
|
init_plivo_adapter();
|
|
@@ -30069,6 +30982,11 @@ var init_server = __esm({
|
|
|
30069
30982
|
getLogger().warn(`TwilioBridge.transferCall rejected: invalid CallSid ${JSON.stringify(callId)}`);
|
|
30070
30983
|
return;
|
|
30071
30984
|
}
|
|
30985
|
+
const E164_RE = /^\+[1-9]\d{6,14}$/;
|
|
30986
|
+
if (!E164_RE.test(toNumber)) {
|
|
30987
|
+
getLogger().warn(`TwilioBridge.transferCall rejected: invalid target ${JSON.stringify(toNumber)}`);
|
|
30988
|
+
return;
|
|
30989
|
+
}
|
|
30072
30990
|
const transferUrl = `https://api.twilio.com/2010-04-01/Accounts/${this.config.twilioSid}/Calls/${callId}.json`;
|
|
30073
30991
|
await fetch(transferUrl, {
|
|
30074
30992
|
method: "POST",
|
|
@@ -30275,7 +31193,7 @@ var init_server = __esm({
|
|
|
30275
31193
|
};
|
|
30276
31194
|
GRACEFUL_SHUTDOWN_TIMEOUT_MS = 1e4;
|
|
30277
31195
|
EmbeddedServer = class {
|
|
30278
|
-
constructor(config2, agent, onCallStart, onCallEnd, onTranscript, onMessage, recording = false, voicemailMessage = "", onMetrics, pricingOverrides, dashboard = true, dashboardToken = "") {
|
|
31196
|
+
constructor(config2, agent, onCallStart, onCallEnd, onTranscript, onMessage, recording = false, voicemailMessage = "", onMetrics, pricingOverrides, dashboard = true, dashboardToken = "", allowInsecureDashboard = false) {
|
|
30279
31197
|
this.config = config2;
|
|
30280
31198
|
this.agent = agent;
|
|
30281
31199
|
this.onCallStart = onCallStart;
|
|
@@ -30287,6 +31205,7 @@ var init_server = __esm({
|
|
|
30287
31205
|
this.onMetrics = onMetrics;
|
|
30288
31206
|
this.dashboard = dashboard;
|
|
30289
31207
|
this.dashboardToken = dashboardToken;
|
|
31208
|
+
this.allowInsecureDashboard = allowInsecureDashboard;
|
|
30290
31209
|
this.metricsStore = new MetricsStore();
|
|
30291
31210
|
this.pricing = mergePricing(pricingOverrides);
|
|
30292
31211
|
const logRoot = config2.persistRoot === void 0 ? resolveLogRoot() : config2.persistRoot;
|
|
@@ -30313,8 +31232,31 @@ var init_server = __esm({
|
|
|
30313
31232
|
onMetrics;
|
|
30314
31233
|
dashboard;
|
|
30315
31234
|
dashboardToken;
|
|
31235
|
+
allowInsecureDashboard;
|
|
30316
31236
|
server = null;
|
|
30317
31237
|
wss = null;
|
|
31238
|
+
/**
|
|
31239
|
+
* Whether the dashboard + ``/api/*`` routes were mounted in ``start()``.
|
|
31240
|
+
* The dashboard is now ALWAYS mounted when enabled (it never 404s): an
|
|
31241
|
+
* exposed, token-less bind is protected with an auto-generated token
|
|
31242
|
+
* rather than refused. This flag is therefore ``true`` whenever the
|
|
31243
|
+
* dashboard is enabled — kept so the startup banner can gate on it.
|
|
31244
|
+
*/
|
|
31245
|
+
dashboardMounted = false;
|
|
31246
|
+
/**
|
|
31247
|
+
* The token actually in effect for the dashboard + ``/api/*`` routes,
|
|
31248
|
+
* resolved in ``start()``. One of: the explicit ``dashboardToken`` if set;
|
|
31249
|
+
* a freshly generated UUID when the bind is exposed and
|
|
31250
|
+
* ``allowInsecureDashboard`` is ``false``; or ``''`` (OPEN) for loopback
|
|
31251
|
+
* local dev and for an exposed bind with ``allowInsecureDashboard=true``.
|
|
31252
|
+
* Read by the startup banner (to print the ready URL with ``?token=``) and
|
|
31253
|
+
* by authentic tests (to authenticate).
|
|
31254
|
+
*/
|
|
31255
|
+
effectiveDashboardToken = "";
|
|
31256
|
+
/** The token in effect for the dashboard, resolved at ``start()``. Empty string = served OPEN. */
|
|
31257
|
+
get resolvedDashboardToken() {
|
|
31258
|
+
return this.effectiveDashboardToken;
|
|
31259
|
+
}
|
|
30318
31260
|
twilioTokenWarningLogged = false;
|
|
30319
31261
|
telnyxSigWarningLogged = false;
|
|
30320
31262
|
metricsStore;
|
|
@@ -30332,12 +31274,14 @@ var init_server = __esm({
|
|
|
30332
31274
|
activeConnections = /* @__PURE__ */ new Set();
|
|
30333
31275
|
activeCallIds = /* @__PURE__ */ new Map();
|
|
30334
31276
|
/**
|
|
30335
|
-
* Per-call AMD result
|
|
30336
|
-
*
|
|
30337
|
-
*
|
|
30338
|
-
*
|
|
31277
|
+
* Per-call AMD result callbacks keyed by CallSid / call_control_id.
|
|
31278
|
+
* Public so ``client.ts`` can register a callback per outbound call.
|
|
31279
|
+
* The Map slot is deleted after the callback fires once — preventing
|
|
31280
|
+
* cross-call misfires when multiple concurrent outbound calls are in
|
|
31281
|
+
* flight (single-slot was a race condition: the last registered callback
|
|
31282
|
+
* would win for every in-flight AMD result).
|
|
30339
31283
|
*/
|
|
30340
|
-
|
|
31284
|
+
onMachineDetectionByCallSid = /* @__PURE__ */ new Map();
|
|
30341
31285
|
/**
|
|
30342
31286
|
* Pre-warm first-message audio accessor wired by ``Patter.serve()``.
|
|
30343
31287
|
* The per-call StreamHandler invokes this with its ``callId`` at the
|
|
@@ -30458,6 +31402,42 @@ var init_server = __esm({
|
|
|
30458
31402
|
this.completions.clear();
|
|
30459
31403
|
this.amdClass.clear();
|
|
30460
31404
|
}
|
|
31405
|
+
/**
|
|
31406
|
+
* Decide whether this server is reachable beyond loopback (127.0.0.1).
|
|
31407
|
+
*
|
|
31408
|
+
* The dashboard serves call transcripts and metadata (PII), so before
|
|
31409
|
+
* mounting it unauthenticated we must know whether anyone off-host can
|
|
31410
|
+
* reach the port. Signals (in order):
|
|
31411
|
+
*
|
|
31412
|
+
* (a)+(b) — a public webhook URL. ``client.ts`` resolves
|
|
31413
|
+
* ``config.webhookUrl`` to the live hostname for every serve path:
|
|
31414
|
+
* a cloudflared quick-tunnel host, a {@link StaticTunnel} hostname,
|
|
31415
|
+
* or an explicit ``webhookUrl``. A tunnel directive (signal a) and a
|
|
31416
|
+
* public webhook URL (signal b) therefore both surface here as a
|
|
31417
|
+
* non-loopback, non-private webhook host. This is the case that
|
|
31418
|
+
* matters for tunnels — the whole port (dashboard included) is
|
|
31419
|
+
* published on a public ``*.trycloudflare.com`` URL.
|
|
31420
|
+
*
|
|
31421
|
+
* (c) — an EXPLICIT non-loopback bind override via ``PATTER_BIND_HOST``.
|
|
31422
|
+
* Node's ``http.Server.listen(port, host)`` defaults to 127.0.0.1
|
|
31423
|
+
* here (see ``start()``), so plain local dev is never flagged; only
|
|
31424
|
+
* an operator who set ``PATTER_BIND_HOST`` to e.g. ``0.0.0.0`` is.
|
|
31425
|
+
*
|
|
31426
|
+
* Only loopback webhook hosts (127.0.0.0/8, localhost, ::1) are treated as
|
|
31427
|
+
* not-exposed. RFC1918 / LAN hosts ARE exposure — they are reachable by
|
|
31428
|
+
* other machines on the network — matching the Python SDK's gate.
|
|
31429
|
+
*/
|
|
31430
|
+
isExposed() {
|
|
31431
|
+
const bindOverride = process.env.PATTER_BIND_HOST;
|
|
31432
|
+
if (bindOverride && !isLoopbackHost(bindOverride)) {
|
|
31433
|
+
return true;
|
|
31434
|
+
}
|
|
31435
|
+
const host = extractHost(this.config.webhookUrl ?? "");
|
|
31436
|
+
if (host && !isLoopbackHost(host)) {
|
|
31437
|
+
return true;
|
|
31438
|
+
}
|
|
31439
|
+
return false;
|
|
31440
|
+
}
|
|
30461
31441
|
/** Bind HTTP + WebSocket listeners on `port`, mount carrier webhooks and dashboard routes. */
|
|
30462
31442
|
async start(port = 8e3) {
|
|
30463
31443
|
const webhookUrlPattern = /^[a-zA-Z0-9][a-zA-Z0-9.\-]+[a-zA-Z0-9]$/;
|
|
@@ -30493,6 +31473,9 @@ var init_server = __esm({
|
|
|
30493
31473
|
}
|
|
30494
31474
|
next();
|
|
30495
31475
|
});
|
|
31476
|
+
req.on("error", (err) => {
|
|
31477
|
+
next(err);
|
|
31478
|
+
});
|
|
30496
31479
|
} else {
|
|
30497
31480
|
next();
|
|
30498
31481
|
}
|
|
@@ -30503,8 +31486,25 @@ var init_server = __esm({
|
|
|
30503
31486
|
res.json({ status: "ok", mode: "local" });
|
|
30504
31487
|
});
|
|
30505
31488
|
if (this.dashboard) {
|
|
30506
|
-
|
|
30507
|
-
|
|
31489
|
+
const exposed = this.isExposed();
|
|
31490
|
+
if (this.dashboardToken) {
|
|
31491
|
+
this.effectiveDashboardToken = this.dashboardToken;
|
|
31492
|
+
} else if (exposed && !this.allowInsecureDashboard) {
|
|
31493
|
+
this.effectiveDashboardToken = import_node_crypto4.default.randomUUID();
|
|
31494
|
+
getLogger().warn(
|
|
31495
|
+
`Dashboard is reachable beyond 127.0.0.1 without a configured token; protecting it with an auto-generated token. Open: http://127.0.0.1:${port}/?token=${this.effectiveDashboardToken} Set dashboardToken for a stable token, or allowInsecureDashboard=true to serve it open.`
|
|
31496
|
+
);
|
|
31497
|
+
} else if (exposed && this.allowInsecureDashboard) {
|
|
31498
|
+
this.effectiveDashboardToken = "";
|
|
31499
|
+
getLogger().warn(
|
|
31500
|
+
"Dashboard served WITHOUT authentication on a publicly-reachable bind (allowInsecureDashboard=true). Call transcripts and metadata are exposed to anyone who can reach this URL."
|
|
31501
|
+
);
|
|
31502
|
+
} else {
|
|
31503
|
+
this.effectiveDashboardToken = "";
|
|
31504
|
+
}
|
|
31505
|
+
mountDashboard(app, this.metricsStore, this.effectiveDashboardToken);
|
|
31506
|
+
mountApi(app, this.metricsStore, this.effectiveDashboardToken);
|
|
31507
|
+
this.dashboardMounted = true;
|
|
30508
31508
|
}
|
|
30509
31509
|
app.post("/webhooks/twilio/status", (req, res) => {
|
|
30510
31510
|
if (this.config.twilioToken) {
|
|
@@ -30590,8 +31590,9 @@ var init_server = __esm({
|
|
|
30590
31590
|
if (callSid) {
|
|
30591
31591
|
this.amdClass.set(callSid, classifyTwilioAmd(answeredBy));
|
|
30592
31592
|
}
|
|
30593
|
-
const cb = this.
|
|
31593
|
+
const cb = callSid ? this.onMachineDetectionByCallSid.get(callSid) : void 0;
|
|
30594
31594
|
if (cb && callSid) {
|
|
31595
|
+
this.onMachineDetectionByCallSid.delete(callSid);
|
|
30595
31596
|
try {
|
|
30596
31597
|
await cb({
|
|
30597
31598
|
call_id: callSid,
|
|
@@ -30718,8 +31719,9 @@ var init_server = __esm({
|
|
|
30718
31719
|
if (amdCallId) {
|
|
30719
31720
|
this.amdClass.set(amdCallId, classifyTelnyxAmd(amdResult));
|
|
30720
31721
|
}
|
|
30721
|
-
const cbTx = this.
|
|
31722
|
+
const cbTx = amdCallId ? this.onMachineDetectionByCallSid.get(amdCallId) : void 0;
|
|
30722
31723
|
if (cbTx && amdCallId) {
|
|
31724
|
+
this.onMachineDetectionByCallSid.delete(amdCallId);
|
|
30723
31725
|
try {
|
|
30724
31726
|
await cbTx({
|
|
30725
31727
|
call_id: amdCallId,
|
|
@@ -30887,8 +31889,13 @@ var init_server = __esm({
|
|
|
30887
31889
|
getLogger().info(`AMD result for ${sanitizeLogValue(callUuid)}: ${sanitizeLogValue(amdRaw)}`);
|
|
30888
31890
|
const classification = classifyPlivoAmd(amdRaw);
|
|
30889
31891
|
if (callUuid) this.amdClass.set(callUuid, classification);
|
|
30890
|
-
|
|
31892
|
+
let cbKey = callUuid && this.onMachineDetectionByCallSid.has(callUuid) ? callUuid : void 0;
|
|
31893
|
+
if (cbKey === void 0 && this.onMachineDetectionByCallSid.size === 1) {
|
|
31894
|
+
cbKey = this.onMachineDetectionByCallSid.keys().next().value;
|
|
31895
|
+
}
|
|
31896
|
+
const cb = cbKey !== void 0 ? this.onMachineDetectionByCallSid.get(cbKey) : void 0;
|
|
30891
31897
|
if (cb && callUuid) {
|
|
31898
|
+
if (cbKey !== void 0) this.onMachineDetectionByCallSid.delete(cbKey);
|
|
30892
31899
|
try {
|
|
30893
31900
|
await cb({
|
|
30894
31901
|
call_id: callUuid,
|
|
@@ -30969,27 +31976,34 @@ var init_server = __esm({
|
|
|
30969
31976
|
this.handleTwilioStream(ws, url2);
|
|
30970
31977
|
}
|
|
30971
31978
|
});
|
|
30972
|
-
await new Promise((resolve2) => {
|
|
31979
|
+
await new Promise((resolve2, reject) => {
|
|
30973
31980
|
const bindHost = process.env.PATTER_BIND_HOST ?? "127.0.0.1";
|
|
31981
|
+
this.server.once("error", reject);
|
|
30974
31982
|
this.server.listen(port, bindHost, () => {
|
|
31983
|
+
this.server.off("error", reject);
|
|
30975
31984
|
getLogger().info(`Server on port ${port}`);
|
|
30976
31985
|
getLogger().info(`Webhook: https://${this.config.webhookUrl}`);
|
|
30977
31986
|
getLogger().info(`Phone: ${this.config.phoneNumber}`);
|
|
30978
31987
|
const model = this.agent.model ?? "";
|
|
30979
|
-
|
|
31988
|
+
const calibrated = ["gpt-realtime-mini", "gpt-4o-mini-realtime-preview"];
|
|
31989
|
+
if (model && !calibrated.includes(model) && model.includes("realtime")) {
|
|
30980
31990
|
getLogger().warn(
|
|
30981
|
-
`Agent uses "${sanitizeLogValue(model)}" but DEFAULT_PRICING.openai_realtime is calibrated for
|
|
31991
|
+
`Agent uses "${sanitizeLogValue(model)}" but DEFAULT_PRICING.openai_realtime is calibrated for the default Realtime models (gpt-realtime-mini / gpt-4o-mini-realtime-preview). Pass Patter({ pricing: { openai_realtime: {...} } }) to set rates for this model, otherwise the dashboard cost display will under-report.`
|
|
30982
31992
|
);
|
|
30983
31993
|
}
|
|
30984
|
-
if (this.dashboard) {
|
|
30985
|
-
|
|
30986
|
-
|
|
30987
|
-
|
|
31994
|
+
if (this.dashboard && this.dashboardMounted) {
|
|
31995
|
+
getLogger().info("\u2500\u2500\u2500\u2500 Dashboard \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500");
|
|
31996
|
+
if (this.effectiveDashboardToken) {
|
|
31997
|
+
getLogger().info(
|
|
31998
|
+
`URL: http://127.0.0.1:${port}/?token=${this.effectiveDashboardToken}`
|
|
31999
|
+
);
|
|
32000
|
+
} else {
|
|
32001
|
+
getLogger().info(`URL: http://127.0.0.1:${port}/`);
|
|
30988
32002
|
getLogger().warn(
|
|
30989
32003
|
"Dashboard is enabled without authentication. Set dashboardToken to protect call data. This is safe for local development but should not be exposed on a public network."
|
|
30990
32004
|
);
|
|
30991
32005
|
}
|
|
30992
|
-
|
|
32006
|
+
getLogger().info("\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500");
|
|
30993
32007
|
}
|
|
30994
32008
|
resolve2();
|
|
30995
32009
|
});
|
|
@@ -31065,7 +32079,7 @@ var init_server = __esm({
|
|
|
31065
32079
|
onMessage: this.onMessage,
|
|
31066
32080
|
onMetrics: wrappedMetrics,
|
|
31067
32081
|
recording: this.recording,
|
|
31068
|
-
buildAIAdapter: (resolvedPrompt) => buildAIAdapter(this.config, this.agent, resolvedPrompt),
|
|
32082
|
+
buildAIAdapter: (resolvedPrompt, toolsOverride) => buildAIAdapter(this.config, this.agent, resolvedPrompt, toolsOverride),
|
|
31069
32083
|
sanitizeVariables,
|
|
31070
32084
|
resolveVariables,
|
|
31071
32085
|
popPrewarmAudio: this.popPrewarmAudio,
|
|
@@ -31339,17 +32353,18 @@ var init_server = __esm({
|
|
|
31339
32353
|
}
|
|
31340
32354
|
if (this.activeConnections.size > 0) {
|
|
31341
32355
|
getLogger().info(`Waiting for ${this.activeConnections.size} active connection(s) to close...`);
|
|
31342
|
-
|
|
31343
|
-
|
|
31344
|
-
|
|
31345
|
-
|
|
31346
|
-
|
|
31347
|
-
|
|
31348
|
-
|
|
31349
|
-
|
|
31350
|
-
|
|
31351
|
-
|
|
31352
|
-
]);
|
|
32356
|
+
let checkInterval;
|
|
32357
|
+
const drainPromise = new Promise((resolve2) => {
|
|
32358
|
+
checkInterval = setInterval(() => {
|
|
32359
|
+
if (this.activeConnections.size === 0) {
|
|
32360
|
+
clearInterval(checkInterval);
|
|
32361
|
+
resolve2();
|
|
32362
|
+
}
|
|
32363
|
+
}, 100);
|
|
32364
|
+
});
|
|
32365
|
+
const timeoutPromise = new Promise((resolve2) => setTimeout(resolve2, GRACEFUL_SHUTDOWN_TIMEOUT_MS));
|
|
32366
|
+
await Promise.race([drainPromise, timeoutPromise]);
|
|
32367
|
+
clearInterval(checkInterval);
|
|
31353
32368
|
}
|
|
31354
32369
|
if (this.activeConnections.size > 0) {
|
|
31355
32370
|
getLogger().info(`Force-closing ${this.activeConnections.size} remaining connection(s)`);
|
|
@@ -31478,6 +32493,9 @@ __export(carrier_config_exports, {
|
|
|
31478
32493
|
configureTelnyxNumber: () => configureTelnyxNumber,
|
|
31479
32494
|
configureTwilioNumber: () => configureTwilioNumber
|
|
31480
32495
|
});
|
|
32496
|
+
function redactPhone2(n) {
|
|
32497
|
+
return n.slice(0, 3) + "***" + n.slice(-4);
|
|
32498
|
+
}
|
|
31481
32499
|
async function configureTwilioNumber(accountSid, authToken, phoneNumber, voiceUrl) {
|
|
31482
32500
|
const auth2 = `Basic ${Buffer.from(`${accountSid}:${authToken}`).toString("base64")}`;
|
|
31483
32501
|
const listUrl = `${TWILIO_API_BASE}/Accounts/${accountSid}/IncomingPhoneNumbers.json?PhoneNumber=${encodeURIComponent(phoneNumber)}`;
|
|
@@ -31493,7 +32511,7 @@ async function configureTwilioNumber(accountSid, authToken, phoneNumber, voiceUr
|
|
|
31493
32511
|
const body = await listResp.json();
|
|
31494
32512
|
const match = body.incoming_phone_numbers?.[0];
|
|
31495
32513
|
if (!match) {
|
|
31496
|
-
throw new Error(`Twilio number ${phoneNumber} not found on account ${accountSid}`);
|
|
32514
|
+
throw new Error(`Twilio number ${redactPhone2(phoneNumber)} not found on account ${accountSid}`);
|
|
31497
32515
|
}
|
|
31498
32516
|
const updateUrl = `${TWILIO_API_BASE}/Accounts/${accountSid}/IncomingPhoneNumbers/${match.sid}.json`;
|
|
31499
32517
|
const form = new URLSearchParams({ VoiceUrl: voiceUrl, VoiceMethod: "POST" });
|
|
@@ -31512,17 +32530,20 @@ async function configureTwilioNumber(accountSid, authToken, phoneNumber, voiceUr
|
|
|
31512
32530
|
}
|
|
31513
32531
|
}
|
|
31514
32532
|
async function configureTelnyxNumber(apiKey, connectionId, phoneNumber) {
|
|
31515
|
-
const resp = await fetch(
|
|
31516
|
-
|
|
31517
|
-
|
|
31518
|
-
|
|
31519
|
-
|
|
31520
|
-
|
|
31521
|
-
|
|
31522
|
-
|
|
32533
|
+
const resp = await fetch(
|
|
32534
|
+
`${TELNYX_API_BASE}/phone_numbers/${encodeURIComponent(phoneNumber)}/voice`,
|
|
32535
|
+
{
|
|
32536
|
+
method: "PATCH",
|
|
32537
|
+
headers: {
|
|
32538
|
+
Authorization: `Bearer ${apiKey}`,
|
|
32539
|
+
"Content-Type": "application/json"
|
|
32540
|
+
},
|
|
32541
|
+
body: JSON.stringify({ connection_id: connectionId, tech_prefix_enabled: false })
|
|
32542
|
+
}
|
|
32543
|
+
);
|
|
31523
32544
|
if (!resp.ok) {
|
|
31524
32545
|
throw new Error(
|
|
31525
|
-
`Telnyx PATCH /phone_numbers/${phoneNumber} failed: ${resp.status} ${await resp.text()}`
|
|
32546
|
+
`Telnyx PATCH /phone_numbers/${redactPhone2(phoneNumber)}/voice failed: ${resp.status} ${await resp.text()}`
|
|
31526
32547
|
);
|
|
31527
32548
|
}
|
|
31528
32549
|
}
|
|
@@ -31572,7 +32593,7 @@ async function autoConfigureCarrier(params) {
|
|
|
31572
32593
|
if (provider2 === "telnyx" && params.telnyxKey && params.telnyxConnectionId) {
|
|
31573
32594
|
try {
|
|
31574
32595
|
await configureTelnyxNumber(params.telnyxKey, params.telnyxConnectionId, params.phoneNumber);
|
|
31575
|
-
log3.info("Telnyx number
|
|
32596
|
+
log3.info("Telnyx number ***%s associated with connection %s", params.phoneNumber.slice(-4), params.telnyxConnectionId);
|
|
31576
32597
|
} catch (err) {
|
|
31577
32598
|
log3.warn("Could not auto-configure Telnyx number: %s", err instanceof Error ? err.message : String(err));
|
|
31578
32599
|
}
|
|
@@ -31722,12 +32743,12 @@ var init_test_mode = __esm({
|
|
|
31722
32743
|
}
|
|
31723
32744
|
continue;
|
|
31724
32745
|
}
|
|
31725
|
-
conversationHistory.push({
|
|
31726
|
-
role: "user",
|
|
31727
|
-
text: userInput,
|
|
31728
|
-
timestamp: Date.now()
|
|
31729
|
-
});
|
|
31730
32746
|
if (onMessage) {
|
|
32747
|
+
conversationHistory.push({
|
|
32748
|
+
role: "user",
|
|
32749
|
+
text: userInput,
|
|
32750
|
+
timestamp: Date.now()
|
|
32751
|
+
});
|
|
31731
32752
|
try {
|
|
31732
32753
|
const responseText = await onMessage({
|
|
31733
32754
|
text: userInput,
|
|
@@ -31757,6 +32778,11 @@ var init_test_mode = __esm({
|
|
|
31757
32778
|
}
|
|
31758
32779
|
log3.info("");
|
|
31759
32780
|
const responseText = parts.join("");
|
|
32781
|
+
conversationHistory.push({
|
|
32782
|
+
role: "user",
|
|
32783
|
+
text: userInput,
|
|
32784
|
+
timestamp: Date.now()
|
|
32785
|
+
});
|
|
31760
32786
|
if (responseText) {
|
|
31761
32787
|
conversationHistory.push({
|
|
31762
32788
|
role: "assistant",
|
|
@@ -33189,6 +34215,7 @@ __export(index_exports, {
|
|
|
33189
34215
|
GoogleLLM: () => LLM5,
|
|
33190
34216
|
GroqLLM: () => LLM3,
|
|
33191
34217
|
Guardrail: () => Guardrail,
|
|
34218
|
+
HermesLLM: () => LLM7,
|
|
33192
34219
|
IVRActivity: () => IVRActivity,
|
|
33193
34220
|
InworldTTS: () => TTS7,
|
|
33194
34221
|
KrispFrameDuration: () => KrispFrameDuration,
|
|
@@ -33199,6 +34226,8 @@ __export(index_exports, {
|
|
|
33199
34226
|
MetricsStore: () => MetricsStore,
|
|
33200
34227
|
MinWordsStrategy: () => MinWordsStrategy,
|
|
33201
34228
|
Ngrok: () => Ngrok,
|
|
34229
|
+
OpenAICompatibleLLM: () => LLM6,
|
|
34230
|
+
OpenAICompatibleLLMProvider: () => OpenAICompatibleLLMProvider,
|
|
33202
34231
|
OpenAILLM: () => LLM,
|
|
33203
34232
|
OpenAILLMProvider: () => OpenAILLMProvider,
|
|
33204
34233
|
OpenAIRealtime: () => Realtime,
|
|
@@ -33212,10 +34241,12 @@ __export(index_exports, {
|
|
|
33212
34241
|
OpenAITranscribeSTT: () => STT3,
|
|
33213
34242
|
OpenAITranscriptionModel: () => OpenAITranscriptionModel,
|
|
33214
34243
|
OpenAIVoice: () => OpenAIVoice,
|
|
34244
|
+
OpenClawLLM: () => LLM8,
|
|
33215
34245
|
PRICING_LAST_UPDATED: () => PRICING_LAST_UPDATED,
|
|
33216
34246
|
PRICING_VERSION: () => PRICING_VERSION,
|
|
33217
34247
|
PartialStreamError: () => PartialStreamError,
|
|
33218
34248
|
Patter: () => Patter,
|
|
34249
|
+
PatterConfigError: () => PatterConfigError,
|
|
33219
34250
|
PatterConnectionError: () => PatterConnectionError,
|
|
33220
34251
|
PatterError: () => PatterError,
|
|
33221
34252
|
PatterTool: () => PatterTool,
|
|
@@ -33303,6 +34334,8 @@ __export(index_exports, {
|
|
|
33303
34334
|
mulawToPcm16: () => mulawToPcm16,
|
|
33304
34335
|
notifyDashboard: () => notifyDashboard,
|
|
33305
34336
|
openaiTts: () => openaiTts,
|
|
34337
|
+
openclawConsult: () => openclawConsult,
|
|
34338
|
+
openclawPostCallNotifier: () => openclawPostCallNotifier,
|
|
33306
34339
|
pcm16ToMulaw: () => pcm16ToMulaw,
|
|
33307
34340
|
resample16kTo8k: () => resample16kTo8k,
|
|
33308
34341
|
resample24kTo16k: () => resample24kTo16k,
|
|
@@ -33333,6 +34366,7 @@ init_server();
|
|
|
33333
34366
|
|
|
33334
34367
|
// src/engines/openai.ts
|
|
33335
34368
|
init_cjs_shims();
|
|
34369
|
+
init_openai_realtime();
|
|
33336
34370
|
var Realtime = class {
|
|
33337
34371
|
kind = "openai_realtime";
|
|
33338
34372
|
apiKey;
|
|
@@ -33340,6 +34374,9 @@ var Realtime = class {
|
|
|
33340
34374
|
voice;
|
|
33341
34375
|
reasoningEffort;
|
|
33342
34376
|
inputAudioTranscriptionModel;
|
|
34377
|
+
noiseReduction;
|
|
34378
|
+
turnDetection;
|
|
34379
|
+
gateResponseOnTranscript;
|
|
33343
34380
|
constructor(opts = {}) {
|
|
33344
34381
|
const key = opts.apiKey ?? process.env.OPENAI_API_KEY;
|
|
33345
34382
|
if (!key) {
|
|
@@ -33347,16 +34384,26 @@ var Realtime = class {
|
|
|
33347
34384
|
"OpenAI Realtime requires an apiKey. Pass { apiKey: 'sk-...' } or set OPENAI_API_KEY in the environment."
|
|
33348
34385
|
);
|
|
33349
34386
|
}
|
|
34387
|
+
if (opts.noiseReduction !== void 0 && opts.noiseReduction !== "near_field" && opts.noiseReduction !== "far_field") {
|
|
34388
|
+
throw new Error(
|
|
34389
|
+
`noiseReduction must be 'near_field' or 'far_field', got ${JSON.stringify(opts.noiseReduction)}`
|
|
34390
|
+
);
|
|
34391
|
+
}
|
|
34392
|
+
validateRealtimeTurnDetection(opts.turnDetection);
|
|
33350
34393
|
this.apiKey = key;
|
|
33351
34394
|
this.model = opts.model ?? "gpt-realtime-mini";
|
|
33352
34395
|
this.voice = opts.voice ?? "alloy";
|
|
33353
34396
|
this.reasoningEffort = opts.reasoningEffort;
|
|
33354
34397
|
this.inputAudioTranscriptionModel = opts.inputAudioTranscriptionModel;
|
|
34398
|
+
this.noiseReduction = opts.noiseReduction;
|
|
34399
|
+
this.turnDetection = opts.turnDetection;
|
|
34400
|
+
this.gateResponseOnTranscript = opts.gateResponseOnTranscript;
|
|
33355
34401
|
}
|
|
33356
34402
|
};
|
|
33357
34403
|
|
|
33358
34404
|
// src/engines/openai-2.ts
|
|
33359
34405
|
init_cjs_shims();
|
|
34406
|
+
init_openai_realtime();
|
|
33360
34407
|
var Realtime2 = class {
|
|
33361
34408
|
kind = "openai_realtime_2";
|
|
33362
34409
|
apiKey;
|
|
@@ -33364,6 +34411,9 @@ var Realtime2 = class {
|
|
|
33364
34411
|
voice;
|
|
33365
34412
|
reasoningEffort;
|
|
33366
34413
|
inputAudioTranscriptionModel;
|
|
34414
|
+
noiseReduction;
|
|
34415
|
+
turnDetection;
|
|
34416
|
+
gateResponseOnTranscript;
|
|
33367
34417
|
constructor(opts = {}) {
|
|
33368
34418
|
const key = opts.apiKey ?? process.env.OPENAI_API_KEY;
|
|
33369
34419
|
if (!key) {
|
|
@@ -33371,11 +34421,20 @@ var Realtime2 = class {
|
|
|
33371
34421
|
"OpenAI Realtime 2 requires an apiKey. Pass { apiKey: 'sk-...' } or set OPENAI_API_KEY in the environment."
|
|
33372
34422
|
);
|
|
33373
34423
|
}
|
|
34424
|
+
if (opts.noiseReduction !== void 0 && opts.noiseReduction !== "near_field" && opts.noiseReduction !== "far_field") {
|
|
34425
|
+
throw new Error(
|
|
34426
|
+
`noiseReduction must be 'near_field' or 'far_field', got ${JSON.stringify(opts.noiseReduction)}`
|
|
34427
|
+
);
|
|
34428
|
+
}
|
|
34429
|
+
validateRealtimeTurnDetection(opts.turnDetection);
|
|
33374
34430
|
this.apiKey = key;
|
|
33375
34431
|
this.model = opts.model ?? "gpt-realtime-2";
|
|
33376
34432
|
this.voice = opts.voice ?? "alloy";
|
|
33377
34433
|
this.reasoningEffort = opts.reasoningEffort;
|
|
33378
34434
|
this.inputAudioTranscriptionModel = opts.inputAudioTranscriptionModel;
|
|
34435
|
+
this.noiseReduction = opts.noiseReduction;
|
|
34436
|
+
this.turnDetection = opts.turnDetection;
|
|
34437
|
+
this.gateResponseOnTranscript = opts.gateResponseOnTranscript;
|
|
33379
34438
|
}
|
|
33380
34439
|
};
|
|
33381
34440
|
|
|
@@ -33809,7 +34868,7 @@ function resolvePersistRoot(persist) {
|
|
|
33809
34868
|
if (typeof persist === "string") return resolveLogRoot(persist);
|
|
33810
34869
|
const envRoot = resolveLogRoot();
|
|
33811
34870
|
if (envRoot !== null) return envRoot;
|
|
33812
|
-
return
|
|
34871
|
+
return null;
|
|
33813
34872
|
}
|
|
33814
34873
|
function closeParkedConnections(slot) {
|
|
33815
34874
|
if (slot.stt) {
|
|
@@ -34093,7 +35152,12 @@ var Patter = class {
|
|
|
34093
35152
|
...working,
|
|
34094
35153
|
provider: "openai_realtime",
|
|
34095
35154
|
model: working.model ?? engine.model,
|
|
34096
|
-
voice: working.voice ?? engine.voice
|
|
35155
|
+
voice: working.voice ?? engine.voice,
|
|
35156
|
+
// Explicit agent() kwargs win over the engine marker value
|
|
35157
|
+
// (same precedence as Python: explicit kwarg > engine > default).
|
|
35158
|
+
openaiRealtimeNoiseReduction: working.openaiRealtimeNoiseReduction ?? engine.noiseReduction,
|
|
35159
|
+
realtimeTurnDetection: working.realtimeTurnDetection ?? engine.turnDetection,
|
|
35160
|
+
openaiRealtimeGateResponseOnTranscript: working.openaiRealtimeGateResponseOnTranscript ?? engine.gateResponseOnTranscript
|
|
34097
35161
|
};
|
|
34098
35162
|
if (!this.localConfig.openaiKey) {
|
|
34099
35163
|
this.localConfig = { ...this.localConfig, openaiKey: engine.apiKey };
|
|
@@ -34118,6 +35182,11 @@ var Patter = class {
|
|
|
34118
35182
|
throw new Error(`provider must be one of: ${valid.join(", ")}. Got: '${working.provider}'`);
|
|
34119
35183
|
}
|
|
34120
35184
|
}
|
|
35185
|
+
if (working.consult && working.provider === "elevenlabs_convai") {
|
|
35186
|
+
getLogger().warn(
|
|
35187
|
+
"consult is set but provider is ElevenLabs ConvAI; the consult tool is only injected in Realtime and Pipeline modes and will be ignored for this agent."
|
|
35188
|
+
);
|
|
35189
|
+
}
|
|
34121
35190
|
if (working.llm !== void 0) {
|
|
34122
35191
|
const llm = working.llm;
|
|
34123
35192
|
if (!llm || typeof llm.stream !== "function") {
|
|
@@ -34256,7 +35325,8 @@ var Patter = class {
|
|
|
34256
35325
|
opts.onMetrics,
|
|
34257
35326
|
opts.pricing,
|
|
34258
35327
|
opts.dashboard ?? true,
|
|
34259
|
-
opts.dashboardToken ?? ""
|
|
35328
|
+
opts.dashboardToken ?? "",
|
|
35329
|
+
opts.allowInsecureDashboard ?? false
|
|
34260
35330
|
);
|
|
34261
35331
|
this.embeddedServer.popPrewarmAudio = this.popPrewarmAudio;
|
|
34262
35332
|
this.embeddedServer.popPrewarmedConnections = this.popPrewarmedConnections;
|
|
@@ -34668,8 +35738,8 @@ var Patter = class {
|
|
|
34668
35738
|
if (!options.to) {
|
|
34669
35739
|
throw new Error("'to' phone number is required");
|
|
34670
35740
|
}
|
|
34671
|
-
if (
|
|
34672
|
-
throw new Error(
|
|
35741
|
+
if (!/^\+[1-9]\d{6,14}$/.test(options.to)) {
|
|
35742
|
+
throw new Error("'to' must be E.164 format (+<country><digits>). Got value with invalid format.");
|
|
34673
35743
|
}
|
|
34674
35744
|
if (options.wait && !this.embeddedServer) {
|
|
34675
35745
|
throw new PatterConnectionError(
|
|
@@ -34680,9 +35750,6 @@ var Patter = class {
|
|
|
34680
35750
|
let callId = "";
|
|
34681
35751
|
const effectiveRingTimeout = options.ringTimeout === void 0 ? 25 : options.ringTimeout;
|
|
34682
35752
|
const wantsAmd = options.machineDetection !== false || Boolean(options.voicemailMessage);
|
|
34683
|
-
if (this.embeddedServer) {
|
|
34684
|
-
this.embeddedServer.onMachineDetection = options.onMachineDetection;
|
|
34685
|
-
}
|
|
34686
35753
|
if (options.agent.prewarm !== false) {
|
|
34687
35754
|
this.spawnProviderWarmup(options.agent);
|
|
34688
35755
|
}
|
|
@@ -34727,6 +35794,12 @@ var Patter = class {
|
|
|
34727
35794
|
};
|
|
34728
35795
|
if (this.embeddedServer) {
|
|
34729
35796
|
this.embeddedServer.metricsStore.recordCallInitiated(initiatedPayload);
|
|
35797
|
+
if (options.onMachineDetection) {
|
|
35798
|
+
this.embeddedServer.onMachineDetectionByCallSid.set(
|
|
35799
|
+
telnyxCallId,
|
|
35800
|
+
options.onMachineDetection
|
|
35801
|
+
);
|
|
35802
|
+
}
|
|
34730
35803
|
}
|
|
34731
35804
|
try {
|
|
34732
35805
|
const { notifyDashboard: notifyDashboard2 } = await Promise.resolve().then(() => (init_persistence(), persistence_exports));
|
|
@@ -34792,6 +35865,12 @@ var Patter = class {
|
|
|
34792
35865
|
};
|
|
34793
35866
|
if (this.embeddedServer) {
|
|
34794
35867
|
this.embeddedServer.metricsStore.recordCallInitiated(initiatedPayload);
|
|
35868
|
+
if (options.onMachineDetection) {
|
|
35869
|
+
this.embeddedServer.onMachineDetectionByCallSid.set(
|
|
35870
|
+
plivoCallId,
|
|
35871
|
+
options.onMachineDetection
|
|
35872
|
+
);
|
|
35873
|
+
}
|
|
34795
35874
|
}
|
|
34796
35875
|
try {
|
|
34797
35876
|
const { notifyDashboard: notifyDashboard2 } = await Promise.resolve().then(() => (init_persistence(), persistence_exports));
|
|
@@ -34861,6 +35940,12 @@ var Patter = class {
|
|
|
34861
35940
|
};
|
|
34862
35941
|
if (this.embeddedServer) {
|
|
34863
35942
|
this.embeddedServer.metricsStore.recordCallInitiated(initiatedPayload);
|
|
35943
|
+
if (options.onMachineDetection) {
|
|
35944
|
+
this.embeddedServer.onMachineDetectionByCallSid.set(
|
|
35945
|
+
twilioCallSid,
|
|
35946
|
+
options.onMachineDetection
|
|
35947
|
+
);
|
|
35948
|
+
}
|
|
34864
35949
|
if (twilioNotificationsPath) {
|
|
34865
35950
|
getLogger().info(
|
|
34866
35951
|
`Outbound call ${twilioCallSid} placed. Twilio notifications: https://api.twilio.com${twilioNotificationsPath} (check here if the call drops with no audio).`
|
|
@@ -35144,6 +36229,7 @@ function defineTool(input) {
|
|
|
35144
36229
|
}
|
|
35145
36230
|
|
|
35146
36231
|
// src/index.ts
|
|
36232
|
+
init_consult();
|
|
35147
36233
|
init_logger();
|
|
35148
36234
|
init_sentence_chunker();
|
|
35149
36235
|
init_pipeline_hooks();
|
|
@@ -35361,8 +36447,8 @@ var FallbackLLMProvider = class {
|
|
|
35361
36447
|
* markers are filtered out so callers can concatenate the yielded strings
|
|
35362
36448
|
* directly.
|
|
35363
36449
|
*/
|
|
35364
|
-
async *completeStream(messages, tools) {
|
|
35365
|
-
for await (const chunk of this.stream(messages, tools)) {
|
|
36450
|
+
async *completeStream(messages, tools, opts) {
|
|
36451
|
+
for await (const chunk of this.stream(messages, tools, opts)) {
|
|
35366
36452
|
if (chunk.type === "text") {
|
|
35367
36453
|
yield chunk.content ?? "";
|
|
35368
36454
|
}
|
|
@@ -35372,14 +36458,15 @@ var FallbackLLMProvider = class {
|
|
|
35372
36458
|
// LLMProvider implementation
|
|
35373
36459
|
// -----------------------------------------------------------------------
|
|
35374
36460
|
/** Streaming entry point — yields chunks from the first provider that succeeds. */
|
|
35375
|
-
async *stream(messages, tools) {
|
|
36461
|
+
async *stream(messages, tools, opts) {
|
|
35376
36462
|
const errors = [];
|
|
35377
36463
|
const result = yield* this.tryProviders(
|
|
35378
36464
|
messages,
|
|
35379
36465
|
tools,
|
|
35380
36466
|
/* availableOnly */
|
|
35381
36467
|
true,
|
|
35382
|
-
errors
|
|
36468
|
+
errors,
|
|
36469
|
+
opts
|
|
35383
36470
|
);
|
|
35384
36471
|
if (result === "done") return;
|
|
35385
36472
|
getLogger().warn(
|
|
@@ -35390,7 +36477,8 @@ var FallbackLLMProvider = class {
|
|
|
35390
36477
|
tools,
|
|
35391
36478
|
/* availableOnly */
|
|
35392
36479
|
false,
|
|
35393
|
-
errors
|
|
36480
|
+
errors,
|
|
36481
|
+
opts
|
|
35394
36482
|
);
|
|
35395
36483
|
if (retryResult === "done") return;
|
|
35396
36484
|
throw new AllProvidersFailedError(
|
|
@@ -35400,7 +36488,7 @@ var FallbackLLMProvider = class {
|
|
|
35400
36488
|
// -----------------------------------------------------------------------
|
|
35401
36489
|
// Internals
|
|
35402
36490
|
// -----------------------------------------------------------------------
|
|
35403
|
-
async *tryProviders(messages, tools, availableOnly, errors) {
|
|
36491
|
+
async *tryProviders(messages, tools, availableOnly, errors, opts) {
|
|
35404
36492
|
for (let i = 0; i < this.providers.length; i++) {
|
|
35405
36493
|
if (availableOnly && !this.availability[i]) continue;
|
|
35406
36494
|
for (let attempt = 0; attempt < this.maxRetryPerProvider; attempt++) {
|
|
@@ -35409,7 +36497,7 @@ var FallbackLLMProvider = class {
|
|
|
35409
36497
|
`FallbackLLMProvider: trying provider ${i}${attempt > 0 ? ` (retry ${attempt})` : ""}`
|
|
35410
36498
|
);
|
|
35411
36499
|
let yieldedTokens = false;
|
|
35412
|
-
const gen = this.providers[i].stream(messages, tools);
|
|
36500
|
+
const gen = this.providers[i].stream(messages, tools, opts);
|
|
35413
36501
|
while (true) {
|
|
35414
36502
|
let iterResult;
|
|
35415
36503
|
try {
|
|
@@ -35523,7 +36611,7 @@ var PARAMETERS_SCHEMA = {
|
|
|
35523
36611
|
required: ["to"]
|
|
35524
36612
|
};
|
|
35525
36613
|
var DEFAULT_NAME = "make_phone_call";
|
|
35526
|
-
var
|
|
36614
|
+
var DEFAULT_DESCRIPTION2 = "Place a real outbound phone call. Returns a JSON object with the full transcript, call status, duration in seconds, and cost. Use this when the user asks you to call someone, schedule appointments by phone, or otherwise reach a human via voice.";
|
|
35527
36615
|
var PatterTool = class {
|
|
35528
36616
|
name;
|
|
35529
36617
|
description;
|
|
@@ -35532,6 +36620,11 @@ var PatterTool = class {
|
|
|
35532
36620
|
maxDurationSec;
|
|
35533
36621
|
recording;
|
|
35534
36622
|
started = false;
|
|
36623
|
+
/** Cached in-progress (or completed) start promise so concurrent execute()
|
|
36624
|
+
* callers all await the same boot sequence instead of each racing into
|
|
36625
|
+
* phone.serve(). Reset to null on failure so callers can retry after a
|
|
36626
|
+
* transient error. */
|
|
36627
|
+
startPromise = null;
|
|
35535
36628
|
constructor(opts) {
|
|
35536
36629
|
if (!opts.phone) {
|
|
35537
36630
|
throw new Error("PatterTool: `phone` (a Patter instance) is required.");
|
|
@@ -35539,7 +36632,7 @@ var PatterTool = class {
|
|
|
35539
36632
|
this.phone = opts.phone;
|
|
35540
36633
|
this.agent = opts.agent;
|
|
35541
36634
|
this.name = opts.name ?? DEFAULT_NAME;
|
|
35542
|
-
this.description = opts.description ??
|
|
36635
|
+
this.description = opts.description ?? DEFAULT_DESCRIPTION2;
|
|
35543
36636
|
this.maxDurationSec = Math.max(5, Math.min(1800, opts.maxDurationSec ?? 180));
|
|
35544
36637
|
this.recording = opts.recording ?? false;
|
|
35545
36638
|
}
|
|
@@ -35583,8 +36676,21 @@ var PatterTool = class {
|
|
|
35583
36676
|
* `serve()` provides here. No `onCallEnd` callback is wired: the SDK's own
|
|
35584
36677
|
* per-callId completion registry resolves the result, so the user's
|
|
35585
36678
|
* `onCallEnd` slot is left free.
|
|
36679
|
+
*
|
|
36680
|
+
* Idempotent and concurrency-safe: concurrent callers all await the same
|
|
36681
|
+
* in-progress boot instead of each racing into `phone.serve()`.
|
|
35586
36682
|
*/
|
|
35587
36683
|
async start() {
|
|
36684
|
+
if (this.startPromise) return this.startPromise;
|
|
36685
|
+
this.startPromise = this._doStart();
|
|
36686
|
+
try {
|
|
36687
|
+
await this.startPromise;
|
|
36688
|
+
} catch (err) {
|
|
36689
|
+
this.startPromise = null;
|
|
36690
|
+
throw err;
|
|
36691
|
+
}
|
|
36692
|
+
}
|
|
36693
|
+
async _doStart() {
|
|
35588
36694
|
if (this.started) return;
|
|
35589
36695
|
if (!this.agent) {
|
|
35590
36696
|
throw new Error(
|
|
@@ -35610,6 +36716,7 @@ var PatterTool = class {
|
|
|
35610
36716
|
}
|
|
35611
36717
|
}
|
|
35612
36718
|
this.started = false;
|
|
36719
|
+
this.startPromise = null;
|
|
35613
36720
|
}
|
|
35614
36721
|
// --- Execution ----------------------------------------------------------
|
|
35615
36722
|
/**
|
|
@@ -35981,7 +37088,8 @@ var UltravoxRealtimeAdapter = class {
|
|
|
35981
37088
|
"X-API-Key": this.apiKey,
|
|
35982
37089
|
"Content-Type": "application/json"
|
|
35983
37090
|
},
|
|
35984
|
-
body: JSON.stringify(body)
|
|
37091
|
+
body: JSON.stringify(body),
|
|
37092
|
+
signal: AbortSignal.timeout(15e3)
|
|
35985
37093
|
});
|
|
35986
37094
|
if (!resp.ok) {
|
|
35987
37095
|
const text = await resp.text().catch(() => "");
|
|
@@ -35992,12 +37100,36 @@ var UltravoxRealtimeAdapter = class {
|
|
|
35992
37100
|
this.ws = new import_ws6.default(call.joinUrl);
|
|
35993
37101
|
await new Promise((resolve2, reject) => {
|
|
35994
37102
|
const ws = this.ws;
|
|
37103
|
+
let settled = false;
|
|
37104
|
+
const timer = setTimeout(() => {
|
|
37105
|
+
if (settled) return;
|
|
37106
|
+
settled = true;
|
|
37107
|
+
ws.off("open", onOpen);
|
|
37108
|
+
ws.off("error", onError);
|
|
37109
|
+
this.ws = null;
|
|
37110
|
+
try {
|
|
37111
|
+
ws.close();
|
|
37112
|
+
} catch {
|
|
37113
|
+
}
|
|
37114
|
+
reject(new Error("Ultravox WS connect timeout"));
|
|
37115
|
+
}, 15e3);
|
|
35995
37116
|
const onOpen = () => {
|
|
37117
|
+
if (settled) return;
|
|
37118
|
+
settled = true;
|
|
37119
|
+
clearTimeout(timer);
|
|
35996
37120
|
ws.off("error", onError);
|
|
35997
37121
|
resolve2();
|
|
35998
37122
|
};
|
|
35999
37123
|
const onError = (err) => {
|
|
37124
|
+
if (settled) return;
|
|
37125
|
+
settled = true;
|
|
37126
|
+
clearTimeout(timer);
|
|
36000
37127
|
ws.off("open", onOpen);
|
|
37128
|
+
this.ws = null;
|
|
37129
|
+
try {
|
|
37130
|
+
ws.close();
|
|
37131
|
+
} catch {
|
|
37132
|
+
}
|
|
36001
37133
|
reject(err);
|
|
36002
37134
|
};
|
|
36003
37135
|
ws.once("open", onOpen);
|
|
@@ -36845,7 +37977,7 @@ var STT = class extends DeepgramSTT {
|
|
|
36845
37977
|
{
|
|
36846
37978
|
endpointingMs: opts.endpointingMs ?? 150,
|
|
36847
37979
|
utteranceEndMs: opts.utteranceEndMs === null ? null : opts.utteranceEndMs ?? 1e3,
|
|
36848
|
-
smartFormat: opts.smartFormat ??
|
|
37980
|
+
smartFormat: opts.smartFormat ?? false,
|
|
36849
37981
|
interimResults: opts.interimResults ?? true,
|
|
36850
37982
|
...opts.vadEvents !== void 0 ? { vadEvents: opts.vadEvents } : {}
|
|
36851
37983
|
}
|
|
@@ -37165,7 +38297,7 @@ var CartesiaSTT = class {
|
|
|
37165
38297
|
});
|
|
37166
38298
|
ws.once("error", (err) => {
|
|
37167
38299
|
clearTimeout(timer);
|
|
37168
|
-
reject(err);
|
|
38300
|
+
reject(new Error(`Cartesia STT park connect failed: ${describeWarmupError(err)}`));
|
|
37169
38301
|
});
|
|
37170
38302
|
});
|
|
37171
38303
|
return ws;
|
|
@@ -37521,7 +38653,7 @@ var SonioxSTT = class _SonioxSTT {
|
|
|
37521
38653
|
/** Stable pricing/dashboard key — read by stream-handler/metrics. */
|
|
37522
38654
|
static providerKey = "soniox";
|
|
37523
38655
|
ws = null;
|
|
37524
|
-
callbacks =
|
|
38656
|
+
callbacks = /* @__PURE__ */ new Set();
|
|
37525
38657
|
final = new TokenAccumulator();
|
|
37526
38658
|
keepaliveTimer = null;
|
|
37527
38659
|
apiKey;
|
|
@@ -37683,16 +38815,13 @@ var SonioxSTT = class _SonioxSTT {
|
|
|
37683
38815
|
if (audio.length === 0) return;
|
|
37684
38816
|
this.ws.send(audio);
|
|
37685
38817
|
}
|
|
37686
|
-
/** Register a transcript listener
|
|
38818
|
+
/** Register a transcript listener. */
|
|
37687
38819
|
onTranscript(callback) {
|
|
37688
|
-
|
|
37689
|
-
|
|
37690
|
-
|
|
37691
|
-
|
|
37692
|
-
|
|
37693
|
-
return;
|
|
37694
|
-
}
|
|
37695
|
-
this.callbacks.push(callback);
|
|
38820
|
+
this.callbacks.add(callback);
|
|
38821
|
+
}
|
|
38822
|
+
/** Unregister a previously registered transcript listener. */
|
|
38823
|
+
offTranscript(callback) {
|
|
38824
|
+
this.callbacks.delete(callback);
|
|
37696
38825
|
}
|
|
37697
38826
|
/** Send the empty-frame stream terminator and close the WebSocket. */
|
|
37698
38827
|
close() {
|
|
@@ -37774,12 +38903,6 @@ var VALID_DOMAINS = /* @__PURE__ */ new Set([
|
|
|
37774
38903
|
AssemblyAIDomain.GENERAL,
|
|
37775
38904
|
AssemblyAIDomain.MEDICAL_V1
|
|
37776
38905
|
]);
|
|
37777
|
-
var AssemblyAISTTNotConnectedError = class extends Error {
|
|
37778
|
-
constructor(message = "AssemblyAISTT is not connected") {
|
|
37779
|
-
super(message);
|
|
37780
|
-
this.name = "AssemblyAISTTNotConnectedError";
|
|
37781
|
-
}
|
|
37782
|
-
};
|
|
37783
38906
|
var AssemblyAISTT = class _AssemblyAISTT {
|
|
37784
38907
|
constructor(apiKey, options = {}) {
|
|
37785
38908
|
this.apiKey = apiKey;
|
|
@@ -38103,9 +39226,10 @@ var AssemblyAISTT = class _AssemblyAISTT {
|
|
|
38103
39226
|
*/
|
|
38104
39227
|
updateConfiguration(params) {
|
|
38105
39228
|
if (!this.ws || this.ws.readyState !== import_ws9.default.OPEN) {
|
|
38106
|
-
|
|
38107
|
-
"AssemblyAISTT.updateConfiguration: WebSocket is not open"
|
|
39229
|
+
getLogger().debug(
|
|
39230
|
+
"AssemblyAISTT.updateConfiguration: WebSocket is not open \u2014 dropping update (call teardown)."
|
|
38108
39231
|
);
|
|
39232
|
+
return;
|
|
38109
39233
|
}
|
|
38110
39234
|
const payload = {
|
|
38111
39235
|
type: AssemblyAIClientFrame.UPDATE_CONFIGURATION
|
|
@@ -38127,9 +39251,10 @@ var AssemblyAISTT = class _AssemblyAISTT {
|
|
|
38127
39251
|
/** Force the server to finalize the current turn (for barge-in). */
|
|
38128
39252
|
forceEndpoint() {
|
|
38129
39253
|
if (!this.ws || this.ws.readyState !== import_ws9.default.OPEN) {
|
|
38130
|
-
|
|
38131
|
-
"AssemblyAISTT.forceEndpoint: WebSocket is not open"
|
|
39254
|
+
getLogger().debug(
|
|
39255
|
+
"AssemblyAISTT.forceEndpoint: WebSocket is not open \u2014 dropping request (call teardown)."
|
|
38132
39256
|
);
|
|
39257
|
+
return;
|
|
38133
39258
|
}
|
|
38134
39259
|
this.ws.send(JSON.stringify({ type: AssemblyAIClientFrame.FORCE_ENDPOINT }));
|
|
38135
39260
|
}
|
|
@@ -38144,6 +39269,14 @@ var AssemblyAISTT = class _AssemblyAISTT {
|
|
|
38144
39269
|
async close() {
|
|
38145
39270
|
this.closing = true;
|
|
38146
39271
|
if (!this.ws) return;
|
|
39272
|
+
if (this.chunkBufferBytes > 0 && this.ws.readyState === import_ws9.default.OPEN) {
|
|
39273
|
+
try {
|
|
39274
|
+
this.ws.send(Buffer.concat(this.chunkBuffer, this.chunkBufferBytes));
|
|
39275
|
+
} catch {
|
|
39276
|
+
}
|
|
39277
|
+
this.chunkBuffer = [];
|
|
39278
|
+
this.chunkBufferBytes = 0;
|
|
39279
|
+
}
|
|
38147
39280
|
try {
|
|
38148
39281
|
this.ws.send(JSON.stringify({ type: AssemblyAIClientFrame.TERMINATE }));
|
|
38149
39282
|
} catch {
|
|
@@ -39350,7 +40483,7 @@ var TTS3 = class extends OpenAITTS {
|
|
|
39350
40483
|
opts.model ?? "gpt-4o-mini-tts",
|
|
39351
40484
|
opts.instructions ?? null,
|
|
39352
40485
|
opts.speed ?? null,
|
|
39353
|
-
opts.antiAlias ??
|
|
40486
|
+
opts.antiAlias ?? true
|
|
39354
40487
|
);
|
|
39355
40488
|
}
|
|
39356
40489
|
};
|
|
@@ -39525,7 +40658,6 @@ init_cjs_shims();
|
|
|
39525
40658
|
init_cjs_shims();
|
|
39526
40659
|
init_logger();
|
|
39527
40660
|
var INWORLD_BASE_URL = "https://api.inworld.ai/tts/v1/voice:stream";
|
|
39528
|
-
var INWORLD_VOICES_URL = "https://api.inworld.ai/tts/v1/voices";
|
|
39529
40661
|
var InworldModel = {
|
|
39530
40662
|
TTS_2: "inworld-tts-2",
|
|
39531
40663
|
TTS_1_5_MAX: "inworld-tts-1.5-max",
|
|
@@ -39614,7 +40746,8 @@ var InworldTTS = class {
|
|
|
39614
40746
|
*/
|
|
39615
40747
|
async warmup() {
|
|
39616
40748
|
try {
|
|
39617
|
-
|
|
40749
|
+
const voicesUrl = new URL(this.baseUrl).origin + "/tts/v1/voices";
|
|
40750
|
+
await fetch(voicesUrl, {
|
|
39618
40751
|
method: "GET",
|
|
39619
40752
|
headers: {
|
|
39620
40753
|
Authorization: `Basic ${this.authToken}`
|
|
@@ -39874,58 +41007,87 @@ var AnthropicLLMProvider = class {
|
|
|
39874
41007
|
const toolIndexByBlock = /* @__PURE__ */ new Map();
|
|
39875
41008
|
const toolIdByBlock = /* @__PURE__ */ new Map();
|
|
39876
41009
|
let nextIndex = 0;
|
|
39877
|
-
|
|
39878
|
-
|
|
39879
|
-
|
|
39880
|
-
|
|
39881
|
-
|
|
39882
|
-
|
|
39883
|
-
|
|
39884
|
-
|
|
39885
|
-
|
|
39886
|
-
const
|
|
39887
|
-
|
|
39888
|
-
|
|
39889
|
-
|
|
39890
|
-
|
|
39891
|
-
|
|
39892
|
-
continue;
|
|
39893
|
-
|
|
39894
|
-
|
|
39895
|
-
|
|
39896
|
-
|
|
39897
|
-
|
|
39898
|
-
|
|
39899
|
-
|
|
39900
|
-
|
|
39901
|
-
|
|
39902
|
-
|
|
39903
|
-
|
|
39904
|
-
|
|
39905
|
-
|
|
39906
|
-
|
|
39907
|
-
|
|
39908
|
-
continue;
|
|
39909
|
-
}
|
|
39910
|
-
if (event.type === "content_block_delta") {
|
|
39911
|
-
if (event.delta?.type === "text_delta" && event.delta.text) {
|
|
39912
|
-
yield { type: "text", content: event.delta.text };
|
|
41010
|
+
let inputTokens = 0;
|
|
41011
|
+
let outputTokens = 0;
|
|
41012
|
+
let cacheReadTokens = 0;
|
|
41013
|
+
let cacheWriteTokens = 0;
|
|
41014
|
+
try {
|
|
41015
|
+
while (true) {
|
|
41016
|
+
const { done, value } = await reader.read();
|
|
41017
|
+
if (done) break;
|
|
41018
|
+
buffer += decoder.decode(value, { stream: true });
|
|
41019
|
+
const lines = buffer.split("\n");
|
|
41020
|
+
buffer = lines.pop() || "";
|
|
41021
|
+
for (const line of lines) {
|
|
41022
|
+
const trimmed = line.trim();
|
|
41023
|
+
if (!trimmed.startsWith("data: ")) continue;
|
|
41024
|
+
const data = trimmed.slice(6);
|
|
41025
|
+
if (!data || data === "[DONE]") continue;
|
|
41026
|
+
let event;
|
|
41027
|
+
try {
|
|
41028
|
+
event = JSON.parse(data);
|
|
41029
|
+
} catch {
|
|
41030
|
+
continue;
|
|
41031
|
+
}
|
|
41032
|
+
if (event.type === "message_start" && event.message?.usage) {
|
|
41033
|
+
const u = event.message.usage;
|
|
41034
|
+
if (u.input_tokens) inputTokens = u.input_tokens;
|
|
41035
|
+
if (u.cache_creation_input_tokens) cacheWriteTokens = u.cache_creation_input_tokens;
|
|
41036
|
+
if (u.cache_read_input_tokens) cacheReadTokens = u.cache_read_input_tokens;
|
|
41037
|
+
continue;
|
|
41038
|
+
}
|
|
41039
|
+
if (event.type === "message_delta" && event.usage?.output_tokens) {
|
|
41040
|
+
outputTokens = event.usage.output_tokens;
|
|
39913
41041
|
continue;
|
|
39914
41042
|
}
|
|
39915
|
-
if (event.
|
|
41043
|
+
if (event.type === "content_block_start" && event.content_block?.type === "tool_use") {
|
|
39916
41044
|
const blockIdx = event.index ?? 0;
|
|
39917
|
-
const
|
|
39918
|
-
|
|
39919
|
-
|
|
39920
|
-
|
|
39921
|
-
|
|
39922
|
-
|
|
39923
|
-
|
|
39924
|
-
|
|
41045
|
+
const toolId = event.content_block.id ?? "";
|
|
41046
|
+
const toolName = event.content_block.name ?? "";
|
|
41047
|
+
const patterIndex = nextIndex++;
|
|
41048
|
+
toolIndexByBlock.set(blockIdx, patterIndex);
|
|
41049
|
+
toolIdByBlock.set(blockIdx, toolId);
|
|
41050
|
+
yield {
|
|
41051
|
+
type: "tool_call",
|
|
41052
|
+
index: patterIndex,
|
|
41053
|
+
id: toolId,
|
|
41054
|
+
name: toolName,
|
|
41055
|
+
arguments: ""
|
|
41056
|
+
};
|
|
41057
|
+
continue;
|
|
41058
|
+
}
|
|
41059
|
+
if (event.type === "content_block_delta") {
|
|
41060
|
+
if (event.delta?.type === "text_delta" && event.delta.text) {
|
|
41061
|
+
yield { type: "text", content: event.delta.text };
|
|
41062
|
+
continue;
|
|
41063
|
+
}
|
|
41064
|
+
if (event.delta?.type === "input_json_delta" && event.delta.partial_json) {
|
|
41065
|
+
const blockIdx = event.index ?? 0;
|
|
41066
|
+
const patterIndex = toolIndexByBlock.get(blockIdx);
|
|
41067
|
+
if (patterIndex !== void 0) {
|
|
41068
|
+
yield {
|
|
41069
|
+
type: "tool_call",
|
|
41070
|
+
index: patterIndex,
|
|
41071
|
+
id: toolIdByBlock.get(blockIdx),
|
|
41072
|
+
arguments: event.delta.partial_json
|
|
41073
|
+
};
|
|
41074
|
+
}
|
|
39925
41075
|
}
|
|
39926
41076
|
}
|
|
39927
41077
|
}
|
|
39928
41078
|
}
|
|
41079
|
+
} finally {
|
|
41080
|
+
reader.cancel().catch(() => {
|
|
41081
|
+
});
|
|
41082
|
+
}
|
|
41083
|
+
if (inputTokens > 0 || outputTokens > 0 || cacheReadTokens > 0 || cacheWriteTokens > 0) {
|
|
41084
|
+
yield {
|
|
41085
|
+
type: "usage",
|
|
41086
|
+
inputTokens,
|
|
41087
|
+
outputTokens,
|
|
41088
|
+
cacheReadInputTokens: cacheReadTokens,
|
|
41089
|
+
cacheWriteInputTokens: cacheWriteTokens
|
|
41090
|
+
};
|
|
39929
41091
|
}
|
|
39930
41092
|
yield { type: "done" };
|
|
39931
41093
|
}
|
|
@@ -39985,16 +41147,17 @@ function toAnthropicMessages(messages) {
|
|
|
39985
41147
|
}
|
|
39986
41148
|
if (role === "tool") {
|
|
39987
41149
|
const contentStr = typeof rawMsg.content === "string" ? rawMsg.content : JSON.stringify(rawMsg.content);
|
|
39988
|
-
|
|
39989
|
-
|
|
39990
|
-
|
|
39991
|
-
|
|
39992
|
-
|
|
39993
|
-
|
|
39994
|
-
|
|
39995
|
-
|
|
39996
|
-
|
|
39997
|
-
|
|
41150
|
+
const toolResultBlock = {
|
|
41151
|
+
type: "tool_result",
|
|
41152
|
+
tool_use_id: rawMsg.tool_call_id ?? "",
|
|
41153
|
+
content: contentStr
|
|
41154
|
+
};
|
|
41155
|
+
const prev = out.length > 0 ? out[out.length - 1] : void 0;
|
|
41156
|
+
if (prev && prev.role === "user" && Array.isArray(prev.content) && prev.content.length > 0 && prev.content.every((b) => b["type"] === "tool_result")) {
|
|
41157
|
+
prev.content.push(toolResultBlock);
|
|
41158
|
+
} else {
|
|
41159
|
+
out.push({ role: "user", content: [toolResultBlock] });
|
|
41160
|
+
}
|
|
39998
41161
|
continue;
|
|
39999
41162
|
}
|
|
40000
41163
|
}
|
|
@@ -40137,50 +41300,55 @@ async function* parseOpenAISseStream(response) {
|
|
|
40137
41300
|
if (!reader) return;
|
|
40138
41301
|
const decoder = new TextDecoder();
|
|
40139
41302
|
let buffer = "";
|
|
40140
|
-
|
|
40141
|
-
|
|
40142
|
-
|
|
40143
|
-
|
|
40144
|
-
|
|
40145
|
-
|
|
40146
|
-
|
|
40147
|
-
const
|
|
40148
|
-
|
|
40149
|
-
|
|
40150
|
-
|
|
40151
|
-
|
|
40152
|
-
|
|
40153
|
-
|
|
40154
|
-
|
|
40155
|
-
|
|
40156
|
-
|
|
40157
|
-
|
|
40158
|
-
|
|
40159
|
-
|
|
40160
|
-
|
|
40161
|
-
type: "usage",
|
|
40162
|
-
inputTokens: usage.prompt_tokens,
|
|
40163
|
-
outputTokens: usage.completion_tokens,
|
|
40164
|
-
cacheReadInputTokens: cached2
|
|
40165
|
-
};
|
|
40166
|
-
}
|
|
40167
|
-
const delta = chunk.choices?.[0]?.delta;
|
|
40168
|
-
if (!delta) continue;
|
|
40169
|
-
if (delta.content) {
|
|
40170
|
-
yield { type: "text", content: delta.content };
|
|
40171
|
-
}
|
|
40172
|
-
if (delta.tool_calls) {
|
|
40173
|
-
for (const tc of delta.tool_calls) {
|
|
41303
|
+
try {
|
|
41304
|
+
while (true) {
|
|
41305
|
+
const { done, value } = await reader.read();
|
|
41306
|
+
if (done) break;
|
|
41307
|
+
buffer += decoder.decode(value, { stream: true });
|
|
41308
|
+
const lines = buffer.split("\n");
|
|
41309
|
+
buffer = lines.pop() || "";
|
|
41310
|
+
for (const line of lines) {
|
|
41311
|
+
const trimmed = line.trim();
|
|
41312
|
+
if (!trimmed || !trimmed.startsWith("data: ")) continue;
|
|
41313
|
+
const data = trimmed.slice(6);
|
|
41314
|
+
if (data === "[DONE]") continue;
|
|
41315
|
+
let chunk;
|
|
41316
|
+
try {
|
|
41317
|
+
chunk = JSON.parse(data);
|
|
41318
|
+
} catch {
|
|
41319
|
+
continue;
|
|
41320
|
+
}
|
|
41321
|
+
const usage = chunk.usage ?? chunk.x_groq?.usage;
|
|
41322
|
+
if (usage) {
|
|
41323
|
+
const cached2 = chunk.usage?.prompt_tokens_details?.cached_tokens ?? 0;
|
|
40174
41324
|
yield {
|
|
40175
|
-
type: "
|
|
40176
|
-
|
|
40177
|
-
|
|
40178
|
-
|
|
40179
|
-
arguments: tc.function?.arguments
|
|
41325
|
+
type: "usage",
|
|
41326
|
+
inputTokens: usage.prompt_tokens,
|
|
41327
|
+
outputTokens: usage.completion_tokens,
|
|
41328
|
+
cacheReadInputTokens: cached2
|
|
40180
41329
|
};
|
|
40181
41330
|
}
|
|
41331
|
+
const delta = chunk.choices?.[0]?.delta;
|
|
41332
|
+
if (!delta) continue;
|
|
41333
|
+
if (delta.content) {
|
|
41334
|
+
yield { type: "text", content: delta.content };
|
|
41335
|
+
}
|
|
41336
|
+
if (delta.tool_calls) {
|
|
41337
|
+
for (const tc of delta.tool_calls) {
|
|
41338
|
+
yield {
|
|
41339
|
+
type: "tool_call",
|
|
41340
|
+
index: tc.index,
|
|
41341
|
+
id: tc.id,
|
|
41342
|
+
name: tc.function?.name,
|
|
41343
|
+
arguments: tc.function?.arguments
|
|
41344
|
+
};
|
|
41345
|
+
}
|
|
41346
|
+
}
|
|
40182
41347
|
}
|
|
40183
41348
|
}
|
|
41349
|
+
} finally {
|
|
41350
|
+
reader.cancel().catch(() => {
|
|
41351
|
+
});
|
|
40184
41352
|
}
|
|
40185
41353
|
}
|
|
40186
41354
|
|
|
@@ -40349,11 +41517,21 @@ var CerebrasLLMProvider = class {
|
|
|
40349
41517
|
}
|
|
40350
41518
|
const advisoryMs = parseRateLimitResetMs(response.headers);
|
|
40351
41519
|
const exponentialMs = RETRY_BACKOFF_BASE_MS * Math.pow(2, attempt);
|
|
40352
|
-
const delayMs = Math.max(advisoryMs, exponentialMs);
|
|
41520
|
+
const delayMs = Math.min(5e3, Math.max(advisoryMs, exponentialMs));
|
|
40353
41521
|
getLogger().warn(
|
|
40354
41522
|
`Cerebras API ${response.status} (attempt ${attempt + 1}/${maxAttempts}); retrying after ${delayMs}ms`
|
|
40355
41523
|
);
|
|
40356
|
-
await new Promise((
|
|
41524
|
+
await new Promise((resolve2, reject) => {
|
|
41525
|
+
const t = setTimeout(resolve2, delayMs);
|
|
41526
|
+
opts?.signal?.addEventListener(
|
|
41527
|
+
"abort",
|
|
41528
|
+
() => {
|
|
41529
|
+
clearTimeout(t);
|
|
41530
|
+
reject(opts.signal.reason);
|
|
41531
|
+
},
|
|
41532
|
+
{ once: true }
|
|
41533
|
+
);
|
|
41534
|
+
});
|
|
40357
41535
|
}
|
|
40358
41536
|
throw new PatterError(`Cerebras API error ${lastStatus}: ${lastErrText || "request failed"}`);
|
|
40359
41537
|
}
|
|
@@ -40516,47 +41694,52 @@ var GoogleLLMProvider = class {
|
|
|
40516
41694
|
let buffer = "";
|
|
40517
41695
|
let nextIndex = 0;
|
|
40518
41696
|
let lastUsage;
|
|
40519
|
-
|
|
40520
|
-
|
|
40521
|
-
|
|
40522
|
-
|
|
40523
|
-
|
|
40524
|
-
|
|
40525
|
-
|
|
40526
|
-
const
|
|
40527
|
-
|
|
40528
|
-
|
|
40529
|
-
|
|
40530
|
-
|
|
40531
|
-
|
|
40532
|
-
|
|
40533
|
-
|
|
40534
|
-
|
|
40535
|
-
}
|
|
40536
|
-
if (payload.usageMetadata) {
|
|
40537
|
-
lastUsage = payload.usageMetadata;
|
|
40538
|
-
}
|
|
40539
|
-
const candidate = payload.candidates?.[0];
|
|
40540
|
-
const parts = candidate?.content?.parts ?? [];
|
|
40541
|
-
for (const part of parts) {
|
|
40542
|
-
if (part.functionCall) {
|
|
40543
|
-
const args = part.functionCall.args ?? {};
|
|
40544
|
-
const callId = part.functionCall.id ?? `gemini_call_${nextIndex}`;
|
|
40545
|
-
yield {
|
|
40546
|
-
type: "tool_call",
|
|
40547
|
-
index: nextIndex,
|
|
40548
|
-
id: callId,
|
|
40549
|
-
name: part.functionCall.name ?? "",
|
|
40550
|
-
arguments: JSON.stringify(args)
|
|
40551
|
-
};
|
|
40552
|
-
nextIndex++;
|
|
41697
|
+
try {
|
|
41698
|
+
while (true) {
|
|
41699
|
+
const { done, value } = await reader.read();
|
|
41700
|
+
if (done) break;
|
|
41701
|
+
buffer += decoder.decode(value, { stream: true });
|
|
41702
|
+
const lines = buffer.split("\n");
|
|
41703
|
+
buffer = lines.pop() || "";
|
|
41704
|
+
for (const line of lines) {
|
|
41705
|
+
const trimmed = line.trim();
|
|
41706
|
+
if (!trimmed.startsWith("data: ")) continue;
|
|
41707
|
+
const data = trimmed.slice(6);
|
|
41708
|
+
if (!data) continue;
|
|
41709
|
+
let payload;
|
|
41710
|
+
try {
|
|
41711
|
+
payload = JSON.parse(data);
|
|
41712
|
+
} catch {
|
|
40553
41713
|
continue;
|
|
40554
41714
|
}
|
|
40555
|
-
if (
|
|
40556
|
-
|
|
41715
|
+
if (payload.usageMetadata) {
|
|
41716
|
+
lastUsage = payload.usageMetadata;
|
|
41717
|
+
}
|
|
41718
|
+
const candidate = payload.candidates?.[0];
|
|
41719
|
+
const parts = candidate?.content?.parts ?? [];
|
|
41720
|
+
for (const part of parts) {
|
|
41721
|
+
if (part.functionCall) {
|
|
41722
|
+
const args = part.functionCall.args ?? {};
|
|
41723
|
+
const callId = part.functionCall.id ?? `gemini_call_${nextIndex}`;
|
|
41724
|
+
yield {
|
|
41725
|
+
type: "tool_call",
|
|
41726
|
+
index: nextIndex,
|
|
41727
|
+
id: callId,
|
|
41728
|
+
name: part.functionCall.name ?? "",
|
|
41729
|
+
arguments: JSON.stringify(args)
|
|
41730
|
+
};
|
|
41731
|
+
nextIndex++;
|
|
41732
|
+
continue;
|
|
41733
|
+
}
|
|
41734
|
+
if (part.text) {
|
|
41735
|
+
yield { type: "text", content: part.text };
|
|
41736
|
+
}
|
|
40557
41737
|
}
|
|
40558
41738
|
}
|
|
40559
41739
|
}
|
|
41740
|
+
} finally {
|
|
41741
|
+
reader.cancel().catch(() => {
|
|
41742
|
+
});
|
|
40560
41743
|
}
|
|
40561
41744
|
if (lastUsage) {
|
|
40562
41745
|
yield {
|
|
@@ -40650,7 +41833,17 @@ function toGeminiContents(messages) {
|
|
|
40650
41833
|
continue;
|
|
40651
41834
|
}
|
|
40652
41835
|
}
|
|
40653
|
-
|
|
41836
|
+
const merged = [];
|
|
41837
|
+
for (const entry of contents) {
|
|
41838
|
+
const prev = merged[merged.length - 1];
|
|
41839
|
+
const isFunctionResponseOnly = (c) => c.role === "user" && c.parts.every((p) => p.functionResponse !== void 0);
|
|
41840
|
+
if (prev && isFunctionResponseOnly(prev) && isFunctionResponseOnly(entry)) {
|
|
41841
|
+
prev.parts.push(...entry.parts);
|
|
41842
|
+
} else {
|
|
41843
|
+
merged.push(entry);
|
|
41844
|
+
}
|
|
41845
|
+
}
|
|
41846
|
+
return { systemInstruction: systemParts.join("\n\n"), contents: merged };
|
|
40654
41847
|
}
|
|
40655
41848
|
|
|
40656
41849
|
// src/llm/google.ts
|
|
@@ -40673,13 +41866,270 @@ var LLM5 = class extends GoogleLLMProvider {
|
|
|
40673
41866
|
}
|
|
40674
41867
|
};
|
|
40675
41868
|
|
|
41869
|
+
// src/llm/openai-compatible.ts
|
|
41870
|
+
init_cjs_shims();
|
|
41871
|
+
init_llm_loop();
|
|
41872
|
+
init_errors();
|
|
41873
|
+
init_logger();
|
|
41874
|
+
init_version();
|
|
41875
|
+
var DEFAULT_TIMEOUT_S = 60;
|
|
41876
|
+
var OpenAICompatibleLLMProvider = class {
|
|
41877
|
+
/**
|
|
41878
|
+
* Stable pricing/dashboard key — read by stream-handler/metrics. Typed as
|
|
41879
|
+
* ``string`` (not the narrowed literal) so the Hermes / OpenClaw presets can
|
|
41880
|
+
* override it with their own key while still extending this class.
|
|
41881
|
+
*/
|
|
41882
|
+
static providerKey = "openai_compatible";
|
|
41883
|
+
/** Resolved bearer; undefined for keyless gateways. */
|
|
41884
|
+
apiKey;
|
|
41885
|
+
model;
|
|
41886
|
+
baseUrl;
|
|
41887
|
+
timeoutMs;
|
|
41888
|
+
extraHeaders;
|
|
41889
|
+
sessionUserPrefix;
|
|
41890
|
+
sessionIdHeader;
|
|
41891
|
+
sessionIdPrefix;
|
|
41892
|
+
sessionKeyHeader;
|
|
41893
|
+
sessionKey;
|
|
41894
|
+
temperature;
|
|
41895
|
+
maxTokens;
|
|
41896
|
+
responseFormat;
|
|
41897
|
+
parallelToolCalls;
|
|
41898
|
+
toolChoice;
|
|
41899
|
+
seed;
|
|
41900
|
+
topP;
|
|
41901
|
+
frequencyPenalty;
|
|
41902
|
+
presencePenalty;
|
|
41903
|
+
stop;
|
|
41904
|
+
constructor(options) {
|
|
41905
|
+
if (!options.baseUrl) {
|
|
41906
|
+
throw new Error(
|
|
41907
|
+
'OpenAICompatibleLLMProvider requires a baseUrl (e.g. "http://127.0.0.1:11434/v1").'
|
|
41908
|
+
);
|
|
41909
|
+
}
|
|
41910
|
+
if (!options.model) {
|
|
41911
|
+
throw new Error("OpenAICompatibleLLMProvider requires a model.");
|
|
41912
|
+
}
|
|
41913
|
+
this.apiKey = options.apiKey ?? (options.apiKeyEnv ? process.env[options.apiKeyEnv] : void 0);
|
|
41914
|
+
this.model = options.model;
|
|
41915
|
+
this.baseUrl = options.baseUrl;
|
|
41916
|
+
this.timeoutMs = (options.timeout ?? DEFAULT_TIMEOUT_S) * 1e3;
|
|
41917
|
+
this.extraHeaders = options.extraHeaders;
|
|
41918
|
+
this.sessionUserPrefix = options.sessionUserPrefix;
|
|
41919
|
+
this.sessionIdHeader = options.sessionIdHeader;
|
|
41920
|
+
this.sessionIdPrefix = options.sessionIdPrefix;
|
|
41921
|
+
this.sessionKeyHeader = options.sessionKeyHeader;
|
|
41922
|
+
this.sessionKey = options.sessionKey;
|
|
41923
|
+
this.temperature = options.temperature;
|
|
41924
|
+
this.maxTokens = options.maxTokens;
|
|
41925
|
+
this.responseFormat = options.responseFormat;
|
|
41926
|
+
this.parallelToolCalls = options.parallelToolCalls;
|
|
41927
|
+
this.toolChoice = options.toolChoice;
|
|
41928
|
+
this.seed = options.seed;
|
|
41929
|
+
this.topP = options.topP;
|
|
41930
|
+
this.frequencyPenalty = options.frequencyPenalty;
|
|
41931
|
+
this.presencePenalty = options.presencePenalty;
|
|
41932
|
+
this.stop = options.stop;
|
|
41933
|
+
}
|
|
41934
|
+
/**
|
|
41935
|
+
* Assemble the request headers. ``User-Agent`` is set first so any
|
|
41936
|
+
* ``extraHeaders`` (and the per-call session headers) layer on top without
|
|
41937
|
+
* silently dropping the SDK attribution, and the ``Authorization`` header is
|
|
41938
|
+
* only added when a key is present (keyless gateways omit it).
|
|
41939
|
+
*
|
|
41940
|
+
* The two session headers are emitted INDEPENDENTLY, each gated on its own
|
|
41941
|
+
* config (decoupled from ``sessionUserPrefix`` and from each other):
|
|
41942
|
+
* - ``sessionIdHeader`` (+ ``callId``) → ``` `${sessionIdPrefix}${callId}` ```
|
|
41943
|
+
* - ``sessionKeyHeader`` (+ ``sessionKey``) → the static ``sessionKey`` value.
|
|
41944
|
+
* ``sessionKey`` is a credential-grade memory scope and is never logged.
|
|
41945
|
+
*/
|
|
41946
|
+
buildHeaders(callId) {
|
|
41947
|
+
const headers = {
|
|
41948
|
+
"Content-Type": "application/json",
|
|
41949
|
+
"User-Agent": `getpatter/${VERSION}`,
|
|
41950
|
+
...this.extraHeaders ?? {}
|
|
41951
|
+
};
|
|
41952
|
+
if (this.apiKey) {
|
|
41953
|
+
headers.Authorization = `Bearer ${this.apiKey}`;
|
|
41954
|
+
}
|
|
41955
|
+
if (this.sessionIdHeader && callId) {
|
|
41956
|
+
headers[this.sessionIdHeader] = `${this.sessionIdPrefix ?? ""}${callId}`;
|
|
41957
|
+
}
|
|
41958
|
+
if (this.sessionKeyHeader && this.sessionKey) {
|
|
41959
|
+
headers[this.sessionKeyHeader] = this.sessionKey;
|
|
41960
|
+
}
|
|
41961
|
+
return headers;
|
|
41962
|
+
}
|
|
41963
|
+
/**
|
|
41964
|
+
* Pre-call DNS / TLS warmup for the configured endpoint. Best-effort:
|
|
41965
|
+
* 5 s timeout, all exceptions swallowed at debug level. The ``Authorization``
|
|
41966
|
+
* header is only sent when a key is present so the operator-grade bearer is
|
|
41967
|
+
* never echoed for keyless gateways (and the key is never logged).
|
|
41968
|
+
*/
|
|
41969
|
+
async warmup() {
|
|
41970
|
+
try {
|
|
41971
|
+
const headers = {};
|
|
41972
|
+
if (this.apiKey) headers.Authorization = `Bearer ${this.apiKey}`;
|
|
41973
|
+
await fetch(`${this.baseUrl}/models`, {
|
|
41974
|
+
method: "GET",
|
|
41975
|
+
headers,
|
|
41976
|
+
signal: AbortSignal.timeout(5e3)
|
|
41977
|
+
});
|
|
41978
|
+
} catch (err) {
|
|
41979
|
+
getLogger().debug(
|
|
41980
|
+
`OpenAI-compatible LLM warmup failed (best-effort): ${String(err)}`
|
|
41981
|
+
);
|
|
41982
|
+
}
|
|
41983
|
+
}
|
|
41984
|
+
/**
|
|
41985
|
+
* Build the request body. Mirrors the base OpenAI provider's sampling-kwarg
|
|
41986
|
+
* assembly and additionally sets ``user`` for session continuity when
|
|
41987
|
+
* ``sessionUserPrefix`` is set AND a ``callId`` is available — so the default
|
|
41988
|
+
* (prefix unset) behaviour is byte-identical to the base provider.
|
|
41989
|
+
*/
|
|
41990
|
+
buildBody(messages, tools, callId) {
|
|
41991
|
+
const body = {
|
|
41992
|
+
model: this.model,
|
|
41993
|
+
messages,
|
|
41994
|
+
stream: true,
|
|
41995
|
+
stream_options: { include_usage: true }
|
|
41996
|
+
};
|
|
41997
|
+
if (this.temperature !== void 0) body.temperature = this.temperature;
|
|
41998
|
+
if (this.maxTokens !== void 0) body.max_completion_tokens = this.maxTokens;
|
|
41999
|
+
if (this.responseFormat !== void 0) body.response_format = this.responseFormat;
|
|
42000
|
+
if (this.parallelToolCalls !== void 0) body.parallel_tool_calls = this.parallelToolCalls;
|
|
42001
|
+
if (this.toolChoice !== void 0) body.tool_choice = this.toolChoice;
|
|
42002
|
+
if (this.seed !== void 0) body.seed = this.seed;
|
|
42003
|
+
if (this.topP !== void 0) body.top_p = this.topP;
|
|
42004
|
+
if (this.frequencyPenalty !== void 0) body.frequency_penalty = this.frequencyPenalty;
|
|
42005
|
+
if (this.presencePenalty !== void 0) body.presence_penalty = this.presencePenalty;
|
|
42006
|
+
if (this.stop !== void 0) body.stop = this.stop;
|
|
42007
|
+
if (tools) body.tools = tools;
|
|
42008
|
+
if (this.sessionUserPrefix !== void 0 && callId) {
|
|
42009
|
+
body.user = `${this.sessionUserPrefix}${callId}`;
|
|
42010
|
+
}
|
|
42011
|
+
return body;
|
|
42012
|
+
}
|
|
42013
|
+
/** Stream Patter-format LLM chunks from the configured chat completions API. */
|
|
42014
|
+
async *stream(messages, tools, opts) {
|
|
42015
|
+
const callId = opts?.callId;
|
|
42016
|
+
const body = this.buildBody(messages, tools, callId);
|
|
42017
|
+
const response = await fetch(`${this.baseUrl}/chat/completions`, {
|
|
42018
|
+
method: "POST",
|
|
42019
|
+
headers: this.buildHeaders(callId),
|
|
42020
|
+
body: JSON.stringify(body),
|
|
42021
|
+
signal: mergeAbortSignals(opts?.signal, AbortSignal.timeout(this.timeoutMs))
|
|
42022
|
+
});
|
|
42023
|
+
if (!response.ok) {
|
|
42024
|
+
const errText = await response.text();
|
|
42025
|
+
getLogger().error(
|
|
42026
|
+
`OpenAI-compatible API error: ${response.status} ${errText}`
|
|
42027
|
+
);
|
|
42028
|
+
throw new PatterConnectionError(
|
|
42029
|
+
`LLM API returned ${response.status}: ${errText.slice(0, 200)}`
|
|
42030
|
+
);
|
|
42031
|
+
}
|
|
42032
|
+
yield* parseOpenAISseStream(response);
|
|
42033
|
+
}
|
|
42034
|
+
};
|
|
42035
|
+
var LLM6 = class extends OpenAICompatibleLLMProvider {
|
|
42036
|
+
static providerKey = "openai_compatible";
|
|
42037
|
+
};
|
|
42038
|
+
|
|
42039
|
+
// src/llm/hermes.ts
|
|
42040
|
+
init_cjs_shims();
|
|
42041
|
+
var BASE_URL = "http://127.0.0.1:8642/v1";
|
|
42042
|
+
var DEFAULT_MODEL5 = "hermes-agent";
|
|
42043
|
+
var API_KEY_ENV = "API_SERVER_KEY";
|
|
42044
|
+
var MODEL_ENV = "API_SERVER_MODEL_NAME";
|
|
42045
|
+
var SESSION_USER_PREFIX = "patter-call-";
|
|
42046
|
+
var SESSION_ID_HEADER = "X-Hermes-Session-Id";
|
|
42047
|
+
var SESSION_ID_PREFIX = "patter-call-";
|
|
42048
|
+
var SESSION_KEY_HEADER = "X-Hermes-Session-Key";
|
|
42049
|
+
var DEFAULT_TIMEOUT_S2 = 120;
|
|
42050
|
+
var LLM7 = class extends OpenAICompatibleLLMProvider {
|
|
42051
|
+
static providerKey = "hermes";
|
|
42052
|
+
constructor(opts = {}) {
|
|
42053
|
+
const model = opts.model ?? process.env[MODEL_ENV] ?? DEFAULT_MODEL5;
|
|
42054
|
+
const options = {
|
|
42055
|
+
apiKey: opts.apiKey,
|
|
42056
|
+
apiKeyEnv: API_KEY_ENV,
|
|
42057
|
+
baseUrl: opts.baseUrl ?? BASE_URL,
|
|
42058
|
+
model,
|
|
42059
|
+
timeout: opts.timeout ?? DEFAULT_TIMEOUT_S2,
|
|
42060
|
+
sessionUserPrefix: SESSION_USER_PREFIX,
|
|
42061
|
+
sessionIdHeader: SESSION_ID_HEADER,
|
|
42062
|
+
sessionIdPrefix: SESSION_ID_PREFIX,
|
|
42063
|
+
sessionKeyHeader: SESSION_KEY_HEADER,
|
|
42064
|
+
sessionKey: opts.sessionKey,
|
|
42065
|
+
extraHeaders: opts.extraHeaders,
|
|
42066
|
+
temperature: opts.temperature,
|
|
42067
|
+
maxTokens: opts.maxTokens,
|
|
42068
|
+
responseFormat: opts.responseFormat,
|
|
42069
|
+
parallelToolCalls: opts.parallelToolCalls,
|
|
42070
|
+
toolChoice: opts.toolChoice,
|
|
42071
|
+
seed: opts.seed,
|
|
42072
|
+
topP: opts.topP,
|
|
42073
|
+
frequencyPenalty: opts.frequencyPenalty,
|
|
42074
|
+
presencePenalty: opts.presencePenalty,
|
|
42075
|
+
stop: opts.stop
|
|
42076
|
+
};
|
|
42077
|
+
super(options);
|
|
42078
|
+
}
|
|
42079
|
+
};
|
|
42080
|
+
|
|
42081
|
+
// src/llm/openclaw.ts
|
|
42082
|
+
init_cjs_shims();
|
|
42083
|
+
var BASE_URL2 = "http://127.0.0.1:18789/v1";
|
|
42084
|
+
var API_KEY_ENV2 = "OPENCLAW_API_KEY";
|
|
42085
|
+
var SESSION_HEADER = "x-openclaw-session-key";
|
|
42086
|
+
var SESSION_USER_PREFIX2 = "patter-call-";
|
|
42087
|
+
var DEFAULT_TIMEOUT_S3 = 120;
|
|
42088
|
+
var OPENCLAW_AGENT_RE2 = /^[A-Za-z0-9._:/-]+$/;
|
|
42089
|
+
var LLM8 = class extends OpenAICompatibleLLMProvider {
|
|
42090
|
+
static providerKey = "openclaw";
|
|
42091
|
+
constructor(opts) {
|
|
42092
|
+
const agent = opts?.agent;
|
|
42093
|
+
if (!agent || !OPENCLAW_AGENT_RE2.test(agent)) {
|
|
42094
|
+
throw new Error(
|
|
42095
|
+
`Invalid OpenClaw agent id: ${JSON.stringify(agent)}. Allowed characters: letters, digits, dot, underscore, colon, slash, dash.`
|
|
42096
|
+
);
|
|
42097
|
+
}
|
|
42098
|
+
const model = agent.includes("/") || agent.includes(":") ? agent : `openclaw/${agent}`;
|
|
42099
|
+
const options = {
|
|
42100
|
+
apiKey: opts.apiKey,
|
|
42101
|
+
apiKeyEnv: API_KEY_ENV2,
|
|
42102
|
+
baseUrl: opts.baseUrl ?? BASE_URL2,
|
|
42103
|
+
model,
|
|
42104
|
+
timeout: opts.timeout ?? DEFAULT_TIMEOUT_S3,
|
|
42105
|
+
sessionUserPrefix: SESSION_USER_PREFIX2,
|
|
42106
|
+
// Wire-identical to the prior behaviour: header value is the raw call id
|
|
42107
|
+
// (empty prefix), and OpenClaw's gateway also derives the session from
|
|
42108
|
+
// the ``user`` field above. No separate memory-scope header.
|
|
42109
|
+
sessionIdHeader: SESSION_HEADER,
|
|
42110
|
+
sessionIdPrefix: "",
|
|
42111
|
+
extraHeaders: opts.extraHeaders,
|
|
42112
|
+
temperature: opts.temperature,
|
|
42113
|
+
maxTokens: opts.maxTokens,
|
|
42114
|
+
responseFormat: opts.responseFormat,
|
|
42115
|
+
parallelToolCalls: opts.parallelToolCalls,
|
|
42116
|
+
toolChoice: opts.toolChoice,
|
|
42117
|
+
seed: opts.seed,
|
|
42118
|
+
topP: opts.topP,
|
|
42119
|
+
frequencyPenalty: opts.frequencyPenalty,
|
|
42120
|
+
presencePenalty: opts.presencePenalty,
|
|
42121
|
+
stop: opts.stop
|
|
42122
|
+
};
|
|
42123
|
+
super(options);
|
|
42124
|
+
}
|
|
42125
|
+
};
|
|
42126
|
+
|
|
40676
42127
|
// src/index.ts
|
|
40677
42128
|
init_silero_vad();
|
|
40678
42129
|
|
|
40679
42130
|
// src/providers/deepfilternet-filter.ts
|
|
40680
42131
|
init_cjs_shims();
|
|
40681
42132
|
init_logger();
|
|
40682
|
-
init_transcoding();
|
|
40683
42133
|
function log2() {
|
|
40684
42134
|
return getLogger();
|
|
40685
42135
|
}
|
|
@@ -40709,6 +42159,57 @@ function float32ToPcm16(samples) {
|
|
|
40709
42159
|
}
|
|
40710
42160
|
return out;
|
|
40711
42161
|
}
|
|
42162
|
+
var ArbitraryResampler = class {
|
|
42163
|
+
srcRate;
|
|
42164
|
+
dstRate;
|
|
42165
|
+
phase = 0;
|
|
42166
|
+
// fractional position into the current chunk
|
|
42167
|
+
lastSample = 0;
|
|
42168
|
+
// last input sample from the previous chunk
|
|
42169
|
+
hasHistory = false;
|
|
42170
|
+
constructor(srcRate, dstRate) {
|
|
42171
|
+
this.srcRate = srcRate;
|
|
42172
|
+
this.dstRate = dstRate;
|
|
42173
|
+
}
|
|
42174
|
+
/** Process a chunk of PCM16-LE mono audio and return resampled PCM16-LE. */
|
|
42175
|
+
process(pcm) {
|
|
42176
|
+
const sampleCount = Math.floor(pcm.length / 2);
|
|
42177
|
+
if (sampleCount === 0) return Buffer.alloc(0);
|
|
42178
|
+
const step = this.srcRate / this.dstRate;
|
|
42179
|
+
const outArr = [];
|
|
42180
|
+
let phase = this.phase;
|
|
42181
|
+
while (true) {
|
|
42182
|
+
const idx = Math.floor(phase);
|
|
42183
|
+
if (idx >= sampleCount) break;
|
|
42184
|
+
const frac = phase - idx;
|
|
42185
|
+
let s0;
|
|
42186
|
+
let s1;
|
|
42187
|
+
if (idx < 0) {
|
|
42188
|
+
s0 = this.hasHistory ? this.lastSample : 0;
|
|
42189
|
+
s1 = pcm.readInt16LE(0);
|
|
42190
|
+
} else {
|
|
42191
|
+
s0 = pcm.readInt16LE(idx * 2);
|
|
42192
|
+
s1 = idx + 1 < sampleCount ? pcm.readInt16LE((idx + 1) * 2) : s0;
|
|
42193
|
+
}
|
|
42194
|
+
const interp = Math.round(s0 + (s1 - s0) * frac);
|
|
42195
|
+
outArr.push(Math.max(-32768, Math.min(32767, interp)));
|
|
42196
|
+
phase += step;
|
|
42197
|
+
}
|
|
42198
|
+
this.lastSample = pcm.readInt16LE((sampleCount - 1) * 2);
|
|
42199
|
+
this.hasHistory = true;
|
|
42200
|
+
this.phase = phase - sampleCount;
|
|
42201
|
+
const out = Buffer.alloc(outArr.length * 2);
|
|
42202
|
+
for (let j = 0; j < outArr.length; j++) out.writeInt16LE(outArr[j], j * 2);
|
|
42203
|
+
return out;
|
|
42204
|
+
}
|
|
42205
|
+
/** Flush any buffered state and reset. Returns any remaining tail output. */
|
|
42206
|
+
flush() {
|
|
42207
|
+
this.phase = 0;
|
|
42208
|
+
this.lastSample = 0;
|
|
42209
|
+
this.hasHistory = false;
|
|
42210
|
+
return Buffer.alloc(0);
|
|
42211
|
+
}
|
|
42212
|
+
};
|
|
40712
42213
|
var DeepFilterNetFilter = class {
|
|
40713
42214
|
modelPath;
|
|
40714
42215
|
silenceWarnings;
|
|
@@ -40716,8 +42217,9 @@ var DeepFilterNetFilter = class {
|
|
|
40716
42217
|
ort = null;
|
|
40717
42218
|
warned = false;
|
|
40718
42219
|
closed = false;
|
|
40719
|
-
//
|
|
42220
|
+
// Stateful resamplers for src_sr↔48k conversions so chunk-boundary
|
|
40720
42221
|
// samples are not discarded. Lazy-created and torn down on rate change.
|
|
42222
|
+
// Uses ArbitraryResampler which supports any integer rate pair.
|
|
40721
42223
|
_resamplerSrcRate = null;
|
|
40722
42224
|
_upsamplerInst = null;
|
|
40723
42225
|
_downsamplerInst = null;
|
|
@@ -40775,8 +42277,8 @@ var DeepFilterNetFilter = class {
|
|
|
40775
42277
|
try {
|
|
40776
42278
|
if (this._resamplerSrcRate !== sampleRate) {
|
|
40777
42279
|
this._resamplerSrcRate = sampleRate;
|
|
40778
|
-
this._upsamplerInst = new
|
|
40779
|
-
this._downsamplerInst = new
|
|
42280
|
+
this._upsamplerInst = new ArbitraryResampler(sampleRate, DEEPFILTERNET_SR);
|
|
42281
|
+
this._downsamplerInst = new ArbitraryResampler(DEEPFILTERNET_SR, sampleRate);
|
|
40780
42282
|
}
|
|
40781
42283
|
const samples = pcm16ToFloat32(pcmChunk);
|
|
40782
42284
|
const pcm16Up = this._upsamplerInst.process(float32ToPcm16(new Float32Array(samples)));
|
|
@@ -40940,6 +42442,17 @@ var Tool = class {
|
|
|
40940
42442
|
parameters;
|
|
40941
42443
|
handler;
|
|
40942
42444
|
webhookUrl;
|
|
42445
|
+
reassurance;
|
|
42446
|
+
/**
|
|
42447
|
+
* Per-tool execution timeout in milliseconds. `undefined` uses the
|
|
42448
|
+
* executor default (10 000 ms). Mirrors Python `timeout_s`.
|
|
42449
|
+
*/
|
|
42450
|
+
timeoutMs;
|
|
42451
|
+
/**
|
|
42452
|
+
* Enable OpenAI strict mode for this tool's function schema. Off by
|
|
42453
|
+
* default. Mirrors Python `strict` on `Tool`.
|
|
42454
|
+
*/
|
|
42455
|
+
strict;
|
|
40943
42456
|
constructor(opts) {
|
|
40944
42457
|
if (!opts.name) {
|
|
40945
42458
|
throw new Error("Tool requires a non-empty name.");
|
|
@@ -40957,6 +42470,9 @@ var Tool = class {
|
|
|
40957
42470
|
this.parameters = opts.parameters ?? { type: "object", properties: {} };
|
|
40958
42471
|
if (hasHandler) this.handler = opts.handler;
|
|
40959
42472
|
if (hasWebhook) this.webhookUrl = opts.webhookUrl;
|
|
42473
|
+
if (opts.reassurance !== void 0) this.reassurance = opts.reassurance;
|
|
42474
|
+
if (opts.timeoutMs !== void 0) this.timeoutMs = opts.timeoutMs;
|
|
42475
|
+
if (opts.strict !== void 0) this.strict = opts.strict;
|
|
40960
42476
|
}
|
|
40961
42477
|
};
|
|
40962
42478
|
function tool(opts) {
|
|
@@ -41120,7 +42636,6 @@ var ChatContext = class _ChatContext {
|
|
|
41120
42636
|
init_cjs_shims();
|
|
41121
42637
|
init_logger();
|
|
41122
42638
|
var DTMF_EVENTS = [
|
|
41123
|
-
"0",
|
|
41124
42639
|
"1",
|
|
41125
42640
|
"2",
|
|
41126
42641
|
"3",
|
|
@@ -41130,6 +42645,7 @@ var DTMF_EVENTS = [
|
|
|
41130
42645
|
"7",
|
|
41131
42646
|
"8",
|
|
41132
42647
|
"9",
|
|
42648
|
+
"0",
|
|
41133
42649
|
"*",
|
|
41134
42650
|
"#",
|
|
41135
42651
|
"A",
|
|
@@ -41809,18 +43325,24 @@ var TelnyxAdapter = class {
|
|
|
41809
43325
|
"/number_orders",
|
|
41810
43326
|
orderBody
|
|
41811
43327
|
);
|
|
41812
|
-
const orderId = order.data?.id
|
|
43328
|
+
const orderId = order.data?.id;
|
|
43329
|
+
if (!orderId) throw new Error("TelnyxAdapter: /number_orders returned no order id");
|
|
41813
43330
|
return { phoneNumber: chosen, orderId };
|
|
41814
43331
|
}
|
|
41815
43332
|
/** Attach a number to a Call Control Application. */
|
|
41816
43333
|
async configureNumber(phoneNumber, opts) {
|
|
41817
43334
|
if (!phoneNumber) throw new Error("TelnyxAdapter: phoneNumber is required");
|
|
41818
43335
|
if (!opts.connectionId) throw new Error("TelnyxAdapter: connectionId is required");
|
|
41819
|
-
|
|
41820
|
-
|
|
41821
|
-
|
|
41822
|
-
|
|
41823
|
-
|
|
43336
|
+
try {
|
|
43337
|
+
await this.request(
|
|
43338
|
+
"PATCH",
|
|
43339
|
+
`/phone_numbers/${encodeURIComponent(phoneNumber)}/voice`,
|
|
43340
|
+
{ connection_id: opts.connectionId, tech_prefix_enabled: false }
|
|
43341
|
+
);
|
|
43342
|
+
} catch (err) {
|
|
43343
|
+
const status = err instanceof Error ? err.message.replace(/\+\d{7,15}/g, "[REDACTED]") : String(err);
|
|
43344
|
+
throw new Error(`TelnyxAdapter: configureNumber failed: ${status}`);
|
|
43345
|
+
}
|
|
41824
43346
|
}
|
|
41825
43347
|
/**
|
|
41826
43348
|
* Place an outbound call on the Call Control Application.
|
|
@@ -41928,7 +43450,7 @@ var TelnyxSTT = class {
|
|
|
41928
43450
|
/** Stable pricing/dashboard key — read by stream-handler/metrics. */
|
|
41929
43451
|
static providerKey = "telnyx_stt";
|
|
41930
43452
|
ws = null;
|
|
41931
|
-
callbacks =
|
|
43453
|
+
callbacks = /* @__PURE__ */ new Set();
|
|
41932
43454
|
headerSent = false;
|
|
41933
43455
|
/** Open the streaming WebSocket and arm message handlers. */
|
|
41934
43456
|
async connect() {
|
|
@@ -41984,14 +43506,13 @@ var TelnyxSTT = class {
|
|
|
41984
43506
|
}
|
|
41985
43507
|
this.ws.send(audio);
|
|
41986
43508
|
}
|
|
41987
|
-
/** Register a transcript listener
|
|
43509
|
+
/** Register a transcript listener. */
|
|
41988
43510
|
onTranscript(callback) {
|
|
41989
|
-
|
|
41990
|
-
|
|
41991
|
-
|
|
41992
|
-
|
|
41993
|
-
|
|
41994
|
-
this.callbacks.push(callback);
|
|
43511
|
+
this.callbacks.add(callback);
|
|
43512
|
+
}
|
|
43513
|
+
/** Unregister a previously-registered transcript listener. */
|
|
43514
|
+
offTranscript(callback) {
|
|
43515
|
+
this.callbacks.delete(callback);
|
|
41995
43516
|
}
|
|
41996
43517
|
/** Close the streaming WebSocket. */
|
|
41997
43518
|
close() {
|
|
@@ -42002,6 +43523,7 @@ var TelnyxSTT = class {
|
|
|
42002
43523
|
}
|
|
42003
43524
|
this.ws = null;
|
|
42004
43525
|
}
|
|
43526
|
+
this.headerSent = false;
|
|
42005
43527
|
}
|
|
42006
43528
|
};
|
|
42007
43529
|
|
|
@@ -42023,6 +43545,7 @@ var TelnyxTTSSampleRate = {
|
|
|
42023
43545
|
HZ_24000: 24e3
|
|
42024
43546
|
};
|
|
42025
43547
|
var DEFAULT_VOICE = TelnyxTTSVoice.NATURAL_HD_ASTRA;
|
|
43548
|
+
var FRAME_TIMEOUT_MS2 = 3e4;
|
|
42026
43549
|
var TelnyxTTS = class {
|
|
42027
43550
|
constructor(apiKey, voice = DEFAULT_VOICE, baseUrl = TELNYX_TTS_WS_URL) {
|
|
42028
43551
|
this.apiKey = apiKey;
|
|
@@ -42050,69 +43573,83 @@ var TelnyxTTS = class {
|
|
|
42050
43573
|
*/
|
|
42051
43574
|
async *synthesizeStream(text) {
|
|
42052
43575
|
const url2 = `${this.baseUrl}?voice=${encodeURIComponent(this.voice)}`;
|
|
42053
|
-
|
|
42054
|
-
|
|
42055
|
-
|
|
42056
|
-
|
|
42057
|
-
|
|
42058
|
-
|
|
42059
|
-
|
|
42060
|
-
|
|
43576
|
+
let ws = null;
|
|
43577
|
+
try {
|
|
43578
|
+
let push2 = function(item) {
|
|
43579
|
+
const w = waiters.shift();
|
|
43580
|
+
if (w) {
|
|
43581
|
+
w(item);
|
|
43582
|
+
} else {
|
|
43583
|
+
queue.push(item);
|
|
43584
|
+
}
|
|
43585
|
+
};
|
|
43586
|
+
var push = push2;
|
|
43587
|
+
ws = new import_ws13.default(url2, {
|
|
43588
|
+
headers: { Authorization: `Bearer ${this.apiKey}` }
|
|
42061
43589
|
});
|
|
42062
|
-
|
|
42063
|
-
|
|
42064
|
-
|
|
43590
|
+
await new Promise((resolve2, reject) => {
|
|
43591
|
+
const timer = setTimeout(() => reject(new Error("Telnyx TTS connect timeout")), 1e4);
|
|
43592
|
+
ws.once("open", () => {
|
|
43593
|
+
clearTimeout(timer);
|
|
43594
|
+
resolve2();
|
|
43595
|
+
});
|
|
43596
|
+
ws.once("error", (err) => {
|
|
43597
|
+
clearTimeout(timer);
|
|
43598
|
+
reject(err);
|
|
43599
|
+
});
|
|
42065
43600
|
});
|
|
42066
|
-
|
|
42067
|
-
|
|
42068
|
-
|
|
42069
|
-
|
|
42070
|
-
|
|
42071
|
-
|
|
42072
|
-
|
|
42073
|
-
|
|
42074
|
-
|
|
42075
|
-
}
|
|
42076
|
-
}
|
|
42077
|
-
ws.on("message", (raw) => {
|
|
42078
|
-
let data;
|
|
42079
|
-
try {
|
|
42080
|
-
data = JSON.parse(raw.toString());
|
|
42081
|
-
} catch {
|
|
42082
|
-
getLogger().warn("TelnyxTTS: received invalid JSON");
|
|
42083
|
-
return;
|
|
42084
|
-
}
|
|
42085
|
-
const audioB64 = data.audio;
|
|
42086
|
-
if (!audioB64) return;
|
|
42087
|
-
try {
|
|
42088
|
-
const audioBytes = Buffer.from(audioB64, "base64");
|
|
42089
|
-
if (audioBytes.length > 0) {
|
|
42090
|
-
push(audioBytes);
|
|
43601
|
+
const queue = [];
|
|
43602
|
+
const waiters = [];
|
|
43603
|
+
ws.on("message", (raw) => {
|
|
43604
|
+
let data;
|
|
43605
|
+
try {
|
|
43606
|
+
data = JSON.parse(raw.toString());
|
|
43607
|
+
} catch {
|
|
43608
|
+
getLogger().warn("TelnyxTTS: received invalid JSON");
|
|
43609
|
+
return;
|
|
42091
43610
|
}
|
|
42092
|
-
|
|
42093
|
-
|
|
42094
|
-
|
|
42095
|
-
|
|
42096
|
-
|
|
42097
|
-
|
|
42098
|
-
|
|
42099
|
-
|
|
42100
|
-
|
|
42101
|
-
|
|
42102
|
-
|
|
42103
|
-
|
|
42104
|
-
|
|
43611
|
+
const audioB64 = data.audio;
|
|
43612
|
+
if (!audioB64) return;
|
|
43613
|
+
try {
|
|
43614
|
+
const audioBytes = Buffer.from(audioB64, "base64");
|
|
43615
|
+
if (audioBytes.length > 0) {
|
|
43616
|
+
push2(audioBytes);
|
|
43617
|
+
}
|
|
43618
|
+
} catch {
|
|
43619
|
+
}
|
|
43620
|
+
});
|
|
43621
|
+
ws.on("close", () => {
|
|
43622
|
+
push2(null);
|
|
43623
|
+
});
|
|
43624
|
+
ws.on("error", (err) => {
|
|
43625
|
+
push2({ error: err instanceof Error ? err : new Error(String(err)) });
|
|
43626
|
+
});
|
|
43627
|
+
ws.send(JSON.stringify({ text: " " }));
|
|
43628
|
+
ws.send(JSON.stringify({ text }));
|
|
43629
|
+
ws.send(JSON.stringify({ text: "" }));
|
|
42105
43630
|
while (true) {
|
|
42106
|
-
|
|
43631
|
+
let frameTimer;
|
|
43632
|
+
const item = queue.length > 0 ? queue.shift() : await Promise.race([
|
|
43633
|
+
new Promise((resolve2) => waiters.push(resolve2)),
|
|
43634
|
+
new Promise((_, reject) => {
|
|
43635
|
+
frameTimer = setTimeout(
|
|
43636
|
+
() => reject(new Error("Telnyx TTS frame timeout")),
|
|
43637
|
+
FRAME_TIMEOUT_MS2
|
|
43638
|
+
);
|
|
43639
|
+
})
|
|
43640
|
+
]).finally(() => {
|
|
43641
|
+
if (frameTimer !== void 0) clearTimeout(frameTimer);
|
|
43642
|
+
});
|
|
42107
43643
|
if (item === null) return;
|
|
42108
43644
|
if (typeof item === "object" && "error" in item) throw item.error;
|
|
42109
43645
|
yield item;
|
|
42110
43646
|
}
|
|
42111
43647
|
} finally {
|
|
42112
43648
|
try {
|
|
42113
|
-
ws
|
|
43649
|
+
ws?.close();
|
|
42114
43650
|
} catch {
|
|
42115
43651
|
}
|
|
43652
|
+
ws?.removeAllListeners();
|
|
42116
43653
|
}
|
|
42117
43654
|
}
|
|
42118
43655
|
};
|
|
@@ -42160,6 +43697,7 @@ init_event_bus();
|
|
|
42160
43697
|
GoogleLLM,
|
|
42161
43698
|
GroqLLM,
|
|
42162
43699
|
Guardrail,
|
|
43700
|
+
HermesLLM,
|
|
42163
43701
|
IVRActivity,
|
|
42164
43702
|
InworldTTS,
|
|
42165
43703
|
KrispFrameDuration,
|
|
@@ -42170,6 +43708,8 @@ init_event_bus();
|
|
|
42170
43708
|
MetricsStore,
|
|
42171
43709
|
MinWordsStrategy,
|
|
42172
43710
|
Ngrok,
|
|
43711
|
+
OpenAICompatibleLLM,
|
|
43712
|
+
OpenAICompatibleLLMProvider,
|
|
42173
43713
|
OpenAILLM,
|
|
42174
43714
|
OpenAILLMProvider,
|
|
42175
43715
|
OpenAIRealtime,
|
|
@@ -42183,10 +43723,12 @@ init_event_bus();
|
|
|
42183
43723
|
OpenAITranscribeSTT,
|
|
42184
43724
|
OpenAITranscriptionModel,
|
|
42185
43725
|
OpenAIVoice,
|
|
43726
|
+
OpenClawLLM,
|
|
42186
43727
|
PRICING_LAST_UPDATED,
|
|
42187
43728
|
PRICING_VERSION,
|
|
42188
43729
|
PartialStreamError,
|
|
42189
43730
|
Patter,
|
|
43731
|
+
PatterConfigError,
|
|
42190
43732
|
PatterConnectionError,
|
|
42191
43733
|
PatterError,
|
|
42192
43734
|
PatterTool,
|
|
@@ -42274,6 +43816,8 @@ init_event_bus();
|
|
|
42274
43816
|
mulawToPcm16,
|
|
42275
43817
|
notifyDashboard,
|
|
42276
43818
|
openaiTts,
|
|
43819
|
+
openclawConsult,
|
|
43820
|
+
openclawPostCallNotifier,
|
|
42277
43821
|
pcm16ToMulaw,
|
|
42278
43822
|
resample16kTo8k,
|
|
42279
43823
|
resample24kTo16k,
|