getpatter 0.6.5 → 0.6.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-CRPJLVHB.mjs → chunk-YJX2EKON.mjs} +649 -80
- package/dist/cli.js +492 -2
- package/dist/index.d.mts +233 -10
- package/dist/index.d.ts +233 -10
- package/dist/index.js +1572 -198
- package/dist/index.mjs +864 -78
- package/dist/{test-mode-HGHI2AUV.mjs → test-mode-XFOADUNE.mjs} +1 -1
- package/package.json +1 -1
|
@@ -29,6 +29,105 @@ import express from "express";
|
|
|
29
29
|
import { createServer } from "http";
|
|
30
30
|
import { WebSocketServer } from "ws";
|
|
31
31
|
|
|
32
|
+
// src/telemetry/call-metrics.ts
|
|
33
|
+
init_esm_shims();
|
|
34
|
+
/**
 * Map a provider mode string to the coarse engine family used as a
 * telemetry dimension.
 *
 * @param {unknown} mode - Provider mode identifier.
 * @returns {"realtime" | "convai" | "pipeline" | "other"} Engine family;
 *   any unrecognised value (including non-strings) yields "other".
 */
function engineFromMode(mode) {
  switch (mode) {
    case "openai_realtime":
    case "openai_realtime_2":
      return "realtime";
    case "elevenlabs_convai":
      return "convai";
    case "pipeline":
      return "pipeline";
    default:
      return "other";
  }
}
|
|
40
|
+
/**
 * Derive the provider telemetry dimension from a call-metrics object.
 *
 * The provider mode wins when it names a known realtime/convai engine.
 * Otherwise the first non-empty string among `llm_provider`,
 * `stt_provider` and `tts_provider` (checked in that order) is returned
 * lowercased.
 *
 * @param {object} m - Call metrics record.
 * @returns {string} "openai", "elevenlabs", a lowercased provider name, or
 *   "other" when nothing usable is present.
 */
function providerFromMetrics(m) {
  const { provider_mode: mode } = m;
  if (["openai_realtime", "openai_realtime_2"].includes(mode)) return "openai";
  if (mode === "elevenlabs_convai") return "elevenlabs";

  const providerKeys = ["llm_provider", "stt_provider", "tts_provider"];
  const firstNamed = providerKeys.find((key) => {
    const candidate = m[key];
    return typeof candidate === "string" && candidate !== "";
  });
  return firstNamed === undefined ? "other" : m[firstNamed].toLowerCase();
}
|
|
50
|
+
/**
 * Derive the provider telemetry dimension from the provider mode alone.
 *
 * @param {unknown} mode - Provider mode identifier.
 * @returns {"openai" | "elevenlabs" | "other"} Provider family; every mode
 *   other than the OpenAI realtime and ElevenLabs ConvAI ones is "other".
 */
function providerFromMode(mode) {
  switch (mode) {
    case "openai_realtime":
    case "openai_realtime_2":
      return "openai";
    case "elevenlabs_convai":
      return "elevenlabs";
    default:
      return "other";
  }
}
|
|
55
|
+
/**
 * Normalise a telephony provider name into the carrier telemetry dimension.
 *
 * @param {unknown} tp - Telephony provider name.
 * @returns {string} The lowercased name, or "none" when `tp` is not a
 *   non-empty string.
 */
function carrierFamily(tp) {
  if (typeof tp !== "string" || tp === "") {
    return "none";
  }
  return tp.toLowerCase();
}
|
|
58
|
+
/**
 * Normalise a call direction value.
 *
 * @param {unknown} value - Candidate direction.
 * @returns {"inbound" | "outbound" | undefined} The lowercased direction
 *   when it is one of the two recognised values (case-insensitive);
 *   `undefined` for anything else, including non-strings.
 */
function direction(value) {
  if (typeof value !== "string") return undefined;
  const lowered = value.toLowerCase();
  const recognised = lowered === "inbound" || lowered === "outbound";
  return recognised ? lowered : undefined;
}
|
|
62
|
+
/**
 * Bucket a turn count into a coarse label for telemetry.
 *
 * @param {number} n - Number of turns.
 * @returns {"0" | "1" | "2_3" | "4_6" | "7_12" | "13_plus"} Bucket label.
 *   Values at or below zero map to "0"; exactly one maps to "1"; anything
 *   that matches no upper bound falls through to "13_plus".
 */
function turnCountBucket(n) {
  if (n <= 0) return "0";
  if (n === 1) return "1";
  // Inclusive upper bounds, checked in ascending order.
  const upperBounds = [
    [3, "2_3"],
    [6, "4_6"],
    [12, "7_12"],
  ];
  const match = upperBounds.find(([max]) => n <= max);
  return match ? match[1] : "13_plus";
}
|
|
70
|
+
/**
 * Read the p95 agent-response latency from a call-metrics object.
 *
 * @param {object} m - Call metrics record.
 * @returns {*} `m.latency_p95.agent_response_ms` when `latency_p95` is a
 *   non-null object, otherwise `undefined`. The value is returned as-is
 *   (no type or range validation happens here).
 */
function latencyMs(m) {
  const percentile = m.latency_p95;
  const isObject = percentile !== null && typeof percentile === "object";
  return isObject ? percentile.agent_response_ms : undefined;
}
|
|
77
|
+
/**
 * Emit a `call_started` telemetry event.
 *
 * The payload carries `engine`, `provider` and `carrier` derived from
 * `opts.providerMode` / `opts.telephonyProvider`, plus `direction` when
 * `opts.direction` normalises to "inbound" or "outbound".
 *
 * Best-effort: does nothing when `telemetry` is falsy, and any exception
 * raised while building or recording the event is swallowed.
 *
 * @param {{ record: Function } | null | undefined} telemetry - Telemetry sink.
 * @param {object} opts - Call options (`providerMode`, `telephonyProvider`,
 *   `direction`).
 * @returns {void}
 */
function recordCallStarted(telemetry, opts) {
  if (!telemetry) return;
  try {
    const base = {
      engine: engineFromMode(opts.providerMode),
      provider: providerFromMode(opts.providerMode),
      carrier: carrierFamily(opts.telephonyProvider),
    };
    const callDirection = direction(opts.direction);
    const payload = callDirection === undefined ? base : { ...base, direction: callDirection };
    telemetry.record("call_started", payload);
  } catch {
    // Intentionally ignored: a telemetry failure must not surface to the caller.
  }
}
|
|
91
|
+
/**
 * Emit a `call_completed` telemetry event.
 *
 * Always carries `outcome` (from `opts.outcome`) and, when it normalises to
 * "inbound"/"outbound", `direction`. When `opts.metrics` is an object the
 * payload is enriched with `engine`, `provider`, `carrier`, and — each only
 * when present and valid — `duration_seconds`, `latency_ms`, `cost_usd`,
 * `turn_count_bucket` and `error_code`. A non-empty `error_code` forces
 * `outcome` to "error". Without metrics, `opts.carrier` (if defined) supplies
 * the carrier.
 *
 * Numeric dimensions are only emitted for finite numbers: `NaN`/`Infinity`
 * would otherwise survive `Math.round`/`Math.max` and end up in the payload.
 * This matches the guard already applied to `cost.total`.
 *
 * Best-effort: does nothing when `telemetry` is falsy, and any exception
 * raised while building or recording the event is swallowed.
 *
 * @param {{ record: Function } | null | undefined} telemetry - Telemetry sink.
 * @param {object} opts - `outcome`, `direction`, optional `metrics`, optional
 *   `carrier`.
 * @returns {void}
 */
function recordCallCompleted(telemetry, opts) {
  if (!telemetry) return;
  try {
    const dims = { outcome: opts.outcome };
    const d = direction(opts.direction);
    if (d !== undefined) dims.direction = d;

    const metrics = opts.metrics;
    if (metrics && typeof metrics === "object") {
      const m = metrics;
      dims.engine = engineFromMode(m.provider_mode);
      dims.provider = providerFromMetrics(m);
      dims.carrier = carrierFamily(m.telephony_provider);

      // Number.isFinite is false for non-numbers, NaN and ±Infinity.
      if (Number.isFinite(m.duration_seconds)) {
        dims.duration_seconds = Math.max(0, Math.round(m.duration_seconds));
      }

      const lat = latencyMs(m);
      if (Number.isFinite(lat)) {
        dims.latency_ms = Math.max(0, Math.round(lat));
      }

      const cost = m.cost;
      if (cost && typeof cost === "object") {
        const total = cost.total;
        if (Number.isFinite(total)) {
          // Round to 4 decimal places.
          dims.cost_usd = Math.max(0, Math.round(total * 1e4) / 1e4);
        }
      }

      if (Array.isArray(m.turns)) {
        dims.turn_count_bucket = turnCountBucket(m.turns.length);
      }

      const errorCode = m.error_code;
      if (typeof errorCode === "string" && errorCode) {
        dims.error_code = errorCode;
        // A recorded error code overrides whatever outcome the caller passed.
        dims.outcome = "error";
      }
    } else if (opts.carrier !== undefined) {
      dims.carrier = carrierFamily(opts.carrier);
    }

    telemetry.record("call_completed", dims);
  } catch {
    // Intentionally ignored: a telemetry failure must not surface to the caller.
  }
}
|
|
130
|
+
|
|
32
131
|
// src/providers/elevenlabs-convai.ts
|
|
33
132
|
init_esm_shims();
|
|
34
133
|
import WebSocket from "ws";
|
|
@@ -2826,6 +2925,9 @@ var CallMetricsAccumulator = class {
|
|
|
2826
2925
|
ttsModel;
|
|
2827
2926
|
realtimeModel;
|
|
2828
2927
|
_pricing;
|
|
2928
|
+
// Terminal error code (lowercased ErrorCode value or "other"); set by
|
|
2929
|
+
// recordError when the call ends abnormally. Empty for a clean call.
|
|
2930
|
+
_errorCode = "";
|
|
2829
2931
|
_callStart;
|
|
2830
2932
|
_turns = [];
|
|
2831
2933
|
// mutable internal array; immutable when exposed via TurnMetrics[] → readonly TurnMetrics[]
|
|
@@ -3396,11 +3498,35 @@ var CallMetricsAccumulator = class {
|
|
|
3396
3498
|
telephony_provider: this.telephonyProvider,
|
|
3397
3499
|
stt_model: this.sttModel,
|
|
3398
3500
|
tts_model: this.ttsModel,
|
|
3399
|
-
llm_model: this._llmModel
|
|
3501
|
+
llm_model: this._llmModel,
|
|
3502
|
+
error_code: this._errorCode
|
|
3400
3503
|
};
|
|
3401
3504
|
this._eventBus?.emit("call_ended", { callId: this.callId, metrics });
|
|
3402
3505
|
return metrics;
|
|
3403
3506
|
}
|
|
3507
|
+
/**
|
|
3508
|
+
* Record the call's terminal error as a coarse, anonymous code. Stores the
|
|
3509
|
+
* PatterError `.code` lowercased; maps common timeout/connection errors; falls
|
|
3510
|
+
* back to "other". Never stores the message. Last write wins.
|
|
3511
|
+
*/
|
|
3512
|
+
recordError(err) {
|
|
3513
|
+
const code = err?.code;
|
|
3514
|
+
const name = err?.name;
|
|
3515
|
+
const sys = typeof code === "string" ? code : "";
|
|
3516
|
+
if (sys.startsWith("ECONN") || sys === "EHOSTUNREACH" || sys === "ENETUNREACH" || sys === "EPIPE") {
|
|
3517
|
+
this._errorCode = "connection";
|
|
3518
|
+
return;
|
|
3519
|
+
}
|
|
3520
|
+
if (typeof code === "string" && code) {
|
|
3521
|
+
this._errorCode = code.toLowerCase();
|
|
3522
|
+
return;
|
|
3523
|
+
}
|
|
3524
|
+
if (name === "TimeoutError" || name === "AbortError") {
|
|
3525
|
+
this._errorCode = "timeout";
|
|
3526
|
+
} else {
|
|
3527
|
+
this._errorCode = "other";
|
|
3528
|
+
}
|
|
3529
|
+
}
|
|
3404
3530
|
/** Return the cost breakdown for the call so far without ending it. */
|
|
3405
3531
|
getCostSoFar() {
|
|
3406
3532
|
const duration = (hrTimeMs() - this._callStart) / 1e3;
|
|
@@ -4879,6 +5005,28 @@ function isSttHallucination(text) {
|
|
|
4879
5005
|
const pieces = stripped.split(/[.!?…。!?]+/u).map((p) => p.trim()).filter((p) => p.length > 0);
|
|
4880
5006
|
return pieces.length > 1 && pieces.every((p) => HALLUCINATIONS.has(p));
|
|
4881
5007
|
}
|
|
5008
|
+
// Minimum fraction of the candidate's words that must also appear in the
// agent text for the candidate to be treated as an echo.
var ECHO_WORD_OVERLAP_THRESHOLD = 0.6;
// Candidates with fewer words than this are never classified as echo.
var ECHO_MIN_CANDIDATE_WORDS = 4;

/**
 * Normalise text for echo comparison: lowercase, replace every character
 * that is not a Unicode letter, number or whitespace with a space, trim,
 * and collapse each whitespace run to a single space.
 *
 * @param {string} text - Raw text.
 * @returns {string} Normalised text (possibly empty).
 */
function normalizeForEcho(text) {
  // One global collapse after trimming is sufficient; a preceding
  // single-match whitespace replace would not change the result.
  return text
    .toLowerCase()
    .replace(/[^\p{L}\p{N}\s]/gu, " ")
    .trim()
    .replace(/\s+/gu, " ");
}

/**
 * Decide whether `candidate` looks like the agent's own speech.
 *
 * After normalising both strings, the candidate is an echo when it has at
 * least ECHO_MIN_CANDIDATE_WORDS words and either appears verbatim inside
 * the agent text, or at least ECHO_WORD_OVERLAP_THRESHOLD of its words are
 * present in the agent text's word set.
 *
 * @param {string} candidate - Transcript under test.
 * @param {string} agentText - Text the agent has spoken.
 * @returns {boolean} `true` when the candidate is considered an echo.
 */
function looksLikeEcho(candidate, agentText) {
  const a = normalizeForEcho(agentText);
  const c = normalizeForEcho(candidate);
  if (!a || !c) return false;

  const words = c.split(" ").filter(Boolean);
  if (words.length < ECHO_MIN_CANDIDATE_WORDS) return false;
  if (a.includes(c)) return true;

  const agentWords = new Set(a.split(" "));
  const overlap = words.filter((w) => agentWords.has(w)).length / words.length;
  return overlap >= ECHO_WORD_OVERLAP_THRESHOLD;
}
|
|
5024
|
+
/**
 * Report whether two strings are near-duplicates: identical, or one is the
 * other followed by a space and more text.
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @returns {boolean} `false` when either argument is falsy; otherwise
 *   `true` for an exact match or when the longer string starts with the
 *   shorter one plus a single space.
 */
function isNearDuplicate(a, b) {
  if (!a || !b) {
    return false;
  }
  if (a === b) {
    return true;
  }
  return a.length <= b.length ? b.startsWith(a + " ") : a.startsWith(b + " ");
}
|
|
4882
5030
|
var StreamHandler = class _StreamHandler {
|
|
4883
5031
|
deps;
|
|
4884
5032
|
ws;
|
|
@@ -4891,6 +5039,17 @@ var StreamHandler = class _StreamHandler {
|
|
|
4891
5039
|
stt = null;
|
|
4892
5040
|
tts = null;
|
|
4893
5041
|
isSpeaking = false;
|
|
5042
|
+
/**
|
|
5043
|
+
* True only while the post-TTS tail-grace window is pending: the agent has
|
|
5044
|
+
* finished its turn but ``isSpeaking`` is still held for
|
|
5045
|
+
* ``PATTER_TTS_TAIL_GRACE_MS`` to swallow the fading echo tail. A VAD
|
|
5046
|
+
* ``speech_start`` (or a transcript) during this window is the user's NEXT
|
|
5047
|
+
* turn, not a barge-in — there is nothing left to interrupt. Set by
|
|
5048
|
+
* ``endSpeakingWithGrace``; cleared by ``beginSpeaking``, the grace flip,
|
|
5049
|
+
* ``cancelSpeaking``, and ``endTailGraceForNewTurn``. Parity with Python
|
|
5050
|
+
* ``_tail_grace_active``.
|
|
5051
|
+
*/
|
|
5052
|
+
tailGraceActive = false;
|
|
4894
5053
|
/**
|
|
4895
5054
|
* Ring buffer of inbound PCM16 16 kHz frames captured while the agent
|
|
4896
5055
|
* is speaking and the self-hearing guard is dropping audio. On
|
|
@@ -4966,6 +5125,35 @@ var StreamHandler = class _StreamHandler {
|
|
|
4966
5125
|
* ``isSpeaking=false``, and silently cut the agent's first turn.
|
|
4967
5126
|
*/
|
|
4968
5127
|
firstAudioSentAt = null;
|
|
5128
|
+
/**
|
|
5129
|
+
* Estimated wall-clock (ms) when the LAST audio byte pushed to the carrier
|
|
5130
|
+
* finishes PLAYING on the phone. The pipeline pushes TTS audio as fast as
|
|
5131
|
+
* the provider synthesizes it (no pacing) and the carrier buffers + plays
|
|
5132
|
+
* at realtime, so "we finished pushing" and "the caller finished hearing"
|
|
5133
|
+
* can diverge by tens of seconds — especially with agent-runtime LLMs
|
|
5134
|
+
* (Hermes/OpenClaw) that deliver a long reply all at once after a thinking
|
|
5135
|
+
* pause. ``endSpeakingWithGrace`` holds ``isSpeaking=true`` (with
|
|
5136
|
+
* ``tailGraceActive=false``) until this cursor passes, so a barge-in during
|
|
5137
|
+
* the audible backlog still takes the cancel path (``sendClear`` drops the
|
|
5138
|
+
* carrier buffer) instead of being treated as a calm next turn. Advanced by
|
|
5139
|
+
* ``trackOutboundPlayback``; reset by ``cancelSpeaking`` (the buffer is
|
|
5140
|
+
* cleared) and ``endTailGraceForNewTurn``.
|
|
5141
|
+
*/
|
|
5142
|
+
playbackBufferedUntil = 0;
|
|
5143
|
+
/**
|
|
5144
|
+
* Per-turn playback timeline used to estimate the response prefix the
|
|
5145
|
+
* caller actually HEARD when a barge-in lands. ``turnPlaybackTotalMs``
|
|
5146
|
+
* accumulates the playout duration of every chunk pushed this turn
|
|
5147
|
+
* (including filler audio, which keeps the timeline aligned);
|
|
5148
|
+
* ``turnSpokenSegments`` records ``{text, startMs}`` for each RESPONSE
|
|
5149
|
+
* sentence at its first audible chunk (filler / error-fallback audio
|
|
5150
|
+
* advances the clock but adds no segment). ``heard = total - backlog``
|
|
5151
|
+
* then maps to a sentence-granular prefix — see ``heardResponsePrefix``.
|
|
5152
|
+
* Both reset at ``beginSpeaking``. Mirrors Python
|
|
5153
|
+
* ``_turn_playback_total_s`` / ``_turn_spoken_segments``.
|
|
5154
|
+
*/
|
|
5155
|
+
turnPlaybackTotalMs = 0;
|
|
5156
|
+
turnSpokenSegments = [];
|
|
4969
5157
|
/**
|
|
4970
5158
|
* Optional barge-in confirmation strategies. With an empty array the
|
|
4971
5159
|
* SDK falls back to the legacy "cancel on first VAD speech_start"
|
|
@@ -5083,11 +5271,15 @@ var StreamHandler = class _StreamHandler {
|
|
|
5083
5271
|
}
|
|
5084
5272
|
this.speakingGeneration++;
|
|
5085
5273
|
this.isSpeaking = true;
|
|
5274
|
+
this.tailGraceActive = false;
|
|
5086
5275
|
this.speakingStartedAt = Date.now();
|
|
5087
5276
|
this.suppressedSpeechPending = false;
|
|
5088
5277
|
void isFirstMessage;
|
|
5089
5278
|
this.firstAudioSentAt = Date.now();
|
|
5090
5279
|
this.inboundAudioRing = [];
|
|
5280
|
+
this.currentAgentSpokenText = "";
|
|
5281
|
+
this.turnPlaybackTotalMs = 0;
|
|
5282
|
+
this.turnSpokenSegments = [];
|
|
5091
5283
|
this.resetVad();
|
|
5092
5284
|
}
|
|
5093
5285
|
/**
|
|
@@ -5102,6 +5294,87 @@ var StreamHandler = class _StreamHandler {
|
|
|
5102
5294
|
this.firstAudioSentAt = Date.now();
|
|
5103
5295
|
}
|
|
5104
5296
|
}
|
|
5297
|
+
/**
|
|
5298
|
+
* Advance ``playbackBufferedUntil`` by the playout duration of an outbound
|
|
5299
|
+
* TTS chunk. ``numBytes`` is the size of the chunk BEFORE carrier encoding
|
|
5300
|
+
* (the same buffer handed to ``encodePipelineAudio``): PCM16 @ 16 kHz in
|
|
5301
|
+
* the default path (32 bytes/ms), or the carrier's native μ-law @ 8 kHz
|
|
5302
|
+
* (8 bytes/ms) when the TTS adapter emits wire format directly
|
|
5303
|
+
* (``ttsOutputFormatNativeForCarrier`` — Twilio/Plivo ``ulaw_8000``;
|
|
5304
|
+
* Telnyx native is ``pcm_16000`` so it stays at 32 bytes/ms).
|
|
5305
|
+
*/
|
|
5306
|
+
trackOutboundPlayback(numBytes) {
|
|
5307
|
+
if (numBytes <= 0) return;
|
|
5308
|
+
const bytesPerMs = this.ttsOutputFormatNativeForCarrier && this.deps.bridge.telephonyProvider !== "telnyx" ? 8 : 32;
|
|
5309
|
+
const now = Date.now();
|
|
5310
|
+
const chunkMs = numBytes / bytesPerMs;
|
|
5311
|
+
const base = this.playbackBufferedUntil > now ? this.playbackBufferedUntil : now;
|
|
5312
|
+
this.playbackBufferedUntil = base + chunkMs;
|
|
5313
|
+
this.turnPlaybackTotalMs += chunkMs;
|
|
5314
|
+
}
|
|
5315
|
+
/**
|
|
5316
|
+
* Estimate the response prefix the caller actually HEARD this turn.
|
|
5317
|
+
*
|
|
5318
|
+
* The pipeline pushes audio faster than realtime, so at barge-in time
|
|
5319
|
+
* ``heard = totalPushed - carrierBacklog`` ms of audio have actually
|
|
5320
|
+
* played. Mapped at sentence granularity against ``turnSpokenSegments``:
|
|
5321
|
+
* a sentence counts as heard once its playback has STARTED
|
|
5322
|
+
* (``startMs <= heardMs``), so the sentence playing at the moment of
|
|
5323
|
+
* interruption is included.
|
|
5324
|
+
*
|
|
5325
|
+
* Returns ``null`` when no segments were tracked this turn (nothing
|
|
5326
|
+
* synthesized through the tracked path — callers fall back to the legacy
|
|
5327
|
+
* full-text behaviour). Mirrors Python ``_heard_response_prefix``.
|
|
5328
|
+
*/
|
|
5329
|
+
heardResponsePrefix() {
|
|
5330
|
+
if (this.turnSpokenSegments.length === 0) return null;
|
|
5331
|
+
const remainingMs = Math.max(0, this.playbackBufferedUntil - Date.now());
|
|
5332
|
+
const heardMs = Math.max(0, this.turnPlaybackTotalMs - remainingMs);
|
|
5333
|
+
const heard = this.turnSpokenSegments.filter((s) => s.startMs <= heardMs);
|
|
5334
|
+
return {
|
|
5335
|
+
text: heard.map((s) => s.text).join(" "),
|
|
5336
|
+
heardEverything: heard.length === this.turnSpokenSegments.length
|
|
5337
|
+
};
|
|
5338
|
+
}
|
|
5339
|
+
/**
|
|
5340
|
+
* Replace the text of the most recent assistant entry in the conversation
|
|
5341
|
+
* history. No-op when the last entry is not an assistant turn (e.g. the
|
|
5342
|
+
* caller's next turn was already committed).
|
|
5343
|
+
*/
|
|
5344
|
+
rewriteLastAssistantEntry(text) {
|
|
5345
|
+
const entries = this.history.entries;
|
|
5346
|
+
const last = entries[entries.length - 1];
|
|
5347
|
+
if (last && last.role === "assistant") {
|
|
5348
|
+
entries[entries.length - 1] = { ...last, text };
|
|
5349
|
+
}
|
|
5350
|
+
}
|
|
5351
|
+
/**
|
|
5352
|
+
* LiveKit-style "heard prefix" semantics for a barge-in that lands AFTER
|
|
5353
|
+
* the turn completed, while the carrier is still playing the buffered
|
|
5354
|
+
* tail.
|
|
5355
|
+
*
|
|
5356
|
+
* The completed turn already recorded its FULL reply in history, but the
|
|
5357
|
+
* caller only heard part of it before interrupting — a stateful agent
|
|
5358
|
+
* runtime (Hermes / OpenClaw) would otherwise "remember saying" things
|
|
5359
|
+
* the caller never heard. Rewrites the last assistant entry to the heard
|
|
5360
|
+
* prefix + ``[interrupted by caller]``.
|
|
5361
|
+
*
|
|
5362
|
+
* MUST run BEFORE ``cancelSpeaking`` resets ``playbackBufferedUntil``
|
|
5363
|
+
* (the backlog is the heard-prefix input). No-op when a turn is still in
|
|
5364
|
+
* flight (the streaming path applies its own marker), when there is no
|
|
5365
|
+
* backlog, or when everything was already heard. Mirrors Python
|
|
5366
|
+
* ``_maybe_truncate_completed_turn_history``.
|
|
5367
|
+
*/
|
|
5368
|
+
maybeTruncateCompletedTurnHistory() {
|
|
5369
|
+
if (this.dispatchTask !== null) return;
|
|
5370
|
+
const remainingMs = this.playbackBufferedUntil - Date.now();
|
|
5371
|
+
if (remainingMs <= 0) return;
|
|
5372
|
+
const heard = this.heardResponsePrefix();
|
|
5373
|
+
if (heard === null || heard.heardEverything) return;
|
|
5374
|
+
this.rewriteLastAssistantEntry(
|
|
5375
|
+
heard.text ? `${heard.text} [interrupted by caller]` : "[interrupted by caller]"
|
|
5376
|
+
);
|
|
5377
|
+
}
|
|
5105
5378
|
/**
|
|
5106
5379
|
* Atomically end speaking AND invalidate any pending grace timer.
|
|
5107
5380
|
* Use instead of ``this.isSpeaking = false`` at barge-in sites.
|
|
@@ -5112,10 +5385,12 @@ var StreamHandler = class _StreamHandler {
|
|
|
5112
5385
|
cancelSpeaking() {
|
|
5113
5386
|
this.speakingGeneration++;
|
|
5114
5387
|
this.isSpeaking = false;
|
|
5388
|
+
this.tailGraceActive = false;
|
|
5115
5389
|
this.speakingStartedAt = null;
|
|
5116
5390
|
this.firstAudioSentAt = null;
|
|
5117
5391
|
this.lastCancelAt = Date.now();
|
|
5118
5392
|
this.suppressedSpeechPending = false;
|
|
5393
|
+
this.playbackBufferedUntil = 0;
|
|
5119
5394
|
this.drainPendingMarks();
|
|
5120
5395
|
if (this.llmAbort !== null) {
|
|
5121
5396
|
try {
|
|
@@ -5188,23 +5463,37 @@ var StreamHandler = class _StreamHandler {
|
|
|
5188
5463
|
if (grace > 0) {
|
|
5189
5464
|
const gen = this.speakingGeneration;
|
|
5190
5465
|
this.clearGraceTimer();
|
|
5191
|
-
|
|
5192
|
-
this.
|
|
5193
|
-
|
|
5194
|
-
this.
|
|
5195
|
-
this.
|
|
5196
|
-
|
|
5197
|
-
|
|
5198
|
-
|
|
5199
|
-
|
|
5200
|
-
this.
|
|
5201
|
-
this.
|
|
5466
|
+
const startTailGrace = () => {
|
|
5467
|
+
this.tailGraceActive = true;
|
|
5468
|
+
this.graceTimer = setTimeout(() => {
|
|
5469
|
+
this.graceTimer = null;
|
|
5470
|
+
if (this.speakingGeneration === gen) {
|
|
5471
|
+
this.isSpeaking = false;
|
|
5472
|
+
this.tailGraceActive = false;
|
|
5473
|
+
this.speakingStartedAt = null;
|
|
5474
|
+
this.firstAudioSentAt = null;
|
|
5475
|
+
this.clearPendingBargeIn();
|
|
5476
|
+
void this.resetBargeInStrategies();
|
|
5477
|
+
if (this.suppressedSpeechPending) {
|
|
5478
|
+
this.suppressedSpeechPending = false;
|
|
5479
|
+
this.flushInboundAudioRing();
|
|
5480
|
+
}
|
|
5481
|
+
this.resetVad();
|
|
5202
5482
|
}
|
|
5203
|
-
|
|
5204
|
-
|
|
5205
|
-
|
|
5483
|
+
}, grace);
|
|
5484
|
+
};
|
|
5485
|
+
const bufferedMs = Math.max(0, this.playbackBufferedUntil - Date.now());
|
|
5486
|
+
if (bufferedMs <= 0) {
|
|
5487
|
+
startTailGrace();
|
|
5488
|
+
} else {
|
|
5489
|
+
this.graceTimer = setTimeout(() => {
|
|
5490
|
+
this.graceTimer = null;
|
|
5491
|
+
if (this.speakingGeneration === gen) startTailGrace();
|
|
5492
|
+
}, bufferedMs);
|
|
5493
|
+
}
|
|
5206
5494
|
} else {
|
|
5207
5495
|
this.isSpeaking = false;
|
|
5496
|
+
this.tailGraceActive = false;
|
|
5208
5497
|
this.speakingStartedAt = null;
|
|
5209
5498
|
this.firstAudioSentAt = null;
|
|
5210
5499
|
this.clearPendingBargeIn();
|
|
@@ -5216,6 +5505,35 @@ var StreamHandler = class _StreamHandler {
|
|
|
5216
5505
|
this.resetVad();
|
|
5217
5506
|
}
|
|
5218
5507
|
}
|
|
5508
|
+
/**
|
|
5509
|
+
* End the post-TTS tail-grace window because the user has begun their next
|
|
5510
|
+
* turn. Unlike a barge-in, the agent's response already played out in full
|
|
5511
|
+
* — there is nothing to cancel and no turn was interrupted. We flip the
|
|
5512
|
+
* speaking flag off (bumping ``speakingGeneration`` so the scheduled grace
|
|
5513
|
+
* timer no-ops), recover any leading audio the self-hearing guard captured
|
|
5514
|
+
* into the ring (the user's first ~250 ms, which VAD needed before it could
|
|
5515
|
+
* emit ``speech_start``), and let the live STT stream take over. We do NOT
|
|
5516
|
+
* call ``sendClear``, ``recordBargeinDetected`` or ``recordTurnInterrupted``
|
|
5517
|
+
* — none apply to a turn that completed normally.
|
|
5518
|
+
*
|
|
5519
|
+
* Without this, fast next-turn speech (humans reply in 200-700 ms, well
|
|
5520
|
+
* inside the 1500 ms default grace) is withheld from STT and recorded as an
|
|
5521
|
+
* empty ``[interrupted]`` turn, after which the agent goes silent for the
|
|
5522
|
+
* rest of the call. Parity with Python ``_end_tail_grace_for_new_turn``.
|
|
5523
|
+
*/
|
|
5524
|
+
endTailGraceForNewTurn() {
|
|
5525
|
+
this.isSpeaking = false;
|
|
5526
|
+
this.tailGraceActive = false;
|
|
5527
|
+
this.speakingStartedAt = null;
|
|
5528
|
+
this.firstAudioSentAt = null;
|
|
5529
|
+
this.playbackBufferedUntil = 0;
|
|
5530
|
+
this.speakingGeneration++;
|
|
5531
|
+
this.clearGraceTimer();
|
|
5532
|
+
this.clearPendingBargeIn();
|
|
5533
|
+
void this.resetBargeInStrategies();
|
|
5534
|
+
this.suppressedSpeechPending = false;
|
|
5535
|
+
this.flushInboundAudioRing();
|
|
5536
|
+
}
|
|
5219
5537
|
async resetBargeInStrategies() {
|
|
5220
5538
|
if (this.bargeInStrategies.length === 0) return;
|
|
5221
5539
|
const { resetStrategies } = await import("./barge-in-strategies-X6ARMGIQ.mjs");
|
|
@@ -5351,9 +5669,43 @@ var StreamHandler = class _StreamHandler {
|
|
|
5351
5669
|
maxDurationTimer = null;
|
|
5352
5670
|
transcriptProcessing = false;
|
|
5353
5671
|
transcriptQueue = [];
|
|
5672
|
+
/**
|
|
5673
|
+
* The in-flight turn dispatch (LLM + TTS) runs as a SINGLE tracked promise
|
|
5674
|
+
* so the transcript drain loop keeps running ``handleBargeIn`` against the
|
|
5675
|
+
* LIVE turn during a long (30-90 s) agent-runtime response, instead of
|
|
5676
|
+
* head-of-line-blocking on it. Exactly one is in flight: the launcher awaits
|
|
5677
|
+
* the previous one to settle (fast — a barge-in already aborted it) before
|
|
5678
|
+
* starting the next, preserving history/metrics ordering. Parity with
|
|
5679
|
+
* Python ``_dispatch_task``.
|
|
5680
|
+
*/
|
|
5681
|
+
dispatchTask = null;
|
|
5682
|
+
/**
|
|
5683
|
+
* Cap (ms) on how long teardown waits for the backgrounded dispatch to
|
|
5684
|
+
* settle. JS promises are not cancellable, so a user-supplied ``onMessage``
|
|
5685
|
+
* (which receives no AbortSignal) parked on a hung external call could block
|
|
5686
|
+
* call cleanup indefinitely — `llmAbort.abort()` only unblocks the built-in
|
|
5687
|
+
* LLM/TTS paths. We bound the WAIT (Python hard-cancels the task instead).
|
|
5688
|
+
* 30 s matches the webhook ceiling.
|
|
5689
|
+
*/
|
|
5690
|
+
static DISPATCH_SETTLE_TIMEOUT_MS = 3e4;
|
|
5691
|
+
/**
|
|
5692
|
+
* Opt-in (default OFF): forward inbound audio to STT even while the agent is
|
|
5693
|
+
* speaking, so the transcript barge-in path can receive a transcript on
|
|
5694
|
+
* echo-masked PSTN links where the VAD never fires. ECHO RISK without AEC.
|
|
5695
|
+
* Parity with Python ``_forward_stt_while_speaking``.
|
|
5696
|
+
*/
|
|
5697
|
+
forwardSttWhileSpeaking = ["1", "true", "yes"].includes(
|
|
5698
|
+
(process.env.PATTER_FORWARD_STT_WHILE_SPEAKING ?? "").trim().toLowerCase()
|
|
5699
|
+
);
|
|
5354
5700
|
// Throttle state for back-to-back STT finals — see ``commitTranscript``.
|
|
5355
5701
|
lastCommitText = "";
|
|
5356
5702
|
lastCommitAt = 0;
|
|
5703
|
+
/** The agent's spoken text for the CURRENT turn, accumulated as tokens stream.
|
|
5704
|
+
* The echo guard rejects transcripts matching it (the agent's own TTS bleeding
|
|
5705
|
+
* back into STT when audio is forwarded during TTS without effective AEC).
|
|
5706
|
+
* Reset in ``beginSpeaking``; only consulted while ``forwardSttWhileSpeaking``.
|
|
5707
|
+
* Parity with Python ``_current_agent_spoken_text``. */
|
|
5708
|
+
currentAgentSpokenText = "";
|
|
5357
5709
|
// PCM16 byte-alignment carry for TTS streaming (pipeline mode).
|
|
5358
5710
|
// HTTP streams from ElevenLabs / OpenAI / Cartesia can yield chunks of any
|
|
5359
5711
|
// size, including odd byte counts. Silently dropping the trailing odd byte
|
|
@@ -5373,6 +5725,11 @@ var StreamHandler = class _StreamHandler {
|
|
|
5373
5725
|
this.ws = ws;
|
|
5374
5726
|
this.caller = caller;
|
|
5375
5727
|
this.callee = callee;
|
|
5728
|
+
if (this.forwardSttWhileSpeaking) {
|
|
5729
|
+
getLogger().warn(
|
|
5730
|
+
"PATTER_FORWARD_STT_WHILE_SPEAKING=on: inbound audio is sent to STT during TTS so transcript barge-in works on echo-masked links. Without AEC the agent's own voice may be transcribed as a phantom interruption \u2014 pair with agent.bargeInStrategies."
|
|
5731
|
+
);
|
|
5732
|
+
}
|
|
5376
5733
|
this.bargeInStrategies = (deps.agent.bargeInStrategies ?? []).slice();
|
|
5377
5734
|
const confirmMs = deps.agent.bargeInConfirmMs;
|
|
5378
5735
|
this.bargeInConfirmMs = typeof confirmMs === "number" && Number.isFinite(confirmMs) && confirmMs > 0 ? confirmMs : 1500;
|
|
@@ -5572,12 +5929,12 @@ var StreamHandler = class _StreamHandler {
|
|
|
5572
5929
|
} catch {
|
|
5573
5930
|
}
|
|
5574
5931
|
if (this.deps.onCallStart) {
|
|
5575
|
-
const
|
|
5932
|
+
const direction2 = this.deps.metricsStore.getActive(callId)?.direction ?? "inbound";
|
|
5576
5933
|
await this.deps.onCallStart({
|
|
5577
5934
|
call_id: callId,
|
|
5578
5935
|
caller: this.caller,
|
|
5579
5936
|
callee: this.callee,
|
|
5580
|
-
direction,
|
|
5937
|
+
direction: direction2,
|
|
5581
5938
|
telephony_provider: this.deps.bridge.telephonyProvider,
|
|
5582
5939
|
...Object.keys(customParams).length > 0 ? { custom_params: customParams } : {}
|
|
5583
5940
|
});
|
|
@@ -5644,6 +6001,17 @@ var StreamHandler = class _StreamHandler {
|
|
|
5644
6001
|
setStreamSid(sid) {
|
|
5645
6002
|
this.streamSid = sid;
|
|
5646
6003
|
}
|
|
6004
|
+
/**
|
|
6005
|
+
* Record a terminal/processing error as a coarse, anonymous code on the call
|
|
6006
|
+
* metrics (code only, never the message). Surfaced via `call_completed`
|
|
6007
|
+
* telemetry. Safe to call with any value; last write wins.
|
|
6008
|
+
*/
|
|
6009
|
+
recordError(err) {
|
|
6010
|
+
try {
|
|
6011
|
+
this.metricsAcc.recordError(err);
|
|
6012
|
+
} catch {
|
|
6013
|
+
}
|
|
6014
|
+
}
|
|
5647
6015
|
/** Handle an incoming audio chunk (already decoded from base64). */
|
|
5648
6016
|
/** Forward inbound audio bytes to the AI adapter and (in pipeline mode) the STT provider. */
|
|
5649
6017
|
async handleAudio(audioBuffer) {
|
|
@@ -5670,6 +6038,9 @@ var StreamHandler = class _StreamHandler {
|
|
|
5670
6038
|
);
|
|
5671
6039
|
}
|
|
5672
6040
|
if (evt?.type === "speech_start") {
|
|
6041
|
+
if (this.isSpeaking && this.tailGraceActive) {
|
|
6042
|
+
this.endTailGraceForNewTurn();
|
|
6043
|
+
}
|
|
5673
6044
|
const phantomSuppressed = this.isSpeaking && !this.canBargeIn();
|
|
5674
6045
|
if (phantomSuppressed) {
|
|
5675
6046
|
getLogger().info(
|
|
@@ -5677,7 +6048,8 @@ var StreamHandler = class _StreamHandler {
|
|
|
5677
6048
|
);
|
|
5678
6049
|
this.suppressedSpeechPending = true;
|
|
5679
6050
|
} else if (this.isSpeaking) {
|
|
5680
|
-
|
|
6051
|
+
const deferCancel = this.bargeInStrategies.length > 0 || this.forwardSttWhileSpeaking && !this.aec;
|
|
6052
|
+
if (deferCancel) {
|
|
5681
6053
|
this.startPendingBargeIn();
|
|
5682
6054
|
this.metricsAcc.anchorUserSpeechStart();
|
|
5683
6055
|
return;
|
|
@@ -5687,6 +6059,7 @@ var StreamHandler = class _StreamHandler {
|
|
|
5687
6059
|
this.metricsAcc.recordBargeinDetected();
|
|
5688
6060
|
const bargeinSpan = startSpan(SPAN_BARGEIN, { "patter.call.id": this.callId });
|
|
5689
6061
|
try {
|
|
6062
|
+
this.maybeTruncateCompletedTurnHistory();
|
|
5690
6063
|
this.cancelSpeaking();
|
|
5691
6064
|
try {
|
|
5692
6065
|
this.deps.bridge.sendClear(this.ws, this.streamSid);
|
|
@@ -5731,9 +6104,10 @@ var StreamHandler = class _StreamHandler {
|
|
|
5731
6104
|
if (this.inboundAudioRing.length > _StreamHandler.INBOUND_AUDIO_RING_FRAMES) {
|
|
5732
6105
|
this.inboundAudioRing.shift();
|
|
5733
6106
|
}
|
|
6107
|
+
if (!this.forwardSttWhileSpeaking) return;
|
|
6108
|
+
} else if ((this.deps.agent.bargeInThresholdMs ?? 300) === 0) {
|
|
5734
6109
|
return;
|
|
5735
6110
|
}
|
|
5736
|
-
if ((this.deps.agent.bargeInThresholdMs ?? 300) === 0) return;
|
|
5737
6111
|
}
|
|
5738
6112
|
const hooks = this.deps.agent.hooks;
|
|
5739
6113
|
if (hooks?.beforeSendToStt) {
|
|
@@ -5795,6 +6169,27 @@ var StreamHandler = class _StreamHandler {
|
|
|
5795
6169
|
}
|
|
5796
6170
|
}
|
|
5797
6171
|
}
|
|
6172
|
+
/**
|
|
6173
|
+
* Await the backgrounded turn dispatch during teardown, but never block
|
|
6174
|
+
* longer than ``DISPATCH_SETTLE_TIMEOUT_MS``. The earlier ``llmAbort.abort()``
|
|
6175
|
+
* settles the built-in LLM/TTS paths immediately; the cap only bites a
|
|
6176
|
+
* misbehaving user ``onMessage`` parked on a hung external call (JS promises
|
|
6177
|
+
* can't be cancelled). No-op when nothing is in flight.
|
|
6178
|
+
*/
|
|
6179
|
+
async settleDispatchForTeardown() {
|
|
6180
|
+
if (!this.dispatchTask) return;
|
|
6181
|
+
const settle = this.dispatchTask.catch(() => {
|
|
6182
|
+
});
|
|
6183
|
+
let timer;
|
|
6184
|
+
const cap = new Promise((resolve2) => {
|
|
6185
|
+
timer = setTimeout(resolve2, _StreamHandler.DISPATCH_SETTLE_TIMEOUT_MS);
|
|
6186
|
+
});
|
|
6187
|
+
try {
|
|
6188
|
+
await Promise.race([settle, cap]);
|
|
6189
|
+
} finally {
|
|
6190
|
+
if (timer) clearTimeout(timer);
|
|
6191
|
+
}
|
|
6192
|
+
}
|
|
5798
6193
|
/** Handle call stop / stream end. */
|
|
5799
6194
|
/** Handle a carrier-emitted `stop` event signalling the call has ended. */
|
|
5800
6195
|
async handleStop() {
|
|
@@ -5811,6 +6206,7 @@ var StreamHandler = class _StreamHandler {
|
|
|
5811
6206
|
} catch {
|
|
5812
6207
|
}
|
|
5813
6208
|
}
|
|
6209
|
+
await this.settleDispatchForTeardown();
|
|
5814
6210
|
this.clearPendingBargeIn();
|
|
5815
6211
|
this.drainPendingMarks();
|
|
5816
6212
|
this.clearGraceTimer();
|
|
@@ -5838,6 +6234,7 @@ var StreamHandler = class _StreamHandler {
|
|
|
5838
6234
|
} catch {
|
|
5839
6235
|
}
|
|
5840
6236
|
}
|
|
6237
|
+
await this.settleDispatchForTeardown();
|
|
5841
6238
|
this.clearPendingBargeIn();
|
|
5842
6239
|
this.drainPendingMarks();
|
|
5843
6240
|
this.clearGraceTimer();
|
|
@@ -6232,7 +6629,7 @@ var StreamHandler = class _StreamHandler {
|
|
|
6232
6629
|
};
|
|
6233
6630
|
}
|
|
6234
6631
|
/** Synthesize a single sentence through TTS with hooks, sending audio to telephony. */
|
|
6235
|
-
async synthesizeSentence(sentence, hookExecutor, hookCtx, ttsFirstByteSent) {
|
|
6632
|
+
async synthesizeSentence(sentence, hookExecutor, hookCtx, ttsFirstByteSent, recordSegment = true) {
|
|
6236
6633
|
if (!this.tts || !this.isSpeaking) return;
|
|
6237
6634
|
let transformed = sentence;
|
|
6238
6635
|
const transforms = this.deps.agent.textTransforms;
|
|
@@ -6258,8 +6655,16 @@ var StreamHandler = class _StreamHandler {
|
|
|
6258
6655
|
if (this.aec) {
|
|
6259
6656
|
this.aec.pushFarEnd(processedAudio);
|
|
6260
6657
|
}
|
|
6658
|
+
if (recordSegment) {
|
|
6659
|
+
this.turnSpokenSegments.push({
|
|
6660
|
+
text: processedText,
|
|
6661
|
+
startMs: this.turnPlaybackTotalMs
|
|
6662
|
+
});
|
|
6663
|
+
recordSegment = false;
|
|
6664
|
+
}
|
|
6261
6665
|
const encoded = this.encodePipelineAudio(processedAudio);
|
|
6262
6666
|
this.deps.bridge.sendAudio(this.ws, encoded, this.streamSid);
|
|
6667
|
+
this.trackOutboundPlayback(processedAudio.length);
|
|
6263
6668
|
this.markFirstAudioSent();
|
|
6264
6669
|
}
|
|
6265
6670
|
} catch (e) {
|
|
@@ -6334,64 +6739,101 @@ var StreamHandler = class _StreamHandler {
|
|
|
6334
6739
|
return;
|
|
6335
6740
|
}
|
|
6336
6741
|
this.history.push({ role: "user", text: filteredTranscript, timestamp: Date.now() });
|
|
6337
|
-
let responseText = "";
|
|
6338
6742
|
this.metricsAcc.recordOnUserTurnCompletedDelay(0);
|
|
6339
6743
|
this.metricsAcc.recordTurnCommitted();
|
|
6340
6744
|
closeEndpointSpan();
|
|
6341
|
-
|
|
6342
|
-
|
|
6343
|
-
|
|
6745
|
+
await this.dispatchTask?.catch(() => {
|
|
6746
|
+
});
|
|
6747
|
+
const historySnapshot = [...this.history.entries];
|
|
6748
|
+
this.dispatchTask = this.dispatchTurn(
|
|
6749
|
+
filteredTranscript,
|
|
6750
|
+
hookExecutor,
|
|
6751
|
+
hookCtx,
|
|
6752
|
+
interrupted,
|
|
6753
|
+
historySnapshot
|
|
6754
|
+
);
|
|
6755
|
+
}
|
|
6756
|
+
/**
|
|
6757
|
+
* Post-commit turn body (LLM dispatch → TTS → turn-complete) run as a
|
|
6758
|
+
* tracked background task so the transcript drain loop is not blocked for
|
|
6759
|
+
* the whole (possibly 30-90 s) agent-runtime turn. A barge-in — transcript
|
|
6760
|
+
* (now reachable mid-turn) or VAD — aborts the in-flight ``llmAbort`` and
|
|
6761
|
+
* flips ``isSpeaking``, which the LLM/TTS loops here observe and break on.
|
|
6762
|
+
* Parity with Python ``_dispatch_turn``.
|
|
6763
|
+
*/
|
|
6764
|
+
async dispatchTurn(filteredTranscript, hookExecutor, hookCtx, interrupted, historySnapshot) {
|
|
6765
|
+
const label = this.deps.bridge.label;
|
|
6766
|
+
let responseText = "";
|
|
6767
|
+
try {
|
|
6768
|
+
if (this.deps.onMessage && typeof this.deps.onMessage === "function") {
|
|
6769
|
+
try {
|
|
6770
|
+
responseText = await this.deps.onMessage({
|
|
6771
|
+
text: filteredTranscript,
|
|
6772
|
+
call_id: this.callId,
|
|
6773
|
+
caller: this.caller,
|
|
6774
|
+
callee: this.callee,
|
|
6775
|
+
history: historySnapshot
|
|
6776
|
+
});
|
|
6777
|
+
} catch (e) {
|
|
6778
|
+
getLogger().error(`onMessage error (${label}):`, e);
|
|
6779
|
+
return;
|
|
6780
|
+
}
|
|
6781
|
+
if (!responseText) {
|
|
6782
|
+
getLogger().warn(
|
|
6783
|
+
`onMessage returned empty/void (${label}) \u2014 no TTS will play. If you intended to observe transcripts, use onTranscript instead; if you meant to answer via the built-in LLM, remove onMessage and pass openaiKey.`
|
|
6784
|
+
);
|
|
6785
|
+
}
|
|
6786
|
+
} else if (this.deps.onMessage && isRemoteUrl(this.deps.onMessage)) {
|
|
6787
|
+
const msgData = {
|
|
6344
6788
|
text: filteredTranscript,
|
|
6345
6789
|
call_id: this.callId,
|
|
6346
6790
|
caller: this.caller,
|
|
6347
6791
|
callee: this.callee,
|
|
6348
|
-
history:
|
|
6349
|
-
}
|
|
6350
|
-
|
|
6351
|
-
|
|
6352
|
-
|
|
6353
|
-
|
|
6354
|
-
|
|
6792
|
+
history: historySnapshot
|
|
6793
|
+
};
|
|
6794
|
+
if (isWebSocketUrl(this.deps.onMessage)) {
|
|
6795
|
+
await this.handleWebSocketResponse(msgData);
|
|
6796
|
+
return;
|
|
6797
|
+
}
|
|
6798
|
+
try {
|
|
6799
|
+
responseText = await this.deps.remoteHandler.callWebhook(this.deps.onMessage, msgData);
|
|
6800
|
+
} catch (e) {
|
|
6801
|
+
getLogger().error(`Webhook remote error (${label}):`, e);
|
|
6802
|
+
return;
|
|
6803
|
+
}
|
|
6804
|
+
} else if (this.llmLoop) {
|
|
6805
|
+
const llmResult = await this.runPipelineLlm(
|
|
6806
|
+
filteredTranscript,
|
|
6807
|
+
hookExecutor,
|
|
6808
|
+
hookCtx,
|
|
6809
|
+
historySnapshot
|
|
6810
|
+
);
|
|
6811
|
+
responseText = llmResult.text;
|
|
6812
|
+
interrupted = interrupted || llmResult.interrupted;
|
|
6813
|
+
} else {
|
|
6355
6814
|
getLogger().warn(
|
|
6356
|
-
`
|
|
6815
|
+
`Pipeline (${label}) has no llm/onMessage handler \u2014 transcript "${sanitizeLogValue(filteredTranscript.slice(0, 60))}" dropped. Check that agent.llm or onMessage is configured.`
|
|
6357
6816
|
);
|
|
6358
|
-
}
|
|
6359
|
-
} else if (this.deps.onMessage && isRemoteUrl(this.deps.onMessage)) {
|
|
6360
|
-
const msgData = {
|
|
6361
|
-
text: filteredTranscript,
|
|
6362
|
-
call_id: this.callId,
|
|
6363
|
-
caller: this.caller,
|
|
6364
|
-
callee: this.callee,
|
|
6365
|
-
history: [...this.history.entries]
|
|
6366
|
-
};
|
|
6367
|
-
if (isWebSocketUrl(this.deps.onMessage)) {
|
|
6368
|
-
await this.handleWebSocketResponse(msgData);
|
|
6369
6817
|
return;
|
|
6370
6818
|
}
|
|
6371
|
-
|
|
6372
|
-
|
|
6373
|
-
|
|
6374
|
-
|
|
6375
|
-
|
|
6819
|
+
if (!responseText) return;
|
|
6820
|
+
if (this.llmLoop) {
|
|
6821
|
+
let spokenText = responseText;
|
|
6822
|
+
if (interrupted) {
|
|
6823
|
+
const heard = this.heardResponsePrefix();
|
|
6824
|
+
spokenText = heard === null ? `${responseText} [interrupted by caller]` : heard.text ? `${heard.text} [interrupted by caller]` : "[interrupted by caller]";
|
|
6825
|
+
}
|
|
6826
|
+
await this.emitAssistantTranscript(spokenText);
|
|
6827
|
+
if (!interrupted) this.metricsAcc.recordTtsComplete(responseText);
|
|
6828
|
+
} else {
|
|
6829
|
+
interrupted = await this.runRegularLlm(responseText, hookExecutor, hookCtx) || interrupted;
|
|
6830
|
+
responseText = this.history.entries[this.history.entries.length - 1]?.text ?? responseText;
|
|
6376
6831
|
}
|
|
6377
|
-
|
|
6378
|
-
|
|
6379
|
-
|
|
6380
|
-
|
|
6381
|
-
|
|
6382
|
-
);
|
|
6383
|
-
return;
|
|
6384
|
-
}
|
|
6385
|
-
if (!responseText) return;
|
|
6386
|
-
if (this.llmLoop) {
|
|
6387
|
-
await this.emitAssistantTranscript(responseText);
|
|
6388
|
-
this.metricsAcc.recordTtsComplete(responseText);
|
|
6389
|
-
} else {
|
|
6390
|
-
interrupted = await this.runRegularLlm(responseText, hookExecutor, hookCtx) || interrupted;
|
|
6391
|
-
responseText = this.history.entries[this.history.entries.length - 1]?.text ?? responseText;
|
|
6392
|
-
}
|
|
6393
|
-
if (!interrupted) {
|
|
6394
|
-
await this.emitTurnMetrics(this.metricsAcc.recordTurnComplete(responseText));
|
|
6832
|
+
if (!interrupted) {
|
|
6833
|
+
await this.emitTurnMetrics(this.metricsAcc.recordTurnComplete(responseText));
|
|
6834
|
+
}
|
|
6835
|
+
} finally {
|
|
6836
|
+
this.dispatchTask = null;
|
|
6395
6837
|
}
|
|
6396
6838
|
}
|
|
6397
6839
|
/**
|
|
@@ -6402,6 +6844,18 @@ var StreamHandler = class _StreamHandler {
|
|
|
6402
6844
|
*/
|
|
6403
6845
|
async handleBargeInAsync(transcript) {
|
|
6404
6846
|
if (!transcript.text || !this.isSpeaking) return false;
|
|
6847
|
+
if (this.tailGraceActive) {
|
|
6848
|
+
this.endTailGraceForNewTurn();
|
|
6849
|
+
return false;
|
|
6850
|
+
}
|
|
6851
|
+
if (this.forwardSttWhileSpeaking && looksLikeEcho(transcript.text, this.currentAgentSpokenText)) {
|
|
6852
|
+
getLogger().info(
|
|
6853
|
+
`Barge-in suppressed: transcript matches agent's own speech (echo) \u2014 ${sanitizeLogValue(
|
|
6854
|
+
transcript.text.slice(0, 40)
|
|
6855
|
+
)}`
|
|
6856
|
+
);
|
|
6857
|
+
return false;
|
|
6858
|
+
}
|
|
6405
6859
|
if (!this.canBargeIn()) {
|
|
6406
6860
|
getLogger().info(
|
|
6407
6861
|
`Barge-in transcript suppressed (agent speaking < gate, aec=${this.aec ? "on" : "off"})`
|
|
@@ -6441,6 +6895,18 @@ var StreamHandler = class _StreamHandler {
|
|
|
6441
6895
|
*/
|
|
6442
6896
|
handleBargeIn(transcript) {
|
|
6443
6897
|
if (!transcript.text || !this.isSpeaking) return false;
|
|
6898
|
+
if (this.tailGraceActive) {
|
|
6899
|
+
this.endTailGraceForNewTurn();
|
|
6900
|
+
return false;
|
|
6901
|
+
}
|
|
6902
|
+
if (this.forwardSttWhileSpeaking && looksLikeEcho(transcript.text, this.currentAgentSpokenText)) {
|
|
6903
|
+
getLogger().info(
|
|
6904
|
+
`Barge-in suppressed: transcript matches agent's own speech (echo) \u2014 ${sanitizeLogValue(
|
|
6905
|
+
transcript.text.slice(0, 40)
|
|
6906
|
+
)}`
|
|
6907
|
+
);
|
|
6908
|
+
return false;
|
|
6909
|
+
}
|
|
6444
6910
|
if (this.bargeInStrategies.length === 0) {
|
|
6445
6911
|
if (!this.canBargeIn()) {
|
|
6446
6912
|
getLogger().info(
|
|
@@ -6472,6 +6938,7 @@ var StreamHandler = class _StreamHandler {
|
|
|
6472
6938
|
this.metricsAcc.recordBargeinDetected();
|
|
6473
6939
|
const bargeinSpan = startSpan(SPAN_BARGEIN, { "patter.call.id": this.callId });
|
|
6474
6940
|
try {
|
|
6941
|
+
this.maybeTruncateCompletedTurnHistory();
|
|
6475
6942
|
this.cancelSpeaking();
|
|
6476
6943
|
try {
|
|
6477
6944
|
this.deps.bridge.sendClear(this.ws, this.streamSid);
|
|
@@ -6535,15 +7002,21 @@ var StreamHandler = class _StreamHandler {
|
|
|
6535
7002
|
getLogger().debug(`Dropped likely STT hallucination: ${sanitizeLogValue(normalised.slice(0, 40))}`);
|
|
6536
7003
|
return false;
|
|
6537
7004
|
}
|
|
7005
|
+
if (this.forwardSttWhileSpeaking && this.isSpeaking && looksLikeEcho(text, this.currentAgentSpokenText)) {
|
|
7006
|
+
getLogger().debug(
|
|
7007
|
+
`Dropped agent-echo transcript (not a user turn): ${sanitizeLogValue(normalised.slice(0, 40))}`
|
|
7008
|
+
);
|
|
7009
|
+
return false;
|
|
7010
|
+
}
|
|
6538
7011
|
if (sinceLastMs < 2e3 && normalised === this.lastCommitText) {
|
|
6539
7012
|
getLogger().debug(
|
|
6540
7013
|
`Dropped duplicate final transcript (${(sinceLastMs / 1e3).toFixed(1)}s since last): ${sanitizeLogValue(normalised.slice(0, 40))}`
|
|
6541
7014
|
);
|
|
6542
7015
|
return false;
|
|
6543
7016
|
}
|
|
6544
|
-
if (sinceLastMs < 500) {
|
|
7017
|
+
if (sinceLastMs < 500 && isNearDuplicate(normalised, this.lastCommitText)) {
|
|
6545
7018
|
getLogger().debug(
|
|
6546
|
-
`Dropped back-to-back final
|
|
7019
|
+
`Dropped back-to-back near-duplicate final (${(sinceLastMs / 1e3).toFixed(2)}s since last): ${sanitizeLogValue(normalised.slice(0, 40))}`
|
|
6547
7020
|
);
|
|
6548
7021
|
return false;
|
|
6549
7022
|
}
|
|
@@ -6551,11 +7024,63 @@ var StreamHandler = class _StreamHandler {
|
|
|
6551
7024
|
this.lastCommitAt = now;
|
|
6552
7025
|
return true;
|
|
6553
7026
|
}
|
|
7027
|
+
/**
|
|
7028
|
+
* Schedule the opt-in long-turn filler and return its async ``clear()``.
|
|
7029
|
+
*
|
|
7030
|
+
* When ``agent.longTurnMessage`` is unset / empty the returned clear is a
|
|
7031
|
+
* no-op (byte-identical to today's behaviour). Otherwise a one-shot timer
|
|
7032
|
+
* fires after ``agent.longTurnMessageAfterS`` seconds and, IFF no audio has
|
|
7033
|
+
* reached the carrier this turn (``!ttsFirstByteSent.value``) AND we still own
|
|
7034
|
+
* the floor (``this.isSpeaking``), synthesizes the filler ONCE via the same
|
|
7035
|
+
* per-sentence TTS primitive every sentence uses.
|
|
7036
|
+
*
|
|
7037
|
+
* The returned ``clear()`` is **async**: it stops the timer AND, if the filler
|
|
7038
|
+
* already started synthesizing (its ``setTimeout`` callback runs in a separate
|
|
7039
|
+
* macro-task, so it can fire just before the first real sentence), AWAITS the
|
|
7040
|
+
* in-flight synthesis so the filler audio can never interleave with the real
|
|
7041
|
+
* sentence that follows. Idempotent; self-synthesis failure degrades to
|
|
7042
|
+
* silence (never crashes the turn). The caller must clear on first real audio,
|
|
7043
|
+
* on the error branch, and in the finally.
|
|
7044
|
+
*/
|
|
7045
|
+
scheduleLongTurnFiller(ttsFirstByteSent, hookExecutor, hookCtx, label) {
|
|
7046
|
+
const message = this.deps.agent.longTurnMessage;
|
|
7047
|
+
if (!message) return async () => {
|
|
7048
|
+
};
|
|
7049
|
+
const afterS = this.deps.agent.longTurnMessageAfterS ?? 4;
|
|
7050
|
+
let cancelled = false;
|
|
7051
|
+
let inFlight = null;
|
|
7052
|
+
const timer = setTimeout(() => {
|
|
7053
|
+
if (cancelled || ttsFirstByteSent.value || !this.isSpeaking) return;
|
|
7054
|
+
inFlight = this.synthesizeSentence(
|
|
7055
|
+
message,
|
|
7056
|
+
hookExecutor,
|
|
7057
|
+
hookCtx,
|
|
7058
|
+
ttsFirstByteSent,
|
|
7059
|
+
false
|
|
7060
|
+
).catch((err) => {
|
|
7061
|
+
getLogger().error(
|
|
7062
|
+
`longTurnMessage filler synthesis failed (${label}):`,
|
|
7063
|
+
err
|
|
7064
|
+
);
|
|
7065
|
+
});
|
|
7066
|
+
}, Math.max(0, afterS * 1e3));
|
|
7067
|
+
return async () => {
|
|
7068
|
+
cancelled = true;
|
|
7069
|
+
clearTimeout(timer);
|
|
7070
|
+
if (inFlight !== null) {
|
|
7071
|
+
const pending = inFlight;
|
|
7072
|
+
inFlight = null;
|
|
7073
|
+
await pending;
|
|
7074
|
+
}
|
|
7075
|
+
};
|
|
7076
|
+
}
|
|
6554
7077
|
/**
|
|
6555
7078
|
* Streaming built-in LLM path with sentence chunking and per-sentence
|
|
6556
|
-
* guardrails/TTS. Returns the concatenated response text
|
|
7079
|
+
* guardrails/TTS. Returns the concatenated (plain) response text plus whether
|
|
7080
|
+
* the turn was cut short by a barge-in — the caller applies the interrupted
|
|
7081
|
+
* marker to history only, keeping metrics on the plain text.
|
|
6557
7082
|
*/
|
|
6558
|
-
async runPipelineLlm(filteredTranscript, hookExecutor, hookCtx) {
|
|
7083
|
+
async runPipelineLlm(filteredTranscript, hookExecutor, hookCtx, historySnapshot) {
|
|
6559
7084
|
const label = this.deps.bridge.label;
|
|
6560
7085
|
const callCtx = { call_id: this.callId, caller: this.caller, callee: this.callee };
|
|
6561
7086
|
const chunker = new SentenceChunker({
|
|
@@ -6568,6 +7093,12 @@ var StreamHandler = class _StreamHandler {
|
|
|
6568
7093
|
this.llmAbort = new AbortController();
|
|
6569
7094
|
const llmSignal = this.llmAbort.signal;
|
|
6570
7095
|
let llmError = false;
|
|
7096
|
+
const clearLongTurnFiller = this.scheduleLongTurnFiller(
|
|
7097
|
+
ttsFirstByteSent,
|
|
7098
|
+
hookExecutor,
|
|
7099
|
+
hookCtx,
|
|
7100
|
+
label
|
|
7101
|
+
);
|
|
6571
7102
|
const llmSpan = startSpan(SPAN_LLM, { "patter.call.id": this.callId });
|
|
6572
7103
|
const guardAndSpeak = async (sentence, isFirst) => {
|
|
6573
7104
|
if (isFirst) this.metricsAcc.recordLlmFirstSentenceComplete();
|
|
@@ -6578,6 +7109,7 @@ var StreamHandler = class _StreamHandler {
|
|
|
6578
7109
|
if (transformed === null) return;
|
|
6579
7110
|
sentenceText = transformed;
|
|
6580
7111
|
}
|
|
7112
|
+
await clearLongTurnFiller();
|
|
6581
7113
|
await this.synthesizeSentence(sentenceText, hookExecutor, hookCtx, ttsFirstByteSent);
|
|
6582
7114
|
};
|
|
6583
7115
|
let firstSentenceEmitted = false;
|
|
@@ -6585,7 +7117,7 @@ var StreamHandler = class _StreamHandler {
|
|
|
6585
7117
|
try {
|
|
6586
7118
|
for await (const token of this.llmLoop.run(
|
|
6587
7119
|
filteredTranscript,
|
|
6588
|
-
|
|
7120
|
+
historySnapshot,
|
|
6589
7121
|
callCtx,
|
|
6590
7122
|
this.metricsAcc,
|
|
6591
7123
|
hookExecutor,
|
|
@@ -6596,6 +7128,7 @@ var StreamHandler = class _StreamHandler {
|
|
|
6596
7128
|
this.metricsAcc.recordLlmFirstToken();
|
|
6597
7129
|
await this.emitLlmFirstToken();
|
|
6598
7130
|
allParts.push(token);
|
|
7131
|
+
this.currentAgentSpokenText = allParts.join("");
|
|
6599
7132
|
for (const sentence of chunker.push(token)) {
|
|
6600
7133
|
if (!this.isSpeaking) break;
|
|
6601
7134
|
await guardAndSpeak(sentence, !firstSentenceEmitted);
|
|
@@ -6605,6 +7138,7 @@ var StreamHandler = class _StreamHandler {
|
|
|
6605
7138
|
}
|
|
6606
7139
|
} catch (e) {
|
|
6607
7140
|
const isAbort = e?.name === "AbortError" || llmSignal.aborted;
|
|
7141
|
+
await clearLongTurnFiller();
|
|
6608
7142
|
if (!isAbort) {
|
|
6609
7143
|
llmError = true;
|
|
6610
7144
|
chunker.reset();
|
|
@@ -6613,7 +7147,7 @@ var StreamHandler = class _StreamHandler {
|
|
|
6613
7147
|
const fallback = this.deps.agent.llmErrorMessage;
|
|
6614
7148
|
if (fallback && !ttsFirstByteSent.value && this.isSpeaking) {
|
|
6615
7149
|
try {
|
|
6616
|
-
await this.synthesizeSentence(fallback, hookExecutor, hookCtx, ttsFirstByteSent);
|
|
7150
|
+
await this.synthesizeSentence(fallback, hookExecutor, hookCtx, ttsFirstByteSent, false);
|
|
6617
7151
|
} catch (err) {
|
|
6618
7152
|
getLogger().error(`llmErrorMessage fallback synthesis failed (${label}):`, err);
|
|
6619
7153
|
}
|
|
@@ -6629,6 +7163,7 @@ var StreamHandler = class _StreamHandler {
|
|
|
6629
7163
|
}
|
|
6630
7164
|
}
|
|
6631
7165
|
} finally {
|
|
7166
|
+
await clearLongTurnFiller();
|
|
6632
7167
|
this.endSpeakingWithGrace();
|
|
6633
7168
|
this.llmAbort = null;
|
|
6634
7169
|
try {
|
|
@@ -6636,7 +7171,7 @@ var StreamHandler = class _StreamHandler {
|
|
|
6636
7171
|
} catch {
|
|
6637
7172
|
}
|
|
6638
7173
|
}
|
|
6639
|
-
return allParts.join("");
|
|
7174
|
+
return { text: allParts.join(""), interrupted: llmSignal.aborted };
|
|
6640
7175
|
}
|
|
6641
7176
|
/**
|
|
6642
7177
|
* Non-streaming path (onMessage function / webhook): apply output guardrails,
|
|
@@ -7764,13 +8299,14 @@ function isLoopbackHost(value) {
|
|
|
7764
8299
|
}
|
|
7765
8300
|
return false;
|
|
7766
8301
|
}
|
|
8302
|
+
var TELNYX_FUTURE_SKEW_MS = 3e4;
|
|
7767
8303
|
function validateTelnyxSignature(rawBody, signature, timestamp, publicKey, toleranceSec = 300) {
|
|
7768
8304
|
try {
|
|
7769
8305
|
const ts = parseInt(timestamp, 10);
|
|
7770
8306
|
if (!Number.isFinite(ts)) return false;
|
|
7771
8307
|
const tsMs = ts < 1e12 ? ts * 1e3 : ts;
|
|
7772
8308
|
const ageMs = Date.now() - tsMs;
|
|
7773
|
-
if (ageMs
|
|
8309
|
+
if (ageMs > toleranceSec * 1e3 || ageMs < -TELNYX_FUTURE_SKEW_MS) return false;
|
|
7774
8310
|
const payload = `${timestamp}|${rawBody}`;
|
|
7775
8311
|
const keyBuffer = Buffer.from(publicKey, "base64");
|
|
7776
8312
|
const keyObject = crypto5.createPublicKey({
|
|
@@ -7816,7 +8352,7 @@ function sanitizeVariables(raw) {
|
|
|
7816
8352
|
for (const key of Object.keys(raw)) {
|
|
7817
8353
|
if (BLOCKED_KEYS.has(key)) continue;
|
|
7818
8354
|
const val = raw[key];
|
|
7819
|
-
safe[key] = typeof val === "string" ? val : String(val ?? "");
|
|
8355
|
+
safe[key] = (typeof val === "string" ? val : String(val ?? "")).replace(/[\x00-\x1f\x7f]/g, "").slice(0, 500);
|
|
7820
8356
|
}
|
|
7821
8357
|
return safe;
|
|
7822
8358
|
}
|
|
@@ -8212,6 +8748,9 @@ var EmbeddedServer = class {
|
|
|
8212
8748
|
twilioTokenWarningLogged = false;
|
|
8213
8749
|
telnyxSigWarningLogged = false;
|
|
8214
8750
|
metricsStore;
|
|
8751
|
+
/** Anonymous telemetry client, set by ``client.ts`` ``serve()``; emits the
|
|
8752
|
+
* per-call ``call_completed`` event from the call-end path. */
|
|
8753
|
+
telemetry;
|
|
8215
8754
|
pricing;
|
|
8216
8755
|
remoteHandler = new RemoteMessageHandler();
|
|
8217
8756
|
/**
|
|
@@ -8315,6 +8854,12 @@ var EmbeddedServer = class {
|
|
|
8315
8854
|
* Mirrors Python's ``_resolve_completion``.
|
|
8316
8855
|
*/
|
|
8317
8856
|
resolveCompletion(callId, args) {
|
|
8857
|
+
if (args.outcome === "no_answer" || args.outcome === "busy" || args.outcome === "failed") {
|
|
8858
|
+
recordCallCompleted(this.telemetry, {
|
|
8859
|
+
outcome: args.outcome,
|
|
8860
|
+
carrier: this.config.telephonyProvider
|
|
8861
|
+
});
|
|
8862
|
+
}
|
|
8318
8863
|
const entry = this.completions.get(callId);
|
|
8319
8864
|
if (!entry || entry.done) return;
|
|
8320
8865
|
const data = args.data;
|
|
@@ -9063,7 +9608,13 @@ var EmbeddedServer = class {
|
|
|
9063
9608
|
return Object.fromEntries(Object.entries(snap).filter(([, v]) => v !== void 0));
|
|
9064
9609
|
};
|
|
9065
9610
|
const store = this.metricsStore;
|
|
9611
|
+
const telemetry = this.telemetry;
|
|
9066
9612
|
const wrappedStart = async (data) => {
|
|
9613
|
+
recordCallStarted(telemetry, {
|
|
9614
|
+
providerMode: agent.provider ?? void 0,
|
|
9615
|
+
telephonyProvider: bridge.telephonyProvider,
|
|
9616
|
+
direction: data.direction
|
|
9617
|
+
});
|
|
9067
9618
|
if (logger.enabled) {
|
|
9068
9619
|
const callId = typeof data.call_id === "string" ? data.call_id : "";
|
|
9069
9620
|
const dataCaller = typeof data.caller === "string" ? data.caller : "";
|
|
@@ -9094,6 +9645,11 @@ var EmbeddedServer = class {
|
|
|
9094
9645
|
if (userMetrics) await userMetrics(data);
|
|
9095
9646
|
};
|
|
9096
9647
|
const wrappedEnd = async (data) => {
|
|
9648
|
+
recordCallCompleted(this.telemetry, {
|
|
9649
|
+
outcome: "completed",
|
|
9650
|
+
metrics: data.metrics,
|
|
9651
|
+
direction: data.direction
|
|
9652
|
+
});
|
|
9097
9653
|
if (logger.enabled) {
|
|
9098
9654
|
const callId = typeof data.call_id === "string" ? data.call_id : "";
|
|
9099
9655
|
const metricsObj = data.metrics ?? null;
|
|
@@ -9149,7 +9705,7 @@ var EmbeddedServer = class {
|
|
|
9149
9705
|
await handler.handleCallStart(callSid, customParameters);
|
|
9150
9706
|
} else if (event === "media") {
|
|
9151
9707
|
const payload = data.media?.payload ?? "";
|
|
9152
|
-
handler.handleAudio(Buffer.from(payload, "base64"));
|
|
9708
|
+
await handler.handleAudio(Buffer.from(payload, "base64"));
|
|
9153
9709
|
} else if (event === "mark") {
|
|
9154
9710
|
const markName = String(data.mark?.name ?? "");
|
|
9155
9711
|
if (markName) await handler.onMark(markName);
|
|
@@ -9161,6 +9717,7 @@ var EmbeddedServer = class {
|
|
|
9161
9717
|
}
|
|
9162
9718
|
} catch (err) {
|
|
9163
9719
|
getLogger().error("Stream handler error:", err);
|
|
9720
|
+
handler.recordError(err);
|
|
9164
9721
|
}
|
|
9165
9722
|
});
|
|
9166
9723
|
ws.on("close", async () => {
|
|
@@ -9205,7 +9762,7 @@ var EmbeddedServer = class {
|
|
|
9205
9762
|
if (track !== "inbound") return;
|
|
9206
9763
|
const audioChunk = data.media?.payload ?? "";
|
|
9207
9764
|
if (!audioChunk) return;
|
|
9208
|
-
handler.handleAudio(Buffer.from(audioChunk, "base64"));
|
|
9765
|
+
await handler.handleAudio(Buffer.from(audioChunk, "base64"));
|
|
9209
9766
|
} else if (event === "dtmf") {
|
|
9210
9767
|
const digit = String(data.dtmf?.digit ?? "").trim();
|
|
9211
9768
|
if (digit) {
|
|
@@ -9219,9 +9776,11 @@ var EmbeddedServer = class {
|
|
|
9219
9776
|
}
|
|
9220
9777
|
} catch (err) {
|
|
9221
9778
|
getLogger().error("Stream handler error (Telnyx):", err);
|
|
9779
|
+
handler.recordError(err);
|
|
9222
9780
|
}
|
|
9223
9781
|
});
|
|
9224
9782
|
ws.on("close", async () => {
|
|
9783
|
+
this.activeCallIds.delete(ws);
|
|
9225
9784
|
await handler.handleWsClose();
|
|
9226
9785
|
});
|
|
9227
9786
|
}
|
|
@@ -9250,7 +9809,7 @@ var EmbeddedServer = class {
|
|
|
9250
9809
|
await handler.handleCallStart(callId);
|
|
9251
9810
|
} else if (event === "media") {
|
|
9252
9811
|
const payload = data.media?.payload ?? "";
|
|
9253
|
-
if (payload) handler.handleAudio(Buffer.from(payload, "base64"));
|
|
9812
|
+
if (payload) await handler.handleAudio(Buffer.from(payload, "base64"));
|
|
9254
9813
|
} else if (event === "playedStream") {
|
|
9255
9814
|
const markName = String(data.name ?? "");
|
|
9256
9815
|
if (markName) await handler.onMark(markName);
|
|
@@ -9264,6 +9823,7 @@ var EmbeddedServer = class {
|
|
|
9264
9823
|
}
|
|
9265
9824
|
} catch (err) {
|
|
9266
9825
|
getLogger().error("Stream handler error (Plivo):", err);
|
|
9826
|
+
handler.recordError(err);
|
|
9267
9827
|
}
|
|
9268
9828
|
});
|
|
9269
9829
|
ws.on("close", async () => {
|
|
@@ -9733,7 +10293,7 @@ var OpenAILLMProvider = class {
|
|
|
9733
10293
|
});
|
|
9734
10294
|
if (!response.ok) {
|
|
9735
10295
|
const errText = await response.text();
|
|
9736
|
-
getLogger().error(`LLM API error: ${response.status} ${errText}`);
|
|
10296
|
+
getLogger().error(`LLM API error: ${response.status} ${errText.slice(0, 200)}`);
|
|
9737
10297
|
throw new PatterConnectionError(
|
|
9738
10298
|
`LLM API returned ${response.status}: ${errText.slice(0, 200)}`
|
|
9739
10299
|
);
|
|
@@ -9902,7 +10462,15 @@ ${systemPrompt}` : DEFAULT_PHONE_PREAMBLE;
|
|
|
9902
10462
|
const hasAfterLlmChunk = Boolean(hookExecutor?.hasAfterLlmChunk());
|
|
9903
10463
|
const allEmittedText = [];
|
|
9904
10464
|
const callId = callContext.call_id;
|
|
9905
|
-
const
|
|
10465
|
+
const caller = callContext.caller;
|
|
10466
|
+
const callee = callContext.callee;
|
|
10467
|
+
const hasContext = typeof callId === "string" && callId.length > 0 || typeof caller === "string" && caller.length > 0 || typeof callee === "string" && callee.length > 0;
|
|
10468
|
+
const streamOpts = hasContext ? {
|
|
10469
|
+
...opts,
|
|
10470
|
+
...typeof callId === "string" && callId.length > 0 ? { callId } : {},
|
|
10471
|
+
...typeof caller === "string" && caller.length > 0 ? { caller } : {},
|
|
10472
|
+
...typeof callee === "string" && callee.length > 0 ? { callee } : {}
|
|
10473
|
+
} : opts;
|
|
9906
10474
|
for (let iter = 0; iter < maxIterations; iter++) {
|
|
9907
10475
|
const toolCallsAccumulated = /* @__PURE__ */ new Map();
|
|
9908
10476
|
const textParts = [];
|
|
@@ -10036,6 +10604,7 @@ ${systemPrompt}` : DEFAULT_PHONE_PREAMBLE;
|
|
|
10036
10604
|
{ role: "system", content: this.systemPrompt }
|
|
10037
10605
|
];
|
|
10038
10606
|
for (const entry of history) {
|
|
10607
|
+
if (entry.role === "tool") continue;
|
|
10039
10608
|
messages.push({
|
|
10040
10609
|
role: entry.role === "assistant" ? "assistant" : "user",
|
|
10041
10610
|
content: entry.text
|