@alexkroman1/aai 1.3.0 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +6 -6
- package/CHANGELOG.md +6 -0
- package/dist/{constants-VTFoymJ-.js → constants-BL3nvg4I.js} +8 -1
- package/dist/host/_pipeline-test-fakes.d.ts +10 -0
- package/dist/host/runtime-barrel.js +111 -45
- package/dist/host/to-vercel-tools.d.ts +4 -3
- package/dist/index.js +2 -2
- package/dist/sdk/constants.d.ts +7 -0
- package/dist/sdk/protocol.js +1 -1
- package/host/_pipeline-test-fakes.ts +34 -0
- package/host/pipeline-session.test.ts +235 -0
- package/host/pipeline-session.ts +126 -42
- package/host/to-vercel-tools.test.ts +35 -1
- package/host/to-vercel-tools.ts +8 -4
- package/package.json +1 -1
- package/sdk/__snapshots__/exports.test.ts.snap +1 -0
- package/sdk/constants.ts +8 -0
package/.turbo/turbo-build.log
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
|
|
2
|
-
> @alexkroman1/aai@1.3.
|
|
2
|
+
> @alexkroman1/aai@1.3.1 build /home/runner/work/agent/agent/packages/aai
|
|
3
3
|
> tsdown && tsc -p tsconfig.build.json
|
|
4
4
|
|
|
5
5
|
[34mℹ[39m [34mtsdown v0.21.7[39m powered by [38;2;255;126;23mrolldown v1.0.0-rc.12[39m
|
|
@@ -8,15 +8,15 @@
|
|
|
8
8
|
[34mℹ[39m target: [34mnode22[39m
|
|
9
9
|
[34mℹ[39m tsconfig: [34mtsconfig.json[39m
|
|
10
10
|
[34mℹ[39m Build start
|
|
11
|
-
[34mℹ[39m [2mdist/[22m[1mhost/runtime-barrel.js[22m [
|
|
12
|
-
[34mℹ[39m [2mdist/[22m[1mindex.js[22m [2m 6.
|
|
11
|
+
[34mℹ[39m [2mdist/[22m[1mhost/runtime-barrel.js[22m [2m63.51 kB[22m [2m│ gzip: 19.17 kB[22m
|
|
12
|
+
[34mℹ[39m [2mdist/[22m[1mindex.js[22m [2m 6.67 kB[22m [2m│ gzip: 2.65 kB[22m
|
|
13
13
|
[34mℹ[39m [2mdist/[22m[1mhost/providers/tts-barrel.js[22m [2m 5.52 kB[22m [2m│ gzip: 2.12 kB[22m
|
|
14
14
|
[34mℹ[39m [2mdist/[22m[1msdk/protocol.js[22m [2m 4.75 kB[22m [2m│ gzip: 1.76 kB[22m
|
|
15
15
|
[34mℹ[39m [2mdist/[22m[1mhost/providers/stt-barrel.js[22m [2m 3.08 kB[22m [2m│ gzip: 1.26 kB[22m
|
|
16
16
|
[34mℹ[39m [2mdist/[22m[1msdk/manifest-barrel.js[22m [2m 0.26 kB[22m [2m│ gzip: 0.17 kB[22m
|
|
17
|
-
[34mℹ[39m [2mdist/[22mconstants-
|
|
17
|
+
[34mℹ[39m [2mdist/[22mconstants-BL3nvg4I.js [2m 3.10 kB[22m [2m│ gzip: 1.38 kB[22m
|
|
18
18
|
[34mℹ[39m [2mdist/[22m_internal-types-CoDTiBd1.js [2m 2.33 kB[22m [2m│ gzip: 0.99 kB[22m
|
|
19
19
|
[34mℹ[39m [2mdist/[22mtypes-Cfx_4QDK.js [2m 1.74 kB[22m [2m│ gzip: 0.93 kB[22m
|
|
20
20
|
[34mℹ[39m [2mdist/[22mws-upgrade-BeOQ7fXL.js [2m 1.14 kB[22m [2m│ gzip: 0.54 kB[22m
|
|
21
|
-
[34mℹ[39m 10 files, total:
|
|
22
|
-
[32m✔[39m Build complete in [
|
|
21
|
+
[34mℹ[39m 10 files, total: 92.10 kB
|
|
22
|
+
[32m✔[39m Build complete in [32m43ms[39m
|
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,11 @@
|
|
|
1
1
|
# @alexkroman1/aai
|
|
2
2
|
|
|
3
|
+
## 1.3.1
|
|
4
|
+
|
|
5
|
+
### Patch Changes
|
|
6
|
+
|
|
7
|
+
- 5a9f3d5: Pipeline session concurrency fixes: serialize turns across duplicate STT finals, bound TTS flush with abort+timeout, cascade provider errors to terminate session, atomic provider open, snapshot conversation history in tool executions.
|
|
8
|
+
|
|
3
9
|
## 1.3.0
|
|
4
10
|
|
|
5
11
|
### Minor Changes
|
|
@@ -21,6 +21,13 @@ const FETCH_TIMEOUT_MS = 15e3;
|
|
|
21
21
|
const RUN_CODE_TIMEOUT_MS = 5e3;
|
|
22
22
|
/** Maximum time to wait for sessions to stop during graceful shutdown. */
|
|
23
23
|
const DEFAULT_SHUTDOWN_TIMEOUT_MS = 3e4;
|
|
24
|
+
/**
|
|
25
|
+
* Maximum time to wait for a pipeline-mode TTS drain after `flush()` before
|
|
26
|
+
* forcing the turn to complete. Prevents a stuck TTS provider from wedging
|
|
27
|
+
* the session. Short relative to `DEFAULT_SHUTDOWN_TIMEOUT_MS` so stop()
|
|
28
|
+
* can still reclaim the socket cleanly.
|
|
29
|
+
*/
|
|
30
|
+
const PIPELINE_FLUSH_TIMEOUT_MS = 1e4;
|
|
24
31
|
/** Maximum length for tool result strings sent to clients. */
|
|
25
32
|
const MAX_TOOL_RESULT_CHARS = 4e3;
|
|
26
33
|
/** Maximum chars for webpage text after HTML-to-text conversion. */
|
|
@@ -44,4 +51,4 @@ const WS_OPEN = 1;
|
|
|
44
51
|
*/
|
|
45
52
|
const AGENT_CSP = "default-src 'self'; script-src 'self' 'unsafe-eval' blob:; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com; connect-src 'self' wss: ws:; img-src 'self' data:; font-src 'self' https://fonts.gstatic.com; object-src 'none'; base-uri 'self'";
|
|
46
53
|
//#endregion
|
|
47
|
-
export {
|
|
54
|
+
export { TOOL_EXECUTION_TIMEOUT_MS as _, DEFAULT_SHUTDOWN_TIMEOUT_MS as a, FETCH_TIMEOUT_MS as c, MAX_PAGE_CHARS as d, MAX_TOOL_RESULT_CHARS as f, RUN_CODE_TIMEOUT_MS as g, PIPELINE_FLUSH_TIMEOUT_MS as h, DEFAULT_SESSION_START_TIMEOUT_MS as i, MAX_HTML_BYTES as l, MAX_WS_PAYLOAD_BYTES as m, DEFAULT_IDLE_TIMEOUT_MS as n, DEFAULT_STT_SAMPLE_RATE as o, MAX_VALUE_SIZE as p, DEFAULT_MAX_HISTORY as r, DEFAULT_TTS_SAMPLE_RATE as s, AGENT_CSP as t, MAX_MESSAGE_BUFFER_SIZE as u, WS_OPEN as v };
|
|
@@ -59,6 +59,16 @@ export type FakeTtsProvider = TtsProvider & {
|
|
|
59
59
|
export declare function createFakeTtsProvider(options?: {
|
|
60
60
|
autoDoneOnFlush?: boolean;
|
|
61
61
|
}): FakeTtsProvider;
|
|
62
|
+
/**
|
|
63
|
+
* Fake STT provider that throws on `open()` with a given error code. Used to
|
|
64
|
+
* test atomic provider open — TTS should not be opened at all when STT fails.
|
|
65
|
+
*/
|
|
66
|
+
export declare function createFailingSttProvider(code: "stt_connect_failed" | "stt_auth_failed" | "stt_stream_error", message: string): SttProvider;
|
|
67
|
+
/**
|
|
68
|
+
* Fake TTS provider that throws on `open()` with a given error code. Used to
|
|
69
|
+
* test atomic provider open — STT should be closed when TTS fails.
|
|
70
|
+
*/
|
|
71
|
+
export declare function createFailingTtsProvider(code: "tts_connect_failed" | "tts_auth_failed" | "tts_stream_error", message: string): TtsProvider;
|
|
62
72
|
/**
|
|
63
73
|
* A scripted stream part. `text` yields a `text-delta` in the LLM provider's
|
|
64
74
|
* raw wire format; `tool-call` / `tool-result` emit the corresponding parts
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { a as DEFAULT_SHUTDOWN_TIMEOUT_MS, c as FETCH_TIMEOUT_MS, d as MAX_PAGE_CHARS, f as MAX_TOOL_RESULT_CHARS, g as
|
|
1
|
+
import { _ as TOOL_EXECUTION_TIMEOUT_MS, a as DEFAULT_SHUTDOWN_TIMEOUT_MS, c as FETCH_TIMEOUT_MS, d as MAX_PAGE_CHARS, f as MAX_TOOL_RESULT_CHARS, g as RUN_CODE_TIMEOUT_MS, h as PIPELINE_FLUSH_TIMEOUT_MS, l as MAX_HTML_BYTES, m as MAX_WS_PAYLOAD_BYTES, o as DEFAULT_STT_SAMPLE_RATE, p as MAX_VALUE_SIZE, s as DEFAULT_TTS_SAMPLE_RATE, t as AGENT_CSP } from "../constants-BL3nvg4I.js";
|
|
2
2
|
import { r as DEFAULT_SYSTEM_PROMPT } from "../types-Cfx_4QDK.js";
|
|
3
3
|
import { i as toolError, n as errorDetail, r as errorMessage, t as parseWsUpgradeParams } from "../ws-upgrade-BeOQ7fXL.js";
|
|
4
4
|
import { ClientMessageSchema, buildReadyConfig, lenientParse } from "../sdk/protocol.js";
|
|
@@ -518,7 +518,7 @@ function toVercelTools(schemas, ctx) {
|
|
|
518
518
|
const opts = {};
|
|
519
519
|
if (signal !== void 0) opts.signal = signal;
|
|
520
520
|
if (options.toolCallId !== void 0) opts.toolCallId = options.toolCallId;
|
|
521
|
-
return ctx.executeTool(schema.name, input, ctx.sessionId, ctx.messages(), opts);
|
|
521
|
+
return ctx.executeTool(schema.name, input, ctx.sessionId, ctx.messages().slice(), opts);
|
|
522
522
|
}
|
|
523
523
|
});
|
|
524
524
|
return out;
|
|
@@ -612,11 +612,30 @@ function createPipelineSession(opts) {
|
|
|
612
612
|
});
|
|
613
613
|
const sessionAbort = new AbortController();
|
|
614
614
|
let audioReady = false;
|
|
615
|
+
let terminated = false;
|
|
615
616
|
let turnController = null;
|
|
616
617
|
let nextReplyId = 0;
|
|
617
618
|
const sttSubs = [];
|
|
618
619
|
const ttsSubs = [];
|
|
620
|
+
/**
|
|
621
|
+
* Tear down the session after an unrecoverable provider error. Aborts the
|
|
622
|
+
* in-flight turn, cancels TTS, signals providers to close via sessionAbort,
|
|
623
|
+
* and flips `terminated` so future STT events and audio frames become
|
|
624
|
+
* no-ops. Idempotent.
|
|
625
|
+
*/
|
|
626
|
+
function terminate() {
|
|
627
|
+
if (terminated) return;
|
|
628
|
+
terminated = true;
|
|
629
|
+
if (turnController !== null) {
|
|
630
|
+
turnController.abort();
|
|
631
|
+
turnController = null;
|
|
632
|
+
}
|
|
633
|
+
ctx.tts?.cancel();
|
|
634
|
+
ctx.cancelReply();
|
|
635
|
+
sessionAbort.abort();
|
|
636
|
+
}
|
|
619
637
|
function onSttPartial(_text) {
|
|
638
|
+
if (terminated) return;
|
|
620
639
|
if (turnController === null) return;
|
|
621
640
|
log.info("Pipeline barge-in", { sessionId: opts.id });
|
|
622
641
|
turnController.abort();
|
|
@@ -626,8 +645,17 @@ function createPipelineSession(opts) {
|
|
|
626
645
|
client.event({ type: "cancelled" });
|
|
627
646
|
}
|
|
628
647
|
function onSttFinal(text) {
|
|
648
|
+
if (terminated) return;
|
|
629
649
|
const trimmed = text.trim();
|
|
630
650
|
if (trimmed.length === 0) return;
|
|
651
|
+
if (turnController !== null) {
|
|
652
|
+
log.info("Pipeline replacing in-flight turn", { sessionId: opts.id });
|
|
653
|
+
turnController.abort();
|
|
654
|
+
turnController = null;
|
|
655
|
+
ctx.tts?.cancel();
|
|
656
|
+
ctx.cancelReply();
|
|
657
|
+
client.event({ type: "cancelled" });
|
|
658
|
+
}
|
|
631
659
|
client.event({
|
|
632
660
|
type: "user_transcript",
|
|
633
661
|
text
|
|
@@ -641,20 +669,24 @@ function createPipelineSession(opts) {
|
|
|
641
669
|
ctx.chainTurn(turn);
|
|
642
670
|
}
|
|
643
671
|
function onSttError(err) {
|
|
672
|
+
if (terminated) return;
|
|
644
673
|
log.error("STT error", {
|
|
645
674
|
code: err.code,
|
|
646
675
|
message: err.message,
|
|
647
676
|
sessionId: opts.id
|
|
648
677
|
});
|
|
649
678
|
emitError(client, "stt", err.message);
|
|
679
|
+
terminate();
|
|
650
680
|
}
|
|
651
681
|
function onTtsError(err) {
|
|
682
|
+
if (terminated) return;
|
|
652
683
|
log.error("TTS error", {
|
|
653
684
|
code: err.code,
|
|
654
685
|
message: err.message,
|
|
655
686
|
sessionId: opts.id
|
|
656
687
|
});
|
|
657
688
|
emitError(client, "tts", err.message);
|
|
689
|
+
terminate();
|
|
658
690
|
}
|
|
659
691
|
async function consumeLlmStream(ctl, messages, tools, onDelta) {
|
|
660
692
|
const deps = {
|
|
@@ -689,14 +721,48 @@ function createPipelineSession(opts) {
|
|
|
689
721
|
}
|
|
690
722
|
}
|
|
691
723
|
}
|
|
692
|
-
|
|
724
|
+
/**
|
|
725
|
+
* Flush TTS and wait for drain. Resolves on any of:
|
|
726
|
+
* - TTS emits `done`
|
|
727
|
+
* - `signal` aborts (barge-in, provider error, session stop)
|
|
728
|
+
* - `PIPELINE_FLUSH_TIMEOUT_MS` elapses
|
|
729
|
+
* Resolves immediately if no TTS session.
|
|
730
|
+
*/
|
|
731
|
+
function flushTtsAndWait(signal) {
|
|
693
732
|
const tts = ctx.tts;
|
|
694
733
|
if (!tts) return Promise.resolve();
|
|
695
734
|
return new Promise((resolve) => {
|
|
696
|
-
|
|
697
|
-
|
|
735
|
+
let off = null;
|
|
736
|
+
let timer = null;
|
|
737
|
+
const cleanup = () => {
|
|
738
|
+
if (off) {
|
|
739
|
+
off();
|
|
740
|
+
off = null;
|
|
741
|
+
}
|
|
742
|
+
if (timer) {
|
|
743
|
+
clearTimeout(timer);
|
|
744
|
+
timer = null;
|
|
745
|
+
}
|
|
746
|
+
signal.removeEventListener("abort", onAbort);
|
|
747
|
+
};
|
|
748
|
+
const finish = () => {
|
|
749
|
+
cleanup();
|
|
698
750
|
resolve();
|
|
699
|
-
}
|
|
751
|
+
};
|
|
752
|
+
const onAbort = () => finish();
|
|
753
|
+
if (signal.aborted) {
|
|
754
|
+
resolve();
|
|
755
|
+
return;
|
|
756
|
+
}
|
|
757
|
+
signal.addEventListener("abort", onAbort, { once: true });
|
|
758
|
+
off = tts.on("done", finish);
|
|
759
|
+
timer = setTimeout(() => {
|
|
760
|
+
log.warn("TTS flush timeout", {
|
|
761
|
+
sessionId: opts.id,
|
|
762
|
+
timeoutMs: PIPELINE_FLUSH_TIMEOUT_MS
|
|
763
|
+
});
|
|
764
|
+
finish();
|
|
765
|
+
}, PIPELINE_FLUSH_TIMEOUT_MS);
|
|
700
766
|
tts.flush();
|
|
701
767
|
});
|
|
702
768
|
}
|
|
@@ -724,7 +790,7 @@ function createPipelineSession(opts) {
|
|
|
724
790
|
if (turnController === ctl) turnController = null;
|
|
725
791
|
return;
|
|
726
792
|
}
|
|
727
|
-
await flushTtsAndWait();
|
|
793
|
+
await flushTtsAndWait(ctl.signal);
|
|
728
794
|
if (ctl.signal.aborted) {
|
|
729
795
|
if (turnController === ctl) turnController = null;
|
|
730
796
|
return;
|
|
@@ -737,6 +803,35 @@ function createPipelineSession(opts) {
|
|
|
737
803
|
client.event({ type: "reply_done" });
|
|
738
804
|
if (turnController === ctl) turnController = null;
|
|
739
805
|
}
|
|
806
|
+
function reportOpenRejection(which, reason) {
|
|
807
|
+
const msg = errorMessage(reason);
|
|
808
|
+
log.error(`${which === "stt" ? "STT" : "TTS"} open failed`, {
|
|
809
|
+
error: msg,
|
|
810
|
+
sessionId: opts.id
|
|
811
|
+
});
|
|
812
|
+
emitError(client, which, msg);
|
|
813
|
+
}
|
|
814
|
+
async function adoptStt(sttSession, teardown) {
|
|
815
|
+
if (teardown) {
|
|
816
|
+
await sttSession.close().catch(() => void 0);
|
|
817
|
+
return;
|
|
818
|
+
}
|
|
819
|
+
ctx.stt = sttSession;
|
|
820
|
+
sttSubs.push(sttSession.on("partial", onSttPartial));
|
|
821
|
+
sttSubs.push(sttSession.on("final", onSttFinal));
|
|
822
|
+
sttSubs.push(sttSession.on("error", onSttError));
|
|
823
|
+
}
|
|
824
|
+
async function adoptTts(ttsSession, teardown) {
|
|
825
|
+
if (teardown) {
|
|
826
|
+
await ttsSession.close().catch(() => void 0);
|
|
827
|
+
return;
|
|
828
|
+
}
|
|
829
|
+
ctx.tts = ttsSession;
|
|
830
|
+
ttsSubs.push(ttsSession.on("audio", (pcm) => {
|
|
831
|
+
client.playAudioChunk(new Uint8Array(pcm.buffer, pcm.byteOffset, pcm.byteLength));
|
|
832
|
+
}));
|
|
833
|
+
ttsSubs.push(ttsSession.on("error", onTtsError));
|
|
834
|
+
}
|
|
740
835
|
async function openProviders() {
|
|
741
836
|
const [sttResult, ttsResult] = await Promise.allSettled([opts.stt.open({
|
|
742
837
|
sampleRate,
|
|
@@ -748,47 +843,15 @@ function createPipelineSession(opts) {
|
|
|
748
843
|
apiKey: opts.ttsApiKey,
|
|
749
844
|
signal: sessionAbort.signal
|
|
750
845
|
})]);
|
|
751
|
-
if (sttResult.status === "rejected")
|
|
752
|
-
|
|
753
|
-
log.error("STT open failed", {
|
|
754
|
-
error: msg,
|
|
755
|
-
sessionId: opts.id
|
|
756
|
-
});
|
|
757
|
-
emitError(client, "stt", msg);
|
|
758
|
-
}
|
|
759
|
-
if (ttsResult.status === "rejected") {
|
|
760
|
-
const msg = errorMessage(ttsResult.reason);
|
|
761
|
-
log.error("TTS open failed", {
|
|
762
|
-
error: msg,
|
|
763
|
-
sessionId: opts.id
|
|
764
|
-
});
|
|
765
|
-
emitError(client, "tts", msg);
|
|
766
|
-
}
|
|
846
|
+
if (sttResult.status === "rejected") reportOpenRejection("stt", sttResult.reason);
|
|
847
|
+
if (ttsResult.status === "rejected") reportOpenRejection("tts", ttsResult.reason);
|
|
767
848
|
const aborted = sessionAbort.signal.aborted;
|
|
768
849
|
const sttFailed = sttResult.status === "rejected";
|
|
769
850
|
const ttsFailed = ttsResult.status === "rejected";
|
|
770
851
|
const teardown = aborted || sttFailed || ttsFailed;
|
|
771
|
-
if (sttResult.status === "fulfilled")
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
else {
|
|
775
|
-
ctx.stt = sttSession;
|
|
776
|
-
sttSubs.push(sttSession.on("partial", onSttPartial));
|
|
777
|
-
sttSubs.push(sttSession.on("final", onSttFinal));
|
|
778
|
-
sttSubs.push(sttSession.on("error", onSttError));
|
|
779
|
-
}
|
|
780
|
-
}
|
|
781
|
-
if (ttsResult.status === "fulfilled") {
|
|
782
|
-
const ttsSession = ttsResult.value;
|
|
783
|
-
if (teardown) await ttsSession.close().catch(() => void 0);
|
|
784
|
-
else {
|
|
785
|
-
ctx.tts = ttsSession;
|
|
786
|
-
ttsSubs.push(ttsSession.on("audio", (pcm) => {
|
|
787
|
-
client.playAudioChunk(new Uint8Array(pcm.buffer, pcm.byteOffset, pcm.byteLength));
|
|
788
|
-
}));
|
|
789
|
-
ttsSubs.push(ttsSession.on("error", onTtsError));
|
|
790
|
-
}
|
|
791
|
-
}
|
|
852
|
+
if (sttResult.status === "fulfilled") await adoptStt(sttResult.value, teardown);
|
|
853
|
+
if (ttsResult.status === "fulfilled") await adoptTts(ttsResult.value, teardown);
|
|
854
|
+
if (!aborted && (sttFailed || ttsFailed)) terminate();
|
|
792
855
|
}
|
|
793
856
|
return {
|
|
794
857
|
async start() {
|
|
@@ -807,7 +870,7 @@ function createPipelineSession(opts) {
|
|
|
807
870
|
await ctx.tts?.close().catch(() => {});
|
|
808
871
|
},
|
|
809
872
|
onAudio(data) {
|
|
810
|
-
if (!audioReady) return;
|
|
873
|
+
if (terminated || !audioReady) return;
|
|
811
874
|
const offset = data.byteOffset;
|
|
812
875
|
const length = data.byteLength;
|
|
813
876
|
let pcm;
|
|
@@ -823,6 +886,7 @@ function createPipelineSession(opts) {
|
|
|
823
886
|
audioReady = true;
|
|
824
887
|
},
|
|
825
888
|
onCancel() {
|
|
889
|
+
if (terminated) return;
|
|
826
890
|
turnController?.abort();
|
|
827
891
|
turnController = null;
|
|
828
892
|
ctx.tts?.cancel();
|
|
@@ -830,6 +894,7 @@ function createPipelineSession(opts) {
|
|
|
830
894
|
client.event({ type: "cancelled" });
|
|
831
895
|
},
|
|
832
896
|
onReset() {
|
|
897
|
+
if (terminated) return;
|
|
833
898
|
turnController?.abort();
|
|
834
899
|
turnController = null;
|
|
835
900
|
ctx.tts?.cancel();
|
|
@@ -839,6 +904,7 @@ function createPipelineSession(opts) {
|
|
|
839
904
|
client.event({ type: "reset" });
|
|
840
905
|
},
|
|
841
906
|
onHistory(incoming) {
|
|
907
|
+
if (terminated) return;
|
|
842
908
|
ctx.pushMessages(...incoming.map((m) => ({
|
|
843
909
|
role: m.role,
|
|
844
910
|
content: m.content
|
|
@@ -22,9 +22,10 @@ export interface ToVercelToolsContext {
|
|
|
22
22
|
/** Session id threaded to {@link executeTool}. */
|
|
23
23
|
sessionId: string;
|
|
24
24
|
/**
|
|
25
|
-
* Returns the current conversation history at call-time.
|
|
26
|
-
*
|
|
27
|
-
*
|
|
25
|
+
* Returns the current conversation history at call-time. The orchestrator
|
|
26
|
+
* calls this per invocation; `toVercelTools` snapshots the returned array
|
|
27
|
+
* before forwarding to `executeTool` so concurrent mutations cannot leak
|
|
28
|
+
* across tool calls.
|
|
28
29
|
*/
|
|
29
30
|
messages: () => readonly Message[];
|
|
30
31
|
/**
|
package/dist/index.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { _ as
|
|
1
|
+
import { _ as TOOL_EXECUTION_TIMEOUT_MS, a as DEFAULT_SHUTDOWN_TIMEOUT_MS, c as FETCH_TIMEOUT_MS, d as MAX_PAGE_CHARS, f as MAX_TOOL_RESULT_CHARS, g as RUN_CODE_TIMEOUT_MS, h as PIPELINE_FLUSH_TIMEOUT_MS, i as DEFAULT_SESSION_START_TIMEOUT_MS, l as MAX_HTML_BYTES, m as MAX_WS_PAYLOAD_BYTES, n as DEFAULT_IDLE_TIMEOUT_MS, o as DEFAULT_STT_SAMPLE_RATE, p as MAX_VALUE_SIZE, r as DEFAULT_MAX_HISTORY, s as DEFAULT_TTS_SAMPLE_RATE, t as AGENT_CSP, u as MAX_MESSAGE_BUFFER_SIZE, v as WS_OPEN } from "./constants-BL3nvg4I.js";
|
|
2
2
|
import { i as ToolChoiceSchema, n as DEFAULT_GREETING, r as DEFAULT_SYSTEM_PROMPT, t as BuiltinToolSchema } from "./types-Cfx_4QDK.js";
|
|
3
3
|
import { i as toolError, n as errorDetail, r as errorMessage, t as parseWsUpgradeParams } from "./ws-upgrade-BeOQ7fXL.js";
|
|
4
4
|
//#region sdk/allowed-hosts.ts
|
|
@@ -154,4 +154,4 @@ function agent(def) {
|
|
|
154
154
|
};
|
|
155
155
|
}
|
|
156
156
|
//#endregion
|
|
157
|
-
export { AGENT_CSP, BuiltinToolSchema, DEFAULT_GREETING, DEFAULT_IDLE_TIMEOUT_MS, DEFAULT_MAX_HISTORY, DEFAULT_SESSION_START_TIMEOUT_MS, DEFAULT_SHUTDOWN_TIMEOUT_MS, DEFAULT_STT_SAMPLE_RATE, DEFAULT_SYSTEM_PROMPT, DEFAULT_TTS_SAMPLE_RATE, FETCH_TIMEOUT_MS, MAX_HTML_BYTES, MAX_MESSAGE_BUFFER_SIZE, MAX_PAGE_CHARS, MAX_TOOL_RESULT_CHARS, MAX_VALUE_SIZE, MAX_WS_PAYLOAD_BYTES, RUN_CODE_TIMEOUT_MS, TOOL_EXECUTION_TIMEOUT_MS, ToolChoiceSchema, WS_OPEN, agent, errorDetail, errorMessage, matchesAllowedHost, parseWsUpgradeParams, tool, toolError, validateAllowedHostPattern };
|
|
157
|
+
export { AGENT_CSP, BuiltinToolSchema, DEFAULT_GREETING, DEFAULT_IDLE_TIMEOUT_MS, DEFAULT_MAX_HISTORY, DEFAULT_SESSION_START_TIMEOUT_MS, DEFAULT_SHUTDOWN_TIMEOUT_MS, DEFAULT_STT_SAMPLE_RATE, DEFAULT_SYSTEM_PROMPT, DEFAULT_TTS_SAMPLE_RATE, FETCH_TIMEOUT_MS, MAX_HTML_BYTES, MAX_MESSAGE_BUFFER_SIZE, MAX_PAGE_CHARS, MAX_TOOL_RESULT_CHARS, MAX_VALUE_SIZE, MAX_WS_PAYLOAD_BYTES, PIPELINE_FLUSH_TIMEOUT_MS, RUN_CODE_TIMEOUT_MS, TOOL_EXECUTION_TIMEOUT_MS, ToolChoiceSchema, WS_OPEN, agent, errorDetail, errorMessage, matchesAllowedHost, parseWsUpgradeParams, tool, toolError, validateAllowedHostPattern };
|
package/dist/sdk/constants.d.ts
CHANGED
|
@@ -20,6 +20,13 @@ export declare const FETCH_TIMEOUT_MS = 15000;
|
|
|
20
20
|
export declare const RUN_CODE_TIMEOUT_MS = 5000;
|
|
21
21
|
/** Maximum time to wait for sessions to stop during graceful shutdown. */
|
|
22
22
|
export declare const DEFAULT_SHUTDOWN_TIMEOUT_MS = 30000;
|
|
23
|
+
/**
|
|
24
|
+
* Maximum time to wait for a pipeline-mode TTS drain after `flush()` before
|
|
25
|
+
* forcing the turn to complete. Prevents a stuck TTS provider from wedging
|
|
26
|
+
* the session. Short relative to `DEFAULT_SHUTDOWN_TIMEOUT_MS` so stop()
|
|
27
|
+
* can still reclaim the socket cleanly.
|
|
28
|
+
*/
|
|
29
|
+
export declare const PIPELINE_FLUSH_TIMEOUT_MS = 10000;
|
|
23
30
|
/** Maximum length for tool result strings sent to clients. */
|
|
24
31
|
export declare const MAX_TOOL_RESULT_CHARS = 4000;
|
|
25
32
|
/** Maximum chars for webpage text after HTML-to-text conversion. */
|
package/dist/sdk/protocol.js
CHANGED
|
@@ -168,6 +168,40 @@ export function createFakeTtsProvider(
|
|
|
168
168
|
};
|
|
169
169
|
}
|
|
170
170
|
|
|
171
|
+
/**
|
|
172
|
+
* Fake STT provider that throws on `open()` with a given error code. Used to
|
|
173
|
+
* test atomic provider open — TTS should not be opened at all when STT fails.
|
|
174
|
+
*/
|
|
175
|
+
export function createFailingSttProvider(
|
|
176
|
+
code: "stt_connect_failed" | "stt_auth_failed" | "stt_stream_error",
|
|
177
|
+
message: string,
|
|
178
|
+
): SttProvider {
|
|
179
|
+
return {
|
|
180
|
+
name: "failing-stt",
|
|
181
|
+
async open(): Promise<SttSession> {
|
|
182
|
+
const err = Object.assign(new Error(message), { code }) as Error & { code: typeof code };
|
|
183
|
+
throw err;
|
|
184
|
+
},
|
|
185
|
+
};
|
|
186
|
+
}
|
|
187
|
+
|
|
188
|
+
/**
|
|
189
|
+
* Fake TTS provider that throws on `open()` with a given error code. Used to
|
|
190
|
+
* test atomic provider open — STT should be closed when TTS fails.
|
|
191
|
+
*/
|
|
192
|
+
export function createFailingTtsProvider(
|
|
193
|
+
code: "tts_connect_failed" | "tts_auth_failed" | "tts_stream_error",
|
|
194
|
+
message: string,
|
|
195
|
+
): TtsProvider {
|
|
196
|
+
return {
|
|
197
|
+
name: "failing-tts",
|
|
198
|
+
async open(): Promise<TtsSession> {
|
|
199
|
+
const err = Object.assign(new Error(message), { code }) as Error & { code: typeof code };
|
|
200
|
+
throw err;
|
|
201
|
+
},
|
|
202
|
+
};
|
|
203
|
+
}
|
|
204
|
+
|
|
171
205
|
// ─── Fake LLM ───────────────────────────────────────────────────────────────
|
|
172
206
|
|
|
173
207
|
/**
|
|
@@ -6,6 +6,8 @@ import type { AgentConfig } from "../sdk/_internal-types.ts";
|
|
|
6
6
|
import type { ClientEvent } from "../sdk/protocol.ts";
|
|
7
7
|
import { DEFAULT_SYSTEM_PROMPT } from "../sdk/types.ts";
|
|
8
8
|
import {
|
|
9
|
+
createFailingSttProvider,
|
|
10
|
+
createFailingTtsProvider,
|
|
9
11
|
createFakeLanguageModel,
|
|
10
12
|
createFakeSttProvider,
|
|
11
13
|
createFakeTtsProvider,
|
|
@@ -335,3 +337,236 @@ describe("createPipelineSession — STT error", () => {
|
|
|
335
337
|
await session.stop();
|
|
336
338
|
});
|
|
337
339
|
});
|
|
340
|
+
|
|
341
|
+
describe("createPipelineSession — duplicate final", () => {
|
|
342
|
+
test("second final during AGENT_REPLYING aborts prior turn and starts new one", async () => {
|
|
343
|
+
// Multi-part first step with delay so the first turn is still streaming
|
|
344
|
+
// when the second final arrives.
|
|
345
|
+
const steps: ScriptedPart[][] = [
|
|
346
|
+
[
|
|
347
|
+
{ type: "text", text: "first " },
|
|
348
|
+
{ type: "text", text: "reply " },
|
|
349
|
+
{ type: "text", text: "continues" },
|
|
350
|
+
],
|
|
351
|
+
[{ type: "text", text: "second reply" }],
|
|
352
|
+
];
|
|
353
|
+
const { opts, stt, tts, client } = makeOpts({
|
|
354
|
+
llm: createFakeLanguageModel({ steps, delayMs: 20 }),
|
|
355
|
+
});
|
|
356
|
+
|
|
357
|
+
const session = createPipelineSession(opts);
|
|
358
|
+
await session.start();
|
|
359
|
+
|
|
360
|
+
const sttSession = stt.last();
|
|
361
|
+
const ttsSession = tts.last();
|
|
362
|
+
if (!(sttSession && ttsSession)) throw new Error("providers didn't open");
|
|
363
|
+
|
|
364
|
+
sttSession.fireFinal("first question");
|
|
365
|
+
await vi.waitFor(() => {
|
|
366
|
+
expect(ttsSession.sendText.mock.calls.length).toBeGreaterThan(0);
|
|
367
|
+
});
|
|
368
|
+
|
|
369
|
+
// Second final arrives mid-reply.
|
|
370
|
+
sttSession.fireFinal("second question");
|
|
371
|
+
await session.waitForTurn();
|
|
372
|
+
|
|
373
|
+
// TTS.cancel fires once to abandon the first turn's audio.
|
|
374
|
+
expect(ttsSession.cancel).toHaveBeenCalledTimes(1);
|
|
375
|
+
|
|
376
|
+
// Both user transcripts reach the client.
|
|
377
|
+
const userTranscripts = client.events.filter(
|
|
378
|
+
(e) => (e as ClientEvent).type === "user_transcript",
|
|
379
|
+
);
|
|
380
|
+
expect(userTranscripts).toHaveLength(2);
|
|
381
|
+
|
|
382
|
+
// Second reply's text was synthesized.
|
|
383
|
+
expect(ttsSession.textChunks).toContain("second reply");
|
|
384
|
+
|
|
385
|
+
// Exactly one reply_done (for the second turn).
|
|
386
|
+
const replyDones = client.events.filter((e) => (e as ClientEvent).type === "reply_done");
|
|
387
|
+
expect(replyDones).toHaveLength(1);
|
|
388
|
+
|
|
389
|
+
await session.stop();
|
|
390
|
+
});
|
|
391
|
+
});
|
|
392
|
+
|
|
393
|
+
describe("createPipelineSession — flush timeout/abort", () => {
|
|
394
|
+
test("flush that never drains does not wedge stop()", async () => {
|
|
395
|
+
// autoDoneOnFlush: false → TTS never fires `done`, so flushTtsAndWait must
|
|
396
|
+
// resolve via the turn-abort signal when stop() fires.
|
|
397
|
+
const script: ScriptedPart[] = [{ type: "text", text: "hi" }];
|
|
398
|
+
const tts = createFakeTtsProvider({ autoDoneOnFlush: false });
|
|
399
|
+
const { opts, stt, client } = makeOpts({
|
|
400
|
+
llm: createFakeLanguageModel({ script }),
|
|
401
|
+
tts,
|
|
402
|
+
});
|
|
403
|
+
|
|
404
|
+
const session = createPipelineSession(opts);
|
|
405
|
+
await session.start();
|
|
406
|
+
|
|
407
|
+
const sttSession = stt.last();
|
|
408
|
+
const ttsSession = tts.last();
|
|
409
|
+
if (!(sttSession && ttsSession)) throw new Error("providers didn't open");
|
|
410
|
+
|
|
411
|
+
sttSession.fireFinal("hi");
|
|
412
|
+
// Wait until the turn has reached the flush step — without this guard,
|
|
413
|
+
// stop() aborts the controller before flushTtsAndWait is even called.
|
|
414
|
+
await vi.waitFor(() => {
|
|
415
|
+
expect(ttsSession.flush).toHaveBeenCalledTimes(1);
|
|
416
|
+
});
|
|
417
|
+
await session.stop();
|
|
418
|
+
|
|
419
|
+
// Turn aborted before reply_done could fire.
|
|
420
|
+
const types = eventTypes(client.events);
|
|
421
|
+
expect(types).not.toContain("reply_done");
|
|
422
|
+
});
|
|
423
|
+
});
|
|
424
|
+
|
|
425
|
+
describe("createPipelineSession — mid-session provider errors", () => {
|
|
426
|
+
test("STT error during reply aborts turn and stops further transcripts", async () => {
|
|
427
|
+
const script: ScriptedPart[] = [{ type: "text", text: "reply" }];
|
|
428
|
+
const { opts, stt, tts, client } = makeOpts({
|
|
429
|
+
llm: createFakeLanguageModel({ script, delayMs: 20 }),
|
|
430
|
+
});
|
|
431
|
+
|
|
432
|
+
const session = createPipelineSession(opts);
|
|
433
|
+
await session.start();
|
|
434
|
+
|
|
435
|
+
const sttSession = stt.last();
|
|
436
|
+
const ttsSession = tts.last();
|
|
437
|
+
if (!(sttSession && ttsSession)) throw new Error("providers didn't open");
|
|
438
|
+
|
|
439
|
+
sttSession.fireFinal("first");
|
|
440
|
+
await vi.waitFor(() => {
|
|
441
|
+
expect(ttsSession.sendText.mock.calls.length).toBeGreaterThan(0);
|
|
442
|
+
});
|
|
443
|
+
sttSession.fireError("stt_stream_error", "socket died");
|
|
444
|
+
await session.waitForTurn();
|
|
445
|
+
|
|
446
|
+
const errors = client.events.filter((e) => (e as ClientEvent).type === "error");
|
|
447
|
+
expect(errors).toHaveLength(1);
|
|
448
|
+
expect(errors[0]).toMatchObject({ code: "stt", message: "socket died" });
|
|
449
|
+
|
|
450
|
+
// Turn was aborted (TTS cancelled).
|
|
451
|
+
expect(ttsSession.cancel).toHaveBeenCalled();
|
|
452
|
+
|
|
453
|
+
// Further STT events are no-ops.
|
|
454
|
+
sttSession.fireFinal("ignored after error");
|
|
455
|
+
await session.waitForTurn();
|
|
456
|
+
const userTranscripts = client.events.filter(
|
|
457
|
+
(e) => (e as ClientEvent).type === "user_transcript",
|
|
458
|
+
);
|
|
459
|
+
expect(userTranscripts).toHaveLength(1);
|
|
460
|
+
|
|
461
|
+
await session.stop();
|
|
462
|
+
});
|
|
463
|
+
|
|
464
|
+
test("TTS error during reply aborts turn and stops further user transcripts", async () => {
|
|
465
|
+
const script: ScriptedPart[] = [{ type: "text", text: "reply" }];
|
|
466
|
+
const { opts, stt, tts, client } = makeOpts({
|
|
467
|
+
llm: createFakeLanguageModel({ script, delayMs: 20 }),
|
|
468
|
+
});
|
|
469
|
+
|
|
470
|
+
const session = createPipelineSession(opts);
|
|
471
|
+
await session.start();
|
|
472
|
+
|
|
473
|
+
const sttSession = stt.last();
|
|
474
|
+
const ttsSession = tts.last();
|
|
475
|
+
if (!(sttSession && ttsSession)) throw new Error("providers didn't open");
|
|
476
|
+
|
|
477
|
+
sttSession.fireFinal("first");
|
|
478
|
+
await vi.waitFor(() => {
|
|
479
|
+
expect(ttsSession.sendText.mock.calls.length).toBeGreaterThan(0);
|
|
480
|
+
});
|
|
481
|
+
ttsSession.fireError("tts_stream_error", "socket died");
|
|
482
|
+
await session.waitForTurn();
|
|
483
|
+
|
|
484
|
+
const errors = client.events.filter((e) => (e as ClientEvent).type === "error");
|
|
485
|
+
expect(errors).toHaveLength(1);
|
|
486
|
+
expect(errors[0]).toMatchObject({ code: "tts", message: "socket died" });
|
|
487
|
+
|
|
488
|
+
sttSession.fireFinal("should be ignored");
|
|
489
|
+
await session.waitForTurn();
|
|
490
|
+
const userTranscripts = client.events.filter(
|
|
491
|
+
(e) => (e as ClientEvent).type === "user_transcript",
|
|
492
|
+
);
|
|
493
|
+
expect(userTranscripts).toHaveLength(1);
|
|
494
|
+
|
|
495
|
+
await session.stop();
|
|
496
|
+
});
|
|
497
|
+
|
|
498
|
+
test("cancel/reset/history are no-ops after terminate", async () => {
|
|
499
|
+
const { opts, stt, client } = makeOpts();
|
|
500
|
+
const session = createPipelineSession(opts);
|
|
501
|
+
await session.start();
|
|
502
|
+
|
|
503
|
+
const sttSession = stt.last();
|
|
504
|
+
if (!sttSession) throw new Error("STT didn't open");
|
|
505
|
+
|
|
506
|
+
sttSession.fireError("stt_stream_error", "dead");
|
|
507
|
+
await session.waitForTurn();
|
|
508
|
+
|
|
509
|
+
const eventsBefore = client.events.length;
|
|
510
|
+
|
|
511
|
+
session.onCancel();
|
|
512
|
+
session.onReset();
|
|
513
|
+
session.onHistory([{ role: "user", content: "nope" }]);
|
|
514
|
+
|
|
515
|
+
expect(client.events).toHaveLength(eventsBefore);
|
|
516
|
+
|
|
517
|
+
await session.stop();
|
|
518
|
+
});
|
|
519
|
+
});
|
|
520
|
+
|
|
521
|
+
describe("createPipelineSession — atomic provider open", () => {
|
|
522
|
+
test("STT is closed when TTS open fails, session becomes terminated", async () => {
|
|
523
|
+
const stt = createFakeSttProvider();
|
|
524
|
+
const failingTts = createFailingTtsProvider("tts_connect_failed", "bad key");
|
|
525
|
+
|
|
526
|
+
const { opts, client } = makeOpts({ stt, tts: failingTts });
|
|
527
|
+
const session = createPipelineSession(opts);
|
|
528
|
+
await session.start();
|
|
529
|
+
|
|
530
|
+
const sttSession = stt.last();
|
|
531
|
+
expect(sttSession).toBeDefined();
|
|
532
|
+
expect(sttSession?.closed.value).toBe(true);
|
|
533
|
+
|
|
534
|
+
const errors = client.events.filter((e) => (e as ClientEvent).type === "error");
|
|
535
|
+
expect(errors).toHaveLength(1);
|
|
536
|
+
expect(errors[0]).toMatchObject({ code: "tts", message: "bad key" });
|
|
537
|
+
|
|
538
|
+
// Session terminated — further STT events are no-ops (even though
|
|
539
|
+
// listeners were never wired, terminate() also turns onCancel etc. into no-ops).
|
|
540
|
+
sttSession?.fireFinal("ignored");
|
|
541
|
+
await session.waitForTurn();
|
|
542
|
+
const userTranscripts = client.events.filter(
|
|
543
|
+
(e) => (e as ClientEvent).type === "user_transcript",
|
|
544
|
+
);
|
|
545
|
+
expect(userTranscripts).toHaveLength(0);
|
|
546
|
+
|
|
547
|
+
await session.stop();
|
|
548
|
+
});
|
|
549
|
+
|
|
550
|
+
test("TTS is never opened when STT open fails", async () => {
|
|
551
|
+
const failingStt = createFailingSttProvider("stt_connect_failed", "bad key");
|
|
552
|
+
const tts = createFakeTtsProvider();
|
|
553
|
+
const ttsOpenSpy = vi.spyOn(tts, "open");
|
|
554
|
+
|
|
555
|
+
const { opts, client } = makeOpts({ stt: failingStt, tts });
|
|
556
|
+
const session = createPipelineSession(opts);
|
|
557
|
+
await session.start();
|
|
558
|
+
|
|
559
|
+
// STT and TTS open concurrently via Promise.allSettled — TTS.open is
|
|
560
|
+
// still called, but once STT fails its result is discarded and the TTS
|
|
561
|
+
// session is closed.
|
|
562
|
+
expect(ttsOpenSpy).toHaveBeenCalledTimes(1);
|
|
563
|
+
const ttsSession = tts.last();
|
|
564
|
+
expect(ttsSession?.closed.value).toBe(true);
|
|
565
|
+
|
|
566
|
+
const errors = client.events.filter((e) => (e as ClientEvent).type === "error");
|
|
567
|
+
expect(errors).toHaveLength(1);
|
|
568
|
+
expect(errors[0]).toMatchObject({ code: "stt", message: "bad key" });
|
|
569
|
+
|
|
570
|
+
await session.stop();
|
|
571
|
+
});
|
|
572
|
+
});
|
package/host/pipeline-session.ts
CHANGED
|
@@ -10,11 +10,12 @@
|
|
|
10
10
|
import type { LanguageModel, ModelMessage } from "ai";
|
|
11
11
|
import { stepCountIs, streamText } from "ai";
|
|
12
12
|
import type { AgentConfig, ExecuteTool, ToolSchema } from "../sdk/_internal-types.ts";
|
|
13
|
-
import { DEFAULT_STT_SAMPLE_RATE } from "../sdk/constants.ts";
|
|
13
|
+
import { DEFAULT_STT_SAMPLE_RATE, PIPELINE_FLUSH_TIMEOUT_MS } from "../sdk/constants.ts";
|
|
14
14
|
import type { ClientSink, SessionErrorCode } from "../sdk/protocol.ts";
|
|
15
15
|
import type {
|
|
16
16
|
SttError,
|
|
17
17
|
SttProvider,
|
|
18
|
+
SttSession,
|
|
18
19
|
TtsError,
|
|
19
20
|
TtsProvider,
|
|
20
21
|
TtsSession,
|
|
@@ -157,12 +158,32 @@ export function createPipelineSession(opts: PipelineSessionOptions): Session {
|
|
|
157
158
|
|
|
158
159
|
const sessionAbort = new AbortController();
|
|
159
160
|
let audioReady = false;
|
|
161
|
+
let terminated = false;
|
|
160
162
|
let turnController: AbortController | null = null;
|
|
161
163
|
let nextReplyId = 0;
|
|
162
164
|
const sttSubs: Unsubscribe[] = [];
|
|
163
165
|
const ttsSubs: Unsubscribe[] = [];
|
|
164
166
|
|
|
167
|
+
/**
|
|
168
|
+
* Tear down the session after an unrecoverable provider error. Aborts the
|
|
169
|
+
* in-flight turn, cancels TTS, signals providers to close via sessionAbort,
|
|
170
|
+
* and flips `terminated` so future STT events and audio frames become
|
|
171
|
+
* no-ops. Idempotent.
|
|
172
|
+
*/
|
|
173
|
+
function terminate(): void {
|
|
174
|
+
if (terminated) return;
|
|
175
|
+
terminated = true;
|
|
176
|
+
if (turnController !== null) {
|
|
177
|
+
turnController.abort();
|
|
178
|
+
turnController = null;
|
|
179
|
+
}
|
|
180
|
+
ctx.tts?.cancel();
|
|
181
|
+
ctx.cancelReply();
|
|
182
|
+
sessionAbort.abort();
|
|
183
|
+
}
|
|
184
|
+
|
|
165
185
|
function onSttPartial(_text: string): void {
|
|
186
|
+
if (terminated) return;
|
|
166
187
|
if (turnController === null) return;
|
|
167
188
|
log.info("Pipeline barge-in", { sessionId: opts.id });
|
|
168
189
|
turnController.abort();
|
|
@@ -173,8 +194,22 @@ export function createPipelineSession(opts: PipelineSessionOptions): Session {
|
|
|
173
194
|
}
|
|
174
195
|
|
|
175
196
|
function onSttFinal(text: string): void {
|
|
197
|
+
if (terminated) return;
|
|
176
198
|
const trimmed = text.trim();
|
|
177
199
|
if (trimmed.length === 0) return;
|
|
200
|
+
// If a prior turn is still running (duplicate/late STT final or a
|
|
201
|
+
// second utterance without an intervening partial), abort it before
|
|
202
|
+
// launching a new one. Matches LiveKit's `current_speech.interrupt()`
|
|
203
|
+
// and Pipecat's `InterruptionFrame` broadcast: single-slot current
|
|
204
|
+
// turn, replace in-flight rather than queue.
|
|
205
|
+
if (turnController !== null) {
|
|
206
|
+
log.info("Pipeline replacing in-flight turn", { sessionId: opts.id });
|
|
207
|
+
turnController.abort();
|
|
208
|
+
turnController = null;
|
|
209
|
+
ctx.tts?.cancel();
|
|
210
|
+
ctx.cancelReply();
|
|
211
|
+
client.event({ type: "cancelled" });
|
|
212
|
+
}
|
|
178
213
|
client.event({ type: "user_transcript", text });
|
|
179
214
|
const turn = runTurn(trimmed).catch((err: unknown) => {
|
|
180
215
|
log.error("Pipeline turn crashed", { error: errorMessage(err), sessionId: opts.id });
|
|
@@ -183,13 +218,17 @@ export function createPipelineSession(opts: PipelineSessionOptions): Session {
|
|
|
183
218
|
}
|
|
184
219
|
|
|
185
220
|
function onSttError(err: SttError): void {
|
|
221
|
+
if (terminated) return;
|
|
186
222
|
log.error("STT error", { code: err.code, message: err.message, sessionId: opts.id });
|
|
187
223
|
emitError(client, "stt", err.message);
|
|
224
|
+
terminate();
|
|
188
225
|
}
|
|
189
226
|
|
|
190
227
|
function onTtsError(err: TtsError): void {
|
|
228
|
+
if (terminated) return;
|
|
191
229
|
log.error("TTS error", { code: err.code, message: err.message, sessionId: opts.id });
|
|
192
230
|
emitError(client, "tts", err.message);
|
|
231
|
+
terminate();
|
|
193
232
|
}
|
|
194
233
|
|
|
195
234
|
async function consumeLlmStream(
|
|
@@ -231,14 +270,48 @@ export function createPipelineSession(opts: PipelineSessionOptions): Session {
|
|
|
231
270
|
}
|
|
232
271
|
}
|
|
233
272
|
|
|
234
|
-
|
|
273
|
+
/**
|
|
274
|
+
* Flush TTS and wait for drain. Resolves on any of:
|
|
275
|
+
* - TTS emits `done`
|
|
276
|
+
* - `signal` aborts (barge-in, provider error, session stop)
|
|
277
|
+
* - `PIPELINE_FLUSH_TIMEOUT_MS` elapses
|
|
278
|
+
* Resolves immediately if no TTS session, or if `signal` is already aborted.
|
|
279
|
+
*/
|
|
280
|
+
function flushTtsAndWait(signal: AbortSignal): Promise<void> {
|
|
235
281
|
const tts = ctx.tts;
|
|
236
282
|
if (!tts) return Promise.resolve();
|
|
237
283
|
return new Promise<void>((resolve) => {
|
|
238
|
-
|
|
239
|
-
|
|
284
|
+
let off: Unsubscribe | null = null;
|
|
285
|
+
let timer: ReturnType<typeof setTimeout> | null = null;
|
|
286
|
+
const cleanup = () => {
|
|
287
|
+
if (off) {
|
|
288
|
+
off();
|
|
289
|
+
off = null;
|
|
290
|
+
}
|
|
291
|
+
if (timer) {
|
|
292
|
+
clearTimeout(timer);
|
|
293
|
+
timer = null;
|
|
294
|
+
}
|
|
295
|
+
signal.removeEventListener("abort", onAbort);
|
|
296
|
+
};
|
|
297
|
+
const finish = () => {
|
|
298
|
+
cleanup();
|
|
240
299
|
resolve();
|
|
241
|
-
}
|
|
300
|
+
};
|
|
301
|
+
const onAbort = () => finish();
|
|
302
|
+
if (signal.aborted) {
|
|
303
|
+
resolve();
|
|
304
|
+
return;
|
|
305
|
+
}
|
|
306
|
+
signal.addEventListener("abort", onAbort, { once: true });
|
|
307
|
+
off = tts.on("done", finish);
|
|
308
|
+
timer = setTimeout(() => {
|
|
309
|
+
log.warn("TTS flush timeout", {
|
|
310
|
+
sessionId: opts.id,
|
|
311
|
+
timeoutMs: PIPELINE_FLUSH_TIMEOUT_MS,
|
|
312
|
+
});
|
|
313
|
+
finish();
|
|
314
|
+
}, PIPELINE_FLUSH_TIMEOUT_MS);
|
|
242
315
|
tts.flush();
|
|
243
316
|
});
|
|
244
317
|
}
|
|
@@ -269,7 +342,7 @@ export function createPipelineSession(opts: PipelineSessionOptions): Session {
|
|
|
269
342
|
return;
|
|
270
343
|
}
|
|
271
344
|
|
|
272
|
-
await flushTtsAndWait();
|
|
345
|
+
await flushTtsAndWait(ctl.signal);
|
|
273
346
|
|
|
274
347
|
if (ctl.signal.aborted) {
|
|
275
348
|
if (turnController === ctl) turnController = null;
|
|
@@ -284,6 +357,40 @@ export function createPipelineSession(opts: PipelineSessionOptions): Session {
|
|
|
284
357
|
if (turnController === ctl) turnController = null;
|
|
285
358
|
}
|
|
286
359
|
|
|
360
|
+
function reportOpenRejection(which: "stt" | "tts", reason: unknown): void {
|
|
361
|
+
const msg = errorMessage(reason);
|
|
362
|
+
log.error(`${which === "stt" ? "STT" : "TTS"} open failed`, {
|
|
363
|
+
error: msg,
|
|
364
|
+
sessionId: opts.id,
|
|
365
|
+
});
|
|
366
|
+
emitError(client, which, msg);
|
|
367
|
+
}
|
|
368
|
+
|
|
369
|
+
async function adoptStt(sttSession: SttSession, teardown: boolean): Promise<void> {
|
|
370
|
+
if (teardown) {
|
|
371
|
+
await sttSession.close().catch(() => undefined);
|
|
372
|
+
return;
|
|
373
|
+
}
|
|
374
|
+
ctx.stt = sttSession;
|
|
375
|
+
sttSubs.push(sttSession.on("partial", onSttPartial));
|
|
376
|
+
sttSubs.push(sttSession.on("final", onSttFinal));
|
|
377
|
+
sttSubs.push(sttSession.on("error", onSttError));
|
|
378
|
+
}
|
|
379
|
+
|
|
380
|
+
async function adoptTts(ttsSession: TtsSession, teardown: boolean): Promise<void> {
|
|
381
|
+
if (teardown) {
|
|
382
|
+
await ttsSession.close().catch(() => undefined);
|
|
383
|
+
return;
|
|
384
|
+
}
|
|
385
|
+
ctx.tts = ttsSession;
|
|
386
|
+
ttsSubs.push(
|
|
387
|
+
ttsSession.on("audio", (pcm) => {
|
|
388
|
+
client.playAudioChunk(new Uint8Array(pcm.buffer, pcm.byteOffset, pcm.byteLength));
|
|
389
|
+
}),
|
|
390
|
+
);
|
|
391
|
+
ttsSubs.push(ttsSession.on("error", onTtsError));
|
|
392
|
+
}
|
|
393
|
+
|
|
287
394
|
async function openProviders(): Promise<void> {
|
|
288
395
|
const [sttResult, ttsResult] = await Promise.allSettled([
|
|
289
396
|
opts.stt.open({
|
|
@@ -299,47 +406,21 @@ export function createPipelineSession(opts: PipelineSessionOptions): Session {
|
|
|
299
406
|
}),
|
|
300
407
|
]);
|
|
301
408
|
|
|
302
|
-
if (sttResult.status === "rejected")
|
|
303
|
-
|
|
304
|
-
log.error("STT open failed", { error: msg, sessionId: opts.id });
|
|
305
|
-
emitError(client, "stt", msg);
|
|
306
|
-
}
|
|
307
|
-
if (ttsResult.status === "rejected") {
|
|
308
|
-
const msg = errorMessage(ttsResult.reason);
|
|
309
|
-
log.error("TTS open failed", { error: msg, sessionId: opts.id });
|
|
310
|
-
emitError(client, "tts", msg);
|
|
311
|
-
}
|
|
409
|
+
if (sttResult.status === "rejected") reportOpenRejection("stt", sttResult.reason);
|
|
410
|
+
if (ttsResult.status === "rejected") reportOpenRejection("tts", ttsResult.reason);
|
|
312
411
|
|
|
313
412
|
const aborted = sessionAbort.signal.aborted;
|
|
314
413
|
const sttFailed = sttResult.status === "rejected";
|
|
315
414
|
const ttsFailed = ttsResult.status === "rejected";
|
|
316
415
|
const teardown = aborted || sttFailed || ttsFailed;
|
|
317
416
|
|
|
318
|
-
if (sttResult.status === "fulfilled")
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
sttSubs.push(sttSession.on("final", onSttFinal));
|
|
326
|
-
sttSubs.push(sttSession.on("error", onSttError));
|
|
327
|
-
}
|
|
328
|
-
}
|
|
329
|
-
if (ttsResult.status === "fulfilled") {
|
|
330
|
-
const ttsSession = ttsResult.value;
|
|
331
|
-
if (teardown) {
|
|
332
|
-
await ttsSession.close().catch(() => undefined);
|
|
333
|
-
} else {
|
|
334
|
-
ctx.tts = ttsSession;
|
|
335
|
-
ttsSubs.push(
|
|
336
|
-
ttsSession.on("audio", (pcm) => {
|
|
337
|
-
client.playAudioChunk(new Uint8Array(pcm.buffer, pcm.byteOffset, pcm.byteLength));
|
|
338
|
-
}),
|
|
339
|
-
);
|
|
340
|
-
ttsSubs.push(ttsSession.on("error", onTtsError));
|
|
341
|
-
}
|
|
342
|
-
}
|
|
417
|
+
if (sttResult.status === "fulfilled") await adoptStt(sttResult.value, teardown);
|
|
418
|
+
if (ttsResult.status === "fulfilled") await adoptTts(ttsResult.value, teardown);
|
|
419
|
+
|
|
420
|
+
// If either provider failed (but the session wasn't itself aborted),
|
|
421
|
+
// mark the session terminated so subsequent events become no-ops.
|
|
422
|
+
// Aborted-by-stop() sessions don't need terminate() — stop() handles cleanup.
|
|
423
|
+
if (!aborted && (sttFailed || ttsFailed)) terminate();
|
|
343
424
|
}
|
|
344
425
|
|
|
345
426
|
return {
|
|
@@ -363,7 +444,7 @@ export function createPipelineSession(opts: PipelineSessionOptions): Session {
|
|
|
363
444
|
});
|
|
364
445
|
},
|
|
365
446
|
onAudio(data: Uint8Array): void {
|
|
366
|
-
if (!audioReady) return;
|
|
447
|
+
if (terminated || !audioReady) return;
|
|
367
448
|
const offset = data.byteOffset;
|
|
368
449
|
const length = data.byteLength;
|
|
369
450
|
let pcm: Int16Array;
|
|
@@ -380,6 +461,7 @@ export function createPipelineSession(opts: PipelineSessionOptions): Session {
|
|
|
380
461
|
audioReady = true;
|
|
381
462
|
},
|
|
382
463
|
onCancel(): void {
|
|
464
|
+
if (terminated) return;
|
|
383
465
|
turnController?.abort();
|
|
384
466
|
turnController = null;
|
|
385
467
|
ctx.tts?.cancel();
|
|
@@ -387,6 +469,7 @@ export function createPipelineSession(opts: PipelineSessionOptions): Session {
|
|
|
387
469
|
client.event({ type: "cancelled" });
|
|
388
470
|
},
|
|
389
471
|
onReset(): void {
|
|
472
|
+
if (terminated) return;
|
|
390
473
|
turnController?.abort();
|
|
391
474
|
turnController = null;
|
|
392
475
|
ctx.tts?.cancel();
|
|
@@ -396,6 +479,7 @@ export function createPipelineSession(opts: PipelineSessionOptions): Session {
|
|
|
396
479
|
client.event({ type: "reset" });
|
|
397
480
|
},
|
|
398
481
|
onHistory(incoming: readonly { role: "user" | "assistant"; content: string }[]): void {
|
|
482
|
+
if (terminated) return;
|
|
399
483
|
ctx.pushMessages(...incoming.map((m) => ({ role: m.role, content: m.content })));
|
|
400
484
|
},
|
|
401
485
|
waitForTurn(): Promise<void> {
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
// Copyright 2025 the AAI authors. MIT license.
|
|
2
2
|
import { describe, expect, test, vi } from "vitest";
|
|
3
|
-
import type { ToolSchema } from "../sdk/_internal-types.ts";
|
|
3
|
+
import type { ExecuteTool, ToolSchema } from "../sdk/_internal-types.ts";
|
|
4
|
+
import type { Message } from "../sdk/types.ts";
|
|
4
5
|
import { toVercelTools } from "./to-vercel-tools.ts";
|
|
5
6
|
|
|
6
7
|
const schemas: ToolSchema[] = [
|
|
@@ -151,3 +152,36 @@ describe("toVercelTools", () => {
|
|
|
151
152
|
expect(receivedCallId).toBe("tc-3");
|
|
152
153
|
});
|
|
153
154
|
});
|
|
155
|
+
|
|
156
|
+
describe("toVercelTools — message snapshot isolation", () => {
|
|
157
|
+
test("tool execute sees a snapshot, not a live ref to messages array", async () => {
|
|
158
|
+
const messagesBox = { messages: [{ role: "user" as const, content: "first" }] };
|
|
159
|
+
let observedInsideExecute: readonly Message[] | undefined;
|
|
160
|
+
|
|
161
|
+
const executeTool: ExecuteTool = async (_name, _args, _sid, msgs) => {
|
|
162
|
+
observedInsideExecute = msgs;
|
|
163
|
+
// Mutate the original array; the snapshot we captured must be unaffected.
|
|
164
|
+
messagesBox.messages.push({ role: "user", content: "second" });
|
|
165
|
+
return "ok";
|
|
166
|
+
};
|
|
167
|
+
|
|
168
|
+
const tools = toVercelTools(
|
|
169
|
+
[{ name: "t", description: "", parameters: { type: "object", properties: {} } }],
|
|
170
|
+
{
|
|
171
|
+
executeTool,
|
|
172
|
+
sessionId: "s",
|
|
173
|
+
messages: () => messagesBox.messages,
|
|
174
|
+
},
|
|
175
|
+
);
|
|
176
|
+
|
|
177
|
+
const t = tools.t;
|
|
178
|
+
if (!t?.execute) throw new Error("tool.execute missing");
|
|
179
|
+
await t.execute({}, { toolCallId: "c1", messages: [] });
|
|
180
|
+
|
|
181
|
+
// The caller-observable messages array has 2 entries after the push.
|
|
182
|
+
expect(messagesBox.messages).toHaveLength(2);
|
|
183
|
+
// But the snapshot the tool executed against was frozen at length 1.
|
|
184
|
+
expect(observedInsideExecute).toHaveLength(1);
|
|
185
|
+
expect(observedInsideExecute?.[0]).toMatchObject({ content: "first" });
|
|
186
|
+
});
|
|
187
|
+
});
|
package/host/to-vercel-tools.ts
CHANGED
|
@@ -25,9 +25,10 @@ export interface ToVercelToolsContext {
|
|
|
25
25
|
/** Session id threaded to {@link executeTool}. */
|
|
26
26
|
sessionId: string;
|
|
27
27
|
/**
|
|
28
|
-
* Returns the current conversation history at call-time.
|
|
29
|
-
*
|
|
30
|
-
*
|
|
28
|
+
* Returns the current conversation history at call-time. The orchestrator
|
|
29
|
+
* calls this per invocation; `toVercelTools` snapshots the returned array
|
|
30
|
+
* before forwarding to `executeTool` so concurrent mutations cannot leak
|
|
31
|
+
* across tool calls.
|
|
31
32
|
*/
|
|
32
33
|
messages: () => readonly Message[];
|
|
33
34
|
/**
|
|
@@ -62,7 +63,10 @@ export function toVercelTools(
|
|
|
62
63
|
const opts: ExecuteToolOptions = {};
|
|
63
64
|
if (signal !== undefined) opts.signal = signal;
|
|
64
65
|
if (options.toolCallId !== undefined) opts.toolCallId = options.toolCallId;
|
|
65
|
-
|
|
66
|
+
// Snapshot the messages array so concurrent mutation (e.g. a new
|
|
67
|
+
// turn starting after this one was aborted) can't leak into this
|
|
68
|
+
// tool's view of history.
|
|
69
|
+
return ctx.executeTool(schema.name, input, ctx.sessionId, ctx.messages().slice(), opts);
|
|
66
70
|
},
|
|
67
71
|
});
|
|
68
72
|
}
|
package/package.json
CHANGED
package/sdk/constants.ts
CHANGED
|
@@ -34,6 +34,14 @@ export const RUN_CODE_TIMEOUT_MS = 5000;
|
|
|
34
34
|
/** Maximum time to wait for sessions to stop during graceful shutdown. */
|
|
35
35
|
export const DEFAULT_SHUTDOWN_TIMEOUT_MS = 30_000;
|
|
36
36
|
|
|
37
|
+
/**
|
|
38
|
+
* Maximum time to wait for a pipeline-mode TTS drain after `flush()` before
|
|
39
|
+
* forcing the turn to complete. Prevents a stuck TTS provider from wedging
|
|
40
|
+
* the session. Short relative to `DEFAULT_SHUTDOWN_TIMEOUT_MS` so stop()
|
|
41
|
+
* can still reclaim the socket cleanly.
|
|
42
|
+
*/
|
|
43
|
+
export const PIPELINE_FLUSH_TIMEOUT_MS = 10_000;
|
|
44
|
+
|
|
37
45
|
// ─── Size / length limits ────────────────────────────────────────────────
|
|
38
46
|
|
|
39
47
|
/** Maximum length for tool result strings sent to clients. */
|