@alexkroman1/aai 1.3.0 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
 
2
- > @alexkroman1/aai@1.3.0 build /home/runner/work/agent/agent/packages/aai
2
+ > @alexkroman1/aai@1.3.1 build /home/runner/work/agent/agent/packages/aai
3
3
  > tsdown && tsc -p tsconfig.build.json
4
4
 
5
5
  ℹ tsdown v0.21.7 powered by rolldown v1.0.0-rc.12
@@ -8,15 +8,15 @@
8
8
  ℹ target: node22
9
9
  ℹ tsconfig: tsconfig.json
10
10
  ℹ Build start
11
- ℹ dist/host/runtime-barrel.js 61.50 kB │ gzip: 18.59 kB
12
- ℹ dist/index.js  6.62 kB │ gzip: 2.63 kB
11
+ ℹ dist/host/runtime-barrel.js 63.51 kB │ gzip: 19.17 kB
12
+ ℹ dist/index.js  6.67 kB │ gzip: 2.65 kB
13
13
  ℹ dist/host/providers/tts-barrel.js  5.52 kB │ gzip: 2.12 kB
14
14
  ℹ dist/sdk/protocol.js  4.75 kB │ gzip: 1.76 kB
15
15
  ℹ dist/host/providers/stt-barrel.js  3.08 kB │ gzip: 1.26 kB
16
16
  ℹ dist/sdk/manifest-barrel.js  0.26 kB │ gzip: 0.17 kB
17
- ℹ dist/constants-VTFoymJ-.js  2.75 kB │ gzip: 1.23 kB
17
+ ℹ dist/constants-BL3nvg4I.js  3.10 kB │ gzip: 1.38 kB
18
18
  ℹ dist/_internal-types-CoDTiBd1.js  2.33 kB │ gzip: 0.99 kB
19
19
  ℹ dist/types-Cfx_4QDK.js  1.74 kB │ gzip: 0.93 kB
20
20
  ℹ dist/ws-upgrade-BeOQ7fXL.js  1.14 kB │ gzip: 0.54 kB
21
- ℹ 10 files, total: 89.68 kB
22
- ✔ Build complete in 46ms
21
+ ℹ 10 files, total: 92.10 kB
22
+ ✔ Build complete in 43ms
package/CHANGELOG.md CHANGED
@@ -1,5 +1,11 @@
1
1
  # @alexkroman1/aai
2
2
 
3
+ ## 1.3.1
4
+
5
+ ### Patch Changes
6
+
7
+ - 5a9f3d5: Pipeline session concurrency fixes: serialize turns across duplicate STT finals, bound TTS flush with abort+timeout, cascade provider errors to terminate session, atomic provider open, snapshot conversation history in tool executions.
8
+
3
9
  ## 1.3.0
4
10
 
5
11
  ### Minor Changes
@@ -21,6 +21,13 @@ const FETCH_TIMEOUT_MS = 15e3;
21
21
  const RUN_CODE_TIMEOUT_MS = 5e3;
22
22
  /** Maximum time to wait for sessions to stop during graceful shutdown. */
23
23
  const DEFAULT_SHUTDOWN_TIMEOUT_MS = 3e4;
24
+ /**
25
+ * Maximum time to wait for a pipeline-mode TTS drain after `flush()` before
26
+ * forcing the turn to complete. Prevents a stuck TTS provider from wedging
27
+ * the session. Short relative to `DEFAULT_SHUTDOWN_TIMEOUT_MS` so stop()
28
+ * can still reclaim the socket cleanly.
29
+ */
30
+ const PIPELINE_FLUSH_TIMEOUT_MS = 1e4;
24
31
  /** Maximum length for tool result strings sent to clients. */
25
32
  const MAX_TOOL_RESULT_CHARS = 4e3;
26
33
  /** Maximum chars for webpage text after HTML-to-text conversion. */
@@ -44,4 +51,4 @@ const WS_OPEN = 1;
44
51
  */
45
52
  const AGENT_CSP = "default-src 'self'; script-src 'self' 'unsafe-eval' blob:; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com; connect-src 'self' wss: ws:; img-src 'self' data:; font-src 'self' https://fonts.gstatic.com; object-src 'none'; base-uri 'self'";
46
53
  //#endregion
47
- export { WS_OPEN as _, DEFAULT_SHUTDOWN_TIMEOUT_MS as a, FETCH_TIMEOUT_MS as c, MAX_PAGE_CHARS as d, MAX_TOOL_RESULT_CHARS as f, TOOL_EXECUTION_TIMEOUT_MS as g, RUN_CODE_TIMEOUT_MS as h, DEFAULT_SESSION_START_TIMEOUT_MS as i, MAX_HTML_BYTES as l, MAX_WS_PAYLOAD_BYTES as m, DEFAULT_IDLE_TIMEOUT_MS as n, DEFAULT_STT_SAMPLE_RATE as o, MAX_VALUE_SIZE as p, DEFAULT_MAX_HISTORY as r, DEFAULT_TTS_SAMPLE_RATE as s, AGENT_CSP as t, MAX_MESSAGE_BUFFER_SIZE as u };
54
+ export { TOOL_EXECUTION_TIMEOUT_MS as _, DEFAULT_SHUTDOWN_TIMEOUT_MS as a, FETCH_TIMEOUT_MS as c, MAX_PAGE_CHARS as d, MAX_TOOL_RESULT_CHARS as f, RUN_CODE_TIMEOUT_MS as g, PIPELINE_FLUSH_TIMEOUT_MS as h, DEFAULT_SESSION_START_TIMEOUT_MS as i, MAX_HTML_BYTES as l, MAX_WS_PAYLOAD_BYTES as m, DEFAULT_IDLE_TIMEOUT_MS as n, DEFAULT_STT_SAMPLE_RATE as o, MAX_VALUE_SIZE as p, DEFAULT_MAX_HISTORY as r, DEFAULT_TTS_SAMPLE_RATE as s, AGENT_CSP as t, MAX_MESSAGE_BUFFER_SIZE as u, WS_OPEN as v };
@@ -59,6 +59,16 @@ export type FakeTtsProvider = TtsProvider & {
59
59
  export declare function createFakeTtsProvider(options?: {
60
60
  autoDoneOnFlush?: boolean;
61
61
  }): FakeTtsProvider;
62
+ /**
63
+ * Fake STT provider that throws on `open()` with a given error code. Used to
64
+ * test atomic provider open — the concurrently opened TTS session must be closed (never adopted) when STT fails.
65
+ */
66
+ export declare function createFailingSttProvider(code: "stt_connect_failed" | "stt_auth_failed" | "stt_stream_error", message: string): SttProvider;
67
+ /**
68
+ * Fake TTS provider that throws on `open()` with a given error code. Used to
69
+ * test atomic provider open — STT should be closed when TTS fails.
70
+ */
71
+ export declare function createFailingTtsProvider(code: "tts_connect_failed" | "tts_auth_failed" | "tts_stream_error", message: string): TtsProvider;
62
72
  /**
63
73
  * A scripted stream part. `text` yields a `text-delta` in the LLM provider's
64
74
  * raw wire format; `tool-call` / `tool-result` emit the corresponding parts
@@ -1,4 +1,4 @@
1
- import { a as DEFAULT_SHUTDOWN_TIMEOUT_MS, c as FETCH_TIMEOUT_MS, d as MAX_PAGE_CHARS, f as MAX_TOOL_RESULT_CHARS, g as TOOL_EXECUTION_TIMEOUT_MS, h as RUN_CODE_TIMEOUT_MS, l as MAX_HTML_BYTES, m as MAX_WS_PAYLOAD_BYTES, o as DEFAULT_STT_SAMPLE_RATE, p as MAX_VALUE_SIZE, s as DEFAULT_TTS_SAMPLE_RATE, t as AGENT_CSP } from "../constants-VTFoymJ-.js";
1
+ import { _ as TOOL_EXECUTION_TIMEOUT_MS, a as DEFAULT_SHUTDOWN_TIMEOUT_MS, c as FETCH_TIMEOUT_MS, d as MAX_PAGE_CHARS, f as MAX_TOOL_RESULT_CHARS, g as RUN_CODE_TIMEOUT_MS, h as PIPELINE_FLUSH_TIMEOUT_MS, l as MAX_HTML_BYTES, m as MAX_WS_PAYLOAD_BYTES, o as DEFAULT_STT_SAMPLE_RATE, p as MAX_VALUE_SIZE, s as DEFAULT_TTS_SAMPLE_RATE, t as AGENT_CSP } from "../constants-BL3nvg4I.js";
2
2
  import { r as DEFAULT_SYSTEM_PROMPT } from "../types-Cfx_4QDK.js";
3
3
  import { i as toolError, n as errorDetail, r as errorMessage, t as parseWsUpgradeParams } from "../ws-upgrade-BeOQ7fXL.js";
4
4
  import { ClientMessageSchema, buildReadyConfig, lenientParse } from "../sdk/protocol.js";
@@ -518,7 +518,7 @@ function toVercelTools(schemas, ctx) {
518
518
  const opts = {};
519
519
  if (signal !== void 0) opts.signal = signal;
520
520
  if (options.toolCallId !== void 0) opts.toolCallId = options.toolCallId;
521
- return ctx.executeTool(schema.name, input, ctx.sessionId, ctx.messages(), opts);
521
+ return ctx.executeTool(schema.name, input, ctx.sessionId, ctx.messages().slice(), opts);
522
522
  }
523
523
  });
524
524
  return out;
@@ -612,11 +612,30 @@ function createPipelineSession(opts) {
612
612
  });
613
613
  const sessionAbort = new AbortController();
614
614
  let audioReady = false;
615
+ let terminated = false;
615
616
  let turnController = null;
616
617
  let nextReplyId = 0;
617
618
  const sttSubs = [];
618
619
  const ttsSubs = [];
620
+ /**
621
+ * Tear down the session after an unrecoverable provider error. Aborts the
622
+ * in-flight turn, cancels TTS, signals providers to close via sessionAbort,
623
+ * and flips `terminated` so future STT events and audio frames become
624
+ * no-ops. Idempotent.
625
+ */
626
+ function terminate() {
627
+ if (terminated) return;
628
+ terminated = true;
629
+ if (turnController !== null) {
630
+ turnController.abort();
631
+ turnController = null;
632
+ }
633
+ ctx.tts?.cancel();
634
+ ctx.cancelReply();
635
+ sessionAbort.abort();
636
+ }
619
637
  function onSttPartial(_text) {
638
+ if (terminated) return;
620
639
  if (turnController === null) return;
621
640
  log.info("Pipeline barge-in", { sessionId: opts.id });
622
641
  turnController.abort();
@@ -626,8 +645,17 @@ function createPipelineSession(opts) {
626
645
  client.event({ type: "cancelled" });
627
646
  }
628
647
  function onSttFinal(text) {
648
+ if (terminated) return;
629
649
  const trimmed = text.trim();
630
650
  if (trimmed.length === 0) return;
651
+ if (turnController !== null) {
652
+ log.info("Pipeline replacing in-flight turn", { sessionId: opts.id });
653
+ turnController.abort();
654
+ turnController = null;
655
+ ctx.tts?.cancel();
656
+ ctx.cancelReply();
657
+ client.event({ type: "cancelled" });
658
+ }
631
659
  client.event({
632
660
  type: "user_transcript",
633
661
  text
@@ -641,20 +669,24 @@ function createPipelineSession(opts) {
641
669
  ctx.chainTurn(turn);
642
670
  }
643
671
  function onSttError(err) {
672
+ if (terminated) return;
644
673
  log.error("STT error", {
645
674
  code: err.code,
646
675
  message: err.message,
647
676
  sessionId: opts.id
648
677
  });
649
678
  emitError(client, "stt", err.message);
679
+ terminate();
650
680
  }
651
681
  function onTtsError(err) {
682
+ if (terminated) return;
652
683
  log.error("TTS error", {
653
684
  code: err.code,
654
685
  message: err.message,
655
686
  sessionId: opts.id
656
687
  });
657
688
  emitError(client, "tts", err.message);
689
+ terminate();
658
690
  }
659
691
  async function consumeLlmStream(ctl, messages, tools, onDelta) {
660
692
  const deps = {
@@ -689,14 +721,48 @@ function createPipelineSession(opts) {
689
721
  }
690
722
  }
691
723
  }
692
- function flushTtsAndWait() {
724
+ /**
725
+ * Flush TTS and wait for drain. Resolves on any of:
726
+ * - TTS emits `done`
727
+ * - `signal` aborts (barge-in, provider error, session stop)
728
+ * - `PIPELINE_FLUSH_TIMEOUT_MS` elapses
729
+ * Resolves immediately if no TTS session.
730
+ */
731
+ function flushTtsAndWait(signal) {
693
732
  const tts = ctx.tts;
694
733
  if (!tts) return Promise.resolve();
695
734
  return new Promise((resolve) => {
696
- const off = tts.on("done", () => {
697
- off();
735
+ let off = null;
736
+ let timer = null;
737
+ const cleanup = () => {
738
+ if (off) {
739
+ off();
740
+ off = null;
741
+ }
742
+ if (timer) {
743
+ clearTimeout(timer);
744
+ timer = null;
745
+ }
746
+ signal.removeEventListener("abort", onAbort);
747
+ };
748
+ const finish = () => {
749
+ cleanup();
698
750
  resolve();
699
- });
751
+ };
752
+ const onAbort = () => finish();
753
+ if (signal.aborted) {
754
+ resolve();
755
+ return;
756
+ }
757
+ signal.addEventListener("abort", onAbort, { once: true });
758
+ off = tts.on("done", finish);
759
+ timer = setTimeout(() => {
760
+ log.warn("TTS flush timeout", {
761
+ sessionId: opts.id,
762
+ timeoutMs: PIPELINE_FLUSH_TIMEOUT_MS
763
+ });
764
+ finish();
765
+ }, PIPELINE_FLUSH_TIMEOUT_MS);
700
766
  tts.flush();
701
767
  });
702
768
  }
@@ -724,7 +790,7 @@ function createPipelineSession(opts) {
724
790
  if (turnController === ctl) turnController = null;
725
791
  return;
726
792
  }
727
- await flushTtsAndWait();
793
+ await flushTtsAndWait(ctl.signal);
728
794
  if (ctl.signal.aborted) {
729
795
  if (turnController === ctl) turnController = null;
730
796
  return;
@@ -737,6 +803,35 @@ function createPipelineSession(opts) {
737
803
  client.event({ type: "reply_done" });
738
804
  if (turnController === ctl) turnController = null;
739
805
  }
806
+ function reportOpenRejection(which, reason) {
807
+ const msg = errorMessage(reason);
808
+ log.error(`${which === "stt" ? "STT" : "TTS"} open failed`, {
809
+ error: msg,
810
+ sessionId: opts.id
811
+ });
812
+ emitError(client, which, msg);
813
+ }
814
+ async function adoptStt(sttSession, teardown) {
815
+ if (teardown) {
816
+ await sttSession.close().catch(() => void 0);
817
+ return;
818
+ }
819
+ ctx.stt = sttSession;
820
+ sttSubs.push(sttSession.on("partial", onSttPartial));
821
+ sttSubs.push(sttSession.on("final", onSttFinal));
822
+ sttSubs.push(sttSession.on("error", onSttError));
823
+ }
824
+ async function adoptTts(ttsSession, teardown) {
825
+ if (teardown) {
826
+ await ttsSession.close().catch(() => void 0);
827
+ return;
828
+ }
829
+ ctx.tts = ttsSession;
830
+ ttsSubs.push(ttsSession.on("audio", (pcm) => {
831
+ client.playAudioChunk(new Uint8Array(pcm.buffer, pcm.byteOffset, pcm.byteLength));
832
+ }));
833
+ ttsSubs.push(ttsSession.on("error", onTtsError));
834
+ }
740
835
  async function openProviders() {
741
836
  const [sttResult, ttsResult] = await Promise.allSettled([opts.stt.open({
742
837
  sampleRate,
@@ -748,47 +843,15 @@ function createPipelineSession(opts) {
748
843
  apiKey: opts.ttsApiKey,
749
844
  signal: sessionAbort.signal
750
845
  })]);
751
- if (sttResult.status === "rejected") {
752
- const msg = errorMessage(sttResult.reason);
753
- log.error("STT open failed", {
754
- error: msg,
755
- sessionId: opts.id
756
- });
757
- emitError(client, "stt", msg);
758
- }
759
- if (ttsResult.status === "rejected") {
760
- const msg = errorMessage(ttsResult.reason);
761
- log.error("TTS open failed", {
762
- error: msg,
763
- sessionId: opts.id
764
- });
765
- emitError(client, "tts", msg);
766
- }
846
+ if (sttResult.status === "rejected") reportOpenRejection("stt", sttResult.reason);
847
+ if (ttsResult.status === "rejected") reportOpenRejection("tts", ttsResult.reason);
767
848
  const aborted = sessionAbort.signal.aborted;
768
849
  const sttFailed = sttResult.status === "rejected";
769
850
  const ttsFailed = ttsResult.status === "rejected";
770
851
  const teardown = aborted || sttFailed || ttsFailed;
771
- if (sttResult.status === "fulfilled") {
772
- const sttSession = sttResult.value;
773
- if (teardown) await sttSession.close().catch(() => void 0);
774
- else {
775
- ctx.stt = sttSession;
776
- sttSubs.push(sttSession.on("partial", onSttPartial));
777
- sttSubs.push(sttSession.on("final", onSttFinal));
778
- sttSubs.push(sttSession.on("error", onSttError));
779
- }
780
- }
781
- if (ttsResult.status === "fulfilled") {
782
- const ttsSession = ttsResult.value;
783
- if (teardown) await ttsSession.close().catch(() => void 0);
784
- else {
785
- ctx.tts = ttsSession;
786
- ttsSubs.push(ttsSession.on("audio", (pcm) => {
787
- client.playAudioChunk(new Uint8Array(pcm.buffer, pcm.byteOffset, pcm.byteLength));
788
- }));
789
- ttsSubs.push(ttsSession.on("error", onTtsError));
790
- }
791
- }
852
+ if (sttResult.status === "fulfilled") await adoptStt(sttResult.value, teardown);
853
+ if (ttsResult.status === "fulfilled") await adoptTts(ttsResult.value, teardown);
854
+ if (!aborted && (sttFailed || ttsFailed)) terminate();
792
855
  }
793
856
  return {
794
857
  async start() {
@@ -807,7 +870,7 @@ function createPipelineSession(opts) {
807
870
  await ctx.tts?.close().catch(() => {});
808
871
  },
809
872
  onAudio(data) {
810
- if (!audioReady) return;
873
+ if (terminated || !audioReady) return;
811
874
  const offset = data.byteOffset;
812
875
  const length = data.byteLength;
813
876
  let pcm;
@@ -823,6 +886,7 @@ function createPipelineSession(opts) {
823
886
  audioReady = true;
824
887
  },
825
888
  onCancel() {
889
+ if (terminated) return;
826
890
  turnController?.abort();
827
891
  turnController = null;
828
892
  ctx.tts?.cancel();
@@ -830,6 +894,7 @@ function createPipelineSession(opts) {
830
894
  client.event({ type: "cancelled" });
831
895
  },
832
896
  onReset() {
897
+ if (terminated) return;
833
898
  turnController?.abort();
834
899
  turnController = null;
835
900
  ctx.tts?.cancel();
@@ -839,6 +904,7 @@ function createPipelineSession(opts) {
839
904
  client.event({ type: "reset" });
840
905
  },
841
906
  onHistory(incoming) {
907
+ if (terminated) return;
842
908
  ctx.pushMessages(...incoming.map((m) => ({
843
909
  role: m.role,
844
910
  content: m.content
@@ -22,9 +22,10 @@ export interface ToVercelToolsContext {
22
22
  /** Session id threaded to {@link executeTool}. */
23
23
  sessionId: string;
24
24
  /**
25
- * Returns the current conversation history at call-time. Called per
26
- * tool invocation so late calls see fresh state instead of a snapshot
27
- * captured when the tool bag was built.
25
+ * Returns the current conversation history at call-time. The orchestrator
26
+ * calls this per invocation; `toVercelTools` snapshots the returned array
27
+ * before forwarding to `executeTool` so concurrent mutations cannot leak
28
+ * across tool calls.
28
29
  */
29
30
  messages: () => readonly Message[];
30
31
  /**
package/dist/index.js CHANGED
@@ -1,4 +1,4 @@
1
- import { _ as WS_OPEN, a as DEFAULT_SHUTDOWN_TIMEOUT_MS, c as FETCH_TIMEOUT_MS, d as MAX_PAGE_CHARS, f as MAX_TOOL_RESULT_CHARS, g as TOOL_EXECUTION_TIMEOUT_MS, h as RUN_CODE_TIMEOUT_MS, i as DEFAULT_SESSION_START_TIMEOUT_MS, l as MAX_HTML_BYTES, m as MAX_WS_PAYLOAD_BYTES, n as DEFAULT_IDLE_TIMEOUT_MS, o as DEFAULT_STT_SAMPLE_RATE, p as MAX_VALUE_SIZE, r as DEFAULT_MAX_HISTORY, s as DEFAULT_TTS_SAMPLE_RATE, t as AGENT_CSP, u as MAX_MESSAGE_BUFFER_SIZE } from "./constants-VTFoymJ-.js";
1
+ import { _ as TOOL_EXECUTION_TIMEOUT_MS, a as DEFAULT_SHUTDOWN_TIMEOUT_MS, c as FETCH_TIMEOUT_MS, d as MAX_PAGE_CHARS, f as MAX_TOOL_RESULT_CHARS, g as RUN_CODE_TIMEOUT_MS, h as PIPELINE_FLUSH_TIMEOUT_MS, i as DEFAULT_SESSION_START_TIMEOUT_MS, l as MAX_HTML_BYTES, m as MAX_WS_PAYLOAD_BYTES, n as DEFAULT_IDLE_TIMEOUT_MS, o as DEFAULT_STT_SAMPLE_RATE, p as MAX_VALUE_SIZE, r as DEFAULT_MAX_HISTORY, s as DEFAULT_TTS_SAMPLE_RATE, t as AGENT_CSP, u as MAX_MESSAGE_BUFFER_SIZE, v as WS_OPEN } from "./constants-BL3nvg4I.js";
2
2
  import { i as ToolChoiceSchema, n as DEFAULT_GREETING, r as DEFAULT_SYSTEM_PROMPT, t as BuiltinToolSchema } from "./types-Cfx_4QDK.js";
3
3
  import { i as toolError, n as errorDetail, r as errorMessage, t as parseWsUpgradeParams } from "./ws-upgrade-BeOQ7fXL.js";
4
4
  //#region sdk/allowed-hosts.ts
@@ -154,4 +154,4 @@ function agent(def) {
154
154
  };
155
155
  }
156
156
  //#endregion
157
- export { AGENT_CSP, BuiltinToolSchema, DEFAULT_GREETING, DEFAULT_IDLE_TIMEOUT_MS, DEFAULT_MAX_HISTORY, DEFAULT_SESSION_START_TIMEOUT_MS, DEFAULT_SHUTDOWN_TIMEOUT_MS, DEFAULT_STT_SAMPLE_RATE, DEFAULT_SYSTEM_PROMPT, DEFAULT_TTS_SAMPLE_RATE, FETCH_TIMEOUT_MS, MAX_HTML_BYTES, MAX_MESSAGE_BUFFER_SIZE, MAX_PAGE_CHARS, MAX_TOOL_RESULT_CHARS, MAX_VALUE_SIZE, MAX_WS_PAYLOAD_BYTES, RUN_CODE_TIMEOUT_MS, TOOL_EXECUTION_TIMEOUT_MS, ToolChoiceSchema, WS_OPEN, agent, errorDetail, errorMessage, matchesAllowedHost, parseWsUpgradeParams, tool, toolError, validateAllowedHostPattern };
157
+ export { AGENT_CSP, BuiltinToolSchema, DEFAULT_GREETING, DEFAULT_IDLE_TIMEOUT_MS, DEFAULT_MAX_HISTORY, DEFAULT_SESSION_START_TIMEOUT_MS, DEFAULT_SHUTDOWN_TIMEOUT_MS, DEFAULT_STT_SAMPLE_RATE, DEFAULT_SYSTEM_PROMPT, DEFAULT_TTS_SAMPLE_RATE, FETCH_TIMEOUT_MS, MAX_HTML_BYTES, MAX_MESSAGE_BUFFER_SIZE, MAX_PAGE_CHARS, MAX_TOOL_RESULT_CHARS, MAX_VALUE_SIZE, MAX_WS_PAYLOAD_BYTES, PIPELINE_FLUSH_TIMEOUT_MS, RUN_CODE_TIMEOUT_MS, TOOL_EXECUTION_TIMEOUT_MS, ToolChoiceSchema, WS_OPEN, agent, errorDetail, errorMessage, matchesAllowedHost, parseWsUpgradeParams, tool, toolError, validateAllowedHostPattern };
@@ -20,6 +20,13 @@ export declare const FETCH_TIMEOUT_MS = 15000;
20
20
  export declare const RUN_CODE_TIMEOUT_MS = 5000;
21
21
  /** Maximum time to wait for sessions to stop during graceful shutdown. */
22
22
  export declare const DEFAULT_SHUTDOWN_TIMEOUT_MS = 30000;
23
+ /**
24
+ * Maximum time to wait for a pipeline-mode TTS drain after `flush()` before
25
+ * forcing the turn to complete. Prevents a stuck TTS provider from wedging
26
+ * the session. Short relative to `DEFAULT_SHUTDOWN_TIMEOUT_MS` so stop()
27
+ * can still reclaim the socket cleanly.
28
+ */
29
+ export declare const PIPELINE_FLUSH_TIMEOUT_MS = 10000;
23
30
  /** Maximum length for tool result strings sent to clients. */
24
31
  export declare const MAX_TOOL_RESULT_CHARS = 4000;
25
32
  /** Maximum chars for webpage text after HTML-to-text conversion. */
@@ -1,4 +1,4 @@
1
- import { f as MAX_TOOL_RESULT_CHARS } from "../constants-VTFoymJ-.js";
1
+ import { f as MAX_TOOL_RESULT_CHARS } from "../constants-BL3nvg4I.js";
2
2
  import { z } from "zod";
3
3
  //#region sdk/protocol.ts
4
4
  /**
@@ -168,6 +168,40 @@ export function createFakeTtsProvider(
168
168
  };
169
169
  }
170
170
 
171
+ /**
172
+ * Fake STT provider that throws on `open()` with a given error code. Used to
173
+ * test atomic provider open — the concurrently opened TTS session must be closed (never adopted) when STT fails.
174
+ */
175
+ export function createFailingSttProvider(
176
+ code: "stt_connect_failed" | "stt_auth_failed" | "stt_stream_error",
177
+ message: string,
178
+ ): SttProvider {
179
+ return {
180
+ name: "failing-stt",
181
+ async open(): Promise<SttSession> {
182
+ const err = Object.assign(new Error(message), { code }) as Error & { code: typeof code };
183
+ throw err;
184
+ },
185
+ };
186
+ }
187
+
188
+ /**
189
+ * Fake TTS provider that throws on `open()` with a given error code. Used to
190
+ * test atomic provider open — STT should be closed when TTS fails.
191
+ */
192
+ export function createFailingTtsProvider(
193
+ code: "tts_connect_failed" | "tts_auth_failed" | "tts_stream_error",
194
+ message: string,
195
+ ): TtsProvider {
196
+ return {
197
+ name: "failing-tts",
198
+ async open(): Promise<TtsSession> {
199
+ const err = Object.assign(new Error(message), { code }) as Error & { code: typeof code };
200
+ throw err;
201
+ },
202
+ };
203
+ }
204
+
171
205
  // ─── Fake LLM ───────────────────────────────────────────────────────────────
172
206
 
173
207
  /**
@@ -6,6 +6,8 @@ import type { AgentConfig } from "../sdk/_internal-types.ts";
6
6
  import type { ClientEvent } from "../sdk/protocol.ts";
7
7
  import { DEFAULT_SYSTEM_PROMPT } from "../sdk/types.ts";
8
8
  import {
9
+ createFailingSttProvider,
10
+ createFailingTtsProvider,
9
11
  createFakeLanguageModel,
10
12
  createFakeSttProvider,
11
13
  createFakeTtsProvider,
@@ -335,3 +337,236 @@ describe("createPipelineSession — STT error", () => {
335
337
  await session.stop();
336
338
  });
337
339
  });
340
+
341
+ describe("createPipelineSession — duplicate final", () => {
342
+ test("second final during AGENT_REPLYING aborts prior turn and starts new one", async () => {
343
+ // Multi-part first step with delay so the first turn is still streaming
344
+ // when the second final arrives.
345
+ const steps: ScriptedPart[][] = [
346
+ [
347
+ { type: "text", text: "first " },
348
+ { type: "text", text: "reply " },
349
+ { type: "text", text: "continues" },
350
+ ],
351
+ [{ type: "text", text: "second reply" }],
352
+ ];
353
+ const { opts, stt, tts, client } = makeOpts({
354
+ llm: createFakeLanguageModel({ steps, delayMs: 20 }),
355
+ });
356
+
357
+ const session = createPipelineSession(opts);
358
+ await session.start();
359
+
360
+ const sttSession = stt.last();
361
+ const ttsSession = tts.last();
362
+ if (!(sttSession && ttsSession)) throw new Error("providers didn't open");
363
+
364
+ sttSession.fireFinal("first question");
365
+ await vi.waitFor(() => {
366
+ expect(ttsSession.sendText.mock.calls.length).toBeGreaterThan(0);
367
+ });
368
+
369
+ // Second final arrives mid-reply.
370
+ sttSession.fireFinal("second question");
371
+ await session.waitForTurn();
372
+
373
+ // TTS.cancel fires once to abandon the first turn's audio.
374
+ expect(ttsSession.cancel).toHaveBeenCalledTimes(1);
375
+
376
+ // Both user transcripts reach the client.
377
+ const userTranscripts = client.events.filter(
378
+ (e) => (e as ClientEvent).type === "user_transcript",
379
+ );
380
+ expect(userTranscripts).toHaveLength(2);
381
+
382
+ // Second reply's text was synthesized.
383
+ expect(ttsSession.textChunks).toContain("second reply");
384
+
385
+ // Exactly one reply_done (for the second turn).
386
+ const replyDones = client.events.filter((e) => (e as ClientEvent).type === "reply_done");
387
+ expect(replyDones).toHaveLength(1);
388
+
389
+ await session.stop();
390
+ });
391
+ });
392
+
393
+ describe("createPipelineSession — flush timeout/abort", () => {
394
+ test("flush that never drains does not wedge stop()", async () => {
395
+ // autoDoneOnFlush: false → TTS never fires `done`, so flushTtsAndWait must
396
+ // resolve via the turn-abort signal when stop() fires.
397
+ const script: ScriptedPart[] = [{ type: "text", text: "hi" }];
398
+ const tts = createFakeTtsProvider({ autoDoneOnFlush: false });
399
+ const { opts, stt, client } = makeOpts({
400
+ llm: createFakeLanguageModel({ script }),
401
+ tts,
402
+ });
403
+
404
+ const session = createPipelineSession(opts);
405
+ await session.start();
406
+
407
+ const sttSession = stt.last();
408
+ const ttsSession = tts.last();
409
+ if (!(sttSession && ttsSession)) throw new Error("providers didn't open");
410
+
411
+ sttSession.fireFinal("hi");
412
+ // Wait until the turn has reached the flush step — without this guard,
413
+ // stop() aborts the controller before flushTtsAndWait is even called.
414
+ await vi.waitFor(() => {
415
+ expect(ttsSession.flush).toHaveBeenCalledTimes(1);
416
+ });
417
+ await session.stop();
418
+
419
+ // Turn aborted before reply_done could fire.
420
+ const types = eventTypes(client.events);
421
+ expect(types).not.toContain("reply_done");
422
+ });
423
+ });
424
+
425
+ describe("createPipelineSession — mid-session provider errors", () => {
426
+ test("STT error during reply aborts turn and stops further transcripts", async () => {
427
+ const script: ScriptedPart[] = [{ type: "text", text: "reply" }];
428
+ const { opts, stt, tts, client } = makeOpts({
429
+ llm: createFakeLanguageModel({ script, delayMs: 20 }),
430
+ });
431
+
432
+ const session = createPipelineSession(opts);
433
+ await session.start();
434
+
435
+ const sttSession = stt.last();
436
+ const ttsSession = tts.last();
437
+ if (!(sttSession && ttsSession)) throw new Error("providers didn't open");
438
+
439
+ sttSession.fireFinal("first");
440
+ await vi.waitFor(() => {
441
+ expect(ttsSession.sendText.mock.calls.length).toBeGreaterThan(0);
442
+ });
443
+ sttSession.fireError("stt_stream_error", "socket died");
444
+ await session.waitForTurn();
445
+
446
+ const errors = client.events.filter((e) => (e as ClientEvent).type === "error");
447
+ expect(errors).toHaveLength(1);
448
+ expect(errors[0]).toMatchObject({ code: "stt", message: "socket died" });
449
+
450
+ // Turn was aborted (TTS cancelled).
451
+ expect(ttsSession.cancel).toHaveBeenCalled();
452
+
453
+ // Further STT events are no-ops.
454
+ sttSession.fireFinal("ignored after error");
455
+ await session.waitForTurn();
456
+ const userTranscripts = client.events.filter(
457
+ (e) => (e as ClientEvent).type === "user_transcript",
458
+ );
459
+ expect(userTranscripts).toHaveLength(1);
460
+
461
+ await session.stop();
462
+ });
463
+
464
+ test("TTS error during reply aborts turn and stops further user transcripts", async () => {
465
+ const script: ScriptedPart[] = [{ type: "text", text: "reply" }];
466
+ const { opts, stt, tts, client } = makeOpts({
467
+ llm: createFakeLanguageModel({ script, delayMs: 20 }),
468
+ });
469
+
470
+ const session = createPipelineSession(opts);
471
+ await session.start();
472
+
473
+ const sttSession = stt.last();
474
+ const ttsSession = tts.last();
475
+ if (!(sttSession && ttsSession)) throw new Error("providers didn't open");
476
+
477
+ sttSession.fireFinal("first");
478
+ await vi.waitFor(() => {
479
+ expect(ttsSession.sendText.mock.calls.length).toBeGreaterThan(0);
480
+ });
481
+ ttsSession.fireError("tts_stream_error", "socket died");
482
+ await session.waitForTurn();
483
+
484
+ const errors = client.events.filter((e) => (e as ClientEvent).type === "error");
485
+ expect(errors).toHaveLength(1);
486
+ expect(errors[0]).toMatchObject({ code: "tts", message: "socket died" });
487
+
488
+ sttSession.fireFinal("should be ignored");
489
+ await session.waitForTurn();
490
+ const userTranscripts = client.events.filter(
491
+ (e) => (e as ClientEvent).type === "user_transcript",
492
+ );
493
+ expect(userTranscripts).toHaveLength(1);
494
+
495
+ await session.stop();
496
+ });
497
+
498
+ test("cancel/reset/history are no-ops after terminate", async () => {
499
+ const { opts, stt, client } = makeOpts();
500
+ const session = createPipelineSession(opts);
501
+ await session.start();
502
+
503
+ const sttSession = stt.last();
504
+ if (!sttSession) throw new Error("STT didn't open");
505
+
506
+ sttSession.fireError("stt_stream_error", "dead");
507
+ await session.waitForTurn();
508
+
509
+ const eventsBefore = client.events.length;
510
+
511
+ session.onCancel();
512
+ session.onReset();
513
+ session.onHistory([{ role: "user", content: "nope" }]);
514
+
515
+ expect(client.events).toHaveLength(eventsBefore);
516
+
517
+ await session.stop();
518
+ });
519
+ });
520
+
521
+ describe("createPipelineSession — atomic provider open", () => {
522
+ test("STT is closed when TTS open fails, session becomes terminated", async () => {
523
+ const stt = createFakeSttProvider();
524
+ const failingTts = createFailingTtsProvider("tts_connect_failed", "bad key");
525
+
526
+ const { opts, client } = makeOpts({ stt, tts: failingTts });
527
+ const session = createPipelineSession(opts);
528
+ await session.start();
529
+
530
+ const sttSession = stt.last();
531
+ expect(sttSession).toBeDefined();
532
+ expect(sttSession?.closed.value).toBe(true);
533
+
534
+ const errors = client.events.filter((e) => (e as ClientEvent).type === "error");
535
+ expect(errors).toHaveLength(1);
536
+ expect(errors[0]).toMatchObject({ code: "tts", message: "bad key" });
537
+
538
+ // Session terminated — further STT events are no-ops (even though
539
+ // listeners were never wired, terminate() also turns onCancel etc. into no-ops).
540
+ sttSession?.fireFinal("ignored");
541
+ await session.waitForTurn();
542
+ const userTranscripts = client.events.filter(
543
+ (e) => (e as ClientEvent).type === "user_transcript",
544
+ );
545
+ expect(userTranscripts).toHaveLength(0);
546
+
547
+ await session.stop();
548
+ });
549
+
550
+ test("TTS is never opened when STT open fails", async () => {
551
+ const failingStt = createFailingSttProvider("stt_connect_failed", "bad key");
552
+ const tts = createFakeTtsProvider();
553
+ const ttsOpenSpy = vi.spyOn(tts, "open");
554
+
555
+ const { opts, client } = makeOpts({ stt: failingStt, tts });
556
+ const session = createPipelineSession(opts);
557
+ await session.start();
558
+
559
+ // STT and TTS open concurrently via Promise.allSettled — TTS.open is
560
+ // still called, but once STT fails its result is discarded and the TTS
561
+ // session is closed.
562
+ expect(ttsOpenSpy).toHaveBeenCalledTimes(1);
563
+ const ttsSession = tts.last();
564
+ expect(ttsSession?.closed.value).toBe(true);
565
+
566
+ const errors = client.events.filter((e) => (e as ClientEvent).type === "error");
567
+ expect(errors).toHaveLength(1);
568
+ expect(errors[0]).toMatchObject({ code: "stt", message: "bad key" });
569
+
570
+ await session.stop();
571
+ });
572
+ });
@@ -10,11 +10,12 @@
10
10
  import type { LanguageModel, ModelMessage } from "ai";
11
11
  import { stepCountIs, streamText } from "ai";
12
12
  import type { AgentConfig, ExecuteTool, ToolSchema } from "../sdk/_internal-types.ts";
13
- import { DEFAULT_STT_SAMPLE_RATE } from "../sdk/constants.ts";
13
+ import { DEFAULT_STT_SAMPLE_RATE, PIPELINE_FLUSH_TIMEOUT_MS } from "../sdk/constants.ts";
14
14
  import type { ClientSink, SessionErrorCode } from "../sdk/protocol.ts";
15
15
  import type {
16
16
  SttError,
17
17
  SttProvider,
18
+ SttSession,
18
19
  TtsError,
19
20
  TtsProvider,
20
21
  TtsSession,
@@ -157,12 +158,32 @@ export function createPipelineSession(opts: PipelineSessionOptions): Session {
157
158
 
158
159
  const sessionAbort = new AbortController();
159
160
  let audioReady = false;
161
+ let terminated = false;
160
162
  let turnController: AbortController | null = null;
161
163
  let nextReplyId = 0;
162
164
  const sttSubs: Unsubscribe[] = [];
163
165
  const ttsSubs: Unsubscribe[] = [];
164
166
 
167
+ /**
168
+ * Tear down the session after an unrecoverable provider error. Aborts the
169
+ * in-flight turn, cancels TTS, signals providers to close via sessionAbort,
170
+ * and flips `terminated` so future STT events and audio frames become
171
+ * no-ops. Idempotent.
172
+ */
173
+ function terminate(): void {
174
+ if (terminated) return;
175
+ terminated = true;
176
+ if (turnController !== null) {
177
+ turnController.abort();
178
+ turnController = null;
179
+ }
180
+ ctx.tts?.cancel();
181
+ ctx.cancelReply();
182
+ sessionAbort.abort();
183
+ }
184
+
165
185
  function onSttPartial(_text: string): void {
186
+ if (terminated) return;
166
187
  if (turnController === null) return;
167
188
  log.info("Pipeline barge-in", { sessionId: opts.id });
168
189
  turnController.abort();
@@ -173,8 +194,22 @@ export function createPipelineSession(opts: PipelineSessionOptions): Session {
173
194
  }
174
195
 
175
196
  function onSttFinal(text: string): void {
197
+ if (terminated) return;
176
198
  const trimmed = text.trim();
177
199
  if (trimmed.length === 0) return;
200
+ // If a prior turn is still running (duplicate/late STT final or a
201
+ // second utterance without an intervening partial), abort it before
202
+ // launching a new one. Matches LiveKit's `current_speech.interrupt()`
203
+ // and Pipecat's `InterruptionFrame` broadcast: single-slot current
204
+ // turn, replace in-flight rather than queue.
205
+ if (turnController !== null) {
206
+ log.info("Pipeline replacing in-flight turn", { sessionId: opts.id });
207
+ turnController.abort();
208
+ turnController = null;
209
+ ctx.tts?.cancel();
210
+ ctx.cancelReply();
211
+ client.event({ type: "cancelled" });
212
+ }
178
213
  client.event({ type: "user_transcript", text });
179
214
  const turn = runTurn(trimmed).catch((err: unknown) => {
180
215
  log.error("Pipeline turn crashed", { error: errorMessage(err), sessionId: opts.id });
@@ -183,13 +218,17 @@ export function createPipelineSession(opts: PipelineSessionOptions): Session {
183
218
  }
184
219
 
185
220
  function onSttError(err: SttError): void {
221
+ if (terminated) return;
186
222
  log.error("STT error", { code: err.code, message: err.message, sessionId: opts.id });
187
223
  emitError(client, "stt", err.message);
224
+ terminate();
188
225
  }
189
226
 
190
227
  function onTtsError(err: TtsError): void {
228
+ if (terminated) return;
191
229
  log.error("TTS error", { code: err.code, message: err.message, sessionId: opts.id });
192
230
  emitError(client, "tts", err.message);
231
+ terminate();
193
232
  }
194
233
 
195
234
  async function consumeLlmStream(
@@ -231,14 +270,48 @@ export function createPipelineSession(opts: PipelineSessionOptions): Session {
231
270
  }
232
271
  }
233
272
 
234
- function flushTtsAndWait(): Promise<void> {
273
+ /**
274
+ * Flush TTS and wait for drain. Resolves on any of:
275
+ * - TTS emits `done`
276
+ * - `signal` aborts (barge-in, provider error, session stop)
277
+ * - `PIPELINE_FLUSH_TIMEOUT_MS` elapses
278
+ * Resolves immediately if no TTS session.
279
+ */
280
+ function flushTtsAndWait(signal: AbortSignal): Promise<void> {
235
281
  const tts = ctx.tts;
236
282
  if (!tts) return Promise.resolve();
237
283
  return new Promise<void>((resolve) => {
238
- const off = tts.on("done", () => {
239
- off();
284
+ let off: Unsubscribe | null = null;
285
+ let timer: ReturnType<typeof setTimeout> | null = null;
286
+ const cleanup = () => {
287
+ if (off) {
288
+ off();
289
+ off = null;
290
+ }
291
+ if (timer) {
292
+ clearTimeout(timer);
293
+ timer = null;
294
+ }
295
+ signal.removeEventListener("abort", onAbort);
296
+ };
297
+ const finish = () => {
298
+ cleanup();
240
299
  resolve();
241
- });
300
+ };
301
+ const onAbort = () => finish();
302
+ if (signal.aborted) {
303
+ resolve();
304
+ return;
305
+ }
306
+ signal.addEventListener("abort", onAbort, { once: true });
307
+ off = tts.on("done", finish);
308
+ timer = setTimeout(() => {
309
+ log.warn("TTS flush timeout", {
310
+ sessionId: opts.id,
311
+ timeoutMs: PIPELINE_FLUSH_TIMEOUT_MS,
312
+ });
313
+ finish();
314
+ }, PIPELINE_FLUSH_TIMEOUT_MS);
242
315
  tts.flush();
243
316
  });
244
317
  }
@@ -269,7 +342,7 @@ export function createPipelineSession(opts: PipelineSessionOptions): Session {
269
342
  return;
270
343
  }
271
344
 
272
- await flushTtsAndWait();
345
+ await flushTtsAndWait(ctl.signal);
273
346
 
274
347
  if (ctl.signal.aborted) {
275
348
  if (turnController === ctl) turnController = null;
@@ -284,6 +357,40 @@ export function createPipelineSession(opts: PipelineSessionOptions): Session {
284
357
  if (turnController === ctl) turnController = null;
285
358
  }
286
359
 
360
+ function reportOpenRejection(which: "stt" | "tts", reason: unknown): void {
361
+ const msg = errorMessage(reason);
362
+ log.error(`${which === "stt" ? "STT" : "TTS"} open failed`, {
363
+ error: msg,
364
+ sessionId: opts.id,
365
+ });
366
+ emitError(client, which, msg);
367
+ }
368
+
369
+ async function adoptStt(sttSession: SttSession, teardown: boolean): Promise<void> {
370
+ if (teardown) {
371
+ await sttSession.close().catch(() => undefined);
372
+ return;
373
+ }
374
+ ctx.stt = sttSession;
375
+ sttSubs.push(sttSession.on("partial", onSttPartial));
376
+ sttSubs.push(sttSession.on("final", onSttFinal));
377
+ sttSubs.push(sttSession.on("error", onSttError));
378
+ }
379
+
380
+ async function adoptTts(ttsSession: TtsSession, teardown: boolean): Promise<void> {
381
+ if (teardown) {
382
+ await ttsSession.close().catch(() => undefined);
383
+ return;
384
+ }
385
+ ctx.tts = ttsSession;
386
+ ttsSubs.push(
387
+ ttsSession.on("audio", (pcm) => {
388
+ client.playAudioChunk(new Uint8Array(pcm.buffer, pcm.byteOffset, pcm.byteLength));
389
+ }),
390
+ );
391
+ ttsSubs.push(ttsSession.on("error", onTtsError));
392
+ }
393
+
287
394
  async function openProviders(): Promise<void> {
288
395
  const [sttResult, ttsResult] = await Promise.allSettled([
289
396
  opts.stt.open({
@@ -299,47 +406,21 @@ export function createPipelineSession(opts: PipelineSessionOptions): Session {
299
406
  }),
300
407
  ]);
301
408
 
302
- if (sttResult.status === "rejected") {
303
- const msg = errorMessage(sttResult.reason);
304
- log.error("STT open failed", { error: msg, sessionId: opts.id });
305
- emitError(client, "stt", msg);
306
- }
307
- if (ttsResult.status === "rejected") {
308
- const msg = errorMessage(ttsResult.reason);
309
- log.error("TTS open failed", { error: msg, sessionId: opts.id });
310
- emitError(client, "tts", msg);
311
- }
409
+ if (sttResult.status === "rejected") reportOpenRejection("stt", sttResult.reason);
410
+ if (ttsResult.status === "rejected") reportOpenRejection("tts", ttsResult.reason);
312
411
 
313
412
  const aborted = sessionAbort.signal.aborted;
314
413
  const sttFailed = sttResult.status === "rejected";
315
414
  const ttsFailed = ttsResult.status === "rejected";
316
415
  const teardown = aborted || sttFailed || ttsFailed;
317
416
 
318
- if (sttResult.status === "fulfilled") {
319
- const sttSession = sttResult.value;
320
- if (teardown) {
321
- await sttSession.close().catch(() => undefined);
322
- } else {
323
- ctx.stt = sttSession;
324
- sttSubs.push(sttSession.on("partial", onSttPartial));
325
- sttSubs.push(sttSession.on("final", onSttFinal));
326
- sttSubs.push(sttSession.on("error", onSttError));
327
- }
328
- }
329
- if (ttsResult.status === "fulfilled") {
330
- const ttsSession = ttsResult.value;
331
- if (teardown) {
332
- await ttsSession.close().catch(() => undefined);
333
- } else {
334
- ctx.tts = ttsSession;
335
- ttsSubs.push(
336
- ttsSession.on("audio", (pcm) => {
337
- client.playAudioChunk(new Uint8Array(pcm.buffer, pcm.byteOffset, pcm.byteLength));
338
- }),
339
- );
340
- ttsSubs.push(ttsSession.on("error", onTtsError));
341
- }
342
- }
417
+ if (sttResult.status === "fulfilled") await adoptStt(sttResult.value, teardown);
418
+ if (ttsResult.status === "fulfilled") await adoptTts(ttsResult.value, teardown);
419
+
420
+ // If either provider failed (but the session wasn't itself aborted),
421
+ // mark the session terminated so subsequent events become no-ops.
422
+ // Aborted-by-stop() sessions don't need terminate() — stop() handles cleanup.
423
+ if (!aborted && (sttFailed || ttsFailed)) terminate();
343
424
  }
344
425
 
345
426
  return {
@@ -363,7 +444,7 @@ export function createPipelineSession(opts: PipelineSessionOptions): Session {
363
444
  });
364
445
  },
365
446
  onAudio(data: Uint8Array): void {
366
- if (!audioReady) return;
447
+ if (terminated || !audioReady) return;
367
448
  const offset = data.byteOffset;
368
449
  const length = data.byteLength;
369
450
  let pcm: Int16Array;
@@ -380,6 +461,7 @@ export function createPipelineSession(opts: PipelineSessionOptions): Session {
380
461
  audioReady = true;
381
462
  },
382
463
  onCancel(): void {
464
+ if (terminated) return;
383
465
  turnController?.abort();
384
466
  turnController = null;
385
467
  ctx.tts?.cancel();
@@ -387,6 +469,7 @@ export function createPipelineSession(opts: PipelineSessionOptions): Session {
387
469
  client.event({ type: "cancelled" });
388
470
  },
389
471
  onReset(): void {
472
+ if (terminated) return;
390
473
  turnController?.abort();
391
474
  turnController = null;
392
475
  ctx.tts?.cancel();
@@ -396,6 +479,7 @@ export function createPipelineSession(opts: PipelineSessionOptions): Session {
396
479
  client.event({ type: "reset" });
397
480
  },
398
481
  onHistory(incoming: readonly { role: "user" | "assistant"; content: string }[]): void {
482
+ if (terminated) return;
399
483
  ctx.pushMessages(...incoming.map((m) => ({ role: m.role, content: m.content })));
400
484
  },
401
485
  waitForTurn(): Promise<void> {
@@ -1,6 +1,7 @@
1
1
  // Copyright 2025 the AAI authors. MIT license.
2
2
  import { describe, expect, test, vi } from "vitest";
3
- import type { ToolSchema } from "../sdk/_internal-types.ts";
3
+ import type { ExecuteTool, ToolSchema } from "../sdk/_internal-types.ts";
4
+ import type { Message } from "../sdk/types.ts";
4
5
  import { toVercelTools } from "./to-vercel-tools.ts";
5
6
 
6
7
  const schemas: ToolSchema[] = [
@@ -151,3 +152,36 @@ describe("toVercelTools", () => {
151
152
  expect(receivedCallId).toBe("tc-3");
152
153
  });
153
154
  });
155
+
156
+ describe("toVercelTools — message snapshot isolation", () => {
157
+ test("tool execute sees a snapshot, not a live ref to messages array", async () => {
158
+ const messagesBox = { messages: [{ role: "user" as const, content: "first" }] };
159
+ let observedInsideExecute: readonly Message[] | undefined;
160
+
161
+ const executeTool: ExecuteTool = async (_name, _args, _sid, msgs) => {
162
+ observedInsideExecute = msgs;
163
+ // Mutate the original array; the snapshot we captured must be unaffected.
164
+ messagesBox.messages.push({ role: "user", content: "second" });
165
+ return "ok";
166
+ };
167
+
168
+ const tools = toVercelTools(
169
+ [{ name: "t", description: "", parameters: { type: "object", properties: {} } }],
170
+ {
171
+ executeTool,
172
+ sessionId: "s",
173
+ messages: () => messagesBox.messages,
174
+ },
175
+ );
176
+
177
+ const t = tools.t;
178
+ if (!t?.execute) throw new Error("tool.execute missing");
179
+ await t.execute({}, { toolCallId: "c1", messages: [] });
180
+
181
+ // The caller-observable messages array has 2 entries after the push.
182
+ expect(messagesBox.messages).toHaveLength(2);
183
+ // But the snapshot the tool executed against was frozen at length 1.
184
+ expect(observedInsideExecute).toHaveLength(1);
185
+ expect(observedInsideExecute?.[0]).toMatchObject({ content: "first" });
186
+ });
187
+ });
@@ -25,9 +25,10 @@ export interface ToVercelToolsContext {
25
25
  /** Session id threaded to {@link executeTool}. */
26
26
  sessionId: string;
27
27
  /**
28
- * Returns the current conversation history at call-time. Called per
29
- * tool invocation so late calls see fresh state instead of a snapshot
30
- * captured when the tool bag was built.
28
+ * Returns the current conversation history at call-time. The orchestrator
29
+ * calls this per invocation; `toVercelTools` snapshots the returned array
30
+ * before forwarding to `executeTool` so concurrent mutations cannot leak
31
+ * across tool calls.
31
32
  */
32
33
  messages: () => readonly Message[];
33
34
  /**
@@ -62,7 +63,10 @@ export function toVercelTools(
62
63
  const opts: ExecuteToolOptions = {};
63
64
  if (signal !== undefined) opts.signal = signal;
64
65
  if (options.toolCallId !== undefined) opts.toolCallId = options.toolCallId;
65
- return ctx.executeTool(schema.name, input, ctx.sessionId, ctx.messages(), opts);
66
+ // Snapshot the messages array so concurrent mutation (e.g. a new
67
+ // turn starting after this one was aborted) can't leak into this
68
+ // tool's view of history.
69
+ return ctx.executeTool(schema.name, input, ctx.sessionId, ctx.messages().slice(), opts);
66
70
  },
67
71
  });
68
72
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@alexkroman1/aai",
3
- "version": "1.3.0",
3
+ "version": "1.3.1",
4
4
  "type": "module",
5
5
  "exports": {
6
6
  ".": {
@@ -19,6 +19,7 @@ exports[`export surface stability > @alexkroman1/aai main export 1`] = `
19
19
  "MAX_TOOL_RESULT_CHARS",
20
20
  "MAX_VALUE_SIZE",
21
21
  "MAX_WS_PAYLOAD_BYTES",
22
+ "PIPELINE_FLUSH_TIMEOUT_MS",
22
23
  "RUN_CODE_TIMEOUT_MS",
23
24
  "TOOL_EXECUTION_TIMEOUT_MS",
24
25
  "ToolChoiceSchema",
package/sdk/constants.ts CHANGED
@@ -34,6 +34,14 @@ export const RUN_CODE_TIMEOUT_MS = 5000;
34
34
  /** Maximum time to wait for sessions to stop during graceful shutdown. */
35
35
  export const DEFAULT_SHUTDOWN_TIMEOUT_MS = 30_000;
36
36
 
37
+ /**
38
+ * Maximum time to wait for a pipeline-mode TTS drain after `flush()` before
39
+ * forcing the turn to complete. Prevents a stuck TTS provider from wedging
40
+ * the session. Short relative to `DEFAULT_SHUTDOWN_TIMEOUT_MS` so stop()
41
+ * can still reclaim the socket cleanly.
42
+ */
43
+ export const PIPELINE_FLUSH_TIMEOUT_MS = 10_000;
44
+
37
45
  // ─── Size / length limits ────────────────────────────────────────────────
38
46
 
39
47
  /** Maximum length for tool result strings sent to clients. */