npm - @alexkroman1/aai - Versions diffs - 1.4.5 → 1.5.0 - Mend

@alexkroman1/aai 1.4.5 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (78)

package/.turbo/turbo-build.log +9 -9
package/CHANGELOG.md +13 -0
package/dist/assemblyai-C969QGi4.js +35 -0
package/dist/cartesia-BfQPOQ7Y.js +37 -0
package/dist/host/_pipeline-test-fakes.d.ts +3 -1
package/dist/host/providers/stt/deepgram.d.ts +28 -0
package/dist/host/providers/tts/cartesia.d.ts +1 -1
package/dist/host/providers/tts/rime.d.ts +44 -0
package/dist/host/runtime-barrel.d.ts +4 -2
package/dist/host/runtime-barrel.js +1432 -1208
package/dist/host/runtime.d.ts +2 -2
package/dist/host/s2s.d.ts +16 -16
package/dist/host/session-core.d.ts +37 -0
package/dist/host/transports/pipeline-transport.d.ts +48 -0
package/dist/host/transports/s2s-transport.d.ts +19 -0
package/dist/host/transports/types.d.ts +45 -0
package/dist/host/ws-handler.d.ts +14 -10
package/dist/sdk/protocol.d.ts +6 -5
package/dist/sdk/providers/llm-barrel.js +1 -1
package/dist/sdk/providers/stt/deepgram.d.ts +35 -0
package/dist/sdk/providers/stt-barrel.d.ts +1 -0
package/dist/sdk/providers/stt-barrel.js +2 -2
package/dist/sdk/providers/tts/cartesia.d.ts +12 -4
package/dist/sdk/providers/tts/rime.d.ts +42 -0
package/dist/sdk/providers/tts-barrel.d.ts +1 -0
package/dist/sdk/providers/tts-barrel.js +2 -2
package/host/_pipeline-test-fakes.ts +6 -3
package/host/_test-utils.ts +209 -128
package/host/cleanup.test.ts +25 -298
package/host/integration/pipeline-reference.integration.test.ts +30 -35
package/host/providers/resolve.ts +10 -2
package/host/providers/stt/deepgram.test.ts +229 -0
package/host/providers/stt/deepgram.ts +172 -0
package/host/providers/tts/cartesia.ts +7 -3
package/host/providers/tts/rime.test.ts +251 -0
package/host/providers/tts/rime.ts +322 -0
package/host/runtime-barrel.ts +4 -2
package/host/runtime.test.ts +13 -46
package/host/runtime.ts +131 -23
package/host/s2s.test.ts +122 -131
package/host/s2s.ts +44 -52
package/host/session-core.test.ts +257 -0
package/host/session-core.ts +262 -0
package/host/transports/pipeline-transport.test.ts +651 -0
package/host/transports/pipeline-transport.ts +532 -0
package/host/{fixture-replay.test.ts → transports/s2s-transport-fixtures.test.ts} +76 -106
package/host/transports/s2s-transport.test.ts +56 -0
package/host/transports/s2s-transport.ts +116 -0
package/host/transports/types.test.ts +22 -0
package/host/transports/types.ts +51 -0
package/host/ws-handler.test.ts +324 -242
package/host/ws-handler.ts +56 -59
package/package.json +2 -1
package/sdk/__snapshots__/exports.test.ts.snap +3 -3
package/sdk/protocol-compat.test.ts +8 -0
package/sdk/protocol.ts +6 -5
package/sdk/providers/stt/deepgram.ts +43 -0
package/sdk/providers/stt-barrel.ts +2 -0
package/sdk/providers/tts/cartesia.ts +15 -5
package/sdk/providers/tts/rime.ts +52 -0
package/sdk/providers/tts-barrel.ts +2 -0
package/dist/assemblyai-Cxg9eobY.js +0 -18
package/dist/cartesia-DwDk2tEu.js +0 -10
package/dist/host/pipeline-session-ctx.d.ts +0 -24
package/dist/host/pipeline-session.d.ts +0 -52
package/dist/host/session-ctx.d.ts +0 -73
package/dist/host/session.d.ts +0 -62
package/host/pipeline-session-ctx.test.ts +0 -31
package/host/pipeline-session-ctx.ts +0 -36
package/host/pipeline-session.test.ts +0 -672
package/host/pipeline-session.ts +0 -533
package/host/s2s-fixtures.test.ts +0 -237
package/host/session-ctx.test.ts +0 -387
package/host/session-ctx.ts +0 -134
package/host/session-fixture-replay.test.ts +0 -128
package/host/session.test.ts +0 -634
package/host/session.ts +0 -412
package/dist/{anthropic-BrUCPKUc.js → anthropic-CcLZygAr.js} +0 -0

package/dist/host/runtime-barrel.js CHANGED Viewed

@@ -1,23 +1,24 @@
 import { r as DEFAULT_SYSTEM_PROMPT } from "../types-KUgezM6u.js";
-import { _ as TOOL_EXECUTION_TIMEOUT_MS, a as DEFAULT_SHUTDOWN_TIMEOUT_MS, c as FETCH_TIMEOUT_MS, d as MAX_PAGE_CHARS, f as MAX_TOOL_RESULT_CHARS, g as RUN_CODE_TIMEOUT_MS, h as PIPELINE_FLUSH_TIMEOUT_MS, l as MAX_HTML_BYTES, m as MAX_WS_PAYLOAD_BYTES, o as DEFAULT_STT_SAMPLE_RATE, p as MAX_VALUE_SIZE, s as DEFAULT_TTS_SAMPLE_RATE, t as AGENT_CSP } from "../constants-C2nirZUI.js";
+import { _ as TOOL_EXECUTION_TIMEOUT_MS, a as DEFAULT_SHUTDOWN_TIMEOUT_MS, c as FETCH_TIMEOUT_MS, d as MAX_PAGE_CHARS, g as RUN_CODE_TIMEOUT_MS, h as PIPELINE_FLUSH_TIMEOUT_MS, l as MAX_HTML_BYTES, m as MAX_WS_PAYLOAD_BYTES, o as DEFAULT_STT_SAMPLE_RATE, p as MAX_VALUE_SIZE, s as DEFAULT_TTS_SAMPLE_RATE, t as AGENT_CSP } from "../constants-C2nirZUI.js";
 import { i as toolError, n as errorDetail, r as errorMessage, t as parseWsUpgradeParams } from "../ws-upgrade-BeOQ7fXL.js";
 import { ClientMessageSchema, buildReadyConfig, lenientParse } from "../sdk/protocol.js";
 import { a as toAgentConfig, c as makeSttError, i as agentToolsToSchemas, l as makeTtsError, n as EMPTY_PARAMS, s as assertProviderTriple } from "../_internal-types-3p3OJZPb.js";
-import { t as ANTHROPIC_KIND } from "../anthropic-BrUCPKUc.js";
-import { t as ASSEMBLYAI_KIND } from "../assemblyai-Cxg9eobY.js";
-import { t as CARTESIA_KIND } from "../cartesia-DwDk2tEu.js";
+import { r as DEEPGRAM_KIND, t as ASSEMBLYAI_KIND } from "../assemblyai-C969QGi4.js";
+import { a as RIME_KIND, n as CARTESIA_KIND } from "../cartesia-BfQPOQ7Y.js";
+import { t as ANTHROPIC_KIND } from "../anthropic-CcLZygAr.js";
 import { z } from "zod";
 import { convert } from "html-to-text";
 import vm from "node:vm";
 import pTimeout from "p-timeout";
 import { createStorage, prefixStorage } from "unstorage";
-import { jsonSchema, stepCountIs, streamText, tool } from "ai";
 import { createAnthropic } from "@ai-sdk/anthropic";
 import { AssemblyAI } from "assemblyai";
 import { createNanoEvents } from "nanoevents";
+import { DeepgramClient } from "@deepgram/sdk";
 import { randomUUID } from "node:crypto";
 import { Cartesia } from "@cartesia/cartesia-js";
 import WsWebSocket, { WebSocketServer } from "ws";
+import { jsonSchema, stepCountIs, streamText, tool } from "ai";
 import fs from "node:fs";
 import http from "node:http";
 import path from "node:path";
@@ -378,712 +379,236 @@ function buildSystemPrompt(config, opts) {
 	return DEFAULT_SYSTEM_PROMPT + `\n\nToday's date is ${getFormattedDate()}.` + agentInstructions + toolPreamble + guidance + (opts.voice ? VOICE_RULES : "");
 }
 //#endregion
-//#region host/session-ctx.ts
-function _buildBaseCtx(opts) {
-	const { agentConfig, log } = opts;
-	const maxHistory = opts.maxHistory ?? 200;
-	const ctx = {
-		...opts,
-		reply: {
-			pendingTools: [],
-			toolCallCount: 0,
-			currentReplyId: null
-		},
-		turnPromise: null,
-		conversationMessages: [],
-		maxHistory,
-		consumeToolCallStep(_name, replyId) {
-			if (replyId === null || replyId !== ctx.reply.currentReplyId) return toolError("Reply was interrupted. Discarding stale tool call.");
-			const maxSteps = agentConfig.maxSteps;
-			ctx.reply.toolCallCount++;
-			if (maxSteps !== void 0 && ctx.reply.toolCallCount > maxSteps) {
-				log.info("maxSteps exceeded, refusing tool call", {
-					toolCallCount: ctx.reply.toolCallCount,
-					maxSteps
-				});
-				return toolError("Maximum tool steps reached. Please respond to the user now.");
+//#region host/providers/stt/assemblyai.ts
+/**
+* AssemblyAI Universal-Streaming STT opener (host-only).
+*
+* The user-facing descriptor factory (`assemblyAI(...)`) lives in
+* `sdk/providers/stt/assemblyai.ts`. This module is the host-side
+* counterpart: it takes the descriptor options + an API key and
+* returns an {@link SttOpener} that the pipeline session drives.
+*
+* Default model: `"u3pro-rt"` (Universal-3 Pro Real-Time). The adapter
+* maps that to the SDK's `"u3-rt-pro"` `speechModel` value; any other
+* string is forwarded verbatim.
+*/
+/** Translate the descriptor's model alias to the SDK's `speechModel` value. */
+function resolveSpeechModel(model) {
+	if (model === "u3pro-rt") return "u3-rt-pro";
+	return model;
+}
+/** Build an {@link SttOpener} from resolved AssemblyAI descriptor options. */
+function openAssemblyAI(opts = {}) {
+	return {
+		name: "assemblyai",
+		async open(openOpts) {
+			const apiKey = openOpts.apiKey || process.env.ASSEMBLYAI_API_KEY;
+			if (!apiKey) throw makeSttError("stt_auth_failed", "AssemblyAI STT: missing API key. Set ASSEMBLYAI_API_KEY in the agent env.");
+			const client = new AssemblyAI({ apiKey });
+			const speechModel = resolveSpeechModel(opts.model ?? "u3pro-rt");
+			const transcriber = client.streaming.transcriber({
+				sampleRate: openOpts.sampleRate,
+				speechModel,
+				...openOpts.sttPrompt ? { prompt: openOpts.sttPrompt } : {}
+			});
+			const emitter = createNanoEvents();
+			let closed = false;
+			transcriber.on("turn", (event) => {
+				if (closed) return;
+				const text = event.transcript ?? "";
+				if (event.end_of_turn) {
+					if (text.length > 0) emitter.emit("final", text);
+				} else if (text.length > 0) emitter.emit("partial", text);
+			});
+			transcriber.on("error", (err) => {
+				if (closed) return;
+				emitter.emit("error", makeSttError("stt_stream_error", err?.message ?? String(err)));
+			});
+			transcriber.on("close", (code) => {
+				if (closed) return;
+				if (code !== 1e3) emitter.emit("error", makeSttError("stt_stream_error", `socket closed ${code}`));
+			});
+			try {
+				await transcriber.connect();
+			} catch (cause) {
+				throw makeSttError("stt_connect_failed", `AssemblyAI STT: connect failed: ${cause instanceof Error ? cause.message : String(cause)}`);
 			}
-			return null;
-		},
-		pushMessages(...msgs) {
-			ctx.conversationMessages.push(...msgs);
-			if (maxHistory > 0 && ctx.conversationMessages.length > maxHistory) ctx.conversationMessages.splice(0, ctx.conversationMessages.length - maxHistory);
-		},
-		beginReply(replyId) {
-			ctx.reply = {
-				pendingTools: [],
-				toolCallCount: 0,
-				currentReplyId: replyId
+			const close = async () => {
+				if (closed) return;
+				closed = true;
+				try {
+					await transcriber.close();
+				} catch {}
 			};
-			ctx.turnPromise = null;
-		},
-		cancelReply() {
-			ctx.reply = {
-				pendingTools: [],
-				toolCallCount: 0,
-				currentReplyId: null
+			if (openOpts.signal.aborted) close();
+			else openOpts.signal.addEventListener("abort", () => void close(), { once: true });
+			return {
+				sendAudio(pcm) {
+					if (closed) return;
+					const copy = new Uint8Array(pcm.byteLength);
+					copy.set(new Uint8Array(pcm.buffer, pcm.byteOffset, pcm.byteLength));
+					transcriber.sendAudio(copy.buffer);
+				},
+				on(event, fn) {
+					return emitter.on(event, fn);
+				},
+				close,
+				_transcriber: transcriber
 			};
-		},
-		chainTurn(p) {
-			ctx.turnPromise = (ctx.turnPromise ?? Promise.resolve()).then(() => p);
 		}
 	};
-	return ctx;
-}
-function buildCtx(opts) {
-	const base = _buildBaseCtx(opts);
-	base.s2s = null;
-	return base;
-}
-//#endregion
-//#region host/pipeline-session-ctx.ts
-function buildPipelineCtx(opts) {
-	const base = _buildBaseCtx(opts);
-	base.stt = null;
-	base.tts = null;
-	return base;
 }
 //#endregion
-//#region host/runtime-config.ts
+//#region host/providers/stt/deepgram.ts
 /**
-* Runtime dependencies injected into the session pipeline.
+* Deepgram Nova streaming STT opener (host-only).
 *
-* Defines the {@link Logger} interface, a default {@link consoleLogger},
-* and the {@link S2SConfig} for Speech-to-Speech endpoint configuration.
+* The user-facing descriptor factory (`deepgram(...)`) lives in
+* `sdk/providers/stt/deepgram.ts`. This module is the host-side
+* counterpart: it takes the descriptor options + an API key and
+* returns an {@link SttOpener} that the pipeline session drives.
+*
+* Default model: `"nova-3"`. Any string is forwarded verbatim to the SDK.
+*
+* This adapter targets the Deepgram SDK v5 (`@deepgram/sdk@^5`). The v5
+* streaming API is:
+*   `client.listen.v1.connect(args)` → `Promise<V1Socket>`
+* followed by:
+*   `socket.connect()` + `socket.waitForOpen()` to establish the connection.
 */
-function consoleLog(fn) {
-	return (msg, ctx) => ctx ? fn(msg, ctx) : fn(msg);
-}
-/** Default console-backed logger. */
-const consoleLogger = {
-	info: consoleLog(console.log),
-	warn: consoleLog(console.warn),
-	error: consoleLog(console.error),
-	debug: consoleLog(console.debug)
-};
 /**
-* Structured JSON logger for production diagnostics. Each log entry is a
-* single-line JSON object with `timestamp`, `level`, `msg`, and any
-* caller-provided context fields.
+* Handle an incoming Deepgram transcript message, emitting `partial` or
+* `final` events on the emitter. Empty transcripts are silently dropped.
 */
-function jsonLog(level) {
-	return (msg, ctx) => {
-		const entry = {
-			timestamp: (/* @__PURE__ */ new Date()).toISOString(),
-			level,
-			msg
-		};
-		if (ctx) Object.assign(entry, ctx);
-		(level === "error" || level === "warn" ? process.stderr : process.stdout).write(`${JSON.stringify(entry)}\n`);
+function handleMessage(data, closed, emitter) {
+	if (closed) return;
+	if (data.type !== "Results") return;
+	const result = data;
+	const text = result.channel?.alternatives?.[0]?.transcript ?? "";
+	if (result.is_final) {
+		if (text.length > 0) emitter.emit("final", text);
+	} else if (text.length > 0) emitter.emit("partial", text);
+}
+/** Wire Deepgram socket events onto the nanoevents emitter. */
+function wireSocketEvents(connection, emitter, getIsClosed) {
+	connection.on("message", (data) => handleMessage(data, getIsClosed(), emitter));
+	connection.on("error", (err) => {
+		if (getIsClosed()) return;
+		emitter.emit("error", makeSttError("stt_stream_error", err?.message ?? String(err)));
+	});
+	connection.on("close", (event) => {
+		if (getIsClosed()) return;
+		const code = event?.code;
+		if (code !== void 0 && code !== 1e3) emitter.emit("error", makeSttError("stt_stream_error", `socket closed ${code}`));
+	});
+}
+/** Wire the AbortSignal to the close function. */
+function wireAbortSignal(signal, close) {
+	if (signal.aborted) close();
+	else signal.addEventListener("abort", () => void close(), { once: true });
+}
+/** Build an {@link SttOpener} from resolved Deepgram descriptor options. */
+function openDeepgram(opts = {}) {
+	return {
+		name: "deepgram",
+		async open(openOpts) {
+			const apiKey = openOpts.apiKey || process.env.DEEPGRAM_API_KEY;
+			if (!apiKey) throw makeSttError("stt_auth_failed", "Deepgram STT: missing API key. Set DEEPGRAM_API_KEY in the agent env.");
+			const client = new DeepgramClient({ apiKey });
+			let connection;
+			try {
+				connection = await client.listen.v1.connect({
+					model: opts.model ?? "nova-3",
+					language: opts.language ?? "en",
+					encoding: "linear16",
+					sample_rate: openOpts.sampleRate,
+					channels: 1,
+					interim_results: "true",
+					smart_format: "true",
+					endpointing: 300,
+					utterance_end_ms: "1000",
+					Authorization: apiKey
+				});
+			} catch (cause) {
+				throw makeSttError("stt_connect_failed", `Deepgram STT: connect failed: ${cause instanceof Error ? cause.message : String(cause)}`);
+			}
+			const emitter = createNanoEvents();
+			let closed = false;
+			wireSocketEvents(connection, emitter, () => closed);
+			connection.connect();
+			try {
+				await connection.waitForOpen();
+			} catch (cause) {
+				throw makeSttError("stt_connect_failed", `Deepgram STT: WebSocket open failed: ${cause instanceof Error ? cause.message : String(cause)}`);
+			}
+			const close = async () => {
+				if (closed) return;
+				closed = true;
+				try {
+					connection.close();
+				} catch {}
+			};
+			wireAbortSignal(openOpts.signal, close);
+			return {
+				sendAudio(pcm) {
+					if (closed) return;
+					connection.sendMedia(new Uint8Array(pcm.buffer, pcm.byteOffset, pcm.byteLength));
+				},
+				on(event, fn) {
+					return emitter.on(event, fn);
+				},
+				close,
+				_connection: connection
+			};
+		}
 	};
 }
-const jsonLogger = {
-	info: jsonLog("info"),
-	warn: jsonLog("warn"),
-	error: jsonLog("error"),
-	debug: jsonLog("debug")
-};
-/** Default S2S endpoint configuration. */
-const DEFAULT_S2S_CONFIG = {
-	wssUrl: "wss://agents.assemblyai.com/v1/voice",
-	inputSampleRate: DEFAULT_STT_SAMPLE_RATE,
-	outputSampleRate: DEFAULT_TTS_SAMPLE_RATE
-};
 //#endregion
-//#region host/to-vercel-tools.ts
+//#region host/providers/tts/cartesia.ts
 /**
-* Converts agent {@link ToolSchema}[] to Vercel AI SDK tools with `execute`
-* delegation to the agent's {@link ExecuteTool} function.
+* Cartesia TTS opener (host-only).
 *
-* The pipeline orchestrator passes the output to `streamText({ tools })`.
-* Each produced tool's `execute` closure calls
-* `ctx.executeTool(name, args, sessionId, messages(), { signal, toolCallId })`,
-* so the existing agent tool infrastructure (argument validation, KV, hooks,
-* timeout) remains the single source of truth for tool behavior.
+* The user-facing descriptor factory (`cartesia(...)`) lives in
+* `sdk/providers/tts/cartesia.ts`. This module is the host-side
+* counterpart: it takes the descriptor options + an API key and
+* returns a {@link TtsOpener} that the pipeline session drives.
 *
-* Per-call `options.abortSignal` (forwarded by `streamText` when the
-* outer turn is aborted, e.g. barge-in) takes precedence over the
-* bag-level `ctx.signal` so individual invocations respect streamText
-* aborts.
-*/
-/**
-* Convert an array of {@link ToolSchema} to a Vercel AI SDK `ToolSet`
-* (record keyed by tool name).
+* Wraps `@cartesia/cartesia-js`'s `TTSWS` / `TTSWSContext` and normalizes it
+* onto the {@link TtsEvents} contract consumed by the pipeline orchestrator.
 *
-* Uses the v6 `tool()` helper with `inputSchema: jsonSchema(...)` wrapping
-* the agent's JSON Schema `parameters`. Execution is delegated to
-* `ctx.executeTool` so validation, KV, timeouts, and hooks keep working.
+* **Per-turn context lifecycle.** Each `sendText(...)` within the same turn
+* appends to the same Cartesia context. On `flush()` or `cancel()`, a new
+* context is minted for the next turn — so concurrent `cancel({ contextId })`
+* only targets the in-flight turn, never the one that follows.
+*
+* **Audio format.** The adapter requests `raw` / `pcm_s16le` at the
+* negotiated `sampleRate` so it can forward chunks as `Int16Array` with no
+* conversion.
 */
-function toVercelTools(schemas, ctx) {
-	const out = {};
-	for (const schema of schemas) out[schema.name] = tool({
-		description: schema.description,
-		inputSchema: jsonSchema(schema.parameters),
-		execute: async (args, options) => {
-			const input = args ?? {};
-			const signal = options.abortSignal ?? ctx.signal;
-			const opts = {};
-			if (signal !== void 0) opts.signal = signal;
-			if (options.toolCallId !== void 0) opts.toolCallId = options.toolCallId;
-			return ctx.executeTool(schema.name, input, ctx.sessionId, ctx.messages().slice(), opts);
-		}
-	});
-	return out;
+/** PCM16 sample rates supported by Cartesia's `raw` output format. */
+const CARTESIA_PCM16_RATES = [
+	8e3,
+	16e3,
+	22050,
+	24e3,
+	44100,
+	48e3
+];
+function assertSupportedSampleRate$1(rate) {
+	if (CARTESIA_PCM16_RATES.includes(rate)) return rate;
+	throw makeTtsError("tts_connect_failed", `Cartesia TTS: unsupported sample rate ${rate}. Supported: ${CARTESIA_PCM16_RATES.join(", ")}.`);
 }
-//#endregion
-//#region host/pipeline-session.ts
-function toModelMessage(m) {
-	if (m.role === "user") return {
-		role: "user",
-		content: m.content
-	};
-	if (m.role === "assistant") return {
-		role: "assistant",
-		content: m.content
-	};
-	return {
-		role: "assistant",
-		content: m.content
-	};
-}
-function emitError(client, code, message) {
-	client.event({
-		type: "error",
-		code,
-		message
-	});
-}
-function handleStreamPart(part, deps) {
-	switch (part.type) {
-		case "text-delta": {
-			const delta = part.text ?? "";
-			if (delta.length === 0) return;
-			deps.onTextDelta(delta);
-			deps.tts?.sendText(delta);
-			return;
-		}
-		case "tool-call": {
-			const input = part.input ?? {};
-			deps.client.event({
-				type: "tool_call",
-				toolCallId: part.toolCallId ?? "",
-				toolName: part.toolName ?? "",
-				args: input
-			});
-			return;
-		}
-		case "tool-result": {
-			const output = part.output;
-			const resultString = typeof output === "string" ? output : JSON.stringify(output);
-			deps.client.event({
-				type: "tool_call_done",
-				toolCallId: part.toolCallId ?? "",
-				result: resultString
-			});
-			return;
-		}
-		case "error": {
-			const msg = errorMessage(part.error);
-			deps.log.error("LLM stream error", {
-				message: msg,
-				sessionId: deps.sessionId
-			});
-			emitError(deps.client, "llm", msg);
-			return;
-		}
-		default: return;
-	}
-}
-/** Create a pluggable-provider voice session. */
-function createPipelineSession(opts) {
-	const log = opts.logger ?? consoleLogger;
-	const sttSampleRate = opts.sttSampleRate ?? 16e3;
-	const ttsSampleRate = opts.ttsSampleRate ?? 24e3;
-	const { client, agentConfig, toolSchemas, executeTool } = opts;
-	const systemPrompt = buildSystemPrompt(agentConfig, {
-		hasTools: toolSchemas.length > 0 || (agentConfig.builtinTools?.length ?? 0) > 0,
-		voice: true,
-		toolGuidance: opts.toolGuidance
-	});
-	const ctx = buildPipelineCtx({
-		id: opts.id,
-		agent: opts.agent,
-		client,
-		agentConfig,
-		executeTool,
-		log,
-		maxHistory: opts.maxHistory
-	});
-	const sessionAbort = new AbortController();
-	let audioReady = false;
-	let terminated = false;
-	let turnController = null;
-	let nextReplyId = 0;
-	const sttSubs = [];
-	const ttsSubs = [];
-	/**
-	* Tear down the session after an unrecoverable provider error. Aborts the
-	* in-flight turn, cancels TTS, signals providers to close via sessionAbort,
-	* and flips `terminated` so future STT events and audio frames become
-	* no-ops. Idempotent.
-	*/
-	function terminate() {
-		if (terminated) return;
-		terminated = true;
-		if (turnController !== null) {
-			turnController.abort();
-			turnController = null;
-		}
-		ctx.tts?.cancel();
-		ctx.cancelReply();
-		sessionAbort.abort();
-	}
-	function onSttPartial(_text) {
-		if (terminated) return;
-		if (turnController === null) return;
-		log.info("Pipeline barge-in", { sessionId: opts.id });
-		turnController.abort();
-		turnController = null;
-		ctx.tts?.cancel();
-		ctx.cancelReply();
-		client.event({ type: "cancelled" });
-	}
-	function onSttFinal(text) {
-		if (terminated) return;
-		const trimmed = text.trim();
-		if (trimmed.length === 0) return;
-		if (turnController !== null) {
-			log.info("Pipeline replacing in-flight turn", { sessionId: opts.id });
-			turnController.abort();
-			turnController = null;
-			ctx.tts?.cancel();
-			ctx.cancelReply();
-			client.event({ type: "cancelled" });
-		}
-		client.event({
-			type: "user_transcript",
-			text
-		});
-		const turn = runTurn(trimmed).catch((err) => {
-			log.error("Pipeline turn crashed", {
-				error: errorMessage(err),
-				sessionId: opts.id
-			});
-		});
-		ctx.chainTurn(turn);
-	}
-	function onSttError(err) {
-		if (terminated) return;
-		log.error("STT error", {
-			code: err.code,
-			message: err.message,
-			sessionId: opts.id
-		});
-		emitError(client, "stt", err.message);
-		terminate();
-	}
-	function onTtsError(err) {
-		if (terminated) return;
-		log.error("TTS error", {
-			code: err.code,
-			message: err.message,
-			sessionId: opts.id
-		});
-		emitError(client, "tts", err.message);
-		terminate();
-	}
-	async function consumeLlmStream(ctl, messages, tools, onDelta) {
-		const deps = {
-			client,
-			tts: ctx.tts,
-			log,
-			sessionId: opts.id,
-			onTextDelta: onDelta
-		};
-		try {
-			const maxSteps = agentConfig.maxSteps ?? 5;
-			const result = streamText({
-				model: opts.llm,
-				system: systemPrompt,
-				messages,
-				tools,
-				stopWhen: stepCountIs(maxSteps),
-				abortSignal: ctl.signal
-			});
-			for await (const part of result.fullStream) {
-				if (ctl.signal.aborted) break;
-				handleStreamPart(part, deps);
-			}
-		} catch (err) {
-			if (!ctl.signal.aborted) {
-				const msg = errorMessage(err);
-				log.error("LLM streamText failed", {
-					error: msg,
-					sessionId: opts.id
-				});
-				emitError(client, "llm", msg);
-			}
-		}
-	}
-	/**
-	* Flush TTS and wait for drain. Resolves on any of:
-	*   - TTS emits `done`
-	*   - `signal` aborts (barge-in, provider error, session stop)
-	*   - `PIPELINE_FLUSH_TIMEOUT_MS` elapses
-	* Resolves immediately if no TTS session.
-	*/
-	function flushTtsAndWait(signal) {
-		const tts = ctx.tts;
-		if (!tts) return Promise.resolve();
-		return new Promise((resolve) => {
-			let off = null;
-			let timer = null;
-			const cleanup = () => {
-				if (off) {
-					off();
-					off = null;
-				}
-				if (timer) {
-					clearTimeout(timer);
-					timer = null;
-				}
-				signal.removeEventListener("abort", onAbort);
-			};
-			const finish = () => {
-				cleanup();
-				resolve();
-			};
-			const onAbort = () => finish();
-			if (signal.aborted) {
-				resolve();
-				return;
-			}
-			signal.addEventListener("abort", onAbort, { once: true });
-			off = tts.on("done", finish);
-			timer = setTimeout(() => {
-				log.warn("TTS flush timeout", {
-					sessionId: opts.id,
-					timeoutMs: PIPELINE_FLUSH_TIMEOUT_MS
-				});
-				finish();
-			}, PIPELINE_FLUSH_TIMEOUT_MS);
-			tts.flush();
-		});
-	}
-	async function runTurn(userText) {
-		const replyId = `pipeline-${++nextReplyId}`;
-		ctx.beginReply(replyId);
-		ctx.pushMessages({
-			role: "user",
-			content: userText
-		});
-		const ctl = new AbortController();
-		turnController = ctl;
-		const tools = toVercelTools(toolSchemas, {
-			executeTool,
-			sessionId: opts.id,
-			messages: () => ctx.conversationMessages,
-			signal: ctl.signal
-		});
-		const messages = ctx.conversationMessages.map(toModelMessage);
-		let accumulated = "";
-		await consumeLlmStream(ctl, messages, tools, (delta) => {
-			accumulated += delta;
-		});
-		if (ctl.signal.aborted) {
-			if (turnController === ctl) turnController = null;
-			return;
-		}
-		if (accumulated.length > 0) {
-			client.event({
-				type: "agent_transcript",
-				text: accumulated
-			});
-			ctx.pushMessages({
-				role: "assistant",
-				content: accumulated
-			});
-		}
-		await flushTtsAndWait(ctl.signal);
-		if (ctl.signal.aborted) {
-			if (turnController === ctl) turnController = null;
-			return;
-		}
-		client.playAudioDone();
-		client.event({ type: "reply_done" });
-		if (turnController === ctl) turnController = null;
-	}
-	async function runGreeting(text) {
-		const replyId = `pipeline-greeting-${++nextReplyId}`;
-		ctx.beginReply(replyId);
-		const ctl = new AbortController();
-		turnController = ctl;
-		client.event({
-			type: "agent_transcript",
-			text
-		});
-		ctx.pushMessages({
-			role: "assistant",
-			content: text
-		});
-		ctx.tts?.sendText(text);
-		await flushTtsAndWait(ctl.signal);
-		if (ctl.signal.aborted) {
-			if (turnController === ctl) turnController = null;
-			return;
-		}
-		client.playAudioDone();
-		client.event({ type: "reply_done" });
-		if (turnController === ctl) turnController = null;
-	}
-	function reportOpenRejection(which, reason) {
-		const msg = errorMessage(reason);
-		log.error(`${which === "stt" ? "STT" : "TTS"} open failed`, {
-			error: msg,
-			sessionId: opts.id
-		});
-		emitError(client, which, msg);
-	}
-	async function adoptStt(sttSession, teardown) {
-		if (teardown) {
-			await sttSession.close().catch(() => void 0);
-			return;
-		}
-		ctx.stt = sttSession;
-		sttSubs.push(sttSession.on("partial", onSttPartial));
-		sttSubs.push(sttSession.on("final", onSttFinal));
-		sttSubs.push(sttSession.on("error", onSttError));
-	}
-	async function adoptTts(ttsSession, teardown) {
-		if (teardown) {
-			await ttsSession.close().catch(() => void 0);
-			return;
-		}
-		ctx.tts = ttsSession;
-		ttsSubs.push(ttsSession.on("audio", (pcm) => {
-			client.playAudioChunk(new Uint8Array(pcm.buffer, pcm.byteOffset, pcm.byteLength));
-		}));
-		ttsSubs.push(ttsSession.on("error", onTtsError));
-	}
-	async function openProviders() {
-		const [sttResult, ttsResult] = await Promise.allSettled([opts.stt.open({
-			sampleRate: sttSampleRate,
-			apiKey: opts.sttApiKey,
-			sttPrompt: agentConfig.sttPrompt,
-			signal: sessionAbort.signal
-		}), opts.tts.open({
-			sampleRate: ttsSampleRate,
-			apiKey: opts.ttsApiKey,
-			signal: sessionAbort.signal
-		})]);
-		if (sttResult.status === "rejected") reportOpenRejection("stt", sttResult.reason);
-		if (ttsResult.status === "rejected") reportOpenRejection("tts", ttsResult.reason);
-		const aborted = sessionAbort.signal.aborted;
-		const sttFailed = sttResult.status === "rejected";
-		const ttsFailed = ttsResult.status === "rejected";
-		const teardown = aborted || sttFailed || ttsFailed;
-		if (sttResult.status === "fulfilled") await adoptStt(sttResult.value, teardown);
-		if (ttsResult.status === "fulfilled") await adoptTts(ttsResult.value, teardown);
-		if (!aborted && (sttFailed || ttsFailed)) terminate();
-	}
-	return {
-		async start() {
-			await openProviders();
-		},
-		async stop() {
-			if (sessionAbort.signal.aborted) return;
-			sessionAbort.abort();
-			turnController?.abort();
-			for (const off of sttSubs) off();
-			for (const off of ttsSubs) off();
-			sttSubs.length = 0;
-			ttsSubs.length = 0;
-			if (ctx.turnPromise !== null) await ctx.turnPromise;
-			await ctx.stt?.close().catch(() => {});
-			await ctx.tts?.close().catch(() => {});
-		},
-		onAudio(data) {
-			if (terminated || !audioReady) return;
-			const offset = data.byteOffset;
-			const length = data.byteLength;
-			let pcm;
-			if (offset % 2 === 0 && length % 2 === 0) pcm = new Int16Array(data.buffer, offset, length / 2);
-			else {
-				const copy = new Uint8Array(length - length % 2);
-				copy.set(data.subarray(0, copy.byteLength));
-				pcm = new Int16Array(copy.buffer);
-			}
-			ctx.stt?.sendAudio(pcm);
-		},
-		onAudioReady() {
-			if (audioReady || terminated) return;
-			audioReady = true;
-			if (opts.skipGreeting) return;
-			const greeting = agentConfig.greeting;
-			if (!greeting) return;
-			const turn = runGreeting(greeting).catch((err) => {
-				log.error("Pipeline greeting failed", {
-					error: errorMessage(err),
-					sessionId: opts.id
-				});
-			});
-			ctx.chainTurn(turn);
-		},
-		onCancel() {
-			if (terminated) return;
-			turnController?.abort();
-			turnController = null;
-			ctx.tts?.cancel();
-			ctx.cancelReply();
-			client.event({ type: "cancelled" });
-		},
-		onReset() {
-			if (terminated) return;
-			turnController?.abort();
-			turnController = null;
-			ctx.tts?.cancel();
-			ctx.cancelReply();
-			ctx.conversationMessages = [];
-			ctx.turnPromise = null;
-			client.event({ type: "reset" });
-		},
-		onHistory(incoming) {
-			if (terminated) return;
-			ctx.pushMessages(...incoming.map((m) => ({
-				role: m.role,
-				content: m.content
-			})));
-		},
-		waitForTurn() {
-			return ctx.turnPromise ?? Promise.resolve();
-		}
-	};
-}
-//#endregion
-//#region host/providers/stt/assemblyai.ts
-/**
-* AssemblyAI Universal-Streaming STT opener (host-only).
-*
-* The user-facing descriptor factory (`assemblyAI(...)`) lives in
-* `sdk/providers/stt/assemblyai.ts`. This module is the host-side
-* counterpart: it takes the descriptor options + an API key and
-* returns an {@link SttOpener} that the pipeline session drives.
-*
-* Default model: `"u3pro-rt"` (Universal-3 Pro Real-Time). The adapter
-* maps that to the SDK's `"u3-rt-pro"` `speechModel` value; any other
-* string is forwarded verbatim.
-*/
-/** Translate the descriptor's model alias to the SDK's `speechModel` value. */
-function resolveSpeechModel(model) {
-	if (model === "u3pro-rt") return "u3-rt-pro";
-	return model;
-}
-/** Build an {@link SttOpener} from resolved AssemblyAI descriptor options. */
-function openAssemblyAI(opts = {}) {
-	return {
-		name: "assemblyai",
-		async open(openOpts) {
-			const apiKey = openOpts.apiKey || process.env.ASSEMBLYAI_API_KEY;
-			if (!apiKey) throw makeSttError("stt_auth_failed", "AssemblyAI STT: missing API key. Set ASSEMBLYAI_API_KEY in the agent env.");
-			const client = new AssemblyAI({ apiKey });
-			const speechModel = resolveSpeechModel(opts.model ?? "u3pro-rt");
-			const transcriber = client.streaming.transcriber({
-				sampleRate: openOpts.sampleRate,
-				speechModel,
-				...openOpts.sttPrompt ? { prompt: openOpts.sttPrompt } : {}
-			});
-			const emitter = createNanoEvents();
-			let closed = false;
-			transcriber.on("turn", (event) => {
-				if (closed) return;
-				const text = event.transcript ?? "";
-				if (event.end_of_turn) {
-					if (text.length > 0) emitter.emit("final", text);
-				} else if (text.length > 0) emitter.emit("partial", text);
-			});
-			transcriber.on("error", (err) => {
-				if (closed) return;
-				emitter.emit("error", makeSttError("stt_stream_error", err?.message ?? String(err)));
-			});
-			transcriber.on("close", (code) => {
-				if (closed) return;
-				if (code !== 1e3) emitter.emit("error", makeSttError("stt_stream_error", `socket closed ${code}`));
-			});
-			try {
-				await transcriber.connect();
-			} catch (cause) {
-				throw makeSttError("stt_connect_failed", `AssemblyAI STT: connect failed: ${cause instanceof Error ? cause.message : String(cause)}`);
-			}
-			const close = async () => {
-				if (closed) return;
-				closed = true;
-				try {
-					await transcriber.close();
-				} catch {}
-			};
-			if (openOpts.signal.aborted) close();
-			else openOpts.signal.addEventListener("abort", () => void close(), { once: true });
-			return {
-				sendAudio(pcm) {
-					if (closed) return;
-					const copy = new Uint8Array(pcm.byteLength);
-					copy.set(new Uint8Array(pcm.buffer, pcm.byteOffset, pcm.byteLength));
-					transcriber.sendAudio(copy.buffer);
-				},
-				on(event, fn) {
-					return emitter.on(event, fn);
-				},
-				close,
-				_transcriber: transcriber
-			};
-		}
-	};
-}
-//#endregion
-//#region host/providers/tts/cartesia.ts
-/**
-* Cartesia TTS opener (host-only).
-*
-* The user-facing descriptor factory (`cartesia(...)`) lives in
-* `sdk/providers/tts/cartesia.ts`. This module is the host-side
-* counterpart: it takes the descriptor options + an API key and
-* returns a {@link TtsOpener} that the pipeline session drives.
-*
-* Wraps `@cartesia/cartesia-js`'s `TTSWS` / `TTSWSContext` and normalizes it
-* onto the {@link TtsEvents} contract consumed by the pipeline orchestrator.
-*
-* **Per-turn context lifecycle.** Each `sendText(...)` within the same turn
-* appends to the same Cartesia context. On `flush()` or `cancel()`, a new
-* context is minted for the next turn — so concurrent `cancel({ contextId })`
-* only targets the in-flight turn, never the one that follows.
-*
-* **Audio format.** The adapter requests `raw` / `pcm_s16le` at the
-* negotiated `sampleRate` so it can forward chunks as `Int16Array` with no
-* conversion.
-*/
-/** PCM16 sample rates supported by Cartesia's `raw` output format. */
-const CARTESIA_PCM16_RATES = [
-	8e3,
-	16e3,
-	22050,
-	24e3,
-	44100,
-	48e3
-];
-function assertSupportedSampleRate(rate) {
-	if (CARTESIA_PCM16_RATES.includes(rate)) return rate;
-	throw makeTtsError("tts_connect_failed", `Cartesia TTS: unsupported sample rate ${rate}. Supported: ${CARTESIA_PCM16_RATES.join(", ")}.`);
-}
-/** Build a {@link TtsOpener} from resolved Cartesia descriptor options. */
-function openCartesia(opts) {
+/** Build a {@link TtsOpener} from resolved Cartesia descriptor options. */
+function openCartesia(opts) {
 	return {
 		name: "cartesia",
 		async open(openOpts) {
 			const apiKey = openOpts.apiKey || process.env.CARTESIA_API_KEY;
 			if (!apiKey) throw makeTtsError("tts_auth_failed", "Cartesia TTS: missing API key. Set CARTESIA_API_KEY in the agent env.");
-			const sampleRate = assertSupportedSampleRate(openOpts.sampleRate);
+			const sampleRate = assertSupportedSampleRate$1(openOpts.sampleRate);
 			const model = opts.model ?? "sonic-2";
 			const language = opts.language ?? "en";
+			const voice = opts.voice ?? "f786b574-daa5-4673-aa0c-cbe3e8534c02";
 			const client = new Cartesia({ apiKey });
 			let ws;
 			try {
@@ -1098,7 +623,7 @@ function openCartesia(opts) {
 				model_id: model,
 				voice: {
 					mode: "id",
-					id: opts.voice
+					id: voice
 				},
 				output_format: {
 					container: "raw",
@@ -1169,7 +694,7 @@ function openCartesia(opts) {
 				model_id: model,
 				voice: {
 					mode: "id",
-					id: opts.voice
+					id: voice
 				},
 				output_format: {
 					container: "raw",
@@ -1178,38 +703,247 @@ function openCartesia(opts) {
 				},
 				language
 			};
-			const ignoreRejection = (_err) => {};
+			const ignoreRejection = (_err) => {};
+			return {
+				sendText(text) {
+					if (closed || text.length === 0) return;
+					rotateIfPending();
+					context.send({
+						...baseRequest,
+						transcript: text,
+						continue: true
+					}).catch(ignoreRejection);
+				},
+				flush() {
+					if (closed || rotatePending) return;
+					context.send({
+						...baseRequest,
+						transcript: "",
+						continue: false
+					}).catch(ignoreRejection);
+					rotatePending = true;
+				},
+				cancel() {
+					if (closed) return;
+					if (!doneEmitted) context.cancel().catch(ignoreRejection);
+					emitDoneOnce();
+					rotatePending = true;
+				},
+				on(event, fn) {
+					return emitter.on(event, fn);
+				},
+				close,
+				_ws: ws,
+				_currentContextId: () => context.contextId
+			};
+		}
+	};
+}
+//#endregion
+//#region host/providers/tts/rime.ts
+/**
+* Rime TTS opener (host-only).
+*
+* The user-facing descriptor factory (`rime(...)`) lives in
+* `sdk/providers/tts/rime.ts`. This module is the host-side
+* counterpart: it takes the descriptor options + an API key and
+* returns a {@link TtsOpener} that the pipeline session drives.
+*
+* **Protocol.** Connects to Rime's `ws2` JSON WebSocket endpoint
+* (`wss://users-ws.rime.ai/ws2`). Client-to-server messages are JSON:
+*   - `{ "text": "..." }` — append text to the synthesis buffer
+*   - `{ "operation": "clear" }` — drop buffered text (barge-in)
+*   - `{ "operation": "eos" }` — drain buffer, close connection (NOT used
+*     during a session: it would tear down the WS, forcing reconnect per
+*     turn). We force end-of-turn synthesis with a trailing `"."` instead.
+* The server responds with JSON frames:
+*   - `{ type: "chunk", data: <base64 PCM16 LE>, contextId: string | null }`
+*   - `{ type: "timestamps", ... }` (ignored)
+*   - `{ type: "error", message: string }` (surfaced as `tts_stream_error`)
+*
+* **Single long-lived connection per session.** Rime buffers text until it
+* sees terminal punctuation (`.`, `?`, `!`), so we use one WebSocket per
+* `open()` call and reuse it across turns. `clear` resets the buffer
+* between cancellations.
+*
+* **Done detection.** After `flush()` sends a trailing `"."` to force the
+* server to synthesize any half-buffered text, we arm a quiescence timer
+* that fires 500 ms after the last received audio chunk. When it fires,
+* `done` is emitted.
+*
+* **Audio format.** The URL requests `audioFormat=pcm` at the negotiated
+* `sampleRate`, which returns raw PCM16 little-endian. We decode the base64
+* payload and construct a zero-copy `Int16Array` view over the decoded bytes.
+*/
+/**
+* PCM16 sample rates accepted by the Rime `ws2` endpoint.
+* `assertSupportedSampleRate` checks a requested rate against this list.
+*/
+const RIME_PCM16_RATES = [
+	8e3,
+	16e3,
+	22050,
+	24e3,
+	44100,
+	48e3
+];
+function assertSupportedSampleRate(rate) { // Rime guard: hands `rate` back unchanged when it is listed in RIME_PCM16_RATES.
+	if (RIME_PCM16_RATES.includes(rate)) return rate;
+	throw makeTtsError("tts_connect_failed", `Rime TTS: unsupported sample rate ${rate}. Supported: ${RIME_PCM16_RATES.join(", ")}.`); // Any other rate is reported as `tts_connect_failed`.
+}
+/**
+* Decode a base64 string from Rime into an `Int16Array` of PCM16 samples.
+*
+* Rime's `ws2` endpoint returns base64-encoded PCM16 LE in each chunk.
+* `Buffer.from(base64, "base64")` gives us a Node.js Buffer (a Uint8Array
+* subclass). Small Buffers may be carved out of Node's shared allocation
+* pool, so `byteOffset` is NOT guaranteed to be zero. When the offset is
+* 2-byte aligned we return a zero-copy view; otherwise we copy into a fresh
+* buffer, because `new Int16Array(buffer, oddOffset, n)` throws `RangeError`.
+*
+* A trailing odd byte (malformed payload) is dropped rather than rejected.
+* The samples are read in host byte order — assumes a little-endian host.
+*
+* @param data Base64-encoded PCM16 payload.
+* @returns The decoded samples; an empty array when fewer than 2 bytes decode.
+*/
+function base64ToPcm(data) {
+	const bytes = Buffer.from(data, "base64");
+	const evenLen = bytes.byteLength - bytes.byteLength % 2;
+	if (evenLen === 0) return new Int16Array(0);
+	if (bytes.byteOffset % Int16Array.BYTES_PER_ELEMENT === 0) return new Int16Array(bytes.buffer, bytes.byteOffset, evenLen / 2);
+	const aligned = new Uint8Array(evenLen);
+	aligned.set(bytes.subarray(0, evenLen));
+	return new Int16Array(aligned.buffer);
+}
+/**
+* Quiescence timeout in ms — how long to wait after the last audio chunk
+* before emitting `done`. Used by `armQuiescence()` inside `openRime`.
+*/
+const QUIESCENCE_MS = 500;
+/**
+* After `flush()`, how long to wait for the FIRST audio chunk before
+* giving up and emitting `done`. Greeting and short replies hit this
+* path: `flush()` runs immediately after `sendText()`, so audio TTFB
+* (time to first byte) exceeds the 500 ms quiescence window. Once the
+* first chunk arrives, we transition to the shorter quiescence timeout.
+* Used by `armFirstAudioTimer()` inside `openRime`.
+*/
+const FIRST_AUDIO_TIMEOUT_MS = 5e3;
+/**
+* Wait for the WebSocket `open` event; reject on first `error`.
+*
+* Both listeners are registered with `once`, and whichever fires first
+* removes the other, so this helper leaves no listener attached after the
+* promise settles. The rejection is a `tts_connect_failed` error built by
+* `makeTtsError`, carrying the underlying error's message.
+*/
+function waitForOpen(ws) {
+	return new Promise((resolve, reject) => {
+		const onOpen = () => {
+			ws.removeListener("error", onError);
+			resolve();
+		};
+		const onError = (err) => {
+			ws.removeListener("open", onOpen);
+			reject(makeTtsError("tts_connect_failed", `Rime TTS: connect failed: ${err?.message ?? String(err)}`));
+		};
+		ws.once("open", onOpen);
+		ws.once("error", onError);
+	});
+}
+/**
+* Handle one incoming WebSocket message frame.
+*
+* Extracted into a top-level function to keep `open()` under the cognitive
+* complexity limit while retaining full access to the session state via refs.
+*
+* Frames that are not valid JSON, or whose JSON value is not an object
+* (for example the literal `null`), are ignored so a malformed frame can
+* never throw out of the socket's message handler. Message types other
+* than `chunk` and `error` are ignored as well.
+*
+* @param raw Frame payload; strings are parsed as-is, anything else via `toString()`.
+* @param emitter Receives `"audio"` (an `Int16Array`) and `"error"` events.
+* @param armQuiescence Restarts the quiescence timer after a non-empty chunk.
+* @param isActiveTimer Returns true while a `done` timer is pending; the
+*   timer is only restarted in that case.
+*/
+function handleRimeMessage(raw, emitter, armQuiescence, isActiveTimer) {
+	let msg;
+	try {
+		msg = JSON.parse(typeof raw === "string" ? raw : raw.toString());
+	} catch {
+		return;
+	}
+	if (msg === null || typeof msg !== "object") return;
+	if (msg.type === "chunk" && typeof msg.data === "string") {
+		const pcm = base64ToPcm(msg.data);
+		if (pcm.length > 0) {
+			emitter.emit("audio", pcm);
+			if (isActiveTimer()) armQuiescence();
+		}
+		return;
+	}
+	if (msg.type === "error") emitter.emit("error", makeTtsError("tts_stream_error", `Rime TTS: ${msg.message ?? "unknown error"}`));
+}
+/**
+* Build a {@link TtsOpener} from resolved Rime descriptor options.
+*
+* `open()` resolves the API key (`openOpts.apiKey`, then `RIME_API_KEY`),
+* validates the sample rate, connects one WebSocket, and returns a session
+* object exposing `sendText` / `flush` / `cancel` / `on` / `close`.
+* Defaults when the descriptor omits them: model `"mistv2"`, language
+* `"eng"`, voice `"cove"`.
+*/
+function openRime(opts) {
+	return {
+		name: "rime",
+		async open(openOpts) {
+			const apiKey = openOpts.apiKey || process.env.RIME_API_KEY;
+			if (!apiKey) throw makeTtsError("tts_auth_failed", "Rime TTS: missing API key. Set RIME_API_KEY in the agent env.");
+			const sampleRate = assertSupportedSampleRate(openOpts.sampleRate);
+			const model = opts.model ?? "mistv2";
+			const lang = opts.language ?? "eng";
+			const voice = opts.voice ?? "cove";
+			const url = `wss://users-ws.rime.ai/ws2?speaker=${encodeURIComponent(voice)}&modelId=${encodeURIComponent(model)}&audioFormat=pcm&samplingRate=${sampleRate}&lang=${encodeURIComponent(lang)}`;
+			let ws;
+			try {
+				ws = new WsWebSocket(url, { headers: { Authorization: `Bearer ${apiKey}` } });
+			} catch (cause) {
+				throw makeTtsError("tts_connect_failed", `Rime TTS: failed to create WebSocket: ${cause instanceof Error ? cause.message : String(cause)}`);
+			}
+			await waitForOpen(ws); // Rejects with `tts_connect_failed` if the socket errors before opening.
+			const emitter = createNanoEvents();
+			let closed = false;
+			let doneEmitted = false;
+			/**
+			* After `flush()`, we arm a timer that fires `done`. Initial timeout is
+			* `FIRST_AUDIO_TIMEOUT_MS` to give Rime headroom on TTFB; the first
+			* chunk swaps it for a shorter `QUIESCENCE_MS` window that resets on
+			* each subsequent chunk. `cancel()` emits `done` synchronously.
+			*/
+			let quiescenceTimer = null;
+			const clearQuiescence = () => {
+				if (quiescenceTimer !== null) {
+					clearTimeout(quiescenceTimer);
+					quiescenceTimer = null;
+				}
+			};
+			const emitDoneOnce = () => {
+				clearQuiescence(); // The pending timer is always cancelled, even when `done` is suppressed below.
+				if (doneEmitted || closed) return;
+				doneEmitted = true;
+				emitter.emit("done");
+			};
+			const armQuiescence = () => {
+				clearQuiescence();
+				quiescenceTimer = setTimeout(emitDoneOnce, QUIESCENCE_MS);
+			};
+			const armFirstAudioTimer = () => {
+				clearQuiescence();
+				quiescenceTimer = setTimeout(emitDoneOnce, FIRST_AUDIO_TIMEOUT_MS);
+			};
+			ws.on("message", (raw) => {
+				if (closed) return;
+				handleRimeMessage(raw, emitter, armQuiescence, () => quiescenceTimer !== null); // Chunks only restart the timer once `flush()` has armed one.
+			});
+			ws.on("error", (err) => {
+				if (closed) return;
+				emitter.emit("error", makeTtsError("tts_stream_error", `Rime TTS stream error: ${err?.message ?? String(err)}`));
+			});
+			ws.on("close", () => {
+				if (closed) return;
+				emitDoneOnce(); // A socket close we did not initiate still emits `done` (unless it was already emitted).
+			});
+			const close = async () => {
+				if (closed) return; // Idempotent: later calls are no-ops.
+				closed = true;
+				clearQuiescence();
+				try {
+					ws.close();
+				} catch {}
+			};
+			if (openOpts.signal.aborted) close();
+			else openOpts.signal.addEventListener("abort", () => void close(), { once: true });
 			return {
 				sendText(text) {
 					if (closed || text.length === 0) return;
-					rotateIfPending();
-					context.send({
-						...baseRequest,
-						transcript: text,
-						continue: true
-					}).catch(ignoreRejection);
+					if (ws.readyState !== WsWebSocket.OPEN) return;
+					doneEmitted = false; // New text re-enables `done` for the current turn.
+					ws.send(JSON.stringify({ text }));
 				},
 				flush() {
-					if (closed || rotatePending) return;
-					context.send({
-						...baseRequest,
-						transcript: "",
-						continue: false
-					}).catch(ignoreRejection);
-					rotatePending = true;
+					if (closed) return;
+					if (ws.readyState !== WsWebSocket.OPEN) return;
+					ws.send(JSON.stringify({ text: "." })); // Trailing "." is sent instead of `eos`, which per the module header would close the connection.
+					armFirstAudioTimer(); // NOTE(review): `doneEmitted` is reset only by sendText(); if flush() runs with no sendText() since the last `done`, this timer's emitDoneOnce() is a no-op — confirm callers never do that.
 				},
 				cancel() {
 					if (closed) return;
-					if (!doneEmitted) context.cancel().catch(ignoreRejection);
+					if (ws.readyState === WsWebSocket.OPEN) ws.send(JSON.stringify({ operation: "clear" })); // Drops text still buffered on the server (barge-in).
 					emitDoneOnce();
-					rotatePending = true;
 				},
 				on(event, fn) {
 					return emitter.on(event, fn);
 				},
 				close,
-				_ws: ws,
-				_currentContextId: () => context.contextId
+				_ws: ws
 			};
 		}
 	};
@@ -1225,53 +959,785 @@ function openCartesia(opts) {
 * resolvers here to turn each descriptor into its openable / callable
 * host-side counterpart, importing the third-party SDK only at that point.
 *
-* The guest sandbox never imports these functions, which is how the agent
-* bundle stays free of `@ai-sdk/anthropic` / `assemblyai` /
-* `@cartesia/cartesia-js`.
+* The guest sandbox never imports these functions, which is how the agent
+* bundle stays free of `@ai-sdk/anthropic` / `assemblyai` /
+* `@cartesia/cartesia-js`.
+*/
+/**
+* Look up a provider API key: agent env first (set via `aai secret put` or
+* `.env`), then the host's `process.env` as a fallback for self-hosted mode.
+* Returns `""` if neither has it — the caller decides whether that's fatal.
+*
+* An empty-string value counts as "not set" at both levels, so a blank
+* entry in the agent env does not shadow a real key on the host. This
+* matches the `openOpts.apiKey || process.env.X` lookups in the openers.
+* A missing `env` object is treated like an empty one.
+*
+* @param envVar Name of the environment variable, e.g. `"ANTHROPIC_API_KEY"`.
+* @param env Agent-level environment map.
+* @returns The first non-empty value found, or `""`.
+*/
+function resolveApiKey(envVar, env) {
+	return env?.[envVar] || process.env[envVar] || "";
+}
+/**
+* Resolve an {@link SttProvider} descriptor into a host-side opener.
+*
+* Dispatches on `descriptor.kind` and passes `descriptor.options` to the
+* matching opener factory. Any other kind throws an `Error` that names the
+* supported kinds.
+*/
+function resolveStt(descriptor) {
+	switch (descriptor.kind) {
+		case ASSEMBLYAI_KIND: return openAssemblyAI(descriptor.options);
+		case DEEPGRAM_KIND: return openDeepgram(descriptor.options);
+		default: throw new Error(`Unknown STT provider kind: "${descriptor.kind}". Supported: ${ASSEMBLYAI_KIND}, ${DEEPGRAM_KIND}.`);
+	}
+}
+/**
+* Resolve a {@link TtsProvider} descriptor into a host-side opener.
+*
+* Dispatches on `descriptor.kind` and passes `descriptor.options` to the
+* matching opener factory. Any other kind throws an `Error` that names the
+* supported kinds.
+*/
+function resolveTts(descriptor) {
+	switch (descriptor.kind) {
+		case CARTESIA_KIND: return openCartesia(descriptor.options);
+		case RIME_KIND: return openRime(descriptor.options);
+		default: throw new Error(`Unknown TTS provider kind: "${descriptor.kind}". Supported: ${CARTESIA_KIND}, ${RIME_KIND}.`);
+	}
+}
+/**
+* Resolve an {@link LlmProvider} descriptor into a Vercel AI SDK
+* {@link LanguageModel}.
+*
+* The API key is pulled from the agent's env (e.g. `ANTHROPIC_API_KEY`).
+* Missing keys throw here — the pipeline session would fail on first
+* `streamText` call otherwise, and the error is clearer at construction.
+*
+* The key is looked up through `resolveApiKey`, the Anthropic client is
+* created with a fixed `baseURL`, and `descriptor.options.model` selects
+* the model. An unrecognized `descriptor.kind` also throws.
+*/
+function resolveLlm(descriptor, env) {
+	switch (descriptor.kind) {
+		case ANTHROPIC_KIND: {
+			const options = descriptor.options;
+			const apiKey = resolveApiKey("ANTHROPIC_API_KEY", env);
+			if (!apiKey) throw new Error("Anthropic LLM: missing API key. Set ANTHROPIC_API_KEY in the agent env.");
+			return createAnthropic({
+				apiKey,
+				baseURL: "https://api.anthropic.com/v1"
+			})(options.model);
+		}
+		default: throw new Error(`Unknown LLM provider kind: "${descriptor.kind}". Supported: ${ANTHROPIC_KIND}.`);
+	}
+}
+//#endregion
+//#region host/runtime-config.ts
+/**
+* Runtime dependencies injected into the session pipeline.
+*
+* Defines the {@link Logger} interface, a default {@link consoleLogger},
+* and the {@link S2SConfig} for Speech-to-Speech endpoint configuration.
+*/
+function consoleLog(fn) { // Adapts a console method to the (msg, ctx) logger signature.
+	return (msg, ctx) => ctx ? fn(msg, ctx) : fn(msg); // A falsy ctx is omitted from the call rather than passed through.
+}
+/**
+* Default console-backed logger.
+* Each level wraps the matching `console` method via `consoleLog`;
+* `info` maps to `console.log`.
+*/
+const consoleLogger = {
+	info: consoleLog(console.log),
+	warn: consoleLog(console.warn),
+	error: consoleLog(console.error),
+	debug: consoleLog(console.debug)
+};
+/**
+* Structured JSON logger for production diagnostics. Each log entry is a
+* single-line JSON object with `timestamp`, `level`, `msg`, and any
+* caller-provided context fields.
+*
+* Context fields are merged with `Object.assign` after the base fields, so
+* a context key named `timestamp`, `level`, or `msg` overwrites the base
+* value. `error` and `warn` entries go to stderr; all others go to stdout.
+*/
+function jsonLog(level) {
+	return (msg, ctx) => {
+		const entry = {
+			timestamp: (/* @__PURE__ */ new Date()).toISOString(),
+			level,
+			msg
+		};
+		if (ctx) Object.assign(entry, ctx);
+		(level === "error" || level === "warn" ? process.stderr : process.stdout).write(`${JSON.stringify(entry)}\n`);
+	};
+}
+const jsonLogger = { // JSON-lines logger with the same four levels as consoleLogger, built from jsonLog.
+	info: jsonLog("info"),
+	warn: jsonLog("warn"),
+	error: jsonLog("error"),
+	debug: jsonLog("debug")
+};
+/**
+* Default S2S endpoint configuration.
+* Input and output sample rates come from the shared
+* `DEFAULT_STT_SAMPLE_RATE` / `DEFAULT_TTS_SAMPLE_RATE` constants.
+*/
+const DEFAULT_S2S_CONFIG = {
+	wssUrl: "wss://agents.assemblyai.com/v1/voice",
+	inputSampleRate: DEFAULT_STT_SAMPLE_RATE,
+	outputSampleRate: DEFAULT_TTS_SAMPLE_RATE
+};
+//#endregion
+//#region host/session-core.ts
+const REPLY_DONE_SLOW_THRESHOLD_MS = 50; // flushReply() logs a warning when reply_done handling takes at least this long.
+function createSessionCore(opts) { // Builds the session object that relays between opts.client and opts.transport and tracks reply, history, idle and tool state.
+	const log = opts.logger ?? consoleLogger;
+	const maxHistory = opts.maxHistory ?? 200;
+	const idleMs = (() => {
+		const raw = opts.agentConfig.idleTimeoutMs ?? 3e5;
+		return raw === 0 || !Number.isFinite(raw) ? 0 : raw; // 0 or a non-finite value yields 0, which resetIdle() treats as "no idle timer".
+	})();
+	let reply = {
+		currentReplyId: null,
+		pendingTools: [],
+		toolCallCount: 0
+	};
+	let history = [];
+	let turnPromise = null; // Chain of in-flight tool executions for the current reply; null when none.
+	let idleTimer = null;
+	let stopped = false;
+	function emit(event) {
+		opts.client.event(event);
+	}
+	function resetIdle() { // (Re)starts the idle timer; on expiry it only logs and emits `idle_timeout`.
+		if (stopped || idleMs <= 0) return;
+		if (idleTimer) clearTimeout(idleTimer);
+		idleTimer = setTimeout(() => {
+			log.info("session idle timeout", { sid: opts.id });
+			emit({ type: "idle_timeout" });
+		}, idleMs);
+	}
+	function pushMessages(...msgs) {
+		history.push(...msgs);
+		if (maxHistory > 0 && history.length > maxHistory) history.splice(0, history.length - maxHistory); // Drops the oldest entries; maxHistory <= 0 disables trimming.
+	}
+	function beginReply(replyId) {
+		reply = {
+			currentReplyId: replyId,
+			pendingTools: [],
+			toolCallCount: 0
+		};
+		turnPromise = null; // The previous reply's tool chain is no longer awaited from here on.
+	}
+	function cancelReply() { // Resets local reply state only; does not touch the transport.
+		reply = {
+			currentReplyId: null,
+			pendingTools: [],
+			toolCallCount: 0
+		};
+	}
+	function flushReply(startMs, hadTurnPromise) { // Signals end of audio and `reply_done` to the client, then clears the active reply id.
+		const stepsUsed = reply.toolCallCount;
+		if (stepsUsed > 0) log.info("Turn complete", {
+			steps: stepsUsed,
+			agent: opts.agent
+		});
+		opts.client.playAudioDone();
+		emit({ type: "reply_done" });
+		reply.currentReplyId = null;
+		const durationMs = Date.now() - startMs;
+		if (durationMs >= REPLY_DONE_SLOW_THRESHOLD_MS) log.warn("slow reply_done dispatch", {
+			sid: opts.id,
+			agent: opts.agent,
+			durationMs,
+			hadTurnPromise
+		});
+	}
+	return {
+		id: opts.id,
+		async start() {
+			resetIdle();
+			await opts.transport.start();
+		},
+		async stop() {
+			if (stopped) return; // Idempotent.
+			stopped = true;
+			if (idleTimer) {
+				clearTimeout(idleTimer);
+				idleTimer = null;
+			}
+			if (turnPromise !== null) await turnPromise; // Let in-flight tool calls settle before stopping the transport.
+			await opts.transport.stop();
+		},
+		onAudio(bytes) {
+			resetIdle(); // Incoming user audio counts as activity.
+			opts.transport.sendUserAudio(bytes);
+		},
+		onAudioReady() {},
+		onCancel() {
+			opts.transport.cancelReply();
+			emit({ type: "cancelled" });
+		},
+		onReset() {
+			cancelReply();
+			history = [];
+			emit({ type: "reset" });
+		},
+		onHistory(messages) {
+			pushMessages(...messages);
+		},
+		onReplyStarted(replyId) {
+			beginReply(replyId);
+		},
+		onReplyDone() {
+			const startMs = Date.now();
+			const doneReplyId = reply.currentReplyId;
+			if (doneReplyId === null) {
+				log.debug("Dropping duplicate reply.done (no active reply)");
+				return;
+			}
+			const hadTurnPromise = turnPromise !== null;
+			const sendPending = () => {
+				if (reply.currentReplyId !== doneReplyId) { // The active reply changed while tools were running: discard results.
+					reply.pendingTools = [];
+					return;
+				}
+				if (reply.pendingTools.length > 0) { // Tool results are sent to the transport instead of finishing the reply.
+					for (const tool of reply.pendingTools) opts.transport.sendToolResult(tool.callId, tool.result);
+					reply.pendingTools = [];
+				} else flushReply(startMs, hadTurnPromise);
+			};
+			if (hadTurnPromise) turnPromise?.then(sendPending); // Wait for running tools before deciding.
+			else sendPending();
+		},
+		onCancelled() {
+			cancelReply();
+			emit({ type: "cancelled" });
+		},
+		onAudioChunk(bytes) {
+			opts.client.playAudioChunk(bytes);
+		},
+		onAudioDone() {
+			opts.client.playAudioDone();
+		},
+		onUserTranscript(text) {
+			emit({
+				type: "user_transcript",
+				text
+			});
+			pushMessages({
+				role: "user",
+				content: text
+			});
+		},
+		onAgentTranscript(text, interrupted) {
+			emit({
+				type: "agent_transcript",
+				text
+			});
+			if (!interrupted) pushMessages({ // Interrupted transcripts are shown to the client but kept out of history.
+				role: "assistant",
+				content: text
+			});
+		},
+		onToolCall(callId, name, args) {
+			emit({
+				type: "tool_call",
+				toolCallId: callId,
+				toolName: name,
+				args
+			});
+			if (reply.currentReplyId === null) {
+				log.warn("tool_call with no active reply", {
+					sid: opts.id,
+					name
+				});
+				return;
+			}
+			reply.toolCallCount++;
+			const maxSteps = opts.agentConfig.maxSteps;
+			if (maxSteps !== void 0 && reply.toolCallCount > maxSteps) { // Over budget: queue an error result without running the tool.
+				log.info("maxSteps exceeded; refusing tool call", {
+					toolCallCount: reply.toolCallCount,
+					maxSteps
+				});
+				reply.pendingTools.push({
+					callId,
+					result: JSON.stringify({ error: "Maximum tool steps reached. Please respond to the user now." })
+				});
+				emit({
+					type: "tool_call_done",
+					toolCallId: callId,
+					result: "{}"
+				});
+				return;
+			}
+			const p = (async () => {
+				try {
+					const result = await opts.executeTool(name, args, opts.id, history);
+					reply.pendingTools.push({ // NOTE(review): `reply` is read when the tool finishes, so a late result is queued on whichever reply is current then — confirm this is intended.
+						callId,
+						result
+					});
+					emit({
+						type: "tool_call_done",
+						toolCallId: callId,
+						result
+					});
+				} catch (err) {
+					const message = err instanceof Error ? err.message : String(err);
+					reply.pendingTools.push({
+						callId,
+						result: JSON.stringify({ error: message })
+					});
+					emit({
+						type: "tool_call_done",
+						toolCallId: callId,
+						result: message
+					});
+				}
+			})();
+			turnPromise = (turnPromise ?? Promise.resolve()).then(() => p); // Append to the chain that onReplyDone() and stop() wait on.
+		},
+		onError(code, message) {
+			emit({
+				type: "error",
+				code,
+				message
+			});
+		},
+		onSpeechStarted() {
+			emit({ type: "speech_started" });
+		},
+		onSpeechStopped() {
+			emit({ type: "speech_stopped" });
+		}
+	};
+}
+//#endregion
+//#region host/tool-executor.ts
+/**
+* Tool execution — validates arguments and invokes tool handlers.
+*
+* {@link executeToolCall} is the single entry point used by both the
+* direct (self-hosted) runtime and the platform sandbox sidecar.
+*/
+const yieldTick = () => new Promise((r) => setTimeout(r, 0)); // Resolves on a zero-delay timer; executeToolCall awaits it before and after running a tool.
+function buildToolContext(opts) { // Assembles the context object handed to a tool's execute().
+	const { env, state, kv, messages, sessionId } = opts;
+	return {
+		env,
+		state: state ?? {},
+		get kv() { // Lazy getter: a missing KV store only throws when a tool actually reads `kv`.
+			if (!kv) throw new Error("KV not available");
+			return kv;
+		},
+		messages: messages ?? [],
+		sessionId: sessionId ?? "",
+		send(event, data) {
+			opts.send?.(event, data); // No-op when the caller supplied no `send`.
+		}
+	};
+}
+async function executeToolCall(name, args, options) { // Validates args, runs the tool under a timeout, and returns its result as a string.
+	const { tool } = options;
+	const parsed = (tool.parameters ?? EMPTY_PARAMS).safeParse(args); // Tools without a parameter schema are validated against EMPTY_PARAMS.
+	if (!parsed.success) return toolError(`Invalid arguments for tool "${name}": ${(parsed.error?.issues ?? []).map((i) => `${i.path.map(String).join(".")}: ${i.message}`).join(", ")}`);
+	try {
+		const ctx = buildToolContext(options);
+		await yieldTick();
+		const result = await pTimeout(Promise.resolve(tool.execute(parsed.data, ctx)), {
+			milliseconds: TOOL_EXECUTION_TIMEOUT_MS,
+			message: `Tool "${name}" timed out after ${TOOL_EXECUTION_TIMEOUT_MS}ms`
+		});
+		await yieldTick();
+		if (result == null) return "null"; // null and undefined both become the string "null".
+		return typeof result === "string" ? result : JSON.stringify(result); // Strings pass through unchanged; everything else is JSON-encoded.
+	} catch (err) { // Failures inside the try block are logged and returned via toolError() rather than rethrown.
+		const log = options.logger;
+		if (log) log.warn("Tool execution failed", {
+			tool: name,
+			error: errorDetail(err)
+		});
+		else console.warn(`[tool-executor] Tool execution failed: ${name}`, err);
+		return toolError(errorMessage(err));
+	}
+}
+//#endregion
+//#region host/to-vercel-tools.ts
+/**
+* Converts agent {@link ToolSchema}[] to Vercel AI SDK tools with `execute`
+* delegation to the agent's {@link ExecuteTool} function.
+*
+* The pipeline orchestrator passes the output to `streamText({ tools })`.
+* Each produced tool's `execute` closure calls
+* `ctx.executeTool(name, args, sessionId, messages(), { signal, toolCallId })`,
+* so the existing agent tool infrastructure (argument validation, KV, hooks,
+* timeout) remains the single source of truth for tool behavior.
+*
+* Per-call `options.abortSignal` (forwarded by `streamText` when the
+* outer turn is aborted, e.g. barge-in) takes precedence over the
+* bag-level `ctx.signal` so individual invocations respect streamText
+* aborts.
 */
 /**
-* Look up a provider API key: agent env first (set via `aai secret put` or
-* `.env`), then the host's `process.env` as a fallback for self-hosted mode.
-* Returns `""` if neither has it — the caller decides whether that's fatal.
+* Convert an array of {@link ToolSchema} to a Vercel AI SDK `ToolSet`
+* (record keyed by tool name).
+*
+* Uses the v6 `tool()` helper with `inputSchema: jsonSchema(...)` wrapping
+* the agent's JSON Schema `parameters`. Execution is delegated to
+* `ctx.executeTool` so validation, KV, timeouts, and hooks keep working.
+*
+* Each invocation passes a shallow copy of `ctx.messages()` (via
+* `.slice()`), replaces nullish `args` with `{}`, and forwards `signal` /
+* `toolCallId` only when they are defined. If two schemas share a name,
+* the later one overwrites the earlier entry.
 */
-function resolveApiKey(envVar, env) {
-	return env[envVar] ?? process.env[envVar] ?? "";
+function toVercelTools(schemas, ctx) {
+	const out = {};
+	for (const schema of schemas) out[schema.name] = tool({
+		description: schema.description,
+		inputSchema: jsonSchema(schema.parameters),
+		execute: async (args, options) => {
+			const input = args ?? {};
+			const signal = options.abortSignal ?? ctx.signal; // The per-call abort signal takes precedence over the bag-level one.
+			const opts = {};
+			if (signal !== void 0) opts.signal = signal;
+			if (options.toolCallId !== void 0) opts.toolCallId = options.toolCallId;
+			return ctx.executeTool(schema.name, input, ctx.sessionId, ctx.messages().slice(), opts);
+		}
+	});
+	return out;
 }
-/** Resolve an {@link SttProvider} descriptor into a host-side opener. */
-function resolveStt(descriptor) {
-	switch (descriptor.kind) {
-		case ASSEMBLYAI_KIND: return openAssemblyAI(descriptor.options);
-		default: throw new Error(`Unknown STT provider kind: "${descriptor.kind}". Supported: ${ASSEMBLYAI_KIND}.`);
+//#endregion
+//#region host/transports/pipeline-transport.ts
+function toModelMessage(m) { // Reduces a stored message to the { role, content } pair used for the LLM call.
+	if (m.role === "user") return {
+		role: "user",
+		content: m.content
+	};
+	return { // Every role other than "user" is sent as "assistant".
+		role: "assistant",
+		content: m.content
+	};
+}
+/** Create a pipeline-mode Transport (STT → LLM → TTS). */
+function createPipelineTransport(opts) {
+	const log = opts.logger ?? consoleLogger;
+	const sttSampleRate = opts.sttSampleRate ?? 16e3;
+	const ttsSampleRate = opts.ttsSampleRate ?? 24e3;
+	const maxSteps = opts.maxSteps ?? 5;
+	const toolChoice = opts.toolChoice ?? "auto";
+	const toolSchemas = opts.toolSchemas ?? [];
+	const executeTool = opts.executeTool ?? (async () => {
+		throw new Error("No executeTool provided");
+	});
+	const { callbacks, sessionConfig } = opts;
+	const systemPrompt = sessionConfig.systemPrompt;
+	const sessionAbort = new AbortController();
+	let audioReady = false;
+	let terminated = false;
+	let sttSession = null;
+	let ttsSession = null;
+	let turnController = null;
+	let nextReplyId = 0;
+	const conversationMessages = sessionConfig.history ? [...sessionConfig.history] : [];
+	let turnPromise = null;
+	const sttSubs = [];
+	const ttsSubs = [];
+	function pushMessages(...msgs) {
+		conversationMessages.push(...msgs);
+		if (conversationMessages.length > 200) conversationMessages.splice(0, conversationMessages.length - 200);
+	}
+	function chainTurn(p) {
+		turnPromise = (turnPromise ?? Promise.resolve()).then(() => p);
+	}
+	function emitError(code, message) {
+		callbacks.onError(code, message);
+	}
+	/**
+	* Tear down after an unrecoverable provider error. Aborts the in-flight
+	* turn, cancels TTS, signals providers to close. Idempotent.
+	*/
+	function terminate() {
+		if (terminated) return;
+		terminated = true;
+		if (turnController !== null) {
+			turnController.abort();
+			turnController = null;
+		}
+		ttsSession?.cancel();
+		callbacks.onCancelled();
+		sessionAbort.abort();
+	}
+	function onSttPartial(_text) {
+		if (terminated) return;
+		if (turnController === null) return;
+		log.info("Pipeline barge-in", { sid: opts.sid });
+		turnController.abort();
+		turnController = null;
+		ttsSession?.cancel();
+		callbacks.onCancelled();
+	}
+	function onSttFinal(text) {
+		if (terminated) return;
+		const trimmed = text.trim();
+		if (trimmed.length === 0) return;
+		if (turnController !== null) {
+			log.info("Pipeline replacing in-flight turn", { sid: opts.sid });
+			turnController.abort();
+			turnController = null;
+			ttsSession?.cancel();
+			callbacks.onCancelled();
+		}
+		callbacks.onUserTranscript(text);
+		chainTurn(runTurn(trimmed).catch((err) => {
+			log.error("Pipeline turn crashed", {
+				error: errorMessage(err),
+				sid: opts.sid
+			});
+		}));
+	}
+	function onSttError(err) {
+		if (terminated) return;
+		log.error("STT error", {
+			code: err.code,
+			message: err.message,
+			sid: opts.sid
+		});
+		emitError("stt", err.message);
+		terminate();
+	}
+	function onTtsError(err) {
+		if (terminated) return;
+		log.error("TTS error", {
+			code: err.code,
+			message: err.message,
+			sid: opts.sid
+		});
+		emitError("tts", err.message);
+		terminate();
+	}
+	async function consumeLlmStream(ctl, messages, tools, onDelta) {
+		try {
+			const result = streamText({
+				model: opts.llm,
+				system: systemPrompt,
+				messages,
+				tools,
+				toolChoice,
+				stopWhen: stepCountIs(maxSteps),
+				abortSignal: ctl.signal
+			});
+			for await (const part of result.fullStream) {
+				if (ctl.signal.aborted) break;
+				handleStreamPart(part, ctl, onDelta);
+			}
+		} catch (err) {
+			if (!ctl.signal.aborted) {
+				const msg = errorMessage(err);
+				log.error("LLM streamText failed", {
+					error: msg,
+					sid: opts.sid
+				});
+				emitError("llm", msg);
+			}
+		}
+	}
+	function handleStreamPart(part, _ctl, onDelta) { // Dispatch a single LLM stream part by its `type`; `_ctl` is accepted but not used in this body.
+		switch (part.type) {
+			case "text-delta": {
+				const delta = part.text ?? "";
+				if (delta.length === 0) return; // skip empty deltas so neither the accumulator nor TTS sees them
+				onDelta(delta);
+				ttsSession?.sendText(delta); // text is forwarded to TTS as it streams in; skipped when no TTS session is set
+				return;
+			}
+			case "tool-call": {
+				const input = part.input ?? {};
+				callbacks.onToolCall(part.toolCallId ?? "", part.toolName ?? "", input); // missing id/name/input fall back to "" / "" / {}
+				return;
+			}
+			case "error": {
+				const msg = errorMessage(part.error);
+				log.error("LLM stream error", {
+					message: msg,
+					sid: opts.sid
+				});
+				emitError("llm", msg); // reported, but this function does not abort the turn itself
+				return;
+			}
+			default: return; // every other part type is ignored
+		}
+	}
+	/**
+	* Flush TTS and wait for drain. Resolves on:
+	*   - TTS emits `done`
+	*   - `signal` aborts (barge-in / provider error / session stop)
+	*   - PIPELINE_FLUSH_TIMEOUT_MS elapses
+	* Resolves immediately if no TTS session.
+	*/
+	function flushTtsAndWait(signal) {
+		const tts = ttsSession; // capture the session that is current at call time
+		if (!tts) return Promise.resolve();
+		return new Promise((resolve) => {
+			let off = null; // unsubscribe function returned by tts.on("done", ...)
+			let timer = null; // handle of the flush-timeout timer
+			const cleanup = () => { // release the done listener, the timer and the abort listener; safe to run more than once
+				if (off) {
+					off();
+					off = null;
+				}
+				if (timer) {
+					clearTimeout(timer);
+					timer = null;
+				}
+				signal.removeEventListener("abort", onAbort); // onAbort is declared below; cleanup only runs after that declaration has executed
+			};
+			const finish = () => {
+				cleanup();
+				resolve();
+			};
+			const onAbort = () => finish();
+			if (signal.aborted) { // already aborted: resolve without registering anything and without calling tts.flush()
+				resolve();
+				return;
+			}
+			signal.addEventListener("abort", onAbort, { once: true });
+			off = tts.on("done", finish);
+			timer = setTimeout(() => {
+				log.warn("TTS flush timeout", {
+					sid: opts.sid,
+					timeoutMs: PIPELINE_FLUSH_TIMEOUT_MS
+				});
+				finish();
+			}, PIPELINE_FLUSH_TIMEOUT_MS);
+			tts.flush(); // issued last, after the done listener and the timeout are in place
+		});
+	}
+	async function runTurn(userText) { // One user turn: record the user message, stream the LLM reply (handleStreamPart forwards text to TTS), emit the agent transcript, then wait for TTS to drain.
+		const replyId = `pipeline-${++nextReplyId}`;
+		callbacks.onReplyStarted(replyId);
+		pushMessages({
+			role: "user",
+			content: userText
+		});
+		const ctl = new AbortController(); // per-turn controller, published as turnController so other code in this closure can abort the turn
+		turnController = ctl;
+		const tools = toVercelTools(toolSchemas, {
+			executeTool,
+			sessionId: opts.sid,
+			messages: () => conversationMessages,
+			signal: ctl.signal
+		});
+		const messages = conversationMessages.map(toModelMessage); // mapped after pushMessages above — presumably so the new user message is included; confirm pushMessages updates conversationMessages
+		let accumulated = ""; // full reply text, built from the streamed deltas
+		await consumeLlmStream(ctl, messages, tools, (delta) => {
+			accumulated += delta;
+		});
+		if (ctl.signal.aborted) { // aborted while streaming: no transcript, no flush, no onReplyDone
+			if (turnController === ctl) turnController = null; // clear only if turnController still refers to this turn's controller
+			return;
+		}
+		if (accumulated.length > 0) {
+			callbacks.onAgentTranscript(accumulated, false); // second argument is presumably the "interrupted" flag — confirm against the callback type
+			pushMessages({
+				role: "assistant",
+				content: accumulated
+			});
+		}
+		await flushTtsAndWait(ctl.signal);
+		if (ctl.signal.aborted) { // aborted while waiting for TTS: onReplyDone is not reported
+			if (turnController === ctl) turnController = null;
+			return;
+		}
+		callbacks.onReplyDone();
+		if (turnController === ctl) turnController = null;
+	}
+	async function runGreeting(text) { // Speak a fixed greeting as an agent reply: no LLM call, the text goes straight to the transcript, the history and TTS.
+		const replyId = `pipeline-greeting-${++nextReplyId}`; // shares the nextReplyId counter with runTurn
+		callbacks.onReplyStarted(replyId);
+		const ctl = new AbortController(); // published as turnController so the greeting can be aborted like a normal turn
+		turnController = ctl;
+		callbacks.onAgentTranscript(text, false);
+		pushMessages({
+			role: "assistant",
+			content: text
+		});
+		ttsSession?.sendText(text); // skipped when no TTS session is set; flushTtsAndWait then resolves immediately
+		await flushTtsAndWait(ctl.signal);
+		if (ctl.signal.aborted) { // aborted while waiting for TTS: onReplyDone is not reported
+			if (turnController === ctl) turnController = null; // clear only if turnController still refers to this greeting's controller
+			return;
+		}
+		callbacks.onReplyDone();
+		if (turnController === ctl) turnController = null;
+	}
+	function reportOpenRejection(which, reason) { // Log and emit a provider open() failure; `which` is "stt" or "tts" (any value other than "stt" is labelled "TTS" in the log line).
+		const msg = errorMessage(reason);
+		log.error(`${which === "stt" ? "STT" : "TTS"} open failed`, {
+			error: msg,
+			sid: opts.sid
+		});
+		emitError(which, msg); // `which` is passed through unchanged as the error source
+	}
+	async function adoptStt(session, teardown) { // Take ownership of an opened STT session: close it when `teardown` is set, otherwise store it and subscribe its events.
+		if (teardown) {
+			await session.close().catch(() => void 0); // best-effort close; a rejected close() is deliberately ignored
+			return;
+		}
+		sttSession = session;
+		sttSubs.push(session.on("partial", onSttPartial)); // the values returned by on() are kept in sttSubs and called as unsubscribe functions in stop()
+		sttSubs.push(session.on("final", onSttFinal));
+		sttSubs.push(session.on("error", onSttError));
+	}
+	async function adoptTts(session, teardown) { // Take ownership of an opened TTS session: close it when `teardown` is set, otherwise store it and subscribe its events.
+		if (teardown) {
+			await session.close().catch(() => void 0); // best-effort close; a rejected close() is deliberately ignored
+			return;
+		}
+		ttsSession = session;
+		ttsSubs.push(session.on("audio", (pcm) => {
+			callbacks.onAudioChunk(new Uint8Array(pcm.buffer, pcm.byteOffset, pcm.byteLength)); // byte view over the same underlying buffer (no copy); assumes pcm is a typed array — TODO confirm
+		}));
+		ttsSubs.push(session.on("error", onTtsError)); // the values returned by on() are kept in ttsSubs and called as unsubscribe functions in stop()
+	}
+	async function openProviders() { // Open the STT and TTS providers together, then adopt both sessions, or close whichever opened if the session was aborted or either open failed.
+		const [sttResult, ttsResult] = await Promise.allSettled([opts.stt.open({ // allSettled: a rejection from one provider does not hide the other provider's result
+			sampleRate: sttSampleRate,
+			apiKey: opts.providerKeys.stt,
+			sttPrompt: opts.sttPrompt,
+			signal: sessionAbort.signal
+		}), opts.tts.open({
+			sampleRate: ttsSampleRate,
+			apiKey: opts.providerKeys.tts,
+			signal: sessionAbort.signal
+		})]);
+		if (sttResult.status === "rejected") reportOpenRejection("stt", sttResult.reason);
+		if (ttsResult.status === "rejected") reportOpenRejection("tts", ttsResult.reason);
+		const aborted = sessionAbort.signal.aborted; // true when stop() ran while the providers were still opening
+		const sttFailed = sttResult.status === "rejected";
+		const ttsFailed = ttsResult.status === "rejected";
+		const teardown = aborted || sttFailed || ttsFailed; // in any of these cases a session that did open is closed instead of adopted
+		if (sttResult.status === "fulfilled") await adoptStt(sttResult.value, teardown);
+		if (ttsResult.status === "fulfilled") await adoptTts(ttsResult.value, teardown);
+		if (!aborted && (sttFailed || ttsFailed)) terminate(); // terminate only for open failures; when aborted, stop() is already in progress
 	}
-}
-/** Resolve a {@link TtsProvider} descriptor into a host-side opener. */
-function resolveTts(descriptor) {
-	switch (descriptor.kind) {
-		case CARTESIA_KIND: return openCartesia(descriptor.options);
-		default: throw new Error(`Unknown TTS provider kind: "${descriptor.kind}". Supported: ${CARTESIA_KIND}.`);
+	function onAudioReady() { // Mark audio as ready (once) and queue the configured greeting, if there is one.
+		if (audioReady || terminated) return; // only the first call on a live pipeline has any effect
+		audioReady = true; // from here on sendUserAudio stops dropping incoming audio
+		if (opts.skipGreeting) return;
+		const greeting = sessionConfig.greeting;
+		if (!greeting) return; // a missing or empty greeting means nothing is spoken
+		chainTurn(runGreeting(greeting).catch((err) => { // rejections are logged here so the promise handed to chainTurn never rejects
+			log.error("Pipeline greeting failed", {
+				error: errorMessage(err),
+				sid: opts.sid
+			});
+		}));
 	}
-}
-/**
-* Resolve an {@link LlmProvider} descriptor into a Vercel AI SDK
-* {@link LanguageModel}.
-*
-* The API key is pulled from the agent's env (e.g. `ANTHROPIC_API_KEY`).
-* Missing keys throw here — the pipeline session would fail on first
-* `streamText` call otherwise, and the error is clearer at construction.
-*/
-function resolveLlm(descriptor, env) {
-	switch (descriptor.kind) {
-		case ANTHROPIC_KIND: {
-			const options = descriptor.options;
-			const apiKey = resolveApiKey("ANTHROPIC_API_KEY", env);
-			if (!apiKey) throw new Error("Anthropic LLM: missing API key. Set ANTHROPIC_API_KEY in the agent env.");
-			return createAnthropic({
-				apiKey,
-				baseURL: "https://api.anthropic.com/v1"
-			})(options.model);
+	return {
+		async start() { // Open the providers, report session readiness, then mark audio as ready.
+			await openProviders();
+			callbacks.onSessionReady?.(opts.sid); // onSessionReady is optional; it receives the session id
+			onAudioReady(); // no-op if the pipeline terminated during openProviders
+		},
+		async stop() { // Shut the pipeline down: abort the session and any in-flight turn, drop event subscriptions, wait for the turn chain, close both sessions.
+			if (sessionAbort.signal.aborted) return; // already stopped: a second call is a no-op
+			sessionAbort.abort();
+			turnController?.abort();
+			for (const off of sttSubs) off(); // each entry is the value returned by session.on(...), called here to unsubscribe
+			for (const off of ttsSubs) off();
+			sttSubs.length = 0;
+			ttsSubs.length = 0;
+			if (turnPromise !== null) await turnPromise; // let queued turn work settle before the provider sessions are closed
+			await sttSession?.close().catch(() => {}); // best-effort close; rejections are ignored
+			await ttsSession?.close().catch(() => {});
+		},
+		sendUserAudio(bytes) { // Forward client audio bytes to STT as 16-bit samples; audio is dropped after termination and before audio is ready.
+			if (terminated || !audioReady) return;
+			const offset = bytes.byteOffset;
+			const length = bytes.byteLength;
+			let pcm;
+			if (offset % 2 === 0 && length % 2 === 0) pcm = new Int16Array(bytes.buffer, offset, length / 2); // aligned, even-length input: view over the same buffer, no copy
+			else {
+				const copy = new Uint8Array(length - length % 2); // misaligned or odd-length input: copy into a fresh buffer, dropping a trailing odd byte
+				copy.set(bytes.subarray(0, copy.byteLength));
+				pcm = new Int16Array(copy.buffer);
+			}
+			sttSession?.sendAudio(pcm); // skipped when no STT session is set
+		},
+		sendToolResult(_callId, _result) {}, // intentional no-op — presumably because runTurn hands executeTool to toVercelTools, so results never need to be sent back; confirm
+		cancelReply() { // Cancel the in-flight reply: abort the current turn (if any) and cancel pending TTS output.
+			if (terminated) return;
+			turnController?.abort();
+			turnController = null; // the aborted turn sees its own signal as aborted and skips onReplyDone
+			ttsSession?.cancel();
 		}
-		default: throw new Error(`Unknown LLM provider kind: "${descriptor.kind}". Supported: ${ANTHROPIC_KIND}.`);
-	}
+	};
 }
 //#endregion
 //#region host/s2s.ts
@@ -1326,72 +1792,59 @@ function parseS2sMessage(obj) {
 	const result = S2sMessageSchema.safeParse(obj);
 	return result.success ? result.data : void 0;
 }
-function dispatchS2sMessage(emitter, msg, state, dispatchCtx) {
+function dispatchS2sMessage(callbacks, msg, state, ctx) { // Translate one parsed S2S message into the matching callback; state.speechActive suppresses repeated speech start/stop notifications.
 	switch (msg.type) {
 		case "session.ready":
-			emitter.emit("ready", { sessionId: msg.session_id });
+			callbacks.onSessionReady(msg.session_id);
 			break;
 		case "session.updated": break;
 		case "input.speech.started":
 			if (!state.speechActive) {
 				state.speechActive = true;
-				emitter.emit("event", { type: "speech_started" });
+				callbacks.onSpeechStarted();
 			}
 			break;
 		case "input.speech.stopped":
 			if (state.speechActive) {
 				state.speechActive = false;
-				emitter.emit("event", { type: "speech_stopped" });
+				callbacks.onSpeechStopped();
 			}
 			break;
 		case "transcript.user":
-			emitter.emit("event", {
-				type: "user_transcript",
-				text: msg.text
-			});
+			callbacks.onUserTranscript(msg.text);
 			break;
 		case "reply.started":
-			emitter.emit("replyStarted", { replyId: msg.reply_id });
+			callbacks.onReplyStarted(msg.reply_id);
 			break;
 		case "transcript.agent":
-			emitter.emit("event", {
-				type: "agent_transcript",
-				text: msg.text,
-				_interrupted: msg.interrupted
-			});
+			callbacks.onAgentTranscript(msg.text, msg.interrupted);
 			break;
 		case "tool.call":
-			emitter.emit("event", {
-				type: "tool_call",
-				toolCallId: msg.call_id,
-				toolName: msg.name,
-				args: msg.args
-			});
+			callbacks.onToolCall(msg.call_id, msg.name, msg.args);
 			break;
 		case "reply.done":
-			dispatchCtx.log.info("S2S << reply.done", {
-				...dispatchCtx.sid !== void 0 ? { sid: dispatchCtx.sid } : {},
+			ctx.log.info("S2S << reply.done", {
+				...ctx.sid !== void 0 ? { sid: ctx.sid } : {},
 				status: msg.status ?? "completed"
 			});
-			if (msg.status === "interrupted") emitter.emit("event", { type: "cancelled" });
-			else emitter.emit("event", { type: "reply_done" });
+			if (msg.status === "interrupted") callbacks.onCancelled(); // an interrupted reply is reported as cancelled, never as done
+			else callbacks.onReplyDone();
 			break;
 		case "session.error":
-			if (msg.code === "session_not_found" || msg.code === "session_forbidden") emitter.emit("sessionExpired");
-			else emitter.emit("error", new Error(msg.message));
+			if (msg.code === "session_not_found" || msg.code === "session_forbidden") callbacks.onSessionExpired(); // these two codes go to the dedicated expiry callback instead of onError
+			else callbacks.onError(new Error(msg.message));
 			break;
 		case "error":
-			emitter.emit("error", new Error(msg.message));
+			callbacks.onError(new Error(msg.message));
 			break;
 		default: break;
 	}
 }
 function connectS2s(opts) {
-	const { apiKey, config, createWebSocket, logger: log = consoleLogger, sid } = opts;
+	const { apiKey, config, createWebSocket, callbacks, logger: log = consoleLogger, sid } = opts;
 	return new Promise((resolve, reject) => {
 		log.info("S2S connecting", { url: config.wssUrl });
 		const ws = createWebSocket(config.wssUrl, { headers: { Authorization: `Bearer ${apiKey}` } });
-		const emitter = createNanoEvents();
 		const dispatchState = { speechActive: false };
 		const dispatchCtx = sid !== void 0 ? {
 			log,
@@ -1409,7 +1862,6 @@ function connectS2s(opts) {
 			ws.send(json);
 		}
 		const handle = {
-			on: emitter.on.bind(emitter),
 			sendAudio(audio) {
 				if (ws.readyState !== 1) {
 					log.debug("S2S sendAudio dropped: socket not open");
@@ -1422,16 +1874,15 @@ function connectS2s(opts) {
 				ws.send(jsonFrame);
 			},
 			sendToolResult(callId, result) {
-				const msg = {
-					type: "tool.result",
-					call_id: callId,
-					result
-				};
 				log.info("S2S >> tool.result", {
 					call_id: callId,
 					resultLength: result.length
 				});
-				send(msg);
+				send({
+					type: "tool.result",
+					call_id: callId,
+					result
+				});
 			},
 			updateSession(sessionConfig) {
 				const { systemPrompt, ...rest } = sessionConfig;
@@ -1468,8 +1919,7 @@ function connectS2s(opts) {
 		}
 		function handleAudioFastPath(obj) {
 			if (obj.type === "reply.audio" && typeof obj.data === "string") {
-				const audioBytes = base64ToUint8(obj.data);
-				emitter.emit("audio", { audio: audioBytes });
+				callbacks.onAudio(base64ToUint8(obj.data));
 				return true;
 			}
 			return false;
@@ -1479,7 +1929,7 @@ function connectS2s(opts) {
 			if (obj.type === "reply.done") return;
 			log.info(`S2S << ${obj.type}`);
 		}
-		function handleS2sMessage(ev) {
+		ws.addEventListener("message", (ev) => {
 			const raw = tryParseJson(ev.data);
 			if (raw === void 0) return;
 			if (typeof raw !== "object" || raw === null || Array.isArray(raw)) {
@@ -1494,9 +1944,8 @@ function connectS2s(opts) {
 				log.warn(`S2S << unrecognised message type: ${obj.type ?? JSON.stringify(raw).slice(0, 200)}`);
 				return;
 			}
-			dispatchS2sMessage(emitter, parsed, dispatchState, dispatchCtx);
-		}
-		ws.addEventListener("message", handleS2sMessage);
+			dispatchS2sMessage(callbacks, parsed, dispatchState, dispatchCtx);
+		});
 		ws.addEventListener("close", (ev) => {
 			const code = ev.code ?? 0;
 			const reason = ev.reason ?? "";
@@ -1505,394 +1954,102 @@ function connectS2s(opts) {
 				reason
 			});
 			if (!opened) reject(/* @__PURE__ */ new Error(`WebSocket closed before open (code: ${code})`));
-			emitter.emit("close", code, reason);
+			callbacks.onClose(code, reason);
 		});
 		ws.addEventListener("error", (ev) => {
 			const message = typeof ev.message === "string" ? ev.message : "WebSocket error";
 			const errObj = new Error(message);
 			log.error("S2S WebSocket error", { error: errObj.message });
 			if (!opened) reject(errObj);
-			else emitter.emit("error", errObj);
+			else callbacks.onError(errObj);
 		});
 	});
 }
 //#endregion
-//#region host/session.ts
-/** @internal Not part of the public API. Exposed for testing only. */
+//#region host/transports/s2s-transport.ts
+/** @internal Exposed for testing — allows spying on connectS2s in unit tests. */
 const _internals = { connectS2s };
-/**
-* Create an idle timer that closes the S2S connection after inactivity.
-* Convention: `timeoutMs <= 0` disables the timer entirely (returns a no-op).
-* This allows agents to opt out of idle timeout via `idleTimeoutMs: 0` in their config.
-*/
-function createIdleTimer(opts) {
-	if (opts.timeoutMs <= 0) return {
-		reset() {},
-		clear() {}
-	};
-	let timer = null;
-	return {
-		reset() {
-			if (timer !== null) clearTimeout(timer);
-			timer = setTimeout(() => {
-				opts.log.info("S2S idle timeout", {
-					timeoutMs: opts.timeoutMs,
-					agent: opts.agent
-				});
-				opts.client.event({ type: "idle_timeout" });
-				opts.ctx.s2s?.close();
-			}, opts.timeoutMs);
-		},
-		clear() {
-			if (timer !== null) {
-				clearTimeout(timer);
-				timer = null;
+function createS2sTransport(opts) {
+	const log = opts.logger ?? consoleLogger;
+	const createWs = opts.createWebSocket ?? defaultCreateS2sWebSocket;
+	let handle = null;
+	let currentReplyId = null;
+	async function start() {
+		handle = await _internals.connectS2s({
+			apiKey: opts.apiKey,
+			config: opts.s2sConfig,
+			createWebSocket: createWs,
+			logger: log,
+			sid: opts.sid,
+			callbacks: {
+				onSessionReady: (providerSessionId) => opts.callbacks.onSessionReady?.(providerSessionId),
+				onReplyStarted: (replyId) => {
+					currentReplyId = replyId;
+					opts.callbacks.onReplyStarted(replyId);
+				},
+				onReplyDone: () => {
+					currentReplyId = null;
+					opts.callbacks.onReplyDone();
+				},
+				onCancelled: () => {
+					currentReplyId = null;
+					opts.callbacks.onCancelled();
+				},
+				onAudio: (bytes) => opts.callbacks.onAudioChunk(bytes),
+				onUserTranscript: opts.callbacks.onUserTranscript,
+				onAgentTranscript: opts.callbacks.onAgentTranscript,
+				onToolCall: opts.callbacks.onToolCall,
+				onSpeechStarted: opts.callbacks.onSpeechStarted,
+				onSpeechStopped: opts.callbacks.onSpeechStopped,
+				onSessionExpired: () => {
+					log.info("S2S session expired", { sid: opts.sid });
+					handle?.close();
+				},
+				onError: (err) => opts.callbacks.onError("internal", err.message),
+				onClose: (code, reason) => {
+					if (currentReplyId !== null) {
+						log.warn("S2S closed with active reply", {
+							sid: opts.sid,
+							agent: opts.agent,
+							activeReplyId: currentReplyId,
+							code,
+							reason
+						});
+						opts.callbacks.onError("connection", `S2S closed mid-reply (code=${code})`);
+					} else log.info("S2S closed", {
+						code,
+						reason
+					});
+				}
 			}
-		}
-	};
-}
-/**
-* Complete a tool call by truncating the result, emitting a `tool_call_done` event,
-* and accumulating the result in `ctx.reply.pendingTools` — but only if the reply that
-* initiated this call is still active.
-*/
-function finishToolCall(ctx, callId, result, replyId) {
-	const truncatedResult = result.length > 4e3 ? result.slice(0, MAX_TOOL_RESULT_CHARS) : result;
-	ctx.client.event({
-		type: "tool_call_done",
-		toolCallId: callId,
-		result: truncatedResult
-	});
-	if (replyId !== null && replyId === ctx.reply.currentReplyId) {
-		ctx.reply.pendingTools.push({
-			callId,
-			result
-		});
-		if (ctx.maxHistory > 0 && ctx.reply.pendingTools.length > ctx.maxHistory) ctx.reply.pendingTools.shift();
-	}
-}
-async function handleToolCall(ctx, event) {
-	const { toolCallId: callId, toolName: name, args: parsedArgs } = event;
-	const replyId = ctx.reply.currentReplyId;
-	ctx.client.event(event);
-	const refused = ctx.consumeToolCallStep(name, replyId);
-	if (refused !== null) {
-		finishToolCall(ctx, callId, refused, replyId);
-		return;
-	}
-	ctx.log.info("S2S tool call", {
-		tool: name,
-		callId,
-		args: parsedArgs,
-		agent: ctx.agent
-	});
-	let result;
-	try {
-		result = await ctx.executeTool(name, parsedArgs, ctx.id, ctx.conversationMessages);
-	} catch (err) {
-		const msg = errorMessage(err);
-		ctx.log.error("Tool execution failed", {
-			tool: name,
-			error: errorDetail(err)
 		});
-		result = toolError(msg);
-	}
-	ctx.log.info("S2S tool result", {
-		tool: name,
-		callId,
-		resultLength: result.length
-	});
-	finishToolCall(ctx, callId, result, replyId);
-}
-function handleUserTranscript(ctx, text) {
-	ctx.log.info("S2S user transcript", { text });
-	ctx.client.event({
-		type: "user_transcript",
-		text
-	});
-	ctx.pushMessages({
-		role: "user",
-		content: text
-	});
-}
-function handleAgentTranscript(ctx, text, interrupted) {
-	ctx.client.event({
-		type: "agent_transcript",
-		text
-	});
-	if (!interrupted) ctx.pushMessages({
-		role: "assistant",
-		content: text
-	});
-}
-function handleReplyCancelled(ctx) {
-	ctx.log.info("S2S reply interrupted (barge-in)");
-	ctx.cancelReply();
-	ctx.client.event({ type: "cancelled" });
-}
-/**
-* Warn when the entry-to-emit time for a reply_done dispatch exceeds this.
-* Tool-less sessions should be sub-millisecond; sessions with pending tools
-* will legitimately spend time awaiting ctx.turnPromise. We log both (with
-* `hadTurnPromise`) so event-loop starvation is distinguishable from
-* genuine tool-call latency.
-*/
-const REPLY_DONE_SLOW_THRESHOLD_MS = 50;
-function handleReplyDone(ctx) {
-	const startMs = Date.now();
-	const doneReplyId = ctx.reply.currentReplyId;
-	if (doneReplyId === null) {
-		ctx.log.debug("Dropping duplicate reply.done (no active reply)");
-		return;
+		handle.updateSession(opts.sessionConfig);
 	}
-	const hadTurnPromise = ctx.turnPromise !== null;
-	const sendPending = () => {
-		if (ctx.reply.currentReplyId !== doneReplyId) {
-			ctx.reply.pendingTools = [];
-			return;
-		}
-		if (ctx.reply.pendingTools.length > 0) {
-			for (const tool of ctx.reply.pendingTools) ctx.s2s?.sendToolResult(tool.callId, tool.result);
-			ctx.reply.pendingTools = [];
-		} else {
-			const stepsUsed = ctx.reply.toolCallCount;
-			if (stepsUsed > 0) ctx.log.info("Turn complete", {
-				steps: stepsUsed,
-				agent: ctx.agent
-			});
-			ctx.client.playAudioDone();
-			ctx.client.event({ type: "reply_done" });
-			ctx.reply.currentReplyId = null;
-			const durationMs = Date.now() - startMs;
-			if (durationMs >= REPLY_DONE_SLOW_THRESHOLD_MS) ctx.log.warn("slow reply_done dispatch", {
-				sid: ctx.id,
-				agent: ctx.agent,
-				durationMs,
-				hadTurnPromise
-			});
-		}
-	};
-	if (hadTurnPromise) ctx.turnPromise?.then(sendPending);
-	else sendPending();
-}
-function setupListeners(ctx, handle) {
-	handle.on("ready", ({ sessionId }) => ctx.log.info("S2S session ready", { sessionId }));
-	handle.on("replyStarted", ({ replyId }) => {
-		ctx.beginReply(replyId);
-	});
-	handle.on("sessionExpired", () => {
-		ctx.log.info("S2S session expired");
-		handle.close();
-	});
-	handle.on("audio", ({ audio }) => ctx.client.playAudioChunk(audio));
-	handle.on("error", (err) => {
-		ctx.log.error("S2S error", { message: err.message });
-		ctx.client.event({
-			type: "error",
-			code: "internal",
-			message: err.message
-		});
-		handle.close();
-	});
-	handle.on("close", (code, reason) => {
-		const activeReplyId = ctx.reply.currentReplyId;
-		if (activeReplyId !== null) ctx.log.warn("S2S closed with active reply", {
-			sid: ctx.id,
-			agent: ctx.agent,
-			activeReplyId,
-			code,
-			reason
-		});
-		else ctx.log.info("S2S closed", {
-			code,
-			reason
-		});
-		ctx.s2s = null;
-		ctx.cancelReply();
-	});
-	handle.on("event", (event) => {
-		switch (event.type) {
-			case "user_transcript":
-				handleUserTranscript(ctx, event.text);
-				break;
-			case "agent_transcript":
-				handleAgentTranscript(ctx, event.text, event._interrupted ?? false);
-				break;
-			case "tool_call": {
-				const p = handleToolCall(ctx, event).catch((err) => {
-					ctx.log.error("Tool call handler failed", { err: errorMessage(err) });
-				});
-				ctx.chainTurn(p);
-				break;
-			}
-			case "reply_done":
-				handleReplyDone(ctx);
-				break;
-			case "cancelled":
-				handleReplyCancelled(ctx);
-				break;
-			default: ctx.client.event(event);
-		}
-	});
-}
-function createS2sSession(opts) {
-	const { id, agent, client, toolSchemas, apiKey, s2sConfig, executeTool, createWebSocket = defaultCreateS2sWebSocket, logger: log = consoleLogger } = opts;
-	const agentConfig = opts.skipGreeting ? {
-		...opts.agentConfig,
-		greeting: ""
-	} : opts.agentConfig;
-	const systemPrompt = buildSystemPrompt(agentConfig, {
-		hasTools: toolSchemas.length > 0 || (agentConfig.builtinTools?.length ?? 0) > 0,
-		voice: true,
-		toolGuidance: opts.toolGuidance
-	});
-	const s2sTools = toolSchemas.map((ts) => ({
-		type: "function",
-		name: ts.name,
-		description: ts.description,
-		parameters: ts.parameters
-	}));
-	const sessionAbort = new AbortController();
-	const ctx = buildCtx({
-		id,
-		agent,
-		client,
-		agentConfig,
-		executeTool,
-		log,
-		maxHistory: opts.maxHistory
-	});
-	const rawTimeout = agentConfig.idleTimeoutMs ?? 3e5;
-	const idle = createIdleTimer({
-		timeoutMs: rawTimeout === 0 || !Number.isFinite(rawTimeout) ? 0 : rawTimeout,
-		agent,
-		log,
-		client,
-		ctx
-	});
-	let connectGeneration = 0;
-	const sessionUpdatePayload = {
-		systemPrompt,
-		tools: s2sTools,
-		...agentConfig.greeting ? { greeting: agentConfig.greeting } : {}
-	};
-	async function connectAndSetup() {
-		const generation = ++connectGeneration;
-		try {
-			const handle = await _internals.connectS2s({
-				apiKey,
-				config: s2sConfig,
-				createWebSocket,
-				logger: log,
-				sid: id
-			});
-			if (sessionAbort.signal.aborted || generation !== connectGeneration) {
-				handle.close();
-				return;
-			}
-			setupListeners(ctx, handle);
-			handle.updateSession(sessionUpdatePayload);
-			ctx.s2s = handle;
-			idle.reset();
-		} catch (err) {
-			const msg = errorMessage(err);
-			log.error("S2S connect failed", { error: errorDetail(err) });
-			client.event({
-				type: "error",
-				code: "internal",
-				message: msg
-			});
-		}
+	async function stop() {
+		handle?.close();
+		handle = null;
 	}
 	return {
-		async start() {
-			await connectAndSetup();
-		},
-		async stop() {
-			if (sessionAbort.signal.aborted) return;
-			sessionAbort.abort();
-			idle.clear();
-			if (ctx.turnPromise !== null) await ctx.turnPromise;
-			ctx.s2s?.close();
-		},
-		onAudio(data) {
-			idle.reset();
-			ctx.s2s?.sendAudio(data);
-		},
-		onAudioReady() {},
-		onCancel() {
-			client.event({ type: "cancelled" });
-		},
-		onReset() {
-			ctx.cancelReply();
-			ctx.conversationMessages = [];
-			ctx.reply.toolCallCount = 0;
-			ctx.turnPromise = null;
-			idle.clear();
-			ctx.s2s?.close();
-			client.event({ type: "reset" });
-			connectAndSetup().catch((err) => log.error("S2S reset reconnect failed", { error: errorMessage(err) }));
+		start,
+		stop,
+		sendUserAudio(bytes) {
+			handle?.sendAudio(bytes);
 		},
-		onHistory(incoming) {
-			ctx.pushMessages(...incoming.map((m) => ({
-				role: m.role,
-				content: m.content
-			})));
+		sendToolResult(callId, result) {
+			handle?.sendToolResult(callId, result);
 		},
-		waitForTurn() {
-			return ctx.turnPromise ?? Promise.resolve();
-		}
-	};
-}
-//#endregion
-//#region host/tool-executor.ts
-/**
-* Tool execution — validates arguments and invokes tool handlers.
-*
-* {@link executeToolCall} is the single entry point used by both the
-* direct (self-hosted) runtime and the platform sandbox sidecar.
-*/
-const yieldTick = () => new Promise((r) => setTimeout(r, 0));
-function buildToolContext(opts) {
-	const { env, state, kv, messages, sessionId } = opts;
-	return {
-		env,
-		state: state ?? {},
-		get kv() {
-			if (!kv) throw new Error("KV not available");
-			return kv;
+		cancelReply() {
+			currentReplyId = null;
 		},
-		messages: messages ?? [],
-		sessionId: sessionId ?? "",
-		send(event, data) {
-			opts.send?.(event, data);
+		updateSession(config) {
+			handle?.updateSession({
+				systemPrompt: config.systemPrompt,
+				tools: config.tools ?? [],
+				...config.greeting !== void 0 ? { greeting: config.greeting } : {}
+			});
 		}
 	};
 }
-async function executeToolCall(name, args, options) {
-	const { tool } = options;
-	const parsed = (tool.parameters ?? EMPTY_PARAMS).safeParse(args);
-	if (!parsed.success) return toolError(`Invalid arguments for tool "${name}": ${(parsed.error?.issues ?? []).map((i) => `${i.path.map(String).join(".")}: ${i.message}`).join(", ")}`);
-	try {
-		const ctx = buildToolContext(options);
-		await yieldTick();
-		const result = await pTimeout(Promise.resolve(tool.execute(parsed.data, ctx)), {
-			milliseconds: TOOL_EXECUTION_TIMEOUT_MS,
-			message: `Tool "${name}" timed out after ${TOOL_EXECUTION_TIMEOUT_MS}ms`
-		});
-		await yieldTick();
-		if (result == null) return "null";
-		return typeof result === "string" ? result : JSON.stringify(result);
-	} catch (err) {
-		const log = options.logger;
-		if (log) log.warn("Tool execution failed", {
-			tool: name,
-			error: errorDetail(err)
-		});
-		else console.warn(`[tool-executor] Tool execution failed: ${name}`, err);
-		return toolError(errorMessage(err));
-	}
-}
 //#endregion
 //#region host/unstorage-kv.ts
 /**
@@ -1944,20 +2101,20 @@ function createUnstorageKv(options) {
 *
 * Audio validation is handled at the host transport layer (see server.ts).
 */
+const AUDIO_DONE_FRAME = JSON.stringify({ type: "audio_done" });
 /**
 * Creates a {@link ClientSink} backed by a plain WebSocket.
 *
-* Text events are sent as JSON text frames; audio chunks are sent as
-* binary frames (zero-copy).
+* Session events are sent as JSON text frames; audio chunks are sent as raw
+* PCM16 binary frames.
 */
 function createClientSink(ws, log) {
-	/** Send data over ws, silently dropping if the socket is not open. */
 	function safeSend(data) {
 		try {
 			if (ws.readyState !== 1) return;
 			ws.send(data);
 		} catch (err) {
-			log.debug?.("safeSend: socket closed between readyState check and send", { error: errorMessage(err) });
+			log.debug?.("safeSend: socket closed between readyState check and send", { error: err instanceof Error ? err.message : String(err) });
 		}
 	}
 	return {
@@ -1971,7 +2128,7 @@ function createClientSink(ws, log) {
 			safeSend(chunk);
 		},
 		playAudioDone() {
-			safeSend(JSON.stringify({ type: "audio_done" }));
+			safeSend(AUDIO_DONE_FRAME);
 		}
 	};
 }
@@ -1980,35 +2137,32 @@ function handleBinaryAudio(data, session) {
 		session.onAudio(data);
 		return true;
 	}
-	if (data instanceof ArrayBuffer) {
-		session.onAudio(new Uint8Array(data));
-		return true;
-	}
 	return false;
 }
-function handleTextMessage(data, session, log, ctx, sid) {
-	if (typeof data !== "string") return;
-	let json;
+function handleTextMessage(data, session, log, sid) {
+	if (typeof data !== "string") {
+		log.warn("ws: non-string, non-binary frame received; dropping", { sid });
+		return;
+	}
+	let parsed;
 	try {
-		json = JSON.parse(data);
+		parsed = JSON.parse(data);
 	} catch {
-		log.warn("Invalid JSON from client", {
-			...ctx,
-			sid
+		log.warn("ws: invalid JSON; dropping", {
+			sid,
+			data: data.slice(0, 200)
 		});
 		return;
 	}
-	const parsed = lenientParse(ClientMessageSchema, json);
-	if (!parsed.ok) {
-		if (parsed.malformed) log.warn("Invalid client message", {
-			...ctx,
+	const result = lenientParse(ClientMessageSchema, parsed);
+	if (!result.ok) {
+		if (result.malformed) log.warn("ws: malformed client message", {
 			sid,
-			error: parsed.error
+			error: result.error
 		});
 		return;
 	}
-	const msg = parsed.data;
-	switch (msg.type) {
+	switch (result.data.type) {
 		case "audio_ready":
 			session.onAudioReady();
 			break;
@@ -2019,19 +2173,19 @@ function handleTextMessage(data, session, log, ctx, sid) {
 			session.onReset();
 			break;
 		case "history":
-			session.onHistory(msg.messages);
+			session.onHistory(result.data.messages);
 			break;
 		default: break;
 	}
 }
 /**
-* Attaches session lifecycle handlers to a native WebSocket using
-* plain JSON text frames and binary audio frames.
+* Attaches session lifecycle handlers to a native WebSocket using JSON text
+* frames for control messages and raw PCM16 binary frames for audio.
 *
 * Connection flow:
-* 1. WebSocket opens → server sends `{ type: "config", ...ReadyConfig }`
-* 2. Client sets up audio → sends `{ type: "audio_ready" }`
-* 3. If reconnecting → client sends `{ type: "history", messages: [...] }`
+* 1. WebSocket opens → server sends JSON CONFIG frame with sampleRate, ttsSampleRate, sessionId
+* 2. Client sets up audio → sends JSON AUDIO_READY frame
+* 3. If reconnecting → client sends JSON HISTORY frame with prior messages
 */
 function wireSessionSocket(ws, opts) {
 	const { sessions, logger: log = consoleLogger } = opts;
@@ -2041,7 +2195,7 @@ function wireSessionSocket(ws, opts) {
 	let session = null;
 	/** Set to true once session.start() resolves. Messages arriving before
 	*  this flag is set are buffered and replayed once the session is ready,
-	*  preventing audio/text from being dispatched to a half-initialized session. */
+	*  preventing audio/frames from being dispatched to a half-initialized session. */
 	let sessionReady = false;
 	let messageBuffer = [];
 	function drainBuffer() {
@@ -2049,9 +2203,8 @@ function wireSessionSocket(ws, opts) {
 		const buf = messageBuffer;
 		messageBuffer = null;
 		for (const event of buf) {
-			const { data } = event;
-			if (handleBinaryAudio(data, session)) continue;
-			handleTextMessage(data, session, log, ctx, sid);
+			if (handleBinaryAudio(event.data, session)) continue;
+			handleTextMessage(event.data, session, log, sid);
 		}
 	}
 	function onOpen() {
@@ -2066,7 +2219,9 @@ function wireSessionSocket(ws, opts) {
 		opts.onSinkCreated?.(sessionId, client);
 		ws.send(JSON.stringify({
 			type: "config",
-			...opts.readyConfig,
+			audioFormat: opts.readyConfig.audioFormat,
+			sampleRate: opts.readyConfig.sampleRate,
+			ttsSampleRate: opts.readyConfig.ttsSampleRate,
 			sessionId
 		}));
 		const timeoutMs = opts.sessionStartTimeoutMs ?? 1e4;
@@ -2099,9 +2254,8 @@ function wireSessionSocket(ws, opts) {
 			if (messageBuffer && messageBuffer.length < 100) messageBuffer.push(event);
 			return;
 		}
-		const { data } = event;
-		if (handleBinaryAudio(data, session)) return;
-		handleTextMessage(data, session, log, ctx, sid);
+		if (handleBinaryAudio(event.data, session)) return;
+		handleTextMessage(event.data, session, log, sid);
 	});
 	ws.addEventListener("close", () => {
 		log.info("Session disconnected", {
@@ -2132,6 +2286,30 @@ function wireSessionSocket(ws, opts) {
 //#endregion
 //#region host/runtime.ts
 /**
+* Resolve the API key for the configured STT provider from its env var.
+*
+* Each STT provider uses its own env var (e.g. `ASSEMBLYAI_API_KEY`,
+* `DEEPGRAM_API_KEY`). We read the kind from the descriptor if it is one;
+* pre-resolved openers have no kind field, so we fall back to AssemblyAI for
+* backward compatibility (openers supply their own key at open time anyway).
+*/
+function resolveSttApiKey(stt, env) { // stt: STT descriptor/opener (may be nullish); env is forwarded unchanged to resolveApiKey
+	if ((stt != null && "kind" in stt && typeof stt.kind === "string" ? stt.kind : void 0) === "deepgram") return resolveApiKey("DEEPGRAM_API_KEY", env); // only a string `kind` equal to "deepgram" selects the Deepgram key
+	return resolveApiKey("ASSEMBLYAI_API_KEY", env); // every other case (nullish stt, no string `kind`, any other kind) uses the AssemblyAI key
+}
+/**
+* Resolve the API key for the configured TTS provider from its env var.
+*
+* Each TTS provider uses its own env var (e.g. `CARTESIA_API_KEY`,
+* `RIME_API_KEY`). We read the kind from the descriptor if it is one;
+* pre-resolved openers have no kind field, so we fall back to Cartesia for
+* backward compatibility (openers supply their own key at open time anyway).
+*/
+function resolveTtsApiKey(tts, env) { // tts: TTS descriptor/opener (may be nullish); env is forwarded unchanged to resolveApiKey
+	if ((tts != null && "kind" in tts && typeof tts.kind === "string" ? tts.kind : void 0) === "rime") return resolveApiKey("RIME_API_KEY", env); // only a string `kind` equal to "rime" selects the Rime key
+	return resolveApiKey("CARTESIA_API_KEY", env); // every other case (nullish tts, no string `kind`, any other kind) uses the Cartesia key
+}
+/**
 * Distinguish a descriptor (`{ kind, options }`) from an already-resolved
 * opener / `LanguageModel`. The production path always passes descriptors;
 * openers are a test escape hatch (fakes in `_pipeline-test-fakes.ts`).
@@ -2236,40 +2414,86 @@ function createRuntime(opts) {
 	} : null;
 	function createSession(sessionOpts) { // Registers the client sink, builds a pipeline or S2S transport, wraps it in a session core and returns that core
 		sinkMap.set(sessionOpts.id, sessionOpts.client); // record this session's client in sinkMap, keyed by session id
-		if (pipelineProviders) return createPipelineSession({
-			id: sessionOpts.id,
+		const isPipeline = Boolean(pipelineProviders); // true when pipelineProviders is set; selects the tool-call handler below
+		const systemPrompt = buildSystemPrompt(agentConfig, { // built once and passed to whichever transport branch runs
+			hasTools: toolSchemas.length > 0 || (agentConfig.builtinTools?.length ?? 0) > 0,
+			voice: true,
+			toolGuidance
+		});
+		let core = null; // assigned only after the transport exists (see createSessionCore below)
+		function bindCore() {
+			if (!core) throw new Error("SessionCore not yet created"); // a callback fired before `core` was assigned
+			return core;
+		}
+		const callbacks = { // each handler looks up `core` at call time via bindCore(), so the transport can be created first
+			onReplyStarted: (replyId) => bindCore().onReplyStarted(replyId),
+			onReplyDone: () => bindCore().onReplyDone(),
+			onCancelled: () => bindCore().onCancelled(),
+			onAudioChunk: (bytes) => bindCore().onAudioChunk(bytes),
+			onAudioDone: () => bindCore().onAudioDone(),
+			onUserTranscript: (text) => bindCore().onUserTranscript(text),
+			onAgentTranscript: (text, interrupted) => bindCore().onAgentTranscript(text, interrupted),
+			onToolCall: isPipeline ? (id, name, args) => sessionOpts.client.event({ // pipeline mode: emit a "tool_call" event to the client instead of calling core.onToolCall
+				type: "tool_call",
+				toolCallId: id,
+				toolName: name,
+				args
+			}) : (id, name, args) => bindCore().onToolCall(id, name, args),
+			onError: (code, message) => bindCore().onError(code, message),
+			onSpeechStarted: () => bindCore().onSpeechStarted(),
+			onSpeechStopped: () => bindCore().onSpeechStopped()
+		};
+		let transport;
+		if (pipelineProviders) transport = createPipelineTransport({ // pipeline path: uses the stt/llm/tts entries of pipelineProviders
+			sid: sessionOpts.id,
 			agent: sessionOpts.agent,
-			client: sessionOpts.client,
-			agentConfig,
-			toolSchemas,
-			toolGuidance,
-			executeTool,
 			stt: pipelineProviders.stt,
 			llm: pipelineProviders.llm,
 			tts: pipelineProviders.tts,
-			sttApiKey: resolveApiKey("ASSEMBLYAI_API_KEY", env),
-			ttsApiKey: resolveApiKey("CARTESIA_API_KEY", env),
+			callbacks,
+			sessionConfig: {
+				systemPrompt,
+				greeting: agentConfig.greeting,
+				tools: toolSchemas
+			},
+			toolSchemas,
+			executeTool,
+			providerKeys: { // keys are chosen per provider from opts.stt / opts.tts rather than hard-coded env-var names
+				stt: resolveSttApiKey(opts.stt, env),
+				tts: resolveTtsApiKey(opts.tts, env)
+			},
 			sttSampleRate: s2sConfig.inputSampleRate,
 			ttsSampleRate: s2sConfig.outputSampleRate,
+			maxSteps: agentConfig.maxSteps,
+			toolChoice: agentConfig.toolChoice,
 			skipGreeting: sessionOpts.skipGreeting ?? false,
 			logger
 		});
-		const apiKey = env.ASSEMBLYAI_API_KEY ?? "";
-		return createS2sSession({
+		else transport = createS2sTransport({ // S2S path: taken when pipelineProviders is falsy
+			apiKey: env.ASSEMBLYAI_API_KEY ?? "", // falls back to "" when the env entry is null/undefined
+			s2sConfig,
+			sessionConfig: {
+				systemPrompt,
+				tools: toolSchemas,
+				...agentConfig.greeting !== void 0 ? { greeting: agentConfig.greeting } : {} // the greeting key is omitted entirely when it is undefined
+			},
+			toolSchemas,
+			callbacks,
+			sid: sessionOpts.id,
+			agent: sessionOpts.agent,
+			...createWebSocket ? { createWebSocket } : {},
+			logger
+		});
+		core = createSessionCore({ // from this assignment on, bindCore() stops throwing and the callbacks reach the core
 			id: sessionOpts.id,
 			agent: sessionOpts.agent,
 			client: sessionOpts.client,
 			agentConfig,
-			toolSchemas,
-			toolGuidance,
-			apiKey,
-			s2sConfig,
 			executeTool,
-			...createWebSocket ? { createWebSocket } : {},
-			skipGreeting: sessionOpts.skipGreeting ?? false,
-			logger,
-			...sessionOpts.resumeFrom ? { resumeFrom: sessionOpts.resumeFrom } : {}
+			transport,
+			logger
 		});
+		return core; // NOTE(review): sessionOpts.resumeFrom is no longer forwarded here and skipGreeting is only forwarded on the pipeline path — confirm both are handled elsewhere
 	}
 	function startSession(ws, startOpts) {
 		const resumeFrom = startOpts?.resumeFrom;
@@ -2454,4 +2678,4 @@ function createServer(options) {
 	};
 }
 //#endregion
-export { DEFAULT_S2S_CONFIG, _buildBaseCtx, _internals, buildCtx, consoleLogger, createRuntime, createS2sSession, createServer, createUnstorageKv, executeInIsolate, executeToolCall, jsonLogger, resolveAllBuiltins, wireSessionSocket };
+export { DEFAULT_S2S_CONFIG, _internals, consoleLogger, createPipelineTransport, createRuntime, createS2sTransport, createServer, createSessionCore, createUnstorageKv, executeInIsolate, executeToolCall, jsonLogger, resolveAllBuiltins, wireSessionSocket };