npm - @alexkroman1/aai - Versions diffs - 1.8.0 → 1.8.2 - Mend

@alexkroman1/aai 1.8.0 → 1.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17)

package/.turbo/turbo-build.log +5 -5
package/CHANGELOG.md +19 -0
package/dist/{_internal-types-CfOAbK6V.js → _internal-types-8v1qAa4A.js} +14 -2
package/dist/host/runtime-barrel.js +106 -30
package/dist/host/transports/openai-realtime-transport.d.ts +2 -0
package/dist/host/transports/types.d.ts +7 -0
package/dist/sdk/_internal-types.d.ts +9 -0
package/dist/sdk/manifest-barrel.js +1 -1
package/host/builtin-tools.ts +2 -2
package/host/runtime.ts +10 -0
package/host/s2s.ts +41 -22
package/host/transports/openai-realtime-transport.test.ts +98 -30
package/host/transports/openai-realtime-transport.ts +60 -8
package/host/transports/pipeline-transport.ts +23 -0
package/host/transports/types.ts +7 -0
package/package.json +1 -1
package/sdk/_internal-types.ts +14 -1

package/.turbo/turbo-build.log CHANGED Viewed

@@ -1,5 +1,5 @@
-> @alexkroman1/aai@1.8.0 build /home/runner/work/agent/agent/packages/aai
+> @alexkroman1/aai@1.8.2 build /home/runner/work/agent/agent/packages/aai
 > tsdown && tsc -p tsconfig.build.json
 [34mℹ[39m [34mtsdown v0.21.7[39m powered by [38;2;255;126;23mrolldown v1.0.0-rc.12[39m
@@ -8,7 +8,7 @@
 [34mℹ[39m target: [34mnode22[39m
 [34mℹ[39m tsconfig: [34mtsconfig.json[39m
 [34mℹ[39m Build start
-[34mℹ[39m [2mdist/[22m[1mhost/runtime-barrel.js[22m          [2m108.22 kB[22m [2m│ gzip: 29.84 kB[22m
+[34mℹ[39m [2mdist/[22m[1mhost/runtime-barrel.js[22m          [2m110.46 kB[22m [2m│ gzip: 30.38 kB[22m
 [34mℹ[39m [2mdist/[22m[1msdk/protocol.js[22m                 [2m  5.70 kB[22m [2m│ gzip:  1.92 kB[22m
 [34mℹ[39m [2mdist/[22m[1mindex.js[22m                        [2m  2.88 kB[22m [2m│ gzip:  1.24 kB[22m
 [34mℹ[39m [2mdist/[22m[1msdk/manifest-barrel.js[22m          [2m  0.36 kB[22m [2m│ gzip:  0.20 kB[22m
@@ -18,7 +18,7 @@
 [34mℹ[39m [2mdist/[22m[1msdk/providers/tts-barrel.js[22m     [2m  0.25 kB[22m [2m│ gzip:  0.16 kB[22m
 [34mℹ[39m [2mdist/[22m[1msdk/providers/vector-barrel.js[22m  [2m  0.22 kB[22m [2m│ gzip:  0.15 kB[22m
 [34mℹ[39m [2mdist/[22m[1msdk/providers/s2s-barrel.js[22m     [2m  0.15 kB[22m [2m│ gzip:  0.12 kB[22m
-[34mℹ[39m [2mdist/[22m_internal-types-CfOAbK6V.js     [2m  5.45 kB[22m [2m│ gzip:  1.87 kB[22m
+[34mℹ[39m [2mdist/[22m_internal-types-8v1qAa4A.js     [2m  6.04 kB[22m [2m│ gzip:  2.15 kB[22m
 [34mℹ[39m [2mdist/[22mtypes-DOWVZhb9.js               [2m  5.39 kB[22m [2m│ gzip:  2.27 kB[22m
 [34mℹ[39m [2mdist/[22msoniox-BQdL0mB5.js              [2m  2.03 kB[22m [2m│ gzip:  0.54 kB[22m
 [34mℹ[39m [2mdist/[22mconstants-y68COEGj.js           [2m  1.70 kB[22m [2m│ gzip:  0.76 kB[22m
@@ -28,5 +28,5 @@
 [34mℹ[39m [2mdist/[22ms3-BtCMvCod.js                  [2m  0.76 kB[22m [2m│ gzip:  0.29 kB[22m
 [34mℹ[39m [2mdist/[22mpinecone-CeJ69aRs.js            [2m  0.48 kB[22m [2m│ gzip:  0.24 kB[22m
 [34mℹ[39m [2mdist/[22mopenai-realtime-cjPAHMMx.js     [2m  0.27 kB[22m [2m│ gzip:  0.19 kB[22m
-[34mℹ[39m 20 files, total: 138.16 kB
-[32m✔[39m Build complete in [32m48ms[39m
+[34mℹ[39m 20 files, total: 140.98 kB
+[32m✔[39m Build complete in [32m42ms[39m

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,24 @@
 # @alexkroman1/aai
+## 1.8.2
+### Patch Changes
+- bb06b4e: Fix S2S tool calls arriving with empty args. Strip the $schema keyword from Zod-generated JSON Schema for tool parameters — some S2S providers ship the dialect URI to the underlying model and emit tool calls with empty args even when required params are listed. Also accept both 'arguments' and 'args' field names on the wire. Pipeline transport now surfaces tool-result stream parts as tool_call_done so the client UI flips pending → done.
+## 1.8.1
+### Patch Changes
+- ba8effb: Make OpenAI Realtime usable end-to-end on gpt-realtime-2:
+  - Accept GA-renamed audio/transcript server events (`response.output_audio.{delta,done}`, `response.output_audio_transcript.{delta,done}`) alongside the legacy `response.audio.*` names so audio and transcript reach the client.
+  - Trigger the agent's `greeting` on connect by sending a one-shot `response.create` with quoted instructions, and honor `skipGreeting` so resumed sessions don't replay it.
+  - Coalesce `response.create` across multiple `sendToolResult` calls in the same tick. Multi-tool turns previously sent one `response.create` per tool, the second of which OpenAI rejected as `conversation_already_has_active_response`, stranding the turn so the model never received the tool results.
+  - Log unhandled event types and the full payload of `error` events to make silently rejected `session.update` fields visible.
+- f4cc5ef: Migrate OpenAI Realtime transport to GA API schema (gpt-realtime-2). Drop OpenAI-Beta: realtime=v1 connect header and update session.update to session.type=realtime, output_modalities, and nested audio.input/audio.output with audio/pcm format.
 ## 1.8.0
 ### Minor Changes

package/dist/{_internal-types-CfOAbK6V.js → _internal-types-8v1qAa4A.js} RENAMED Viewed

@@ -128,13 +128,25 @@ const ToolSchemaSchema = z.object({
 	parameters: z.record(z.string(), z.unknown())
 });
 const EMPTY_PARAMS = z.object({});
+/**
+* Convert a Zod schema to the JSON Schema shape that S2S providers expect.
+* Strips the `$schema` keyword: `z.toJSONSchema` (Zod v4) tags output with
+* the JSON Schema 2020-12 dialect URI, and some Realtime/S2S providers
+* either reject the field outright or ship it through to the underlying
+* model with a malformed function spec — observed empirically as tool
+* calls that arrive with `args: {}` even when required params are listed.
+*/
+function toToolJsonSchema(zodSchema) {
+	const { $schema: _omit, ...rest } = z.toJSONSchema(zodSchema);
+	return rest;
+}
 function agentToolsToSchemas(tools) {
 	return Object.entries(tools).map(([name, def]) => ({
 		type: "function",
 		name,
 		description: def.description,
-		parameters: z.toJSONSchema(def.parameters ?? EMPTY_PARAMS)
+		parameters: toToolJsonSchema(def.parameters ?? EMPTY_PARAMS)
 	}));
 }
 //#endregion
-export { toAgentConfig as a, makeSttError as c, agentToolsToSchemas as i, makeTtsError as l, EMPTY_PARAMS as n, ProviderDescriptorSchema as o, ToolSchemaSchema as r, assertProviderTriple as s, AgentConfigSchema as t };
+export { toAgentConfig as a, assertProviderTriple as c, agentToolsToSchemas as i, makeSttError as l, EMPTY_PARAMS as n, toToolJsonSchema as o, ToolSchemaSchema as r, ProviderDescriptorSchema as s, AgentConfigSchema as t, makeTtsError as u };

package/dist/host/runtime-barrel.js CHANGED Viewed

@@ -1,8 +1,8 @@
 import { r as DEFAULT_SYSTEM_PROMPT } from "../types-DOWVZhb9.js";
-import { _ as TOOL_EXECUTION_TIMEOUT_MS, a as DEFAULT_SHUTDOWN_TIMEOUT_MS, c as FETCH_TIMEOUT_MS, d as MAX_PAGE_CHARS, g as RUN_CODE_TIMEOUT_MS, h as PIPELINE_FLUSH_TIMEOUT_MS, l as MAX_HTML_BYTES, m as MAX_WS_PAYLOAD_BYTES, o as DEFAULT_STT_SAMPLE_RATE, p as MAX_VALUE_SIZE, s as DEFAULT_TTS_SAMPLE_RATE, t as AGENT_CSP } from "../constants-y68COEGj.js";
+import { _ as TOOL_EXECUTION_TIMEOUT_MS, a as DEFAULT_SHUTDOWN_TIMEOUT_MS, c as FETCH_TIMEOUT_MS, d as MAX_PAGE_CHARS, f as MAX_TOOL_RESULT_CHARS, g as RUN_CODE_TIMEOUT_MS, h as PIPELINE_FLUSH_TIMEOUT_MS, l as MAX_HTML_BYTES, m as MAX_WS_PAYLOAD_BYTES, o as DEFAULT_STT_SAMPLE_RATE, p as MAX_VALUE_SIZE, s as DEFAULT_TTS_SAMPLE_RATE, t as AGENT_CSP } from "../constants-y68COEGj.js";
 import { i as toolError, n as errorDetail, r as errorMessage, t as parseWsUpgradeParams } from "../ws-upgrade-CG8-by1n.js";
 import { ClientMessageSchema, VectorRequestSchema, buildReadyConfig, lenientParse } from "../sdk/protocol.js";
-import { a as toAgentConfig, c as makeSttError, i as agentToolsToSchemas, l as makeTtsError, n as EMPTY_PARAMS, s as assertProviderTriple } from "../_internal-types-CfOAbK6V.js";
+import { a as toAgentConfig, c as assertProviderTriple, i as agentToolsToSchemas, l as makeSttError, n as EMPTY_PARAMS, o as toToolJsonSchema, u as makeTtsError } from "../_internal-types-8v1qAa4A.js";
 import { a as MISTRAL_KIND, d as ANTHROPIC_KIND, l as GOOGLE_KIND, r as OPENAI_KIND, s as GROQ_KIND } from "../xai-BDI61Y2M.js";
 import { a as DEEPGRAM_KIND, r as ELEVENLABS_KIND, s as ASSEMBLYAI_KIND, t as SONIOX_KIND } from "../soniox-BQdL0mB5.js";
 import { a as CARTESIA_KIND, n as RIME_KIND } from "../rime-58p9mDR8.js";
@@ -311,7 +311,7 @@ function resolveAllBuiltins(names, opts) {
 			type: "function",
 			name: toolName,
 			description: def.description,
-			parameters: z.toJSONSchema(def.parameters ?? EMPTY_PARAMS)
+			parameters: toToolJsonSchema(def.parameters ?? EMPTY_PARAMS)
 		});
 		if (def.guidance) guidance.push(def.guidance);
 	}
@@ -1681,6 +1681,7 @@ function createOpenaiRealtimeTransport(opts) {
 	const agentTranscriptBuffers = /* @__PURE__ */ new Map();
 	const toolBuffers = /* @__PURE__ */ new Map();
 	let currentResponseId = null;
+	let responseCreateQueued = false;
 	function send(payload) {
 		if (!ws || ws.readyState !== 1) {
 			log.debug("OpenAI Realtime send dropped: socket not open", { type: payload.type });
@@ -1688,17 +1689,39 @@ function createOpenaiRealtimeTransport(opts) {
 		}
 		ws.send(JSON.stringify(payload));
 	}
+	function sendGreeting() {
+		if (opts.skipGreeting) return;
+		const greeting = opts.sessionConfig.greeting;
+		if (!greeting) return;
+		send({
+			type: "response.create",
+			response: { instructions: `Say exactly: ${JSON.stringify(greeting)}` }
+		});
+	}
 	function sendSessionUpdate() {
 		send({
 			type: "session.update",
 			session: {
-				modalities: ["audio", "text"],
-				voice,
+				type: "realtime",
+				output_modalities: ["audio"],
 				instructions: opts.sessionConfig.systemPrompt,
-				input_audio_format: "pcm16",
-				output_audio_format: "pcm16",
-				input_audio_transcription: { model: "whisper-1" },
-				turn_detection: { type: "server_vad" },
+				audio: {
+					input: {
+						format: {
+							type: "audio/pcm",
+							rate: 24e3
+						},
+						turn_detection: { type: "server_vad" },
+						transcription: { model: "whisper-1" }
+					},
+					output: {
+						format: {
+							type: "audio/pcm",
+							rate: 24e3
+						},
+						voice
+					}
+				},
 				tools: opts.toolSchemas,
 				tool_choice: opts.toolChoice
 			}
@@ -1708,15 +1731,13 @@ function createOpenaiRealtimeTransport(opts) {
 		const url = `${baseUrl}?model=${encodeURIComponent(model)}`;
 		log.info("OpenAI Realtime connecting", { url });
 		return new Promise((resolve, reject) => {
-			const sock = createWs(url, { headers: {
-				Authorization: `Bearer ${opts.apiKey}`,
-				"OpenAI-Beta": "realtime=v1"
-			} });
+			const sock = createWs(url, { headers: { Authorization: `Bearer ${opts.apiKey}` } });
 			ws = sock;
 			let opened = false;
 			sock.addEventListener("open", () => {
 				opened = true;
 				sendSessionUpdate();
+				sendGreeting();
 				resolve();
 			});
 			sock.addEventListener("message", (ev) => handleMessage(ev.data));
@@ -1773,11 +1794,17 @@ function createOpenaiRealtimeTransport(opts) {
 	function handleErrorEvent(obj) {
 		const err = obj.error;
 		const message = typeof err?.message === "string" ? err.message : "OpenAI Realtime error";
+		log.warn("OpenAI Realtime error event", { error: obj.error });
 		clearTurnBuffers();
 		opts.callbacks.onError("internal", message);
 	}
 	function handleOutputItemAdded(obj) {
 		const item = obj.item;
+		log.info("OpenAI Realtime output_item.added", {
+			itemType: item?.type,
+			name: item?.name,
+			callId: item?.call_id
+		});
 		if (item?.type !== "function_call" || !item.id) return;
 		toolBuffers.set(item.id, {
 			callId: item.call_id ?? "",
@@ -1810,7 +1837,13 @@ function createOpenaiRealtimeTransport(opts) {
 		toolBuffers.delete(id);
 		const callId = asString(obj.call_id) || (buf?.callId ?? "");
 		const name = asString(obj.name) || (buf?.name ?? "");
-		const args = parseToolArgs(asString(obj.arguments) || (buf?.argsBuffer ?? ""), name, callId);
+		const argsStr = asString(obj.arguments) || (buf?.argsBuffer ?? "");
+		log.info("OpenAI Realtime tool call", {
+			name,
+			callId,
+			args: argsStr
+		});
+		const args = parseToolArgs(argsStr, name, callId);
 		opts.callbacks.onToolCall(callId, name, args);
 	}
 	function handleMessage(data) {
@@ -1824,9 +1857,11 @@ function createOpenaiRealtimeTransport(opts) {
 		if (typeof raw !== "object" || raw === null) return;
 		const obj = raw;
 		switch (obj.type) {
+			case "response.output_audio.delta":
 			case "response.audio.delta":
 				handleAudioDelta(obj);
 				return;
+			case "response.output_audio.done":
 			case "response.audio.done":
 				opts.callbacks.onAudioDone();
 				return;
@@ -1842,9 +1877,11 @@ function createOpenaiRealtimeTransport(opts) {
 			case "response.created":
 				handleResponseCreated(obj);
 				return;
+			case "response.output_audio_transcript.delta":
 			case "response.audio_transcript.delta":
 				handleAgentTranscriptDelta(obj);
 				return;
+			case "response.output_audio_transcript.done":
 			case "response.audio_transcript.done":
 				handleAgentTranscriptDone(obj);
 				return;
@@ -1863,7 +1900,9 @@ function createOpenaiRealtimeTransport(opts) {
 			case "error":
 				handleErrorEvent(obj);
 				return;
-			default: return;
+			default:
+				log.debug("OpenAI Realtime: unhandled event", { type: obj.type });
+				return;
 		}
 	}
 	function handleClose(code, reason) {
@@ -1893,6 +1932,11 @@ function createOpenaiRealtimeTransport(opts) {
 			ws.send(`{"type":"input_audio_buffer.append","audio":"${uint8ToBase64(bytes)}"}`);
 		},
 		sendToolResult(callId, result) {
+			log.info("OpenAI Realtime sendToolResult", {
+				callId,
+				resultLen: result.length,
+				preview: result.slice(0, 200)
+			});
 			send({
 				type: "conversation.item.create",
 				item: {
@@ -1901,7 +1945,13 @@ function createOpenaiRealtimeTransport(opts) {
 					output: result
 				}
 			});
-			send({ type: "response.create" });
+			if (!responseCreateQueued) {
+				responseCreateQueued = true;
+				queueMicrotask(() => {
+					responseCreateQueued = false;
+					send({ type: "response.create" });
+				});
+			}
 		},
 		cancelReply() {
 			if (currentResponseId === null) return;
@@ -2091,6 +2141,14 @@ function createPipelineTransport(opts) {
 			onDelta(out);
 			ttsSession?.sendText(out);
 		}
+		function emitToolResult(part) {
+			const callId = part.toolCallId ?? "";
+			if (!callId) return;
+			const raw = part.output ?? part.result ?? "";
+			const str = typeof raw === "string" ? raw : JSON.stringify(raw);
+			const truncated = str.length > 4e3 ? str.slice(0, MAX_TOOL_RESULT_CHARS) : str;
+			callbacks.onToolCallDone?.(callId, truncated);
+		}
 		return function handlePart(part) {
 			switch (part.type) {
 				case "text-delta":
@@ -2104,6 +2162,9 @@ function createPipelineTransport(opts) {
 					callbacks.onToolCall(part.toolCallId ?? "", part.toolName ?? "", input);
 					return;
 				}
+				case "tool-result":
+					emitToolResult(part);
+					return;
 				case "error": {
 					const msg = errorMessage(part.error);
 					log.error("LLM stream error", {
@@ -2346,8 +2407,14 @@ const S2sMessageSchema = z.discriminatedUnion("type", [
 		type: z.literal("tool.call"),
 		call_id: z.string(),
 		name: z.string(),
-		args: z.record(z.string(), z.unknown()).optional().default({})
-	}),
+		arguments: z.record(z.string(), z.unknown()).optional(),
+		args: z.record(z.string(), z.unknown()).optional()
+	}).transform((m) => ({
+		type: m.type,
+		call_id: m.call_id,
+		name: m.name,
+		args: m.arguments ?? m.args ?? {}
+	})),
 	z.object({
 		type: z.literal("reply.done"),
 		status: z.string().optional()
@@ -2495,6 +2562,20 @@ function connectS2s(opts) {
 			if (type === "reply.audio" || type === "input.audio" || type === "reply.done" || type === "session.error") return;
 			log.info(`S2S << ${type}`);
 		}
+		function handleObject(obj, raw) {
+			logIncoming(obj.type);
+			if (obj.type === "tool.call") log.info("S2S << tool.call payload", { payload: JSON.stringify(obj) });
+			if (obj.type === "reply.audio" && typeof obj.data === "string") {
+				callbacks.onAudio(base64ToUint8(obj.data));
+				return;
+			}
+			const parsed = parseS2sMessage(obj);
+			if (!parsed) {
+				log.warn(`S2S << unrecognised message type: ${obj.type ?? JSON.stringify(raw).slice(0, 200)}`);
+				return;
+			}
+			dispatchS2sMessage(callbacks, parsed, dispatchState, dispatchCtx);
+		}
 		ws.addEventListener("message", (ev) => {
 			let raw;
 			try {
@@ -2507,18 +2588,7 @@ function connectS2s(opts) {
 				log.warn("S2S << non-object JSON message", { type: typeof raw });
 				return;
 			}
-			const obj = raw;
-			logIncoming(obj.type);
-			if (obj.type === "reply.audio" && typeof obj.data === "string") {
-				callbacks.onAudio(base64ToUint8(obj.data));
-				return;
-			}
-			const parsed = parseS2sMessage(obj);
-			if (!parsed) {
-				log.warn(`S2S << unrecognised message type: ${obj.type ?? JSON.stringify(raw).slice(0, 200)}`);
-				return;
-			}
-			dispatchS2sMessage(callbacks, parsed, dispatchState, dispatchCtx);
+			handleObject(raw, raw);
 		});
 		ws.addEventListener("close", (ev) => {
 			const code = ev.code ?? 0;
@@ -3085,6 +3155,7 @@ function createRuntime(opts) {
 			callbacks,
 			sid: sessionOpts.id,
 			agent: sessionOpts.agent,
+			skipGreeting: sessionOpts.skipGreeting ?? false,
 			...createOpenaiRealtimeWebSocket ? { createWebSocket: createOpenaiRealtimeWebSocket } : {},
 			logger
 		});
@@ -3149,6 +3220,11 @@ function createRuntime(opts) {
 					toolName: name,
 					args
 				}) : (id, name, args) => bindCore().onToolCall(id, name, args),
+				...isPipeline ? { onToolCallDone: (id, result) => sessionOpts.client.event({
+					type: "tool_call_done",
+					toolCallId: id,
+					result
+				}) } : {},
 				onError: (code, message) => bindCore().onError(code, message),
 				onSpeechStarted: () => bindCore().onSpeechStarted(),
 				onSpeechStopped: () => bindCore().onSpeechStopped()

package/dist/host/transports/openai-realtime-transport.d.ts CHANGED Viewed

@@ -37,6 +37,8 @@ export type OpenaiRealtimeTransportOptions = {
     callbacks: TransportCallbacks;
     sid: string;
     agent: string;
+    /** Skip the initial greeting (used for session resume). */
+    skipGreeting?: boolean;
     createWebSocket?: CreateOpenaiRealtimeWebSocket;
     logger?: Logger;
 };

package/dist/host/transports/types.d.ts CHANGED Viewed

@@ -13,6 +13,13 @@ export type TransportCallbacks = {
     onUserTranscript(text: string): void;
     onAgentTranscript(text: string, interrupted: boolean): void;
     onToolCall(callId: string, name: string, args: Record<string, unknown>): void;
+    /**
+     * Tool execution finished. Pipeline mode invokes this from the
+     * `tool-result` stream part so the client UI can mark the call done.
+     * S2S transports leave this unset — SessionCore.onToolCall emits the
+     * `tool_call_done` event itself after dispatching the tool.
+     */
+    onToolCallDone?(callId: string, result: string): void;
     onError(code: SessionErrorCode, message: string): void;
     onSpeechStarted(): void;
     onSpeechStopped(): void;

package/dist/sdk/_internal-types.d.ts CHANGED Viewed

@@ -85,4 +85,13 @@ export type ToolSchema = {
     parameters: JSONSchema7;
 };
 export declare const EMPTY_PARAMS: z.ZodObject<{}, z.core.$strip>;
+/**
+ * Convert a Zod schema to the JSON Schema shape that S2S providers expect.
+ * Strips the `$schema` keyword: `z.toJSONSchema` (Zod v4) tags output with
+ * the JSON Schema 2020-12 dialect URI, and some Realtime/S2S providers
+ * either reject the field outright or ship it through to the underlying
+ * model with a malformed function spec — observed empirically as tool
+ * calls that arrive with `args: {}` even when required params are listed.
+ */
+export declare function toToolJsonSchema(zodSchema: z.ZodTypeAny): JSONSchema7;
 export declare function agentToolsToSchemas(tools: Readonly<Record<string, ToolDef>>): ToolSchema[];

package/dist/sdk/manifest-barrel.js CHANGED Viewed

@@ -1,2 +1,2 @@
-import { a as toAgentConfig, i as agentToolsToSchemas, n as EMPTY_PARAMS, o as ProviderDescriptorSchema, r as ToolSchemaSchema, s as assertProviderTriple, t as AgentConfigSchema } from "../_internal-types-CfOAbK6V.js";
+import { a as toAgentConfig, c as assertProviderTriple, i as agentToolsToSchemas, n as EMPTY_PARAMS, r as ToolSchemaSchema, s as ProviderDescriptorSchema, t as AgentConfigSchema } from "../_internal-types-8v1qAa4A.js";
 export { AgentConfigSchema, EMPTY_PARAMS, ProviderDescriptorSchema, ToolSchemaSchema, agentToolsToSchemas, assertProviderTriple, toAgentConfig };

package/host/builtin-tools.ts CHANGED Viewed

@@ -9,7 +9,7 @@
 import { convert } from "html-to-text";
 import { z } from "zod";
-import { EMPTY_PARAMS, type ToolSchema } from "../sdk/_internal-types.ts";
+import { EMPTY_PARAMS, type ToolSchema, toToolJsonSchema } from "../sdk/_internal-types.ts";
 import { FETCH_TIMEOUT_MS, MAX_HTML_BYTES, MAX_PAGE_CHARS } from "../sdk/constants.ts";
 import type { ToolDef } from "../sdk/types.ts";
 import { createRunCode } from "./_run-code.ts";
@@ -242,7 +242,7 @@ export function resolveAllBuiltins(
         type: "function",
         name: toolName,
         description: def.description,
-        parameters: z.toJSONSchema(def.parameters ?? EMPTY_PARAMS) as ToolSchema["parameters"],
+        parameters: toToolJsonSchema(def.parameters ?? EMPTY_PARAMS) as ToolSchema["parameters"],
       });
       if (def.guidance) guidance.push(def.guidance);
     }

package/host/runtime.ts CHANGED Viewed

@@ -432,6 +432,7 @@ export function createRuntime(opts: RuntimeOptions): Runtime {
       callbacks,
       sid: sessionOpts.id,
       agent: sessionOpts.agent,
+      skipGreeting: sessionOpts.skipGreeting ?? false,
       ...(createOpenaiRealtimeWebSocket ? { createWebSocket: createOpenaiRealtimeWebSocket } : {}),
       logger,
     });
@@ -512,6 +513,15 @@ export function createRuntime(opts: RuntimeOptions): Runtime {
         ? (id, name, args) =>
             sessionOpts.client.event({ type: "tool_call", toolCallId: id, toolName: name, args })
         : (id, name, args) => bindCore().onToolCall(id, name, args),
+      // Pipeline: emit `tool_call_done` when streamText surfaces the
+      // `tool-result` part so the UI can flip status from pending → done.
+      // S2S transports never set this; SessionCore.onToolCall emits done itself.
+      ...(isPipeline
+        ? {
+            onToolCallDone: (id: string, result: string) =>
+              sessionOpts.client.event({ type: "tool_call_done", toolCallId: id, result }),
+          }
+        : {}),
       onError: (code, message) => bindCore().onError(code, message),
       onSpeechStarted: () => bindCore().onSpeechStarted(),
       onSpeechStopped: () => bindCore().onSpeechStopped(),

package/host/s2s.ts CHANGED Viewed

@@ -55,12 +55,23 @@ const S2sMessageSchema = z.discriminatedUnion("type", [
     item_id: z.string().optional().default(""),
     interrupted: z.boolean().optional().default(false),
   }),
-  z.object({
-    type: z.literal("tool.call"),
-    call_id: z.string(),
-    name: z.string(),
-    args: z.record(z.string(), z.unknown()).optional().default({}),
-  }),
+  // AssemblyAI's S2S protocol delivers tool args under `arguments`; older
+  // implementations and our internal tests use `args`. Accept either, with
+  // `arguments` taking precedence so the live wire format wins.
+  z
+    .object({
+      type: z.literal("tool.call"),
+      call_id: z.string(),
+      name: z.string(),
+      arguments: z.record(z.string(), z.unknown()).optional(),
+      args: z.record(z.string(), z.unknown()).optional(),
+    })
+    .transform((m) => ({
+      type: m.type,
+      call_id: m.call_id,
+      name: m.name,
+      args: m.arguments ?? m.args ?? {},
+    })),
   z.object({ type: z.literal("reply.done"), status: z.string().optional() }),
   z.object({ type: z.literal("session.error"), code: z.string(), message: z.string() }),
   z.object({ type: z.literal("error"), message: z.string() }),
@@ -303,27 +314,20 @@ export function connectS2s(opts: ConnectS2sOptions): Promise<S2sHandle> {
       log.info(`S2S << ${type}`);
     }
-    ws.addEventListener("message", (ev) => {
-      let raw: unknown;
-      try {
-        raw = JSON.parse(String(ev.data));
-      } catch {
-        log.warn("S2S << invalid JSON", { data: String(ev.data).slice(0, 200) });
-        return;
-      }
-      if (typeof raw !== "object" || raw === null || Array.isArray(raw)) {
-        log.warn("S2S << non-object JSON message", { type: typeof raw });
-        return;
-      }
-      const obj = raw as Record<string, unknown>;
+    function handleObject(obj: Record<string, unknown>, raw: unknown): void {
       logIncoming(obj.type);
+      // Log the full tool.call payload so we can diagnose provider-side
+      // empty-args problems — the underlying LLM emitting a function call
+      // without populating its required parameters. Without the full
+      // payload we cannot tell apart "field-name mismatch" from
+      // "model emitted no args."
+      if (obj.type === "tool.call") {
+        log.info("S2S << tool.call payload", { payload: JSON.stringify(obj) });
+      }
       if (obj.type === "reply.audio" && typeof obj.data === "string") {
         callbacks.onAudio(base64ToUint8(obj.data));
         return;
       }
       const parsed = parseS2sMessage(obj);
       if (!parsed) {
         log.warn(
@@ -332,6 +336,21 @@ export function connectS2s(opts: ConnectS2sOptions): Promise<S2sHandle> {
         return;
       }
       dispatchS2sMessage(callbacks, parsed, dispatchState, dispatchCtx);
+    }
+    ws.addEventListener("message", (ev) => {
+      let raw: unknown;
+      try {
+        raw = JSON.parse(String(ev.data));
+      } catch {
+        log.warn("S2S << invalid JSON", { data: String(ev.data).slice(0, 200) });
+        return;
+      }
+      if (typeof raw !== "object" || raw === null || Array.isArray(raw)) {
+        log.warn("S2S << non-object JSON message", { type: typeof raw });
+        return;
+      }
+      handleObject(raw as Record<string, unknown>, raw);
     });
     ws.addEventListener("close", (ev) => {

package/host/transports/openai-realtime-transport.test.ts CHANGED Viewed

@@ -83,7 +83,6 @@ describe("openai-realtime-transport: connect and session.update", () => {
       options: { model: "gpt-realtime", voice: "cedar" },
       sessionConfig: {
         systemPrompt: "Be terse.",
-        greeting: "Hi.",
         tools: [],
       },
       toolSchemas: [
@@ -109,10 +108,7 @@ describe("openai-realtime-transport: connect and session.update", () => {
     expect(createWs).toHaveBeenCalledWith(
       "wss://api.openai.com/v1/realtime?model=gpt-realtime",
       expect.objectContaining({
-        headers: expect.objectContaining({
-          Authorization: "Bearer sk-test",
-          "OpenAI-Beta": "realtime=v1",
-        }),
+        headers: { Authorization: "Bearer sk-test" },
       }),
     );
@@ -121,13 +117,14 @@ describe("openai-realtime-transport: connect and session.update", () => {
     if (first === undefined) throw new Error("expected one send");
     const msg = JSON.parse(first);
     expect(msg.type).toBe("session.update");
-    expect(msg.session.voice).toBe("cedar");
+    expect(msg.session.type).toBe("realtime");
+    expect(msg.session.output_modalities).toEqual(["audio"]);
     expect(msg.session.instructions).toBe("Be terse.");
-    expect(msg.session.input_audio_format).toBe("pcm16");
-    expect(msg.session.output_audio_format).toBe("pcm16");
-    expect(msg.session.modalities).toEqual(["audio", "text"]);
-    expect(msg.session.input_audio_transcription).toEqual({ model: "whisper-1" });
-    expect(msg.session.turn_detection.type).toBe("server_vad");
+    expect(msg.session.audio.input.format).toEqual({ type: "audio/pcm", rate: 24_000 });
+    expect(msg.session.audio.input.turn_detection.type).toBe("server_vad");
+    expect(msg.session.audio.input.transcription).toEqual({ model: "whisper-1" });
+    expect(msg.session.audio.output.format).toEqual({ type: "audio/pcm", rate: 24_000 });
+    expect(msg.session.audio.output.voice).toBe("cedar");
     expect(msg.session.tools).toEqual([
       expect.objectContaining({ type: "function", name: "lookup" }),
     ]);
@@ -135,6 +132,57 @@ describe("openai-realtime-transport: connect and session.update", () => {
   });
 });
+describe("greeting", () => {
+  function makeWithGreeting(args: { greeting?: string; skipGreeting?: boolean }) {
+    const fake = makeFakeWs();
+    const transport = createOpenaiRealtimeTransport({
+      apiKey: "sk",
+      options: {},
+      sessionConfig: {
+        systemPrompt: "",
+        ...(args.greeting !== undefined ? { greeting: args.greeting } : {}),
+      },
+      toolSchemas: [],
+      toolChoice: "auto",
+      callbacks: noopCallbacks(),
+      sid: "s",
+      agent: "a",
+      ...(args.skipGreeting !== undefined ? { skipGreeting: args.skipGreeting } : {}),
+      createWebSocket: () => fake,
+      logger: silentLogger,
+    });
+    const ready = transport.start();
+    fake.fire("open");
+    return { fake, ready };
+  }
+  test("sends response.create with quoted greeting after session.update", async () => {
+    const { fake, ready } = makeWithGreeting({ greeting: 'Hello, "friend".' });
+    await ready;
+    expect(fake.sent.length).toBe(2);
+    expect(JSON.parse(fake.sent[0] ?? "{}").type).toBe("session.update");
+    const greetingMsg = JSON.parse(fake.sent[1] ?? "{}");
+    expect(greetingMsg.type).toBe("response.create");
+    // JSON.stringify quotes the greeting and escapes any embedded quotes —
+    // protects against prompt-injection by closing the instruction string.
+    expect(greetingMsg.response.instructions).toBe('Say exactly: "Hello, \\"friend\\"."');
+  });
+  test("no greeting send when greeting is undefined", async () => {
+    const { fake, ready } = makeWithGreeting({});
+    await ready;
+    expect(fake.sent.length).toBe(1);
+    expect(JSON.parse(fake.sent[0] ?? "{}").type).toBe("session.update");
+  });
+  test("skipGreeting suppresses the greeting send", async () => {
+    const { fake, ready } = makeWithGreeting({ greeting: "Hi.", skipGreeting: true });
+    await ready;
+    expect(fake.sent.length).toBe(1);
+    expect(JSON.parse(fake.sent[0] ?? "{}").type).toBe("session.update");
+  });
+});
 describe("audio in/out", () => {
   test("sendUserAudio sends input_audio_buffer.append with base64 payload", async () => {
     const { fake, transport, ready } = startedTransport();
@@ -150,21 +198,25 @@ describe("audio in/out", () => {
     expect(Buffer.from(msg.audio, "base64")).toEqual(Buffer.from([1, 2, 3, 4]));
   });
-  test("response.audio.delta calls onAudioChunk with decoded bytes", async () => {
+  test.each([
+    ["response.audio.delta"],
+    ["response.output_audio.delta"],
+  ])("%s calls onAudioChunk with decoded bytes", async (type) => {
     const { fake, cbs, ready } = startedTransport();
     await ready;
     const audio = Buffer.from([5, 6, 7, 8]).toString("base64");
-    fake.fire("message", {
-      data: JSON.stringify({ type: "response.audio.delta", delta: audio }),
-    });
+    fake.fire("message", { data: JSON.stringify({ type, delta: audio }) });
     expect(cbs.onAudioChunk).toHaveBeenCalledTimes(1);
     expect(cbs.onAudioChunk).toHaveBeenCalledWith(new Uint8Array([5, 6, 7, 8]));
   });
-  test("response.audio.done calls onAudioDone", async () => {
+  test.each([
+    ["response.audio.done"],
+    ["response.output_audio.done"],
+  ])("%s calls onAudioDone", async (type) => {
     const { fake, cbs, ready } = startedTransport();
     await ready;
-    fake.fire("message", { data: JSON.stringify({ type: "response.audio.done" }) });
+    fake.fire("message", { data: JSON.stringify({ type }) });
     expect(cbs.onAudioDone).toHaveBeenCalledTimes(1);
   });
 });
@@ -206,27 +258,22 @@ describe("VAD, user transcript, reply lifecycle, agent transcript", () => {
     expect(cbs.onReplyDone).toHaveBeenCalledTimes(1);
   });
-  test("agent transcript: deltas accumulated, emitted on done", async () => {
+  test.each([
+    ["response.audio_transcript", "legacy"],
+    ["response.output_audio_transcript", "GA"],
+  ])("agent transcript (%s): deltas accumulated, emitted on done", async (prefix) => {
     const { fake, cbs, ready } = startedTransport();
     await ready;
     const item_id = "item_x";
     fake.fire("message", {
-      data: JSON.stringify({
-        type: "response.audio_transcript.delta",
-        item_id,
-        delta: "Hi ",
-      }),
+      data: JSON.stringify({ type: `${prefix}.delta`, item_id, delta: "Hi " }),
     });
     fake.fire("message", {
-      data: JSON.stringify({
-        type: "response.audio_transcript.delta",
-        item_id,
-        delta: "there.",
-      }),
+      data: JSON.stringify({ type: `${prefix}.delta`, item_id, delta: "there." }),
     });
     expect(cbs.onAgentTranscript).not.toHaveBeenCalled();
     fake.fire("message", {
-      data: JSON.stringify({ type: "response.audio_transcript.done", item_id }),
+      data: JSON.stringify({ type: `${prefix}.done`, item_id }),
     });
     expect(cbs.onAgentTranscript).toHaveBeenCalledWith("Hi there.", false);
   });
@@ -308,15 +355,36 @@ describe("tool calls", () => {
     await ready;
     fake.sent.length = 0; // drop session.update
     transport.sendToolResult("call_1", '{"ok":true}');
-    expect(fake.sent.length).toBe(2);
+    // function_call_output is sent immediately; response.create is queued.
+    expect(fake.sent.length).toBe(1);
     const m1 = JSON.parse(fake.sent[0] ?? "{}");
     expect(m1.type).toBe("conversation.item.create");
     expect(m1.item.type).toBe("function_call_output");
     expect(m1.item.call_id).toBe("call_1");
     expect(m1.item.output).toBe('{"ok":true}');
+    await new Promise((r) => queueMicrotask(() => r(undefined)));
+    expect(fake.sent.length).toBe(2);
     const m2 = JSON.parse(fake.sent[1] ?? "{}");
     expect(m2.type).toBe("response.create");
   });
+  test("multiple sendToolResult calls coalesce into a single response.create", async () => {
+    const { fake, transport, ready } = startedTransport();
+    await ready;
+    fake.sent.length = 0;
+    // Synchronous burst — session-core flushes pending tool results in a loop.
+    transport.sendToolResult("call_1", '{"a":1}');
+    transport.sendToolResult("call_2", '{"b":2}');
+    transport.sendToolResult("call_3", '{"c":3}');
+    // Three function_call_outputs sent immediately, no response.create yet.
+    expect(fake.sent.length).toBe(3);
+    expect(fake.sent.every((s) => JSON.parse(s).type === "conversation.item.create")).toBe(true);
+    await new Promise((r) => queueMicrotask(() => r(undefined)));
+    // After the microtask, exactly one response.create — second one would be
+    // rejected as `conversation_already_has_active_response`.
+    expect(fake.sent.length).toBe(4);
+    expect(JSON.parse(fake.sent[3] ?? "{}").type).toBe("response.create");
+  });
 });
 describe("cancel, error, close", () => {

package/host/transports/openai-realtime-transport.ts CHANGED Viewed

@@ -49,6 +49,8 @@ export type OpenaiRealtimeTransportOptions = {
   callbacks: TransportCallbacks;
   sid: string;
   agent: string;
+  /** Skip the initial greeting (used for session resume). */
+  skipGreeting?: boolean;
   createWebSocket?: CreateOpenaiRealtimeWebSocket;
   logger?: Logger;
 };
@@ -66,6 +68,7 @@ export function createOpenaiRealtimeTransport(opts: OpenaiRealtimeTransportOptio
   type ToolBuffer = { callId: string; name: string; argsBuffer: string };
   const toolBuffers = new Map<string, ToolBuffer>();
   let currentResponseId: string | null = null;
+  let responseCreateQueued = false;
   function send(payload: Record<string, unknown>): void {
     if (!ws || ws.readyState !== WS_OPEN) {
@@ -75,17 +78,37 @@ export function createOpenaiRealtimeTransport(opts: OpenaiRealtimeTransportOptio
     ws.send(JSON.stringify(payload));
   }
+  /**
+   * Ask the model to speak the configured greeting.
+   *
+   * Sends nothing when `opts.skipGreeting` is set or when
+   * `opts.sessionConfig.greeting` is missing or empty.
+   */
+  function sendGreeting(): void {
+    if (opts.skipGreeting) return;
+    const greeting = opts.sessionConfig.greeting;
+    if (!greeting) return;
+    // OpenAI Realtime has no native greeting field — trigger it as a one-shot
+    // response with custom instructions that override the system prompt for
+    // this turn only. Audio + transcript ride the normal response.* events.
+    // JSON.stringify quotes and escapes the greeting before it is embedded in
+    // the instruction text.
+    send({
+      type: "response.create",
+      response: { instructions: `Say exactly: ${JSON.stringify(greeting)}` },
+    });
+  }
   function sendSessionUpdate(): void {
     send({
       type: "session.update",
       session: {
-        modalities: ["audio", "text"],
-        voice,
+        type: "realtime",
+        output_modalities: ["audio"],
         instructions: opts.sessionConfig.systemPrompt,
-        input_audio_format: "pcm16",
-        output_audio_format: "pcm16",
-        input_audio_transcription: { model: "whisper-1" },
-        turn_detection: { type: "server_vad" },
+        audio: {
+          input: {
+            format: { type: "audio/pcm", rate: 24_000 },
+            turn_detection: { type: "server_vad" },
+            transcription: { model: "whisper-1" },
+          },
+          output: {
+            format: { type: "audio/pcm", rate: 24_000 },
+            voice,
+          },
+        },
         tools: opts.toolSchemas,
         tool_choice: opts.toolChoice,
       },
@@ -99,7 +122,6 @@ export function createOpenaiRealtimeTransport(opts: OpenaiRealtimeTransportOptio
       const sock = createWs(url, {
         headers: {
           Authorization: `Bearer ${opts.apiKey}`,
-          "OpenAI-Beta": "realtime=v1",
         },
       });
       ws = sock;
@@ -108,6 +130,7 @@ export function createOpenaiRealtimeTransport(opts: OpenaiRealtimeTransportOptio
       sock.addEventListener("open", () => {
         opened = true;
         sendSessionUpdate();
+        sendGreeting();
         resolve();
       });
       sock.addEventListener("message", (ev) => handleMessage(ev.data));
@@ -177,6 +200,7 @@ export function createOpenaiRealtimeTransport(opts: OpenaiRealtimeTransportOptio
   function handleErrorEvent(obj: Record<string, unknown>): void {
     const err = obj.error as { message?: unknown } | undefined;
     const message = typeof err?.message === "string" ? err.message : "OpenAI Realtime error";
+    log.warn("OpenAI Realtime error event", { error: obj.error });
     clearTurnBuffers();
     opts.callbacks.onError("internal", message);
   }
@@ -185,6 +209,11 @@ export function createOpenaiRealtimeTransport(opts: OpenaiRealtimeTransportOptio
     const item = obj.item as
       | { id?: string; type?: string; name?: string; call_id?: string }
       | undefined;
+    log.info("OpenAI Realtime output_item.added", {
+      itemType: item?.type,
+      name: item?.name,
+      callId: item?.call_id,
+    });
     if (item?.type !== "function_call" || !item.id) return;
     toolBuffers.set(item.id, {
       callId: item.call_id ?? "",
@@ -220,6 +249,7 @@ export function createOpenaiRealtimeTransport(opts: OpenaiRealtimeTransportOptio
     const callId = asString(obj.call_id) || (buf?.callId ?? "");
     const name = asString(obj.name) || (buf?.name ?? "");
     const argsStr = asString(obj.arguments) || (buf?.argsBuffer ?? "");
+    log.info("OpenAI Realtime tool call", { name, callId, args: argsStr });
     const args = parseToolArgs(argsStr, name, callId);
     opts.callbacks.onToolCall(callId, name, args);
   }
@@ -235,9 +265,14 @@ export function createOpenaiRealtimeTransport(opts: OpenaiRealtimeTransportOptio
     if (typeof raw !== "object" || raw === null) return;
     const obj = raw as Record<string, unknown>;
     switch (obj.type) {
+      // GA renamed audio output events to `response.output_audio.*` and
+      // transcript events to `response.output_audio_transcript.*`. The legacy
+      // (beta) names are kept as aliases so older snapshots still work.
+      case "response.output_audio.delta":
       case "response.audio.delta":
         handleAudioDelta(obj);
         return;
+      case "response.output_audio.done":
       case "response.audio.done":
         opts.callbacks.onAudioDone();
         return;
@@ -253,9 +288,11 @@ export function createOpenaiRealtimeTransport(opts: OpenaiRealtimeTransportOptio
       case "response.created":
         handleResponseCreated(obj);
         return;
+      case "response.output_audio_transcript.delta":
       case "response.audio_transcript.delta":
         handleAgentTranscriptDelta(obj);
         return;
+      case "response.output_audio_transcript.done":
       case "response.audio_transcript.done":
         handleAgentTranscriptDone(obj);
         return;
@@ -275,6 +312,7 @@ export function createOpenaiRealtimeTransport(opts: OpenaiRealtimeTransportOptio
         handleErrorEvent(obj);
         return;
       default:
+        log.debug("OpenAI Realtime: unhandled event", { type: obj.type });
         return;
     }
   }
@@ -302,11 +340,25 @@ export function createOpenaiRealtimeTransport(opts: OpenaiRealtimeTransportOptio
       ws.send(`{"type":"input_audio_buffer.append","audio":"${uint8ToBase64(bytes)}"}`);
     },
     sendToolResult(callId, result) {
+      log.info("OpenAI Realtime sendToolResult", {
+        callId,
+        resultLen: result.length,
+        preview: result.slice(0, 200),
+      });
       send({
         type: "conversation.item.create",
         item: { type: "function_call_output", call_id: callId, output: result },
       });
-      send({ type: "response.create" });
+      // Multiple tool results from one turn arrive synchronously; coalesce them
+      // into a single response.create per tick. OpenAI rejects a second
+      // response.create while one is in flight, which strands the turn.
+      if (!responseCreateQueued) {
+        responseCreateQueued = true;
+        queueMicrotask(() => {
+          responseCreateQueued = false;
+          send({ type: "response.create" });
+        });
+      }
     },
     cancelReply() {
       if (currentResponseId === null) return;

package/host/transports/pipeline-transport.ts CHANGED Viewed

@@ -14,6 +14,7 @@ import {
   DEFAULT_MAX_HISTORY,
   DEFAULT_STT_SAMPLE_RATE,
   DEFAULT_TTS_SAMPLE_RATE,
+  MAX_TOOL_RESULT_CHARS,
   PIPELINE_FLUSH_TIMEOUT_MS,
 } from "../../sdk/constants.ts";
 import type { SessionErrorCode } from "../../sdk/protocol.ts";
@@ -235,6 +236,25 @@ export function createPipelineTransport(opts: PipelineTransportOptions): Transpo
       ttsSession?.sendText(out);
     }
+    /**
+     * Report a finished inline tool execution through the optional
+     * `callbacks.onToolCallDone` hook.
+     *
+     * The payload is read from `output`, falling back to `result`, converted
+     * to a string and capped at MAX_TOOL_RESULT_CHARS. Parts without a
+     * `toolCallId` are ignored.
+     */
+    function emitToolResult(part: {
+      readonly toolCallId?: string;
+      readonly output?: unknown;
+    }): void {
+      // Inline execution finished — surface completion so the client UI can
+      // flip the tool-call from "pending" to "done". Schema requires a
+      // string result capped at MAX_TOOL_RESULT_CHARS.
+      const callId = part.toolCallId ?? "";
+      if (!callId) return;
+      const { output, result } = part as { output?: unknown; result?: unknown };
+      const raw = output ?? result ?? "";
+      let str: string;
+      if (typeof raw === "string") {
+        str = raw;
+      } else {
+        try {
+          // JSON.stringify yields undefined for functions/symbols and throws
+          // on circular structures or BigInt values; fall back to String() so
+          // one unusual tool result cannot break the stream handler.
+          str = (JSON.stringify(raw) as string | undefined) ?? String(raw);
+        } catch {
+          str = String(raw);
+        }
+      }
+      const truncated =
+        str.length > MAX_TOOL_RESULT_CHARS ? str.slice(0, MAX_TOOL_RESULT_CHARS) : str;
+      callbacks.onToolCallDone?.(callId, truncated);
+    }
     return function handlePart(part: {
       readonly type: string;
       readonly text?: string;
@@ -257,6 +277,9 @@ export function createPipelineTransport(opts: PipelineTransportOptions): Transpo
           callbacks.onToolCall(part.toolCallId ?? "", part.toolName ?? "", input);
           return;
         }
+        case "tool-result":
+          emitToolResult(part);
+          return;
         case "error": {
           const msg = errorMessage(part.error);
           log.error("LLM stream error", { message: msg, sid: opts.sid });

package/host/transports/types.ts CHANGED Viewed

@@ -17,6 +17,13 @@ export type TransportCallbacks = {
   onUserTranscript(text: string): void;
   onAgentTranscript(text: string, interrupted: boolean): void;
   onToolCall(callId: string, name: string, args: Record<string, unknown>): void;
+  /**
+   * Tool execution finished. Pipeline mode invokes this from the
+   * `tool-result` stream part so the client UI can mark the call done.
+   * S2S transports leave this unset — SessionCore.onToolCall emits the
+   * `tool_call_done` event itself after dispatching the tool.
+   */
+  onToolCallDone?(callId: string, result: string): void;
   onError(code: SessionErrorCode, message: string): void;
   onSpeechStarted(): void;
   onSpeechStopped(): void;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@alexkroman1/aai",
-  "version": "1.8.0",
+  "version": "1.8.2",
   "type": "module",
   "exports": {
     ".": {

package/sdk/_internal-types.ts CHANGED Viewed

@@ -118,11 +118,24 @@ export type ToolSchema = {
 export const EMPTY_PARAMS = z.object({});
+/**
+ * Produce the JSON Schema form of a Zod schema for use as a tool's
+ * `parameters`, with the top-level `$schema` keyword removed.
+ *
+ * `z.toJSONSchema` (Zod v4) tags its output with the JSON Schema 2020-12
+ * dialect URI. Some Realtime/S2S providers either reject that field outright
+ * or pass it through to the underlying model as a malformed function spec —
+ * observed empirically as tool calls arriving with `args: {}` even when
+ * required params are listed.
+ */
+export function toToolJsonSchema(zodSchema: z.ZodTypeAny): JSONSchema7 {
+  // Work on a shallow copy so the object returned by Zod is left untouched.
+  const schema: Record<string, unknown> = { ...z.toJSONSchema(zodSchema) };
+  delete schema["$schema"];
+  return schema as JSONSchema7;
+}
 export function agentToolsToSchemas(tools: Readonly<Record<string, ToolDef>>): ToolSchema[] {
   return Object.entries(tools).map(([name, def]) => ({
     type: "function",
     name,
     description: def.description,
-    parameters: z.toJSONSchema(def.parameters ?? EMPTY_PARAMS) as JSONSchema7,
+    parameters: toToolJsonSchema(def.parameters ?? EMPTY_PARAMS),
   }));
 }