npm - @livekit/agents - Versions diffs - 1.0.34 → 1.0.36 - Mend

@livekit/agents 1.0.34 → 1.0.36

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (117)

package/dist/cli.cjs.map +1 -1
package/dist/inference/api_protos.d.cts +4 -4
package/dist/inference/api_protos.d.ts +4 -4
package/dist/inference/llm.cjs +30 -3
package/dist/inference/llm.cjs.map +1 -1
package/dist/inference/llm.d.cts +3 -1
package/dist/inference/llm.d.ts +3 -1
package/dist/inference/llm.d.ts.map +1 -1
package/dist/inference/llm.js +30 -3
package/dist/inference/llm.js.map +1 -1
package/dist/ipc/inference_proc_executor.cjs.map +1 -1
package/dist/ipc/job_proc_executor.cjs.map +1 -1
package/dist/ipc/job_proc_lazy_main.cjs +1 -1
package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
package/dist/ipc/job_proc_lazy_main.js +1 -1
package/dist/ipc/job_proc_lazy_main.js.map +1 -1
package/dist/llm/chat_context.cjs +20 -2
package/dist/llm/chat_context.cjs.map +1 -1
package/dist/llm/chat_context.d.cts +9 -0
package/dist/llm/chat_context.d.ts +9 -0
package/dist/llm/chat_context.d.ts.map +1 -1
package/dist/llm/chat_context.js +20 -2
package/dist/llm/chat_context.js.map +1 -1
package/dist/llm/llm.cjs.map +1 -1
package/dist/llm/llm.d.cts +1 -0
package/dist/llm/llm.d.ts +1 -0
package/dist/llm/llm.d.ts.map +1 -1
package/dist/llm/llm.js.map +1 -1
package/dist/llm/provider_format/openai.cjs +43 -20
package/dist/llm/provider_format/openai.cjs.map +1 -1
package/dist/llm/provider_format/openai.d.ts.map +1 -1
package/dist/llm/provider_format/openai.js +43 -20
package/dist/llm/provider_format/openai.js.map +1 -1
package/dist/llm/provider_format/openai.test.cjs +35 -0
package/dist/llm/provider_format/openai.test.cjs.map +1 -1
package/dist/llm/provider_format/openai.test.js +35 -0
package/dist/llm/provider_format/openai.test.js.map +1 -1
package/dist/llm/provider_format/utils.cjs +1 -1
package/dist/llm/provider_format/utils.cjs.map +1 -1
package/dist/llm/provider_format/utils.d.ts.map +1 -1
package/dist/llm/provider_format/utils.js +1 -1
package/dist/llm/provider_format/utils.js.map +1 -1
package/dist/voice/agent_activity.cjs +19 -19
package/dist/voice/agent_activity.cjs.map +1 -1
package/dist/voice/agent_activity.d.ts.map +1 -1
package/dist/voice/agent_activity.js +19 -19
package/dist/voice/agent_activity.js.map +1 -1
package/dist/voice/agent_session.cjs +64 -25
package/dist/voice/agent_session.cjs.map +1 -1
package/dist/voice/agent_session.d.cts +25 -1
package/dist/voice/agent_session.d.ts +25 -1
package/dist/voice/agent_session.d.ts.map +1 -1
package/dist/voice/agent_session.js +64 -25
package/dist/voice/agent_session.js.map +1 -1
package/dist/voice/background_audio.cjs.map +1 -1
package/dist/voice/generation.cjs +2 -1
package/dist/voice/generation.cjs.map +1 -1
package/dist/voice/generation.d.ts.map +1 -1
package/dist/voice/generation.js +2 -1
package/dist/voice/generation.js.map +1 -1
package/dist/voice/index.cjs +14 -1
package/dist/voice/index.cjs.map +1 -1
package/dist/voice/index.d.cts +1 -0
package/dist/voice/index.d.ts +1 -0
package/dist/voice/index.d.ts.map +1 -1
package/dist/voice/index.js +3 -1
package/dist/voice/index.js.map +1 -1
package/dist/voice/room_io/room_io.cjs +1 -0
package/dist/voice/room_io/room_io.cjs.map +1 -1
package/dist/voice/room_io/room_io.d.ts.map +1 -1
package/dist/voice/room_io/room_io.js +1 -0
package/dist/voice/room_io/room_io.js.map +1 -1
package/dist/voice/speech_handle.cjs +12 -3
package/dist/voice/speech_handle.cjs.map +1 -1
package/dist/voice/speech_handle.d.cts +12 -2
package/dist/voice/speech_handle.d.ts +12 -2
package/dist/voice/speech_handle.d.ts.map +1 -1
package/dist/voice/speech_handle.js +10 -2
package/dist/voice/speech_handle.js.map +1 -1
package/dist/voice/testing/index.cjs +52 -0
package/dist/voice/testing/index.cjs.map +1 -0
package/dist/voice/testing/index.d.cts +20 -0
package/dist/voice/testing/index.d.ts +20 -0
package/dist/voice/testing/index.d.ts.map +1 -0
package/dist/voice/testing/index.js +31 -0
package/dist/voice/testing/index.js.map +1 -0
package/dist/voice/testing/run_result.cjs +477 -0
package/dist/voice/testing/run_result.cjs.map +1 -0
package/dist/voice/testing/run_result.d.cts +226 -0
package/dist/voice/testing/run_result.d.ts +226 -0
package/dist/voice/testing/run_result.d.ts.map +1 -0
package/dist/voice/testing/run_result.js +451 -0
package/dist/voice/testing/run_result.js.map +1 -0
package/dist/voice/testing/types.cjs +46 -0
package/dist/voice/testing/types.cjs.map +1 -0
package/dist/voice/testing/types.d.cts +83 -0
package/dist/voice/testing/types.d.ts +83 -0
package/dist/voice/testing/types.d.ts.map +1 -0
package/dist/voice/testing/types.js +19 -0
package/dist/voice/testing/types.js.map +1 -0
package/package.json +3 -3
package/src/inference/llm.ts +42 -3
package/src/ipc/job_proc_lazy_main.ts +1 -1
package/src/llm/chat_context.ts +32 -2
package/src/llm/llm.ts +1 -0
package/src/llm/provider_format/openai.test.ts +40 -0
package/src/llm/provider_format/openai.ts +46 -19
package/src/llm/provider_format/utils.ts +5 -1
package/src/voice/agent_activity.ts +24 -22
package/src/voice/agent_session.ts +73 -28
package/src/voice/generation.ts +1 -0
package/src/voice/index.ts +1 -0
package/src/voice/room_io/room_io.ts +1 -0
package/src/voice/speech_handle.ts +24 -4
package/src/voice/testing/index.ts +49 -0
package/src/voice/testing/run_result.ts +576 -0
package/src/voice/testing/types.ts +118 -0

package/dist/voice/testing/types.d.ts ADDED

@@ -0,0 +1,83 @@
+import type { AgentHandoffItem, ChatMessage, ChatRole, FunctionCall, FunctionCallOutput } from '../../llm/chat_context.js';
+import type { Agent } from '../agent.js';
+/**
+ * Event representing an assistant or user message in the conversation.
+ */
+export interface ChatMessageEvent {
+    type: 'message';
+    item: ChatMessage;
+}
+/**
+ * Event representing a function/tool call initiated by the LLM.
+ */
+export interface FunctionCallEvent {
+    type: 'function_call';
+    item: FunctionCall;
+}
+/**
+ * Event representing the output/result of a function call.
+ */
+export interface FunctionCallOutputEvent {
+    type: 'function_call_output';
+    item: FunctionCallOutput;
+}
+/**
+ * Event representing an agent handoff (switching from one agent to another).
+ */
+export interface AgentHandoffEvent {
+    type: 'agent_handoff';
+    item: AgentHandoffItem;
+    oldAgent?: Agent;
+    newAgent: Agent;
+}
+/**
+ * Union type of all possible run events that can occur during a test run.
+ */
+export type RunEvent = ChatMessageEvent | FunctionCallEvent | FunctionCallOutputEvent | AgentHandoffEvent;
+/**
+ * Type guard to check if an event is a ChatMessageEvent.
+ */
+export declare function isChatMessageEvent(event: RunEvent): event is ChatMessageEvent;
+/**
+ * Type guard to check if an event is a FunctionCallEvent.
+ */
+export declare function isFunctionCallEvent(event: RunEvent): event is FunctionCallEvent;
+/**
+ * Type guard to check if an event is a FunctionCallOutputEvent.
+ */
+export declare function isFunctionCallOutputEvent(event: RunEvent): event is FunctionCallOutputEvent;
+/**
+ * Type guard to check if an event is an AgentHandoffEvent.
+ */
+export declare function isAgentHandoffEvent(event: RunEvent): event is AgentHandoffEvent;
+/**
+ * Options for message assertion.
+ */
+export interface MessageAssertOptions {
+    role?: ChatRole;
+}
+/**
+ * Options for function call assertion.
+ */
+export interface FunctionCallAssertOptions {
+    name?: string;
+    args?: Record<string, unknown>;
+}
+/**
+ * Options for function call output assertion.
+ */
+export interface FunctionCallOutputAssertOptions {
+    output?: string;
+    isError?: boolean;
+}
+/**
+ * Options for agent handoff assertion.
+ */
+export interface AgentHandoffAssertOptions {
+    newAgentType?: new (...args: any[]) => Agent;
+}
+/**
+ * Event type literals for type-safe event filtering.
+ */
+export type EventType = 'message' | 'function_call' | 'function_call_output' | 'agent_handoff';
+//# sourceMappingURL=types.d.ts.map

package/dist/voice/testing/types.d.ts.map ADDED

@@ -0,0 +1 @@

+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../../src/voice/testing/types.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EACV,gBAAgB,EAChB,WAAW,EACX,QAAQ,EACR,YAAY,EACZ,kBAAkB,EACnB,MAAM,2BAA2B,CAAC;AACnC,OAAO,KAAK,EAAE,KAAK,EAAE,MAAM,aAAa,CAAC;AAEzC;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,SAAS,CAAC;IAChB,IAAI,EAAE,WAAW,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,IAAI,EAAE,eAAe,CAAC;IACtB,IAAI,EAAE,YAAY,CAAC;CACpB;AAED;;GAEG;AACH,MAAM,WAAW,uBAAuB;IACtC,IAAI,EAAE,sBAAsB,CAAC;IAC7B,IAAI,EAAE,kBAAkB,CAAC;CAC1B;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,IAAI,EAAE,eAAe,CAAC;IACtB,IAAI,EAAE,gBAAgB,CAAC;IACvB,QAAQ,CAAC,EAAE,KAAK,CAAC;IACjB,QAAQ,EAAE,KAAK,CAAC;CACjB;AAED;;GAEG;AACH,MAAM,MAAM,QAAQ,GAChB,gBAAgB,GAChB,iBAAiB,GACjB,uBAAuB,GACvB,iBAAiB,CAAC;AAEtB;;GAEG;AACH,wBAAgB,kBAAkB,CAAC,KAAK,EAAE,QAAQ,GAAG,KAAK,IAAI,gBAAgB,CAE7E;AAED;;GAEG;AACH,wBAAgB,mBAAmB,CAAC,KAAK,EAAE,QAAQ,GAAG,KAAK,IAAI,iBAAiB,CAE/E;AAED;;GAEG;AACH,wBAAgB,yBAAyB,CAAC,KAAK,EAAE,QAAQ,GAAG,KAAK,IAAI,uBAAuB,CAE3F;AAED;;GAEG;AACH,wBAAgB,mBAAmB,CAAC,KAAK,EAAE,QAAQ,GAAG,KAAK,IAAI,iBAAiB,CAE/E;AAED;;GAEG;AACH,MAAM,WAAW,oBAAoB;IACnC,IAAI,CAAC,EAAE,QAAQ,CAAC;CACjB;AAED;;GAEG;AACH,MAAM,WAAW,yBAAyB;IACxC,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CAChC;AAED;;GAEG;AACH,MAAM,WAAW,+BAA+B;IAC9C,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,OAAO,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,yBAAyB;IAExC,YAAY,CAAC,EAAE,KAAK,GAAG,IAAI,EAAE,GAAG,EAAE,KAAK,KAAK,CAAC;CAC9C;AAED;;GAEG;AACH,MAAM,MAAM,SAAS,GAAG,SAAS,GAAG,eAAe,GAAG,sBAAsB,GAAG,eAAe,CAAC"}

package/dist/voice/testing/types.js ADDED

@@ -0,0 +1,19 @@
+function isChatMessageEvent(event) {
+  return event.type === "message";
+}
+function isFunctionCallEvent(event) {
+  return event.type === "function_call";
+}
+function isFunctionCallOutputEvent(event) {
+  return event.type === "function_call_output";
+}
+function isAgentHandoffEvent(event) {
+  return event.type === "agent_handoff";
+}
+export {
+  isAgentHandoffEvent,
+  isChatMessageEvent,
+  isFunctionCallEvent,
+  isFunctionCallOutputEvent
+};
+//# sourceMappingURL=types.js.map

package/dist/voice/testing/types.js.map ADDED

@@ -0,0 +1 @@

+ {"version":3,"sources":["../../../src/voice/testing/types.ts"],"sourcesContent":["// SPDX-FileCopyrightText: 2025 LiveKit, Inc.\n//\n// SPDX-License-Identifier: Apache-2.0\nimport type {\n AgentHandoffItem,\n ChatMessage,\n ChatRole,\n FunctionCall,\n FunctionCallOutput,\n} from '../../llm/chat_context.js';\nimport type { Agent } from '../agent.js';\n\n/**\n * Event representing an assistant or user message in the conversation.\n */\nexport interface ChatMessageEvent {\n type: 'message';\n item: ChatMessage;\n}\n\n/**\n * Event representing a function/tool call initiated by the LLM.\n */\nexport interface FunctionCallEvent {\n type: 'function_call';\n item: FunctionCall;\n}\n\n/**\n * Event representing the output/result of a function call.\n */\nexport interface FunctionCallOutputEvent {\n type: 'function_call_output';\n item: FunctionCallOutput;\n}\n\n/**\n * Event representing an agent handoff (switching from one agent to another).\n */\nexport interface AgentHandoffEvent {\n type: 'agent_handoff';\n item: AgentHandoffItem;\n oldAgent?: Agent;\n newAgent: Agent;\n}\n\n/**\n * Union type of all possible run events that can occur during a test run.\n */\nexport type RunEvent =\n | ChatMessageEvent\n | FunctionCallEvent\n | FunctionCallOutputEvent\n | AgentHandoffEvent;\n\n/**\n * Type guard to check if an event is a ChatMessageEvent.\n */\nexport function isChatMessageEvent(event: RunEvent): event is ChatMessageEvent {\n return event.type === 'message';\n}\n\n/**\n * Type guard to check if an event is a FunctionCallEvent.\n */\nexport function isFunctionCallEvent(event: RunEvent): event is FunctionCallEvent {\n return event.type === 'function_call';\n}\n\n/**\n * Type guard to check if an event is a FunctionCallOutputEvent.\n */\nexport function isFunctionCallOutputEvent(event: RunEvent): event is FunctionCallOutputEvent {\n return event.type === 'function_call_output';\n}\n\n/**\n * Type guard to check if an event is an AgentHandoffEvent.\n */\nexport function isAgentHandoffEvent(event: RunEvent): event is AgentHandoffEvent {\n return event.type === 'agent_handoff';\n}\n\n/**\n * Options for message assertion.\n */\nexport interface MessageAssertOptions {\n role?: ChatRole;\n}\n\n/**\n * Options for function call assertion.\n */\nexport interface FunctionCallAssertOptions {\n name?: string;\n args?: Record<string, unknown>;\n}\n\n/**\n * Options for function call output assertion.\n */\nexport interface FunctionCallOutputAssertOptions {\n output?: string;\n isError?: boolean;\n}\n\n/**\n * Options for agent handoff assertion.\n */\nexport interface AgentHandoffAssertOptions {\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n newAgentType?: new (...args: any[]) => Agent;\n}\n\n/**\n * Event type literals for type-safe event filtering.\n */\nexport type EventType = 'message' | 'function_call' | 'function_call_output' | 'agent_handoff';\n"],"mappings":"AA0DO,SAAS,mBAAmB,OAA4C;AAC7E,SAAO,MAAM,SAAS;AACxB;AAKO,SAAS,oBAAoB,OAA6C;AAC/E,SAAO,MAAM,SAAS;AACxB;AAKO,SAAS,0BAA0B,OAAmD;AAC3F,SAAO,MAAM,SAAS;AACxB;AAKO,SAAS,oBAAoB,OAA6C;AAC/E,SAAO,MAAM,SAAS;AACxB;","names":[]}

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@livekit/agents",
-  "version": "1.0.34",
+  "version": "1.0.36",
   "description": "LiveKit Agents - Node.js",
   "main": "dist/index.js",
   "require": "dist/index.cjs",
@@ -26,7 +26,7 @@
     "README.md"
   ],
   "devDependencies": {
-    "@livekit/rtc-node": "^0.13.22",
+    "@livekit/rtc-node": "^0.13.24",
     "@microsoft/api-extractor": "^7.35.0",
     "@types/fluent-ffmpeg": "^2.1.28",
     "@types/json-schema": "^7.0.15",
@@ -70,7 +70,7 @@
     "zod-to-json-schema": "^3.24.6"
   },
   "peerDependencies": {
-    "@livekit/rtc-node": "^0.13.22",
+    "@livekit/rtc-node": "^0.13.24",
     "zod": "^3.25.76 || ^4.1.8"
   },
   "scripts": {

package/src/inference/llm.ts CHANGED

@@ -27,7 +27,14 @@ export type OpenAIModels =
   | 'openai/gpt-4o-mini'
   | 'openai/gpt-oss-120b';
-export type GoogleModels = 'google/gemini-2.0-flash-lite';
+export type GoogleModels =
+  | 'google/gemini-3-pro-preview'
+  | 'google/gemini-3-flash-preview'
+  | 'google/gemini-2.5-pro'
+  | 'google/gemini-2.5-flash'
+  | 'google/gemini-2.5-flash-lite'
+  | 'google/gemini-2.0-flash'
+  | 'google/gemini-2.0-flash-lite';
 export type QwenModels = 'qwen/qwen3-235b-a22b-instruct';
@@ -235,6 +242,7 @@ export class LLMStream extends llm.LLMStream {
   private toolIndex?: number;
   private fncName?: string;
   private fncRawArguments?: string;
+  private toolExtra?: Record<string, unknown>;
   constructor(
     llm: LLM,
@@ -277,6 +285,7 @@ export class LLMStream extends llm.LLMStream {
     // (defined inside the run method to make sure the state is reset for each run/attempt)
     let retryable = true;
     this.toolCallId = this.fncName = this.fncRawArguments = this.toolIndex = undefined;
+    this.toolExtra = undefined;
     try {
       const messages = (await this.chatCtx.toProviderFormat(
@@ -428,6 +437,7 @@ export class LLMStream extends llm.LLMStream {
         if (this.toolCallId && tool.id && tool.index !== this.toolIndex) {
           callChunk = this.createRunningToolCallChunk(id, delta);
           this.toolCallId = this.fncName = this.fncRawArguments = undefined;
+          this.toolExtra = undefined;
         }
         // Start or continue building the current tool call
@@ -436,6 +446,10 @@ export class LLMStream extends llm.LLMStream {
           this.toolCallId = tool.id;
           this.fncName = tool.function.name;
           this.fncRawArguments = tool.function.arguments || '';
+          // Extract extra from tool call (e.g., Google thought signatures)
+          this.toolExtra =
+            // eslint-disable-next-line @typescript-eslint/no-explicit-any
+            ((tool as any).extra_content as Record<string, unknown> | undefined) ?? undefined;
         } else if (tool.function.arguments) {
           this.fncRawArguments = (this.fncRawArguments || '') + tool.function.arguments;
         }
@@ -454,11 +468,17 @@ export class LLMStream extends llm.LLMStream {
     ) {
       const callChunk = this.createRunningToolCallChunk(id, delta);
       this.toolCallId = this.fncName = this.fncRawArguments = undefined;
+      this.toolExtra = undefined;
       return callChunk;
     }
+    // Extract extra from delta (e.g., Google thought signatures on text parts)
+    const deltaExtra =
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      ((delta as any).extra_content as Record<string, unknown> | undefined) ?? undefined;
     // Regular content message
-    if (!delta.content) {
+    if (!delta.content && !deltaExtra) {
       return undefined;
     }
@@ -466,7 +486,8 @@ export class LLMStream extends llm.LLMStream {
       id,
       delta: {
         role: 'assistant',
-        content: delta.content,
+        content: delta.content || undefined,
+        extra: deltaExtra,
       },
     };
   }
@@ -475,19 +496,37 @@ export class LLMStream extends llm.LLMStream {
     id: string,
     delta: OpenAI.Chat.Completions.ChatCompletionChunk.Choice.Delta,
   ): llm.ChatChunk {
+    const toolExtra = this.toolExtra ? { ...this.toolExtra } : {};
+    const thoughtSignature = this.extractThoughtSignature(toolExtra);
+    const deltaExtra =
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      ((delta as any).extra_content as Record<string, unknown> | undefined) ?? undefined;
     return {
       id,
       delta: {
         role: 'assistant',
         content: delta.content || undefined,
+        extra: deltaExtra,
         toolCalls: [
           llm.FunctionCall.create({
             callId: this.toolCallId || '',
             name: this.fncName || '',
             args: this.fncRawArguments || '',
+            extra: toolExtra,
+            thoughtSignature,
           }),
         ],
       },
     };
   }
+  private extractThoughtSignature(extra?: Record<string, unknown>): string | undefined {
+    const googleExtra = extra?.google;
+    if (googleExtra && typeof googleExtra === 'object') {
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      return (googleExtra as any).thoughtSignature || (googleExtra as any).thought_signature;
+    }
+    return undefined;
+  }
 }

package/src/ipc/job_proc_lazy_main.ts CHANGED

@@ -136,7 +136,7 @@ const startJob = (
       shutdownTasks.push(callback());
     }
     await Promise.all(shutdownTasks).catch((error) =>
-      logger.error('error while shutting down the job', error),
+      logger.error({ error }, 'error while shutting down the job'),
     );
     process.send!({ case: 'done' });

package/src/llm/chat_context.ts CHANGED

@@ -189,6 +189,12 @@ export class FunctionCall {
   createdAt: number;
+  extra: Record<string, unknown>;
+  /**
+   * Optional grouping identifier for parallel tool calls.
+   */
+  groupId?: string;
   /**
    * Opaque signature for Gemini thinking mode.
    * When using Gemini 3+ models with thinking enabled, this signature must be
@@ -202,6 +208,8 @@ export class FunctionCall {
     args: string;
     id?: string;
     createdAt?: number;
+    extra?: Record<string, unknown>;
+    groupId?: string;
     thoughtSignature?: string;
   }) {
     const {
@@ -210,6 +218,8 @@ export class FunctionCall {
       args,
       id = shortuuid('item_'),
       createdAt = Date.now(),
+      extra = {},
+      groupId,
       thoughtSignature,
     } = params;
     this.id = id;
@@ -217,7 +227,15 @@ export class FunctionCall {
     this.args = args;
     this.name = name;
     this.createdAt = createdAt;
-    this.thoughtSignature = thoughtSignature;
+    this.extra = { ...extra };
+    this.groupId = groupId;
+    this.thoughtSignature =
+      thoughtSignature ??
+      (typeof this.extra.google === 'object' && this.extra.google !== null
+        ? // eslint-disable-next-line @typescript-eslint/no-explicit-any
+          (this.extra.google as any).thoughtSignature ||
+          (this.extra.google as any).thought_signature
+        : undefined);
   }
   static create(params: {
@@ -226,6 +244,8 @@ export class FunctionCall {
     args: string;
     id?: string;
     createdAt?: number;
+    extra?: Record<string, unknown>;
+    groupId?: string;
     thoughtSignature?: string;
   }) {
     return new FunctionCall(params);
@@ -241,6 +261,14 @@ export class FunctionCall {
       args: this.args,
     };
+    if (Object.keys(this.extra).length > 0) {
+      result.extra = this.extra as JSONValue;
+    }
+    if (this.groupId) {
+      result.groupId = this.groupId;
+    }
     if (this.thoughtSignature) {
       result.thoughtSignature = this.thoughtSignature;
     }
@@ -627,7 +655,9 @@ export class ChatContext {
           a.name !== b.name ||
           a.callId !== b.callId ||
           a.args !== b.args ||
-          a.thoughtSignature !== b.thoughtSignature
+          a.thoughtSignature !== b.thoughtSignature ||
+          a.groupId !== b.groupId ||
+          JSON.stringify(a.extra) !== JSON.stringify(b.extra)
         ) {
           return false;
         }

package/src/llm/llm.ts CHANGED

@@ -17,6 +17,7 @@ export interface ChoiceDelta {
   role: ChatRole;
   content?: string;
   toolCalls?: FunctionCall[];
+  extra?: Record<string, unknown>;
 }
 export interface CompletionUsage {

package/src/llm/provider_format/openai.test.ts CHANGED

@@ -258,6 +258,46 @@ describe('toChatCtx', () => {
     ]);
   });
+  it('should include provider-specific extra content on tool calls', async () => {
+    const ctx = ChatContext.empty();
+    const msg = ctx.addMessage({ role: 'assistant', content: 'Running tool' });
+    const toolCall = FunctionCall.create({
+      id: `${msg.id}/tool_1`,
+      callId: 'call_789',
+      name: 'google_call',
+      args: '{}',
+      extra: { google: { thoughtSignature: 'sig-123' } },
+    });
+    const toolOutput = FunctionCallOutput.create({
+      callId: 'call_789',
+      output: '{"result": "ok"}',
+      isError: false,
+    });
+    ctx.insert([toolCall, toolOutput]);
+    const result = await toChatCtx(ctx);
+    expect(result[0]).toEqual({
+      role: 'assistant',
+      content: 'Running tool',
+      tool_calls: [
+        {
+          type: 'function',
+          id: 'call_789',
+          function: { name: 'google_call', arguments: '{}' },
+          extra_content: { google: { thoughtSignature: 'sig-123' } },
+        },
+      ],
+    });
+    expect(result[1]).toEqual({
+      role: 'tool',
+      tool_call_id: 'call_789',
+      content: '{"result": "ok"}',
+    });
+  });
   it('should handle multiple tool calls in one message', async () => {
     const ctx = ChatContext.empty();

package/src/llm/provider_format/openai.ts CHANGED

@@ -17,11 +17,20 @@ export async function toChatCtx(chatCtx: ChatContext, injectDummyUserMessage: bo
       ? await toChatItem(group.message)
       : { role: 'assistant' };
-    const toolCalls = group.toolCalls.map((toolCall) => ({
-      type: 'function',
-      id: toolCall.callId,
-      function: { name: toolCall.name, arguments: toolCall.args },
-    }));
+    const toolCalls = group.toolCalls.map((toolCall) => {
+      const tc: Record<string, any> = {
+        type: 'function',
+        id: toolCall.callId,
+        function: { name: toolCall.name, arguments: toolCall.args },
+      };
+      // Include provider-specific extra content (e.g., Google thought signatures)
+      const googleExtra = getGoogleExtra(toolCall);
+      if (googleExtra) {
+        tc.extra_content = { google: googleExtra };
+      }
+      return tc;
+    });
     if (toolCalls.length > 0) {
       message['tool_calls'] = toolCalls;
@@ -53,24 +62,33 @@ async function toChatItem(item: ChatItem) {
       }
     }
-    const content =
-      listContent.length == 0
-        ? textContent
-        : textContent.length == 0
-          ? listContent
-          : [...listContent, { type: 'text', text: textContent }];
+    const result: Record<string, any> = { role: item.role };
+    if (listContent.length === 0) {
+      result.content = textContent;
+    } else {
+      if (textContent.length > 0) {
+        listContent.push({ type: 'text', text: textContent });
+      }
+      result.content = listContent;
+    }
-    return { role: item.role, content };
+    return result;
   } else if (item.type === 'function_call') {
+    const tc: Record<string, any> = {
+      id: item.callId,
+      type: 'function',
+      function: { name: item.name, arguments: item.args },
+    };
+    // Include provider-specific extra content (e.g., Google thought signatures)
+    const googleExtra = getGoogleExtra(item);
+    if (googleExtra) {
+      tc.extra_content = { google: googleExtra };
+    }
     return {
       role: 'assistant',
-      tool_calls: [
-        {
-          id: item.callId,
-          type: 'function',
-          function: { name: item.name, arguments: item.args },
-        },
-      ],
+      tool_calls: [tc],
     };
   } else if (item.type === 'function_call_output') {
     return {
@@ -84,6 +102,15 @@ async function toChatItem(item: ChatItem) {
   throw new Error(`Unsupported item type: ${item['type']}`);
 }
+function getGoogleExtra(
+  item: Partial<{ extra?: Record<string, unknown>; thoughtSignature?: string }>,
+): Record<string, unknown> | undefined {
+  const googleExtra =
+    (item.extra?.google as Record<string, unknown> | undefined) ||
+    (item.thoughtSignature ? { thoughtSignature: item.thoughtSignature } : undefined);
+  return googleExtra;
+}
 async function toImageContent(content: ImageContent) {
   const cacheKey = 'serialized_image'; // TODO: use hash of encoding options if available
   let serialized: SerializedImage;

package/src/llm/provider_format/utils.ts CHANGED

@@ -133,7 +133,11 @@ export function groupToolCalls(chatCtx: ChatContext) {
     if (isAssistantMessage || isFunctionCall) {
       // only assistant messages and function calls can be grouped
-      const groupId = item.id.split('/')[0]!;
+      // For function calls, use group_id if available (for parallel function calls),
+      // otherwise fall back to id-based grouping for backwards compatibility
+      const groupId =
+        item.type === 'function_call' && item.groupId ? item.groupId : item.id.split('/')[0]!;
       if (itemGroups[groupId] === undefined) {
         itemGroups[groupId] = ChatItemGroup.create();

package/src/voice/agent_activity.ts CHANGED

@@ -1350,11 +1350,14 @@ export class AgentActivity implements RecognitionHooks {
     );
     tasks.push(llmTask);
-    const [ttsTextInput, llmOutput] = llmGenData.textStream.tee();
     let ttsTask: Task<void> | null = null;
     let ttsStream: ReadableStream<AudioFrame> | null = null;
+    let llmOutput: ReadableStream<string>;
     if (audioOutput) {
+      // Only tee the stream when we need TTS
+      const [ttsTextInput, textOutput] = llmGenData.textStream.tee();
+      llmOutput = textOutput;
       [ttsTask, ttsStream] = performTTSInference(
         (...args) => this.agent.ttsNode(...args),
         ttsTextInput,
@@ -1362,6 +1365,9 @@ export class AgentActivity implements RecognitionHooks {
         replyAbortController,
       );
       tasks.push(ttsTask);
+    } else {
+      // No TTS needed, use the stream directly
+      llmOutput = llmGenData.textStream;
     }
     await speechHandle.waitIfNotInterrupted([speechHandle._waitForScheduled()]);
@@ -1421,12 +1427,16 @@ export class AgentActivity implements RecognitionHooks {
     //TODO(AJS-272): before executing tools, make sure we generated all the text
     // (this ensure everything is kept ordered)
-    const onToolExecutionStarted = (_: FunctionCall) => {
-      // TODO(brian): handle speech_handle item_added
+    const onToolExecutionStarted = (f: FunctionCall) => {
+      speechHandle._itemAdded([f]);
+      this.agent._chatCtx.items.push(f);
+      this.agentSession._toolItemsAdded([f]);
     };
-    const onToolExecutionCompleted = (_: ToolExecutionOutput) => {
-      // TODO(brian): handle speech_handle item_added
+    const onToolExecutionCompleted = (out: ToolExecutionOutput) => {
+      if (out.toolCallOutput) {
+        speechHandle._itemAdded([out.toolCallOutput]);
+      }
     };
     const [executeToolsTask, toolOutput] = performToolExecutions({
@@ -1501,6 +1511,7 @@ export class AgentActivity implements RecognitionHooks {
         });
         chatCtx.insert(message);
         this.agent._chatCtx.insert(message);
+        speechHandle._itemAdded([message]);
         this.agentSession._conversationItemAdded(message);
       }
@@ -1528,6 +1539,7 @@ export class AgentActivity implements RecognitionHooks {
       });
       chatCtx.insert(message);
       this.agent._chatCtx.insert(message);
+      speechHandle._itemAdded([message]);
       this.agentSession._conversationItemAdded(message);
       this.logger.info(
         { speech_id: speechHandle.id, message: textOut.text },
@@ -1612,28 +1624,18 @@ export class AgentActivity implements RecognitionHooks {
     if (shouldGenerateToolReply) {
       chatCtx.insert(toolMessages);
-      const handle = SpeechHandle.create({
-        allowInterruptions: speechHandle.allowInterruptions,
-        stepIndex: speechHandle._stepIndex + 1,
-        parent: speechHandle,
-      });
-      this.agentSession.emit(
-        AgentSessionEventTypes.SpeechCreated,
-        createSpeechCreatedEvent({
-          userInitiated: false,
-          source: 'tool_response',
-          speechHandle: handle,
-        }),
-      );
+      // Increment step count on SAME handle (parity with Python agent_activity.py L2081)
+      speechHandle._numSteps += 1;
       // Avoid setting tool_choice to "required" or a specific function when
       // passing tool response back to the LLM
       const respondToolChoice = draining || modelSettings.toolChoice === 'none' ? 'none' : 'auto';
+      // Reuse same speechHandle for tool response (parity with Python agent_activity.py L2122-2140)
       const toolResponseTask = this.createSpeechTask({
         task: Task.from(() =>
           this.pipelineReplyTask(
-            handle,
+            speechHandle,
             chatCtx,
             toolCtx,
             { toolChoice: respondToolChoice },
@@ -1643,13 +1645,13 @@ export class AgentActivity implements RecognitionHooks {
             toolMessages,
           ),
         ),
-        ownedSpeechHandle: handle,
+        ownedSpeechHandle: speechHandle,
         name: 'AgentActivity.pipelineReply',
       });
       toolResponseTask.finally(() => this.onPipelineReplyDone());
-      this.scheduleSpeech(handle, SpeechHandle.SPEECH_PRIORITY_NORMAL, true);
+      this.scheduleSpeech(speechHandle, SpeechHandle.SPEECH_PRIORITY_NORMAL, true);
     } else if (functionToolsExecutedEvent.functionCallOutputs.length > 0) {
       for (const msg of toolMessages) {
         msg.createdAt = replyStartedAt;