@livekit/agents 1.0.33 → 1.0.35
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.cjs.map +1 -1
- package/dist/inference/api_protos.d.cts +4 -4
- package/dist/inference/api_protos.d.ts +4 -4
- package/dist/inference/llm.cjs +30 -5
- package/dist/inference/llm.cjs.map +1 -1
- package/dist/inference/llm.d.cts +3 -1
- package/dist/inference/llm.d.ts +3 -1
- package/dist/inference/llm.d.ts.map +1 -1
- package/dist/inference/llm.js +30 -5
- package/dist/inference/llm.js.map +1 -1
- package/dist/ipc/inference_proc_executor.cjs.map +1 -1
- package/dist/ipc/job_proc_executor.cjs.map +1 -1
- package/dist/ipc/job_proc_lazy_main.cjs +1 -1
- package/dist/ipc/job_proc_lazy_main.cjs.map +1 -1
- package/dist/ipc/job_proc_lazy_main.js +1 -1
- package/dist/ipc/job_proc_lazy_main.js.map +1 -1
- package/dist/llm/chat_context.cjs +20 -2
- package/dist/llm/chat_context.cjs.map +1 -1
- package/dist/llm/chat_context.d.cts +9 -0
- package/dist/llm/chat_context.d.ts +9 -0
- package/dist/llm/chat_context.d.ts.map +1 -1
- package/dist/llm/chat_context.js +20 -2
- package/dist/llm/chat_context.js.map +1 -1
- package/dist/llm/fallback_adapter.cjs +278 -0
- package/dist/llm/fallback_adapter.cjs.map +1 -0
- package/dist/llm/fallback_adapter.d.cts +73 -0
- package/dist/llm/fallback_adapter.d.ts +73 -0
- package/dist/llm/fallback_adapter.d.ts.map +1 -0
- package/dist/llm/fallback_adapter.js +254 -0
- package/dist/llm/fallback_adapter.js.map +1 -0
- package/dist/llm/fallback_adapter.test.cjs +176 -0
- package/dist/llm/fallback_adapter.test.cjs.map +1 -0
- package/dist/llm/fallback_adapter.test.js +175 -0
- package/dist/llm/fallback_adapter.test.js.map +1 -0
- package/dist/llm/index.cjs +3 -0
- package/dist/llm/index.cjs.map +1 -1
- package/dist/llm/index.d.cts +1 -0
- package/dist/llm/index.d.ts +1 -0
- package/dist/llm/index.d.ts.map +1 -1
- package/dist/llm/index.js +4 -0
- package/dist/llm/index.js.map +1 -1
- package/dist/llm/llm.cjs +1 -1
- package/dist/llm/llm.cjs.map +1 -1
- package/dist/llm/llm.d.cts +1 -0
- package/dist/llm/llm.d.ts +1 -0
- package/dist/llm/llm.d.ts.map +1 -1
- package/dist/llm/llm.js +1 -1
- package/dist/llm/llm.js.map +1 -1
- package/dist/llm/provider_format/openai.cjs +43 -20
- package/dist/llm/provider_format/openai.cjs.map +1 -1
- package/dist/llm/provider_format/openai.d.ts.map +1 -1
- package/dist/llm/provider_format/openai.js +43 -20
- package/dist/llm/provider_format/openai.js.map +1 -1
- package/dist/llm/provider_format/openai.test.cjs +35 -0
- package/dist/llm/provider_format/openai.test.cjs.map +1 -1
- package/dist/llm/provider_format/openai.test.js +35 -0
- package/dist/llm/provider_format/openai.test.js.map +1 -1
- package/dist/llm/provider_format/utils.cjs +1 -1
- package/dist/llm/provider_format/utils.cjs.map +1 -1
- package/dist/llm/provider_format/utils.d.ts.map +1 -1
- package/dist/llm/provider_format/utils.js +1 -1
- package/dist/llm/provider_format/utils.js.map +1 -1
- package/dist/stt/stt.cjs +1 -1
- package/dist/stt/stt.cjs.map +1 -1
- package/dist/stt/stt.js +1 -1
- package/dist/stt/stt.js.map +1 -1
- package/dist/tts/tts.cjs +2 -2
- package/dist/tts/tts.cjs.map +1 -1
- package/dist/tts/tts.js +2 -2
- package/dist/tts/tts.js.map +1 -1
- package/dist/voice/background_audio.cjs.map +1 -1
- package/dist/voice/generation.cjs +2 -1
- package/dist/voice/generation.cjs.map +1 -1
- package/dist/voice/generation.d.ts.map +1 -1
- package/dist/voice/generation.js +2 -1
- package/dist/voice/generation.js.map +1 -1
- package/package.json +1 -1
- package/src/inference/llm.ts +42 -5
- package/src/ipc/job_proc_lazy_main.ts +1 -1
- package/src/llm/chat_context.ts +32 -2
- package/src/llm/fallback_adapter.test.ts +238 -0
- package/src/llm/fallback_adapter.ts +391 -0
- package/src/llm/index.ts +6 -0
- package/src/llm/llm.ts +2 -1
- package/src/llm/provider_format/openai.test.ts +40 -0
- package/src/llm/provider_format/openai.ts +46 -19
- package/src/llm/provider_format/utils.ts +5 -1
- package/src/stt/stt.ts +1 -1
- package/src/tts/tts.ts +2 -2
- package/src/voice/generation.ts +1 -0
package/src/llm/fallback_adapter.ts
ADDED

@@ -0,0 +1,391 @@
+// SPDX-FileCopyrightText: 2024 LiveKit, Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+import { APIConnectionError, APIError } from '../_exceptions.js';
+import { log } from '../log.js';
+import { type APIConnectOptions, DEFAULT_API_CONNECT_OPTIONS } from '../types.js';
+import type { ChatContext } from './chat_context.js';
+import type { ChatChunk } from './llm.js';
+import { LLM, LLMStream } from './llm.js';
+import type { ToolChoice, ToolContext } from './tool_context.js';
+
+/**
+ * Default connection options for FallbackAdapter.
+ * Uses max_retry=0 since fallback handles retries at a higher level.
+ */
+const DEFAULT_FALLBACK_API_CONNECT_OPTIONS: APIConnectOptions = {
+  maxRetry: 0,
+  timeoutMs: DEFAULT_API_CONNECT_OPTIONS.timeoutMs,
+  retryIntervalMs: DEFAULT_API_CONNECT_OPTIONS.retryIntervalMs,
+};
+
+/**
+ * Internal status tracking for each LLM instance.
+ */
+interface LLMStatus {
+  available: boolean;
+  recoveringTask: Promise<void> | null;
+}
+
+/**
+ * Event emitted when an LLM's availability changes.
+ */
+export interface AvailabilityChangedEvent {
+  llm: LLM;
+  available: boolean;
+}
+
+/**
+ * Options for creating a FallbackAdapter.
+ */
+export interface FallbackAdapterOptions {
+  /** List of LLM instances to fallback to (in order). */
+  llms: LLM[];
+  /** Timeout for each LLM attempt in seconds. Defaults to 5.0. */
+  attemptTimeout?: number;
+  /** Internal retries per LLM before moving to next. Defaults to 0. */
+  maxRetryPerLLM?: number;
+  /** Interval between retries in seconds. Defaults to 0.5. */
+  retryInterval?: number;
+  /** Whether to retry when LLM fails after chunks are sent. Defaults to false. */
+  retryOnChunkSent?: boolean;
+}
+
+/**
+ * FallbackAdapter is an LLM that can fallback to a different LLM if the current LLM fails.
+ *
+ * @example
+ * ```typescript
+ * const fallbackLLM = new FallbackAdapter({
+ *   llms: [primaryLLM, secondaryLLM, tertiaryLLM],
+ *   attemptTimeout: 5.0,
+ *   maxRetryPerLLM: 1,
+ * });
+ * ```
+ */
+export class FallbackAdapter extends LLM {
+  readonly llms: LLM[];
+  readonly attemptTimeout: number;
+  readonly maxRetryPerLLM: number;
+  readonly retryInterval: number;
+  readonly retryOnChunkSent: boolean;
+
+  /** @internal */
+  _status: LLMStatus[];
+
+  private logger = log();
+
+  constructor(options: FallbackAdapterOptions) {
+    super();
+
+    if (!options.llms || options.llms.length < 1) {
+      throw new Error('at least one LLM instance must be provided.');
+    }
+
+    this.llms = options.llms;
+    this.attemptTimeout = options.attemptTimeout ?? 5.0;
+    this.maxRetryPerLLM = options.maxRetryPerLLM ?? 0;
+    this.retryInterval = options.retryInterval ?? 0.5;
+    this.retryOnChunkSent = options.retryOnChunkSent ?? false;
+
+    // Initialize status for each LLM
+    this._status = this.llms.map(() => ({
+      available: true,
+      recoveringTask: null,
+    }));
+
+    // Forward metrics_collected events from child LLMs
+    for (const llm of this.llms) {
+      llm.on('metrics_collected', (metrics) => {
+        this.emit('metrics_collected', metrics);
+      });
+    }
+  }
+
+  get model(): string {
+    return 'FallbackAdapter';
+  }
+
+  label(): string {
+    return 'FallbackAdapter';
+  }
+
+  chat(opts: {
+    chatCtx: ChatContext;
+    toolCtx?: ToolContext;
+    connOptions?: APIConnectOptions;
+    parallelToolCalls?: boolean;
+    toolChoice?: ToolChoice;
+    extraKwargs?: Record<string, unknown>;
+  }): LLMStream {
+    return new FallbackLLMStream(this, {
+      chatCtx: opts.chatCtx,
+      toolCtx: opts.toolCtx,
+      connOptions: opts.connOptions || DEFAULT_FALLBACK_API_CONNECT_OPTIONS,
+      parallelToolCalls: opts.parallelToolCalls,
+      toolChoice: opts.toolChoice,
+      extraKwargs: opts.extraKwargs,
+    });
+  }
+
+  /**
+   * Emit availability changed event.
+   * @internal
+   */
+  _emitAvailabilityChanged(llm: LLM, available: boolean): void {
+    const event: AvailabilityChangedEvent = { llm, available };
+    // Use type assertion for custom event
+    (this as unknown as { emit: (event: string, data: AvailabilityChangedEvent) => void }).emit(
+      'llm_availability_changed',
+      event,
+    );
+  }
+}
+
+/**
+ * LLMStream implementation for FallbackAdapter.
+ * Handles fallback logic between multiple LLM providers.
+ */
+class FallbackLLMStream extends LLMStream {
+  private adapter: FallbackAdapter;
+  private parallelToolCalls?: boolean;
+  private toolChoice?: ToolChoice;
+  private extraKwargs?: Record<string, unknown>;
+  private _currentStream?: LLMStream;
+  private _log = log();
+
+  constructor(
+    adapter: FallbackAdapter,
+    opts: {
+      chatCtx: ChatContext;
+      toolCtx?: ToolContext;
+      connOptions: APIConnectOptions;
+      parallelToolCalls?: boolean;
+      toolChoice?: ToolChoice;
+      extraKwargs?: Record<string, unknown>;
+    },
+  ) {
+    super(adapter, {
+      chatCtx: opts.chatCtx,
+      toolCtx: opts.toolCtx,
+      connOptions: opts.connOptions,
+    });
+    this.adapter = adapter;
+    this.parallelToolCalls = opts.parallelToolCalls;
+    this.toolChoice = opts.toolChoice;
+    this.extraKwargs = opts.extraKwargs;
+  }
+
+  /**
+   * Override chatCtx to return current stream's context if available.
+   */
+  override get chatCtx(): ChatContext {
+    return this._currentStream?.chatCtx ?? super.chatCtx;
+  }
+
+  /**
+   * Try to generate with a single LLM.
+   * Returns an async generator that yields chunks.
+   */
+  private async *tryGenerate(
+    llm: LLM,
+    checkRecovery: boolean = false,
+  ): AsyncGenerator<ChatChunk, void, unknown> {
+    const connOptions: APIConnectOptions = {
+      ...this.connOptions,
+      maxRetry: this.adapter.maxRetryPerLLM,
+      timeoutMs: this.adapter.attemptTimeout * 1000,
+      retryIntervalMs: this.adapter.retryInterval * 1000,
+    };
+
+    const stream = llm.chat({
+      chatCtx: super.chatCtx,
+      toolCtx: this.toolCtx,
+      connOptions,
+      parallelToolCalls: this.parallelToolCalls,
+      toolChoice: this.toolChoice,
+      extraKwargs: this.extraKwargs,
+    });
+
+    // Listen for error events - child LLMs emit errors via their LLM instance, not the stream
+    let streamError: Error | undefined;
+    const errorHandler = (ev: { error: Error }) => {
+      streamError = ev.error;
+    };
+    llm.on('error', errorHandler);
+
+    try {
+      let shouldSetCurrent = !checkRecovery;
+      for await (const chunk of stream) {
+        if (shouldSetCurrent) {
+          shouldSetCurrent = false;
+          this._currentStream = stream;
+        }
+        yield chunk;
+      }
+
+      // If an error was emitted but not thrown through iteration, throw it now
+      if (streamError) {
+        throw streamError;
+      }
+    } catch (error) {
+      if (error instanceof APIError) {
+        if (checkRecovery) {
+          this._log.warn({ llm: llm.label(), error }, 'recovery failed');
+        } else {
+          this._log.warn({ llm: llm.label(), error }, 'failed, switching to next LLM');
+        }
+        throw error;
+      }
+
+      // Handle timeout errors
+      if (error instanceof Error && error.name === 'AbortError') {
+        if (checkRecovery) {
+          this._log.warn({ llm: llm.label() }, 'recovery timed out');
+        } else {
+          this._log.warn({ llm: llm.label() }, 'timed out, switching to next LLM');
+        }
+        throw error;
+      }
+
+      // Unexpected error
+      if (checkRecovery) {
+        this._log.error({ llm: llm.label(), error }, 'recovery unexpected error');
+      } else {
+        this._log.error({ llm: llm.label(), error }, 'unexpected error, switching to next LLM');
+      }
+      throw error;
+    } finally {
+      llm.off('error', errorHandler);
+    }
+  }
+
+  /**
+   * Start background recovery task for an LLM.
+   */
+  private tryRecovery(llm: LLM, index: number): void {
+    const status = this.adapter._status[index]!;
+
+    // Skip if already recovering
+    if (status.recoveringTask !== null) {
+      return;
+    }
+
+    const recoverTask = async (): Promise<void> => {
+      try {
+        // Try to generate (just iterate to check if it works)
+        // eslint-disable-next-line @typescript-eslint/no-unused-vars
+        for await (const _chunk of this.tryGenerate(llm, true)) {
+          // Just consume the stream to verify it works
+        }
+
+        // Recovery successful
+        status.available = true;
+        this._log.info({ llm: llm.label() }, 'LLM recovered');
+        this.adapter._emitAvailabilityChanged(llm, true);
+      } catch {
+        // Recovery failed, stay unavailable
+      } finally {
+        status.recoveringTask = null;
+      }
+    };
+
+    // Fire and forget
+    status.recoveringTask = recoverTask();
+  }
+
+  /**
+   * Main run method - iterates through LLMs with fallback logic.
+   */
+  protected async run(): Promise<void> {
+    const startTime = Date.now();
+
+    // Check if all LLMs are unavailable
+    const allFailed = this.adapter._status.every((s) => !s.available);
+    if (allFailed) {
+      this._log.error('all LLMs are unavailable, retrying...');
+    }
+
+    for (let i = 0; i < this.adapter.llms.length; i++) {
+      const llm = this.adapter.llms[i]!;
+      const status = this.adapter._status[i]!;
+
+      this._log.debug(
+        { llm: llm.label(), index: i, available: status.available, allFailed },
+        'checking LLM',
+      );
+
+      if (status.available || allFailed) {
+        let textSent = '';
+        const toolCallsSent: string[] = [];
+
+        try {
+          this._log.info({ llm: llm.label() }, 'FallbackAdapter: Attempting provider');
+
+          let chunkCount = 0;
+          for await (const chunk of this.tryGenerate(llm, false)) {
+            chunkCount++;
+            // Track what's been sent
+            if (chunk.delta) {
+              if (chunk.delta.content) {
+                textSent += chunk.delta.content;
+              }
+              if (chunk.delta.toolCalls) {
+                for (const tc of chunk.delta.toolCalls) {
+                  if (tc.name) {
+                    toolCallsSent.push(tc.name);
+                  }
+                }
+              }
+            }
+
+            // Forward chunk to queue
+            this._log.debug({ llm: llm.label(), chunkCount }, 'run: forwarding chunk to queue');
+            this.queue.put(chunk);
+          }
+
+          // Success!
+          this._log.info(
+            { llm: llm.label(), totalChunks: chunkCount, textLength: textSent.length },
+            'FallbackAdapter: Provider succeeded',
+          );
+          return;
+        } catch (error) {
+          // Mark as unavailable if it was available before
+          if (status.available) {
+            status.available = false;
+            this.adapter._emitAvailabilityChanged(llm, false);
+          }
+
+          // Check if we sent data before failing
+          if (textSent || toolCallsSent.length > 0) {
+            const extra = { textSent, toolCallsSent };
+
+            if (!this.adapter.retryOnChunkSent) {
+              this._log.error(
+                { llm: llm.label(), ...extra },
+                'failed after sending chunk, skip retrying. Set `retryOnChunkSent` to `true` to enable.',
+              );
+              throw error;
+            }
+
+            this._log.warn(
+              { llm: llm.label(), ...extra },
+              'failed after sending chunk, retrying...',
+            );
+          }
+        }
+      }
+
+      // Trigger background recovery for this LLM
+      this.tryRecovery(llm, i);
+    }
+
+    // All LLMs failed
+    const duration = (Date.now() - startTime) / 1000;
+    const labels = this.adapter.llms.map((l) => l.label()).join(', ');
+    throw new APIConnectionError({
+      message: `all LLMs failed (${labels}) after ${duration.toFixed(2)}s`,
+    });
+  }
+}
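
Taken together, the new file wires fallback ordering, availability tracking, and background recovery into a single `LLM` implementation. A minimal usage sketch, assuming two already-configured `LLM` instances and an existing `ChatContext`; the placeholder names and the relative import path are illustrative, while the constructor options, the `llm_availability_changed` event, and the `chat()` signature come from the code above:

```typescript
import type { ChatContext } from './chat_context.js';
import { FallbackAdapter, type AvailabilityChangedEvent } from './fallback_adapter.js';
import type { LLM } from './llm.js';

// Hypothetical placeholders: any two configured LLM instances and a chat context.
declare const primaryLLM: LLM;
declare const backupLLM: LLM;
declare const chatCtx: ChatContext;

const fallbackLLM = new FallbackAdapter({
  llms: [primaryLLM, backupLLM],
  attemptTimeout: 5.0, // seconds per attempt before moving on
  maxRetryPerLLM: 1, // internal retries before falling through to the next LLM
});

// The adapter emits this event through a type assertion internally,
// so a matching cast is used here to register the listener.
(fallbackLLM as unknown as {
  on(event: 'llm_availability_changed', cb: (ev: AvailabilityChangedEvent) => void): void;
}).on('llm_availability_changed', (ev) => {
  console.log(`${ev.llm.label()} is now ${ev.available ? 'available' : 'unavailable'}`);
});

// Streaming works like any other LLM; chunks come from whichever provider succeeds.
async function main() {
  for await (const chunk of fallbackLLM.chat({ chatCtx })) {
    process.stdout.write(chunk.delta?.content ?? '');
  }
}
```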
package/src/llm/index.ts
CHANGED
package/src/llm/llm.ts
CHANGED

@@ -17,6 +17,7 @@ export interface ChoiceDelta {
   role: ChatRole;
   content?: string;
   toolCalls?: FunctionCall[];
+  extra?: Record<string, unknown>;
 }
 
 export interface CompletionUsage {
@@ -135,7 +136,7 @@ export abstract class LLMStream implements AsyncIterableIterator<ChatChunk> {
     // is run **after** the constructor has finished. Otherwise we get
     // runtime error when trying to access class variables in the
     // `run` method.
-    startSoon(() => this.mainTask().
+    startSoon(() => this.mainTask().finally(() => this.queue.close()));
   }
 
   private _mainTaskImpl = async (span: Span) => {
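
The new `extra` field on `ChoiceDelta` carries provider-specific chunk payloads, and the `startSoon` fix, repeated in stt.ts and tts.ts below, chains the queue close through `finally` so it also runs when `mainTask()` rejects. A minimal stand-in queue, not the actual @livekit/agents implementation, sketching why that matters:

```typescript
// Minimal stand-in for the stream's chunk queue, for illustration only.
class ClosableQueue<T> {
  private items: T[] = [];
  private waiters: ((v: T | null) => void)[] = [];
  private closed = false;

  put(item: T) {
    const w = this.waiters.shift();
    if (w) w(item);
    else this.items.push(item);
  }

  close() {
    this.closed = true;
    for (const w of this.waiters.splice(0)) w(null); // wake blocked readers
  }

  async get(): Promise<T | null> {
    if (this.items.length > 0) return this.items.shift()!;
    if (this.closed) return null;
    return new Promise((resolve) => this.waiters.push(resolve));
  }
}

async function demo() {
  const queue = new ClosableQueue<string>();
  const producer = async () => {
    queue.put('hello');
    throw new Error('provider failed mid-stream');
  };

  // Chaining the close through `finally` guarantees it runs whether the task
  // fulfills or rejects; if it only ran on fulfillment, the get() loop below
  // would hang forever after the failure.
  producer().finally(() => queue.close()).catch(() => {});

  let item: string | null;
  while ((item = await queue.get()) !== null) {
    console.log(item); // prints "hello", then the loop exits because the queue closed
  }
}

demo();
```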
package/src/llm/provider_format/openai.test.ts
CHANGED

@@ -258,6 +258,46 @@ describe('toChatCtx', () => {
     ]);
   });
 
+  it('should include provider-specific extra content on tool calls', async () => {
+    const ctx = ChatContext.empty();
+    const msg = ctx.addMessage({ role: 'assistant', content: 'Running tool' });
+
+    const toolCall = FunctionCall.create({
+      id: `${msg.id}/tool_1`,
+      callId: 'call_789',
+      name: 'google_call',
+      args: '{}',
+      extra: { google: { thoughtSignature: 'sig-123' } },
+    });
+    const toolOutput = FunctionCallOutput.create({
+      callId: 'call_789',
+      output: '{"result": "ok"}',
+      isError: false,
+    });
+
+    ctx.insert([toolCall, toolOutput]);
+
+    const result = await toChatCtx(ctx);
+
+    expect(result[0]).toEqual({
+      role: 'assistant',
+      content: 'Running tool',
+      tool_calls: [
+        {
+          type: 'function',
+          id: 'call_789',
+          function: { name: 'google_call', arguments: '{}' },
+          extra_content: { google: { thoughtSignature: 'sig-123' } },
+        },
+      ],
+    });
+    expect(result[1]).toEqual({
+      role: 'tool',
+      tool_call_id: 'call_789',
+      content: '{"result": "ok"}',
+    });
+  });
+
   it('should handle multiple tool calls in one message', async () => {
     const ctx = ChatContext.empty();
 
package/src/llm/provider_format/openai.ts
CHANGED

@@ -17,11 +17,20 @@ export async function toChatCtx(chatCtx: ChatContext, injectDummyUserMessage: bo
       ? await toChatItem(group.message)
       : { role: 'assistant' };
 
-    const toolCalls = group.toolCalls.map((toolCall) =>
-
-
-
-
+    const toolCalls = group.toolCalls.map((toolCall) => {
+      const tc: Record<string, any> = {
+        type: 'function',
+        id: toolCall.callId,
+        function: { name: toolCall.name, arguments: toolCall.args },
+      };
+
+      // Include provider-specific extra content (e.g., Google thought signatures)
+      const googleExtra = getGoogleExtra(toolCall);
+      if (googleExtra) {
+        tc.extra_content = { google: googleExtra };
+      }
+      return tc;
+    });
 
     if (toolCalls.length > 0) {
       message['tool_calls'] = toolCalls;
@@ -53,24 +62,33 @@ async function toChatItem(item: ChatItem) {
       }
     }
 
-    const
-
-
-
-
-
+    const result: Record<string, any> = { role: item.role };
+    if (listContent.length === 0) {
+      result.content = textContent;
+    } else {
+      if (textContent.length > 0) {
+        listContent.push({ type: 'text', text: textContent });
+      }
+      result.content = listContent;
+    }
 
-    return
+    return result;
   } else if (item.type === 'function_call') {
+    const tc: Record<string, any> = {
+      id: item.callId,
+      type: 'function',
+      function: { name: item.name, arguments: item.args },
+    };
+
+    // Include provider-specific extra content (e.g., Google thought signatures)
+    const googleExtra = getGoogleExtra(item);
+    if (googleExtra) {
+      tc.extra_content = { google: googleExtra };
+    }
+
     return {
       role: 'assistant',
-      tool_calls: [
-        {
-          id: item.callId,
-          type: 'function',
-          function: { name: item.name, arguments: item.args },
-        },
-      ],
+      tool_calls: [tc],
     };
   } else if (item.type === 'function_call_output') {
     return {
@@ -84,6 +102,15 @@ async function toChatItem(item: ChatItem) {
   throw new Error(`Unsupported item type: ${item['type']}`);
 }
 
+function getGoogleExtra(
+  item: Partial<{ extra?: Record<string, unknown>; thoughtSignature?: string }>,
+): Record<string, unknown> | undefined {
+  const googleExtra =
+    (item.extra?.google as Record<string, unknown> | undefined) ||
+    (item.thoughtSignature ? { thoughtSignature: item.thoughtSignature } : undefined);
+  return googleExtra;
+}
+
 async function toImageContent(content: ImageContent) {
   const cacheKey = 'serialized_image'; // TODO: use hash of encoding options if available
   let serialized: SerializedImage;
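
The new `getGoogleExtra` helper (module-private; called directly here only for illustration) normalizes two input shapes: the `extra.google` bag on a `FunctionCall` and the legacy top-level `thoughtSignature`. A quick sketch with hypothetical values:

```typescript
// Either shape yields the same google extra object:
getGoogleExtra({ extra: { google: { thoughtSignature: 'sig-123' } } });
// => { thoughtSignature: 'sig-123' }

getGoogleExtra({ thoughtSignature: 'sig-123' });
// => { thoughtSignature: 'sig-123' }

// With neither present it returns undefined, so no extra_content
// field is added to the serialized tool call:
getGoogleExtra({});
// => undefined
```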
package/src/llm/provider_format/utils.ts
CHANGED

@@ -133,7 +133,11 @@ export function groupToolCalls(chatCtx: ChatContext) {
 
     if (isAssistantMessage || isFunctionCall) {
       // only assistant messages and function calls can be grouped
-
+      // For function calls, use group_id if available (for parallel function calls),
+      // otherwise fall back to id-based grouping for backwards compatibility
+      const groupId =
+        item.type === 'function_call' && item.groupId ? item.groupId : item.id.split('/')[0]!;
+
       if (itemGroups[groupId] === undefined) {
         itemGroups[groupId] = ChatItemGroup.create();
 
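
The practical effect of keying groups on `groupId`: parallel tool calls issued in one assistant turn collapse into a single assistant message with multiple `tool_calls`. A sketch with hypothetical ids:

```typescript
// Two function_call items from the same assistant turn (values hypothetical):
//   { type: 'function_call', id: 'item_A/tool_1', groupId: 'turn_1', ... }
//   { type: 'function_call', id: 'item_B/tool_1', groupId: 'turn_1', ... }
//
// New key:  groupId → 'turn_1' for both → one group → one assistant message
//           with tool_calls: [callA, callB]
// Fallback: id.split('/')[0] → 'item_A' vs 'item_B' → two separate groups →
//           two assistant messages, each carrying a single tool call
```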
package/src/stt/stt.ts
CHANGED

@@ -195,7 +195,7 @@ export abstract class SpeechStream implements AsyncIterableIterator<SpeechEvent>
     // is run **after** the constructor has finished. Otherwise we get
     // runtime error when trying to access class variables in the
     // `run` method.
-    startSoon(() => this.mainTask().
+    startSoon(() => this.mainTask().finally(() => this.queue.close()));
   }
 
   private async mainTask() {
package/src/tts/tts.ts
CHANGED

@@ -169,7 +169,7 @@ export abstract class SynthesizeStream
     // is run **after** the constructor has finished. Otherwise we get
     // runtime error when trying to access class variables in the
     // `run` method.
-    startSoon(() => this.mainTask().
+    startSoon(() => this.mainTask().finally(() => this.queue.close()));
   }
 
   private _mainTaskImpl = async (span: Span) => {
@@ -448,7 +448,7 @@ export abstract class ChunkedStream implements AsyncIterableIterator<Synthesized
     // is run **after** the constructor has finished. Otherwise we get
     // runtime error when trying to access class variables in the
     // `run` method.
-    Promise.resolve().then(() => this.mainTask().
+    Promise.resolve().then(() => this.mainTask().finally(() => this.queue.close()));
   }
 
   private _mainTaskImpl = async (span: Span) => {
package/src/voice/generation.ts
CHANGED