npm - @livekit/agents-plugin-openai - Versions diffs - 0.9.3 → 1.0.0-next.0 - Mend

@livekit/agents-plugin-openai 0.9.3 → 1.0.0-next.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (47)

package/dist/index.cjs +16 -5
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +4 -4
package/dist/index.d.ts +4 -4
package/dist/index.d.ts.map +1 -1
package/dist/index.js +14 -3
package/dist/index.js.map +1 -1
package/dist/llm.cjs +156 -197
package/dist/llm.cjs.map +1 -1
package/dist/llm.d.cts +27 -8
package/dist/llm.d.ts +27 -8
package/dist/llm.d.ts.map +1 -1
package/dist/llm.js +164 -188
package/dist/llm.js.map +1 -1
package/dist/models.cjs +14 -0
package/dist/models.cjs.map +1 -1
package/dist/models.d.cts +11 -6
package/dist/models.d.ts +11 -6
package/dist/models.d.ts.map +1 -1
package/dist/models.js +6 -0
package/dist/models.js.map +1 -1
package/dist/realtime/api_proto.cjs.map +1 -1
package/dist/realtime/api_proto.d.cts +15 -0
package/dist/realtime/api_proto.d.ts +15 -0
package/dist/realtime/api_proto.d.ts.map +1 -1
package/dist/realtime/api_proto.js.map +1 -1
package/dist/realtime/realtime_model.cjs +1057 -820
package/dist/realtime/realtime_model.cjs.map +1 -1
package/dist/realtime/realtime_model.d.cts +126 -160
package/dist/realtime/realtime_model.d.ts +126 -160
package/dist/realtime/realtime_model.d.ts.map +1 -1
package/dist/realtime/realtime_model.js +1067 -825
package/dist/realtime/realtime_model.js.map +1 -1
package/dist/tts.cjs +5 -5
package/dist/tts.cjs.map +1 -1
package/dist/tts.d.cts +2 -1
package/dist/tts.d.ts +2 -1
package/dist/tts.d.ts.map +1 -1
package/dist/tts.js +6 -6
package/dist/tts.js.map +1 -1
package/package.json +9 -7
package/src/index.ts +19 -5
package/src/llm.ts +227 -228
package/src/models.ts +83 -5
package/src/realtime/api_proto.ts +15 -1
package/src/realtime/realtime_model.ts +1305 -996
package/src/tts.ts +6 -6

package/src/llm.ts CHANGED

@@ -1,10 +1,16 @@
-// SPDX-FileCopyrightText: 2024 LiveKit, Inc.
+// SPDX-FileCopyrightText: 2025 LiveKit, Inc.
 //
 // SPDX-License-Identifier: Apache-2.0
-import { llm, log } from '@livekit/agents';
-import { randomUUID } from 'node:crypto';
+import type { APIConnectOptions } from '@livekit/agents';
+import {
+  APIConnectionError,
+  APIStatusError,
+  APITimeoutError,
+  DEFAULT_API_CONNECT_OPTIONS,
+  llm,
+  toError,
+} from '@livekit/agents';
 import { AzureOpenAI, OpenAI } from 'openai';
-import sharp from 'sharp';
 import type {
   CerebrasChatModels,
   ChatModels,
@@ -25,21 +31,29 @@ export interface LLMOptions {
   user?: string;
   temperature?: number;
   client?: OpenAI;
+  toolChoice?: llm.ToolChoice;
+  parallelToolCalls?: boolean;
+  metadata?: Record<string, string>;
+  maxCompletionTokens?: number;
+  serviceTier?: string;
+  store?: boolean;
 }
 const defaultLLMOptions: LLMOptions = {
-  model: 'gpt-4o',
+  model: 'gpt-4.1',
   apiKey: process.env.OPENAI_API_KEY,
+  parallelToolCalls: true,
 };
 const defaultAzureLLMOptions: LLMOptions = {
-  model: 'gpt-4o',
+  model: 'gpt-4.1',
   apiKey: process.env.AZURE_API_KEY,
 };
 export class LLM extends llm.LLM {
   #opts: LLMOptions;
   #client: OpenAI;
+  #providerFmt: llm.ProviderFormat;
   /**
    * Create a new instance of OpenAI LLM.
@@ -48,10 +62,14 @@ export class LLM extends llm.LLM {
    * `apiKey` must be set to your OpenAI API key, either using the argument or by setting the
    * `OPENAI_API_KEY` environmental variable.
    */
-  constructor(opts: Partial<LLMOptions> = defaultLLMOptions) {
+  constructor(
+    opts: Partial<LLMOptions> = defaultLLMOptions,
+    providerFmt: llm.ProviderFormat = 'openai',
+  ) {
     super();
     this.#opts = { ...defaultLLMOptions, ...opts };
+    this.#providerFmt = providerFmt;
     if (this.#opts.apiKey === undefined) {
       throw new Error('OpenAI API key is required, whether as an argument or as $OPENAI_API_KEY');
     }
@@ -64,6 +82,14 @@ export class LLM extends llm.LLM {
       });
   }
+  label(): string {
+    return 'openai.LLM';
+  }
+  get model(): string {
+    return this.#opts.model;
+  }
   /**
    * Create a new instance of OpenAI LLM with Azure.
    *
@@ -415,29 +441,65 @@ export class LLM extends llm.LLM {
   chat({
     chatCtx,
-    fncCtx,
-    temperature,
-    n,
+    toolCtx,
+    connOptions = DEFAULT_API_CONNECT_OPTIONS,
     parallelToolCalls,
+    toolChoice,
+    extraKwargs,
   }: {
     chatCtx: llm.ChatContext;
-    fncCtx?: llm.FunctionContext | undefined;
-    temperature?: number | undefined;
-    n?: number | undefined;
-    parallelToolCalls?: boolean | undefined;
+    toolCtx?: llm.ToolContext;
+    connOptions?: APIConnectOptions;
+    parallelToolCalls?: boolean;
+    toolChoice?: llm.ToolChoice;
+    extraKwargs?: Record<string, any>;
   }): LLMStream {
-    temperature = temperature || this.#opts.temperature;
+    const extras: Record<string, any> = { ...extraKwargs }; // eslint-disable-line @typescript-eslint/no-explicit-any
+    if (this.#opts.metadata) {
+      extras.metadata = this.#opts.metadata;
+    }
+    if (this.#opts.user) {
+      extras.user = this.#opts.user;
+    }
+    if (this.#opts.maxCompletionTokens) {
+      extras.max_completion_tokens = this.#opts.maxCompletionTokens;
+    }
+    if (this.#opts.temperature) {
+      extras.temperature = this.#opts.temperature;
+    }
+    if (this.#opts.serviceTier) {
+      extras.service_tier = this.#opts.serviceTier;
+    }
+    if (this.#opts.store !== undefined) {
+      extras.store = this.#opts.store;
+    }
+    parallelToolCalls =
+      parallelToolCalls !== undefined ? parallelToolCalls : this.#opts.parallelToolCalls;
+    if (toolCtx && Object.keys(toolCtx).length > 0 && parallelToolCalls !== undefined) {
+      extras.parallel_tool_calls = parallelToolCalls;
+    }
-    return new LLMStream(
-      this,
-      this.#client,
+    toolChoice = toolChoice !== undefined ? toolChoice : this.#opts.toolChoice;
+    if (toolChoice) {
+      extras.tool_choice = toolChoice;
+    }
+    return new LLMStream(this, {
+      model: this.#opts.model,
+      providerFmt: this.#providerFmt,
+      client: this.#client,
       chatCtx,
-      fncCtx,
-      this.#opts,
-      parallelToolCalls,
-      temperature,
-      n,
-    );
+      toolCtx,
+      connOptions,
+      extraKwargs: extras,
+    });
   }
 }
@@ -445,78 +507,113 @@ export class LLMStream extends llm.LLMStream {
   #toolCallId?: string;
   #fncName?: string;
   #fncRawArguments?: string;
+  #toolIndex?: number;
   #client: OpenAI;
-  #logger = log();
-  #id = randomUUID();
-  label = 'openai.LLMStream';
+  #providerFmt: llm.ProviderFormat;
+  #extraKwargs: Record<string, any>;
+  private model: string | ChatModels;
   constructor(
     llm: LLM,
-    client: OpenAI,
-    chatCtx: llm.ChatContext,
-    fncCtx: llm.FunctionContext | undefined,
-    opts: LLMOptions,
-    parallelToolCalls?: boolean,
-    temperature?: number,
-    n?: number,
+    {
+      model,
+      providerFmt,
+      client,
+      chatCtx,
+      toolCtx,
+      connOptions,
+      extraKwargs,
+    }: {
+      model: string | ChatModels;
+      providerFmt: llm.ProviderFormat;
+      client: OpenAI;
+      chatCtx: llm.ChatContext;
+      toolCtx?: llm.ToolContext;
+      connOptions: APIConnectOptions;
+      extraKwargs: Record<string, any>;
+    },
   ) {
-    super(llm, chatCtx, fncCtx);
+    super(llm, { chatCtx, toolCtx, connOptions });
     this.#client = client;
-    this.#run(opts, n, parallelToolCalls, temperature);
+    this.#providerFmt = providerFmt;
+    this.#extraKwargs = extraKwargs;
+    this.model = model;
   }
-  async #run(opts: LLMOptions, n?: number, parallelToolCalls?: boolean, temperature?: number) {
-    const tools = this.fncCtx
-      ? Object.entries(this.fncCtx).map(([name, func]) => ({
-          type: 'function' as const,
-          function: {
-            name,
-            description: func.description,
-            // don't format parameters if they are raw openai params
-            parameters:
-              func.parameters.type == ('object' as const)
-                ? func.parameters
-                : llm.oaiParams(func.parameters),
-          },
-        }))
-      : undefined;
+  protected async run(): Promise<void> {
+    let retryable = true;
     try {
+      const messages = (await this.chatCtx.toProviderFormat(
+        this.#providerFmt,
+      )) as OpenAI.ChatCompletionMessageParam[];
+      const tools = this.toolCtx
+        ? Object.entries(this.toolCtx).map(([name, func]) => ({
+            type: 'function' as const,
+            function: {
+              name,
+              description: func.description,
+              parameters: llm.toJsonSchema(
+                func.parameters,
+              ) as unknown as OpenAI.Chat.Completions.ChatCompletionTool['function']['parameters'],
+            },
+          }))
+        : undefined;
       const stream = await this.#client.chat.completions.create({
-        model: opts.model,
-        user: opts.user,
-        n,
-        messages: await Promise.all(
-          this.chatCtx.messages.map(async (m) => await buildMessage(m, this.#id)),
-        ),
-        temperature: temperature || opts.temperature,
-        stream_options: { include_usage: true },
-        stream: true,
+        model: this.model,
+        messages,
         tools,
-        parallel_tool_calls: this.fncCtx && parallelToolCalls,
+        stream: true,
+        stream_options: { include_usage: true },
+        ...this.#extraKwargs,
       });
       for await (const chunk of stream) {
         for (const choice of chunk.choices) {
+          if (this.abortController.signal.aborted) {
+            break;
+          }
           const chatChunk = this.#parseChoice(chunk.id, choice);
           if (chatChunk) {
+            retryable = false;
             this.queue.put(chatChunk);
           }
+        }
-          if (chunk.usage) {
-            const usage = chunk.usage;
-            this.queue.put({
-              requestId: chunk.id,
-              choices: [],
-              usage: {
-                completionTokens: usage.completion_tokens,
-                promptTokens: usage.prompt_tokens,
-                totalTokens: usage.total_tokens,
-              },
-            });
-          }
+        if (chunk.usage) {
+          const usage = chunk.usage;
+          retryable = false;
+          this.queue.put({
+            id: chunk.id,
+            usage: {
+              completionTokens: usage.completion_tokens,
+              promptTokens: usage.prompt_tokens,
+              promptCachedTokens: usage.prompt_tokens_details?.cached_tokens || 0,
+              totalTokens: usage.total_tokens,
+            },
+          });
         }
       }
+    } catch (error) {
+      if (error instanceof OpenAI.APIConnectionTimeoutError) {
+        throw new APITimeoutError({ options: { retryable } });
+      } else if (error instanceof OpenAI.APIError) {
+        throw new APIStatusError({
+          message: error.message,
+          options: {
+            statusCode: error.status,
+            body: error.error,
+            requestId: error.request_id,
+            retryable,
+          },
+        });
+      } else {
+        throw new APIConnectionError({
+          message: toError(error).message,
+          options: { retryable },
+        });
+      }
     } finally {
       this.queue.close();
     }
@@ -525,6 +622,10 @@ export class LLMStream extends llm.LLMStream {
   #parseChoice(id: string, choice: OpenAI.ChatCompletionChunk.Choice): llm.ChatChunk | undefined {
     const delta = choice.delta;
+    // https://github.com/livekit/agents/issues/688
+    // the delta can be None when using Azure OpenAI (content filtering)
+    if (delta === undefined) return undefined;
     if (delta.tool_calls) {
       // check if we have functions to calls
       for (const tool of delta.tool_calls) {
@@ -532,17 +633,38 @@ export class LLMStream extends llm.LLMStream {
           continue; // oai may add other tools in the future
         }
+        /**
+         * The way OpenAI streams tool calls is a bit tricky.
+         *
+         * For any new tool call, it first emits a delta tool call with id, and function name,
+         * the rest of the delta chunks will only stream the remaining arguments string,
+         * until a new tool call is started or the tool call is finished.
+         * See below for an example.
+         *
+         * Choice(delta=ChoiceDelta(content=None, function_call=None, refusal=None, role='assistant', tool_calls=None), finish_reason=None, index=0, logprobs=None)
+         * [ChoiceDeltaToolCall(index=0, id='call_LaVeHWUHpef9K1sd5UO8TtLg', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]
+         * [ChoiceDeltaToolCall(index=0, id=None, function=ChoiceDeltaToolCallFunction(arguments='{"location": "P', name=None), type=None)]
+         * [ChoiceDeltaToolCall(index=0, id=None, function=ChoiceDeltaToolCallFunction(arguments='aris}', name=None), type=None)]
+         * [ChoiceDeltaToolCall(index=1, id='call_ThU4OmMdQXnnVmpXGOCknXIB', function=ChoiceDeltaToolCallFunction(arguments='', name='get_weather'), type='function')]
+         * [ChoiceDeltaToolCall(index=1, id=None, function=ChoiceDeltaToolCallFunction(arguments='{"location": "T', name=None), type=None)]
+         * [ChoiceDeltaToolCall(index=1, id=None, function=ChoiceDeltaToolCallFunction(arguments='okyo', name=None), type=None)]
+         * Choice(delta=ChoiceDelta(content=None, function_call=None, refusal=None, role=None, tool_calls=None), finish_reason='tool_calls', index=0, logprobs=None)
+         */
         let callChunk: llm.ChatChunk | undefined;
-        if (this.#toolCallId && tool.id && tool.id !== this.#toolCallId) {
-          callChunk = this.#tryBuildFunction(id, choice);
+        // If we have a previous tool call and this is a new one, emit the previous
+        if (this.#toolCallId && tool.id && tool.index !== this.#toolIndex) {
+          callChunk = this.#createRunningToolCallChunk(id, delta);
+          this.#toolCallId = this.#fncName = this.#fncRawArguments = undefined;
         }
+        // Start or continue building the current tool call
         if (tool.function.name) {
+          this.#toolIndex = tool.index;
           this.#toolCallId = tool.id;
           this.#fncName = tool.function.name;
           this.#fncRawArguments = tool.function.arguments || '';
         } else if (tool.function.arguments) {
-          this.#fncRawArguments += tool.function.arguments;
+          this.#fncRawArguments = (this.#fncRawArguments || '') + tool.function.arguments;
         }
         if (callChunk) {
@@ -551,171 +673,48 @@ export class LLMStream extends llm.LLMStream {
       }
     }
+    // If we're done with tool calls, emit the final one
     if (
       choice.finish_reason &&
       ['tool_calls', 'stop'].includes(choice.finish_reason) &&
-      this.#toolCallId
+      this.#toolCallId !== undefined
     ) {
-      // we're done with the tool calls, run the last one
-      return this.#tryBuildFunction(id, choice);
+      const callChunk = this.#createRunningToolCallChunk(id, delta);
+      this.#toolCallId = this.#fncName = this.#fncRawArguments = undefined;
+      return callChunk;
     }
-    return {
-      requestId: id,
-      choices: [
-        {
-          delta: { content: delta.content || undefined, role: llm.ChatRole.ASSISTANT },
-          index: choice.index,
-        },
-      ],
-    };
-  }
-  #tryBuildFunction(
-    id: string,
-    choice: OpenAI.ChatCompletionChunk.Choice,
-  ): llm.ChatChunk | undefined {
-    if (!this.fncCtx) {
-      this.#logger.warn('oai stream tried to run function without function context');
-      return undefined;
-    }
-    if (!this.#toolCallId) {
-      this.#logger.warn('oai stream tried to run function but toolCallId is not set');
-      return undefined;
-    }
-    if (!this.#fncRawArguments || !this.#fncName) {
-      this.#logger.warn('oai stream tried to run function but rawArguments or fncName are not set');
+    // Regular content message
+    if (!delta.content) {
       return undefined;
     }
-    const functionInfo = llm.oaiBuildFunctionInfo(
-      this.fncCtx,
-      this.#toolCallId,
-      this.#fncName,
-      this.#fncRawArguments,
-    );
-    this.#toolCallId = this.#fncName = this.#fncRawArguments = undefined;
-    this._functionCalls.push(functionInfo);
     return {
-      requestId: id,
-      choices: [
-        {
-          delta: {
-            content: choice.delta.content || undefined,
-            role: llm.ChatRole.ASSISTANT,
-            toolCalls: this._functionCalls,
-          },
-          index: choice.index,
-        },
-      ],
-    };
-  }
-}
-const buildMessage = async (msg: llm.ChatMessage, cacheKey: any) => {
-  const oaiMsg: Partial<OpenAI.ChatCompletionMessageParam> = {};
-  switch (msg.role) {
-    case llm.ChatRole.SYSTEM:
-      oaiMsg.role = 'system';
-      break;
-    case llm.ChatRole.USER:
-      oaiMsg.role = 'user';
-      break;
-    case llm.ChatRole.ASSISTANT:
-      oaiMsg.role = 'assistant';
-      break;
-    case llm.ChatRole.TOOL:
-      oaiMsg.role = 'tool';
-      if (oaiMsg.role === 'tool') {
-        oaiMsg.tool_call_id = msg.toolCallId;
-      }
-      break;
-  }
-  if (msg.role === llm.ChatRole.TOOL) {
-    try {
-      const serializedContent =
-        typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content);
-      oaiMsg.content = serializedContent;
-    } catch (e) {
-      throw Error(`Tool call output is not JSON serializable: ${e}`);
-    }
-  } else {
-    if (typeof msg.content === 'string') {
-      oaiMsg.content = msg.content;
-    } else if (Array.isArray(msg.content)) {
-      oaiMsg.content = (await Promise.all(
-        msg.content.map(async (c) => {
-          if (typeof c === 'string') {
-            return { type: 'text', text: c };
-          } else if (
-            // typescript type guard for determining ChatAudio vs ChatImage
-            ((c: llm.ChatAudio | llm.ChatImage): c is llm.ChatImage => {
-              return (c as llm.ChatImage).image !== undefined;
-            })(c)
-          ) {
-            return await buildImageContent(c, cacheKey);
-          } else {
-            throw new Error('ChatAudio is not supported');
-          }
-        }),
-      )) as OpenAI.ChatCompletionContentPart[];
-    } else if (msg.content === undefined) {
-      oaiMsg.content = '';
-    }
-  }
-  // make sure to provide when function has been called inside the context
-  // (+ raw_arguments)
-  if (msg.toolCalls && oaiMsg.role === 'assistant') {
-    oaiMsg.tool_calls = Object.entries(msg.toolCalls).map(([name, func]) => ({
-      id: func.toolCallId,
-      type: 'function' as const,
-      function: {
-        name: name,
-        arguments: func.rawParams,
-      },
-    }));
-  }
-  return oaiMsg as OpenAI.ChatCompletionMessageParam;
-};
-const buildImageContent = async (image: llm.ChatImage, cacheKey: any) => {
-  if (typeof image.image === 'string') {
-    // image url
-    return {
-      type: 'image_url',
-      image_url: {
-        url: image.image,
-        detail: 'auto',
+      id,
+      delta: {
+        role: 'assistant',
+        content: delta.content,
       },
     };
-  } else {
-    if (!image.cache[cacheKey]) {
-      // inside our internal implementation, we allow to put extra metadata to
-      // each ChatImage (avoid to reencode each time we do a chatcompletion request)
-      let encoded = sharp(image.image.data);
-      if (image.inferenceHeight && image.inferenceHeight) {
-        encoded = encoded.resize(image.inferenceWidth, image.inferenceHeight);
-      }
-      image.cache[cacheKey] = await encoded
-        .jpeg()
-        .toBuffer()
-        .then((buffer) => buffer.toString('utf-8'));
-    }
+  }
+  #createRunningToolCallChunk(
+    id: string,
+    delta: OpenAI.Chat.Completions.ChatCompletionChunk.Choice.Delta,
+  ): llm.ChatChunk {
     return {
-      type: 'image_url',
-      image_url: {
-        url: `data:image/jpeg;base64,${image.cache[cacheKey]}`,
+      id,
+      delta: {
+        role: 'assistant',
+        content: delta.content || undefined,
+        toolCalls: [
+          llm.FunctionCall.create({
+            callId: this.#toolCallId!,
+            name: this.#fncName || '',
+            args: this.#fncRawArguments || '',
+          }),
+        ],
       },
     };
   }
-};
+}