npm - @livekit/agents-plugin-openai - Versions diffs - 1.0.31 → 1.0.33 - Mend

@livekit/agents-plugin-openai 1.0.31 → 1.0.33

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (31)

package/dist/realtime/api_proto.cjs.map +1 -1
package/dist/realtime/api_proto.d.cts +50 -12
package/dist/realtime/api_proto.d.ts +50 -12
package/dist/realtime/api_proto.d.ts.map +1 -1
package/dist/realtime/api_proto.js.map +1 -1
package/dist/realtime/index.cjs +19 -0
package/dist/realtime/index.cjs.map +1 -1
package/dist/realtime/index.d.cts +1 -0
package/dist/realtime/index.d.ts +1 -0
package/dist/realtime/index.d.ts.map +1 -1
package/dist/realtime/index.js +4 -0
package/dist/realtime/index.js.map +1 -1
package/dist/realtime/realtime_model.cjs +69 -33
package/dist/realtime/realtime_model.cjs.map +1 -1
package/dist/realtime/realtime_model.d.cts +14 -6
package/dist/realtime/realtime_model.d.ts +14 -6
package/dist/realtime/realtime_model.d.ts.map +1 -1
package/dist/realtime/realtime_model.js +69 -33
package/dist/realtime/realtime_model.js.map +1 -1
package/dist/realtime/realtime_model_beta.cjs +1300 -0
package/dist/realtime/realtime_model_beta.cjs.map +1 -0
package/dist/realtime/realtime_model_beta.d.cts +165 -0
package/dist/realtime/realtime_model_beta.d.ts +165 -0
package/dist/realtime/realtime_model_beta.d.ts.map +1 -0
package/dist/realtime/realtime_model_beta.js +1280 -0
package/dist/realtime/realtime_model_beta.js.map +1 -0
package/package.json +5 -5
package/src/realtime/api_proto.ts +76 -17
package/src/realtime/index.ts +1 -0
package/src/realtime/realtime_model.ts +86 -49
package/src/realtime/realtime_model_beta.ts +1665 -0

package/dist/realtime/realtime_model.cjs CHANGED

@@ -52,7 +52,6 @@ class CreateResponseHandle {
   }
 }
 const DEFAULT_FIRST_RETRY_INTERVAL_MS = 100;
-const DEFAULT_TEMPERATURE = 0.8;
 const DEFAULT_TURN_DETECTION = {
   type: "semantic_vad",
   eagerness: "medium",
@@ -78,14 +77,15 @@ const DEFAULT_MAX_SESSION_DURATION = 20 * 60 * 1e3;
 const DEFAULT_REALTIME_MODEL_OPTIONS = {
   model: "gpt-realtime",
   voice: "marin",
-  temperature: DEFAULT_TEMPERATURE,
   inputAudioTranscription: DEFAULT_INPUT_AUDIO_TRANSCRIPTION,
+  inputAudioNoiseReduction: void 0,
   turnDetection: DEFAULT_TURN_DETECTION,
   toolChoice: DEFAULT_TOOL_CHOICE,
   maxResponseOutputTokens: DEFAULT_MAX_RESPONSE_OUTPUT_TOKENS,
   maxSessionDuration: DEFAULT_MAX_SESSION_DURATION,
   connOptions: import_agents.DEFAULT_API_CONNECT_OPTIONS,
-  modalities: ["text", "audio"]
+  modalities: ["text", "audio"],
+  tracing: void 0
 };
 class RealtimeModel extends import_agents.llm.RealtimeModel {
   sampleRate = api_proto.SAMPLE_RATE;
@@ -94,6 +94,9 @@ class RealtimeModel extends import_agents.llm.RealtimeModel {
   outFrameSize = api_proto.OUT_FRAME_SIZE;
   /* @internal */
   _options;
+  get model() {
+    return this._options.model;
+  }
   constructor(options = {}) {
     const modalities = options.modalities || DEFAULT_REALTIME_MODEL_OPTIONS.modalities;
     super({
@@ -146,11 +149,10 @@ class RealtimeModel extends import_agents.llm.RealtimeModel {
    * @param baseURL - Base URL for the API endpoint. If undefined, constructed from the azure_endpoint.
    * @param voice - Voice setting for audio outputs. Defaults to "alloy".
    * @param inputAudioTranscription - Options for transcribing input audio. Defaults to @see DEFAULT_INPUT_AUDIO_TRANSCRIPTION.
+   * @param inputAudioNoiseReduction - Options for noise reduction. Defaults to undefined.
    * @param turnDetection - Options for server-based voice activity detection (VAD). Defaults to @see DEFAULT_SERVER_VAD_OPTIONS.
-   * @param temperature - Sampling temperature for response generation. Defaults to @see DEFAULT_TEMPERATURE.
    * @param speed - Speed of the audio output. Defaults to 1.0.
-   * @param maxResponseOutputTokens - Maximum number of tokens in the response. Defaults to @see DEFAULT_MAX_RESPONSE_OUTPUT_TOKENS.
-   * @param maxSessionDuration - Maximum duration of the session in milliseconds. Defaults to @see DEFAULT_MAX_SESSION_DURATION.
+   * @param tracing - Tracing configuration. Defaults to undefined.
    *
    * @returns A RealtimeModel instance configured for Azure OpenAI Service.
    *
@@ -164,10 +166,13 @@ class RealtimeModel extends import_agents.llm.RealtimeModel {
     entraToken,
     baseURL,
     voice = "alloy",
+    temperature,
+    // eslint-disable-line @typescript-eslint/no-unused-vars
     inputAudioTranscription = AZURE_DEFAULT_INPUT_AUDIO_TRANSCRIPTION,
+    inputAudioNoiseReduction,
     turnDetection = AZURE_DEFAULT_TURN_DETECTION,
-    temperature = 0.8,
-    speed
+    speed,
+    tracing
   }) {
     apiKey = apiKey || process.env.AZURE_OPENAI_API_KEY;
     if (!apiKey && !entraToken) {
@@ -193,9 +198,10 @@ class RealtimeModel extends import_agents.llm.RealtimeModel {
     return new RealtimeModel({
       voice,
       inputAudioTranscription,
+      inputAudioNoiseReduction,
       turnDetection,
-      temperature,
       speed,
+      tracing,
       apiKey,
       azureDeployment,
       apiVersion,
@@ -272,24 +278,31 @@ class RealtimeSession extends import_agents.llm.RealtimeSession {
     this.messageChannel.put(command);
   }
   createSessionUpdateEvent() {
-    const modalities = this.oaiRealtimeModel._options.modalities.includes("audio") ? ["text", "audio"] : ["text"];
+    const audioFormat = { type: "audio/pcm", rate: SAMPLE_RATE };
+    const modality = this.oaiRealtimeModel._options.modalities.includes("audio") ? "audio" : "text";
     return {
       type: "session.update",
       session: {
+        type: "realtime",
         model: this.oaiRealtimeModel._options.model,
-        voice: this.oaiRealtimeModel._options.voice,
-        input_audio_format: "pcm16",
-        output_audio_format: "pcm16",
-        modalities,
-        turn_detection: this.oaiRealtimeModel._options.turnDetection,
-        input_audio_transcription: this.oaiRealtimeModel._options.inputAudioTranscription,
-        // TODO(shubhra): add inputAudioNoiseReduction
-        temperature: this.oaiRealtimeModel._options.temperature,
+        output_modalities: [modality],
+        audio: {
+          input: {
+            format: audioFormat,
+            noise_reduction: this.oaiRealtimeModel._options.inputAudioNoiseReduction,
+            transcription: this.oaiRealtimeModel._options.inputAudioTranscription,
+            turn_detection: this.oaiRealtimeModel._options.turnDetection
+          },
+          output: {
+            format: audioFormat,
+            speed: this.oaiRealtimeModel._options.speed,
+            voice: this.oaiRealtimeModel._options.voice
+          }
+        },
+        max_output_tokens: this.oaiRealtimeModel._options.maxResponseOutputTokens === Infinity ? "inf" : this.oaiRealtimeModel._options.maxResponseOutputTokens,
         tool_choice: toOaiToolChoice(this.oaiRealtimeModel._options.toolChoice),
-        max_response_output_tokens: this.oaiRealtimeModel._options.maxResponseOutputTokens === Infinity ? "inf" : this.oaiRealtimeModel._options.maxResponseOutputTokens,
-        // TODO(shubhra): add tracing options
-        instructions: this.instructions,
-        speed: this.oaiRealtimeModel._options.speed
+        tracing: this.oaiRealtimeModel._options.tracing,
+        instructions: this.instructions
       }
     };
   }
@@ -405,6 +418,7 @@ class RealtimeSession extends import_agents.llm.RealtimeSession {
     return {
       type: "session.update",
       session: {
+        type: "realtime",
         model: this.oaiRealtimeModel._options.model,
         tools: oaiTools
       },
@@ -416,6 +430,7 @@ class RealtimeSession extends import_agents.llm.RealtimeSession {
     this.sendEvent({
       type: "session.update",
       session: {
+        type: "realtime",
         instructions: _instructions
       },
       event_id: eventId
@@ -423,7 +438,9 @@ class RealtimeSession extends import_agents.llm.RealtimeSession {
     this.instructions = _instructions;
   }
   updateOptions({ toolChoice }) {
-    const options = {};
+    const options = {
+      type: "realtime"
+    };
     this.oaiRealtimeModel._options.toolChoice = toolChoice;
     options.tool_choice = toOaiToolChoice(toolChoice);
     this.sendEvent({
@@ -522,8 +539,12 @@ class RealtimeSession extends import_agents.llm.RealtimeSession {
         throw new Error("Microsoft API key or entraToken is required");
       }
     } else {
+      if (!this.oaiRealtimeModel._options.apiKey) {
+        throw new Error(
+          "OpenAI API key is required but not set. Check OPENAI_API_KEY environment variable."
+        );
+      }
       headers.Authorization = `Bearer ${this.oaiRealtimeModel._options.apiKey}`;
-      headers["OpenAI-Beta"] = "realtime=v1";
     }
     const url = processBaseURL({
       baseURL: this.oaiRealtimeModel._options.baseURL,
@@ -690,6 +711,7 @@ class RealtimeSession extends import_agents.llm.RealtimeSession {
         case "response.output_item.added":
           this.handleResponseOutputItemAdded(event);
           break;
+        case "conversation.item.added":
         case "conversation.item.created":
           this.handleConversationItemCreated(event);
           break;
@@ -708,21 +730,27 @@ class RealtimeSession extends import_agents.llm.RealtimeSession {
         case "response.content_part.done":
           this.handleResponseContentPartDone(event);
           break;
+        case "response.output_text.delta":
         case "response.text.delta":
           this.handleResponseTextDelta(event);
           break;
+        case "response.output_text.done":
         case "response.text.done":
           this.handleResponseTextDone(event);
           break;
+        case "response.output_audio_transcript.delta":
         case "response.audio_transcript.delta":
           this.handleResponseAudioTranscriptDelta(event);
           break;
+        case "response.output_audio.delta":
         case "response.audio.delta":
           this.handleResponseAudioDelta(event);
           break;
+        case "response.output_audio_transcript.done":
         case "response.audio_transcript.done":
           this.handleResponseAudioTranscriptDone(event);
           break;
+        case "response.output_audio.done":
         case "response.audio.done":
           this.handleResponseAudioDone(event);
           break;
@@ -798,7 +826,8 @@ class RealtimeSession extends import_agents.llm.RealtimeSession {
     const generationEv = {
       messageStream: this.currentGeneration.messageChannel.stream(),
       functionStream: this.currentGeneration.functionChannel.stream(),
-      userInitiated: false
+      userInitiated: false,
+      responseId: event.response.id
     };
     const clientEventId = (_a = event.response.metadata) == null ? void 0 : _a.client_event_id;
     if (clientEventId) {
@@ -918,11 +947,12 @@ class RealtimeSession extends import_agents.llm.RealtimeSession {
       this.#logger.warn(`itemGeneration not found for itemId=${itemId}`);
       return;
     }
-    if (itemType === "text" && this.oaiRealtimeModel.capabilities.audioOutput) {
+    const isTextType = itemType === "text" || itemType === "output_text";
+    if (isTextType && this.oaiRealtimeModel.capabilities.audioOutput) {
       this.#logger.warn("Text response received from OpenAI Realtime API in audio modality.");
     }
     if (!itemGeneration.modalities.done) {
-      const modalityResult = itemType === "text" ? ["text"] : ["audio", "text"];
+      const modalityResult = isTextType ? ["text"] : ["audio", "text"];
       itemGeneration.modalities.resolve(modalityResult);
     }
     if (this.currentGeneration._firstTokenTimestamp === void 0) {
@@ -930,6 +960,9 @@ class RealtimeSession extends import_agents.llm.RealtimeSession {
     }
   }
   handleResponseContentPartDone(event) {
+    if (!event.part) {
+      return;
+    }
     if (event.part.type !== "text") {
       return;
     }
@@ -1020,11 +1053,13 @@ class RealtimeSession extends import_agents.llm.RealtimeSession {
       if (!item.call_id || !item.name || !item.arguments) {
         throw new Error("item is not a function call");
       }
-      this.currentGeneration.functionChannel.write({
-        callId: item.call_id,
-        name: item.name,
-        args: item.arguments
-      });
+      this.currentGeneration.functionChannel.write(
+        import_agents.llm.FunctionCall.create({
+          callId: item.call_id,
+          name: item.name,
+          args: item.arguments
+        })
+      );
     } else if (itemType === "message") {
       const itemGeneration = this.currentGeneration.messages.get(itemId);
       if (!itemGeneration) {
@@ -1157,7 +1192,8 @@ class RealtimeSession extends import_agents.llm.RealtimeSession {
     const generation_ev = {
       messageStream: this.currentGeneration.messageChannel.stream(),
       functionStream: this.currentGeneration.functionChannel.stream(),
-      userInitiated: false
+      userInitiated: false,
+      responseId
     };
     const handle = this.responseCreatedFutures[responseId];
     if (handle) {