@mastra/voice-openai-realtime 0.1.0-alpha.1 → 0.1.0-alpha.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,23 +1,23 @@
 
-> @mastra/voice-openai-realtime@0.1.0-alpha.1 build /home/runner/work/mastra/mastra/voice/openai-realtime-api
+> @mastra/voice-openai-realtime@0.1.0-alpha.3 build /home/runner/work/mastra/mastra/voice/openai-realtime-api
 > tsup src/index.ts --format esm,cjs --experimental-dts --clean --treeshake
 
 CLI Building entry: src/index.ts
 CLI Using tsconfig: tsconfig.json
 CLI tsup v8.4.0
 TSC Build start
-TSC ⚡️ Build success in 9409ms
+TSC ⚡️ Build success in 8539ms
 DTS Build start
 CLI Target: es2022
 Analysis will use the bundled TypeScript version 5.8.2
 Writing package typings: /home/runner/work/mastra/mastra/voice/openai-realtime-api/dist/_tsup-dts-rollup.d.ts
 Analysis will use the bundled TypeScript version 5.8.2
 Writing package typings: /home/runner/work/mastra/mastra/voice/openai-realtime-api/dist/_tsup-dts-rollup.d.cts
-DTS ⚡️ Build success in 10803ms
+DTS ⚡️ Build success in 12102ms
 CLI Cleaning output folder
 ESM Build start
 CJS Build start
-CJS dist/index.cjs 17.77 KB
-CJS ⚡️ Build success in 694ms
-ESM dist/index.js 17.72 KB
-ESM ⚡️ Build success in 695ms
+ESM dist/index.js 18.38 KB
+ESM ⚡️ Build success in 718ms
+CJS dist/index.cjs 18.44 KB
+CJS ⚡️ Build success in 718ms
package/CHANGELOG.md CHANGED
@@ -1,5 +1,24 @@
 # @mastra/voice-openai-realtime
 
+## 0.1.0-alpha.3
+
+### Patch Changes
+
+- a4686e8: Realtime event queue
+- Updated dependencies [b3b34f5]
+- Updated dependencies [a4686e8]
+  - @mastra/core@0.7.0-alpha.3
+
+## 0.1.0-alpha.2
+
+### Patch Changes
+
+- Updated dependencies [a838fde]
+- Updated dependencies [a8bd4cf]
+- Updated dependencies [7a3eeb0]
+- Updated dependencies [6530ad1]
+  - @mastra/core@0.7.0-alpha.2
+
 ## 0.1.0-alpha.1
 
 ### Minor Changes
package/dist/_tsup-dts-rollup.d.cts CHANGED
@@ -50,14 +50,15 @@ export declare class OpenAIRealtimeVoice extends MastraVoice {
     private instructions?;
     private tools?;
     private debug;
+    private queue;
+    private transcriber;
     /**
      * Creates a new instance of OpenAIRealtimeVoice.
      *
      * @param options - Configuration options for the voice instance
-     * @param options.chatModel - Configuration for the chat model
-     * @param options.chatModel.model - The model ID to use (defaults to GPT-4 Mini Realtime)
-     * @param options.chatModel.apiKey - OpenAI API key. Falls back to process.env.OPENAI_API_KEY
-     * @param options.chatModel.tools - Tools configuration for the model
+     * @param options.url - The base URL for the OpenAI Realtime API
+     * @param options.model - The model ID to use (defaults to GPT-4 Mini Realtime)
+     * @param options.apiKey - OpenAI API key. Falls back to process.env.OPENAI_API_KEY
      * @param options.speaker - Voice ID to use (defaults to 'alloy')
      * @param options.debug - Enable debug mode
      *
@@ -72,15 +73,12 @@ export declare class OpenAIRealtimeVoice extends MastraVoice {
      * });
      * ```
      */
-    constructor({ chatModel, speaker, debug, }?: {
-        chatModel?: {
-            model?: string;
-            apiKey?: string;
-            tools?: TTools;
-            instructions?: string;
-            url?: string;
-        };
+    constructor(options?: {
+        model?: string;
+        url?: string;
+        apiKey?: string;
         speaker?: Realtime.Voice;
+        transcriber?: Realtime.AudioTranscriptionModel;
         debug?: boolean;
     });
     /**
@@ -108,6 +106,19 @@ export declare class OpenAIRealtimeVoice extends MastraVoice {
      * ```
      */
    close(): void;
+    /**
+     * Equips the voice instance with a set of instructions.
+     * Instructions guide the model's behavior during conversations.
+     *
+     * @param instructions - Optional instructions for the model
+     * @returns Nothing; the instructions are stored on the voice instance
+     *
+     * @example
+     * ```typescript
+     * voice.addInstructions('You are a helpful assistant.');
+     * ```
+     */
+    addInstructions(instructions?: string): void;
     /**
      * Equips the voice instance with a set of tools.
      * Tools allow the model to perform additional actions during conversations.
package/dist/_tsup-dts-rollup.d.ts CHANGED
@@ -50,14 +50,15 @@ export declare class OpenAIRealtimeVoice extends MastraVoice {
     private instructions?;
     private tools?;
     private debug;
+    private queue;
+    private transcriber;
     /**
      * Creates a new instance of OpenAIRealtimeVoice.
      *
      * @param options - Configuration options for the voice instance
-     * @param options.chatModel - Configuration for the chat model
-     * @param options.chatModel.model - The model ID to use (defaults to GPT-4 Mini Realtime)
-     * @param options.chatModel.apiKey - OpenAI API key. Falls back to process.env.OPENAI_API_KEY
-     * @param options.chatModel.tools - Tools configuration for the model
+     * @param options.url - The base URL for the OpenAI Realtime API
+     * @param options.model - The model ID to use (defaults to GPT-4 Mini Realtime)
+     * @param options.apiKey - OpenAI API key. Falls back to process.env.OPENAI_API_KEY
      * @param options.speaker - Voice ID to use (defaults to 'alloy')
      * @param options.debug - Enable debug mode
      *
@@ -72,15 +73,12 @@ export declare class OpenAIRealtimeVoice extends MastraVoice {
      * });
      * ```
      */
-    constructor({ chatModel, speaker, debug, }?: {
-        chatModel?: {
-            model?: string;
-            apiKey?: string;
-            tools?: TTools;
-            instructions?: string;
-            url?: string;
-        };
+    constructor(options?: {
+        model?: string;
+        url?: string;
+        apiKey?: string;
         speaker?: Realtime.Voice;
+        transcriber?: Realtime.AudioTranscriptionModel;
         debug?: boolean;
     });
     /**
@@ -108,6 +106,19 @@ export declare class OpenAIRealtimeVoice extends MastraVoice {
      * ```
      */
    close(): void;
+    /**
+     * Equips the voice instance with a set of instructions.
+     * Instructions guide the model's behavior during conversations.
+     *
+     * @param instructions - Optional instructions for the model
+     * @returns Nothing; the instructions are stored on the voice instance
+     *
+     * @example
+     * ```typescript
+     * voice.addInstructions('You are a helpful assistant.');
+     * ```
+     */
+    addInstructions(instructions?: string): void;
     /**
      * Equips the voice instance with a set of tools.
      * Tools allow the model to perform additional actions during conversations.
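
Note on the declaration changes above: the nested `chatModel` option from 0.1.0-alpha.1 is flattened away. `model`, `url`, and `apiKey` now sit at the top level of the constructor options alongside the new `transcriber` option, while tools and instructions move to the `addTools` and `addInstructions` methods. A minimal usage sketch of the new shape (the instructions string and tool map here are illustrative placeholders):

```typescript
import { OpenAIRealtimeVoice } from '@mastra/voice-openai-realtime';

// alpha.1 shape (removed): new OpenAIRealtimeVoice({ chatModel: { model, apiKey, tools } })
// alpha.3 shape: flat options, all optional
const voice = new OpenAIRealtimeVoice({
  model: 'gpt-4o-mini-realtime-preview-2024-12-17', // default model
  apiKey: process.env.OPENAI_API_KEY, // default fallback
  speaker: 'alloy', // default voice
  transcriber: 'whisper-1', // new option; default transcription model
});

// Instructions and tools are now attached after construction:
voice.addInstructions('You are a helpful assistant.');
voice.addTools({}); // any ToolsInput map, e.g. an agent's tools
```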
package/dist/index.cjs CHANGED
@@ -1,10 +1,10 @@
 'use strict';
 
-var voice = require('@mastra/core/voice');
+var events = require('events');
 var stream = require('stream');
-var zodToJsonSchema = require('zod-to-json-schema');
+var voice = require('@mastra/core/voice');
 var ws = require('ws');
-var events = require('events');
+var zodToJsonSchema = require('zod-to-json-schema');
 
 // src/index.ts
 var transformTools = (tools) => {
@@ -65,7 +65,10 @@ var transformTools = (tools) => {
 var isReadableStream = (obj) => {
   return obj && obj instanceof stream.Readable && typeof obj.read === "function" && typeof obj.pipe === "function" && obj.readable === true;
 };
+
+// src/index.ts
 var DEFAULT_VOICE = "alloy";
+var DEFAULT_TRANSCRIBER = "whisper-1";
 var DEFAULT_URL = "wss://api.openai.com/v1/realtime";
 var DEFAULT_MODEL = "gpt-4o-mini-realtime-preview-2024-12-17";
 var VOICES = ["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"];
@@ -77,14 +80,15 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
   instructions;
   tools;
   debug;
+  queue = [];
+  transcriber;
   /**
    * Creates a new instance of OpenAIRealtimeVoice.
    *
    * @param options - Configuration options for the voice instance
-   * @param options.chatModel - Configuration for the chat model
-   * @param options.chatModel.model - The model ID to use (defaults to GPT-4 Mini Realtime)
-   * @param options.chatModel.apiKey - OpenAI API key. Falls back to process.env.OPENAI_API_KEY
-   * @param options.chatModel.tools - Tools configuration for the model
+   * @param options.url - The base URL for the OpenAI Realtime API
+   * @param options.model - The model ID to use (defaults to GPT-4 Mini Realtime)
+   * @param options.apiKey - OpenAI API key. Falls back to process.env.OPENAI_API_KEY
    * @param options.speaker - Voice ID to use (defaults to 'alloy')
    * @param options.debug - Enable debug mode
    *
@@ -99,14 +103,10 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
    * });
    * ```
    */
-  constructor({
-    chatModel,
-    speaker,
-    debug = false
-  } = {}) {
+  constructor(options = {}) {
     super();
-    const url = `${chatModel?.url || DEFAULT_URL}?model=${chatModel?.model || DEFAULT_MODEL}`;
-    const apiKey = chatModel?.apiKey || process.env.OPENAI_API_KEY;
+    const url = `${options.url || DEFAULT_URL}?model=${options.model || DEFAULT_MODEL}`;
+    const apiKey = options.apiKey || process.env.OPENAI_API_KEY;
     this.ws = new ws.WebSocket(url, void 0, {
       headers: {
         Authorization: "Bearer " + apiKey,
@@ -116,10 +116,9 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
     this.client = new events.EventEmitter();
     this.state = "close";
     this.events = {};
-    this.tools = chatModel?.tools;
-    this.instructions = chatModel?.instructions;
-    this.speaker = speaker || DEFAULT_VOICE;
-    this.debug = debug;
+    this.speaker = options.speaker || DEFAULT_VOICE;
+    this.transcriber = options.transcriber || DEFAULT_TRANSCRIBER;
+    this.debug = options.debug || false;
     this.setupEventListeners();
   }
   /**
@@ -150,6 +149,21 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
     this.ws.close();
     this.state = "close";
   }
+  /**
+   * Equips the voice instance with a set of instructions.
+   * Instructions guide the model's behavior during conversations.
+   *
+   * @param instructions - Optional instructions for the model
+   * @returns Nothing; the instructions are stored on the voice instance
+   *
+   * @example
+   * ```typescript
+   * voice.addInstructions('You are a helpful assistant.');
+   * ```
+   */
+  addInstructions(instructions) {
+    this.instructions = instructions;
+  }
   /**
    * Equips the voice instance with a set of tools.
    * Tools allow the model to perform additional actions during conversations.
@@ -167,10 +181,7 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
    * ```
    */
   addTools(tools) {
-    const openaiTools = transformTools(tools);
-    this.updateConfig({
-      tools: openaiTools.map((t) => t.openaiTool)
-    });
+    this.tools = tools || {};
   }
   /**
    * Emits a speaking event using the configured voice model.
@@ -314,7 +325,7 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
       instructions: this.instructions,
       tools: openaiTools.map((t) => t.openaiTool),
       input_audio_transcription: {
-        model: "whisper-1"
+        model: this.transcriber
       },
       voice: this.speaker
     });
@@ -462,6 +473,10 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
     });
     this.client.on("session.created", (ev) => {
       this.emit("session.created", ev);
+      const queue = this.queue.splice(0, this.queue.length);
+      for (const ev2 of queue) {
+        this.ws.send(JSON.stringify(ev2));
+      }
     });
     this.client.on("session.updated", (ev) => {
       this.emit("session.updated", ev);
@@ -474,10 +489,10 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
       this.emit("speaker", speakerStream);
     });
     this.client.on("conversation.item.input_audio_transcription.delta", (ev) => {
-      this.emit("transcribing", { text: ev.delta, response_id: ev.response_id, role: "user" });
+      this.emit("writing", { text: ev.delta, response_id: ev.response_id, role: "user" });
     });
     this.client.on("conversation.item.input_audio_transcription.done", (ev) => {
-      this.emit("transcribing", { text: "\n", response_id: ev.response_id, role: "user" });
+      this.emit("writing", { text: "\n", response_id: ev.response_id, role: "user" });
     });
     this.client.on("response.audio.delta", (ev) => {
       const audio = Buffer.from(ev.delta, "base64");
@@ -491,19 +506,19 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
       stream?.end();
     });
     this.client.on("response.audio_transcript.delta", (ev) => {
-      this.emit("writing", { text: ev.delta, response_id: ev.response_id });
+      this.emit("writing", { text: ev.delta, response_id: ev.response_id, role: "assistant" });
     });
     this.client.on("response.audio_transcript.done", (ev) => {
-      this.emit("writing", { text: "\n", response_id: ev.response_id });
+      this.emit("writing", { text: "\n", response_id: ev.response_id, role: "assistant" });
     });
     this.client.on("response.text.delta", (ev) => {
-      this.emit("writing", { text: ev.delta, response_id: ev.response_id });
+      this.emit("writing", { text: ev.delta, response_id: ev.response_id, role: "assistant" });
     });
     this.client.on("response.text.done", (ev) => {
-      this.emit("writing", { text: "\n", response_id: ev.response_id });
+      this.emit("writing", { text: "\n", response_id: ev.response_id, role: "assistant" });
     });
-    this.client.on("response.done", (ev) => {
-      this.handleFunctionCalls(ev);
+    this.client.on("response.done", async (ev) => {
+      await this.handleFunctionCalls(ev);
       this.emit("response.done", ev);
       speakerStreams.delete(ev.response.id);
     });
@@ -565,12 +580,16 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
     return btoa(binary);
   }
   sendEvent(type, data) {
-    this.ws.send(
-      JSON.stringify({
-        type,
-        ...data
-      })
-    );
+    if (this.ws.readyState !== this.ws.OPEN) {
+      this.queue.push({ type, ...data });
+    } else {
+      this.ws.send(
+        JSON.stringify({
+          type,
+          ...data
+        })
+      );
+    }
   }
 };
 
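Note: both bundles also replace the `transcribing` event with role-tagged `writing` events. User transcription deltas and assistant text/audio transcripts now arrive on the same event, distinguished by a `role` field. A hedged consumer sketch, assuming the instance exposes the `on` listener registration implied by its event map and reusing the `voice` instance from the example above:

```typescript
// One listener now covers both sides of the conversation.
voice.on('writing', ({ text, role }: { text: string; role: 'user' | 'assistant' }) => {
  // role is 'user' for input transcription and 'assistant' for model output
  process.stdout.write(`${role}: ${text}`);
});
```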
package/dist/index.js CHANGED
@@ -1,8 +1,8 @@
-import { MastraVoice } from '@mastra/core/voice';
+import { EventEmitter } from 'events';
 import { PassThrough, Readable } from 'stream';
-import { zodToJsonSchema } from 'zod-to-json-schema';
+import { MastraVoice } from '@mastra/core/voice';
 import { WebSocket } from 'ws';
-import { EventEmitter } from 'events';
+import { zodToJsonSchema } from 'zod-to-json-schema';
 
 // src/index.ts
 var transformTools = (tools) => {
@@ -63,7 +63,10 @@ var transformTools = (tools) => {
 var isReadableStream = (obj) => {
   return obj && obj instanceof Readable && typeof obj.read === "function" && typeof obj.pipe === "function" && obj.readable === true;
 };
+
+// src/index.ts
 var DEFAULT_VOICE = "alloy";
+var DEFAULT_TRANSCRIBER = "whisper-1";
 var DEFAULT_URL = "wss://api.openai.com/v1/realtime";
 var DEFAULT_MODEL = "gpt-4o-mini-realtime-preview-2024-12-17";
 var VOICES = ["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"];
@@ -75,14 +78,15 @@ var OpenAIRealtimeVoice = class extends MastraVoice {
   instructions;
   tools;
   debug;
+  queue = [];
+  transcriber;
   /**
    * Creates a new instance of OpenAIRealtimeVoice.
    *
    * @param options - Configuration options for the voice instance
-   * @param options.chatModel - Configuration for the chat model
-   * @param options.chatModel.model - The model ID to use (defaults to GPT-4 Mini Realtime)
-   * @param options.chatModel.apiKey - OpenAI API key. Falls back to process.env.OPENAI_API_KEY
-   * @param options.chatModel.tools - Tools configuration for the model
+   * @param options.url - The base URL for the OpenAI Realtime API
+   * @param options.model - The model ID to use (defaults to GPT-4 Mini Realtime)
+   * @param options.apiKey - OpenAI API key. Falls back to process.env.OPENAI_API_KEY
    * @param options.speaker - Voice ID to use (defaults to 'alloy')
    * @param options.debug - Enable debug mode
    *
@@ -97,14 +101,10 @@ var OpenAIRealtimeVoice = class extends MastraVoice {
    * });
    * ```
    */
-  constructor({
-    chatModel,
-    speaker,
-    debug = false
-  } = {}) {
+  constructor(options = {}) {
     super();
-    const url = `${chatModel?.url || DEFAULT_URL}?model=${chatModel?.model || DEFAULT_MODEL}`;
-    const apiKey = chatModel?.apiKey || process.env.OPENAI_API_KEY;
+    const url = `${options.url || DEFAULT_URL}?model=${options.model || DEFAULT_MODEL}`;
+    const apiKey = options.apiKey || process.env.OPENAI_API_KEY;
     this.ws = new WebSocket(url, void 0, {
       headers: {
         Authorization: "Bearer " + apiKey,
@@ -114,10 +114,9 @@ var OpenAIRealtimeVoice = class extends MastraVoice {
     this.client = new EventEmitter();
     this.state = "close";
     this.events = {};
-    this.tools = chatModel?.tools;
-    this.instructions = chatModel?.instructions;
-    this.speaker = speaker || DEFAULT_VOICE;
-    this.debug = debug;
+    this.speaker = options.speaker || DEFAULT_VOICE;
+    this.transcriber = options.transcriber || DEFAULT_TRANSCRIBER;
+    this.debug = options.debug || false;
     this.setupEventListeners();
   }
   /**
@@ -148,6 +147,21 @@ var OpenAIRealtimeVoice = class extends MastraVoice {
     this.ws.close();
     this.state = "close";
   }
+  /**
+   * Equips the voice instance with a set of instructions.
+   * Instructions guide the model's behavior during conversations.
+   *
+   * @param instructions - Optional instructions for the model
+   * @returns Nothing; the instructions are stored on the voice instance
+   *
+   * @example
+   * ```typescript
+   * voice.addInstructions('You are a helpful assistant.');
+   * ```
+   */
+  addInstructions(instructions) {
+    this.instructions = instructions;
+  }
   /**
    * Equips the voice instance with a set of tools.
    * Tools allow the model to perform additional actions during conversations.
@@ -165,10 +179,7 @@ var OpenAIRealtimeVoice = class extends MastraVoice {
    * ```
    */
   addTools(tools) {
-    const openaiTools = transformTools(tools);
-    this.updateConfig({
-      tools: openaiTools.map((t) => t.openaiTool)
-    });
+    this.tools = tools || {};
   }
   /**
    * Emits a speaking event using the configured voice model.
@@ -312,7 +323,7 @@ var OpenAIRealtimeVoice = class extends MastraVoice {
       instructions: this.instructions,
       tools: openaiTools.map((t) => t.openaiTool),
       input_audio_transcription: {
-        model: "whisper-1"
+        model: this.transcriber
       },
       voice: this.speaker
     });
@@ -460,6 +471,10 @@ var OpenAIRealtimeVoice = class extends MastraVoice {
     });
     this.client.on("session.created", (ev) => {
       this.emit("session.created", ev);
+      const queue = this.queue.splice(0, this.queue.length);
+      for (const ev2 of queue) {
+        this.ws.send(JSON.stringify(ev2));
+      }
     });
     this.client.on("session.updated", (ev) => {
       this.emit("session.updated", ev);
@@ -472,10 +487,10 @@ var OpenAIRealtimeVoice = class extends MastraVoice {
       this.emit("speaker", speakerStream);
     });
     this.client.on("conversation.item.input_audio_transcription.delta", (ev) => {
-      this.emit("transcribing", { text: ev.delta, response_id: ev.response_id, role: "user" });
+      this.emit("writing", { text: ev.delta, response_id: ev.response_id, role: "user" });
     });
     this.client.on("conversation.item.input_audio_transcription.done", (ev) => {
-      this.emit("transcribing", { text: "\n", response_id: ev.response_id, role: "user" });
+      this.emit("writing", { text: "\n", response_id: ev.response_id, role: "user" });
     });
     this.client.on("response.audio.delta", (ev) => {
       const audio = Buffer.from(ev.delta, "base64");
@@ -489,19 +504,19 @@ var OpenAIRealtimeVoice = class extends MastraVoice {
       stream?.end();
     });
     this.client.on("response.audio_transcript.delta", (ev) => {
-      this.emit("writing", { text: ev.delta, response_id: ev.response_id });
+      this.emit("writing", { text: ev.delta, response_id: ev.response_id, role: "assistant" });
     });
     this.client.on("response.audio_transcript.done", (ev) => {
-      this.emit("writing", { text: "\n", response_id: ev.response_id });
+      this.emit("writing", { text: "\n", response_id: ev.response_id, role: "assistant" });
     });
     this.client.on("response.text.delta", (ev) => {
-      this.emit("writing", { text: ev.delta, response_id: ev.response_id });
+      this.emit("writing", { text: ev.delta, response_id: ev.response_id, role: "assistant" });
    });
     this.client.on("response.text.done", (ev) => {
-      this.emit("writing", { text: "\n", response_id: ev.response_id });
+      this.emit("writing", { text: "\n", response_id: ev.response_id, role: "assistant" });
     });
-    this.client.on("response.done", (ev) => {
-      this.handleFunctionCalls(ev);
+    this.client.on("response.done", async (ev) => {
+      await this.handleFunctionCalls(ev);
       this.emit("response.done", ev);
       speakerStreams.delete(ev.response.id);
     });
@@ -563,12 +578,16 @@ var OpenAIRealtimeVoice = class extends MastraVoice {
     return btoa(binary);
   }
   sendEvent(type, data) {
-    this.ws.send(
-      JSON.stringify({
-        type,
-        ...data
-      })
-    );
+    if (this.ws.readyState !== this.ws.OPEN) {
+      this.queue.push({ type, ...data });
+    } else {
+      this.ws.send(
+        JSON.stringify({
+          type,
+          ...data
+        })
+      );
+    }
   }
 };
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@mastra/voice-openai-realtime",
-  "version": "0.1.0-alpha.1",
+  "version": "0.1.0-alpha.3",
   "description": "Mastra OpenAI Realtime API integration",
   "type": "module",
   "main": "dist/index.js",
@@ -22,7 +22,7 @@
     "openai-realtime-api": "^1.0.7",
     "ws": "^8.18.1",
     "zod-to-json-schema": "^3.24.1",
-    "@mastra/core": "^0.7.0-alpha.1"
+    "@mastra/core": "^0.7.0-alpha.3"
   },
   "devDependencies": {
     "@microsoft/api-extractor": "^7.49.2",
package/src/index.ts CHANGED
@@ -1,10 +1,10 @@
+import { EventEmitter } from 'events';
+import { PassThrough } from 'stream';
 import type { ToolsInput } from '@mastra/core/agent';
 import { MastraVoice } from '@mastra/core/voice';
-import { isReadableStream, transformTools } from './utils';
-import { WebSocket } from 'ws';
-import { EventEmitter } from 'events';
 import type { Realtime, RealtimeServerEvents } from 'openai-realtime-api';
-import { PassThrough } from 'stream';
+import { WebSocket } from 'ws';
+import { isReadableStream, transformTools } from './utils';
 
 /**
  * Event callback function type
@@ -29,6 +29,8 @@ type EventMap = {
 /** Default voice for text-to-speech. 'alloy' provides a neutral, balanced voice suitable for most use cases */
 const DEFAULT_VOICE: Realtime.Voice = 'alloy';
 
+const DEFAULT_TRANSCRIBER: Realtime.AudioTranscriptionModel = 'whisper-1';
+
 const DEFAULT_URL = 'wss://api.openai.com/v1/realtime';
 
 /**
@@ -36,21 +38,22 @@ const DEFAULT_URL = 'wss://api.openai.com/v1/realtime';
  * This model is optimized for low-latency responses while maintaining high quality output.
  */
 const DEFAULT_MODEL = 'gpt-4o-mini-realtime-preview-2024-12-17';
-/**
- * Default Voice Activity Detection (VAD) configuration.
- * These settings control how the system detects speech segments.
- *
- * @property {string} type - Uses server-side VAD for better accuracy
- * @property {number} threshold - Speech detection sensitivity (0.5 = balanced)
- * @property {number} prefix_padding_ms - Includes 1 second of audio before speech
- * @property {number} silence_duration_ms - Waits 1 second of silence before ending turn
- */
-const DEFAULT_VAD_CONFIG = {
-  type: 'server_vad',
-  threshold: 0.5,
-  prefix_padding_ms: 1000,
-  silence_duration_ms: 1000,
-} as Realtime.TurnDetection;
+
+// /**
+//  * Default Voice Activity Detection (VAD) configuration.
+//  * These settings control how the system detects speech segments.
+//  *
+//  * @property {string} type - Uses server-side VAD for better accuracy
+//  * @property {number} threshold - Speech detection sensitivity (0.5 = balanced)
+//  * @property {number} prefix_padding_ms - Includes 1 second of audio before speech
+//  * @property {number} silence_duration_ms - Waits 1 second of silence before ending turn
+//  */
+// const DEFAULT_VAD_CONFIG = {
+//   type: 'server_vad',
+//   threshold: 0.5,
+//   prefix_padding_ms: 1000,
+//   silence_duration_ms: 1000,
+// } as Realtime.TurnDetection;
 
 type TTools = ToolsInput;
 
@@ -110,15 +113,16 @@ export class OpenAIRealtimeVoice extends MastraVoice {
   private instructions?: string;
   private tools?: TTools;
   private debug: boolean;
+  private queue: unknown[] = [];
+  private transcriber: Realtime.AudioTranscriptionModel;
 
   /**
    * Creates a new instance of OpenAIRealtimeVoice.
    *
    * @param options - Configuration options for the voice instance
-   * @param options.chatModel - Configuration for the chat model
-   * @param options.chatModel.model - The model ID to use (defaults to GPT-4 Mini Realtime)
-   * @param options.chatModel.apiKey - OpenAI API key. Falls back to process.env.OPENAI_API_KEY
-   * @param options.chatModel.tools - Tools configuration for the model
+   * @param options.url - The base URL for the OpenAI Realtime API
+   * @param options.model - The model ID to use (defaults to GPT-4 Mini Realtime)
+   * @param options.apiKey - OpenAI API key. Falls back to process.env.OPENAI_API_KEY
    * @param options.speaker - Voice ID to use (defaults to 'alloy')
    * @param options.debug - Enable debug mode
    *
@@ -133,25 +137,20 @@ export class OpenAIRealtimeVoice extends MastraVoice {
    * });
    * ```
    */
-  constructor({
-    chatModel,
-    speaker,
-    debug = false,
-  }: {
-    chatModel?: {
+  constructor(
+    options: {
       model?: string;
-      apiKey?: string;
-      tools?: TTools;
-      instructions?: string;
       url?: string;
-    };
-    speaker?: Realtime.Voice;
-    debug?: boolean;
-  } = {}) {
+      apiKey?: string;
+      speaker?: Realtime.Voice;
+      transcriber?: Realtime.AudioTranscriptionModel;
+      debug?: boolean;
+    } = {},
+  ) {
     super();
 
-    const url = `${chatModel?.url || DEFAULT_URL}?model=${chatModel?.model || DEFAULT_MODEL}`;
-    const apiKey = chatModel?.apiKey || process.env.OPENAI_API_KEY;
+    const url = `${options.url || DEFAULT_URL}?model=${options.model || DEFAULT_MODEL}`;
+    const apiKey = options.apiKey || process.env.OPENAI_API_KEY;
     this.ws = new WebSocket(url, undefined, {
       headers: {
         Authorization: 'Bearer ' + apiKey,
@@ -162,10 +161,9 @@ export class OpenAIRealtimeVoice extends MastraVoice {
     this.client = new EventEmitter();
     this.state = 'close';
     this.events = {} as EventMap;
-    this.tools = chatModel?.tools;
-    this.instructions = chatModel?.instructions;
-    this.speaker = speaker || DEFAULT_VOICE;
-    this.debug = debug;
+    this.speaker = options.speaker || DEFAULT_VOICE;
+    this.transcriber = options.transcriber || DEFAULT_TRANSCRIBER;
+    this.debug = options.debug || false;
     this.setupEventListeners();
   }
 
@@ -199,6 +197,22 @@ export class OpenAIRealtimeVoice extends MastraVoice {
     this.state = 'close';
   }
 
+  /**
+   * Equips the voice instance with a set of instructions.
+   * Instructions guide the model's behavior during conversations.
+   *
+   * @param instructions - Optional instructions for the model
+   * @returns Nothing; the instructions are stored on the voice instance
+   *
+   * @example
+   * ```typescript
+   * voice.addInstructions('You are a helpful assistant.');
+   * ```
+   */
+  addInstructions(instructions?: string) {
+    this.instructions = instructions;
+  }
+
   /**
    * Equips the voice instance with a set of tools.
    * Tools allow the model to perform additional actions during conversations.
@@ -216,10 +230,7 @@ export class OpenAIRealtimeVoice extends MastraVoice {
    * ```
    */
   addTools(tools?: TTools) {
-    const openaiTools = transformTools(tools);
-    this.updateConfig({
-      tools: openaiTools.map(t => t.openaiTool),
-    });
+    this.tools = tools || {};
   }
 
   /**
@@ -375,7 +386,7 @@ export class OpenAIRealtimeVoice extends MastraVoice {
       instructions: this.instructions,
       tools: openaiTools.map(t => t.openaiTool),
       input_audio_transcription: {
-        model: 'whisper-1',
+        model: this.transcriber,
       },
       voice: this.speaker,
     });
@@ -535,6 +546,11 @@ export class OpenAIRealtimeVoice extends MastraVoice {
 
     this.client.on('session.created', ev => {
       this.emit('session.created', ev);
+
+      const queue = this.queue.splice(0, this.queue.length);
+      for (const ev of queue) {
+        this.ws.send(JSON.stringify(ev));
+      }
     });
     this.client.on('session.updated', ev => {
       this.emit('session.updated', ev);
@@ -550,10 +566,10 @@ export class OpenAIRealtimeVoice extends MastraVoice {
       this.emit('speaker', speakerStream);
     });
     this.client.on('conversation.item.input_audio_transcription.delta', ev => {
-      this.emit('transcribing', { text: ev.delta, response_id: ev.response_id, role: 'user' });
+      this.emit('writing', { text: ev.delta, response_id: ev.response_id, role: 'user' });
     });
     this.client.on('conversation.item.input_audio_transcription.done', ev => {
-      this.emit('transcribing', { text: '\n', response_id: ev.response_id, role: 'user' });
+      this.emit('writing', { text: '\n', response_id: ev.response_id, role: 'user' });
     });
     this.client.on('response.audio.delta', ev => {
       const audio = Buffer.from(ev.delta, 'base64');
@@ -569,19 +585,19 @@ export class OpenAIRealtimeVoice extends MastraVoice {
       stream?.end();
     });
     this.client.on('response.audio_transcript.delta', ev => {
-      this.emit('writing', { text: ev.delta, response_id: ev.response_id });
+      this.emit('writing', { text: ev.delta, response_id: ev.response_id, role: 'assistant' });
     });
     this.client.on('response.audio_transcript.done', ev => {
-      this.emit('writing', { text: '\n', response_id: ev.response_id });
+      this.emit('writing', { text: '\n', response_id: ev.response_id, role: 'assistant' });
     });
     this.client.on('response.text.delta', ev => {
-      this.emit('writing', { text: ev.delta, response_id: ev.response_id });
+      this.emit('writing', { text: ev.delta, response_id: ev.response_id, role: 'assistant' });
     });
     this.client.on('response.text.done', ev => {
-      this.emit('writing', { text: '\n', response_id: ev.response_id });
+      this.emit('writing', { text: '\n', response_id: ev.response_id, role: 'assistant' });
     });
-    this.client.on('response.done', ev => {
-      this.handleFunctionCalls(ev);
+    this.client.on('response.done', async ev => {
+      await this.handleFunctionCalls(ev);
       this.emit('response.done', ev);
       speakerStreams.delete(ev.response.id);
     });
@@ -647,11 +663,15 @@ export class OpenAIRealtimeVoice extends MastraVoice {
   }
 
   private sendEvent(type: string, data: any) {
-    this.ws.send(
-      JSON.stringify({
-        type: type,
-        ...data,
-      }),
-    );
+    if (this.ws.readyState !== this.ws.OPEN) {
+      this.queue.push({ type: type, ...data });
+    } else {
+      this.ws.send(
+        JSON.stringify({
+          type: type,
+          ...data,
+        }),
+      );
+    }
   }
 }
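
Note: the `sendEvent` change above is what the changelog's "Realtime event queue" entry refers to. Events fired before the WebSocket is open are buffered in `this.queue` and flushed once the server's `session.created` event arrives, instead of being dropped. A standalone sketch of the pattern under simplified assumptions (the `QueuedSocket` name is illustrative, and it flushes on the raw socket `open` event rather than on `session.created`):

```typescript
import { WebSocket } from 'ws';

// Minimal sketch of the queue-until-open pattern used by sendEvent.
class QueuedSocket {
  private queue: object[] = [];

  constructor(private ws: WebSocket) {
    // Flush everything that was buffered while the socket was still connecting.
    ws.on('open', () => {
      for (const event of this.queue.splice(0, this.queue.length)) {
        ws.send(JSON.stringify(event));
      }
    });
  }

  send(type: string, data: object) {
    if (this.ws.readyState !== WebSocket.OPEN) {
      this.queue.push({ type, ...data }); // buffer instead of dropping
    } else {
      this.ws.send(JSON.stringify({ type, ...data }));
    }
  }
}
```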