@mastra/voice-openai-realtime 0.1.0-alpha.2 → 0.1.0

This diff compares the contents of publicly released package versions as they appear in their public registries. It is provided for informational purposes only.
@@ -1,23 +1,23 @@
 
-> @mastra/voice-openai-realtime@0.1.0-alpha.2 build /home/runner/work/mastra/mastra/voice/openai-realtime-api
+> @mastra/voice-openai-realtime@0.1.0-alpha.3 build /home/runner/work/mastra/mastra/voice/openai-realtime-api
 > tsup src/index.ts --format esm,cjs --experimental-dts --clean --treeshake
 
 CLI Building entry: src/index.ts
 CLI Using tsconfig: tsconfig.json
 CLI tsup v8.4.0
 TSC Build start
-TSC ⚡️ Build success in 9123ms
+TSC ⚡️ Build success in 7321ms
 DTS Build start
 CLI Target: es2022
 Analysis will use the bundled TypeScript version 5.8.2
 Writing package typings: /home/runner/work/mastra/mastra/voice/openai-realtime-api/dist/_tsup-dts-rollup.d.ts
 Analysis will use the bundled TypeScript version 5.8.2
 Writing package typings: /home/runner/work/mastra/mastra/voice/openai-realtime-api/dist/_tsup-dts-rollup.d.cts
-DTS ⚡️ Build success in 10151ms
+DTS ⚡️ Build success in 10514ms
 CLI Cleaning output folder
 ESM Build start
 CJS Build start
-CJS dist/index.cjs 17.80 KB
-CJS ⚡️ Build success in 842ms
-ESM dist/index.js 17.75 KB
-ESM ⚡️ Build success in 843ms
+ESM dist/index.js 18.38 KB
+ESM ⚡️ Build success in 841ms
+CJS dist/index.cjs 18.44 KB
+CJS ⚡️ Build success in 843ms
package/CHANGELOG.md CHANGED
@@ -1,5 +1,35 @@
 # @mastra/voice-openai-realtime
 
+## 0.1.0
+
+### Minor Changes
+
+- 443b118: This update removed an external dependency on an unmaintained package and implemented a native websocket connection.
+
+### Patch Changes
+
+- a4686e8: Realtime event queue
+- Updated dependencies [b4fbc59]
+- Updated dependencies [a838fde]
+- Updated dependencies [a8bd4cf]
+- Updated dependencies [7a3eeb0]
+- Updated dependencies [0b54522]
+- Updated dependencies [b3b34f5]
+- Updated dependencies [1af25d5]
+- Updated dependencies [a4686e8]
+- Updated dependencies [6530ad1]
+- Updated dependencies [27439ad]
+  - @mastra/core@0.7.0
+
+## 0.1.0-alpha.3
+
+### Patch Changes
+
+- a4686e8: Realtime event queue
+- Updated dependencies [b3b34f5]
+- Updated dependencies [a4686e8]
+  - @mastra/core@0.7.0-alpha.3
+
 ## 0.1.0-alpha.2
 
 ### Patch Changes
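
The headline change in this release is the move off an unmaintained realtime client onto a direct `ws` connection. A minimal sketch of that connection, assuming `OPENAI_API_KEY` is set in the environment; the URL and model are the constants from this diff, while the `OpenAI-Beta` header is an assumption (the diffed hunks below truncate the header list):

```typescript
import WebSocket from 'ws';

// Endpoint and default model as declared in this package's source.
const url = 'wss://api.openai.com/v1/realtime?model=gpt-4o-mini-realtime-preview-2024-12-17';

const ws = new WebSocket(url, undefined, {
  headers: {
    Authorization: 'Bearer ' + process.env.OPENAI_API_KEY,
    // Assumed from OpenAI's documented websocket handshake; not shown in this diff.
    'OpenAI-Beta': 'realtime=v1',
  },
});

// Realtime events arrive as JSON frames keyed by a `type` field.
ws.on('message', raw => {
  const event = JSON.parse(raw.toString());
  console.log(event.type);
});
```
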
@@ -50,14 +50,15 @@ export declare class OpenAIRealtimeVoice extends MastraVoice {
     private instructions?;
     private tools?;
     private debug;
+    private queue;
+    private transcriber;
     /**
      * Creates a new instance of OpenAIRealtimeVoice.
      *
      * @param options - Configuration options for the voice instance
-     * @param options.chatModel - Configuration for the chat model
-     * @param options.chatModel.model - The model ID to use (defaults to GPT-4 Mini Realtime)
-     * @param options.chatModel.apiKey - OpenAI API key. Falls back to process.env.OPENAI_API_KEY
-     * @param options.chatModel.tools - Tools configuration for the model
+     * @param options.url - The base URL for the OpenAI Realtime API
+     * @param options.model - The model ID to use (defaults to GPT-4 Mini Realtime)
+     * @param options.apiKey - OpenAI API key. Falls back to process.env.OPENAI_API_KEY
      * @param options.speaker - Voice ID to use (defaults to 'alloy')
      * @param options.debug - Enable debug mode
      *
@@ -72,15 +73,12 @@ export declare class OpenAIRealtimeVoice extends MastraVoice {
      * });
      * ```
      */
-    constructor({ chatModel, speaker, debug, }?: {
-        chatModel?: {
-            model?: string;
-            apiKey?: string;
-            tools?: TTools;
-            instructions?: string;
-            url?: string;
-        };
+    constructor(options?: {
+        model?: string;
+        url?: string;
+        apiKey?: string;
         speaker?: Realtime.Voice;
+        transcriber?: Realtime.AudioTranscriptionModel;
         debug?: boolean;
     });
     /**
@@ -108,6 +106,19 @@ export declare class OpenAIRealtimeVoice extends MastraVoice {
      * ```
      */
     close(): void;
+    /**
+     * Equips the voice instance with a set of instructions.
+     * Instructions allow the model to perform additional actions during conversations.
+     *
+     * @param instructions - Optional instructions to addInstructions
+     * @returns Transformed instructions ready for use with the model
+     *
+     * @example
+     * ```typescript
+     * voice.addInstuctions('You are a helpful assistant.');
+     * ```
+     */
+    addInstructions(instructions?: string): void;
     /**
      * Equips the voice instance with a set of tools.
      * Tools allow the model to perform additional actions during conversations.
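
The typings above capture the breaking shape change: the nested `chatModel` object is gone, options are flat, and `tools`/`instructions` move to dedicated methods. An illustrative before/after, using the defaults documented in this diff:

```typescript
import { OpenAIRealtimeVoice } from '@mastra/voice-openai-realtime';

// 0.1.0-alpha.2 and earlier: options nested under chatModel
// const voice = new OpenAIRealtimeVoice({
//   chatModel: {
//     model: 'gpt-4o-mini-realtime-preview-2024-12-17',
//     apiKey: process.env.OPENAI_API_KEY,
//   },
//   speaker: 'alloy',
// });

// 0.1.0: flat options, plus the new transcriber setting
const voice = new OpenAIRealtimeVoice({
  model: 'gpt-4o-mini-realtime-preview-2024-12-17',
  apiKey: process.env.OPENAI_API_KEY,
  speaker: 'alloy',
  transcriber: 'whisper-1',
});

// Instructions (and tools) are now applied via methods instead of constructor options.
voice.addInstructions('You are a helpful assistant.');
```
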
package/dist/index.cjs CHANGED
@@ -68,6 +68,7 @@ var isReadableStream = (obj) => {
 
 // src/index.ts
 var DEFAULT_VOICE = "alloy";
+var DEFAULT_TRANSCRIBER = "whisper-1";
 var DEFAULT_URL = "wss://api.openai.com/v1/realtime";
 var DEFAULT_MODEL = "gpt-4o-mini-realtime-preview-2024-12-17";
 var VOICES = ["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"];
@@ -79,14 +80,15 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
   instructions;
   tools;
   debug;
+  queue = [];
+  transcriber;
   /**
    * Creates a new instance of OpenAIRealtimeVoice.
    *
    * @param options - Configuration options for the voice instance
-   * @param options.chatModel - Configuration for the chat model
-   * @param options.chatModel.model - The model ID to use (defaults to GPT-4 Mini Realtime)
-   * @param options.chatModel.apiKey - OpenAI API key. Falls back to process.env.OPENAI_API_KEY
-   * @param options.chatModel.tools - Tools configuration for the model
+   * @param options.url - The base URL for the OpenAI Realtime API
+   * @param options.model - The model ID to use (defaults to GPT-4 Mini Realtime)
+   * @param options.apiKey - OpenAI API key. Falls back to process.env.OPENAI_API_KEY
    * @param options.speaker - Voice ID to use (defaults to 'alloy')
    * @param options.debug - Enable debug mode
    *
@@ -101,14 +103,10 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
    * });
    * ```
    */
-  constructor({
-    chatModel,
-    speaker,
-    debug = false
-  } = {}) {
+  constructor(options = {}) {
    super();
-    const url = `${chatModel?.url || DEFAULT_URL}?model=${chatModel?.model || DEFAULT_MODEL}`;
-    const apiKey = chatModel?.apiKey || process.env.OPENAI_API_KEY;
+    const url = `${options.url || DEFAULT_URL}?model=${options.model || DEFAULT_MODEL}`;
+    const apiKey = options.apiKey || process.env.OPENAI_API_KEY;
     this.ws = new ws.WebSocket(url, void 0, {
       headers: {
         Authorization: "Bearer " + apiKey,
@@ -118,10 +116,9 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
     this.client = new events.EventEmitter();
     this.state = "close";
     this.events = {};
-    this.tools = chatModel?.tools;
-    this.instructions = chatModel?.instructions;
-    this.speaker = speaker || DEFAULT_VOICE;
-    this.debug = debug;
+    this.speaker = options.speaker || DEFAULT_VOICE;
+    this.transcriber = options.transcriber || DEFAULT_TRANSCRIBER;
+    this.debug = options.debug || false;
     this.setupEventListeners();
   }
   /**
@@ -152,6 +149,21 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
     this.ws.close();
     this.state = "close";
   }
+  /**
+   * Equips the voice instance with a set of instructions.
+   * Instructions allow the model to perform additional actions during conversations.
+   *
+   * @param instructions - Optional instructions to addInstructions
+   * @returns Transformed instructions ready for use with the model
+   *
+   * @example
+   * ```typescript
+   * voice.addInstuctions('You are a helpful assistant.');
+   * ```
+   */
+  addInstructions(instructions) {
+    this.instructions = instructions;
+  }
   /**
    * Equips the voice instance with a set of tools.
    * Tools allow the model to perform additional actions during conversations.
@@ -169,10 +181,7 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
    * ```
    */
   addTools(tools) {
-    const openaiTools = transformTools(tools);
-    this.updateConfig({
-      tools: openaiTools.map((t) => t.openaiTool)
-    });
+    this.tools = tools || {};
   }
   /**
    * Emits a speaking event using the configured voice model.
@@ -316,7 +325,7 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
       instructions: this.instructions,
       tools: openaiTools.map((t) => t.openaiTool),
       input_audio_transcription: {
-        model: "whisper-1"
+        model: this.transcriber
       },
       voice: this.speaker
     });
@@ -464,6 +473,10 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
     });
     this.client.on("session.created", (ev) => {
       this.emit("session.created", ev);
+      const queue = this.queue.splice(0, this.queue.length);
+      for (const ev2 of queue) {
+        this.ws.send(JSON.stringify(ev2));
+      }
     });
     this.client.on("session.updated", (ev) => {
       this.emit("session.updated", ev);
@@ -476,10 +489,10 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
       this.emit("speaker", speakerStream);
     });
     this.client.on("conversation.item.input_audio_transcription.delta", (ev) => {
-      this.emit("transcribing", { text: ev.delta, response_id: ev.response_id, role: "user" });
+      this.emit("writing", { text: ev.delta, response_id: ev.response_id, role: "user" });
     });
     this.client.on("conversation.item.input_audio_transcription.done", (ev) => {
-      this.emit("transcribing", { text: "\n", response_id: ev.response_id, role: "user" });
+      this.emit("writing", { text: "\n", response_id: ev.response_id, role: "user" });
     });
     this.client.on("response.audio.delta", (ev) => {
       const audio = Buffer.from(ev.delta, "base64");
@@ -493,16 +506,16 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
       stream?.end();
     });
     this.client.on("response.audio_transcript.delta", (ev) => {
-      this.emit("writing", { text: ev.delta, response_id: ev.response_id });
+      this.emit("writing", { text: ev.delta, response_id: ev.response_id, role: "assistant" });
     });
     this.client.on("response.audio_transcript.done", (ev) => {
-      this.emit("writing", { text: "\n", response_id: ev.response_id });
+      this.emit("writing", { text: "\n", response_id: ev.response_id, role: "assistant" });
     });
     this.client.on("response.text.delta", (ev) => {
-      this.emit("writing", { text: ev.delta, response_id: ev.response_id });
+      this.emit("writing", { text: ev.delta, response_id: ev.response_id, role: "assistant" });
     });
     this.client.on("response.text.done", (ev) => {
-      this.emit("writing", { text: "\n", response_id: ev.response_id });
+      this.emit("writing", { text: "\n", response_id: ev.response_id, role: "assistant" });
    });
     this.client.on("response.done", async (ev) => {
       await this.handleFunctionCalls(ev);
@@ -567,12 +580,16 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
     return btoa(binary);
   }
   sendEvent(type, data) {
-    this.ws.send(
-      JSON.stringify({
-        type,
-        ...data
-      })
-    );
+    if (this.ws.readyState !== this.ws.OPEN) {
+      this.queue.push({ type, ...data });
+    } else {
+      this.ws.send(
+        JSON.stringify({
+          type,
+          ...data
+        })
+      );
+    }
   }
 };
 
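With user transcription folded into `writing` (formerly `transcribing`) and a `role` now attached to assistant deltas, a single listener can interleave both sides of the conversation. A sketch, assuming the instance exposes the usual `on(event, handler)` registration inherited from `MastraVoice`:

```typescript
// One stream of text deltas for both sides; `role` distinguishes the speaker.
// Done events arrive as a bare newline, which this formatting relies on.
voice.on('writing', ({ text, role }) => {
  process.stdout.write(role === 'user' ? `[you] ${text}` : text);
});
```
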
package/dist/index.js CHANGED
@@ -66,6 +66,7 @@ var isReadableStream = (obj) => {
 
 // src/index.ts
 var DEFAULT_VOICE = "alloy";
+var DEFAULT_TRANSCRIBER = "whisper-1";
 var DEFAULT_URL = "wss://api.openai.com/v1/realtime";
 var DEFAULT_MODEL = "gpt-4o-mini-realtime-preview-2024-12-17";
 var VOICES = ["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"];
@@ -77,14 +78,15 @@ var OpenAIRealtimeVoice = class extends MastraVoice {
   instructions;
   tools;
   debug;
+  queue = [];
+  transcriber;
   /**
    * Creates a new instance of OpenAIRealtimeVoice.
    *
    * @param options - Configuration options for the voice instance
-   * @param options.chatModel - Configuration for the chat model
-   * @param options.chatModel.model - The model ID to use (defaults to GPT-4 Mini Realtime)
-   * @param options.chatModel.apiKey - OpenAI API key. Falls back to process.env.OPENAI_API_KEY
-   * @param options.chatModel.tools - Tools configuration for the model
+   * @param options.url - The base URL for the OpenAI Realtime API
+   * @param options.model - The model ID to use (defaults to GPT-4 Mini Realtime)
+   * @param options.apiKey - OpenAI API key. Falls back to process.env.OPENAI_API_KEY
    * @param options.speaker - Voice ID to use (defaults to 'alloy')
    * @param options.debug - Enable debug mode
    *
@@ -99,14 +101,10 @@ var OpenAIRealtimeVoice = class extends MastraVoice {
    * });
    * ```
    */
-  constructor({
-    chatModel,
-    speaker,
-    debug = false
-  } = {}) {
+  constructor(options = {}) {
    super();
-    const url = `${chatModel?.url || DEFAULT_URL}?model=${chatModel?.model || DEFAULT_MODEL}`;
-    const apiKey = chatModel?.apiKey || process.env.OPENAI_API_KEY;
+    const url = `${options.url || DEFAULT_URL}?model=${options.model || DEFAULT_MODEL}`;
+    const apiKey = options.apiKey || process.env.OPENAI_API_KEY;
     this.ws = new WebSocket(url, void 0, {
       headers: {
         Authorization: "Bearer " + apiKey,
@@ -116,10 +114,9 @@ var OpenAIRealtimeVoice = class extends MastraVoice {
     this.client = new EventEmitter();
     this.state = "close";
     this.events = {};
-    this.tools = chatModel?.tools;
-    this.instructions = chatModel?.instructions;
-    this.speaker = speaker || DEFAULT_VOICE;
-    this.debug = debug;
+    this.speaker = options.speaker || DEFAULT_VOICE;
+    this.transcriber = options.transcriber || DEFAULT_TRANSCRIBER;
+    this.debug = options.debug || false;
     this.setupEventListeners();
   }
   /**
@@ -150,6 +147,21 @@ var OpenAIRealtimeVoice = class extends MastraVoice {
     this.ws.close();
     this.state = "close";
   }
+  /**
+   * Equips the voice instance with a set of instructions.
+   * Instructions allow the model to perform additional actions during conversations.
+   *
+   * @param instructions - Optional instructions to addInstructions
+   * @returns Transformed instructions ready for use with the model
+   *
+   * @example
+   * ```typescript
+   * voice.addInstuctions('You are a helpful assistant.');
+   * ```
+   */
+  addInstructions(instructions) {
+    this.instructions = instructions;
+  }
   /**
    * Equips the voice instance with a set of tools.
    * Tools allow the model to perform additional actions during conversations.
@@ -167,10 +179,7 @@ var OpenAIRealtimeVoice = class extends MastraVoice {
    * ```
    */
   addTools(tools) {
-    const openaiTools = transformTools(tools);
-    this.updateConfig({
-      tools: openaiTools.map((t) => t.openaiTool)
-    });
+    this.tools = tools || {};
   }
   /**
    * Emits a speaking event using the configured voice model.
@@ -314,7 +323,7 @@ var OpenAIRealtimeVoice = class extends MastraVoice {
       instructions: this.instructions,
       tools: openaiTools.map((t) => t.openaiTool),
       input_audio_transcription: {
-        model: "whisper-1"
+        model: this.transcriber
       },
       voice: this.speaker
     });
@@ -462,6 +471,10 @@ var OpenAIRealtimeVoice = class extends MastraVoice {
     });
     this.client.on("session.created", (ev) => {
       this.emit("session.created", ev);
+      const queue = this.queue.splice(0, this.queue.length);
+      for (const ev2 of queue) {
+        this.ws.send(JSON.stringify(ev2));
+      }
     });
     this.client.on("session.updated", (ev) => {
       this.emit("session.updated", ev);
@@ -474,10 +487,10 @@ var OpenAIRealtimeVoice = class extends MastraVoice {
       this.emit("speaker", speakerStream);
     });
     this.client.on("conversation.item.input_audio_transcription.delta", (ev) => {
-      this.emit("transcribing", { text: ev.delta, response_id: ev.response_id, role: "user" });
+      this.emit("writing", { text: ev.delta, response_id: ev.response_id, role: "user" });
     });
     this.client.on("conversation.item.input_audio_transcription.done", (ev) => {
-      this.emit("transcribing", { text: "\n", response_id: ev.response_id, role: "user" });
+      this.emit("writing", { text: "\n", response_id: ev.response_id, role: "user" });
     });
     this.client.on("response.audio.delta", (ev) => {
       const audio = Buffer.from(ev.delta, "base64");
@@ -491,16 +504,16 @@ var OpenAIRealtimeVoice = class extends MastraVoice {
     stream?.end();
     });
     this.client.on("response.audio_transcript.delta", (ev) => {
-      this.emit("writing", { text: ev.delta, response_id: ev.response_id });
+      this.emit("writing", { text: ev.delta, response_id: ev.response_id, role: "assistant" });
     });
     this.client.on("response.audio_transcript.done", (ev) => {
-      this.emit("writing", { text: "\n", response_id: ev.response_id });
+      this.emit("writing", { text: "\n", response_id: ev.response_id, role: "assistant" });
     });
     this.client.on("response.text.delta", (ev) => {
-      this.emit("writing", { text: ev.delta, response_id: ev.response_id });
+      this.emit("writing", { text: ev.delta, response_id: ev.response_id, role: "assistant" });
     });
     this.client.on("response.text.done", (ev) => {
-      this.emit("writing", { text: "\n", response_id: ev.response_id });
+      this.emit("writing", { text: "\n", response_id: ev.response_id, role: "assistant" });
     });
     this.client.on("response.done", async (ev) => {
       await this.handleFunctionCalls(ev);
@@ -565,12 +578,16 @@ var OpenAIRealtimeVoice = class extends MastraVoice {
     return btoa(binary);
   }
   sendEvent(type, data) {
-    this.ws.send(
-      JSON.stringify({
-        type,
-        ...data
-      })
-    );
+    if (this.ws.readyState !== this.ws.OPEN) {
+      this.queue.push({ type, ...data });
+    } else {
+      this.ws.send(
+        JSON.stringify({
+          type,
+          ...data
+        })
+      );
+    }
  }
 };
 
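The `sendEvent` change in both bundles is the "Realtime event queue" from the changelog: events produced before the socket is open are buffered, then flushed when the server confirms `session.created`. A standalone sketch of the pattern, with simplified names rather than the package's internals verbatim:

```typescript
import WebSocket from 'ws';

class QueuedRealtimeSocket {
  private queue: object[] = [];

  constructor(private ws: WebSocket) {}

  // Buffer while CONNECTING; send immediately once OPEN.
  send(type: string, data: object) {
    if (this.ws.readyState !== WebSocket.OPEN) {
      this.queue.push({ type, ...data });
    } else {
      this.ws.send(JSON.stringify({ type, ...data }));
    }
  }

  // The package flushes on the server's session.created event rather than the
  // raw socket 'open', so nothing is sent before the session actually exists.
  flush() {
    for (const ev of this.queue.splice(0, this.queue.length)) {
      this.ws.send(JSON.stringify(ev));
    }
  }
}
```
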
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@mastra/voice-openai-realtime",
-  "version": "0.1.0-alpha.2",
+  "version": "0.1.0",
   "description": "Mastra OpenAI Realtime API integration",
   "type": "module",
   "main": "dist/index.js",
@@ -22,7 +22,7 @@
     "openai-realtime-api": "^1.0.7",
     "ws": "^8.18.1",
     "zod-to-json-schema": "^3.24.1",
-    "@mastra/core": "^0.7.0-alpha.2"
+    "@mastra/core": "^0.7.0"
   },
   "devDependencies": {
     "@microsoft/api-extractor": "^7.49.2",
package/src/index.ts CHANGED
@@ -29,6 +29,8 @@ type EventMap = {
 /** Default voice for text-to-speech. 'alloy' provides a neutral, balanced voice suitable for most use cases */
 const DEFAULT_VOICE: Realtime.Voice = 'alloy';
 
+const DEFAULT_TRANSCRIBER: Realtime.AudioTranscriptionModel = 'whisper-1';
+
 const DEFAULT_URL = 'wss://api.openai.com/v1/realtime';
 
 /**
@@ -111,15 +113,16 @@ export class OpenAIRealtimeVoice extends MastraVoice {
   private instructions?: string;
   private tools?: TTools;
   private debug: boolean;
+  private queue: unknown[] = [];
+  private transcriber: Realtime.AudioTranscriptionModel;
 
   /**
    * Creates a new instance of OpenAIRealtimeVoice.
    *
    * @param options - Configuration options for the voice instance
-   * @param options.chatModel - Configuration for the chat model
-   * @param options.chatModel.model - The model ID to use (defaults to GPT-4 Mini Realtime)
-   * @param options.chatModel.apiKey - OpenAI API key. Falls back to process.env.OPENAI_API_KEY
-   * @param options.chatModel.tools - Tools configuration for the model
+   * @param options.url - The base URL for the OpenAI Realtime API
+   * @param options.model - The model ID to use (defaults to GPT-4 Mini Realtime)
+   * @param options.apiKey - OpenAI API key. Falls back to process.env.OPENAI_API_KEY
    * @param options.speaker - Voice ID to use (defaults to 'alloy')
    * @param options.debug - Enable debug mode
    *
@@ -134,25 +137,20 @@ export class OpenAIRealtimeVoice extends MastraVoice {
    * });
    * ```
    */
-  constructor({
-    chatModel,
-    speaker,
-    debug = false,
-  }: {
-    chatModel?: {
+  constructor(
+    options: {
       model?: string;
-      apiKey?: string;
-      tools?: TTools;
-      instructions?: string;
       url?: string;
-    };
-    speaker?: Realtime.Voice;
-    debug?: boolean;
-  } = {}) {
+      apiKey?: string;
+      speaker?: Realtime.Voice;
+      transcriber?: Realtime.AudioTranscriptionModel;
+      debug?: boolean;
+    } = {},
+  ) {
     super();
 
-    const url = `${chatModel?.url || DEFAULT_URL}?model=${chatModel?.model || DEFAULT_MODEL}`;
-    const apiKey = chatModel?.apiKey || process.env.OPENAI_API_KEY;
+    const url = `${options.url || DEFAULT_URL}?model=${options.model || DEFAULT_MODEL}`;
+    const apiKey = options.apiKey || process.env.OPENAI_API_KEY;
     this.ws = new WebSocket(url, undefined, {
       headers: {
         Authorization: 'Bearer ' + apiKey,
@@ -163,10 +161,9 @@ export class OpenAIRealtimeVoice extends MastraVoice {
     this.client = new EventEmitter();
     this.state = 'close';
     this.events = {} as EventMap;
-    this.tools = chatModel?.tools;
-    this.instructions = chatModel?.instructions;
-    this.speaker = speaker || DEFAULT_VOICE;
-    this.debug = debug;
+    this.speaker = options.speaker || DEFAULT_VOICE;
+    this.transcriber = options.transcriber || DEFAULT_TRANSCRIBER;
+    this.debug = options.debug || false;
     this.setupEventListeners();
   }
 
@@ -200,6 +197,22 @@ export class OpenAIRealtimeVoice extends MastraVoice {
     this.state = 'close';
   }
 
+  /**
+   * Equips the voice instance with a set of instructions.
+   * Instructions allow the model to perform additional actions during conversations.
+   *
+   * @param instructions - Optional instructions to addInstructions
+   * @returns Transformed instructions ready for use with the model
+   *
+   * @example
+   * ```typescript
+   * voice.addInstuctions('You are a helpful assistant.');
+   * ```
+   */
+  addInstructions(instructions?: string) {
+    this.instructions = instructions;
+  }
+
   /**
    * Equips the voice instance with a set of tools.
    * Tools allow the model to perform additional actions during conversations.
@@ -217,10 +230,7 @@ export class OpenAIRealtimeVoice extends MastraVoice {
    * ```
    */
   addTools(tools?: TTools) {
-    const openaiTools = transformTools(tools);
-    this.updateConfig({
-      tools: openaiTools.map(t => t.openaiTool),
-    });
+    this.tools = tools || {};
   }
 
   /**
@@ -376,7 +386,7 @@ export class OpenAIRealtimeVoice extends MastraVoice {
       instructions: this.instructions,
       tools: openaiTools.map(t => t.openaiTool),
       input_audio_transcription: {
-        model: 'whisper-1',
+        model: this.transcriber,
       },
       voice: this.speaker,
     });
@@ -536,6 +546,11 @@ export class OpenAIRealtimeVoice extends MastraVoice {
 
     this.client.on('session.created', ev => {
       this.emit('session.created', ev);
+
+      const queue = this.queue.splice(0, this.queue.length);
+      for (const ev of queue) {
+        this.ws.send(JSON.stringify(ev));
+      }
     });
     this.client.on('session.updated', ev => {
       this.emit('session.updated', ev);
@@ -551,10 +566,10 @@ export class OpenAIRealtimeVoice extends MastraVoice {
       this.emit('speaker', speakerStream);
     });
     this.client.on('conversation.item.input_audio_transcription.delta', ev => {
-      this.emit('transcribing', { text: ev.delta, response_id: ev.response_id, role: 'user' });
+      this.emit('writing', { text: ev.delta, response_id: ev.response_id, role: 'user' });
     });
     this.client.on('conversation.item.input_audio_transcription.done', ev => {
-      this.emit('transcribing', { text: '\n', response_id: ev.response_id, role: 'user' });
+      this.emit('writing', { text: '\n', response_id: ev.response_id, role: 'user' });
     });
     this.client.on('response.audio.delta', ev => {
       const audio = Buffer.from(ev.delta, 'base64');
@@ -570,16 +585,16 @@ export class OpenAIRealtimeVoice extends MastraVoice {
       stream?.end();
     });
     this.client.on('response.audio_transcript.delta', ev => {
-      this.emit('writing', { text: ev.delta, response_id: ev.response_id });
+      this.emit('writing', { text: ev.delta, response_id: ev.response_id, role: 'assistant' });
     });
     this.client.on('response.audio_transcript.done', ev => {
-      this.emit('writing', { text: '\n', response_id: ev.response_id });
+      this.emit('writing', { text: '\n', response_id: ev.response_id, role: 'assistant' });
     });
     this.client.on('response.text.delta', ev => {
-      this.emit('writing', { text: ev.delta, response_id: ev.response_id });
+      this.emit('writing', { text: ev.delta, response_id: ev.response_id, role: 'assistant' });
     });
     this.client.on('response.text.done', ev => {
-      this.emit('writing', { text: '\n', response_id: ev.response_id });
+      this.emit('writing', { text: '\n', response_id: ev.response_id, role: 'assistant' });
     });
     this.client.on('response.done', async ev => {
       await this.handleFunctionCalls(ev);
@@ -648,11 +663,15 @@ export class OpenAIRealtimeVoice extends MastraVoice {
   }
 
   private sendEvent(type: string, data: any) {
-    this.ws.send(
-      JSON.stringify({
-        type: type,
-        ...data,
-      }),
-    );
+    if (this.ws.readyState !== this.ws.OPEN) {
+      this.queue.push({ type: type, ...data });
+    } else {
+      this.ws.send(
+        JSON.stringify({
+          type: type,
+          ...data,
+        }),
+      );
+    }
   }
 }
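
Taken together, the source changes defer configuration: `addInstructions()` and `addTools()` now only record state, and the diff suggests the transformed tools, instructions, and `transcriber` are sent with the session configuration once the session is created. An illustrative end-to-end setup; the empty tool map is a placeholder standing in for a real `TTools` value:

```typescript
const voice = new OpenAIRealtimeVoice({
  speaker: 'alloy',
  transcriber: 'whisper-1',
  debug: true,
});

// These no longer push config over the socket immediately; they are stored
// locally and applied with the session configuration after 'session.created'.
voice.addInstructions('You are a concise voice assistant.');
voice.addTools({}); // placeholder TTools map, for illustration only
```
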