@mastra/voice-openai-realtime 0.0.0-vnextWorkflows-20250416071310 → 0.0.0-vnextWorkflows-20250417075051

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,17 +1,47 @@
  # @mastra/voice-openai-realtime

- ## 0.0.0-vnextWorkflows-20250416071310
+ ## 0.0.0-vnextWorkflows-20250417075051

  ### Minor Changes

+ - 9aaa64b: Don't connect the ws connection until connect is called
+ - 1ef0f1f: Disconnect
  - 185f8e9: Emit add tools and silence warnings

  ### Patch Changes

+ - 6cf1417: Voice reference and tool-call-start
+ - Updated dependencies [9ee4293]
  - Updated dependencies [03f3cd0]
  - Updated dependencies [8a8a73b]
+ - Updated dependencies [fe3ae4d]
  - Updated dependencies [6f92295]
- - @mastra/core@0.0.0-vnextWorkflows-20250416071310
+ - @mastra/core@0.0.0-vnextWorkflows-20250417075051
+
+ ## 0.2.0-alpha.5
+
+ ### Patch Changes
+
+ - 6cf1417: Voice reference and tool-call-start
+ - Updated dependencies [fe3ae4d]
+ - @mastra/core@0.9.0-alpha.3
+
+ ## 0.2.0-alpha.4
+
+ ### Minor Changes
+
+ - 1ef0f1f: Disconnect
+
+ ## 0.2.0-alpha.3
+
+ ### Minor Changes
+
+ - 9aaa64b: Don't connect the ws connection until connect is called
+
+ ### Patch Changes
+
+ - Updated dependencies [9ee4293]
+ - @mastra/core@0.8.4-alpha.2

  ## 0.2.0-alpha.2

package/README.md CHANGED
@@ -20,6 +20,7 @@ OPENAI_API_KEY=your_api_key

  ```typescript
  import { OpenAIRealtimeVoice } from '@mastra/voice-openai-realtime';
+ import { getMicrophoneStream } from '@mastra/node-audio';

  // Create a voice instance with default configuration
  const voice = new OpenAIRealtimeVoice();
@@ -43,7 +43,8 @@ export declare type OpenAIExecuteFunction = (args: any) => Promise<any>;
   * ```
   */
  export declare class OpenAIRealtimeVoice extends MastraVoice {
- private ws;
+ private options;
+ private ws?;
  private state;
  private client;
  private events;
@@ -220,6 +221,7 @@ export declare class OpenAIRealtimeVoice extends MastraVoice {
   * ```
   */
  connect(): Promise<void>;
+ disconnect(): void;
  /**
   * Streams audio data in real-time to the OpenAI service.
   * Useful for continuous audio streaming scenarios like live microphone input.
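In the public typings, `ws` becomes optional (`private ws?`) because the socket only exists after `connect()`, and `disconnect(): void` joins the class surface. A small sketch of the resulting teardown pattern, using only the methods declared above:

```typescript
const voice = new OpenAIRealtimeVoice();

try {
  // Before this call the private `ws` field is undefined, hence `ws?`.
  await voice.connect();
  // ... stream audio, run tools, etc. ...
} finally {
  // Always release the WebSocket, even if something above throws.
  voice.disconnect();
}
```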
package/dist/index.cjs CHANGED
@@ -76,15 +76,6 @@ var DEFAULT_URL = "wss://api.openai.com/v1/realtime";
  var DEFAULT_MODEL = "gpt-4o-mini-realtime-preview-2024-12-17";
  var VOICES = ["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"];
  var OpenAIRealtimeVoice = class extends voice.MastraVoice {
- ws;
- state;
- client;
- events;
- instructions;
- tools;
- debug;
- queue = [];
- transcriber;
  /**
   * Creates a new instance of OpenAIRealtimeVoice.
   *
@@ -108,22 +99,23 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
   */
  constructor(options = {}) {
  super();
- const url = `${options.url || DEFAULT_URL}?model=${options.model || DEFAULT_MODEL}`;
- const apiKey = options.apiKey || process.env.OPENAI_API_KEY;
- this.ws = new ws.WebSocket(url, void 0, {
- headers: {
- Authorization: "Bearer " + apiKey,
- "OpenAI-Beta": "realtime=v1"
- }
- });
+ this.options = options;
  this.client = new events.EventEmitter();
  this.state = "close";
  this.events = {};
  this.speaker = options.speaker || DEFAULT_VOICE;
  this.transcriber = options.transcriber || DEFAULT_TRANSCRIBER;
  this.debug = options.debug || false;
- this.setupEventListeners();
  }
+ ws;
+ state;
+ client;
+ events;
+ instructions;
+ tools;
+ debug;
+ queue = [];
+ transcriber;
  /**
   * Returns a list of available voice speakers.
   *
@@ -300,7 +292,7 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
  }
  waitForOpen() {
  return new Promise((resolve) => {
- this.ws.on("open", resolve);
+ this.ws?.on("open", resolve);
  });
  }
  waitForSessionCreated() {
@@ -321,8 +313,16 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
   * ```
   */
  async connect() {
- await this.waitForOpen();
- await this.waitForSessionCreated();
+ const url = `${this.options.url || DEFAULT_URL}?model=${this.options.model || DEFAULT_MODEL}`;
+ const apiKey = this.options.apiKey || process.env.OPENAI_API_KEY;
+ this.ws = new ws.WebSocket(url, void 0, {
+ headers: {
+ Authorization: "Bearer " + apiKey,
+ "OpenAI-Beta": "realtime=v1"
+ }
+ });
+ this.setupEventListeners();
+ await Promise.all([this.waitForOpen(), this.waitForSessionCreated()]);
  const openaiTools = transformTools(this.tools);
  this.updateConfig({
  instructions: this.instructions,
@@ -334,6 +334,10 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
  });
  this.state = "open";
  }
+ disconnect() {
+ this.state = "close";
+ this.ws?.close();
+ }
  /**
   * Streams audio data in real-time to the OpenAI service.
   * Useful for continuous audio streaming scenarios like live microphone input.
@@ -466,6 +470,9 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
  }
  setupEventListeners() {
  const speakerStreams = /* @__PURE__ */ new Map();
+ if (!this.ws) {
+ throw new Error("WebSocket not initialized");
+ }
  this.ws.on("message", (message) => {
  const data = JSON.parse(message.toString());
  this.client.emit(data.type, data);
@@ -478,7 +485,7 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
  this.emit("session.created", ev);
  const queue = this.queue.splice(0, this.queue.length);
  for (const ev2 of queue) {
- this.ws.send(JSON.stringify(ev2));
+ this.ws?.send(JSON.stringify(ev2));
  }
  });
  this.client.on("session.updated", (ev) => {
@@ -541,16 +548,25 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
  console.warn(`Tool "${output.name}" not found`);
  return;
  }
+ if (tool?.execute) {
+ this.emit("tool-call-start", {
+ toolCallId: output.call_id,
+ toolName: output.name,
+ toolDescription: tool.description,
+ args: context
+ });
+ }
  const result = await tool?.execute?.(
  { context },
  {
- toolCallId: "unknown",
+ toolCallId: output.call_id,
  messages: []
  }
  );
- this.emit("tool-result", {
+ this.emit("tool-call-result", {
  toolCallId: output.call_id,
  toolName: output.name,
+ toolDescription: tool.description,
  args: context,
  result
  });
@@ -589,10 +605,10 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
  return btoa(binary);
  }
  sendEvent(type, data) {
- if (this.ws.readyState !== this.ws.OPEN) {
+ if (!this.ws || this.ws.readyState !== this.ws.OPEN) {
  this.queue.push({ type, ...data });
  } else {
- this.ws.send(
+ this.ws?.send(
  JSON.stringify({
  type,
  ...data
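Two details of the compiled output are worth calling out: `connect()` now waits for the socket `open` event and the `session.created` event in parallel via `Promise.all`, and `sendEvent()` queues outgoing events while the socket is missing or not yet open, flushing the backlog in the `session.created` handler. Below is an illustrative re-creation of that queue-and-flush pattern; the names (`PendingQueue`, `flush`) are hypothetical and not part of the package:

```typescript
// Illustrative re-creation of the behavior seen in sendEvent() and the
// session.created handler; names are hypothetical.
type RealtimeEvent = { type: string } & Record<string, unknown>;

class PendingQueue {
  private queue: RealtimeEvent[] = [];

  // Mirrors sendEvent(): buffer while the socket is absent or not yet open.
  push(ev: RealtimeEvent, socketOpen: boolean, send: (payload: string) => void) {
    if (!socketOpen) {
      this.queue.push(ev);
    } else {
      send(JSON.stringify(ev));
    }
  }

  // Mirrors the session.created handler: drain everything buffered so far.
  flush(send: (payload: string) => void) {
    const pending = this.queue.splice(0, this.queue.length);
    for (const ev of pending) {
      send(JSON.stringify(ev));
    }
  }
}
```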
package/dist/index.js CHANGED
@@ -74,15 +74,6 @@ var DEFAULT_URL = "wss://api.openai.com/v1/realtime";
  var DEFAULT_MODEL = "gpt-4o-mini-realtime-preview-2024-12-17";
  var VOICES = ["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"];
  var OpenAIRealtimeVoice = class extends MastraVoice {
- ws;
- state;
- client;
- events;
- instructions;
- tools;
- debug;
- queue = [];
- transcriber;
  /**
   * Creates a new instance of OpenAIRealtimeVoice.
   *
@@ -106,22 +97,23 @@ var OpenAIRealtimeVoice = class extends MastraVoice {
   */
  constructor(options = {}) {
  super();
- const url = `${options.url || DEFAULT_URL}?model=${options.model || DEFAULT_MODEL}`;
- const apiKey = options.apiKey || process.env.OPENAI_API_KEY;
- this.ws = new WebSocket(url, void 0, {
- headers: {
- Authorization: "Bearer " + apiKey,
- "OpenAI-Beta": "realtime=v1"
- }
- });
+ this.options = options;
  this.client = new EventEmitter();
  this.state = "close";
  this.events = {};
  this.speaker = options.speaker || DEFAULT_VOICE;
  this.transcriber = options.transcriber || DEFAULT_TRANSCRIBER;
  this.debug = options.debug || false;
- this.setupEventListeners();
  }
+ ws;
+ state;
+ client;
+ events;
+ instructions;
+ tools;
+ debug;
+ queue = [];
+ transcriber;
  /**
   * Returns a list of available voice speakers.
   *
@@ -298,7 +290,7 @@ var OpenAIRealtimeVoice = class extends MastraVoice {
  }
  waitForOpen() {
  return new Promise((resolve) => {
- this.ws.on("open", resolve);
+ this.ws?.on("open", resolve);
  });
  }
  waitForSessionCreated() {
@@ -319,8 +311,16 @@ var OpenAIRealtimeVoice = class extends MastraVoice {
   * ```
   */
  async connect() {
- await this.waitForOpen();
- await this.waitForSessionCreated();
+ const url = `${this.options.url || DEFAULT_URL}?model=${this.options.model || DEFAULT_MODEL}`;
+ const apiKey = this.options.apiKey || process.env.OPENAI_API_KEY;
+ this.ws = new WebSocket(url, void 0, {
+ headers: {
+ Authorization: "Bearer " + apiKey,
+ "OpenAI-Beta": "realtime=v1"
+ }
+ });
+ this.setupEventListeners();
+ await Promise.all([this.waitForOpen(), this.waitForSessionCreated()]);
  const openaiTools = transformTools(this.tools);
  this.updateConfig({
  instructions: this.instructions,
@@ -332,6 +332,10 @@ var OpenAIRealtimeVoice = class extends MastraVoice {
  });
  this.state = "open";
  }
+ disconnect() {
+ this.state = "close";
+ this.ws?.close();
+ }
  /**
   * Streams audio data in real-time to the OpenAI service.
   * Useful for continuous audio streaming scenarios like live microphone input.
@@ -464,6 +468,9 @@ var OpenAIRealtimeVoice = class extends MastraVoice {
  }
  setupEventListeners() {
  const speakerStreams = /* @__PURE__ */ new Map();
+ if (!this.ws) {
+ throw new Error("WebSocket not initialized");
+ }
  this.ws.on("message", (message) => {
  const data = JSON.parse(message.toString());
  this.client.emit(data.type, data);
@@ -476,7 +483,7 @@ var OpenAIRealtimeVoice = class extends MastraVoice {
  this.emit("session.created", ev);
  const queue = this.queue.splice(0, this.queue.length);
  for (const ev2 of queue) {
- this.ws.send(JSON.stringify(ev2));
+ this.ws?.send(JSON.stringify(ev2));
  }
  });
  this.client.on("session.updated", (ev) => {
@@ -539,16 +546,25 @@ var OpenAIRealtimeVoice = class extends MastraVoice {
  console.warn(`Tool "${output.name}" not found`);
  return;
  }
+ if (tool?.execute) {
+ this.emit("tool-call-start", {
+ toolCallId: output.call_id,
+ toolName: output.name,
+ toolDescription: tool.description,
+ args: context
+ });
+ }
  const result = await tool?.execute?.(
  { context },
  {
- toolCallId: "unknown",
+ toolCallId: output.call_id,
  messages: []
  }
  );
- this.emit("tool-result", {
+ this.emit("tool-call-result", {
  toolCallId: output.call_id,
  toolName: output.name,
+ toolDescription: tool.description,
  args: context,
  result
  });
@@ -587,10 +603,10 @@ var OpenAIRealtimeVoice = class extends MastraVoice {
  return btoa(binary);
  }
  sendEvent(type, data) {
- if (this.ws.readyState !== this.ws.OPEN) {
+ if (!this.ws || this.ws.readyState !== this.ws.OPEN) {
  this.queue.push({ type, ...data });
  } else {
- this.ws.send(
+ this.ws?.send(
  JSON.stringify({
  type,
  ...data
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@mastra/voice-openai-realtime",
- "version": "0.0.0-vnextWorkflows-20250416071310",
+ "version": "0.0.0-vnextWorkflows-20250417075051",
  "description": "Mastra OpenAI Realtime API integration",
  "type": "module",
  "main": "dist/index.js",
@@ -23,7 +23,7 @@
  "openai-realtime-api": "^1.0.7",
  "ws": "^8.18.1",
  "zod-to-json-schema": "^3.24.5",
- "@mastra/core": "0.0.0-vnextWorkflows-20250416071310"
+ "@mastra/core": "0.0.0-vnextWorkflows-20250417075051"
  },
  "devDependencies": {
  "@microsoft/api-extractor": "^7.52.2",
package/src/index.ts CHANGED
@@ -106,7 +106,7 @@ type RealtimeClientServerEventMap = {
   * ```
   */
  export class OpenAIRealtimeVoice extends MastraVoice {
- private ws: WebSocket;
+ private ws?: WebSocket;
  private state: 'close' | 'open';
  private client: EventEmitter<RealtimeClientServerEventMap>;
  private events: EventMap;
@@ -138,7 +138,7 @@ export class OpenAIRealtimeVoice extends MastraVoice {
   * ```
   */
  constructor(
- options: {
+ private options: {
  model?: string;
  url?: string;
  apiKey?: string;
@@ -149,22 +149,12 @@ export class OpenAIRealtimeVoice extends MastraVoice {
  ) {
  super();

- const url = `${options.url || DEFAULT_URL}?model=${options.model || DEFAULT_MODEL}`;
- const apiKey = options.apiKey || process.env.OPENAI_API_KEY;
- this.ws = new WebSocket(url, undefined, {
- headers: {
- Authorization: 'Bearer ' + apiKey,
- 'OpenAI-Beta': 'realtime=v1',
- },
- });
-
  this.client = new EventEmitter();
  this.state = 'close';
  this.events = {} as EventMap;
  this.speaker = options.speaker || DEFAULT_VOICE;
  this.transcriber = options.transcriber || DEFAULT_TRANSCRIBER;
  this.debug = options.debug || false;
- this.setupEventListeners();
  }

  /**
@@ -355,7 +345,7 @@ export class OpenAIRealtimeVoice extends MastraVoice {

  waitForOpen() {
  return new Promise(resolve => {
- this.ws.on('open', resolve);
+ this.ws?.on('open', resolve);
  });
  }

@@ -378,8 +368,17 @@ export class OpenAIRealtimeVoice extends MastraVoice {
   * ```
   */
  async connect() {
- await this.waitForOpen();
- await this.waitForSessionCreated();
+ const url = `${this.options.url || DEFAULT_URL}?model=${this.options.model || DEFAULT_MODEL}`;
+ const apiKey = this.options.apiKey || process.env.OPENAI_API_KEY;
+ this.ws = new WebSocket(url, undefined, {
+ headers: {
+ Authorization: 'Bearer ' + apiKey,
+ 'OpenAI-Beta': 'realtime=v1',
+ },
+ });
+
+ this.setupEventListeners();
+ await Promise.all([this.waitForOpen(), this.waitForSessionCreated()]);

  const openaiTools = transformTools(this.tools);
  this.updateConfig({
@@ -393,6 +392,11 @@ export class OpenAIRealtimeVoice extends MastraVoice {
  this.state = 'open';
  }

+ disconnect() {
+ this.state = 'close';
+ this.ws?.close();
+ }
+
  /**
   * Streams audio data in real-time to the OpenAI service.
   * Useful for continuous audio streaming scenarios like live microphone input.
@@ -534,6 +538,10 @@ export class OpenAIRealtimeVoice extends MastraVoice {
  private setupEventListeners(): void {
  const speakerStreams = new Map<string, StreamWithId>();

+ if (!this.ws) {
+ throw new Error('WebSocket not initialized');
+ }
+
  this.ws.on('message', message => {
  const data = JSON.parse(message.toString());
  this.client.emit(data.type, data);
@@ -549,7 +557,7 @@ export class OpenAIRealtimeVoice extends MastraVoice {

  const queue = this.queue.splice(0, this.queue.length);
  for (const ev of queue) {
- this.ws.send(JSON.stringify(ev));
+ this.ws?.send(JSON.stringify(ev));
  }
  });
  this.client.on('session.updated', ev => {
@@ -619,19 +627,32 @@ export class OpenAIRealtimeVoice extends MastraVoice {
  console.warn(`Tool "${output.name}" not found`);
  return;
  }
+
+ if (tool?.execute) {
+ this.emit('tool-call-start', {
+ toolCallId: output.call_id,
+ toolName: output.name,
+ toolDescription: tool.description,
+ args: context,
+ });
+ }
+
  const result = await tool?.execute?.(
  { context },
  {
- toolCallId: 'unknown',
+ toolCallId: output.call_id,
  messages: [],
  },
  );
- this.emit('tool-result', {
+
+ this.emit('tool-call-result', {
  toolCallId: output.call_id,
  toolName: output.name,
+ toolDescription: tool.description,
  args: context,
  result,
  });
+
  this.sendEvent('conversation.item.create', {
  item: {
  type: 'function_call_output',
@@ -669,10 +690,10 @@ export class OpenAIRealtimeVoice extends MastraVoice {
  }

  private sendEvent(type: string, data: any) {
- if (this.ws.readyState !== this.ws.OPEN) {
+ if (!this.ws || this.ws.readyState !== this.ws.OPEN) {
  this.queue.push({ type: type, ...data });
  } else {
- this.ws.send(
+ this.ws?.send(
  JSON.stringify({
  type: type,
  ...data,
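Finally, the source diff renames the `tool-result` event to `tool-call-result`, emits a new `tool-call-start` event before a tool's `execute` runs, and replaces the placeholder `toolCallId: 'unknown'` with the real `output.call_id`. A hedged listener sketch, assuming the instance exposes the usual `on(event, handler)` subscription from MastraVoice (not shown in this diff); the payload fields match the `emit` calls above:

```typescript
const voice = new OpenAIRealtimeVoice();

// Fired just before a matched tool's execute() is invoked.
voice.on('tool-call-start', ({ toolCallId, toolName, toolDescription, args }) => {
  console.log(`tool ${toolName} (${toolCallId}) starting: ${toolDescription}`, args);
});

// Fired after execute() resolves; replaces the old 'tool-result' event.
voice.on('tool-call-result', ({ toolCallId, toolName, result }) => {
  console.log(`tool ${toolName} (${toolCallId}) finished`, result);
});

await voice.connect();
```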