@mastra/voice-openai-realtime 0.0.0-vnextWorkflows-20250416071310 → 0.0.0-vnextWorkflows-20250417075051
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +32 -2
- package/README.md +1 -0
- package/dist/_tsup-dts-rollup.d.cts +3 -1
- package/dist/_tsup-dts-rollup.d.ts +3 -1
- package/dist/index.cjs +42 -26
- package/dist/index.js +42 -26
- package/package.json +2 -2
- package/src/index.ts +41 -20
package/CHANGELOG.md
CHANGED
````diff
@@ -1,17 +1,47 @@
 # @mastra/voice-openai-realtime
 
-## 0.0.0-vnextWorkflows-20250416071310
+## 0.0.0-vnextWorkflows-20250417075051
 
 ### Minor Changes
 
+- 9aaa64b: Don't connect the ws connection until connect is called
+- 1ef0f1f: Disconnect
 - 185f8e9: Emit add tools and silence warnings
 
 ### Patch Changes
 
+- 6cf1417: Voice reference and tool-call-start
+- Updated dependencies [9ee4293]
 - Updated dependencies [03f3cd0]
 - Updated dependencies [8a8a73b]
+- Updated dependencies [fe3ae4d]
 - Updated dependencies [6f92295]
-  - @mastra/core@0.0.0-vnextWorkflows-20250416071310
+  - @mastra/core@0.0.0-vnextWorkflows-20250417075051
+
+## 0.2.0-alpha.5
+
+### Patch Changes
+
+- 6cf1417: Voice reference and tool-call-start
+- Updated dependencies [fe3ae4d]
+  - @mastra/core@0.9.0-alpha.3
+
+## 0.2.0-alpha.4
+
+### Minor Changes
+
+- 1ef0f1f: Disconnect
+
+## 0.2.0-alpha.3
+
+### Minor Changes
+
+- 9aaa64b: Don't connect the ws connection until connect is called
+
+### Patch Changes
+
+- Updated dependencies [9ee4293]
+  - @mastra/core@0.8.4-alpha.2
 
 ## 0.2.0-alpha.2
 
````
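Taken together, the `9aaa64b` and `1ef0f1f` entries change the connection lifecycle: the WebSocket is no longer opened in the constructor, and a `disconnect()` method is added. A minimal sketch of the resulting usage, assuming `OPENAI_API_KEY` is set in the environment (error handling omitted):

```typescript
import { OpenAIRealtimeVoice } from '@mastra/voice-openai-realtime';

// Constructing the instance no longer opens a socket (9aaa64b);
// the ws connection is now created lazily inside connect().
const voice = new OpenAIRealtimeVoice();

await voice.connect(); // opens the WebSocket and waits for the session
// ... speak, stream audio, handle tool calls ...
voice.disconnect();    // new in this release (1ef0f1f): closes the socket
```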
package/README.md
CHANGED
````diff
@@ -20,6 +20,7 @@ OPENAI_API_KEY=your_api_key
 
 ```typescript
 import { OpenAIRealtimeVoice } from '@mastra/voice-openai-realtime';
+import { getMicrophoneStream } from '@mastra/node-audio';
 
 // Create a voice instance with default configuration
 const voice = new OpenAIRealtimeVoice();
````
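The README now pairs the voice class with `@mastra/node-audio` for microphone input. A sketch of how the two fit together, assuming the audio-streaming method described by the JSDoc further down in this diff is named `send()` (exact `getMicrophoneStream` options may differ):

```typescript
import { OpenAIRealtimeVoice } from '@mastra/voice-openai-realtime';
import { getMicrophoneStream } from '@mastra/node-audio';

const voice = new OpenAIRealtimeVoice();
await voice.connect();                   // must come first: the socket is now lazy

// Stream live microphone audio into the realtime session.
const micStream = getMicrophoneStream(); // assumed default-options call
await voice.send(micStream);
```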
package/dist/_tsup-dts-rollup.d.cts
CHANGED
````diff
@@ -43,7 +43,8 @@ export declare type OpenAIExecuteFunction = (args: any) => Promise<any>;
  * ```
  */
 export declare class OpenAIRealtimeVoice extends MastraVoice {
-    private ws;
+    private options;
+    private ws?;
     private state;
     private client;
     private events;
@@ -220,6 +221,7 @@ export declare class OpenAIRealtimeVoice extends MastraVoice {
      * ```
      */
     connect(): Promise<void>;
+    disconnect(): void;
     /**
      * Streams audio data in real-time to the OpenAI service.
      * Useful for continuous audio streaming scenarios like live microphone input.
````
package/dist/_tsup-dts-rollup.d.ts
CHANGED
````diff
@@ -43,7 +43,8 @@ export declare type OpenAIExecuteFunction = (args: any) => Promise<any>;
  * ```
  */
 export declare class OpenAIRealtimeVoice extends MastraVoice {
-    private ws;
+    private options;
+    private ws?;
     private state;
     private client;
     private events;
@@ -220,6 +221,7 @@ export declare class OpenAIRealtimeVoice extends MastraVoice {
      * ```
      */
     connect(): Promise<void>;
+    disconnect(): void;
     /**
      * Streams audio data in real-time to the OpenAI service.
      * Useful for continuous audio streaming scenarios like live microphone input.
````
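On the public type surface, the visible additions are the optional private `ws` member and a synchronous `disconnect(): void` alongside the asynchronous `connect(): Promise<void>`. A sketch of the pairing these signatures suggest; since `disconnect()` only closes an already-created socket (`this.ws?.close()` in the implementation below), calling it before `connect()` should be a no-op:

```typescript
const voice = new OpenAIRealtimeVoice();
try {
  await voice.connect(); // Promise<void>: resolves once the session is created
  // ... interact with the realtime session ...
} finally {
  voice.disconnect();    // void: synchronous close, safe even if connect() failed early
}
```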
package/dist/index.cjs
CHANGED
````diff
@@ -76,15 +76,6 @@ var DEFAULT_URL = "wss://api.openai.com/v1/realtime";
 var DEFAULT_MODEL = "gpt-4o-mini-realtime-preview-2024-12-17";
 var VOICES = ["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"];
 var OpenAIRealtimeVoice = class extends voice.MastraVoice {
-  ws;
-  state;
-  client;
-  events;
-  instructions;
-  tools;
-  debug;
-  queue = [];
-  transcriber;
   /**
    * Creates a new instance of OpenAIRealtimeVoice.
    *
@@ -108,22 +99,23 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
    */
   constructor(options = {}) {
     super();
-    const url = `${options.url || DEFAULT_URL}?model=${options.model || DEFAULT_MODEL}`;
-    const apiKey = options.apiKey || process.env.OPENAI_API_KEY;
-    this.ws = new ws.WebSocket(url, void 0, {
-      headers: {
-        Authorization: "Bearer " + apiKey,
-        "OpenAI-Beta": "realtime=v1"
-      }
-    });
+    this.options = options;
     this.client = new events.EventEmitter();
     this.state = "close";
     this.events = {};
     this.speaker = options.speaker || DEFAULT_VOICE;
     this.transcriber = options.transcriber || DEFAULT_TRANSCRIBER;
     this.debug = options.debug || false;
-    this.setupEventListeners();
   }
+  ws;
+  state;
+  client;
+  events;
+  instructions;
+  tools;
+  debug;
+  queue = [];
+  transcriber;
   /**
    * Returns a list of available voice speakers.
    *
@@ -300,7 +292,7 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
   }
   waitForOpen() {
     return new Promise((resolve) => {
-      this.ws.on("open", resolve);
+      this.ws?.on("open", resolve);
     });
   }
   waitForSessionCreated() {
@@ -321,8 +313,16 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
    * ```
    */
   async connect() {
-
-
+    const url = `${this.options.url || DEFAULT_URL}?model=${this.options.model || DEFAULT_MODEL}`;
+    const apiKey = this.options.apiKey || process.env.OPENAI_API_KEY;
+    this.ws = new ws.WebSocket(url, void 0, {
+      headers: {
+        Authorization: "Bearer " + apiKey,
+        "OpenAI-Beta": "realtime=v1"
+      }
+    });
+    this.setupEventListeners();
+    await Promise.all([this.waitForOpen(), this.waitForSessionCreated()]);
     const openaiTools = transformTools(this.tools);
     this.updateConfig({
       instructions: this.instructions,
@@ -334,6 +334,10 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
     });
     this.state = "open";
   }
+  disconnect() {
+    this.state = "close";
+    this.ws?.close();
+  }
   /**
    * Streams audio data in real-time to the OpenAI service.
    * Useful for continuous audio streaming scenarios like live microphone input.
@@ -466,6 +470,9 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
   }
   setupEventListeners() {
     const speakerStreams = /* @__PURE__ */ new Map();
+    if (!this.ws) {
+      throw new Error("WebSocket not initialized");
+    }
     this.ws.on("message", (message) => {
       const data = JSON.parse(message.toString());
       this.client.emit(data.type, data);
@@ -478,7 +485,7 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
       this.emit("session.created", ev);
       const queue = this.queue.splice(0, this.queue.length);
       for (const ev2 of queue) {
-        this.ws.send(JSON.stringify(ev2));
+        this.ws?.send(JSON.stringify(ev2));
       }
     });
     this.client.on("session.updated", (ev) => {
@@ -541,16 +548,25 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
         console.warn(`Tool "${output.name}" not found`);
         return;
       }
+      if (tool?.execute) {
+        this.emit("tool-call-start", {
+          toolCallId: output.call_id,
+          toolName: output.name,
+          toolDescription: tool.description,
+          args: context
+        });
+      }
       const result = await tool?.execute?.(
         { context },
         {
-          toolCallId:
+          toolCallId: output.call_id,
           messages: []
         }
       );
-      this.emit("tool-result", {
+      this.emit("tool-call-result", {
         toolCallId: output.call_id,
         toolName: output.name,
+        toolDescription: tool.description,
         args: context,
         result
       });
@@ -589,10 +605,10 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
     return btoa(binary);
   }
   sendEvent(type, data) {
-    if (this.ws.readyState !== this.ws.OPEN) {
+    if (!this.ws || this.ws.readyState !== this.ws.OPEN) {
       this.queue.push({ type, ...data });
     } else {
-      this.ws.send(
+      this.ws?.send(
         JSON.stringify({
           type,
           ...data
````
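The compiled `sendEvent` and `session.created` handler above implement a buffer-and-flush pattern: events emitted before the socket is open are queued, then replayed once the session exists. A distilled re-statement of that logic (illustrative only, not the package's exported API; `EventBuffer` is a hypothetical name):

```typescript
import WebSocket from 'ws';

class EventBuffer {
  private queue: Record<string, unknown>[] = [];
  constructor(private ws?: WebSocket) {}

  // Mirrors sendEvent: buffer while disconnected or still connecting.
  send(type: string, data: Record<string, unknown>) {
    if (!this.ws || this.ws.readyState !== this.ws.OPEN) {
      this.queue.push({ type, ...data });
    } else {
      this.ws.send(JSON.stringify({ type, ...data }));
    }
  }

  // Mirrors the session.created handler: drain and replay the backlog.
  flush() {
    for (const ev of this.queue.splice(0, this.queue.length)) {
      this.ws?.send(JSON.stringify(ev));
    }
  }
}
```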
package/dist/index.js
CHANGED
````diff
@@ -74,15 +74,6 @@ var DEFAULT_URL = "wss://api.openai.com/v1/realtime";
 var DEFAULT_MODEL = "gpt-4o-mini-realtime-preview-2024-12-17";
 var VOICES = ["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"];
 var OpenAIRealtimeVoice = class extends MastraVoice {
-  ws;
-  state;
-  client;
-  events;
-  instructions;
-  tools;
-  debug;
-  queue = [];
-  transcriber;
   /**
    * Creates a new instance of OpenAIRealtimeVoice.
    *
@@ -106,22 +97,23 @@ var OpenAIRealtimeVoice = class extends MastraVoice {
    */
   constructor(options = {}) {
     super();
-    const url = `${options.url || DEFAULT_URL}?model=${options.model || DEFAULT_MODEL}`;
-    const apiKey = options.apiKey || process.env.OPENAI_API_KEY;
-    this.ws = new WebSocket(url, void 0, {
-      headers: {
-        Authorization: "Bearer " + apiKey,
-        "OpenAI-Beta": "realtime=v1"
-      }
-    });
+    this.options = options;
     this.client = new EventEmitter();
     this.state = "close";
     this.events = {};
     this.speaker = options.speaker || DEFAULT_VOICE;
     this.transcriber = options.transcriber || DEFAULT_TRANSCRIBER;
     this.debug = options.debug || false;
-    this.setupEventListeners();
   }
+  ws;
+  state;
+  client;
+  events;
+  instructions;
+  tools;
+  debug;
+  queue = [];
+  transcriber;
   /**
    * Returns a list of available voice speakers.
    *
@@ -298,7 +290,7 @@ var OpenAIRealtimeVoice = class extends MastraVoice {
   }
   waitForOpen() {
     return new Promise((resolve) => {
-      this.ws.on("open", resolve);
+      this.ws?.on("open", resolve);
     });
   }
   waitForSessionCreated() {
@@ -319,8 +311,16 @@ var OpenAIRealtimeVoice = class extends MastraVoice {
    * ```
    */
   async connect() {
-
-
+    const url = `${this.options.url || DEFAULT_URL}?model=${this.options.model || DEFAULT_MODEL}`;
+    const apiKey = this.options.apiKey || process.env.OPENAI_API_KEY;
+    this.ws = new WebSocket(url, void 0, {
+      headers: {
+        Authorization: "Bearer " + apiKey,
+        "OpenAI-Beta": "realtime=v1"
+      }
+    });
+    this.setupEventListeners();
+    await Promise.all([this.waitForOpen(), this.waitForSessionCreated()]);
     const openaiTools = transformTools(this.tools);
     this.updateConfig({
       instructions: this.instructions,
@@ -332,6 +332,10 @@ var OpenAIRealtimeVoice = class extends MastraVoice {
     });
     this.state = "open";
   }
+  disconnect() {
+    this.state = "close";
+    this.ws?.close();
+  }
   /**
    * Streams audio data in real-time to the OpenAI service.
    * Useful for continuous audio streaming scenarios like live microphone input.
@@ -464,6 +468,9 @@ var OpenAIRealtimeVoice = class extends MastraVoice {
   }
   setupEventListeners() {
     const speakerStreams = /* @__PURE__ */ new Map();
+    if (!this.ws) {
+      throw new Error("WebSocket not initialized");
+    }
     this.ws.on("message", (message) => {
       const data = JSON.parse(message.toString());
       this.client.emit(data.type, data);
@@ -476,7 +483,7 @@ var OpenAIRealtimeVoice = class extends MastraVoice {
       this.emit("session.created", ev);
       const queue = this.queue.splice(0, this.queue.length);
       for (const ev2 of queue) {
-        this.ws.send(JSON.stringify(ev2));
+        this.ws?.send(JSON.stringify(ev2));
       }
     });
     this.client.on("session.updated", (ev) => {
@@ -539,16 +546,25 @@ var OpenAIRealtimeVoice = class extends MastraVoice {
         console.warn(`Tool "${output.name}" not found`);
         return;
       }
+      if (tool?.execute) {
+        this.emit("tool-call-start", {
+          toolCallId: output.call_id,
+          toolName: output.name,
+          toolDescription: tool.description,
+          args: context
+        });
+      }
       const result = await tool?.execute?.(
         { context },
         {
-          toolCallId:
+          toolCallId: output.call_id,
           messages: []
         }
       );
-      this.emit("tool-result", {
+      this.emit("tool-call-result", {
         toolCallId: output.call_id,
         toolName: output.name,
+        toolDescription: tool.description,
         args: context,
         result
       });
@@ -587,10 +603,10 @@ var OpenAIRealtimeVoice = class extends MastraVoice {
     return btoa(binary);
   }
   sendEvent(type, data) {
-    if (this.ws.readyState !== this.ws.OPEN) {
+    if (!this.ws || this.ws.readyState !== this.ws.OPEN) {
       this.queue.push({ type, ...data });
     } else {
-      this.ws.send(
+      this.ws?.send(
         JSON.stringify({
           type,
           ...data
````
package/package.json
CHANGED
````diff
@@ -1,6 +1,6 @@
 {
   "name": "@mastra/voice-openai-realtime",
-  "version": "0.0.0-vnextWorkflows-20250416071310",
+  "version": "0.0.0-vnextWorkflows-20250417075051",
   "description": "Mastra OpenAI Realtime API integration",
   "type": "module",
   "main": "dist/index.js",
@@ -23,7 +23,7 @@
     "openai-realtime-api": "^1.0.7",
     "ws": "^8.18.1",
     "zod-to-json-schema": "^3.24.5",
-    "@mastra/core": "0.0.0-vnextWorkflows-20250416071310"
+    "@mastra/core": "0.0.0-vnextWorkflows-20250417075051"
   },
   "devDependencies": {
     "@microsoft/api-extractor": "^7.52.2",
````
package/src/index.ts
CHANGED
````diff
@@ -106,7 +106,7 @@ type RealtimeClientServerEventMap = {
  * ```
  */
 export class OpenAIRealtimeVoice extends MastraVoice {
-  private ws: WebSocket;
+  private ws?: WebSocket;
   private state: 'close' | 'open';
   private client: EventEmitter<RealtimeClientServerEventMap>;
   private events: EventMap;
@@ -138,7 +138,7 @@ export class OpenAIRealtimeVoice extends MastraVoice {
    * ```
    */
   constructor(
-    options: {
+    private options: {
       model?: string;
       url?: string;
       apiKey?: string;
@@ -149,22 +149,12 @@ export class OpenAIRealtimeVoice extends MastraVoice {
   ) {
     super();
 
-    const url = `${options.url || DEFAULT_URL}?model=${options.model || DEFAULT_MODEL}`;
-    const apiKey = options.apiKey || process.env.OPENAI_API_KEY;
-    this.ws = new WebSocket(url, undefined, {
-      headers: {
-        Authorization: 'Bearer ' + apiKey,
-        'OpenAI-Beta': 'realtime=v1',
-      },
-    });
-
     this.client = new EventEmitter();
     this.state = 'close';
     this.events = {} as EventMap;
     this.speaker = options.speaker || DEFAULT_VOICE;
     this.transcriber = options.transcriber || DEFAULT_TRANSCRIBER;
     this.debug = options.debug || false;
-    this.setupEventListeners();
   }
 
   /**
@@ -355,7 +345,7 @@ export class OpenAIRealtimeVoice extends MastraVoice {
 
   waitForOpen() {
     return new Promise(resolve => {
-      this.ws.on('open', resolve);
+      this.ws?.on('open', resolve);
     });
   }
 
@@ -378,8 +368,17 @@ export class OpenAIRealtimeVoice extends MastraVoice {
    * ```
    */
   async connect() {
-
-
+    const url = `${this.options.url || DEFAULT_URL}?model=${this.options.model || DEFAULT_MODEL}`;
+    const apiKey = this.options.apiKey || process.env.OPENAI_API_KEY;
+    this.ws = new WebSocket(url, undefined, {
+      headers: {
+        Authorization: 'Bearer ' + apiKey,
+        'OpenAI-Beta': 'realtime=v1',
+      },
+    });
+
+    this.setupEventListeners();
+    await Promise.all([this.waitForOpen(), this.waitForSessionCreated()]);
 
     const openaiTools = transformTools(this.tools);
     this.updateConfig({
@@ -393,6 +392,11 @@ export class OpenAIRealtimeVoice extends MastraVoice {
     this.state = 'open';
   }
 
+  disconnect() {
+    this.state = 'close';
+    this.ws?.close();
+  }
+
   /**
    * Streams audio data in real-time to the OpenAI service.
    * Useful for continuous audio streaming scenarios like live microphone input.
@@ -534,6 +538,10 @@ export class OpenAIRealtimeVoice extends MastraVoice {
   private setupEventListeners(): void {
     const speakerStreams = new Map<string, StreamWithId>();
 
+    if (!this.ws) {
+      throw new Error('WebSocket not initialized');
+    }
+
     this.ws.on('message', message => {
       const data = JSON.parse(message.toString());
       this.client.emit(data.type, data);
@@ -549,7 +557,7 @@ export class OpenAIRealtimeVoice extends MastraVoice {
 
       const queue = this.queue.splice(0, this.queue.length);
       for (const ev of queue) {
-        this.ws.send(JSON.stringify(ev));
+        this.ws?.send(JSON.stringify(ev));
       }
     });
     this.client.on('session.updated', ev => {
@@ -619,19 +627,32 @@ export class OpenAIRealtimeVoice extends MastraVoice {
         console.warn(`Tool "${output.name}" not found`);
         return;
       }
+
+      if (tool?.execute) {
+        this.emit('tool-call-start', {
+          toolCallId: output.call_id,
+          toolName: output.name,
+          toolDescription: tool.description,
+          args: context,
+        });
+      }
+
       const result = await tool?.execute?.(
         { context },
         {
-          toolCallId:
+          toolCallId: output.call_id,
           messages: [],
         },
       );
-      this.emit('tool-result', {
+
+      this.emit('tool-call-result', {
         toolCallId: output.call_id,
         toolName: output.name,
+        toolDescription: tool.description,
         args: context,
         result,
       });
+
       this.sendEvent('conversation.item.create', {
         item: {
           type: 'function_call_output',
@@ -669,10 +690,10 @@ export class OpenAIRealtimeVoice extends MastraVoice {
   }
 
   private sendEvent(type: string, data: any) {
-    if (this.ws.readyState !== this.ws.OPEN) {
+    if (!this.ws || this.ws.readyState !== this.ws.OPEN) {
       this.queue.push({ type: type, ...data });
     } else {
-      this.ws.send(
+      this.ws?.send(
         JSON.stringify({
           type: type,
           ...data,
````