@mastra/voice-openai-realtime 0.0.4 → 0.1.0-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +7 -7
- package/CHANGELOG.md +20 -0
- package/dist/_tsup-dts-rollup.d.cts +18 -18
- package/dist/_tsup-dts-rollup.d.ts +18 -18
- package/dist/index.cjs +158 -62
- package/dist/index.js +159 -63
- package/package.json +6 -4
- package/src/index.ts +188 -74
- package/src/utils.ts +1 -0
package/.turbo/turbo-build.log
CHANGED
@@ -1,23 +1,23 @@
 
-> @mastra/voice-openai-realtime@0.0
+> @mastra/voice-openai-realtime@0.1.0-alpha.1 build /home/runner/work/mastra/mastra/voice/openai-realtime-api
 > tsup src/index.ts --format esm,cjs --experimental-dts --clean --treeshake
 
 CLI Building entry: src/index.ts
 CLI Using tsconfig: tsconfig.json
 CLI tsup v8.4.0
 TSC Build start
-TSC ⚡️ Build success in
+TSC ⚡️ Build success in 9409ms
 DTS Build start
 CLI Target: es2022
 Analysis will use the bundled TypeScript version 5.8.2
 Writing package typings: /home/runner/work/mastra/mastra/voice/openai-realtime-api/dist/_tsup-dts-rollup.d.ts
 Analysis will use the bundled TypeScript version 5.8.2
 Writing package typings: /home/runner/work/mastra/mastra/voice/openai-realtime-api/dist/_tsup-dts-rollup.d.cts
-DTS ⚡️ Build success in
+DTS ⚡️ Build success in 10803ms
 CLI Cleaning output folder
 ESM Build start
 CJS Build start
-[
-[
-[
-[
+CJS dist/index.cjs 17.77 KB
+CJS ⚡️ Build success in 694ms
+ESM dist/index.js 17.72 KB
+ESM ⚡️ Build success in 695ms
package/CHANGELOG.md
CHANGED
@@ -1,5 +1,25 @@
 # @mastra/voice-openai-realtime
 
+## 0.1.0-alpha.1
+
+### Minor Changes
+
+- 443b118: This update removed an external dependency on an unmaintained package and implemented a native websocket connection.
+
+### Patch Changes
+
+- Updated dependencies [0b54522]
+- Updated dependencies [1af25d5]
+- Updated dependencies [27439ad]
+  - @mastra/core@0.7.0-alpha.1
+
+## 0.0.5-alpha.0
+
+### Patch Changes
+
+- Updated dependencies [b4fbc59]
+  - @mastra/core@0.6.5-alpha.0
+
 ## 0.0.4
 
 ### Patch Changes
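The minor change above drives most of the diff below: the unmaintained `openai-realtime-api` client is dropped in favor of a direct `ws` connection. As a rough sketch of what "native websocket connection" means here, based on the compiled output in package/dist/index.cjs further down (the constant names are illustrative, not the package's public API):

```ts
import WebSocket from "ws";
import { EventEmitter } from "events";

// Endpoint, model, and headers as they appear in the compiled output below.
const REALTIME_URL = "wss://api.openai.com/v1/realtime";
const MODEL = "gpt-4o-mini-realtime-preview-2024-12-17";

const socket = new WebSocket(`${REALTIME_URL}?model=${MODEL}`, undefined, {
  headers: {
    Authorization: `Bearer ${process.env.OPENAI_API_KEY}`,
    "OpenAI-Beta": "realtime=v1",
  },
});

// Server events arrive as JSON frames; the package re-emits each one on a
// plain EventEmitter keyed by the frame's `type` field.
const client = new EventEmitter();
socket.on("message", (message) => {
  const data = JSON.parse(message.toString());
  client.emit(data.type, data);
});
```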
package/dist/_tsup-dts-rollup.d.cts
CHANGED

@@ -43,10 +43,13 @@ export declare type OpenAIExecuteFunction = (args: any) => Promise<any>;
  * ```
  */
 export declare class OpenAIRealtimeVoice extends MastraVoice {
-    private
+    private ws;
     private state;
+    private client;
     private events;
-
+    private instructions?;
+    private tools?;
+    private debug;
     /**
      * Creates a new instance of OpenAIRealtimeVoice.
      *
@@ -55,13 +58,8 @@ export declare class OpenAIRealtimeVoice extends MastraVoice {
      * @param options.chatModel.model - The model ID to use (defaults to GPT-4 Mini Realtime)
      * @param options.chatModel.apiKey - OpenAI API key. Falls back to process.env.OPENAI_API_KEY
      * @param options.chatModel.tools - Tools configuration for the model
-     * @param options.chatModel.options - Additional options for the realtime client
-     * @param options.chatModel.options.sessionConfig - Session configuration overrides
-     * @param options.chatModel.options.url - Custom WebSocket URL
-     * @param options.chatModel.options.dangerouslyAllowAPIKeyInBrowser - Whether to allow API key in browser
-     * @param options.chatModel.options.debug - Enable debug logging
-     * @param options.chatModel.options.tools - Additional tools configuration
      * @param options.speaker - Voice ID to use (defaults to 'alloy')
+     * @param options.debug - Enable debug mode
      *
      * @example
      * ```typescript
@@ -74,20 +72,16 @@ export declare class OpenAIRealtimeVoice extends MastraVoice {
      * });
      * ```
      */
-    constructor({ chatModel, speaker, }?: {
+    constructor({ chatModel, speaker, debug, }?: {
         chatModel?: {
             model?: string;
             apiKey?: string;
             tools?: TTools;
-
-
-            url?: string;
-            dangerouslyAllowAPIKeyInBrowser?: boolean;
-            debug?: boolean;
-            tools?: TTools;
-        };
+            instructions?: string;
+            url?: string;
         };
         speaker?: Realtime.Voice;
+        debug?: boolean;
     });
     /**
      * Returns a list of available voice speakers.
@@ -175,7 +169,7 @@ export declare class OpenAIRealtimeVoice extends MastraVoice {
      * });
      * ```
      */
-    updateConfig(sessionConfig:
+    updateConfig(sessionConfig: unknown): void;
     /**
      * Processes audio input for speech recognition.
      * Takes a readable stream of audio data and emits a writing event.
@@ -200,6 +194,8 @@ export declare class OpenAIRealtimeVoice extends MastraVoice {
      * ```
      */
     listen(audioData: NodeJS.ReadableStream): Promise<void>;
+    waitForOpen(): Promise<unknown>;
+    waitForSessionCreated(): Promise<unknown>;
     /**
      * Establishes a connection to the OpenAI realtime service.
      * Must be called before using speak, listen, or relay functions.
@@ -231,7 +227,7 @@ export declare class OpenAIRealtimeVoice extends MastraVoice {
      * await voice.relay(micStream);
      * ```
      */
-    send(audioData: NodeJS.ReadableStream | Int16Array): Promise<void>;
+    send(audioData: NodeJS.ReadableStream | Int16Array, eventId?: string): Promise<void>;
     /**
      * Sends a response to the OpenAI Realtime API.
      *
@@ -304,11 +300,15 @@ export declare class OpenAIRealtimeVoice extends MastraVoice {
      */
     private emit;
     private setupEventListeners;
+    private handleFunctionCalls;
+    private handleFunctionCall;
     private int16ArrayToBase64;
+    private sendEvent;
 }
 
 export declare const transformTools: (tools?: TTools_2) => {
     openaiTool: {
+        type: string;
         name: string;
         description: string;
         parameters: {
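The declaration diff above shows the reshaped constructor options: the nested `chatModel.options` bag is gone, `url` now sits directly under `chatModel` alongside the new `instructions` field, and `debug` is a top-level option. A hypothetical call site under the new shape (the instructions string and option values are illustrative):

```ts
import { OpenAIRealtimeVoice } from "@mastra/voice-openai-realtime";

const voice = new OpenAIRealtimeVoice({
  chatModel: {
    model: "gpt-4o-mini-realtime-preview-2024-12-17",
    apiKey: process.env.OPENAI_API_KEY,
    instructions: "You are a concise voice assistant.", // new in 0.1.0-alpha.1
  },
  speaker: "alloy",
  debug: true, // was chatModel.options.debug in 0.0.4
});

// Per the new waitForOpen/waitForSessionCreated declarations, connect()
// resolves once the socket is open and the session has been created.
await voice.connect();
```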
package/dist/_tsup-dts-rollup.d.ts
CHANGED

(identical diff to package/dist/_tsup-dts-rollup.d.cts above; +18 -18)
package/dist/index.cjs
CHANGED
@@ -1,9 +1,10 @@
 'use strict';
 
 var voice = require('@mastra/core/voice');
-var openaiRealtimeApi = require('openai-realtime-api');
 var stream = require('stream');
 var zodToJsonSchema = require('zod-to-json-schema');
+var ws = require('ws');
+var events = require('events');
 
 // src/index.ts
 var transformTools = (tools) => {
@@ -29,6 +30,7 @@ var transformTools = (tools) => {
       continue;
     }
     const openaiTool = {
+      type: "function",
       name,
       description: tool.description || `Tool: ${name}`,
       parameters
@@ -63,22 +65,18 @@ var transformTools = (tools) => {
 var isReadableStream = (obj) => {
   return obj && obj instanceof stream.Readable && typeof obj.read === "function" && typeof obj.pipe === "function" && obj.readable === true;
 };
-
-// src/index.ts
 var DEFAULT_VOICE = "alloy";
+var DEFAULT_URL = "wss://api.openai.com/v1/realtime";
 var DEFAULT_MODEL = "gpt-4o-mini-realtime-preview-2024-12-17";
-var DEFAULT_VAD_CONFIG = {
-  type: "server_vad",
-  threshold: 0.5,
-  prefix_padding_ms: 1e3,
-  silence_duration_ms: 1e3
-};
 var VOICES = ["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"];
 var OpenAIRealtimeVoice = class extends voice.MastraVoice {
-
+  ws;
   state;
+  client;
   events;
+  instructions;
   tools;
+  debug;
   /**
    * Creates a new instance of OpenAIRealtimeVoice.
    *
@@ -87,13 +85,8 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
    * @param options.chatModel.model - The model ID to use (defaults to GPT-4 Mini Realtime)
    * @param options.chatModel.apiKey - OpenAI API key. Falls back to process.env.OPENAI_API_KEY
    * @param options.chatModel.tools - Tools configuration for the model
-   * @param options.chatModel.options - Additional options for the realtime client
-   * @param options.chatModel.options.sessionConfig - Session configuration overrides
-   * @param options.chatModel.options.url - Custom WebSocket URL
-   * @param options.chatModel.options.dangerouslyAllowAPIKeyInBrowser - Whether to allow API key in browser
-   * @param options.chatModel.options.debug - Enable debug logging
-   * @param options.chatModel.options.tools - Additional tools configuration
    * @param options.speaker - Voice ID to use (defaults to 'alloy')
+   * @param options.debug - Enable debug mode
    *
    * @example
    * ```typescript
@@ -108,25 +101,26 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
    */
   constructor({
     chatModel,
-    speaker
+    speaker,
+    debug = false
   } = {}) {
     super();
-
-
-
-
-
-
-      turn_detection: DEFAULT_VAD_CONFIG,
-      ...chatModel?.options?.sessionConfig
+    const url = `${chatModel?.url || DEFAULT_URL}?model=${chatModel?.model || DEFAULT_MODEL}`;
+    const apiKey = chatModel?.apiKey || process.env.OPENAI_API_KEY;
+    this.ws = new ws.WebSocket(url, void 0, {
+      headers: {
+        Authorization: "Bearer " + apiKey,
+        "OpenAI-Beta": "realtime=v1"
       }
     });
+    this.client = new events.EventEmitter();
     this.state = "close";
     this.events = {};
+    this.tools = chatModel?.tools;
+    this.instructions = chatModel?.instructions;
+    this.speaker = speaker || DEFAULT_VOICE;
+    this.debug = debug;
     this.setupEventListeners();
-    if (chatModel?.tools) {
-      this.addTools(chatModel.tools);
-    }
   }
   /**
    * Returns a list of available voice speakers.
@@ -152,8 +146,8 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
    * ```
    */
   close() {
-    if (!this.
-    this.
+    if (!this.ws) return;
+    this.ws.close();
     this.state = "close";
   }
   /**
@@ -173,10 +167,10 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
    * ```
    */
   addTools(tools) {
-    const
-
-
-    }
+    const openaiTools = transformTools(tools);
+    this.updateConfig({
+      tools: openaiTools.map((t) => t.openaiTool)
+    });
   }
   /**
    * Emits a speaking event using the configured voice model.
@@ -212,7 +206,7 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
     if (input.trim().length === 0) {
       throw new Error("Input text is empty");
     }
-    this.
+    this.sendEvent("response.create", {
       response: {
         instructions: `Repeat the following text: ${input}`,
         voice: options?.speaker ? options.speaker : void 0
@@ -238,7 +232,7 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
    * ```
    */
   updateConfig(sessionConfig) {
-    this.
+    this.sendEvent("session.update", { session: sessionConfig });
   }
   /**
    * Processes audio input for speech recognition.
@@ -273,14 +267,14 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
       const buffer = Buffer.concat(chunks);
       const int16Array = new Int16Array(buffer.buffer, buffer.byteOffset ?? 0, (buffer.byteLength ?? 0) / 2);
       const base64Audio = this.int16ArrayToBase64(int16Array);
-      this.
+      this.sendEvent("conversation.item.create", {
         item: {
           type: "message",
           role: "user",
           content: [{ type: "input_audio", audio: base64Audio }]
         }
       });
-      this.
+      this.sendEvent("response.create", {
         response: {
           modalities: ["text"],
           instructions: `ONLY repeat the input and DO NOT say anything else`
@@ -290,6 +284,16 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
       this.emit("error", new Error("Unsupported audio data format"));
     }
   }
+  waitForOpen() {
+    return new Promise((resolve) => {
+      this.ws.on("open", resolve);
+    });
+  }
+  waitForSessionCreated() {
+    return new Promise((resolve) => {
+      this.client.on("session.created", resolve);
+    });
+  }
   /**
    * Establishes a connection to the OpenAI realtime service.
    * Must be called before using speak, listen, or relay functions.
@@ -303,8 +307,17 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
    * ```
    */
   async connect() {
-    await this.
-    await this.
+    await this.waitForOpen();
+    await this.waitForSessionCreated();
+    const openaiTools = transformTools(this.tools);
+    this.updateConfig({
+      instructions: this.instructions,
+      tools: openaiTools.map((t) => t.openaiTool),
+      input_audio_transcription: {
+        model: "whisper-1"
+      },
+      voice: this.speaker
+    });
     this.state = "open";
   }
   /**
@@ -325,7 +338,7 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
    * await voice.relay(micStream);
    * ```
    */
-  async send(audioData) {
+  async send(audioData, eventId) {
     if (!this.state || this.state !== "open") {
       console.warn("Cannot relay audio when not open. Call open() first.");
       return;
@@ -335,15 +348,14 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
       stream.on("data", (chunk) => {
         try {
           const buffer = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
-
-          this.client.appendInputAudio(int16Array);
+          this.sendEvent("input_audio_buffer.append", { audio: buffer.toString("base64"), event_id: eventId });
         } catch (err) {
           this.emit("error", err);
         }
       });
     } else if (audioData instanceof Int16Array) {
       try {
-        this.
+        this.sendEvent("input_audio_buffer.append", { audio: audioData, event_id: eventId });
       } catch (err) {
         this.emit("error", err);
       }
@@ -370,7 +382,7 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
    * });
    */
   async answer({ options }) {
-    this.
+    this.sendEvent("response.create", { response: options ?? {} });
   }
   /**
    * Registers an event listener for voice events.
@@ -439,29 +451,105 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
     }
   }
   setupEventListeners() {
-
-
+    const speakerStreams = /* @__PURE__ */ new Map();
+    this.ws.on("message", (message) => {
+      const data = JSON.parse(message.toString());
+      this.client.emit(data.type, data);
+      if (this.debug) {
+        const { delta, ...fields } = data;
+        console.log(data.type, fields, delta?.length < 100 ? delta : "");
+      }
     });
-    this.client.on("
-      this.emit("
+    this.client.on("session.created", (ev) => {
+      this.emit("session.created", ev);
     });
-    this.client.on("
-      this.emit("
+    this.client.on("session.updated", (ev) => {
+      this.emit("session.updated", ev);
     });
-    this.client.on("
-
-
-
+    this.client.on("response.created", (ev) => {
+      this.emit("response.created", ev);
+      const speakerStream = new stream.PassThrough();
+      speakerStream.id = ev.response.id;
+      speakerStreams.set(ev.response.id, speakerStream);
+      this.emit("speaker", speakerStream);
     });
-    this.client.on("conversation.item.
-      this.emit("
+    this.client.on("conversation.item.input_audio_transcription.delta", (ev) => {
+      this.emit("transcribing", { text: ev.delta, response_id: ev.response_id, role: "user" });
     });
-    this.client.on("conversation.item.
-
-
-
-
+    this.client.on("conversation.item.input_audio_transcription.done", (ev) => {
+      this.emit("transcribing", { text: "\n", response_id: ev.response_id, role: "user" });
+    });
+    this.client.on("response.audio.delta", (ev) => {
+      const audio = Buffer.from(ev.delta, "base64");
+      this.emit("speaking", { audio, response_id: ev.response_id });
+      const stream = speakerStreams.get(ev.response_id);
+      stream?.write(audio);
+    });
+    this.client.on("response.audio.done", (ev) => {
+      this.emit("speaking.done", { response_id: ev.response_id });
+      const stream = speakerStreams.get(ev.response_id);
+      stream?.end();
+    });
+    this.client.on("response.audio_transcript.delta", (ev) => {
+      this.emit("writing", { text: ev.delta, response_id: ev.response_id });
     });
+    this.client.on("response.audio_transcript.done", (ev) => {
+      this.emit("writing", { text: "\n", response_id: ev.response_id });
+    });
+    this.client.on("response.text.delta", (ev) => {
+      this.emit("writing", { text: ev.delta, response_id: ev.response_id });
+    });
+    this.client.on("response.text.done", (ev) => {
+      this.emit("writing", { text: "\n", response_id: ev.response_id });
+    });
+    this.client.on("response.done", (ev) => {
+      this.handleFunctionCalls(ev);
+      this.emit("response.done", ev);
+      speakerStreams.delete(ev.response.id);
+    });
+  }
+  async handleFunctionCalls(ev) {
+    for (const output of ev.response?.output ?? []) {
+      if (output.type === "function_call") {
+        await this.handleFunctionCall(output);
+      }
+    }
+  }
+  async handleFunctionCall(output) {
+    try {
+      const context = JSON.parse(output.arguments);
+      const tool = this.tools?.[output.name];
+      if (!tool) {
+        console.warn(`Tool "${output.name}" not found`);
+        return;
+      }
+      const result = await tool?.execute?.(
+        { context },
+        {
+          toolCallId: "unknown",
+          messages: []
+        }
+      );
+      this.sendEvent("conversation.item.create", {
+        item: {
+          type: "function_call_output",
+          call_id: output.call_id,
+          output: JSON.stringify(result)
+        }
+      });
+    } catch (e) {
+      const err = e;
+      console.warn(`Error calling tool "${output.name}":`, err.message);
+      this.sendEvent("conversation.item.create", {
+        item: {
+          type: "function_call_output",
+          call_id: output.call_id,
+          output: JSON.stringify({ error: err.message })
+        }
+      });
+    } finally {
+      this.sendEvent("response.create", {});
+    }
   }
   int16ArrayToBase64(int16Array) {
     const buffer = new ArrayBuffer(int16Array.length * 2);
@@ -476,6 +564,14 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
     }
     return btoa(binary);
   }
+  sendEvent(type, data) {
+    this.ws.send(
+      JSON.stringify({
+        type,
+        ...data
+      })
+    );
+  }
 };
 
 exports.OpenAIRealtimeVoice = OpenAIRealtimeVoice;