@mastra/voice-openai-realtime 0.1.0-alpha.1 → 0.1.0-alpha.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +7 -7
- package/CHANGELOG.md +19 -0
- package/dist/_tsup-dts-rollup.d.cts +23 -12
- package/dist/_tsup-dts-rollup.d.ts +23 -12
- package/dist/index.cjs +56 -37
- package/dist/index.js +56 -37
- package/package.json +2 -2
- package/src/index.ts +81 -61
package/.turbo/turbo-build.log
CHANGED

@@ -1,23 +1,23 @@
 
-> @mastra/voice-openai-realtime@0.1.0-alpha.
+> @mastra/voice-openai-realtime@0.1.0-alpha.3 build /home/runner/work/mastra/mastra/voice/openai-realtime-api
 > tsup src/index.ts --format esm,cjs --experimental-dts --clean --treeshake
 
 CLI Building entry: src/index.ts
 CLI Using tsconfig: tsconfig.json
 CLI tsup v8.4.0
 TSC Build start
-TSC ⚡️ Build success in
+TSC ⚡️ Build success in 8539ms
 DTS Build start
 CLI Target: es2022
 Analysis will use the bundled TypeScript version 5.8.2
 Writing package typings: /home/runner/work/mastra/mastra/voice/openai-realtime-api/dist/_tsup-dts-rollup.d.ts
 Analysis will use the bundled TypeScript version 5.8.2
 Writing package typings: /home/runner/work/mastra/mastra/voice/openai-realtime-api/dist/_tsup-dts-rollup.d.cts
-DTS ⚡️ Build success in
+DTS ⚡️ Build success in 12102ms
 CLI Cleaning output folder
 ESM Build start
 CJS Build start
-
-
-
-
+ESM dist/index.js 18.38 KB
+ESM ⚡️ Build success in 718ms
+CJS dist/index.cjs 18.44 KB
+CJS ⚡️ Build success in 718ms
package/CHANGELOG.md
CHANGED

@@ -1,5 +1,24 @@
 # @mastra/voice-openai-realtime
 
+## 0.1.0-alpha.3
+
+### Patch Changes
+
+- a4686e8: Realtime event queue
+- Updated dependencies [b3b34f5]
+- Updated dependencies [a4686e8]
+  - @mastra/core@0.7.0-alpha.3
+
+## 0.1.0-alpha.2
+
+### Patch Changes
+
+- Updated dependencies [a838fde]
+- Updated dependencies [a8bd4cf]
+- Updated dependencies [7a3eeb0]
+- Updated dependencies [6530ad1]
+  - @mastra/core@0.7.0-alpha.2
+
 ## 0.1.0-alpha.1
 
 ### Minor Changes
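The alpha.3 entry ("a4686e8: Realtime event queue") refers to the buffering visible in the dist hunks later in this diff: `sendEvent` pushes outgoing events into an in-memory queue while the WebSocket is not yet OPEN, and the `session.created` handler drains that queue in order. A minimal standalone sketch of the pattern; the `RealtimeConnection` class is illustrative, not part of the package, and it flushes on the socket-level 'open' event where the package waits for the protocol-level 'session.created':

```ts
import { WebSocket } from 'ws';

// Illustrative sketch of the buffer-until-open pattern introduced in alpha.3.
class RealtimeConnection {
  private queue: Record<string, unknown>[] = [];

  constructor(private ws: WebSocket) {
    // Flush everything queued, in order, once the connection is usable.
    ws.on('open', () => {
      for (const ev of this.queue.splice(0, this.queue.length)) {
        ws.send(JSON.stringify(ev));
      }
    });
  }

  sendEvent(type: string, data: Record<string, unknown> = {}) {
    if (this.ws.readyState !== WebSocket.OPEN) {
      this.queue.push({ type, ...data }); // buffered instead of dropped
    } else {
      this.ws.send(JSON.stringify({ type, ...data }));
    }
  }
}
```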
package/dist/_tsup-dts-rollup.d.cts
CHANGED

@@ -50,14 +50,15 @@ export declare class OpenAIRealtimeVoice extends MastraVoice {
     private instructions?;
     private tools?;
     private debug;
+    private queue;
+    private transcriber;
     /**
      * Creates a new instance of OpenAIRealtimeVoice.
      *
      * @param options - Configuration options for the voice instance
-     * @param options.
-     * @param options.
-     * @param options.
-     * @param options.chatModel.tools - Tools configuration for the model
+     * @param options.url - The base URL for the OpenAI Realtime API
+     * @param options.model - The model ID to use (defaults to GPT-4 Mini Realtime)
+     * @param options.apiKey - OpenAI API key. Falls back to process.env.OPENAI_API_KEY
      * @param options.speaker - Voice ID to use (defaults to 'alloy')
      * @param options.debug - Enable debug mode
      *
@@ -72,15 +73,12 @@ export declare class OpenAIRealtimeVoice extends MastraVoice {
      * });
      * ```
      */
-    constructor(
-
-
-
-        tools?: TTools;
-        instructions?: string;
-        url?: string;
-    };
+    constructor(options?: {
+        model?: string;
+        url?: string;
+        apiKey?: string;
         speaker?: Realtime.Voice;
+        transcriber?: Realtime.AudioTranscriptionModel;
         debug?: boolean;
     });
     /**
@@ -108,6 +106,19 @@ export declare class OpenAIRealtimeVoice extends MastraVoice {
      * ```
      */
     close(): void;
+    /**
+     * Equips the voice instance with a set of instructions.
+     * Instructions allow the model to perform additional actions during conversations.
+     *
+     * @param instructions - Optional instructions to addInstructions
+     * @returns Transformed instructions ready for use with the model
+     *
+     * @example
+     * ```typescript
+     * voice.addInstuctions('You are a helpful assistant.');
+     * ```
+     */
+    addInstructions(instructions?: string): void;
     /**
      * Equips the voice instance with a set of tools.
      * Tools allow the model to perform additional actions during conversations.
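The rollup above shows the public constructor flattened: the nested `chatModel` object from alpha.1 (which carried `tools` and `instructions`, per the removed lines) is gone, tools and instructions move to the `addTools`/`addInstructions` methods, and a `transcriber` option joins `speaker`. A usage sketch against the new declaration; the option values here are examples, not defaults except where the JSDoc says so:

```ts
import { OpenAIRealtimeVoice } from '@mastra/voice-openai-realtime';

// Flat options as declared above; every field is optional.
const voice = new OpenAIRealtimeVoice({
  model: 'gpt-4o-mini-realtime-preview-2024-12-17',
  url: 'wss://api.openai.com/v1/realtime',
  apiKey: process.env.OPENAI_API_KEY, // falls back to this env var anyway
  speaker: 'alloy',                   // Realtime.Voice, defaults to 'alloy'
  transcriber: 'whisper-1',           // Realtime.AudioTranscriptionModel
  debug: false,
});
```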
package/dist/_tsup-dts-rollup.d.ts
CHANGED

@@ -50,14 +50,15 @@ export declare class OpenAIRealtimeVoice extends MastraVoice {
     private instructions?;
     private tools?;
     private debug;
+    private queue;
+    private transcriber;
     /**
      * Creates a new instance of OpenAIRealtimeVoice.
      *
      * @param options - Configuration options for the voice instance
-     * @param options.
-     * @param options.
-     * @param options.
-     * @param options.chatModel.tools - Tools configuration for the model
+     * @param options.url - The base URL for the OpenAI Realtime API
+     * @param options.model - The model ID to use (defaults to GPT-4 Mini Realtime)
+     * @param options.apiKey - OpenAI API key. Falls back to process.env.OPENAI_API_KEY
      * @param options.speaker - Voice ID to use (defaults to 'alloy')
      * @param options.debug - Enable debug mode
      *
@@ -72,15 +73,12 @@ export declare class OpenAIRealtimeVoice extends MastraVoice {
      * });
      * ```
      */
-    constructor(
-
-
-
-        tools?: TTools;
-        instructions?: string;
-        url?: string;
-    };
+    constructor(options?: {
+        model?: string;
+        url?: string;
+        apiKey?: string;
         speaker?: Realtime.Voice;
+        transcriber?: Realtime.AudioTranscriptionModel;
         debug?: boolean;
     });
     /**
@@ -108,6 +106,19 @@ export declare class OpenAIRealtimeVoice extends MastraVoice {
      * ```
      */
     close(): void;
+    /**
+     * Equips the voice instance with a set of instructions.
+     * Instructions allow the model to perform additional actions during conversations.
+     *
+     * @param instructions - Optional instructions to addInstructions
+     * @returns Transformed instructions ready for use with the model
+     *
+     * @example
+     * ```typescript
+     * voice.addInstuctions('You are a helpful assistant.');
+     * ```
+     */
+    addInstructions(instructions?: string): void;
     /**
      * Equips the voice instance with a set of tools.
      * Tools allow the model to perform additional actions during conversations.
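One detail worth flagging in the block above (it appears identically in the `.d.cts` rollup): the JSDoc `@example` spells the method `addInstuctions`, while the declared member is `addInstructions(instructions?: string): void`, and the summary line is copied from the `addTools` doc. Against the declaration, usage is simply:

```ts
// Continuing the constructor sketch above; the method only stores the
// instructions on the instance, and the dist hunks below show them being
// applied later as part of the session configuration.
voice.addInstructions('You are a helpful assistant.');
```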
package/dist/index.cjs
CHANGED

@@ -1,10 +1,10 @@
 'use strict';
 
-var
+var events = require('events');
 var stream = require('stream');
-var
+var voice = require('@mastra/core/voice');
 var ws = require('ws');
-var
+var zodToJsonSchema = require('zod-to-json-schema');
 
 // src/index.ts
 var transformTools = (tools) => {
@@ -65,7 +65,10 @@ var transformTools = (tools) => {
 var isReadableStream = (obj) => {
   return obj && obj instanceof stream.Readable && typeof obj.read === "function" && typeof obj.pipe === "function" && obj.readable === true;
 };
+
+// src/index.ts
 var DEFAULT_VOICE = "alloy";
+var DEFAULT_TRANSCRIBER = "whisper-1";
 var DEFAULT_URL = "wss://api.openai.com/v1/realtime";
 var DEFAULT_MODEL = "gpt-4o-mini-realtime-preview-2024-12-17";
 var VOICES = ["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"];
@@ -77,14 +80,15 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
   instructions;
   tools;
   debug;
+  queue = [];
+  transcriber;
   /**
    * Creates a new instance of OpenAIRealtimeVoice.
    *
    * @param options - Configuration options for the voice instance
-   * @param options.
-   * @param options.
-   * @param options.
-   * @param options.chatModel.tools - Tools configuration for the model
+   * @param options.url - The base URL for the OpenAI Realtime API
+   * @param options.model - The model ID to use (defaults to GPT-4 Mini Realtime)
+   * @param options.apiKey - OpenAI API key. Falls back to process.env.OPENAI_API_KEY
    * @param options.speaker - Voice ID to use (defaults to 'alloy')
    * @param options.debug - Enable debug mode
    *
@@ -99,14 +103,10 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
    * });
    * ```
    */
-  constructor({
-    chatModel,
-    speaker,
-    debug = false
-  } = {}) {
+  constructor(options = {}) {
     super();
-    const url = `${
-    const apiKey =
+    const url = `${options.url || DEFAULT_URL}?model=${options.model || DEFAULT_MODEL}`;
+    const apiKey = options.apiKey || process.env.OPENAI_API_KEY;
     this.ws = new ws.WebSocket(url, void 0, {
       headers: {
         Authorization: "Bearer " + apiKey,
@@ -116,10 +116,9 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
     this.client = new events.EventEmitter();
     this.state = "close";
     this.events = {};
-    this.
-    this.
-    this.
-    this.debug = debug;
+    this.speaker = options.speaker || DEFAULT_VOICE;
+    this.transcriber = options.transcriber || DEFAULT_TRANSCRIBER;
+    this.debug = options.debug || false;
     this.setupEventListeners();
   }
   /**
@@ -150,6 +149,21 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
     this.ws.close();
     this.state = "close";
   }
+  /**
+   * Equips the voice instance with a set of instructions.
+   * Instructions allow the model to perform additional actions during conversations.
+   *
+   * @param instructions - Optional instructions to addInstructions
+   * @returns Transformed instructions ready for use with the model
+   *
+   * @example
+   * ```typescript
+   * voice.addInstuctions('You are a helpful assistant.');
+   * ```
+   */
+  addInstructions(instructions) {
+    this.instructions = instructions;
+  }
   /**
    * Equips the voice instance with a set of tools.
    * Tools allow the model to perform additional actions during conversations.
@@ -167,10 +181,7 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
    * ```
    */
   addTools(tools) {
-
-    this.updateConfig({
-      tools: openaiTools.map((t) => t.openaiTool)
-    });
+    this.tools = tools || {};
   }
   /**
    * Emits a speaking event using the configured voice model.
@@ -314,7 +325,7 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
       instructions: this.instructions,
       tools: openaiTools.map((t) => t.openaiTool),
       input_audio_transcription: {
-        model:
+        model: this.transcriber
       },
       voice: this.speaker
     });
@@ -462,6 +473,10 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
     });
     this.client.on("session.created", (ev) => {
       this.emit("session.created", ev);
+      const queue = this.queue.splice(0, this.queue.length);
+      for (const ev2 of queue) {
+        this.ws.send(JSON.stringify(ev2));
+      }
     });
     this.client.on("session.updated", (ev) => {
       this.emit("session.updated", ev);
@@ -474,10 +489,10 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
       this.emit("speaker", speakerStream);
     });
     this.client.on("conversation.item.input_audio_transcription.delta", (ev) => {
-      this.emit("
+      this.emit("writing", { text: ev.delta, response_id: ev.response_id, role: "user" });
     });
     this.client.on("conversation.item.input_audio_transcription.done", (ev) => {
-      this.emit("
+      this.emit("writing", { text: "\n", response_id: ev.response_id, role: "user" });
     });
     this.client.on("response.audio.delta", (ev) => {
       const audio = Buffer.from(ev.delta, "base64");
@@ -491,19 +506,19 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
       stream?.end();
     });
     this.client.on("response.audio_transcript.delta", (ev) => {
-      this.emit("writing", { text: ev.delta, response_id: ev.response_id });
+      this.emit("writing", { text: ev.delta, response_id: ev.response_id, role: "assistant" });
     });
     this.client.on("response.audio_transcript.done", (ev) => {
-      this.emit("writing", { text: "\n", response_id: ev.response_id });
+      this.emit("writing", { text: "\n", response_id: ev.response_id, role: "assistant" });
     });
     this.client.on("response.text.delta", (ev) => {
-      this.emit("writing", { text: ev.delta, response_id: ev.response_id });
+      this.emit("writing", { text: ev.delta, response_id: ev.response_id, role: "assistant" });
     });
     this.client.on("response.text.done", (ev) => {
-      this.emit("writing", { text: "\n", response_id: ev.response_id });
+      this.emit("writing", { text: "\n", response_id: ev.response_id, role: "assistant" });
     });
-    this.client.on("response.done", (ev) => {
-      this.handleFunctionCalls(ev);
+    this.client.on("response.done", async (ev) => {
+      await this.handleFunctionCalls(ev);
       this.emit("response.done", ev);
       speakerStreams.delete(ev.response.id);
     });
@@ -565,12 +580,16 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
     return btoa(binary);
   }
   sendEvent(type, data) {
-    this.ws.
-
-
-
-
-
+    if (this.ws.readyState !== this.ws.OPEN) {
+      this.queue.push({ type, ...data });
+    } else {
+      this.ws.send(
+        JSON.stringify({
+          type,
+          ...data
+        })
+      );
+    }
   }
 };
 
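A consumer-visible effect of the hunks above: every `writing` payload now carries a `role`, so user transcription deltas (`conversation.item.input_audio_transcription.*`) and assistant output (`response.audio_transcript.*`, `response.text.*`) can be separated in a single listener. A sketch, assuming the instance exposes the `on(event, callback)` surface implied by the `EventMap` type in the rollups above:

```ts
import { OpenAIRealtimeVoice } from '@mastra/voice-openai-realtime';

const voice = new OpenAIRealtimeVoice();

voice.on('writing', ({ text, role }: { text: string; response_id?: string; role: 'user' | 'assistant' }) => {
  // 'user' payloads come from input transcription events,
  // 'assistant' payloads from response transcript/text events.
  const prefix = role === 'user' ? 'You' : 'Agent';
  process.stdout.write(`${prefix}: ${text}`);
});
```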
package/dist/index.js
CHANGED

@@ -1,8 +1,8 @@
-import {
+import { EventEmitter } from 'events';
 import { PassThrough, Readable } from 'stream';
-import {
+import { MastraVoice } from '@mastra/core/voice';
 import { WebSocket } from 'ws';
-import {
+import { zodToJsonSchema } from 'zod-to-json-schema';
 
 // src/index.ts
 var transformTools = (tools) => {
@@ -63,7 +63,10 @@ var transformTools = (tools) => {
 var isReadableStream = (obj) => {
   return obj && obj instanceof Readable && typeof obj.read === "function" && typeof obj.pipe === "function" && obj.readable === true;
 };
+
+// src/index.ts
 var DEFAULT_VOICE = "alloy";
+var DEFAULT_TRANSCRIBER = "whisper-1";
 var DEFAULT_URL = "wss://api.openai.com/v1/realtime";
 var DEFAULT_MODEL = "gpt-4o-mini-realtime-preview-2024-12-17";
 var VOICES = ["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"];
@@ -75,14 +78,15 @@ var OpenAIRealtimeVoice = class extends MastraVoice {
   instructions;
   tools;
   debug;
+  queue = [];
+  transcriber;
   /**
    * Creates a new instance of OpenAIRealtimeVoice.
    *
    * @param options - Configuration options for the voice instance
-   * @param options.
-   * @param options.
-   * @param options.
-   * @param options.chatModel.tools - Tools configuration for the model
+   * @param options.url - The base URL for the OpenAI Realtime API
+   * @param options.model - The model ID to use (defaults to GPT-4 Mini Realtime)
+   * @param options.apiKey - OpenAI API key. Falls back to process.env.OPENAI_API_KEY
    * @param options.speaker - Voice ID to use (defaults to 'alloy')
    * @param options.debug - Enable debug mode
    *
@@ -97,14 +101,10 @@ var OpenAIRealtimeVoice = class extends MastraVoice {
    * });
    * ```
    */
-  constructor({
-    chatModel,
-    speaker,
-    debug = false
-  } = {}) {
+  constructor(options = {}) {
     super();
-    const url = `${
-    const apiKey =
+    const url = `${options.url || DEFAULT_URL}?model=${options.model || DEFAULT_MODEL}`;
+    const apiKey = options.apiKey || process.env.OPENAI_API_KEY;
     this.ws = new WebSocket(url, void 0, {
       headers: {
         Authorization: "Bearer " + apiKey,
@@ -114,10 +114,9 @@ var OpenAIRealtimeVoice = class extends MastraVoice {
     this.client = new EventEmitter();
     this.state = "close";
     this.events = {};
-    this.
-    this.
-    this.
-    this.debug = debug;
+    this.speaker = options.speaker || DEFAULT_VOICE;
+    this.transcriber = options.transcriber || DEFAULT_TRANSCRIBER;
+    this.debug = options.debug || false;
     this.setupEventListeners();
   }
   /**
@@ -148,6 +147,21 @@ var OpenAIRealtimeVoice = class extends MastraVoice {
     this.ws.close();
     this.state = "close";
   }
+  /**
+   * Equips the voice instance with a set of instructions.
+   * Instructions allow the model to perform additional actions during conversations.
+   *
+   * @param instructions - Optional instructions to addInstructions
+   * @returns Transformed instructions ready for use with the model
+   *
+   * @example
+   * ```typescript
+   * voice.addInstuctions('You are a helpful assistant.');
+   * ```
+   */
+  addInstructions(instructions) {
+    this.instructions = instructions;
+  }
   /**
    * Equips the voice instance with a set of tools.
    * Tools allow the model to perform additional actions during conversations.
@@ -165,10 +179,7 @@ var OpenAIRealtimeVoice = class extends MastraVoice {
    * ```
    */
   addTools(tools) {
-
-    this.updateConfig({
-      tools: openaiTools.map((t) => t.openaiTool)
-    });
+    this.tools = tools || {};
   }
   /**
    * Emits a speaking event using the configured voice model.
@@ -312,7 +323,7 @@ var OpenAIRealtimeVoice = class extends MastraVoice {
       instructions: this.instructions,
       tools: openaiTools.map((t) => t.openaiTool),
       input_audio_transcription: {
-        model:
+        model: this.transcriber
       },
       voice: this.speaker
     });
@@ -460,6 +471,10 @@ var OpenAIRealtimeVoice = class extends MastraVoice {
     });
     this.client.on("session.created", (ev) => {
       this.emit("session.created", ev);
+      const queue = this.queue.splice(0, this.queue.length);
+      for (const ev2 of queue) {
+        this.ws.send(JSON.stringify(ev2));
+      }
     });
     this.client.on("session.updated", (ev) => {
       this.emit("session.updated", ev);
@@ -472,10 +487,10 @@ var OpenAIRealtimeVoice = class extends MastraVoice {
       this.emit("speaker", speakerStream);
     });
     this.client.on("conversation.item.input_audio_transcription.delta", (ev) => {
-      this.emit("
+      this.emit("writing", { text: ev.delta, response_id: ev.response_id, role: "user" });
     });
     this.client.on("conversation.item.input_audio_transcription.done", (ev) => {
-      this.emit("
+      this.emit("writing", { text: "\n", response_id: ev.response_id, role: "user" });
     });
     this.client.on("response.audio.delta", (ev) => {
       const audio = Buffer.from(ev.delta, "base64");
@@ -489,19 +504,19 @@ var OpenAIRealtimeVoice = class extends MastraVoice {
       stream?.end();
     });
     this.client.on("response.audio_transcript.delta", (ev) => {
-      this.emit("writing", { text: ev.delta, response_id: ev.response_id });
+      this.emit("writing", { text: ev.delta, response_id: ev.response_id, role: "assistant" });
     });
     this.client.on("response.audio_transcript.done", (ev) => {
-      this.emit("writing", { text: "\n", response_id: ev.response_id });
+      this.emit("writing", { text: "\n", response_id: ev.response_id, role: "assistant" });
     });
     this.client.on("response.text.delta", (ev) => {
-      this.emit("writing", { text: ev.delta, response_id: ev.response_id });
+      this.emit("writing", { text: ev.delta, response_id: ev.response_id, role: "assistant" });
     });
     this.client.on("response.text.done", (ev) => {
-      this.emit("writing", { text: "\n", response_id: ev.response_id });
+      this.emit("writing", { text: "\n", response_id: ev.response_id, role: "assistant" });
     });
-    this.client.on("response.done", (ev) => {
-      this.handleFunctionCalls(ev);
+    this.client.on("response.done", async (ev) => {
+      await this.handleFunctionCalls(ev);
       this.emit("response.done", ev);
       speakerStreams.delete(ev.response.id);
     });
@@ -563,12 +578,16 @@ var OpenAIRealtimeVoice = class extends MastraVoice {
     return btoa(binary);
   }
   sendEvent(type, data) {
-    this.ws.
-
-
-
-
-
+    if (this.ws.readyState !== this.ws.OPEN) {
+      this.queue.push({ type, ...data });
+    } else {
+      this.ws.send(
+        JSON.stringify({
+          type,
+          ...data
+        })
+      );
+    }
   }
 };
 
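The ESM bundle mirrors the CJS changes, including one easy-to-miss behavioral change: the `response.done` handler is now `async` and awaits `handleFunctionCalls` before re-emitting the event and tearing down the speaker stream, so tool execution finishes before listeners observe the end of a response. A reduced sketch of that ordering; `handleFunctionCalls` here stands in for the package's private handler:

```ts
import { EventEmitter } from 'events';

const client = new EventEmitter();

// Stand-in for the private tool-call handler that alpha.3 now awaits.
async function handleFunctionCalls(ev: unknown): Promise<void> {
  // ... execute any function calls attached to the response ...
}

client.on('response.done', async (ev: unknown) => {
  await handleFunctionCalls(ev); // previously fire-and-forget
  // Only now do downstream listeners learn that the response is done.
  client.emit('response.handled', ev); // illustrative follow-up event
});
```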
package/package.json
CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@mastra/voice-openai-realtime",
-  "version": "0.1.0-alpha.
+  "version": "0.1.0-alpha.3",
   "description": "Mastra OpenAI Realtime API integration",
   "type": "module",
   "main": "dist/index.js",
@@ -22,7 +22,7 @@
     "openai-realtime-api": "^1.0.7",
     "ws": "^8.18.1",
     "zod-to-json-schema": "^3.24.1",
-    "@mastra/core": "^0.7.0-alpha.
+    "@mastra/core": "^0.7.0-alpha.3"
   },
   "devDependencies": {
     "@microsoft/api-extractor": "^7.49.2",
package/src/index.ts
CHANGED

@@ -1,10 +1,10 @@
+import { EventEmitter } from 'events';
+import { PassThrough } from 'stream';
 import type { ToolsInput } from '@mastra/core/agent';
 import { MastraVoice } from '@mastra/core/voice';
-import { isReadableStream, transformTools } from './utils';
-import { WebSocket } from 'ws';
-import { EventEmitter } from 'events';
 import type { Realtime, RealtimeServerEvents } from 'openai-realtime-api';
-import {
+import { WebSocket } from 'ws';
+import { isReadableStream, transformTools } from './utils';
 
 /**
  * Event callback function type
@@ -29,6 +29,8 @@ type EventMap = {
 /** Default voice for text-to-speech. 'alloy' provides a neutral, balanced voice suitable for most use cases */
 const DEFAULT_VOICE: Realtime.Voice = 'alloy';
 
+const DEFAULT_TRANSCRIBER: Realtime.AudioTranscriptionModel = 'whisper-1';
+
 const DEFAULT_URL = 'wss://api.openai.com/v1/realtime';
 
 /**
@@ -36,21 +38,22 @@ const DEFAULT_URL = 'wss://api.openai.com/v1/realtime';
  * This model is optimized for low-latency responses while maintaining high quality output.
  */
 const DEFAULT_MODEL = 'gpt-4o-mini-realtime-preview-2024-12-17';
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+// /**
+// * Default Voice Activity Detection (VAD) configuration.
+// * These settings control how the system detects speech segments.
+// *
+// * @property {string} type - Uses server-side VAD for better accuracy
+// * @property {number} threshold - Speech detection sensitivity (0.5 = balanced)
+// * @property {number} prefix_padding_ms - Includes 1 second of audio before speech
+// * @property {number} silence_duration_ms - Waits 1 second of silence before ending turn
+// */
+// const DEFAULT_VAD_CONFIG = {
+//   type: 'server_vad',
+//   threshold: 0.5,
+//   prefix_padding_ms: 1000,
+//   silence_duration_ms: 1000,
+// } as Realtime.TurnDetection;
 
 type TTools = ToolsInput;
 
@@ -110,15 +113,16 @@ export class OpenAIRealtimeVoice extends MastraVoice {
   private instructions?: string;
   private tools?: TTools;
   private debug: boolean;
+  private queue: unknown[] = [];
+  private transcriber: Realtime.AudioTranscriptionModel;
 
   /**
    * Creates a new instance of OpenAIRealtimeVoice.
    *
    * @param options - Configuration options for the voice instance
-   * @param options.
-   * @param options.
-   * @param options.
-   * @param options.chatModel.tools - Tools configuration for the model
+   * @param options.url - The base URL for the OpenAI Realtime API
+   * @param options.model - The model ID to use (defaults to GPT-4 Mini Realtime)
+   * @param options.apiKey - OpenAI API key. Falls back to process.env.OPENAI_API_KEY
    * @param options.speaker - Voice ID to use (defaults to 'alloy')
    * @param options.debug - Enable debug mode
    *
@@ -133,25 +137,20 @@ export class OpenAIRealtimeVoice extends MastraVoice {
    * });
    * ```
    */
-  constructor(
-
-    speaker,
-    debug = false,
-  }: {
-    chatModel?: {
+  constructor(
+    options: {
       model?: string;
-      apiKey?: string;
-      tools?: TTools;
-      instructions?: string;
       url?: string;
-
-
-
-
+      apiKey?: string;
+      speaker?: Realtime.Voice;
+      transcriber?: Realtime.AudioTranscriptionModel;
+      debug?: boolean;
+    } = {},
+  ) {
     super();
 
-    const url = `${
-    const apiKey =
+    const url = `${options.url || DEFAULT_URL}?model=${options.model || DEFAULT_MODEL}`;
+    const apiKey = options.apiKey || process.env.OPENAI_API_KEY;
     this.ws = new WebSocket(url, undefined, {
       headers: {
         Authorization: 'Bearer ' + apiKey,
@@ -162,10 +161,9 @@ export class OpenAIRealtimeVoice extends MastraVoice {
     this.client = new EventEmitter();
     this.state = 'close';
     this.events = {} as EventMap;
-    this.
-    this.
-    this.
-    this.debug = debug;
+    this.speaker = options.speaker || DEFAULT_VOICE;
+    this.transcriber = options.transcriber || DEFAULT_TRANSCRIBER;
+    this.debug = options.debug || false;
     this.setupEventListeners();
   }
 
@@ -199,6 +197,22 @@ export class OpenAIRealtimeVoice extends MastraVoice {
     this.state = 'close';
   }
 
+  /**
+   * Equips the voice instance with a set of instructions.
+   * Instructions allow the model to perform additional actions during conversations.
+   *
+   * @param instructions - Optional instructions to addInstructions
+   * @returns Transformed instructions ready for use with the model
+   *
+   * @example
+   * ```typescript
+   * voice.addInstuctions('You are a helpful assistant.');
+   * ```
+   */
+  addInstructions(instructions?: string) {
+    this.instructions = instructions;
+  }
+
   /**
    * Equips the voice instance with a set of tools.
    * Tools allow the model to perform additional actions during conversations.
@@ -216,10 +230,7 @@ export class OpenAIRealtimeVoice extends MastraVoice {
    * ```
    */
   addTools(tools?: TTools) {
-
-    this.updateConfig({
-      tools: openaiTools.map(t => t.openaiTool),
-    });
+    this.tools = tools || {};
   }
 
   /**
@@ -375,7 +386,7 @@ export class OpenAIRealtimeVoice extends MastraVoice {
       instructions: this.instructions,
       tools: openaiTools.map(t => t.openaiTool),
       input_audio_transcription: {
-        model:
+        model: this.transcriber,
       },
       voice: this.speaker,
     });
@@ -535,6 +546,11 @@ export class OpenAIRealtimeVoice extends MastraVoice {
 
     this.client.on('session.created', ev => {
       this.emit('session.created', ev);
+
+      const queue = this.queue.splice(0, this.queue.length);
+      for (const ev of queue) {
+        this.ws.send(JSON.stringify(ev));
+      }
     });
     this.client.on('session.updated', ev => {
       this.emit('session.updated', ev);
@@ -550,10 +566,10 @@ export class OpenAIRealtimeVoice extends MastraVoice {
       this.emit('speaker', speakerStream);
     });
     this.client.on('conversation.item.input_audio_transcription.delta', ev => {
-      this.emit('
+      this.emit('writing', { text: ev.delta, response_id: ev.response_id, role: 'user' });
     });
     this.client.on('conversation.item.input_audio_transcription.done', ev => {
-      this.emit('
+      this.emit('writing', { text: '\n', response_id: ev.response_id, role: 'user' });
     });
     this.client.on('response.audio.delta', ev => {
       const audio = Buffer.from(ev.delta, 'base64');
@@ -569,19 +585,19 @@ export class OpenAIRealtimeVoice extends MastraVoice {
       stream?.end();
     });
     this.client.on('response.audio_transcript.delta', ev => {
-      this.emit('writing', { text: ev.delta, response_id: ev.response_id });
+      this.emit('writing', { text: ev.delta, response_id: ev.response_id, role: 'assistant' });
     });
     this.client.on('response.audio_transcript.done', ev => {
-      this.emit('writing', { text: '\n', response_id: ev.response_id });
+      this.emit('writing', { text: '\n', response_id: ev.response_id, role: 'assistant' });
     });
     this.client.on('response.text.delta', ev => {
-      this.emit('writing', { text: ev.delta, response_id: ev.response_id });
+      this.emit('writing', { text: ev.delta, response_id: ev.response_id, role: 'assistant' });
     });
     this.client.on('response.text.done', ev => {
-      this.emit('writing', { text: '\n', response_id: ev.response_id });
+      this.emit('writing', { text: '\n', response_id: ev.response_id, role: 'assistant' });
     });
-    this.client.on('response.done', ev => {
-      this.handleFunctionCalls(ev);
+    this.client.on('response.done', async ev => {
+      await this.handleFunctionCalls(ev);
       this.emit('response.done', ev);
       speakerStreams.delete(ev.response.id);
     });
@@ -647,11 +663,15 @@ export class OpenAIRealtimeVoice extends MastraVoice {
   }
 
   private sendEvent(type: string, data: any) {
-    this.ws.
-
-
-
-
-
+    if (this.ws.readyState !== this.ws.OPEN) {
+      this.queue.push({ type: type, ...data });
+    } else {
+      this.ws.send(
+        JSON.stringify({
+          type: type,
+          ...data,
+        }),
+      );
+    }
   }
 }
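Taken together, the source changes support a configure-then-converse flow in which call ordering matters less: `addTools` and `addInstructions` only store state, the session configuration applies it, and the event queue covers anything sent before the socket is open. A consolidated sketch; only the constructor, `addInstructions`, `addTools`, and `close` are attested by this diff, the rest is commentary:

```ts
import { OpenAIRealtimeVoice } from '@mastra/voice-openai-realtime';

const voice = new OpenAIRealtimeVoice({
  speaker: 'alloy',
  transcriber: 'whisper-1',
  debug: true,
});

// Safe immediately after construction: both methods just record state,
// which is sent with the session configuration (instructions, tools,
// input_audio_transcription.model, voice) once the session exists.
voice.addInstructions('You are a helpful assistant.');
voice.addTools({}); // a ToolsInput map from @mastra/core/agent

// ... interact with the realtime session ...

voice.close(); // closes the WebSocket and marks the state 'close'
```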