@mastra/voice-openai-realtime 0.1.0-alpha.2 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +7 -7
- package/CHANGELOG.md +30 -0
- package/dist/_tsup-dts-rollup.d.cts +23 -12
- package/dist/_tsup-dts-rollup.d.ts +23 -12
- package/dist/index.cjs +49 -32
- package/dist/index.js +49 -32
- package/package.json +2 -2
- package/src/index.ts +59 -40
package/.turbo/turbo-build.log
CHANGED
@@ -1,23 +1,23 @@
 
-> @mastra/voice-openai-realtime@0.1.0-alpha.
+> @mastra/voice-openai-realtime@0.1.0-alpha.3 build /home/runner/work/mastra/mastra/voice/openai-realtime-api
 > tsup src/index.ts --format esm,cjs --experimental-dts --clean --treeshake
 
 CLI Building entry: src/index.ts
 CLI Using tsconfig: tsconfig.json
 CLI tsup v8.4.0
 TSC Build start
-TSC ⚡️ Build success in
+TSC ⚡️ Build success in 7321ms
 DTS Build start
 CLI Target: es2022
 Analysis will use the bundled TypeScript version 5.8.2
 Writing package typings: /home/runner/work/mastra/mastra/voice/openai-realtime-api/dist/_tsup-dts-rollup.d.ts
 Analysis will use the bundled TypeScript version 5.8.2
 Writing package typings: /home/runner/work/mastra/mastra/voice/openai-realtime-api/dist/_tsup-dts-rollup.d.cts
-DTS ⚡️ Build success in
+DTS ⚡️ Build success in 10514ms
 CLI Cleaning output folder
 ESM Build start
 CJS Build start
-[
-[
-[
-[
+ESM dist/index.js 18.38 KB
+ESM ⚡️ Build success in 841ms
+CJS dist/index.cjs 18.44 KB
+CJS ⚡️ Build success in 843ms
package/CHANGELOG.md
CHANGED
@@ -1,5 +1,35 @@
 # @mastra/voice-openai-realtime
 
+## 0.1.0
+
+### Minor Changes
+
+- 443b118: This update removed an external dependency on an unmaintained package and implemented a native websocket connection.
+
+### Patch Changes
+
+- a4686e8: Realtime event queue
+- Updated dependencies [b4fbc59]
+- Updated dependencies [a838fde]
+- Updated dependencies [a8bd4cf]
+- Updated dependencies [7a3eeb0]
+- Updated dependencies [0b54522]
+- Updated dependencies [b3b34f5]
+- Updated dependencies [1af25d5]
+- Updated dependencies [a4686e8]
+- Updated dependencies [6530ad1]
+- Updated dependencies [27439ad]
+  - @mastra/core@0.7.0
+
+## 0.1.0-alpha.3
+
+### Patch Changes
+
+- a4686e8: Realtime event queue
+- Updated dependencies [b3b34f5]
+- Updated dependencies [a4686e8]
+  - @mastra/core@0.7.0-alpha.3
+
 ## 0.1.0-alpha.2
 
 ### Patch Changes
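The minor change above (443b118) is the substance of this release: the wrapper around the unmaintained client package was dropped in favor of a direct `ws` connection. A minimal sketch of such a connection, using only the defaults visible in the dist diffs below; the `OpenAI-Beta` header is an assumption, since the diff truncates the header object after `Authorization`:

```typescript
import WebSocket from 'ws';

// Defaults copied from the built output further down in this diff.
const DEFAULT_URL = 'wss://api.openai.com/v1/realtime';
const DEFAULT_MODEL = 'gpt-4o-mini-realtime-preview-2024-12-17';

// Open a native WebSocket to the Realtime API. The 'OpenAI-Beta' header is an
// assumption; the diff cuts off the header object after Authorization.
const socket = new WebSocket(`${DEFAULT_URL}?model=${DEFAULT_MODEL}`, {
  headers: {
    Authorization: 'Bearer ' + process.env.OPENAI_API_KEY,
    'OpenAI-Beta': 'realtime=v1',
  },
});

socket.on('open', () => {
  // Realtime events travel as JSON strings with a `type` discriminator.
  socket.send(JSON.stringify({ type: 'session.update', session: { voice: 'alloy' } }));
});
```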
package/dist/_tsup-dts-rollup.d.cts
CHANGED
@@ -50,14 +50,15 @@ export declare class OpenAIRealtimeVoice extends MastraVoice {
     private instructions?;
     private tools?;
     private debug;
+    private queue;
+    private transcriber;
     /**
      * Creates a new instance of OpenAIRealtimeVoice.
      *
      * @param options - Configuration options for the voice instance
-     * @param options.
-     * @param options.
-     * @param options.
-     * @param options.chatModel.tools - Tools configuration for the model
+     * @param options.url - The base URL for the OpenAI Realtime API
+     * @param options.model - The model ID to use (defaults to GPT-4 Mini Realtime)
+     * @param options.apiKey - OpenAI API key. Falls back to process.env.OPENAI_API_KEY
      * @param options.speaker - Voice ID to use (defaults to 'alloy')
      * @param options.debug - Enable debug mode
      *
@@ -72,15 +73,12 @@ export declare class OpenAIRealtimeVoice extends MastraVoice {
      * });
      * ```
      */
-    constructor(
-
-
-
-    tools?: TTools;
-    instructions?: string;
-    url?: string;
-    };
+    constructor(options?: {
+        model?: string;
+        url?: string;
+        apiKey?: string;
         speaker?: Realtime.Voice;
+        transcriber?: Realtime.AudioTranscriptionModel;
         debug?: boolean;
     });
     /**
@@ -108,6 +106,19 @@ export declare class OpenAIRealtimeVoice extends MastraVoice {
      * ```
      */
     close(): void;
+    /**
+     * Equips the voice instance with a set of instructions.
+     * Instructions allow the model to perform additional actions during conversations.
+     *
+     * @param instructions - Optional instructions to addInstructions
+     * @returns Transformed instructions ready for use with the model
+     *
+     * @example
+     * ```typescript
+     * voice.addInstuctions('You are a helpful assistant.');
+     * ```
+     */
+    addInstructions(instructions?: string): void;
     /**
      * Equips the voice instance with a set of tools.
      * Tools allow the model to perform additional actions during conversations.
package/dist/_tsup-dts-rollup.d.ts
CHANGED
@@ -50,14 +50,15 @@ export declare class OpenAIRealtimeVoice extends MastraVoice {
     private instructions?;
     private tools?;
     private debug;
+    private queue;
+    private transcriber;
     /**
      * Creates a new instance of OpenAIRealtimeVoice.
      *
      * @param options - Configuration options for the voice instance
-     * @param options.
-     * @param options.
-     * @param options.
-     * @param options.chatModel.tools - Tools configuration for the model
+     * @param options.url - The base URL for the OpenAI Realtime API
+     * @param options.model - The model ID to use (defaults to GPT-4 Mini Realtime)
+     * @param options.apiKey - OpenAI API key. Falls back to process.env.OPENAI_API_KEY
      * @param options.speaker - Voice ID to use (defaults to 'alloy')
      * @param options.debug - Enable debug mode
      *
@@ -72,15 +73,12 @@ export declare class OpenAIRealtimeVoice extends MastraVoice {
      * });
      * ```
      */
-    constructor(
-
-
-
-    tools?: TTools;
-    instructions?: string;
-    url?: string;
-    };
+    constructor(options?: {
+        model?: string;
+        url?: string;
+        apiKey?: string;
         speaker?: Realtime.Voice;
+        transcriber?: Realtime.AudioTranscriptionModel;
         debug?: boolean;
     });
     /**
@@ -108,6 +106,19 @@ export declare class OpenAIRealtimeVoice extends MastraVoice {
      * ```
      */
     close(): void;
+    /**
+     * Equips the voice instance with a set of instructions.
+     * Instructions allow the model to perform additional actions during conversations.
+     *
+     * @param instructions - Optional instructions to addInstructions
+     * @returns Transformed instructions ready for use with the model
+     *
+     * @example
+     * ```typescript
+     * voice.addInstuctions('You are a helpful assistant.');
+     * ```
+     */
+    addInstructions(instructions?: string): void;
     /**
      * Equips the voice instance with a set of tools.
      * Tools allow the model to perform additional actions during conversations.
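The rewritten declarations flatten the old nested `chatModel` options into one optional object and add `transcriber`. A hypothetical usage sketch; every value shown is a default documented in the diff above:

```typescript
import { OpenAIRealtimeVoice } from '@mastra/voice-openai-realtime';

// Hypothetical instantiation; all fields are optional and fall back to the
// defaults listed in the JSDoc above.
const voice = new OpenAIRealtimeVoice({
  model: 'gpt-4o-mini-realtime-preview-2024-12-17',
  apiKey: process.env.OPENAI_API_KEY,
  speaker: 'alloy',
  transcriber: 'whisper-1',
  debug: false,
});

// New in this release: instructions can be attached after construction.
voice.addInstructions('You are a helpful assistant.');
```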
package/dist/index.cjs
CHANGED
@@ -68,6 +68,7 @@ var isReadableStream = (obj) => {
 
 // src/index.ts
 var DEFAULT_VOICE = "alloy";
+var DEFAULT_TRANSCRIBER = "whisper-1";
 var DEFAULT_URL = "wss://api.openai.com/v1/realtime";
 var DEFAULT_MODEL = "gpt-4o-mini-realtime-preview-2024-12-17";
 var VOICES = ["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"];
@@ -79,14 +80,15 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
   instructions;
   tools;
   debug;
+  queue = [];
+  transcriber;
   /**
    * Creates a new instance of OpenAIRealtimeVoice.
    *
    * @param options - Configuration options for the voice instance
-   * @param options.
-   * @param options.
-   * @param options.
-   * @param options.chatModel.tools - Tools configuration for the model
+   * @param options.url - The base URL for the OpenAI Realtime API
+   * @param options.model - The model ID to use (defaults to GPT-4 Mini Realtime)
+   * @param options.apiKey - OpenAI API key. Falls back to process.env.OPENAI_API_KEY
    * @param options.speaker - Voice ID to use (defaults to 'alloy')
    * @param options.debug - Enable debug mode
    *
@@ -101,14 +103,10 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
    * });
    * ```
    */
-  constructor({
-    chatModel,
-    speaker,
-    debug = false
-  } = {}) {
+  constructor(options = {}) {
     super();
-    const url = `${
-    const apiKey =
+    const url = `${options.url || DEFAULT_URL}?model=${options.model || DEFAULT_MODEL}`;
+    const apiKey = options.apiKey || process.env.OPENAI_API_KEY;
     this.ws = new ws.WebSocket(url, void 0, {
       headers: {
         Authorization: "Bearer " + apiKey,
@@ -118,10 +116,9 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
     this.client = new events.EventEmitter();
     this.state = "close";
     this.events = {};
-    this.
-    this.
-    this.
-    this.debug = debug;
+    this.speaker = options.speaker || DEFAULT_VOICE;
+    this.transcriber = options.transcriber || DEFAULT_TRANSCRIBER;
+    this.debug = options.debug || false;
     this.setupEventListeners();
   }
   /**
@@ -152,6 +149,21 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
     this.ws.close();
     this.state = "close";
   }
+  /**
+   * Equips the voice instance with a set of instructions.
+   * Instructions allow the model to perform additional actions during conversations.
+   *
+   * @param instructions - Optional instructions to addInstructions
+   * @returns Transformed instructions ready for use with the model
+   *
+   * @example
+   * ```typescript
+   * voice.addInstuctions('You are a helpful assistant.');
+   * ```
+   */
+  addInstructions(instructions) {
+    this.instructions = instructions;
+  }
   /**
    * Equips the voice instance with a set of tools.
    * Tools allow the model to perform additional actions during conversations.
@@ -169,10 +181,7 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
    * ```
    */
   addTools(tools) {
-
-    this.updateConfig({
-      tools: openaiTools.map((t) => t.openaiTool)
-    });
+    this.tools = tools || {};
   }
   /**
    * Emits a speaking event using the configured voice model.
@@ -316,7 +325,7 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
       instructions: this.instructions,
       tools: openaiTools.map((t) => t.openaiTool),
       input_audio_transcription: {
-        model:
+        model: this.transcriber
       },
       voice: this.speaker
     });
@@ -464,6 +473,10 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
     });
     this.client.on("session.created", (ev) => {
       this.emit("session.created", ev);
+      const queue = this.queue.splice(0, this.queue.length);
+      for (const ev2 of queue) {
+        this.ws.send(JSON.stringify(ev2));
+      }
     });
     this.client.on("session.updated", (ev) => {
       this.emit("session.updated", ev);
@@ -476,10 +489,10 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
       this.emit("speaker", speakerStream);
     });
     this.client.on("conversation.item.input_audio_transcription.delta", (ev) => {
-      this.emit("
+      this.emit("writing", { text: ev.delta, response_id: ev.response_id, role: "user" });
     });
     this.client.on("conversation.item.input_audio_transcription.done", (ev) => {
-      this.emit("
+      this.emit("writing", { text: "\n", response_id: ev.response_id, role: "user" });
     });
     this.client.on("response.audio.delta", (ev) => {
       const audio = Buffer.from(ev.delta, "base64");
@@ -493,16 +506,16 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
       stream?.end();
     });
     this.client.on("response.audio_transcript.delta", (ev) => {
-      this.emit("writing", { text: ev.delta, response_id: ev.response_id });
+      this.emit("writing", { text: ev.delta, response_id: ev.response_id, role: "assistant" });
     });
     this.client.on("response.audio_transcript.done", (ev) => {
-      this.emit("writing", { text: "\n", response_id: ev.response_id });
+      this.emit("writing", { text: "\n", response_id: ev.response_id, role: "assistant" });
     });
     this.client.on("response.text.delta", (ev) => {
-      this.emit("writing", { text: ev.delta, response_id: ev.response_id });
+      this.emit("writing", { text: ev.delta, response_id: ev.response_id, role: "assistant" });
     });
     this.client.on("response.text.done", (ev) => {
-      this.emit("writing", { text: "\n", response_id: ev.response_id });
+      this.emit("writing", { text: "\n", response_id: ev.response_id, role: "assistant" });
     });
     this.client.on("response.done", async (ev) => {
       await this.handleFunctionCalls(ev);
@@ -567,12 +580,16 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
     return btoa(binary);
   }
   sendEvent(type, data) {
-    this.ws.
-
-
-
-
-
+    if (this.ws.readyState !== this.ws.OPEN) {
+      this.queue.push({ type, ...data });
+    } else {
+      this.ws.send(
+        JSON.stringify({
+          type,
+          ...data
+        })
+      );
+    }
   }
 };
 
package/dist/index.js
CHANGED
@@ -66,6 +66,7 @@ var isReadableStream = (obj) => {
 
 // src/index.ts
 var DEFAULT_VOICE = "alloy";
+var DEFAULT_TRANSCRIBER = "whisper-1";
 var DEFAULT_URL = "wss://api.openai.com/v1/realtime";
 var DEFAULT_MODEL = "gpt-4o-mini-realtime-preview-2024-12-17";
 var VOICES = ["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"];
@@ -77,14 +78,15 @@ var OpenAIRealtimeVoice = class extends MastraVoice {
   instructions;
   tools;
   debug;
+  queue = [];
+  transcriber;
   /**
    * Creates a new instance of OpenAIRealtimeVoice.
    *
    * @param options - Configuration options for the voice instance
-   * @param options.
-   * @param options.
-   * @param options.
-   * @param options.chatModel.tools - Tools configuration for the model
+   * @param options.url - The base URL for the OpenAI Realtime API
+   * @param options.model - The model ID to use (defaults to GPT-4 Mini Realtime)
+   * @param options.apiKey - OpenAI API key. Falls back to process.env.OPENAI_API_KEY
    * @param options.speaker - Voice ID to use (defaults to 'alloy')
    * @param options.debug - Enable debug mode
    *
@@ -99,14 +101,10 @@ var OpenAIRealtimeVoice = class extends MastraVoice {
    * });
    * ```
    */
-  constructor({
-    chatModel,
-    speaker,
-    debug = false
-  } = {}) {
+  constructor(options = {}) {
     super();
-    const url = `${
-    const apiKey =
+    const url = `${options.url || DEFAULT_URL}?model=${options.model || DEFAULT_MODEL}`;
+    const apiKey = options.apiKey || process.env.OPENAI_API_KEY;
     this.ws = new WebSocket(url, void 0, {
       headers: {
         Authorization: "Bearer " + apiKey,
@@ -116,10 +114,9 @@ var OpenAIRealtimeVoice = class extends MastraVoice {
     this.client = new EventEmitter();
     this.state = "close";
     this.events = {};
-    this.
-    this.
-    this.
-    this.debug = debug;
+    this.speaker = options.speaker || DEFAULT_VOICE;
+    this.transcriber = options.transcriber || DEFAULT_TRANSCRIBER;
+    this.debug = options.debug || false;
     this.setupEventListeners();
   }
   /**
@@ -150,6 +147,21 @@ var OpenAIRealtimeVoice = class extends MastraVoice {
     this.ws.close();
     this.state = "close";
   }
+  /**
+   * Equips the voice instance with a set of instructions.
+   * Instructions allow the model to perform additional actions during conversations.
+   *
+   * @param instructions - Optional instructions to addInstructions
+   * @returns Transformed instructions ready for use with the model
+   *
+   * @example
+   * ```typescript
+   * voice.addInstuctions('You are a helpful assistant.');
+   * ```
+   */
+  addInstructions(instructions) {
+    this.instructions = instructions;
+  }
   /**
    * Equips the voice instance with a set of tools.
    * Tools allow the model to perform additional actions during conversations.
@@ -167,10 +179,7 @@ var OpenAIRealtimeVoice = class extends MastraVoice {
    * ```
    */
   addTools(tools) {
-
-    this.updateConfig({
-      tools: openaiTools.map((t) => t.openaiTool)
-    });
+    this.tools = tools || {};
   }
   /**
    * Emits a speaking event using the configured voice model.
@@ -314,7 +323,7 @@ var OpenAIRealtimeVoice = class extends MastraVoice {
       instructions: this.instructions,
       tools: openaiTools.map((t) => t.openaiTool),
       input_audio_transcription: {
-        model:
+        model: this.transcriber
      },
       voice: this.speaker
     });
@@ -462,6 +471,10 @@ var OpenAIRealtimeVoice = class extends MastraVoice {
     });
     this.client.on("session.created", (ev) => {
       this.emit("session.created", ev);
+      const queue = this.queue.splice(0, this.queue.length);
+      for (const ev2 of queue) {
+        this.ws.send(JSON.stringify(ev2));
+      }
     });
     this.client.on("session.updated", (ev) => {
       this.emit("session.updated", ev);
@@ -474,10 +487,10 @@ var OpenAIRealtimeVoice = class extends MastraVoice {
       this.emit("speaker", speakerStream);
     });
     this.client.on("conversation.item.input_audio_transcription.delta", (ev) => {
-      this.emit("
+      this.emit("writing", { text: ev.delta, response_id: ev.response_id, role: "user" });
     });
     this.client.on("conversation.item.input_audio_transcription.done", (ev) => {
-      this.emit("
+      this.emit("writing", { text: "\n", response_id: ev.response_id, role: "user" });
     });
     this.client.on("response.audio.delta", (ev) => {
       const audio = Buffer.from(ev.delta, "base64");
@@ -491,16 +504,16 @@ var OpenAIRealtimeVoice = class extends MastraVoice {
       stream?.end();
     });
     this.client.on("response.audio_transcript.delta", (ev) => {
-      this.emit("writing", { text: ev.delta, response_id: ev.response_id });
+      this.emit("writing", { text: ev.delta, response_id: ev.response_id, role: "assistant" });
     });
     this.client.on("response.audio_transcript.done", (ev) => {
-      this.emit("writing", { text: "\n", response_id: ev.response_id });
+      this.emit("writing", { text: "\n", response_id: ev.response_id, role: "assistant" });
     });
     this.client.on("response.text.delta", (ev) => {
-      this.emit("writing", { text: ev.delta, response_id: ev.response_id });
+      this.emit("writing", { text: ev.delta, response_id: ev.response_id, role: "assistant" });
     });
     this.client.on("response.text.done", (ev) => {
-      this.emit("writing", { text: "\n", response_id: ev.response_id });
+      this.emit("writing", { text: "\n", response_id: ev.response_id, role: "assistant" });
     });
     this.client.on("response.done", async (ev) => {
       await this.handleFunctionCalls(ev);
@@ -565,12 +578,16 @@ var OpenAIRealtimeVoice = class extends MastraVoice {
     return btoa(binary);
   }
   sendEvent(type, data) {
-    this.ws.
-
-
-
-
-
+    if (this.ws.readyState !== this.ws.OPEN) {
+      this.queue.push({ type, ...data });
+    } else {
+      this.ws.send(
+        JSON.stringify({
+          type,
+          ...data
+        })
+      );
+    }
   }
 };
 
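Both bundles now attach a `role` to every `writing` event, separating user transcription deltas (`role: 'user'`) from model output (`role: 'assistant'`). A hypothetical consumer, assuming the class exposes the usual `on` listener API:

```typescript
// Payload shape follows the emit calls in the diff; the `on` method is assumed.
voice.on('writing', ({ text, response_id, role }) => {
  process.stdout.write(`[${role} ${response_id}] ${text}`);
});
```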
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@mastra/voice-openai-realtime",
-  "version": "0.1.0-alpha.2",
+  "version": "0.1.0",
   "description": "Mastra OpenAI Realtime API integration",
   "type": "module",
   "main": "dist/index.js",
@@ -22,7 +22,7 @@
     "openai-realtime-api": "^1.0.7",
     "ws": "^8.18.1",
     "zod-to-json-schema": "^3.24.1",
-    "@mastra/core": "^0.7.0
+    "@mastra/core": "^0.7.0"
   },
   "devDependencies": {
     "@microsoft/api-extractor": "^7.49.2",
package/src/index.ts
CHANGED
@@ -29,6 +29,8 @@ type EventMap = {
 /** Default voice for text-to-speech. 'alloy' provides a neutral, balanced voice suitable for most use cases */
 const DEFAULT_VOICE: Realtime.Voice = 'alloy';
 
+const DEFAULT_TRANSCRIBER: Realtime.AudioTranscriptionModel = 'whisper-1';
+
 const DEFAULT_URL = 'wss://api.openai.com/v1/realtime';
 
 /**
@@ -111,15 +113,16 @@ export class OpenAIRealtimeVoice extends MastraVoice {
   private instructions?: string;
   private tools?: TTools;
   private debug: boolean;
+  private queue: unknown[] = [];
+  private transcriber: Realtime.AudioTranscriptionModel;
 
   /**
    * Creates a new instance of OpenAIRealtimeVoice.
    *
    * @param options - Configuration options for the voice instance
-   * @param options.
-   * @param options.
-   * @param options.
-   * @param options.chatModel.tools - Tools configuration for the model
+   * @param options.url - The base URL for the OpenAI Realtime API
+   * @param options.model - The model ID to use (defaults to GPT-4 Mini Realtime)
+   * @param options.apiKey - OpenAI API key. Falls back to process.env.OPENAI_API_KEY
    * @param options.speaker - Voice ID to use (defaults to 'alloy')
    * @param options.debug - Enable debug mode
    *
@@ -134,25 +137,20 @@ export class OpenAIRealtimeVoice extends MastraVoice {
    * });
    * ```
    */
-  constructor(
-
-    speaker,
-    debug = false,
-  }: {
-    chatModel?: {
+  constructor(
+    options: {
       model?: string;
-      apiKey?: string;
-      tools?: TTools;
-      instructions?: string;
       url?: string;
-
-
-
-
+      apiKey?: string;
+      speaker?: Realtime.Voice;
+      transcriber?: Realtime.AudioTranscriptionModel;
+      debug?: boolean;
+    } = {},
+  ) {
     super();
 
-    const url = `${
-    const apiKey =
+    const url = `${options.url || DEFAULT_URL}?model=${options.model || DEFAULT_MODEL}`;
+    const apiKey = options.apiKey || process.env.OPENAI_API_KEY;
     this.ws = new WebSocket(url, undefined, {
       headers: {
         Authorization: 'Bearer ' + apiKey,
@@ -163,10 +161,9 @@ export class OpenAIRealtimeVoice extends MastraVoice {
     this.client = new EventEmitter();
     this.state = 'close';
     this.events = {} as EventMap;
-    this.
-    this.
-    this.
-    this.debug = debug;
+    this.speaker = options.speaker || DEFAULT_VOICE;
+    this.transcriber = options.transcriber || DEFAULT_TRANSCRIBER;
+    this.debug = options.debug || false;
     this.setupEventListeners();
   }
 
@@ -200,6 +197,22 @@ export class OpenAIRealtimeVoice extends MastraVoice {
     this.state = 'close';
   }
 
+  /**
+   * Equips the voice instance with a set of instructions.
+   * Instructions allow the model to perform additional actions during conversations.
+   *
+   * @param instructions - Optional instructions to addInstructions
+   * @returns Transformed instructions ready for use with the model
+   *
+   * @example
+   * ```typescript
+   * voice.addInstuctions('You are a helpful assistant.');
+   * ```
+   */
+  addInstructions(instructions?: string) {
+    this.instructions = instructions;
+  }
+
   /**
    * Equips the voice instance with a set of tools.
    * Tools allow the model to perform additional actions during conversations.
@@ -217,10 +230,7 @@ export class OpenAIRealtimeVoice extends MastraVoice {
    * ```
    */
   addTools(tools?: TTools) {
-
-    this.updateConfig({
-      tools: openaiTools.map(t => t.openaiTool),
-    });
+    this.tools = tools || {};
   }
 
   /**
@@ -376,7 +386,7 @@ export class OpenAIRealtimeVoice extends MastraVoice {
       instructions: this.instructions,
       tools: openaiTools.map(t => t.openaiTool),
       input_audio_transcription: {
-        model:
+        model: this.transcriber,
       },
       voice: this.speaker,
     });
@@ -536,6 +546,11 @@ export class OpenAIRealtimeVoice extends MastraVoice {
 
     this.client.on('session.created', ev => {
       this.emit('session.created', ev);
+
+      const queue = this.queue.splice(0, this.queue.length);
+      for (const ev of queue) {
+        this.ws.send(JSON.stringify(ev));
+      }
     });
     this.client.on('session.updated', ev => {
       this.emit('session.updated', ev);
@@ -551,10 +566,10 @@ export class OpenAIRealtimeVoice extends MastraVoice {
       this.emit('speaker', speakerStream);
     });
     this.client.on('conversation.item.input_audio_transcription.delta', ev => {
-      this.emit('
+      this.emit('writing', { text: ev.delta, response_id: ev.response_id, role: 'user' });
     });
     this.client.on('conversation.item.input_audio_transcription.done', ev => {
-      this.emit('
+      this.emit('writing', { text: '\n', response_id: ev.response_id, role: 'user' });
     });
     this.client.on('response.audio.delta', ev => {
       const audio = Buffer.from(ev.delta, 'base64');
@@ -570,16 +585,16 @@ export class OpenAIRealtimeVoice extends MastraVoice {
       stream?.end();
     });
     this.client.on('response.audio_transcript.delta', ev => {
-      this.emit('writing', { text: ev.delta, response_id: ev.response_id });
+      this.emit('writing', { text: ev.delta, response_id: ev.response_id, role: 'assistant' });
     });
     this.client.on('response.audio_transcript.done', ev => {
-      this.emit('writing', { text: '\n', response_id: ev.response_id });
+      this.emit('writing', { text: '\n', response_id: ev.response_id, role: 'assistant' });
     });
     this.client.on('response.text.delta', ev => {
-      this.emit('writing', { text: ev.delta, response_id: ev.response_id });
+      this.emit('writing', { text: ev.delta, response_id: ev.response_id, role: 'assistant' });
     });
     this.client.on('response.text.done', ev => {
-      this.emit('writing', { text: '\n', response_id: ev.response_id });
+      this.emit('writing', { text: '\n', response_id: ev.response_id, role: 'assistant' });
     });
     this.client.on('response.done', async ev => {
       await this.handleFunctionCalls(ev);
@@ -648,11 +663,15 @@ export class OpenAIRealtimeVoice extends MastraVoice {
   }
 
   private sendEvent(type: string, data: any) {
-    this.ws.
-
-
-
-
-
+    if (this.ws.readyState !== this.ws.OPEN) {
+      this.queue.push({ type: type, ...data });
+    } else {
+      this.ws.send(
+        JSON.stringify({
+          type: type,
+          ...data,
+        }),
+      );
+    }
   }
 }
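The a4686e8 patch ("Realtime event queue") is the other behavioral change: `sendEvent` now buffers outbound events while the socket is not yet `OPEN`, and the `session.created` handler drains the backlog in order. A standalone sketch of that pattern; the `EventQueue` class is hypothetical, since the package inlines this logic in `OpenAIRealtimeVoice`:

```typescript
import WebSocket from 'ws';

// Hypothetical standalone version of the queueing logic in the diff above:
// events sent before the socket is open are held, then flushed once the
// server confirms the session.
class EventQueue {
  private queue: Record<string, unknown>[] = [];

  constructor(private ws: WebSocket) {}

  send(type: string, data?: Record<string, unknown>) {
    const event = { type, ...data };
    if (this.ws.readyState !== WebSocket.OPEN) {
      // Socket not ready yet: hold the event instead of dropping it.
      this.queue.push(event);
    } else {
      this.ws.send(JSON.stringify(event));
    }
  }

  // Call from the 'session.created' handler to drain the backlog in order.
  flush() {
    for (const event of this.queue.splice(0, this.queue.length)) {
      this.ws.send(JSON.stringify(event));
    }
  }
}
```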