@mastra/voice-openai-realtime 0.10.6 → 0.10.7-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,348 +0,0 @@
1
- import { MastraVoice } from '@mastra/core/voice';
2
- import type { Realtime } from 'openai-realtime-api';
3
- import type { RuntimeContext } from '@mastra/core/runtime-context';
4
- import type { ToolsInput } from '@mastra/core/agent';
5
-
6
- /**
7
- * Event callback function type
8
- */
9
- declare type EventCallback = (...args: any[]) => void;
10
-
11
- export declare const isReadableStream: (obj: unknown) => unknown;
12
-
13
- export declare type OpenAIExecuteFunction = (args: any) => Promise<any>;
14
-
15
- /**
16
- * OpenAIRealtimeVoice provides real-time voice interaction capabilities using OpenAI's
17
- * WebSocket-based API. It supports:
18
- * - Real-time text-to-speech
19
- * - Speech-to-text (transcription)
20
- * - Voice activity detection
21
- * - Multiple voice options
22
- * - Event-based audio streaming
23
- *
24
- * The class manages WebSocket connections, audio streaming, and event handling
25
- * for seamless voice interactions.
26
- *
27
- * @extends MastraVoice
28
- *
29
- * @example
30
- * ```typescript
31
- * const voice = new OpenAIRealtimeVoice({
32
- * apiKey: process.env.OPENAI_API_KEY,
33
- * model: 'gpt-4o-mini-realtime'
34
- * });
35
- *
36
- * await voice.open();
37
- * voice.on('speaking', (audioData) => {
38
- * // Handle audio data
39
- * });
40
- *
41
- * await voice.speak('Hello, how can I help you today?');
42
- * ```
43
- */
44
- export declare class OpenAIRealtimeVoice extends MastraVoice {
45
- private options;
46
- private ws?;
47
- private state;
48
- private client;
49
- private events;
50
- private instructions?;
51
- private tools?;
52
- private debug;
53
- private queue;
54
- private transcriber;
55
- private runtimeContext?;
56
- /**
57
- * Creates a new instance of OpenAIRealtimeVoice.
58
- *
59
- * @param options - Configuration options for the voice instance
60
- * @param options.url - The base URL for the OpenAI Realtime API
61
- * @param options.model - The model ID to use (defaults to GPT-4 Mini Realtime)
62
- * @param options.apiKey - OpenAI API key. Falls back to process.env.OPENAI_API_KEY
63
- * @param options.speaker - Voice ID to use (defaults to 'alloy')
64
- * @param options.debug - Enable debug mode
65
- *
66
- * @example
67
- * ```typescript
68
- * const voice = new OpenAIRealtimeVoice({
69
- * apiKey: 'your-api-key',
70
- * model: 'gpt-4o-mini-realtime',
71
- * speaker: 'alloy'
72
- * });
73
- * ```
74
- */
75
- constructor(options?: {
76
- model?: string;
77
- url?: string;
78
- apiKey?: string;
79
- speaker?: Realtime.Voice;
80
- transcriber?: Realtime.AudioTranscriptionModel;
81
- debug?: boolean;
82
- });
83
- /**
84
- * Returns a list of available voice speakers.
85
- *
86
- * @returns Promise resolving to an array of voice objects, each containing at least a voiceId
87
- *
88
- * @example
89
- * ```typescript
90
- * const speakers = await voice.getSpeakers();
91
- * // speakers = [{ voiceId: 'alloy' }, { voiceId: 'echo' }, ...]
92
- * ```
93
- */
94
- getSpeakers(): Promise<Array<{
95
- voiceId: string;
96
- [key: string]: any;
97
- }>>;
98
- /**
99
- * Disconnects from the OpenAI realtime session and cleans up resources.
100
- * Should be called when you're done with the voice instance.
101
- *
102
- * @example
103
- * ```typescript
104
- * voice.close(); // Disconnects and cleans up
105
- * ```
106
- */
107
- close(): void;
108
- /**
109
- * Equips the voice instance with a set of instructions.
110
- * Instructions allow the model to perform additional actions during conversations.
111
- *
112
- * @param instructions - Optional instructions to add
113
- * @returns Transformed instructions ready for use with the model
114
- *
115
- * @example
116
- * ```typescript
117
- * voice.addInstructions('You are a helpful assistant.');
118
- * ```
119
- */
120
- addInstructions(instructions?: string): void;
121
- /**
122
- * Equips the voice instance with a set of tools.
123
- * Tools allow the model to perform additional actions during conversations.
124
- *
125
- * @param tools - Optional tools configuration to add
126
- * @returns Transformed tools configuration ready for use with the model
127
- *
128
- * @example
129
- * ```typescript
130
- * const tools = {
131
- * search: async (query: string) => { ... },
132
- * calculate: (expression: string) => { ... }
133
- * };
134
- * voice.addTools(tools);
135
- * ```
136
- */
137
- addTools(tools?: TTools): void;
138
- /**
139
- * Emits a speaking event using the configured voice model.
140
- * Can accept either a string or a readable stream as input.
141
- *
142
- * @param input - The text to convert to speech, or a readable stream containing the text
143
- * @param options - Optional configuration for this specific speech request
144
- * @param options.speaker - Override the voice to use for this specific request
145
- *
146
- * @throws {Error} If the input text is empty
147
- *
148
- * @example
149
- * ```typescript
150
- * // Simple text to speech
151
- * await voice.speak('Hello world');
152
- *
153
- * // With custom voice
154
- * await voice.speak('Hello world', { speaker: 'echo' });
155
- *
156
- * // Using a stream
157
- * const stream = fs.createReadStream('text.txt');
158
- * await voice.speak(stream);
159
- * ```
160
- */
161
- speak(input: string | NodeJS.ReadableStream, options?: {
162
- speaker?: Realtime.Voice;
163
- }): Promise<void>;
164
- /**
165
- * Updates the session configuration for the voice instance.
166
- * This can be used to modify voice settings, turn detection, and other parameters.
167
- *
168
- * @param sessionConfig - New session configuration to apply
169
- *
170
- * @example
171
- * ```typescript
172
- * voice.updateConfig({
173
- * voice: 'echo',
174
- * turn_detection: {
175
- * type: 'server_vad',
176
- * threshold: 0.5,
177
- * silence_duration_ms: 1000
178
- * }
179
- * });
180
- * ```
181
- */
182
- updateConfig(sessionConfig: unknown): void;
183
- /**
184
- * Checks if listening capabilities are enabled.
185
- *
186
- * @returns {Promise<{ enabled: boolean }>}
187
- */
188
- getListener(): Promise<{
189
- enabled: boolean;
190
- }>;
191
- /**
192
- * Processes audio input for speech recognition.
193
- * Takes a readable stream of audio data and emits a writing event.
194
- * The output of the writing event is int16 audio data.
195
- *
196
- * @param audioData - Readable stream containing the audio data to process
197
- * @param options - Optional configuration for audio processing
198
- *
199
- * @throws {Error} If the audio data format is not supported
200
- *
201
- * @example
202
- * ```typescript
203
- * // Process audio from a file
204
- * const audioStream = fs.createReadStream('audio.raw');
205
- * await voice.listen(audioStream);
206
- *
207
- * // Process audio with options
208
- * await voice.listen(microphoneStream, {
209
- * format: 'int16',
210
- * sampleRate: 24000
211
- * });
212
- * ```
213
- */
214
- listen(audioData: NodeJS.ReadableStream): Promise<void>;
215
- waitForOpen(): Promise<unknown>;
216
- waitForSessionCreated(): Promise<unknown>;
217
- /**
218
- * Establishes a connection to the OpenAI realtime service.
219
- * Must be called before using speak, listen, or relay functions.
220
- *
221
- * @throws {Error} If connection fails or session creation times out
222
- *
223
- * @example
224
- * ```typescript
225
- * await voice.open();
226
- * // Now ready for voice interactions
227
- * ```
228
- */
229
- connect({ runtimeContext }?: {
230
- runtimeContext?: RuntimeContext;
231
- }): Promise<void>;
232
- disconnect(): void;
233
- /**
234
- * Streams audio data in real-time to the OpenAI service.
235
- * Useful for continuous audio streaming scenarios like live microphone input.
236
- * Must be in 'open' state before calling this method.
237
- *
238
- * @param audioData - Readable stream of audio data to relay
239
- * @throws {Error} If audio format is not supported
240
- *
241
- * @example
242
- * ```typescript
243
- * // First connect
244
- * await voice.open();
245
- *
246
- * // Then relay audio
247
- * const micStream = getMicrophoneStream();
248
- * await voice.relay(micStream);
249
- * ```
250
- */
251
- send(audioData: NodeJS.ReadableStream | Int16Array, eventId?: string): Promise<void>;
252
- /**
253
- * Sends a response to the OpenAI Realtime API.
254
- *
255
- * Trigger a response to the real-time session.
256
- *
257
- * @param {Object} params - The parameters object
258
- * @param {Realtime.ResponseConfig} params.options - Configuration options for the response
259
- * @returns {Promise<void>} A promise that resolves when the response has been sent
260
- *
261
- * @example
262
- * // Send a simple text response
263
- * await realtimeVoice.answer({
264
- * options: {
265
- * content: "Hello, how can I help you today?",
266
- * voice: "alloy"
267
- * }
268
- * });
269
- */
270
- answer({ options }: {
271
- options?: Realtime.ResponseConfig;
272
- }): Promise<void>;
273
- /**
274
- * Registers an event listener for voice events.
275
- * Available events: 'speaking', 'writing', 'error'
276
- * Can listen to OpenAI Realtime events by prefixing with 'openAIRealtime:'
277
- * Such as 'openAIRealtime:conversation.item.completed', 'openAIRealtime:conversation.updated', etc.
278
- *
279
- * @param event - Name of the event to listen for
280
- * @param callback - Function to call when the event occurs
281
- *
282
- * @example
283
- * ```typescript
284
- * // Listen for speech events
285
- * voice.on('speaking', (audioData: Int16Array) => {
286
- * // Handle audio data
287
- * });
288
- *
289
- * // Handle errors
290
- * voice.on('error', (error: Error) => {
291
- * console.error('Voice error:', error);
292
- * });
293
- * ```
294
- */
295
- on(event: string, callback: EventCallback): void;
296
- /**
297
- * Removes a previously registered event listener.
298
- *
299
- * @param event - Name of the event to stop listening to
300
- * @param callback - The specific callback function to remove
301
- *
302
- * @example
303
- * ```typescript
304
- * // Create event handler
305
- * const handleSpeech = (audioData: Int16Array) => {
306
- * // Handle audio data
307
- * };
308
- *
309
- * // Add listener
310
- * voice.on('speaking', handleSpeech);
311
- *
312
- * // Later, remove the listener
313
- * voice.off('speaking', handleSpeech);
314
- * ```
315
- */
316
- off(event: string, callback: EventCallback): void;
317
- /**
318
- * Emit an event with arguments
319
- * @param event Event name
320
- * @param args Arguments to pass to the callbacks
321
- */
322
- private emit;
323
- private setupEventListeners;
324
- private handleFunctionCalls;
325
- private handleFunctionCall;
326
- private int16ArrayToBase64;
327
- private sendEvent;
328
- }
329
-
330
- declare type ToolDefinition = {
331
- type: 'function';
332
- name: string;
333
- description: string;
334
- parameters: {
335
- [key: string]: any;
336
- };
337
- };
338
-
339
- export declare const transformTools: (tools?: TTools_2) => {
340
- openaiTool: ToolDefinition;
341
- execute: OpenAIExecuteFunction;
342
- }[];
343
-
344
- declare type TTools = ToolsInput;
345
-
346
- declare type TTools_2 = ToolsInput;
347
-
348
- export { }
package/dist/index.d.cts DELETED
@@ -1 +0,0 @@
1
- export { OpenAIRealtimeVoice } from './_tsup-dts-rollup.cjs';