@mastra/voice-openai-realtime 0.11.3-alpha.0 → 0.11.3-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,14 @@
  # @mastra/voice-openai-realtime

+ ## 0.11.3-alpha.1
+
+ ### Patch Changes
+
+ - [#7343](https://github.com/mastra-ai/mastra/pull/7343) [`de3cbc6`](https://github.com/mastra-ai/mastra/commit/de3cbc61079211431bd30487982ea3653517278e) Thanks [@LekoArts](https://github.com/LekoArts)! - Update the `package.json` file to include additional fields like `repository`, `homepage` or `files`.
+
+ - Updated dependencies [[`85ef90b`](https://github.com/mastra-ai/mastra/commit/85ef90bb2cd4ae4df855c7ac175f7d392c55c1bf), [`de3cbc6`](https://github.com/mastra-ai/mastra/commit/de3cbc61079211431bd30487982ea3653517278e)]:
+ - @mastra/core@0.15.3-alpha.5
+
  ## 0.11.3-alpha.0

  ### Patch Changes
package/package.json CHANGED
@@ -1,10 +1,14 @@
  {
  "name": "@mastra/voice-openai-realtime",
- "version": "0.11.3-alpha.0",
+ "version": "0.11.3-alpha.1",
  "description": "Mastra OpenAI Realtime API integration",
  "type": "module",
  "main": "dist/index.js",
  "types": "dist/index.d.ts",
+ "files": [
+ "dist",
+ "CHANGELOG.md"
+ ],
  "exports": {
  ".": {
  "import": {
@@ -33,14 +37,23 @@
  "typescript": "^5.8.3",
  "vitest": "^3.2.4",
  "zod": "^3.25.76",
- "@internal/types-builder": "0.0.9",
- "@mastra/core": "0.15.3-alpha.4",
- "@internal/lint": "0.0.34"
+ "@internal/lint": "0.0.34",
+ "@mastra/core": "0.15.3-alpha.5",
+ "@internal/types-builder": "0.0.9"
  },
  "peerDependencies": {
  "@mastra/core": ">=0.15.2-0 <0.16.0-0",
  "zod": "^3.25.0 || ^4.0.0"
  },
+ "homepage": "https://mastra.ai",
+ "repository": {
+ "type": "git",
+ "url": "git+https://github.com/mastra-ai/mastra.git",
+ "directory": "voice/openai-realtime-api"
+ },
+ "bugs": {
+ "url": "https://github.com/mastra-ai/mastra/issues"
+ },
  "scripts": {
  "build": "tsup --silent --config tsup.config.ts",
  "build:watch": "tsup --watch --silent --config tsup.config.ts",
@@ -1,4 +0,0 @@
-
- > @mastra/voice-openai-realtime@0.11.3-alpha.0 build /home/runner/work/mastra/mastra/voice/openai-realtime-api
- > tsup --silent --config tsup.config.ts
-
package/eslint.config.js DELETED
@@ -1,6 +0,0 @@
- import { createConfig } from '@internal/lint/eslint';
-
- const config = await createConfig();
-
- /** @type {import("eslint").Linter.Config[]} */
- export default [...config];
package/src/index.test.ts DELETED
@@ -1,109 +0,0 @@
- import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
- import { OpenAIRealtimeVoice } from './index';
-
- // Mock RealtimeClient
- vi.mock('openai-realtime-api', () => {
- return {
- RealtimeClient: vi.fn().mockImplementation(() => ({
- connect: vi.fn().mockResolvedValue(undefined),
- disconnect: vi.fn(),
- waitForSessionCreated: vi.fn().mockResolvedValue(undefined),
- updateSession: vi.fn(),
- appendInputAudio: vi.fn(),
- on: vi.fn(),
- emit: vi.fn(),
- })),
- };
- });
-
- vi.mock('ws', () => {
- return {
- WebSocket: vi.fn().mockImplementation(() => ({
- send: vi.fn(),
- close: vi.fn(),
- on: vi.fn(),
- })),
- };
- });
-
- describe('OpenAIRealtimeVoice', () => {
- let voice: OpenAIRealtimeVoice;
-
- beforeEach(() => {
- vi.clearAllMocks();
- voice = new OpenAIRealtimeVoice({
- apiKey: 'test-api-key',
- });
- voice.waitForOpen = () => Promise.resolve();
- voice.waitForSessionCreated = () => Promise.resolve();
- });
-
- afterEach(() => {
- voice?.disconnect();
- });
-
- describe('initialization', () => {
- it('should initialize with default values', () => {
- expect(voice).toBeInstanceOf(OpenAIRealtimeVoice);
- });
-
- it('should initialize with custom speaker', () => {
- const customVoice = new OpenAIRealtimeVoice({
- speaker: 'shimmer',
- });
- expect(customVoice).toBeInstanceOf(OpenAIRealtimeVoice);
- });
- });
-
- describe('getSpeakers', () => {
- it('should return array of available voices', async () => {
- const speakers = await voice.getSpeakers();
- expect(Array.isArray(speakers)).toBe(true);
- expect(speakers.length).toBeGreaterThan(0);
- expect(speakers[0]).toHaveProperty('voiceId');
- });
- });
-
- describe('speak', () => {
- it('should handle string input', async () => {
- const testText = 'Hello, world!';
- await voice.speak(testText);
- });
-
- it('should throw error on empty input', async () => {
- await expect(voice.speak('')).rejects.toThrow('Input text is empty');
- });
- });
-
- describe('send', () => {
- it('should handle Int16Array input', async () => {
- const testArray = new Int16Array([1, 2, 3]);
-
- await voice.connect();
- voice.send(testArray);
- });
- });
-
- describe('event handling', () => {
- it('should register and trigger event listeners', () => {
- const mockCallback = vi.fn();
- voice.on('speak', mockCallback);
-
- // Simulate event emission
- (voice as any).emit('speak', 'test');
-
- expect(mockCallback).toHaveBeenCalledWith('test');
- });
-
- it('should remove event listeners', () => {
- const mockCallback = vi.fn();
- voice.on('speak', mockCallback);
- voice.off('speak', mockCallback);
-
- // Simulate event emission
- (voice as any).emit('speak', 'test');
-
- expect(mockCallback).not.toHaveBeenCalled();
- });
- });
- });
package/src/index.ts DELETED
@@ -1,716 +0,0 @@
- import { EventEmitter } from 'events';
- import { PassThrough } from 'stream';
- import type { ToolsInput } from '@mastra/core/agent';
- import type { RuntimeContext } from '@mastra/core/runtime-context';
- import { MastraVoice } from '@mastra/core/voice';
- import type { Realtime, RealtimeServerEvents } from 'openai-realtime-api';
- import { WebSocket } from 'ws';
- import { isReadableStream, transformTools } from './utils';
-
- /**
- * Event callback function type
- */
- type EventCallback = (...args: any[]) => void;
-
- type StreamWithId = PassThrough & { id: string };
-
- /**
- * Map of event types to their callback arrays
- */
- type EventMap = {
- transcribing: [{ text: string }];
- writing: [{ text: string }];
- speaking: [{ audio: string }];
- speaker: [StreamWithId];
- error: [Error];
- } & {
- [key: string]: EventCallback[];
- };
-
- /** Default voice for text-to-speech. 'alloy' provides a neutral, balanced voice suitable for most use cases */
- const DEFAULT_VOICE: Realtime.Voice = 'alloy';
-
- const DEFAULT_TRANSCRIBER: Realtime.AudioTranscriptionModel = 'whisper-1';
-
- const DEFAULT_URL = 'wss://api.openai.com/v1/realtime';
-
- /**
- * Default model for real-time voice interactions.
- * This model is optimized for low-latency responses while maintaining high quality output.
- */
- const DEFAULT_MODEL = 'gpt-4o-mini-realtime-preview-2024-12-17';
-
- // /**
- // * Default Voice Activity Detection (VAD) configuration.
- // * These settings control how the system detects speech segments.
- // *
- // * @property {string} type - Uses server-side VAD for better accuracy
- // * @property {number} threshold - Speech detection sensitivity (0.5 = balanced)
- // * @property {number} prefix_padding_ms - Includes 1 second of audio before speech
- // * @property {number} silence_duration_ms - Waits 1 second of silence before ending turn
- // */
- // const DEFAULT_VAD_CONFIG = {
- // type: 'server_vad',
- // threshold: 0.5,
- // prefix_padding_ms: 1000,
- // silence_duration_ms: 1000,
- // } as Realtime.TurnDetection;
-
- type TTools = ToolsInput;
-
- /**
- * Available voice options for text-to-speech.
- * Each voice has unique characteristics suitable for different use cases:
- * - alloy: Neutral and balanced
- * - echo: Warm and natural
- * - shimmer: Clear and expressive
- * - And more...
- */
- const VOICES = ['alloy', 'ash', 'ballad', 'coral', 'echo', 'sage', 'shimmer', 'verse'];
-
- type RealtimeClientServerEventMap = {
- [K in RealtimeServerEvents.EventType]: [RealtimeServerEvents.EventMap[K]];
- } & {
- ['conversation.item.input_audio_transcription.delta']: [{ delta: string; response_id: string }];
- ['conversation.item.input_audio_transcription.done']: [{ response_id: string }];
- };
-
- /**
- * OpenAIRealtimeVoice provides real-time voice interaction capabilities using OpenAI's
- * WebSocket-based API. It supports:
- * - Real-time text-to-speech
- * - Speech-to-text (transcription)
- * - Voice activity detection
- * - Multiple voice options
- * - Event-based audio streaming
- *
- * The class manages WebSocket connections, audio streaming, and event handling
- * for seamless voice interactions.
- *
- * @extends MastraVoice
- *
- * @example
- * ```typescript
- * const voice = new OpenAIRealtimeVoice({
- *   apiKey: process.env.OPENAI_API_KEY,
- *   model: 'gpt-4o-mini-realtime'
- * });
- *
- * await voice.open();
- * voice.on('speaking', (audioData) => {
- *   // Handle audio data
- * });
- *
- * await voice.speak('Hello, how can I help you today?');
- * ```
- */
- export class OpenAIRealtimeVoice extends MastraVoice {
- private ws?: WebSocket;
- private state: 'close' | 'open';
- private client: EventEmitter<RealtimeClientServerEventMap>;
- private events: EventMap;
- private instructions?: string;
- private tools?: TTools;
- private debug: boolean;
- private queue: unknown[] = [];
- private transcriber: Realtime.AudioTranscriptionModel;
- private runtimeContext?: RuntimeContext;
- /**
- * Creates a new instance of OpenAIRealtimeVoice.
- *
- * @param options - Configuration options for the voice instance
- * @param options.url - The base URL for the OpenAI Realtime API
- * @param options.model - The model ID to use (defaults to GPT-4 Mini Realtime)
- * @param options.apiKey - OpenAI API key. Falls back to process.env.OPENAI_API_KEY
- * @param options.speaker - Voice ID to use (defaults to 'alloy')
- * @param options.debug - Enable debug mode
- *
- * @example
- * ```typescript
- * const voice = new OpenAIRealtimeVoice({
- *   apiKey: 'your-api-key',
- *   model: 'gpt-4o-mini-realtime',
- *   speaker: 'alloy'
- * });
- * ```
- */
- constructor(
- private options: {
- model?: string;
- url?: string;
- apiKey?: string;
- speaker?: Realtime.Voice;
- transcriber?: Realtime.AudioTranscriptionModel;
- debug?: boolean;
- } = {},
- ) {
- super();
-
- this.client = new EventEmitter();
- this.state = 'close';
- this.events = {} as EventMap;
- this.speaker = options.speaker || DEFAULT_VOICE;
- this.transcriber = options.transcriber || DEFAULT_TRANSCRIBER;
- this.debug = options.debug || false;
- }
-
- /**
- * Returns a list of available voice speakers.
- *
- * @returns Promise resolving to an array of voice objects, each containing at least a voiceId
- *
- * @example
- * ```typescript
- * const speakers = await voice.getSpeakers();
- * // speakers = [{ voiceId: 'alloy' }, { voiceId: 'echo' }, ...]
- * ```
- */
- getSpeakers(): Promise<Array<{ voiceId: string; [key: string]: any }>> {
- return Promise.resolve(VOICES.map(v => ({ voiceId: v })));
- }
-
- /**
- * Disconnects from the OpenAI realtime session and cleans up resources.
- * Should be called when you're done with the voice instance.
- *
- * @example
- * ```typescript
- * voice.close(); // Disconnects and cleans up
- * ```
- */
- close() {
- if (!this.ws) return;
- this.ws.close();
- this.state = 'close';
- }
-
- /**
- * Equips the voice instance with a set of instructions.
- * Instructions allow the model to perform additional actions during conversations.
- *
- * @param instructions - Optional instructions to addInstructions
- * @returns Transformed instructions ready for use with the model
- *
- * @example
- * ```typescript
- * voice.addInstructions('You are a helpful assistant.');
- * ```
- */
- addInstructions(instructions?: string) {
- this.instructions = instructions;
- }
-
- /**
- * Equips the voice instance with a set of tools.
- * Tools allow the model to perform additional actions during conversations.
- *
- * @param tools - Optional tools configuration to addTools
- * @returns Transformed tools configuration ready for use with the model
- *
- * @example
- * ```typescript
- * const tools = {
- *   search: async (query: string) => { ... },
- *   calculate: (expression: string) => { ... }
- * };
- * voice.addTools(tools);
- * ```
- */
- addTools(tools?: TTools) {
- this.tools = tools || {};
- }
-
- /**
- * Emits a speaking event using the configured voice model.
- * Can accept either a string or a readable stream as input.
- *
- * @param input - The text to convert to speech, or a readable stream containing the text
- * @param options - Optional configuration for this specific speech request
- * @param options.speaker - Override the voice to use for this specific request
- *
- * @throws {Error} If the input text is empty
- *
- * @example
- * ```typescript
- * // Simple text to speech
- * await voice.speak('Hello world');
- *
- * // With custom voice
- * await voice.speak('Hello world', { speaker: 'echo' });
- *
- * // Using a stream
- * const stream = fs.createReadStream('text.txt');
- * await voice.speak(stream);
- * ```
- */
- async speak(input: string | NodeJS.ReadableStream, options?: { speaker?: Realtime.Voice }): Promise<void> {
- if (typeof input !== 'string') {
- const chunks: Buffer[] = [];
- for await (const chunk of input) {
- chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(String(chunk)));
- }
- input = Buffer.concat(chunks).toString('utf-8');
- }
-
- if (input.trim().length === 0) {
- throw new Error('Input text is empty');
- }
-
- this.sendEvent('response.create', {
- response: {
- instructions: `Repeat the following text: ${input}`,
- voice: options?.speaker ? options.speaker : undefined,
- },
- });
- }
-
- /**
- * Updates the session configuration for the voice instance.
- * This can be used to modify voice settings, turn detection, and other parameters.
- *
- * @param sessionConfig - New session configuration to apply
- *
- * @example
- * ```typescript
- * voice.updateConfig({
- *   voice: 'echo',
- *   turn_detection: {
- *     type: 'server_vad',
- *     threshold: 0.5,
- *     silence_duration_ms: 1000
- *   }
- * });
- * ```
- */
- updateConfig(sessionConfig: unknown): void {
- this.sendEvent('session.update', { session: sessionConfig });
- }
-
- /**
- * Checks if listening capabilities are enabled.
- *
- * @returns {Promise<{ enabled: boolean }>}
- */
- async getListener() {
- return { enabled: true };
- }
-
- /**
- * Processes audio input for speech recognition.
- * Takes a readable stream of audio data and emits a writing event.
- * The output of the writing event is int16 audio data.
- *
- * @param audioData - Readable stream containing the audio data to process
- * @param options - Optional configuration for audio processing
- *
- * @throws {Error} If the audio data format is not supported
- *
- * @example
- * ```typescript
- * // Process audio from a file
- * const audioStream = fs.createReadStream('audio.raw');
- * await voice.listen(audioStream);
- *
- * // Process audio with options
- * await voice.listen(microphoneStream, {
- *   format: 'int16',
- *   sampleRate: 24000
- * });
- * ```
- */
- async listen(audioData: NodeJS.ReadableStream): Promise<void> {
- if (isReadableStream(audioData)) {
- const chunks: Buffer[] = [];
- for await (const chunk of audioData) {
- const buffer = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
- chunks.push(buffer);
- }
-
- const buffer = Buffer.concat(chunks);
- const int16Array = new Int16Array(buffer.buffer, buffer.byteOffset ?? 0, (buffer.byteLength ?? 0) / 2);
- const base64Audio = this.int16ArrayToBase64(int16Array);
-
- this.sendEvent('conversation.item.create', {
- item: {
- type: 'message',
- role: 'user',
- content: [{ type: 'input_audio', audio: base64Audio }],
- },
- });
-
- this.sendEvent('response.create', {
- response: {
- modalities: ['text'],
- instructions: `ONLY repeat the input and DO NOT say anything else`,
- },
- });
- } else {
- this.emit('error', new Error('Unsupported audio data format'));
- }
- }
-
- waitForOpen() {
- return new Promise(resolve => {
- this.ws?.on('open', resolve);
- });
- }
-
- waitForSessionCreated() {
- return new Promise(resolve => {
- this.client.on('session.created', resolve);
- });
- }
-
- /**
- * Establishes a connection to the OpenAI realtime service.
- * Must be called before using speak, listen, or relay functions.
- *
- * @throws {Error} If connection fails or session creation times out
- *
- * @example
- * ```typescript
- * await voice.open();
- * // Now ready for voice interactions
- * ```
- */
- async connect({ runtimeContext }: { runtimeContext?: RuntimeContext } = {}) {
- const url = `${this.options.url || DEFAULT_URL}?model=${this.options.model || DEFAULT_MODEL}`;
- const apiKey = this.options.apiKey || process.env.OPENAI_API_KEY;
- this.runtimeContext = runtimeContext;
-
- this.ws = new WebSocket(url, undefined, {
- headers: {
- Authorization: 'Bearer ' + apiKey,
- 'OpenAI-Beta': 'realtime=v1',
- },
- });
-
- this.setupEventListeners();
- await Promise.all([this.waitForOpen(), this.waitForSessionCreated()]);
-
- const openaiTools = transformTools(this.tools);
- this.updateConfig({
- instructions: this.instructions,
- tools: openaiTools.map(t => t.openaiTool),
- input_audio_transcription: {
- model: this.transcriber,
- },
- voice: this.speaker,
- });
- this.state = 'open';
- }
-
- disconnect() {
- this.state = 'close';
- this.ws?.close();
- }
-
- /**
- * Streams audio data in real-time to the OpenAI service.
- * Useful for continuous audio streaming scenarios like live microphone input.
- * Must be in 'open' state before calling this method.
- *
- * @param audioData - Readable stream of audio data to relay
- * @throws {Error} If audio format is not supported
- *
- * @example
- * ```typescript
- * // First connect
- * await voice.open();
- *
- * // Then relay audio
- * const micStream = getMicrophoneStream();
- * await voice.relay(micStream);
- * ```
- */
- async send(audioData: NodeJS.ReadableStream | Int16Array, eventId?: string): Promise<void> {
- if (!this.state || this.state !== 'open') {
- console.warn('Cannot relay audio when not open. Call open() first.');
- return;
- }
-
- if (isReadableStream(audioData)) {
- const stream = audioData as NodeJS.ReadableStream;
- stream.on('data', chunk => {
- try {
- const buffer = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
- this.sendEvent('input_audio_buffer.append', { audio: buffer.toString('base64'), event_id: eventId });
- } catch (err) {
- this.emit('error', err);
- }
- });
- } else if (audioData instanceof Int16Array) {
- try {
- const base64Audio = this.int16ArrayToBase64(audioData);
- this.sendEvent('input_audio_buffer.append', { audio: base64Audio, event_id: eventId });
- } catch (err) {
- this.emit('error', err);
- }
- } else {
- this.emit('error', new Error('Unsupported audio data format'));
- }
- }
-
- /**
- * Sends a response to the OpenAI Realtime API.
- *
- * Trigger a response to the real-time session.
- *
- * @param {Object} params - The parameters object
- * @param {Realtime.ResponseConfig} params.options - Configuration options for the response
- * @returns {Promise<void>} A promise that resolves when the response has been sent
- *
- * @example
- * // Send a simple text response
- * await realtimeVoice.answer({
- *   options: {
- *     content: "Hello, how can I help you today?",
- *     voice: "alloy"
- *   }
- * });
- */
- async answer({ options }: { options?: Realtime.ResponseConfig }) {
- this.sendEvent('response.create', { response: options ?? {} });
- }
-
- /**
- * Registers an event listener for voice events.
- * Available events: 'speaking', 'writing, 'error'
- * Can listen to OpenAI Realtime events by prefixing with 'openAIRealtime:'
- * Such as 'openAIRealtime:conversation.item.completed', 'openAIRealtime:conversation.updated', etc.
- *
- * @param event - Name of the event to listen for
- * @param callback - Function to call when the event occurs
- *
- * @example
- * ```typescript
- * // Listen for speech events
- * voice.on('speaking', (audioData: Int16Array) => {
- *   // Handle audio data
- * });
- *
- * // Handle errors
- * voice.on('error', (error: Error) => {
- *   console.error('Voice error:', error);
- * });
- * ```
- */
- on(event: string, callback: EventCallback): void {
- if (!this.events[event]) {
- this.events[event] = [];
- }
- this.events[event].push(callback);
- }
-
- /**
- * Removes a previously registered event listener.
- *
- * @param event - Name of the event to stop listening to
- * @param callback - The specific callback function to remove
- *
- * @example
- * ```typescript
- * // Create event handler
- * const handleSpeech = (audioData: Int16Array) => {
- *   // Handle audio data
- * };
- *
- * // Add listener
- * voice.on('speaking', handleSpeech);
- *
- * // Later, remove the listener
- * voice.off('speaking', handleSpeech);
- * ```
- */
- off(event: string, callback: EventCallback): void {
- if (!this.events[event]) return;
-
- const index = this.events[event].indexOf(callback);
- if (index !== -1) {
- this.events[event].splice(index, 1);
- }
- }
-
- /**
- * Emit an event with arguments
- * @param event Event name
- * @param args Arguments to pass to the callbacks
- */
- private emit(event: string, ...args: any[]): void {
- if (!this.events[event]) return;
-
- for (const callback of this.events[event]) {
- callback(...args);
- }
- }
-
- private setupEventListeners(): void {
- const speakerStreams = new Map<string, StreamWithId>();
-
- if (!this.ws) {
- throw new Error('WebSocket not initialized');
- }
-
- this.ws.on('message', message => {
- const data = JSON.parse(message.toString());
- this.client.emit(data.type, data);
-
- if (this.debug) {
- const { delta, ...fields } = data;
- console.log(data.type, fields, delta?.length < 100 ? delta : '');
- }
- });
-
- this.client.on('session.created', ev => {
- this.emit('session.created', ev);
-
- const queue = this.queue.splice(0, this.queue.length);
- for (const ev of queue) {
- this.ws?.send(JSON.stringify(ev));
- }
- });
- this.client.on('session.updated', ev => {
- this.emit('session.updated', ev);
- });
- this.client.on('response.created', ev => {
- this.emit('response.created', ev);
-
- const speakerStream = new PassThrough() as StreamWithId;
-
- speakerStream.id = ev.response.id;
-
- speakerStreams.set(ev.response.id, speakerStream);
- this.emit('speaker', speakerStream);
- });
- this.client.on('conversation.item.input_audio_transcription.delta', ev => {
- this.emit('writing', { text: ev.delta, response_id: ev.response_id, role: 'user' });
- });
- this.client.on('conversation.item.input_audio_transcription.done', ev => {
- this.emit('writing', { text: '\n', response_id: ev.response_id, role: 'user' });
- });
- this.client.on('response.audio.delta', ev => {
- const audio = Buffer.from(ev.delta, 'base64');
- this.emit('speaking', { audio, response_id: ev.response_id });
-
- const stream = speakerStreams.get(ev.response_id);
- stream?.write(audio);
- });
- this.client.on('response.audio.done', ev => {
- this.emit('speaking.done', { response_id: ev.response_id });
-
- const stream = speakerStreams.get(ev.response_id);
- stream?.end();
- });
- this.client.on('response.audio_transcript.delta', ev => {
- this.emit('writing', { text: ev.delta, response_id: ev.response_id, role: 'assistant' });
- });
- this.client.on('response.audio_transcript.done', ev => {
- this.emit('writing', { text: '\n', response_id: ev.response_id, role: 'assistant' });
- });
- this.client.on('response.text.delta', ev => {
- this.emit('writing', { text: ev.delta, response_id: ev.response_id, role: 'assistant' });
- });
- this.client.on('response.text.done', ev => {
- this.emit('writing', { text: '\n', response_id: ev.response_id, role: 'assistant' });
- });
- this.client.on('response.done', async ev => {
- await this.handleFunctionCalls(ev);
- this.emit('response.done', ev);
- speakerStreams.delete(ev.response.id);
- });
- this.client.on('error', async ev => {
- this.emit('error', ev);
- });
- }
-
- private async handleFunctionCalls(ev: any) {
- for (const output of ev.response?.output ?? []) {
- if (output.type === 'function_call') {
- await this.handleFunctionCall(output);
- }
- }
- }
-
- private async handleFunctionCall(output: any) {
- try {
- const context = JSON.parse(output.arguments);
- const tool = this.tools?.[output.name];
- if (!tool) {
- console.warn(`Tool "${output.name}" not found`);
- return;
- }
-
- if (tool?.execute) {
- this.emit('tool-call-start', {
- toolCallId: output.call_id,
- toolName: output.name,
- toolDescription: tool.description,
- args: context,
- });
- }
-
- const result = await tool?.execute?.(
- { context, runtimeContext: this.runtimeContext },
- {
- toolCallId: output.call_id,
- messages: [],
- },
- );
-
- this.emit('tool-call-result', {
- toolCallId: output.call_id,
- toolName: output.name,
- toolDescription: tool.description,
- args: context,
- result,
- });
-
- this.sendEvent('conversation.item.create', {
- item: {
- type: 'function_call_output',
- call_id: output.call_id,
- output: JSON.stringify(result),
- },
- });
- } catch (e) {
- const err = e as Error;
- console.warn(`Error calling tool "${output.name}":`, err.message);
- this.sendEvent('conversation.item.create', {
- item: {
- type: 'function_call_output',
- call_id: output.call_id,
- output: JSON.stringify({ error: err.message }),
- },
- });
- } finally {
- this.sendEvent('response.create', {});
- }
- }
-
- private int16ArrayToBase64(int16Array: Int16Array): string {
- const buffer = new ArrayBuffer(int16Array.length * 2);
- const view = new DataView(buffer);
- for (let i = 0; i < int16Array.length; i++) {
- view.setInt16(i * 2, int16Array[i]!, true);
- }
- const uint8Array = new Uint8Array(buffer);
- let binary = '';
- for (let i = 0; i < uint8Array.length; i++) {
- binary += String.fromCharCode(uint8Array[i]!);
- }
- return btoa(binary);
- }
-
- private sendEvent(type: string, data: any) {
- if (!this.ws || this.ws.readyState !== this.ws.OPEN) {
- this.queue.push({ type: type, ...data });
- } else {
- this.ws?.send(
- JSON.stringify({
- type: type,
- ...data,
- }),
- );
- }
- }
- }
package/src/utils.test.ts DELETED
@@ -1,119 +0,0 @@
- import { createTool } from '@mastra/core/tools';
- import { describe, it, expect } from 'vitest';
- import { z } from 'zod';
- import { transformTools } from './utils';
-
- // Vitest provides these globals automatically, but we can import them explicitly for clarity
-
- describe('transformTools', () => {
- describe('Basic Tool Transformation', () => {
- it('should transform a tool with Zod inputSchema to OpenAI format', () => {
- // Create a test tool with Zod schema
- const tool = createTool({
- id: 'zodTool',
- description: 'A tool with Zod schema',
- inputSchema: z.object({
- name: z.string(),
- age: z.number().optional(),
- }),
- outputSchema: z.string(),
- execute: async ({ context }) => {
- return `Hello, ${context.name}`;
- },
- });
-
- // Transform the tool
- const transformedTools = transformTools({
- zodTool: tool,
- });
-
- // Assert the transformation results
- expect(transformedTools).toHaveLength(1);
- const { openaiTool } = transformedTools[0];
-
- expect(openaiTool).toMatchObject({
- type: 'function',
- name: 'zodTool',
- description: 'A tool with Zod schema',
- parameters: expect.objectContaining({
- type: 'object',
- properties: expect.objectContaining({
- name: expect.objectContaining({ type: 'string' }),
- age: expect.objectContaining({ type: 'number' }),
- }),
- required: ['name'],
- }),
- });
- });
-
- it('should transform a tool with JSON schema parameters to OpenAI format', () => {
- // Create a test tool with direct JSON schema
- const tool = {
- id: 'jsonTool',
- description: 'A tool with JSON schema',
- parameters: {
- type: 'object',
- properties: {
- query: { type: 'string' },
- limit: { type: 'integer' },
- },
- required: ['query'],
- },
- execute: async (args: { query: string; limit?: number }) => {
- return `Searched for: ${args.query}`;
- },
- };
-
- // Transform the tool
- const transformedTools = transformTools({
- jsonTool: tool,
- });
-
- // Assert the transformation results
- expect(transformedTools).toHaveLength(1);
- const { openaiTool } = transformedTools[0];
-
- expect(openaiTool).toMatchObject({
- type: 'function',
- name: 'jsonTool',
- description: 'A tool with JSON schema',
- parameters: expect.objectContaining({
- type: 'object',
- properties: expect.objectContaining({
- query: expect.objectContaining({ type: 'string' }),
- limit: expect.objectContaining({ type: 'integer' }),
- }),
- required: ['query'],
- }),
- });
- });
- });
-
- describe('Tool Execution Tests', () => {
- it('should create an adapter function for tool execution', async () => {
- // Create a tool that expects context
- const tool = createTool({
- id: 'messageTool',
- description: 'A tool that processes a message',
- inputSchema: z.object({
- message: z.string(),
- }),
- outputSchema: z.string(),
- execute: async ({ context }) => {
- return `Processed: ${context.message}`;
- },
- });
-
- // Transform the tool
- const transformedTools = transformTools({
- messageTool: tool,
- });
-
- // Execute the transformed tool
- const result = await transformedTools[0].execute({ message: 'Hello' });
-
- // Verify the adapter correctly passes the context
- expect(result).toBe('Processed: Hello');
- });
- });
- });
package/src/utils.ts DELETED
@@ -1,106 +0,0 @@
- import { Readable } from 'stream';
- import type { ToolsInput } from '@mastra/core/agent';
- import { zodToJsonSchema } from 'zod-to-json-schema';
-
- export type OpenAIExecuteFunction = (args: any) => Promise<any>;
- type ToolDefinition = {
- type: 'function';
- name: string;
- description: string;
- parameters: {
- [key: string]: any;
- };
- };
-
- type TTools = ToolsInput;
- export const transformTools = (tools?: TTools) => {
- const openaiTools: { openaiTool: ToolDefinition; execute: OpenAIExecuteFunction }[] = [];
- for (const [name, tool] of Object.entries(tools || {})) {
- let parameters: { [key: string]: any };
-
- if ('inputSchema' in tool && tool.inputSchema) {
- if (isZodObject(tool.inputSchema)) {
- parameters = zodToJsonSchema(tool.inputSchema);
- delete parameters.$schema;
- } else {
- parameters = tool.inputSchema;
- }
- } else if ('parameters' in tool) {
- if (isZodObject(tool.parameters)) {
- parameters = zodToJsonSchema(tool.parameters);
- delete parameters.$schema;
- } else {
- parameters = tool.parameters;
- }
- } else {
- console.warn(`Tool ${name} has neither inputSchema nor parameters, skipping`);
- continue;
- }
- const openaiTool: ToolDefinition = {
- type: 'function',
- name,
- description: tool.description || `Tool: ${name}`,
- parameters,
- };
-
- if (tool.execute) {
- // Create an adapter function that works with both ToolAction and VercelTool execute functions
- const executeAdapter = async (args: any) => {
- try {
- if (!tool.execute) {
- throw new Error(`Tool ${name} has no execute function`);
- }
-
- // For ToolAction, the first argument is a context object with the args in a 'context' property
- if ('inputSchema' in tool) {
- return await tool.execute(
- { context: args },
- {
- toolCallId: 'unknown',
- messages: [],
- },
- );
- }
- // For VercelTool, pass args directly
- else {
- // Create a minimal ToolExecutionOptions object with required properties
- const options = {
- toolCallId: 'unknown',
- messages: [],
- };
- return await tool.execute(args, options);
- }
- } catch (error) {
- console.error(`Error executing tool ${name}:`, error);
- throw error;
- }
- };
- openaiTools.push({ openaiTool, execute: executeAdapter });
- } else {
- console.warn(`Tool ${name} has no execute function, skipping`);
- }
- }
- return openaiTools;
- };
-
- export const isReadableStream = (obj: unknown) => {
- return (
- obj &&
- obj instanceof Readable &&
- typeof obj.read === 'function' &&
- typeof obj.pipe === 'function' &&
- obj.readable === true
- );
- };
-
- function isZodObject(schema: unknown) {
- return (
- !!schema &&
- typeof schema === 'object' &&
- '_def' in schema &&
- schema._def &&
- typeof schema._def === 'object' &&
- 'typeName' in schema._def &&
- schema._def.typeName === 'ZodObject'
- );
- }
@@ -1,9 +0,0 @@
- {
- "extends": ["./tsconfig.json", "../../tsconfig.build.json"],
- "compilerOptions": {
- "outDir": "./dist",
- "rootDir": "./src"
- },
- "include": ["src/**/*"],
- "exclude": ["node_modules", "**/*.test.ts", "src/**/*.mock.ts"]
- }
package/tsconfig.json DELETED
@@ -1,5 +0,0 @@
- {
- "extends": "../../tsconfig.node.json",
- "include": ["src/**/*", "tsup.config.ts"],
- "exclude": ["node_modules", "**/*.test.ts"]
- }
package/tsup.config.ts DELETED
@@ -1,17 +0,0 @@
- import { generateTypes } from '@internal/types-builder';
- import { defineConfig } from 'tsup';
-
- export default defineConfig({
- entry: ['src/index.ts'],
- format: ['esm', 'cjs'],
- clean: true,
- dts: false,
- splitting: true,
- treeshake: {
- preset: 'smallest',
- },
- sourcemap: true,
- onSuccess: async () => {
- await generateTypes(process.cwd());
- },
- });
package/vitest.config.ts DELETED
@@ -1,8 +0,0 @@
- import { defineConfig } from 'vitest/config';
-
- export default defineConfig({
- test: {
- globals: true,
- include: ['src/**/*.test.ts'],
- },
- });