@mastra/voice-openai-realtime 0.0.5-alpha.0 → 0.1.0-alpha.2

This diff compares the published contents of these two package versions as they appear in their public registry. It is provided for informational purposes only.
@@ -1,23 +1,23 @@
 
-> @mastra/voice-openai-realtime@0.0.5-alpha.0 build /home/runner/work/mastra/mastra/voice/openai-realtime-api
+> @mastra/voice-openai-realtime@0.1.0-alpha.2 build /home/runner/work/mastra/mastra/voice/openai-realtime-api
 > tsup src/index.ts --format esm,cjs --experimental-dts --clean --treeshake
 
 CLI Building entry: src/index.ts
 CLI Using tsconfig: tsconfig.json
 CLI tsup v8.4.0
 TSC Build start
-TSC ⚡️ Build success in 9719ms
+TSC ⚡️ Build success in 9123ms
 DTS Build start
 CLI Target: es2022
 Analysis will use the bundled TypeScript version 5.8.2
 Writing package typings: /home/runner/work/mastra/mastra/voice/openai-realtime-api/dist/_tsup-dts-rollup.d.ts
 Analysis will use the bundled TypeScript version 5.8.2
 Writing package typings: /home/runner/work/mastra/mastra/voice/openai-realtime-api/dist/_tsup-dts-rollup.d.cts
-DTS ⚡️ Build success in 10943ms
+DTS ⚡️ Build success in 10151ms
 CLI Cleaning output folder
 ESM Build start
 CJS Build start
-CJS dist/index.cjs 14.99 KB
-CJS ⚡️ Build success in 515ms
-ESM dist/index.js 14.90 KB
-ESM ⚡️ Build success in 515ms
+CJS dist/index.cjs 17.80 KB
+CJS ⚡️ Build success in 842ms
+ESM dist/index.js 17.75 KB
+ESM ⚡️ Build success in 843ms
package/CHANGELOG.md CHANGED
@@ -1,5 +1,28 @@
 # @mastra/voice-openai-realtime
 
+## 0.1.0-alpha.2
+
+### Patch Changes
+
+- Updated dependencies [a838fde]
+- Updated dependencies [a8bd4cf]
+- Updated dependencies [7a3eeb0]
+- Updated dependencies [6530ad1]
+  - @mastra/core@0.7.0-alpha.2
+
+## 0.1.0-alpha.1
+
+### Minor Changes
+
+- 443b118: This update removed an external dependency on an unmaintained package and implemented a native websocket connection.
+
+### Patch Changes
+
+- Updated dependencies [0b54522]
+- Updated dependencies [1af25d5]
+- Updated dependencies [27439ad]
+  - @mastra/core@0.7.0-alpha.1
+
 ## 0.0.5-alpha.0
 
 ### Patch Changes
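
For consumers, the headline change (443b118) is visible in the typings below: the `chatModel.options` bag is gone, `instructions` and `url` now sit directly on `chatModel`, and `debug` is a top-level constructor flag. A migration sketch against the new declarations; all values are illustrative:

```typescript
import { OpenAIRealtimeVoice } from "@mastra/voice-openai-realtime";

// 0.1.0 options shape: instructions/url moved out of the removed
// chatModel.options bag, and debug is now top-level.
const voice = new OpenAIRealtimeVoice({
  chatModel: {
    model: "gpt-4o-mini-realtime-preview-2024-12-17",
    apiKey: process.env.OPENAI_API_KEY, // the default fallback is this env var
    instructions: "You are a concise voice assistant.",
  },
  speaker: "alloy",
  debug: true,
});
```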
@@ -43,10 +43,13 @@ export declare type OpenAIExecuteFunction = (args: any) => Promise<any>;
  * ```
  */
 export declare class OpenAIRealtimeVoice extends MastraVoice {
-    private client;
+    private ws;
     private state;
+    private client;
     private events;
-    tools?: TTools;
+    private instructions?;
+    private tools?;
+    private debug;
     /**
      * Creates a new instance of OpenAIRealtimeVoice.
      *
@@ -55,13 +58,8 @@ export declare class OpenAIRealtimeVoice extends MastraVoice {
      * @param options.chatModel.model - The model ID to use (defaults to GPT-4 Mini Realtime)
      * @param options.chatModel.apiKey - OpenAI API key. Falls back to process.env.OPENAI_API_KEY
      * @param options.chatModel.tools - Tools configuration for the model
-     * @param options.chatModel.options - Additional options for the realtime client
-     * @param options.chatModel.options.sessionConfig - Session configuration overrides
-     * @param options.chatModel.options.url - Custom WebSocket URL
-     * @param options.chatModel.options.dangerouslyAllowAPIKeyInBrowser - Whether to allow API key in browser
-     * @param options.chatModel.options.debug - Enable debug logging
-     * @param options.chatModel.options.tools - Additional tools configuration
      * @param options.speaker - Voice ID to use (defaults to 'alloy')
+     * @param options.debug - Enable debug mode
      *
      * @example
      * ```typescript
@@ -74,20 +72,16 @@ export declare class OpenAIRealtimeVoice extends MastraVoice {
      * });
      * ```
      */
-    constructor({ chatModel, speaker, }?: {
+    constructor({ chatModel, speaker, debug, }?: {
         chatModel?: {
             model?: string;
             apiKey?: string;
             tools?: TTools;
-            options?: {
-                sessionConfig?: Realtime.SessionConfig;
-                url?: string;
-                dangerouslyAllowAPIKeyInBrowser?: boolean;
-                debug?: boolean;
-                tools?: TTools;
-            };
+            instructions?: string;
+            url?: string;
         };
         speaker?: Realtime.Voice;
+        debug?: boolean;
     });
     /**
      * Returns a list of available voice speakers.
@@ -175,7 +169,7 @@ export declare class OpenAIRealtimeVoice extends MastraVoice {
      * });
      * ```
      */
-    updateConfig(sessionConfig: Realtime.SessionConfig): void;
+    updateConfig(sessionConfig: unknown): void;
     /**
      * Processes audio input for speech recognition.
      * Takes a readable stream of audio data and emits a writing event.
@@ -200,6 +194,8 @@ export declare class OpenAIRealtimeVoice extends MastraVoice {
      * ```
      */
     listen(audioData: NodeJS.ReadableStream): Promise<void>;
+    waitForOpen(): Promise<unknown>;
+    waitForSessionCreated(): Promise<unknown>;
     /**
      * Establishes a connection to the OpenAI realtime service.
      * Must be called before using speak, listen, or relay functions.
@@ -231,7 +227,7 @@ export declare class OpenAIRealtimeVoice extends MastraVoice {
      * await voice.relay(micStream);
      * ```
      */
-    send(audioData: NodeJS.ReadableStream | Int16Array): Promise<void>;
+    send(audioData: NodeJS.ReadableStream | Int16Array, eventId?: string): Promise<void>;
     /**
      * Sends a response to the OpenAI Realtime API.
     *
@@ -304,11 +300,15 @@ export declare class OpenAIRealtimeVoice extends MastraVoice {
      */
     private emit;
     private setupEventListeners;
+    private handleFunctionCalls;
+    private handleFunctionCall;
     private int16ArrayToBase64;
+    private sendEvent;
 }
 
 export declare const transformTools: (tools?: TTools_2) => {
     openaiTool: {
+        type: string;
         name: string;
         description: string;
         parameters: {
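
Read together, the new declarations imply the following lifecycle. This is a sketch; `speak`'s string argument is inferred from the implementation diff further down:

```typescript
import { OpenAIRealtimeVoice } from "@mastra/voice-openai-realtime";

const voice = new OpenAIRealtimeVoice({ speaker: "alloy" });
await voice.connect(); // awaits waitForOpen() and waitForSessionCreated() internally
await voice.speak("Hello from the realtime API"); // string input, per dist/index.cjs below
// ...consume "speaking"/"speaker" events, then:
voice.close();
```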
package/dist/index.cjs CHANGED
@@ -1,8 +1,9 @@
 'use strict';
 
-var voice = require('@mastra/core/voice');
-var openaiRealtimeApi = require('openai-realtime-api');
+var events = require('events');
 var stream = require('stream');
+var voice = require('@mastra/core/voice');
+var ws = require('ws');
 var zodToJsonSchema = require('zod-to-json-schema');
 
 // src/index.ts
@@ -29,6 +30,7 @@ var transformTools = (tools) => {
       continue;
     }
     const openaiTool = {
+      type: "function",
      name,
      description: tool.description || `Tool: ${name}`,
      parameters
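
The added `type: "function"` field matches what the Realtime API's `session.update` expects for each tool entry. A sketch of the object `transformTools` now produces; the tool name and JSON-Schema parameters here are hypothetical:

```typescript
// Sketch of a transformed tool entry; "getWeather" and its schema are
// placeholders, not names from this package.
const openaiTool = {
  type: "function",
  name: "getWeather",
  description: "Tool: getWeather",
  parameters: {
    type: "object",
    properties: { city: { type: "string" } },
    required: ["city"],
  },
};
```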
@@ -66,19 +68,17 @@ var isReadableStream = (obj) => {
 
 // src/index.ts
 var DEFAULT_VOICE = "alloy";
+var DEFAULT_URL = "wss://api.openai.com/v1/realtime";
 var DEFAULT_MODEL = "gpt-4o-mini-realtime-preview-2024-12-17";
-var DEFAULT_VAD_CONFIG = {
-  type: "server_vad",
-  threshold: 0.5,
-  prefix_padding_ms: 1e3,
-  silence_duration_ms: 1e3
-};
 var VOICES = ["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"];
 var OpenAIRealtimeVoice = class extends voice.MastraVoice {
-  client;
+  ws;
   state;
+  client;
   events;
+  instructions;
   tools;
+  debug;
   /**
    * Creates a new instance of OpenAIRealtimeVoice.
    *
@@ -87,13 +87,8 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
    * @param options.chatModel.model - The model ID to use (defaults to GPT-4 Mini Realtime)
    * @param options.chatModel.apiKey - OpenAI API key. Falls back to process.env.OPENAI_API_KEY
    * @param options.chatModel.tools - Tools configuration for the model
-   * @param options.chatModel.options - Additional options for the realtime client
-   * @param options.chatModel.options.sessionConfig - Session configuration overrides
-   * @param options.chatModel.options.url - Custom WebSocket URL
-   * @param options.chatModel.options.dangerouslyAllowAPIKeyInBrowser - Whether to allow API key in browser
-   * @param options.chatModel.options.debug - Enable debug logging
-   * @param options.chatModel.options.tools - Additional tools configuration
    * @param options.speaker - Voice ID to use (defaults to 'alloy')
+   * @param options.debug - Enable debug mode
    *
    * @example
    * ```typescript
@@ -108,25 +103,26 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
    */
   constructor({
     chatModel,
-    speaker
+    speaker,
+    debug = false
   } = {}) {
     super();
-    this.client = new openaiRealtimeApi.RealtimeClient({
-      apiKey: chatModel?.apiKey || process.env.OPENAI_API_KEY,
-      model: chatModel?.model || DEFAULT_MODEL,
-      ...chatModel?.options,
-      sessionConfig: {
-        voice: speaker || DEFAULT_VOICE,
-        turn_detection: DEFAULT_VAD_CONFIG,
-        ...chatModel?.options?.sessionConfig
+    const url = `${chatModel?.url || DEFAULT_URL}?model=${chatModel?.model || DEFAULT_MODEL}`;
+    const apiKey = chatModel?.apiKey || process.env.OPENAI_API_KEY;
+    this.ws = new ws.WebSocket(url, void 0, {
+      headers: {
+        Authorization: "Bearer " + apiKey,
+        "OpenAI-Beta": "realtime=v1"
      }
    });
+    this.client = new events.EventEmitter();
     this.state = "close";
     this.events = {};
+    this.tools = chatModel?.tools;
+    this.instructions = chatModel?.instructions;
+    this.speaker = speaker || DEFAULT_VOICE;
+    this.debug = debug;
     this.setupEventListeners();
-    if (chatModel?.tools) {
-      this.addTools(chatModel.tools);
-    }
   }
   /**
   * Returns a list of available voice speakers.
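
The constructor now performs the WebSocket handshake itself instead of delegating to the removed `openai-realtime-api` client. A standalone sketch of the equivalent connection using the `ws` package, mirroring the headers and defaults above:

```typescript
import WebSocket from "ws";

// What the new constructor does under the hood (sketch): bearer auth plus
// the realtime beta header, against the default endpoint and model.
const url =
  "wss://api.openai.com/v1/realtime?model=gpt-4o-mini-realtime-preview-2024-12-17";
const socket = new WebSocket(url, undefined, {
  headers: {
    Authorization: `Bearer ${process.env.OPENAI_API_KEY}`,
    "OpenAI-Beta": "realtime=v1",
  },
});
```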
@@ -152,8 +148,8 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
    * ```
    */
   close() {
-    if (!this.client) return;
-    this.client.disconnect();
+    if (!this.ws) return;
+    this.ws.close();
     this.state = "close";
   }
   /**
@@ -173,10 +169,10 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
    * ```
    */
   addTools(tools) {
-    const transformedTools = transformTools(tools);
-    for (const tool of transformedTools) {
-      this.client.addTool(tool.openaiTool, tool.execute);
-    }
+    const openaiTools = transformTools(tools);
+    this.updateConfig({
+      tools: openaiTools.map((t) => t.openaiTool)
+    });
   }
   /**
   * Emits a speaking event using the configured voice model.
@@ -212,7 +208,7 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
     if (input.trim().length === 0) {
       throw new Error("Input text is empty");
     }
-    this.client.realtime.send("response.create", {
+    this.sendEvent("response.create", {
       response: {
         instructions: `Repeat the following text: ${input}`,
         voice: options?.speaker ? options.speaker : void 0
@@ -238,7 +234,7 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
    * ```
    */
   updateConfig(sessionConfig) {
-    this.client.updateSession(sessionConfig);
+    this.sendEvent("session.update", { session: sessionConfig });
   }
   /**
   * Processes audio input for speech recognition.
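
`updateConfig` is now a thin wrapper that forwards its argument verbatim as the `session` payload of a `session.update` event, which is why its declared parameter type widened to `unknown`. A caller-side sketch with illustrative session fields:

```typescript
import { OpenAIRealtimeVoice } from "@mastra/voice-openai-realtime";

const voice = new OpenAIRealtimeVoice();
await voice.connect();
// Raw Realtime session fields, forwarded as { session: ... } in a
// session.update event; the old typed Realtime.SessionConfig is gone.
voice.updateConfig({
  turn_detection: { type: "server_vad", threshold: 0.5, silence_duration_ms: 1000 },
  voice: "sage",
});
```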
@@ -273,14 +269,14 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
      const buffer = Buffer.concat(chunks);
      const int16Array = new Int16Array(buffer.buffer, buffer.byteOffset ?? 0, (buffer.byteLength ?? 0) / 2);
      const base64Audio = this.int16ArrayToBase64(int16Array);
-     this.client.realtime.send("conversation.item.create", {
+     this.sendEvent("conversation.item.create", {
        item: {
          type: "message",
          role: "user",
          content: [{ type: "input_audio", audio: base64Audio }]
        }
      });
-     this.client.realtime.send("response.create", {
+     this.sendEvent("response.create", {
        response: {
          modalities: ["text"],
          instructions: `ONLY repeat the input and DO NOT say anything else`
@@ -290,6 +286,16 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
      this.emit("error", new Error("Unsupported audio data format"));
    }
  }
+  waitForOpen() {
+    return new Promise((resolve) => {
+      this.ws.on("open", resolve);
+    });
+  }
+  waitForSessionCreated() {
+    return new Promise((resolve) => {
+      this.client.on("session.created", resolve);
+    });
+  }
   /**
   * Establishes a connection to the OpenAI realtime service.
   * Must be called before using speak, listen, or relay functions.
@@ -303,8 +309,17 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
    * ```
    */
   async connect() {
-    await this.client.connect();
-    await this.client.waitForSessionCreated();
+    await this.waitForOpen();
+    await this.waitForSessionCreated();
+    const openaiTools = transformTools(this.tools);
+    this.updateConfig({
+      instructions: this.instructions,
+      tools: openaiTools.map((t) => t.openaiTool),
+      input_audio_transcription: {
+        model: "whisper-1"
+      },
+      voice: this.speaker
+    });
     this.state = "open";
   }
   /**
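
`connect()` now waits for the socket's `open` event and the server's `session.created` event, then pushes the instance configuration in a single `session.update`. A sketch of the resulting frame; the concrete values depend on the constructor options:

```typescript
// The one session.update frame connect() sends after session.created
// (sketch; instructions/voice come from the constructor options).
const frame = {
  type: "session.update",
  session: {
    instructions: "You are a concise voice assistant.", // chatModel.instructions
    tools: [], // transformTools(this.tools) output
    input_audio_transcription: { model: "whisper-1" },
    voice: "alloy", // this.speaker
  },
};
console.log(JSON.stringify(frame, null, 2));
```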
@@ -325,7 +340,7 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
   * await voice.relay(micStream);
   * ```
   */
-  async send(audioData) {
+  async send(audioData, eventId) {
    if (!this.state || this.state !== "open") {
      console.warn("Cannot relay audio when not open. Call open() first.");
      return;
@@ -335,15 +350,14 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
      stream.on("data", (chunk) => {
        try {
          const buffer = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
-         const int16Array = new Int16Array(buffer.buffer, buffer.byteOffset, buffer.byteLength / 2);
-         this.client.appendInputAudio(int16Array);
+         this.sendEvent("input_audio_buffer.append", { audio: buffer.toString("base64"), event_id: eventId });
        } catch (err) {
          this.emit("error", err);
        }
      });
    } else if (audioData instanceof Int16Array) {
      try {
-       this.client.appendInputAudio(audioData);
+       this.sendEvent("input_audio_buffer.append", { audio: audioData, event_id: eventId });
      } catch (err) {
        this.emit("error", err);
      }
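
Relayed stream chunks are now base64-encoded and written directly as `input_audio_buffer.append` events, with the new optional `eventId` forwarded as `event_id`. A usage sketch; the PCM file is a hypothetical stand-in for a microphone stream:

```typescript
import { createReadStream } from "fs";
import { OpenAIRealtimeVoice } from "@mastra/voice-openai-realtime";

const voice = new OpenAIRealtimeVoice();
await voice.connect();
// Relay raw 16-bit PCM audio; "mic-capture.pcm" is a placeholder source.
await voice.send(createReadStream("mic-capture.pcm"), "relay-1");
```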
@@ -370,7 +384,7 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
   * });
   */
  async answer({ options }) {
-    this.client.realtime.send("response.create", { response: options ?? {} });
+    this.sendEvent("response.create", { response: options ?? {} });
  }
  /**
   * Registers an event listener for voice events.
@@ -439,29 +453,105 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
    }
  }
  setupEventListeners() {
-    this.client.on("error", (error) => {
-      this.emit("error", error);
+    const speakerStreams = /* @__PURE__ */ new Map();
+    this.ws.on("message", (message) => {
+      const data = JSON.parse(message.toString());
+      this.client.emit(data.type, data);
+      if (this.debug) {
+        const { delta, ...fields } = data;
+        console.log(data.type, fields, delta?.length < 100 ? delta : "");
+      }
    });
-    this.client.on("conversation.created", (conversation) => {
-      this.emit("openAIRealtime:conversation.created", conversation);
+    this.client.on("session.created", (ev) => {
+      this.emit("session.created", ev);
    });
-    this.client.on("conversation.interrupted", () => {
-      this.emit("openAIRealtime:conversation.interrupted");
+    this.client.on("session.updated", (ev) => {
+      this.emit("session.updated", ev);
    });
-    this.client.on("conversation.updated", ({ delta }) => {
-      if (delta?.audio) {
-        this.emit("speaking", { audio: delta.audio });
-      }
+    this.client.on("response.created", (ev) => {
+      this.emit("response.created", ev);
+      const speakerStream = new stream.PassThrough();
+      speakerStream.id = ev.response.id;
+      speakerStreams.set(ev.response.id, speakerStream);
+      this.emit("speaker", speakerStream);
    });
-    this.client.on("conversation.item.appended", (item) => {
-      this.emit("openAIRealtime:conversation.item.appended", item);
+    this.client.on("conversation.item.input_audio_transcription.delta", (ev) => {
+      this.emit("transcribing", { text: ev.delta, response_id: ev.response_id, role: "user" });
    });
-    this.client.on("conversation.item.completed", ({ item, delta }) => {
-      if (item.formatted.transcript) {
-        this.emit("writing", { text: item.formatted.transcript, role: item.role });
-      }
-      this.emit("openAIRealtime:conversation.item.completed", { item, delta });
+    this.client.on("conversation.item.input_audio_transcription.done", (ev) => {
+      this.emit("transcribing", { text: "\n", response_id: ev.response_id, role: "user" });
+    });
+    this.client.on("response.audio.delta", (ev) => {
+      const audio = Buffer.from(ev.delta, "base64");
+      this.emit("speaking", { audio, response_id: ev.response_id });
+      const stream = speakerStreams.get(ev.response_id);
+      stream?.write(audio);
+    });
+    this.client.on("response.audio.done", (ev) => {
+      this.emit("speaking.done", { response_id: ev.response_id });
+      const stream = speakerStreams.get(ev.response_id);
+      stream?.end();
+    });
+    this.client.on("response.audio_transcript.delta", (ev) => {
+      this.emit("writing", { text: ev.delta, response_id: ev.response_id });
    });
+    this.client.on("response.audio_transcript.done", (ev) => {
+      this.emit("writing", { text: "\n", response_id: ev.response_id });
+    });
+    this.client.on("response.text.delta", (ev) => {
+      this.emit("writing", { text: ev.delta, response_id: ev.response_id });
+    });
+    this.client.on("response.text.done", (ev) => {
+      this.emit("writing", { text: "\n", response_id: ev.response_id });
+    });
+    this.client.on("response.done", async (ev) => {
+      await this.handleFunctionCalls(ev);
+      this.emit("response.done", ev);
+      speakerStreams.delete(ev.response.id);
+    });
+  }
+  async handleFunctionCalls(ev) {
+    for (const output of ev.response?.output ?? []) {
+      if (output.type === "function_call") {
+        await this.handleFunctionCall(output);
+      }
+    }
+  }
+  async handleFunctionCall(output) {
+    try {
+      const context = JSON.parse(output.arguments);
+      const tool = this.tools?.[output.name];
+      if (!tool) {
+        console.warn(`Tool "${output.name}" not found`);
+        return;
+      }
+      const result = await tool?.execute?.(
+        { context },
+        {
+          toolCallId: "unknown",
+          messages: []
+        }
+      );
+      this.sendEvent("conversation.item.create", {
+        item: {
+          type: "function_call_output",
+          call_id: output.call_id,
+          output: JSON.stringify(result)
+        }
+      });
+    } catch (e) {
+      const err = e;
+      console.warn(`Error calling tool "${output.name}":`, err.message);
+      this.sendEvent("conversation.item.create", {
+        item: {
+          type: "function_call_output",
+          call_id: output.call_id,
+          output: JSON.stringify({ error: err.message })
+        }
+      });
+    } finally {
+      this.sendEvent("response.create", {});
+    }
  }
  int16ArrayToBase64(int16Array) {
    const buffer = new ArrayBuffer(int16Array.length * 2);
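
The old relayed `openAIRealtime:*` event names are gone: the emitter now re-exposes the Realtime API's own event types, plus a per-response `speaker` PassThrough stream that receives decoded audio deltas and ends on `response.audio.done`. A listener sketch; the sink is a placeholder for a real audio device:

```typescript
import { Writable } from "stream";
import { OpenAIRealtimeVoice } from "@mastra/voice-openai-realtime";

const voice = new OpenAIRealtimeVoice();
// Hypothetical sink standing in for an actual speaker/output device.
const speakerSink = new Writable({ write(_chunk, _enc, cb) { cb(); } });

voice.on("speaker", (stream) => stream.pipe(speakerSink)); // per-response audio
voice.on("writing", ({ text }) => process.stdout.write(text)); // assistant transcript
voice.on("transcribing", ({ text }) => process.stdout.write(text)); // user transcript
await voice.connect();
```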
@@ -476,6 +566,14 @@ var OpenAIRealtimeVoice = class extends voice.MastraVoice {
    }
    return btoa(binary);
  }
+  sendEvent(type, data) {
+    this.ws.send(
+      JSON.stringify({
+        type,
+        ...data
+      })
+    );
+  }
 };
 
 exports.OpenAIRealtimeVoice = OpenAIRealtimeVoice;