@mastra/voice-openai-realtime 0.11.3-alpha.0 → 0.11.3-alpha.2
This diff shows the changes between publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- package/CHANGELOG.md +18 -0
- package/package.json +18 -5
- package/.turbo/turbo-build.log +0 -4
- package/eslint.config.js +0 -6
- package/src/index.test.ts +0 -109
- package/src/index.ts +0 -716
- package/src/utils.test.ts +0 -119
- package/src/utils.ts +0 -106
- package/tsconfig.build.json +0 -9
- package/tsconfig.json +0 -5
- package/tsup.config.ts +0 -17
- package/vitest.config.ts +0 -8
package/CHANGELOG.md
CHANGED
@@ -1,5 +1,23 @@
 # @mastra/voice-openai-realtime
 
+## 0.11.3-alpha.2
+
+### Patch Changes
+
+- [#7394](https://github.com/mastra-ai/mastra/pull/7394) [`f0dfcac`](https://github.com/mastra-ai/mastra/commit/f0dfcac4458bdf789b975e2d63e984f5d1e7c4d3) Thanks [@NikAiyer](https://github.com/NikAiyer)! - updated core peerdep
+
+- Updated dependencies [[`7149d8d`](https://github.com/mastra-ai/mastra/commit/7149d8d4bdc1edf0008e0ca9b7925eb0b8b60dbe)]:
+  - @mastra/core@0.15.3-alpha.7
+
+## 0.11.3-alpha.1
+
+### Patch Changes
+
+- [#7343](https://github.com/mastra-ai/mastra/pull/7343) [`de3cbc6`](https://github.com/mastra-ai/mastra/commit/de3cbc61079211431bd30487982ea3653517278e) Thanks [@LekoArts](https://github.com/LekoArts)! - Update the `package.json` file to include additional fields like `repository`, `homepage` or `files`.
+
+- Updated dependencies [[`85ef90b`](https://github.com/mastra-ai/mastra/commit/85ef90bb2cd4ae4df855c7ac175f7d392c55c1bf), [`de3cbc6`](https://github.com/mastra-ai/mastra/commit/de3cbc61079211431bd30487982ea3653517278e)]:
+  - @mastra/core@0.15.3-alpha.5
+
 ## 0.11.3-alpha.0
 
 ### Patch Changes
package/package.json
CHANGED
@@ -1,10 +1,14 @@
 {
   "name": "@mastra/voice-openai-realtime",
-  "version": "0.11.3-alpha.0",
+  "version": "0.11.3-alpha.2",
   "description": "Mastra OpenAI Realtime API integration",
   "type": "module",
   "main": "dist/index.js",
   "types": "dist/index.d.ts",
+  "files": [
+    "dist",
+    "CHANGELOG.md"
+  ],
   "exports": {
     ".": {
       "import": {
@@ -33,14 +37,23 @@
     "typescript": "^5.8.3",
     "vitest": "^3.2.4",
     "zod": "^3.25.76",
-    "@internal/
-    "@mastra/core": "0.15.3-alpha.
-    "@internal/
+    "@internal/lint": "0.0.34",
+    "@mastra/core": "0.15.3-alpha.7",
+    "@internal/types-builder": "0.0.9"
   },
   "peerDependencies": {
-    "@mastra/core": ">=0.15.
+    "@mastra/core": ">=0.15.3-0 <0.16.0-0",
     "zod": "^3.25.0 || ^4.0.0"
   },
+  "homepage": "https://mastra.ai",
+  "repository": {
+    "type": "git",
+    "url": "git+https://github.com/mastra-ai/mastra.git",
+    "directory": "voice/openai-realtime-api"
+  },
+  "bugs": {
+    "url": "https://github.com/mastra-ai/mastra/issues"
+  },
   "scripts": {
     "build": "tsup --silent --config tsup.config.ts",
     "build:watch": "tsup --watch --silent --config tsup.config.ts",
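As a quick illustration (not part of the package itself), the tightened peer dependency range can be checked with the standard `semver` library; the version numbers below are the ones that appear in this diff, and assuming `semver` is installed:

```typescript
import semver from 'semver';

// New peer range from package.json. The `-0` suffixes on the bounds allow
// prerelease builds of the boundary versions (e.g. 0.15.3-alpha.x) to match.
const peerRange = '>=0.15.3-0 <0.16.0-0';

// The @mastra/core devDependency pinned in this release satisfies the range.
console.log(semver.satisfies('0.15.3-alpha.7', peerRange, { includePrerelease: true })); // true

// A stable 0.15.x release also satisfies it, while the next minor does not.
console.log(semver.satisfies('0.15.4', peerRange)); // true
console.log(semver.satisfies('0.16.0', peerRange)); // false
```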
package/.turbo/turbo-build.log
DELETED
package/eslint.config.js
DELETED
package/src/index.test.ts
DELETED
@@ -1,109 +0,0 @@
import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
import { OpenAIRealtimeVoice } from './index';

// Mock RealtimeClient
vi.mock('openai-realtime-api', () => {
  return {
    RealtimeClient: vi.fn().mockImplementation(() => ({
      connect: vi.fn().mockResolvedValue(undefined),
      disconnect: vi.fn(),
      waitForSessionCreated: vi.fn().mockResolvedValue(undefined),
      updateSession: vi.fn(),
      appendInputAudio: vi.fn(),
      on: vi.fn(),
      emit: vi.fn(),
    })),
  };
});

vi.mock('ws', () => {
  return {
    WebSocket: vi.fn().mockImplementation(() => ({
      send: vi.fn(),
      close: vi.fn(),
      on: vi.fn(),
    })),
  };
});

describe('OpenAIRealtimeVoice', () => {
  let voice: OpenAIRealtimeVoice;

  beforeEach(() => {
    vi.clearAllMocks();
    voice = new OpenAIRealtimeVoice({
      apiKey: 'test-api-key',
    });
    voice.waitForOpen = () => Promise.resolve();
    voice.waitForSessionCreated = () => Promise.resolve();
  });

  afterEach(() => {
    voice?.disconnect();
  });

  describe('initialization', () => {
    it('should initialize with default values', () => {
      expect(voice).toBeInstanceOf(OpenAIRealtimeVoice);
    });

    it('should initialize with custom speaker', () => {
      const customVoice = new OpenAIRealtimeVoice({
        speaker: 'shimmer',
      });
      expect(customVoice).toBeInstanceOf(OpenAIRealtimeVoice);
    });
  });

  describe('getSpeakers', () => {
    it('should return array of available voices', async () => {
      const speakers = await voice.getSpeakers();
      expect(Array.isArray(speakers)).toBe(true);
      expect(speakers.length).toBeGreaterThan(0);
      expect(speakers[0]).toHaveProperty('voiceId');
    });
  });

  describe('speak', () => {
    it('should handle string input', async () => {
      const testText = 'Hello, world!';
      await voice.speak(testText);
    });

    it('should throw error on empty input', async () => {
      await expect(voice.speak('')).rejects.toThrow('Input text is empty');
    });
  });

  describe('send', () => {
    it('should handle Int16Array input', async () => {
      const testArray = new Int16Array([1, 2, 3]);

      await voice.connect();
      voice.send(testArray);
    });
  });

  describe('event handling', () => {
    it('should register and trigger event listeners', () => {
      const mockCallback = vi.fn();
      voice.on('speak', mockCallback);

      // Simulate event emission
      (voice as any).emit('speak', 'test');

      expect(mockCallback).toHaveBeenCalledWith('test');
    });

    it('should remove event listeners', () => {
      const mockCallback = vi.fn();
      voice.on('speak', mockCallback);
      voice.off('speak', mockCallback);

      // Simulate event emission
      (voice as any).emit('speak', 'test');

      expect(mockCallback).not.toHaveBeenCalled();
    });
  });
});
package/src/index.ts
DELETED
@@ -1,716 +0,0 @@
import { EventEmitter } from 'events';
import { PassThrough } from 'stream';
import type { ToolsInput } from '@mastra/core/agent';
import type { RuntimeContext } from '@mastra/core/runtime-context';
import { MastraVoice } from '@mastra/core/voice';
import type { Realtime, RealtimeServerEvents } from 'openai-realtime-api';
import { WebSocket } from 'ws';
import { isReadableStream, transformTools } from './utils';

/**
 * Event callback function type
 */
type EventCallback = (...args: any[]) => void;

type StreamWithId = PassThrough & { id: string };

/**
 * Map of event types to their callback arrays
 */
type EventMap = {
  transcribing: [{ text: string }];
  writing: [{ text: string }];
  speaking: [{ audio: string }];
  speaker: [StreamWithId];
  error: [Error];
} & {
  [key: string]: EventCallback[];
};

/** Default voice for text-to-speech. 'alloy' provides a neutral, balanced voice suitable for most use cases */
const DEFAULT_VOICE: Realtime.Voice = 'alloy';

const DEFAULT_TRANSCRIBER: Realtime.AudioTranscriptionModel = 'whisper-1';

const DEFAULT_URL = 'wss://api.openai.com/v1/realtime';

/**
 * Default model for real-time voice interactions.
 * This model is optimized for low-latency responses while maintaining high quality output.
 */
const DEFAULT_MODEL = 'gpt-4o-mini-realtime-preview-2024-12-17';

// /**
//  * Default Voice Activity Detection (VAD) configuration.
//  * These settings control how the system detects speech segments.
//  *
//  * @property {string} type - Uses server-side VAD for better accuracy
//  * @property {number} threshold - Speech detection sensitivity (0.5 = balanced)
//  * @property {number} prefix_padding_ms - Includes 1 second of audio before speech
//  * @property {number} silence_duration_ms - Waits 1 second of silence before ending turn
//  */
// const DEFAULT_VAD_CONFIG = {
//   type: 'server_vad',
//   threshold: 0.5,
//   prefix_padding_ms: 1000,
//   silence_duration_ms: 1000,
// } as Realtime.TurnDetection;

type TTools = ToolsInput;

/**
 * Available voice options for text-to-speech.
 * Each voice has unique characteristics suitable for different use cases:
 * - alloy: Neutral and balanced
 * - echo: Warm and natural
 * - shimmer: Clear and expressive
 * - And more...
 */
const VOICES = ['alloy', 'ash', 'ballad', 'coral', 'echo', 'sage', 'shimmer', 'verse'];

type RealtimeClientServerEventMap = {
  [K in RealtimeServerEvents.EventType]: [RealtimeServerEvents.EventMap[K]];
} & {
  ['conversation.item.input_audio_transcription.delta']: [{ delta: string; response_id: string }];
  ['conversation.item.input_audio_transcription.done']: [{ response_id: string }];
};

/**
 * OpenAIRealtimeVoice provides real-time voice interaction capabilities using OpenAI's
 * WebSocket-based API. It supports:
 * - Real-time text-to-speech
 * - Speech-to-text (transcription)
 * - Voice activity detection
 * - Multiple voice options
 * - Event-based audio streaming
 *
 * The class manages WebSocket connections, audio streaming, and event handling
 * for seamless voice interactions.
 *
 * @extends MastraVoice
 *
 * @example
 * ```typescript
 * const voice = new OpenAIRealtimeVoice({
 *   apiKey: process.env.OPENAI_API_KEY,
 *   model: 'gpt-4o-mini-realtime'
 * });
 *
 * await voice.open();
 * voice.on('speaking', (audioData) => {
 *   // Handle audio data
 * });
 *
 * await voice.speak('Hello, how can I help you today?');
 * ```
 */
export class OpenAIRealtimeVoice extends MastraVoice {
  private ws?: WebSocket;
  private state: 'close' | 'open';
  private client: EventEmitter<RealtimeClientServerEventMap>;
  private events: EventMap;
  private instructions?: string;
  private tools?: TTools;
  private debug: boolean;
  private queue: unknown[] = [];
  private transcriber: Realtime.AudioTranscriptionModel;
  private runtimeContext?: RuntimeContext;
  /**
   * Creates a new instance of OpenAIRealtimeVoice.
   *
   * @param options - Configuration options for the voice instance
   * @param options.url - The base URL for the OpenAI Realtime API
   * @param options.model - The model ID to use (defaults to GPT-4 Mini Realtime)
   * @param options.apiKey - OpenAI API key. Falls back to process.env.OPENAI_API_KEY
   * @param options.speaker - Voice ID to use (defaults to 'alloy')
   * @param options.debug - Enable debug mode
   *
   * @example
   * ```typescript
   * const voice = new OpenAIRealtimeVoice({
   *   apiKey: 'your-api-key',
   *   model: 'gpt-4o-mini-realtime',
   *   speaker: 'alloy'
   * });
   * ```
   */
  constructor(
    private options: {
      model?: string;
      url?: string;
      apiKey?: string;
      speaker?: Realtime.Voice;
      transcriber?: Realtime.AudioTranscriptionModel;
      debug?: boolean;
    } = {},
  ) {
    super();

    this.client = new EventEmitter();
    this.state = 'close';
    this.events = {} as EventMap;
    this.speaker = options.speaker || DEFAULT_VOICE;
    this.transcriber = options.transcriber || DEFAULT_TRANSCRIBER;
    this.debug = options.debug || false;
  }

  /**
   * Returns a list of available voice speakers.
   *
   * @returns Promise resolving to an array of voice objects, each containing at least a voiceId
   *
   * @example
   * ```typescript
   * const speakers = await voice.getSpeakers();
   * // speakers = [{ voiceId: 'alloy' }, { voiceId: 'echo' }, ...]
   * ```
   */
  getSpeakers(): Promise<Array<{ voiceId: string; [key: string]: any }>> {
    return Promise.resolve(VOICES.map(v => ({ voiceId: v })));
  }

  /**
   * Disconnects from the OpenAI realtime session and cleans up resources.
   * Should be called when you're done with the voice instance.
   *
   * @example
   * ```typescript
   * voice.close(); // Disconnects and cleans up
   * ```
   */
  close() {
    if (!this.ws) return;
    this.ws.close();
    this.state = 'close';
  }

  /**
   * Equips the voice instance with a set of instructions.
   * Instructions allow the model to perform additional actions during conversations.
   *
   * @param instructions - Optional instructions to addInstructions
   * @returns Transformed instructions ready for use with the model
   *
   * @example
   * ```typescript
   * voice.addInstructions('You are a helpful assistant.');
   * ```
   */
  addInstructions(instructions?: string) {
    this.instructions = instructions;
  }

  /**
   * Equips the voice instance with a set of tools.
   * Tools allow the model to perform additional actions during conversations.
   *
   * @param tools - Optional tools configuration to addTools
   * @returns Transformed tools configuration ready for use with the model
   *
   * @example
   * ```typescript
   * const tools = {
   *   search: async (query: string) => { ... },
   *   calculate: (expression: string) => { ... }
   * };
   * voice.addTools(tools);
   * ```
   */
  addTools(tools?: TTools) {
    this.tools = tools || {};
  }

  /**
   * Emits a speaking event using the configured voice model.
   * Can accept either a string or a readable stream as input.
   *
   * @param input - The text to convert to speech, or a readable stream containing the text
   * @param options - Optional configuration for this specific speech request
   * @param options.speaker - Override the voice to use for this specific request
   *
   * @throws {Error} If the input text is empty
   *
   * @example
   * ```typescript
   * // Simple text to speech
   * await voice.speak('Hello world');
   *
   * // With custom voice
   * await voice.speak('Hello world', { speaker: 'echo' });
   *
   * // Using a stream
   * const stream = fs.createReadStream('text.txt');
   * await voice.speak(stream);
   * ```
   */
  async speak(input: string | NodeJS.ReadableStream, options?: { speaker?: Realtime.Voice }): Promise<void> {
    if (typeof input !== 'string') {
      const chunks: Buffer[] = [];
      for await (const chunk of input) {
        chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(String(chunk)));
      }
      input = Buffer.concat(chunks).toString('utf-8');
    }

    if (input.trim().length === 0) {
      throw new Error('Input text is empty');
    }

    this.sendEvent('response.create', {
      response: {
        instructions: `Repeat the following text: ${input}`,
        voice: options?.speaker ? options.speaker : undefined,
      },
    });
  }

  /**
   * Updates the session configuration for the voice instance.
   * This can be used to modify voice settings, turn detection, and other parameters.
   *
   * @param sessionConfig - New session configuration to apply
   *
   * @example
   * ```typescript
   * voice.updateConfig({
   *   voice: 'echo',
   *   turn_detection: {
   *     type: 'server_vad',
   *     threshold: 0.5,
   *     silence_duration_ms: 1000
   *   }
   * });
   * ```
   */
  updateConfig(sessionConfig: unknown): void {
    this.sendEvent('session.update', { session: sessionConfig });
  }

  /**
   * Checks if listening capabilities are enabled.
   *
   * @returns {Promise<{ enabled: boolean }>}
   */
  async getListener() {
    return { enabled: true };
  }

  /**
   * Processes audio input for speech recognition.
   * Takes a readable stream of audio data and emits a writing event.
   * The output of the writing event is int16 audio data.
   *
   * @param audioData - Readable stream containing the audio data to process
   * @param options - Optional configuration for audio processing
   *
   * @throws {Error} If the audio data format is not supported
   *
   * @example
   * ```typescript
   * // Process audio from a file
   * const audioStream = fs.createReadStream('audio.raw');
   * await voice.listen(audioStream);
   *
   * // Process audio with options
   * await voice.listen(microphoneStream, {
   *   format: 'int16',
   *   sampleRate: 24000
   * });
   * ```
   */
  async listen(audioData: NodeJS.ReadableStream): Promise<void> {
    if (isReadableStream(audioData)) {
      const chunks: Buffer[] = [];
      for await (const chunk of audioData) {
        const buffer = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
        chunks.push(buffer);
      }

      const buffer = Buffer.concat(chunks);
      const int16Array = new Int16Array(buffer.buffer, buffer.byteOffset ?? 0, (buffer.byteLength ?? 0) / 2);
      const base64Audio = this.int16ArrayToBase64(int16Array);

      this.sendEvent('conversation.item.create', {
        item: {
          type: 'message',
          role: 'user',
          content: [{ type: 'input_audio', audio: base64Audio }],
        },
      });

      this.sendEvent('response.create', {
        response: {
          modalities: ['text'],
          instructions: `ONLY repeat the input and DO NOT say anything else`,
        },
      });
    } else {
      this.emit('error', new Error('Unsupported audio data format'));
    }
  }

  waitForOpen() {
    return new Promise(resolve => {
      this.ws?.on('open', resolve);
    });
  }

  waitForSessionCreated() {
    return new Promise(resolve => {
      this.client.on('session.created', resolve);
    });
  }

  /**
   * Establishes a connection to the OpenAI realtime service.
   * Must be called before using speak, listen, or relay functions.
   *
   * @throws {Error} If connection fails or session creation times out
   *
   * @example
   * ```typescript
   * await voice.open();
   * // Now ready for voice interactions
   * ```
   */
  async connect({ runtimeContext }: { runtimeContext?: RuntimeContext } = {}) {
    const url = `${this.options.url || DEFAULT_URL}?model=${this.options.model || DEFAULT_MODEL}`;
    const apiKey = this.options.apiKey || process.env.OPENAI_API_KEY;
    this.runtimeContext = runtimeContext;

    this.ws = new WebSocket(url, undefined, {
      headers: {
        Authorization: 'Bearer ' + apiKey,
        'OpenAI-Beta': 'realtime=v1',
      },
    });

    this.setupEventListeners();
    await Promise.all([this.waitForOpen(), this.waitForSessionCreated()]);

    const openaiTools = transformTools(this.tools);
    this.updateConfig({
      instructions: this.instructions,
      tools: openaiTools.map(t => t.openaiTool),
      input_audio_transcription: {
        model: this.transcriber,
      },
      voice: this.speaker,
    });
    this.state = 'open';
  }

  disconnect() {
    this.state = 'close';
    this.ws?.close();
  }

  /**
   * Streams audio data in real-time to the OpenAI service.
   * Useful for continuous audio streaming scenarios like live microphone input.
   * Must be in 'open' state before calling this method.
   *
   * @param audioData - Readable stream of audio data to relay
   * @throws {Error} If audio format is not supported
   *
   * @example
   * ```typescript
   * // First connect
   * await voice.open();
   *
   * // Then relay audio
   * const micStream = getMicrophoneStream();
   * await voice.relay(micStream);
   * ```
   */
  async send(audioData: NodeJS.ReadableStream | Int16Array, eventId?: string): Promise<void> {
    if (!this.state || this.state !== 'open') {
      console.warn('Cannot relay audio when not open. Call open() first.');
      return;
    }

    if (isReadableStream(audioData)) {
      const stream = audioData as NodeJS.ReadableStream;
      stream.on('data', chunk => {
        try {
          const buffer = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
          this.sendEvent('input_audio_buffer.append', { audio: buffer.toString('base64'), event_id: eventId });
        } catch (err) {
          this.emit('error', err);
        }
      });
    } else if (audioData instanceof Int16Array) {
      try {
        const base64Audio = this.int16ArrayToBase64(audioData);
        this.sendEvent('input_audio_buffer.append', { audio: base64Audio, event_id: eventId });
      } catch (err) {
        this.emit('error', err);
      }
    } else {
      this.emit('error', new Error('Unsupported audio data format'));
    }
  }

  /**
   * Sends a response to the OpenAI Realtime API.
   *
   * Trigger a response to the real-time session.
   *
   * @param {Object} params - The parameters object
   * @param {Realtime.ResponseConfig} params.options - Configuration options for the response
   * @returns {Promise<void>} A promise that resolves when the response has been sent
   *
   * @example
   * // Send a simple text response
   * await realtimeVoice.answer({
   *   options: {
   *     content: "Hello, how can I help you today?",
   *     voice: "alloy"
   *   }
   * });
   */
  async answer({ options }: { options?: Realtime.ResponseConfig }) {
    this.sendEvent('response.create', { response: options ?? {} });
  }

  /**
   * Registers an event listener for voice events.
   * Available events: 'speaking', 'writing, 'error'
   * Can listen to OpenAI Realtime events by prefixing with 'openAIRealtime:'
   * Such as 'openAIRealtime:conversation.item.completed', 'openAIRealtime:conversation.updated', etc.
   *
   * @param event - Name of the event to listen for
   * @param callback - Function to call when the event occurs
   *
   * @example
   * ```typescript
   * // Listen for speech events
   * voice.on('speaking', (audioData: Int16Array) => {
   *   // Handle audio data
   * });
   *
   * // Handle errors
   * voice.on('error', (error: Error) => {
   *   console.error('Voice error:', error);
   * });
   * ```
   */
  on(event: string, callback: EventCallback): void {
    if (!this.events[event]) {
      this.events[event] = [];
    }
    this.events[event].push(callback);
  }

  /**
   * Removes a previously registered event listener.
   *
   * @param event - Name of the event to stop listening to
   * @param callback - The specific callback function to remove
   *
   * @example
   * ```typescript
   * // Create event handler
   * const handleSpeech = (audioData: Int16Array) => {
   *   // Handle audio data
   * };
   *
   * // Add listener
   * voice.on('speaking', handleSpeech);
   *
   * // Later, remove the listener
   * voice.off('speaking', handleSpeech);
   * ```
   */
  off(event: string, callback: EventCallback): void {
    if (!this.events[event]) return;

    const index = this.events[event].indexOf(callback);
    if (index !== -1) {
      this.events[event].splice(index, 1);
    }
  }

  /**
   * Emit an event with arguments
   * @param event Event name
   * @param args Arguments to pass to the callbacks
   */
  private emit(event: string, ...args: any[]): void {
    if (!this.events[event]) return;

    for (const callback of this.events[event]) {
      callback(...args);
    }
  }

  private setupEventListeners(): void {
    const speakerStreams = new Map<string, StreamWithId>();

    if (!this.ws) {
      throw new Error('WebSocket not initialized');
    }

    this.ws.on('message', message => {
      const data = JSON.parse(message.toString());
      this.client.emit(data.type, data);

      if (this.debug) {
        const { delta, ...fields } = data;
        console.log(data.type, fields, delta?.length < 100 ? delta : '');
      }
    });

    this.client.on('session.created', ev => {
      this.emit('session.created', ev);

      const queue = this.queue.splice(0, this.queue.length);
      for (const ev of queue) {
        this.ws?.send(JSON.stringify(ev));
      }
    });
    this.client.on('session.updated', ev => {
      this.emit('session.updated', ev);
    });
    this.client.on('response.created', ev => {
      this.emit('response.created', ev);

      const speakerStream = new PassThrough() as StreamWithId;

      speakerStream.id = ev.response.id;

      speakerStreams.set(ev.response.id, speakerStream);
      this.emit('speaker', speakerStream);
    });
    this.client.on('conversation.item.input_audio_transcription.delta', ev => {
      this.emit('writing', { text: ev.delta, response_id: ev.response_id, role: 'user' });
    });
    this.client.on('conversation.item.input_audio_transcription.done', ev => {
      this.emit('writing', { text: '\n', response_id: ev.response_id, role: 'user' });
    });
    this.client.on('response.audio.delta', ev => {
      const audio = Buffer.from(ev.delta, 'base64');
      this.emit('speaking', { audio, response_id: ev.response_id });

      const stream = speakerStreams.get(ev.response_id);
      stream?.write(audio);
    });
    this.client.on('response.audio.done', ev => {
      this.emit('speaking.done', { response_id: ev.response_id });

      const stream = speakerStreams.get(ev.response_id);
      stream?.end();
    });
    this.client.on('response.audio_transcript.delta', ev => {
      this.emit('writing', { text: ev.delta, response_id: ev.response_id, role: 'assistant' });
    });
    this.client.on('response.audio_transcript.done', ev => {
      this.emit('writing', { text: '\n', response_id: ev.response_id, role: 'assistant' });
    });
    this.client.on('response.text.delta', ev => {
      this.emit('writing', { text: ev.delta, response_id: ev.response_id, role: 'assistant' });
    });
    this.client.on('response.text.done', ev => {
      this.emit('writing', { text: '\n', response_id: ev.response_id, role: 'assistant' });
    });
    this.client.on('response.done', async ev => {
      await this.handleFunctionCalls(ev);
      this.emit('response.done', ev);
      speakerStreams.delete(ev.response.id);
    });
    this.client.on('error', async ev => {
      this.emit('error', ev);
    });
  }

  private async handleFunctionCalls(ev: any) {
    for (const output of ev.response?.output ?? []) {
      if (output.type === 'function_call') {
        await this.handleFunctionCall(output);
      }
    }
  }

  private async handleFunctionCall(output: any) {
    try {
      const context = JSON.parse(output.arguments);
      const tool = this.tools?.[output.name];
      if (!tool) {
        console.warn(`Tool "${output.name}" not found`);
        return;
      }

      if (tool?.execute) {
        this.emit('tool-call-start', {
          toolCallId: output.call_id,
          toolName: output.name,
          toolDescription: tool.description,
          args: context,
        });
      }

      const result = await tool?.execute?.(
        { context, runtimeContext: this.runtimeContext },
        {
          toolCallId: output.call_id,
          messages: [],
        },
      );

      this.emit('tool-call-result', {
        toolCallId: output.call_id,
        toolName: output.name,
        toolDescription: tool.description,
        args: context,
        result,
      });

      this.sendEvent('conversation.item.create', {
        item: {
          type: 'function_call_output',
          call_id: output.call_id,
          output: JSON.stringify(result),
        },
      });
    } catch (e) {
      const err = e as Error;
      console.warn(`Error calling tool "${output.name}":`, err.message);
      this.sendEvent('conversation.item.create', {
        item: {
          type: 'function_call_output',
          call_id: output.call_id,
          output: JSON.stringify({ error: err.message }),
        },
      });
    } finally {
      this.sendEvent('response.create', {});
    }
  }

  private int16ArrayToBase64(int16Array: Int16Array): string {
    const buffer = new ArrayBuffer(int16Array.length * 2);
    const view = new DataView(buffer);
    for (let i = 0; i < int16Array.length; i++) {
      view.setInt16(i * 2, int16Array[i]!, true);
    }
    const uint8Array = new Uint8Array(buffer);
    let binary = '';
    for (let i = 0; i < uint8Array.length; i++) {
      binary += String.fromCharCode(uint8Array[i]!);
    }
    return btoa(binary);
  }

  private sendEvent(type: string, data: any) {
    if (!this.ws || this.ws.readyState !== this.ws.OPEN) {
      this.queue.push({ type: type, ...data });
    } else {
      this.ws?.send(
        JSON.stringify({
          type: type,
          ...data,
        }),
      );
    }
  }
}
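For orientation, here is a short usage sketch of the class removed above, assembled from its own JSDoc and method signatures rather than from separate documentation; treat it as an approximation of how a consumer would use it, not canonical usage:

```typescript
import { OpenAIRealtimeVoice } from '@mastra/voice-openai-realtime';

const voice = new OpenAIRealtimeVoice({
  apiKey: process.env.OPENAI_API_KEY,
  speaker: 'alloy',
});

voice.addInstructions('You are a helpful assistant.');

// Per-response PCM audio arrives as a PassThrough stream tagged with the response id.
voice.on('speaker', stream => {
  stream.on('data', (chunk: Buffer) => {
    // forward chunk to your audio output device or file
  });
});

// Incremental transcripts are emitted for both the user and the assistant.
voice.on('writing', ({ text, role }: { text: string; role: 'user' | 'assistant' }) => {
  process.stdout.write(`${role}: ${text}`);
});

await voice.connect();            // opens the WebSocket and configures the session
await voice.speak('Hello, how can I help you today?');
await voice.send(new Int16Array(2400)); // stream raw 16-bit PCM chunks while connected
voice.close();                    // or voice.disconnect()
```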
package/src/utils.test.ts
DELETED
@@ -1,119 +0,0 @@
import { createTool } from '@mastra/core/tools';
import { describe, it, expect } from 'vitest';
import { z } from 'zod';
import { transformTools } from './utils';

// Vitest provides these globals automatically, but we can import them explicitly for clarity

describe('transformTools', () => {
  describe('Basic Tool Transformation', () => {
    it('should transform a tool with Zod inputSchema to OpenAI format', () => {
      // Create a test tool with Zod schema
      const tool = createTool({
        id: 'zodTool',
        description: 'A tool with Zod schema',
        inputSchema: z.object({
          name: z.string(),
          age: z.number().optional(),
        }),
        outputSchema: z.string(),
        execute: async ({ context }) => {
          return `Hello, ${context.name}`;
        },
      });

      // Transform the tool
      const transformedTools = transformTools({
        zodTool: tool,
      });

      // Assert the transformation results
      expect(transformedTools).toHaveLength(1);
      const { openaiTool } = transformedTools[0];

      expect(openaiTool).toMatchObject({
        type: 'function',
        name: 'zodTool',
        description: 'A tool with Zod schema',
        parameters: expect.objectContaining({
          type: 'object',
          properties: expect.objectContaining({
            name: expect.objectContaining({ type: 'string' }),
            age: expect.objectContaining({ type: 'number' }),
          }),
          required: ['name'],
        }),
      });
    });

    it('should transform a tool with JSON schema parameters to OpenAI format', () => {
      // Create a test tool with direct JSON schema
      const tool = {
        id: 'jsonTool',
        description: 'A tool with JSON schema',
        parameters: {
          type: 'object',
          properties: {
            query: { type: 'string' },
            limit: { type: 'integer' },
          },
          required: ['query'],
        },
        execute: async (args: { query: string; limit?: number }) => {
          return `Searched for: ${args.query}`;
        },
      };

      // Transform the tool
      const transformedTools = transformTools({
        jsonTool: tool,
      });

      // Assert the transformation results
      expect(transformedTools).toHaveLength(1);
      const { openaiTool } = transformedTools[0];

      expect(openaiTool).toMatchObject({
        type: 'function',
        name: 'jsonTool',
        description: 'A tool with JSON schema',
        parameters: expect.objectContaining({
          type: 'object',
          properties: expect.objectContaining({
            query: expect.objectContaining({ type: 'string' }),
            limit: expect.objectContaining({ type: 'integer' }),
          }),
          required: ['query'],
        }),
      });
    });
  });

  describe('Tool Execution Tests', () => {
    it('should create an adapter function for tool execution', async () => {
      // Create a tool that expects context
      const tool = createTool({
        id: 'messageTool',
        description: 'A tool that processes a message',
        inputSchema: z.object({
          message: z.string(),
        }),
        outputSchema: z.string(),
        execute: async ({ context }) => {
          return `Processed: ${context.message}`;
        },
      });

      // Transform the tool
      const transformedTools = transformTools({
        messageTool: tool,
      });

      // Execute the transformed tool
      const result = await transformedTools[0].execute({ message: 'Hello' });

      // Verify the adapter correctly passes the context
      expect(result).toBe('Processed: Hello');
    });
  });
});
package/src/utils.ts
DELETED
@@ -1,106 +0,0 @@
import { Readable } from 'stream';
import type { ToolsInput } from '@mastra/core/agent';
import { zodToJsonSchema } from 'zod-to-json-schema';

export type OpenAIExecuteFunction = (args: any) => Promise<any>;
type ToolDefinition = {
  type: 'function';
  name: string;
  description: string;
  parameters: {
    [key: string]: any;
  };
};

type TTools = ToolsInput;
export const transformTools = (tools?: TTools) => {
  const openaiTools: { openaiTool: ToolDefinition; execute: OpenAIExecuteFunction }[] = [];
  for (const [name, tool] of Object.entries(tools || {})) {
    let parameters: { [key: string]: any };

    if ('inputSchema' in tool && tool.inputSchema) {
      if (isZodObject(tool.inputSchema)) {
        parameters = zodToJsonSchema(tool.inputSchema);
        delete parameters.$schema;
      } else {
        parameters = tool.inputSchema;
      }
    } else if ('parameters' in tool) {
      if (isZodObject(tool.parameters)) {
        parameters = zodToJsonSchema(tool.parameters);
        delete parameters.$schema;
      } else {
        parameters = tool.parameters;
      }
    } else {
      console.warn(`Tool ${name} has neither inputSchema nor parameters, skipping`);
      continue;
    }
    const openaiTool: ToolDefinition = {
      type: 'function',
      name,
      description: tool.description || `Tool: ${name}`,
      parameters,
    };

    if (tool.execute) {
      // Create an adapter function that works with both ToolAction and VercelTool execute functions
      const executeAdapter = async (args: any) => {
        try {
          if (!tool.execute) {
            throw new Error(`Tool ${name} has no execute function`);
          }

          // For ToolAction, the first argument is a context object with the args in a 'context' property
          if ('inputSchema' in tool) {
            return await tool.execute(
              { context: args },
              {
                toolCallId: 'unknown',
                messages: [],
              },
            );
          }
          // For VercelTool, pass args directly
          else {
            // Create a minimal ToolExecutionOptions object with required properties
            const options = {
              toolCallId: 'unknown',
              messages: [],
            };
            return await tool.execute(args, options);
          }
        } catch (error) {
          console.error(`Error executing tool ${name}:`, error);
          throw error;
        }
      };
      openaiTools.push({ openaiTool, execute: executeAdapter });
    } else {
      console.warn(`Tool ${name} has no execute function, skipping`);
    }
  }
  return openaiTools;
};

export const isReadableStream = (obj: unknown) => {
  return (
    obj &&
    obj instanceof Readable &&
    typeof obj.read === 'function' &&
    typeof obj.pipe === 'function' &&
    obj.readable === true
  );
};

function isZodObject(schema: unknown) {
  return (
    !!schema &&
    typeof schema === 'object' &&
    '_def' in schema &&
    schema._def &&
    typeof schema._def === 'object' &&
    'typeName' in schema._def &&
    schema._def.typeName === 'ZodObject'
  );
}
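A rough sketch of what the removed `transformTools` helper produces for a Zod-based tool, based on the implementation and tests above; the example tool and the commented output are illustrative approximations, and the import path refers to the internal module shown in this diff:

```typescript
import { createTool } from '@mastra/core/tools';
import { z } from 'zod';
import { transformTools } from './utils';

// A hypothetical tool used only for illustration.
const weatherTool = createTool({
  id: 'getWeather',
  description: 'Look up the weather for a city',
  inputSchema: z.object({ city: z.string() }),
  outputSchema: z.string(),
  execute: async ({ context }) => `Sunny in ${context.city}`,
});

const [{ openaiTool, execute }] = transformTools({ getWeather: weatherTool });

// openaiTool ≈ {
//   type: 'function',
//   name: 'getWeather',
//   description: 'Look up the weather for a city',
//   parameters: { type: 'object', properties: { city: { type: 'string' } }, required: ['city'] }
// }
// (the zodToJsonSchema output with its top-level $schema field removed)

// The adapter wraps the Mastra-style execute({ context }) signature so it can be
// invoked with plain function-call arguments from the Realtime API.
await execute({ city: 'Berlin' }); // => 'Sunny in Berlin'
```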
package/tsconfig.build.json
DELETED
package/tsconfig.json
DELETED
package/tsup.config.ts
DELETED
@@ -1,17 +0,0 @@
import { generateTypes } from '@internal/types-builder';
import { defineConfig } from 'tsup';

export default defineConfig({
  entry: ['src/index.ts'],
  format: ['esm', 'cjs'],
  clean: true,
  dts: false,
  splitting: true,
  treeshake: {
    preset: 'smallest',
  },
  sourcemap: true,
  onSuccess: async () => {
    await generateTypes(process.cwd());
  },
});