@ai-sdk/openai 4.0.0-beta.6 → 4.0.0-beta.74
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +644 -24
- package/README.md +2 -0
- package/dist/index.d.ts +240 -44
- package/dist/index.js +3345 -1683
- package/dist/index.js.map +1 -1
- package/dist/internal/index.d.ts +390 -36
- package/dist/internal/index.js +2707 -1706
- package/dist/internal/index.js.map +1 -1
- package/docs/03-openai.mdx +413 -39
- package/package.json +17 -18
- package/src/chat/convert-openai-chat-usage.ts +1 -1
- package/src/chat/convert-to-openai-chat-messages.ts +96 -68
- package/src/chat/map-openai-finish-reason.ts +1 -1
- package/src/chat/openai-chat-api.ts +6 -2
- package/src/chat/{openai-chat-options.ts → openai-chat-language-model-options.ts} +11 -1
- package/src/chat/openai-chat-language-model.ts +82 -148
- package/src/chat/openai-chat-prepare-tools.ts +3 -3
- package/src/completion/convert-openai-completion-usage.ts +1 -1
- package/src/completion/convert-to-openai-completion-prompt.ts +1 -2
- package/src/completion/map-openai-finish-reason.ts +1 -1
- package/src/completion/openai-completion-api.ts +5 -2
- package/src/completion/{openai-completion-options.ts → openai-completion-language-model-options.ts} +5 -1
- package/src/completion/openai-completion-language-model.ts +53 -17
- package/src/embedding/{openai-embedding-options.ts → openai-embedding-model-options.ts} +5 -1
- package/src/embedding/openai-embedding-model.ts +22 -5
- package/src/files/openai-files-api.ts +17 -0
- package/src/files/openai-files-options.ts +22 -0
- package/src/files/openai-files.ts +100 -0
- package/src/image/openai-image-model-options.ts +123 -0
- package/src/image/openai-image-model.ts +62 -83
- package/src/index.ts +15 -6
- package/src/internal/index.ts +7 -6
- package/src/openai-config.ts +7 -7
- package/src/openai-language-model-capabilities.ts +5 -4
- package/src/openai-provider.ts +80 -9
- package/src/openai-stream-error.ts +181 -0
- package/src/openai-tools.ts +12 -1
- package/src/realtime/index.ts +2 -0
- package/src/realtime/openai-realtime-event-mapper.ts +436 -0
- package/src/realtime/openai-realtime-model-options.ts +3 -0
- package/src/realtime/openai-realtime-model.ts +111 -0
- package/src/responses/convert-openai-responses-usage.ts +1 -1
- package/src/responses/convert-to-openai-responses-input.ts +345 -90
- package/src/responses/map-openai-responses-finish-reason.ts +1 -1
- package/src/responses/openai-responses-api.ts +186 -17
- package/src/responses/{openai-responses-options.ts → openai-responses-language-model-options.ts} +55 -1
- package/src/responses/openai-responses-language-model.ts +330 -52
- package/src/responses/openai-responses-prepare-tools.ts +129 -18
- package/src/responses/openai-responses-provider-metadata.ts +12 -2
- package/src/skills/openai-skills-api.ts +31 -0
- package/src/skills/openai-skills.ts +83 -0
- package/src/speech/{openai-speech-options.ts → openai-speech-model-options.ts} +5 -1
- package/src/speech/openai-speech-model.ts +23 -7
- package/src/tool/apply-patch.ts +33 -32
- package/src/tool/code-interpreter.ts +40 -41
- package/src/tool/custom.ts +2 -8
- package/src/tool/file-search.ts +3 -3
- package/src/tool/image-generation.ts +2 -2
- package/src/tool/local-shell.ts +2 -2
- package/src/tool/mcp.ts +3 -3
- package/src/tool/shell.ts +9 -4
- package/src/tool/tool-search.ts +98 -0
- package/src/tool/web-search-preview.ts +2 -2
- package/src/tool/web-search.ts +10 -2
- package/src/transcription/{openai-transcription-options.ts → openai-transcription-model-options.ts} +5 -1
- package/src/transcription/openai-transcription-model.ts +35 -13
- package/dist/index.d.mts +0 -1107
- package/dist/index.mjs +0 -6509
- package/dist/index.mjs.map +0 -1
- package/dist/internal/index.d.mts +0 -1137
- package/dist/internal/index.mjs +0 -6322
- package/dist/internal/index.mjs.map +0 -1
- package/src/image/openai-image-options.ts +0 -31
|
@@ -0,0 +1,436 @@
|
|
|
1
|
+
import type {
|
|
2
|
+
Experimental_RealtimeModelV4ClientEvent as RealtimeModelV4ClientEvent,
|
|
3
|
+
Experimental_RealtimeModelV4ServerEvent as RealtimeModelV4ServerEvent,
|
|
4
|
+
Experimental_RealtimeModelV4SessionConfig as RealtimeModelV4SessionConfig,
|
|
5
|
+
} from '@ai-sdk/provider';
|
|
6
|
+
|
|
7
|
+
/**
 * Loose structural view of a server event from the OpenAI Realtime API.
 *
 * All event kinds are folded into this one shape. It is used purely as a cast
 * target for the raw payload; nothing in this file validates that a given
 * field is actually present at runtime.
 */
type OpenAIRealtimeWireEvent = {
  /** Wire-level event name, e.g. `session.created`. */
  type: string;

  // ── Nested payload objects ─────────────────────────────────────────
  session?: { id?: string };
  item?: Record<string, unknown> & { id?: string };
  response?: { id?: string; status?: string };
  error?: { message?: string; code?: string };

  // ── Top-level identifiers ──────────────────────────────────────────
  item_id: string;
  previous_item_id?: string;
  response_id: string;
  call_id: string;

  // ── Streamed / final content ───────────────────────────────────────
  delta: string;
  text?: string;
  transcript?: string;

  // ── Function call details ──────────────────────────────────────────
  name: string;
  arguments: string;

  // ── Flat error fields (read as a fallback when `error` is absent) ──
  message?: string;
  code?: string;
};
|
|
25
|
+
|
|
26
|
+
/**
 * Translates one raw OpenAI Realtime API server event into the normalized
 * `RealtimeModelV4ServerEvent` shape.
 *
 * The input is cast to the wire shape without validation. Event types that
 * have no dedicated mapping are returned as `custom` events carrying the
 * original type in `rawType`. Every result keeps the untouched input in `raw`.
 *
 * @param raw - The server event; it is read as an object with a `type` field.
 * @returns The normalized event.
 */
export function parseOpenAIRealtimeServerEvent(
  raw: unknown,
): RealtimeModelV4ServerEvent {
  const wire = raw as OpenAIRealtimeWireEvent;
  const { type, item_id: itemId, response_id: responseId, delta } = wire;

  // For some events the id is read from the nested object first, falling back
  // to the flat top-level field when the nested one is missing.
  const resolveItemId = (): string => wire.item?.id ?? itemId;
  const resolveResponseId = (): string => wire.response?.id ?? responseId;

  switch (type) {
    // ── Session lifecycle ──────────────────────────────────────────
    case 'session.created': {
      const sessionId = wire.session?.id;
      return { type: 'session-created', sessionId, raw };
    }

    case 'session.updated':
      return { type: 'session-updated', raw };

    // ── Input audio buffer ─────────────────────────────────────────
    case 'input_audio_buffer.speech_started':
      return { type: 'speech-started', itemId, raw };

    case 'input_audio_buffer.speech_stopped':
      return { type: 'speech-stopped', itemId, raw };

    case 'input_audio_buffer.committed': {
      const previousItemId = wire.previous_item_id;
      return { type: 'audio-committed', itemId, previousItemId, raw };
    }

    // ── Conversation items ─────────────────────────────────────────
    case 'conversation.item.added': {
      const { item } = wire;
      return {
        type: 'conversation-item-added',
        itemId: resolveItemId(),
        item,
        raw,
      };
    }

    case 'conversation.item.input_audio_transcription.completed': {
      // A missing transcript is normalized to the empty string.
      const transcript = wire.transcript ?? '';
      return { type: 'input-transcription-completed', itemId, transcript, raw };
    }

    // ── Response lifecycle ─────────────────────────────────────────
    case 'response.created':
      return { type: 'response-created', responseId: resolveResponseId(), raw };

    case 'response.done': {
      // A missing status is reported as 'completed'.
      const status = wire.response?.status ?? 'completed';
      return {
        type: 'response-done',
        responseId: resolveResponseId(),
        status,
        raw,
      };
    }

    // ── Output item lifecycle ──────────────────────────────────────
    case 'response.output_item.added':
      return {
        type: 'output-item-added',
        responseId,
        itemId: resolveItemId(),
        raw,
      };

    case 'response.output_item.done':
      return {
        type: 'output-item-done',
        responseId,
        itemId: resolveItemId(),
        raw,
      };

    case 'response.content_part.added':
      return { type: 'content-part-added', responseId, itemId, raw };

    case 'response.content_part.done':
      return { type: 'content-part-done', responseId, itemId, raw };

    // ── Audio output ───────────────────────────────────────────────
    case 'response.output_audio.delta':
      return { type: 'audio-delta', responseId, itemId, delta, raw };

    case 'response.output_audio.done':
      return { type: 'audio-done', responseId, itemId, raw };

    // ── Audio transcript output ────────────────────────────────────
    case 'response.output_audio_transcript.delta':
      return { type: 'audio-transcript-delta', responseId, itemId, delta, raw };

    case 'response.output_audio_transcript.done': {
      const { transcript } = wire;
      return { type: 'audio-transcript-done', responseId, itemId, transcript, raw };
    }

    // ── Text output ────────────────────────────────────────────────
    case 'response.output_text.delta':
      return { type: 'text-delta', responseId, itemId, delta, raw };

    case 'response.output_text.done': {
      const { text } = wire;
      return { type: 'text-done', responseId, itemId, text, raw };
    }

    // ── Function calling ───────────────────────────────────────────
    case 'response.function_call_arguments.delta': {
      const callId = wire.call_id;
      return {
        type: 'function-call-arguments-delta',
        responseId,
        itemId,
        callId,
        delta,
        raw,
      };
    }

    case 'response.function_call_arguments.done': {
      const { call_id: callId, name, arguments: args } = wire;
      return {
        type: 'function-call-arguments-done',
        responseId,
        itemId,
        callId,
        name,
        arguments: args,
        raw,
      };
    }

    // ── Error ──────────────────────────────────────────────────────
    case 'error': {
      // Details are read from the nested `error` object first, then from the
      // flat top-level fields.
      const details = wire.error;
      const message = details?.message ?? wire.message ?? 'Unknown error';
      const code = details?.code ?? wire.code;
      return { type: 'error', message, code, raw };
    }

    // ── Pass-through ───────────────────────────────────────────────
    default:
      return { type: 'custom', rawType: type, raw };
  }
}
|
|
228
|
+
|
|
229
|
+
/**
 * Converts a normalized client event into the JSON-serializable message shape
 * used by OpenAI's Realtime API.
 *
 * Conversation items of an unhandled kind, and event types without a matching
 * case, produce `undefined`.
 *
 * @param event - The normalized client event to convert.
 * @param modelId - Model id; only used when building a `session.update`.
 * @returns The wire message, or `undefined` when nothing matches.
 */
export function serializeOpenAIRealtimeClientEvent(
  event: RealtimeModelV4ClientEvent,
  modelId: string,
): unknown {
  switch (event.type) {
    case 'session-update': {
      const session = buildOpenAISessionConfig(event.config, modelId);
      return { type: 'session.update', session };
    }

    case 'input-audio-append': {
      const { audio } = event;
      return { type: 'input_audio_buffer.append', audio };
    }

    case 'input-audio-commit':
      return { type: 'input_audio_buffer.commit' };

    case 'input-audio-clear':
      return { type: 'input_audio_buffer.clear' };

    case 'conversation-item-create': {
      const source = event.item;
      let wireItem: Record<string, unknown>;

      switch (source.type) {
        case 'text-message':
          wireItem = {
            type: 'message',
            role: source.role,
            content: [{ type: 'input_text', text: source.text }],
          };
          break;
        case 'audio-message':
          wireItem = {
            type: 'message',
            role: source.role,
            content: [{ type: 'input_audio', audio: source.audio }],
          };
          break;
        case 'function-call-output':
          wireItem = {
            type: 'function_call_output',
            call_id: source.callId,
            output: source.output,
          };
          break;
        default:
          // Unhandled item kinds serialize to nothing.
          return undefined;
      }

      return { type: 'conversation.item.create', item: wireItem };
    }

    case 'conversation-item-truncate': {
      const { itemId, contentIndex, audioEndMs } = event;
      return {
        type: 'conversation.item.truncate',
        item_id: itemId,
        content_index: contentIndex,
        audio_end_ms: audioEndMs,
      };
    }

    case 'response-create': {
      const { options } = event;
      if (options == null) {
        return { type: 'response.create' };
      }

      // Only copy the overrides that were actually supplied.
      const response: Record<string, unknown> = {};
      if (options.modalities != null) {
        response.output_modalities = options.modalities;
      }
      if (options.instructions != null) {
        response.instructions = options.instructions;
      }
      if (options.metadata != null) {
        response.metadata = options.metadata;
      }
      return { type: 'response.create', response };
    }

    case 'response-cancel':
      return { type: 'response.cancel' };
  }
}
|
|
321
|
+
|
|
322
|
+
/**
 * Maps a normalized realtime session config onto the `session` object shape
 * sent to OpenAI.
 *
 * The result always contains `type: 'realtime'` and `model`. Other keys are
 * added only when the corresponding config value is present. Entries of
 * `config.providerOptions` are copied onto the result last, so they overwrite
 * any key set earlier.
 *
 * @param config - The normalized session configuration.
 * @param modelId - Model id written to `session.model`.
 * @returns A freshly created session object.
 */
export function buildOpenAISessionConfig(
  config: RealtimeModelV4SessionConfig,
  modelId: string,
): Record<string, unknown> {
  const {
    instructions,
    outputModalities,
    inputAudioFormat,
    inputAudioTranscription,
    turnDetection,
    outputAudioFormat,
    voice,
    tools,
    providerOptions,
  } = config;

  // Copies only the non-nullish entries onto `target`, in the given order.
  const assignPresent = (
    target: Record<string, unknown>,
    entries: Array<[string, unknown]>,
  ): void => {
    for (const [key, value] of entries) {
      if (value != null) {
        target[key] = value;
      }
    }
  };

  const session: Record<string, unknown> = { type: 'realtime', model: modelId };
  assignPresent(session, [
    ['instructions', instructions],
    ['output_modalities', outputModalities],
  ]);

  const audio: Record<string, unknown> = {};

  // ── audio.input ────────────────────────────────────────────────────
  const hasInputSettings = !(
    inputAudioFormat == null &&
    inputAudioTranscription == null &&
    turnDetection == null
  );

  if (hasInputSettings) {
    const input: Record<string, unknown> = {};

    if (inputAudioFormat != null) {
      const format: Record<string, unknown> = { type: inputAudioFormat.type };
      assignPresent(format, [['rate', inputAudioFormat.rate]]);
      input.format = format;
    }

    if (turnDetection != null) {
      if (turnDetection.type === 'disabled') {
        // Disabled turn detection is sent as an explicit null.
        input.turn_detection = null;
      } else {
        const detection: Record<string, unknown> = {
          type:
            turnDetection.type === 'server-vad' ? 'server_vad' : 'semantic_vad',
        };
        assignPresent(detection, [
          ['threshold', turnDetection.threshold],
          ['silence_duration_ms', turnDetection.silenceDurationMs],
          ['prefix_padding_ms', turnDetection.prefixPaddingMs],
        ]);
        input.turn_detection = detection;
      }
    }

    if (inputAudioTranscription != null) {
      const transcription: Record<string, unknown> = {
        model: inputAudioTranscription.model ?? 'gpt-realtime-whisper',
      };
      assignPresent(transcription, [
        ['language', inputAudioTranscription.language],
        ['prompt', inputAudioTranscription.prompt],
      ]);
      input.transcription = transcription;
    }

    audio.input = input;
  }

  // ── audio.output ───────────────────────────────────────────────────
  if (!(outputAudioFormat == null && voice == null)) {
    const output: Record<string, unknown> = {};

    if (outputAudioFormat != null) {
      const format: Record<string, unknown> = { type: outputAudioFormat.type };
      assignPresent(format, [['rate', outputAudioFormat.rate]]);
      output.format = format;
    }

    assignPresent(output, [['voice', voice]]);
    audio.output = output;
  }

  if (Object.keys(audio).length > 0) {
    session.audio = audio;
  }

  // ── tools ──────────────────────────────────────────────────────────
  if (tools != null && tools.length > 0) {
    session.tools = tools.map(({ type, name, description, parameters }) => ({
      type,
      name,
      description,
      parameters,
    }));
    session.tool_choice = 'auto';
  }

  if (providerOptions != null) {
    Object.assign(session, providerOptions);
  }

  return session;
}
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
import type {
|
|
2
|
+
Experimental_RealtimeModelV4 as RealtimeModelV4,
|
|
3
|
+
Experimental_RealtimeModelV4ClientEvent as RealtimeModelV4ClientEvent,
|
|
4
|
+
Experimental_RealtimeModelV4ClientSecretOptions as RealtimeModelV4ClientSecretOptions,
|
|
5
|
+
Experimental_RealtimeModelV4ClientSecretResult as RealtimeModelV4ClientSecretResult,
|
|
6
|
+
Experimental_RealtimeModelV4ServerEvent as RealtimeModelV4ServerEvent,
|
|
7
|
+
Experimental_RealtimeModelV4SessionConfig as RealtimeModelV4SessionConfig,
|
|
8
|
+
} from '@ai-sdk/provider';
|
|
9
|
+
import type { FetchFunction } from '@ai-sdk/provider-utils';
|
|
10
|
+
import {
|
|
11
|
+
buildOpenAISessionConfig,
|
|
12
|
+
parseOpenAIRealtimeServerEvent,
|
|
13
|
+
serializeOpenAIRealtimeClientEvent,
|
|
14
|
+
} from './openai-realtime-event-mapper';
|
|
15
|
+
|
|
16
|
+
/**
 * Settings consumed by `OpenAIRealtimeModel`.
 */
export type OpenAIRealtimeModelConfig = {
  /** Provider identifier; exposed unchanged as the model's `provider`. */
  provider: string;

  /** Base URL that `/realtime/client_secrets` is appended to. */
  baseURL: string;

  /** Invoked per request to obtain HTTP headers; values may be `undefined`. */
  headers: () => Record<string, string | undefined>;

  /** Optional fetch implementation; the global `fetch` is used when omitted. */
  fetch?: FetchFunction;
};
|
|
22
|
+
|
|
23
|
+
/**
 * Realtime model backed by the OpenAI Realtime API.
 *
 * Creates client secrets over HTTP, describes how to open the WebSocket, and
 * delegates event parsing/serialization and session config building to the
 * functions in `./openai-realtime-event-mapper`.
 */
export class OpenAIRealtimeModel implements RealtimeModelV4 {
  readonly specificationVersion = 'v4' as const;
  readonly provider: string;
  readonly modelId: string;

  private readonly config: OpenAIRealtimeModelConfig;

  /**
   * @param modelId - Model id sent in session payloads and in the WebSocket URL.
   * @param config - Provider name, base URL, header factory and optional fetch.
   */
  constructor(modelId: string, config: OpenAIRealtimeModelConfig) {
    this.modelId = modelId;
    this.provider = config.provider;
    this.config = config;
  }

  /**
   * Requests a client secret by POSTing to `{baseURL}/realtime/client_secrets`.
   *
   * @param options - Optional session config and expiry (in seconds).
   * @returns The secret as `token`, the WebSocket `url` to connect to, and
   *   the `expires_at` value of the response as `expiresAt`.
   * @throws Error when the response status is not OK, or when the response
   *   body does not contain a non-empty string `value`.
   */
  async doCreateClientSecret(
    options: RealtimeModelV4ClientSecretOptions,
  ): Promise<RealtimeModelV4ClientSecretResult> {
    const fetchFn = this.config.fetch ?? fetch;
    const url = `${this.config.baseURL}/realtime/client_secrets`;

    const session =
      options.sessionConfig != null
        ? buildOpenAISessionConfig(options.sessionConfig, this.modelId)
        : { type: 'realtime', model: this.modelId };

    // The header factory may yield undefined values. Passing those to fetch
    // would send the literal string "undefined", so they are dropped here.
    const headers: Record<string, string> = {};
    for (const [name, value] of Object.entries(this.config.headers())) {
      if (value != null) {
        headers[name] = value;
      }
    }
    headers['Content-Type'] = 'application/json';

    const response = await fetchFn(url, {
      method: 'POST',
      headers,
      body: JSON.stringify({
        session,
        ...(options.expiresAfterSeconds != null
          ? {
              // `anchor` is required by the client secrets endpoint; without it
              // the request fails with "Missing required parameter:
              // 'expires_after.anchor'".
              expires_after: {
                anchor: 'created_at',
                seconds: options.expiresAfterSeconds,
              },
            }
          : {}),
      }),
    });

    if (!response.ok) {
      const text = await response.text();
      throw new Error(
        `OpenAI realtime client secret request failed: ${response.status} ${text}`,
      );
    }

    const data = (await response.json()) as {
      value?: unknown;
      expires_at?: number;
    } | null;

    // Fail loudly instead of returning an undefined token, which would later
    // be embedded verbatim in the WebSocket subprotocol string.
    const token = data?.value;
    if (typeof token !== 'string' || token.length === 0) {
      throw new Error(
        'OpenAI realtime client secret response did not contain a secret value',
      );
    }

    return {
      token,
      // Only the host of baseURL is reused; scheme and path are fixed.
      url: `wss://${new URL(this.config.baseURL).host}/v1/realtime?model=${encodeURIComponent(this.modelId)}`,
      expiresAt: data?.expires_at,
    };
  }

  /**
   * Returns the WebSocket connection settings for a previously created secret.
   * The token is passed as a WebSocket subprotocol entry.
   */
  getWebSocketConfig(options: { token: string; url: string }): {
    url: string;
    protocols?: string[];
  } {
    return {
      url: options.url,
      protocols: ['realtime', `openai-insecure-api-key.${options.token}`],
    };
  }

  /** Normalizes a raw server event via `parseOpenAIRealtimeServerEvent`. */
  parseServerEvent(raw: unknown): RealtimeModelV4ServerEvent {
    return parseOpenAIRealtimeServerEvent(raw);
  }

  /** Converts a normalized client event to the wire format for this model. */
  serializeClientEvent(event: RealtimeModelV4ClientEvent): unknown {
    return serializeOpenAIRealtimeClientEvent(event, this.modelId);
  }

  /** Builds the OpenAI session object for this model from a normalized config. */
  buildSessionConfig(
    config: RealtimeModelV4SessionConfig,
  ): Record<string, unknown> {
    return buildOpenAISessionConfig(config, this.modelId);
  }
}
|