@ai-sdk/openai 4.0.0-beta.7 → 4.0.0-beta.74

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. package/CHANGELOG.md +636 -24
  2. package/README.md +2 -0
  3. package/dist/index.d.ts +240 -44
  4. package/dist/index.js +3345 -1683
  5. package/dist/index.js.map +1 -1
  6. package/dist/internal/index.d.ts +390 -36
  7. package/dist/internal/index.js +2707 -1706
  8. package/dist/internal/index.js.map +1 -1
  9. package/docs/03-openai.mdx +413 -39
  10. package/package.json +16 -17
  11. package/src/chat/convert-openai-chat-usage.ts +1 -1
  12. package/src/chat/convert-to-openai-chat-messages.ts +96 -68
  13. package/src/chat/map-openai-finish-reason.ts +1 -1
  14. package/src/chat/openai-chat-api.ts +6 -2
  15. package/src/chat/{openai-chat-options.ts → openai-chat-language-model-options.ts} +11 -1
  16. package/src/chat/openai-chat-language-model.ts +82 -148
  17. package/src/chat/openai-chat-prepare-tools.ts +3 -3
  18. package/src/completion/convert-openai-completion-usage.ts +1 -1
  19. package/src/completion/convert-to-openai-completion-prompt.ts +1 -2
  20. package/src/completion/map-openai-finish-reason.ts +1 -1
  21. package/src/completion/openai-completion-api.ts +5 -2
  22. package/src/completion/{openai-completion-options.ts → openai-completion-language-model-options.ts} +5 -1
  23. package/src/completion/openai-completion-language-model.ts +53 -17
  24. package/src/embedding/{openai-embedding-options.ts → openai-embedding-model-options.ts} +5 -1
  25. package/src/embedding/openai-embedding-model.ts +22 -5
  26. package/src/files/openai-files-api.ts +17 -0
  27. package/src/files/openai-files-options.ts +22 -0
  28. package/src/files/openai-files.ts +100 -0
  29. package/src/image/openai-image-model-options.ts +123 -0
  30. package/src/image/openai-image-model.ts +62 -83
  31. package/src/index.ts +15 -6
  32. package/src/internal/index.ts +7 -6
  33. package/src/openai-config.ts +7 -7
  34. package/src/openai-language-model-capabilities.ts +5 -4
  35. package/src/openai-provider.ts +80 -9
  36. package/src/openai-stream-error.ts +181 -0
  37. package/src/openai-tools.ts +12 -1
  38. package/src/realtime/index.ts +2 -0
  39. package/src/realtime/openai-realtime-event-mapper.ts +436 -0
  40. package/src/realtime/openai-realtime-model-options.ts +3 -0
  41. package/src/realtime/openai-realtime-model.ts +111 -0
  42. package/src/responses/convert-openai-responses-usage.ts +1 -1
  43. package/src/responses/convert-to-openai-responses-input.ts +345 -90
  44. package/src/responses/map-openai-responses-finish-reason.ts +1 -1
  45. package/src/responses/openai-responses-api.ts +186 -17
  46. package/src/responses/{openai-responses-options.ts → openai-responses-language-model-options.ts} +55 -1
  47. package/src/responses/openai-responses-language-model.ts +330 -52
  48. package/src/responses/openai-responses-prepare-tools.ts +129 -18
  49. package/src/responses/openai-responses-provider-metadata.ts +12 -2
  50. package/src/skills/openai-skills-api.ts +31 -0
  51. package/src/skills/openai-skills.ts +83 -0
  52. package/src/speech/{openai-speech-options.ts → openai-speech-model-options.ts} +5 -1
  53. package/src/speech/openai-speech-model.ts +23 -7
  54. package/src/tool/apply-patch.ts +33 -32
  55. package/src/tool/code-interpreter.ts +40 -41
  56. package/src/tool/custom.ts +2 -8
  57. package/src/tool/file-search.ts +3 -3
  58. package/src/tool/image-generation.ts +2 -2
  59. package/src/tool/local-shell.ts +2 -2
  60. package/src/tool/mcp.ts +3 -3
  61. package/src/tool/shell.ts +9 -4
  62. package/src/tool/tool-search.ts +98 -0
  63. package/src/tool/web-search-preview.ts +2 -2
  64. package/src/tool/web-search.ts +10 -2
  65. package/src/transcription/{openai-transcription-options.ts → openai-transcription-model-options.ts} +5 -1
  66. package/src/transcription/openai-transcription-model.ts +35 -13
  67. package/dist/index.d.mts +0 -1107
  68. package/dist/index.mjs +0 -6509
  69. package/dist/index.mjs.map +0 -1
  70. package/dist/internal/index.d.mts +0 -1137
  71. package/dist/internal/index.mjs +0 -6322
  72. package/dist/internal/index.mjs.map +0 -1
  73. package/src/image/openai-image-options.ts +0 -31
@@ -0,0 +1,436 @@
1
+ import type {
2
+ Experimental_RealtimeModelV4ClientEvent as RealtimeModelV4ClientEvent,
3
+ Experimental_RealtimeModelV4ServerEvent as RealtimeModelV4ServerEvent,
4
+ Experimental_RealtimeModelV4SessionConfig as RealtimeModelV4SessionConfig,
5
+ } from '@ai-sdk/provider';
6
+
/**
 * Loose, flat shape of a server event as read from the OpenAI Realtime API.
 *
 * One type covers every event kind; which fields are populated depends on
 * `type`. Nothing in this type is validated at runtime.
 *
 * NOTE(review): `item_id`, `response_id`, `delta`, `call_id`, `name` and
 * `arguments` are declared as required although they are only read for some
 * event types — confirm whether they should be optional.
 */
type OpenAIRealtimeWireEvent = {
  /** Event discriminator, e.g. `session.created` or `response.done`. */
  type: string;

  // ── Nested payload objects ──────────────────────────────────────
  session?: { id?: string };
  item?: { id?: string } & Record<string, unknown>;
  response?: { id?: string; status?: string };
  error?: { message?: string; code?: string };

  // ── Identifiers ─────────────────────────────────────────────────
  item_id: string;
  previous_item_id?: string;
  response_id: string;
  call_id: string;

  // ── Streamed / final content ────────────────────────────────────
  delta: string;
  transcript?: string;
  text?: string;

  // ── Function call details ───────────────────────────────────────
  name: string;
  arguments: string;

  // ── Top-level error fields (used when `error` is absent) ────────
  message?: string;
  code?: string;
};
25
+
/**
 * Parses a raw OpenAI Realtime API server event into a normalized event.
 *
 * The payload is not validated: it is read as an `OpenAIRealtimeWireEvent`
 * and dispatched on its `type` field. Event types without a normalized
 * counterpart are returned as `custom` events that carry the original type
 * in `rawType`. The untouched payload is always attached as `raw`.
 *
 * @param raw - The server event payload. `null` and `undefined` are tolerated
 *   and reported as a `custom` event instead of throwing.
 * @returns The normalized server event.
 */
export function parseOpenAIRealtimeServerEvent(
  raw: unknown,
): RealtimeModelV4ServerEvent {
  // Reading `.type` off `null`/`undefined` would throw a TypeError. Falling
  // back to an empty object sends such payloads down the `custom`
  // pass-through path, like any other payload without a recognized `type`.
  const event = (raw ?? {}) as OpenAIRealtimeWireEvent;
  const type = event.type;

  switch (type) {
    // ── Session lifecycle ──────────────────────────────────────────
    case 'session.created':
      return {
        type: 'session-created',
        sessionId: event.session?.id,
        raw,
      };

    case 'session.updated':
      return { type: 'session-updated', raw };

    // ── Input audio buffer ─────────────────────────────────────────
    case 'input_audio_buffer.speech_started':
      return {
        type: 'speech-started',
        itemId: event.item_id,
        raw,
      };

    case 'input_audio_buffer.speech_stopped':
      return {
        type: 'speech-stopped',
        itemId: event.item_id,
        raw,
      };

    case 'input_audio_buffer.committed':
      return {
        type: 'audio-committed',
        itemId: event.item_id,
        previousItemId: event.previous_item_id,
        raw,
      };

    // ── Conversation items ─────────────────────────────────────────
    case 'conversation.item.added':
      return {
        type: 'conversation-item-added',
        // Prefer the id nested in `item`; fall back to the top-level field.
        itemId: event.item?.id ?? event.item_id,
        item: event.item,
        raw,
      };

    case 'conversation.item.input_audio_transcription.completed':
      return {
        type: 'input-transcription-completed',
        itemId: event.item_id,
        transcript: event.transcript ?? '',
        raw,
      };

    // ── Response lifecycle ──────────────────────────────────────────
    case 'response.created':
      return {
        type: 'response-created',
        responseId: event.response?.id ?? event.response_id,
        raw,
      };

    case 'response.done':
      return {
        type: 'response-done',
        responseId: event.response?.id ?? event.response_id,
        // A missing status is reported as 'completed'.
        status: event.response?.status ?? 'completed',
        raw,
      };

    // ── Output item lifecycle ───────────────────────────────────────
    case 'response.output_item.added':
      return {
        type: 'output-item-added',
        responseId: event.response_id,
        itemId: event.item?.id ?? event.item_id,
        raw,
      };

    case 'response.output_item.done':
      return {
        type: 'output-item-done',
        responseId: event.response_id,
        itemId: event.item?.id ?? event.item_id,
        raw,
      };

    case 'response.content_part.added':
      return {
        type: 'content-part-added',
        responseId: event.response_id,
        itemId: event.item_id,
        raw,
      };

    case 'response.content_part.done':
      return {
        type: 'content-part-done',
        responseId: event.response_id,
        itemId: event.item_id,
        raw,
      };

    // ── Audio output ────────────────────────────────────────────────
    case 'response.output_audio.delta':
      return {
        type: 'audio-delta',
        responseId: event.response_id,
        itemId: event.item_id,
        delta: event.delta,
        raw,
      };

    case 'response.output_audio.done':
      return {
        type: 'audio-done',
        responseId: event.response_id,
        itemId: event.item_id,
        raw,
      };

    // ── Audio transcript output ─────────────────────────────────────
    case 'response.output_audio_transcript.delta':
      return {
        type: 'audio-transcript-delta',
        responseId: event.response_id,
        itemId: event.item_id,
        delta: event.delta,
        raw,
      };

    case 'response.output_audio_transcript.done':
      return {
        type: 'audio-transcript-done',
        responseId: event.response_id,
        itemId: event.item_id,
        transcript: event.transcript,
        raw,
      };

    // ── Text output ─────────────────────────────────────────────────
    case 'response.output_text.delta':
      return {
        type: 'text-delta',
        responseId: event.response_id,
        itemId: event.item_id,
        delta: event.delta,
        raw,
      };

    case 'response.output_text.done':
      return {
        type: 'text-done',
        responseId: event.response_id,
        itemId: event.item_id,
        text: event.text,
        raw,
      };

    // ── Function calling ────────────────────────────────────────────
    case 'response.function_call_arguments.delta':
      return {
        type: 'function-call-arguments-delta',
        responseId: event.response_id,
        itemId: event.item_id,
        callId: event.call_id,
        delta: event.delta,
        raw,
      };

    case 'response.function_call_arguments.done':
      return {
        type: 'function-call-arguments-done',
        responseId: event.response_id,
        itemId: event.item_id,
        callId: event.call_id,
        name: event.name,
        arguments: event.arguments,
        raw,
      };

    // ── Error ───────────────────────────────────────────────────────
    case 'error':
      return {
        type: 'error',
        // The nested `error` object wins over the top-level fields.
        message: event.error?.message ?? event.message ?? 'Unknown error',
        code: event.error?.code ?? event.code,
        raw,
      };

    // ── Pass-through ────────────────────────────────────────────────
    default:
      return { type: 'custom', rawType: type, raw };
  }
}
228
+
/**
 * Serializes a normalized client event into OpenAI's Realtime API format.
 *
 * @param event - The normalized client event to translate.
 * @param modelId - Model id forwarded to `buildOpenAISessionConfig` for
 *   `session-update` events.
 * @returns The wire-format event object, or `undefined` when the event type
 *   (or the conversation item type) has no mapping.
 */
export function serializeOpenAIRealtimeClientEvent(
  event: RealtimeModelV4ClientEvent,
  modelId: string,
): unknown {
  if (event.type === 'session-update') {
    const session = buildOpenAISessionConfig(event.config, modelId);
    return { type: 'session.update', session };
  }

  if (event.type === 'input-audio-append') {
    return { type: 'input_audio_buffer.append', audio: event.audio };
  }

  if (event.type === 'input-audio-commit') {
    return { type: 'input_audio_buffer.commit' };
  }

  if (event.type === 'input-audio-clear') {
    return { type: 'input_audio_buffer.clear' };
  }

  if (event.type === 'conversation-item-create') {
    const { item } = event;

    // Text and audio messages share the same envelope and differ only in
    // their single content part.
    if (item.type === 'text-message' || item.type === 'audio-message') {
      const contentPart =
        item.type === 'text-message'
          ? { type: 'input_text', text: item.text }
          : { type: 'input_audio', audio: item.audio };

      return {
        type: 'conversation.item.create',
        item: {
          type: 'message',
          role: item.role,
          content: [contentPart],
        },
      };
    }

    if (item.type === 'function-call-output') {
      return {
        type: 'conversation.item.create',
        item: {
          type: 'function_call_output',
          call_id: item.callId,
          output: item.output,
        },
      };
    }

    // Item types without a mapping produce no wire event.
    return undefined;
  }

  if (event.type === 'conversation-item-truncate') {
    return {
      type: 'conversation.item.truncate',
      item_id: event.itemId,
      content_index: event.contentIndex,
      audio_end_ms: event.audioEndMs,
    };
  }

  if (event.type === 'response-create') {
    const message: Record<string, unknown> = { type: 'response.create' };
    const options = event.options;

    // `response` is only attached when options were supplied; inside it,
    // only the options that are actually set are forwarded.
    if (options != null) {
      const response: Record<string, unknown> = {};
      if (options.modalities != null) {
        response.output_modalities = options.modalities;
      }
      if (options.instructions != null) {
        response.instructions = options.instructions;
      }
      if (options.metadata != null) {
        response.metadata = options.metadata;
      }
      message.response = response;
    }

    return message;
  }

  if (event.type === 'response-cancel') {
    return { type: 'response.cancel' };
  }

  return undefined;
}
321
+
/**
 * Converts a normalized audio format into the wire `format` object; `rate`
 * is included only when it is set.
 */
function toOpenAIAudioFormat(format: {
  type: unknown;
  rate?: unknown;
}): Record<string, unknown> {
  const wireFormat: Record<string, unknown> = { type: format.type };
  if (format.rate != null) {
    wireFormat.rate = format.rate;
  }
  return wireFormat;
}

/**
 * Converts normalized turn detection settings into the wire value:
 * `null` for `disabled`, otherwise a `server_vad` / `semantic_vad` object
 * with whichever tuning fields are set.
 */
function toOpenAITurnDetection(
  turnDetection: NonNullable<RealtimeModelV4SessionConfig['turnDetection']>,
): Record<string, unknown> | null {
  if (turnDetection.type === 'disabled') {
    return null;
  }

  const wire: Record<string, unknown> = {
    // Every type other than 'server-vad' is sent as 'semantic_vad'.
    type: turnDetection.type === 'server-vad' ? 'server_vad' : 'semantic_vad',
  };
  if (turnDetection.threshold != null) {
    wire.threshold = turnDetection.threshold;
  }
  if (turnDetection.silenceDurationMs != null) {
    wire.silence_duration_ms = turnDetection.silenceDurationMs;
  }
  if (turnDetection.prefixPaddingMs != null) {
    wire.prefix_padding_ms = turnDetection.prefixPaddingMs;
  }
  return wire;
}

/**
 * Builds an OpenAI-specific session configuration from a normalized config.
 *
 * The result always contains `type: 'realtime'` and `model`. All other keys
 * are added only when the corresponding normalized setting is present.
 *
 * @param config - The normalized session configuration.
 * @param modelId - Model id written to the session's `model` field.
 * @returns A plain object in OpenAI's session format.
 */
export function buildOpenAISessionConfig(
  config: RealtimeModelV4SessionConfig,
  modelId: string,
): Record<string, unknown> {
  const {
    inputAudioFormat,
    inputAudioTranscription,
    turnDetection,
    outputAudioFormat,
    voice,
    tools,
    providerOptions,
  } = config;

  const session: Record<string, unknown> = {
    type: 'realtime',
    model: modelId,
  };

  if (config.instructions != null) {
    session.instructions = config.instructions;
  }
  if (config.outputModalities != null) {
    session.output_modalities = config.outputModalities;
  }

  const audio: Record<string, unknown> = {};

  // ── Audio input: format, turn detection, transcription ─────────────
  const hasInputSettings =
    inputAudioFormat != null ||
    inputAudioTranscription != null ||
    turnDetection != null;

  if (hasInputSettings) {
    const input: Record<string, unknown> = {};

    if (inputAudioFormat != null) {
      input.format = toOpenAIAudioFormat(inputAudioFormat);
    }

    if (turnDetection != null) {
      input.turn_detection = toOpenAITurnDetection(turnDetection);
    }

    if (inputAudioTranscription != null) {
      const transcription: Record<string, unknown> = {
        // Default transcription model when none is configured.
        model: inputAudioTranscription.model ?? 'gpt-realtime-whisper',
      };
      if (inputAudioTranscription.language != null) {
        transcription.language = inputAudioTranscription.language;
      }
      if (inputAudioTranscription.prompt != null) {
        transcription.prompt = inputAudioTranscription.prompt;
      }
      input.transcription = transcription;
    }

    audio.input = input;
  }

  // ── Audio output: format and voice ──────────────────────────────────
  if (outputAudioFormat != null || voice != null) {
    const output: Record<string, unknown> = {};

    if (outputAudioFormat != null) {
      output.format = toOpenAIAudioFormat(outputAudioFormat);
    }
    if (voice != null) {
      output.voice = voice;
    }

    audio.output = output;
  }

  if (Object.keys(audio).length > 0) {
    session.audio = audio;
  }

  // ── Tools ───────────────────────────────────────────────────────────
  if (tools != null && tools.length > 0) {
    session.tools = tools.map(({ type, name, description, parameters }) => ({
      type,
      name,
      description,
      parameters,
    }));
    // Tool choice is always 'auto' whenever tools are supplied.
    session.tool_choice = 'auto';
  }

  // Provider options are shallow-merged last, so they can override any key
  // set above.
  // NOTE(review): this assumes `providerOptions` is a flat map of session
  // fields rather than being keyed by provider name — confirm.
  if (providerOptions != null) {
    Object.assign(session, providerOptions);
  }

  return session;
}
@@ -0,0 +1,3 @@
/**
 * Identifier of an OpenAI realtime model. Any string is accepted.
 */
export type OpenAIRealtimeModelId = string;

/**
 * Provider-specific options for OpenAI realtime models. The type currently
 * permits no properties (an always-empty object).
 */
export type OpenAIRealtimeModelOptions = Record<string, never>;
@@ -0,0 +1,111 @@
1
+ import type {
2
+ Experimental_RealtimeModelV4 as RealtimeModelV4,
3
+ Experimental_RealtimeModelV4ClientEvent as RealtimeModelV4ClientEvent,
4
+ Experimental_RealtimeModelV4ClientSecretOptions as RealtimeModelV4ClientSecretOptions,
5
+ Experimental_RealtimeModelV4ClientSecretResult as RealtimeModelV4ClientSecretResult,
6
+ Experimental_RealtimeModelV4ServerEvent as RealtimeModelV4ServerEvent,
7
+ Experimental_RealtimeModelV4SessionConfig as RealtimeModelV4SessionConfig,
8
+ } from '@ai-sdk/provider';
9
+ import type { FetchFunction } from '@ai-sdk/provider-utils';
10
+ import {
11
+ buildOpenAISessionConfig,
12
+ parseOpenAIRealtimeServerEvent,
13
+ serializeOpenAIRealtimeClientEvent,
14
+ } from './openai-realtime-event-mapper';
15
+
/**
 * Settings an `OpenAIRealtimeModel` needs to talk to the API.
 */
export type OpenAIRealtimeModelConfig = {
  /** Provider name exposed on the model as `provider`. */
  provider: string;

  /**
   * Base URL of the HTTP API. The client secret endpoint is resolved
   * relative to it, and its host is reused for the WebSocket URL.
   */
  baseURL: string;

  /** Optional fetch implementation; the global `fetch` is used when absent. */
  fetch?: FetchFunction;

  /** Returns the headers to send with the client secret request. */
  headers: () => Record<string, string | undefined>;
};
22
+
/**
 * Realtime model implementation for OpenAI.
 *
 * Creates short-lived client secrets over HTTP, describes how to open the
 * WebSocket connection with such a secret, and delegates event translation
 * and session building to the realtime event mapper functions.
 */
export class OpenAIRealtimeModel implements RealtimeModelV4 {
  readonly specificationVersion = 'v4' as const;
  readonly provider: string;
  readonly modelId: string;

  private readonly config: OpenAIRealtimeModelConfig;

  /**
   * @param modelId - Realtime model id used in session configs and URLs.
   * @param config - Provider name, base URL, header factory and optional fetch.
   */
  constructor(modelId: string, config: OpenAIRealtimeModelConfig) {
    this.modelId = modelId;
    this.provider = config.provider;
    this.config = config;
  }

  /**
   * Requests a client secret from `<baseURL>/realtime/client_secrets`.
   *
   * @param options - Optional session config and expiry (in seconds).
   * @returns The secret token, the WebSocket URL to connect to, and the
   *   expiry reported by the API (if any).
   * @throws Error when the HTTP response is not OK; the message contains the
   *   status code and response body.
   */
  async doCreateClientSecret(
    options: RealtimeModelV4ClientSecretOptions,
  ): Promise<RealtimeModelV4ClientSecretResult> {
    const fetchFn = this.config.fetch ?? fetch;
    const url = `${this.config.baseURL}/realtime/client_secrets`;

    const session =
      options.sessionConfig != null
        ? buildOpenAISessionConfig(options.sessionConfig, this.modelId)
        : { type: 'realtime', model: this.modelId };

    // The header factory may return `undefined` values. fetch would send
    // those as the literal string "undefined", so drop them first.
    const headers: Record<string, string> = {};
    for (const [name, value] of Object.entries(this.config.headers())) {
      if (value != null) {
        headers[name] = value;
      }
    }
    // Set last so it always wins over a configured 'Content-Type'.
    headers['Content-Type'] = 'application/json';

    const response = await fetchFn(url, {
      method: 'POST',
      headers,
      body: JSON.stringify({
        session,
        ...(options.expiresAfterSeconds != null
          ? {
              // `anchor` is required by the client secrets endpoint; without it
              // the request fails with "Missing required parameter:
              // 'expires_after.anchor'".
              expires_after: {
                anchor: 'created_at',
                seconds: options.expiresAfterSeconds,
              },
            }
          : {}),
      }),
    });

    if (!response.ok) {
      const text = await response.text();
      throw new Error(
        `OpenAI realtime client secret request failed: ${response.status} ${text}`,
      );
    }

    const data = (await response.json()) as {
      value: string;
      expires_at?: number;
    };

    return {
      token: data.value,
      // Only the host of `baseURL` is reused; scheme and path are fixed to
      // `wss://` and `/v1/realtime`.
      url: `wss://${new URL(this.config.baseURL).host}/v1/realtime?model=${encodeURIComponent(this.modelId)}`,
      expiresAt: data.expires_at,
    };
  }

  /**
   * Describes how to open the WebSocket: the given URL plus two
   * subprotocols, `realtime` and `openai-insecure-api-key.<token>`, through
   * which the token is passed.
   */
  getWebSocketConfig(options: { token: string; url: string }): {
    url: string;
    protocols?: string[];
  } {
    return {
      url: options.url,
      protocols: ['realtime', `openai-insecure-api-key.${options.token}`],
    };
  }

  /** Normalizes a raw server event via `parseOpenAIRealtimeServerEvent`. */
  parseServerEvent(raw: unknown): RealtimeModelV4ServerEvent {
    return parseOpenAIRealtimeServerEvent(raw);
  }

  /**
   * Converts a normalized client event to the wire format via
   * `serializeOpenAIRealtimeClientEvent`, using this model's id.
   */
  serializeClientEvent(event: RealtimeModelV4ClientEvent): unknown {
    return serializeOpenAIRealtimeClientEvent(event, this.modelId);
  }

  /**
   * Builds the wire session config via `buildOpenAISessionConfig`, using
   * this model's id.
   */
  buildSessionConfig(
    config: RealtimeModelV4SessionConfig,
  ): Record<string, unknown> {
    return buildOpenAISessionConfig(config, this.modelId);
  }
}
@@ -1,4 +1,4 @@
1
- import { LanguageModelV4Usage } from '@ai-sdk/provider';
1
+ import type { LanguageModelV4Usage } from '@ai-sdk/provider';
2
2
 
3
3
  export type OpenAIResponsesUsage = {
4
4
  input_tokens: number;