@ai-sdk/openai 4.0.0-beta.6 → 4.0.0-beta.74

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. package/CHANGELOG.md +644 -24
  2. package/README.md +2 -0
  3. package/dist/index.d.ts +240 -44
  4. package/dist/index.js +3345 -1683
  5. package/dist/index.js.map +1 -1
  6. package/dist/internal/index.d.ts +390 -36
  7. package/dist/internal/index.js +2707 -1706
  8. package/dist/internal/index.js.map +1 -1
  9. package/docs/03-openai.mdx +413 -39
  10. package/package.json +17 -18
  11. package/src/chat/convert-openai-chat-usage.ts +1 -1
  12. package/src/chat/convert-to-openai-chat-messages.ts +96 -68
  13. package/src/chat/map-openai-finish-reason.ts +1 -1
  14. package/src/chat/openai-chat-api.ts +6 -2
  15. package/src/chat/{openai-chat-options.ts → openai-chat-language-model-options.ts} +11 -1
  16. package/src/chat/openai-chat-language-model.ts +82 -148
  17. package/src/chat/openai-chat-prepare-tools.ts +3 -3
  18. package/src/completion/convert-openai-completion-usage.ts +1 -1
  19. package/src/completion/convert-to-openai-completion-prompt.ts +1 -2
  20. package/src/completion/map-openai-finish-reason.ts +1 -1
  21. package/src/completion/openai-completion-api.ts +5 -2
  22. package/src/completion/{openai-completion-options.ts → openai-completion-language-model-options.ts} +5 -1
  23. package/src/completion/openai-completion-language-model.ts +53 -17
  24. package/src/embedding/{openai-embedding-options.ts → openai-embedding-model-options.ts} +5 -1
  25. package/src/embedding/openai-embedding-model.ts +22 -5
  26. package/src/files/openai-files-api.ts +17 -0
  27. package/src/files/openai-files-options.ts +22 -0
  28. package/src/files/openai-files.ts +100 -0
  29. package/src/image/openai-image-model-options.ts +123 -0
  30. package/src/image/openai-image-model.ts +62 -83
  31. package/src/index.ts +15 -6
  32. package/src/internal/index.ts +7 -6
  33. package/src/openai-config.ts +7 -7
  34. package/src/openai-language-model-capabilities.ts +5 -4
  35. package/src/openai-provider.ts +80 -9
  36. package/src/openai-stream-error.ts +181 -0
  37. package/src/openai-tools.ts +12 -1
  38. package/src/realtime/index.ts +2 -0
  39. package/src/realtime/openai-realtime-event-mapper.ts +436 -0
  40. package/src/realtime/openai-realtime-model-options.ts +3 -0
  41. package/src/realtime/openai-realtime-model.ts +111 -0
  42. package/src/responses/convert-openai-responses-usage.ts +1 -1
  43. package/src/responses/convert-to-openai-responses-input.ts +345 -90
  44. package/src/responses/map-openai-responses-finish-reason.ts +1 -1
  45. package/src/responses/openai-responses-api.ts +186 -17
  46. package/src/responses/{openai-responses-options.ts → openai-responses-language-model-options.ts} +55 -1
  47. package/src/responses/openai-responses-language-model.ts +330 -52
  48. package/src/responses/openai-responses-prepare-tools.ts +129 -18
  49. package/src/responses/openai-responses-provider-metadata.ts +12 -2
  50. package/src/skills/openai-skills-api.ts +31 -0
  51. package/src/skills/openai-skills.ts +83 -0
  52. package/src/speech/{openai-speech-options.ts → openai-speech-model-options.ts} +5 -1
  53. package/src/speech/openai-speech-model.ts +23 -7
  54. package/src/tool/apply-patch.ts +33 -32
  55. package/src/tool/code-interpreter.ts +40 -41
  56. package/src/tool/custom.ts +2 -8
  57. package/src/tool/file-search.ts +3 -3
  58. package/src/tool/image-generation.ts +2 -2
  59. package/src/tool/local-shell.ts +2 -2
  60. package/src/tool/mcp.ts +3 -3
  61. package/src/tool/shell.ts +9 -4
  62. package/src/tool/tool-search.ts +98 -0
  63. package/src/tool/web-search-preview.ts +2 -2
  64. package/src/tool/web-search.ts +10 -2
  65. package/src/transcription/{openai-transcription-options.ts → openai-transcription-model-options.ts} +5 -1
  66. package/src/transcription/openai-transcription-model.ts +35 -13
  67. package/dist/index.d.mts +0 -1107
  68. package/dist/index.mjs +0 -6509
  69. package/dist/index.mjs.map +0 -1
  70. package/dist/internal/index.d.mts +0 -1137
  71. package/dist/internal/index.mjs +0 -6322
  72. package/dist/internal/index.mjs.map +0 -1
  73. package/src/image/openai-image-options.ts +0 -31
@@ -165,6 +165,10 @@ The following provider options are available:
165
165
 
166
166
  Whether to store the generation. Defaults to `true`.
167
167
 
168
+ - **passThroughUnsupportedFiles** _boolean_
169
+
170
+ Whether to pass through file types other than images and PDFs as generic input files. Defaults to `false`, which restricts inline file inputs to images and PDFs. Enable this when the target OpenAI Responses model supports additional file media types.
171
+
168
172
  - **maxToolCalls** _integer_
169
173
  The maximum number of total calls to built-in tools that can be processed in a response.
170
174
  This maximum number applies across all built-in tool calls, not per individual tool.
@@ -238,7 +242,6 @@ The following provider options are available:
238
242
 
239
243
  - **truncation** _string_
240
244
  The truncation strategy to use for the model response.
241
-
242
245
  - auto: If the input to this Response exceeds the model's context window size, the model will truncate the response to fit the context window by dropping items from the beginning of the conversation.
243
246
  - disabled (default): If the input size will exceed the context window size for a model, the request will fail with a 400 error.
244
247
 
@@ -257,6 +260,11 @@ The following provider options are available:
257
260
  - **forceReasoning** _boolean_
258
261
  Force treating this model as a reasoning model. This is useful for "stealth" reasoning models (e.g. via a custom baseURL) where the model ID is not recognized by the SDK's allowlist. When enabled, the SDK applies reasoning-model parameter compatibility rules and defaults `systemMessageMode` to `developer` unless overridden.
259
262
 
263
+ - **contextManagement** _Array<object>_
264
+ Enable server-side context management (compaction). When configured, the server automatically compresses conversation context when token usage crosses a specified threshold. Each object in the array should have:
265
+ - `type`: `'compaction'`
266
+ - `compactThreshold`: _number_ — the token count at which compaction is triggered
267
+
260
268
  The OpenAI responses provider also returns provider-specific metadata:
261
269
 
262
270
  For Responses models, you can type this metadata using `OpenaiResponsesProviderMetadata`:
@@ -309,7 +317,7 @@ const result = streamText({
309
317
  },
310
318
  });
311
319
 
312
- for await (const part of result.fullStream) {
320
+ for await (const part of result.stream) {
313
321
  if (part.type === 'reasoning') {
314
322
  console.log(`Reasoning: ${part.textDelta}`);
315
323
  } else if (part.type === 'text-delta') {
@@ -430,6 +438,38 @@ The `textVerbosity` parameter scales output length without changing the underlyi
430
438
  - `'medium'`: Balanced detail (default)
431
439
  - `'high'`: Verbose responses with comprehensive detail
432
440
 
441
+ #### Namespaced Function Calls
442
+
443
+ OpenAI supports grouping related function tools into
444
+ [namespaces](https://developers.openai.com/api/docs/guides/function-calling#defining-namespaces).
445
+ When the Responses API returns a `function_call` with a `namespace`, the OpenAI provider
446
+ exposes this value on the generated `tool-call` part as
447
+ `providerMetadata.openai.namespace`.
448
+
449
+ ```ts
450
+ for (const part of result.content) {
451
+ if (part.type === 'tool-call') {
452
+ console.log(part.providerMetadata?.openai?.namespace);
453
+ }
454
+ }
455
+ ```
456
+
457
+ When using `streamText`, the namespace is available on the `tool-input-end` event and on
458
+ the final `tool-call` event:
459
+
460
+ ```ts
461
+ for await (const part of result.stream) {
462
+ if (part.type === 'tool-input-end' || part.type === 'tool-call') {
463
+ console.log(part.providerMetadata?.openai?.namespace);
464
+ }
465
+ }
466
+ ```
467
+
468
+ If you persist or reconstruct messages for later turns, preserve the OpenAI provider
469
+ metadata on tool-call parts. The SDK uses `providerMetadata.openai.namespace` or
470
+ `providerOptions.openai.namespace` to round-trip the namespace back to OpenAI on
471
+ subsequent requests.
472
+
433
473
  #### Web Search Tool
434
474
 
435
475
  The OpenAI responses API supports web search through the `openai.tools.webSearch` tool.
@@ -601,7 +641,7 @@ const result = streamText({
601
641
  },
602
642
  });
603
643
 
604
- for await (const part of result.fullStream) {
644
+ for await (const part of result.stream) {
605
645
  if (part.type == 'tool-result' && !part.dynamic) {
606
646
  const base64Image = part.output.result;
607
647
  }
@@ -696,7 +736,6 @@ The MCP tool can be configured with:
696
736
  - **allowedTools** _string[] | object_ (optional)
697
737
 
698
738
  Controls which tools from the MCP server are available. Can be:
699
-
700
739
  - An array of tool names: `['tool1', 'tool2']`
701
740
  - An object with filters:
702
741
  ```ts
@@ -717,7 +756,6 @@ The MCP tool can be configured with:
717
756
  - **requireApproval** _'always' | 'never' | object_ (optional)
718
757
 
719
758
  Controls which MCP tool calls require user approval before execution. Can be:
720
-
721
759
  - `'always'`: All MCP tool calls require approval
722
760
  - `'never'`: No MCP tool calls require approval (default)
723
761
  - An object with filters:
@@ -764,7 +802,7 @@ const result = await generateText({
764
802
  }),
765
803
  },
766
804
  prompt: 'List the files in my home directory.',
767
- stopWhen: stepCountIs(2),
805
+ stopWhen: isStepCount(2),
768
806
  });
769
807
  ```
770
808
 
@@ -922,7 +960,7 @@ const result = await generateText({
922
960
  }),
923
961
  },
924
962
  prompt: 'Use the skill to solve this problem.',
925
- stopWhen: stepCountIs(5),
963
+ stopWhen: isStepCount(5),
926
964
  });
927
965
  ```
928
966
 
@@ -937,7 +975,7 @@ enabling iterative, multi-step code editing workflows.
937
975
 
938
976
  ```ts
939
977
  import { openai } from '@ai-sdk/openai';
940
- import { generateText, stepCountIs } from 'ai';
978
+ import { generateText, isStepCount } from 'ai';
941
979
 
942
980
  const result = await generateText({
943
981
  model: openai('gpt-5.1'),
@@ -949,7 +987,7 @@ const result = await generateText({
949
987
  }),
950
988
  },
951
989
  prompt: 'Create a python file that calculates the factorial of a number',
952
- stopWhen: stepCountIs(5),
990
+ stopWhen: isStepCount(5),
953
991
  });
954
992
  ```
955
993
 
@@ -958,6 +996,191 @@ Your execute function must return:
958
996
  - **status** _'completed' | 'failed'_ - Whether the patch was applied successfully
959
997
  - **output** _string_ (optional) - Human-readable log text (e.g., results or error messages)
960
998
 
999
+ #### Tool Search
1000
+
1001
+ Tool search allows the model to dynamically search for and load tools into context as needed,
1002
+ rather than loading all tool definitions up front. This can reduce token usage, cost, and latency
1003
+ when you have many tools. Mark the tools you want to make searchable with `deferLoading: true`
1004
+ in their `providerOptions`.
1005
+
1006
+ There are two execution modes:
1007
+
1008
+ - **Server-executed (hosted):** OpenAI searches across the deferred tools declared in the request and returns the loaded subset in the same response. No extra round-trip is needed.
1009
+ - **Client-executed:** The model emits a `tool_search_call`, your application performs the lookup, and you return the matching tools via the `execute` callback.
1010
+
1011
+ ##### Server-Executed (Hosted) Tool Search
1012
+
1013
+ Use hosted tool search when the candidate tools are already known at request time.
1014
+ Add `openai.tools.toolSearch()` with no arguments and mark your tools with `deferLoading: true`:
1015
+
1016
+ ```ts
1017
+ import { openai } from '@ai-sdk/openai';
1018
+ import { generateText, tool, isStepCount } from 'ai';
1019
+ import { z } from 'zod';
1020
+
1021
+ const result = await generateText({
1022
+ model: openai.responses('gpt-5.4'),
1023
+ prompt: 'What is the weather in San Francisco?',
1024
+ stopWhen: isStepCount(10),
1025
+ tools: {
1026
+ toolSearch: openai.tools.toolSearch(),
1027
+
1028
+ get_weather: tool({
1029
+ description: 'Get the current weather at a specific location',
1030
+ inputSchema: z.object({
1031
+ location: z.string(),
1032
+ unit: z.enum(['celsius', 'fahrenheit']),
1033
+ }),
1034
+ execute: async ({ location, unit }) => ({
1035
+ location,
1036
+ temperature: unit === 'celsius' ? 18 : 64,
1037
+ }),
1038
+ providerOptions: {
1039
+ openai: { deferLoading: true },
1040
+ },
1041
+ }),
1042
+
1043
+ search_files: tool({
1044
+ description: 'Search through files in the workspace',
1045
+ inputSchema: z.object({ query: z.string() }),
1046
+ execute: async ({ query }) => ({
1047
+ results: [`Found 3 files matching "${query}"`],
1048
+ }),
1049
+ providerOptions: {
1050
+ openai: { deferLoading: true },
1051
+ },
1052
+ }),
1053
+ },
1054
+ });
1055
+ ```
1056
+
1057
+ In hosted mode, the model internally searches the deferred tools, loads the relevant ones, and
1058
+ proceeds to call them — all within a single response. The `tool_search_call` and
1059
+ `tool_search_output` items appear in the response with `execution: 'server'` and `call_id: null`.
1060
+
1061
+ ##### Namespaces
1062
+
1063
+ Use `providerOptions.openai.namespace` to group related function tools for OpenAI.
1064
+ The SDK keeps each tool executable as a normal AI SDK tool, but serializes grouped
1065
+ tools as OpenAI `namespace` entries in the request:
1066
+
1067
+ ```ts
1068
+ const crmNamespace = {
1069
+ name: 'crm',
1070
+ description: 'CRM tools for customer lookup and order management.',
1071
+ };
1072
+
1073
+ const result = await generateText({
1074
+ model: openai.responses('gpt-5.4'),
1075
+ prompt: 'List open orders for customer cust_123.',
1076
+ tools: {
1077
+ toolSearch: openai.tools.toolSearch(),
1078
+
1079
+ get_customer_profile: tool({
1080
+ description: 'Fetch a customer profile by customer ID.',
1081
+ inputSchema: z.object({ customer_id: z.string() }),
1082
+ execute: async ({ customer_id }) => ({ customer_id }),
1083
+ providerOptions: {
1084
+ openai: { namespace: crmNamespace },
1085
+ },
1086
+ }),
1087
+
1088
+ list_open_orders: tool({
1089
+ description: 'List open orders for a customer ID.',
1090
+ inputSchema: z.object({ customer_id: z.string() }),
1091
+ execute: async ({ customer_id }) => ({ customer_id, orders: [] }),
1092
+ providerOptions: {
1093
+ openai: {
1094
+ namespace: crmNamespace,
1095
+ deferLoading: true,
1096
+ },
1097
+ },
1098
+ }),
1099
+ },
1100
+ });
1101
+ ```
1102
+
1103
+ Tools in the same namespace must use the same namespace `name` and `description`.
1104
+ For best results with tool search, keep namespace descriptions concise and put
1105
+ detailed usage guidance on the individual function tools.
1106
+
1107
+ ##### Client-Executed Tool Search
1108
+
1109
+ Use client-executed tool search when tool discovery depends on runtime state — for example,
1110
+ tools that vary per tenant, project, or external system. Pass `execution: 'client'` along with
1111
+ a `description`, `parameters` schema, and an `execute` callback:
1112
+
1113
+ ```ts
1114
+ import { openai } from '@ai-sdk/openai';
1115
+ import { generateText, tool, isStepCount } from 'ai';
1116
+ import { z } from 'zod';
1117
+
1118
+ const result = await generateText({
1119
+ model: openai.responses('gpt-5.4'),
1120
+ prompt: 'What is the weather in San Francisco?',
1121
+ stopWhen: isStepCount(10),
1122
+ tools: {
1123
+ toolSearch: openai.tools.toolSearch({
1124
+ execution: 'client',
1125
+ description: 'Search for available tools based on what the user needs.',
1126
+ parameters: {
1127
+ type: 'object',
1128
+ properties: {
1129
+ goal: {
1130
+ type: 'string',
1131
+ description: 'What the user is trying to accomplish',
1132
+ },
1133
+ },
1134
+ required: ['goal'],
1135
+ additionalProperties: false,
1136
+ },
1137
+ execute: async ({ arguments: args }) => {
1138
+ // Your custom tool discovery logic here.
1139
+ // Return the tools that match the search goal.
1140
+ return {
1141
+ tools: [
1142
+ {
1143
+ type: 'function',
1144
+ name: 'get_weather',
1145
+ description: 'Get the current weather at a specific location',
1146
+ deferLoading: true,
1147
+ parameters: {
1148
+ type: 'object',
1149
+ properties: {
1150
+ location: { type: 'string' },
1151
+ },
1152
+ required: ['location'],
1153
+ additionalProperties: false,
1154
+ },
1155
+ },
1156
+ ],
1157
+ };
1158
+ },
1159
+ }),
1160
+
1161
+ get_weather: tool({
1162
+ description: 'Get the current weather at a specific location',
1163
+ inputSchema: z.object({ location: z.string() }),
1164
+ execute: async ({ location }) => ({
1165
+ location,
1166
+ temperature: 64,
1167
+ condition: 'Partly cloudy',
1168
+ }),
1169
+ providerOptions: {
1170
+ openai: { deferLoading: true },
1171
+ },
1172
+ }),
1173
+ },
1174
+ });
1175
+ ```
1176
+
1177
+ In client mode, the flow spans two steps:
1178
+
1179
+ 1. **Step 1:** The model emits a `tool_search_call` with `execution: 'client'` and a non-null `call_id`. The SDK calls your `execute` callback with the search arguments. Your callback returns the discovered tools.
1180
+ 2. **Step 2:** The SDK sends the `tool_search_output` (with the matching `call_id`) back to the model. The model can now call the loaded tools as normal function calls.
1181
+
1182
+ For more details, see the [OpenAI Tool Search documentation](https://platform.openai.com/docs/guides/tools-tool-search).
1183
+
961
1184
  #### Custom Tool
962
1185
 
963
1186
  The OpenAI Responses API supports
@@ -969,13 +1192,12 @@ SQL queries, code snippets, or any output that must match a specific pattern.
969
1192
 
970
1193
  ```ts
971
1194
  import { openai } from '@ai-sdk/openai';
972
- import { generateText, stepCountIs } from 'ai';
1195
+ import { generateText, isStepCount } from 'ai';
973
1196
 
974
1197
  const result = await generateText({
975
1198
  model: openai.responses('gpt-5.2-codex'),
976
1199
  tools: {
977
1200
  write_sql: openai.tools.customTool({
978
- name: 'write_sql',
979
1201
  description: 'Write a SQL SELECT query to answer the user question.',
980
1202
  format: {
981
1203
  type: 'grammar',
@@ -991,7 +1213,7 @@ const result = await generateText({
991
1213
  },
992
1214
  toolChoice: 'required',
993
1215
  prompt: 'Write a SQL query to get all users older than 25.',
994
- stopWhen: stepCountIs(3),
1216
+ stopWhen: isStepCount(3),
995
1217
  });
996
1218
  ```
997
1219
 
@@ -1005,7 +1227,6 @@ const result = streamText({
1005
1227
  model: openai.responses('gpt-5.2-codex'),
1006
1228
  tools: {
1007
1229
  write_sql: openai.tools.customTool({
1008
- name: 'write_sql',
1009
1230
  description: 'Write a SQL SELECT query to answer the user question.',
1010
1231
  format: {
1011
1232
  type: 'grammar',
@@ -1018,7 +1239,7 @@ const result = streamText({
1018
1239
  prompt: 'Write a SQL query to get all users older than 25.',
1019
1240
  });
1020
1241
 
1021
- for await (const chunk of result.fullStream) {
1242
+ for await (const chunk of result.stream) {
1022
1243
  if (chunk.type === 'tool-call') {
1023
1244
  console.log(`Tool: ${chunk.toolName}`);
1024
1245
  console.log(`Input: ${chunk.input}`);
@@ -1028,7 +1249,6 @@ for await (const chunk of result.fullStream) {
1028
1249
 
1029
1250
  The custom tool can be configured with:
1030
1251
 
1031
- - **name** _string_ (required) - The name of the custom tool. Used to identify the tool in tool calls.
1032
1252
  - **description** _string_ (optional) - A description of what the tool does, to help the model understand when to use it.
1033
1253
  - **format** _object_ (optional) - The output format constraint. Omit for unconstrained text output.
1034
1254
  - **type** _'grammar' | 'text'_ - The format type. Use `'grammar'` for constrained output or `'text'` for explicit unconstrained text.
@@ -1053,8 +1273,9 @@ const result = await generateText({
1053
1273
  text: 'Please describe the image.',
1054
1274
  },
1055
1275
  {
1056
- type: 'image',
1057
- image: readFileSync('./data/image.png'),
1276
+ type: 'file',
1277
+ mediaType: 'image',
1278
+ data: readFileSync('./data/image.png'),
1058
1279
  },
1059
1280
  ],
1060
1281
  },
@@ -1069,8 +1290,9 @@ You can also pass a file-id from the OpenAI Files API.
1069
1290
 
1070
1291
  ```ts
1071
1292
  {
1072
- type: 'image',
1073
- image: 'file-8EFBcWHsQxZV7YGezBC1fq'
1293
+ type: 'file',
1294
+ mediaType: 'image',
1295
+ data: 'file-8EFBcWHsQxZV7YGezBC1fq'
1074
1296
  }
1075
1297
  ```
1076
1298
 
@@ -1078,8 +1300,9 @@ You can also pass the URL of an image.
1078
1300
 
1079
1301
  ```ts
1080
1302
  {
1081
- type: 'image',
1082
- image: 'https://sample.edu/image.png',
1303
+ type: 'file',
1304
+ mediaType: 'image',
1305
+ data: 'https://sample.edu/image.png',
1083
1306
  }
1084
1307
  ```
1085
1308
 
@@ -1180,7 +1403,6 @@ This metadata includes the following fields:
1180
1403
  If no annotations are present, this property itself may be omitted (`undefined`).
1181
1404
 
1182
1405
  Each element in `annotations` is a member of a discriminated union keyed by a required `type` field. Supported types include, for example:
1183
-
1184
1406
  - `url_citation`
1185
1407
  - `file_citation`
1186
1408
  - `container_file_citation`
@@ -1375,6 +1597,125 @@ for (const part of result.content) {
1375
1597
  are fields like `filename` that are directly available on the source object.
1376
1598
  </Note>
1377
1599
 
1600
+ #### Compaction
1601
+
1602
+ The OpenAI Responses API supports server-side context compaction. When enabled, the server automatically compresses conversation context when token usage crosses a configured threshold. This is useful for long-running conversations or agent loops where you want to stay within token limits without manually managing context.
1603
+
1604
+ The compaction item returned by the server is opaque and encrypted — it carries forward key prior state and reasoning into the next turn using fewer tokens. The AI SDK handles this automatically: compaction items are returned as text parts with special `providerMetadata`, and when passed back in subsequent requests they are sent as compaction input items.
1605
+
1606
+ ```ts highlight="7-11"
1607
+ import {
1608
+ openai,
1609
+ type OpenAILanguageModelResponsesOptions,
1610
+ } from '@ai-sdk/openai';
1611
+ import { generateText } from 'ai';
1612
+
1613
+ const result = await generateText({
1614
+ model: openai.responses('gpt-5.2'),
1615
+ messages: conversationHistory,
1616
+ providerOptions: {
1617
+ openai: {
1618
+ store: false,
1619
+ contextManagement: [{ type: 'compaction', compactThreshold: 50000 }],
1620
+ } satisfies OpenAILanguageModelResponsesOptions,
1621
+ },
1622
+ });
1623
+ ```
1624
+
1625
+ **Configuration:**
1626
+
1627
+ - **type** — Must be `'compaction'`
1628
+ - **compactThreshold** — The token count at which compaction is triggered. When the rendered input token count crosses this threshold, the server runs a compaction pass before continuing inference.
1629
+
1630
+ <Note>
1631
+ Server-side compaction is ZDR-friendly when you set `store: false` on your
1632
+ requests.
1633
+ </Note>
1634
+
1635
+ ##### Detecting Compaction in Streams
1636
+
1637
+ When using `streamText`, you can detect compaction by checking the `providerMetadata` on `text-start` and `text-end` events:
1638
+
1639
+ ```ts
1640
+ import {
1641
+ openai,
1642
+ type OpenAILanguageModelResponsesOptions,
1643
+ } from '@ai-sdk/openai';
1644
+ import { streamText } from 'ai';
1645
+
1646
+ const result = streamText({
1647
+ model: openai.responses('gpt-5.2'),
1648
+ messages: conversationHistory,
1649
+ providerOptions: {
1650
+ openai: {
1651
+ store: false,
1652
+ contextManagement: [{ type: 'compaction', compactThreshold: 50000 }],
1653
+ } satisfies OpenAILanguageModelResponsesOptions,
1654
+ },
1655
+ });
1656
+
1657
+ for await (const part of result.stream) {
1658
+ switch (part.type) {
1659
+ case 'text-start': {
1660
+ const isCompaction = part.providerMetadata?.openai?.type === 'compaction';
1661
+ if (isCompaction) {
1662
+ // ... your logic
1663
+ }
1664
+ break;
1665
+ }
1666
+ case 'text-end': {
1667
+ const isCompaction = part.providerMetadata?.openai?.type === 'compaction';
1668
+ if (isCompaction) {
1669
+ // ... your logic
1670
+ }
1671
+ break;
1672
+ }
1673
+ case 'text-delta': {
1674
+ process.stdout.write(part.text);
1675
+ break;
1676
+ }
1677
+ }
1678
+ }
1679
+ ```
1680
+
1681
+ ##### Compaction in UI Applications
1682
+
1683
+ When using `useChat` or other UI hooks, compaction items appear as text parts with `providerMetadata`. You can detect and style them differently in your UI:
1684
+
1685
+ ```tsx
1686
+ {
1687
+ message.parts.map((part, index) => {
1688
+ if (part.type === 'text') {
1689
+ const isCompaction =
1690
+ (part.providerMetadata?.openai as { type?: string } | undefined)
1691
+ ?.type === 'compaction';
1692
+
1693
+ if (isCompaction) {
1694
+ return (
1695
+ <div
1696
+ key={index}
1697
+ className="bg-yellow-100 border-l-4 border-yellow-500 p-2"
1698
+ >
1699
+ <span className="font-bold">[Context Compacted]</span>
1700
+ <p className="text-sm text-yellow-700">
1701
+ The server compressed the conversation context to reduce token
1702
+ usage.
1703
+ </p>
1704
+ </div>
1705
+ );
1706
+ }
1707
+ return <div key={index}>{part.text}</div>;
1708
+ }
1709
+ });
1710
+ }
1711
+ ```
1712
+
1713
+ The metadata includes the following fields:
1714
+
1715
+ - **type** — Always `'compaction'`
1716
+ - **itemId** _string_ — The ID of the compaction item in the Responses API
1717
+ - **encryptedContent** _string_ (optional) — The encrypted compaction state. This is automatically sent back to the API when the message is included in subsequent requests.
1718
+
1378
1719
  ### Chat Models
1379
1720
 
1380
1721
  You can create models that call the [OpenAI chat API](https://platform.openai.com/docs/api-reference/chat) using the `.chat()` factory method.
@@ -1514,7 +1855,6 @@ Reasoning models currently only generate text, have several limitations, and are
1514
1855
  They support additional settings and response metadata:
1515
1856
 
1516
1857
  - You can use `providerOptions` to set
1517
-
1518
1858
  - the `reasoningEffort` option (or alternatively the `reasoningEffort` model setting), which determines the amount of reasoning the model performs.
1519
1859
 
1520
1860
  - You can use response `providerMetadata` to access the number of reasoning tokens that the model generated.
@@ -1546,7 +1886,6 @@ console.log('Usage:', {
1546
1886
  </Note>
1547
1887
 
1548
1888
  - You can control how system messages are handled with the `systemMessageMode` provider option:
1549
-
1550
1889
  - `developer`: treat the prompt as a developer message (default for reasoning models).
1551
1890
  - `system`: keep the system message as a system-level instruction.
1552
1891
  - `remove`: remove the system message from the messages.
@@ -1671,8 +2010,9 @@ const result = await generateText({
1671
2010
  text: 'Please describe the image.',
1672
2011
  },
1673
2012
  {
1674
- type: 'image',
1675
- image: readFileSync('./data/image.png'),
2013
+ type: 'file',
2014
+ mediaType: 'image',
2015
+ data: readFileSync('./data/image.png'),
1676
2016
  },
1677
2017
  ],
1678
2018
  },
@@ -1687,8 +2027,9 @@ You can also pass the URL of an image.
1687
2027
 
1688
2028
  ```ts
1689
2029
  {
1690
- type: 'image',
1691
- image: 'https://sample.edu/image.png',
2030
+ type: 'file',
2031
+ mediaType: 'image',
2032
+ data: 'https://sample.edu/image.png',
1692
2033
  }
1693
2034
  ```
1694
2035
 
@@ -1805,9 +2146,9 @@ const result = await generateText({
1805
2146
  content: [
1806
2147
  { type: 'text', text: 'Describe the image in detail.' },
1807
2148
  {
1808
- type: 'image',
1809
- image:
1810
- 'https://github.com/vercel/ai/blob/main/examples/ai-functions/data/comic-cat.png?raw=true',
2149
+ type: 'file',
2150
+ mediaType: 'image',
2151
+ data: 'https://github.com/vercel/ai/blob/main/examples/ai-functions/data/comic-cat.png?raw=true',
1811
2152
 
1812
2153
  // OpenAI specific options - image detail:
1813
2154
  providerOptions: {
@@ -2041,8 +2382,11 @@ The following optional provider options are available for OpenAI completion mode
2041
2382
 
2042
2383
  | Model | Image Input | Audio Input | Object Generation | Tool Usage |
2043
2384
  | --------------------- | ------------------- | ------------------- | ------------------- | ------------------- |
2385
+ | `gpt-5.5` | <Check size={18} /> | <Cross size={18} /> | <Check size={18} /> | <Check size={18} /> |
2044
2386
  | `gpt-5.4-pro` | <Check size={18} /> | <Cross size={18} /> | <Check size={18} /> | <Check size={18} /> |
2045
2387
  | `gpt-5.4` | <Check size={18} /> | <Cross size={18} /> | <Check size={18} /> | <Check size={18} /> |
2388
+ | `gpt-5.4-mini` | <Check size={18} /> | <Cross size={18} /> | <Check size={18} /> | <Check size={18} /> |
2389
+ | `gpt-5.4-nano` | <Check size={18} /> | <Cross size={18} /> | <Check size={18} /> | <Check size={18} /> |
2046
2390
  | `gpt-5.3-chat-latest` | <Check size={18} /> | <Cross size={18} /> | <Check size={18} /> | <Check size={18} /> |
2047
2391
  | `gpt-5.2-pro` | <Check size={18} /> | <Cross size={18} /> | <Check size={18} /> | <Check size={18} /> |
2048
2392
  | `gpt-5.2-chat-latest` | <Check size={18} /> | <Cross size={18} /> | <Check size={18} /> | <Check size={18} /> |
@@ -2070,6 +2414,31 @@ The following optional provider options are available for OpenAI completion mode
2070
2414
  provider model ID as a string if needed.
2071
2415
  </Note>
2072
2416
 
2417
+ ## Realtime Models
2418
+
2419
+ <Note type="warning">Realtime is an experimental feature.</Note>
2420
+
2421
+ You can create models that call the [OpenAI Realtime API](https://platform.openai.com/docs/guides/realtime)
2422
+ using the `.experimental_realtime()` factory method.
2423
+
2424
+ ```ts
2425
+ import { openai } from '@ai-sdk/openai';
2426
+
2427
+ const model = openai.experimental_realtime('gpt-realtime');
2428
+ ```
2429
+
2430
+ Realtime sessions run in the browser and require a short-lived token created on
2431
+ your server with `openai.experimental_realtime.getToken()`:
2432
+
2433
+ ```ts
2434
+ const token = await openai.experimental_realtime.getToken({
2435
+ model: 'gpt-realtime',
2436
+ });
2437
+ ```
2438
+
2439
+ See [Realtime](/docs/ai-sdk-core/realtime) for the complete setup and tool
2440
+ calling pattern.
2441
+
2073
2442
  ## Embedding Models
2074
2443
 
2075
2444
  You can create models that call the [OpenAI embeddings API](https://platform.openai.com/docs/api-reference/embeddings)
@@ -2175,6 +2544,9 @@ const { images } = await generateImage({
2175
2544
  Remove the background from an image by setting `background` to `transparent`:
2176
2545
 
2177
2546
  ```ts
2547
+ import { openai, type OpenAIImageModelEditOptions } from '@ai-sdk/openai';
2548
+ import { generateImage } from 'ai';
2549
+
2178
2550
  const imageBuffer = readFileSync('./input-image.png');
2179
2551
 
2180
2552
  const { images } = await generateImage({
@@ -2186,8 +2558,8 @@ const { images } = await generateImage({
2186
2558
  providerOptions: {
2187
2559
  openai: {
2188
2560
  background: 'transparent',
2189
- output_format: 'png',
2190
- },
2561
+ outputFormat: 'png',
2562
+ } satisfies OpenAIImageModelEditOptions,
2191
2563
  },
2192
2564
  });
2193
2565
  ```
@@ -2230,11 +2602,14 @@ const { images } = await generateImage({
2230
2602
  You can pass optional `providerOptions` to the image model. These options are subject to change by OpenAI and are model-dependent. For example, the `gpt-image-1` model supports the `quality` option:
2231
2603
 
2232
2604
  ```ts
2605
+ import { openai, type OpenAIImageModelGenerationOptions } from '@ai-sdk/openai';
2606
+ import { generateImage } from 'ai';
2607
+
2233
2608
  const { image, providerMetadata } = await generateImage({
2234
2609
  model: openai.image('gpt-image-1.5'),
2235
2610
  prompt: 'A salamander at sunrise in a forest pond in the Seychelles.',
2236
2611
  providerOptions: {
2237
- openai: { quality: 'high' },
2612
+ openai: { quality: 'high' } satisfies OpenAIImageModelGenerationOptions,
2238
2613
  },
2239
2614
  });
2240
2615
  ```
@@ -2248,7 +2623,6 @@ is available:
2248
2623
  - **images** _Array&lt;object&gt;_
2249
2624
 
2250
2625
  Array of image-specific metadata. Each image object may contain:
2251
-
2252
2626
  - `revisedPrompt` _string_ - The revised prompt that was actually used to generate the image (OpenAI may modify your prompt for safety or clarity)
2253
2627
  - `created` _number_ - The Unix timestamp (in seconds) of when the image was created
2254
2628
  - `size` _string_ - The size of the generated image. One of `1024x1024`, `1024x1536`, or `1536x1024`
@@ -2272,7 +2646,7 @@ const model = openai.transcription('whisper-1');
2272
2646
  You can also pass additional provider-specific options using the `providerOptions` argument. For example, supplying the input language in ISO 639-1 format (e.g. `en`) will improve accuracy and latency.
2273
2647
 
2274
2648
  ```ts highlight="6"
2275
- import { experimental_transcribe as transcribe } from 'ai';
2649
+ import { transcribe } from 'ai';
2276
2650
  import { openai, type OpenAITranscriptionModelOptions } from '@ai-sdk/openai';
2277
2651
 
2278
2652
  const result = await transcribe({
@@ -2287,7 +2661,7 @@ const result = await transcribe({
2287
2661
  To get word-level timestamps, specify the granularity:
2288
2662
 
2289
2663
  ```ts highlight="8-9"
2290
- import { experimental_transcribe as transcribe } from 'ai';
2664
+ import { transcribe } from 'ai';
2291
2665
  import { openai, type OpenAITranscriptionModelOptions } from '@ai-sdk/openai';
2292
2666
 
2293
2667
  const result = await transcribe({
@@ -2351,7 +2725,7 @@ const model = openai.speech('tts-1');
2351
2725
  The `voice` argument can be set to one of OpenAI's available voices: `alloy`, `ash`, `coral`, `echo`, `fable`, `onyx`, `nova`, `sage`, or `shimmer`.
2352
2726
 
2353
2727
  ```ts highlight="6"
2354
- import { experimental_generateSpeech as generateSpeech } from 'ai';
2728
+ import { generateSpeech } from 'ai';
2355
2729
  import { openai } from '@ai-sdk/openai';
2356
2730
 
2357
2731
  const result = await generateSpeech({
@@ -2364,7 +2738,7 @@ const result = await generateSpeech({
2364
2738
  You can also pass additional provider-specific options using the `providerOptions` argument:
2365
2739
 
2366
2740
  ```ts highlight="7-9"
2367
- import { experimental_generateSpeech as generateSpeech } from 'ai';
2741
+ import { generateSpeech } from 'ai';
2368
2742
  import { openai, type OpenAISpeechModelOptions } from '@ai-sdk/openai';
2369
2743
 
2370
2744
  const result = await generateSpeech({