@ai-sdk/openai 2.0.0-canary.6 → 2.0.0-canary.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,39 @@
1
1
  # @ai-sdk/openai
2
2
 
3
+ ## 2.0.0-canary.8
4
+
5
+ ### Patch Changes
6
+
7
+ - 8aa9e20: feat: add speech with experimental_generateSpeech
8
+ - Updated dependencies [5d142ab]
9
+ - Updated dependencies [b6b43c7]
10
+ - Updated dependencies [8aa9e20]
11
+ - Updated dependencies [3795467]
12
+ - @ai-sdk/provider-utils@3.0.0-canary.8
13
+ - @ai-sdk/provider@2.0.0-canary.7
14
+
15
+ ## 2.0.0-canary.7
16
+
17
+ ### Patch Changes
18
+
19
+ - 26735b5: chore(embedding-model): add v2 interface
20
+ - 443d8ec: feat(embedding-model-v2): add response body field
21
+ - fd65bc6: chore(embedding-model-v2): rename rawResponse to response
22
+ - Updated dependencies [26735b5]
23
+ - Updated dependencies [443d8ec]
24
+ - Updated dependencies [14c9410]
25
+ - Updated dependencies [d9c98f4]
26
+ - Updated dependencies [c4a2fec]
27
+ - Updated dependencies [0054544]
28
+ - Updated dependencies [9e9c809]
29
+ - Updated dependencies [32831c6]
30
+ - Updated dependencies [d0f9495]
31
+ - Updated dependencies [fd65bc6]
32
+ - Updated dependencies [393138b]
33
+ - Updated dependencies [7182d14]
34
+ - @ai-sdk/provider@2.0.0-canary.6
35
+ - @ai-sdk/provider-utils@3.0.0-canary.7
36
+
3
37
  ## 2.0.0-canary.6
4
38
 
5
39
  ### Patch Changes
package/dist/index.d.mts CHANGED
@@ -1,4 +1,4 @@
1
- import { LanguageModelV2, ProviderV2, EmbeddingModelV1, ImageModelV1, TranscriptionModelV1 } from '@ai-sdk/provider';
1
+ import { LanguageModelV2, ProviderV2, EmbeddingModelV2, ImageModelV1, TranscriptionModelV1, SpeechModelV1 } from '@ai-sdk/provider';
2
2
  import { FetchFunction } from '@ai-sdk/provider-utils';
3
3
  import { z } from 'zod';
4
4
 
@@ -142,6 +142,8 @@ type OpenAITranscriptionModelId = 'whisper-1' | 'gpt-4o-mini-transcribe' | 'gpt-
142
142
 
143
143
  type OpenAIResponsesModelId = 'o1' | 'o1-2024-12-17' | 'o1-mini' | 'o1-mini-2024-09-12' | 'o1-preview' | 'o1-preview-2024-09-12' | 'o3-mini' | 'o3-mini-2025-01-31' | 'gpt-4o' | 'gpt-4o-2024-05-13' | 'gpt-4o-2024-08-06' | 'gpt-4o-2024-11-20' | 'gpt-4o-mini' | 'gpt-4o-mini-2024-07-18' | 'gpt-4-turbo' | 'gpt-4-turbo-2024-04-09' | 'gpt-4-turbo-preview' | 'gpt-4-0125-preview' | 'gpt-4-1106-preview' | 'gpt-4' | 'gpt-4-0613' | 'gpt-4.5-preview' | 'gpt-4.5-preview-2025-02-27' | 'gpt-3.5-turbo-0125' | 'gpt-3.5-turbo' | 'gpt-3.5-turbo-1106' | 'chatgpt-4o-latest' | (string & {});
144
144
 
145
+ type OpenAISpeechModelId = 'tts-1' | 'tts-1-hd' | 'gpt-4o-mini-tts' | (string & {});
146
+
145
147
  interface OpenAIProvider extends ProviderV2 {
146
148
  (modelId: 'gpt-3.5-turbo-instruct', settings?: OpenAICompletionSettings): OpenAICompletionLanguageModel;
147
149
  (modelId: OpenAIChatModelId, settings?: OpenAIChatSettings): LanguageModelV2;
@@ -165,17 +167,17 @@ interface OpenAIProvider extends ProviderV2 {
165
167
  /**
166
168
  Creates a model for text embeddings.
167
169
  */
168
- embedding(modelId: OpenAIEmbeddingModelId, settings?: OpenAIEmbeddingSettings): EmbeddingModelV1<string>;
170
+ embedding(modelId: OpenAIEmbeddingModelId, settings?: OpenAIEmbeddingSettings): EmbeddingModelV2<string>;
169
171
  /**
170
172
  Creates a model for text embeddings.
171
173
 
172
174
  @deprecated Use `textEmbeddingModel` instead.
173
175
  */
174
- textEmbedding(modelId: OpenAIEmbeddingModelId, settings?: OpenAIEmbeddingSettings): EmbeddingModelV1<string>;
176
+ textEmbedding(modelId: OpenAIEmbeddingModelId, settings?: OpenAIEmbeddingSettings): EmbeddingModelV2<string>;
175
177
  /**
176
178
  Creates a model for text embeddings.
177
179
  */
178
- textEmbeddingModel(modelId: OpenAIEmbeddingModelId, settings?: OpenAIEmbeddingSettings): EmbeddingModelV1<string>;
180
+ textEmbeddingModel(modelId: OpenAIEmbeddingModelId, settings?: OpenAIEmbeddingSettings): EmbeddingModelV2<string>;
179
181
  /**
180
182
  Creates a model for image generation.
181
183
  */
@@ -189,6 +191,10 @@ interface OpenAIProvider extends ProviderV2 {
189
191
  */
190
192
  transcription(modelId: OpenAITranscriptionModelId): TranscriptionModelV1;
191
193
  /**
194
+ Creates a model for speech generation.
195
+ */
196
+ speech(modelId: OpenAISpeechModelId): SpeechModelV1;
197
+ /**
192
198
  OpenAI-specific tools.
193
199
  */
194
200
  tools: typeof openaiTools;
package/dist/index.d.ts CHANGED
@@ -1,4 +1,4 @@
1
- import { LanguageModelV2, ProviderV2, EmbeddingModelV1, ImageModelV1, TranscriptionModelV1 } from '@ai-sdk/provider';
1
+ import { LanguageModelV2, ProviderV2, EmbeddingModelV2, ImageModelV1, TranscriptionModelV1, SpeechModelV1 } from '@ai-sdk/provider';
2
2
  import { FetchFunction } from '@ai-sdk/provider-utils';
3
3
  import { z } from 'zod';
4
4
 
@@ -142,6 +142,8 @@ type OpenAITranscriptionModelId = 'whisper-1' | 'gpt-4o-mini-transcribe' | 'gpt-
142
142
 
143
143
  type OpenAIResponsesModelId = 'o1' | 'o1-2024-12-17' | 'o1-mini' | 'o1-mini-2024-09-12' | 'o1-preview' | 'o1-preview-2024-09-12' | 'o3-mini' | 'o3-mini-2025-01-31' | 'gpt-4o' | 'gpt-4o-2024-05-13' | 'gpt-4o-2024-08-06' | 'gpt-4o-2024-11-20' | 'gpt-4o-mini' | 'gpt-4o-mini-2024-07-18' | 'gpt-4-turbo' | 'gpt-4-turbo-2024-04-09' | 'gpt-4-turbo-preview' | 'gpt-4-0125-preview' | 'gpt-4-1106-preview' | 'gpt-4' | 'gpt-4-0613' | 'gpt-4.5-preview' | 'gpt-4.5-preview-2025-02-27' | 'gpt-3.5-turbo-0125' | 'gpt-3.5-turbo' | 'gpt-3.5-turbo-1106' | 'chatgpt-4o-latest' | (string & {});
144
144
 
145
+ type OpenAISpeechModelId = 'tts-1' | 'tts-1-hd' | 'gpt-4o-mini-tts' | (string & {});
146
+
145
147
  interface OpenAIProvider extends ProviderV2 {
146
148
  (modelId: 'gpt-3.5-turbo-instruct', settings?: OpenAICompletionSettings): OpenAICompletionLanguageModel;
147
149
  (modelId: OpenAIChatModelId, settings?: OpenAIChatSettings): LanguageModelV2;
@@ -165,17 +167,17 @@ interface OpenAIProvider extends ProviderV2 {
165
167
  /**
166
168
  Creates a model for text embeddings.
167
169
  */
168
- embedding(modelId: OpenAIEmbeddingModelId, settings?: OpenAIEmbeddingSettings): EmbeddingModelV1<string>;
170
+ embedding(modelId: OpenAIEmbeddingModelId, settings?: OpenAIEmbeddingSettings): EmbeddingModelV2<string>;
169
171
  /**
170
172
  Creates a model for text embeddings.
171
173
 
172
174
  @deprecated Use `textEmbeddingModel` instead.
173
175
  */
174
- textEmbedding(modelId: OpenAIEmbeddingModelId, settings?: OpenAIEmbeddingSettings): EmbeddingModelV1<string>;
176
+ textEmbedding(modelId: OpenAIEmbeddingModelId, settings?: OpenAIEmbeddingSettings): EmbeddingModelV2<string>;
175
177
  /**
176
178
  Creates a model for text embeddings.
177
179
  */
178
- textEmbeddingModel(modelId: OpenAIEmbeddingModelId, settings?: OpenAIEmbeddingSettings): EmbeddingModelV1<string>;
180
+ textEmbeddingModel(modelId: OpenAIEmbeddingModelId, settings?: OpenAIEmbeddingSettings): EmbeddingModelV2<string>;
179
181
  /**
180
182
  Creates a model for image generation.
181
183
  */
@@ -189,6 +191,10 @@ interface OpenAIProvider extends ProviderV2 {
189
191
  */
190
192
  transcription(modelId: OpenAITranscriptionModelId): TranscriptionModelV1;
191
193
  /**
194
+ Creates a model for speech generation.
195
+ */
196
+ speech(modelId: OpenAISpeechModelId): SpeechModelV1;
197
+ /**
192
198
  OpenAI-specific tools.
193
199
  */
194
200
  tools: typeof openaiTools;
package/dist/index.js CHANGED
@@ -26,7 +26,7 @@ __export(src_exports, {
26
26
  module.exports = __toCommonJS(src_exports);
27
27
 
28
28
  // src/openai-provider.ts
29
- var import_provider_utils9 = require("@ai-sdk/provider-utils");
29
+ var import_provider_utils10 = require("@ai-sdk/provider-utils");
30
30
 
31
31
  // src/openai-chat-language-model.ts
32
32
  var import_provider3 = require("@ai-sdk/provider");
@@ -580,10 +580,23 @@ var OpenAIChatLanguageModel = class {
580
580
  abortSignal: options.abortSignal,
581
581
  fetch: this.config.fetch
582
582
  });
583
- const { messages: rawPrompt, ...rawSettings } = body;
584
583
  const choice = response.choices[0];
585
- const completionTokenDetails = (_a = response.usage) == null ? void 0 : _a.completion_tokens_details;
586
- const promptTokenDetails = (_b = response.usage) == null ? void 0 : _b.prompt_tokens_details;
584
+ const content = [];
585
+ const text = choice.message.content;
586
+ if (text != null && text.length > 0) {
587
+ content.push({ type: "text", text });
588
+ }
589
+ for (const toolCall of (_a = choice.message.tool_calls) != null ? _a : []) {
590
+ content.push({
591
+ type: "tool-call",
592
+ toolCallType: "function",
593
+ toolCallId: (_b = toolCall.id) != null ? _b : (0, import_provider_utils3.generateId)(),
594
+ toolName: toolCall.function.name,
595
+ args: toolCall.function.arguments
596
+ });
597
+ }
598
+ const completionTokenDetails = (_c = response.usage) == null ? void 0 : _c.completion_tokens_details;
599
+ const promptTokenDetails = (_d = response.usage) == null ? void 0 : _d.prompt_tokens_details;
587
600
  const providerMetadata = { openai: {} };
588
601
  if ((completionTokenDetails == null ? void 0 : completionTokenDetails.reasoning_tokens) != null) {
589
602
  providerMetadata.openai.reasoningTokens = completionTokenDetails == null ? void 0 : completionTokenDetails.reasoning_tokens;
@@ -598,16 +611,7 @@ var OpenAIChatLanguageModel = class {
598
611
  providerMetadata.openai.cachedPromptTokens = promptTokenDetails == null ? void 0 : promptTokenDetails.cached_tokens;
599
612
  }
600
613
  return {
601
- text: (_c = choice.message.content) != null ? _c : void 0,
602
- toolCalls: (_d = choice.message.tool_calls) == null ? void 0 : _d.map((toolCall) => {
603
- var _a2;
604
- return {
605
- toolCallType: "function",
606
- toolCallId: (_a2 = toolCall.id) != null ? _a2 : (0, import_provider_utils3.generateId)(),
607
- toolName: toolCall.function.name,
608
- args: toolCall.function.arguments
609
- };
610
- }),
614
+ content,
611
615
  finishReason: mapOpenAIFinishReason(choice.finish_reason),
612
616
  usage: {
613
617
  inputTokens: (_f = (_e = response.usage) == null ? void 0 : _e.prompt_tokens) != null ? _f : void 0,
@@ -659,6 +663,9 @@ var OpenAIChatLanguageModel = class {
659
663
  return {
660
664
  stream: response.pipeThrough(
661
665
  new TransformStream({
666
+ start(controller) {
667
+ controller.enqueue({ type: "stream-start", warnings });
668
+ },
662
669
  transform(chunk, controller) {
663
670
  var _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k, _l;
664
671
  if (!chunk.success) {
@@ -711,8 +718,8 @@ var OpenAIChatLanguageModel = class {
711
718
  const delta = choice.delta;
712
719
  if (delta.content != null) {
713
720
  controller.enqueue({
714
- type: "text-delta",
715
- textDelta: delta.content
721
+ type: "text",
722
+ text: delta.content
716
723
  });
717
724
  }
718
725
  const mappedLogprobs = mapOpenAIChatLogProbsOutput(
@@ -816,8 +823,7 @@ var OpenAIChatLanguageModel = class {
816
823
  })
817
824
  ),
818
825
  request: { body },
819
- response: { headers: responseHeaders },
820
- warnings
826
+ response: { headers: responseHeaders }
821
827
  };
822
828
  }
823
829
  };
@@ -1141,7 +1147,7 @@ var OpenAICompletionLanguageModel = class {
1141
1147
  });
1142
1148
  const choice = response.choices[0];
1143
1149
  return {
1144
- text: choice.text,
1150
+ content: [{ type: "text", text: choice.text }],
1145
1151
  usage: {
1146
1152
  inputTokens: response.usage.prompt_tokens,
1147
1153
  outputTokens: response.usage.completion_tokens
@@ -1189,6 +1195,9 @@ var OpenAICompletionLanguageModel = class {
1189
1195
  return {
1190
1196
  stream: response.pipeThrough(
1191
1197
  new TransformStream({
1198
+ start(controller) {
1199
+ controller.enqueue({ type: "stream-start", warnings });
1200
+ },
1192
1201
  transform(chunk, controller) {
1193
1202
  if (!chunk.success) {
1194
1203
  finishReason = "error";
@@ -1218,8 +1227,8 @@ var OpenAICompletionLanguageModel = class {
1218
1227
  }
1219
1228
  if ((choice == null ? void 0 : choice.text) != null) {
1220
1229
  controller.enqueue({
1221
- type: "text-delta",
1222
- textDelta: choice.text
1230
+ type: "text",
1231
+ text: choice.text
1223
1232
  });
1224
1233
  }
1225
1234
  const mappedLogprobs = mapOpenAICompletionLogProbs(
@@ -1240,9 +1249,8 @@ var OpenAICompletionLanguageModel = class {
1240
1249
  }
1241
1250
  })
1242
1251
  ),
1243
- response: { headers: responseHeaders },
1244
- warnings,
1245
- request: { body: JSON.stringify(body) }
1252
+ request: { body },
1253
+ response: { headers: responseHeaders }
1246
1254
  };
1247
1255
  }
1248
1256
  };
@@ -1297,7 +1305,7 @@ var import_provider_utils5 = require("@ai-sdk/provider-utils");
1297
1305
  var import_zod5 = require("zod");
1298
1306
  var OpenAIEmbeddingModel = class {
1299
1307
  constructor(modelId, settings, config) {
1300
- this.specificationVersion = "v1";
1308
+ this.specificationVersion = "v2";
1301
1309
  this.modelId = modelId;
1302
1310
  this.settings = settings;
1303
1311
  this.config = config;
@@ -1326,7 +1334,11 @@ var OpenAIEmbeddingModel = class {
1326
1334
  values
1327
1335
  });
1328
1336
  }
1329
- const { responseHeaders, value: response } = await (0, import_provider_utils5.postJsonToApi)({
1337
+ const {
1338
+ responseHeaders,
1339
+ value: response,
1340
+ rawValue
1341
+ } = await (0, import_provider_utils5.postJsonToApi)({
1330
1342
  url: this.config.url({
1331
1343
  path: "/embeddings",
1332
1344
  modelId: this.modelId
@@ -1349,7 +1361,7 @@ var OpenAIEmbeddingModel = class {
1349
1361
  return {
1350
1362
  embeddings: response.data.map((item) => item.embedding),
1351
1363
  usage: response.usage ? { tokens: response.usage.prompt_tokens } : void 0,
1352
- rawResponse: { headers: responseHeaders }
1364
+ response: { headers: responseHeaders, body: rawValue }
1353
1365
  };
1354
1366
  }
1355
1367
  };
@@ -1466,7 +1478,7 @@ var openaiTools = {
1466
1478
  // src/openai-transcription-model.ts
1467
1479
  var import_provider_utils7 = require("@ai-sdk/provider-utils");
1468
1480
  var import_zod8 = require("zod");
1469
- var OpenAIProviderOptionsSchema = import_zod8.z.object({
1481
+ var openAIProviderOptionsSchema = import_zod8.z.object({
1470
1482
  include: import_zod8.z.array(import_zod8.z.string()).nullish(),
1471
1483
  language: import_zod8.z.string().nullish(),
1472
1484
  prompt: import_zod8.z.string().nullish(),
@@ -1551,7 +1563,7 @@ var OpenAITranscriptionModel = class {
1551
1563
  const openAIOptions = (0, import_provider_utils7.parseProviderOptions)({
1552
1564
  provider: "openai",
1553
1565
  providerOptions,
1554
- schema: OpenAIProviderOptionsSchema
1566
+ schema: openAIProviderOptionsSchema
1555
1567
  });
1556
1568
  const formData = new FormData();
1557
1569
  const blob = audio instanceof Uint8Array ? new Blob([audio]) : new Blob([(0, import_provider_utils7.convertBase64ToUint8Array)(audio)]);
@@ -1969,7 +1981,7 @@ var OpenAIResponsesLanguageModel = class {
1969
1981
  };
1970
1982
  }
1971
1983
  async doGenerate(options) {
1972
- var _a, _b, _c, _d, _e;
1984
+ var _a, _b, _c, _d, _e, _f, _g, _h;
1973
1985
  const { args: body, warnings } = this.getArgs(options);
1974
1986
  const {
1975
1987
  responseHeaders,
@@ -2033,31 +2045,45 @@ var OpenAIResponsesLanguageModel = class {
2033
2045
  abortSignal: options.abortSignal,
2034
2046
  fetch: this.config.fetch
2035
2047
  });
2036
- const outputTextElements = response.output.filter((output) => output.type === "message").flatMap((output) => output.content).filter((content) => content.type === "output_text");
2037
- const toolCalls = response.output.filter((output) => output.type === "function_call").map((output) => ({
2038
- toolCallType: "function",
2039
- toolCallId: output.call_id,
2040
- toolName: output.name,
2041
- args: output.arguments
2042
- }));
2048
+ const content = [];
2049
+ for (const part of response.output) {
2050
+ switch (part.type) {
2051
+ case "message": {
2052
+ for (const contentPart of part.content) {
2053
+ content.push({
2054
+ type: "text",
2055
+ text: contentPart.text
2056
+ });
2057
+ for (const annotation of contentPart.annotations) {
2058
+ content.push({
2059
+ type: "source",
2060
+ sourceType: "url",
2061
+ id: (_c = (_b = (_a = this.config).generateId) == null ? void 0 : _b.call(_a)) != null ? _c : (0, import_provider_utils8.generateId)(),
2062
+ url: annotation.url,
2063
+ title: annotation.title
2064
+ });
2065
+ }
2066
+ }
2067
+ break;
2068
+ }
2069
+ case "function_call": {
2070
+ content.push({
2071
+ type: "tool-call",
2072
+ toolCallType: "function",
2073
+ toolCallId: part.call_id,
2074
+ toolName: part.name,
2075
+ args: part.arguments
2076
+ });
2077
+ break;
2078
+ }
2079
+ }
2080
+ }
2043
2081
  return {
2044
- text: outputTextElements.map((content) => content.text).join("\n"),
2045
- sources: outputTextElements.flatMap(
2046
- (content) => content.annotations.map((annotation) => {
2047
- var _a2, _b2, _c2;
2048
- return {
2049
- sourceType: "url",
2050
- id: (_c2 = (_b2 = (_a2 = this.config).generateId) == null ? void 0 : _b2.call(_a2)) != null ? _c2 : (0, import_provider_utils8.generateId)(),
2051
- url: annotation.url,
2052
- title: annotation.title
2053
- };
2054
- })
2055
- ),
2082
+ content,
2056
2083
  finishReason: mapOpenAIResponseFinishReason({
2057
- finishReason: (_a = response.incomplete_details) == null ? void 0 : _a.reason,
2058
- hasToolCalls: toolCalls.length > 0
2084
+ finishReason: (_d = response.incomplete_details) == null ? void 0 : _d.reason,
2085
+ hasToolCalls: content.some((part) => part.type === "tool-call")
2059
2086
  }),
2060
- toolCalls: toolCalls.length > 0 ? toolCalls : void 0,
2061
2087
  usage: {
2062
2088
  inputTokens: response.usage.input_tokens,
2063
2089
  outputTokens: response.usage.output_tokens
@@ -2073,8 +2099,8 @@ var OpenAIResponsesLanguageModel = class {
2073
2099
  providerMetadata: {
2074
2100
  openai: {
2075
2101
  responseId: response.id,
2076
- cachedPromptTokens: (_c = (_b = response.usage.input_tokens_details) == null ? void 0 : _b.cached_tokens) != null ? _c : null,
2077
- reasoningTokens: (_e = (_d = response.usage.output_tokens_details) == null ? void 0 : _d.reasoning_tokens) != null ? _e : null
2102
+ cachedPromptTokens: (_f = (_e = response.usage.input_tokens_details) == null ? void 0 : _e.cached_tokens) != null ? _f : null,
2103
+ reasoningTokens: (_h = (_g = response.usage.output_tokens_details) == null ? void 0 : _g.reasoning_tokens) != null ? _h : null
2078
2104
  }
2079
2105
  },
2080
2106
  warnings
@@ -2113,6 +2139,9 @@ var OpenAIResponsesLanguageModel = class {
2113
2139
  return {
2114
2140
  stream: response.pipeThrough(
2115
2141
  new TransformStream({
2142
+ start(controller) {
2143
+ controller.enqueue({ type: "stream-start", warnings });
2144
+ },
2116
2145
  transform(chunk, controller) {
2117
2146
  var _a, _b, _c, _d, _e, _f, _g, _h;
2118
2147
  if (!chunk.success) {
@@ -2156,8 +2185,8 @@ var OpenAIResponsesLanguageModel = class {
2156
2185
  });
2157
2186
  } else if (isTextDeltaChunk(value)) {
2158
2187
  controller.enqueue({
2159
- type: "text-delta",
2160
- textDelta: value.delta
2188
+ type: "text",
2189
+ text: value.delta
2161
2190
  });
2162
2191
  } else if (isResponseOutputItemDoneChunk(value) && value.item.type === "function_call") {
2163
2192
  ongoingToolCalls[value.output_index] = void 0;
@@ -2181,12 +2210,10 @@ var OpenAIResponsesLanguageModel = class {
2181
2210
  } else if (isResponseAnnotationAddedChunk(value)) {
2182
2211
  controller.enqueue({
2183
2212
  type: "source",
2184
- source: {
2185
- sourceType: "url",
2186
- id: (_h = (_g = (_f = self.config).generateId) == null ? void 0 : _g.call(_f)) != null ? _h : (0, import_provider_utils8.generateId)(),
2187
- url: value.annotation.url,
2188
- title: value.annotation.title
2189
- }
2213
+ sourceType: "url",
2214
+ id: (_h = (_g = (_f = self.config).generateId) == null ? void 0 : _g.call(_f)) != null ? _h : (0, import_provider_utils8.generateId)(),
2215
+ url: value.annotation.url,
2216
+ title: value.annotation.title
2190
2217
  });
2191
2218
  }
2192
2219
  },
@@ -2209,8 +2236,7 @@ var OpenAIResponsesLanguageModel = class {
2209
2236
  })
2210
2237
  ),
2211
2238
  request: { body },
2212
- response: { headers: responseHeaders },
2213
- warnings
2239
+ response: { headers: responseHeaders }
2214
2240
  };
2215
2241
  }
2216
2242
  };
@@ -2350,14 +2376,113 @@ var openaiResponsesProviderOptionsSchema = import_zod9.z.object({
2350
2376
  instructions: import_zod9.z.string().nullish()
2351
2377
  });
2352
2378
 
2379
+ // src/openai-speech-model.ts
2380
+ var import_provider_utils9 = require("@ai-sdk/provider-utils");
2381
+ var import_zod10 = require("zod");
2382
+ var OpenAIProviderOptionsSchema = import_zod10.z.object({
2383
+ instructions: import_zod10.z.string().nullish(),
2384
+ speed: import_zod10.z.number().min(0.25).max(4).default(1).nullish()
2385
+ });
2386
+ var OpenAISpeechModel = class {
2387
+ constructor(modelId, config) {
2388
+ this.modelId = modelId;
2389
+ this.config = config;
2390
+ this.specificationVersion = "v1";
2391
+ }
2392
+ get provider() {
2393
+ return this.config.provider;
2394
+ }
2395
+ getArgs({
2396
+ text,
2397
+ voice = "alloy",
2398
+ outputFormat = "mp3",
2399
+ speed,
2400
+ instructions,
2401
+ providerOptions
2402
+ }) {
2403
+ const warnings = [];
2404
+ const openAIOptions = (0, import_provider_utils9.parseProviderOptions)({
2405
+ provider: "openai",
2406
+ providerOptions,
2407
+ schema: OpenAIProviderOptionsSchema
2408
+ });
2409
+ const requestBody = {
2410
+ model: this.modelId,
2411
+ input: text,
2412
+ voice,
2413
+ response_format: "mp3",
2414
+ speed,
2415
+ instructions
2416
+ };
2417
+ if (outputFormat) {
2418
+ if (["mp3", "opus", "aac", "flac", "wav", "pcm"].includes(outputFormat)) {
2419
+ requestBody.response_format = outputFormat;
2420
+ } else {
2421
+ warnings.push({
2422
+ type: "unsupported-setting",
2423
+ setting: "outputFormat",
2424
+ details: `Unsupported output format: ${outputFormat}. Using mp3 instead.`
2425
+ });
2426
+ }
2427
+ }
2428
+ if (openAIOptions) {
2429
+ const speechModelOptions = {};
2430
+ for (const key in speechModelOptions) {
2431
+ const value = speechModelOptions[key];
2432
+ if (value !== void 0) {
2433
+ requestBody[key] = value;
2434
+ }
2435
+ }
2436
+ }
2437
+ return {
2438
+ requestBody,
2439
+ warnings
2440
+ };
2441
+ }
2442
+ async doGenerate(options) {
2443
+ var _a, _b, _c;
2444
+ const currentDate = (_c = (_b = (_a = this.config._internal) == null ? void 0 : _a.currentDate) == null ? void 0 : _b.call(_a)) != null ? _c : /* @__PURE__ */ new Date();
2445
+ const { requestBody, warnings } = this.getArgs(options);
2446
+ const {
2447
+ value: audio,
2448
+ responseHeaders,
2449
+ rawValue: rawResponse
2450
+ } = await (0, import_provider_utils9.postJsonToApi)({
2451
+ url: this.config.url({
2452
+ path: "/audio/speech",
2453
+ modelId: this.modelId
2454
+ }),
2455
+ headers: (0, import_provider_utils9.combineHeaders)(this.config.headers(), options.headers),
2456
+ body: requestBody,
2457
+ failedResponseHandler: openaiFailedResponseHandler,
2458
+ successfulResponseHandler: (0, import_provider_utils9.createBinaryResponseHandler)(),
2459
+ abortSignal: options.abortSignal,
2460
+ fetch: this.config.fetch
2461
+ });
2462
+ return {
2463
+ audio,
2464
+ warnings,
2465
+ request: {
2466
+ body: JSON.stringify(requestBody)
2467
+ },
2468
+ response: {
2469
+ timestamp: currentDate,
2470
+ modelId: this.modelId,
2471
+ headers: responseHeaders,
2472
+ body: rawResponse
2473
+ }
2474
+ };
2475
+ }
2476
+ };
2477
+
2353
2478
  // src/openai-provider.ts
2354
2479
  function createOpenAI(options = {}) {
2355
2480
  var _a, _b, _c;
2356
- const baseURL = (_a = (0, import_provider_utils9.withoutTrailingSlash)(options.baseURL)) != null ? _a : "https://api.openai.com/v1";
2481
+ const baseURL = (_a = (0, import_provider_utils10.withoutTrailingSlash)(options.baseURL)) != null ? _a : "https://api.openai.com/v1";
2357
2482
  const compatibility = (_b = options.compatibility) != null ? _b : "compatible";
2358
2483
  const providerName = (_c = options.name) != null ? _c : "openai";
2359
2484
  const getHeaders = () => ({
2360
- Authorization: `Bearer ${(0, import_provider_utils9.loadApiKey)({
2485
+ Authorization: `Bearer ${(0, import_provider_utils10.loadApiKey)({
2361
2486
  apiKey: options.apiKey,
2362
2487
  environmentVariableName: "OPENAI_API_KEY",
2363
2488
  description: "OpenAI"
@@ -2398,6 +2523,12 @@ function createOpenAI(options = {}) {
2398
2523
  headers: getHeaders,
2399
2524
  fetch: options.fetch
2400
2525
  });
2526
+ const createSpeechModel = (modelId) => new OpenAISpeechModel(modelId, {
2527
+ provider: `${providerName}.speech`,
2528
+ url: ({ path }) => `${baseURL}${path}`,
2529
+ headers: getHeaders,
2530
+ fetch: options.fetch
2531
+ });
2401
2532
  const createLanguageModel = (modelId, settings) => {
2402
2533
  if (new.target) {
2403
2534
  throw new Error(
@@ -2434,6 +2565,8 @@ function createOpenAI(options = {}) {
2434
2565
  provider.imageModel = createImageModel;
2435
2566
  provider.transcription = createTranscriptionModel;
2436
2567
  provider.transcriptionModel = createTranscriptionModel;
2568
+ provider.speech = createSpeechModel;
2569
+ provider.speechModel = createSpeechModel;
2437
2570
  provider.tools = openaiTools;
2438
2571
  return provider;
2439
2572
  }