@ai-sdk/openai 2.0.0-canary.7 → 2.0.0-canary.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,26 @@
1
1
  # @ai-sdk/openai
2
2
 
3
+ ## 2.0.0-canary.9
4
+
5
+ ### Patch Changes
6
+
7
+ - d63bcbc: feat (provider/openai): o4 updates for responses api
8
+ - d2af019: feat (providers/openai): add gpt-4.1 models
9
+ - 870c5c0: feat (providers/openai): add o3 and o4-mini models
10
+ - 06bac05: fix (openai): structure output for responses model
11
+
12
+ ## 2.0.0-canary.8
13
+
14
+ ### Patch Changes
15
+
16
+ - 8aa9e20: feat: add speech with experimental_generateSpeech
17
+ - Updated dependencies [5d142ab]
18
+ - Updated dependencies [b6b43c7]
19
+ - Updated dependencies [8aa9e20]
20
+ - Updated dependencies [3795467]
21
+ - @ai-sdk/provider-utils@3.0.0-canary.8
22
+ - @ai-sdk/provider@2.0.0-canary.7
23
+
3
24
  ## 2.0.0-canary.7
4
25
 
5
26
  ### Patch Changes
package/dist/index.d.mts CHANGED
@@ -1,8 +1,8 @@
1
- import { LanguageModelV2, ProviderV2, EmbeddingModelV2, ImageModelV1, TranscriptionModelV1 } from '@ai-sdk/provider';
1
+ import { LanguageModelV2, ProviderV2, EmbeddingModelV2, ImageModelV1, TranscriptionModelV1, SpeechModelV1 } from '@ai-sdk/provider';
2
2
  import { FetchFunction } from '@ai-sdk/provider-utils';
3
3
  import { z } from 'zod';
4
4
 
5
- type OpenAIChatModelId = 'o1' | 'o1-2024-12-17' | 'o1-mini' | 'o1-mini-2024-09-12' | 'o1-preview' | 'o1-preview-2024-09-12' | 'o3-mini' | 'o3-mini-2025-01-31' | 'gpt-4o' | 'gpt-4o-2024-05-13' | 'gpt-4o-2024-08-06' | 'gpt-4o-2024-11-20' | 'gpt-4o-audio-preview' | 'gpt-4o-audio-preview-2024-10-01' | 'gpt-4o-audio-preview-2024-12-17' | 'gpt-4o-search-preview' | 'gpt-4o-search-preview-2025-03-11' | 'gpt-4o-mini' | 'gpt-4o-mini-2024-07-18' | 'gpt-4-turbo' | 'gpt-4-turbo-2024-04-09' | 'gpt-4-turbo-preview' | 'gpt-4-0125-preview' | 'gpt-4-1106-preview' | 'gpt-4' | 'gpt-4-0613' | 'gpt-4.5-preview' | 'gpt-4.5-preview-2025-02-27' | 'gpt-3.5-turbo-0125' | 'gpt-3.5-turbo' | 'gpt-3.5-turbo-1106' | 'chatgpt-4o-latest' | (string & {});
5
+ type OpenAIChatModelId = 'o1' | 'o1-2024-12-17' | 'o1-mini' | 'o1-mini-2024-09-12' | 'o1-preview' | 'o1-preview-2024-09-12' | 'o3-mini' | 'o3-mini-2025-01-31' | 'o3' | 'o3-2025-04-16' | 'o4-mini' | 'o4-mini-2025-04-16' | 'gpt-4.1' | 'gpt-4.1-2025-04-14' | 'gpt-4.1-mini' | 'gpt-4.1-mini-2025-04-14' | 'gpt-4.1-nano' | 'gpt-4.1-nano-2025-04-14' | 'gpt-4o' | 'gpt-4o-2024-05-13' | 'gpt-4o-2024-08-06' | 'gpt-4o-2024-11-20' | 'gpt-4o-audio-preview' | 'gpt-4o-audio-preview-2024-10-01' | 'gpt-4o-audio-preview-2024-12-17' | 'gpt-4o-search-preview' | 'gpt-4o-search-preview-2025-03-11' | 'gpt-4o-mini-search-preview' | 'gpt-4o-mini-search-preview-2025-03-11' | 'gpt-4o-mini' | 'gpt-4o-mini-2024-07-18' | 'gpt-4-turbo' | 'gpt-4-turbo-2024-04-09' | 'gpt-4-turbo-preview' | 'gpt-4-0125-preview' | 'gpt-4-1106-preview' | 'gpt-4' | 'gpt-4-0613' | 'gpt-4.5-preview' | 'gpt-4.5-preview-2025-02-27' | 'gpt-3.5-turbo-0125' | 'gpt-3.5-turbo' | 'gpt-3.5-turbo-1106' | 'chatgpt-4o-latest' | (string & {});
6
6
  interface OpenAIChatSettings {
7
7
  /**
8
8
  Whether to use structured outputs. Defaults to false.
@@ -140,7 +140,9 @@ declare const openaiTools: {
140
140
 
141
141
  type OpenAITranscriptionModelId = 'whisper-1' | 'gpt-4o-mini-transcribe' | 'gpt-4o-transcribe' | (string & {});
142
142
 
143
- type OpenAIResponsesModelId = 'o1' | 'o1-2024-12-17' | 'o1-mini' | 'o1-mini-2024-09-12' | 'o1-preview' | 'o1-preview-2024-09-12' | 'o3-mini' | 'o3-mini-2025-01-31' | 'gpt-4o' | 'gpt-4o-2024-05-13' | 'gpt-4o-2024-08-06' | 'gpt-4o-2024-11-20' | 'gpt-4o-mini' | 'gpt-4o-mini-2024-07-18' | 'gpt-4-turbo' | 'gpt-4-turbo-2024-04-09' | 'gpt-4-turbo-preview' | 'gpt-4-0125-preview' | 'gpt-4-1106-preview' | 'gpt-4' | 'gpt-4-0613' | 'gpt-4.5-preview' | 'gpt-4.5-preview-2025-02-27' | 'gpt-3.5-turbo-0125' | 'gpt-3.5-turbo' | 'gpt-3.5-turbo-1106' | 'chatgpt-4o-latest' | (string & {});
143
+ type OpenAIResponsesModelId = 'o1' | 'o1-2024-12-17' | 'o1-mini' | 'o1-mini-2024-09-12' | 'o1-preview' | 'o1-preview-2024-09-12' | 'o3-mini' | 'o3-mini-2025-01-31' | 'o3' | 'o3-2025-04-16' | 'o4-mini' | 'o4-mini-2025-04-16' | 'gpt-4.1' | 'gpt-4.1-2025-04-14' | 'gpt-4.1-mini' | 'gpt-4.1-mini-2025-04-14' | 'gpt-4.1-nano' | 'gpt-4.1-nano-2025-04-14' | 'gpt-4o' | 'gpt-4o-2024-05-13' | 'gpt-4o-2024-08-06' | 'gpt-4o-2024-11-20' | 'gpt-4o-audio-preview' | 'gpt-4o-audio-preview-2024-10-01' | 'gpt-4o-audio-preview-2024-12-17' | 'gpt-4o-search-preview' | 'gpt-4o-search-preview-2025-03-11' | 'gpt-4o-mini-search-preview' | 'gpt-4o-mini-search-preview-2025-03-11' | 'gpt-4o-mini' | 'gpt-4o-mini-2024-07-18' | 'gpt-4-turbo' | 'gpt-4-turbo-2024-04-09' | 'gpt-4-turbo-preview' | 'gpt-4-0125-preview' | 'gpt-4-1106-preview' | 'gpt-4' | 'gpt-4-0613' | 'gpt-4.5-preview' | 'gpt-4.5-preview-2025-02-27' | 'gpt-3.5-turbo-0125' | 'gpt-3.5-turbo' | 'gpt-3.5-turbo-1106' | 'chatgpt-4o-latest' | (string & {});
144
+
145
+ type OpenAISpeechModelId = 'tts-1' | 'tts-1-hd' | 'gpt-4o-mini-tts' | (string & {});
144
146
 
145
147
  interface OpenAIProvider extends ProviderV2 {
146
148
  (modelId: 'gpt-3.5-turbo-instruct', settings?: OpenAICompletionSettings): OpenAICompletionLanguageModel;
@@ -189,6 +191,10 @@ interface OpenAIProvider extends ProviderV2 {
189
191
  */
190
192
  transcription(modelId: OpenAITranscriptionModelId): TranscriptionModelV1;
191
193
  /**
194
+ Creates a model for speech generation.
195
+ */
196
+ speech(modelId: OpenAISpeechModelId): SpeechModelV1;
197
+ /**
192
198
  OpenAI-specific tools.
193
199
  */
194
200
  tools: typeof openaiTools;
package/dist/index.d.ts CHANGED
@@ -1,8 +1,8 @@
1
- import { LanguageModelV2, ProviderV2, EmbeddingModelV2, ImageModelV1, TranscriptionModelV1 } from '@ai-sdk/provider';
1
+ import { LanguageModelV2, ProviderV2, EmbeddingModelV2, ImageModelV1, TranscriptionModelV1, SpeechModelV1 } from '@ai-sdk/provider';
2
2
  import { FetchFunction } from '@ai-sdk/provider-utils';
3
3
  import { z } from 'zod';
4
4
 
5
- type OpenAIChatModelId = 'o1' | 'o1-2024-12-17' | 'o1-mini' | 'o1-mini-2024-09-12' | 'o1-preview' | 'o1-preview-2024-09-12' | 'o3-mini' | 'o3-mini-2025-01-31' | 'gpt-4o' | 'gpt-4o-2024-05-13' | 'gpt-4o-2024-08-06' | 'gpt-4o-2024-11-20' | 'gpt-4o-audio-preview' | 'gpt-4o-audio-preview-2024-10-01' | 'gpt-4o-audio-preview-2024-12-17' | 'gpt-4o-search-preview' | 'gpt-4o-search-preview-2025-03-11' | 'gpt-4o-mini' | 'gpt-4o-mini-2024-07-18' | 'gpt-4-turbo' | 'gpt-4-turbo-2024-04-09' | 'gpt-4-turbo-preview' | 'gpt-4-0125-preview' | 'gpt-4-1106-preview' | 'gpt-4' | 'gpt-4-0613' | 'gpt-4.5-preview' | 'gpt-4.5-preview-2025-02-27' | 'gpt-3.5-turbo-0125' | 'gpt-3.5-turbo' | 'gpt-3.5-turbo-1106' | 'chatgpt-4o-latest' | (string & {});
5
+ type OpenAIChatModelId = 'o1' | 'o1-2024-12-17' | 'o1-mini' | 'o1-mini-2024-09-12' | 'o1-preview' | 'o1-preview-2024-09-12' | 'o3-mini' | 'o3-mini-2025-01-31' | 'o3' | 'o3-2025-04-16' | 'o4-mini' | 'o4-mini-2025-04-16' | 'gpt-4.1' | 'gpt-4.1-2025-04-14' | 'gpt-4.1-mini' | 'gpt-4.1-mini-2025-04-14' | 'gpt-4.1-nano' | 'gpt-4.1-nano-2025-04-14' | 'gpt-4o' | 'gpt-4o-2024-05-13' | 'gpt-4o-2024-08-06' | 'gpt-4o-2024-11-20' | 'gpt-4o-audio-preview' | 'gpt-4o-audio-preview-2024-10-01' | 'gpt-4o-audio-preview-2024-12-17' | 'gpt-4o-search-preview' | 'gpt-4o-search-preview-2025-03-11' | 'gpt-4o-mini-search-preview' | 'gpt-4o-mini-search-preview-2025-03-11' | 'gpt-4o-mini' | 'gpt-4o-mini-2024-07-18' | 'gpt-4-turbo' | 'gpt-4-turbo-2024-04-09' | 'gpt-4-turbo-preview' | 'gpt-4-0125-preview' | 'gpt-4-1106-preview' | 'gpt-4' | 'gpt-4-0613' | 'gpt-4.5-preview' | 'gpt-4.5-preview-2025-02-27' | 'gpt-3.5-turbo-0125' | 'gpt-3.5-turbo' | 'gpt-3.5-turbo-1106' | 'chatgpt-4o-latest' | (string & {});
6
6
  interface OpenAIChatSettings {
7
7
  /**
8
8
  Whether to use structured outputs. Defaults to false.
@@ -140,7 +140,9 @@ declare const openaiTools: {
140
140
 
141
141
  type OpenAITranscriptionModelId = 'whisper-1' | 'gpt-4o-mini-transcribe' | 'gpt-4o-transcribe' | (string & {});
142
142
 
143
- type OpenAIResponsesModelId = 'o1' | 'o1-2024-12-17' | 'o1-mini' | 'o1-mini-2024-09-12' | 'o1-preview' | 'o1-preview-2024-09-12' | 'o3-mini' | 'o3-mini-2025-01-31' | 'gpt-4o' | 'gpt-4o-2024-05-13' | 'gpt-4o-2024-08-06' | 'gpt-4o-2024-11-20' | 'gpt-4o-mini' | 'gpt-4o-mini-2024-07-18' | 'gpt-4-turbo' | 'gpt-4-turbo-2024-04-09' | 'gpt-4-turbo-preview' | 'gpt-4-0125-preview' | 'gpt-4-1106-preview' | 'gpt-4' | 'gpt-4-0613' | 'gpt-4.5-preview' | 'gpt-4.5-preview-2025-02-27' | 'gpt-3.5-turbo-0125' | 'gpt-3.5-turbo' | 'gpt-3.5-turbo-1106' | 'chatgpt-4o-latest' | (string & {});
143
+ type OpenAIResponsesModelId = 'o1' | 'o1-2024-12-17' | 'o1-mini' | 'o1-mini-2024-09-12' | 'o1-preview' | 'o1-preview-2024-09-12' | 'o3-mini' | 'o3-mini-2025-01-31' | 'o3' | 'o3-2025-04-16' | 'o4-mini' | 'o4-mini-2025-04-16' | 'gpt-4.1' | 'gpt-4.1-2025-04-14' | 'gpt-4.1-mini' | 'gpt-4.1-mini-2025-04-14' | 'gpt-4.1-nano' | 'gpt-4.1-nano-2025-04-14' | 'gpt-4o' | 'gpt-4o-2024-05-13' | 'gpt-4o-2024-08-06' | 'gpt-4o-2024-11-20' | 'gpt-4o-audio-preview' | 'gpt-4o-audio-preview-2024-10-01' | 'gpt-4o-audio-preview-2024-12-17' | 'gpt-4o-search-preview' | 'gpt-4o-search-preview-2025-03-11' | 'gpt-4o-mini-search-preview' | 'gpt-4o-mini-search-preview-2025-03-11' | 'gpt-4o-mini' | 'gpt-4o-mini-2024-07-18' | 'gpt-4-turbo' | 'gpt-4-turbo-2024-04-09' | 'gpt-4-turbo-preview' | 'gpt-4-0125-preview' | 'gpt-4-1106-preview' | 'gpt-4' | 'gpt-4-0613' | 'gpt-4.5-preview' | 'gpt-4.5-preview-2025-02-27' | 'gpt-3.5-turbo-0125' | 'gpt-3.5-turbo' | 'gpt-3.5-turbo-1106' | 'chatgpt-4o-latest' | (string & {});
144
+
145
+ type OpenAISpeechModelId = 'tts-1' | 'tts-1-hd' | 'gpt-4o-mini-tts' | (string & {});
144
146
 
145
147
  interface OpenAIProvider extends ProviderV2 {
146
148
  (modelId: 'gpt-3.5-turbo-instruct', settings?: OpenAICompletionSettings): OpenAICompletionLanguageModel;
@@ -189,6 +191,10 @@ interface OpenAIProvider extends ProviderV2 {
189
191
  */
190
192
  transcription(modelId: OpenAITranscriptionModelId): TranscriptionModelV1;
191
193
  /**
194
+ Creates a model for speech generation.
195
+ */
196
+ speech(modelId: OpenAISpeechModelId): SpeechModelV1;
197
+ /**
192
198
  OpenAI-specific tools.
193
199
  */
194
200
  tools: typeof openaiTools;
package/dist/index.js CHANGED
@@ -26,7 +26,7 @@ __export(src_exports, {
26
26
  module.exports = __toCommonJS(src_exports);
27
27
 
28
28
  // src/openai-provider.ts
29
- var import_provider_utils9 = require("@ai-sdk/provider-utils");
29
+ var import_provider_utils10 = require("@ai-sdk/provider-utils");
30
30
 
31
31
  // src/openai-chat-language-model.ts
32
32
  var import_provider3 = require("@ai-sdk/provider");
@@ -531,13 +531,13 @@ var OpenAIChatLanguageModel = class {
531
531
  }
532
532
  baseArgs.max_tokens = void 0;
533
533
  }
534
- } else if (this.modelId.startsWith("gpt-4o-search-preview")) {
534
+ } else if (this.modelId.startsWith("gpt-4o-search-preview") || this.modelId.startsWith("gpt-4o-mini-search-preview")) {
535
535
  if (baseArgs.temperature != null) {
536
536
  baseArgs.temperature = void 0;
537
537
  warnings.push({
538
538
  type: "unsupported-setting",
539
539
  setting: "temperature",
540
- details: "temperature is not supported for the gpt-4o-search-preview model and has been removed."
540
+ details: "temperature is not supported for the search preview models and has been removed."
541
541
  });
542
542
  }
543
543
  }
@@ -560,7 +560,7 @@ var OpenAIChatLanguageModel = class {
560
560
  };
561
561
  }
562
562
  async doGenerate(options) {
563
- var _a, _b, _c, _d, _e, _f, _g;
563
+ var _a, _b, _c, _d, _e, _f, _g, _h;
564
564
  const { args: body, warnings } = this.getArgs(options);
565
565
  const {
566
566
  responseHeaders,
@@ -580,10 +580,23 @@ var OpenAIChatLanguageModel = class {
580
580
  abortSignal: options.abortSignal,
581
581
  fetch: this.config.fetch
582
582
  });
583
- const { messages: rawPrompt, ...rawSettings } = body;
584
583
  const choice = response.choices[0];
585
- const completionTokenDetails = (_a = response.usage) == null ? void 0 : _a.completion_tokens_details;
586
- const promptTokenDetails = (_b = response.usage) == null ? void 0 : _b.prompt_tokens_details;
584
+ const content = [];
585
+ const text = choice.message.content;
586
+ if (text != null && text.length > 0) {
587
+ content.push({ type: "text", text });
588
+ }
589
+ for (const toolCall of (_a = choice.message.tool_calls) != null ? _a : []) {
590
+ content.push({
591
+ type: "tool-call",
592
+ toolCallType: "function",
593
+ toolCallId: (_b = toolCall.id) != null ? _b : (0, import_provider_utils3.generateId)(),
594
+ toolName: toolCall.function.name,
595
+ args: toolCall.function.arguments
596
+ });
597
+ }
598
+ const completionTokenDetails = (_c = response.usage) == null ? void 0 : _c.completion_tokens_details;
599
+ const promptTokenDetails = (_d = response.usage) == null ? void 0 : _d.prompt_tokens_details;
587
600
  const providerMetadata = { openai: {} };
588
601
  if ((completionTokenDetails == null ? void 0 : completionTokenDetails.reasoning_tokens) != null) {
589
602
  providerMetadata.openai.reasoningTokens = completionTokenDetails == null ? void 0 : completionTokenDetails.reasoning_tokens;
@@ -598,21 +611,11 @@ var OpenAIChatLanguageModel = class {
598
611
  providerMetadata.openai.cachedPromptTokens = promptTokenDetails == null ? void 0 : promptTokenDetails.cached_tokens;
599
612
  }
600
613
  return {
601
- text: choice.message.content != null ? { type: "text", text: choice.message.content } : void 0,
602
- toolCalls: (_c = choice.message.tool_calls) == null ? void 0 : _c.map((toolCall) => {
603
- var _a2;
604
- return {
605
- type: "tool-call",
606
- toolCallType: "function",
607
- toolCallId: (_a2 = toolCall.id) != null ? _a2 : (0, import_provider_utils3.generateId)(),
608
- toolName: toolCall.function.name,
609
- args: toolCall.function.arguments
610
- };
611
- }),
614
+ content,
612
615
  finishReason: mapOpenAIFinishReason(choice.finish_reason),
613
616
  usage: {
614
- inputTokens: (_e = (_d = response.usage) == null ? void 0 : _d.prompt_tokens) != null ? _e : void 0,
615
- outputTokens: (_g = (_f = response.usage) == null ? void 0 : _f.completion_tokens) != null ? _g : void 0
617
+ inputTokens: (_f = (_e = response.usage) == null ? void 0 : _e.prompt_tokens) != null ? _f : void 0,
618
+ outputTokens: (_h = (_g = response.usage) == null ? void 0 : _g.completion_tokens) != null ? _h : void 0
616
619
  },
617
620
  request: { body },
618
621
  response: {
@@ -660,6 +663,9 @@ var OpenAIChatLanguageModel = class {
660
663
  return {
661
664
  stream: response.pipeThrough(
662
665
  new TransformStream({
666
+ start(controller) {
667
+ controller.enqueue({ type: "stream-start", warnings });
668
+ },
663
669
  transform(chunk, controller) {
664
670
  var _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k, _l;
665
671
  if (!chunk.success) {
@@ -817,8 +823,7 @@ var OpenAIChatLanguageModel = class {
817
823
  })
818
824
  ),
819
825
  request: { body },
820
- response: { headers: responseHeaders },
821
- warnings
826
+ response: { headers: responseHeaders }
822
827
  };
823
828
  }
824
829
  };
@@ -919,7 +924,7 @@ var openaiChatChunkSchema = import_zod3.z.union([
919
924
  openaiErrorDataSchema
920
925
  ]);
921
926
  function isReasoningModel(modelId) {
922
- return modelId === "o1" || modelId.startsWith("o1-") || modelId === "o3" || modelId.startsWith("o3-");
927
+ return modelId.startsWith("o");
923
928
  }
924
929
  function isAudioModel(modelId) {
925
930
  return modelId.startsWith("gpt-4o-audio-preview");
@@ -1142,7 +1147,7 @@ var OpenAICompletionLanguageModel = class {
1142
1147
  });
1143
1148
  const choice = response.choices[0];
1144
1149
  return {
1145
- text: { type: "text", text: choice.text },
1150
+ content: [{ type: "text", text: choice.text }],
1146
1151
  usage: {
1147
1152
  inputTokens: response.usage.prompt_tokens,
1148
1153
  outputTokens: response.usage.completion_tokens
@@ -1190,6 +1195,9 @@ var OpenAICompletionLanguageModel = class {
1190
1195
  return {
1191
1196
  stream: response.pipeThrough(
1192
1197
  new TransformStream({
1198
+ start(controller) {
1199
+ controller.enqueue({ type: "stream-start", warnings });
1200
+ },
1193
1201
  transform(chunk, controller) {
1194
1202
  if (!chunk.success) {
1195
1203
  finishReason = "error";
@@ -1241,9 +1249,8 @@ var OpenAICompletionLanguageModel = class {
1241
1249
  }
1242
1250
  })
1243
1251
  ),
1244
- response: { headers: responseHeaders },
1245
- warnings,
1246
- request: { body: JSON.stringify(body) }
1252
+ request: { body },
1253
+ response: { headers: responseHeaders }
1247
1254
  };
1248
1255
  }
1249
1256
  };
@@ -1849,6 +1856,7 @@ var OpenAIResponsesLanguageModel = class {
1849
1856
  constructor(modelId, config) {
1850
1857
  this.specificationVersion = "v2";
1851
1858
  this.defaultObjectGenerationMode = "json";
1859
+ this.supportsStructuredOutputs = true;
1852
1860
  this.modelId = modelId;
1853
1861
  this.config = config;
1854
1862
  }
@@ -1974,7 +1982,7 @@ var OpenAIResponsesLanguageModel = class {
1974
1982
  };
1975
1983
  }
1976
1984
  async doGenerate(options) {
1977
- var _a, _b, _c, _d, _e;
1985
+ var _a, _b, _c, _d, _e, _f, _g, _h;
1978
1986
  const { args: body, warnings } = this.getArgs(options);
1979
1987
  const {
1980
1988
  responseHeaders,
@@ -2038,36 +2046,45 @@ var OpenAIResponsesLanguageModel = class {
2038
2046
  abortSignal: options.abortSignal,
2039
2047
  fetch: this.config.fetch
2040
2048
  });
2041
- const outputTextElements = response.output.filter((output) => output.type === "message").flatMap((output) => output.content).filter((content) => content.type === "output_text");
2042
- const toolCalls = response.output.filter((output) => output.type === "function_call").map((output) => ({
2043
- type: "tool-call",
2044
- toolCallType: "function",
2045
- toolCallId: output.call_id,
2046
- toolName: output.name,
2047
- args: output.arguments
2048
- }));
2049
+ const content = [];
2050
+ for (const part of response.output) {
2051
+ switch (part.type) {
2052
+ case "message": {
2053
+ for (const contentPart of part.content) {
2054
+ content.push({
2055
+ type: "text",
2056
+ text: contentPart.text
2057
+ });
2058
+ for (const annotation of contentPart.annotations) {
2059
+ content.push({
2060
+ type: "source",
2061
+ sourceType: "url",
2062
+ id: (_c = (_b = (_a = this.config).generateId) == null ? void 0 : _b.call(_a)) != null ? _c : (0, import_provider_utils8.generateId)(),
2063
+ url: annotation.url,
2064
+ title: annotation.title
2065
+ });
2066
+ }
2067
+ }
2068
+ break;
2069
+ }
2070
+ case "function_call": {
2071
+ content.push({
2072
+ type: "tool-call",
2073
+ toolCallType: "function",
2074
+ toolCallId: part.call_id,
2075
+ toolName: part.name,
2076
+ args: part.arguments
2077
+ });
2078
+ break;
2079
+ }
2080
+ }
2081
+ }
2049
2082
  return {
2050
- text: {
2051
- type: "text",
2052
- text: outputTextElements.map((content) => content.text).join("\n")
2053
- },
2054
- sources: outputTextElements.flatMap(
2055
- (content) => content.annotations.map((annotation) => {
2056
- var _a2, _b2, _c2;
2057
- return {
2058
- type: "source",
2059
- sourceType: "url",
2060
- id: (_c2 = (_b2 = (_a2 = this.config).generateId) == null ? void 0 : _b2.call(_a2)) != null ? _c2 : (0, import_provider_utils8.generateId)(),
2061
- url: annotation.url,
2062
- title: annotation.title
2063
- };
2064
- })
2065
- ),
2083
+ content,
2066
2084
  finishReason: mapOpenAIResponseFinishReason({
2067
- finishReason: (_a = response.incomplete_details) == null ? void 0 : _a.reason,
2068
- hasToolCalls: toolCalls.length > 0
2085
+ finishReason: (_d = response.incomplete_details) == null ? void 0 : _d.reason,
2086
+ hasToolCalls: content.some((part) => part.type === "tool-call")
2069
2087
  }),
2070
- toolCalls: toolCalls.length > 0 ? toolCalls : void 0,
2071
2088
  usage: {
2072
2089
  inputTokens: response.usage.input_tokens,
2073
2090
  outputTokens: response.usage.output_tokens
@@ -2083,8 +2100,8 @@ var OpenAIResponsesLanguageModel = class {
2083
2100
  providerMetadata: {
2084
2101
  openai: {
2085
2102
  responseId: response.id,
2086
- cachedPromptTokens: (_c = (_b = response.usage.input_tokens_details) == null ? void 0 : _b.cached_tokens) != null ? _c : null,
2087
- reasoningTokens: (_e = (_d = response.usage.output_tokens_details) == null ? void 0 : _d.reasoning_tokens) != null ? _e : null
2103
+ cachedPromptTokens: (_f = (_e = response.usage.input_tokens_details) == null ? void 0 : _e.cached_tokens) != null ? _f : null,
2104
+ reasoningTokens: (_h = (_g = response.usage.output_tokens_details) == null ? void 0 : _g.reasoning_tokens) != null ? _h : null
2088
2105
  }
2089
2106
  },
2090
2107
  warnings
@@ -2123,6 +2140,9 @@ var OpenAIResponsesLanguageModel = class {
2123
2140
  return {
2124
2141
  stream: response.pipeThrough(
2125
2142
  new TransformStream({
2143
+ start(controller) {
2144
+ controller.enqueue({ type: "stream-start", warnings });
2145
+ },
2126
2146
  transform(chunk, controller) {
2127
2147
  var _a, _b, _c, _d, _e, _f, _g, _h;
2128
2148
  if (!chunk.success) {
@@ -2217,8 +2237,7 @@ var OpenAIResponsesLanguageModel = class {
2217
2237
  })
2218
2238
  ),
2219
2239
  request: { body },
2220
- response: { headers: responseHeaders },
2221
- warnings
2240
+ response: { headers: responseHeaders }
2222
2241
  };
2223
2242
  }
2224
2243
  };
@@ -2358,14 +2377,113 @@ var openaiResponsesProviderOptionsSchema = import_zod9.z.object({
2358
2377
  instructions: import_zod9.z.string().nullish()
2359
2378
  });
2360
2379
 
2380
+ // src/openai-speech-model.ts
2381
+ var import_provider_utils9 = require("@ai-sdk/provider-utils");
2382
+ var import_zod10 = require("zod");
2383
+ var OpenAIProviderOptionsSchema = import_zod10.z.object({
2384
+ instructions: import_zod10.z.string().nullish(),
2385
+ speed: import_zod10.z.number().min(0.25).max(4).default(1).nullish()
2386
+ });
2387
+ var OpenAISpeechModel = class {
2388
+ constructor(modelId, config) {
2389
+ this.modelId = modelId;
2390
+ this.config = config;
2391
+ this.specificationVersion = "v1";
2392
+ }
2393
+ get provider() {
2394
+ return this.config.provider;
2395
+ }
2396
+ getArgs({
2397
+ text,
2398
+ voice = "alloy",
2399
+ outputFormat = "mp3",
2400
+ speed,
2401
+ instructions,
2402
+ providerOptions
2403
+ }) {
2404
+ const warnings = [];
2405
+ const openAIOptions = (0, import_provider_utils9.parseProviderOptions)({
2406
+ provider: "openai",
2407
+ providerOptions,
2408
+ schema: OpenAIProviderOptionsSchema
2409
+ });
2410
+ const requestBody = {
2411
+ model: this.modelId,
2412
+ input: text,
2413
+ voice,
2414
+ response_format: "mp3",
2415
+ speed,
2416
+ instructions
2417
+ };
2418
+ if (outputFormat) {
2419
+ if (["mp3", "opus", "aac", "flac", "wav", "pcm"].includes(outputFormat)) {
2420
+ requestBody.response_format = outputFormat;
2421
+ } else {
2422
+ warnings.push({
2423
+ type: "unsupported-setting",
2424
+ setting: "outputFormat",
2425
+ details: `Unsupported output format: ${outputFormat}. Using mp3 instead.`
2426
+ });
2427
+ }
2428
+ }
2429
+ if (openAIOptions) {
2430
+ const speechModelOptions = {};
2431
+ for (const key in speechModelOptions) {
2432
+ const value = speechModelOptions[key];
2433
+ if (value !== void 0) {
2434
+ requestBody[key] = value;
2435
+ }
2436
+ }
2437
+ }
2438
+ return {
2439
+ requestBody,
2440
+ warnings
2441
+ };
2442
+ }
2443
+ async doGenerate(options) {
2444
+ var _a, _b, _c;
2445
+ const currentDate = (_c = (_b = (_a = this.config._internal) == null ? void 0 : _a.currentDate) == null ? void 0 : _b.call(_a)) != null ? _c : /* @__PURE__ */ new Date();
2446
+ const { requestBody, warnings } = this.getArgs(options);
2447
+ const {
2448
+ value: audio,
2449
+ responseHeaders,
2450
+ rawValue: rawResponse
2451
+ } = await (0, import_provider_utils9.postJsonToApi)({
2452
+ url: this.config.url({
2453
+ path: "/audio/speech",
2454
+ modelId: this.modelId
2455
+ }),
2456
+ headers: (0, import_provider_utils9.combineHeaders)(this.config.headers(), options.headers),
2457
+ body: requestBody,
2458
+ failedResponseHandler: openaiFailedResponseHandler,
2459
+ successfulResponseHandler: (0, import_provider_utils9.createBinaryResponseHandler)(),
2460
+ abortSignal: options.abortSignal,
2461
+ fetch: this.config.fetch
2462
+ });
2463
+ return {
2464
+ audio,
2465
+ warnings,
2466
+ request: {
2467
+ body: JSON.stringify(requestBody)
2468
+ },
2469
+ response: {
2470
+ timestamp: currentDate,
2471
+ modelId: this.modelId,
2472
+ headers: responseHeaders,
2473
+ body: rawResponse
2474
+ }
2475
+ };
2476
+ }
2477
+ };
2478
+
2361
2479
  // src/openai-provider.ts
2362
2480
  function createOpenAI(options = {}) {
2363
2481
  var _a, _b, _c;
2364
- const baseURL = (_a = (0, import_provider_utils9.withoutTrailingSlash)(options.baseURL)) != null ? _a : "https://api.openai.com/v1";
2482
+ const baseURL = (_a = (0, import_provider_utils10.withoutTrailingSlash)(options.baseURL)) != null ? _a : "https://api.openai.com/v1";
2365
2483
  const compatibility = (_b = options.compatibility) != null ? _b : "compatible";
2366
2484
  const providerName = (_c = options.name) != null ? _c : "openai";
2367
2485
  const getHeaders = () => ({
2368
- Authorization: `Bearer ${(0, import_provider_utils9.loadApiKey)({
2486
+ Authorization: `Bearer ${(0, import_provider_utils10.loadApiKey)({
2369
2487
  apiKey: options.apiKey,
2370
2488
  environmentVariableName: "OPENAI_API_KEY",
2371
2489
  description: "OpenAI"
@@ -2406,6 +2524,12 @@ function createOpenAI(options = {}) {
2406
2524
  headers: getHeaders,
2407
2525
  fetch: options.fetch
2408
2526
  });
2527
+ const createSpeechModel = (modelId) => new OpenAISpeechModel(modelId, {
2528
+ provider: `${providerName}.speech`,
2529
+ url: ({ path }) => `${baseURL}${path}`,
2530
+ headers: getHeaders,
2531
+ fetch: options.fetch
2532
+ });
2409
2533
  const createLanguageModel = (modelId, settings) => {
2410
2534
  if (new.target) {
2411
2535
  throw new Error(
@@ -2442,6 +2566,8 @@ function createOpenAI(options = {}) {
2442
2566
  provider.imageModel = createImageModel;
2443
2567
  provider.transcription = createTranscriptionModel;
2444
2568
  provider.transcriptionModel = createTranscriptionModel;
2569
+ provider.speech = createSpeechModel;
2570
+ provider.speechModel = createSpeechModel;
2445
2571
  provider.tools = openaiTools;
2446
2572
  return provider;
2447
2573
  }