@ai-sdk/openai 2.0.0-canary.7 → 2.0.0-canary.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -521,13 +521,13 @@ var OpenAIChatLanguageModel = class {
  }
  baseArgs.max_tokens = void 0;
  }
- } else if (this.modelId.startsWith("gpt-4o-search-preview")) {
+ } else if (this.modelId.startsWith("gpt-4o-search-preview") || this.modelId.startsWith("gpt-4o-mini-search-preview")) {
  if (baseArgs.temperature != null) {
  baseArgs.temperature = void 0;
  warnings.push({
  type: "unsupported-setting",
  setting: "temperature",
- details: "temperature is not supported for the gpt-4o-search-preview model and has been removed."
+ details: "temperature is not supported for the search preview models and has been removed."
  });
  }
  }
@@ -550,7 +550,7 @@ var OpenAIChatLanguageModel = class {
  };
  }
  async doGenerate(options) {
- var _a, _b, _c, _d, _e, _f, _g;
+ var _a, _b, _c, _d, _e, _f, _g, _h;
  const { args: body, warnings } = this.getArgs(options);
  const {
  responseHeaders,
@@ -570,10 +570,23 @@ var OpenAIChatLanguageModel = class {
  abortSignal: options.abortSignal,
  fetch: this.config.fetch
  });
- const { messages: rawPrompt, ...rawSettings } = body;
  const choice = response.choices[0];
- const completionTokenDetails = (_a = response.usage) == null ? void 0 : _a.completion_tokens_details;
- const promptTokenDetails = (_b = response.usage) == null ? void 0 : _b.prompt_tokens_details;
+ const content = [];
+ const text = choice.message.content;
+ if (text != null && text.length > 0) {
+ content.push({ type: "text", text });
+ }
+ for (const toolCall of (_a = choice.message.tool_calls) != null ? _a : []) {
+ content.push({
+ type: "tool-call",
+ toolCallType: "function",
+ toolCallId: (_b = toolCall.id) != null ? _b : generateId(),
+ toolName: toolCall.function.name,
+ args: toolCall.function.arguments
+ });
+ }
+ const completionTokenDetails = (_c = response.usage) == null ? void 0 : _c.completion_tokens_details;
+ const promptTokenDetails = (_d = response.usage) == null ? void 0 : _d.prompt_tokens_details;
  const providerMetadata = { openai: {} };
  if ((completionTokenDetails == null ? void 0 : completionTokenDetails.reasoning_tokens) != null) {
  providerMetadata.openai.reasoningTokens = completionTokenDetails == null ? void 0 : completionTokenDetails.reasoning_tokens;
@@ -588,21 +601,11 @@ var OpenAIChatLanguageModel = class {
  providerMetadata.openai.cachedPromptTokens = promptTokenDetails == null ? void 0 : promptTokenDetails.cached_tokens;
  }
  return {
- text: choice.message.content != null ? { type: "text", text: choice.message.content } : void 0,
- toolCalls: (_c = choice.message.tool_calls) == null ? void 0 : _c.map((toolCall) => {
- var _a2;
- return {
- type: "tool-call",
- toolCallType: "function",
- toolCallId: (_a2 = toolCall.id) != null ? _a2 : generateId(),
- toolName: toolCall.function.name,
- args: toolCall.function.arguments
- };
- }),
+ content,
  finishReason: mapOpenAIFinishReason(choice.finish_reason),
  usage: {
- inputTokens: (_e = (_d = response.usage) == null ? void 0 : _d.prompt_tokens) != null ? _e : void 0,
- outputTokens: (_g = (_f = response.usage) == null ? void 0 : _f.completion_tokens) != null ? _g : void 0
+ inputTokens: (_f = (_e = response.usage) == null ? void 0 : _e.prompt_tokens) != null ? _f : void 0,
+ outputTokens: (_h = (_g = response.usage) == null ? void 0 : _g.completion_tokens) != null ? _h : void 0
  },
  request: { body },
  response: {
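
Note: with canary.9, `doGenerate` returns the unified `content` array of the v2 language-model spec instead of separate `text` and `toolCalls` fields. A minimal consumer sketch, assuming `model` is an `OpenAIChatLanguageModel` instance and `options` a valid call-options object:

    // Sketch: reading the new unified `content` array.
    const result = await model.doGenerate(options);

    // Text parts replace the old `result.text` field.
    const text = result.content
      .filter((part) => part.type === "text")
      .map((part) => part.text)
      .join("");

    // Tool-call parts replace the old `result.toolCalls` array.
    const toolCalls = result.content.filter((part) => part.type === "tool-call");
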
@@ -650,6 +653,9 @@ var OpenAIChatLanguageModel = class {
  return {
  stream: response.pipeThrough(
  new TransformStream({
+ start(controller) {
+ controller.enqueue({ type: "stream-start", warnings });
+ },
  transform(chunk, controller) {
  var _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k, _l;
  if (!chunk.success) {
@@ -807,8 +813,7 @@ var OpenAIChatLanguageModel = class {
  })
  ),
  request: { body },
- response: { headers: responseHeaders },
- warnings
+ response: { headers: responseHeaders }
  };
  }
  };
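
Note: `warnings` is no longer returned next to `stream` from `doStream`; it now arrives as the first stream chunk, enqueued by the `start` callback added above. A sketch of the corresponding consumer change:

    // Sketch: warnings are now the first chunk of the stream.
    const { stream } = await model.doStream(options);
    for await (const chunk of stream) {
      if (chunk.type === "stream-start") {
        console.log(chunk.warnings); // previously: the doStream result's `warnings`
      }
      // ...handle the remaining chunk types as before...
    }
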
@@ -909,7 +914,7 @@ var openaiChatChunkSchema = z3.union([
  openaiErrorDataSchema
  ]);
  function isReasoningModel(modelId) {
- return modelId === "o1" || modelId.startsWith("o1-") || modelId === "o3" || modelId.startsWith("o3-");
+ return modelId.startsWith("o");
  }
  function isAudioModel(modelId) {
  return modelId.startsWith("gpt-4o-audio-preview");
@@ -1140,7 +1145,7 @@ var OpenAICompletionLanguageModel = class {
  });
  const choice = response.choices[0];
  return {
- text: { type: "text", text: choice.text },
+ content: [{ type: "text", text: choice.text }],
  usage: {
  inputTokens: response.usage.prompt_tokens,
  outputTokens: response.usage.completion_tokens
@@ -1188,6 +1193,9 @@ var OpenAICompletionLanguageModel = class {
  return {
  stream: response.pipeThrough(
  new TransformStream({
+ start(controller) {
+ controller.enqueue({ type: "stream-start", warnings });
+ },
  transform(chunk, controller) {
  if (!chunk.success) {
  finishReason = "error";
@@ -1239,9 +1247,8 @@ var OpenAICompletionLanguageModel = class {
  }
  })
  ),
- response: { headers: responseHeaders },
- warnings,
- request: { body: JSON.stringify(body) }
+ request: { body },
+ response: { headers: responseHeaders }
  };
  }
  };
@@ -1874,6 +1881,7 @@ var OpenAIResponsesLanguageModel = class {
  constructor(modelId, config) {
  this.specificationVersion = "v2";
  this.defaultObjectGenerationMode = "json";
+ this.supportsStructuredOutputs = true;
  this.modelId = modelId;
  this.config = config;
  }
@@ -1999,7 +2007,7 @@ var OpenAIResponsesLanguageModel = class {
  };
  }
  async doGenerate(options) {
- var _a, _b, _c, _d, _e;
+ var _a, _b, _c, _d, _e, _f, _g, _h;
  const { args: body, warnings } = this.getArgs(options);
  const {
  responseHeaders,
@@ -2063,36 +2071,45 @@ var OpenAIResponsesLanguageModel = class {
  abortSignal: options.abortSignal,
  fetch: this.config.fetch
  });
- const outputTextElements = response.output.filter((output) => output.type === "message").flatMap((output) => output.content).filter((content) => content.type === "output_text");
- const toolCalls = response.output.filter((output) => output.type === "function_call").map((output) => ({
- type: "tool-call",
- toolCallType: "function",
- toolCallId: output.call_id,
- toolName: output.name,
- args: output.arguments
- }));
+ const content = [];
+ for (const part of response.output) {
+ switch (part.type) {
+ case "message": {
+ for (const contentPart of part.content) {
+ content.push({
+ type: "text",
+ text: contentPart.text
+ });
+ for (const annotation of contentPart.annotations) {
+ content.push({
+ type: "source",
+ sourceType: "url",
+ id: (_c = (_b = (_a = this.config).generateId) == null ? void 0 : _b.call(_a)) != null ? _c : generateId2(),
+ url: annotation.url,
+ title: annotation.title
+ });
+ }
+ }
+ break;
+ }
+ case "function_call": {
+ content.push({
+ type: "tool-call",
+ toolCallType: "function",
+ toolCallId: part.call_id,
+ toolName: part.name,
+ args: part.arguments
+ });
+ break;
+ }
+ }
+ }
  return {
- text: {
- type: "text",
- text: outputTextElements.map((content) => content.text).join("\n")
- },
- sources: outputTextElements.flatMap(
- (content) => content.annotations.map((annotation) => {
- var _a2, _b2, _c2;
- return {
- type: "source",
- sourceType: "url",
- id: (_c2 = (_b2 = (_a2 = this.config).generateId) == null ? void 0 : _b2.call(_a2)) != null ? _c2 : generateId2(),
- url: annotation.url,
- title: annotation.title
- };
- })
- ),
+ content,
  finishReason: mapOpenAIResponseFinishReason({
- finishReason: (_a = response.incomplete_details) == null ? void 0 : _a.reason,
- hasToolCalls: toolCalls.length > 0
+ finishReason: (_d = response.incomplete_details) == null ? void 0 : _d.reason,
+ hasToolCalls: content.some((part) => part.type === "tool-call")
  }),
- toolCalls: toolCalls.length > 0 ? toolCalls : void 0,
  usage: {
  inputTokens: response.usage.input_tokens,
  outputTokens: response.usage.output_tokens
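
Note: the Responses model likewise returns one ordered `content` array; URL annotations are now emitted as `source` parts directly after the text part they annotate, replacing the old top-level `sources` field. A sketch of extracting citations under the new shape:

    // Sketch: url citations are now `source` parts inside `content`.
    const result = await model.doGenerate(options);
    for (const part of result.content) {
      if (part.type === "source") {
        console.log(part.url, part.title); // previously: result.sources
      }
    }
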
@@ -2108,8 +2125,8 @@ var OpenAIResponsesLanguageModel = class {
  providerMetadata: {
  openai: {
  responseId: response.id,
- cachedPromptTokens: (_c = (_b = response.usage.input_tokens_details) == null ? void 0 : _b.cached_tokens) != null ? _c : null,
- reasoningTokens: (_e = (_d = response.usage.output_tokens_details) == null ? void 0 : _d.reasoning_tokens) != null ? _e : null
+ cachedPromptTokens: (_f = (_e = response.usage.input_tokens_details) == null ? void 0 : _e.cached_tokens) != null ? _f : null,
+ reasoningTokens: (_h = (_g = response.usage.output_tokens_details) == null ? void 0 : _g.reasoning_tokens) != null ? _h : null
  }
  },
  warnings
@@ -2148,6 +2165,9 @@ var OpenAIResponsesLanguageModel = class {
  return {
  stream: response.pipeThrough(
  new TransformStream({
+ start(controller) {
+ controller.enqueue({ type: "stream-start", warnings });
+ },
  transform(chunk, controller) {
  var _a, _b, _c, _d, _e, _f, _g, _h;
  if (!chunk.success) {
@@ -2242,8 +2262,7 @@ var OpenAIResponsesLanguageModel = class {
  })
  ),
  request: { body },
- response: { headers: responseHeaders },
- warnings
+ response: { headers: responseHeaders }
  };
  }
  };
@@ -2383,6 +2402,110 @@ var openaiResponsesProviderOptionsSchema = z9.object({
  instructions: z9.string().nullish()
  });
 
+ // src/openai-speech-model.ts
+ import {
+ combineHeaders as combineHeaders7,
+ createBinaryResponseHandler,
+ parseProviderOptions as parseProviderOptions4,
+ postJsonToApi as postJsonToApi6
+ } from "@ai-sdk/provider-utils";
+ import { z as z10 } from "zod";
+ var OpenAIProviderOptionsSchema = z10.object({
+ instructions: z10.string().nullish(),
+ speed: z10.number().min(0.25).max(4).default(1).nullish()
+ });
+ var OpenAISpeechModel = class {
+ constructor(modelId, config) {
+ this.modelId = modelId;
+ this.config = config;
+ this.specificationVersion = "v1";
+ }
+ get provider() {
+ return this.config.provider;
+ }
+ getArgs({
+ text,
+ voice = "alloy",
+ outputFormat = "mp3",
+ speed,
+ instructions,
+ providerOptions
+ }) {
+ const warnings = [];
+ const openAIOptions = parseProviderOptions4({
+ provider: "openai",
+ providerOptions,
+ schema: OpenAIProviderOptionsSchema
+ });
+ const requestBody = {
+ model: this.modelId,
+ input: text,
+ voice,
+ response_format: "mp3",
+ speed,
+ instructions
+ };
+ if (outputFormat) {
+ if (["mp3", "opus", "aac", "flac", "wav", "pcm"].includes(outputFormat)) {
+ requestBody.response_format = outputFormat;
+ } else {
+ warnings.push({
+ type: "unsupported-setting",
+ setting: "outputFormat",
+ details: `Unsupported output format: ${outputFormat}. Using mp3 instead.`
+ });
+ }
+ }
+ if (openAIOptions) {
+ const speechModelOptions = {};
+ for (const key in speechModelOptions) {
+ const value = speechModelOptions[key];
+ if (value !== void 0) {
+ requestBody[key] = value;
+ }
+ }
+ }
+ return {
+ requestBody,
+ warnings
+ };
+ }
+ async doGenerate(options) {
+ var _a, _b, _c;
+ const currentDate = (_c = (_b = (_a = this.config._internal) == null ? void 0 : _a.currentDate) == null ? void 0 : _b.call(_a)) != null ? _c : /* @__PURE__ */ new Date();
+ const { requestBody, warnings } = this.getArgs(options);
+ const {
+ value: audio,
+ responseHeaders,
+ rawValue: rawResponse
+ } = await postJsonToApi6({
+ url: this.config.url({
+ path: "/audio/speech",
+ modelId: this.modelId
+ }),
+ headers: combineHeaders7(this.config.headers(), options.headers),
+ body: requestBody,
+ failedResponseHandler: openaiFailedResponseHandler,
+ successfulResponseHandler: createBinaryResponseHandler(),
+ abortSignal: options.abortSignal,
+ fetch: this.config.fetch
+ });
+ return {
+ audio,
+ warnings,
+ request: {
+ body: JSON.stringify(requestBody)
+ },
+ response: {
+ timestamp: currentDate,
+ modelId: this.modelId,
+ headers: responseHeaders,
+ body: rawResponse
+ }
+ };
+ }
+ };
+
  // src/openai-provider.ts
  function createOpenAI(options = {}) {
  var _a, _b, _c;
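
Note: the new `OpenAISpeechModel` posts to `/audio/speech` and returns the binary audio plus request/response metadata. A sketch of calling it through the v1 speech-model interface defined above (the model ID is illustrative; `openai.speech` is registered on the provider further down in this diff):

    // Sketch: exercising the speech model's doGenerate directly.
    const speechModel = openai.speech("tts-1");
    const { audio, warnings } = await speechModel.doGenerate({
      text: "Hello, world!",
      voice: "nova",       // defaults to "alloy" when omitted
      outputFormat: "wav", // in the supported list; unknown values fall back to mp3 with a warning
    });
    // `audio` is the binary body returned by /audio/speech.
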
@@ -2431,6 +2554,12 @@ function createOpenAI(options = {}) {
  headers: getHeaders,
  fetch: options.fetch
  });
+ const createSpeechModel = (modelId) => new OpenAISpeechModel(modelId, {
+ provider: `${providerName}.speech`,
+ url: ({ path }) => `${baseURL}${path}`,
+ headers: getHeaders,
+ fetch: options.fetch
+ });
  const createLanguageModel = (modelId, settings) => {
  if (new.target) {
  throw new Error(
@@ -2467,6 +2596,8 @@ function createOpenAI(options = {}) {
  provider.imageModel = createImageModel;
  provider.transcription = createTranscriptionModel;
  provider.transcriptionModel = createTranscriptionModel;
+ provider.speech = createSpeechModel;
+ provider.speechModel = createSpeechModel;
  provider.tools = openaiTools;
  return provider;
 }
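
With `provider.speech` and `provider.speechModel` registered, the new model plugs into the AI SDK helper layer. A sketch, assuming a matching `ai` canary that exports the `experimental_generateSpeech` helper (model ID illustrative):

    import { openai } from "@ai-sdk/openai";
    import { experimental_generateSpeech as generateSpeech } from "ai";

    // Sketch: end-to-end usage of the newly registered speech model.
    const { audio } = await generateSpeech({
      model: openai.speech("tts-1"),
      text: "Hello from 2.0.0-canary.9!",
    });
    // `audio` wraps the returned bytes; mp3 is the default response_format.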