@ai-sdk/openai 2.0.0-canary.6 → 2.0.0-canary.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -570,10 +570,23 @@ var OpenAIChatLanguageModel = class {
  abortSignal: options.abortSignal,
  fetch: this.config.fetch
  });
- const { messages: rawPrompt, ...rawSettings } = body;
  const choice = response.choices[0];
- const completionTokenDetails = (_a = response.usage) == null ? void 0 : _a.completion_tokens_details;
- const promptTokenDetails = (_b = response.usage) == null ? void 0 : _b.prompt_tokens_details;
+ const content = [];
+ const text = choice.message.content;
+ if (text != null && text.length > 0) {
+ content.push({ type: "text", text });
+ }
+ for (const toolCall of (_a = choice.message.tool_calls) != null ? _a : []) {
+ content.push({
+ type: "tool-call",
+ toolCallType: "function",
+ toolCallId: (_b = toolCall.id) != null ? _b : generateId(),
+ toolName: toolCall.function.name,
+ args: toolCall.function.arguments
+ });
+ }
+ const completionTokenDetails = (_c = response.usage) == null ? void 0 : _c.completion_tokens_details;
+ const promptTokenDetails = (_d = response.usage) == null ? void 0 : _d.prompt_tokens_details;
  const providerMetadata = { openai: {} };
  if ((completionTokenDetails == null ? void 0 : completionTokenDetails.reasoning_tokens) != null) {
  providerMetadata.openai.reasoningTokens = completionTokenDetails == null ? void 0 : completionTokenDetails.reasoning_tokens;
@@ -588,16 +601,7 @@ var OpenAIChatLanguageModel = class {
  providerMetadata.openai.cachedPromptTokens = promptTokenDetails == null ? void 0 : promptTokenDetails.cached_tokens;
  }
  return {
- text: (_c = choice.message.content) != null ? _c : void 0,
- toolCalls: (_d = choice.message.tool_calls) == null ? void 0 : _d.map((toolCall) => {
- var _a2;
- return {
- toolCallType: "function",
- toolCallId: (_a2 = toolCall.id) != null ? _a2 : generateId(),
- toolName: toolCall.function.name,
- args: toolCall.function.arguments
- };
- }),
+ content,
  finishReason: mapOpenAIFinishReason(choice.finish_reason),
  usage: {
  inputTokens: (_f = (_e = response.usage) == null ? void 0 : _e.prompt_tokens) != null ? _f : void 0,
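
The chat model's doGenerate result now carries a single ordered content array in place of the separate text and toolCalls fields. A minimal sketch of adapting a caller, using only the part shapes visible in the hunk above (the helper name is illustrative):

function collectOutput(result) {
  // `result` is the value returned by OpenAIChatLanguageModel#doGenerate.
  let text = "";
  const toolCalls = [];
  for (const part of result.content) {
    if (part.type === "text") {
      text += part.text;
    } else if (part.type === "tool-call") {
      // { toolCallType: "function", toolCallId, toolName, args }; args is a JSON string.
      toolCalls.push(part);
    }
  }
  return { text, toolCalls };
}
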
@@ -649,6 +653,9 @@ var OpenAIChatLanguageModel = class {
  return {
  stream: response.pipeThrough(
  new TransformStream({
+ start(controller) {
+ controller.enqueue({ type: "stream-start", warnings });
+ },
  transform(chunk, controller) {
  var _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k, _l;
  if (!chunk.success) {
@@ -701,8 +708,8 @@ var OpenAIChatLanguageModel = class {
  const delta = choice.delta;
  if (delta.content != null) {
  controller.enqueue({
- type: "text-delta",
- textDelta: delta.content
+ type: "text",
+ text: delta.content
  });
  }
  const mappedLogprobs = mapOpenAIChatLogProbsOutput(
@@ -806,8 +813,7 @@ var OpenAIChatLanguageModel = class {
  })
  ),
  request: { body },
- response: { headers: responseHeaders },
- warnings
+ response: { headers: responseHeaders }
  };
  }
  };
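
Warnings have also moved off the doStream return value: they now arrive as the first stream part ({ type: "stream-start", warnings }), and text chunks change from { type: "text-delta", textDelta } to { type: "text", text }. A consumer sketch, assuming a runtime where ReadableStream is async-iterable (Node 18+):

async function drainChatStream(stream) {
  // `stream` is the ReadableStream returned by doStream under the new shape.
  let warnings = [];
  let text = "";
  for await (const part of stream) {
    if (part.type === "stream-start") {
      warnings = part.warnings; // previously returned alongside the stream
    } else if (part.type === "text") {
      text += part.text; // previously type "text-delta" with a textDelta field
    }
  }
  return { warnings, text };
}
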
@@ -1139,7 +1145,7 @@ var OpenAICompletionLanguageModel = class {
  });
  const choice = response.choices[0];
  return {
- text: choice.text,
+ content: [{ type: "text", text: choice.text }],
  usage: {
  inputTokens: response.usage.prompt_tokens,
  outputTokens: response.usage.completion_tokens
@@ -1187,6 +1193,9 @@ var OpenAICompletionLanguageModel = class {
  return {
  stream: response.pipeThrough(
  new TransformStream({
+ start(controller) {
+ controller.enqueue({ type: "stream-start", warnings });
+ },
  transform(chunk, controller) {
  if (!chunk.success) {
  finishReason = "error";
@@ -1216,8 +1225,8 @@ var OpenAICompletionLanguageModel = class {
  }
  if ((choice == null ? void 0 : choice.text) != null) {
  controller.enqueue({
- type: "text-delta",
- textDelta: choice.text
+ type: "text",
+ text: choice.text
  });
  }
  const mappedLogprobs = mapOpenAICompletionLogProbs(
@@ -1238,9 +1247,8 @@ var OpenAICompletionLanguageModel = class {
  }
  })
  ),
- response: { headers: responseHeaders },
- warnings,
- request: { body: JSON.stringify(body) }
+ request: { body },
+ response: { headers: responseHeaders }
  };
  }
  };
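
The completion model mirrors the chat changes (content array, stream-start, text chunks) and additionally stops pre-stringifying the request body: request.body is now the raw request object rather than a JSON string. Callers that logged it serialize it themselves; a one-line sketch with an illustrative helper name:

function logRequestBody(result) {
  // request.body was JSON.stringify(body) in canary.6; it is the object itself now.
  console.log(JSON.stringify(result.request.body, null, 2));
}
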
@@ -1301,7 +1309,7 @@ import {
  import { z as z5 } from "zod";
  var OpenAIEmbeddingModel = class {
  constructor(modelId, settings, config) {
- this.specificationVersion = "v1";
+ this.specificationVersion = "v2";
  this.modelId = modelId;
  this.settings = settings;
  this.config = config;
@@ -1330,7 +1338,11 @@ var OpenAIEmbeddingModel = class {
  values
  });
  }
- const { responseHeaders, value: response } = await postJsonToApi3({
+ const {
+ responseHeaders,
+ value: response,
+ rawValue
+ } = await postJsonToApi3({
  url: this.config.url({
  path: "/embeddings",
  modelId: this.modelId
@@ -1353,7 +1365,7 @@ var OpenAIEmbeddingModel = class {
  return {
  embeddings: response.data.map((item) => item.embedding),
  usage: response.usage ? { tokens: response.usage.prompt_tokens } : void 0,
- rawResponse: { headers: responseHeaders }
+ response: { headers: responseHeaders, body: rawValue }
  };
  }
  };
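
The embedding model moves to specification v2, and the v1 rawResponse field becomes a response object that also exposes the raw parsed body (rawValue). A sketch of reading the new result shape, where embeddingModel stands for an OpenAIEmbeddingModel instance and the input strings are illustrative:

async function embedSample(embeddingModel) {
  const { embeddings, usage, response } = await embeddingModel.doEmbed({
    values: ["hello", "world"],
  });
  return {
    vectors: embeddings,       // one embedding per input value
    tokens: usage?.tokens,     // prompt token count, when the API reports usage
    headers: response.headers, // previously rawResponse.headers
    rawBody: response.body,    // new: the raw parsed JSON response (rawValue)
  };
}
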
@@ -1480,7 +1492,7 @@ import {
  postFormDataToApi
  } from "@ai-sdk/provider-utils";
  import { z as z8 } from "zod";
- var OpenAIProviderOptionsSchema = z8.object({
+ var openAIProviderOptionsSchema = z8.object({
  include: z8.array(z8.string()).nullish(),
  language: z8.string().nullish(),
  prompt: z8.string().nullish(),
@@ -1565,7 +1577,7 @@ var OpenAITranscriptionModel = class {
  const openAIOptions = parseProviderOptions2({
  provider: "openai",
  providerOptions,
- schema: OpenAIProviderOptionsSchema
+ schema: openAIProviderOptionsSchema
  });
  const formData = new FormData();
  const blob = audio instanceof Uint8Array ? new Blob([audio]) : new Blob([convertBase64ToUint8Array(audio)]);
@@ -1994,7 +2006,7 @@ var OpenAIResponsesLanguageModel = class {
  };
  }
  async doGenerate(options) {
- var _a, _b, _c, _d, _e;
+ var _a, _b, _c, _d, _e, _f, _g, _h;
  const { args: body, warnings } = this.getArgs(options);
  const {
  responseHeaders,
@@ -2058,31 +2070,45 @@ var OpenAIResponsesLanguageModel = class {
  abortSignal: options.abortSignal,
  fetch: this.config.fetch
  });
- const outputTextElements = response.output.filter((output) => output.type === "message").flatMap((output) => output.content).filter((content) => content.type === "output_text");
- const toolCalls = response.output.filter((output) => output.type === "function_call").map((output) => ({
- toolCallType: "function",
- toolCallId: output.call_id,
- toolName: output.name,
- args: output.arguments
- }));
+ const content = [];
+ for (const part of response.output) {
+ switch (part.type) {
+ case "message": {
+ for (const contentPart of part.content) {
+ content.push({
+ type: "text",
+ text: contentPart.text
+ });
+ for (const annotation of contentPart.annotations) {
+ content.push({
+ type: "source",
+ sourceType: "url",
+ id: (_c = (_b = (_a = this.config).generateId) == null ? void 0 : _b.call(_a)) != null ? _c : generateId2(),
+ url: annotation.url,
+ title: annotation.title
+ });
+ }
+ }
+ break;
+ }
+ case "function_call": {
+ content.push({
+ type: "tool-call",
+ toolCallType: "function",
+ toolCallId: part.call_id,
+ toolName: part.name,
+ args: part.arguments
+ });
+ break;
+ }
+ }
+ }
  return {
- text: outputTextElements.map((content) => content.text).join("\n"),
- sources: outputTextElements.flatMap(
- (content) => content.annotations.map((annotation) => {
- var _a2, _b2, _c2;
- return {
- sourceType: "url",
- id: (_c2 = (_b2 = (_a2 = this.config).generateId) == null ? void 0 : _b2.call(_a2)) != null ? _c2 : generateId2(),
- url: annotation.url,
- title: annotation.title
- };
- })
- ),
+ content,
  finishReason: mapOpenAIResponseFinishReason({
- finishReason: (_a = response.incomplete_details) == null ? void 0 : _a.reason,
- hasToolCalls: toolCalls.length > 0
+ finishReason: (_d = response.incomplete_details) == null ? void 0 : _d.reason,
+ hasToolCalls: content.some((part) => part.type === "tool-call")
  }),
- toolCalls: toolCalls.length > 0 ? toolCalls : void 0,
  usage: {
  inputTokens: response.usage.input_tokens,
  outputTokens: response.usage.output_tokens
@@ -2098,8 +2124,8 @@ var OpenAIResponsesLanguageModel = class {
  providerMetadata: {
  openai: {
  responseId: response.id,
- cachedPromptTokens: (_c = (_b = response.usage.input_tokens_details) == null ? void 0 : _b.cached_tokens) != null ? _c : null,
- reasoningTokens: (_e = (_d = response.usage.output_tokens_details) == null ? void 0 : _d.reasoning_tokens) != null ? _e : null
+ cachedPromptTokens: (_f = (_e = response.usage.input_tokens_details) == null ? void 0 : _e.cached_tokens) != null ? _f : null,
+ reasoningTokens: (_h = (_g = response.usage.output_tokens_details) == null ? void 0 : _g.reasoning_tokens) != null ? _h : null
  }
  },
  warnings
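
The Responses model likewise folds text, URL citations, and tool calls into one content array; each citation becomes a source part pushed immediately after the text part it annotates. A sketch of pulling citations back out (helper name illustrative):

function extractUrlSources(result) {
  // `result` is the value returned by OpenAIResponsesLanguageModel#doGenerate.
  return result.content
    .filter((part) => part.type === "source" && part.sourceType === "url")
    .map(({ id, url, title }) => ({ id, url, title }));
}
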
@@ -2138,6 +2164,9 @@ var OpenAIResponsesLanguageModel = class {
  return {
  stream: response.pipeThrough(
  new TransformStream({
+ start(controller) {
+ controller.enqueue({ type: "stream-start", warnings });
+ },
  transform(chunk, controller) {
  var _a, _b, _c, _d, _e, _f, _g, _h;
  if (!chunk.success) {
@@ -2181,8 +2210,8 @@ var OpenAIResponsesLanguageModel = class {
  });
  } else if (isTextDeltaChunk(value)) {
  controller.enqueue({
- type: "text-delta",
- textDelta: value.delta
+ type: "text",
+ text: value.delta
  });
  } else if (isResponseOutputItemDoneChunk(value) && value.item.type === "function_call") {
  ongoingToolCalls[value.output_index] = void 0;
@@ -2206,12 +2235,10 @@ var OpenAIResponsesLanguageModel = class {
  } else if (isResponseAnnotationAddedChunk(value)) {
  controller.enqueue({
  type: "source",
- source: {
- sourceType: "url",
- id: (_h = (_g = (_f = self.config).generateId) == null ? void 0 : _g.call(_f)) != null ? _h : generateId2(),
- url: value.annotation.url,
- title: value.annotation.title
- }
+ sourceType: "url",
+ id: (_h = (_g = (_f = self.config).generateId) == null ? void 0 : _g.call(_f)) != null ? _h : generateId2(),
+ url: value.annotation.url,
+ title: value.annotation.title
  });
  }
  },
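
Streamed annotations lose their nested source wrapper: the id, url, and title fields now sit directly on the chunk. A before/after sketch for a stream consumer:

function onStreamPart(part) {
  if (part.type === "source") {
    // canary.6: part.source.url and part.source.title
    // canary.8: the fields are flattened onto the part itself
    console.log(`[${part.id}] ${part.title ?? part.url} -> ${part.url}`);
  }
}
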
@@ -2234,8 +2261,7 @@ var OpenAIResponsesLanguageModel = class {
  })
  ),
  request: { body },
- response: { headers: responseHeaders },
- warnings
+ response: { headers: responseHeaders }
  };
  }
  };
@@ -2375,6 +2401,110 @@ var openaiResponsesProviderOptionsSchema = z9.object({
  instructions: z9.string().nullish()
  });
 
+ // src/openai-speech-model.ts
+ import {
+ combineHeaders as combineHeaders7,
+ createBinaryResponseHandler,
+ parseProviderOptions as parseProviderOptions4,
+ postJsonToApi as postJsonToApi6
+ } from "@ai-sdk/provider-utils";
+ import { z as z10 } from "zod";
+ var OpenAIProviderOptionsSchema = z10.object({
+ instructions: z10.string().nullish(),
+ speed: z10.number().min(0.25).max(4).default(1).nullish()
+ });
+ var OpenAISpeechModel = class {
+ constructor(modelId, config) {
+ this.modelId = modelId;
+ this.config = config;
+ this.specificationVersion = "v1";
+ }
+ get provider() {
+ return this.config.provider;
+ }
+ getArgs({
+ text,
+ voice = "alloy",
+ outputFormat = "mp3",
+ speed,
+ instructions,
+ providerOptions
+ }) {
+ const warnings = [];
+ const openAIOptions = parseProviderOptions4({
+ provider: "openai",
+ providerOptions,
+ schema: OpenAIProviderOptionsSchema
+ });
+ const requestBody = {
+ model: this.modelId,
+ input: text,
+ voice,
+ response_format: "mp3",
+ speed,
+ instructions
+ };
+ if (outputFormat) {
+ if (["mp3", "opus", "aac", "flac", "wav", "pcm"].includes(outputFormat)) {
+ requestBody.response_format = outputFormat;
+ } else {
+ warnings.push({
+ type: "unsupported-setting",
+ setting: "outputFormat",
+ details: `Unsupported output format: ${outputFormat}. Using mp3 instead.`
+ });
+ }
+ }
+ if (openAIOptions) {
+ const speechModelOptions = {};
+ for (const key in speechModelOptions) {
+ const value = speechModelOptions[key];
+ if (value !== void 0) {
+ requestBody[key] = value;
+ }
+ }
+ }
+ return {
+ requestBody,
+ warnings
+ };
+ }
+ async doGenerate(options) {
+ var _a, _b, _c;
+ const currentDate = (_c = (_b = (_a = this.config._internal) == null ? void 0 : _a.currentDate) == null ? void 0 : _b.call(_a)) != null ? _c : /* @__PURE__ */ new Date();
+ const { requestBody, warnings } = this.getArgs(options);
+ const {
+ value: audio,
+ responseHeaders,
+ rawValue: rawResponse
+ } = await postJsonToApi6({
+ url: this.config.url({
+ path: "/audio/speech",
+ modelId: this.modelId
+ }),
+ headers: combineHeaders7(this.config.headers(), options.headers),
+ body: requestBody,
+ failedResponseHandler: openaiFailedResponseHandler,
+ successfulResponseHandler: createBinaryResponseHandler(),
+ abortSignal: options.abortSignal,
+ fetch: this.config.fetch
+ });
+ return {
+ audio,
+ warnings,
+ request: {
+ body: JSON.stringify(requestBody)
+ },
+ response: {
+ timestamp: currentDate,
+ modelId: this.modelId,
+ headers: responseHeaders,
+ body: rawResponse
+ }
+ };
+ }
+ };
+
  // src/openai-provider.ts
  function createOpenAI(options = {}) {
  var _a, _b, _c;
@@ -2423,6 +2553,12 @@ function createOpenAI(options = {}) {
  headers: getHeaders,
  fetch: options.fetch
  });
+ const createSpeechModel = (modelId) => new OpenAISpeechModel(modelId, {
+ provider: `${providerName}.speech`,
+ url: ({ path }) => `${baseURL}${path}`,
+ headers: getHeaders,
+ fetch: options.fetch
+ });
  const createLanguageModel = (modelId, settings) => {
  if (new.target) {
  throw new Error(
@@ -2459,6 +2595,8 @@ function createOpenAI(options = {}) {
  provider.imageModel = createImageModel;
  provider.transcription = createTranscriptionModel;
  provider.transcriptionModel = createTranscriptionModel;
+ provider.speech = createSpeechModel;
+ provider.speechModel = createSpeechModel;
  provider.tools = openaiTools;
  return provider;
  }
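
canary.8 also wires the new speech model into the provider as provider.speech / provider.speechModel. A usage sketch against the doGenerate shape shown above; the model id "tts-1" and the file write are illustrative assumptions, not taken from this diff:

import { createOpenAI } from "@ai-sdk/openai";
import { writeFile } from "node:fs/promises";

const openai = createOpenAI({ apiKey: process.env.OPENAI_API_KEY });
const speechModel = openai.speech("tts-1"); // "tts-1" is a known OpenAI model id, assumed here

const { audio, warnings } = await speechModel.doGenerate({
  text: "Hello from the new speech model.",
  voice: "alloy",      // the default applied in getArgs above
  outputFormat: "wav", // unsupported formats fall back to mp3 with a warning
});
await writeFile("hello.wav", audio); // audio is binary (createBinaryResponseHandler)
console.log(warnings);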