@ai-sdk/openai 2.0.0-canary.7 → 2.0.0-canary.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -550,7 +550,7 @@ var OpenAIChatLanguageModel = class {
     };
   }
   async doGenerate(options) {
-    var _a, _b, _c, _d, _e, _f, _g;
+    var _a, _b, _c, _d, _e, _f, _g, _h;
     const { args: body, warnings } = this.getArgs(options);
     const {
       responseHeaders,
@@ -570,10 +570,23 @@ var OpenAIChatLanguageModel = class {
       abortSignal: options.abortSignal,
       fetch: this.config.fetch
     });
-    const { messages: rawPrompt, ...rawSettings } = body;
     const choice = response.choices[0];
-    const completionTokenDetails = (_a = response.usage) == null ? void 0 : _a.completion_tokens_details;
-    const promptTokenDetails = (_b = response.usage) == null ? void 0 : _b.prompt_tokens_details;
+    const content = [];
+    const text = choice.message.content;
+    if (text != null && text.length > 0) {
+      content.push({ type: "text", text });
+    }
+    for (const toolCall of (_a = choice.message.tool_calls) != null ? _a : []) {
+      content.push({
+        type: "tool-call",
+        toolCallType: "function",
+        toolCallId: (_b = toolCall.id) != null ? _b : generateId(),
+        toolName: toolCall.function.name,
+        args: toolCall.function.arguments
+      });
+    }
+    const completionTokenDetails = (_c = response.usage) == null ? void 0 : _c.completion_tokens_details;
+    const promptTokenDetails = (_d = response.usage) == null ? void 0 : _d.prompt_tokens_details;
     const providerMetadata = { openai: {} };
     if ((completionTokenDetails == null ? void 0 : completionTokenDetails.reasoning_tokens) != null) {
       providerMetadata.openai.reasoningTokens = completionTokenDetails == null ? void 0 : completionTokenDetails.reasoning_tokens;
@@ -588,21 +601,11 @@ var OpenAIChatLanguageModel = class {
       providerMetadata.openai.cachedPromptTokens = promptTokenDetails == null ? void 0 : promptTokenDetails.cached_tokens;
     }
     return {
-      text: choice.message.content != null ? { type: "text", text: choice.message.content } : void 0,
-      toolCalls: (_c = choice.message.tool_calls) == null ? void 0 : _c.map((toolCall) => {
-        var _a2;
-        return {
-          type: "tool-call",
-          toolCallType: "function",
-          toolCallId: (_a2 = toolCall.id) != null ? _a2 : generateId(),
-          toolName: toolCall.function.name,
-          args: toolCall.function.arguments
-        };
-      }),
+      content,
       finishReason: mapOpenAIFinishReason(choice.finish_reason),
       usage: {
-        inputTokens: (_e = (_d = response.usage) == null ? void 0 : _d.prompt_tokens) != null ? _e : void 0,
-        outputTokens: (_g = (_f = response.usage) == null ? void 0 : _f.completion_tokens) != null ? _g : void 0
+        inputTokens: (_f = (_e = response.usage) == null ? void 0 : _e.prompt_tokens) != null ? _f : void 0,
+        outputTokens: (_h = (_g = response.usage) == null ? void 0 : _g.completion_tokens) != null ? _h : void 0
       },
       request: { body },
       response: {
@@ -650,6 +653,9 @@ var OpenAIChatLanguageModel = class {
     return {
       stream: response.pipeThrough(
         new TransformStream({
+          start(controller) {
+            controller.enqueue({ type: "stream-start", warnings });
+          },
           transform(chunk, controller) {
             var _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k, _l;
             if (!chunk.success) {
@@ -807,8 +813,7 @@ var OpenAIChatLanguageModel = class {
         })
       ),
       request: { body },
-      response: { headers: responseHeaders },
-      warnings
+      response: { headers: responseHeaders }
     };
   }
 };
@@ -1140,7 +1145,7 @@ var OpenAICompletionLanguageModel = class {
     });
     const choice = response.choices[0];
     return {
-      text: { type: "text", text: choice.text },
+      content: [{ type: "text", text: choice.text }],
      usage: {
        inputTokens: response.usage.prompt_tokens,
        outputTokens: response.usage.completion_tokens
@@ -1188,6 +1193,9 @@ var OpenAICompletionLanguageModel = class {
     return {
       stream: response.pipeThrough(
         new TransformStream({
+          start(controller) {
+            controller.enqueue({ type: "stream-start", warnings });
+          },
           transform(chunk, controller) {
             if (!chunk.success) {
               finishReason = "error";
@@ -1239,9 +1247,8 @@ var OpenAICompletionLanguageModel = class {
           }
         })
       ),
-      response: { headers: responseHeaders },
-      warnings,
-      request: { body: JSON.stringify(body) }
+      request: { body },
+      response: { headers: responseHeaders }
     };
   }
 };
@@ -1999,7 +2006,7 @@ var OpenAIResponsesLanguageModel = class {
     };
   }
   async doGenerate(options) {
-    var _a, _b, _c, _d, _e;
+    var _a, _b, _c, _d, _e, _f, _g, _h;
     const { args: body, warnings } = this.getArgs(options);
     const {
       responseHeaders,
@@ -2063,36 +2070,45 @@ var OpenAIResponsesLanguageModel = class {
       abortSignal: options.abortSignal,
       fetch: this.config.fetch
     });
-    const outputTextElements = response.output.filter((output) => output.type === "message").flatMap((output) => output.content).filter((content) => content.type === "output_text");
-    const toolCalls = response.output.filter((output) => output.type === "function_call").map((output) => ({
-      type: "tool-call",
-      toolCallType: "function",
-      toolCallId: output.call_id,
-      toolName: output.name,
-      args: output.arguments
-    }));
+    const content = [];
+    for (const part of response.output) {
+      switch (part.type) {
+        case "message": {
+          for (const contentPart of part.content) {
+            content.push({
+              type: "text",
+              text: contentPart.text
+            });
+            for (const annotation of contentPart.annotations) {
+              content.push({
+                type: "source",
+                sourceType: "url",
+                id: (_c = (_b = (_a = this.config).generateId) == null ? void 0 : _b.call(_a)) != null ? _c : generateId2(),
+                url: annotation.url,
+                title: annotation.title
+              });
+            }
+          }
+          break;
+        }
+        case "function_call": {
+          content.push({
+            type: "tool-call",
+            toolCallType: "function",
+            toolCallId: part.call_id,
+            toolName: part.name,
+            args: part.arguments
+          });
+          break;
+        }
+      }
+    }
     return {
-      text: {
-        type: "text",
-        text: outputTextElements.map((content) => content.text).join("\n")
-      },
-      sources: outputTextElements.flatMap(
-        (content) => content.annotations.map((annotation) => {
-          var _a2, _b2, _c2;
-          return {
-            type: "source",
-            sourceType: "url",
-            id: (_c2 = (_b2 = (_a2 = this.config).generateId) == null ? void 0 : _b2.call(_a2)) != null ? _c2 : generateId2(),
-            url: annotation.url,
-            title: annotation.title
-          };
-        })
-      ),
+      content,
       finishReason: mapOpenAIResponseFinishReason({
-        finishReason: (_a = response.incomplete_details) == null ? void 0 : _a.reason,
-        hasToolCalls: toolCalls.length > 0
+        finishReason: (_d = response.incomplete_details) == null ? void 0 : _d.reason,
+        hasToolCalls: content.some((part) => part.type === "tool-call")
       }),
-      toolCalls: toolCalls.length > 0 ? toolCalls : void 0,
       usage: {
         inputTokens: response.usage.input_tokens,
         outputTokens: response.usage.output_tokens
@@ -2108,8 +2124,8 @@ var OpenAIResponsesLanguageModel = class {
       providerMetadata: {
         openai: {
           responseId: response.id,
-          cachedPromptTokens: (_c = (_b = response.usage.input_tokens_details) == null ? void 0 : _b.cached_tokens) != null ? _c : null,
-          reasoningTokens: (_e = (_d = response.usage.output_tokens_details) == null ? void 0 : _d.reasoning_tokens) != null ? _e : null
+          cachedPromptTokens: (_f = (_e = response.usage.input_tokens_details) == null ? void 0 : _e.cached_tokens) != null ? _f : null,
+          reasoningTokens: (_h = (_g = response.usage.output_tokens_details) == null ? void 0 : _g.reasoning_tokens) != null ? _h : null
         }
       },
       warnings
@@ -2148,6 +2164,9 @@ var OpenAIResponsesLanguageModel = class {
     return {
       stream: response.pipeThrough(
         new TransformStream({
+          start(controller) {
+            controller.enqueue({ type: "stream-start", warnings });
+          },
           transform(chunk, controller) {
             var _a, _b, _c, _d, _e, _f, _g, _h;
             if (!chunk.success) {
@@ -2242,8 +2261,7 @@ var OpenAIResponsesLanguageModel = class {
         })
       ),
       request: { body },
-      response: { headers: responseHeaders },
-      warnings
+      response: { headers: responseHeaders }
     };
   }
 };
@@ -2383,6 +2401,110 @@ var openaiResponsesProviderOptionsSchema = z9.object({
   instructions: z9.string().nullish()
 });
 
+// src/openai-speech-model.ts
+import {
+  combineHeaders as combineHeaders7,
+  createBinaryResponseHandler,
+  parseProviderOptions as parseProviderOptions4,
+  postJsonToApi as postJsonToApi6
+} from "@ai-sdk/provider-utils";
+import { z as z10 } from "zod";
+var OpenAIProviderOptionsSchema = z10.object({
+  instructions: z10.string().nullish(),
+  speed: z10.number().min(0.25).max(4).default(1).nullish()
+});
+var OpenAISpeechModel = class {
+  constructor(modelId, config) {
+    this.modelId = modelId;
+    this.config = config;
+    this.specificationVersion = "v1";
+  }
+  get provider() {
+    return this.config.provider;
+  }
+  getArgs({
+    text,
+    voice = "alloy",
+    outputFormat = "mp3",
+    speed,
+    instructions,
+    providerOptions
+  }) {
+    const warnings = [];
+    const openAIOptions = parseProviderOptions4({
+      provider: "openai",
+      providerOptions,
+      schema: OpenAIProviderOptionsSchema
+    });
+    const requestBody = {
+      model: this.modelId,
+      input: text,
+      voice,
+      response_format: "mp3",
+      speed,
+      instructions
+    };
+    if (outputFormat) {
+      if (["mp3", "opus", "aac", "flac", "wav", "pcm"].includes(outputFormat)) {
+        requestBody.response_format = outputFormat;
+      } else {
+        warnings.push({
+          type: "unsupported-setting",
+          setting: "outputFormat",
+          details: `Unsupported output format: ${outputFormat}. Using mp3 instead.`
+        });
+      }
+    }
+    if (openAIOptions) {
+      const speechModelOptions = {};
+      for (const key in speechModelOptions) {
+        const value = speechModelOptions[key];
+        if (value !== void 0) {
+          requestBody[key] = value;
+        }
+      }
+    }
+    return {
+      requestBody,
+      warnings
+    };
+  }
+  async doGenerate(options) {
+    var _a, _b, _c;
+    const currentDate = (_c = (_b = (_a = this.config._internal) == null ? void 0 : _a.currentDate) == null ? void 0 : _b.call(_a)) != null ? _c : /* @__PURE__ */ new Date();
+    const { requestBody, warnings } = this.getArgs(options);
+    const {
+      value: audio,
+      responseHeaders,
+      rawValue: rawResponse
+    } = await postJsonToApi6({
+      url: this.config.url({
+        path: "/audio/speech",
+        modelId: this.modelId
+      }),
+      headers: combineHeaders7(this.config.headers(), options.headers),
+      body: requestBody,
+      failedResponseHandler: openaiFailedResponseHandler,
+      successfulResponseHandler: createBinaryResponseHandler(),
+      abortSignal: options.abortSignal,
+      fetch: this.config.fetch
+    });
+    return {
+      audio,
+      warnings,
+      request: {
+        body: JSON.stringify(requestBody)
+      },
+      response: {
+        timestamp: currentDate,
+        modelId: this.modelId,
+        headers: responseHeaders,
+        body: rawResponse
+      }
+    };
+  }
+};
+
 // src/openai-provider.ts
 function createOpenAI(options = {}) {
   var _a, _b, _c;
@@ -2431,6 +2553,12 @@ function createOpenAI(options = {}) {
     headers: getHeaders,
     fetch: options.fetch
   });
+  const createSpeechModel = (modelId) => new OpenAISpeechModel(modelId, {
+    provider: `${providerName}.speech`,
+    url: ({ path }) => `${baseURL}${path}`,
+    headers: getHeaders,
+    fetch: options.fetch
+  });
   const createLanguageModel = (modelId, settings) => {
     if (new.target) {
       throw new Error(
@@ -2467,6 +2595,8 @@ function createOpenAI(options = {}) {
   provider.imageModel = createImageModel;
   provider.transcription = createTranscriptionModel;
   provider.transcriptionModel = createTranscriptionModel;
+  provider.speech = createSpeechModel;
+  provider.speechModel = createSpeechModel;
   provider.tools = openaiTools;
   return provider;
 }
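Note on the new speech factory added above (a minimal, illustrative sketch, not taken from the package or its documentation): the provider diff registers createSpeechModel as both provider.speech and provider.speechModel, and OpenAISpeechModel.doGenerate accepts text, voice, outputFormat, speed, instructions, and providerOptions, returning { audio, warnings, request, response }. The "tts-1" model id, the OPENAI_API_KEY environment variable, and the output file name below are assumptions made for the sketch only; they do not appear in this diff.

// Sketch only: exercises OpenAISpeechModel as wired up in this version.
// "tts-1" and OPENAI_API_KEY are assumptions, not part of this diff.
import { createOpenAI } from "@ai-sdk/openai";
import { writeFile } from "node:fs/promises";

const openai = createOpenAI({ apiKey: process.env.OPENAI_API_KEY });
const speechModel = openai.speech("tts-1");

// getArgs defaults voice to "alloy" and response_format to "mp3";
// unsupported output formats fall back to mp3 with an "unsupported-setting" warning.
const { audio, warnings } = await speechModel.doGenerate({
  text: "Hello from the AI SDK.",
  voice: "alloy",
  outputFormat: "mp3"
});

console.log(warnings);
await writeFile("speech.mp3", audio);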