@ai-sdk/openai 2.0.0-canary.7 → 2.0.0-canary.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,17 @@
1
1
  # @ai-sdk/openai
2
2
 
3
+ ## 2.0.0-canary.8
4
+
5
+ ### Patch Changes
6
+
7
+ - 8aa9e20: feat: add speech with experimental_generateSpeech
8
+ - Updated dependencies [5d142ab]
9
+ - Updated dependencies [b6b43c7]
10
+ - Updated dependencies [8aa9e20]
11
+ - Updated dependencies [3795467]
12
+ - @ai-sdk/provider-utils@3.0.0-canary.8
13
+ - @ai-sdk/provider@2.0.0-canary.7
14
+
3
15
  ## 2.0.0-canary.7
4
16
 
5
17
  ### Patch Changes
package/dist/index.d.mts CHANGED
@@ -1,4 +1,4 @@
1
- import { LanguageModelV2, ProviderV2, EmbeddingModelV2, ImageModelV1, TranscriptionModelV1 } from '@ai-sdk/provider';
1
+ import { LanguageModelV2, ProviderV2, EmbeddingModelV2, ImageModelV1, TranscriptionModelV1, SpeechModelV1 } from '@ai-sdk/provider';
2
2
  import { FetchFunction } from '@ai-sdk/provider-utils';
3
3
  import { z } from 'zod';
4
4
 
@@ -142,6 +142,8 @@ type OpenAITranscriptionModelId = 'whisper-1' | 'gpt-4o-mini-transcribe' | 'gpt-
142
142
 
143
143
  type OpenAIResponsesModelId = 'o1' | 'o1-2024-12-17' | 'o1-mini' | 'o1-mini-2024-09-12' | 'o1-preview' | 'o1-preview-2024-09-12' | 'o3-mini' | 'o3-mini-2025-01-31' | 'gpt-4o' | 'gpt-4o-2024-05-13' | 'gpt-4o-2024-08-06' | 'gpt-4o-2024-11-20' | 'gpt-4o-mini' | 'gpt-4o-mini-2024-07-18' | 'gpt-4-turbo' | 'gpt-4-turbo-2024-04-09' | 'gpt-4-turbo-preview' | 'gpt-4-0125-preview' | 'gpt-4-1106-preview' | 'gpt-4' | 'gpt-4-0613' | 'gpt-4.5-preview' | 'gpt-4.5-preview-2025-02-27' | 'gpt-3.5-turbo-0125' | 'gpt-3.5-turbo' | 'gpt-3.5-turbo-1106' | 'chatgpt-4o-latest' | (string & {});
144
144
 
145
+ type OpenAISpeechModelId = 'tts-1' | 'tts-1-hd' | 'gpt-4o-mini-tts' | (string & {});
146
+
145
147
  interface OpenAIProvider extends ProviderV2 {
146
148
  (modelId: 'gpt-3.5-turbo-instruct', settings?: OpenAICompletionSettings): OpenAICompletionLanguageModel;
147
149
  (modelId: OpenAIChatModelId, settings?: OpenAIChatSettings): LanguageModelV2;
@@ -189,6 +191,10 @@ interface OpenAIProvider extends ProviderV2 {
189
191
  */
190
192
  transcription(modelId: OpenAITranscriptionModelId): TranscriptionModelV1;
191
193
  /**
194
+ Creates a model for speech generation.
195
+ */
196
+ speech(modelId: OpenAISpeechModelId): SpeechModelV1;
197
+ /**
192
198
  OpenAI-specific tools.
193
199
  */
194
200
  tools: typeof openaiTools;
package/dist/index.d.ts CHANGED
@@ -1,4 +1,4 @@
1
- import { LanguageModelV2, ProviderV2, EmbeddingModelV2, ImageModelV1, TranscriptionModelV1 } from '@ai-sdk/provider';
1
+ import { LanguageModelV2, ProviderV2, EmbeddingModelV2, ImageModelV1, TranscriptionModelV1, SpeechModelV1 } from '@ai-sdk/provider';
2
2
  import { FetchFunction } from '@ai-sdk/provider-utils';
3
3
  import { z } from 'zod';
4
4
 
@@ -142,6 +142,8 @@ type OpenAITranscriptionModelId = 'whisper-1' | 'gpt-4o-mini-transcribe' | 'gpt-
142
142
 
143
143
  type OpenAIResponsesModelId = 'o1' | 'o1-2024-12-17' | 'o1-mini' | 'o1-mini-2024-09-12' | 'o1-preview' | 'o1-preview-2024-09-12' | 'o3-mini' | 'o3-mini-2025-01-31' | 'gpt-4o' | 'gpt-4o-2024-05-13' | 'gpt-4o-2024-08-06' | 'gpt-4o-2024-11-20' | 'gpt-4o-mini' | 'gpt-4o-mini-2024-07-18' | 'gpt-4-turbo' | 'gpt-4-turbo-2024-04-09' | 'gpt-4-turbo-preview' | 'gpt-4-0125-preview' | 'gpt-4-1106-preview' | 'gpt-4' | 'gpt-4-0613' | 'gpt-4.5-preview' | 'gpt-4.5-preview-2025-02-27' | 'gpt-3.5-turbo-0125' | 'gpt-3.5-turbo' | 'gpt-3.5-turbo-1106' | 'chatgpt-4o-latest' | (string & {});
144
144
 
145
+ type OpenAISpeechModelId = 'tts-1' | 'tts-1-hd' | 'gpt-4o-mini-tts' | (string & {});
146
+
145
147
  interface OpenAIProvider extends ProviderV2 {
146
148
  (modelId: 'gpt-3.5-turbo-instruct', settings?: OpenAICompletionSettings): OpenAICompletionLanguageModel;
147
149
  (modelId: OpenAIChatModelId, settings?: OpenAIChatSettings): LanguageModelV2;
@@ -189,6 +191,10 @@ interface OpenAIProvider extends ProviderV2 {
189
191
  */
190
192
  transcription(modelId: OpenAITranscriptionModelId): TranscriptionModelV1;
191
193
  /**
194
+ Creates a model for speech generation.
195
+ */
196
+ speech(modelId: OpenAISpeechModelId): SpeechModelV1;
197
+ /**
192
198
  OpenAI-specific tools.
193
199
  */
194
200
  tools: typeof openaiTools;
package/dist/index.js CHANGED
@@ -26,7 +26,7 @@ __export(src_exports, {
26
26
  module.exports = __toCommonJS(src_exports);
27
27
 
28
28
  // src/openai-provider.ts
29
- var import_provider_utils9 = require("@ai-sdk/provider-utils");
29
+ var import_provider_utils10 = require("@ai-sdk/provider-utils");
30
30
 
31
31
  // src/openai-chat-language-model.ts
32
32
  var import_provider3 = require("@ai-sdk/provider");
@@ -560,7 +560,7 @@ var OpenAIChatLanguageModel = class {
560
560
  };
561
561
  }
562
562
  async doGenerate(options) {
563
- var _a, _b, _c, _d, _e, _f, _g;
563
+ var _a, _b, _c, _d, _e, _f, _g, _h;
564
564
  const { args: body, warnings } = this.getArgs(options);
565
565
  const {
566
566
  responseHeaders,
@@ -580,10 +580,23 @@ var OpenAIChatLanguageModel = class {
580
580
  abortSignal: options.abortSignal,
581
581
  fetch: this.config.fetch
582
582
  });
583
- const { messages: rawPrompt, ...rawSettings } = body;
584
583
  const choice = response.choices[0];
585
- const completionTokenDetails = (_a = response.usage) == null ? void 0 : _a.completion_tokens_details;
586
- const promptTokenDetails = (_b = response.usage) == null ? void 0 : _b.prompt_tokens_details;
584
+ const content = [];
585
+ const text = choice.message.content;
586
+ if (text != null && text.length > 0) {
587
+ content.push({ type: "text", text });
588
+ }
589
+ for (const toolCall of (_a = choice.message.tool_calls) != null ? _a : []) {
590
+ content.push({
591
+ type: "tool-call",
592
+ toolCallType: "function",
593
+ toolCallId: (_b = toolCall.id) != null ? _b : (0, import_provider_utils3.generateId)(),
594
+ toolName: toolCall.function.name,
595
+ args: toolCall.function.arguments
596
+ });
597
+ }
598
+ const completionTokenDetails = (_c = response.usage) == null ? void 0 : _c.completion_tokens_details;
599
+ const promptTokenDetails = (_d = response.usage) == null ? void 0 : _d.prompt_tokens_details;
587
600
  const providerMetadata = { openai: {} };
588
601
  if ((completionTokenDetails == null ? void 0 : completionTokenDetails.reasoning_tokens) != null) {
589
602
  providerMetadata.openai.reasoningTokens = completionTokenDetails == null ? void 0 : completionTokenDetails.reasoning_tokens;
@@ -598,21 +611,11 @@ var OpenAIChatLanguageModel = class {
598
611
  providerMetadata.openai.cachedPromptTokens = promptTokenDetails == null ? void 0 : promptTokenDetails.cached_tokens;
599
612
  }
600
613
  return {
601
- text: choice.message.content != null ? { type: "text", text: choice.message.content } : void 0,
602
- toolCalls: (_c = choice.message.tool_calls) == null ? void 0 : _c.map((toolCall) => {
603
- var _a2;
604
- return {
605
- type: "tool-call",
606
- toolCallType: "function",
607
- toolCallId: (_a2 = toolCall.id) != null ? _a2 : (0, import_provider_utils3.generateId)(),
608
- toolName: toolCall.function.name,
609
- args: toolCall.function.arguments
610
- };
611
- }),
614
+ content,
612
615
  finishReason: mapOpenAIFinishReason(choice.finish_reason),
613
616
  usage: {
614
- inputTokens: (_e = (_d = response.usage) == null ? void 0 : _d.prompt_tokens) != null ? _e : void 0,
615
- outputTokens: (_g = (_f = response.usage) == null ? void 0 : _f.completion_tokens) != null ? _g : void 0
617
+ inputTokens: (_f = (_e = response.usage) == null ? void 0 : _e.prompt_tokens) != null ? _f : void 0,
618
+ outputTokens: (_h = (_g = response.usage) == null ? void 0 : _g.completion_tokens) != null ? _h : void 0
616
619
  },
617
620
  request: { body },
618
621
  response: {
@@ -660,6 +663,9 @@ var OpenAIChatLanguageModel = class {
660
663
  return {
661
664
  stream: response.pipeThrough(
662
665
  new TransformStream({
666
+ start(controller) {
667
+ controller.enqueue({ type: "stream-start", warnings });
668
+ },
663
669
  transform(chunk, controller) {
664
670
  var _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k, _l;
665
671
  if (!chunk.success) {
@@ -817,8 +823,7 @@ var OpenAIChatLanguageModel = class {
817
823
  })
818
824
  ),
819
825
  request: { body },
820
- response: { headers: responseHeaders },
821
- warnings
826
+ response: { headers: responseHeaders }
822
827
  };
823
828
  }
824
829
  };
@@ -1142,7 +1147,7 @@ var OpenAICompletionLanguageModel = class {
1142
1147
  });
1143
1148
  const choice = response.choices[0];
1144
1149
  return {
1145
- text: { type: "text", text: choice.text },
1150
+ content: [{ type: "text", text: choice.text }],
1146
1151
  usage: {
1147
1152
  inputTokens: response.usage.prompt_tokens,
1148
1153
  outputTokens: response.usage.completion_tokens
@@ -1190,6 +1195,9 @@ var OpenAICompletionLanguageModel = class {
1190
1195
  return {
1191
1196
  stream: response.pipeThrough(
1192
1197
  new TransformStream({
1198
+ start(controller) {
1199
+ controller.enqueue({ type: "stream-start", warnings });
1200
+ },
1193
1201
  transform(chunk, controller) {
1194
1202
  if (!chunk.success) {
1195
1203
  finishReason = "error";
@@ -1241,9 +1249,8 @@ var OpenAICompletionLanguageModel = class {
1241
1249
  }
1242
1250
  })
1243
1251
  ),
1244
- response: { headers: responseHeaders },
1245
- warnings,
1246
- request: { body: JSON.stringify(body) }
1252
+ request: { body },
1253
+ response: { headers: responseHeaders }
1247
1254
  };
1248
1255
  }
1249
1256
  };
@@ -1974,7 +1981,7 @@ var OpenAIResponsesLanguageModel = class {
1974
1981
  };
1975
1982
  }
1976
1983
  async doGenerate(options) {
1977
- var _a, _b, _c, _d, _e;
1984
+ var _a, _b, _c, _d, _e, _f, _g, _h;
1978
1985
  const { args: body, warnings } = this.getArgs(options);
1979
1986
  const {
1980
1987
  responseHeaders,
@@ -2038,36 +2045,45 @@ var OpenAIResponsesLanguageModel = class {
2038
2045
  abortSignal: options.abortSignal,
2039
2046
  fetch: this.config.fetch
2040
2047
  });
2041
- const outputTextElements = response.output.filter((output) => output.type === "message").flatMap((output) => output.content).filter((content) => content.type === "output_text");
2042
- const toolCalls = response.output.filter((output) => output.type === "function_call").map((output) => ({
2043
- type: "tool-call",
2044
- toolCallType: "function",
2045
- toolCallId: output.call_id,
2046
- toolName: output.name,
2047
- args: output.arguments
2048
- }));
2048
+ const content = [];
2049
+ for (const part of response.output) {
2050
+ switch (part.type) {
2051
+ case "message": {
2052
+ for (const contentPart of part.content) {
2053
+ content.push({
2054
+ type: "text",
2055
+ text: contentPart.text
2056
+ });
2057
+ for (const annotation of contentPart.annotations) {
2058
+ content.push({
2059
+ type: "source",
2060
+ sourceType: "url",
2061
+ id: (_c = (_b = (_a = this.config).generateId) == null ? void 0 : _b.call(_a)) != null ? _c : (0, import_provider_utils8.generateId)(),
2062
+ url: annotation.url,
2063
+ title: annotation.title
2064
+ });
2065
+ }
2066
+ }
2067
+ break;
2068
+ }
2069
+ case "function_call": {
2070
+ content.push({
2071
+ type: "tool-call",
2072
+ toolCallType: "function",
2073
+ toolCallId: part.call_id,
2074
+ toolName: part.name,
2075
+ args: part.arguments
2076
+ });
2077
+ break;
2078
+ }
2079
+ }
2080
+ }
2049
2081
  return {
2050
- text: {
2051
- type: "text",
2052
- text: outputTextElements.map((content) => content.text).join("\n")
2053
- },
2054
- sources: outputTextElements.flatMap(
2055
- (content) => content.annotations.map((annotation) => {
2056
- var _a2, _b2, _c2;
2057
- return {
2058
- type: "source",
2059
- sourceType: "url",
2060
- id: (_c2 = (_b2 = (_a2 = this.config).generateId) == null ? void 0 : _b2.call(_a2)) != null ? _c2 : (0, import_provider_utils8.generateId)(),
2061
- url: annotation.url,
2062
- title: annotation.title
2063
- };
2064
- })
2065
- ),
2082
+ content,
2066
2083
  finishReason: mapOpenAIResponseFinishReason({
2067
- finishReason: (_a = response.incomplete_details) == null ? void 0 : _a.reason,
2068
- hasToolCalls: toolCalls.length > 0
2084
+ finishReason: (_d = response.incomplete_details) == null ? void 0 : _d.reason,
2085
+ hasToolCalls: content.some((part) => part.type === "tool-call")
2069
2086
  }),
2070
- toolCalls: toolCalls.length > 0 ? toolCalls : void 0,
2071
2087
  usage: {
2072
2088
  inputTokens: response.usage.input_tokens,
2073
2089
  outputTokens: response.usage.output_tokens
@@ -2083,8 +2099,8 @@ var OpenAIResponsesLanguageModel = class {
2083
2099
  providerMetadata: {
2084
2100
  openai: {
2085
2101
  responseId: response.id,
2086
- cachedPromptTokens: (_c = (_b = response.usage.input_tokens_details) == null ? void 0 : _b.cached_tokens) != null ? _c : null,
2087
- reasoningTokens: (_e = (_d = response.usage.output_tokens_details) == null ? void 0 : _d.reasoning_tokens) != null ? _e : null
2102
+ cachedPromptTokens: (_f = (_e = response.usage.input_tokens_details) == null ? void 0 : _e.cached_tokens) != null ? _f : null,
2103
+ reasoningTokens: (_h = (_g = response.usage.output_tokens_details) == null ? void 0 : _g.reasoning_tokens) != null ? _h : null
2088
2104
  }
2089
2105
  },
2090
2106
  warnings
@@ -2123,6 +2139,9 @@ var OpenAIResponsesLanguageModel = class {
2123
2139
  return {
2124
2140
  stream: response.pipeThrough(
2125
2141
  new TransformStream({
2142
+ start(controller) {
2143
+ controller.enqueue({ type: "stream-start", warnings });
2144
+ },
2126
2145
  transform(chunk, controller) {
2127
2146
  var _a, _b, _c, _d, _e, _f, _g, _h;
2128
2147
  if (!chunk.success) {
@@ -2217,8 +2236,7 @@ var OpenAIResponsesLanguageModel = class {
2217
2236
  })
2218
2237
  ),
2219
2238
  request: { body },
2220
- response: { headers: responseHeaders },
2221
- warnings
2239
+ response: { headers: responseHeaders }
2222
2240
  };
2223
2241
  }
2224
2242
  };
@@ -2358,14 +2376,113 @@ var openaiResponsesProviderOptionsSchema = import_zod9.z.object({
2358
2376
  instructions: import_zod9.z.string().nullish()
2359
2377
  });
2360
2378
 
2379
+ // src/openai-speech-model.ts
2380
+ var import_provider_utils9 = require("@ai-sdk/provider-utils");
2381
+ var import_zod10 = require("zod");
2382
+ var OpenAIProviderOptionsSchema = import_zod10.z.object({
2383
+ instructions: import_zod10.z.string().nullish(),
2384
+ speed: import_zod10.z.number().min(0.25).max(4).default(1).nullish()
2385
+ });
2386
+ var OpenAISpeechModel = class {
2387
+ constructor(modelId, config) {
2388
+ this.modelId = modelId;
2389
+ this.config = config;
2390
+ this.specificationVersion = "v1";
2391
+ }
2392
+ get provider() {
2393
+ return this.config.provider;
2394
+ }
2395
+ getArgs({
2396
+ text,
2397
+ voice = "alloy",
2398
+ outputFormat = "mp3",
2399
+ speed,
2400
+ instructions,
2401
+ providerOptions
2402
+ }) {
2403
+ const warnings = [];
2404
+ const openAIOptions = (0, import_provider_utils9.parseProviderOptions)({
2405
+ provider: "openai",
2406
+ providerOptions,
2407
+ schema: OpenAIProviderOptionsSchema
2408
+ });
2409
+ const requestBody = {
2410
+ model: this.modelId,
2411
+ input: text,
2412
+ voice,
2413
+ response_format: "mp3",
2414
+ speed,
2415
+ instructions
2416
+ };
2417
+ if (outputFormat) {
2418
+ if (["mp3", "opus", "aac", "flac", "wav", "pcm"].includes(outputFormat)) {
2419
+ requestBody.response_format = outputFormat;
2420
+ } else {
2421
+ warnings.push({
2422
+ type: "unsupported-setting",
2423
+ setting: "outputFormat",
2424
+ details: `Unsupported output format: ${outputFormat}. Using mp3 instead.`
2425
+ });
2426
+ }
2427
+ }
2428
+ if (openAIOptions) {
2429
+ const speechModelOptions = {};
2430
+ for (const key in speechModelOptions) {
2431
+ const value = speechModelOptions[key];
2432
+ if (value !== void 0) {
2433
+ requestBody[key] = value;
2434
+ }
2435
+ }
2436
+ }
2437
+ return {
2438
+ requestBody,
2439
+ warnings
2440
+ };
2441
+ }
2442
+ async doGenerate(options) {
2443
+ var _a, _b, _c;
2444
+ const currentDate = (_c = (_b = (_a = this.config._internal) == null ? void 0 : _a.currentDate) == null ? void 0 : _b.call(_a)) != null ? _c : /* @__PURE__ */ new Date();
2445
+ const { requestBody, warnings } = this.getArgs(options);
2446
+ const {
2447
+ value: audio,
2448
+ responseHeaders,
2449
+ rawValue: rawResponse
2450
+ } = await (0, import_provider_utils9.postJsonToApi)({
2451
+ url: this.config.url({
2452
+ path: "/audio/speech",
2453
+ modelId: this.modelId
2454
+ }),
2455
+ headers: (0, import_provider_utils9.combineHeaders)(this.config.headers(), options.headers),
2456
+ body: requestBody,
2457
+ failedResponseHandler: openaiFailedResponseHandler,
2458
+ successfulResponseHandler: (0, import_provider_utils9.createBinaryResponseHandler)(),
2459
+ abortSignal: options.abortSignal,
2460
+ fetch: this.config.fetch
2461
+ });
2462
+ return {
2463
+ audio,
2464
+ warnings,
2465
+ request: {
2466
+ body: JSON.stringify(requestBody)
2467
+ },
2468
+ response: {
2469
+ timestamp: currentDate,
2470
+ modelId: this.modelId,
2471
+ headers: responseHeaders,
2472
+ body: rawResponse
2473
+ }
2474
+ };
2475
+ }
2476
+ };
2477
+
2361
2478
  // src/openai-provider.ts
2362
2479
  function createOpenAI(options = {}) {
2363
2480
  var _a, _b, _c;
2364
- const baseURL = (_a = (0, import_provider_utils9.withoutTrailingSlash)(options.baseURL)) != null ? _a : "https://api.openai.com/v1";
2481
+ const baseURL = (_a = (0, import_provider_utils10.withoutTrailingSlash)(options.baseURL)) != null ? _a : "https://api.openai.com/v1";
2365
2482
  const compatibility = (_b = options.compatibility) != null ? _b : "compatible";
2366
2483
  const providerName = (_c = options.name) != null ? _c : "openai";
2367
2484
  const getHeaders = () => ({
2368
- Authorization: `Bearer ${(0, import_provider_utils9.loadApiKey)({
2485
+ Authorization: `Bearer ${(0, import_provider_utils10.loadApiKey)({
2369
2486
  apiKey: options.apiKey,
2370
2487
  environmentVariableName: "OPENAI_API_KEY",
2371
2488
  description: "OpenAI"
@@ -2406,6 +2523,12 @@ function createOpenAI(options = {}) {
2406
2523
  headers: getHeaders,
2407
2524
  fetch: options.fetch
2408
2525
  });
2526
+ const createSpeechModel = (modelId) => new OpenAISpeechModel(modelId, {
2527
+ provider: `${providerName}.speech`,
2528
+ url: ({ path }) => `${baseURL}${path}`,
2529
+ headers: getHeaders,
2530
+ fetch: options.fetch
2531
+ });
2409
2532
  const createLanguageModel = (modelId, settings) => {
2410
2533
  if (new.target) {
2411
2534
  throw new Error(
@@ -2442,6 +2565,8 @@ function createOpenAI(options = {}) {
2442
2565
  provider.imageModel = createImageModel;
2443
2566
  provider.transcription = createTranscriptionModel;
2444
2567
  provider.transcriptionModel = createTranscriptionModel;
2568
+ provider.speech = createSpeechModel;
2569
+ provider.speechModel = createSpeechModel;
2445
2570
  provider.tools = openaiTools;
2446
2571
  return provider;
2447
2572
  }