@ai-sdk/openai 1.3.9 → 1.3.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,21 @@
  # @ai-sdk/openai

+ ## 1.3.11
+
+ ### Patch Changes
+
+ - beef951: feat: add speech with experimental_generateSpeech
+ - Updated dependencies [beef951]
+   - @ai-sdk/provider@1.1.3
+   - @ai-sdk/provider-utils@2.2.7
+
+ ## 1.3.10
+
+ ### Patch Changes
+
+ - dbe53e7: adding support for gpt-4o-search-preview and handling unsupported parameters
+ - 84ffaba: fix: propagate openai transcription fixes
+
  ## 1.3.9

  ### Patch Changes
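The headline addition in 1.3.11 is speech generation. A minimal consumption sketch (the `experimental_generateSpeech` name comes from the changelog entry above; the result shape, `audio.uint8Array`, is assumed from the AI SDK documentation rather than from this diff):

```ts
import { writeFile } from 'node:fs/promises';
import { openai } from '@ai-sdk/openai';
import { experimental_generateSpeech as generateSpeech } from 'ai';

// Generate spoken audio from text. The model id and voice are valid
// per the types added in this release; the result shape is assumed.
const { audio } = await generateSpeech({
  model: openai.speech('gpt-4o-mini-tts'),
  text: 'Hello from the AI SDK!',
  voice: 'alloy',
});

await writeFile('hello.mp3', audio.uint8Array);
```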
package/dist/index.d.mts CHANGED
@@ -1,8 +1,8 @@
- import { LanguageModelV1, ProviderV1, EmbeddingModelV1, ImageModelV1, TranscriptionModelV1 } from '@ai-sdk/provider';
+ import { LanguageModelV1, ProviderV1, EmbeddingModelV1, ImageModelV1, TranscriptionModelV1, SpeechModelV1 } from '@ai-sdk/provider';
  import { FetchFunction } from '@ai-sdk/provider-utils';
  import { z } from 'zod';

- type OpenAIChatModelId = 'o1' | 'o1-2024-12-17' | 'o1-mini' | 'o1-mini-2024-09-12' | 'o1-preview' | 'o1-preview-2024-09-12' | 'o3-mini' | 'o3-mini-2025-01-31' | 'gpt-4o' | 'gpt-4o-2024-05-13' | 'gpt-4o-2024-08-06' | 'gpt-4o-2024-11-20' | 'gpt-4o-audio-preview' | 'gpt-4o-audio-preview-2024-10-01' | 'gpt-4o-audio-preview-2024-12-17' | 'gpt-4o-mini' | 'gpt-4o-mini-2024-07-18' | 'gpt-4-turbo' | 'gpt-4-turbo-2024-04-09' | 'gpt-4-turbo-preview' | 'gpt-4-0125-preview' | 'gpt-4-1106-preview' | 'gpt-4' | 'gpt-4-0613' | 'gpt-4.5-preview' | 'gpt-4.5-preview-2025-02-27' | 'gpt-3.5-turbo-0125' | 'gpt-3.5-turbo' | 'gpt-3.5-turbo-1106' | 'chatgpt-4o-latest' | (string & {});
+ type OpenAIChatModelId = 'o1' | 'o1-2024-12-17' | 'o1-mini' | 'o1-mini-2024-09-12' | 'o1-preview' | 'o1-preview-2024-09-12' | 'o3-mini' | 'o3-mini-2025-01-31' | 'gpt-4o' | 'gpt-4o-2024-05-13' | 'gpt-4o-2024-08-06' | 'gpt-4o-2024-11-20' | 'gpt-4o-audio-preview' | 'gpt-4o-audio-preview-2024-10-01' | 'gpt-4o-audio-preview-2024-12-17' | 'gpt-4o-search-preview' | 'gpt-4o-search-preview-2025-03-11' | 'gpt-4o-mini' | 'gpt-4o-mini-2024-07-18' | 'gpt-4-turbo' | 'gpt-4-turbo-2024-04-09' | 'gpt-4-turbo-preview' | 'gpt-4-0125-preview' | 'gpt-4-1106-preview' | 'gpt-4' | 'gpt-4-0613' | 'gpt-4.5-preview' | 'gpt-4.5-preview-2025-02-27' | 'gpt-3.5-turbo-0125' | 'gpt-3.5-turbo' | 'gpt-3.5-turbo-1106' | 'chatgpt-4o-latest' | (string & {});
  interface OpenAIChatSettings {
      /**
      Modify the likelihood of specified tokens appearing in the completion.
@@ -203,6 +203,8 @@ declare const openaiTools: {
      webSearchPreview: typeof webSearchPreviewTool;
  };

+ type OpenAISpeechModelId = 'tts-1' | 'tts-1-hd' | 'gpt-4o-mini-tts' | (string & {});
+
  interface OpenAIProvider extends ProviderV1 {
      (modelId: 'gpt-3.5-turbo-instruct', settings?: OpenAICompletionSettings): OpenAICompletionLanguageModel;
      (modelId: OpenAIChatModelId, settings?: OpenAIChatSettings): LanguageModelV1;
@@ -250,6 +252,10 @@ interface OpenAIProvider extends ProviderV1 {
      */
      transcription(modelId: OpenAITranscriptionModelId): TranscriptionModelV1;
      /**
+     Creates a model for speech generation.
+     */
+     speech(modelId: OpenAISpeechModelId): SpeechModelV1;
+     /**
      OpenAI-specific tools.
      */
      tools: typeof openaiTools;
package/dist/index.d.ts CHANGED
@@ -1,8 +1,8 @@
- import { LanguageModelV1, ProviderV1, EmbeddingModelV1, ImageModelV1, TranscriptionModelV1 } from '@ai-sdk/provider';
+ import { LanguageModelV1, ProviderV1, EmbeddingModelV1, ImageModelV1, TranscriptionModelV1, SpeechModelV1 } from '@ai-sdk/provider';
  import { FetchFunction } from '@ai-sdk/provider-utils';
  import { z } from 'zod';

- type OpenAIChatModelId = 'o1' | 'o1-2024-12-17' | 'o1-mini' | 'o1-mini-2024-09-12' | 'o1-preview' | 'o1-preview-2024-09-12' | 'o3-mini' | 'o3-mini-2025-01-31' | 'gpt-4o' | 'gpt-4o-2024-05-13' | 'gpt-4o-2024-08-06' | 'gpt-4o-2024-11-20' | 'gpt-4o-audio-preview' | 'gpt-4o-audio-preview-2024-10-01' | 'gpt-4o-audio-preview-2024-12-17' | 'gpt-4o-mini' | 'gpt-4o-mini-2024-07-18' | 'gpt-4-turbo' | 'gpt-4-turbo-2024-04-09' | 'gpt-4-turbo-preview' | 'gpt-4-0125-preview' | 'gpt-4-1106-preview' | 'gpt-4' | 'gpt-4-0613' | 'gpt-4.5-preview' | 'gpt-4.5-preview-2025-02-27' | 'gpt-3.5-turbo-0125' | 'gpt-3.5-turbo' | 'gpt-3.5-turbo-1106' | 'chatgpt-4o-latest' | (string & {});
+ type OpenAIChatModelId = 'o1' | 'o1-2024-12-17' | 'o1-mini' | 'o1-mini-2024-09-12' | 'o1-preview' | 'o1-preview-2024-09-12' | 'o3-mini' | 'o3-mini-2025-01-31' | 'gpt-4o' | 'gpt-4o-2024-05-13' | 'gpt-4o-2024-08-06' | 'gpt-4o-2024-11-20' | 'gpt-4o-audio-preview' | 'gpt-4o-audio-preview-2024-10-01' | 'gpt-4o-audio-preview-2024-12-17' | 'gpt-4o-search-preview' | 'gpt-4o-search-preview-2025-03-11' | 'gpt-4o-mini' | 'gpt-4o-mini-2024-07-18' | 'gpt-4-turbo' | 'gpt-4-turbo-2024-04-09' | 'gpt-4-turbo-preview' | 'gpt-4-0125-preview' | 'gpt-4-1106-preview' | 'gpt-4' | 'gpt-4-0613' | 'gpt-4.5-preview' | 'gpt-4.5-preview-2025-02-27' | 'gpt-3.5-turbo-0125' | 'gpt-3.5-turbo' | 'gpt-3.5-turbo-1106' | 'chatgpt-4o-latest' | (string & {});
  interface OpenAIChatSettings {
      /**
      Modify the likelihood of specified tokens appearing in the completion.
@@ -203,6 +203,8 @@ declare const openaiTools: {
      webSearchPreview: typeof webSearchPreviewTool;
  };

+ type OpenAISpeechModelId = 'tts-1' | 'tts-1-hd' | 'gpt-4o-mini-tts' | (string & {});
+
  interface OpenAIProvider extends ProviderV1 {
      (modelId: 'gpt-3.5-turbo-instruct', settings?: OpenAICompletionSettings): OpenAICompletionLanguageModel;
      (modelId: OpenAIChatModelId, settings?: OpenAIChatSettings): LanguageModelV1;
@@ -250,6 +252,10 @@ interface OpenAIProvider extends ProviderV1 {
      */
      transcription(modelId: OpenAITranscriptionModelId): TranscriptionModelV1;
      /**
+     Creates a model for speech generation.
+     */
+     speech(modelId: OpenAISpeechModelId): SpeechModelV1;
+     /**
      OpenAI-specific tools.
      */
      tools: typeof openaiTools;
package/dist/index.js CHANGED
@@ -26,7 +26,7 @@ __export(src_exports, {
  module.exports = __toCommonJS(src_exports);

  // src/openai-provider.ts
- var import_provider_utils10 = require("@ai-sdk/provider-utils");
+ var import_provider_utils11 = require("@ai-sdk/provider-utils");

  // src/openai-chat-language-model.ts
  var import_provider3 = require("@ai-sdk/provider");
@@ -536,6 +536,15 @@ var OpenAIChatLanguageModel = class {
          }
          baseArgs.max_tokens = void 0;
        }
+     } else if (this.modelId.startsWith("gpt-4o-search-preview")) {
+       if (baseArgs.temperature != null) {
+         baseArgs.temperature = void 0;
+         warnings.push({
+           type: "unsupported-setting",
+           setting: "temperature",
+           details: "temperature is not supported for the gpt-4o-search-preview model and has been removed."
+         });
+       }
      }
      switch (type) {
        case "regular": {
@@ -1604,18 +1613,12 @@ var openaiImageResponseSchema = import_zod5.z.object({
  // src/openai-transcription-model.ts
  var import_provider_utils7 = require("@ai-sdk/provider-utils");
  var import_zod6 = require("zod");
- var OpenAIProviderOptionsSchema = import_zod6.z.object({
-   include: import_zod6.z.array(import_zod6.z.string()).optional().describe(
-     "Additional information to include in the transcription response."
-   ),
-   language: import_zod6.z.string().optional().describe("The language of the input audio in ISO-639-1 format."),
-   prompt: import_zod6.z.string().optional().describe(
-     "An optional text to guide the model's style or continue a previous audio segment."
-   ),
-   temperature: import_zod6.z.number().min(0).max(1).optional().default(0).describe("The sampling temperature, between 0 and 1."),
-   timestampGranularities: import_zod6.z.array(import_zod6.z.enum(["word", "segment"])).optional().default(["segment"]).describe(
-     "The timestamp granularities to populate for this transcription."
-   )
+ var openAIProviderOptionsSchema = import_zod6.z.object({
+   include: import_zod6.z.array(import_zod6.z.string()).nullish(),
+   language: import_zod6.z.string().nullish(),
+   prompt: import_zod6.z.string().nullish(),
+   temperature: import_zod6.z.number().min(0).max(1).nullish().default(0),
+   timestampGranularities: import_zod6.z.array(import_zod6.z.enum(["word", "segment"])).nullish().default(["segment"])
  });
  var languageMap = {
    afrikaans: "af",
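The rewritten schema swaps `.optional()` for `.nullish()`: `.optional()` admits only `undefined`, while `.nullish()` also admits `null`, which is why the call site below must coalesce `null` back to `undefined` before building form data. A standalone zod sketch of the distinction:

```ts
import { z } from 'zod';

const optionalField = z.string().optional(); // string | undefined
const nullishField = z.string().nullish();   // string | null | undefined

console.log(optionalField.safeParse(null).success); // false: null rejected
console.log(nullishField.safeParse(null).success);  // true: null accepted
```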
@@ -1690,11 +1693,12 @@ var OpenAITranscriptionModel = class {
    mediaType,
    providerOptions
  }) {
+   var _a, _b, _c, _d, _e;
    const warnings = [];
    const openAIOptions = (0, import_provider_utils7.parseProviderOptions)({
      provider: "openai",
      providerOptions,
-     schema: OpenAIProviderOptionsSchema
+     schema: openAIProviderOptionsSchema
    });
    const formData = new FormData();
    const blob = audio instanceof Uint8Array ? new Blob([audio]) : new Blob([(0, import_provider_utils7.convertBase64ToUint8Array)(audio)]);
@@ -1702,16 +1706,16 @@ var OpenAITranscriptionModel = class {
    formData.append("file", new File([blob], "audio", { type: mediaType }));
    if (openAIOptions) {
      const transcriptionModelOptions = {
-       include: openAIOptions.include,
-       language: openAIOptions.language,
-       prompt: openAIOptions.prompt,
-       temperature: openAIOptions.temperature,
-       timestamp_granularities: openAIOptions.timestampGranularities
+       include: (_a = openAIOptions.include) != null ? _a : void 0,
+       language: (_b = openAIOptions.language) != null ? _b : void 0,
+       prompt: (_c = openAIOptions.prompt) != null ? _c : void 0,
+       temperature: (_d = openAIOptions.temperature) != null ? _d : void 0,
+       timestamp_granularities: (_e = openAIOptions.timestampGranularities) != null ? _e : void 0
      };
      for (const key in transcriptionModelOptions) {
        const value = transcriptionModelOptions[key];
        if (value !== void 0) {
-         formData.append(key, value);
+         formData.append(key, String(value));
        }
      }
    }
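These are the transcription fixes from 84ffaba: provider options now tolerate `null` values, and non-string form fields are coerced with `String(value)` before being appended. A usage sketch, assuming the `experimental_transcribe` helper from the `ai` package and `whisper-1` as the model id (neither appears in this diff):

```ts
import { readFile } from 'node:fs/promises';
import { openai } from '@ai-sdk/openai';
import { experimental_transcribe as transcribe } from 'ai';

// Provider options map onto the schema in this diff; null values
// are now accepted and normalized away before the request is built.
const result = await transcribe({
  model: openai.transcription('whisper-1'),
  audio: await readFile('meeting.mp3'),
  providerOptions: {
    openai: {
      language: 'en',
      timestampGranularities: ['word'],
    },
  },
});

console.log(result.text);
```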
@@ -2590,14 +2594,113 @@ var openaiTools = {
    webSearchPreview: webSearchPreviewTool
  };

+ // src/openai-speech-model.ts
+ var import_provider_utils10 = require("@ai-sdk/provider-utils");
+ var import_zod9 = require("zod");
+ var OpenAIProviderOptionsSchema = import_zod9.z.object({
+   instructions: import_zod9.z.string().nullish(),
+   speed: import_zod9.z.number().min(0.25).max(4).default(1).nullish()
+ });
+ var OpenAISpeechModel = class {
+   constructor(modelId, config) {
+     this.modelId = modelId;
+     this.config = config;
+     this.specificationVersion = "v1";
+   }
+   get provider() {
+     return this.config.provider;
+   }
+   getArgs({
+     text,
+     voice = "alloy",
+     outputFormat = "mp3",
+     speed,
+     instructions,
+     providerOptions
+   }) {
+     const warnings = [];
+     const openAIOptions = (0, import_provider_utils10.parseProviderOptions)({
+       provider: "openai",
+       providerOptions,
+       schema: OpenAIProviderOptionsSchema
+     });
+     const requestBody = {
+       model: this.modelId,
+       input: text,
+       voice,
+       response_format: "mp3",
+       speed,
+       instructions
+     };
+     if (outputFormat) {
+       if (["mp3", "opus", "aac", "flac", "wav", "pcm"].includes(outputFormat)) {
+         requestBody.response_format = outputFormat;
+       } else {
+         warnings.push({
+           type: "unsupported-setting",
+           setting: "outputFormat",
+           details: `Unsupported output format: ${outputFormat}. Using mp3 instead.`
+         });
+       }
+     }
+     if (openAIOptions) {
+       const speechModelOptions = {};
+       for (const key in speechModelOptions) {
+         const value = speechModelOptions[key];
+         if (value !== void 0) {
+           requestBody[key] = value;
+         }
+       }
+     }
+     return {
+       requestBody,
+       warnings
+     };
+   }
+   async doGenerate(options) {
+     var _a, _b, _c;
+     const currentDate = (_c = (_b = (_a = this.config._internal) == null ? void 0 : _a.currentDate) == null ? void 0 : _b.call(_a)) != null ? _c : /* @__PURE__ */ new Date();
+     const { requestBody, warnings } = this.getArgs(options);
+     const {
+       value: audio,
+       responseHeaders,
+       rawValue: rawResponse
+     } = await (0, import_provider_utils10.postJsonToApi)({
+       url: this.config.url({
+         path: "/audio/speech",
+         modelId: this.modelId
+       }),
+       headers: (0, import_provider_utils10.combineHeaders)(this.config.headers(), options.headers),
+       body: requestBody,
+       failedResponseHandler: openaiFailedResponseHandler,
+       successfulResponseHandler: (0, import_provider_utils10.createBinaryResponseHandler)(),
+       abortSignal: options.abortSignal,
+       fetch: this.config.fetch
+     });
+     return {
+       audio,
+       warnings,
+       request: {
+         body: JSON.stringify(requestBody)
+       },
+       response: {
+         timestamp: currentDate,
+         modelId: this.modelId,
+         headers: responseHeaders,
+         body: rawResponse
+       }
+     };
+   }
+ };
+
  // src/openai-provider.ts
  function createOpenAI(options = {}) {
    var _a, _b, _c;
-   const baseURL = (_a = (0, import_provider_utils10.withoutTrailingSlash)(options.baseURL)) != null ? _a : "https://api.openai.com/v1";
+   const baseURL = (_a = (0, import_provider_utils11.withoutTrailingSlash)(options.baseURL)) != null ? _a : "https://api.openai.com/v1";
    const compatibility = (_b = options.compatibility) != null ? _b : "compatible";
    const providerName = (_c = options.name) != null ? _c : "openai";
    const getHeaders = () => ({
-     Authorization: `Bearer ${(0, import_provider_utils10.loadApiKey)({
+     Authorization: `Bearer ${(0, import_provider_utils11.loadApiKey)({
      apiKey: options.apiKey,
      environmentVariableName: "OPENAI_API_KEY",
      description: "OpenAI"
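Stripped of SDK plumbing, the new model class performs one JSON POST to the speech endpoint and reads back raw bytes. A fetch-level sketch of the equivalent request (field names mirror the `requestBody` built in `getArgs` above; error handling is elided):

```ts
// Roughly what OpenAISpeechModel.doGenerate sends over the wire.
const response = await fetch('https://api.openai.com/v1/audio/speech', {
  method: 'POST',
  headers: {
    Authorization: `Bearer ${process.env.OPENAI_API_KEY}`,
    'Content-Type': 'application/json',
  },
  body: JSON.stringify({
    model: 'tts-1',
    input: 'Hello, world!',
    voice: 'alloy',
    response_format: 'mp3',
  }),
});

// The endpoint returns raw audio bytes, which the SDK consumes via
// createBinaryResponseHandler().
const audio = new Uint8Array(await response.arrayBuffer());
```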
@@ -2638,6 +2741,12 @@ function createOpenAI(options = {}) {
    headers: getHeaders,
    fetch: options.fetch
  });
+ const createSpeechModel = (modelId) => new OpenAISpeechModel(modelId, {
+   provider: `${providerName}.speech`,
+   url: ({ path }) => `${baseURL}${path}`,
+   headers: getHeaders,
+   fetch: options.fetch
+ });
  const createLanguageModel = (modelId, settings) => {
    if (new.target) {
      throw new Error(
@@ -2674,6 +2783,8 @@ function createOpenAI(options = {}) {
  provider.imageModel = createImageModel;
  provider.transcription = createTranscriptionModel;
  provider.transcriptionModel = createTranscriptionModel;
+ provider.speech = createSpeechModel;
+ provider.speechModel = createSpeechModel;
  provider.tools = openaiTools;
  return provider;
  }
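As with `transcription`/`transcriptionModel`, the speech factory is exposed under both a `speech` and a `speechModel` alias. A closing sketch with a custom provider instance and the OpenAI-specific options from the speech schema above (`speed` between 0.25 and 4, plus `instructions`); note that `getArgs` in this build iterates an empty `speechModelOptions` object, so whether these provider options actually reach the request in 1.3.11 is not evident from the diff:

```ts
import { createOpenAI } from '@ai-sdk/openai';
import { experimental_generateSpeech as generateSpeech } from 'ai';

const myOpenAI = createOpenAI({ apiKey: process.env.OPENAI_API_KEY });

// speed must be between 0.25 and 4 per the schema in this diff;
// 'nova' is a known OpenAI voice, though not listed in this diff.
const { audio } = await generateSpeech({
  model: myOpenAI.speech('tts-1-hd'),
  text: 'Package diffs, read aloud.',
  voice: 'nova',
  providerOptions: {
    openai: { speed: 1.25 },
  },
});
```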