@ai-sdk/openai 1.3.10 → 1.3.11

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,14 @@
1
1
  # @ai-sdk/openai
2
2
 
3
+ ## 1.3.11
4
+
5
+ ### Patch Changes
6
+
7
+ - beef951: feat: add speech with experimental_generateSpeech
8
+ - Updated dependencies [beef951]
9
+ - @ai-sdk/provider@1.1.3
10
+ - @ai-sdk/provider-utils@2.2.7
11
+
3
12
  ## 1.3.10
4
13
 
5
14
  ### Patch Changes
package/dist/index.d.mts CHANGED
@@ -1,4 +1,4 @@
1
- import { LanguageModelV1, ProviderV1, EmbeddingModelV1, ImageModelV1, TranscriptionModelV1 } from '@ai-sdk/provider';
1
+ import { LanguageModelV1, ProviderV1, EmbeddingModelV1, ImageModelV1, TranscriptionModelV1, SpeechModelV1 } from '@ai-sdk/provider';
2
2
  import { FetchFunction } from '@ai-sdk/provider-utils';
3
3
  import { z } from 'zod';
4
4
 
@@ -203,6 +203,8 @@ declare const openaiTools: {
203
203
  webSearchPreview: typeof webSearchPreviewTool;
204
204
  };
205
205
 
206
+ type OpenAISpeechModelId = 'tts-1' | 'tts-1-hd' | 'gpt-4o-mini-tts' | (string & {});
207
+
206
208
  interface OpenAIProvider extends ProviderV1 {
207
209
  (modelId: 'gpt-3.5-turbo-instruct', settings?: OpenAICompletionSettings): OpenAICompletionLanguageModel;
208
210
  (modelId: OpenAIChatModelId, settings?: OpenAIChatSettings): LanguageModelV1;
@@ -250,6 +252,10 @@ interface OpenAIProvider extends ProviderV1 {
250
252
  */
251
253
  transcription(modelId: OpenAITranscriptionModelId): TranscriptionModelV1;
252
254
  /**
255
+ Creates a model for speech generation.
256
+ */
257
+ speech(modelId: OpenAISpeechModelId): SpeechModelV1;
258
+ /**
253
259
  OpenAI-specific tools.
254
260
  */
255
261
  tools: typeof openaiTools;
package/dist/index.d.ts CHANGED
@@ -1,4 +1,4 @@
1
- import { LanguageModelV1, ProviderV1, EmbeddingModelV1, ImageModelV1, TranscriptionModelV1 } from '@ai-sdk/provider';
1
+ import { LanguageModelV1, ProviderV1, EmbeddingModelV1, ImageModelV1, TranscriptionModelV1, SpeechModelV1 } from '@ai-sdk/provider';
2
2
  import { FetchFunction } from '@ai-sdk/provider-utils';
3
3
  import { z } from 'zod';
4
4
 
@@ -203,6 +203,8 @@ declare const openaiTools: {
203
203
  webSearchPreview: typeof webSearchPreviewTool;
204
204
  };
205
205
 
206
+ type OpenAISpeechModelId = 'tts-1' | 'tts-1-hd' | 'gpt-4o-mini-tts' | (string & {});
207
+
206
208
  interface OpenAIProvider extends ProviderV1 {
207
209
  (modelId: 'gpt-3.5-turbo-instruct', settings?: OpenAICompletionSettings): OpenAICompletionLanguageModel;
208
210
  (modelId: OpenAIChatModelId, settings?: OpenAIChatSettings): LanguageModelV1;
@@ -250,6 +252,10 @@ interface OpenAIProvider extends ProviderV1 {
250
252
  */
251
253
  transcription(modelId: OpenAITranscriptionModelId): TranscriptionModelV1;
252
254
  /**
255
+ Creates a model for speech generation.
256
+ */
257
+ speech(modelId: OpenAISpeechModelId): SpeechModelV1;
258
+ /**
253
259
  OpenAI-specific tools.
254
260
  */
255
261
  tools: typeof openaiTools;
package/dist/index.js CHANGED
@@ -26,7 +26,7 @@ __export(src_exports, {
26
26
  module.exports = __toCommonJS(src_exports);
27
27
 
28
28
  // src/openai-provider.ts
29
- var import_provider_utils10 = require("@ai-sdk/provider-utils");
29
+ var import_provider_utils11 = require("@ai-sdk/provider-utils");
30
30
 
31
31
  // src/openai-chat-language-model.ts
32
32
  var import_provider3 = require("@ai-sdk/provider");
@@ -1613,7 +1613,7 @@ var openaiImageResponseSchema = import_zod5.z.object({
1613
1613
  // src/openai-transcription-model.ts
1614
1614
  var import_provider_utils7 = require("@ai-sdk/provider-utils");
1615
1615
  var import_zod6 = require("zod");
1616
- var OpenAIProviderOptionsSchema = import_zod6.z.object({
1616
+ var openAIProviderOptionsSchema = import_zod6.z.object({
1617
1617
  include: import_zod6.z.array(import_zod6.z.string()).nullish(),
1618
1618
  language: import_zod6.z.string().nullish(),
1619
1619
  prompt: import_zod6.z.string().nullish(),
@@ -1698,7 +1698,7 @@ var OpenAITranscriptionModel = class {
1698
1698
  const openAIOptions = (0, import_provider_utils7.parseProviderOptions)({
1699
1699
  provider: "openai",
1700
1700
  providerOptions,
1701
- schema: OpenAIProviderOptionsSchema
1701
+ schema: openAIProviderOptionsSchema
1702
1702
  });
1703
1703
  const formData = new FormData();
1704
1704
  const blob = audio instanceof Uint8Array ? new Blob([audio]) : new Blob([(0, import_provider_utils7.convertBase64ToUint8Array)(audio)]);
@@ -2594,14 +2594,113 @@ var openaiTools = {
2594
2594
  webSearchPreview: webSearchPreviewTool
2595
2595
  };
2596
2596
 
2597
+ // src/openai-speech-model.ts
2598
+ var import_provider_utils10 = require("@ai-sdk/provider-utils");
2599
+ var import_zod9 = require("zod");
2600
+ var OpenAIProviderOptionsSchema = import_zod9.z.object({
2601
+ instructions: import_zod9.z.string().nullish(),
2602
+ speed: import_zod9.z.number().min(0.25).max(4).default(1).nullish()
2603
+ });
2604
+ var OpenAISpeechModel = class {
2605
+ constructor(modelId, config) {
2606
+ this.modelId = modelId;
2607
+ this.config = config;
2608
+ this.specificationVersion = "v1";
2609
+ }
2610
+ get provider() {
2611
+ return this.config.provider;
2612
+ }
2613
+ getArgs({
2614
+ text,
2615
+ voice = "alloy",
2616
+ outputFormat = "mp3",
2617
+ speed,
2618
+ instructions,
2619
+ providerOptions
2620
+ }) {
2621
+ const warnings = [];
2622
+ const openAIOptions = (0, import_provider_utils10.parseProviderOptions)({
2623
+ provider: "openai",
2624
+ providerOptions,
2625
+ schema: OpenAIProviderOptionsSchema
2626
+ });
2627
+ const requestBody = {
2628
+ model: this.modelId,
2629
+ input: text,
2630
+ voice,
2631
+ response_format: "mp3",
2632
+ speed,
2633
+ instructions
2634
+ };
2635
+ if (outputFormat) {
2636
+ if (["mp3", "opus", "aac", "flac", "wav", "pcm"].includes(outputFormat)) {
2637
+ requestBody.response_format = outputFormat;
2638
+ } else {
2639
+ warnings.push({
2640
+ type: "unsupported-setting",
2641
+ setting: "outputFormat",
2642
+ details: `Unsupported output format: ${outputFormat}. Using mp3 instead.`
2643
+ });
2644
+ }
2645
+ }
2646
+ if (openAIOptions) {
2647
+ const speechModelOptions = {};
2648
+ for (const key in speechModelOptions) {
2649
+ const value = speechModelOptions[key];
2650
+ if (value !== void 0) {
2651
+ requestBody[key] = value;
2652
+ }
2653
+ }
2654
+ }
2655
+ return {
2656
+ requestBody,
2657
+ warnings
2658
+ };
2659
+ }
2660
+ async doGenerate(options) {
2661
+ var _a, _b, _c;
2662
+ const currentDate = (_c = (_b = (_a = this.config._internal) == null ? void 0 : _a.currentDate) == null ? void 0 : _b.call(_a)) != null ? _c : /* @__PURE__ */ new Date();
2663
+ const { requestBody, warnings } = this.getArgs(options);
2664
+ const {
2665
+ value: audio,
2666
+ responseHeaders,
2667
+ rawValue: rawResponse
2668
+ } = await (0, import_provider_utils10.postJsonToApi)({
2669
+ url: this.config.url({
2670
+ path: "/audio/speech",
2671
+ modelId: this.modelId
2672
+ }),
2673
+ headers: (0, import_provider_utils10.combineHeaders)(this.config.headers(), options.headers),
2674
+ body: requestBody,
2675
+ failedResponseHandler: openaiFailedResponseHandler,
2676
+ successfulResponseHandler: (0, import_provider_utils10.createBinaryResponseHandler)(),
2677
+ abortSignal: options.abortSignal,
2678
+ fetch: this.config.fetch
2679
+ });
2680
+ return {
2681
+ audio,
2682
+ warnings,
2683
+ request: {
2684
+ body: JSON.stringify(requestBody)
2685
+ },
2686
+ response: {
2687
+ timestamp: currentDate,
2688
+ modelId: this.modelId,
2689
+ headers: responseHeaders,
2690
+ body: rawResponse
2691
+ }
2692
+ };
2693
+ }
2694
+ };
2695
+
2597
2696
  // src/openai-provider.ts
2598
2697
  function createOpenAI(options = {}) {
2599
2698
  var _a, _b, _c;
2600
- const baseURL = (_a = (0, import_provider_utils10.withoutTrailingSlash)(options.baseURL)) != null ? _a : "https://api.openai.com/v1";
2699
+ const baseURL = (_a = (0, import_provider_utils11.withoutTrailingSlash)(options.baseURL)) != null ? _a : "https://api.openai.com/v1";
2601
2700
  const compatibility = (_b = options.compatibility) != null ? _b : "compatible";
2602
2701
  const providerName = (_c = options.name) != null ? _c : "openai";
2603
2702
  const getHeaders = () => ({
2604
- Authorization: `Bearer ${(0, import_provider_utils10.loadApiKey)({
2703
+ Authorization: `Bearer ${(0, import_provider_utils11.loadApiKey)({
2605
2704
  apiKey: options.apiKey,
2606
2705
  environmentVariableName: "OPENAI_API_KEY",
2607
2706
  description: "OpenAI"
@@ -2642,6 +2741,12 @@ function createOpenAI(options = {}) {
2642
2741
  headers: getHeaders,
2643
2742
  fetch: options.fetch
2644
2743
  });
2744
+ const createSpeechModel = (modelId) => new OpenAISpeechModel(modelId, {
2745
+ provider: `${providerName}.speech`,
2746
+ url: ({ path }) => `${baseURL}${path}`,
2747
+ headers: getHeaders,
2748
+ fetch: options.fetch
2749
+ });
2645
2750
  const createLanguageModel = (modelId, settings) => {
2646
2751
  if (new.target) {
2647
2752
  throw new Error(
@@ -2678,6 +2783,8 @@ function createOpenAI(options = {}) {
2678
2783
  provider.imageModel = createImageModel;
2679
2784
  provider.transcription = createTranscriptionModel;
2680
2785
  provider.transcriptionModel = createTranscriptionModel;
2786
+ provider.speech = createSpeechModel;
2787
+ provider.speechModel = createSpeechModel;
2681
2788
  provider.tools = openaiTools;
2682
2789
  return provider;
2683
2790
  }