ai 4.3.5 → 4.3.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,17 @@
1
1
  # ai
2
2
 
3
+ ## 4.3.6
4
+
5
+ ### Patch Changes
6
+
7
+ - beef951: feat: add speech with experimental_generateSpeech
8
+ - bd41167: fix(ai/core): properly handle custom separator in provider registry
9
+ - Updated dependencies [beef951]
10
+ - @ai-sdk/provider@1.1.3
11
+ - @ai-sdk/provider-utils@2.2.7
12
+ - @ai-sdk/ui-utils@1.2.8
13
+ - @ai-sdk/react@1.2.9
14
+
3
15
  ## 4.3.5
4
16
 
5
17
  ### Patch Changes
package/dist/index.d.mts CHANGED
@@ -2,7 +2,7 @@ import { IDGenerator } from '@ai-sdk/provider-utils';
2
2
  export { CoreToolCall, CoreToolResult, IDGenerator, ToolCall, ToolResult, createIdGenerator, generateId } from '@ai-sdk/provider-utils';
3
3
  import { DataStreamString, Message, Schema, DeepPartial, JSONValue as JSONValue$1, AssistantMessage, DataMessage } from '@ai-sdk/ui-utils';
4
4
  export { AssistantMessage, AssistantStatus, Attachment, ChatRequest, ChatRequestOptions, CreateMessage, DataMessage, DataStreamPart, DeepPartial, IdGenerator, JSONValue, Message, RequestOptions, Schema, ToolInvocation, UIMessage, UseAssistantOptions, formatAssistantStreamPart, formatDataStreamPart, jsonSchema, parseAssistantStreamPart, parseDataStreamPart, processDataStream, processTextStream, zodSchema } from '@ai-sdk/ui-utils';
5
- import { LanguageModelV1, LanguageModelV1FinishReason, LanguageModelV1LogProbs, LanguageModelV1CallWarning, LanguageModelV1Source, JSONValue, EmbeddingModelV1, EmbeddingModelV1Embedding, ImageModelV1, ImageModelV1CallWarning, LanguageModelV1ProviderMetadata, LanguageModelV1CallOptions, AISDKError, LanguageModelV1FunctionToolCall, JSONSchema7, JSONParseError, TypeValidationError, TranscriptionModelV1CallWarning, TranscriptionModelV1, ProviderV1, NoSuchModelError } from '@ai-sdk/provider';
5
+ import { LanguageModelV1, LanguageModelV1FinishReason, LanguageModelV1LogProbs, LanguageModelV1CallWarning, LanguageModelV1Source, JSONValue, EmbeddingModelV1, EmbeddingModelV1Embedding, ImageModelV1, ImageModelV1CallWarning, LanguageModelV1ProviderMetadata, TranscriptionModelV1, TranscriptionModelV1CallWarning, SpeechModelV1, SpeechModelV1CallWarning, LanguageModelV1CallOptions, AISDKError, LanguageModelV1FunctionToolCall, JSONSchema7, JSONParseError, TypeValidationError, ProviderV1, NoSuchModelError } from '@ai-sdk/provider';
6
6
  export { AISDKError, APICallError, EmptyResponseBodyError, InvalidPromptError, InvalidResponseDataError, JSONParseError, LanguageModelV1, LanguageModelV1CallOptions, LanguageModelV1Prompt, LanguageModelV1StreamPart, LoadAPIKeyError, NoContentGeneratedError, NoSuchModelError, TypeValidationError, UnsupportedFunctionalityError } from '@ai-sdk/provider';
7
7
  import { ServerResponse } from 'node:http';
8
8
  import { AttributeValue, Tracer } from '@opentelemetry/api';
@@ -276,6 +276,56 @@ type EmbeddingModelUsage = {
276
276
  tokens: number;
277
277
  };
278
278
 
279
+ /**
280
+ Transcription model that is used by the AI SDK Core functions.
281
+ */
282
+ type TranscriptionModel = TranscriptionModelV1;
283
+ /**
284
+ Warning from the model provider for this call. The call will proceed, but e.g.
285
+ some settings might not be supported, which can lead to suboptimal results.
286
+ */
287
+ type TranscriptionWarning = TranscriptionModelV1CallWarning;
288
+
289
+ type TranscriptionModelResponseMetadata = {
290
+ /**
291
+ Timestamp for the start of the generated response.
292
+ */
293
+ timestamp: Date;
294
+ /**
295
+ The ID of the response model that was used to generate the response.
296
+ */
297
+ modelId: string;
298
+ /**
299
+ Response headers.
300
+ */
301
+ headers?: Record<string, string>;
302
+ };
303
+
304
+ /**
305
+ Speech model that is used by the AI SDK Core functions.
306
+ */
307
+ type SpeechModel = SpeechModelV1;
308
+ /**
309
+ Warning from the model provider for this call. The call will proceed, but e.g.
310
+ some settings might not be supported, which can lead to suboptimal results.
311
+ */
312
+ type SpeechWarning = SpeechModelV1CallWarning;
313
+
314
+ type SpeechModelResponseMetadata = {
315
+ /**
316
+ Timestamp for the start of the generated response.
317
+ */
318
+ timestamp: Date;
319
+ /**
320
+ The ID of the response model that was used to generate the response.
321
+ */
322
+ modelId: string;
323
+ /**
324
+ Response headers.
325
+ */
326
+ headers?: Record<string, string>;
327
+ };
328
+
279
329
  /**
280
330
  The result of an `embed` call.
281
331
  It contains the embedding, the value, and additional information.
@@ -3793,25 +3843,109 @@ Callback that is called when the LLM response and the final object validation ar
3793
3843
  }): StreamObjectResult<JSONValue, JSONValue, never>;
3794
3844
 
3795
3845
  /**
3796
- Warning from the model provider for this call. The call will proceed, but e.g.
3797
- some settings might not be supported, which can lead to suboptimal results.
3798
- */
3799
- type TranscriptionWarning = TranscriptionModelV1CallWarning;
3846
+ * A generated audio file.
3847
+ */
3848
+ interface GeneratedAudioFile extends GeneratedFile {
3849
+ /**
3850
+ * Audio format of the file (e.g., 'mp3', 'wav', etc.)
3851
+ */
3852
+ readonly format: string;
3853
+ }
3800
3854
 
3801
- type TranscriptionModelResponseMetadata = {
3855
+ /**
3856
+ The result of a `generateSpeech` call.
3857
+ It contains the audio data and additional information.
3858
+ */
3859
+ interface SpeechResult {
3802
3860
  /**
3803
- Timestamp for the start of the generated response.
3861
+ * The audio data as a base64 encoded string or binary data.
3804
3862
  */
3805
- timestamp: Date;
3863
+ readonly audio: GeneratedAudioFile;
3806
3864
  /**
3807
- The ID of the response model that was used to generate the response.
3865
+ Warnings for the call, e.g. unsupported settings.
3866
+ */
3867
+ readonly warnings: Array<SpeechWarning>;
3868
+ /**
3869
+ Response metadata from the provider. There may be multiple responses if we made multiple calls to the model.
3808
3870
  */
3809
- modelId: string;
3871
+ readonly responses: Array<SpeechModelResponseMetadata>;
3810
3872
  /**
3811
- Response headers.
3873
+ Provider metadata from the provider.
3812
3874
  */
3875
+ readonly providerMetadata: Record<string, Record<string, JSONValue>>;
3876
+ }
3877
+
3878
+ /**
3879
+ Generates speech audio using a speech model.
3880
+
3881
+ @param model - The speech model to use.
3882
+ @param text - The text to convert to speech.
3883
+ @param voice - The voice to use for speech generation.
3884
+ @param outputFormat - The output format to use for speech generation e.g. "mp3", "wav", etc.
3885
+ @param instructions - Instructions for the speech generation e.g. "Speak in a slow and steady tone".
3886
+ @param speed - The speed of the speech generation.
3887
+ @param providerOptions - Additional provider-specific options that are passed through to the provider
3888
+ as body parameters.
3889
+ @param maxRetries - Maximum number of retries. Set to 0 to disable retries. Default: 2.
3890
+ @param abortSignal - An optional abort signal that can be used to cancel the call.
3891
+ @param headers - Additional HTTP headers to be sent with the request. Only applicable for HTTP-based providers.
3892
+
3893
+ @returns A result object that contains the generated audio data.
3894
+ */
3895
+ declare function generateSpeech({ model, text, voice, outputFormat, instructions, speed, providerOptions, maxRetries: maxRetriesArg, abortSignal, headers, }: {
3896
+ /**
3897
+ The speech model to use.
3898
+ */
3899
+ model: SpeechModelV1;
3900
+ /**
3901
+ The text to convert to speech.
3902
+ */
3903
+ text: string;
3904
+ /**
3905
+ The voice to use for speech generation.
3906
+ */
3907
+ voice?: string;
3908
+ /**
3909
+ * The desired output format for the audio e.g. "mp3", "wav", etc.
3910
+ */
3911
+ outputFormat?: 'mp3' | 'wav' | (string & {});
3912
+ /**
3913
+ Instructions for the speech generation e.g. "Speak in a slow and steady tone".
3914
+ */
3915
+ instructions?: string;
3916
+ /**
3917
+ The speed of the speech generation.
3918
+ */
3919
+ speed?: number;
3920
+ /**
3921
+ Additional provider-specific options that are passed through to the provider
3922
+ as body parameters.
3923
+
3924
+ The outer record is keyed by the provider name, and the inner
3925
+ record is keyed by the provider-specific metadata key.
3926
+ ```ts
3927
+ {
3928
+ "openai": {}
3929
+ }
3930
+ ```
3931
+ */
3932
+ providerOptions?: ProviderOptions;
3933
+ /**
3934
+ Maximum number of retries per speech model call. Set to 0 to disable retries.
3935
+
3936
+ @default 2
3937
+ */
3938
+ maxRetries?: number;
3939
+ /**
3940
+ Abort signal.
3941
+ */
3942
+ abortSignal?: AbortSignal;
3943
+ /**
3944
+ Additional headers to include in the request.
3945
+ Only applicable for HTTP-based providers.
3946
+ */
3813
3947
  headers?: Record<string, string>;
3814
- };
3948
+ }): Promise<SpeechResult>;
3815
3949
 
3816
3950
  /**
3817
3951
  The result of a `transcribe` call.
@@ -4521,4 +4655,4 @@ declare namespace llamaindexAdapter {
4521
4655
  };
4522
4656
  }
4523
4657
 
4524
- export { AssistantContent, AssistantResponse, CallWarning, ChunkDetector, CoreAssistantMessage, CoreMessage, CoreSystemMessage, CoreTool, CoreToolCallUnion, CoreToolChoice, CoreToolMessage, CoreToolResultUnion, CoreUserMessage, DataContent, DataStreamOptions, DataStreamWriter, DownloadError, EmbedManyResult, EmbedResult, Embedding, EmbeddingModel, EmbeddingModelUsage, GenerateImageResult as Experimental_GenerateImageResult, GeneratedFile as Experimental_GeneratedImage, Experimental_LanguageModelV1Middleware, TranscriptionResult as Experimental_TranscriptionResult, FilePart, FinishReason, GenerateObjectResult, GenerateTextOnStepFinishCallback, GenerateTextResult, GeneratedFile, ImageModel, ImageGenerationWarning as ImageModelCallWarning, ImageModelResponseMetadata, ImagePart, InvalidArgumentError, InvalidDataContentError, InvalidMessageRoleError, InvalidStreamPartError, InvalidToolArgumentsError, JSONRPCError, JSONRPCMessage, JSONRPCNotification, JSONRPCRequest, JSONRPCResponse, langchainAdapter as LangChainAdapter, LanguageModel, LanguageModelRequestMetadata, LanguageModelResponseMetadata, LanguageModelUsage, LanguageModelV1Middleware, llamaindexAdapter as LlamaIndexAdapter, LogProbs, MCPClientError, MCPTransport, MessageConversionError, NoImageGeneratedError, NoObjectGeneratedError, NoOutputSpecifiedError, NoSuchProviderError, NoSuchToolError, ObjectStreamPart, output as Output, Provider, ProviderMetadata, ProviderRegistryProvider, RepairTextFunction, RetryError, StepResult, StreamData, StreamObjectOnFinishCallback, StreamObjectResult, StreamTextOnChunkCallback, StreamTextOnErrorCallback, StreamTextOnFinishCallback, StreamTextOnStepFinishCallback, StreamTextResult, StreamTextTransform, TelemetrySettings, TextPart, TextStreamPart, Tool, ToolCallPart, ToolCallRepairError, ToolCallRepairFunction, ToolCallUnion, ToolChoice, ToolContent, ToolExecutionError, ToolExecutionOptions, ToolResultPart, ToolResultUnion, ToolSet, UserContent, appendClientMessage, appendResponseMessages, convertToCoreMessages, coreAssistantMessageSchema, coreMessageSchema, coreSystemMessageSchema, coreToolMessageSchema, coreUserMessageSchema, cosineSimilarity, createDataStream, createDataStreamResponse, createProviderRegistry, customProvider, defaultSettingsMiddleware, embed, embedMany, createMCPClient as experimental_createMCPClient, experimental_createProviderRegistry, experimental_customProvider, generateImage as experimental_generateImage, transcribe as experimental_transcribe, experimental_wrapLanguageModel, extractReasoningMiddleware, generateObject, generateText, pipeDataStreamToResponse, simulateReadableStream, simulateStreamingMiddleware, smoothStream, streamObject, streamText, tool, wrapLanguageModel };
4658
+ export { AssistantContent, AssistantResponse, CallWarning, ChunkDetector, CoreAssistantMessage, CoreMessage, CoreSystemMessage, CoreTool, CoreToolCallUnion, CoreToolChoice, CoreToolMessage, CoreToolResultUnion, CoreUserMessage, DataContent, DataStreamOptions, DataStreamWriter, DownloadError, EmbedManyResult, EmbedResult, Embedding, EmbeddingModel, EmbeddingModelUsage, GenerateImageResult as Experimental_GenerateImageResult, GeneratedFile as Experimental_GeneratedImage, Experimental_LanguageModelV1Middleware, SpeechResult as Experimental_SpeechResult, TranscriptionResult as Experimental_TranscriptionResult, FilePart, FinishReason, GenerateObjectResult, GenerateTextOnStepFinishCallback, GenerateTextResult, GeneratedAudioFile, GeneratedFile, ImageModel, ImageGenerationWarning as ImageModelCallWarning, ImageModelResponseMetadata, ImagePart, InvalidArgumentError, InvalidDataContentError, InvalidMessageRoleError, InvalidStreamPartError, InvalidToolArgumentsError, JSONRPCError, JSONRPCMessage, JSONRPCNotification, JSONRPCRequest, JSONRPCResponse, langchainAdapter as LangChainAdapter, LanguageModel, LanguageModelRequestMetadata, LanguageModelResponseMetadata, LanguageModelUsage, LanguageModelV1Middleware, llamaindexAdapter as LlamaIndexAdapter, LogProbs, MCPClientError, MCPTransport, MessageConversionError, NoImageGeneratedError, NoObjectGeneratedError, NoOutputSpecifiedError, NoSuchProviderError, NoSuchToolError, ObjectStreamPart, output as Output, Provider, ProviderMetadata, ProviderRegistryProvider, RepairTextFunction, RetryError, SpeechModel, SpeechModelResponseMetadata, SpeechWarning, StepResult, StreamData, StreamObjectOnFinishCallback, StreamObjectResult, StreamTextOnChunkCallback, StreamTextOnErrorCallback, StreamTextOnFinishCallback, StreamTextOnStepFinishCallback, StreamTextResult, StreamTextTransform, TelemetrySettings, TextPart, TextStreamPart, Tool, ToolCallPart, ToolCallRepairError, ToolCallRepairFunction, ToolCallUnion, ToolChoice, ToolContent, ToolExecutionError, ToolExecutionOptions, ToolResultPart, ToolResultUnion, ToolSet, TranscriptionModel, TranscriptionModelResponseMetadata, TranscriptionWarning, UserContent, appendClientMessage, appendResponseMessages, convertToCoreMessages, coreAssistantMessageSchema, coreMessageSchema, coreSystemMessageSchema, coreToolMessageSchema, coreUserMessageSchema, cosineSimilarity, createDataStream, createDataStreamResponse, createProviderRegistry, customProvider, defaultSettingsMiddleware, embed, embedMany, createMCPClient as experimental_createMCPClient, experimental_createProviderRegistry, experimental_customProvider, generateImage as experimental_generateImage, generateSpeech as experimental_generateSpeech, transcribe as experimental_transcribe, experimental_wrapLanguageModel, extractReasoningMiddleware, generateObject, generateText, pipeDataStreamToResponse, simulateReadableStream, simulateStreamingMiddleware, smoothStream, streamObject, streamText, tool, wrapLanguageModel };
package/dist/index.d.ts CHANGED
@@ -2,7 +2,7 @@ import { IDGenerator } from '@ai-sdk/provider-utils';
2
2
  export { CoreToolCall, CoreToolResult, IDGenerator, ToolCall, ToolResult, createIdGenerator, generateId } from '@ai-sdk/provider-utils';
3
3
  import { DataStreamString, Message, Schema, DeepPartial, JSONValue as JSONValue$1, AssistantMessage, DataMessage } from '@ai-sdk/ui-utils';
4
4
  export { AssistantMessage, AssistantStatus, Attachment, ChatRequest, ChatRequestOptions, CreateMessage, DataMessage, DataStreamPart, DeepPartial, IdGenerator, JSONValue, Message, RequestOptions, Schema, ToolInvocation, UIMessage, UseAssistantOptions, formatAssistantStreamPart, formatDataStreamPart, jsonSchema, parseAssistantStreamPart, parseDataStreamPart, processDataStream, processTextStream, zodSchema } from '@ai-sdk/ui-utils';
5
- import { LanguageModelV1, LanguageModelV1FinishReason, LanguageModelV1LogProbs, LanguageModelV1CallWarning, LanguageModelV1Source, JSONValue, EmbeddingModelV1, EmbeddingModelV1Embedding, ImageModelV1, ImageModelV1CallWarning, LanguageModelV1ProviderMetadata, LanguageModelV1CallOptions, AISDKError, LanguageModelV1FunctionToolCall, JSONSchema7, JSONParseError, TypeValidationError, TranscriptionModelV1CallWarning, TranscriptionModelV1, ProviderV1, NoSuchModelError } from '@ai-sdk/provider';
5
+ import { LanguageModelV1, LanguageModelV1FinishReason, LanguageModelV1LogProbs, LanguageModelV1CallWarning, LanguageModelV1Source, JSONValue, EmbeddingModelV1, EmbeddingModelV1Embedding, ImageModelV1, ImageModelV1CallWarning, LanguageModelV1ProviderMetadata, TranscriptionModelV1, TranscriptionModelV1CallWarning, SpeechModelV1, SpeechModelV1CallWarning, LanguageModelV1CallOptions, AISDKError, LanguageModelV1FunctionToolCall, JSONSchema7, JSONParseError, TypeValidationError, ProviderV1, NoSuchModelError } from '@ai-sdk/provider';
6
6
  export { AISDKError, APICallError, EmptyResponseBodyError, InvalidPromptError, InvalidResponseDataError, JSONParseError, LanguageModelV1, LanguageModelV1CallOptions, LanguageModelV1Prompt, LanguageModelV1StreamPart, LoadAPIKeyError, NoContentGeneratedError, NoSuchModelError, TypeValidationError, UnsupportedFunctionalityError } from '@ai-sdk/provider';
7
7
  import { ServerResponse } from 'node:http';
8
8
  import { AttributeValue, Tracer } from '@opentelemetry/api';
@@ -276,6 +276,56 @@ type EmbeddingModelUsage = {
276
276
  tokens: number;
277
277
  };
278
278
 
279
+ /**
280
+ Transcription model that is used by the AI SDK Core functions.
281
+ */
282
+ type TranscriptionModel = TranscriptionModelV1;
283
+ /**
284
+ Warning from the model provider for this call. The call will proceed, but e.g.
285
+ some settings might not be supported, which can lead to suboptimal results.
286
+ */
287
+ type TranscriptionWarning = TranscriptionModelV1CallWarning;
288
+
289
+ type TranscriptionModelResponseMetadata = {
290
+ /**
291
+ Timestamp for the start of the generated response.
292
+ */
293
+ timestamp: Date;
294
+ /**
295
+ The ID of the response model that was used to generate the response.
296
+ */
297
+ modelId: string;
298
+ /**
299
+ Response headers.
300
+ */
301
+ headers?: Record<string, string>;
302
+ };
303
+
304
+ /**
305
+ Speech model that is used by the AI SDK Core functions.
306
+ */
307
+ type SpeechModel = SpeechModelV1;
308
+ /**
309
+ Warning from the model provider for this call. The call will proceed, but e.g.
310
+ some settings might not be supported, which can lead to suboptimal results.
311
+ */
312
+ type SpeechWarning = SpeechModelV1CallWarning;
313
+
314
+ type SpeechModelResponseMetadata = {
315
+ /**
316
+ Timestamp for the start of the generated response.
317
+ */
318
+ timestamp: Date;
319
+ /**
320
+ The ID of the response model that was used to generate the response.
321
+ */
322
+ modelId: string;
323
+ /**
324
+ Response headers.
325
+ */
326
+ headers?: Record<string, string>;
327
+ };
328
+
279
329
  /**
280
330
  The result of an `embed` call.
281
331
  It contains the embedding, the value, and additional information.
@@ -3793,25 +3843,109 @@ Callback that is called when the LLM response and the final object validation ar
3793
3843
  }): StreamObjectResult<JSONValue, JSONValue, never>;
3794
3844
 
3795
3845
  /**
3796
- Warning from the model provider for this call. The call will proceed, but e.g.
3797
- some settings might not be supported, which can lead to suboptimal results.
3798
- */
3799
- type TranscriptionWarning = TranscriptionModelV1CallWarning;
3846
+ * A generated audio file.
3847
+ */
3848
+ interface GeneratedAudioFile extends GeneratedFile {
3849
+ /**
3850
+ * Audio format of the file (e.g., 'mp3', 'wav', etc.)
3851
+ */
3852
+ readonly format: string;
3853
+ }
3800
3854
 
3801
- type TranscriptionModelResponseMetadata = {
3855
+ /**
3856
+ The result of a `generateSpeech` call.
3857
+ It contains the audio data and additional information.
3858
+ */
3859
+ interface SpeechResult {
3802
3860
  /**
3803
- Timestamp for the start of the generated response.
3861
+ * The audio data as a base64 encoded string or binary data.
3804
3862
  */
3805
- timestamp: Date;
3863
+ readonly audio: GeneratedAudioFile;
3806
3864
  /**
3807
- The ID of the response model that was used to generate the response.
3865
+ Warnings for the call, e.g. unsupported settings.
3866
+ */
3867
+ readonly warnings: Array<SpeechWarning>;
3868
+ /**
3869
+ Response metadata from the provider. There may be multiple responses if we made multiple calls to the model.
3808
3870
  */
3809
- modelId: string;
3871
+ readonly responses: Array<SpeechModelResponseMetadata>;
3810
3872
  /**
3811
- Response headers.
3873
+ Provider metadata from the provider.
3812
3874
  */
3875
+ readonly providerMetadata: Record<string, Record<string, JSONValue>>;
3876
+ }
3877
+
3878
+ /**
3879
+ Generates speech audio using a speech model.
3880
+
3881
+ @param model - The speech model to use.
3882
+ @param text - The text to convert to speech.
3883
+ @param voice - The voice to use for speech generation.
3884
+ @param outputFormat - The output format to use for speech generation e.g. "mp3", "wav", etc.
3885
+ @param instructions - Instructions for the speech generation e.g. "Speak in a slow and steady tone".
3886
+ @param speed - The speed of the speech generation.
3887
+ @param providerOptions - Additional provider-specific options that are passed through to the provider
3888
+ as body parameters.
3889
+ @param maxRetries - Maximum number of retries. Set to 0 to disable retries. Default: 2.
3890
+ @param abortSignal - An optional abort signal that can be used to cancel the call.
3891
+ @param headers - Additional HTTP headers to be sent with the request. Only applicable for HTTP-based providers.
3892
+
3893
+ @returns A result object that contains the generated audio data.
3894
+ */
3895
+ declare function generateSpeech({ model, text, voice, outputFormat, instructions, speed, providerOptions, maxRetries: maxRetriesArg, abortSignal, headers, }: {
3896
+ /**
3897
+ The speech model to use.
3898
+ */
3899
+ model: SpeechModelV1;
3900
+ /**
3901
+ The text to convert to speech.
3902
+ */
3903
+ text: string;
3904
+ /**
3905
+ The voice to use for speech generation.
3906
+ */
3907
+ voice?: string;
3908
+ /**
3909
+ * The desired output format for the audio e.g. "mp3", "wav", etc.
3910
+ */
3911
+ outputFormat?: 'mp3' | 'wav' | (string & {});
3912
+ /**
3913
+ Instructions for the speech generation e.g. "Speak in a slow and steady tone".
3914
+ */
3915
+ instructions?: string;
3916
+ /**
3917
+ The speed of the speech generation.
3918
+ */
3919
+ speed?: number;
3920
+ /**
3921
+ Additional provider-specific options that are passed through to the provider
3922
+ as body parameters.
3923
+
3924
+ The outer record is keyed by the provider name, and the inner
3925
+ record is keyed by the provider-specific metadata key.
3926
+ ```ts
3927
+ {
3928
+ "openai": {}
3929
+ }
3930
+ ```
3931
+ */
3932
+ providerOptions?: ProviderOptions;
3933
+ /**
3934
+ Maximum number of retries per speech model call. Set to 0 to disable retries.
3935
+
3936
+ @default 2
3937
+ */
3938
+ maxRetries?: number;
3939
+ /**
3940
+ Abort signal.
3941
+ */
3942
+ abortSignal?: AbortSignal;
3943
+ /**
3944
+ Additional headers to include in the request.
3945
+ Only applicable for HTTP-based providers.
3946
+ */
3813
3947
  headers?: Record<string, string>;
3814
- };
3948
+ }): Promise<SpeechResult>;
3815
3949
 
3816
3950
  /**
3817
3951
  The result of a `transcribe` call.
@@ -4521,4 +4655,4 @@ declare namespace llamaindexAdapter {
4521
4655
  };
4522
4656
  }
4523
4657
 
4524
- export { AssistantContent, AssistantResponse, CallWarning, ChunkDetector, CoreAssistantMessage, CoreMessage, CoreSystemMessage, CoreTool, CoreToolCallUnion, CoreToolChoice, CoreToolMessage, CoreToolResultUnion, CoreUserMessage, DataContent, DataStreamOptions, DataStreamWriter, DownloadError, EmbedManyResult, EmbedResult, Embedding, EmbeddingModel, EmbeddingModelUsage, GenerateImageResult as Experimental_GenerateImageResult, GeneratedFile as Experimental_GeneratedImage, Experimental_LanguageModelV1Middleware, TranscriptionResult as Experimental_TranscriptionResult, FilePart, FinishReason, GenerateObjectResult, GenerateTextOnStepFinishCallback, GenerateTextResult, GeneratedFile, ImageModel, ImageGenerationWarning as ImageModelCallWarning, ImageModelResponseMetadata, ImagePart, InvalidArgumentError, InvalidDataContentError, InvalidMessageRoleError, InvalidStreamPartError, InvalidToolArgumentsError, JSONRPCError, JSONRPCMessage, JSONRPCNotification, JSONRPCRequest, JSONRPCResponse, langchainAdapter as LangChainAdapter, LanguageModel, LanguageModelRequestMetadata, LanguageModelResponseMetadata, LanguageModelUsage, LanguageModelV1Middleware, llamaindexAdapter as LlamaIndexAdapter, LogProbs, MCPClientError, MCPTransport, MessageConversionError, NoImageGeneratedError, NoObjectGeneratedError, NoOutputSpecifiedError, NoSuchProviderError, NoSuchToolError, ObjectStreamPart, output as Output, Provider, ProviderMetadata, ProviderRegistryProvider, RepairTextFunction, RetryError, StepResult, StreamData, StreamObjectOnFinishCallback, StreamObjectResult, StreamTextOnChunkCallback, StreamTextOnErrorCallback, StreamTextOnFinishCallback, StreamTextOnStepFinishCallback, StreamTextResult, StreamTextTransform, TelemetrySettings, TextPart, TextStreamPart, Tool, ToolCallPart, ToolCallRepairError, ToolCallRepairFunction, ToolCallUnion, ToolChoice, ToolContent, ToolExecutionError, ToolExecutionOptions, ToolResultPart, ToolResultUnion, ToolSet, UserContent, appendClientMessage, appendResponseMessages, convertToCoreMessages, coreAssistantMessageSchema, coreMessageSchema, coreSystemMessageSchema, coreToolMessageSchema, coreUserMessageSchema, cosineSimilarity, createDataStream, createDataStreamResponse, createProviderRegistry, customProvider, defaultSettingsMiddleware, embed, embedMany, createMCPClient as experimental_createMCPClient, experimental_createProviderRegistry, experimental_customProvider, generateImage as experimental_generateImage, transcribe as experimental_transcribe, experimental_wrapLanguageModel, extractReasoningMiddleware, generateObject, generateText, pipeDataStreamToResponse, simulateReadableStream, simulateStreamingMiddleware, smoothStream, streamObject, streamText, tool, wrapLanguageModel };
4658
+ export { AssistantContent, AssistantResponse, CallWarning, ChunkDetector, CoreAssistantMessage, CoreMessage, CoreSystemMessage, CoreTool, CoreToolCallUnion, CoreToolChoice, CoreToolMessage, CoreToolResultUnion, CoreUserMessage, DataContent, DataStreamOptions, DataStreamWriter, DownloadError, EmbedManyResult, EmbedResult, Embedding, EmbeddingModel, EmbeddingModelUsage, GenerateImageResult as Experimental_GenerateImageResult, GeneratedFile as Experimental_GeneratedImage, Experimental_LanguageModelV1Middleware, SpeechResult as Experimental_SpeechResult, TranscriptionResult as Experimental_TranscriptionResult, FilePart, FinishReason, GenerateObjectResult, GenerateTextOnStepFinishCallback, GenerateTextResult, GeneratedAudioFile, GeneratedFile, ImageModel, ImageGenerationWarning as ImageModelCallWarning, ImageModelResponseMetadata, ImagePart, InvalidArgumentError, InvalidDataContentError, InvalidMessageRoleError, InvalidStreamPartError, InvalidToolArgumentsError, JSONRPCError, JSONRPCMessage, JSONRPCNotification, JSONRPCRequest, JSONRPCResponse, langchainAdapter as LangChainAdapter, LanguageModel, LanguageModelRequestMetadata, LanguageModelResponseMetadata, LanguageModelUsage, LanguageModelV1Middleware, llamaindexAdapter as LlamaIndexAdapter, LogProbs, MCPClientError, MCPTransport, MessageConversionError, NoImageGeneratedError, NoObjectGeneratedError, NoOutputSpecifiedError, NoSuchProviderError, NoSuchToolError, ObjectStreamPart, output as Output, Provider, ProviderMetadata, ProviderRegistryProvider, RepairTextFunction, RetryError, SpeechModel, SpeechModelResponseMetadata, SpeechWarning, StepResult, StreamData, StreamObjectOnFinishCallback, StreamObjectResult, StreamTextOnChunkCallback, StreamTextOnErrorCallback, StreamTextOnFinishCallback, StreamTextOnStepFinishCallback, StreamTextResult, StreamTextTransform, TelemetrySettings, TextPart, TextStreamPart, Tool, ToolCallPart, ToolCallRepairError, ToolCallRepairFunction, ToolCallUnion, ToolChoice, ToolContent, ToolExecutionError, ToolExecutionOptions, ToolResultPart, ToolResultUnion, ToolSet, TranscriptionModel, TranscriptionModelResponseMetadata, TranscriptionWarning, UserContent, appendClientMessage, appendResponseMessages, convertToCoreMessages, coreAssistantMessageSchema, coreMessageSchema, coreSystemMessageSchema, coreToolMessageSchema, coreUserMessageSchema, cosineSimilarity, createDataStream, createDataStreamResponse, createProviderRegistry, customProvider, defaultSettingsMiddleware, embed, embedMany, createMCPClient as experimental_createMCPClient, experimental_createProviderRegistry, experimental_customProvider, generateImage as experimental_generateImage, generateSpeech as experimental_generateSpeech, transcribe as experimental_transcribe, experimental_wrapLanguageModel, extractReasoningMiddleware, generateObject, generateText, pipeDataStreamToResponse, simulateReadableStream, simulateStreamingMiddleware, smoothStream, streamObject, streamText, tool, wrapLanguageModel };
package/dist/index.js CHANGED
@@ -73,6 +73,7 @@ __export(streams_exports, {
73
73
  experimental_createProviderRegistry: () => experimental_createProviderRegistry,
74
74
  experimental_customProvider: () => experimental_customProvider,
75
75
  experimental_generateImage: () => generateImage,
76
+ experimental_generateSpeech: () => generateSpeech,
76
77
  experimental_transcribe: () => transcribe,
77
78
  experimental_wrapLanguageModel: () => experimental_wrapLanguageModel,
78
79
  extractReasoningMiddleware: () => extractReasoningMiddleware,
@@ -6335,9 +6336,99 @@ var DefaultStreamTextResult = class {
6335
6336
  }
6336
6337
  };
6337
6338
 
6338
- // errors/no-transcript-generated-error.ts
6339
+ // errors/no-speech-generated-error.ts
6339
6340
  var import_provider23 = require("@ai-sdk/provider");
6340
- var NoTranscriptGeneratedError = class extends import_provider23.AISDKError {
6341
+ var NoSpeechGeneratedError = class extends import_provider23.AISDKError {
6342
+ constructor(options) {
6343
+ super({
6344
+ name: "AI_NoSpeechGeneratedError",
6345
+ message: "No speech audio generated."
6346
+ });
6347
+ this.responses = options.responses;
6348
+ }
6349
+ };
6350
+
6351
+ // core/generate-speech/generated-audio-file.ts
6352
+ var DefaultGeneratedAudioFile = class extends DefaultGeneratedFile {
6353
+ constructor({
6354
+ data,
6355
+ mimeType
6356
+ }) {
6357
+ super({ data, mimeType });
6358
+ let format = "mp3";
6359
+ if (mimeType) {
6360
+ const mimeTypeParts = mimeType.split("/");
6361
+ if (mimeTypeParts.length === 2) {
6362
+ if (mimeType !== "audio/mpeg") {
6363
+ format = mimeTypeParts[1];
6364
+ }
6365
+ }
6366
+ }
6367
+ if (!format) {
6368
+ throw new Error(
6369
+ "Audio format must be provided or determinable from mimeType"
6370
+ );
6371
+ }
6372
+ this.format = format;
6373
+ }
6374
+ };
6375
+
6376
+ // core/generate-speech/generate-speech.ts
6377
+ async function generateSpeech({
6378
+ model,
6379
+ text: text2,
6380
+ voice,
6381
+ outputFormat,
6382
+ instructions,
6383
+ speed,
6384
+ providerOptions = {},
6385
+ maxRetries: maxRetriesArg,
6386
+ abortSignal,
6387
+ headers
6388
+ }) {
6389
+ var _a17;
6390
+ const { retry } = prepareRetries({ maxRetries: maxRetriesArg });
6391
+ const result = await retry(
6392
+ () => model.doGenerate({
6393
+ text: text2,
6394
+ voice,
6395
+ outputFormat,
6396
+ instructions,
6397
+ speed,
6398
+ abortSignal,
6399
+ headers,
6400
+ providerOptions
6401
+ })
6402
+ );
6403
+ if (!result.audio || result.audio.length === 0) {
6404
+ throw new NoSpeechGeneratedError({ responses: [result.response] });
6405
+ }
6406
+ return new DefaultSpeechResult({
6407
+ audio: new DefaultGeneratedAudioFile({
6408
+ data: result.audio,
6409
+ mimeType: (_a17 = detectMimeType({
6410
+ data: result.audio,
6411
+ signatures: audioMimeTypeSignatures
6412
+ })) != null ? _a17 : "audio/mp3"
6413
+ }),
6414
+ warnings: result.warnings,
6415
+ responses: [result.response],
6416
+ providerMetadata: result.providerMetadata
6417
+ });
6418
+ }
6419
+ var DefaultSpeechResult = class {
6420
+ constructor(options) {
6421
+ var _a17;
6422
+ this.audio = options.audio;
6423
+ this.warnings = options.warnings;
6424
+ this.responses = options.responses;
6425
+ this.providerMetadata = (_a17 = options.providerMetadata) != null ? _a17 : {};
6426
+ }
6427
+ };
6428
+
6429
+ // errors/no-transcript-generated-error.ts
6430
+ var import_provider24 = require("@ai-sdk/provider");
6431
+ var NoTranscriptGeneratedError = class extends import_provider24.AISDKError {
6341
6432
  constructor(options) {
6342
6433
  super({
6343
6434
  name: "AI_NoTranscriptGeneratedError",
@@ -6714,7 +6805,7 @@ function appendClientMessage({
6714
6805
 
6715
6806
  // core/prompt/append-response-messages.ts
6716
6807
  var import_ui_utils9 = require("@ai-sdk/ui-utils");
6717
- var import_provider24 = require("@ai-sdk/provider");
6808
+ var import_provider25 = require("@ai-sdk/provider");
6718
6809
  function appendResponseMessages({
6719
6810
  messages,
6720
6811
  responseMessages,
@@ -6797,7 +6888,7 @@ function appendResponseMessages({
6797
6888
  break;
6798
6889
  case "file":
6799
6890
  if (part.data instanceof URL) {
6800
- throw new import_provider24.AISDKError({
6891
+ throw new import_provider25.AISDKError({
6801
6892
  name: "InvalidAssistantFileData",
6802
6893
  message: "File data cannot be a URL"
6803
6894
  });
@@ -6891,7 +6982,7 @@ function appendResponseMessages({
6891
6982
  }
6892
6983
 
6893
6984
  // core/registry/custom-provider.ts
6894
- var import_provider25 = require("@ai-sdk/provider");
6985
+ var import_provider26 = require("@ai-sdk/provider");
6895
6986
  function customProvider({
6896
6987
  languageModels,
6897
6988
  textEmbeddingModels,
@@ -6906,7 +6997,7 @@ function customProvider({
6906
6997
  if (fallbackProvider) {
6907
6998
  return fallbackProvider.languageModel(modelId);
6908
6999
  }
6909
- throw new import_provider25.NoSuchModelError({ modelId, modelType: "languageModel" });
7000
+ throw new import_provider26.NoSuchModelError({ modelId, modelType: "languageModel" });
6910
7001
  },
6911
7002
  textEmbeddingModel(modelId) {
6912
7003
  if (textEmbeddingModels != null && modelId in textEmbeddingModels) {
@@ -6915,7 +7006,7 @@ function customProvider({
6915
7006
  if (fallbackProvider) {
6916
7007
  return fallbackProvider.textEmbeddingModel(modelId);
6917
7008
  }
6918
- throw new import_provider25.NoSuchModelError({ modelId, modelType: "textEmbeddingModel" });
7009
+ throw new import_provider26.NoSuchModelError({ modelId, modelType: "textEmbeddingModel" });
6919
7010
  },
6920
7011
  imageModel(modelId) {
6921
7012
  if (imageModels != null && modelId in imageModels) {
@@ -6924,19 +7015,19 @@ function customProvider({
6924
7015
  if (fallbackProvider == null ? void 0 : fallbackProvider.imageModel) {
6925
7016
  return fallbackProvider.imageModel(modelId);
6926
7017
  }
6927
- throw new import_provider25.NoSuchModelError({ modelId, modelType: "imageModel" });
7018
+ throw new import_provider26.NoSuchModelError({ modelId, modelType: "imageModel" });
6928
7019
  }
6929
7020
  };
6930
7021
  }
6931
7022
  var experimental_customProvider = customProvider;
6932
7023
 
6933
7024
  // core/registry/no-such-provider-error.ts
6934
- var import_provider26 = require("@ai-sdk/provider");
7025
+ var import_provider27 = require("@ai-sdk/provider");
6935
7026
  var name16 = "AI_NoSuchProviderError";
6936
7027
  var marker16 = `vercel.ai.error.${name16}`;
6937
7028
  var symbol16 = Symbol.for(marker16);
6938
7029
  var _a16;
6939
- var NoSuchProviderError = class extends import_provider26.NoSuchModelError {
7030
+ var NoSuchProviderError = class extends import_provider27.NoSuchModelError {
6940
7031
  constructor({
6941
7032
  modelId,
6942
7033
  modelType,
@@ -6950,13 +7041,13 @@ var NoSuchProviderError = class extends import_provider26.NoSuchModelError {
6950
7041
  this.availableProviders = availableProviders;
6951
7042
  }
6952
7043
  static isInstance(error) {
6953
- return import_provider26.AISDKError.hasMarker(error, marker16);
7044
+ return import_provider27.AISDKError.hasMarker(error, marker16);
6954
7045
  }
6955
7046
  };
6956
7047
  _a16 = symbol16;
6957
7048
 
6958
7049
  // core/registry/provider-registry.ts
6959
- var import_provider27 = require("@ai-sdk/provider");
7050
+ var import_provider28 = require("@ai-sdk/provider");
6960
7051
  function createProviderRegistry(providers, {
6961
7052
  separator = ":"
6962
7053
  } = {}) {
@@ -6995,20 +7086,20 @@ var DefaultProviderRegistry = class {
6995
7086
  splitId(id, modelType) {
6996
7087
  const index = id.indexOf(this.separator);
6997
7088
  if (index === -1) {
6998
- throw new import_provider27.NoSuchModelError({
7089
+ throw new import_provider28.NoSuchModelError({
6999
7090
  modelId: id,
7000
7091
  modelType,
7001
7092
  message: `Invalid ${modelType} id for registry: ${id} (must be in the format "providerId${this.separator}modelId")`
7002
7093
  });
7003
7094
  }
7004
- return [id.slice(0, index), id.slice(index + 1)];
7095
+ return [id.slice(0, index), id.slice(index + this.separator.length)];
7005
7096
  }
7006
7097
  languageModel(id) {
7007
7098
  var _a17, _b;
7008
7099
  const [providerId, modelId] = this.splitId(id, "languageModel");
7009
7100
  const model = (_b = (_a17 = this.getProvider(providerId)).languageModel) == null ? void 0 : _b.call(_a17, modelId);
7010
7101
  if (model == null) {
7011
- throw new import_provider27.NoSuchModelError({ modelId: id, modelType: "languageModel" });
7102
+ throw new import_provider28.NoSuchModelError({ modelId: id, modelType: "languageModel" });
7012
7103
  }
7013
7104
  return model;
7014
7105
  }
@@ -7018,7 +7109,7 @@ var DefaultProviderRegistry = class {
7018
7109
  const provider = this.getProvider(providerId);
7019
7110
  const model = (_a17 = provider.textEmbeddingModel) == null ? void 0 : _a17.call(provider, modelId);
7020
7111
  if (model == null) {
7021
- throw new import_provider27.NoSuchModelError({
7112
+ throw new import_provider28.NoSuchModelError({
7022
7113
  modelId: id,
7023
7114
  modelType: "textEmbeddingModel"
7024
7115
  });
@@ -7031,7 +7122,7 @@ var DefaultProviderRegistry = class {
7031
7122
  const provider = this.getProvider(providerId);
7032
7123
  const model = (_a17 = provider.imageModel) == null ? void 0 : _a17.call(provider, modelId);
7033
7124
  if (model == null) {
7034
- throw new import_provider27.NoSuchModelError({ modelId: id, modelType: "imageModel" });
7125
+ throw new import_provider28.NoSuchModelError({ modelId: id, modelType: "imageModel" });
7035
7126
  }
7036
7127
  return model;
7037
7128
  }
@@ -8038,6 +8129,7 @@ var StreamData = class {
8038
8129
  experimental_createProviderRegistry,
8039
8130
  experimental_customProvider,
8040
8131
  experimental_generateImage,
8132
+ experimental_generateSpeech,
8041
8133
  experimental_transcribe,
8042
8134
  experimental_wrapLanguageModel,
8043
8135
  extractReasoningMiddleware,