ai 4.3.5 → 4.3.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +12 -0
- package/dist/index.d.mts +147 -13
- package/dist/index.d.ts +147 -13
- package/dist/index.js +109 -17
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +98 -7
- package/dist/index.mjs.map +1 -1
- package/package.json +5 -5
package/CHANGELOG.md
CHANGED
@@ -1,5 +1,17 @@
|
|
1
1
|
# ai
|
2
2
|
|
3
|
+
## 4.3.6
|
4
|
+
|
5
|
+
### Patch Changes
|
6
|
+
|
7
|
+
- beef951: feat: add speech with experimental_generateSpeech
|
8
|
+
- bd41167: fix(ai/core): properly handle custom separator in provider registry
|
9
|
+
- Updated dependencies [beef951]
|
10
|
+
- @ai-sdk/provider@1.1.3
|
11
|
+
- @ai-sdk/provider-utils@2.2.7
|
12
|
+
- @ai-sdk/ui-utils@1.2.8
|
13
|
+
- @ai-sdk/react@1.2.9
|
14
|
+
|
3
15
|
## 4.3.5
|
4
16
|
|
5
17
|
### Patch Changes
|
package/dist/index.d.mts
CHANGED
@@ -2,7 +2,7 @@ import { IDGenerator } from '@ai-sdk/provider-utils';
|
|
2
2
|
export { CoreToolCall, CoreToolResult, IDGenerator, ToolCall, ToolResult, createIdGenerator, generateId } from '@ai-sdk/provider-utils';
|
3
3
|
import { DataStreamString, Message, Schema, DeepPartial, JSONValue as JSONValue$1, AssistantMessage, DataMessage } from '@ai-sdk/ui-utils';
|
4
4
|
export { AssistantMessage, AssistantStatus, Attachment, ChatRequest, ChatRequestOptions, CreateMessage, DataMessage, DataStreamPart, DeepPartial, IdGenerator, JSONValue, Message, RequestOptions, Schema, ToolInvocation, UIMessage, UseAssistantOptions, formatAssistantStreamPart, formatDataStreamPart, jsonSchema, parseAssistantStreamPart, parseDataStreamPart, processDataStream, processTextStream, zodSchema } from '@ai-sdk/ui-utils';
|
5
|
-
import { LanguageModelV1, LanguageModelV1FinishReason, LanguageModelV1LogProbs, LanguageModelV1CallWarning, LanguageModelV1Source, JSONValue, EmbeddingModelV1, EmbeddingModelV1Embedding, ImageModelV1, ImageModelV1CallWarning, LanguageModelV1ProviderMetadata, LanguageModelV1CallOptions, AISDKError, LanguageModelV1FunctionToolCall, JSONSchema7, JSONParseError, TypeValidationError,
|
5
|
+
import { LanguageModelV1, LanguageModelV1FinishReason, LanguageModelV1LogProbs, LanguageModelV1CallWarning, LanguageModelV1Source, JSONValue, EmbeddingModelV1, EmbeddingModelV1Embedding, ImageModelV1, ImageModelV1CallWarning, LanguageModelV1ProviderMetadata, TranscriptionModelV1, TranscriptionModelV1CallWarning, SpeechModelV1, SpeechModelV1CallWarning, LanguageModelV1CallOptions, AISDKError, LanguageModelV1FunctionToolCall, JSONSchema7, JSONParseError, TypeValidationError, ProviderV1, NoSuchModelError } from '@ai-sdk/provider';
|
6
6
|
export { AISDKError, APICallError, EmptyResponseBodyError, InvalidPromptError, InvalidResponseDataError, JSONParseError, LanguageModelV1, LanguageModelV1CallOptions, LanguageModelV1Prompt, LanguageModelV1StreamPart, LoadAPIKeyError, NoContentGeneratedError, NoSuchModelError, TypeValidationError, UnsupportedFunctionalityError } from '@ai-sdk/provider';
|
7
7
|
import { ServerResponse } from 'node:http';
|
8
8
|
import { AttributeValue, Tracer } from '@opentelemetry/api';
|
@@ -276,6 +276,56 @@ type EmbeddingModelUsage = {
|
|
276
276
|
tokens: number;
|
277
277
|
};
|
278
278
|
|
279
|
+
/**
|
280
|
+
Transcription model that is used by the AI SDK Core functions.
|
281
|
+
*/
|
282
|
+
type TranscriptionModel = TranscriptionModelV1;
|
283
|
+
/**
|
284
|
+
Warning from the model provider for this call. The call will proceed, but e.g.
|
285
|
+
some settings might not be supported, which can lead to suboptimal results.
|
286
|
+
*/
|
287
|
+
type TranscriptionWarning = TranscriptionModelV1CallWarning;
|
288
|
+
|
289
|
+
type TranscriptionModelResponseMetadata = {
|
290
|
+
/**
|
291
|
+
Timestamp for the start of the generated response.
|
292
|
+
*/
|
293
|
+
timestamp: Date;
|
294
|
+
/**
|
295
|
+
The ID of the response model that was used to generate the response.
|
296
|
+
*/
|
297
|
+
modelId: string;
|
298
|
+
/**
|
299
|
+
Response headers.
|
300
|
+
*/
|
301
|
+
headers?: Record<string, string>;
|
302
|
+
};
|
303
|
+
|
304
|
+
/**
|
305
|
+
Speech model that is used by the AI SDK Core functions.
|
306
|
+
*/
|
307
|
+
type SpeechModel = SpeechModelV1;
|
308
|
+
/**
|
309
|
+
Warning from the model provider for this call. The call will proceed, but e.g.
|
310
|
+
some settings might not be supported, which can lead to suboptimal results.
|
311
|
+
*/
|
312
|
+
type SpeechWarning = SpeechModelV1CallWarning;
|
313
|
+
|
314
|
+
type SpeechModelResponseMetadata = {
|
315
|
+
/**
|
316
|
+
Timestamp for the start of the generated response.
|
317
|
+
*/
|
318
|
+
timestamp: Date;
|
319
|
+
/**
|
320
|
+
The ID of the response model that was used to generate the response.
|
321
|
+
*/
|
322
|
+
modelId: string;
|
323
|
+
/**
|
324
|
+
Response headers.
|
325
|
+
*/
|
326
|
+
headers?: Record<string, string>;
|
327
|
+
};
|
328
|
+
|
279
329
|
/**
|
280
330
|
The result of an `embed` call.
|
281
331
|
It contains the embedding, the value, and additional information.
|
@@ -3793,25 +3843,109 @@ Callback that is called when the LLM response and the final object validation ar
|
|
3793
3843
|
}): StreamObjectResult<JSONValue, JSONValue, never>;
|
3794
3844
|
|
3795
3845
|
/**
|
3796
|
-
|
3797
|
-
|
3798
|
-
|
3799
|
-
|
3846
|
+
* A generated audio file.
|
3847
|
+
*/
|
3848
|
+
interface GeneratedAudioFile extends GeneratedFile {
|
3849
|
+
/**
|
3850
|
+
* Audio format of the file (e.g., 'mp3', 'wav', etc.)
|
3851
|
+
*/
|
3852
|
+
readonly format: string;
|
3853
|
+
}
|
3800
3854
|
|
3801
|
-
|
3855
|
+
/**
|
3856
|
+
The result of a `generateSpeech` call.
|
3857
|
+
It contains the audio data and additional information.
|
3858
|
+
*/
|
3859
|
+
interface SpeechResult {
|
3802
3860
|
/**
|
3803
|
-
|
3861
|
+
* The audio data as a base64 encoded string or binary data.
|
3804
3862
|
*/
|
3805
|
-
|
3863
|
+
readonly audio: GeneratedAudioFile;
|
3806
3864
|
/**
|
3807
|
-
|
3865
|
+
Warnings for the call, e.g. unsupported settings.
|
3866
|
+
*/
|
3867
|
+
readonly warnings: Array<SpeechWarning>;
|
3868
|
+
/**
|
3869
|
+
Response metadata from the provider. There may be multiple responses if we made multiple calls to the model.
|
3808
3870
|
*/
|
3809
|
-
|
3871
|
+
readonly responses: Array<SpeechModelResponseMetadata>;
|
3810
3872
|
/**
|
3811
|
-
|
3873
|
+
Provider metadata from the provider.
|
3812
3874
|
*/
|
3875
|
+
readonly providerMetadata: Record<string, Record<string, JSONValue>>;
|
3876
|
+
}
|
3877
|
+
|
3878
|
+
/**
|
3879
|
+
Generates speech audio using a speech model.
|
3880
|
+
|
3881
|
+
@param model - The speech model to use.
|
3882
|
+
@param text - The text to convert to speech.
|
3883
|
+
@param voice - The voice to use for speech generation.
|
3884
|
+
@param outputFormat - The output format to use for speech generation e.g. "mp3", "wav", etc.
|
3885
|
+
@param instructions - Instructions for the speech generation e.g. "Speak in a slow and steady tone".
|
3886
|
+
@param speed - The speed of the speech generation.
|
3887
|
+
@param providerOptions - Additional provider-specific options that are passed through to the provider
|
3888
|
+
as body parameters.
|
3889
|
+
@param maxRetries - Maximum number of retries. Set to 0 to disable retries. Default: 2.
|
3890
|
+
@param abortSignal - An optional abort signal that can be used to cancel the call.
|
3891
|
+
@param headers - Additional HTTP headers to be sent with the request. Only applicable for HTTP-based providers.
|
3892
|
+
|
3893
|
+
@returns A result object that contains the generated audio data.
|
3894
|
+
*/
|
3895
|
+
declare function generateSpeech({ model, text, voice, outputFormat, instructions, speed, providerOptions, maxRetries: maxRetriesArg, abortSignal, headers, }: {
|
3896
|
+
/**
|
3897
|
+
The speech model to use.
|
3898
|
+
*/
|
3899
|
+
model: SpeechModelV1;
|
3900
|
+
/**
|
3901
|
+
The text to convert to speech.
|
3902
|
+
*/
|
3903
|
+
text: string;
|
3904
|
+
/**
|
3905
|
+
The voice to use for speech generation.
|
3906
|
+
*/
|
3907
|
+
voice?: string;
|
3908
|
+
/**
|
3909
|
+
* The desired output format for the audio e.g. "mp3", "wav", etc.
|
3910
|
+
*/
|
3911
|
+
outputFormat?: 'mp3' | 'wav' | (string & {});
|
3912
|
+
/**
|
3913
|
+
Instructions for the speech generation e.g. "Speak in a slow and steady tone".
|
3914
|
+
*/
|
3915
|
+
instructions?: string;
|
3916
|
+
/**
|
3917
|
+
The speed of the speech generation.
|
3918
|
+
*/
|
3919
|
+
speed?: number;
|
3920
|
+
/**
|
3921
|
+
Additional provider-specific options that are passed through to the provider
|
3922
|
+
as body parameters.
|
3923
|
+
|
3924
|
+
The outer record is keyed by the provider name, and the inner
|
3925
|
+
record is keyed by the provider-specific metadata key.
|
3926
|
+
```ts
|
3927
|
+
{
|
3928
|
+
"openai": {}
|
3929
|
+
}
|
3930
|
+
```
|
3931
|
+
*/
|
3932
|
+
providerOptions?: ProviderOptions;
|
3933
|
+
/**
|
3934
|
+
Maximum number of retries per speech model call. Set to 0 to disable retries.
|
3935
|
+
|
3936
|
+
@default 2
|
3937
|
+
*/
|
3938
|
+
maxRetries?: number;
|
3939
|
+
/**
|
3940
|
+
Abort signal.
|
3941
|
+
*/
|
3942
|
+
abortSignal?: AbortSignal;
|
3943
|
+
/**
|
3944
|
+
Additional headers to include in the request.
|
3945
|
+
Only applicable for HTTP-based providers.
|
3946
|
+
*/
|
3813
3947
|
headers?: Record<string, string>;
|
3814
|
-
}
|
3948
|
+
}): Promise<SpeechResult>;
|
3815
3949
|
|
3816
3950
|
/**
|
3817
3951
|
The result of a `transcribe` call.
|
@@ -4521,4 +4655,4 @@ declare namespace llamaindexAdapter {
|
|
4521
4655
|
};
|
4522
4656
|
}
|
4523
4657
|
|
4524
|
-
export { AssistantContent, AssistantResponse, CallWarning, ChunkDetector, CoreAssistantMessage, CoreMessage, CoreSystemMessage, CoreTool, CoreToolCallUnion, CoreToolChoice, CoreToolMessage, CoreToolResultUnion, CoreUserMessage, DataContent, DataStreamOptions, DataStreamWriter, DownloadError, EmbedManyResult, EmbedResult, Embedding, EmbeddingModel, EmbeddingModelUsage, GenerateImageResult as Experimental_GenerateImageResult, GeneratedFile as Experimental_GeneratedImage, Experimental_LanguageModelV1Middleware, TranscriptionResult as Experimental_TranscriptionResult, FilePart, FinishReason, GenerateObjectResult, GenerateTextOnStepFinishCallback, GenerateTextResult, GeneratedFile, ImageModel, ImageGenerationWarning as ImageModelCallWarning, ImageModelResponseMetadata, ImagePart, InvalidArgumentError, InvalidDataContentError, InvalidMessageRoleError, InvalidStreamPartError, InvalidToolArgumentsError, JSONRPCError, JSONRPCMessage, JSONRPCNotification, JSONRPCRequest, JSONRPCResponse, langchainAdapter as LangChainAdapter, LanguageModel, LanguageModelRequestMetadata, LanguageModelResponseMetadata, LanguageModelUsage, LanguageModelV1Middleware, llamaindexAdapter as LlamaIndexAdapter, LogProbs, MCPClientError, MCPTransport, MessageConversionError, NoImageGeneratedError, NoObjectGeneratedError, NoOutputSpecifiedError, NoSuchProviderError, NoSuchToolError, ObjectStreamPart, output as Output, Provider, ProviderMetadata, ProviderRegistryProvider, RepairTextFunction, RetryError, StepResult, StreamData, StreamObjectOnFinishCallback, StreamObjectResult, StreamTextOnChunkCallback, StreamTextOnErrorCallback, StreamTextOnFinishCallback, StreamTextOnStepFinishCallback, StreamTextResult, StreamTextTransform, TelemetrySettings, TextPart, TextStreamPart, Tool, ToolCallPart, ToolCallRepairError, ToolCallRepairFunction, ToolCallUnion, ToolChoice, ToolContent, ToolExecutionError, ToolExecutionOptions, ToolResultPart, ToolResultUnion, ToolSet, UserContent, appendClientMessage, 
appendResponseMessages, convertToCoreMessages, coreAssistantMessageSchema, coreMessageSchema, coreSystemMessageSchema, coreToolMessageSchema, coreUserMessageSchema, cosineSimilarity, createDataStream, createDataStreamResponse, createProviderRegistry, customProvider, defaultSettingsMiddleware, embed, embedMany, createMCPClient as experimental_createMCPClient, experimental_createProviderRegistry, experimental_customProvider, generateImage as experimental_generateImage, transcribe as experimental_transcribe, experimental_wrapLanguageModel, extractReasoningMiddleware, generateObject, generateText, pipeDataStreamToResponse, simulateReadableStream, simulateStreamingMiddleware, smoothStream, streamObject, streamText, tool, wrapLanguageModel };
|
4658
|
+
export { AssistantContent, AssistantResponse, CallWarning, ChunkDetector, CoreAssistantMessage, CoreMessage, CoreSystemMessage, CoreTool, CoreToolCallUnion, CoreToolChoice, CoreToolMessage, CoreToolResultUnion, CoreUserMessage, DataContent, DataStreamOptions, DataStreamWriter, DownloadError, EmbedManyResult, EmbedResult, Embedding, EmbeddingModel, EmbeddingModelUsage, GenerateImageResult as Experimental_GenerateImageResult, GeneratedFile as Experimental_GeneratedImage, Experimental_LanguageModelV1Middleware, SpeechResult as Experimental_SpeechResult, TranscriptionResult as Experimental_TranscriptionResult, FilePart, FinishReason, GenerateObjectResult, GenerateTextOnStepFinishCallback, GenerateTextResult, GeneratedAudioFile, GeneratedFile, ImageModel, ImageGenerationWarning as ImageModelCallWarning, ImageModelResponseMetadata, ImagePart, InvalidArgumentError, InvalidDataContentError, InvalidMessageRoleError, InvalidStreamPartError, InvalidToolArgumentsError, JSONRPCError, JSONRPCMessage, JSONRPCNotification, JSONRPCRequest, JSONRPCResponse, langchainAdapter as LangChainAdapter, LanguageModel, LanguageModelRequestMetadata, LanguageModelResponseMetadata, LanguageModelUsage, LanguageModelV1Middleware, llamaindexAdapter as LlamaIndexAdapter, LogProbs, MCPClientError, MCPTransport, MessageConversionError, NoImageGeneratedError, NoObjectGeneratedError, NoOutputSpecifiedError, NoSuchProviderError, NoSuchToolError, ObjectStreamPart, output as Output, Provider, ProviderMetadata, ProviderRegistryProvider, RepairTextFunction, RetryError, SpeechModel, SpeechModelResponseMetadata, SpeechWarning, StepResult, StreamData, StreamObjectOnFinishCallback, StreamObjectResult, StreamTextOnChunkCallback, StreamTextOnErrorCallback, StreamTextOnFinishCallback, StreamTextOnStepFinishCallback, StreamTextResult, StreamTextTransform, TelemetrySettings, TextPart, TextStreamPart, Tool, ToolCallPart, ToolCallRepairError, ToolCallRepairFunction, ToolCallUnion, ToolChoice, ToolContent, 
ToolExecutionError, ToolExecutionOptions, ToolResultPart, ToolResultUnion, ToolSet, TranscriptionModel, TranscriptionModelResponseMetadata, TranscriptionWarning, UserContent, appendClientMessage, appendResponseMessages, convertToCoreMessages, coreAssistantMessageSchema, coreMessageSchema, coreSystemMessageSchema, coreToolMessageSchema, coreUserMessageSchema, cosineSimilarity, createDataStream, createDataStreamResponse, createProviderRegistry, customProvider, defaultSettingsMiddleware, embed, embedMany, createMCPClient as experimental_createMCPClient, experimental_createProviderRegistry, experimental_customProvider, generateImage as experimental_generateImage, generateSpeech as experimental_generateSpeech, transcribe as experimental_transcribe, experimental_wrapLanguageModel, extractReasoningMiddleware, generateObject, generateText, pipeDataStreamToResponse, simulateReadableStream, simulateStreamingMiddleware, smoothStream, streamObject, streamText, tool, wrapLanguageModel };
|
package/dist/index.d.ts
CHANGED
@@ -2,7 +2,7 @@ import { IDGenerator } from '@ai-sdk/provider-utils';
|
|
2
2
|
export { CoreToolCall, CoreToolResult, IDGenerator, ToolCall, ToolResult, createIdGenerator, generateId } from '@ai-sdk/provider-utils';
|
3
3
|
import { DataStreamString, Message, Schema, DeepPartial, JSONValue as JSONValue$1, AssistantMessage, DataMessage } from '@ai-sdk/ui-utils';
|
4
4
|
export { AssistantMessage, AssistantStatus, Attachment, ChatRequest, ChatRequestOptions, CreateMessage, DataMessage, DataStreamPart, DeepPartial, IdGenerator, JSONValue, Message, RequestOptions, Schema, ToolInvocation, UIMessage, UseAssistantOptions, formatAssistantStreamPart, formatDataStreamPart, jsonSchema, parseAssistantStreamPart, parseDataStreamPart, processDataStream, processTextStream, zodSchema } from '@ai-sdk/ui-utils';
|
5
|
-
import { LanguageModelV1, LanguageModelV1FinishReason, LanguageModelV1LogProbs, LanguageModelV1CallWarning, LanguageModelV1Source, JSONValue, EmbeddingModelV1, EmbeddingModelV1Embedding, ImageModelV1, ImageModelV1CallWarning, LanguageModelV1ProviderMetadata, LanguageModelV1CallOptions, AISDKError, LanguageModelV1FunctionToolCall, JSONSchema7, JSONParseError, TypeValidationError,
|
5
|
+
import { LanguageModelV1, LanguageModelV1FinishReason, LanguageModelV1LogProbs, LanguageModelV1CallWarning, LanguageModelV1Source, JSONValue, EmbeddingModelV1, EmbeddingModelV1Embedding, ImageModelV1, ImageModelV1CallWarning, LanguageModelV1ProviderMetadata, TranscriptionModelV1, TranscriptionModelV1CallWarning, SpeechModelV1, SpeechModelV1CallWarning, LanguageModelV1CallOptions, AISDKError, LanguageModelV1FunctionToolCall, JSONSchema7, JSONParseError, TypeValidationError, ProviderV1, NoSuchModelError } from '@ai-sdk/provider';
|
6
6
|
export { AISDKError, APICallError, EmptyResponseBodyError, InvalidPromptError, InvalidResponseDataError, JSONParseError, LanguageModelV1, LanguageModelV1CallOptions, LanguageModelV1Prompt, LanguageModelV1StreamPart, LoadAPIKeyError, NoContentGeneratedError, NoSuchModelError, TypeValidationError, UnsupportedFunctionalityError } from '@ai-sdk/provider';
|
7
7
|
import { ServerResponse } from 'node:http';
|
8
8
|
import { AttributeValue, Tracer } from '@opentelemetry/api';
|
@@ -276,6 +276,56 @@ type EmbeddingModelUsage = {
|
|
276
276
|
tokens: number;
|
277
277
|
};
|
278
278
|
|
279
|
+
/**
|
280
|
+
Transcription model that is used by the AI SDK Core functions.
|
281
|
+
*/
|
282
|
+
type TranscriptionModel = TranscriptionModelV1;
|
283
|
+
/**
|
284
|
+
Warning from the model provider for this call. The call will proceed, but e.g.
|
285
|
+
some settings might not be supported, which can lead to suboptimal results.
|
286
|
+
*/
|
287
|
+
type TranscriptionWarning = TranscriptionModelV1CallWarning;
|
288
|
+
|
289
|
+
type TranscriptionModelResponseMetadata = {
|
290
|
+
/**
|
291
|
+
Timestamp for the start of the generated response.
|
292
|
+
*/
|
293
|
+
timestamp: Date;
|
294
|
+
/**
|
295
|
+
The ID of the response model that was used to generate the response.
|
296
|
+
*/
|
297
|
+
modelId: string;
|
298
|
+
/**
|
299
|
+
Response headers.
|
300
|
+
*/
|
301
|
+
headers?: Record<string, string>;
|
302
|
+
};
|
303
|
+
|
304
|
+
/**
|
305
|
+
Speech model that is used by the AI SDK Core functions.
|
306
|
+
*/
|
307
|
+
type SpeechModel = SpeechModelV1;
|
308
|
+
/**
|
309
|
+
Warning from the model provider for this call. The call will proceed, but e.g.
|
310
|
+
some settings might not be supported, which can lead to suboptimal results.
|
311
|
+
*/
|
312
|
+
type SpeechWarning = SpeechModelV1CallWarning;
|
313
|
+
|
314
|
+
type SpeechModelResponseMetadata = {
|
315
|
+
/**
|
316
|
+
Timestamp for the start of the generated response.
|
317
|
+
*/
|
318
|
+
timestamp: Date;
|
319
|
+
/**
|
320
|
+
The ID of the response model that was used to generate the response.
|
321
|
+
*/
|
322
|
+
modelId: string;
|
323
|
+
/**
|
324
|
+
Response headers.
|
325
|
+
*/
|
326
|
+
headers?: Record<string, string>;
|
327
|
+
};
|
328
|
+
|
279
329
|
/**
|
280
330
|
The result of an `embed` call.
|
281
331
|
It contains the embedding, the value, and additional information.
|
@@ -3793,25 +3843,109 @@ Callback that is called when the LLM response and the final object validation ar
|
|
3793
3843
|
}): StreamObjectResult<JSONValue, JSONValue, never>;
|
3794
3844
|
|
3795
3845
|
/**
|
3796
|
-
|
3797
|
-
|
3798
|
-
|
3799
|
-
|
3846
|
+
* A generated audio file.
|
3847
|
+
*/
|
3848
|
+
interface GeneratedAudioFile extends GeneratedFile {
|
3849
|
+
/**
|
3850
|
+
* Audio format of the file (e.g., 'mp3', 'wav', etc.)
|
3851
|
+
*/
|
3852
|
+
readonly format: string;
|
3853
|
+
}
|
3800
3854
|
|
3801
|
-
|
3855
|
+
/**
|
3856
|
+
The result of a `generateSpeech` call.
|
3857
|
+
It contains the audio data and additional information.
|
3858
|
+
*/
|
3859
|
+
interface SpeechResult {
|
3802
3860
|
/**
|
3803
|
-
|
3861
|
+
* The audio data as a base64 encoded string or binary data.
|
3804
3862
|
*/
|
3805
|
-
|
3863
|
+
readonly audio: GeneratedAudioFile;
|
3806
3864
|
/**
|
3807
|
-
|
3865
|
+
Warnings for the call, e.g. unsupported settings.
|
3866
|
+
*/
|
3867
|
+
readonly warnings: Array<SpeechWarning>;
|
3868
|
+
/**
|
3869
|
+
Response metadata from the provider. There may be multiple responses if we made multiple calls to the model.
|
3808
3870
|
*/
|
3809
|
-
|
3871
|
+
readonly responses: Array<SpeechModelResponseMetadata>;
|
3810
3872
|
/**
|
3811
|
-
|
3873
|
+
Provider metadata from the provider.
|
3812
3874
|
*/
|
3875
|
+
readonly providerMetadata: Record<string, Record<string, JSONValue>>;
|
3876
|
+
}
|
3877
|
+
|
3878
|
+
/**
|
3879
|
+
Generates speech audio using a speech model.
|
3880
|
+
|
3881
|
+
@param model - The speech model to use.
|
3882
|
+
@param text - The text to convert to speech.
|
3883
|
+
@param voice - The voice to use for speech generation.
|
3884
|
+
@param outputFormat - The output format to use for speech generation e.g. "mp3", "wav", etc.
|
3885
|
+
@param instructions - Instructions for the speech generation e.g. "Speak in a slow and steady tone".
|
3886
|
+
@param speed - The speed of the speech generation.
|
3887
|
+
@param providerOptions - Additional provider-specific options that are passed through to the provider
|
3888
|
+
as body parameters.
|
3889
|
+
@param maxRetries - Maximum number of retries. Set to 0 to disable retries. Default: 2.
|
3890
|
+
@param abortSignal - An optional abort signal that can be used to cancel the call.
|
3891
|
+
@param headers - Additional HTTP headers to be sent with the request. Only applicable for HTTP-based providers.
|
3892
|
+
|
3893
|
+
@returns A result object that contains the generated audio data.
|
3894
|
+
*/
|
3895
|
+
declare function generateSpeech({ model, text, voice, outputFormat, instructions, speed, providerOptions, maxRetries: maxRetriesArg, abortSignal, headers, }: {
|
3896
|
+
/**
|
3897
|
+
The speech model to use.
|
3898
|
+
*/
|
3899
|
+
model: SpeechModelV1;
|
3900
|
+
/**
|
3901
|
+
The text to convert to speech.
|
3902
|
+
*/
|
3903
|
+
text: string;
|
3904
|
+
/**
|
3905
|
+
The voice to use for speech generation.
|
3906
|
+
*/
|
3907
|
+
voice?: string;
|
3908
|
+
/**
|
3909
|
+
* The desired output format for the audio e.g. "mp3", "wav", etc.
|
3910
|
+
*/
|
3911
|
+
outputFormat?: 'mp3' | 'wav' | (string & {});
|
3912
|
+
/**
|
3913
|
+
Instructions for the speech generation e.g. "Speak in a slow and steady tone".
|
3914
|
+
*/
|
3915
|
+
instructions?: string;
|
3916
|
+
/**
|
3917
|
+
The speed of the speech generation.
|
3918
|
+
*/
|
3919
|
+
speed?: number;
|
3920
|
+
/**
|
3921
|
+
Additional provider-specific options that are passed through to the provider
|
3922
|
+
as body parameters.
|
3923
|
+
|
3924
|
+
The outer record is keyed by the provider name, and the inner
|
3925
|
+
record is keyed by the provider-specific metadata key.
|
3926
|
+
```ts
|
3927
|
+
{
|
3928
|
+
"openai": {}
|
3929
|
+
}
|
3930
|
+
```
|
3931
|
+
*/
|
3932
|
+
providerOptions?: ProviderOptions;
|
3933
|
+
/**
|
3934
|
+
Maximum number of retries per speech model call. Set to 0 to disable retries.
|
3935
|
+
|
3936
|
+
@default 2
|
3937
|
+
*/
|
3938
|
+
maxRetries?: number;
|
3939
|
+
/**
|
3940
|
+
Abort signal.
|
3941
|
+
*/
|
3942
|
+
abortSignal?: AbortSignal;
|
3943
|
+
/**
|
3944
|
+
Additional headers to include in the request.
|
3945
|
+
Only applicable for HTTP-based providers.
|
3946
|
+
*/
|
3813
3947
|
headers?: Record<string, string>;
|
3814
|
-
}
|
3948
|
+
}): Promise<SpeechResult>;
|
3815
3949
|
|
3816
3950
|
/**
|
3817
3951
|
The result of a `transcribe` call.
|
@@ -4521,4 +4655,4 @@ declare namespace llamaindexAdapter {
|
|
4521
4655
|
};
|
4522
4656
|
}
|
4523
4657
|
|
4524
|
-
export { AssistantContent, AssistantResponse, CallWarning, ChunkDetector, CoreAssistantMessage, CoreMessage, CoreSystemMessage, CoreTool, CoreToolCallUnion, CoreToolChoice, CoreToolMessage, CoreToolResultUnion, CoreUserMessage, DataContent, DataStreamOptions, DataStreamWriter, DownloadError, EmbedManyResult, EmbedResult, Embedding, EmbeddingModel, EmbeddingModelUsage, GenerateImageResult as Experimental_GenerateImageResult, GeneratedFile as Experimental_GeneratedImage, Experimental_LanguageModelV1Middleware, TranscriptionResult as Experimental_TranscriptionResult, FilePart, FinishReason, GenerateObjectResult, GenerateTextOnStepFinishCallback, GenerateTextResult, GeneratedFile, ImageModel, ImageGenerationWarning as ImageModelCallWarning, ImageModelResponseMetadata, ImagePart, InvalidArgumentError, InvalidDataContentError, InvalidMessageRoleError, InvalidStreamPartError, InvalidToolArgumentsError, JSONRPCError, JSONRPCMessage, JSONRPCNotification, JSONRPCRequest, JSONRPCResponse, langchainAdapter as LangChainAdapter, LanguageModel, LanguageModelRequestMetadata, LanguageModelResponseMetadata, LanguageModelUsage, LanguageModelV1Middleware, llamaindexAdapter as LlamaIndexAdapter, LogProbs, MCPClientError, MCPTransport, MessageConversionError, NoImageGeneratedError, NoObjectGeneratedError, NoOutputSpecifiedError, NoSuchProviderError, NoSuchToolError, ObjectStreamPart, output as Output, Provider, ProviderMetadata, ProviderRegistryProvider, RepairTextFunction, RetryError, StepResult, StreamData, StreamObjectOnFinishCallback, StreamObjectResult, StreamTextOnChunkCallback, StreamTextOnErrorCallback, StreamTextOnFinishCallback, StreamTextOnStepFinishCallback, StreamTextResult, StreamTextTransform, TelemetrySettings, TextPart, TextStreamPart, Tool, ToolCallPart, ToolCallRepairError, ToolCallRepairFunction, ToolCallUnion, ToolChoice, ToolContent, ToolExecutionError, ToolExecutionOptions, ToolResultPart, ToolResultUnion, ToolSet, UserContent, appendClientMessage, 
appendResponseMessages, convertToCoreMessages, coreAssistantMessageSchema, coreMessageSchema, coreSystemMessageSchema, coreToolMessageSchema, coreUserMessageSchema, cosineSimilarity, createDataStream, createDataStreamResponse, createProviderRegistry, customProvider, defaultSettingsMiddleware, embed, embedMany, createMCPClient as experimental_createMCPClient, experimental_createProviderRegistry, experimental_customProvider, generateImage as experimental_generateImage, transcribe as experimental_transcribe, experimental_wrapLanguageModel, extractReasoningMiddleware, generateObject, generateText, pipeDataStreamToResponse, simulateReadableStream, simulateStreamingMiddleware, smoothStream, streamObject, streamText, tool, wrapLanguageModel };
|
4658
|
+
export { AssistantContent, AssistantResponse, CallWarning, ChunkDetector, CoreAssistantMessage, CoreMessage, CoreSystemMessage, CoreTool, CoreToolCallUnion, CoreToolChoice, CoreToolMessage, CoreToolResultUnion, CoreUserMessage, DataContent, DataStreamOptions, DataStreamWriter, DownloadError, EmbedManyResult, EmbedResult, Embedding, EmbeddingModel, EmbeddingModelUsage, GenerateImageResult as Experimental_GenerateImageResult, GeneratedFile as Experimental_GeneratedImage, Experimental_LanguageModelV1Middleware, SpeechResult as Experimental_SpeechResult, TranscriptionResult as Experimental_TranscriptionResult, FilePart, FinishReason, GenerateObjectResult, GenerateTextOnStepFinishCallback, GenerateTextResult, GeneratedAudioFile, GeneratedFile, ImageModel, ImageGenerationWarning as ImageModelCallWarning, ImageModelResponseMetadata, ImagePart, InvalidArgumentError, InvalidDataContentError, InvalidMessageRoleError, InvalidStreamPartError, InvalidToolArgumentsError, JSONRPCError, JSONRPCMessage, JSONRPCNotification, JSONRPCRequest, JSONRPCResponse, langchainAdapter as LangChainAdapter, LanguageModel, LanguageModelRequestMetadata, LanguageModelResponseMetadata, LanguageModelUsage, LanguageModelV1Middleware, llamaindexAdapter as LlamaIndexAdapter, LogProbs, MCPClientError, MCPTransport, MessageConversionError, NoImageGeneratedError, NoObjectGeneratedError, NoOutputSpecifiedError, NoSuchProviderError, NoSuchToolError, ObjectStreamPart, output as Output, Provider, ProviderMetadata, ProviderRegistryProvider, RepairTextFunction, RetryError, SpeechModel, SpeechModelResponseMetadata, SpeechWarning, StepResult, StreamData, StreamObjectOnFinishCallback, StreamObjectResult, StreamTextOnChunkCallback, StreamTextOnErrorCallback, StreamTextOnFinishCallback, StreamTextOnStepFinishCallback, StreamTextResult, StreamTextTransform, TelemetrySettings, TextPart, TextStreamPart, Tool, ToolCallPart, ToolCallRepairError, ToolCallRepairFunction, ToolCallUnion, ToolChoice, ToolContent, 
ToolExecutionError, ToolExecutionOptions, ToolResultPart, ToolResultUnion, ToolSet, TranscriptionModel, TranscriptionModelResponseMetadata, TranscriptionWarning, UserContent, appendClientMessage, appendResponseMessages, convertToCoreMessages, coreAssistantMessageSchema, coreMessageSchema, coreSystemMessageSchema, coreToolMessageSchema, coreUserMessageSchema, cosineSimilarity, createDataStream, createDataStreamResponse, createProviderRegistry, customProvider, defaultSettingsMiddleware, embed, embedMany, createMCPClient as experimental_createMCPClient, experimental_createProviderRegistry, experimental_customProvider, generateImage as experimental_generateImage, generateSpeech as experimental_generateSpeech, transcribe as experimental_transcribe, experimental_wrapLanguageModel, extractReasoningMiddleware, generateObject, generateText, pipeDataStreamToResponse, simulateReadableStream, simulateStreamingMiddleware, smoothStream, streamObject, streamText, tool, wrapLanguageModel };
|
package/dist/index.js
CHANGED
@@ -73,6 +73,7 @@ __export(streams_exports, {
|
|
73
73
|
experimental_createProviderRegistry: () => experimental_createProviderRegistry,
|
74
74
|
experimental_customProvider: () => experimental_customProvider,
|
75
75
|
experimental_generateImage: () => generateImage,
|
76
|
+
experimental_generateSpeech: () => generateSpeech,
|
76
77
|
experimental_transcribe: () => transcribe,
|
77
78
|
experimental_wrapLanguageModel: () => experimental_wrapLanguageModel,
|
78
79
|
extractReasoningMiddleware: () => extractReasoningMiddleware,
|
@@ -6335,9 +6336,99 @@ var DefaultStreamTextResult = class {
|
|
6335
6336
|
}
|
6336
6337
|
};
|
6337
6338
|
|
6338
|
-
// errors/no-transcript-generated-error.ts
|
6339
|
+
// errors/no-speech-generated-error.ts
|
6339
6340
|
var import_provider23 = require("@ai-sdk/provider");
|
6340
|
-
var NoTranscriptGeneratedError = class extends import_provider23.AISDKError {
|
6341
|
+
var NoSpeechGeneratedError = class extends import_provider23.AISDKError {
|
6342
|
+
constructor(options) {
|
6343
|
+
super({
|
6344
|
+
name: "AI_NoSpeechGeneratedError",
|
6345
|
+
message: "No speech audio generated."
|
6346
|
+
});
|
6347
|
+
this.responses = options.responses;
|
6348
|
+
}
|
6349
|
+
};
|
6350
|
+
|
6351
|
+
// core/generate-speech/generated-audio-file.ts
|
6352
|
+
var DefaultGeneratedAudioFile = class extends DefaultGeneratedFile {
|
6353
|
+
constructor({
|
6354
|
+
data,
|
6355
|
+
mimeType
|
6356
|
+
}) {
|
6357
|
+
super({ data, mimeType });
|
6358
|
+
let format = "mp3";
|
6359
|
+
if (mimeType) {
|
6360
|
+
const mimeTypeParts = mimeType.split("/");
|
6361
|
+
if (mimeTypeParts.length === 2) {
|
6362
|
+
if (mimeType !== "audio/mpeg") {
|
6363
|
+
format = mimeTypeParts[1];
|
6364
|
+
}
|
6365
|
+
}
|
6366
|
+
}
|
6367
|
+
if (!format) {
|
6368
|
+
throw new Error(
|
6369
|
+
"Audio format must be provided or determinable from mimeType"
|
6370
|
+
);
|
6371
|
+
}
|
6372
|
+
this.format = format;
|
6373
|
+
}
|
6374
|
+
};
|
6375
|
+
|
6376
|
+
// core/generate-speech/generate-speech.ts
|
6377
|
+
async function generateSpeech({
|
6378
|
+
model,
|
6379
|
+
text: text2,
|
6380
|
+
voice,
|
6381
|
+
outputFormat,
|
6382
|
+
instructions,
|
6383
|
+
speed,
|
6384
|
+
providerOptions = {},
|
6385
|
+
maxRetries: maxRetriesArg,
|
6386
|
+
abortSignal,
|
6387
|
+
headers
|
6388
|
+
}) {
|
6389
|
+
var _a17;
|
6390
|
+
const { retry } = prepareRetries({ maxRetries: maxRetriesArg });
|
6391
|
+
const result = await retry(
|
6392
|
+
() => model.doGenerate({
|
6393
|
+
text: text2,
|
6394
|
+
voice,
|
6395
|
+
outputFormat,
|
6396
|
+
instructions,
|
6397
|
+
speed,
|
6398
|
+
abortSignal,
|
6399
|
+
headers,
|
6400
|
+
providerOptions
|
6401
|
+
})
|
6402
|
+
);
|
6403
|
+
if (!result.audio || result.audio.length === 0) {
|
6404
|
+
throw new NoSpeechGeneratedError({ responses: [result.response] });
|
6405
|
+
}
|
6406
|
+
return new DefaultSpeechResult({
|
6407
|
+
audio: new DefaultGeneratedAudioFile({
|
6408
|
+
data: result.audio,
|
6409
|
+
mimeType: (_a17 = detectMimeType({
|
6410
|
+
data: result.audio,
|
6411
|
+
signatures: audioMimeTypeSignatures
|
6412
|
+
})) != null ? _a17 : "audio/mp3"
|
6413
|
+
}),
|
6414
|
+
warnings: result.warnings,
|
6415
|
+
responses: [result.response],
|
6416
|
+
providerMetadata: result.providerMetadata
|
6417
|
+
});
|
6418
|
+
}
|
6419
|
+
var DefaultSpeechResult = class {
|
6420
|
+
constructor(options) {
|
6421
|
+
var _a17;
|
6422
|
+
this.audio = options.audio;
|
6423
|
+
this.warnings = options.warnings;
|
6424
|
+
this.responses = options.responses;
|
6425
|
+
this.providerMetadata = (_a17 = options.providerMetadata) != null ? _a17 : {};
|
6426
|
+
}
|
6427
|
+
};
|
6428
|
+
|
6429
|
+
// errors/no-transcript-generated-error.ts
|
6430
|
+
var import_provider24 = require("@ai-sdk/provider");
|
6431
|
+
var NoTranscriptGeneratedError = class extends import_provider24.AISDKError {
|
6341
6432
|
constructor(options) {
|
6342
6433
|
super({
|
6343
6434
|
name: "AI_NoTranscriptGeneratedError",
|
@@ -6714,7 +6805,7 @@ function appendClientMessage({
|
|
6714
6805
|
|
6715
6806
|
// core/prompt/append-response-messages.ts
|
6716
6807
|
var import_ui_utils9 = require("@ai-sdk/ui-utils");
|
6717
|
-
var
|
6808
|
+
var import_provider25 = require("@ai-sdk/provider");
|
6718
6809
|
function appendResponseMessages({
|
6719
6810
|
messages,
|
6720
6811
|
responseMessages,
|
@@ -6797,7 +6888,7 @@ function appendResponseMessages({
|
|
6797
6888
|
break;
|
6798
6889
|
case "file":
|
6799
6890
|
if (part.data instanceof URL) {
|
6800
|
-
throw new
|
6891
|
+
throw new import_provider25.AISDKError({
|
6801
6892
|
name: "InvalidAssistantFileData",
|
6802
6893
|
message: "File data cannot be a URL"
|
6803
6894
|
});
|
@@ -6891,7 +6982,7 @@ function appendResponseMessages({
|
|
6891
6982
|
}
|
6892
6983
|
|
6893
6984
|
// core/registry/custom-provider.ts
|
6894
|
-
var
|
6985
|
+
var import_provider26 = require("@ai-sdk/provider");
|
6895
6986
|
function customProvider({
|
6896
6987
|
languageModels,
|
6897
6988
|
textEmbeddingModels,
|
@@ -6906,7 +6997,7 @@ function customProvider({
|
|
6906
6997
|
if (fallbackProvider) {
|
6907
6998
|
return fallbackProvider.languageModel(modelId);
|
6908
6999
|
}
|
6909
|
-
throw new
|
7000
|
+
throw new import_provider26.NoSuchModelError({ modelId, modelType: "languageModel" });
|
6910
7001
|
},
|
6911
7002
|
textEmbeddingModel(modelId) {
|
6912
7003
|
if (textEmbeddingModels != null && modelId in textEmbeddingModels) {
|
@@ -6915,7 +7006,7 @@ function customProvider({
|
|
6915
7006
|
if (fallbackProvider) {
|
6916
7007
|
return fallbackProvider.textEmbeddingModel(modelId);
|
6917
7008
|
}
|
6918
|
-
throw new
|
7009
|
+
throw new import_provider26.NoSuchModelError({ modelId, modelType: "textEmbeddingModel" });
|
6919
7010
|
},
|
6920
7011
|
imageModel(modelId) {
|
6921
7012
|
if (imageModels != null && modelId in imageModels) {
|
@@ -6924,19 +7015,19 @@ function customProvider({
|
|
6924
7015
|
if (fallbackProvider == null ? void 0 : fallbackProvider.imageModel) {
|
6925
7016
|
return fallbackProvider.imageModel(modelId);
|
6926
7017
|
}
|
6927
|
-
throw new
|
7018
|
+
throw new import_provider26.NoSuchModelError({ modelId, modelType: "imageModel" });
|
6928
7019
|
}
|
6929
7020
|
};
|
6930
7021
|
}
|
6931
7022
|
var experimental_customProvider = customProvider;
|
6932
7023
|
|
6933
7024
|
// core/registry/no-such-provider-error.ts
|
6934
|
-
var
|
7025
|
+
var import_provider27 = require("@ai-sdk/provider");
|
6935
7026
|
var name16 = "AI_NoSuchProviderError";
|
6936
7027
|
var marker16 = `vercel.ai.error.${name16}`;
|
6937
7028
|
var symbol16 = Symbol.for(marker16);
|
6938
7029
|
var _a16;
|
6939
|
-
var NoSuchProviderError = class extends
|
7030
|
+
var NoSuchProviderError = class extends import_provider27.NoSuchModelError {
|
6940
7031
|
constructor({
|
6941
7032
|
modelId,
|
6942
7033
|
modelType,
|
@@ -6950,13 +7041,13 @@ var NoSuchProviderError = class extends import_provider26.NoSuchModelError {
|
|
6950
7041
|
this.availableProviders = availableProviders;
|
6951
7042
|
}
|
6952
7043
|
static isInstance(error) {
|
6953
|
-
return
|
7044
|
+
return import_provider27.AISDKError.hasMarker(error, marker16);
|
6954
7045
|
}
|
6955
7046
|
};
|
6956
7047
|
_a16 = symbol16;
|
6957
7048
|
|
6958
7049
|
// core/registry/provider-registry.ts
|
6959
|
-
var
|
7050
|
+
var import_provider28 = require("@ai-sdk/provider");
|
6960
7051
|
function createProviderRegistry(providers, {
|
6961
7052
|
separator = ":"
|
6962
7053
|
} = {}) {
|
@@ -6995,20 +7086,20 @@ var DefaultProviderRegistry = class {
|
|
6995
7086
|
splitId(id, modelType) {
|
6996
7087
|
const index = id.indexOf(this.separator);
|
6997
7088
|
if (index === -1) {
|
6998
|
-
throw new
|
7089
|
+
throw new import_provider28.NoSuchModelError({
|
6999
7090
|
modelId: id,
|
7000
7091
|
modelType,
|
7001
7092
|
message: `Invalid ${modelType} id for registry: ${id} (must be in the format "providerId${this.separator}modelId")`
|
7002
7093
|
});
|
7003
7094
|
}
|
7004
|
-
return [id.slice(0, index), id.slice(index +
|
7095
|
+
return [id.slice(0, index), id.slice(index + this.separator.length)];
|
7005
7096
|
}
|
7006
7097
|
languageModel(id) {
|
7007
7098
|
var _a17, _b;
|
7008
7099
|
const [providerId, modelId] = this.splitId(id, "languageModel");
|
7009
7100
|
const model = (_b = (_a17 = this.getProvider(providerId)).languageModel) == null ? void 0 : _b.call(_a17, modelId);
|
7010
7101
|
if (model == null) {
|
7011
|
-
throw new
|
7102
|
+
throw new import_provider28.NoSuchModelError({ modelId: id, modelType: "languageModel" });
|
7012
7103
|
}
|
7013
7104
|
return model;
|
7014
7105
|
}
|
@@ -7018,7 +7109,7 @@ var DefaultProviderRegistry = class {
|
|
7018
7109
|
const provider = this.getProvider(providerId);
|
7019
7110
|
const model = (_a17 = provider.textEmbeddingModel) == null ? void 0 : _a17.call(provider, modelId);
|
7020
7111
|
if (model == null) {
|
7021
|
-
throw new
|
7112
|
+
throw new import_provider28.NoSuchModelError({
|
7022
7113
|
modelId: id,
|
7023
7114
|
modelType: "textEmbeddingModel"
|
7024
7115
|
});
|
@@ -7031,7 +7122,7 @@ var DefaultProviderRegistry = class {
|
|
7031
7122
|
const provider = this.getProvider(providerId);
|
7032
7123
|
const model = (_a17 = provider.imageModel) == null ? void 0 : _a17.call(provider, modelId);
|
7033
7124
|
if (model == null) {
|
7034
|
-
throw new
|
7125
|
+
throw new import_provider28.NoSuchModelError({ modelId: id, modelType: "imageModel" });
|
7035
7126
|
}
|
7036
7127
|
return model;
|
7037
7128
|
}
|
@@ -8038,6 +8129,7 @@ var StreamData = class {
|
|
8038
8129
|
experimental_createProviderRegistry,
|
8039
8130
|
experimental_customProvider,
|
8040
8131
|
experimental_generateImage,
|
8132
|
+
experimental_generateSpeech,
|
8041
8133
|
experimental_transcribe,
|
8042
8134
|
experimental_wrapLanguageModel,
|
8043
8135
|
extractReasoningMiddleware,
|