ai 4.3.2 → 4.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,28 @@
1
1
  # ai
2
2
 
3
+ ## 4.3.4
4
+
5
+ ### Patch Changes
6
+
7
+ - 013faa8: core (ai): change transcription model mimeType to mediaType
8
+ - Updated dependencies [013faa8]
9
+ - @ai-sdk/provider@1.1.2
10
+ - @ai-sdk/provider-utils@2.2.6
11
+ - @ai-sdk/ui-utils@1.2.7
12
+ - @ai-sdk/react@1.2.8
13
+
14
+ ## 4.3.3
15
+
16
+ ### Patch Changes
17
+
18
+ - 3e88f4d: fix (ai/mcp): prevent mutation of customEnv
19
+ - c21fa6d: feat: add transcription with experimental_transcribe
20
+ - Updated dependencies [c21fa6d]
21
+ - @ai-sdk/provider-utils@2.2.5
22
+ - @ai-sdk/provider@1.1.1
23
+ - @ai-sdk/react@1.2.7
24
+ - @ai-sdk/ui-utils@1.2.6
25
+
3
26
  ## 4.3.2
4
27
 
5
28
  ### Patch Changes
package/dist/index.d.mts CHANGED
@@ -2,7 +2,7 @@ import { IDGenerator } from '@ai-sdk/provider-utils';
2
2
  export { CoreToolCall, CoreToolResult, IDGenerator, ToolCall, ToolResult, createIdGenerator, generateId } from '@ai-sdk/provider-utils';
3
3
  import { DataStreamString, Message, Schema, DeepPartial, JSONValue as JSONValue$1, AssistantMessage, DataMessage } from '@ai-sdk/ui-utils';
4
4
  export { AssistantMessage, AssistantStatus, Attachment, ChatRequest, ChatRequestOptions, CreateMessage, DataMessage, DataStreamPart, DeepPartial, IdGenerator, JSONValue, Message, RequestOptions, Schema, ToolInvocation, UIMessage, UseAssistantOptions, formatAssistantStreamPart, formatDataStreamPart, jsonSchema, parseAssistantStreamPart, parseDataStreamPart, processDataStream, processTextStream, zodSchema } from '@ai-sdk/ui-utils';
5
- import { LanguageModelV1, LanguageModelV1FinishReason, LanguageModelV1LogProbs, LanguageModelV1CallWarning, LanguageModelV1Source, JSONValue, EmbeddingModelV1, EmbeddingModelV1Embedding, ImageModelV1, ImageModelV1CallWarning, LanguageModelV1ProviderMetadata, LanguageModelV1CallOptions, AISDKError, LanguageModelV1FunctionToolCall, JSONSchema7, JSONParseError, TypeValidationError, ProviderV1, NoSuchModelError } from '@ai-sdk/provider';
5
+ import { LanguageModelV1, LanguageModelV1FinishReason, LanguageModelV1LogProbs, LanguageModelV1CallWarning, LanguageModelV1Source, JSONValue, EmbeddingModelV1, EmbeddingModelV1Embedding, ImageModelV1, ImageModelV1CallWarning, LanguageModelV1ProviderMetadata, LanguageModelV1CallOptions, AISDKError, LanguageModelV1FunctionToolCall, JSONSchema7, JSONParseError, TypeValidationError, TranscriptionModelV1CallWarning, TranscriptionModelV1, ProviderV1, NoSuchModelError } from '@ai-sdk/provider';
6
6
  export { AISDKError, APICallError, EmptyResponseBodyError, InvalidPromptError, InvalidResponseDataError, JSONParseError, LanguageModelV1, LanguageModelV1CallOptions, LanguageModelV1Prompt, LanguageModelV1StreamPart, LoadAPIKeyError, NoContentGeneratedError, NoSuchModelError, TypeValidationError, UnsupportedFunctionalityError } from '@ai-sdk/provider';
7
7
  import { ServerResponse } from 'node:http';
8
8
  import { AttributeValue, Tracer } from '@opentelemetry/api';
@@ -3792,6 +3792,132 @@ Callback that is called when the LLM response and the final object validation ar
3792
3792
  };
3793
3793
  }): StreamObjectResult<JSONValue, JSONValue, never>;
3794
3794
 
3795
+ /**
3796
+ Warning from the model provider for this call. The call will proceed, but e.g.
3797
+ some settings might not be supported, which can lead to suboptimal results.
3798
+ */
3799
+ type TranscriptionWarning = TranscriptionModelV1CallWarning;
3800
+
3801
+ type TranscriptionModelResponseMetadata = {
3802
+ /**
3803
+ Timestamp for the start of the generated response.
3804
+ */
3805
+ timestamp: Date;
3806
+ /**
3807
+ The ID of the response model that was used to generate the response.
3808
+ */
3809
+ modelId: string;
3810
+ /**
3811
+ Response headers.
3812
+ */
3813
+ headers?: Record<string, string>;
3814
+ };
3815
+
3816
+ /**
3817
+ The result of a `transcribe` call.
3818
+ It contains the transcript and additional information.
3819
+ */
3820
+ interface TranscriptionResult {
3821
+ /**
3822
+ * The complete transcribed text from the audio.
3823
+ */
3824
+ readonly text: string;
3825
+ /**
3826
+ * Array of transcript segments with timing information.
3827
+ * Each segment represents a portion of the transcribed text with start and end times.
3828
+ */
3829
+ readonly segments: Array<{
3830
+ /**
3831
+ * The text content of this segment.
3832
+ */
3833
+ readonly text: string;
3834
+ /**
3835
+ * The start time of this segment in seconds.
3836
+ */
3837
+ readonly startSecond: number;
3838
+ /**
3839
+ * The end time of this segment in seconds.
3840
+ */
3841
+ readonly endSecond: number;
3842
+ }>;
3843
+ /**
3844
+ * The detected language of the audio content, as an ISO-639-1 code (e.g., 'en' for English).
3845
+ * May be undefined if the language couldn't be detected.
3846
+ */
3847
+ readonly language: string | undefined;
3848
+ /**
3849
+ * The total duration of the audio file in seconds.
3850
+ * May be undefined if the duration couldn't be determined.
3851
+ */
3852
+ readonly durationInSeconds: number | undefined;
3853
+ /**
3854
+ Warnings for the call, e.g. unsupported settings.
3855
+ */
3856
+ readonly warnings: Array<TranscriptionWarning>;
3857
+ /**
3858
+ Response metadata from the provider. There may be multiple responses if we made multiple calls to the model.
3859
+ */
3860
+ readonly responses: Array<TranscriptionModelResponseMetadata>;
3861
+ /**
3862
+ Additional provider-specific metadata returned by the provider.
3863
+ */
3864
+ readonly providerMetadata: Record<string, Record<string, JSONValue>>;
3865
+ }
3866
+
3867
+ /**
3868
+ Generates transcripts using a transcription model.
3869
+
3870
+ @param model - The transcription model to use.
3871
+ @param audio - The audio data to transcribe as DataContent (string | Uint8Array | ArrayBuffer | Buffer) or a URL.
3872
+ @param providerOptions - Additional provider-specific options that are passed through to the provider
3873
+ as body parameters.
3874
+ @param maxRetries - Maximum number of retries. Set to 0 to disable retries. Default: 2.
3875
+ @param abortSignal - An optional abort signal that can be used to cancel the call.
3876
+ @param headers - Additional HTTP headers to be sent with the request. Only applicable for HTTP-based providers.
3877
+
3878
+ @returns A result object that contains the generated transcript.
3879
+ */
3880
+ declare function transcribe({ model, audio, providerOptions, maxRetries: maxRetriesArg, abortSignal, headers, }: {
3881
+ /**
3882
+ The transcription model to use.
3883
+ */
3884
+ model: TranscriptionModelV1;
3885
+ /**
3886
+ The audio data to transcribe.
3887
+ */
3888
+ audio: DataContent | URL;
3889
+ /**
3890
+ Additional provider-specific options that are passed through to the provider
3891
+ as body parameters.
3892
+
3893
+ The outer record is keyed by the provider name, and the inner
3894
+ record is keyed by the provider-specific option key.
3895
+ ```ts
3896
+ {
3897
+ "openai": {
3898
+ "temperature": 0
3899
+ }
3900
+ }
3901
+ ```
3902
+ */
3903
+ providerOptions?: ProviderOptions;
3904
+ /**
3905
+ Maximum number of retries per transcription model call. Set to 0 to disable retries.
3906
+
3907
+ @default 2
3908
+ */
3909
+ maxRetries?: number;
3910
+ /**
3911
+ Abort signal.
3912
+ */
3913
+ abortSignal?: AbortSignal;
3914
+ /**
3915
+ Additional headers to include in the request.
3916
+ Only applicable for HTTP-based providers.
3917
+ */
3918
+ headers?: Record<string, string>;
3919
+ }): Promise<TranscriptionResult>;
3920
+
3795
3921
  /**
3796
3922
  * Experimental middleware for LanguageModelV1.
3797
3923
  * This type defines the structure for middleware that can be used to modify
@@ -4395,4 +4521,4 @@ declare namespace llamaindexAdapter {
4395
4521
  };
4396
4522
  }
4397
4523
 
4398
- export { AssistantContent, AssistantResponse, CallWarning, ChunkDetector, CoreAssistantMessage, CoreMessage, CoreSystemMessage, CoreTool, CoreToolCallUnion, CoreToolChoice, CoreToolMessage, CoreToolResultUnion, CoreUserMessage, DataContent, DataStreamOptions, DataStreamWriter, DownloadError, EmbedManyResult, EmbedResult, Embedding, EmbeddingModel, EmbeddingModelUsage, GenerateImageResult as Experimental_GenerateImageResult, GeneratedFile as Experimental_GeneratedImage, Experimental_LanguageModelV1Middleware, FilePart, FinishReason, GenerateObjectResult, GenerateTextOnStepFinishCallback, GenerateTextResult, GeneratedFile, ImageModel, ImageGenerationWarning as ImageModelCallWarning, ImageModelResponseMetadata, ImagePart, InvalidArgumentError, InvalidDataContentError, InvalidMessageRoleError, InvalidStreamPartError, InvalidToolArgumentsError, JSONRPCError, JSONRPCMessage, JSONRPCNotification, JSONRPCRequest, JSONRPCResponse, langchainAdapter as LangChainAdapter, LanguageModel, LanguageModelRequestMetadata, LanguageModelResponseMetadata, LanguageModelUsage, LanguageModelV1Middleware, llamaindexAdapter as LlamaIndexAdapter, LogProbs, MCPClientError, MCPTransport, MessageConversionError, NoImageGeneratedError, NoObjectGeneratedError, NoOutputSpecifiedError, NoSuchProviderError, NoSuchToolError, ObjectStreamPart, output as Output, Provider, ProviderMetadata, ProviderRegistryProvider, RepairTextFunction, RetryError, StepResult, StreamData, StreamObjectOnFinishCallback, StreamObjectResult, StreamTextOnChunkCallback, StreamTextOnErrorCallback, StreamTextOnFinishCallback, StreamTextOnStepFinishCallback, StreamTextResult, StreamTextTransform, TelemetrySettings, TextPart, TextStreamPart, Tool, ToolCallPart, ToolCallRepairError, ToolCallRepairFunction, ToolCallUnion, ToolChoice, ToolContent, ToolExecutionError, ToolExecutionOptions, ToolResultPart, ToolResultUnion, ToolSet, UserContent, appendClientMessage, appendResponseMessages, convertToCoreMessages, 
coreAssistantMessageSchema, coreMessageSchema, coreSystemMessageSchema, coreToolMessageSchema, coreUserMessageSchema, cosineSimilarity, createDataStream, createDataStreamResponse, createProviderRegistry, customProvider, defaultSettingsMiddleware, embed, embedMany, createMCPClient as experimental_createMCPClient, experimental_createProviderRegistry, experimental_customProvider, generateImage as experimental_generateImage, experimental_wrapLanguageModel, extractReasoningMiddleware, generateObject, generateText, pipeDataStreamToResponse, simulateReadableStream, simulateStreamingMiddleware, smoothStream, streamObject, streamText, tool, wrapLanguageModel };
4524
+ export { AssistantContent, AssistantResponse, CallWarning, ChunkDetector, CoreAssistantMessage, CoreMessage, CoreSystemMessage, CoreTool, CoreToolCallUnion, CoreToolChoice, CoreToolMessage, CoreToolResultUnion, CoreUserMessage, DataContent, DataStreamOptions, DataStreamWriter, DownloadError, EmbedManyResult, EmbedResult, Embedding, EmbeddingModel, EmbeddingModelUsage, GenerateImageResult as Experimental_GenerateImageResult, GeneratedFile as Experimental_GeneratedImage, Experimental_LanguageModelV1Middleware, TranscriptionResult as Experimental_TranscriptionResult, FilePart, FinishReason, GenerateObjectResult, GenerateTextOnStepFinishCallback, GenerateTextResult, GeneratedFile, ImageModel, ImageGenerationWarning as ImageModelCallWarning, ImageModelResponseMetadata, ImagePart, InvalidArgumentError, InvalidDataContentError, InvalidMessageRoleError, InvalidStreamPartError, InvalidToolArgumentsError, JSONRPCError, JSONRPCMessage, JSONRPCNotification, JSONRPCRequest, JSONRPCResponse, langchainAdapter as LangChainAdapter, LanguageModel, LanguageModelRequestMetadata, LanguageModelResponseMetadata, LanguageModelUsage, LanguageModelV1Middleware, llamaindexAdapter as LlamaIndexAdapter, LogProbs, MCPClientError, MCPTransport, MessageConversionError, NoImageGeneratedError, NoObjectGeneratedError, NoOutputSpecifiedError, NoSuchProviderError, NoSuchToolError, ObjectStreamPart, output as Output, Provider, ProviderMetadata, ProviderRegistryProvider, RepairTextFunction, RetryError, StepResult, StreamData, StreamObjectOnFinishCallback, StreamObjectResult, StreamTextOnChunkCallback, StreamTextOnErrorCallback, StreamTextOnFinishCallback, StreamTextOnStepFinishCallback, StreamTextResult, StreamTextTransform, TelemetrySettings, TextPart, TextStreamPart, Tool, ToolCallPart, ToolCallRepairError, ToolCallRepairFunction, ToolCallUnion, ToolChoice, ToolContent, ToolExecutionError, ToolExecutionOptions, ToolResultPart, ToolResultUnion, ToolSet, UserContent, appendClientMessage, 
appendResponseMessages, convertToCoreMessages, coreAssistantMessageSchema, coreMessageSchema, coreSystemMessageSchema, coreToolMessageSchema, coreUserMessageSchema, cosineSimilarity, createDataStream, createDataStreamResponse, createProviderRegistry, customProvider, defaultSettingsMiddleware, embed, embedMany, createMCPClient as experimental_createMCPClient, experimental_createProviderRegistry, experimental_customProvider, generateImage as experimental_generateImage, transcribe as experimental_transcribe, experimental_wrapLanguageModel, extractReasoningMiddleware, generateObject, generateText, pipeDataStreamToResponse, simulateReadableStream, simulateStreamingMiddleware, smoothStream, streamObject, streamText, tool, wrapLanguageModel };
package/dist/index.d.ts CHANGED
@@ -2,7 +2,7 @@ import { IDGenerator } from '@ai-sdk/provider-utils';
2
2
  export { CoreToolCall, CoreToolResult, IDGenerator, ToolCall, ToolResult, createIdGenerator, generateId } from '@ai-sdk/provider-utils';
3
3
  import { DataStreamString, Message, Schema, DeepPartial, JSONValue as JSONValue$1, AssistantMessage, DataMessage } from '@ai-sdk/ui-utils';
4
4
  export { AssistantMessage, AssistantStatus, Attachment, ChatRequest, ChatRequestOptions, CreateMessage, DataMessage, DataStreamPart, DeepPartial, IdGenerator, JSONValue, Message, RequestOptions, Schema, ToolInvocation, UIMessage, UseAssistantOptions, formatAssistantStreamPart, formatDataStreamPart, jsonSchema, parseAssistantStreamPart, parseDataStreamPart, processDataStream, processTextStream, zodSchema } from '@ai-sdk/ui-utils';
5
- import { LanguageModelV1, LanguageModelV1FinishReason, LanguageModelV1LogProbs, LanguageModelV1CallWarning, LanguageModelV1Source, JSONValue, EmbeddingModelV1, EmbeddingModelV1Embedding, ImageModelV1, ImageModelV1CallWarning, LanguageModelV1ProviderMetadata, LanguageModelV1CallOptions, AISDKError, LanguageModelV1FunctionToolCall, JSONSchema7, JSONParseError, TypeValidationError, ProviderV1, NoSuchModelError } from '@ai-sdk/provider';
5
+ import { LanguageModelV1, LanguageModelV1FinishReason, LanguageModelV1LogProbs, LanguageModelV1CallWarning, LanguageModelV1Source, JSONValue, EmbeddingModelV1, EmbeddingModelV1Embedding, ImageModelV1, ImageModelV1CallWarning, LanguageModelV1ProviderMetadata, LanguageModelV1CallOptions, AISDKError, LanguageModelV1FunctionToolCall, JSONSchema7, JSONParseError, TypeValidationError, TranscriptionModelV1CallWarning, TranscriptionModelV1, ProviderV1, NoSuchModelError } from '@ai-sdk/provider';
6
6
  export { AISDKError, APICallError, EmptyResponseBodyError, InvalidPromptError, InvalidResponseDataError, JSONParseError, LanguageModelV1, LanguageModelV1CallOptions, LanguageModelV1Prompt, LanguageModelV1StreamPart, LoadAPIKeyError, NoContentGeneratedError, NoSuchModelError, TypeValidationError, UnsupportedFunctionalityError } from '@ai-sdk/provider';
7
7
  import { ServerResponse } from 'node:http';
8
8
  import { AttributeValue, Tracer } from '@opentelemetry/api';
@@ -3792,6 +3792,132 @@ Callback that is called when the LLM response and the final object validation ar
3792
3792
  };
3793
3793
  }): StreamObjectResult<JSONValue, JSONValue, never>;
3794
3794
 
3795
+ /**
3796
+ Warning from the model provider for this call. The call will proceed, but e.g.
3797
+ some settings might not be supported, which can lead to suboptimal results.
3798
+ */
3799
+ type TranscriptionWarning = TranscriptionModelV1CallWarning;
3800
+
3801
+ type TranscriptionModelResponseMetadata = {
3802
+ /**
3803
+ Timestamp for the start of the generated response.
3804
+ */
3805
+ timestamp: Date;
3806
+ /**
3807
+ The ID of the response model that was used to generate the response.
3808
+ */
3809
+ modelId: string;
3810
+ /**
3811
+ Response headers.
3812
+ */
3813
+ headers?: Record<string, string>;
3814
+ };
3815
+
3816
+ /**
3817
+ The result of a `transcribe` call.
3818
+ It contains the transcript and additional information.
3819
+ */
3820
+ interface TranscriptionResult {
3821
+ /**
3822
+ * The complete transcribed text from the audio.
3823
+ */
3824
+ readonly text: string;
3825
+ /**
3826
+ * Array of transcript segments with timing information.
3827
+ * Each segment represents a portion of the transcribed text with start and end times.
3828
+ */
3829
+ readonly segments: Array<{
3830
+ /**
3831
+ * The text content of this segment.
3832
+ */
3833
+ readonly text: string;
3834
+ /**
3835
+ * The start time of this segment in seconds.
3836
+ */
3837
+ readonly startSecond: number;
3838
+ /**
3839
+ * The end time of this segment in seconds.
3840
+ */
3841
+ readonly endSecond: number;
3842
+ }>;
3843
+ /**
3844
+ * The detected language of the audio content, as an ISO-639-1 code (e.g., 'en' for English).
3845
+ * May be undefined if the language couldn't be detected.
3846
+ */
3847
+ readonly language: string | undefined;
3848
+ /**
3849
+ * The total duration of the audio file in seconds.
3850
+ * May be undefined if the duration couldn't be determined.
3851
+ */
3852
+ readonly durationInSeconds: number | undefined;
3853
+ /**
3854
+ Warnings for the call, e.g. unsupported settings.
3855
+ */
3856
+ readonly warnings: Array<TranscriptionWarning>;
3857
+ /**
3858
+ Response metadata from the provider. There may be multiple responses if we made multiple calls to the model.
3859
+ */
3860
+ readonly responses: Array<TranscriptionModelResponseMetadata>;
3861
+ /**
3862
+ Additional provider-specific metadata returned by the provider.
3863
+ */
3864
+ readonly providerMetadata: Record<string, Record<string, JSONValue>>;
3865
+ }
3866
+
3867
+ /**
3868
+ Generates transcripts using a transcription model.
3869
+
3870
+ @param model - The transcription model to use.
3871
+ @param audio - The audio data to transcribe as DataContent (string | Uint8Array | ArrayBuffer | Buffer) or a URL.
3872
+ @param providerOptions - Additional provider-specific options that are passed through to the provider
3873
+ as body parameters.
3874
+ @param maxRetries - Maximum number of retries. Set to 0 to disable retries. Default: 2.
3875
+ @param abortSignal - An optional abort signal that can be used to cancel the call.
3876
+ @param headers - Additional HTTP headers to be sent with the request. Only applicable for HTTP-based providers.
3877
+
3878
+ @returns A result object that contains the generated transcript.
3879
+ */
3880
+ declare function transcribe({ model, audio, providerOptions, maxRetries: maxRetriesArg, abortSignal, headers, }: {
3881
+ /**
3882
+ The transcription model to use.
3883
+ */
3884
+ model: TranscriptionModelV1;
3885
+ /**
3886
+ The audio data to transcribe.
3887
+ */
3888
+ audio: DataContent | URL;
3889
+ /**
3890
+ Additional provider-specific options that are passed through to the provider
3891
+ as body parameters.
3892
+
3893
+ The outer record is keyed by the provider name, and the inner
3894
+ record is keyed by the provider-specific option key.
3895
+ ```ts
3896
+ {
3897
+ "openai": {
3898
+ "temperature": 0
3899
+ }
3900
+ }
3901
+ ```
3902
+ */
3903
+ providerOptions?: ProviderOptions;
3904
+ /**
3905
+ Maximum number of retries per transcription model call. Set to 0 to disable retries.
3906
+
3907
+ @default 2
3908
+ */
3909
+ maxRetries?: number;
3910
+ /**
3911
+ Abort signal.
3912
+ */
3913
+ abortSignal?: AbortSignal;
3914
+ /**
3915
+ Additional headers to include in the request.
3916
+ Only applicable for HTTP-based providers.
3917
+ */
3918
+ headers?: Record<string, string>;
3919
+ }): Promise<TranscriptionResult>;
3920
+
3795
3921
  /**
3796
3922
  * Experimental middleware for LanguageModelV1.
3797
3923
  * This type defines the structure for middleware that can be used to modify
@@ -4395,4 +4521,4 @@ declare namespace llamaindexAdapter {
4395
4521
  };
4396
4522
  }
4397
4523
 
4398
- export { AssistantContent, AssistantResponse, CallWarning, ChunkDetector, CoreAssistantMessage, CoreMessage, CoreSystemMessage, CoreTool, CoreToolCallUnion, CoreToolChoice, CoreToolMessage, CoreToolResultUnion, CoreUserMessage, DataContent, DataStreamOptions, DataStreamWriter, DownloadError, EmbedManyResult, EmbedResult, Embedding, EmbeddingModel, EmbeddingModelUsage, GenerateImageResult as Experimental_GenerateImageResult, GeneratedFile as Experimental_GeneratedImage, Experimental_LanguageModelV1Middleware, FilePart, FinishReason, GenerateObjectResult, GenerateTextOnStepFinishCallback, GenerateTextResult, GeneratedFile, ImageModel, ImageGenerationWarning as ImageModelCallWarning, ImageModelResponseMetadata, ImagePart, InvalidArgumentError, InvalidDataContentError, InvalidMessageRoleError, InvalidStreamPartError, InvalidToolArgumentsError, JSONRPCError, JSONRPCMessage, JSONRPCNotification, JSONRPCRequest, JSONRPCResponse, langchainAdapter as LangChainAdapter, LanguageModel, LanguageModelRequestMetadata, LanguageModelResponseMetadata, LanguageModelUsage, LanguageModelV1Middleware, llamaindexAdapter as LlamaIndexAdapter, LogProbs, MCPClientError, MCPTransport, MessageConversionError, NoImageGeneratedError, NoObjectGeneratedError, NoOutputSpecifiedError, NoSuchProviderError, NoSuchToolError, ObjectStreamPart, output as Output, Provider, ProviderMetadata, ProviderRegistryProvider, RepairTextFunction, RetryError, StepResult, StreamData, StreamObjectOnFinishCallback, StreamObjectResult, StreamTextOnChunkCallback, StreamTextOnErrorCallback, StreamTextOnFinishCallback, StreamTextOnStepFinishCallback, StreamTextResult, StreamTextTransform, TelemetrySettings, TextPart, TextStreamPart, Tool, ToolCallPart, ToolCallRepairError, ToolCallRepairFunction, ToolCallUnion, ToolChoice, ToolContent, ToolExecutionError, ToolExecutionOptions, ToolResultPart, ToolResultUnion, ToolSet, UserContent, appendClientMessage, appendResponseMessages, convertToCoreMessages, 
coreAssistantMessageSchema, coreMessageSchema, coreSystemMessageSchema, coreToolMessageSchema, coreUserMessageSchema, cosineSimilarity, createDataStream, createDataStreamResponse, createProviderRegistry, customProvider, defaultSettingsMiddleware, embed, embedMany, createMCPClient as experimental_createMCPClient, experimental_createProviderRegistry, experimental_customProvider, generateImage as experimental_generateImage, experimental_wrapLanguageModel, extractReasoningMiddleware, generateObject, generateText, pipeDataStreamToResponse, simulateReadableStream, simulateStreamingMiddleware, smoothStream, streamObject, streamText, tool, wrapLanguageModel };
4524
+ export { AssistantContent, AssistantResponse, CallWarning, ChunkDetector, CoreAssistantMessage, CoreMessage, CoreSystemMessage, CoreTool, CoreToolCallUnion, CoreToolChoice, CoreToolMessage, CoreToolResultUnion, CoreUserMessage, DataContent, DataStreamOptions, DataStreamWriter, DownloadError, EmbedManyResult, EmbedResult, Embedding, EmbeddingModel, EmbeddingModelUsage, GenerateImageResult as Experimental_GenerateImageResult, GeneratedFile as Experimental_GeneratedImage, Experimental_LanguageModelV1Middleware, TranscriptionResult as Experimental_TranscriptionResult, FilePart, FinishReason, GenerateObjectResult, GenerateTextOnStepFinishCallback, GenerateTextResult, GeneratedFile, ImageModel, ImageGenerationWarning as ImageModelCallWarning, ImageModelResponseMetadata, ImagePart, InvalidArgumentError, InvalidDataContentError, InvalidMessageRoleError, InvalidStreamPartError, InvalidToolArgumentsError, JSONRPCError, JSONRPCMessage, JSONRPCNotification, JSONRPCRequest, JSONRPCResponse, langchainAdapter as LangChainAdapter, LanguageModel, LanguageModelRequestMetadata, LanguageModelResponseMetadata, LanguageModelUsage, LanguageModelV1Middleware, llamaindexAdapter as LlamaIndexAdapter, LogProbs, MCPClientError, MCPTransport, MessageConversionError, NoImageGeneratedError, NoObjectGeneratedError, NoOutputSpecifiedError, NoSuchProviderError, NoSuchToolError, ObjectStreamPart, output as Output, Provider, ProviderMetadata, ProviderRegistryProvider, RepairTextFunction, RetryError, StepResult, StreamData, StreamObjectOnFinishCallback, StreamObjectResult, StreamTextOnChunkCallback, StreamTextOnErrorCallback, StreamTextOnFinishCallback, StreamTextOnStepFinishCallback, StreamTextResult, StreamTextTransform, TelemetrySettings, TextPart, TextStreamPart, Tool, ToolCallPart, ToolCallRepairError, ToolCallRepairFunction, ToolCallUnion, ToolChoice, ToolContent, ToolExecutionError, ToolExecutionOptions, ToolResultPart, ToolResultUnion, ToolSet, UserContent, appendClientMessage, 
appendResponseMessages, convertToCoreMessages, coreAssistantMessageSchema, coreMessageSchema, coreSystemMessageSchema, coreToolMessageSchema, coreUserMessageSchema, cosineSimilarity, createDataStream, createDataStreamResponse, createProviderRegistry, customProvider, defaultSettingsMiddleware, embed, embedMany, createMCPClient as experimental_createMCPClient, experimental_createProviderRegistry, experimental_customProvider, generateImage as experimental_generateImage, transcribe as experimental_transcribe, experimental_wrapLanguageModel, extractReasoningMiddleware, generateObject, generateText, pipeDataStreamToResponse, simulateReadableStream, simulateStreamingMiddleware, smoothStream, streamObject, streamText, tool, wrapLanguageModel };
package/dist/index.js CHANGED
@@ -73,6 +73,7 @@ __export(streams_exports, {
73
73
  experimental_createProviderRegistry: () => experimental_createProviderRegistry,
74
74
  experimental_customProvider: () => experimental_customProvider,
75
75
  experimental_generateImage: () => generateImage,
76
+ experimental_transcribe: () => transcribe,
76
77
  experimental_wrapLanguageModel: () => experimental_wrapLanguageModel,
77
78
  extractReasoningMiddleware: () => extractReasoningMiddleware,
78
79
  formatAssistantStreamPart: () => import_ui_utils11.formatAssistantStreamPart,
@@ -942,8 +943,8 @@ var DefaultGeneratedFileWithType = class extends DefaultGeneratedFile {
942
943
  }
943
944
  };
944
945
 
945
- // core/util/detect-image-mimetype.ts
946
- var mimeTypeSignatures = [
946
+ // core/util/detect-mimetype.ts
947
+ var imageMimeTypeSignatures = [
947
948
  {
948
949
  mimeType: "image/gif",
949
950
  bytesPrefix: [71, 73, 70],
@@ -1016,9 +1017,44 @@ var mimeTypeSignatures = [
1016
1017
  base64Prefix: "AAAAIGZ0eXBoZWlj"
1017
1018
  }
1018
1019
  ];
1019
- function detectImageMimeType(image) {
1020
- for (const signature of mimeTypeSignatures) {
1021
- if (typeof image === "string" ? image.startsWith(signature.base64Prefix) : image.length >= signature.bytesPrefix.length && signature.bytesPrefix.every((byte, index) => image[index] === byte)) {
1020
+ var audioMimeTypeSignatures = [
1021
+ {
1022
+ mimeType: "audio/mpeg",
1023
+ bytesPrefix: [255, 251],
1024
+ base64Prefix: "//s="
1025
+ },
1026
+ {
1027
+ mimeType: "audio/wav",
1028
+ bytesPrefix: [82, 73, 70, 70],
1029
+ base64Prefix: "UklGR"
1030
+ },
1031
+ {
1032
+ mimeType: "audio/ogg",
1033
+ bytesPrefix: [79, 103, 103, 83],
1034
+ base64Prefix: "T2dnUw"
1035
+ },
1036
+ {
1037
+ mimeType: "audio/flac",
1038
+ bytesPrefix: [102, 76, 97, 67],
1039
+ base64Prefix: "ZkxhQw"
1040
+ },
1041
+ {
1042
+ mimeType: "audio/aac",
1043
+ bytesPrefix: [64, 21, 0, 0],
1044
+ base64Prefix: "QBUA"
1045
+ },
1046
+ {
1047
+ mimeType: "audio/mp4",
1048
+ bytesPrefix: [102, 116, 121, 112],
1049
+ base64Prefix: "ZnR5cA"
1050
+ }
1051
+ ];
1052
+ function detectMimeType({
1053
+ data,
1054
+ signatures
1055
+ }) {
1056
+ for (const signature of signatures) {
1057
+ if (typeof data === "string" ? data.startsWith(signature.base64Prefix) : data.length >= signature.bytesPrefix.length && signature.bytesPrefix.every((byte, index) => data[index] === byte)) {
1022
1058
  return signature.mimeType;
1023
1059
  }
1024
1060
  }
@@ -1075,7 +1111,10 @@ async function generateImage({
1075
1111
  var _a18;
1076
1112
  return new DefaultGeneratedFile({
1077
1113
  data: image,
1078
- mimeType: (_a18 = detectImageMimeType(image)) != null ? _a18 : "image/png"
1114
+ mimeType: (_a18 = detectMimeType({
1115
+ data: image,
1116
+ signatures: imageMimeTypeSignatures
1117
+ })) != null ? _a18 : "image/png"
1079
1118
  });
1080
1119
  }
1081
1120
  )
@@ -1504,7 +1543,10 @@ function convertPartToLanguageModelPart(part, downloadedAssets) {
1504
1543
  switch (type) {
1505
1544
  case "image": {
1506
1545
  if (normalizedData instanceof Uint8Array) {
1507
- mimeType = (_b = detectImageMimeType(normalizedData)) != null ? _b : mimeType;
1546
+ mimeType = (_b = detectMimeType({
1547
+ data: normalizedData,
1548
+ signatures: imageMimeTypeSignatures
1549
+ })) != null ? _b : mimeType;
1508
1550
  }
1509
1551
  return {
1510
1552
  type: "image",
@@ -6293,6 +6335,70 @@ var DefaultStreamTextResult = class {
6293
6335
  }
6294
6336
  };
6295
6337
 
6338
+ // errors/no-transcript-generated-error.ts
6339
+ var import_provider23 = require("@ai-sdk/provider");
6340
+ var NoTranscriptGeneratedError = class extends import_provider23.AISDKError {
6341
+ constructor(options) {
6342
+ super({
6343
+ name: "AI_NoTranscriptGeneratedError",
6344
+ message: "No transcript generated."
6345
+ });
6346
+ this.responses = options.responses;
6347
+ }
6348
+ };
6349
+
6350
+ // core/transcribe/transcribe.ts
6351
+ async function transcribe({
6352
+ model,
6353
+ audio,
6354
+ providerOptions = {},
6355
+ maxRetries: maxRetriesArg,
6356
+ abortSignal,
6357
+ headers
6358
+ }) {
6359
+ const { retry } = prepareRetries({ maxRetries: maxRetriesArg });
6360
+ const audioData = audio instanceof URL ? (await download({ url: audio })).data : convertDataContentToUint8Array(audio);
6361
+ const result = await retry(
6362
+ () => {
6363
+ var _a17;
6364
+ return model.doGenerate({
6365
+ audio: audioData,
6366
+ abortSignal,
6367
+ headers,
6368
+ providerOptions,
6369
+ mediaType: (_a17 = detectMimeType({
6370
+ data: audioData,
6371
+ signatures: audioMimeTypeSignatures
6372
+ })) != null ? _a17 : "audio/wav"
6373
+ });
6374
+ }
6375
+ );
6376
+ if (!result.text) {
6377
+ throw new NoTranscriptGeneratedError({ responses: [result.response] });
6378
+ }
6379
+ return new DefaultTranscriptionResult({
6380
+ text: result.text,
6381
+ segments: result.segments,
6382
+ language: result.language,
6383
+ durationInSeconds: result.durationInSeconds,
6384
+ warnings: result.warnings,
6385
+ responses: [result.response],
6386
+ providerMetadata: result.providerMetadata
6387
+ });
6388
+ }
6389
+ var DefaultTranscriptionResult = class {
6390
+ constructor(options) {
6391
+ var _a17;
6392
+ this.text = options.text;
6393
+ this.segments = options.segments;
6394
+ this.language = options.language;
6395
+ this.durationInSeconds = options.durationInSeconds;
6396
+ this.warnings = options.warnings;
6397
+ this.responses = options.responses;
6398
+ this.providerMetadata = (_a17 = options.providerMetadata) != null ? _a17 : {};
6399
+ }
6400
+ };
6401
+
6296
6402
  // core/util/merge-objects.ts
6297
6403
  function mergeObjects(target, source) {
6298
6404
  if (target === void 0 && source === void 0) {
@@ -6608,7 +6714,7 @@ function appendClientMessage({
6608
6714
 
6609
6715
  // core/prompt/append-response-messages.ts
6610
6716
  var import_ui_utils9 = require("@ai-sdk/ui-utils");
6611
- var import_provider23 = require("@ai-sdk/provider");
6717
+ var import_provider24 = require("@ai-sdk/provider");
6612
6718
  function appendResponseMessages({
6613
6719
  messages,
6614
6720
  responseMessages,
@@ -6691,7 +6797,7 @@ function appendResponseMessages({
6691
6797
  break;
6692
6798
  case "file":
6693
6799
  if (part.data instanceof URL) {
6694
- throw new import_provider23.AISDKError({
6800
+ throw new import_provider24.AISDKError({
6695
6801
  name: "InvalidAssistantFileData",
6696
6802
  message: "File data cannot be a URL"
6697
6803
  });
@@ -6785,7 +6891,7 @@ function appendResponseMessages({
6785
6891
  }
6786
6892
 
6787
6893
  // core/registry/custom-provider.ts
6788
- var import_provider24 = require("@ai-sdk/provider");
6894
+ var import_provider25 = require("@ai-sdk/provider");
6789
6895
  function customProvider({
6790
6896
  languageModels,
6791
6897
  textEmbeddingModels,
@@ -6800,7 +6906,7 @@ function customProvider({
6800
6906
  if (fallbackProvider) {
6801
6907
  return fallbackProvider.languageModel(modelId);
6802
6908
  }
6803
- throw new import_provider24.NoSuchModelError({ modelId, modelType: "languageModel" });
6909
+ throw new import_provider25.NoSuchModelError({ modelId, modelType: "languageModel" });
6804
6910
  },
6805
6911
  textEmbeddingModel(modelId) {
6806
6912
  if (textEmbeddingModels != null && modelId in textEmbeddingModels) {
@@ -6809,7 +6915,7 @@ function customProvider({
6809
6915
  if (fallbackProvider) {
6810
6916
  return fallbackProvider.textEmbeddingModel(modelId);
6811
6917
  }
6812
- throw new import_provider24.NoSuchModelError({ modelId, modelType: "textEmbeddingModel" });
6918
+ throw new import_provider25.NoSuchModelError({ modelId, modelType: "textEmbeddingModel" });
6813
6919
  },
6814
6920
  imageModel(modelId) {
6815
6921
  if (imageModels != null && modelId in imageModels) {
@@ -6818,19 +6924,19 @@ function customProvider({
6818
6924
  if (fallbackProvider == null ? void 0 : fallbackProvider.imageModel) {
6819
6925
  return fallbackProvider.imageModel(modelId);
6820
6926
  }
6821
- throw new import_provider24.NoSuchModelError({ modelId, modelType: "imageModel" });
6927
+ throw new import_provider25.NoSuchModelError({ modelId, modelType: "imageModel" });
6822
6928
  }
6823
6929
  };
6824
6930
  }
6825
6931
  var experimental_customProvider = customProvider;
6826
6932
 
6827
6933
  // core/registry/no-such-provider-error.ts
6828
- var import_provider25 = require("@ai-sdk/provider");
6934
+ var import_provider26 = require("@ai-sdk/provider");
6829
6935
  var name16 = "AI_NoSuchProviderError";
6830
6936
  var marker16 = `vercel.ai.error.${name16}`;
6831
6937
  var symbol16 = Symbol.for(marker16);
6832
6938
  var _a16;
6833
- var NoSuchProviderError = class extends import_provider25.NoSuchModelError {
6939
+ var NoSuchProviderError = class extends import_provider26.NoSuchModelError {
6834
6940
  constructor({
6835
6941
  modelId,
6836
6942
  modelType,
@@ -6844,13 +6950,13 @@ var NoSuchProviderError = class extends import_provider25.NoSuchModelError {
6844
6950
  this.availableProviders = availableProviders;
6845
6951
  }
6846
6952
  static isInstance(error) {
6847
- return import_provider25.AISDKError.hasMarker(error, marker16);
6953
+ return import_provider26.AISDKError.hasMarker(error, marker16);
6848
6954
  }
6849
6955
  };
6850
6956
  _a16 = symbol16;
6851
6957
 
6852
6958
  // core/registry/provider-registry.ts
6853
- var import_provider26 = require("@ai-sdk/provider");
6959
+ var import_provider27 = require("@ai-sdk/provider");
6854
6960
  function createProviderRegistry(providers, {
6855
6961
  separator = ":"
6856
6962
  } = {}) {
@@ -6889,7 +6995,7 @@ var DefaultProviderRegistry = class {
6889
6995
  splitId(id, modelType) {
6890
6996
  const index = id.indexOf(this.separator);
6891
6997
  if (index === -1) {
6892
- throw new import_provider26.NoSuchModelError({
6998
+ throw new import_provider27.NoSuchModelError({
6893
6999
  modelId: id,
6894
7000
  modelType,
6895
7001
  message: `Invalid ${modelType} id for registry: ${id} (must be in the format "providerId${this.separator}modelId")`
@@ -6902,7 +7008,7 @@ var DefaultProviderRegistry = class {
6902
7008
  const [providerId, modelId] = this.splitId(id, "languageModel");
6903
7009
  const model = (_b = (_a17 = this.getProvider(providerId)).languageModel) == null ? void 0 : _b.call(_a17, modelId);
6904
7010
  if (model == null) {
6905
- throw new import_provider26.NoSuchModelError({ modelId: id, modelType: "languageModel" });
7011
+ throw new import_provider27.NoSuchModelError({ modelId: id, modelType: "languageModel" });
6906
7012
  }
6907
7013
  return model;
6908
7014
  }
@@ -6912,7 +7018,7 @@ var DefaultProviderRegistry = class {
6912
7018
  const provider = this.getProvider(providerId);
6913
7019
  const model = (_a17 = provider.textEmbeddingModel) == null ? void 0 : _a17.call(provider, modelId);
6914
7020
  if (model == null) {
6915
- throw new import_provider26.NoSuchModelError({
7021
+ throw new import_provider27.NoSuchModelError({
6916
7022
  modelId: id,
6917
7023
  modelType: "textEmbeddingModel"
6918
7024
  });
@@ -6925,7 +7031,7 @@ var DefaultProviderRegistry = class {
6925
7031
  const provider = this.getProvider(providerId);
6926
7032
  const model = (_a17 = provider.imageModel) == null ? void 0 : _a17.call(provider, modelId);
6927
7033
  if (model == null) {
6928
- throw new import_provider26.NoSuchModelError({ modelId: id, modelType: "imageModel" });
7034
+ throw new import_provider27.NoSuchModelError({ modelId: id, modelType: "imageModel" });
6929
7035
  }
6930
7036
  return model;
6931
7037
  }
@@ -7927,6 +8033,7 @@ var StreamData = class {
7927
8033
  experimental_createProviderRegistry,
7928
8034
  experimental_customProvider,
7929
8035
  experimental_generateImage,
8036
+ experimental_transcribe,
7930
8037
  experimental_wrapLanguageModel,
7931
8038
  extractReasoningMiddleware,
7932
8039
  formatAssistantStreamPart,