ai 6.0.0-beta.45 → 6.0.0-beta.47

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,18 @@
1
1
  # ai
2
2
 
3
+ ## 6.0.0-beta.47
4
+
5
+ ### Patch Changes
6
+
7
+ - c62ecf0: feat(ai): add support for v2 specs in transcription and speech models
8
+
9
+ ## 6.0.0-beta.46
10
+
11
+ ### Patch Changes
12
+
13
+ - Updated dependencies [0a2ff8a]
14
+ - @ai-sdk/gateway@2.0.0-beta.30
15
+
3
16
  ## 6.0.0-beta.45
4
17
 
5
18
  ### Patch Changes
package/dist/index.d.mts CHANGED
@@ -3,7 +3,7 @@ import * as _ai_sdk_provider_utils from '@ai-sdk/provider-utils';
3
3
  import { Tool, InferToolInput, InferToolOutput, AssistantModelMessage, ToolModelMessage, ReasoningPart, ModelMessage, FlexibleSchema, InferSchema, SystemModelMessage, UserModelMessage, ProviderOptions, IdGenerator, ToolCall, DataContent, Resolvable, FetchFunction } from '@ai-sdk/provider-utils';
4
4
  export { AssistantContent, AssistantModelMessage, DataContent, FilePart, FlexibleSchema, IdGenerator, ImagePart, InferSchema, InferToolInput, InferToolOutput, ModelMessage, Schema, SystemModelMessage, TextPart, Tool, ToolApprovalRequest, ToolApprovalResponse, ToolCallOptions, ToolCallPart, ToolContent, ToolExecuteFunction, ToolModelMessage, ToolResultPart, UserContent, UserModelMessage, asSchema, createIdGenerator, dynamicTool, generateId, jsonSchema, parseJsonEventStream, tool, zodSchema } from '@ai-sdk/provider-utils';
5
5
  import * as _ai_sdk_provider from '@ai-sdk/provider';
6
- import { EmbeddingModelV3, EmbeddingModelV2, EmbeddingModelV3Embedding, ImageModelV3, ImageModelV3CallWarning, ImageModelV3ProviderMetadata, JSONValue as JSONValue$1, LanguageModelV3, LanguageModelV2, LanguageModelV3FinishReason, LanguageModelV3CallWarning, LanguageModelV3Source, LanguageModelV3Middleware, SharedV3ProviderMetadata, SpeechModelV3, SpeechModelV3CallWarning, TranscriptionModelV3, TranscriptionModelV3CallWarning, LanguageModelV3Usage, LanguageModelV3CallOptions, AISDKError, LanguageModelV3ToolCall, JSONSchema7, JSONParseError, TypeValidationError, ProviderV3, ProviderV2, NoSuchModelError, JSONObject } from '@ai-sdk/provider';
6
+ import { EmbeddingModelV3, EmbeddingModelV2, EmbeddingModelV3Embedding, ImageModelV3, ImageModelV3CallWarning, ImageModelV3ProviderMetadata, JSONValue as JSONValue$1, LanguageModelV3, LanguageModelV2, LanguageModelV3FinishReason, LanguageModelV3CallWarning, LanguageModelV3Source, LanguageModelV3Middleware, SharedV3ProviderMetadata, SpeechModelV3, SpeechModelV2, SpeechModelV3CallWarning, TranscriptionModelV3, TranscriptionModelV2, TranscriptionModelV3CallWarning, LanguageModelV3Usage, LanguageModelV3CallOptions, AISDKError, LanguageModelV3ToolCall, JSONSchema7, JSONParseError, TypeValidationError, ProviderV3, ProviderV2, NoSuchModelError, JSONObject } from '@ai-sdk/provider';
7
7
  export { AISDKError, APICallError, EmptyResponseBodyError, InvalidPromptError, InvalidResponseDataError, JSONParseError, JSONSchema7, LoadAPIKeyError, NoContentGeneratedError, NoSuchModelError, TooManyEmbeddingValuesForCallError, TypeValidationError, UnsupportedFunctionalityError } from '@ai-sdk/provider';
8
8
  import { ServerResponse } from 'node:http';
9
9
  import { AttributeValue, Tracer } from '@opentelemetry/api';
@@ -164,7 +164,7 @@ type ProviderMetadata = SharedV3ProviderMetadata;
164
164
  /**
165
165
  Speech model that is used by the AI SDK Core functions.
166
166
  */
167
- type SpeechModel = SpeechModelV3;
167
+ type SpeechModel = string | SpeechModelV3 | SpeechModelV2;
168
168
  /**
169
169
  Warning from the model provider for this call. The call will proceed, but e.g.
170
170
  some settings might not be supported, which can lead to suboptimal results.
@@ -193,7 +193,7 @@ type SpeechModelResponseMetadata = {
193
193
  /**
194
194
  Transcription model that is used by the AI SDK Core functions.
195
195
  */
196
- type TranscriptionModel = TranscriptionModelV3;
196
+ type TranscriptionModel = string | TranscriptionModelV3 | TranscriptionModelV2;
197
197
  /**
198
198
  Warning from the model provider for this call. The call will proceed, but e.g.
199
199
  some settings might not be supported, which can lead to suboptimal results.
@@ -3691,7 +3691,7 @@ declare function generateSpeech({ model, text, voice, outputFormat, instructions
3691
3691
  /**
3692
3692
  The speech model to use.
3693
3693
  */
3694
- model: SpeechModelV3;
3694
+ model: SpeechModel;
3695
3695
  /**
3696
3696
  The text to convert to speech.
3697
3697
  */
@@ -4149,7 +4149,7 @@ declare function transcribe({ model, audio, providerOptions, maxRetries: maxRetr
4149
4149
  /**
4150
4150
  The transcription model to use.
4151
4151
  */
4152
- model: TranscriptionModelV3;
4152
+ model: TranscriptionModel;
4153
4153
  /**
4154
4154
  The audio data to transcribe.
4155
4155
  */
package/dist/index.d.ts CHANGED
@@ -3,7 +3,7 @@ import * as _ai_sdk_provider_utils from '@ai-sdk/provider-utils';
3
3
  import { Tool, InferToolInput, InferToolOutput, AssistantModelMessage, ToolModelMessage, ReasoningPart, ModelMessage, FlexibleSchema, InferSchema, SystemModelMessage, UserModelMessage, ProviderOptions, IdGenerator, ToolCall, DataContent, Resolvable, FetchFunction } from '@ai-sdk/provider-utils';
4
4
  export { AssistantContent, AssistantModelMessage, DataContent, FilePart, FlexibleSchema, IdGenerator, ImagePart, InferSchema, InferToolInput, InferToolOutput, ModelMessage, Schema, SystemModelMessage, TextPart, Tool, ToolApprovalRequest, ToolApprovalResponse, ToolCallOptions, ToolCallPart, ToolContent, ToolExecuteFunction, ToolModelMessage, ToolResultPart, UserContent, UserModelMessage, asSchema, createIdGenerator, dynamicTool, generateId, jsonSchema, parseJsonEventStream, tool, zodSchema } from '@ai-sdk/provider-utils';
5
5
  import * as _ai_sdk_provider from '@ai-sdk/provider';
6
- import { EmbeddingModelV3, EmbeddingModelV2, EmbeddingModelV3Embedding, ImageModelV3, ImageModelV3CallWarning, ImageModelV3ProviderMetadata, JSONValue as JSONValue$1, LanguageModelV3, LanguageModelV2, LanguageModelV3FinishReason, LanguageModelV3CallWarning, LanguageModelV3Source, LanguageModelV3Middleware, SharedV3ProviderMetadata, SpeechModelV3, SpeechModelV3CallWarning, TranscriptionModelV3, TranscriptionModelV3CallWarning, LanguageModelV3Usage, LanguageModelV3CallOptions, AISDKError, LanguageModelV3ToolCall, JSONSchema7, JSONParseError, TypeValidationError, ProviderV3, ProviderV2, NoSuchModelError, JSONObject } from '@ai-sdk/provider';
6
+ import { EmbeddingModelV3, EmbeddingModelV2, EmbeddingModelV3Embedding, ImageModelV3, ImageModelV3CallWarning, ImageModelV3ProviderMetadata, JSONValue as JSONValue$1, LanguageModelV3, LanguageModelV2, LanguageModelV3FinishReason, LanguageModelV3CallWarning, LanguageModelV3Source, LanguageModelV3Middleware, SharedV3ProviderMetadata, SpeechModelV3, SpeechModelV2, SpeechModelV3CallWarning, TranscriptionModelV3, TranscriptionModelV2, TranscriptionModelV3CallWarning, LanguageModelV3Usage, LanguageModelV3CallOptions, AISDKError, LanguageModelV3ToolCall, JSONSchema7, JSONParseError, TypeValidationError, ProviderV3, ProviderV2, NoSuchModelError, JSONObject } from '@ai-sdk/provider';
7
7
  export { AISDKError, APICallError, EmptyResponseBodyError, InvalidPromptError, InvalidResponseDataError, JSONParseError, JSONSchema7, LoadAPIKeyError, NoContentGeneratedError, NoSuchModelError, TooManyEmbeddingValuesForCallError, TypeValidationError, UnsupportedFunctionalityError } from '@ai-sdk/provider';
8
8
  import { ServerResponse } from 'node:http';
9
9
  import { AttributeValue, Tracer } from '@opentelemetry/api';
@@ -164,7 +164,7 @@ type ProviderMetadata = SharedV3ProviderMetadata;
164
164
  /**
165
165
  Speech model that is used by the AI SDK Core functions.
166
166
  */
167
- type SpeechModel = SpeechModelV3;
167
+ type SpeechModel = string | SpeechModelV3 | SpeechModelV2;
168
168
  /**
169
169
  Warning from the model provider for this call. The call will proceed, but e.g.
170
170
  some settings might not be supported, which can lead to suboptimal results.
@@ -193,7 +193,7 @@ type SpeechModelResponseMetadata = {
193
193
  /**
194
194
  Transcription model that is used by the AI SDK Core functions.
195
195
  */
196
- type TranscriptionModel = TranscriptionModelV3;
196
+ type TranscriptionModel = string | TranscriptionModelV3 | TranscriptionModelV2;
197
197
  /**
198
198
  Warning from the model provider for this call. The call will proceed, but e.g.
199
199
  some settings might not be supported, which can lead to suboptimal results.
@@ -3691,7 +3691,7 @@ declare function generateSpeech({ model, text, voice, outputFormat, instructions
3691
3691
  /**
3692
3692
  The speech model to use.
3693
3693
  */
3694
- model: SpeechModelV3;
3694
+ model: SpeechModel;
3695
3695
  /**
3696
3696
  The text to convert to speech.
3697
3697
  */
@@ -4149,7 +4149,7 @@ declare function transcribe({ model, audio, providerOptions, maxRetries: maxRetr
4149
4149
  /**
4150
4150
  The transcription model to use.
4151
4151
  */
4152
- model: TranscriptionModelV3;
4152
+ model: TranscriptionModel;
4153
4153
  /**
4154
4154
  The audio data to transcribe.
4155
4155
  */
package/dist/index.js CHANGED
@@ -581,6 +581,24 @@ function transformToV3EmbeddingModel(model) {
581
581
  }
582
582
  });
583
583
  }
584
+ function transformToV3TranscriptionModel(model) {
585
+ return new Proxy(model, {
586
+ get(target, prop) {
587
+ if (prop === "specificationVersion")
588
+ return "v3";
589
+ return target[prop];
590
+ }
591
+ });
592
+ }
593
+ function transformToV3SpeechModel(model) {
594
+ return new Proxy(model, {
595
+ get(target, prop) {
596
+ if (prop === "specificationVersion")
597
+ return "v3";
598
+ return target[prop];
599
+ }
600
+ });
601
+ }
584
602
  function resolveLanguageModel(model) {
585
603
  if (typeof model !== "string") {
586
604
  if (model.specificationVersion !== "v3" && model.specificationVersion !== "v2") {
@@ -617,6 +635,42 @@ function resolveEmbeddingModel(model) {
617
635
  model
618
636
  );
619
637
  }
638
+ function resolveTranscriptionModel(model) {
639
+ var _a17, _b;
640
+ if (typeof model !== "string") {
641
+ if (model.specificationVersion !== "v3" && model.specificationVersion !== "v2") {
642
+ const unsupportedModel = model;
643
+ throw new UnsupportedModelVersionError({
644
+ version: unsupportedModel.specificationVersion,
645
+ provider: unsupportedModel.provider,
646
+ modelId: unsupportedModel.modelId
647
+ });
648
+ }
649
+ if (model.specificationVersion === "v2") {
650
+ return transformToV3TranscriptionModel(model);
651
+ }
652
+ return model;
653
+ }
654
+ return (_b = (_a17 = getGlobalProvider()).transcriptionModel) == null ? void 0 : _b.call(_a17, model);
655
+ }
656
+ function resolveSpeechModel(model) {
657
+ var _a17, _b;
658
+ if (typeof model !== "string") {
659
+ if (model.specificationVersion !== "v3" && model.specificationVersion !== "v2") {
660
+ const unsupportedModel = model;
661
+ throw new UnsupportedModelVersionError({
662
+ version: unsupportedModel.specificationVersion,
663
+ provider: unsupportedModel.provider,
664
+ modelId: unsupportedModel.modelId
665
+ });
666
+ }
667
+ if (model.specificationVersion === "v2") {
668
+ return transformToV3SpeechModel(model);
669
+ }
670
+ return model;
671
+ }
672
+ return (_b = (_a17 = getGlobalProvider()).speechModel) == null ? void 0 : _b.call(_a17, model);
673
+ }
620
674
  function getGlobalProvider() {
621
675
  var _a17;
622
676
  return (_a17 = globalThis.AI_SDK_DEFAULT_PROVIDER) != null ? _a17 : import_gateway.gateway;
@@ -814,7 +868,7 @@ function detectMediaType({
814
868
  var import_provider_utils2 = require("@ai-sdk/provider-utils");
815
869
 
816
870
  // src/version.ts
817
- var VERSION = true ? "6.0.0-beta.45" : "0.0.0-test";
871
+ var VERSION = true ? "6.0.0-beta.47" : "0.0.0-test";
818
872
 
819
873
  // src/util/download/download.ts
820
874
  var download = async ({ url }) => {
@@ -8327,12 +8381,9 @@ async function generateSpeech({
8327
8381
  headers
8328
8382
  }) {
8329
8383
  var _a17;
8330
- if (model.specificationVersion !== "v3") {
8331
- throw new UnsupportedModelVersionError({
8332
- version: model.specificationVersion,
8333
- provider: model.provider,
8334
- modelId: model.modelId
8335
- });
8384
+ const resolvedModel = resolveSpeechModel(model);
8385
+ if (!resolvedModel) {
8386
+ throw new Error("Model could not be resolved");
8336
8387
  }
8337
8388
  const headersWithUserAgent = (0, import_provider_utils25.withUserAgentSuffix)(
8338
8389
  headers != null ? headers : {},
@@ -8343,7 +8394,7 @@ async function generateSpeech({
8343
8394
  abortSignal
8344
8395
  });
8345
8396
  const result = await retry(
8346
- () => model.doGenerate({
8397
+ () => resolvedModel.doGenerate({
8347
8398
  text: text2,
8348
8399
  voice,
8349
8400
  outputFormat,
@@ -9746,12 +9797,9 @@ async function transcribe({
9746
9797
  abortSignal,
9747
9798
  headers
9748
9799
  }) {
9749
- if (model.specificationVersion !== "v3") {
9750
- throw new UnsupportedModelVersionError({
9751
- version: model.specificationVersion,
9752
- provider: model.provider,
9753
- modelId: model.modelId
9754
- });
9800
+ const resolvedModel = resolveTranscriptionModel(model);
9801
+ if (!resolvedModel) {
9802
+ throw new Error("Model could not be resolved");
9755
9803
  }
9756
9804
  const { retry } = prepareRetries({
9757
9805
  maxRetries: maxRetriesArg,
@@ -9765,7 +9813,7 @@ async function transcribe({
9765
9813
  const result = await retry(
9766
9814
  () => {
9767
9815
  var _a17;
9768
- return model.doGenerate({
9816
+ return resolvedModel.doGenerate({
9769
9817
  audio: audioData,
9770
9818
  abortSignal,
9771
9819
  headers: headersWithUserAgent,