@ai-sdk/provider 2.0.0-alpha.9 → 2.0.0-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -138,6 +138,12 @@ type EmbeddingModelV2<VALUE> = {
138
138
  tokens: number;
139
139
  };
140
140
  /**
141
+ Additional provider-specific metadata. They are passed through
142
+ from the provider to the AI SDK and enable provider-specific
143
+ results that can be fully encapsulated in the provider.
144
+ */
145
+ providerMetadata?: SharedV2ProviderMetadata;
146
+ /**
141
147
  Optional response information for debugging purposes.
142
148
  */
143
149
  response?: {
@@ -313,11 +319,11 @@ declare const symbol$3: unique symbol;
313
319
  declare class NoSuchModelError extends AISDKError {
314
320
  private readonly [symbol$3];
315
321
  readonly modelId: string;
316
- readonly modelType: 'languageModel' | 'textEmbeddingModel' | 'imageModel';
322
+ readonly modelType: 'languageModel' | 'textEmbeddingModel' | 'imageModel' | 'transcriptionModel' | 'speechModel';
317
323
  constructor({ errorName, modelId, modelType, message, }: {
318
324
  errorName?: string;
319
325
  modelId: string;
320
- modelType: 'languageModel' | 'textEmbeddingModel' | 'imageModel';
326
+ modelType: 'languageModel' | 'textEmbeddingModel' | 'imageModel' | 'transcriptionModel' | 'speechModel';
321
327
  message?: string;
322
328
  });
323
329
  static isInstance(error: unknown): error is NoSuchModelError;
@@ -555,7 +561,11 @@ type LanguageModelV2FunctionTool = {
555
561
  The parameters that the tool expects. The language model uses this to
556
562
  understand the tool's input requirements and to provide matching suggestions.
557
563
  */
558
- parameters: JSONSchema7;
564
+ inputSchema: JSONSchema7;
565
+ /**
566
+ The provider-specific options for the tool.
567
+ */
568
+ providerOptions?: SharedV2ProviderOptions;
559
569
  };
560
570
 
561
571
  /**
@@ -581,7 +591,7 @@ type LanguageModelV2Message = ({
581
591
  content: Array<LanguageModelV2TextPart | LanguageModelV2FilePart>;
582
592
  } | {
583
593
  role: 'assistant';
584
- content: Array<LanguageModelV2TextPart | LanguageModelV2FilePart | LanguageModelV2ReasoningPart | LanguageModelV2ToolCallPart>;
594
+ content: Array<LanguageModelV2TextPart | LanguageModelV2FilePart | LanguageModelV2ReasoningPart | LanguageModelV2ToolCallPart | LanguageModelV2ToolResultPart>;
585
595
  } | {
586
596
  role: 'tool';
587
597
  content: Array<LanguageModelV2ToolResultPart>;
@@ -669,7 +679,12 @@ interface LanguageModelV2ToolCallPart {
669
679
  /**
670
680
  Arguments of the tool call. This is a JSON-serializable object that matches the tool's input schema.
671
681
  */
672
- args: unknown;
682
+ input: unknown;
683
+ /**
684
+ * Whether the tool call will be executed by the provider.
685
+ * If this flag is not set or is false, the tool call will be executed by the client.
686
+ */
687
+ providerExecuted?: boolean;
673
688
  /**
674
689
  * Additional provider-specific options. They are passed through
675
690
  * to the provider from the AI SDK and enable provider-specific
@@ -691,43 +706,49 @@ interface LanguageModelV2ToolResultPart {
691
706
  */
692
707
  toolName: string;
693
708
  /**
694
- Result of the tool call. This is a JSON-serializable object.
695
- */
696
- result: unknown;
697
- /**
698
- Optional flag if the result is an error or an error message.
709
+ Result of the tool call.
699
710
  */
700
- isError?: boolean;
711
+ output: LanguageModelV2ToolResultOutput;
701
712
  /**
702
- Tool results as an array of parts. This enables advanced tool results including images.
703
- When this is used, the `result` field should be ignored (if the provider supports content).
713
+ * Additional provider-specific options. They are passed through
714
+ * to the provider from the AI SDK and enable provider-specific
715
+ * functionality that can be fully encapsulated in the provider.
704
716
  */
705
- content?: Array<{
717
+ providerOptions?: SharedV2ProviderOptions;
718
+ }
719
+ type LanguageModelV2ToolResultOutput = {
720
+ type: 'text';
721
+ value: string;
722
+ } | {
723
+ type: 'json';
724
+ value: JSONValue;
725
+ } | {
726
+ type: 'error-text';
727
+ value: string;
728
+ } | {
729
+ type: 'error-json';
730
+ value: JSONValue;
731
+ } | {
732
+ type: 'content';
733
+ value: Array<{
706
734
  type: 'text';
707
735
  /**
708
736
  Text content.
709
- */
737
+ */
710
738
  text: string;
711
739
  } | {
712
- type: 'image';
740
+ type: 'media';
713
741
  /**
714
- base-64 encoded image data
715
- */
742
+ Base-64 encoded media data.
743
+ */
716
744
  data: string;
717
745
  /**
718
- IANA media type of the image.
719
-
746
+ IANA media type.
720
747
  @see https://www.iana.org/assignments/media-types/media-types.xhtml
721
- */
722
- mediaType?: string;
748
+ */
749
+ mediaType: string;
723
750
  }>;
724
- /**
725
- * Additional provider-specific options. They are passed through
726
- * to the provider from the AI SDK and enable provider-specific
727
- * functionality that can be fully encapsulated in the provider.
728
- */
729
- providerOptions?: SharedV2ProviderOptions;
730
- }
751
+ };
731
752
 
732
753
  /**
733
754
  The configuration of a tool that is defined by the provider.
@@ -738,11 +759,11 @@ type LanguageModelV2ProviderDefinedTool = {
738
759
  */
739
760
  type: 'provider-defined';
740
761
  /**
741
- The ID of the tool. Should follow the format `<provider-name>.<tool-name>`.
762
+ The ID of the tool. Should follow the format `<provider-name>.<unique-tool-name>`.
742
763
  */
743
764
  id: `${string}.${string}`;
744
765
  /**
745
- The name of the tool. Unique within this model call.
766
+ The name of the tool that the user must use in the tool set.
746
767
  */
747
768
  name: string;
748
769
  /**
@@ -843,6 +864,10 @@ type LanguageModelV2CallOptions = {
843
864
  */
844
865
  toolChoice?: LanguageModelV2ToolChoice;
845
866
  /**
867
+ Include raw chunks in the stream. Only applicable for streaming calls.
868
+ */
869
+ includeRawChunks?: boolean;
870
+ /**
846
871
  Abort signal for cancelling the operation.
847
872
  */
848
873
  abortSignal?: AbortSignal;
@@ -918,7 +943,7 @@ A source that has been used as input to generate the response.
918
943
  type LanguageModelV2Source = {
919
944
  type: 'source';
920
945
  /**
921
- * A URL source. This is return by web search RAG models.
946
+ * The type of source - URL sources reference web content.
922
947
  */
923
948
  sourceType: 'url';
924
949
  /**
@@ -937,6 +962,32 @@ type LanguageModelV2Source = {
937
962
  * Additional provider metadata for the source.
938
963
  */
939
964
  providerMetadata?: SharedV2ProviderMetadata;
965
+ } | {
966
+ type: 'source';
967
+ /**
968
+ * The type of source - document sources reference files/documents.
969
+ */
970
+ sourceType: 'document';
971
+ /**
972
+ * The ID of the source.
973
+ */
974
+ id: string;
975
+ /**
976
+ * IANA media type of the document (e.g., 'application/pdf').
977
+ */
978
+ mediaType: string;
979
+ /**
980
+ * The title of the document.
981
+ */
982
+ title: string;
983
+ /**
984
+ * Optional filename of the document.
985
+ */
986
+ filename?: string;
987
+ /**
988
+ * Additional provider metadata for the source.
989
+ */
990
+ providerMetadata?: SharedV2ProviderMetadata;
940
991
  };
941
992
 
942
993
  /**
@@ -948,6 +999,7 @@ type LanguageModelV2Text = {
948
999
  The text content.
949
1000
  */
950
1001
  text: string;
1002
+ providerMetadata?: SharedV2ProviderMetadata;
951
1003
  };
952
1004
 
953
1005
  /**
@@ -955,17 +1007,58 @@ Tool calls that the model has generated.
955
1007
  */
956
1008
  type LanguageModelV2ToolCall = {
957
1009
  type: 'tool-call';
958
- toolCallType: 'function';
959
1010
  toolCallId: string;
960
1011
  toolName: string;
961
1012
  /**
962
1013
  Stringified JSON object with the tool call arguments. Must match the
963
1014
  input schema of the tool.
964
1015
  */
965
- args: string;
1016
+ input: string;
1017
+ /**
1018
+ * Whether the tool call will be executed by the provider.
1019
+ * If this flag is not set or is false, the tool call will be executed by the client.
1020
+ */
1021
+ providerExecuted?: boolean;
1022
+ /**
1023
+ * Additional provider-specific metadata for the tool call.
1024
+ */
1025
+ providerMetadata?: SharedV2ProviderMetadata;
1026
+ };
1027
+
1028
+ /**
1029
+ Result of a tool call that has been executed by the provider.
1030
+ */
1031
+ type LanguageModelV2ToolResult = {
1032
+ type: 'tool-result';
1033
+ /**
1034
+ * The ID of the tool call that this result is associated with.
1035
+ */
1036
+ toolCallId: string;
1037
+ /**
1038
+ * Name of the tool that generated this result.
1039
+ */
1040
+ toolName: string;
1041
+ /**
1042
+ * Result of the tool call. This is a JSON-serializable object.
1043
+ */
1044
+ result: unknown;
1045
+ /**
1046
+ * Optional flag if the result is an error or an error message.
1047
+ */
1048
+ isError?: boolean;
1049
+ /**
1050
+ * Whether the tool result was generated by the provider.
1051
+ * If this flag is set to true, the tool result was generated by the provider.
1052
+ * If this flag is not set or is false, the tool result was generated by the client.
1053
+ */
1054
+ providerExecuted?: boolean;
1055
+ /**
1056
+ * Additional provider-specific metadata for the tool result.
1057
+ */
1058
+ providerMetadata?: SharedV2ProviderMetadata;
966
1059
  };
967
1060
 
968
- type LanguageModelV2Content = LanguageModelV2Text | LanguageModelV2Reasoning | LanguageModelV2File | LanguageModelV2Source | LanguageModelV2ToolCall;
1061
+ type LanguageModelV2Content = LanguageModelV2Text | LanguageModelV2Reasoning | LanguageModelV2File | LanguageModelV2Source | LanguageModelV2ToolCall | LanguageModelV2ToolResult;
969
1062
 
970
1063
  /**
971
1064
  Reason why a language model finished generating a response.
@@ -996,14 +1089,6 @@ interface LanguageModelV2ResponseMetadata {
996
1089
  modelId?: string;
997
1090
  }
998
1091
 
999
- type LanguageModelV2ToolCallDelta = {
1000
- type: 'tool-call-delta';
1001
- toolCallType: 'function';
1002
- toolCallId: string;
1003
- toolName: string;
1004
- argsTextDelta: string;
1005
- };
1006
-
1007
1092
  /**
1008
1093
  Usage information for a language model call.
1009
1094
 
@@ -1035,9 +1120,48 @@ type LanguageModelV2Usage = {
1035
1120
  cachedInputTokens?: number | undefined;
1036
1121
  };
1037
1122
 
1038
- type LanguageModelV2StreamPart = LanguageModelV2Content | {
1039
- type: 'reasoning-part-finish';
1040
- } | LanguageModelV2ToolCallDelta | {
1123
+ type LanguageModelV2StreamPart = {
1124
+ type: 'text-start';
1125
+ providerMetadata?: SharedV2ProviderMetadata;
1126
+ id: string;
1127
+ } | {
1128
+ type: 'text-delta';
1129
+ id: string;
1130
+ providerMetadata?: SharedV2ProviderMetadata;
1131
+ delta: string;
1132
+ } | {
1133
+ type: 'text-end';
1134
+ providerMetadata?: SharedV2ProviderMetadata;
1135
+ id: string;
1136
+ } | {
1137
+ type: 'reasoning-start';
1138
+ providerMetadata?: SharedV2ProviderMetadata;
1139
+ id: string;
1140
+ } | {
1141
+ type: 'reasoning-delta';
1142
+ id: string;
1143
+ providerMetadata?: SharedV2ProviderMetadata;
1144
+ delta: string;
1145
+ } | {
1146
+ type: 'reasoning-end';
1147
+ id: string;
1148
+ providerMetadata?: SharedV2ProviderMetadata;
1149
+ } | {
1150
+ type: 'tool-input-start';
1151
+ id: string;
1152
+ toolName: string;
1153
+ providerMetadata?: SharedV2ProviderMetadata;
1154
+ providerExecuted?: boolean;
1155
+ } | {
1156
+ type: 'tool-input-delta';
1157
+ id: string;
1158
+ delta: string;
1159
+ providerMetadata?: SharedV2ProviderMetadata;
1160
+ } | {
1161
+ type: 'tool-input-end';
1162
+ id: string;
1163
+ providerMetadata?: SharedV2ProviderMetadata;
1164
+ } | LanguageModelV2ToolCall | LanguageModelV2ToolResult | LanguageModelV2File | LanguageModelV2Source | {
1041
1165
  type: 'stream-start';
1042
1166
  warnings: Array<LanguageModelV2CallWarning>;
1043
1167
  } | ({
@@ -1047,6 +1171,9 @@ type LanguageModelV2StreamPart = LanguageModelV2Content | {
1047
1171
  usage: LanguageModelV2Usage;
1048
1172
  finishReason: LanguageModelV2FinishReason;
1049
1173
  providerMetadata?: SharedV2ProviderMetadata;
1174
+ } | {
1175
+ type: 'raw';
1176
+ rawValue: unknown;
1050
1177
  } | {
1051
1178
  type: 'error';
1052
1179
  error: unknown;
@@ -1174,6 +1301,27 @@ type LanguageModelV2Middleware = {
1174
1301
  * Middleware specification version. Use `v2` for the current version.
1175
1302
  */
1176
1303
  middlewareVersion?: 'v2' | undefined;
1304
+ /**
1305
+ * Override the provider name if desired.
1306
+ * @param options.model - The language model instance.
1307
+ */
1308
+ overrideProvider?: (options: {
1309
+ model: LanguageModelV2;
1310
+ }) => string;
1311
+ /**
1312
+ * Override the model ID if desired.
1313
+ * @param options.model - The language model instance.
1314
+ */
1315
+ overrideModelId?: (options: {
1316
+ model: LanguageModelV2;
1317
+ }) => string;
1318
+ /**
1319
+ * Override the supported URLs if desired.
1320
+ * @param options.model - The language model instance.
1321
+ */
1322
+ overrideSupportedUrls?: (options: {
1323
+ model: LanguageModelV2;
1324
+ }) => PromiseLike<Record<string, RegExp[]>> | Record<string, RegExp[]>;
1177
1325
  /**
1178
1326
  * Transforms the parameters before they are passed to the language model.
1179
1327
  * @param options - Object containing the type of operation and the parameters.
@@ -1184,6 +1332,7 @@ type LanguageModelV2Middleware = {
1184
1332
  transformParams?: (options: {
1185
1333
  type: 'generate' | 'stream';
1186
1334
  params: LanguageModelV2CallOptions;
1335
+ model: LanguageModelV2;
1187
1336
  }) => PromiseLike<LanguageModelV2CallOptions>;
1188
1337
  /**
1189
1338
  * Wraps the generate operation of the language model.
@@ -1220,809 +1369,148 @@ type LanguageModelV2Middleware = {
1220
1369
  }) => PromiseLike<Awaited<ReturnType<LanguageModelV2['doStream']>>>;
1221
1370
  };
1222
1371
 
1223
- /**
1224
- * Additional provider-specific metadata. They are passed through
1225
- * to the provider from the AI SDK and enable provider-specific
1226
- * functionality that can be fully encapsulated in the provider.
1227
- *
1228
- * This enables us to quickly ship provider-specific functionality
1229
- * without affecting the core AI SDK.
1230
- *
1231
- * The outer record is keyed by the provider name, and the inner
1232
- * record is keyed by the provider-specific metadata key.
1233
- *
1234
- * ```ts
1235
- * {
1236
- * "anthropic": {
1237
- * "cacheControl": { "type": "ephemeral" }
1238
- * }
1239
- * }
1240
- * ```
1241
- */
1242
- type LanguageModelV1ProviderMetadata = Record<string, Record<string, JSONValue>>;
1243
-
1244
- /**
1245
- * A source that has been used as input to generate the response.
1246
- */
1247
- type LanguageModelV1Source = {
1372
+ type SpeechModelV2ProviderOptions = Record<string, Record<string, JSONValue>>;
1373
+ type SpeechModelV2CallOptions = {
1248
1374
  /**
1249
- * A URL source. This is return by web search RAG models.
1375
+ * Text to convert to speech.
1250
1376
  */
1251
- sourceType: 'url';
1377
+ text: string;
1252
1378
  /**
1253
- * The ID of the source.
1379
+ * The voice to use for speech synthesis.
1380
+ * This is provider-specific and may be a voice ID, name, or other identifier.
1254
1381
  */
1255
- id: string;
1382
+ voice?: string;
1256
1383
  /**
1257
- * The URL of the source.
1384
+ * The desired output format for the audio e.g. "mp3", "wav", etc.
1258
1385
  */
1259
- url: string;
1386
+ outputFormat?: string;
1260
1387
  /**
1261
- * The title of the source.
1388
+ * Instructions for the speech generation e.g. "Speak in a slow and steady tone".
1262
1389
  */
1263
- title?: string;
1390
+ instructions?: string;
1264
1391
  /**
1265
- * Additional provider metadata for the source.
1392
+ * The speed of the speech generation.
1266
1393
  */
1267
- providerMetadata?: LanguageModelV1ProviderMetadata;
1268
- };
1269
-
1270
- type LanguageModelV1CallSettings = {
1394
+ speed?: number;
1271
1395
  /**
1272
- Maximum number of tokens to generate.
1396
+ * The language for speech generation. This should be an ISO 639-1 language code (e.g. "en", "es", "fr")
1397
+ * or "auto" for automatic language detection. Provider support varies.
1273
1398
  */
1274
- maxTokens?: number;
1399
+ language?: string;
1275
1400
  /**
1276
- Temperature setting.
1277
-
1278
- It is recommended to set either `temperature` or `topP`, but not both.
1401
+ * Additional provider-specific options that are passed through to the provider
1402
+ * as body parameters.
1403
+ *
1404
+ * The outer record is keyed by the provider name, and the inner
1405
+ * record is keyed by the provider-specific metadata key.
1406
+ * ```ts
1407
+ * {
1408
+ * "openai": {}
1409
+ * }
1410
+ * ```
1279
1411
  */
1280
- temperature?: number;
1412
+ providerOptions?: SpeechModelV2ProviderOptions;
1281
1413
  /**
1282
- Stop sequences.
1283
- If set, the model will stop generating text when one of the stop sequences is generated.
1284
- Providers may have limits on the number of stop sequences.
1414
+ * Abort signal for cancelling the operation.
1285
1415
  */
1286
- stopSequences?: string[];
1416
+ abortSignal?: AbortSignal;
1287
1417
  /**
1288
- Nucleus sampling.
1289
-
1290
- It is recommended to set either `temperature` or `topP`, but not both.
1418
+ * Additional HTTP headers to be sent with the request.
1419
+ * Only applicable for HTTP-based providers.
1291
1420
  */
1292
- topP?: number;
1421
+ headers?: Record<string, string | undefined>;
1422
+ };
1423
+
1424
+ /**
1425
+ * Warning from the model provider for this call. The call will proceed, but e.g.
1426
+ * some settings might not be supported, which can lead to suboptimal results.
1427
+ */
1428
+ type SpeechModelV2CallWarning = {
1429
+ type: 'unsupported-setting';
1430
+ setting: keyof SpeechModelV2CallOptions;
1431
+ details?: string;
1432
+ } | {
1433
+ type: 'other';
1434
+ message: string;
1435
+ };
1436
+
1437
+ /**
1438
+ * Speech model specification version 2.
1439
+ */
1440
+ type SpeechModelV2 = {
1293
1441
  /**
1294
- Only sample from the top K options for each subsequent token.
1295
-
1296
- Used to remove "long tail" low probability responses.
1297
- Recommended for advanced use cases only. You usually only need to use temperature.
1442
+ * The speech model must specify which speech model interface
1443
+ * version it implements. This will allow us to evolve the speech
1444
+ * model interface and retain backwards compatibility. The different
1445
+ * implementation versions can be handled as a discriminated union
1446
+ * on our side.
1298
1447
  */
1299
- topK?: number;
1448
+ readonly specificationVersion: 'v2';
1300
1449
  /**
1301
- Presence penalty setting. It affects the likelihood of the model to
1302
- repeat information that is already in the prompt.
1450
+ * Name of the provider for logging purposes.
1303
1451
  */
1304
- presencePenalty?: number;
1452
+ readonly provider: string;
1305
1453
  /**
1306
- Frequency penalty setting. It affects the likelihood of the model
1307
- to repeatedly use the same words or phrases.
1454
+ * Provider-specific model ID for logging purposes.
1308
1455
  */
1309
- frequencyPenalty?: number;
1456
+ readonly modelId: string;
1310
1457
  /**
1311
- Response format. The output can either be text or JSON. Default is text.
1312
-
1313
- If JSON is selected, a schema can optionally be provided to guide the LLM.
1458
+ * Generates speech audio from text.
1314
1459
  */
1315
- responseFormat?: {
1316
- type: 'text';
1317
- } | {
1318
- type: 'json';
1460
+ doGenerate(options: SpeechModelV2CallOptions): PromiseLike<{
1319
1461
  /**
1320
- * JSON schema that the generated output should conform to.
1462
+ * Generated audio as a base64 encoded string or as binary data (Uint8Array).
1463
+ * The audio should be returned without any unnecessary conversion.
1464
+ * If the API returns base64 encoded strings, the audio should be returned
1465
+ * as base64 encoded strings. If the API returns binary data, the audio
1466
+ * should be returned as binary data.
1321
1467
  */
1322
- schema?: JSONSchema7;
1468
+ audio: string | Uint8Array;
1323
1469
  /**
1324
- * Name of output that should be generated. Used by some providers for additional LLM guidance.
1470
+ * Warnings for the call, e.g. unsupported settings.
1325
1471
  */
1326
- name?: string;
1327
- /**
1328
- * Description of the output that should be generated. Used by some providers for additional LLM guidance.
1329
- */
1330
- description?: string;
1331
- };
1332
- /**
1333
- The seed (integer) to use for random sampling. If set and supported
1334
- by the model, calls will generate deterministic results.
1335
- */
1336
- seed?: number;
1337
- /**
1338
- Abort signal for cancelling the operation.
1339
- */
1340
- abortSignal?: AbortSignal;
1341
- /**
1342
- Additional HTTP headers to be sent with the request.
1343
- Only applicable for HTTP-based providers.
1344
- */
1345
- headers?: Record<string, string | undefined>;
1346
- };
1347
-
1348
- /**
1349
- A tool has a name, a description, and a set of parameters.
1350
-
1351
- Note: this is **not** the user-facing tool definition. The AI SDK methods will
1352
- map the user-facing tool definitions to this format.
1353
- */
1354
- type LanguageModelV1FunctionTool = {
1355
- /**
1356
- The type of the tool (always 'function').
1357
- */
1358
- type: 'function';
1359
- /**
1360
- The name of the tool. Unique within this model call.
1361
- */
1362
- name: string;
1363
- /**
1364
- A description of the tool. The language model uses this to understand the
1365
- tool's purpose and to provide better completion suggestions.
1366
- */
1367
- description?: string;
1368
- /**
1369
- The parameters that the tool expects. The language model uses this to
1370
- understand the tool's input requirements and to provide matching suggestions.
1371
- */
1372
- parameters: JSONSchema7;
1373
- };
1374
-
1375
- /**
1376
- A prompt is a list of messages.
1377
-
1378
- Note: Not all models and prompt formats support multi-modal inputs and
1379
- tool calls. The validation happens at runtime.
1380
-
1381
- Note: This is not a user-facing prompt. The AI SDK methods will map the
1382
- user-facing prompt types such as chat or instruction prompts to this format.
1383
- */
1384
- type LanguageModelV1Prompt = Array<LanguageModelV1Message>;
1385
- type LanguageModelV1Message = ({
1386
- role: 'system';
1387
- content: string;
1388
- } | {
1389
- role: 'user';
1390
- content: Array<LanguageModelV1TextPart | LanguageModelV1ImagePart | LanguageModelV1FilePart>;
1391
- } | {
1392
- role: 'assistant';
1393
- content: Array<LanguageModelV1TextPart | LanguageModelV1FilePart | LanguageModelV1ReasoningPart | LanguageModelV1RedactedReasoningPart | LanguageModelV1ToolCallPart>;
1394
- } | {
1395
- role: 'tool';
1396
- content: Array<LanguageModelV1ToolResultPart>;
1397
- }) & {
1398
- /**
1399
- * Additional provider-specific metadata. They are passed through
1400
- * to the provider from the AI SDK and enable provider-specific
1401
- * functionality that can be fully encapsulated in the provider.
1402
- */
1403
- providerMetadata?: LanguageModelV1ProviderMetadata;
1404
- };
1405
- /**
1406
- Text content part of a prompt. It contains a string of text.
1407
- */
1408
- interface LanguageModelV1TextPart {
1409
- type: 'text';
1410
- /**
1411
- The text content.
1412
- */
1413
- text: string;
1414
- /**
1415
- * Additional provider-specific metadata. They are passed through
1416
- * to the provider from the AI SDK and enable provider-specific
1417
- * functionality that can be fully encapsulated in the provider.
1418
- */
1419
- providerMetadata?: LanguageModelV1ProviderMetadata;
1420
- }
1421
- /**
1422
- Reasoning content part of a prompt. It contains a string of reasoning text.
1423
- */
1424
- interface LanguageModelV1ReasoningPart {
1425
- type: 'reasoning';
1426
- /**
1427
- The reasoning text.
1428
- */
1429
- text: string;
1430
- /**
1431
- An optional signature for verifying that the reasoning originated from the model.
1432
- */
1433
- signature?: string;
1434
- /**
1435
- Additional provider-specific metadata. They are passed through
1436
- to the provider from the AI SDK and enable provider-specific
1437
- functionality that can be fully encapsulated in the provider.
1438
- */
1439
- providerMetadata?: LanguageModelV1ProviderMetadata;
1440
- }
1441
- /**
1442
- Redacted reasoning content part of a prompt.
1443
- */
1444
- interface LanguageModelV1RedactedReasoningPart {
1445
- type: 'redacted-reasoning';
1446
- /**
1447
- Redacted reasoning data.
1448
- */
1449
- data: string;
1450
- /**
1451
- Additional provider-specific metadata. They are passed through
1452
- to the provider from the AI SDK and enable provider-specific
1453
- functionality that can be fully encapsulated in the provider.
1454
- */
1455
- providerMetadata?: LanguageModelV1ProviderMetadata;
1456
- }
1457
- /**
1458
- Image content part of a prompt. It contains an image.
1459
- */
1460
- interface LanguageModelV1ImagePart {
1461
- type: 'image';
1462
- /**
1463
- Image data as a Uint8Array (e.g. from a Blob or Buffer) or a URL.
1464
- */
1465
- image: Uint8Array | URL;
1466
- /**
1467
- Optional mime type of the image.
1468
- */
1469
- mimeType?: string;
1470
- /**
1471
- * Additional provider-specific metadata. They are passed through
1472
- * to the provider from the AI SDK and enable provider-specific
1473
- * functionality that can be fully encapsulated in the provider.
1474
- */
1475
- providerMetadata?: LanguageModelV1ProviderMetadata;
1476
- }
1477
- /**
1478
- File content part of a prompt. It contains a file.
1479
- */
1480
- interface LanguageModelV1FilePart {
1481
- type: 'file';
1482
- /**
1483
- * Optional filename of the file.
1484
- */
1485
- filename?: string;
1486
- /**
1487
- File data as base64 encoded string or as a URL.
1488
- */
1489
- data: string | URL;
1490
- /**
1491
- Mime type of the file.
1492
- */
1493
- mimeType: string;
1494
- /**
1495
- * Additional provider-specific metadata. They are passed through
1496
- * to the provider from the AI SDK and enable provider-specific
1497
- * functionality that can be fully encapsulated in the provider.
1498
- */
1499
- providerMetadata?: LanguageModelV1ProviderMetadata;
1500
- }
1501
- /**
1502
- Tool call content part of a prompt. It contains a tool call (usually generated by the AI model).
1503
- */
1504
- interface LanguageModelV1ToolCallPart {
1505
- type: 'tool-call';
1506
- /**
1507
- ID of the tool call. This ID is used to match the tool call with the tool result.
1508
- */
1509
- toolCallId: string;
1510
- /**
1511
- Name of the tool that is being called.
1512
- */
1513
- toolName: string;
1514
- /**
1515
- Arguments of the tool call. This is a JSON-serializable object that matches the tool's input schema.
1516
- */
1517
- args: unknown;
1518
- /**
1519
- * Additional provider-specific metadata. They are passed through
1520
- * to the provider from the AI SDK and enable provider-specific
1521
- * functionality that can be fully encapsulated in the provider.
1522
- */
1523
- providerMetadata?: LanguageModelV1ProviderMetadata;
1524
- }
1525
- /**
1526
- Tool result content part of a prompt. It contains the result of the tool call with the matching ID.
1527
- */
1528
- interface LanguageModelV1ToolResultPart {
1529
- type: 'tool-result';
1530
- /**
1531
- ID of the tool call that this result is associated with.
1532
- */
1533
- toolCallId: string;
1534
- /**
1535
- Name of the tool that generated this result.
1536
- */
1537
- toolName: string;
1538
- /**
1539
- Result of the tool call. This is a JSON-serializable object.
1540
- */
1541
- result: unknown;
1542
- /**
1543
- Optional flag if the result is an error or an error message.
1544
- */
1545
- isError?: boolean;
1546
- /**
1547
- Tool results as an array of parts. This enables advanced tool results including images.
1548
- When this is used, the `result` field should be ignored (if the provider supports content).
1549
- */
1550
- content?: Array<{
1551
- type: 'text';
1552
- /**
1553
- Text content.
1554
- */
1555
- text: string;
1556
- } | {
1557
- type: 'image';
1472
+ warnings: Array<SpeechModelV2CallWarning>;
1558
1473
  /**
1559
- base-64 encoded image data
1560
- */
1561
- data: string;
1562
- /**
1563
- Mime type of the image.
1564
- */
1565
- mimeType?: string;
1566
- }>;
1567
- /**
1568
- * Additional provider-specific metadata. They are passed through
1569
- * to the provider from the AI SDK and enable provider-specific
1570
- * functionality that can be fully encapsulated in the provider.
1571
- */
1572
- providerMetadata?: LanguageModelV1ProviderMetadata;
1573
- }
1574
-
1575
- /**
1576
- The configuration of a tool that is defined by the provider.
1577
- */
1578
- type LanguageModelV1ProviderDefinedTool = {
1579
- /**
1580
- The type of the tool (always 'provider-defined').
1581
- */
1582
- type: 'provider-defined';
1583
- /**
1584
- The ID of the tool. Should follow the format `<provider-name>.<tool-name>`.
1585
- */
1586
- id: `${string}.${string}`;
1587
- /**
1588
- The name of the tool. Unique within this model call.
1589
- */
1590
- name: string;
1591
- /**
1592
- The arguments for configuring the tool. Must match the expected arguments defined by the provider for this tool.
1593
- */
1594
- args: Record<string, unknown>;
1595
- };
1596
-
1597
- type LanguageModelV1ToolChoice = {
1598
- type: 'auto';
1599
- } | {
1600
- type: 'none';
1601
- } | {
1602
- type: 'required';
1603
- } | {
1604
- type: 'tool';
1605
- toolName: string;
1606
- };
1607
-
1608
- type LanguageModelV1CallOptions = LanguageModelV1CallSettings & {
1609
- /**
1610
- Whether the user provided the input as messages or as
1611
- a prompt. This can help guide non-chat models in the
1612
- expansion, bc different expansions can be needed for
1613
- chat/non-chat use cases.
1614
- */
1615
- inputFormat: 'messages' | 'prompt';
1616
- /**
1617
- The mode affects the behavior of the language model. It is required to
1618
- support provider-independent streaming and generation of structured objects.
1619
- The model can take this information and e.g. configure json mode, the correct
1620
- low level grammar, etc. It can also be used to optimize the efficiency of the
1621
- streaming, e.g. tool-delta stream parts are only needed in the
1622
- object-tool mode.
1623
-
1624
- @deprecated mode will be removed in v2.
1625
- All necessary settings will be directly supported through the call settings,
1626
- in particular responseFormat, toolChoice, and tools.
1627
- */
1628
- mode: {
1629
- type: 'regular';
1630
- /**
1631
- The tools that are available for the model.
1632
- */
1633
- tools?: Array<LanguageModelV1FunctionTool | LanguageModelV1ProviderDefinedTool>;
1634
- /**
1635
- Specifies how the tool should be selected. Defaults to 'auto'.
1636
- */
1637
- toolChoice?: LanguageModelV1ToolChoice;
1638
- } | {
1639
- type: 'object-json';
1640
- /**
1641
- * JSON schema that the generated output should conform to.
1642
- */
1643
- schema?: JSONSchema7;
1644
- /**
1645
- * Name of output that should be generated. Used by some providers for additional LLM guidance.
1646
- */
1647
- name?: string;
1648
- /**
1649
- * Description of the output that should be generated. Used by some providers for additional LLM guidance.
1650
- */
1651
- description?: string;
1652
- } | {
1653
- type: 'object-tool';
1654
- tool: LanguageModelV1FunctionTool;
1655
- };
1656
- /**
1657
- A language mode prompt is a standardized prompt type.
1658
-
1659
- Note: This is **not** the user-facing prompt. The AI SDK methods will map the
1660
- user-facing prompt types such as chat or instruction prompts to this format.
1661
- That approach allows us to evolve the user facing prompts without breaking
1662
- the language model interface.
1663
- */
1664
- prompt: LanguageModelV1Prompt;
1665
- /**
1666
- Additional provider-specific metadata.
1667
- The metadata is passed through to the provider from the AI SDK and enables
1668
- provider-specific functionality that can be fully encapsulated in the provider.
1669
- */
1670
- providerMetadata?: LanguageModelV1ProviderMetadata;
1671
- };
1672
-
1673
- /**
1674
- Warning from the model provider for this call. The call will proceed, but e.g.
1675
- some settings might not be supported, which can lead to suboptimal results.
1676
- */
1677
- type LanguageModelV1CallWarning = {
1678
- type: 'unsupported-setting';
1679
- setting: keyof LanguageModelV1CallSettings;
1680
- details?: string;
1681
- } | {
1682
- type: 'unsupported-tool';
1683
- tool: LanguageModelV1FunctionTool | LanguageModelV1ProviderDefinedTool;
1684
- details?: string;
1685
- } | {
1686
- type: 'other';
1687
- message: string;
1688
- };
1689
-
1690
- /**
1691
- Reason why a language model finished generating a response.
1692
-
1693
- Can be one of the following:
1694
- - `stop`: model generated stop sequence
1695
- - `length`: model generated maximum number of tokens
1696
- - `content-filter`: content filter violation stopped the model
1697
- - `tool-calls`: model triggered tool calls
1698
- - `error`: model stopped because of an error
1699
- - `other`: model stopped for other reasons
1700
- - `unknown`: the model has not transmitted a finish reason
1701
- */
1702
- type LanguageModelV1FinishReason = 'stop' | 'length' | 'content-filter' | 'tool-calls' | 'error' | 'other' | 'unknown';
1703
-
1704
- type LanguageModelV1FunctionToolCall = {
1705
- toolCallType: 'function';
1706
- toolCallId: string;
1707
- toolName: string;
1708
- /**
1709
- Stringified JSON object with the tool call arguments. Must match the
1710
- parameters schema of the tool.
1711
- */
1712
- args: string;
1713
- };
1714
-
1715
- /**
1716
- Log probabilities for each token and its top log probabilities.
1717
- */
1718
- type LanguageModelV1LogProbs = Array<{
1719
- token: string;
1720
- logprob: number;
1721
- topLogprobs: Array<{
1722
- token: string;
1723
- logprob: number;
1724
- }>;
1725
- }>;
1726
-
1727
- /**
1728
- Specification for a language model that implements the language model interface version 1.
1729
- */
1730
- type LanguageModelV1 = {
1731
- /**
1732
- The language model must specify which language model interface
1733
- version it implements. This will allow us to evolve the language
1734
- model interface and retain backwards compatibility. The different
1735
- implementation versions can be handled as a discriminated union
1736
- on our side.
1737
- */
1738
- readonly specificationVersion: 'v1';
1739
- /**
1740
- Name of the provider for logging purposes.
1741
- */
1742
- readonly provider: string;
1743
- /**
1744
- Provider-specific model ID for logging purposes.
1745
- */
1746
- readonly modelId: string;
1747
- /**
1748
- Default object generation mode that should be used with this model when
1749
- no mode is specified. Should be the mode with the best results for this
1750
- model. `undefined` can be returned if object generation is not supported.
1751
-
1752
- This is needed to generate the best objects possible w/o requiring the
1753
- user to explicitly specify the object generation mode.
1754
- */
1755
- readonly defaultObjectGenerationMode: LanguageModelV1ObjectGenerationMode;
1756
- /**
1757
- Flag whether this model supports image URLs. Default is `true`.
1758
-
1759
- When the flag is set to `false`, the AI SDK will download the image and
1760
- pass the image data to the model.
1761
- */
1762
- readonly supportsImageUrls?: boolean;
1763
- /**
1764
- Flag whether this model supports grammar-guided generation,
1765
- i.e. follows JSON schemas for object generation
1766
- when the response format is set to 'json' or
1767
- when the `object-json` mode is used.
1768
-
1769
- This means that the model guarantees that the generated JSON
1770
- will be a valid JSON object AND that the object will match the
1771
- JSON schema.
1772
-
1773
- Please note that `generateObject` and `streamObject` will work
1774
- regardless of this flag, but might send different prompts and
1775
- use further optimizations if this flag is set to `true`.
1776
-
1777
- Defaults to `false`.
1778
- */
1779
- readonly supportsStructuredOutputs?: boolean;
1780
- /**
1781
- Checks if the model supports the given URL for file parts natively.
1782
- If the model does not support the URL,
1783
- the AI SDK will download the file and pass the file data to the model.
1784
-
1785
- When undefined, the AI SDK will download the file.
1786
- */
1787
- supportsUrl?(url: URL): boolean;
1788
- /**
1789
- Generates a language model output (non-streaming).
1790
-
1791
- Naming: "do" prefix to prevent accidental direct usage of the method
1792
- by the user.
1793
- */
1794
- doGenerate(options: LanguageModelV1CallOptions): PromiseLike<{
1795
- /**
1796
- Text that the model has generated.
1797
- Can be undefined if the model did not generate any text.
1798
- */
1799
- text?: string;
1800
- /**
1801
- Reasoning that the model has generated.
1802
- Can be undefined if the model does not support reasoning.
1803
- */
1804
- reasoning?: string | Array<{
1805
- type: 'text';
1806
- text: string;
1807
- /**
1808
- An optional signature for verifying that the reasoning originated from the model.
1809
- */
1810
- signature?: string;
1811
- } | {
1812
- type: 'redacted';
1813
- data: string;
1814
- }>;
1815
- /**
1816
- Generated files as base64 encoded strings or binary data.
1817
- The files should be returned without any unnecessary conversion.
1818
- If the API returns base64 encoded strings, the files should be returned
1819
- as base64 encoded strings. If the API returns binary data, the files should
1820
- be returned as binary data.
1821
- */
1822
- files?: Array<{
1823
- data: string | Uint8Array;
1824
- mimeType: string;
1825
- }>;
1826
- /**
1827
- Tool calls that the model has generated.
1828
- Can be undefined if the model did not generate any tool calls.
1829
- */
1830
- toolCalls?: Array<LanguageModelV1FunctionToolCall>;
1831
- /**
1832
- Finish reason.
1833
- */
1834
- finishReason: LanguageModelV1FinishReason;
1835
- /**
1836
- Usage information.
1837
- */
1838
- usage: {
1839
- promptTokens: number;
1840
- completionTokens: number;
1841
- };
1842
- /**
1843
- Raw prompt and setting information for observability provider integration.
1844
- */
1845
- rawCall: {
1846
- /**
1847
- Raw prompt after expansion and conversion to the format that the
1848
- provider uses to send the information to their API.
1849
- */
1850
- rawPrompt: unknown;
1851
- /**
1852
- Raw settings that are used for the API call. Includes provider-specific
1853
- settings.
1854
- */
1855
- rawSettings: Record<string, unknown>;
1856
- };
1857
- /**
1858
- Optional response information for telemetry and debugging purposes.
1859
- */
1860
- rawResponse?: {
1861
- /**
1862
- Response headers.
1863
- */
1864
- headers?: Record<string, string>;
1865
- /**
1866
- Response body.
1867
- */
1868
- body?: unknown;
1869
- };
1870
- /**
1871
- Optional request information for telemetry and debugging purposes.
1872
- */
1873
- request?: {
1874
- /**
1875
- Raw request HTTP body that was sent to the provider API as a string (JSON should be stringified).
1876
- Non-HTTP(s) providers should not set this.
1877
- */
1878
- body?: string;
1879
- };
1880
- /**
1881
- Optional response information for telemetry and debugging purposes.
1474
+ * Optional request information for telemetry and debugging purposes.
1882
1475
  */
1883
- response?: {
1884
- /**
1885
- ID for the generated response, if the provider sends one.
1886
- */
1887
- id?: string;
1888
- /**
1889
- Timestamp for the start of the generated response, if the provider sends one.
1890
- */
1891
- timestamp?: Date;
1476
+ request?: {
1892
1477
  /**
1893
- The ID of the response model that was used to generate the response, if the provider sends one.
1894
- */
1895
- modelId?: string;
1478
+ * Response body (available only for providers that use HTTP requests).
1479
+ */
1480
+ body?: unknown;
1896
1481
  };
1897
- warnings?: LanguageModelV1CallWarning[];
1898
- /**
1899
- Additional provider-specific metadata. They are passed through
1900
- from the provider to the AI SDK and enable provider-specific
1901
- results that can be fully encapsulated in the provider.
1902
- */
1903
- providerMetadata?: LanguageModelV1ProviderMetadata;
1904
- /**
1905
- Sources that have been used as input to generate the response.
1906
- */
1907
- sources?: LanguageModelV1Source[];
1908
- /**
1909
- Logprobs for the completion.
1910
- `undefined` if the mode does not support logprobs or if was not enabled
1911
-
1912
- @deprecated will be changed into a provider-specific extension in v2
1913
- */
1914
- logprobs?: LanguageModelV1LogProbs;
1915
- }>;
1916
- /**
1917
- Generates a language model output (streaming).
1918
-
1919
- Naming: "do" prefix to prevent accidental direct usage of the method
1920
- by the user.
1921
- *
1922
- @return A stream of higher-level language model output parts.
1923
- */
1924
- doStream(options: LanguageModelV1CallOptions): PromiseLike<{
1925
- stream: ReadableStream<LanguageModelV1StreamPart>;
1926
1482
  /**
1927
- Raw prompt and setting information for observability provider integration.
1483
+ * Response information for telemetry and debugging purposes.
1928
1484
  */
1929
- rawCall: {
1485
+ response: {
1930
1486
  /**
1931
- Raw prompt after expansion and conversion to the format that the
1932
- provider uses to send the information to their API.
1487
+ * Timestamp for the start of the generated response.
1933
1488
  */
1934
- rawPrompt: unknown;
1489
+ timestamp: Date;
1935
1490
  /**
1936
- Raw settings that are used for the API call. Includes provider-specific
1937
- settings.
1491
+ * The ID of the response model that was used to generate the response.
1938
1492
  */
1939
- rawSettings: Record<string, unknown>;
1940
- };
1941
- /**
1942
- Optional raw response data.
1943
- */
1944
- rawResponse?: {
1493
+ modelId: string;
1945
1494
  /**
1946
- Response headers.
1495
+ * Response headers.
1947
1496
  */
1948
- headers?: Record<string, string>;
1949
- };
1950
- /**
1951
- Optional request information for telemetry and debugging purposes.
1952
- */
1953
- request?: {
1497
+ headers?: SharedV2Headers;
1954
1498
  /**
1955
- Raw request HTTP body that was sent to the provider API as a string (JSON should be stringified).
1956
- Non-HTTP(s) providers should not set this.
1957
- */
1958
- body?: string;
1499
+ * Response body.
1500
+ */
1501
+ body?: unknown;
1959
1502
  };
1960
1503
  /**
1961
- Warnings for the call, e.g. unsupported settings.
1504
+ * Additional provider-specific metadata. They are passed through
1505
+ * from the provider to the AI SDK and enable provider-specific
1506
+ * results that can be fully encapsulated in the provider.
1962
1507
  */
1963
- warnings?: Array<LanguageModelV1CallWarning>;
1508
+ providerMetadata?: Record<string, Record<string, JSONValue>>;
1964
1509
  }>;
1965
1510
  };
1966
- type LanguageModelV1StreamPart = {
1967
- type: 'text-delta';
1968
- textDelta: string;
1969
- } | {
1970
- type: 'reasoning';
1971
- textDelta: string;
1972
- } | {
1973
- type: 'reasoning-signature';
1974
- signature: string;
1975
- } | {
1976
- type: 'redacted-reasoning';
1977
- data: string;
1978
- } | {
1979
- type: 'source';
1980
- source: LanguageModelV1Source;
1981
- } | {
1982
- type: 'file';
1983
- mimeType: string;
1984
- /**
1985
- Generated file data as base64 encoded strings or binary data.
1986
- The file data should be returned without any unnecessary conversion.
1987
- If the API returns base64 encoded strings, the file data should be returned
1988
- as base64 encoded strings. If the API returns binary data, the file data should
1989
- be returned as binary data.
1990
- */
1991
- data: string | Uint8Array;
1992
- } | ({
1993
- type: 'tool-call';
1994
- } & LanguageModelV1FunctionToolCall) | {
1995
- type: 'tool-call-delta';
1996
- toolCallType: 'function';
1997
- toolCallId: string;
1998
- toolName: string;
1999
- argsTextDelta: string;
2000
- } | {
2001
- type: 'response-metadata';
2002
- id?: string;
2003
- timestamp?: Date;
2004
- modelId?: string;
2005
- } | {
2006
- type: 'finish';
2007
- finishReason: LanguageModelV1FinishReason;
2008
- providerMetadata?: LanguageModelV1ProviderMetadata;
2009
- usage: {
2010
- promptTokens: number;
2011
- completionTokens: number;
2012
- };
2013
- logprobs?: LanguageModelV1LogProbs;
2014
- } | {
2015
- type: 'error';
2016
- error: unknown;
2017
- };
2018
- /**
2019
- The object generation modes available for use with a model. `undefined`
2020
- represents no support for object generation.
2021
- */
2022
- type LanguageModelV1ObjectGenerationMode = 'json' | 'tool' | undefined;
2023
1511
 
2024
- type TranscriptionModelV1ProviderOptions = Record<string, Record<string, JSONValue>>;
2025
- type TranscriptionModelV1CallOptions = {
1512
+ type TranscriptionModelV2ProviderOptions = Record<string, Record<string, JSONValue>>;
1513
+ type TranscriptionModelV2CallOptions = {
2026
1514
  /**
2027
1515
  Audio data to transcribe.
2028
1516
  Accepts a `Uint8Array` or `string`, where `string` is a base64 encoded audio file.
@@ -2048,7 +1536,7 @@ type TranscriptionModelV1CallOptions = {
2048
1536
  }
2049
1537
  ```
2050
1538
  */
2051
- providerOptions?: TranscriptionModelV1ProviderOptions;
1539
+ providerOptions?: TranscriptionModelV2ProviderOptions;
2052
1540
  /**
2053
1541
  Abort signal for cancelling the operation.
2054
1542
  */
@@ -2064,9 +1552,9 @@ type TranscriptionModelV1CallOptions = {
2064
1552
  Warning from the model provider for this call. The call will proceed, but e.g.
2065
1553
  some settings might not be supported, which can lead to suboptimal results.
2066
1554
  */
2067
- type TranscriptionModelV1CallWarning = {
1555
+ type TranscriptionModelV2CallWarning = {
2068
1556
  type: 'unsupported-setting';
2069
- setting: keyof TranscriptionModelV1CallOptions;
1557
+ setting: keyof TranscriptionModelV2CallOptions;
2070
1558
  details?: string;
2071
1559
  } | {
2072
1560
  type: 'other';
@@ -2074,9 +1562,9 @@ type TranscriptionModelV1CallWarning = {
2074
1562
  };
2075
1563
 
2076
1564
  /**
2077
- Transcription model specification version 1.
1565
+ Transcription model specification version 2.
2078
1566
  */
2079
- type TranscriptionModelV1 = {
1567
+ type TranscriptionModelV2 = {
2080
1568
  /**
2081
1569
  The transcription model must specify which transcription model interface
2082
1570
  version it implements. This will allow us to evolve the transcription
@@ -2084,7 +1572,7 @@ type TranscriptionModelV1 = {
2084
1572
  implementation versions can be handled as a discriminated union
2085
1573
  on our side.
2086
1574
  */
2087
- readonly specificationVersion: 'v1';
1575
+ readonly specificationVersion: 'v2';
2088
1576
  /**
2089
1577
  Name of the provider for logging purposes.
2090
1578
  */
@@ -2096,7 +1584,7 @@ type TranscriptionModelV1 = {
2096
1584
  /**
2097
1585
  Generates a transcript.
2098
1586
  */
2099
- doGenerate(options: TranscriptionModelV1CallOptions): PromiseLike<{
1587
+ doGenerate(options: TranscriptionModelV2CallOptions): PromiseLike<{
2100
1588
  /**
2101
1589
  * The complete transcribed text from the audio.
2102
1590
  */
@@ -2132,7 +1620,7 @@ type TranscriptionModelV1 = {
2132
1620
  /**
2133
1621
  Warnings for the call, e.g. unsupported settings.
2134
1622
  */
2135
- warnings: Array<TranscriptionModelV1CallWarning>;
1623
+ warnings: Array<TranscriptionModelV2CallWarning>;
2136
1624
  /**
2137
1625
  Optional request information for telemetry and debugging purposes.
2138
1626
  */
@@ -2173,145 +1661,10 @@ type TranscriptionModelV1 = {
2173
1661
  }>;
2174
1662
  };
2175
1663
 
2176
- type SpeechModelV1ProviderOptions = Record<string, Record<string, JSONValue>>;
2177
- type SpeechModelV1CallOptions = {
2178
- /**
2179
- * Text to convert to speech.
2180
- */
2181
- text: string;
2182
- /**
2183
- * The voice to use for speech synthesis.
2184
- * This is provider-specific and may be a voice ID, name, or other identifier.
2185
- */
2186
- voice?: string;
2187
- /**
2188
- * The desired output format for the audio e.g. "mp3", "wav", etc.
2189
- */
2190
- outputFormat?: string;
2191
- /**
2192
- * Instructions for the speech generation e.g. "Speak in a slow and steady tone".
2193
- */
2194
- instructions?: string;
2195
- /**
2196
- * The speed of the speech generation.
2197
- */
2198
- speed?: number;
2199
- /**
2200
- * Additional provider-specific options that are passed through to the provider
2201
- * as body parameters.
2202
- *
2203
- * The outer record is keyed by the provider name, and the inner
2204
- * record is keyed by the provider-specific metadata key.
2205
- * ```ts
2206
- * {
2207
- * "openai": {}
2208
- * }
2209
- * ```
2210
- */
2211
- providerOptions?: SpeechModelV1ProviderOptions;
2212
- /**
2213
- * Abort signal for cancelling the operation.
2214
- */
2215
- abortSignal?: AbortSignal;
2216
- /**
2217
- * Additional HTTP headers to be sent with the request.
2218
- * Only applicable for HTTP-based providers.
2219
- */
2220
- headers?: Record<string, string | undefined>;
2221
- };
2222
-
2223
- /**
2224
- * Warning from the model provider for this call. The call will proceed, but e.g.
2225
- * some settings might not be supported, which can lead to suboptimal results.
2226
- */
2227
- type SpeechModelV1CallWarning = {
2228
- type: 'unsupported-setting';
2229
- setting: keyof SpeechModelV1CallOptions;
2230
- details?: string;
2231
- } | {
2232
- type: 'other';
2233
- message: string;
2234
- };
2235
-
2236
- /**
2237
- * Speech model specification version 1.
2238
- */
2239
- type SpeechModelV1 = {
2240
- /**
2241
- * The speech model must specify which speech model interface
2242
- * version it implements. This will allow us to evolve the speech
2243
- * model interface and retain backwards compatibility. The different
2244
- * implementation versions can be handled as a discriminated union
2245
- * on our side.
2246
- */
2247
- readonly specificationVersion: 'v1';
2248
- /**
2249
- * Name of the provider for logging purposes.
2250
- */
2251
- readonly provider: string;
2252
- /**
2253
- * Provider-specific model ID for logging purposes.
2254
- */
2255
- readonly modelId: string;
2256
- /**
2257
- * Generates speech audio from text.
2258
- */
2259
- doGenerate(options: SpeechModelV1CallOptions): PromiseLike<{
2260
- /**
2261
- * Generated audio as an ArrayBuffer.
2262
- * The audio should be returned without any unnecessary conversion.
2263
- * If the API returns base64 encoded strings, the audio should be returned
2264
- * as base64 encoded strings. If the API returns binary data, the audio
2265
- * should be returned as binary data.
2266
- */
2267
- audio: string | Uint8Array;
2268
- /**
2269
- * Warnings for the call, e.g. unsupported settings.
2270
- */
2271
- warnings: Array<SpeechModelV1CallWarning>;
2272
- /**
2273
- * Optional request information for telemetry and debugging purposes.
2274
- */
2275
- request?: {
2276
- /**
2277
- * Response body (available only for providers that use HTTP requests).
2278
- */
2279
- body?: unknown;
2280
- };
2281
- /**
2282
- * Response information for telemetry and debugging purposes.
2283
- */
2284
- response: {
2285
- /**
2286
- * Timestamp for the start of the generated response.
2287
- */
2288
- timestamp: Date;
2289
- /**
2290
- * The ID of the response model that was used to generate the response.
2291
- */
2292
- modelId: string;
2293
- /**
2294
- * Response headers.
2295
- */
2296
- headers?: SharedV2Headers;
2297
- /**
2298
- * Response body.
2299
- */
2300
- body?: unknown;
2301
- };
2302
- /**
2303
- * Additional provider-specific metadata. They are passed through
2304
- * from the provider to the AI SDK and enable provider-specific
2305
- * results that can be fully encapsulated in the provider.
2306
- */
2307
- providerMetadata?: Record<string, Record<string, JSONValue>>;
2308
- }>;
2309
- };
2310
-
2311
1664
  /**
2312
1665
  * Provider for language, text embedding, and image generation models.
2313
1666
  */
2314
- interface ProviderV1 {
1667
+ interface ProviderV2 {
2315
1668
  /**
2316
1669
  Returns the language model with the given id.
2317
1670
  The model id is then passed to the provider function to get the model.
@@ -2322,7 +1675,7 @@ interface ProviderV1 {
2322
1675
 
2323
1676
  @throws {NoSuchModelError} If no such model exists.
2324
1677
  */
2325
- languageModel(modelId: string): LanguageModelV1;
1678
+ languageModel(modelId: string): LanguageModelV2;
2326
1679
  /**
2327
1680
  Returns the text embedding model with the given id.
2328
1681
  The model id is then passed to the provider function to get the model.
@@ -2342,7 +1695,7 @@ interface ProviderV1 {
2342
1695
 
2343
1696
  @returns {ImageModel} The image model associated with the id
2344
1697
  */
2345
- readonly imageModel?: (modelId: string) => ImageModelV2;
1698
+ imageModel(modelId: string): ImageModelV2;
2346
1699
  /**
2347
1700
  Returns the transcription model with the given id.
2348
1701
  The model id is then passed to the provider function to get the model.
@@ -2350,8 +1703,8 @@ interface ProviderV1 {
2350
1703
  @param {string} modelId - The id of the model to return.
2351
1704
 
2352
1705
  @returns {TranscriptionModel} The transcription model associated with the id
2353
- */
2354
- readonly transcriptionModel?: (modelId: string) => TranscriptionModelV1;
1706
+ */
1707
+ transcriptionModel?(modelId: string): TranscriptionModelV2;
2355
1708
  /**
2356
1709
  Returns the speech model with the given id.
2357
1710
  The model id is then passed to the provider function to get the model.
@@ -2359,45 +1712,8 @@ interface ProviderV1 {
2359
1712
  @param {string} modelId - The id of the model to return.
2360
1713
 
2361
1714
  @returns {SpeechModel} The speech model associated with the id
2362
- */
2363
- readonly speechModel?: (modelId: string) => SpeechModelV1;
2364
- }
2365
-
2366
- /**
2367
- * Provider for language, text embedding, and image generation models.
2368
- */
2369
- interface ProviderV2 {
2370
- /**
2371
- Returns the language model with the given id.
2372
- The model id is then passed to the provider function to get the model.
2373
-
2374
- @param {string} modelId - The id of the model to return.
2375
-
2376
- @returns {LanguageModel} The language model associated with the id
2377
-
2378
- @throws {NoSuchModelError} If no such model exists.
2379
- */
2380
- languageModel(modelId: string): LanguageModelV2;
2381
- /**
2382
- Returns the text embedding model with the given id.
2383
- The model id is then passed to the provider function to get the model.
2384
-
2385
- @param {string} modelId - The id of the model to return.
2386
-
2387
- @returns {LanguageModel} The language model associated with the id
2388
-
2389
- @throws {NoSuchModelError} If no such model exists.
2390
- */
2391
- textEmbeddingModel(modelId: string): EmbeddingModelV2<string>;
2392
- /**
2393
- Returns the image model with the given id.
2394
- The model id is then passed to the provider function to get the model.
2395
-
2396
- @param {string} modelId - The id of the model to return.
2397
-
2398
- @returns {ImageModel} The image model associated with the id
2399
- */
2400
- imageModel(modelId: string): ImageModelV2;
1715
+ */
1716
+ speechModel?(modelId: string): SpeechModelV2;
2401
1717
  }
2402
1718
 
2403
- export { AISDKError, APICallError, type EmbeddingModelV2, type EmbeddingModelV2Embedding, EmptyResponseBodyError, type ImageModelV2, type ImageModelV2CallOptions, type ImageModelV2CallWarning, type ImageModelV2ProviderMetadata, InvalidArgumentError, InvalidPromptError, InvalidResponseDataError, type JSONArray, type JSONObject, JSONParseError, type JSONValue, type LanguageModelV1, type LanguageModelV1CallOptions, type LanguageModelV1CallWarning, type LanguageModelV1FilePart, type LanguageModelV1FinishReason, type LanguageModelV1FunctionTool, type LanguageModelV1FunctionToolCall, type LanguageModelV1ImagePart, type LanguageModelV1LogProbs, type LanguageModelV1Message, type LanguageModelV1ObjectGenerationMode, type LanguageModelV1Prompt, type LanguageModelV1ProviderDefinedTool, type LanguageModelV1ProviderMetadata, type LanguageModelV1ReasoningPart, type LanguageModelV1RedactedReasoningPart, type LanguageModelV1Source, type LanguageModelV1StreamPart, type LanguageModelV1TextPart, type LanguageModelV1ToolCallPart, type LanguageModelV1ToolChoice, type LanguageModelV1ToolResultPart, type LanguageModelV2, type LanguageModelV2CallOptions, type LanguageModelV2CallWarning, type LanguageModelV2Content, type LanguageModelV2DataContent, type LanguageModelV2File, type LanguageModelV2FilePart, type LanguageModelV2FinishReason, type LanguageModelV2FunctionTool, type LanguageModelV2Message, type LanguageModelV2Middleware, type LanguageModelV2Prompt, type LanguageModelV2ProviderDefinedTool, type LanguageModelV2Reasoning, type LanguageModelV2ReasoningPart, type LanguageModelV2ResponseMetadata, type LanguageModelV2Source, type LanguageModelV2StreamPart, type LanguageModelV2Text, type LanguageModelV2TextPart, type LanguageModelV2ToolCall, type LanguageModelV2ToolCallDelta, type LanguageModelV2ToolCallPart, type LanguageModelV2ToolChoice, type LanguageModelV2ToolResultPart, type LanguageModelV2Usage, LoadAPIKeyError, LoadSettingError, NoContentGeneratedError, NoSuchModelError, type 
ProviderV1, type ProviderV2, type SharedV2Headers, type SharedV2ProviderMetadata, type SharedV2ProviderOptions, type SpeechModelV1, type SpeechModelV1CallOptions, type SpeechModelV1CallWarning, TooManyEmbeddingValuesForCallError, type TranscriptionModelV1, type TranscriptionModelV1CallOptions, type TranscriptionModelV1CallWarning, TypeValidationError, UnsupportedFunctionalityError, getErrorMessage, isJSONArray, isJSONObject, isJSONValue };
1719
+ export { AISDKError, APICallError, type EmbeddingModelV2, type EmbeddingModelV2Embedding, EmptyResponseBodyError, type ImageModelV2, type ImageModelV2CallOptions, type ImageModelV2CallWarning, type ImageModelV2ProviderMetadata, InvalidArgumentError, InvalidPromptError, InvalidResponseDataError, type JSONArray, type JSONObject, JSONParseError, type JSONValue, type LanguageModelV2, type LanguageModelV2CallOptions, type LanguageModelV2CallWarning, type LanguageModelV2Content, type LanguageModelV2DataContent, type LanguageModelV2File, type LanguageModelV2FilePart, type LanguageModelV2FinishReason, type LanguageModelV2FunctionTool, type LanguageModelV2Message, type LanguageModelV2Middleware, type LanguageModelV2Prompt, type LanguageModelV2ProviderDefinedTool, type LanguageModelV2Reasoning, type LanguageModelV2ReasoningPart, type LanguageModelV2ResponseMetadata, type LanguageModelV2Source, type LanguageModelV2StreamPart, type LanguageModelV2Text, type LanguageModelV2TextPart, type LanguageModelV2ToolCall, type LanguageModelV2ToolCallPart, type LanguageModelV2ToolChoice, type LanguageModelV2ToolResultOutput, type LanguageModelV2ToolResultPart, type LanguageModelV2Usage, LoadAPIKeyError, LoadSettingError, NoContentGeneratedError, NoSuchModelError, type ProviderV2, type SharedV2Headers, type SharedV2ProviderMetadata, type SharedV2ProviderOptions, type SpeechModelV2, type SpeechModelV2CallOptions, type SpeechModelV2CallWarning, TooManyEmbeddingValuesForCallError, type TranscriptionModelV2, type TranscriptionModelV2CallOptions, type TranscriptionModelV2CallWarning, TypeValidationError, UnsupportedFunctionalityError, getErrorMessage, isJSONArray, isJSONObject, isJSONValue };