@ai-sdk/provider 2.1.0-beta.3 → 2.1.0-beta.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -1,7 +1,7 @@
1
1
  import { JSONSchema7 } from 'json-schema';
2
2
  export { JSONSchema7, JSONSchema7Definition } from 'json-schema';
3
3
 
4
- type SharedV2Headers = Record<string, string>;
4
+ type SharedV3Headers = Record<string, string>;
5
5
 
6
6
  /**
7
7
  A JSON value can be a string, number, boolean, object, array, or null.
@@ -13,6 +13,54 @@ type JSONObject = {
13
13
  };
14
14
  type JSONArray = JSONValue[];
15
15
 
16
+ /**
17
+ * Additional provider-specific metadata.
18
+ * Metadata are additional outputs from the provider.
19
+ * They are passed through from the provider to the AI SDK
20
+ * and enable provider-specific functionality
21
+ * that can be fully encapsulated in the provider.
22
+ *
23
+ * This enables us to quickly ship provider-specific functionality
24
+ * without affecting the core AI SDK.
25
+ *
26
+ * The outer record is keyed by the provider name, and the inner
27
+ * record is keyed by the provider-specific metadata key.
28
+ *
29
+ * ```ts
30
+ * {
31
+ * "anthropic": {
32
+ * "cacheControl": { "type": "ephemeral" }
33
+ * }
34
+ * }
35
+ * ```
36
+ */
37
+ type SharedV3ProviderMetadata = Record<string, Record<string, JSONValue>>;
38
+
39
+ /**
40
+ * Additional provider-specific options.
41
+ * Options are additional input to the provider.
42
+ * They are passed through to the provider from the AI SDK
43
+ * and enable provider-specific functionality
44
+ * that can be fully encapsulated in the provider.
45
+ *
46
+ * This enables us to quickly ship provider-specific functionality
47
+ * without affecting the core AI SDK.
48
+ *
49
+ * The outer record is keyed by the provider name, and the inner
50
+ * record is keyed by the provider-specific metadata key.
51
+ *
52
+ * ```ts
53
+ * {
54
+ * "anthropic": {
55
+ * "cacheControl": { "type": "ephemeral" }
56
+ * }
57
+ * }
58
+ * ```
59
+ */
60
+ type SharedV3ProviderOptions = Record<string, Record<string, JSONValue>>;
61
+
62
+ type SharedV2Headers = Record<string, string>;
63
+
16
64
  /**
17
65
  * Additional provider-specific metadata.
18
66
  * Metadata are additional outputs from the provider.
@@ -120,7 +168,7 @@ type EmbeddingModelV3<VALUE> = {
120
168
  to the provider from the AI SDK and enable provider-specific
121
169
  functionality that can be fully encapsulated in the provider.
122
170
  */
123
- providerOptions?: SharedV2ProviderOptions;
171
+ providerOptions?: SharedV3ProviderOptions;
124
172
  /**
125
173
  Additional HTTP headers to be sent with the request.
126
174
  Only applicable for HTTP-based providers.
@@ -142,7 +190,7 @@ type EmbeddingModelV3<VALUE> = {
142
190
  from the provider to the AI SDK and enable provider-specific
143
191
  results that can be fully encapsulated in the provider.
144
192
  */
145
- providerMetadata?: SharedV2ProviderMetadata;
193
+ providerMetadata?: SharedV3ProviderMetadata;
146
194
  /**
147
195
  Optional response information for debugging purposes.
148
196
  */
@@ -150,7 +198,7 @@ type EmbeddingModelV3<VALUE> = {
150
198
  /**
151
199
  Response headers.
152
200
  */
153
- headers?: SharedV2Headers;
201
+ headers?: SharedV3Headers;
154
202
  /**
155
203
  The response body.
156
204
  */
@@ -525,7 +573,7 @@ type ImageModelV3CallOptions = {
525
573
  }
526
574
  ```
527
575
  */
528
- providerOptions: SharedV2ProviderOptions;
576
+ providerOptions: SharedV3ProviderOptions;
529
577
  /**
530
578
  Abort signal for cancelling the operation.
531
579
  */
@@ -817,7 +865,7 @@ type LanguageModelV3FunctionTool = {
817
865
  /**
818
866
  The provider-specific options for the tool.
819
867
  */
820
- providerOptions?: SharedV2ProviderOptions;
868
+ providerOptions?: SharedV3ProviderOptions;
821
869
  };
822
870
 
823
871
  /**
@@ -853,7 +901,7 @@ type LanguageModelV3Message = ({
853
901
  * to the provider from the AI SDK and enable provider-specific
854
902
  * functionality that can be fully encapsulated in the provider.
855
903
  */
856
- providerOptions?: SharedV2ProviderOptions;
904
+ providerOptions?: SharedV3ProviderOptions;
857
905
  };
858
906
  /**
859
907
  Text content part of a prompt. It contains a string of text.
@@ -869,7 +917,7 @@ interface LanguageModelV3TextPart {
869
917
  * to the provider from the AI SDK and enable provider-specific
870
918
  * functionality that can be fully encapsulated in the provider.
871
919
  */
872
- providerOptions?: SharedV2ProviderOptions;
920
+ providerOptions?: SharedV3ProviderOptions;
873
921
  }
874
922
  /**
875
923
  Reasoning content part of a prompt. It contains a string of reasoning text.
@@ -885,7 +933,7 @@ interface LanguageModelV3ReasoningPart {
885
933
  * to the provider from the AI SDK and enable provider-specific
886
934
  * functionality that can be fully encapsulated in the provider.
887
935
  */
888
- providerOptions?: SharedV2ProviderOptions;
936
+ providerOptions?: SharedV3ProviderOptions;
889
937
  }
890
938
  /**
891
939
  File content part of a prompt. It contains a file.
@@ -913,7 +961,7 @@ interface LanguageModelV3FilePart {
913
961
  * to the provider from the AI SDK and enable provider-specific
914
962
  * functionality that can be fully encapsulated in the provider.
915
963
  */
916
- providerOptions?: SharedV2ProviderOptions;
964
+ providerOptions?: SharedV3ProviderOptions;
917
965
  }
918
966
  /**
919
967
  Tool call content part of a prompt. It contains a tool call (usually generated by the AI model).
@@ -942,7 +990,7 @@ interface LanguageModelV3ToolCallPart {
942
990
  * to the provider from the AI SDK and enable provider-specific
943
991
  * functionality that can be fully encapsulated in the provider.
944
992
  */
945
- providerOptions?: SharedV2ProviderOptions;
993
+ providerOptions?: SharedV3ProviderOptions;
946
994
  }
947
995
  /**
948
996
  Tool result content part of a prompt. It contains the result of the tool call with the matching ID.
@@ -966,14 +1014,26 @@ interface LanguageModelV3ToolResultPart {
966
1014
  * to the provider from the AI SDK and enable provider-specific
967
1015
  * functionality that can be fully encapsulated in the provider.
968
1016
  */
969
- providerOptions?: SharedV2ProviderOptions;
1017
+ providerOptions?: SharedV3ProviderOptions;
970
1018
  }
1019
+ /**
1020
+ * Result of a tool call.
1021
+ */
971
1022
  type LanguageModelV3ToolResultOutput = {
972
1023
  type: 'text';
973
1024
  value: string;
974
1025
  } | {
975
1026
  type: 'json';
976
1027
  value: JSONValue;
1028
+ } | {
1029
+ /**
1030
+ * Type when the user has denied the execution of the tool call.
1031
+ */
1032
+ type: 'execution-denied';
1033
+ /**
1034
+ * Optional reason for the execution denial.
1035
+ */
1036
+ reason?: string;
977
1037
  } | {
978
1038
  type: 'error-text';
979
1039
  value: string;
@@ -1133,7 +1193,7 @@ type LanguageModelV3CallOptions = {
1133
1193
  * to the provider from the AI SDK and enable provider-specific
1134
1194
  * functionality that can be fully encapsulated in the provider.
1135
1195
  */
1136
- providerOptions?: SharedV2ProviderOptions;
1196
+ providerOptions?: SharedV3ProviderOptions;
1137
1197
  };
1138
1198
 
1139
1199
  /**
@@ -1186,7 +1246,7 @@ type LanguageModelV3Reasoning = {
1186
1246
  /**
1187
1247
  * Optional provider-specific metadata for the reasoning part.
1188
1248
  */
1189
- providerMetadata?: SharedV2ProviderMetadata;
1249
+ providerMetadata?: SharedV3ProviderMetadata;
1190
1250
  };
1191
1251
 
1192
1252
  /**
@@ -1213,7 +1273,7 @@ type LanguageModelV3Source = {
1213
1273
  /**
1214
1274
  * Additional provider metadata for the source.
1215
1275
  */
1216
- providerMetadata?: SharedV2ProviderMetadata;
1276
+ providerMetadata?: SharedV3ProviderMetadata;
1217
1277
  } | {
1218
1278
  type: 'source';
1219
1279
  /**
@@ -1239,7 +1299,7 @@ type LanguageModelV3Source = {
1239
1299
  /**
1240
1300
  * Additional provider metadata for the source.
1241
1301
  */
1242
- providerMetadata?: SharedV2ProviderMetadata;
1302
+ providerMetadata?: SharedV3ProviderMetadata;
1243
1303
  };
1244
1304
 
1245
1305
  /**
@@ -1251,7 +1311,7 @@ type LanguageModelV3Text = {
1251
1311
  The text content.
1252
1312
  */
1253
1313
  text: string;
1254
- providerMetadata?: SharedV2ProviderMetadata;
1314
+ providerMetadata?: SharedV3ProviderMetadata;
1255
1315
  };
1256
1316
 
1257
1317
  /**
@@ -1280,7 +1340,7 @@ type LanguageModelV3ToolCall = {
1280
1340
  /**
1281
1341
  * Additional provider-specific metadata for the tool call.
1282
1342
  */
1283
- providerMetadata?: SharedV2ProviderMetadata;
1343
+ providerMetadata?: SharedV3ProviderMetadata;
1284
1344
  };
1285
1345
 
1286
1346
  /**
@@ -1306,14 +1366,25 @@ type LanguageModelV3ToolResult = {
1306
1366
  isError?: boolean;
1307
1367
  /**
1308
1368
  * Whether the tool result was generated by the provider.
1369
+ *
1309
1370
  * If this flag is set to true, the tool result was generated by the provider.
1310
1371
  * If this flag is not set or is false, the tool result was generated by the client.
1311
1372
  */
1312
1373
  providerExecuted?: boolean;
1374
+ /**
1375
+ * Whether the tool result is preliminary.
1376
+ *
1377
+ * Preliminary tool results replace each other, e.g. image previews.
1378
+ * There always has to be a final, non-preliminary tool result.
1379
+ *
1380
+ * If this flag is set to true, the tool result is preliminary.
1381
+ * If this flag is not set or is false, the tool result is not preliminary.
1382
+ */
1383
+ preliminary?: boolean;
1313
1384
  /**
1314
1385
  * Additional provider-specific metadata for the tool result.
1315
1386
  */
1316
- providerMetadata?: SharedV2ProviderMetadata;
1387
+ providerMetadata?: SharedV3ProviderMetadata;
1317
1388
  };
1318
1389
 
1319
1390
  type LanguageModelV3Content = LanguageModelV3Text | LanguageModelV3Reasoning | LanguageModelV3File | LanguageModelV3Source | LanguageModelV3ToolCall | LanguageModelV3ToolResult;
@@ -1380,45 +1451,45 @@ type LanguageModelV3Usage = {
1380
1451
 
1381
1452
  type LanguageModelV3StreamPart = {
1382
1453
  type: 'text-start';
1383
- providerMetadata?: SharedV2ProviderMetadata;
1454
+ providerMetadata?: SharedV3ProviderMetadata;
1384
1455
  id: string;
1385
1456
  } | {
1386
1457
  type: 'text-delta';
1387
1458
  id: string;
1388
- providerMetadata?: SharedV2ProviderMetadata;
1459
+ providerMetadata?: SharedV3ProviderMetadata;
1389
1460
  delta: string;
1390
1461
  } | {
1391
1462
  type: 'text-end';
1392
- providerMetadata?: SharedV2ProviderMetadata;
1463
+ providerMetadata?: SharedV3ProviderMetadata;
1393
1464
  id: string;
1394
1465
  } | {
1395
1466
  type: 'reasoning-start';
1396
- providerMetadata?: SharedV2ProviderMetadata;
1467
+ providerMetadata?: SharedV3ProviderMetadata;
1397
1468
  id: string;
1398
1469
  } | {
1399
1470
  type: 'reasoning-delta';
1400
1471
  id: string;
1401
- providerMetadata?: SharedV2ProviderMetadata;
1472
+ providerMetadata?: SharedV3ProviderMetadata;
1402
1473
  delta: string;
1403
1474
  } | {
1404
1475
  type: 'reasoning-end';
1405
1476
  id: string;
1406
- providerMetadata?: SharedV2ProviderMetadata;
1477
+ providerMetadata?: SharedV3ProviderMetadata;
1407
1478
  } | {
1408
1479
  type: 'tool-input-start';
1409
1480
  id: string;
1410
1481
  toolName: string;
1411
- providerMetadata?: SharedV2ProviderMetadata;
1482
+ providerMetadata?: SharedV3ProviderMetadata;
1412
1483
  providerExecuted?: boolean;
1413
1484
  } | {
1414
1485
  type: 'tool-input-delta';
1415
1486
  id: string;
1416
1487
  delta: string;
1417
- providerMetadata?: SharedV2ProviderMetadata;
1488
+ providerMetadata?: SharedV3ProviderMetadata;
1418
1489
  } | {
1419
1490
  type: 'tool-input-end';
1420
1491
  id: string;
1421
- providerMetadata?: SharedV2ProviderMetadata;
1492
+ providerMetadata?: SharedV3ProviderMetadata;
1422
1493
  } | LanguageModelV3ToolCall | LanguageModelV3ToolResult | LanguageModelV3File | LanguageModelV3Source | {
1423
1494
  type: 'stream-start';
1424
1495
  warnings: Array<LanguageModelV3CallWarning>;
@@ -1428,7 +1499,7 @@ type LanguageModelV3StreamPart = {
1428
1499
  type: 'finish';
1429
1500
  usage: LanguageModelV3Usage;
1430
1501
  finishReason: LanguageModelV3FinishReason;
1431
- providerMetadata?: SharedV2ProviderMetadata;
1502
+ providerMetadata?: SharedV3ProviderMetadata;
1432
1503
  } | {
1433
1504
  type: 'raw';
1434
1505
  rawValue: unknown;
@@ -1490,7 +1561,7 @@ type LanguageModelV3 = {
1490
1561
  from the provider to the AI SDK and enable provider-specific
1491
1562
  results that can be fully encapsulated in the provider.
1492
1563
  */
1493
- providerMetadata?: SharedV2ProviderMetadata;
1564
+ providerMetadata?: SharedV3ProviderMetadata;
1494
1565
  /**
1495
1566
  Optional request information for telemetry and debugging purposes.
1496
1567
  */
@@ -1507,7 +1578,7 @@ type LanguageModelV3 = {
1507
1578
  /**
1508
1579
  Response headers.
1509
1580
  */
1510
- headers?: SharedV2Headers;
1581
+ headers?: SharedV3Headers;
1511
1582
  /**
1512
1583
  Response HTTP body.
1513
1584
  */
@@ -1544,7 +1615,7 @@ type LanguageModelV3 = {
1544
1615
  /**
1545
1616
  Response headers.
1546
1617
  */
1547
- headers?: SharedV2Headers;
1618
+ headers?: SharedV3Headers;
1548
1619
  };
1549
1620
  }>;
1550
1621
  };
@@ -2465,8 +2536,8 @@ type LanguageModelV2Middleware = {
2465
2536
  }) => PromiseLike<Awaited<ReturnType<LanguageModelV2['doStream']>>>;
2466
2537
  };
2467
2538
 
2468
- type SpeechModelV2ProviderOptions = Record<string, Record<string, JSONValue>>;
2469
- type SpeechModelV2CallOptions = {
2539
+ type SpeechModelV3ProviderOptions = Record<string, Record<string, JSONValue>>;
2540
+ type SpeechModelV3CallOptions = {
2470
2541
  /**
2471
2542
  * Text to convert to speech.
2472
2543
  */
@@ -2505,7 +2576,7 @@ type SpeechModelV2CallOptions = {
2505
2576
  * }
2506
2577
  * ```
2507
2578
  */
2508
- providerOptions?: SpeechModelV2ProviderOptions;
2579
+ providerOptions?: SpeechModelV3ProviderOptions;
2509
2580
  /**
2510
2581
  * Abort signal for cancelling the operation.
2511
2582
  */
@@ -2521,9 +2592,9 @@ type SpeechModelV2CallOptions = {
2521
2592
  * Warning from the model provider for this call. The call will proceed, but e.g.
2522
2593
  * some settings might not be supported, which can lead to suboptimal results.
2523
2594
  */
2524
- type SpeechModelV2CallWarning = {
2595
+ type SpeechModelV3CallWarning = {
2525
2596
  type: 'unsupported-setting';
2526
- setting: keyof SpeechModelV2CallOptions;
2597
+ setting: keyof SpeechModelV3CallOptions;
2527
2598
  details?: string;
2528
2599
  } | {
2529
2600
  type: 'other';
@@ -2531,9 +2602,9 @@ type SpeechModelV2CallWarning = {
2531
2602
  };
2532
2603
 
2533
2604
  /**
2534
- * Speech model specification version 2.
2605
+ * Speech model specification version 3.
2535
2606
  */
2536
- type SpeechModelV2 = {
2607
+ type SpeechModelV3 = {
2537
2608
  /**
2538
2609
  * The speech model must specify which speech model interface
2539
2610
  * version it implements. This will allow us to evolve the speech
@@ -2541,7 +2612,7 @@ type SpeechModelV2 = {
2541
2612
  * implementation versions can be handled as a discriminated union
2542
2613
  * on our side.
2543
2614
  */
2544
- readonly specificationVersion: 'v2';
2615
+ readonly specificationVersion: 'v3';
2545
2616
  /**
2546
2617
  * Name of the provider for logging purposes.
2547
2618
  */
@@ -2553,7 +2624,7 @@ type SpeechModelV2 = {
2553
2624
  /**
2554
2625
  * Generates speech audio from text.
2555
2626
  */
2556
- doGenerate(options: SpeechModelV2CallOptions): PromiseLike<{
2627
+ doGenerate(options: SpeechModelV3CallOptions): PromiseLike<{
2557
2628
  /**
2558
2629
  * Generated audio as an ArrayBuffer.
2559
2630
  * The audio should be returned without any unnecessary conversion.
@@ -2565,7 +2636,7 @@ type SpeechModelV2 = {
2565
2636
  /**
2566
2637
  * Warnings for the call, e.g. unsupported settings.
2567
2638
  */
2568
- warnings: Array<SpeechModelV2CallWarning>;
2639
+ warnings: Array<SpeechModelV3CallWarning>;
2569
2640
  /**
2570
2641
  * Optional request information for telemetry and debugging purposes.
2571
2642
  */
@@ -2605,8 +2676,8 @@ type SpeechModelV2 = {
2605
2676
  }>;
2606
2677
  };
2607
2678
 
2608
- type TranscriptionModelV2ProviderOptions = Record<string, Record<string, JSONValue>>;
2609
- type TranscriptionModelV2CallOptions = {
2679
+ type TranscriptionModelV3ProviderOptions = Record<string, Record<string, JSONValue>>;
2680
+ type TranscriptionModelV3CallOptions = {
2610
2681
  /**
2611
2682
  Audio data to transcribe.
2612
2683
  Accepts a `Uint8Array` or `string`, where `string` is a base64 encoded audio file.
@@ -2632,7 +2703,7 @@ type TranscriptionModelV2CallOptions = {
2632
2703
  }
2633
2704
  ```
2634
2705
  */
2635
- providerOptions?: TranscriptionModelV2ProviderOptions;
2706
+ providerOptions?: TranscriptionModelV3ProviderOptions;
2636
2707
  /**
2637
2708
  Abort signal for cancelling the operation.
2638
2709
  */
@@ -2648,9 +2719,9 @@ type TranscriptionModelV2CallOptions = {
2648
2719
  Warning from the model provider for this call. The call will proceed, but e.g.
2649
2720
  some settings might not be supported, which can lead to suboptimal results.
2650
2721
  */
2651
- type TranscriptionModelV2CallWarning = {
2722
+ type TranscriptionModelV3CallWarning = {
2652
2723
  type: 'unsupported-setting';
2653
- setting: keyof TranscriptionModelV2CallOptions;
2724
+ setting: keyof TranscriptionModelV3CallOptions;
2654
2725
  details?: string;
2655
2726
  } | {
2656
2727
  type: 'other';
@@ -2658,9 +2729,9 @@ type TranscriptionModelV2CallWarning = {
2658
2729
  };
2659
2730
 
2660
2731
  /**
2661
- Transcription model specification version 2.
2732
+ Transcription model specification version 3.
2662
2733
  */
2663
- type TranscriptionModelV2 = {
2734
+ type TranscriptionModelV3 = {
2664
2735
  /**
2665
2736
  The transcription model must specify which transcription model interface
2666
2737
  version it implements. This will allow us to evolve the transcription
@@ -2668,7 +2739,7 @@ type TranscriptionModelV2 = {
2668
2739
  implementation versions can be handled as a discriminated union
2669
2740
  on our side.
2670
2741
  */
2671
- readonly specificationVersion: 'v2';
2742
+ readonly specificationVersion: 'v3';
2672
2743
  /**
2673
2744
  Name of the provider for logging purposes.
2674
2745
  */
@@ -2680,7 +2751,7 @@ type TranscriptionModelV2 = {
2680
2751
  /**
2681
2752
  Generates a transcript.
2682
2753
  */
2683
- doGenerate(options: TranscriptionModelV2CallOptions): PromiseLike<{
2754
+ doGenerate(options: TranscriptionModelV3CallOptions): PromiseLike<{
2684
2755
  /**
2685
2756
  * The complete transcribed text from the audio.
2686
2757
  */
@@ -2716,7 +2787,7 @@ type TranscriptionModelV2 = {
2716
2787
  /**
2717
2788
  Warnings for the call, e.g. unsupported settings.
2718
2789
  */
2719
- warnings: Array<TranscriptionModelV2CallWarning>;
2790
+ warnings: Array<TranscriptionModelV3CallWarning>;
2720
2791
  /**
2721
2792
  Optional request information for telemetry and debugging purposes.
2722
2793
  */
@@ -2742,7 +2813,7 @@ type TranscriptionModelV2 = {
2742
2813
  /**
2743
2814
  Response headers.
2744
2815
  */
2745
- headers?: SharedV2Headers;
2816
+ headers?: SharedV3Headers;
2746
2817
  /**
2747
2818
  Response body.
2748
2819
  */
@@ -2800,7 +2871,7 @@ interface ProviderV3 {
2800
2871
 
2801
2872
  @returns {TranscriptionModel} The transcription model associated with the id
2802
2873
  */
2803
- transcriptionModel?(modelId: string): TranscriptionModelV2;
2874
+ transcriptionModel?(modelId: string): TranscriptionModelV3;
2804
2875
  /**
2805
2876
  Returns the speech model with the given id.
2806
2877
  The model id is then passed to the provider function to get the model.
@@ -2809,9 +2880,301 @@ interface ProviderV3 {
2809
2880
 
2810
2881
  @returns {SpeechModel} The speech model associated with the id
2811
2882
  */
2812
- speechModel?(modelId: string): SpeechModelV2;
2883
+ speechModel?(modelId: string): SpeechModelV3;
2813
2884
  }
2814
2885
 
2886
+ type SpeechModelV2ProviderOptions = Record<string, Record<string, JSONValue>>;
2887
+ type SpeechModelV2CallOptions = {
2888
+ /**
2889
+ * Text to convert to speech.
2890
+ */
2891
+ text: string;
2892
+ /**
2893
+ * The voice to use for speech synthesis.
2894
+ * This is provider-specific and may be a voice ID, name, or other identifier.
2895
+ */
2896
+ voice?: string;
2897
+ /**
2898
+ * The desired output format for the audio e.g. "mp3", "wav", etc.
2899
+ */
2900
+ outputFormat?: string;
2901
+ /**
2902
+ * Instructions for the speech generation e.g. "Speak in a slow and steady tone".
2903
+ */
2904
+ instructions?: string;
2905
+ /**
2906
+ * The speed of the speech generation.
2907
+ */
2908
+ speed?: number;
2909
+ /**
2910
+ * The language for speech generation. This should be an ISO 639-1 language code (e.g. "en", "es", "fr")
2911
+ * or "auto" for automatic language detection. Provider support varies.
2912
+ */
2913
+ language?: string;
2914
+ /**
2915
+ * Additional provider-specific options that are passed through to the provider
2916
+ * as body parameters.
2917
+ *
2918
+ * The outer record is keyed by the provider name, and the inner
2919
+ * record is keyed by the provider-specific metadata key.
2920
+ * ```ts
2921
+ * {
2922
+ * "openai": {}
2923
+ * }
2924
+ * ```
2925
+ */
2926
+ providerOptions?: SpeechModelV2ProviderOptions;
2927
+ /**
2928
+ * Abort signal for cancelling the operation.
2929
+ */
2930
+ abortSignal?: AbortSignal;
2931
+ /**
2932
+ * Additional HTTP headers to be sent with the request.
2933
+ * Only applicable for HTTP-based providers.
2934
+ */
2935
+ headers?: Record<string, string | undefined>;
2936
+ };
2937
+
2938
+ /**
2939
+ * Warning from the model provider for this call. The call will proceed, but e.g.
2940
+ * some settings might not be supported, which can lead to suboptimal results.
2941
+ */
2942
+ type SpeechModelV2CallWarning = {
2943
+ type: 'unsupported-setting';
2944
+ setting: keyof SpeechModelV2CallOptions;
2945
+ details?: string;
2946
+ } | {
2947
+ type: 'other';
2948
+ message: string;
2949
+ };
2950
+
2951
+ /**
2952
+ * Speech model specification version 2.
2953
+ */
2954
+ type SpeechModelV2 = {
2955
+ /**
2956
+ * The speech model must specify which speech model interface
2957
+ * version it implements. This will allow us to evolve the speech
2958
+ * model interface and retain backwards compatibility. The different
2959
+ * implementation versions can be handled as a discriminated union
2960
+ * on our side.
2961
+ */
2962
+ readonly specificationVersion: 'v2';
2963
+ /**
2964
+ * Name of the provider for logging purposes.
2965
+ */
2966
+ readonly provider: string;
2967
+ /**
2968
+ * Provider-specific model ID for logging purposes.
2969
+ */
2970
+ readonly modelId: string;
2971
+ /**
2972
+ * Generates speech audio from text.
2973
+ */
2974
+ doGenerate(options: SpeechModelV2CallOptions): PromiseLike<{
2975
+ /**
2976
+ * Generated audio as a base64 encoded string or as binary data (Uint8Array).
2977
+ * The audio should be returned without any unnecessary conversion.
2978
+ * If the API returns base64 encoded strings, the audio should be returned
2979
+ * as base64 encoded strings. If the API returns binary data, the audio
2980
+ * should be returned as binary data.
2981
+ */
2982
+ audio: string | Uint8Array;
2983
+ /**
2984
+ * Warnings for the call, e.g. unsupported settings.
2985
+ */
2986
+ warnings: Array<SpeechModelV2CallWarning>;
2987
+ /**
2988
+ * Optional request information for telemetry and debugging purposes.
2989
+ */
2990
+ request?: {
2991
+ /**
2992
+ * Request body (available only for providers that use HTTP requests).
2993
+ */
2994
+ body?: unknown;
2995
+ };
2996
+ /**
2997
+ * Response information for telemetry and debugging purposes.
2998
+ */
2999
+ response: {
3000
+ /**
3001
+ * Timestamp for the start of the generated response.
3002
+ */
3003
+ timestamp: Date;
3004
+ /**
3005
+ * The ID of the response model that was used to generate the response.
3006
+ */
3007
+ modelId: string;
3008
+ /**
3009
+ * Response headers.
3010
+ */
3011
+ headers?: SharedV2Headers;
3012
+ /**
3013
+ * Response body.
3014
+ */
3015
+ body?: unknown;
3016
+ };
3017
+ /**
3018
+ * Additional provider-specific metadata. They are passed through
3019
+ * from the provider to the AI SDK and enable provider-specific
3020
+ * results that can be fully encapsulated in the provider.
3021
+ */
3022
+ providerMetadata?: Record<string, Record<string, JSONValue>>;
3023
+ }>;
3024
+ };
3025
+
3026
+ type TranscriptionModelV2ProviderOptions = Record<string, Record<string, JSONValue>>;
3027
+ type TranscriptionModelV2CallOptions = {
3028
+ /**
3029
+ Audio data to transcribe.
3030
+ Accepts a `Uint8Array` or `string`, where `string` is a base64 encoded audio file.
3031
+ */
3032
+ audio: Uint8Array | string;
3033
+ /**
3034
+ The IANA media type of the audio data.
3035
+
3036
+ @see https://www.iana.org/assignments/media-types/media-types.xhtml
3037
+ */
3038
+ mediaType: string;
3039
+ /**
3040
+ Additional provider-specific options that are passed through to the provider
3041
+ as body parameters.
3042
+
3043
+ The outer record is keyed by the provider name, and the inner
3044
+ record is keyed by the provider-specific metadata key.
3045
+ ```ts
3046
+ {
3047
+ "openai": {
3048
+ "timestampGranularities": ["word"]
3049
+ }
3050
+ }
3051
+ ```
3052
+ */
3053
+ providerOptions?: TranscriptionModelV2ProviderOptions;
3054
+ /**
3055
+ Abort signal for cancelling the operation.
3056
+ */
3057
+ abortSignal?: AbortSignal;
3058
+ /**
3059
+ Additional HTTP headers to be sent with the request.
3060
+ Only applicable for HTTP-based providers.
3061
+ */
3062
+ headers?: Record<string, string | undefined>;
3063
+ };
3064
+
3065
+ /**
3066
+ Warning from the model provider for this call. The call will proceed, but e.g.
3067
+ some settings might not be supported, which can lead to suboptimal results.
3068
+ */
3069
+ type TranscriptionModelV2CallWarning = {
3070
+ type: 'unsupported-setting';
3071
+ setting: keyof TranscriptionModelV2CallOptions;
3072
+ details?: string;
3073
+ } | {
3074
+ type: 'other';
3075
+ message: string;
3076
+ };
3077
+
3078
+ /**
3079
+ Transcription model specification version 2.
3080
+ */
3081
+ type TranscriptionModelV2 = {
3082
+ /**
3083
+ The transcription model must specify which transcription model interface
3084
+ version it implements. This will allow us to evolve the transcription
3085
+ model interface and retain backwards compatibility. The different
3086
+ implementation versions can be handled as a discriminated union
3087
+ on our side.
3088
+ */
3089
+ readonly specificationVersion: 'v2';
3090
+ /**
3091
+ Name of the provider for logging purposes.
3092
+ */
3093
+ readonly provider: string;
3094
+ /**
3095
+ Provider-specific model ID for logging purposes.
3096
+ */
3097
+ readonly modelId: string;
3098
+ /**
3099
+ Generates a transcript.
3100
+ */
3101
+ doGenerate(options: TranscriptionModelV2CallOptions): PromiseLike<{
3102
+ /**
3103
+ * The complete transcribed text from the audio.
3104
+ */
3105
+ text: string;
3106
+ /**
3107
+ * Array of transcript segments with timing information.
3108
+ * Each segment represents a portion of the transcribed text with start and end times.
3109
+ */
3110
+ segments: Array<{
3111
+ /**
3112
+ * The text content of this segment.
3113
+ */
3114
+ text: string;
3115
+ /**
3116
+ * The start time of this segment in seconds.
3117
+ */
3118
+ startSecond: number;
3119
+ /**
3120
+ * The end time of this segment in seconds.
3121
+ */
3122
+ endSecond: number;
3123
+ }>;
3124
+ /**
3125
+ * The detected language of the audio content, as an ISO-639-1 code (e.g., 'en' for English).
3126
+ * May be undefined if the language couldn't be detected.
3127
+ */
3128
+ language: string | undefined;
3129
+ /**
3130
+ * The total duration of the audio file in seconds.
3131
+ * May be undefined if the duration couldn't be determined.
3132
+ */
3133
+ durationInSeconds: number | undefined;
3134
+ /**
3135
+ Warnings for the call, e.g. unsupported settings.
3136
+ */
3137
+ warnings: Array<TranscriptionModelV2CallWarning>;
3138
+ /**
3139
+ Optional request information for telemetry and debugging purposes.
3140
+ */
3141
+ request?: {
3142
+ /**
3143
+ Raw request HTTP body that was sent to the provider API as a string (JSON should be stringified).
3144
+ Non-HTTP(s) providers should not set this.
3145
+ */
3146
+ body?: string;
3147
+ };
3148
+ /**
3149
+ Response information for telemetry and debugging purposes.
3150
+ */
3151
+ response: {
3152
+ /**
3153
+ Timestamp for the start of the generated response.
3154
+ */
3155
+ timestamp: Date;
3156
+ /**
3157
+ The ID of the response model that was used to generate the response.
3158
+ */
3159
+ modelId: string;
3160
+ /**
3161
+ Response headers.
3162
+ */
3163
+ headers?: SharedV2Headers;
3164
+ /**
3165
+ Response body.
3166
+ */
3167
+ body?: unknown;
3168
+ };
3169
+ /**
3170
+ Additional provider-specific metadata. They are passed through
3171
+ from the provider to the AI SDK and enable provider-specific
3172
+ results that can be fully encapsulated in the provider.
3173
+ */
3174
+ providerMetadata?: Record<string, Record<string, JSONValue>>;
3175
+ }>;
3176
+ };
3177
+
2815
3178
  /**
2816
3179
  * Provider for language, text embedding, and image generation models.
2817
3180
  */
@@ -2867,4 +3230,4 @@ interface ProviderV2 {
2867
3230
  speechModel?(modelId: string): SpeechModelV2;
2868
3231
  }
2869
3232
 
2870
- export { AISDKError, APICallError, type EmbeddingModelV2, type EmbeddingModelV2Embedding, type EmbeddingModelV3, type EmbeddingModelV3Embedding, EmptyResponseBodyError, type ImageModelV2, type ImageModelV2CallOptions, type ImageModelV2CallWarning, type ImageModelV2ProviderMetadata, type ImageModelV3, type ImageModelV3CallOptions, type ImageModelV3CallWarning, type ImageModelV3ProviderMetadata, InvalidArgumentError, InvalidPromptError, InvalidResponseDataError, type JSONArray, type JSONObject, JSONParseError, type JSONValue, type LanguageModelV2, type LanguageModelV2CallOptions, type LanguageModelV2CallWarning, type LanguageModelV2Content, type LanguageModelV2DataContent, type LanguageModelV2File, type LanguageModelV2FilePart, type LanguageModelV2FinishReason, type LanguageModelV2FunctionTool, type LanguageModelV2Message, type LanguageModelV2Middleware, type LanguageModelV2Prompt, type LanguageModelV2ProviderDefinedTool, type LanguageModelV2Reasoning, type LanguageModelV2ReasoningPart, type LanguageModelV2ResponseMetadata, type LanguageModelV2Source, type LanguageModelV2StreamPart, type LanguageModelV2Text, type LanguageModelV2TextPart, type LanguageModelV2ToolCall, type LanguageModelV2ToolCallPart, type LanguageModelV2ToolChoice, type LanguageModelV2ToolResultOutput, type LanguageModelV2ToolResultPart, type LanguageModelV2Usage, type LanguageModelV3, type LanguageModelV3CallOptions, type LanguageModelV3CallWarning, type LanguageModelV3Content, type LanguageModelV3DataContent, type LanguageModelV3File, type LanguageModelV3FilePart, type LanguageModelV3FinishReason, type LanguageModelV3FunctionTool, type LanguageModelV3Message, type LanguageModelV3Middleware, type LanguageModelV3Prompt, type LanguageModelV3ProviderDefinedTool, type LanguageModelV3Reasoning, type LanguageModelV3ReasoningPart, type LanguageModelV3ResponseMetadata, type LanguageModelV3Source, type LanguageModelV3StreamPart, type LanguageModelV3Text, type LanguageModelV3TextPart, type 
LanguageModelV3ToolCall, type LanguageModelV3ToolCallPart, type LanguageModelV3ToolChoice, type LanguageModelV3ToolResult, type LanguageModelV3ToolResultOutput, type LanguageModelV3ToolResultPart, type LanguageModelV3Usage, LoadAPIKeyError, LoadSettingError, NoContentGeneratedError, NoSuchModelError, type ProviderV2, type ProviderV3, type SharedV2Headers, type SharedV2ProviderMetadata, type SharedV2ProviderOptions, type SpeechModelV2, type SpeechModelV2CallOptions, type SpeechModelV2CallWarning, TooManyEmbeddingValuesForCallError, type TranscriptionModelV2, type TranscriptionModelV2CallOptions, type TranscriptionModelV2CallWarning, TypeValidationError, UnsupportedFunctionalityError, getErrorMessage, isJSONArray, isJSONObject, isJSONValue };
3233
+ export { AISDKError, APICallError, type EmbeddingModelV2, type EmbeddingModelV2Embedding, type EmbeddingModelV3, type EmbeddingModelV3Embedding, EmptyResponseBodyError, type ImageModelV2, type ImageModelV2CallOptions, type ImageModelV2CallWarning, type ImageModelV2ProviderMetadata, type ImageModelV3, type ImageModelV3CallOptions, type ImageModelV3CallWarning, type ImageModelV3ProviderMetadata, InvalidArgumentError, InvalidPromptError, InvalidResponseDataError, type JSONArray, type JSONObject, JSONParseError, type JSONValue, type LanguageModelV2, type LanguageModelV2CallOptions, type LanguageModelV2CallWarning, type LanguageModelV2Content, type LanguageModelV2DataContent, type LanguageModelV2File, type LanguageModelV2FilePart, type LanguageModelV2FinishReason, type LanguageModelV2FunctionTool, type LanguageModelV2Message, type LanguageModelV2Middleware, type LanguageModelV2Prompt, type LanguageModelV2ProviderDefinedTool, type LanguageModelV2Reasoning, type LanguageModelV2ReasoningPart, type LanguageModelV2ResponseMetadata, type LanguageModelV2Source, type LanguageModelV2StreamPart, type LanguageModelV2Text, type LanguageModelV2TextPart, type LanguageModelV2ToolCall, type LanguageModelV2ToolCallPart, type LanguageModelV2ToolChoice, type LanguageModelV2ToolResultOutput, type LanguageModelV2ToolResultPart, type LanguageModelV2Usage, type LanguageModelV3, type LanguageModelV3CallOptions, type LanguageModelV3CallWarning, type LanguageModelV3Content, type LanguageModelV3DataContent, type LanguageModelV3File, type LanguageModelV3FilePart, type LanguageModelV3FinishReason, type LanguageModelV3FunctionTool, type LanguageModelV3Message, type LanguageModelV3Middleware, type LanguageModelV3Prompt, type LanguageModelV3ProviderDefinedTool, type LanguageModelV3Reasoning, type LanguageModelV3ReasoningPart, type LanguageModelV3ResponseMetadata, type LanguageModelV3Source, type LanguageModelV3StreamPart, type LanguageModelV3Text, type LanguageModelV3TextPart, type 
LanguageModelV3ToolCall, type LanguageModelV3ToolCallPart, type LanguageModelV3ToolChoice, type LanguageModelV3ToolResult, type LanguageModelV3ToolResultOutput, type LanguageModelV3ToolResultPart, type LanguageModelV3Usage, LoadAPIKeyError, LoadSettingError, NoContentGeneratedError, NoSuchModelError, type ProviderV2, type ProviderV3, type SharedV2Headers, type SharedV2ProviderMetadata, type SharedV2ProviderOptions, type SharedV3Headers, type SharedV3ProviderMetadata, type SharedV3ProviderOptions, type SpeechModelV2, type SpeechModelV2CallOptions, type SpeechModelV2CallWarning, type SpeechModelV3, type SpeechModelV3CallOptions, type SpeechModelV3CallWarning, TooManyEmbeddingValuesForCallError, type TranscriptionModelV2, type TranscriptionModelV2CallOptions, type TranscriptionModelV2CallWarning, type TranscriptionModelV3, type TranscriptionModelV3CallOptions, type TranscriptionModelV3CallWarning, TypeValidationError, UnsupportedFunctionalityError, getErrorMessage, isJSONArray, isJSONObject, isJSONValue };