@ai-sdk/provider 2.1.0-beta.3 → 2.1.0-beta.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +14 -0
- package/dist/index.d.mts +419 -56
- package/dist/index.d.ts +419 -56
- package/package.json +1 -1
package/dist/index.d.mts
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
import { JSONSchema7 } from 'json-schema';
|
2
2
|
export { JSONSchema7, JSONSchema7Definition } from 'json-schema';
|
3
3
|
|
4
|
-
type SharedV2Headers = Record<string, string>;
|
4
|
+
type SharedV3Headers = Record<string, string>;
|
5
5
|
|
6
6
|
/**
|
7
7
|
A JSON value can be a string, number, boolean, object, array, or null.
|
@@ -13,6 +13,54 @@ type JSONObject = {
|
|
13
13
|
};
|
14
14
|
type JSONArray = JSONValue[];
|
15
15
|
|
16
|
+
/**
|
17
|
+
* Additional provider-specific metadata.
|
18
|
+
* Metadata are additional outputs from the provider.
|
19
|
+
* They are passed through to the provider from the AI SDK
|
20
|
+
* and enable provider-specific functionality
|
21
|
+
* that can be fully encapsulated in the provider.
|
22
|
+
*
|
23
|
+
* This enables us to quickly ship provider-specific functionality
|
24
|
+
* without affecting the core AI SDK.
|
25
|
+
*
|
26
|
+
* The outer record is keyed by the provider name, and the inner
|
27
|
+
* record is keyed by the provider-specific metadata key.
|
28
|
+
*
|
29
|
+
* ```ts
|
30
|
+
* {
|
31
|
+
* "anthropic": {
|
32
|
+
* "cacheControl": { "type": "ephemeral" }
|
33
|
+
* }
|
34
|
+
* }
|
35
|
+
* ```
|
36
|
+
*/
|
37
|
+
type SharedV3ProviderMetadata = Record<string, Record<string, JSONValue>>;
|
38
|
+
|
39
|
+
/**
|
40
|
+
* Additional provider-specific options.
|
41
|
+
* Options are additional input to the provider.
|
42
|
+
* They are passed through to the provider from the AI SDK
|
43
|
+
* and enable provider-specific functionality
|
44
|
+
* that can be fully encapsulated in the provider.
|
45
|
+
*
|
46
|
+
* This enables us to quickly ship provider-specific functionality
|
47
|
+
* without affecting the core AI SDK.
|
48
|
+
*
|
49
|
+
* The outer record is keyed by the provider name, and the inner
|
50
|
+
* record is keyed by the provider-specific metadata key.
|
51
|
+
*
|
52
|
+
* ```ts
|
53
|
+
* {
|
54
|
+
* "anthropic": {
|
55
|
+
* "cacheControl": { "type": "ephemeral" }
|
56
|
+
* }
|
57
|
+
* }
|
58
|
+
* ```
|
59
|
+
*/
|
60
|
+
type SharedV3ProviderOptions = Record<string, Record<string, JSONValue>>;
|
61
|
+
|
62
|
+
type SharedV2Headers = Record<string, string>;
|
63
|
+
|
16
64
|
/**
|
17
65
|
* Additional provider-specific metadata.
|
18
66
|
* Metadata are additional outputs from the provider.
|
@@ -120,7 +168,7 @@ type EmbeddingModelV3<VALUE> = {
|
|
120
168
|
to the provider from the AI SDK and enable provider-specific
|
121
169
|
functionality that can be fully encapsulated in the provider.
|
122
170
|
*/
|
123
|
-
providerOptions?: SharedV2ProviderOptions;
|
171
|
+
providerOptions?: SharedV3ProviderOptions;
|
124
172
|
/**
|
125
173
|
Additional HTTP headers to be sent with the request.
|
126
174
|
Only applicable for HTTP-based providers.
|
@@ -142,7 +190,7 @@ type EmbeddingModelV3<VALUE> = {
|
|
142
190
|
from the provider to the AI SDK and enable provider-specific
|
143
191
|
results that can be fully encapsulated in the provider.
|
144
192
|
*/
|
145
|
-
providerMetadata?: SharedV2ProviderMetadata;
|
193
|
+
providerMetadata?: SharedV3ProviderMetadata;
|
146
194
|
/**
|
147
195
|
Optional response information for debugging purposes.
|
148
196
|
*/
|
@@ -150,7 +198,7 @@ type EmbeddingModelV3<VALUE> = {
|
|
150
198
|
/**
|
151
199
|
Response headers.
|
152
200
|
*/
|
153
|
-
headers?: SharedV2Headers;
|
201
|
+
headers?: SharedV3Headers;
|
154
202
|
/**
|
155
203
|
The response body.
|
156
204
|
*/
|
@@ -525,7 +573,7 @@ type ImageModelV3CallOptions = {
|
|
525
573
|
}
|
526
574
|
```
|
527
575
|
*/
|
528
|
-
providerOptions: SharedV2ProviderOptions;
|
576
|
+
providerOptions: SharedV3ProviderOptions;
|
529
577
|
/**
|
530
578
|
Abort signal for cancelling the operation.
|
531
579
|
*/
|
@@ -817,7 +865,7 @@ type LanguageModelV3FunctionTool = {
|
|
817
865
|
/**
|
818
866
|
The provider-specific options for the tool.
|
819
867
|
*/
|
820
|
-
providerOptions?: SharedV2ProviderOptions;
|
868
|
+
providerOptions?: SharedV3ProviderOptions;
|
821
869
|
};
|
822
870
|
|
823
871
|
/**
|
@@ -853,7 +901,7 @@ type LanguageModelV3Message = ({
|
|
853
901
|
* to the provider from the AI SDK and enable provider-specific
|
854
902
|
* functionality that can be fully encapsulated in the provider.
|
855
903
|
*/
|
856
|
-
providerOptions?: SharedV2ProviderOptions;
|
904
|
+
providerOptions?: SharedV3ProviderOptions;
|
857
905
|
};
|
858
906
|
/**
|
859
907
|
Text content part of a prompt. It contains a string of text.
|
@@ -869,7 +917,7 @@ interface LanguageModelV3TextPart {
|
|
869
917
|
* to the provider from the AI SDK and enable provider-specific
|
870
918
|
* functionality that can be fully encapsulated in the provider.
|
871
919
|
*/
|
872
|
-
providerOptions?: SharedV2ProviderOptions;
|
920
|
+
providerOptions?: SharedV3ProviderOptions;
|
873
921
|
}
|
874
922
|
/**
|
875
923
|
Reasoning content part of a prompt. It contains a string of reasoning text.
|
@@ -885,7 +933,7 @@ interface LanguageModelV3ReasoningPart {
|
|
885
933
|
* to the provider from the AI SDK and enable provider-specific
|
886
934
|
* functionality that can be fully encapsulated in the provider.
|
887
935
|
*/
|
888
|
-
providerOptions?: SharedV2ProviderOptions;
|
936
|
+
providerOptions?: SharedV3ProviderOptions;
|
889
937
|
}
|
890
938
|
/**
|
891
939
|
File content part of a prompt. It contains a file.
|
@@ -913,7 +961,7 @@ interface LanguageModelV3FilePart {
|
|
913
961
|
* to the provider from the AI SDK and enable provider-specific
|
914
962
|
* functionality that can be fully encapsulated in the provider.
|
915
963
|
*/
|
916
|
-
providerOptions?: SharedV2ProviderOptions;
|
964
|
+
providerOptions?: SharedV3ProviderOptions;
|
917
965
|
}
|
918
966
|
/**
|
919
967
|
Tool call content part of a prompt. It contains a tool call (usually generated by the AI model).
|
@@ -942,7 +990,7 @@ interface LanguageModelV3ToolCallPart {
|
|
942
990
|
* to the provider from the AI SDK and enable provider-specific
|
943
991
|
* functionality that can be fully encapsulated in the provider.
|
944
992
|
*/
|
945
|
-
providerOptions?: SharedV2ProviderOptions;
|
993
|
+
providerOptions?: SharedV3ProviderOptions;
|
946
994
|
}
|
947
995
|
/**
|
948
996
|
Tool result content part of a prompt. It contains the result of the tool call with the matching ID.
|
@@ -966,14 +1014,26 @@ interface LanguageModelV3ToolResultPart {
|
|
966
1014
|
* to the provider from the AI SDK and enable provider-specific
|
967
1015
|
* functionality that can be fully encapsulated in the provider.
|
968
1016
|
*/
|
969
|
-
providerOptions?: SharedV2ProviderOptions;
|
1017
|
+
providerOptions?: SharedV3ProviderOptions;
|
970
1018
|
}
|
1019
|
+
/**
|
1020
|
+
* Result of a tool call.
|
1021
|
+
*/
|
971
1022
|
type LanguageModelV3ToolResultOutput = {
|
972
1023
|
type: 'text';
|
973
1024
|
value: string;
|
974
1025
|
} | {
|
975
1026
|
type: 'json';
|
976
1027
|
value: JSONValue;
|
1028
|
+
} | {
|
1029
|
+
/**
|
1030
|
+
* Type when the user has denied the execution of the tool call.
|
1031
|
+
*/
|
1032
|
+
type: 'execution-denied';
|
1033
|
+
/**
|
1034
|
+
* Optional reason for the execution denial.
|
1035
|
+
*/
|
1036
|
+
reason?: string;
|
977
1037
|
} | {
|
978
1038
|
type: 'error-text';
|
979
1039
|
value: string;
|
@@ -1133,7 +1193,7 @@ type LanguageModelV3CallOptions = {
|
|
1133
1193
|
* to the provider from the AI SDK and enable provider-specific
|
1134
1194
|
* functionality that can be fully encapsulated in the provider.
|
1135
1195
|
*/
|
1136
|
-
providerOptions?: SharedV2ProviderOptions;
|
1196
|
+
providerOptions?: SharedV3ProviderOptions;
|
1137
1197
|
};
|
1138
1198
|
|
1139
1199
|
/**
|
@@ -1186,7 +1246,7 @@ type LanguageModelV3Reasoning = {
|
|
1186
1246
|
/**
|
1187
1247
|
* Optional provider-specific metadata for the reasoning part.
|
1188
1248
|
*/
|
1189
|
-
providerMetadata?: SharedV2ProviderMetadata;
|
1249
|
+
providerMetadata?: SharedV3ProviderMetadata;
|
1190
1250
|
};
|
1191
1251
|
|
1192
1252
|
/**
|
@@ -1213,7 +1273,7 @@ type LanguageModelV3Source = {
|
|
1213
1273
|
/**
|
1214
1274
|
* Additional provider metadata for the source.
|
1215
1275
|
*/
|
1216
|
-
providerMetadata?: SharedV2ProviderMetadata;
|
1276
|
+
providerMetadata?: SharedV3ProviderMetadata;
|
1217
1277
|
} | {
|
1218
1278
|
type: 'source';
|
1219
1279
|
/**
|
@@ -1239,7 +1299,7 @@ type LanguageModelV3Source = {
|
|
1239
1299
|
/**
|
1240
1300
|
* Additional provider metadata for the source.
|
1241
1301
|
*/
|
1242
|
-
providerMetadata?: SharedV2ProviderMetadata;
|
1302
|
+
providerMetadata?: SharedV3ProviderMetadata;
|
1243
1303
|
};
|
1244
1304
|
|
1245
1305
|
/**
|
@@ -1251,7 +1311,7 @@ type LanguageModelV3Text = {
|
|
1251
1311
|
The text content.
|
1252
1312
|
*/
|
1253
1313
|
text: string;
|
1254
|
-
providerMetadata?: SharedV2ProviderMetadata;
|
1314
|
+
providerMetadata?: SharedV3ProviderMetadata;
|
1255
1315
|
};
|
1256
1316
|
|
1257
1317
|
/**
|
@@ -1280,7 +1340,7 @@ type LanguageModelV3ToolCall = {
|
|
1280
1340
|
/**
|
1281
1341
|
* Additional provider-specific metadata for the tool call.
|
1282
1342
|
*/
|
1283
|
-
providerMetadata?: SharedV2ProviderMetadata;
|
1343
|
+
providerMetadata?: SharedV3ProviderMetadata;
|
1284
1344
|
};
|
1285
1345
|
|
1286
1346
|
/**
|
@@ -1306,14 +1366,25 @@ type LanguageModelV3ToolResult = {
|
|
1306
1366
|
isError?: boolean;
|
1307
1367
|
/**
|
1308
1368
|
* Whether the tool result was generated by the provider.
|
1369
|
+
*
|
1309
1370
|
* If this flag is set to true, the tool result was generated by the provider.
|
1310
1371
|
* If this flag is not set or is false, the tool result was generated by the client.
|
1311
1372
|
*/
|
1312
1373
|
providerExecuted?: boolean;
|
1374
|
+
/**
|
1375
|
+
* Whether the tool result is preliminary.
|
1376
|
+
*
|
1377
|
+
* Preliminary tool results replace each other, e.g. image previews.
|
1378
|
+
* There always has to be a final, non-preliminary tool result.
|
1379
|
+
*
|
1380
|
+
* If this flag is set to true, the tool result is preliminary.
|
1381
|
+
* If this flag is not set or is false, the tool result is not preliminary.
|
1382
|
+
*/
|
1383
|
+
preliminary?: boolean;
|
1313
1384
|
/**
|
1314
1385
|
* Additional provider-specific metadata for the tool result.
|
1315
1386
|
*/
|
1316
|
-
providerMetadata?: SharedV2ProviderMetadata;
|
1387
|
+
providerMetadata?: SharedV3ProviderMetadata;
|
1317
1388
|
};
|
1318
1389
|
|
1319
1390
|
type LanguageModelV3Content = LanguageModelV3Text | LanguageModelV3Reasoning | LanguageModelV3File | LanguageModelV3Source | LanguageModelV3ToolCall | LanguageModelV3ToolResult;
|
@@ -1380,45 +1451,45 @@ type LanguageModelV3Usage = {
|
|
1380
1451
|
|
1381
1452
|
type LanguageModelV3StreamPart = {
|
1382
1453
|
type: 'text-start';
|
1383
|
-
providerMetadata?: SharedV2ProviderMetadata;
|
1454
|
+
providerMetadata?: SharedV3ProviderMetadata;
|
1384
1455
|
id: string;
|
1385
1456
|
} | {
|
1386
1457
|
type: 'text-delta';
|
1387
1458
|
id: string;
|
1388
|
-
providerMetadata?: SharedV2ProviderMetadata;
|
1459
|
+
providerMetadata?: SharedV3ProviderMetadata;
|
1389
1460
|
delta: string;
|
1390
1461
|
} | {
|
1391
1462
|
type: 'text-end';
|
1392
|
-
providerMetadata?: SharedV2ProviderMetadata;
|
1463
|
+
providerMetadata?: SharedV3ProviderMetadata;
|
1393
1464
|
id: string;
|
1394
1465
|
} | {
|
1395
1466
|
type: 'reasoning-start';
|
1396
|
-
providerMetadata?: SharedV2ProviderMetadata;
|
1467
|
+
providerMetadata?: SharedV3ProviderMetadata;
|
1397
1468
|
id: string;
|
1398
1469
|
} | {
|
1399
1470
|
type: 'reasoning-delta';
|
1400
1471
|
id: string;
|
1401
|
-
providerMetadata?: SharedV2ProviderMetadata;
|
1472
|
+
providerMetadata?: SharedV3ProviderMetadata;
|
1402
1473
|
delta: string;
|
1403
1474
|
} | {
|
1404
1475
|
type: 'reasoning-end';
|
1405
1476
|
id: string;
|
1406
|
-
providerMetadata?: SharedV2ProviderMetadata;
|
1477
|
+
providerMetadata?: SharedV3ProviderMetadata;
|
1407
1478
|
} | {
|
1408
1479
|
type: 'tool-input-start';
|
1409
1480
|
id: string;
|
1410
1481
|
toolName: string;
|
1411
|
-
providerMetadata?: SharedV2ProviderMetadata;
|
1482
|
+
providerMetadata?: SharedV3ProviderMetadata;
|
1412
1483
|
providerExecuted?: boolean;
|
1413
1484
|
} | {
|
1414
1485
|
type: 'tool-input-delta';
|
1415
1486
|
id: string;
|
1416
1487
|
delta: string;
|
1417
|
-
providerMetadata?: SharedV2ProviderMetadata;
|
1488
|
+
providerMetadata?: SharedV3ProviderMetadata;
|
1418
1489
|
} | {
|
1419
1490
|
type: 'tool-input-end';
|
1420
1491
|
id: string;
|
1421
|
-
providerMetadata?: SharedV2ProviderMetadata;
|
1492
|
+
providerMetadata?: SharedV3ProviderMetadata;
|
1422
1493
|
} | LanguageModelV3ToolCall | LanguageModelV3ToolResult | LanguageModelV3File | LanguageModelV3Source | {
|
1423
1494
|
type: 'stream-start';
|
1424
1495
|
warnings: Array<LanguageModelV3CallWarning>;
|
@@ -1428,7 +1499,7 @@ type LanguageModelV3StreamPart = {
|
|
1428
1499
|
type: 'finish';
|
1429
1500
|
usage: LanguageModelV3Usage;
|
1430
1501
|
finishReason: LanguageModelV3FinishReason;
|
1431
|
-
providerMetadata?: SharedV2ProviderMetadata;
|
1502
|
+
providerMetadata?: SharedV3ProviderMetadata;
|
1432
1503
|
} | {
|
1433
1504
|
type: 'raw';
|
1434
1505
|
rawValue: unknown;
|
@@ -1490,7 +1561,7 @@ type LanguageModelV3 = {
|
|
1490
1561
|
from the provider to the AI SDK and enable provider-specific
|
1491
1562
|
results that can be fully encapsulated in the provider.
|
1492
1563
|
*/
|
1493
|
-
providerMetadata?: SharedV2ProviderMetadata;
|
1564
|
+
providerMetadata?: SharedV3ProviderMetadata;
|
1494
1565
|
/**
|
1495
1566
|
Optional request information for telemetry and debugging purposes.
|
1496
1567
|
*/
|
@@ -1507,7 +1578,7 @@ type LanguageModelV3 = {
|
|
1507
1578
|
/**
|
1508
1579
|
Response headers.
|
1509
1580
|
*/
|
1510
|
-
headers?: SharedV2Headers;
|
1581
|
+
headers?: SharedV3Headers;
|
1511
1582
|
/**
|
1512
1583
|
Response HTTP body.
|
1513
1584
|
*/
|
@@ -1544,7 +1615,7 @@ type LanguageModelV3 = {
|
|
1544
1615
|
/**
|
1545
1616
|
Response headers.
|
1546
1617
|
*/
|
1547
|
-
headers?: SharedV2Headers;
|
1618
|
+
headers?: SharedV3Headers;
|
1548
1619
|
};
|
1549
1620
|
}>;
|
1550
1621
|
};
|
@@ -2465,8 +2536,8 @@ type LanguageModelV2Middleware = {
|
|
2465
2536
|
}) => PromiseLike<Awaited<ReturnType<LanguageModelV2['doStream']>>>;
|
2466
2537
|
};
|
2467
2538
|
|
2468
|
-
type SpeechModelV2ProviderOptions = Record<string, Record<string, JSONValue>>;
|
2469
|
-
type SpeechModelV2CallOptions = {
|
2539
|
+
type SpeechModelV3ProviderOptions = Record<string, Record<string, JSONValue>>;
|
2540
|
+
type SpeechModelV3CallOptions = {
|
2470
2541
|
/**
|
2471
2542
|
* Text to convert to speech.
|
2472
2543
|
*/
|
@@ -2505,7 +2576,7 @@ type SpeechModelV2CallOptions = {
|
|
2505
2576
|
* }
|
2506
2577
|
* ```
|
2507
2578
|
*/
|
2508
|
-
providerOptions?: SpeechModelV2ProviderOptions;
|
2579
|
+
providerOptions?: SpeechModelV3ProviderOptions;
|
2509
2580
|
/**
|
2510
2581
|
* Abort signal for cancelling the operation.
|
2511
2582
|
*/
|
@@ -2521,9 +2592,9 @@ type SpeechModelV2CallOptions = {
|
|
2521
2592
|
* Warning from the model provider for this call. The call will proceed, but e.g.
|
2522
2593
|
* some settings might not be supported, which can lead to suboptimal results.
|
2523
2594
|
*/
|
2524
|
-
type SpeechModelV2CallWarning = {
|
2595
|
+
type SpeechModelV3CallWarning = {
|
2525
2596
|
type: 'unsupported-setting';
|
2526
|
-
setting: keyof SpeechModelV2CallOptions;
|
2597
|
+
setting: keyof SpeechModelV3CallOptions;
|
2527
2598
|
details?: string;
|
2528
2599
|
} | {
|
2529
2600
|
type: 'other';
|
@@ -2531,9 +2602,9 @@ type SpeechModelV2CallWarning = {
|
|
2531
2602
|
};
|
2532
2603
|
|
2533
2604
|
/**
|
2534
|
-
* Speech model specification version 2.
|
2605
|
+
* Speech model specification version 3.
|
2535
2606
|
*/
|
2536
|
-
type SpeechModelV2 = {
|
2607
|
+
type SpeechModelV3 = {
|
2537
2608
|
/**
|
2538
2609
|
* The speech model must specify which speech model interface
|
2539
2610
|
* version it implements. This will allow us to evolve the speech
|
@@ -2541,7 +2612,7 @@ type SpeechModelV2 = {
|
|
2541
2612
|
* implementation versions can be handled as a discriminated union
|
2542
2613
|
* on our side.
|
2543
2614
|
*/
|
2544
|
-
readonly specificationVersion: 'v2';
|
2615
|
+
readonly specificationVersion: 'v3';
|
2545
2616
|
/**
|
2546
2617
|
* Name of the provider for logging purposes.
|
2547
2618
|
*/
|
@@ -2553,7 +2624,7 @@ type SpeechModelV2 = {
|
|
2553
2624
|
/**
|
2554
2625
|
* Generates speech audio from text.
|
2555
2626
|
*/
|
2556
|
-
doGenerate(options: SpeechModelV2CallOptions): PromiseLike<{
|
2627
|
+
doGenerate(options: SpeechModelV3CallOptions): PromiseLike<{
|
2557
2628
|
/**
|
2558
2629
|
* Generated audio as an ArrayBuffer.
|
2559
2630
|
* The audio should be returned without any unnecessary conversion.
|
@@ -2565,7 +2636,7 @@ type SpeechModelV2 = {
|
|
2565
2636
|
/**
|
2566
2637
|
* Warnings for the call, e.g. unsupported settings.
|
2567
2638
|
*/
|
2568
|
-
warnings: Array<SpeechModelV2CallWarning>;
|
2639
|
+
warnings: Array<SpeechModelV3CallWarning>;
|
2569
2640
|
/**
|
2570
2641
|
* Optional request information for telemetry and debugging purposes.
|
2571
2642
|
*/
|
@@ -2605,8 +2676,8 @@ type SpeechModelV2 = {
|
|
2605
2676
|
}>;
|
2606
2677
|
};
|
2607
2678
|
|
2608
|
-
type TranscriptionModelV2ProviderOptions = Record<string, Record<string, JSONValue>>;
|
2609
|
-
type TranscriptionModelV2CallOptions = {
|
2679
|
+
type TranscriptionModelV3ProviderOptions = Record<string, Record<string, JSONValue>>;
|
2680
|
+
type TranscriptionModelV3CallOptions = {
|
2610
2681
|
/**
|
2611
2682
|
Audio data to transcribe.
|
2612
2683
|
Accepts a `Uint8Array` or `string`, where `string` is a base64 encoded audio file.
|
@@ -2632,7 +2703,7 @@ type TranscriptionModelV2CallOptions = {
|
|
2632
2703
|
}
|
2633
2704
|
```
|
2634
2705
|
*/
|
2635
|
-
providerOptions?: TranscriptionModelV2ProviderOptions;
|
2706
|
+
providerOptions?: TranscriptionModelV3ProviderOptions;
|
2636
2707
|
/**
|
2637
2708
|
Abort signal for cancelling the operation.
|
2638
2709
|
*/
|
@@ -2648,9 +2719,9 @@ type TranscriptionModelV2CallOptions = {
|
|
2648
2719
|
Warning from the model provider for this call. The call will proceed, but e.g.
|
2649
2720
|
some settings might not be supported, which can lead to suboptimal results.
|
2650
2721
|
*/
|
2651
|
-
type TranscriptionModelV2CallWarning = {
|
2722
|
+
type TranscriptionModelV3CallWarning = {
|
2652
2723
|
type: 'unsupported-setting';
|
2653
|
-
setting: keyof TranscriptionModelV2CallOptions;
|
2724
|
+
setting: keyof TranscriptionModelV3CallOptions;
|
2654
2725
|
details?: string;
|
2655
2726
|
} | {
|
2656
2727
|
type: 'other';
|
@@ -2658,9 +2729,9 @@ type TranscriptionModelV2CallWarning = {
|
|
2658
2729
|
};
|
2659
2730
|
|
2660
2731
|
/**
|
2661
|
-
Transcription model specification version 2.
|
2732
|
+
Transcription model specification version 3.
|
2662
2733
|
*/
|
2663
|
-
type TranscriptionModelV2 = {
|
2734
|
+
type TranscriptionModelV3 = {
|
2664
2735
|
/**
|
2665
2736
|
The transcription model must specify which transcription model interface
|
2666
2737
|
version it implements. This will allow us to evolve the transcription
|
@@ -2668,7 +2739,7 @@ type TranscriptionModelV2 = {
|
|
2668
2739
|
implementation versions can be handled as a discriminated union
|
2669
2740
|
on our side.
|
2670
2741
|
*/
|
2671
|
-
readonly specificationVersion: 'v2';
|
2742
|
+
readonly specificationVersion: 'v3';
|
2672
2743
|
/**
|
2673
2744
|
Name of the provider for logging purposes.
|
2674
2745
|
*/
|
@@ -2680,7 +2751,7 @@ type TranscriptionModelV2 = {
|
|
2680
2751
|
/**
|
2681
2752
|
Generates a transcript.
|
2682
2753
|
*/
|
2683
|
-
doGenerate(options: TranscriptionModelV2CallOptions): PromiseLike<{
|
2754
|
+
doGenerate(options: TranscriptionModelV3CallOptions): PromiseLike<{
|
2684
2755
|
/**
|
2685
2756
|
* The complete transcribed text from the audio.
|
2686
2757
|
*/
|
@@ -2716,7 +2787,7 @@ type TranscriptionModelV2 = {
|
|
2716
2787
|
/**
|
2717
2788
|
Warnings for the call, e.g. unsupported settings.
|
2718
2789
|
*/
|
2719
|
-
warnings: Array<TranscriptionModelV2CallWarning>;
|
2790
|
+
warnings: Array<TranscriptionModelV3CallWarning>;
|
2720
2791
|
/**
|
2721
2792
|
Optional request information for telemetry and debugging purposes.
|
2722
2793
|
*/
|
@@ -2742,7 +2813,7 @@ type TranscriptionModelV2 = {
|
|
2742
2813
|
/**
|
2743
2814
|
Response headers.
|
2744
2815
|
*/
|
2745
|
-
headers?: SharedV2Headers;
|
2816
|
+
headers?: SharedV3Headers;
|
2746
2817
|
/**
|
2747
2818
|
Response body.
|
2748
2819
|
*/
|
@@ -2800,7 +2871,7 @@ interface ProviderV3 {
|
|
2800
2871
|
|
2801
2872
|
@returns {TranscriptionModel} The transcription model associated with the id
|
2802
2873
|
*/
|
2803
|
-
transcriptionModel?(modelId: string): TranscriptionModelV2;
|
2874
|
+
transcriptionModel?(modelId: string): TranscriptionModelV3;
|
2804
2875
|
/**
|
2805
2876
|
Returns the speech model with the given id.
|
2806
2877
|
The model id is then passed to the provider function to get the model.
|
@@ -2809,9 +2880,301 @@ interface ProviderV3 {
|
|
2809
2880
|
|
2810
2881
|
@returns {SpeechModel} The speech model associated with the id
|
2811
2882
|
*/
|
2812
|
-
speechModel?(modelId: string): SpeechModelV2;
|
2883
|
+
speechModel?(modelId: string): SpeechModelV3;
|
2813
2884
|
}
|
2814
2885
|
|
2886
|
+
type SpeechModelV2ProviderOptions = Record<string, Record<string, JSONValue>>;
|
2887
|
+
type SpeechModelV2CallOptions = {
|
2888
|
+
/**
|
2889
|
+
* Text to convert to speech.
|
2890
|
+
*/
|
2891
|
+
text: string;
|
2892
|
+
/**
|
2893
|
+
* The voice to use for speech synthesis.
|
2894
|
+
* This is provider-specific and may be a voice ID, name, or other identifier.
|
2895
|
+
*/
|
2896
|
+
voice?: string;
|
2897
|
+
/**
|
2898
|
+
* The desired output format for the audio e.g. "mp3", "wav", etc.
|
2899
|
+
*/
|
2900
|
+
outputFormat?: string;
|
2901
|
+
/**
|
2902
|
+
* Instructions for the speech generation e.g. "Speak in a slow and steady tone".
|
2903
|
+
*/
|
2904
|
+
instructions?: string;
|
2905
|
+
/**
|
2906
|
+
* The speed of the speech generation.
|
2907
|
+
*/
|
2908
|
+
speed?: number;
|
2909
|
+
/**
|
2910
|
+
* The language for speech generation. This should be an ISO 639-1 language code (e.g. "en", "es", "fr")
|
2911
|
+
* or "auto" for automatic language detection. Provider support varies.
|
2912
|
+
*/
|
2913
|
+
language?: string;
|
2914
|
+
/**
|
2915
|
+
* Additional provider-specific options that are passed through to the provider
|
2916
|
+
* as body parameters.
|
2917
|
+
*
|
2918
|
+
* The outer record is keyed by the provider name, and the inner
|
2919
|
+
* record is keyed by the provider-specific metadata key.
|
2920
|
+
* ```ts
|
2921
|
+
* {
|
2922
|
+
* "openai": {}
|
2923
|
+
* }
|
2924
|
+
* ```
|
2925
|
+
*/
|
2926
|
+
providerOptions?: SpeechModelV2ProviderOptions;
|
2927
|
+
/**
|
2928
|
+
* Abort signal for cancelling the operation.
|
2929
|
+
*/
|
2930
|
+
abortSignal?: AbortSignal;
|
2931
|
+
/**
|
2932
|
+
* Additional HTTP headers to be sent with the request.
|
2933
|
+
* Only applicable for HTTP-based providers.
|
2934
|
+
*/
|
2935
|
+
headers?: Record<string, string | undefined>;
|
2936
|
+
};
|
2937
|
+
|
2938
|
+
/**
|
2939
|
+
* Warning from the model provider for this call. The call will proceed, but e.g.
|
2940
|
+
* some settings might not be supported, which can lead to suboptimal results.
|
2941
|
+
*/
|
2942
|
+
type SpeechModelV2CallWarning = {
|
2943
|
+
type: 'unsupported-setting';
|
2944
|
+
setting: keyof SpeechModelV2CallOptions;
|
2945
|
+
details?: string;
|
2946
|
+
} | {
|
2947
|
+
type: 'other';
|
2948
|
+
message: string;
|
2949
|
+
};
|
2950
|
+
|
2951
|
+
/**
|
2952
|
+
* Speech model specification version 2.
|
2953
|
+
*/
|
2954
|
+
type SpeechModelV2 = {
|
2955
|
+
/**
|
2956
|
+
* The speech model must specify which speech model interface
|
2957
|
+
* version it implements. This will allow us to evolve the speech
|
2958
|
+
* model interface and retain backwards compatibility. The different
|
2959
|
+
* implementation versions can be handled as a discriminated union
|
2960
|
+
* on our side.
|
2961
|
+
*/
|
2962
|
+
readonly specificationVersion: 'v2';
|
2963
|
+
/**
|
2964
|
+
* Name of the provider for logging purposes.
|
2965
|
+
*/
|
2966
|
+
readonly provider: string;
|
2967
|
+
/**
|
2968
|
+
* Provider-specific model ID for logging purposes.
|
2969
|
+
*/
|
2970
|
+
readonly modelId: string;
|
2971
|
+
/**
|
2972
|
+
* Generates speech audio from text.
|
2973
|
+
*/
|
2974
|
+
doGenerate(options: SpeechModelV2CallOptions): PromiseLike<{
|
2975
|
+
/**
|
2976
|
+
* Generated audio as an ArrayBuffer.
|
2977
|
+
* The audio should be returned without any unnecessary conversion.
|
2978
|
+
* If the API returns base64 encoded strings, the audio should be returned
|
2979
|
+
* as base64 encoded strings. If the API returns binary data, the audio
|
2980
|
+
* should be returned as binary data.
|
2981
|
+
*/
|
2982
|
+
audio: string | Uint8Array;
|
2983
|
+
/**
|
2984
|
+
* Warnings for the call, e.g. unsupported settings.
|
2985
|
+
*/
|
2986
|
+
warnings: Array<SpeechModelV2CallWarning>;
|
2987
|
+
/**
|
2988
|
+
* Optional request information for telemetry and debugging purposes.
|
2989
|
+
*/
|
2990
|
+
request?: {
|
2991
|
+
/**
|
2992
|
+
* Response body (available only for providers that use HTTP requests).
|
2993
|
+
*/
|
2994
|
+
body?: unknown;
|
2995
|
+
};
|
2996
|
+
/**
|
2997
|
+
* Response information for telemetry and debugging purposes.
|
2998
|
+
*/
|
2999
|
+
response: {
|
3000
|
+
/**
|
3001
|
+
* Timestamp for the start of the generated response.
|
3002
|
+
*/
|
3003
|
+
timestamp: Date;
|
3004
|
+
/**
|
3005
|
+
* The ID of the response model that was used to generate the response.
|
3006
|
+
*/
|
3007
|
+
modelId: string;
|
3008
|
+
/**
|
3009
|
+
* Response headers.
|
3010
|
+
*/
|
3011
|
+
headers?: SharedV2Headers;
|
3012
|
+
/**
|
3013
|
+
* Response body.
|
3014
|
+
*/
|
3015
|
+
body?: unknown;
|
3016
|
+
};
|
3017
|
+
/**
|
3018
|
+
* Additional provider-specific metadata. They are passed through
|
3019
|
+
* from the provider to the AI SDK and enable provider-specific
|
3020
|
+
* results that can be fully encapsulated in the provider.
|
3021
|
+
*/
|
3022
|
+
providerMetadata?: Record<string, Record<string, JSONValue>>;
|
3023
|
+
}>;
|
3024
|
+
};
|
3025
|
+
|
3026
|
+
type TranscriptionModelV2ProviderOptions = Record<string, Record<string, JSONValue>>;
|
3027
|
+
type TranscriptionModelV2CallOptions = {
|
3028
|
+
/**
|
3029
|
+
Audio data to transcribe.
|
3030
|
+
Accepts a `Uint8Array` or `string`, where `string` is a base64 encoded audio file.
|
3031
|
+
*/
|
3032
|
+
audio: Uint8Array | string;
|
3033
|
+
/**
|
3034
|
+
The IANA media type of the audio data.
|
3035
|
+
|
3036
|
+
@see https://www.iana.org/assignments/media-types/media-types.xhtml
|
3037
|
+
*/
|
3038
|
+
mediaType: string;
|
3039
|
+
/**
|
3040
|
+
Additional provider-specific options that are passed through to the provider
|
3041
|
+
as body parameters.
|
3042
|
+
|
3043
|
+
The outer record is keyed by the provider name, and the inner
|
3044
|
+
record is keyed by the provider-specific metadata key.
|
3045
|
+
```ts
|
3046
|
+
{
|
3047
|
+
"openai": {
|
3048
|
+
"timestampGranularities": ["word"]
|
3049
|
+
}
|
3050
|
+
}
|
3051
|
+
```
|
3052
|
+
*/
|
3053
|
+
providerOptions?: TranscriptionModelV2ProviderOptions;
|
3054
|
+
/**
|
3055
|
+
Abort signal for cancelling the operation.
|
3056
|
+
*/
|
3057
|
+
abortSignal?: AbortSignal;
|
3058
|
+
/**
|
3059
|
+
Additional HTTP headers to be sent with the request.
|
3060
|
+
Only applicable for HTTP-based providers.
|
3061
|
+
*/
|
3062
|
+
headers?: Record<string, string | undefined>;
|
3063
|
+
};
|
3064
|
+
|
3065
|
+
/**
|
3066
|
+
Warning from the model provider for this call. The call will proceed, but e.g.
|
3067
|
+
some settings might not be supported, which can lead to suboptimal results.
|
3068
|
+
*/
|
3069
|
+
type TranscriptionModelV2CallWarning = {
|
3070
|
+
type: 'unsupported-setting';
|
3071
|
+
setting: keyof TranscriptionModelV2CallOptions;
|
3072
|
+
details?: string;
|
3073
|
+
} | {
|
3074
|
+
type: 'other';
|
3075
|
+
message: string;
|
3076
|
+
};
|
3077
|
+
|
3078
|
+
/**
|
3079
|
+
Transcription model specification version 2.
|
3080
|
+
*/
|
3081
|
+
type TranscriptionModelV2 = {
|
3082
|
+
/**
|
3083
|
+
The transcription model must specify which transcription model interface
|
3084
|
+
version it implements. This will allow us to evolve the transcription
|
3085
|
+
model interface and retain backwards compatibility. The different
|
3086
|
+
implementation versions can be handled as a discriminated union
|
3087
|
+
on our side.
|
3088
|
+
*/
|
3089
|
+
readonly specificationVersion: 'v2';
|
3090
|
+
/**
|
3091
|
+
Name of the provider for logging purposes.
|
3092
|
+
*/
|
3093
|
+
readonly provider: string;
|
3094
|
+
/**
|
3095
|
+
Provider-specific model ID for logging purposes.
|
3096
|
+
*/
|
3097
|
+
readonly modelId: string;
|
3098
|
+
/**
|
3099
|
+
Generates a transcript.
|
3100
|
+
*/
|
3101
|
+
doGenerate(options: TranscriptionModelV2CallOptions): PromiseLike<{
|
3102
|
+
/**
|
3103
|
+
* The complete transcribed text from the audio.
|
3104
|
+
*/
|
3105
|
+
text: string;
|
3106
|
+
/**
|
3107
|
+
* Array of transcript segments with timing information.
|
3108
|
+
* Each segment represents a portion of the transcribed text with start and end times.
|
3109
|
+
*/
|
3110
|
+
segments: Array<{
|
3111
|
+
/**
|
3112
|
+
* The text content of this segment.
|
3113
|
+
*/
|
3114
|
+
text: string;
|
3115
|
+
/**
|
3116
|
+
* The start time of this segment in seconds.
|
3117
|
+
*/
|
3118
|
+
startSecond: number;
|
3119
|
+
/**
|
3120
|
+
* The end time of this segment in seconds.
|
3121
|
+
*/
|
3122
|
+
endSecond: number;
|
3123
|
+
}>;
|
3124
|
+
/**
|
3125
|
+
* The detected language of the audio content, as an ISO-639-1 code (e.g., 'en' for English).
|
3126
|
+
* May be undefined if the language couldn't be detected.
|
3127
|
+
*/
|
3128
|
+
language: string | undefined;
|
3129
|
+
/**
|
3130
|
+
* The total duration of the audio file in seconds.
|
3131
|
+
* May be undefined if the duration couldn't be determined.
|
3132
|
+
*/
|
3133
|
+
durationInSeconds: number | undefined;
|
3134
|
+
/**
|
3135
|
+
Warnings for the call, e.g. unsupported settings.
|
3136
|
+
*/
|
3137
|
+
warnings: Array<TranscriptionModelV2CallWarning>;
|
3138
|
+
/**
|
3139
|
+
Optional request information for telemetry and debugging purposes.
|
3140
|
+
*/
|
3141
|
+
request?: {
|
3142
|
+
/**
|
3143
|
+
Raw request HTTP body that was sent to the provider API as a string (JSON should be stringified).
|
3144
|
+
Non-HTTP(s) providers should not set this.
|
3145
|
+
*/
|
3146
|
+
body?: string;
|
3147
|
+
};
|
3148
|
+
/**
|
3149
|
+
Response information for telemetry and debugging purposes.
|
3150
|
+
*/
|
3151
|
+
response: {
|
3152
|
+
/**
|
3153
|
+
Timestamp for the start of the generated response.
|
3154
|
+
*/
|
3155
|
+
timestamp: Date;
|
3156
|
+
/**
|
3157
|
+
The ID of the response model that was used to generate the response.
|
3158
|
+
*/
|
3159
|
+
modelId: string;
|
3160
|
+
/**
|
3161
|
+
Response headers.
|
3162
|
+
*/
|
3163
|
+
headers?: SharedV2Headers;
|
3164
|
+
/**
|
3165
|
+
Response body.
|
3166
|
+
*/
|
3167
|
+
body?: unknown;
|
3168
|
+
};
|
3169
|
+
/**
|
3170
|
+
Additional provider-specific metadata. They are passed through
|
3171
|
+
from the provider to the AI SDK and enable provider-specific
|
3172
|
+
results that can be fully encapsulated in the provider.
|
3173
|
+
*/
|
3174
|
+
providerMetadata?: Record<string, Record<string, JSONValue>>;
|
3175
|
+
}>;
|
3176
|
+
};
|
3177
|
+
|
2815
3178
|
/**
|
2816
3179
|
* Provider for language, text embedding, and image generation models.
|
2817
3180
|
*/
|
@@ -2867,4 +3230,4 @@ interface ProviderV2 {
|
|
2867
3230
|
speechModel?(modelId: string): SpeechModelV2;
|
2868
3231
|
}
|
2869
3232
|
|
2870
|
-
export { AISDKError, APICallError, type EmbeddingModelV2, type EmbeddingModelV2Embedding, type EmbeddingModelV3, type EmbeddingModelV3Embedding, EmptyResponseBodyError, type ImageModelV2, type ImageModelV2CallOptions, type ImageModelV2CallWarning, type ImageModelV2ProviderMetadata, type ImageModelV3, type ImageModelV3CallOptions, type ImageModelV3CallWarning, type ImageModelV3ProviderMetadata, InvalidArgumentError, InvalidPromptError, InvalidResponseDataError, type JSONArray, type JSONObject, JSONParseError, type JSONValue, type LanguageModelV2, type LanguageModelV2CallOptions, type LanguageModelV2CallWarning, type LanguageModelV2Content, type LanguageModelV2DataContent, type LanguageModelV2File, type LanguageModelV2FilePart, type LanguageModelV2FinishReason, type LanguageModelV2FunctionTool, type LanguageModelV2Message, type LanguageModelV2Middleware, type LanguageModelV2Prompt, type LanguageModelV2ProviderDefinedTool, type LanguageModelV2Reasoning, type LanguageModelV2ReasoningPart, type LanguageModelV2ResponseMetadata, type LanguageModelV2Source, type LanguageModelV2StreamPart, type LanguageModelV2Text, type LanguageModelV2TextPart, type LanguageModelV2ToolCall, type LanguageModelV2ToolCallPart, type LanguageModelV2ToolChoice, type LanguageModelV2ToolResultOutput, type LanguageModelV2ToolResultPart, type LanguageModelV2Usage, type LanguageModelV3, type LanguageModelV3CallOptions, type LanguageModelV3CallWarning, type LanguageModelV3Content, type LanguageModelV3DataContent, type LanguageModelV3File, type LanguageModelV3FilePart, type LanguageModelV3FinishReason, type LanguageModelV3FunctionTool, type LanguageModelV3Message, type LanguageModelV3Middleware, type LanguageModelV3Prompt, type LanguageModelV3ProviderDefinedTool, type LanguageModelV3Reasoning, type LanguageModelV3ReasoningPart, type LanguageModelV3ResponseMetadata, type LanguageModelV3Source, type LanguageModelV3StreamPart, type LanguageModelV3Text, type LanguageModelV3TextPart, type 
LanguageModelV3ToolCall, type LanguageModelV3ToolCallPart, type LanguageModelV3ToolChoice, type LanguageModelV3ToolResult, type LanguageModelV3ToolResultOutput, type LanguageModelV3ToolResultPart, type LanguageModelV3Usage, LoadAPIKeyError, LoadSettingError, NoContentGeneratedError, NoSuchModelError, type ProviderV2, type ProviderV3, type SharedV2Headers, type SharedV2ProviderMetadata, type SharedV2ProviderOptions, type SpeechModelV2, type SpeechModelV2CallOptions, type SpeechModelV2CallWarning, TooManyEmbeddingValuesForCallError, type TranscriptionModelV2, type TranscriptionModelV2CallOptions, type TranscriptionModelV2CallWarning, TypeValidationError, UnsupportedFunctionalityError, getErrorMessage, isJSONArray, isJSONObject, isJSONValue };
|
3233
|
+
export { AISDKError, APICallError, type EmbeddingModelV2, type EmbeddingModelV2Embedding, type EmbeddingModelV3, type EmbeddingModelV3Embedding, EmptyResponseBodyError, type ImageModelV2, type ImageModelV2CallOptions, type ImageModelV2CallWarning, type ImageModelV2ProviderMetadata, type ImageModelV3, type ImageModelV3CallOptions, type ImageModelV3CallWarning, type ImageModelV3ProviderMetadata, InvalidArgumentError, InvalidPromptError, InvalidResponseDataError, type JSONArray, type JSONObject, JSONParseError, type JSONValue, type LanguageModelV2, type LanguageModelV2CallOptions, type LanguageModelV2CallWarning, type LanguageModelV2Content, type LanguageModelV2DataContent, type LanguageModelV2File, type LanguageModelV2FilePart, type LanguageModelV2FinishReason, type LanguageModelV2FunctionTool, type LanguageModelV2Message, type LanguageModelV2Middleware, type LanguageModelV2Prompt, type LanguageModelV2ProviderDefinedTool, type LanguageModelV2Reasoning, type LanguageModelV2ReasoningPart, type LanguageModelV2ResponseMetadata, type LanguageModelV2Source, type LanguageModelV2StreamPart, type LanguageModelV2Text, type LanguageModelV2TextPart, type LanguageModelV2ToolCall, type LanguageModelV2ToolCallPart, type LanguageModelV2ToolChoice, type LanguageModelV2ToolResultOutput, type LanguageModelV2ToolResultPart, type LanguageModelV2Usage, type LanguageModelV3, type LanguageModelV3CallOptions, type LanguageModelV3CallWarning, type LanguageModelV3Content, type LanguageModelV3DataContent, type LanguageModelV3File, type LanguageModelV3FilePart, type LanguageModelV3FinishReason, type LanguageModelV3FunctionTool, type LanguageModelV3Message, type LanguageModelV3Middleware, type LanguageModelV3Prompt, type LanguageModelV3ProviderDefinedTool, type LanguageModelV3Reasoning, type LanguageModelV3ReasoningPart, type LanguageModelV3ResponseMetadata, type LanguageModelV3Source, type LanguageModelV3StreamPart, type LanguageModelV3Text, type LanguageModelV3TextPart, type 
LanguageModelV3ToolCall, type LanguageModelV3ToolCallPart, type LanguageModelV3ToolChoice, type LanguageModelV3ToolResult, type LanguageModelV3ToolResultOutput, type LanguageModelV3ToolResultPart, type LanguageModelV3Usage, LoadAPIKeyError, LoadSettingError, NoContentGeneratedError, NoSuchModelError, type ProviderV2, type ProviderV3, type SharedV2Headers, type SharedV2ProviderMetadata, type SharedV2ProviderOptions, type SharedV3Headers, type SharedV3ProviderMetadata, type SharedV3ProviderOptions, type SpeechModelV2, type SpeechModelV2CallOptions, type SpeechModelV2CallWarning, type SpeechModelV3, type SpeechModelV3CallOptions, type SpeechModelV3CallWarning, TooManyEmbeddingValuesForCallError, type TranscriptionModelV2, type TranscriptionModelV2CallOptions, type TranscriptionModelV2CallWarning, type TranscriptionModelV3, type TranscriptionModelV3CallOptions, type TranscriptionModelV3CallWarning, TypeValidationError, UnsupportedFunctionalityError, getErrorMessage, isJSONArray, isJSONObject, isJSONValue };
|