@ai-sdk/provider 2.1.0-beta.4 → 3.0.0-beta.10
This diff reflects the changes between the two publicly released package versions as they appear in the public registry and is provided for informational purposes only.
- package/CHANGELOG.md +37 -0
- package/dist/index.d.mts +495 -33
- package/dist/index.d.ts +495 -33
- package/package.json +1 -1
package/dist/index.d.ts
CHANGED
@@ -59,6 +59,32 @@ type SharedV3ProviderMetadata = Record<string, Record<string, JSONValue>>;
  */
 type SharedV3ProviderOptions = Record<string, Record<string, JSONValue>>;

+/**
+ * Warning from the model that certain features are e.g. unsupported or that compatibility
+ * functionality is used (which might lead to suboptimal results).
+ */
+type SharedV3Warning = {
+    /**
+     * A configuration setting is not supported by the model.
+     */
+    type: 'unsupported-setting';
+    setting: string;
+    details?: string;
+} | {
+    /**
+     * A compatibility feature is used that might lead to suboptimal results.
+     */
+    type: 'compatibility';
+    feature: string;
+    details?: string;
+} | {
+    /**
+     * Other warning.
+     */
+    type: 'other';
+    message: string;
+};
+
 type SharedV2Headers = Record<string, string>;

 /**
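Note: `SharedV3Warning` is a plain discriminated union, so providers construct warning values directly. A minimal sketch (the concrete `setting`/`feature` strings are illustrative, not taken from this diff):

```ts
import type { SharedV3Warning } from '@ai-sdk/provider';

// A setting the model does not support.
const settingWarning: SharedV3Warning = {
  type: 'unsupported-setting',
  setting: 'temperature',
  details: 'temperature is ignored by this model',
};

// The new 'compatibility' variant: a shim was applied that may degrade results.
const compatibilityWarning: SharedV3Warning = {
  type: 'compatibility',
  feature: 'json-schema emulation',
  details: 'the schema was approximated via tool calling',
};
```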
@@ -1016,18 +1042,53 @@ interface LanguageModelV3ToolResultPart {
      */
     providerOptions?: SharedV3ProviderOptions;
 }
+/**
+ * Result of a tool call.
+ */
 type LanguageModelV3ToolResultOutput = {
+    /**
+     * Text tool output that should be directly sent to the API.
+     */
     type: 'text';
     value: string;
+    /**
+     * Provider-specific options.
+     */
+    providerOptions?: SharedV3ProviderOptions;
 } | {
     type: 'json';
     value: JSONValue;
+    /**
+     * Provider-specific options.
+     */
+    providerOptions?: SharedV3ProviderOptions;
+} | {
+    /**
+     * Type when the user has denied the execution of the tool call.
+     */
+    type: 'execution-denied';
+    /**
+     * Optional reason for the execution denial.
+     */
+    reason?: string;
+    /**
+     * Provider-specific options.
+     */
+    providerOptions?: SharedV3ProviderOptions;
 } | {
     type: 'error-text';
     value: string;
+    /**
+     * Provider-specific options.
+     */
+    providerOptions?: SharedV3ProviderOptions;
 } | {
     type: 'error-json';
     value: JSONValue;
+    /**
+     * Provider-specific options.
+     */
+    providerOptions?: SharedV3ProviderOptions;
 } | {
     type: 'content';
     value: Array<{
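Note: the new `execution-denied` variant and the per-variant `providerOptions` can be exercised like this (a sketch; the concrete values are illustrative):

```ts
import type { LanguageModelV3ToolResultOutput } from '@ai-sdk/provider';

// New in 3.0.0: record that the user denied execution of the tool call.
const denied: LanguageModelV3ToolResultOutput = {
  type: 'execution-denied',
  reason: 'User rejected the file-system write.',
};

// Existing variants now also accept provider-specific options.
const jsonResult: LanguageModelV3ToolResultOutput = {
  type: 'json',
  value: { status: 'ok' },
  providerOptions: { openai: {} },
};
```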
@@ -1036,8 +1097,12 @@ type LanguageModelV3ToolResultOutput = {
         Text content.
          */
         text: string;
+        /**
+         * Provider-specific options.
+         */
+        providerOptions?: SharedV3ProviderOptions;
     } | {
-        type: '
+        type: 'file-data';
         /**
         Base-64 encoded media data.
          */
@@ -1047,28 +1112,120 @@ IANA media type.
         @see https://www.iana.org/assignments/media-types/media-types.xhtml
          */
         mediaType: string;
+        /**
+         * Optional filename of the file.
+         */
+        filename?: string;
+        /**
+         * Provider-specific options.
+         */
+        providerOptions?: SharedV3ProviderOptions;
+    } | {
+        type: 'file-url';
+        /**
+         * URL of the file.
+         */
+        url: string;
+        /**
+         * Provider-specific options.
+         */
+        providerOptions?: SharedV3ProviderOptions;
+    } | {
+        type: 'file-id';
+        /**
+         * ID of the file.
+         *
+         * If you use multiple providers, you need to
+         * specify the provider specific ids using
+         * the Record option. The key is the provider
+         * name, e.g. 'openai' or 'anthropic'.
+         */
+        fileId: string | Record<string, string>;
+        /**
+         * Provider-specific options.
+         */
+        providerOptions?: SharedV3ProviderOptions;
+    } | {
+        /**
+         * Images that are referenced using base64 encoded data.
+         */
+        type: 'image-data';
+        /**
+        Base-64 encoded image data.
+         */
+        data: string;
+        /**
+        IANA media type.
+        @see https://www.iana.org/assignments/media-types/media-types.xhtml
+         */
+        mediaType: string;
+        /**
+         * Provider-specific options.
+         */
+        providerOptions?: SharedV3ProviderOptions;
+    } | {
+        /**
+         * Images that are referenced using a URL.
+         */
+        type: 'image-url';
+        /**
+         * URL of the image.
+         */
+        url: string;
+        /**
+         * Provider-specific options.
+         */
+        providerOptions?: SharedV3ProviderOptions;
+    } | {
+        /**
+         * Images that are referenced using a provider file id.
+         */
+        type: 'image-file-id';
+        /**
+         * Image that is referenced using a provider file id.
+         *
+         * If you use multiple providers, you need to
+         * specify the provider specific ids using
+         * the Record option. The key is the provider
+         * name, e.g. 'openai' or 'anthropic'.
+         */
+        fileId: string | Record<string, string>;
+        /**
+         * Provider-specific options.
+         */
+        providerOptions?: SharedV3ProviderOptions;
+    } | {
+        /**
+         * Custom content part. This can be used to implement
+         * provider-specific content parts.
+         */
+        type: 'custom';
+        /**
+         * Provider-specific options.
+         */
+        providerOptions?: SharedV3ProviderOptions;
     }>;
 };

 /**
-The configuration of a tool that is defined by the provider.
+ * The configuration of a tool that is defined by the provider.
  */
 type LanguageModelV3ProviderDefinedTool = {
     /**
-The type of the tool (always 'provider-defined').
+     * The type of the tool (always 'provider-defined').
      */
     type: 'provider-defined';
     /**
-The ID of the tool. Should follow the format `<provider-id>.<unique-tool-name>`.
+     * The ID of the tool. Should follow the format `<provider-id>.<unique-tool-name>`.
      */
     id: `${string}.${string}`;
     /**
-The name of the tool that the user must use in the tool set.
+     * The name of the tool that the user must use in the tool set.
      */
     name: string;
     /**
-The arguments for configuring the tool. Must match the expected arguments defined by the provider for this tool.
-   */
+     * The arguments for configuring the tool. Must match the expected arguments defined by the provider for this tool.
+     */
     args: Record<string, unknown>;
 };

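Note: the expanded `content` variant lets a single tool result mix text with files and images referenced by base64 data, URL, or provider file id. A sketch (URLs and ids are illustrative):

```ts
import type { LanguageModelV3ToolResultOutput } from '@ai-sdk/provider';

const richResult: LanguageModelV3ToolResultOutput = {
  type: 'content',
  value: [
    { type: 'text', text: 'Rendered chart attached.' },
    { type: 'image-url', url: 'https://example.com/chart.png' },
    {
      type: 'file-id',
      // provider-specific ids, keyed by provider name
      fileId: { openai: 'file-abc123', anthropic: 'file_xyz789' },
    },
  ],
};
```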
@@ -1325,6 +1482,11 @@ type LanguageModelV3ToolCall = {
      * If this flag is not set or is false, the tool call will be executed by the client.
      */
     providerExecuted?: boolean;
+    /**
+     * Whether the tool is dynamic, i.e. defined at runtime.
+     * For example, MCP (Model Context Protocol) tools that are executed by the provider.
+     */
+    dynamic?: boolean;
     /**
      * Additional provider-specific metadata for the tool call.
      */
@@ -1369,6 +1531,11 @@ type LanguageModelV3ToolResult = {
      * If this flag is not set or is false, the tool result is not preliminary.
      */
     preliminary?: boolean;
+    /**
+     * Whether the tool is dynamic, i.e. defined at runtime.
+     * For example, MCP (Model Context Protocol) tools that are executed by the provider.
+     */
+    dynamic?: boolean;
     /**
      * Additional provider-specific metadata for the tool result.
      */
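Note: `dynamic` is just an optional flag on both tool calls and tool results; consumers can branch on it without touching anything else (sketch, reading only the field added in these hunks):

```ts
import type {
  LanguageModelV3ToolCall,
  LanguageModelV3ToolResult,
} from '@ai-sdk/provider';

// Runtime-discovered tools (e.g. provider-executed MCP tools) are marked
// dynamic; statically declared tools leave the flag unset.
function isDynamicTool(
  part: LanguageModelV3ToolCall | LanguageModelV3ToolResult,
): boolean {
  return part.dynamic === true;
}
```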
@@ -1469,6 +1636,8 @@ type LanguageModelV3StreamPart = {
     toolName: string;
     providerMetadata?: SharedV3ProviderMetadata;
     providerExecuted?: boolean;
+    dynamic?: boolean;
+    title?: string;
 } | {
     type: 'tool-input-delta';
     id: string;
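Note: the streamed tool-input part gains the same `dynamic` flag plus an optional `title`. A sketch that reads only the newly added fields and the `toolName` shown in this hunk, and otherwise assumes nothing about the union:

```ts
import type { LanguageModelV3StreamPart } from '@ai-sdk/provider';

// `title` only exists on the streamed tool-input variant shown above, so the
// `in` check narrows the union to it.
function describeToolInput(part: LanguageModelV3StreamPart): string | undefined {
  if ('title' in part) {
    return `${part.toolName}: ${part.title ?? '(untitled)'}${part.dynamic ? ' [dynamic]' : ''}`;
  }
  return undefined;
}
```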
@@ -1505,11 +1674,11 @@ type LanguageModelV3 = {
      */
     readonly specificationVersion: 'v3';
     /**
-
+    Provider ID.
      */
     readonly provider: string;
     /**
-    Provider-specific model ID
+    Provider-specific model ID.
      */
     readonly modelId: string;
     /**
@@ -2524,8 +2693,8 @@ type LanguageModelV2Middleware = {
     }) => PromiseLike<Awaited<ReturnType<LanguageModelV2['doStream']>>>;
 };

-type SpeechModelV2ProviderOptions = Record<string, Record<string, JSONValue>>;
-type SpeechModelV2CallOptions = {
+type SpeechModelV3ProviderOptions = Record<string, Record<string, JSONValue>>;
+type SpeechModelV3CallOptions = {
     /**
      * Text to convert to speech.
      */
@@ -2564,7 +2733,7 @@ type SpeechModelV2CallOptions = {
      * }
      * ```
      */
-    providerOptions?: SpeechModelV2ProviderOptions;
+    providerOptions?: SpeechModelV3ProviderOptions;
     /**
      * Abort signal for cancelling the operation.
      */
@@ -2580,9 +2749,9 @@ type SpeechModelV2CallOptions = {
  * Warning from the model provider for this call. The call will proceed, but e.g.
  * some settings might not be supported, which can lead to suboptimal results.
  */
-type SpeechModelV2CallWarning = {
+type SpeechModelV3CallWarning = {
     type: 'unsupported-setting';
-    setting: keyof SpeechModelV2CallOptions;
+    setting: keyof SpeechModelV3CallOptions;
     details?: string;
 } | {
     type: 'other';
@@ -2590,9 +2759,9 @@ type SpeechModelV2CallWarning = {
 };

 /**
- * Speech model specification version 2.
+ * Speech model specification version 3.
  */
-type SpeechModelV2 = {
+type SpeechModelV3 = {
     /**
      * The speech model must specify which speech model interface
      * version it implements. This will allow us to evolve the speech
@@ -2600,7 +2769,7 @@ type SpeechModelV2 = {
      * implementation versions can be handled as a discriminated union
      * on our side.
      */
-    readonly specificationVersion: 'v2';
+    readonly specificationVersion: 'v3';
     /**
      * Name of the provider for logging purposes.
      */
@@ -2612,7 +2781,7 @@ type SpeechModelV2 = {
     /**
      * Generates speech audio from text.
      */
-    doGenerate(options: SpeechModelV2CallOptions): PromiseLike<{
+    doGenerate(options: SpeechModelV3CallOptions): PromiseLike<{
         /**
          * Generated audio as an ArrayBuffer.
          * The audio should be returned without any unnecessary conversion.
@@ -2624,7 +2793,7 @@ type SpeechModelV2 = {
         /**
         * Warnings for the call, e.g. unsupported settings.
         */
-        warnings: Array<SpeechModelV2CallWarning>;
+        warnings: Array<SpeechModelV3CallWarning>;
         /**
         * Optional request information for telemetry and debugging purposes.
         */
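Note: apart from the renames above, the call/result shapes match the `SpeechModelV2` definitions re-added later in this diff (only the type names and the `specificationVersion` literal change). A minimal provider sketch under that assumption; the endpoint and model id are illustrative, not part of the package:

```ts
import type { SpeechModelV3 } from '@ai-sdk/provider';

const exampleSpeechModel: SpeechModelV3 = {
  specificationVersion: 'v3',
  provider: 'example-provider',
  modelId: 'example-tts-1',
  async doGenerate(options) {
    const res = await fetch('https://api.example.com/v1/speech', {
      method: 'POST',
      headers: { 'content-type': 'application/json' },
      body: JSON.stringify({ text: options.text, voice: options.voice }),
      signal: options.abortSignal,
    });
    return {
      // binary audio is passed through without conversion
      audio: new Uint8Array(await res.arrayBuffer()),
      warnings: [],
      response: { timestamp: new Date(), modelId: 'example-tts-1' },
    };
  },
};
```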
@@ -2664,8 +2833,8 @@ type SpeechModelV2 = {
     }>;
 };

-type TranscriptionModelV2ProviderOptions = Record<string, Record<string, JSONValue>>;
-type TranscriptionModelV2CallOptions = {
+type TranscriptionModelV3ProviderOptions = Record<string, Record<string, JSONValue>>;
+type TranscriptionModelV3CallOptions = {
     /**
     Audio data to transcribe.
     Accepts a `Uint8Array` or `string`, where `string` is a base64 encoded audio file.
@@ -2691,7 +2860,7 @@ type TranscriptionModelV2CallOptions = {
     }
     ```
      */
-    providerOptions?: TranscriptionModelV2ProviderOptions;
+    providerOptions?: TranscriptionModelV3ProviderOptions;
     /**
     Abort signal for cancelling the operation.
      */
@@ -2707,9 +2876,9 @@ type TranscriptionModelV2CallOptions = {
 Warning from the model provider for this call. The call will proceed, but e.g.
 some settings might not be supported, which can lead to suboptimal results.
  */
-type TranscriptionModelV2CallWarning = {
+type TranscriptionModelV3CallWarning = {
     type: 'unsupported-setting';
-    setting: keyof TranscriptionModelV2CallOptions;
+    setting: keyof TranscriptionModelV3CallOptions;
     details?: string;
 } | {
     type: 'other';
@@ -2717,9 +2886,9 @@ type TranscriptionModelV2CallWarning = {
 };

 /**
-Transcription model specification version 2.
+Transcription model specification version 3.
  */
-type TranscriptionModelV2 = {
+type TranscriptionModelV3 = {
     /**
     The transcription model must specify which transcription model interface
     version it implements. This will allow us to evolve the transcription
@@ -2727,7 +2896,7 @@ type TranscriptionModelV2 = {
     implementation versions can be handled as a discriminated union
     on our side.
      */
-    readonly specificationVersion: 'v2';
+    readonly specificationVersion: 'v3';
     /**
     Name of the provider for logging purposes.
      */
@@ -2739,7 +2908,7 @@ type TranscriptionModelV2 = {
     /**
     Generates a transcript.
      */
-    doGenerate(options: TranscriptionModelV2CallOptions): PromiseLike<{
+    doGenerate(options: TranscriptionModelV3CallOptions): PromiseLike<{
         /**
         * The complete transcribed text from the audio.
         */
@@ -2775,7 +2944,7 @@ type TranscriptionModelV2 = {
         /**
         Warnings for the call, e.g. unsupported settings.
          */
-        warnings: Array<TranscriptionModelV2CallWarning>;
+        warnings: Array<TranscriptionModelV3CallWarning>;
         /**
         Optional request information for telemetry and debugging purposes.
          */
@@ -2801,7 +2970,7 @@ type TranscriptionModelV2 = {
         /**
         Response headers.
          */
-        headers?: SharedV2Headers;
+        headers?: SharedV3Headers;
         /**
         Response body.
          */
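Note: likewise for transcription, assuming the V3 result shape mirrors the `TranscriptionModelV2` shape re-added later in this diff. All values are illustrative; a real implementation would forward `options.audio` and `options.mediaType` to the provider API:

```ts
import type { TranscriptionModelV3 } from '@ai-sdk/provider';

const exampleTranscriptionModel: TranscriptionModelV3 = {
  specificationVersion: 'v3',
  provider: 'example-provider',
  modelId: 'example-stt-1',
  async doGenerate(options) {
    return {
      text: 'hello world',
      segments: [{ text: 'hello world', startSecond: 0, endSecond: 1.2 }],
      language: 'en',          // ISO-639-1 code, or undefined if unknown
      durationInSeconds: 1.2,  // undefined if it cannot be determined
      warnings: [],
      response: { timestamp: new Date(), modelId: 'example-stt-1' },
    };
  },
};
```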
@@ -2820,6 +2989,7 @@ type TranscriptionModelV2 = {
  * Provider for language, text embedding, and image generation models.
  */
 interface ProviderV3 {
+    readonly specificationVersion: 'v3';
     /**
     Returns the language model with the given id.
     The model id is then passed to the provider function to get the model.
@@ -2859,7 +3029,7 @@ interface ProviderV3 {

     @returns {TranscriptionModel} The transcription model associated with the id
      */
-    transcriptionModel?(modelId: string): TranscriptionModelV2;
+    transcriptionModel?(modelId: string): TranscriptionModelV3;
     /**
     Returns the speech model with the given id.
     The model id is then passed to the provider function to get the model.
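Note: the new `specificationVersion` marker makes V3 providers distinguishable at runtime. A sketch, assuming `ProviderV2` does not carry the field (this diff does not add it there):

```ts
import type { ProviderV2, ProviderV3 } from '@ai-sdk/provider';

function isProviderV3(provider: ProviderV2 | ProviderV3): provider is ProviderV3 {
  return 'specificationVersion' in provider && provider.specificationVersion === 'v3';
}
```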
@@ -2868,9 +3038,301 @@ interface ProviderV3 {

     @returns {SpeechModel} The speech model associated with the id
      */
-    speechModel?(modelId: string): SpeechModelV2;
+    speechModel?(modelId: string): SpeechModelV3;
 }

+type SpeechModelV2ProviderOptions = Record<string, Record<string, JSONValue>>;
+type SpeechModelV2CallOptions = {
+    /**
+     * Text to convert to speech.
+     */
+    text: string;
+    /**
+     * The voice to use for speech synthesis.
+     * This is provider-specific and may be a voice ID, name, or other identifier.
+     */
+    voice?: string;
+    /**
+     * The desired output format for the audio e.g. "mp3", "wav", etc.
+     */
+    outputFormat?: string;
+    /**
+     * Instructions for the speech generation e.g. "Speak in a slow and steady tone".
+     */
+    instructions?: string;
+    /**
+     * The speed of the speech generation.
+     */
+    speed?: number;
+    /**
+     * The language for speech generation. This should be an ISO 639-1 language code (e.g. "en", "es", "fr")
+     * or "auto" for automatic language detection. Provider support varies.
+     */
+    language?: string;
+    /**
+     * Additional provider-specific options that are passed through to the provider
+     * as body parameters.
+     *
+     * The outer record is keyed by the provider name, and the inner
+     * record is keyed by the provider-specific metadata key.
+     * ```ts
+     * {
+     * "openai": {}
+     * }
+     * ```
+     */
+    providerOptions?: SpeechModelV2ProviderOptions;
+    /**
+     * Abort signal for cancelling the operation.
+     */
+    abortSignal?: AbortSignal;
+    /**
+     * Additional HTTP headers to be sent with the request.
+     * Only applicable for HTTP-based providers.
+     */
+    headers?: Record<string, string | undefined>;
+};
+
+/**
+ * Warning from the model provider for this call. The call will proceed, but e.g.
+ * some settings might not be supported, which can lead to suboptimal results.
+ */
+type SpeechModelV2CallWarning = {
+    type: 'unsupported-setting';
+    setting: keyof SpeechModelV2CallOptions;
+    details?: string;
+} | {
+    type: 'other';
+    message: string;
+};
+
+/**
+ * Speech model specification version 2.
+ */
+type SpeechModelV2 = {
+    /**
+     * The speech model must specify which speech model interface
+     * version it implements. This will allow us to evolve the speech
+     * model interface and retain backwards compatibility. The different
+     * implementation versions can be handled as a discriminated union
+     * on our side.
+     */
+    readonly specificationVersion: 'v2';
+    /**
+     * Name of the provider for logging purposes.
+     */
+    readonly provider: string;
+    /**
+     * Provider-specific model ID for logging purposes.
+     */
+    readonly modelId: string;
+    /**
+     * Generates speech audio from text.
+     */
+    doGenerate(options: SpeechModelV2CallOptions): PromiseLike<{
+        /**
+         * Generated audio as an ArrayBuffer.
+         * The audio should be returned without any unnecessary conversion.
+         * If the API returns base64 encoded strings, the audio should be returned
+         * as base64 encoded strings. If the API returns binary data, the audio
+         * should be returned as binary data.
+         */
+        audio: string | Uint8Array;
+        /**
+         * Warnings for the call, e.g. unsupported settings.
+         */
+        warnings: Array<SpeechModelV2CallWarning>;
+        /**
+         * Optional request information for telemetry and debugging purposes.
+         */
+        request?: {
+            /**
+             * Response body (available only for providers that use HTTP requests).
+             */
+            body?: unknown;
+        };
+        /**
+         * Response information for telemetry and debugging purposes.
+         */
+        response: {
+            /**
+             * Timestamp for the start of the generated response.
+             */
+            timestamp: Date;
+            /**
+             * The ID of the response model that was used to generate the response.
+             */
+            modelId: string;
+            /**
+             * Response headers.
+             */
+            headers?: SharedV2Headers;
+            /**
+             * Response body.
+             */
+            body?: unknown;
+        };
+        /**
+         * Additional provider-specific metadata. They are passed through
+         * from the provider to the AI SDK and enable provider-specific
+         * results that can be fully encapsulated in the provider.
+         */
+        providerMetadata?: Record<string, Record<string, JSONValue>>;
+    }>;
+};
+
+type TranscriptionModelV2ProviderOptions = Record<string, Record<string, JSONValue>>;
+type TranscriptionModelV2CallOptions = {
+    /**
+    Audio data to transcribe.
+    Accepts a `Uint8Array` or `string`, where `string` is a base64 encoded audio file.
+     */
+    audio: Uint8Array | string;
+    /**
+    The IANA media type of the audio data.
+
+    @see https://www.iana.org/assignments/media-types/media-types.xhtml
+     */
+    mediaType: string;
+    /**
+    Additional provider-specific options that are passed through to the provider
+    as body parameters.
+
+    The outer record is keyed by the provider name, and the inner
+    record is keyed by the provider-specific metadata key.
+    ```ts
+    {
+    "openai": {
+    "timestampGranularities": ["word"]
+    }
+    }
+    ```
+     */
+    providerOptions?: TranscriptionModelV2ProviderOptions;
+    /**
+    Abort signal for cancelling the operation.
+     */
+    abortSignal?: AbortSignal;
+    /**
+    Additional HTTP headers to be sent with the request.
+    Only applicable for HTTP-based providers.
+     */
+    headers?: Record<string, string | undefined>;
+};
+
+/**
+Warning from the model provider for this call. The call will proceed, but e.g.
+some settings might not be supported, which can lead to suboptimal results.
+ */
+type TranscriptionModelV2CallWarning = {
+    type: 'unsupported-setting';
+    setting: keyof TranscriptionModelV2CallOptions;
+    details?: string;
+} | {
+    type: 'other';
+    message: string;
+};
+
+/**
+Transcription model specification version 2.
+ */
+type TranscriptionModelV2 = {
+    /**
+    The transcription model must specify which transcription model interface
+    version it implements. This will allow us to evolve the transcription
+    model interface and retain backwards compatibility. The different
+    implementation versions can be handled as a discriminated union
+    on our side.
+     */
+    readonly specificationVersion: 'v2';
+    /**
+    Name of the provider for logging purposes.
+     */
+    readonly provider: string;
+    /**
+    Provider-specific model ID for logging purposes.
+     */
+    readonly modelId: string;
+    /**
+    Generates a transcript.
+     */
+    doGenerate(options: TranscriptionModelV2CallOptions): PromiseLike<{
+        /**
+         * The complete transcribed text from the audio.
+         */
+        text: string;
+        /**
+         * Array of transcript segments with timing information.
+         * Each segment represents a portion of the transcribed text with start and end times.
+         */
+        segments: Array<{
+            /**
+             * The text content of this segment.
+             */
+            text: string;
+            /**
+             * The start time of this segment in seconds.
+             */
+            startSecond: number;
+            /**
+             * The end time of this segment in seconds.
+             */
+            endSecond: number;
+        }>;
+        /**
+         * The detected language of the audio content, as an ISO-639-1 code (e.g., 'en' for English).
+         * May be undefined if the language couldn't be detected.
+         */
+        language: string | undefined;
+        /**
+         * The total duration of the audio file in seconds.
+         * May be undefined if the duration couldn't be determined.
+         */
+        durationInSeconds: number | undefined;
+        /**
+        Warnings for the call, e.g. unsupported settings.
+         */
+        warnings: Array<TranscriptionModelV2CallWarning>;
+        /**
+        Optional request information for telemetry and debugging purposes.
+         */
+        request?: {
+            /**
+            Raw request HTTP body that was sent to the provider API as a string (JSON should be stringified).
+            Non-HTTP(s) providers should not set this.
+             */
+            body?: string;
+        };
+        /**
+        Response information for telemetry and debugging purposes.
+         */
+        response: {
+            /**
+            Timestamp for the start of the generated response.
+             */
+            timestamp: Date;
+            /**
+            The ID of the response model that was used to generate the response.
+             */
+            modelId: string;
+            /**
+            Response headers.
+             */
+            headers?: SharedV2Headers;
+            /**
+            Response body.
+             */
+            body?: unknown;
+        };
+        /**
+        Additional provider-specific metadata. They are passed through
+        from the provider to the AI SDK and enable provider-specific
+        results that can be fully encapsulated in the provider.
+         */
+        providerMetadata?: Record<string, Record<string, JSONValue>>;
+    }>;
+};
+
 /**
  * Provider for language, text embedding, and image generation models.
  */
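Note: the V2 speech and transcription types are re-added (and stay exported) rather than removed, so downstream code can keep accepting either generation and discriminate on the `specificationVersion` literal:

```ts
import type { SpeechModelV2, SpeechModelV3 } from '@ai-sdk/provider';

function describeSpeechModel(model: SpeechModelV2 | SpeechModelV3): string {
  switch (model.specificationVersion) {
    case 'v2':
      return `${model.provider}/${model.modelId} (speech spec v2)`;
    case 'v3':
      return `${model.provider}/${model.modelId} (speech spec v3)`;
  }
}
```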
@@ -2926,4 +3388,4 @@ interface ProviderV2 {
     speechModel?(modelId: string): SpeechModelV2;
 }

-export { AISDKError, APICallError, type EmbeddingModelV2, type EmbeddingModelV2Embedding, type EmbeddingModelV3, type EmbeddingModelV3Embedding, EmptyResponseBodyError, type ImageModelV2, type ImageModelV2CallOptions, type ImageModelV2CallWarning, type ImageModelV2ProviderMetadata, type ImageModelV3, type ImageModelV3CallOptions, type ImageModelV3CallWarning, type ImageModelV3ProviderMetadata, InvalidArgumentError, InvalidPromptError, InvalidResponseDataError, type JSONArray, type JSONObject, JSONParseError, type JSONValue, type LanguageModelV2, type LanguageModelV2CallOptions, type LanguageModelV2CallWarning, type LanguageModelV2Content, type LanguageModelV2DataContent, type LanguageModelV2File, type LanguageModelV2FilePart, type LanguageModelV2FinishReason, type LanguageModelV2FunctionTool, type LanguageModelV2Message, type LanguageModelV2Middleware, type LanguageModelV2Prompt, type LanguageModelV2ProviderDefinedTool, type LanguageModelV2Reasoning, type LanguageModelV2ReasoningPart, type LanguageModelV2ResponseMetadata, type LanguageModelV2Source, type LanguageModelV2StreamPart, type LanguageModelV2Text, type LanguageModelV2TextPart, type LanguageModelV2ToolCall, type LanguageModelV2ToolCallPart, type LanguageModelV2ToolChoice, type LanguageModelV2ToolResultOutput, type LanguageModelV2ToolResultPart, type LanguageModelV2Usage, type LanguageModelV3, type LanguageModelV3CallOptions, type LanguageModelV3CallWarning, type LanguageModelV3Content, type LanguageModelV3DataContent, type LanguageModelV3File, type LanguageModelV3FilePart, type LanguageModelV3FinishReason, type LanguageModelV3FunctionTool, type LanguageModelV3Message, type LanguageModelV3Middleware, type LanguageModelV3Prompt, type LanguageModelV3ProviderDefinedTool, type LanguageModelV3Reasoning, type LanguageModelV3ReasoningPart, type LanguageModelV3ResponseMetadata, type LanguageModelV3Source, type LanguageModelV3StreamPart, type LanguageModelV3Text, type LanguageModelV3TextPart, type LanguageModelV3ToolCall, type LanguageModelV3ToolCallPart, type LanguageModelV3ToolChoice, type LanguageModelV3ToolResult, type LanguageModelV3ToolResultOutput, type LanguageModelV3ToolResultPart, type LanguageModelV3Usage, LoadAPIKeyError, LoadSettingError, NoContentGeneratedError, NoSuchModelError, type ProviderV2, type ProviderV3, type SharedV2Headers, type SharedV2ProviderMetadata, type SharedV2ProviderOptions, type SharedV3Headers, type SharedV3ProviderMetadata, type SharedV3ProviderOptions, type SpeechModelV2, type SpeechModelV2CallOptions, type SpeechModelV2CallWarning, TooManyEmbeddingValuesForCallError, type TranscriptionModelV2, type TranscriptionModelV2CallOptions, type TranscriptionModelV2CallWarning, TypeValidationError, UnsupportedFunctionalityError, getErrorMessage, isJSONArray, isJSONObject, isJSONValue };
+export { AISDKError, APICallError, type EmbeddingModelV2, type EmbeddingModelV2Embedding, type EmbeddingModelV3, type EmbeddingModelV3Embedding, EmptyResponseBodyError, type ImageModelV2, type ImageModelV2CallOptions, type ImageModelV2CallWarning, type ImageModelV2ProviderMetadata, type ImageModelV3, type ImageModelV3CallOptions, type ImageModelV3CallWarning, type ImageModelV3ProviderMetadata, InvalidArgumentError, InvalidPromptError, InvalidResponseDataError, type JSONArray, type JSONObject, JSONParseError, type JSONValue, type LanguageModelV2, type LanguageModelV2CallOptions, type LanguageModelV2CallWarning, type LanguageModelV2Content, type LanguageModelV2DataContent, type LanguageModelV2File, type LanguageModelV2FilePart, type LanguageModelV2FinishReason, type LanguageModelV2FunctionTool, type LanguageModelV2Message, type LanguageModelV2Middleware, type LanguageModelV2Prompt, type LanguageModelV2ProviderDefinedTool, type LanguageModelV2Reasoning, type LanguageModelV2ReasoningPart, type LanguageModelV2ResponseMetadata, type LanguageModelV2Source, type LanguageModelV2StreamPart, type LanguageModelV2Text, type LanguageModelV2TextPart, type LanguageModelV2ToolCall, type LanguageModelV2ToolCallPart, type LanguageModelV2ToolChoice, type LanguageModelV2ToolResultOutput, type LanguageModelV2ToolResultPart, type LanguageModelV2Usage, type LanguageModelV3, type LanguageModelV3CallOptions, type LanguageModelV3CallWarning, type LanguageModelV3Content, type LanguageModelV3DataContent, type LanguageModelV3File, type LanguageModelV3FilePart, type LanguageModelV3FinishReason, type LanguageModelV3FunctionTool, type LanguageModelV3Message, type LanguageModelV3Middleware, type LanguageModelV3Prompt, type LanguageModelV3ProviderDefinedTool, type LanguageModelV3Reasoning, type LanguageModelV3ReasoningPart, type LanguageModelV3ResponseMetadata, type LanguageModelV3Source, type LanguageModelV3StreamPart, type LanguageModelV3Text, type LanguageModelV3TextPart, type LanguageModelV3ToolCall, type LanguageModelV3ToolCallPart, type LanguageModelV3ToolChoice, type LanguageModelV3ToolResult, type LanguageModelV3ToolResultOutput, type LanguageModelV3ToolResultPart, type LanguageModelV3Usage, LoadAPIKeyError, LoadSettingError, NoContentGeneratedError, NoSuchModelError, type ProviderV2, type ProviderV3, type SharedV2Headers, type SharedV2ProviderMetadata, type SharedV2ProviderOptions, type SharedV3Headers, type SharedV3ProviderMetadata, type SharedV3ProviderOptions, type SharedV3Warning, type SpeechModelV2, type SpeechModelV2CallOptions, type SpeechModelV2CallWarning, type SpeechModelV3, type SpeechModelV3CallOptions, type SpeechModelV3CallWarning, TooManyEmbeddingValuesForCallError, type TranscriptionModelV2, type TranscriptionModelV2CallOptions, type TranscriptionModelV2CallWarning, type TranscriptionModelV3, type TranscriptionModelV3CallOptions, type TranscriptionModelV3CallWarning, TypeValidationError, UnsupportedFunctionalityError, getErrorMessage, isJSONArray, isJSONObject, isJSONValue };
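Note: comparing the two export statements, the names newly exported in 3.0.0-beta.10 are the shared warning union and the V3 speech/transcription surface. They can be imported like any other type from the package:

```ts
import type {
  SharedV3Warning,
  SpeechModelV3,
  SpeechModelV3CallOptions,
  SpeechModelV3CallWarning,
  TranscriptionModelV3,
  TranscriptionModelV3CallOptions,
  TranscriptionModelV3CallWarning,
} from '@ai-sdk/provider';
```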