@ai-sdk/gateway 4.0.0-beta.45 → 4.0.0-beta.47
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +14 -0
- package/dist/index.d.mts +2 -1
- package/dist/index.d.ts +2 -1
- package/dist/index.js +2 -7
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +2 -7
- package/dist/index.mjs.map +1 -1
- package/docs/00-ai-gateway.mdx +14 -0
- package/package.json +2 -2
- package/src/errors/create-gateway-error.ts +0 -1
- package/src/errors/gateway-authentication-error.ts +0 -1
- package/src/gateway-image-model.ts +1 -5
- package/src/gateway-language-model.ts +0 -1
- package/src/gateway-provider-options.ts +8 -0
package/docs/00-ai-gateway.mdx
CHANGED
|
@@ -778,6 +778,20 @@ The following gateway provider options are available:
|
|
|
778
778
|
|
|
779
779
|
Example: `only: ['anthropic', 'vertex']` will only allow routing to Anthropic or Vertex AI.
|
|
780
780
|
|
|
781
|
+
- **sort** _'cost' | 'ttft' | 'tps'_
|
|
782
|
+
|
|
783
|
+
Sorts available providers by a performance or cost metric before routing. The gateway will try the best-scoring provider first and fall back through the rest in sorted order. If unspecified, providers are ordered using the gateway's default system ranking.
|
|
784
|
+
|
|
785
|
+
- `'cost'` — lowest input cost per token first
|
|
786
|
+
- `'ttft'` — lowest time-to-first-token first
|
|
787
|
+
- `'tps'` — highest tokens-per-second first
|
|
788
|
+
|
|
789
|
+
When combined with `order`, the user-specified providers are promoted to the front while remaining providers follow the sorted order.
|
|
790
|
+
|
|
791
|
+
Example: `sort: 'ttft'` will route to the provider with the fastest time-to-first-token.
|
|
792
|
+
|
|
793
|
+
When `sort` is active, the response's `providerMetadata.gateway.routing.sort` object contains the sort option used, the resulting execution order, per-provider metric values, and any providers that were deprioritized.
|
|
794
|
+
|
|
781
795
|
- **models** _string[]_
|
|
782
796
|
|
|
783
797
|
Specifies fallback models to use when the primary model fails or is unavailable. The gateway will try the primary model first (specified in the `model` parameter), then try each model in this array in order until one succeeds.
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@ai-sdk/gateway",
|
|
3
3
|
"private": false,
|
|
4
|
-
"version": "4.0.0-beta.45",
|
|
4
|
+
"version": "4.0.0-beta.47",
|
|
5
5
|
"license": "Apache-2.0",
|
|
6
6
|
"sideEffects": false,
|
|
7
7
|
"main": "./dist/index.js",
|
|
@@ -32,7 +32,7 @@
|
|
|
32
32
|
"dependencies": {
|
|
33
33
|
"@vercel/oidc": "3.2.0",
|
|
34
34
|
"@ai-sdk/provider": "4.0.0-beta.10",
|
|
35
|
-
"@ai-sdk/provider-utils": "5.0.0-beta.
|
|
35
|
+
"@ai-sdk/provider-utils": "5.0.0-beta.18"
|
|
36
36
|
},
|
|
37
37
|
"devDependencies": {
|
|
38
38
|
"@types/node": "18.15.11",
|
|
package/src/gateway-image-model.ts
CHANGED
|
@@ -50,11 +50,7 @@ export class GatewayImageModel implements ImageModelV4 {
|
|
|
50
50
|
> {
|
|
51
51
|
const resolvedHeaders = await resolve(this.config.headers());
|
|
52
52
|
try {
|
|
53
|
-
const {
|
|
54
|
-
responseHeaders,
|
|
55
|
-
value: responseBody,
|
|
56
|
-
rawValue,
|
|
57
|
-
} = await postJsonToApi({
|
|
53
|
+
const { responseHeaders, value: responseBody } = await postJsonToApi({
|
|
58
54
|
url: this.getUrl(),
|
|
59
55
|
headers: combineHeaders(
|
|
60
56
|
resolvedHeaders,
|
|
package/src/gateway-provider-options.ts
CHANGED
|
@@ -17,6 +17,14 @@ const gatewayProviderOptions = lazySchema(() =>
|
|
|
17
17
|
* Example: `['bedrock', 'anthropic']` will try Amazon Bedrock first, then Anthropic as fallback.
|
|
18
18
|
*/
|
|
19
19
|
order: z.array(z.string()).optional(),
|
|
20
|
+
/**
|
|
21
|
+
* Sort providers by a performance or cost metric before routing.
|
|
22
|
+
*
|
|
23
|
+
* - `'cost'`: lowest input cost first
|
|
24
|
+
* - `'ttft'`: lowest time-to-first-token first
|
|
25
|
+
* - `'tps'`: highest tokens-per-second first
|
|
26
|
+
*/
|
|
27
|
+
sort: z.enum(['cost', 'ttft', 'tps']).optional(),
|
|
20
28
|
/**
|
|
21
29
|
* The unique identifier for the end user on behalf of whom the request was made.
|
|
22
30
|
*
|