ai-retry 1.10.0 → 1.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +451 -827
- package/dist/{retryables-M5l_6w9k.mjs → conditions-BGoANmfr.mjs} +5 -5
- package/dist/{retryables-CPAbu_M3.mjs → conditions-CyJOeRZK.mjs} +4 -4
- package/dist/create-retryable-model-BIMStLIF.mjs +676 -0
- package/dist/create-retryable-model-CLCFZANp.mjs +244 -0
- package/dist/create-retryable-model-DEQ5jciq.mjs +247 -0
- package/dist/embedding-model/conditions/index.d.mts +14 -0
- package/dist/embedding-model/conditions/index.mjs +7 -0
- package/dist/embedding-model/index.d.mts +14 -0
- package/dist/embedding-model/index.mjs +6 -0
- package/dist/{guards-D8UJtxDK.mjs → guards-DtZgDqE3.mjs} +6 -1
- package/dist/image-model/conditions/index.d.mts +4 -0
- package/dist/image-model/conditions/index.mjs +4 -0
- package/dist/image-model/index.d.mts +14 -0
- package/dist/image-model/index.mjs +6 -0
- package/dist/{index-DaJrd4dN.d.mts → index-BkvvEDSr.d.mts} +6 -4
- package/dist/index-D3t1Xo_U.d.mts +28 -0
- package/dist/index.d.mts +34 -7
- package/dist/index.mjs +43 -2
- package/dist/language-model/conditions/index.d.mts +4 -0
- package/dist/language-model/conditions/index.mjs +4 -0
- package/dist/language-model/index.d.mts +14 -0
- package/dist/language-model/index.mjs +6 -0
- package/dist/{error-CaTT-xX8.mjs → not-C9pUKPO7.mjs} +69 -38
- package/dist/{error-B-rjhfG_.d.mts → or-CFcJxcaL.d.mts} +36 -27
- package/dist/retryables/index.d.mts +54 -18
- package/dist/retryables/index.mjs +50 -14
- package/dist/telemetry-CJFJzjTr.mjs +442 -0
- package/dist/{types-Dik-mH20.d.mts → types-B8qg3Yzx.d.mts} +23 -10
- package/package.json +7 -7
- package/dist/create-retryable-model-D36IQyOQ.mjs +0 -1564
- package/dist/experimental/embedding-model/index.d.mts +0 -8
- package/dist/experimental/embedding-model/index.mjs +0 -19
- package/dist/experimental/embedding-model/retryables/index.d.mts +0 -20
- package/dist/experimental/embedding-model/retryables/index.mjs +0 -7
- package/dist/experimental/image-model/index.d.mts +0 -8
- package/dist/experimental/image-model/index.mjs +0 -19
- package/dist/experimental/image-model/retryables/index.d.mts +0 -4
- package/dist/experimental/image-model/retryables/index.mjs +0 -4
- package/dist/experimental/language-model/index.d.mts +0 -11
- package/dist/experimental/language-model/index.mjs +0 -19
- package/dist/experimental/language-model/retryables/index.d.mts +0 -4
- package/dist/experimental/language-model/retryables/index.mjs +0 -4
- package/dist/index-ewZ5T6B2.d.mts +0 -34
- /package/dist/{parse-retry-headers-CRxgluhe.mjs → parse-retry-headers-RPSiSNjf.mjs} +0 -0
package/README.md
CHANGED
|
@@ -11,108 +11,101 @@
|
|
|
11
11
|
|
|
12
12
|
Automatically handle API failures, content filtering, timeouts and other errors by switching between different AI models and providers.
|
|
13
13
|
|
|
14
|
-
`ai-retry` wraps
|
|
14
|
+
`ai-retry` wraps a base model with a list of typed retry **conditions**. When a request fails with an error, or the response is unsatisfactory, it walks the conditions top-down to find a suitable fallback. It tracks which models have been tried and how many attempts have been made to prevent infinite loops.
|
|
15
15
|
|
|
16
|
-
|
|
16
|
+
Two retry shapes are supported:
|
|
17
17
|
|
|
18
|
-
- Error-based
|
|
19
|
-
- Result-based
|
|
18
|
+
- **Error-based**: the model throws (timeouts, rate limits, API errors).
|
|
19
|
+
- **Result-based**: the model returns a successful response that still needs retrying (content filtering, schema mismatch, etc.).
|
|
20
20
|
|
|
21
21
|
### Installation
|
|
22
22
|
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
> [!WARNING]
|
|
23
|
+
> [!NOTE]
|
|
26
24
|
> Version compatibility:
|
|
27
25
|
>
|
|
28
|
-
> -
|
|
29
|
-
> -
|
|
26
|
+
> - `ai-retry@0.x` — AI SDK v5
|
|
27
|
+
> - `ai-retry@1.x` — AI SDK v6
|
|
30
28
|
|
|
31
29
|
```bash
|
|
32
|
-
|
|
33
|
-
npm install ai-retry@0
|
|
34
|
-
|
|
35
|
-
# AI SDK v6
|
|
36
|
-
npm install ai-retry@1
|
|
30
|
+
npm install ai-retry
|
|
37
31
|
```
|
|
38
32
|
|
|
39
33
|
### Usage
|
|
40
34
|
|
|
41
|
-
Create a retryable model by providing a base model and a list of retryables or fallback models.
|
|
42
|
-
When an error occurs, it will evaluate each retryable in order and use the first one that indicates a retry should be attempted with a different model.
|
|
43
|
-
|
|
44
35
|
> [!NOTE]
|
|
45
|
-
>
|
|
36
|
+
> **The condition API is the recommended way to configure retries.** Existing code keeps working:
|
|
37
|
+
>
|
|
38
|
+
> - The root `createRetryable` export and the function-style retryables (`contentFilterTriggered`, `requestTimeout`, …) are **deprecated but still functional**. Prefer `createRetryableModel` from `ai-retry/<family>-model` — it is typed for that family and resolves gateway strings for it.
|
|
39
|
+
> - The previously experimental `ai-retry/experimental/*` import paths were removed; the same API now ships at `ai-retry/<family>-model`.
|
|
40
|
+
>
|
|
41
|
+
> See the [migration guide](./MIGRATION.md) to move existing code to the condition API.
|
|
42
|
+
|
|
43
|
+
Create a retryable model with a base model and a list of conditions plus the action to take when a condition matches.
|
|
46
44
|
|
|
47
45
|
```typescript
|
|
46
|
+
import { anthropic } from '@ai-sdk/anthropic';
|
|
48
47
|
import { openai } from '@ai-sdk/openai';
|
|
49
|
-
import { generateText
|
|
50
|
-
import {
|
|
48
|
+
import { generateText } from 'ai';
|
|
49
|
+
import {
|
|
50
|
+
createRetryableModel,
|
|
51
|
+
error,
|
|
52
|
+
finishReason,
|
|
53
|
+
httpStatus,
|
|
54
|
+
} from 'ai-retry/language-model';
|
|
51
55
|
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
// Base model
|
|
55
|
-
model: openai('gpt-4-mini'),
|
|
56
|
+
const retryableModel = createRetryableModel({
|
|
57
|
+
model: openai('gpt-4o'),
|
|
56
58
|
retries: [
|
|
57
|
-
//
|
|
59
|
+
// Fall back to a different model on HTTP 529 or any "overloaded" message
|
|
60
|
+
httpStatus(529, 'overloaded').switch({
|
|
61
|
+
model: anthropic('claude-sonnet-4-0'),
|
|
62
|
+
}),
|
|
63
|
+
|
|
64
|
+
// Fall back when the response was content-filtered
|
|
65
|
+
finishReason('content-filter').switch({ model: openai('gpt-4o-mini') }),
|
|
66
|
+
|
|
67
|
+
// Retry the same model with exponential backoff on retryable errors
|
|
68
|
+
error.isRetryable(true).retry({ delay: 1_000, backoffFactor: 2 }),
|
|
58
69
|
],
|
|
59
70
|
});
|
|
60
71
|
|
|
61
|
-
// Use like any other AI SDK model
|
|
62
72
|
const result = await generateText({
|
|
63
73
|
model: retryableModel,
|
|
64
74
|
prompt: 'Hello world!',
|
|
65
75
|
});
|
|
66
76
|
|
|
67
77
|
console.log(result.text);
|
|
68
|
-
|
|
69
|
-
// Or with streaming
|
|
70
|
-
const result = streamText({
|
|
71
|
-
model: retryableModel,
|
|
72
|
-
prompt: 'Write a story about a robot...',
|
|
73
|
-
});
|
|
74
|
-
|
|
75
|
-
for await (const chunk of result.textStream) {
|
|
76
|
-
console.log(chunk.text);
|
|
77
|
-
}
|
|
78
78
|
```
|
|
79
79
|
|
|
80
|
-
This also works with embedding models:
|
|
80
|
+
This also works with embedding models and image models, each through their own entry point:
|
|
81
81
|
|
|
82
82
|
```typescript
|
|
83
83
|
import { openai } from '@ai-sdk/openai';
|
|
84
84
|
import { embed } from 'ai';
|
|
85
|
-
import {
|
|
85
|
+
import { createRetryableModel, httpStatus } from 'ai-retry/embedding-model';
|
|
86
86
|
|
|
87
|
-
|
|
88
|
-
const retryableModel = createRetryable({
|
|
89
|
-
// Base model
|
|
87
|
+
const retryableModel = createRetryableModel({
|
|
90
88
|
model: openai.textEmbedding('text-embedding-3-large'),
|
|
91
89
|
retries: [
|
|
92
|
-
|
|
90
|
+
httpStatus(529).switch({
|
|
91
|
+
model: openai.textEmbedding('text-embedding-3-small'),
|
|
92
|
+
}),
|
|
93
93
|
],
|
|
94
94
|
});
|
|
95
95
|
|
|
96
|
-
|
|
97
|
-
const result = await embed({
|
|
98
|
-
model: retryableModel,
|
|
99
|
-
value: 'Hello world!',
|
|
100
|
-
});
|
|
101
|
-
|
|
102
|
-
console.log(result.embedding);
|
|
96
|
+
const result = await embed({ model: retryableModel, value: 'Hello world!' });
|
|
103
97
|
```
|
|
104
98
|
|
|
105
|
-
This also works with image models:
|
|
106
|
-
|
|
107
99
|
```typescript
|
|
100
|
+
import { google } from '@ai-sdk/google';
|
|
108
101
|
import { openai } from '@ai-sdk/openai';
|
|
109
102
|
import { generateImage } from 'ai';
|
|
110
|
-
import {
|
|
103
|
+
import { createRetryableModel, noImage } from 'ai-retry/image-model';
|
|
111
104
|
|
|
112
|
-
const retryableModel =
|
|
105
|
+
const retryableModel = createRetryableModel({
|
|
113
106
|
model: openai.image('dall-e-3'),
|
|
114
107
|
retries: [
|
|
115
|
-
|
|
108
|
+
noImage().switch({ model: google.image('gemini-3-pro-image-preview') }),
|
|
116
109
|
],
|
|
117
110
|
});
|
|
118
111
|
|
|
@@ -120,805 +113,463 @@ const result = await generateImage({
|
|
|
120
113
|
model: retryableModel,
|
|
121
114
|
prompt: 'A sunset over mountains',
|
|
122
115
|
});
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
#### Entry points
|
|
123
119
|
|
|
124
|
-
|
|
120
|
+
Pick the entry point that matches the model you pass to `createRetryableModel`. Each module exposes the helpers that make sense for that model family, already typed for it, so no manual type annotations are needed.
|
|
121
|
+
|
|
122
|
+
| Entry point | For models passed to |
|
|
123
|
+
| -------------------------- | -------------------------------------------------------------- |
|
|
124
|
+
| `ai-retry/language-model` | `generateText`, `generateObject`, `streamText`, `streamObject` |
|
|
125
|
+
| `ai-retry/embedding-model` | `embed`, `embedMany` |
|
|
126
|
+
| `ai-retry/image-model` | `generateImage` |
|
|
127
|
+
|
|
128
|
+
```typescript
|
|
129
|
+
import { createRetryableModel } from 'ai-retry/language-model';
|
|
130
|
+
import { createRetryableModel } from 'ai-retry/image-model';
|
|
131
|
+
import { createRetryableModel } from 'ai-retry/embedding-model';
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
Each entry point re-exports `createRetryableModel` plus every condition for that family. The condition helpers can also be imported from the dedicated `/conditions` subpath:
|
|
135
|
+
|
|
136
|
+
```typescript
|
|
137
|
+
import {
|
|
138
|
+
error,
|
|
139
|
+
httpStatus,
|
|
140
|
+
finishReason,
|
|
141
|
+
} from 'ai-retry/language-model/conditions';
|
|
142
|
+
// or
|
|
143
|
+
import * as conditions from 'ai-retry/language-model/conditions';
|
|
125
144
|
```
|
|
126
145
|
|
|
127
146
|
#### Vercel AI Gateway
|
|
128
147
|
|
|
129
|
-
You can
|
|
148
|
+
You can pass a model as a string and it will be resolved through the default `gateway` [provider instance](https://ai-sdk.dev/providers/ai-sdk-providers/ai-gateway#provider-instance) from the AI SDK. Each entry point resolves strings to its own model family, so the string is typed against that family's gateway model ids.
|
|
130
149
|
|
|
131
150
|
```typescript
|
|
132
151
|
import { gateway } from 'ai';
|
|
133
|
-
import {
|
|
152
|
+
import { createRetryableModel } from 'ai-retry/language-model';
|
|
134
153
|
|
|
135
|
-
const retryableModel =
|
|
154
|
+
const retryableModel = createRetryableModel({
|
|
136
155
|
model: 'openai/gpt-5',
|
|
137
156
|
retries: ['anthropic/claude-sonnet-4'],
|
|
138
157
|
});
|
|
139
158
|
|
|
140
159
|
// Is the same as:
|
|
141
|
-
const
|
|
160
|
+
const retryableModel2 = createRetryableModel({
|
|
142
161
|
model: gateway('openai/gpt-5'),
|
|
143
162
|
retries: [gateway('anthropic/claude-sonnet-4')],
|
|
144
163
|
});
|
|
145
164
|
```
|
|
146
165
|
|
|
147
|
-
|
|
166
|
+
Embedding and image entry points accept gateway strings too, resolved against their respective families:
|
|
148
167
|
|
|
149
168
|
```typescript
|
|
150
|
-
import {
|
|
151
|
-
import { createRetryable } from 'ai-retry';
|
|
169
|
+
import { createRetryableModel } from 'ai-retry/embedding-model';
|
|
152
170
|
|
|
153
|
-
const
|
|
154
|
-
model:
|
|
171
|
+
const retryableEmbedding = createRetryableModel({
|
|
172
|
+
model: 'openai/text-embedding-3-large',
|
|
173
|
+
retries: ['openai/text-embedding-3-small'],
|
|
155
174
|
});
|
|
156
175
|
```
|
|
157
176
|
|
|
158
|
-
### Retryables
|
|
159
|
-
|
|
160
|
-
The objects passed to the `retries` are called retryables and control the retry behavior. We can distinguish between two types of retryables:
|
|
161
|
-
|
|
162
|
-
- **Static retryables** are simply model instances (language or embedding) that will always be used when an error occurs. They are also called fallback models.
|
|
163
|
-
- **Dynamic retryables** are functions that receive the current attempt context (error/result and previous attempts) and decide whether to retry with a different model based on custom logic.
|
|
164
|
-
|
|
165
|
-
You can think of the `retries` array as a big `if-else` block, where each dynamic retryable is an `if` branch that can match a certain error/result condition, and static retryables are the `else` branches that match all other conditions. The analogy is not perfect: the order of retryables matters, because `retries` are evaluated in order until one matches:
|
|
166
|
-
|
|
167
177
|
```typescript
|
|
168
|
-
import {
|
|
169
|
-
import { createRetryable } from 'ai-retry';
|
|
170
|
-
|
|
171
|
-
const retryableModel = createRetryable({
|
|
172
|
-
// Base model
|
|
173
|
-
model: openai('gpt-4'),
|
|
174
|
-
// Retryables are evaluated top-down in order
|
|
175
|
-
retries: [
|
|
176
|
-
// Dynamic retryables act like if-branches:
|
|
177
|
-
// If error.code == 429 (too many requests) happens, retry with this model
|
|
178
|
-
(context) => {
|
|
179
|
-
return context.current.error.statusCode === 429
|
|
180
|
-
? { model: azure('gpt-4-mini') } // Retry
|
|
181
|
-
: undefined; // Skip
|
|
182
|
-
},
|
|
183
|
-
|
|
184
|
-
// If error.message ~= "service overloaded", retry with this model
|
|
185
|
-
(context) => {
|
|
186
|
-
return context.current.error.message.includes('service overloaded')
|
|
187
|
-
? { model: azure('gpt-4-mini') } // Retry
|
|
188
|
-
: undefined; // Skip
|
|
189
|
-
},
|
|
178
|
+
import { createRetryableModel } from 'ai-retry/image-model';
|
|
190
179
|
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
// Same as:
|
|
195
|
-
// { model: anthropic('claude-3-haiku-20240307'), maxAttempts: 1 }
|
|
196
|
-
],
|
|
180
|
+
const retryableImage = createRetryableModel({
|
|
181
|
+
model: 'google/imagen-4.0-generate-001',
|
|
182
|
+
retries: ['google/imagen-4.0-fast-generate-001'],
|
|
197
183
|
});
|
|
198
184
|
```
|
|
199
185
|
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
#### Errors vs Results
|
|
186
|
+
### Retries
|
|
203
187
|
|
|
204
|
-
|
|
188
|
+
The `retries` array holds the things `ai-retry` tries, in order, when a request fails or a result needs retrying. There are two kinds:
|
|
205
189
|
|
|
206
|
-
- **
|
|
207
|
-
- **
|
|
190
|
+
- **Fallbacks** are model instances (or gateway strings). They always match and are used as plain fallbacks.
|
|
191
|
+
- **Conditions** are typed predicates produced by helpers like `error()` or `httpStatus()` and finalized with a `.switch()` or `.retry()` action. They only fire when their predicate matches.
|
|
208
192
|
|
|
209
|
-
|
|
193
|
+
You can think of `retries` as a big `if-else` chain — each condition is an `if` branch matching some error/result, and each fallback is an `else` branch matching anything left over. Order matters: the array is evaluated top-down until one matches.
|
|
210
194
|
|
|
211
195
|
```typescript
|
|
212
|
-
import {
|
|
213
|
-
import {
|
|
214
|
-
import
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
// The request threw an error - e.g., network timeout, 429 rate limit
|
|
221
|
-
console.log('Request failed with error:', error);
|
|
222
|
-
return { model: anthropic('claude-3-haiku-20240307') };
|
|
223
|
-
}
|
|
224
|
-
return undefined;
|
|
225
|
-
};
|
|
226
|
-
|
|
227
|
-
// Result-based retryable: handles successful responses that need retrying
|
|
228
|
-
const resultBasedRetry: Retryable = (context) => {
|
|
229
|
-
if (isResultAttempt(context.current)) {
|
|
230
|
-
const { result } = context.current;
|
|
231
|
-
// The request succeeded, but the response indicates a problem
|
|
232
|
-
if (result.finishReason.unified === 'content-filter') {
|
|
233
|
-
console.log('Content was filtered, trying different model');
|
|
234
|
-
return { model: openai('gpt-4') };
|
|
235
|
-
}
|
|
236
|
-
}
|
|
237
|
-
return undefined;
|
|
238
|
-
};
|
|
196
|
+
import { anthropic } from '@ai-sdk/anthropic';
|
|
197
|
+
import { azure } from '@ai-sdk/azure';
|
|
198
|
+
import { openai } from '@ai-sdk/openai';
|
|
199
|
+
import {
|
|
200
|
+
createRetryableModel,
|
|
201
|
+
error,
|
|
202
|
+
httpStatus,
|
|
203
|
+
} from 'ai-retry/language-model';
|
|
239
204
|
|
|
240
|
-
const retryableModel =
|
|
241
|
-
model:
|
|
205
|
+
const retryableModel = createRetryableModel({
|
|
206
|
+
model: openai('gpt-4'),
|
|
242
207
|
retries: [
|
|
243
|
-
//
|
|
244
|
-
|
|
208
|
+
// Condition: match HTTP 429 (rate limit)
|
|
209
|
+
httpStatus(429).switch({ model: azure('gpt-4-mini') }),
|
|
210
|
+
|
|
211
|
+
// Condition: match "overloaded" in the error message
|
|
212
|
+
error.message('overloaded').switch({ model: azure('gpt-4-mini') }),
|
|
245
213
|
|
|
246
|
-
//
|
|
247
|
-
|
|
214
|
+
// Fallback: switch to Anthropic for anything else
|
|
215
|
+
anthropic('claude-3-haiku-20240307'),
|
|
216
|
+
// Same as:
|
|
217
|
+
// { model: anthropic('claude-3-haiku-20240307'), maxAttempts: 1 }
|
|
248
218
|
],
|
|
249
219
|
});
|
|
250
220
|
```
|
|
251
221
|
|
|
252
|
-
Result-based retryables apply to language models for both generate (`generateText`, `generateObject`) and streaming (`streamText`, `streamObject`) calls. For streams, the retry decision happens when the upstream `finish` part arrives and only fires if no content has been emitted yet, so behavior like `finishReason: 'content-filter'` on an otherwise empty response can still trigger a fallback. Once any content chunk has been forwarded, the stream is committed and result-based retries are skipped.
|
|
253
|
-
|
|
254
222
|
#### Fallbacks
|
|
255
223
|
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
> [!NOTE]
|
|
259
|
-
> Use the object syntax `{ model: openai('gpt-4') }` if you need to provide additional options like `maxAttempts`, `delay`, etc.
|
|
224
|
+
A fallback is a plain model instance (or gateway string) in `retries`. It always matches, so it acts as a catch-all: when no earlier condition fired, the next fallback model is tried. Each fallback is attempted once by default; use the object form to pass options like `maxAttempts`.
|
|
260
225
|
|
|
261
226
|
```typescript
|
|
227
|
+
import { anthropic } from '@ai-sdk/anthropic';
|
|
262
228
|
import { openai } from '@ai-sdk/openai';
|
|
263
|
-
import {
|
|
264
|
-
import { createRetryable } from 'ai-retry';
|
|
229
|
+
import { createRetryableModel } from 'ai-retry/language-model';
|
|
265
230
|
|
|
266
|
-
const retryableModel =
|
|
267
|
-
|
|
268
|
-
model: openai('gpt-4-mini'),
|
|
269
|
-
// List of fallback models
|
|
231
|
+
const retryableModel = createRetryableModel({
|
|
232
|
+
model: openai('gpt-4o'),
|
|
270
233
|
retries: [
|
|
271
|
-
openai('gpt-
|
|
272
|
-
//
|
|
273
|
-
// { model: openai('gpt-3.5-turbo'), maxAttempts: 1 },
|
|
234
|
+
openai('gpt-4o-mini'), // first fallback
|
|
235
|
+
anthropic('claude-3-haiku-20240307'), // second fallback
|
|
274
236
|
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
// { model: anthropic('claude-3-haiku-20240307'), maxAttempts: 1 },
|
|
237
|
+
// Object form to pass options:
|
|
238
|
+
{ model: anthropic('claude-3-haiku-20240307'), maxAttempts: 2 },
|
|
278
239
|
],
|
|
279
240
|
});
|
|
280
241
|
```
|
|
281
242
|
|
|
282
|
-
|
|
243
|
+
Fallbacks are tried in order. Once all of them are exhausted, a `RetryError` is thrown (see [All retries failed](#all-retries-failed)).
|
|
283
244
|
|
|
284
|
-
####
|
|
245
|
+
#### Conditions
|
|
285
246
|
|
|
286
|
-
|
|
247
|
+
A `Condition` is a typed predicate over a `RetryContext`. The library ships two **low-level** builders (`error()` and `result()`) plus **high-level** helpers built on top of them. Every condition is finalized with one of two terminal actions, `.switch()` or `.retry()`, which turn it into a retryable.
|
|
287
248
|
|
|
288
|
-
|
|
289
|
-
> You can return additional options like `maxAttempts`, `delay`, etc. along with the model.
|
|
249
|
+
##### Universal conditions
|
|
290
250
|
|
|
291
|
-
|
|
292
|
-
> If you'd like the same flexibility with a typed, composable condition system, see [Experimental: Composable Conditions](#experimental-composable-conditions).
|
|
251
|
+
These are available from all three entry points (`language-model`, `embedding-model`, `image-model`).
|
|
293
252
|
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
// Get the error from the current attempt
|
|
306
|
-
const { error } = context.current;
|
|
307
|
-
|
|
308
|
-
// Check for rate limit error
|
|
309
|
-
if (APICallError.isInstance(error) && error.statusCode === 429) {
|
|
310
|
-
// Retry with a different model
|
|
311
|
-
return { model: anthropic('claude-3-haiku-20240307') };
|
|
312
|
-
}
|
|
313
|
-
}
|
|
253
|
+
| Helper | Kind | Matches when |
|
|
254
|
+
| ------------------------------- | ---------- | ------------------------------------------------------------------------------ |
|
|
255
|
+
| `error(predicate)` | low-level | The current attempt failed and `predicate(err, ctx)` returns true |
|
|
256
|
+
| `error.isRetryable(flag)` | low-level | `APICallError.isRetryable === flag` (default `true`) |
|
|
257
|
+
| `error.statusCode(...patterns)` | low-level | Numbers match the status code exactly; regex matches the stringified code |
|
|
258
|
+
| `error.message(...patterns)` | low-level | Substring (case-insensitive) or regex match against the error message |
|
|
259
|
+
| `error.isTimeout()` | low-level | `Error.name === 'TimeoutError'` (`AbortSignal.timeout()` fired) |
|
|
260
|
+
| `error.isAbort()` | low-level | `Error.name === 'AbortError'` (manual `controller.abort()`) |
|
|
261
|
+
| `httpStatus(...patterns)` | high-level | Numbers match the status code; strings match the message; regex matches either |
|
|
262
|
+
| `timeout()` | high-level | Alias for `error.isTimeout()` |
|
|
263
|
+
| `aborted()` | high-level | Alias for `error.isAbort()` |
|
|
314
264
|
|
|
315
|
-
|
|
316
|
-
return undefined;
|
|
317
|
-
};
|
|
265
|
+
###### `error(predicate)`
|
|
318
266
|
|
|
319
|
-
|
|
320
|
-
// Base model
|
|
321
|
-
model: openai('gpt-4-mini'),
|
|
322
|
-
retries: [
|
|
323
|
-
// Use custom rate limit retryable
|
|
324
|
-
rateLimitRetry,
|
|
325
|
-
|
|
326
|
-
// Other retryables...
|
|
327
|
-
],
|
|
328
|
-
});
|
|
329
|
-
```
|
|
330
|
-
|
|
331
|
-
In this example, if the base model fails with a 429 error, it will retry with `claude-3-haiku-20240307`. For any other error, it will skip to the next retryable (if any) or throw the original error.
|
|
332
|
-
|
|
333
|
-
#### All Retries Failed
|
|
334
|
-
|
|
335
|
-
If all retry attempts failed, a `RetryError` is thrown containing all individual errors.
|
|
336
|
-
If no retry was attempted (e.g. because all retryables returned `undefined`), the original error is thrown directly.
|
|
267
|
+
Takes any predicate over the failed attempt's error. Its namespace bundles the common matchers: `isRetryable` (defaults to `true`), `statusCode` (numbers or regex), `message` (case-insensitive substring or regex), and `isTimeout` / `isAbort` (match `AbortSignal.timeout()` firing vs a manual `controller.abort()`). The pattern matchers accept any number of patterns and match if any matches.
|
|
337
268
|
|
|
338
269
|
```typescript
|
|
339
|
-
import {
|
|
270
|
+
import { APICallError } from 'ai';
|
|
271
|
+
import { error } from 'ai-retry/language-model';
|
|
340
272
|
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
model: azure('gpt-4-mini'),
|
|
344
|
-
retries: [
|
|
345
|
-
// Fallback model 1 = Second attempt
|
|
346
|
-
openai('gpt-3.5-turbo'),
|
|
347
|
-
// Fallback model 2 = Third attempt
|
|
348
|
-
anthropic('claude-3-haiku-20240307'),
|
|
349
|
-
],
|
|
273
|
+
error((e) => APICallError.isInstance(e) && e.statusCode === 418).switch({
|
|
274
|
+
model: fallback,
|
|
350
275
|
});
|
|
351
276
|
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
model: retryableModel,
|
|
355
|
-
prompt: 'Hello world!',
|
|
356
|
-
});
|
|
357
|
-
} catch (error) {
|
|
358
|
-
// RetryError is an official AI SDK error
|
|
359
|
-
if (error instanceof RetryError) {
|
|
360
|
-
console.error('All retry attempts failed:', error.errors);
|
|
361
|
-
} else {
|
|
362
|
-
console.error('Request failed:', error);
|
|
363
|
-
}
|
|
364
|
-
}
|
|
365
|
-
```
|
|
366
|
-
|
|
367
|
-
Errors are tracked per unique model (provider + modelId). That means on the first error, it will retry with `gpt-3.5-turbo`. If that also fails, it will retry with `claude-3-haiku-20240307`. If that fails again, the whole retry process stops and a `RetryError` is thrown.
|
|
368
|
-
|
|
369
|
-
### Built-in Retryables
|
|
277
|
+
error.isRetryable().switch({ model: fallback }); // defaults to true
|
|
278
|
+
error.isRetryable(false).switch({ model: fallback });
|
|
370
279
|
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
> [!TIP]
|
|
374
|
-
> You are missing a retryable for your use case? [Open an issue](https://github.com/zirkelc/ai-retry/issues/new) and let's discuss it!
|
|
375
|
-
|
|
376
|
-
> [!NOTE]
|
|
377
|
-
> Looking for a composable alternative? See [Experimental: Composable Conditions](#experimental-composable-conditions) for a `condition().action()` API that builds on small primitives.
|
|
280
|
+
error.statusCode(503, 529).switch({ model: fallback });
|
|
281
|
+
error.statusCode(/^5\d\d$/).switch({ model: fallback }); // any 5xx
|
|
378
282
|
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
- [`requestNotRetryable`](./src/retryables/request-not-retryable.ts): Request failed with a non-retryable error.
|
|
382
|
-
- [`retryAfterDelay`](./src/retryables/retry-after-delay.ts): Retry with delay and exponential backoff and respect `retry-after` headers.
|
|
383
|
-
- [`serviceOverloaded`](./src/retryables/service-overloaded.ts): Response with status code 529 (service overloaded).
|
|
384
|
-
- [`serviceUnavailable`](./src/retryables/service-unavailable.ts): Response with status code 503 (service unavailable).
|
|
385
|
-
- [`schemaMismatch`](./src/retryables/schema-mismatch.ts): Response JSON doesn't match the expected schema from structured output modes (`Output.object()`, `Output.array()`, `Output.choice()`).
|
|
386
|
-
- [`noImageGenerated`](./src/retryables/no-image-generated.ts): Image generation failed with `NoImageGeneratedError`.
|
|
283
|
+
error.message('overloaded').switch({ model: fallback }); // substring
|
|
284
|
+
error.message(/rate.?limit/i).switch({ model: fallback }); // regex
|
|
387
285
|
|
|
388
|
-
|
|
286
|
+
error.isTimeout().switch({ model: fallback }); // AbortSignal.timeout() fired
|
|
287
|
+
error.isAbort().switch({ model: fallback }); // manual controller.abort()
|
|
288
|
+
```
|
|
389
289
|
|
|
390
|
-
|
|
290
|
+
###### `httpStatus(...patterns)`
|
|
391
291
|
|
|
392
|
-
|
|
393
|
-
> For streaming requests this retryable can only fire if the content filter trips before any content has been emitted. Once a text chunk flows through, the stream is committed and the fallback is skipped.
|
|
292
|
+
Matches an `APICallError` by status code (numbers), message substring (strings), or either (regex). Mix any combination in one call.
|
|
394
293
|
|
|
395
294
|
```typescript
|
|
396
|
-
import {
|
|
295
|
+
import { httpStatus } from 'ai-retry/language-model';
|
|
397
296
|
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
contentFilterTriggered(openai('gpt-4-mini')), // Try OpenAI if Azure filters
|
|
402
|
-
],
|
|
403
|
-
});
|
|
297
|
+
httpStatus(429).switch({ model: fallback }); // status code
|
|
298
|
+
httpStatus(529, 'overloaded').switch({ model: fallback }); // status or message
|
|
299
|
+
httpStatus(/^5\d\d$/).switch({ model: fallback }); // any 5xx
|
|
404
300
|
```
|
|
405
301
|
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
Handle timeouts by switching to potentially faster models.
|
|
302
|
+
###### `timeout()`
|
|
409
303
|
|
|
410
|
-
|
|
411
|
-
> You need to use an `abortSignal` with a timeout on your request.
|
|
412
|
-
|
|
413
|
-
When a request times out, the `requestTimeout` retryable will automatically create a fresh abort signal for the retry attempt. This prevents the retry from immediately failing due to the already-aborted signal from the original request. If you do not provide a `timeout` value, a default of 60 seconds is used for the retry attempt.
|
|
304
|
+
Alias for `error.isTimeout()` — matches `AbortSignal.timeout()` firing (`Error.name === 'TimeoutError'`); pass a fresh `timeout` to the action so the fallback gets its own deadline.
|
|
414
305
|
|
|
415
306
|
```typescript
|
|
416
|
-
import {
|
|
417
|
-
|
|
418
|
-
const retryableModel = createRetryable({
|
|
419
|
-
model: azure('gpt-4'),
|
|
420
|
-
retries: [
|
|
421
|
-
// Defaults to 60 seconds timeout for the retry attempt
|
|
422
|
-
requestTimeout(azure('gpt-4-mini')),
|
|
423
|
-
|
|
424
|
-
// Or specify a custom timeout for the retry attempt
|
|
425
|
-
requestTimeout(azure('gpt-4-mini'), { timeout: 30_000 }),
|
|
426
|
-
],
|
|
427
|
-
});
|
|
307
|
+
import { timeout } from 'ai-retry/language-model';
|
|
428
308
|
|
|
429
|
-
|
|
430
|
-
model: retryableModel,
|
|
431
|
-
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
|
|
432
|
-
abortSignal: AbortSignal.timeout(60_000), // Original request timeout
|
|
433
|
-
});
|
|
309
|
+
timeout().switch({ model: fallback, timeout: 30_000 });
|
|
434
310
|
```
|
|
435
311
|
|
|
436
|
-
|
|
312
|
+
###### `aborted()`
|
|
437
313
|
|
|
438
|
-
|
|
314
|
+
Alias for `error.isAbort()` — matches a manual `controller.abort()` (`Error.name === 'AbortError'`).
|
|
439
315
|
|
|
440
316
|
```typescript
|
|
441
|
-
import {
|
|
317
|
+
import { aborted } from 'ai-retry/language-model';
|
|
442
318
|
|
|
443
|
-
|
|
444
|
-
model: anthropic('claude-sonnet-4-0'),
|
|
445
|
-
retries: [
|
|
446
|
-
// Retry with delay and exponential backoff
|
|
447
|
-
serviceOverloaded(anthropic('claude-sonnet-4-0'), {
|
|
448
|
-
delay: 5_000,
|
|
449
|
-
backoffFactor: 2,
|
|
450
|
-
maxAttempts: 5,
|
|
451
|
-
}),
|
|
452
|
-
// Or switch to a different provider
|
|
453
|
-
serviceOverloaded(openai('gpt-4')),
|
|
454
|
-
],
|
|
455
|
-
});
|
|
456
|
-
|
|
457
|
-
const result = streamText({
|
|
458
|
-
model: retryableModel,
|
|
459
|
-
prompt: 'Write a story about a robot...',
|
|
460
|
-
});
|
|
319
|
+
aborted().switch({ model: fallback });
|
|
461
320
|
```
|
|
462
321
|
|
|
463
|
-
|
|
322
|
+
Each high-level helper is a thin wrapper around the low-level ones. For example, `httpStatus(...)` composes `error.statusCode(...)` with `error.message(...)`, and `timeout()` / `aborted()` are aliases for `error.isTimeout()` / `error.isAbort()`.
|
|
464
323
|
|
|
465
|
-
|
|
324
|
+
##### Language model conditions
|
|
466
325
|
|
|
467
|
-
|
|
468
|
-
import { serviceUnavailable } from 'ai-retry/retryables';
|
|
326
|
+
Only available from `ai-retry/language-model`. Result-based conditions inspect a successful response (see [Streaming](#streaming) for how they behave on streams).
|
|
469
327
|
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
```
|
|
328
|
+
| Helper | Kind | Matches when |
|
|
329
|
+
| --------------------------------- | ---------- | --------------------------------------------------------------------- |
|
|
330
|
+
| `result(predicate)` | low-level | The current attempt succeeded and `predicate(res, ctx)` returns true |
|
|
331
|
+
| `result.finishReason(...reasons)` | low-level | The result's `finishReason.unified` matches one of the given values |
|
|
332
|
+
| `finishReason(...reasons)` | high-level | Same as `result.finishReason` (re-exported for convenience) |
|
|
333
|
+
| `schemaInvalid()` | high-level | The result text fails JSON-schema validation against `responseFormat` |
|
|
477
334
|
|
|
478
|
-
|
|
335
|
+
###### `result(predicate)`
|
|
479
336
|
|
|
480
|
-
|
|
337
|
+
`result(predicate)` takes any predicate over the successful result. `result.finishReason(...reasons)` and the re-exported `finishReason(...reasons)` match the result's unified finish reason against one or more values.
|
|
481
338
|
|
|
482
339
|
```typescript
|
|
483
|
-
import {
|
|
484
|
-
import { google } from '@ai-sdk/google';
|
|
485
|
-
import { generateImage } from 'ai';
|
|
486
|
-
import { createRetryable } from 'ai-retry';
|
|
487
|
-
import { noImageGenerated } from 'ai-retry/retryables';
|
|
340
|
+
import { finishReason, result } from 'ai-retry/language-model';
|
|
488
341
|
|
|
489
|
-
|
|
490
|
-
model: openai.image('dall-e-3'),
|
|
491
|
-
retries: [
|
|
492
|
-
noImageGenerated(google.image('gemini-3-pro-image-preview')), // Switch to Gemini if DALL-E fails to generate an image
|
|
493
|
-
],
|
|
494
|
-
});
|
|
342
|
+
result((res) => res.usage.outputTokens.total === 0).switch({ model: fallback });
|
|
495
343
|
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
prompt: 'A sunset over mountains',
|
|
499
|
-
});
|
|
344
|
+
finishReason('content-filter').switch({ model: fallback });
|
|
345
|
+
finishReason('length', 'content-filter').retry({ maxAttempts: 3 });
|
|
500
346
|
```
|
|
501
347
|
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
Handle cases where the base model fails with a non-retryable error.
|
|
348
|
+
###### `schemaInvalid()`
|
|
505
349
|
|
|
506
|
-
|
|
507
|
-
> You can check if an error is retryable with the `isRetryable` property on an [`APICallError`](https://ai-sdk.dev/docs/reference/ai-sdk-errors/ai-api-call-error#ai_apicallerror).
|
|
350
|
+
Matches when the result text fails JSON-schema validation against the call's `responseFormat` (set automatically by `Output.object()`).
|
|
508
351
|
|
|
509
352
|
```typescript
|
|
510
|
-
import {
|
|
353
|
+
import { schemaInvalid } from 'ai-retry/language-model';
|
|
511
354
|
|
|
512
|
-
|
|
513
|
-
model: azure('gpt-4-mini'),
|
|
514
|
-
retries: [
|
|
515
|
-
requestNotRetryable(openai('gpt-4')), // Switch provider if error is not retryable
|
|
516
|
-
],
|
|
517
|
-
});
|
|
355
|
+
schemaInvalid().switch({ model: fallback });
|
|
518
356
|
```
|
|
519
357
|
|
|
520
|
-
|
|
358
|
+
##### Image model conditions
|
|
521
359
|
|
|
522
|
-
|
|
523
|
-
The delay and exponential backoff can be configured. If the response contains a [`retry-after`](https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers/Retry-After) header, it will be prioritized over the configured delay.
|
|
360
|
+
Only available from `ai-retry/image-model`.
|
|
524
361
|
|
|
525
|
-
|
|
362
|
+
| Helper | Kind | Matches when |
|
|
363
|
+
| ----------- | ---------- | --------------------------------------------- |
|
|
364
|
+
| `noImage()` | high-level | The image model threw `NoImageGeneratedError` |
|
|
526
365
|
|
|
527
|
-
|
|
528
|
-
import { retryAfterDelay } from 'ai-retry/retryables';
|
|
366
|
+
###### `noImage()`
|
|
529
367
|
|
|
530
|
-
|
|
531
|
-
model: openai('gpt-4'), // Base model
|
|
532
|
-
retries: [
|
|
533
|
-
// Retry base model 3 times with fixed 2s delay
|
|
534
|
-
retryAfterDelay({ delay: 2_000, maxAttempts: 3 }),
|
|
368
|
+
Matches when the image model threw `NoImageGeneratedError`.
|
|
535
369
|
|
|
536
|
-
|
|
537
|
-
|
|
370
|
+
```typescript
|
|
371
|
+
import { noImage } from 'ai-retry/image-model';
|
|
538
372
|
|
|
539
|
-
|
|
540
|
-
retryAfterDelay({ maxAttempts: 3 }),
|
|
541
|
-
],
|
|
542
|
-
});
|
|
373
|
+
noImage().switch({ model: fallback });
|
|
543
374
|
```
|
|
544
375
|
|
|
545
|
-
|
|
376
|
+
##### Embedding model conditions
|
|
546
377
|
|
|
547
|
-
|
|
378
|
+
> [!NOTE]
|
|
379
|
+
> The `embedding-model` entry point exposes only the universal conditions — there are no embedding-specific result conditions.
|
|
548
380
|
|
|
549
|
-
|
|
381
|
+
#### Actions
|
|
550
382
|
|
|
551
|
-
|
|
552
|
-
Normally, schema validation happens outside the model in `generateText`, so a schema validation error would not be seen by the retryable model. This retryable catches it early and retries with a fallback model.
|
|
383
|
+
Every condition exposes two terminal actions that turn it into a retryable:
|
|
553
384
|
|
|
554
|
-
|
|
555
|
-
|
|
385
|
+
- **`.switch({ model, ...options })`** falls back to a different model when the condition matches. Optional fields (`maxAttempts`, `delay`, `backoffFactor`, `timeout`, `options`) are the same as on a normal `Retry` object. `maxAttempts` defaults to `1`.
|
|
386
|
+
- **`.retry({ delay?, backoffFactor?, maxAttempts?, ... })`** retries the **current** model when the condition matches. Honors `Retry-After` and `Retry-After-Ms` response headers, capped at 60 seconds. `maxAttempts` defaults to `2` (one original attempt + one retry); values below `2` throw, since the retry budget is consumed by the original failure.
|
|
556
387
|
|
|
557
388
|
```typescript
|
|
558
|
-
import {
|
|
559
|
-
import { anthropic } from '@ai-sdk/anthropic';
|
|
560
|
-
import { generateText, Output } from 'ai';
|
|
561
|
-
import { createRetryable } from 'ai-retry';
|
|
562
|
-
import { schemaMismatch } from 'ai-retry/retryables';
|
|
563
|
-
import { z } from 'zod';
|
|
564
|
-
|
|
565
|
-
const retryableModel = createRetryable({
|
|
566
|
-
model: openai('gpt-4-mini'), // Weak base model
|
|
567
|
-
retries: [
|
|
568
|
-
// Retry with stronger model on schema mismatch
|
|
569
|
-
schemaMismatch(openai('gpt-5')),
|
|
570
|
-
],
|
|
571
|
-
});
|
|
389
|
+
import { error, timeout } from 'ai-retry/language-model';
|
|
572
390
|
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
output: Output.object({
|
|
576
|
-
schema: z.object({
|
|
577
|
-
name: z.string(),
|
|
578
|
-
age: z.number(),
|
|
579
|
-
}),
|
|
580
|
-
}),
|
|
581
|
-
prompt: 'Generate a person with name and age.',
|
|
582
|
-
});
|
|
391
|
+
// Switch on a timeout, with a fresh timeout for the fallback
|
|
392
|
+
timeout().switch({ model: fallback, timeout: 30_000 });
|
|
583
393
|
|
|
584
|
-
|
|
394
|
+
// Retry the current model with exponential backoff, max 3 attempts
|
|
395
|
+
error
|
|
396
|
+
.isRetryable(true)
|
|
397
|
+
.retry({ delay: 1_000, backoffFactor: 2, maxAttempts: 3 });
|
|
585
398
|
```
|
|
586
399
|
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
> [!WARNING]
|
|
590
|
-
> This API is experimental and may change. It is not exported from the package root; opt in via one of the per-model deep imports:
|
|
591
|
-
>
|
|
592
|
-
> ```ts
|
|
593
|
-
> import { ... } from 'ai-retry/experimental/language-model';
|
|
594
|
-
> import { ... } from 'ai-retry/experimental/image-model';
|
|
595
|
-
> import { ... } from 'ai-retry/experimental/embedding-model';
|
|
596
|
-
> ```
|
|
597
|
-
>
|
|
598
|
-
> Each entry point also re-exports `createRetryable` already typed for that model family, so you can either import everything from one path:
|
|
599
|
-
>
|
|
600
|
-
> ```ts
|
|
601
|
-
> import {
|
|
602
|
-
> createRetryable,
|
|
603
|
-
> error,
|
|
604
|
-
> httpStatus,
|
|
605
|
-
> } from 'ai-retry/experimental/language-model';
|
|
606
|
-
> ```
|
|
607
|
-
>
|
|
608
|
-
> or pull retryables from the dedicated `/retryables` subpath:
|
|
609
|
-
>
|
|
610
|
-
> ```ts
|
|
611
|
-
> import {
|
|
612
|
-
> error,
|
|
613
|
-
> httpStatus,
|
|
614
|
-
> } from 'ai-retry/experimental/language-model/retryables';
|
|
615
|
-
> // or
|
|
616
|
-
> import * as retryables from 'ai-retry/experimental/language-model/retryables';
|
|
617
|
-
> ```
|
|
400
|
+
#### Combinators
|
|
618
401
|
|
|
619
|
-
|
|
402
|
+
Compose conditions with the top-level `or()`, `and()`, `not()` helpers. Because each entry point is typed for a single model family, they infer the family from their arguments — no type annotations or casts needed. `or()` and `and()` are variadic.
|
|
620
403
|
|
|
621
404
|
```typescript
|
|
622
|
-
import {
|
|
623
|
-
import { openai } from '@ai-sdk/openai';
|
|
624
|
-
import { generateText } from 'ai';
|
|
625
|
-
import {
|
|
626
|
-
createRetryable,
|
|
627
|
-
error,
|
|
628
|
-
finishReason,
|
|
629
|
-
httpStatus,
|
|
630
|
-
} from 'ai-retry/experimental/language-model';
|
|
631
|
-
|
|
632
|
-
const retryableModel = createRetryable({
|
|
633
|
-
model: openai('gpt-4'),
|
|
634
|
-
retries: [
|
|
635
|
-
// Switch on 529 or any "overloaded" message
|
|
636
|
-
httpStatus(529, 'overloaded').switch({
|
|
637
|
-
model: anthropic('claude-3-haiku-20240307'),
|
|
638
|
-
}),
|
|
639
|
-
|
|
640
|
-
// Switch when the response was content-filtered
|
|
641
|
-
finishReason('content-filter').switch({ model: openai('gpt-4o') }),
|
|
405
|
+
import { and, error, httpStatus, not, or } from 'ai-retry/language-model';
|
|
642
406
|
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
});
|
|
407
|
+
or(httpStatus(429), error.message('overloaded')).switch({ model: fallback });
|
|
408
|
+
and(httpStatus(503), error.message('temporary')).switch({ model: fallback });
|
|
409
|
+
not(error.isRetryable(true)).switch({ model: fallback });
|
|
647
410
|
```
|
|
648
411
|
|
|
649
|
-
####
|
|
412
|
+
#### Custom predicates
|
|
650
413
|
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
#### Low-level conditions
|
|
654
|
-
|
|
655
|
-
The primitive builders `error(...)` and `result(...)` take a predicate and turn it into a condition; their namespaces bundle the most common field matchers on top.
|
|
656
|
-
|
|
657
|
-
| Helper | Matches when | Available in |
|
|
658
|
-
| --------------------------------- | ------------------------------------------------------------------------------------ | ---------------------- |
|
|
659
|
-
| `error(predicate)` | The current attempt failed and `predicate(err, ctx)` returns true | all three entry points |
|
|
660
|
-
| `error.isRetryable(flag)` | `APICallError.isRetryable === flag` (default `true`) | all three entry points |
|
|
661
|
-
| `error.statusCode(...patterns)` | Numbers match exactly; regex matches the stringified code (e.g. `/^5\d\d$/` for 5xx) | all three entry points |
|
|
662
|
-
| `error.message(...patterns)` | Substring (case-insensitive) or regex match against the error message | all three entry points |
|
|
663
|
-
| `result(predicate)` | The current attempt succeeded and `predicate(res, ctx)` returns true | `language-model` only |
|
|
664
|
-
| `result.finishReason(...reasons)` | The result's `finishReason.unified` matches one of the given values | `language-model` only |
|
|
414
|
+
When the higher-level helpers don't cover the field you need, drop down to `error(predicate)` / `result(predicate)` and inspect whatever is on the error or result. The predicate receives `(err | result, ctx)` and can be `async`; `ctx` is fully typed for the entry point you imported from, so the current attempt, the model, and all previous attempts are available without manual annotations.
|
|
665
415
|
|
|
666
416
|
```typescript
|
|
417
|
+
import { anthropic } from '@ai-sdk/anthropic';
|
|
418
|
+
import { openai } from '@ai-sdk/openai';
|
|
667
419
|
import { APICallError } from 'ai';
|
|
668
|
-
import { error } from 'ai-retry/
|
|
420
|
+
import { createRetryableModel, error } from 'ai-retry/language-model';
|
|
669
421
|
|
|
670
|
-
error
|
|
671
|
-
|
|
422
|
+
// OpenAI-style error code nested at data.error.code. `e` is `unknown`.
|
|
423
|
+
const isContentFilter = (e: unknown) => {
|
|
424
|
+
if (!APICallError.isInstance(e)) return false;
|
|
425
|
+
const data = e.data as { error?: { code?: string } } | undefined;
|
|
426
|
+
return data?.error?.code === 'content_filter';
|
|
427
|
+
};
|
|
428
|
+
|
|
429
|
+
const retryableModel = createRetryableModel({
|
|
430
|
+
model: openai('gpt-4o'),
|
|
431
|
+
retries: [
|
|
432
|
+
error(isContentFilter).switch({
|
|
433
|
+
model: anthropic('claude-3-haiku-20240307'),
|
|
434
|
+
}),
|
|
435
|
+
],
|
|
672
436
|
});
|
|
673
437
|
```
|
|
674
438
|
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
Convenience matchers built on top of the low-level ones for the common cases. Each returns a condition that you finalize with `.switch(...)` or `.retry(...)`.
|
|
439
|
+
The predicate's second argument is the typed `RetryContext`, so a check like “only retry on the first attempt” is just `(e, ctx) => ctx.attempts.length === 1 && isContentFilter(e)`.
|
|
678
440
|
|
|
679
|
-
|
|
680
|
-
| -------------------------- | :------------: | :---------: | :-------------: |
|
|
681
|
-
| `httpStatus(...patterns)` | ✓ | ✓ | ✓ |
|
|
682
|
-
| `timeout()` | ✓ | ✓ | ✓ |
|
|
683
|
-
| `aborted()` | ✓ | ✓ | ✓ |
|
|
684
|
-
| `finishReason(...reasons)` | ✓ | — | — |
|
|
685
|
-
| `schemaInvalid()` | ✓ | — | — |
|
|
686
|
-
| `noImage()` | — | ✓ | — |
|
|
441
|
+
#### All retries failed
|
|
687
442
|
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
| Helper | Matches when |
|
|
691
|
-
| -------------------------- | ------------------------------------------------------------------------------------------ |
|
|
692
|
-
| `httpStatus(...patterns)` | Numbers match the status code; strings match the message (substring); regex matches either |
|
|
693
|
-
| `timeout()` | `Error.name === 'TimeoutError'` (`AbortSignal.timeout()` fired) |
|
|
694
|
-
| `aborted()` | `Error.name === 'AbortError'` (manual `controller.abort()`) |
|
|
695
|
-
| `finishReason(...reasons)` | The result's `finishReason.unified` matches one of the given values |
|
|
696
|
-
| `schemaInvalid()` | The result text fails JSON-schema validation against the call's `responseFormat` |
|
|
697
|
-
| `noImage()` | The image model threw `NoImageGeneratedError` |
|
|
698
|
-
|
|
699
|
-
Each high-level helper is a thin wrapper around the low-level ones. For example, `timeout()` is roughly:
|
|
443
|
+
If all retry attempts fail, a `RetryError` is thrown containing all individual errors. If no retry was attempted (every retryable returned `undefined` / didn't match), the original error is re-thrown directly.
|
|
700
444
|
|
|
701
445
|
```typescript
|
|
702
|
-
|
|
703
|
-
return error((err) => err instanceof Error && err.name === 'TimeoutError');
|
|
704
|
-
}
|
|
705
|
-
```
|
|
706
|
-
|
|
707
|
-
and `finishReason(...)` just delegates to `result.finishReason(...)`:
|
|
446
|
+
import { RetryError } from 'ai';
|
|
708
447
|
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
448
|
+
try {
|
|
449
|
+
const result = await generateText({
|
|
450
|
+
model: retryableModel,
|
|
451
|
+
prompt: 'Hello!',
|
|
452
|
+
});
|
|
453
|
+
} catch (err) {
|
|
454
|
+
if (err instanceof RetryError) {
|
|
455
|
+
console.error('All retry attempts failed:', err.errors);
|
|
456
|
+
} else {
|
|
457
|
+
console.error('Request failed:', err);
|
|
458
|
+
}
|
|
712
459
|
}
|
|
713
460
|
```
|
|
714
461
|
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
Every condition exposes two terminal actions that turn it into a `Retryable`:
|
|
718
|
-
|
|
719
|
-
- **`.switch({ model, ...options })`** falls back to a different model when the condition matches. Optional fields (`maxAttempts`, `delay`, `backoffFactor`, `timeout`, `options`) are the same as on a normal `Retry` object. `maxAttempts` defaults to `1`.
|
|
720
|
-
- **`.retry({ delay?, backoffFactor?, maxAttempts?, ... })`** retries the current model when the condition matches. Honors `Retry-After` and `Retry-After-Ms` response headers when present, capped at 60 seconds. `maxAttempts` defaults to `2` (one original attempt + one retry); values below `2` throw, since the retry budget is consumed by the original failure.
|
|
721
|
-
|
|
722
|
-
#### Combinators
|
|
723
|
-
|
|
724
|
-
Compose conditions with `.and`, `.or`, `.not`:
|
|
725
|
-
|
|
726
|
-
```typescript
|
|
727
|
-
import { error, httpStatus } from 'ai-retry/experimental/language-model';
|
|
728
|
-
|
|
729
|
-
httpStatus(429).or(error.message('overloaded'));
|
|
730
|
-
httpStatus(503).and(error.message('temporary'));
|
|
731
|
-
error.isRetryable(true).not();
|
|
732
|
-
```
|
|
733
|
-
|
|
734
|
-
#### Mapping from Built-in retryables
|
|
735
|
-
|
|
736
|
-
Each stable retryable has an equivalent in the new shape (imports from `ai-retry/experimental/language-model` unless noted):
|
|
737
|
-
|
|
738
|
-
| Built-in | Composable form |
|
|
739
|
-
| ------------------------------------------- | ------------------------------------------------------------------------------------------------------------------- |
|
|
740
|
-
| `contentFilterTriggered(m)` | `error(/* check e.data.error.code === 'content_filter' */).or(finishReason('content-filter')).switch({ model: m })` |
|
|
741
|
-
| `requestTimeout(m)` | `timeout().switch({ model: m, timeout: 60_000 })` |
|
|
742
|
-
| `requestNotRetryable(m)` | `error.isRetryable(false).switch({ model: m })` |
|
|
743
|
-
| `schemaMismatch(m)` | `schemaInvalid().switch({ model: m })` |
|
|
744
|
-
| `serviceOverloaded(m)` | `httpStatus(529, 'overloaded').switch({ model: m })` |
|
|
745
|
-
| `serviceUnavailable(m)` | `error.statusCode(503).switch({ model: m })` |
|
|
746
|
-
| `noImageGenerated(m)` | `noImage().switch({ model: m })` (from `image-model`) |
|
|
747
|
-
| `retryAfterDelay({ delay, backoffFactor })` | `error.isRetryable(true).retry({ delay, backoffFactor })` |
|
|
748
|
-
|
|
749
|
-
> [!NOTE]
|
|
750
|
-
> `error.isRetryable(true)` matches whatever the AI SDK's `APICallError` marks retryable. By default that's status codes 408, 409, 429, and any 5xx, plus network errors and provider-specific overrides (e.g. Anthropic flips it on `error.type === 'overloaded_error'`). It picks up more cases than a manual status-code list.
|
|
462
|
+
Errors are tracked per unique model (`provider/modelId`). Once a model has hit its `maxAttempts`, no further retry will land on it.
|
|
751
463
|
|
|
752
464
|
### Options
|
|
753
465
|
|
|
754
|
-
#### Disabling
|
|
755
|
-
|
|
756
|
-
You can disable retries entirely, which is useful for testing or specific environments. When disabled, the base model will execute directly without any retry logic.
|
|
466
|
+
#### Disabling retries
|
|
757
467
|
|
|
758
468
|
```typescript
|
|
759
|
-
const retryableModel =
|
|
760
|
-
model: openai('gpt-4'),
|
|
761
|
-
retries: [
|
|
762
|
-
/* ... */
|
|
763
|
-
],
|
|
764
|
-
disabled: true, // Retries are completely disabled
|
|
765
|
-
});
|
|
766
|
-
|
|
767
|
-
// Or disable based on environment
|
|
768
|
-
const retryableModel = createRetryable({
|
|
769
|
-
model: openai('gpt-4'), // Base model
|
|
770
|
-
retries: [
|
|
771
|
-
/* ... */
|
|
772
|
-
],
|
|
773
|
-
disabled: process.env.NODE_ENV === 'test', // Disable in test environment
|
|
774
|
-
});
|
|
775
|
-
|
|
776
|
-
// Or use a function for dynamic control
|
|
777
|
-
const retryableModel = createRetryable({
|
|
778
|
-
model: openai('gpt-4'), // Base model
|
|
469
|
+
const retryableModel = createRetryableModel({
|
|
470
|
+
model: openai('gpt-4'),
|
|
779
471
|
retries: [
|
|
780
472
|
/* ... */
|
|
781
473
|
],
|
|
782
|
-
disabled:
|
|
474
|
+
disabled: true, // hard off
|
|
475
|
+
// disabled: process.env.NODE_ENV === 'test', // env-based
|
|
476
|
+
// disabled: () => !featureFlags.isEnabled('ai'), // dynamic
|
|
783
477
|
});
|
|
784
478
|
```
|
|
785
479
|
|
|
786
|
-
|
|
480
|
+
When disabled, the base model executes directly and no retry logic runs.
|
|
481
|
+
|
|
482
|
+
#### Retry delays
|
|
787
483
|
|
|
788
|
-
|
|
484
|
+
Retry delays support exponential backoff and respect the request's abort signal, so a pending delay can still be cancelled.
|
|
789
485
|
|
|
790
486
|
```typescript
|
|
791
|
-
|
|
487
|
+
import { createRetryableModel } from 'ai-retry/language-model';
|
|
488
|
+
|
|
489
|
+
const retryableModel = createRetryableModel({
|
|
792
490
|
model: openai('gpt-4'),
|
|
793
491
|
retries: [
|
|
794
|
-
// Retry
|
|
492
|
+
// Retry the base model with a fixed 2s delay
|
|
795
493
|
{ model: openai('gpt-4'), delay: 2_000, maxAttempts: 3 },
|
|
796
494
|
|
|
797
|
-
// Or
|
|
495
|
+
// Or with exponential backoff: 2s, 4s, 8s
|
|
798
496
|
{ model: openai('gpt-4'), delay: 2_000, backoffFactor: 2, maxAttempts: 3 },
|
|
799
497
|
],
|
|
800
498
|
});
|
|
801
|
-
|
|
802
|
-
const result = await generateText({
|
|
803
|
-
model: retryableModel,
|
|
804
|
-
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
|
|
805
|
-
// Will be respected during delays
|
|
806
|
-
abortSignal: AbortSignal.timeout(60_000),
|
|
807
|
-
});
|
|
808
499
|
```
|
|
809
500
|
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
```typescript
|
|
813
|
-
import { serviceOverloaded } from 'ai-retry/retryables';
|
|
814
|
-
|
|
815
|
-
const retryableModel = createRetryable({
|
|
816
|
-
model: openai('gpt-4'),
|
|
817
|
-
retries: [
|
|
818
|
-
// Wait 5 seconds before retrying on service overload
|
|
819
|
-
serviceOverloaded(openai('gpt-4'), { maxAttempts: 3, delay: 5_000 }),
|
|
820
|
-
],
|
|
821
|
-
});
|
|
822
|
-
```
|
|
501
|
+
The same `delay` / `backoffFactor` / `maxAttempts` options are accepted by `.switch({...})` and `.retry({...})`.
|
|
823
502
|
|
|
824
503
|
#### Timeouts
|
|
825
504
|
|
|
826
|
-
When a retry specifies a `timeout
|
|
505
|
+
When a retry specifies a `timeout`, a fresh `AbortSignal.timeout()` is created for that attempt. If the original `abortSignal` is still alive, the fresh deadline is composed with it via `AbortSignal.any()` so user cancellation still works. If the original signal is already aborted (a request-level deadline already fired), it is dropped so the retry runs against the fresh deadline alone.
|
|
827
506
|
|
|
828
|
-
If the original `abortSignal` is already aborted at the time of retry and the
|
|
507
|
+
If the original `abortSignal` is already aborted at the time of retry and the retry does **not** supply a `timeout`, `ai-retry` re-throws the original error rather than firing a misleading retry against the dead signal. `onError` still fires for observability; `onRetry` is skipped. Setting `timeout` is the explicit opt-in for retrying past an aborted signal.
|
|
829
508
|
|
|
830
509
|
```typescript
|
|
831
|
-
|
|
510
|
+
import { createRetryableModel, timeout } from 'ai-retry/language-model';
|
|
511
|
+
|
|
512
|
+
const retryableModel = createRetryableModel({
|
|
832
513
|
model: openai('gpt-4'),
|
|
833
514
|
retries: [
|
|
834
|
-
|
|
835
|
-
{
|
|
836
|
-
model: openai('gpt-3.5-turbo'),
|
|
837
|
-
timeout: 30_000,
|
|
838
|
-
},
|
|
515
|
+
timeout().switch({ model: openai('gpt-3.5-turbo'), timeout: 30_000 }),
|
|
839
516
|
],
|
|
840
517
|
});
|
|
841
518
|
|
|
842
|
-
|
|
843
|
-
const result = await generateText({
|
|
519
|
+
await generateText({
|
|
844
520
|
model: retryableModel,
|
|
845
521
|
prompt: 'Write a story',
|
|
846
|
-
// Original request timeout
|
|
847
522
|
abortSignal: AbortSignal.timeout(60_000),
|
|
848
523
|
});
|
|
849
524
|
```
|
|
850
525
|
|
|
851
|
-
#### Max
|
|
526
|
+
#### Max attempts
|
|
852
527
|
|
|
853
|
-
|
|
528
|
+
Each retryable attempts a model at most once by default. Use `maxAttempts` to allow more. Attempts are counted per unique model, so duplicates across multiple retryables don't get more chances than configured.
|
|
854
529
|
|
|
855
530
|
```typescript
|
|
856
|
-
const retryableModel =
|
|
531
|
+
const retryableModel = createRetryableModel({
|
|
857
532
|
model: openai('gpt-4'),
|
|
858
533
|
retries: [
|
|
859
|
-
//
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
{ model: openai('gpt-4'), maxAttempts: 2 },
|
|
863
|
-
// Already tried, won't be retried again
|
|
864
|
-
anthropic('claude-3-haiku-20240307'),
|
|
534
|
+
anthropic('claude-3-haiku-20240307'), // 1 attempt
|
|
535
|
+
{ model: openai('gpt-4'), maxAttempts: 2 }, // 1 + 1 retry
|
|
536
|
+
anthropic('claude-3-haiku-20240307'), // already used
|
|
865
537
|
],
|
|
866
538
|
});
|
|
867
539
|
```
|
|
868
540
|
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
#### Provider Options
|
|
541
|
+
#### Provider options
|
|
872
542
|
|
|
873
|
-
|
|
543
|
+
Override provider-specific options for a retry, completely replacing the original ones.
|
|
874
544
|
|
|
875
545
|
```typescript
|
|
876
|
-
const retryableModel =
|
|
546
|
+
const retryableModel = createRetryableModel({
|
|
877
547
|
model: openai('gpt-5'),
|
|
878
548
|
retries: [
|
|
879
|
-
// Use different provider options for the retry
|
|
880
549
|
{
|
|
881
550
|
model: openai('gpt-4o-2024-08-06'),
|
|
882
551
|
providerOptions: {
|
|
883
|
-
openai: {
|
|
884
|
-
user: 'fallback-user',
|
|
885
|
-
structuredOutputs: false,
|
|
886
|
-
},
|
|
552
|
+
openai: { user: 'fallback-user', structuredOutputs: false },
|
|
887
553
|
},
|
|
888
554
|
},
|
|
889
555
|
],
|
|
890
556
|
});
|
|
891
|
-
|
|
892
|
-
// Original provider options are used for the first attempt
|
|
893
|
-
const result = await generateText({
|
|
894
|
-
model: retryableModel,
|
|
895
|
-
prompt: 'Write a story',
|
|
896
|
-
providerOptions: {
|
|
897
|
-
openai: {
|
|
898
|
-
user: 'primary-user',
|
|
899
|
-
},
|
|
900
|
-
},
|
|
901
|
-
});
|
|
902
557
|
```
|
|
903
558
|
|
|
904
|
-
|
|
559
|
+
#### Call options
|
|
905
560
|
|
|
906
|
-
|
|
907
|
-
|
|
908
|
-
You can override various call options when retrying requests. This is useful for adjusting parameters like temperature, max tokens, or even the prompt itself for retry attempts. Call options are specified in the `options` field of the retry object.
|
|
561
|
+
Override any of the call options for a retry. Useful for things like temperature, max tokens, or the prompt itself.
|
|
909
562
|
|
|
910
563
|
```typescript
|
|
911
|
-
const retryableModel =
|
|
564
|
+
const retryableModel = createRetryableModel({
|
|
912
565
|
model: openai('gpt-4'),
|
|
913
566
|
retries: [
|
|
914
567
|
{
|
|
915
568
|
model: anthropic('claude-3-haiku'),
|
|
916
569
|
options: {
|
|
917
|
-
// Override generation parameters for more deterministic output
|
|
918
570
|
temperature: 0.3,
|
|
919
571
|
topP: 0.9,
|
|
920
572
|
maxOutputTokens: 500,
|
|
921
|
-
// Set a seed for reproducibility
|
|
922
573
|
seed: 42,
|
|
923
574
|
},
|
|
924
575
|
},
|
|
@@ -926,58 +577,54 @@ const retryableModel = createRetryable({
|
|
|
926
577
|
});
|
|
927
578
|
```
|
|
928
579
|
|
|
929
|
-
The following options can be overridden:
|
|
930
|
-
|
|
931
580
|
> [!NOTE]
|
|
932
581
|
> Override options completely replace the original values (they are not merged). If you don't specify an option, the original value from the request is used.
|
|
933
582
|
|
|
934
|
-
##### Language
|
|
935
|
-
|
|
936
|
-
| Option | Description |
|
|
937
|
-
| -------------------------------------------------------------------------------------------------- | ---------------------------------------------- |
|
|
938
|
-
| [`prompt`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#prompt) | Override the entire prompt for the retry |
|
|
939
|
-
| [`temperature`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#temperature) | Temperature setting for controlling randomness |
|
|
940
|
-
| [`topP`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#topp) | Nucleus sampling parameter |
|
|
941
|
-
| [`topK`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#topk) | Top-K sampling parameter |
|
|
942
|
-
| [`maxOutputTokens`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#max-output-tokens) | Maximum number of tokens to generate |
|
|
943
|
-
| [`seed`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#seed) | Random seed for deterministic generation |
|
|
944
|
-
| [`stopSequences`](https://ai-sdk.dev/docs/reference/ai-sdk-types/generate-text#stopsequences) | Stop sequences to end generation |
|
|
945
|
-
| [`presencePenalty`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#presencepenalty) | Presence penalty for reducing repetition |
|
|
946
|
-
| [`frequencyPenalty`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#frequencypenalty) | Frequency penalty for reducing repetition |
|
|
947
|
-
| [`headers`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#headers) | Additional HTTP headers |
|
|
948
|
-
| [`providerOptions`](https://ai-sdk.dev/docs/reference/ai-sdk-types/generate-text#provideroptions) | Provider-specific options |
|
|
583
|
+
##### Language model options
|
|
949
584
|
|
|
950
|
-
|
|
585
|
+
| Option | Description |
|
|
586
|
+
| ------------------ | ---------------------------------------------- |
|
|
587
|
+
| `prompt` | Override the entire prompt for the retry |
|
|
588
|
+
| `temperature` | Temperature setting for controlling randomness |
|
|
589
|
+
| `topP` | Nucleus sampling parameter |
|
|
590
|
+
| `topK` | Top-K sampling parameter |
|
|
591
|
+
| `maxOutputTokens` | Maximum number of tokens to generate |
|
|
592
|
+
| `seed` | Random seed for deterministic generation |
|
|
593
|
+
| `stopSequences` | Stop sequences to end generation |
|
|
594
|
+
| `presencePenalty` | Presence penalty for reducing repetition |
|
|
595
|
+
| `frequencyPenalty` | Frequency penalty for reducing repetition |
|
|
596
|
+
| `headers` | Additional HTTP headers |
|
|
597
|
+
| `providerOptions` | Provider-specific options |
|
|
951
598
|
|
|
952
|
-
|
|
953
|
-
| ---------------------------------------------------------------------------------------- | ---------------------------- |
|
|
954
|
-
| [`values`](https://ai-sdk.dev/docs/reference/ai-sdk-core/embed#values) | Override the values to embed |
|
|
955
|
-
| [`headers`](https://ai-sdk.dev/docs/reference/ai-sdk-core/embed#headers) | Additional HTTP headers |
|
|
956
|
-
| [`providerOptions`](https://ai-sdk.dev/docs/reference/ai-sdk-core/embed#provideroptions) | Provider-specific options |
|
|
599
|
+
##### Embedding model options
|
|
957
600
|
|
|
958
|
-
|
|
601
|
+
| Option | Description |
|
|
602
|
+
| ----------------- | ---------------------------- |
|
|
603
|
+
| `values` | Override the values to embed |
|
|
604
|
+
| `headers` | Additional HTTP headers |
|
|
605
|
+
| `providerOptions` | Provider-specific options |
|
|
959
606
|
|
|
960
|
-
|
|
961
|
-
| ------------------------------------------------------------------------------------------------- | -------------------------------- |
|
|
962
|
-
| [`n`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-image#n) | Number of images to generate |
|
|
963
|
-
| [`size`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-image#size) | Size of generated images |
|
|
964
|
-
| [`aspectRatio`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-image#aspectratio) | Aspect ratio of generated images |
|
|
965
|
-
| [`seed`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-image#seed) | Random seed for reproducibility |
|
|
966
|
-
| [`headers`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-image#headers) | Additional HTTP headers |
|
|
967
|
-
| [`providerOptions`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-image#provideroptions) | Provider-specific options |
|
|
607
|
+
##### Image model options
|
|
968
608
|
|
|
969
|
-
|
|
609
|
+
| Option | Description |
|
|
610
|
+
| ----------------- | -------------------------------- |
|
|
611
|
+
| `n` | Number of images to generate |
|
|
612
|
+
| `size` | Size of generated images |
|
|
613
|
+
| `aspectRatio` | Aspect ratio of generated images |
|
|
614
|
+
| `seed` | Random seed for reproducibility |
|
|
615
|
+
| `headers` | Additional HTTP headers |
|
|
616
|
+
| `providerOptions` | Provider-specific options |
|
|
970
617
|
|
|
971
|
-
|
|
618
|
+
#### Dynamic call options
|
|
972
619
|
|
|
973
|
-
|
|
620
|
+
You can also override call options dynamically from `onRetry`, instead of declaring them statically on the retry object. This is useful when the override depends on something only known at runtime — the prompt that just failed, the model about to be tried, or the error that triggered the retry. The overrides apply to the upcoming attempt only and can change the same fields as the static `options`. The callback can be `async` if computing the override needs to do work (e.g. fetching a fresh credential).
|
|
974
621
|
|
|
975
622
|
```typescript
|
|
976
|
-
import { createRetryable } from 'ai-retry';
|
|
977
623
|
import { azure } from '@ai-sdk/azure';
|
|
978
624
|
import { openai } from '@ai-sdk/openai';
|
|
625
|
+
import { createRetryableModel } from 'ai-retry/language-model';
|
|
979
626
|
|
|
980
|
-
const retryableModel =
|
|
627
|
+
const retryableModel = createRetryableModel({
|
|
981
628
|
model: azure('gpt-5-chat'),
|
|
982
629
|
retries: [openai('gpt-5-chat')],
|
|
983
630
|
onRetry: (context) => {
|
|
@@ -985,33 +632,16 @@ const retryableModel = createRetryable({
|
|
|
985
632
|
const previous = attempts.at(-1);
|
|
986
633
|
|
|
987
634
|
if (current.model.provider !== previous.model.provider) {
|
|
988
|
-
// Strip provider-scoped metadata
|
|
635
|
+
// Strip provider-scoped metadata before retrying on a different provider
|
|
989
636
|
return {
|
|
990
|
-
options: {
|
|
991
|
-
prompt: stripProviderMetadata(current.options.prompt),
|
|
992
|
-
},
|
|
637
|
+
options: { prompt: stripProviderMetadata(current.options.prompt) },
|
|
993
638
|
};
|
|
994
639
|
}
|
|
995
640
|
},
|
|
996
641
|
});
|
|
997
642
|
```
|
|
998
643
|
|
|
999
|
-
Inside
|
|
1000
|
-
|
|
1001
|
-
`onRetry` may also be `async`, which is useful if computing the override needs to do work (e.g. fetching a fresh credential):
|
|
1002
|
-
|
|
1003
|
-
```typescript
|
|
1004
|
-
const retryableModel = createRetryable({
|
|
1005
|
-
model: openai('gpt-4o-mini'),
|
|
1006
|
-
retries: [anthropic('claude-sonnet-4-20250514')],
|
|
1007
|
-
onRetry: async (context) => {
|
|
1008
|
-
const { current } = context;
|
|
1009
|
-
|
|
1010
|
-
const headers = await refreshAuthHeaders(current.model.provider);
|
|
1011
|
-
return { options: { headers } };
|
|
1012
|
-
},
|
|
1013
|
-
});
|
|
1014
|
-
```
|
|
644
|
+
Inside `onRetry`, `context.current.model` is the model about to be tried next; `context.current.options` and `context.current.error` describe the failed attempt that triggered the retry. The previous model is at `context.attempts.at(-1).model`.
|
|
1015
645
|
|
|
1016
646
|
**Precedence** for the upcoming retry attempt (highest to lowest):
|
|
1017
647
|
|
|
@@ -1029,10 +659,10 @@ You can use the following callbacks to log retry attempts and errors:
|
|
|
1029
659
|
- `onFailure` is invoked when the request ultimately fails and no retry could recover it.
|
|
1030
660
|
|
|
1031
661
|
```typescript
|
|
1032
|
-
const retryableModel =
|
|
1033
|
-
model: openai('gpt-
|
|
662
|
+
const retryableModel = createRetryableModel({
|
|
663
|
+
model: openai('gpt-4o-mini'),
|
|
1034
664
|
retries: [
|
|
1035
|
-
/*
|
|
665
|
+
/* ... */
|
|
1036
666
|
],
|
|
1037
667
|
onError: (context) => {
|
|
1038
668
|
console.error(
|
|
@@ -1042,7 +672,7 @@ const retryableModel = createRetryable({
|
|
|
1042
672
|
},
|
|
1043
673
|
onRetry: (context) => {
|
|
1044
674
|
console.log(
|
|
1045
|
-
`Retrying
|
|
675
|
+
`Retrying with ${context.current.model.provider}/${context.current.model.modelId}...`,
|
|
1046
676
|
);
|
|
1047
677
|
},
|
|
1048
678
|
onSuccess: (context) => {
|
|
@@ -1063,7 +693,7 @@ const retryableModel = createRetryable({
|
|
|
1063
693
|
|
|
1064
694
|
#### Reset
|
|
1065
695
|
|
|
1066
|
-
By default, every new request starts with the base model, even if a previous request was retried with a different model. The `reset` option changes this behavior by making the last successfully retried model **sticky
|
|
696
|
+
By default, every new request starts with the base model, even if a previous request was retried with a different model. The `reset` option changes this behavior by making the last successfully retried model **sticky** — subsequent requests will continue using that model until the reset condition fires.
|
|
1067
697
|
|
|
1068
698
|
| Value | Description |
|
|
1069
699
|
| ------------------ | ------------------------------------------------------------ |
|
|
@@ -1071,51 +701,29 @@ By default, every new request starts with the base model, even if a previous req
|
|
|
1071
701
|
| `after-N-requests` | Keep the retry model for the next **N** requests, then reset |
|
|
1072
702
|
| `after-N-seconds` | Keep the retry model for **N** seconds, then reset |
|
|
1073
703
|
|
|
1074
|
-
##### Reset after each request (default)
|
|
1075
|
-
|
|
1076
|
-
```typescript
|
|
1077
|
-
const retryableModel = createRetryable({
|
|
1078
|
-
model: openai('gpt-4o-mini'),
|
|
1079
|
-
retries: [anthropic('claude-sonnet-4-20250514')],
|
|
1080
|
-
reset: 'after-request', // default: always start with the base model
|
|
1081
|
-
});
|
|
1082
|
-
```
|
|
1083
|
-
|
|
1084
|
-
##### Keep the retry model for N requests
|
|
1085
|
-
|
|
1086
|
-
```typescript
|
|
1087
|
-
const retryableModel = createRetryable({
|
|
1088
|
-
model: openai('gpt-4o-mini'),
|
|
1089
|
-
retries: [anthropic('claude-sonnet-4-20250514')],
|
|
1090
|
-
reset: 'after-5-requests', // use the retry model for 5 more requests before resetting
|
|
1091
|
-
});
|
|
1092
|
-
```
|
|
1093
|
-
|
|
1094
|
-
##### Keep the retry model for N seconds
|
|
1095
|
-
|
|
1096
704
|
```typescript
|
|
1097
|
-
const retryableModel =
|
|
705
|
+
const retryableModel = createRetryableModel({
|
|
1098
706
|
model: openai('gpt-4o-mini'),
|
|
1099
707
|
retries: [anthropic('claude-sonnet-4-20250514')],
|
|
1100
|
-
reset: 'after-
|
|
708
|
+
reset: 'after-5-requests',
|
|
1101
709
|
});
|
|
1102
710
|
```
|
|
1103
711
|
|
|
1104
712
|
### Telemetry
|
|
1105
713
|
|
|
1106
714
|
> [!NOTE]
|
|
1107
|
-
> Experimental:
|
|
715
|
+
> Experimental: span names and attributes may change in patch versions.
|
|
1108
716
|
|
|
1109
|
-
`ai-retry` can emit [OpenTelemetry](https://opentelemetry.io/) spans for each request and every retry attempt.
|
|
717
|
+
`ai-retry` can emit [OpenTelemetry](https://opentelemetry.io/) spans for each request and every retry attempt. Spans are created on the active OpenTelemetry context, so they nest automatically under the AI SDK's own spans (e.g. `ai.generateText.doGenerate`) when you also enable `experimental_telemetry` on `generateText` / `streamText`. A single trace then shows the individual attempts — which model each used, why it was retried, and the backoff between them — that the SDK's own span otherwise hides.
|
|
1110
718
|
|
|
1111
719
|
#### Setup
|
|
1112
720
|
|
|
1113
721
|
Telemetry uses the optional peer dependency `@opentelemetry/api` (already present if you use the AI SDK). Register an OpenTelemetry SDK once at startup, then opt in per model:
|
|
1114
722
|
|
|
1115
723
|
```typescript
|
|
1116
|
-
import {
|
|
724
|
+
import { createRetryableModel } from 'ai-retry/language-model';
|
|
1117
725
|
|
|
1118
|
-
const retryableModel =
|
|
726
|
+
const retryableModel = createRetryableModel({
|
|
1119
727
|
model: openai('gpt-4o'),
|
|
1120
728
|
retries: [anthropic('claude-sonnet-4-5')],
|
|
1121
729
|
experimental_telemetry: { isEnabled: true },
|
|
@@ -1150,27 +758,27 @@ ai_retry.doGenerate outcome=success, attempts=2
|
|
|
1150
758
|
|
|
1151
759
|
**Operation span** attributes:
|
|
1152
760
|
|
|
1153
|
-
| Attribute
|
|
1154
|
-
|
|
|
1155
|
-
| `ai_retry.operation`
|
|
1156
|
-
| `ai_retry.outcome`
|
|
1157
|
-
| `ai_retry.attempts`
|
|
1158
|
-
| `ai_retry.model.start`
|
|
1159
|
-
| `ai_retry.model.final`
|
|
761
|
+
| Attribute | Description |
|
|
762
|
+
| ---------------------------------------------------------------------------- | ---------------------------------------------------------------------------- |
|
|
763
|
+
| `ai_retry.operation` | `doGenerate`, `doStream`, or `doEmbed` |
|
|
764
|
+
| `ai_retry.outcome` | `success` or `failure` |
|
|
765
|
+
| `ai_retry.attempts` | total number of attempts |
|
|
766
|
+
| `ai_retry.model.start` | the model the request started with (`provider/modelId`) |
|
|
767
|
+
| `ai_retry.model.final` | the model that produced the final outcome |
|
|
1160
768
|
| `ai_retry.error.{name,message,status,cause.name,cause.message,cause.status}` | the failing error (on failure); `status` when it carries an HTTP status code |
|
|
1161
|
-
| `ai_retry.function.id`, `ai_retry.metadata.*`
|
|
769
|
+
| `ai_retry.function.id`, `ai_retry.metadata.*` | from the telemetry settings |
|
|
1162
770
|
|
|
1163
771
|
**Attempt span** (`ai_retry.attempt`) attributes:
|
|
1164
772
|
|
|
1165
|
-
| Attribute
|
|
1166
|
-
|
|
|
1167
|
-
| `ai_retry.attempt.number`
|
|
1168
|
-
| `ai_retry.attempt.model`
|
|
1169
|
-
| `ai_retry.attempt.outcome`
|
|
1170
|
-
| `ai_retry.attempt.type`
|
|
1171
|
-
| `ai_retry.attempt.finish_reason`
|
|
1172
|
-
| `ai_retry.attempt.delay_ms`
|
|
1173
|
-
| `ai_retry.attempt.timeout_ms`
|
|
773
|
+
| Attribute | Description |
|
|
774
|
+
| ------------------------------------------------------------------------------------ | ------------------------------------------------------------------------ |
|
|
775
|
+
| `ai_retry.attempt.number` | 1-based attempt index |
|
|
776
|
+
| `ai_retry.attempt.model` | model used (`provider/modelId`) |
|
|
777
|
+
| `ai_retry.attempt.outcome` | `success`, `retry`, or `failure` |
|
|
778
|
+
| `ai_retry.attempt.type` | `result` or `error` |
|
|
779
|
+
| `ai_retry.attempt.finish_reason` | finish reason (result attempts) |
|
|
780
|
+
| `ai_retry.attempt.delay_ms` | backoff scheduled before the next attempt |
|
|
781
|
+
| `ai_retry.attempt.timeout_ms` | timeout budget, when the retry set one |
|
|
1174
782
|
| `ai_retry.attempt.error.{name,message,status,cause.name,cause.message,cause.status}` | the error (error attempts); `status` when it carries an HTTP status code |
|
|
1175
783
|
|
|
1176
784
|
Attempt spans also carry the standard `gen_ai.request.model` / `gen_ai.provider.name` attributes so observability tools (Langfuse, etc.) recognize and render them.
|
|
@@ -1187,10 +795,32 @@ Errors during streaming requests can occur in two ways:
|
|
|
1187
795
|
1. When the stream is initially created (e.g. network error, API error, etc.) by calling `streamText`.
|
|
1188
796
|
2. While the stream is being processed (e.g. timeout, API error, etc.) by reading from the returned `result.textStream` async iterable.
|
|
1189
797
|
|
|
1190
|
-
In the second case, errors during stream processing will not always be retried, because the stream might have already emitted some actual content and the consumer might have processed it. Retrying
|
|
798
|
+
In the second case, errors during stream processing will not always be retried, because the stream might have already emitted some actual content and the consumer might have processed it. Retrying stops as soon as the first content chunk (e.g. `text-delta` or `tool-call`) is emitted. The chunks that count as content are the same ones passed to [`onChunk()`](https://github.com/vercel/ai/blob/1fe4bd4144bff927f5319d9d206e782a73979ccb/packages/ai/src/generate-text/stream-text.ts#L684-L697).
|
|
799
|
+
|
|
800
|
+
Result-based conditions (`finishReason`, `schemaInvalid`, `result(...)`) apply to streams as well: the decision happens when the upstream `finish` part arrives and only fires if no content has been emitted yet, so behavior like `finishReason.unified === 'content-filter'` on an otherwise empty response can still trigger a fallback. Once any content chunk has been forwarded, the stream is committed and result-based retries are skipped.
|
|
1191
801
|
|
|
1192
802
|
> [!IMPORTANT]
|
|
1193
|
-
> **Streaming limitation:**
|
|
803
|
+
> **Streaming limitation:** retries and fallbacks only apply before the first content chunk is emitted. Once streaming begins delivering content, the response is committed to the current model. Mid-stream errors will propagate to the caller rather than triggering a fallback. If reliable retries are critical for your use case, consider using `generateText` instead of `streamText`.
|
|
804
|
+
|
|
805
|
+
### Deprecated: function-style retryables
|
|
806
|
+
|
|
807
|
+
The function-style helpers (`contentFilterTriggered`, `requestTimeout`, `requestNotRetryable`, `retryAfterDelay`, `schemaMismatch`, `serviceOverloaded`, `serviceUnavailable`, `noImageGenerated`) are still exported from `ai-retry/retryables` for backwards compatibility, but they are deprecated in favor of the condition API documented above.
|
|
808
|
+
|
|
809
|
+
> [!NOTE]
|
|
810
|
+
> Full documentation for the deprecated function-style retryables lives in the [earlier README](https://github.com/zirkelc/ai-retry/blob/v1/README.md). New code should use the condition API. See the [migration guide](./MIGRATION.md) to convert existing code.
|
|
811
|
+
|
|
812
|
+
Each function-style retryable has a one-line equivalent in the condition API (imports come from `ai-retry/language-model` unless noted):
|
|
813
|
+
|
|
814
|
+
| Function-style (deprecated) | Condition API |
|
|
815
|
+
| ------------------------------------------- | -------------------------------------------------------------------------------------------------------------------- |
|
|
816
|
+
| `contentFilterTriggered(m)` | `finishReason('content-filter').switch({ model: m })` |
|
|
817
|
+
| `requestTimeout(m)` | `timeout().switch({ model: m, timeout: 60_000 })` |
|
|
818
|
+
| `requestNotRetryable(m)` | `error.isRetryable(false).switch({ model: m })` |
|
|
819
|
+
| `schemaMismatch(m)` | `schemaInvalid().switch({ model: m })` |
|
|
820
|
+
| `serviceOverloaded(m)` | `httpStatus(529).switch({ model: m })` |
|
|
821
|
+
| `serviceUnavailable(m)` | `httpStatus(503).switch({ model: m })` |
|
|
822
|
+
| `noImageGenerated(m)` | `noImage().switch({ model: m })` (from `ai-retry/image-model`) |
|
|
823
|
+
| `retryAfterDelay({ delay, backoffFactor })` | `error.isRetryable(true).retry({ delay, backoffFactor })` |
|
|
1194
824
|
|
|
1195
825
|
#### Preamble buffering
|
|
1196
826
|
|
|
@@ -1201,13 +831,13 @@ Every stream begins with a non-content preamble (`stream-start`, then optionally
|
|
|
1201
831
|
|
|
1202
832
|
### API Reference
|
|
1203
833
|
|
|
1204
|
-
#### `
|
|
834
|
+
#### `createRetryableModel(options): LanguageModel | EmbeddingModel | ImageModel`
|
|
1205
835
|
|
|
1206
|
-
|
|
836
|
+
Import it from the matching per-model entry point (`ai-retry/language-model`, `ai-retry/embedding-model`, or `ai-retry/image-model`). Each entry point returns a model already narrowed to that family.
|
|
1207
837
|
|
|
1208
838
|
```ts
|
|
1209
839
|
interface RetryableModelOptions<
|
|
1210
|
-
MODEL extends
|
|
840
|
+
MODEL extends LanguageModel | EmbeddingModel | ImageModel,
|
|
1211
841
|
> {
|
|
1212
842
|
model: MODEL;
|
|
1213
843
|
retries: Array<Retryable<MODEL> | MODEL>;
|
|
@@ -1225,19 +855,26 @@ interface RetryableModelOptions<
|
|
|
1225
855
|
|
|
1226
856
|
**Options:**
|
|
1227
857
|
|
|
1228
|
-
- `model
|
|
1229
|
-
- `retries
|
|
1230
|
-
- `disabled
|
|
1231
|
-
- `reset
|
|
1232
|
-
- `experimental_telemetry
|
|
1233
|
-
- `onError
|
|
1234
|
-
- `onRetry
|
|
1235
|
-
- `onSuccess
|
|
1236
|
-
- `onFailure
|
|
858
|
+
- `model` — base model used for the initial request.
|
|
859
|
+
- `retries` — array of conditions (`.switch(...)` / `.retry(...)` outputs), models, or retry objects to try on failure.
|
|
860
|
+
- `disabled` — disable all retry logic. `boolean` or `() => boolean`. Default `false`.
|
|
861
|
+
- `reset` — controls when to reset back to the base model after a successful retry. Default `'after-request'`.
|
|
862
|
+
- `experimental_telemetry` — OpenTelemetry instrumentation. See [Telemetry](#telemetry).
|
|
863
|
+
- `onError` — fires when an error occurs.
|
|
864
|
+
- `onRetry` — fires before a retry attempt. May return `OnRetryOverrides` (or a promise of one) to override `options.*` for that attempt only. See [Dynamic call options](#dynamic-call-options).
|
|
865
|
+
- `onSuccess` — fires after a successful request.
|
|
866
|
+
- `onFailure` — fires when the request ultimately fails and no retry recovered it (no condition matched, retries exhausted, or the retry itself failed).
|
|
1237
867
|
|
|
1238
|
-
#### `
|
|
868
|
+
#### `createRetryable(options)` (deprecated)
|
|
869
|
+
|
|
870
|
+
```ts
|
|
871
|
+
import { createRetryable } from 'ai-retry';
|
|
872
|
+
```
|
|
873
|
+
|
|
874
|
+
> [!WARNING]
|
|
875
|
+
> Deprecated. The root `createRetryable` auto-detects the model family at runtime and resolves bare gateway strings as language models only. Prefer `createRetryableModel` from the matching per-model entry point.
|
|
1239
876
|
|
|
1240
|
-
|
|
877
|
+
#### `Reset`
|
|
1241
878
|
|
|
1242
879
|
```ts
|
|
1243
880
|
type Reset =
|
|
@@ -1246,77 +883,53 @@ type Reset =
|
|
|
1246
883
|
| `after-${number}-seconds`;
|
|
1247
884
|
```
|
|
1248
885
|
|
|
1249
|
-
|
|
1250
|
-
- `after-N-requests` — keep the retry model for the next N requests, then reset.
|
|
1251
|
-
- `after-N-seconds` — keep the retry model for N seconds, then reset.
|
|
1252
|
-
|
|
1253
|
-
#### `Retryable`
|
|
1254
|
-
|
|
1255
|
-
A `Retryable` is a function that receives a `RetryContext` with the current error or result and model and all previous attempts.
|
|
1256
|
-
It should evaluate the error/result and decide whether to retry by returning a `Retry` or to skip by returning `undefined`.
|
|
886
|
+
#### `Condition<MODEL>`
|
|
1257
887
|
|
|
1258
888
|
```ts
|
|
1259
|
-
|
|
1260
|
-
|
|
1261
|
-
|
|
1262
|
-
|
|
1263
|
-
|
|
1264
|
-
|
|
1265
|
-
|
|
1266
|
-
```typescript
|
|
1267
|
-
interface Retry {
|
|
1268
|
-
model: LanguageModelV3 | EmbeddingModelV3 | ImageModelV3;
|
|
1269
|
-
maxAttempts?: number; // Maximum retry attempts per model (default: 1)
|
|
1270
|
-
delay?: number; // Delay in milliseconds before retrying
|
|
1271
|
-
backoffFactor?: number; // Multiplier for exponential backoff
|
|
1272
|
-
timeout?: number; // Timeout in milliseconds for the retry attempt
|
|
1273
|
-
providerOptions?: ProviderOptions; // @deprecated - use options.providerOptions instead
|
|
1274
|
-
options?:
|
|
1275
|
-
| LanguageModelV3CallOptions
|
|
1276
|
-
| EmbeddingModelV3CallOptions
|
|
1277
|
-
| ImageModelV3CallOptions; // Call options to override for this retry
|
|
889
|
+
class Condition<MODEL> {
|
|
890
|
+
evaluate(ctx: RetryContext<MODEL>): Promise<boolean>;
|
|
891
|
+
switch(
|
|
892
|
+
target: { model: MODEL } & Omit<Retry<MODEL>, 'model'>,
|
|
893
|
+
): Retryable<MODEL>;
|
|
894
|
+
retry(options?: Omit<Retry<MODEL>, 'model'>): Retryable<MODEL>;
|
|
1278
895
|
}
|
|
1279
896
|
```
|
|
1280
897
|
|
|
1281
|
-
|
|
898
|
+
Conditions are produced by the low-level (`error`, `result`) and high-level (`httpStatus`, `timeout`, `aborted`, `finishReason`, `schemaInvalid`, `noImage`) helpers. They can be composed with the top-level `and(...conditions)` / `or(...conditions)` / `not(condition)` helpers and finalized into a `Retryable` with `.switch()` or `.retry()`.
|
|
1282
899
|
|
|
1283
|
-
|
|
900
|
+
#### `Retryable`
|
|
1284
901
|
|
|
1285
|
-
|
|
1286
|
-
|
|
1287
|
-
|
|
1288
|
-
|
|
1289
|
-
|
|
902
|
+
A `Retryable` is a function that receives a `RetryContext` and returns — either directly or as a promise — a `Retry` (to fire) or `undefined` (to skip).
|
|
903
|
+
|
|
904
|
+
```ts
|
|
905
|
+
type Retryable<MODEL> = (
|
|
906
|
+
context: RetryContext<MODEL>,
|
|
907
|
+
) => Retry<MODEL> | Promise<Retry<MODEL> | undefined> | undefined;
|
|
1290
908
|
```
|
|
1291
909
|
|
|
1292
|
-
|
|
910
|
+
The `.switch()` and `.retry()` actions return `Retryable<MODEL>` for you. Hand-written retryables are still supported when the condition helpers aren't a fit.
|
|
1293
911
|
|
|
1294
|
-
|
|
912
|
+
#### `Retry`
|
|
1295
913
|
|
|
1296
|
-
```
|
|
1297
|
-
interface
|
|
1298
|
-
|
|
1299
|
-
|
|
914
|
+
```ts
|
|
915
|
+
interface Retry<MODEL> {
|
|
916
|
+
model: MODEL;
|
|
917
|
+
maxAttempts?: number; // default: 1 for switch, 2 for retry
|
|
918
|
+
delay?: number; // ms before the attempt
|
|
919
|
+
backoffFactor?: number; // exponential multiplier
|
|
920
|
+
timeout?: number; // fresh AbortSignal.timeout() for this attempt
|
|
921
|
+
options?: RetryCallOptions<MODEL>;
|
|
1300
922
|
}
|
|
1301
923
|
```
|
|
1302
924
|
|
|
1303
|
-
|
|
925
|
+
`Retry` is the shape returned by a retryable (and accepted as a static entry in `retries: [...]`); it describes the next attempt.
|
|
1304
926
|
|
|
1305
|
-
|
|
927
|
+
#### `RetryContext`
|
|
1306
928
|
|
|
1307
|
-
```
|
|
1308
|
-
interface
|
|
1309
|
-
|
|
1310
|
-
|
|
1311
|
-
result:
|
|
1312
|
-
| LanguageModelResult
|
|
1313
|
-
| LanguageModelStream
|
|
1314
|
-
| EmbeddingModelEmbed
|
|
1315
|
-
| ImageModelGenerate;
|
|
1316
|
-
options:
|
|
1317
|
-
| LanguageModelV3CallOptions
|
|
1318
|
-
| EmbeddingModelV3CallOptions
|
|
1319
|
-
| ImageModelV3CallOptions;
|
|
929
|
+
```ts
|
|
930
|
+
interface RetryContext<MODEL> {
|
|
931
|
+
current: RetryAttempt<MODEL>;
|
|
932
|
+
attempts: Array<RetryAttempt<MODEL>>;
|
|
1320
933
|
}
|
|
1321
934
|
```
|
|
1322
935
|
|
|
@@ -1334,34 +947,45 @@ interface FailureContext {
|
|
|
1334
947
|
|
|
1335
948
|
#### `RetryAttempt`
|
|
1336
949
|
|
|
1337
|
-
|
|
1338
|
-
|
|
1339
|
-
```typescript
|
|
1340
|
-
// For language, embedding, and image models
|
|
1341
|
-
type RetryAttempt =
|
|
950
|
+
```ts
|
|
951
|
+
type RetryAttempt<MODEL> =
|
|
1342
952
|
| {
|
|
1343
953
|
type: 'error';
|
|
1344
954
|
error: unknown;
|
|
1345
|
-
model:
|
|
1346
|
-
options:
|
|
1347
|
-
| LanguageModelV3CallOptions
|
|
1348
|
-
| EmbeddingModelV3CallOptions
|
|
1349
|
-
| ImageModelV3CallOptions;
|
|
955
|
+
model: MODEL;
|
|
956
|
+
options: CallOptions<MODEL>;
|
|
1350
957
|
}
|
|
1351
958
|
| {
|
|
1352
959
|
type: 'result';
|
|
1353
960
|
result: LanguageModelResult;
|
|
1354
|
-
model:
|
|
1355
|
-
options:
|
|
961
|
+
model: LanguageModel;
|
|
962
|
+
options: LanguageModelCallOptions;
|
|
1356
963
|
};
|
|
1357
964
|
|
|
1358
|
-
// Note: Result-based retries only apply to language models (both generate and stream paths). They do not apply to embedding or image models. For streaming, retries are only possible before any content has been emitted; once a text-delta flows through, the stream is committed.
|
|
1359
|
-
|
|
1360
|
-
// Type guards for discriminating attempts
|
|
1361
965
|
function isErrorAttempt(attempt: RetryAttempt): attempt is RetryErrorAttempt;
|
|
1362
966
|
function isResultAttempt(attempt: RetryAttempt): attempt is RetryResultAttempt;
|
|
1363
967
|
```
|
|
1364
968
|
|
|
969
|
+
Result-based attempts only fire for language models (both generate and stream paths). They do not fire for embedding or image models. For streams, retries are only possible before any content has been emitted; once a content chunk flows through, the stream is committed.
|
|
970
|
+
|
|
971
|
+
`isErrorAttempt` and `isResultAttempt` are re-exported from the package root (`ai-retry`).
|
|
972
|
+
|
|
973
|
+
#### `SuccessContext`
|
|
974
|
+
|
|
975
|
+
```ts
|
|
976
|
+
interface SuccessContext<MODEL> {
|
|
977
|
+
current: {
|
|
978
|
+
type: 'success';
|
|
979
|
+
model: MODEL;
|
|
980
|
+
result: Result<MODEL>;
|
|
981
|
+
options: CallOptions<MODEL>;
|
|
982
|
+
};
|
|
983
|
+
attempts: Array<RetryAttempt<MODEL>>;
|
|
984
|
+
}
|
|
985
|
+
```
|
|
986
|
+
|
|
987
|
+
Passed to the `onSuccess` callback.
|
|
988
|
+
|
|
1365
989
|
### License
|
|
1366
990
|
|
|
1367
991
|
MIT
|