ai-retry 1.10.0 → 2.0.0-beta.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +452 -827
- package/dist/{retryables-CPAbu_M3.mjs → conditions-CfeJD4K4.mjs} +4 -4
- package/dist/{retryables-M5l_6w9k.mjs → conditions-DAetW5_1.mjs} +5 -5
- package/dist/create-retryable-model-CHCZ0fQk.mjs +247 -0
- package/dist/create-retryable-model-CmfSxgGj.mjs +244 -0
- package/dist/create-retryable-model-kbZjWNJ0.mjs +676 -0
- package/dist/embedding-model/conditions/index.d.mts +14 -0
- package/dist/embedding-model/conditions/index.mjs +7 -0
- package/dist/embedding-model/index.d.mts +14 -0
- package/dist/embedding-model/index.mjs +6 -0
- package/dist/{guards-D8UJtxDK.mjs → guards-CKn5dl__.mjs} +9 -4
- package/dist/image-model/conditions/index.d.mts +4 -0
- package/dist/image-model/conditions/index.mjs +4 -0
- package/dist/image-model/index.d.mts +14 -0
- package/dist/image-model/index.mjs +6 -0
- package/dist/{index-DaJrd4dN.d.mts → index-Bo_FxEjD.d.mts} +6 -4
- package/dist/index-ChhH9SEc.d.mts +28 -0
- package/dist/index.d.mts +34 -7
- package/dist/index.mjs +43 -2
- package/dist/language-model/conditions/index.d.mts +4 -0
- package/dist/language-model/conditions/index.mjs +4 -0
- package/dist/language-model/index.d.mts +14 -0
- package/dist/language-model/index.mjs +6 -0
- package/dist/{error-CaTT-xX8.mjs → not-6hBRaJRl.mjs} +69 -38
- package/dist/{error-B-rjhfG_.d.mts → or-MbQSVByG.d.mts} +36 -27
- package/dist/retryables/index.d.mts +54 -18
- package/dist/retryables/index.mjs +50 -14
- package/dist/telemetry-bNsaXZUI.mjs +442 -0
- package/dist/{types-Dik-mH20.d.mts → types-BrvhJykE.d.mts} +33 -20
- package/package.json +20 -20
- package/dist/create-retryable-model-D36IQyOQ.mjs +0 -1564
- package/dist/experimental/embedding-model/index.d.mts +0 -8
- package/dist/experimental/embedding-model/index.mjs +0 -19
- package/dist/experimental/embedding-model/retryables/index.d.mts +0 -20
- package/dist/experimental/embedding-model/retryables/index.mjs +0 -7
- package/dist/experimental/image-model/index.d.mts +0 -8
- package/dist/experimental/image-model/index.mjs +0 -19
- package/dist/experimental/image-model/retryables/index.d.mts +0 -4
- package/dist/experimental/image-model/retryables/index.mjs +0 -4
- package/dist/experimental/language-model/index.d.mts +0 -11
- package/dist/experimental/language-model/index.mjs +0 -19
- package/dist/experimental/language-model/retryables/index.d.mts +0 -4
- package/dist/experimental/language-model/retryables/index.mjs +0 -4
- package/dist/index-ewZ5T6B2.d.mts +0 -34
- /package/dist/{parse-retry-headers-CRxgluhe.mjs → parse-retry-headers-RPSiSNjf.mjs} +0 -0
package/README.md
CHANGED
|
@@ -11,108 +11,102 @@
|
|
|
11
11
|
|
|
12
12
|
Automatically handle API failures, content filtering, timeouts and other errors by switching between different AI models and providers.
|
|
13
13
|
|
|
14
|
-
`ai-retry` wraps
|
|
14
|
+
`ai-retry` wraps a base model with a list of typed retry **conditions**. When a request fails with an error, or the response is unsatisfactory, it walks the conditions top-down to find a suitable fallback. It tracks which models have been tried and how many attempts have been made to prevent infinite loops.
|
|
15
15
|
|
|
16
|
-
|
|
16
|
+
Two retry shapes are supported:
|
|
17
17
|
|
|
18
|
-
- Error-based
|
|
19
|
-
- Result-based
|
|
18
|
+
- **Error-based**: the model throws (timeouts, rate limits, API errors).
|
|
19
|
+
- **Result-based**: the model returns a successful response that still needs retrying (content filtering, schema mismatch, etc.).
|
|
20
20
|
|
|
21
21
|
### Installation
|
|
22
22
|
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
> [!WARNING]
|
|
23
|
+
> [!NOTE]
|
|
26
24
|
> Version compatibility:
|
|
27
25
|
>
|
|
28
|
-
> -
|
|
29
|
-
> -
|
|
26
|
+
> - `ai-retry@0.x` — AI SDK v5
|
|
27
|
+
> - `ai-retry@1.x` — AI SDK v6
|
|
28
|
+
> - `ai-retry@2.x` (beta) — AI SDK v7
|
|
30
29
|
|
|
31
30
|
```bash
|
|
32
|
-
|
|
33
|
-
npm install ai-retry@0
|
|
34
|
-
|
|
35
|
-
# AI SDK v6
|
|
36
|
-
npm install ai-retry@1
|
|
31
|
+
npm install ai-retry
|
|
37
32
|
```
|
|
38
33
|
|
|
39
34
|
### Usage
|
|
40
35
|
|
|
41
|
-
Create a retryable model by providing a base model and a list of retryables or fallback models.
|
|
42
|
-
When an error occurs, it will evaluate each retryable in order and use the first one that indicates a retry should be attempted with a different model.
|
|
43
|
-
|
|
44
36
|
> [!NOTE]
|
|
45
|
-
>
|
|
37
|
+
> **The condition API is the recommended way to configure retries.** Existing code keeps working:
|
|
38
|
+
>
|
|
39
|
+
> - The root `createRetryable` export and the function-style retryables (`contentFilterTriggered`, `requestTimeout`, …) are **deprecated but still functional**. Prefer `createRetryableModel` from `ai-retry/<family>-model` — it is typed for that family and resolves gateway strings for it.
|
|
40
|
+
> - The previously experimental `ai-retry/experimental/*` import paths were removed; the same API now ships at `ai-retry/<family>-model`.
|
|
41
|
+
>
|
|
42
|
+
> See the [migration guide](./MIGRATION.md) to move existing code to the condition API.
|
|
43
|
+
|
|
44
|
+
Create a retryable model with a base model and a list of conditions plus the action to take when a condition matches.
|
|
46
45
|
|
|
47
46
|
```typescript
|
|
47
|
+
import { anthropic } from '@ai-sdk/anthropic';
|
|
48
48
|
import { openai } from '@ai-sdk/openai';
|
|
49
|
-
import { generateText
|
|
50
|
-
import {
|
|
49
|
+
import { generateText } from 'ai';
|
|
50
|
+
import {
|
|
51
|
+
createRetryableModel,
|
|
52
|
+
error,
|
|
53
|
+
finishReason,
|
|
54
|
+
httpStatus,
|
|
55
|
+
} from 'ai-retry/language-model';
|
|
51
56
|
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
// Base model
|
|
55
|
-
model: openai('gpt-4-mini'),
|
|
57
|
+
const retryableModel = createRetryableModel({
|
|
58
|
+
model: openai('gpt-4o'),
|
|
56
59
|
retries: [
|
|
57
|
-
//
|
|
60
|
+
// Fall back to a different model on HTTP 529 or any "overloaded" message
|
|
61
|
+
httpStatus(529, 'overloaded').switch({
|
|
62
|
+
model: anthropic('claude-sonnet-4-0'),
|
|
63
|
+
}),
|
|
64
|
+
|
|
65
|
+
// Fall back when the response was content-filtered
|
|
66
|
+
finishReason('content-filter').switch({ model: openai('gpt-4o-mini') }),
|
|
67
|
+
|
|
68
|
+
// Retry the same model with exponential backoff on retryable errors
|
|
69
|
+
error.isRetryable(true).retry({ delay: 1_000, backoffFactor: 2 }),
|
|
58
70
|
],
|
|
59
71
|
});
|
|
60
72
|
|
|
61
|
-
// Use like any other AI SDK model
|
|
62
73
|
const result = await generateText({
|
|
63
74
|
model: retryableModel,
|
|
64
75
|
prompt: 'Hello world!',
|
|
65
76
|
});
|
|
66
77
|
|
|
67
78
|
console.log(result.text);
|
|
68
|
-
|
|
69
|
-
// Or with streaming
|
|
70
|
-
const result = streamText({
|
|
71
|
-
model: retryableModel,
|
|
72
|
-
prompt: 'Write a story about a robot...',
|
|
73
|
-
});
|
|
74
|
-
|
|
75
|
-
for await (const chunk of result.textStream) {
|
|
76
|
-
console.log(chunk.text);
|
|
77
|
-
}
|
|
78
79
|
```
|
|
79
80
|
|
|
80
|
-
This also works with embedding models:
|
|
81
|
+
This also works with embedding models and image models, each through their own entry point:
|
|
81
82
|
|
|
82
83
|
```typescript
|
|
83
84
|
import { openai } from '@ai-sdk/openai';
|
|
84
85
|
import { embed } from 'ai';
|
|
85
|
-
import {
|
|
86
|
+
import { createRetryableModel, httpStatus } from 'ai-retry/embedding-model';
|
|
86
87
|
|
|
87
|
-
|
|
88
|
-
const retryableModel = createRetryable({
|
|
89
|
-
// Base model
|
|
88
|
+
const retryableModel = createRetryableModel({
|
|
90
89
|
model: openai.textEmbedding('text-embedding-3-large'),
|
|
91
90
|
retries: [
|
|
92
|
-
|
|
91
|
+
httpStatus(529).switch({
|
|
92
|
+
model: openai.textEmbedding('text-embedding-3-small'),
|
|
93
|
+
}),
|
|
93
94
|
],
|
|
94
95
|
});
|
|
95
96
|
|
|
96
|
-
|
|
97
|
-
const result = await embed({
|
|
98
|
-
model: retryableModel,
|
|
99
|
-
value: 'Hello world!',
|
|
100
|
-
});
|
|
101
|
-
|
|
102
|
-
console.log(result.embedding);
|
|
97
|
+
const result = await embed({ model: retryableModel, value: 'Hello world!' });
|
|
103
98
|
```
|
|
104
99
|
|
|
105
|
-
This also works with image models:
|
|
106
|
-
|
|
107
100
|
```typescript
|
|
101
|
+
import { google } from '@ai-sdk/google';
|
|
108
102
|
import { openai } from '@ai-sdk/openai';
|
|
109
103
|
import { generateImage } from 'ai';
|
|
110
|
-
import {
|
|
104
|
+
import { createRetryableModel, noImage } from 'ai-retry/image-model';
|
|
111
105
|
|
|
112
|
-
const retryableModel =
|
|
106
|
+
const retryableModel = createRetryableModel({
|
|
113
107
|
model: openai.image('dall-e-3'),
|
|
114
108
|
retries: [
|
|
115
|
-
|
|
109
|
+
noImage().switch({ model: google.image('gemini-3-pro-image-preview') }),
|
|
116
110
|
],
|
|
117
111
|
});
|
|
118
112
|
|
|
@@ -120,805 +114,463 @@ const result = await generateImage({
|
|
|
120
114
|
model: retryableModel,
|
|
121
115
|
prompt: 'A sunset over mountains',
|
|
122
116
|
});
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
#### Entry points
|
|
123
120
|
|
|
124
|
-
|
|
121
|
+
Pick the entry point that matches the model you pass to `createRetryableModel`. Each module exposes the helpers that make sense for that model family, already typed for it, so no manual type annotations are needed.
|
|
122
|
+
|
|
123
|
+
| Entry point | For models passed to |
|
|
124
|
+
| -------------------------- | -------------------------------------------------------------- |
|
|
125
|
+
| `ai-retry/language-model` | `generateText`, `generateObject`, `streamText`, `streamObject` |
|
|
126
|
+
| `ai-retry/embedding-model` | `embed`, `embedMany` |
|
|
127
|
+
| `ai-retry/image-model` | `generateImage` |
|
|
128
|
+
|
|
129
|
+
```typescript
|
|
130
|
+
import { createRetryableModel } from 'ai-retry/language-model';
|
|
131
|
+
import { createRetryableModel } from 'ai-retry/image-model';
|
|
132
|
+
import { createRetryableModel } from 'ai-retry/embedding-model';
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
Each entry point re-exports `createRetryableModel` plus every condition for that family. The condition helpers can also be imported from the dedicated `/conditions` subpath:
|
|
136
|
+
|
|
137
|
+
```typescript
|
|
138
|
+
import {
|
|
139
|
+
error,
|
|
140
|
+
httpStatus,
|
|
141
|
+
finishReason,
|
|
142
|
+
} from 'ai-retry/language-model/conditions';
|
|
143
|
+
// or
|
|
144
|
+
import * as conditions from 'ai-retry/language-model/conditions';
|
|
125
145
|
```
|
|
126
146
|
|
|
127
147
|
#### Vercel AI Gateway
|
|
128
148
|
|
|
129
|
-
You can
|
|
149
|
+
You can pass a model as a string and it will be resolved through the default `gateway` [provider instance](https://ai-sdk.dev/providers/ai-sdk-providers/ai-gateway#provider-instance) from the AI SDK. Each entry point resolves strings to its own model family, so the string is typed against that family's gateway model ids.
|
|
130
150
|
|
|
131
151
|
```typescript
|
|
132
152
|
import { gateway } from 'ai';
|
|
133
|
-
import {
|
|
153
|
+
import { createRetryableModel } from 'ai-retry/language-model';
|
|
134
154
|
|
|
135
|
-
const retryableModel =
|
|
155
|
+
const retryableModel = createRetryableModel({
|
|
136
156
|
model: 'openai/gpt-5',
|
|
137
157
|
retries: ['anthropic/claude-sonnet-4'],
|
|
138
158
|
});
|
|
139
159
|
|
|
140
160
|
// Is the same as:
|
|
141
|
-
const
|
|
161
|
+
const retryableModel2 = createRetryableModel({
|
|
142
162
|
model: gateway('openai/gpt-5'),
|
|
143
163
|
retries: [gateway('anthropic/claude-sonnet-4')],
|
|
144
164
|
});
|
|
145
165
|
```
|
|
146
166
|
|
|
147
|
-
|
|
167
|
+
Embedding and image entry points accept gateway strings too, resolved against their respective families:
|
|
148
168
|
|
|
149
169
|
```typescript
|
|
150
|
-
import {
|
|
151
|
-
import { createRetryable } from 'ai-retry';
|
|
170
|
+
import { createRetryableModel } from 'ai-retry/embedding-model';
|
|
152
171
|
|
|
153
|
-
const
|
|
154
|
-
model:
|
|
172
|
+
const retryableEmbedding = createRetryableModel({
|
|
173
|
+
model: 'openai/text-embedding-3-large',
|
|
174
|
+
retries: ['openai/text-embedding-3-small'],
|
|
155
175
|
});
|
|
156
176
|
```
|
|
157
177
|
|
|
158
|
-
### Retryables
|
|
159
|
-
|
|
160
|
-
The objects passed to the `retries` are called retryables and control the retry behavior. We can distinguish between two types of retryables:
|
|
161
|
-
|
|
162
|
-
- **Static retryables** are simply models instances (language or embedding) that will always be used when an error occurs. They are also called fallback models.
|
|
163
|
-
- **Dynamic retryables** are functions that receive the current attempt context (error/result and previous attempts) and decide whether to retry with a different model based on custom logic.
|
|
164
|
-
|
|
165
|
-
You can think of the `retries` array as a big `if-else` block, where each dynamic retryable is an `if` branch that can match a certain error/result condition, and static retryables are the `else` branches that match all other conditions. The analogy is not perfect, because the order of retryables matters because `retries` are evaluated in order until one matches:
|
|
166
|
-
|
|
167
178
|
```typescript
|
|
168
|
-
import {
|
|
169
|
-
import { createRetryable } from 'ai-retry';
|
|
170
|
-
|
|
171
|
-
const retryableModel = createRetryable({
|
|
172
|
-
// Base model
|
|
173
|
-
model: openai('gpt-4'),
|
|
174
|
-
// Retryables are evaluated top-down in order
|
|
175
|
-
retries: [
|
|
176
|
-
// Dynamic retryables act like if-branches:
|
|
177
|
-
// If error.code == 429 (too many requests) happens, retry with this model
|
|
178
|
-
(context) => {
|
|
179
|
-
return context.current.error.statusCode === 429
|
|
180
|
-
? { model: azure('gpt-4-mini') } // Retry
|
|
181
|
-
: undefined; // Skip
|
|
182
|
-
},
|
|
183
|
-
|
|
184
|
-
// If error.message ~= "service overloaded", retry with this model
|
|
185
|
-
(context) => {
|
|
186
|
-
return context.current.error.message.includes('service overloaded')
|
|
187
|
-
? { model: azure('gpt-4-mini') } // Retry
|
|
188
|
-
: undefined; // Skip
|
|
189
|
-
},
|
|
179
|
+
import { createRetryableModel } from 'ai-retry/image-model';
|
|
190
180
|
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
// Same as:
|
|
195
|
-
// { model: anthropic('claude-3-haiku-20240307'), maxAttempts: 1 }
|
|
196
|
-
],
|
|
181
|
+
const retryableImage = createRetryableModel({
|
|
182
|
+
model: 'google/imagen-4.0-generate-001',
|
|
183
|
+
retries: ['google/imagen-4.0-fast-generate-001'],
|
|
197
184
|
});
|
|
198
185
|
```
|
|
199
186
|
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
#### Errors vs Results
|
|
187
|
+
### Retries
|
|
203
188
|
|
|
204
|
-
|
|
189
|
+
The `retries` array lists the entries `ai-retry` evaluates, in order, when a request fails or a result needs retrying. There are two kinds:
|
|
205
190
|
|
|
206
|
-
- **
|
|
207
|
-
- **
|
|
191
|
+
- **Fallbacks** are model instances (or gateway strings). They always match and are used as plain fallbacks.
|
|
192
|
+
- **Conditions** are typed predicates produced by helpers like `error()` or `httpStatus()` and finalized with a `.switch()` or `.retry()` action. They only fire when their predicate matches.
|
|
208
193
|
|
|
209
|
-
|
|
194
|
+
You can think of `retries` as a big `if-else` chain — each condition is an `if` branch matching some error/result, and each fallback is an `else` branch matching anything left over. Order matters: the array is evaluated top-down until one matches.
|
|
210
195
|
|
|
211
196
|
```typescript
|
|
212
|
-
import {
|
|
213
|
-
import {
|
|
214
|
-
import
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
// The request threw an error - e.g., network timeout, 429 rate limit
|
|
221
|
-
console.log('Request failed with error:', error);
|
|
222
|
-
return { model: anthropic('claude-3-haiku-20240307') };
|
|
223
|
-
}
|
|
224
|
-
return undefined;
|
|
225
|
-
};
|
|
226
|
-
|
|
227
|
-
// Result-based retryable: handles successful responses that need retrying
|
|
228
|
-
const resultBasedRetry: Retryable = (context) => {
|
|
229
|
-
if (isResultAttempt(context.current)) {
|
|
230
|
-
const { result } = context.current;
|
|
231
|
-
// The request succeeded, but the response indicates a problem
|
|
232
|
-
if (result.finishReason.unified === 'content-filter') {
|
|
233
|
-
console.log('Content was filtered, trying different model');
|
|
234
|
-
return { model: openai('gpt-4') };
|
|
235
|
-
}
|
|
236
|
-
}
|
|
237
|
-
return undefined;
|
|
238
|
-
};
|
|
197
|
+
import { anthropic } from '@ai-sdk/anthropic';
|
|
198
|
+
import { azure } from '@ai-sdk/azure';
|
|
199
|
+
import { openai } from '@ai-sdk/openai';
|
|
200
|
+
import {
|
|
201
|
+
createRetryableModel,
|
|
202
|
+
error,
|
|
203
|
+
httpStatus,
|
|
204
|
+
} from 'ai-retry/language-model';
|
|
239
205
|
|
|
240
|
-
const retryableModel =
|
|
241
|
-
model:
|
|
206
|
+
const retryableModel = createRetryableModel({
|
|
207
|
+
model: openai('gpt-4'),
|
|
242
208
|
retries: [
|
|
243
|
-
//
|
|
244
|
-
|
|
209
|
+
// Condition: match HTTP 429 (rate limit)
|
|
210
|
+
httpStatus(429).switch({ model: azure('gpt-4-mini') }),
|
|
211
|
+
|
|
212
|
+
// Condition: match "overloaded" in the error message
|
|
213
|
+
error.message('overloaded').switch({ model: azure('gpt-4-mini') }),
|
|
245
214
|
|
|
246
|
-
//
|
|
247
|
-
|
|
215
|
+
// Fallback: switch to Anthropic for anything else
|
|
216
|
+
anthropic('claude-3-haiku-20240307'),
|
|
217
|
+
// Same as:
|
|
218
|
+
// { model: anthropic('claude-3-haiku-20240307'), maxAttempts: 1 }
|
|
248
219
|
],
|
|
249
220
|
});
|
|
250
221
|
```
|
|
251
222
|
|
|
252
|
-
Result-based retryables apply to language models for both generate (`generateText`, `generateObject`) and streaming (`streamText`, `streamObject`) calls. For streams, the retry decision happens when the upstream `finish` part arrives and only fires if no content has been emitted yet, so behavior like `finishReason: 'content-filter'` on an otherwise empty response can still trigger a fallback. Once any content chunk has been forwarded, the stream is committed and result-based retries are skipped.
|
|
253
|
-
|
|
254
223
|
#### Fallbacks
|
|
255
224
|
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
> [!NOTE]
|
|
259
|
-
> Use the object syntax `{ model: openai('gpt-4') }` if you need to provide additional options like `maxAttempts`, `delay`, etc.
|
|
225
|
+
A fallback is a plain model instance (or gateway string) in `retries`. It always matches, so it acts as a catch-all: when no earlier condition fired, the next fallback model is tried. Each fallback is attempted once by default; use the object form to pass options like `maxAttempts`.
|
|
260
226
|
|
|
261
227
|
```typescript
|
|
228
|
+
import { anthropic } from '@ai-sdk/anthropic';
|
|
262
229
|
import { openai } from '@ai-sdk/openai';
|
|
263
|
-
import {
|
|
264
|
-
import { createRetryable } from 'ai-retry';
|
|
230
|
+
import { createRetryableModel } from 'ai-retry/language-model';
|
|
265
231
|
|
|
266
|
-
const retryableModel =
|
|
267
|
-
|
|
268
|
-
model: openai('gpt-4-mini'),
|
|
269
|
-
// List of fallback models
|
|
232
|
+
const retryableModel = createRetryableModel({
|
|
233
|
+
model: openai('gpt-4o'),
|
|
270
234
|
retries: [
|
|
271
|
-
openai('gpt-
|
|
272
|
-
//
|
|
273
|
-
// { model: openai('gpt-3.5-turbo'), maxAttempts: 1 },
|
|
235
|
+
openai('gpt-4o-mini'), // first fallback
|
|
236
|
+
anthropic('claude-3-haiku-20240307'), // second fallback
|
|
274
237
|
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
// { model: anthropic('claude-3-haiku-20240307'), maxAttempts: 1 },
|
|
238
|
+
// Object form to pass options:
|
|
239
|
+
{ model: anthropic('claude-3-haiku-20240307'), maxAttempts: 2 },
|
|
278
240
|
],
|
|
279
241
|
});
|
|
280
242
|
```
|
|
281
243
|
|
|
282
|
-
|
|
244
|
+
Fallbacks are tried in order. Once all of them are exhausted, a `RetryError` is thrown (see [All retries failed](#all-retries-failed)).
|
|
283
245
|
|
|
284
|
-
####
|
|
246
|
+
#### Conditions
|
|
285
247
|
|
|
286
|
-
|
|
248
|
+
A `Condition` is a typed predicate over a `RetryContext`. The library ships two **low-level** builders (`error()` and `result()`) plus **high-level** helpers built on top of them. Every condition is finalized with one of two terminal actions, `.switch()` or `.retry()`, which turn it into a retryable.
|
|
287
249
|
|
|
288
|
-
|
|
289
|
-
> You can return additional options like `maxAttempts`, `delay`, etc. along with the model.
|
|
250
|
+
##### Universal conditions
|
|
290
251
|
|
|
291
|
-
|
|
292
|
-
> If you'd like the same flexibility with a typed, composable condition system, see [Experimental: Composable Conditions](#experimental-composable-conditions).
|
|
252
|
+
These are available from all three entry points (`language-model`, `embedding-model`, `image-model`).
|
|
293
253
|
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
// Get the error from the current attempt
|
|
306
|
-
const { error } = context.current;
|
|
307
|
-
|
|
308
|
-
// Check for rate limit error
|
|
309
|
-
if (APICallError.isInstance(error) && error.statusCode === 429) {
|
|
310
|
-
// Retry with a different model
|
|
311
|
-
return { model: anthropic('claude-3-haiku-20240307') };
|
|
312
|
-
}
|
|
313
|
-
}
|
|
254
|
+
| Helper | Kind | Matches when |
|
|
255
|
+
| ------------------------------- | ---------- | ------------------------------------------------------------------------------ |
|
|
256
|
+
| `error(predicate)` | low-level | The current attempt failed and `predicate(err, ctx)` returns true |
|
|
257
|
+
| `error.isRetryable(flag)` | low-level | `APICallError.isRetryable === flag` (default `true`) |
|
|
258
|
+
| `error.statusCode(...patterns)` | low-level | Numbers match the status code exactly; regex matches the stringified code |
|
|
259
|
+
| `error.message(...patterns)` | low-level | Substring (case-insensitive) or regex match against the error message |
|
|
260
|
+
| `error.isTimeout()` | low-level | `Error.name === 'TimeoutError'` (`AbortSignal.timeout()` fired) |
|
|
261
|
+
| `error.isAbort()` | low-level | `Error.name === 'AbortError'` (manual `controller.abort()`) |
|
|
262
|
+
| `httpStatus(...patterns)` | high-level | Numbers match the status code; strings match the message; regex matches either |
|
|
263
|
+
| `timeout()` | high-level | Alias for `error.isTimeout()` |
|
|
264
|
+
| `aborted()` | high-level | Alias for `error.isAbort()` |
|
|
314
265
|
|
|
315
|
-
|
|
316
|
-
return undefined;
|
|
317
|
-
};
|
|
266
|
+
###### `error(predicate)`
|
|
318
267
|
|
|
319
|
-
|
|
320
|
-
// Base model
|
|
321
|
-
model: openai('gpt-4-mini'),
|
|
322
|
-
retries: [
|
|
323
|
-
// Use custom rate limit retryable
|
|
324
|
-
rateLimitRetry,
|
|
325
|
-
|
|
326
|
-
// Other retryables...
|
|
327
|
-
],
|
|
328
|
-
});
|
|
329
|
-
```
|
|
330
|
-
|
|
331
|
-
In this example, if the base model fails with a 429 error, it will retry with `claude-3-haiku-20240307`. For any other error, it will skip to the next retryable (if any) or throw the original error.
|
|
332
|
-
|
|
333
|
-
#### All Retries Failed
|
|
334
|
-
|
|
335
|
-
If all retry attempts failed, a `RetryError` is thrown containing all individual errors.
|
|
336
|
-
If no retry was attempted (e.g. because all retryables returned `undefined`), the original error is thrown directly.
|
|
268
|
+
Takes any predicate over the failed attempt's error. Its namespace bundles the common matchers: `isRetryable` (defaults to `true`), `statusCode` (numbers or regex), `message` (case-insensitive substring or regex), and `isTimeout` / `isAbort` (match `AbortSignal.timeout()` firing vs a manual `controller.abort()`). The pattern matchers accept any number of patterns and match if any matches.
|
|
337
269
|
|
|
338
270
|
```typescript
|
|
339
|
-
import {
|
|
271
|
+
import { APICallError } from 'ai';
|
|
272
|
+
import { error } from 'ai-retry/language-model';
|
|
340
273
|
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
model: azure('gpt-4-mini'),
|
|
344
|
-
retries: [
|
|
345
|
-
// Fallback model 1 = Second attempt
|
|
346
|
-
openai('gpt-3.5-turbo'),
|
|
347
|
-
// Fallback model 2 = Third attempt
|
|
348
|
-
anthropic('claude-3-haiku-20240307'),
|
|
349
|
-
],
|
|
274
|
+
error((e) => APICallError.isInstance(e) && e.statusCode === 418).switch({
|
|
275
|
+
model: fallback,
|
|
350
276
|
});
|
|
351
277
|
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
model: retryableModel,
|
|
355
|
-
prompt: 'Hello world!',
|
|
356
|
-
});
|
|
357
|
-
} catch (error) {
|
|
358
|
-
// RetryError is an official AI SDK error
|
|
359
|
-
if (error instanceof RetryError) {
|
|
360
|
-
console.error('All retry attempts failed:', error.errors);
|
|
361
|
-
} else {
|
|
362
|
-
console.error('Request failed:', error);
|
|
363
|
-
}
|
|
364
|
-
}
|
|
365
|
-
```
|
|
366
|
-
|
|
367
|
-
Errors are tracked per unique model (provider + modelId). That means on the first error, it will retry with `gpt-3.5-turbo`. If that also fails, it will retry with `claude-3-haiku-20240307`. If that fails again, the whole retry process stops and a `RetryError` is thrown.
|
|
368
|
-
|
|
369
|
-
### Built-in Retryables
|
|
278
|
+
error.isRetryable().switch({ model: fallback }); // defaults to true
|
|
279
|
+
error.isRetryable(false).switch({ model: fallback });
|
|
370
280
|
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
> [!TIP]
|
|
374
|
-
> You are missing a retryable for your use case? [Open an issue](https://github.com/zirkelc/ai-retry/issues/new) and let's discuss it!
|
|
375
|
-
|
|
376
|
-
> [!NOTE]
|
|
377
|
-
> Looking for a composable alternative? See [Experimental: Composable Conditions](#experimental-composable-conditions) for a `condition().action()` API that builds on small primitives.
|
|
281
|
+
error.statusCode(503, 529).switch({ model: fallback });
|
|
282
|
+
error.statusCode(/^5\d\d$/).switch({ model: fallback }); // any 5xx
|
|
378
283
|
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
- [`requestNotRetryable`](./src/retryables/request-not-retryable.ts): Request failed with a non-retryable error.
|
|
382
|
-
- [`retryAfterDelay`](./src/retryables/retry-after-delay.ts): Retry with delay and exponential backoff and respect `retry-after` headers.
|
|
383
|
-
- [`serviceOverloaded`](./src/retryables/service-overloaded.ts): Response with status code 529 (service overloaded).
|
|
384
|
-
- [`serviceUnavailable`](./src/retryables/service-unavailable.ts): Response with status code 503 (service unavailable).
|
|
385
|
-
- [`schemaMismatch`](./src/retryables/schema-mismatch.ts): Response JSON doesn't match the expected schema from structured output modes (`Output.object()`, `Output.array()`, `Output.choice()`).
|
|
386
|
-
- [`noImageGenerated`](./src/retryables/no-image-generated.ts): Image generation failed with `NoImageGeneratedError`.
|
|
284
|
+
error.message('overloaded').switch({ model: fallback }); // substring
|
|
285
|
+
error.message(/rate.?limit/i).switch({ model: fallback }); // regex
|
|
387
286
|
|
|
388
|
-
|
|
287
|
+
error.isTimeout().switch({ model: fallback }); // AbortSignal.timeout() fired
|
|
288
|
+
error.isAbort().switch({ model: fallback }); // manual controller.abort()
|
|
289
|
+
```
|
|
389
290
|
|
|
390
|
-
|
|
291
|
+
###### `httpStatus(...patterns)`
|
|
391
292
|
|
|
392
|
-
|
|
393
|
-
> For streaming requests this retryable can only fire if the content filter trips before any content has been emitted. Once a text chunk flows through, the stream is committed and the fallback is skipped.
|
|
293
|
+
Matches an `APICallError` by status code (numbers), message substring (strings), or either (regex). Mix any combination in one call.
|
|
394
294
|
|
|
395
295
|
```typescript
|
|
396
|
-
import {
|
|
296
|
+
import { httpStatus } from 'ai-retry/language-model';
|
|
397
297
|
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
contentFilterTriggered(openai('gpt-4-mini')), // Try OpenAI if Azure filters
|
|
402
|
-
],
|
|
403
|
-
});
|
|
298
|
+
httpStatus(429).switch({ model: fallback }); // status code
|
|
299
|
+
httpStatus(529, 'overloaded').switch({ model: fallback }); // status or message
|
|
300
|
+
httpStatus(/^5\d\d$/).switch({ model: fallback }); // any 5xx
|
|
404
301
|
```
|
|
405
302
|
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
Handle timeouts by switching to potentially faster models.
|
|
303
|
+
###### `timeout()`
|
|
409
304
|
|
|
410
|
-
|
|
411
|
-
> You need to use an `abortSignal` with a timeout on your request.
|
|
412
|
-
|
|
413
|
-
When a request times out, the `requestTimeout` retryable will automatically create a fresh abort signal for the retry attempt. This prevents the retry from immediately failing due to the already-aborted signal from the original request. If you do not provide a `timeout` value, a default of 60 seconds is used for the retry attempt.
|
|
305
|
+
Alias for `error.isTimeout()` — matches `AbortSignal.timeout()` firing (`Error.name === 'TimeoutError'`); pass a fresh `timeout` to the action so the fallback gets its own deadline.
|
|
414
306
|
|
|
415
307
|
```typescript
|
|
416
|
-
import {
|
|
417
|
-
|
|
418
|
-
const retryableModel = createRetryable({
|
|
419
|
-
model: azure('gpt-4'),
|
|
420
|
-
retries: [
|
|
421
|
-
// Defaults to 60 seconds timeout for the retry attempt
|
|
422
|
-
requestTimeout(azure('gpt-4-mini')),
|
|
423
|
-
|
|
424
|
-
// Or specify a custom timeout for the retry attempt
|
|
425
|
-
requestTimeout(azure('gpt-4-mini'), { timeout: 30_000 }),
|
|
426
|
-
],
|
|
427
|
-
});
|
|
308
|
+
import { timeout } from 'ai-retry/language-model';
|
|
428
309
|
|
|
429
|
-
|
|
430
|
-
model: retryableModel,
|
|
431
|
-
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
|
|
432
|
-
abortSignal: AbortSignal.timeout(60_000), // Original request timeout
|
|
433
|
-
});
|
|
310
|
+
timeout().switch({ model: fallback, timeout: 30_000 });
|
|
434
311
|
```
|
|
435
312
|
|
|
436
|
-
|
|
313
|
+
###### `aborted()`
|
|
437
314
|
|
|
438
|
-
|
|
315
|
+
Alias for `error.isAbort()` — matches a manual `controller.abort()` (`Error.name === 'AbortError'`).
|
|
439
316
|
|
|
440
317
|
```typescript
|
|
441
|
-
import {
|
|
318
|
+
import { aborted } from 'ai-retry/language-model';
|
|
442
319
|
|
|
443
|
-
|
|
444
|
-
model: anthropic('claude-sonnet-4-0'),
|
|
445
|
-
retries: [
|
|
446
|
-
// Retry with delay and exponential backoff
|
|
447
|
-
serviceOverloaded(anthropic('claude-sonnet-4-0'), {
|
|
448
|
-
delay: 5_000,
|
|
449
|
-
backoffFactor: 2,
|
|
450
|
-
maxAttempts: 5,
|
|
451
|
-
}),
|
|
452
|
-
// Or switch to a different provider
|
|
453
|
-
serviceOverloaded(openai('gpt-4')),
|
|
454
|
-
],
|
|
455
|
-
});
|
|
456
|
-
|
|
457
|
-
const result = streamText({
|
|
458
|
-
model: retryableModel,
|
|
459
|
-
prompt: 'Write a story about a robot...',
|
|
460
|
-
});
|
|
320
|
+
aborted().switch({ model: fallback });
|
|
461
321
|
```
|
|
462
322
|
|
|
463
|
-
|
|
323
|
+
Each high-level helper is a thin wrapper around the low-level ones. For example, `httpStatus(...)` composes `error.statusCode(...)` with `error.message(...)`, and `timeout()` / `aborted()` are aliases for `error.isTimeout()` / `error.isAbort()`.
|
|
464
324
|
|
|
465
|
-
|
|
325
|
+
##### Language model conditions
|
|
466
326
|
|
|
467
|
-
|
|
468
|
-
import { serviceUnavailable } from 'ai-retry/retryables';
|
|
327
|
+
Only available from `ai-retry/language-model`. Result-based conditions inspect a successful response (see [Streaming](#streaming) for how they behave on streams).
|
|
469
328
|
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
```
|
|
329
|
+
| Helper | Kind | Matches when |
|
|
330
|
+
| --------------------------------- | ---------- | --------------------------------------------------------------------- |
|
|
331
|
+
| `result(predicate)` | low-level | The current attempt succeeded and `predicate(res, ctx)` returns true |
|
|
332
|
+
| `result.finishReason(...reasons)` | low-level | The result's `finishReason.unified` matches one of the given values |
|
|
333
|
+
| `finishReason(...reasons)` | high-level | Same as `result.finishReason` (re-exported for convenience) |
|
|
334
|
+
| `schemaInvalid()` | high-level | The result text fails JSON-schema validation against `responseFormat` |
|
|
477
335
|
|
|
478
|
-
|
|
336
|
+
###### `result(predicate)`
|
|
479
337
|
|
|
480
|
-
|
|
338
|
+
Takes any predicate over the successful result. `result.finishReason(...reasons)` and the re-exported `finishReason(...reasons)` match the result's unified finish reason against one or more values.
|
|
481
339
|
|
|
482
340
|
```typescript
|
|
483
|
-
import {
|
|
484
|
-
import { google } from '@ai-sdk/google';
|
|
485
|
-
import { generateImage } from 'ai';
|
|
486
|
-
import { createRetryable } from 'ai-retry';
|
|
487
|
-
import { noImageGenerated } from 'ai-retry/retryables';
|
|
341
|
+
import { finishReason, result } from 'ai-retry/language-model';
|
|
488
342
|
|
|
489
|
-
|
|
490
|
-
model: openai.image('dall-e-3'),
|
|
491
|
-
retries: [
|
|
492
|
-
noImageGenerated(google.image('gemini-3-pro-image-preview')), // Switch to Gemini if DALL-E fails to generate an image
|
|
493
|
-
],
|
|
494
|
-
});
|
|
343
|
+
result((res) => res.usage.outputTokens.total === 0).switch({ model: fallback });
|
|
495
344
|
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
prompt: 'A sunset over mountains',
|
|
499
|
-
});
|
|
345
|
+
finishReason('content-filter').switch({ model: fallback });
|
|
346
|
+
finishReason('length', 'content-filter').retry({ maxAttempts: 3 });
|
|
500
347
|
```
|
|
501
348
|
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
Handle cases where the base model fails with a non-retryable error.
|
|
349
|
+
###### `schemaInvalid()`
|
|
505
350
|
|
|
506
|
-
|
|
507
|
-
> You can check if an error is retryable with the `isRetryable` property on an [`APICallError`](https://ai-sdk.dev/docs/reference/ai-sdk-errors/ai-api-call-error#ai_apicallerror).
|
|
351
|
+
Matches when the result text fails JSON-schema validation against the call's `responseFormat` (set automatically by `Output.object()`).
|
|
508
352
|
|
|
509
353
|
```typescript
|
|
510
|
-
import {
|
|
354
|
+
import { schemaInvalid } from 'ai-retry/language-model';
|
|
511
355
|
|
|
512
|
-
|
|
513
|
-
model: azure('gpt-4-mini'),
|
|
514
|
-
retries: [
|
|
515
|
-
requestNotRetryable(openai('gpt-4')), // Switch provider if error is not retryable
|
|
516
|
-
],
|
|
517
|
-
});
|
|
356
|
+
schemaInvalid().switch({ model: fallback });
|
|
518
357
|
```
|
|
519
358
|
|
|
520
|
-
|
|
359
|
+
##### Image model conditions
|
|
521
360
|
|
|
522
|
-
|
|
523
|
-
The delay and exponential backoff can be configured. If the response contains a [`retry-after`](https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers/Retry-After) header, it will be prioritized over the configured delay.
|
|
361
|
+
Only available from `ai-retry/image-model`.
|
|
524
362
|
|
|
525
|
-
|
|
363
|
+
| Helper | Kind | Matches when |
|
|
364
|
+
| ----------- | ---------- | --------------------------------------------- |
|
|
365
|
+
| `noImage()` | high-level | The image model threw `NoImageGeneratedError` |
|
|
526
366
|
|
|
527
|
-
|
|
528
|
-
import { retryAfterDelay } from 'ai-retry/retryables';
|
|
367
|
+
###### `noImage()`
|
|
529
368
|
|
|
530
|
-
|
|
531
|
-
model: openai('gpt-4'), // Base model
|
|
532
|
-
retries: [
|
|
533
|
-
// Retry base model 3 times with fixed 2s delay
|
|
534
|
-
retryAfterDelay({ delay: 2_000, maxAttempts: 3 }),
|
|
369
|
+
Matches when the image model threw `NoImageGeneratedError`.
|
|
535
370
|
|
|
536
|
-
|
|
537
|
-
|
|
371
|
+
```typescript
|
|
372
|
+
import { noImage } from 'ai-retry/image-model';
|
|
538
373
|
|
|
539
|
-
|
|
540
|
-
retryAfterDelay({ maxAttempts: 3 }),
|
|
541
|
-
],
|
|
542
|
-
});
|
|
374
|
+
noImage().switch({ model: fallback });
|
|
543
375
|
```
|
|
544
376
|
|
|
545
|
-
|
|
377
|
+
##### Embedding model conditions
|
|
546
378
|
|
|
547
|
-
|
|
379
|
+
> [!NOTE]
|
|
380
|
+
> The `embedding-model` entry point exposes only the universal conditions — there are no embedding-specific result conditions.
|
|
548
381
|
|
|
549
|
-
|
|
382
|
+
#### Actions
|
|
550
383
|
|
|
551
|
-
|
|
552
|
-
Normally, schema validation happens outside the model in `generateText`, so a schema validation error would not be seen by the retryable model. This retryable catches it early and retries with a fallback model.
|
|
384
|
+
Every condition exposes two terminal actions that turn it into a retryable:
|
|
553
385
|
|
|
554
|
-
|
|
555
|
-
|
|
386
|
+
- **`.switch({ model, ...options })`** falls back to a different model when the condition matches. Optional fields (`maxAttempts`, `delay`, `backoffFactor`, `timeout`, `options`) are the same as on a normal `Retry` object. `maxAttempts` defaults to `1`.
|
|
387
|
+
- **`.retry({ delay?, backoffFactor?, maxAttempts?, ... })`** retries the **current** model when the condition matches. Honors `Retry-After` and `Retry-After-Ms` response headers, capped at 60 seconds. `maxAttempts` defaults to `2` (one original attempt + one retry); values below `2` throw, because the original failed attempt already counts toward `maxAttempts` and would leave no room for a retry.
|
|
556
388
|
|
|
557
389
|
```typescript
|
|
558
|
-
import {
|
|
559
|
-
import { anthropic } from '@ai-sdk/anthropic';
|
|
560
|
-
import { generateText, Output } from 'ai';
|
|
561
|
-
import { createRetryable } from 'ai-retry';
|
|
562
|
-
import { schemaMismatch } from 'ai-retry/retryables';
|
|
563
|
-
import { z } from 'zod';
|
|
564
|
-
|
|
565
|
-
const retryableModel = createRetryable({
|
|
566
|
-
model: openai('gpt-4-mini'), // Weak base model
|
|
567
|
-
retries: [
|
|
568
|
-
// Retry with stronger model on schema mismatch
|
|
569
|
-
schemaMismatch(openai('gpt-5')),
|
|
570
|
-
],
|
|
571
|
-
});
|
|
390
|
+
import { error, timeout } from 'ai-retry/language-model';
|
|
572
391
|
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
output: Output.object({
|
|
576
|
-
schema: z.object({
|
|
577
|
-
name: z.string(),
|
|
578
|
-
age: z.number(),
|
|
579
|
-
}),
|
|
580
|
-
}),
|
|
581
|
-
prompt: 'Generate a person with name and age.',
|
|
582
|
-
});
|
|
392
|
+
// Switch on a timeout, with a fresh timeout for the fallback
|
|
393
|
+
timeout().switch({ model: fallback, timeout: 30_000 });
|
|
583
394
|
|
|
584
|
-
|
|
395
|
+
// Retry the current model with exponential backoff, max 3 attempts
|
|
396
|
+
error
|
|
397
|
+
.isRetryable(true)
|
|
398
|
+
.retry({ delay: 1_000, backoffFactor: 2, maxAttempts: 3 });
|
|
585
399
|
```
|
|
586
400
|
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
> [!WARNING]
|
|
590
|
-
> This API is experimental and may change. It is not exported from the package root; opt in via one of the per-model deep imports:
|
|
591
|
-
>
|
|
592
|
-
> ```ts
|
|
593
|
-
> import { ... } from 'ai-retry/experimental/language-model';
|
|
594
|
-
> import { ... } from 'ai-retry/experimental/image-model';
|
|
595
|
-
> import { ... } from 'ai-retry/experimental/embedding-model';
|
|
596
|
-
> ```
|
|
597
|
-
>
|
|
598
|
-
> Each entry point also re-exports `createRetryable` already typed for that model family, so you can either import everything from one path:
|
|
599
|
-
>
|
|
600
|
-
> ```ts
|
|
601
|
-
> import {
|
|
602
|
-
> createRetryable,
|
|
603
|
-
> error,
|
|
604
|
-
> httpStatus,
|
|
605
|
-
> } from 'ai-retry/experimental/language-model';
|
|
606
|
-
> ```
|
|
607
|
-
>
|
|
608
|
-
> or pull retryables from the dedicated `/retryables` subpath:
|
|
609
|
-
>
|
|
610
|
-
> ```ts
|
|
611
|
-
> import {
|
|
612
|
-
> error,
|
|
613
|
-
> httpStatus,
|
|
614
|
-
> } from 'ai-retry/experimental/language-model/retryables';
|
|
615
|
-
> // or
|
|
616
|
-
> import * as retryables from 'ai-retry/experimental/language-model/retryables';
|
|
617
|
-
> ```
|
|
401
|
+
#### Combinators
|
|
618
402
|
|
|
619
|
-
|
|
403
|
+
Compose conditions with the top-level `or()`, `and()`, `not()` helpers. Because each entry point is typed for a single model family, they infer the family from their arguments — no type annotations or casts needed. `or()` and `and()` are variadic.
|
|
620
404
|
|
|
621
405
|
```typescript
|
|
622
|
-
import {
|
|
623
|
-
import { openai } from '@ai-sdk/openai';
|
|
624
|
-
import { generateText } from 'ai';
|
|
625
|
-
import {
|
|
626
|
-
createRetryable,
|
|
627
|
-
error,
|
|
628
|
-
finishReason,
|
|
629
|
-
httpStatus,
|
|
630
|
-
} from 'ai-retry/experimental/language-model';
|
|
631
|
-
|
|
632
|
-
const retryableModel = createRetryable({
|
|
633
|
-
model: openai('gpt-4'),
|
|
634
|
-
retries: [
|
|
635
|
-
// Switch on 529 or any "overloaded" message
|
|
636
|
-
httpStatus(529, 'overloaded').switch({
|
|
637
|
-
model: anthropic('claude-3-haiku-20240307'),
|
|
638
|
-
}),
|
|
639
|
-
|
|
640
|
-
// Switch when the response was content-filtered
|
|
641
|
-
finishReason('content-filter').switch({ model: openai('gpt-4o') }),
|
|
406
|
+
import { and, error, httpStatus, not, or } from 'ai-retry/language-model';
|
|
642
407
|
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
});
|
|
408
|
+
or(httpStatus(429), error.message('overloaded')).switch({ model: fallback });
|
|
409
|
+
and(httpStatus(503), error.message('temporary')).switch({ model: fallback });
|
|
410
|
+
not(error.isRetryable(true)).switch({ model: fallback });
|
|
647
411
|
```
|
|
648
412
|
|
|
649
|
-
####
|
|
413
|
+
#### Custom predicates
|
|
650
414
|
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
#### Low-level conditions
|
|
654
|
-
|
|
655
|
-
The primitive builders `error(...)` and `result(...)` take a predicate and turn it into a condition; their namespaces bundle the most common field matchers on top.
|
|
656
|
-
|
|
657
|
-
| Helper | Matches when | Available in |
|
|
658
|
-
| --------------------------------- | ------------------------------------------------------------------------------------ | ---------------------- |
|
|
659
|
-
| `error(predicate)` | The current attempt failed and `predicate(err, ctx)` returns true | all three entry points |
|
|
660
|
-
| `error.isRetryable(flag)` | `APICallError.isRetryable === flag` (default `true`) | all three entry points |
|
|
661
|
-
| `error.statusCode(...patterns)` | Numbers match exactly; regex matches the stringified code (e.g. `/^5\d\d$/` for 5xx) | all three entry points |
|
|
662
|
-
| `error.message(...patterns)` | Substring (case-insensitive) or regex match against the error message | all three entry points |
|
|
663
|
-
| `result(predicate)` | The current attempt succeeded and `predicate(res, ctx)` returns true | `language-model` only |
|
|
664
|
-
| `result.finishReason(...reasons)` | The result's `finishReason.unified` matches one of the given values | `language-model` only |
|
|
415
|
+
When the higher-level helpers don't cover the field you need, drop down to `error(predicate)` / `result(predicate)` and inspect whatever is on the error or result. The predicate receives `(err | result, ctx)` and can be `async`; `ctx` is fully typed for the entry point you imported from, so the current attempt, the model, and all previous attempts are available without manual annotations.
|
|
665
416
|
|
|
666
417
|
```typescript
|
|
418
|
+
import { anthropic } from '@ai-sdk/anthropic';
|
|
419
|
+
import { openai } from '@ai-sdk/openai';
|
|
667
420
|
import { APICallError } from 'ai';
|
|
668
|
-
import { error } from 'ai-retry/
|
|
421
|
+
import { createRetryableModel, error } from 'ai-retry/language-model';
|
|
669
422
|
|
|
670
|
-
error
|
|
671
|
-
|
|
423
|
+
// OpenAI-style error code nested at data.error.code. `e` is `unknown`.
|
|
424
|
+
const isContentFilter = (e: unknown) => {
|
|
425
|
+
if (!APICallError.isInstance(e)) return false;
|
|
426
|
+
const data = e.data as { error?: { code?: string } } | undefined;
|
|
427
|
+
return data?.error?.code === 'content_filter';
|
|
428
|
+
};
|
|
429
|
+
|
|
430
|
+
const retryableModel = createRetryableModel({
|
|
431
|
+
model: openai('gpt-4o'),
|
|
432
|
+
retries: [
|
|
433
|
+
error(isContentFilter).switch({
|
|
434
|
+
model: anthropic('claude-3-haiku-20240307'),
|
|
435
|
+
}),
|
|
436
|
+
],
|
|
672
437
|
});
|
|
673
438
|
```
|
|
674
439
|
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
Convenience matchers built on top of the low-level ones for the common cases. Each returns a condition that you finalize with `.switch(...)` or `.retry(...)`.
|
|
440
|
+
The predicate's second argument is the typed `RetryContext`, so a check like “only retry on the first attempt” is just `(e, ctx) => ctx.attempts.length === 1 && isContentFilter(e)`.
|
|
678
441
|
|
|
679
|
-
|
|
680
|
-
| -------------------------- | :------------: | :---------: | :-------------: |
|
|
681
|
-
| `httpStatus(...patterns)` | ✓ | ✓ | ✓ |
|
|
682
|
-
| `timeout()` | ✓ | ✓ | ✓ |
|
|
683
|
-
| `aborted()` | ✓ | ✓ | ✓ |
|
|
684
|
-
| `finishReason(...reasons)` | ✓ | — | — |
|
|
685
|
-
| `schemaInvalid()` | ✓ | — | — |
|
|
686
|
-
| `noImage()` | — | ✓ | — |
|
|
442
|
+
#### All retries failed
|
|
687
443
|
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
| Helper | Matches when |
|
|
691
|
-
| -------------------------- | ------------------------------------------------------------------------------------------ |
|
|
692
|
-
| `httpStatus(...patterns)` | Numbers match the status code; strings match the message (substring); regex matches either |
|
|
693
|
-
| `timeout()` | `Error.name === 'TimeoutError'` (`AbortSignal.timeout()` fired) |
|
|
694
|
-
| `aborted()` | `Error.name === 'AbortError'` (manual `controller.abort()`) |
|
|
695
|
-
| `finishReason(...reasons)` | The result's `finishReason.unified` matches one of the given values |
|
|
696
|
-
| `schemaInvalid()` | The result text fails JSON-schema validation against the call's `responseFormat` |
|
|
697
|
-
| `noImage()` | The image model threw `NoImageGeneratedError` |
|
|
698
|
-
|
|
699
|
-
Each high-level helper is a thin wrapper around the low-level ones. For example, `timeout()` is roughly:
|
|
444
|
+
If all retry attempts fail, a `RetryError` is thrown containing all individual errors. If no retry was attempted (every retryable returned `undefined` / didn't match), the original error is re-thrown directly.
|
|
700
445
|
|
|
701
446
|
```typescript
|
|
702
|
-
|
|
703
|
-
return error((err) => err instanceof Error && err.name === 'TimeoutError');
|
|
704
|
-
}
|
|
705
|
-
```
|
|
706
|
-
|
|
707
|
-
and `finishReason(...)` just delegates to `result.finishReason(...)`:
|
|
447
|
+
import { RetryError } from 'ai';
|
|
708
448
|
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
449
|
+
try {
|
|
450
|
+
const result = await generateText({
|
|
451
|
+
model: retryableModel,
|
|
452
|
+
prompt: 'Hello!',
|
|
453
|
+
});
|
|
454
|
+
} catch (err) {
|
|
455
|
+
if (err instanceof RetryError) {
|
|
456
|
+
console.error('All retry attempts failed:', err.errors);
|
|
457
|
+
} else {
|
|
458
|
+
console.error('Request failed:', err);
|
|
459
|
+
}
|
|
712
460
|
}
|
|
713
461
|
```
|
|
714
462
|
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
Every condition exposes two terminal actions that turn it into a `Retryable`:
|
|
718
|
-
|
|
719
|
-
- **`.switch({ model, ...options })`** falls back to a different model when the condition matches. Optional fields (`maxAttempts`, `delay`, `backoffFactor`, `timeout`, `options`) are the same as on a normal `Retry` object. `maxAttempts` defaults to `1`.
|
|
720
|
-
- **`.retry({ delay?, backoffFactor?, maxAttempts?, ... })`** retries the current model when the condition matches. Honors `Retry-After` and `Retry-After-Ms` response headers when present, capped at 60 seconds. `maxAttempts` defaults to `2` (one original attempt + one retry); values below `2` throw, since the retry budget is consumed by the original failure.
|
|
721
|
-
|
|
722
|
-
#### Combinators
|
|
723
|
-
|
|
724
|
-
Compose conditions with `.and`, `.or`, `.not`:
|
|
725
|
-
|
|
726
|
-
```typescript
|
|
727
|
-
import { error, httpStatus } from 'ai-retry/experimental/language-model';
|
|
728
|
-
|
|
729
|
-
httpStatus(429).or(error.message('overloaded'));
|
|
730
|
-
httpStatus(503).and(error.message('temporary'));
|
|
731
|
-
error.isRetryable(true).not();
|
|
732
|
-
```
|
|
733
|
-
|
|
734
|
-
#### Mapping from Built-in retryables
|
|
735
|
-
|
|
736
|
-
Each stable retryable has an equivalent in the new shape (imports from `ai-retry/experimental/language-model` unless noted):
|
|
737
|
-
|
|
738
|
-
| Built-in | Composable form |
|
|
739
|
-
| ------------------------------------------- | ------------------------------------------------------------------------------------------------------------------- |
|
|
740
|
-
| `contentFilterTriggered(m)` | `error(/* check e.data.error.code === 'content_filter' */).or(finishReason('content-filter')).switch({ model: m })` |
|
|
741
|
-
| `requestTimeout(m)` | `timeout().switch({ model: m, timeout: 60_000 })` |
|
|
742
|
-
| `requestNotRetryable(m)` | `error.isRetryable(false).switch({ model: m })` |
|
|
743
|
-
| `schemaMismatch(m)` | `schemaInvalid().switch({ model: m })` |
|
|
744
|
-
| `serviceOverloaded(m)` | `httpStatus(529, 'overloaded').switch({ model: m })` |
|
|
745
|
-
| `serviceUnavailable(m)` | `error.statusCode(503).switch({ model: m })` |
|
|
746
|
-
| `noImageGenerated(m)` | `noImage().switch({ model: m })` (from `image-model`) |
|
|
747
|
-
| `retryAfterDelay({ delay, backoffFactor })` | `error.isRetryable(true).retry({ delay, backoffFactor })` |
|
|
748
|
-
|
|
749
|
-
> [!NOTE]
|
|
750
|
-
> `error.isRetryable(true)` matches whatever the AI SDK's `APICallError` marks retryable. By default that's status codes 408, 409, 429, and any 5xx, plus network errors and provider-specific overrides (e.g. Anthropic flips it on `error.type === 'overloaded_error'`). It picks up more cases than a manual status-code list.
|
|
463
|
+
Errors are tracked per unique model (`provider/modelId`). Once a model has hit its `maxAttempts`, no further retry will land on it.
|
|
751
464
|
|
|
752
465
|
### Options
|
|
753
466
|
|
|
754
|
-
#### Disabling
|
|
755
|
-
|
|
756
|
-
You can disable retries entirely, which is useful for testing or specific environments. When disabled, the base model will execute directly without any retry logic.
|
|
467
|
+
#### Disabling retries
|
|
757
468
|
|
|
758
469
|
```typescript
|
|
759
|
-
const retryableModel =
|
|
760
|
-
model: openai('gpt-4'),
|
|
761
|
-
retries: [
|
|
762
|
-
/* ... */
|
|
763
|
-
],
|
|
764
|
-
disabled: true, // Retries are completely disabled
|
|
765
|
-
});
|
|
766
|
-
|
|
767
|
-
// Or disable based on environment
|
|
768
|
-
const retryableModel = createRetryable({
|
|
769
|
-
model: openai('gpt-4'), // Base model
|
|
770
|
-
retries: [
|
|
771
|
-
/* ... */
|
|
772
|
-
],
|
|
773
|
-
disabled: process.env.NODE_ENV === 'test', // Disable in test environment
|
|
774
|
-
});
|
|
775
|
-
|
|
776
|
-
// Or use a function for dynamic control
|
|
777
|
-
const retryableModel = createRetryable({
|
|
778
|
-
model: openai('gpt-4'), // Base model
|
|
470
|
+
const retryableModel = createRetryableModel({
|
|
471
|
+
model: openai('gpt-4'),
|
|
779
472
|
retries: [
|
|
780
473
|
/* ... */
|
|
781
474
|
],
|
|
782
|
-
disabled:
|
|
475
|
+
disabled: true, // hard off
|
|
476
|
+
// disabled: process.env.NODE_ENV === 'test', // env-based
|
|
477
|
+
// disabled: () => !featureFlags.isEnabled('ai'), // dynamic
|
|
783
478
|
});
|
|
784
479
|
```
|
|
785
480
|
|
|
786
|
-
|
|
481
|
+
When disabled, the base model executes directly and no retry logic runs.
|
|
482
|
+
|
|
483
|
+
#### Retry delays
|
|
787
484
|
|
|
788
|
-
|
|
485
|
+
Delays support exponential backoff and respect the request's abort signal, so a pending delay can still be cancelled.
|
|
789
486
|
|
|
790
487
|
```typescript
|
|
791
|
-
|
|
488
|
+
import { createRetryableModel } from 'ai-retry/language-model';
|
|
489
|
+
|
|
490
|
+
const retryableModel = createRetryableModel({
|
|
792
491
|
model: openai('gpt-4'),
|
|
793
492
|
retries: [
|
|
794
|
-
// Retry
|
|
493
|
+
// Retry the base model with a fixed 2s delay
|
|
795
494
|
{ model: openai('gpt-4'), delay: 2_000, maxAttempts: 3 },
|
|
796
495
|
|
|
797
|
-
// Or
|
|
496
|
+
// Or with exponential backoff: 2s, 4s, 8s
|
|
798
497
|
{ model: openai('gpt-4'), delay: 2_000, backoffFactor: 2, maxAttempts: 3 },
|
|
799
498
|
],
|
|
800
499
|
});
|
|
801
|
-
|
|
802
|
-
const result = await generateText({
|
|
803
|
-
model: retryableModel,
|
|
804
|
-
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
|
|
805
|
-
// Will be respected during delays
|
|
806
|
-
abortSignal: AbortSignal.timeout(60_000),
|
|
807
|
-
});
|
|
808
500
|
```
|
|
809
501
|
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
```typescript
|
|
813
|
-
import { serviceOverloaded } from 'ai-retry/retryables';
|
|
814
|
-
|
|
815
|
-
const retryableModel = createRetryable({
|
|
816
|
-
model: openai('gpt-4'),
|
|
817
|
-
retries: [
|
|
818
|
-
// Wait 5 seconds before retrying on service overload
|
|
819
|
-
serviceOverloaded(openai('gpt-4'), { maxAttempts: 3, delay: 5_000 }),
|
|
820
|
-
],
|
|
821
|
-
});
|
|
822
|
-
```
|
|
502
|
+
The same `delay` / `backoffFactor` / `maxAttempts` options are accepted by `.switch({...})` and `.retry({...})`.
|
|
823
503
|
|
|
824
504
|
#### Timeouts
|
|
825
505
|
|
|
826
|
-
When a retry specifies a `timeout
|
|
506
|
+
When a retry specifies a `timeout`, a fresh `AbortSignal.timeout()` is created for that attempt. If the original `abortSignal` is still alive, the fresh deadline is composed with it via `AbortSignal.any()` so user cancellation still works. If the original signal is already aborted (a request-level deadline already fired), it is dropped so the retry runs against the fresh deadline alone.
|
|
827
507
|
|
|
828
|
-
If the original `abortSignal` is already aborted at the time of retry and the
|
|
508
|
+
If the original `abortSignal` is already aborted at the time of retry and the retry does **not** supply a `timeout`, `ai-retry` re-throws the original error rather than firing a misleading retry against the dead signal. `onError` still fires for observability; `onRetry` is skipped. Setting `timeout` is the explicit opt-in for retrying past an aborted signal.
|
|
829
509
|
|
|
830
510
|
```typescript
|
|
831
|
-
|
|
511
|
+
import { createRetryableModel, timeout } from 'ai-retry/language-model';
|
|
512
|
+
|
|
513
|
+
const retryableModel = createRetryableModel({
|
|
832
514
|
model: openai('gpt-4'),
|
|
833
515
|
retries: [
|
|
834
|
-
|
|
835
|
-
{
|
|
836
|
-
model: openai('gpt-3.5-turbo'),
|
|
837
|
-
timeout: 30_000,
|
|
838
|
-
},
|
|
516
|
+
timeout().switch({ model: openai('gpt-3.5-turbo'), timeout: 30_000 }),
|
|
839
517
|
],
|
|
840
518
|
});
|
|
841
519
|
|
|
842
|
-
|
|
843
|
-
const result = await generateText({
|
|
520
|
+
await generateText({
|
|
844
521
|
model: retryableModel,
|
|
845
522
|
prompt: 'Write a story',
|
|
846
|
-
// Original request timeout
|
|
847
523
|
abortSignal: AbortSignal.timeout(60_000),
|
|
848
524
|
});
|
|
849
525
|
```
|
|
850
526
|
|
|
851
|
-
#### Max
|
|
527
|
+
#### Max attempts
|
|
852
528
|
|
|
853
|
-
|
|
529
|
+
Each retryable attempts a model at most once by default. Use `maxAttempts` to allow more. Attempts are counted per unique model, so duplicates across multiple retryables don't get more chances than configured.
|
|
854
530
|
|
|
855
531
|
```typescript
|
|
856
|
-
const retryableModel =
|
|
532
|
+
const retryableModel = createRetryableModel({
|
|
857
533
|
model: openai('gpt-4'),
|
|
858
534
|
retries: [
|
|
859
|
-
//
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
{ model: openai('gpt-4'), maxAttempts: 2 },
|
|
863
|
-
// Already tried, won't be retried again
|
|
864
|
-
anthropic('claude-3-haiku-20240307'),
|
|
535
|
+
anthropic('claude-3-haiku-20240307'), // 1 attempt
|
|
536
|
+
{ model: openai('gpt-4'), maxAttempts: 2 }, // 1 + 1 retry
|
|
537
|
+
anthropic('claude-3-haiku-20240307'), // already used
|
|
865
538
|
],
|
|
866
539
|
});
|
|
867
540
|
```
|
|
868
541
|
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
#### Provider Options
|
|
542
|
+
#### Provider options
|
|
872
543
|
|
|
873
|
-
|
|
544
|
+
Override provider-specific options for a retry, completely replacing the original ones.
|
|
874
545
|
|
|
875
546
|
```typescript
|
|
876
|
-
const retryableModel =
|
|
547
|
+
const retryableModel = createRetryableModel({
|
|
877
548
|
model: openai('gpt-5'),
|
|
878
549
|
retries: [
|
|
879
|
-
// Use different provider options for the retry
|
|
880
550
|
{
|
|
881
551
|
model: openai('gpt-4o-2024-08-06'),
|
|
882
552
|
providerOptions: {
|
|
883
|
-
openai: {
|
|
884
|
-
user: 'fallback-user',
|
|
885
|
-
structuredOutputs: false,
|
|
886
|
-
},
|
|
553
|
+
openai: { user: 'fallback-user', structuredOutputs: false },
|
|
887
554
|
},
|
|
888
555
|
},
|
|
889
556
|
],
|
|
890
557
|
});
|
|
891
|
-
|
|
892
|
-
// Original provider options are used for the first attempt
|
|
893
|
-
const result = await generateText({
|
|
894
|
-
model: retryableModel,
|
|
895
|
-
prompt: 'Write a story',
|
|
896
|
-
providerOptions: {
|
|
897
|
-
openai: {
|
|
898
|
-
user: 'primary-user',
|
|
899
|
-
},
|
|
900
|
-
},
|
|
901
|
-
});
|
|
902
558
|
```
|
|
903
559
|
|
|
904
|
-
|
|
560
|
+
#### Call options
|
|
905
561
|
|
|
906
|
-
|
|
907
|
-
|
|
908
|
-
You can override various call options when retrying requests. This is useful for adjusting parameters like temperature, max tokens, or even the prompt itself for retry attempts. Call options are specified in the `options` field of the retry object.
|
|
562
|
+
Override any of the call options for a retry. Useful for things like temperature, max tokens, or the prompt itself.
|
|
909
563
|
|
|
910
564
|
```typescript
|
|
911
|
-
const retryableModel =
|
|
565
|
+
const retryableModel = createRetryableModel({
|
|
912
566
|
model: openai('gpt-4'),
|
|
913
567
|
retries: [
|
|
914
568
|
{
|
|
915
569
|
model: anthropic('claude-3-haiku'),
|
|
916
570
|
options: {
|
|
917
|
-
// Override generation parameters for more deterministic output
|
|
918
571
|
temperature: 0.3,
|
|
919
572
|
topP: 0.9,
|
|
920
573
|
maxOutputTokens: 500,
|
|
921
|
-
// Set a seed for reproducibility
|
|
922
574
|
seed: 42,
|
|
923
575
|
},
|
|
924
576
|
},
|
|
@@ -926,58 +578,54 @@ const retryableModel = createRetryable({
|
|
|
926
578
|
});
|
|
927
579
|
```
|
|
928
580
|
|
|
929
|
-
The following options can be overridden:
|
|
930
|
-
|
|
931
581
|
> [!NOTE]
|
|
932
582
|
> Override options completely replace the original values (they are not merged). If you don't specify an option, the original value from the request is used.
|
|
933
583
|
|
|
934
|
-
##### Language
|
|
935
|
-
|
|
936
|
-
| Option | Description |
|
|
937
|
-
| -------------------------------------------------------------------------------------------------- | ---------------------------------------------- |
|
|
938
|
-
| [`prompt`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#prompt) | Override the entire prompt for the retry |
|
|
939
|
-
| [`temperature`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#temperature) | Temperature setting for controlling randomness |
|
|
940
|
-
| [`topP`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#topp) | Nucleus sampling parameter |
|
|
941
|
-
| [`topK`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#topk) | Top-K sampling parameter |
|
|
942
|
-
| [`maxOutputTokens`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#max-output-tokens) | Maximum number of tokens to generate |
|
|
943
|
-
| [`seed`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#seed) | Random seed for deterministic generation |
|
|
944
|
-
| [`stopSequences`](https://ai-sdk.dev/docs/reference/ai-sdk-types/generate-text#stopsequences) | Stop sequences to end generation |
|
|
945
|
-
| [`presencePenalty`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#presencepenalty) | Presence penalty for reducing repetition |
|
|
946
|
-
| [`frequencyPenalty`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#frequencypenalty) | Frequency penalty for reducing repetition |
|
|
947
|
-
| [`headers`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#headers) | Additional HTTP headers |
|
|
948
|
-
| [`providerOptions`](https://ai-sdk.dev/docs/reference/ai-sdk-types/generate-text#provideroptions) | Provider-specific options |
|
|
584
|
+
##### Language model options
|
|
949
585
|
|
|
950
|
-
|
|
586
|
+
| Option | Description |
|
|
587
|
+
| ------------------ | ---------------------------------------------- |
|
|
588
|
+
| `prompt` | Override the entire prompt for the retry |
|
|
589
|
+
| `temperature` | Temperature setting for controlling randomness |
|
|
590
|
+
| `topP` | Nucleus sampling parameter |
|
|
591
|
+
| `topK` | Top-K sampling parameter |
|
|
592
|
+
| `maxOutputTokens` | Maximum number of tokens to generate |
|
|
593
|
+
| `seed` | Random seed for deterministic generation |
|
|
594
|
+
| `stopSequences` | Stop sequences to end generation |
|
|
595
|
+
| `presencePenalty` | Presence penalty for reducing repetition |
|
|
596
|
+
| `frequencyPenalty` | Frequency penalty for reducing repetition |
|
|
597
|
+
| `headers` | Additional HTTP headers |
|
|
598
|
+
| `providerOptions` | Provider-specific options |
|
|
951
599
|
|
|
952
|
-
|
|
953
|
-
| ---------------------------------------------------------------------------------------- | ---------------------------- |
|
|
954
|
-
| [`values`](https://ai-sdk.dev/docs/reference/ai-sdk-core/embed#values) | Override the values to embed |
|
|
955
|
-
| [`headers`](https://ai-sdk.dev/docs/reference/ai-sdk-core/embed#headers) | Additional HTTP headers |
|
|
956
|
-
| [`providerOptions`](https://ai-sdk.dev/docs/reference/ai-sdk-core/embed#provideroptions) | Provider-specific options |
|
|
600
|
+
##### Embedding model options
|
|
957
601
|
|
|
958
|
-
|
|
602
|
+
| Option | Description |
|
|
603
|
+
| ----------------- | ---------------------------- |
|
|
604
|
+
| `values` | Override the values to embed |
|
|
605
|
+
| `headers` | Additional HTTP headers |
|
|
606
|
+
| `providerOptions` | Provider-specific options |
|
|
959
607
|
|
|
960
|
-
|
|
961
|
-
| ------------------------------------------------------------------------------------------------- | -------------------------------- |
|
|
962
|
-
| [`n`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-image#n) | Number of images to generate |
|
|
963
|
-
| [`size`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-image#size) | Size of generated images |
|
|
964
|
-
| [`aspectRatio`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-image#aspectratio) | Aspect ratio of generated images |
|
|
965
|
-
| [`seed`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-image#seed) | Random seed for reproducibility |
|
|
966
|
-
| [`headers`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-image#headers) | Additional HTTP headers |
|
|
967
|
-
| [`providerOptions`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-image#provideroptions) | Provider-specific options |
|
|
608
|
+
##### Image model options
|
|
968
609
|
|
|
969
|
-
|
|
610
|
+
| Option | Description |
|
|
611
|
+
| ----------------- | -------------------------------- |
|
|
612
|
+
| `n` | Number of images to generate |
|
|
613
|
+
| `size` | Size of generated images |
|
|
614
|
+
| `aspectRatio` | Aspect ratio of generated images |
|
|
615
|
+
| `seed` | Random seed for reproducibility |
|
|
616
|
+
| `headers` | Additional HTTP headers |
|
|
617
|
+
| `providerOptions` | Provider-specific options |
|
|
970
618
|
|
|
971
|
-
|
|
619
|
+
#### Dynamic call options
|
|
972
620
|
|
|
973
|
-
|
|
621
|
+
You can also override call options dynamically from `onRetry`, instead of declaring them statically on the retry object. This is useful when the override depends on something only known at runtime — the prompt that just failed, the model about to be tried, or the error that triggered the retry. The overrides apply to the upcoming attempt only and can change the same fields as the static `options`. The callback can be `async` if computing the override needs to do work (e.g. fetching a fresh credential).
|
|
974
622
|
|
|
975
623
|
```typescript
|
|
976
|
-
import { createRetryable } from 'ai-retry';
|
|
977
624
|
import { azure } from '@ai-sdk/azure';
|
|
978
625
|
import { openai } from '@ai-sdk/openai';
|
|
626
|
+
import { createRetryableModel } from 'ai-retry/language-model';
|
|
979
627
|
|
|
980
|
-
const retryableModel =
|
|
628
|
+
const retryableModel = createRetryableModel({
|
|
981
629
|
model: azure('gpt-5-chat'),
|
|
982
630
|
retries: [openai('gpt-5-chat')],
|
|
983
631
|
onRetry: (context) => {
|
|
@@ -985,33 +633,16 @@ const retryableModel = createRetryable({
|
|
|
985
633
|
const previous = attempts.at(-1);
|
|
986
634
|
|
|
987
635
|
if (current.model.provider !== previous.model.provider) {
|
|
988
|
-
// Strip provider-scoped metadata
|
|
636
|
+
// Strip provider-scoped metadata before retrying on a different provider
|
|
989
637
|
return {
|
|
990
|
-
options: {
|
|
991
|
-
prompt: stripProviderMetadata(current.options.prompt),
|
|
992
|
-
},
|
|
638
|
+
options: { prompt: stripProviderMetadata(current.options.prompt) },
|
|
993
639
|
};
|
|
994
640
|
}
|
|
995
641
|
},
|
|
996
642
|
});
|
|
997
643
|
```
|
|
998
644
|
|
|
999
|
-
Inside
|
|
1000
|
-
|
|
1001
|
-
`onRetry` may also be `async`, which is useful if computing the override needs to do work (e.g. fetching a fresh credential):
|
|
1002
|
-
|
|
1003
|
-
```typescript
|
|
1004
|
-
const retryableModel = createRetryable({
|
|
1005
|
-
model: openai('gpt-4o-mini'),
|
|
1006
|
-
retries: [anthropic('claude-sonnet-4-20250514')],
|
|
1007
|
-
onRetry: async (context) => {
|
|
1008
|
-
const { current } = context;
|
|
1009
|
-
|
|
1010
|
-
const headers = await refreshAuthHeaders(current.model.provider);
|
|
1011
|
-
return { options: { headers } };
|
|
1012
|
-
},
|
|
1013
|
-
});
|
|
1014
|
-
```
|
|
645
|
+
Inside `onRetry`, `context.current.model` is the model about to be tried next; `context.current.options` and `context.current.error` describe the failed attempt that triggered the retry. The previous model is at `context.attempts.at(-1).model`.
|
|
1015
646
|
|
|
1016
647
|
**Precedence** for the upcoming retry attempt (highest to lowest):
|
|
1017
648
|
|
|
@@ -1029,10 +660,10 @@ You can use the following callbacks to log retry attempts and errors:
|
|
|
1029
660
|
- `onFailure` is invoked when the request ultimately fails and no retry could recover it.
|
|
1030
661
|
|
|
1031
662
|
```typescript
|
|
1032
|
-
const retryableModel =
|
|
1033
|
-
model: openai('gpt-
|
|
663
|
+
const retryableModel = createRetryableModel({
|
|
664
|
+
model: openai('gpt-4o-mini'),
|
|
1034
665
|
retries: [
|
|
1035
|
-
/*
|
|
666
|
+
/* ... */
|
|
1036
667
|
],
|
|
1037
668
|
onError: (context) => {
|
|
1038
669
|
console.error(
|
|
@@ -1042,7 +673,7 @@ const retryableModel = createRetryable({
|
|
|
1042
673
|
},
|
|
1043
674
|
onRetry: (context) => {
|
|
1044
675
|
console.log(
|
|
1045
|
-
`Retrying
|
|
676
|
+
`Retrying with ${context.current.model.provider}/${context.current.model.modelId}...`,
|
|
1046
677
|
);
|
|
1047
678
|
},
|
|
1048
679
|
onSuccess: (context) => {
|
|
@@ -1063,7 +694,7 @@ const retryableModel = createRetryable({
|
|
|
1063
694
|
|
|
1064
695
|
#### Reset
|
|
1065
696
|
|
|
1066
|
-
By default, every new request starts with the base model, even if a previous request was retried with a different model. The `reset` option changes this behavior by making the last successfully retried model **sticky
|
|
697
|
+
By default, every new request starts with the base model, even if a previous request was retried with a different model. The `reset` option changes this behavior by making the last successfully retried model **sticky** — subsequent requests will continue using that model until the reset condition fires.
|
|
1067
698
|
|
|
1068
699
|
| Value | Description |
|
|
1069
700
|
| ------------------ | ------------------------------------------------------------ |
|
|
@@ -1071,51 +702,29 @@ By default, every new request starts with the base model, even if a previous req
|
|
|
1071
702
|
| `after-N-requests` | Keep the retry model for the next **N** requests, then reset |
|
|
1072
703
|
| `after-N-seconds` | Keep the retry model for **N** seconds, then reset |
|
|
1073
704
|
|
|
1074
|
-
##### Reset after each request (default)
|
|
1075
|
-
|
|
1076
|
-
```typescript
|
|
1077
|
-
const retryableModel = createRetryable({
|
|
1078
|
-
model: openai('gpt-4o-mini'),
|
|
1079
|
-
retries: [anthropic('claude-sonnet-4-20250514')],
|
|
1080
|
-
reset: 'after-request', // default: always start with the base model
|
|
1081
|
-
});
|
|
1082
|
-
```
|
|
1083
|
-
|
|
1084
|
-
##### Keep the retry model for N requests
|
|
1085
|
-
|
|
1086
|
-
```typescript
|
|
1087
|
-
const retryableModel = createRetryable({
|
|
1088
|
-
model: openai('gpt-4o-mini'),
|
|
1089
|
-
retries: [anthropic('claude-sonnet-4-20250514')],
|
|
1090
|
-
reset: 'after-5-requests', // use the retry model for 5 more requests before resetting
|
|
1091
|
-
});
|
|
1092
|
-
```
|
|
1093
|
-
|
|
1094
|
-
##### Keep the retry model for N seconds
|
|
1095
|
-
|
|
1096
705
|
```typescript
|
|
1097
|
-
const retryableModel =
|
|
706
|
+
const retryableModel = createRetryableModel({
|
|
1098
707
|
model: openai('gpt-4o-mini'),
|
|
1099
708
|
retries: [anthropic('claude-sonnet-4-20250514')],
|
|
1100
|
-
reset: 'after-
|
|
709
|
+
reset: 'after-5-requests',
|
|
1101
710
|
});
|
|
1102
711
|
```
|
|
1103
712
|
|
|
1104
713
|
### Telemetry
|
|
1105
714
|
|
|
1106
715
|
> [!NOTE]
|
|
1107
|
-
> Experimental:
|
|
716
|
+
> Experimental: span names and attributes may change in patch versions.
|
|
1108
717
|
|
|
1109
|
-
`ai-retry` can emit [OpenTelemetry](https://opentelemetry.io/) spans for each request and every retry attempt.
|
|
718
|
+
`ai-retry` can emit [OpenTelemetry](https://opentelemetry.io/) spans for each request and every retry attempt. Spans are created on the active OpenTelemetry context, so they nest automatically under the AI SDK's own spans (e.g. `ai.generateText.doGenerate`) when you also enable `experimental_telemetry` on `generateText` / `streamText`. A single trace then shows the individual attempts — which model each used, why it was retried, and the backoff between them — that the SDK's own span otherwise hides.
|
|
1110
719
|
|
|
1111
720
|
#### Setup
|
|
1112
721
|
|
|
1113
722
|
Telemetry uses the optional peer dependency `@opentelemetry/api` (already present if you use the AI SDK). Register an OpenTelemetry SDK once at startup, then opt in per model:
|
|
1114
723
|
|
|
1115
724
|
```typescript
|
|
1116
|
-
import {
|
|
725
|
+
import { createRetryableModel } from 'ai-retry/language-model';
|
|
1117
726
|
|
|
1118
|
-
const retryableModel =
|
|
727
|
+
const retryableModel = createRetryableModel({
|
|
1119
728
|
model: openai('gpt-4o'),
|
|
1120
729
|
retries: [anthropic('claude-sonnet-4-5')],
|
|
1121
730
|
experimental_telemetry: { isEnabled: true },
|
|
@@ -1150,27 +759,27 @@ ai_retry.doGenerate outcome=success, attempts=2
|
|
|
1150
759
|
|
|
1151
760
|
**Operation span** attributes:
|
|
1152
761
|
|
|
1153
|
-
| Attribute
|
|
1154
|
-
|
|
|
1155
|
-
| `ai_retry.operation`
|
|
1156
|
-
| `ai_retry.outcome`
|
|
1157
|
-
| `ai_retry.attempts`
|
|
1158
|
-
| `ai_retry.model.start`
|
|
1159
|
-
| `ai_retry.model.final`
|
|
762
|
+
| Attribute | Description |
|
|
763
|
+
| ---------------------------------------------------------------------------- | ---------------------------------------------------------------------------- |
|
|
764
|
+
| `ai_retry.operation` | `doGenerate`, `doStream`, or `doEmbed` |
|
|
765
|
+
| `ai_retry.outcome` | `success` or `failure` |
|
|
766
|
+
| `ai_retry.attempts` | total number of attempts |
|
|
767
|
+
| `ai_retry.model.start` | the model the request started with (`provider/modelId`) |
|
|
768
|
+
| `ai_retry.model.final` | the model that produced the final outcome |
|
|
1160
769
|
| `ai_retry.error.{name,message,status,cause.name,cause.message,cause.status}` | the failing error (on failure); `status` when it carries an HTTP status code |
|
|
1161
|
-
| `ai_retry.function.id`, `ai_retry.metadata.*`
|
|
770
|
+
| `ai_retry.function.id`, `ai_retry.metadata.*` | from the telemetry settings |
|
|
1162
771
|
|
|
1163
772
|
**Attempt span** (`ai_retry.attempt`) attributes:
|
|
1164
773
|
|
|
1165
|
-
| Attribute
|
|
1166
|
-
|
|
|
1167
|
-
| `ai_retry.attempt.number`
|
|
1168
|
-
| `ai_retry.attempt.model`
|
|
1169
|
-
| `ai_retry.attempt.outcome`
|
|
1170
|
-
| `ai_retry.attempt.type`
|
|
1171
|
-
| `ai_retry.attempt.finish_reason`
|
|
1172
|
-
| `ai_retry.attempt.delay_ms`
|
|
1173
|
-
| `ai_retry.attempt.timeout_ms`
|
|
774
|
+
| Attribute | Description |
|
|
775
|
+
| ------------------------------------------------------------------------------------ | ------------------------------------------------------------------------ |
|
|
776
|
+
| `ai_retry.attempt.number` | 1-based attempt index |
|
|
777
|
+
| `ai_retry.attempt.model` | model used (`provider/modelId`) |
|
|
778
|
+
| `ai_retry.attempt.outcome` | `success`, `retry`, or `failure` |
|
|
779
|
+
| `ai_retry.attempt.type` | `result` or `error` |
|
|
780
|
+
| `ai_retry.attempt.finish_reason` | finish reason (result attempts) |
|
|
781
|
+
| `ai_retry.attempt.delay_ms` | backoff scheduled before the next attempt |
|
|
782
|
+
| `ai_retry.attempt.timeout_ms` | timeout budget, when the retry set one |
|
|
1174
783
|
| `ai_retry.attempt.error.{name,message,status,cause.name,cause.message,cause.status}` | the error (error attempts); `status` when it carries an HTTP status code |
|
|
1175
784
|
|
|
1176
785
|
Attempt spans also carry the standard `gen_ai.request.model` / `gen_ai.provider.name` attributes so observability tools (Langfuse, etc.) recognize and render them.
|
|
@@ -1187,10 +796,32 @@ Errors during streaming requests can occur in two ways:
|
|
|
1187
796
|
1. When the stream is initially created by calling `streamText` (e.g. a network error or an API error).
|
|
1188
797
|
2. While the stream is being processed by reading from the returned `result.textStream` async iterable (e.g. a timeout or an API error).
|
|
1189
798
|
|
|
1190
|
-
In the second case, errors during stream processing will not always be retried, because the stream might have already emitted some actual content and the consumer might have processed it. Retrying
|
|
799
|
+
In the second case, errors during stream processing will not always be retried, because the stream might have already emitted some actual content and the consumer might have processed it. Retries are no longer attempted once the first content chunk (e.g. `text-delta` or `tool-call`) has been emitted. The chunks that count as content are the same ones that are passed to [`onChunk()`](https://github.com/vercel/ai/blob/1fe4bd4144bff927f5319d9d206e782a73979ccb/packages/ai/src/generate-text/stream-text.ts#L684-L697).
|
|
800
|
+
|
|
801
|
+
Result-based conditions (`finishReason`, `schemaInvalid`, `result(...)`) apply to streams as well: the condition is evaluated when the upstream `finish` part arrives and only fires if no content has been emitted yet, so a condition such as `finishReason.unified === 'content-filter'` on an otherwise empty response can still trigger a fallback. Once any content chunk has been forwarded, the stream is committed and result-based retries are skipped.
|
|
1191
802
|
|
|
1192
803
|
> [!IMPORTANT]
|
|
1193
|
-
> **Streaming limitation:**
|
|
804
|
+
> **Streaming limitation:** retries and fallbacks only apply before the first content chunk is emitted. Once streaming begins delivering content, the response is committed to the current model. Mid-stream errors will propagate to the caller rather than triggering a fallback. If reliable retries are critical for your use case, consider using `generateText` instead of `streamText`.
|
|
805
|
+
|
|
806
|
+
### Deprecated: function-style retryables
|
|
807
|
+
|
|
808
|
+
The function-style helpers (`contentFilterTriggered`, `requestTimeout`, `requestNotRetryable`, `retryAfterDelay`, `schemaMismatch`, `serviceOverloaded`, `serviceUnavailable`, `noImageGenerated`) are still exported from `ai-retry/retryables` for backwards compatibility, but they are deprecated in favor of the condition API documented above.
|
|
809
|
+
|
|
810
|
+
> [!NOTE]
|
|
811
|
+
> Full documentation for the deprecated function-style retryables lives in the [earlier README](https://github.com/zirkelc/ai-retry/blob/v1/README.md). New code should use the condition API. See the [migration guide](./MIGRATION.md) to convert existing code.
|
|
812
|
+
|
|
813
|
+
Each function-style retryable has a one-line equivalent in the condition API (imported from `ai-retry/language-model` unless noted):
|
|
814
|
+
|
|
815
|
+
| Function-style (deprecated) | Condition API |
|
|
816
|
+
| ------------------------------------------- | -------------------------------------------------------------------------------------------------------------------- |
|
|
817
|
+
| `contentFilterTriggered(m)` | `finishReason('content-filter').switch({ model: m })` |
|
|
818
|
+
| `requestTimeout(m)` | `timeout().switch({ model: m, timeout: 60_000 })` |
|
|
819
|
+
| `requestNotRetryable(m)` | `error.isRetryable(false).switch({ model: m })` |
|
|
820
|
+
| `schemaMismatch(m)` | `schemaInvalid().switch({ model: m })` |
|
|
821
|
+
| `serviceOverloaded(m)` | `httpStatus(529).switch({ model: m })` |
|
|
822
|
+
| `serviceUnavailable(m)` | `httpStatus(503).switch({ model: m })` |
|
|
823
|
+
| `noImageGenerated(m)` | `noImage().switch({ model: m })` (from `ai-retry/image-model`) |
|
|
824
|
+
| `retryAfterDelay({ delay, backoffFactor })` | `error.isRetryable(true).retry({ delay, backoffFactor })` |
|
|
1194
825
|
|
|
1195
826
|
#### Preamble buffering
|
|
1196
827
|
|
|
@@ -1201,13 +832,13 @@ Every stream begins with a non-content preamble (`stream-start`, then optionally
|
|
|
1201
832
|
|
|
1202
833
|
### API Reference
|
|
1203
834
|
|
|
1204
|
-
#### `
|
|
835
|
+
#### `createRetryableModel(options): LanguageModel | EmbeddingModel | ImageModel`
|
|
1205
836
|
|
|
1206
|
-
|
|
837
|
+
Imported from the per-model entry point (`ai-retry/language-model`, `ai-retry/embedding-model`, `ai-retry/image-model`). Each entry point returns a model already narrowed to that family.
|
|
1207
838
|
|
|
1208
839
|
```ts
|
|
1209
840
|
interface RetryableModelOptions<
|
|
1210
|
-
MODEL extends
|
|
841
|
+
MODEL extends LanguageModel | EmbeddingModel | ImageModel,
|
|
1211
842
|
> {
|
|
1212
843
|
model: MODEL;
|
|
1213
844
|
retries: Array<Retryable<MODEL> | MODEL>;
|
|
@@ -1225,19 +856,26 @@ interface RetryableModelOptions<
|
|
|
1225
856
|
|
|
1226
857
|
**Options:**
|
|
1227
858
|
|
|
1228
|
-
- `model
|
|
1229
|
-
- `retries
|
|
1230
|
-
- `disabled
|
|
1231
|
-
- `reset
|
|
1232
|
-
- `experimental_telemetry
|
|
1233
|
-
- `onError
|
|
1234
|
-
- `onRetry
|
|
1235
|
-
- `onSuccess
|
|
1236
|
-
- `onFailure
|
|
859
|
+
- `model` — base model used for the initial request.
|
|
860
|
+
- `retries` — array of conditions (`.switch(...)` / `.retry(...)` outputs), models, or retry objects to try on failure.
|
|
861
|
+
- `disabled` — disable all retry logic. `boolean` or `() => boolean`. Default `false`.
|
|
862
|
+
- `reset` — controls when to reset back to the base model after a successful retry. Default `'after-request'`.
|
|
863
|
+
- `experimental_telemetry` — OpenTelemetry instrumentation. See [Telemetry](#telemetry).
|
|
864
|
+
- `onError` — fires when an error occurs.
|
|
865
|
+
- `onRetry` — fires before a retry attempt. May return `OnRetryOverrides` (or a promise of one) to override `options.*` for that attempt only. See [Dynamic call options](#dynamic-call-options).
|
|
866
|
+
- `onSuccess` — fires after a successful request.
|
|
867
|
+
- `onFailure` — fires when the request ultimately fails and no retry recovered it (no condition matched, retries exhausted, or the retry itself failed).
|
|
1237
868
|
|
|
1238
|
-
#### `
|
|
869
|
+
#### `createRetryable(options)` (deprecated)
|
|
870
|
+
|
|
871
|
+
```ts
|
|
872
|
+
import { createRetryable } from 'ai-retry';
|
|
873
|
+
```
|
|
874
|
+
|
|
875
|
+
> [!WARNING]
|
|
876
|
+
> Deprecated. The root `createRetryable` auto-detects the model family at runtime and resolves bare gateway strings as language models only. Prefer `createRetryableModel` from the matching per-model entry point.
|
|
1239
877
|
|
|
1240
|
-
|
|
878
|
+
#### `Reset`
|
|
1241
879
|
|
|
1242
880
|
```ts
|
|
1243
881
|
type Reset =
|
|
@@ -1246,77 +884,53 @@ type Reset =
|
|
|
1246
884
|
| `after-${number}-seconds`;
|
|
1247
885
|
```
|
|
1248
886
|
|
|
1249
|
-
|
|
1250
|
-
- `after-N-requests` — keep the retry model for the next N requests, then reset.
|
|
1251
|
-
- `after-N-seconds` — keep the retry model for N seconds, then reset.
|
|
1252
|
-
|
|
1253
|
-
#### `Retryable`
|
|
1254
|
-
|
|
1255
|
-
A `Retryable` is a function that receives a `RetryContext` with the current error or result and model and all previous attempts.
|
|
1256
|
-
It should evaluate the error/result and decide whether to retry by returning a `Retry` or to skip by returning `undefined`.
|
|
887
|
+
#### `Condition<MODEL>`
|
|
1257
888
|
|
|
1258
889
|
```ts
|
|
1259
|
-
|
|
1260
|
-
|
|
1261
|
-
|
|
1262
|
-
|
|
1263
|
-
|
|
1264
|
-
|
|
1265
|
-
|
|
1266
|
-
```typescript
|
|
1267
|
-
interface Retry {
|
|
1268
|
-
model: LanguageModelV3 | EmbeddingModelV3 | ImageModelV3;
|
|
1269
|
-
maxAttempts?: number; // Maximum retry attempts per model (default: 1)
|
|
1270
|
-
delay?: number; // Delay in milliseconds before retrying
|
|
1271
|
-
backoffFactor?: number; // Multiplier for exponential backoff
|
|
1272
|
-
timeout?: number; // Timeout in milliseconds for the retry attempt
|
|
1273
|
-
providerOptions?: ProviderOptions; // @deprecated - use options.providerOptions instead
|
|
1274
|
-
options?:
|
|
1275
|
-
| LanguageModelV3CallOptions
|
|
1276
|
-
| EmbeddingModelV3CallOptions
|
|
1277
|
-
| ImageModelV3CallOptions; // Call options to override for this retry
|
|
890
|
+
class Condition<MODEL> {
|
|
891
|
+
evaluate(ctx: RetryContext<MODEL>): Promise<boolean>;
|
|
892
|
+
switch(
|
|
893
|
+
target: { model: MODEL } & Omit<Retry<MODEL>, 'model'>,
|
|
894
|
+
): Retryable<MODEL>;
|
|
895
|
+
retry(options?: Omit<Retry<MODEL>, 'model'>): Retryable<MODEL>;
|
|
1278
896
|
}
|
|
1279
897
|
```
|
|
1280
898
|
|
|
1281
|
-
|
|
899
|
+
Conditions are produced by the low-level (`error`, `result`) and high-level (`httpStatus`, `timeout`, `aborted`, `finishReason`, `schemaInvalid`, `noImage`) helpers. They can be composed with the top-level `and(...conditions)` / `or(...conditions)` / `not(condition)` helpers and finalized into a `Retryable` with `.switch()` or `.retry()`.
|
|
1282
900
|
|
|
1283
|
-
|
|
901
|
+
#### `Retryable`
|
|
1284
902
|
|
|
1285
|
-
|
|
1286
|
-
|
|
1287
|
-
|
|
1288
|
-
|
|
1289
|
-
|
|
903
|
+
A `Retryable` is a function that receives a `RetryContext` and returns a `Retry` (to fire) or `undefined` (to skip).
|
|
904
|
+
|
|
905
|
+
```ts
|
|
906
|
+
type Retryable<MODEL> = (
|
|
907
|
+
context: RetryContext<MODEL>,
|
|
908
|
+
) => Retry<MODEL> | Promise<Retry<MODEL> | undefined> | undefined;
|
|
1290
909
|
```
|
|
1291
910
|
|
|
1292
|
-
|
|
911
|
+
The `.switch()` and `.retry()` actions return `Retryable<MODEL>` for you. Hand-written retryables are still supported when the condition helpers aren't a fit.
|
|
1293
912
|
|
|
1294
|
-
|
|
913
|
+
#### `Retry`
|
|
1295
914
|
|
|
1296
|
-
```
|
|
1297
|
-
interface
|
|
1298
|
-
|
|
1299
|
-
|
|
915
|
+
```ts
|
|
916
|
+
interface Retry<MODEL> {
|
|
917
|
+
model: MODEL;
|
|
918
|
+
maxAttempts?: number; // default: 1 for switch, 2 for retry
|
|
919
|
+
delay?: number; // ms before the attempt
|
|
920
|
+
backoffFactor?: number; // exponential multiplier
|
|
921
|
+
timeout?: number; // fresh AbortSignal.timeout() for this attempt
|
|
922
|
+
options?: RetryCallOptions<MODEL>;
|
|
1300
923
|
}
|
|
1301
924
|
```
|
|
1302
925
|
|
|
1303
|
-
|
|
926
|
+
A `Retry` describes the next attempt. It is the shape returned by a retryable, and it is also accepted as a static entry in `retries: [...]`.
|
|
1304
927
|
|
|
1305
|
-
|
|
928
|
+
#### `RetryContext`
|
|
1306
929
|
|
|
1307
|
-
```
|
|
1308
|
-
interface
|
|
1309
|
-
|
|
1310
|
-
|
|
1311
|
-
result:
|
|
1312
|
-
| LanguageModelResult
|
|
1313
|
-
| LanguageModelStream
|
|
1314
|
-
| EmbeddingModelEmbed
|
|
1315
|
-
| ImageModelGenerate;
|
|
1316
|
-
options:
|
|
1317
|
-
| LanguageModelV3CallOptions
|
|
1318
|
-
| EmbeddingModelV3CallOptions
|
|
1319
|
-
| ImageModelV3CallOptions;
|
|
930
|
+
```ts
|
|
931
|
+
interface RetryContext<MODEL> {
|
|
932
|
+
current: RetryAttempt<MODEL>;
|
|
933
|
+
attempts: Array<RetryAttempt<MODEL>>;
|
|
1320
934
|
}
|
|
1321
935
|
```
|
|
1322
936
|
|
|
@@ -1334,34 +948,45 @@ interface FailureContext {
|
|
|
1334
948
|
|
|
1335
949
|
#### `RetryAttempt`
|
|
1336
950
|
|
|
1337
|
-
|
|
1338
|
-
|
|
1339
|
-
```typescript
|
|
1340
|
-
// For language, embedding, and image models
|
|
1341
|
-
type RetryAttempt =
|
|
951
|
+
```ts
|
|
952
|
+
type RetryAttempt<MODEL> =
|
|
1342
953
|
| {
|
|
1343
954
|
type: 'error';
|
|
1344
955
|
error: unknown;
|
|
1345
|
-
model:
|
|
1346
|
-
options:
|
|
1347
|
-
| LanguageModelV3CallOptions
|
|
1348
|
-
| EmbeddingModelV3CallOptions
|
|
1349
|
-
| ImageModelV3CallOptions;
|
|
956
|
+
model: MODEL;
|
|
957
|
+
options: CallOptions<MODEL>;
|
|
1350
958
|
}
|
|
1351
959
|
| {
|
|
1352
960
|
type: 'result';
|
|
1353
961
|
result: LanguageModelResult;
|
|
1354
|
-
model:
|
|
1355
|
-
options:
|
|
962
|
+
model: LanguageModel;
|
|
963
|
+
options: LanguageModelCallOptions;
|
|
1356
964
|
};
|
|
1357
965
|
|
|
1358
|
-
// Note: Result-based retries only apply to language models (both generate and stream paths). They do not apply to embedding or image models. For streaming, retries are only possible before any content has been emitted; once a text-delta flows through, the stream is committed.
|
|
1359
|
-
|
|
1360
|
-
// Type guards for discriminating attempts
|
|
1361
966
|
function isErrorAttempt(attempt: RetryAttempt): attempt is RetryErrorAttempt;
|
|
1362
967
|
function isResultAttempt(attempt: RetryAttempt): attempt is RetryResultAttempt;
|
|
1363
968
|
```
|
|
1364
969
|
|
|
970
|
+
Result-based attempts only fire for language models (both generate and stream paths). They do not fire for embedding or image models. For streams, retries are only possible before any content has been emitted; once a content chunk flows through, the stream is committed.
|
|
971
|
+
|
|
972
|
+
`isErrorAttempt` and `isResultAttempt` are re-exported from the package root (`ai-retry`).
|
|
973
|
+
|
|
974
|
+
#### `SuccessContext`
|
|
975
|
+
|
|
976
|
+
```ts
|
|
977
|
+
interface SuccessContext<MODEL> {
|
|
978
|
+
current: {
|
|
979
|
+
type: 'success';
|
|
980
|
+
model: MODEL;
|
|
981
|
+
result: Result<MODEL>;
|
|
982
|
+
options: CallOptions<MODEL>;
|
|
983
|
+
};
|
|
984
|
+
attempts: Array<RetryAttempt<MODEL>>;
|
|
985
|
+
}
|
|
986
|
+
```
|
|
987
|
+
|
|
988
|
+
Passed to the `onSuccess` callback.
|
|
989
|
+
|
|
1365
990
|
### License
|
|
1366
991
|
|
|
1367
992
|
MIT
|