ai-retry 1.4.0 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +298 -88
- package/dist/index.d.mts +2 -2
- package/dist/index.mjs +104 -57
- package/dist/parse-retry-headers-DIPVbwW5.mjs +26 -0
- package/dist/retryables/experimental/index.d.mts +248 -0
- package/dist/retryables/experimental/index.mjs +310 -0
- package/dist/retryables/index.d.mts +1 -1
- package/dist/retryables/index.mjs +1 -20
- package/dist/{types-CqvBIDad.d.mts → types-pGdkwtOE.d.mts} +24 -3
- package/package.json +38 -23
package/README.md
CHANGED
|
@@ -14,6 +14,7 @@ Automatically handle API failures, content filtering, timeouts and other errors
|
|
|
14
14
|
`ai-retry` wraps the provided base model with a set of retry conditions (retryables). When a request fails with an error or the response is unsatisfactory, it iterates through the given retryables to find a suitable fallback model. It automatically tracks which models have been tried and how many attempts have been made to prevent infinite loops.
|
|
15
15
|
|
|
16
16
|
It supports two types of retries:
|
|
17
|
+
|
|
17
18
|
- Error-based retries: when the model throws an error (e.g. timeouts, API errors, etc.)
|
|
18
19
|
- Result-based retries: when the model returns a successful response that needs retrying (e.g. content filtering, etc.)
|
|
19
20
|
|
|
@@ -24,7 +25,7 @@ This library supports both AI SDK v5 and v6. The main branch reflects the latest
|
|
|
24
25
|
> [!WARNING]
|
|
25
26
|
> Version compatibility:
|
|
26
27
|
>
|
|
27
|
-
> - Use `ai-retry` version 0.x for AI SDK v5.
|
|
28
|
+
> - Use `ai-retry` version 0.x for AI SDK v5.
|
|
28
29
|
> - Use `ai-retry` version 1.x for AI SDK v6.
|
|
29
30
|
|
|
30
31
|
```bash
|
|
@@ -133,17 +134,13 @@ import { createRetryable } from 'ai-retry';
|
|
|
133
134
|
|
|
134
135
|
const retryableModel = createRetryable({
|
|
135
136
|
model: 'openai/gpt-5',
|
|
136
|
-
retries: [
|
|
137
|
-
'anthropic/claude-sonnet-4'
|
|
138
|
-
]
|
|
137
|
+
retries: ['anthropic/claude-sonnet-4'],
|
|
139
138
|
});
|
|
140
139
|
|
|
141
140
|
// Is the same as:
|
|
142
141
|
const retryableModel = createRetryable({
|
|
143
142
|
model: gateway('openai/gpt-5'),
|
|
144
|
-
retries: [
|
|
145
|
-
gateway('anthropic/claude-sonnet-4')
|
|
146
|
-
]
|
|
143
|
+
retries: [gateway('anthropic/claude-sonnet-4')],
|
|
147
144
|
});
|
|
148
145
|
```
|
|
149
146
|
|
|
@@ -179,16 +176,16 @@ const retryableModel = createRetryable({
|
|
|
179
176
|
// Dynamic retryables act like if-branches:
|
|
180
177
|
// If error.statusCode === 429 (too many requests), retry with this model
|
|
181
178
|
(context) => {
|
|
182
|
-
return context.current.error.statusCode === 429
|
|
183
|
-
? { model: azure('gpt-4-mini') }
|
|
184
|
-
: undefined;
|
|
179
|
+
return context.current.error.statusCode === 429
|
|
180
|
+
? { model: azure('gpt-4-mini') } // Retry
|
|
181
|
+
: undefined; // Skip
|
|
185
182
|
},
|
|
186
183
|
|
|
187
184
|
// If error.message ~= "service overloaded", retry with this model
|
|
188
185
|
(context) => {
|
|
189
|
-
return context.current.error.message.includes('service overloaded')
|
|
190
|
-
? { model: azure('gpt-4-mini') }
|
|
191
|
-
: undefined;
|
|
186
|
+
return context.current.error.message.includes('service overloaded')
|
|
187
|
+
? { model: azure('gpt-4-mini') } // Retry
|
|
188
|
+
: undefined; // Skip
|
|
192
189
|
},
|
|
193
190
|
|
|
194
191
|
// Static retryables act like else branches:
|
|
@@ -245,7 +242,7 @@ const retryableModel = createRetryable({
|
|
|
245
242
|
retries: [
|
|
246
243
|
// Error-based: catches thrown errors like timeouts, rate limits, etc.
|
|
247
244
|
errorBasedRetry,
|
|
248
|
-
|
|
245
|
+
|
|
249
246
|
// Result-based: catches successful responses that need retrying
|
|
250
247
|
resultBasedRetry,
|
|
251
248
|
],
|
|
@@ -258,7 +255,7 @@ Result-based retryables are only available for generate calls like `generateText
|
|
|
258
255
|
|
|
259
256
|
If you don't need precise error matching with custom logic and just want to fall back to different models on any error, you can simply provide a list of models.
|
|
260
257
|
|
|
261
|
-
> [!NOTE]
|
|
258
|
+
> [!NOTE]
|
|
262
259
|
> Use the object syntax `{ model: openai('gpt-4') }` if you need to provide additional options like `maxAttempts`, `delay`, etc.
|
|
263
260
|
|
|
264
261
|
```typescript
|
|
@@ -291,6 +288,9 @@ If you need more control over when to retry and which model to use, you can crea
|
|
|
291
288
|
> [!NOTE]
|
|
292
289
|
> You can return additional options like `maxAttempts`, `delay`, etc. along with the model.
|
|
293
290
|
|
|
291
|
+
> [!TIP]
|
|
292
|
+
> If you'd like the same flexibility with a typed, composable condition system, see [Experimental: Composable Conditions](#experimental-composable-conditions).
|
|
293
|
+
|
|
294
294
|
```typescript
|
|
295
295
|
import { anthropic } from '@ai-sdk/anthropic';
|
|
296
296
|
import { openai } from '@ai-sdk/openai';
|
|
@@ -318,10 +318,10 @@ const rateLimitRetry: Retryable = (context) => {
|
|
|
318
318
|
|
|
319
319
|
const retryableModel = createRetryable({
|
|
320
320
|
// Base model
|
|
321
|
-
model: openai('gpt-4-mini'),
|
|
321
|
+
model: openai('gpt-4-mini'),
|
|
322
322
|
retries: [
|
|
323
323
|
// Use custom rate limit retryable
|
|
324
|
-
rateLimitRetry
|
|
324
|
+
rateLimitRetry,
|
|
325
325
|
|
|
326
326
|
// Other retryables...
|
|
327
327
|
],
|
|
@@ -340,12 +340,12 @@ import { RetryError } from 'ai';
|
|
|
340
340
|
|
|
341
341
|
const retryableModel = createRetryable({
|
|
342
342
|
// Base model = first attempt
|
|
343
|
-
model: azure('gpt-4-mini'),
|
|
343
|
+
model: azure('gpt-4-mini'),
|
|
344
344
|
retries: [
|
|
345
345
|
// Fallback model 1 = Second attempt
|
|
346
|
-
openai('gpt-3.5-turbo'),
|
|
346
|
+
openai('gpt-3.5-turbo'),
|
|
347
347
|
// Fallback model 2 = Third attempt
|
|
348
|
-
anthropic('claude-3-haiku-20240307')
|
|
348
|
+
anthropic('claude-3-haiku-20240307'),
|
|
349
349
|
],
|
|
350
350
|
});
|
|
351
351
|
|
|
@@ -373,6 +373,9 @@ There are several built-in dynamic retryables available for common use cases:
|
|
|
373
373
|
> [!TIP]
|
|
374
374
|
> Missing a retryable for your use case? [Open an issue](https://github.com/zirkelc/ai-retry/issues/new) and let's discuss it!
|
|
375
375
|
|
|
376
|
+
> [!NOTE]
|
|
377
|
+
> Looking for a composable alternative? See [Experimental: Composable Conditions](#experimental-composable-conditions) for a `condition().action()` API that builds on small primitives.
|
|
378
|
+
|
|
376
379
|
- [`contentFilterTriggered`](./src/retryables/content-filter-triggered.ts): Content filter was triggered based on the prompt or completion.
|
|
377
380
|
- [`requestTimeout`](./src/retryables/request-timeout.ts): Request timeout occurred.
|
|
378
381
|
- [`requestNotRetryable`](./src/retryables/request-not-retryable.ts): Request failed with a non-retryable error.
|
|
@@ -404,8 +407,8 @@ const retryableModel = createRetryable({
|
|
|
404
407
|
|
|
405
408
|
Handle timeouts by switching to potentially faster models.
|
|
406
409
|
|
|
407
|
-
> [!NOTE]
|
|
408
|
-
> You need to use an `abortSignal` with a timeout on your request.
|
|
410
|
+
> [!NOTE]
|
|
411
|
+
> You need to use an `abortSignal` with a timeout on your request.
|
|
409
412
|
|
|
410
413
|
When a request times out, the `requestTimeout` retryable will automatically create a fresh abort signal for the retry attempt. This prevents the retry from immediately failing due to the already-aborted signal from the original request. If you do not provide a `timeout` value, a default of 60 seconds is used for the retry attempt.
|
|
411
414
|
|
|
@@ -416,8 +419,8 @@ const retryableModel = createRetryable({
|
|
|
416
419
|
model: azure('gpt-4'),
|
|
417
420
|
retries: [
|
|
418
421
|
// Defaults to 60 seconds timeout for the retry attempt
|
|
419
|
-
requestTimeout(azure('gpt-4-mini')),
|
|
420
|
-
|
|
422
|
+
requestTimeout(azure('gpt-4-mini')),
|
|
423
|
+
|
|
421
424
|
// Or specify a custom timeout for the retry attempt
|
|
422
425
|
requestTimeout(azure('gpt-4-mini'), { timeout: 30_000 }),
|
|
423
426
|
],
|
|
@@ -500,10 +503,9 @@ const result = await generateImage({
|
|
|
500
503
|
|
|
501
504
|
Handle cases where the base model fails with a non-retryable error.
|
|
502
505
|
|
|
503
|
-
> [!NOTE]
|
|
506
|
+
> [!NOTE]
|
|
504
507
|
> You can check if an error is retryable with the `isRetryable` property on an [`APICallError`](https://ai-sdk.dev/docs/reference/ai-sdk-errors/ai-api-call-error#ai_apicallerror).
|
|
505
508
|
|
|
506
|
-
|
|
507
509
|
```typescript
|
|
508
510
|
import { requestNotRetryable } from 'ai-retry/retryables';
|
|
509
511
|
|
|
@@ -517,7 +519,7 @@ const retryable = createRetryable({
|
|
|
517
519
|
|
|
518
520
|
#### Retry After Delay
|
|
519
521
|
|
|
520
|
-
If an error is retryable, such as 429 (Too Many Requests) or 503 (Service Unavailable) errors, it will be retried after a delay.
|
|
522
|
+
If an error is retryable, such as 429 (Too Many Requests) or 503 (Service Unavailable) errors, it will be retried after a delay.
|
|
521
523
|
The delay and exponential backoff can be configured. If the response contains a [`retry-after`](https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers/Retry-After) header, it will be prioritized over the configured delay.
|
|
522
524
|
|
|
523
525
|
Note that this retryable does not accept a model parameter; it always retries the model from the latest failed attempt.
|
|
@@ -546,7 +548,7 @@ By default, if a [`retry-after-ms`](https://learn.microsoft.com/en-us/azure/ai-f
|
|
|
546
548
|
|
|
547
549
|
Automatically retry with a different model when the response JSON doesn't match the expected schema.
|
|
548
550
|
|
|
549
|
-
This is a result-based retryable that validates the model's JSON output against the schema set by structured output modes like `Output.object()`, `Output.array()`, and `Output.choice()`.
|
|
551
|
+
This is a result-based retryable that validates the model's JSON output against the schema set by structured output modes like `Output.object()`, `Output.array()`, and `Output.choice()`.
|
|
550
552
|
Normally, schema validation happens outside the model in `generateText`, so a schema validation error would not be seen by the retryable model. This retryable catches it early and retries with a fallback model.
|
|
551
553
|
|
|
552
554
|
> [!NOTE]
|
|
@@ -582,6 +584,130 @@ const result = await generateText({
|
|
|
582
584
|
console.log(result.object); // { name: "Alice", age: 30 }
|
|
583
585
|
```
|
|
584
586
|
|
|
587
|
+
### Experimental: Composable Conditions
|
|
588
|
+
|
|
589
|
+
> [!WARNING]
|
|
590
|
+
> This API is experimental and may change. It is not exported from the package root; opt in via the deep import:
|
|
591
|
+
>
|
|
592
|
+
> ```ts
|
|
593
|
+
> import { ... } from 'ai-retry/retryables/experimental';
|
|
594
|
+
> ```
|
|
595
|
+
|
|
596
|
+
A `condition().action()` API for retryables. Conditions are built from small primitives (`error(fn)`, `result(fn)`), composed with `and` / `or` / `not`, and turned into a `Retryable` by one of two terminal actions: `.switch({ model })` or `.retry({ delay })`. The result drops into the same `retries: [...]` array as the stable helpers, so you can mix the two styles freely.
|
|
597
|
+
|
|
598
|
+
```typescript
|
|
599
|
+
import { anthropic } from '@ai-sdk/anthropic';
|
|
600
|
+
import { openai } from '@ai-sdk/openai';
|
|
601
|
+
import { generateText } from 'ai';
|
|
602
|
+
import { createRetryable } from 'ai-retry';
|
|
603
|
+
import {
|
|
604
|
+
error,
|
|
605
|
+
finishReason,
|
|
606
|
+
httpStatus,
|
|
607
|
+
} from 'ai-retry/retryables/experimental';
|
|
608
|
+
|
|
609
|
+
const retryableModel = createRetryable({
|
|
610
|
+
model: openai('gpt-4'),
|
|
611
|
+
retries: [
|
|
612
|
+
// Switch on 529 or any "overloaded" message
|
|
613
|
+
httpStatus(529, 'overloaded').switch({
|
|
614
|
+
model: anthropic('claude-3-haiku-20240307'),
|
|
615
|
+
}),
|
|
616
|
+
|
|
617
|
+
// Switch when the response was content-filtered
|
|
618
|
+
finishReason('content-filter').switch({ model: openai('gpt-4o') }),
|
|
619
|
+
|
|
620
|
+
// Retry the same model with exponential backoff on retryable errors
|
|
621
|
+
error.isRetryable(true).retry({ delay: 1_000, backoffFactor: 2 }),
|
|
622
|
+
],
|
|
623
|
+
});
|
|
624
|
+
```
|
|
625
|
+
|
|
626
|
+
#### High-level helpers
|
|
627
|
+
|
|
628
|
+
These cover the common cases. Each returns a `Condition` that you finalize with `.switch(...)` or `.retry(...)`.
|
|
629
|
+
|
|
630
|
+
| Helper | Matches when |
|
|
631
|
+
| ------------------------------ | -------------------------------------------------------------------------------------------------- |
|
|
632
|
+
| `httpStatus(...patterns)` | Numbers match the status code; strings match the message (substring); regex matches either |
|
|
633
|
+
| `timeout()` | `Error.name === 'TimeoutError'` (`AbortSignal.timeout()` fired) |
|
|
634
|
+
| `aborted()` | `Error.name === 'AbortError'` (manual `controller.abort()`) |
|
|
635
|
+
| `noImage()` | The image model threw `NoImageGeneratedError` |
|
|
636
|
+
| `finishReason(...reasons)` | The result's `finishReason.unified` matches one of the given values |
|
|
637
|
+
| `schemaInvalid()` | The result text fails JSON-schema validation against the call's `responseFormat` |
|
|
638
|
+
|
|
639
|
+
#### Actions
|
|
640
|
+
|
|
641
|
+
Every `Condition` exposes two terminal actions that turn it into a `Retryable`:
|
|
642
|
+
|
|
643
|
+
- **`.switch({ model, ...options })`** falls back to a different model when the condition matches. Optional fields (`maxAttempts`, `delay`, `backoffFactor`, `timeout`, `options`) are the same as on a normal `Retry` object.
|
|
644
|
+
- **`.retry({ delay?, backoffFactor?, ... })`** retries the current model when the condition matches. Honors `Retry-After` and `Retry-After-Ms` response headers when present, capped at 60 seconds.
|
|
645
|
+
|
|
646
|
+
#### Combinators
|
|
647
|
+
|
|
648
|
+
Compose conditions with the free functions or the methods on `Condition`:
|
|
649
|
+
|
|
650
|
+
```typescript
|
|
651
|
+
import {
|
|
652
|
+
and,
|
|
653
|
+
error,
|
|
654
|
+
httpStatus,
|
|
655
|
+
not,
|
|
656
|
+
or,
|
|
657
|
+
} from 'ai-retry/retryables/experimental';
|
|
658
|
+
|
|
659
|
+
or(httpStatus(429), error.message('overloaded'));
|
|
660
|
+
and(httpStatus(503), error.message('temporary'));
|
|
661
|
+
not(error.isRetryable(true));
|
|
662
|
+
|
|
663
|
+
// Method form
|
|
664
|
+
httpStatus(429).or(error.message('overloaded'));
|
|
665
|
+
```
|
|
666
|
+
|
|
667
|
+
#### Primitives
|
|
668
|
+
|
|
669
|
+
The two lowest-level builders. Reach for them when no helper covers your case:
|
|
670
|
+
|
|
671
|
+
| Primitive | Matches when |
|
|
672
|
+
| ------------------ | ----------------------------------------------------------------------------- |
|
|
673
|
+
| `error(predicate)` | The current attempt failed and `predicate(err, ctx)` returns true |
|
|
674
|
+
| `result(predicate)`| The current attempt succeeded and `predicate(res, ctx)` returns true (language models only) |
|
|
675
|
+
|
|
676
|
+
```typescript
|
|
677
|
+
import { APICallError } from 'ai';
|
|
678
|
+
import { error } from 'ai-retry/retryables/experimental';
|
|
679
|
+
|
|
680
|
+
error<MODEL, APICallError>(
|
|
681
|
+
(e) => APICallError.isInstance(e) && e.statusCode === 418,
|
|
682
|
+
).switch({ model: fallback });
|
|
683
|
+
```
|
|
684
|
+
|
|
685
|
+
A few common error fields have ready-made matchers on the `error` namespace:
|
|
686
|
+
|
|
687
|
+
| Helper | Matches when |
|
|
688
|
+
| ------------------------------- | ------------------------------------------------------------------------------------- |
|
|
689
|
+
| `error.isRetryable(flag)` | `APICallError.isRetryable === flag` (default `true`) |
|
|
690
|
+
| `error.statusCode(...patterns)` | Numbers match exactly; regex matches the stringified code (e.g. `/^5\d\d$/` for 5xx) |
|
|
691
|
+
| `error.message(...patterns)` | Substring (case-insensitive) or regex match against the error message |
|
|
692
|
+
|
|
693
|
+
#### Mapping from Built-in retryables
|
|
694
|
+
|
|
695
|
+
Each stable retryable has an equivalent in the new shape:
|
|
696
|
+
|
|
697
|
+
| Built-in | Composable form |
|
|
698
|
+
| ----------------------------------------------- | ----------------------------------------------------------------------------------------------------- |
|
|
699
|
+
| `contentFilterTriggered(m)` | `or(error(/* check e.data.error.code === 'content_filter' */), finishReason('content-filter')).switch({ model: m })` |
|
|
700
|
+
| `requestTimeout(m)` | `timeout().switch({ model: m, timeout: 60_000 })` |
|
|
701
|
+
| `requestNotRetryable(m)` | `error.isRetryable(false).switch({ model: m })` |
|
|
702
|
+
| `schemaMismatch(m)` | `schemaInvalid().switch({ model: m })` |
|
|
703
|
+
| `serviceOverloaded(m)` | `httpStatus(529, 'overloaded').switch({ model: m })` |
|
|
704
|
+
| `serviceUnavailable(m)` | `error.statusCode(503).switch({ model: m })` |
|
|
705
|
+
| `noImageGenerated(m)` | `noImage().switch({ model: m })` |
|
|
706
|
+
| `retryAfterDelay({ delay, backoffFactor })` | `error.isRetryable(true).retry({ delay, backoffFactor })` |
|
|
707
|
+
|
|
708
|
+
> [!NOTE]
|
|
709
|
+
> `error.isRetryable(true)` matches whatever the AI SDK's `APICallError` marks retryable. By default that's status codes 408, 409, 429, and any 5xx, plus network errors and provider-specific overrides (e.g. Anthropic flips it on `error.type === 'overloaded_error'`). It picks up more cases than a manual status-code list.
|
|
710
|
+
|
|
585
711
|
### Options
|
|
586
712
|
|
|
587
713
|
#### Disabling Retries
|
|
@@ -591,21 +717,27 @@ You can disable retries entirely, which is useful for testing or specific enviro
|
|
|
591
717
|
```typescript
|
|
592
718
|
const retryableModel = createRetryable({
|
|
593
719
|
model: openai('gpt-4'), // Base model
|
|
594
|
-
retries: [
|
|
720
|
+
retries: [
|
|
721
|
+
/* ... */
|
|
722
|
+
],
|
|
595
723
|
disabled: true, // Retries are completely disabled
|
|
596
724
|
});
|
|
597
725
|
|
|
598
726
|
// Or disable based on environment
|
|
599
727
|
const retryableModel = createRetryable({
|
|
600
728
|
model: openai('gpt-4'), // Base model
|
|
601
|
-
retries: [
|
|
729
|
+
retries: [
|
|
730
|
+
/* ... */
|
|
731
|
+
],
|
|
602
732
|
disabled: process.env.NODE_ENV === 'test', // Disable in test environment
|
|
603
733
|
});
|
|
604
734
|
|
|
605
735
|
// Or use a function for dynamic control
|
|
606
736
|
const retryableModel = createRetryable({
|
|
607
737
|
model: openai('gpt-4'), // Base model
|
|
608
|
-
retries: [
|
|
738
|
+
retries: [
|
|
739
|
+
/* ... */
|
|
740
|
+
],
|
|
609
741
|
disabled: () => !featureFlags.isEnabled('ai-retries'), // Check feature flag
|
|
610
742
|
});
|
|
611
743
|
```
|
|
@@ -630,7 +762,7 @@ const result = await generateText({
|
|
|
630
762
|
model: retryableModel,
|
|
631
763
|
prompt: 'Write a vegetarian lasagna recipe for 4 people.',
|
|
632
764
|
// Will be respected during delays
|
|
633
|
-
abortSignal: AbortSignal.timeout(60_000),
|
|
765
|
+
abortSignal: AbortSignal.timeout(60_000),
|
|
634
766
|
});
|
|
635
767
|
```
|
|
636
768
|
|
|
@@ -647,6 +779,7 @@ const retryableModel = createRetryable({
|
|
|
647
779
|
],
|
|
648
780
|
});
|
|
649
781
|
```
|
|
782
|
+
|
|
650
783
|
#### Timeouts
|
|
651
784
|
|
|
652
785
|
When a retry specifies a `timeout` value, a fresh `AbortSignal.timeout()` is created for that retry attempt, replacing any existing abort signal. This is essential when retrying after timeout errors, as the original abort signal would already be in an aborted state.
|
|
@@ -656,9 +789,9 @@ const retryableModel = createRetryable({
|
|
|
656
789
|
model: openai('gpt-4'),
|
|
657
790
|
retries: [
|
|
658
791
|
// Provide a fresh 30 second timeout for the retry
|
|
659
|
-
{
|
|
660
|
-
model: openai('gpt-3.5-turbo'),
|
|
661
|
-
timeout: 30_000
|
|
792
|
+
{
|
|
793
|
+
model: openai('gpt-3.5-turbo'),
|
|
794
|
+
timeout: 30_000,
|
|
662
795
|
},
|
|
663
796
|
],
|
|
664
797
|
});
|
|
@@ -668,7 +801,7 @@ const result = await generateText({
|
|
|
668
801
|
model: retryableModel,
|
|
669
802
|
prompt: 'Write a story',
|
|
670
803
|
// Original request timeout
|
|
671
|
-
abortSignal: AbortSignal.timeout(60_000),
|
|
804
|
+
abortSignal: AbortSignal.timeout(60_000),
|
|
672
805
|
});
|
|
673
806
|
```
|
|
674
807
|
|
|
@@ -681,11 +814,11 @@ const retryableModel = createRetryable({
|
|
|
681
814
|
model: openai('gpt-4'),
|
|
682
815
|
retries: [
|
|
683
816
|
// Try this once
|
|
684
|
-
anthropic('claude-3-haiku-20240307'),
|
|
817
|
+
anthropic('claude-3-haiku-20240307'),
|
|
685
818
|
// Try this one more time (initial + 1 retry)
|
|
686
|
-
{ model: openai('gpt-4'), maxAttempts: 2 },
|
|
819
|
+
{ model: openai('gpt-4'), maxAttempts: 2 },
|
|
687
820
|
// Already tried, won't be retried again
|
|
688
|
-
anthropic('claude-3-haiku-20240307')
|
|
821
|
+
anthropic('claude-3-haiku-20240307'),
|
|
689
822
|
],
|
|
690
823
|
});
|
|
691
824
|
```
|
|
@@ -757,42 +890,96 @@ The following options can be overridden:
|
|
|
757
890
|
|
|
758
891
|
##### Language Model Options
|
|
759
892
|
|
|
760
|
-
| Option
|
|
761
|
-
|
|
762
|
-
| [`prompt`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#prompt)
|
|
763
|
-
| [`temperature`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#temperature)
|
|
764
|
-
| [`topP`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#topp)
|
|
765
|
-
| [`topK`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#topk)
|
|
766
|
-
| [`maxOutputTokens`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#max-output-tokens) | Maximum number of tokens to generate
|
|
767
|
-
| [`seed`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#seed)
|
|
768
|
-
| [`stopSequences`](https://ai-sdk.dev/docs/reference/ai-sdk-types/generate-text#stopsequences)
|
|
769
|
-
| [`presencePenalty`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#presencepenalty)
|
|
770
|
-
| [`frequencyPenalty`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#frequencypenalty) | Frequency penalty for reducing repetition
|
|
771
|
-
| [`headers`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#headers)
|
|
772
|
-
| [`providerOptions`](https://ai-sdk.dev/docs/reference/ai-sdk-types/generate-text#provideroptions)
|
|
893
|
+
| Option | Description |
|
|
894
|
+
| -------------------------------------------------------------------------------------------------- | ---------------------------------------------- |
|
|
895
|
+
| [`prompt`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#prompt) | Override the entire prompt for the retry |
|
|
896
|
+
| [`temperature`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#temperature) | Temperature setting for controlling randomness |
|
|
897
|
+
| [`topP`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#topp) | Nucleus sampling parameter |
|
|
898
|
+
| [`topK`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#topk) | Top-K sampling parameter |
|
|
899
|
+
| [`maxOutputTokens`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#max-output-tokens) | Maximum number of tokens to generate |
|
|
900
|
+
| [`seed`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#seed) | Random seed for deterministic generation |
|
|
901
|
+
| [`stopSequences`](https://ai-sdk.dev/docs/reference/ai-sdk-types/generate-text#stopsequences) | Stop sequences to end generation |
|
|
902
|
+
| [`presencePenalty`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#presencepenalty) | Presence penalty for reducing repetition |
|
|
903
|
+
| [`frequencyPenalty`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#frequencypenalty) | Frequency penalty for reducing repetition |
|
|
904
|
+
| [`headers`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#headers) | Additional HTTP headers |
|
|
905
|
+
| [`providerOptions`](https://ai-sdk.dev/docs/reference/ai-sdk-types/generate-text#provideroptions) | Provider-specific options |
|
|
773
906
|
|
|
774
907
|
##### Embedding Model Options
|
|
775
908
|
|
|
776
|
-
| Option
|
|
777
|
-
|
|
778
|
-
| [`values`](https://ai-sdk.dev/docs/reference/ai-sdk-core/embed#values)
|
|
779
|
-
| [`headers`](https://ai-sdk.dev/docs/reference/ai-sdk-core/embed#headers)
|
|
780
|
-
| [`providerOptions`](https://ai-sdk.dev/docs/reference/ai-sdk-core/embed#provideroptions) | Provider-specific options
|
|
909
|
+
| Option | Description |
|
|
910
|
+
| ---------------------------------------------------------------------------------------- | ---------------------------- |
|
|
911
|
+
| [`values`](https://ai-sdk.dev/docs/reference/ai-sdk-core/embed#values) | Override the values to embed |
|
|
912
|
+
| [`headers`](https://ai-sdk.dev/docs/reference/ai-sdk-core/embed#headers) | Additional HTTP headers |
|
|
913
|
+
| [`providerOptions`](https://ai-sdk.dev/docs/reference/ai-sdk-core/embed#provideroptions) | Provider-specific options |
|
|
781
914
|
|
|
782
915
|
##### Image Model Options
|
|
783
916
|
|
|
784
|
-
| Option
|
|
785
|
-
|
|
786
|
-
| [`n`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-image#n)
|
|
787
|
-
| [`size`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-image#size)
|
|
788
|
-
| [`aspectRatio`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-image#aspectratio)
|
|
789
|
-
| [`seed`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-image#seed)
|
|
790
|
-
| [`headers`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-image#headers)
|
|
791
|
-
| [`providerOptions`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-image#provideroptions) | Provider-specific options
|
|
917
|
+
| Option | Description |
|
|
918
|
+
| ------------------------------------------------------------------------------------------------- | -------------------------------- |
|
|
919
|
+
| [`n`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-image#n) | Number of images to generate |
|
|
920
|
+
| [`size`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-image#size) | Size of generated images |
|
|
921
|
+
| [`aspectRatio`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-image#aspectratio) | Aspect ratio of generated images |
|
|
922
|
+
| [`seed`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-image#seed) | Random seed for reproducibility |
|
|
923
|
+
| [`headers`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-image#headers) | Additional HTTP headers |
|
|
924
|
+
| [`providerOptions`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-image#provideroptions) | Provider-specific options |
|
|
925
|
+
|
|
926
|
+
#### Dynamic Call Options
|
|
927
|
+
|
|
928
|
+
You can also override call options dynamically from inside the `onRetry` callback, instead of declaring them statically on the retry object. This is useful when the override depends on something only known at runtime, like the prompt that just failed, the model that's about to be tried next, or the error that triggered the retry. The overrides apply to the upcoming retry attempt only, and can change the same fields as the static `options` on a retry plus the request `timeout`. The callback may also be `async` if computing the override needs to do work (e.g. fetching a fresh credential).
|
|
929
|
+
|
|
930
|
+
A common use case is sanitizing provider-scoped metadata when falling back to a different provider, for example stripping `providerOptions.azure.itemId` references from the previous prompt before retrying on OpenAI:
|
|
931
|
+
|
|
932
|
+
```typescript
|
|
933
|
+
import { createRetryable } from 'ai-retry';
|
|
934
|
+
import { azure } from '@ai-sdk/azure';
|
|
935
|
+
import { openai } from '@ai-sdk/openai';
|
|
936
|
+
|
|
937
|
+
const retryableModel = createRetryable({
|
|
938
|
+
model: azure('gpt-5-chat'),
|
|
939
|
+
retries: [openai('gpt-5-chat')],
|
|
940
|
+
onRetry: (context) => {
|
|
941
|
+
const { current, attempts } = context;
|
|
942
|
+
const previous = attempts.at(-1);
|
|
943
|
+
|
|
944
|
+
if (current.model.provider !== previous.model.provider) {
|
|
945
|
+
// Strip provider-scoped metadata from the prompt before retrying on a different provider
|
|
946
|
+
return {
|
|
947
|
+
options: {
|
|
948
|
+
prompt: stripProviderMetadata(current.options.prompt),
|
|
949
|
+
},
|
|
950
|
+
};
|
|
951
|
+
}
|
|
952
|
+
},
|
|
953
|
+
});
|
|
954
|
+
```
|
|
955
|
+
|
|
956
|
+
Inside the `onRetry` callback, `context.current.model` is the model that's about to be tried next, while `context.current.options` and `context.current.error` describe the failed attempt that triggered the retry. The previous model is available at `context.attempts.at(-1).model`.
|
|
957
|
+
|
|
958
|
+
`onRetry` may also be `async`, which is useful if computing the override needs to do work (e.g. fetching a fresh credential):
|
|
959
|
+
|
|
960
|
+
```typescript
|
|
961
|
+
const retryableModel = createRetryable({
|
|
962
|
+
model: openai('gpt-4o-mini'),
|
|
963
|
+
retries: [anthropic('claude-sonnet-4-20250514')],
|
|
964
|
+
onRetry: async (context) => {
|
|
965
|
+
const { current } = context;
|
|
966
|
+
|
|
967
|
+
const headers = await refreshAuthHeaders(current.model.provider);
|
|
968
|
+
return { options: { headers } };
|
|
969
|
+
},
|
|
970
|
+
});
|
|
971
|
+
```
|
|
972
|
+
|
|
973
|
+
**Precedence** for the upcoming retry attempt (highest to lowest):
|
|
974
|
+
|
|
975
|
+
1. The value returned from `onRetry`
|
|
976
|
+
2. The `options` returned from the retryable
|
|
977
|
+
3. The original call options from the request
|
|
792
978
|
|
|
793
979
|
#### Logging
|
|
794
980
|
|
|
795
981
|
You can use the following callbacks to log retry attempts and errors:
|
|
982
|
+
|
|
796
983
|
- `onError` is invoked if an error occurs.
|
|
797
984
|
- `onRetry` is invoked before attempting a retry.
|
|
798
985
|
- `onSuccess` is invoked after a successful request with the model that handled it.
|
|
@@ -800,17 +987,24 @@ You can use the following callbacks to log retry attempts and errors:
|
|
|
800
987
|
```typescript
|
|
801
988
|
const retryableModel = createRetryable({
|
|
802
989
|
model: openai('gpt-4-mini'),
|
|
803
|
-
retries: [
|
|
990
|
+
retries: [
|
|
991
|
+
/* your retryables */
|
|
992
|
+
],
|
|
804
993
|
onError: (context) => {
|
|
805
|
-
console.error(
|
|
806
|
-
context.current.
|
|
994
|
+
console.error(
|
|
995
|
+
`Attempt ${context.attempts.length} with ${context.current.model.provider}/${context.current.model.modelId} failed:`,
|
|
996
|
+
context.current.error,
|
|
807
997
|
);
|
|
808
998
|
},
|
|
809
999
|
onRetry: (context) => {
|
|
810
|
-
console.log(
|
|
1000
|
+
console.log(
|
|
1001
|
+
`Retrying attempt ${context.attempts.length + 1} with model ${context.current.model.provider}/${context.current.model.modelId}...`,
|
|
1002
|
+
);
|
|
811
1003
|
},
|
|
812
1004
|
onSuccess: (context) => {
|
|
813
|
-
console.log(
|
|
1005
|
+
console.log(
|
|
1006
|
+
`Request handled by ${context.current.model.provider}/${context.current.model.modelId}`,
|
|
1007
|
+
);
|
|
814
1008
|
},
|
|
815
1009
|
});
|
|
816
1010
|
```
|
|
@@ -819,11 +1013,11 @@ const retryableModel = createRetryable({
|
|
|
819
1013
|
|
|
820
1014
|
By default, every new request starts with the base model, even if a previous request was retried with a different model. The `reset` option changes this behavior by making the last successfully retried model **sticky**, meaning that subsequent requests will continue using that model instead of switching back to the base model. The reset value controls how long the retry model stays sticky before resetting back to the base model.
|
|
821
1015
|
|
|
822
|
-
| Value
|
|
823
|
-
|
|
824
|
-
| `after-request`
|
|
1016
|
+
| Value | Description |
|
|
1017
|
+
| ------------------ | ------------------------------------------------------------ |
|
|
1018
|
+
| `after-request` | Reset immediately after the next request (default) |
|
|
825
1019
|
| `after-N-requests` | Keep the retry model for the next **N** requests, then reset |
|
|
826
|
-
| `after-N-seconds`
|
|
1020
|
+
| `after-N-seconds` | Keep the retry model for **N** seconds, then reset |
|
|
827
1021
|
|
|
828
1022
|
##### Reset after each request (default)
|
|
829
1023
|
|
|
@@ -874,24 +1068,29 @@ In the second case, errors during stream processing will not always be retried,
|
|
|
874
1068
|
Creates a retryable model that works with language models, embedding models, and image models.
|
|
875
1069
|
|
|
876
1070
|
```ts
|
|
877
|
-
interface RetryableModelOptions<
|
|
1071
|
+
interface RetryableModelOptions<
|
|
1072
|
+
MODEL extends LanguageModelV3 | EmbeddingModelV3 | ImageModelV3,
|
|
1073
|
+
> {
|
|
878
1074
|
model: MODEL;
|
|
879
1075
|
retries: Array<Retryable<MODEL> | MODEL>;
|
|
880
1076
|
disabled?: boolean | (() => boolean);
|
|
881
1077
|
reset?: Reset;
|
|
882
1078
|
onError?: (context: RetryContext<MODEL>) => void;
|
|
883
|
-
onRetry?: (
|
|
1079
|
+
onRetry?: (
|
|
1080
|
+
context: RetryContext<MODEL>,
|
|
1081
|
+
) => void | OnRetryOverrides<MODEL> | Promise<void | OnRetryOverrides<MODEL>>;
|
|
884
1082
|
onSuccess?: (context: SuccessContext<MODEL>) => void;
|
|
885
1083
|
}
|
|
886
1084
|
```
|
|
887
1085
|
|
|
888
1086
|
**Options:**
|
|
1087
|
+
|
|
889
1088
|
- `model`: The base model to use for the initial request.
|
|
890
1089
|
- `retries`: Array of retryables (functions, models, or retry objects) to attempt on failure.
|
|
891
1090
|
- `disabled`: Disable all retry logic. Can be a boolean or function returning boolean. Default: `false` (retries enabled).
|
|
892
1091
|
- `reset`: Controls when to reset back to the base model after a successful retry. Default: `after-request`.
|
|
893
1092
|
- `onError`: Callback invoked when an error occurs.
|
|
894
|
-
- `onRetry`: Callback invoked before attempting a retry.
|
|
1093
|
+
- `onRetry`: Callback invoked before attempting a retry. May optionally return an `OnRetryOverrides` object (or a `Promise` of one) to override `options.*` and `timeout` for the upcoming attempt only. See [Dynamic Call Options via `onRetry`](#dynamic-call-options-via-onretry).
|
|
895
1094
|
- `onSuccess`: Callback invoked after a successful request. Receives the model that handled the request and all previous attempts.
|
|
896
1095
|
|
|
897
1096
|
#### `Reset`
|
|
@@ -915,9 +1114,7 @@ A `Retryable` is a function that receives a `RetryContext` with the current erro
|
|
|
915
1114
|
It should evaluate the error/result and decide whether to retry by returning a `Retry` or to skip by returning `undefined`.
|
|
916
1115
|
|
|
917
1116
|
```ts
|
|
918
|
-
type Retryable = (
|
|
919
|
-
context: RetryContext
|
|
920
|
-
) => Retry | Promise<Retry> | undefined;
|
|
1117
|
+
type Retryable = (context: RetryContext) => Retry | Promise<Retry> | undefined;
|
|
921
1118
|
```
|
|
922
1119
|
|
|
923
1120
|
#### `Retry`
|
|
@@ -927,12 +1124,15 @@ A `Retry` specifies the model to retry and optional settings. The available opti
|
|
|
927
1124
|
```typescript
|
|
928
1125
|
interface Retry {
|
|
929
1126
|
model: LanguageModelV3 | EmbeddingModelV3 | ImageModelV3;
|
|
930
|
-
maxAttempts?: number;
|
|
931
|
-
delay?: number;
|
|
932
|
-
backoffFactor?: number;
|
|
933
|
-
timeout?: number;
|
|
1127
|
+
maxAttempts?: number; // Maximum retry attempts per model (default: 1)
|
|
1128
|
+
delay?: number; // Delay in milliseconds before retrying
|
|
1129
|
+
backoffFactor?: number; // Multiplier for exponential backoff
|
|
1130
|
+
timeout?: number; // Timeout in milliseconds for the retry attempt
|
|
934
1131
|
providerOptions?: ProviderOptions; // @deprecated - use options.providerOptions instead
|
|
935
|
-
options?:
|
|
1132
|
+
options?:
|
|
1133
|
+
| LanguageModelV3CallOptions
|
|
1134
|
+
| EmbeddingModelV3CallOptions
|
|
1135
|
+
| ImageModelV3CallOptions; // Call options to override for this retry
|
|
936
1136
|
}
|
|
937
1137
|
```
|
|
938
1138
|
|
|
@@ -966,8 +1166,15 @@ A `SuccessAttempt` represents the successful attempt with the model, result, and
|
|
|
966
1166
|
interface SuccessAttempt {
|
|
967
1167
|
type: 'success';
|
|
968
1168
|
model: LanguageModelV3 | EmbeddingModelV3 | ImageModelV3;
|
|
969
|
-
result:
|
|
970
|
-
|
|
1169
|
+
result:
|
|
1170
|
+
| LanguageModelGenerate
|
|
1171
|
+
| LanguageModelStream
|
|
1172
|
+
| EmbeddingModelEmbed
|
|
1173
|
+
| ImageModelGenerate;
|
|
1174
|
+
options:
|
|
1175
|
+
| LanguageModelV3CallOptions
|
|
1176
|
+
| EmbeddingModelV3CallOptions
|
|
1177
|
+
| ImageModelV3CallOptions;
|
|
971
1178
|
}
|
|
972
1179
|
```
|
|
973
1180
|
|
|
@@ -982,7 +1189,10 @@ type RetryAttempt =
|
|
|
982
1189
|
type: 'error';
|
|
983
1190
|
error: unknown;
|
|
984
1191
|
model: LanguageModelV3 | EmbeddingModelV3 | ImageModelV3;
|
|
985
|
-
options:
|
|
1192
|
+
options:
|
|
1193
|
+
| LanguageModelV3CallOptions
|
|
1194
|
+
| EmbeddingModelV3CallOptions
|
|
1195
|
+
| ImageModelV3CallOptions;
|
|
986
1196
|
}
|
|
987
1197
|
| {
|
|
988
1198
|
type: 'result';
|