ai-retry 1.4.0 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -14,6 +14,7 @@ Automatically handle API failures, content filtering, timeouts and other errors
14
14
  `ai-retry` wraps the provided base model with a set of retry conditions (retryables). When a request fails with an error or the response is not satisfactory, it iterates through the given retryables to find a suitable fallback model. It automatically tracks which models have been tried and how many attempts have been made to prevent infinite loops.
15
15
 
16
16
  It supports two types of retries:
17
+
17
18
  - Error-based retries: when the model throws an error (e.g. timeouts, API errors, etc.)
18
19
  - Result-based retries: when the model returns a successful response that needs retrying (e.g. content filtering, etc.)
19
20
 
@@ -24,7 +25,7 @@ This library supports both AI SDK v5 and v6. The main branch reflects the latest
24
25
  > [!WARNING]
25
26
  > Version compatibility:
26
27
  >
27
- > - Use `ai-retry` version 0.x for AI SDK v5.
28
+ > - Use `ai-retry` version 0.x for AI SDK v5.
28
29
  > - Use `ai-retry` version 1.x for AI SDK v6.
29
30
 
30
31
  ```bash
@@ -133,17 +134,13 @@ import { createRetryable } from 'ai-retry';
133
134
 
134
135
  const retryableModel = createRetryable({
135
136
  model: 'openai/gpt-5',
136
- retries: [
137
- 'anthropic/claude-sonnet-4'
138
- ]
137
+ retries: ['anthropic/claude-sonnet-4'],
139
138
  });
140
139
 
141
140
  // Is the same as:
142
141
  const retryableModel = createRetryable({
143
142
  model: gateway('openai/gpt-5'),
144
- retries: [
145
- gateway('anthropic/claude-sonnet-4')
146
- ]
143
+ retries: [gateway('anthropic/claude-sonnet-4')],
147
144
  });
148
145
  ```
149
146
 
@@ -179,16 +176,16 @@ const retryableModel = createRetryable({
179
176
  // Dynamic retryables act like if-branches:
180
177
  // If error.statusCode === 429 (too many requests), retry with this model
181
178
  (context) => {
182
- return context.current.error.statusCode === 429
183
- ? { model: azure('gpt-4-mini') } // Retry
184
- : undefined; // Skip
179
+ return context.current.error.statusCode === 429
180
+ ? { model: azure('gpt-4-mini') } // Retry
181
+ : undefined; // Skip
185
182
  },
186
183
 
187
184
  // If error.message ~= "service overloaded", retry with this model
188
185
  (context) => {
189
- return context.current.error.message.includes("service overloaded")
190
- ? { model: azure('gpt-4-mini') } // Retry
191
- : undefined; // Skip
186
+ return context.current.error.message.includes('service overloaded')
187
+ ? { model: azure('gpt-4-mini') } // Retry
188
+ : undefined; // Skip
192
189
  },
193
190
 
194
191
  // Static retryables act like else branches:
@@ -245,7 +242,7 @@ const retryableModel = createRetryable({
245
242
  retries: [
246
243
  // Error-based: catches thrown errors like timeouts, rate limits, etc.
247
244
  errorBasedRetry,
248
-
245
+
249
246
  // Result-based: catches successful responses that need retrying
250
247
  resultBasedRetry,
251
248
  ],
@@ -258,7 +255,7 @@ Result-based retryables are only available for generate calls like `generateText
258
255
 
259
256
  If you don't need precise error matching with custom logic and just want to fall back to different models on any error, you can simply provide a list of models.
260
257
 
261
- > [!NOTE]
258
+ > [!NOTE]
262
259
  > Use the object syntax `{ model: openai('gpt-4') }` if you need to provide additional options like `maxAttempts`, `delay`, etc.
263
260
 
264
261
  ```typescript
@@ -291,6 +288,9 @@ If you need more control over when to retry and which model to use, you can crea
291
288
  > [!NOTE]
292
289
  > You can return additional options like `maxAttempts`, `delay`, etc. along with the model.
293
290
 
291
+ > [!TIP]
292
+ > If you'd like the same flexibility with a typed, composable condition system, see [Experimental: Composable Conditions](#experimental-composable-conditions).
293
+
294
294
  ```typescript
295
295
  import { anthropic } from '@ai-sdk/anthropic';
296
296
  import { openai } from '@ai-sdk/openai';
@@ -318,10 +318,10 @@ const rateLimitRetry: Retryable = (context) => {
318
318
 
319
319
  const retryableModel = createRetryable({
320
320
  // Base model
321
- model: openai('gpt-4-mini'),
321
+ model: openai('gpt-4-mini'),
322
322
  retries: [
323
323
  // Use custom rate limit retryable
324
- rateLimitRetry
324
+ rateLimitRetry,
325
325
 
326
326
  // Other retryables...
327
327
  ],
@@ -340,12 +340,12 @@ import { RetryError } from 'ai';
340
340
 
341
341
  const retryableModel = createRetryable({
342
342
  // Base model = first attempt
343
- model: azure('gpt-4-mini'),
343
+ model: azure('gpt-4-mini'),
344
344
  retries: [
345
345
  // Fallback model 1 = Second attempt
346
- openai('gpt-3.5-turbo'),
346
+ openai('gpt-3.5-turbo'),
347
347
  // Fallback model 2 = Third attempt
348
- anthropic('claude-3-haiku-20240307')
348
+ anthropic('claude-3-haiku-20240307'),
349
349
  ],
350
350
  });
351
351
 
@@ -373,6 +373,9 @@ There are several built-in dynamic retryables available for common use cases:
373
373
  > [!TIP]
374
374
  > Are you missing a retryable for your use case? [Open an issue](https://github.com/zirkelc/ai-retry/issues/new) and let's discuss it!
375
375
 
376
+ > [!NOTE]
377
+ > Looking for a composable alternative? See [Experimental: Composable Conditions](#experimental-composable-conditions) for a `condition().action()` API that builds on small primitives.
378
+
376
379
  - [`contentFilterTriggered`](./src/retryables/content-filter-triggered.ts): Content filter was triggered based on the prompt or completion.
377
380
  - [`requestTimeout`](./src/retryables/request-timeout.ts): Request timeout occurred.
378
381
  - [`requestNotRetryable`](./src/retryables/request-not-retryable.ts): Request failed with a non-retryable error.
@@ -404,8 +407,8 @@ const retryableModel = createRetryable({
404
407
 
405
408
  Handle timeouts by switching to potentially faster models.
406
409
 
407
- > [!NOTE]
408
- > You need to use an `abortSignal` with a timeout on your request.
410
+ > [!NOTE]
411
+ > You need to use an `abortSignal` with a timeout on your request.
409
412
 
410
413
  When a request times out, the `requestTimeout` retryable will automatically create a fresh abort signal for the retry attempt. This prevents the retry from immediately failing due to the already-aborted signal from the original request. If you do not provide a `timeout` value, a default of 60 seconds is used for the retry attempt.
411
414
 
@@ -416,8 +419,8 @@ const retryableModel = createRetryable({
416
419
  model: azure('gpt-4'),
417
420
  retries: [
418
421
  // Defaults to 60 seconds timeout for the retry attempt
419
- requestTimeout(azure('gpt-4-mini')),
420
-
422
+ requestTimeout(azure('gpt-4-mini')),
423
+
421
424
  // Or specify a custom timeout for the retry attempt
422
425
  requestTimeout(azure('gpt-4-mini'), { timeout: 30_000 }),
423
426
  ],
@@ -500,10 +503,9 @@ const result = await generateImage({
500
503
 
501
504
  Handle cases where the base model fails with a non-retryable error.
502
505
 
503
- > [!NOTE]
506
+ > [!NOTE]
504
507
  > You can check if an error is retryable with the `isRetryable` property on an [`APICallError`](https://ai-sdk.dev/docs/reference/ai-sdk-errors/ai-api-call-error#ai_apicallerror).
505
508
 
506
-
507
509
  ```typescript
508
510
  import { requestNotRetryable } from 'ai-retry/retryables';
509
511
 
@@ -517,7 +519,7 @@ const retryable = createRetryable({
517
519
 
518
520
  #### Retry After Delay
519
521
 
520
- If an error is retryable, such as 429 (Too Many Requests) or 503 (Service Unavailable) errors, it will be retried after a delay.
522
+ If an error is retryable, such as 429 (Too Many Requests) or 503 (Service Unavailable) errors, it will be retried after a delay.
521
523
  The delay and exponential backoff can be configured. If the response contains a [`retry-after`](https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Headers/Retry-After) header, it will be prioritized over the configured delay.
522
524
 
523
525
  Note that this retryable does not accept a model parameter; it will always retry the model from the latest failed attempt.
@@ -546,7 +548,7 @@ By default, if a [`retry-after-ms`](https://learn.microsoft.com/en-us/azure/ai-f
546
548
 
547
549
  Automatically retry with a different model when the response JSON doesn't match the expected schema.
548
550
 
549
- This is a result-based retryable that validates the model's JSON output against the schema set by structured output modes like `Output.object()`, `Output.array()`, and `Output.choice()`.
551
+ This is a result-based retryable that validates the model's JSON output against the schema set by structured output modes like `Output.object()`, `Output.array()`, and `Output.choice()`.
550
552
  Normally, schema validation happens outside the model in `generateText`, so a schema validation error would not be seen by the retryable model. This retryable catches it early and retries with a fallback model.
551
553
 
552
554
  > [!NOTE]
@@ -582,6 +584,130 @@ const result = await generateText({
582
584
  console.log(result.object); // { name: "Alice", age: 30 }
583
585
  ```
584
586
 
587
+ ### Experimental: Composable Conditions
588
+
589
+ > [!WARNING]
590
+ > This API is experimental and may change. It is not exported from the package root; opt in via the deep import:
591
+ >
592
+ > ```ts
593
+ > import { ... } from 'ai-retry/retryables/experimental';
594
+ > ```
595
+
596
+ A `condition().action()` API for retryables. Conditions are built from small primitives (`error(fn)`, `result(fn)`), composed with `and` / `or` / `not`, and turned into a `Retryable` by one of two terminal actions: `.switch({ model })` or `.retry({ delay })`. The result drops into the same `retries: [...]` array as the stable helpers, so you can mix the two styles freely.
597
+
598
+ ```typescript
599
+ import { anthropic } from '@ai-sdk/anthropic';
600
+ import { openai } from '@ai-sdk/openai';
601
+ import { generateText } from 'ai';
602
+ import { createRetryable } from 'ai-retry';
603
+ import {
604
+ error,
605
+ finishReason,
606
+ httpStatus,
607
+ } from 'ai-retry/retryables/experimental';
608
+
609
+ const retryableModel = createRetryable({
610
+ model: openai('gpt-4'),
611
+ retries: [
612
+ // Switch on 529 or any "overloaded" message
613
+ httpStatus(529, 'overloaded').switch({
614
+ model: anthropic('claude-3-haiku-20240307'),
615
+ }),
616
+
617
+ // Switch when the response was content-filtered
618
+ finishReason('content-filter').switch({ model: openai('gpt-4o') }),
619
+
620
+ // Retry the same model with exponential backoff on retryable errors
621
+ error.isRetryable(true).retry({ delay: 1_000, backoffFactor: 2 }),
622
+ ],
623
+ });
624
+ ```
625
+
626
+ #### High-level helpers
627
+
628
+ These cover the common cases. Each returns a `Condition` that you finalize with `.switch(...)` or `.retry(...)`.
629
+
630
+ | Helper | Matches when |
631
+ | ------------------------------ | -------------------------------------------------------------------------------------------------- |
632
+ | `httpStatus(...patterns)` | Numbers match the status code; strings match the message (substring); regex matches either |
633
+ | `timeout()` | `Error.name === 'TimeoutError'` (`AbortSignal.timeout()` fired) |
634
+ | `aborted()` | `Error.name === 'AbortError'` (manual `controller.abort()`) |
635
+ | `noImage()` | The image model threw `NoImageGeneratedError` |
636
+ | `finishReason(...reasons)` | The result's `finishReason.unified` matches one of the given values |
637
+ | `schemaInvalid()` | The result text fails JSON-schema validation against the call's `responseFormat` |
638
+
639
+ #### Actions
640
+
641
+ Every `Condition` exposes two terminal actions that turn it into a `Retryable`:
642
+
643
+ - **`.switch({ model, ...options })`** falls back to a different model when the condition matches. Optional fields (`maxAttempts`, `delay`, `backoffFactor`, `timeout`, `options`) are the same as on a normal `Retry` object.
644
+ - **`.retry({ delay?, backoffFactor?, ... })`** retries the current model when the condition matches. Honors `Retry-After` and `Retry-After-Ms` response headers when present, capped at 60 seconds.
645
+
646
+ #### Combinators
647
+
648
+ Compose conditions with the free functions or the methods on `Condition`:
649
+
650
+ ```typescript
651
+ import {
652
+ and,
653
+ error,
654
+ httpStatus,
655
+ not,
656
+ or,
657
+ } from 'ai-retry/retryables/experimental';
658
+
659
+ or(httpStatus(429), error.message('overloaded'));
660
+ and(httpStatus(503), error.message('temporary'));
661
+ not(error.isRetryable(true));
662
+
663
+ // Method form
664
+ httpStatus(429).or(error.message('overloaded'));
665
+ ```
666
+
667
+ #### Primitives
668
+
669
+ The two lowest-level builders. Reach for them when no helper covers your case:
670
+
671
+ | Primitive | Matches when |
672
+ | ------------------ | ----------------------------------------------------------------------------- |
673
+ | `error(predicate)` | The current attempt failed and `predicate(err, ctx)` returns true |
674
+ | `result(predicate)`| The current attempt succeeded and `predicate(res, ctx)` returns true (language models only) |
675
+
676
+ ```typescript
677
+ import { APICallError } from 'ai';
678
+ import { error } from 'ai-retry/retryables/experimental';
679
+
680
+ error<MODEL, APICallError>(
681
+ (e) => APICallError.isInstance(e) && e.statusCode === 418,
682
+ ).switch({ model: fallback });
683
+ ```
684
+
685
+ A few common error fields have ready-made matchers on the `error` namespace:
686
+
687
+ | Helper | Matches when |
688
+ | ------------------------------- | ------------------------------------------------------------------------------------- |
689
+ | `error.isRetryable(flag)` | `APICallError.isRetryable === flag` (default `true`) |
690
+ | `error.statusCode(...patterns)` | Numbers match exactly; regex matches the stringified code (e.g. `/^5\d\d$/` for 5xx) |
691
+ | `error.message(...patterns)` | Substring (case-insensitive) or regex match against the error message |
692
+
693
+ #### Mapping from Built-in retryables
694
+
695
+ Each stable retryable has an equivalent in the new shape:
696
+
697
+ | Built-in | Composable form |
698
+ | ----------------------------------------------- | ----------------------------------------------------------------------------------------------------- |
699
+ | `contentFilterTriggered(m)` | `or(error(/* check e.data.error.code === 'content_filter' */), finishReason('content-filter')).switch({ model: m })` |
700
+ | `requestTimeout(m)` | `timeout().switch({ model: m, timeout: 60_000 })` |
701
+ | `requestNotRetryable(m)` | `error.isRetryable(false).switch({ model: m })` |
702
+ | `schemaMismatch(m)` | `schemaInvalid().switch({ model: m })` |
703
+ | `serviceOverloaded(m)` | `httpStatus(529, 'overloaded').switch({ model: m })` |
704
+ | `serviceUnavailable(m)` | `error.statusCode(503).switch({ model: m })` |
705
+ | `noImageGenerated(m)` | `noImage().switch({ model: m })` |
706
+ | `retryAfterDelay({ delay, backoffFactor })` | `error.isRetryable(true).retry({ delay, backoffFactor })` |
707
+
708
+ > [!NOTE]
709
+ > `error.isRetryable(true)` matches whatever the AI SDK's `APICallError` marks retryable. By default that's status codes 408, 409, 429, and any 5xx, plus network errors and provider-specific overrides (e.g. Anthropic flips it on `error.type === 'overloaded_error'`). It picks up more cases than a manual status-code list.
710
+
585
711
  ### Options
586
712
 
587
713
  #### Disabling Retries
@@ -591,21 +717,27 @@ You can disable retries entirely, which is useful for testing or specific enviro
591
717
  ```typescript
592
718
  const retryableModel = createRetryable({
593
719
  model: openai('gpt-4'), // Base model
594
- retries: [/* ... */],
720
+ retries: [
721
+ /* ... */
722
+ ],
595
723
  disabled: true, // Retries are completely disabled
596
724
  });
597
725
 
598
726
  // Or disable based on environment
599
727
  const retryableModel = createRetryable({
600
728
  model: openai('gpt-4'), // Base model
601
- retries: [/* ... */],
729
+ retries: [
730
+ /* ... */
731
+ ],
602
732
  disabled: process.env.NODE_ENV === 'test', // Disable in test environment
603
733
  });
604
734
 
605
735
  // Or use a function for dynamic control
606
736
  const retryableModel = createRetryable({
607
737
  model: openai('gpt-4'), // Base model
608
- retries: [/* ... */],
738
+ retries: [
739
+ /* ... */
740
+ ],
609
741
  disabled: () => !featureFlags.isEnabled('ai-retries'), // Check feature flag
610
742
  });
611
743
  ```
@@ -630,7 +762,7 @@ const result = await generateText({
630
762
  model: retryableModel,
631
763
  prompt: 'Write a vegetarian lasagna recipe for 4 people.',
632
764
  // Will be respected during delays
633
- abortSignal: AbortSignal.timeout(60_000),
765
+ abortSignal: AbortSignal.timeout(60_000),
634
766
  });
635
767
  ```
636
768
 
@@ -647,6 +779,7 @@ const retryableModel = createRetryable({
647
779
  ],
648
780
  });
649
781
  ```
782
+
650
783
  #### Timeouts
651
784
 
652
785
  When a retry specifies a `timeout` value, a fresh `AbortSignal.timeout()` is created for that retry attempt, replacing any existing abort signal. This is essential when retrying after timeout errors, as the original abort signal would already be in an aborted state.
@@ -656,9 +789,9 @@ const retryableModel = createRetryable({
656
789
  model: openai('gpt-4'),
657
790
  retries: [
658
791
  // Provide a fresh 30 second timeout for the retry
659
- {
660
- model: openai('gpt-3.5-turbo'),
661
- timeout: 30_000
792
+ {
793
+ model: openai('gpt-3.5-turbo'),
794
+ timeout: 30_000,
662
795
  },
663
796
  ],
664
797
  });
@@ -668,7 +801,7 @@ const result = await generateText({
668
801
  model: retryableModel,
669
802
  prompt: 'Write a story',
670
803
  // Original request timeout
671
- abortSignal: AbortSignal.timeout(60_000),
804
+ abortSignal: AbortSignal.timeout(60_000),
672
805
  });
673
806
  ```
674
807
 
@@ -681,11 +814,11 @@ const retryableModel = createRetryable({
681
814
  model: openai('gpt-4'),
682
815
  retries: [
683
816
  // Try this once
684
- anthropic('claude-3-haiku-20240307'),
817
+ anthropic('claude-3-haiku-20240307'),
685
818
  // Try this one more time (initial + 1 retry)
686
- { model: openai('gpt-4'), maxAttempts: 2 },
819
+ { model: openai('gpt-4'), maxAttempts: 2 },
687
820
  // Already tried, won't be retried again
688
- anthropic('claude-3-haiku-20240307')
821
+ anthropic('claude-3-haiku-20240307'),
689
822
  ],
690
823
  });
691
824
  ```
@@ -757,42 +890,96 @@ The following options can be overridden:
757
890
 
758
891
  ##### Language Model Options
759
892
 
760
- | Option | Description |
761
- |--------|-------------|
762
- | [`prompt`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#prompt) | Override the entire prompt for the retry |
763
- | [`temperature`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#temperature) | Temperature setting for controlling randomness |
764
- | [`topP`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#topp) | Nucleus sampling parameter |
765
- | [`topK`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#topk) | Top-K sampling parameter |
766
- | [`maxOutputTokens`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#max-output-tokens) | Maximum number of tokens to generate |
767
- | [`seed`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#seed) | Random seed for deterministic generation |
768
- | [`stopSequences`](https://ai-sdk.dev/docs/reference/ai-sdk-types/generate-text#stopsequences) | Stop sequences to end generation |
769
- | [`presencePenalty`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#presencepenalty) | Presence penalty for reducing repetition |
770
- | [`frequencyPenalty`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#frequencypenalty) | Frequency penalty for reducing repetition |
771
- | [`headers`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#headers) | Additional HTTP headers |
772
- | [`providerOptions`](https://ai-sdk.dev/docs/reference/ai-sdk-types/generate-text#provideroptions) | Provider-specific options |
893
+ | Option | Description |
894
+ | -------------------------------------------------------------------------------------------------- | ---------------------------------------------- |
895
+ | [`prompt`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#prompt) | Override the entire prompt for the retry |
896
+ | [`temperature`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#temperature) | Temperature setting for controlling randomness |
897
+ | [`topP`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#topp) | Nucleus sampling parameter |
898
+ | [`topK`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#topk) | Top-K sampling parameter |
899
+ | [`maxOutputTokens`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#max-output-tokens) | Maximum number of tokens to generate |
900
+ | [`seed`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#seed) | Random seed for deterministic generation |
901
+ | [`stopSequences`](https://ai-sdk.dev/docs/reference/ai-sdk-types/generate-text#stopsequences) | Stop sequences to end generation |
902
+ | [`presencePenalty`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#presencepenalty) | Presence penalty for reducing repetition |
903
+ | [`frequencyPenalty`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#frequencypenalty) | Frequency penalty for reducing repetition |
904
+ | [`headers`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-text#headers) | Additional HTTP headers |
905
+ | [`providerOptions`](https://ai-sdk.dev/docs/reference/ai-sdk-types/generate-text#provideroptions) | Provider-specific options |
773
906
 
774
907
  ##### Embedding Model Options
775
908
 
776
- | Option | Description |
777
- |--------|-------------|
778
- | [`values`](https://ai-sdk.dev/docs/reference/ai-sdk-core/embed#values) | Override the values to embed |
779
- | [`headers`](https://ai-sdk.dev/docs/reference/ai-sdk-core/embed#headers) | Additional HTTP headers |
780
- | [`providerOptions`](https://ai-sdk.dev/docs/reference/ai-sdk-core/embed#provideroptions) | Provider-specific options |
909
+ | Option | Description |
910
+ | ---------------------------------------------------------------------------------------- | ---------------------------- |
911
+ | [`values`](https://ai-sdk.dev/docs/reference/ai-sdk-core/embed#values) | Override the values to embed |
912
+ | [`headers`](https://ai-sdk.dev/docs/reference/ai-sdk-core/embed#headers) | Additional HTTP headers |
913
+ | [`providerOptions`](https://ai-sdk.dev/docs/reference/ai-sdk-core/embed#provideroptions) | Provider-specific options |
781
914
 
782
915
  ##### Image Model Options
783
916
 
784
- | Option | Description |
785
- |--------|-------------|
786
- | [`n`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-image#n) | Number of images to generate |
787
- | [`size`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-image#size) | Size of generated images |
788
- | [`aspectRatio`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-image#aspectratio) | Aspect ratio of generated images |
789
- | [`seed`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-image#seed) | Random seed for reproducibility |
790
- | [`headers`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-image#headers) | Additional HTTP headers |
791
- | [`providerOptions`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-image#provideroptions) | Provider-specific options |
917
+ | Option | Description |
918
+ | ------------------------------------------------------------------------------------------------- | -------------------------------- |
919
+ | [`n`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-image#n) | Number of images to generate |
920
+ | [`size`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-image#size) | Size of generated images |
921
+ | [`aspectRatio`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-image#aspectratio) | Aspect ratio of generated images |
922
+ | [`seed`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-image#seed) | Random seed for reproducibility |
923
+ | [`headers`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-image#headers) | Additional HTTP headers |
924
+ | [`providerOptions`](https://ai-sdk.dev/docs/reference/ai-sdk-core/generate-image#provideroptions) | Provider-specific options |
925
+
926
+ #### Dynamic Call Options
927
+
928
+ You can also override call options dynamically from inside the `onRetry` callback, instead of declaring them statically on the retry object. This is useful when the override depends on something only known at runtime, like the prompt that just failed, the model that's about to be tried next, or the error that triggered the retry. The overrides apply to the upcoming retry attempt only, and can change the same fields as the static `options` on a retry plus the request `timeout`. The callback may also be `async` if computing the override needs to do work (e.g. fetching a fresh credential).
929
+
930
+ A common use case is sanitizing provider-scoped metadata when falling back to a different provider, for example stripping `providerOptions.azure.itemId` references from the previous prompt before retrying on OpenAI:
931
+
932
+ ```typescript
933
+ import { createRetryable } from 'ai-retry';
934
+ import { azure } from '@ai-sdk/azure';
935
+ import { openai } from '@ai-sdk/openai';
936
+
937
+ const retryableModel = createRetryable({
938
+ model: azure('gpt-5-chat'),
939
+ retries: [openai('gpt-5-chat')],
940
+ onRetry: (context) => {
941
+ const { current, attempts } = context;
942
+ const previous = attempts.at(-1);
943
+
944
+ if (current.model.provider !== previous.model.provider) {
945
+ // Strip provider-scoped metadata from the prompt before retrying on a different provider
946
+ return {
947
+ options: {
948
+ prompt: stripProviderMetadata(current.options.prompt),
949
+ },
950
+ };
951
+ }
952
+ },
953
+ });
954
+ ```
955
+
956
+ Inside the `onRetry` callback, `context.current.model` is the model that's about to be tried next, while `context.current.options` and `context.current.error` describe the failed attempt that triggered the retry. The previous model is available at `context.attempts.at(-1).model`.
957
+
958
+ `onRetry` may also be `async`, which is useful if computing the override needs to do work (e.g. fetching a fresh credential):
959
+
960
+ ```typescript
961
+ const retryableModel = createRetryable({
962
+ model: openai('gpt-4o-mini'),
963
+ retries: [anthropic('claude-sonnet-4-20250514')],
964
+ onRetry: async (context) => {
965
+ const { current } = context;
966
+
967
+ const headers = await refreshAuthHeaders(current.model.provider);
968
+ return { options: { headers } };
969
+ },
970
+ });
971
+ ```
972
+
973
+ **Precedence** for the upcoming retry attempt (highest to lowest):
974
+
975
+ 1. The value returned from `onRetry`
976
+ 2. The `options` returned from the retryable
977
+ 3. The original call options from the request
792
978
 
793
979
  #### Logging
794
980
 
795
981
  You can use the following callbacks to log retry attempts and errors:
982
+
796
983
  - `onError` is invoked if an error occurs.
797
984
  - `onRetry` is invoked before attempting a retry.
798
985
  - `onSuccess` is invoked after a successful request with the model that handled it.
@@ -800,17 +987,24 @@ You can use the following callbacks to log retry attempts and errors:
800
987
  ```typescript
801
988
  const retryableModel = createRetryable({
802
989
  model: openai('gpt-4-mini'),
803
- retries: [/* your retryables */],
990
+ retries: [
991
+ /* your retryables */
992
+ ],
804
993
  onError: (context) => {
805
- console.error(`Attempt ${context.attempts.length} with ${context.current.model.provider}/${context.current.model.modelId} failed:`,
806
- context.current.error
994
+ console.error(
995
+ `Attempt ${context.attempts.length} with ${context.current.model.provider}/${context.current.model.modelId} failed:`,
996
+ context.current.error,
807
997
  );
808
998
  },
809
999
  onRetry: (context) => {
810
- console.log(`Retrying attempt ${context.attempts.length + 1} with model ${context.current.model.provider}/${context.current.model.modelId}...`);
1000
+ console.log(
1001
+ `Retrying attempt ${context.attempts.length + 1} with model ${context.current.model.provider}/${context.current.model.modelId}...`,
1002
+ );
811
1003
  },
812
1004
  onSuccess: (context) => {
813
- console.log(`Request handled by ${context.current.model.provider}/${context.current.model.modelId}`);
1005
+ console.log(
1006
+ `Request handled by ${context.current.model.provider}/${context.current.model.modelId}`,
1007
+ );
814
1008
  },
815
1009
  });
816
1010
  ```
@@ -819,11 +1013,11 @@ const retryableModel = createRetryable({
819
1013
 
820
1014
  By default, every new request starts with the base model, even if a previous request was retried with a different model. The `reset` option changes this behavior by making the last successfully retried model **sticky**, meaning that subsequent requests will continue using that model instead of switching back to the base model. The reset value controls how long the retry model stays sticky before resetting back to the base model.
821
1015
 
822
- | Value | Description |
823
- |-------|-------------|
824
- | `after-request` | Reset immediately after the next request (default) |
1016
+ | Value | Description |
1017
+ | ------------------ | ------------------------------------------------------------ |
1018
+ | `after-request` | Reset immediately after the next request (default) |
825
1019
  | `after-N-requests` | Keep the retry model for the next **N** requests, then reset |
826
- | `after-N-seconds` | Keep the retry model for **N** seconds, then reset |
1020
+ | `after-N-seconds` | Keep the retry model for **N** seconds, then reset |
827
1021
 
828
1022
  ##### Reset after each request (default)
829
1023
 
@@ -874,24 +1068,29 @@ In the second case, errors during stream processing will not always be retried,
874
1068
  Creates a retryable model that works with language models, embedding models, and image models.
875
1069
 
876
1070
  ```ts
877
- interface RetryableModelOptions<MODEL extends LanguageModelV3 | EmbeddingModelV3 | ImageModelV3> {
1071
+ interface RetryableModelOptions<
1072
+ MODEL extends LanguageModelV3 | EmbeddingModelV3 | ImageModelV3,
1073
+ > {
878
1074
  model: MODEL;
879
1075
  retries: Array<Retryable<MODEL> | MODEL>;
880
1076
  disabled?: boolean | (() => boolean);
881
1077
  reset?: Reset;
882
1078
  onError?: (context: RetryContext<MODEL>) => void;
883
- onRetry?: (context: RetryContext<MODEL>) => void;
1079
+ onRetry?: (
1080
+ context: RetryContext<MODEL>,
1081
+ ) => void | OnRetryOverrides<MODEL> | Promise<void | OnRetryOverrides<MODEL>>;
884
1082
  onSuccess?: (context: SuccessContext<MODEL>) => void;
885
1083
  }
886
1084
  ```
887
1085
 
888
1086
  **Options:**
1087
+
889
1088
  - `model`: The base model to use for the initial request.
890
1089
  - `retries`: Array of retryables (functions, models, or retry objects) to attempt on failure.
891
1090
  - `disabled`: Disable all retry logic. Can be a boolean or function returning boolean. Default: `false` (retries enabled).
892
1091
  - `reset`: Controls when to reset back to the base model after a successful retry. Default: `after-request`.
893
1092
  - `onError`: Callback invoked when an error occurs.
894
- - `onRetry`: Callback invoked before attempting a retry.
1093
+ - `onRetry`: Callback invoked before attempting a retry. May optionally return an `OnRetryOverrides` object (or a `Promise` of one) to override `options.*` and `timeout` for the upcoming attempt only. See [Dynamic Call Options](#dynamic-call-options).
895
1094
  - `onSuccess`: Callback invoked after a successful request. Receives the model that handled the request and all previous attempts.
896
1095
 
897
1096
  #### `Reset`
@@ -915,9 +1114,7 @@ A `Retryable` is a function that receives a `RetryContext` with the current erro
915
1114
  It should evaluate the error/result and decide whether to retry by returning a `Retry` or to skip by returning `undefined`.
916
1115
 
917
1116
  ```ts
918
- type Retryable = (
919
- context: RetryContext
920
- ) => Retry | Promise<Retry> | undefined;
1117
+ type Retryable = (context: RetryContext) => Retry | Promise<Retry> | undefined;
921
1118
  ```
922
1119
 
923
1120
  #### `Retry`
@@ -927,12 +1124,15 @@ A `Retry` specifies the model to retry and optional settings. The available opti
927
1124
  ```typescript
928
1125
  interface Retry {
929
1126
  model: LanguageModelV3 | EmbeddingModelV3 | ImageModelV3;
930
- maxAttempts?: number; // Maximum retry attempts per model (default: 1)
931
- delay?: number; // Delay in milliseconds before retrying
932
- backoffFactor?: number; // Multiplier for exponential backoff
933
- timeout?: number; // Timeout in milliseconds for the retry attempt
1127
+ maxAttempts?: number; // Maximum retry attempts per model (default: 1)
1128
+ delay?: number; // Delay in milliseconds before retrying
1129
+ backoffFactor?: number; // Multiplier for exponential backoff
1130
+ timeout?: number; // Timeout in milliseconds for the retry attempt
934
1131
  providerOptions?: ProviderOptions; // @deprecated - use options.providerOptions instead
935
- options?: LanguageModelV3CallOptions | EmbeddingModelV3CallOptions | ImageModelV3CallOptions; // Call options to override for this retry
1132
+ options?:
1133
+ | LanguageModelV3CallOptions
1134
+ | EmbeddingModelV3CallOptions
1135
+ | ImageModelV3CallOptions; // Call options to override for this retry
936
1136
  }
937
1137
  ```
938
1138
 
@@ -966,8 +1166,15 @@ A `SuccessAttempt` represents the successful attempt with the model, result, and
966
1166
  interface SuccessAttempt {
967
1167
  type: 'success';
968
1168
  model: LanguageModelV3 | EmbeddingModelV3 | ImageModelV3;
969
- result: LanguageModelGenerate | LanguageModelStream | EmbeddingModelEmbed | ImageModelGenerate;
970
- options: LanguageModelV3CallOptions | EmbeddingModelV3CallOptions | ImageModelV3CallOptions;
1169
+ result:
1170
+ | LanguageModelGenerate
1171
+ | LanguageModelStream
1172
+ | EmbeddingModelEmbed
1173
+ | ImageModelGenerate;
1174
+ options:
1175
+ | LanguageModelV3CallOptions
1176
+ | EmbeddingModelV3CallOptions
1177
+ | ImageModelV3CallOptions;
971
1178
  }
972
1179
  ```
973
1180
 
@@ -982,7 +1189,10 @@ type RetryAttempt =
982
1189
  type: 'error';
983
1190
  error: unknown;
984
1191
  model: LanguageModelV3 | EmbeddingModelV3 | ImageModelV3;
985
- options: LanguageModelV3CallOptions | EmbeddingModelV3CallOptions | ImageModelV3CallOptions;
1192
+ options:
1193
+ | LanguageModelV3CallOptions
1194
+ | EmbeddingModelV3CallOptions
1195
+ | ImageModelV3CallOptions;
986
1196
  }
987
1197
  | {
988
1198
  type: 'result';