ai-sdk-rate-limiter 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -77,7 +77,7 @@ const limiter = createRateLimiter({
77
77
  daily: 50,
78
78
  monthly: 500,
79
79
  },
80
- onExceeded: 'throw', // or 'queue' wait until the period resets
80
+ onExceeded: 'throw', // 'throw' | 'queue' | 'fallback'
81
81
  },
82
82
 
83
83
  // Queue behavior
@@ -169,6 +169,49 @@ Costs are based on **actual token counts** from API responses — not estimates.
169
169
 
170
170
  ---
171
171
 
172
+ ## Budget fallback routing
173
+
174
+ When a budget limit is hit, you can transparently reroute to a cheaper model instead of throwing an error. Set `cost.onExceeded` to `'fallback'` and pass a `fallback` option to `wrap()`:
175
+
176
+ ```typescript
177
+ const limiter = createRateLimiter({
178
+ cost: {
179
+ budget: { daily: 10 },
180
+ onExceeded: 'fallback', // reroute to fallback instead of throwing
181
+ },
182
+ on: {
183
+ budgetHit: ({ model, currentCostUsd, limitUsd, period }) =>
184
+ console.warn(`${model} ${period} budget hit ($${currentCostUsd} of $${limitUsd})`),
185
+ },
186
+ })
187
+
188
+ const model = limiter.wrap(
189
+ openai('gpt-4o'), // primary model
190
+ { fallback: openai('gpt-4o-mini') }, // used when budget is exceeded
191
+ )
192
+
193
+ // Under budget → uses gpt-4o normally
194
+ // Over $10/day → silently switches to gpt-4o-mini, no code changes needed
195
+ const result = await generateText({ model, prompt })
196
+ ```
197
+
198
+ **How it works:**
199
+ 1. The budget is checked before every request against total rolling spend
200
+ 2. When exceeded, `BudgetExceededError` is caught inside `wrap()` before it reaches your code
201
+ 3. The request is re-executed against the fallback model, bypassing the budget pre-check
202
+ 4. Fallback usage is tracked under the fallback model's ID in `getCostReport()`
203
+
204
+ **Behavior matrix:**
205
+
206
+ | `onExceeded` | `fallback` configured | Outcome |
207
+ |---|---|---|
208
+ | `'throw'` | any | Throws `BudgetExceededError` |
209
+ | `'fallback'` | yes | Transparently uses fallback model |
210
+ | `'fallback'` | no | Throws `BudgetExceededError` |
211
+ | `'queue'` | any | Queues until period resets |
212
+
213
+ ---
214
+
172
215
  ## Backpressure — know before you send
173
216
 
174
217
  Check estimated wait time before committing to a request. Useful for showing loading states or shedding load gracefully.
@@ -209,7 +252,7 @@ limiter.off('queued', handler)
209
252
  | `dequeued` | Request leaves the queue | `model`, `waitedMs`, `priority` |
210
253
  | `retrying` | A failed request is about to retry | `model`, `attempt`, `maxAttempts`, `delayMs`, `error` |
211
254
  | `rateLimited` | Limit hit (local or remote 429) | `model`, `source`, `limitType`, `resetAt` |
212
- | `budgetHit` | Cost budget exceeded | `model`, `currentCostUsd`, `limitUsd`, `period` |
255
+ | `budgetHit` | Cost budget exceeded | `model`, `currentCostUsd`, `limitUsd`, `period`, `usingFallback` |
213
256
  | `dropped` | Request rejected (queue full or timeout) | `model`, `reason` |
214
257
  | `completed` | Request finished successfully | `model`, `inputTokens`, `outputTokens`, `costUsd`, `latencyMs` |
215
258
 
@@ -304,7 +347,7 @@ const model = req.user.plan === 'paid'
304
347
 
305
348
  ## Built-in model registry
306
349
 
307
- Limits and pricing are built-in for every major model. These defaults are Tier 1 (most conservative) — override with your actual tier limits.
350
+ Limits and pricing are built-in for every major model across 6 providers. Defaults are conservative (free/Tier 1) — override with your actual plan limits.
308
351
 
309
352
  **OpenAI**
310
353
 
@@ -332,13 +375,54 @@ Limits and pricing are built-in for every major model. These defaults are Tier 1
332
375
  | gemini-1.5-pro | 2 | 32,000 | $1.25 | $5.00 |
333
376
  | gemini-1.5-flash | 15 | 1,000,000 | $0.075 | $0.30 |
334
377
 
378
+ **Groq** (free tier defaults — on-demand tier is 6,000 RPM / 200k TPM)
379
+
380
+ | Model | RPM | ITPM | Input $/M | Output $/M |
381
+ |---|---|---|---|---|
382
+ | llama-3.3-70b-versatile | 30 | 6,000 | $0.59 | $0.79 |
383
+ | llama-3.1-8b-instant | 30 | 20,000 | $0.05 | $0.08 |
384
+ | mixtral-8x7b-32768 | 30 | 5,000 | $0.24 | $0.24 |
385
+ | gemma2-9b-it | 30 | 15,000 | $0.20 | $0.20 |
386
+ | deepseek-r1-distill-llama-70b | 30 | 6,000 | $0.75 | $0.99 |
387
+
388
+ **Mistral**
389
+
390
+ | Model | RPM | ITPM | Input $/M | Output $/M |
391
+ |---|---|---|---|---|
392
+ | mistral-large-latest | 500 | 100,000 | $2.00 | $6.00 |
393
+ | mistral-small-latest | 500 | 100,000 | $0.10 | $0.30 |
394
+ | codestral-latest | 500 | 100,000 | $0.30 | $0.90 |
395
+ | open-mistral-nemo | 500 | 100,000 | $0.15 | $0.15 |
396
+ | pixtral-large-latest | 500 | 100,000 | $2.00 | $6.00 |
397
+
398
+ **Cohere** (trial tier defaults — production tier is 10,000+ RPM)
399
+
400
+ | Model | RPM | ITPM | Input $/M | Output $/M |
401
+ |---|---|---|---|---|
402
+ | command-r-plus | 20 | 100,000 | $2.50 | $10.00 |
403
+ | command-r | 20 | 100,000 | $0.15 | $0.60 |
404
+ | command | 20 | 100,000 | $0.50 | $1.50 |
405
+ | command-light | 20 | 100,000 | $0.15 | $0.60 |
406
+
335
407
  Unknown models fall back to 60 RPM / 100k ITPM with no cost tracking. You can inspect or extend the registry:
336
408
 
337
409
  ```typescript
338
- import { OPENAI_MODELS, ANTHROPIC_MODELS, resolveModelLimits, isKnownModel } from 'ai-sdk-rate-limiter'
410
+ import {
411
+ OPENAI_MODELS,
412
+ ANTHROPIC_MODELS,
413
+ GOOGLE_MODELS,
414
+ GROQ_MODELS,
415
+ MISTRAL_MODELS,
416
+ COHERE_MODELS,
417
+ resolveModelLimits,
418
+ isKnownModel,
419
+ } from 'ai-sdk-rate-limiter'
420
+
421
+ console.log(GROQ_MODELS['llama-3.3-70b-versatile'])
422
+ // { rpm: 30, itpm: 6000, otpm: 6000, rpd: 1000, inputPricePerMillion: 0.59, ... }
339
423
 
340
- console.log(OPENAI_MODELS['gpt-4o'])
341
- // { rpm: 500, itpm: 30000, otpm: 30000, inputPricePerMillion: 2.5, ... }
424
+ console.log(isKnownModel('llama-3.3-70b-versatile', 'groq'))
425
+ // true
342
426
 
343
427
  console.log(isKnownModel('my-fine-tune', 'openai'))
344
428
  // false → will use fallback limits
package/dist/index.cjs CHANGED
@@ -317,7 +317,7 @@ var CostTracker = class {
317
317
  ];
318
318
  for (const { limit, current, period } of checks) {
319
319
  if (limit !== void 0 && current + estimatedCostUsd > limit) {
320
- if (onExceeded === "throw") {
320
+ if (onExceeded === "throw" || onExceeded === "fallback") {
321
321
  throw new BudgetExceededError(model, current, limit, period);
322
322
  }
323
323
  return false;
@@ -873,6 +873,341 @@ var GOOGLE_MODELS = {
873
873
  }
874
874
  };
875
875
 
876
+ // src/registry/groq.ts
877
+ var GROQ_MODELS = {
878
+ // -------------------------------------------------------------------------
879
+ // Llama 3.3 family
880
+ // -------------------------------------------------------------------------
881
+ "llama-3.3-70b-versatile": {
882
+ rpm: 30,
883
+ itpm: 6e3,
884
+ otpm: 6e3,
885
+ rpd: 1e3,
886
+ inputPricePerMillion: 0.59,
887
+ outputPricePerMillion: 0.79
888
+ },
889
+ "llama-3.3-70b-specdec": {
890
+ rpm: 30,
891
+ itpm: 6e3,
892
+ otpm: 6e3,
893
+ rpd: 1e3,
894
+ inputPricePerMillion: 0.59,
895
+ outputPricePerMillion: 0.99
896
+ },
897
+ // -------------------------------------------------------------------------
898
+ // Llama 3.1 family
899
+ // -------------------------------------------------------------------------
900
+ "llama-3.1-8b-instant": {
901
+ rpm: 30,
902
+ itpm: 2e4,
903
+ otpm: 2e4,
904
+ rpd: 14400,
905
+ inputPricePerMillion: 0.05,
906
+ outputPricePerMillion: 0.08
907
+ },
908
+ "llama-3.1-70b-versatile": {
909
+ rpm: 30,
910
+ itpm: 6e3,
911
+ otpm: 6e3,
912
+ rpd: 1e3,
913
+ inputPricePerMillion: 0.59,
914
+ outputPricePerMillion: 0.79
915
+ },
916
+ // -------------------------------------------------------------------------
917
+ // Llama 3 family
918
+ // -------------------------------------------------------------------------
919
+ "llama3-70b-8192": {
920
+ rpm: 30,
921
+ itpm: 6e3,
922
+ otpm: 6e3,
923
+ rpd: 14400,
924
+ inputPricePerMillion: 0.59,
925
+ outputPricePerMillion: 0.79
926
+ },
927
+ "llama3-8b-8192": {
928
+ rpm: 30,
929
+ itpm: 3e4,
930
+ otpm: 3e4,
931
+ rpd: 14400,
932
+ inputPricePerMillion: 0.05,
933
+ outputPricePerMillion: 0.08
934
+ },
935
+ "llama-guard-3-8b": {
936
+ rpm: 30,
937
+ itpm: 15e3,
938
+ otpm: 15e3,
939
+ rpd: 14400,
940
+ inputPricePerMillion: 0.2,
941
+ outputPricePerMillion: 0.2
942
+ },
943
+ // -------------------------------------------------------------------------
944
+ // Mixtral family
945
+ // -------------------------------------------------------------------------
946
+ "mixtral-8x7b-32768": {
947
+ rpm: 30,
948
+ itpm: 5e3,
949
+ otpm: 5e3,
950
+ rpd: 14400,
951
+ inputPricePerMillion: 0.24,
952
+ outputPricePerMillion: 0.24
953
+ },
954
+ // -------------------------------------------------------------------------
955
+ // Gemma family
956
+ // -------------------------------------------------------------------------
957
+ "gemma2-9b-it": {
958
+ rpm: 30,
959
+ itpm: 15e3,
960
+ otpm: 15e3,
961
+ rpd: 14400,
962
+ inputPricePerMillion: 0.2,
963
+ outputPricePerMillion: 0.2
964
+ },
965
+ "gemma-7b-it": {
966
+ rpm: 30,
967
+ itpm: 15e3,
968
+ otpm: 15e3,
969
+ rpd: 14400,
970
+ inputPricePerMillion: 0.07,
971
+ outputPricePerMillion: 0.07
972
+ },
973
+ // -------------------------------------------------------------------------
974
+ // Deepseek family
975
+ // -------------------------------------------------------------------------
976
+ "deepseek-r1-distill-llama-70b": {
977
+ rpm: 30,
978
+ itpm: 6e3,
979
+ otpm: 6e3,
980
+ rpd: 1e3,
981
+ inputPricePerMillion: 0.75,
982
+ outputPricePerMillion: 0.99
983
+ },
984
+ "deepseek-r1-distill-qwen-32b": {
985
+ rpm: 30,
986
+ itpm: 6e3,
987
+ otpm: 6e3,
988
+ rpd: 1e3,
989
+ inputPricePerMillion: 0.69,
990
+ outputPricePerMillion: 0.69
991
+ }
992
+ };
993
+
994
+ // src/registry/mistral.ts
995
+ var MISTRAL_MODELS = {
996
+ // -------------------------------------------------------------------------
997
+ // Mistral Large — frontier model
998
+ // -------------------------------------------------------------------------
999
+ "mistral-large-latest": {
1000
+ rpm: 500,
1001
+ itpm: 1e5,
1002
+ otpm: 1e5,
1003
+ inputPricePerMillion: 2,
1004
+ outputPricePerMillion: 6
1005
+ },
1006
+ "mistral-large-2411": {
1007
+ rpm: 500,
1008
+ itpm: 1e5,
1009
+ otpm: 1e5,
1010
+ inputPricePerMillion: 2,
1011
+ outputPricePerMillion: 6
1012
+ },
1013
+ "mistral-large-2407": {
1014
+ rpm: 500,
1015
+ itpm: 1e5,
1016
+ otpm: 1e5,
1017
+ inputPricePerMillion: 2,
1018
+ outputPricePerMillion: 6
1019
+ },
1020
+ // -------------------------------------------------------------------------
1021
+ // Mistral Small — efficient, low-cost
1022
+ // -------------------------------------------------------------------------
1023
+ "mistral-small-latest": {
1024
+ rpm: 500,
1025
+ itpm: 1e5,
1026
+ otpm: 1e5,
1027
+ inputPricePerMillion: 0.1,
1028
+ outputPricePerMillion: 0.3
1029
+ },
1030
+ "mistral-small-2409": {
1031
+ rpm: 500,
1032
+ itpm: 1e5,
1033
+ otpm: 1e5,
1034
+ inputPricePerMillion: 0.1,
1035
+ outputPricePerMillion: 0.3
1036
+ },
1037
+ // -------------------------------------------------------------------------
1038
+ // Pixtral Large — multimodal
1039
+ // -------------------------------------------------------------------------
1040
+ "pixtral-large-latest": {
1041
+ rpm: 500,
1042
+ itpm: 1e5,
1043
+ otpm: 1e5,
1044
+ inputPricePerMillion: 2,
1045
+ outputPricePerMillion: 6
1046
+ },
1047
+ "pixtral-large-2411": {
1048
+ rpm: 500,
1049
+ itpm: 1e5,
1050
+ otpm: 1e5,
1051
+ inputPricePerMillion: 2,
1052
+ outputPricePerMillion: 6
1053
+ },
1054
+ "pixtral-12b": {
1055
+ rpm: 500,
1056
+ itpm: 1e5,
1057
+ otpm: 1e5,
1058
+ inputPricePerMillion: 0.15,
1059
+ outputPricePerMillion: 0.15
1060
+ },
1061
+ "pixtral-12b-2409": {
1062
+ rpm: 500,
1063
+ itpm: 1e5,
1064
+ otpm: 1e5,
1065
+ inputPricePerMillion: 0.15,
1066
+ outputPricePerMillion: 0.15
1067
+ },
1068
+ // -------------------------------------------------------------------------
1069
+ // Codestral — code-optimized
1070
+ // -------------------------------------------------------------------------
1071
+ "codestral-latest": {
1072
+ rpm: 500,
1073
+ itpm: 1e5,
1074
+ otpm: 1e5,
1075
+ inputPricePerMillion: 0.3,
1076
+ outputPricePerMillion: 0.9
1077
+ },
1078
+ "codestral-2501": {
1079
+ rpm: 500,
1080
+ itpm: 1e5,
1081
+ otpm: 1e5,
1082
+ inputPricePerMillion: 0.3,
1083
+ outputPricePerMillion: 0.9
1084
+ },
1085
+ // -------------------------------------------------------------------------
1086
+ // Open models (free / self-hosted weights available)
1087
+ // -------------------------------------------------------------------------
1088
+ "open-mistral-nemo": {
1089
+ rpm: 500,
1090
+ itpm: 1e5,
1091
+ otpm: 1e5,
1092
+ inputPricePerMillion: 0.15,
1093
+ outputPricePerMillion: 0.15
1094
+ },
1095
+ "open-mixtral-8x22b": {
1096
+ rpm: 500,
1097
+ itpm: 1e5,
1098
+ otpm: 1e5,
1099
+ inputPricePerMillion: 2,
1100
+ outputPricePerMillion: 6
1101
+ },
1102
+ "open-mixtral-8x7b": {
1103
+ rpm: 500,
1104
+ itpm: 1e5,
1105
+ otpm: 1e5,
1106
+ inputPricePerMillion: 0.7,
1107
+ outputPricePerMillion: 0.7
1108
+ },
1109
+ "open-mistral-7b": {
1110
+ rpm: 500,
1111
+ itpm: 1e5,
1112
+ otpm: 1e5,
1113
+ inputPricePerMillion: 0.25,
1114
+ outputPricePerMillion: 0.25
1115
+ },
1116
+ // -------------------------------------------------------------------------
1117
+ // Mistral Embed — embedding only (produces no output tokens, so otpm and output price are 0)
1118
+ // -------------------------------------------------------------------------
1119
+ "mistral-embed": {
1120
+ rpm: 500,
1121
+ itpm: 1e5,
1122
+ otpm: 0,
1123
+ inputPricePerMillion: 0.1,
1124
+ outputPricePerMillion: 0
1125
+ }
1126
+ };
1127
+
1128
+ // src/registry/cohere.ts
1129
+ var COHERE_MODELS = {
1130
+ // -------------------------------------------------------------------------
1131
+ // Command R+ — highest capability
1132
+ // -------------------------------------------------------------------------
1133
+ "command-r-plus": {
1134
+ rpm: 20,
1135
+ itpm: 1e5,
1136
+ otpm: 1e5,
1137
+ inputPricePerMillion: 2.5,
1138
+ outputPricePerMillion: 10
1139
+ },
1140
+ "command-r-plus-08-2024": {
1141
+ rpm: 20,
1142
+ itpm: 1e5,
1143
+ otpm: 1e5,
1144
+ inputPricePerMillion: 2.5,
1145
+ outputPricePerMillion: 10
1146
+ },
1147
+ "command-r-plus-04-2024": {
1148
+ rpm: 20,
1149
+ itpm: 1e5,
1150
+ otpm: 1e5,
1151
+ inputPricePerMillion: 2.5,
1152
+ outputPricePerMillion: 10
1153
+ },
1154
+ // -------------------------------------------------------------------------
1155
+ // Command R — balanced, RAG-optimized
1156
+ // -------------------------------------------------------------------------
1157
+ "command-r": {
1158
+ rpm: 20,
1159
+ itpm: 1e5,
1160
+ otpm: 1e5,
1161
+ inputPricePerMillion: 0.15,
1162
+ outputPricePerMillion: 0.6
1163
+ },
1164
+ "command-r-08-2024": {
1165
+ rpm: 20,
1166
+ itpm: 1e5,
1167
+ otpm: 1e5,
1168
+ inputPricePerMillion: 0.15,
1169
+ outputPricePerMillion: 0.6
1170
+ },
1171
+ "command-r-03-2024": {
1172
+ rpm: 20,
1173
+ itpm: 1e5,
1174
+ otpm: 1e5,
1175
+ inputPricePerMillion: 0.15,
1176
+ outputPricePerMillion: 0.6
1177
+ },
1178
+ // -------------------------------------------------------------------------
1179
+ // Command — legacy general-purpose
1180
+ // -------------------------------------------------------------------------
1181
+ "command": {
1182
+ rpm: 20,
1183
+ itpm: 1e5,
1184
+ otpm: 1e5,
1185
+ inputPricePerMillion: 0.5,
1186
+ outputPricePerMillion: 1.5
1187
+ },
1188
+ "command-nightly": {
1189
+ rpm: 20,
1190
+ itpm: 1e5,
1191
+ otpm: 1e5,
1192
+ inputPricePerMillion: 0.5,
1193
+ outputPricePerMillion: 1.5
1194
+ },
1195
+ "command-light": {
1196
+ rpm: 20,
1197
+ itpm: 1e5,
1198
+ otpm: 1e5,
1199
+ inputPricePerMillion: 0.15,
1200
+ outputPricePerMillion: 0.6
1201
+ },
1202
+ "command-light-nightly": {
1203
+ rpm: 20,
1204
+ itpm: 1e5,
1205
+ otpm: 1e5,
1206
+ inputPricePerMillion: 0.15,
1207
+ outputPricePerMillion: 0.6
1208
+ }
1209
+ };
1210
+
876
1211
  // src/registry/index.ts
877
1212
  var FALLBACK_LIMITS = {
878
1213
  rpm: 60,
@@ -919,7 +1254,22 @@ function getFromRegistry(modelId, provider) {
919
1254
  const stripped = modelId.replace(/^(google|vertex)\//, "");
920
1255
  if (GOOGLE_MODELS[stripped]) return GOOGLE_MODELS[stripped];
921
1256
  }
922
- return OPENAI_MODELS[modelId] ?? ANTHROPIC_MODELS[modelId] ?? GOOGLE_MODELS[modelId];
1257
+ if (provider === "groq") {
1258
+ if (GROQ_MODELS[modelId]) return GROQ_MODELS[modelId];
1259
+ const stripped = modelId.replace(/^groq\//, "");
1260
+ if (GROQ_MODELS[stripped]) return GROQ_MODELS[stripped];
1261
+ }
1262
+ if (provider === "mistral") {
1263
+ if (MISTRAL_MODELS[modelId]) return MISTRAL_MODELS[modelId];
1264
+ const stripped = modelId.replace(/^mistral\//, "");
1265
+ if (MISTRAL_MODELS[stripped]) return MISTRAL_MODELS[stripped];
1266
+ }
1267
+ if (provider === "cohere") {
1268
+ if (COHERE_MODELS[modelId]) return COHERE_MODELS[modelId];
1269
+ const stripped = modelId.replace(/^cohere\//, "");
1270
+ if (COHERE_MODELS[stripped]) return COHERE_MODELS[stripped];
1271
+ }
1272
+ return OPENAI_MODELS[modelId] ?? ANTHROPIC_MODELS[modelId] ?? GOOGLE_MODELS[modelId] ?? GROQ_MODELS[modelId] ?? MISTRAL_MODELS[modelId] ?? COHERE_MODELS[modelId];
923
1273
  }
924
1274
  function isKnownModel(modelId, provider) {
925
1275
  return getFromRegistry(modelId, normalizeProvider(provider)) !== void 0;
@@ -970,7 +1320,7 @@ var Pipeline = class {
970
1320
  const estimatedInput = estimateInputTokens(prompt);
971
1321
  const startMs = Date.now();
972
1322
  const key = `${provider}:${modelId}`;
973
- if (this.config.cost?.budget) {
1323
+ if (this.config.cost?.budget && !opts.skipBudgetCheck) {
974
1324
  const estimatedCost = this.costTracker.estimateCost(
975
1325
  estimatedInput,
976
1326
  500,
@@ -978,12 +1328,26 @@ var Pipeline = class {
978
1328
  limits.inputPricePerMillion,
979
1329
  limits.outputPricePerMillion
980
1330
  );
981
- this.costTracker.checkBudget(
982
- modelId,
983
- estimatedCost,
984
- this.config.cost.budget,
985
- this.config.cost.onExceeded ?? "throw"
986
- );
1331
+ try {
1332
+ this.costTracker.checkBudget(
1333
+ modelId,
1334
+ estimatedCost,
1335
+ this.config.cost.budget,
1336
+ this.config.cost.onExceeded ?? "throw"
1337
+ );
1338
+ } catch (err) {
1339
+ if (err instanceof BudgetExceededError) {
1340
+ this.emitter.emit("budgetHit", {
1341
+ model: err.model,
1342
+ provider,
1343
+ currentCostUsd: err.currentCostUsd,
1344
+ limitUsd: err.limitUsd,
1345
+ period: err.period,
1346
+ usingFallback: false
1347
+ });
1348
+ }
1349
+ throw err;
1350
+ }
987
1351
  }
988
1352
  await this.engine.acquire(key, {
989
1353
  limits,
@@ -1122,7 +1486,8 @@ function getPerRequestOptions(params, queueTimeout) {
1122
1486
  return {
1123
1487
  priority: raw?.priority ?? "normal",
1124
1488
  timeoutMs: raw?.timeout ?? queueTimeout,
1125
- metadata: raw?.metadata ?? {}
1489
+ metadata: raw?.metadata ?? {},
1490
+ skipBudgetCheck: raw?._skipBudgetCheck ?? false
1126
1491
  };
1127
1492
  }
1128
1493
  function extractTokenUsage(usage) {
@@ -1138,7 +1503,7 @@ function createMiddleware(pipeline, queueTimeout) {
1138
1503
  // wrapGenerate — non-streaming
1139
1504
  // -----------------------------------------------------------------------
1140
1505
  async wrapGenerate({ doGenerate, params, model }) {
1141
- const { priority, timeoutMs } = getPerRequestOptions(params, queueTimeout);
1506
+ const { priority, timeoutMs, skipBudgetCheck } = getPerRequestOptions(params, queueTimeout);
1142
1507
  const modelId = model.modelId;
1143
1508
  const provider = model.provider;
1144
1509
  const startMs = Date.now();
@@ -1151,6 +1516,7 @@ function createMiddleware(pipeline, queueTimeout) {
1151
1516
  streaming: false,
1152
1517
  priority,
1153
1518
  timeoutMs,
1519
+ skipBudgetCheck,
1154
1520
  onUsage: () => {
1155
1521
  }
1156
1522
  }
@@ -1165,7 +1531,7 @@ function createMiddleware(pipeline, queueTimeout) {
1165
1531
  // wrapStream — streaming
1166
1532
  // -----------------------------------------------------------------------
1167
1533
  async wrapStream({ doStream, params, model }) {
1168
- const { priority, timeoutMs } = getPerRequestOptions(params, queueTimeout);
1534
+ const { priority, timeoutMs, skipBudgetCheck } = getPerRequestOptions(params, queueTimeout);
1169
1535
  const modelId = model.modelId;
1170
1536
  const provider = model.provider;
1171
1537
  const startMs = Date.now();
@@ -1178,6 +1544,7 @@ function createMiddleware(pipeline, queueTimeout) {
1178
1544
  streaming: true,
1179
1545
  priority,
1180
1546
  timeoutMs,
1547
+ skipBudgetCheck,
1181
1548
  onUsage: () => {
1182
1549
  }
1183
1550
  }
@@ -1204,26 +1571,71 @@ function createMiddleware(pipeline, queueTimeout) {
1204
1571
  function wrapModel(model, middleware, overrides) {
1205
1572
  const providerId = overrides?.providerId ?? model.provider;
1206
1573
  const modelId = overrides?.modelId ?? model.modelId;
1574
+ const fallbackModel = overrides?.fallback;
1207
1575
  return {
1208
1576
  specificationVersion: "v4",
1209
1577
  provider: providerId,
1210
1578
  modelId,
1211
1579
  supportedUrls: model["supportedUrls"],
1212
1580
  async doGenerate(params) {
1213
- return middleware.wrapGenerate({
1214
- doGenerate: () => model.doGenerate(params),
1215
- doStream: () => model.doStream(params),
1216
- params,
1217
- model
1218
- });
1581
+ try {
1582
+ return await middleware.wrapGenerate({
1583
+ doGenerate: () => model.doGenerate(params),
1584
+ doStream: () => model.doStream(params),
1585
+ params,
1586
+ model
1587
+ });
1588
+ } catch (err) {
1589
+ if (err instanceof BudgetExceededError && fallbackModel) {
1590
+ const fallbackParams = {
1591
+ ...params,
1592
+ providerOptions: {
1593
+ ...params.providerOptions,
1594
+ rateLimiter: {
1595
+ ...params.providerOptions?.["rateLimiter"] ?? {},
1596
+ _skipBudgetCheck: true
1597
+ }
1598
+ }
1599
+ };
1600
+ return middleware.wrapGenerate({
1601
+ doGenerate: () => fallbackModel.doGenerate(fallbackParams),
1602
+ doStream: () => fallbackModel.doStream(fallbackParams),
1603
+ params: fallbackParams,
1604
+ model: fallbackModel
1605
+ });
1606
+ }
1607
+ throw err;
1608
+ }
1219
1609
  },
1220
1610
  async doStream(params) {
1221
- return middleware.wrapStream({
1222
- doGenerate: () => model.doGenerate(params),
1223
- doStream: () => model.doStream(params),
1224
- params,
1225
- model
1226
- });
1611
+ try {
1612
+ return await middleware.wrapStream({
1613
+ doGenerate: () => model.doGenerate(params),
1614
+ doStream: () => model.doStream(params),
1615
+ params,
1616
+ model
1617
+ });
1618
+ } catch (err) {
1619
+ if (err instanceof BudgetExceededError && fallbackModel) {
1620
+ const fallbackParams = {
1621
+ ...params,
1622
+ providerOptions: {
1623
+ ...params.providerOptions,
1624
+ rateLimiter: {
1625
+ ...params.providerOptions?.["rateLimiter"] ?? {},
1626
+ _skipBudgetCheck: true
1627
+ }
1628
+ }
1629
+ };
1630
+ return middleware.wrapStream({
1631
+ doGenerate: () => fallbackModel.doGenerate(fallbackParams),
1632
+ doStream: () => fallbackModel.doStream(fallbackParams),
1633
+ params: fallbackParams,
1634
+ model: fallbackModel
1635
+ });
1636
+ }
1637
+ throw err;
1638
+ }
1227
1639
  }
1228
1640
  };
1229
1641
  }
@@ -1260,7 +1672,10 @@ function createRateLimiter(config = {}) {
1260
1672
 
1261
1673
  exports.ANTHROPIC_MODELS = ANTHROPIC_MODELS;
1262
1674
  exports.BudgetExceededError = BudgetExceededError;
1675
+ exports.COHERE_MODELS = COHERE_MODELS;
1263
1676
  exports.GOOGLE_MODELS = GOOGLE_MODELS;
1677
+ exports.GROQ_MODELS = GROQ_MODELS;
1678
+ exports.MISTRAL_MODELS = MISTRAL_MODELS;
1264
1679
  exports.OPENAI_MODELS = OPENAI_MODELS;
1265
1680
  exports.QueueFullError = QueueFullError;
1266
1681
  exports.QueueTimeoutError = QueueTimeoutError;