ai-sdk-rate-limiter 0.1.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +90 -6
- package/dist/index.cjs +439 -24
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +67 -4
- package/dist/index.d.ts +67 -4
- package/dist/index.js +437 -25
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -77,7 +77,7 @@ const limiter = createRateLimiter({
|
|
|
77
77
|
daily: 50,
|
|
78
78
|
monthly: 500,
|
|
79
79
|
},
|
|
80
|
-
onExceeded: 'throw', //
|
|
80
|
+
onExceeded: 'throw', // 'throw' | 'queue' | 'fallback'
|
|
81
81
|
},
|
|
82
82
|
|
|
83
83
|
// Queue behavior
|
|
@@ -169,6 +169,49 @@ Costs are based on **actual token counts** from API responses — not estimates.
|
|
|
169
169
|
|
|
170
170
|
---
|
|
171
171
|
|
|
172
|
+
## Budget fallback routing
|
|
173
|
+
|
|
174
|
+
When a budget limit is hit, you can transparently reroute to a cheaper model instead of throwing an error. Pass a `fallback` option to `wrap()`:
|
|
175
|
+
|
|
176
|
+
```typescript
|
|
177
|
+
const limiter = createRateLimiter({
|
|
178
|
+
cost: {
|
|
179
|
+
budget: { daily: 10 },
|
|
180
|
+
onExceeded: 'fallback', // reroute to fallback instead of throwing
|
|
181
|
+
},
|
|
182
|
+
on: {
|
|
183
|
+
budgetHit: ({ model, currentCostUsd, limitUsd, period }) =>
|
|
184
|
+
console.warn(`${model} ${period} budget hit ($${currentCostUsd} of $${limitUsd})`),
|
|
185
|
+
},
|
|
186
|
+
})
|
|
187
|
+
|
|
188
|
+
const model = limiter.wrap(
|
|
189
|
+
openai('gpt-4o'), // primary model
|
|
190
|
+
{ fallback: openai('gpt-4o-mini') }, // used when budget is exceeded
|
|
191
|
+
)
|
|
192
|
+
|
|
193
|
+
// Under budget → uses gpt-4o normally
|
|
194
|
+
// Over $10/day → switches to gpt-4o-mini automatically (the `budgetHit` handler above still fires); no call-site changes needed
|
|
195
|
+
const result = await generateText({ model, prompt })
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
**How it works:**
|
|
199
|
+
1. Before every request, the rolling spend plus the request's estimated cost is checked against each configured budget limit
|
|
200
|
+
2. When exceeded, `BudgetExceededError` is caught inside `wrap()` before it reaches your code
|
|
201
|
+
3. The request is re-executed against the fallback model, bypassing the budget pre-check
|
|
202
|
+
4. Fallback usage is tracked under the fallback model's ID in `getCostReport()`
|
|
203
|
+
|
|
204
|
+
**Behavior matrix:**
|
|
205
|
+
|
|
206
|
+
| `onExceeded` | `fallback` configured | Outcome |
|
|
207
|
+
|---|---|---|
|
|
208
|
+
| `'throw'` | any | Throws `BudgetExceededError` |
|
|
209
|
+
| `'fallback'` | yes | Transparently uses fallback model |
|
|
210
|
+
| `'fallback'` | no | Throws `BudgetExceededError` |
|
|
211
|
+
| `'queue'` | any | Queues until period resets |
|
|
212
|
+
|
|
213
|
+
---
|
|
214
|
+
|
|
172
215
|
## Backpressure — know before you send
|
|
173
216
|
|
|
174
217
|
Check estimated wait time before committing to a request. Useful for showing loading states or shedding load gracefully.
|
|
@@ -209,7 +252,7 @@ limiter.off('queued', handler)
|
|
|
209
252
|
| `dequeued` | Request leaves the queue | `model`, `waitedMs`, `priority` |
|
|
210
253
|
| `retrying` | A failed request is about to retry | `model`, `attempt`, `maxAttempts`, `delayMs`, `error` |
|
|
211
254
|
| `rateLimited` | Limit hit (local or remote 429) | `model`, `source`, `limitType`, `resetAt` |
|
|
212
|
-
| `budgetHit` | Cost budget exceeded | `model`, `currentCostUsd`, `limitUsd`, `period` |
|
|
255
|
+
| `budgetHit` | Cost budget exceeded | `model`, `currentCostUsd`, `limitUsd`, `period`, `usingFallback` |
|
|
213
256
|
| `dropped` | Request rejected (queue full or timeout) | `model`, `reason` |
|
|
214
257
|
| `completed` | Request finished successfully | `model`, `inputTokens`, `outputTokens`, `costUsd`, `latencyMs` |
|
|
215
258
|
|
|
@@ -304,7 +347,7 @@ const model = req.user.plan === 'paid'
|
|
|
304
347
|
|
|
305
348
|
## Built-in model registry
|
|
306
349
|
|
|
307
|
-
Limits and pricing are built-in for every major model.
|
|
350
|
+
Limits and pricing are built-in for every major model across 6 providers. Defaults are conservative (free/Tier 1) — override with your actual plan limits.
|
|
308
351
|
|
|
309
352
|
**OpenAI**
|
|
310
353
|
|
|
@@ -332,13 +375,54 @@ Limits and pricing are built-in for every major model. These defaults are Tier 1
|
|
|
332
375
|
| gemini-1.5-pro | 2 | 32,000 | $1.25 | $5.00 |
|
|
333
376
|
| gemini-1.5-flash | 15 | 1,000,000 | $0.075 | $0.30 |
|
|
334
377
|
|
|
378
|
+
**Groq** (free tier defaults — on-demand tier is 6,000 RPM / 200k TPM)
|
|
379
|
+
|
|
380
|
+
| Model | RPM | ITPM | Input $/M | Output $/M |
|
|
381
|
+
|---|---|---|---|---|
|
|
382
|
+
| llama-3.3-70b-versatile | 30 | 6,000 | $0.59 | $0.79 |
|
|
383
|
+
| llama-3.1-8b-instant | 30 | 20,000 | $0.05 | $0.08 |
|
|
384
|
+
| mixtral-8x7b-32768 | 30 | 5,000 | $0.24 | $0.24 |
|
|
385
|
+
| gemma2-9b-it | 30 | 15,000 | $0.20 | $0.20 |
|
|
386
|
+
| deepseek-r1-distill-llama-70b | 30 | 6,000 | $0.75 | $0.99 |
|
|
387
|
+
|
|
388
|
+
**Mistral**
|
|
389
|
+
|
|
390
|
+
| Model | RPM | ITPM | Input $/M | Output $/M |
|
|
391
|
+
|---|---|---|---|---|
|
|
392
|
+
| mistral-large-latest | 500 | 100,000 | $2.00 | $6.00 |
|
|
393
|
+
| mistral-small-latest | 500 | 100,000 | $0.10 | $0.30 |
|
|
394
|
+
| codestral-latest | 500 | 100,000 | $0.30 | $0.90 |
|
|
395
|
+
| open-mistral-nemo | 500 | 100,000 | $0.15 | $0.15 |
|
|
396
|
+
| pixtral-large-latest | 500 | 100,000 | $2.00 | $6.00 |
|
|
397
|
+
|
|
398
|
+
**Cohere** (trial tier defaults — production tier is 10,000+ RPM)
|
|
399
|
+
|
|
400
|
+
| Model | RPM | ITPM | Input $/M | Output $/M |
|
|
401
|
+
|---|---|---|---|---|
|
|
402
|
+
| command-r-plus | 20 | 100,000 | $2.50 | $10.00 |
|
|
403
|
+
| command-r | 20 | 100,000 | $0.15 | $0.60 |
|
|
404
|
+
| command | 20 | 100,000 | $0.50 | $1.50 |
|
|
405
|
+
| command-light | 20 | 100,000 | $0.15 | $0.60 |
|
|
406
|
+
|
|
335
407
|
Unknown models fall back to 60 RPM / 100k ITPM with no cost tracking. You can inspect or extend the registry:
|
|
336
408
|
|
|
337
409
|
```typescript
|
|
338
|
-
import {
|
|
410
|
+
import {
|
|
411
|
+
OPENAI_MODELS,
|
|
412
|
+
ANTHROPIC_MODELS,
|
|
413
|
+
GOOGLE_MODELS,
|
|
414
|
+
GROQ_MODELS,
|
|
415
|
+
MISTRAL_MODELS,
|
|
416
|
+
COHERE_MODELS,
|
|
417
|
+
resolveModelLimits,
|
|
418
|
+
isKnownModel,
|
|
419
|
+
} from 'ai-sdk-rate-limiter'
|
|
420
|
+
|
|
421
|
+
console.log(GROQ_MODELS['llama-3.3-70b-versatile'])
|
|
422
|
+
// { rpm: 30, itpm: 6000, otpm: 6000, rpd: 1000, inputPricePerMillion: 0.59, outputPricePerMillion: 0.79 }
|
|
339
423
|
|
|
340
|
-
console.log(
|
|
341
|
-
//
|
|
424
|
+
console.log(isKnownModel('llama-3.3-70b-versatile', 'groq'))
|
|
425
|
+
// true
|
|
342
426
|
|
|
343
427
|
console.log(isKnownModel('my-fine-tune', 'openai'))
|
|
344
428
|
// false → unknown model, so the default limits (60 RPM / 100k ITPM, no cost tracking) apply
|
package/dist/index.cjs
CHANGED
|
@@ -317,7 +317,7 @@ var CostTracker = class {
|
|
|
317
317
|
];
|
|
318
318
|
for (const { limit, current, period } of checks) {
|
|
319
319
|
if (limit !== void 0 && current + estimatedCostUsd > limit) {
|
|
320
|
-
if (onExceeded === "throw") {
|
|
320
|
+
if (onExceeded === "throw" || onExceeded === "fallback") {
|
|
321
321
|
throw new BudgetExceededError(model, current, limit, period);
|
|
322
322
|
}
|
|
323
323
|
return false;
|
|
@@ -873,6 +873,341 @@ var GOOGLE_MODELS = {
|
|
|
873
873
|
}
|
|
874
874
|
};
|
|
875
875
|
|
|
876
|
+
// src/registry/groq.ts
|
|
877
|
+
var GROQ_MODELS = {
|
|
878
|
+
// -------------------------------------------------------------------------
|
|
879
|
+
// Llama 3.3 family
|
|
880
|
+
// -------------------------------------------------------------------------
|
|
881
|
+
"llama-3.3-70b-versatile": {
|
|
882
|
+
rpm: 30,
|
|
883
|
+
itpm: 6e3,
|
|
884
|
+
otpm: 6e3,
|
|
885
|
+
rpd: 1e3,
|
|
886
|
+
inputPricePerMillion: 0.59,
|
|
887
|
+
outputPricePerMillion: 0.79
|
|
888
|
+
},
|
|
889
|
+
"llama-3.3-70b-specdec": {
|
|
890
|
+
rpm: 30,
|
|
891
|
+
itpm: 6e3,
|
|
892
|
+
otpm: 6e3,
|
|
893
|
+
rpd: 1e3,
|
|
894
|
+
inputPricePerMillion: 0.59,
|
|
895
|
+
outputPricePerMillion: 0.99
|
|
896
|
+
},
|
|
897
|
+
// -------------------------------------------------------------------------
|
|
898
|
+
// Llama 3.1 family
|
|
899
|
+
// -------------------------------------------------------------------------
|
|
900
|
+
"llama-3.1-8b-instant": {
|
|
901
|
+
rpm: 30,
|
|
902
|
+
itpm: 2e4,
|
|
903
|
+
otpm: 2e4,
|
|
904
|
+
rpd: 14400,
|
|
905
|
+
inputPricePerMillion: 0.05,
|
|
906
|
+
outputPricePerMillion: 0.08
|
|
907
|
+
},
|
|
908
|
+
"llama-3.1-70b-versatile": {
|
|
909
|
+
rpm: 30,
|
|
910
|
+
itpm: 6e3,
|
|
911
|
+
otpm: 6e3,
|
|
912
|
+
rpd: 1e3,
|
|
913
|
+
inputPricePerMillion: 0.59,
|
|
914
|
+
outputPricePerMillion: 0.79
|
|
915
|
+
},
|
|
916
|
+
// -------------------------------------------------------------------------
|
|
917
|
+
// Llama 3 family
|
|
918
|
+
// -------------------------------------------------------------------------
|
|
919
|
+
"llama3-70b-8192": {
|
|
920
|
+
rpm: 30,
|
|
921
|
+
itpm: 6e3,
|
|
922
|
+
otpm: 6e3,
|
|
923
|
+
rpd: 14400,
|
|
924
|
+
inputPricePerMillion: 0.59,
|
|
925
|
+
outputPricePerMillion: 0.79
|
|
926
|
+
},
|
|
927
|
+
"llama3-8b-8192": {
|
|
928
|
+
rpm: 30,
|
|
929
|
+
itpm: 3e4,
|
|
930
|
+
otpm: 3e4,
|
|
931
|
+
rpd: 14400,
|
|
932
|
+
inputPricePerMillion: 0.05,
|
|
933
|
+
outputPricePerMillion: 0.08
|
|
934
|
+
},
|
|
935
|
+
"llama-guard-3-8b": {
|
|
936
|
+
rpm: 30,
|
|
937
|
+
itpm: 15e3,
|
|
938
|
+
otpm: 15e3,
|
|
939
|
+
rpd: 14400,
|
|
940
|
+
inputPricePerMillion: 0.2,
|
|
941
|
+
outputPricePerMillion: 0.2
|
|
942
|
+
},
|
|
943
|
+
// -------------------------------------------------------------------------
|
|
944
|
+
// Mixtral family
|
|
945
|
+
// -------------------------------------------------------------------------
|
|
946
|
+
"mixtral-8x7b-32768": {
|
|
947
|
+
rpm: 30,
|
|
948
|
+
itpm: 5e3,
|
|
949
|
+
otpm: 5e3,
|
|
950
|
+
rpd: 14400,
|
|
951
|
+
inputPricePerMillion: 0.24,
|
|
952
|
+
outputPricePerMillion: 0.24
|
|
953
|
+
},
|
|
954
|
+
// -------------------------------------------------------------------------
|
|
955
|
+
// Gemma family
|
|
956
|
+
// -------------------------------------------------------------------------
|
|
957
|
+
"gemma2-9b-it": {
|
|
958
|
+
rpm: 30,
|
|
959
|
+
itpm: 15e3,
|
|
960
|
+
otpm: 15e3,
|
|
961
|
+
rpd: 14400,
|
|
962
|
+
inputPricePerMillion: 0.2,
|
|
963
|
+
outputPricePerMillion: 0.2
|
|
964
|
+
},
|
|
965
|
+
"gemma-7b-it": {
|
|
966
|
+
rpm: 30,
|
|
967
|
+
itpm: 15e3,
|
|
968
|
+
otpm: 15e3,
|
|
969
|
+
rpd: 14400,
|
|
970
|
+
inputPricePerMillion: 0.07,
|
|
971
|
+
outputPricePerMillion: 0.07
|
|
972
|
+
},
|
|
973
|
+
// -------------------------------------------------------------------------
|
|
974
|
+
// Deepseek family
|
|
975
|
+
// -------------------------------------------------------------------------
|
|
976
|
+
"deepseek-r1-distill-llama-70b": {
|
|
977
|
+
rpm: 30,
|
|
978
|
+
itpm: 6e3,
|
|
979
|
+
otpm: 6e3,
|
|
980
|
+
rpd: 1e3,
|
|
981
|
+
inputPricePerMillion: 0.75,
|
|
982
|
+
outputPricePerMillion: 0.99
|
|
983
|
+
},
|
|
984
|
+
"deepseek-r1-distill-qwen-32b": {
|
|
985
|
+
rpm: 30,
|
|
986
|
+
itpm: 6e3,
|
|
987
|
+
otpm: 6e3,
|
|
988
|
+
rpd: 1e3,
|
|
989
|
+
inputPricePerMillion: 0.69,
|
|
990
|
+
outputPricePerMillion: 0.69
|
|
991
|
+
}
|
|
992
|
+
};
|
|
993
|
+
|
|
994
|
+
// src/registry/mistral.ts
|
|
995
|
+
var MISTRAL_MODELS = {
|
|
996
|
+
// -------------------------------------------------------------------------
|
|
997
|
+
// Mistral Large — frontier model
|
|
998
|
+
// -------------------------------------------------------------------------
|
|
999
|
+
"mistral-large-latest": {
|
|
1000
|
+
rpm: 500,
|
|
1001
|
+
itpm: 1e5,
|
|
1002
|
+
otpm: 1e5,
|
|
1003
|
+
inputPricePerMillion: 2,
|
|
1004
|
+
outputPricePerMillion: 6
|
|
1005
|
+
},
|
|
1006
|
+
"mistral-large-2411": {
|
|
1007
|
+
rpm: 500,
|
|
1008
|
+
itpm: 1e5,
|
|
1009
|
+
otpm: 1e5,
|
|
1010
|
+
inputPricePerMillion: 2,
|
|
1011
|
+
outputPricePerMillion: 6
|
|
1012
|
+
},
|
|
1013
|
+
"mistral-large-2407": {
|
|
1014
|
+
rpm: 500,
|
|
1015
|
+
itpm: 1e5,
|
|
1016
|
+
otpm: 1e5,
|
|
1017
|
+
inputPricePerMillion: 2,
|
|
1018
|
+
outputPricePerMillion: 6
|
|
1019
|
+
},
|
|
1020
|
+
// -------------------------------------------------------------------------
|
|
1021
|
+
// Mistral Small — efficient, low-cost
|
|
1022
|
+
// -------------------------------------------------------------------------
|
|
1023
|
+
"mistral-small-latest": {
|
|
1024
|
+
rpm: 500,
|
|
1025
|
+
itpm: 1e5,
|
|
1026
|
+
otpm: 1e5,
|
|
1027
|
+
inputPricePerMillion: 0.1,
|
|
1028
|
+
outputPricePerMillion: 0.3
|
|
1029
|
+
},
|
|
1030
|
+
"mistral-small-2409": {
|
|
1031
|
+
rpm: 500,
|
|
1032
|
+
itpm: 1e5,
|
|
1033
|
+
otpm: 1e5,
|
|
1034
|
+
inputPricePerMillion: 0.1,
|
|
1035
|
+
outputPricePerMillion: 0.3
|
|
1036
|
+
},
|
|
1037
|
+
// -------------------------------------------------------------------------
|
|
1038
|
+
// Pixtral Large — multimodal
|
|
1039
|
+
// -------------------------------------------------------------------------
|
|
1040
|
+
"pixtral-large-latest": {
|
|
1041
|
+
rpm: 500,
|
|
1042
|
+
itpm: 1e5,
|
|
1043
|
+
otpm: 1e5,
|
|
1044
|
+
inputPricePerMillion: 2,
|
|
1045
|
+
outputPricePerMillion: 6
|
|
1046
|
+
},
|
|
1047
|
+
"pixtral-large-2411": {
|
|
1048
|
+
rpm: 500,
|
|
1049
|
+
itpm: 1e5,
|
|
1050
|
+
otpm: 1e5,
|
|
1051
|
+
inputPricePerMillion: 2,
|
|
1052
|
+
outputPricePerMillion: 6
|
|
1053
|
+
},
|
|
1054
|
+
"pixtral-12b": {
|
|
1055
|
+
rpm: 500,
|
|
1056
|
+
itpm: 1e5,
|
|
1057
|
+
otpm: 1e5,
|
|
1058
|
+
inputPricePerMillion: 0.15,
|
|
1059
|
+
outputPricePerMillion: 0.15
|
|
1060
|
+
},
|
|
1061
|
+
"pixtral-12b-2409": {
|
|
1062
|
+
rpm: 500,
|
|
1063
|
+
itpm: 1e5,
|
|
1064
|
+
otpm: 1e5,
|
|
1065
|
+
inputPricePerMillion: 0.15,
|
|
1066
|
+
outputPricePerMillion: 0.15
|
|
1067
|
+
},
|
|
1068
|
+
// -------------------------------------------------------------------------
|
|
1069
|
+
// Codestral — code-optimized
|
|
1070
|
+
// -------------------------------------------------------------------------
|
|
1071
|
+
"codestral-latest": {
|
|
1072
|
+
rpm: 500,
|
|
1073
|
+
itpm: 1e5,
|
|
1074
|
+
otpm: 1e5,
|
|
1075
|
+
inputPricePerMillion: 0.3,
|
|
1076
|
+
outputPricePerMillion: 0.9
|
|
1077
|
+
},
|
|
1078
|
+
"codestral-2501": {
|
|
1079
|
+
rpm: 500,
|
|
1080
|
+
itpm: 1e5,
|
|
1081
|
+
otpm: 1e5,
|
|
1082
|
+
inputPricePerMillion: 0.3,
|
|
1083
|
+
outputPricePerMillion: 0.9
|
|
1084
|
+
},
|
|
1085
|
+
// -------------------------------------------------------------------------
|
|
1086
|
+
// Open models (free / self-hosted weights available)
|
|
1087
|
+
// -------------------------------------------------------------------------
|
|
1088
|
+
"open-mistral-nemo": {
|
|
1089
|
+
rpm: 500,
|
|
1090
|
+
itpm: 1e5,
|
|
1091
|
+
otpm: 1e5,
|
|
1092
|
+
inputPricePerMillion: 0.15,
|
|
1093
|
+
outputPricePerMillion: 0.15
|
|
1094
|
+
},
|
|
1095
|
+
"open-mixtral-8x22b": {
|
|
1096
|
+
rpm: 500,
|
|
1097
|
+
itpm: 1e5,
|
|
1098
|
+
otpm: 1e5,
|
|
1099
|
+
inputPricePerMillion: 2,
|
|
1100
|
+
outputPricePerMillion: 6
|
|
1101
|
+
},
|
|
1102
|
+
"open-mixtral-8x7b": {
|
|
1103
|
+
rpm: 500,
|
|
1104
|
+
itpm: 1e5,
|
|
1105
|
+
otpm: 1e5,
|
|
1106
|
+
inputPricePerMillion: 0.7,
|
|
1107
|
+
outputPricePerMillion: 0.7
|
|
1108
|
+
},
|
|
1109
|
+
"open-mistral-7b": {
|
|
1110
|
+
rpm: 500,
|
|
1111
|
+
itpm: 1e5,
|
|
1112
|
+
otpm: 1e5,
|
|
1113
|
+
inputPricePerMillion: 0.25,
|
|
1114
|
+
outputPricePerMillion: 0.25
|
|
1115
|
+
},
|
|
1116
|
+
// -------------------------------------------------------------------------
|
|
1117
|
+
// Mistral Embed — embedding only (no RPM-based generation limits)
|
|
1118
|
+
// -------------------------------------------------------------------------
|
|
1119
|
+
"mistral-embed": {
|
|
1120
|
+
rpm: 500,
|
|
1121
|
+
itpm: 1e5,
|
|
1122
|
+
otpm: 0,
|
|
1123
|
+
inputPricePerMillion: 0.1,
|
|
1124
|
+
outputPricePerMillion: 0
|
|
1125
|
+
}
|
|
1126
|
+
};
|
|
1127
|
+
|
|
1128
|
+
// src/registry/cohere.ts
|
|
1129
|
+
var COHERE_MODELS = {
|
|
1130
|
+
// -------------------------------------------------------------------------
|
|
1131
|
+
// Command R+ — highest capability
|
|
1132
|
+
// -------------------------------------------------------------------------
|
|
1133
|
+
"command-r-plus": {
|
|
1134
|
+
rpm: 20,
|
|
1135
|
+
itpm: 1e5,
|
|
1136
|
+
otpm: 1e5,
|
|
1137
|
+
inputPricePerMillion: 2.5,
|
|
1138
|
+
outputPricePerMillion: 10
|
|
1139
|
+
},
|
|
1140
|
+
"command-r-plus-08-2024": {
|
|
1141
|
+
rpm: 20,
|
|
1142
|
+
itpm: 1e5,
|
|
1143
|
+
otpm: 1e5,
|
|
1144
|
+
inputPricePerMillion: 2.5,
|
|
1145
|
+
outputPricePerMillion: 10
|
|
1146
|
+
},
|
|
1147
|
+
"command-r-plus-04-2024": {
|
|
1148
|
+
rpm: 20,
|
|
1149
|
+
itpm: 1e5,
|
|
1150
|
+
otpm: 1e5,
|
|
1151
|
+
inputPricePerMillion: 2.5,
|
|
1152
|
+
outputPricePerMillion: 10
|
|
1153
|
+
},
|
|
1154
|
+
// -------------------------------------------------------------------------
|
|
1155
|
+
// Command R — balanced, RAG-optimized
|
|
1156
|
+
// -------------------------------------------------------------------------
|
|
1157
|
+
"command-r": {
|
|
1158
|
+
rpm: 20,
|
|
1159
|
+
itpm: 1e5,
|
|
1160
|
+
otpm: 1e5,
|
|
1161
|
+
inputPricePerMillion: 0.15,
|
|
1162
|
+
outputPricePerMillion: 0.6
|
|
1163
|
+
},
|
|
1164
|
+
"command-r-08-2024": {
|
|
1165
|
+
rpm: 20,
|
|
1166
|
+
itpm: 1e5,
|
|
1167
|
+
otpm: 1e5,
|
|
1168
|
+
inputPricePerMillion: 0.15,
|
|
1169
|
+
outputPricePerMillion: 0.6
|
|
1170
|
+
},
|
|
1171
|
+
"command-r-03-2024": {
|
|
1172
|
+
rpm: 20,
|
|
1173
|
+
itpm: 1e5,
|
|
1174
|
+
otpm: 1e5,
|
|
1175
|
+
inputPricePerMillion: 0.15,
|
|
1176
|
+
outputPricePerMillion: 0.6
|
|
1177
|
+
},
|
|
1178
|
+
// -------------------------------------------------------------------------
|
|
1179
|
+
// Command — legacy general-purpose
|
|
1180
|
+
// -------------------------------------------------------------------------
|
|
1181
|
+
"command": {
|
|
1182
|
+
rpm: 20,
|
|
1183
|
+
itpm: 1e5,
|
|
1184
|
+
otpm: 1e5,
|
|
1185
|
+
inputPricePerMillion: 0.5,
|
|
1186
|
+
outputPricePerMillion: 1.5
|
|
1187
|
+
},
|
|
1188
|
+
"command-nightly": {
|
|
1189
|
+
rpm: 20,
|
|
1190
|
+
itpm: 1e5,
|
|
1191
|
+
otpm: 1e5,
|
|
1192
|
+
inputPricePerMillion: 0.5,
|
|
1193
|
+
outputPricePerMillion: 1.5
|
|
1194
|
+
},
|
|
1195
|
+
"command-light": {
|
|
1196
|
+
rpm: 20,
|
|
1197
|
+
itpm: 1e5,
|
|
1198
|
+
otpm: 1e5,
|
|
1199
|
+
inputPricePerMillion: 0.15,
|
|
1200
|
+
outputPricePerMillion: 0.6
|
|
1201
|
+
},
|
|
1202
|
+
"command-light-nightly": {
|
|
1203
|
+
rpm: 20,
|
|
1204
|
+
itpm: 1e5,
|
|
1205
|
+
otpm: 1e5,
|
|
1206
|
+
inputPricePerMillion: 0.15,
|
|
1207
|
+
outputPricePerMillion: 0.6
|
|
1208
|
+
}
|
|
1209
|
+
};
|
|
1210
|
+
|
|
876
1211
|
// src/registry/index.ts
|
|
877
1212
|
var FALLBACK_LIMITS = {
|
|
878
1213
|
rpm: 60,
|
|
@@ -919,7 +1254,22 @@ function getFromRegistry(modelId, provider) {
|
|
|
919
1254
|
const stripped = modelId.replace(/^(google|vertex)\//, "");
|
|
920
1255
|
if (GOOGLE_MODELS[stripped]) return GOOGLE_MODELS[stripped];
|
|
921
1256
|
}
|
|
922
|
-
|
|
1257
|
+
if (provider === "groq") {
|
|
1258
|
+
if (GROQ_MODELS[modelId]) return GROQ_MODELS[modelId];
|
|
1259
|
+
const stripped = modelId.replace(/^groq\//, "");
|
|
1260
|
+
if (GROQ_MODELS[stripped]) return GROQ_MODELS[stripped];
|
|
1261
|
+
}
|
|
1262
|
+
if (provider === "mistral") {
|
|
1263
|
+
if (MISTRAL_MODELS[modelId]) return MISTRAL_MODELS[modelId];
|
|
1264
|
+
const stripped = modelId.replace(/^mistral\//, "");
|
|
1265
|
+
if (MISTRAL_MODELS[stripped]) return MISTRAL_MODELS[stripped];
|
|
1266
|
+
}
|
|
1267
|
+
if (provider === "cohere") {
|
|
1268
|
+
if (COHERE_MODELS[modelId]) return COHERE_MODELS[modelId];
|
|
1269
|
+
const stripped = modelId.replace(/^cohere\//, "");
|
|
1270
|
+
if (COHERE_MODELS[stripped]) return COHERE_MODELS[stripped];
|
|
1271
|
+
}
|
|
1272
|
+
return OPENAI_MODELS[modelId] ?? ANTHROPIC_MODELS[modelId] ?? GOOGLE_MODELS[modelId] ?? GROQ_MODELS[modelId] ?? MISTRAL_MODELS[modelId] ?? COHERE_MODELS[modelId];
|
|
923
1273
|
}
|
|
924
1274
|
function isKnownModel(modelId, provider) {
|
|
925
1275
|
return getFromRegistry(modelId, normalizeProvider(provider)) !== void 0;
|
|
@@ -970,7 +1320,7 @@ var Pipeline = class {
|
|
|
970
1320
|
const estimatedInput = estimateInputTokens(prompt);
|
|
971
1321
|
const startMs = Date.now();
|
|
972
1322
|
const key = `${provider}:${modelId}`;
|
|
973
|
-
if (this.config.cost?.budget) {
|
|
1323
|
+
if (this.config.cost?.budget && !opts.skipBudgetCheck) {
|
|
974
1324
|
const estimatedCost = this.costTracker.estimateCost(
|
|
975
1325
|
estimatedInput,
|
|
976
1326
|
500,
|
|
@@ -978,12 +1328,26 @@ var Pipeline = class {
|
|
|
978
1328
|
limits.inputPricePerMillion,
|
|
979
1329
|
limits.outputPricePerMillion
|
|
980
1330
|
);
|
|
981
|
-
|
|
982
|
-
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
|
|
986
|
-
|
|
1331
|
+
try {
|
|
1332
|
+
this.costTracker.checkBudget(
|
|
1333
|
+
modelId,
|
|
1334
|
+
estimatedCost,
|
|
1335
|
+
this.config.cost.budget,
|
|
1336
|
+
this.config.cost.onExceeded ?? "throw"
|
|
1337
|
+
);
|
|
1338
|
+
} catch (err) {
|
|
1339
|
+
if (err instanceof BudgetExceededError) {
|
|
1340
|
+
this.emitter.emit("budgetHit", {
|
|
1341
|
+
model: err.model,
|
|
1342
|
+
provider,
|
|
1343
|
+
currentCostUsd: err.currentCostUsd,
|
|
1344
|
+
limitUsd: err.limitUsd,
|
|
1345
|
+
period: err.period,
|
|
1346
|
+
usingFallback: false
|
|
1347
|
+
});
|
|
1348
|
+
}
|
|
1349
|
+
throw err;
|
|
1350
|
+
}
|
|
987
1351
|
}
|
|
988
1352
|
await this.engine.acquire(key, {
|
|
989
1353
|
limits,
|
|
@@ -1122,7 +1486,8 @@ function getPerRequestOptions(params, queueTimeout) {
|
|
|
1122
1486
|
return {
|
|
1123
1487
|
priority: raw?.priority ?? "normal",
|
|
1124
1488
|
timeoutMs: raw?.timeout ?? queueTimeout,
|
|
1125
|
-
metadata: raw?.metadata ?? {}
|
|
1489
|
+
metadata: raw?.metadata ?? {},
|
|
1490
|
+
skipBudgetCheck: raw?._skipBudgetCheck ?? false
|
|
1126
1491
|
};
|
|
1127
1492
|
}
|
|
1128
1493
|
function extractTokenUsage(usage) {
|
|
@@ -1138,7 +1503,7 @@ function createMiddleware(pipeline, queueTimeout) {
|
|
|
1138
1503
|
// wrapGenerate — non-streaming
|
|
1139
1504
|
// -----------------------------------------------------------------------
|
|
1140
1505
|
async wrapGenerate({ doGenerate, params, model }) {
|
|
1141
|
-
const { priority, timeoutMs } = getPerRequestOptions(params, queueTimeout);
|
|
1506
|
+
const { priority, timeoutMs, skipBudgetCheck } = getPerRequestOptions(params, queueTimeout);
|
|
1142
1507
|
const modelId = model.modelId;
|
|
1143
1508
|
const provider = model.provider;
|
|
1144
1509
|
const startMs = Date.now();
|
|
@@ -1151,6 +1516,7 @@ function createMiddleware(pipeline, queueTimeout) {
|
|
|
1151
1516
|
streaming: false,
|
|
1152
1517
|
priority,
|
|
1153
1518
|
timeoutMs,
|
|
1519
|
+
skipBudgetCheck,
|
|
1154
1520
|
onUsage: () => {
|
|
1155
1521
|
}
|
|
1156
1522
|
}
|
|
@@ -1165,7 +1531,7 @@ function createMiddleware(pipeline, queueTimeout) {
|
|
|
1165
1531
|
// wrapStream — streaming
|
|
1166
1532
|
// -----------------------------------------------------------------------
|
|
1167
1533
|
async wrapStream({ doStream, params, model }) {
|
|
1168
|
-
const { priority, timeoutMs } = getPerRequestOptions(params, queueTimeout);
|
|
1534
|
+
const { priority, timeoutMs, skipBudgetCheck } = getPerRequestOptions(params, queueTimeout);
|
|
1169
1535
|
const modelId = model.modelId;
|
|
1170
1536
|
const provider = model.provider;
|
|
1171
1537
|
const startMs = Date.now();
|
|
@@ -1178,6 +1544,7 @@ function createMiddleware(pipeline, queueTimeout) {
|
|
|
1178
1544
|
streaming: true,
|
|
1179
1545
|
priority,
|
|
1180
1546
|
timeoutMs,
|
|
1547
|
+
skipBudgetCheck,
|
|
1181
1548
|
onUsage: () => {
|
|
1182
1549
|
}
|
|
1183
1550
|
}
|
|
@@ -1204,26 +1571,71 @@ function createMiddleware(pipeline, queueTimeout) {
|
|
|
1204
1571
|
function wrapModel(model, middleware, overrides) {
|
|
1205
1572
|
const providerId = overrides?.providerId ?? model.provider;
|
|
1206
1573
|
const modelId = overrides?.modelId ?? model.modelId;
|
|
1574
|
+
const fallbackModel = overrides?.fallback;
|
|
1207
1575
|
return {
|
|
1208
1576
|
specificationVersion: "v4",
|
|
1209
1577
|
provider: providerId,
|
|
1210
1578
|
modelId,
|
|
1211
1579
|
supportedUrls: model["supportedUrls"],
|
|
1212
1580
|
async doGenerate(params) {
|
|
1213
|
-
|
|
1214
|
-
|
|
1215
|
-
|
|
1216
|
-
|
|
1217
|
-
|
|
1218
|
-
|
|
1581
|
+
try {
|
|
1582
|
+
return await middleware.wrapGenerate({
|
|
1583
|
+
doGenerate: () => model.doGenerate(params),
|
|
1584
|
+
doStream: () => model.doStream(params),
|
|
1585
|
+
params,
|
|
1586
|
+
model
|
|
1587
|
+
});
|
|
1588
|
+
} catch (err) {
|
|
1589
|
+
if (err instanceof BudgetExceededError && fallbackModel) {
|
|
1590
|
+
const fallbackParams = {
|
|
1591
|
+
...params,
|
|
1592
|
+
providerOptions: {
|
|
1593
|
+
...params.providerOptions,
|
|
1594
|
+
rateLimiter: {
|
|
1595
|
+
...params.providerOptions?.["rateLimiter"] ?? {},
|
|
1596
|
+
_skipBudgetCheck: true
|
|
1597
|
+
}
|
|
1598
|
+
}
|
|
1599
|
+
};
|
|
1600
|
+
return middleware.wrapGenerate({
|
|
1601
|
+
doGenerate: () => fallbackModel.doGenerate(fallbackParams),
|
|
1602
|
+
doStream: () => fallbackModel.doStream(fallbackParams),
|
|
1603
|
+
params: fallbackParams,
|
|
1604
|
+
model: fallbackModel
|
|
1605
|
+
});
|
|
1606
|
+
}
|
|
1607
|
+
throw err;
|
|
1608
|
+
}
|
|
1219
1609
|
},
|
|
1220
1610
|
async doStream(params) {
|
|
1221
|
-
|
|
1222
|
-
|
|
1223
|
-
|
|
1224
|
-
|
|
1225
|
-
|
|
1226
|
-
|
|
1611
|
+
try {
|
|
1612
|
+
return await middleware.wrapStream({
|
|
1613
|
+
doGenerate: () => model.doGenerate(params),
|
|
1614
|
+
doStream: () => model.doStream(params),
|
|
1615
|
+
params,
|
|
1616
|
+
model
|
|
1617
|
+
});
|
|
1618
|
+
} catch (err) {
|
|
1619
|
+
if (err instanceof BudgetExceededError && fallbackModel) {
|
|
1620
|
+
const fallbackParams = {
|
|
1621
|
+
...params,
|
|
1622
|
+
providerOptions: {
|
|
1623
|
+
...params.providerOptions,
|
|
1624
|
+
rateLimiter: {
|
|
1625
|
+
...params.providerOptions?.["rateLimiter"] ?? {},
|
|
1626
|
+
_skipBudgetCheck: true
|
|
1627
|
+
}
|
|
1628
|
+
}
|
|
1629
|
+
};
|
|
1630
|
+
return middleware.wrapStream({
|
|
1631
|
+
doGenerate: () => fallbackModel.doGenerate(fallbackParams),
|
|
1632
|
+
doStream: () => fallbackModel.doStream(fallbackParams),
|
|
1633
|
+
params: fallbackParams,
|
|
1634
|
+
model: fallbackModel
|
|
1635
|
+
});
|
|
1636
|
+
}
|
|
1637
|
+
throw err;
|
|
1638
|
+
}
|
|
1227
1639
|
}
|
|
1228
1640
|
};
|
|
1229
1641
|
}
|
|
@@ -1260,7 +1672,10 @@ function createRateLimiter(config = {}) {
|
|
|
1260
1672
|
|
|
1261
1673
|
exports.ANTHROPIC_MODELS = ANTHROPIC_MODELS;
|
|
1262
1674
|
exports.BudgetExceededError = BudgetExceededError;
|
|
1675
|
+
exports.COHERE_MODELS = COHERE_MODELS;
|
|
1263
1676
|
exports.GOOGLE_MODELS = GOOGLE_MODELS;
|
|
1677
|
+
exports.GROQ_MODELS = GROQ_MODELS;
|
|
1678
|
+
exports.MISTRAL_MODELS = MISTRAL_MODELS;
|
|
1264
1679
|
exports.OPENAI_MODELS = OPENAI_MODELS;
|
|
1265
1680
|
exports.QueueFullError = QueueFullError;
|
|
1266
1681
|
exports.QueueTimeoutError = QueueTimeoutError;
|