ai-sdk-rate-limiter 0.6.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +208 -87
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +2 -2
- package/dist/index.d.ts +2 -2
- package/dist/index.js +208 -87
- package/dist/index.js.map +1 -1
- package/dist/otel.d.cts +1 -1
- package/dist/otel.d.ts +1 -1
- package/dist/redis.d.cts +1 -1
- package/dist/redis.d.ts +1 -1
- package/dist/testing.cjs +2007 -0
- package/dist/testing.cjs.map +1 -0
- package/dist/testing.d.cts +59 -0
- package/dist/testing.d.ts +59 -0
- package/dist/testing.js +2005 -0
- package/dist/testing.js.map +1 -0
- package/dist/{types-CgePLtmQ.d.cts → types-D7qskXNw.d.cts} +54 -1
- package/dist/{types-CgePLtmQ.d.ts → types-D7qskXNw.d.ts} +54 -1
- package/package.json +12 -2
package/dist/index.cjs
CHANGED
|
@@ -194,6 +194,9 @@ function sumInput(window) {
|
|
|
194
194
|
}
|
|
195
195
|
|
|
196
196
|
// src/core/rate-limit-engine.ts
|
|
197
|
+
/**
 * Build the error thrown when a queued request is cancelled through its
 * AbortSignal.
 *
 * @returns {Error} A plain Error whose `name` is "AbortError", so callers can
 *   detect cancellation with `err.name === "AbortError"`.
 */
function makeAbortError() {
  const abortError = new Error("The operation was aborted");
  abortError.name = "AbortError";
  return abortError;
}
|
|
197
200
|
var PRIORITY_RANK = {
|
|
198
201
|
high: 0,
|
|
199
202
|
normal: 1,
|
|
@@ -233,41 +236,84 @@ var RateLimitEngine = class {
|
|
|
233
236
|
* - If at capacity: enqueues (sorted by priority) and resolves when a slot opens.
|
|
234
237
|
* - If queue is full: throws QueueFullError immediately.
|
|
235
238
|
* - If waiting exceeds timeoutMs: throws QueueTimeoutError.
|
|
239
|
+
* - If signal is aborted while queued: throws an AbortError.
|
|
236
240
|
*/
|
|
237
241
|
async acquire(key, opts) {
|
|
242
|
+
if (opts.signal?.aborted) throw makeAbortError();
|
|
238
243
|
const local = this.getOrCreate(key);
|
|
239
244
|
const nextSlotAtMs = await this.store.checkAndRecord(
|
|
240
245
|
key,
|
|
241
246
|
opts.estimatedInputTokens,
|
|
242
247
|
opts.limits
|
|
243
248
|
);
|
|
244
|
-
if (nextSlotAtMs
|
|
245
|
-
|
|
246
|
-
|
|
249
|
+
if (nextSlotAtMs > Date.now()) {
|
|
250
|
+
if (local.waiters.length >= this.maxQueueSize) {
|
|
251
|
+
throw new QueueFullError(key, this.maxQueueSize);
|
|
252
|
+
}
|
|
253
|
+
const estimatedWaitMs = Math.max(0, nextSlotAtMs - Date.now());
|
|
254
|
+
opts.onQueued?.(local.waiters.length, estimatedWaitMs);
|
|
255
|
+
await new Promise((resolve, reject) => {
|
|
256
|
+
const enqueuedAt = Date.now();
|
|
257
|
+
const timeoutHandle = setTimeout(() => {
|
|
258
|
+
const idx = local.waiters.indexOf(waiter);
|
|
259
|
+
if (idx !== -1) local.waiters.splice(idx, 1);
|
|
260
|
+
cleanup();
|
|
261
|
+
reject(new QueueTimeoutError(key, Date.now() - enqueuedAt, local.waiters.length));
|
|
262
|
+
}, opts.timeoutMs);
|
|
263
|
+
const onAbort = () => {
|
|
264
|
+
const idx = local.waiters.indexOf(waiter);
|
|
265
|
+
if (idx !== -1) local.waiters.splice(idx, 1);
|
|
266
|
+
clearTimeout(timeoutHandle);
|
|
267
|
+
cleanup();
|
|
268
|
+
reject(makeAbortError());
|
|
269
|
+
};
|
|
270
|
+
const cleanup = () => opts.signal?.removeEventListener("abort", onAbort);
|
|
271
|
+
opts.signal?.addEventListener("abort", onAbort, { once: true });
|
|
272
|
+
const waiter = {
|
|
273
|
+
resolve: () => {
|
|
274
|
+
clearTimeout(timeoutHandle);
|
|
275
|
+
cleanup();
|
|
276
|
+
opts.onDequeued?.(Date.now() - enqueuedAt);
|
|
277
|
+
resolve();
|
|
278
|
+
},
|
|
279
|
+
reject: (err) => {
|
|
280
|
+
clearTimeout(timeoutHandle);
|
|
281
|
+
cleanup();
|
|
282
|
+
reject(err);
|
|
283
|
+
},
|
|
284
|
+
priority: opts.priority,
|
|
285
|
+
enqueued: enqueuedAt,
|
|
286
|
+
estimatedInputTokens: opts.estimatedInputTokens,
|
|
287
|
+
timeoutHandle
|
|
288
|
+
};
|
|
289
|
+
insertWaiter(local.waiters, waiter);
|
|
290
|
+
this.scheduleDrain(key, opts.limits, nextSlotAtMs);
|
|
291
|
+
});
|
|
247
292
|
}
|
|
248
|
-
const
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
293
|
+
const maxConcurrent = opts.limits.maxConcurrent;
|
|
294
|
+
if (maxConcurrent !== void 0 && local.activeCount >= maxConcurrent) {
|
|
295
|
+
if (opts.signal?.aborted) throw makeAbortError();
|
|
296
|
+
await new Promise((resolve, reject) => {
|
|
297
|
+
const onAbort = () => {
|
|
298
|
+
const idx = local.concurrencyWaiters.findIndex((w) => w.resolve === resolveWrapped);
|
|
299
|
+
if (idx !== -1) local.concurrencyWaiters.splice(idx, 1);
|
|
300
|
+
cleanup();
|
|
301
|
+
reject(makeAbortError());
|
|
302
|
+
};
|
|
303
|
+
const resolveWrapped = () => {
|
|
304
|
+
cleanup();
|
|
260
305
|
resolve();
|
|
261
|
-
}
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
}
|
|
306
|
+
};
|
|
307
|
+
const rejectWrapped = (e) => {
|
|
308
|
+
cleanup();
|
|
309
|
+
reject(e);
|
|
310
|
+
};
|
|
311
|
+
const cleanup = () => opts.signal?.removeEventListener("abort", onAbort);
|
|
312
|
+
opts.signal?.addEventListener("abort", onAbort, { once: true });
|
|
313
|
+
local.concurrencyWaiters.push({ resolve: resolveWrapped, reject: rejectWrapped });
|
|
314
|
+
});
|
|
315
|
+
}
|
|
316
|
+
if (maxConcurrent !== void 0) local.activeCount++;
|
|
271
317
|
}
|
|
272
318
|
/**
|
|
273
319
|
* Record actual token usage after a request completes.
|
|
@@ -311,13 +357,31 @@ var RateLimitEngine = class {
|
|
|
311
357
|
}
|
|
312
358
|
return null;
|
|
313
359
|
}
|
|
360
|
+
/** All model keys that have been seen by this engine instance. */
|
|
361
|
+
knownKeys() {
|
|
362
|
+
return Array.from(this.localStates.keys());
|
|
363
|
+
}
|
|
364
|
+
/**
|
|
365
|
+
* Signal that a request has completed, decrementing the concurrency counter
|
|
366
|
+
* and unblocking the next concurrency waiter if one is queued.
|
|
367
|
+
*
|
|
368
|
+
* Must be called after every acquire() that succeeded (even on error).
|
|
369
|
+
* Only has an effect when maxConcurrent is configured for the model.
|
|
370
|
+
*/
|
|
371
|
+
release(key) {
|
|
372
|
+
const local = this.localStates.get(key);
|
|
373
|
+
if (!local || local.activeCount === 0) return;
|
|
374
|
+
local.activeCount--;
|
|
375
|
+
const next = local.concurrencyWaiters.shift();
|
|
376
|
+
if (next) next.resolve();
|
|
377
|
+
}
|
|
314
378
|
// -------------------------------------------------------------------------
|
|
315
379
|
// Private helpers
|
|
316
380
|
// -------------------------------------------------------------------------
|
|
317
381
|
getOrCreate(key) {
|
|
318
382
|
let state = this.localStates.get(key);
|
|
319
383
|
if (!state) {
|
|
320
|
-
state = { waiters: [], drainScheduled: false };
|
|
384
|
+
state = { waiters: [], drainScheduled: false, activeCount: 0, concurrencyWaiters: [] };
|
|
321
385
|
this.localStates.set(key, state);
|
|
322
386
|
}
|
|
323
387
|
return state;
|
|
@@ -1339,6 +1403,24 @@ function isKnownModel(modelId, provider) {
|
|
|
1339
1403
|
}
|
|
1340
1404
|
|
|
1341
1405
|
// src/core/pipeline.ts
|
|
1406
|
+
/**
 * Test whether a configured scope pattern applies to a concrete scope string.
 *
 * A pattern matches when it is identical to the scope, or when it contains one
 * or more `*` wildcards and the scope fits the resulting glob. Each `*` stands
 * for any run of characters (including none, and including line terminators);
 * every other character in the pattern is matched literally.
 *
 * @param {string} pattern Scope pattern, e.g. "user:*".
 * @param {string} scope   Concrete scope to test, e.g. "user:42".
 * @returns {boolean} true when the pattern matches the whole scope.
 */
function matchScope(pattern, scope) {
  if (pattern === scope) return true;
  if (!pattern.includes("*")) return false;
  // Escape every regex metacharacter except "*", then expand "*" to ".*".
  const source = pattern
    .replace(/[.+?^${}()|[\]\\]/g, "\\$&")
    .replace(/\*/g, ".*");
  // The "s" (dotAll) flag lets "." match line terminators too; without it a
  // scope containing "\n" could never satisfy a wildcard pattern.
  return new RegExp(`^${source}$`, "s").test(scope);
}
|
|
1416
|
+
/**
 * Overlay a scope's limit overrides on top of a model's base limits.
 *
 * Only `rpm`, `itpm` and `maxConcurrent` are taken from the scope, and only
 * when the scope actually defines them (value is not `undefined`). Everything
 * else is copied from `base`. Neither argument is mutated.
 *
 * @param {object} base  Base limits resolved for the model.
 * @param {object} scope Scope configuration holding optional overrides.
 * @returns {object} A new limits object.
 */
function mergeScopeLimits(base, scope) {
  const merged = { ...base };
  for (const field of ["rpm", "itpm", "maxConcurrent"]) {
    const override = scope[field];
    if (override !== void 0) {
      merged[field] = override;
    }
  }
  return merged;
}
|
|
1342
1424
|
function resolveRetryConfig(config) {
|
|
1343
1425
|
const r = config.retry ?? {};
|
|
1344
1426
|
return {
|
|
@@ -1375,15 +1457,32 @@ var Pipeline = class {
|
|
|
1375
1457
|
// -------------------------------------------------------------------------
|
|
1376
1458
|
// execute — called by both generate and stream adapters
|
|
1377
1459
|
// -------------------------------------------------------------------------
|
|
1460
|
+
// -------------------------------------------------------------------------
|
|
1461
|
+
// Scope resolution helpers
|
|
1462
|
+
// -------------------------------------------------------------------------
|
|
1463
|
+
resolveScopedLimits(modelId, provider, scope) {
|
|
1464
|
+
const base = this.resolveModelLimits(modelId, provider);
|
|
1465
|
+
if (!this.config.scopes) return base;
|
|
1466
|
+
for (const [pattern, scopeConfig] of Object.entries(this.config.scopes)) {
|
|
1467
|
+
if (matchScope(pattern, scope)) {
|
|
1468
|
+
return mergeScopeLimits(base, scopeConfig);
|
|
1469
|
+
}
|
|
1470
|
+
}
|
|
1471
|
+
return base;
|
|
1472
|
+
}
|
|
1378
1473
|
/**
|
|
1379
1474
|
* Execute an AI request through the full pipeline:
|
|
1380
|
-
* budget check → acquire slot → retry wrapper
|
|
1475
|
+
* budget check → acquire slot → retry wrapper
|
|
1476
|
+
*
|
|
1477
|
+
* Usage recording (completed event) is NOT emitted here. Callers must call
|
|
1478
|
+
* recordUsage() once they have actual token counts from the API response.
|
|
1381
1479
|
*/
|
|
1382
1480
|
async execute(modelId, provider, prompt, fn, opts) {
|
|
1383
|
-
const
|
|
1481
|
+
const scope = opts.scope;
|
|
1482
|
+
const limits = scope ? this.resolveScopedLimits(modelId, provider, scope) : this.resolveModelLimits(modelId, provider);
|
|
1384
1483
|
const estimatedInput = estimateInputTokens(prompt);
|
|
1385
|
-
const
|
|
1386
|
-
|
|
1484
|
+
const key = scope ? `${scope}:${provider}:${modelId}` : `${provider}:${modelId}`;
|
|
1485
|
+
let slotAcquired = false;
|
|
1387
1486
|
if (this.config.cost?.budget && !opts.skipBudgetCheck) {
|
|
1388
1487
|
const estimatedCost = this.costTracker.estimateCost(
|
|
1389
1488
|
estimatedInput,
|
|
@@ -1418,6 +1517,7 @@ var Pipeline = class {
|
|
|
1418
1517
|
estimatedInputTokens: estimatedInput,
|
|
1419
1518
|
priority: opts.priority,
|
|
1420
1519
|
timeoutMs: opts.timeoutMs,
|
|
1520
|
+
...opts.signal !== void 0 && { signal: opts.signal },
|
|
1421
1521
|
onQueued: (queueDepth, estimatedWaitMs) => {
|
|
1422
1522
|
this.emitter.emit("queued", {
|
|
1423
1523
|
model: modelId,
|
|
@@ -1443,9 +1543,9 @@ var Pipeline = class {
|
|
|
1443
1543
|
});
|
|
1444
1544
|
}
|
|
1445
1545
|
});
|
|
1446
|
-
|
|
1546
|
+
slotAcquired = true;
|
|
1447
1547
|
try {
|
|
1448
|
-
result = await withRetry(fn, this.retryConfig, {
|
|
1548
|
+
const result = await withRetry(fn, this.retryConfig, {
|
|
1449
1549
|
modelId,
|
|
1450
1550
|
onRetry: ({ attempt, maxAttempts, delayMs, error }) => {
|
|
1451
1551
|
this.emitter.emit("retrying", {
|
|
@@ -1468,6 +1568,7 @@ var Pipeline = class {
|
|
|
1468
1568
|
});
|
|
1469
1569
|
}
|
|
1470
1570
|
});
|
|
1571
|
+
return result;
|
|
1471
1572
|
} catch (error) {
|
|
1472
1573
|
this.emitter.emit("dropped", {
|
|
1473
1574
|
model: modelId,
|
|
@@ -1475,29 +1576,18 @@ var Pipeline = class {
|
|
|
1475
1576
|
reason: "queue-timeout"
|
|
1476
1577
|
});
|
|
1477
1578
|
throw error;
|
|
1579
|
+
} finally {
|
|
1580
|
+
if (slotAcquired) this.engine.release(key);
|
|
1478
1581
|
}
|
|
1479
|
-
opts.onUsage({
|
|
1480
|
-
inputTokens: estimatedInput,
|
|
1481
|
-
outputTokens: 0
|
|
1482
|
-
});
|
|
1483
|
-
this.emitter.emit("completed", {
|
|
1484
|
-
model: modelId,
|
|
1485
|
-
provider,
|
|
1486
|
-
inputTokens: estimatedInput,
|
|
1487
|
-
outputTokens: 0,
|
|
1488
|
-
costUsd: 0,
|
|
1489
|
-
latencyMs: Date.now() - startMs,
|
|
1490
|
-
streaming: opts.streaming
|
|
1491
|
-
});
|
|
1492
|
-
return result;
|
|
1493
1582
|
}
|
|
1494
1583
|
/**
|
|
1495
1584
|
* Record actual usage after a request resolves.
|
|
1496
|
-
* Called with real token counts from the API response.
|
|
1585
|
+
* Called with real token counts from the API response. Emits the single
|
|
1586
|
+
* authoritative `completed` event for this request.
|
|
1497
1587
|
*/
|
|
1498
|
-
recordUsage(modelId, provider, usage, latencyMs, streaming) {
|
|
1499
|
-
const key = `${provider}:${modelId}`;
|
|
1500
|
-
const limits = this.resolveModelLimits(modelId, provider);
|
|
1588
|
+
recordUsage(modelId, provider, scope, usage, latencyMs, streaming) {
|
|
1589
|
+
const key = scope ? `${scope}:${provider}:${modelId}` : `${provider}:${modelId}`;
|
|
1590
|
+
const limits = scope ? this.resolveScopedLimits(modelId, provider, scope) : this.resolveModelLimits(modelId, provider);
|
|
1501
1591
|
this.engine.recordActualUsage(key, usage.inputTokens, usage.outputTokens);
|
|
1502
1592
|
const costUsd = this.costTracker.record(
|
|
1503
1593
|
modelId,
|
|
@@ -1523,11 +1613,32 @@ var Pipeline = class {
|
|
|
1523
1613
|
}
|
|
1524
1614
|
getStatus() {
|
|
1525
1615
|
const models = [];
|
|
1526
|
-
|
|
1616
|
+
let totalQueueDepth = 0;
|
|
1617
|
+
for (const key of this.engine.knownKeys()) {
|
|
1618
|
+
const colonIdx = key.indexOf(":");
|
|
1619
|
+
const provider = colonIdx !== -1 ? key.slice(0, colonIdx) : key;
|
|
1620
|
+
const modelId = colonIdx !== -1 ? key.slice(colonIdx + 1) : key;
|
|
1621
|
+
const snapshot = this.engine.windowSnapshot(key);
|
|
1622
|
+
const queueDepth = this.engine.queueDepth(key);
|
|
1623
|
+
const backoffUntil = this.engine.backoffUntil(key);
|
|
1624
|
+
totalQueueDepth += queueDepth;
|
|
1625
|
+
models.push({
|
|
1626
|
+
modelId,
|
|
1627
|
+
provider,
|
|
1628
|
+
requestsInWindow: snapshot.requests,
|
|
1629
|
+
inputTokensInWindow: snapshot.inputTokens,
|
|
1630
|
+
outputTokensInWindow: snapshot.outputTokens,
|
|
1631
|
+
queueDepth,
|
|
1632
|
+
estimatedWaitMs: 0,
|
|
1633
|
+
// async — use limiter.estimatedWait() for an accurate value
|
|
1634
|
+
backoffUntil
|
|
1635
|
+
});
|
|
1636
|
+
}
|
|
1637
|
+
return { models, totalQueueDepth };
|
|
1527
1638
|
}
|
|
1528
|
-
async estimatedWait(modelId, provider, priority = "normal") {
|
|
1529
|
-
const key = `${provider}:${modelId}`;
|
|
1530
|
-
const limits = this.resolveModelLimits(modelId, provider);
|
|
1639
|
+
async estimatedWait(modelId, provider, priority = "normal", scope) {
|
|
1640
|
+
const key = scope ? `${scope}:${provider}:${modelId}` : `${provider}:${modelId}`;
|
|
1641
|
+
const limits = scope ? this.resolveScopedLimits(modelId, provider, scope) : this.resolveModelLimits(modelId, provider);
|
|
1531
1642
|
return this.engine.estimatedWaitMs(key, limits);
|
|
1532
1643
|
}
|
|
1533
1644
|
on(event, handler) {
|
|
@@ -1551,7 +1662,8 @@ function getPerRequestOptions(params, queueTimeout) {
|
|
|
1551
1662
|
priority: raw?.priority ?? "normal",
|
|
1552
1663
|
timeoutMs: raw?.timeout ?? queueTimeout,
|
|
1553
1664
|
metadata: raw?.metadata ?? {},
|
|
1554
|
-
skipBudgetCheck: raw?._skipBudgetCheck ?? false
|
|
1665
|
+
skipBudgetCheck: raw?._skipBudgetCheck ?? false,
|
|
1666
|
+
scope: raw?.scope
|
|
1555
1667
|
};
|
|
1556
1668
|
}
|
|
1557
1669
|
function extractTokenUsage(usage) {
|
|
@@ -1567,7 +1679,7 @@ function createMiddleware(pipeline, queueTimeout) {
|
|
|
1567
1679
|
// wrapGenerate — non-streaming
|
|
1568
1680
|
// -----------------------------------------------------------------------
|
|
1569
1681
|
async wrapGenerate({ doGenerate, params, model }) {
|
|
1570
|
-
const { priority, timeoutMs, skipBudgetCheck } = getPerRequestOptions(params, queueTimeout);
|
|
1682
|
+
const { priority, timeoutMs, skipBudgetCheck, scope } = getPerRequestOptions(params, queueTimeout);
|
|
1571
1683
|
const modelId = model.modelId;
|
|
1572
1684
|
const provider = model.provider;
|
|
1573
1685
|
const startMs = Date.now();
|
|
@@ -1581,21 +1693,19 @@ function createMiddleware(pipeline, queueTimeout) {
|
|
|
1581
1693
|
priority,
|
|
1582
1694
|
timeoutMs,
|
|
1583
1695
|
skipBudgetCheck,
|
|
1584
|
-
|
|
1585
|
-
}
|
|
1696
|
+
...scope !== void 0 && { scope },
|
|
1697
|
+
...params.abortSignal !== void 0 && { signal: params.abortSignal }
|
|
1586
1698
|
}
|
|
1587
1699
|
);
|
|
1588
|
-
|
|
1589
|
-
|
|
1590
|
-
pipeline.recordUsage(modelId, provider, usage, Date.now() - startMs, false);
|
|
1591
|
-
}
|
|
1700
|
+
const usage = result.usage ? extractTokenUsage(result.usage) : { inputTokens: 0, outputTokens: 0 };
|
|
1701
|
+
pipeline.recordUsage(modelId, provider, scope, usage, Date.now() - startMs, false);
|
|
1592
1702
|
return result;
|
|
1593
1703
|
},
|
|
1594
1704
|
// -----------------------------------------------------------------------
|
|
1595
1705
|
// wrapStream — streaming
|
|
1596
1706
|
// -----------------------------------------------------------------------
|
|
1597
1707
|
async wrapStream({ doStream, params, model }) {
|
|
1598
|
-
const { priority, timeoutMs, skipBudgetCheck } = getPerRequestOptions(params, queueTimeout);
|
|
1708
|
+
const { priority, timeoutMs, skipBudgetCheck, scope } = getPerRequestOptions(params, queueTimeout);
|
|
1599
1709
|
const modelId = model.modelId;
|
|
1600
1710
|
const provider = model.provider;
|
|
1601
1711
|
const startMs = Date.now();
|
|
@@ -1609,18 +1719,16 @@ function createMiddleware(pipeline, queueTimeout) {
|
|
|
1609
1719
|
priority,
|
|
1610
1720
|
timeoutMs,
|
|
1611
1721
|
skipBudgetCheck,
|
|
1612
|
-
|
|
1613
|
-
}
|
|
1722
|
+
...scope !== void 0 && { scope },
|
|
1723
|
+
...params.abortSignal !== void 0 && { signal: params.abortSignal }
|
|
1614
1724
|
}
|
|
1615
1725
|
);
|
|
1616
1726
|
const { stream, ...rest } = streamResult;
|
|
1617
1727
|
const transformStream = new TransformStream({
|
|
1618
1728
|
transform(chunk, controller) {
|
|
1619
|
-
if (chunk.type === "finish"
|
|
1620
|
-
const usage = extractTokenUsage(
|
|
1621
|
-
|
|
1622
|
-
);
|
|
1623
|
-
pipeline.recordUsage(modelId, provider, usage, Date.now() - startMs, true);
|
|
1729
|
+
if (chunk.type === "finish") {
|
|
1730
|
+
const usage = chunk.usage ? extractTokenUsage(chunk.usage) : { inputTokens: 0, outputTokens: 0 };
|
|
1731
|
+
pipeline.recordUsage(modelId, provider, scope, usage, Date.now() - startMs, true);
|
|
1624
1732
|
}
|
|
1625
1733
|
controller.enqueue(chunk);
|
|
1626
1734
|
}
|
|
@@ -1636,27 +1744,41 @@ function wrapModel(model, middleware, overrides) {
|
|
|
1636
1744
|
const providerId = overrides?.providerId ?? model.provider;
|
|
1637
1745
|
const modelId = overrides?.modelId ?? model.modelId;
|
|
1638
1746
|
const fallbackModel = overrides?.fallback;
|
|
1747
|
+
const staticScope = overrides?.scope;
|
|
1748
|
+
function injectScope(params) {
|
|
1749
|
+
if (!staticScope) return params;
|
|
1750
|
+
const existingRl = params.providerOptions?.["rateLimiter"] ?? {};
|
|
1751
|
+
if (existingRl["scope"]) return params;
|
|
1752
|
+
return {
|
|
1753
|
+
...params,
|
|
1754
|
+
providerOptions: {
|
|
1755
|
+
...params.providerOptions,
|
|
1756
|
+
rateLimiter: { ...existingRl, scope: staticScope }
|
|
1757
|
+
}
|
|
1758
|
+
};
|
|
1759
|
+
}
|
|
1639
1760
|
return {
|
|
1640
1761
|
specificationVersion: "v4",
|
|
1641
1762
|
provider: providerId,
|
|
1642
1763
|
modelId,
|
|
1643
1764
|
supportedUrls: model["supportedUrls"],
|
|
1644
1765
|
async doGenerate(params) {
|
|
1766
|
+
const enrichedParams = injectScope(params);
|
|
1645
1767
|
try {
|
|
1646
1768
|
return await middleware.wrapGenerate({
|
|
1647
|
-
doGenerate: () => model.doGenerate(
|
|
1648
|
-
doStream: () => model.doStream(
|
|
1649
|
-
params,
|
|
1769
|
+
doGenerate: () => model.doGenerate(enrichedParams),
|
|
1770
|
+
doStream: () => model.doStream(enrichedParams),
|
|
1771
|
+
params: enrichedParams,
|
|
1650
1772
|
model
|
|
1651
1773
|
});
|
|
1652
1774
|
} catch (err) {
|
|
1653
1775
|
if (err instanceof BudgetExceededError && fallbackModel) {
|
|
1654
1776
|
const fallbackParams = {
|
|
1655
|
-
...
|
|
1777
|
+
...enrichedParams,
|
|
1656
1778
|
providerOptions: {
|
|
1657
|
-
...
|
|
1779
|
+
...enrichedParams.providerOptions,
|
|
1658
1780
|
rateLimiter: {
|
|
1659
|
-
...
|
|
1781
|
+
...enrichedParams.providerOptions?.["rateLimiter"] ?? {},
|
|
1660
1782
|
_skipBudgetCheck: true
|
|
1661
1783
|
}
|
|
1662
1784
|
}
|
|
@@ -1672,21 +1794,22 @@ function wrapModel(model, middleware, overrides) {
|
|
|
1672
1794
|
}
|
|
1673
1795
|
},
|
|
1674
1796
|
async doStream(params) {
|
|
1797
|
+
const enrichedParams = injectScope(params);
|
|
1675
1798
|
try {
|
|
1676
1799
|
return await middleware.wrapStream({
|
|
1677
|
-
doGenerate: () => model.doGenerate(
|
|
1678
|
-
doStream: () => model.doStream(
|
|
1679
|
-
params,
|
|
1800
|
+
doGenerate: () => model.doGenerate(enrichedParams),
|
|
1801
|
+
doStream: () => model.doStream(enrichedParams),
|
|
1802
|
+
params: enrichedParams,
|
|
1680
1803
|
model
|
|
1681
1804
|
});
|
|
1682
1805
|
} catch (err) {
|
|
1683
1806
|
if (err instanceof BudgetExceededError && fallbackModel) {
|
|
1684
1807
|
const fallbackParams = {
|
|
1685
|
-
...
|
|
1808
|
+
...enrichedParams,
|
|
1686
1809
|
providerOptions: {
|
|
1687
|
-
...
|
|
1810
|
+
...enrichedParams.providerOptions,
|
|
1688
1811
|
rateLimiter: {
|
|
1689
|
-
...
|
|
1812
|
+
...enrichedParams.providerOptions?.["rateLimiter"] ?? {},
|
|
1690
1813
|
_skipBudgetCheck: true
|
|
1691
1814
|
}
|
|
1692
1815
|
}
|
|
@@ -1749,7 +1872,7 @@ function wrapAsyncIterableStream(stream, pipeline, modelId, provider, startMs) {
|
|
|
1749
1872
|
}
|
|
1750
1873
|
}
|
|
1751
1874
|
if (result.done) {
|
|
1752
|
-
pipeline.recordUsage(modelId, provider, { inputTokens, outputTokens }, Date.now() - startMs, true);
|
|
1875
|
+
pipeline.recordUsage(modelId, provider, void 0, { inputTokens, outputTokens }, Date.now() - startMs, true);
|
|
1753
1876
|
}
|
|
1754
1877
|
return result;
|
|
1755
1878
|
},
|
|
@@ -1810,16 +1933,14 @@ async function executeViaProxy(fn, args, modelId, provider, params, pipeline, qu
|
|
|
1810
1933
|
{
|
|
1811
1934
|
streaming: isStreaming,
|
|
1812
1935
|
priority,
|
|
1813
|
-
timeoutMs: queueTimeout
|
|
1814
|
-
onUsage: () => {
|
|
1815
|
-
}
|
|
1936
|
+
timeoutMs: queueTimeout
|
|
1816
1937
|
}
|
|
1817
1938
|
);
|
|
1818
1939
|
if (isStreaming && result !== null && typeof result === "object" && Symbol.asyncIterator in result) {
|
|
1819
1940
|
return wrapAsyncIterableStream(result, pipeline, modelId, provider, startMs);
|
|
1820
1941
|
}
|
|
1821
1942
|
const usage = extractUsage(result);
|
|
1822
|
-
pipeline.recordUsage(modelId, provider, usage, Date.now() - startMs, false);
|
|
1943
|
+
pipeline.recordUsage(modelId, provider, void 0, usage, Date.now() - startMs, false);
|
|
1823
1944
|
return result;
|
|
1824
1945
|
}
|
|
1825
1946
|
function rateLimited(client, options = {}) {
|