llmist 12.3.5 → 12.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +437 -14
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +292 -4
- package/dist/index.d.ts +292 -4
- package/dist/index.js +432 -14
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -1423,6 +1423,241 @@ var init_model_shortcuts = __esm({
|
|
|
1423
1423
|
}
|
|
1424
1424
|
});
|
|
1425
1425
|
|
|
1426
|
+
// src/core/rate-limit.ts
|
|
1427
|
+
/**
 * Normalize a user-supplied rate limit configuration.
 *
 * - No config at all yields a disabled configuration carrying the default
 *   safety margin.
 * - `enabled` defaults to the library default only when at least one of
 *   `requestsPerMinute`, `tokensPerMinute` or `tokensPerDay` is set; an
 *   explicit `enabled` value always wins.
 * - `safetyMargin` falls back to the default when it is missing or is not a
 *   finite, positive number. The limit checks multiply each limit by this
 *   margin, so `NaN` would make every comparison false (silently disabling
 *   throttling) and `0`/negative values would make the daily-token check
 *   trigger immediately.
 *
 * @param config - Optional rate limit configuration
 * @returns Resolved configuration with `safetyMargin` and `enabled` always set
 */
function resolveRateLimitConfig(config) {
  const defaults = DEFAULT_RATE_LIMIT_CONFIG;
  if (!config) {
    return { safetyMargin: defaults.safetyMargin, enabled: false };
  }
  const { requestsPerMinute, tokensPerMinute, tokensPerDay } = config;
  const hasLimits =
    requestsPerMinute !== undefined ||
    tokensPerMinute !== undefined ||
    tokensPerDay !== undefined;
  const margin = config.safetyMargin;
  const isUsableMargin =
    typeof margin === "number" && Number.isFinite(margin) && margin > 0;
  return {
    requestsPerMinute,
    tokensPerMinute,
    tokensPerDay,
    safetyMargin: isUsableMargin ? margin : defaults.safetyMargin,
    enabled: config.enabled ?? (hasLimits && defaults.enabled)
  };
}
|
|
1440
|
+
var DEFAULT_RATE_LIMIT_CONFIG, RateLimitTracker;
var init_rate_limit = __esm({
  "src/core/rate-limit.ts"() {
    "use strict";
    /** Length of the sliding per-minute window, in milliseconds. */
    const WINDOW_MS = 6e4;
    /**
     * Coerce a reported token count to a usable number. Anything that is not
     * a finite, positive number counts as 0 so that one bad usage report
     * cannot turn the running totals into NaN (which would make every limit
     * comparison false and silently disable throttling).
     */
    const toTokenCount = (value) =>
      typeof value === "number" && Number.isFinite(value) && value > 0 ? value : 0;
    DEFAULT_RATE_LIMIT_CONFIG = {
      safetyMargin: 0.9,
      enabled: true
    };
    /**
     * Tracks request and token usage so callers can throttle proactively.
     *
     * Requests-per-minute and tokens-per-minute are measured over a sliding
     * 60 second window; tokens-per-day is a counter that resets when the UTC
     * calendar date changes. Each limit is multiplied by `safetyMargin`
     * before being compared against current usage.
     */
    RateLimitTracker = class {
      /** Resolved configuration (see resolveRateLimitConfig). */
      config;
      /** Timestamps (ms since epoch) of requests in the current minute window */
      requestTimestamps = [];
      /** Token usage entries ({ timestamp, tokens }) in the current minute window */
      tokenUsage = [];
      /** Daily token count */
      dailyTokens = 0;
      /** Date string (YYYY-MM-DD UTC) for daily reset tracking */
      dailyResetDate;
      /**
       * @param config - Optional rate limit configuration; resolved through
       *   resolveRateLimitConfig.
       */
      constructor(config) {
        this.config = resolveRateLimitConfig(config);
        this.dailyResetDate = this.getCurrentDateUTC();
      }
      /**
       * Record a completed request with its token usage.
       *
       * Non-finite or negative token counts are treated as 0.
       *
       * @param inputTokens - Number of input tokens used
       * @param outputTokens - Number of output tokens generated
       */
      recordUsage(inputTokens, outputTokens) {
        const now = Date.now();
        const totalTokens = toTokenCount(inputTokens) + toTokenCount(outputTokens);
        this.requestTimestamps.push(now);
        this.tokenUsage.push({ timestamp: now, tokens: totalTokens });
        // Roll the daily counter over first so usage recorded right after
        // midnight UTC is attributed to the new day.
        this.checkDailyReset();
        this.dailyTokens += totalTokens;
        this.pruneOldEntries(now);
      }
      /**
       * Calculate the delay needed before the next request.
       *
       * Returns 0 if no delay is needed (or tracking is disabled), otherwise
       * the largest delay demanded by any configured limit.
       *
       * @returns Delay in milliseconds, rounded up (0 if none needed)
       */
      getRequiredDelayMs() {
        if (!this.config.enabled) {
          return 0;
        }
        const now = Date.now();
        this.pruneOldEntries(now);
        this.checkDailyReset();
        const { requestsPerMinute, tokensPerMinute, tokensPerDay, safetyMargin } = this.config;
        let maxDelay = 0;
        if (requestsPerMinute !== undefined) {
          maxDelay = Math.max(maxDelay, this.calculateRpmDelay(now));
        }
        if (tokensPerMinute !== undefined) {
          maxDelay = Math.max(maxDelay, this.calculateTpmDelay(now));
        }
        if (tokensPerDay !== undefined && this.dailyTokens >= tokensPerDay * safetyMargin) {
          // The daily budget only frees up when the UTC date rolls over.
          maxDelay = Math.max(maxDelay, this.getTimeUntilMidnightUTC());
        }
        return Math.ceil(maxDelay);
      }
      /**
       * Check if we're approaching any configured limits.
       *
       * @returns true if any limit is at or above the safety margin threshold;
       *   always false when tracking is disabled
       */
      isApproachingLimit() {
        if (!this.config.enabled) {
          return false;
        }
        this.pruneOldEntries(Date.now());
        this.checkDailyReset();
        const { requestsPerMinute, tokensPerMinute, tokensPerDay, safetyMargin } = this.config;
        if (
          requestsPerMinute !== undefined &&
          this.requestTimestamps.length >= requestsPerMinute * safetyMargin
        ) {
          return true;
        }
        if (
          tokensPerMinute !== undefined &&
          this.getWindowTokens() >= tokensPerMinute * safetyMargin
        ) {
          return true;
        }
        return tokensPerDay !== undefined && this.dailyTokens >= tokensPerDay * safetyMargin;
      }
      /**
       * Get current usage statistics.
       *
       * @returns `{ rpm, tpm, dailyTokens, isApproachingLimit, requiredDelayMs }`
       *   for monitoring/logging
       */
      getUsageStats() {
        this.pruneOldEntries(Date.now());
        this.checkDailyReset();
        return {
          rpm: this.requestTimestamps.length,
          tpm: this.getWindowTokens(),
          dailyTokens: this.dailyTokens,
          isApproachingLimit: this.isApproachingLimit(),
          requiredDelayMs: this.getRequiredDelayMs()
        };
      }
      /**
       * Reset all tracking state.
       * Useful for testing or when switching API keys/tiers.
       */
      reset() {
        this.requestTimestamps = [];
        this.tokenUsage = [];
        this.dailyTokens = 0;
        this.dailyResetDate = this.getCurrentDateUTC();
      }
      /**
       * Update configuration dynamically. Recorded usage is kept.
       *
       * @param config - New configuration to apply
       */
      updateConfig(config) {
        this.config = resolveRateLimitConfig(config);
      }
      // ─────────────────────────────────────────────────────────────────────────
      // Private methods
      // ─────────────────────────────────────────────────────────────────────────
      /**
       * Sum of tokens recorded in the current minute window.
       */
      getWindowTokens() {
        return this.tokenUsage.reduce((sum, entry) => sum + entry.tokens, 0);
      }
      /**
       * Calculate delay needed based on RPM limit.
       *
       * Waits until enough of the oldest requests have left the window for
       * the count to drop below the effective limit. When the window is over
       * the limit by more than one request (for example after the limit was
       * lowered via updateConfig), waiting for only the oldest entry would
       * not be enough.
       */
      calculateRpmDelay(now) {
        const effectiveLimit = this.config.requestsPerMinute * this.config.safetyMargin;
        const currentRpm = this.requestTimestamps.length;
        if (currentRpm === 0 || currentRpm < effectiveLimit) {
          return 0;
        }
        // Entries are appended in recording order, so index k is the (k+1)-th
        // oldest. Expiring floor(overflow) + 1 entries brings the count below
        // the limit; clamp to the last entry when the limit is unreachable.
        const overflow = currentRpm - effectiveLimit;
        const index = Number.isFinite(overflow)
          ? Math.min(currentRpm - 1, Math.floor(overflow))
          : 0;
        const expiryTime = this.requestTimestamps[index] + WINDOW_MS;
        return Math.max(0, expiryTime - now);
      }
      /**
       * Calculate delay needed based on TPM limit.
       *
       * Walks the usage entries oldest-first until the tokens that will have
       * expired cover the excess over the effective limit, and returns the
       * time until that entry leaves the window.
       */
      calculateTpmDelay(now) {
        const effectiveLimit = this.config.tokensPerMinute * this.config.safetyMargin;
        const currentTpm = this.getWindowTokens();
        if (currentTpm < effectiveLimit) {
          return 0;
        }
        const oldestFirst = [...this.tokenUsage].sort((a, b) => a.timestamp - b.timestamp);
        let tokensToFree = currentTpm - effectiveLimit;
        let delay = 0;
        for (const entry of oldestFirst) {
          tokensToFree -= entry.tokens;
          if (tokensToFree <= 0) {
            delay = entry.timestamp + WINDOW_MS - now;
            break;
          }
        }
        return Math.max(0, delay);
      }
      /**
       * Remove entries older than 1 minute from the sliding window.
       */
      pruneOldEntries(now) {
        const cutoff = now - WINDOW_MS;
        while (this.requestTimestamps.length > 0 && this.requestTimestamps[0] < cutoff) {
          this.requestTimestamps.shift();
        }
        while (this.tokenUsage.length > 0 && this.tokenUsage[0].timestamp < cutoff) {
          this.tokenUsage.shift();
        }
      }
      /**
       * Check if the day has changed (UTC) and reset daily counters.
       */
      checkDailyReset() {
        const currentDate = this.getCurrentDateUTC();
        if (currentDate !== this.dailyResetDate) {
          this.dailyTokens = 0;
          this.dailyResetDate = currentDate;
        }
      }
      /**
       * Get current date in YYYY-MM-DD format (UTC).
       */
      getCurrentDateUTC() {
        return new Date().toISOString().split("T")[0];
      }
      /**
       * Calculate milliseconds until midnight UTC.
       */
      getTimeUntilMidnightUTC() {
        const now = new Date();
        // Date.UTC normalizes a day-of-month overflow into the next month/year.
        const nextMidnight = Date.UTC(now.getUTCFullYear(), now.getUTCMonth(), now.getUTCDate() + 1);
        return nextMidnight - now.getTime();
      }
    };
  }
});
|
|
1660
|
+
|
|
1426
1661
|
// src/core/retry.ts
|
|
1427
1662
|
function resolveRetryConfig(config) {
|
|
1428
1663
|
if (!config) {
|
|
@@ -1437,7 +1672,9 @@ function resolveRetryConfig(config) {
|
|
|
1437
1672
|
randomize: config.randomize ?? DEFAULT_RETRY_CONFIG.randomize,
|
|
1438
1673
|
onRetry: config.onRetry,
|
|
1439
1674
|
onRetriesExhausted: config.onRetriesExhausted,
|
|
1440
|
-
shouldRetry: config.shouldRetry
|
|
1675
|
+
shouldRetry: config.shouldRetry,
|
|
1676
|
+
respectRetryAfter: config.respectRetryAfter ?? DEFAULT_RETRY_CONFIG.respectRetryAfter,
|
|
1677
|
+
maxRetryAfterMs: config.maxRetryAfterMs ?? DEFAULT_RETRY_CONFIG.maxRetryAfterMs
|
|
1441
1678
|
};
|
|
1442
1679
|
}
|
|
1443
1680
|
function isRetryableError(error) {
|
|
@@ -1461,6 +1698,12 @@ function isRetryableError(error) {
|
|
|
1461
1698
|
if (message.includes("overloaded") || message.includes("capacity")) {
|
|
1462
1699
|
return true;
|
|
1463
1700
|
}
|
|
1701
|
+
if (message.includes("resource_exhausted") || message.includes("quota exceeded") || message.includes("unavailable") || message.includes("deadline_exceeded")) {
|
|
1702
|
+
return true;
|
|
1703
|
+
}
|
|
1704
|
+
if (message.includes("overloaded_error") || message.includes("api_error")) {
|
|
1705
|
+
return true;
|
|
1706
|
+
}
|
|
1464
1707
|
if (message.includes("401") || message.includes("403") || message.includes("400") || message.includes("404") || message.includes("authentication") || message.includes("unauthorized") || message.includes("forbidden") || message.includes("invalid") || message.includes("content policy") || name === "AuthenticationError" || name === "BadRequestError" || name === "NotFoundError" || name === "PermissionDeniedError") {
|
|
1465
1708
|
return false;
|
|
1466
1709
|
}
|
|
@@ -1535,10 +1778,54 @@ function formatLLMError(error) {
|
|
|
1535
1778
|
if (firstPart && firstPart.length > 10 && firstPart.length < 150) {
|
|
1536
1779
|
return firstPart.trim();
|
|
1537
1780
|
}
|
|
1538
|
-
return message.slice(0, 150).trim()
|
|
1781
|
+
return `${message.slice(0, 150).trim()}...`;
|
|
1539
1782
|
}
|
|
1540
1783
|
return message;
|
|
1541
1784
|
}
|
|
1785
|
+
/**
 * Parse a `Retry-After` header value into a delay in milliseconds.
 *
 * The value is interpreted, in order, as:
 *  1. a plain number of seconds (e.g. "120" or "1.5");
 *  2. a date understood by `Date.parse` (e.g. an HTTP-date), converted to
 *     the time remaining from now;
 *  3. a string with a leading number (lenient fallback, e.g. "30s").
 *
 * Dates are tried before the lenient numeric fallback because
 * `Number.parseFloat` accepts any numeric prefix: an ISO timestamp such as
 * "2030-01-01T00:00:00Z" would otherwise be read as 2030 seconds.
 *
 * @param value - Raw header value
 * @returns Delay in milliseconds, or null when the value is missing,
 *   unparseable, zero, or refers to a time that has already passed
 */
function parseRetryAfterHeader(value) {
  const text = String(value ?? "").trim();
  if (text === "") {
    return null;
  }
  const toMs = (seconds) =>
    !Number.isNaN(seconds) && seconds > 0 ? Math.ceil(seconds * 1e3) : null;
  // 1. Entire value is a decimal number of seconds.
  if (/^\+?(?:\d+(?:\.\d*)?|\.\d+)$/.test(text)) {
    return toMs(Number.parseFloat(text));
  }
  // 2. Absolute date.
  const date = Date.parse(text);
  if (!Number.isNaN(date)) {
    const delay = date - Date.now();
    return delay > 0 ? delay : null;
  }
  // 3. Lenient numeric prefix.
  return toMs(Number.parseFloat(text));
}
|
|
1797
|
+
/**
 * Extract a server-suggested retry delay from an error.
 *
 * Sources are checked in this order:
 *  1. A `retry-after` header on `error.headers` or `error.response.headers`.
 *     Both objects with a `get()` method and plain objects are supported;
 *     plain-object keys are matched case-insensitively.
 *  2. Delay hints embedded in the error message ("retry in 5s",
 *     "retry-after: 5", "wait 5 seconds").
 *  3. A fixed 60 second fallback when the message mentions
 *     "resource_exhausted" or "quota exceeded" without a usable delay.
 *
 * Values without a string `message` (or null/undefined) are tolerated and
 * simply yield no message-based hint.
 *
 * @param error - The error thrown by a failed request
 * @returns Delay in milliseconds, or null when no hint is found
 */
function extractRetryAfterMs(error) {
  const headers = error?.headers || error?.response?.headers;
  if (headers) {
    let retryAfter;
    if (typeof headers.get === "function") {
      retryAfter = headers.get("retry-after");
    } else {
      retryAfter = headers["retry-after"];
      if (retryAfter === undefined || retryAfter === null) {
        // Plain header objects may preserve the server's casing ("Retry-After").
        const key = Object.keys(headers).find((name) => name.toLowerCase() === "retry-after");
        if (key !== undefined) {
          retryAfter = headers[key];
        }
      }
    }
    if (retryAfter !== undefined && retryAfter !== null && retryAfter !== "") {
      const parsed = parseRetryAfterHeader(String(retryAfter));
      if (parsed !== null) {
        return parsed;
      }
    }
  }
  const message = typeof error?.message === "string" ? error.message : "";
  const patterns = [
    /retry\s+(?:in|after)\s+(\d+(?:\.\d+)?)\s*s(?:econds?)?/i,
    /retry-after:\s*(\d+(?:\.\d+)?)/i,
    /wait\s+(\d+(?:\.\d+)?)\s*s(?:econds?)?/i
  ];
  for (const pattern of patterns) {
    const match = message.match(pattern);
    if (match) {
      const seconds = Number.parseFloat(match[1]);
      if (!Number.isNaN(seconds) && seconds > 0) {
        return Math.ceil(seconds * 1e3);
      }
    }
  }
  const lowered = message.toLowerCase();
  if (lowered.includes("resource_exhausted") || lowered.includes("quota exceeded")) {
    return 6e4;
  }
  return null;
}
|
|
1542
1829
|
var DEFAULT_RETRY_CONFIG;
|
|
1543
1830
|
var init_retry = __esm({
|
|
1544
1831
|
"src/core/retry.ts"() {
|
|
@@ -1549,7 +1836,10 @@ var init_retry = __esm({
|
|
|
1549
1836
|
minTimeout: 1e3,
|
|
1550
1837
|
maxTimeout: 3e4,
|
|
1551
1838
|
factor: 2,
|
|
1552
|
-
randomize: true
|
|
1839
|
+
randomize: true,
|
|
1840
|
+
respectRetryAfter: true,
|
|
1841
|
+
maxRetryAfterMs: 12e4
|
|
1842
|
+
// 2 minutes cap
|
|
1553
1843
|
};
|
|
1554
1844
|
}
|
|
1555
1845
|
});
|
|
@@ -3905,7 +4195,7 @@ function createProviderFromEnv(envVarName, ClientClass, ProviderClass, clientOpt
|
|
|
3905
4195
|
if (!isNonEmpty(apiKey)) {
|
|
3906
4196
|
return null;
|
|
3907
4197
|
}
|
|
3908
|
-
const client = new ClientClass({ apiKey: apiKey.trim(), ...clientOptions });
|
|
4198
|
+
const client = new ClientClass({ apiKey: apiKey.trim(), maxRetries: 0, ...clientOptions });
|
|
3909
4199
|
return new ProviderClass(client);
|
|
3910
4200
|
}
|
|
3911
4201
|
var init_utils = __esm({
|
|
@@ -5865,8 +6155,8 @@ function createHuggingFaceProviderFromEnv() {
|
|
|
5865
6155
|
baseURL,
|
|
5866
6156
|
timeout: 6e4,
|
|
5867
6157
|
// 60s timeout - HF free tier can be slower than OpenAI
|
|
5868
|
-
maxRetries:
|
|
5869
|
-
//
|
|
6158
|
+
maxRetries: 0
|
|
6159
|
+
// Disable SDK retries - llmist handles all retries at application level
|
|
5870
6160
|
});
|
|
5871
6161
|
return new HuggingFaceProvider(client, endpointType);
|
|
5872
6162
|
}
|
|
@@ -8080,6 +8370,7 @@ var init_builder = __esm({
|
|
|
8080
8370
|
gadgetOutputLimitPercent;
|
|
8081
8371
|
compactionConfig;
|
|
8082
8372
|
retryConfig;
|
|
8373
|
+
rateLimitConfig;
|
|
8083
8374
|
signal;
|
|
8084
8375
|
trailingMessage;
|
|
8085
8376
|
subagentConfig;
|
|
@@ -8587,6 +8878,42 @@ var init_builder = __esm({
|
|
|
8587
8878
|
this.retryConfig = { enabled: false };
|
|
8588
8879
|
return this;
|
|
8589
8880
|
}
|
|
8881
|
+
/**
|
|
8882
|
+
* Configure proactive rate limiting to prevent rate limit errors.
|
|
8883
|
+
*
|
|
8884
|
+
* Set limits based on your API tier to automatically throttle requests
|
|
8885
|
+
* before hitting provider limits. Works in conjunction with reactive
|
|
8886
|
+
* retry/backoff for comprehensive rate limit handling.
|
|
8887
|
+
*
|
|
8888
|
+
* @param config - Rate limit configuration
|
|
8889
|
+
* @returns This builder for chaining
|
|
8890
|
+
*
|
|
8891
|
+
* @example
|
|
8892
|
+
* ```typescript
|
|
8893
|
+
* // Gemini free tier limits
|
|
8894
|
+
* .withRateLimits({
|
|
8895
|
+
* requestsPerMinute: 15,
|
|
8896
|
+
* tokensPerMinute: 1_000_000,
|
|
8897
|
+
* safetyMargin: 0.8, // Start throttling at 80%
|
|
8898
|
+
* })
|
|
8899
|
+
*
|
|
8900
|
+
* // OpenAI Tier 1 limits
|
|
8901
|
+
* .withRateLimits({
|
|
8902
|
+
* requestsPerMinute: 500,
|
|
8903
|
+
* tokensPerMinute: 200_000,
|
|
8904
|
+
* })
|
|
8905
|
+
*
|
|
8906
|
+
* // With daily limit (Gemini free tier)
|
|
8907
|
+
* .withRateLimits({
|
|
8908
|
+
* requestsPerMinute: 15,
|
|
8909
|
+
* tokensPerDay: 1_500_000,
|
|
8910
|
+
* })
|
|
8911
|
+
* ```
|
|
8912
|
+
*/
|
|
8913
|
+
withRateLimits(config) {
|
|
8914
|
+
this.rateLimitConfig = config;
|
|
8915
|
+
return this;
|
|
8916
|
+
}
|
|
8590
8917
|
/**
|
|
8591
8918
|
* Set an abort signal for cancelling requests mid-flight.
|
|
8592
8919
|
*
|
|
@@ -8885,6 +9212,7 @@ ${endPrefix}`
|
|
|
8885
9212
|
gadgetOutputLimitPercent: this.gadgetOutputLimitPercent,
|
|
8886
9213
|
compactionConfig: this.compactionConfig,
|
|
8887
9214
|
retryConfig: this.retryConfig,
|
|
9215
|
+
rateLimitConfig: this.rateLimitConfig,
|
|
8888
9216
|
signal: this.signal,
|
|
8889
9217
|
subagentConfig: this.subagentConfig,
|
|
8890
9218
|
// Tree context for shared tree model (subagents share parent's tree)
|
|
@@ -9070,6 +9398,7 @@ ${endPrefix}`
|
|
|
9070
9398
|
gadgetOutputLimitPercent: this.gadgetOutputLimitPercent,
|
|
9071
9399
|
compactionConfig: this.compactionConfig,
|
|
9072
9400
|
retryConfig: this.retryConfig,
|
|
9401
|
+
rateLimitConfig: this.rateLimitConfig,
|
|
9073
9402
|
signal: this.signal,
|
|
9074
9403
|
subagentConfig: this.subagentConfig,
|
|
9075
9404
|
// Tree context for shared tree model (subagents share parent's tree)
|
|
@@ -11576,6 +11905,7 @@ var init_agent = __esm({
|
|
|
11576
11905
|
init_execution_tree();
|
|
11577
11906
|
init_messages();
|
|
11578
11907
|
init_model_shortcuts();
|
|
11908
|
+
init_rate_limit();
|
|
11579
11909
|
init_retry();
|
|
11580
11910
|
init_media_store();
|
|
11581
11911
|
init_output_viewer();
|
|
@@ -11618,6 +11948,8 @@ var init_agent = __esm({
|
|
|
11618
11948
|
signal;
|
|
11619
11949
|
// Retry configuration
|
|
11620
11950
|
retryConfig;
|
|
11951
|
+
// Rate limit tracker for proactive throttling
|
|
11952
|
+
rateLimitTracker;
|
|
11621
11953
|
// Subagent configuration
|
|
11622
11954
|
agentContextConfig;
|
|
11623
11955
|
subagentConfig;
|
|
@@ -11705,6 +12037,10 @@ var init_agent = __esm({
|
|
|
11705
12037
|
}
|
|
11706
12038
|
this.signal = options.signal;
|
|
11707
12039
|
this.retryConfig = resolveRetryConfig(options.retryConfig);
|
|
12040
|
+
const rateLimitConfig = resolveRateLimitConfig(options.rateLimitConfig);
|
|
12041
|
+
if (rateLimitConfig.enabled) {
|
|
12042
|
+
this.rateLimitTracker = new RateLimitTracker(options.rateLimitConfig);
|
|
12043
|
+
}
|
|
11708
12044
|
this.agentContextConfig = {
|
|
11709
12045
|
model: this.model,
|
|
11710
12046
|
temperature: this.temperature
|
|
@@ -11919,6 +12255,8 @@ var init_agent = __esm({
|
|
|
11919
12255
|
model: this.model,
|
|
11920
12256
|
maxIterations: this.maxIterations
|
|
11921
12257
|
});
|
|
12258
|
+
let currentLLMNodeId;
|
|
12259
|
+
let llmOptions;
|
|
11922
12260
|
try {
|
|
11923
12261
|
while (currentIteration < this.maxIterations) {
|
|
11924
12262
|
if (await this.checkAbortAndNotify(currentIteration)) {
|
|
@@ -11933,8 +12271,6 @@ var init_agent = __esm({
|
|
|
11933
12271
|
});
|
|
11934
12272
|
}
|
|
11935
12273
|
this.logger.debug("Starting iteration", { iteration: currentIteration });
|
|
11936
|
-
let currentLLMNodeId;
|
|
11937
|
-
let llmOptions;
|
|
11938
12274
|
try {
|
|
11939
12275
|
const compactionEvent = await this.checkAndPerformCompaction(currentIteration);
|
|
11940
12276
|
if (compactionEvent) {
|
|
@@ -12086,14 +12422,54 @@ var init_agent = __esm({
|
|
|
12086
12422
|
reason: currentIteration >= this.maxIterations ? "max_iterations" : "natural_completion"
|
|
12087
12423
|
});
|
|
12088
12424
|
} finally {
|
|
12425
|
+
if (currentLLMNodeId) {
|
|
12426
|
+
const node = this.tree.getNode(currentLLMNodeId);
|
|
12427
|
+
if (node && node.type === "llm_call" && !node.completedAt) {
|
|
12428
|
+
await this.safeObserve(async () => {
|
|
12429
|
+
if (this.hooks.observers?.onLLMCallComplete) {
|
|
12430
|
+
const subagentContext = getSubagentContextForNode(this.tree, currentLLMNodeId);
|
|
12431
|
+
const context = {
|
|
12432
|
+
iteration: currentIteration,
|
|
12433
|
+
options: llmOptions ?? {
|
|
12434
|
+
model: this.model,
|
|
12435
|
+
messages: this.conversation.getMessages(),
|
|
12436
|
+
temperature: this.temperature,
|
|
12437
|
+
maxTokens: this.defaultMaxTokens
|
|
12438
|
+
},
|
|
12439
|
+
finishReason: "interrupted",
|
|
12440
|
+
usage: void 0,
|
|
12441
|
+
rawResponse: "",
|
|
12442
|
+
// No response available for interrupted request
|
|
12443
|
+
finalMessage: "",
|
|
12444
|
+
// No final message for interrupted request
|
|
12445
|
+
logger: this.logger,
|
|
12446
|
+
subagentContext
|
|
12447
|
+
};
|
|
12448
|
+
await this.hooks.observers.onLLMCallComplete(context);
|
|
12449
|
+
}
|
|
12450
|
+
});
|
|
12451
|
+
this.tree.completeLLMCall(currentLLMNodeId, {
|
|
12452
|
+
finishReason: "interrupted"
|
|
12453
|
+
});
|
|
12454
|
+
}
|
|
12455
|
+
}
|
|
12089
12456
|
unsubscribeBridge();
|
|
12090
12457
|
}
|
|
12091
12458
|
}
|
|
12092
12459
|
/**
|
|
12093
|
-
* Create LLM stream with
|
|
12094
|
-
*
|
|
12460
|
+
* Create LLM stream with two-layer rate limit protection:
|
|
12461
|
+
*
|
|
12462
|
+
* Layer 1 (Proactive): If rate limits are configured, delays requests to stay within limits.
|
|
12463
|
+
* Layer 2 (Reactive): Exponential backoff with Retry-After header support for transient failures.
|
|
12095
12464
|
*/
|
|
12096
12465
|
async createStreamWithRetry(llmOptions, iteration) {
|
|
12466
|
+
if (this.rateLimitTracker) {
|
|
12467
|
+
const throttleDelay = this.rateLimitTracker.getRequiredDelayMs();
|
|
12468
|
+
if (throttleDelay > 0) {
|
|
12469
|
+
this.logger.debug("Rate limit throttling", { delayMs: throttleDelay });
|
|
12470
|
+
await this.sleep(throttleDelay);
|
|
12471
|
+
}
|
|
12472
|
+
}
|
|
12097
12473
|
if (!this.retryConfig.enabled) {
|
|
12098
12474
|
return this.client.stream(llmOptions);
|
|
12099
12475
|
}
|
|
@@ -12105,11 +12481,23 @@ var init_agent = __esm({
|
|
|
12105
12481
|
randomize,
|
|
12106
12482
|
onRetry,
|
|
12107
12483
|
onRetriesExhausted,
|
|
12108
|
-
shouldRetry
|
|
12484
|
+
shouldRetry,
|
|
12485
|
+
respectRetryAfter,
|
|
12486
|
+
maxRetryAfterMs
|
|
12109
12487
|
} = this.retryConfig;
|
|
12488
|
+
let retryAfterHintMs = null;
|
|
12110
12489
|
try {
|
|
12111
12490
|
return await (0, import_p_retry.default)(
|
|
12112
12491
|
async (attemptNumber) => {
|
|
12492
|
+
if (retryAfterHintMs !== null && respectRetryAfter) {
|
|
12493
|
+
const cappedDelay = Math.min(retryAfterHintMs, maxRetryAfterMs);
|
|
12494
|
+
this.logger.debug("Using Retry-After delay", {
|
|
12495
|
+
retryAfterMs: retryAfterHintMs,
|
|
12496
|
+
cappedDelay
|
|
12497
|
+
});
|
|
12498
|
+
await this.sleep(cappedDelay);
|
|
12499
|
+
retryAfterHintMs = null;
|
|
12500
|
+
}
|
|
12113
12501
|
this.logger.debug("Creating LLM stream", {
|
|
12114
12502
|
attempt: attemptNumber,
|
|
12115
12503
|
maxAttempts: retries + 1
|
|
@@ -12125,9 +12513,21 @@ var init_agent = __esm({
|
|
|
12125
12513
|
signal: this.signal,
|
|
12126
12514
|
onFailedAttempt: (context) => {
|
|
12127
12515
|
const { error, attemptNumber, retriesLeft } = context;
|
|
12516
|
+
if (respectRetryAfter) {
|
|
12517
|
+
retryAfterHintMs = extractRetryAfterMs(error);
|
|
12518
|
+
if (retryAfterHintMs !== null) {
|
|
12519
|
+
this.logger.debug("Retry-After header detected", {
|
|
12520
|
+
delayMs: retryAfterHintMs
|
|
12521
|
+
});
|
|
12522
|
+
}
|
|
12523
|
+
}
|
|
12128
12524
|
this.logger.warn(
|
|
12129
12525
|
`LLM call failed (attempt ${attemptNumber}/${attemptNumber + retriesLeft}), retrying...`,
|
|
12130
|
-
{
|
|
12526
|
+
{
|
|
12527
|
+
error: error.message,
|
|
12528
|
+
retriesLeft,
|
|
12529
|
+
retryAfterMs: retryAfterHintMs
|
|
12530
|
+
}
|
|
12131
12531
|
);
|
|
12132
12532
|
onRetry?.(error, attemptNumber);
|
|
12133
12533
|
},
|
|
@@ -12148,6 +12548,12 @@ var init_agent = __esm({
|
|
|
12148
12548
|
throw error;
|
|
12149
12549
|
}
|
|
12150
12550
|
}
|
|
12551
|
+
/**
|
|
12552
|
+
* Simple sleep utility for rate limit delays.
|
|
12553
|
+
*/
|
|
12554
|
+
sleep(ms) {
|
|
12555
|
+
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
12556
|
+
}
|
|
12151
12557
|
/**
|
|
12152
12558
|
* Handle LLM error through controller.
|
|
12153
12559
|
*/
|
|
@@ -12386,12 +12792,18 @@ var init_agent = __esm({
|
|
|
12386
12792
|
}
|
|
12387
12793
|
/**
|
|
12388
12794
|
* Calculate cost and complete LLM call in execution tree.
|
|
12795
|
+
* Also records usage to rate limit tracker for proactive throttling.
|
|
12389
12796
|
*/
|
|
12390
12797
|
completeLLMCallInTree(nodeId, result) {
|
|
12798
|
+
const inputTokens = result.usage?.inputTokens ?? 0;
|
|
12799
|
+
const outputTokens = result.usage?.outputTokens ?? 0;
|
|
12800
|
+
if (this.rateLimitTracker) {
|
|
12801
|
+
this.rateLimitTracker.recordUsage(inputTokens, outputTokens);
|
|
12802
|
+
}
|
|
12391
12803
|
const llmCost = this.client.modelRegistry?.estimateCost?.(
|
|
12392
12804
|
this.model,
|
|
12393
|
-
|
|
12394
|
-
|
|
12805
|
+
inputTokens,
|
|
12806
|
+
outputTokens,
|
|
12395
12807
|
result.usage?.cachedInputTokens ?? 0,
|
|
12396
12808
|
result.usage?.cacheCreationInputTokens ?? 0
|
|
12397
12809
|
)?.totalCost;
|
|
@@ -12520,6 +12932,7 @@ __export(index_exports, {
|
|
|
12520
12932
|
DEFAULT_COMPACTION_CONFIG: () => DEFAULT_COMPACTION_CONFIG,
|
|
12521
12933
|
DEFAULT_HINTS: () => DEFAULT_HINTS,
|
|
12522
12934
|
DEFAULT_PROMPTS: () => DEFAULT_PROMPTS,
|
|
12935
|
+
DEFAULT_RATE_LIMIT_CONFIG: () => DEFAULT_RATE_LIMIT_CONFIG,
|
|
12523
12936
|
DEFAULT_RETRY_CONFIG: () => DEFAULT_RETRY_CONFIG,
|
|
12524
12937
|
DEFAULT_SUMMARIZATION_PROMPT: () => DEFAULT_SUMMARIZATION_PROMPT,
|
|
12525
12938
|
ExecutionTree: () => ExecutionTree,
|
|
@@ -12544,6 +12957,7 @@ __export(index_exports, {
|
|
|
12544
12957
|
ModelIdentifierParser: () => ModelIdentifierParser,
|
|
12545
12958
|
ModelRegistry: () => ModelRegistry,
|
|
12546
12959
|
OpenAIChatProvider: () => OpenAIChatProvider,
|
|
12960
|
+
RateLimitTracker: () => RateLimitTracker,
|
|
12547
12961
|
SimpleSessionManager: () => SimpleSessionManager,
|
|
12548
12962
|
SlidingWindowStrategy: () => SlidingWindowStrategy,
|
|
12549
12963
|
StreamProcessor: () => StreamProcessor,
|
|
@@ -12570,6 +12984,7 @@ __export(index_exports, {
|
|
|
12570
12984
|
detectImageMimeType: () => detectImageMimeType,
|
|
12571
12985
|
discoverProviderAdapters: () => discoverProviderAdapters,
|
|
12572
12986
|
extractMessageText: () => extractMessageText,
|
|
12987
|
+
extractRetryAfterMs: () => extractRetryAfterMs,
|
|
12573
12988
|
filterByDepth: () => filterByDepth,
|
|
12574
12989
|
filterByParent: () => filterByParent,
|
|
12575
12990
|
filterRootEvents: () => filterRootEvents,
|
|
@@ -12612,11 +13027,13 @@ __export(index_exports, {
|
|
|
12612
13027
|
parallelGadgetHint: () => parallelGadgetHint,
|
|
12613
13028
|
parseDataUrl: () => parseDataUrl,
|
|
12614
13029
|
parseManifest: () => parseManifest,
|
|
13030
|
+
parseRetryAfterHeader: () => parseRetryAfterHeader,
|
|
12615
13031
|
randomDelay: () => randomDelay,
|
|
12616
13032
|
resolveConfig: () => resolveConfig,
|
|
12617
13033
|
resolveHintTemplate: () => resolveHintTemplate,
|
|
12618
13034
|
resolveModel: () => resolveModel,
|
|
12619
13035
|
resolvePromptTemplate: () => resolvePromptTemplate,
|
|
13036
|
+
resolveRateLimitConfig: () => resolveRateLimitConfig,
|
|
12620
13037
|
resolveRetryConfig: () => resolveRetryConfig,
|
|
12621
13038
|
resolveRulesTemplate: () => resolveRulesTemplate,
|
|
12622
13039
|
resolveSubagentModel: () => resolveSubagentModel,
|
|
@@ -13592,6 +14009,7 @@ init_model_shortcuts();
|
|
|
13592
14009
|
init_options();
|
|
13593
14010
|
init_prompt_config();
|
|
13594
14011
|
init_quick_methods();
|
|
14012
|
+
init_rate_limit();
|
|
13595
14013
|
init_retry();
|
|
13596
14014
|
init_create_gadget();
|
|
13597
14015
|
init_exceptions();
|
|
@@ -14159,6 +14577,7 @@ function getHostExports2(ctx) {
|
|
|
14159
14577
|
DEFAULT_COMPACTION_CONFIG,
|
|
14160
14578
|
DEFAULT_HINTS,
|
|
14161
14579
|
DEFAULT_PROMPTS,
|
|
14580
|
+
DEFAULT_RATE_LIMIT_CONFIG,
|
|
14162
14581
|
DEFAULT_RETRY_CONFIG,
|
|
14163
14582
|
DEFAULT_SUMMARIZATION_PROMPT,
|
|
14164
14583
|
ExecutionTree,
|
|
@@ -14183,6 +14602,7 @@ function getHostExports2(ctx) {
|
|
|
14183
14602
|
ModelIdentifierParser,
|
|
14184
14603
|
ModelRegistry,
|
|
14185
14604
|
OpenAIChatProvider,
|
|
14605
|
+
RateLimitTracker,
|
|
14186
14606
|
SimpleSessionManager,
|
|
14187
14607
|
SlidingWindowStrategy,
|
|
14188
14608
|
StreamProcessor,
|
|
@@ -14209,6 +14629,7 @@ function getHostExports2(ctx) {
|
|
|
14209
14629
|
detectImageMimeType,
|
|
14210
14630
|
discoverProviderAdapters,
|
|
14211
14631
|
extractMessageText,
|
|
14632
|
+
extractRetryAfterMs,
|
|
14212
14633
|
filterByDepth,
|
|
14213
14634
|
filterByParent,
|
|
14214
14635
|
filterRootEvents,
|
|
@@ -14251,11 +14672,13 @@ function getHostExports2(ctx) {
|
|
|
14251
14672
|
parallelGadgetHint,
|
|
14252
14673
|
parseDataUrl,
|
|
14253
14674
|
parseManifest,
|
|
14675
|
+
parseRetryAfterHeader,
|
|
14254
14676
|
randomDelay,
|
|
14255
14677
|
resolveConfig,
|
|
14256
14678
|
resolveHintTemplate,
|
|
14257
14679
|
resolveModel,
|
|
14258
14680
|
resolvePromptTemplate,
|
|
14681
|
+
resolveRateLimitConfig,
|
|
14259
14682
|
resolveRetryConfig,
|
|
14260
14683
|
resolveRulesTemplate,
|
|
14261
14684
|
resolveSubagentModel,
|