llmist 15.8.0 → 15.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -1487,6 +1487,8 @@ var init_rate_limit = __esm({
1487
1487
  dailyTokens = 0;
1488
1488
  /** Date string (YYYY-MM-DD UTC) for daily reset tracking */
1489
1489
  dailyResetDate;
1490
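+ /** Count of reservations made via reserveRequest() that recordUsage() has not yet consumed (keeps recordUsage() backward compatible for callers that never reserve) */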
1491
+ pendingReservations = 0;
1490
1492
  constructor(config) {
1491
1493
  this.config = resolveRateLimitConfig(config);
1492
1494
  this.dailyResetDate = this.getCurrentDateUTC();
@@ -1494,13 +1496,21 @@ var init_rate_limit = __esm({
1494
1496
  /**
1495
1497
  * Record a completed request with its token usage.
1496
1498
  *
1499
+ * If reserveRequest() was called before the LLM call (recommended for concurrent
1500
+ * scenarios), the request timestamp was already recorded. Otherwise, this method
1501
+ * will add it for backward compatibility.
1502
+ *
1497
1503
  * @param inputTokens - Number of input tokens used
1498
1504
  * @param outputTokens - Number of output tokens generated
1499
1505
  */
1500
1506
  recordUsage(inputTokens, outputTokens) {
1501
1507
  const now = Date.now();
1502
1508
  const totalTokens = inputTokens + outputTokens;
1503
- this.requestTimestamps.push(now);
1509
+ if (this.pendingReservations > 0) {
1510
+ this.pendingReservations--;
1511
+ } else {
1512
+ this.requestTimestamps.push(now);
1513
+ }
1504
1514
  this.tokenUsage.push({ timestamp: now, tokens: totalTokens });
1505
1515
  this.checkDailyReset();
1506
1516
  this.dailyTokens += totalTokens;
@@ -1632,6 +1642,7 @@ var init_rate_limit = __esm({
1632
1642
  this.tokenUsage = [];
1633
1643
  this.dailyTokens = 0;
1634
1644
  this.dailyResetDate = this.getCurrentDateUTC();
1645
+ this.pendingReservations = 0;
1635
1646
  }
1636
1647
  /**
1637
1648
  * Update configuration dynamically.
@@ -1642,6 +1653,37 @@ var init_rate_limit = __esm({
1642
1653
  updateConfig(config) {
1643
1654
  this.config = resolveRateLimitConfig(config);
1644
1655
  }
1656
+ /**
1657
+ * Reserve a request slot before making an LLM call.
1658
+ *
1659
+ * This is critical for concurrent subagents sharing a rate limiter.
1660
+ * Without reservation, multiple subagents checking getRequiredDelayMs()
1661
+ * simultaneously would all see zero usage and proceed, causing rate limit errors.
1662
+ *
1663
+ * Call this AFTER waiting for getRequiredDelayMs() but BEFORE making the LLM call.
1664
+ * The reservation ensures subsequent concurrent checks see the pending request.
1665
+ *
1666
+ * @example
1667
+ * ```typescript
1668
+ * // Proactive rate limiting with reservation
1669
+ * const delay = tracker.getRequiredDelayMs();
1670
+ * if (delay > 0) await sleep(delay);
1671
+ *
1672
+ * tracker.reserveRequest(); // Claim slot BEFORE making call
1673
+ * try {
1674
+ * const result = await llm.call();
1675
+ * tracker.recordUsage(result.inputTokens, result.outputTokens);
1676
+ * } catch (error) {
1677
+ * // The slot reserved above stays counted; recordUsage() was never reached, so no tokens are recorded for this call
1678
+ * throw error;
1679
+ * }
1680
+ * ```
1681
+ */
1682
+ reserveRequest() {
1683
+ const now = Date.now();
1684
+ this.requestTimestamps.push(now);
1685
+ this.pendingReservations++;
1686
+ }
1645
1687
  // ─────────────────────────────────────────────────────────────────────────
1646
1688
  // Private methods
1647
1689
  // ─────────────────────────────────────────────────────────────────────────
@@ -13345,6 +13387,7 @@ var init_agent = __esm({
13345
13387
  });
13346
13388
  await this.sleep(throttleDelay);
13347
13389
  }
13390
+ this.rateLimitTracker.reserveRequest();
13348
13391
  }
13349
13392
  return this.client.stream(llmOptions);
13350
13393
  }