llmist 15.7.1 → 15.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -1487,6 +1487,8 @@ var init_rate_limit = __esm({
1487
1487
  dailyTokens = 0;
1488
1488
  /** Date string (YYYY-MM-DD UTC) for daily reset tracking */
1489
1489
  dailyResetDate;
1490
+ /** Number of reserveRequest() calls not yet matched by a recordUsage() call; when zero, recordUsage() adds the request timestamp itself (backward compatibility) */
1491
+ pendingReservations = 0;
1490
1492
  constructor(config) {
1491
1493
  this.config = resolveRateLimitConfig(config);
1492
1494
  this.dailyResetDate = this.getCurrentDateUTC();
@@ -1494,13 +1496,21 @@ var init_rate_limit = __esm({
1494
1496
  /**
1495
1497
  * Record a completed request with its token usage.
1496
1498
  *
1499
+ * If reserveRequest() was called before the LLM call (recommended for concurrent
1500
+ * scenarios), the request timestamp was already recorded. Otherwise, this method
1501
+ * will add it for backward compatibility.
1502
+ *
1497
1503
  * @param inputTokens - Number of input tokens used
1498
1504
  * @param outputTokens - Number of output tokens generated
1499
1505
  */
1500
1506
  recordUsage(inputTokens, outputTokens) {
1501
1507
  const now = Date.now();
1502
1508
  const totalTokens = inputTokens + outputTokens;
1503
- this.requestTimestamps.push(now);
1509
+ if (this.pendingReservations > 0) {
1510
+ this.pendingReservations--;
1511
+ } else {
1512
+ this.requestTimestamps.push(now);
1513
+ }
1504
1514
  this.tokenUsage.push({ timestamp: now, tokens: totalTokens });
1505
1515
  this.checkDailyReset();
1506
1516
  this.dailyTokens += totalTokens;
@@ -1632,6 +1642,7 @@ var init_rate_limit = __esm({
1632
1642
  this.tokenUsage = [];
1633
1643
  this.dailyTokens = 0;
1634
1644
  this.dailyResetDate = this.getCurrentDateUTC();
1645
+ this.pendingReservations = 0;
1635
1646
  }
1636
1647
  /**
1637
1648
  * Update configuration dynamically.
@@ -1642,6 +1653,37 @@ var init_rate_limit = __esm({
1642
1653
  updateConfig(config) {
1643
1654
  this.config = resolveRateLimitConfig(config);
1644
1655
  }
1656
+ /**
1657
+ * Reserve a request slot before making an LLM call.
1658
+ *
1659
+ * This is critical for concurrent subagents sharing a rate limiter.
1660
+ * Without reservation, multiple subagents checking getRequiredDelayMs()
1661
+ * simultaneously would all see zero usage and proceed, causing rate limit errors.
1662
+ *
1663
+ * Call this AFTER waiting for getRequiredDelayMs() but BEFORE making the LLM call.
1664
+ * The reservation ensures subsequent concurrent checks see the pending request.
1665
+ *
1666
+ * @example
1667
+ * ```typescript
1668
+ * // Proactive rate limiting with reservation
1669
+ * const delay = tracker.getRequiredDelayMs();
1670
+ * if (delay > 0) await sleep(delay);
1671
+ *
1672
+ * tracker.reserveRequest(); // Claim slot BEFORE making call
1673
+ * try {
1674
+ * const result = await llm.call();
1675
+ * tracker.recordUsage(result.inputTokens, result.outputTokens);
1676
+ * } catch (error) {
1677
+ * // Failed call: the slot reserved above still counts as a request; recordUsage() is skipped, so no tokens are recorded
1678
+ * throw error;
1679
+ * }
1680
+ * ```
1681
+ */
1682
+ reserveRequest() {
1683
+ const now = Date.now();
1684
+ this.requestTimestamps.push(now);
1685
+ this.pendingReservations++;
1686
+ }
1645
1687
  // ─────────────────────────────────────────────────────────────────────────
1646
1688
  // Private methods
1647
1689
  // ─────────────────────────────────────────────────────────────────────────
@@ -9131,6 +9173,13 @@ var init_builder = __esm({
9131
9173
  // When a gadget calls withParentContext(ctx), these observers are
9132
9174
  // also called for gadget events in the subagent
9133
9175
  parentObservers;
9176
+ // Shared rate limit tracker from parent for coordinated throttling
9177
+ // When a gadget calls withParentContext(ctx), this tracker is shared
9178
+ // so all agents in the tree respect aggregate RPM/TPM limits
9179
+ sharedRateLimitTracker;
9180
+ // Shared retry config from parent for consistent backoff behavior
9181
+ // When a gadget calls withParentContext(ctx), this config is shared
9182
+ sharedRetryConfig;
9134
9183
  constructor(client) {
9135
9184
  this.client = client;
9136
9185
  }
@@ -9777,6 +9826,12 @@ var init_builder = __esm({
9777
9826
  if (ctx.parentObservers && !this.parentObservers) {
9778
9827
  this.parentObservers = ctx.parentObservers;
9779
9828
  }
9829
+ if (ctx.rateLimitTracker && !this.sharedRateLimitTracker) {
9830
+ this.sharedRateLimitTracker = ctx.rateLimitTracker;
9831
+ }
9832
+ if (ctx.retryConfig && !this.sharedRetryConfig) {
9833
+ this.sharedRetryConfig = ctx.retryConfig;
9834
+ }
9780
9835
  return this;
9781
9836
  }
9782
9837
  /**
@@ -10156,7 +10211,10 @@ ${endPrefix}`
10156
10211
  parentNodeId: this.parentContext?.nodeId,
10157
10212
  baseDepth: this.parentContext ? (this.parentContext.depth ?? 0) + 1 : 0,
10158
10213
  // Parent observer hooks for subagent visibility
10159
- parentObservers: this.parentObservers
10214
+ parentObservers: this.parentObservers,
10215
+ // Shared rate limit tracker and retry config (for coordinated limits across subagents)
10216
+ sharedRateLimitTracker: this.sharedRateLimitTracker,
10217
+ sharedRetryConfig: this.sharedRetryConfig
10160
10218
  };
10161
10219
  return new Agent(AGENT_INTERNAL_KEY, options);
10162
10220
  }
@@ -11068,7 +11126,7 @@ var init_executor = __esm({
11068
11126
  init_parser();
11069
11127
  init_typed_gadget();
11070
11128
  GadgetExecutor = class {
11071
- constructor(registry, requestHumanInput, logger, defaultGadgetTimeoutMs, errorFormatterOptions, client, mediaStore, agentConfig, subagentConfig, tree, parentNodeId, baseDepth, parentObservers, currentObservers) {
11129
+ constructor(registry, requestHumanInput, logger, defaultGadgetTimeoutMs, errorFormatterOptions, client, mediaStore, agentConfig, subagentConfig, tree, parentNodeId, baseDepth, parentObservers, currentObservers, rateLimitTracker, retryConfig) {
11072
11130
  this.registry = registry;
11073
11131
  this.requestHumanInput = requestHumanInput;
11074
11132
  this.defaultGadgetTimeoutMs = defaultGadgetTimeoutMs;
@@ -11081,6 +11139,8 @@ var init_executor = __esm({
11081
11139
  this.baseDepth = baseDepth;
11082
11140
  this.parentObservers = parentObservers;
11083
11141
  this.currentObservers = currentObservers;
11142
+ this.rateLimitTracker = rateLimitTracker;
11143
+ this.retryConfig = retryConfig;
11084
11144
  this.logger = logger ?? createLogger({ name: "llmist:executor" });
11085
11145
  this.errorFormatter = new GadgetExecutionErrorFormatter(errorFormatterOptions);
11086
11146
  this.argPrefix = errorFormatterOptions?.argPrefix ?? GADGET_ARG_PREFIX;
@@ -11251,7 +11311,11 @@ var init_executor = __esm({
11251
11311
  // When a subagent uses withParentContext(ctx), it will receive these
11252
11312
  // and call them for gadget events in addition to its own hooks
11253
11313
  // Merge child and parent observers so both get called (child first, then parent)
11254
- parentObservers: mergeObservers(this.currentObservers, this.parentObservers)
11314
+ parentObservers: mergeObservers(this.currentObservers, this.parentObservers),
11315
+ // Shared rate limit tracker for coordinated throttling across subagents
11316
+ rateLimitTracker: this.rateLimitTracker,
11317
+ // Shared retry config for consistent backoff behavior across subagents
11318
+ retryConfig: this.retryConfig
11255
11319
  };
11256
11320
  let rawResult;
11257
11321
  if (timeoutMs && timeoutMs > 0) {
@@ -11792,7 +11856,11 @@ var init_stream_processor = __esm({
11792
11856
  // Parent observer hooks for subagent visibility
11793
11857
  options.parentObservers,
11794
11858
  // Current agent's observers for subagent inheritance
11795
- options.hooks?.observers
11859
+ options.hooks?.observers,
11860
+ // Shared rate limit tracker for coordinated throttling across subagents
11861
+ options.rateLimitTracker,
11862
+ // Shared retry config for consistent backoff behavior across subagents
11863
+ options.retryConfig
11796
11864
  );
11797
11865
  }
11798
11866
  /**
@@ -12799,10 +12867,14 @@ var init_agent = __esm({
12799
12867
  );
12800
12868
  }
12801
12869
  this.signal = options.signal;
12802
- this.retryConfig = resolveRetryConfig(options.retryConfig);
12803
- const rateLimitConfig = resolveRateLimitConfig(options.rateLimitConfig);
12804
- if (rateLimitConfig.enabled) {
12805
- this.rateLimitTracker = new RateLimitTracker(options.rateLimitConfig);
12870
+ this.retryConfig = options.sharedRetryConfig ?? resolveRetryConfig(options.retryConfig);
12871
+ if (options.sharedRateLimitTracker) {
12872
+ this.rateLimitTracker = options.sharedRateLimitTracker;
12873
+ } else {
12874
+ const rateLimitConfig = resolveRateLimitConfig(options.rateLimitConfig);
12875
+ if (rateLimitConfig.enabled) {
12876
+ this.rateLimitTracker = new RateLimitTracker(options.rateLimitConfig);
12877
+ }
12806
12878
  }
12807
12879
  this.agentContextConfig = {
12808
12880
  model: this.model,
@@ -13092,7 +13164,10 @@ var init_agent = __esm({
13092
13164
  priorCompletedInvocations: this.completedInvocationIds,
13093
13165
  priorFailedInvocations: this.failedInvocationIds,
13094
13166
  // Parent observer hooks for subagent visibility
13095
- parentObservers: this.parentObservers
13167
+ parentObservers: this.parentObservers,
13168
+ // Shared rate limit tracker and retry config for subagents
13169
+ rateLimitTracker: this.rateLimitTracker,
13170
+ retryConfig: this.retryConfig
13096
13171
  });
13097
13172
  for await (const event of processor.process(stream2)) {
13098
13173
  if (event.type === "stream_complete") {
@@ -13312,6 +13387,7 @@ var init_agent = __esm({
13312
13387
  });
13313
13388
  await this.sleep(throttleDelay);
13314
13389
  }
13390
+ this.rateLimitTracker.reserveRequest();
13315
13391
  }
13316
13392
  return this.client.stream(llmOptions);
13317
13393
  }