llmist 15.7.1 → 15.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +86 -10
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +100 -1
- package/dist/index.d.ts +100 -1
- package/dist/index.js +86 -10
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -1487,6 +1487,8 @@ var init_rate_limit = __esm({
|
|
|
1487
1487
|
dailyTokens = 0;
|
|
1488
1488
|
/** Date string (YYYY-MM-DD UTC) for daily reset tracking */
|
|
1489
1489
|
dailyResetDate;
|
|
1490
|
+
/** Count of pending reservations (for backward compatibility) */
|
|
1491
|
+
pendingReservations = 0;
|
|
1490
1492
|
constructor(config) {
|
|
1491
1493
|
this.config = resolveRateLimitConfig(config);
|
|
1492
1494
|
this.dailyResetDate = this.getCurrentDateUTC();
|
|
@@ -1494,13 +1496,21 @@ var init_rate_limit = __esm({
|
|
|
1494
1496
|
/**
|
|
1495
1497
|
* Record a completed request with its token usage.
|
|
1496
1498
|
*
|
|
1499
|
+
* If reserveRequest() was called before the LLM call (recommended for concurrent
|
|
1500
|
+
* scenarios), the request timestamp was already recorded. Otherwise, this method
|
|
1501
|
+
* will add it for backward compatibility.
|
|
1502
|
+
*
|
|
1497
1503
|
* @param inputTokens - Number of input tokens used
|
|
1498
1504
|
* @param outputTokens - Number of output tokens generated
|
|
1499
1505
|
*/
|
|
1500
1506
|
recordUsage(inputTokens, outputTokens) {
|
|
1501
1507
|
const now = Date.now();
|
|
1502
1508
|
const totalTokens = inputTokens + outputTokens;
|
|
1503
|
-
this.requestTimestamps.push(now);
|
|
1509
|
+
if (this.pendingReservations > 0) {
|
|
1510
|
+
this.pendingReservations--;
|
|
1511
|
+
} else {
|
|
1512
|
+
this.requestTimestamps.push(now);
|
|
1513
|
+
}
|
|
1504
1514
|
this.tokenUsage.push({ timestamp: now, tokens: totalTokens });
|
|
1505
1515
|
this.checkDailyReset();
|
|
1506
1516
|
this.dailyTokens += totalTokens;
|
|
@@ -1632,6 +1642,7 @@ var init_rate_limit = __esm({
|
|
|
1632
1642
|
this.tokenUsage = [];
|
|
1633
1643
|
this.dailyTokens = 0;
|
|
1634
1644
|
this.dailyResetDate = this.getCurrentDateUTC();
|
|
1645
|
+
this.pendingReservations = 0;
|
|
1635
1646
|
}
|
|
1636
1647
|
/**
|
|
1637
1648
|
* Update configuration dynamically.
|
|
@@ -1642,6 +1653,37 @@ var init_rate_limit = __esm({
|
|
|
1642
1653
|
updateConfig(config) {
|
|
1643
1654
|
this.config = resolveRateLimitConfig(config);
|
|
1644
1655
|
}
|
|
1656
|
+
/**
|
|
1657
|
+
* Reserve a request slot before making an LLM call.
|
|
1658
|
+
*
|
|
1659
|
+
* This is critical for concurrent subagents sharing a rate limiter.
|
|
1660
|
+
* Without reservation, multiple subagents checking getRequiredDelayMs()
|
|
1661
|
+
* simultaneously would all see zero usage and proceed, causing rate limit errors.
|
|
1662
|
+
*
|
|
1663
|
+
* Call this AFTER waiting for getRequiredDelayMs() but BEFORE making the LLM call.
|
|
1664
|
+
* The reservation ensures subsequent concurrent checks see the pending request.
|
|
1665
|
+
*
|
|
1666
|
+
* @example
|
|
1667
|
+
* ```typescript
|
|
1668
|
+
* // Proactive rate limiting with reservation
|
|
1669
|
+
* const delay = tracker.getRequiredDelayMs();
|
|
1670
|
+
* if (delay > 0) await sleep(delay);
|
|
1671
|
+
*
|
|
1672
|
+
* tracker.reserveRequest(); // Claim slot BEFORE making call
|
|
1673
|
+
* try {
|
|
1674
|
+
* const result = await llm.call();
|
|
1675
|
+
* tracker.recordUsage(result.inputTokens, result.outputTokens);
|
|
1676
|
+
* } catch (error) {
|
|
1677
|
+
* // Request already reserved; recordUsage updates token count
|
|
1678
|
+
* throw error;
|
|
1679
|
+
* }
|
|
1680
|
+
* ```
|
|
1681
|
+
*/
|
|
1682
|
+
reserveRequest() {
|
|
1683
|
+
const now = Date.now();
|
|
1684
|
+
this.requestTimestamps.push(now);
|
|
1685
|
+
this.pendingReservations++;
|
|
1686
|
+
}
|
|
1645
1687
|
// ─────────────────────────────────────────────────────────────────────────
|
|
1646
1688
|
// Private methods
|
|
1647
1689
|
// ─────────────────────────────────────────────────────────────────────────
|
|
@@ -9131,6 +9173,13 @@ var init_builder = __esm({
|
|
|
9131
9173
|
// When a gadget calls withParentContext(ctx), these observers are
|
|
9132
9174
|
// also called for gadget events in the subagent
|
|
9133
9175
|
parentObservers;
|
|
9176
|
+
// Shared rate limit tracker from parent for coordinated throttling
|
|
9177
|
+
// When a gadget calls withParentContext(ctx), this tracker is shared
|
|
9178
|
+
// so all agents in the tree respect aggregate RPM/TPM limits
|
|
9179
|
+
sharedRateLimitTracker;
|
|
9180
|
+
// Shared retry config from parent for consistent backoff behavior
|
|
9181
|
+
// When a gadget calls withParentContext(ctx), this config is shared
|
|
9182
|
+
sharedRetryConfig;
|
|
9134
9183
|
constructor(client) {
|
|
9135
9184
|
this.client = client;
|
|
9136
9185
|
}
|
|
@@ -9777,6 +9826,12 @@ var init_builder = __esm({
|
|
|
9777
9826
|
if (ctx.parentObservers && !this.parentObservers) {
|
|
9778
9827
|
this.parentObservers = ctx.parentObservers;
|
|
9779
9828
|
}
|
|
9829
|
+
if (ctx.rateLimitTracker && !this.sharedRateLimitTracker) {
|
|
9830
|
+
this.sharedRateLimitTracker = ctx.rateLimitTracker;
|
|
9831
|
+
}
|
|
9832
|
+
if (ctx.retryConfig && !this.sharedRetryConfig) {
|
|
9833
|
+
this.sharedRetryConfig = ctx.retryConfig;
|
|
9834
|
+
}
|
|
9780
9835
|
return this;
|
|
9781
9836
|
}
|
|
9782
9837
|
/**
|
|
@@ -10156,7 +10211,10 @@ ${endPrefix}`
|
|
|
10156
10211
|
parentNodeId: this.parentContext?.nodeId,
|
|
10157
10212
|
baseDepth: this.parentContext ? (this.parentContext.depth ?? 0) + 1 : 0,
|
|
10158
10213
|
// Parent observer hooks for subagent visibility
|
|
10159
|
-
parentObservers: this.parentObservers
|
|
10214
|
+
parentObservers: this.parentObservers,
|
|
10215
|
+
// Shared rate limit tracker and retry config (for coordinated limits across subagents)
|
|
10216
|
+
sharedRateLimitTracker: this.sharedRateLimitTracker,
|
|
10217
|
+
sharedRetryConfig: this.sharedRetryConfig
|
|
10160
10218
|
};
|
|
10161
10219
|
return new Agent(AGENT_INTERNAL_KEY, options);
|
|
10162
10220
|
}
|
|
@@ -11068,7 +11126,7 @@ var init_executor = __esm({
|
|
|
11068
11126
|
init_parser();
|
|
11069
11127
|
init_typed_gadget();
|
|
11070
11128
|
GadgetExecutor = class {
|
|
11071
|
-
constructor(registry, requestHumanInput, logger, defaultGadgetTimeoutMs, errorFormatterOptions, client, mediaStore, agentConfig, subagentConfig, tree, parentNodeId, baseDepth, parentObservers, currentObservers) {
|
|
11129
|
+
constructor(registry, requestHumanInput, logger, defaultGadgetTimeoutMs, errorFormatterOptions, client, mediaStore, agentConfig, subagentConfig, tree, parentNodeId, baseDepth, parentObservers, currentObservers, rateLimitTracker, retryConfig) {
|
|
11072
11130
|
this.registry = registry;
|
|
11073
11131
|
this.requestHumanInput = requestHumanInput;
|
|
11074
11132
|
this.defaultGadgetTimeoutMs = defaultGadgetTimeoutMs;
|
|
@@ -11081,6 +11139,8 @@ var init_executor = __esm({
|
|
|
11081
11139
|
this.baseDepth = baseDepth;
|
|
11082
11140
|
this.parentObservers = parentObservers;
|
|
11083
11141
|
this.currentObservers = currentObservers;
|
|
11142
|
+
this.rateLimitTracker = rateLimitTracker;
|
|
11143
|
+
this.retryConfig = retryConfig;
|
|
11084
11144
|
this.logger = logger ?? createLogger({ name: "llmist:executor" });
|
|
11085
11145
|
this.errorFormatter = new GadgetExecutionErrorFormatter(errorFormatterOptions);
|
|
11086
11146
|
this.argPrefix = errorFormatterOptions?.argPrefix ?? GADGET_ARG_PREFIX;
|
|
@@ -11251,7 +11311,11 @@ var init_executor = __esm({
|
|
|
11251
11311
|
// When a subagent uses withParentContext(ctx), it will receive these
|
|
11252
11312
|
// and call them for gadget events in addition to its own hooks
|
|
11253
11313
|
// Merge child and parent observers so both get called (child first, then parent)
|
|
11254
|
-
parentObservers: mergeObservers(this.currentObservers, this.parentObservers)
|
|
11314
|
+
parentObservers: mergeObservers(this.currentObservers, this.parentObservers),
|
|
11315
|
+
// Shared rate limit tracker for coordinated throttling across subagents
|
|
11316
|
+
rateLimitTracker: this.rateLimitTracker,
|
|
11317
|
+
// Shared retry config for consistent backoff behavior across subagents
|
|
11318
|
+
retryConfig: this.retryConfig
|
|
11255
11319
|
};
|
|
11256
11320
|
let rawResult;
|
|
11257
11321
|
if (timeoutMs && timeoutMs > 0) {
|
|
@@ -11792,7 +11856,11 @@ var init_stream_processor = __esm({
|
|
|
11792
11856
|
// Parent observer hooks for subagent visibility
|
|
11793
11857
|
options.parentObservers,
|
|
11794
11858
|
// Current agent's observers for subagent inheritance
|
|
11795
|
-
options.hooks?.observers
|
|
11859
|
+
options.hooks?.observers,
|
|
11860
|
+
// Shared rate limit tracker for coordinated throttling across subagents
|
|
11861
|
+
options.rateLimitTracker,
|
|
11862
|
+
// Shared retry config for consistent backoff behavior across subagents
|
|
11863
|
+
options.retryConfig
|
|
11796
11864
|
);
|
|
11797
11865
|
}
|
|
11798
11866
|
/**
|
|
@@ -12799,10 +12867,14 @@ var init_agent = __esm({
|
|
|
12799
12867
|
);
|
|
12800
12868
|
}
|
|
12801
12869
|
this.signal = options.signal;
|
|
12802
|
-
this.retryConfig = resolveRetryConfig(options.retryConfig);
|
|
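12803
|
-
const rateLimitConfig = resolveRateLimitConfig(options.rateLimitConfig);
|
|
12804
|
-
if (rateLimitConfig.enabled) {
|
|
12805
|
-
this.rateLimitTracker = new RateLimitTracker(options.rateLimitConfig);
|
|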
12870
|
+
this.retryConfig = options.sharedRetryConfig ?? resolveRetryConfig(options.retryConfig);
|
|
12871
|
+
if (options.sharedRateLimitTracker) {
|
|
12872
|
+
this.rateLimitTracker = options.sharedRateLimitTracker;
|
|
12873
|
+
} else {
|
|
12874
|
+
const rateLimitConfig = resolveRateLimitConfig(options.rateLimitConfig);
|
|
12875
|
+
if (rateLimitConfig.enabled) {
|
|
12876
|
+
this.rateLimitTracker = new RateLimitTracker(options.rateLimitConfig);
|
|
12877
|
+
}
|
|
12806
12878
|
}
|
|
12807
12879
|
this.agentContextConfig = {
|
|
12808
12880
|
model: this.model,
|
|
@@ -13092,7 +13164,10 @@ var init_agent = __esm({
|
|
|
13092
13164
|
priorCompletedInvocations: this.completedInvocationIds,
|
|
13093
13165
|
priorFailedInvocations: this.failedInvocationIds,
|
|
13094
13166
|
// Parent observer hooks for subagent visibility
|
|
13095
|
-
parentObservers: this.parentObservers
|
|
13167
|
+
parentObservers: this.parentObservers,
|
|
13168
|
+
// Shared rate limit tracker and retry config for subagents
|
|
13169
|
+
rateLimitTracker: this.rateLimitTracker,
|
|
13170
|
+
retryConfig: this.retryConfig
|
|
13096
13171
|
});
|
|
13097
13172
|
for await (const event of processor.process(stream2)) {
|
|
13098
13173
|
if (event.type === "stream_complete") {
|
|
@@ -13312,6 +13387,7 @@ var init_agent = __esm({
|
|
|
13312
13387
|
});
|
|
13313
13388
|
await this.sleep(throttleDelay);
|
|
13314
13389
|
}
|
|
13390
|
+
this.rateLimitTracker.reserveRequest();
|
|
13315
13391
|
}
|
|
13316
13392
|
return this.client.stream(llmOptions);
|
|
13317
13393
|
}
|