@illuma-ai/agents 1.1.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -80,6 +80,15 @@ export declare class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode>
80
80
  private _pruneCalibration;
81
81
  /** Run-scoped tool discovery cache — avoids re-parsing conversation history on every iteration */
82
82
  private _toolDiscoveryCache;
83
+ /**
84
+ * SCALE: Tracks whether a summary call is already in-flight for this Graph instance.
85
+ * Prevents multiple concurrent summary LLM calls when rapid tool iterations each
86
+ * trigger pruning. At 2000 users with 3+ tool calls per turn, this reduces
87
+ * 6000+ summary calls/turn to 2000.
88
+ */
89
+ private _summaryInFlight;
90
+ /** Messages accumulated across tool iterations while a summary call is in-flight */
91
+ private _pendingMessagesToRefine;
83
92
  /** Map of agent contexts by agent ID */
84
93
  agentContexts: Map<string, AgentContext>;
85
94
  /** Default agent ID to use */
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@illuma-ai/agents",
3
- "version": "1.1.0",
3
+ "version": "1.1.1",
4
4
  "main": "./dist/cjs/main.cjs",
5
5
  "module": "./dist/esm/main.mjs",
6
6
  "types": "./dist/types/index.d.ts",
@@ -218,6 +218,15 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
218
218
  private _pruneCalibration: PruneCalibrationState;
219
219
  /** Run-scoped tool discovery cache — avoids re-parsing conversation history on every iteration */
220
220
  private _toolDiscoveryCache: ToolDiscoveryCache;
221
+ /**
222
+ * SCALE: Tracks whether a summary call is already in-flight for this Graph instance.
223
+ * Prevents multiple concurrent summary LLM calls when rapid tool iterations each
224
+ * trigger pruning. At 2000 users with 3+ tool calls per turn, this reduces
225
+ * 6000+ summary calls/turn to 2000.
226
+ */
227
+ private _summaryInFlight: boolean = false;
228
+ /** Messages accumulated across tool iterations while a summary call is in-flight */
229
+ private _pendingMessagesToRefine: BaseMessage[] = [];
221
230
  /** Map of agent contexts by agent ID */
222
231
  agentContexts: Map<string, AgentContext> = new Map();
223
232
  /** Default agent ID to use */
@@ -301,9 +310,11 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
301
310
  new Map()
302
311
  );
303
312
  this.invokedToolIds = resetIfNotEmpty(this.invokedToolIds, undefined);
304
- // Reset EMA calibration and tool discovery cache for fresh run
313
+ // Reset EMA calibration, tool discovery cache, and summary debounce for fresh run
305
314
  this._pruneCalibration = createPruneCalibration();
306
315
  this._toolDiscoveryCache.reset();
316
+ this._summaryInFlight = false;
317
+ this._pendingMessagesToRefine = [];
307
318
  for (const context of this.agentContexts.values()) {
308
319
  context.reset();
309
320
  }
@@ -1598,20 +1609,39 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
1598
1609
  `[Graph:ContextMgmt] Pruned ${messages.length}→${context.length} msgs (${messagesToRefine.length} discarded) | summary=${summarySource}${summary ? ` (len=${summary.length})` : ''} | calibration=${this._pruneCalibration.ratio.toFixed(3)}(${this._pruneCalibration.iterations})`
1599
1610
  );
1600
1611
 
1601
- // Fire background summarization — updates cache for next iteration/turn
1602
- agentContext
1603
- .summarizeCallback(messagesToRefine)
1604
- .then((updated) => {
1605
- if (updated != null && updated !== '') {
1606
- this._cachedRunSummary = updated;
1607
- }
1608
- })
1609
- .catch((err) => {
1610
- console.error(
1611
- '[Graph] Background summary failed (non-fatal):',
1612
- err
1613
- );
1614
- });
1612
+ // SCALE: Debounce background summarization — if a summary call is already
1613
+ // in-flight (from a prior tool iteration), accumulate messages instead of
1614
+ // firing another concurrent LLM call. At 2000 users with 3+ tool calls
1615
+ // per turn, this prevents 3x summary call volume.
1616
+ if (this._summaryInFlight) {
1617
+ this._pendingMessagesToRefine.push(...messagesToRefine);
1618
+ console.debug(
1619
+ `[Graph:ContextMgmt] Summary in-flight, queued ${messagesToRefine.length} msgs (pending=${this._pendingMessagesToRefine.length})`
1620
+ );
1621
+ } else {
1622
+ this._summaryInFlight = true;
1623
+ const allMessages = this._pendingMessagesToRefine.length > 0
1624
+ ? [...this._pendingMessagesToRefine, ...messagesToRefine]
1625
+ : messagesToRefine;
1626
+ this._pendingMessagesToRefine = [];
1627
+
1628
+ agentContext
1629
+ .summarizeCallback(allMessages)
1630
+ .then((updated) => {
1631
+ if (updated != null && updated !== '') {
1632
+ this._cachedRunSummary = updated;
1633
+ }
1634
+ })
1635
+ .catch((err) => {
1636
+ console.error(
1637
+ '[Graph] Background summary failed (non-fatal):',
1638
+ err
1639
+ );
1640
+ })
1641
+ .finally(() => {
1642
+ this._summaryInFlight = false;
1643
+ });
1644
+ }
1615
1645
 
1616
1646
  if (summary != null && summary !== '') {
1617
1647
  hasSummary = true;