@illuma-ai/agents 1.1.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -80,6 +80,15 @@ export declare class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode>
80
80
  private _pruneCalibration;
81
81
  /** Run-scoped tool discovery cache — avoids re-parsing conversation history on every iteration */
82
82
  private _toolDiscoveryCache;
83
+ /**
84
+ * SCALE: Tracks whether a summary call is already in-flight for this Graph instance.
85
+ * Prevents multiple concurrent summary LLM calls when rapid tool iterations each
86
+ * trigger pruning. At 2000 users with 3+ tool calls per turn, this reduces
87
+ * 6000+ summary calls/turn to 2000.
88
+ */
89
+ private _summaryInFlight;
90
+ /** Messages accumulated across tool iterations while a summary call is in-flight */
91
+ private _pendingMessagesToRefine;
83
92
  /** Map of agent contexts by agent ID */
84
93
  agentContexts: Map<string, AgentContext>;
85
94
  /** Default agent ID to use */
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@illuma-ai/agents",
3
- "version": "1.1.0",
3
+ "version": "1.1.1",
4
4
  "main": "./dist/cjs/main.cjs",
5
5
  "module": "./dist/esm/main.mjs",
6
6
  "types": "./dist/types/index.d.ts",
@@ -218,6 +218,15 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
218
218
  private _pruneCalibration: PruneCalibrationState;
219
219
  /** Run-scoped tool discovery cache — avoids re-parsing conversation history on every iteration */
220
220
  private _toolDiscoveryCache: ToolDiscoveryCache;
221
+ /**
222
+ * SCALE: Tracks whether a summary call is already in-flight for this Graph instance.
223
+ * Prevents multiple concurrent summary LLM calls when rapid tool iterations each
224
+ * trigger pruning. At 2000 users with 3+ tool calls per turn, this reduces
225
+ * 6000+ summary calls/turn to 2000.
226
+ */
227
+ private _summaryInFlight: boolean = false;
228
+ /** Messages accumulated across tool iterations while a summary call is in-flight */
229
+ private _pendingMessagesToRefine: BaseMessage[] = [];
221
230
  /** Map of agent contexts by agent ID */
222
231
  agentContexts: Map<string, AgentContext> = new Map();
223
232
  /** Default agent ID to use */
@@ -301,9 +310,11 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
301
310
  new Map()
302
311
  );
303
312
  this.invokedToolIds = resetIfNotEmpty(this.invokedToolIds, undefined);
304
- // Reset EMA calibration and tool discovery cache for fresh run
313
+ // Reset EMA calibration, tool discovery cache, and summary debounce for fresh run
305
314
  this._pruneCalibration = createPruneCalibration();
306
315
  this._toolDiscoveryCache.reset();
316
+ this._summaryInFlight = false;
317
+ this._pendingMessagesToRefine = [];
307
318
  for (const context of this.agentContexts.values()) {
308
319
  context.reset();
309
320
  }
@@ -1598,20 +1609,39 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
1598
1609
  `[Graph:ContextMgmt] Pruned ${messages.length}→${context.length} msgs (${messagesToRefine.length} discarded) | summary=${summarySource}${summary ? ` (len=${summary.length})` : ''} | calibration=${this._pruneCalibration.ratio.toFixed(3)}(${this._pruneCalibration.iterations})`
1599
1610
  );
1600
1611
 
1601
- // Fire background summarization — updates cache for next iteration/turn
1602
- agentContext
1603
- .summarizeCallback(messagesToRefine)
1604
- .then((updated) => {
1605
- if (updated != null && updated !== '') {
1606
- this._cachedRunSummary = updated;
1607
- }
1608
- })
1609
- .catch((err) => {
1610
- console.error(
1611
- '[Graph] Background summary failed (non-fatal):',
1612
- err
1613
- );
1614
- });
1612
+ // SCALE: Debounce background summarization — if a summary call is already
1613
+ // in-flight (from a prior tool iteration), accumulate messages instead of
1614
+ // firing another concurrent LLM call. At 2000 users with 3+ tool calls
1615
+ // per turn, this prevents 3x summary call volume.
1616
+ if (this._summaryInFlight) {
1617
+ this._pendingMessagesToRefine.push(...messagesToRefine);
1618
+ console.debug(
1619
+ `[Graph:ContextMgmt] Summary in-flight, queued ${messagesToRefine.length} msgs (pending=${this._pendingMessagesToRefine.length})`
1620
+ );
1621
+ } else {
1622
+ this._summaryInFlight = true;
1623
+ const allMessages = this._pendingMessagesToRefine.length > 0
1624
+ ? [...this._pendingMessagesToRefine, ...messagesToRefine]
1625
+ : messagesToRefine;
1626
+ this._pendingMessagesToRefine = [];
1627
+
1628
+ agentContext
1629
+ .summarizeCallback(allMessages)
1630
+ .then((updated) => {
1631
+ if (updated != null && updated !== '') {
1632
+ this._cachedRunSummary = updated;
1633
+ }
1634
+ })
1635
+ .catch((err) => {
1636
+ console.error(
1637
+ '[Graph] Background summary failed (non-fatal):',
1638
+ err
1639
+ );
1640
+ })
1641
+ .finally(() => {
1642
+ this._summaryInFlight = false;
1643
+ });
1644
+ }
1615
1645
 
1616
1646
  if (summary != null && summary !== '') {
1617
1647
  hasSummary = true;