@illuma-ai/agents 1.1.0 → 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/esm/main.mjs CHANGED
@@ -26,7 +26,7 @@ export { createSearchTool } from './tools/search/tool.mjs';
  export { DATE_RANGE, DEFAULT_COUNTRY_DESCRIPTION, DEFAULT_QUERY_DESCRIPTION, WebSearchToolDefinition, WebSearchToolDescription, WebSearchToolName, WebSearchToolSchema, countrySchema, dateSchema, imagesSchema, newsSchema, querySchema, videosSchema } from './tools/search/schema.mjs';
  export { createValidationErrorMessage, isValidJsonSchema, normalizeJsonSchema, prepareSchemaForProvider, validateStructuredOutput, zodToJsonSchema } from './schemas/validate.mjs';
  export { Callback, CommonEvents, Constants, ContentTypes, EdgeType, EnvVar, FinishReasons, GraphEvents, GraphNodeActions, GraphNodeKeys, MessageTypes, Providers, StepTypes, TitleMethod, ToolCallTypes } from './common/enum.mjs';
- export { CONTEXT_SAFETY_BUFFER, DEDUP_MAX_CONTENT_LENGTH, MIN_THINKING_BUDGET, MULTI_DOCUMENT_THRESHOLD, PRUNING_EMA_ALPHA, PRUNING_INITIAL_CALIBRATION, SUMMARIZATION_CONTEXT_THRESHOLD, SUMMARIZATION_RESERVE_RATIO, TOOL_DISCOVERY_CACHE_MAX_SIZE, TOOL_TURN_THINKING_BUDGET } from './common/constants.mjs';
+ export { CONTEXT_SAFETY_BUFFER, DEDUP_MAX_CONTENT_LENGTH, MIN_THINKING_BUDGET, MULTI_DOCUMENT_THRESHOLD, PROACTIVE_SUMMARY_THRESHOLD, PRUNING_EMA_ALPHA, PRUNING_INITIAL_CALIBRATION, SUMMARIZATION_CONTEXT_THRESHOLD, SUMMARIZATION_RESERVE_RATIO, TOOL_DISCOVERY_CACHE_MAX_SIZE, TOOL_TURN_THINKING_BUDGET } from './common/constants.mjs';
  export { joinKeys, resetIfNotEmpty } from './utils/graph.mjs';
  export { isGoogleLike, isOpenAILike } from './utils/llm.mjs';
  export { isPresent, unescapeObject } from './utils/misc.mjs';
@@ -35,6 +35,17 @@ export declare const CONTEXT_SAFETY_BUFFER = 0.9;
   * When the context window is ≥80% full, pruning + summarization activates.
   */
  export declare const SUMMARIZATION_CONTEXT_THRESHOLD = 80;
+ /**
+  * Proactive summarization threshold (0-1 fraction of context window).
+  * At this utilization level, background summarization fires BEFORE pruning is needed.
+  * This gives the summary time to complete so it's ready when context actually fills up.
+  *
+  * Inspired by VS Code Copilot Chat's 3-tier strategy:
+  *   80% → proactive background summary
+  *   90% → pruning kicks in (with summary already cached)
+  *  100% → graceful: use existing summary + recent messages, never block
+  */
+ export declare const PROACTIVE_SUMMARY_THRESHOLD = 0.8;
  /**
   * Default reserve ratio (0-1) — fraction of context window to preserve as recent messages.
   * 0.3 means 30% of the context budget is reserved for the most recent messages,
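
The three tiers in the doc comment above map directly onto constants this package exports. A minimal sketch of that mapping, assuming only the two threshold values visible in this diff (the classifyUtilization helper and the tier names are illustrative, not part of the package):

    import {
      CONTEXT_SAFETY_BUFFER,        // 0.9, per the hunk header above
      PROACTIVE_SUMMARY_THRESHOLD,  // 0.8, added in 1.1.2
    } from '@illuma-ai/agents';

    type ContextTier = 'normal' | 'proactive-summary' | 'prune' | 'graceful';

    // Hypothetical helper: classifies a 0-1 context-utilization fraction
    // into the 3-tier strategy the doc comment describes.
    function classifyUtilization(utilization: number): ContextTier {
      if (utilization >= 1.0) return 'graceful'; // 100%: reuse cached summary, never block
      if (utilization >= CONTEXT_SAFETY_BUFFER) return 'prune'; // 90%: pruning kicks in
      if (utilization >= PROACTIVE_SUMMARY_THRESHOLD) return 'proactive-summary'; // 80%: background summary
      return 'normal';
    }
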
@@ -80,6 +80,15 @@ export declare class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode>
  private _pruneCalibration;
  /** Run-scoped tool discovery cache — avoids re-parsing conversation history on every iteration */
  private _toolDiscoveryCache;
+ /**
+  * SCALE: Tracks whether a summary call is already in-flight for this Graph instance.
+  * Prevents multiple concurrent summary LLM calls when rapid tool iterations each
+  * trigger pruning. At 2000 users with 3+ tool calls per turn, this reduces
+  * 6000+ summary calls per turn to 2000.
+  */
+ private _summaryInFlight;
+ /** Messages accumulated across tool iterations while a summary call is in-flight */
+ private _pendingMessagesToRefine;
  /** Map of agent contexts by agent ID */
  agentContexts: Map<string, AgentContext>;
  /** Default agent ID to use */
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "@illuma-ai/agents",
-   "version": "1.1.0",
+   "version": "1.1.2",
    "main": "./dist/cjs/main.cjs",
    "module": "./dist/esm/main.mjs",
    "types": "./dist/types/index.d.ts",
@@ -59,6 +59,18 @@ export const CONTEXT_SAFETY_BUFFER = 0.9;
   */
  export const SUMMARIZATION_CONTEXT_THRESHOLD = 80;
 
+ /**
+  * Proactive summarization threshold (0-1 fraction of context window).
+  * At this utilization level, background summarization fires BEFORE pruning is needed.
+  * This gives the summary time to complete so it's ready when context actually fills up.
+  *
+  * Inspired by VS Code Copilot Chat's 3-tier strategy:
+  *   80% → proactive background summary
+  *   90% → pruning kicks in (with summary already cached)
+  *  100% → graceful: use existing summary + recent messages, never block
+  */
+ export const PROACTIVE_SUMMARY_THRESHOLD = 0.8;
+
  /**
   * Default reserve ratio (0-1) — fraction of context window to preserve as recent messages.
   * 0.3 means 30% of the context budget is reserved for the most recent messages,
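
Note the mixed units among these constants: SUMMARIZATION_CONTEXT_THRESHOLD is a percentage (80), while PROACTIVE_SUMMARY_THRESHOLD is a 0-1 fraction (0.8). The Graph.ts hunk further down bridges the two at the comparison site, which also implies that a configured summarizationConfig.triggerThreshold is expressed in percent:

    // Quoted from the Graph.ts hunk below: getContextUtilization returns a
    // percentage, so the fractional constant is scaled by 100 before comparing.
    const threshold =
      agentContext.summarizationConfig?.triggerThreshold ??
      PROACTIVE_SUMMARY_THRESHOLD * 100; // 0.8 becomes 80
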
@@ -35,6 +35,7 @@ import {
    formatAnthropicArtifactContent,
    ensureThinkingBlockInMessages,
    deduplicateSystemMessages,
+   getContextUtilization,
    convertMessagesToContent,
    addBedrockCacheControl,
    modifyDeltaProperties,
@@ -54,6 +55,7 @@ import {
    Constants,
    TOOL_TURN_THINKING_BUDGET,
    SUMMARIZATION_CONTEXT_THRESHOLD,
+   PROACTIVE_SUMMARY_THRESHOLD,
  } from '@/common';
  import {
    ToolDiscoveryCache,
@@ -218,6 +220,15 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
  private _pruneCalibration: PruneCalibrationState;
  /** Run-scoped tool discovery cache — avoids re-parsing conversation history on every iteration */
  private _toolDiscoveryCache: ToolDiscoveryCache;
+ /**
+  * SCALE: Tracks whether a summary call is already in-flight for this Graph instance.
+  * Prevents multiple concurrent summary LLM calls when rapid tool iterations each
+  * trigger pruning. At 2000 users with 3+ tool calls per turn, this reduces
+  * 6000+ summary calls per turn to 2000.
+  */
+ private _summaryInFlight: boolean = false;
+ /** Messages accumulated across tool iterations while a summary call is in-flight */
+ private _pendingMessagesToRefine: BaseMessage[] = [];
  /** Map of agent contexts by agent ID */
  agentContexts: Map<string, AgentContext> = new Map();
  /** Default agent ID to use */
@@ -301,9 +312,11 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
      new Map()
    );
    this.invokedToolIds = resetIfNotEmpty(this.invokedToolIds, undefined);
-   // Reset EMA calibration and tool discovery cache for fresh run
+   // Reset EMA calibration, tool discovery cache, and summary debounce for fresh run
    this._pruneCalibration = createPruneCalibration();
    this._toolDiscoveryCache.reset();
+   this._summaryInFlight = false;
+   this._pendingMessagesToRefine = [];
    for (const context of this.agentContexts.values()) {
      context.reset();
    }
@@ -1531,6 +1544,67 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
      }
    }
 
+   // ── Proactive summarization at context pressure ───────────────────
+   // Inspired by VS Code Copilot Chat's 3-tier strategy:
+   //   80% → fire proactive background summary (BEFORE pruning needed)
+   //   90% → pruning kicks in (summary already cached from 80% trigger)
+   //  100% → graceful: use existing summary + recent messages, NEVER block
+   //
+   // This ensures the summary is READY by the time pruning actually occurs,
+   // so the user never waits and never sees a context cliff.
+   if (
+     agentContext.maxContextTokens != null &&
+     agentContext.maxContextTokens > 0 &&
+     agentContext.summarizeCallback &&
+     !this._summaryInFlight &&
+     !this._cachedRunSummary
+   ) {
+     const utilization = getContextUtilization(
+       agentContext.indexTokenCountMap,
+       agentContext.instructionTokens,
+       agentContext.maxContextTokens
+     );
+     const threshold = (agentContext.summarizationConfig?.triggerThreshold ?? PROACTIVE_SUMMARY_THRESHOLD * 100);
+
+     if (utilization >= threshold) {
+       // Identify older messages to summarize proactively.
+       // Keep the last N messages (recent turns) intact — only summarize older history.
+       // This is incremental: the callback checks for existing summary and updates it.
+       const recentTurnCount = Math.max(4, Math.floor(messages.length * 0.3));
+       const oldMessages = messages.slice(
+         messages[0]?.getType() === 'system' ? 1 : 0,
+         Math.max(1, messages.length - recentTurnCount)
+       );
+
+       if (oldMessages.length > 0) {
+         this._summaryInFlight = true;
+         console.debug(
+           `[Graph:ProactiveSummary] Context at ${utilization.toFixed(1)}% (threshold ${threshold}%) — summarizing ${oldMessages.length} older msgs in background`
+         );
+
+         agentContext
+           .summarizeCallback(oldMessages)
+           .then((updated) => {
+             if (updated != null && updated !== '') {
+               this._cachedRunSummary = updated;
+               console.debug(
+                 `[Graph:ProactiveSummary] Background summary ready (len=${updated.length})`
+               );
+             }
+           })
+           .catch((err) => {
+             console.error(
+               '[Graph:ProactiveSummary] Background summary failed (non-fatal):',
+               err
+             );
+           })
+           .finally(() => {
+             this._summaryInFlight = false;
+           });
+       }
+     }
+   }
+
    if (agentContext.pruneMessages) {
      const { context, indexTokenCountMap, messagesToRefine } =
        agentContext.pruneMessages({
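
getContextUtilization is newly imported from '@/messages/prune', but its body is not part of this diff. Judging from the call site above and the values the tests below compare it against (80, 100), a plausible reconstruction follows; this is an assumption for illustration, not the package's actual implementation:

    // Hypothetical reconstruction: sums per-message token counts plus
    // instruction tokens, then expresses the total as a percentage of
    // the context window. The real code lives in '@/messages/prune'.
    function getContextUtilization(
      indexTokenCountMap: Record<string, number | undefined>,
      instructionTokens: number,
      maxContextTokens: number
    ): number {
      let total = instructionTokens;
      for (const count of Object.values(indexTokenCountMap)) {
        total += count ?? 0;
      }
      return (total / maxContextTokens) * 100;
    }
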
@@ -1598,20 +1672,39 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
      `[Graph:ContextMgmt] Pruned ${messages.length}→${context.length} msgs (${messagesToRefine.length} discarded) | summary=${summarySource}${summary ? ` (len=${summary.length})` : ''} | calibration=${this._pruneCalibration.ratio.toFixed(3)}(${this._pruneCalibration.iterations})`
    );
 
-   // Fire background summarization — updates cache for next iteration/turn
-   agentContext
-     .summarizeCallback(messagesToRefine)
-     .then((updated) => {
-       if (updated != null && updated !== '') {
-         this._cachedRunSummary = updated;
-       }
-     })
-     .catch((err) => {
-       console.error(
-         '[Graph] Background summary failed (non-fatal):',
-         err
-       );
-     });
+   // SCALE: Debounce background summarization — if a summary call is already
+   // in-flight (from a prior tool iteration), accumulate messages instead of
+   // firing another concurrent LLM call. At 2000 users with 3+ tool calls
+   // per turn, this prevents 3x summary call volume.
+   if (this._summaryInFlight) {
+     this._pendingMessagesToRefine.push(...messagesToRefine);
+     console.debug(
+       `[Graph:ContextMgmt] Summary in-flight, queued ${messagesToRefine.length} msgs (pending=${this._pendingMessagesToRefine.length})`
+     );
+   } else {
+     this._summaryInFlight = true;
+     const allMessages = this._pendingMessagesToRefine.length > 0
+       ? [...this._pendingMessagesToRefine, ...messagesToRefine]
+       : messagesToRefine;
+     this._pendingMessagesToRefine = [];
+
+     agentContext
+       .summarizeCallback(allMessages)
+       .then((updated) => {
+         if (updated != null && updated !== '') {
+           this._cachedRunSummary = updated;
+         }
+       })
+       .catch((err) => {
+         console.error(
+           '[Graph] Background summary failed (non-fatal):',
+           err
+         );
+       })
+       .finally(() => {
+         this._summaryInFlight = false;
+       });
+   }
 
    if (summary != null && summary !== '') {
      hasSummary = true;
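
Stripped of Graph internals, the debounce-and-accumulate logic above reduces to a small reusable shape. A standalone sketch with simplified names (summarize stands in for summarizeCallback; none of these identifiers exist in the package):

    // Minimal sketch of the in-flight debounce used above. Messages that
    // arrive while a summary call is running are queued, not dropped, and
    // are folded into the batch the next time a trigger fires.
    class SummaryDebouncer {
      private inFlight = false;
      private pending: string[] = [];

      constructor(private summarize: (msgs: string[]) => Promise<string>) {}

      request(msgs: string[]): void {
        if (this.inFlight) {
          this.pending.push(...msgs); // queue for the next trigger
          return;
        }
        this.inFlight = true;
        const batch = [...this.pending, ...msgs];
        this.pending = [];
        this.summarize(batch)
          .catch(() => {
            // non-fatal, mirroring the Graph's error handling
          })
          .finally(() => {
            this.inFlight = false;
          });
      }
    }

One consequence of this design, in the sketch and in the Graph code alike: messages queued during a flight are only summarized when a later trigger fires, so if no further pruning happens that turn they carry over until the per-run reset shown earlier clears them.
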
@@ -518,3 +518,116 @@ describe('All Features Combined — Full Pipeline', () => {
      expect(callback).toHaveBeenCalled();
    });
  });
+
+ // ===========================================================================
+ // Proactive Summarization — Context Pressure
+ // ===========================================================================
+
+ import { getContextUtilization } from '@/messages/prune';
+ import { PROACTIVE_SUMMARY_THRESHOLD } from '@/common/constants';
+
+ describe('Proactive Summarization — Context Pressure', () => {
+   it('triggers proactive summary at 80% utilization BEFORE pruning', () => {
+     // Simulate context at 82% utilization
+     const maxContextTokens = 200_000;
+     const indexTokenCountMap: Record<string, number | undefined> = {};
+     // Build messages that fill ~82% of context
+     const msgsNeeded = 40;
+     const tokensPerMsg = Math.floor((maxContextTokens * 0.82) / msgsNeeded);
+     for (let i = 0; i < msgsNeeded; i++) {
+       indexTokenCountMap[String(i)] = tokensPerMsg;
+     }
+
+     const utilization = getContextUtilization(indexTokenCountMap, 0, maxContextTokens);
+     const threshold = PROACTIVE_SUMMARY_THRESHOLD * 100; // 80
+
+     expect(utilization).toBeGreaterThanOrEqual(threshold);
+     // At 82%, the proactive summary should fire,
+     // but pruning should NOT have happened yet (context < 90% safety factor)
+     const effectiveBudget = Math.floor(maxContextTokens * 0.9); // CONTEXT_SAFETY_BUFFER
+     const totalTokens = Object.values(indexTokenCountMap).reduce((s, v) => (s ?? 0) + (v ?? 0), 0) as number;
+     expect(totalTokens).toBeLessThan(effectiveBudget);
+   });
+
+   it('does NOT trigger proactive summary below 80%', () => {
+     const maxContextTokens = 200_000;
+     const indexTokenCountMap: Record<string, number | undefined> = {};
+     // Fill to 50% utilization
+     const msgsNeeded = 20;
+     const tokensPerMsg = Math.floor((maxContextTokens * 0.5) / msgsNeeded);
+     for (let i = 0; i < msgsNeeded; i++) {
+       indexTokenCountMap[String(i)] = tokensPerMsg;
+     }
+
+     const utilization = getContextUtilization(indexTokenCountMap, 0, maxContextTokens);
+     expect(utilization).toBeLessThan(PROACTIVE_SUMMARY_THRESHOLD * 100);
+   });
+
+   it('selects only older messages for proactive summarization (keeps recent turns)', () => {
+     const messages: BaseMessage[] = [
+       new SystemMessage('System prompt'),
+       ...Array.from({ length: 20 }, (_, i) =>
+         i % 2 === 0
+           ? new HumanMessage(`User message ${i}`)
+           : new AIMessage(`AI response ${i}`)
+       ),
+     ];
+
+     // Simulate the selection logic from Graph.ts proactive summarization
+     const recentTurnCount = Math.max(4, Math.floor(messages.length * 0.3));
+     const oldMessages = messages.slice(
+       1, // skip system message
+       Math.max(1, messages.length - recentTurnCount)
+     );
+
+     // Recent 30% (~6 messages) preserved, older messages selected for summary
+     expect(oldMessages.length).toBeLessThan(messages.length);
+     expect(oldMessages.length).toBeGreaterThan(0);
+     // System message not included
+     expect(oldMessages[0].getType()).not.toBe('system');
+     // Last messages of conversation not included (recent turns preserved)
+     const lastOldIndex = messages.indexOf(oldMessages[oldMessages.length - 1]);
+     expect(lastOldIndex).toBeLessThan(messages.length - recentTurnCount);
+   });
+
+   it('never blocks — proactive summary is always fire-and-forget', async () => {
+     let resolveCallback: ((v: string) => void) | undefined;
+     const slowCallback = jest.fn(
+       () =>
+         new Promise<string>((resolve) => {
+           resolveCallback = resolve;
+         })
+     );
+
+     // Simulate proactive summary fire-and-forget
+     const summaryPromise = slowCallback().then((updated) => {
+       return updated;
+     });
+
+     // Main flow continues immediately — callback hasn't resolved yet
+     expect(slowCallback).toHaveBeenCalledTimes(1);
+
+     // Later, the callback resolves (simulating Nova Micro responding)
+     resolveCallback!('Proactive summary result');
+     const result = await summaryPromise;
+     expect(result).toBe('Proactive summary result');
+   });
+
+   it('at 100%+ utilization, uses existing summary without throwing', () => {
+     const maxContextTokens = 200_000;
+     const cachedSummary = 'Previously generated summary of the conversation';
+
+     // Context is at 105% (over budget)
+     const indexTokenCountMap: Record<string, number | undefined> = {
+       '0': 210_000, // system + everything
+     };
+
+     const utilization = getContextUtilization(indexTokenCountMap, 0, maxContextTokens);
+     expect(utilization).toBeGreaterThan(100);
+
+     // Even at 100%+, we use the existing cached summary — no error thrown
+     expect(cachedSummary).toBeTruthy();
+     // Pruning will remove the oldest messages to fit, and inject the cached summary.
+     // The key: no blocking, no throwing, just graceful degradation
+   });
+ });