npm - @illuma-ai/agents - Versions diffs - 1.1.3 → 1.1.4 - Mend

@illuma-ai/agents 1.1.3 → 1.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16)

package/dist/cjs/common/constants.cjs +10 -0
package/dist/cjs/common/constants.cjs.map +1 -1
package/dist/cjs/graphs/Graph.cjs +58 -7
package/dist/cjs/graphs/Graph.cjs.map +1 -1
package/dist/cjs/main.cjs +1 -0
package/dist/cjs/main.cjs.map +1 -1
package/dist/esm/common/constants.mjs +10 -1
package/dist/esm/common/constants.mjs.map +1 -1
package/dist/esm/graphs/Graph.mjs +59 -8
package/dist/esm/graphs/Graph.mjs.map +1 -1
package/dist/esm/main.mjs +1 -1
package/dist/types/common/constants.d.ts +9 -0
package/package.json +1 -1
package/src/common/constants.ts +10 -0
package/src/graphs/Graph.ts +61 -7
package/src/graphs/gapFeatures.test.ts +93 -6

package/dist/esm/main.mjs CHANGED Viewed

@@ -26,7 +26,7 @@ export { createSearchTool } from './tools/search/tool.mjs';
 export { DATE_RANGE, DEFAULT_COUNTRY_DESCRIPTION, DEFAULT_QUERY_DESCRIPTION, WebSearchToolDefinition, WebSearchToolDescription, WebSearchToolName, WebSearchToolSchema, countrySchema, dateSchema, imagesSchema, newsSchema, querySchema, videosSchema } from './tools/search/schema.mjs';
 export { createValidationErrorMessage, isValidJsonSchema, normalizeJsonSchema, prepareSchemaForProvider, validateStructuredOutput, zodToJsonSchema } from './schemas/validate.mjs';
 export { Callback, CommonEvents, Constants, ContentTypes, EdgeType, EnvVar, FinishReasons, GraphEvents, GraphNodeActions, GraphNodeKeys, MessageTypes, Providers, StepTypes, TitleMethod, ToolCallTypes } from './common/enum.mjs';
-export { CONTEXT_SAFETY_BUFFER, DEDUP_MAX_CONTENT_LENGTH, MIN_THINKING_BUDGET, MULTI_DOCUMENT_THRESHOLD, PROACTIVE_SUMMARY_THRESHOLD, PRUNING_EMA_ALPHA, PRUNING_INITIAL_CALIBRATION, SUMMARIZATION_CONTEXT_THRESHOLD, SUMMARIZATION_RESERVE_RATIO, TOOL_DISCOVERY_CACHE_MAX_SIZE, TOOL_TURN_THINKING_BUDGET } from './common/constants.mjs';
+export { COMPACTION_RECENT_ROUNDS, CONTEXT_SAFETY_BUFFER, DEDUP_MAX_CONTENT_LENGTH, MIN_THINKING_BUDGET, MULTI_DOCUMENT_THRESHOLD, PROACTIVE_SUMMARY_THRESHOLD, PRUNING_EMA_ALPHA, PRUNING_INITIAL_CALIBRATION, SUMMARIZATION_CONTEXT_THRESHOLD, SUMMARIZATION_RESERVE_RATIO, TOOL_DISCOVERY_CACHE_MAX_SIZE, TOOL_TURN_THINKING_BUDGET } from './common/constants.mjs';
 export { joinKeys, resetIfNotEmpty } from './utils/graph.mjs';
 export { isGoogleLike, isOpenAILike } from './utils/llm.mjs';
 export { isPresent, unescapeObject } from './utils/misc.mjs';

package/dist/types/common/constants.d.ts CHANGED Viewed

@@ -46,6 +46,15 @@ export declare const SUMMARIZATION_CONTEXT_THRESHOLD = 80;
  *  100% → graceful: use existing summary + recent messages, never block
  */
 export declare const PROACTIVE_SUMMARY_THRESHOLD = 0.8;
+/**
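+ * Number of recent conversation rounds to keep in the windowed view when a
+ * summary is available. Rounds are counted at human messages while walking
+ * backward from the newest message, so the window starts at the Nth most
+ * recent human message (2 = the last 2 user questions and every message
+ * after them). Everything older is covered by the summary.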
+ *
+ * This prevents wasting tokens on raw messages the summary already covers
+ * and keeps context tight for the LLM.
+ */
+export declare const COMPACTION_RECENT_ROUNDS = 2;
 /**
  * Default reserve ratio (0-1) — fraction of context window to preserve as recent messages.
  * 0.3 means 30% of the context budget is reserved for the most recent messages,

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@illuma-ai/agents",
-  "version": "1.1.3",
+  "version": "1.1.4",
   "main": "./dist/cjs/main.cjs",
   "module": "./dist/esm/main.mjs",
   "types": "./dist/types/index.d.ts",

package/src/common/constants.ts CHANGED Viewed

@@ -71,6 +71,16 @@ export const SUMMARIZATION_CONTEXT_THRESHOLD = 80;
  */
 export const PROACTIVE_SUMMARY_THRESHOLD = 0.8;
+/**
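+ * Number of recent conversation rounds to keep in the windowed view when a
+ * summary is available. Rounds are counted at human messages while walking
+ * backward from the newest message, so the window starts at the Nth most
+ * recent human message (2 = the last 2 user questions and every message
+ * after them). Everything older is covered by the summary.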
+ *
+ * This prevents wasting tokens on raw messages the summary already covers
+ * and keeps context tight for the LLM.
+ */
+export const COMPACTION_RECENT_ROUNDS = 2;
 /**
  * Default reserve ratio (0-1) — fraction of context window to preserve as recent messages.
  * 0.3 means 30% of the context budget is reserved for the most recent messages,

package/src/graphs/Graph.ts CHANGED Viewed

@@ -56,6 +56,7 @@ import {
   TOOL_TURN_THINKING_BUDGET,
   SUMMARIZATION_CONTEXT_THRESHOLD,
   PROACTIVE_SUMMARY_THRESHOLD,
+  COMPACTION_RECENT_ROUNDS,
 } from '@/common';
 import {
   ToolDiscoveryCache,
@@ -1670,18 +1671,53 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
         // Budget for recent messages = total - system - summary - 3 (assistant priming)
         const recentBudget = calibratedMax - systemTokens - summaryTokens - 3;
-        // Step 3: Walk newest→oldest, collect messages that fit in the budget
+        // Step 3: Determine window of recent messages to include.
+        //
+        // Two modes:
+        // A) No summary available → fill the budget (all messages that fit)
+        // B) Summary available → keep the last COMPACTION_RECENT_ROUNDS conversation
+        //    rounds (H+A pairs) plus any trailing tool messages, still capped by the
+        //    token budget. The summary covers everything else, so no tokens are
+        //    spent on raw messages it already covers.
+        //
+        // A "round" = one human message + one AI response (+ any tool messages between).
         const contentStart = systemMsg != null ? 1 : 0;
         let usedTokens = 0;
         let windowStart = messages.length; // index where the recent window begins
-        for (let i = messages.length - 1; i >= contentStart; i--) {
-          const msgTokens = agentContext.indexTokenCountMap[i] ?? 0;
-          if (usedTokens + msgTokens > recentBudget) {
-            break;
+        if (summary == null || summary === '') {
+          // Mode A: No summary — include as many recent messages as fit in budget
+          for (let i = messages.length - 1; i >= contentStart; i--) {
+            const msgTokens = agentContext.indexTokenCountMap[i] ?? 0;
+            if (usedTokens + msgTokens > recentBudget) {
+              break;
+            }
+            usedTokens += msgTokens;
+            windowStart = i;
+          }
+        } else {
+          // Mode B: Summary exists — keep the last COMPACTION_RECENT_ROUNDS rounds
+          // (with the current value of 2: up to 4 core messages, H+A+H+A).
+          // Walk backward counting human messages as round boundaries.
+          const MAX_RECENT_ROUNDS = COMPACTION_RECENT_ROUNDS;
+          let roundsSeen = 0;
+          for (let i = messages.length - 1; i >= contentStart; i--) {
+            const msgType = messages[i]?.getType();
+            const msgTokens = agentContext.indexTokenCountMap[i] ?? 0;
+            // Budget guard — even in round-limited mode, don't exceed budget
+            if (usedTokens + msgTokens > recentBudget) {
+              break;
+            }
+            usedTokens += msgTokens;
+            windowStart = i;
+            // Count a human message as a round boundary
+            if (msgType === 'human') {
+              roundsSeen++;
+              if (roundsSeen >= MAX_RECENT_ROUNDS) {
+                break;
+              }
+            }
           }
-          usedTokens += msgTokens;
-          windowStart = i;
         }
         // Ensure we don't split tool-call / tool-result pairs.
@@ -1710,6 +1746,24 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
         viewParts.push(...recentMessages);
         messagesToUse = viewParts;
+        // Rebuild indexTokenCountMap for the windowed view so downstream
+        // analytics and summarization triggers see accurate token counts.
+        const viewTokenMap: Record<string, number | undefined> = {};
+        let viewIdx = 0;
+        if (systemMsg != null) {
+          viewTokenMap[viewIdx] = systemTokens;
+          viewIdx++;
+        }
+        if (summaryMsg != null) {
+          viewTokenMap[viewIdx] = summaryTokens;
+          viewIdx++;
+        }
+        for (let i = windowStart; i < messages.length; i++) {
+          viewTokenMap[viewIdx] = agentContext.indexTokenCountMap[i];
+          viewIdx++;
+        }
+        agentContext.indexTokenCountMap = viewTokenMap;
         console.debug(
           `[Graph:Compaction] View: ${messages.length}→${viewParts.length} msgs ` +
           `(${compactedMessages.length} behind summary, ${recentMessages.length} in window) | ` +

package/src/graphs/gapFeatures.test.ts CHANGED Viewed

@@ -636,11 +636,14 @@ describe('Proactive Summarization — Context Pressure', () => {
 // ===========================================================================
 import { applyCalibration as _applyCalibration } from '@/utils/pruneCalibration';
+import { COMPACTION_RECENT_ROUNDS } from '@/common/constants';
 describe('Context Compaction — Windowed View (no message deletion)', () => {
   /**
    * Simulates the compaction logic from Graph.ts without the full Graph instance.
-   * This tests the windowed-view algorithm directly.
+   * Mirrors the two modes:
+   *   A) No summary → fill budget with as many recent messages as fit
+   *   B) Summary exists → keep last COMPACTION_RECENT_ROUNDS rounds only
    */
   function buildWindowedView(opts: {
     messages: BaseMessage[];
@@ -663,11 +666,28 @@ describe('Context Compaction — Windowed View (no message deletion)', () => {
     let usedTokens = 0;
     let windowStart = messages.length;
-    for (let i = messages.length - 1; i >= contentStart; i--) {
-      const msgTokens = indexTokenCountMap[i] ?? 0;
-      if (usedTokens + msgTokens > recentBudget) break;
-      usedTokens += msgTokens;
-      windowStart = i;
+    if (!summary) {
+      // Mode A: No summary — fill budget
+      for (let i = messages.length - 1; i >= contentStart; i--) {
+        const msgTokens = indexTokenCountMap[i] ?? 0;
+        if (usedTokens + msgTokens > recentBudget) break;
+        usedTokens += msgTokens;
+        windowStart = i;
+      }
+    } else {
+      // Mode B: Summary exists — keep last N rounds
+      let roundsSeen = 0;
+      for (let i = messages.length - 1; i >= contentStart; i--) {
+        const msgType = messages[i]?.getType();
+        const msgTokens = indexTokenCountMap[i] ?? 0;
+        if (usedTokens + msgTokens > recentBudget) break;
+        usedTokens += msgTokens;
+        windowStart = i;
+        if (msgType === 'human') {
+          roundsSeen++;
+          if (roundsSeen >= COMPACTION_RECENT_ROUNDS) break;
+        }
+      }
     }
     // Don't split tool-call / tool-result pairs
@@ -836,6 +856,73 @@ describe('Context Compaction — Windowed View (no message deletion)', () => {
     expect(recentWithSummary.length).toBeLessThan(recentWithout.length);
   });
+  it('with summary, limits window to last 2 rounds (not budget-filling)', () => {
+    // 20 messages = 10 rounds. With summary, should only keep last 2 rounds (4 msgs).
+    const messages: BaseMessage[] = [
+      new SystemMessage('System prompt'),
+    ];
+    for (let i = 0; i < 20; i++) {
+      messages.push(
+        i % 2 === 0
+          ? new HumanMessage(`User question ${i / 2}`)
+          : new AIMessage(`AI answer ${(i - 1) / 2}`)
+      );
+    }
+    const indexTokenCountMap: Record<string, number | undefined> = {};
+    for (let i = 0; i < messages.length; i++) {
+      indexTokenCountMap[i] = simpleTokenCounter(messages[i]);
+    }
+    const { view, recentMessages, compactedMessages } = buildWindowedView({
+      messages,
+      indexTokenCountMap,
+      maxTokens: 100_000, // huge budget — would fit everything
+      summary: 'Summary of earlier conversation',
+      tokenCounter: simpleTokenCounter,
+    });
+    // Despite huge budget, only last 2 rounds kept (4 content msgs: H+A+H+A)
+    // Plus possible trailing messages in the last round
+    expect(recentMessages.length).toBeLessThanOrEqual(5); // 2 rounds + maybe 1 trailing
+    expect(recentMessages.length).toBeGreaterThanOrEqual(4); // at least 2 full rounds
+    // Most messages are compacted behind the summary
+    expect(compactedMessages.length).toBeGreaterThan(10);
+    // View = system + summary + recent window
+    expect(view[0].getType()).toBe('system');
+    expect(view[1].content).toContain('[Conversation Summary]');
+  });
+  it('without summary, fills budget (no round limit)', () => {
+    const messages = buildConversation(20, 100); // small messages
+    const indexTokenCountMap: Record<string, number | undefined> = {};
+    for (let i = 0; i < messages.length; i++) {
+      indexTokenCountMap[i] = simpleTokenCounter(messages[i]);
+    }
+    const { recentMessages: withoutSummary } = buildWindowedView({
+      messages,
+      indexTokenCountMap,
+      maxTokens: 100_000, // huge budget
+      tokenCounter: simpleTokenCounter,
+      // no summary → mode A
+    });
+    const { recentMessages: withSummary } = buildWindowedView({
+      messages,
+      indexTokenCountMap,
+      maxTokens: 100_000,
+      summary: 'Summary exists',
+      tokenCounter: simpleTokenCounter,
+    });
+    // Without summary: all messages included (budget-filling mode)
+    expect(withoutSummary.length).toBe(20); // all content messages
+    // With summary: only last 2 rounds
+    expect(withSummary.length).toBeLessThan(withoutSummary.length);
+  });
   it('original messages array is never mutated', () => {
     const messages = buildConversation(15, 400);
     const originalLength = messages.length;