npm - @illuma-ai/agents - Versions diffs - 1.0.94 → 1.0.98 - Mend

@illuma-ai/agents 1.0.94 → 1.0.98

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (67)

package/dist/cjs/common/constants.cjs +25 -0
package/dist/cjs/common/constants.cjs.map +1 -1
package/dist/cjs/events.cjs +0 -4
package/dist/cjs/events.cjs.map +1 -1
package/dist/cjs/graphs/Graph.cjs +38 -148
package/dist/cjs/graphs/Graph.cjs.map +1 -1
package/dist/cjs/main.cjs +8 -0
package/dist/cjs/main.cjs.map +1 -1
package/dist/cjs/tools/CodeExecutor.cjs +5 -0
package/dist/cjs/tools/CodeExecutor.cjs.map +1 -1
package/dist/cjs/tools/ProgrammaticToolCalling.cjs +12 -6
package/dist/cjs/tools/ProgrammaticToolCalling.cjs.map +1 -1
package/dist/cjs/tools/ToolSearch.cjs +14 -10
package/dist/cjs/tools/ToolSearch.cjs.map +1 -1
package/dist/cjs/tools/handlers.cjs +0 -2
package/dist/cjs/tools/handlers.cjs.map +1 -1
package/dist/cjs/tools/search/search.cjs +12 -4
package/dist/cjs/tools/search/search.cjs.map +1 -1
package/dist/cjs/tools/search/tool.cjs +2 -1
package/dist/cjs/tools/search/tool.cjs.map +1 -1
package/dist/cjs/utils/contextPressure.cjs +154 -0
package/dist/cjs/utils/contextPressure.cjs.map +1 -0
package/dist/esm/common/constants.mjs +24 -1
package/dist/esm/common/constants.mjs.map +1 -1
package/dist/esm/events.mjs +0 -4
package/dist/esm/events.mjs.map +1 -1
package/dist/esm/graphs/Graph.mjs +38 -148
package/dist/esm/graphs/Graph.mjs.map +1 -1
package/dist/esm/main.mjs +2 -1
package/dist/esm/main.mjs.map +1 -1
package/dist/esm/tools/CodeExecutor.mjs +5 -0
package/dist/esm/tools/CodeExecutor.mjs.map +1 -1
package/dist/esm/tools/ProgrammaticToolCalling.mjs +12 -6
package/dist/esm/tools/ProgrammaticToolCalling.mjs.map +1 -1
package/dist/esm/tools/ToolSearch.mjs +14 -10
package/dist/esm/tools/ToolSearch.mjs.map +1 -1
package/dist/esm/tools/handlers.mjs +0 -2
package/dist/esm/tools/handlers.mjs.map +1 -1
package/dist/esm/tools/search/search.mjs +12 -4
package/dist/esm/tools/search/search.mjs.map +1 -1
package/dist/esm/tools/search/tool.mjs +2 -1
package/dist/esm/tools/search/tool.mjs.map +1 -1
package/dist/esm/utils/contextPressure.mjs +148 -0
package/dist/esm/utils/contextPressure.mjs.map +1 -0
package/dist/types/common/constants.d.ts +14 -0
package/dist/types/tools/ProgrammaticToolCalling.d.ts +2 -2
package/dist/types/tools/search/types.d.ts +3 -0
package/dist/types/utils/contextPressure.d.ts +72 -0
package/dist/types/utils/index.d.ts +1 -0
package/package.json +1 -1
package/src/common/constants.ts +26 -0
package/src/events.ts +0 -8
package/src/graphs/Graph.ts +53 -177
package/src/graphs/contextManagement.e2e.test.ts +28 -20
package/src/specs/agent-handoffs-bedrock.integration.test.ts +7 -7
package/src/specs/agent-handoffs.test.ts +36 -36
package/src/specs/thinking-handoff.test.ts +10 -10
package/src/tools/CodeExecutor.ts +6 -0
package/src/tools/ProgrammaticToolCalling.ts +23 -6
package/src/tools/ToolSearch.ts +14 -10
package/src/tools/handlers.ts +0 -4
package/src/tools/search/search.ts +15 -3
package/src/tools/search/tool.ts +2 -0
package/src/tools/search/types.ts +3 -0
package/src/utils/contextPressure.test.ts +247 -0
package/src/utils/contextPressure.ts +188 -0
package/src/utils/index.ts +1 -0

package/dist/types/utils/contextPressure.d.ts ADDED Viewed

@@ -0,0 +1,72 @@
+/**
+ * Context Pressure Utilities
+ *
+ * Pure functions for context overflow management. These handle:
+ * 1. Multi-document detection — counting attached documents in messages
+ * 2. Multi-document delegation hint — injected when 3+ documents detected
+ * 3. Post-prune context note — injected after pruning/summarization
+ *
+ * DESIGN PRINCIPLE: The LLM never sees raw token numbers. Context overflow
+ * is handled mechanically by pruning (Graph) + auto-continuation (client.js).
+ * Only task-driven hints (multi-document) are injected — never budget-based.
+ *
+ * @see docs/context-overflow-architecture.md
+ */
+import type { BaseMessage } from '@langchain/core/messages';
+/** Result of scanning messages for attached documents */
+export interface DocumentDetectionResult {
+    /** Total unique documents detected */
+    count: number;
+    /** Names of detected documents */
+    names: string[];
+}
+/**
+ * Scan messages for attached documents using known content patterns.
+ *
+ * Detects documents from:
+ * 1. `# "filename"` headers in "Attached document(s):" blocks (text content)
+ * 2. `**filename1, filename2**` in "The user has attached:" blocks (embedded files)
+ *
+ * @param messages - Conversation messages to scan
+ * @returns Document count and names (deduplicated)
+ */
+export declare function detectDocuments(messages: BaseMessage[]): DocumentDetectionResult;
+/**
+ * Determine whether the multi-document delegation hint should be injected.
+ *
+ * Only fires on the first iteration (before any AI response) when the
+ * document count meets the threshold. This ensures the agent delegates
+ * upfront rather than trying to process all documents itself.
+ *
+ * @param documentCount - Number of detected documents
+ * @param hasAiResponse - Whether the agent has already responded in this chain
+ * @returns Whether to inject the delegation hint
+ */
+export declare function shouldInjectMultiDocHint(documentCount: number, hasAiResponse: boolean): boolean;
+/**
+ * Build the multi-document delegation hint message content.
+ *
+ * @param documentCount - Number of detected documents
+ * @param documentNames - Names of detected documents
+ * @returns Message content string for injection as HumanMessage
+ */
+export declare function buildMultiDocHintContent(documentCount: number, documentNames: string[]): string;
+/**
+ * Build the post-prune context note injected after messages are pruned
+ * and summarized. No token numbers — just a contextual signal that
+ * earlier conversation was compressed.
+ *
+ * @param discardedCount - Number of messages that were pruned
+ * @param hasSummary - Whether a summary was successfully generated
+ * @returns Message content string for injection as SystemMessage, or null if no note needed
+ */
+export declare function buildPostPruneNote(discardedCount: number, hasSummary: boolean): string | null;
+/**
+ * Check whether a tool named "task" exists in the agent's tool set.
+ *
+ * @param tools - Array of tool objects or structured tools
+ * @returns Whether the task tool is available
+ */
+export declare function hasTaskTool(tools: Array<{
+    name?: string;
+} | unknown> | undefined): boolean;

package/dist/types/utils/index.d.ts CHANGED Viewed

@@ -8,3 +8,4 @@ export * from './toonFormat';
 export * from './contextAnalytics';
 export * from './schema';
 export * from './toolCallContinuation';
+export * from './contextPressure';

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@illuma-ai/agents",
-  "version": "1.0.94",
+  "version": "1.0.98",
   "main": "./dist/cjs/main.cjs",
   "module": "./dist/esm/main.mjs",
   "types": "./dist/types/index.d.ts",

package/src/common/constants.ts CHANGED Viewed

@@ -19,3 +19,29 @@ export const MIN_THINKING_BUDGET = 1024;
  * compounding across multi-tool conversations (e.g., 10 tool calls).
  */
 export const TOOL_TURN_THINKING_BUDGET = 1024;
+// ============================================================================
+// CONTEXT OVERFLOW MANAGEMENT
+//
+// Context overflow is handled mechanically — no token budget numbers are
+// exposed to the LLM. The system uses: pruning (Graph), summarization
+// (summarizeCallback), and auto-continuation (client.js max_tokens detection).
+//
+// See: docs/context-overflow-architecture.md
+// ============================================================================
+/**
+ * Minimum number of attached documents before the multi-document delegation
+ * hint is injected. Below this threshold, the agent processes documents
+ * directly within its own context.
+ */
+export const MULTI_DOCUMENT_THRESHOLD = 3;
+/**
+ * Context utilization safety buffer multiplier (0-1).
+ * Applied as: effectiveMax = (maxContextTokens - maxOutputTokens) * CONTEXT_SAFETY_BUFFER
+ *
+ * Reserves headroom so the LLM doesn't hit hard token limits mid-generation.
+ * 0.9 = 10% reserved for safety.
+ */
+export const CONTEXT_SAFETY_BUFFER = 0.9;

package/src/events.ts CHANGED Viewed

@@ -51,14 +51,6 @@ export class ModelEndHandler implements t.EventHandler {
       return handleToolCalls(data?.output?.tool_calls, metadata, graph);
     }
-    console.log(`====== ${event.toUpperCase()} ======`);
-    console.dir(
-      {
-        usage,
-      },
-      { depth: null }
-    );
     const agentContext = graph.getAgentContext(metadata);
     if (

package/src/graphs/Graph.ts CHANGED Viewed

@@ -69,6 +69,13 @@ import { getChatModelClass, manualToolStreamProviders } from '@/llm/providers';
 import { ToolNode as CustomToolNode, toolsCondition } from '@/tools/ToolNode';
 import { ChatOpenAI, AzureChatOpenAI } from '@/llm/openai';
 import { safeDispatchCustomEvent } from '@/utils/events';
+import {
+  detectDocuments,
+  shouldInjectMultiDocHint,
+  buildMultiDocHintContent,
+  buildPostPruneNote,
+  hasTaskTool,
+} from '@/utils/contextPressure';
 import { createSchemaOnlyTools } from '@/tools/schema';
 import { prepareSchemaForProvider } from '@/schemas/validate';
 import { AgentContext } from '@/agents/AgentContext';
@@ -1367,45 +1374,12 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
       // ====================================================================
       // PRE-PRUNING DELEGATION CHECK
-      // Before pruning strips messages (losing context), check if we should
-      // delegate instead. If context would be pruned AND the agent has the
-      // task tool, inject a delegation hint and SKIP pruning — preserving
-      // the content for the LLM to understand what to delegate.
       // ====================================================================
-      let delegationInjectedPrePrune = false;
-      const hasTaskToolPrePrune = agentContext.tools?.some((tool) => {
-        const toolName =
-          typeof tool === 'object' && 'name' in tool
-            ? (tool as { name: string }).name
-            : '';
-        return toolName === 'task';
-      });
-      if (
-        hasTaskToolPrePrune === true &&
-        agentContext.tokenCounter &&
-        agentContext.maxContextTokens != null
-      ) {
-        // Estimate total tokens in messages BEFORE pruning
-        let prePruneTokens = 0;
-        for (const msg of messages) {
-          prePruneTokens += agentContext.tokenCounter(msg);
-        }
-        // Add instruction tokens (system prompt)
-        prePruneTokens += agentContext.instructionTokens;
-        const prePruneUtilization =
-          (prePruneTokens / agentContext.maxContextTokens) * 100;
-        if (prePruneUtilization > 70) {
-          console.warn(
-            `[Graph] PRE-PRUNE delegation check: ${prePruneUtilization.toFixed(1)}% utilization ` +
-              `(${prePruneTokens}/${agentContext.maxContextTokens} tokens). ` +
-              'Injecting delegation hint INSTEAD of pruning.'
-          );
-          delegationInjectedPrePrune = true;
-        }
-      }
+      // Context management is now fully mechanical:
+      // - Pruning always runs when needed (no delegation-based skip)
+      // - Auto-continuation in client.js handles max_tokens finish reason
+      // - LLM never sees raw token numbers (prevents voluntary bail-out)
+      // ====================================================================
       if (
         !agentContext.pruneMessages &&
@@ -1436,8 +1410,8 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
         });
       }
-      if (agentContext.pruneMessages && !delegationInjectedPrePrune) {
-        console.info(
+      if (agentContext.pruneMessages) {
+        console.debug(
           `[Graph:ContextMgmt] Pruning messages | inputCount=${messages.length} | maxTokens=${agentContext.maxContextTokens}`
         );
         const { context, indexTokenCountMap, messagesToRefine } =
@@ -1448,22 +1422,24 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
           });
         agentContext.indexTokenCountMap = indexTokenCountMap;
         messagesToUse = context;
-        console.info(
+        console.debug(
           `[Graph:ContextMgmt] Pruned | kept=${context.length} | discarded=${messagesToRefine.length} | originalCount=${messages.length}`
         );
         // Summarize discarded messages if callback provided
+        let hasSummary = false;
         if (messagesToRefine.length > 0 && agentContext.summarizeCallback) {
-          console.info(
+          console.debug(
             `[Graph:ContextMgmt] Summarizing ${messagesToRefine.length} discarded messages`
           );
           try {
             const summary =
               await agentContext.summarizeCallback(messagesToRefine);
-            console.info(
+            console.debug(
               `[Graph:ContextMgmt] Summary received | len=${summary?.length ?? 0} | hasContent=${summary != null && summary !== ''}`
             );
             if (summary != null && summary !== '') {
+              hasSummary = true;
               const summaryMsg = new SystemMessage(
                 `[Conversation Summary]\n${summary}`
               );
@@ -1475,7 +1451,7 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
                 summaryMsg,
                 ...messagesToUse.slice(systemIdx),
               ];
-              console.info(
+              console.debug(
                 `[Graph:ContextMgmt] Summary injected at index ${systemIdx} | finalMsgCount=${messagesToUse.length}`
               );
             }
@@ -1483,10 +1459,21 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
             console.error('[Graph] Summarization callback failed:', err);
           }
         }
-      } else if (delegationInjectedPrePrune) {
-        console.info(
-          '[Graph] Skipping pruning — delegation will handle context pressure'
-        );
+        // Post-prune context note: inform the LLM that context was compressed
+        // without exposing token numbers (prevents voluntary bail-out)
+        if (messagesToRefine.length > 0 && hasTaskTool(agentContext.tools)) {
+          const postPruneNote = buildPostPruneNote(
+            messagesToRefine.length,
+            hasSummary
+          );
+          if (postPruneNote) {
+            messagesToUse = [...messagesToUse, new SystemMessage(postPruneNote)];
+            console.debug(
+              `[Graph:ContextMgmt] Post-prune note injected | hasSummary=${hasSummary} | discarded=${messagesToRefine.length}`
+            );
+          }
+        }
       }
       let finalMessages = messagesToUse;
@@ -1645,106 +1632,32 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
       );
       // ====================================================================
-      // CONTEXT PRESSURE AWARENESS — Intelligent Sub-Agent Delegation
-      //
-      // Two triggers for delegation hints:
-      // 1. DOCUMENT COUNT: When 3+ documents are detected in the conversation,
-      //    inject a delegation hint on the FIRST iteration (before the LLM
-      //    has called any tools). This ensures the agent delegates upfront
-      //    rather than trying to process all documents itself.
-      // 2. TOKEN UTILIZATION: At EVERY iteration, if context is filling up
-      //    (70%/85%), inject escalating hints to delegate remaining work.
+      // MULTI-DOCUMENT DELEGATION (task-driven, not budget-driven)
       //
-      // This runs mid-chain — so even if tool responses push context up
-      // after the first LLM call, subsequent iterations get the hint.
+      // Token-based pressure hints have been removed — the LLM never sees
+      // raw token numbers. Context overflow is handled mechanically by
+      // pruning (Graph) + auto-continuation (client.js max_tokens detection).
+      // See: docs/context-overflow-architecture.md
       // ====================================================================
-      const hasTaskToolInContext = agentContext.tools?.some((tool) => {
-        const toolName =
-          typeof tool === 'object' && 'name' in tool
-            ? (tool as { name: string }).name
-            : '';
-        return toolName === 'task';
-      });
-      if (
-        hasTaskToolInContext === true &&
-        contextAnalytics.utilizationPercent != null &&
-        contextAnalytics.maxContextTokens != null
-      ) {
-        const utilization = contextAnalytics.utilizationPercent;
-        const totalTokens = contextAnalytics.totalTokens;
-        const maxTokens = contextAnalytics.maxContextTokens;
-        const remainingTokens = maxTokens - totalTokens;
-        // Count attached documents by scanning for document patterns in HumanMessages:
-        // 1. # "filename" headers in "Attached document(s):" blocks (text content)
-        // 2. **filename1, filename2** in "The user has attached:" blocks (embedded files)
-        // 3. Filenames in file_search tool results
-        let documentCount = 0;
-        const documentNames: string[] = [];
-        for (const msg of finalMessages) {
-          const content =
-            typeof msg.content === 'string'
-              ? msg.content
-              : Array.isArray(msg.content)
-                ? msg.content
-                    .map((p: unknown) => {
-                      const part = p as Record<string, unknown>;
-                      return String(part.text ?? part.content ?? '');
-                    })
-                    .join(' ')
-                : '';
-          // Pattern 1: # "filename" headers in attached document blocks
-          const docMatches = content.match(/# "([^"]+)"/g);
-          if (docMatches) {
-            for (const match of docMatches) {
-              const name = match.replace(/# "/, '').replace(/"$/, '');
-              if (!documentNames.includes(name)) {
-                documentNames.push(name);
-                documentCount++;
-              }
-            }
-          }
-          // Pattern 2: "The user has attached: **file1, file2**" (embedded files)
-          const attachedMatch = content.match(
-            /user has attached:\s*\*\*([^*]+)\*\*/i
+      if (hasTaskTool(agentContext.tools)) {
+        const { count: documentCount, names: documentNames } =
+          detectDocuments(finalMessages);
+        // Observability log (no token numbers exposed to LLM)
+        if (contextAnalytics.utilizationPercent != null) {
+          console.debug(
+            `[Graph] Context utilization: ${contextAnalytics.utilizationPercent.toFixed(1)}% | ` +
+              `messages: ${finalMessages.length} | docs: ${documentCount}`
           );
-          if (attachedMatch) {
-            const names = attachedMatch[1]
-              .split(',')
-              .map((n: string) => n.trim())
-              .filter(Boolean);
-            for (const name of names) {
-              if (!documentNames.includes(name)) {
-                documentNames.push(name);
-                documentCount++;
-              }
-            }
-          }
         }
-        // BASELINE LOG: Always fires so we can verify this code path runs
-        console.info(
-          `[Graph] Context utilization: ${utilization.toFixed(1)}% ` +
-            `(${totalTokens}/${maxTokens} tokens, ${remainingTokens} remaining) | ` +
-            `hasTaskTool: true | messages: ${finalMessages.length} | docs: ${documentCount}`
-        );
-        // TRIGGER 1: Multi-document delegation (3+ documents detected)
-        // Only inject on first iteration (no AI messages yet = agent hasn't responded)
+        // Multi-document delegation: first iteration only (before AI has responded)
         const hasAiResponse = finalMessages.some(
           (m) => m._getType() === 'ai' || m._getType() === 'tool'
         );
-        if (documentCount >= 3 && !hasAiResponse) {
+        if (shouldInjectMultiDocHint(documentCount, hasAiResponse)) {
           const pressureMsg = new HumanMessage({
-            content:
-              `[MULTI-DOCUMENT PROCESSING — ${documentCount} documents detected]\n` +
-              `Documents: ${documentNames.join(', ')}\n\n` +
-              `You have ${documentCount} documents attached. For thorough analysis, use the "task" tool ` +
-              'to delegate each document (or group of related documents) to a sub-agent.\n' +
-              'Each sub-agent has its own fresh context window and can use file_search to retrieve the full document content.\n' +
-              'After all sub-agents complete, synthesize their results into a comprehensive response.\n\n' +
-              'This approach ensures each document gets full attention without context limitations.',
+            content: buildMultiDocHintContent(documentCount, documentNames),
           });
           finalMessages = [...finalMessages, pressureMsg];
           console.info(
@@ -1752,43 +1665,6 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
               `${documentNames.join(', ')}`
           );
         }
-        // TRIGGER 2: Token utilization thresholds (mid-chain safety net)
-        // Also fires when we skipped pruning due to delegationInjectedPrePrune
-        if (
-          utilization > 85 ||
-          (delegationInjectedPrePrune && utilization > 50)
-        ) {
-          // CRITICAL: Context is high — MANDATE delegation
-          const pressureMsg = new HumanMessage({
-            content:
-              `[CONTEXT BUDGET CRITICAL — ${utilization.toFixed(0)}% used]\n` +
-              `You have used ${totalTokens} of ${maxTokens} tokens (${remainingTokens} remaining).\n` +
-              'Your context is very large. You MUST use the "task" tool to delegate work to sub-agents.\n' +
-              'Each sub-agent runs in its own fresh context window and can use file_search to access documents.\n' +
-              'Do NOT attempt to process documents directly — delegate each document to a sub-agent, then synthesize results.',
-          });
-          finalMessages = [...finalMessages, pressureMsg];
-          console.warn(
-            `[Graph] Context pressure CRITICAL (${utilization.toFixed(0)}%): ` +
-              `Injected mandatory delegation hint. ${remainingTokens} tokens remaining. ` +
-              `prePruneSkipped: ${delegationInjectedPrePrune}`
-          );
-        } else if (utilization > 70) {
-          // WARNING: Context filling up — suggest delegation
-          const pressureMsg = new HumanMessage({
-            content:
-              `[CONTEXT BUDGET WARNING — ${utilization.toFixed(0)}% used]\n` +
-              `You have used ${totalTokens} of ${maxTokens} tokens (${remainingTokens} remaining).\n` +
-              'Your context is filling up. Consider using the "task" tool to delegate complex operations to sub-agents.\n' +
-              "Sub-agents run in fresh context windows and won't consume your remaining budget.",
-          });
-          finalMessages = [...finalMessages, pressureMsg];
-          console.info(
-            `[Graph] Context pressure WARNING (${utilization.toFixed(0)}%): ` +
-              `Injected delegation suggestion. ${remainingTokens} tokens remaining.`
-          );
-        }
       }
       // Structured output mode: when the agent has NO tools, produce structured JSON immediately.
@@ -2302,7 +2178,7 @@ If I seem to be missing something we discussed earlier, just give me a quick rem
         reducer: (a, b) => {
           if (!a.length) {
             this.startIndex = a.length + b.length;
-            console.info(
+            console.debug(
               `[Graph:Reducer] Initial messages | startIndex=${this.startIndex} | inputMsgCount=${b.length}`
             );
           } else {

package/src/graphs/contextManagement.e2e.test.ts CHANGED Viewed

@@ -596,30 +596,38 @@ describe('Pre-invocation utilization gate', () => {
     expect(emergency.length).toBeLessThan(2000); // Emergency summaries are compact
   });
-  it('injects delegation hint at >70% utilization for agents with task tool', () => {
-    const utilization = 75;
-    const hasTaskTool = true;
-    if (utilization > 70 && hasTaskTool) {
-      const delegationHint = new HumanMessage({
-        content:
-          '[System] Context window is at 75% capacity. Consider delegating complex sub-tasks ' +
-          'to the task tool to maintain context availability.',
-      });
-      expect(delegationHint.content).toContain('75%');
-      expect(delegationHint.content).toContain('task tool');
+  it('does NOT inject token budget hints at any utilization level', () => {
+    // Token budget hints were removed to prevent LLM voluntary bail-out.
+    // Context overflow is handled mechanically by pruning + auto-continuation.
+    // See: docs/context-overflow-architecture.md
+    const utilizationLevels = [50, 70, 85, 95, 101];
+    for (const utilization of utilizationLevels) {
+      const messages = buildConversation(10, 200);
+      // No message should contain raw token numbers or budget percentages
+      for (const msg of messages) {
+        const content =
+          typeof msg.content === 'string'
+            ? msg.content
+            : JSON.stringify(msg.content);
+        expect(content).not.toMatch(/CONTEXT BUDGET/);
+        expect(content).not.toMatch(/\d+ of \d+ tokens/);
+      }
     }
   });
-  it('does not inject delegation hint below 70%', () => {
-    const utilization = 65;
-    let delegationInjected = false;
-    if (utilization > 70) {
-      delegationInjected = true;
+  it('post-prune note does not contain token numbers', () => {
+    // After pruning, a context note is injected but it must not
+    // expose any token counts or budget percentages to the LLM
+    const { buildPostPruneNote } = require('@/utils/contextPressure');
+    const noteWithSummary = buildPostPruneNote(10, true);
+    const noteWithout = buildPostPruneNote(10, false);
+    for (const note of [noteWithSummary, noteWithout]) {
+      expect(note).not.toBeNull();
+      expect(note).not.toMatch(/\d+%/);
+      expect(note).not.toMatch(/\d+ of \d+ tokens/);
+      expect(note).not.toMatch(/BUDGET/i);
+      expect(note).toContain('task');
     }
-    expect(delegationInjected).toBe(false);
   });
 });

package/src/specs/agent-handoffs-bedrock.integration.test.ts CHANGED Viewed

@@ -20,7 +20,7 @@ config({
 import { HumanMessage, ToolMessage } from '@langchain/core/messages';
 import type { RunnableConfig } from '@langchain/core/runnables';
 import type * as t from '@/types';
-import { Providers, Constants, GraphEvents } from '@/common';
+import { Providers, Constants, GraphEvents, EdgeType } from '@/common';
 import { StandardGraph } from '@/graphs/Graph';
 import { ChatModelStreamHandler, createContentAggregator } from '@/stream';
 import { ToolEndHandler, ModelEndHandler } from '@/events';
@@ -118,13 +118,13 @@ describeIf('Agent Handoff E2E with Bedrock', () => {
         {
           from: 'supervisor_abc123',
           to: 'agent_W47hBnn2RoVZEOy5595GC',
-          edgeType: 'handoff',
+          edgeType: EdgeType.HANDOFF,
           // No description - should auto-generate from agent name + description
         },
         {
           from: 'supervisor_abc123',
           to: 'agent_X92kLmn4TpQR8vw3221HD',
-          edgeType: 'handoff',
+          edgeType: EdgeType.HANDOFF,
           // No description
         },
       ];
@@ -203,8 +203,8 @@ describeIf('Agent Handoff E2E with Bedrock', () => {
       ];
       const edges: t.GraphEdge[] = [
-        { from: 'router', to: 'sales_agent', edgeType: 'handoff' },
-        { from: 'router', to: 'support_agent', edgeType: 'handoff' },
+        { from: 'router', to: 'sales_agent', edgeType: EdgeType.HANDOFF },
+        { from: 'router', to: 'support_agent', edgeType: EdgeType.HANDOFF },
       ];
       const { contentParts: _contentParts, aggregateContent } =
@@ -321,8 +321,8 @@ describeIf('Agent Handoff E2E with Bedrock', () => {
       ];
       const edges: t.GraphEdge[] = [
-        { from: 'router', to: 'sales_agent', edgeType: 'handoff' },
-        { from: 'router', to: 'support_agent', edgeType: 'handoff' },
+        { from: 'router', to: 'sales_agent', edgeType: EdgeType.HANDOFF },
+        { from: 'router', to: 'support_agent', edgeType: EdgeType.HANDOFF },
       ];
       const { contentParts: _contentParts, aggregateContent } =