@illuma-ai/agents 1.0.96 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. package/dist/cjs/agents/AgentContext.cjs +6 -2
  2. package/dist/cjs/agents/AgentContext.cjs.map +1 -1
  3. package/dist/cjs/common/constants.cjs +78 -0
  4. package/dist/cjs/common/constants.cjs.map +1 -1
  5. package/dist/cjs/graphs/Graph.cjs +191 -165
  6. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  7. package/dist/cjs/main.cjs +22 -0
  8. package/dist/cjs/main.cjs.map +1 -1
  9. package/dist/cjs/messages/dedup.cjs +95 -0
  10. package/dist/cjs/messages/dedup.cjs.map +1 -0
  11. package/dist/cjs/tools/CodeExecutor.cjs +22 -3
  12. package/dist/cjs/tools/CodeExecutor.cjs.map +1 -1
  13. package/dist/cjs/types/graph.cjs.map +1 -1
  14. package/dist/cjs/utils/contextPressure.cjs +154 -0
  15. package/dist/cjs/utils/contextPressure.cjs.map +1 -0
  16. package/dist/cjs/utils/pruneCalibration.cjs +78 -0
  17. package/dist/cjs/utils/pruneCalibration.cjs.map +1 -0
  18. package/dist/cjs/utils/run.cjs.map +1 -1
  19. package/dist/cjs/utils/tokens.cjs.map +1 -1
  20. package/dist/cjs/utils/toolDiscoveryCache.cjs +127 -0
  21. package/dist/cjs/utils/toolDiscoveryCache.cjs.map +1 -0
  22. package/dist/esm/agents/AgentContext.mjs +6 -2
  23. package/dist/esm/agents/AgentContext.mjs.map +1 -1
  24. package/dist/esm/common/constants.mjs +71 -1
  25. package/dist/esm/common/constants.mjs.map +1 -1
  26. package/dist/esm/graphs/Graph.mjs +192 -166
  27. package/dist/esm/graphs/Graph.mjs.map +1 -1
  28. package/dist/esm/main.mjs +5 -1
  29. package/dist/esm/main.mjs.map +1 -1
  30. package/dist/esm/messages/dedup.mjs +93 -0
  31. package/dist/esm/messages/dedup.mjs.map +1 -0
  32. package/dist/esm/tools/CodeExecutor.mjs +22 -3
  33. package/dist/esm/tools/CodeExecutor.mjs.map +1 -1
  34. package/dist/esm/types/graph.mjs.map +1 -1
  35. package/dist/esm/utils/contextPressure.mjs +148 -0
  36. package/dist/esm/utils/contextPressure.mjs.map +1 -0
  37. package/dist/esm/utils/pruneCalibration.mjs +74 -0
  38. package/dist/esm/utils/pruneCalibration.mjs.map +1 -0
  39. package/dist/esm/utils/run.mjs.map +1 -1
  40. package/dist/esm/utils/tokens.mjs.map +1 -1
  41. package/dist/esm/utils/toolDiscoveryCache.mjs +125 -0
  42. package/dist/esm/utils/toolDiscoveryCache.mjs.map +1 -0
  43. package/dist/types/agents/AgentContext.d.ts +4 -1
  44. package/dist/types/common/constants.d.ts +49 -0
  45. package/dist/types/graphs/Graph.d.ts +25 -0
  46. package/dist/types/messages/dedup.d.ts +25 -0
  47. package/dist/types/messages/index.d.ts +1 -0
  48. package/dist/types/types/graph.d.ts +63 -0
  49. package/dist/types/utils/contextPressure.d.ts +72 -0
  50. package/dist/types/utils/index.d.ts +3 -0
  51. package/dist/types/utils/pruneCalibration.d.ts +43 -0
  52. package/dist/types/utils/toolDiscoveryCache.d.ts +77 -0
  53. package/package.json +1 -1
  54. package/src/agents/AgentContext.ts +7 -0
  55. package/src/common/constants.ts +82 -0
  56. package/src/graphs/Graph.ts +254 -208
  57. package/src/graphs/contextManagement.e2e.test.ts +28 -20
  58. package/src/graphs/gapFeatures.test.ts +520 -0
  59. package/src/graphs/nonBlockingSummarization.test.ts +307 -0
  60. package/src/messages/__tests__/dedup.test.ts +166 -0
  61. package/src/messages/dedup.ts +104 -0
  62. package/src/messages/index.ts +1 -0
  63. package/src/specs/agent-handoffs-bedrock.integration.test.ts +7 -7
  64. package/src/specs/agent-handoffs.test.ts +36 -36
  65. package/src/specs/thinking-handoff.test.ts +10 -10
  66. package/src/tools/CodeExecutor.ts +22 -3
  67. package/src/types/graph.ts +73 -0
  68. package/src/utils/__tests__/pruneCalibration.test.ts +148 -0
  69. package/src/utils/__tests__/toolDiscoveryCache.test.ts +214 -0
  70. package/src/utils/contextPressure.test.ts +262 -0
  71. package/src/utils/contextPressure.ts +188 -0
  72. package/src/utils/index.ts +3 -0
  73. package/src/utils/pruneCalibration.ts +92 -0
  74. package/src/utils/run.ts +108 -108
  75. package/src/utils/tokens.ts +118 -118
  76. package/src/utils/toolDiscoveryCache.ts +150 -0
@@ -12,9 +12,9 @@ var prune = require('../messages/prune.cjs');
12
12
  var format = require('../messages/format.cjs');
13
13
  var cache = require('../messages/cache.cjs');
14
14
  var content = require('../messages/content.cjs');
15
- var tools = require('../messages/tools.cjs');
16
15
  var _enum = require('../common/enum.cjs');
17
16
  var constants = require('../common/constants.cjs');
17
+ var dedup = require('../messages/dedup.cjs');
18
18
  var graph = require('../utils/graph.cjs');
19
19
  var llm = require('../utils/llm.cjs');
20
20
  var stream = require('../stream.cjs');
@@ -24,6 +24,9 @@ require('ai-tokenizer');
24
24
  require('../utils/toonFormat.cjs');
25
25
  var contextAnalytics = require('../utils/contextAnalytics.cjs');
26
26
  require('zod-to-json-schema');
27
+ var contextPressure = require('../utils/contextPressure.cjs');
28
+ var toolDiscoveryCache = require('../utils/toolDiscoveryCache.cjs');
29
+ var pruneCalibration = require('../utils/pruneCalibration.cjs');
27
30
  var providers = require('../llm/providers.cjs');
28
31
  var ToolNode = require('../tools/ToolNode.cjs');
29
32
  var index = require('../llm/openai/index.cjs');
@@ -92,6 +95,13 @@ class StandardGraph extends Graph {
92
95
  runId;
93
96
  startIndex = 0;
94
97
  signal;
98
+ /** Cached summary from the first prune in this run.
99
+ * Reused for subsequent prunes to avoid blocking LLM calls on every tool iteration. */
100
+ _cachedRunSummary;
101
+ /** EMA-based pruning calibration state — smooths token budget adjustments across iterations */
102
+ _pruneCalibration;
103
+ /** Run-scoped tool discovery cache — avoids re-parsing conversation history on every iteration */
104
+ _toolDiscoveryCache;
95
105
  /** Map of agent contexts by agent ID */
96
106
  agentContexts = new Map();
97
107
  /** Default agent ID to use */
@@ -112,6 +122,19 @@ class StandardGraph extends Graph {
112
122
  this.agentContexts.set(agentConfig.agentId, agentContext);
113
123
  }
114
124
  this.defaultAgentId = agents[0].agentId;
125
+ // Seed cached summary from persisted storage so the first prune in a
126
+ // resumed conversation can also skip the synchronous LLM summarization call
127
+ const primaryContext = this.agentContexts.get(this.defaultAgentId);
128
+ if (primaryContext?.persistedSummary) {
129
+ this._cachedRunSummary = primaryContext.persistedSummary;
130
+ }
131
+ // Initialize EMA pruning calibration
132
+ this._pruneCalibration = pruneCalibration.createPruneCalibration();
133
+ // Initialize tool discovery cache, seeded with any pre-existing discoveries
134
+ this._toolDiscoveryCache = new toolDiscoveryCache.ToolDiscoveryCache();
135
+ if (primaryContext?.discoveredToolNames.size) {
136
+ this._toolDiscoveryCache.seed([...primaryContext.discoveredToolNames]);
137
+ }
115
138
  }
116
139
  /* Init */
117
140
  resetValues(keepContent) {
@@ -134,6 +157,9 @@ class StandardGraph extends Graph {
134
157
  this.messageStepHasToolCalls = graph.resetIfNotEmpty(this.messageStepHasToolCalls, new Map());
135
158
  this.prelimMessageIdsByStepKey = graph.resetIfNotEmpty(this.prelimMessageIdsByStepKey, new Map());
136
159
  this.invokedToolIds = graph.resetIfNotEmpty(this.invokedToolIds, undefined);
160
+ // Reset EMA calibration and tool discovery cache for fresh run
161
+ this._pruneCalibration = pruneCalibration.createPruneCalibration();
162
+ this._toolDiscoveryCache.reset();
137
163
  for (const context of this.agentContexts.values()) {
138
164
  context.reset();
139
165
  }
@@ -222,6 +248,62 @@ class StandardGraph extends Graph {
222
248
  }
223
249
  return clientOptions;
224
250
  }
251
+ /**
252
+ * Determines whether summarization should trigger based on SummarizationConfig.
253
+ *
254
+ * Supports three trigger strategies:
255
+ * - contextPercentage (default): Trigger when context utilization >= threshold%
+ *   (threshold defaults to constants.SUMMARIZATION_CONTEXT_THRESHOLD; a
+ *   non-positive maxContextTokens always triggers)
256
+ * - messageCount: Trigger when pruned message count >= threshold (default 5)
257
+ * - tokenThreshold: Trigger when total estimated tokens >= threshold
+ *   (a null/undefined threshold always triggers)
258
+ *
259
+ * When no config is provided, always triggers (preserves backward compatibility).
+ * An unrecognized triggerType likewise falls back to triggering.
260
+ *
261
+ * @param prunedMessageCount - Number of messages that were pruned
262
+ * @param maxContextTokens - Maximum context token budget
263
+ * @param indexTokenCountMap - Token count map by message index
264
+ * @param instructionTokens - Token count for instructions/system message
265
+ * @param config - Optional SummarizationConfig
266
+ * @returns Whether summarization should be triggered
267
+ */
268
+ shouldTriggerSummarization(prunedMessageCount, maxContextTokens, indexTokenCountMap, instructionTokens, config) {
269
+ // No pruned messages means nothing to summarize
270
+ if (prunedMessageCount === 0) {
271
+ return false;
272
+ }
273
+ // No config = backward compatible (always summarize when messages are pruned)
274
+ if (!config || !config.triggerType) {
275
+ return true;
276
+ }
277
+ const threshold = config.triggerThreshold;
278
+ switch (config.triggerType) {
279
+ case 'contextPercentage': {
+ // Utilization percentage is undefined without a positive budget — fail open.
280
+ if (maxContextTokens <= 0)
281
+ return true;
282
+ const effectiveThreshold = threshold ?? constants.SUMMARIZATION_CONTEXT_THRESHOLD;
283
+ let totalTokens = instructionTokens;
284
+ for (const key in indexTokenCountMap) {
285
+ totalTokens += indexTokenCountMap[key] ?? 0;
286
+ }
287
+ const utilization = (totalTokens / maxContextTokens) * 100;
288
+ return utilization >= effectiveThreshold;
289
+ }
290
+ case 'messageCount': {
291
+ const effectiveThreshold = threshold ?? 5;
292
+ return prunedMessageCount >= effectiveThreshold;
293
+ }
294
+ case 'tokenThreshold': {
+ // No explicit token threshold configured — preserve legacy always-summarize behavior.
295
+ if (threshold == null)
296
+ return true;
297
+ let totalTokens = instructionTokens;
298
+ for (const key in indexTokenCountMap) {
299
+ totalTokens += indexTokenCountMap[key] ?? 0;
300
+ }
301
+ return totalTokens >= threshold;
302
+ }
303
+ default:
+ // Unknown trigger type: fail open (summarize) rather than silently skipping.
304
+ return true;
305
+ }
306
+ }
225
307
  /**
226
308
  * Returns the normalized finish/stop reason from the last LLM invocation.
227
309
  * Used by callers to detect when the response was truncated due to max_tokens.
@@ -360,7 +442,6 @@ class StandardGraph extends Graph {
360
442
  /* Misc.*/
361
443
  // Returns only the messages produced during the current run — the slice of
  // this.messages from startIndex onward.
  getRunMessages() {
362
444
  const result = this.messages.slice(this.startIndex);
363
- console.debug(`[Graph] getRunMessages() | totalMessages=${this.messages.length} | startIndex=${this.startIndex} | runMessages=${result.length}`);
364
445
  return result;
365
446
  }
366
447
  getContentParts() {
@@ -916,10 +997,12 @@ class StandardGraph extends Graph {
916
997
  });
917
998
  messages$1 = [dynamicContextMessage, ackMessage, ...messages$1];
918
999
  }
919
- // Extract tool discoveries from current turn only (similar to formatArtifactPayload pattern)
920
- const discoveredNames = tools.extractToolDiscoveries(messages$1);
921
- if (discoveredNames.length > 0) {
922
- agentContext.markToolsAsDiscovered(discoveredNames);
1000
+ // Tool discovery caching: only scan new messages since last iteration
1001
+ // instead of re-parsing the full history via extractToolDiscoveries()
1002
+ const cachedDiscoveries = this._toolDiscoveryCache.getNewDiscoveries(messages$1);
1003
+ if (cachedDiscoveries.length > 0) {
1004
+ agentContext.markToolsAsDiscovered(cachedDiscoveries);
1005
+ console.debug(`[Graph:ToolDiscovery] Cached ${cachedDiscoveries.length} new tools (total: ${this._toolDiscoveryCache.size})`);
923
1006
  }
924
1007
  const toolsForBinding = agentContext.getToolsForBinding();
925
1008
  // PERF: Detect subsequent ReAct iterations (tool results present in messages)
@@ -950,36 +1033,12 @@ class StandardGraph extends Graph {
950
1033
  let messagesToUse = messages$1;
951
1034
  // ====================================================================
952
1035
  // PRE-PRUNING DELEGATION CHECK
953
- // Before pruning strips messages (losing context), check if we should
954
- // delegate instead. If context would be pruned AND the agent has the
955
- // task tool, inject a delegation hint and SKIP pruning — preserving
956
- // the content for the LLM to understand what to delegate.
957
1036
  // ====================================================================
958
- let delegationInjectedPrePrune = false;
959
- const hasTaskToolPrePrune = agentContext.tools?.some((tool) => {
960
- const toolName = typeof tool === 'object' && 'name' in tool
961
- ? tool.name
962
- : '';
963
- return toolName === 'task';
964
- });
965
- if (hasTaskToolPrePrune === true &&
966
- agentContext.tokenCounter &&
967
- agentContext.maxContextTokens != null) {
968
- // Estimate total tokens in messages BEFORE pruning
969
- let prePruneTokens = 0;
970
- for (const msg of messages$1) {
971
- prePruneTokens += agentContext.tokenCounter(msg);
972
- }
973
- // Add instruction tokens (system prompt)
974
- prePruneTokens += agentContext.instructionTokens;
975
- const prePruneUtilization = (prePruneTokens / agentContext.maxContextTokens) * 100;
976
- if (prePruneUtilization > 70) {
977
- console.warn(`[Graph] PRE-PRUNE delegation check: ${prePruneUtilization.toFixed(1)}% utilization ` +
978
- `(${prePruneTokens}/${agentContext.maxContextTokens} tokens). ` +
979
- 'Injecting delegation hint INSTEAD of pruning.');
980
- delegationInjectedPrePrune = true;
981
- }
982
- }
1037
+ // Context management is now fully mechanical:
1038
+ // - Pruning always runs when needed (no delegation-based skip)
1039
+ // - Auto-continuation in client.js handles max_tokens finish reason
1040
+ // - LLM never sees raw token numbers (prevents voluntary bail-out)
1041
+ // ====================================================================
983
1042
  if (!agentContext.pruneMessages &&
984
1043
  agentContext.tokenCounter &&
985
1044
  agentContext.maxContextTokens != null &&
@@ -993,50 +1052,121 @@ class StandardGraph extends Graph {
993
1052
  (agentContext.provider === _enum.Providers.OPENAI &&
994
1053
  agentContext.clientOptions.modelKwargs
995
1054
  ?.thinking?.type === 'enabled');
1055
+ // Apply EMA calibration to max token budget — smooths pruning across iterations
1056
+ const calibratedMaxTokens = pruneCalibration.applyCalibration(agentContext.maxContextTokens, this._pruneCalibration);
996
1057
  agentContext.pruneMessages = prune.createPruneMessages({
997
1058
  startIndex: this.startIndex,
998
1059
  provider: agentContext.provider,
999
1060
  tokenCounter: agentContext.tokenCounter,
1000
- maxTokens: agentContext.maxContextTokens,
1061
+ maxTokens: calibratedMaxTokens,
1001
1062
  thinkingEnabled: isAnthropicWithThinking,
1002
1063
  indexTokenCountMap: agentContext.indexTokenCountMap,
1003
1064
  });
1004
1065
  }
1005
- if (agentContext.pruneMessages && !delegationInjectedPrePrune) {
1006
- console.debug(`[Graph:ContextMgmt] Pruning messages | inputCount=${messages$1.length} | maxTokens=${agentContext.maxContextTokens}`);
1066
+ // Update EMA calibration with actual token usage from API response
1067
+ if (agentContext.currentUsage?.input_tokens &&
1068
+ agentContext.maxContextTokens) {
1069
+ const estimatedTokens = Object.values(agentContext.indexTokenCountMap).reduce((sum, v) => (sum ?? 0) + (v ?? 0), 0);
1070
+ if (estimatedTokens > 0) {
1071
+ this._pruneCalibration = pruneCalibration.updatePruneCalibration(this._pruneCalibration, agentContext.currentUsage.input_tokens, estimatedTokens);
1072
+ }
1073
+ }
1074
+ if (agentContext.pruneMessages) {
1007
1075
  const { context, indexTokenCountMap, messagesToRefine } = agentContext.pruneMessages({
1008
1076
  messages: messages$1,
1009
1077
  usageMetadata: agentContext.currentUsage,
1010
- // startOnMessageType: 'human',
1011
1078
  });
1012
1079
  agentContext.indexTokenCountMap = indexTokenCountMap;
1013
1080
  messagesToUse = context;
1014
- console.debug(`[Graph:ContextMgmt] Pruned | kept=${context.length} | discarded=${messagesToRefine.length} | originalCount=${messages$1.length}`);
1015
- // Summarize discarded messages if callback provided
1016
- if (messagesToRefine.length > 0 && agentContext.summarizeCallback) {
1017
- console.debug(`[Graph:ContextMgmt] Summarizing ${messagesToRefine.length} discarded messages`);
1081
+ // ── Non-blocking summarization ──────────────────────────────────
1082
+ // NEVER block the LLM call waiting for summarization. Instead:
1083
+ // 1. If _cachedRunSummary exists use it, fire async update
1084
+ // 2. If persistedSummary exists → use it as fallback, fire async update
1085
+ // 3. If NOTHING exists (first-ever prune) → skip summary, fire async generation
1086
+ // The summary catches up asynchronously and is available for subsequent
1087
+ // iterations (tool calls) and the next conversation turn.
1088
+ //
1089
+ // SummarizationConfig integration:
1090
+ // - triggerType/triggerThreshold control WHEN summarization fires
1091
+ // - reserveRatio is enforced via calibrated maxTokens (above)
1092
+ // - initialSummary provides cross-run seeding as fallback before persistedSummary
1093
+ let hasSummary = false;
1094
+ const sumConfig = agentContext.summarizationConfig;
1095
+ const shouldSummarize = this.shouldTriggerSummarization(messagesToRefine.length, agentContext.maxContextTokens ?? 0, agentContext.indexTokenCountMap, agentContext.instructionTokens, sumConfig);
1096
+ if (messagesToRefine.length > 0 &&
1097
+ agentContext.summarizeCallback &&
1098
+ shouldSummarize) {
1018
1099
  try {
1019
- const summary = await agentContext.summarizeCallback(messagesToRefine);
1020
- console.debug(`[Graph:ContextMgmt] Summary received | len=${summary?.length ?? 0} | hasContent=${summary != null && summary !== ''}`);
1100
+ let summary;
1101
+ let summarySource;
1102
+ if (this._cachedRunSummary != null) {
1103
+ summary = this._cachedRunSummary;
1104
+ summarySource = 'cached';
1105
+ }
1106
+ else if (agentContext.persistedSummary != null &&
1107
+ agentContext.persistedSummary !== '') {
1108
+ summary = agentContext.persistedSummary;
1109
+ this._cachedRunSummary = summary;
1110
+ summarySource = 'persisted';
1111
+ }
1112
+ else if (sumConfig?.initialSummary != null &&
1113
+ sumConfig.initialSummary !== '') {
1114
+ // Cross-run seed: use initialSummary when no persisted summary exists
1115
+ summary = sumConfig.initialSummary;
1116
+ this._cachedRunSummary = summary;
1117
+ summarySource = 'initial-seed';
1118
+ }
1119
+ else {
1120
+ summarySource = 'none';
1121
+ }
1122
+ // Single consolidated log for the entire prune+summarize decision
1123
+ console.debug(`[Graph:ContextMgmt] Pruned ${messages$1.length}→${context.length} msgs (${messagesToRefine.length} discarded) | summary=${summarySource}${summary ? ` (len=${summary.length})` : ''} | calibration=${this._pruneCalibration.ratio.toFixed(3)}(${this._pruneCalibration.iterations})`);
1124
+ // Fire background summarization — updates cache for next iteration/turn
1125
+ agentContext
1126
+ .summarizeCallback(messagesToRefine)
1127
+ .then((updated) => {
1128
+ if (updated != null && updated !== '') {
1129
+ this._cachedRunSummary = updated;
1130
+ }
1131
+ })
1132
+ .catch((err) => {
1133
+ console.error('[Graph] Background summary failed (non-fatal):', err);
1134
+ });
1021
1135
  if (summary != null && summary !== '') {
1136
+ hasSummary = true;
1022
1137
  const summaryMsg = new messages.SystemMessage(`[Conversation Summary]\n${summary}`);
1023
- // Insert after system message (if present), before conversation messages
1024
1138
  const systemIdx = messagesToUse[0]?.getType() === 'system' ? 1 : 0;
1025
1139
  messagesToUse = [
1026
1140
  ...messagesToUse.slice(0, systemIdx),
1027
1141
  summaryMsg,
1028
1142
  ...messagesToUse.slice(systemIdx),
1029
1143
  ];
1030
- console.debug(`[Graph:ContextMgmt] Summary injected at index ${systemIdx} | finalMsgCount=${messagesToUse.length}`);
1031
1144
  }
1032
1145
  }
1033
1146
  catch (err) {
1034
- console.error('[Graph] Summarization callback failed:', err);
1147
+ console.error('[Graph] Summarization failed:', err);
1148
+ }
1149
+ }
1150
+ else if (messagesToRefine.length > 0) {
1151
+ // Log pruning even when no summarize callback (discard mode)
1152
+ console.debug(`[Graph:ContextMgmt] Pruned ${messages$1.length}→${context.length} msgs (${messagesToRefine.length} discarded, no summary callback) | calibration=${this._pruneCalibration.ratio.toFixed(3)}`);
1153
+ }
1154
+ // Deduplicate system messages that accumulate from repeated tool iterations
1155
+ const { messages: dedupedMessages, removedCount } = dedup.deduplicateSystemMessages(messagesToUse);
1156
+ if (removedCount > 0) {
1157
+ messagesToUse = dedupedMessages;
1158
+ console.debug(`[Graph:Dedup] Removed ${removedCount} duplicate system message(s)`);
1159
+ }
1160
+ // Post-prune context note for task-tool-enabled agents
1161
+ if (messagesToRefine.length > 0 && contextPressure.hasTaskTool(agentContext.tools)) {
1162
+ const postPruneNote = contextPressure.buildPostPruneNote(messagesToRefine.length, hasSummary);
1163
+ if (postPruneNote) {
1164
+ messagesToUse = [
1165
+ ...messagesToUse,
1166
+ new messages.SystemMessage(postPruneNote),
1167
+ ];
1035
1168
  }
1036
1169
  }
1037
- }
1038
- else if (delegationInjectedPrePrune) {
1039
- console.info('[Graph] Skipping pruning — delegation will handle context pressure');
1040
1170
  }
1041
1171
  let finalMessages = messagesToUse;
1042
1172
  if (agentContext.useLegacyContent) {
@@ -1149,125 +1279,25 @@ class StandardGraph extends Graph {
1149
1279
  analytics: contextAnalytics$1,
1150
1280
  }, config);
1151
1281
  // ====================================================================
1152
- // CONTEXT PRESSURE AWARENESS — Intelligent Sub-Agent Delegation
1153
- //
1154
- // Two triggers for delegation hints:
1155
- // 1. DOCUMENT COUNT: When 3+ documents are detected in the conversation,
1156
- // inject a delegation hint on the FIRST iteration (before the LLM
1157
- // has called any tools). This ensures the agent delegates upfront
1158
- // rather than trying to process all documents itself.
1159
- // 2. TOKEN UTILIZATION: At EVERY iteration, if context is filling up
1160
- // (70%/85%), inject escalating hints to delegate remaining work.
1282
+ // MULTI-DOCUMENT DELEGATION (task-driven, not budget-driven)
1161
1283
  //
1162
- // This runs mid-chain so even if tool responses push context up
1163
- // after the first LLM call, subsequent iterations get the hint.
1284
+ // Token-based pressure hints have been removed — the LLM never sees
1285
+ // raw token numbers. Context overflow is handled mechanically by
1286
+ // pruning (Graph) + auto-continuation (client.js max_tokens detection).
1287
+ // See: docs/context-overflow-architecture.md
1164
1288
  // ====================================================================
1165
- const hasTaskToolInContext = agentContext.tools?.some((tool) => {
1166
- const toolName = typeof tool === 'object' && 'name' in tool
1167
- ? tool.name
1168
- : '';
1169
- return toolName === 'task';
1170
- });
1171
- if (hasTaskToolInContext === true &&
1172
- contextAnalytics$1.utilizationPercent != null &&
1173
- contextAnalytics$1.maxContextTokens != null) {
1174
- const utilization = contextAnalytics$1.utilizationPercent;
1175
- const totalTokens = contextAnalytics$1.totalTokens;
1176
- const maxTokens = contextAnalytics$1.maxContextTokens;
1177
- const remainingTokens = maxTokens - totalTokens;
1178
- // Count attached documents by scanning for document patterns in HumanMessages:
1179
- // 1. # "filename" headers in "Attached document(s):" blocks (text content)
1180
- // 2. **filename1, filename2** in "The user has attached:" blocks (embedded files)
1181
- // 3. Filenames in file_search tool results
1182
- let documentCount = 0;
1183
- const documentNames = [];
1184
- for (const msg of finalMessages) {
1185
- const content = typeof msg.content === 'string'
1186
- ? msg.content
1187
- : Array.isArray(msg.content)
1188
- ? msg.content
1189
- .map((p) => {
1190
- const part = p;
1191
- return String(part.text ?? part.content ?? '');
1192
- })
1193
- .join(' ')
1194
- : '';
1195
- // Pattern 1: # "filename" headers in attached document blocks
1196
- const docMatches = content.match(/# "([^"]+)"/g);
1197
- if (docMatches) {
1198
- for (const match of docMatches) {
1199
- const name = match.replace(/# "/, '').replace(/"$/, '');
1200
- if (!documentNames.includes(name)) {
1201
- documentNames.push(name);
1202
- documentCount++;
1203
- }
1204
- }
1205
- }
1206
- // Pattern 2: "The user has attached: **file1, file2**" (embedded files)
1207
- const attachedMatch = content.match(/user has attached:\s*\*\*([^*]+)\*\*/i);
1208
- if (attachedMatch) {
1209
- const names = attachedMatch[1]
1210
- .split(',')
1211
- .map((n) => n.trim())
1212
- .filter(Boolean);
1213
- for (const name of names) {
1214
- if (!documentNames.includes(name)) {
1215
- documentNames.push(name);
1216
- documentCount++;
1217
- }
1218
- }
1219
- }
1220
- }
1221
- // BASELINE LOG: Always fires so we can verify this code path runs
1222
- console.debug(`[Graph] Context utilization: ${utilization.toFixed(1)}% ` +
1223
- `(${totalTokens}/${maxTokens} tokens, ${remainingTokens} remaining) | ` +
1224
- `hasTaskTool: true | messages: ${finalMessages.length} | docs: ${documentCount}`);
1225
- // TRIGGER 1: Multi-document delegation (3+ documents detected)
1226
- // Only inject on first iteration (no AI messages yet = agent hasn't responded)
1289
+ if (contextPressure.hasTaskTool(agentContext.tools)) {
1290
+ const { count: documentCount, names: documentNames } = contextPressure.detectDocuments(finalMessages);
1291
+ // Multi-document delegation: first iteration only (before AI has responded)
1227
1292
  const hasAiResponse = finalMessages.some((m) => m._getType() === 'ai' || m._getType() === 'tool');
1228
- if (documentCount >= 3 && !hasAiResponse) {
1293
+ if (contextPressure.shouldInjectMultiDocHint(documentCount, hasAiResponse)) {
1229
1294
  const pressureMsg = new messages.HumanMessage({
1230
- content: `[MULTI-DOCUMENT PROCESSING — ${documentCount} documents detected]\n` +
1231
- `Documents: ${documentNames.join(', ')}\n\n` +
1232
- `You have ${documentCount} documents attached. For thorough analysis, use the "task" tool ` +
1233
- 'to delegate each document (or group of related documents) to a sub-agent.\n' +
1234
- 'Each sub-agent has its own fresh context window and can use file_search to retrieve the full document content.\n' +
1235
- 'After all sub-agents complete, synthesize their results into a comprehensive response.\n\n' +
1236
- 'This approach ensures each document gets full attention without context limitations.',
1295
+ content: contextPressure.buildMultiDocHintContent(documentCount, documentNames),
1237
1296
  });
1238
1297
  finalMessages = [...finalMessages, pressureMsg];
1239
1298
  console.info(`[Graph] Multi-document delegation hint injected for ${documentCount} documents: ` +
1240
1299
  `${documentNames.join(', ')}`);
1241
1300
  }
1242
- // TRIGGER 2: Token utilization thresholds (mid-chain safety net)
1243
- // Also fires when we skipped pruning due to delegationInjectedPrePrune
1244
- if (utilization > 85 ||
1245
- (delegationInjectedPrePrune && utilization > 50)) {
1246
- // CRITICAL: Context is high — MANDATE delegation
1247
- const pressureMsg = new messages.HumanMessage({
1248
- content: `[CONTEXT BUDGET CRITICAL — ${utilization.toFixed(0)}% used]\n` +
1249
- `You have used ${totalTokens} of ${maxTokens} tokens (${remainingTokens} remaining).\n` +
1250
- 'Your context is very large. You MUST use the "task" tool to delegate work to sub-agents.\n' +
1251
- 'Each sub-agent runs in its own fresh context window and can use file_search to access documents.\n' +
1252
- 'Do NOT attempt to process documents directly — delegate each document to a sub-agent, then synthesize results.',
1253
- });
1254
- finalMessages = [...finalMessages, pressureMsg];
1255
- console.warn(`[Graph] Context pressure CRITICAL (${utilization.toFixed(0)}%): ` +
1256
- `Injected mandatory delegation hint. ${remainingTokens} tokens remaining. ` +
1257
- `prePruneSkipped: ${delegationInjectedPrePrune}`);
1258
- }
1259
- else if (utilization > 70) {
1260
- // WARNING: Context filling up — suggest delegation
1261
- const pressureMsg = new messages.HumanMessage({
1262
- content: `[CONTEXT BUDGET WARNING — ${utilization.toFixed(0)}% used]\n` +
1263
- `You have used ${totalTokens} of ${maxTokens} tokens (${remainingTokens} remaining).\n` +
1264
- 'Your context is filling up. Consider using the "task" tool to delegate complex operations to sub-agents.\n' +
1265
- "Sub-agents run in fresh context windows and won't consume your remaining budget.",
1266
- });
1267
- finalMessages = [...finalMessages, pressureMsg];
1268
- console.info(`[Graph] Context pressure WARNING (${utilization.toFixed(0)}%): ` +
1269
- `Injected delegation suggestion. ${remainingTokens} tokens remaining.`);
1270
- }
1271
1301
  }
1272
1302
  // Structured output mode: when the agent has NO tools, produce structured JSON immediately.
1273
1303
  // When the agent HAS tools, we defer structured output until after tool use completes
@@ -1661,10 +1691,6 @@ If I seem to be missing something we discussed earlier, just give me a quick rem
1661
1691
  reducer: (a, b) => {
1662
1692
  if (!a.length) {
1663
1693
  this.startIndex = a.length + b.length;
1664
- console.debug(`[Graph:Reducer] Initial messages | startIndex=${this.startIndex} | inputMsgCount=${b.length}`);
1665
- }
1666
- else {
1667
- console.debug(`[Graph:Reducer] Appending messages | existing=${a.length} | new=${b.length} | startIndex=${this.startIndex}`);
1668
1694
  }
1669
1695
  const result = langgraph.messagesStateReducer(a, b);
1670
1696
  this.messages = result;