@illuma-ai/agents 1.0.96 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. package/dist/cjs/agents/AgentContext.cjs +6 -2
  2. package/dist/cjs/agents/AgentContext.cjs.map +1 -1
  3. package/dist/cjs/common/constants.cjs +78 -0
  4. package/dist/cjs/common/constants.cjs.map +1 -1
  5. package/dist/cjs/graphs/Graph.cjs +191 -165
  6. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  7. package/dist/cjs/main.cjs +22 -0
  8. package/dist/cjs/main.cjs.map +1 -1
  9. package/dist/cjs/messages/dedup.cjs +95 -0
  10. package/dist/cjs/messages/dedup.cjs.map +1 -0
  11. package/dist/cjs/tools/CodeExecutor.cjs +22 -3
  12. package/dist/cjs/tools/CodeExecutor.cjs.map +1 -1
  13. package/dist/cjs/types/graph.cjs.map +1 -1
  14. package/dist/cjs/utils/contextPressure.cjs +154 -0
  15. package/dist/cjs/utils/contextPressure.cjs.map +1 -0
  16. package/dist/cjs/utils/pruneCalibration.cjs +78 -0
  17. package/dist/cjs/utils/pruneCalibration.cjs.map +1 -0
  18. package/dist/cjs/utils/run.cjs.map +1 -1
  19. package/dist/cjs/utils/tokens.cjs.map +1 -1
  20. package/dist/cjs/utils/toolDiscoveryCache.cjs +127 -0
  21. package/dist/cjs/utils/toolDiscoveryCache.cjs.map +1 -0
  22. package/dist/esm/agents/AgentContext.mjs +6 -2
  23. package/dist/esm/agents/AgentContext.mjs.map +1 -1
  24. package/dist/esm/common/constants.mjs +71 -1
  25. package/dist/esm/common/constants.mjs.map +1 -1
  26. package/dist/esm/graphs/Graph.mjs +192 -166
  27. package/dist/esm/graphs/Graph.mjs.map +1 -1
  28. package/dist/esm/main.mjs +5 -1
  29. package/dist/esm/main.mjs.map +1 -1
  30. package/dist/esm/messages/dedup.mjs +93 -0
  31. package/dist/esm/messages/dedup.mjs.map +1 -0
  32. package/dist/esm/tools/CodeExecutor.mjs +22 -3
  33. package/dist/esm/tools/CodeExecutor.mjs.map +1 -1
  34. package/dist/esm/types/graph.mjs.map +1 -1
  35. package/dist/esm/utils/contextPressure.mjs +148 -0
  36. package/dist/esm/utils/contextPressure.mjs.map +1 -0
  37. package/dist/esm/utils/pruneCalibration.mjs +74 -0
  38. package/dist/esm/utils/pruneCalibration.mjs.map +1 -0
  39. package/dist/esm/utils/run.mjs.map +1 -1
  40. package/dist/esm/utils/tokens.mjs.map +1 -1
  41. package/dist/esm/utils/toolDiscoveryCache.mjs +125 -0
  42. package/dist/esm/utils/toolDiscoveryCache.mjs.map +1 -0
  43. package/dist/types/agents/AgentContext.d.ts +4 -1
  44. package/dist/types/common/constants.d.ts +49 -0
  45. package/dist/types/graphs/Graph.d.ts +25 -0
  46. package/dist/types/messages/dedup.d.ts +25 -0
  47. package/dist/types/messages/index.d.ts +1 -0
  48. package/dist/types/types/graph.d.ts +63 -0
  49. package/dist/types/utils/contextPressure.d.ts +72 -0
  50. package/dist/types/utils/index.d.ts +3 -0
  51. package/dist/types/utils/pruneCalibration.d.ts +43 -0
  52. package/dist/types/utils/toolDiscoveryCache.d.ts +77 -0
  53. package/package.json +1 -1
  54. package/src/agents/AgentContext.ts +7 -0
  55. package/src/common/constants.ts +82 -0
  56. package/src/graphs/Graph.ts +254 -208
  57. package/src/graphs/contextManagement.e2e.test.ts +28 -20
  58. package/src/graphs/gapFeatures.test.ts +520 -0
  59. package/src/graphs/nonBlockingSummarization.test.ts +307 -0
  60. package/src/messages/__tests__/dedup.test.ts +166 -0
  61. package/src/messages/dedup.ts +104 -0
  62. package/src/messages/index.ts +1 -0
  63. package/src/specs/agent-handoffs-bedrock.integration.test.ts +7 -7
  64. package/src/specs/agent-handoffs.test.ts +36 -36
  65. package/src/specs/thinking-handoff.test.ts +10 -10
  66. package/src/tools/CodeExecutor.ts +22 -3
  67. package/src/types/graph.ts +73 -0
  68. package/src/utils/__tests__/pruneCalibration.test.ts +148 -0
  69. package/src/utils/__tests__/toolDiscoveryCache.test.ts +214 -0
  70. package/src/utils/contextPressure.test.ts +262 -0
  71. package/src/utils/contextPressure.ts +188 -0
  72. package/src/utils/index.ts +3 -0
  73. package/src/utils/pruneCalibration.ts +92 -0
  74. package/src/utils/run.ts +108 -108
  75. package/src/utils/tokens.ts +118 -118
  76. package/src/utils/toolDiscoveryCache.ts +150 -0
@@ -12,9 +12,9 @@ var prune = require('../messages/prune.cjs');
12
12
  var format = require('../messages/format.cjs');
13
13
  var cache = require('../messages/cache.cjs');
14
14
  var content = require('../messages/content.cjs');
15
- var tools = require('../messages/tools.cjs');
16
15
  var _enum = require('../common/enum.cjs');
17
16
  var constants = require('../common/constants.cjs');
17
+ var dedup = require('../messages/dedup.cjs');
18
18
  var graph = require('../utils/graph.cjs');
19
19
  var llm = require('../utils/llm.cjs');
20
20
  var stream = require('../stream.cjs');
@@ -24,6 +24,9 @@ require('ai-tokenizer');
24
24
  require('../utils/toonFormat.cjs');
25
25
  var contextAnalytics = require('../utils/contextAnalytics.cjs');
26
26
  require('zod-to-json-schema');
27
+ var contextPressure = require('../utils/contextPressure.cjs');
28
+ var toolDiscoveryCache = require('../utils/toolDiscoveryCache.cjs');
29
+ var pruneCalibration = require('../utils/pruneCalibration.cjs');
27
30
  var providers = require('../llm/providers.cjs');
28
31
  var ToolNode = require('../tools/ToolNode.cjs');
29
32
  var index = require('../llm/openai/index.cjs');
@@ -92,6 +95,13 @@ class StandardGraph extends Graph {
92
95
  runId;
93
96
  startIndex = 0;
94
97
  signal;
98
+ /** Cached summary from the first prune in this run.
99
+ * Reused for subsequent prunes to avoid blocking LLM calls on every tool iteration. */
100
+ _cachedRunSummary;
101
+ /** EMA-based pruning calibration state — smooths token budget adjustments across iterations */
102
+ _pruneCalibration;
103
+ /** Run-scoped tool discovery cache — avoids re-parsing conversation history on every iteration */
104
+ _toolDiscoveryCache;
95
105
  /** Map of agent contexts by agent ID */
96
106
  agentContexts = new Map();
97
107
  /** Default agent ID to use */
@@ -112,6 +122,19 @@ class StandardGraph extends Graph {
112
122
  this.agentContexts.set(agentConfig.agentId, agentContext);
113
123
  }
114
124
  this.defaultAgentId = agents[0].agentId;
125
+ // Seed cached summary from persisted storage so the first prune in a
126
+ // resumed conversation can also skip the synchronous LLM summarization call
127
+ const primaryContext = this.agentContexts.get(this.defaultAgentId);
128
+ if (primaryContext?.persistedSummary) {
129
+ this._cachedRunSummary = primaryContext.persistedSummary;
130
+ }
131
+ // Initialize EMA pruning calibration
132
+ this._pruneCalibration = pruneCalibration.createPruneCalibration();
133
+ // Initialize tool discovery cache, seeded with any pre-existing discoveries
134
+ this._toolDiscoveryCache = new toolDiscoveryCache.ToolDiscoveryCache();
135
+ if (primaryContext?.discoveredToolNames.size) {
136
+ this._toolDiscoveryCache.seed([...primaryContext.discoveredToolNames]);
137
+ }
115
138
  }
116
139
  /* Init */
117
140
  resetValues(keepContent) {
@@ -134,6 +157,9 @@ class StandardGraph extends Graph {
134
157
  this.messageStepHasToolCalls = graph.resetIfNotEmpty(this.messageStepHasToolCalls, new Map());
135
158
  this.prelimMessageIdsByStepKey = graph.resetIfNotEmpty(this.prelimMessageIdsByStepKey, new Map());
136
159
  this.invokedToolIds = graph.resetIfNotEmpty(this.invokedToolIds, undefined);
160
+ // Reset EMA calibration and tool discovery cache for fresh run
161
+ this._pruneCalibration = pruneCalibration.createPruneCalibration();
162
+ this._toolDiscoveryCache.reset();
137
163
  for (const context of this.agentContexts.values()) {
138
164
  context.reset();
139
165
  }
@@ -222,6 +248,62 @@ class StandardGraph extends Graph {
222
248
  }
223
249
  return clientOptions;
224
250
  }
251
+ /**
252
+ * Determines whether summarization should trigger based on SummarizationConfig.
253
+ *
254
+ * Supports three trigger strategies:
255
+ * - contextPercentage (default): Trigger when context utilization >= threshold%
+ *   (threshold defaults to constants.SUMMARIZATION_CONTEXT_THRESHOLD; a
+ *   non-positive maxContextTokens always triggers)
256
+ * - messageCount: Trigger when pruned message count >= threshold (default 5)
257
+ * - tokenThreshold: Trigger when total estimated tokens >= threshold
+ *   (a null/undefined threshold always triggers)
258
+ *
259
+ * When no config is provided, always triggers (preserves backward compatibility).
+ * An unrecognized triggerType likewise falls back to triggering.
260
+ *
261
+ * @param prunedMessageCount - Number of messages that were pruned
262
+ * @param maxContextTokens - Maximum context token budget
263
+ * @param indexTokenCountMap - Token count map by message index
264
+ * @param instructionTokens - Token count for instructions/system message
265
+ * @param config - Optional SummarizationConfig
266
+ * @returns Whether summarization should be triggered
267
+ */
268
+ shouldTriggerSummarization(prunedMessageCount, maxContextTokens, indexTokenCountMap, instructionTokens, config) {
269
+ // No pruned messages means nothing to summarize
270
+ if (prunedMessageCount === 0) {
271
+ return false;
272
+ }
273
+ // No config = backward compatible (always summarize when messages are pruned)
274
+ if (!config || !config.triggerType) {
275
+ return true;
276
+ }
277
+ const threshold = config.triggerThreshold;
278
+ switch (config.triggerType) {
279
+ case 'contextPercentage': {
+ // Utilization percentage is undefined without a positive budget — fail open.
280
+ if (maxContextTokens <= 0)
281
+ return true;
282
+ const effectiveThreshold = threshold ?? constants.SUMMARIZATION_CONTEXT_THRESHOLD;
283
+ let totalTokens = instructionTokens;
284
+ for (const key in indexTokenCountMap) {
285
+ totalTokens += indexTokenCountMap[key] ?? 0;
286
+ }
287
+ const utilization = (totalTokens / maxContextTokens) * 100;
288
+ return utilization >= effectiveThreshold;
289
+ }
290
+ case 'messageCount': {
291
+ const effectiveThreshold = threshold ?? 5;
292
+ return prunedMessageCount >= effectiveThreshold;
293
+ }
294
+ case 'tokenThreshold': {
+ // No explicit token threshold configured — preserve legacy always-summarize behavior.
295
+ if (threshold == null)
296
+ return true;
297
+ let totalTokens = instructionTokens;
298
+ for (const key in indexTokenCountMap) {
299
+ totalTokens += indexTokenCountMap[key] ?? 0;
300
+ }
301
+ return totalTokens >= threshold;
302
+ }
303
+ default:
+ // Unknown trigger type: fail open (summarize) rather than silently skipping.
304
+ return true;
305
+ }
306
+ }
225
307
  /**
226
308
  * Returns the normalized finish/stop reason from the last LLM invocation.
227
309
  * Used by callers to detect when the response was truncated due to max_tokens.
@@ -360,7 +442,6 @@ class StandardGraph extends Graph {
360
442
  /* Misc.*/
361
443
  // Returns only the messages produced during the current run — the slice of
  // this.messages from startIndex onward.
  getRunMessages() {
362
444
  const result = this.messages.slice(this.startIndex);
363
- console.debug(`[Graph] getRunMessages() | totalMessages=${this.messages.length} | startIndex=${this.startIndex} | runMessages=${result.length}`);
364
445
  return result;
365
446
  }
366
447
  getContentParts() {
@@ -916,10 +997,12 @@ class StandardGraph extends Graph {
916
997
  });
917
998
  messages$1 = [dynamicContextMessage, ackMessage, ...messages$1];
918
999
  }
919
- // Extract tool discoveries from current turn only (similar to formatArtifactPayload pattern)
920
- const discoveredNames = tools.extractToolDiscoveries(messages$1);
921
- if (discoveredNames.length > 0) {
922
- agentContext.markToolsAsDiscovered(discoveredNames);
1000
+ // Tool discovery caching: only scan new messages since last iteration
1001
+ // instead of re-parsing the full history via extractToolDiscoveries()
1002
+ const cachedDiscoveries = this._toolDiscoveryCache.getNewDiscoveries(messages$1);
1003
+ if (cachedDiscoveries.length > 0) {
1004
+ agentContext.markToolsAsDiscovered(cachedDiscoveries);
1005
+ console.debug(`[Graph:ToolDiscovery] Cached ${cachedDiscoveries.length} new tools (total: ${this._toolDiscoveryCache.size})`);
923
1006
  }
924
1007
  const toolsForBinding = agentContext.getToolsForBinding();
925
1008
  // PERF: Detect subsequent ReAct iterations (tool results present in messages)
@@ -950,36 +1033,12 @@ class StandardGraph extends Graph {
950
1033
  let messagesToUse = messages$1;
951
1034
  // ====================================================================
952
1035
  // PRE-PRUNING DELEGATION CHECK
953
- // Before pruning strips messages (losing context), check if we should
954
- // delegate instead. If context would be pruned AND the agent has the
955
- // task tool, inject a delegation hint and SKIP pruning — preserving
956
- // the content for the LLM to understand what to delegate.
957
1036
  // ====================================================================
958
- let delegationInjectedPrePrune = false;
959
- const hasTaskToolPrePrune = agentContext.tools?.some((tool) => {
960
- const toolName = typeof tool === 'object' && 'name' in tool
961
- ? tool.name
962
- : '';
963
- return toolName === 'task';
964
- });
965
- if (hasTaskToolPrePrune === true &&
966
- agentContext.tokenCounter &&
967
- agentContext.maxContextTokens != null) {
968
- // Estimate total tokens in messages BEFORE pruning
969
- let prePruneTokens = 0;
970
- for (const msg of messages$1) {
971
- prePruneTokens += agentContext.tokenCounter(msg);
972
- }
973
- // Add instruction tokens (system prompt)
974
- prePruneTokens += agentContext.instructionTokens;
975
- const prePruneUtilization = (prePruneTokens / agentContext.maxContextTokens) * 100;
976
- if (prePruneUtilization > 70) {
977
- console.warn(`[Graph] PRE-PRUNE delegation check: ${prePruneUtilization.toFixed(1)}% utilization ` +
978
- `(${prePruneTokens}/${agentContext.maxContextTokens} tokens). ` +
979
- 'Injecting delegation hint INSTEAD of pruning.');
980
- delegationInjectedPrePrune = true;
981
- }
982
- }
1037
+ // Context management is now fully mechanical:
1038
+ // - Pruning always runs when needed (no delegation-based skip)
1039
+ // - Auto-continuation in client.js handles max_tokens finish reason
1040
+ // - LLM never sees raw token numbers (prevents voluntary bail-out)
1041
+ // ====================================================================
983
1042
  if (!agentContext.pruneMessages &&
984
1043
  agentContext.tokenCounter &&
985
1044
  agentContext.maxContextTokens != null &&
@@ -993,50 +1052,121 @@ class StandardGraph extends Graph {
993
1052
  (agentContext.provider === _enum.Providers.OPENAI &&
994
1053
  agentContext.clientOptions.modelKwargs
995
1054
  ?.thinking?.type === 'enabled');
1055
+ // Apply EMA calibration to max token budget — smooths pruning across iterations
1056
+ const calibratedMaxTokens = pruneCalibration.applyCalibration(agentContext.maxContextTokens, this._pruneCalibration);
996
1057
  agentContext.pruneMessages = prune.createPruneMessages({
997
1058
  startIndex: this.startIndex,
998
1059
  provider: agentContext.provider,
999
1060
  tokenCounter: agentContext.tokenCounter,
1000
- maxTokens: agentContext.maxContextTokens,
1061
+ maxTokens: calibratedMaxTokens,
1001
1062
  thinkingEnabled: isAnthropicWithThinking,
1002
1063
  indexTokenCountMap: agentContext.indexTokenCountMap,
1003
1064
  });
1004
1065
  }
1005
- if (agentContext.pruneMessages && !delegationInjectedPrePrune) {
1006
- console.debug(`[Graph:ContextMgmt] Pruning messages | inputCount=${messages$1.length} | maxTokens=${agentContext.maxContextTokens}`);
1066
+ // Update EMA calibration with actual token usage from API response
1067
+ if (agentContext.currentUsage?.input_tokens &&
1068
+ agentContext.maxContextTokens) {
1069
+ const estimatedTokens = Object.values(agentContext.indexTokenCountMap).reduce((sum, v) => (sum ?? 0) + (v ?? 0), 0);
1070
+ if (estimatedTokens > 0) {
1071
+ this._pruneCalibration = pruneCalibration.updatePruneCalibration(this._pruneCalibration, agentContext.currentUsage.input_tokens, estimatedTokens);
1072
+ }
1073
+ }
1074
+ if (agentContext.pruneMessages) {
1007
1075
  const { context, indexTokenCountMap, messagesToRefine } = agentContext.pruneMessages({
1008
1076
  messages: messages$1,
1009
1077
  usageMetadata: agentContext.currentUsage,
1010
- // startOnMessageType: 'human',
1011
1078
  });
1012
1079
  agentContext.indexTokenCountMap = indexTokenCountMap;
1013
1080
  messagesToUse = context;
1014
- console.debug(`[Graph:ContextMgmt] Pruned | kept=${context.length} | discarded=${messagesToRefine.length} | originalCount=${messages$1.length}`);
1015
- // Summarize discarded messages if callback provided
1016
- if (messagesToRefine.length > 0 && agentContext.summarizeCallback) {
1017
- console.debug(`[Graph:ContextMgmt] Summarizing ${messagesToRefine.length} discarded messages`);
1081
+ // ── Non-blocking summarization ──────────────────────────────────
1082
+ // NEVER block the LLM call waiting for summarization. Instead:
1083
+ // 1. If _cachedRunSummary exists use it, fire async update
1084
+ // 2. If persistedSummary exists → use it as fallback, fire async update
1085
+ // 3. If NOTHING exists (first-ever prune) → skip summary, fire async generation
1086
+ // The summary catches up asynchronously and is available for subsequent
1087
+ // iterations (tool calls) and the next conversation turn.
1088
+ //
1089
+ // SummarizationConfig integration:
1090
+ // - triggerType/triggerThreshold control WHEN summarization fires
1091
+ // - reserveRatio is enforced via calibrated maxTokens (above)
1092
+ // - initialSummary provides cross-run seeding as fallback before persistedSummary
1093
+ let hasSummary = false;
1094
+ const sumConfig = agentContext.summarizationConfig;
1095
+ const shouldSummarize = this.shouldTriggerSummarization(messagesToRefine.length, agentContext.maxContextTokens ?? 0, agentContext.indexTokenCountMap, agentContext.instructionTokens, sumConfig);
1096
+ if (messagesToRefine.length > 0 &&
1097
+ agentContext.summarizeCallback &&
1098
+ shouldSummarize) {
1018
1099
  try {
1019
- const summary = await agentContext.summarizeCallback(messagesToRefine);
1020
- console.debug(`[Graph:ContextMgmt] Summary received | len=${summary?.length ?? 0} | hasContent=${summary != null && summary !== ''}`);
1100
+ let summary;
1101
+ let summarySource;
1102
+ if (this._cachedRunSummary != null) {
1103
+ summary = this._cachedRunSummary;
1104
+ summarySource = 'cached';
1105
+ }
1106
+ else if (agentContext.persistedSummary != null &&
1107
+ agentContext.persistedSummary !== '') {
1108
+ summary = agentContext.persistedSummary;
1109
+ this._cachedRunSummary = summary;
1110
+ summarySource = 'persisted';
1111
+ }
1112
+ else if (sumConfig?.initialSummary != null &&
1113
+ sumConfig.initialSummary !== '') {
1114
+ // Cross-run seed: use initialSummary when no persisted summary exists
1115
+ summary = sumConfig.initialSummary;
1116
+ this._cachedRunSummary = summary;
1117
+ summarySource = 'initial-seed';
1118
+ }
1119
+ else {
1120
+ summarySource = 'none';
1121
+ }
1122
+ // Single consolidated log for the entire prune+summarize decision
1123
+ console.debug(`[Graph:ContextMgmt] Pruned ${messages$1.length}→${context.length} msgs (${messagesToRefine.length} discarded) | summary=${summarySource}${summary ? ` (len=${summary.length})` : ''} | calibration=${this._pruneCalibration.ratio.toFixed(3)}(${this._pruneCalibration.iterations})`);
1124
+ // Fire background summarization — updates cache for next iteration/turn
1125
+ agentContext
1126
+ .summarizeCallback(messagesToRefine)
1127
+ .then((updated) => {
1128
+ if (updated != null && updated !== '') {
1129
+ this._cachedRunSummary = updated;
1130
+ }
1131
+ })
1132
+ .catch((err) => {
1133
+ console.error('[Graph] Background summary failed (non-fatal):', err);
1134
+ });
1021
1135
  if (summary != null && summary !== '') {
1136
+ hasSummary = true;
1022
1137
  const summaryMsg = new messages.SystemMessage(`[Conversation Summary]\n${summary}`);
1023
- // Insert after system message (if present), before conversation messages
1024
1138
  const systemIdx = messagesToUse[0]?.getType() === 'system' ? 1 : 0;
1025
1139
  messagesToUse = [
1026
1140
  ...messagesToUse.slice(0, systemIdx),
1027
1141
  summaryMsg,
1028
1142
  ...messagesToUse.slice(systemIdx),
1029
1143
  ];
1030
- console.debug(`[Graph:ContextMgmt] Summary injected at index ${systemIdx} | finalMsgCount=${messagesToUse.length}`);
1031
1144
  }
1032
1145
  }
1033
1146
  catch (err) {
1034
- console.error('[Graph] Summarization callback failed:', err);
1147
+ console.error('[Graph] Summarization failed:', err);
1148
+ }
1149
+ }
1150
+ else if (messagesToRefine.length > 0) {
1151
+ // Log pruning even when no summarize callback (discard mode)
1152
+ console.debug(`[Graph:ContextMgmt] Pruned ${messages$1.length}→${context.length} msgs (${messagesToRefine.length} discarded, no summary callback) | calibration=${this._pruneCalibration.ratio.toFixed(3)}`);
1153
+ }
1154
+ // Deduplicate system messages that accumulate from repeated tool iterations
1155
+ const { messages: dedupedMessages, removedCount } = dedup.deduplicateSystemMessages(messagesToUse);
1156
+ if (removedCount > 0) {
1157
+ messagesToUse = dedupedMessages;
1158
+ console.debug(`[Graph:Dedup] Removed ${removedCount} duplicate system message(s)`);
1159
+ }
1160
+ // Post-prune context note for task-tool-enabled agents
1161
+ if (messagesToRefine.length > 0 && contextPressure.hasTaskTool(agentContext.tools)) {
1162
+ const postPruneNote = contextPressure.buildPostPruneNote(messagesToRefine.length, hasSummary);
1163
+ if (postPruneNote) {
1164
+ messagesToUse = [
1165
+ ...messagesToUse,
1166
+ new messages.SystemMessage(postPruneNote),
1167
+ ];
1035
1168
  }
1036
1169
  }
1037
- }
1038
- else if (delegationInjectedPrePrune) {
1039
- console.info('[Graph] Skipping pruning — delegation will handle context pressure');
1040
1170
  }
1041
1171
  let finalMessages = messagesToUse;
1042
1172
  if (agentContext.useLegacyContent) {
@@ -1149,125 +1279,25 @@ class StandardGraph extends Graph {
1149
1279
  analytics: contextAnalytics$1,
1150
1280
  }, config);
1151
1281
  // ====================================================================
1152
- // CONTEXT PRESSURE AWARENESS — Intelligent Sub-Agent Delegation
1153
- //
1154
- // Two triggers for delegation hints:
1155
- // 1. DOCUMENT COUNT: When 3+ documents are detected in the conversation,
1156
- // inject a delegation hint on the FIRST iteration (before the LLM
1157
- // has called any tools). This ensures the agent delegates upfront
1158
- // rather than trying to process all documents itself.
1159
- // 2. TOKEN UTILIZATION: At EVERY iteration, if context is filling up
1160
- // (70%/85%), inject escalating hints to delegate remaining work.
1282
+ // MULTI-DOCUMENT DELEGATION (task-driven, not budget-driven)
1161
1283
  //
1162
- // This runs mid-chain so even if tool responses push context up
1163
- // after the first LLM call, subsequent iterations get the hint.
1284
+ // Token-based pressure hints have been removed — the LLM never sees
1285
+ // raw token numbers. Context overflow is handled mechanically by
1286
+ // pruning (Graph) + auto-continuation (client.js max_tokens detection).
1287
+ // See: docs/context-overflow-architecture.md
1164
1288
  // ====================================================================
1165
- const hasTaskToolInContext = agentContext.tools?.some((tool) => {
1166
- const toolName = typeof tool === 'object' && 'name' in tool
1167
- ? tool.name
1168
- : '';
1169
- return toolName === 'task';
1170
- });
1171
- if (hasTaskToolInContext === true &&
1172
- contextAnalytics$1.utilizationPercent != null &&
1173
- contextAnalytics$1.maxContextTokens != null) {
1174
- const utilization = contextAnalytics$1.utilizationPercent;
1175
- const totalTokens = contextAnalytics$1.totalTokens;
1176
- const maxTokens = contextAnalytics$1.maxContextTokens;
1177
- const remainingTokens = maxTokens - totalTokens;
1178
- // Count attached documents by scanning for document patterns in HumanMessages:
1179
- // 1. # "filename" headers in "Attached document(s):" blocks (text content)
1180
- // 2. **filename1, filename2** in "The user has attached:" blocks (embedded files)
1181
- // 3. Filenames in file_search tool results
1182
- let documentCount = 0;
1183
- const documentNames = [];
1184
- for (const msg of finalMessages) {
1185
- const content = typeof msg.content === 'string'
1186
- ? msg.content
1187
- : Array.isArray(msg.content)
1188
- ? msg.content
1189
- .map((p) => {
1190
- const part = p;
1191
- return String(part.text ?? part.content ?? '');
1192
- })
1193
- .join(' ')
1194
- : '';
1195
- // Pattern 1: # "filename" headers in attached document blocks
1196
- const docMatches = content.match(/# "([^"]+)"/g);
1197
- if (docMatches) {
1198
- for (const match of docMatches) {
1199
- const name = match.replace(/# "/, '').replace(/"$/, '');
1200
- if (!documentNames.includes(name)) {
1201
- documentNames.push(name);
1202
- documentCount++;
1203
- }
1204
- }
1205
- }
1206
- // Pattern 2: "The user has attached: **file1, file2**" (embedded files)
1207
- const attachedMatch = content.match(/user has attached:\s*\*\*([^*]+)\*\*/i);
1208
- if (attachedMatch) {
1209
- const names = attachedMatch[1]
1210
- .split(',')
1211
- .map((n) => n.trim())
1212
- .filter(Boolean);
1213
- for (const name of names) {
1214
- if (!documentNames.includes(name)) {
1215
- documentNames.push(name);
1216
- documentCount++;
1217
- }
1218
- }
1219
- }
1220
- }
1221
- // BASELINE LOG: Always fires so we can verify this code path runs
1222
- console.debug(`[Graph] Context utilization: ${utilization.toFixed(1)}% ` +
1223
- `(${totalTokens}/${maxTokens} tokens, ${remainingTokens} remaining) | ` +
1224
- `hasTaskTool: true | messages: ${finalMessages.length} | docs: ${documentCount}`);
1225
- // TRIGGER 1: Multi-document delegation (3+ documents detected)
1226
- // Only inject on first iteration (no AI messages yet = agent hasn't responded)
1289
+ if (contextPressure.hasTaskTool(agentContext.tools)) {
1290
+ const { count: documentCount, names: documentNames } = contextPressure.detectDocuments(finalMessages);
1291
+ // Multi-document delegation: first iteration only (before AI has responded)
1227
1292
  const hasAiResponse = finalMessages.some((m) => m._getType() === 'ai' || m._getType() === 'tool');
1228
- if (documentCount >= 3 && !hasAiResponse) {
1293
+ if (contextPressure.shouldInjectMultiDocHint(documentCount, hasAiResponse)) {
1229
1294
  const pressureMsg = new messages.HumanMessage({
1230
- content: `[MULTI-DOCUMENT PROCESSING — ${documentCount} documents detected]\n` +
1231
- `Documents: ${documentNames.join(', ')}\n\n` +
1232
- `You have ${documentCount} documents attached. For thorough analysis, use the "task" tool ` +
1233
- 'to delegate each document (or group of related documents) to a sub-agent.\n' +
1234
- 'Each sub-agent has its own fresh context window and can use file_search to retrieve the full document content.\n' +
1235
- 'After all sub-agents complete, synthesize their results into a comprehensive response.\n\n' +
1236
- 'This approach ensures each document gets full attention without context limitations.',
1295
+ content: contextPressure.buildMultiDocHintContent(documentCount, documentNames),
1237
1296
  });
1238
1297
  finalMessages = [...finalMessages, pressureMsg];
1239
1298
  console.info(`[Graph] Multi-document delegation hint injected for ${documentCount} documents: ` +
1240
1299
  `${documentNames.join(', ')}`);
1241
1300
  }
1242
- // TRIGGER 2: Token utilization thresholds (mid-chain safety net)
1243
- // Also fires when we skipped pruning due to delegationInjectedPrePrune
1244
- if (utilization > 85 ||
1245
- (delegationInjectedPrePrune && utilization > 50)) {
1246
- // CRITICAL: Context is high — MANDATE delegation
1247
- const pressureMsg = new messages.HumanMessage({
1248
- content: `[CONTEXT BUDGET CRITICAL — ${utilization.toFixed(0)}% used]\n` +
1249
- `You have used ${totalTokens} of ${maxTokens} tokens (${remainingTokens} remaining).\n` +
1250
- 'Your context is very large. You MUST use the "task" tool to delegate work to sub-agents.\n' +
1251
- 'Each sub-agent runs in its own fresh context window and can use file_search to access documents.\n' +
1252
- 'Do NOT attempt to process documents directly — delegate each document to a sub-agent, then synthesize results.',
1253
- });
1254
- finalMessages = [...finalMessages, pressureMsg];
1255
- console.warn(`[Graph] Context pressure CRITICAL (${utilization.toFixed(0)}%): ` +
1256
- `Injected mandatory delegation hint. ${remainingTokens} tokens remaining. ` +
1257
- `prePruneSkipped: ${delegationInjectedPrePrune}`);
1258
- }
1259
- else if (utilization > 70) {
1260
- // WARNING: Context filling up — suggest delegation
1261
- const pressureMsg = new messages.HumanMessage({
1262
- content: `[CONTEXT BUDGET WARNING — ${utilization.toFixed(0)}% used]\n` +
1263
- `You have used ${totalTokens} of ${maxTokens} tokens (${remainingTokens} remaining).\n` +
1264
- 'Your context is filling up. Consider using the "task" tool to delegate complex operations to sub-agents.\n' +
1265
- "Sub-agents run in fresh context windows and won't consume your remaining budget.",
1266
- });
1267
- finalMessages = [...finalMessages, pressureMsg];
1268
- console.info(`[Graph] Context pressure WARNING (${utilization.toFixed(0)}%): ` +
1269
- `Injected delegation suggestion. ${remainingTokens} tokens remaining.`);
1270
- }
1271
1301
  }
1272
1302
  // Structured output mode: when the agent has NO tools, produce structured JSON immediately.
1273
1303
  // When the agent HAS tools, we defer structured output until after tool use completes
@@ -1661,10 +1691,6 @@ If I seem to be missing something we discussed earlier, just give me a quick rem
1661
1691
  reducer: (a, b) => {
1662
1692
  if (!a.length) {
1663
1693
  this.startIndex = a.length + b.length;
1664
- console.debug(`[Graph:Reducer] Initial messages | startIndex=${this.startIndex} | inputMsgCount=${b.length}`);
1665
- }
1666
- else {
1667
- console.debug(`[Graph:Reducer] Appending messages | existing=${a.length} | new=${b.length} | startIndex=${this.startIndex}`);
1668
1694
  }
1669
1695
  const result = langgraph.messagesStateReducer(a, b);
1670
1696
  this.messages = result;