@illuma-ai/agents 1.0.94 → 1.0.98

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. package/dist/cjs/common/constants.cjs +25 -0
  2. package/dist/cjs/common/constants.cjs.map +1 -1
  3. package/dist/cjs/events.cjs +0 -4
  4. package/dist/cjs/events.cjs.map +1 -1
  5. package/dist/cjs/graphs/Graph.cjs +38 -148
  6. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  7. package/dist/cjs/main.cjs +8 -0
  8. package/dist/cjs/main.cjs.map +1 -1
  9. package/dist/cjs/tools/CodeExecutor.cjs +5 -0
  10. package/dist/cjs/tools/CodeExecutor.cjs.map +1 -1
  11. package/dist/cjs/tools/ProgrammaticToolCalling.cjs +12 -6
  12. package/dist/cjs/tools/ProgrammaticToolCalling.cjs.map +1 -1
  13. package/dist/cjs/tools/ToolSearch.cjs +14 -10
  14. package/dist/cjs/tools/ToolSearch.cjs.map +1 -1
  15. package/dist/cjs/tools/handlers.cjs +0 -2
  16. package/dist/cjs/tools/handlers.cjs.map +1 -1
  17. package/dist/cjs/tools/search/search.cjs +12 -4
  18. package/dist/cjs/tools/search/search.cjs.map +1 -1
  19. package/dist/cjs/tools/search/tool.cjs +2 -1
  20. package/dist/cjs/tools/search/tool.cjs.map +1 -1
  21. package/dist/cjs/utils/contextPressure.cjs +154 -0
  22. package/dist/cjs/utils/contextPressure.cjs.map +1 -0
  23. package/dist/esm/common/constants.mjs +24 -1
  24. package/dist/esm/common/constants.mjs.map +1 -1
  25. package/dist/esm/events.mjs +0 -4
  26. package/dist/esm/events.mjs.map +1 -1
  27. package/dist/esm/graphs/Graph.mjs +38 -148
  28. package/dist/esm/graphs/Graph.mjs.map +1 -1
  29. package/dist/esm/main.mjs +2 -1
  30. package/dist/esm/main.mjs.map +1 -1
  31. package/dist/esm/tools/CodeExecutor.mjs +5 -0
  32. package/dist/esm/tools/CodeExecutor.mjs.map +1 -1
  33. package/dist/esm/tools/ProgrammaticToolCalling.mjs +12 -6
  34. package/dist/esm/tools/ProgrammaticToolCalling.mjs.map +1 -1
  35. package/dist/esm/tools/ToolSearch.mjs +14 -10
  36. package/dist/esm/tools/ToolSearch.mjs.map +1 -1
  37. package/dist/esm/tools/handlers.mjs +0 -2
  38. package/dist/esm/tools/handlers.mjs.map +1 -1
  39. package/dist/esm/tools/search/search.mjs +12 -4
  40. package/dist/esm/tools/search/search.mjs.map +1 -1
  41. package/dist/esm/tools/search/tool.mjs +2 -1
  42. package/dist/esm/tools/search/tool.mjs.map +1 -1
  43. package/dist/esm/utils/contextPressure.mjs +148 -0
  44. package/dist/esm/utils/contextPressure.mjs.map +1 -0
  45. package/dist/types/common/constants.d.ts +14 -0
  46. package/dist/types/tools/ProgrammaticToolCalling.d.ts +2 -2
  47. package/dist/types/tools/search/types.d.ts +3 -0
  48. package/dist/types/utils/contextPressure.d.ts +72 -0
  49. package/dist/types/utils/index.d.ts +1 -0
  50. package/package.json +1 -1
  51. package/src/common/constants.ts +26 -0
  52. package/src/events.ts +0 -8
  53. package/src/graphs/Graph.ts +53 -177
  54. package/src/graphs/contextManagement.e2e.test.ts +28 -20
  55. package/src/specs/agent-handoffs-bedrock.integration.test.ts +7 -7
  56. package/src/specs/agent-handoffs.test.ts +36 -36
  57. package/src/specs/thinking-handoff.test.ts +10 -10
  58. package/src/tools/CodeExecutor.ts +6 -0
  59. package/src/tools/ProgrammaticToolCalling.ts +23 -6
  60. package/src/tools/ToolSearch.ts +14 -10
  61. package/src/tools/handlers.ts +0 -4
  62. package/src/tools/search/search.ts +15 -3
  63. package/src/tools/search/tool.ts +2 -0
  64. package/src/tools/search/types.ts +3 -0
  65. package/src/utils/contextPressure.test.ts +247 -0
  66. package/src/utils/contextPressure.ts +188 -0
  67. package/src/utils/index.ts +1 -0
@@ -0,0 +1,72 @@
1
+ /**
2
+ * Context Pressure Utilities
3
+ *
4
+ * Pure functions for context overflow management. These handle:
5
+ * 1. Multi-document detection — counting attached documents in messages
6
+ * 2. Multi-document delegation hint — injected when 3+ documents detected
7
+ * 3. Post-prune context note — injected after pruning/summarization
8
+ *
9
+ * DESIGN PRINCIPLE: The LLM never sees raw token numbers. Context overflow
10
+ * is handled mechanically by pruning (Graph) + auto-continuation (client.js).
11
+ * Only task-driven hints (multi-document) are injected — never budget-based.
12
+ *
13
+ * @see docs/context-overflow-architecture.md
14
+ */
15
+ import type { BaseMessage } from '@langchain/core/messages';
16
+ /** Result of scanning messages for attached documents */
17
+ export interface DocumentDetectionResult {
18
+ /** Total unique documents detected */
19
+ count: number;
20
+ /** Names of detected documents */
21
+ names: string[];
22
+ }
23
+ /**
24
+ * Scan messages for attached documents using known content patterns.
25
+ *
26
+ * Detects documents from:
27
+ * 1. `# "filename"` headers in "Attached document(s):" blocks (text content)
28
+ * 2. `**filename1, filename2**` in "The user has attached:" blocks (embedded files)
29
+ *
30
+ * @param messages - Conversation messages to scan
31
+ * @returns Document count and names (deduplicated)
32
+ */
33
+ export declare function detectDocuments(messages: BaseMessage[]): DocumentDetectionResult;
34
+ /**
35
+ * Determine whether the multi-document delegation hint should be injected.
36
+ *
37
+ * Only fires on the first iteration (before any AI response) when the
38
+ * document count meets the threshold. This ensures the agent delegates
39
+ * upfront rather than trying to process all documents itself.
40
+ *
41
+ * @param documentCount - Number of detected documents
42
+ * @param hasAiResponse - Whether the agent has already responded in this chain
43
+ * @returns Whether to inject the delegation hint
44
+ */
45
+ export declare function shouldInjectMultiDocHint(documentCount: number, hasAiResponse: boolean): boolean;
46
+ /**
47
+ * Build the multi-document delegation hint message content.
48
+ *
49
+ * @param documentCount - Number of detected documents
50
+ * @param documentNames - Names of detected documents
51
+ * @returns Message content string for injection as HumanMessage
52
+ */
53
+ export declare function buildMultiDocHintContent(documentCount: number, documentNames: string[]): string;
54
+ /**
55
+ * Build the post-prune context note injected after messages are pruned
56
+ * and summarized. No token numbers — just a contextual signal that
57
+ * earlier conversation was compressed.
58
+ *
59
+ * @param discardedCount - Number of messages that were pruned
60
+ * @param hasSummary - Whether a summary was successfully generated
61
+ * @returns Message content string for injection as SystemMessage, or null if no note needed
62
+ */
63
+ export declare function buildPostPruneNote(discardedCount: number, hasSummary: boolean): string | null;
64
+ /**
65
+ * Check whether a tool named "task" exists in the agent's tool set.
66
+ *
67
+ * @param tools - Array of tool objects or structured tools
68
+ * @returns Whether the task tool is available
69
+ */
70
+ export declare function hasTaskTool(tools: Array<{
71
+ name?: string;
72
+ } | unknown> | undefined): boolean;
@@ -8,3 +8,4 @@ export * from './toonFormat';
8
8
  export * from './contextAnalytics';
9
9
  export * from './schema';
10
10
  export * from './toolCallContinuation';
11
+ export * from './contextPressure';
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@illuma-ai/agents",
3
- "version": "1.0.94",
3
+ "version": "1.0.98",
4
4
  "main": "./dist/cjs/main.cjs",
5
5
  "module": "./dist/esm/main.mjs",
6
6
  "types": "./dist/types/index.d.ts",
@@ -19,3 +19,29 @@ export const MIN_THINKING_BUDGET = 1024;
19
19
  * compounding across multi-tool conversations (e.g., 10 tool calls).
20
20
  */
21
21
  export const TOOL_TURN_THINKING_BUDGET = 1024;
22
+
23
+ // ============================================================================
24
+ // CONTEXT OVERFLOW MANAGEMENT
25
+ //
26
+ // Context overflow is handled mechanically — no token budget numbers are
27
+ // exposed to the LLM. The system uses: pruning (Graph), summarization
28
+ // (summarizeCallback), and auto-continuation (client.js max_tokens detection).
29
+ //
30
+ // See: docs/context-overflow-architecture.md
31
+ // ============================================================================
32
+
33
+ /**
34
+ * Minimum number of attached documents before the multi-document delegation
35
+ * hint is injected. Below this threshold, the agent processes documents
36
+ * directly within its own context.
37
+ */
38
+ export const MULTI_DOCUMENT_THRESHOLD = 3;
39
+
40
+ /**
41
+ * Context utilization safety buffer multiplier (0-1).
42
+ * Applied as: effectiveMax = (maxContextTokens - maxOutputTokens) * CONTEXT_SAFETY_BUFFER
43
+ *
44
+ * Reserves headroom so the LLM doesn't hit hard token limits mid-generation.
45
+ * 0.9 = 10% reserved for safety.
46
+ */
47
+ export const CONTEXT_SAFETY_BUFFER = 0.9;
package/src/events.ts CHANGED
@@ -51,14 +51,6 @@ export class ModelEndHandler implements t.EventHandler {
51
51
  return handleToolCalls(data?.output?.tool_calls, metadata, graph);
52
52
  }
53
53
 
54
- console.log(`====== ${event.toUpperCase()} ======`);
55
- console.dir(
56
- {
57
- usage,
58
- },
59
- { depth: null }
60
- );
61
-
62
54
  const agentContext = graph.getAgentContext(metadata);
63
55
 
64
56
  if (
@@ -69,6 +69,13 @@ import { getChatModelClass, manualToolStreamProviders } from '@/llm/providers';
69
69
  import { ToolNode as CustomToolNode, toolsCondition } from '@/tools/ToolNode';
70
70
  import { ChatOpenAI, AzureChatOpenAI } from '@/llm/openai';
71
71
  import { safeDispatchCustomEvent } from '@/utils/events';
72
+ import {
73
+ detectDocuments,
74
+ shouldInjectMultiDocHint,
75
+ buildMultiDocHintContent,
76
+ buildPostPruneNote,
77
+ hasTaskTool,
78
+ } from '@/utils/contextPressure';
72
79
  import { createSchemaOnlyTools } from '@/tools/schema';
73
80
  import { prepareSchemaForProvider } from '@/schemas/validate';
74
81
  import { AgentContext } from '@/agents/AgentContext';
@@ -1367,45 +1374,12 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
1367
1374
 
1368
1375
  // ====================================================================
1369
1376
  // PRE-PRUNING DELEGATION CHECK
1370
- // Before pruning strips messages (losing context), check if we should
1371
- // delegate instead. If context would be pruned AND the agent has the
1372
- // task tool, inject a delegation hint and SKIP pruning — preserving
1373
- // the content for the LLM to understand what to delegate.
1374
1377
  // ====================================================================
1375
- let delegationInjectedPrePrune = false;
1376
- const hasTaskToolPrePrune = agentContext.tools?.some((tool) => {
1377
- const toolName =
1378
- typeof tool === 'object' && 'name' in tool
1379
- ? (tool as { name: string }).name
1380
- : '';
1381
- return toolName === 'task';
1382
- });
1383
-
1384
- if (
1385
- hasTaskToolPrePrune === true &&
1386
- agentContext.tokenCounter &&
1387
- agentContext.maxContextTokens != null
1388
- ) {
1389
- // Estimate total tokens in messages BEFORE pruning
1390
- let prePruneTokens = 0;
1391
- for (const msg of messages) {
1392
- prePruneTokens += agentContext.tokenCounter(msg);
1393
- }
1394
- // Add instruction tokens (system prompt)
1395
- prePruneTokens += agentContext.instructionTokens;
1396
-
1397
- const prePruneUtilization =
1398
- (prePruneTokens / agentContext.maxContextTokens) * 100;
1399
-
1400
- if (prePruneUtilization > 70) {
1401
- console.warn(
1402
- `[Graph] PRE-PRUNE delegation check: ${prePruneUtilization.toFixed(1)}% utilization ` +
1403
- `(${prePruneTokens}/${agentContext.maxContextTokens} tokens). ` +
1404
- 'Injecting delegation hint INSTEAD of pruning.'
1405
- );
1406
- delegationInjectedPrePrune = true;
1407
- }
1408
- }
1378
+ // Context management is now fully mechanical:
1379
+ // - Pruning always runs when needed (no delegation-based skip)
1380
+ // - Auto-continuation in client.js handles max_tokens finish reason
1381
+ // - LLM never sees raw token numbers (prevents voluntary bail-out)
1382
+ // ====================================================================
1409
1383
 
1410
1384
  if (
1411
1385
  !agentContext.pruneMessages &&
@@ -1436,8 +1410,8 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
1436
1410
  });
1437
1411
  }
1438
1412
 
1439
- if (agentContext.pruneMessages && !delegationInjectedPrePrune) {
1440
- console.info(
1413
+ if (agentContext.pruneMessages) {
1414
+ console.debug(
1441
1415
  `[Graph:ContextMgmt] Pruning messages | inputCount=${messages.length} | maxTokens=${agentContext.maxContextTokens}`
1442
1416
  );
1443
1417
  const { context, indexTokenCountMap, messagesToRefine } =
@@ -1448,22 +1422,24 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
1448
1422
  });
1449
1423
  agentContext.indexTokenCountMap = indexTokenCountMap;
1450
1424
  messagesToUse = context;
1451
- console.info(
1425
+ console.debug(
1452
1426
  `[Graph:ContextMgmt] Pruned | kept=${context.length} | discarded=${messagesToRefine.length} | originalCount=${messages.length}`
1453
1427
  );
1454
1428
 
1455
1429
  // Summarize discarded messages if callback provided
1430
+ let hasSummary = false;
1456
1431
  if (messagesToRefine.length > 0 && agentContext.summarizeCallback) {
1457
- console.info(
1432
+ console.debug(
1458
1433
  `[Graph:ContextMgmt] Summarizing ${messagesToRefine.length} discarded messages`
1459
1434
  );
1460
1435
  try {
1461
1436
  const summary =
1462
1437
  await agentContext.summarizeCallback(messagesToRefine);
1463
- console.info(
1438
+ console.debug(
1464
1439
  `[Graph:ContextMgmt] Summary received | len=${summary?.length ?? 0} | hasContent=${summary != null && summary !== ''}`
1465
1440
  );
1466
1441
  if (summary != null && summary !== '') {
1442
+ hasSummary = true;
1467
1443
  const summaryMsg = new SystemMessage(
1468
1444
  `[Conversation Summary]\n${summary}`
1469
1445
  );
@@ -1475,7 +1451,7 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
1475
1451
  summaryMsg,
1476
1452
  ...messagesToUse.slice(systemIdx),
1477
1453
  ];
1478
- console.info(
1454
+ console.debug(
1479
1455
  `[Graph:ContextMgmt] Summary injected at index ${systemIdx} | finalMsgCount=${messagesToUse.length}`
1480
1456
  );
1481
1457
  }
@@ -1483,10 +1459,21 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
1483
1459
  console.error('[Graph] Summarization callback failed:', err);
1484
1460
  }
1485
1461
  }
1486
- } else if (delegationInjectedPrePrune) {
1487
- console.info(
1488
- '[Graph] Skipping pruning — delegation will handle context pressure'
1489
- );
1462
+
1463
+ // Post-prune context note: inform the LLM that context was compressed
1464
+ // without exposing token numbers (prevents voluntary bail-out)
1465
+ if (messagesToRefine.length > 0 && hasTaskTool(agentContext.tools)) {
1466
+ const postPruneNote = buildPostPruneNote(
1467
+ messagesToRefine.length,
1468
+ hasSummary
1469
+ );
1470
+ if (postPruneNote) {
1471
+ messagesToUse = [...messagesToUse, new SystemMessage(postPruneNote)];
1472
+ console.debug(
1473
+ `[Graph:ContextMgmt] Post-prune note injected | hasSummary=${hasSummary} | discarded=${messagesToRefine.length}`
1474
+ );
1475
+ }
1476
+ }
1490
1477
  }
1491
1478
 
1492
1479
  let finalMessages = messagesToUse;
@@ -1645,106 +1632,32 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
1645
1632
  );
1646
1633
 
1647
1634
  // ====================================================================
1648
- // CONTEXT PRESSURE AWARENESS — Intelligent Sub-Agent Delegation
1649
- //
1650
- // Two triggers for delegation hints:
1651
- // 1. DOCUMENT COUNT: When 3+ documents are detected in the conversation,
1652
- // inject a delegation hint on the FIRST iteration (before the LLM
1653
- // has called any tools). This ensures the agent delegates upfront
1654
- // rather than trying to process all documents itself.
1655
- // 2. TOKEN UTILIZATION: At EVERY iteration, if context is filling up
1656
- // (70%/85%), inject escalating hints to delegate remaining work.
1635
+ // MULTI-DOCUMENT DELEGATION (task-driven, not budget-driven)
1657
1636
  //
1658
- // This runs mid-chain so even if tool responses push context up
1659
- // after the first LLM call, subsequent iterations get the hint.
1637
+ // Token-based pressure hints have been removed the LLM never sees
1638
+ // raw token numbers. Context overflow is handled mechanically by
1639
+ // pruning (Graph) + auto-continuation (client.js max_tokens detection).
1640
+ // See: docs/context-overflow-architecture.md
1660
1641
  // ====================================================================
1661
- const hasTaskToolInContext = agentContext.tools?.some((tool) => {
1662
- const toolName =
1663
- typeof tool === 'object' && 'name' in tool
1664
- ? (tool as { name: string }).name
1665
- : '';
1666
- return toolName === 'task';
1667
- });
1668
-
1669
- if (
1670
- hasTaskToolInContext === true &&
1671
- contextAnalytics.utilizationPercent != null &&
1672
- contextAnalytics.maxContextTokens != null
1673
- ) {
1674
- const utilization = contextAnalytics.utilizationPercent;
1675
- const totalTokens = contextAnalytics.totalTokens;
1676
- const maxTokens = contextAnalytics.maxContextTokens;
1677
- const remainingTokens = maxTokens - totalTokens;
1678
-
1679
- // Count attached documents by scanning for document patterns in HumanMessages:
1680
- // 1. # "filename" headers in "Attached document(s):" blocks (text content)
1681
- // 2. **filename1, filename2** in "The user has attached:" blocks (embedded files)
1682
- // 3. Filenames in file_search tool results
1683
- let documentCount = 0;
1684
- const documentNames: string[] = [];
1685
- for (const msg of finalMessages) {
1686
- const content =
1687
- typeof msg.content === 'string'
1688
- ? msg.content
1689
- : Array.isArray(msg.content)
1690
- ? msg.content
1691
- .map((p: unknown) => {
1692
- const part = p as Record<string, unknown>;
1693
- return String(part.text ?? part.content ?? '');
1694
- })
1695
- .join(' ')
1696
- : '';
1697
- // Pattern 1: # "filename" headers in attached document blocks
1698
- const docMatches = content.match(/# "([^"]+)"/g);
1699
- if (docMatches) {
1700
- for (const match of docMatches) {
1701
- const name = match.replace(/# "/, '').replace(/"$/, '');
1702
- if (!documentNames.includes(name)) {
1703
- documentNames.push(name);
1704
- documentCount++;
1705
- }
1706
- }
1707
- }
1708
- // Pattern 2: "The user has attached: **file1, file2**" (embedded files)
1709
- const attachedMatch = content.match(
1710
- /user has attached:\s*\*\*([^*]+)\*\*/i
1642
+ if (hasTaskTool(agentContext.tools)) {
1643
+ const { count: documentCount, names: documentNames } =
1644
+ detectDocuments(finalMessages);
1645
+
1646
+ // Observability log (no token numbers exposed to LLM)
1647
+ if (contextAnalytics.utilizationPercent != null) {
1648
+ console.debug(
1649
+ `[Graph] Context utilization: ${contextAnalytics.utilizationPercent.toFixed(1)}% | ` +
1650
+ `messages: ${finalMessages.length} | docs: ${documentCount}`
1711
1651
  );
1712
- if (attachedMatch) {
1713
- const names = attachedMatch[1]
1714
- .split(',')
1715
- .map((n: string) => n.trim())
1716
- .filter(Boolean);
1717
- for (const name of names) {
1718
- if (!documentNames.includes(name)) {
1719
- documentNames.push(name);
1720
- documentCount++;
1721
- }
1722
- }
1723
- }
1724
1652
  }
1725
1653
 
1726
- // BASELINE LOG: Always fires so we can verify this code path runs
1727
- console.info(
1728
- `[Graph] Context utilization: ${utilization.toFixed(1)}% ` +
1729
- `(${totalTokens}/${maxTokens} tokens, ${remainingTokens} remaining) | ` +
1730
- `hasTaskTool: true | messages: ${finalMessages.length} | docs: ${documentCount}`
1731
- );
1732
-
1733
- // TRIGGER 1: Multi-document delegation (3+ documents detected)
1734
- // Only inject on first iteration (no AI messages yet = agent hasn't responded)
1654
+ // Multi-document delegation: first iteration only (before AI has responded)
1735
1655
  const hasAiResponse = finalMessages.some(
1736
1656
  (m) => m._getType() === 'ai' || m._getType() === 'tool'
1737
1657
  );
1738
- if (documentCount >= 3 && !hasAiResponse) {
1658
+ if (shouldInjectMultiDocHint(documentCount, hasAiResponse)) {
1739
1659
  const pressureMsg = new HumanMessage({
1740
- content:
1741
- `[MULTI-DOCUMENT PROCESSING — ${documentCount} documents detected]\n` +
1742
- `Documents: ${documentNames.join(', ')}\n\n` +
1743
- `You have ${documentCount} documents attached. For thorough analysis, use the "task" tool ` +
1744
- 'to delegate each document (or group of related documents) to a sub-agent.\n' +
1745
- 'Each sub-agent has its own fresh context window and can use file_search to retrieve the full document content.\n' +
1746
- 'After all sub-agents complete, synthesize their results into a comprehensive response.\n\n' +
1747
- 'This approach ensures each document gets full attention without context limitations.',
1660
+ content: buildMultiDocHintContent(documentCount, documentNames),
1748
1661
  });
1749
1662
  finalMessages = [...finalMessages, pressureMsg];
1750
1663
  console.info(
@@ -1752,43 +1665,6 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
1752
1665
  `${documentNames.join(', ')}`
1753
1666
  );
1754
1667
  }
1755
-
1756
- // TRIGGER 2: Token utilization thresholds (mid-chain safety net)
1757
- // Also fires when we skipped pruning due to delegationInjectedPrePrune
1758
- if (
1759
- utilization > 85 ||
1760
- (delegationInjectedPrePrune && utilization > 50)
1761
- ) {
1762
- // CRITICAL: Context is high — MANDATE delegation
1763
- const pressureMsg = new HumanMessage({
1764
- content:
1765
- `[CONTEXT BUDGET CRITICAL — ${utilization.toFixed(0)}% used]\n` +
1766
- `You have used ${totalTokens} of ${maxTokens} tokens (${remainingTokens} remaining).\n` +
1767
- 'Your context is very large. You MUST use the "task" tool to delegate work to sub-agents.\n' +
1768
- 'Each sub-agent runs in its own fresh context window and can use file_search to access documents.\n' +
1769
- 'Do NOT attempt to process documents directly — delegate each document to a sub-agent, then synthesize results.',
1770
- });
1771
- finalMessages = [...finalMessages, pressureMsg];
1772
- console.warn(
1773
- `[Graph] Context pressure CRITICAL (${utilization.toFixed(0)}%): ` +
1774
- `Injected mandatory delegation hint. ${remainingTokens} tokens remaining. ` +
1775
- `prePruneSkipped: ${delegationInjectedPrePrune}`
1776
- );
1777
- } else if (utilization > 70) {
1778
- // WARNING: Context filling up — suggest delegation
1779
- const pressureMsg = new HumanMessage({
1780
- content:
1781
- `[CONTEXT BUDGET WARNING — ${utilization.toFixed(0)}% used]\n` +
1782
- `You have used ${totalTokens} of ${maxTokens} tokens (${remainingTokens} remaining).\n` +
1783
- 'Your context is filling up. Consider using the "task" tool to delegate complex operations to sub-agents.\n' +
1784
- "Sub-agents run in fresh context windows and won't consume your remaining budget.",
1785
- });
1786
- finalMessages = [...finalMessages, pressureMsg];
1787
- console.info(
1788
- `[Graph] Context pressure WARNING (${utilization.toFixed(0)}%): ` +
1789
- `Injected delegation suggestion. ${remainingTokens} tokens remaining.`
1790
- );
1791
- }
1792
1668
  }
1793
1669
 
1794
1670
  // Structured output mode: when the agent has NO tools, produce structured JSON immediately.
@@ -2302,7 +2178,7 @@ If I seem to be missing something we discussed earlier, just give me a quick rem
2302
2178
  reducer: (a, b) => {
2303
2179
  if (!a.length) {
2304
2180
  this.startIndex = a.length + b.length;
2305
- console.info(
2181
+ console.debug(
2306
2182
  `[Graph:Reducer] Initial messages | startIndex=${this.startIndex} | inputMsgCount=${b.length}`
2307
2183
  );
2308
2184
  } else {
@@ -596,30 +596,38 @@ describe('Pre-invocation utilization gate', () => {
596
596
  expect(emergency.length).toBeLessThan(2000); // Emergency summaries are compact
597
597
  });
598
598
 
599
- it('injects delegation hint at >70% utilization for agents with task tool', () => {
600
- const utilization = 75;
601
- const hasTaskTool = true;
602
-
603
- if (utilization > 70 && hasTaskTool) {
604
- const delegationHint = new HumanMessage({
605
- content:
606
- '[System] Context window is at 75% capacity. Consider delegating complex sub-tasks ' +
607
- 'to the task tool to maintain context availability.',
608
- });
609
- expect(delegationHint.content).toContain('75%');
610
- expect(delegationHint.content).toContain('task tool');
599
+ it('does NOT inject token budget hints at any utilization level', () => {
600
+ // Token budget hints were removed to prevent LLM voluntary bail-out.
601
+ // Context overflow is handled mechanically by pruning + auto-continuation.
602
+ // See: docs/context-overflow-architecture.md
603
+ const utilizationLevels = [50, 70, 85, 95, 101];
604
+ for (const utilization of utilizationLevels) {
605
+ const messages = buildConversation(10, 200);
606
+ // No message should contain raw token numbers or budget percentages
607
+ for (const msg of messages) {
608
+ const content =
609
+ typeof msg.content === 'string'
610
+ ? msg.content
611
+ : JSON.stringify(msg.content);
612
+ expect(content).not.toMatch(/CONTEXT BUDGET/);
613
+ expect(content).not.toMatch(/\d+ of \d+ tokens/);
614
+ }
611
615
  }
612
616
  });
613
617
 
614
- it('does not inject delegation hint below 70%', () => {
615
- const utilization = 65;
616
- let delegationInjected = false;
617
-
618
- if (utilization > 70) {
619
- delegationInjected = true;
618
+ it('post-prune note does not contain token numbers', () => {
619
+ // After pruning, a context note is injected but it must not
620
+ // expose any token counts or budget percentages to the LLM
621
+ const { buildPostPruneNote } = require('@/utils/contextPressure');
622
+ const noteWithSummary = buildPostPruneNote(10, true);
623
+ const noteWithout = buildPostPruneNote(10, false);
624
+ for (const note of [noteWithSummary, noteWithout]) {
625
+ expect(note).not.toBeNull();
626
+ expect(note).not.toMatch(/\d+%/);
627
+ expect(note).not.toMatch(/\d+ of \d+ tokens/);
628
+ expect(note).not.toMatch(/BUDGET/i);
629
+ expect(note).toContain('task');
620
630
  }
621
-
622
- expect(delegationInjected).toBe(false);
623
631
  });
624
632
  });
625
633
 
@@ -20,7 +20,7 @@ config({
20
20
  import { HumanMessage, ToolMessage } from '@langchain/core/messages';
21
21
  import type { RunnableConfig } from '@langchain/core/runnables';
22
22
  import type * as t from '@/types';
23
- import { Providers, Constants, GraphEvents } from '@/common';
23
+ import { Providers, Constants, GraphEvents, EdgeType } from '@/common';
24
24
  import { StandardGraph } from '@/graphs/Graph';
25
25
  import { ChatModelStreamHandler, createContentAggregator } from '@/stream';
26
26
  import { ToolEndHandler, ModelEndHandler } from '@/events';
@@ -118,13 +118,13 @@ describeIf('Agent Handoff E2E with Bedrock', () => {
118
118
  {
119
119
  from: 'supervisor_abc123',
120
120
  to: 'agent_W47hBnn2RoVZEOy5595GC',
121
- edgeType: 'handoff',
121
+ edgeType: EdgeType.HANDOFF,
122
122
  // No description - should auto-generate from agent name + description
123
123
  },
124
124
  {
125
125
  from: 'supervisor_abc123',
126
126
  to: 'agent_X92kLmn4TpQR8vw3221HD',
127
- edgeType: 'handoff',
127
+ edgeType: EdgeType.HANDOFF,
128
128
  // No description
129
129
  },
130
130
  ];
@@ -203,8 +203,8 @@ describeIf('Agent Handoff E2E with Bedrock', () => {
203
203
  ];
204
204
 
205
205
  const edges: t.GraphEdge[] = [
206
- { from: 'router', to: 'sales_agent', edgeType: 'handoff' },
207
- { from: 'router', to: 'support_agent', edgeType: 'handoff' },
206
+ { from: 'router', to: 'sales_agent', edgeType: EdgeType.HANDOFF },
207
+ { from: 'router', to: 'support_agent', edgeType: EdgeType.HANDOFF },
208
208
  ];
209
209
 
210
210
  const { contentParts: _contentParts, aggregateContent } =
@@ -321,8 +321,8 @@ describeIf('Agent Handoff E2E with Bedrock', () => {
321
321
  ];
322
322
 
323
323
  const edges: t.GraphEdge[] = [
324
- { from: 'router', to: 'sales_agent', edgeType: 'handoff' },
325
- { from: 'router', to: 'support_agent', edgeType: 'handoff' },
324
+ { from: 'router', to: 'sales_agent', edgeType: EdgeType.HANDOFF },
325
+ { from: 'router', to: 'support_agent', edgeType: EdgeType.HANDOFF },
326
326
  ];
327
327
 
328
328
  const { contentParts: _contentParts, aggregateContent } =