@illuma-ai/agents 1.0.94 → 1.0.98

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. package/dist/cjs/common/constants.cjs +25 -0
  2. package/dist/cjs/common/constants.cjs.map +1 -1
  3. package/dist/cjs/events.cjs +0 -4
  4. package/dist/cjs/events.cjs.map +1 -1
  5. package/dist/cjs/graphs/Graph.cjs +38 -148
  6. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  7. package/dist/cjs/main.cjs +8 -0
  8. package/dist/cjs/main.cjs.map +1 -1
  9. package/dist/cjs/tools/CodeExecutor.cjs +5 -0
  10. package/dist/cjs/tools/CodeExecutor.cjs.map +1 -1
  11. package/dist/cjs/tools/ProgrammaticToolCalling.cjs +12 -6
  12. package/dist/cjs/tools/ProgrammaticToolCalling.cjs.map +1 -1
  13. package/dist/cjs/tools/ToolSearch.cjs +14 -10
  14. package/dist/cjs/tools/ToolSearch.cjs.map +1 -1
  15. package/dist/cjs/tools/handlers.cjs +0 -2
  16. package/dist/cjs/tools/handlers.cjs.map +1 -1
  17. package/dist/cjs/tools/search/search.cjs +12 -4
  18. package/dist/cjs/tools/search/search.cjs.map +1 -1
  19. package/dist/cjs/tools/search/tool.cjs +2 -1
  20. package/dist/cjs/tools/search/tool.cjs.map +1 -1
  21. package/dist/cjs/utils/contextPressure.cjs +154 -0
  22. package/dist/cjs/utils/contextPressure.cjs.map +1 -0
  23. package/dist/esm/common/constants.mjs +24 -1
  24. package/dist/esm/common/constants.mjs.map +1 -1
  25. package/dist/esm/events.mjs +0 -4
  26. package/dist/esm/events.mjs.map +1 -1
  27. package/dist/esm/graphs/Graph.mjs +38 -148
  28. package/dist/esm/graphs/Graph.mjs.map +1 -1
  29. package/dist/esm/main.mjs +2 -1
  30. package/dist/esm/main.mjs.map +1 -1
  31. package/dist/esm/tools/CodeExecutor.mjs +5 -0
  32. package/dist/esm/tools/CodeExecutor.mjs.map +1 -1
  33. package/dist/esm/tools/ProgrammaticToolCalling.mjs +12 -6
  34. package/dist/esm/tools/ProgrammaticToolCalling.mjs.map +1 -1
  35. package/dist/esm/tools/ToolSearch.mjs +14 -10
  36. package/dist/esm/tools/ToolSearch.mjs.map +1 -1
  37. package/dist/esm/tools/handlers.mjs +0 -2
  38. package/dist/esm/tools/handlers.mjs.map +1 -1
  39. package/dist/esm/tools/search/search.mjs +12 -4
  40. package/dist/esm/tools/search/search.mjs.map +1 -1
  41. package/dist/esm/tools/search/tool.mjs +2 -1
  42. package/dist/esm/tools/search/tool.mjs.map +1 -1
  43. package/dist/esm/utils/contextPressure.mjs +148 -0
  44. package/dist/esm/utils/contextPressure.mjs.map +1 -0
  45. package/dist/types/common/constants.d.ts +14 -0
  46. package/dist/types/tools/ProgrammaticToolCalling.d.ts +2 -2
  47. package/dist/types/tools/search/types.d.ts +3 -0
  48. package/dist/types/utils/contextPressure.d.ts +72 -0
  49. package/dist/types/utils/index.d.ts +1 -0
  50. package/package.json +1 -1
  51. package/src/common/constants.ts +26 -0
  52. package/src/events.ts +0 -8
  53. package/src/graphs/Graph.ts +53 -177
  54. package/src/graphs/contextManagement.e2e.test.ts +28 -20
  55. package/src/specs/agent-handoffs-bedrock.integration.test.ts +7 -7
  56. package/src/specs/agent-handoffs.test.ts +36 -36
  57. package/src/specs/thinking-handoff.test.ts +10 -10
  58. package/src/tools/CodeExecutor.ts +6 -0
  59. package/src/tools/ProgrammaticToolCalling.ts +23 -6
  60. package/src/tools/ToolSearch.ts +14 -10
  61. package/src/tools/handlers.ts +0 -4
  62. package/src/tools/search/search.ts +15 -3
  63. package/src/tools/search/tool.ts +2 -0
  64. package/src/tools/search/types.ts +3 -0
  65. package/src/utils/contextPressure.test.ts +247 -0
  66. package/src/utils/contextPressure.ts +188 -0
  67. package/src/utils/index.ts +1 -0
@@ -19,7 +19,32 @@ const MIN_THINKING_BUDGET = 1024;
19
19
  * compounding across multi-tool conversations (e.g., 10 tool calls).
20
20
  */
21
21
  const TOOL_TURN_THINKING_BUDGET = 1024;
22
+ // ============================================================================
23
+ // CONTEXT OVERFLOW MANAGEMENT
24
+ //
25
+ // Context overflow is handled mechanically — no token budget numbers are
26
+ // exposed to the LLM. The system uses: pruning (Graph), summarization
27
+ // (summarizeCallback), and auto-continuation (client.js max_tokens detection).
28
+ //
29
+ // See: docs/context-overflow-architecture.md
30
+ // ============================================================================
31
+ /**
32
+ * Minimum number of attached documents before the multi-document delegation
33
+ * hint is injected. Below this threshold, the agent processes documents
34
+ * directly within its own context.
35
+ */
36
+ const MULTI_DOCUMENT_THRESHOLD = 3;
37
+ /**
38
+ * Context utilization safety buffer multiplier (0-1).
39
+ * Applied as: effectiveMax = (maxContextTokens - maxOutputTokens) * CONTEXT_SAFETY_BUFFER
40
+ *
41
+ * Reserves headroom so the LLM doesn't hit hard token limits mid-generation.
42
+ * 0.9 = 10% reserved for safety.
43
+ */
44
+ const CONTEXT_SAFETY_BUFFER = 0.9;
22
45
 
46
+ exports.CONTEXT_SAFETY_BUFFER = CONTEXT_SAFETY_BUFFER;
23
47
  exports.MIN_THINKING_BUDGET = MIN_THINKING_BUDGET;
48
+ exports.MULTI_DOCUMENT_THRESHOLD = MULTI_DOCUMENT_THRESHOLD;
24
49
  exports.TOOL_TURN_THINKING_BUDGET = TOOL_TURN_THINKING_BUDGET;
25
50
  //# sourceMappingURL=constants.cjs.map
@@ -1 +1 @@
1
- {"version":3,"file":"constants.cjs","sources":["../../../src/common/constants.ts"],"sourcesContent":["// src/common/constants.ts\n\n/**\n * Minimum thinking budget allowed by the Anthropic API.\n * Extended thinking requires at least 1024 budget_tokens.\n * @see https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking\n */\nexport const MIN_THINKING_BUDGET = 1024;\n\n/**\n * Reduced thinking budget for subsequent ReAct iterations (tool-result turns).\n *\n * In a ReAct agent loop, the first LLM call processes the user's query and\n * may need deep reasoning. Subsequent iterations (after tool results return)\n * typically only need to decide \"call next tool\" or \"generate final response\"\n * — 1024 tokens is sufficient for this routing logic.\n *\n * This reduces wall-clock time per iteration from ~20-30s to ~5-10s,\n * compounding across multi-tool conversations (e.g., 10 tool calls).\n */\nexport const TOOL_TURN_THINKING_BUDGET = 1024;\n"],"names":[],"mappings":";;AAAA;AAEA;;;;AAIG;AACI,MAAM,mBAAmB,GAAG;AAEnC;;;;;;;;;;AAUG;AACI,MAAM,yBAAyB,GAAG;;;;;"}
1
+ {"version":3,"file":"constants.cjs","sources":["../../../src/common/constants.ts"],"sourcesContent":["// src/common/constants.ts\n\n/**\n * Minimum thinking budget allowed by the Anthropic API.\n * Extended thinking requires at least 1024 budget_tokens.\n * @see https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking\n */\nexport const MIN_THINKING_BUDGET = 1024;\n\n/**\n * Reduced thinking budget for subsequent ReAct iterations (tool-result turns).\n *\n * In a ReAct agent loop, the first LLM call processes the user's query and\n * may need deep reasoning. Subsequent iterations (after tool results return)\n * typically only need to decide \"call next tool\" or \"generate final response\"\n * — 1024 tokens is sufficient for this routing logic.\n *\n * This reduces wall-clock time per iteration from ~20-30s to ~5-10s,\n * compounding across multi-tool conversations (e.g., 10 tool calls).\n */\nexport const TOOL_TURN_THINKING_BUDGET = 1024;\n\n// ============================================================================\n// CONTEXT OVERFLOW MANAGEMENT\n//\n// Context overflow is handled mechanically — no token budget numbers are\n// exposed to the LLM. The system uses: pruning (Graph), summarization\n// (summarizeCallback), and auto-continuation (client.js max_tokens detection).\n//\n// See: docs/context-overflow-architecture.md\n// ============================================================================\n\n/**\n * Minimum number of attached documents before the multi-document delegation\n * hint is injected. Below this threshold, the agent processes documents\n * directly within its own context.\n */\nexport const MULTI_DOCUMENT_THRESHOLD = 3;\n\n/**\n * Context utilization safety buffer multiplier (0-1).\n * Applied as: effectiveMax = (maxContextTokens - maxOutputTokens) * CONTEXT_SAFETY_BUFFER\n *\n * Reserves headroom so the LLM doesn't hit hard token limits mid-generation.\n * 0.9 = 10% reserved for safety.\n */\nexport const CONTEXT_SAFETY_BUFFER = 0.9;\n"],"names":[],"mappings":";;AAAA;AAEA;;;;AAIG;AACI,MAAM,mBAAmB,GAAG;AAEnC;;;;;;;;;;AAUG;AACI,MAAM,yBAAyB,GAAG;AAEzC;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AACA;AAEA;;;;AAIG;AACI,MAAM,wBAAwB,GAAG;AAExC;;;;;;AAMG;AACI,MAAM,qBAAqB,GAAG;;;;;;;"}
@@ -32,10 +32,6 @@ class ModelEndHandler {
32
32
  if (metadata.ls_provider === 'FakeListChatModel') {
33
33
  return handlers.handleToolCalls(data?.output?.tool_calls, metadata, graph);
34
34
  }
35
- console.log(`====== ${event.toUpperCase()} ======`);
36
- console.dir({
37
- usage,
38
- }, { depth: null });
39
35
  const agentContext = graph.getAgentContext(metadata);
40
36
  if (agentContext.provider !== _enum.Providers.GOOGLE &&
41
37
  agentContext.provider !== _enum.Providers.BEDROCK) {
@@ -1 +1 @@
1
- {"version":3,"file":"events.cjs","sources":["../../src/events.ts"],"sourcesContent":["/* eslint-disable no-console */\n// src/events.ts\nimport type {\n BaseMessageFields,\n UsageMetadata,\n} from '@langchain/core/messages';\nimport type { MultiAgentGraph, StandardGraph } from '@/graphs';\nimport type { Logger } from 'winston';\nimport type * as t from '@/types';\nimport { handleToolCalls } from '@/tools/handlers';\nimport { Constants, Providers } from '@/common';\n\nexport class HandlerRegistry {\n private handlers: Map<string, t.EventHandler> = new Map();\n\n register(eventType: string, handler: t.EventHandler): void {\n this.handlers.set(eventType, handler);\n }\n\n getHandler(eventType: string): t.EventHandler | undefined {\n return this.handlers.get(eventType);\n }\n}\n\nexport class ModelEndHandler implements t.EventHandler {\n collectedUsage?: UsageMetadata[];\n constructor(collectedUsage?: UsageMetadata[]) {\n if (collectedUsage && !Array.isArray(collectedUsage)) {\n throw new Error('collectedUsage must be an array');\n }\n this.collectedUsage = collectedUsage;\n }\n\n async handle(\n event: string,\n data: t.ModelEndData,\n metadata?: Record<string, unknown>,\n graph?: StandardGraph | MultiAgentGraph\n ): Promise<void> {\n if (!graph || !metadata) {\n console.warn(`Graph or metadata not found in ${event} event`);\n return;\n }\n\n const usage = data?.output?.usage_metadata;\n if (usage != null && this.collectedUsage != null) {\n this.collectedUsage.push(usage);\n }\n\n if (metadata.ls_provider === 'FakeListChatModel') {\n return handleToolCalls(data?.output?.tool_calls, metadata, graph);\n }\n\n console.log(`====== ${event.toUpperCase()} ======`);\n console.dir(\n {\n usage,\n },\n { depth: null }\n );\n\n const agentContext = graph.getAgentContext(metadata);\n\n if (\n agentContext.provider !== Providers.GOOGLE &&\n agentContext.provider !== Providers.BEDROCK\n ) {\n return;\n }\n\n await handleToolCalls(data?.output?.tool_calls, metadata, graph);\n }\n}\n\nexport class ToolEndHandler implements t.EventHandler {\n private callback?: t.ToolEndCallback;\n private logger?: Logger;\n constructor(callback?: t.ToolEndCallback, logger?: Logger) {\n this.callback = callback;\n this.logger = logger;\n }\n\n /**\n * Handles on_tool_end events from the for-await stream consumer.\n *\n * This handler is now purely a consumer callback — tool completion\n * (ON_RUN_STEP_COMPLETED dispatch + session context storage) is handled\n * in graph context by ToolNode directly, eliminating the race between\n * the stream consumer and graph execution.\n */\n async handle(\n event: string,\n data: t.StreamEventData | undefined,\n metadata?: Record<string, unknown>,\n graph?: StandardGraph | MultiAgentGraph\n ): Promise<void> {\n try {\n if (!graph || !metadata) {\n if (this.logger) {\n this.logger.warn(`Graph or metadata not found in ${event} event`);\n } else {\n console.warn(`Graph or metadata not found in ${event} event`);\n }\n return;\n }\n\n const toolEndData = data as t.ToolEndData | undefined;\n if (!toolEndData?.output) {\n if (this.logger) {\n this.logger.warn('No output found in tool_end event');\n } else {\n console.warn('No output found in tool_end event');\n }\n return;\n }\n\n if (metadata[Constants.PROGRAMMATIC_TOOL_CALLING] === true) {\n return;\n }\n\n if (this.callback) {\n await this.callback(toolEndData, metadata);\n }\n } catch (error) {\n if (this.logger) {\n this.logger.error('Error handling tool_end event:', error);\n } else {\n console.error('Error handling tool_end event:', error);\n }\n }\n }\n}\n\nexport class TestLLMStreamHandler implements t.EventHandler {\n handle(event: string, data: t.StreamEventData | undefined): void {\n const chunk = data?.chunk;\n const isMessageChunk = !!(chunk && 'message' in chunk);\n const msg = isMessageChunk ? chunk.message : undefined;\n if (msg && msg.tool_call_chunks && msg.tool_call_chunks.length > 0) {\n console.log(msg.tool_call_chunks);\n } else if (msg && msg.content) {\n if (typeof msg.content === 'string') {\n process.stdout.write(msg.content);\n }\n }\n }\n}\n\nexport class TestChatStreamHandler implements t.EventHandler {\n handle(event: string, data: t.StreamEventData | undefined): void {\n const chunk = data?.chunk;\n const isContentChunk = !!(chunk && 'content' in chunk);\n const content = isContentChunk && chunk.content;\n\n if (!content || !isContentChunk) {\n return;\n }\n\n if (chunk.tool_call_chunks && chunk.tool_call_chunks.length > 0) {\n console.dir(chunk.tool_call_chunks, { depth: null });\n }\n\n if (typeof content === 'string') {\n process.stdout.write(content);\n } else {\n console.dir(content, { depth: null });\n }\n }\n}\n\nexport class LLMStreamHandler implements t.EventHandler {\n handle(\n event: string,\n data: t.StreamEventData | undefined,\n metadata?: Record<string, unknown>\n ): void {\n const chunk = data?.chunk;\n const isMessageChunk = !!(chunk && 'message' in chunk);\n const msg = isMessageChunk && chunk.message;\n if (metadata) {\n console.log(metadata);\n }\n if (msg && msg.tool_call_chunks && msg.tool_call_chunks.length > 0) {\n console.log(msg.tool_call_chunks);\n } else if (msg && msg.content) {\n if (typeof msg.content === 'string') {\n // const text_delta = msg.content;\n // dispatchCustomEvent(GraphEvents.CHAT_MODEL_STREAM, { chunk }, config);\n process.stdout.write(msg.content);\n }\n }\n }\n}\n\nexport const createMetadataAggregator = (\n _collected?: Record<\n string,\n NonNullable<BaseMessageFields['response_metadata']>\n >[]\n): t.MetadataAggregatorResult => {\n const collected = _collected || [];\n\n const handleLLMEnd: t.HandleLLMEnd = (output) => {\n const { generations } = output;\n const lastMessageOutput = (\n generations[generations.length - 1] as\n | (t.StreamGeneration | undefined)[]\n | undefined\n )?.[0];\n if (!lastMessageOutput) {\n return;\n }\n const { message } = lastMessageOutput;\n if (message?.response_metadata) {\n collected.push(message.response_metadata);\n }\n };\n\n return { handleLLMEnd, collected };\n};\n"],"names":["handleToolCalls","Providers","Constants"],"mappings":";;;;;MAYa,eAAe,CAAA;AAClB,IAAA,QAAQ,GAAgC,IAAI,GAAG,EAAE;IAEzD,QAAQ,CAAC,SAAiB,EAAE,OAAuB,EAAA;QACjD,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,SAAS,EAAE,OAAO,CAAC;IACvC;AAEA,IAAA,UAAU,CAAC,SAAiB,EAAA;QAC1B,OAAO,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,SAAS,CAAC;IACrC;AACD;MAEY,eAAe,CAAA;AAC1B,IAAA,cAAc;AACd,IAAA,WAAA,CAAY,cAAgC,EAAA;QAC1C,IAAI,cAAc,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,cAAc,CAAC,EAAE;AACpD,YAAA,MAAM,IAAI,KAAK,CAAC,iCAAiC,CAAC;QACpD;AACA,QAAA,IAAI,CAAC,cAAc,GAAG,cAAc;IACtC;IAEA,MAAM,MAAM,CACV,KAAa,EACb,IAAoB,EACpB,QAAkC,EAClC,KAAuC,EAAA;AAEvC,QAAA,IAAI,CAAC,KAAK,IAAI,CAAC,QAAQ,EAAE;AACvB,YAAA,OAAO,CAAC,IAAI,CAAC,kCAAkC,KAAK,CAAA,MAAA,CAAQ,CAAC;YAC7D;QACF;AAEA,QAAA,MAAM,KAAK,GAAG,IAAI,EAAE,MAAM,EAAE,cAAc;QAC1C,IAAI,KAAK,IAAI,IAAI,IAAI,IAAI,CAAC,cAAc,IAAI,IAAI,EAAE;AAChD,YAAA,IAAI,CAAC,cAAc,CAAC,IAAI,CAAC,KAAK,CAAC;QACjC;AAEA,QAAA,IAAI,QAAQ,CAAC,WAAW,KAAK,mBAAmB,EAAE;AAChD,YAAA,OAAOA,wBAAe,CAAC,IAAI,EAAE,MAAM,EAAE,UAAU,EAAE,QAAQ,EAAE,KAAK,CAAC;QACnE;QAEA,OAAO,CAAC,GAAG,CAAC,CAAA,OAAA,EAAU,KAAK,CAAC,WAAW,EAAE,CAAA,OAAA,CAAS,CAAC;QACnD,OAAO,CAAC,GAAG,CACT;YACE,KAAK;AACN,SAAA,EACD,EAAE,KAAK,EAAE,IAAI,EAAE,CAChB;QAED,MAAM,YAAY,GAAG,KAAK,CAAC,eAAe,CAAC,QAAQ,CAAC;AAEpD,QAAA,IACE,YAAY,CAAC,QAAQ,KAAKC,eAAS,CAAC,MAAM;AAC1C,YAAA,YAAY,CAAC,QAAQ,KAAKA,eAAS,CAAC,OAAO,EAC3C;YACA;QACF;AAEA,QAAA,MAAMD,wBAAe,CAAC,IAAI,EAAE,MAAM,EAAE,UAAU,EAAE,QAAQ,EAAE,KAAK,CAAC;IAClE;AACD;MAEY,cAAc,CAAA;AACjB,IAAA,QAAQ;AACR,IAAA,MAAM;IACd,WAAA,CAAY,QAA4B,EAAE,MAAe,EAAA;AACvD,QAAA,IAAI,CAAC,QAAQ,GAAG,QAAQ;AACxB,QAAA,IAAI,CAAC,MAAM,GAAG,MAAM;IACtB;AAEA;;;;;;;AAOG;IACH,MAAM,MAAM,CACV,KAAa,EACb,IAAmC,EACnC,QAAkC,EAClC,KAAuC,EAAA;AAEvC,QAAA,IAAI;AACF,YAAA,IAAI,CAAC,KAAK,IAAI,CAAC,QAAQ,EAAE;AACvB,gBAAA,IAAI,IAAI,CAAC,MAAM,EAAE;oBACf,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAA,+BAAA,EAAkC,KAAK,CAAA,MAAA,CAAQ,CAAC;gBACnE;qBAAO;AACL,oBAAA,OAAO,CAAC,IAAI,CAAC,kCAAkC,KAAK,CAAA,MAAA,CAAQ,CAAC;gBAC/D;gBACA;YACF;YAEA,MAAM,WAAW,GAAG,IAAiC;AACrD,YAAA,IAAI,CAAC,WAAW,EAAE,MAAM,EAAE;AACxB,gBAAA,IAAI,IAAI,CAAC,MAAM,EAAE;AACf,oBAAA,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,mCAAmC,CAAC;gBACvD;qBAAO;AACL,oBAAA,OAAO,CAAC,IAAI,CAAC,mCAAmC,CAAC;gBACnD;gBACA;YACF;YAEA,IAAI,QAAQ,CAACE,eAAS,CAAC,yBAAyB,CAAC,KAAK,IAAI,EAAE;gBAC1D;YACF;AAEA,YAAA,IAAI,IAAI,CAAC,QAAQ,EAAE;gBACjB,MAAM,IAAI,CAAC,QAAQ,CAAC,WAAW,EAAE,QAAQ,CAAC;YAC5C;QACF;QAAE,OAAO,KAAK,EAAE;AACd,YAAA,IAAI,IAAI,CAAC,MAAM,EAAE;gBACf,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,gCAAgC,EAAE,KAAK,CAAC;YAC5D;iBAAO;AACL,gBAAA,OAAO,CAAC,KAAK,CAAC,gCAAgC,EAAE,KAAK,CAAC;YACxD;QACF;IACF;AACD;MAEY,oBAAoB,CAAA;IAC/B,MAAM,CAAC,KAAa,EAAE,IAAmC,EAAA;AACvD,QAAA,MAAM,KAAK,GAAG,IAAI,EAAE,KAAK;QACzB,MAAM,cAAc,GAAG,CAAC,EAAE,KAAK,IAAI,SAAS,IAAI,KAAK,CAAC;AACtD,QAAA,MAAM,GAAG,GAAG,cAAc,GAAG,KAAK,CAAC,OAAO,GAAG,SAAS;AACtD,QAAA,IAAI,GAAG,IAAI,GAAG,CAAC,gBAAgB,IAAI,GAAG,CAAC,gBAAgB,CAAC,MAAM,GAAG,CAAC,EAAE;AAClE,YAAA,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,gBAAgB,CAAC;QACnC;AAAO,aAAA,IAAI,GAAG,IAAI,GAAG,CAAC,OAAO,EAAE;AAC7B,YAAA,IAAI,OAAO,GAAG,CAAC,OAAO,KAAK,QAAQ,EAAE;gBACnC,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,OAAO,CAAC;YACnC;QACF;IACF;AACD;MAEY,qBAAqB,CAAA;IAChC,MAAM,CAAC,KAAa,EAAE,IAAmC,EAAA;AACvD,QAAA,MAAM,KAAK,GAAG,IAAI,EAAE,KAAK;QACzB,MAAM,cAAc,GAAG,CAAC,EAAE,KAAK,IAAI,SAAS,IAAI,KAAK,CAAC;AACtD,QAAA,MAAM,OAAO,GAAG,cAAc,IAAI,KAAK,CAAC,OAAO;AAE/C,QAAA,IAAI,CAAC,OAAO,IAAI,CAAC,cAAc,EAAE;YAC/B;QACF;AAEA,QAAA,IAAI,KAAK,CAAC,gBAAgB,IAAI,KAAK,CAAC,gBAAgB,CAAC,MAAM,GAAG,CAAC,EAAE;AAC/D,YAAA,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,gBAAgB,EAAE,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC;QACtD;AAEA,QAAA,IAAI,OAAO,OAAO,KAAK,QAAQ,EAAE;AAC/B,YAAA,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC;QAC/B;aAAO;YACL,OAAO,CAAC,GAAG,CAAC,OAAO,EAAE,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC;QACvC;IACF;AACD;MAEY,gBAAgB,CAAA;AAC3B,IAAA,MAAM,CACJ,KAAa,EACb,IAAmC,EACnC,QAAkC,EAAA;AAElC,QAAA,MAAM,KAAK,GAAG,IAAI,EAAE,KAAK;QACzB,MAAM,cAAc,GAAG,CAAC,EAAE,KAAK,IAAI,SAAS,IAAI,KAAK,CAAC;AACtD,QAAA,MAAM,GAAG,GAAG,cAAc,IAAI,KAAK,CAAC,OAAO;QAC3C,IAAI,QAAQ,EAAE;AACZ,YAAA,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC;QACvB;AACA,QAAA,IAAI,GAAG,IAAI,GAAG,CAAC,gBAAgB,IAAI,GAAG,CAAC,gBAAgB,CAAC,MAAM,GAAG,CAAC,EAAE;AAClE,YAAA,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,gBAAgB,CAAC;QACnC;AAAO,aAAA,IAAI,GAAG,IAAI,GAAG,CAAC,OAAO,EAAE;AAC7B,YAAA,IAAI,OAAO,GAAG,CAAC,OAAO,KAAK,QAAQ,EAAE;;;gBAGnC,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,OAAO,CAAC;YACnC;QACF;IACF;AACD;AAEM,MAAM,wBAAwB,GAAG,CACtC,UAGG,KAC2B;AAC9B,IAAA,MAAM,SAAS,GAAG,UAAU,IAAI,EAAE;AAElC,IAAA,MAAM,YAAY,GAAmB,CAAC,MAAM,KAAI;AAC9C,QAAA,MAAM,EAAE,WAAW,EAAE,GAAG,MAAM;AAC9B,QAAA,MAAM,iBAAiB,GACrB,WAAW,CAAC,WAAW,CAAC,MAAM,GAAG,CAAC,CAGnC,GAAG,CAAC,CAAC;QACN,IAAI,CAAC,iBAAiB,EAAE;YACtB;QACF;AACA,QAAA,MAAM,EAAE,OAAO,EAAE,GAAG,iBAAiB;AACrC,QAAA,IAAI,OAAO,EAAE,iBAAiB,EAAE;AAC9B,YAAA,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,iBAAiB,CAAC;QAC3C;AACF,IAAA,CAAC;AAED,IAAA,OAAO,EAAE,YAAY,EAAE,SAAS,EAAE;AACpC;;;;;;;;;;"}
1
+ {"version":3,"file":"events.cjs","sources":["../../src/events.ts"],"sourcesContent":["/* eslint-disable no-console */\n// src/events.ts\nimport type {\n BaseMessageFields,\n UsageMetadata,\n} from '@langchain/core/messages';\nimport type { MultiAgentGraph, StandardGraph } from '@/graphs';\nimport type { Logger } from 'winston';\nimport type * as t from '@/types';\nimport { handleToolCalls } from '@/tools/handlers';\nimport { Constants, Providers } from '@/common';\n\nexport class HandlerRegistry {\n private handlers: Map<string, t.EventHandler> = new Map();\n\n register(eventType: string, handler: t.EventHandler): void {\n this.handlers.set(eventType, handler);\n }\n\n getHandler(eventType: string): t.EventHandler | undefined {\n return this.handlers.get(eventType);\n }\n}\n\nexport class ModelEndHandler implements t.EventHandler {\n collectedUsage?: UsageMetadata[];\n constructor(collectedUsage?: UsageMetadata[]) {\n if (collectedUsage && !Array.isArray(collectedUsage)) {\n throw new Error('collectedUsage must be an array');\n }\n this.collectedUsage = collectedUsage;\n }\n\n async handle(\n event: string,\n data: t.ModelEndData,\n metadata?: Record<string, unknown>,\n graph?: StandardGraph | MultiAgentGraph\n ): Promise<void> {\n if (!graph || !metadata) {\n console.warn(`Graph or metadata not found in ${event} event`);\n return;\n }\n\n const usage = data?.output?.usage_metadata;\n if (usage != null && this.collectedUsage != null) {\n this.collectedUsage.push(usage);\n }\n\n if (metadata.ls_provider === 'FakeListChatModel') {\n return handleToolCalls(data?.output?.tool_calls, metadata, graph);\n }\n\n const agentContext = graph.getAgentContext(metadata);\n\n if (\n agentContext.provider !== Providers.GOOGLE &&\n agentContext.provider !== Providers.BEDROCK\n ) {\n return;\n }\n\n await handleToolCalls(data?.output?.tool_calls, metadata, graph);\n }\n}\n\nexport class ToolEndHandler implements t.EventHandler {\n private callback?: t.ToolEndCallback;\n private logger?: Logger;\n constructor(callback?: t.ToolEndCallback, logger?: Logger) {\n this.callback = callback;\n this.logger = logger;\n }\n\n /**\n * Handles on_tool_end events from the for-await stream consumer.\n *\n * This handler is now purely a consumer callback — tool completion\n * (ON_RUN_STEP_COMPLETED dispatch + session context storage) is handled\n * in graph context by ToolNode directly, eliminating the race between\n * the stream consumer and graph execution.\n */\n async handle(\n event: string,\n data: t.StreamEventData | undefined,\n metadata?: Record<string, unknown>,\n graph?: StandardGraph | MultiAgentGraph\n ): Promise<void> {\n try {\n if (!graph || !metadata) {\n if (this.logger) {\n this.logger.warn(`Graph or metadata not found in ${event} event`);\n } else {\n console.warn(`Graph or metadata not found in ${event} event`);\n }\n return;\n }\n\n const toolEndData = data as t.ToolEndData | undefined;\n if (!toolEndData?.output) {\n if (this.logger) {\n this.logger.warn('No output found in tool_end event');\n } else {\n console.warn('No output found in tool_end event');\n }\n return;\n }\n\n if (metadata[Constants.PROGRAMMATIC_TOOL_CALLING] === true) {\n return;\n }\n\n if (this.callback) {\n await this.callback(toolEndData, metadata);\n }\n } catch (error) {\n if (this.logger) {\n this.logger.error('Error handling tool_end event:', error);\n } else {\n console.error('Error handling tool_end event:', error);\n }\n }\n }\n}\n\nexport class TestLLMStreamHandler implements t.EventHandler {\n handle(event: string, data: t.StreamEventData | undefined): void {\n const chunk = data?.chunk;\n const isMessageChunk = !!(chunk && 'message' in chunk);\n const msg = isMessageChunk ? chunk.message : undefined;\n if (msg && msg.tool_call_chunks && msg.tool_call_chunks.length > 0) {\n console.log(msg.tool_call_chunks);\n } else if (msg && msg.content) {\n if (typeof msg.content === 'string') {\n process.stdout.write(msg.content);\n }\n }\n }\n}\n\nexport class TestChatStreamHandler implements t.EventHandler {\n handle(event: string, data: t.StreamEventData | undefined): void {\n const chunk = data?.chunk;\n const isContentChunk = !!(chunk && 'content' in chunk);\n const content = isContentChunk && chunk.content;\n\n if (!content || !isContentChunk) {\n return;\n }\n\n if (chunk.tool_call_chunks && chunk.tool_call_chunks.length > 0) {\n console.dir(chunk.tool_call_chunks, { depth: null });\n }\n\n if (typeof content === 'string') {\n process.stdout.write(content);\n } else {\n console.dir(content, { depth: null });\n }\n }\n}\n\nexport class LLMStreamHandler implements t.EventHandler {\n handle(\n event: string,\n data: t.StreamEventData | undefined,\n metadata?: Record<string, unknown>\n ): void {\n const chunk = data?.chunk;\n const isMessageChunk = !!(chunk && 'message' in chunk);\n const msg = isMessageChunk && chunk.message;\n if (metadata) {\n console.log(metadata);\n }\n if (msg && msg.tool_call_chunks && msg.tool_call_chunks.length > 0) {\n console.log(msg.tool_call_chunks);\n } else if (msg && msg.content) {\n if (typeof msg.content === 'string') {\n // const text_delta = msg.content;\n // dispatchCustomEvent(GraphEvents.CHAT_MODEL_STREAM, { chunk }, config);\n process.stdout.write(msg.content);\n }\n }\n }\n}\n\nexport const createMetadataAggregator = (\n _collected?: Record<\n string,\n NonNullable<BaseMessageFields['response_metadata']>\n >[]\n): t.MetadataAggregatorResult => {\n const collected = _collected || [];\n\n const handleLLMEnd: t.HandleLLMEnd = (output) => {\n const { generations } = output;\n const lastMessageOutput = (\n generations[generations.length - 1] as\n | (t.StreamGeneration | undefined)[]\n | undefined\n )?.[0];\n if (!lastMessageOutput) {\n return;\n }\n const { message } = lastMessageOutput;\n if (message?.response_metadata) {\n collected.push(message.response_metadata);\n }\n };\n\n return { handleLLMEnd, collected };\n};\n"],"names":["handleToolCalls","Providers","Constants"],"mappings":";;;;;MAYa,eAAe,CAAA;AAClB,IAAA,QAAQ,GAAgC,IAAI,GAAG,EAAE;IAEzD,QAAQ,CAAC,SAAiB,EAAE,OAAuB,EAAA;QACjD,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,SAAS,EAAE,OAAO,CAAC;IACvC;AAEA,IAAA,UAAU,CAAC,SAAiB,EAAA;QAC1B,OAAO,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,SAAS,CAAC;IACrC;AACD;MAEY,eAAe,CAAA;AAC1B,IAAA,cAAc;AACd,IAAA,WAAA,CAAY,cAAgC,EAAA;QAC1C,IAAI,cAAc,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,cAAc,CAAC,EAAE;AACpD,YAAA,MAAM,IAAI,KAAK,CAAC,iCAAiC,CAAC;QACpD;AACA,QAAA,IAAI,CAAC,cAAc,GAAG,cAAc;IACtC;IAEA,MAAM,MAAM,CACV,KAAa,EACb,IAAoB,EACpB,QAAkC,EAClC,KAAuC,EAAA;AAEvC,QAAA,IAAI,CAAC,KAAK,IAAI,CAAC,QAAQ,EAAE;AACvB,YAAA,OAAO,CAAC,IAAI,CAAC,kCAAkC,KAAK,CAAA,MAAA,CAAQ,CAAC;YAC7D;QACF;AAEA,QAAA,MAAM,KAAK,GAAG,IAAI,EAAE,MAAM,EAAE,cAAc;QAC1C,IAAI,KAAK,IAAI,IAAI,IAAI,IAAI,CAAC,cAAc,IAAI,IAAI,EAAE;AAChD,YAAA,IAAI,CAAC,cAAc,CAAC,IAAI,CAAC,KAAK,CAAC;QACjC;AAEA,QAAA,IAAI,QAAQ,CAAC,WAAW,KAAK,mBAAmB,EAAE;AAChD,YAAA,OAAOA,wBAAe,CAAC,IAAI,EAAE,MAAM,EAAE,UAAU,EAAE,QAAQ,EAAE,KAAK,CAAC;QACnE;QAEA,MAAM,YAAY,GAAG,KAAK,CAAC,eAAe,CAAC,QAAQ,CAAC;AAEpD,QAAA,IACE,YAAY,CAAC,QAAQ,KAAKC,eAAS,CAAC,MAAM;AAC1C,YAAA,YAAY,CAAC,QAAQ,KAAKA,eAAS,CAAC,OAAO,EAC3C;YACA;QACF;AAEA,QAAA,MAAMD,wBAAe,CAAC,IAAI,EAAE,MAAM,EAAE,UAAU,EAAE,QAAQ,EAAE,KAAK,CAAC;IAClE;AACD;MAEY,cAAc,CAAA;AACjB,IAAA,QAAQ;AACR,IAAA,MAAM;IACd,WAAA,CAAY,QAA4B,EAAE,MAAe,EAAA;AACvD,QAAA,IAAI,CAAC,QAAQ,GAAG,QAAQ;AACxB,QAAA,IAAI,CAAC,MAAM,GAAG,MAAM;IACtB;AAEA;;;;;;;AAOG;IACH,MAAM,MAAM,CACV,KAAa,EACb,IAAmC,EACnC,QAAkC,EAClC,KAAuC,EAAA;AAEvC,QAAA,IAAI;AACF,YAAA,IAAI,CAAC,KAAK,IAAI,CAAC,QAAQ,EAAE;AACvB,gBAAA,IAAI,IAAI,CAAC,MAAM,EAAE;oBACf,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAA,+BAAA,EAAkC,KAAK,CAAA,MAAA,CAAQ,CAAC;gBACnE;qBAAO;AACL,oBAAA,OAAO,CAAC,IAAI,CAAC,kCAAkC,KAAK,CAAA,MAAA,CAAQ,CAAC;gBAC/D;gBACA;YACF;YAEA,MAAM,WAAW,GAAG,IAAiC;AACrD,YAAA,IAAI,CAAC,WAAW,EAAE,MAAM,EAAE;AACxB,gBAAA,IAAI,IAAI,CAAC,MAAM,EAAE;AACf,oBAAA,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,mCAAmC,CAAC;gBACvD;qBAAO;AACL,oBAAA,OAAO,CAAC,IAAI,CAAC,mCAAmC,CAAC;gBACnD;gBACA;YACF;YAEA,IAAI,QAAQ,CAACE,eAAS,CAAC,yBAAyB,CAAC,KAAK,IAAI,EAAE;gBAC1D;YACF;AAEA,YAAA,IAAI,IAAI,CAAC,QAAQ,EAAE;gBACjB,MAAM,IAAI,CAAC,QAAQ,CAAC,WAAW,EAAE,QAAQ,CAAC;YAC5C;QACF;QAAE,OAAO,KAAK,EAAE;AACd,YAAA,IAAI,IAAI,CAAC,MAAM,EAAE;gBACf,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,gCAAgC,EAAE,KAAK,CAAC;YAC5D;iBAAO;AACL,gBAAA,OAAO,CAAC,KAAK,CAAC,gCAAgC,EAAE,KAAK,CAAC;YACxD;QACF;IACF;AACD;MAEY,oBAAoB,CAAA;IAC/B,MAAM,CAAC,KAAa,EAAE,IAAmC,EAAA;AACvD,QAAA,MAAM,KAAK,GAAG,IAAI,EAAE,KAAK;QACzB,MAAM,cAAc,GAAG,CAAC,EAAE,KAAK,IAAI,SAAS,IAAI,KAAK,CAAC;AACtD,QAAA,MAAM,GAAG,GAAG,cAAc,GAAG,KAAK,CAAC,OAAO,GAAG,SAAS;AACtD,QAAA,IAAI,GAAG,IAAI,GAAG,CAAC,gBAAgB,IAAI,GAAG,CAAC,gBAAgB,CAAC,MAAM,GAAG,CAAC,EAAE;AAClE,YAAA,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,gBAAgB,CAAC;QACnC;AAAO,aAAA,IAAI,GAAG,IAAI,GAAG,CAAC,OAAO,EAAE;AAC7B,YAAA,IAAI,OAAO,GAAG,CAAC,OAAO,KAAK,QAAQ,EAAE;gBACnC,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,OAAO,CAAC;YACnC;QACF;IACF;AACD;MAEY,qBAAqB,CAAA;IAChC,MAAM,CAAC,KAAa,EAAE,IAAmC,EAAA;AACvD,QAAA,MAAM,KAAK,GAAG,IAAI,EAAE,KAAK;QACzB,MAAM,cAAc,GAAG,CAAC,EAAE,KAAK,IAAI,SAAS,IAAI,KAAK,CAAC;AACtD,QAAA,MAAM,OAAO,GAAG,cAAc,IAAI,KAAK,CAAC,OAAO;AAE/C,QAAA,IAAI,CAAC,OAAO,IAAI,CAAC,cAAc,EAAE;YAC/B;QACF;AAEA,QAAA,IAAI,KAAK,CAAC,gBAAgB,IAAI,KAAK,CAAC,gBAAgB,CAAC,MAAM,GAAG,CAAC,EAAE;AAC/D,YAAA,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,gBAAgB,EAAE,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC;QACtD;AAEA,QAAA,IAAI,OAAO,OAAO,KAAK,QAAQ,EAAE;AAC/B,YAAA,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC;QAC/B;aAAO;YACL,OAAO,CAAC,GAAG,CAAC,OAAO,EAAE,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC;QACvC;IACF;AACD;MAEY,gBAAgB,CAAA;AAC3B,IAAA,MAAM,CACJ,KAAa,EACb,IAAmC,EACnC,QAAkC,EAAA;AAElC,QAAA,MAAM,KAAK,GAAG,IAAI,EAAE,KAAK;QACzB,MAAM,cAAc,GAAG,CAAC,EAAE,KAAK,IAAI,SAAS,IAAI,KAAK,CAAC;AACtD,QAAA,MAAM,GAAG,GAAG,cAAc,IAAI,KAAK,CAAC,OAAO;QAC3C,IAAI,QAAQ,EAAE;AACZ,YAAA,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC;QACvB;AACA,QAAA,IAAI,GAAG,IAAI,GAAG,CAAC,gBAAgB,IAAI,GAAG,CAAC,gBAAgB,CAAC,MAAM,GAAG,CAAC,EAAE;AAClE,YAAA,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,gBAAgB,CAAC;QACnC;AAAO,aAAA,IAAI,GAAG,IAAI,GAAG,CAAC,OAAO,EAAE;AAC7B,YAAA,IAAI,OAAO,GAAG,CAAC,OAAO,KAAK,QAAQ,EAAE;;;gBAGnC,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,OAAO,CAAC;YACnC;QACF;IACF;AACD;AAEM,MAAM,wBAAwB,GAAG,CACtC,UAGG,KAC2B;AAC9B,IAAA,MAAM,SAAS,GAAG,UAAU,IAAI,EAAE;AAElC,IAAA,MAAM,YAAY,GAAmB,CAAC,MAAM,KAAI;AAC9C,QAAA,MAAM,EAAE,WAAW,EAAE,GAAG,MAAM;AAC9B,QAAA,MAAM,iBAAiB,GACrB,WAAW,CAAC,WAAW,CAAC,MAAM,GAAG,CAAC,CAGnC,GAAG,CAAC,CAAC;QACN,IAAI,CAAC,iBAAiB,EAAE;YACtB;QACF;AACA,QAAA,MAAM,EAAE,OAAO,EAAE,GAAG,iBAAiB;AACrC,QAAA,IAAI,OAAO,EAAE,iBAAiB,EAAE;AAC9B,YAAA,SAAS,CAAC,IAAI,CAAC,OAAO,CAAC,iBAAiB,CAAC;QAC3C;AACF,IAAA,CAAC;AAED,IAAA,OAAO,EAAE,YAAY,EAAE,SAAS,EAAE;AACpC;;;;;;;;;;"}
@@ -24,6 +24,7 @@ require('ai-tokenizer');
24
24
  require('../utils/toonFormat.cjs');
25
25
  var contextAnalytics = require('../utils/contextAnalytics.cjs');
26
26
  require('zod-to-json-schema');
27
+ var contextPressure = require('../utils/contextPressure.cjs');
27
28
  var providers = require('../llm/providers.cjs');
28
29
  var ToolNode = require('../tools/ToolNode.cjs');
29
30
  var index = require('../llm/openai/index.cjs');
@@ -950,36 +951,12 @@ class StandardGraph extends Graph {
950
951
  let messagesToUse = messages$1;
951
952
  // ====================================================================
952
953
  // PRE-PRUNING DELEGATION CHECK
953
- // Before pruning strips messages (losing context), check if we should
954
- // delegate instead. If context would be pruned AND the agent has the
955
- // task tool, inject a delegation hint and SKIP pruning — preserving
956
- // the content for the LLM to understand what to delegate.
957
954
  // ====================================================================
958
- let delegationInjectedPrePrune = false;
959
- const hasTaskToolPrePrune = agentContext.tools?.some((tool) => {
960
- const toolName = typeof tool === 'object' && 'name' in tool
961
- ? tool.name
962
- : '';
963
- return toolName === 'task';
964
- });
965
- if (hasTaskToolPrePrune === true &&
966
- agentContext.tokenCounter &&
967
- agentContext.maxContextTokens != null) {
968
- // Estimate total tokens in messages BEFORE pruning
969
- let prePruneTokens = 0;
970
- for (const msg of messages$1) {
971
- prePruneTokens += agentContext.tokenCounter(msg);
972
- }
973
- // Add instruction tokens (system prompt)
974
- prePruneTokens += agentContext.instructionTokens;
975
- const prePruneUtilization = (prePruneTokens / agentContext.maxContextTokens) * 100;
976
- if (prePruneUtilization > 70) {
977
- console.warn(`[Graph] PRE-PRUNE delegation check: ${prePruneUtilization.toFixed(1)}% utilization ` +
978
- `(${prePruneTokens}/${agentContext.maxContextTokens} tokens). ` +
979
- 'Injecting delegation hint INSTEAD of pruning.');
980
- delegationInjectedPrePrune = true;
981
- }
982
- }
955
+ // Context management is now fully mechanical:
956
+ // - Pruning always runs when needed (no delegation-based skip)
957
+ // - Auto-continuation in client.js handles max_tokens finish reason
958
+ // - LLM never sees raw token numbers (prevents voluntary bail-out)
959
+ // ====================================================================
983
960
  if (!agentContext.pruneMessages &&
984
961
  agentContext.tokenCounter &&
985
962
  agentContext.maxContextTokens != null &&
@@ -1002,8 +979,8 @@ class StandardGraph extends Graph {
1002
979
  indexTokenCountMap: agentContext.indexTokenCountMap,
1003
980
  });
1004
981
  }
1005
- if (agentContext.pruneMessages && !delegationInjectedPrePrune) {
1006
- console.info(`[Graph:ContextMgmt] Pruning messages | inputCount=${messages$1.length} | maxTokens=${agentContext.maxContextTokens}`);
982
+ if (agentContext.pruneMessages) {
983
+ console.debug(`[Graph:ContextMgmt] Pruning messages | inputCount=${messages$1.length} | maxTokens=${agentContext.maxContextTokens}`);
1007
984
  const { context, indexTokenCountMap, messagesToRefine } = agentContext.pruneMessages({
1008
985
  messages: messages$1,
1009
986
  usageMetadata: agentContext.currentUsage,
@@ -1011,14 +988,16 @@ class StandardGraph extends Graph {
1011
988
  });
1012
989
  agentContext.indexTokenCountMap = indexTokenCountMap;
1013
990
  messagesToUse = context;
1014
- console.info(`[Graph:ContextMgmt] Pruned | kept=${context.length} | discarded=${messagesToRefine.length} | originalCount=${messages$1.length}`);
991
+ console.debug(`[Graph:ContextMgmt] Pruned | kept=${context.length} | discarded=${messagesToRefine.length} | originalCount=${messages$1.length}`);
1015
992
  // Summarize discarded messages if callback provided
993
+ let hasSummary = false;
1016
994
  if (messagesToRefine.length > 0 && agentContext.summarizeCallback) {
1017
- console.info(`[Graph:ContextMgmt] Summarizing ${messagesToRefine.length} discarded messages`);
995
+ console.debug(`[Graph:ContextMgmt] Summarizing ${messagesToRefine.length} discarded messages`);
1018
996
  try {
1019
997
  const summary = await agentContext.summarizeCallback(messagesToRefine);
1020
- console.info(`[Graph:ContextMgmt] Summary received | len=${summary?.length ?? 0} | hasContent=${summary != null && summary !== ''}`);
998
+ console.debug(`[Graph:ContextMgmt] Summary received | len=${summary?.length ?? 0} | hasContent=${summary != null && summary !== ''}`);
1021
999
  if (summary != null && summary !== '') {
1000
+ hasSummary = true;
1022
1001
  const summaryMsg = new messages.SystemMessage(`[Conversation Summary]\n${summary}`);
1023
1002
  // Insert after system message (if present), before conversation messages
1024
1003
  const systemIdx = messagesToUse[0]?.getType() === 'system' ? 1 : 0;
@@ -1027,16 +1006,22 @@ class StandardGraph extends Graph {
1027
1006
  summaryMsg,
1028
1007
  ...messagesToUse.slice(systemIdx),
1029
1008
  ];
1030
- console.info(`[Graph:ContextMgmt] Summary injected at index ${systemIdx} | finalMsgCount=${messagesToUse.length}`);
1009
+ console.debug(`[Graph:ContextMgmt] Summary injected at index ${systemIdx} | finalMsgCount=${messagesToUse.length}`);
1031
1010
  }
1032
1011
  }
1033
1012
  catch (err) {
1034
1013
  console.error('[Graph] Summarization callback failed:', err);
1035
1014
  }
1036
1015
  }
1037
- }
1038
- else if (delegationInjectedPrePrune) {
1039
- console.info('[Graph] Skipping pruning delegation will handle context pressure');
1016
+ // Post-prune context note: inform the LLM that context was compressed
1017
+ // without exposing token numbers (prevents voluntary bail-out)
1018
+ if (messagesToRefine.length > 0 && contextPressure.hasTaskTool(agentContext.tools)) {
1019
+ const postPruneNote = contextPressure.buildPostPruneNote(messagesToRefine.length, hasSummary);
1020
+ if (postPruneNote) {
1021
+ messagesToUse = [...messagesToUse, new messages.SystemMessage(postPruneNote)];
1022
+ console.debug(`[Graph:ContextMgmt] Post-prune note injected | hasSummary=${hasSummary} | discarded=${messagesToRefine.length}`);
1023
+ }
1024
+ }
1040
1025
  }
1041
1026
  let finalMessages = messagesToUse;
1042
1027
  if (agentContext.useLegacyContent) {
@@ -1149,125 +1134,30 @@ class StandardGraph extends Graph {
1149
1134
  analytics: contextAnalytics$1,
1150
1135
  }, config);
1151
1136
  // ====================================================================
1152
- // CONTEXT PRESSURE AWARENESS Intelligent Sub-Agent Delegation
1153
- //
1154
- // Two triggers for delegation hints:
1155
- // 1. DOCUMENT COUNT: When 3+ documents are detected in the conversation,
1156
- // inject a delegation hint on the FIRST iteration (before the LLM
1157
- // has called any tools). This ensures the agent delegates upfront
1158
- // rather than trying to process all documents itself.
1159
- // 2. TOKEN UTILIZATION: At EVERY iteration, if context is filling up
1160
- // (70%/85%), inject escalating hints to delegate remaining work.
1137
+ // MULTI-DOCUMENT DELEGATION (task-driven, not budget-driven)
1161
1138
  //
1162
- // This runs mid-chain so even if tool responses push context up
1163
- // after the first LLM call, subsequent iterations get the hint.
1139
+ // Token-based pressure hints have been removed the LLM never sees
1140
+ // raw token numbers. Context overflow is handled mechanically by
1141
+ // pruning (Graph) + auto-continuation (client.js max_tokens detection).
1142
+ // See: docs/context-overflow-architecture.md
1164
1143
  // ====================================================================
1165
- const hasTaskToolInContext = agentContext.tools?.some((tool) => {
1166
- const toolName = typeof tool === 'object' && 'name' in tool
1167
- ? tool.name
1168
- : '';
1169
- return toolName === 'task';
1170
- });
1171
- if (hasTaskToolInContext === true &&
1172
- contextAnalytics$1.utilizationPercent != null &&
1173
- contextAnalytics$1.maxContextTokens != null) {
1174
- const utilization = contextAnalytics$1.utilizationPercent;
1175
- const totalTokens = contextAnalytics$1.totalTokens;
1176
- const maxTokens = contextAnalytics$1.maxContextTokens;
1177
- const remainingTokens = maxTokens - totalTokens;
1178
- // Count attached documents by scanning for document patterns in HumanMessages:
1179
- // 1. # "filename" headers in "Attached document(s):" blocks (text content)
1180
- // 2. **filename1, filename2** in "The user has attached:" blocks (embedded files)
1181
- // 3. Filenames in file_search tool results
1182
- let documentCount = 0;
1183
- const documentNames = [];
1184
- for (const msg of finalMessages) {
1185
- const content = typeof msg.content === 'string'
1186
- ? msg.content
1187
- : Array.isArray(msg.content)
1188
- ? msg.content
1189
- .map((p) => {
1190
- const part = p;
1191
- return String(part.text ?? part.content ?? '');
1192
- })
1193
- .join(' ')
1194
- : '';
1195
- // Pattern 1: # "filename" headers in attached document blocks
1196
- const docMatches = content.match(/# "([^"]+)"/g);
1197
- if (docMatches) {
1198
- for (const match of docMatches) {
1199
- const name = match.replace(/# "/, '').replace(/"$/, '');
1200
- if (!documentNames.includes(name)) {
1201
- documentNames.push(name);
1202
- documentCount++;
1203
- }
1204
- }
1205
- }
1206
- // Pattern 2: "The user has attached: **file1, file2**" (embedded files)
1207
- const attachedMatch = content.match(/user has attached:\s*\*\*([^*]+)\*\*/i);
1208
- if (attachedMatch) {
1209
- const names = attachedMatch[1]
1210
- .split(',')
1211
- .map((n) => n.trim())
1212
- .filter(Boolean);
1213
- for (const name of names) {
1214
- if (!documentNames.includes(name)) {
1215
- documentNames.push(name);
1216
- documentCount++;
1217
- }
1218
- }
1219
- }
1144
+ if (contextPressure.hasTaskTool(agentContext.tools)) {
1145
+ const { count: documentCount, names: documentNames } = contextPressure.detectDocuments(finalMessages);
1146
+ // Observability log (no token numbers exposed to LLM)
1147
+ if (contextAnalytics$1.utilizationPercent != null) {
1148
+ console.debug(`[Graph] Context utilization: ${contextAnalytics$1.utilizationPercent.toFixed(1)}% | ` +
1149
+ `messages: ${finalMessages.length} | docs: ${documentCount}`);
1220
1150
  }
1221
- // BASELINE LOG: Always fires so we can verify this code path runs
1222
- console.info(`[Graph] Context utilization: ${utilization.toFixed(1)}% ` +
1223
- `(${totalTokens}/${maxTokens} tokens, ${remainingTokens} remaining) | ` +
1224
- `hasTaskTool: true | messages: ${finalMessages.length} | docs: ${documentCount}`);
1225
- // TRIGGER 1: Multi-document delegation (3+ documents detected)
1226
- // Only inject on first iteration (no AI messages yet = agent hasn't responded)
1151
+ // Multi-document delegation: first iteration only (before AI has responded)
1227
1152
  const hasAiResponse = finalMessages.some((m) => m._getType() === 'ai' || m._getType() === 'tool');
1228
- if (documentCount >= 3 && !hasAiResponse) {
1153
+ if (contextPressure.shouldInjectMultiDocHint(documentCount, hasAiResponse)) {
1229
1154
  const pressureMsg = new messages.HumanMessage({
1230
- content: `[MULTI-DOCUMENT PROCESSING — ${documentCount} documents detected]\n` +
1231
- `Documents: ${documentNames.join(', ')}\n\n` +
1232
- `You have ${documentCount} documents attached. For thorough analysis, use the "task" tool ` +
1233
- 'to delegate each document (or group of related documents) to a sub-agent.\n' +
1234
- 'Each sub-agent has its own fresh context window and can use file_search to retrieve the full document content.\n' +
1235
- 'After all sub-agents complete, synthesize their results into a comprehensive response.\n\n' +
1236
- 'This approach ensures each document gets full attention without context limitations.',
1155
+ content: contextPressure.buildMultiDocHintContent(documentCount, documentNames),
1237
1156
  });
1238
1157
  finalMessages = [...finalMessages, pressureMsg];
1239
1158
  console.info(`[Graph] Multi-document delegation hint injected for ${documentCount} documents: ` +
1240
1159
  `${documentNames.join(', ')}`);
1241
1160
  }
1242
- // TRIGGER 2: Token utilization thresholds (mid-chain safety net)
1243
- // Also fires when we skipped pruning due to delegationInjectedPrePrune
1244
- if (utilization > 85 ||
1245
- (delegationInjectedPrePrune && utilization > 50)) {
1246
- // CRITICAL: Context is high — MANDATE delegation
1247
- const pressureMsg = new messages.HumanMessage({
1248
- content: `[CONTEXT BUDGET CRITICAL — ${utilization.toFixed(0)}% used]\n` +
1249
- `You have used ${totalTokens} of ${maxTokens} tokens (${remainingTokens} remaining).\n` +
1250
- 'Your context is very large. You MUST use the "task" tool to delegate work to sub-agents.\n' +
1251
- 'Each sub-agent runs in its own fresh context window and can use file_search to access documents.\n' +
1252
- 'Do NOT attempt to process documents directly — delegate each document to a sub-agent, then synthesize results.',
1253
- });
1254
- finalMessages = [...finalMessages, pressureMsg];
1255
- console.warn(`[Graph] Context pressure CRITICAL (${utilization.toFixed(0)}%): ` +
1256
- `Injected mandatory delegation hint. ${remainingTokens} tokens remaining. ` +
1257
- `prePruneSkipped: ${delegationInjectedPrePrune}`);
1258
- }
1259
- else if (utilization > 70) {
1260
- // WARNING: Context filling up — suggest delegation
1261
- const pressureMsg = new messages.HumanMessage({
1262
- content: `[CONTEXT BUDGET WARNING — ${utilization.toFixed(0)}% used]\n` +
1263
- `You have used ${totalTokens} of ${maxTokens} tokens (${remainingTokens} remaining).\n` +
1264
- 'Your context is filling up. Consider using the "task" tool to delegate complex operations to sub-agents.\n' +
1265
- "Sub-agents run in fresh context windows and won't consume your remaining budget.",
1266
- });
1267
- finalMessages = [...finalMessages, pressureMsg];
1268
- console.info(`[Graph] Context pressure WARNING (${utilization.toFixed(0)}%): ` +
1269
- `Injected delegation suggestion. ${remainingTokens} tokens remaining.`);
1270
- }
1271
1161
  }
1272
1162
  // Structured output mode: when the agent has NO tools, produce structured JSON immediately.
1273
1163
  // When the agent HAS tools, we defer structured output until after tool use completes
@@ -1661,7 +1551,7 @@ If I seem to be missing something we discussed earlier, just give me a quick rem
1661
1551
  reducer: (a, b) => {
1662
1552
  if (!a.length) {
1663
1553
  this.startIndex = a.length + b.length;
1664
- console.info(`[Graph:Reducer] Initial messages | startIndex=${this.startIndex} | inputMsgCount=${b.length}`);
1554
+ console.debug(`[Graph:Reducer] Initial messages | startIndex=${this.startIndex} | inputMsgCount=${b.length}`);
1665
1555
  }
1666
1556
  else {
1667
1557
  console.debug(`[Graph:Reducer] Appending messages | existing=${a.length} | new=${b.length} | startIndex=${this.startIndex}`);