@illuma-ai/agents 1.0.98 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. package/dist/cjs/agents/AgentContext.cjs +6 -2
  2. package/dist/cjs/agents/AgentContext.cjs.map +1 -1
  3. package/dist/cjs/common/constants.cjs +53 -0
  4. package/dist/cjs/common/constants.cjs.map +1 -1
  5. package/dist/cjs/graphs/Graph.cjs +167 -31
  6. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  7. package/dist/cjs/main.cjs +14 -0
  8. package/dist/cjs/main.cjs.map +1 -1
  9. package/dist/cjs/messages/dedup.cjs +95 -0
  10. package/dist/cjs/messages/dedup.cjs.map +1 -0
  11. package/dist/cjs/tools/CodeExecutor.cjs +22 -3
  12. package/dist/cjs/tools/CodeExecutor.cjs.map +1 -1
  13. package/dist/cjs/types/graph.cjs.map +1 -1
  14. package/dist/cjs/utils/pruneCalibration.cjs +78 -0
  15. package/dist/cjs/utils/pruneCalibration.cjs.map +1 -0
  16. package/dist/cjs/utils/run.cjs.map +1 -1
  17. package/dist/cjs/utils/tokens.cjs.map +1 -1
  18. package/dist/cjs/utils/toolDiscoveryCache.cjs +127 -0
  19. package/dist/cjs/utils/toolDiscoveryCache.cjs.map +1 -0
  20. package/dist/esm/agents/AgentContext.mjs +6 -2
  21. package/dist/esm/agents/AgentContext.mjs.map +1 -1
  22. package/dist/esm/common/constants.mjs +48 -1
  23. package/dist/esm/common/constants.mjs.map +1 -1
  24. package/dist/esm/graphs/Graph.mjs +168 -32
  25. package/dist/esm/graphs/Graph.mjs.map +1 -1
  26. package/dist/esm/main.mjs +4 -1
  27. package/dist/esm/main.mjs.map +1 -1
  28. package/dist/esm/messages/dedup.mjs +93 -0
  29. package/dist/esm/messages/dedup.mjs.map +1 -0
  30. package/dist/esm/tools/CodeExecutor.mjs +22 -3
  31. package/dist/esm/tools/CodeExecutor.mjs.map +1 -1
  32. package/dist/esm/types/graph.mjs.map +1 -1
  33. package/dist/esm/utils/pruneCalibration.mjs +74 -0
  34. package/dist/esm/utils/pruneCalibration.mjs.map +1 -0
  35. package/dist/esm/utils/run.mjs.map +1 -1
  36. package/dist/esm/utils/tokens.mjs.map +1 -1
  37. package/dist/esm/utils/toolDiscoveryCache.mjs +125 -0
  38. package/dist/esm/utils/toolDiscoveryCache.mjs.map +1 -0
  39. package/dist/types/agents/AgentContext.d.ts +4 -1
  40. package/dist/types/common/constants.d.ts +35 -0
  41. package/dist/types/graphs/Graph.d.ts +25 -0
  42. package/dist/types/messages/dedup.d.ts +25 -0
  43. package/dist/types/messages/index.d.ts +1 -0
  44. package/dist/types/types/graph.d.ts +63 -0
  45. package/dist/types/utils/index.d.ts +2 -0
  46. package/dist/types/utils/pruneCalibration.d.ts +43 -0
  47. package/dist/types/utils/toolDiscoveryCache.d.ts +77 -0
  48. package/package.json +1 -1
  49. package/src/agents/AgentContext.ts +7 -0
  50. package/src/common/constants.ts +56 -0
  51. package/src/graphs/Graph.ts +220 -50
  52. package/src/graphs/gapFeatures.test.ts +520 -0
  53. package/src/graphs/nonBlockingSummarization.test.ts +307 -0
  54. package/src/messages/__tests__/dedup.test.ts +166 -0
  55. package/src/messages/dedup.ts +104 -0
  56. package/src/messages/index.ts +1 -0
  57. package/src/tools/CodeExecutor.ts +22 -3
  58. package/src/types/graph.ts +73 -0
  59. package/src/utils/__tests__/pruneCalibration.test.ts +148 -0
  60. package/src/utils/__tests__/toolDiscoveryCache.test.ts +214 -0
  61. package/src/utils/contextPressure.test.ts +24 -9
  62. package/src/utils/index.ts +2 -0
  63. package/src/utils/pruneCalibration.ts +92 -0
  64. package/src/utils/run.ts +108 -108
  65. package/src/utils/tokens.ts +118 -118
  66. package/src/utils/toolDiscoveryCache.ts +150 -0
package/dist/esm/utils/toolDiscoveryCache.mjs ADDED
@@ -0,0 +1,125 @@
+ import { MessageTypes, Constants } from '../common/enum.mjs';
+ import { TOOL_DISCOVERY_CACHE_MAX_SIZE } from '../common/constants.mjs';
+
+ /**
+ * ToolDiscoveryCache provides a run-scoped cache of tool search results.
+ *
+ * Problem: Without caching, every LLM iteration re-parses the full message
+ * history via extractToolDiscoveries() to find tool_search results. In long
+ * conversations with many tool iterations, this is redundant work.
+ *
+ * Solution: Cache discovered tool names by message index. On each iteration,
+ * only scan messages AFTER the last scanned index. Already-seen discoveries
+ * are returned from cache instantly.
+ *
+ * This mirrors the pattern used by VS Code Copilot Chat where tool search
+ * results from prior turns are cached to avoid re-discovery.
+ *
+ * @example
+ * ```ts
+ * const cache = new ToolDiscoveryCache();
+ *
+ * // First call: scans all messages
+ * const newTools = cache.getNewDiscoveries(messages);
+ * // Returns: ['web_search', 'file_read']
+ *
+ * // Second call (3 new messages added): only scans new messages
+ * const moreTools = cache.getNewDiscoveries(messages);
+ * // Returns: ['code_exec'] (only newly discovered)
+ * ```
+ */
+ class ToolDiscoveryCache {
+ /** Set of all discovered tool names (deduped) */
+ _discoveredTools = new Set();
+ /** Last message index that was scanned */
+ _lastScannedIndex = -1;
+ /**
+ * Scan messages for new tool_search results since the last scan.
+ * Only processes messages after `_lastScannedIndex` to avoid redundant work.
+ *
+ * @param messages - Full conversation message array
+ * @returns Array of newly discovered tool names (not previously cached)
+ */
+ getNewDiscoveries(messages) {
+ if (messages.length === 0) {
+ return [];
+ }
+ const startIndex = this._lastScannedIndex + 1;
+ if (startIndex >= messages.length) {
+ return [];
+ }
+ const newDiscoveries = [];
+ for (let i = startIndex; i < messages.length; i++) {
+ const msg = messages[i];
+ if (msg.getType() !== MessageTypes.TOOL) {
+ continue;
+ }
+ // Check if this is a tool_search result
+ if (msg.name !== Constants.TOOL_SEARCH) {
+ continue;
+ }
+ // Extract tool references from artifact
+ const artifact = msg.artifact;
+ if (typeof artifact === 'object' && artifact != null) {
+ const refs = artifact.tool_references;
+ if (refs && refs.length > 0) {
+ for (const ref of refs) {
+ if (!this._discoveredTools.has(ref.tool_name)) {
+ // Enforce cache size limit
+ if (this._discoveredTools.size >= TOOL_DISCOVERY_CACHE_MAX_SIZE) {
+ break;
+ }
+ this._discoveredTools.add(ref.tool_name);
+ newDiscoveries.push(ref.tool_name);
+ }
+ }
+ }
+ }
+ }
+ this._lastScannedIndex = messages.length - 1;
+ return newDiscoveries;
+ }
+ /**
+ * Returns all tool names discovered so far (across all scans).
+ */
+ getAllDiscoveredTools() {
+ return [...this._discoveredTools];
+ }
+ /**
+ * Check if a specific tool has been discovered.
+ */
+ has(toolName) {
+ return this._discoveredTools.has(toolName);
+ }
+ /**
+ * Number of unique tools discovered.
+ */
+ get size() {
+ return this._discoveredTools.size;
+ }
+ /**
+ * Reset the cache (e.g., on graph reset).
+ */
+ reset() {
+ this._discoveredTools.clear();
+ this._lastScannedIndex = -1;
+ }
+ /**
+ * Seed the cache with previously known tool names (e.g., from prior conversation turns).
+ * Does not affect _lastScannedIndex — the next getNewDiscoveries call will still
+ * scan all messages from the beginning.
+ *
+ * @param toolNames - Tool names to pre-seed into the cache
+ */
+ seed(toolNames) {
+ for (const name of toolNames) {
+ if (this._discoveredTools.size >= TOOL_DISCOVERY_CACHE_MAX_SIZE) {
+ break;
+ }
+ this._discoveredTools.add(name);
+ }
+ }
+ }
+
+ export { ToolDiscoveryCache };
+ //# sourceMappingURL=toolDiscoveryCache.mjs.map
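
For orientation, here is a minimal sketch of how the cache might be wired into an agent loop. The import path from the package's main entry is an assumption (the `utils/index.d.ts` hunk further down shows the utils barrel re-exporting it); everything else uses only the methods shown above.

```ts
import { ToolDiscoveryCache } from '@illuma-ai/agents'; // assumed re-export path
import type { BaseMessage } from '@langchain/core/messages';

// Run-scoped: create one cache per run, and reset() it when the run ends.
const cache = new ToolDiscoveryCache();

// Optionally seed with tools already known from prior turns.
cache.seed(['web_search']);

function onIteration(messages: BaseMessage[]): void {
  // Only messages after the last scanned index are parsed on each call.
  const discovered = cache.getNewDiscoveries(messages);
  if (discovered.length > 0) {
    console.log(`newly discovered: ${discovered.join(', ')} (total: ${cache.size})`);
  }
}
```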
package/dist/esm/utils/toolDiscoveryCache.mjs.map ADDED
@@ -0,0 +1 @@
+ {"version":3,"file":"toolDiscoveryCache.mjs","sources":["../../../src/utils/toolDiscoveryCache.ts"],"sourcesContent":["// src/utils/toolDiscoveryCache.ts\nimport type { BaseMessage } from '@langchain/core/messages';\nimport { Constants, MessageTypes } from '@/common';\nimport { TOOL_DISCOVERY_CACHE_MAX_SIZE } from '@/common/constants';\n\n/**\n * Cached tool discovery entry.\n * Stores the tool name and the message index where it was discovered,\n * enabling efficient lookups without re-parsing conversation history.\n */\nexport interface ToolDiscoveryEntry {\n /** The tool name that was discovered */\n toolName: string;\n /** Message index in conversation history where discovery occurred */\n discoveredAtIndex: number;\n}\n\n/**\n * ToolDiscoveryCache provides a run-scoped cache of tool search results.\n *\n * Problem: Without caching, every LLM iteration re-parses the full message\n * history via extractToolDiscoveries() to find tool_search results. In long\n * conversations with many tool iterations, this is redundant work.\n *\n * Solution: Cache discovered tool names by message index. On each iteration,\n * only scan messages AFTER the last scanned index. Already-seen discoveries\n * are returned from cache instantly.\n *\n * This mirrors the pattern used by VS Code Copilot Chat where tool search\n * results from prior turns are cached to avoid re-discovery.\n *\n * @example\n * ```ts\n * const cache = new ToolDiscoveryCache();\n *\n * // First call: scans all messages\n * const newTools = cache.getNewDiscoveries(messages);\n * // Returns: ['web_search', 'file_read']\n *\n * // Second call (3 new messages added): only scans new messages\n * const moreTools = cache.getNewDiscoveries(messages);\n * // Returns: ['code_exec'] (only newly discovered)\n * ```\n */\nexport class ToolDiscoveryCache {\n /** Set of all discovered tool names (deduped) */\n private _discoveredTools: Set<string> = new Set();\n /** Last message index that was scanned */\n private _lastScannedIndex: number = -1;\n\n /**\n * Scan messages for new tool_search results since the last scan.\n * Only processes messages after `_lastScannedIndex` to avoid redundant work.\n *\n * @param messages - Full conversation message array\n * @returns Array of newly discovered tool names (not previously cached)\n */\n getNewDiscoveries(messages: BaseMessage[]): string[] {\n if (messages.length === 0) {\n return [];\n }\n\n const startIndex = this._lastScannedIndex + 1;\n if (startIndex >= messages.length) {\n return [];\n }\n\n const newDiscoveries: string[] = [];\n\n for (let i = startIndex; i < messages.length; i++) {\n const msg = messages[i];\n if (msg.getType() !== MessageTypes.TOOL) {\n continue;\n }\n\n // Check if this is a tool_search result\n if ((msg as { name?: string }).name !== Constants.TOOL_SEARCH) {\n continue;\n }\n\n // Extract tool references from artifact\n const artifact = (msg as { artifact?: unknown }).artifact;\n if (typeof artifact === 'object' && artifact != null) {\n const refs = (\n artifact as { tool_references?: Array<{ tool_name: string }> }\n ).tool_references;\n if (refs && refs.length > 0) {\n for (const ref of refs) {\n if (!this._discoveredTools.has(ref.tool_name)) {\n // Enforce cache size limit\n if (this._discoveredTools.size >= TOOL_DISCOVERY_CACHE_MAX_SIZE) {\n break;\n }\n this._discoveredTools.add(ref.tool_name);\n newDiscoveries.push(ref.tool_name);\n }\n }\n }\n }\n }\n\n this._lastScannedIndex = messages.length - 1;\n return newDiscoveries;\n }\n\n /**\n * Returns all tool names discovered so far (across all scans).\n */\n getAllDiscoveredTools(): string[] {\n return [...this._discoveredTools];\n }\n\n /**\n * Check if a specific tool has been discovered.\n */\n has(toolName: string): boolean {\n return this._discoveredTools.has(toolName);\n }\n\n /**\n * Number of unique tools discovered.\n */\n get size(): number {\n return this._discoveredTools.size;\n }\n\n /**\n * Reset the cache (e.g., on graph reset).\n */\n reset(): void {\n this._discoveredTools.clear();\n this._lastScannedIndex = -1;\n }\n\n /**\n * Seed the cache with previously known tool names (e.g., from prior conversation turns).\n * Does not affect _lastScannedIndex — the next getNewDiscoveries call will still\n * scan all messages from the beginning.\n *\n * @param toolNames - Tool names to pre-seed into the cache\n */\n seed(toolNames: string[]): void {\n for (const name of toolNames) {\n if (this._discoveredTools.size >= TOOL_DISCOVERY_CACHE_MAX_SIZE) {\n break;\n }\n this._discoveredTools.add(name);\n }\n }\n}\n"],"names":[],"mappings":";;;AAiBA;;;;;;;;;;;;;;;;;;;;;;;;;;AA0BG;MACU,kBAAkB,CAAA;;AAErB,IAAA,gBAAgB,GAAgB,IAAI,GAAG,EAAE;;IAEzC,iBAAiB,GAAW,EAAE;AAEtC;;;;;;AAMG;AACH,IAAA,iBAAiB,CAAC,QAAuB,EAAA;AACvC,QAAA,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE;AACzB,YAAA,OAAO,EAAE;QACX;AAEA,QAAA,MAAM,UAAU,GAAG,IAAI,CAAC,iBAAiB,GAAG,CAAC;AAC7C,QAAA,IAAI,UAAU,IAAI,QAAQ,CAAC,MAAM,EAAE;AACjC,YAAA,OAAO,EAAE;QACX;QAEA,MAAM,cAAc,GAAa,EAAE;AAEnC,QAAA,KAAK,IAAI,CAAC,GAAG,UAAU,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;AACjD,YAAA,MAAM,GAAG,GAAG,QAAQ,CAAC,CAAC,CAAC;YACvB,IAAI,GAAG,CAAC,OAAO,EAAE,KAAK,YAAY,CAAC,IAAI,EAAE;gBACvC;YACF;;YAGA,IAAK,GAAyB,CAAC,IAAI,KAAK,SAAS,CAAC,WAAW,EAAE;gBAC7D;YACF;;AAGA,YAAA,MAAM,QAAQ,GAAI,GAA8B,CAAC,QAAQ;YACzD,IAAI,OAAO,QAAQ,KAAK,QAAQ,IAAI,QAAQ,IAAI,IAAI,EAAE;AACpD,gBAAA,MAAM,IAAI,GACR,QACD,CAAC,eAAe;gBACjB,IAAI,IAAI,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE;AAC3B,oBAAA,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE;AACtB,wBAAA,IAAI,CAAC,IAAI,CAAC,gBAAgB,CAAC,GAAG,CAAC,GAAG,CAAC,SAAS,CAAC,EAAE;;4BAE7C,IAAI,IAAI,CAAC,gBAAgB,CAAC,IAAI,IAAI,6BAA6B,EAAE;gCAC/D;4BACF;4BACA,IAAI,CAAC,gBAAgB,CAAC,GAAG,CAAC,GAAG,CAAC,SAAS,CAAC;AACxC,4BAAA,cAAc,CAAC,IAAI,CAAC,GAAG,CAAC,SAAS,CAAC;wBACpC;oBACF;gBACF;YACF;QACF;QAEA,IAAI,CAAC,iBAAiB,GAAG,QAAQ,CAAC,MAAM,GAAG,CAAC;AAC5C,QAAA,OAAO,cAAc;IACvB;AAEA;;AAEG;IACH,qBAAqB,GAAA;AACnB,QAAA,OAAO,CAAC,GAAG,IAAI,CAAC,gBAAgB,CAAC;IACnC;AAEA;;AAEG;AACH,IAAA,GAAG,CAAC,QAAgB,EAAA;QAClB,OAAO,IAAI,CAAC,gBAAgB,CAAC,GAAG,CAAC,QAAQ,CAAC;IAC5C;AAEA;;AAEG;AACH,IAAA,IAAI,IAAI,GAAA;AACN,QAAA,OAAO,IAAI,CAAC,gBAAgB,CAAC,IAAI;IACnC;AAEA;;AAEG;IACH,KAAK,GAAA;AACH,QAAA,IAAI,CAAC,gBAAgB,CAAC,KAAK,EAAE;AAC7B,QAAA,IAAI,CAAC,iBAAiB,GAAG,EAAE;IAC7B;AAEA;;;;;;AAMG;AACH,IAAA,IAAI,CAAC,SAAmB,EAAA;AACtB,QAAA,KAAK,MAAM,IAAI,IAAI,SAAS,EAAE;YAC5B,IAAI,IAAI,CAAC,gBAAgB,CAAC,IAAI,IAAI,6BAA6B,EAAE;gBAC/D;YACF;AACA,YAAA,IAAI,CAAC,gBAAgB,CAAC,GAAG,CAAC,IAAI,CAAC;QACjC;IACF;AACD;;;;"}
package/dist/types/agents/AgentContext.d.ts CHANGED
@@ -116,7 +116,9 @@ export declare class AgentContext {
  summarizeCallback?: (messages: BaseMessage[]) => Promise<string | undefined>;
  /** Pre-existing summary loaded from persistent storage, injected into context on new turns */
  persistedSummary?: string;
- constructor({ agentId, name, description, provider, clientOptions, maxContextTokens, streamBuffer, tokenCounter, tools, toolMap, toolRegistry, toolDefinitions, instructions, additionalInstructions, dynamicContext, reasoningKey, toolEnd, instructionTokens, useLegacyContent, structuredOutput, discoveredTools, summarizeCallback, persistedSummary, }: {
+ /** Summarization configuration controlling trigger strategy, reserve ratio, and EMA calibration */
+ summarizationConfig?: t.SummarizationConfig;
+ constructor({ agentId, name, description, provider, clientOptions, maxContextTokens, streamBuffer, tokenCounter, tools, toolMap, toolRegistry, toolDefinitions, instructions, additionalInstructions, dynamicContext, reasoningKey, toolEnd, instructionTokens, useLegacyContent, structuredOutput, discoveredTools, summarizeCallback, persistedSummary, summarizationConfig, }: {
  agentId: string;
  name?: string;
  description?: string;
@@ -140,6 +142,7 @@ export declare class AgentContext {
  discoveredTools?: string[];
  summarizeCallback?: (messages: BaseMessage[]) => Promise<string | undefined>;
  persistedSummary?: string;
+ summarizationConfig?: t.SummarizationConfig;
  });
  /**
  * Checks if structured output mode is enabled for this agent.
package/dist/types/common/constants.d.ts CHANGED
@@ -30,3 +30,38 @@ export declare const MULTI_DOCUMENT_THRESHOLD = 3;
  * 0.9 = 10% reserved for safety.
  */
  export declare const CONTEXT_SAFETY_BUFFER = 0.9;
+ /**
+ * Default context utilization percentage (0-100) at which summarization triggers.
+ * When the context window is ≥80% full, pruning + summarization activates.
+ */
+ export declare const SUMMARIZATION_CONTEXT_THRESHOLD = 80;
+ /**
+ * Default reserve ratio (0-1) — fraction of context window to preserve as recent messages.
+ * 0.3 means 30% of the context budget is reserved for the most recent messages,
+ * ensuring the model always has immediate conversation history even after aggressive pruning.
+ */
+ export declare const SUMMARIZATION_RESERVE_RATIO = 0.3;
+ /**
+ * Default EMA (Exponential Moving Average) alpha for pruning calibration.
+ * Controls how quickly the calibration adapts to new token counts.
+ * Higher α = faster adaptation (more responsive to recent changes).
+ * Lower α = smoother adaptation (more stable across iterations).
+ * 0.3 provides a balance between responsiveness and stability.
+ */
+ export declare const PRUNING_EMA_ALPHA = 0.3;
+ /**
+ * Default initial calibration ratio for EMA pruning.
+ * 1.0 means no adjustment on the first iteration (trust the raw token counts).
+ * Subsequent iterations will adjust based on actual vs. estimated token usage.
+ */
+ export declare const PRUNING_INITIAL_CALIBRATION = 1;
+ /**
+ * Maximum number of tool discovery entries to cache per conversation.
+ * Prevents unbounded memory growth in very long conversations.
+ */
+ export declare const TOOL_DISCOVERY_CACHE_MAX_SIZE = 200;
+ /**
+ * Maximum length of system message content to hash for deduplication.
+ * Messages longer than this are always considered unique (hashing would be expensive).
+ */
+ export declare const DEDUP_MAX_CONTENT_LENGTH = 10000;
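
Back-of-envelope, assuming a 128k-token window, this is how the two summarization defaults interact; the exact formula inside the package is not shown in this diff, only the documented semantics of the constants:

```ts
const maxContextTokens = 128_000;

// SUMMARIZATION_CONTEXT_THRESHOLD: summarization activates once utilization
// reaches the threshold percentage of the window.
const triggerAt = maxContextTokens * (80 / 100); // 102,400 tokens

// SUMMARIZATION_RESERVE_RATIO: this fraction of the budget is kept as
// recent conversation history, even after aggressive pruning.
const reservedForRecent = maxContextTokens * 0.3; // 38,400 tokens
```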
package/dist/types/graphs/Graph.d.ts CHANGED
@@ -73,6 +73,13 @@ export declare class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode>
  runId: string | undefined;
  startIndex: number;
  signal?: AbortSignal;
+ /** Cached summary from the first prune in this run.
+ * Reused for subsequent prunes to avoid blocking LLM calls on every tool iteration. */
+ private _cachedRunSummary;
+ /** EMA-based pruning calibration state — smooths token budget adjustments across iterations */
+ private _pruneCalibration;
+ /** Run-scoped tool discovery cache — avoids re-parsing conversation history on every iteration */
+ private _toolDiscoveryCache;
  /** Map of agent contexts by agent ID */
  agentContexts: Map<string, AgentContext>;
  /** Default agent ID to use */
@@ -105,6 +112,24 @@
  * @returns Shallow-cloned clientOptions with reduced thinking budget, or the original if no reduction needed
  */
  getAdaptiveClientOptions(clientOptions: t.ClientOptions, provider: Providers): t.ClientOptions;
+ /**
+ * Determines whether summarization should trigger based on SummarizationConfig.
+ *
+ * Supports three trigger strategies:
+ * - contextPercentage (default): Trigger when context utilization >= threshold%
+ * - messageCount: Trigger when pruned message count >= threshold
+ * - tokenThreshold: Trigger when total estimated tokens >= threshold
+ *
+ * When no config is provided, always triggers (preserves backward compatibility).
+ *
+ * @param prunedMessageCount - Number of messages that were pruned
+ * @param maxContextTokens - Maximum context token budget
+ * @param indexTokenCountMap - Token count map by message index
+ * @param instructionTokens - Token count for instructions/system message
+ * @param config - Optional SummarizationConfig
+ * @returns Whether summarization should be triggered
+ */
+ private shouldTriggerSummarization;
  /**
  * Returns the normalized finish/stop reason from the last LLM invocation.
  * Used by callers to detect when the response was truncated due to max_tokens.
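
The method is private, so only its doc comment is visible here. A sketch of the documented decision table might look like the following; the real signature takes an index-to-token-count map plus instruction tokens, which is collapsed to a single total below, and the import path is assumed:

```ts
import type { SummarizationConfig } from '@illuma-ai/agents'; // assumed re-export path

// Sketch of the documented trigger logic; the shipped implementation is not in this diff.
function shouldTrigger(
  prunedMessageCount: number,
  totalEstimatedTokens: number,
  maxContextTokens: number,
  config?: SummarizationConfig,
): boolean {
  // No config: always trigger, preserving pre-1.1.0 behavior.
  if (!config) return true;
  const threshold = config.triggerThreshold ?? 80;
  switch (config.triggerType ?? 'contextPercentage') {
    case 'contextPercentage':
      return (totalEstimatedTokens / maxContextTokens) * 100 >= threshold;
    case 'messageCount':
      return prunedMessageCount >= threshold;
    case 'tokenThreshold':
      return totalEstimatedTokens >= threshold;
  }
}
```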
package/dist/types/messages/dedup.d.ts ADDED
@@ -0,0 +1,25 @@
+ import type { BaseMessage } from '@langchain/core/messages';
+ /**
+ * Deduplicates consecutive identical system messages in the context window.
+ *
+ * Problem: In long tool-use chains, the same system messages (e.g., post-prune notes,
+ * conversation summaries) can accumulate when the context is rebuilt on each iteration.
+ * These duplicates waste tokens without adding information.
+ *
+ * Strategy: Only deduplicate system messages that appear consecutively or are exact
+ * duplicates of an earlier system message. The FIRST occurrence is always kept.
+ * Non-system messages (human, ai, tool) are never touched.
+ *
+ * Important constraints:
+ * - The first system message (index 0) is ALWAYS preserved (it's the main system prompt)
+ * - Only system messages are candidates for deduplication
+ * - Messages with content longer than DEDUP_MAX_CONTENT_LENGTH are skipped (too expensive to compare)
+ * - Content comparison is by string equality (fast and deterministic)
+ *
+ * @param messages - The message array to deduplicate (not mutated)
+ * @returns A new array with duplicate system messages removed, and the count of removed messages
+ */
+ export declare function deduplicateSystemMessages(messages: BaseMessage[]): {
+ messages: BaseMessage[];
+ removedCount: number;
+ };
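
A usage sketch, assuming the function is re-exported from the package's main entry (the message values are illustrative):

```ts
import { deduplicateSystemMessages } from '@illuma-ai/agents'; // assumed re-export path
import { HumanMessage, SystemMessage } from '@langchain/core/messages';

const history = [
  new SystemMessage('You are a research agent.'), // main prompt: always kept
  new HumanMessage('Find recent papers on EMA smoothing.'),
  new SystemMessage('Conversation summary: ...'),
  new SystemMessage('Conversation summary: ...'), // duplicate from a rebuilt context
];

const { messages, removedCount } = deduplicateSystemMessages(history);
// removedCount === 1; the first occurrence of each system message survives,
// and the input array itself is left unmodified.
```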
package/dist/types/messages/index.d.ts CHANGED
@@ -6,3 +6,4 @@ export * from './cache';
  export * from './content';
  export * from './tools';
  export * from './summarize';
+ export * from './dedup';
package/dist/types/types/graph.d.ts CHANGED
@@ -343,6 +343,63 @@ export interface StructuredOutputInput {
  /** Whether to enforce strict schema validation */
  strict?: boolean;
  }
+ /**
+ * Trigger strategy for when summarization should activate.
+ * - 'contextPercentage': Trigger when context utilization exceeds a threshold percentage
+ * - 'messageCount': Trigger when pruned message count exceeds a threshold
+ * - 'tokenThreshold': Trigger when total token count exceeds a raw threshold
+ */
+ export type SummarizationTriggerType = 'contextPercentage' | 'messageCount' | 'tokenThreshold';
+ /**
+ * Configuration for summarization behavior within the agent pipeline.
+ * All fields are optional — sensible defaults are provided via constants.
+ *
+ * @see SUMMARIZATION_CONTEXT_THRESHOLD, SUMMARIZATION_RESERVE_RATIO, PRUNING_EMA_ALPHA
+ */
+ export interface SummarizationConfig {
+ /**
+ * Strategy for when summarization triggers.
+ * @default 'contextPercentage'
+ */
+ triggerType?: SummarizationTriggerType;
+ /**
+ * Threshold value interpreted based on triggerType:
+ * - contextPercentage: 0-100 (percentage of context window)
+ * - messageCount: absolute count of messages pruned
+ * - tokenThreshold: absolute token count
+ * @default 80 (for contextPercentage)
+ */
+ triggerThreshold?: number;
+ /**
+ * Fraction of context window (0-1) reserved for recent messages.
+ * Prevents over-pruning by ensuring at least this fraction of the
+ * context budget is preserved as recent conversation history.
+ * @default 0.3
+ */
+ reserveRatio?: number;
+ /**
+ * Whether context pruning is enabled (can be disabled for debugging).
+ * @default true
+ */
+ contextPruning?: boolean;
+ /**
+ * Initial summary text to seed across runs.
+ * Different from persistedSummary: this is provided by the caller as a
+ * cross-conversation seed (e.g., agent personality or recurring context),
+ * while persistedSummary is loaded from the conversation's own history.
+ */
+ initialSummary?: string;
+ }
+ /**
+ * Runtime state for EMA-based pruning calibration.
+ * Maintained across iterations within a single run to smooth pruning decisions.
+ */
+ export interface PruneCalibrationState {
+ /** Current EMA calibration ratio */
+ ratio: number;
+ /** Number of calibration updates applied */
+ iterations: number;
+ }
  export interface AgentInputs {
  agentId: string;
  /** Human-readable name for the agent (used in handoff context). Defaults to agentId if not provided. */
@@ -412,4 +469,10 @@ export interface AgentInputs {
  * Set by Ranger's SummaryStore when resuming a conversation.
  */
  persistedSummary?: string;
+ /**
+ * Summarization configuration controlling trigger strategy, reserve ratio,
+ * and EMA calibration for pruning. When omitted, sensible defaults apply.
+ * @see SummarizationConfig
+ */
+ summarizationConfig?: SummarizationConfig;
  }
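
Putting the new types together, a caller might configure an agent like this; the field values are illustrative and the import path is assumed:

```ts
import type { SummarizationConfig } from '@illuma-ai/agents'; // assumed re-export path

// Trigger on raw token volume instead of the default context percentage.
const summarizationConfig: SummarizationConfig = {
  triggerType: 'tokenThreshold',
  triggerThreshold: 60_000, // absolute token count under this strategy
  reserveRatio: 0.3,        // keep 30% of the budget as recent history
  contextPruning: true,
  initialSummary: 'The user is migrating a large monorepo to pnpm.',
};
```

Under the default 'contextPercentage' strategy, triggerThreshold would instead be read as a 0-100 percentage.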
package/dist/types/utils/index.d.ts CHANGED
@@ -9,3 +9,5 @@ export * from './contextAnalytics';
  export * from './schema';
  export * from './toolCallContinuation';
  export * from './contextPressure';
+ export * from './toolDiscoveryCache';
+ export * from './pruneCalibration';
package/dist/types/utils/pruneCalibration.d.ts ADDED
@@ -0,0 +1,43 @@
+ import type { PruneCalibrationState } from '@/types/graph';
+ /**
+ * Creates an initial pruning calibration state.
+ *
+ * @param initialRatio - Starting calibration ratio (default: 1.0)
+ * @returns Fresh calibration state
+ */
+ export declare function createPruneCalibration(initialRatio?: number): PruneCalibrationState;
+ /**
+ * Updates the pruning calibration using Exponential Moving Average (EMA).
+ *
+ * Problem: Without calibration, the pruner's token estimates can diverge from
+ * reality across iterations, causing either:
+ * - Over-pruning (context cliff): Too many messages removed at once, losing critical tool results
+ * - Under-pruning: Not enough messages removed, hitting hard token limits
+ *
+ * Solution: Track the ratio between actual token usage (from API response) and
+ * estimated token usage (from our token counter). Apply EMA smoothing so the
+ * calibration adjusts gradually, preventing oscillation.
+ *
+ * The calibration ratio is applied to maxTokens in the pruner:
+ * effectiveMaxTokens = maxTokens * calibrationRatio
+ *
+ * If actual > estimated → ratio decreases → prune more aggressively
+ * If actual < estimated → ratio increases → prune less aggressively
+ *
+ * @param state - Current calibration state
+ * @param actualTokens - Actual token count from API response (UsageMetadata)
+ * @param estimatedTokens - Estimated token count from token counter
+ * @param alpha - EMA smoothing factor (default: PRUNING_EMA_ALPHA)
+ * @returns Updated calibration state (new object, does not mutate input)
+ */
+ export declare function updatePruneCalibration(state: PruneCalibrationState, actualTokens: number, estimatedTokens: number, alpha?: number): PruneCalibrationState;
+ /**
+ * Applies the calibration ratio to a max token budget.
+ * The ratio adjusts the effective budget so pruning is more or less aggressive
+ * based on observed vs. estimated token divergence.
+ *
+ * @param maxTokens - Raw max token budget
+ * @param state - Current calibration state
+ * @returns Adjusted max token budget
+ */
+ export declare function applyCalibration(maxTokens: number, state: PruneCalibrationState): number;
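
A usage sketch of the calibration loop, assuming these functions are re-exported from the main entry. The EMA formula in the comment is inferred from the doc comment above, not read from the implementation:

```ts
import {
  applyCalibration,
  createPruneCalibration,
  updatePruneCalibration,
} from '@illuma-ai/agents'; // assumed re-export path

let calibration = createPruneCalibration(); // ratio starts at 1.0 (no adjustment)

// After each LLM call, feed back actual vs. estimated usage.
// Plausibly: ratio = (1 - alpha) * ratio + alpha * (estimated / actual).
calibration = updatePruneCalibration(calibration, 9_500 /* actual */, 8_000 /* estimated */);

// Estimates ran low, so the ratio dips below 1 and the next prune gets a
// smaller effective budget, i.e. it prunes more aggressively.
const effectiveMaxTokens = applyCalibration(100_000, calibration);
```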
package/dist/types/utils/toolDiscoveryCache.d.ts ADDED
@@ -0,0 +1,77 @@
+ import type { BaseMessage } from '@langchain/core/messages';
+ /**
+ * Cached tool discovery entry.
+ * Stores the tool name and the message index where it was discovered,
+ * enabling efficient lookups without re-parsing conversation history.
+ */
+ export interface ToolDiscoveryEntry {
+ /** The tool name that was discovered */
+ toolName: string;
+ /** Message index in conversation history where discovery occurred */
+ discoveredAtIndex: number;
+ }
+ /**
+ * ToolDiscoveryCache provides a run-scoped cache of tool search results.
+ *
+ * Problem: Without caching, every LLM iteration re-parses the full message
+ * history via extractToolDiscoveries() to find tool_search results. In long
+ * conversations with many tool iterations, this is redundant work.
+ *
+ * Solution: Cache discovered tool names by message index. On each iteration,
+ * only scan messages AFTER the last scanned index. Already-seen discoveries
+ * are returned from cache instantly.
+ *
+ * This mirrors the pattern used by VS Code Copilot Chat where tool search
+ * results from prior turns are cached to avoid re-discovery.
+ *
+ * @example
+ * ```ts
+ * const cache = new ToolDiscoveryCache();
+ *
+ * // First call: scans all messages
+ * const newTools = cache.getNewDiscoveries(messages);
+ * // Returns: ['web_search', 'file_read']
+ *
+ * // Second call (3 new messages added): only scans new messages
+ * const moreTools = cache.getNewDiscoveries(messages);
+ * // Returns: ['code_exec'] (only newly discovered)
+ * ```
+ */
+ export declare class ToolDiscoveryCache {
+ /** Set of all discovered tool names (deduped) */
+ private _discoveredTools;
+ /** Last message index that was scanned */
+ private _lastScannedIndex;
+ /**
+ * Scan messages for new tool_search results since the last scan.
+ * Only processes messages after `_lastScannedIndex` to avoid redundant work.
+ *
+ * @param messages - Full conversation message array
+ * @returns Array of newly discovered tool names (not previously cached)
+ */
+ getNewDiscoveries(messages: BaseMessage[]): string[];
+ /**
+ * Returns all tool names discovered so far (across all scans).
+ */
+ getAllDiscoveredTools(): string[];
+ /**
+ * Check if a specific tool has been discovered.
+ */
+ has(toolName: string): boolean;
+ /**
+ * Number of unique tools discovered.
+ */
+ get size(): number;
+ /**
+ * Reset the cache (e.g., on graph reset).
+ */
+ reset(): void;
+ /**
+ * Seed the cache with previously known tool names (e.g., from prior conversation turns).
+ * Does not affect _lastScannedIndex — the next getNewDiscoveries call will still
+ * scan all messages from the beginning.
+ *
+ * @param toolNames - Tool names to pre-seed into the cache
+ */
+ seed(toolNames: string[]): void;
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@illuma-ai/agents",
- "version": "1.0.98",
+ "version": "1.1.0",
  "main": "./dist/cjs/main.cjs",
  "module": "./dist/esm/main.mjs",
  "types": "./dist/types/index.d.ts",
package/src/agents/AgentContext.ts CHANGED
@@ -50,6 +50,7 @@ export class AgentContext {
  discoveredTools,
  summarizeCallback,
  persistedSummary,
+ summarizationConfig,
  } = agentConfig;

  // Normalize structured output: support both camelCase and snake_case inputs
@@ -95,6 +96,7 @@ export class AgentContext {
  discoveredTools,
  summarizeCallback,
  persistedSummary,
+ summarizationConfig,
  });

  if (tokenCounter) {
@@ -246,6 +248,8 @@ export class AgentContext {
  summarizeCallback?: (messages: BaseMessage[]) => Promise<string | undefined>;
  /** Pre-existing summary loaded from persistent storage, injected into context on new turns */
  persistedSummary?: string;
+ /** Summarization configuration controlling trigger strategy, reserve ratio, and EMA calibration */
+ summarizationConfig?: t.SummarizationConfig;

  constructor({
  agentId,
@@ -271,6 +275,7 @@ export class AgentContext {
  discoveredTools,
  summarizeCallback,
  persistedSummary,
+ summarizationConfig,
  }: {
  agentId: string;
  name?: string;
@@ -297,6 +302,7 @@ export class AgentContext {
  messages: BaseMessage[]
  ) => Promise<string | undefined>;
  persistedSummary?: string;
+ summarizationConfig?: t.SummarizationConfig;
  }) {
  this.agentId = agentId;
  this.name = name;
@@ -316,6 +322,7 @@ export class AgentContext {
  this.structuredOutput = structuredOutput;
  this.summarizeCallback = summarizeCallback;
  this.persistedSummary = persistedSummary;
+ this.summarizationConfig = summarizationConfig;
  if (reasoningKey) {
  this.reasoningKey = reasoningKey;
  }
package/src/common/constants.ts CHANGED
@@ -45,3 +45,59 @@ export const MULTI_DOCUMENT_THRESHOLD = 3;
  * 0.9 = 10% reserved for safety.
  */
  export const CONTEXT_SAFETY_BUFFER = 0.9;
+
+ // ============================================================================
+ // SUMMARIZATION CONFIGURATION DEFAULTS
+ //
+ // These constants provide sensible defaults for the SummarizationConfig.
+ // They can be overridden per-agent via AgentInputs.summarizationConfig.
+ // ============================================================================
+
+ /**
+ * Default context utilization percentage (0-100) at which summarization triggers.
+ * When the context window is ≥80% full, pruning + summarization activates.
+ */
+ export const SUMMARIZATION_CONTEXT_THRESHOLD = 80;
+
+ /**
+ * Default reserve ratio (0-1) — fraction of context window to preserve as recent messages.
+ * 0.3 means 30% of the context budget is reserved for the most recent messages,
+ * ensuring the model always has immediate conversation history even after aggressive pruning.
+ */
+ export const SUMMARIZATION_RESERVE_RATIO = 0.3;
+
+ /**
+ * Default EMA (Exponential Moving Average) alpha for pruning calibration.
+ * Controls how quickly the calibration adapts to new token counts.
+ * Higher α = faster adaptation (more responsive to recent changes).
+ * Lower α = smoother adaptation (more stable across iterations).
+ * 0.3 provides a balance between responsiveness and stability.
+ */
+ export const PRUNING_EMA_ALPHA = 0.3;
+
+ /**
+ * Default initial calibration ratio for EMA pruning.
+ * 1.0 means no adjustment on the first iteration (trust the raw token counts).
+ * Subsequent iterations will adjust based on actual vs. estimated token usage.
+ */
+ export const PRUNING_INITIAL_CALIBRATION = 1.0;
+
+ // ============================================================================
+ // TOOL DISCOVERY CACHING
+ // ============================================================================
+
+ /**
+ * Maximum number of tool discovery entries to cache per conversation.
+ * Prevents unbounded memory growth in very long conversations.
+ */
+ export const TOOL_DISCOVERY_CACHE_MAX_SIZE = 200;
+
+ // ============================================================================
+ // MESSAGE DEDUPLICATION
+ // ============================================================================
+
+ /**
+ * Maximum length of system message content to hash for deduplication.
+ * Messages longer than this are always considered unique (hashing would be expensive).
+ */
+ export const DEDUP_MAX_CONTENT_LENGTH = 10000;