@illuma-ai/agents 1.0.96 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/agents/AgentContext.cjs +6 -2
- package/dist/cjs/agents/AgentContext.cjs.map +1 -1
- package/dist/cjs/common/constants.cjs +78 -0
- package/dist/cjs/common/constants.cjs.map +1 -1
- package/dist/cjs/graphs/Graph.cjs +191 -165
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/cjs/main.cjs +22 -0
- package/dist/cjs/main.cjs.map +1 -1
- package/dist/cjs/messages/dedup.cjs +95 -0
- package/dist/cjs/messages/dedup.cjs.map +1 -0
- package/dist/cjs/tools/CodeExecutor.cjs +22 -3
- package/dist/cjs/tools/CodeExecutor.cjs.map +1 -1
- package/dist/cjs/types/graph.cjs.map +1 -1
- package/dist/cjs/utils/contextPressure.cjs +154 -0
- package/dist/cjs/utils/contextPressure.cjs.map +1 -0
- package/dist/cjs/utils/pruneCalibration.cjs +78 -0
- package/dist/cjs/utils/pruneCalibration.cjs.map +1 -0
- package/dist/cjs/utils/run.cjs.map +1 -1
- package/dist/cjs/utils/tokens.cjs.map +1 -1
- package/dist/cjs/utils/toolDiscoveryCache.cjs +127 -0
- package/dist/cjs/utils/toolDiscoveryCache.cjs.map +1 -0
- package/dist/esm/agents/AgentContext.mjs +6 -2
- package/dist/esm/agents/AgentContext.mjs.map +1 -1
- package/dist/esm/common/constants.mjs +71 -1
- package/dist/esm/common/constants.mjs.map +1 -1
- package/dist/esm/graphs/Graph.mjs +192 -166
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/dist/esm/main.mjs +5 -1
- package/dist/esm/main.mjs.map +1 -1
- package/dist/esm/messages/dedup.mjs +93 -0
- package/dist/esm/messages/dedup.mjs.map +1 -0
- package/dist/esm/tools/CodeExecutor.mjs +22 -3
- package/dist/esm/tools/CodeExecutor.mjs.map +1 -1
- package/dist/esm/types/graph.mjs.map +1 -1
- package/dist/esm/utils/contextPressure.mjs +148 -0
- package/dist/esm/utils/contextPressure.mjs.map +1 -0
- package/dist/esm/utils/pruneCalibration.mjs +74 -0
- package/dist/esm/utils/pruneCalibration.mjs.map +1 -0
- package/dist/esm/utils/run.mjs.map +1 -1
- package/dist/esm/utils/tokens.mjs.map +1 -1
- package/dist/esm/utils/toolDiscoveryCache.mjs +125 -0
- package/dist/esm/utils/toolDiscoveryCache.mjs.map +1 -0
- package/dist/types/agents/AgentContext.d.ts +4 -1
- package/dist/types/common/constants.d.ts +49 -0
- package/dist/types/graphs/Graph.d.ts +25 -0
- package/dist/types/messages/dedup.d.ts +25 -0
- package/dist/types/messages/index.d.ts +1 -0
- package/dist/types/types/graph.d.ts +63 -0
- package/dist/types/utils/contextPressure.d.ts +72 -0
- package/dist/types/utils/index.d.ts +3 -0
- package/dist/types/utils/pruneCalibration.d.ts +43 -0
- package/dist/types/utils/toolDiscoveryCache.d.ts +77 -0
- package/package.json +1 -1
- package/src/agents/AgentContext.ts +7 -0
- package/src/common/constants.ts +82 -0
- package/src/graphs/Graph.ts +254 -208
- package/src/graphs/contextManagement.e2e.test.ts +28 -20
- package/src/graphs/gapFeatures.test.ts +520 -0
- package/src/graphs/nonBlockingSummarization.test.ts +307 -0
- package/src/messages/__tests__/dedup.test.ts +166 -0
- package/src/messages/dedup.ts +104 -0
- package/src/messages/index.ts +1 -0
- package/src/specs/agent-handoffs-bedrock.integration.test.ts +7 -7
- package/src/specs/agent-handoffs.test.ts +36 -36
- package/src/specs/thinking-handoff.test.ts +10 -10
- package/src/tools/CodeExecutor.ts +22 -3
- package/src/types/graph.ts +73 -0
- package/src/utils/__tests__/pruneCalibration.test.ts +148 -0
- package/src/utils/__tests__/toolDiscoveryCache.test.ts +214 -0
- package/src/utils/contextPressure.test.ts +262 -0
- package/src/utils/contextPressure.ts +188 -0
- package/src/utils/index.ts +3 -0
- package/src/utils/pruneCalibration.ts +92 -0
- package/src/utils/run.ts +108 -108
- package/src/utils/tokens.ts +118 -118
- package/src/utils/toolDiscoveryCache.ts +150 -0
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
import type { BaseMessage } from '@langchain/core/messages';
|
|
2
|
+
/**
|
|
3
|
+
* Cached tool discovery entry.
|
|
4
|
+
* Stores the tool name and the message index where it was discovered,
|
|
5
|
+
* enabling efficient lookups without re-parsing conversation history.
|
|
6
|
+
*/
|
|
7
|
+
export interface ToolDiscoveryEntry {
|
|
8
|
+
/** The tool name that was discovered */
|
|
9
|
+
toolName: string;
|
|
10
|
+
/** Message index in conversation history where discovery occurred */
|
|
11
|
+
discoveredAtIndex: number;
|
|
12
|
+
}
|
|
13
|
+
/**
|
|
14
|
+
* ToolDiscoveryCache provides a run-scoped cache of tool search results.
|
|
15
|
+
*
|
|
16
|
+
* Problem: Without caching, every LLM iteration re-parses the full message
|
|
17
|
+
* history via extractToolDiscoveries() to find tool_search results. In long
|
|
18
|
+
* conversations with many tool iterations, this is redundant work.
|
|
19
|
+
*
|
|
20
|
+
* Solution: Cache discovered tool names by message index. On each iteration,
|
|
21
|
+
* only scan messages AFTER the last scanned index. Already-seen discoveries
|
|
22
|
+
* are returned from cache instantly.
|
|
23
|
+
*
|
|
24
|
+
* This mirrors the pattern used by VS Code Copilot Chat where tool search
|
|
25
|
+
* results from prior turns are cached to avoid re-discovery.
|
|
26
|
+
*
|
|
27
|
+
* @example
|
|
28
|
+
* ```ts
|
|
29
|
+
* const cache = new ToolDiscoveryCache();
|
|
30
|
+
*
|
|
31
|
+
* // First call: scans all messages
|
|
32
|
+
* const newTools = cache.getNewDiscoveries(messages);
|
|
33
|
+
* // Returns: ['web_search', 'file_read']
|
|
34
|
+
*
|
|
35
|
+
* // Second call (3 new messages added): only scans new messages
|
|
36
|
+
* const moreTools = cache.getNewDiscoveries(messages);
|
|
37
|
+
* // Returns: ['code_exec'] (only newly discovered)
|
|
38
|
+
* ```
|
|
39
|
+
*/
|
|
40
|
+
export declare class ToolDiscoveryCache {
|
|
41
|
+
/** Set of all discovered tool names (deduped) */
|
|
42
|
+
private _discoveredTools;
|
|
43
|
+
/** Last message index that was scanned */
|
|
44
|
+
private _lastScannedIndex;
|
|
45
|
+
/**
|
|
46
|
+
* Scan messages for new tool_search results since the last scan.
|
|
47
|
+
* Only processes messages after `_lastScannedIndex` to avoid redundant work.
|
|
48
|
+
*
|
|
49
|
+
* @param messages - Full conversation message array
|
|
50
|
+
* @returns Array of newly discovered tool names (not previously cached)
|
|
51
|
+
*/
|
|
52
|
+
getNewDiscoveries(messages: BaseMessage[]): string[];
|
|
53
|
+
/**
|
|
54
|
+
* Returns all tool names discovered so far (across all scans).
|
|
55
|
+
*/
|
|
56
|
+
getAllDiscoveredTools(): string[];
|
|
57
|
+
/**
|
|
58
|
+
* Check if a specific tool has been discovered.
|
|
59
|
+
*/
|
|
60
|
+
has(toolName: string): boolean;
|
|
61
|
+
/**
|
|
62
|
+
* Number of unique tools discovered.
|
|
63
|
+
*/
|
|
64
|
+
get size(): number;
|
|
65
|
+
/**
|
|
66
|
+
* Reset the cache (e.g., on graph reset).
|
|
67
|
+
*/
|
|
68
|
+
reset(): void;
|
|
69
|
+
/**
|
|
70
|
+
* Seed the cache with previously known tool names (e.g., from prior conversation turns).
|
|
71
|
+
* Does not affect _lastScannedIndex — the next getNewDiscoveries call will still
|
|
72
|
+
* scan from the same position it would have without seeding (the beginning, if nothing has been scanned yet).
|
|
73
|
+
*
|
|
74
|
+
* @param toolNames - Tool names to pre-seed into the cache
|
|
75
|
+
*/
|
|
76
|
+
seed(toolNames: string[]): void;
|
|
77
|
+
}
|
package/package.json
CHANGED
package/src/agents/AgentContext.ts
CHANGED
|
@@ -50,6 +50,7 @@ export class AgentContext {
|
|
|
50
50
|
discoveredTools,
|
|
51
51
|
summarizeCallback,
|
|
52
52
|
persistedSummary,
|
|
53
|
+
summarizationConfig,
|
|
53
54
|
} = agentConfig;
|
|
54
55
|
|
|
55
56
|
// Normalize structured output: support both camelCase and snake_case inputs
|
|
@@ -95,6 +96,7 @@ export class AgentContext {
|
|
|
95
96
|
discoveredTools,
|
|
96
97
|
summarizeCallback,
|
|
97
98
|
persistedSummary,
|
|
99
|
+
summarizationConfig,
|
|
98
100
|
});
|
|
99
101
|
|
|
100
102
|
if (tokenCounter) {
|
|
@@ -246,6 +248,8 @@ export class AgentContext {
|
|
|
246
248
|
summarizeCallback?: (messages: BaseMessage[]) => Promise<string | undefined>;
|
|
247
249
|
/** Pre-existing summary loaded from persistent storage, injected into context on new turns */
|
|
248
250
|
persistedSummary?: string;
|
|
251
|
+
/** Summarization configuration controlling trigger strategy, reserve ratio, and EMA calibration */
|
|
252
|
+
summarizationConfig?: t.SummarizationConfig;
|
|
249
253
|
|
|
250
254
|
constructor({
|
|
251
255
|
agentId,
|
|
@@ -271,6 +275,7 @@ export class AgentContext {
|
|
|
271
275
|
discoveredTools,
|
|
272
276
|
summarizeCallback,
|
|
273
277
|
persistedSummary,
|
|
278
|
+
summarizationConfig,
|
|
274
279
|
}: {
|
|
275
280
|
agentId: string;
|
|
276
281
|
name?: string;
|
|
@@ -297,6 +302,7 @@ export class AgentContext {
|
|
|
297
302
|
messages: BaseMessage[]
|
|
298
303
|
) => Promise<string | undefined>;
|
|
299
304
|
persistedSummary?: string;
|
|
305
|
+
summarizationConfig?: t.SummarizationConfig;
|
|
300
306
|
}) {
|
|
301
307
|
this.agentId = agentId;
|
|
302
308
|
this.name = name;
|
|
@@ -316,6 +322,7 @@ export class AgentContext {
|
|
|
316
322
|
this.structuredOutput = structuredOutput;
|
|
317
323
|
this.summarizeCallback = summarizeCallback;
|
|
318
324
|
this.persistedSummary = persistedSummary;
|
|
325
|
+
this.summarizationConfig = summarizationConfig;
|
|
319
326
|
if (reasoningKey) {
|
|
320
327
|
this.reasoningKey = reasoningKey;
|
|
321
328
|
}
|
package/src/common/constants.ts
CHANGED
|
@@ -19,3 +19,85 @@ export const MIN_THINKING_BUDGET = 1024;
|
|
|
19
19
|
* compounding across multi-tool conversations (e.g., 10 tool calls).
|
|
20
20
|
*/
|
|
21
21
|
export const TOOL_TURN_THINKING_BUDGET = 1024;
|
|
22
|
+
|
|
23
|
+
// ============================================================================
|
|
24
|
+
// CONTEXT OVERFLOW MANAGEMENT
|
|
25
|
+
//
|
|
26
|
+
// Context overflow is handled mechanically — no token budget numbers are
|
|
27
|
+
// exposed to the LLM. The system uses: pruning (Graph), summarization
|
|
28
|
+
// (summarizeCallback), and auto-continuation (client.js max_tokens detection).
|
|
29
|
+
//
|
|
30
|
+
// See: docs/context-overflow-architecture.md
|
|
31
|
+
// ============================================================================
|
|
32
|
+
|
|
33
|
+
/**
|
|
34
|
+
* Minimum number of attached documents before the multi-document delegation
|
|
35
|
+
* hint is injected. Below this threshold, the agent processes documents
|
|
36
|
+
* directly within its own context.
|
|
37
|
+
*/
|
|
38
|
+
export const MULTI_DOCUMENT_THRESHOLD = 3;
|
|
39
|
+
|
|
40
|
+
/**
|
|
41
|
+
* Context utilization safety buffer multiplier (0-1).
|
|
42
|
+
* Applied as: effectiveMax = (maxContextTokens - maxOutputTokens) * CONTEXT_SAFETY_BUFFER
|
|
43
|
+
*
|
|
44
|
+
* Reserves headroom so the LLM doesn't hit hard token limits mid-generation.
|
|
45
|
+
* 0.9 = 10% reserved for safety.
|
|
46
|
+
*/
|
|
47
|
+
export const CONTEXT_SAFETY_BUFFER = 0.9;
|
|
48
|
+
|
|
49
|
+
// ============================================================================
|
|
50
|
+
// SUMMARIZATION CONFIGURATION DEFAULTS
|
|
51
|
+
//
|
|
52
|
+
// These constants provide sensible defaults for the SummarizationConfig.
|
|
53
|
+
// They can be overridden per-agent via AgentInputs.summarizationConfig.
|
|
54
|
+
// ============================================================================
|
|
55
|
+
|
|
56
|
+
/**
|
|
57
|
+
* Default context utilization percentage (0-100) at which summarization triggers.
|
|
58
|
+
* When the context window is ≥80% full, pruning and summarization activate.
|
|
59
|
+
*/
|
|
60
|
+
export const SUMMARIZATION_CONTEXT_THRESHOLD = 80;
|
|
61
|
+
|
|
62
|
+
/**
|
|
63
|
+
* Default reserve ratio (0-1) — fraction of context window to preserve as recent messages.
|
|
64
|
+
* 0.3 means 30% of the context budget is reserved for the most recent messages,
|
|
65
|
+
* ensuring the model always has immediate conversation history even after aggressive pruning.
|
|
66
|
+
*/
|
|
67
|
+
export const SUMMARIZATION_RESERVE_RATIO = 0.3;
|
|
68
|
+
|
|
69
|
+
/**
|
|
70
|
+
* Default EMA (Exponential Moving Average) alpha for pruning calibration.
|
|
71
|
+
* Controls how quickly the calibration adapts to new token counts.
|
|
72
|
+
* Higher α = faster adaptation (more responsive to recent changes).
|
|
73
|
+
* Lower α = smoother adaptation (more stable across iterations).
|
|
74
|
+
* 0.3 provides a balance between responsiveness and stability.
|
|
75
|
+
*/
|
|
76
|
+
export const PRUNING_EMA_ALPHA = 0.3;
|
|
77
|
+
|
|
78
|
+
/**
|
|
79
|
+
* Default initial calibration ratio for EMA pruning.
|
|
80
|
+
* 1.0 means no adjustment on the first iteration (trust the raw token counts).
|
|
81
|
+
* Subsequent iterations will adjust based on actual vs. estimated token usage.
|
|
82
|
+
*/
|
|
83
|
+
export const PRUNING_INITIAL_CALIBRATION = 1.0;
|
|
84
|
+
|
|
85
|
+
// ============================================================================
|
|
86
|
+
// TOOL DISCOVERY CACHING
|
|
87
|
+
// ============================================================================
|
|
88
|
+
|
|
89
|
+
/**
|
|
90
|
+
* Maximum number of tool discovery entries to cache per conversation.
|
|
91
|
+
* Prevents unbounded memory growth in very long conversations.
|
|
92
|
+
*/
|
|
93
|
+
export const TOOL_DISCOVERY_CACHE_MAX_SIZE = 200;
|
|
94
|
+
|
|
95
|
+
// ============================================================================
|
|
96
|
+
// MESSAGE DEDUPLICATION
|
|
97
|
+
// ============================================================================
|
|
98
|
+
|
|
99
|
+
/**
|
|
100
|
+
* Maximum length of system message content to hash for deduplication.
|
|
101
|
+
* Messages longer than this are always considered unique (hashing would be expensive).
|
|
102
|
+
*/
|
|
103
|
+
export const DEDUP_MAX_CONTENT_LENGTH = 10000;
|