@illuma-ai/agents 1.0.96 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/agents/AgentContext.cjs +6 -2
- package/dist/cjs/agents/AgentContext.cjs.map +1 -1
- package/dist/cjs/common/constants.cjs +78 -0
- package/dist/cjs/common/constants.cjs.map +1 -1
- package/dist/cjs/graphs/Graph.cjs +191 -165
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/cjs/main.cjs +22 -0
- package/dist/cjs/main.cjs.map +1 -1
- package/dist/cjs/messages/dedup.cjs +95 -0
- package/dist/cjs/messages/dedup.cjs.map +1 -0
- package/dist/cjs/tools/CodeExecutor.cjs +22 -3
- package/dist/cjs/tools/CodeExecutor.cjs.map +1 -1
- package/dist/cjs/types/graph.cjs.map +1 -1
- package/dist/cjs/utils/contextPressure.cjs +154 -0
- package/dist/cjs/utils/contextPressure.cjs.map +1 -0
- package/dist/cjs/utils/pruneCalibration.cjs +78 -0
- package/dist/cjs/utils/pruneCalibration.cjs.map +1 -0
- package/dist/cjs/utils/run.cjs.map +1 -1
- package/dist/cjs/utils/tokens.cjs.map +1 -1
- package/dist/cjs/utils/toolDiscoveryCache.cjs +127 -0
- package/dist/cjs/utils/toolDiscoveryCache.cjs.map +1 -0
- package/dist/esm/agents/AgentContext.mjs +6 -2
- package/dist/esm/agents/AgentContext.mjs.map +1 -1
- package/dist/esm/common/constants.mjs +71 -1
- package/dist/esm/common/constants.mjs.map +1 -1
- package/dist/esm/graphs/Graph.mjs +192 -166
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/dist/esm/main.mjs +5 -1
- package/dist/esm/main.mjs.map +1 -1
- package/dist/esm/messages/dedup.mjs +93 -0
- package/dist/esm/messages/dedup.mjs.map +1 -0
- package/dist/esm/tools/CodeExecutor.mjs +22 -3
- package/dist/esm/tools/CodeExecutor.mjs.map +1 -1
- package/dist/esm/types/graph.mjs.map +1 -1
- package/dist/esm/utils/contextPressure.mjs +148 -0
- package/dist/esm/utils/contextPressure.mjs.map +1 -0
- package/dist/esm/utils/pruneCalibration.mjs +74 -0
- package/dist/esm/utils/pruneCalibration.mjs.map +1 -0
- package/dist/esm/utils/run.mjs.map +1 -1
- package/dist/esm/utils/tokens.mjs.map +1 -1
- package/dist/esm/utils/toolDiscoveryCache.mjs +125 -0
- package/dist/esm/utils/toolDiscoveryCache.mjs.map +1 -0
- package/dist/types/agents/AgentContext.d.ts +4 -1
- package/dist/types/common/constants.d.ts +49 -0
- package/dist/types/graphs/Graph.d.ts +25 -0
- package/dist/types/messages/dedup.d.ts +25 -0
- package/dist/types/messages/index.d.ts +1 -0
- package/dist/types/types/graph.d.ts +63 -0
- package/dist/types/utils/contextPressure.d.ts +72 -0
- package/dist/types/utils/index.d.ts +3 -0
- package/dist/types/utils/pruneCalibration.d.ts +43 -0
- package/dist/types/utils/toolDiscoveryCache.d.ts +77 -0
- package/package.json +1 -1
- package/src/agents/AgentContext.ts +7 -0
- package/src/common/constants.ts +82 -0
- package/src/graphs/Graph.ts +254 -208
- package/src/graphs/contextManagement.e2e.test.ts +28 -20
- package/src/graphs/gapFeatures.test.ts +520 -0
- package/src/graphs/nonBlockingSummarization.test.ts +307 -0
- package/src/messages/__tests__/dedup.test.ts +166 -0
- package/src/messages/dedup.ts +104 -0
- package/src/messages/index.ts +1 -0
- package/src/specs/agent-handoffs-bedrock.integration.test.ts +7 -7
- package/src/specs/agent-handoffs.test.ts +36 -36
- package/src/specs/thinking-handoff.test.ts +10 -10
- package/src/tools/CodeExecutor.ts +22 -3
- package/src/types/graph.ts +73 -0
- package/src/utils/__tests__/pruneCalibration.test.ts +148 -0
- package/src/utils/__tests__/toolDiscoveryCache.test.ts +214 -0
- package/src/utils/contextPressure.test.ts +262 -0
- package/src/utils/contextPressure.ts +188 -0
- package/src/utils/index.ts +3 -0
- package/src/utils/pruneCalibration.ts +92 -0
- package/src/utils/run.ts +108 -108
- package/src/utils/tokens.ts +118 -118
- package/src/utils/toolDiscoveryCache.ts +150 -0
package/src/graphs/Graph.ts
CHANGED
|
@@ -34,9 +34,9 @@ import type * as t from '@/types';
|
|
|
34
34
|
import {
|
|
35
35
|
formatAnthropicArtifactContent,
|
|
36
36
|
ensureThinkingBlockInMessages,
|
|
37
|
+
deduplicateSystemMessages,
|
|
37
38
|
convertMessagesToContent,
|
|
38
39
|
addBedrockCacheControl,
|
|
39
|
-
extractToolDiscoveries,
|
|
40
40
|
modifyDeltaProperties,
|
|
41
41
|
formatArtifactPayload,
|
|
42
42
|
formatContentStrings,
|
|
@@ -53,14 +53,20 @@ import {
|
|
|
53
53
|
MessageTypes,
|
|
54
54
|
Constants,
|
|
55
55
|
TOOL_TURN_THINKING_BUDGET,
|
|
56
|
+
SUMMARIZATION_CONTEXT_THRESHOLD,
|
|
56
57
|
} from '@/common';
|
|
57
58
|
import {
|
|
59
|
+
ToolDiscoveryCache,
|
|
58
60
|
resetIfNotEmpty,
|
|
59
61
|
isOpenAILike,
|
|
60
62
|
isGoogleLike,
|
|
61
63
|
joinKeys,
|
|
62
64
|
sleep,
|
|
65
|
+
createPruneCalibration,
|
|
66
|
+
updatePruneCalibration,
|
|
67
|
+
applyCalibration,
|
|
63
68
|
} from '@/utils';
|
|
69
|
+
import type { PruneCalibrationState } from '@/types/graph';
|
|
64
70
|
import {
|
|
65
71
|
buildContextAnalytics,
|
|
66
72
|
type ContextAnalytics,
|
|
@@ -69,6 +75,13 @@ import { getChatModelClass, manualToolStreamProviders } from '@/llm/providers';
|
|
|
69
75
|
import { ToolNode as CustomToolNode, toolsCondition } from '@/tools/ToolNode';
|
|
70
76
|
import { ChatOpenAI, AzureChatOpenAI } from '@/llm/openai';
|
|
71
77
|
import { safeDispatchCustomEvent } from '@/utils/events';
|
|
78
|
+
import {
|
|
79
|
+
detectDocuments,
|
|
80
|
+
shouldInjectMultiDocHint,
|
|
81
|
+
buildMultiDocHintContent,
|
|
82
|
+
buildPostPruneNote,
|
|
83
|
+
hasTaskTool,
|
|
84
|
+
} from '@/utils/contextPressure';
|
|
72
85
|
import { createSchemaOnlyTools } from '@/tools/schema';
|
|
73
86
|
import { prepareSchemaForProvider } from '@/schemas/validate';
|
|
74
87
|
import { AgentContext } from '@/agents/AgentContext';
|
|
@@ -198,6 +211,13 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
198
211
|
runId: string | undefined;
|
|
199
212
|
startIndex: number = 0;
|
|
200
213
|
signal?: AbortSignal;
|
|
214
|
+
/** Cached summary from the first prune in this run.
|
|
215
|
+
* Reused for subsequent prunes to avoid blocking LLM calls on every tool iteration. */
|
|
216
|
+
private _cachedRunSummary: string | undefined;
|
|
217
|
+
/** EMA-based pruning calibration state — smooths token budget adjustments across iterations */
|
|
218
|
+
private _pruneCalibration: PruneCalibrationState;
|
|
219
|
+
/** Run-scoped tool discovery cache — avoids re-parsing conversation history on every iteration */
|
|
220
|
+
private _toolDiscoveryCache: ToolDiscoveryCache;
|
|
201
221
|
/** Map of agent contexts by agent ID */
|
|
202
222
|
agentContexts: Map<string, AgentContext> = new Map();
|
|
203
223
|
/** Default agent ID to use */
|
|
@@ -232,6 +252,22 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
232
252
|
}
|
|
233
253
|
|
|
234
254
|
this.defaultAgentId = agents[0].agentId;
|
|
255
|
+
|
|
256
|
+
// Seed cached summary from persisted storage so the first prune in a
|
|
257
|
+
// resumed conversation can also skip the synchronous LLM summarization call
|
|
258
|
+
const primaryContext = this.agentContexts.get(this.defaultAgentId);
|
|
259
|
+
if (primaryContext?.persistedSummary) {
|
|
260
|
+
this._cachedRunSummary = primaryContext.persistedSummary;
|
|
261
|
+
}
|
|
262
|
+
|
|
263
|
+
// Initialize EMA pruning calibration
|
|
264
|
+
this._pruneCalibration = createPruneCalibration();
|
|
265
|
+
|
|
266
|
+
// Initialize tool discovery cache, seeded with any pre-existing discoveries
|
|
267
|
+
this._toolDiscoveryCache = new ToolDiscoveryCache();
|
|
268
|
+
if (primaryContext?.discoveredToolNames.size) {
|
|
269
|
+
this._toolDiscoveryCache.seed([...primaryContext.discoveredToolNames]);
|
|
270
|
+
}
|
|
235
271
|
}
|
|
236
272
|
|
|
237
273
|
/* Init */
|
|
@@ -265,6 +301,9 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
265
301
|
new Map()
|
|
266
302
|
);
|
|
267
303
|
this.invokedToolIds = resetIfNotEmpty(this.invokedToolIds, undefined);
|
|
304
|
+
// Reset EMA calibration and tool discovery cache for fresh run
|
|
305
|
+
this._pruneCalibration = createPruneCalibration();
|
|
306
|
+
this._toolDiscoveryCache.reset();
|
|
268
307
|
for (const context of this.agentContexts.values()) {
|
|
269
308
|
context.reset();
|
|
270
309
|
}
|
|
@@ -371,6 +410,70 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
371
410
|
return clientOptions;
|
|
372
411
|
}
|
|
373
412
|
|
|
413
|
+
/**
|
|
414
|
+
* Determines whether summarization should trigger based on SummarizationConfig.
|
|
415
|
+
*
|
|
416
|
+
* Supports three trigger strategies:
|
|
417
|
+
* - contextPercentage (default): Trigger when context utilization >= threshold%
|
|
418
|
+
* - messageCount: Trigger when pruned message count >= threshold
|
|
419
|
+
* - tokenThreshold: Trigger when total estimated tokens >= threshold
|
|
420
|
+
*
|
|
421
|
+
* When no config is provided, always triggers (preserves backward compatibility).
|
|
422
|
+
*
|
|
423
|
+
* @param prunedMessageCount - Number of messages that were pruned
|
|
424
|
+
* @param maxContextTokens - Maximum context token budget
|
|
425
|
+
* @param indexTokenCountMap - Token count map by message index
|
|
426
|
+
* @param instructionTokens - Token count for instructions/system message
|
|
427
|
+
* @param config - Optional SummarizationConfig
|
|
428
|
+
* @returns Whether summarization should be triggered
|
|
429
|
+
*/
|
|
430
|
+
private shouldTriggerSummarization(
|
|
431
|
+
prunedMessageCount: number,
|
|
432
|
+
maxContextTokens: number,
|
|
433
|
+
indexTokenCountMap: Record<string, number | undefined>,
|
|
434
|
+
instructionTokens: number,
|
|
435
|
+
config?: t.SummarizationConfig
|
|
436
|
+
): boolean {
|
|
437
|
+
// No pruned messages means nothing to summarize
|
|
438
|
+
if (prunedMessageCount === 0) {
|
|
439
|
+
return false;
|
|
440
|
+
}
|
|
441
|
+
|
|
442
|
+
// No config = backward compatible (always summarize when messages are pruned)
|
|
443
|
+
if (!config || !config.triggerType) {
|
|
444
|
+
return true;
|
|
445
|
+
}
|
|
446
|
+
|
|
447
|
+
const threshold = config.triggerThreshold;
|
|
448
|
+
|
|
449
|
+
switch (config.triggerType) {
|
|
450
|
+
case 'contextPercentage': {
|
|
451
|
+
if (maxContextTokens <= 0) return true;
|
|
452
|
+
const effectiveThreshold = threshold ?? SUMMARIZATION_CONTEXT_THRESHOLD;
|
|
453
|
+
let totalTokens = instructionTokens;
|
|
454
|
+
for (const key in indexTokenCountMap) {
|
|
455
|
+
totalTokens += indexTokenCountMap[key] ?? 0;
|
|
456
|
+
}
|
|
457
|
+
const utilization = (totalTokens / maxContextTokens) * 100;
|
|
458
|
+
return utilization >= effectiveThreshold;
|
|
459
|
+
}
|
|
460
|
+
case 'messageCount': {
|
|
461
|
+
const effectiveThreshold = threshold ?? 5;
|
|
462
|
+
return prunedMessageCount >= effectiveThreshold;
|
|
463
|
+
}
|
|
464
|
+
case 'tokenThreshold': {
|
|
465
|
+
if (threshold == null) return true;
|
|
466
|
+
let totalTokens = instructionTokens;
|
|
467
|
+
for (const key in indexTokenCountMap) {
|
|
468
|
+
totalTokens += indexTokenCountMap[key] ?? 0;
|
|
469
|
+
}
|
|
470
|
+
return totalTokens >= threshold;
|
|
471
|
+
}
|
|
472
|
+
default:
|
|
473
|
+
return true;
|
|
474
|
+
}
|
|
475
|
+
}
|
|
476
|
+
|
|
374
477
|
/**
|
|
375
478
|
* Returns the normalized finish/stop reason from the last LLM invocation.
|
|
376
479
|
* Used by callers to detect when the response was truncated due to max_tokens.
|
|
@@ -528,9 +631,6 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
528
631
|
|
|
529
632
|
getRunMessages(): BaseMessage[] | undefined {
|
|
530
633
|
const result = this.messages.slice(this.startIndex);
|
|
531
|
-
console.debug(
|
|
532
|
-
`[Graph] getRunMessages() | totalMessages=${this.messages.length} | startIndex=${this.startIndex} | runMessages=${result.length}`
|
|
533
|
-
);
|
|
534
634
|
return result;
|
|
535
635
|
}
|
|
536
636
|
|
|
@@ -1320,10 +1420,15 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
1320
1420
|
messages = [dynamicContextMessage, ackMessage, ...messages];
|
|
1321
1421
|
}
|
|
1322
1422
|
|
|
1323
|
-
//
|
|
1324
|
-
|
|
1325
|
-
|
|
1326
|
-
|
|
1423
|
+
// Tool discovery caching: only scan new messages since last iteration
|
|
1424
|
+
// instead of re-parsing the full history via extractToolDiscoveries()
|
|
1425
|
+
const cachedDiscoveries =
|
|
1426
|
+
this._toolDiscoveryCache.getNewDiscoveries(messages);
|
|
1427
|
+
if (cachedDiscoveries.length > 0) {
|
|
1428
|
+
agentContext.markToolsAsDiscovered(cachedDiscoveries);
|
|
1429
|
+
console.debug(
|
|
1430
|
+
`[Graph:ToolDiscovery] Cached ${cachedDiscoveries.length} new tools (total: ${this._toolDiscoveryCache.size})`
|
|
1431
|
+
);
|
|
1327
1432
|
}
|
|
1328
1433
|
|
|
1329
1434
|
const toolsForBinding = agentContext.getToolsForBinding();
|
|
@@ -1367,45 +1472,12 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
1367
1472
|
|
|
1368
1473
|
// ====================================================================
|
|
1369
1474
|
// PRE-PRUNING DELEGATION CHECK
|
|
1370
|
-
// Before pruning strips messages (losing context), check if we should
|
|
1371
|
-
// delegate instead. If context would be pruned AND the agent has the
|
|
1372
|
-
// task tool, inject a delegation hint and SKIP pruning — preserving
|
|
1373
|
-
// the content for the LLM to understand what to delegate.
|
|
1374
1475
|
// ====================================================================
|
|
1375
|
-
|
|
1376
|
-
|
|
1377
|
-
|
|
1378
|
-
|
|
1379
|
-
|
|
1380
|
-
: '';
|
|
1381
|
-
return toolName === 'task';
|
|
1382
|
-
});
|
|
1383
|
-
|
|
1384
|
-
if (
|
|
1385
|
-
hasTaskToolPrePrune === true &&
|
|
1386
|
-
agentContext.tokenCounter &&
|
|
1387
|
-
agentContext.maxContextTokens != null
|
|
1388
|
-
) {
|
|
1389
|
-
// Estimate total tokens in messages BEFORE pruning
|
|
1390
|
-
let prePruneTokens = 0;
|
|
1391
|
-
for (const msg of messages) {
|
|
1392
|
-
prePruneTokens += agentContext.tokenCounter(msg);
|
|
1393
|
-
}
|
|
1394
|
-
// Add instruction tokens (system prompt)
|
|
1395
|
-
prePruneTokens += agentContext.instructionTokens;
|
|
1396
|
-
|
|
1397
|
-
const prePruneUtilization =
|
|
1398
|
-
(prePruneTokens / agentContext.maxContextTokens) * 100;
|
|
1399
|
-
|
|
1400
|
-
if (prePruneUtilization > 70) {
|
|
1401
|
-
console.warn(
|
|
1402
|
-
`[Graph] PRE-PRUNE delegation check: ${prePruneUtilization.toFixed(1)}% utilization ` +
|
|
1403
|
-
`(${prePruneTokens}/${agentContext.maxContextTokens} tokens). ` +
|
|
1404
|
-
'Injecting delegation hint INSTEAD of pruning.'
|
|
1405
|
-
);
|
|
1406
|
-
delegationInjectedPrePrune = true;
|
|
1407
|
-
}
|
|
1408
|
-
}
|
|
1476
|
+
// Context management is now fully mechanical:
|
|
1477
|
+
// - Pruning always runs when needed (no delegation-based skip)
|
|
1478
|
+
// - Auto-continuation in client.js handles max_tokens finish reason
|
|
1479
|
+
// - LLM never sees raw token numbers (prevents voluntary bail-out)
|
|
1480
|
+
// ====================================================================
|
|
1409
1481
|
|
|
1410
1482
|
if (
|
|
1411
1483
|
!agentContext.pruneMessages &&
|
|
@@ -1426,48 +1498,126 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
1426
1498
|
?.thinking as t.AnthropicClientOptions['thinking']
|
|
1427
1499
|
)?.type === 'enabled');
|
|
1428
1500
|
|
|
1501
|
+
// Apply EMA calibration to max token budget — smooths pruning across iterations
|
|
1502
|
+
const calibratedMaxTokens = applyCalibration(
|
|
1503
|
+
agentContext.maxContextTokens,
|
|
1504
|
+
this._pruneCalibration
|
|
1505
|
+
);
|
|
1506
|
+
|
|
1429
1507
|
agentContext.pruneMessages = createPruneMessages({
|
|
1430
1508
|
startIndex: this.startIndex,
|
|
1431
1509
|
provider: agentContext.provider,
|
|
1432
1510
|
tokenCounter: agentContext.tokenCounter,
|
|
1433
|
-
maxTokens:
|
|
1511
|
+
maxTokens: calibratedMaxTokens,
|
|
1434
1512
|
thinkingEnabled: isAnthropicWithThinking,
|
|
1435
1513
|
indexTokenCountMap: agentContext.indexTokenCountMap,
|
|
1436
1514
|
});
|
|
1437
1515
|
}
|
|
1438
1516
|
|
|
1439
|
-
|
|
1440
|
-
|
|
1441
|
-
|
|
1442
|
-
|
|
1517
|
+
// Update EMA calibration with actual token usage from API response
|
|
1518
|
+
if (
|
|
1519
|
+
agentContext.currentUsage?.input_tokens &&
|
|
1520
|
+
agentContext.maxContextTokens
|
|
1521
|
+
) {
|
|
1522
|
+
const estimatedTokens = Object.values(
|
|
1523
|
+
agentContext.indexTokenCountMap
|
|
1524
|
+
).reduce((sum, v) => (sum ?? 0) + (v ?? 0), 0) as number;
|
|
1525
|
+
if (estimatedTokens > 0) {
|
|
1526
|
+
this._pruneCalibration = updatePruneCalibration(
|
|
1527
|
+
this._pruneCalibration,
|
|
1528
|
+
agentContext.currentUsage.input_tokens,
|
|
1529
|
+
estimatedTokens
|
|
1530
|
+
);
|
|
1531
|
+
}
|
|
1532
|
+
}
|
|
1533
|
+
|
|
1534
|
+
if (agentContext.pruneMessages) {
|
|
1443
1535
|
const { context, indexTokenCountMap, messagesToRefine } =
|
|
1444
1536
|
agentContext.pruneMessages({
|
|
1445
1537
|
messages,
|
|
1446
1538
|
usageMetadata: agentContext.currentUsage,
|
|
1447
|
-
// startOnMessageType: 'human',
|
|
1448
1539
|
});
|
|
1449
1540
|
agentContext.indexTokenCountMap = indexTokenCountMap;
|
|
1450
1541
|
messagesToUse = context;
|
|
1451
|
-
|
|
1452
|
-
|
|
1542
|
+
|
|
1543
|
+
// ── Non-blocking summarization ──────────────────────────────────
|
|
1544
|
+
// NEVER block the LLM call waiting for summarization. Instead:
|
|
1545
|
+
// 1. If _cachedRunSummary exists → use it, fire async update
|
|
1546
|
+
// 2. If persistedSummary exists → use it as fallback, fire async update
|
|
1547
|
+
// 3. If NOTHING exists (first-ever prune) → skip summary, fire async generation
|
|
1548
|
+
// The summary catches up asynchronously and is available for subsequent
|
|
1549
|
+
// iterations (tool calls) and the next conversation turn.
|
|
1550
|
+
//
|
|
1551
|
+
// SummarizationConfig integration:
|
|
1552
|
+
// - triggerType/triggerThreshold control WHEN summarization fires
|
|
1553
|
+
// - reserveRatio is enforced via calibrated maxTokens (above)
|
|
1554
|
+
// - initialSummary provides cross-run seeding as fallback before persistedSummary
|
|
1555
|
+
let hasSummary = false;
|
|
1556
|
+
const sumConfig = agentContext.summarizationConfig;
|
|
1557
|
+
const shouldSummarize = this.shouldTriggerSummarization(
|
|
1558
|
+
messagesToRefine.length,
|
|
1559
|
+
agentContext.maxContextTokens ?? 0,
|
|
1560
|
+
agentContext.indexTokenCountMap,
|
|
1561
|
+
agentContext.instructionTokens,
|
|
1562
|
+
sumConfig
|
|
1453
1563
|
);
|
|
1454
1564
|
|
|
1455
|
-
|
|
1456
|
-
|
|
1457
|
-
|
|
1458
|
-
|
|
1459
|
-
|
|
1565
|
+
if (
|
|
1566
|
+
messagesToRefine.length > 0 &&
|
|
1567
|
+
agentContext.summarizeCallback &&
|
|
1568
|
+
shouldSummarize
|
|
1569
|
+
) {
|
|
1460
1570
|
try {
|
|
1461
|
-
|
|
1462
|
-
|
|
1571
|
+
let summary: string | undefined;
|
|
1572
|
+
let summarySource: string;
|
|
1573
|
+
|
|
1574
|
+
if (this._cachedRunSummary != null) {
|
|
1575
|
+
summary = this._cachedRunSummary;
|
|
1576
|
+
summarySource = 'cached';
|
|
1577
|
+
} else if (
|
|
1578
|
+
agentContext.persistedSummary != null &&
|
|
1579
|
+
agentContext.persistedSummary !== ''
|
|
1580
|
+
) {
|
|
1581
|
+
summary = agentContext.persistedSummary;
|
|
1582
|
+
this._cachedRunSummary = summary;
|
|
1583
|
+
summarySource = 'persisted';
|
|
1584
|
+
} else if (
|
|
1585
|
+
sumConfig?.initialSummary != null &&
|
|
1586
|
+
sumConfig.initialSummary !== ''
|
|
1587
|
+
) {
|
|
1588
|
+
// Cross-run seed: use initialSummary when no persisted summary exists
|
|
1589
|
+
summary = sumConfig.initialSummary;
|
|
1590
|
+
this._cachedRunSummary = summary;
|
|
1591
|
+
summarySource = 'initial-seed';
|
|
1592
|
+
} else {
|
|
1593
|
+
summarySource = 'none';
|
|
1594
|
+
}
|
|
1595
|
+
|
|
1596
|
+
// Single consolidated log for the entire prune+summarize decision
|
|
1463
1597
|
console.debug(
|
|
1464
|
-
`[Graph:ContextMgmt]
|
|
1598
|
+
`[Graph:ContextMgmt] Pruned ${messages.length}→${context.length} msgs (${messagesToRefine.length} discarded) | summary=${summarySource}${summary ? ` (len=${summary.length})` : ''} | calibration=${this._pruneCalibration.ratio.toFixed(3)}(${this._pruneCalibration.iterations})`
|
|
1465
1599
|
);
|
|
1600
|
+
|
|
1601
|
+
// Fire background summarization — updates cache for next iteration/turn
|
|
1602
|
+
agentContext
|
|
1603
|
+
.summarizeCallback(messagesToRefine)
|
|
1604
|
+
.then((updated) => {
|
|
1605
|
+
if (updated != null && updated !== '') {
|
|
1606
|
+
this._cachedRunSummary = updated;
|
|
1607
|
+
}
|
|
1608
|
+
})
|
|
1609
|
+
.catch((err) => {
|
|
1610
|
+
console.error(
|
|
1611
|
+
'[Graph] Background summary failed (non-fatal):',
|
|
1612
|
+
err
|
|
1613
|
+
);
|
|
1614
|
+
});
|
|
1615
|
+
|
|
1466
1616
|
if (summary != null && summary !== '') {
|
|
1617
|
+
hasSummary = true;
|
|
1467
1618
|
const summaryMsg = new SystemMessage(
|
|
1468
1619
|
`[Conversation Summary]\n${summary}`
|
|
1469
1620
|
);
|
|
1470
|
-
// Insert after system message (if present), before conversation messages
|
|
1471
1621
|
const systemIdx =
|
|
1472
1622
|
messagesToUse[0]?.getType() === 'system' ? 1 : 0;
|
|
1473
1623
|
messagesToUse = [
|
|
@@ -1475,18 +1625,40 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
1475
1625
|
summaryMsg,
|
|
1476
1626
|
...messagesToUse.slice(systemIdx),
|
|
1477
1627
|
];
|
|
1478
|
-
console.debug(
|
|
1479
|
-
`[Graph:ContextMgmt] Summary injected at index ${systemIdx} | finalMsgCount=${messagesToUse.length}`
|
|
1480
|
-
);
|
|
1481
1628
|
}
|
|
1482
1629
|
} catch (err) {
|
|
1483
|
-
console.error('[Graph] Summarization
|
|
1630
|
+
console.error('[Graph] Summarization failed:', err);
|
|
1631
|
+
}
|
|
1632
|
+
} else if (messagesToRefine.length > 0) {
|
|
1633
|
+
// Log pruning even when no summarize callback (discard mode)
|
|
1634
|
+
console.debug(
|
|
1635
|
+
`[Graph:ContextMgmt] Pruned ${messages.length}→${context.length} msgs (${messagesToRefine.length} discarded, no summary callback) | calibration=${this._pruneCalibration.ratio.toFixed(3)}`
|
|
1636
|
+
);
|
|
1637
|
+
}
|
|
1638
|
+
|
|
1639
|
+
// Deduplicate system messages that accumulate from repeated tool iterations
|
|
1640
|
+
const { messages: dedupedMessages, removedCount } =
|
|
1641
|
+
deduplicateSystemMessages(messagesToUse);
|
|
1642
|
+
if (removedCount > 0) {
|
|
1643
|
+
messagesToUse = dedupedMessages;
|
|
1644
|
+
console.debug(
|
|
1645
|
+
`[Graph:Dedup] Removed ${removedCount} duplicate system message(s)`
|
|
1646
|
+
);
|
|
1647
|
+
}
|
|
1648
|
+
|
|
1649
|
+
// Post-prune context note for task-tool-enabled agents
|
|
1650
|
+
if (messagesToRefine.length > 0 && hasTaskTool(agentContext.tools)) {
|
|
1651
|
+
const postPruneNote = buildPostPruneNote(
|
|
1652
|
+
messagesToRefine.length,
|
|
1653
|
+
hasSummary
|
|
1654
|
+
);
|
|
1655
|
+
if (postPruneNote) {
|
|
1656
|
+
messagesToUse = [
|
|
1657
|
+
...messagesToUse,
|
|
1658
|
+
new SystemMessage(postPruneNote),
|
|
1659
|
+
];
|
|
1484
1660
|
}
|
|
1485
1661
|
}
|
|
1486
|
-
} else if (delegationInjectedPrePrune) {
|
|
1487
|
-
console.info(
|
|
1488
|
-
'[Graph] Skipping pruning — delegation will handle context pressure'
|
|
1489
|
-
);
|
|
1490
1662
|
}
|
|
1491
1663
|
|
|
1492
1664
|
let finalMessages = messagesToUse;
|
|
@@ -1645,106 +1817,24 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
1645
1817
|
);
|
|
1646
1818
|
|
|
1647
1819
|
// ====================================================================
|
|
1648
|
-
//
|
|
1820
|
+
// MULTI-DOCUMENT DELEGATION (task-driven, not budget-driven)
|
|
1649
1821
|
//
|
|
1650
|
-
//
|
|
1651
|
-
//
|
|
1652
|
-
//
|
|
1653
|
-
//
|
|
1654
|
-
// rather than trying to process all documents itself.
|
|
1655
|
-
// 2. TOKEN UTILIZATION: At EVERY iteration, if context is filling up
|
|
1656
|
-
// (70%/85%), inject escalating hints to delegate remaining work.
|
|
1657
|
-
//
|
|
1658
|
-
// This runs mid-chain — so even if tool responses push context up
|
|
1659
|
-
// after the first LLM call, subsequent iterations get the hint.
|
|
1822
|
+
// Token-based pressure hints have been removed — the LLM never sees
|
|
1823
|
+
// raw token numbers. Context overflow is handled mechanically by
|
|
1824
|
+
// pruning (Graph) + auto-continuation (client.js max_tokens detection).
|
|
1825
|
+
// See: docs/context-overflow-architecture.md
|
|
1660
1826
|
// ====================================================================
|
|
1661
|
-
|
|
1662
|
-
const
|
|
1663
|
-
|
|
1664
|
-
? (tool as { name: string }).name
|
|
1665
|
-
: '';
|
|
1666
|
-
return toolName === 'task';
|
|
1667
|
-
});
|
|
1827
|
+
if (hasTaskTool(agentContext.tools)) {
|
|
1828
|
+
const { count: documentCount, names: documentNames } =
|
|
1829
|
+
detectDocuments(finalMessages);
|
|
1668
1830
|
|
|
1669
|
-
|
|
1670
|
-
hasTaskToolInContext === true &&
|
|
1671
|
-
contextAnalytics.utilizationPercent != null &&
|
|
1672
|
-
contextAnalytics.maxContextTokens != null
|
|
1673
|
-
) {
|
|
1674
|
-
const utilization = contextAnalytics.utilizationPercent;
|
|
1675
|
-
const totalTokens = contextAnalytics.totalTokens;
|
|
1676
|
-
const maxTokens = contextAnalytics.maxContextTokens;
|
|
1677
|
-
const remainingTokens = maxTokens - totalTokens;
|
|
1678
|
-
|
|
1679
|
-
// Count attached documents by scanning for document patterns in HumanMessages:
|
|
1680
|
-
// 1. # "filename" headers in "Attached document(s):" blocks (text content)
|
|
1681
|
-
// 2. **filename1, filename2** in "The user has attached:" blocks (embedded files)
|
|
1682
|
-
// 3. Filenames in file_search tool results
|
|
1683
|
-
let documentCount = 0;
|
|
1684
|
-
const documentNames: string[] = [];
|
|
1685
|
-
for (const msg of finalMessages) {
|
|
1686
|
-
const content =
|
|
1687
|
-
typeof msg.content === 'string'
|
|
1688
|
-
? msg.content
|
|
1689
|
-
: Array.isArray(msg.content)
|
|
1690
|
-
? msg.content
|
|
1691
|
-
.map((p: unknown) => {
|
|
1692
|
-
const part = p as Record<string, unknown>;
|
|
1693
|
-
return String(part.text ?? part.content ?? '');
|
|
1694
|
-
})
|
|
1695
|
-
.join(' ')
|
|
1696
|
-
: '';
|
|
1697
|
-
// Pattern 1: # "filename" headers in attached document blocks
|
|
1698
|
-
const docMatches = content.match(/# "([^"]+)"/g);
|
|
1699
|
-
if (docMatches) {
|
|
1700
|
-
for (const match of docMatches) {
|
|
1701
|
-
const name = match.replace(/# "/, '').replace(/"$/, '');
|
|
1702
|
-
if (!documentNames.includes(name)) {
|
|
1703
|
-
documentNames.push(name);
|
|
1704
|
-
documentCount++;
|
|
1705
|
-
}
|
|
1706
|
-
}
|
|
1707
|
-
}
|
|
1708
|
-
// Pattern 2: "The user has attached: **file1, file2**" (embedded files)
|
|
1709
|
-
const attachedMatch = content.match(
|
|
1710
|
-
/user has attached:\s*\*\*([^*]+)\*\*/i
|
|
1711
|
-
);
|
|
1712
|
-
if (attachedMatch) {
|
|
1713
|
-
const names = attachedMatch[1]
|
|
1714
|
-
.split(',')
|
|
1715
|
-
.map((n: string) => n.trim())
|
|
1716
|
-
.filter(Boolean);
|
|
1717
|
-
for (const name of names) {
|
|
1718
|
-
if (!documentNames.includes(name)) {
|
|
1719
|
-
documentNames.push(name);
|
|
1720
|
-
documentCount++;
|
|
1721
|
-
}
|
|
1722
|
-
}
|
|
1723
|
-
}
|
|
1724
|
-
}
|
|
1725
|
-
|
|
1726
|
-
// BASELINE LOG: Always fires so we can verify this code path runs
|
|
1727
|
-
console.debug(
|
|
1728
|
-
`[Graph] Context utilization: ${utilization.toFixed(1)}% ` +
|
|
1729
|
-
`(${totalTokens}/${maxTokens} tokens, ${remainingTokens} remaining) | ` +
|
|
1730
|
-
`hasTaskTool: true | messages: ${finalMessages.length} | docs: ${documentCount}`
|
|
1731
|
-
);
|
|
1732
|
-
|
|
1733
|
-
// TRIGGER 1: Multi-document delegation (3+ documents detected)
|
|
1734
|
-
// Only inject on first iteration (no AI messages yet = agent hasn't responded)
|
|
1831
|
+
// Multi-document delegation: first iteration only (before AI has responded)
|
|
1735
1832
|
const hasAiResponse = finalMessages.some(
|
|
1736
1833
|
(m) => m._getType() === 'ai' || m._getType() === 'tool'
|
|
1737
1834
|
);
|
|
1738
|
-
if (documentCount
|
|
1835
|
+
if (shouldInjectMultiDocHint(documentCount, hasAiResponse)) {
|
|
1739
1836
|
const pressureMsg = new HumanMessage({
|
|
1740
|
-
content:
|
|
1741
|
-
`[MULTI-DOCUMENT PROCESSING — ${documentCount} documents detected]\n` +
|
|
1742
|
-
`Documents: ${documentNames.join(', ')}\n\n` +
|
|
1743
|
-
`You have ${documentCount} documents attached. For thorough analysis, use the "task" tool ` +
|
|
1744
|
-
'to delegate each document (or group of related documents) to a sub-agent.\n' +
|
|
1745
|
-
'Each sub-agent has its own fresh context window and can use file_search to retrieve the full document content.\n' +
|
|
1746
|
-
'After all sub-agents complete, synthesize their results into a comprehensive response.\n\n' +
|
|
1747
|
-
'This approach ensures each document gets full attention without context limitations.',
|
|
1837
|
+
content: buildMultiDocHintContent(documentCount, documentNames),
|
|
1748
1838
|
});
|
|
1749
1839
|
finalMessages = [...finalMessages, pressureMsg];
|
|
1750
1840
|
console.info(
|
|
@@ -1752,43 +1842,6 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
1752
1842
|
`${documentNames.join(', ')}`
|
|
1753
1843
|
);
|
|
1754
1844
|
}
|
|
1755
|
-
|
|
1756
|
-
// TRIGGER 2: Token utilization thresholds (mid-chain safety net)
|
|
1757
|
-
// Also fires when we skipped pruning due to delegationInjectedPrePrune
|
|
1758
|
-
if (
|
|
1759
|
-
utilization > 85 ||
|
|
1760
|
-
(delegationInjectedPrePrune && utilization > 50)
|
|
1761
|
-
) {
|
|
1762
|
-
// CRITICAL: Context is high — MANDATE delegation
|
|
1763
|
-
const pressureMsg = new HumanMessage({
|
|
1764
|
-
content:
|
|
1765
|
-
`[CONTEXT BUDGET CRITICAL — ${utilization.toFixed(0)}% used]\n` +
|
|
1766
|
-
`You have used ${totalTokens} of ${maxTokens} tokens (${remainingTokens} remaining).\n` +
|
|
1767
|
-
'Your context is very large. You MUST use the "task" tool to delegate work to sub-agents.\n' +
|
|
1768
|
-
'Each sub-agent runs in its own fresh context window and can use file_search to access documents.\n' +
|
|
1769
|
-
'Do NOT attempt to process documents directly — delegate each document to a sub-agent, then synthesize results.',
|
|
1770
|
-
});
|
|
1771
|
-
finalMessages = [...finalMessages, pressureMsg];
|
|
1772
|
-
console.warn(
|
|
1773
|
-
`[Graph] Context pressure CRITICAL (${utilization.toFixed(0)}%): ` +
|
|
1774
|
-
`Injected mandatory delegation hint. ${remainingTokens} tokens remaining. ` +
|
|
1775
|
-
`prePruneSkipped: ${delegationInjectedPrePrune}`
|
|
1776
|
-
);
|
|
1777
|
-
} else if (utilization > 70) {
|
|
1778
|
-
// WARNING: Context filling up — suggest delegation
|
|
1779
|
-
const pressureMsg = new HumanMessage({
|
|
1780
|
-
content:
|
|
1781
|
-
`[CONTEXT BUDGET WARNING — ${utilization.toFixed(0)}% used]\n` +
|
|
1782
|
-
`You have used ${totalTokens} of ${maxTokens} tokens (${remainingTokens} remaining).\n` +
|
|
1783
|
-
'Your context is filling up. Consider using the "task" tool to delegate complex operations to sub-agents.\n' +
|
|
1784
|
-
"Sub-agents run in fresh context windows and won't consume your remaining budget.",
|
|
1785
|
-
});
|
|
1786
|
-
finalMessages = [...finalMessages, pressureMsg];
|
|
1787
|
-
console.info(
|
|
1788
|
-
`[Graph] Context pressure WARNING (${utilization.toFixed(0)}%): ` +
|
|
1789
|
-
`Injected delegation suggestion. ${remainingTokens} tokens remaining.`
|
|
1790
|
-
);
|
|
1791
|
-
}
|
|
1792
1845
|
}
|
|
1793
1846
|
|
|
1794
1847
|
// Structured output mode: when the agent has NO tools, produce structured JSON immediately.
|
|
@@ -2302,13 +2355,6 @@ If I seem to be missing something we discussed earlier, just give me a quick rem
|
|
|
2302
2355
|
reducer: (a, b) => {
|
|
2303
2356
|
if (!a.length) {
|
|
2304
2357
|
this.startIndex = a.length + b.length;
|
|
2305
|
-
console.debug(
|
|
2306
|
-
`[Graph:Reducer] Initial messages | startIndex=${this.startIndex} | inputMsgCount=${b.length}`
|
|
2307
|
-
);
|
|
2308
|
-
} else {
|
|
2309
|
-
console.debug(
|
|
2310
|
-
`[Graph:Reducer] Appending messages | existing=${a.length} | new=${b.length} | startIndex=${this.startIndex}`
|
|
2311
|
-
);
|
|
2312
2358
|
}
|
|
2313
2359
|
const result = messagesStateReducer(a, b);
|
|
2314
2360
|
this.messages = result;
|
|
@@ -596,30 +596,38 @@ describe('Pre-invocation utilization gate', () => {
|
|
|
596
596
|
expect(emergency.length).toBeLessThan(2000); // Emergency summaries are compact
|
|
597
597
|
});
|
|
598
598
|
|
|
599
|
-
it('
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
599
|
+
it('does NOT inject token budget hints at any utilization level', () => {
|
|
600
|
+
// Token budget hints were removed to prevent LLM voluntary bail-out.
|
|
601
|
+
// Context overflow is handled mechanically by pruning + auto-continuation.
|
|
602
|
+
// See: docs/context-overflow-architecture.md
|
|
603
|
+
const utilizationLevels = [50, 70, 85, 95, 101];
|
|
604
|
+
for (const utilization of utilizationLevels) {
|
|
605
|
+
const messages = buildConversation(10, 200);
|
|
606
|
+
// No message should contain raw token numbers or budget percentages
|
|
607
|
+
for (const msg of messages) {
|
|
608
|
+
const content =
|
|
609
|
+
typeof msg.content === 'string'
|
|
610
|
+
? msg.content
|
|
611
|
+
: JSON.stringify(msg.content);
|
|
612
|
+
expect(content).not.toMatch(/CONTEXT BUDGET/);
|
|
613
|
+
expect(content).not.toMatch(/\d+ of \d+ tokens/);
|
|
614
|
+
}
|
|
611
615
|
}
|
|
612
616
|
});
|
|
613
617
|
|
|
614
|
-
it('does not
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
618
|
+
it('post-prune note does not contain token numbers', () => {
|
|
619
|
+
// After pruning, a context note is injected but it must not
|
|
620
|
+
// expose any token counts or budget percentages to the LLM
|
|
621
|
+
const { buildPostPruneNote } = require('@/utils/contextPressure');
|
|
622
|
+
const noteWithSummary = buildPostPruneNote(10, true);
|
|
623
|
+
const noteWithout = buildPostPruneNote(10, false);
|
|
624
|
+
for (const note of [noteWithSummary, noteWithout]) {
|
|
625
|
+
expect(note).not.toBeNull();
|
|
626
|
+
expect(note).not.toMatch(/\d+%/);
|
|
627
|
+
expect(note).not.toMatch(/\d+ of \d+ tokens/);
|
|
628
|
+
expect(note).not.toMatch(/BUDGET/i);
|
|
629
|
+
expect(note).toContain('task');
|
|
620
630
|
}
|
|
621
|
-
|
|
622
|
-
expect(delegationInjected).toBe(false);
|
|
623
631
|
});
|
|
624
632
|
});
|
|
625
633
|
|