@illuma-ai/agents 1.0.98 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/agents/AgentContext.cjs +6 -2
- package/dist/cjs/agents/AgentContext.cjs.map +1 -1
- package/dist/cjs/common/constants.cjs +53 -0
- package/dist/cjs/common/constants.cjs.map +1 -1
- package/dist/cjs/graphs/Graph.cjs +195 -31
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/cjs/main.cjs +14 -0
- package/dist/cjs/main.cjs.map +1 -1
- package/dist/cjs/messages/dedup.cjs +95 -0
- package/dist/cjs/messages/dedup.cjs.map +1 -0
- package/dist/cjs/tools/CodeExecutor.cjs +22 -3
- package/dist/cjs/tools/CodeExecutor.cjs.map +1 -1
- package/dist/cjs/types/graph.cjs.map +1 -1
- package/dist/cjs/utils/pruneCalibration.cjs +78 -0
- package/dist/cjs/utils/pruneCalibration.cjs.map +1 -0
- package/dist/cjs/utils/run.cjs.map +1 -1
- package/dist/cjs/utils/tokens.cjs.map +1 -1
- package/dist/cjs/utils/toolDiscoveryCache.cjs +127 -0
- package/dist/cjs/utils/toolDiscoveryCache.cjs.map +1 -0
- package/dist/esm/agents/AgentContext.mjs +6 -2
- package/dist/esm/agents/AgentContext.mjs.map +1 -1
- package/dist/esm/common/constants.mjs +48 -1
- package/dist/esm/common/constants.mjs.map +1 -1
- package/dist/esm/graphs/Graph.mjs +196 -32
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/dist/esm/main.mjs +4 -1
- package/dist/esm/main.mjs.map +1 -1
- package/dist/esm/messages/dedup.mjs +93 -0
- package/dist/esm/messages/dedup.mjs.map +1 -0
- package/dist/esm/tools/CodeExecutor.mjs +22 -3
- package/dist/esm/tools/CodeExecutor.mjs.map +1 -1
- package/dist/esm/types/graph.mjs.map +1 -1
- package/dist/esm/utils/pruneCalibration.mjs +74 -0
- package/dist/esm/utils/pruneCalibration.mjs.map +1 -0
- package/dist/esm/utils/run.mjs.map +1 -1
- package/dist/esm/utils/tokens.mjs.map +1 -1
- package/dist/esm/utils/toolDiscoveryCache.mjs +125 -0
- package/dist/esm/utils/toolDiscoveryCache.mjs.map +1 -0
- package/dist/types/agents/AgentContext.d.ts +4 -1
- package/dist/types/common/constants.d.ts +35 -0
- package/dist/types/graphs/Graph.d.ts +34 -0
- package/dist/types/messages/dedup.d.ts +25 -0
- package/dist/types/messages/index.d.ts +1 -0
- package/dist/types/types/graph.d.ts +63 -0
- package/dist/types/utils/index.d.ts +2 -0
- package/dist/types/utils/pruneCalibration.d.ts +43 -0
- package/dist/types/utils/toolDiscoveryCache.d.ts +77 -0
- package/package.json +1 -1
- package/src/agents/AgentContext.ts +7 -0
- package/src/common/constants.ts +56 -0
- package/src/graphs/Graph.ts +250 -50
- package/src/graphs/gapFeatures.test.ts +520 -0
- package/src/graphs/nonBlockingSummarization.test.ts +307 -0
- package/src/messages/__tests__/dedup.test.ts +166 -0
- package/src/messages/dedup.ts +104 -0
- package/src/messages/index.ts +1 -0
- package/src/tools/CodeExecutor.ts +22 -3
- package/src/types/graph.ts +73 -0
- package/src/utils/__tests__/pruneCalibration.test.ts +148 -0
- package/src/utils/__tests__/toolDiscoveryCache.test.ts +214 -0
- package/src/utils/contextPressure.test.ts +24 -9
- package/src/utils/index.ts +2 -0
- package/src/utils/pruneCalibration.ts +92 -0
- package/src/utils/run.ts +108 -108
- package/src/utils/tokens.ts +118 -118
- package/src/utils/toolDiscoveryCache.ts +150 -0
package/src/graphs/Graph.ts
CHANGED
|
@@ -34,9 +34,9 @@ import type * as t from '@/types';
|
|
|
34
34
|
import {
|
|
35
35
|
formatAnthropicArtifactContent,
|
|
36
36
|
ensureThinkingBlockInMessages,
|
|
37
|
+
deduplicateSystemMessages,
|
|
37
38
|
convertMessagesToContent,
|
|
38
39
|
addBedrockCacheControl,
|
|
39
|
-
extractToolDiscoveries,
|
|
40
40
|
modifyDeltaProperties,
|
|
41
41
|
formatArtifactPayload,
|
|
42
42
|
formatContentStrings,
|
|
@@ -53,14 +53,20 @@ import {
|
|
|
53
53
|
MessageTypes,
|
|
54
54
|
Constants,
|
|
55
55
|
TOOL_TURN_THINKING_BUDGET,
|
|
56
|
+
SUMMARIZATION_CONTEXT_THRESHOLD,
|
|
56
57
|
} from '@/common';
|
|
57
58
|
import {
|
|
59
|
+
ToolDiscoveryCache,
|
|
58
60
|
resetIfNotEmpty,
|
|
59
61
|
isOpenAILike,
|
|
60
62
|
isGoogleLike,
|
|
61
63
|
joinKeys,
|
|
62
64
|
sleep,
|
|
65
|
+
createPruneCalibration,
|
|
66
|
+
updatePruneCalibration,
|
|
67
|
+
applyCalibration,
|
|
63
68
|
} from '@/utils';
|
|
69
|
+
import type { PruneCalibrationState } from '@/types/graph';
|
|
64
70
|
import {
|
|
65
71
|
buildContextAnalytics,
|
|
66
72
|
type ContextAnalytics,
|
|
@@ -205,6 +211,22 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
205
211
|
runId: string | undefined;
|
|
206
212
|
startIndex: number = 0;
|
|
207
213
|
signal?: AbortSignal;
|
|
214
|
+
/** Cached summary from the first prune in this run.
|
|
215
|
+
* Reused for subsequent prunes to avoid blocking LLM calls on every tool iteration. */
|
|
216
|
+
private _cachedRunSummary: string | undefined;
|
|
217
|
+
/** EMA-based pruning calibration state — smooths token budget adjustments across iterations */
|
|
218
|
+
private _pruneCalibration: PruneCalibrationState;
|
|
219
|
+
/** Run-scoped tool discovery cache — avoids re-parsing conversation history on every iteration */
|
|
220
|
+
private _toolDiscoveryCache: ToolDiscoveryCache;
|
|
221
|
+
/**
|
|
222
|
+
* SCALE: Tracks whether a summary call is already in-flight for this Graph instance.
|
|
223
|
+
* Prevents multiple concurrent summary LLM calls when rapid tool iterations each
|
|
224
|
+
* trigger pruning. At 2000 users with 3+ tool calls per turn, this prevents
|
|
225
|
+
* 2000 summary calls/turn from becoming 6000+.
|
|
226
|
+
*/
|
|
227
|
+
private _summaryInFlight: boolean = false;
|
|
228
|
+
/** Messages accumulated across tool iterations while a summary call is in-flight */
|
|
229
|
+
private _pendingMessagesToRefine: BaseMessage[] = [];
|
|
208
230
|
/** Map of agent contexts by agent ID */
|
|
209
231
|
agentContexts: Map<string, AgentContext> = new Map();
|
|
210
232
|
/** Default agent ID to use */
|
|
@@ -239,6 +261,22 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
239
261
|
}
|
|
240
262
|
|
|
241
263
|
this.defaultAgentId = agents[0].agentId;
|
|
264
|
+
|
|
265
|
+
// Seed cached summary from persisted storage so the first prune in a
|
|
266
|
+
// resumed conversation can also skip the synchronous LLM summarization call
|
|
267
|
+
const primaryContext = this.agentContexts.get(this.defaultAgentId);
|
|
268
|
+
if (primaryContext?.persistedSummary) {
|
|
269
|
+
this._cachedRunSummary = primaryContext.persistedSummary;
|
|
270
|
+
}
|
|
271
|
+
|
|
272
|
+
// Initialize EMA pruning calibration
|
|
273
|
+
this._pruneCalibration = createPruneCalibration();
|
|
274
|
+
|
|
275
|
+
// Initialize tool discovery cache, seeded with any pre-existing discoveries
|
|
276
|
+
this._toolDiscoveryCache = new ToolDiscoveryCache();
|
|
277
|
+
if (primaryContext?.discoveredToolNames.size) {
|
|
278
|
+
this._toolDiscoveryCache.seed([...primaryContext.discoveredToolNames]);
|
|
279
|
+
}
|
|
242
280
|
}
|
|
243
281
|
|
|
244
282
|
/* Init */
|
|
@@ -272,6 +310,11 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
272
310
|
new Map()
|
|
273
311
|
);
|
|
274
312
|
this.invokedToolIds = resetIfNotEmpty(this.invokedToolIds, undefined);
|
|
313
|
+
// Reset EMA calibration, tool discovery cache, and summary debounce for fresh run
|
|
314
|
+
this._pruneCalibration = createPruneCalibration();
|
|
315
|
+
this._toolDiscoveryCache.reset();
|
|
316
|
+
this._summaryInFlight = false;
|
|
317
|
+
this._pendingMessagesToRefine = [];
|
|
275
318
|
for (const context of this.agentContexts.values()) {
|
|
276
319
|
context.reset();
|
|
277
320
|
}
|
|
@@ -378,6 +421,70 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
378
421
|
return clientOptions;
|
|
379
422
|
}
|
|
380
423
|
|
|
424
|
+
/**
 * Decides whether the messages discarded by pruning should be summarized.
 *
 * Returns `false` whenever nothing was pruned. Otherwise the decision follows
 * `config.triggerType`:
 * - `contextPercentage`: trigger when
 *   `(instructionTokens + sum of indexTokenCountMap) / maxContextTokens * 100`
 *   is >= the threshold. The threshold falls back to
 *   `SUMMARIZATION_CONTEXT_THRESHOLD`; a non-positive `maxContextTokens`
 *   always triggers.
 * - `messageCount`: trigger when `prunedMessageCount` >= threshold
 *   (falls back to 5).
 * - `tokenThreshold`: trigger when the same token total is >= threshold;
 *   with no threshold configured it always triggers.
 *
 * When `config` or `config.triggerType` is missing, or the trigger type is not
 * one of the three above, summarization always triggers (preserves the
 * behavior from before SummarizationConfig existed).
 *
 * @param prunedMessageCount - Number of messages that were pruned
 * @param maxContextTokens - Maximum context token budget
 * @param indexTokenCountMap - Token count map by message index; `undefined`
 *   entries count as 0
 * @param instructionTokens - Token count for instructions/system message
 * @param config - Optional SummarizationConfig
 * @returns Whether summarization should be triggered
 */
private shouldTriggerSummarization(
  prunedMessageCount: number,
  maxContextTokens: number,
  indexTokenCountMap: Record<string, number | undefined>,
  instructionTokens: number,
  config?: t.SummarizationConfig
): boolean {
  // No pruned messages means nothing to summarize
  if (prunedMessageCount === 0) {
    return false;
  }

  // No config / no trigger type = backward compatible (always summarize
  // when messages are pruned)
  if (!config || !config.triggerType) {
    return true;
  }

  const threshold = config.triggerThreshold;

  // Shared by the two token-based strategies. Computed lazily so the
  // messageCount path never walks the map. Only the map's own entries are
  // summed, starting from the instruction token count.
  const totalTokens = (): number =>
    Object.values(indexTokenCountMap).reduce<number>(
      (sum, count) => sum + (count ?? 0),
      instructionTokens
    );

  switch (config.triggerType) {
    case 'contextPercentage': {
      // Utilization is undefined without a positive budget; fall back to
      // always summarizing rather than dividing by zero.
      if (maxContextTokens <= 0) return true;
      const effectiveThreshold = threshold ?? SUMMARIZATION_CONTEXT_THRESHOLD;
      const utilization = (totalTokens() / maxContextTokens) * 100;
      return utilization >= effectiveThreshold;
    }
    case 'messageCount': {
      const effectiveThreshold = threshold ?? 5;
      return prunedMessageCount >= effectiveThreshold;
    }
    case 'tokenThreshold': {
      // No sensible built-in token limit exists, so an unset threshold
      // means "always summarize".
      if (threshold == null) return true;
      return totalTokens() >= threshold;
    }
    default:
      // Unknown trigger types behave like the no-config case.
      return true;
  }
}
|
|
487
|
+
|
|
381
488
|
/**
|
|
382
489
|
* Returns the normalized finish/stop reason from the last LLM invocation.
|
|
383
490
|
* Used by callers to detect when the response was truncated due to max_tokens.
|
|
@@ -535,9 +642,6 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
535
642
|
|
|
536
643
|
getRunMessages(): BaseMessage[] | undefined {
|
|
537
644
|
const result = this.messages.slice(this.startIndex);
|
|
538
|
-
console.debug(
|
|
539
|
-
`[Graph] getRunMessages() | totalMessages=${this.messages.length} | startIndex=${this.startIndex} | runMessages=${result.length}`
|
|
540
|
-
);
|
|
541
645
|
return result;
|
|
542
646
|
}
|
|
543
647
|
|
|
@@ -1327,10 +1431,15 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
1327
1431
|
messages = [dynamicContextMessage, ackMessage, ...messages];
|
|
1328
1432
|
}
|
|
1329
1433
|
|
|
1330
|
-
//
|
|
1331
|
-
|
|
1332
|
-
|
|
1333
|
-
|
|
1434
|
+
// Tool discovery caching: only scan new messages since last iteration
|
|
1435
|
+
// instead of re-parsing the full history via extractToolDiscoveries()
|
|
1436
|
+
const cachedDiscoveries =
|
|
1437
|
+
this._toolDiscoveryCache.getNewDiscoveries(messages);
|
|
1438
|
+
if (cachedDiscoveries.length > 0) {
|
|
1439
|
+
agentContext.markToolsAsDiscovered(cachedDiscoveries);
|
|
1440
|
+
console.debug(
|
|
1441
|
+
`[Graph:ToolDiscovery] Cached ${cachedDiscoveries.length} new tools (total: ${this._toolDiscoveryCache.size})`
|
|
1442
|
+
);
|
|
1334
1443
|
}
|
|
1335
1444
|
|
|
1336
1445
|
const toolsForBinding = agentContext.getToolsForBinding();
|
|
@@ -1400,50 +1509,145 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
1400
1509
|
?.thinking as t.AnthropicClientOptions['thinking']
|
|
1401
1510
|
)?.type === 'enabled');
|
|
1402
1511
|
|
|
1512
|
+
// Apply EMA calibration to max token budget — smooths pruning across iterations
|
|
1513
|
+
const calibratedMaxTokens = applyCalibration(
|
|
1514
|
+
agentContext.maxContextTokens,
|
|
1515
|
+
this._pruneCalibration
|
|
1516
|
+
);
|
|
1517
|
+
|
|
1403
1518
|
agentContext.pruneMessages = createPruneMessages({
|
|
1404
1519
|
startIndex: this.startIndex,
|
|
1405
1520
|
provider: agentContext.provider,
|
|
1406
1521
|
tokenCounter: agentContext.tokenCounter,
|
|
1407
|
-
maxTokens:
|
|
1522
|
+
maxTokens: calibratedMaxTokens,
|
|
1408
1523
|
thinkingEnabled: isAnthropicWithThinking,
|
|
1409
1524
|
indexTokenCountMap: agentContext.indexTokenCountMap,
|
|
1410
1525
|
});
|
|
1411
1526
|
}
|
|
1412
1527
|
|
|
1528
|
+
// Update EMA calibration with actual token usage from API response
|
|
1529
|
+
if (
|
|
1530
|
+
agentContext.currentUsage?.input_tokens &&
|
|
1531
|
+
agentContext.maxContextTokens
|
|
1532
|
+
) {
|
|
1533
|
+
const estimatedTokens = Object.values(
|
|
1534
|
+
agentContext.indexTokenCountMap
|
|
1535
|
+
).reduce((sum, v) => (sum ?? 0) + (v ?? 0), 0) as number;
|
|
1536
|
+
if (estimatedTokens > 0) {
|
|
1537
|
+
this._pruneCalibration = updatePruneCalibration(
|
|
1538
|
+
this._pruneCalibration,
|
|
1539
|
+
agentContext.currentUsage.input_tokens,
|
|
1540
|
+
estimatedTokens
|
|
1541
|
+
);
|
|
1542
|
+
}
|
|
1543
|
+
}
|
|
1544
|
+
|
|
1413
1545
|
if (agentContext.pruneMessages) {
|
|
1414
|
-
console.debug(
|
|
1415
|
-
`[Graph:ContextMgmt] Pruning messages | inputCount=${messages.length} | maxTokens=${agentContext.maxContextTokens}`
|
|
1416
|
-
);
|
|
1417
1546
|
const { context, indexTokenCountMap, messagesToRefine } =
|
|
1418
1547
|
agentContext.pruneMessages({
|
|
1419
1548
|
messages,
|
|
1420
1549
|
usageMetadata: agentContext.currentUsage,
|
|
1421
|
-
// startOnMessageType: 'human',
|
|
1422
1550
|
});
|
|
1423
1551
|
agentContext.indexTokenCountMap = indexTokenCountMap;
|
|
1424
1552
|
messagesToUse = context;
|
|
1425
|
-
console.debug(
|
|
1426
|
-
`[Graph:ContextMgmt] Pruned | kept=${context.length} | discarded=${messagesToRefine.length} | originalCount=${messages.length}`
|
|
1427
|
-
);
|
|
1428
1553
|
|
|
1429
|
-
//
|
|
1554
|
+
// ── Non-blocking summarization ──────────────────────────────────
|
|
1555
|
+
// NEVER block the LLM call waiting for summarization. Instead:
|
|
1556
|
+
// 1. If _cachedRunSummary exists → use it, fire async update
|
|
1557
|
+
// 2. If persistedSummary exists → use it as fallback, fire async update
|
|
1558
|
+
// 3. If NOTHING exists (first-ever prune) → skip summary, fire async generation
|
|
1559
|
+
// The summary catches up asynchronously and is available for subsequent
|
|
1560
|
+
// iterations (tool calls) and the next conversation turn.
|
|
1561
|
+
//
|
|
1562
|
+
// SummarizationConfig integration:
|
|
1563
|
+
// - triggerType/triggerThreshold control WHEN summarization fires
|
|
1564
|
+
// - reserveRatio is enforced via calibrated maxTokens (above)
|
|
1565
|
+
// - initialSummary provides cross-run seeding as a fallback when no persistedSummary exists
|
|
1430
1566
|
let hasSummary = false;
|
|
1431
|
-
|
|
1432
|
-
|
|
1433
|
-
|
|
1434
|
-
|
|
1567
|
+
const sumConfig = agentContext.summarizationConfig;
|
|
1568
|
+
const shouldSummarize = this.shouldTriggerSummarization(
|
|
1569
|
+
messagesToRefine.length,
|
|
1570
|
+
agentContext.maxContextTokens ?? 0,
|
|
1571
|
+
agentContext.indexTokenCountMap,
|
|
1572
|
+
agentContext.instructionTokens,
|
|
1573
|
+
sumConfig
|
|
1574
|
+
);
|
|
1575
|
+
|
|
1576
|
+
if (
|
|
1577
|
+
messagesToRefine.length > 0 &&
|
|
1578
|
+
agentContext.summarizeCallback &&
|
|
1579
|
+
shouldSummarize
|
|
1580
|
+
) {
|
|
1435
1581
|
try {
|
|
1436
|
-
|
|
1437
|
-
|
|
1582
|
+
let summary: string | undefined;
|
|
1583
|
+
let summarySource: string;
|
|
1584
|
+
|
|
1585
|
+
if (this._cachedRunSummary != null) {
|
|
1586
|
+
summary = this._cachedRunSummary;
|
|
1587
|
+
summarySource = 'cached';
|
|
1588
|
+
} else if (
|
|
1589
|
+
agentContext.persistedSummary != null &&
|
|
1590
|
+
agentContext.persistedSummary !== ''
|
|
1591
|
+
) {
|
|
1592
|
+
summary = agentContext.persistedSummary;
|
|
1593
|
+
this._cachedRunSummary = summary;
|
|
1594
|
+
summarySource = 'persisted';
|
|
1595
|
+
} else if (
|
|
1596
|
+
sumConfig?.initialSummary != null &&
|
|
1597
|
+
sumConfig.initialSummary !== ''
|
|
1598
|
+
) {
|
|
1599
|
+
// Cross-run seed: use initialSummary when no persisted summary exists
|
|
1600
|
+
summary = sumConfig.initialSummary;
|
|
1601
|
+
this._cachedRunSummary = summary;
|
|
1602
|
+
summarySource = 'initial-seed';
|
|
1603
|
+
} else {
|
|
1604
|
+
summarySource = 'none';
|
|
1605
|
+
}
|
|
1606
|
+
|
|
1607
|
+
// Single consolidated log for the entire prune+summarize decision
|
|
1438
1608
|
console.debug(
|
|
1439
|
-
`[Graph:ContextMgmt]
|
|
1609
|
+
`[Graph:ContextMgmt] Pruned ${messages.length}→${context.length} msgs (${messagesToRefine.length} discarded) | summary=${summarySource}${summary ? ` (len=${summary.length})` : ''} | calibration=${this._pruneCalibration.ratio.toFixed(3)}(${this._pruneCalibration.iterations})`
|
|
1440
1610
|
);
|
|
1611
|
+
|
|
1612
|
+
// SCALE: Debounce background summarization — if a summary call is already
|
|
1613
|
+
// in-flight (from a prior tool iteration), accumulate messages instead of
|
|
1614
|
+
// firing another concurrent LLM call. At 2000 users with 3+ tool calls
|
|
1615
|
+
// per turn, this prevents 3x summary call volume.
|
|
1616
|
+
if (this._summaryInFlight) {
|
|
1617
|
+
this._pendingMessagesToRefine.push(...messagesToRefine);
|
|
1618
|
+
console.debug(
|
|
1619
|
+
`[Graph:ContextMgmt] Summary in-flight, queued ${messagesToRefine.length} msgs (pending=${this._pendingMessagesToRefine.length})`
|
|
1620
|
+
);
|
|
1621
|
+
} else {
|
|
1622
|
+
this._summaryInFlight = true;
|
|
1623
|
+
const allMessages = this._pendingMessagesToRefine.length > 0
|
|
1624
|
+
? [...this._pendingMessagesToRefine, ...messagesToRefine]
|
|
1625
|
+
: messagesToRefine;
|
|
1626
|
+
this._pendingMessagesToRefine = [];
|
|
1627
|
+
|
|
1628
|
+
agentContext
|
|
1629
|
+
.summarizeCallback(allMessages)
|
|
1630
|
+
.then((updated) => {
|
|
1631
|
+
if (updated != null && updated !== '') {
|
|
1632
|
+
this._cachedRunSummary = updated;
|
|
1633
|
+
}
|
|
1634
|
+
})
|
|
1635
|
+
.catch((err) => {
|
|
1636
|
+
console.error(
|
|
1637
|
+
'[Graph] Background summary failed (non-fatal):',
|
|
1638
|
+
err
|
|
1639
|
+
);
|
|
1640
|
+
})
|
|
1641
|
+
.finally(() => {
|
|
1642
|
+
this._summaryInFlight = false;
|
|
1643
|
+
});
|
|
1644
|
+
}
|
|
1645
|
+
|
|
1441
1646
|
if (summary != null && summary !== '') {
|
|
1442
1647
|
hasSummary = true;
|
|
1443
1648
|
const summaryMsg = new SystemMessage(
|
|
1444
1649
|
`[Conversation Summary]\n${summary}`
|
|
1445
1650
|
);
|
|
1446
|
-
// Insert after system message (if present), before conversation messages
|
|
1447
1651
|
const systemIdx =
|
|
1448
1652
|
messagesToUse[0]?.getType() === 'system' ? 1 : 0;
|
|
1449
1653
|
messagesToUse = [
|
|
@@ -1451,27 +1655,38 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
1451
1655
|
summaryMsg,
|
|
1452
1656
|
...messagesToUse.slice(systemIdx),
|
|
1453
1657
|
];
|
|
1454
|
-
console.debug(
|
|
1455
|
-
`[Graph:ContextMgmt] Summary injected at index ${systemIdx} | finalMsgCount=${messagesToUse.length}`
|
|
1456
|
-
);
|
|
1457
1658
|
}
|
|
1458
1659
|
} catch (err) {
|
|
1459
|
-
console.error('[Graph] Summarization
|
|
1660
|
+
console.error('[Graph] Summarization failed:', err);
|
|
1460
1661
|
}
|
|
1662
|
+
} else if (messagesToRefine.length > 0) {
|
|
1663
|
+
// Log pruning even when no summarize callback (discard mode)
|
|
1664
|
+
console.debug(
|
|
1665
|
+
`[Graph:ContextMgmt] Pruned ${messages.length}→${context.length} msgs (${messagesToRefine.length} discarded, no summary callback) | calibration=${this._pruneCalibration.ratio.toFixed(3)}`
|
|
1666
|
+
);
|
|
1461
1667
|
}
|
|
1462
1668
|
|
|
1463
|
-
//
|
|
1464
|
-
|
|
1669
|
+
// Deduplicate system messages that accumulate from repeated tool iterations
|
|
1670
|
+
const { messages: dedupedMessages, removedCount } =
|
|
1671
|
+
deduplicateSystemMessages(messagesToUse);
|
|
1672
|
+
if (removedCount > 0) {
|
|
1673
|
+
messagesToUse = dedupedMessages;
|
|
1674
|
+
console.debug(
|
|
1675
|
+
`[Graph:Dedup] Removed ${removedCount} duplicate system message(s)`
|
|
1676
|
+
);
|
|
1677
|
+
}
|
|
1678
|
+
|
|
1679
|
+
// Post-prune context note for task-tool-enabled agents
|
|
1465
1680
|
if (messagesToRefine.length > 0 && hasTaskTool(agentContext.tools)) {
|
|
1466
1681
|
const postPruneNote = buildPostPruneNote(
|
|
1467
1682
|
messagesToRefine.length,
|
|
1468
1683
|
hasSummary
|
|
1469
1684
|
);
|
|
1470
1685
|
if (postPruneNote) {
|
|
1471
|
-
messagesToUse = [
|
|
1472
|
-
|
|
1473
|
-
|
|
1474
|
-
|
|
1686
|
+
messagesToUse = [
|
|
1687
|
+
...messagesToUse,
|
|
1688
|
+
new SystemMessage(postPruneNote),
|
|
1689
|
+
];
|
|
1475
1690
|
}
|
|
1476
1691
|
}
|
|
1477
1692
|
}
|
|
@@ -1643,14 +1858,6 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
1643
1858
|
const { count: documentCount, names: documentNames } =
|
|
1644
1859
|
detectDocuments(finalMessages);
|
|
1645
1860
|
|
|
1646
|
-
// Observability log (no token numbers exposed to LLM)
|
|
1647
|
-
if (contextAnalytics.utilizationPercent != null) {
|
|
1648
|
-
console.debug(
|
|
1649
|
-
`[Graph] Context utilization: ${contextAnalytics.utilizationPercent.toFixed(1)}% | ` +
|
|
1650
|
-
`messages: ${finalMessages.length} | docs: ${documentCount}`
|
|
1651
|
-
);
|
|
1652
|
-
}
|
|
1653
|
-
|
|
1654
1861
|
// Multi-document delegation: first iteration only (before AI has responded)
|
|
1655
1862
|
const hasAiResponse = finalMessages.some(
|
|
1656
1863
|
(m) => m._getType() === 'ai' || m._getType() === 'tool'
|
|
@@ -2178,13 +2385,6 @@ If I seem to be missing something we discussed earlier, just give me a quick rem
|
|
|
2178
2385
|
reducer: (a, b) => {
|
|
2179
2386
|
if (!a.length) {
|
|
2180
2387
|
this.startIndex = a.length + b.length;
|
|
2181
|
-
console.debug(
|
|
2182
|
-
`[Graph:Reducer] Initial messages | startIndex=${this.startIndex} | inputMsgCount=${b.length}`
|
|
2183
|
-
);
|
|
2184
|
-
} else {
|
|
2185
|
-
console.debug(
|
|
2186
|
-
`[Graph:Reducer] Appending messages | existing=${a.length} | new=${b.length} | startIndex=${this.startIndex}`
|
|
2187
|
-
);
|
|
2188
2388
|
}
|
|
2189
2389
|
const result = messagesStateReducer(a, b);
|
|
2190
2390
|
this.messages = result;
|