@illuma-ai/agents 1.1.2 → 1.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/graphs/Graph.cjs +115 -82
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/esm/graphs/Graph.mjs +115 -82
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/package.json +1 -1
- package/src/graphs/Graph.ts +140 -102
- package/src/graphs/gapFeatures.test.ts +234 -2
|
@@ -1122,68 +1122,116 @@ class StandardGraph extends Graph {
|
|
|
1122
1122
|
}
|
|
1123
1123
|
}
|
|
1124
1124
|
if (agentContext.pruneMessages) {
|
|
1125
|
-
|
|
1126
|
-
|
|
1127
|
-
|
|
1128
|
-
|
|
1129
|
-
|
|
1130
|
-
|
|
1131
|
-
//
|
|
1132
|
-
//
|
|
1133
|
-
//
|
|
1134
|
-
//
|
|
1135
|
-
// 3. If NOTHING exists (first-ever prune) → skip summary, fire async generation
|
|
1136
|
-
// The summary catches up asynchronously and is available for subsequent
|
|
1137
|
-
// iterations (tool calls) and the next conversation turn.
|
|
1125
|
+
// ── Context Compaction (Copilot-style: never delete messages) ─────
|
|
1126
|
+
//
|
|
1127
|
+
// DESIGN: Original messages are NEVER removed from the array.
|
|
1128
|
+
// Instead, we build a "windowed view" for the LLM:
|
|
1129
|
+
// [system prompt] + [summary of older turns] + [recent turns that fit]
|
|
1130
|
+
//
|
|
1131
|
+
// This ensures:
|
|
1132
|
+
// - No context is permanently lost (originals are kept; a summary covers older turns once one is available)
|
|
1133
|
+
// - We can always re-summarize from originals if summary is stale
|
|
1134
|
+
// - Conversation chaining works naturally across turns
|
|
1138
1135
|
//
|
|
1139
|
-
//
|
|
1140
|
-
//
|
|
1141
|
-
//
|
|
1142
|
-
//
|
|
1143
|
-
|
|
1136
|
+
// Flow:
|
|
1137
|
+
// 1. Resolve best available summary (cached > persisted > seed)
|
|
1138
|
+
// 2. Calculate token budget available for recent messages
|
|
1139
|
+
// 3. Walk newest→oldest, build view of messages that fit
|
|
1140
|
+
// 4. Assemble: [system] + [summary] + [recent window]
|
|
1141
|
+
// 5. Fire background summary update for messages outside the window
|
|
1144
1142
|
const sumConfig = agentContext.summarizationConfig;
|
|
1145
|
-
const
|
|
1146
|
-
|
|
1147
|
-
|
|
1148
|
-
|
|
1149
|
-
|
|
1150
|
-
|
|
1151
|
-
|
|
1152
|
-
|
|
1153
|
-
|
|
1154
|
-
|
|
1155
|
-
|
|
1156
|
-
|
|
1157
|
-
|
|
1158
|
-
|
|
1159
|
-
|
|
1160
|
-
|
|
1161
|
-
|
|
1162
|
-
|
|
1163
|
-
|
|
1164
|
-
|
|
1165
|
-
|
|
1166
|
-
|
|
1167
|
-
|
|
1168
|
-
|
|
1169
|
-
|
|
1170
|
-
|
|
1171
|
-
|
|
1172
|
-
|
|
1173
|
-
|
|
1174
|
-
|
|
1175
|
-
|
|
1176
|
-
|
|
1177
|
-
|
|
1143
|
+
const tokenCounter = agentContext.tokenCounter;
|
|
1144
|
+
const maxTokens = agentContext.maxContextTokens ?? 0;
|
|
1145
|
+
// Step 1: Resolve best available summary
|
|
1146
|
+
let summary;
|
|
1147
|
+
let summarySource;
|
|
1148
|
+
if (this._cachedRunSummary != null) {
|
|
1149
|
+
summary = this._cachedRunSummary;
|
|
1150
|
+
summarySource = 'cached';
|
|
1151
|
+
}
|
|
1152
|
+
else if (agentContext.persistedSummary != null &&
|
|
1153
|
+
agentContext.persistedSummary !== '') {
|
|
1154
|
+
summary = agentContext.persistedSummary;
|
|
1155
|
+
this._cachedRunSummary = summary;
|
|
1156
|
+
summarySource = 'persisted';
|
|
1157
|
+
}
|
|
1158
|
+
else if (sumConfig?.initialSummary != null &&
|
|
1159
|
+
sumConfig.initialSummary !== '') {
|
|
1160
|
+
summary = sumConfig.initialSummary;
|
|
1161
|
+
this._cachedRunSummary = summary;
|
|
1162
|
+
summarySource = 'initial-seed';
|
|
1163
|
+
}
|
|
1164
|
+
else {
|
|
1165
|
+
summarySource = 'none';
|
|
1166
|
+
}
|
|
1167
|
+
// Step 2: Calculate token budget
|
|
1168
|
+
// Apply EMA calibration for accuracy across iterations
|
|
1169
|
+
const calibratedMax = applyCalibration(maxTokens, this._pruneCalibration);
|
|
1170
|
+
const systemMsg = messages[0]?.getType() === 'system' ? messages[0] : null;
|
|
1171
|
+
const systemTokens = systemMsg != null
|
|
1172
|
+
? (agentContext.indexTokenCountMap[0] ?? 0)
|
|
1173
|
+
: 0;
|
|
1174
|
+
const summaryMsg = summary != null && summary !== ''
|
|
1175
|
+
? new SystemMessage(`[Conversation Summary]\n${summary}`)
|
|
1176
|
+
: null;
|
|
1177
|
+
const summaryTokens = summaryMsg != null && tokenCounter != null
|
|
1178
|
+
? tokenCounter(summaryMsg)
|
|
1179
|
+
: 0;
|
|
1180
|
+
// Budget for recent messages = total - system - summary - 3 (assistant priming)
|
|
1181
|
+
const recentBudget = calibratedMax - systemTokens - summaryTokens - 3;
|
|
1182
|
+
// Step 3: Walk newest→oldest, collect messages that fit in the budget
|
|
1183
|
+
const contentStart = systemMsg != null ? 1 : 0;
|
|
1184
|
+
let usedTokens = 0;
|
|
1185
|
+
let windowStart = messages.length; // index where the recent window begins
|
|
1186
|
+
for (let i = messages.length - 1; i >= contentStart; i--) {
|
|
1187
|
+
const msgTokens = agentContext.indexTokenCountMap[i] ?? 0;
|
|
1188
|
+
if (usedTokens + msgTokens > recentBudget) {
|
|
1189
|
+
break;
|
|
1190
|
+
}
|
|
1191
|
+
usedTokens += msgTokens;
|
|
1192
|
+
windowStart = i;
|
|
1193
|
+
}
|
|
1194
|
+
// Ensure we don't split tool-call / tool-result pairs.
|
|
1195
|
+
// If windowStart lands on a ToolMessage, walk back to include its AI message (this walk-back is not re-checked against recentBudget, so usedTokens may exceed it).
|
|
1196
|
+
while (windowStart > contentStart &&
|
|
1197
|
+
messages[windowStart]?.getType() === 'tool') {
|
|
1198
|
+
windowStart--;
|
|
1199
|
+
usedTokens += agentContext.indexTokenCountMap[windowStart] ?? 0;
|
|
1200
|
+
}
|
|
1201
|
+
const recentMessages = messages.slice(windowStart);
|
|
1202
|
+
const compactedMessages = messages.slice(contentStart, windowStart);
|
|
1203
|
+
const hasSummary = summaryMsg != null;
|
|
1204
|
+
// Step 4: Assemble the windowed view
|
|
1205
|
+
// [system] + [summary (covers compacted messages)] + [recent window]
|
|
1206
|
+
const viewParts = [];
|
|
1207
|
+
if (systemMsg != null) {
|
|
1208
|
+
viewParts.push(systemMsg);
|
|
1209
|
+
}
|
|
1210
|
+
if (summaryMsg != null) {
|
|
1211
|
+
viewParts.push(summaryMsg);
|
|
1212
|
+
}
|
|
1213
|
+
viewParts.push(...recentMessages);
|
|
1214
|
+
messagesToUse = viewParts;
|
|
1215
|
+
console.debug(`[Graph:Compaction] View: ${messages.length}→${viewParts.length} msgs ` +
|
|
1216
|
+
`(${compactedMessages.length} behind summary, ${recentMessages.length} in window) | ` +
|
|
1217
|
+
`summary=${summarySource}${summary ? ` (len=${summary.length})` : ''} | ` +
|
|
1218
|
+
`budget=${recentBudget}/${calibratedMax} used=${usedTokens}`);
|
|
1219
|
+
// Step 5: Fire background summary update (non-blocking)
|
|
1220
|
+
// Summarize messages outside the window so next iteration has a fresh summary.
|
|
1221
|
+
// Only trigger if there are compacted messages worth summarizing.
|
|
1222
|
+
if (compactedMessages.length > 0 &&
|
|
1223
|
+
agentContext.summarizeCallback) {
|
|
1224
|
+
const shouldSummarize = this.shouldTriggerSummarization(compactedMessages.length, maxTokens, agentContext.indexTokenCountMap, agentContext.instructionTokens, sumConfig);
|
|
1225
|
+
if (shouldSummarize) {
|
|
1178
1226
|
if (this._summaryInFlight) {
|
|
1179
|
-
this._pendingMessagesToRefine.push(...
|
|
1180
|
-
console.debug(`[Graph:
|
|
1227
|
+
this._pendingMessagesToRefine.push(...compactedMessages);
|
|
1228
|
+
console.debug(`[Graph:Compaction] Summary in-flight, queued ${compactedMessages.length} msgs (pending=${this._pendingMessagesToRefine.length})`);
|
|
1181
1229
|
}
|
|
1182
1230
|
else {
|
|
1183
1231
|
this._summaryInFlight = true;
|
|
1184
1232
|
const allMessages = this._pendingMessagesToRefine.length > 0
|
|
1185
|
-
? [...this._pendingMessagesToRefine, ...
|
|
1186
|
-
:
|
|
1233
|
+
? [...this._pendingMessagesToRefine, ...compactedMessages]
|
|
1234
|
+
: compactedMessages;
|
|
1187
1235
|
this._pendingMessagesToRefine = [];
|
|
1188
1236
|
agentContext
|
|
1189
1237
|
.summarizeCallback(allMessages)
|
|
@@ -1193,40 +1241,17 @@ class StandardGraph extends Graph {
|
|
|
1193
1241
|
}
|
|
1194
1242
|
})
|
|
1195
1243
|
.catch((err) => {
|
|
1196
|
-
console.error('[Graph] Background summary failed (non-fatal):', err);
|
|
1244
|
+
console.error('[Graph:Compaction] Background summary update failed (non-fatal):', err);
|
|
1197
1245
|
})
|
|
1198
1246
|
.finally(() => {
|
|
1199
1247
|
this._summaryInFlight = false;
|
|
1200
1248
|
});
|
|
1201
1249
|
}
|
|
1202
|
-
if (summary != null && summary !== '') {
|
|
1203
|
-
hasSummary = true;
|
|
1204
|
-
const summaryMsg = new SystemMessage(`[Conversation Summary]\n${summary}`);
|
|
1205
|
-
const systemIdx = messagesToUse[0]?.getType() === 'system' ? 1 : 0;
|
|
1206
|
-
messagesToUse = [
|
|
1207
|
-
...messagesToUse.slice(0, systemIdx),
|
|
1208
|
-
summaryMsg,
|
|
1209
|
-
...messagesToUse.slice(systemIdx),
|
|
1210
|
-
];
|
|
1211
|
-
}
|
|
1212
|
-
}
|
|
1213
|
-
catch (err) {
|
|
1214
|
-
console.error('[Graph] Summarization failed:', err);
|
|
1215
1250
|
}
|
|
1216
1251
|
}
|
|
1217
|
-
|
|
1218
|
-
|
|
1219
|
-
|
|
1220
|
-
}
|
|
1221
|
-
// Deduplicate system messages that accumulate from repeated tool iterations
|
|
1222
|
-
const { messages: dedupedMessages, removedCount } = deduplicateSystemMessages(messagesToUse);
|
|
1223
|
-
if (removedCount > 0) {
|
|
1224
|
-
messagesToUse = dedupedMessages;
|
|
1225
|
-
console.debug(`[Graph:Dedup] Removed ${removedCount} duplicate system message(s)`);
|
|
1226
|
-
}
|
|
1227
|
-
// Post-prune context note for task-tool-enabled agents
|
|
1228
|
-
if (messagesToRefine.length > 0 && hasTaskTool(agentContext.tools)) {
|
|
1229
|
-
const postPruneNote = buildPostPruneNote(messagesToRefine.length, hasSummary);
|
|
1252
|
+
// Post-compaction context note for task-tool-enabled agents
|
|
1253
|
+
if (compactedMessages.length > 0 && hasTaskTool(agentContext.tools)) {
|
|
1254
|
+
const postPruneNote = buildPostPruneNote(compactedMessages.length, hasSummary);
|
|
1230
1255
|
if (postPruneNote) {
|
|
1231
1256
|
messagesToUse = [
|
|
1232
1257
|
...messagesToUse,
|
|
@@ -1235,6 +1260,14 @@ class StandardGraph extends Graph {
|
|
|
1235
1260
|
}
|
|
1236
1261
|
}
|
|
1237
1262
|
}
|
|
1263
|
+
// Deduplicate system messages — ALWAYS runs, not just during compaction.
|
|
1264
|
+
// Duplicate system messages accumulate from repeated tool iterations,
|
|
1265
|
+
// summary injections, and context notes across turns.
|
|
1266
|
+
const { messages: dedupedMessages, removedCount } = deduplicateSystemMessages(messagesToUse);
|
|
1267
|
+
if (removedCount > 0) {
|
|
1268
|
+
messagesToUse = dedupedMessages;
|
|
1269
|
+
console.debug(`[Graph:Dedup] Removed ${removedCount} duplicate system message(s)`);
|
|
1270
|
+
}
|
|
1238
1271
|
let finalMessages = messagesToUse;
|
|
1239
1272
|
if (agentContext.useLegacyContent) {
|
|
1240
1273
|
finalMessages = formatContentStrings(finalMessages);
|