@illuma-ai/agents 1.1.2 → 1.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1124,68 +1124,116 @@ class StandardGraph extends Graph {
1124
1124
  }
1125
1125
  }
1126
1126
  if (agentContext.pruneMessages) {
1127
- const { context, indexTokenCountMap, messagesToRefine } = agentContext.pruneMessages({
1128
- messages: messages$1,
1129
- usageMetadata: agentContext.currentUsage,
1130
- });
1131
- agentContext.indexTokenCountMap = indexTokenCountMap;
1132
- messagesToUse = context;
1133
- // ── Non-blocking summarization ──────────────────────────────────
1134
- // NEVER block the LLM call waiting for summarization. Instead:
1135
- // 1. If _cachedRunSummary exists use it, fire async update
1136
- // 2. If persistedSummary exists use it as fallback, fire async update
1137
- // 3. If NOTHING exists (first-ever prune) → skip summary, fire async generation
1138
- // The summary catches up asynchronously and is available for subsequent
1139
- // iterations (tool calls) and the next conversation turn.
1127
+ // ── Context Compaction (Copilot-style: never delete messages) ─────
1128
+ //
1129
+ // DESIGN: Original messages are NEVER removed from the array.
1130
+ // Instead, we build a "windowed view" for the LLM:
1131
+ // [system prompt] + [summary of older turns] + [recent turns that fit]
1132
+ //
1133
+ // This ensures:
1134
+ // - No context is ever lost (summary covers older turns)
1135
+ // - We can always re-summarize from originals if summary is stale
1136
+ // - Conversation chaining works naturally across turns
1140
1137
  //
1141
- // SummarizationConfig integration:
1142
- // - triggerType/triggerThreshold control WHEN summarization fires
1143
- // - reserveRatio is enforced via calibrated maxTokens (above)
1144
- // - initialSummary provides cross-run seeding as fallback before persistedSummary
1145
- let hasSummary = false;
1138
+ // Flow:
1139
+ // 1. Resolve best available summary (cached > persisted > seed)
1140
+ // 2. Calculate token budget available for recent messages
1141
+ // 3. Walk newest→oldest, build view of messages that fit
1142
+ // 4. Assemble: [system] + [summary] + [recent window]
1143
+ // 5. Fire background summary update for messages outside the window
1146
1144
  const sumConfig = agentContext.summarizationConfig;
1147
- const shouldSummarize = this.shouldTriggerSummarization(messagesToRefine.length, agentContext.maxContextTokens ?? 0, agentContext.indexTokenCountMap, agentContext.instructionTokens, sumConfig);
1148
- if (messagesToRefine.length > 0 &&
1149
- agentContext.summarizeCallback &&
1150
- shouldSummarize) {
1151
- try {
1152
- let summary;
1153
- let summarySource;
1154
- if (this._cachedRunSummary != null) {
1155
- summary = this._cachedRunSummary;
1156
- summarySource = 'cached';
1157
- }
1158
- else if (agentContext.persistedSummary != null &&
1159
- agentContext.persistedSummary !== '') {
1160
- summary = agentContext.persistedSummary;
1161
- this._cachedRunSummary = summary;
1162
- summarySource = 'persisted';
1163
- }
1164
- else if (sumConfig?.initialSummary != null &&
1165
- sumConfig.initialSummary !== '') {
1166
- // Cross-run seed: use initialSummary when no persisted summary exists
1167
- summary = sumConfig.initialSummary;
1168
- this._cachedRunSummary = summary;
1169
- summarySource = 'initial-seed';
1170
- }
1171
- else {
1172
- summarySource = 'none';
1173
- }
1174
- // Single consolidated log for the entire prune+summarize decision
1175
- console.debug(`[Graph:ContextMgmt] Pruned ${messages$1.length}→${context.length} msgs (${messagesToRefine.length} discarded) | summary=${summarySource}${summary ? ` (len=${summary.length})` : ''} | calibration=${this._pruneCalibration.ratio.toFixed(3)}(${this._pruneCalibration.iterations})`);
1176
- // SCALE: Debounce background summarization — if a summary call is already
1177
- // in-flight (from a prior tool iteration), accumulate messages instead of
1178
- // firing another concurrent LLM call. At 2000 users with 3+ tool calls
1179
- // per turn, this prevents 3x summary call volume.
1145
+ const tokenCounter = agentContext.tokenCounter;
1146
+ const maxTokens = agentContext.maxContextTokens ?? 0;
1147
+ // Step 1: Resolve best available summary
1148
+ let summary;
1149
+ let summarySource;
1150
+ if (this._cachedRunSummary != null) {
1151
+ summary = this._cachedRunSummary;
1152
+ summarySource = 'cached';
1153
+ }
1154
+ else if (agentContext.persistedSummary != null &&
1155
+ agentContext.persistedSummary !== '') {
1156
+ summary = agentContext.persistedSummary;
1157
+ this._cachedRunSummary = summary;
1158
+ summarySource = 'persisted';
1159
+ }
1160
+ else if (sumConfig?.initialSummary != null &&
1161
+ sumConfig.initialSummary !== '') {
1162
+ summary = sumConfig.initialSummary;
1163
+ this._cachedRunSummary = summary;
1164
+ summarySource = 'initial-seed';
1165
+ }
1166
+ else {
1167
+ summarySource = 'none';
1168
+ }
1169
+ // Step 2: Calculate token budget
1170
+ // Apply EMA calibration for accuracy across iterations
1171
+ const calibratedMax = pruneCalibration.applyCalibration(maxTokens, this._pruneCalibration);
1172
+ const systemMsg = messages$1[0]?.getType() === 'system' ? messages$1[0] : null;
1173
+ const systemTokens = systemMsg != null
1174
+ ? (agentContext.indexTokenCountMap[0] ?? 0)
1175
+ : 0;
1176
+ const summaryMsg = summary != null && summary !== ''
1177
+ ? new messages.SystemMessage(`[Conversation Summary]\n${summary}`)
1178
+ : null;
1179
+ const summaryTokens = summaryMsg != null && tokenCounter != null
1180
+ ? tokenCounter(summaryMsg)
1181
+ : 0;
1182
+ // Budget for recent messages = total - system - summary - 3 (assistant priming)
1183
+ const recentBudget = calibratedMax - systemTokens - summaryTokens - 3;
1184
+ // Step 3: Walk newest→oldest, collect messages that fit in the budget
1185
+ const contentStart = systemMsg != null ? 1 : 0;
1186
+ let usedTokens = 0;
1187
+ let windowStart = messages$1.length; // index where the recent window begins
1188
+ for (let i = messages$1.length - 1; i >= contentStart; i--) {
1189
+ const msgTokens = agentContext.indexTokenCountMap[i] ?? 0;
1190
+ if (usedTokens + msgTokens > recentBudget) {
1191
+ break;
1192
+ }
1193
+ usedTokens += msgTokens;
1194
+ windowStart = i;
1195
+ }
1196
+ // Ensure we don't split tool-call / tool-result pairs.
1197
+ // If windowStart lands on a ToolMessage, walk back to include its AI message.
1198
+ while (windowStart > contentStart &&
1199
+ messages$1[windowStart]?.getType() === 'tool') {
1200
+ windowStart--;
1201
+ usedTokens += agentContext.indexTokenCountMap[windowStart] ?? 0;
1202
+ }
1203
+ const recentMessages = messages$1.slice(windowStart);
1204
+ const compactedMessages = messages$1.slice(contentStart, windowStart);
1205
+ const hasSummary = summaryMsg != null;
1206
+ // Step 4: Assemble the windowed view
1207
+ // [system] + [summary (covers compacted messages)] + [recent window]
1208
+ const viewParts = [];
1209
+ if (systemMsg != null) {
1210
+ viewParts.push(systemMsg);
1211
+ }
1212
+ if (summaryMsg != null) {
1213
+ viewParts.push(summaryMsg);
1214
+ }
1215
+ viewParts.push(...recentMessages);
1216
+ messagesToUse = viewParts;
1217
+ console.debug(`[Graph:Compaction] View: ${messages$1.length}→${viewParts.length} msgs ` +
1218
+ `(${compactedMessages.length} behind summary, ${recentMessages.length} in window) | ` +
1219
+ `summary=${summarySource}${summary ? ` (len=${summary.length})` : ''} | ` +
1220
+ `budget=${recentBudget}/${calibratedMax} used=${usedTokens}`);
1221
+ // Step 5: Fire background summary update (non-blocking)
1222
+ // Summarize messages outside the window so next iteration has a fresh summary.
1223
+ // Only trigger if there are compacted messages worth summarizing.
1224
+ if (compactedMessages.length > 0 &&
1225
+ agentContext.summarizeCallback) {
1226
+ const shouldSummarize = this.shouldTriggerSummarization(compactedMessages.length, maxTokens, agentContext.indexTokenCountMap, agentContext.instructionTokens, sumConfig);
1227
+ if (shouldSummarize) {
1180
1228
  if (this._summaryInFlight) {
1181
- this._pendingMessagesToRefine.push(...messagesToRefine);
1182
- console.debug(`[Graph:ContextMgmt] Summary in-flight, queued ${messagesToRefine.length} msgs (pending=${this._pendingMessagesToRefine.length})`);
1229
+ this._pendingMessagesToRefine.push(...compactedMessages);
1230
+ console.debug(`[Graph:Compaction] Summary in-flight, queued ${compactedMessages.length} msgs (pending=${this._pendingMessagesToRefine.length})`);
1183
1231
  }
1184
1232
  else {
1185
1233
  this._summaryInFlight = true;
1186
1234
  const allMessages = this._pendingMessagesToRefine.length > 0
1187
- ? [...this._pendingMessagesToRefine, ...messagesToRefine]
1188
- : messagesToRefine;
1235
+ ? [...this._pendingMessagesToRefine, ...compactedMessages]
1236
+ : compactedMessages;
1189
1237
  this._pendingMessagesToRefine = [];
1190
1238
  agentContext
1191
1239
  .summarizeCallback(allMessages)
@@ -1195,40 +1243,17 @@ class StandardGraph extends Graph {
1195
1243
  }
1196
1244
  })
1197
1245
  .catch((err) => {
1198
- console.error('[Graph] Background summary failed (non-fatal):', err);
1246
+ console.error('[Graph:Compaction] Background summary update failed (non-fatal):', err);
1199
1247
  })
1200
1248
  .finally(() => {
1201
1249
  this._summaryInFlight = false;
1202
1250
  });
1203
1251
  }
1204
- if (summary != null && summary !== '') {
1205
- hasSummary = true;
1206
- const summaryMsg = new messages.SystemMessage(`[Conversation Summary]\n${summary}`);
1207
- const systemIdx = messagesToUse[0]?.getType() === 'system' ? 1 : 0;
1208
- messagesToUse = [
1209
- ...messagesToUse.slice(0, systemIdx),
1210
- summaryMsg,
1211
- ...messagesToUse.slice(systemIdx),
1212
- ];
1213
- }
1214
- }
1215
- catch (err) {
1216
- console.error('[Graph] Summarization failed:', err);
1217
1252
  }
1218
1253
  }
1219
- else if (messagesToRefine.length > 0) {
1220
- // Log pruning even when no summarize callback (discard mode)
1221
- console.debug(`[Graph:ContextMgmt] Pruned ${messages$1.length}→${context.length} msgs (${messagesToRefine.length} discarded, no summary callback) | calibration=${this._pruneCalibration.ratio.toFixed(3)}`);
1222
- }
1223
- // Deduplicate system messages that accumulate from repeated tool iterations
1224
- const { messages: dedupedMessages, removedCount } = dedup.deduplicateSystemMessages(messagesToUse);
1225
- if (removedCount > 0) {
1226
- messagesToUse = dedupedMessages;
1227
- console.debug(`[Graph:Dedup] Removed ${removedCount} duplicate system message(s)`);
1228
- }
1229
- // Post-prune context note for task-tool-enabled agents
1230
- if (messagesToRefine.length > 0 && contextPressure.hasTaskTool(agentContext.tools)) {
1231
- const postPruneNote = contextPressure.buildPostPruneNote(messagesToRefine.length, hasSummary);
1254
+ // Post-compaction context note for task-tool-enabled agents
1255
+ if (compactedMessages.length > 0 && contextPressure.hasTaskTool(agentContext.tools)) {
1256
+ const postPruneNote = contextPressure.buildPostPruneNote(compactedMessages.length, hasSummary);
1232
1257
  if (postPruneNote) {
1233
1258
  messagesToUse = [
1234
1259
  ...messagesToUse,
@@ -1237,6 +1262,14 @@ class StandardGraph extends Graph {
1237
1262
  }
1238
1263
  }
1239
1264
  }
1265
+ // Deduplicate system messages — ALWAYS runs, not just during compaction.
1266
+ // Duplicate system messages accumulate from repeated tool iterations,
1267
+ // summary injections, and context notes across turns.
1268
+ const { messages: dedupedMessages, removedCount } = dedup.deduplicateSystemMessages(messagesToUse);
1269
+ if (removedCount > 0) {
1270
+ messagesToUse = dedupedMessages;
1271
+ console.debug(`[Graph:Dedup] Removed ${removedCount} duplicate system message(s)`);
1272
+ }
1240
1273
  let finalMessages = messagesToUse;
1241
1274
  if (agentContext.useLegacyContent) {
1242
1275
  finalMessages = content.formatContentStrings(finalMessages);