@illuma-ai/agents 1.0.98 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. package/dist/cjs/agents/AgentContext.cjs +6 -2
  2. package/dist/cjs/agents/AgentContext.cjs.map +1 -1
  3. package/dist/cjs/common/constants.cjs +53 -0
  4. package/dist/cjs/common/constants.cjs.map +1 -1
  5. package/dist/cjs/graphs/Graph.cjs +195 -31
  6. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  7. package/dist/cjs/main.cjs +14 -0
  8. package/dist/cjs/main.cjs.map +1 -1
  9. package/dist/cjs/messages/dedup.cjs +95 -0
  10. package/dist/cjs/messages/dedup.cjs.map +1 -0
  11. package/dist/cjs/tools/CodeExecutor.cjs +22 -3
  12. package/dist/cjs/tools/CodeExecutor.cjs.map +1 -1
  13. package/dist/cjs/types/graph.cjs.map +1 -1
  14. package/dist/cjs/utils/pruneCalibration.cjs +78 -0
  15. package/dist/cjs/utils/pruneCalibration.cjs.map +1 -0
  16. package/dist/cjs/utils/run.cjs.map +1 -1
  17. package/dist/cjs/utils/tokens.cjs.map +1 -1
  18. package/dist/cjs/utils/toolDiscoveryCache.cjs +127 -0
  19. package/dist/cjs/utils/toolDiscoveryCache.cjs.map +1 -0
  20. package/dist/esm/agents/AgentContext.mjs +6 -2
  21. package/dist/esm/agents/AgentContext.mjs.map +1 -1
  22. package/dist/esm/common/constants.mjs +48 -1
  23. package/dist/esm/common/constants.mjs.map +1 -1
  24. package/dist/esm/graphs/Graph.mjs +196 -32
  25. package/dist/esm/graphs/Graph.mjs.map +1 -1
  26. package/dist/esm/main.mjs +4 -1
  27. package/dist/esm/main.mjs.map +1 -1
  28. package/dist/esm/messages/dedup.mjs +93 -0
  29. package/dist/esm/messages/dedup.mjs.map +1 -0
  30. package/dist/esm/tools/CodeExecutor.mjs +22 -3
  31. package/dist/esm/tools/CodeExecutor.mjs.map +1 -1
  32. package/dist/esm/types/graph.mjs.map +1 -1
  33. package/dist/esm/utils/pruneCalibration.mjs +74 -0
  34. package/dist/esm/utils/pruneCalibration.mjs.map +1 -0
  35. package/dist/esm/utils/run.mjs.map +1 -1
  36. package/dist/esm/utils/tokens.mjs.map +1 -1
  37. package/dist/esm/utils/toolDiscoveryCache.mjs +125 -0
  38. package/dist/esm/utils/toolDiscoveryCache.mjs.map +1 -0
  39. package/dist/types/agents/AgentContext.d.ts +4 -1
  40. package/dist/types/common/constants.d.ts +35 -0
  41. package/dist/types/graphs/Graph.d.ts +34 -0
  42. package/dist/types/messages/dedup.d.ts +25 -0
  43. package/dist/types/messages/index.d.ts +1 -0
  44. package/dist/types/types/graph.d.ts +63 -0
  45. package/dist/types/utils/index.d.ts +2 -0
  46. package/dist/types/utils/pruneCalibration.d.ts +43 -0
  47. package/dist/types/utils/toolDiscoveryCache.d.ts +77 -0
  48. package/package.json +1 -1
  49. package/src/agents/AgentContext.ts +7 -0
  50. package/src/common/constants.ts +56 -0
  51. package/src/graphs/Graph.ts +250 -50
  52. package/src/graphs/gapFeatures.test.ts +520 -0
  53. package/src/graphs/nonBlockingSummarization.test.ts +307 -0
  54. package/src/messages/__tests__/dedup.test.ts +166 -0
  55. package/src/messages/dedup.ts +104 -0
  56. package/src/messages/index.ts +1 -0
  57. package/src/tools/CodeExecutor.ts +22 -3
  58. package/src/types/graph.ts +73 -0
  59. package/src/utils/__tests__/pruneCalibration.test.ts +148 -0
  60. package/src/utils/__tests__/toolDiscoveryCache.test.ts +214 -0
  61. package/src/utils/contextPressure.test.ts +24 -9
  62. package/src/utils/index.ts +2 -0
  63. package/src/utils/pruneCalibration.ts +92 -0
  64. package/src/utils/run.ts +108 -108
  65. package/src/utils/tokens.ts +118 -118
  66. package/src/utils/toolDiscoveryCache.ts +150 -0
@@ -12,9 +12,9 @@ var prune = require('../messages/prune.cjs');
12
12
  var format = require('../messages/format.cjs');
13
13
  var cache = require('../messages/cache.cjs');
14
14
  var content = require('../messages/content.cjs');
15
- var tools = require('../messages/tools.cjs');
16
15
  var _enum = require('../common/enum.cjs');
17
16
  var constants = require('../common/constants.cjs');
17
+ var dedup = require('../messages/dedup.cjs');
18
18
  var graph = require('../utils/graph.cjs');
19
19
  var llm = require('../utils/llm.cjs');
20
20
  var stream = require('../stream.cjs');
@@ -25,6 +25,8 @@ require('../utils/toonFormat.cjs');
25
25
  var contextAnalytics = require('../utils/contextAnalytics.cjs');
26
26
  require('zod-to-json-schema');
27
27
  var contextPressure = require('../utils/contextPressure.cjs');
28
+ var toolDiscoveryCache = require('../utils/toolDiscoveryCache.cjs');
29
+ var pruneCalibration = require('../utils/pruneCalibration.cjs');
28
30
  var providers = require('../llm/providers.cjs');
29
31
  var ToolNode = require('../tools/ToolNode.cjs');
30
32
  var index = require('../llm/openai/index.cjs');
@@ -93,6 +95,22 @@ class StandardGraph extends Graph {
93
95
  runId;
94
96
  startIndex = 0;
95
97
  signal;
98
+ /** Cached summary from the first prune in this run.
99
+ * Reused for subsequent prunes to avoid blocking LLM calls on every tool iteration. */
100
+ _cachedRunSummary;
101
+ /** EMA-based pruning calibration state — smooths token budget adjustments across iterations */
102
+ _pruneCalibration;
103
+ /** Run-scoped tool discovery cache — avoids re-parsing conversation history on every iteration */
104
+ _toolDiscoveryCache;
105
+ /**
106
+ * SCALE: Tracks whether a summary call is already in-flight for this Graph instance.
107
+ * Prevents multiple concurrent summary LLM calls when rapid tool iterations each
108
+ * trigger pruning. At 2000 users with 3+ tool calls per turn, this reduces
109
+ * 6000+ summary calls/turn to roughly 2000.
110
+ */
111
+ _summaryInFlight = false;
112
+ /** Messages accumulated across tool iterations while a summary call is in-flight */
113
+ _pendingMessagesToRefine = [];
96
114
  /** Map of agent contexts by agent ID */
97
115
  agentContexts = new Map();
98
116
  /** Default agent ID to use */
@@ -113,6 +131,19 @@ class StandardGraph extends Graph {
113
131
  this.agentContexts.set(agentConfig.agentId, agentContext);
114
132
  }
115
133
  this.defaultAgentId = agents[0].agentId;
134
+ // Seed cached summary from persisted storage so the first prune in a
135
+ // resumed conversation can also skip the synchronous LLM summarization call
136
+ const primaryContext = this.agentContexts.get(this.defaultAgentId);
137
+ if (primaryContext?.persistedSummary) {
138
+ this._cachedRunSummary = primaryContext.persistedSummary;
139
+ }
140
+ // Initialize EMA pruning calibration
141
+ this._pruneCalibration = pruneCalibration.createPruneCalibration();
142
+ // Initialize tool discovery cache, seeded with any pre-existing discoveries
143
+ this._toolDiscoveryCache = new toolDiscoveryCache.ToolDiscoveryCache();
144
+ if (primaryContext?.discoveredToolNames.size) {
145
+ this._toolDiscoveryCache.seed([...primaryContext.discoveredToolNames]);
146
+ }
116
147
  }
117
148
  /* Init */
118
149
  resetValues(keepContent) {
@@ -135,6 +166,11 @@ class StandardGraph extends Graph {
135
166
  this.messageStepHasToolCalls = graph.resetIfNotEmpty(this.messageStepHasToolCalls, new Map());
136
167
  this.prelimMessageIdsByStepKey = graph.resetIfNotEmpty(this.prelimMessageIdsByStepKey, new Map());
137
168
  this.invokedToolIds = graph.resetIfNotEmpty(this.invokedToolIds, undefined);
169
+ // Reset EMA calibration, tool discovery cache, and summary debounce for fresh run
170
+ this._pruneCalibration = pruneCalibration.createPruneCalibration();
171
+ this._toolDiscoveryCache.reset();
172
+ this._summaryInFlight = false;
173
+ this._pendingMessagesToRefine = [];
138
174
  for (const context of this.agentContexts.values()) {
139
175
  context.reset();
140
176
  }
@@ -223,6 +259,62 @@ class StandardGraph extends Graph {
223
259
  }
224
260
  return clientOptions;
225
261
  }
262
+ /**
263
+ * Decides whether pruned messages should be summarized, based on an optional SummarizationConfig.
264
+ *
265
+ * Supports three trigger strategies, selected by config.triggerType:
266
+ * - contextPercentage: Trigger when (instructionTokens + sum of indexTokenCountMap) / maxContextTokens * 100 >= threshold (falls back to constants.SUMMARIZATION_CONTEXT_THRESHOLD); always triggers when maxContextTokens <= 0
267
+ * - messageCount: Trigger when pruned message count >= threshold (falls back to 5)
268
+ * - tokenThreshold: Trigger when instructionTokens + sum of indexTokenCountMap >= threshold; always triggers when threshold is null/undefined
269
+ *
270
+ * When config or config.triggerType is missing, or triggerType is unrecognized, triggers whenever at least one message was pruned (preserves backward compatibility).
271
+ *
272
+ * @param prunedMessageCount - Number of messages that were pruned; 0 short-circuits to false
273
+ * @param maxContextTokens - Maximum context token budget (only read by contextPercentage)
274
+ * @param indexTokenCountMap - Token count map by message index; nullish entries count as 0
275
+ * @param instructionTokens - Token count for instructions/system message, used as the starting total
276
+ * @param config - Optional SummarizationConfig (reads triggerType and triggerThreshold)
277
+ * @returns Whether summarization should be triggered; always false when prunedMessageCount is 0
278
+ */
279
+ shouldTriggerSummarization(prunedMessageCount, maxContextTokens, indexTokenCountMap, instructionTokens, config) {
280
+ // No pruned messages means nothing to summarize, regardless of config
281
+ if (prunedMessageCount === 0) {
282
+ return false;
283
+ }
284
+ // No config or no triggerType = backward compatible (always summarize when messages are pruned)
285
+ if (!config || !config.triggerType) {
286
+ return true;
287
+ }
288
+ const threshold = config.triggerThreshold;
289
+ switch (config.triggerType) {
290
+ case 'contextPercentage': {
291
+ if (maxContextTokens <= 0)
292
+ return true;
293
+ const effectiveThreshold = threshold ?? constants.SUMMARIZATION_CONTEXT_THRESHOLD;
294
+ let totalTokens = instructionTokens;
295
+ for (const key in indexTokenCountMap) {
296
+ totalTokens += indexTokenCountMap[key] ?? 0;
297
+ }
298
+ const utilization = (totalTokens / maxContextTokens) * 100;
299
+ return utilization >= effectiveThreshold;
300
+ }
301
+ case 'messageCount': {
302
+ const effectiveThreshold = threshold ?? 5;
303
+ return prunedMessageCount >= effectiveThreshold;
304
+ }
305
+ case 'tokenThreshold': {
306
+ if (threshold == null)
307
+ return true;
308
+ let totalTokens = instructionTokens;
309
+ for (const key in indexTokenCountMap) {
310
+ totalTokens += indexTokenCountMap[key] ?? 0;
311
+ }
312
+ return totalTokens >= threshold;
313
+ }
314
+ default:
315
+ return true;
316
+ }
317
+ }
226
318
  /**
227
319
  * Returns the normalized finish/stop reason from the last LLM invocation.
228
320
  * Used by callers to detect when the response was truncated due to max_tokens.
@@ -361,7 +453,6 @@ class StandardGraph extends Graph {
361
453
  /* Misc.*/
362
454
  getRunMessages() {
363
455
  const result = this.messages.slice(this.startIndex);
364
- console.debug(`[Graph] getRunMessages() | totalMessages=${this.messages.length} | startIndex=${this.startIndex} | runMessages=${result.length}`);
365
456
  return result;
366
457
  }
367
458
  getContentParts() {
@@ -917,10 +1008,12 @@ class StandardGraph extends Graph {
917
1008
  });
918
1009
  messages$1 = [dynamicContextMessage, ackMessage, ...messages$1];
919
1010
  }
920
- // Extract tool discoveries from current turn only (similar to formatArtifactPayload pattern)
921
- const discoveredNames = tools.extractToolDiscoveries(messages$1);
922
- if (discoveredNames.length > 0) {
923
- agentContext.markToolsAsDiscovered(discoveredNames);
1011
+ // Tool discovery caching: only scan new messages since last iteration
1012
+ // instead of re-parsing the full history via extractToolDiscoveries()
1013
+ const cachedDiscoveries = this._toolDiscoveryCache.getNewDiscoveries(messages$1);
1014
+ if (cachedDiscoveries.length > 0) {
1015
+ agentContext.markToolsAsDiscovered(cachedDiscoveries);
1016
+ console.debug(`[Graph:ToolDiscovery] Cached ${cachedDiscoveries.length} new tools (total: ${this._toolDiscoveryCache.size})`);
924
1017
  }
925
1018
  const toolsForBinding = agentContext.getToolsForBinding();
926
1019
  // PERF: Detect subsequent ReAct iterations (tool results present in messages)
@@ -970,56 +1063,136 @@ class StandardGraph extends Graph {
970
1063
  (agentContext.provider === _enum.Providers.OPENAI &&
971
1064
  agentContext.clientOptions.modelKwargs
972
1065
  ?.thinking?.type === 'enabled');
1066
+ // Apply EMA calibration to max token budget — smooths pruning across iterations
1067
+ const calibratedMaxTokens = pruneCalibration.applyCalibration(agentContext.maxContextTokens, this._pruneCalibration);
973
1068
  agentContext.pruneMessages = prune.createPruneMessages({
974
1069
  startIndex: this.startIndex,
975
1070
  provider: agentContext.provider,
976
1071
  tokenCounter: agentContext.tokenCounter,
977
- maxTokens: agentContext.maxContextTokens,
1072
+ maxTokens: calibratedMaxTokens,
978
1073
  thinkingEnabled: isAnthropicWithThinking,
979
1074
  indexTokenCountMap: agentContext.indexTokenCountMap,
980
1075
  });
981
1076
  }
1077
+ // Update EMA calibration with actual token usage from API response
1078
+ if (agentContext.currentUsage?.input_tokens &&
1079
+ agentContext.maxContextTokens) {
1080
+ const estimatedTokens = Object.values(agentContext.indexTokenCountMap).reduce((sum, v) => (sum ?? 0) + (v ?? 0), 0);
1081
+ if (estimatedTokens > 0) {
1082
+ this._pruneCalibration = pruneCalibration.updatePruneCalibration(this._pruneCalibration, agentContext.currentUsage.input_tokens, estimatedTokens);
1083
+ }
1084
+ }
982
1085
  if (agentContext.pruneMessages) {
983
- console.debug(`[Graph:ContextMgmt] Pruning messages | inputCount=${messages$1.length} | maxTokens=${agentContext.maxContextTokens}`);
984
1086
  const { context, indexTokenCountMap, messagesToRefine } = agentContext.pruneMessages({
985
1087
  messages: messages$1,
986
1088
  usageMetadata: agentContext.currentUsage,
987
- // startOnMessageType: 'human',
988
1089
  });
989
1090
  agentContext.indexTokenCountMap = indexTokenCountMap;
990
1091
  messagesToUse = context;
991
- console.debug(`[Graph:ContextMgmt] Pruned | kept=${context.length} | discarded=${messagesToRefine.length} | originalCount=${messages$1.length}`);
992
- // Summarize discarded messages if callback provided
1092
+ // ── Non-blocking summarization ──────────────────────────────────
1093
+ // NEVER block the LLM call waiting for summarization. Instead:
1094
+ // 1. If _cachedRunSummary exists → use it, fire async update
1095
+ // 2. If persistedSummary exists → use it as fallback, fire async update
1096
+ // 3. If NOTHING exists (first-ever prune) → skip summary, fire async generation
1097
+ // The summary catches up asynchronously and is available for subsequent
1098
+ // iterations (tool calls) and the next conversation turn.
1099
+ //
1100
+ // SummarizationConfig integration:
1101
+ // - triggerType/triggerThreshold control WHEN summarization fires
1102
+ // - reserveRatio is enforced via calibrated maxTokens (above)
1103
+ // - initialSummary provides cross-run seeding as fallback before persistedSummary
993
1104
  let hasSummary = false;
994
- if (messagesToRefine.length > 0 && agentContext.summarizeCallback) {
995
- console.debug(`[Graph:ContextMgmt] Summarizing ${messagesToRefine.length} discarded messages`);
1105
+ const sumConfig = agentContext.summarizationConfig;
1106
+ const shouldSummarize = this.shouldTriggerSummarization(messagesToRefine.length, agentContext.maxContextTokens ?? 0, agentContext.indexTokenCountMap, agentContext.instructionTokens, sumConfig);
1107
+ if (messagesToRefine.length > 0 &&
1108
+ agentContext.summarizeCallback &&
1109
+ shouldSummarize) {
996
1110
  try {
997
- const summary = await agentContext.summarizeCallback(messagesToRefine);
998
- console.debug(`[Graph:ContextMgmt] Summary received | len=${summary?.length ?? 0} | hasContent=${summary != null && summary !== ''}`);
1111
+ let summary;
1112
+ let summarySource;
1113
+ if (this._cachedRunSummary != null) {
1114
+ summary = this._cachedRunSummary;
1115
+ summarySource = 'cached';
1116
+ }
1117
+ else if (agentContext.persistedSummary != null &&
1118
+ agentContext.persistedSummary !== '') {
1119
+ summary = agentContext.persistedSummary;
1120
+ this._cachedRunSummary = summary;
1121
+ summarySource = 'persisted';
1122
+ }
1123
+ else if (sumConfig?.initialSummary != null &&
1124
+ sumConfig.initialSummary !== '') {
1125
+ // Cross-run seed: use initialSummary when no persisted summary exists
1126
+ summary = sumConfig.initialSummary;
1127
+ this._cachedRunSummary = summary;
1128
+ summarySource = 'initial-seed';
1129
+ }
1130
+ else {
1131
+ summarySource = 'none';
1132
+ }
1133
+ // Single consolidated log for the entire prune+summarize decision
1134
+ console.debug(`[Graph:ContextMgmt] Pruned ${messages$1.length}→${context.length} msgs (${messagesToRefine.length} discarded) | summary=${summarySource}${summary ? ` (len=${summary.length})` : ''} | calibration=${this._pruneCalibration.ratio.toFixed(3)}(${this._pruneCalibration.iterations})`);
1135
+ // SCALE: Debounce background summarization — if a summary call is already
1136
+ // in-flight (from a prior tool iteration), accumulate messages instead of
1137
+ // firing another concurrent LLM call. At 2000 users with 3+ tool calls
1138
+ // per turn, this prevents 3x summary call volume.
1139
+ if (this._summaryInFlight) {
1140
+ this._pendingMessagesToRefine.push(...messagesToRefine);
1141
+ console.debug(`[Graph:ContextMgmt] Summary in-flight, queued ${messagesToRefine.length} msgs (pending=${this._pendingMessagesToRefine.length})`);
1142
+ }
1143
+ else {
1144
+ this._summaryInFlight = true;
1145
+ const allMessages = this._pendingMessagesToRefine.length > 0
1146
+ ? [...this._pendingMessagesToRefine, ...messagesToRefine]
1147
+ : messagesToRefine;
1148
+ this._pendingMessagesToRefine = [];
1149
+ agentContext
1150
+ .summarizeCallback(allMessages)
1151
+ .then((updated) => {
1152
+ if (updated != null && updated !== '') {
1153
+ this._cachedRunSummary = updated;
1154
+ }
1155
+ })
1156
+ .catch((err) => {
1157
+ console.error('[Graph] Background summary failed (non-fatal):', err);
1158
+ })
1159
+ .finally(() => {
1160
+ this._summaryInFlight = false;
1161
+ });
1162
+ }
999
1163
  if (summary != null && summary !== '') {
1000
1164
  hasSummary = true;
1001
1165
  const summaryMsg = new messages.SystemMessage(`[Conversation Summary]\n${summary}`);
1002
- // Insert after system message (if present), before conversation messages
1003
1166
  const systemIdx = messagesToUse[0]?.getType() === 'system' ? 1 : 0;
1004
1167
  messagesToUse = [
1005
1168
  ...messagesToUse.slice(0, systemIdx),
1006
1169
  summaryMsg,
1007
1170
  ...messagesToUse.slice(systemIdx),
1008
1171
  ];
1009
- console.debug(`[Graph:ContextMgmt] Summary injected at index ${systemIdx} | finalMsgCount=${messagesToUse.length}`);
1010
1172
  }
1011
1173
  }
1012
1174
  catch (err) {
1013
- console.error('[Graph] Summarization callback failed:', err);
1175
+ console.error('[Graph] Summarization failed:', err);
1014
1176
  }
1015
1177
  }
1016
- // Post-prune context note: inform the LLM that context was compressed
1017
- // without exposing token numbers (prevents voluntary bail-out)
1178
+ else if (messagesToRefine.length > 0) {
1179
+ // Log pruning even when no summarize callback (discard mode)
1180
+ console.debug(`[Graph:ContextMgmt] Pruned ${messages$1.length}→${context.length} msgs (${messagesToRefine.length} discarded, no summary callback) | calibration=${this._pruneCalibration.ratio.toFixed(3)}`);
1181
+ }
1182
+ // Deduplicate system messages that accumulate from repeated tool iterations
1183
+ const { messages: dedupedMessages, removedCount } = dedup.deduplicateSystemMessages(messagesToUse);
1184
+ if (removedCount > 0) {
1185
+ messagesToUse = dedupedMessages;
1186
+ console.debug(`[Graph:Dedup] Removed ${removedCount} duplicate system message(s)`);
1187
+ }
1188
+ // Post-prune context note for task-tool-enabled agents
1018
1189
  if (messagesToRefine.length > 0 && contextPressure.hasTaskTool(agentContext.tools)) {
1019
1190
  const postPruneNote = contextPressure.buildPostPruneNote(messagesToRefine.length, hasSummary);
1020
1191
  if (postPruneNote) {
1021
- messagesToUse = [...messagesToUse, new messages.SystemMessage(postPruneNote)];
1022
- console.debug(`[Graph:ContextMgmt] Post-prune note injected | hasSummary=${hasSummary} | discarded=${messagesToRefine.length}`);
1192
+ messagesToUse = [
1193
+ ...messagesToUse,
1194
+ new messages.SystemMessage(postPruneNote),
1195
+ ];
1023
1196
  }
1024
1197
  }
1025
1198
  }
@@ -1143,11 +1316,6 @@ class StandardGraph extends Graph {
1143
1316
  // ====================================================================
1144
1317
  if (contextPressure.hasTaskTool(agentContext.tools)) {
1145
1318
  const { count: documentCount, names: documentNames } = contextPressure.detectDocuments(finalMessages);
1146
- // Observability log (no token numbers exposed to LLM)
1147
- if (contextAnalytics$1.utilizationPercent != null) {
1148
- console.debug(`[Graph] Context utilization: ${contextAnalytics$1.utilizationPercent.toFixed(1)}% | ` +
1149
- `messages: ${finalMessages.length} | docs: ${documentCount}`);
1150
- }
1151
1319
  // Multi-document delegation: first iteration only (before AI has responded)
1152
1320
  const hasAiResponse = finalMessages.some((m) => m._getType() === 'ai' || m._getType() === 'tool');
1153
1321
  if (contextPressure.shouldInjectMultiDocHint(documentCount, hasAiResponse)) {
@@ -1551,10 +1719,6 @@ If I seem to be missing something we discussed earlier, just give me a quick rem
1551
1719
  reducer: (a, b) => {
1552
1720
  if (!a.length) {
1553
1721
  this.startIndex = a.length + b.length;
1554
- console.debug(`[Graph:Reducer] Initial messages | startIndex=${this.startIndex} | inputMsgCount=${b.length}`);
1555
- }
1556
- else {
1557
- console.debug(`[Graph:Reducer] Appending messages | existing=${a.length} | new=${b.length} | startIndex=${this.startIndex}`);
1558
1722
  }
1559
1723
  const result = langgraph.messagesStateReducer(a, b);
1560
1724
  this.messages = result;