@illuma-ai/agents 1.0.98 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. package/dist/cjs/agents/AgentContext.cjs +6 -2
  2. package/dist/cjs/agents/AgentContext.cjs.map +1 -1
  3. package/dist/cjs/common/constants.cjs +53 -0
  4. package/dist/cjs/common/constants.cjs.map +1 -1
  5. package/dist/cjs/graphs/Graph.cjs +167 -31
  6. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  7. package/dist/cjs/main.cjs +14 -0
  8. package/dist/cjs/main.cjs.map +1 -1
  9. package/dist/cjs/messages/dedup.cjs +95 -0
  10. package/dist/cjs/messages/dedup.cjs.map +1 -0
  11. package/dist/cjs/tools/CodeExecutor.cjs +22 -3
  12. package/dist/cjs/tools/CodeExecutor.cjs.map +1 -1
  13. package/dist/cjs/types/graph.cjs.map +1 -1
  14. package/dist/cjs/utils/pruneCalibration.cjs +78 -0
  15. package/dist/cjs/utils/pruneCalibration.cjs.map +1 -0
  16. package/dist/cjs/utils/run.cjs.map +1 -1
  17. package/dist/cjs/utils/tokens.cjs.map +1 -1
  18. package/dist/cjs/utils/toolDiscoveryCache.cjs +127 -0
  19. package/dist/cjs/utils/toolDiscoveryCache.cjs.map +1 -0
  20. package/dist/esm/agents/AgentContext.mjs +6 -2
  21. package/dist/esm/agents/AgentContext.mjs.map +1 -1
  22. package/dist/esm/common/constants.mjs +48 -1
  23. package/dist/esm/common/constants.mjs.map +1 -1
  24. package/dist/esm/graphs/Graph.mjs +168 -32
  25. package/dist/esm/graphs/Graph.mjs.map +1 -1
  26. package/dist/esm/main.mjs +4 -1
  27. package/dist/esm/main.mjs.map +1 -1
  28. package/dist/esm/messages/dedup.mjs +93 -0
  29. package/dist/esm/messages/dedup.mjs.map +1 -0
  30. package/dist/esm/tools/CodeExecutor.mjs +22 -3
  31. package/dist/esm/tools/CodeExecutor.mjs.map +1 -1
  32. package/dist/esm/types/graph.mjs.map +1 -1
  33. package/dist/esm/utils/pruneCalibration.mjs +74 -0
  34. package/dist/esm/utils/pruneCalibration.mjs.map +1 -0
  35. package/dist/esm/utils/run.mjs.map +1 -1
  36. package/dist/esm/utils/tokens.mjs.map +1 -1
  37. package/dist/esm/utils/toolDiscoveryCache.mjs +125 -0
  38. package/dist/esm/utils/toolDiscoveryCache.mjs.map +1 -0
  39. package/dist/types/agents/AgentContext.d.ts +4 -1
  40. package/dist/types/common/constants.d.ts +35 -0
  41. package/dist/types/graphs/Graph.d.ts +25 -0
  42. package/dist/types/messages/dedup.d.ts +25 -0
  43. package/dist/types/messages/index.d.ts +1 -0
  44. package/dist/types/types/graph.d.ts +63 -0
  45. package/dist/types/utils/index.d.ts +2 -0
  46. package/dist/types/utils/pruneCalibration.d.ts +43 -0
  47. package/dist/types/utils/toolDiscoveryCache.d.ts +77 -0
  48. package/package.json +1 -1
  49. package/src/agents/AgentContext.ts +7 -0
  50. package/src/common/constants.ts +56 -0
  51. package/src/graphs/Graph.ts +220 -50
  52. package/src/graphs/gapFeatures.test.ts +520 -0
  53. package/src/graphs/nonBlockingSummarization.test.ts +307 -0
  54. package/src/messages/__tests__/dedup.test.ts +166 -0
  55. package/src/messages/dedup.ts +104 -0
  56. package/src/messages/index.ts +1 -0
  57. package/src/tools/CodeExecutor.ts +22 -3
  58. package/src/types/graph.ts +73 -0
  59. package/src/utils/__tests__/pruneCalibration.test.ts +148 -0
  60. package/src/utils/__tests__/toolDiscoveryCache.test.ts +214 -0
  61. package/src/utils/contextPressure.test.ts +24 -9
  62. package/src/utils/index.ts +2 -0
  63. package/src/utils/pruneCalibration.ts +92 -0
  64. package/src/utils/run.ts +108 -108
  65. package/src/utils/tokens.ts +118 -118
  66. package/src/utils/toolDiscoveryCache.ts +150 -0
@@ -12,9 +12,9 @@ var prune = require('../messages/prune.cjs');
12
12
  var format = require('../messages/format.cjs');
13
13
  var cache = require('../messages/cache.cjs');
14
14
  var content = require('../messages/content.cjs');
15
- var tools = require('../messages/tools.cjs');
16
15
  var _enum = require('../common/enum.cjs');
17
16
  var constants = require('../common/constants.cjs');
17
+ var dedup = require('../messages/dedup.cjs');
18
18
  var graph = require('../utils/graph.cjs');
19
19
  var llm = require('../utils/llm.cjs');
20
20
  var stream = require('../stream.cjs');
@@ -25,6 +25,8 @@ require('../utils/toonFormat.cjs');
25
25
  var contextAnalytics = require('../utils/contextAnalytics.cjs');
26
26
  require('zod-to-json-schema');
27
27
  var contextPressure = require('../utils/contextPressure.cjs');
28
+ var toolDiscoveryCache = require('../utils/toolDiscoveryCache.cjs');
29
+ var pruneCalibration = require('../utils/pruneCalibration.cjs');
28
30
  var providers = require('../llm/providers.cjs');
29
31
  var ToolNode = require('../tools/ToolNode.cjs');
30
32
  var index = require('../llm/openai/index.cjs');
@@ -93,6 +95,13 @@ class StandardGraph extends Graph {
93
95
  runId;
94
96
  startIndex = 0;
95
97
  signal;
98
+ /** Cached summary from the first prune in this run.
99
+ * Reused for subsequent prunes to avoid blocking LLM calls on every tool iteration. */
100
+ _cachedRunSummary;
101
+ /** EMA-based pruning calibration state — smooths token budget adjustments across iterations */
102
+ _pruneCalibration;
103
+ /** Run-scoped tool discovery cache — avoids re-parsing conversation history on every iteration */
104
+ _toolDiscoveryCache;
96
105
  /** Map of agent contexts by agent ID */
97
106
  agentContexts = new Map();
98
107
  /** Default agent ID to use */
@@ -113,6 +122,19 @@ class StandardGraph extends Graph {
113
122
  this.agentContexts.set(agentConfig.agentId, agentContext);
114
123
  }
115
124
  this.defaultAgentId = agents[0].agentId;
125
+ // Seed cached summary from persisted storage so the first prune in a
126
+ // resumed conversation can also skip the synchronous LLM summarization call
127
+ const primaryContext = this.agentContexts.get(this.defaultAgentId);
128
+ if (primaryContext?.persistedSummary) {
129
+ this._cachedRunSummary = primaryContext.persistedSummary;
130
+ }
131
+ // Initialize EMA pruning calibration
132
+ this._pruneCalibration = pruneCalibration.createPruneCalibration();
133
+ // Initialize tool discovery cache, seeded with any pre-existing discoveries
134
+ this._toolDiscoveryCache = new toolDiscoveryCache.ToolDiscoveryCache();
135
+ if (primaryContext?.discoveredToolNames.size) {
136
+ this._toolDiscoveryCache.seed([...primaryContext.discoveredToolNames]);
137
+ }
116
138
  }
117
139
  /* Init */
118
140
  resetValues(keepContent) {
@@ -135,6 +157,9 @@ class StandardGraph extends Graph {
135
157
  this.messageStepHasToolCalls = graph.resetIfNotEmpty(this.messageStepHasToolCalls, new Map());
136
158
  this.prelimMessageIdsByStepKey = graph.resetIfNotEmpty(this.prelimMessageIdsByStepKey, new Map());
137
159
  this.invokedToolIds = graph.resetIfNotEmpty(this.invokedToolIds, undefined);
160
+ // Reset EMA calibration and tool discovery cache for fresh run
161
+ this._pruneCalibration = pruneCalibration.createPruneCalibration();
162
+ this._toolDiscoveryCache.reset();
138
163
  for (const context of this.agentContexts.values()) {
139
164
  context.reset();
140
165
  }
@@ -223,6 +248,62 @@ class StandardGraph extends Graph {
223
248
  }
224
249
  return clientOptions;
225
250
  }
251
+ /**
252
+ * Determines whether summarization should trigger for the current prune, based on SummarizationConfig.
253
+ *
254
+ * Supports three trigger strategies, selected by config.triggerType:
255
+ * - contextPercentage: Trigger when (instructionTokens + sum of indexTokenCountMap) / maxContextTokens * 100 >= threshold (threshold falls back to constants.SUMMARIZATION_CONTEXT_THRESHOLD; always triggers when maxContextTokens <= 0)
256
+ * - messageCount: Trigger when prunedMessageCount >= threshold (threshold falls back to 5)
257
+ * - tokenThreshold: Trigger when instructionTokens + sum of indexTokenCountMap >= threshold (always triggers when no threshold is set)
258
+ *
259
+ * When config or config.triggerType is missing, or triggerType is not one of the above, always triggers as long as prunedMessageCount is non-zero (preserves backward compatibility).
260
+ *
261
+ * @param prunedMessageCount - Number of messages that were pruned; 0 always returns false
262
+ * @param maxContextTokens - Maximum context token budget (only used by contextPercentage)
263
+ * @param indexTokenCountMap - Token count map by message index; nullish entries count as 0
264
+ * @param instructionTokens - Token count for instructions/system message, used as the starting total
265
+ * @param config - Optional SummarizationConfig; only triggerType and triggerThreshold are read here
266
+ * @returns Whether summarization should be triggered
267
+ */
268
+ shouldTriggerSummarization(prunedMessageCount, maxContextTokens, indexTokenCountMap, instructionTokens, config) {
269
+ // No pruned messages means nothing to summarize
270
+ if (prunedMessageCount === 0) {
271
+ return false;
272
+ }
273
+ // No config or no triggerType = backward compatible (always summarize when messages are pruned)
274
+ if (!config || !config.triggerType) {
275
+ return true;
276
+ }
277
+ const threshold = config.triggerThreshold;
278
+ switch (config.triggerType) {
279
+ case 'contextPercentage': {
280
+ if (maxContextTokens <= 0)
281
+ return true;
282
+ const effectiveThreshold = threshold ?? constants.SUMMARIZATION_CONTEXT_THRESHOLD;
283
+ let totalTokens = instructionTokens;
284
+ for (const key in indexTokenCountMap) {
285
+ totalTokens += indexTokenCountMap[key] ?? 0;
286
+ }
287
+ const utilization = (totalTokens / maxContextTokens) * 100;
288
+ return utilization >= effectiveThreshold;
289
+ }
290
+ case 'messageCount': {
291
+ const effectiveThreshold = threshold ?? 5;
292
+ return prunedMessageCount >= effectiveThreshold;
293
+ }
294
+ case 'tokenThreshold': {
295
+ if (threshold == null)
296
+ return true;
297
+ let totalTokens = instructionTokens;
298
+ for (const key in indexTokenCountMap) {
299
+ totalTokens += indexTokenCountMap[key] ?? 0;
300
+ }
301
+ return totalTokens >= threshold;
302
+ }
303
+ default:
304
+ return true;
305
+ }
306
+ }
226
307
  /**
227
308
  * Returns the normalized finish/stop reason from the last LLM invocation.
228
309
  * Used by callers to detect when the response was truncated due to max_tokens.
@@ -361,7 +442,6 @@ class StandardGraph extends Graph {
361
442
  /* Misc.*/
362
443
  getRunMessages() {
363
444
  const result = this.messages.slice(this.startIndex);
364
- console.debug(`[Graph] getRunMessages() | totalMessages=${this.messages.length} | startIndex=${this.startIndex} | runMessages=${result.length}`);
365
445
  return result;
366
446
  }
367
447
  getContentParts() {
@@ -917,10 +997,12 @@ class StandardGraph extends Graph {
917
997
  });
918
998
  messages$1 = [dynamicContextMessage, ackMessage, ...messages$1];
919
999
  }
920
- // Extract tool discoveries from current turn only (similar to formatArtifactPayload pattern)
921
- const discoveredNames = tools.extractToolDiscoveries(messages$1);
922
- if (discoveredNames.length > 0) {
923
- agentContext.markToolsAsDiscovered(discoveredNames);
1000
+ // Tool discovery caching: only scan new messages since last iteration
1001
+ // instead of re-parsing the full history via extractToolDiscoveries()
1002
+ const cachedDiscoveries = this._toolDiscoveryCache.getNewDiscoveries(messages$1);
1003
+ if (cachedDiscoveries.length > 0) {
1004
+ agentContext.markToolsAsDiscovered(cachedDiscoveries);
1005
+ console.debug(`[Graph:ToolDiscovery] Cached ${cachedDiscoveries.length} new tools (total: ${this._toolDiscoveryCache.size})`);
924
1006
  }
925
1007
  const toolsForBinding = agentContext.getToolsForBinding();
926
1008
  // PERF: Detect subsequent ReAct iterations (tool results present in messages)
@@ -970,56 +1052,119 @@ class StandardGraph extends Graph {
970
1052
  (agentContext.provider === _enum.Providers.OPENAI &&
971
1053
  agentContext.clientOptions.modelKwargs
972
1054
  ?.thinking?.type === 'enabled');
1055
+ // Apply EMA calibration to max token budget — smooths pruning across iterations
1056
+ const calibratedMaxTokens = pruneCalibration.applyCalibration(agentContext.maxContextTokens, this._pruneCalibration);
973
1057
  agentContext.pruneMessages = prune.createPruneMessages({
974
1058
  startIndex: this.startIndex,
975
1059
  provider: agentContext.provider,
976
1060
  tokenCounter: agentContext.tokenCounter,
977
- maxTokens: agentContext.maxContextTokens,
1061
+ maxTokens: calibratedMaxTokens,
978
1062
  thinkingEnabled: isAnthropicWithThinking,
979
1063
  indexTokenCountMap: agentContext.indexTokenCountMap,
980
1064
  });
981
1065
  }
1066
+ // Update EMA calibration with actual token usage from API response
1067
+ if (agentContext.currentUsage?.input_tokens &&
1068
+ agentContext.maxContextTokens) {
1069
+ const estimatedTokens = Object.values(agentContext.indexTokenCountMap).reduce((sum, v) => (sum ?? 0) + (v ?? 0), 0);
1070
+ if (estimatedTokens > 0) {
1071
+ this._pruneCalibration = pruneCalibration.updatePruneCalibration(this._pruneCalibration, agentContext.currentUsage.input_tokens, estimatedTokens);
1072
+ }
1073
+ }
982
1074
  if (agentContext.pruneMessages) {
983
- console.debug(`[Graph:ContextMgmt] Pruning messages | inputCount=${messages$1.length} | maxTokens=${agentContext.maxContextTokens}`);
984
1075
  const { context, indexTokenCountMap, messagesToRefine } = agentContext.pruneMessages({
985
1076
  messages: messages$1,
986
1077
  usageMetadata: agentContext.currentUsage,
987
- // startOnMessageType: 'human',
988
1078
  });
989
1079
  agentContext.indexTokenCountMap = indexTokenCountMap;
990
1080
  messagesToUse = context;
991
- console.debug(`[Graph:ContextMgmt] Pruned | kept=${context.length} | discarded=${messagesToRefine.length} | originalCount=${messages$1.length}`);
992
- // Summarize discarded messages if callback provided
1081
+ // ── Non-blocking summarization ──────────────────────────────────
1082
+ // NEVER block the LLM call waiting for summarization. Instead:
1083
+ // 1. If _cachedRunSummary exists → use it, fire async update
1084
+ // 2. If persistedSummary exists → use it as fallback, fire async update
1085
+ // 3. If NOTHING exists (first-ever prune) → skip summary, fire async generation
1086
+ // The summary catches up asynchronously and is available for subsequent
1087
+ // iterations (tool calls) and the next conversation turn.
1088
+ //
1089
+ // SummarizationConfig integration:
1090
+ // - triggerType/triggerThreshold control WHEN summarization fires
1091
+ // - reserveRatio is enforced via calibrated maxTokens (above)
1092
+ // - initialSummary provides cross-run seeding, used only when no cached or persisted summary exists
993
1093
  let hasSummary = false;
994
- if (messagesToRefine.length > 0 && agentContext.summarizeCallback) {
995
- console.debug(`[Graph:ContextMgmt] Summarizing ${messagesToRefine.length} discarded messages`);
1094
+ const sumConfig = agentContext.summarizationConfig;
1095
+ const shouldSummarize = this.shouldTriggerSummarization(messagesToRefine.length, agentContext.maxContextTokens ?? 0, agentContext.indexTokenCountMap, agentContext.instructionTokens, sumConfig);
1096
+ if (messagesToRefine.length > 0 &&
1097
+ agentContext.summarizeCallback &&
1098
+ shouldSummarize) {
996
1099
  try {
997
- const summary = await agentContext.summarizeCallback(messagesToRefine);
998
- console.debug(`[Graph:ContextMgmt] Summary received | len=${summary?.length ?? 0} | hasContent=${summary != null && summary !== ''}`);
1100
+ let summary;
1101
+ let summarySource;
1102
+ if (this._cachedRunSummary != null) {
1103
+ summary = this._cachedRunSummary;
1104
+ summarySource = 'cached';
1105
+ }
1106
+ else if (agentContext.persistedSummary != null &&
1107
+ agentContext.persistedSummary !== '') {
1108
+ summary = agentContext.persistedSummary;
1109
+ this._cachedRunSummary = summary;
1110
+ summarySource = 'persisted';
1111
+ }
1112
+ else if (sumConfig?.initialSummary != null &&
1113
+ sumConfig.initialSummary !== '') {
1114
+ // Cross-run seed: use initialSummary when no persisted summary exists
1115
+ summary = sumConfig.initialSummary;
1116
+ this._cachedRunSummary = summary;
1117
+ summarySource = 'initial-seed';
1118
+ }
1119
+ else {
1120
+ summarySource = 'none';
1121
+ }
1122
+ // Single consolidated log for the entire prune+summarize decision
1123
+ console.debug(`[Graph:ContextMgmt] Pruned ${messages$1.length}→${context.length} msgs (${messagesToRefine.length} discarded) | summary=${summarySource}${summary ? ` (len=${summary.length})` : ''} | calibration=${this._pruneCalibration.ratio.toFixed(3)}(${this._pruneCalibration.iterations})`);
1124
+ // Fire background summarization — updates cache for next iteration/turn
1125
+ agentContext
1126
+ .summarizeCallback(messagesToRefine)
1127
+ .then((updated) => {
1128
+ if (updated != null && updated !== '') {
1129
+ this._cachedRunSummary = updated;
1130
+ }
1131
+ })
1132
+ .catch((err) => {
1133
+ console.error('[Graph] Background summary failed (non-fatal):', err);
1134
+ });
999
1135
  if (summary != null && summary !== '') {
1000
1136
  hasSummary = true;
1001
1137
  const summaryMsg = new messages.SystemMessage(`[Conversation Summary]\n${summary}`);
1002
- // Insert after system message (if present), before conversation messages
1003
1138
  const systemIdx = messagesToUse[0]?.getType() === 'system' ? 1 : 0;
1004
1139
  messagesToUse = [
1005
1140
  ...messagesToUse.slice(0, systemIdx),
1006
1141
  summaryMsg,
1007
1142
  ...messagesToUse.slice(systemIdx),
1008
1143
  ];
1009
- console.debug(`[Graph:ContextMgmt] Summary injected at index ${systemIdx} | finalMsgCount=${messagesToUse.length}`);
1010
1144
  }
1011
1145
  }
1012
1146
  catch (err) {
1013
- console.error('[Graph] Summarization callback failed:', err);
1147
+ console.error('[Graph] Summarization failed:', err);
1014
1148
  }
1015
1149
  }
1016
- // Post-prune context note: inform the LLM that context was compressed
1017
- // without exposing token numbers (prevents voluntary bail-out)
1150
+ else if (messagesToRefine.length > 0) {
1151
+ // Log pruning even when no summarize callback (discard mode)
1152
+ console.debug(`[Graph:ContextMgmt] Pruned ${messages$1.length}→${context.length} msgs (${messagesToRefine.length} discarded, no summary callback) | calibration=${this._pruneCalibration.ratio.toFixed(3)}`);
1153
+ }
1154
+ // Deduplicate system messages that accumulate from repeated tool iterations
1155
+ const { messages: dedupedMessages, removedCount } = dedup.deduplicateSystemMessages(messagesToUse);
1156
+ if (removedCount > 0) {
1157
+ messagesToUse = dedupedMessages;
1158
+ console.debug(`[Graph:Dedup] Removed ${removedCount} duplicate system message(s)`);
1159
+ }
1160
+ // Post-prune context note for task-tool-enabled agents
1018
1161
  if (messagesToRefine.length > 0 && contextPressure.hasTaskTool(agentContext.tools)) {
1019
1162
  const postPruneNote = contextPressure.buildPostPruneNote(messagesToRefine.length, hasSummary);
1020
1163
  if (postPruneNote) {
1021
- messagesToUse = [...messagesToUse, new messages.SystemMessage(postPruneNote)];
1022
- console.debug(`[Graph:ContextMgmt] Post-prune note injected | hasSummary=${hasSummary} | discarded=${messagesToRefine.length}`);
1164
+ messagesToUse = [
1165
+ ...messagesToUse,
1166
+ new messages.SystemMessage(postPruneNote),
1167
+ ];
1023
1168
  }
1024
1169
  }
1025
1170
  }
@@ -1143,11 +1288,6 @@ class StandardGraph extends Graph {
1143
1288
  // ====================================================================
1144
1289
  if (contextPressure.hasTaskTool(agentContext.tools)) {
1145
1290
  const { count: documentCount, names: documentNames } = contextPressure.detectDocuments(finalMessages);
1146
- // Observability log (no token numbers exposed to LLM)
1147
- if (contextAnalytics$1.utilizationPercent != null) {
1148
- console.debug(`[Graph] Context utilization: ${contextAnalytics$1.utilizationPercent.toFixed(1)}% | ` +
1149
- `messages: ${finalMessages.length} | docs: ${documentCount}`);
1150
- }
1151
1291
  // Multi-document delegation: first iteration only (before AI has responded)
1152
1292
  const hasAiResponse = finalMessages.some((m) => m._getType() === 'ai' || m._getType() === 'tool');
1153
1293
  if (contextPressure.shouldInjectMultiDocHint(documentCount, hasAiResponse)) {
@@ -1551,10 +1691,6 @@ If I seem to be missing something we discussed earlier, just give me a quick rem
1551
1691
  reducer: (a, b) => {
1552
1692
  if (!a.length) {
1553
1693
  this.startIndex = a.length + b.length;
1554
- console.debug(`[Graph:Reducer] Initial messages | startIndex=${this.startIndex} | inputMsgCount=${b.length}`);
1555
- }
1556
- else {
1557
- console.debug(`[Graph:Reducer] Appending messages | existing=${a.length} | new=${b.length} | startIndex=${this.startIndex}`);
1558
1694
  }
1559
1695
  const result = langgraph.messagesStateReducer(a, b);
1560
1696
  this.messages = result;