@illuma-ai/agents 1.0.98 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. package/dist/cjs/agents/AgentContext.cjs +6 -2
  2. package/dist/cjs/agents/AgentContext.cjs.map +1 -1
  3. package/dist/cjs/common/constants.cjs +53 -0
  4. package/dist/cjs/common/constants.cjs.map +1 -1
  5. package/dist/cjs/graphs/Graph.cjs +167 -31
  6. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  7. package/dist/cjs/main.cjs +14 -0
  8. package/dist/cjs/main.cjs.map +1 -1
  9. package/dist/cjs/messages/dedup.cjs +95 -0
  10. package/dist/cjs/messages/dedup.cjs.map +1 -0
  11. package/dist/cjs/tools/CodeExecutor.cjs +22 -3
  12. package/dist/cjs/tools/CodeExecutor.cjs.map +1 -1
  13. package/dist/cjs/types/graph.cjs.map +1 -1
  14. package/dist/cjs/utils/pruneCalibration.cjs +78 -0
  15. package/dist/cjs/utils/pruneCalibration.cjs.map +1 -0
  16. package/dist/cjs/utils/run.cjs.map +1 -1
  17. package/dist/cjs/utils/tokens.cjs.map +1 -1
  18. package/dist/cjs/utils/toolDiscoveryCache.cjs +127 -0
  19. package/dist/cjs/utils/toolDiscoveryCache.cjs.map +1 -0
  20. package/dist/esm/agents/AgentContext.mjs +6 -2
  21. package/dist/esm/agents/AgentContext.mjs.map +1 -1
  22. package/dist/esm/common/constants.mjs +48 -1
  23. package/dist/esm/common/constants.mjs.map +1 -1
  24. package/dist/esm/graphs/Graph.mjs +168 -32
  25. package/dist/esm/graphs/Graph.mjs.map +1 -1
  26. package/dist/esm/main.mjs +4 -1
  27. package/dist/esm/main.mjs.map +1 -1
  28. package/dist/esm/messages/dedup.mjs +93 -0
  29. package/dist/esm/messages/dedup.mjs.map +1 -0
  30. package/dist/esm/tools/CodeExecutor.mjs +22 -3
  31. package/dist/esm/tools/CodeExecutor.mjs.map +1 -1
  32. package/dist/esm/types/graph.mjs.map +1 -1
  33. package/dist/esm/utils/pruneCalibration.mjs +74 -0
  34. package/dist/esm/utils/pruneCalibration.mjs.map +1 -0
  35. package/dist/esm/utils/run.mjs.map +1 -1
  36. package/dist/esm/utils/tokens.mjs.map +1 -1
  37. package/dist/esm/utils/toolDiscoveryCache.mjs +125 -0
  38. package/dist/esm/utils/toolDiscoveryCache.mjs.map +1 -0
  39. package/dist/types/agents/AgentContext.d.ts +4 -1
  40. package/dist/types/common/constants.d.ts +35 -0
  41. package/dist/types/graphs/Graph.d.ts +25 -0
  42. package/dist/types/messages/dedup.d.ts +25 -0
  43. package/dist/types/messages/index.d.ts +1 -0
  44. package/dist/types/types/graph.d.ts +63 -0
  45. package/dist/types/utils/index.d.ts +2 -0
  46. package/dist/types/utils/pruneCalibration.d.ts +43 -0
  47. package/dist/types/utils/toolDiscoveryCache.d.ts +77 -0
  48. package/package.json +1 -1
  49. package/src/agents/AgentContext.ts +7 -0
  50. package/src/common/constants.ts +56 -0
  51. package/src/graphs/Graph.ts +220 -50
  52. package/src/graphs/gapFeatures.test.ts +520 -0
  53. package/src/graphs/nonBlockingSummarization.test.ts +307 -0
  54. package/src/messages/__tests__/dedup.test.ts +166 -0
  55. package/src/messages/dedup.ts +104 -0
  56. package/src/messages/index.ts +1 -0
  57. package/src/tools/CodeExecutor.ts +22 -3
  58. package/src/types/graph.ts +73 -0
  59. package/src/utils/__tests__/pruneCalibration.test.ts +148 -0
  60. package/src/utils/__tests__/toolDiscoveryCache.test.ts +214 -0
  61. package/src/utils/contextPressure.test.ts +24 -9
  62. package/src/utils/index.ts +2 -0
  63. package/src/utils/pruneCalibration.ts +92 -0
  64. package/src/utils/run.ts +108 -108
  65. package/src/utils/tokens.ts +118 -118
  66. package/src/utils/toolDiscoveryCache.ts +150 -0
@@ -10,9 +10,9 @@ import { createPruneMessages } from '../messages/prune.mjs';
10
10
  import { ensureThinkingBlockInMessages } from '../messages/format.mjs';
11
11
  import { addCacheControl, addBedrockCacheControl } from '../messages/cache.mjs';
12
12
  import { formatContentStrings } from '../messages/content.mjs';
13
- import { extractToolDiscoveries } from '../messages/tools.mjs';
14
13
  import { GraphNodeKeys, Providers, ContentTypes, GraphEvents, MessageTypes, StepTypes, Constants } from '../common/enum.mjs';
15
- import { TOOL_TURN_THINKING_BUDGET } from '../common/constants.mjs';
14
+ import { TOOL_TURN_THINKING_BUDGET, SUMMARIZATION_CONTEXT_THRESHOLD } from '../common/constants.mjs';
15
+ import { deduplicateSystemMessages } from '../messages/dedup.mjs';
16
16
  import { resetIfNotEmpty, joinKeys } from '../utils/graph.mjs';
17
17
  import { isOpenAILike, isGoogleLike } from '../utils/llm.mjs';
18
18
  import { ChatModelStreamHandler } from '../stream.mjs';
@@ -23,6 +23,8 @@ import '../utils/toonFormat.mjs';
23
23
  import { buildContextAnalytics } from '../utils/contextAnalytics.mjs';
24
24
  import 'zod-to-json-schema';
25
25
  import { hasTaskTool, buildPostPruneNote, detectDocuments, shouldInjectMultiDocHint, buildMultiDocHintContent } from '../utils/contextPressure.mjs';
26
+ import { ToolDiscoveryCache } from '../utils/toolDiscoveryCache.mjs';
27
+ import { createPruneCalibration, applyCalibration, updatePruneCalibration } from '../utils/pruneCalibration.mjs';
26
28
  import { getChatModelClass, manualToolStreamProviders } from '../llm/providers.mjs';
27
29
  import { ToolNode, toolsCondition } from '../tools/ToolNode.mjs';
28
30
  import { ChatOpenAI, AzureChatOpenAI } from '../llm/openai/index.mjs';
@@ -91,6 +93,13 @@ class StandardGraph extends Graph {
91
93
  runId;
92
94
  startIndex = 0;
93
95
  signal;
96
+ /** Cached summary from the first prune in this run.
97
+ * Reused for subsequent prunes to avoid blocking LLM calls on every tool iteration. */
98
+ _cachedRunSummary;
99
+ /** EMA-based pruning calibration state — smooths token budget adjustments across iterations */
100
+ _pruneCalibration;
101
+ /** Run-scoped tool discovery cache — avoids re-parsing conversation history on every iteration */
102
+ _toolDiscoveryCache;
94
103
  /** Map of agent contexts by agent ID */
95
104
  agentContexts = new Map();
96
105
  /** Default agent ID to use */
@@ -111,6 +120,19 @@ class StandardGraph extends Graph {
111
120
  this.agentContexts.set(agentConfig.agentId, agentContext);
112
121
  }
113
122
  this.defaultAgentId = agents[0].agentId;
123
+ // Seed cached summary from persisted storage so the first prune in a
124
+ // resumed conversation can also skip the synchronous LLM summarization call
125
+ const primaryContext = this.agentContexts.get(this.defaultAgentId);
126
+ if (primaryContext?.persistedSummary) {
127
+ this._cachedRunSummary = primaryContext.persistedSummary;
128
+ }
129
+ // Initialize EMA pruning calibration
130
+ this._pruneCalibration = createPruneCalibration();
131
+ // Initialize tool discovery cache, seeded with any pre-existing discoveries
132
+ this._toolDiscoveryCache = new ToolDiscoveryCache();
133
+ if (primaryContext?.discoveredToolNames.size) {
134
+ this._toolDiscoveryCache.seed([...primaryContext.discoveredToolNames]);
135
+ }
114
136
  }
115
137
  /* Init */
116
138
  resetValues(keepContent) {
@@ -133,6 +155,9 @@ class StandardGraph extends Graph {
133
155
  this.messageStepHasToolCalls = resetIfNotEmpty(this.messageStepHasToolCalls, new Map());
134
156
  this.prelimMessageIdsByStepKey = resetIfNotEmpty(this.prelimMessageIdsByStepKey, new Map());
135
157
  this.invokedToolIds = resetIfNotEmpty(this.invokedToolIds, undefined);
158
+ // Reset EMA calibration and tool discovery cache for fresh run
159
+ this._pruneCalibration = createPruneCalibration();
160
+ this._toolDiscoveryCache.reset();
136
161
  for (const context of this.agentContexts.values()) {
137
162
  context.reset();
138
163
  }
@@ -221,6 +246,62 @@ class StandardGraph extends Graph {
221
246
  }
222
247
  return clientOptions;
223
248
  }
249
+ /**
250
+ * Decides whether summarization should fire for the current prune, based on the optional SummarizationConfig.
251
+ *
252
+ * Supports three trigger strategies (selected by config.triggerType):
253
+ * - contextPercentage: Trigger when context utilization >= threshold% (threshold falls back to SUMMARIZATION_CONTEXT_THRESHOLD; always triggers when maxContextTokens <= 0)
254
+ * - messageCount: Trigger when pruned message count >= threshold (threshold falls back to 5)
255
+ * - tokenThreshold: Trigger when instruction tokens plus all mapped message tokens >= threshold (always triggers when no threshold is set)
256
+ *
257
+ * Returns false when nothing was pruned. When config or config.triggerType is missing, or triggerType is unrecognized, always triggers (preserves backward compatibility).
258
+ *
259
+ * @param prunedMessageCount - Number of messages that were pruned; 0 short-circuits to false
260
+ * @param maxContextTokens - Maximum context token budget (denominator of the contextPercentage utilization)
261
+ * @param indexTokenCountMap - Token count map by message index; nullish entries are counted as 0
262
+ * @param instructionTokens - Token count for instructions/system message; used as the starting value of the token total
263
+ * @param config - Optional SummarizationConfig; only triggerType and triggerThreshold are read here
264
+ * @returns Whether summarization should be triggered
265
+ */
266
+ shouldTriggerSummarization(prunedMessageCount, maxContextTokens, indexTokenCountMap, instructionTokens, config) {
267
+ // No pruned messages means nothing to summarize, regardless of config
268
+ if (prunedMessageCount === 0) {
269
+ return false;
270
+ }
271
+ // No config or no triggerType = backward compatible (always summarize when messages are pruned)
272
+ if (!config || !config.triggerType) {
273
+ return true;
274
+ }
275
+ const threshold = config.triggerThreshold;
276
+ switch (config.triggerType) {
277
+ case 'contextPercentage': {
278
+ if (maxContextTokens <= 0)
279
+ return true;
280
+ const effectiveThreshold = threshold ?? SUMMARIZATION_CONTEXT_THRESHOLD;
281
+ let totalTokens = instructionTokens;
282
+ for (const key in indexTokenCountMap) {
283
+ totalTokens += indexTokenCountMap[key] ?? 0;
284
+ }
285
+ const utilization = (totalTokens / maxContextTokens) * 100;
286
+ return utilization >= effectiveThreshold;
287
+ }
288
+ case 'messageCount': {
289
+ const effectiveThreshold = threshold ?? 5;
290
+ return prunedMessageCount >= effectiveThreshold;
291
+ }
292
+ case 'tokenThreshold': {
293
+ if (threshold == null)
294
+ return true;
295
+ let totalTokens = instructionTokens;
296
+ for (const key in indexTokenCountMap) {
297
+ totalTokens += indexTokenCountMap[key] ?? 0;
298
+ }
299
+ return totalTokens >= threshold;
300
+ }
301
+ default:
302
+ return true;
303
+ }
304
+ }
224
305
  /**
225
306
  * Returns the normalized finish/stop reason from the last LLM invocation.
226
307
  * Used by callers to detect when the response was truncated due to max_tokens.
@@ -359,7 +440,6 @@ class StandardGraph extends Graph {
359
440
  /* Misc.*/
360
441
  getRunMessages() {
361
442
  const result = this.messages.slice(this.startIndex);
362
- console.debug(`[Graph] getRunMessages() | totalMessages=${this.messages.length} | startIndex=${this.startIndex} | runMessages=${result.length}`);
363
443
  return result;
364
444
  }
365
445
  getContentParts() {
@@ -915,10 +995,12 @@ class StandardGraph extends Graph {
915
995
  });
916
996
  messages = [dynamicContextMessage, ackMessage, ...messages];
917
997
  }
918
- // Extract tool discoveries from current turn only (similar to formatArtifactPayload pattern)
919
- const discoveredNames = extractToolDiscoveries(messages);
920
- if (discoveredNames.length > 0) {
921
- agentContext.markToolsAsDiscovered(discoveredNames);
998
+ // Tool discovery caching: only scan new messages since last iteration
999
+ // instead of re-parsing the full history via extractToolDiscoveries()
1000
+ const cachedDiscoveries = this._toolDiscoveryCache.getNewDiscoveries(messages);
1001
+ if (cachedDiscoveries.length > 0) {
1002
+ agentContext.markToolsAsDiscovered(cachedDiscoveries);
1003
+ console.debug(`[Graph:ToolDiscovery] Cached ${cachedDiscoveries.length} new tools (total: ${this._toolDiscoveryCache.size})`);
922
1004
  }
923
1005
  const toolsForBinding = agentContext.getToolsForBinding();
924
1006
  // PERF: Detect subsequent ReAct iterations (tool results present in messages)
@@ -968,56 +1050,119 @@ class StandardGraph extends Graph {
968
1050
  (agentContext.provider === Providers.OPENAI &&
969
1051
  agentContext.clientOptions.modelKwargs
970
1052
  ?.thinking?.type === 'enabled');
1053
+ // Apply EMA calibration to max token budget — smooths pruning across iterations
1054
+ const calibratedMaxTokens = applyCalibration(agentContext.maxContextTokens, this._pruneCalibration);
971
1055
  agentContext.pruneMessages = createPruneMessages({
972
1056
  startIndex: this.startIndex,
973
1057
  provider: agentContext.provider,
974
1058
  tokenCounter: agentContext.tokenCounter,
975
- maxTokens: agentContext.maxContextTokens,
1059
+ maxTokens: calibratedMaxTokens,
976
1060
  thinkingEnabled: isAnthropicWithThinking,
977
1061
  indexTokenCountMap: agentContext.indexTokenCountMap,
978
1062
  });
979
1063
  }
1064
+ // Update EMA calibration with actual token usage from API response
1065
+ if (agentContext.currentUsage?.input_tokens &&
1066
+ agentContext.maxContextTokens) {
1067
+ const estimatedTokens = Object.values(agentContext.indexTokenCountMap).reduce((sum, v) => (sum ?? 0) + (v ?? 0), 0);
1068
+ if (estimatedTokens > 0) {
1069
+ this._pruneCalibration = updatePruneCalibration(this._pruneCalibration, agentContext.currentUsage.input_tokens, estimatedTokens);
1070
+ }
1071
+ }
980
1072
  if (agentContext.pruneMessages) {
981
- console.debug(`[Graph:ContextMgmt] Pruning messages | inputCount=${messages.length} | maxTokens=${agentContext.maxContextTokens}`);
982
1073
  const { context, indexTokenCountMap, messagesToRefine } = agentContext.pruneMessages({
983
1074
  messages,
984
1075
  usageMetadata: agentContext.currentUsage,
985
- // startOnMessageType: 'human',
986
1076
  });
987
1077
  agentContext.indexTokenCountMap = indexTokenCountMap;
988
1078
  messagesToUse = context;
989
- console.debug(`[Graph:ContextMgmt] Pruned | kept=${context.length} | discarded=${messagesToRefine.length} | originalCount=${messages.length}`);
990
- // Summarize discarded messages if callback provided
1079
+ // ── Non-blocking summarization ──────────────────────────────────
1080
+ // NEVER block the LLM call waiting for summarization. Instead:
1081
+ // 1. If _cachedRunSummary exists → use it, fire async update
1082
+ // 2. If persistedSummary exists → use it as fallback, fire async update
1083
+ // 3. If NOTHING exists (first-ever prune) → skip summary, fire async generation
1084
+ // The summary catches up asynchronously and is available for subsequent
1085
+ // iterations (tool calls) and the next conversation turn.
1086
+ //
1087
+ // SummarizationConfig integration:
1088
+ // - triggerType/triggerThreshold control WHEN summarization fires
1089
+ // - reserveRatio is enforced via calibrated maxTokens (above)
1090
+ // - initialSummary provides cross-run seeding as the last fallback, used only when no cached or persisted summary exists
991
1091
  let hasSummary = false;
992
- if (messagesToRefine.length > 0 && agentContext.summarizeCallback) {
993
- console.debug(`[Graph:ContextMgmt] Summarizing ${messagesToRefine.length} discarded messages`);
1092
+ const sumConfig = agentContext.summarizationConfig;
1093
+ const shouldSummarize = this.shouldTriggerSummarization(messagesToRefine.length, agentContext.maxContextTokens ?? 0, agentContext.indexTokenCountMap, agentContext.instructionTokens, sumConfig);
1094
+ if (messagesToRefine.length > 0 &&
1095
+ agentContext.summarizeCallback &&
1096
+ shouldSummarize) {
994
1097
  try {
995
- const summary = await agentContext.summarizeCallback(messagesToRefine);
996
- console.debug(`[Graph:ContextMgmt] Summary received | len=${summary?.length ?? 0} | hasContent=${summary != null && summary !== ''}`);
1098
+ let summary;
1099
+ let summarySource;
1100
+ if (this._cachedRunSummary != null) {
1101
+ summary = this._cachedRunSummary;
1102
+ summarySource = 'cached';
1103
+ }
1104
+ else if (agentContext.persistedSummary != null &&
1105
+ agentContext.persistedSummary !== '') {
1106
+ summary = agentContext.persistedSummary;
1107
+ this._cachedRunSummary = summary;
1108
+ summarySource = 'persisted';
1109
+ }
1110
+ else if (sumConfig?.initialSummary != null &&
1111
+ sumConfig.initialSummary !== '') {
1112
+ // Cross-run seed: use initialSummary when no persisted summary exists
1113
+ summary = sumConfig.initialSummary;
1114
+ this._cachedRunSummary = summary;
1115
+ summarySource = 'initial-seed';
1116
+ }
1117
+ else {
1118
+ summarySource = 'none';
1119
+ }
1120
+ // Single consolidated log for the entire prune+summarize decision
1121
+ console.debug(`[Graph:ContextMgmt] Pruned ${messages.length}→${context.length} msgs (${messagesToRefine.length} discarded) | summary=${summarySource}${summary ? ` (len=${summary.length})` : ''} | calibration=${this._pruneCalibration.ratio.toFixed(3)}(${this._pruneCalibration.iterations})`);
1122
+ // Fire background summarization — updates cache for next iteration/turn
1123
+ agentContext
1124
+ .summarizeCallback(messagesToRefine)
1125
+ .then((updated) => {
1126
+ if (updated != null && updated !== '') {
1127
+ this._cachedRunSummary = updated;
1128
+ }
1129
+ })
1130
+ .catch((err) => {
1131
+ console.error('[Graph] Background summary failed (non-fatal):', err);
1132
+ });
997
1133
  if (summary != null && summary !== '') {
998
1134
  hasSummary = true;
999
1135
  const summaryMsg = new SystemMessage(`[Conversation Summary]\n${summary}`);
1000
- // Insert after system message (if present), before conversation messages
1001
1136
  const systemIdx = messagesToUse[0]?.getType() === 'system' ? 1 : 0;
1002
1137
  messagesToUse = [
1003
1138
  ...messagesToUse.slice(0, systemIdx),
1004
1139
  summaryMsg,
1005
1140
  ...messagesToUse.slice(systemIdx),
1006
1141
  ];
1007
- console.debug(`[Graph:ContextMgmt] Summary injected at index ${systemIdx} | finalMsgCount=${messagesToUse.length}`);
1008
1142
  }
1009
1143
  }
1010
1144
  catch (err) {
1011
- console.error('[Graph] Summarization callback failed:', err);
1145
+ console.error('[Graph] Summarization failed:', err);
1012
1146
  }
1013
1147
  }
1014
- // Post-prune context note: inform the LLM that context was compressed
1015
- // without exposing token numbers (prevents voluntary bail-out)
1148
+ else if (messagesToRefine.length > 0) {
1149
+ // Log pruning even when no summarize callback (discard mode)
1150
+ console.debug(`[Graph:ContextMgmt] Pruned ${messages.length}→${context.length} msgs (${messagesToRefine.length} discarded, no summary callback) | calibration=${this._pruneCalibration.ratio.toFixed(3)}`);
1151
+ }
1152
+ // Deduplicate system messages that accumulate from repeated tool iterations
1153
+ const { messages: dedupedMessages, removedCount } = deduplicateSystemMessages(messagesToUse);
1154
+ if (removedCount > 0) {
1155
+ messagesToUse = dedupedMessages;
1156
+ console.debug(`[Graph:Dedup] Removed ${removedCount} duplicate system message(s)`);
1157
+ }
1158
+ // Post-prune context note for task-tool-enabled agents
1016
1159
  if (messagesToRefine.length > 0 && hasTaskTool(agentContext.tools)) {
1017
1160
  const postPruneNote = buildPostPruneNote(messagesToRefine.length, hasSummary);
1018
1161
  if (postPruneNote) {
1019
- messagesToUse = [...messagesToUse, new SystemMessage(postPruneNote)];
1020
- console.debug(`[Graph:ContextMgmt] Post-prune note injected | hasSummary=${hasSummary} | discarded=${messagesToRefine.length}`);
1162
+ messagesToUse = [
1163
+ ...messagesToUse,
1164
+ new SystemMessage(postPruneNote),
1165
+ ];
1021
1166
  }
1022
1167
  }
1023
1168
  }
@@ -1141,11 +1286,6 @@ class StandardGraph extends Graph {
1141
1286
  // ====================================================================
1142
1287
  if (hasTaskTool(agentContext.tools)) {
1143
1288
  const { count: documentCount, names: documentNames } = detectDocuments(finalMessages);
1144
- // Observability log (no token numbers exposed to LLM)
1145
- if (contextAnalytics.utilizationPercent != null) {
1146
- console.debug(`[Graph] Context utilization: ${contextAnalytics.utilizationPercent.toFixed(1)}% | ` +
1147
- `messages: ${finalMessages.length} | docs: ${documentCount}`);
1148
- }
1149
1289
  // Multi-document delegation: first iteration only (before AI has responded)
1150
1290
  const hasAiResponse = finalMessages.some((m) => m._getType() === 'ai' || m._getType() === 'tool');
1151
1291
  if (shouldInjectMultiDocHint(documentCount, hasAiResponse)) {
@@ -1549,10 +1689,6 @@ If I seem to be missing something we discussed earlier, just give me a quick rem
1549
1689
  reducer: (a, b) => {
1550
1690
  if (!a.length) {
1551
1691
  this.startIndex = a.length + b.length;
1552
- console.debug(`[Graph:Reducer] Initial messages | startIndex=${this.startIndex} | inputMsgCount=${b.length}`);
1553
- }
1554
- else {
1555
- console.debug(`[Graph:Reducer] Appending messages | existing=${a.length} | new=${b.length} | startIndex=${this.startIndex}`);
1556
1692
  }
1557
1693
  const result = messagesStateReducer(a, b);
1558
1694
  this.messages = result;