@illuma-ai/agents 1.0.98 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. package/dist/cjs/agents/AgentContext.cjs +6 -2
  2. package/dist/cjs/agents/AgentContext.cjs.map +1 -1
  3. package/dist/cjs/common/constants.cjs +53 -0
  4. package/dist/cjs/common/constants.cjs.map +1 -1
  5. package/dist/cjs/graphs/Graph.cjs +167 -31
  6. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  7. package/dist/cjs/main.cjs +14 -0
  8. package/dist/cjs/main.cjs.map +1 -1
  9. package/dist/cjs/messages/dedup.cjs +95 -0
  10. package/dist/cjs/messages/dedup.cjs.map +1 -0
  11. package/dist/cjs/tools/CodeExecutor.cjs +22 -3
  12. package/dist/cjs/tools/CodeExecutor.cjs.map +1 -1
  13. package/dist/cjs/types/graph.cjs.map +1 -1
  14. package/dist/cjs/utils/pruneCalibration.cjs +78 -0
  15. package/dist/cjs/utils/pruneCalibration.cjs.map +1 -0
  16. package/dist/cjs/utils/run.cjs.map +1 -1
  17. package/dist/cjs/utils/tokens.cjs.map +1 -1
  18. package/dist/cjs/utils/toolDiscoveryCache.cjs +127 -0
  19. package/dist/cjs/utils/toolDiscoveryCache.cjs.map +1 -0
  20. package/dist/esm/agents/AgentContext.mjs +6 -2
  21. package/dist/esm/agents/AgentContext.mjs.map +1 -1
  22. package/dist/esm/common/constants.mjs +48 -1
  23. package/dist/esm/common/constants.mjs.map +1 -1
  24. package/dist/esm/graphs/Graph.mjs +168 -32
  25. package/dist/esm/graphs/Graph.mjs.map +1 -1
  26. package/dist/esm/main.mjs +4 -1
  27. package/dist/esm/main.mjs.map +1 -1
  28. package/dist/esm/messages/dedup.mjs +93 -0
  29. package/dist/esm/messages/dedup.mjs.map +1 -0
  30. package/dist/esm/tools/CodeExecutor.mjs +22 -3
  31. package/dist/esm/tools/CodeExecutor.mjs.map +1 -1
  32. package/dist/esm/types/graph.mjs.map +1 -1
  33. package/dist/esm/utils/pruneCalibration.mjs +74 -0
  34. package/dist/esm/utils/pruneCalibration.mjs.map +1 -0
  35. package/dist/esm/utils/run.mjs.map +1 -1
  36. package/dist/esm/utils/tokens.mjs.map +1 -1
  37. package/dist/esm/utils/toolDiscoveryCache.mjs +125 -0
  38. package/dist/esm/utils/toolDiscoveryCache.mjs.map +1 -0
  39. package/dist/types/agents/AgentContext.d.ts +4 -1
  40. package/dist/types/common/constants.d.ts +35 -0
  41. package/dist/types/graphs/Graph.d.ts +25 -0
  42. package/dist/types/messages/dedup.d.ts +25 -0
  43. package/dist/types/messages/index.d.ts +1 -0
  44. package/dist/types/types/graph.d.ts +63 -0
  45. package/dist/types/utils/index.d.ts +2 -0
  46. package/dist/types/utils/pruneCalibration.d.ts +43 -0
  47. package/dist/types/utils/toolDiscoveryCache.d.ts +77 -0
  48. package/package.json +1 -1
  49. package/src/agents/AgentContext.ts +7 -0
  50. package/src/common/constants.ts +56 -0
  51. package/src/graphs/Graph.ts +220 -50
  52. package/src/graphs/gapFeatures.test.ts +520 -0
  53. package/src/graphs/nonBlockingSummarization.test.ts +307 -0
  54. package/src/messages/__tests__/dedup.test.ts +166 -0
  55. package/src/messages/dedup.ts +104 -0
  56. package/src/messages/index.ts +1 -0
  57. package/src/tools/CodeExecutor.ts +22 -3
  58. package/src/types/graph.ts +73 -0
  59. package/src/utils/__tests__/pruneCalibration.test.ts +148 -0
  60. package/src/utils/__tests__/toolDiscoveryCache.test.ts +214 -0
  61. package/src/utils/contextPressure.test.ts +24 -9
  62. package/src/utils/index.ts +2 -0
  63. package/src/utils/pruneCalibration.ts +92 -0
  64. package/src/utils/run.ts +108 -108
  65. package/src/utils/tokens.ts +118 -118
  66. package/src/utils/toolDiscoveryCache.ts +150 -0
@@ -34,9 +34,9 @@ import type * as t from '@/types';
34
34
  import {
35
35
  formatAnthropicArtifactContent,
36
36
  ensureThinkingBlockInMessages,
37
+ deduplicateSystemMessages,
37
38
  convertMessagesToContent,
38
39
  addBedrockCacheControl,
39
- extractToolDiscoveries,
40
40
  modifyDeltaProperties,
41
41
  formatArtifactPayload,
42
42
  formatContentStrings,
@@ -53,14 +53,20 @@ import {
53
53
  MessageTypes,
54
54
  Constants,
55
55
  TOOL_TURN_THINKING_BUDGET,
56
+ SUMMARIZATION_CONTEXT_THRESHOLD,
56
57
  } from '@/common';
57
58
  import {
59
+ ToolDiscoveryCache,
58
60
  resetIfNotEmpty,
59
61
  isOpenAILike,
60
62
  isGoogleLike,
61
63
  joinKeys,
62
64
  sleep,
65
+ createPruneCalibration,
66
+ updatePruneCalibration,
67
+ applyCalibration,
63
68
  } from '@/utils';
69
+ import type { PruneCalibrationState } from '@/types/graph';
64
70
  import {
65
71
  buildContextAnalytics,
66
72
  type ContextAnalytics,
@@ -205,6 +211,13 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
205
211
  runId: string | undefined;
206
212
  startIndex: number = 0;
207
213
  signal?: AbortSignal;
214
+ /** Cached summary from the first prune in this run.
215
+ * Reused for subsequent prunes to avoid blocking LLM calls on every tool iteration. */
216
+ private _cachedRunSummary: string | undefined;
217
+ /** EMA-based pruning calibration state — smooths token budget adjustments across iterations */
218
+ private _pruneCalibration: PruneCalibrationState;
219
+ /** Run-scoped tool discovery cache — avoids re-parsing conversation history on every iteration */
220
+ private _toolDiscoveryCache: ToolDiscoveryCache;
208
221
  /** Map of agent contexts by agent ID */
209
222
  agentContexts: Map<string, AgentContext> = new Map();
210
223
  /** Default agent ID to use */
@@ -239,6 +252,22 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
239
252
  }
240
253
 
241
254
  this.defaultAgentId = agents[0].agentId;
255
+
256
+ // Seed cached summary from persisted storage so the first prune in a
257
+ // resumed conversation can also skip the synchronous LLM summarization call
258
+ const primaryContext = this.agentContexts.get(this.defaultAgentId);
259
+ if (primaryContext?.persistedSummary) {
260
+ this._cachedRunSummary = primaryContext.persistedSummary;
261
+ }
262
+
263
+ // Initialize EMA pruning calibration
264
+ this._pruneCalibration = createPruneCalibration();
265
+
266
+ // Initialize tool discovery cache, seeded with any pre-existing discoveries
267
+ this._toolDiscoveryCache = new ToolDiscoveryCache();
268
+ if (primaryContext?.discoveredToolNames.size) {
269
+ this._toolDiscoveryCache.seed([...primaryContext.discoveredToolNames]);
270
+ }
242
271
  }
243
272
 
244
273
  /* Init */
@@ -272,6 +301,9 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
272
301
  new Map()
273
302
  );
274
303
  this.invokedToolIds = resetIfNotEmpty(this.invokedToolIds, undefined);
304
+ // Reset EMA calibration and tool discovery cache for fresh run
305
+ this._pruneCalibration = createPruneCalibration();
306
+ this._toolDiscoveryCache.reset();
275
307
  for (const context of this.agentContexts.values()) {
276
308
  context.reset();
277
309
  }
@@ -378,6 +410,70 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
378
410
  return clientOptions;
379
411
  }
380
412
 
413
+ /**
414
+ * Determines whether summarization should trigger based on SummarizationConfig.
415
+ *
416
+ * Supports three trigger strategies:
417
+ * - contextPercentage (default): Trigger when context utilization >= threshold%
418
+ * - messageCount: Trigger when pruned message count >= threshold
419
+ * - tokenThreshold: Trigger when total estimated tokens >= threshold
420
+ *
421
+ * When no config is provided, always triggers (preserves backward compatibility).
422
+ *
423
+ * @param prunedMessageCount - Number of messages that were pruned
424
+ * @param maxContextTokens - Maximum context token budget
425
+ * @param indexTokenCountMap - Token count map by message index
426
+ * @param instructionTokens - Token count for instructions/system message
427
+ * @param config - Optional SummarizationConfig
428
+ * @returns Whether summarization should be triggered
429
+ */
430
+ private shouldTriggerSummarization(
431
+ prunedMessageCount: number,
432
+ maxContextTokens: number,
433
+ indexTokenCountMap: Record<string, number | undefined>,
434
+ instructionTokens: number,
435
+ config?: t.SummarizationConfig
436
+ ): boolean {
437
+ // No pruned messages means nothing to summarize
438
+ if (prunedMessageCount === 0) {
439
+ return false;
440
+ }
441
+
442
+ // No config = backward compatible (always summarize when messages are pruned)
443
+ if (!config || !config.triggerType) {
444
+ return true;
445
+ }
446
+
447
+ const threshold = config.triggerThreshold;
448
+
449
+ switch (config.triggerType) {
450
+ case 'contextPercentage': {
451
+ if (maxContextTokens <= 0) return true;
452
+ const effectiveThreshold = threshold ?? SUMMARIZATION_CONTEXT_THRESHOLD;
453
+ let totalTokens = instructionTokens;
454
+ for (const key in indexTokenCountMap) {
455
+ totalTokens += indexTokenCountMap[key] ?? 0;
456
+ }
457
+ const utilization = (totalTokens / maxContextTokens) * 100;
458
+ return utilization >= effectiveThreshold;
459
+ }
460
+ case 'messageCount': {
461
+ const effectiveThreshold = threshold ?? 5;
462
+ return prunedMessageCount >= effectiveThreshold;
463
+ }
464
+ case 'tokenThreshold': {
465
+ if (threshold == null) return true;
466
+ let totalTokens = instructionTokens;
467
+ for (const key in indexTokenCountMap) {
468
+ totalTokens += indexTokenCountMap[key] ?? 0;
469
+ }
470
+ return totalTokens >= threshold;
471
+ }
472
+ default:
473
+ return true;
474
+ }
475
+ }
476
+
381
477
  /**
382
478
  * Returns the normalized finish/stop reason from the last LLM invocation.
383
479
  * Used by callers to detect when the response was truncated due to max_tokens.
@@ -535,9 +631,6 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
535
631
 
536
632
  getRunMessages(): BaseMessage[] | undefined {
537
633
  const result = this.messages.slice(this.startIndex);
538
- console.debug(
539
- `[Graph] getRunMessages() | totalMessages=${this.messages.length} | startIndex=${this.startIndex} | runMessages=${result.length}`
540
- );
541
634
  return result;
542
635
  }
543
636
 
@@ -1327,10 +1420,15 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
1327
1420
  messages = [dynamicContextMessage, ackMessage, ...messages];
1328
1421
  }
1329
1422
 
1330
- // Extract tool discoveries from current turn only (similar to formatArtifactPayload pattern)
1331
- const discoveredNames = extractToolDiscoveries(messages);
1332
- if (discoveredNames.length > 0) {
1333
- agentContext.markToolsAsDiscovered(discoveredNames);
1423
+ // Tool discovery caching: only scan new messages since last iteration
1424
+ // instead of re-parsing the full history via extractToolDiscoveries()
1425
+ const cachedDiscoveries =
1426
+ this._toolDiscoveryCache.getNewDiscoveries(messages);
1427
+ if (cachedDiscoveries.length > 0) {
1428
+ agentContext.markToolsAsDiscovered(cachedDiscoveries);
1429
+ console.debug(
1430
+ `[Graph:ToolDiscovery] Cached ${cachedDiscoveries.length} new tools (total: ${this._toolDiscoveryCache.size})`
1431
+ );
1334
1432
  }
1335
1433
 
1336
1434
  const toolsForBinding = agentContext.getToolsForBinding();
@@ -1400,50 +1498,126 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
1400
1498
  ?.thinking as t.AnthropicClientOptions['thinking']
1401
1499
  )?.type === 'enabled');
1402
1500
 
1501
+ // Apply EMA calibration to max token budget — smooths pruning across iterations
1502
+ const calibratedMaxTokens = applyCalibration(
1503
+ agentContext.maxContextTokens,
1504
+ this._pruneCalibration
1505
+ );
1506
+
1403
1507
  agentContext.pruneMessages = createPruneMessages({
1404
1508
  startIndex: this.startIndex,
1405
1509
  provider: agentContext.provider,
1406
1510
  tokenCounter: agentContext.tokenCounter,
1407
- maxTokens: agentContext.maxContextTokens,
1511
+ maxTokens: calibratedMaxTokens,
1408
1512
  thinkingEnabled: isAnthropicWithThinking,
1409
1513
  indexTokenCountMap: agentContext.indexTokenCountMap,
1410
1514
  });
1411
1515
  }
1412
1516
 
1517
+ // Update EMA calibration with actual token usage from API response
1518
+ if (
1519
+ agentContext.currentUsage?.input_tokens &&
1520
+ agentContext.maxContextTokens
1521
+ ) {
1522
+ const estimatedTokens = Object.values(
1523
+ agentContext.indexTokenCountMap
1524
+ ).reduce((sum, v) => (sum ?? 0) + (v ?? 0), 0) as number;
1525
+ if (estimatedTokens > 0) {
1526
+ this._pruneCalibration = updatePruneCalibration(
1527
+ this._pruneCalibration,
1528
+ agentContext.currentUsage.input_tokens,
1529
+ estimatedTokens
1530
+ );
1531
+ }
1532
+ }
1533
+
1413
1534
  if (agentContext.pruneMessages) {
1414
- console.debug(
1415
- `[Graph:ContextMgmt] Pruning messages | inputCount=${messages.length} | maxTokens=${agentContext.maxContextTokens}`
1416
- );
1417
1535
  const { context, indexTokenCountMap, messagesToRefine } =
1418
1536
  agentContext.pruneMessages({
1419
1537
  messages,
1420
1538
  usageMetadata: agentContext.currentUsage,
1421
- // startOnMessageType: 'human',
1422
1539
  });
1423
1540
  agentContext.indexTokenCountMap = indexTokenCountMap;
1424
1541
  messagesToUse = context;
1425
- console.debug(
1426
- `[Graph:ContextMgmt] Pruned | kept=${context.length} | discarded=${messagesToRefine.length} | originalCount=${messages.length}`
1427
- );
1428
1542
 
1429
- // Summarize discarded messages if callback provided
1543
+ // ── Non-blocking summarization ──────────────────────────────────
1544
+ // NEVER block the LLM call waiting for summarization. Instead:
1545
+ // 1. If _cachedRunSummary exists → use it, fire async update
1546
+ // 2. If persistedSummary exists → use it as fallback, fire async update
1547
+ // 3. If NOTHING exists (first-ever prune) → skip summary, fire async generation
1548
+ // The summary catches up asynchronously and is available for subsequent
1549
+ // iterations (tool calls) and the next conversation turn.
1550
+ //
1551
+ // SummarizationConfig integration:
1552
+ // - triggerType/triggerThreshold control WHEN summarization fires
1553
+ // - reserveRatio is enforced via calibrated maxTokens (above)
1554
+ // - initialSummary provides cross-run seeding as fallback before persistedSummary
1430
1555
  let hasSummary = false;
1431
- if (messagesToRefine.length > 0 && agentContext.summarizeCallback) {
1432
- console.debug(
1433
- `[Graph:ContextMgmt] Summarizing ${messagesToRefine.length} discarded messages`
1434
- );
1556
+ const sumConfig = agentContext.summarizationConfig;
1557
+ const shouldSummarize = this.shouldTriggerSummarization(
1558
+ messagesToRefine.length,
1559
+ agentContext.maxContextTokens ?? 0,
1560
+ agentContext.indexTokenCountMap,
1561
+ agentContext.instructionTokens,
1562
+ sumConfig
1563
+ );
1564
+
1565
+ if (
1566
+ messagesToRefine.length > 0 &&
1567
+ agentContext.summarizeCallback &&
1568
+ shouldSummarize
1569
+ ) {
1435
1570
  try {
1436
- const summary =
1437
- await agentContext.summarizeCallback(messagesToRefine);
1571
+ let summary: string | undefined;
1572
+ let summarySource: string;
1573
+
1574
+ if (this._cachedRunSummary != null) {
1575
+ summary = this._cachedRunSummary;
1576
+ summarySource = 'cached';
1577
+ } else if (
1578
+ agentContext.persistedSummary != null &&
1579
+ agentContext.persistedSummary !== ''
1580
+ ) {
1581
+ summary = agentContext.persistedSummary;
1582
+ this._cachedRunSummary = summary;
1583
+ summarySource = 'persisted';
1584
+ } else if (
1585
+ sumConfig?.initialSummary != null &&
1586
+ sumConfig.initialSummary !== ''
1587
+ ) {
1588
+ // Cross-run seed: use initialSummary when no persisted summary exists
1589
+ summary = sumConfig.initialSummary;
1590
+ this._cachedRunSummary = summary;
1591
+ summarySource = 'initial-seed';
1592
+ } else {
1593
+ summarySource = 'none';
1594
+ }
1595
+
1596
+ // Single consolidated log for the entire prune+summarize decision
1438
1597
  console.debug(
1439
- `[Graph:ContextMgmt] Summary received | len=${summary?.length ?? 0} | hasContent=${summary != null && summary !== ''}`
1598
+ `[Graph:ContextMgmt] Pruned ${messages.length}→${context.length} msgs (${messagesToRefine.length} discarded) | summary=${summarySource}${summary ? ` (len=${summary.length})` : ''} | calibration=${this._pruneCalibration.ratio.toFixed(3)}(${this._pruneCalibration.iterations})`
1440
1599
  );
1600
+
1601
+ // Fire background summarization — updates cache for next iteration/turn
1602
+ agentContext
1603
+ .summarizeCallback(messagesToRefine)
1604
+ .then((updated) => {
1605
+ if (updated != null && updated !== '') {
1606
+ this._cachedRunSummary = updated;
1607
+ }
1608
+ })
1609
+ .catch((err) => {
1610
+ console.error(
1611
+ '[Graph] Background summary failed (non-fatal):',
1612
+ err
1613
+ );
1614
+ });
1615
+
1441
1616
  if (summary != null && summary !== '') {
1442
1617
  hasSummary = true;
1443
1618
  const summaryMsg = new SystemMessage(
1444
1619
  `[Conversation Summary]\n${summary}`
1445
1620
  );
1446
- // Insert after system message (if present), before conversation messages
1447
1621
  const systemIdx =
1448
1622
  messagesToUse[0]?.getType() === 'system' ? 1 : 0;
1449
1623
  messagesToUse = [
@@ -1451,27 +1625,38 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
1451
1625
  summaryMsg,
1452
1626
  ...messagesToUse.slice(systemIdx),
1453
1627
  ];
1454
- console.debug(
1455
- `[Graph:ContextMgmt] Summary injected at index ${systemIdx} | finalMsgCount=${messagesToUse.length}`
1456
- );
1457
1628
  }
1458
1629
  } catch (err) {
1459
- console.error('[Graph] Summarization callback failed:', err);
1630
+ console.error('[Graph] Summarization failed:', err);
1460
1631
  }
1632
+ } else if (messagesToRefine.length > 0) {
1633
+ // Log pruning even when no summarize callback (discard mode)
1634
+ console.debug(
1635
+ `[Graph:ContextMgmt] Pruned ${messages.length}→${context.length} msgs (${messagesToRefine.length} discarded, no summary callback) | calibration=${this._pruneCalibration.ratio.toFixed(3)}`
1636
+ );
1461
1637
  }
1462
1638
 
1463
- // Post-prune context note: inform the LLM that context was compressed
1464
- // without exposing token numbers (prevents voluntary bail-out)
1639
+ // Deduplicate system messages that accumulate from repeated tool iterations
1640
+ const { messages: dedupedMessages, removedCount } =
1641
+ deduplicateSystemMessages(messagesToUse);
1642
+ if (removedCount > 0) {
1643
+ messagesToUse = dedupedMessages;
1644
+ console.debug(
1645
+ `[Graph:Dedup] Removed ${removedCount} duplicate system message(s)`
1646
+ );
1647
+ }
1648
+
1649
+ // Post-prune context note for task-tool-enabled agents
1465
1650
  if (messagesToRefine.length > 0 && hasTaskTool(agentContext.tools)) {
1466
1651
  const postPruneNote = buildPostPruneNote(
1467
1652
  messagesToRefine.length,
1468
1653
  hasSummary
1469
1654
  );
1470
1655
  if (postPruneNote) {
1471
- messagesToUse = [...messagesToUse, new SystemMessage(postPruneNote)];
1472
- console.debug(
1473
- `[Graph:ContextMgmt] Post-prune note injected | hasSummary=${hasSummary} | discarded=${messagesToRefine.length}`
1474
- );
1656
+ messagesToUse = [
1657
+ ...messagesToUse,
1658
+ new SystemMessage(postPruneNote),
1659
+ ];
1475
1660
  }
1476
1661
  }
1477
1662
  }
@@ -1643,14 +1828,6 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
1643
1828
  const { count: documentCount, names: documentNames } =
1644
1829
  detectDocuments(finalMessages);
1645
1830
 
1646
- // Observability log (no token numbers exposed to LLM)
1647
- if (contextAnalytics.utilizationPercent != null) {
1648
- console.debug(
1649
- `[Graph] Context utilization: ${contextAnalytics.utilizationPercent.toFixed(1)}% | ` +
1650
- `messages: ${finalMessages.length} | docs: ${documentCount}`
1651
- );
1652
- }
1653
-
1654
1831
  // Multi-document delegation: first iteration only (before AI has responded)
1655
1832
  const hasAiResponse = finalMessages.some(
1656
1833
  (m) => m._getType() === 'ai' || m._getType() === 'tool'
@@ -2178,13 +2355,6 @@ If I seem to be missing something we discussed earlier, just give me a quick rem
2178
2355
  reducer: (a, b) => {
2179
2356
  if (!a.length) {
2180
2357
  this.startIndex = a.length + b.length;
2181
- console.debug(
2182
- `[Graph:Reducer] Initial messages | startIndex=${this.startIndex} | inputMsgCount=${b.length}`
2183
- );
2184
- } else {
2185
- console.debug(
2186
- `[Graph:Reducer] Appending messages | existing=${a.length} | new=${b.length} | startIndex=${this.startIndex}`
2187
- );
2188
2358
  }
2189
2359
  const result = messagesStateReducer(a, b);
2190
2360
  this.messages = result;