@illuma-ai/agents 1.1.1 → 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/esm/main.mjs CHANGED
@@ -26,7 +26,7 @@ export { createSearchTool } from './tools/search/tool.mjs';
26
26
  export { DATE_RANGE, DEFAULT_COUNTRY_DESCRIPTION, DEFAULT_QUERY_DESCRIPTION, WebSearchToolDefinition, WebSearchToolDescription, WebSearchToolName, WebSearchToolSchema, countrySchema, dateSchema, imagesSchema, newsSchema, querySchema, videosSchema } from './tools/search/schema.mjs';
27
27
  export { createValidationErrorMessage, isValidJsonSchema, normalizeJsonSchema, prepareSchemaForProvider, validateStructuredOutput, zodToJsonSchema } from './schemas/validate.mjs';
28
28
  export { Callback, CommonEvents, Constants, ContentTypes, EdgeType, EnvVar, FinishReasons, GraphEvents, GraphNodeActions, GraphNodeKeys, MessageTypes, Providers, StepTypes, TitleMethod, ToolCallTypes } from './common/enum.mjs';
29
- export { CONTEXT_SAFETY_BUFFER, DEDUP_MAX_CONTENT_LENGTH, MIN_THINKING_BUDGET, MULTI_DOCUMENT_THRESHOLD, PRUNING_EMA_ALPHA, PRUNING_INITIAL_CALIBRATION, SUMMARIZATION_CONTEXT_THRESHOLD, SUMMARIZATION_RESERVE_RATIO, TOOL_DISCOVERY_CACHE_MAX_SIZE, TOOL_TURN_THINKING_BUDGET } from './common/constants.mjs';
29
+ export { CONTEXT_SAFETY_BUFFER, DEDUP_MAX_CONTENT_LENGTH, MIN_THINKING_BUDGET, MULTI_DOCUMENT_THRESHOLD, PROACTIVE_SUMMARY_THRESHOLD, PRUNING_EMA_ALPHA, PRUNING_INITIAL_CALIBRATION, SUMMARIZATION_CONTEXT_THRESHOLD, SUMMARIZATION_RESERVE_RATIO, TOOL_DISCOVERY_CACHE_MAX_SIZE, TOOL_TURN_THINKING_BUDGET } from './common/constants.mjs';
30
30
  export { joinKeys, resetIfNotEmpty } from './utils/graph.mjs';
31
31
  export { isGoogleLike, isOpenAILike } from './utils/llm.mjs';
32
32
  export { isPresent, unescapeObject } from './utils/misc.mjs';
@@ -35,6 +35,17 @@ export declare const CONTEXT_SAFETY_BUFFER = 0.9;
35
35
  * When the context window is ≥80% full, pruning + summarization activates.
36
36
  */
37
37
  export declare const SUMMARIZATION_CONTEXT_THRESHOLD = 80;
38
+ /**
39
+ * Proactive summarization threshold (0-1 fraction of context window).
40
+ * At this utilization level, background summarization fires BEFORE pruning is needed.
41
+ * This gives the summary time to complete so it's ready when context actually fills up.
42
+ *
43
+ * Inspired by VS Code Copilot Chat's 3-tier strategy:
44
+ * 80% → proactive background summary
45
+ * 90% → pruning kicks in (with summary already cached)
46
+ * 100% → graceful: use existing summary + recent messages, never block
47
+ */
48
+ export declare const PROACTIVE_SUMMARY_THRESHOLD = 0.8;
38
49
  /**
39
50
  * Default reserve ratio (0-1) — fraction of context window to preserve as recent messages.
40
51
  * 0.3 means 30% of the context budget is reserved for the most recent messages,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@illuma-ai/agents",
3
- "version": "1.1.1",
3
+ "version": "1.1.2",
4
4
  "main": "./dist/cjs/main.cjs",
5
5
  "module": "./dist/esm/main.mjs",
6
6
  "types": "./dist/types/index.d.ts",
@@ -59,6 +59,18 @@ export const CONTEXT_SAFETY_BUFFER = 0.9;
59
59
  */
60
60
  export const SUMMARIZATION_CONTEXT_THRESHOLD = 80;
61
61
 
62
+ /**
63
+ * Proactive summarization threshold (0-1 fraction of context window).
64
+ * At this utilization level, background summarization fires BEFORE pruning is needed.
65
+ * This gives the summary time to complete so it's ready when context actually fills up.
66
+ *
67
+ * Inspired by VS Code Copilot Chat's 3-tier strategy:
68
+ * 80% → proactive background summary
69
+ * 90% → pruning kicks in (with summary already cached)
70
+ * 100% → graceful: use existing summary + recent messages, never block
71
+ */
72
+ export const PROACTIVE_SUMMARY_THRESHOLD = 0.8;
73
+
62
74
  /**
63
75
  * Default reserve ratio (0-1) — fraction of context window to preserve as recent messages.
64
76
  * 0.3 means 30% of the context budget is reserved for the most recent messages,
@@ -35,6 +35,7 @@ import {
35
35
  formatAnthropicArtifactContent,
36
36
  ensureThinkingBlockInMessages,
37
37
  deduplicateSystemMessages,
38
+ getContextUtilization,
38
39
  convertMessagesToContent,
39
40
  addBedrockCacheControl,
40
41
  modifyDeltaProperties,
@@ -54,6 +55,7 @@ import {
54
55
  Constants,
55
56
  TOOL_TURN_THINKING_BUDGET,
56
57
  SUMMARIZATION_CONTEXT_THRESHOLD,
58
+ PROACTIVE_SUMMARY_THRESHOLD,
57
59
  } from '@/common';
58
60
  import {
59
61
  ToolDiscoveryCache,
@@ -1542,6 +1544,67 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
1542
1544
  }
1543
1545
  }
1544
1546
 
1547
+ // ── Proactive summarization at context pressure ───────────────────
1548
+ // Inspired by VS Code Copilot Chat's 3-tier strategy:
1549
+ // 80% → fire proactive background summary (BEFORE pruning needed)
1550
+ // 90% → pruning kicks in (summary already cached from 80% trigger)
1551
+ // 100% → graceful: use existing summary + recent messages, NEVER block
1552
+ //
1553
+ // This ensures the summary is READY by the time pruning actually occurs,
1554
+ // so the user never waits and never sees a context cliff.
1555
+ if (
1556
+ agentContext.maxContextTokens != null &&
1557
+ agentContext.maxContextTokens > 0 &&
1558
+ agentContext.summarizeCallback &&
1559
+ !this._summaryInFlight &&
1560
+ !this._cachedRunSummary
1561
+ ) {
1562
+ const utilization = getContextUtilization(
1563
+ agentContext.indexTokenCountMap,
1564
+ agentContext.instructionTokens,
1565
+ agentContext.maxContextTokens
1566
+ );
1567
+ const threshold = (agentContext.summarizationConfig?.triggerThreshold ?? PROACTIVE_SUMMARY_THRESHOLD * 100);
1568
+
1569
+ if (utilization >= threshold) {
1570
+ // Identify older messages to summarize proactively.
1571
+ // Keep the last N messages (recent turns) intact — only summarize older history.
1572
+ // This is incremental: the callback checks for existing summary and updates it.
1573
+ const recentTurnCount = Math.max(4, Math.floor(messages.length * 0.3));
1574
+ const oldMessages = messages.slice(
1575
+ messages[0]?.getType() === 'system' ? 1 : 0,
1576
+ Math.max(1, messages.length - recentTurnCount)
1577
+ );
1578
+
1579
+ if (oldMessages.length > 0) {
1580
+ this._summaryInFlight = true;
1581
+ console.debug(
1582
+ `[Graph:ProactiveSummary] Context at ${utilization.toFixed(1)}% (threshold ${threshold}%) — summarizing ${oldMessages.length} older msgs in background`
1583
+ );
1584
+
1585
+ agentContext
1586
+ .summarizeCallback(oldMessages)
1587
+ .then((updated) => {
1588
+ if (updated != null && updated !== '') {
1589
+ this._cachedRunSummary = updated;
1590
+ console.debug(
1591
+ `[Graph:ProactiveSummary] Background summary ready (len=${updated.length})`
1592
+ );
1593
+ }
1594
+ })
1595
+ .catch((err) => {
1596
+ console.error(
1597
+ '[Graph:ProactiveSummary] Background summary failed (non-fatal):',
1598
+ err
1599
+ );
1600
+ })
1601
+ .finally(() => {
1602
+ this._summaryInFlight = false;
1603
+ });
1604
+ }
1605
+ }
1606
+ }
1607
+
1545
1608
  if (agentContext.pruneMessages) {
1546
1609
  const { context, indexTokenCountMap, messagesToRefine } =
1547
1610
  agentContext.pruneMessages({
@@ -518,3 +518,116 @@ describe('All Features Combined — Full Pipeline', () => {
518
518
  expect(callback).toHaveBeenCalled();
519
519
  });
520
520
  });
521
+
522
+ // ===========================================================================
523
+ // Proactive Summarization — Context Pressure
524
+ // ===========================================================================
525
+
526
+ import { getContextUtilization } from '@/messages/prune';
527
+ import { PROACTIVE_SUMMARY_THRESHOLD } from '@/common/constants';
528
+
529
+ describe('Proactive Summarization — Context Pressure', () => {
530
+ it('triggers proactive summary at 80% utilization BEFORE pruning', () => {
531
+ // Simulate context at 82% utilization
532
+ const maxContextTokens = 200_000;
533
+ const indexTokenCountMap: Record<string, number | undefined> = {};
534
+ // Build messages that fill ~82% of context
535
+ const msgsNeeded = 40;
536
+ const tokensPerMsg = Math.floor((maxContextTokens * 0.82) / msgsNeeded);
537
+ for (let i = 0; i < msgsNeeded; i++) {
538
+ indexTokenCountMap[String(i)] = tokensPerMsg;
539
+ }
540
+
541
+ const utilization = getContextUtilization(indexTokenCountMap, 0, maxContextTokens);
542
+ const threshold = PROACTIVE_SUMMARY_THRESHOLD * 100; // 80
543
+
544
+ expect(utilization).toBeGreaterThanOrEqual(threshold);
545
+ // At 82%, proactive summary should fire
546
+ // But pruning should NOT have happened yet (total tokens < CONTEXT_SAFETY_BUFFER (0.9) of the context window)
547
+ const effectiveBudget = Math.floor(maxContextTokens * 0.9); // CONTEXT_SAFETY_BUFFER
548
+ const totalTokens = Object.values(indexTokenCountMap).reduce((s, v) => (s ?? 0) + (v ?? 0), 0) as number;
549
+ expect(totalTokens).toBeLessThan(effectiveBudget);
550
+ });
551
+
552
+ it('does NOT trigger proactive summary below 80%', () => {
553
+ const maxContextTokens = 200_000;
554
+ const indexTokenCountMap: Record<string, number | undefined> = {};
555
+ // Fill to 50% utilization
556
+ const msgsNeeded = 20;
557
+ const tokensPerMsg = Math.floor((maxContextTokens * 0.5) / msgsNeeded);
558
+ for (let i = 0; i < msgsNeeded; i++) {
559
+ indexTokenCountMap[String(i)] = tokensPerMsg;
560
+ }
561
+
562
+ const utilization = getContextUtilization(indexTokenCountMap, 0, maxContextTokens);
563
+ expect(utilization).toBeLessThan(PROACTIVE_SUMMARY_THRESHOLD * 100);
564
+ });
565
+
566
+ it('selects only older messages for proactive summarization (keeps recent turns)', () => {
567
+ const messages: BaseMessage[] = [
568
+ new SystemMessage('System prompt'),
569
+ ...Array.from({ length: 20 }, (_, i) =>
570
+ i % 2 === 0
571
+ ? new HumanMessage(`User message ${i}`)
572
+ : new AIMessage(`AI response ${i}`)
573
+ ),
574
+ ];
575
+
576
+ // Simulate the selection logic from Graph.ts proactive summarization
577
+ const recentTurnCount = Math.max(4, Math.floor(messages.length * 0.3));
578
+ const oldMessages = messages.slice(
579
+ 1, // skip system message
580
+ Math.max(1, messages.length - recentTurnCount)
581
+ );
582
+
583
+ // Recent 30% (~6 messages) preserved, older messages selected for summary
584
+ expect(oldMessages.length).toBeLessThan(messages.length);
585
+ expect(oldMessages.length).toBeGreaterThan(0);
586
+ // System message not included
587
+ expect(oldMessages[0].getType()).not.toBe('system');
588
+ // Last messages of conversation not included (recent turns preserved)
589
+ const lastOldIndex = messages.indexOf(oldMessages[oldMessages.length - 1]);
590
+ expect(lastOldIndex).toBeLessThan(messages.length - recentTurnCount);
591
+ });
592
+
593
+ it('never blocks — proactive summary is always fire-and-forget', async () => {
594
+ let resolveCallback: ((v: string) => void) | undefined;
595
+ const slowCallback = jest.fn(
596
+ () =>
597
+ new Promise<string>((resolve) => {
598
+ resolveCallback = resolve;
599
+ })
600
+ );
601
+
602
+ // Simulate proactive summary fire-and-forget
603
+ const summaryPromise = slowCallback().then((updated) => {
604
+ return updated;
605
+ });
606
+
607
+ // Main flow continues immediately — callback hasn't resolved yet
608
+ expect(slowCallback).toHaveBeenCalledTimes(1);
609
+
610
+ // Later, callback resolves (simulating Nova Micro responding)
611
+ resolveCallback!('Proactive summary result');
612
+ const result = await summaryPromise;
613
+ expect(result).toBe('Proactive summary result');
614
+ });
615
+
616
+ it('at 100%+ utilization, uses existing summary without throwing', () => {
617
+ const maxContextTokens = 200_000;
618
+ const cachedSummary = 'Previously generated summary of the conversation';
619
+
620
+ // Context is at 105% (over budget)
621
+ const indexTokenCountMap: Record<string, number | undefined> = {
622
+ '0': 210_000, // system + everything
623
+ };
624
+
625
+ const utilization = getContextUtilization(indexTokenCountMap, 0, maxContextTokens);
626
+ expect(utilization).toBeGreaterThan(100);
627
+
628
+ // Even at 100%+, we use the existing cached summary — no error thrown
629
+ expect(cachedSummary).toBeTruthy();
630
+ // Pruning will remove oldest messages to fit, and inject cached summary
631
+ // The key: no blocking, no throwing, just graceful degradation
632
+ });
633
+ });