@illuma-ai/agents 1.1.3 → 1.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/esm/main.mjs CHANGED
@@ -26,7 +26,7 @@ export { createSearchTool } from './tools/search/tool.mjs';
26
26
  export { DATE_RANGE, DEFAULT_COUNTRY_DESCRIPTION, DEFAULT_QUERY_DESCRIPTION, WebSearchToolDefinition, WebSearchToolDescription, WebSearchToolName, WebSearchToolSchema, countrySchema, dateSchema, imagesSchema, newsSchema, querySchema, videosSchema } from './tools/search/schema.mjs';
27
27
  export { createValidationErrorMessage, isValidJsonSchema, normalizeJsonSchema, prepareSchemaForProvider, validateStructuredOutput, zodToJsonSchema } from './schemas/validate.mjs';
28
28
  export { Callback, CommonEvents, Constants, ContentTypes, EdgeType, EnvVar, FinishReasons, GraphEvents, GraphNodeActions, GraphNodeKeys, MessageTypes, Providers, StepTypes, TitleMethod, ToolCallTypes } from './common/enum.mjs';
29
- export { CONTEXT_SAFETY_BUFFER, DEDUP_MAX_CONTENT_LENGTH, MIN_THINKING_BUDGET, MULTI_DOCUMENT_THRESHOLD, PROACTIVE_SUMMARY_THRESHOLD, PRUNING_EMA_ALPHA, PRUNING_INITIAL_CALIBRATION, SUMMARIZATION_CONTEXT_THRESHOLD, SUMMARIZATION_RESERVE_RATIO, TOOL_DISCOVERY_CACHE_MAX_SIZE, TOOL_TURN_THINKING_BUDGET } from './common/constants.mjs';
29
+ export { COMPACTION_RECENT_ROUNDS, CONTEXT_SAFETY_BUFFER, DEDUP_MAX_CONTENT_LENGTH, MIN_THINKING_BUDGET, MULTI_DOCUMENT_THRESHOLD, PROACTIVE_SUMMARY_THRESHOLD, PRUNING_EMA_ALPHA, PRUNING_INITIAL_CALIBRATION, SUMMARIZATION_CONTEXT_THRESHOLD, SUMMARIZATION_RESERVE_RATIO, TOOL_DISCOVERY_CACHE_MAX_SIZE, TOOL_TURN_THINKING_BUDGET } from './common/constants.mjs';
30
30
  export { joinKeys, resetIfNotEmpty } from './utils/graph.mjs';
31
31
  export { isGoogleLike, isOpenAILike } from './utils/llm.mjs';
32
32
  export { isPresent, unescapeObject } from './utils/misc.mjs';
@@ -46,6 +46,15 @@ export declare const SUMMARIZATION_CONTEXT_THRESHOLD = 80;
46
46
  * 100% → graceful: use existing summary + recent messages, never block
47
47
  */
48
48
  export declare const PROACTIVE_SUMMARY_THRESHOLD = 0.8;
49
+ /**
50
+ * Number of recent conversation rounds (human+AI pairs) to keep in the
51
+ * windowed view when a summary is available. Everything older is covered
52
+ * by the summary. 2 rounds = last 2 user questions + 2 AI responses.
53
+ *
54
+ * This prevents wasting tokens on raw messages the summary already covers
55
+ * and keeps context tight for the LLM.
56
+ */
57
+ export declare const COMPACTION_RECENT_ROUNDS = 2;
49
58
  /**
50
59
  * Default reserve ratio (0-1) — fraction of context window to preserve as recent messages.
51
60
  * 0.3 means 30% of the context budget is reserved for the most recent messages,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@illuma-ai/agents",
3
- "version": "1.1.3",
3
+ "version": "1.1.4",
4
4
  "main": "./dist/cjs/main.cjs",
5
5
  "module": "./dist/esm/main.mjs",
6
6
  "types": "./dist/types/index.d.ts",
@@ -71,6 +71,16 @@ export const SUMMARIZATION_CONTEXT_THRESHOLD = 80;
71
71
  */
72
72
  export const PROACTIVE_SUMMARY_THRESHOLD = 0.8;
73
73
 
74
+ /**
75
+ * Number of recent conversation rounds (human+AI pairs) to keep in the
76
+ * windowed view when a summary is available. Everything older is covered
77
+ * by the summary. 2 rounds = last 2 user questions + 2 AI responses.
78
+ *
79
+ * This prevents wasting tokens on raw messages the summary already covers
80
+ * and keeps context tight for the LLM.
81
+ */
82
+ export const COMPACTION_RECENT_ROUNDS = 2;
83
+
74
84
  /**
75
85
  * Default reserve ratio (0-1) — fraction of context window to preserve as recent messages.
76
86
  * 0.3 means 30% of the context budget is reserved for the most recent messages,
@@ -56,6 +56,7 @@ import {
56
56
  TOOL_TURN_THINKING_BUDGET,
57
57
  SUMMARIZATION_CONTEXT_THRESHOLD,
58
58
  PROACTIVE_SUMMARY_THRESHOLD,
59
+ COMPACTION_RECENT_ROUNDS,
59
60
  } from '@/common';
60
61
  import {
61
62
  ToolDiscoveryCache,
@@ -1670,18 +1671,53 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
1670
1671
  // Budget for recent messages = total - system - summary - 3 (assistant priming)
1671
1672
  const recentBudget = calibratedMax - systemTokens - summaryTokens - 3;
1672
1673
 
1673
- // Step 3: Walk newest→oldest, collect messages that fit in the budget
1674
+ // Step 3: Determine window of recent messages to include.
1675
+ //
1676
+ // Two modes:
1677
+ // A) No summary available → fill the budget (all messages that fit)
1678
+ // B) Summary available → keep last COMPACTION_RECENT_ROUNDS (default 2) conversation rounds (H+A pairs)
1679
+ // + any trailing tool messages. The summary covers everything else.
1680
+ // This avoids wasting tokens on raw messages the summary already covers.
1681
+ //
1682
+ // A "round" = one human message + one AI response (+ any tool messages between).
1674
1683
  const contentStart = systemMsg != null ? 1 : 0;
1675
1684
  let usedTokens = 0;
1676
1685
  let windowStart = messages.length; // index where the recent window begins
1677
1686
 
1678
- for (let i = messages.length - 1; i >= contentStart; i--) {
1679
- const msgTokens = agentContext.indexTokenCountMap[i] ?? 0;
1680
- if (usedTokens + msgTokens > recentBudget) {
1681
- break;
1687
+ if (summary == null || summary === '') {
1688
+ // Mode A: No summary — include as many recent messages as fit in budget
1689
+ for (let i = messages.length - 1; i >= contentStart; i--) {
1690
+ const msgTokens = agentContext.indexTokenCountMap[i] ?? 0;
1691
+ if (usedTokens + msgTokens > recentBudget) {
1692
+ break;
1693
+ }
1694
+ usedTokens += msgTokens;
1695
+ windowStart = i;
1696
+ }
1697
+ } else {
1698
+ // Mode B: Summary exists — keep the last COMPACTION_RECENT_ROUNDS rounds (default 2, i.e. 4 core messages: H+A+H+A)
1699
+ // Walk backward counting human messages as round boundaries.
1700
+ const MAX_RECENT_ROUNDS = COMPACTION_RECENT_ROUNDS;
1701
+ let roundsSeen = 0;
1702
+ for (let i = messages.length - 1; i >= contentStart; i--) {
1703
+ const msgType = messages[i]?.getType();
1704
+ const msgTokens = agentContext.indexTokenCountMap[i] ?? 0;
1705
+
1706
+ // Budget guard — even in round-limited mode, don't exceed budget
1707
+ if (usedTokens + msgTokens > recentBudget) {
1708
+ break;
1709
+ }
1710
+ usedTokens += msgTokens;
1711
+ windowStart = i;
1712
+
1713
+ // Count a human message as a round boundary
1714
+ if (msgType === 'human') {
1715
+ roundsSeen++;
1716
+ if (roundsSeen >= MAX_RECENT_ROUNDS) {
1717
+ break;
1718
+ }
1719
+ }
1682
1720
  }
1683
- usedTokens += msgTokens;
1684
- windowStart = i;
1685
1721
  }
1686
1722
 
1687
1723
  // Ensure we don't split tool-call / tool-result pairs.
@@ -1710,6 +1746,24 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
1710
1746
  viewParts.push(...recentMessages);
1711
1747
  messagesToUse = viewParts;
1712
1748
 
1749
+ // Rebuild indexTokenCountMap for the windowed view so downstream
1750
+ // analytics and summarization triggers see accurate token counts.
1751
+ const viewTokenMap: Record<string, number | undefined> = {};
1752
+ let viewIdx = 0;
1753
+ if (systemMsg != null) {
1754
+ viewTokenMap[viewIdx] = systemTokens;
1755
+ viewIdx++;
1756
+ }
1757
+ if (summaryMsg != null) {
1758
+ viewTokenMap[viewIdx] = summaryTokens;
1759
+ viewIdx++;
1760
+ }
1761
+ for (let i = windowStart; i < messages.length; i++) {
1762
+ viewTokenMap[viewIdx] = agentContext.indexTokenCountMap[i];
1763
+ viewIdx++;
1764
+ }
1765
+ agentContext.indexTokenCountMap = viewTokenMap;
1766
+
1713
1767
  console.debug(
1714
1768
  `[Graph:Compaction] View: ${messages.length}→${viewParts.length} msgs ` +
1715
1769
  `(${compactedMessages.length} behind summary, ${recentMessages.length} in window) | ` +
@@ -636,11 +636,14 @@ describe('Proactive Summarization — Context Pressure', () => {
636
636
  // ===========================================================================
637
637
 
638
638
  import { applyCalibration as _applyCalibration } from '@/utils/pruneCalibration';
639
+ import { COMPACTION_RECENT_ROUNDS } from '@/common/constants';
639
640
 
640
641
  describe('Context Compaction — Windowed View (no message deletion)', () => {
641
642
  /**
642
643
  * Simulates the compaction logic from Graph.ts without the full Graph instance.
643
- * This tests the windowed-view algorithm directly.
644
+ * Mirrors the two modes:
645
+ * A) No summary → fill budget with as many recent messages as fit
646
+ * B) Summary exists → keep last COMPACTION_RECENT_ROUNDS rounds only
644
647
  */
645
648
  function buildWindowedView(opts: {
646
649
  messages: BaseMessage[];
@@ -663,11 +666,28 @@ describe('Context Compaction — Windowed View (no message deletion)', () => {
663
666
  let usedTokens = 0;
664
667
  let windowStart = messages.length;
665
668
 
666
- for (let i = messages.length - 1; i >= contentStart; i--) {
667
- const msgTokens = indexTokenCountMap[i] ?? 0;
668
- if (usedTokens + msgTokens > recentBudget) break;
669
- usedTokens += msgTokens;
670
- windowStart = i;
669
+ if (!summary) {
670
+ // Mode A: No summary — fill budget
671
+ for (let i = messages.length - 1; i >= contentStart; i--) {
672
+ const msgTokens = indexTokenCountMap[i] ?? 0;
673
+ if (usedTokens + msgTokens > recentBudget) break;
674
+ usedTokens += msgTokens;
675
+ windowStart = i;
676
+ }
677
+ } else {
678
+ // Mode B: Summary exists — keep last N rounds
679
+ let roundsSeen = 0;
680
+ for (let i = messages.length - 1; i >= contentStart; i--) {
681
+ const msgType = messages[i]?.getType();
682
+ const msgTokens = indexTokenCountMap[i] ?? 0;
683
+ if (usedTokens + msgTokens > recentBudget) break;
684
+ usedTokens += msgTokens;
685
+ windowStart = i;
686
+ if (msgType === 'human') {
687
+ roundsSeen++;
688
+ if (roundsSeen >= COMPACTION_RECENT_ROUNDS) break;
689
+ }
690
+ }
671
691
  }
672
692
 
673
693
  // Don't split tool-call / tool-result pairs
@@ -836,6 +856,73 @@ describe('Context Compaction — Windowed View (no message deletion)', () => {
836
856
  expect(recentWithSummary.length).toBeLessThan(recentWithout.length);
837
857
  });
838
858
 
859
+ it('with summary, limits window to last 2 rounds (not budget-filling)', () => {
860
+ // 20 messages = 10 rounds. With summary, should only keep last 2 rounds (4 msgs).
861
+ const messages: BaseMessage[] = [
862
+ new SystemMessage('System prompt'),
863
+ ];
864
+ for (let i = 0; i < 20; i++) {
865
+ messages.push(
866
+ i % 2 === 0
867
+ ? new HumanMessage(`User question ${i / 2}`)
868
+ : new AIMessage(`AI answer ${(i - 1) / 2}`)
869
+ );
870
+ }
871
+ const indexTokenCountMap: Record<string, number | undefined> = {};
872
+ for (let i = 0; i < messages.length; i++) {
873
+ indexTokenCountMap[i] = simpleTokenCounter(messages[i]);
874
+ }
875
+
876
+ const { view, recentMessages, compactedMessages } = buildWindowedView({
877
+ messages,
878
+ indexTokenCountMap,
879
+ maxTokens: 100_000, // huge budget — would fit everything
880
+ summary: 'Summary of earlier conversation',
881
+ tokenCounter: simpleTokenCounter,
882
+ });
883
+
884
+ // Despite huge budget, only last 2 rounds kept (4 content msgs: H+A+H+A)
885
+ // Plus possible trailing messages in the last round
886
+ expect(recentMessages.length).toBeLessThanOrEqual(5); // 2 rounds + maybe 1 trailing
887
+ expect(recentMessages.length).toBeGreaterThanOrEqual(4); // at least 2 full rounds
888
+
889
+ // Most messages are compacted behind the summary
890
+ expect(compactedMessages.length).toBeGreaterThan(10);
891
+
892
+ // View = system + summary + recent window
893
+ expect(view[0].getType()).toBe('system');
894
+ expect(view[1].content).toContain('[Conversation Summary]');
895
+ });
896
+
897
+ it('without summary, fills budget (no round limit)', () => {
898
+ const messages = buildConversation(20, 100); // small messages
899
+ const indexTokenCountMap: Record<string, number | undefined> = {};
900
+ for (let i = 0; i < messages.length; i++) {
901
+ indexTokenCountMap[i] = simpleTokenCounter(messages[i]);
902
+ }
903
+
904
+ const { recentMessages: withoutSummary } = buildWindowedView({
905
+ messages,
906
+ indexTokenCountMap,
907
+ maxTokens: 100_000, // huge budget
908
+ tokenCounter: simpleTokenCounter,
909
+ // no summary → mode A
910
+ });
911
+
912
+ const { recentMessages: withSummary } = buildWindowedView({
913
+ messages,
914
+ indexTokenCountMap,
915
+ maxTokens: 100_000,
916
+ summary: 'Summary exists',
917
+ tokenCounter: simpleTokenCounter,
918
+ });
919
+
920
+ // Without summary: all messages included (budget-filling mode)
921
+ expect(withoutSummary.length).toBe(20); // all content messages
922
+ // With summary: only last 2 rounds
923
+ expect(withSummary.length).toBeLessThan(withoutSummary.length);
924
+ });
925
+
839
926
  it('original messages array is never mutated', () => {
840
927
  const messages = buildConversation(15, 400);
841
928
  const originalLength = messages.length;