@illuma-ai/agents 1.1.3 → 1.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/common/constants.cjs +10 -0
- package/dist/cjs/common/constants.cjs.map +1 -1
- package/dist/cjs/graphs/Graph.cjs +58 -7
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/cjs/main.cjs +1 -0
- package/dist/cjs/main.cjs.map +1 -1
- package/dist/esm/common/constants.mjs +10 -1
- package/dist/esm/common/constants.mjs.map +1 -1
- package/dist/esm/graphs/Graph.mjs +59 -8
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/dist/esm/main.mjs +1 -1
- package/dist/types/common/constants.d.ts +9 -0
- package/package.json +1 -1
- package/src/common/constants.ts +10 -0
- package/src/graphs/Graph.ts +61 -7
- package/src/graphs/gapFeatures.test.ts +93 -6
package/dist/esm/main.mjs
CHANGED
|
@@ -26,7 +26,7 @@ export { createSearchTool } from './tools/search/tool.mjs';
|
|
|
26
26
|
export { DATE_RANGE, DEFAULT_COUNTRY_DESCRIPTION, DEFAULT_QUERY_DESCRIPTION, WebSearchToolDefinition, WebSearchToolDescription, WebSearchToolName, WebSearchToolSchema, countrySchema, dateSchema, imagesSchema, newsSchema, querySchema, videosSchema } from './tools/search/schema.mjs';
|
|
27
27
|
export { createValidationErrorMessage, isValidJsonSchema, normalizeJsonSchema, prepareSchemaForProvider, validateStructuredOutput, zodToJsonSchema } from './schemas/validate.mjs';
|
|
28
28
|
export { Callback, CommonEvents, Constants, ContentTypes, EdgeType, EnvVar, FinishReasons, GraphEvents, GraphNodeActions, GraphNodeKeys, MessageTypes, Providers, StepTypes, TitleMethod, ToolCallTypes } from './common/enum.mjs';
|
|
29
|
-
export { CONTEXT_SAFETY_BUFFER, DEDUP_MAX_CONTENT_LENGTH, MIN_THINKING_BUDGET, MULTI_DOCUMENT_THRESHOLD, PROACTIVE_SUMMARY_THRESHOLD, PRUNING_EMA_ALPHA, PRUNING_INITIAL_CALIBRATION, SUMMARIZATION_CONTEXT_THRESHOLD, SUMMARIZATION_RESERVE_RATIO, TOOL_DISCOVERY_CACHE_MAX_SIZE, TOOL_TURN_THINKING_BUDGET } from './common/constants.mjs';
|
|
29
|
+
export { COMPACTION_RECENT_ROUNDS, CONTEXT_SAFETY_BUFFER, DEDUP_MAX_CONTENT_LENGTH, MIN_THINKING_BUDGET, MULTI_DOCUMENT_THRESHOLD, PROACTIVE_SUMMARY_THRESHOLD, PRUNING_EMA_ALPHA, PRUNING_INITIAL_CALIBRATION, SUMMARIZATION_CONTEXT_THRESHOLD, SUMMARIZATION_RESERVE_RATIO, TOOL_DISCOVERY_CACHE_MAX_SIZE, TOOL_TURN_THINKING_BUDGET } from './common/constants.mjs';
|
|
30
30
|
export { joinKeys, resetIfNotEmpty } from './utils/graph.mjs';
|
|
31
31
|
export { isGoogleLike, isOpenAILike } from './utils/llm.mjs';
|
|
32
32
|
export { isPresent, unescapeObject } from './utils/misc.mjs';
|
|
@@ -46,6 +46,15 @@ export declare const SUMMARIZATION_CONTEXT_THRESHOLD = 80;
|
|
|
46
46
|
* 100% → graceful: use existing summary + recent messages, never block
|
|
47
47
|
*/
|
|
48
48
|
export declare const PROACTIVE_SUMMARY_THRESHOLD = 0.8;
|
|
49
|
+
/**
|
|
50
|
+
* Number of recent conversation rounds (human+AI pairs) to keep in the
|
|
51
|
+
* windowed view when a summary is available. Everything older is covered
|
|
52
|
+
* by the summary. 2 rounds = last 2 user questions + 2 AI responses.
|
|
53
|
+
*
|
|
54
|
+
* This prevents wasting tokens on raw messages the summary already covers
|
|
55
|
+
* and keeps context tight for the LLM.
|
|
56
|
+
*/
|
|
57
|
+
export declare const COMPACTION_RECENT_ROUNDS = 2;
|
|
49
58
|
/**
|
|
50
59
|
* Default reserve ratio (0-1) — fraction of context window to preserve as recent messages.
|
|
51
60
|
* 0.3 means 30% of the context budget is reserved for the most recent messages,
|
package/package.json
CHANGED
package/src/common/constants.ts
CHANGED
|
@@ -71,6 +71,16 @@ export const SUMMARIZATION_CONTEXT_THRESHOLD = 80;
|
|
|
71
71
|
*/
|
|
72
72
|
export const PROACTIVE_SUMMARY_THRESHOLD = 0.8;
|
|
73
73
|
|
|
74
|
+
/**
|
|
75
|
+
* Number of recent conversation rounds (human+AI pairs) to keep in the
|
|
76
|
+
* windowed view when a summary is available. Everything older is covered
|
|
77
|
+
* by the summary. 2 rounds = last 2 user questions + 2 AI responses.
|
|
78
|
+
*
|
|
79
|
+
* This prevents wasting tokens on raw messages the summary already covers
|
|
80
|
+
* and keeps context tight for the LLM.
|
|
81
|
+
*/
|
|
82
|
+
export const COMPACTION_RECENT_ROUNDS = 2;
|
|
83
|
+
|
|
74
84
|
/**
|
|
75
85
|
* Default reserve ratio (0-1) — fraction of context window to preserve as recent messages.
|
|
76
86
|
* 0.3 means 30% of the context budget is reserved for the most recent messages,
|
package/src/graphs/Graph.ts
CHANGED
|
@@ -56,6 +56,7 @@ import {
|
|
|
56
56
|
TOOL_TURN_THINKING_BUDGET,
|
|
57
57
|
SUMMARIZATION_CONTEXT_THRESHOLD,
|
|
58
58
|
PROACTIVE_SUMMARY_THRESHOLD,
|
|
59
|
+
COMPACTION_RECENT_ROUNDS,
|
|
59
60
|
} from '@/common';
|
|
60
61
|
import {
|
|
61
62
|
ToolDiscoveryCache,
|
|
@@ -1670,18 +1671,53 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
1670
1671
|
// Budget for recent messages = total - system - summary - 3 (assistant priming)
|
|
1671
1672
|
const recentBudget = calibratedMax - systemTokens - summaryTokens - 3;
|
|
1672
1673
|
|
|
1673
|
-
// Step 3:
|
|
1674
|
+
// Step 3: Determine window of recent messages to include.
|
|
1675
|
+
//
|
|
1676
|
+
// Two modes:
|
|
1677
|
+
// A) No summary available → fill the budget (all messages that fit)
|
|
1678
|
+
//   B) Summary available → keep last COMPACTION_RECENT_ROUNDS (currently 2) conversation rounds (H+A pairs)
|
|
1679
|
+
// + any trailing tool messages. The summary covers everything else.
|
|
1680
|
+
// This avoids wasting tokens on raw messages the summary already covers.
|
|
1681
|
+
//
|
|
1682
|
+
// A "round" = one human message + one AI response (+ any tool messages between).
|
|
1674
1683
|
const contentStart = systemMsg != null ? 1 : 0;
|
|
1675
1684
|
let usedTokens = 0;
|
|
1676
1685
|
let windowStart = messages.length; // index where the recent window begins
|
|
1677
1686
|
|
|
1678
|
-
|
|
1679
|
-
|
|
1680
|
-
|
|
1681
|
-
|
|
1687
|
+
if (summary == null || summary === '') {
|
|
1688
|
+
// Mode A: No summary — include as many recent messages as fit in budget
|
|
1689
|
+
for (let i = messages.length - 1; i >= contentStart; i--) {
|
|
1690
|
+
const msgTokens = agentContext.indexTokenCountMap[i] ?? 0;
|
|
1691
|
+
if (usedTokens + msgTokens > recentBudget) {
|
|
1692
|
+
break;
|
|
1693
|
+
}
|
|
1694
|
+
usedTokens += msgTokens;
|
|
1695
|
+
windowStart = i;
|
|
1696
|
+
}
|
|
1697
|
+
} else {
|
|
1698
|
+
// Mode B: Summary exists — keep last COMPACTION_RECENT_ROUNDS rounds (currently 2, i.e. up to 4 core messages: H+A+H+A)
|
|
1699
|
+
// Walk backward counting human messages as round boundaries.
|
|
1700
|
+
const MAX_RECENT_ROUNDS = COMPACTION_RECENT_ROUNDS;
|
|
1701
|
+
let roundsSeen = 0;
|
|
1702
|
+
for (let i = messages.length - 1; i >= contentStart; i--) {
|
|
1703
|
+
const msgType = messages[i]?.getType();
|
|
1704
|
+
const msgTokens = agentContext.indexTokenCountMap[i] ?? 0;
|
|
1705
|
+
|
|
1706
|
+
// Budget guard — even in round-limited mode, don't exceed budget
|
|
1707
|
+
if (usedTokens + msgTokens > recentBudget) {
|
|
1708
|
+
break;
|
|
1709
|
+
}
|
|
1710
|
+
usedTokens += msgTokens;
|
|
1711
|
+
windowStart = i;
|
|
1712
|
+
|
|
1713
|
+
// Count a human message as a round boundary
|
|
1714
|
+
if (msgType === 'human') {
|
|
1715
|
+
roundsSeen++;
|
|
1716
|
+
if (roundsSeen >= MAX_RECENT_ROUNDS) {
|
|
1717
|
+
break;
|
|
1718
|
+
}
|
|
1719
|
+
}
|
|
1682
1720
|
}
|
|
1683
|
-
usedTokens += msgTokens;
|
|
1684
|
-
windowStart = i;
|
|
1685
1721
|
}
|
|
1686
1722
|
|
|
1687
1723
|
// Ensure we don't split tool-call / tool-result pairs.
|
|
@@ -1710,6 +1746,24 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
1710
1746
|
viewParts.push(...recentMessages);
|
|
1711
1747
|
messagesToUse = viewParts;
|
|
1712
1748
|
|
|
1749
|
+
// Rebuild indexTokenCountMap for the windowed view so downstream
|
|
1750
|
+
// analytics and summarization triggers see accurate token counts.
|
|
1751
|
+
const viewTokenMap: Record<string, number | undefined> = {};
|
|
1752
|
+
let viewIdx = 0;
|
|
1753
|
+
if (systemMsg != null) {
|
|
1754
|
+
viewTokenMap[viewIdx] = systemTokens;
|
|
1755
|
+
viewIdx++;
|
|
1756
|
+
}
|
|
1757
|
+
if (summaryMsg != null) {
|
|
1758
|
+
viewTokenMap[viewIdx] = summaryTokens;
|
|
1759
|
+
viewIdx++;
|
|
1760
|
+
}
|
|
1761
|
+
for (let i = windowStart; i < messages.length; i++) {
|
|
1762
|
+
viewTokenMap[viewIdx] = agentContext.indexTokenCountMap[i];
|
|
1763
|
+
viewIdx++;
|
|
1764
|
+
}
|
|
1765
|
+
agentContext.indexTokenCountMap = viewTokenMap;
|
|
1766
|
+
|
|
1713
1767
|
console.debug(
|
|
1714
1768
|
`[Graph:Compaction] View: ${messages.length}→${viewParts.length} msgs ` +
|
|
1715
1769
|
`(${compactedMessages.length} behind summary, ${recentMessages.length} in window) | ` +
|
|
@@ -636,11 +636,14 @@ describe('Proactive Summarization — Context Pressure', () => {
|
|
|
636
636
|
// ===========================================================================
|
|
637
637
|
|
|
638
638
|
import { applyCalibration as _applyCalibration } from '@/utils/pruneCalibration';
|
|
639
|
+
import { COMPACTION_RECENT_ROUNDS } from '@/common/constants';
|
|
639
640
|
|
|
640
641
|
describe('Context Compaction — Windowed View (no message deletion)', () => {
|
|
641
642
|
/**
|
|
642
643
|
* Simulates the compaction logic from Graph.ts without the full Graph instance.
|
|
643
|
-
*
|
|
644
|
+
* Mirrors the two modes:
|
|
645
|
+
* A) No summary → fill budget with as many recent messages as fit
|
|
646
|
+
* B) Summary exists → keep last COMPACTION_RECENT_ROUNDS rounds only
|
|
644
647
|
*/
|
|
645
648
|
function buildWindowedView(opts: {
|
|
646
649
|
messages: BaseMessage[];
|
|
@@ -663,11 +666,28 @@ describe('Context Compaction — Windowed View (no message deletion)', () => {
|
|
|
663
666
|
let usedTokens = 0;
|
|
664
667
|
let windowStart = messages.length;
|
|
665
668
|
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
669
|
+
if (!summary) {
|
|
670
|
+
// Mode A: No summary — fill budget
|
|
671
|
+
for (let i = messages.length - 1; i >= contentStart; i--) {
|
|
672
|
+
const msgTokens = indexTokenCountMap[i] ?? 0;
|
|
673
|
+
if (usedTokens + msgTokens > recentBudget) break;
|
|
674
|
+
usedTokens += msgTokens;
|
|
675
|
+
windowStart = i;
|
|
676
|
+
}
|
|
677
|
+
} else {
|
|
678
|
+
// Mode B: Summary exists — keep last N rounds
|
|
679
|
+
let roundsSeen = 0;
|
|
680
|
+
for (let i = messages.length - 1; i >= contentStart; i--) {
|
|
681
|
+
const msgType = messages[i]?.getType();
|
|
682
|
+
const msgTokens = indexTokenCountMap[i] ?? 0;
|
|
683
|
+
if (usedTokens + msgTokens > recentBudget) break;
|
|
684
|
+
usedTokens += msgTokens;
|
|
685
|
+
windowStart = i;
|
|
686
|
+
if (msgType === 'human') {
|
|
687
|
+
roundsSeen++;
|
|
688
|
+
if (roundsSeen >= COMPACTION_RECENT_ROUNDS) break;
|
|
689
|
+
}
|
|
690
|
+
}
|
|
671
691
|
}
|
|
672
692
|
|
|
673
693
|
// Don't split tool-call / tool-result pairs
|
|
@@ -836,6 +856,73 @@ describe('Context Compaction — Windowed View (no message deletion)', () => {
|
|
|
836
856
|
expect(recentWithSummary.length).toBeLessThan(recentWithout.length);
|
|
837
857
|
});
|
|
838
858
|
|
|
859
|
+
it('with summary, limits window to last 2 rounds (not budget-filling)', () => {
|
|
860
|
+
// 20 messages = 10 rounds. With summary, should only keep last 2 rounds (4 msgs).
|
|
861
|
+
const messages: BaseMessage[] = [
|
|
862
|
+
new SystemMessage('System prompt'),
|
|
863
|
+
];
|
|
864
|
+
for (let i = 0; i < 20; i++) {
|
|
865
|
+
messages.push(
|
|
866
|
+
i % 2 === 0
|
|
867
|
+
? new HumanMessage(`User question ${i / 2}`)
|
|
868
|
+
: new AIMessage(`AI answer ${(i - 1) / 2}`)
|
|
869
|
+
);
|
|
870
|
+
}
|
|
871
|
+
const indexTokenCountMap: Record<string, number | undefined> = {};
|
|
872
|
+
for (let i = 0; i < messages.length; i++) {
|
|
873
|
+
indexTokenCountMap[i] = simpleTokenCounter(messages[i]);
|
|
874
|
+
}
|
|
875
|
+
|
|
876
|
+
const { view, recentMessages, compactedMessages } = buildWindowedView({
|
|
877
|
+
messages,
|
|
878
|
+
indexTokenCountMap,
|
|
879
|
+
maxTokens: 100_000, // huge budget — would fit everything
|
|
880
|
+
summary: 'Summary of earlier conversation',
|
|
881
|
+
tokenCounter: simpleTokenCounter,
|
|
882
|
+
});
|
|
883
|
+
|
|
884
|
+
// Despite huge budget, only last 2 rounds kept (4 content msgs: H+A+H+A)
|
|
885
|
+
// Plus possible trailing messages in the last round
|
|
886
|
+
expect(recentMessages.length).toBeLessThanOrEqual(5); // 2 rounds + maybe 1 trailing
|
|
887
|
+
expect(recentMessages.length).toBeGreaterThanOrEqual(4); // at least 2 full rounds
|
|
888
|
+
|
|
889
|
+
// Most messages are compacted behind the summary
|
|
890
|
+
expect(compactedMessages.length).toBeGreaterThan(10);
|
|
891
|
+
|
|
892
|
+
// View = system + summary + recent window
|
|
893
|
+
expect(view[0].getType()).toBe('system');
|
|
894
|
+
expect(view[1].content).toContain('[Conversation Summary]');
|
|
895
|
+
});
|
|
896
|
+
|
|
897
|
+
it('without summary, fills budget (no round limit)', () => {
|
|
898
|
+
const messages = buildConversation(20, 100); // small messages
|
|
899
|
+
const indexTokenCountMap: Record<string, number | undefined> = {};
|
|
900
|
+
for (let i = 0; i < messages.length; i++) {
|
|
901
|
+
indexTokenCountMap[i] = simpleTokenCounter(messages[i]);
|
|
902
|
+
}
|
|
903
|
+
|
|
904
|
+
const { recentMessages: withoutSummary } = buildWindowedView({
|
|
905
|
+
messages,
|
|
906
|
+
indexTokenCountMap,
|
|
907
|
+
maxTokens: 100_000, // huge budget
|
|
908
|
+
tokenCounter: simpleTokenCounter,
|
|
909
|
+
// no summary → mode A
|
|
910
|
+
});
|
|
911
|
+
|
|
912
|
+
const { recentMessages: withSummary } = buildWindowedView({
|
|
913
|
+
messages,
|
|
914
|
+
indexTokenCountMap,
|
|
915
|
+
maxTokens: 100_000,
|
|
916
|
+
summary: 'Summary exists',
|
|
917
|
+
tokenCounter: simpleTokenCounter,
|
|
918
|
+
});
|
|
919
|
+
|
|
920
|
+
// Without summary: all messages included (budget-filling mode)
|
|
921
|
+
expect(withoutSummary.length).toBe(20); // all content messages
|
|
922
|
+
// With summary: only last 2 rounds
|
|
923
|
+
expect(withSummary.length).toBeLessThan(withoutSummary.length);
|
|
924
|
+
});
|
|
925
|
+
|
|
839
926
|
it('original messages array is never mutated', () => {
|
|
840
927
|
const messages = buildConversation(15, 400);
|
|
841
928
|
const originalLength = messages.length;
|