@illuma-ai/agents 1.1.2 → 1.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/graphs/Graph.cjs +115 -82
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/esm/graphs/Graph.mjs +115 -82
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/package.json +1 -1
- package/src/graphs/Graph.ts +140 -102
- package/src/graphs/gapFeatures.test.ts +234 -2
package/package.json
CHANGED
package/src/graphs/Graph.ts
CHANGED
|
@@ -1606,86 +1606,143 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
1606
1606
|
}
|
|
1607
1607
|
|
|
1608
1608
|
if (agentContext.pruneMessages) {
|
|
1609
|
-
|
|
1610
|
-
|
|
1611
|
-
|
|
1612
|
-
|
|
1613
|
-
|
|
1614
|
-
|
|
1615
|
-
|
|
1616
|
-
|
|
1617
|
-
//
|
|
1618
|
-
//
|
|
1619
|
-
// 1. If _cachedRunSummary exists → use it, fire async update
|
|
1620
|
-
// 2. If persistedSummary exists → use it as fallback, fire async update
|
|
1621
|
-
// 3. If NOTHING exists (first-ever prune) → skip summary, fire async generation
|
|
1622
|
-
// The summary catches up asynchronously and is available for subsequent
|
|
1623
|
-
// iterations (tool calls) and the next conversation turn.
|
|
1609
|
+
// ── Context Compaction (Copilot-style: never delete messages) ─────
|
|
1610
|
+
//
|
|
1611
|
+
// DESIGN: Original messages are NEVER removed from the array.
|
|
1612
|
+
// Instead, we build a "windowed view" for the LLM:
|
|
1613
|
+
// [system prompt] + [summary of older turns] + [recent turns that fit]
|
|
1614
|
+
//
|
|
1615
|
+
// This ensures:
|
|
1616
|
+
// - No context is ever lost (summary covers older turns)
|
|
1617
|
+
// - We can always re-summarize from originals if summary is stale
|
|
1618
|
+
// - Conversation chaining works naturally across turns
|
|
1624
1619
|
//
|
|
1625
|
-
//
|
|
1626
|
-
//
|
|
1627
|
-
//
|
|
1628
|
-
//
|
|
1629
|
-
|
|
1620
|
+
// Flow:
|
|
1621
|
+
// 1. Resolve best available summary (cached > persisted > seed)
|
|
1622
|
+
// 2. Calculate token budget available for recent messages
|
|
1623
|
+
// 3. Walk newest→oldest, build view of messages that fit
|
|
1624
|
+
// 4. Assemble: [system] + [summary] + [recent window]
|
|
1625
|
+
// 5. Fire background summary update for messages outside the window
|
|
1626
|
+
|
|
1630
1627
|
const sumConfig = agentContext.summarizationConfig;
|
|
1631
|
-
const
|
|
1632
|
-
|
|
1633
|
-
|
|
1634
|
-
|
|
1635
|
-
|
|
1636
|
-
|
|
1637
|
-
|
|
1628
|
+
const tokenCounter = agentContext.tokenCounter;
|
|
1629
|
+
const maxTokens = agentContext.maxContextTokens ?? 0;
|
|
1630
|
+
|
|
1631
|
+
// Step 1: Resolve best available summary
|
|
1632
|
+
let summary: string | undefined;
|
|
1633
|
+
let summarySource: string;
|
|
1634
|
+
|
|
1635
|
+
if (this._cachedRunSummary != null) {
|
|
1636
|
+
summary = this._cachedRunSummary;
|
|
1637
|
+
summarySource = 'cached';
|
|
1638
|
+
} else if (
|
|
1639
|
+
agentContext.persistedSummary != null &&
|
|
1640
|
+
agentContext.persistedSummary !== ''
|
|
1641
|
+
) {
|
|
1642
|
+
summary = agentContext.persistedSummary;
|
|
1643
|
+
this._cachedRunSummary = summary;
|
|
1644
|
+
summarySource = 'persisted';
|
|
1645
|
+
} else if (
|
|
1646
|
+
sumConfig?.initialSummary != null &&
|
|
1647
|
+
sumConfig.initialSummary !== ''
|
|
1648
|
+
) {
|
|
1649
|
+
summary = sumConfig.initialSummary;
|
|
1650
|
+
this._cachedRunSummary = summary;
|
|
1651
|
+
summarySource = 'initial-seed';
|
|
1652
|
+
} else {
|
|
1653
|
+
summarySource = 'none';
|
|
1654
|
+
}
|
|
1638
1655
|
|
|
1639
|
-
|
|
1640
|
-
|
|
1641
|
-
|
|
1642
|
-
|
|
1656
|
+
// Step 2: Calculate token budget
|
|
1657
|
+
// Apply EMA calibration for accuracy across iterations
|
|
1658
|
+
const calibratedMax = applyCalibration(maxTokens, this._pruneCalibration);
|
|
1659
|
+
const systemMsg = messages[0]?.getType() === 'system' ? messages[0] : null;
|
|
1660
|
+
const systemTokens = systemMsg != null
|
|
1661
|
+
? (agentContext.indexTokenCountMap[0] ?? 0)
|
|
1662
|
+
: 0;
|
|
1663
|
+
const summaryMsg = summary != null && summary !== ''
|
|
1664
|
+
? new SystemMessage(`[Conversation Summary]\n${summary}`)
|
|
1665
|
+
: null;
|
|
1666
|
+
const summaryTokens = summaryMsg != null && tokenCounter != null
|
|
1667
|
+
? tokenCounter(summaryMsg)
|
|
1668
|
+
: 0;
|
|
1669
|
+
|
|
1670
|
+
// Budget for recent messages = total - system - summary - 3 (assistant priming)
|
|
1671
|
+
const recentBudget = calibratedMax - systemTokens - summaryTokens - 3;
|
|
1672
|
+
|
|
1673
|
+
// Step 3: Walk newest→oldest, collect messages that fit in the budget
|
|
1674
|
+
const contentStart = systemMsg != null ? 1 : 0;
|
|
1675
|
+
let usedTokens = 0;
|
|
1676
|
+
let windowStart = messages.length; // index where the recent window begins
|
|
1677
|
+
|
|
1678
|
+
for (let i = messages.length - 1; i >= contentStart; i--) {
|
|
1679
|
+
const msgTokens = agentContext.indexTokenCountMap[i] ?? 0;
|
|
1680
|
+
if (usedTokens + msgTokens > recentBudget) {
|
|
1681
|
+
break;
|
|
1682
|
+
}
|
|
1683
|
+
usedTokens += msgTokens;
|
|
1684
|
+
windowStart = i;
|
|
1685
|
+
}
|
|
1686
|
+
|
|
1687
|
+
// Ensure we don't split tool-call / tool-result pairs.
|
|
1688
|
+
// If windowStart lands on a ToolMessage, walk back to include its AI message.
|
|
1689
|
+
while (
|
|
1690
|
+
windowStart > contentStart &&
|
|
1691
|
+
messages[windowStart]?.getType() === 'tool'
|
|
1643
1692
|
) {
|
|
1644
|
-
|
|
1645
|
-
|
|
1646
|
-
|
|
1693
|
+
windowStart--;
|
|
1694
|
+
usedTokens += agentContext.indexTokenCountMap[windowStart] ?? 0;
|
|
1695
|
+
}
|
|
1647
1696
|
|
|
1648
|
-
|
|
1649
|
-
|
|
1650
|
-
|
|
1651
|
-
} else if (
|
|
1652
|
-
agentContext.persistedSummary != null &&
|
|
1653
|
-
agentContext.persistedSummary !== ''
|
|
1654
|
-
) {
|
|
1655
|
-
summary = agentContext.persistedSummary;
|
|
1656
|
-
this._cachedRunSummary = summary;
|
|
1657
|
-
summarySource = 'persisted';
|
|
1658
|
-
} else if (
|
|
1659
|
-
sumConfig?.initialSummary != null &&
|
|
1660
|
-
sumConfig.initialSummary !== ''
|
|
1661
|
-
) {
|
|
1662
|
-
// Cross-run seed: use initialSummary when no persisted summary exists
|
|
1663
|
-
summary = sumConfig.initialSummary;
|
|
1664
|
-
this._cachedRunSummary = summary;
|
|
1665
|
-
summarySource = 'initial-seed';
|
|
1666
|
-
} else {
|
|
1667
|
-
summarySource = 'none';
|
|
1668
|
-
}
|
|
1697
|
+
const recentMessages = messages.slice(windowStart);
|
|
1698
|
+
const compactedMessages = messages.slice(contentStart, windowStart);
|
|
1699
|
+
const hasSummary = summaryMsg != null;
|
|
1669
1700
|
|
|
1670
|
-
|
|
1671
|
-
|
|
1672
|
-
|
|
1673
|
-
|
|
1701
|
+
// Step 4: Assemble the windowed view
|
|
1702
|
+
// [system] + [summary (covers compacted messages)] + [recent window]
|
|
1703
|
+
const viewParts: BaseMessage[] = [];
|
|
1704
|
+
if (systemMsg != null) {
|
|
1705
|
+
viewParts.push(systemMsg);
|
|
1706
|
+
}
|
|
1707
|
+
if (summaryMsg != null) {
|
|
1708
|
+
viewParts.push(summaryMsg);
|
|
1709
|
+
}
|
|
1710
|
+
viewParts.push(...recentMessages);
|
|
1711
|
+
messagesToUse = viewParts;
|
|
1712
|
+
|
|
1713
|
+
console.debug(
|
|
1714
|
+
`[Graph:Compaction] View: ${messages.length}→${viewParts.length} msgs ` +
|
|
1715
|
+
`(${compactedMessages.length} behind summary, ${recentMessages.length} in window) | ` +
|
|
1716
|
+
`summary=${summarySource}${summary ? ` (len=${summary.length})` : ''} | ` +
|
|
1717
|
+
`budget=${recentBudget}/${calibratedMax} used=${usedTokens}`
|
|
1718
|
+
);
|
|
1719
|
+
|
|
1720
|
+
// Step 5: Fire background summary update (non-blocking)
|
|
1721
|
+
// Summarize messages outside the window so next iteration has a fresh summary.
|
|
1722
|
+
// Only trigger if there are compacted messages worth summarizing.
|
|
1723
|
+
if (
|
|
1724
|
+
compactedMessages.length > 0 &&
|
|
1725
|
+
agentContext.summarizeCallback
|
|
1726
|
+
) {
|
|
1727
|
+
const shouldSummarize = this.shouldTriggerSummarization(
|
|
1728
|
+
compactedMessages.length,
|
|
1729
|
+
maxTokens,
|
|
1730
|
+
agentContext.indexTokenCountMap,
|
|
1731
|
+
agentContext.instructionTokens,
|
|
1732
|
+
sumConfig
|
|
1733
|
+
);
|
|
1674
1734
|
|
|
1675
|
-
|
|
1676
|
-
// in-flight (from a prior tool iteration), accumulate messages instead of
|
|
1677
|
-
// firing another concurrent LLM call. At 2000 users with 3+ tool calls
|
|
1678
|
-
// per turn, this prevents 3x summary call volume.
|
|
1735
|
+
if (shouldSummarize) {
|
|
1679
1736
|
if (this._summaryInFlight) {
|
|
1680
|
-
this._pendingMessagesToRefine.push(...
|
|
1737
|
+
this._pendingMessagesToRefine.push(...compactedMessages);
|
|
1681
1738
|
console.debug(
|
|
1682
|
-
`[Graph:
|
|
1739
|
+
`[Graph:Compaction] Summary in-flight, queued ${compactedMessages.length} msgs (pending=${this._pendingMessagesToRefine.length})`
|
|
1683
1740
|
);
|
|
1684
1741
|
} else {
|
|
1685
1742
|
this._summaryInFlight = true;
|
|
1686
1743
|
const allMessages = this._pendingMessagesToRefine.length > 0
|
|
1687
|
-
? [...this._pendingMessagesToRefine, ...
|
|
1688
|
-
:
|
|
1744
|
+
? [...this._pendingMessagesToRefine, ...compactedMessages]
|
|
1745
|
+
: compactedMessages;
|
|
1689
1746
|
this._pendingMessagesToRefine = [];
|
|
1690
1747
|
|
|
1691
1748
|
agentContext
|
|
@@ -1697,7 +1754,7 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
1697
1754
|
})
|
|
1698
1755
|
.catch((err) => {
|
|
1699
1756
|
console.error(
|
|
1700
|
-
'[Graph] Background summary failed (non-fatal):',
|
|
1757
|
+
'[Graph:Compaction] Background summary update failed (non-fatal):',
|
|
1701
1758
|
err
|
|
1702
1759
|
);
|
|
1703
1760
|
})
|
|
@@ -1705,44 +1762,13 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
1705
1762
|
this._summaryInFlight = false;
|
|
1706
1763
|
});
|
|
1707
1764
|
}
|
|
1708
|
-
|
|
1709
|
-
if (summary != null && summary !== '') {
|
|
1710
|
-
hasSummary = true;
|
|
1711
|
-
const summaryMsg = new SystemMessage(
|
|
1712
|
-
`[Conversation Summary]\n${summary}`
|
|
1713
|
-
);
|
|
1714
|
-
const systemIdx =
|
|
1715
|
-
messagesToUse[0]?.getType() === 'system' ? 1 : 0;
|
|
1716
|
-
messagesToUse = [
|
|
1717
|
-
...messagesToUse.slice(0, systemIdx),
|
|
1718
|
-
summaryMsg,
|
|
1719
|
-
...messagesToUse.slice(systemIdx),
|
|
1720
|
-
];
|
|
1721
|
-
}
|
|
1722
|
-
} catch (err) {
|
|
1723
|
-
console.error('[Graph] Summarization failed:', err);
|
|
1724
1765
|
}
|
|
1725
|
-
} else if (messagesToRefine.length > 0) {
|
|
1726
|
-
// Log pruning even when no summarize callback (discard mode)
|
|
1727
|
-
console.debug(
|
|
1728
|
-
`[Graph:ContextMgmt] Pruned ${messages.length}→${context.length} msgs (${messagesToRefine.length} discarded, no summary callback) | calibration=${this._pruneCalibration.ratio.toFixed(3)}`
|
|
1729
|
-
);
|
|
1730
1766
|
}
|
|
1731
1767
|
|
|
1732
|
-
//
|
|
1733
|
-
|
|
1734
|
-
deduplicateSystemMessages(messagesToUse);
|
|
1735
|
-
if (removedCount > 0) {
|
|
1736
|
-
messagesToUse = dedupedMessages;
|
|
1737
|
-
console.debug(
|
|
1738
|
-
`[Graph:Dedup] Removed ${removedCount} duplicate system message(s)`
|
|
1739
|
-
);
|
|
1740
|
-
}
|
|
1741
|
-
|
|
1742
|
-
// Post-prune context note for task-tool-enabled agents
|
|
1743
|
-
if (messagesToRefine.length > 0 && hasTaskTool(agentContext.tools)) {
|
|
1768
|
+
// Post-compaction context note for task-tool-enabled agents
|
|
1769
|
+
if (compactedMessages.length > 0 && hasTaskTool(agentContext.tools)) {
|
|
1744
1770
|
const postPruneNote = buildPostPruneNote(
|
|
1745
|
-
|
|
1771
|
+
compactedMessages.length,
|
|
1746
1772
|
hasSummary
|
|
1747
1773
|
);
|
|
1748
1774
|
if (postPruneNote) {
|
|
@@ -1754,6 +1780,18 @@ export class StandardGraph extends Graph<t.BaseGraphState, t.GraphNode> {
|
|
|
1754
1780
|
}
|
|
1755
1781
|
}
|
|
1756
1782
|
|
|
1783
|
+
// Deduplicate system messages — ALWAYS runs, not just during compaction.
|
|
1784
|
+
// Duplicate system messages accumulate from repeated tool iterations,
|
|
1785
|
+
// summary injections, and context notes across turns.
|
|
1786
|
+
const { messages: dedupedMessages, removedCount } =
|
|
1787
|
+
deduplicateSystemMessages(messagesToUse);
|
|
1788
|
+
if (removedCount > 0) {
|
|
1789
|
+
messagesToUse = dedupedMessages;
|
|
1790
|
+
console.debug(
|
|
1791
|
+
`[Graph:Dedup] Removed ${removedCount} duplicate system message(s)`
|
|
1792
|
+
);
|
|
1793
|
+
}
|
|
1794
|
+
|
|
1757
1795
|
let finalMessages = messagesToUse;
|
|
1758
1796
|
if (agentContext.useLegacyContent) {
|
|
1759
1797
|
finalMessages = formatContentStrings(finalMessages);
|
|
@@ -627,7 +627,239 @@ describe('Proactive Summarization — Context Pressure', () => {
|
|
|
627
627
|
|
|
628
628
|
// Even at 100%+, we use the existing cached summary — no error thrown
|
|
629
629
|
expect(cachedSummary).toBeTruthy();
|
|
630
|
-
//
|
|
631
|
-
|
|
630
|
+
// Compaction builds a windowed view — no messages deleted, no throwing
|
|
631
|
+
});
|
|
632
|
+
});
|
|
633
|
+
|
|
634
|
+
// ===========================================================================
|
|
635
|
+
// Context Compaction (Copilot-style: never delete messages)
|
|
636
|
+
// ===========================================================================
|
|
637
|
+
|
|
638
|
+
import { applyCalibration as _applyCalibration } from '@/utils/pruneCalibration';
|
|
639
|
+
|
|
640
|
+
describe('Context Compaction — Windowed View (no message deletion)', () => {
|
|
641
|
+
/**
 * Test-local replica of the windowed-view compaction steps, so the algorithm
 * can be exercised without building a full Graph instance.
 *
 * The input array is only read (indexing and `slice`); it is never modified.
 *
 * @param opts.messages - Full conversation. A leading message whose type is
 *   'system' is pinned to the front of the view and excluded from windowing.
 * @param opts.indexTokenCountMap - Token cost per message index; a missing
 *   entry is treated as 0.
 * @param opts.maxTokens - Total token budget for the assembled view.
 * @param opts.summary - Optional summary text. When truthy it is wrapped in a
 *   SystemMessage and placed right after the system message.
 * @param opts.tokenCounter - Used only to price the summary message.
 * @returns The assembled `view`, the `compactedMessages` left outside the
 *   window, the `recentMessages` inside it, and `usedTokens` (the sum of the
 *   costs of the windowed messages, including any pulled in by the
 *   tool-pair walk-back).
 */
function buildWindowedView(opts: {
  messages: BaseMessage[];
  indexTokenCountMap: Record<string, number | undefined>;
  maxTokens: number;
  summary?: string;
  tokenCounter: TokenCounter;
}) {
  const { messages, indexTokenCountMap, maxTokens, summary, tokenCounter } = opts;
  const costAt = (idx: number): number => indexTokenCountMap[idx] ?? 0;

  // A leading system message is always kept and paid for up front.
  const first = messages[0];
  const systemMsg = first?.getType() === 'system' ? first : null;
  const firstContentIdx = systemMsg == null ? 0 : 1;
  const systemCost = systemMsg == null ? 0 : costAt(0);

  // The summary (if any) is priced with the supplied counter.
  let summaryMsg: SystemMessage | null = null;
  let summaryCost = 0;
  if (summary) {
    summaryMsg = new SystemMessage(`[Conversation Summary]\n${summary}`);
    summaryCost = tokenCounter(summaryMsg);
  }

  // What is left for recent messages; the trailing 3 is a fixed reserve.
  const budget = maxTokens - systemCost - summaryCost - 3;

  // Grow the window from the newest message backwards while it still fits.
  let cursor = messages.length;
  let spent = 0;
  while (cursor > firstContentIdx) {
    const cost = costAt(cursor - 1);
    if (spent + cost > budget) {
      break;
    }
    spent += cost;
    cursor -= 1;
  }

  // If the window opens on a tool result, extend it backwards until it no
  // longer starts on one. This may push `spent` past `budget`.
  for (;;) {
    const opensOnTool = messages[cursor]?.getType() === 'tool';
    if (cursor <= firstContentIdx || !opensOnTool) {
      break;
    }
    cursor -= 1;
    spent += costAt(cursor);
  }

  const recentMessages = messages.slice(cursor);
  const compactedMessages = messages.slice(firstContentIdx, cursor);

  // Final layout: [system] + [summary] + [recent window].
  const view: BaseMessage[] = [
    ...(systemMsg ? [systemMsg] : []),
    ...(summaryMsg ? [summaryMsg] : []),
    ...recentMessages,
  ];

  return { view, compactedMessages, recentMessages, usedTokens: spent };
}
|
|
692
|
+
|
|
693
|
+
it('builds a windowed view without deleting any messages', () => {
  // The length assertion below expects 21 entries: one system message
  // followed by 20 content messages.
  const conversation = buildConversation(20, 400);
  const tokenCosts: Record<string, number | undefined> = {};
  conversation.forEach((msg, idx) => {
    tokenCosts[idx] = simpleTokenCounter(msg);
  });

  // A deliberately small budget so that only part of the history fits.
  const result = buildWindowedView({
    messages: conversation,
    indexTokenCountMap: tokenCosts,
    maxTokens: 500,
    tokenCounter: simpleTokenCounter,
  });

  // The view is a strict subset of the conversation...
  expect(result.view.length).toBeLessThan(conversation.length);
  // ...while the source array keeps every message.
  expect(conversation.length).toBe(21);
  // Every non-system message is either behind the window or inside it.
  expect(
    result.compactedMessages.length + result.recentMessages.length
  ).toBe(20);
  // The system message stays at the front.
  expect(result.view[0].getType()).toBe('system');
});
|
|
716
|
+
|
|
717
|
+
it('injects summary message covering compacted (windowed-out) messages', () => {
  const conversation = buildConversation(20, 400);
  const tokenCosts: Record<string, number | undefined> = {};
  conversation.forEach((msg, idx) => {
    tokenCosts[idx] = simpleTokenCounter(msg);
  });

  const summary = 'Summary of earlier conversation turns';
  const result = buildWindowedView({
    messages: conversation,
    indexTokenCountMap: tokenCosts,
    maxTokens: 600,
    summary,
    tokenCounter: simpleTokenCounter,
  });

  // Slot 1 (directly after the system message) holds the summary.
  const summarySlot = result.view[1];
  expect(summarySlot.content).toContain('[Conversation Summary]');
  expect(summarySlot.content).toContain(summary);
  // Some history must have been left outside the window.
  expect(result.compactedMessages.length).toBeGreaterThan(0);
  // The source conversation still has all of its messages.
  expect(conversation.length).toBe(21);
});
|
|
741
|
+
|
|
742
|
+
it('includes all messages when budget is large enough (no compaction)', () => {
  // A short conversation that comfortably fits the budget used below.
  const conversation = buildConversation(5, 100);
  const tokenCosts: Record<string, number | undefined> = {};
  conversation.forEach((msg, idx) => {
    tokenCosts[idx] = simpleTokenCounter(msg);
  });

  const result = buildWindowedView({
    messages: conversation,
    indexTokenCountMap: tokenCosts,
    maxTokens: 100_000,
    tokenCounter: simpleTokenCounter,
  });

  // Nothing is windowed out: the view mirrors the conversation one-to-one.
  expect(result.view.length).toBe(conversation.length);
  expect(result.compactedMessages.length).toBe(0);
});
|
|
760
|
+
|
|
761
|
+
it('does not split tool-call / tool-result pairs at window boundary', () => {
  // Keep direct references to the pair so the assertions can check identity.
  const toolCallMsg = new AIMessageChunk({
    content: 'Let me search',
    tool_calls: [{ id: 'tc_1', name: 'web_search', args: {} }],
  });
  const toolResultMsg = new ToolMessage({
    content: 'Search results',
    tool_call_id: 'tc_1',
    name: 'web_search',
  });
  const messages: BaseMessage[] = [
    new SystemMessage('System'),
    new HumanMessage('old question'),
    new AIMessage('old answer'),
    new HumanMessage('question about tool'),
    toolCallMsg,
    toolResultMsg,
    new AIMessage('Based on the search results...'),
    new HumanMessage('latest question'),
    new AIMessage('latest answer'),
  ];
  const indexTokenCountMap: Record<string, number | undefined> = {};
  messages.forEach((msg, idx) => {
    indexTokenCountMap[idx] = simpleTokenCounter(msg);
  });

  // Size the budget to cover EXACTLY the messages from the ToolMessage to the
  // end: system tokens + window tokens + the 3-token reserve that
  // buildWindowedView subtracts. No slack is added, so the AI tool-call
  // message cannot fit by budget alone (as long as its token count is
  // non-zero) and can only enter the view through the walk-back.
  const toolMsgIdx = messages.indexOf(toolResultMsg);
  let windowTokens = 0;
  for (let i = toolMsgIdx; i < messages.length; i++) {
    windowTokens += indexTokenCountMap[i] ?? 0;
  }
  const tightBudget = (indexTokenCountMap[0] ?? 0) + windowTokens + 3;

  const { view, compactedMessages } = buildWindowedView({
    messages,
    indexTokenCountMap,
    maxTokens: tightBudget,
    tokenCounter: simpleTokenCounter,
  });

  // The tool result is in the view and is immediately preceded by the very
  // AI message that issued the call.
  const toolPos = view.indexOf(toolResultMsg);
  expect(toolPos).toBeGreaterThan(0);
  expect(view[toolPos - 1]).toBe(toolCallMsg);
  // The tool-call message must not have been left behind the summary.
  expect(compactedMessages).not.toContain(toolCallMsg);

  // General invariant: every ToolMessage in the view follows an AI message
  // (its caller) or another ToolMessage (multi-tool turn). A system message
  // is NOT an acceptable predecessor — that would be an orphaned result.
  view.forEach((msg, idx) => {
    if (msg.getType() !== 'tool') {
      return;
    }
    expect(idx).toBeGreaterThan(0);
    expect(['ai', 'tool']).toContain(view[idx - 1].getType());
  });
});
|
|
809
|
+
|
|
810
|
+
it('with summary, recent messages use remaining budget after summary tokens', () => {
  const conversation = buildConversation(20, 400);
  const tokenCosts: Record<string, number | undefined> = {};
  conversation.forEach((msg, idx) => {
    tokenCosts[idx] = simpleTokenCounter(msg);
  });

  // Inputs shared by both runs; only the presence of a summary differs.
  const shared = {
    messages: conversation,
    indexTokenCountMap: tokenCosts,
    maxTokens: 800,
    tokenCounter: simpleTokenCounter,
  };

  // Run 1: a 1000-character summary that takes a sizeable share of the budget.
  const largeSummary = 'S'.repeat(1000);
  const withSummary = buildWindowedView({ ...shared, summary: largeSummary });

  // Run 2: same budget, no summary.
  const withoutSummary = buildWindowedView({ ...shared });

  // Paying for the summary leaves room for fewer recent messages.
  expect(withSummary.recentMessages.length).toBeLessThan(
    withoutSummary.recentMessages.length
  );
});
|
|
838
|
+
|
|
839
|
+
it('original messages array is never mutated', () => {
  const conversation = buildConversation(15, 400);

  // Snapshot what must survive: length plus first and last message content.
  const lengthBefore = conversation.length;
  const firstContentBefore = conversation[0].content;
  const lastContentBefore = conversation[conversation.length - 1].content;

  const tokenCosts: Record<string, number | undefined> = {};
  conversation.forEach((msg, idx) => {
    tokenCosts[idx] = simpleTokenCounter(msg);
  });

  // Build the view five times in a row, each time with a different summary.
  let round = 0;
  while (round < 5) {
    buildWindowedView({
      messages: conversation,
      indexTokenCountMap: tokenCosts,
      maxTokens: 300,
      summary: `Summary iteration ${round}`,
      tokenCounter: simpleTokenCounter,
    });
    round += 1;
  }

  // The snapshot still matches after the repeated runs.
  expect(conversation.length).toBe(lengthBefore);
  expect(conversation[0].content).toBe(firstContentBefore);
  expect(conversation[conversation.length - 1].content).toBe(lastContentBefore);
});
|
|
633
865
|
});
|