@illuma-ai/agents 1.0.98 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/agents/AgentContext.cjs +6 -2
- package/dist/cjs/agents/AgentContext.cjs.map +1 -1
- package/dist/cjs/common/constants.cjs +53 -0
- package/dist/cjs/common/constants.cjs.map +1 -1
- package/dist/cjs/graphs/Graph.cjs +167 -31
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/cjs/main.cjs +14 -0
- package/dist/cjs/main.cjs.map +1 -1
- package/dist/cjs/messages/dedup.cjs +95 -0
- package/dist/cjs/messages/dedup.cjs.map +1 -0
- package/dist/cjs/tools/CodeExecutor.cjs +22 -3
- package/dist/cjs/tools/CodeExecutor.cjs.map +1 -1
- package/dist/cjs/types/graph.cjs.map +1 -1
- package/dist/cjs/utils/pruneCalibration.cjs +78 -0
- package/dist/cjs/utils/pruneCalibration.cjs.map +1 -0
- package/dist/cjs/utils/run.cjs.map +1 -1
- package/dist/cjs/utils/tokens.cjs.map +1 -1
- package/dist/cjs/utils/toolDiscoveryCache.cjs +127 -0
- package/dist/cjs/utils/toolDiscoveryCache.cjs.map +1 -0
- package/dist/esm/agents/AgentContext.mjs +6 -2
- package/dist/esm/agents/AgentContext.mjs.map +1 -1
- package/dist/esm/common/constants.mjs +48 -1
- package/dist/esm/common/constants.mjs.map +1 -1
- package/dist/esm/graphs/Graph.mjs +168 -32
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/dist/esm/main.mjs +4 -1
- package/dist/esm/main.mjs.map +1 -1
- package/dist/esm/messages/dedup.mjs +93 -0
- package/dist/esm/messages/dedup.mjs.map +1 -0
- package/dist/esm/tools/CodeExecutor.mjs +22 -3
- package/dist/esm/tools/CodeExecutor.mjs.map +1 -1
- package/dist/esm/types/graph.mjs.map +1 -1
- package/dist/esm/utils/pruneCalibration.mjs +74 -0
- package/dist/esm/utils/pruneCalibration.mjs.map +1 -0
- package/dist/esm/utils/run.mjs.map +1 -1
- package/dist/esm/utils/tokens.mjs.map +1 -1
- package/dist/esm/utils/toolDiscoveryCache.mjs +125 -0
- package/dist/esm/utils/toolDiscoveryCache.mjs.map +1 -0
- package/dist/types/agents/AgentContext.d.ts +4 -1
- package/dist/types/common/constants.d.ts +35 -0
- package/dist/types/graphs/Graph.d.ts +25 -0
- package/dist/types/messages/dedup.d.ts +25 -0
- package/dist/types/messages/index.d.ts +1 -0
- package/dist/types/types/graph.d.ts +63 -0
- package/dist/types/utils/index.d.ts +2 -0
- package/dist/types/utils/pruneCalibration.d.ts +43 -0
- package/dist/types/utils/toolDiscoveryCache.d.ts +77 -0
- package/package.json +1 -1
- package/src/agents/AgentContext.ts +7 -0
- package/src/common/constants.ts +56 -0
- package/src/graphs/Graph.ts +220 -50
- package/src/graphs/gapFeatures.test.ts +520 -0
- package/src/graphs/nonBlockingSummarization.test.ts +307 -0
- package/src/messages/__tests__/dedup.test.ts +166 -0
- package/src/messages/dedup.ts +104 -0
- package/src/messages/index.ts +1 -0
- package/src/tools/CodeExecutor.ts +22 -3
- package/src/types/graph.ts +73 -0
- package/src/utils/__tests__/pruneCalibration.test.ts +148 -0
- package/src/utils/__tests__/toolDiscoveryCache.test.ts +214 -0
- package/src/utils/contextPressure.test.ts +24 -9
- package/src/utils/index.ts +2 -0
- package/src/utils/pruneCalibration.ts +92 -0
- package/src/utils/run.ts +108 -108
- package/src/utils/tokens.ts +118 -118
- package/src/utils/toolDiscoveryCache.ts +150 -0
package/src/types/graph.ts
CHANGED
|
@@ -488,6 +488,73 @@ export interface StructuredOutputInput {
|
|
|
488
488
|
strict?: boolean;
|
|
489
489
|
}
|
|
490
490
|
|
|
491
|
+
/**
 * Names the condition that causes summarization to activate.
 *
 * - `'contextPercentage'`: context utilization exceeds a threshold percentage.
 * - `'messageCount'`: the pruned message count exceeds a threshold.
 * - `'tokenThreshold'`: the total token count exceeds a raw threshold.
 */
export type SummarizationTriggerType =
  | 'contextPercentage'
  | 'messageCount'
  | 'tokenThreshold';
|
|
501
|
+
|
|
502
|
+
/**
 * Options that tune how summarization behaves inside the agent pipeline.
 * Every field may be omitted; defaults come from the shared constants.
 *
 * @see SUMMARIZATION_CONTEXT_THRESHOLD, SUMMARIZATION_RESERVE_RATIO, PRUNING_EMA_ALPHA
 */
export interface SummarizationConfig {
  /**
   * Which condition causes summarization to trigger.
   * @default 'contextPercentage'
   */
  triggerType?: SummarizationTriggerType;

  /**
   * Threshold whose meaning depends on `triggerType`:
   * - contextPercentage: 0-100 (percentage of the context window)
   * - messageCount: absolute count of messages pruned
   * - tokenThreshold: absolute token count
   * @default 80 (for contextPercentage)
   */
  triggerThreshold?: number;

  /**
   * Share of the context window (0-1) set aside for recent messages.
   * Guards against over-pruning by keeping at least this fraction of the
   * context budget as recent conversation history.
   * @default 0.3
   */
  reserveRatio?: number;

  /**
   * Toggles context pruning (can be switched off for debugging).
   * @default true
   */
  contextPruning?: boolean;

  /**
   * Summary text used as a seed across runs.
   * Unlike persistedSummary, which is loaded from the conversation's own
   * history, this value is supplied by the caller as a cross-conversation
   * seed (e.g., agent personality or recurring context).
   */
  initialSummary?: string;
}
|
|
546
|
+
|
|
547
|
+
/**
 * State carried by the EMA-based pruning calibration.
 * Kept across iterations of a single run so pruning decisions are smoothed.
 */
export interface PruneCalibrationState {
  /** The calibration ratio as of the latest EMA update */
  ratio: number;
  /** How many calibration updates have been applied so far */
  iterations: number;
}
|
|
557
|
+
|
|
491
558
|
export interface AgentInputs {
|
|
492
559
|
agentId: string;
|
|
493
560
|
/** Human-readable name for the agent (used in handoff context). Defaults to agentId if not provided. */
|
|
@@ -559,4 +626,10 @@ export interface AgentInputs {
|
|
|
559
626
|
* Set by Ranger's SummaryStore when resuming a conversation.
|
|
560
627
|
*/
|
|
561
628
|
persistedSummary?: string;
|
|
629
|
+
/**
|
|
630
|
+
* Summarization configuration controlling trigger strategy, reserve ratio,
|
|
631
|
+
* and EMA calibration for pruning. When omitted, sensible defaults apply.
|
|
632
|
+
* @see SummarizationConfig
|
|
633
|
+
*/
|
|
634
|
+
summarizationConfig?: SummarizationConfig;
|
|
562
635
|
}
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
// src/utils/__tests__/pruneCalibration.test.ts
|
|
2
|
+
import {
|
|
3
|
+
createPruneCalibration,
|
|
4
|
+
updatePruneCalibration,
|
|
5
|
+
applyCalibration,
|
|
6
|
+
} from '../pruneCalibration';
|
|
7
|
+
import {
|
|
8
|
+
PRUNING_INITIAL_CALIBRATION,
|
|
9
|
+
PRUNING_EMA_ALPHA,
|
|
10
|
+
} from '@/common/constants';
|
|
11
|
+
|
|
12
|
+
/**
 * Unit tests for the EMA pruning calibration helpers.
 *
 * Convention used throughout: `updatePruneCalibration(state, actual, estimated)`
 * observes `estimated / actual`, so a ratio above 1 means the local counter
 * over-estimates and a ratio below 1 means it under-estimates.
 * The worked numbers in the comments assume an alpha of 0.3 and an initial
 * ratio of 1.0, which is what the asserted values imply.
 */
describe('pruneCalibration', () => {
  describe('createPruneCalibration', () => {
    it('creates initial state with default ratio', () => {
      const state = createPruneCalibration();
      expect(state.ratio).toBe(PRUNING_INITIAL_CALIBRATION);
      expect(state.iterations).toBe(0);
    });

    it('accepts custom initial ratio', () => {
      const state = createPruneCalibration(0.85);
      expect(state.ratio).toBe(0.85);
      expect(state.iterations).toBe(0);
    });
  });

  describe('updatePruneCalibration', () => {
    // Title fixed: with actual=1000 and estimated=1500, actual is LESS than
    // estimated (the counter over-counts).
    it('adjusts ratio when actual < estimated (over-counting)', () => {
      const state = createPruneCalibration();
      // Actual: 1000 tokens, estimated: 1500 tokens (our counter over-estimates)
      // observedRatio = 1500/1000 = 1.5
      // newRatio = 0.3 * 1.5 + 0.7 * 1.0 = 0.45 + 0.7 = 1.15
      const updated = updatePruneCalibration(state, 1000, 1500);
      expect(updated.ratio).toBeCloseTo(1.15, 2);
      expect(updated.iterations).toBe(1);
    });

    // Title fixed: with actual=2000 and estimated=1000, actual is GREATER than
    // estimated (the counter under-counts).
    it('adjusts ratio when actual > estimated (under-counting)', () => {
      const state = createPruneCalibration();
      // Actual: 2000 tokens, estimated: 1000 tokens (our counter under-estimates)
      // observedRatio = 1000/2000 = 0.5
      // newRatio = 0.3 * 0.5 + 0.7 * 1.0 = 0.15 + 0.7 = 0.85
      const updated = updatePruneCalibration(state, 2000, 1000);
      expect(updated.ratio).toBeCloseTo(0.85, 2);
      expect(updated.iterations).toBe(1);
    });

    it('converges with consistent readings', () => {
      let state = createPruneCalibration();

      // Simulate 10 iterations where actual is consistently 1.5x estimated
      for (let i = 0; i < 10; i++) {
        state = updatePruneCalibration(state, 1500, 1000);
      }

      // Should converge toward ~0.667 (estimated/actual = 1000/1500)
      expect(state.ratio).toBeCloseTo(0.667, 1);
      expect(state.iterations).toBe(10);
    });

    it('clamps extreme ratios to prevent wild adjustments', () => {
      const state = createPruneCalibration();

      // Extreme case: estimated 100x actual (should be clamped to 2.0)
      const updated = updatePruneCalibration(state, 100, 10000);
      // Clamped observedRatio = 2.0
      // newRatio = 0.3 * 2.0 + 0.7 * 1.0 = 0.6 + 0.7 = 1.3
      expect(updated.ratio).toBeCloseTo(1.3, 2);
    });

    it('does not update with invalid inputs', () => {
      const state = createPruneCalibration();

      // Invalid inputs must hand back the very same object (identity check)
      expect(updatePruneCalibration(state, 0, 1000)).toBe(state);
      expect(updatePruneCalibration(state, 1000, 0)).toBe(state);
      expect(updatePruneCalibration(state, -1, 1000)).toBe(state);
    });

    it('does not mutate input state', () => {
      const state = createPruneCalibration();
      const original = { ...state };

      updatePruneCalibration(state, 1000, 1500);
      expect(state).toEqual(original);
    });

    it('accepts custom alpha', () => {
      const state = createPruneCalibration();
      // With alpha=1.0, fully adapts to new reading
      const updated = updatePruneCalibration(state, 1000, 1500, 1.0);
      // observedRatio = 1.5 (inside the clamp range, so unchanged)
      // newRatio = 1.0 * 1.5 + 0.0 * 1.0 = 1.5
      expect(updated.ratio).toBeCloseTo(1.5, 2);
    });
  });

  describe('applyCalibration', () => {
    it('returns raw budget when no iterations have occurred', () => {
      const state = createPruneCalibration();
      expect(applyCalibration(10000, state)).toBe(10000);
    });

    it('adjusts budget after calibration', () => {
      let state = createPruneCalibration();
      state = updatePruneCalibration(state, 1000, 1500);
      // ratio ≈ 1.15, so budget is increased (our counter over-estimates)
      const adjusted = applyCalibration(10000, state);
      expect(adjusted).toBeCloseTo(11500, -2);
    });

    it('decreases budget when under-counting', () => {
      let state = createPruneCalibration();
      state = updatePruneCalibration(state, 2000, 1000);
      // ratio ≈ 0.85, so budget is decreased (our counter under-estimates)
      const adjusted = applyCalibration(10000, state);
      expect(adjusted).toBeLessThan(10000);
    });

    it('returns floor of the adjusted value', () => {
      let state = createPruneCalibration();
      state = updatePruneCalibration(state, 1000, 1500);
      const adjusted = applyCalibration(10001, state);
      expect(Number.isInteger(adjusted)).toBe(true);
    });
  });

  describe('multi-iteration convergence', () => {
    it('smoothly transitions when accuracy changes', () => {
      let state = createPruneCalibration();

      // First 5 iterations: estimated is 1.5x actual
      for (let i = 0; i < 5; i++) {
        state = updatePruneCalibration(state, 1000, 1500);
      }
      const ratio5 = state.ratio;

      // Next 5 iterations: estimated matches actual
      for (let i = 0; i < 5; i++) {
        state = updatePruneCalibration(state, 1000, 1000);
      }
      const ratio10 = state.ratio;

      // Ratio should move toward 1.0 but still carry some history
      expect(ratio10).toBeLessThan(ratio5);
      expect(ratio10).toBeGreaterThan(0.9);
    });
  });
});
|
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
// src/utils/__tests__/toolDiscoveryCache.test.ts
|
|
2
|
+
import {
|
|
3
|
+
ToolMessage,
|
|
4
|
+
AIMessageChunk,
|
|
5
|
+
HumanMessage,
|
|
6
|
+
SystemMessage,
|
|
7
|
+
} from '@langchain/core/messages';
|
|
8
|
+
import type { BaseMessage } from '@langchain/core/messages';
|
|
9
|
+
import { ToolDiscoveryCache } from '../toolDiscoveryCache';
|
|
10
|
+
import { Constants } from '@/common';
|
|
11
|
+
|
|
12
|
+
/**
 * Test fixture: builds a ToolMessage named `Constants.TOOL_SEARCH` whose
 * artifact carries one `tool_references` entry per supplied tool name.
 *
 * @param toolNames - Tool names to list in the artifact's `tool_references`
 * @param toolCallId - Value for the message's `tool_call_id` (default: 'tc_1')
 * @returns The constructed ToolMessage
 */
function createToolSearchResult(
  toolNames: string[],
  toolCallId: string = 'tc_1'
): ToolMessage {
  const references = toolNames.map((name) => ({ tool_name: name }));
  const summary = `Found ${toolNames.length} tools`;

  return new ToolMessage({
    content: summary,
    tool_call_id: toolCallId,
    name: Constants.TOOL_SEARCH,
    artifact: { tool_references: references },
  });
}
|
|
28
|
+
|
|
29
|
+
/**
 * Test fixture: builds an AIMessageChunk carrying one
 * `Constants.TOOL_SEARCH` tool call per supplied id.
 *
 * @param toolCallIds - Ids to assign to the generated tool calls
 * @returns The constructed AIMessageChunk
 */
function createAIWithToolCalls(toolCallIds: string[]): AIMessageChunk {
  const calls = toolCallIds.map((id) => {
    return {
      id,
      name: Constants.TOOL_SEARCH,
      args: { query: 'test' },
    };
  });

  return new AIMessageChunk({
    content: 'I will search for tools',
    tool_calls: calls,
  });
}
|
|
42
|
+
|
|
43
|
+
/**
 * Unit tests for ToolDiscoveryCache: discovery via getNewDiscoveries plus the
 * has / getAllDiscoveredTools / seed / reset members and the size accessor.
 */
describe('ToolDiscoveryCache', () => {
  let cache: ToolDiscoveryCache;

  /**
   * One tool_search round trip: the AI message issuing the call followed by
   * the tool result that answers it.
   */
  const searchExchange = (
    toolNames: string[],
    toolCallId: string
  ): BaseMessage[] => [
    createAIWithToolCalls([toolCallId]),
    createToolSearchResult(toolNames, toolCallId),
  ];

  beforeEach(() => {
    cache = new ToolDiscoveryCache();
  });

  describe('getNewDiscoveries', () => {
    it('returns empty array for empty messages', () => {
      const found = cache.getNewDiscoveries([]);
      expect(found).toEqual([]);
    });

    it('discovers tools from tool_search results', () => {
      const history: BaseMessage[] = [
        new SystemMessage('You are helpful'),
        new HumanMessage('Find tools'),
        ...searchExchange(['web_search', 'file_read'], 'tc_1'),
      ];

      const found = cache.getNewDiscoveries(history);
      expect(found).toEqual(['web_search', 'file_read']);
      expect(cache.size).toBe(2);
    });

    it('only scans new messages on subsequent calls', () => {
      const history: BaseMessage[] = [
        new HumanMessage('msg1'),
        ...searchExchange(['tool_a'], 'tc_1'),
      ];

      // Initial pass over the history
      const firstPass = cache.getNewDiscoveries(history);
      expect(firstPass).toEqual(['tool_a']);

      // Grow the SAME array so the next call sees appended messages
      history.push(
        new HumanMessage('msg2'),
        ...searchExchange(['tool_b'], 'tc_2')
      );

      // Follow-up pass: tool_a is cached already, so only tool_b is reported
      const secondPass = cache.getNewDiscoveries(history);
      expect(secondPass).toEqual(['tool_b']);
      expect(cache.size).toBe(2);
    });

    it('deduplicates tool names across scans', () => {
      const history: BaseMessage[] = [
        ...searchExchange(['tool_a', 'tool_b'], 'tc_1'),
      ];

      cache.getNewDiscoveries(history);

      // A later search returns tool_a a second time alongside a new tool
      history.push(...searchExchange(['tool_a', 'tool_c'], 'tc_2'));

      const secondPass = cache.getNewDiscoveries(history);
      // Only tool_c counts as new; tool_a was cached by the first pass
      expect(secondPass).toEqual(['tool_c']);
      expect(cache.size).toBe(3);
    });

    it('ignores non-tool-search tool messages', () => {
      const unrelatedResult = new ToolMessage({
        content: 'result',
        tool_call_id: 'tc_1',
        name: 'some_other_tool',
      });
      const history: BaseMessage[] = [
        createAIWithToolCalls(['tc_1']),
        unrelatedResult,
      ];

      const found = cache.getNewDiscoveries(history);
      expect(found).toEqual([]);
    });

    it('returns empty when no new messages since last scan', () => {
      const history: BaseMessage[] = [...searchExchange(['tool_a'], 'tc_1')];

      cache.getNewDiscoveries(history);
      // History is unchanged between the two calls
      const secondPass = cache.getNewDiscoveries(history);
      expect(secondPass).toEqual([]);
    });
  });

  describe('has', () => {
    it('returns true for discovered tools', () => {
      const history: BaseMessage[] = [...searchExchange(['tool_a'], 'tc_1')];

      cache.getNewDiscoveries(history);
      expect(cache.has('tool_a')).toBe(true);
      expect(cache.has('tool_b')).toBe(false);
    });
  });

  describe('getAllDiscoveredTools', () => {
    it('returns all discovered tool names', () => {
      const history: BaseMessage[] = [
        ...searchExchange(['tool_a', 'tool_b'], 'tc_1'),
      ];

      cache.getNewDiscoveries(history);
      const everything = cache.getAllDiscoveredTools();
      expect(everything).toEqual(expect.arrayContaining(['tool_a', 'tool_b']));
    });
  });

  describe('seed', () => {
    it('pre-populates the cache with known tool names', () => {
      cache.seed(['tool_x', 'tool_y']);
      expect(cache.size).toBe(2);
      expect(cache.has('tool_x')).toBe(true);
      expect(cache.has('tool_y')).toBe(true);
    });

    it('seeded tools are treated as already discovered', () => {
      cache.seed(['tool_a']);

      const history: BaseMessage[] = [
        ...searchExchange(['tool_a', 'tool_b'], 'tc_1'),
      ];

      // tool_a came from the seed, so tool_b is the only new discovery
      const found = cache.getNewDiscoveries(history);
      expect(found).toEqual(['tool_b']);
    });
  });

  describe('reset', () => {
    it('clears all state', () => {
      cache.seed(['tool_a']);
      expect(cache.size).toBe(1);

      cache.reset();
      expect(cache.size).toBe(0);
      expect(cache.has('tool_a')).toBe(false);
    });

    it('allows re-discovery after reset', () => {
      const history: BaseMessage[] = [...searchExchange(['tool_a'], 'tc_1')];

      cache.getNewDiscoveries(history);
      expect(cache.size).toBe(1);

      cache.reset();

      // Re-scanning the identical history must report the tool again
      const found = cache.getNewDiscoveries(history);
      expect(found).toEqual(['tool_a']);
    });
  });
});
|
|
@@ -1,4 +1,8 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import {
|
|
2
|
+
HumanMessage,
|
|
3
|
+
AIMessage,
|
|
4
|
+
SystemMessage,
|
|
5
|
+
} from '@langchain/core/messages';
|
|
2
6
|
import { MULTI_DOCUMENT_THRESHOLD } from '@/common/constants';
|
|
3
7
|
import {
|
|
4
8
|
detectDocuments,
|
|
@@ -83,9 +87,7 @@ describe('detectDocuments', () => {
|
|
|
83
87
|
it('ignores non-human messages with document patterns', () => {
|
|
84
88
|
// detectDocuments scans ALL messages — AI messages with doc patterns
|
|
85
89
|
// should still be detected (they may contain tool results with docs)
|
|
86
|
-
const messages = [
|
|
87
|
-
new AIMessage('Found: # "results.csv"\nData here'),
|
|
88
|
-
];
|
|
90
|
+
const messages = [new AIMessage('Found: # "results.csv"\nData here')];
|
|
89
91
|
const result = detectDocuments(messages);
|
|
90
92
|
expect(result.count).toBe(1);
|
|
91
93
|
});
|
|
@@ -111,19 +113,27 @@ describe('detectDocuments', () => {
|
|
|
111
113
|
|
|
112
114
|
describe('shouldInjectMultiDocHint', () => {
|
|
113
115
|
it('returns true when document count meets threshold and no AI response', () => {
|
|
114
|
-
expect(shouldInjectMultiDocHint(MULTI_DOCUMENT_THRESHOLD, false)).toBe(
|
|
116
|
+
expect(shouldInjectMultiDocHint(MULTI_DOCUMENT_THRESHOLD, false)).toBe(
|
|
117
|
+
true
|
|
118
|
+
);
|
|
115
119
|
});
|
|
116
120
|
|
|
117
121
|
it('returns true when document count exceeds threshold', () => {
|
|
118
|
-
expect(shouldInjectMultiDocHint(MULTI_DOCUMENT_THRESHOLD + 5, false)).toBe(
|
|
122
|
+
expect(shouldInjectMultiDocHint(MULTI_DOCUMENT_THRESHOLD + 5, false)).toBe(
|
|
123
|
+
true
|
|
124
|
+
);
|
|
119
125
|
});
|
|
120
126
|
|
|
121
127
|
it('returns false when document count is below threshold', () => {
|
|
122
|
-
expect(shouldInjectMultiDocHint(MULTI_DOCUMENT_THRESHOLD - 1, false)).toBe(
|
|
128
|
+
expect(shouldInjectMultiDocHint(MULTI_DOCUMENT_THRESHOLD - 1, false)).toBe(
|
|
129
|
+
false
|
|
130
|
+
);
|
|
123
131
|
});
|
|
124
132
|
|
|
125
133
|
it('returns false when AI has already responded', () => {
|
|
126
|
-
expect(shouldInjectMultiDocHint(MULTI_DOCUMENT_THRESHOLD, true)).toBe(
|
|
134
|
+
expect(shouldInjectMultiDocHint(MULTI_DOCUMENT_THRESHOLD, true)).toBe(
|
|
135
|
+
false
|
|
136
|
+
);
|
|
127
137
|
});
|
|
128
138
|
|
|
129
139
|
it('returns false with zero documents', () => {
|
|
@@ -143,7 +153,12 @@ describe('shouldInjectMultiDocHint', () => {
|
|
|
143
153
|
|
|
144
154
|
describe('buildMultiDocHintContent', () => {
|
|
145
155
|
it('includes document count in header', () => {
|
|
146
|
-
const content = buildMultiDocHintContent(4, [
|
|
156
|
+
const content = buildMultiDocHintContent(4, [
|
|
157
|
+
'a.pdf',
|
|
158
|
+
'b.pdf',
|
|
159
|
+
'c.pdf',
|
|
160
|
+
'd.pdf',
|
|
161
|
+
]);
|
|
147
162
|
expect(content).toContain('4 documents detected');
|
|
148
163
|
});
|
|
149
164
|
|
package/src/utils/index.ts
CHANGED
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
// src/utils/pruneCalibration.ts
|
|
2
|
+
import type { PruneCalibrationState } from '@/types/graph';
|
|
3
|
+
import {
|
|
4
|
+
PRUNING_EMA_ALPHA,
|
|
5
|
+
PRUNING_INITIAL_CALIBRATION,
|
|
6
|
+
} from '@/common/constants';
|
|
7
|
+
|
|
8
|
+
/**
 * Builds a fresh pruning calibration state with zero recorded iterations.
 *
 * @param initialRatio - Ratio to start from; when omitted (or nullish),
 *   PRUNING_INITIAL_CALIBRATION is used
 * @returns A new calibration state
 */
export function createPruneCalibration(
  initialRatio?: number
): PruneCalibrationState {
  const ratio = initialRatio ?? PRUNING_INITIAL_CALIBRATION;
  return { ratio, iterations: 0 };
}
|
|
22
|
+
|
|
23
|
+
/**
 * Updates the pruning calibration using Exponential Moving Average (EMA).
 *
 * Problem: Without calibration, the pruner's token estimates can diverge from
 * reality across iterations, causing either:
 * - Over-pruning (context cliff): Too many messages removed at once, losing critical tool results
 * - Under-pruning: Not enough messages removed, hitting hard token limits
 *
 * Solution: Track the ratio between estimated token usage (from our token
 * counter) and actual token usage (from the API response). Apply EMA smoothing
 * so the calibration adjusts gradually, preventing oscillation.
 *
 * The observed ratio is `estimatedTokens / actualTokens`, clamped to
 * [0.5, 2.0] before smoothing:
 *   If actual > estimated → ratio decreases → prune more aggressively
 *   If actual < estimated → ratio increases → prune less aggressively
 *
 * Invalid readings leave the calibration untouched: when either token count
 * is non-positive or non-finite (NaN / ±Infinity), or `alpha` is non-finite,
 * the input `state` object itself is returned. Without the finiteness check a
 * single NaN reading would pass the `<= 0` comparison, survive the clamp, and
 * turn the ratio into NaN for every later update.
 *
 * @param state - Current calibration state
 * @param actualTokens - Actual token count from API response (UsageMetadata)
 * @param estimatedTokens - Estimated token count from token counter
 * @param alpha - EMA smoothing factor (default: PRUNING_EMA_ALPHA)
 * @returns Updated calibration state (new object, does not mutate input), or
 *   the same `state` reference when the inputs are invalid
 */
export function updatePruneCalibration(
  state: PruneCalibrationState,
  actualTokens: number,
  estimatedTokens: number,
  alpha: number = PRUNING_EMA_ALPHA
): PruneCalibrationState {
  // Guard against division by zero, negative counts, and NaN/Infinity.
  // (`NaN <= 0` is false, so finiteness has to be checked explicitly.)
  const readingIsUsable =
    Number.isFinite(actualTokens) &&
    Number.isFinite(estimatedTokens) &&
    Number.isFinite(alpha) &&
    actualTokens > 0 &&
    estimatedTokens > 0;
  if (!readingIsUsable) {
    return state;
  }

  // Raw ratio: how much our estimate differs from reality
  const observedRatio = estimatedTokens / actualTokens;

  // Clamp to prevent extreme adjustments from outlier readings
  // Range [0.5, 2.0] means a single reading never more than doubles or halves
  const clampedRatio = Math.max(0.5, Math.min(2.0, observedRatio));

  // Apply EMA: new_ratio = α * observed + (1 - α) * previous
  const newRatio = alpha * clampedRatio + (1 - alpha) * state.ratio;

  return {
    ratio: newRatio,
    iterations: state.iterations + 1,
  };
}
|
|
73
|
+
|
|
74
|
+
/**
 * Scales a max token budget by the current calibration ratio.
 *
 * While no calibration update has been recorded (`state.iterations === 0`)
 * the budget is returned untouched; afterwards the result is
 * `maxTokens * state.ratio`, rounded down with `Math.floor`.
 *
 * @param maxTokens - Raw max token budget
 * @param state - Current calibration state
 * @returns Adjusted max token budget
 */
export function applyCalibration(
  maxTokens: number,
  state: PruneCalibrationState
): number {
  const hasCalibrationData = state.iterations !== 0;
  // Without any observations there is nothing to scale by
  return hasCalibrationData ? Math.floor(maxTokens * state.ratio) : maxTokens;
}
|