@illuma-ai/agents 1.0.98 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. package/dist/cjs/agents/AgentContext.cjs +6 -2
  2. package/dist/cjs/agents/AgentContext.cjs.map +1 -1
  3. package/dist/cjs/common/constants.cjs +53 -0
  4. package/dist/cjs/common/constants.cjs.map +1 -1
  5. package/dist/cjs/graphs/Graph.cjs +195 -31
  6. package/dist/cjs/graphs/Graph.cjs.map +1 -1
  7. package/dist/cjs/main.cjs +14 -0
  8. package/dist/cjs/main.cjs.map +1 -1
  9. package/dist/cjs/messages/dedup.cjs +95 -0
  10. package/dist/cjs/messages/dedup.cjs.map +1 -0
  11. package/dist/cjs/tools/CodeExecutor.cjs +22 -3
  12. package/dist/cjs/tools/CodeExecutor.cjs.map +1 -1
  13. package/dist/cjs/types/graph.cjs.map +1 -1
  14. package/dist/cjs/utils/pruneCalibration.cjs +78 -0
  15. package/dist/cjs/utils/pruneCalibration.cjs.map +1 -0
  16. package/dist/cjs/utils/run.cjs.map +1 -1
  17. package/dist/cjs/utils/tokens.cjs.map +1 -1
  18. package/dist/cjs/utils/toolDiscoveryCache.cjs +127 -0
  19. package/dist/cjs/utils/toolDiscoveryCache.cjs.map +1 -0
  20. package/dist/esm/agents/AgentContext.mjs +6 -2
  21. package/dist/esm/agents/AgentContext.mjs.map +1 -1
  22. package/dist/esm/common/constants.mjs +48 -1
  23. package/dist/esm/common/constants.mjs.map +1 -1
  24. package/dist/esm/graphs/Graph.mjs +196 -32
  25. package/dist/esm/graphs/Graph.mjs.map +1 -1
  26. package/dist/esm/main.mjs +4 -1
  27. package/dist/esm/main.mjs.map +1 -1
  28. package/dist/esm/messages/dedup.mjs +93 -0
  29. package/dist/esm/messages/dedup.mjs.map +1 -0
  30. package/dist/esm/tools/CodeExecutor.mjs +22 -3
  31. package/dist/esm/tools/CodeExecutor.mjs.map +1 -1
  32. package/dist/esm/types/graph.mjs.map +1 -1
  33. package/dist/esm/utils/pruneCalibration.mjs +74 -0
  34. package/dist/esm/utils/pruneCalibration.mjs.map +1 -0
  35. package/dist/esm/utils/run.mjs.map +1 -1
  36. package/dist/esm/utils/tokens.mjs.map +1 -1
  37. package/dist/esm/utils/toolDiscoveryCache.mjs +125 -0
  38. package/dist/esm/utils/toolDiscoveryCache.mjs.map +1 -0
  39. package/dist/types/agents/AgentContext.d.ts +4 -1
  40. package/dist/types/common/constants.d.ts +35 -0
  41. package/dist/types/graphs/Graph.d.ts +34 -0
  42. package/dist/types/messages/dedup.d.ts +25 -0
  43. package/dist/types/messages/index.d.ts +1 -0
  44. package/dist/types/types/graph.d.ts +63 -0
  45. package/dist/types/utils/index.d.ts +2 -0
  46. package/dist/types/utils/pruneCalibration.d.ts +43 -0
  47. package/dist/types/utils/toolDiscoveryCache.d.ts +77 -0
  48. package/package.json +1 -1
  49. package/src/agents/AgentContext.ts +7 -0
  50. package/src/common/constants.ts +56 -0
  51. package/src/graphs/Graph.ts +250 -50
  52. package/src/graphs/gapFeatures.test.ts +520 -0
  53. package/src/graphs/nonBlockingSummarization.test.ts +307 -0
  54. package/src/messages/__tests__/dedup.test.ts +166 -0
  55. package/src/messages/dedup.ts +104 -0
  56. package/src/messages/index.ts +1 -0
  57. package/src/tools/CodeExecutor.ts +22 -3
  58. package/src/types/graph.ts +73 -0
  59. package/src/utils/__tests__/pruneCalibration.test.ts +148 -0
  60. package/src/utils/__tests__/toolDiscoveryCache.test.ts +214 -0
  61. package/src/utils/contextPressure.test.ts +24 -9
  62. package/src/utils/index.ts +2 -0
  63. package/src/utils/pruneCalibration.ts +92 -0
  64. package/src/utils/run.ts +108 -108
  65. package/src/utils/tokens.ts +118 -118
  66. package/src/utils/toolDiscoveryCache.ts +150 -0
@@ -488,6 +488,73 @@ export interface StructuredOutputInput {
488
488
  strict?: boolean;
489
489
  }
490
490
 
491
+ /**
492
+ * Trigger strategy for when summarization should activate.
493
+ * - 'contextPercentage': Trigger when context utilization exceeds a threshold percentage
494
+ * - 'messageCount': Trigger when pruned message count exceeds a threshold
495
+ * - 'tokenThreshold': Trigger when total token count exceeds a raw threshold
496
+ */
497
+ export type SummarizationTriggerType =
498
+ | 'contextPercentage'
499
+ | 'messageCount'
500
+ | 'tokenThreshold';
501
+
502
+ /**
503
+ * Configuration for summarization behavior within the agent pipeline.
504
+ * All fields are optional — sensible defaults are provided via constants.
505
+ *
506
+ * @see SUMMARIZATION_CONTEXT_THRESHOLD, SUMMARIZATION_RESERVE_RATIO, PRUNING_EMA_ALPHA
507
+ */
508
+ export interface SummarizationConfig {
509
+ /**
510
+ * Strategy for when summarization triggers.
511
+ * @default 'contextPercentage'
512
+ */
513
+ triggerType?: SummarizationTriggerType;
514
+
515
+ /**
516
+ * Threshold value interpreted based on triggerType:
517
+ * - contextPercentage: 0-100 (percentage of context window)
518
+ * - messageCount: absolute count of messages pruned
519
+ * - tokenThreshold: absolute token count
520
+ * @default 80 (for contextPercentage)
521
+ */
522
+ triggerThreshold?: number;
523
+
524
+ /**
525
+ * Fraction of context window (0-1) reserved for recent messages.
526
+ * Prevents over-pruning by ensuring at least this fraction of the
527
+ * context budget is preserved as recent conversation history.
528
+ * @default 0.3
529
+ */
530
+ reserveRatio?: number;
531
+
532
+ /**
533
+ * Whether context pruning is enabled (can be disabled for debugging).
534
+ * @default true
535
+ */
536
+ contextPruning?: boolean;
537
+
538
+ /**
539
+ * Initial summary text to seed across runs.
540
+ * Different from persistedSummary: this is provided by the caller as a
541
+ * cross-conversation seed (e.g., agent personality or recurring context),
542
+ * while persistedSummary is loaded from the conversation's own history.
543
+ */
544
+ initialSummary?: string;
545
+ }
546
+
547
+ /**
548
+ * Runtime state for EMA-based pruning calibration.
549
+ * Maintained across iterations within a single run to smooth pruning decisions.
550
+ */
551
+ export interface PruneCalibrationState {
552
+ /** Current EMA calibration ratio */
553
+ ratio: number;
554
+ /** Number of calibration updates applied */
555
+ iterations: number;
556
+ }
557
+
491
558
  export interface AgentInputs {
492
559
  agentId: string;
493
560
  /** Human-readable name for the agent (used in handoff context). Defaults to agentId if not provided. */
@@ -559,4 +626,10 @@ export interface AgentInputs {
559
626
  * Set by Ranger's SummaryStore when resuming a conversation.
560
627
  */
561
628
  persistedSummary?: string;
629
+ /**
630
+ * Summarization configuration controlling trigger strategy, reserve ratio,
631
+ * and EMA calibration for pruning. When omitted, sensible defaults apply.
632
+ * @see SummarizationConfig
633
+ */
634
+ summarizationConfig?: SummarizationConfig;
562
635
  }
@@ -0,0 +1,148 @@
1
+ // src/utils/__tests__/pruneCalibration.test.ts
2
+ import {
3
+ createPruneCalibration,
4
+ updatePruneCalibration,
5
+ applyCalibration,
6
+ } from '../pruneCalibration';
7
+ import {
8
+ PRUNING_INITIAL_CALIBRATION,
9
+ PRUNING_EMA_ALPHA,
10
+ } from '@/common/constants';
11
+
12
+ describe('pruneCalibration', () => {
13
+ describe('createPruneCalibration', () => {
14
+ it('creates initial state with default ratio', () => {
15
+ const state = createPruneCalibration();
16
+ expect(state.ratio).toBe(PRUNING_INITIAL_CALIBRATION);
17
+ expect(state.iterations).toBe(0);
18
+ });
19
+
20
+ it('accepts custom initial ratio', () => {
21
+ const state = createPruneCalibration(0.85);
22
+ expect(state.ratio).toBe(0.85);
23
+ expect(state.iterations).toBe(0);
24
+ });
25
+ });
26
+
27
+ describe('updatePruneCalibration', () => {
28
+ it('adjusts ratio when actual > estimated (over-counting)', () => {
29
+ const state = createPruneCalibration();
30
+ // Actual: 1000 tokens, estimated: 1500 tokens (our counter over-estimates)
31
+ // observedRatio = 1500/1000 = 1.5
32
+ // newRatio = 0.3 * 1.5 + 0.7 * 1.0 = 0.45 + 0.7 = 1.15
33
+ const updated = updatePruneCalibration(state, 1000, 1500);
34
+ expect(updated.ratio).toBeCloseTo(1.15, 2);
35
+ expect(updated.iterations).toBe(1);
36
+ });
37
+
38
+ it('adjusts ratio when actual < estimated (under-counting)', () => {
39
+ const state = createPruneCalibration();
40
+ // Actual: 2000 tokens, estimated: 1000 tokens (our counter under-estimates)
41
+ // observedRatio = 1000/2000 = 0.5
42
+ // newRatio = 0.3 * 0.5 + 0.7 * 1.0 = 0.15 + 0.7 = 0.85
43
+ const updated = updatePruneCalibration(state, 2000, 1000);
44
+ expect(updated.ratio).toBeCloseTo(0.85, 2);
45
+ expect(updated.iterations).toBe(1);
46
+ });
47
+
48
+ it('converges with consistent readings', () => {
49
+ let state = createPruneCalibration();
50
+
51
+ // Simulate 10 iterations where actual is consistently 1.5x estimated
52
+ for (let i = 0; i < 10; i++) {
53
+ state = updatePruneCalibration(state, 1500, 1000);
54
+ }
55
+
56
+ // Should converge toward ~0.667 (estimated/actual = 1000/1500)
57
+ expect(state.ratio).toBeCloseTo(0.667, 1);
58
+ expect(state.iterations).toBe(10);
59
+ });
60
+
61
+ it('clamps extreme ratios to prevent wild adjustments', () => {
62
+ const state = createPruneCalibration();
63
+
64
+ // Extreme case: estimated 10x actual (should be clamped to 2.0)
65
+ const updated = updatePruneCalibration(state, 100, 10000);
66
+ // Clamped observedRatio = 2.0
67
+ // newRatio = 0.3 * 2.0 + 0.7 * 1.0 = 0.6 + 0.7 = 1.3
68
+ expect(updated.ratio).toBeCloseTo(1.3, 2);
69
+ });
70
+
71
+ it('does not update with invalid inputs', () => {
72
+ const state = createPruneCalibration();
73
+
74
+ expect(updatePruneCalibration(state, 0, 1000)).toBe(state);
75
+ expect(updatePruneCalibration(state, 1000, 0)).toBe(state);
76
+ expect(updatePruneCalibration(state, -1, 1000)).toBe(state);
77
+ });
78
+
79
+ it('does not mutate input state', () => {
80
+ const state = createPruneCalibration();
81
+ const original = { ...state };
82
+
83
+ updatePruneCalibration(state, 1000, 1500);
84
+ expect(state).toEqual(original);
85
+ });
86
+
87
+ it('accepts custom alpha', () => {
88
+ const state = createPruneCalibration();
89
+ // With alpha=1.0, fully adapts to new reading
90
+ const updated = updatePruneCalibration(state, 1000, 1500, 1.0);
91
+ // observedRatio = 1.5, clamped to 1.5
92
+ // newRatio = 1.0 * 1.5 + 0.0 * 1.0 = 1.5
93
+ expect(updated.ratio).toBeCloseTo(1.5, 2);
94
+ });
95
+ });
96
+
97
+ describe('applyCalibration', () => {
98
+ it('returns raw budget when no iterations have occurred', () => {
99
+ const state = createPruneCalibration();
100
+ expect(applyCalibration(10000, state)).toBe(10000);
101
+ });
102
+
103
+ it('adjusts budget after calibration', () => {
104
+ let state = createPruneCalibration();
105
+ state = updatePruneCalibration(state, 1000, 1500);
106
+ // ratio ≈ 1.15, so budget is increased (our counter over-estimates)
107
+ const adjusted = applyCalibration(10000, state);
108
+ expect(adjusted).toBeCloseTo(11500, -2);
109
+ });
110
+
111
+ it('decreases budget when under-counting', () => {
112
+ let state = createPruneCalibration();
113
+ state = updatePruneCalibration(state, 2000, 1000);
114
+ // ratio ≈ 0.85, so budget is decreased (our counter under-estimates)
115
+ const adjusted = applyCalibration(10000, state);
116
+ expect(adjusted).toBeLessThan(10000);
117
+ });
118
+
119
+ it('returns floor of the adjusted value', () => {
120
+ let state = createPruneCalibration();
121
+ state = updatePruneCalibration(state, 1000, 1500);
122
+ const adjusted = applyCalibration(10001, state);
123
+ expect(Number.isInteger(adjusted)).toBe(true);
124
+ });
125
+ });
126
+
127
+ describe('multi-iteration convergence', () => {
128
+ it('smoothly transitions when accuracy changes', () => {
129
+ let state = createPruneCalibration();
130
+
131
+ // First 5 iterations: estimated is 1.5x actual
132
+ for (let i = 0; i < 5; i++) {
133
+ state = updatePruneCalibration(state, 1000, 1500);
134
+ }
135
+ const ratio5 = state.ratio;
136
+
137
+ // Next 5 iterations: estimated matches actual
138
+ for (let i = 0; i < 5; i++) {
139
+ state = updatePruneCalibration(state, 1000, 1000);
140
+ }
141
+ const ratio10 = state.ratio;
142
+
143
+ // Ratio should move toward 1.0 but still carry some history
144
+ expect(ratio10).toBeLessThan(ratio5);
145
+ expect(ratio10).toBeGreaterThan(0.9);
146
+ });
147
+ });
148
+ });
@@ -0,0 +1,214 @@
1
+ // src/utils/__tests__/toolDiscoveryCache.test.ts
2
+ import {
3
+ ToolMessage,
4
+ AIMessageChunk,
5
+ HumanMessage,
6
+ SystemMessage,
7
+ } from '@langchain/core/messages';
8
+ import type { BaseMessage } from '@langchain/core/messages';
9
+ import { ToolDiscoveryCache } from '../toolDiscoveryCache';
10
+ import { Constants } from '@/common';
11
+
12
+ /**
13
+ * Creates a mock tool_search result message.
14
+ */
15
+ function createToolSearchResult(
16
+ toolNames: string[],
17
+ toolCallId: string = 'tc_1'
18
+ ): ToolMessage {
19
+ return new ToolMessage({
20
+ content: `Found ${toolNames.length} tools`,
21
+ tool_call_id: toolCallId,
22
+ name: Constants.TOOL_SEARCH,
23
+ artifact: {
24
+ tool_references: toolNames.map((name) => ({ tool_name: name })),
25
+ },
26
+ });
27
+ }
28
+
29
+ /**
30
+ * Creates a mock AI message with tool calls.
31
+ */
32
+ function createAIWithToolCalls(toolCallIds: string[]): AIMessageChunk {
33
+ return new AIMessageChunk({
34
+ content: 'I will search for tools',
35
+ tool_calls: toolCallIds.map((id) => ({
36
+ id,
37
+ name: Constants.TOOL_SEARCH,
38
+ args: { query: 'test' },
39
+ })),
40
+ });
41
+ }
42
+
43
+ describe('ToolDiscoveryCache', () => {
44
+ let cache: ToolDiscoveryCache;
45
+
46
+ beforeEach(() => {
47
+ cache = new ToolDiscoveryCache();
48
+ });
49
+
50
+ describe('getNewDiscoveries', () => {
51
+ it('returns empty array for empty messages', () => {
52
+ expect(cache.getNewDiscoveries([])).toEqual([]);
53
+ });
54
+
55
+ it('discovers tools from tool_search results', () => {
56
+ const messages: BaseMessage[] = [
57
+ new SystemMessage('You are helpful'),
58
+ new HumanMessage('Find tools'),
59
+ createAIWithToolCalls(['tc_1']),
60
+ createToolSearchResult(['web_search', 'file_read'], 'tc_1'),
61
+ ];
62
+
63
+ const result = cache.getNewDiscoveries(messages);
64
+ expect(result).toEqual(['web_search', 'file_read']);
65
+ expect(cache.size).toBe(2);
66
+ });
67
+
68
+ it('only scans new messages on subsequent calls', () => {
69
+ const messages: BaseMessage[] = [
70
+ new HumanMessage('msg1'),
71
+ createAIWithToolCalls(['tc_1']),
72
+ createToolSearchResult(['tool_a'], 'tc_1'),
73
+ ];
74
+
75
+ // First scan
76
+ const first = cache.getNewDiscoveries(messages);
77
+ expect(first).toEqual(['tool_a']);
78
+
79
+ // Add more messages
80
+ messages.push(
81
+ new HumanMessage('msg2'),
82
+ createAIWithToolCalls(['tc_2']),
83
+ createToolSearchResult(['tool_b'], 'tc_2')
84
+ );
85
+
86
+ // Second scan: only finds tool_b (tool_a already cached)
87
+ const second = cache.getNewDiscoveries(messages);
88
+ expect(second).toEqual(['tool_b']);
89
+ expect(cache.size).toBe(2);
90
+ });
91
+
92
+ it('deduplicates tool names across scans', () => {
93
+ const messages: BaseMessage[] = [
94
+ createAIWithToolCalls(['tc_1']),
95
+ createToolSearchResult(['tool_a', 'tool_b'], 'tc_1'),
96
+ ];
97
+
98
+ cache.getNewDiscoveries(messages);
99
+
100
+ // Add another search that returns tool_a again
101
+ messages.push(
102
+ createAIWithToolCalls(['tc_2']),
103
+ createToolSearchResult(['tool_a', 'tool_c'], 'tc_2')
104
+ );
105
+
106
+ const second = cache.getNewDiscoveries(messages);
107
+ // tool_a is already cached, only tool_c is new
108
+ expect(second).toEqual(['tool_c']);
109
+ expect(cache.size).toBe(3);
110
+ });
111
+
112
+ it('ignores non-tool-search tool messages', () => {
113
+ const messages: BaseMessage[] = [
114
+ createAIWithToolCalls(['tc_1']),
115
+ new ToolMessage({
116
+ content: 'result',
117
+ tool_call_id: 'tc_1',
118
+ name: 'some_other_tool',
119
+ }),
120
+ ];
121
+
122
+ const result = cache.getNewDiscoveries(messages);
123
+ expect(result).toEqual([]);
124
+ });
125
+
126
+ it('returns empty when no new messages since last scan', () => {
127
+ const messages: BaseMessage[] = [
128
+ createAIWithToolCalls(['tc_1']),
129
+ createToolSearchResult(['tool_a'], 'tc_1'),
130
+ ];
131
+
132
+ cache.getNewDiscoveries(messages);
133
+ // No new messages added
134
+ const second = cache.getNewDiscoveries(messages);
135
+ expect(second).toEqual([]);
136
+ });
137
+ });
138
+
139
+ describe('has', () => {
140
+ it('returns true for discovered tools', () => {
141
+ const messages: BaseMessage[] = [
142
+ createAIWithToolCalls(['tc_1']),
143
+ createToolSearchResult(['tool_a'], 'tc_1'),
144
+ ];
145
+
146
+ cache.getNewDiscoveries(messages);
147
+ expect(cache.has('tool_a')).toBe(true);
148
+ expect(cache.has('tool_b')).toBe(false);
149
+ });
150
+ });
151
+
152
+ describe('getAllDiscoveredTools', () => {
153
+ it('returns all discovered tool names', () => {
154
+ const messages: BaseMessage[] = [
155
+ createAIWithToolCalls(['tc_1']),
156
+ createToolSearchResult(['tool_a', 'tool_b'], 'tc_1'),
157
+ ];
158
+
159
+ cache.getNewDiscoveries(messages);
160
+ expect(cache.getAllDiscoveredTools()).toEqual(
161
+ expect.arrayContaining(['tool_a', 'tool_b'])
162
+ );
163
+ });
164
+ });
165
+
166
+ describe('seed', () => {
167
+ it('pre-populates the cache with known tool names', () => {
168
+ cache.seed(['tool_x', 'tool_y']);
169
+ expect(cache.size).toBe(2);
170
+ expect(cache.has('tool_x')).toBe(true);
171
+ expect(cache.has('tool_y')).toBe(true);
172
+ });
173
+
174
+ it('seeded tools are treated as already discovered', () => {
175
+ cache.seed(['tool_a']);
176
+
177
+ const messages: BaseMessage[] = [
178
+ createAIWithToolCalls(['tc_1']),
179
+ createToolSearchResult(['tool_a', 'tool_b'], 'tc_1'),
180
+ ];
181
+
182
+ // tool_a is already seeded, only tool_b should be new
183
+ const result = cache.getNewDiscoveries(messages);
184
+ expect(result).toEqual(['tool_b']);
185
+ });
186
+ });
187
+
188
+ describe('reset', () => {
189
+ it('clears all state', () => {
190
+ cache.seed(['tool_a']);
191
+ expect(cache.size).toBe(1);
192
+
193
+ cache.reset();
194
+ expect(cache.size).toBe(0);
195
+ expect(cache.has('tool_a')).toBe(false);
196
+ });
197
+
198
+ it('allows re-discovery after reset', () => {
199
+ const messages: BaseMessage[] = [
200
+ createAIWithToolCalls(['tc_1']),
201
+ createToolSearchResult(['tool_a'], 'tc_1'),
202
+ ];
203
+
204
+ cache.getNewDiscoveries(messages);
205
+ expect(cache.size).toBe(1);
206
+
207
+ cache.reset();
208
+
209
+ // Same messages should produce discoveries again
210
+ const result = cache.getNewDiscoveries(messages);
211
+ expect(result).toEqual(['tool_a']);
212
+ });
213
+ });
214
+ });
@@ -1,4 +1,8 @@
1
- import { HumanMessage, AIMessage, SystemMessage } from '@langchain/core/messages';
1
+ import {
2
+ HumanMessage,
3
+ AIMessage,
4
+ SystemMessage,
5
+ } from '@langchain/core/messages';
2
6
  import { MULTI_DOCUMENT_THRESHOLD } from '@/common/constants';
3
7
  import {
4
8
  detectDocuments,
@@ -83,9 +87,7 @@ describe('detectDocuments', () => {
83
87
  it('ignores non-human messages with document patterns', () => {
84
88
  // detectDocuments scans ALL messages — AI messages with doc patterns
85
89
  // should still be detected (they may contain tool results with docs)
86
- const messages = [
87
- new AIMessage('Found: # "results.csv"\nData here'),
88
- ];
90
+ const messages = [new AIMessage('Found: # "results.csv"\nData here')];
89
91
  const result = detectDocuments(messages);
90
92
  expect(result.count).toBe(1);
91
93
  });
@@ -111,19 +113,27 @@ describe('detectDocuments', () => {
111
113
 
112
114
  describe('shouldInjectMultiDocHint', () => {
113
115
  it('returns true when document count meets threshold and no AI response', () => {
114
- expect(shouldInjectMultiDocHint(MULTI_DOCUMENT_THRESHOLD, false)).toBe(true);
116
+ expect(shouldInjectMultiDocHint(MULTI_DOCUMENT_THRESHOLD, false)).toBe(
117
+ true
118
+ );
115
119
  });
116
120
 
117
121
  it('returns true when document count exceeds threshold', () => {
118
- expect(shouldInjectMultiDocHint(MULTI_DOCUMENT_THRESHOLD + 5, false)).toBe(true);
122
+ expect(shouldInjectMultiDocHint(MULTI_DOCUMENT_THRESHOLD + 5, false)).toBe(
123
+ true
124
+ );
119
125
  });
120
126
 
121
127
  it('returns false when document count is below threshold', () => {
122
- expect(shouldInjectMultiDocHint(MULTI_DOCUMENT_THRESHOLD - 1, false)).toBe(false);
128
+ expect(shouldInjectMultiDocHint(MULTI_DOCUMENT_THRESHOLD - 1, false)).toBe(
129
+ false
130
+ );
123
131
  });
124
132
 
125
133
  it('returns false when AI has already responded', () => {
126
- expect(shouldInjectMultiDocHint(MULTI_DOCUMENT_THRESHOLD, true)).toBe(false);
134
+ expect(shouldInjectMultiDocHint(MULTI_DOCUMENT_THRESHOLD, true)).toBe(
135
+ false
136
+ );
127
137
  });
128
138
 
129
139
  it('returns false with zero documents', () => {
@@ -143,7 +153,12 @@ describe('shouldInjectMultiDocHint', () => {
143
153
 
144
154
  describe('buildMultiDocHintContent', () => {
145
155
  it('includes document count in header', () => {
146
- const content = buildMultiDocHintContent(4, ['a.pdf', 'b.pdf', 'c.pdf', 'd.pdf']);
156
+ const content = buildMultiDocHintContent(4, [
157
+ 'a.pdf',
158
+ 'b.pdf',
159
+ 'c.pdf',
160
+ 'd.pdf',
161
+ ]);
147
162
  expect(content).toContain('4 documents detected');
148
163
  });
149
164
 
@@ -9,3 +9,5 @@ export * from './contextAnalytics';
9
9
  export * from './schema';
10
10
  export * from './toolCallContinuation';
11
11
  export * from './contextPressure';
12
+ export * from './toolDiscoveryCache';
13
+ export * from './pruneCalibration';
@@ -0,0 +1,92 @@
1
+ // src/utils/pruneCalibration.ts
2
+ import type { PruneCalibrationState } from '@/types/graph';
3
+ import {
4
+ PRUNING_EMA_ALPHA,
5
+ PRUNING_INITIAL_CALIBRATION,
6
+ } from '@/common/constants';
7
+
8
+ /**
9
+ * Creates an initial pruning calibration state.
10
+ *
11
+ * @param initialRatio - Starting calibration ratio (default: 1.0)
12
+ * @returns Fresh calibration state
13
+ */
14
+ export function createPruneCalibration(
15
+ initialRatio?: number
16
+ ): PruneCalibrationState {
17
+ return {
18
+ ratio: initialRatio ?? PRUNING_INITIAL_CALIBRATION,
19
+ iterations: 0,
20
+ };
21
+ }
22
+
23
+ /**
24
+ * Updates the pruning calibration using Exponential Moving Average (EMA).
25
+ *
26
+ * Problem: Without calibration, the pruner's token estimates can diverge from
27
+ * reality across iterations, causing either:
28
+ * - Over-pruning (context cliff): Too many messages removed at once, losing critical tool results
29
+ * - Under-pruning: Not enough messages removed, hitting hard token limits
30
+ *
31
+ * Solution: Track the ratio between actual token usage (from API response) and
32
+ * estimated token usage (from our token counter). Apply EMA smoothing so the
33
+ * calibration adjusts gradually, preventing oscillation.
34
+ *
35
+ * The calibration ratio is applied to maxTokens in the pruner:
36
+ * effectiveMaxTokens = maxTokens * calibrationRatio
37
+ *
38
+ * If actual > estimated → ratio decreases → prune more aggressively
39
+ * If actual < estimated → ratio increases → prune less aggressively
40
+ *
41
+ * @param state - Current calibration state
42
+ * @param actualTokens - Actual token count from API response (UsageMetadata)
43
+ * @param estimatedTokens - Estimated token count from token counter
44
+ * @param alpha - EMA smoothing factor (default: PRUNING_EMA_ALPHA)
45
+ * @returns Updated calibration state (new object, does not mutate input)
46
+ */
47
+ export function updatePruneCalibration(
48
+ state: PruneCalibrationState,
49
+ actualTokens: number,
50
+ estimatedTokens: number,
51
+ alpha: number = PRUNING_EMA_ALPHA
52
+ ): PruneCalibrationState {
53
+ // Guard against division by zero or invalid inputs
54
+ if (estimatedTokens <= 0 || actualTokens <= 0) {
55
+ return state;
56
+ }
57
+
58
+ // Raw ratio: how much our estimate differs from reality
59
+ const observedRatio = estimatedTokens / actualTokens;
60
+
61
+ // Clamp to prevent extreme adjustments from outlier readings
62
+ // Range [0.5, 2.0] means we never more than double or halve the budget
63
+ const clampedRatio = Math.max(0.5, Math.min(2.0, observedRatio));
64
+
65
+ // Apply EMA: new_ratio = α * observed + (1 - α) * previous
66
+ const newRatio = alpha * clampedRatio + (1 - alpha) * state.ratio;
67
+
68
+ return {
69
+ ratio: newRatio,
70
+ iterations: state.iterations + 1,
71
+ };
72
+ }
73
+
74
+ /**
75
+ * Applies the calibration ratio to a max token budget.
76
+ * The ratio adjusts the effective budget so pruning is more or less aggressive
77
+ * based on observed vs. estimated token divergence.
78
+ *
79
+ * @param maxTokens - Raw max token budget
80
+ * @param state - Current calibration state
81
+ * @returns Adjusted max token budget
82
+ */
83
+ export function applyCalibration(
84
+ maxTokens: number,
85
+ state: PruneCalibrationState
86
+ ): number {
87
+ if (state.iterations === 0) {
88
+ // No calibration data yet — use raw budget
89
+ return maxTokens;
90
+ }
91
+ return Math.floor(maxTokens * state.ratio);
92
+ }