@falai/agent 1.1.3 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -0
- package/dist/cjs/core/Agent.d.ts +17 -1
- package/dist/cjs/core/Agent.d.ts.map +1 -1
- package/dist/cjs/core/Agent.js +47 -0
- package/dist/cjs/core/Agent.js.map +1 -1
- package/dist/cjs/core/BatchPromptBuilder.d.ts +3 -0
- package/dist/cjs/core/BatchPromptBuilder.d.ts.map +1 -1
- package/dist/cjs/core/BatchPromptBuilder.js +4 -1
- package/dist/cjs/core/BatchPromptBuilder.js.map +1 -1
- package/dist/cjs/core/CompactionEngine.d.ts +65 -0
- package/dist/cjs/core/CompactionEngine.d.ts.map +1 -0
- package/dist/cjs/core/CompactionEngine.js +251 -0
- package/dist/cjs/core/CompactionEngine.js.map +1 -0
- package/dist/cjs/core/PromptComposer.d.ts +8 -1
- package/dist/cjs/core/PromptComposer.d.ts.map +1 -1
- package/dist/cjs/core/PromptComposer.js +238 -126
- package/dist/cjs/core/PromptComposer.js.map +1 -1
- package/dist/cjs/core/PromptSectionCache.d.ts +57 -0
- package/dist/cjs/core/PromptSectionCache.d.ts.map +1 -0
- package/dist/cjs/core/PromptSectionCache.js +108 -0
- package/dist/cjs/core/PromptSectionCache.js.map +1 -0
- package/dist/cjs/core/ResponseEngine.d.ts +3 -2
- package/dist/cjs/core/ResponseEngine.d.ts.map +1 -1
- package/dist/cjs/core/ResponseEngine.js +8 -8
- package/dist/cjs/core/ResponseEngine.js.map +1 -1
- package/dist/cjs/core/ResponseModal.d.ts.map +1 -1
- package/dist/cjs/core/ResponseModal.js +120 -70
- package/dist/cjs/core/ResponseModal.js.map +1 -1
- package/dist/cjs/core/ResponsePipeline.d.ts +2 -1
- package/dist/cjs/core/ResponsePipeline.d.ts.map +1 -1
- package/dist/cjs/core/ResponsePipeline.js +17 -19
- package/dist/cjs/core/ResponsePipeline.js.map +1 -1
- package/dist/cjs/core/RoutingEngine.d.ts +10 -0
- package/dist/cjs/core/RoutingEngine.d.ts.map +1 -1
- package/dist/cjs/core/RoutingEngine.js +5 -4
- package/dist/cjs/core/RoutingEngine.js.map +1 -1
- package/dist/cjs/core/SessionManager.d.ts.map +1 -1
- package/dist/cjs/core/SessionManager.js +20 -0
- package/dist/cjs/core/SessionManager.js.map +1 -1
- package/dist/cjs/core/StreamingToolExecutor.d.ts +142 -0
- package/dist/cjs/core/StreamingToolExecutor.d.ts.map +1 -0
- package/dist/cjs/core/StreamingToolExecutor.js +455 -0
- package/dist/cjs/core/StreamingToolExecutor.js.map +1 -0
- package/dist/cjs/core/ToolManager.d.ts +18 -1
- package/dist/cjs/core/ToolManager.d.ts.map +1 -1
- package/dist/cjs/core/ToolManager.js +91 -0
- package/dist/cjs/core/ToolManager.js.map +1 -1
- package/dist/cjs/index.d.ts +5 -1
- package/dist/cjs/index.d.ts.map +1 -1
- package/dist/cjs/index.js +8 -2
- package/dist/cjs/index.js.map +1 -1
- package/dist/cjs/providers/AnthropicProvider.d.ts +7 -0
- package/dist/cjs/providers/AnthropicProvider.d.ts.map +1 -1
- package/dist/cjs/providers/AnthropicProvider.js +109 -19
- package/dist/cjs/providers/AnthropicProvider.js.map +1 -1
- package/dist/cjs/providers/GeminiProvider.d.ts +32 -0
- package/dist/cjs/providers/GeminiProvider.d.ts.map +1 -1
- package/dist/cjs/providers/GeminiProvider.js +160 -53
- package/dist/cjs/providers/GeminiProvider.js.map +1 -1
- package/dist/cjs/providers/OpenAIProvider.d.ts +5 -0
- package/dist/cjs/providers/OpenAIProvider.d.ts.map +1 -1
- package/dist/cjs/providers/OpenAIProvider.js +65 -18
- package/dist/cjs/providers/OpenAIProvider.js.map +1 -1
- package/dist/cjs/providers/OpenRouterProvider.d.ts +5 -0
- package/dist/cjs/providers/OpenRouterProvider.d.ts.map +1 -1
- package/dist/cjs/providers/OpenRouterProvider.js +57 -18
- package/dist/cjs/providers/OpenRouterProvider.js.map +1 -1
- package/dist/cjs/types/agent.d.ts +44 -0
- package/dist/cjs/types/agent.d.ts.map +1 -1
- package/dist/cjs/types/agent.js.map +1 -1
- package/dist/cjs/types/ai.d.ts +2 -2
- package/dist/cjs/types/ai.d.ts.map +1 -1
- package/dist/cjs/types/compaction.d.ts +50 -0
- package/dist/cjs/types/compaction.d.ts.map +1 -0
- package/dist/cjs/types/compaction.js +6 -0
- package/dist/cjs/types/compaction.js.map +1 -0
- package/dist/cjs/types/index.d.ts +4 -2
- package/dist/cjs/types/index.d.ts.map +1 -1
- package/dist/cjs/types/index.js.map +1 -1
- package/dist/cjs/types/tool.d.ts +84 -0
- package/dist/cjs/types/tool.d.ts.map +1 -1
- package/dist/core/Agent.d.ts +17 -1
- package/dist/core/Agent.d.ts.map +1 -1
- package/dist/core/Agent.js +47 -0
- package/dist/core/Agent.js.map +1 -1
- package/dist/core/BatchPromptBuilder.d.ts +3 -0
- package/dist/core/BatchPromptBuilder.d.ts.map +1 -1
- package/dist/core/BatchPromptBuilder.js +4 -1
- package/dist/core/BatchPromptBuilder.js.map +1 -1
- package/dist/core/CompactionEngine.d.ts +65 -0
- package/dist/core/CompactionEngine.d.ts.map +1 -0
- package/dist/core/CompactionEngine.js +244 -0
- package/dist/core/CompactionEngine.js.map +1 -0
- package/dist/core/PromptComposer.d.ts +8 -1
- package/dist/core/PromptComposer.d.ts.map +1 -1
- package/dist/core/PromptComposer.js +238 -126
- package/dist/core/PromptComposer.js.map +1 -1
- package/dist/core/PromptSectionCache.d.ts +57 -0
- package/dist/core/PromptSectionCache.d.ts.map +1 -0
- package/dist/core/PromptSectionCache.js +104 -0
- package/dist/core/PromptSectionCache.js.map +1 -0
- package/dist/core/ResponseEngine.d.ts +3 -2
- package/dist/core/ResponseEngine.d.ts.map +1 -1
- package/dist/core/ResponseEngine.js +8 -8
- package/dist/core/ResponseEngine.js.map +1 -1
- package/dist/core/ResponseModal.d.ts.map +1 -1
- package/dist/core/ResponseModal.js +121 -71
- package/dist/core/ResponseModal.js.map +1 -1
- package/dist/core/ResponsePipeline.d.ts +2 -1
- package/dist/core/ResponsePipeline.d.ts.map +1 -1
- package/dist/core/ResponsePipeline.js +18 -20
- package/dist/core/ResponsePipeline.js.map +1 -1
- package/dist/core/RoutingEngine.d.ts +10 -0
- package/dist/core/RoutingEngine.d.ts.map +1 -1
- package/dist/core/RoutingEngine.js +6 -5
- package/dist/core/RoutingEngine.js.map +1 -1
- package/dist/core/SessionManager.d.ts.map +1 -1
- package/dist/core/SessionManager.js +17 -0
- package/dist/core/SessionManager.js.map +1 -1
- package/dist/core/StreamingToolExecutor.d.ts +142 -0
- package/dist/core/StreamingToolExecutor.d.ts.map +1 -0
- package/dist/core/StreamingToolExecutor.js +448 -0
- package/dist/core/StreamingToolExecutor.js.map +1 -0
- package/dist/core/ToolManager.d.ts +18 -1
- package/dist/core/ToolManager.d.ts.map +1 -1
- package/dist/core/ToolManager.js +91 -0
- package/dist/core/ToolManager.js.map +1 -1
- package/dist/index.d.ts +5 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +3 -0
- package/dist/index.js.map +1 -1
- package/dist/providers/AnthropicProvider.d.ts +7 -0
- package/dist/providers/AnthropicProvider.d.ts.map +1 -1
- package/dist/providers/AnthropicProvider.js +109 -19
- package/dist/providers/AnthropicProvider.js.map +1 -1
- package/dist/providers/GeminiProvider.d.ts +32 -0
- package/dist/providers/GeminiProvider.d.ts.map +1 -1
- package/dist/providers/GeminiProvider.js +160 -53
- package/dist/providers/GeminiProvider.js.map +1 -1
- package/dist/providers/OpenAIProvider.d.ts +5 -0
- package/dist/providers/OpenAIProvider.d.ts.map +1 -1
- package/dist/providers/OpenAIProvider.js +65 -18
- package/dist/providers/OpenAIProvider.js.map +1 -1
- package/dist/providers/OpenRouterProvider.d.ts +5 -0
- package/dist/providers/OpenRouterProvider.d.ts.map +1 -1
- package/dist/providers/OpenRouterProvider.js +57 -18
- package/dist/providers/OpenRouterProvider.js.map +1 -1
- package/dist/types/agent.d.ts +44 -0
- package/dist/types/agent.d.ts.map +1 -1
- package/dist/types/agent.js.map +1 -1
- package/dist/types/ai.d.ts +2 -2
- package/dist/types/ai.d.ts.map +1 -1
- package/dist/types/compaction.d.ts +50 -0
- package/dist/types/compaction.d.ts.map +1 -0
- package/dist/types/compaction.js +5 -0
- package/dist/types/compaction.js.map +1 -0
- package/dist/types/index.d.ts +4 -2
- package/dist/types/index.d.ts.map +1 -1
- package/dist/types/index.js.map +1 -1
- package/dist/types/tool.d.ts +84 -0
- package/dist/types/tool.d.ts.map +1 -1
- package/docs/api/overview.md +140 -0
- package/docs/core/tools/enhanced-tool.md +186 -0
- package/docs/core/tools/streaming-execution.md +161 -0
- package/docs/guides/context-compaction.md +96 -0
- package/docs/guides/prompt-optimization.md +164 -0
- package/examples/advanced-patterns/context-compaction.ts +223 -0
- package/examples/advanced-patterns/streaming-responses.ts +85 -7
- package/examples/tools/enhanced-tool-metadata.ts +268 -0
- package/examples/tools/streaming-tool-execution.ts +283 -0
- package/package.json +1 -1
- package/src/core/Agent.ts +58 -2
- package/src/core/BatchPromptBuilder.ts +4 -1
- package/src/core/CompactionEngine.ts +318 -0
- package/src/core/PromptComposer.ts +259 -156
- package/src/core/PromptSectionCache.ts +136 -0
- package/src/core/ResponseEngine.ts +7 -11
- package/src/core/ResponseModal.ts +133 -83
- package/src/core/ResponsePipeline.ts +22 -22
- package/src/core/RoutingEngine.ts +16 -5
- package/src/core/SessionManager.ts +19 -0
- package/src/core/StreamingToolExecutor.ts +572 -0
- package/src/core/ToolManager.ts +151 -41
- package/src/index.ts +14 -0
- package/src/providers/AnthropicProvider.ts +121 -24
- package/src/providers/GeminiProvider.ts +174 -54
- package/src/providers/OpenAIProvider.ts +77 -25
- package/src/providers/OpenRouterProvider.ts +68 -25
- package/src/types/agent.ts +45 -0
- package/src/types/ai.ts +2 -2
- package/src/types/compaction.ts +52 -0
- package/src/types/index.ts +35 -14
- package/src/types/tool.ts +108 -0
package/src/core/Agent.ts
CHANGED
|
@@ -18,6 +18,7 @@ import type {
|
|
|
18
18
|
ValidationError,
|
|
19
19
|
ValidationResult,
|
|
20
20
|
AiProvider,
|
|
21
|
+
CompactionOptions,
|
|
21
22
|
} from "../types";
|
|
22
23
|
import { CompositionMode } from "../types";
|
|
23
24
|
import type { StreamOptions, GenerateOptions, RespondParams } from "./ResponseModal";
|
|
@@ -35,9 +36,11 @@ import { Step } from "./Step";
|
|
|
35
36
|
import { PersistenceManager } from "./PersistenceManager";
|
|
36
37
|
import { SessionManager } from "./SessionManager";
|
|
37
38
|
import { RoutingEngine } from "./RoutingEngine";
|
|
39
|
+
import { PromptSectionCache } from "./PromptSectionCache";
|
|
38
40
|
|
|
39
41
|
import { ResponseModal } from "./ResponseModal";
|
|
40
42
|
import { ToolManager } from "./ToolManager";
|
|
43
|
+
import { CompactionEngine } from "./CompactionEngine";
|
|
41
44
|
|
|
42
45
|
/**
|
|
43
46
|
* Error thrown when data validation fails
|
|
@@ -78,6 +81,8 @@ export class Agent<TContext = any, TData = any> {
|
|
|
78
81
|
private _knowledgeBase: Record<string, unknown> = {};
|
|
79
82
|
private _schema?: StructuredSchema;
|
|
80
83
|
private _collectedData: Partial<TData> = {};
|
|
84
|
+
private _compactionOptions?: CompactionOptions;
|
|
85
|
+
private _promptSectionCache: PromptSectionCache;
|
|
81
86
|
|
|
82
87
|
/** Public session manager for easy session management */
|
|
83
88
|
public session: SessionManager<TData>;
|
|
@@ -125,9 +130,14 @@ export class Agent<TContext = any, TData = any> {
|
|
|
125
130
|
// Initialize current session if provided
|
|
126
131
|
this._currentSession = options.session;
|
|
127
132
|
|
|
133
|
+
// Initialize prompt section cache
|
|
134
|
+
this._promptSectionCache = new PromptSectionCache(options.promptCache);
|
|
135
|
+
|
|
128
136
|
// Initialize routing engine
|
|
129
137
|
this._routingEngine = new RoutingEngine<TContext, TData>({
|
|
130
138
|
routeSwitchMargin: options.routeSwitchMargin,
|
|
139
|
+
onRouteSwitch: () => this.invalidateRouteSections(),
|
|
140
|
+
promptSectionCache: this._promptSectionCache,
|
|
131
141
|
});
|
|
132
142
|
|
|
133
143
|
// Initialize ResponseModal for handling all response generation
|
|
@@ -205,6 +215,20 @@ export class Agent<TContext = any, TData = any> {
|
|
|
205
215
|
this._knowledgeBase = { ...options.knowledgeBase };
|
|
206
216
|
}
|
|
207
217
|
|
|
218
|
+
// Initialize compaction options if configured
|
|
219
|
+
if (options.compaction && options.compaction.enabled !== false) {
|
|
220
|
+
const compactionOptions: CompactionOptions = {
|
|
221
|
+
maxTokens: options.compaction.maxTokens,
|
|
222
|
+
compactionThreshold: options.compaction.compactionThreshold ?? 0.8,
|
|
223
|
+
preserveRecentCount: options.compaction.preserveRecentCount ?? 4,
|
|
224
|
+
maxToolResultChars: options.compaction.maxToolResultChars ?? 5000,
|
|
225
|
+
provider: options.provider,
|
|
226
|
+
};
|
|
227
|
+
CompactionEngine.validateOptions(compactionOptions);
|
|
228
|
+
this._compactionOptions = compactionOptions;
|
|
229
|
+
logger.debug("[Agent] Compaction options initialized and validated");
|
|
230
|
+
}
|
|
231
|
+
|
|
208
232
|
// Initialize session manager with reference to this agent for bidirectional sync
|
|
209
233
|
this.session = new SessionManager<TData>(this._persistenceManager, this);
|
|
210
234
|
|
|
@@ -505,6 +529,13 @@ export class Agent<TContext = any, TData = any> {
|
|
|
505
529
|
this.options.routeSwitchMargin = value;
|
|
506
530
|
}
|
|
507
531
|
|
|
532
|
+
/**
|
|
533
|
+
* Get the prompt section cache instance
|
|
534
|
+
*/
|
|
535
|
+
get promptSectionCache(): PromptSectionCache {
|
|
536
|
+
return this._promptSectionCache;
|
|
537
|
+
}
|
|
538
|
+
|
|
508
539
|
/**
|
|
509
540
|
* Get the maximum steps per batch
|
|
510
541
|
* @default 1
|
|
@@ -620,6 +651,7 @@ export class Agent<TContext = any, TData = any> {
|
|
|
620
651
|
*/
|
|
621
652
|
set currentSession(value: SessionState | undefined) {
|
|
622
653
|
this._currentSession = value;
|
|
654
|
+
this._promptSectionCache.invalidateAll();
|
|
623
655
|
}
|
|
624
656
|
|
|
625
657
|
// ---------------------------------------------------------------------------
|
|
@@ -705,6 +737,7 @@ export class Agent<TContext = any, TData = any> {
|
|
|
705
737
|
*/
|
|
706
738
|
setCurrentSession(session: SessionState): void {
  // Assigning through the `currentSession` setter already calls
  // `_promptSectionCache.invalidateAll()`, so a second explicit
  // invalidation here would only repeat the same work.
  this.currentSession = session;
}
|
|
709
742
|
|
|
710
743
|
/**
|
|
@@ -713,6 +746,18 @@ export class Agent<TContext = any, TData = any> {
|
|
|
713
746
|
*/
|
|
714
747
|
clearCurrentSession(): void {
|
|
715
748
|
this._currentSession = undefined;
|
|
749
|
+
this._promptSectionCache.invalidateAll();
|
|
750
|
+
}
|
|
751
|
+
|
|
752
|
+
/**
|
|
753
|
+
* Invalidate route-dependent prompt cache sections.
|
|
754
|
+
* Called automatically when the active route changes.
|
|
755
|
+
*/
|
|
756
|
+
invalidateRouteSections(): void {
|
|
757
|
+
this._promptSectionCache.invalidate('activeRoutes');
|
|
758
|
+
this._promptSectionCache.invalidate('routeRules');
|
|
759
|
+
this._promptSectionCache.invalidate('routeProhibitions');
|
|
760
|
+
this._promptSectionCache.invalidate('routeKnowledgeBase');
|
|
716
761
|
}
|
|
717
762
|
|
|
718
763
|
/**
|
|
@@ -729,6 +774,13 @@ export class Agent<TContext = any, TData = any> {
|
|
|
729
774
|
return this._persistenceManager !== undefined;
|
|
730
775
|
}
|
|
731
776
|
|
|
777
|
+
/**
|
|
778
|
+
* Get the resolved compaction options (if compaction is configured)
|
|
779
|
+
*/
|
|
780
|
+
getCompactionOptions(): CompactionOptions | undefined {
|
|
781
|
+
return this._compactionOptions;
|
|
782
|
+
}
|
|
783
|
+
|
|
732
784
|
// ---------------------------------------------------------------------------
|
|
733
785
|
// Core methods
|
|
734
786
|
// ---------------------------------------------------------------------------
|
|
@@ -876,6 +928,10 @@ export class Agent<TContext = any, TData = any> {
|
|
|
876
928
|
if (this.options.hooks?.onContextUpdate && previousContext !== undefined) {
|
|
877
929
|
await this.options.hooks.onContextUpdate(this._context, previousContext);
|
|
878
930
|
}
|
|
931
|
+
|
|
932
|
+
// Invalidate context-dependent prompt cache sections
|
|
933
|
+
this._promptSectionCache.invalidate('agentMeta');
|
|
934
|
+
this._promptSectionCache.invalidate('knowledgeBase');
|
|
879
935
|
}
|
|
880
936
|
|
|
881
937
|
/**
|
|
@@ -939,7 +995,7 @@ export class Agent<TContext = any, TData = any> {
|
|
|
939
995
|
/**
|
|
940
996
|
* Generate a response based on history and context as a stream
|
|
941
997
|
*/
|
|
942
|
-
async *respondStream(params: RespondParams<TContext, TData>): AsyncGenerator<AgentResponseStreamChunk<TData>> {
|
|
998
|
+
async * respondStream(params: RespondParams<TContext, TData>): AsyncGenerator<AgentResponseStreamChunk<TData>> {
|
|
943
999
|
// Delegate to ResponseModal
|
|
944
1000
|
yield* this._responseModal.respondStream(params);
|
|
945
1001
|
}
|
|
@@ -1210,7 +1266,7 @@ export class Agent<TContext = any, TData = any> {
|
|
|
1210
1266
|
* Modern streaming API - simple interface like chat() but returns a stream
|
|
1211
1267
|
* Automatically manages conversation history through the session
|
|
1212
1268
|
*/
|
|
1213
|
-
async *stream(
|
|
1269
|
+
async * stream(
|
|
1214
1270
|
message?: string,
|
|
1215
1271
|
options?: StreamOptions<TContext>
|
|
1216
1272
|
): AsyncGenerator<AgentResponseStreamChunk<TData>> {
|
|
@@ -14,6 +14,7 @@ import type { Event } from '../types/history';
|
|
|
14
14
|
import type { Route } from './Route';
|
|
15
15
|
import { render, renderMany, createTemplateContext } from '../utils/template';
|
|
16
16
|
import { PromptComposer } from './PromptComposer';
|
|
17
|
+
import type { PromptSectionCache } from './PromptSectionCache';
|
|
17
18
|
|
|
18
19
|
/**
|
|
19
20
|
* Parameters for building a batch prompt
|
|
@@ -56,6 +57,8 @@ export interface BatchPromptResult {
|
|
|
56
57
|
* **Validates: Requirements 4.1, 4.2, 4.3, 4.4**
|
|
57
58
|
*/
|
|
58
59
|
export class BatchPromptBuilder<TContext = unknown, TData = unknown> {
|
|
60
|
+
constructor(private readonly promptSectionCache?: PromptSectionCache) { }
|
|
61
|
+
|
|
59
62
|
/**
|
|
60
63
|
* Build a combined prompt for a batch of Steps
|
|
61
64
|
*
|
|
@@ -89,7 +92,7 @@ export class BatchPromptBuilder<TContext = unknown, TData = unknown> {
|
|
|
89
92
|
}
|
|
90
93
|
|
|
91
94
|
// Build the combined prompt using PromptComposer for consistency
|
|
92
|
-
const composer = new PromptComposer<TContext, TData>(templateContext);
|
|
95
|
+
const composer = new PromptComposer<TContext, TData>(templateContext, this.promptSectionCache);
|
|
93
96
|
|
|
94
97
|
// Add agent meta information
|
|
95
98
|
await composer.addAgentMeta(agentOptions);
|
|
@@ -0,0 +1,318 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CompactionEngine - Manages conversation history size through multi-layered compaction strategies.
|
|
3
|
+
*
|
|
4
|
+
* Strategies are applied in order of cost:
|
|
5
|
+
* 1. none - history is under threshold, no action needed
|
|
6
|
+
* 2. tool_result_budget - truncate oversized tool results (no LLM call)
|
|
7
|
+
* 3. micro_compact - compress verbose tool outputs inline (no LLM call)
|
|
8
|
+
* 4. auto_compact - summarize old messages via LLM provider
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import log from "loglevel";
|
|
12
|
+
import type { HistoryItem } from "../types/history";
|
|
13
|
+
import type { CompactionOptions, CompactionResult } from "../types/compaction";
|
|
14
|
+
|
|
15
|
+
export class CompactionEngine {
  /**
   * Matches the notice appended by `applyToolResultBudget`. The leading
   * whitespace is matched loosely because `microCompact` collapses the
   * original "\n\n" separator into a single space.
   */
  private static readonly TRUNCATION_NOTICE_PATTERN =
    /\s+\[Truncated: \d+ chars total, showing first \d+\]$/;

  /**
   * Validate CompactionOptions. Throws on invalid values.
   *
   * `NaN` is rejected explicitly for every numeric option: range comparisons
   * against `NaN` are always false, so it would otherwise pass validation and
   * turn every later "under threshold" check into `false`.
   *
   * @param options - Options to validate.
   * @throws Error when `maxTokens` is not a number > 0, when
   *   `compactionThreshold` is outside [0.5, 0.95], when
   *   `preserveRecentCount` is < 2, or when `maxToolResultChars` is <= 0.
   */
  static validateOptions(options: CompactionOptions): void {
    if (
      typeof options.maxTokens !== "number" ||
      Number.isNaN(options.maxTokens) ||
      options.maxTokens <= 0
    ) {
      throw new Error(
        `maxTokens must be a number > 0, got ${options.maxTokens}`
      );
    }
    if (
      typeof options.compactionThreshold !== "number" ||
      Number.isNaN(options.compactionThreshold) ||
      options.compactionThreshold < 0.5 ||
      options.compactionThreshold > 0.95
    ) {
      throw new Error(
        `compactionThreshold must be between 0.5 and 0.95, got ${options.compactionThreshold}`
      );
    }
    if (
      typeof options.preserveRecentCount !== "number" ||
      Number.isNaN(options.preserveRecentCount) ||
      options.preserveRecentCount < 2
    ) {
      throw new Error(
        `preserveRecentCount must be >= 2, got ${options.preserveRecentCount}`
      );
    }
    if (
      typeof options.maxToolResultChars !== "number" ||
      Number.isNaN(options.maxToolResultChars) ||
      options.maxToolResultChars <= 0
    ) {
      throw new Error(
        `maxToolResultChars must be > 0, got ${options.maxToolResultChars}`
      );
    }
  }

  /**
   * Render message content as text: strings are used as-is, anything else is
   * JSON-serialized (null/undefined serialize as an empty JSON string).
   */
  private static contentToString(content: unknown): string {
    return typeof content === "string"
      ? content
      : JSON.stringify(content ?? "");
  }

  /**
   * Character cost of one history item: content length + name length
   * (if present) + 4 characters of role overhead.
   */
  private static countChars(item: HistoryItem): number {
    let chars = CompactionEngine.contentToString(item.content).length + 4;
    if ("name" in item && typeof item.name === "string") {
      chars += item.name.length;
    }
    return chars;
  }

  /**
   * Estimate token count using a character-based heuristic (~4 chars/token).
   *
   * For each HistoryItem: content length + name length (if present) + 4
   * (role overhead). Deterministic for the same input.
   *
   * @param history - Messages to measure.
   * @returns `Math.ceil(totalChars / 4)`; 0 for an empty history.
   */
  static estimateTokens(history: HistoryItem[]): number {
    let totalChars = 0;
    for (const item of history) {
      totalChars += CompactionEngine.countChars(item);
    }
    return Math.ceil(totalChars / 4);
  }

  /**
   * Truncate tool results that exceed the per-message character budget.
   *
   * Items with `role === "tool"` whose content is longer than
   * `maxCharsPerResult` are cut to that length and a truncation notice is
   * appended. All other items, and tool items within budget, are returned
   * unchanged (same object reference).
   *
   * The operation is idempotent: content that already ends in a truncation
   * notice and whose preview fits the budget is left alone, so the notice
   * keeps reporting the original size.
   *
   * @param history - Messages to process; the array is not mutated.
   * @param maxCharsPerResult - Maximum number of content characters to keep.
   * @returns A new array with oversized tool results replaced by copies.
   */
  static applyToolResultBudget(
    history: HistoryItem[],
    maxCharsPerResult: number
  ): HistoryItem[] {
    return history.map((item) => {
      if (item.role !== "tool") return item;

      const contentStr = CompactionEngine.contentToString(item.content);
      const totalChars = contentStr.length;
      if (totalChars <= maxCharsPerResult) return item;

      // Already truncated on an earlier pass: only the notice pushes it over.
      const notice =
        CompactionEngine.TRUNCATION_NOTICE_PATTERN.exec(contentStr);
      if (
        notice !== null &&
        totalChars - notice[0].length <= maxCharsPerResult
      ) {
        return item;
      }

      const preview = contentStr.slice(0, maxCharsPerResult);
      const truncatedContent = `${preview}\n\n[Truncated: ${totalChars} chars total, showing first ${maxCharsPerResult}]`;
      return { ...item, content: truncatedContent };
    });
  }

  /**
   * Micro-compact: collapse every run of whitespace in older tool results to
   * a single space and trim the ends.
   *
   * The last `preserveCount` messages are returned unchanged. Non-string
   * tool content is JSON-serialized first, so it becomes a string.
   */
  private static microCompact(
    history: HistoryItem[],
    preserveCount: number
  ): HistoryItem[] {
    const cutoff = Math.max(0, history.length - preserveCount);

    return history.map((item, index) => {
      if (index >= cutoff || item.role !== "tool") return item;

      const compressed = CompactionEngine.contentToString(item.content)
        .replace(/\s+/g, " ")
        .trim();
      return { ...item, content: compressed };
    });
  }

  /**
   * Count how many positions hold a different object in the two histories
   * (reference comparison), plus the difference in length.
   */
  private static countDifferences(
    original: HistoryItem[],
    compacted: HistoryItem[]
  ): number {
    const shared = Math.min(original.length, compacted.length);
    let count = Math.abs(original.length - compacted.length);
    for (let i = 0; i < shared; i++) {
      if (original[i] !== compacted[i]) count++;
    }
    return count;
  }

  /**
   * Summarize old messages via the configured LLM provider.
   *
   * @returns The summary text, or `null` when the provider throws or returns
   *   a missing/blank message (the caller then falls back to truncation).
   */
  private static async summarizeMessages(
    messages: HistoryItem[],
    options: CompactionOptions
  ): Promise<string | null> {
    try {
      const messagesText = messages
        .map((m) => `[${m.role}]: ${CompactionEngine.contentToString(m.content)}`)
        .join("\n");

      const result = await options.provider.generateMessage({
        prompt: `Summarize the following conversation concisely, preserving key facts, decisions, and context:\n\n${messagesText}`,
        history: [],
        context: {},
        parameters: {
          maxOutputTokens: 1024,
          jsonSchema: {},
        },
      });

      // A missing or blank message is not a usable summary; treating it as
      // one would insert "[Conversation Summary]\nundefined" into history.
      const summary: unknown = result.message;
      if (typeof summary !== "string" || summary.trim().length === 0) {
        return null;
      }
      return summary;
    } catch (error: unknown) {
      log.debug("CompactionEngine: summarization request failed", error);
      return null;
    }
  }

  /**
   * Aggressive truncation fallback: drop the oldest messages (no LLM needed).
   *
   * The last `preserveRecentCount` messages are always kept. Older messages
   * are then re-added from newest to oldest while the estimated token count
   * stays strictly below `maxTokens * compactionThreshold`.
   */
  private static aggressiveTruncate(
    history: HistoryItem[],
    options: CompactionOptions
  ): HistoryItem[] {
    const threshold = options.maxTokens * options.compactionThreshold;
    const preserveCount = Math.trunc(options.preserveRecentCount);
    const cutoff = Math.max(0, history.length - preserveCount);

    // Running character total replaces re-estimating a rebuilt array on
    // every step; the token estimate is additive in characters.
    let totalChars = 0;
    for (let i = cutoff; i < history.length; i++) {
      totalChars += CompactionEngine.countChars(history[i]);
    }

    let start = cutoff;
    while (start > 0) {
      const withNext =
        totalChars + CompactionEngine.countChars(history[start - 1]);
      if (Math.ceil(withNext / 4) >= threshold) break;
      totalChars = withNext;
      start--;
    }

    return history.slice(start);
  }

  /**
   * Multi-layered compaction strategy.
   *
   * Layer 1 (none): estimated tokens < maxTokens * compactionThreshold, return unchanged
   * Layer 2 (tool_result_budget): truncate oversized older tool results
   * Layer 3 (micro_compact): collapse whitespace in older tool results
   * Layer 4 (auto_compact): summarize older messages via the LLM provider,
   *   falling back to dropping the oldest messages if no summary is produced
   *   (the fallback is also reported as "auto_compact").
   *
   * The last `preserveRecentCount` messages are NEVER modified or removed.
   * When every message falls inside that window there is nothing that may
   * be compacted, so the history is returned unchanged with strategy "none"
   * and the provider is not called.
   *
   * @param history - Conversation history; not mutated.
   * @param options - Compaction settings (validated on entry).
   * @returns The resulting history, the strategy applied, the new token
   *   estimate, the number of messages affected and, for a successful
   *   summarization, the summary text.
   * @throws Error when `options` fails `validateOptions`.
   */
  static async checkAndCompact(
    history: HistoryItem[],
    options: CompactionOptions
  ): Promise<CompactionResult> {
    CompactionEngine.validateOptions(options);

    const threshold = options.maxTokens * options.compactionThreshold;
    const estimatedTokens = CompactionEngine.estimateTokens(history);

    // Layer 1: No compaction needed
    if (estimatedTokens < threshold) {
      return {
        history,
        strategy: "none",
        estimatedTokens,
        messagesCompacted: 0,
      };
    }

    // One cutoff index shared by all layers keeps the preserved window
    // identical across them.
    const preserveCount = Math.min(
      Math.trunc(options.preserveRecentCount),
      history.length
    );
    const cutoff = history.length - preserveCount;

    // Everything is inside the preserved window: no layer may touch it, and
    // summarizing zero messages would only add a message to the history.
    if (cutoff === 0) {
      log.debug(
        "CompactionEngine: history exceeds threshold but only preserved messages remain"
      );
      return {
        history,
        strategy: "none",
        estimatedTokens,
        messagesCompacted: 0,
      };
    }

    // Layer 2: Tool result budgeting (cheapest — no LLM call)
    const budgeted = [
      ...CompactionEngine.applyToolResultBudget(
        history.slice(0, cutoff),
        options.maxToolResultChars
      ),
      ...history.slice(cutoff),
    ];
    let newEstimate = CompactionEngine.estimateTokens(budgeted);
    if (newEstimate < threshold) {
      return {
        history: budgeted,
        strategy: "tool_result_budget",
        estimatedTokens: newEstimate,
        messagesCompacted: CompactionEngine.countDifferences(history, budgeted),
      };
    }

    // Layer 3: Micro-compaction (compress verbose tool outputs)
    const microCompacted = CompactionEngine.microCompact(
      budgeted,
      preserveCount
    );
    newEstimate = CompactionEngine.estimateTokens(microCompacted);
    if (newEstimate < threshold) {
      return {
        history: microCompacted,
        strategy: "micro_compact",
        estimatedTokens: newEstimate,
        messagesCompacted: CompactionEngine.countDifferences(
          history,
          microCompacted
        ),
      };
    }

    // Layer 4: Auto-compact (summarize old messages via LLM)
    const oldMessages = microCompacted.slice(0, cutoff);
    const recentMessages = microCompacted.slice(cutoff);

    const summary = await CompactionEngine.summarizeMessages(
      oldMessages,
      options
    );

    if (summary !== null) {
      const summaryItem: HistoryItem = {
        role: "system",
        content: `[Conversation Summary]\n${summary}`,
      };
      const finalHistory = [summaryItem, ...recentMessages];
      return {
        history: finalHistory,
        strategy: "auto_compact",
        estimatedTokens: CompactionEngine.estimateTokens(finalHistory),
        messagesCompacted: oldMessages.length,
        summary,
      };
    }

    // Fallback: LLM summarization failed — aggressive truncation
    log.warn(
      "CompactionEngine: LLM summarization failed, falling back to aggressive truncation"
    );
    const truncated = CompactionEngine.aggressiveTruncate(
      microCompacted,
      options
    );
    return {
      history: truncated,
      strategy: "auto_compact",
      estimatedTokens: CompactionEngine.estimateTokens(truncated),
      messagesCompacted: history.length - truncated.length,
    };
  }
}
|