@falai/agent 1.1.2 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (173) hide show
  1. package/README.md +9 -0
  2. package/dist/cjs/core/Agent.d.ts +17 -1
  3. package/dist/cjs/core/Agent.d.ts.map +1 -1
  4. package/dist/cjs/core/Agent.js +47 -0
  5. package/dist/cjs/core/Agent.js.map +1 -1
  6. package/dist/cjs/core/BatchPromptBuilder.d.ts +3 -0
  7. package/dist/cjs/core/BatchPromptBuilder.d.ts.map +1 -1
  8. package/dist/cjs/core/BatchPromptBuilder.js +14 -11
  9. package/dist/cjs/core/BatchPromptBuilder.js.map +1 -1
  10. package/dist/cjs/core/CompactionEngine.d.ts +65 -0
  11. package/dist/cjs/core/CompactionEngine.d.ts.map +1 -0
  12. package/dist/cjs/core/CompactionEngine.js +251 -0
  13. package/dist/cjs/core/CompactionEngine.js.map +1 -0
  14. package/dist/cjs/core/PromptComposer.d.ts +8 -1
  15. package/dist/cjs/core/PromptComposer.d.ts.map +1 -1
  16. package/dist/cjs/core/PromptComposer.js +238 -118
  17. package/dist/cjs/core/PromptComposer.js.map +1 -1
  18. package/dist/cjs/core/PromptSectionCache.d.ts +57 -0
  19. package/dist/cjs/core/PromptSectionCache.d.ts.map +1 -0
  20. package/dist/cjs/core/PromptSectionCache.js +108 -0
  21. package/dist/cjs/core/PromptSectionCache.js.map +1 -0
  22. package/dist/cjs/core/ResponseEngine.d.ts +3 -0
  23. package/dist/cjs/core/ResponseEngine.d.ts.map +1 -1
  24. package/dist/cjs/core/ResponseEngine.js +10 -6
  25. package/dist/cjs/core/ResponseEngine.js.map +1 -1
  26. package/dist/cjs/core/ResponseModal.d.ts.map +1 -1
  27. package/dist/cjs/core/ResponseModal.js +79 -20
  28. package/dist/cjs/core/ResponseModal.js.map +1 -1
  29. package/dist/cjs/core/RoutingEngine.d.ts +10 -0
  30. package/dist/cjs/core/RoutingEngine.d.ts.map +1 -1
  31. package/dist/cjs/core/RoutingEngine.js +3 -2
  32. package/dist/cjs/core/RoutingEngine.js.map +1 -1
  33. package/dist/cjs/core/SessionManager.d.ts.map +1 -1
  34. package/dist/cjs/core/SessionManager.js +20 -0
  35. package/dist/cjs/core/SessionManager.js.map +1 -1
  36. package/dist/cjs/core/StreamingToolExecutor.d.ts +142 -0
  37. package/dist/cjs/core/StreamingToolExecutor.d.ts.map +1 -0
  38. package/dist/cjs/core/StreamingToolExecutor.js +455 -0
  39. package/dist/cjs/core/StreamingToolExecutor.js.map +1 -0
  40. package/dist/cjs/core/ToolManager.d.ts +18 -1
  41. package/dist/cjs/core/ToolManager.d.ts.map +1 -1
  42. package/dist/cjs/core/ToolManager.js +91 -0
  43. package/dist/cjs/core/ToolManager.js.map +1 -1
  44. package/dist/cjs/index.d.ts +5 -1
  45. package/dist/cjs/index.d.ts.map +1 -1
  46. package/dist/cjs/index.js +8 -2
  47. package/dist/cjs/index.js.map +1 -1
  48. package/dist/cjs/providers/AnthropicProvider.d.ts.map +1 -1
  49. package/dist/cjs/providers/AnthropicProvider.js +8 -7
  50. package/dist/cjs/providers/AnthropicProvider.js.map +1 -1
  51. package/dist/cjs/providers/GeminiProvider.d.ts +25 -0
  52. package/dist/cjs/providers/GeminiProvider.d.ts.map +1 -1
  53. package/dist/cjs/providers/GeminiProvider.js +79 -51
  54. package/dist/cjs/providers/GeminiProvider.js.map +1 -1
  55. package/dist/cjs/providers/OpenAIProvider.d.ts.map +1 -1
  56. package/dist/cjs/providers/OpenAIProvider.js +14 -6
  57. package/dist/cjs/providers/OpenAIProvider.js.map +1 -1
  58. package/dist/cjs/providers/OpenRouterProvider.d.ts.map +1 -1
  59. package/dist/cjs/providers/OpenRouterProvider.js +7 -6
  60. package/dist/cjs/providers/OpenRouterProvider.js.map +1 -1
  61. package/dist/cjs/types/agent.d.ts +44 -0
  62. package/dist/cjs/types/agent.d.ts.map +1 -1
  63. package/dist/cjs/types/agent.js.map +1 -1
  64. package/dist/cjs/types/compaction.d.ts +50 -0
  65. package/dist/cjs/types/compaction.d.ts.map +1 -0
  66. package/dist/cjs/types/compaction.js +6 -0
  67. package/dist/cjs/types/compaction.js.map +1 -0
  68. package/dist/cjs/types/index.d.ts +4 -2
  69. package/dist/cjs/types/index.d.ts.map +1 -1
  70. package/dist/cjs/types/index.js.map +1 -1
  71. package/dist/cjs/types/tool.d.ts +84 -0
  72. package/dist/cjs/types/tool.d.ts.map +1 -1
  73. package/dist/core/Agent.d.ts +17 -1
  74. package/dist/core/Agent.d.ts.map +1 -1
  75. package/dist/core/Agent.js +47 -0
  76. package/dist/core/Agent.js.map +1 -1
  77. package/dist/core/BatchPromptBuilder.d.ts +3 -0
  78. package/dist/core/BatchPromptBuilder.d.ts.map +1 -1
  79. package/dist/core/BatchPromptBuilder.js +14 -11
  80. package/dist/core/BatchPromptBuilder.js.map +1 -1
  81. package/dist/core/CompactionEngine.d.ts +65 -0
  82. package/dist/core/CompactionEngine.d.ts.map +1 -0
  83. package/dist/core/CompactionEngine.js +244 -0
  84. package/dist/core/CompactionEngine.js.map +1 -0
  85. package/dist/core/PromptComposer.d.ts +8 -1
  86. package/dist/core/PromptComposer.d.ts.map +1 -1
  87. package/dist/core/PromptComposer.js +238 -118
  88. package/dist/core/PromptComposer.js.map +1 -1
  89. package/dist/core/PromptSectionCache.d.ts +57 -0
  90. package/dist/core/PromptSectionCache.d.ts.map +1 -0
  91. package/dist/core/PromptSectionCache.js +104 -0
  92. package/dist/core/PromptSectionCache.js.map +1 -0
  93. package/dist/core/ResponseEngine.d.ts +3 -0
  94. package/dist/core/ResponseEngine.d.ts.map +1 -1
  95. package/dist/core/ResponseEngine.js +10 -6
  96. package/dist/core/ResponseEngine.js.map +1 -1
  97. package/dist/core/ResponseModal.d.ts.map +1 -1
  98. package/dist/core/ResponseModal.js +79 -20
  99. package/dist/core/ResponseModal.js.map +1 -1
  100. package/dist/core/RoutingEngine.d.ts +10 -0
  101. package/dist/core/RoutingEngine.d.ts.map +1 -1
  102. package/dist/core/RoutingEngine.js +3 -2
  103. package/dist/core/RoutingEngine.js.map +1 -1
  104. package/dist/core/SessionManager.d.ts.map +1 -1
  105. package/dist/core/SessionManager.js +17 -0
  106. package/dist/core/SessionManager.js.map +1 -1
  107. package/dist/core/StreamingToolExecutor.d.ts +142 -0
  108. package/dist/core/StreamingToolExecutor.d.ts.map +1 -0
  109. package/dist/core/StreamingToolExecutor.js +448 -0
  110. package/dist/core/StreamingToolExecutor.js.map +1 -0
  111. package/dist/core/ToolManager.d.ts +18 -1
  112. package/dist/core/ToolManager.d.ts.map +1 -1
  113. package/dist/core/ToolManager.js +91 -0
  114. package/dist/core/ToolManager.js.map +1 -1
  115. package/dist/index.d.ts +5 -1
  116. package/dist/index.d.ts.map +1 -1
  117. package/dist/index.js +3 -0
  118. package/dist/index.js.map +1 -1
  119. package/dist/providers/AnthropicProvider.d.ts.map +1 -1
  120. package/dist/providers/AnthropicProvider.js +8 -7
  121. package/dist/providers/AnthropicProvider.js.map +1 -1
  122. package/dist/providers/GeminiProvider.d.ts +25 -0
  123. package/dist/providers/GeminiProvider.d.ts.map +1 -1
  124. package/dist/providers/GeminiProvider.js +79 -51
  125. package/dist/providers/GeminiProvider.js.map +1 -1
  126. package/dist/providers/OpenAIProvider.d.ts.map +1 -1
  127. package/dist/providers/OpenAIProvider.js +14 -6
  128. package/dist/providers/OpenAIProvider.js.map +1 -1
  129. package/dist/providers/OpenRouterProvider.d.ts.map +1 -1
  130. package/dist/providers/OpenRouterProvider.js +7 -6
  131. package/dist/providers/OpenRouterProvider.js.map +1 -1
  132. package/dist/types/agent.d.ts +44 -0
  133. package/dist/types/agent.d.ts.map +1 -1
  134. package/dist/types/agent.js.map +1 -1
  135. package/dist/types/compaction.d.ts +50 -0
  136. package/dist/types/compaction.d.ts.map +1 -0
  137. package/dist/types/compaction.js +5 -0
  138. package/dist/types/compaction.js.map +1 -0
  139. package/dist/types/index.d.ts +4 -2
  140. package/dist/types/index.d.ts.map +1 -1
  141. package/dist/types/index.js.map +1 -1
  142. package/dist/types/tool.d.ts +84 -0
  143. package/dist/types/tool.d.ts.map +1 -1
  144. package/docs/api/overview.md +140 -0
  145. package/docs/core/tools/enhanced-tool.md +186 -0
  146. package/docs/core/tools/streaming-execution.md +161 -0
  147. package/docs/guides/context-compaction.md +96 -0
  148. package/docs/guides/prompt-optimization.md +164 -0
  149. package/examples/advanced-patterns/context-compaction.ts +223 -0
  150. package/examples/advanced-patterns/streaming-responses.ts +85 -7
  151. package/examples/tools/enhanced-tool-metadata.ts +268 -0
  152. package/examples/tools/streaming-tool-execution.ts +283 -0
  153. package/package.json +1 -1
  154. package/src/core/Agent.ts +58 -2
  155. package/src/core/BatchPromptBuilder.ts +14 -11
  156. package/src/core/CompactionEngine.ts +318 -0
  157. package/src/core/PromptComposer.ts +261 -141
  158. package/src/core/PromptSectionCache.ts +136 -0
  159. package/src/core/ResponseEngine.ts +9 -6
  160. package/src/core/ResponseModal.ts +81 -20
  161. package/src/core/RoutingEngine.ts +13 -2
  162. package/src/core/SessionManager.ts +19 -0
  163. package/src/core/StreamingToolExecutor.ts +572 -0
  164. package/src/core/ToolManager.ts +151 -41
  165. package/src/index.ts +14 -0
  166. package/src/providers/AnthropicProvider.ts +11 -12
  167. package/src/providers/GeminiProvider.ts +83 -52
  168. package/src/providers/OpenAIProvider.ts +21 -13
  169. package/src/providers/OpenRouterProvider.ts +13 -13
  170. package/src/types/agent.ts +45 -0
  171. package/src/types/compaction.ts +52 -0
  172. package/src/types/index.ts +35 -14
  173. package/src/types/tool.ts +108 -0
package/src/core/Agent.ts CHANGED
@@ -18,6 +18,7 @@ import type {
18
18
  ValidationError,
19
19
  ValidationResult,
20
20
  AiProvider,
21
+ CompactionOptions,
21
22
  } from "../types";
22
23
  import { CompositionMode } from "../types";
23
24
  import type { StreamOptions, GenerateOptions, RespondParams } from "./ResponseModal";
@@ -35,9 +36,11 @@ import { Step } from "./Step";
35
36
  import { PersistenceManager } from "./PersistenceManager";
36
37
  import { SessionManager } from "./SessionManager";
37
38
  import { RoutingEngine } from "./RoutingEngine";
39
+ import { PromptSectionCache } from "./PromptSectionCache";
38
40
 
39
41
  import { ResponseModal } from "./ResponseModal";
40
42
  import { ToolManager } from "./ToolManager";
43
+ import { CompactionEngine } from "./CompactionEngine";
41
44
 
42
45
  /**
43
46
  * Error thrown when data validation fails
@@ -78,6 +81,8 @@ export class Agent<TContext = any, TData = any> {
78
81
  private _knowledgeBase: Record<string, unknown> = {};
79
82
  private _schema?: StructuredSchema;
80
83
  private _collectedData: Partial<TData> = {};
84
+ private _compactionOptions?: CompactionOptions;
85
+ private _promptSectionCache: PromptSectionCache;
81
86
 
82
87
  /** Public session manager for easy session management */
83
88
  public session: SessionManager<TData>;
@@ -125,9 +130,14 @@ export class Agent<TContext = any, TData = any> {
125
130
  // Initialize current session if provided
126
131
  this._currentSession = options.session;
127
132
 
133
+ // Initialize prompt section cache
134
+ this._promptSectionCache = new PromptSectionCache(options.promptCache);
135
+
128
136
  // Initialize routing engine
129
137
  this._routingEngine = new RoutingEngine<TContext, TData>({
130
138
  routeSwitchMargin: options.routeSwitchMargin,
139
+ onRouteSwitch: () => this.invalidateRouteSections(),
140
+ promptSectionCache: this._promptSectionCache,
131
141
  });
132
142
 
133
143
  // Initialize ResponseModal for handling all response generation
@@ -205,6 +215,20 @@ export class Agent<TContext = any, TData = any> {
205
215
  this._knowledgeBase = { ...options.knowledgeBase };
206
216
  }
207
217
 
218
+ // Initialize compaction options if configured
219
+ if (options.compaction && options.compaction.enabled !== false) {
220
+ const compactionOptions: CompactionOptions = {
221
+ maxTokens: options.compaction.maxTokens,
222
+ compactionThreshold: options.compaction.compactionThreshold ?? 0.8,
223
+ preserveRecentCount: options.compaction.preserveRecentCount ?? 4,
224
+ maxToolResultChars: options.compaction.maxToolResultChars ?? 5000,
225
+ provider: options.provider,
226
+ };
227
+ CompactionEngine.validateOptions(compactionOptions);
228
+ this._compactionOptions = compactionOptions;
229
+ logger.debug("[Agent] Compaction options initialized and validated");
230
+ }
231
+
208
232
  // Initialize session manager with reference to this agent for bidirectional sync
209
233
  this.session = new SessionManager<TData>(this._persistenceManager, this);
210
234
 
@@ -505,6 +529,13 @@ export class Agent<TContext = any, TData = any> {
505
529
  this.options.routeSwitchMargin = value;
506
530
  }
507
531
 
532
+ /**
533
+ * Get the prompt section cache instance
534
+ */
535
+ get promptSectionCache(): PromptSectionCache {
536
+ return this._promptSectionCache;
537
+ }
538
+
508
539
  /**
509
540
  * Get the maximum steps per batch
510
541
  * @default 1
@@ -620,6 +651,7 @@ export class Agent<TContext = any, TData = any> {
620
651
  */
621
652
  set currentSession(value: SessionState | undefined) {
622
653
  this._currentSession = value;
654
+ this._promptSectionCache.invalidateAll();
623
655
  }
624
656
 
625
657
  // ---------------------------------------------------------------------------
@@ -705,6 +737,7 @@ export class Agent<TContext = any, TData = any> {
705
737
  */
706
738
  setCurrentSession(session: SessionState): void {
707
739
  this.currentSession = session;
740
+ this._promptSectionCache.invalidateAll();
708
741
  }
709
742
 
710
743
  /**
@@ -713,6 +746,18 @@ export class Agent<TContext = any, TData = any> {
713
746
  */
714
747
  clearCurrentSession(): void {
715
748
  this._currentSession = undefined;
749
+ this._promptSectionCache.invalidateAll();
750
+ }
751
+
752
+ /**
753
+ * Invalidate route-dependent prompt cache sections.
754
+ * Called automatically when the active route changes.
755
+ */
756
+ invalidateRouteSections(): void {
757
+ this._promptSectionCache.invalidate('activeRoutes');
758
+ this._promptSectionCache.invalidate('routeRules');
759
+ this._promptSectionCache.invalidate('routeProhibitions');
760
+ this._promptSectionCache.invalidate('routeKnowledgeBase');
716
761
  }
717
762
 
718
763
  /**
@@ -729,6 +774,13 @@ export class Agent<TContext = any, TData = any> {
729
774
  return this._persistenceManager !== undefined;
730
775
  }
731
776
 
777
+ /**
778
+ * Get the resolved compaction options (if compaction is configured)
779
+ */
780
+ getCompactionOptions(): CompactionOptions | undefined {
781
+ return this._compactionOptions;
782
+ }
783
+
732
784
  // ---------------------------------------------------------------------------
733
785
  // Core methods
734
786
  // ---------------------------------------------------------------------------
@@ -876,6 +928,10 @@ export class Agent<TContext = any, TData = any> {
876
928
  if (this.options.hooks?.onContextUpdate && previousContext !== undefined) {
877
929
  await this.options.hooks.onContextUpdate(this._context, previousContext);
878
930
  }
931
+
932
+ // Invalidate context-dependent prompt cache sections
933
+ this._promptSectionCache.invalidate('agentMeta');
934
+ this._promptSectionCache.invalidate('knowledgeBase');
879
935
  }
880
936
 
881
937
  /**
@@ -939,7 +995,7 @@ export class Agent<TContext = any, TData = any> {
939
995
  /**
940
996
  * Generate a response based on history and context as a stream
941
997
  */
942
- async *respondStream(params: RespondParams<TContext, TData>): AsyncGenerator<AgentResponseStreamChunk<TData>> {
998
+ async * respondStream(params: RespondParams<TContext, TData>): AsyncGenerator<AgentResponseStreamChunk<TData>> {
943
999
  // Delegate to ResponseModal
944
1000
  yield* this._responseModal.respondStream(params);
945
1001
  }
@@ -1210,7 +1266,7 @@ export class Agent<TContext = any, TData = any> {
1210
1266
  * Modern streaming API - simple interface like chat() but returns a stream
1211
1267
  * Automatically manages conversation history through the session
1212
1268
  */
1213
- async *stream(
1269
+ async * stream(
1214
1270
  message?: string,
1215
1271
  options?: StreamOptions<TContext>
1216
1272
  ): AsyncGenerator<AgentResponseStreamChunk<TData>> {
@@ -14,6 +14,7 @@ import type { Event } from '../types/history';
14
14
  import type { Route } from './Route';
15
15
  import { render, renderMany, createTemplateContext } from '../utils/template';
16
16
  import { PromptComposer } from './PromptComposer';
17
+ import type { PromptSectionCache } from './PromptSectionCache';
17
18
 
18
19
  /**
19
20
  * Parameters for building a batch prompt
@@ -56,6 +57,8 @@ export interface BatchPromptResult {
56
57
  * **Validates: Requirements 4.1, 4.2, 4.3, 4.4**
57
58
  */
58
59
  export class BatchPromptBuilder<TContext = unknown, TData = unknown> {
60
+ constructor(private readonly promptSectionCache?: PromptSectionCache) { }
61
+
59
62
  /**
60
63
  * Build a combined prompt for a batch of Steps
61
64
  *
@@ -89,7 +92,7 @@ export class BatchPromptBuilder<TContext = unknown, TData = unknown> {
89
92
  }
90
93
 
91
94
  // Build the combined prompt using PromptComposer for consistency
92
- const composer = new PromptComposer<TContext, TData>(templateContext);
95
+ const composer = new PromptComposer<TContext, TData>(templateContext, this.promptSectionCache);
93
96
 
94
97
  // Add agent meta information
95
98
  await composer.addAgentMeta(agentOptions);
@@ -120,21 +123,21 @@ export class BatchPromptBuilder<TContext = unknown, TData = unknown> {
120
123
  const allGuidelines = [...(agentOptions.guidelines || []), ...route.getGuidelines()];
121
124
  await composer.addGuidelines(allGuidelines);
122
125
 
123
- // Add combined rules (agent + route)
124
- const allRules = [...(agentOptions.rules || []), ...route.getRules()];
125
- if (allRules.length > 0) {
126
- const renderedRules = await renderMany(allRules, templateContext);
126
+ // Add route-level rules (agent-level rules are already in addAgentMeta)
127
+ const routeRules = route.getRules();
128
+ if (routeRules.length > 0) {
129
+ const renderedRules = await renderMany(routeRules, templateContext);
127
130
  if (renderedRules.length > 0) {
128
- await composer.addInstruction(`Rules:\n- ${renderedRules.join('\n- ')}`);
131
+ await composer.addInstruction(`Route Rules:\n- ${renderedRules.join('\n- ')}`);
129
132
  }
130
133
  }
131
134
 
132
- // Add combined prohibitions (agent + route)
133
- const allProhibitions = [...(agentOptions.prohibitions || []), ...route.getProhibitions()];
134
- if (allProhibitions.length > 0) {
135
- const renderedProhibitions = await renderMany(allProhibitions, templateContext);
135
+ // Add route-level prohibitions (agent-level prohibitions are already in addAgentMeta)
136
+ const routeProhibitions = route.getProhibitions();
137
+ if (routeProhibitions.length > 0) {
138
+ const renderedProhibitions = await renderMany(routeProhibitions, templateContext);
136
139
  if (renderedProhibitions.length > 0) {
137
- await composer.addInstruction(`Prohibitions:\n- ${renderedProhibitions.join('\n- ')}`);
140
+ await composer.addInstruction(`Route Prohibitions:\n- ${renderedProhibitions.join('\n- ')}`);
138
141
  }
139
142
  }
140
143
 
@@ -0,0 +1,318 @@
1
+ /**
2
+ * CompactionEngine - Manages conversation history size through multi-layered compaction strategies.
3
+ *
4
+ * Strategies are applied in order of cost:
5
+ * 1. none - history is under threshold, no action needed
6
+ * 2. tool_result_budget - truncate oversized tool results (no LLM call)
7
+ * 3. micro_compact - compress verbose tool outputs inline (no LLM call)
8
+ * 4. auto_compact - summarize old messages via LLM provider
9
+ */
10
+
11
+ import log from "loglevel";
12
+ import type { HistoryItem } from "../types/history";
13
+ import type { CompactionOptions, CompactionResult } from "../types/compaction";
14
+
15
/**
 * Stateless helper that keeps a conversation history under a token budget.
 *
 * All members are static. The public entry point is {@link CompactionEngine.checkAndCompact},
 * which escalates through progressively more expensive strategies and stops at the first one
 * that brings the estimated size under `maxTokens * compactionThreshold`.
 */
export class CompactionEngine {
  /** Approximate number of characters per token used by the size heuristic. */
  private static readonly CHARS_PER_TOKEN = 4;

  /** Flat per-message character overhead charged for the role marker. */
  private static readonly ROLE_OVERHEAD_CHARS = 4;

  /**
   * Validate CompactionOptions. Throws on invalid values.
   *
   * `NaN` is rejected explicitly for every numeric field: it is of type "number" and makes
   * every range comparison false, so it would otherwise slip through.
   *
   * @param options - Options to validate.
   * @throws Error when `compactionThreshold` is outside [0.5, 0.95], `preserveRecentCount` is
   *   below 2, `maxToolResultChars` is not positive, or `maxTokens` is not a positive number.
   */
  static validateOptions(options: CompactionOptions): void {
    const {
      maxTokens,
      compactionThreshold,
      preserveRecentCount,
      maxToolResultChars,
    } = options;

    if (
      typeof compactionThreshold !== "number" ||
      Number.isNaN(compactionThreshold) ||
      compactionThreshold < 0.5 ||
      compactionThreshold > 0.95
    ) {
      throw new Error(
        `compactionThreshold must be between 0.5 and 0.95, got ${compactionThreshold}`
      );
    }
    if (
      typeof preserveRecentCount !== "number" ||
      Number.isNaN(preserveRecentCount) ||
      preserveRecentCount < 2
    ) {
      throw new Error(
        `preserveRecentCount must be >= 2, got ${preserveRecentCount}`
      );
    }
    if (
      typeof maxToolResultChars !== "number" ||
      Number.isNaN(maxToolResultChars) ||
      maxToolResultChars <= 0
    ) {
      throw new Error(
        `maxToolResultChars must be > 0, got ${maxToolResultChars}`
      );
    }
    // Without a positive budget the threshold is NaN or <= 0, the "under threshold" test can
    // never succeed, and every call would escalate all the way to an LLM summarization.
    if (
      typeof maxTokens !== "number" ||
      Number.isNaN(maxTokens) ||
      maxTokens <= 0
    ) {
      throw new Error(`maxTokens must be > 0, got ${maxTokens}`);
    }
  }

  /**
   * Estimate token count using a character-based heuristic (~4 chars/token).
   *
   * Each item contributes its content length (non-string content is measured by its JSON
   * form), its `name` length when it has a string `name`, and 4 characters of role overhead.
   *
   * @param history - Messages to measure.
   * @returns `Math.ceil(totalChars / 4)`; 0 for an empty history.
   */
  static estimateTokens(history: HistoryItem[]): number {
    let totalChars = 0;
    for (const item of history) {
      totalChars += CompactionEngine.itemChars(item);
    }
    return Math.ceil(totalChars / CompactionEngine.CHARS_PER_TOKEN);
  }

  /**
   * Truncate tool results that exceed the per-message character budget.
   *
   * Only items with `role === "tool"` are considered. An oversized result is cut to its first
   * `maxCharsPerResult` characters and a truncation notice is appended (so the stored content
   * is slightly longer than the budget). Every other item is returned by reference.
   *
   * @param history - Messages to process; the input array and its items are not mutated.
   * @param maxCharsPerResult - Maximum number of content characters kept per tool result.
   * @returns A new array of the same length.
   */
  static applyToolResultBudget(
    history: HistoryItem[],
    maxCharsPerResult: number
  ): HistoryItem[] {
    return history.map((item) => {
      if (item.role !== "tool") return item;

      const contentStr = CompactionEngine.contentToString(item.content);
      const totalChars = contentStr.length;
      if (totalChars <= maxCharsPerResult) return item;

      const preview = contentStr.slice(0, maxCharsPerResult);
      const truncatedContent = `${preview}\n\n[Truncated: ${totalChars} chars total, showing first ${maxCharsPerResult}]`;
      return { ...item, content: truncatedContent };
    });
  }

  /**
   * Render message content as text: strings pass through, anything else is JSON-encoded
   * (`null`/`undefined` are encoded as an empty JSON string).
   */
  private static contentToString(content: unknown): string {
    if (typeof content === "string") return content;
    // JSON.stringify yields undefined for values such as functions; treat that as empty text.
    return JSON.stringify(content ?? "") ?? "";
  }

  /** Number of characters a single item contributes to the token estimate. */
  private static itemChars(item: HistoryItem): number {
    let chars =
      CompactionEngine.contentToString(item.content).length +
      CompactionEngine.ROLE_OVERHEAD_CHARS;
    if ("name" in item && typeof item.name === "string") {
      chars += item.name.length;
    }
    return chars;
  }

  /**
   * Micro-compact: collapse every run of whitespace in older tool results to a single space.
   *
   * The last `preserveCount` messages are returned untouched, as are non-tool messages and
   * tool messages whose text is already compact (returned by reference so they are not
   * counted as modified).
   */
  private static microCompact(
    history: HistoryItem[],
    preserveCount: number
  ): HistoryItem[] {
    const cutoff = Math.max(0, history.length - preserveCount);

    const compacted = history.slice(0, cutoff).map((item) => {
      if (item.role !== "tool") return item;

      const compressed = CompactionEngine.contentToString(item.content)
        .replace(/\s+/g, " ")
        .trim();

      return compressed === item.content
        ? item
        : { ...item, content: compressed };
    });

    return [...compacted, ...history.slice(cutoff)];
  }

  /**
   * Count how many positions hold a different object in the two histories, plus the
   * difference in length. Comparison is by reference, so it relies on unchanged items being
   * passed through rather than cloned.
   */
  private static countDifferences(
    original: HistoryItem[],
    compacted: HistoryItem[]
  ): number {
    const shared = Math.min(original.length, compacted.length);
    let count = Math.abs(original.length - compacted.length);
    for (let i = 0; i < shared; i++) {
      if (original[i] !== compacted[i]) count++;
    }
    return count;
  }

  /**
   * Ask the configured provider for a summary of `messages`.
   *
   * @returns The summary text, or `null` when the provider throws or returns no usable text
   *   (the caller then falls back to truncation).
   */
  private static async summarizeMessages(
    messages: HistoryItem[],
    options: CompactionOptions
  ): Promise<string | null> {
    try {
      const messagesText = messages
        .map((m) => `[${m.role}]: ${CompactionEngine.contentToString(m.content)}`)
        .join("\n");

      const result = await options.provider.generateMessage({
        prompt: `Summarize the following conversation concisely, preserving key facts, decisions, and context:\n\n${messagesText}`,
        history: [],
        context: {},
        parameters: {
          maxOutputTokens: 1024,
          jsonSchema: {},
        },
      });

      const summary: unknown = result.message;
      // An empty summary would discard the old messages while preserving nothing of them.
      return typeof summary === "string" && summary.trim().length > 0
        ? summary
        : null;
    } catch (error: unknown) {
      // Best-effort: the caller falls back to truncation, but keep the cause discoverable.
      log.debug(
        "CompactionEngine: summarization request failed:",
        error instanceof Error ? error.message : error
      );
      return null;
    }
  }

  /**
   * Aggressive truncation fallback: drop the oldest messages (no LLM needed).
   *
   * The last `preserveRecentCount` messages are always kept. Older messages are then added
   * back, newest first, for as long as the estimate stays strictly below the threshold; the
   * first one that does not fit ends the scan.
   */
  private static aggressiveTruncate(
    history: HistoryItem[],
    options: CompactionOptions
  ): HistoryItem[] {
    const threshold = options.maxTokens * options.compactionThreshold;
    const preserveCount = options.preserveRecentCount;

    const preserved = history.slice(-preserveCount);
    const older = history.slice(0, -preserveCount);

    // Keep a running character total instead of re-estimating the whole candidate each step.
    let totalChars = 0;
    for (const item of preserved) {
      totalChars += CompactionEngine.itemChars(item);
    }

    let keptOlder = 0;
    for (const item of [...older].reverse()) {
      const nextChars = totalChars + CompactionEngine.itemChars(item);
      const nextTokens = Math.ceil(nextChars / CompactionEngine.CHARS_PER_TOKEN);
      if (nextTokens >= threshold) break;
      totalChars = nextChars;
      keptOlder++;
    }

    return [...older.slice(older.length - keptOlder), ...preserved];
  }

  /**
   * Multi-layered compaction strategy.
   *
   * Layer 1 (none): estimate is below `maxTokens * compactionThreshold`, or no message is
   *   old enough to be compacted; history is returned unchanged.
   * Layer 2 (tool_result_budget): truncate oversized older tool results.
   * Layer 3 (micro_compact): collapse whitespace in older tool results.
   * Layer 4 (auto_compact): replace older messages with an LLM-written summary; if that
   *   fails, drop the oldest messages instead.
   *
   * The last `preserveRecentCount` messages are NEVER modified or removed.
   *
   * @param history - Full conversation history; not mutated.
   * @param options - Compaction configuration, validated on every call.
   * @returns The (possibly rewritten) history, the strategy that produced it, its estimated
   *   token count, the number of messages affected, and the summary text when one was made.
   * @throws Error when `options` fails {@link CompactionEngine.validateOptions}.
   */
  static async checkAndCompact(
    history: HistoryItem[],
    options: CompactionOptions
  ): Promise<CompactionResult> {
    CompactionEngine.validateOptions(options);

    const threshold = options.maxTokens * options.compactionThreshold;
    const estimatedTokens = CompactionEngine.estimateTokens(history);

    // Work with one integer split point so every layer agrees on what counts as "recent".
    const preserveCount = Math.min(
      Math.floor(options.preserveRecentCount),
      history.length
    );
    const cutoff = history.length - preserveCount;

    // Layer 1: under budget, or every message is protected so nothing may be touched.
    // Summarizing an empty "old" slice would spend an LLM call and only add a message.
    if (estimatedTokens < threshold || cutoff === 0) {
      return {
        history,
        strategy: "none",
        estimatedTokens,
        messagesCompacted: 0,
      };
    }

    // Layer 2: tool result budgeting (cheapest, no LLM call), older messages only.
    const budgeted = [
      ...CompactionEngine.applyToolResultBudget(
        history.slice(0, cutoff),
        options.maxToolResultChars
      ),
      ...history.slice(cutoff),
    ];
    let newEstimate = CompactionEngine.estimateTokens(budgeted);
    if (newEstimate < threshold) {
      return {
        history: budgeted,
        strategy: "tool_result_budget",
        estimatedTokens: newEstimate,
        messagesCompacted: CompactionEngine.countDifferences(history, budgeted),
      };
    }

    // Layer 3: micro-compaction (whitespace collapse in older tool outputs).
    const microCompacted = CompactionEngine.microCompact(budgeted, preserveCount);
    newEstimate = CompactionEngine.estimateTokens(microCompacted);
    if (newEstimate < threshold) {
      return {
        history: microCompacted,
        strategy: "micro_compact",
        estimatedTokens: newEstimate,
        messagesCompacted: CompactionEngine.countDifferences(
          history,
          microCompacted
        ),
      };
    }

    // Layer 4: auto-compact (summarize old messages via LLM).
    const oldMessages = microCompacted.slice(0, cutoff);
    const recentMessages = microCompacted.slice(cutoff);

    const summary = await CompactionEngine.summarizeMessages(
      oldMessages,
      options
    );

    if (summary !== null) {
      const summaryItem: HistoryItem = {
        role: "system",
        content: `[Conversation Summary]\n${summary}`,
      };
      const finalHistory = [summaryItem, ...recentMessages];
      return {
        history: finalHistory,
        strategy: "auto_compact",
        estimatedTokens: CompactionEngine.estimateTokens(finalHistory),
        messagesCompacted: oldMessages.length,
        summary,
      };
    }

    // Fallback: LLM summarization failed, so drop the oldest messages instead.
    log.warn(
      "CompactionEngine: LLM summarization failed, falling back to aggressive truncation"
    );
    const truncated = CompactionEngine.aggressiveTruncate(
      microCompacted,
      options
    );
    return {
      history: truncated,
      strategy: "auto_compact",
      estimatedTokens: CompactionEngine.estimateTokens(truncated),
      messagesCompacted: history.length - truncated.length,
    };
  }
}