@falai/agent 1.1.3 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (173)
  1. package/README.md +9 -0
  2. package/dist/cjs/core/Agent.d.ts +17 -1
  3. package/dist/cjs/core/Agent.d.ts.map +1 -1
  4. package/dist/cjs/core/Agent.js +47 -0
  5. package/dist/cjs/core/Agent.js.map +1 -1
  6. package/dist/cjs/core/BatchPromptBuilder.d.ts +3 -0
  7. package/dist/cjs/core/BatchPromptBuilder.d.ts.map +1 -1
  8. package/dist/cjs/core/BatchPromptBuilder.js +4 -1
  9. package/dist/cjs/core/BatchPromptBuilder.js.map +1 -1
  10. package/dist/cjs/core/CompactionEngine.d.ts +65 -0
  11. package/dist/cjs/core/CompactionEngine.d.ts.map +1 -0
  12. package/dist/cjs/core/CompactionEngine.js +251 -0
  13. package/dist/cjs/core/CompactionEngine.js.map +1 -0
  14. package/dist/cjs/core/PromptComposer.d.ts +8 -1
  15. package/dist/cjs/core/PromptComposer.d.ts.map +1 -1
  16. package/dist/cjs/core/PromptComposer.js +238 -126
  17. package/dist/cjs/core/PromptComposer.js.map +1 -1
  18. package/dist/cjs/core/PromptSectionCache.d.ts +57 -0
  19. package/dist/cjs/core/PromptSectionCache.d.ts.map +1 -0
  20. package/dist/cjs/core/PromptSectionCache.js +108 -0
  21. package/dist/cjs/core/PromptSectionCache.js.map +1 -0
  22. package/dist/cjs/core/ResponseEngine.d.ts +3 -0
  23. package/dist/cjs/core/ResponseEngine.d.ts.map +1 -1
  24. package/dist/cjs/core/ResponseEngine.js +10 -6
  25. package/dist/cjs/core/ResponseEngine.js.map +1 -1
  26. package/dist/cjs/core/ResponseModal.d.ts.map +1 -1
  27. package/dist/cjs/core/ResponseModal.js +75 -16
  28. package/dist/cjs/core/ResponseModal.js.map +1 -1
  29. package/dist/cjs/core/RoutingEngine.d.ts +10 -0
  30. package/dist/cjs/core/RoutingEngine.d.ts.map +1 -1
  31. package/dist/cjs/core/RoutingEngine.js +3 -2
  32. package/dist/cjs/core/RoutingEngine.js.map +1 -1
  33. package/dist/cjs/core/SessionManager.d.ts.map +1 -1
  34. package/dist/cjs/core/SessionManager.js +20 -0
  35. package/dist/cjs/core/SessionManager.js.map +1 -1
  36. package/dist/cjs/core/StreamingToolExecutor.d.ts +142 -0
  37. package/dist/cjs/core/StreamingToolExecutor.d.ts.map +1 -0
  38. package/dist/cjs/core/StreamingToolExecutor.js +455 -0
  39. package/dist/cjs/core/StreamingToolExecutor.js.map +1 -0
  40. package/dist/cjs/core/ToolManager.d.ts +18 -1
  41. package/dist/cjs/core/ToolManager.d.ts.map +1 -1
  42. package/dist/cjs/core/ToolManager.js +91 -0
  43. package/dist/cjs/core/ToolManager.js.map +1 -1
  44. package/dist/cjs/index.d.ts +5 -1
  45. package/dist/cjs/index.d.ts.map +1 -1
  46. package/dist/cjs/index.js +8 -2
  47. package/dist/cjs/index.js.map +1 -1
  48. package/dist/cjs/providers/AnthropicProvider.d.ts.map +1 -1
  49. package/dist/cjs/providers/AnthropicProvider.js +8 -7
  50. package/dist/cjs/providers/AnthropicProvider.js.map +1 -1
  51. package/dist/cjs/providers/GeminiProvider.d.ts +25 -0
  52. package/dist/cjs/providers/GeminiProvider.d.ts.map +1 -1
  53. package/dist/cjs/providers/GeminiProvider.js +79 -51
  54. package/dist/cjs/providers/GeminiProvider.js.map +1 -1
  55. package/dist/cjs/providers/OpenAIProvider.d.ts.map +1 -1
  56. package/dist/cjs/providers/OpenAIProvider.js +14 -6
  57. package/dist/cjs/providers/OpenAIProvider.js.map +1 -1
  58. package/dist/cjs/providers/OpenRouterProvider.d.ts.map +1 -1
  59. package/dist/cjs/providers/OpenRouterProvider.js +7 -6
  60. package/dist/cjs/providers/OpenRouterProvider.js.map +1 -1
  61. package/dist/cjs/types/agent.d.ts +44 -0
  62. package/dist/cjs/types/agent.d.ts.map +1 -1
  63. package/dist/cjs/types/agent.js.map +1 -1
  64. package/dist/cjs/types/compaction.d.ts +50 -0
  65. package/dist/cjs/types/compaction.d.ts.map +1 -0
  66. package/dist/cjs/types/compaction.js +6 -0
  67. package/dist/cjs/types/compaction.js.map +1 -0
  68. package/dist/cjs/types/index.d.ts +4 -2
  69. package/dist/cjs/types/index.d.ts.map +1 -1
  70. package/dist/cjs/types/index.js.map +1 -1
  71. package/dist/cjs/types/tool.d.ts +84 -0
  72. package/dist/cjs/types/tool.d.ts.map +1 -1
  73. package/dist/core/Agent.d.ts +17 -1
  74. package/dist/core/Agent.d.ts.map +1 -1
  75. package/dist/core/Agent.js +47 -0
  76. package/dist/core/Agent.js.map +1 -1
  77. package/dist/core/BatchPromptBuilder.d.ts +3 -0
  78. package/dist/core/BatchPromptBuilder.d.ts.map +1 -1
  79. package/dist/core/BatchPromptBuilder.js +4 -1
  80. package/dist/core/BatchPromptBuilder.js.map +1 -1
  81. package/dist/core/CompactionEngine.d.ts +65 -0
  82. package/dist/core/CompactionEngine.d.ts.map +1 -0
  83. package/dist/core/CompactionEngine.js +244 -0
  84. package/dist/core/CompactionEngine.js.map +1 -0
  85. package/dist/core/PromptComposer.d.ts +8 -1
  86. package/dist/core/PromptComposer.d.ts.map +1 -1
  87. package/dist/core/PromptComposer.js +238 -126
  88. package/dist/core/PromptComposer.js.map +1 -1
  89. package/dist/core/PromptSectionCache.d.ts +57 -0
  90. package/dist/core/PromptSectionCache.d.ts.map +1 -0
  91. package/dist/core/PromptSectionCache.js +104 -0
  92. package/dist/core/PromptSectionCache.js.map +1 -0
  93. package/dist/core/ResponseEngine.d.ts +3 -0
  94. package/dist/core/ResponseEngine.d.ts.map +1 -1
  95. package/dist/core/ResponseEngine.js +10 -6
  96. package/dist/core/ResponseEngine.js.map +1 -1
  97. package/dist/core/ResponseModal.d.ts.map +1 -1
  98. package/dist/core/ResponseModal.js +75 -16
  99. package/dist/core/ResponseModal.js.map +1 -1
  100. package/dist/core/RoutingEngine.d.ts +10 -0
  101. package/dist/core/RoutingEngine.d.ts.map +1 -1
  102. package/dist/core/RoutingEngine.js +3 -2
  103. package/dist/core/RoutingEngine.js.map +1 -1
  104. package/dist/core/SessionManager.d.ts.map +1 -1
  105. package/dist/core/SessionManager.js +17 -0
  106. package/dist/core/SessionManager.js.map +1 -1
  107. package/dist/core/StreamingToolExecutor.d.ts +142 -0
  108. package/dist/core/StreamingToolExecutor.d.ts.map +1 -0
  109. package/dist/core/StreamingToolExecutor.js +448 -0
  110. package/dist/core/StreamingToolExecutor.js.map +1 -0
  111. package/dist/core/ToolManager.d.ts +18 -1
  112. package/dist/core/ToolManager.d.ts.map +1 -1
  113. package/dist/core/ToolManager.js +91 -0
  114. package/dist/core/ToolManager.js.map +1 -1
  115. package/dist/index.d.ts +5 -1
  116. package/dist/index.d.ts.map +1 -1
  117. package/dist/index.js +3 -0
  118. package/dist/index.js.map +1 -1
  119. package/dist/providers/AnthropicProvider.d.ts.map +1 -1
  120. package/dist/providers/AnthropicProvider.js +8 -7
  121. package/dist/providers/AnthropicProvider.js.map +1 -1
  122. package/dist/providers/GeminiProvider.d.ts +25 -0
  123. package/dist/providers/GeminiProvider.d.ts.map +1 -1
  124. package/dist/providers/GeminiProvider.js +79 -51
  125. package/dist/providers/GeminiProvider.js.map +1 -1
  126. package/dist/providers/OpenAIProvider.d.ts.map +1 -1
  127. package/dist/providers/OpenAIProvider.js +14 -6
  128. package/dist/providers/OpenAIProvider.js.map +1 -1
  129. package/dist/providers/OpenRouterProvider.d.ts.map +1 -1
  130. package/dist/providers/OpenRouterProvider.js +7 -6
  131. package/dist/providers/OpenRouterProvider.js.map +1 -1
  132. package/dist/types/agent.d.ts +44 -0
  133. package/dist/types/agent.d.ts.map +1 -1
  134. package/dist/types/agent.js.map +1 -1
  135. package/dist/types/compaction.d.ts +50 -0
  136. package/dist/types/compaction.d.ts.map +1 -0
  137. package/dist/types/compaction.js +5 -0
  138. package/dist/types/compaction.js.map +1 -0
  139. package/dist/types/index.d.ts +4 -2
  140. package/dist/types/index.d.ts.map +1 -1
  141. package/dist/types/index.js.map +1 -1
  142. package/dist/types/tool.d.ts +84 -0
  143. package/dist/types/tool.d.ts.map +1 -1
  144. package/docs/api/overview.md +140 -0
  145. package/docs/core/tools/enhanced-tool.md +186 -0
  146. package/docs/core/tools/streaming-execution.md +161 -0
  147. package/docs/guides/context-compaction.md +96 -0
  148. package/docs/guides/prompt-optimization.md +164 -0
  149. package/examples/advanced-patterns/context-compaction.ts +223 -0
  150. package/examples/advanced-patterns/streaming-responses.ts +85 -7
  151. package/examples/tools/enhanced-tool-metadata.ts +268 -0
  152. package/examples/tools/streaming-tool-execution.ts +283 -0
  153. package/package.json +1 -1
  154. package/src/core/Agent.ts +58 -2
  155. package/src/core/BatchPromptBuilder.ts +4 -1
  156. package/src/core/CompactionEngine.ts +318 -0
  157. package/src/core/PromptComposer.ts +259 -156
  158. package/src/core/PromptSectionCache.ts +136 -0
  159. package/src/core/ResponseEngine.ts +9 -6
  160. package/src/core/ResponseModal.ts +77 -16
  161. package/src/core/RoutingEngine.ts +13 -2
  162. package/src/core/SessionManager.ts +19 -0
  163. package/src/core/StreamingToolExecutor.ts +572 -0
  164. package/src/core/ToolManager.ts +151 -41
  165. package/src/index.ts +14 -0
  166. package/src/providers/AnthropicProvider.ts +11 -12
  167. package/src/providers/GeminiProvider.ts +83 -52
  168. package/src/providers/OpenAIProvider.ts +21 -13
  169. package/src/providers/OpenRouterProvider.ts +13 -13
  170. package/src/types/agent.ts +45 -0
  171. package/src/types/compaction.ts +52 -0
  172. package/src/types/index.ts +35 -14
  173. package/src/types/tool.ts +108 -0
@@ -0,0 +1,161 @@
1
+ # Streaming Tool Execution
2
+
3
+ The `StreamingToolExecutor` executes tools as they arrive from the LLM stream rather than waiting for the full response. It provides concurrency control, abort handling, and ordered result yielding.
4
+
5
+ ## Overview
6
+
7
+ - Tools begin executing immediately as they are parsed from the LLM stream
8
+ - Read-only (concurrency-safe) tools run in parallel
9
+ - Write (non-concurrency-safe) tools run serially with exclusive access
10
+ - Results are always yielded in the original request order
11
+ - Progress messages bypass ordering and are delivered immediately
12
+
13
+ ## Concurrency Control
14
+
15
+ The executor enforces a strict invariant at all times:
16
+
17
+ > Either **all** executing tools have `isConcurrencySafe === true`, **or** exactly **one** tool is executing with `isConcurrencySafe === false`.
18
+
19
+ Tools without the `isConcurrencySafe` method default to `false` (serial execution), preserving backward compatibility with plain `Tool` objects.
20
+
21
+ A configurable `maxParallel` limit (default: 10) caps the number of concurrently executing tools regardless of concurrency safety.
22
+
23
+ ### Example: Mixed Read/Write Tools
24
+
25
+ ```typescript
26
+ import { Agent, EnhancedTool } from "@falai/agent";
27
+
28
+ const readFile: EnhancedTool = {
29
+ id: "read-file",
30
+ name: "read_file",
31
+ description: "Read a file from disk",
32
+ parameters: {
33
+ type: "object",
34
+ properties: { path: { type: "string" } },
35
+ required: ["path"],
36
+ },
37
+ handler: async (ctx, args) => {
38
+ const content = await fs.readFile(args?.path as string, "utf-8");
39
+ return { data: content, success: true };
40
+ },
41
+ isConcurrencySafe: () => true, // safe to run in parallel
42
+ isReadOnly: () => true,
43
+ maxResultSizeChars: 50_000,
44
+ };
45
+
46
+ const writeFile: EnhancedTool = {
47
+ id: "write-file",
48
+ name: "write_file",
49
+ description: "Write content to a file",
50
+ parameters: {
51
+ type: "object",
52
+ properties: {
53
+ path: { type: "string" },
54
+ content: { type: "string" },
55
+ },
56
+ required: ["path", "content"],
57
+ },
58
+ handler: async (ctx, args) => {
59
+ await fs.writeFile(args?.path as string, args?.content as string);
60
+ return { success: true };
61
+ },
62
+ isConcurrencySafe: () => false, // must run exclusively
63
+ interruptBehavior: () => "block",
64
+ };
65
+
66
+ const agent = new Agent({
67
+ name: "CodeAssistant",
68
+ provider: anthropicProvider,
69
+ tools: [readFile, writeFile],
70
+ });
71
+ ```
72
+
73
+ When the LLM requests `read_file` three times followed by `write_file`, the three reads execute in parallel. Once all reads complete, the write executes alone.
74
+
75
+ ## Abort Behavior
76
+
77
+ ### Sibling Abort
78
+
79
+ When a tool in a concurrent batch fails, all sibling tools in the same batch receive an abort signal. Each tool's `interruptBehavior` determines the response:
80
+
81
+ - `'cancel'` — tool is immediately aborted
82
+ - `'block'` (default) — tool is allowed to finish
83
+
84
+ ### Parent AbortSignal
85
+
86
+ A parent `AbortSignal` can be passed via `StreamingToolExecutorOptions`. When it fires:
87
+
88
+ 1. Tools with `interruptBehavior() === 'cancel'` are aborted immediately
89
+ 2. Tools with `interruptBehavior() === 'block'` complete normally
90
+ 3. No new queued tools are started
91
+
92
+ ```typescript
93
+ const controller = new AbortController();
94
+
95
+ // Pass signal through agent options or directly to the executor
96
+ for await (const chunk of agent.respondStream({
97
+ history,
98
+ signal: controller.signal,
99
+ })) {
100
+ process.stdout.write(chunk.delta);
101
+ }
102
+
103
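+ // Cancel from a user action — call this while the stream above is still running (e.g. from an event handler), not after the loop has finished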
104
+ controller.abort();
105
+ ```
106
+
107
+ ## Progress Reporting
108
+
109
+ Tools can emit progress messages during execution. These are yielded immediately to the caller without being buffered behind result ordering.
110
+
111
+ ```typescript
112
+ for await (const chunk of agent.respondStream({ history })) {
113
+ if (chunk.toolExecution?.progress) {
114
+ console.log(`[progress] ${chunk.toolExecution.toolCallId}: ${chunk.toolExecution.progress}`);
115
+ }
116
+ if (chunk.toolExecution?.result) {
117
+ console.log(`[result] ${chunk.toolExecution.toolCallId}: done`);
118
+ }
119
+ process.stdout.write(chunk.delta);
120
+ }
121
+ ```
122
+
123
+ ## Result Ordering
124
+
125
+ Results are always yielded in the same order as the original tool call requests, regardless of actual completion order. If tool B finishes before tool A (which was requested first), tool B's result is buffered until tool A's result has been yielded.
126
+
127
+ ## API Reference
128
+
129
+ ### Constructor
130
+
131
+ ```typescript
132
+ new StreamingToolExecutor<TContext, TData>(
133
+ toolContext: ToolContext<TContext, TData>,
134
+ options?: {
135
+ maxParallel?: number; // default: 10
136
+ signal?: AbortSignal; // parent abort signal
137
+ }
138
+ )
139
+ ```
140
+
141
+ ### Methods
142
+
143
+ | Method | Description |
144
+ |---|---|
145
+ | `addTool(toolCall, tool)` | Queue a tool for execution. Concurrency safety is evaluated once at queue time. |
146
+ | `getCompletedResults()` | Synchronous generator yielding available results in request order. |
147
+ | `getRemainingResults()` | Async generator yielding all results (waits for pending tools). |
148
+ | `discard()` | Stop processing new queued tools. Running tools continue per their `interruptBehavior`. |
149
+ | `getUpdatedContext()` | Return accumulated context updates from completed tools. |
150
+ | `hasUnfinishedTools()` | `true` if any tools are still queued or executing. |
151
+
152
+ ### Default Behaviors for Plain `Tool` Objects
153
+
154
+ | Property | Default |
155
+ |---|---|
156
+ | `isConcurrencySafe` | `false` |
157
+ | `isReadOnly` | `false` |
158
+ | `isDestructive` | `false` |
159
+ | `interruptBehavior` | `'block'` |
160
+
161
+ Plain `Tool` objects work without modification — they execute serially and are allowed to complete on abort.
@@ -0,0 +1,96 @@
1
+ # Context Compaction
2
+
3
+ The `CompactionEngine` automatically manages conversation history size when approaching token limits. It applies multi-layered strategies in order of cost, from cheap truncation to LLM-powered summarization.
4
+
5
+ ## Compaction Strategies
6
+
7
+ Strategies are tried in the order listed, stopping as soon as the history fits within the token budget (`none` means no compaction was needed):
8
+
9
+ | Strategy | Cost | Description |
10
+ |---|---|---|
11
+ | `none` | Free | History is under threshold — no action taken |
12
+ | `tool_result_budget` | Free | Truncate oversized tool results with a notice |
13
+ | `micro_compact` | Free | Collapse whitespace in verbose tool outputs |
14
+ | `auto_compact` | LLM call | Summarize old messages via the configured AI provider |
15
+
16
+ If the LLM summarization fails, the engine falls back to aggressive truncation (removing oldest messages) and logs a warning. The next compaction attempt will retry summarization.
17
+
18
+ ## Configuration
19
+
20
+ Compaction is configured at the agent level via the `compaction` option:
21
+
22
+ ```typescript
23
+ import { Agent } from "@falai/agent";
24
+
25
+ const agent = new Agent({
26
+ name: "LongConversationAgent",
27
+ provider: anthropicProvider,
28
+ compaction: {
29
+ maxTokens: 100_000,
30
+ compactionThreshold: 0.8, // trigger at 80% of budget
31
+ preserveRecentCount: 10, // always keep last 10 messages
32
+ maxToolResultChars: 5_000, // truncate tool results over 5k chars
33
+ provider: anthropicProvider, // provider for LLM summarization
34
+ },
35
+ });
36
+ ```
37
+
38
+ ### CompactionOptions
39
+
40
+ | Option | Type | Constraint | Description |
41
+ |---|---|---|---|
42
+ | `maxTokens` | `number` | > 0 | Maximum token budget for the conversation |
43
+ | `compactionThreshold` | `number` | 0.5 – 0.95 | Ratio at which compaction triggers |
44
+ | `preserveRecentCount` | `number` | ≥ 2 | Recent messages that are never modified |
45
+ | `maxToolResultChars` | `number` | > 0 | Per-tool-result character limit before truncation |
46
+ | `provider` | `AiProvider` | — | Provider used for LLM summarization |
47
+
48
+ Invalid options are rejected with a thrown error at construction time.
49
+
50
+ ## How It Works
51
+
52
+ When the `SessionManager` detects that estimated tokens exceed `maxTokens * compactionThreshold`, the `CompactionEngine` runs:
53
+
54
+ 1. **Token estimation** — character-based heuristic (~4 chars/token), no external tokenizer needed
55
+ 2. **Tool result budget** — truncate any tool result exceeding `maxToolResultChars`, append a notice like `[Truncated: 12000 chars total, showing first 5000]`
56
+ 3. **Micro-compact** — collapse whitespace in tool outputs for the compactable portion of history
57
+ 4. **Auto-compact** — summarize old messages via the AI provider, replacing them with a `[Conversation Summary]` system message
58
+
59
+ The last `preserveRecentCount` messages are never modified or removed by any strategy.
60
+
61
+ ## Manual Compaction
62
+
63
+ You can also use the `CompactionEngine` directly:
64
+
65
+ ```typescript
66
+ import { CompactionEngine } from "@falai/agent";
67
+
68
+ const result = await CompactionEngine.checkAndCompact(history, {
69
+ maxTokens: 100_000,
70
+ compactionThreshold: 0.8,
71
+ preserveRecentCount: 10,
72
+ maxToolResultChars: 5_000,
73
+ provider: anthropicProvider,
74
+ });
75
+
76
+ console.log(result.strategy); // 'none' | 'tool_result_budget' | 'micro_compact' | 'auto_compact'
77
+ console.log(result.estimatedTokens); // tokens after compaction
78
+ console.log(result.messagesCompacted);
79
+ ```
80
+
81
+ ### Standalone Utilities
82
+
83
+ ```typescript
84
+ // Estimate tokens for a history
85
+ const tokens = CompactionEngine.estimateTokens(history);
86
+
87
+ // Truncate tool results only
88
+ const budgeted = CompactionEngine.applyToolResultBudget(history, 5_000);
89
+ ```
90
+
91
+ ## Key Properties
92
+
93
+ - **Idempotent** — compacting already-compacted history with the same options produces the same result
94
+ - **Deterministic estimation** — `estimateTokens` always returns the same value for the same input
95
+ - **Preservation guarantee** — the last `preserveRecentCount` messages are never touched
96
+ - **Graceful degradation** — LLM failure falls back to truncation, never crashes
@@ -0,0 +1,164 @@
1
+ # Prompt Optimization
2
+
3
+ The `PromptSectionCache` optimizes prompt generation by memoizing static sections across turns and recomputing only dynamic sections per-turn. Combined with the native history format change, this reduces redundant computation and token usage.
4
+
5
+ ## Section Types
6
+
7
+ Prompt sections are classified as either static or dynamic:
8
+
9
+ | Type | Behavior | Examples |
10
+ |---|---|---|
11
+ | `static` | Cached after first computation, reused across turns | Agent identity, glossary, knowledge base, route descriptions, scoring rules |
12
+ | `dynamic` | Recomputed on every `resolveAll()` call | Instructions, directives, available tools, guidelines |
13
+
14
+ Static sections only change when the underlying state changes (context update, session switch, route change). Dynamic sections depend on per-turn state and are always fresh.
15
+
16
+ ## Configuration
17
+
18
+ Prompt caching is enabled by default. Configure it via the `promptCache` option on the agent:
19
+
20
+ ```typescript
21
+ import { Agent } from "@falai/agent";
22
+
23
+ const agent = new Agent({
24
+ name: "MyAgent",
25
+ provider: anthropicProvider,
26
+ promptCache: {
27
+ enabled: true, // default: true
28
+ volatileKeys: [], // keys that always recompute, even if registered as static
29
+ },
30
+ });
31
+ ```
32
+
33
+ ### PromptCacheConfig
34
+
35
+ | Option | Type | Default | Description |
36
+ |---|---|---|---|
37
+ | `enabled` | `boolean` | `true` | Enable/disable section memoization |
38
+ | `volatileKeys` | `string[]` | `[]` | Section keys forced to recompute every turn |
39
+
40
+ Set `enabled: false` to disable caching entirely (useful for debugging):
41
+
42
+ ```typescript
43
+ const agent = new Agent({
44
+ name: "DebugAgent",
45
+ provider: anthropicProvider,
46
+ promptCache: { enabled: false },
47
+ });
48
+ ```
49
+
50
+ ## PromptSectionCache API
51
+
52
+ ### `register(key, type, compute)`
53
+
54
+ Register a section with a unique key, type (`'static'` or `'dynamic'`), and a compute function.
55
+
56
+ ```typescript
57
+ cache.register("agentMeta", "static", async () => {
58
+ return "## Agent Identity\nYou are MyAgent.";
59
+ });
60
+
61
+ cache.register("directives", "dynamic", () => {
62
+ return "## Directives\n- Address the user's question";
63
+ });
64
+ ```
65
+
66
+ ### `get(key)`
67
+
68
+ Retrieve a section's value. Static sections return the cached value when available; dynamic sections always recompute.
69
+
70
+ ### `resolveAll()`
71
+
72
+ Resolve all registered sections in registration order. Compute functions may be async, so `await` the call; the awaited result is a `(string | null)[]`.
73
+
74
+ ### `invalidate(key)`
75
+
76
+ Force a specific section to recompute on the next `resolveAll()` call.
77
+
78
+ ```typescript
79
+ cache.invalidate("knowledgeBase");
80
+ ```
81
+
82
+ ### `invalidateAll()`
83
+
84
+ Force all sections to recompute. Called automatically on session change or `/clear`.
85
+
86
+ ```typescript
87
+ cache.invalidateAll();
88
+ ```
89
+
90
+ ## Automatic Cache Invalidation
91
+
92
+ The framework invalidates relevant caches automatically when state changes:
93
+
94
+ | Event | Sections Invalidated |
95
+ |---|---|
96
+ | `agent.updateContext()` | `agentMeta`, `knowledgeBase` |
97
+ | Session change / clear | All sections (`invalidateAll()`) |
98
+ | Route switch | Route-dependent sections (active routes, route rules, route knowledge base) |
99
+
100
+ No manual cache management is needed for typical usage.
101
+
102
+ ## Native History Format
103
+
104
+ History is now sent as native provider messages via `GenerateMessageInput.history` instead of being JSON-serialized into the system prompt. This saves tokens (no JSON overhead) and lets providers optimize for their native message format.
105
+
106
+ ### Migration from `addInteractionHistory()`
107
+
108
+ The `PromptComposer.addInteractionHistory()` method is deprecated. If you were calling it directly:
109
+
110
+ **Before:**
111
+ ```typescript
112
+ const pc = new PromptComposer(context);
113
+ await pc.addAgentMeta(agentOptions);
114
+ await pc.addInteractionHistory(history); // embedded in prompt string
115
+ await pc.addLastMessage(lastMessage);
116
+ const prompt = await pc.build();
117
+
118
+ const response = await provider.generateMessage({ prompt, history: [] });
119
+ ```
120
+
121
+ **After:**
122
+ ```typescript
123
+ const pc = new PromptComposer(context, cache);
124
+ await pc.addAgentMeta(agentOptions);
125
+ // No addInteractionHistory() — history flows natively
126
+ const prompt = await pc.build();
127
+
128
+ const response = await provider.generateMessage({ prompt, history });
129
+ ```
130
+
131
+ The `addInteractionHistory()` method still works for backward compatibility but is marked `@deprecated` and will be removed in a future version.
132
+
133
+ ## Manual Cache Usage
134
+
135
+ You can use `PromptSectionCache` directly for custom prompt pipelines:
136
+
137
+ ```typescript
138
+ import { PromptSectionCache } from "@falai/agent";
139
+
140
+ const cache = new PromptSectionCache({ enabled: true });
141
+
142
+ cache.register("identity", "static", () => "You are a helpful assistant.");
143
+ cache.register("tools", "dynamic", () => "Available: search, calculate");
144
+
145
+ // First call: both sections computed
146
+ const sections1 = await cache.resolveAll(); // ["You are a helpful assistant.", "Available: search, calculate"]
147
+
148
+ // Second call: identity served from cache, tools recomputed
149
+ const sections2 = await cache.resolveAll();
150
+
151
+ // Invalidate a specific section
152
+ cache.invalidate("identity");
153
+
154
+ // Next call: identity recomputed, tools recomputed (always)
155
+ const sections3 = await cache.resolveAll();
156
+ ```
157
+
158
+ ## Key Properties
159
+
160
+ - **Static sections cache** — computed once per session, reused across turns until invalidated
161
+ - **Dynamic sections recompute** — always fresh on every `resolveAll()` call
162
+ - **Automatic invalidation** — context updates, session changes, and route switches trigger targeted invalidation
163
+ - **Configurable** — disable caching or mark specific keys as volatile
164
+ - **Backward compatible** — `addInteractionHistory()` still works, just deprecated
@@ -0,0 +1,223 @@
1
+ /**
2
+ * Context Compaction Example
3
+ *
4
+ * Demonstrates the CompactionEngine for managing conversation history size.
5
+ * Shows agent-level compaction config and how long conversations trigger
6
+ * automatic compaction through layered strategies.
7
+ *
8
+ * Key concepts:
9
+ * - Agent-level compaction configuration via `AgentCompactionConfig`
10
+ * - Token estimation using character-based heuristic (~4 chars/token)
11
+ * - Layered compaction strategies: tool_result_budget → micro_compact → auto_compact
12
+ * - Preservation of recent messages during compaction
13
+ * - Manual compaction via CompactionEngine API
14
+ */
15
+
16
+ import {
17
+ Agent,
18
+ CompactionEngine,
19
+ GeminiProvider,
20
+ type HistoryItem,
21
+ type CompactionOptions,
22
+ } from "../../src/index";
23
+
24
+ // --- Agent-level compaction config ---
25
+
26
+ async function demonstrateAgentCompaction() {
27
+ console.log("=== Agent-Level Compaction Config ===\n");
28
+
29
+ const provider = new GeminiProvider({
30
+ apiKey: process.env.GEMINI_API_KEY || "demo-key",
31
+ model: "models/gemini-2.5-flash",
32
+ });
33
+
34
+ // Compaction is configured at the agent level.
35
+ // The agent validates options on construction and wires the CompactionEngine
36
+ // into the SessionManager so compaction happens transparently.
37
+ const agent = new Agent({
38
+ name: "LongConversationAgent",
39
+ description: "An agent that handles long conversations gracefully",
40
+ provider,
41
+ compaction: {
42
+ maxTokens: 100_000,
43
+ compactionThreshold: 0.8, // compact at 80% of budget
44
+ preserveRecentCount: 10, // always keep last 10 messages
45
+ maxToolResultChars: 5_000, // truncate tool results over 5k chars
46
+ enabled: true,
47
+ },
48
+ });
49
+
50
+ console.log("Agent created with compaction config:");
51
+ console.log(" maxTokens: 100,000");
52
+ console.log(" compactionThreshold: 0.8 (triggers at 80k tokens)");
53
+ console.log(" preserveRecentCount: 10");
54
+ console.log(" maxToolResultChars: 5,000");
55
+ console.log();
56
+ console.log("Compaction runs automatically in SessionManager when history grows.");
57
+ console.log("No manual intervention needed for typical usage.\n");
58
+ }
59
+
60
+ // --- Manual CompactionEngine usage ---
61
+
62
+ async function demonstrateManualCompaction() {
63
+ console.log("=== Manual CompactionEngine Usage ===\n");
64
+
65
+ // Build a synthetic history with large tool results
66
+ const history: HistoryItem[] = [
67
+ { role: "user", content: "Analyze the codebase for security issues." },
68
+ { role: "assistant", content: "I'll scan the files for common vulnerabilities." },
69
+ { role: "tool", tool_call_id: "tc_1", name: "scan_files", content: "x".repeat(20_000) },
70
+ { role: "assistant", content: "Found some issues. Let me check more files." },
71
+ { role: "tool", tool_call_id: "tc_2", name: "scan_files", content: "y".repeat(15_000) },
72
+ { role: "user", content: "What about SQL injection?" },
73
+ { role: "assistant", content: "Let me search for raw SQL queries." },
74
+ { role: "tool", tool_call_id: "tc_3", name: "search_code", content: "z".repeat(10_000) },
75
+ { role: "user", content: "Summarize the findings." },
76
+ { role: "assistant", content: "Here is a summary of the security audit." },
77
+ ];
78
+
79
+ // 1. Token estimation
80
+ const tokens = CompactionEngine.estimateTokens(history);
81
+ console.log(`Estimated tokens: ${tokens}`);
82
+ console.log(`Total messages: ${history.length}\n`);
83
+
84
+ // 2. Tool result budgeting (no LLM call needed)
85
+ const budgeted = CompactionEngine.applyToolResultBudget(history, 5_000);
86
+ const budgetedTokens = CompactionEngine.estimateTokens(budgeted);
87
+ console.log("After tool result budget (maxChars=5000):");
88
+ console.log(` Tokens: ${tokens} → ${budgetedTokens}`);
89
+
90
+ for (let i = 0; i < budgeted.length; i++) {
91
+ if (budgeted[i].role === "tool") {
92
+ const truncated = budgeted[i].content.length < history[i].content.length;
93
+ console.log(` Message ${i} (tool): ${truncated ? "truncated" : "unchanged"} (${budgeted[i].content.length} chars)`);
94
+ }
95
+ }
96
+ console.log();
97
+
98
+ // 3. Full compaction with a mock provider
99
+ // In real usage you'd pass the agent's provider for LLM summarization.
100
+ // Here we show the layered strategy selection.
101
+ const mockProvider = {
102
+ generateMessage: async () => ({
103
+ content: "Security audit found 3 potential SQL injection points and 2 XSS vulnerabilities.",
104
+ toolCalls: [],
105
+ }),
106
+ };
107
+
108
+ const options: CompactionOptions = {
109
+ maxTokens: 5_000, // tight budget to force compaction
110
+ compactionThreshold: 0.8,
111
+ preserveRecentCount: 4,
112
+ maxToolResultChars: 2_000,
113
+ provider: mockProvider as any,
114
+ };
115
+
116
+ const result = await CompactionEngine.checkAndCompact(history, options);
117
+
118
+ console.log("Full compaction result:");
119
+ console.log(` Strategy: ${result.strategy}`);
120
+ console.log(` Estimated tokens: ${result.estimatedTokens}`);
121
+ console.log(` Messages compacted: ${result.messagesCompacted}`);
122
+ console.log(` History length: ${result.history.length} (was ${history.length})`);
123
+ if (result.summary) {
124
+ console.log(` Summary: "${result.summary}"`);
125
+ }
126
+ }
127
+
128
+ // --- Demonstrating compaction strategies ---
129
+
130
+ async function demonstrateStrategies() {
131
+ console.log("\n=== Compaction Strategy Ladder ===\n");
132
+
133
+ const smallHistory: HistoryItem[] = [
134
+ { role: "user", content: "Hello" },
135
+ { role: "assistant", content: "Hi there!" },
136
+ ];
137
+
138
+ const mockProvider = {
139
+ generateMessage: async () => ({
140
+ content: "Conversation summary.",
141
+ toolCalls: [],
142
+ }),
143
+ };
144
+
145
+ const baseOptions: CompactionOptions = {
146
+ maxTokens: 10_000,
147
+ compactionThreshold: 0.8,
148
+ preserveRecentCount: 2,
149
+ maxToolResultChars: 1_000,
150
+ provider: mockProvider as any,
151
+ };
152
+
153
+ // Strategy: 'none' — history is well under budget
154
+ const r1 = await CompactionEngine.checkAndCompact(smallHistory, baseOptions);
155
+ console.log(`Small history (${CompactionEngine.estimateTokens(smallHistory)} tokens):`);
156
+ console.log(` → Strategy: ${r1.strategy}\n`);
157
+
158
+ // Strategy: 'tool_result_budget' — large tool results push over threshold
159
+ const mediumHistory: HistoryItem[] = [
160
+ { role: "user", content: "Analyze this." },
161
+ { role: "tool", tool_call_id: "tc_m1", name: "analyze", content: "a".repeat(30_000) },
162
+ { role: "user", content: "Thanks." },
163
+ { role: "assistant", content: "You're welcome." },
164
+ ];
165
+
166
+ const r2 = await CompactionEngine.checkAndCompact(mediumHistory, {
167
+ ...baseOptions,
168
+ maxTokens: 2_000,
169
+ });
170
+ console.log(`Medium history with large tool result (${CompactionEngine.estimateTokens(mediumHistory)} tokens):`);
171
+ console.log(` → Strategy: ${r2.strategy}`);
172
+ console.log(` → Tokens after: ${r2.estimatedTokens}\n`);
173
+
174
+ // Strategy: 'auto_compact' — many messages push well over budget
175
+ const longHistory: HistoryItem[] = Array.from({ length: 50 }, (_, i) => ({
176
+ role: (i % 2 === 0 ? "user" : "assistant") as "user" | "assistant",
177
+ content: `Message ${i}: ${"lorem ipsum ".repeat(100)}`,
178
+ }));
179
+
180
+ const r3 = await CompactionEngine.checkAndCompact(longHistory, {
181
+ ...baseOptions,
182
+ maxTokens: 5_000,
183
+ });
184
+ console.log(`Long history (${CompactionEngine.estimateTokens(longHistory)} tokens, ${longHistory.length} messages):`);
185
+ console.log(` → Strategy: ${r3.strategy}`);
186
+ console.log(` → Tokens after: ${r3.estimatedTokens}`);
187
+ console.log(` → Messages compacted: ${r3.messagesCompacted}`);
188
+ }
189
+
190
+ // --- Validation demo ---
191
+
192
+ function demonstrateValidation() {
193
+ console.log("\n=== CompactionOptions Validation ===\n");
194
+
195
+ const invalidConfigs = [
196
+ { label: "threshold too low (0.3)", opts: { compactionThreshold: 0.3, preserveRecentCount: 4, maxToolResultChars: 1000, maxTokens: 10000 } },
197
+ { label: "threshold too high (0.99)", opts: { compactionThreshold: 0.99, preserveRecentCount: 4, maxToolResultChars: 1000, maxTokens: 10000 } },
198
+ { label: "preserveRecentCount < 2", opts: { compactionThreshold: 0.8, preserveRecentCount: 1, maxToolResultChars: 1000, maxTokens: 10000 } },
199
+ { label: "maxToolResultChars <= 0", opts: { compactionThreshold: 0.8, preserveRecentCount: 4, maxToolResultChars: 0, maxTokens: 10000 } },
200
+ ];
201
+
202
+ for (const { label, opts } of invalidConfigs) {
203
+ try {
204
+ CompactionEngine.validateOptions(opts as any);
205
+ console.log(` ${label}: accepted (unexpected)`);
206
+ } catch (e) {
207
+ console.log(` ${label}: rejected — ${(e as Error).message}`);
208
+ }
209
+ }
210
+ }
211
+
212
+ async function main() {
213
+ await demonstrateAgentCompaction();
214
+ await demonstrateManualCompaction();
215
+ await demonstrateStrategies();
216
+ demonstrateValidation();
217
+ }
218
+
219
+ if (import.meta.url === `file://${process.argv[1]}`) {
220
+ main().catch(console.error);
221
+ }
222
+
223
+ export { main };