@falai/agent 1.1.3 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -0
- package/dist/cjs/core/Agent.d.ts +17 -1
- package/dist/cjs/core/Agent.d.ts.map +1 -1
- package/dist/cjs/core/Agent.js +47 -0
- package/dist/cjs/core/Agent.js.map +1 -1
- package/dist/cjs/core/BatchPromptBuilder.d.ts +3 -0
- package/dist/cjs/core/BatchPromptBuilder.d.ts.map +1 -1
- package/dist/cjs/core/BatchPromptBuilder.js +4 -1
- package/dist/cjs/core/BatchPromptBuilder.js.map +1 -1
- package/dist/cjs/core/CompactionEngine.d.ts +65 -0
- package/dist/cjs/core/CompactionEngine.d.ts.map +1 -0
- package/dist/cjs/core/CompactionEngine.js +251 -0
- package/dist/cjs/core/CompactionEngine.js.map +1 -0
- package/dist/cjs/core/PromptComposer.d.ts +8 -1
- package/dist/cjs/core/PromptComposer.d.ts.map +1 -1
- package/dist/cjs/core/PromptComposer.js +238 -126
- package/dist/cjs/core/PromptComposer.js.map +1 -1
- package/dist/cjs/core/PromptSectionCache.d.ts +57 -0
- package/dist/cjs/core/PromptSectionCache.d.ts.map +1 -0
- package/dist/cjs/core/PromptSectionCache.js +108 -0
- package/dist/cjs/core/PromptSectionCache.js.map +1 -0
- package/dist/cjs/core/ResponseEngine.d.ts +3 -0
- package/dist/cjs/core/ResponseEngine.d.ts.map +1 -1
- package/dist/cjs/core/ResponseEngine.js +10 -6
- package/dist/cjs/core/ResponseEngine.js.map +1 -1
- package/dist/cjs/core/ResponseModal.d.ts.map +1 -1
- package/dist/cjs/core/ResponseModal.js +75 -16
- package/dist/cjs/core/ResponseModal.js.map +1 -1
- package/dist/cjs/core/RoutingEngine.d.ts +10 -0
- package/dist/cjs/core/RoutingEngine.d.ts.map +1 -1
- package/dist/cjs/core/RoutingEngine.js +3 -2
- package/dist/cjs/core/RoutingEngine.js.map +1 -1
- package/dist/cjs/core/SessionManager.d.ts.map +1 -1
- package/dist/cjs/core/SessionManager.js +20 -0
- package/dist/cjs/core/SessionManager.js.map +1 -1
- package/dist/cjs/core/StreamingToolExecutor.d.ts +142 -0
- package/dist/cjs/core/StreamingToolExecutor.d.ts.map +1 -0
- package/dist/cjs/core/StreamingToolExecutor.js +455 -0
- package/dist/cjs/core/StreamingToolExecutor.js.map +1 -0
- package/dist/cjs/core/ToolManager.d.ts +18 -1
- package/dist/cjs/core/ToolManager.d.ts.map +1 -1
- package/dist/cjs/core/ToolManager.js +91 -0
- package/dist/cjs/core/ToolManager.js.map +1 -1
- package/dist/cjs/index.d.ts +5 -1
- package/dist/cjs/index.d.ts.map +1 -1
- package/dist/cjs/index.js +8 -2
- package/dist/cjs/index.js.map +1 -1
- package/dist/cjs/providers/AnthropicProvider.d.ts.map +1 -1
- package/dist/cjs/providers/AnthropicProvider.js +8 -7
- package/dist/cjs/providers/AnthropicProvider.js.map +1 -1
- package/dist/cjs/providers/GeminiProvider.d.ts +25 -0
- package/dist/cjs/providers/GeminiProvider.d.ts.map +1 -1
- package/dist/cjs/providers/GeminiProvider.js +79 -51
- package/dist/cjs/providers/GeminiProvider.js.map +1 -1
- package/dist/cjs/providers/OpenAIProvider.d.ts.map +1 -1
- package/dist/cjs/providers/OpenAIProvider.js +14 -6
- package/dist/cjs/providers/OpenAIProvider.js.map +1 -1
- package/dist/cjs/providers/OpenRouterProvider.d.ts.map +1 -1
- package/dist/cjs/providers/OpenRouterProvider.js +7 -6
- package/dist/cjs/providers/OpenRouterProvider.js.map +1 -1
- package/dist/cjs/types/agent.d.ts +44 -0
- package/dist/cjs/types/agent.d.ts.map +1 -1
- package/dist/cjs/types/agent.js.map +1 -1
- package/dist/cjs/types/compaction.d.ts +50 -0
- package/dist/cjs/types/compaction.d.ts.map +1 -0
- package/dist/cjs/types/compaction.js +6 -0
- package/dist/cjs/types/compaction.js.map +1 -0
- package/dist/cjs/types/index.d.ts +4 -2
- package/dist/cjs/types/index.d.ts.map +1 -1
- package/dist/cjs/types/index.js.map +1 -1
- package/dist/cjs/types/tool.d.ts +84 -0
- package/dist/cjs/types/tool.d.ts.map +1 -1
- package/dist/core/Agent.d.ts +17 -1
- package/dist/core/Agent.d.ts.map +1 -1
- package/dist/core/Agent.js +47 -0
- package/dist/core/Agent.js.map +1 -1
- package/dist/core/BatchPromptBuilder.d.ts +3 -0
- package/dist/core/BatchPromptBuilder.d.ts.map +1 -1
- package/dist/core/BatchPromptBuilder.js +4 -1
- package/dist/core/BatchPromptBuilder.js.map +1 -1
- package/dist/core/CompactionEngine.d.ts +65 -0
- package/dist/core/CompactionEngine.d.ts.map +1 -0
- package/dist/core/CompactionEngine.js +244 -0
- package/dist/core/CompactionEngine.js.map +1 -0
- package/dist/core/PromptComposer.d.ts +8 -1
- package/dist/core/PromptComposer.d.ts.map +1 -1
- package/dist/core/PromptComposer.js +238 -126
- package/dist/core/PromptComposer.js.map +1 -1
- package/dist/core/PromptSectionCache.d.ts +57 -0
- package/dist/core/PromptSectionCache.d.ts.map +1 -0
- package/dist/core/PromptSectionCache.js +104 -0
- package/dist/core/PromptSectionCache.js.map +1 -0
- package/dist/core/ResponseEngine.d.ts +3 -0
- package/dist/core/ResponseEngine.d.ts.map +1 -1
- package/dist/core/ResponseEngine.js +10 -6
- package/dist/core/ResponseEngine.js.map +1 -1
- package/dist/core/ResponseModal.d.ts.map +1 -1
- package/dist/core/ResponseModal.js +75 -16
- package/dist/core/ResponseModal.js.map +1 -1
- package/dist/core/RoutingEngine.d.ts +10 -0
- package/dist/core/RoutingEngine.d.ts.map +1 -1
- package/dist/core/RoutingEngine.js +3 -2
- package/dist/core/RoutingEngine.js.map +1 -1
- package/dist/core/SessionManager.d.ts.map +1 -1
- package/dist/core/SessionManager.js +17 -0
- package/dist/core/SessionManager.js.map +1 -1
- package/dist/core/StreamingToolExecutor.d.ts +142 -0
- package/dist/core/StreamingToolExecutor.d.ts.map +1 -0
- package/dist/core/StreamingToolExecutor.js +448 -0
- package/dist/core/StreamingToolExecutor.js.map +1 -0
- package/dist/core/ToolManager.d.ts +18 -1
- package/dist/core/ToolManager.d.ts.map +1 -1
- package/dist/core/ToolManager.js +91 -0
- package/dist/core/ToolManager.js.map +1 -1
- package/dist/index.d.ts +5 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +3 -0
- package/dist/index.js.map +1 -1
- package/dist/providers/AnthropicProvider.d.ts.map +1 -1
- package/dist/providers/AnthropicProvider.js +8 -7
- package/dist/providers/AnthropicProvider.js.map +1 -1
- package/dist/providers/GeminiProvider.d.ts +25 -0
- package/dist/providers/GeminiProvider.d.ts.map +1 -1
- package/dist/providers/GeminiProvider.js +79 -51
- package/dist/providers/GeminiProvider.js.map +1 -1
- package/dist/providers/OpenAIProvider.d.ts.map +1 -1
- package/dist/providers/OpenAIProvider.js +14 -6
- package/dist/providers/OpenAIProvider.js.map +1 -1
- package/dist/providers/OpenRouterProvider.d.ts.map +1 -1
- package/dist/providers/OpenRouterProvider.js +7 -6
- package/dist/providers/OpenRouterProvider.js.map +1 -1
- package/dist/types/agent.d.ts +44 -0
- package/dist/types/agent.d.ts.map +1 -1
- package/dist/types/agent.js.map +1 -1
- package/dist/types/compaction.d.ts +50 -0
- package/dist/types/compaction.d.ts.map +1 -0
- package/dist/types/compaction.js +5 -0
- package/dist/types/compaction.js.map +1 -0
- package/dist/types/index.d.ts +4 -2
- package/dist/types/index.d.ts.map +1 -1
- package/dist/types/index.js.map +1 -1
- package/dist/types/tool.d.ts +84 -0
- package/dist/types/tool.d.ts.map +1 -1
- package/docs/api/overview.md +140 -0
- package/docs/core/tools/enhanced-tool.md +186 -0
- package/docs/core/tools/streaming-execution.md +161 -0
- package/docs/guides/context-compaction.md +96 -0
- package/docs/guides/prompt-optimization.md +164 -0
- package/examples/advanced-patterns/context-compaction.ts +223 -0
- package/examples/advanced-patterns/streaming-responses.ts +85 -7
- package/examples/tools/enhanced-tool-metadata.ts +268 -0
- package/examples/tools/streaming-tool-execution.ts +283 -0
- package/package.json +1 -1
- package/src/core/Agent.ts +58 -2
- package/src/core/BatchPromptBuilder.ts +4 -1
- package/src/core/CompactionEngine.ts +318 -0
- package/src/core/PromptComposer.ts +259 -156
- package/src/core/PromptSectionCache.ts +136 -0
- package/src/core/ResponseEngine.ts +9 -6
- package/src/core/ResponseModal.ts +77 -16
- package/src/core/RoutingEngine.ts +13 -2
- package/src/core/SessionManager.ts +19 -0
- package/src/core/StreamingToolExecutor.ts +572 -0
- package/src/core/ToolManager.ts +151 -41
- package/src/index.ts +14 -0
- package/src/providers/AnthropicProvider.ts +11 -12
- package/src/providers/GeminiProvider.ts +83 -52
- package/src/providers/OpenAIProvider.ts +21 -13
- package/src/providers/OpenRouterProvider.ts +13 -13
- package/src/types/agent.ts +45 -0
- package/src/types/compaction.ts +52 -0
- package/src/types/index.ts +35 -14
- package/src/types/tool.ts +108 -0
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
# Streaming Tool Execution
|
|
2
|
+
|
|
3
|
+
The `StreamingToolExecutor` executes tools as they arrive from the LLM stream rather than waiting for the full response. It provides concurrency control, abort handling, and ordered result yielding.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
- Tools begin executing immediately as they are parsed from the LLM stream
|
|
8
|
+
- Read-only (concurrency-safe) tools run in parallel
|
|
9
|
+
- Write (non-concurrency-safe) tools run serially with exclusive access
|
|
10
|
+
- Results are always yielded in the original request order
|
|
11
|
+
- Progress messages bypass ordering and are delivered immediately
|
|
12
|
+
|
|
13
|
+
## Concurrency Control
|
|
14
|
+
|
|
15
|
+
The executor enforces a strict invariant at all times:
|
|
16
|
+
|
|
17
|
+
> Either **all** executing tools have `isConcurrencySafe === true`, **or** exactly **one** tool is executing with `isConcurrencySafe === false`.
|
|
18
|
+
|
|
19
|
+
Tools without the `isConcurrencySafe` method default to `false` (serial execution), preserving backward compatibility with plain `Tool` objects.
|
|
20
|
+
|
|
21
|
+
A configurable `maxParallel` limit (default: 10) caps the number of concurrently executing tools regardless of concurrency safety.
|
|
22
|
+
|
|
23
|
+
### Example: Mixed Read/Write Tools
|
|
24
|
+
|
|
25
|
+
```typescript
|
|
26
|
+
import { Agent, EnhancedTool } from "@falai/agent";
|
|
27
|
+
|
|
28
|
+
const readFile: EnhancedTool = {
|
|
29
|
+
id: "read-file",
|
|
30
|
+
name: "read_file",
|
|
31
|
+
description: "Read a file from disk",
|
|
32
|
+
parameters: {
|
|
33
|
+
type: "object",
|
|
34
|
+
properties: { path: { type: "string" } },
|
|
35
|
+
required: ["path"],
|
|
36
|
+
},
|
|
37
|
+
handler: async (ctx, args) => {
|
|
38
|
+
const content = await fs.readFile(args?.path as string, "utf-8");
|
|
39
|
+
return { data: content, success: true };
|
|
40
|
+
},
|
|
41
|
+
isConcurrencySafe: () => true, // safe to run in parallel
|
|
42
|
+
isReadOnly: () => true,
|
|
43
|
+
maxResultSizeChars: 50_000,
|
|
44
|
+
};
|
|
45
|
+
|
|
46
|
+
const writeFile: EnhancedTool = {
|
|
47
|
+
id: "write-file",
|
|
48
|
+
name: "write_file",
|
|
49
|
+
description: "Write content to a file",
|
|
50
|
+
parameters: {
|
|
51
|
+
type: "object",
|
|
52
|
+
properties: {
|
|
53
|
+
path: { type: "string" },
|
|
54
|
+
content: { type: "string" },
|
|
55
|
+
},
|
|
56
|
+
required: ["path", "content"],
|
|
57
|
+
},
|
|
58
|
+
handler: async (ctx, args) => {
|
|
59
|
+
await fs.writeFile(args?.path as string, args?.content as string);
|
|
60
|
+
return { success: true };
|
|
61
|
+
},
|
|
62
|
+
isConcurrencySafe: () => false, // must run exclusively
|
|
63
|
+
interruptBehavior: () => "block",
|
|
64
|
+
};
|
|
65
|
+
|
|
66
|
+
const agent = new Agent({
|
|
67
|
+
name: "CodeAssistant",
|
|
68
|
+
provider: anthropicProvider,
|
|
69
|
+
tools: [readFile, writeFile],
|
|
70
|
+
});
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
When the LLM requests `read_file` three times followed by `write_file`, the three reads execute in parallel. Once all reads complete, the write executes alone.
|
|
74
|
+
|
|
75
|
+
## Abort Behavior
|
|
76
|
+
|
|
77
|
+
### Sibling Abort
|
|
78
|
+
|
|
79
|
+
When a tool in a concurrent batch fails, all sibling tools in the same batch receive an abort signal. Each tool's `interruptBehavior` determines the response:
|
|
80
|
+
|
|
81
|
+
- `'cancel'` — tool is immediately aborted
|
|
82
|
+
- `'block'` (default) — tool is allowed to finish
|
|
83
|
+
|
|
84
|
+
### Parent AbortSignal
|
|
85
|
+
|
|
86
|
+
A parent `AbortSignal` can be passed via `StreamingToolExecutorOptions`. When it fires:
|
|
87
|
+
|
|
88
|
+
1. Tools with `interruptBehavior() === 'cancel'` are aborted immediately
|
|
89
|
+
2. Tools with `interruptBehavior() === 'block'` complete normally
|
|
90
|
+
3. No new queued tools are started
|
|
91
|
+
|
|
92
|
+
```typescript
|
|
93
|
+
const controller = new AbortController();
|
|
94
|
+
|
|
95
|
+
// Pass signal through agent options or directly to the executor
|
|
96
|
+
for await (const chunk of agent.respondStream({
|
|
97
|
+
history,
|
|
98
|
+
signal: controller.signal,
|
|
99
|
+
})) {
|
|
100
|
+
process.stdout.write(chunk.delta);
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
// Cancel from user action (in practice, call this from an event handler while the stream above is still being consumed)
|
|
104
|
+
controller.abort();
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
## Progress Reporting
|
|
108
|
+
|
|
109
|
+
Tools can emit progress messages during execution. These are yielded immediately to the caller without being buffered behind result ordering.
|
|
110
|
+
|
|
111
|
+
```typescript
|
|
112
|
+
for await (const chunk of agent.respondStream({ history })) {
|
|
113
|
+
if (chunk.toolExecution?.progress) {
|
|
114
|
+
console.log(`[progress] ${chunk.toolExecution.toolCallId}: ${chunk.toolExecution.progress}`);
|
|
115
|
+
}
|
|
116
|
+
if (chunk.toolExecution?.result) {
|
|
117
|
+
console.log(`[result] ${chunk.toolExecution.toolCallId}: done`);
|
|
118
|
+
}
|
|
119
|
+
process.stdout.write(chunk.delta);
|
|
120
|
+
}
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
## Result Ordering
|
|
124
|
+
|
|
125
|
+
Results are always yielded in the same order as the original tool call requests, regardless of actual completion order. If tool B finishes before tool A, tool B's result is buffered until tool A's result has been yielded.
|
|
126
|
+
|
|
127
|
+
## API Reference
|
|
128
|
+
|
|
129
|
+
### Constructor
|
|
130
|
+
|
|
131
|
+
```typescript
|
|
132
|
+
new StreamingToolExecutor<TContext, TData>(
|
|
133
|
+
toolContext: ToolContext<TContext, TData>,
|
|
134
|
+
options?: {
|
|
135
|
+
maxParallel?: number; // default: 10
|
|
136
|
+
signal?: AbortSignal; // parent abort signal
|
|
137
|
+
}
|
|
138
|
+
)
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
### Methods
|
|
142
|
+
|
|
143
|
+
| Method | Description |
|
|
144
|
+
|---|---|
|
|
145
|
+
| `addTool(toolCall, tool)` | Queue a tool for execution. Concurrency safety is evaluated once at queue time. |
|
|
146
|
+
| `getCompletedResults()` | Synchronous generator yielding available results in request order. |
|
|
147
|
+
| `getRemainingResults()` | Async generator yielding all results (waits for pending tools). |
|
|
148
|
+
| `discard()` | Stop processing new queued tools. Running tools continue per their `interruptBehavior`. |
|
|
149
|
+
| `getUpdatedContext()` | Return accumulated context updates from completed tools. |
|
|
150
|
+
| `hasUnfinishedTools()` | `true` if any tools are still queued or executing. |
|
|
151
|
+
|
|
152
|
+
### Default Behaviors for Plain `Tool` Objects
|
|
153
|
+
|
|
154
|
+
| Property | Default |
|
|
155
|
+
|---|---|
|
|
156
|
+
| `isConcurrencySafe` | `false` |
|
|
157
|
+
| `isReadOnly` | `false` |
|
|
158
|
+
| `isDestructive` | `false` |
|
|
159
|
+
| `interruptBehavior` | `'block'` |
|
|
160
|
+
|
|
161
|
+
Plain `Tool` objects work without modification — they execute serially and are allowed to complete on abort.
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
# Context Compaction
|
|
2
|
+
|
|
3
|
+
The `CompactionEngine` automatically manages conversation history size as the history approaches the token limit. It applies multi-layered strategies in order of cost, from cheap truncation to LLM-powered summarization.
|
|
4
|
+
|
|
5
|
+
## Compaction Strategies
|
|
6
|
+
|
|
7
|
+
Strategies are applied in order until the history fits within the token budget:
|
|
8
|
+
|
|
9
|
+
| Strategy | Cost | Description |
|
|
10
|
+
|---|---|---|
|
|
11
|
+
| `none` | Free | History is under threshold — no action taken |
|
|
12
|
+
| `tool_result_budget` | Free | Truncate oversized tool results with a notice |
|
|
13
|
+
| `micro_compact` | Free | Collapse whitespace in verbose tool outputs |
|
|
14
|
+
| `auto_compact` | LLM call | Summarize old messages via the configured AI provider |
|
|
15
|
+
|
|
16
|
+
If the LLM summarization fails, the engine falls back to aggressive truncation (removing oldest messages) and logs a warning. The next compaction attempt will retry summarization.
|
|
17
|
+
|
|
18
|
+
## Configuration
|
|
19
|
+
|
|
20
|
+
Compaction is configured at the agent level via the `compaction` option:
|
|
21
|
+
|
|
22
|
+
```typescript
|
|
23
|
+
import { Agent } from "@falai/agent";
|
|
24
|
+
|
|
25
|
+
const agent = new Agent({
|
|
26
|
+
name: "LongConversationAgent",
|
|
27
|
+
provider: anthropicProvider,
|
|
28
|
+
compaction: {
|
|
29
|
+
maxTokens: 100_000,
|
|
30
|
+
compactionThreshold: 0.8, // trigger at 80% of budget
|
|
31
|
+
preserveRecentCount: 10, // always keep last 10 messages
|
|
32
|
+
maxToolResultChars: 5_000, // truncate tool results over 5k chars
|
|
33
|
+
provider: anthropicProvider, // provider for LLM summarization
|
|
34
|
+
},
|
|
35
|
+
});
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
### CompactionOptions
|
|
39
|
+
|
|
40
|
+
| Option | Type | Constraint | Description |
|
|
41
|
+
|---|---|---|---|
|
|
42
|
+
| `maxTokens` | `number` | > 0 | Maximum token budget for the conversation |
|
|
43
|
+
| `compactionThreshold` | `number` | 0.5 – 0.95 | Ratio at which compaction triggers |
|
|
44
|
+
| `preserveRecentCount` | `number` | ≥ 2 | Recent messages that are never modified |
|
|
45
|
+
| `maxToolResultChars` | `number` | > 0 | Per-tool-result character limit before truncation |
|
|
46
|
+
| `provider` | `AiProvider` | — | Provider used for LLM summarization |
|
|
47
|
+
|
|
48
|
+
Invalid options throw at construction time.
|
|
49
|
+
|
|
50
|
+
## How It Works
|
|
51
|
+
|
|
52
|
+
When the `SessionManager` detects that estimated tokens exceed `maxTokens * compactionThreshold`, the `CompactionEngine` runs:
|
|
53
|
+
|
|
54
|
+
1. **Token estimation** — character-based heuristic (~4 chars/token), no external tokenizer needed
|
|
55
|
+
2. **Tool result budget** — truncate any tool result exceeding `maxToolResultChars` and append a notice such as `[Truncated: 12000 chars total, showing first 5000]`
|
|
56
|
+
3. **Micro-compact** — collapse whitespace in tool outputs for the compactable portion of history
|
|
57
|
+
4. **Auto-compact** — summarize old messages via the AI provider, replacing them with a `[Conversation Summary]` system message
|
|
58
|
+
|
|
59
|
+
The last `preserveRecentCount` messages are never modified or removed by any strategy.
|
|
60
|
+
|
|
61
|
+
## Manual Compaction
|
|
62
|
+
|
|
63
|
+
You can also use the `CompactionEngine` directly:
|
|
64
|
+
|
|
65
|
+
```typescript
|
|
66
|
+
import { CompactionEngine } from "@falai/agent";
|
|
67
|
+
|
|
68
|
+
const result = await CompactionEngine.checkAndCompact(history, {
|
|
69
|
+
maxTokens: 100_000,
|
|
70
|
+
compactionThreshold: 0.8,
|
|
71
|
+
preserveRecentCount: 10,
|
|
72
|
+
maxToolResultChars: 5_000,
|
|
73
|
+
provider: anthropicProvider,
|
|
74
|
+
});
|
|
75
|
+
|
|
76
|
+
console.log(result.strategy); // 'none' | 'tool_result_budget' | 'micro_compact' | 'auto_compact'
|
|
77
|
+
console.log(result.estimatedTokens); // tokens after compaction
|
|
78
|
+
console.log(result.messagesCompacted);
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
### Standalone Utilities
|
|
82
|
+
|
|
83
|
+
```typescript
|
|
84
|
+
// Estimate tokens for a history
|
|
85
|
+
const tokens = CompactionEngine.estimateTokens(history);
|
|
86
|
+
|
|
87
|
+
// Truncate tool results only
|
|
88
|
+
const budgeted = CompactionEngine.applyToolResultBudget(history, 5_000);
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
## Key Properties
|
|
92
|
+
|
|
93
|
+
- **Idempotent** — compacting already-compacted history with the same options produces the same result
|
|
94
|
+
- **Deterministic estimation** — `estimateTokens` always returns the same value for the same input
|
|
95
|
+
- **Preservation guarantee** — the last `preserveRecentCount` messages are never touched
|
|
96
|
+
- **Graceful degradation** — LLM failure falls back to truncation, never crashes
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
# Prompt Optimization
|
|
2
|
+
|
|
3
|
+
The `PromptSectionCache` optimizes prompt generation by memoizing static sections across turns and recomputing only dynamic sections per-turn. Combined with the native history format change, this reduces redundant computation and token usage.
|
|
4
|
+
|
|
5
|
+
## Section Types
|
|
6
|
+
|
|
7
|
+
Prompt sections are classified as either static or dynamic:
|
|
8
|
+
|
|
9
|
+
| Type | Behavior | Examples |
|
|
10
|
+
|---|---|---|
|
|
11
|
+
| `static` | Cached after first computation, reused across turns | Agent identity, glossary, knowledge base, route descriptions, scoring rules |
|
|
12
|
+
| `dynamic` | Recomputed on every `resolveAll()` call | Instructions, directives, available tools, guidelines |
|
|
13
|
+
|
|
14
|
+
Static sections only change when the underlying state changes (context update, session switch, route change). Dynamic sections depend on per-turn state and are always fresh.
|
|
15
|
+
|
|
16
|
+
## Configuration
|
|
17
|
+
|
|
18
|
+
Prompt caching is enabled by default. Configure it via the `promptCache` option on the agent:
|
|
19
|
+
|
|
20
|
+
```typescript
|
|
21
|
+
import { Agent } from "@falai/agent";
|
|
22
|
+
|
|
23
|
+
const agent = new Agent({
|
|
24
|
+
name: "MyAgent",
|
|
25
|
+
provider: anthropicProvider,
|
|
26
|
+
promptCache: {
|
|
27
|
+
enabled: true, // default: true
|
|
28
|
+
volatileKeys: [], // keys that always recompute, even if registered as static
|
|
29
|
+
},
|
|
30
|
+
});
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
### PromptCacheConfig
|
|
34
|
+
|
|
35
|
+
| Option | Type | Default | Description |
|
|
36
|
+
|---|---|---|---|
|
|
37
|
+
| `enabled` | `boolean` | `true` | Enable/disable section memoization |
|
|
38
|
+
| `volatileKeys` | `string[]` | `[]` | Section keys forced to recompute every turn |
|
|
39
|
+
|
|
40
|
+
Set `enabled: false` to disable caching entirely (useful for debugging):
|
|
41
|
+
|
|
42
|
+
```typescript
|
|
43
|
+
const agent = new Agent({
|
|
44
|
+
name: "DebugAgent",
|
|
45
|
+
provider: anthropicProvider,
|
|
46
|
+
promptCache: { enabled: false },
|
|
47
|
+
});
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
## PromptSectionCache API
|
|
51
|
+
|
|
52
|
+
### `register(key, type, compute)`
|
|
53
|
+
|
|
54
|
+
Register a section with a unique key, type (`'static'` or `'dynamic'`), and a compute function.
|
|
55
|
+
|
|
56
|
+
```typescript
|
|
57
|
+
cache.register("agentMeta", "static", async () => {
|
|
58
|
+
return "## Agent Identity\nYou are MyAgent.";
|
|
59
|
+
});
|
|
60
|
+
|
|
61
|
+
cache.register("directives", "dynamic", () => {
|
|
62
|
+
return "## Directives\n- Address the user's question";
|
|
63
|
+
});
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
### `get(key)`
|
|
67
|
+
|
|
68
|
+
Retrieve a section's value. Static sections return the cached value when available; dynamic sections always recompute.
|
|
69
|
+
|
|
70
|
+
### `resolveAll()`
|
|
71
|
+
|
|
72
|
+
Resolve all registered sections in registration order. Returns `(string | null)[]`.
|
|
73
|
+
|
|
74
|
+
### `invalidate(key)`
|
|
75
|
+
|
|
76
|
+
Force a specific section to recompute on the next `resolveAll()` call.
|
|
77
|
+
|
|
78
|
+
```typescript
|
|
79
|
+
cache.invalidate("knowledgeBase");
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
### `invalidateAll()`
|
|
83
|
+
|
|
84
|
+
Force all sections to recompute. Called automatically on session change or `/clear`.
|
|
85
|
+
|
|
86
|
+
```typescript
|
|
87
|
+
cache.invalidateAll();
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
## Automatic Cache Invalidation
|
|
91
|
+
|
|
92
|
+
The framework invalidates relevant caches automatically when state changes:
|
|
93
|
+
|
|
94
|
+
| Event | Sections Invalidated |
|
|
95
|
+
|---|---|
|
|
96
|
+
| `agent.updateContext()` | `agentMeta`, `knowledgeBase` |
|
|
97
|
+
| Session change / clear | All sections (`invalidateAll()`) |
|
|
98
|
+
| Route switch | Route-dependent sections (active routes, route rules, route knowledge base) |
|
|
99
|
+
|
|
100
|
+
No manual cache management is needed for typical usage.
|
|
101
|
+
|
|
102
|
+
## Native History Format
|
|
103
|
+
|
|
104
|
+
History is now sent as native provider messages via `GenerateMessageInput.history` instead of being JSON-serialized into the system prompt. This saves tokens (no JSON overhead) and lets providers optimize for their native message format.
|
|
105
|
+
|
|
106
|
+
### Migration from `addInteractionHistory()`
|
|
107
|
+
|
|
108
|
+
The `PromptComposer.addInteractionHistory()` method is deprecated. If you were calling it directly, remove the call and pass `history` to the provider instead, as shown below:
|
|
109
|
+
|
|
110
|
+
**Before:**
|
|
111
|
+
```typescript
|
|
112
|
+
const pc = new PromptComposer(context);
|
|
113
|
+
await pc.addAgentMeta(agentOptions);
|
|
114
|
+
await pc.addInteractionHistory(history); // embedded in prompt string
|
|
115
|
+
await pc.addLastMessage(lastMessage);
|
|
116
|
+
const prompt = await pc.build();
|
|
117
|
+
|
|
118
|
+
const response = await provider.generateMessage({ prompt, history: [] });
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
**After:**
|
|
122
|
+
```typescript
|
|
123
|
+
const pc = new PromptComposer(context, cache);
|
|
124
|
+
await pc.addAgentMeta(agentOptions);
|
|
125
|
+
// No addInteractionHistory() — history flows natively
|
|
126
|
+
const prompt = await pc.build();
|
|
127
|
+
|
|
128
|
+
const response = await provider.generateMessage({ prompt, history });
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
The `addInteractionHistory()` method still works for backward compatibility but is marked `@deprecated` and will be removed in a future version.
|
|
132
|
+
|
|
133
|
+
## Manual Cache Usage
|
|
134
|
+
|
|
135
|
+
You can use `PromptSectionCache` directly for custom prompt pipelines:
|
|
136
|
+
|
|
137
|
+
```typescript
|
|
138
|
+
import { PromptSectionCache } from "@falai/agent";
|
|
139
|
+
|
|
140
|
+
const cache = new PromptSectionCache({ enabled: true });
|
|
141
|
+
|
|
142
|
+
cache.register("identity", "static", () => "You are a helpful assistant.");
|
|
143
|
+
cache.register("tools", "dynamic", () => "Available: search, calculate");
|
|
144
|
+
|
|
145
|
+
// First call: both sections computed
|
|
146
|
+
const sections1 = await cache.resolveAll(); // ["You are a helpful assistant.", "Available: search, calculate"]
|
|
147
|
+
|
|
148
|
+
// Second call: identity served from cache, tools recomputed
|
|
149
|
+
const sections2 = await cache.resolveAll();
|
|
150
|
+
|
|
151
|
+
// Invalidate a specific section
|
|
152
|
+
cache.invalidate("identity");
|
|
153
|
+
|
|
154
|
+
// Next call: identity recomputed, tools recomputed (always)
|
|
155
|
+
const sections3 = await cache.resolveAll();
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
## Key Properties
|
|
159
|
+
|
|
160
|
+
- **Static sections cache** — computed once per session, reused across turns until invalidated
|
|
161
|
+
- **Dynamic sections recompute** — always fresh on every `resolveAll()` call
|
|
162
|
+
- **Automatic invalidation** — context updates, session changes, and route switches trigger targeted invalidation
|
|
163
|
+
- **Configurable** — disable caching or mark specific keys as volatile
|
|
164
|
+
- **Backward compatible** — `addInteractionHistory()` still works, just deprecated
|
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Context Compaction Example
|
|
3
|
+
*
|
|
4
|
+
* Demonstrates the CompactionEngine for managing conversation history size.
|
|
5
|
+
* Shows agent-level compaction config and how long conversations trigger
|
|
6
|
+
* automatic compaction through layered strategies.
|
|
7
|
+
*
|
|
8
|
+
* Key concepts:
|
|
9
|
+
* - Agent-level compaction configuration via `AgentCompactionConfig`
|
|
10
|
+
* - Token estimation using character-based heuristic (~4 chars/token)
|
|
11
|
+
* - Layered compaction strategies: tool_result_budget → micro_compact → auto_compact
|
|
12
|
+
* - Preservation of recent messages during compaction
|
|
13
|
+
* - Manual compaction via CompactionEngine API
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
import {
|
|
17
|
+
Agent,
|
|
18
|
+
CompactionEngine,
|
|
19
|
+
GeminiProvider,
|
|
20
|
+
type HistoryItem,
|
|
21
|
+
type CompactionOptions,
|
|
22
|
+
} from "../../src/index";
|
|
23
|
+
|
|
24
|
+
// --- Agent-level compaction config ---
|
|
25
|
+
|
|
26
|
+
async function demonstrateAgentCompaction() {
|
|
27
|
+
console.log("=== Agent-Level Compaction Config ===\n");
|
|
28
|
+
|
|
29
|
+
const provider = new GeminiProvider({
|
|
30
|
+
apiKey: process.env.GEMINI_API_KEY || "demo-key",
|
|
31
|
+
model: "models/gemini-2.5-flash",
|
|
32
|
+
});
|
|
33
|
+
|
|
34
|
+
// Compaction is configured at the agent level.
|
|
35
|
+
// The agent validates options on construction and wires the CompactionEngine
|
|
36
|
+
// into the SessionManager so compaction happens transparently.
|
|
37
|
+
const agent = new Agent({
|
|
38
|
+
name: "LongConversationAgent",
|
|
39
|
+
description: "An agent that handles long conversations gracefully",
|
|
40
|
+
provider,
|
|
41
|
+
compaction: {
|
|
42
|
+
maxTokens: 100_000,
|
|
43
|
+
compactionThreshold: 0.8, // compact at 80% of budget
|
|
44
|
+
preserveRecentCount: 10, // always keep last 10 messages
|
|
45
|
+
maxToolResultChars: 5_000, // truncate tool results over 5k chars
|
|
46
|
+
enabled: true,
|
|
47
|
+
},
|
|
48
|
+
});
|
|
49
|
+
|
|
50
|
+
console.log("Agent created with compaction config:");
|
|
51
|
+
console.log(" maxTokens: 100,000");
|
|
52
|
+
console.log(" compactionThreshold: 0.8 (triggers at 80k tokens)");
|
|
53
|
+
console.log(" preserveRecentCount: 10");
|
|
54
|
+
console.log(" maxToolResultChars: 5,000");
|
|
55
|
+
console.log();
|
|
56
|
+
console.log("Compaction runs automatically in SessionManager when history grows.");
|
|
57
|
+
console.log("No manual intervention needed for typical usage.\n");
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
// --- Manual CompactionEngine usage ---
|
|
61
|
+
|
|
62
|
+
/**
 * Exercises the static `CompactionEngine` helpers by hand on a synthetic
 * conversation and prints what each call reports:
 *
 * 1. `estimateTokens` on the untouched history,
 * 2. `applyToolResultBudget` with a 5,000-character cap, followed by a
 *    per-tool-message length comparison against the original,
 * 3. `checkAndCompact` with a deliberately tight budget and a mock provider.
 */
async function demonstrateManualCompaction() {
  console.log("=== Manual CompactionEngine Usage ===\n");

  // Ten messages; the three tool results (20k, 15k and 10k characters)
  // account for nearly all of the content.
  const history: HistoryItem[] = [
    { role: "user", content: "Analyze the codebase for security issues." },
    { role: "assistant", content: "I'll scan the files for common vulnerabilities." },
    { role: "tool", tool_call_id: "tc_1", name: "scan_files", content: "x".repeat(20_000) },
    { role: "assistant", content: "Found some issues. Let me check more files." },
    { role: "tool", tool_call_id: "tc_2", name: "scan_files", content: "y".repeat(15_000) },
    { role: "user", content: "What about SQL injection?" },
    { role: "assistant", content: "Let me search for raw SQL queries." },
    { role: "tool", tool_call_id: "tc_3", name: "search_code", content: "z".repeat(10_000) },
    { role: "user", content: "Summarize the findings." },
    { role: "assistant", content: "Here is a summary of the security audit." },
  ];

  // Step 1 - size estimate for the raw history.
  const initialTokens = CompactionEngine.estimateTokens(history);
  console.log(`Estimated tokens: ${initialTokens}`);
  console.log(`Total messages: ${history.length}\n`);

  // Step 2 - cap tool results; this path needs no provider.
  const trimmed = CompactionEngine.applyToolResultBudget(history, 5_000);
  const trimmedTokens = CompactionEngine.estimateTokens(trimmed);
  console.log("After tool result budget (maxChars=5000):");
  console.log(` Tokens: ${initialTokens} → ${trimmedTokens}`);

  // Report each tool message, comparing its length with the message at the
  // same index in the original history.
  trimmed.forEach((item, index) => {
    if (item.role !== "tool") {
      return;
    }
    const lengthAfter = item.content.length;
    const status = lengthAfter < history[index].content.length ? "truncated" : "unchanged";
    console.log(` Message ${index} (tool): ${status} (${lengthAfter} chars)`);
  });
  console.log();

  // Step 3 - full compaction. Real callers would hand over the agent's own
  // provider; this stand-in always resolves to the same canned message.
  const cannedProvider = {
    async generateMessage() {
      return {
        content: "Security audit found 3 potential SQL injection points and 2 XSS vulnerabilities.",
        toolCalls: [],
      };
    },
  };

  const compactionOptions: CompactionOptions = {
    maxTokens: 5_000, // tight budget to force compaction
    compactionThreshold: 0.8,
    preserveRecentCount: 4,
    maxToolResultChars: 2_000,
    provider: cannedProvider as any,
  };

  const outcome = await CompactionEngine.checkAndCompact(history, compactionOptions);
  const { strategy, estimatedTokens, messagesCompacted, summary } = outcome;

  console.log("Full compaction result:");
  console.log(` Strategy: ${strategy}`);
  console.log(` Estimated tokens: ${estimatedTokens}`);
  console.log(` Messages compacted: ${messagesCompacted}`);
  console.log(` History length: ${outcome.history.length} (was ${history.length})`);
  if (summary) {
    console.log(` Summary: "${summary}"`);
  }
}
|
|
127
|
+
|
|
128
|
+
// --- Demonstrating compaction strategies ---
|
|
129
|
+
|
|
130
|
+
/**
 * Runs `CompactionEngine.checkAndCompact` against three histories of
 * increasing size and prints the strategy reported for each one.
 *
 * The scenarios mirror the inline labels: a tiny history expected to need no
 * compaction, a history dominated by one 30,000-character tool result, and a
 * 50-message history of long user/assistant turns.
 */
async function demonstrateStrategies() {
  console.log("\n=== Compaction Strategy Ladder ===\n");

  // Stand-in provider that always resolves to the same canned summary.
  const cannedProvider = {
    async generateMessage() {
      return {
        content: "Conversation summary.",
        toolCalls: [],
      };
    },
  };

  const baseOptions: CompactionOptions = {
    maxTokens: 10_000,
    compactionThreshold: 0.8,
    preserveRecentCount: 2,
    maxToolResultChars: 1_000,
    provider: cannedProvider as any,
  };

  // Same settings as `baseOptions`, with only the token budget overridden.
  const withBudget = (maxTokens: number): CompactionOptions => ({
    ...baseOptions,
    maxTokens,
  });

  // Strategy: 'none' — history is well under budget
  const smallHistory: HistoryItem[] = [
    { role: "user", content: "Hello" },
    { role: "assistant", content: "Hi there!" },
  ];
  const smallResult = await CompactionEngine.checkAndCompact(smallHistory, baseOptions);
  console.log(`Small history (${CompactionEngine.estimateTokens(smallHistory)} tokens):`);
  console.log(` → Strategy: ${smallResult.strategy}\n`);

  // Strategy: 'tool_result_budget' — large tool results push over threshold
  const mediumHistory: HistoryItem[] = [
    { role: "user", content: "Analyze this." },
    { role: "tool", tool_call_id: "tc_m1", name: "analyze", content: "a".repeat(30_000) },
    { role: "user", content: "Thanks." },
    { role: "assistant", content: "You're welcome." },
  ];
  const mediumResult = await CompactionEngine.checkAndCompact(mediumHistory, withBudget(2_000));
  console.log(`Medium history with large tool result (${CompactionEngine.estimateTokens(mediumHistory)} tokens):`);
  console.log(` → Strategy: ${mediumResult.strategy}`);
  console.log(` → Tokens after: ${mediumResult.estimatedTokens}\n`);

  // Strategy: 'auto_compact' — many messages push well over budget
  const filler = "lorem ipsum ".repeat(100);
  const longHistory: HistoryItem[] = [];
  for (let turn = 0; turn < 50; turn++) {
    // Even turns are the user's, odd turns the assistant's.
    const role: "user" | "assistant" = turn % 2 === 0 ? "user" : "assistant";
    longHistory.push({ role, content: `Message ${turn}: ${filler}` });
  }
  const longResult = await CompactionEngine.checkAndCompact(longHistory, withBudget(5_000));
  console.log(`Long history (${CompactionEngine.estimateTokens(longHistory)} tokens, ${longHistory.length} messages):`);
  console.log(` → Strategy: ${longResult.strategy}`);
  console.log(` → Tokens after: ${longResult.estimatedTokens}`);
  console.log(` → Messages compacted: ${longResult.messagesCompacted}`);
}
|
|
189
|
+
|
|
190
|
+
// --- Validation demo ---
|
|
191
|
+
|
|
192
|
+
/**
 * Passes a set of deliberately out-of-range option objects to
 * `CompactionEngine.validateOptions` and prints, for each one, whether the
 * call threw and, if so, why.
 *
 * A case that does not throw is reported as "accepted (unexpected)". Thrown
 * values are narrowed before use: an `Error` contributes its `message`, and
 * anything else is stringified, so the output never reads "undefined".
 */
function demonstrateValidation() {
  console.log("\n=== CompactionOptions Validation ===\n");

  // Each fixture changes exactly one field relative to an otherwise
  // plausible configuration.
  const invalidConfigs = [
    { label: "threshold too low (0.3)", opts: { compactionThreshold: 0.3, preserveRecentCount: 4, maxToolResultChars: 1000, maxTokens: 10000 } },
    { label: "threshold too high (0.99)", opts: { compactionThreshold: 0.99, preserveRecentCount: 4, maxToolResultChars: 1000, maxTokens: 10000 } },
    { label: "preserveRecentCount < 2", opts: { compactionThreshold: 0.8, preserveRecentCount: 1, maxToolResultChars: 1000, maxTokens: 10000 } },
    { label: "maxToolResultChars <= 0", opts: { compactionThreshold: 0.8, preserveRecentCount: 4, maxToolResultChars: 0, maxTokens: 10000 } },
  ];

  for (const { label, opts } of invalidConfigs) {
    try {
      // The fixtures are partial option objects (no `provider`), so they are
      // cast rather than declared as CompactionOptions.
      CompactionEngine.validateOptions(opts as any);
      console.log(` ${label}: accepted (unexpected)`);
    } catch (e: unknown) {
      // A throw can carry any value; only read `.message` from real Errors.
      const reason = e instanceof Error ? e.message : String(e);
      console.log(` ${label}: rejected — ${reason}`);
    }
  }
}
|
|
211
|
+
|
|
212
|
+
/**
 * Runs every demo in this file in a fixed order: the three asynchronous
 * demos one after another, each awaited before the next starts, followed by
 * the synchronous validation demo.
 */
async function main() {
  const asyncDemos = [
    demonstrateAgentCompaction,
    demonstrateManualCompaction,
    demonstrateStrategies,
  ];

  for (const runDemo of asyncDemos) {
    await runDemo();
  }

  demonstrateValidation();
}
|
|
218
|
+
|
|
219
|
+
// Run the demos only when this module is the script Node was started with;
// importing it from elsewhere just exposes `main`.
// NOTE(review): the check is a plain string comparison against
// "file://" + argv[1], so it only matches when argv[1] is written exactly as
// it appears in the module URL - confirm this holds on the platforms you target.
if (`file://${process.argv[1]}` === import.meta.url) {
  // A rejected run is reported on stderr via console.error.
  main().catch((error) => console.error(error));
}

export { main };
|