brownian-code 2026.2.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +97 -0
- package/bin/brownian +25 -0
- package/env.example +21 -0
- package/package.json +87 -0
- package/src/agent/agent.test.ts +414 -0
- package/src/agent/agent.ts +385 -0
- package/src/agent/index.ts +27 -0
- package/src/agent/prompts.ts +271 -0
- package/src/agent/scratchpad.test.ts +482 -0
- package/src/agent/scratchpad.ts +526 -0
- package/src/agent/token-counter.test.ts +59 -0
- package/src/agent/token-counter.ts +33 -0
- package/src/agent/types.ts +137 -0
- package/src/cli.tsx +385 -0
- package/src/commands/builtin.test.ts +271 -0
- package/src/commands/builtin.ts +200 -0
- package/src/commands/registry.test.ts +188 -0
- package/src/commands/registry.ts +111 -0
- package/src/commands/types.ts +64 -0
- package/src/components/AgentEventView.tsx +487 -0
- package/src/components/AnswerBox.tsx +81 -0
- package/src/components/ApiKeyPrompt.tsx +75 -0
- package/src/components/CommandMenu.test.tsx +64 -0
- package/src/components/CommandMenu.tsx +38 -0
- package/src/components/CursorText.tsx +43 -0
- package/src/components/DebugPanel.tsx +48 -0
- package/src/components/ErrorBox.test.tsx +58 -0
- package/src/components/ErrorBox.tsx +26 -0
- package/src/components/HelpView.test.tsx +70 -0
- package/src/components/HelpView.tsx +61 -0
- package/src/components/HistoryItemView.tsx +108 -0
- package/src/components/Input.tsx +193 -0
- package/src/components/Intro.test.tsx +59 -0
- package/src/components/Intro.tsx +35 -0
- package/src/components/ModelSelector.tsx +288 -0
- package/src/components/StatusBar.test.tsx +78 -0
- package/src/components/StatusBar.tsx +56 -0
- package/src/components/WorkingIndicator.tsx +133 -0
- package/src/components/index.ts +23 -0
- package/src/e2e/agent-flow.test.ts +378 -0
- package/src/evals/components/EvalApp.tsx +206 -0
- package/src/evals/components/EvalCurrentQuestion.tsx +42 -0
- package/src/evals/components/EvalProgress.tsx +33 -0
- package/src/evals/components/EvalRecentResults.tsx +63 -0
- package/src/evals/components/EvalStats.tsx +49 -0
- package/src/evals/components/index.ts +5 -0
- package/src/evals/dataset/crypto_agent.csv +16 -0
- package/src/evals/run.ts +355 -0
- package/src/gateway/channels/whatsapp/auth-store.ts +15 -0
- package/src/gateway/channels/whatsapp/inbound.ts +86 -0
- package/src/gateway/channels/whatsapp/login.ts +28 -0
- package/src/gateway/channels/whatsapp/outbound.ts +27 -0
- package/src/gateway/channels/whatsapp/session.ts +69 -0
- package/src/gateway/config.ts +81 -0
- package/src/gateway/index.ts +62 -0
- package/src/hooks/useAgentRunner.ts +317 -0
- package/src/hooks/useDebugLogs.ts +22 -0
- package/src/hooks/useInputHistory.ts +106 -0
- package/src/hooks/useModelSelection.ts +249 -0
- package/src/hooks/useTextBuffer.test.ts +121 -0
- package/src/hooks/useTextBuffer.ts +97 -0
- package/src/index.tsx +74 -0
- package/src/mcp/cache.ts +205 -0
- package/src/mcp/client.test.ts +126 -0
- package/src/mcp/client.ts +145 -0
- package/src/mcp/index.ts +2 -0
- package/src/model/llm.test.ts +158 -0
- package/src/model/llm.ts +233 -0
- package/src/providers.ts +94 -0
- package/src/skills/index.ts +17 -0
- package/src/skills/loader.ts +73 -0
- package/src/skills/registry.ts +125 -0
- package/src/skills/types.ts +31 -0
- package/src/test-utils/mocks.ts +110 -0
- package/src/theme.ts +21 -0
- package/src/tools/browser/browser.ts +357 -0
- package/src/tools/browser/index.ts +1 -0
- package/src/tools/crypto/hive-tools.ts +171 -0
- package/src/tools/crypto/index.ts +1 -0
- package/src/tools/descriptions/browser.ts +105 -0
- package/src/tools/descriptions/crypto-search.ts +58 -0
- package/src/tools/descriptions/index.ts +8 -0
- package/src/tools/descriptions/web-fetch.ts +44 -0
- package/src/tools/descriptions/web-search.ts +26 -0
- package/src/tools/fetch/cache.ts +95 -0
- package/src/tools/fetch/external-content.ts +200 -0
- package/src/tools/fetch/index.ts +1 -0
- package/src/tools/fetch/web-fetch-utils.ts +122 -0
- package/src/tools/fetch/web-fetch.ts +371 -0
- package/src/tools/index.ts +12 -0
- package/src/tools/registry.ts +130 -0
- package/src/tools/search/exa.ts +43 -0
- package/src/tools/search/index.ts +2 -0
- package/src/tools/search/tavily.ts +35 -0
- package/src/tools/skill.ts +62 -0
- package/src/tools/types.ts +53 -0
- package/src/utils/ai-message.ts +26 -0
- package/src/utils/config.ts +54 -0
- package/src/utils/cost-calculator.test.ts +101 -0
- package/src/utils/cost-calculator.ts +74 -0
- package/src/utils/env.ts +101 -0
- package/src/utils/error-classifier.test.ts +146 -0
- package/src/utils/error-classifier.ts +91 -0
- package/src/utils/in-memory-chat-history.test.ts +291 -0
- package/src/utils/in-memory-chat-history.ts +224 -0
- package/src/utils/index.ts +19 -0
- package/src/utils/input-key-handlers.test.ts +155 -0
- package/src/utils/input-key-handlers.ts +64 -0
- package/src/utils/logger.ts +67 -0
- package/src/utils/long-term-chat-history.ts +138 -0
- package/src/utils/markdown-table.ts +227 -0
- package/src/utils/ollama.ts +37 -0
- package/src/utils/progress-channel.ts +84 -0
- package/src/utils/text-navigation.test.ts +222 -0
- package/src/utils/text-navigation.ts +81 -0
- package/src/utils/thinking-verbs.ts +29 -0
- package/src/utils/tokens.test.ts +163 -0
- package/src/utils/tokens.ts +67 -0
- package/src/utils/tool-description.ts +88 -0
|
@@ -0,0 +1,526 @@
|
|
|
1
|
+
import { existsSync, mkdirSync, appendFileSync, readFileSync } from 'fs';
|
|
2
|
+
import { join } from 'path';
|
|
3
|
+
import { createHash } from 'crypto';
|
|
4
|
+
|
|
5
|
+
/**
 * A single completed tool invocation, in the shape handed to external
 * consumers (e.g., DoneEvent).
 */
export interface ToolCallRecord {
  /** Name of the tool that was invoked. */
  tool: string;
  /** Arguments the tool was invoked with. */
  args: Record<string, unknown>;
  /** Tool output, serialized as a string. */
  result: string;
}
|
|
13
|
+
|
|
14
|
+
/**
 * One tool result together with the call that produced it, as supplied
 * to final answer generation.
 */
export interface ToolContext {
  /** Name of the tool that produced the result. */
  toolName: string;
  /** Arguments the tool was invoked with. */
  args: Record<string, unknown>;
  /** Tool output, serialized as a string. */
  result: string;
}
|
|
22
|
+
|
|
23
|
+
/**
 * One line of the scratchpad log.
 *
 * `init` and `thinking` entries carry `content`; `tool_result` entries
 * carry `toolName`, `args` and `result`.
 */
export interface ScratchpadEntry {
  /** Discriminates which of the optional fields below are populated. */
  type: 'init' | 'tool_result' | 'thinking';
  /** Timestamp string recorded when the entry was created. */
  timestamp: string;

  // --- init / thinking ---
  /** Text payload of an `init` or `thinking` entry. */
  content?: string;

  // --- tool_result ---
  /** Name of the tool that produced the result. */
  toolName?: string;
  /** Arguments the tool was invoked with. */
  args?: Record<string, unknown>;
  /** Stored as parsed object when possible, string otherwise. */
  result?: unknown;
}
|
|
33
|
+
|
|
34
|
+
/**
 * Settings for the per-query tool call limits.
 */
export interface ToolLimitConfig {
  /** Max calls per tool per query (default: 3). */
  maxCallsPerTool: number;
  /** Query similarity threshold, in the range 0-1 (default: 0.7). */
  similarityThreshold: number;
}
|
|
43
|
+
|
|
44
|
+
/**
 * Usage snapshot for one tool, used by the graceful exit mechanism.
 */
export interface ToolUsageStatus {
  /** Name of the tool this status describes. */
  toolName: string;
  /** Number of calls recorded for the tool. */
  callCount: number;
  /** Suggested maximum number of calls for the tool. */
  maxCalls: number;
  /** Calls left before the suggested maximum is reached. */
  remainingCalls: number;
  /** Queries recently sent to the tool. */
  recentQueries: string[];
  /** Whether further calls to the tool are blocked. */
  isBlocked: boolean;
  /** Explanation accompanying the status, when there is one. */
  blockReason?: string;
}
|
|
56
|
+
|
|
57
|
+
/**
 * Upper bound on the number of characters kept for a single tool result.
 * Keeps one oversized API response (e.g., market data for 100+ coins)
 * from blowing up context. ~50K chars ≈ 14K tokens.
 */
const MAX_TOOL_RESULT_CHARS = 50000;

/** Tool limit settings applied when the caller supplies no overrides. */
const DEFAULT_LIMIT_CONFIG: ToolLimitConfig = {
  similarityThreshold: 0.7,
  maxCallsPerTool: 3,
};
|
|
69
|
+
|
|
70
|
+
/**
 * Append-only scratchpad for tracking agent work on a query.
 * Uses JSONL format (newline-delimited JSON) for resilient appending.
 * Files are persisted in .brownian/scratchpad/ for debugging/history.
 *
 * This is the single source of truth for all agent work on a query.
 *
 * Includes soft limit warnings to guide the LLM:
 * - Tool call counting with suggested limits (warnings, not blocks)
 * - Query similarity detection to help prevent retry loops
 *
 * Tool results are addressed by their position among the named
 * `tool_result` entries in the file; every method that enumerates tool
 * results uses that same numbering, so "cleared" indices always refer to
 * the same entries.
 */
export class Scratchpad {
  private readonly scratchpadDir = '.brownian/scratchpad';
  private readonly filepath: string;
  private readonly limitConfig: ToolLimitConfig;

  // In-memory tracking for tool limits (call counts and queries per tool name)
  private toolCallCounts: Map<string, number> = new Map();
  private toolQueries: Map<string, string[]> = new Map();

  // In-memory tracking for Anthropic-style context clearing (JSONL file untouched).
  // Stores indices of tool_result entries that have been cleared from context.
  private clearedToolIndices: Set<number> = new Set();

  /**
   * Create the scratchpad file for a query and write its `init` entry.
   *
   * @param query - The user query this scratchpad tracks.
   * @param limitConfig - Optional overrides merged over the default limits.
   */
  constructor(query: string, limitConfig?: Partial<ToolLimitConfig>) {
    this.limitConfig = { ...DEFAULT_LIMIT_CONFIG, ...limitConfig };

    if (!existsSync(this.scratchpadDir)) {
      mkdirSync(this.scratchpadDir, { recursive: true });
    }

    // File name: "<UTC timestamp to the second>_<first 12 hex chars of md5(query)>.jsonl"
    const hash = createHash('md5').update(query).digest('hex').slice(0, 12);
    const now = new Date();
    const timestamp = now.toISOString()
      .slice(0, 19)          // "2026-01-21T15:30:45"
      .replace('T', '-')     // "2026-01-21-15:30:45"
      .replace(/:/g, '');    // "2026-01-21-153045"
    this.filepath = join(this.scratchpadDir, `${timestamp}_${hash}.jsonl`);

    // Write initial entry with the query
    this.append({ type: 'init', content: query, timestamp: new Date().toISOString() });
  }

  /**
   * Add a complete tool result.
   * Results longer than MAX_TOOL_RESULT_CHARS are truncated and suffixed with
   * a marker; results that are JSON objects/arrays are stored parsed for
   * cleaner JSONL output, everything else is stored as the original string.
   *
   * @param toolName - Name of the tool that produced the result.
   * @param args - Arguments the tool was called with.
   * @param result - Raw tool output.
   */
  addToolResult(
    toolName: string,
    args: Record<string, unknown>,
    result: string
  ): void {
    let stored = result;
    if (result.length > MAX_TOOL_RESULT_CHARS) {
      stored = result.slice(0, MAX_TOOL_RESULT_CHARS) +
        `\n...[truncated: ${result.length.toLocaleString()} → ${MAX_TOOL_RESULT_CHARS.toLocaleString()} chars]`;
    }

    this.append({
      type: 'tool_result',
      timestamp: new Date().toISOString(),
      toolName,
      args,
      result: this.parseResultSafely(stored),
    });
  }

  // ============================================================================
  // Tool Limit / Graceful Exit Methods
  // ============================================================================

  /**
   * Check if a tool call can proceed. Returns status with warning if limits exceeded.
   * Call this BEFORE executing a tool to help prevent retry loops.
   * Note: Always allows the call but provides warnings to guide the LLM.
   *
   * @param toolName - Tool about to be called.
   * @param query - Optional query text, compared against earlier queries for this tool.
   * @returns `allowed` is always true; `warning` is set when the tool is at/over
   *   the suggested limit, the query resembles an earlier one, or one call remains.
   */
  canCallTool(toolName: string, query?: string): { allowed: boolean; warning?: string } {
    const currentCount = this.toolCallCounts.get(toolName) ?? 0;
    const maxCalls = this.limitConfig.maxCallsPerTool;

    // Check if over the suggested limit - warn but allow
    if (currentCount >= maxCalls) {
      return {
        allowed: true,
        warning: `Tool '${toolName}' has been called ${currentCount} times (suggested limit: ${maxCalls}). ` +
          `If previous calls didn't return the needed data, consider: ` +
          `(1) trying a different tool, (2) using different search terms, or ` +
          `(3) proceeding with what you have and noting any data gaps to the user.`,
      };
    }

    // Check query similarity if query provided
    if (query) {
      const previousQueries = this.toolQueries.get(toolName) ?? [];
      const similarQuery = this.findSimilarQuery(query, previousQueries);

      if (similarQuery) {
        // Allow but warn - the LLM should know it's repeating
        const remaining = maxCalls - currentCount;
        return {
          allowed: true,
          warning: `This query is very similar to a previous '${toolName}' call. ` +
            `You have ${remaining} attempt(s) before reaching the suggested limit. ` +
            `If the tool isn't returning useful results, consider: ` +
            `(1) trying a different tool, (2) using different search terms, or ` +
            `(3) acknowledging the data limitation to the user.`,
        };
      }
    }

    // Check if approaching limit (1 call remaining)
    if (currentCount === maxCalls - 1) {
      return {
        allowed: true,
        warning: `You are approaching the suggested limit for '${toolName}' (${currentCount + 1}/${maxCalls}). ` +
          `If this doesn't return the needed data, consider trying a different approach.`,
      };
    }

    return { allowed: true };
  }

  /**
   * Record a tool call attempt. Call this AFTER the tool executes successfully.
   *
   * @param toolName - Tool that was called.
   * @param query - Optional query text, remembered for similarity checks.
   */
  recordToolCall(toolName: string, query?: string): void {
    // Update call count
    const currentCount = this.toolCallCounts.get(toolName) ?? 0;
    this.toolCallCounts.set(toolName, currentCount + 1);

    // Track query if provided
    if (query) {
      const queries = this.toolQueries.get(toolName) ?? [];
      queries.push(query);
      this.toolQueries.set(toolName, queries);
    }
  }

  /**
   * Get usage status for all tools that have been called.
   * Used to inject tool attempt status into prompts.
   *
   * @returns One status per tool with a recorded call; `isBlocked` is always false.
   */
  getToolUsageStatus(): ToolUsageStatus[] {
    const statuses: ToolUsageStatus[] = [];

    for (const [toolName, callCount] of this.toolCallCounts) {
      const maxCalls = this.limitConfig.maxCallsPerTool;
      const remainingCalls = Math.max(0, maxCalls - callCount);
      const recentQueries = this.toolQueries.get(toolName) ?? [];
      const overLimit = callCount >= maxCalls;

      statuses.push({
        toolName,
        callCount,
        maxCalls,
        remainingCalls,
        recentQueries: recentQueries.slice(-3), // Last 3 queries
        isBlocked: false, // Never block, just warn
        blockReason: overLimit ? `Over suggested limit of ${maxCalls} calls` : undefined,
      });
    }

    return statuses;
  }

  /**
   * Format tool usage status for injection into prompts.
   *
   * @returns A markdown section listing call counts per tool, or null when no
   *   tool call has been recorded.
   */
  formatToolUsageForPrompt(): string | null {
    const statuses = this.getToolUsageStatus();

    if (statuses.length === 0) {
      return null;
    }

    const lines = statuses.map(s => {
      const status = s.callCount >= s.maxCalls
        ? `${s.callCount} calls (over suggested limit of ${s.maxCalls})`
        : `${s.callCount}/${s.maxCalls} calls`;
      return `- ${s.toolName}: ${status}`;
    });

    return `## Tool Usage This Query\n\n${lines.join('\n')}\n\n` +
      `Note: If a tool isn't returning useful results after several attempts, consider trying a different tool/approach.`;
  }

  /**
   * Find the first previous query whose word-set Jaccard similarity to
   * `newQuery` reaches the configured threshold.
   *
   * @returns The matching previous query, or null when none is similar enough.
   */
  private findSimilarQuery(newQuery: string, previousQueries: string[]): string | null {
    const newWords = this.tokenize(newQuery);

    for (const prevQuery of previousQueries) {
      const prevWords = this.tokenize(prevQuery);
      const similarity = this.calculateSimilarity(newWords, prevWords);

      if (similarity >= this.limitConfig.similarityThreshold) {
        return prevQuery;
      }
    }

    return null;
  }

  /**
   * Tokenize a query into normalized words for similarity comparison:
   * lowercased, non-word characters treated as separators, and words of
   * two characters or fewer dropped.
   */
  private tokenize(query: string): Set<string> {
    return new Set(
      query
        .toLowerCase()
        .replace(/[^\w\s]/g, ' ')
        .split(/\s+/)
        .filter(w => w.length > 2) // Skip very short words
    );
  }

  /**
   * Calculate Jaccard similarity (|intersection| / |union|) of two word sets.
   * Returns 0 when either set is empty.
   */
  private calculateSimilarity(set1: Set<string>, set2: Set<string>): number {
    if (set1.size === 0 || set2.size === 0) return 0;

    const intersection = [...set1].filter(w => set2.has(w)).length;
    const union = new Set([...set1, ...set2]).size;

    return intersection / union;
  }

  /**
   * Parse a result string as JSON when it encodes an object or array.
   *
   * JSON primitives ("0", "false", "null", "\"text\"") are deliberately kept
   * as the original string: storing them parsed would make falsy values look
   * like missing results and would not round-trip through stringifyResult.
   *
   * @returns The parsed object/array, otherwise the original string.
   */
  private parseResultSafely(result: string): unknown {
    try {
      const parsed: unknown = JSON.parse(result);
      return typeof parsed === 'object' && parsed !== null ? parsed : result;
    } catch {
      // Not valid JSON, return as-is (e.g., error messages, plain text)
      return result;
    }
  }

  /**
   * Append thinking/reasoning
   */
  addThinking(thought: string): void {
    this.append({ type: 'thinking', content: thought, timestamp: new Date().toISOString() });
  }

  /**
   * Get full tool results formatted for the iteration prompt.
   * Cleared entries are replaced by a one-line placeholder.
   * Does NOT modify the JSONL file - clearing is in-memory only.
   *
   * @returns Formatted results separated by blank lines ('' when there are none).
   */
  getToolResults(): string {
    return this.readToolResultEntries()
      .map((entry, index) => {
        if (this.clearedToolIndices.has(index)) {
          return `[Tool result #${index + 1} cleared from context]`;
        }

        const argsStr = Object.entries(entry.args ?? {})
          .map(([k, v]) => `${k}=${this.formatArgValue(v)}`)
          .join(', ');
        return `### ${entry.toolName}(${argsStr})\n${this.stringifyResult(entry.result)}`;
      })
      .join('\n\n');
  }

  /**
   * Get tool results as a ToolContext array, excluding cleared entries.
   */
  getActiveToolResults(): ToolContext[] {
    return this.readToolResultEntries()
      .filter((_, index) => !this.clearedToolIndices.has(index))
      .map((entry) => ({
        toolName: entry.toolName,
        args: entry.args ?? {},
        result: this.stringifyResult(entry.result),
      }));
  }

  /**
   * Clear oldest tool results from context (in-memory only).
   * Removes the oldest active tool results, keeping the most recent N.
   * The JSONL file is NOT modified - this only affects what gets sent to the LLM.
   *
   * @param keepCount - Number of most recent tool results to keep. Negative
   *   values are treated as 0, fractional values are rounded down, and NaN
   *   clears nothing.
   * @returns Number of tool results that were cleared
   */
  clearOldestToolResults(keepCount: number): number {
    const activeIndices = this.readToolResultEntries()
      .map((_, index) => index)
      .filter((index) => !this.clearedToolIndices.has(index));

    // Normalize keepCount so the number cleared can never exceed what exists.
    const keep = Number.isNaN(keepCount)
      ? activeIndices.length
      : Math.max(0, Math.floor(keepCount));
    const toClearCount = Math.max(0, activeIndices.length - keep);

    // Clear oldest entries (first N active indices)
    for (const index of activeIndices.slice(0, toClearCount)) {
      this.clearedToolIndices.add(index);
    }

    return toClearCount;
  }

  /**
   * Get count of active (non-cleared) tool results.
   */
  getActiveToolResultCount(): number {
    return this.readToolResultEntries()
      .filter((_, index) => !this.clearedToolIndices.has(index))
      .length;
  }

  /**
   * Get tool call records for DoneEvent (external consumers).
   * Includes cleared entries.
   */
  getToolCallRecords(): ToolCallRecord[] {
    return this.readToolResultEntries().map((entry) => ({
      tool: entry.toolName,
      args: entry.args ?? {},
      result: this.stringifyResult(entry.result),
    }));
  }

  /**
   * Get full contexts for final answer generation.
   * Returns all tool results (including cleared ones) that have a non-empty result.
   */
  getFullContexts(): ToolContext[] {
    return this.readToolResultEntries()
      .filter((entry) => Boolean(entry.result))
      .map((entry) => ({
        toolName: entry.toolName,
        args: entry.args ?? {},
        result: this.stringifyResult(entry.result),
      }));
  }

  /**
   * Convert a stored result back to a string for API compatibility.
   * Strings are returned as-is; other values are JSON stringified. A missing
   * result yields '' because JSON.stringify(undefined) is not a string.
   */
  private stringifyResult(result: unknown): string {
    if (typeof result === 'string') {
      return result;
    }
    return JSON.stringify(result) ?? '';
  }

  /**
   * Render one argument value for the prompt. Objects and arrays are JSON
   * encoded so they do not collapse to "[object Object]"; everything else
   * uses its plain string form.
   */
  private formatArgValue(value: unknown): string {
    return typeof value === 'object' && value !== null
      ? JSON.stringify(value)
      : String(value);
  }

  /**
   * Check if any tool results have been recorded
   */
  hasToolResults(): boolean {
    return this.readToolResultEntries().length > 0;
  }

  /**
   * Check if a skill has already been executed in this query.
   * A skill counts as executed when a 'skill' tool result exists whose
   * `args.skill` equals `skillName`.
   */
  hasExecutedSkill(skillName: string): boolean {
    return this.readEntries().some(
      e => e.type === 'tool_result' && e.toolName === 'skill' && e.args?.skill === skillName
    );
  }

  /**
   * Append-only write: serializes the entry as one JSON line.
   */
  private append(entry: ScratchpadEntry): void {
    appendFileSync(this.filepath, JSON.stringify(entry) + '\n');
  }

  /**
   * Parse and validate a single JSONL line. Returns null for malformed or invalid entries.
   * A valid entry is a JSON object that has both `type` and `timestamp` keys.
   */
  private parseLine(line: string): ScratchpadEntry | null {
    try {
      const parsed = JSON.parse(line);
      return parsed && typeof parsed === 'object' && 'type' in parsed && 'timestamp' in parsed
        ? (parsed as ScratchpadEntry)
        : null;
    } catch {
      return null;
    }
  }

  /**
   * Read all entries from the log.
   * Skips malformed or corrupt lines (partial writes, disk corruption) to avoid
   * a single bad line crashing getToolResults, getFullContexts, etc.
   */
  private readEntries(): ScratchpadEntry[] {
    if (!existsSync(this.filepath)) {
      return [];
    }

    return readFileSync(this.filepath, 'utf-8')
      .split('\n')
      .filter((line) => line.trim())
      .map((line) => this.parseLine(line))
      .filter((entry): entry is ScratchpadEntry => entry !== null);
  }

  /**
   * Read the `tool_result` entries that carry a tool name, in file order.
   * An entry's position in the returned array is its tool result index, the
   * numbering used by clearedToolIndices.
   */
  private readToolResultEntries(): Array<ScratchpadEntry & { toolName: string }> {
    return this.readEntries().filter(
      (entry): entry is ScratchpadEntry & { toolName: string } =>
        entry.type === 'tool_result' &&
        typeof entry.toolName === 'string' &&
        entry.toolName.length > 0
    );
  }
}
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
import { describe, test, expect } from 'bun:test';
|
|
2
|
+
import { TokenCounter } from './token-counter.js';
|
|
3
|
+
|
|
4
|
+
// Unit tests for TokenCounter: accumulation, empty-state handling,
// defensive copying, and throughput calculation.
describe('TokenCounter', () => {
  test('add(undefined) is a no-op', () => {
    const tracker = new TokenCounter();

    tracker.add(undefined);

    expect(tracker.getUsage()).toBeUndefined();
  });

  test('add() accumulates usage correctly', () => {
    const tracker = new TokenCounter();
    tracker.add({ inputTokens: 100, outputTokens: 50, totalTokens: 150 });
    tracker.add({ inputTokens: 200, outputTokens: 100, totalTokens: 300 });

    // Each field is summed independently across both calls.
    const expectedTotals = { inputTokens: 300, outputTokens: 150, totalTokens: 450 };
    expect(tracker.getUsage()).toEqual(expectedTotals);
  });

  test('getUsage() returns undefined when zero tokens', () => {
    const tracker = new TokenCounter();

    expect(tracker.getUsage()).toBeUndefined();
  });

  test('getUsage() returns a copy (not internal reference)', () => {
    const tracker = new TokenCounter();
    tracker.add({ inputTokens: 10, outputTokens: 5, totalTokens: 15 });

    const first = tracker.getUsage()!;
    const second = tracker.getUsage()!;

    expect(first).toEqual(second);
    expect(first).not.toBe(second); // different objects
  });

  test('getTokensPerSecond() calculates correctly', () => {
    const tracker = new TokenCounter();
    tracker.add({ inputTokens: 500, outputTokens: 500, totalTokens: 1000 });

    // 1000 tokens / (2000ms / 1000) = 500 tokens/sec
    const rate = tracker.getTokensPerSecond(2000);
    expect(rate).toBe(500);
  });

  test('getTokensPerSecond() returns undefined for zero tokens', () => {
    const tracker = new TokenCounter();

    expect(tracker.getTokensPerSecond(1000)).toBeUndefined();
  });

  test('getTokensPerSecond() returns undefined for zero/negative time', () => {
    const tracker = new TokenCounter();
    tracker.add({ inputTokens: 100, outputTokens: 50, totalTokens: 150 });

    expect(tracker.getTokensPerSecond(0)).toBeUndefined();
    expect(tracker.getTokensPerSecond(-100)).toBeUndefined();
  });
});
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import type { TokenUsage } from './types.js';
|
|
2
|
+
|
|
3
|
+
/**
 * Running total of token usage accumulated over multiple LLM calls.
 */
export class TokenCounter {
  private usage: TokenUsage = { inputTokens: 0, outputTokens: 0, totalTokens: 0 };

  /**
   * Fold the usage of one LLM call into the running total.
   *
   * @param usage - Usage reported for the call; a missing value is ignored.
   */
  add(usage?: TokenUsage): void {
    if (usage) {
      const { inputTokens, outputTokens, totalTokens } = usage;
      this.usage.inputTokens += inputTokens;
      this.usage.outputTokens += outputTokens;
      this.usage.totalTokens += totalTokens;
    }
  }

  /**
   * Snapshot of the accumulated usage.
   *
   * @returns A fresh copy of the totals, or undefined while the total token
   *   count is not above zero.
   */
  getUsage(): TokenUsage | undefined {
    if (this.usage.totalTokens > 0) {
      return { ...this.usage };
    }
    return undefined;
  }

  /**
   * Throughput in tokens per second over the given elapsed time.
   *
   * @param elapsedMs - Elapsed time in milliseconds.
   * @returns Total tokens divided by elapsed seconds, or undefined when no
   *   tokens were tracked or the elapsed time is zero or negative.
   */
  getTokensPerSecond(elapsedMs: number): number | undefined {
    const total = this.usage.totalTokens;
    const nothingToMeasure = total === 0 || elapsedMs <= 0;
    if (nothingToMeasure) {
      return undefined;
    }
    const elapsedSeconds = elapsedMs / 1000;
    return total / elapsedSeconds;
  }
}
|