brownian-code 2026.2.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +97 -0
- package/bin/brownian +25 -0
- package/env.example +21 -0
- package/package.json +87 -0
- package/src/agent/agent.test.ts +414 -0
- package/src/agent/agent.ts +385 -0
- package/src/agent/index.ts +27 -0
- package/src/agent/prompts.ts +271 -0
- package/src/agent/scratchpad.test.ts +482 -0
- package/src/agent/scratchpad.ts +526 -0
- package/src/agent/token-counter.test.ts +59 -0
- package/src/agent/token-counter.ts +33 -0
- package/src/agent/types.ts +137 -0
- package/src/cli.tsx +385 -0
- package/src/commands/builtin.test.ts +271 -0
- package/src/commands/builtin.ts +200 -0
- package/src/commands/registry.test.ts +188 -0
- package/src/commands/registry.ts +111 -0
- package/src/commands/types.ts +64 -0
- package/src/components/AgentEventView.tsx +487 -0
- package/src/components/AnswerBox.tsx +81 -0
- package/src/components/ApiKeyPrompt.tsx +75 -0
- package/src/components/CommandMenu.test.tsx +64 -0
- package/src/components/CommandMenu.tsx +38 -0
- package/src/components/CursorText.tsx +43 -0
- package/src/components/DebugPanel.tsx +48 -0
- package/src/components/ErrorBox.test.tsx +58 -0
- package/src/components/ErrorBox.tsx +26 -0
- package/src/components/HelpView.test.tsx +70 -0
- package/src/components/HelpView.tsx +61 -0
- package/src/components/HistoryItemView.tsx +108 -0
- package/src/components/Input.tsx +193 -0
- package/src/components/Intro.test.tsx +59 -0
- package/src/components/Intro.tsx +35 -0
- package/src/components/ModelSelector.tsx +288 -0
- package/src/components/StatusBar.test.tsx +78 -0
- package/src/components/StatusBar.tsx +56 -0
- package/src/components/WorkingIndicator.tsx +133 -0
- package/src/components/index.ts +23 -0
- package/src/e2e/agent-flow.test.ts +378 -0
- package/src/evals/components/EvalApp.tsx +206 -0
- package/src/evals/components/EvalCurrentQuestion.tsx +42 -0
- package/src/evals/components/EvalProgress.tsx +33 -0
- package/src/evals/components/EvalRecentResults.tsx +63 -0
- package/src/evals/components/EvalStats.tsx +49 -0
- package/src/evals/components/index.ts +5 -0
- package/src/evals/dataset/crypto_agent.csv +16 -0
- package/src/evals/run.ts +355 -0
- package/src/gateway/channels/whatsapp/auth-store.ts +15 -0
- package/src/gateway/channels/whatsapp/inbound.ts +86 -0
- package/src/gateway/channels/whatsapp/login.ts +28 -0
- package/src/gateway/channels/whatsapp/outbound.ts +27 -0
- package/src/gateway/channels/whatsapp/session.ts +69 -0
- package/src/gateway/config.ts +81 -0
- package/src/gateway/index.ts +62 -0
- package/src/hooks/useAgentRunner.ts +317 -0
- package/src/hooks/useDebugLogs.ts +22 -0
- package/src/hooks/useInputHistory.ts +106 -0
- package/src/hooks/useModelSelection.ts +249 -0
- package/src/hooks/useTextBuffer.test.ts +121 -0
- package/src/hooks/useTextBuffer.ts +97 -0
- package/src/index.tsx +74 -0
- package/src/mcp/cache.ts +205 -0
- package/src/mcp/client.test.ts +126 -0
- package/src/mcp/client.ts +145 -0
- package/src/mcp/index.ts +2 -0
- package/src/model/llm.test.ts +158 -0
- package/src/model/llm.ts +233 -0
- package/src/providers.ts +94 -0
- package/src/skills/index.ts +17 -0
- package/src/skills/loader.ts +73 -0
- package/src/skills/registry.ts +125 -0
- package/src/skills/types.ts +31 -0
- package/src/test-utils/mocks.ts +110 -0
- package/src/theme.ts +21 -0
- package/src/tools/browser/browser.ts +357 -0
- package/src/tools/browser/index.ts +1 -0
- package/src/tools/crypto/hive-tools.ts +171 -0
- package/src/tools/crypto/index.ts +1 -0
- package/src/tools/descriptions/browser.ts +105 -0
- package/src/tools/descriptions/crypto-search.ts +58 -0
- package/src/tools/descriptions/index.ts +8 -0
- package/src/tools/descriptions/web-fetch.ts +44 -0
- package/src/tools/descriptions/web-search.ts +26 -0
- package/src/tools/fetch/cache.ts +95 -0
- package/src/tools/fetch/external-content.ts +200 -0
- package/src/tools/fetch/index.ts +1 -0
- package/src/tools/fetch/web-fetch-utils.ts +122 -0
- package/src/tools/fetch/web-fetch.ts +371 -0
- package/src/tools/index.ts +12 -0
- package/src/tools/registry.ts +130 -0
- package/src/tools/search/exa.ts +43 -0
- package/src/tools/search/index.ts +2 -0
- package/src/tools/search/tavily.ts +35 -0
- package/src/tools/skill.ts +62 -0
- package/src/tools/types.ts +53 -0
- package/src/utils/ai-message.ts +26 -0
- package/src/utils/config.ts +54 -0
- package/src/utils/cost-calculator.test.ts +101 -0
- package/src/utils/cost-calculator.ts +74 -0
- package/src/utils/env.ts +101 -0
- package/src/utils/error-classifier.test.ts +146 -0
- package/src/utils/error-classifier.ts +91 -0
- package/src/utils/in-memory-chat-history.test.ts +291 -0
- package/src/utils/in-memory-chat-history.ts +224 -0
- package/src/utils/index.ts +19 -0
- package/src/utils/input-key-handlers.test.ts +155 -0
- package/src/utils/input-key-handlers.ts +64 -0
- package/src/utils/logger.ts +67 -0
- package/src/utils/long-term-chat-history.ts +138 -0
- package/src/utils/markdown-table.ts +227 -0
- package/src/utils/ollama.ts +37 -0
- package/src/utils/progress-channel.ts +84 -0
- package/src/utils/text-navigation.test.ts +222 -0
- package/src/utils/text-navigation.ts +81 -0
- package/src/utils/thinking-verbs.ts +29 -0
- package/src/utils/tokens.test.ts +163 -0
- package/src/utils/tokens.ts +67 -0
- package/src/utils/tool-description.ts +88 -0
|
@@ -0,0 +1,482 @@
|
|
|
1
|
+
import { describe, test, expect, beforeAll, afterAll } from 'bun:test';
import { appendFileSync, existsSync, readdirSync } from 'node:fs';
import { join } from 'node:path';
import { Scratchpad } from './scratchpad.js';
import { createTempDir } from '../test-utils/mocks.js';
|
|
4
|
+
|
|
5
|
+
// Shared fixture state: the temporary directory returned by createTempDir,
// its cleanup callback, and the working directory to restore afterwards.
let tmpDir: string;
let cleanup: () => void;
let originalCwd: string;

// Switch the process into a temporary directory before any test runs.
beforeAll(() => {
  originalCwd = process.cwd();
  const { path, cleanup: removeTempDir } = createTempDir();
  tmpDir = path;
  cleanup = removeTempDir;
  process.chdir(tmpDir);
});

// Restore the original working directory first, then run the cleanup callback.
afterAll(() => {
  process.chdir(originalCwd);
  cleanup();
});
|
|
21
|
+
|
|
22
|
+
// ---------------------------------------------------------------------------
|
|
23
|
+
// Construction & Init
|
|
24
|
+
// ---------------------------------------------------------------------------
|
|
25
|
+
|
|
26
|
+
// Behaviour of a freshly constructed Scratchpad, before any tool result is added.
describe('Scratchpad construction', () => {
  test('creates scratchpad directory and file', () => {
    const pad = new Scratchpad('test query');
    expect(pad).toBeDefined();

    // Check the side effect the test name promises instead of only checking
    // that the constructor returned: the scratchpad directory (relative to the
    // temporary working directory) must exist and hold at least one file.
    const scratchDir = join(process.cwd(), '.brownian', 'scratchpad');
    expect(existsSync(scratchDir)).toBe(true);
    expect(readdirSync(scratchDir).length).toBeGreaterThan(0);
  });

  test('hasToolResults is false on fresh scratchpad', () => {
    const pad = new Scratchpad('empty query');
    expect(pad.hasToolResults()).toBe(false);
  });

  test('getToolResults returns empty string on fresh scratchpad', () => {
    const pad = new Scratchpad('no results');
    expect(pad.getToolResults()).toBe('');
  });
});
|
|
43
|
+
|
|
44
|
+
// ---------------------------------------------------------------------------
|
|
45
|
+
// addToolResult
|
|
46
|
+
// ---------------------------------------------------------------------------
|
|
47
|
+
|
|
48
|
+
// addToolResult: what is stored and how it is read back.
describe('addToolResult', () => {
  test('stores tool name, args, and result', () => {
    const scratchpad = new Scratchpad('tool result test');
    scratchpad.addToolResult('search', { query: 'bitcoin' }, '{"price": 65000}');

    // The rendered output must mention the tool, its formatted args, and the payload.
    const rendered = scratchpad.getToolResults();
    for (const fragment of ['search', 'query=bitcoin', '65000']) {
      expect(rendered).toContain(fragment);
    }
  });

  test('parses JSON result into object', () => {
    const scratchpad = new Scratchpad('json parse test');
    scratchpad.addToolResult('api', { id: '1' }, '{"data": "value"}');

    const stored = scratchpad.getFullContexts();
    expect(stored.length).toBe(1);
    expect(stored[0].result).toContain('data');
  });

  test('preserves non-JSON string as-is', () => {
    const scratchpad = new Scratchpad('non-json test');
    scratchpad.addToolResult('tool', {}, 'plain text result');

    const stored = scratchpad.getFullContexts();
    expect(stored[0].result).toBe('plain text result');
  });

  test('multiple results are stored in order', () => {
    const scratchpad = new Scratchpad('multi result test');
    const calls = [
      ['tool_a', 'result_a'],
      ['tool_b', 'result_b'],
      ['tool_c', 'result_c'],
    ] as const;
    for (const [toolName, output] of calls) {
      scratchpad.addToolResult(toolName, {}, output);
    }

    const stored = scratchpad.getFullContexts();
    expect(stored.length).toBe(3);
    // Insertion order must be preserved.
    calls.forEach(([toolName], position) => {
      expect(stored[position].toolName).toBe(toolName);
    });
  });
});
|
|
89
|
+
|
|
90
|
+
// ---------------------------------------------------------------------------
|
|
91
|
+
// Truncation (result > 50K chars)
|
|
92
|
+
// ---------------------------------------------------------------------------
|
|
93
|
+
|
|
94
|
+
// Truncation boundary: results longer than 50,000 characters are shortened;
// results at or below that length are left alone.
describe('result truncation', () => {
  test('result >50K chars is truncated with message', () => {
    const scratchpad = new Scratchpad('truncation test');
    const oversized = 'x'.repeat(60_000);
    scratchpad.addToolResult('big_tool', {}, oversized);

    const stored = scratchpad.getFullContexts()[0].result;
    expect(stored.length).toBeLessThan(60_000);
    // The truncation notice reports both the original and the retained size.
    for (const marker of ['truncated', '60,000', '50,000']) {
      expect(stored).toContain(marker);
    }
  });

  test('result exactly 50K chars is NOT truncated', () => {
    const scratchpad = new Scratchpad('exact 50k test');
    const atLimit = 'y'.repeat(50_000);
    scratchpad.addToolResult('exact_tool', {}, atLimit);

    const stored = scratchpad.getFullContexts()[0].result;
    expect(stored).not.toContain('truncated');
  });

  test('result under 50K chars is NOT truncated', () => {
    const scratchpad = new Scratchpad('under 50k test');
    const small = 'z'.repeat(1000);
    scratchpad.addToolResult('short_tool', {}, small);

    const [first] = scratchpad.getFullContexts();
    expect(first.result).toBe(small);
  });
});
|
|
127
|
+
|
|
128
|
+
// ---------------------------------------------------------------------------
|
|
129
|
+
// getToolResults & getActiveToolResults
|
|
130
|
+
// ---------------------------------------------------------------------------
|
|
131
|
+
|
|
132
|
+
// getToolResults: the formatted string view of stored tool results.
describe('getToolResults', () => {
  test('formats tool results with headers', () => {
    const scratchpad = new Scratchpad('format test');
    scratchpad.addToolResult('search', { q: 'eth' }, 'Ethereum data');

    const rendered = scratchpad.getToolResults();
    expect(rendered).toContain('### search(q=eth)');
    expect(rendered).toContain('Ethereum data');
  });

  test('excludes cleared entries with placeholder', () => {
    const scratchpad = new Scratchpad('cleared format test');
    scratchpad.addToolResult('old', {}, 'old result');
    scratchpad.addToolResult('new', {}, 'new result');

    // Two entries stored, keep count of one: the older entry is cleared.
    scratchpad.clearOldestToolResults(1);

    const rendered = scratchpad.getToolResults();
    expect(rendered).toContain('cleared from context');
    expect(rendered).toContain('new result');
    // The cleared entry must no longer appear under its own header.
    expect(rendered).not.toContain('### old(');
  });
});
|
|
155
|
+
|
|
156
|
+
// getActiveToolResults: only entries that have not been cleared.
describe('getActiveToolResults', () => {
  test('returns only non-cleared results as ToolContext[]', () => {
    const scratchpad = new Scratchpad('active results test');
    const calls = [
      ['a', '1', 'result_a'],
      ['b', '2', 'result_b'],
      ['c', '3', 'result_c'],
    ] as const;
    for (const [toolName, id, output] of calls) {
      scratchpad.addToolResult(toolName, { id }, output);
    }

    // Three stored, keep two: the oldest entry ('a') is cleared.
    scratchpad.clearOldestToolResults(2);

    const remaining = scratchpad.getActiveToolResults();
    expect(remaining.length).toBe(2);
    expect(remaining[0].toolName).toBe('b');
    expect(remaining[1].toolName).toBe('c');
  });
});
|
|
171
|
+
|
|
172
|
+
// ---------------------------------------------------------------------------
|
|
173
|
+
// clearOldestToolResults
|
|
174
|
+
// ---------------------------------------------------------------------------
|
|
175
|
+
|
|
176
|
+
// clearOldestToolResults(keepCount): return value and effect on the active set.
describe('clearOldestToolResults', () => {
  test('keeps N most recent results', () => {
    const scratchpad = new Scratchpad('clear oldest test');
    Array.from({ length: 8 }).forEach((_, i) => {
      scratchpad.addToolResult(`tool_${i}`, {}, `result_${i}`);
    });

    // Eight stored, three kept: five are reported as cleared.
    const clearedCount = scratchpad.clearOldestToolResults(3);
    expect(clearedCount).toBe(5);

    const remaining = scratchpad.getActiveToolResults();
    expect(remaining.length).toBe(3);
    expect(remaining[0].toolName).toBe('tool_5');
    expect(remaining[1].toolName).toBe('tool_6');
    expect(remaining[2].toolName).toBe('tool_7');
  });

  test('returns 0 when already under keepCount', () => {
    const scratchpad = new Scratchpad('under keep test');
    scratchpad.addToolResult('only', {}, 'one result');

    expect(scratchpad.clearOldestToolResults(5)).toBe(0);
  });

  test('is idempotent (second call clears 0 more)', () => {
    const scratchpad = new Scratchpad('idempotent test');
    Array.from({ length: 5 }).forEach((_, i) => {
      scratchpad.addToolResult(`t_${i}`, {}, `r_${i}`);
    });

    const firstPass = scratchpad.clearOldestToolResults(2);
    expect(firstPass).toBe(3);

    // Repeating the call with the same keep count has nothing left to clear.
    const secondPass = scratchpad.clearOldestToolResults(2);
    expect(secondPass).toBe(0);
  });

  test('getActiveToolResultCount reflects clearing', () => {
    const scratchpad = new Scratchpad('count test');
    Array.from({ length: 6 }).forEach((_, i) => {
      scratchpad.addToolResult(`ct_${i}`, {}, `cr_${i}`);
    });

    expect(scratchpad.getActiveToolResultCount()).toBe(6);
    scratchpad.clearOldestToolResults(2);
    expect(scratchpad.getActiveToolResultCount()).toBe(2);
  });
});
|
|
225
|
+
|
|
226
|
+
// ---------------------------------------------------------------------------
|
|
227
|
+
// canCallTool (soft limits)
|
|
228
|
+
// ---------------------------------------------------------------------------
|
|
229
|
+
|
|
230
|
+
// canCallTool: limits are advisory, so the call is always allowed and only
// the optional warning changes.
describe('canCallTool', () => {
  test('always returns allowed=true', () => {
    const scratchpad = new Scratchpad('can call test');
    // Record 10 calls first; the tool must still be allowed afterwards.
    Array.from({ length: 10 }).forEach(() => {
      scratchpad.recordToolCall('api');
    });

    const verdict = scratchpad.canCallTool('api');
    expect(verdict.allowed).toBe(true);
  });

  test('warns when at or over limit (default: 3)', () => {
    const scratchpad = new Scratchpad('limit warn test');
    Array.from({ length: 3 }).forEach(() => {
      scratchpad.recordToolCall('search');
    });

    const verdict = scratchpad.canCallTool('search');
    expect(verdict.warning).toBeDefined();
    expect(verdict.warning).toContain('3 times');
    expect(verdict.warning).toContain('suggested limit');
  });

  test('warns when approaching limit (1 call remaining)', () => {
    const scratchpad = new Scratchpad('approaching limit test');
    Array.from({ length: 2 }).forEach(() => {
      scratchpad.recordToolCall('api');
    });

    const verdict = scratchpad.canCallTool('api');
    expect(verdict.warning).toBeDefined();
    expect(verdict.warning).toContain('approaching');
  });

  test('warns on similar query', () => {
    const scratchpad = new Scratchpad('similar query test');
    scratchpad.recordToolCall('search', 'what is the price of bitcoin');

    // The candidate query differs from the recorded one by a single extra word.
    const verdict = scratchpad.canCallTool('search', 'what is the price of bitcoin today');
    expect(verdict.warning).toBeDefined();
    expect(verdict.warning).toContain('similar');
  });

  test('no warning for first call', () => {
    const scratchpad = new Scratchpad('first call test');
    const verdict = scratchpad.canCallTool('new_tool');
    expect(verdict.allowed).toBe(true);
    expect(verdict.warning).toBeUndefined();
  });

  test('no warning for dissimilar queries', () => {
    const scratchpad = new Scratchpad('dissimilar test');
    scratchpad.recordToolCall('search', 'bitcoin price');

    const verdict = scratchpad.canCallTool('search', 'ethereum defi protocols');
    expect(verdict.warning).toBeUndefined();
  });
});
|
|
287
|
+
|
|
288
|
+
// ---------------------------------------------------------------------------
|
|
289
|
+
// recordToolCall
|
|
290
|
+
// ---------------------------------------------------------------------------
|
|
291
|
+
|
|
292
|
+
// recordToolCall: per-tool call counts and remembered queries, read back
// through getToolUsageStatus.
describe('recordToolCall', () => {
  test('increments call count', () => {
    const scratchpad = new Scratchpad('record test');
    scratchpad.recordToolCall('tool_x');
    scratchpad.recordToolCall('tool_x');

    const entry = scratchpad
      .getToolUsageStatus()
      .find((usage) => usage.toolName === 'tool_x');
    expect(entry?.callCount).toBe(2);
  });

  test('stores query for tracking', () => {
    const scratchpad = new Scratchpad('query track test');
    scratchpad.recordToolCall('search', 'bitcoin price');

    const entry = scratchpad
      .getToolUsageStatus()
      .find((usage) => usage.toolName === 'search');
    expect(entry?.recentQueries).toContain('bitcoin price');
  });

  test('tracks independently per tool', () => {
    const scratchpad = new Scratchpad('independent track test');
    scratchpad.recordToolCall('tool_a');
    scratchpad.recordToolCall('tool_b');
    scratchpad.recordToolCall('tool_b');

    const usage = scratchpad.getToolUsageStatus();
    // Look up the recorded call count for one tool name.
    const countFor = (name: string) =>
      usage.find((entry) => entry.toolName === name)?.callCount;
    expect(countFor('tool_a')).toBe(1);
    expect(countFor('tool_b')).toBe(2);
  });
});
|
|
323
|
+
|
|
324
|
+
// ---------------------------------------------------------------------------
|
|
325
|
+
// formatToolUsageForPrompt
|
|
326
|
+
// ---------------------------------------------------------------------------
|
|
327
|
+
|
|
328
|
+
// formatToolUsageForPrompt: null until a tool call is recorded, otherwise a
// text summary of usage.
describe('formatToolUsageForPrompt', () => {
  test('returns null when no tools called', () => {
    const scratchpad = new Scratchpad('no usage test');
    const summary = scratchpad.formatToolUsageForPrompt();
    expect(summary).toBeNull();
  });

  test('returns formatted string when tools called', () => {
    const scratchpad = new Scratchpad('usage format test');
    for (const toolName of ['search', 'api', 'api']) {
      scratchpad.recordToolCall(toolName);
    }

    const summary = scratchpad.formatToolUsageForPrompt();
    expect(summary).toContain('Tool Usage');
    expect(summary).toContain('search');
    expect(summary).toContain('api');
  });

  test('shows "over suggested limit" when exceeded', () => {
    const scratchpad = new Scratchpad('over limit format test');
    // Four calls to the same tool, with no custom limit configured.
    Array.from({ length: 4 }).forEach(() => {
      scratchpad.recordToolCall('heavy_tool');
    });

    const summary = scratchpad.formatToolUsageForPrompt()!;
    expect(summary).toContain('over suggested limit');
  });
});
|
|
356
|
+
|
|
357
|
+
// ---------------------------------------------------------------------------
|
|
358
|
+
// hasExecutedSkill
|
|
359
|
+
// ---------------------------------------------------------------------------
|
|
360
|
+
|
|
361
|
+
// hasExecutedSkill: true only for a skill name recorded via a 'skill' tool result.
describe('hasExecutedSkill', () => {
  test('detects executed skill', () => {
    const scratchpad = new Scratchpad('skill test');
    scratchpad.addToolResult('skill', { skill: 'price-lookup' }, 'result');

    const ranPriceLookup = scratchpad.hasExecutedSkill('price-lookup');
    expect(ranPriceLookup).toBe(true);
    const ranOtherSkill = scratchpad.hasExecutedSkill('other-skill');
    expect(ranOtherSkill).toBe(false);
  });

  test('returns false when no skills executed', () => {
    const scratchpad = new Scratchpad('no skill test');
    // A result from a tool other than 'skill' must not count.
    scratchpad.addToolResult('search', {}, 'result');

    const ranAnySkill = scratchpad.hasExecutedSkill('any-skill');
    expect(ranAnySkill).toBe(false);
  });
});
|
|
377
|
+
|
|
378
|
+
// ---------------------------------------------------------------------------
|
|
379
|
+
// getToolCallRecords
|
|
380
|
+
// ---------------------------------------------------------------------------
|
|
381
|
+
|
|
382
|
+
// getToolCallRecords: one record per stored tool result, exposing tool and args.
describe('getToolCallRecords', () => {
  test('returns all tool results as records', () => {
    const scratchpad = new Scratchpad('records test');
    scratchpad.addToolResult('tool_a', { x: 1 }, 'result_a');
    scratchpad.addToolResult('tool_b', { y: 2 }, 'result_b');

    const [first, second, ...rest] = scratchpad.getToolCallRecords();
    // Exactly two records: nothing beyond the first two.
    expect(2 + rest.length).toBe(2);
    expect(first.tool).toBe('tool_a');
    expect(first.args).toEqual({ x: 1 });
    expect(second.tool).toBe('tool_b');
  });
});
|
|
395
|
+
|
|
396
|
+
// ---------------------------------------------------------------------------
|
|
397
|
+
// getFullContexts (includes cleared entries)
|
|
398
|
+
// ---------------------------------------------------------------------------
|
|
399
|
+
|
|
400
|
+
// getFullContexts: keeps returning entries that clearOldestToolResults has
// cleared, unlike getActiveToolResults.
describe('getFullContexts', () => {
  test('includes cleared entries (unlike getActiveToolResults)', () => {
    const scratchpad = new Scratchpad('full context test');
    scratchpad.addToolResult('old', {}, 'old data');
    scratchpad.addToolResult('new', {}, 'new data');
    scratchpad.clearOldestToolResults(1);

    // The full view still counts the cleared entry.
    const everything = scratchpad.getFullContexts();
    expect(everything.length).toBe(2);

    // The active view does not.
    const stillActive = scratchpad.getActiveToolResults();
    expect(stillActive.length).toBe(1);
  });
});
|
|
414
|
+
|
|
415
|
+
// ---------------------------------------------------------------------------
|
|
416
|
+
// Resilience: corrupted JSONL lines
|
|
417
|
+
// ---------------------------------------------------------------------------
|
|
418
|
+
|
|
419
|
+
// Resilience: a malformed line in the scratchpad's backing file must not break
// reads of the valid entries.
describe('resilience', () => {
  test('corrupted JSONL lines are skipped', () => {
    const scratchDir = join(process.cwd(), '.brownian', 'scratchpad');

    // The backing file's name cannot be derived from the query text, so
    // snapshot the directory before construction and identify the new file by
    // diffing the listing afterwards.
    const existingFiles = new Set(existsSync(scratchDir) ? readdirSync(scratchDir) : []);

    const pad = new Scratchpad('corrupt test');
    pad.addToolResult('valid', {}, 'good result');

    const createdFiles = readdirSync(scratchDir).filter((name: string) => !existingFiles.has(name));
    expect(createdFiles.length).toBe(1);

    // Append a malformed line. The leading newline keeps the garbage off the
    // last valid line; the trailing newline keeps it off any later entry.
    appendFileSync(join(scratchDir, createdFiles[0]), '\n{"type": "tool_result", not valid json\n');

    // The valid entry must still be readable and the garbage must not be counted.
    expect(pad.hasToolResults()).toBe(true);
    expect(pad.getFullContexts().length).toBe(1);
  });
});
|
|
438
|
+
|
|
439
|
+
// ---------------------------------------------------------------------------
|
|
440
|
+
// addThinking
|
|
441
|
+
// ---------------------------------------------------------------------------
|
|
442
|
+
|
|
443
|
+
// addThinking: a thinking note must not be counted as a tool result.
describe('addThinking', () => {
  test('does not affect tool results', () => {
    const scratchpad = new Scratchpad('thinking test');
    scratchpad.addThinking('I should search for BTC price');
    scratchpad.addToolResult('search', {}, 'BTC: $65000');

    // Only the single tool result is reported.
    const stored = scratchpad.getFullContexts();
    expect(stored.length).toBe(1);
    expect(scratchpad.hasToolResults()).toBe(true);
  });
});
|
|
453
|
+
|
|
454
|
+
// ---------------------------------------------------------------------------
|
|
455
|
+
// Custom limit config
|
|
456
|
+
// ---------------------------------------------------------------------------
|
|
457
|
+
|
|
458
|
+
// Custom limit configuration passed as the second constructor argument.
describe('custom limit config', () => {
  test('respects custom maxCallsPerTool', () => {
    const pad = new Scratchpad('custom limit test', { maxCallsPerTool: 5 });
    for (let i = 0; i < 4; i++) {
      pad.recordToolCall('api');
    }

    // At 4/5 — should warn about approaching
    const { warning } = pad.canCallTool('api');
    expect(warning).toBeDefined();
    expect(warning).toContain('approaching');
  });

  test('custom similarityThreshold affects detection', () => {
    const recordedQuery = 'bitcoin';
    // Shares one of three distinct words with the recorded query: a partial
    // overlap that a low threshold should flag and a high one should not.
    const overlappingQuery = 'bitcoin price check';

    const lenient = new Scratchpad('similarity test', { similarityThreshold: 0.1 });
    lenient.recordToolCall('search', recordedQuery);
    expect(lenient.canCallTool('search', overlappingQuery).warning).toContain('similar');
    // No shared words at all: nothing to flag even at a very low threshold.
    expect(lenient.canCallTool('search', 'ethereum').warning).toBeUndefined();

    // Same queries under a high threshold: the partial overlap is not enough.
    // Only one call is recorded, so no limit-related warning applies either.
    const strict = new Scratchpad('similarity test strict', { similarityThreshold: 0.9 });
    strict.recordToolCall('search', recordedQuery);
    expect(strict.canCallTool('search', overlappingQuery).warning).toBeUndefined();

    // The threshold only affects the warning, never whether the call is allowed.
    expect(lenient.canCallTool('search', overlappingQuery).allowed).toBe(true);
    expect(strict.canCallTool('search', overlappingQuery).allowed).toBe(true);
  });
});
|