brownian-code 2026.2.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +97 -0
- package/bin/brownian +25 -0
- package/env.example +21 -0
- package/package.json +87 -0
- package/src/agent/agent.test.ts +414 -0
- package/src/agent/agent.ts +385 -0
- package/src/agent/index.ts +27 -0
- package/src/agent/prompts.ts +271 -0
- package/src/agent/scratchpad.test.ts +482 -0
- package/src/agent/scratchpad.ts +526 -0
- package/src/agent/token-counter.test.ts +59 -0
- package/src/agent/token-counter.ts +33 -0
- package/src/agent/types.ts +137 -0
- package/src/cli.tsx +385 -0
- package/src/commands/builtin.test.ts +271 -0
- package/src/commands/builtin.ts +200 -0
- package/src/commands/registry.test.ts +188 -0
- package/src/commands/registry.ts +111 -0
- package/src/commands/types.ts +64 -0
- package/src/components/AgentEventView.tsx +487 -0
- package/src/components/AnswerBox.tsx +81 -0
- package/src/components/ApiKeyPrompt.tsx +75 -0
- package/src/components/CommandMenu.test.tsx +64 -0
- package/src/components/CommandMenu.tsx +38 -0
- package/src/components/CursorText.tsx +43 -0
- package/src/components/DebugPanel.tsx +48 -0
- package/src/components/ErrorBox.test.tsx +58 -0
- package/src/components/ErrorBox.tsx +26 -0
- package/src/components/HelpView.test.tsx +70 -0
- package/src/components/HelpView.tsx +61 -0
- package/src/components/HistoryItemView.tsx +108 -0
- package/src/components/Input.tsx +193 -0
- package/src/components/Intro.test.tsx +59 -0
- package/src/components/Intro.tsx +35 -0
- package/src/components/ModelSelector.tsx +288 -0
- package/src/components/StatusBar.test.tsx +78 -0
- package/src/components/StatusBar.tsx +56 -0
- package/src/components/WorkingIndicator.tsx +133 -0
- package/src/components/index.ts +23 -0
- package/src/e2e/agent-flow.test.ts +378 -0
- package/src/evals/components/EvalApp.tsx +206 -0
- package/src/evals/components/EvalCurrentQuestion.tsx +42 -0
- package/src/evals/components/EvalProgress.tsx +33 -0
- package/src/evals/components/EvalRecentResults.tsx +63 -0
- package/src/evals/components/EvalStats.tsx +49 -0
- package/src/evals/components/index.ts +5 -0
- package/src/evals/dataset/crypto_agent.csv +16 -0
- package/src/evals/run.ts +355 -0
- package/src/gateway/channels/whatsapp/auth-store.ts +15 -0
- package/src/gateway/channels/whatsapp/inbound.ts +86 -0
- package/src/gateway/channels/whatsapp/login.ts +28 -0
- package/src/gateway/channels/whatsapp/outbound.ts +27 -0
- package/src/gateway/channels/whatsapp/session.ts +69 -0
- package/src/gateway/config.ts +81 -0
- package/src/gateway/index.ts +62 -0
- package/src/hooks/useAgentRunner.ts +317 -0
- package/src/hooks/useDebugLogs.ts +22 -0
- package/src/hooks/useInputHistory.ts +106 -0
- package/src/hooks/useModelSelection.ts +249 -0
- package/src/hooks/useTextBuffer.test.ts +121 -0
- package/src/hooks/useTextBuffer.ts +97 -0
- package/src/index.tsx +74 -0
- package/src/mcp/cache.ts +205 -0
- package/src/mcp/client.test.ts +126 -0
- package/src/mcp/client.ts +145 -0
- package/src/mcp/index.ts +2 -0
- package/src/model/llm.test.ts +158 -0
- package/src/model/llm.ts +233 -0
- package/src/providers.ts +94 -0
- package/src/skills/index.ts +17 -0
- package/src/skills/loader.ts +73 -0
- package/src/skills/registry.ts +125 -0
- package/src/skills/types.ts +31 -0
- package/src/test-utils/mocks.ts +110 -0
- package/src/theme.ts +21 -0
- package/src/tools/browser/browser.ts +357 -0
- package/src/tools/browser/index.ts +1 -0
- package/src/tools/crypto/hive-tools.ts +171 -0
- package/src/tools/crypto/index.ts +1 -0
- package/src/tools/descriptions/browser.ts +105 -0
- package/src/tools/descriptions/crypto-search.ts +58 -0
- package/src/tools/descriptions/index.ts +8 -0
- package/src/tools/descriptions/web-fetch.ts +44 -0
- package/src/tools/descriptions/web-search.ts +26 -0
- package/src/tools/fetch/cache.ts +95 -0
- package/src/tools/fetch/external-content.ts +200 -0
- package/src/tools/fetch/index.ts +1 -0
- package/src/tools/fetch/web-fetch-utils.ts +122 -0
- package/src/tools/fetch/web-fetch.ts +371 -0
- package/src/tools/index.ts +12 -0
- package/src/tools/registry.ts +130 -0
- package/src/tools/search/exa.ts +43 -0
- package/src/tools/search/index.ts +2 -0
- package/src/tools/search/tavily.ts +35 -0
- package/src/tools/skill.ts +62 -0
- package/src/tools/types.ts +53 -0
- package/src/utils/ai-message.ts +26 -0
- package/src/utils/config.ts +54 -0
- package/src/utils/cost-calculator.test.ts +101 -0
- package/src/utils/cost-calculator.ts +74 -0
- package/src/utils/env.ts +101 -0
- package/src/utils/error-classifier.test.ts +146 -0
- package/src/utils/error-classifier.ts +91 -0
- package/src/utils/in-memory-chat-history.test.ts +291 -0
- package/src/utils/in-memory-chat-history.ts +224 -0
- package/src/utils/index.ts +19 -0
- package/src/utils/input-key-handlers.test.ts +155 -0
- package/src/utils/input-key-handlers.ts +64 -0
- package/src/utils/logger.ts +67 -0
- package/src/utils/long-term-chat-history.ts +138 -0
- package/src/utils/markdown-table.ts +227 -0
- package/src/utils/ollama.ts +37 -0
- package/src/utils/progress-channel.ts +84 -0
- package/src/utils/text-navigation.test.ts +222 -0
- package/src/utils/text-navigation.ts +81 -0
- package/src/utils/thinking-verbs.ts +29 -0
- package/src/utils/tokens.test.ts +163 -0
- package/src/utils/tokens.ts +67 -0
- package/src/utils/tool-description.ts +88 -0
|
@@ -0,0 +1,385 @@
|
|
|
1
|
+
import { AIMessage } from '@langchain/core/messages';
|
|
2
|
+
import { StructuredToolInterface } from '@langchain/core/tools';
|
|
3
|
+
import { callLlm } from '../model/llm.js';
|
|
4
|
+
import { Scratchpad, type ToolContext } from './scratchpad.js';
|
|
5
|
+
import { getTools } from '../tools/registry.js';
|
|
6
|
+
import { buildSystemPrompt, buildIterationPrompt, buildFinalAnswerPrompt } from '../agent/prompts.js';
|
|
7
|
+
import { extractTextContent, hasToolCalls } from '../utils/ai-message.js';
|
|
8
|
+
import { InMemoryChatHistory } from '../utils/in-memory-chat-history.js';
|
|
9
|
+
import { getToolDescription } from '../utils/tool-description.js';
|
|
10
|
+
import { estimateTokens, getContextThreshold, KEEP_TOOL_USES } from '../utils/tokens.js';
|
|
11
|
+
import { createProgressChannel } from '../utils/progress-channel.js';
|
|
12
|
+
import type { AgentConfig, AgentEvent, ToolStartEvent, ToolProgressEvent, ToolEndEvent, ToolErrorEvent, ToolLimitEvent, ContextClearedEvent, TokenUsage } from '../agent/types.js';
|
|
13
|
+
import { TokenCounter } from './token-counter.js';
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
const DEFAULT_MAX_ITERATIONS = 10;
|
|
17
|
+
|
|
18
|
+
/**
|
|
19
|
+
* The core agent class that handles the agent loop and tool execution.
|
|
20
|
+
*/
|
|
21
|
+
export class Agent {
|
|
22
|
+
private readonly model: string;
|
|
23
|
+
private readonly modelProvider: string;
|
|
24
|
+
private readonly maxIterations: number;
|
|
25
|
+
private readonly tools: StructuredToolInterface[];
|
|
26
|
+
private readonly toolMap: Map<string, StructuredToolInterface>;
|
|
27
|
+
private readonly systemPrompt: string;
|
|
28
|
+
private readonly signal?: AbortSignal;
|
|
29
|
+
|
|
30
|
+
/** Model used when the caller's config does not name one. */
private static readonly DEFAULT_MODEL = 'claude-sonnet-4-5';

/** Model provider used when the caller's config does not name one. */
private static readonly DEFAULT_MODEL_PROVIDER = 'anthropic';

/**
 * Private constructor; instances are obtained through `Agent.create`.
 *
 * @param config - Optional overrides (model, provider, iteration cap, abort signal).
 * @param tools - Tools the agent may invoke; also indexed by name in `toolMap`.
 * @param systemPrompt - System prompt stored on the instance for model calls.
 */
private constructor(
  config: AgentConfig,
  tools: StructuredToolInterface[],
  systemPrompt: string
) {
  // Defaults come from the shared static constants so that the constructor
  // and the factory below can never disagree about the fallback model.
  this.model = config.model ?? Agent.DEFAULT_MODEL;
  this.modelProvider = config.modelProvider ?? Agent.DEFAULT_MODEL_PROVIDER;
  this.maxIterations = config.maxIterations ?? DEFAULT_MAX_ITERATIONS;
  this.tools = tools;
  this.toolMap = new Map(tools.map(t => [t.name, t]));
  this.systemPrompt = systemPrompt;
  this.signal = config.signal;
}

/**
 * Create a new Agent instance with tools.
 *
 * The tool set and the system prompt are both derived from the effective
 * model name (the configured one, or the default when none is given).
 *
 * @param config - Optional agent configuration; defaults to an empty object.
 * @returns A fully initialised Agent.
 */
static create(config: AgentConfig = {}): Agent {
  const model = config.model ?? Agent.DEFAULT_MODEL;
  const tools = getTools(model);
  const systemPrompt = buildSystemPrompt(model);
  return new Agent(config, tools, systemPrompt);
}
|
|
53
|
+
|
|
54
|
+
/**
 * Run the agent loop for a single query, yielding events for real-time UI updates.
 *
 * Each iteration sends the current prompt to the model. If the response carries
 * tool calls, they are executed and the prompt is rebuilt from the scratchpad.
 * Otherwise the run ends: directly with the model's text when no tool results
 * exist yet, or through a separate tool-less final-answer call.
 *
 * Context management: tool results are kept in full, and the oldest ones are
 * cleared from the scratchpad when the estimated token count exceeds the
 * model's context threshold (checked before each model call and again after
 * each round of tool execution).
 *
 * @param query - The user's query.
 * @param inMemoryHistory - Optional prior conversation used to build the first prompt.
 * @returns An async generator of agent events; the last event has type `done`.
 */
async *run(query: string, inMemoryHistory?: InMemoryChatHistory): AsyncGenerator<AgentEvent> {
  const startTime = Date.now();
  const tokenCounter = new TokenCounter();

  if (this.tools.length === 0) {
    yield { type: 'done', answer: 'No tools available. Please check your API key configuration.', toolCalls: [], iterations: 0, totalTime: Date.now() - startTime };
    return;
  }

  // Create scratchpad for this query - single source of truth for all work done
  const scratchpad = new Scratchpad(query);

  // Build initial prompt with conversation history context
  let currentPrompt = await this.buildInitialPrompt(query, inMemoryHistory);

  let iteration = 0;

  const contextThreshold = getContextThreshold(this.model);

  // Main agent loop
  while (iteration < this.maxIterations) {
    iteration++;

    // Pre-flight context check: trim before sending to model
    const estimatedPreFlight = estimateTokens(this.systemPrompt + currentPrompt);
    if (estimatedPreFlight > contextThreshold) {
      const clearedCount = scratchpad.clearOldestToolResults(KEEP_TOOL_USES);
      if (clearedCount > 0) {
        yield { type: 'context_cleared', clearedCount, keptCount: KEEP_TOOL_USES } as ContextClearedEvent;
        const trimmedResults = scratchpad.getToolResults();
        currentPrompt = buildIterationPrompt(
          query,
          trimmedResults,
          scratchpad.formatToolUsageForPrompt()
        );
      }
    }

    const { response, usage } = await this.callModel(currentPrompt);
    tokenCounter.add(usage);
    const responseText = typeof response === 'string' ? response : extractTextContent(response);

    // Emit thinking if there are also tool calls (skip whitespace-only responses)
    if (responseText?.trim() && typeof response !== 'string' && hasToolCalls(response)) {
      const trimmedText = responseText.trim();
      scratchpad.addThinking(trimmedText);
      yield { type: 'thinking', message: trimmedText };
    }

    // No tool calls = ready to generate final answer
    if (typeof response === 'string' || !hasToolCalls(response)) {
      // If no tools were called at all, just use the direct response
      // This handles greetings, clarifying questions, etc.
      if (!scratchpad.hasToolResults() && responseText) {
        yield { type: 'answer_start' };
        const totalTime = Date.now() - startTime;
        yield { type: 'done', answer: responseText, toolCalls: [], iterations: iteration, totalTime, tokenUsage: tokenCounter.getUsage(), tokensPerSecond: tokenCounter.getTokensPerSecond(totalTime) };
        return;
      }

      // Generate final answer with full context from scratchpad
      yield* this.emitFinalAnswer(query, scratchpad, tokenCounter, iteration, startTime);
      return;
    }

    // Execute tools and add results to scratchpad (response is AIMessage here).
    // yield* forwards every tool event and also propagates return()/throw()
    // from the consumer into the inner generator.
    yield* this.executeToolCalls(response, query, scratchpad);

    // Anthropic-style context management: get full tool results
    let fullToolResults = scratchpad.getToolResults();

    // Check context threshold and clear oldest tool results if needed
    const estimatedContextTokens = estimateTokens(this.systemPrompt + query + fullToolResults);
    if (estimatedContextTokens > contextThreshold) {
      const clearedCount = scratchpad.clearOldestToolResults(KEEP_TOOL_USES);
      if (clearedCount > 0) {
        yield { type: 'context_cleared', clearedCount, keptCount: KEEP_TOOL_USES } as ContextClearedEvent;
        // Re-fetch after clearing
        fullToolResults = scratchpad.getToolResults();
      }
    }

    // Build iteration prompt with full tool results (Anthropic-style)
    currentPrompt = buildIterationPrompt(
      query,
      fullToolResults,
      scratchpad.formatToolUsageForPrompt()
    );
  }

  // Max iterations reached - still generate proper final answer
  yield* this.emitFinalAnswer(
    query,
    scratchpad,
    tokenCounter,
    iteration,
    startTime,
    `Reached maximum iterations (${this.maxIterations}).`
  );
}

/**
 * Generate the final answer from the scratchpad and emit the closing events.
 *
 * Builds the final-answer prompt, yields `answer_start`, calls the model
 * without tools, records token usage, then yields the `done` event.
 *
 * @param query - The user's query.
 * @param scratchpad - Scratchpad holding the tool results gathered so far.
 * @param tokenCounter - Counter that accumulates usage across model calls.
 * @param iterations - Number of loop iterations performed, reported in `done`.
 * @param startTime - Epoch milliseconds at which the run started.
 * @param emptyAnswerFallback - When provided, used as the answer if the model
 *   returns an empty one; when omitted, the model's answer is passed through as-is.
 */
private async *emitFinalAnswer(
  query: string,
  scratchpad: Scratchpad,
  tokenCounter: TokenCounter,
  iterations: number,
  startTime: number,
  emptyAnswerFallback?: string
): AsyncGenerator<AgentEvent> {
  const fullContext = this.buildFullContextForAnswer(query, scratchpad);
  const finalPrompt = buildFinalAnswerPrompt(query, fullContext);

  yield { type: 'answer_start' };
  const { response: finalResponse, usage: finalUsage } = await this.callModel(finalPrompt, false);
  tokenCounter.add(finalUsage);
  const answer = typeof finalResponse === 'string'
    ? finalResponse
    : extractTextContent(finalResponse);

  const totalTime = Date.now() - startTime;
  yield {
    type: 'done',
    answer: emptyAnswerFallback === undefined ? answer : (answer || emptyAnswerFallback),
    toolCalls: scratchpad.getToolCallRecords(),
    iterations,
    totalTime,
    tokenUsage: tokenCounter.getUsage(),
    tokensPerSecond: tokenCounter.getTokensPerSecond(totalTime)
  };
}
|
|
189
|
+
|
|
190
|
+
/**
 * Send a prompt to the LLM using this agent's model, system prompt and abort signal.
 *
 * @param prompt - The prompt to send to the LLM
 * @param useTools - Whether to pass the agent's tools (default: true). When false
 *   no tools are passed (callLlm is expected to return a plain string then — confirm in llm.ts).
 * @returns The model response together with its token usage, if any was reported.
 */
private async callModel(prompt: string, useTools: boolean = true): Promise<{ response: AIMessage | string; usage?: TokenUsage }> {
  const boundTools = useTools ? this.tools : undefined;

  const { response, usage } = await callLlm(prompt, {
    model: this.model,
    systemPrompt: this.systemPrompt,
    tools: boundTools,
    signal: this.signal,
  });

  return { response, usage };
}
|
|
204
|
+
|
|
205
|
+
/**
 * Execute all tool calls from an LLM response, in order, and add results to scratchpad.
 *
 * Skill calls are deduplicated: a `skill` call whose skill name the scratchpad
 * reports as already executed is skipped silently. Every other call is handed
 * to `executeToolCall`, which performs the limit check and the invocation.
 *
 * @param response - Model response whose `tool_calls` should be executed; a
 *   missing `tool_calls` list is treated as empty.
 * @param query - The user's query, forwarded to `executeToolCall`.
 * @param scratchpad - Scratchpad that records tool calls and their results.
 */
private async *executeToolCalls(
  response: AIMessage,
  query: string,
  scratchpad: Scratchpad
): AsyncGenerator<ToolStartEvent | ToolProgressEvent | ToolEndEvent | ToolErrorEvent | ToolLimitEvent, void> {
  for (const toolCall of response.tool_calls ?? []) {
    const toolName = toolCall.name;
    const toolArgs = toolCall.args as Record<string, unknown>;

    // Deduplicate skill calls - each skill can only run once per query
    if (toolName === 'skill') {
      const skillName = toolArgs.skill;
      if (typeof skillName === 'string' && scratchpad.hasExecutedSkill(skillName)) continue;
    }

    // Delegate to the single-call generator; yield* forwards its events and
    // propagates return()/throw() from the consumer so it can clean up.
    yield* this.executeToolCall(toolName, toolArgs, query, scratchpad);
  }
}
|
|
234
|
+
|
|
235
|
+
/**
 * Run one tool call and store its outcome in the scratchpad.
 *
 * Event order: an optional `tool_limit` warning, then `tool_start`, then any
 * number of `tool_progress` events while the tool runs, and finally either
 * `tool_end` (success) or `tool_error` (failure, including an unknown tool name).
 * The call is recorded for limit tracking in both outcomes, and a failure is
 * stored in the scratchpad as a result prefixed with `Error: `.
 *
 * @param toolName - Name of the tool to look up in `toolMap`.
 * @param toolArgs - Arguments passed to the tool.
 * @param query - The user's query (not used by this method).
 * @param scratchpad - Scratchpad used for limit checks and result storage.
 */
private async *executeToolCall(
  toolName: string,
  toolArgs: Record<string, unknown>,
  query: string,
  scratchpad: Scratchpad
): AsyncGenerator<ToolStartEvent | ToolProgressEvent | ToolEndEvent | ToolErrorEvent | ToolLimitEvent, void> {
  // Query-like argument, used by the scratchpad for similarity detection
  const similarityQuery = this.extractQueryFromArgs(toolArgs);

  // Soft limit check: a warning is surfaced but the call is never blocked here
  const { warning } = scratchpad.canCallTool(toolName, similarityQuery);
  if (warning) {
    yield { type: 'tool_limit', tool: toolName, warning, blocked: false };
  }

  yield { type: 'tool_start', tool: toolName, args: toolArgs };

  const startedAt = Date.now();

  try {
    const tool = this.toolMap.get(toolName);
    if (tool === undefined) {
      throw new Error(`Tool '${toolName}' not found`);
    }

    // Progress channel lets the tool stream status updates while it runs
    const channel = createProgressChannel();
    const invokeConfig = this.signal
      ? { metadata: { onProgress: channel.emit }, signal: this.signal }
      : { metadata: { onProgress: channel.emit } };

    // Start the tool; the channel is closed as soon as the invocation settles,
    // which is what ends the progress loop below
    const pending = tool.invoke(toolArgs, invokeConfig).then(
      (value) => { channel.close(); return value; },
      (reason) => { channel.close(); throw reason; },
    );

    // Relay progress messages as they arrive
    for await (const message of channel) {
      yield { type: 'tool_progress', tool: toolName, message } as ToolProgressEvent;
    }

    // Channel closed, so the invocation has settled; a rejection surfaces here
    const raw = await pending;
    let result: string;
    if (typeof raw === 'string') {
      result = raw;
    } else {
      result = JSON.stringify(raw);
    }
    const duration = Date.now() - startedAt;

    yield { type: 'tool_end', tool: toolName, args: toolArgs, result, duration };

    // Count the call toward the tool limits, then keep the full result
    scratchpad.recordToolCall(toolName, similarityQuery);
    scratchpad.addToolResult(toolName, toolArgs, result);
  } catch (error) {
    const errorMessage = error instanceof Error ? error.message : String(error);
    yield { type: 'tool_error', tool: toolName, error: errorMessage };

    // Failed calls still count toward the limit
    scratchpad.recordToolCall(toolName, similarityQuery);

    // Store the failure so later prompts can see it
    scratchpad.addToolResult(toolName, toolArgs, `Error: ${errorMessage}`);
  }
}
|
|
312
|
+
|
|
313
|
+
/**
 * Pick the query-like string out of a tool's arguments, for similarity detection.
 *
 * Candidate keys are tried in a fixed priority order; the first one whose
 * value is a string wins.
 *
 * @param args - Tool arguments to inspect.
 * @returns The first matching string value, or undefined when none of the keys holds a string.
 */
private extractQueryFromArgs(args: Record<string, unknown>): string | undefined {
  const candidateKeys = ['query', 'search', 'question', 'q', 'text', 'input'];

  const matchedKey = candidateKeys.find((name) => typeof args[name] === 'string');

  return matchedKey === undefined ? undefined : (args[matchedKey] as string);
}
|
|
328
|
+
|
|
329
|
+
/**
 * Build the first prompt of a run, adding conversation history when there is any.
 *
 * Relevance selection is delegated to the history object's
 * `selectRelevantMessages`; only the messages it returns are included.
 *
 * @param query - The user's query.
 * @param inMemoryChatHistory - Optional conversation history.
 * @returns The bare query when there is no history or nothing relevant,
 *   otherwise the query followed by the formatted relevant history.
 */
private async buildInitialPrompt(
  query: string,
  inMemoryChatHistory?: InMemoryChatHistory
): Promise<string> {
  const history = inMemoryChatHistory;

  // No history object, or an empty one: the query is the whole prompt
  if (history == null || !history.hasMessages()) {
    return query;
  }

  const relevant = await history.selectRelevantMessages(query);
  if (relevant.length > 0) {
    const historyContext = history.formatForPlanning(relevant);
    return `Current query to answer: ${query}\n\nRelevant conversation history:\n${historyContext}`;
  }

  return query;
}
|
|
349
|
+
|
|
350
|
+
/**
 * Assemble the context text handed to the final-answer prompt.
 *
 * Only the scratchpad's active (non-cleared) tool results are used: cleared
 * entries were dropped because the context was too large, so putting them back
 * would reproduce the overflow. Results that start with `Error:` are left out.
 *
 * @param _query - Unused; kept for the existing call signature.
 * @param scratchpad - Scratchpad to read the active tool results from.
 * @returns The formatted results joined by blank lines, or a fixed notice when
 *   nothing (or nothing successful) was gathered.
 */
private buildFullContextForAnswer(_query: string, scratchpad: Scratchpad): string {
  const active = scratchpad.getActiveToolResults();
  if (active.length === 0) {
    return 'No data was gathered.';
  }

  // Format every successful entry, skipping stored errors
  const sections: string[] = [];
  for (const entry of active) {
    if (entry.result.startsWith('Error:')) {
      continue;
    }
    sections.push(this.formatToolContext(entry));
  }

  if (sections.length === 0) {
    return 'No data was successfully gathered.';
  }
  return sections.join('\n\n');
}
|
|
372
|
+
|
|
373
|
+
/**
 * Render one tool result as a markdown section for the final answer.
 *
 * The heading is the tool's description. A result that parses as JSON is
 * pretty-printed inside a fenced `json` block; anything else is appended verbatim.
 *
 * @param ctx - The tool context entry (tool name, args and raw result string).
 * @returns The formatted markdown section.
 */
private formatToolContext(ctx: ToolContext): string {
  const heading = `### ${getToolDescription(ctx.toolName, ctx.args)}`;

  try {
    const pretty = JSON.stringify(JSON.parse(ctx.result), null, 2);
    return `${heading}\n\`\`\`json\n${pretty}\n\`\`\``;
  } catch {
    // Not valid JSON: keep the raw text
    return `${heading}\n${ctx.result}`;
  }
}
|
|
385
|
+
}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
export { Agent } from './agent.js';
|
|
2
|
+
|
|
3
|
+
export { Scratchpad } from './scratchpad.js';
|
|
4
|
+
|
|
5
|
+
export { getCurrentDate, buildSystemPrompt, buildIterationPrompt, DEFAULT_SYSTEM_PROMPT } from './prompts.js';
|
|
6
|
+
|
|
7
|
+
export type {
|
|
8
|
+
AgentConfig,
|
|
9
|
+
Message,
|
|
10
|
+
AgentEvent,
|
|
11
|
+
ThinkingEvent,
|
|
12
|
+
ToolStartEvent,
|
|
13
|
+
ToolProgressEvent,
|
|
14
|
+
ToolEndEvent,
|
|
15
|
+
ToolErrorEvent,
|
|
16
|
+
ToolLimitEvent,
|
|
17
|
+
AnswerStartEvent,
|
|
18
|
+
DoneEvent,
|
|
19
|
+
} from './types.js';
|
|
20
|
+
|
|
21
|
+
export type {
|
|
22
|
+
ToolCallRecord,
|
|
23
|
+
ToolContext,
|
|
24
|
+
ScratchpadEntry,
|
|
25
|
+
ToolLimitConfig,
|
|
26
|
+
ToolUsageStatus,
|
|
27
|
+
} from './scratchpad.js';
|
|
@@ -0,0 +1,271 @@
|
|
|
1
|
+
import { buildToolDescriptions } from '../tools/registry.js';
|
|
2
|
+
import { buildSkillMetadataSection, discoverSkills } from '../skills/index.js';
|
|
3
|
+
|
|
4
|
+
// ============================================================================
|
|
5
|
+
// Helper Functions
|
|
6
|
+
// ============================================================================
|
|
7
|
+
|
|
8
|
+
/**
 * Returns a date formatted for prompts, e.g. "Tuesday, February 10, 2026".
 *
 * Formatting uses the `en-US` locale and the runtime's local time zone.
 *
 * @param now - The date to format. Defaults to the current date and time;
 *   pass an explicit value to get deterministic output (e.g. in tests).
 * @returns The long-form date: weekday, month name, day and year.
 */
export function getCurrentDate(now: Date = new Date()): string {
  const options: Intl.DateTimeFormatOptions = {
    weekday: 'long',
    year: 'numeric',
    month: 'long',
    day: 'numeric',
  };
  return now.toLocaleDateString('en-US', options);
}
|
|
20
|
+
|
|
21
|
+
/**
 * Produce the skills portion of the system prompt.
 *
 * @returns An empty string when no skills are discovered; otherwise the
 *   "Available Skills" listing followed by the skill usage policy.
 */
function buildSkillsSection(): string {
  if (discoverSkills().length === 0) {
    return '';
  }

  const skillList = buildSkillMetadataSection();

  // Assembled line by line; joined with newlines this is the full section text
  const sectionLines = [
    '## Available Skills',
    '',
    skillList,
    '',
    '## Skill Usage Policy',
    '',
    '- Check if available skills can help complete the task more effectively',
    '- When a skill is relevant, invoke it IMMEDIATELY as your first action',
    '- Skills provide specialized workflows for complex tasks',
    '- Do not invoke a skill that has already been invoked for the current query',
  ];

  return sectionLines.join('\n');
}
|
|
45
|
+
|
|
46
|
+
// ============================================================================
|
|
47
|
+
// Default System Prompt (for backward compatibility)
|
|
48
|
+
// ============================================================================
|
|
49
|
+
|
|
50
|
+
/**
|
|
51
|
+
* Default system prompt used when no specific prompt is provided (its "Current date" line is fixed when this module is first loaded).
|
|
52
|
+
*/
|
|
53
|
+
export const DEFAULT_SYSTEM_PROMPT = `You are Brownian Code, an AI agent for crypto research.
|
|
54
|
+
|
|
55
|
+
Current date: ${getCurrentDate()}
|
|
56
|
+
|
|
57
|
+
Your output is displayed on a command line interface. Keep responses short and concise.
|
|
58
|
+
|
|
59
|
+
## Behavior
|
|
60
|
+
|
|
61
|
+
- Prioritize accuracy over validation
|
|
62
|
+
- Use professional, data-driven tone
|
|
63
|
+
- Never hype or shill tokens
|
|
64
|
+
|
|
65
|
+
## Response Format
|
|
66
|
+
|
|
67
|
+
- Keep responses brief and direct
|
|
68
|
+
- For non-comparative information, prefer plain text or simple lists over tables
|
|
69
|
+
- Do not use markdown headers or *italics* - use **bold** sparingly for emphasis
|
|
70
|
+
|
|
71
|
+
## Tables (for comparative/tabular data)
|
|
72
|
+
|
|
73
|
+
Use markdown tables. They will be rendered as formatted box tables.
|
|
74
|
+
|
|
75
|
+
STRICT FORMAT - each row must:
|
|
76
|
+
- Start with | and end with |
|
|
77
|
+
- Have no trailing spaces after the final |
|
|
78
|
+
- Use |---| separator (with optional : for alignment)
|
|
79
|
+
|
|
80
|
+
| Token | Price | 24h | MCap |
|
|
81
|
+
|-------|----------|--------|-------|
|
|
82
|
+
| BTC | $67,420 | +2.3% | 1.3T |
|
|
83
|
+
|
|
84
|
+
Keep tables compact:
|
|
85
|
+
- Max 3-4 columns; prefer multiple small tables over one wide table
|
|
86
|
+
- Use symbols/tickers not full names: "BTC" not "Bitcoin"
|
|
87
|
+
- Numbers compact: 1.3T not $1,300,000,000,000
|
|
88
|
+
- Omit units in cells if header has them`;
|
|
89
|
+
|
|
90
|
+
// ============================================================================
|
|
91
|
+
// System Prompt
|
|
92
|
+
// ============================================================================
|
|
93
|
+
|
|
94
|
+
/**
 * Build the system prompt for the agent.
 *
 * The prompt is assembled on every call, so the embedded current date, the
 * tool descriptions and the skills section reflect the state at call time.
 *
 * @param model - The model name (used to get appropriate tool descriptions)
 * @returns The complete system prompt: identity, available tools, tool usage
 *   policy and rules, call-pattern examples, entity resolution, security
 *   routing, query classification, skills, behavior and formatting rules.
 */
export function buildSystemPrompt(model: string): string {
  const toolDescriptions = buildToolDescriptions(model);

  return `You are Brownian Code, a CLI agent for crypto research. You have access to 227+ crypto data tools via Hive Intelligence, covering market data, DeFi, wallets, security, NFTs, and more.

Current date: ${getCurrentDate()}

Your output is displayed on a command line interface. Keep responses short and concise.

## Available Tools

${toolDescriptions}

## Tool Usage Policy

- **Category-first routing**: classify intent → call category endpoint → get schema → invoke endpoint
- **Common shortcuts**: skip schema for well-known endpoints (get_protocol_tvl, get_defi_protocol, get_token_security, check_malicious_address)
- **Tool budget**: aim for 3-5 tool calls per query (1 category + 1 schema + 1-3 invokes)
- **Prefer aggregate endpoints**: use coins_market_data_browser for multi-coin comparisons instead of multiple simple_price calls
- Only use tools when the query actually requires external data
- Use web_fetch for reading web pages, articles, and documentation
- Only use browser when you need JavaScript rendering or interactive navigation
- For general knowledge questions, respond directly without tools

## CRITICAL Tool Rules

- **NEVER** call the same tool twice with the same or similar arguments
- **NEVER** call a category tool (get_*_endpoints) more than once per query
- invoke_api_endpoint takes \`endpoint_name\` and \`arguments\` — e.g. \`{ "endpoint_name": "get_protocol_tvl", "arguments": { "protocol": "lido" } }\`
- get_api_endpoint_schema takes \`endpoint_name\` — e.g. \`{ "endpoint_name": "get_token_security" }\`
- If a tool call returns an error, do NOT retry with the same arguments — adjust or try a different endpoint

## Correct Call Pattern Examples

**TVL lookup** (1 call — well-known shortcut):
→ invoke_api_endpoint({ endpoint_name: "get_protocol_tvl", arguments: { protocol: "lido" } })

**DeFi protocol detail** (3 calls):
→ get_defi_protocol_endpoints() → get_api_endpoint_schema({ endpoint_name: "get_defi_protocol" }) → invoke_api_endpoint({ endpoint_name: "get_defi_protocol", arguments: { protocol: "aave" } })

**Security audit** (3 calls):
→ get_security_risk_endpoints() → get_api_endpoint_schema({ endpoint_name: "get_token_security" }) → invoke_api_endpoint({ endpoint_name: "get_token_security", arguments: { chainId: "1", contract_addresses: "0x..." } })

## Entity Resolution

When users mention crypto assets, resolve to standard IDs:
- BTC/btc/Bitcoin → id: "bitcoin"
- ETH/eth/Ethereum → id: "ethereum"
- SOL/sol/Solana → id: "solana"
- BNB/bnb → id: "binancecoin"
- XRP/xrp → id: "ripple"
- ADA/ada/Cardano → id: "cardano"
- AVAX/avax → id: "avalanche-2"
- DOT/dot → id: "polkadot"
- MATIC/matic → id: "matic-network"
- LINK/link → id: "chainlink"
- UNI/uni → id: "uniswap"
- AAVE/aave → id: "aave"
- Default currency: USD | Default timeframe: 24h
- For contract addresses (0x...), use contract lookup endpoints

## Security-First Routing

- **Top 20 tokens** (BTC, ETH, SOL, BNB, XRP, ADA, AVAX, DOT, MATIC, LINK, UNI, AAVE, etc.): direct price lookup OK
- **Any other token**: ALWAYS check security first via get_security_risk_endpoints before presenting data
- **Always warn about**: honeypots, concentrated holders (>50% held by top 10), unverified contracts, low liquidity

## Query Classification

- **LOOKUP** (2-3 calls): single asset price, basic token info → category + schema + invoke
- **ANALYSIS** (3-5 calls): multi-asset comparison, wallet analysis, DeFi research → category + schema + 2-3 invokes
- **EXPLORATION** (4-6 calls): discovery, trending, complex research → multiple categories + invokes

${buildSkillsSection()}

## Behavior

- Prioritize accuracy — never fabricate data or make up numbers
- Use professional, data-driven tone without hype or shilling
- For research tasks, be thorough but efficient
- Always note data freshness (Hive data is near real-time but may have slight delays)
- Acknowledge limitations of on-chain data (e.g., CEX balances not visible)
- Never ask users to provide raw data or reference JSON/API internals
- If data is incomplete, answer with what you have without exposing implementation details

## Response Format

- Keep casual responses brief and direct
- For research: lead with the key finding and include specific data points
- For price lookups: include price, 24h change, and market cap when available
- For comparisons: use tables with key metrics
- Don't narrate your actions or ask leading questions
- Do not use markdown headers or *italics* - use **bold** sparingly for emphasis
- Include data source attribution when relevant (e.g., "via CoinGecko", "via DefiLlama")

## Tables (for comparative/tabular data)

Use markdown tables. They will be rendered as formatted box tables.

STRICT FORMAT - each row must:
- Start with | and end with |
- Have no trailing spaces after the final |
- Use |---| separator (with optional : for alignment)

| Token | Price | 24h | MCap |
|-------|----------|--------|-------|
| BTC | $67,420 | +2.3% | 1.3T |

Keep tables compact:
- Max 3-4 columns; prefer multiple small tables over one wide table
- Use symbols/tickers not full names: "BTC" not "Bitcoin"
- Numbers compact: 1.3T not $1,300,000,000,000
- Omit units in cells if header has them
- Use color hints: prefix positive changes with +, negative with -`;
}

// ============================================================================
// User Prompts
// ============================================================================

/**
 * Build user prompt for agent iteration with full tool results.
 * Anthropic-style: full results in context for accurate decision-making.
 * Context clearing happens at threshold, not inline summarization.
 *
 * The prompt always starts with the query and always ends with the
 * "Continue working..." instruction; the tool-results and status sections
 * are inserted between them only when they carry content.
 *
 * @param originalQuery - The user's original query
 * @param fullToolResults - Formatted full tool results (or placeholder for cleared);
 *   omitted from the prompt when empty or whitespace-only
 * @param toolUsageStatus - Optional tool usage status for graceful exit mechanism;
 *   omitted when `null`, `undefined` or empty
 * @returns The assembled prompt text for the next agent iteration
 */
export function buildIterationPrompt(
  originalQuery: string,
  fullToolResults: string,
  toolUsageStatus?: string | null
): string {
  let prompt = `Query: ${originalQuery}`;

  // Whitespace-only results count as "no data" and produce no section at all.
  if (fullToolResults.trim()) {
    prompt += `

Data retrieved from tool calls:
${fullToolResults}`;
  }

  // Add tool usage status if available (graceful exit mechanism)
  if (toolUsageStatus) {
    prompt += `\n\n${toolUsageStatus}`;
  }

  prompt += `

Continue working toward answering the query. If you have gathered actual content (not just links or titles), you may respond. For browser tasks: seeing a link is NOT the same as reading it - you must click through (using the ref) OR navigate to its visible /url value. NEVER guess at URLs - use ONLY URLs visible in snapshots.`;

  return prompt;
}

// ============================================================================
// Final Answer Generation
// ============================================================================

/**
 * Build the prompt for final answer generation with full context data.
 * This is used after context compaction - full data is loaded from disk for the final answer.
 *
 * @param originalQuery - The user's original query
 * @param fullContextData - The complete tool-call data to answer from; inserted verbatim
 * @returns A prompt containing the query, the data, and the instruction to
 *   answer from that data without asking the user for more
 */
export function buildFinalAnswerPrompt(
  originalQuery: string,
  fullContextData: string
): string {
  return `Query: ${originalQuery}

Data retrieved from your tool calls:
${fullContextData}

Answer the user's query using this data. Do not ask the user to provide additional data, paste values, or reference JSON/API internals. If data is incomplete, answer with what you have.`;
}
|