brownian-code 2026.2.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +97 -0
  3. package/bin/brownian +25 -0
  4. package/env.example +21 -0
  5. package/package.json +87 -0
  6. package/src/agent/agent.test.ts +414 -0
  7. package/src/agent/agent.ts +385 -0
  8. package/src/agent/index.ts +27 -0
  9. package/src/agent/prompts.ts +271 -0
  10. package/src/agent/scratchpad.test.ts +482 -0
  11. package/src/agent/scratchpad.ts +526 -0
  12. package/src/agent/token-counter.test.ts +59 -0
  13. package/src/agent/token-counter.ts +33 -0
  14. package/src/agent/types.ts +137 -0
  15. package/src/cli.tsx +385 -0
  16. package/src/commands/builtin.test.ts +271 -0
  17. package/src/commands/builtin.ts +200 -0
  18. package/src/commands/registry.test.ts +188 -0
  19. package/src/commands/registry.ts +111 -0
  20. package/src/commands/types.ts +64 -0
  21. package/src/components/AgentEventView.tsx +487 -0
  22. package/src/components/AnswerBox.tsx +81 -0
  23. package/src/components/ApiKeyPrompt.tsx +75 -0
  24. package/src/components/CommandMenu.test.tsx +64 -0
  25. package/src/components/CommandMenu.tsx +38 -0
  26. package/src/components/CursorText.tsx +43 -0
  27. package/src/components/DebugPanel.tsx +48 -0
  28. package/src/components/ErrorBox.test.tsx +58 -0
  29. package/src/components/ErrorBox.tsx +26 -0
  30. package/src/components/HelpView.test.tsx +70 -0
  31. package/src/components/HelpView.tsx +61 -0
  32. package/src/components/HistoryItemView.tsx +108 -0
  33. package/src/components/Input.tsx +193 -0
  34. package/src/components/Intro.test.tsx +59 -0
  35. package/src/components/Intro.tsx +35 -0
  36. package/src/components/ModelSelector.tsx +288 -0
  37. package/src/components/StatusBar.test.tsx +78 -0
  38. package/src/components/StatusBar.tsx +56 -0
  39. package/src/components/WorkingIndicator.tsx +133 -0
  40. package/src/components/index.ts +23 -0
  41. package/src/e2e/agent-flow.test.ts +378 -0
  42. package/src/evals/components/EvalApp.tsx +206 -0
  43. package/src/evals/components/EvalCurrentQuestion.tsx +42 -0
  44. package/src/evals/components/EvalProgress.tsx +33 -0
  45. package/src/evals/components/EvalRecentResults.tsx +63 -0
  46. package/src/evals/components/EvalStats.tsx +49 -0
  47. package/src/evals/components/index.ts +5 -0
  48. package/src/evals/dataset/crypto_agent.csv +16 -0
  49. package/src/evals/run.ts +355 -0
  50. package/src/gateway/channels/whatsapp/auth-store.ts +15 -0
  51. package/src/gateway/channels/whatsapp/inbound.ts +86 -0
  52. package/src/gateway/channels/whatsapp/login.ts +28 -0
  53. package/src/gateway/channels/whatsapp/outbound.ts +27 -0
  54. package/src/gateway/channels/whatsapp/session.ts +69 -0
  55. package/src/gateway/config.ts +81 -0
  56. package/src/gateway/index.ts +62 -0
  57. package/src/hooks/useAgentRunner.ts +317 -0
  58. package/src/hooks/useDebugLogs.ts +22 -0
  59. package/src/hooks/useInputHistory.ts +106 -0
  60. package/src/hooks/useModelSelection.ts +249 -0
  61. package/src/hooks/useTextBuffer.test.ts +121 -0
  62. package/src/hooks/useTextBuffer.ts +97 -0
  63. package/src/index.tsx +74 -0
  64. package/src/mcp/cache.ts +205 -0
  65. package/src/mcp/client.test.ts +126 -0
  66. package/src/mcp/client.ts +145 -0
  67. package/src/mcp/index.ts +2 -0
  68. package/src/model/llm.test.ts +158 -0
  69. package/src/model/llm.ts +233 -0
  70. package/src/providers.ts +94 -0
  71. package/src/skills/index.ts +17 -0
  72. package/src/skills/loader.ts +73 -0
  73. package/src/skills/registry.ts +125 -0
  74. package/src/skills/types.ts +31 -0
  75. package/src/test-utils/mocks.ts +110 -0
  76. package/src/theme.ts +21 -0
  77. package/src/tools/browser/browser.ts +357 -0
  78. package/src/tools/browser/index.ts +1 -0
  79. package/src/tools/crypto/hive-tools.ts +171 -0
  80. package/src/tools/crypto/index.ts +1 -0
  81. package/src/tools/descriptions/browser.ts +105 -0
  82. package/src/tools/descriptions/crypto-search.ts +58 -0
  83. package/src/tools/descriptions/index.ts +8 -0
  84. package/src/tools/descriptions/web-fetch.ts +44 -0
  85. package/src/tools/descriptions/web-search.ts +26 -0
  86. package/src/tools/fetch/cache.ts +95 -0
  87. package/src/tools/fetch/external-content.ts +200 -0
  88. package/src/tools/fetch/index.ts +1 -0
  89. package/src/tools/fetch/web-fetch-utils.ts +122 -0
  90. package/src/tools/fetch/web-fetch.ts +371 -0
  91. package/src/tools/index.ts +12 -0
  92. package/src/tools/registry.ts +130 -0
  93. package/src/tools/search/exa.ts +43 -0
  94. package/src/tools/search/index.ts +2 -0
  95. package/src/tools/search/tavily.ts +35 -0
  96. package/src/tools/skill.ts +62 -0
  97. package/src/tools/types.ts +53 -0
  98. package/src/utils/ai-message.ts +26 -0
  99. package/src/utils/config.ts +54 -0
  100. package/src/utils/cost-calculator.test.ts +101 -0
  101. package/src/utils/cost-calculator.ts +74 -0
  102. package/src/utils/env.ts +101 -0
  103. package/src/utils/error-classifier.test.ts +146 -0
  104. package/src/utils/error-classifier.ts +91 -0
  105. package/src/utils/in-memory-chat-history.test.ts +291 -0
  106. package/src/utils/in-memory-chat-history.ts +224 -0
  107. package/src/utils/index.ts +19 -0
  108. package/src/utils/input-key-handlers.test.ts +155 -0
  109. package/src/utils/input-key-handlers.ts +64 -0
  110. package/src/utils/logger.ts +67 -0
  111. package/src/utils/long-term-chat-history.ts +138 -0
  112. package/src/utils/markdown-table.ts +227 -0
  113. package/src/utils/ollama.ts +37 -0
  114. package/src/utils/progress-channel.ts +84 -0
  115. package/src/utils/text-navigation.test.ts +222 -0
  116. package/src/utils/text-navigation.ts +81 -0
  117. package/src/utils/thinking-verbs.ts +29 -0
  118. package/src/utils/tokens.test.ts +163 -0
  119. package/src/utils/tokens.ts +67 -0
  120. package/src/utils/tool-description.ts +88 -0
@@ -0,0 +1,385 @@
1
+ import { AIMessage } from '@langchain/core/messages';
2
+ import { StructuredToolInterface } from '@langchain/core/tools';
3
+ import { callLlm } from '../model/llm.js';
4
+ import { Scratchpad, type ToolContext } from './scratchpad.js';
5
+ import { getTools } from '../tools/registry.js';
6
+ import { buildSystemPrompt, buildIterationPrompt, buildFinalAnswerPrompt } from '../agent/prompts.js';
7
+ import { extractTextContent, hasToolCalls } from '../utils/ai-message.js';
8
+ import { InMemoryChatHistory } from '../utils/in-memory-chat-history.js';
9
+ import { getToolDescription } from '../utils/tool-description.js';
10
+ import { estimateTokens, getContextThreshold, KEEP_TOOL_USES } from '../utils/tokens.js';
11
+ import { createProgressChannel } from '../utils/progress-channel.js';
12
+ import type { AgentConfig, AgentEvent, ToolStartEvent, ToolProgressEvent, ToolEndEvent, ToolErrorEvent, ToolLimitEvent, ContextClearedEvent, TokenUsage } from '../agent/types.js';
13
+ import { TokenCounter } from './token-counter.js';
14
+
15
+
16
/** Iteration cap applied when `AgentConfig.maxIterations` is not provided. */
const DEFAULT_MAX_ITERATIONS = 10;

/** Model identifier applied when `AgentConfig.model` is not provided. */
const DEFAULT_MODEL = 'claude-sonnet-4-5';

/** Provider identifier applied when `AgentConfig.modelProvider` is not provided. */
const DEFAULT_MODEL_PROVIDER = 'anthropic';

/** Argument names probed, in priority order, when looking for a query-like tool argument. */
const QUERY_ARG_KEYS: readonly string[] = ['query', 'search', 'question', 'q', 'text', 'input'];

/**
 * The core agent class that handles the agent loop and tool execution.
 *
 * Instances are built through {@link Agent.create}; the constructor is private.
 * A run alternates between calling the model and executing the tool calls it
 * requests, recording everything in a per-query Scratchpad, and finishes with a
 * dedicated final-answer model call once the model stops requesting tools or the
 * iteration cap is reached.
 */
export class Agent {
  private readonly model: string;
  // Stored from config; nothing in this class reads it.
  private readonly modelProvider: string;
  private readonly maxIterations: number;
  private readonly tools: StructuredToolInterface[];
  private readonly toolMap: Map<string, StructuredToolInterface>;
  private readonly systemPrompt: string;
  private readonly signal?: AbortSignal;

  private constructor(
    config: AgentConfig,
    tools: StructuredToolInterface[],
    systemPrompt: string
  ) {
    this.model = config.model ?? DEFAULT_MODEL;
    this.modelProvider = config.modelProvider ?? DEFAULT_MODEL_PROVIDER;
    this.maxIterations = config.maxIterations ?? DEFAULT_MAX_ITERATIONS;
    this.tools = tools;
    // Name-indexed lookup used when dispatching tool calls.
    this.toolMap = new Map(tools.map(t => [t.name, t]));
    this.systemPrompt = systemPrompt;
    this.signal = config.signal;
  }

  /**
   * Create a new Agent instance with tools.
   *
   * The tool list and the system prompt are both derived from the configured
   * model (or the default model when none is given).
   *
   * @param config - Optional agent configuration
   * @returns A ready-to-run agent
   */
  static create(config: AgentConfig = {}): Agent {
    const model = config.model ?? DEFAULT_MODEL;
    const tools = getTools(model);
    const systemPrompt = buildSystemPrompt(model);
    return new Agent(config, tools, systemPrompt);
  }

  /**
   * Run the agent and yield events for real-time UI updates.
   * Anthropic-style context management: full tool results during iteration,
   * with threshold-based clearing of oldest results when context exceeds limit.
   *
   * The generator always ends with a single `done` event. Paths to it:
   * - no tools configured: immediate `done` with a fixed message;
   * - the model answers without ever using a tool: its text is the answer;
   * - the model stops requesting tools after using some: a final-answer call
   *   is made over the scratchpad's active tool results;
   * - the iteration cap is hit: same final-answer call, with a fallback
   *   message if the model returns nothing.
   *
   * @param query - The user's query
   * @param inMemoryHistory - Optional conversation history used to enrich the first prompt
   */
  async *run(query: string, inMemoryHistory?: InMemoryChatHistory): AsyncGenerator<AgentEvent> {
    const startTime = Date.now();
    const tokenCounter = new TokenCounter();

    if (this.tools.length === 0) {
      yield { type: 'done', answer: 'No tools available. Please check your API key configuration.', toolCalls: [], iterations: 0, totalTime: Date.now() - startTime };
      return;
    }

    // Create scratchpad for this query - single source of truth for all work done
    const scratchpad = new Scratchpad(query);

    // Build initial prompt with conversation history context
    let currentPrompt = await this.buildInitialPrompt(query, inMemoryHistory);

    let iteration = 0;

    const contextThreshold = getContextThreshold(this.model);

    // Main agent loop
    while (iteration < this.maxIterations) {
      iteration++;

      // Pre-flight context check: trim before sending to model
      const estimatedPreFlight = estimateTokens(this.systemPrompt + currentPrompt);
      if (estimatedPreFlight > contextThreshold) {
        const clearedCount = scratchpad.clearOldestToolResults(KEEP_TOOL_USES);
        if (clearedCount > 0) {
          yield { type: 'context_cleared', clearedCount, keptCount: KEEP_TOOL_USES } as ContextClearedEvent;
          // Rebuild the prompt so it reflects the trimmed scratchpad.
          currentPrompt = buildIterationPrompt(
            query,
            scratchpad.getToolResults(),
            scratchpad.formatToolUsageForPrompt()
          );
        }
      }

      const { response, usage } = await this.callModel(currentPrompt);
      tokenCounter.add(usage);
      const responseText = typeof response === 'string' ? response : extractTextContent(response);

      // Emit thinking if there are also tool calls (skip whitespace-only responses)
      if (responseText?.trim() && typeof response !== 'string' && hasToolCalls(response)) {
        const trimmedText = responseText.trim();
        scratchpad.addThinking(trimmedText);
        yield { type: 'thinking', message: trimmedText };
      }

      // No tool calls = ready to generate final answer
      if (typeof response === 'string' || !hasToolCalls(response)) {
        // If no tools were called at all, just use the direct response
        // This handles greetings, clarifying questions, etc.
        if (!scratchpad.hasToolResults() && responseText) {
          yield { type: 'answer_start' };
          const totalTime = Date.now() - startTime;
          yield { type: 'done', answer: responseText, toolCalls: [], iterations: iteration, totalTime, tokenUsage: tokenCounter.getUsage(), tokensPerSecond: tokenCounter.getTokensPerSecond(totalTime) };
          return;
        }

        // Generate final answer with full context from scratchpad.
        // The prompt is built before `answer_start` is emitted so that a failure
        // while assembling context never produces a dangling `answer_start`.
        const finalPrompt = buildFinalAnswerPrompt(query, this.buildFullContextForAnswer(query, scratchpad));

        yield { type: 'answer_start' };
        const answer = await this.requestFinalAnswer(finalPrompt, tokenCounter);

        const totalTime = Date.now() - startTime;
        yield { type: 'done', answer, toolCalls: scratchpad.getToolCallRecords(), iterations: iteration, totalTime, tokenUsage: tokenCounter.getUsage(), tokensPerSecond: tokenCounter.getTokensPerSecond(totalTime) };
        return;
      }

      // Execute tools and add results to scratchpad (response is AIMessage here).
      // `yield*` forwards every tool event and also propagates return()/throw()
      // from the consumer into the inner generator so its cleanup runs.
      yield* this.executeToolCalls(response, query, scratchpad);

      // Anthropic-style context management: get full tool results
      let fullToolResults = scratchpad.getToolResults();

      // Check context threshold and clear oldest tool results if needed
      const estimatedContextTokens = estimateTokens(this.systemPrompt + query + fullToolResults);
      if (estimatedContextTokens > contextThreshold) {
        const clearedCount = scratchpad.clearOldestToolResults(KEEP_TOOL_USES);
        if (clearedCount > 0) {
          yield { type: 'context_cleared', clearedCount, keptCount: KEEP_TOOL_USES } as ContextClearedEvent;
          // Re-fetch after clearing
          fullToolResults = scratchpad.getToolResults();
        }
      }

      // Build iteration prompt with full tool results (Anthropic-style)
      currentPrompt = buildIterationPrompt(
        query,
        fullToolResults,
        scratchpad.formatToolUsageForPrompt()
      );
    }

    // Max iterations reached - still generate proper final answer
    const finalPrompt = buildFinalAnswerPrompt(query, this.buildFullContextForAnswer(query, scratchpad));

    yield { type: 'answer_start' };
    const answer = await this.requestFinalAnswer(finalPrompt, tokenCounter);

    const totalTime = Date.now() - startTime;
    yield {
      type: 'done',
      answer: answer || `Reached maximum iterations (${this.maxIterations}).`,
      toolCalls: scratchpad.getToolCallRecords(),
      iterations: iteration,
      totalTime,
      tokenUsage: tokenCounter.getUsage(),
      tokensPerSecond: tokenCounter.getTokensPerSecond(totalTime)
    };
  }

  /**
   * Ask the model for the final answer (no tools bound) and account for its token usage.
   *
   * @param finalPrompt - Prompt produced by `buildFinalAnswerPrompt`
   * @param tokenCounter - Counter that receives the usage reported for this call
   * @returns The answer text extracted from the model response
   */
  private async requestFinalAnswer(finalPrompt: string, tokenCounter: TokenCounter) {
    const { response, usage } = await this.callModel(finalPrompt, false);
    tokenCounter.add(usage);
    return typeof response === 'string' ? response : extractTextContent(response);
  }

  /**
   * Call the LLM with the current prompt.
   * @param prompt - The prompt to send to the LLM
   * @param useTools - Whether to bind tools (default: true). When false, returns string directly.
   * @returns The model response together with the usage reported for the call, if any
   */
  private async callModel(prompt: string, useTools: boolean = true): Promise<{ response: AIMessage | string; usage?: TokenUsage }> {
    const result = await callLlm(prompt, {
      model: this.model,
      systemPrompt: this.systemPrompt,
      tools: useTools ? this.tools : undefined,
      signal: this.signal,
    });
    return { response: result.response, usage: result.usage };
  }

  /**
   * Execute all tool calls from an LLM response and add results to scratchpad.
   * Deduplicates skill calls - each skill can only be executed once per query.
   * Calls are executed sequentially, in the order the model listed them.
   *
   * @param response - Model message whose `tool_calls` are executed (none if absent)
   * @param query - The user's original query
   * @param scratchpad - Per-query record that receives every result
   */
  private async *executeToolCalls(
    response: AIMessage,
    query: string,
    scratchpad: Scratchpad
  ): AsyncGenerator<ToolStartEvent | ToolProgressEvent | ToolEndEvent | ToolErrorEvent | ToolLimitEvent, void> {
    // A message without tool calls is treated as an empty list rather than asserted non-null.
    for (const toolCall of response.tool_calls ?? []) {
      const toolName = toolCall.name;
      const toolArgs = toolCall.args as Record<string, unknown>;

      // Deduplicate skill calls - each skill can only run once per query
      if (toolName === 'skill') {
        const skillName = toolArgs.skill as string;
        if (scratchpad.hasExecutedSkill(skillName)) continue;
      }

      yield* this.executeToolCall(toolName, toolArgs, query, scratchpad);
    }
  }

  /**
   * Execute a single tool call and add result to scratchpad.
   * Yields start/progress/end/error events for UI updates.
   *
   * Tool limits are advisory here: when the scratchpad reports a warning a
   * `tool_limit` event is emitted with `blocked: false` and the tool still runs.
   * Failures (unknown tool, tool rejection, unserializable result) are reported
   * as a `tool_error` event and stored in the scratchpad as `Error: <message>`;
   * they are not rethrown.
   *
   * @param toolName - Name of the tool to invoke
   * @param toolArgs - Arguments supplied by the model
   * @param query - The user's original query (not used by this method)
   * @param scratchpad - Per-query record that receives the result or error
   */
  private async *executeToolCall(
    toolName: string,
    toolArgs: Record<string, unknown>,
    query: string,
    scratchpad: Scratchpad
  ): AsyncGenerator<ToolStartEvent | ToolProgressEvent | ToolEndEvent | ToolErrorEvent | ToolLimitEvent, void> {
    // Extract query string from tool args for similarity detection
    const toolQuery = this.extractQueryFromArgs(toolArgs);

    // Check tool limits - yields warning if approaching/over limits
    const limitCheck = scratchpad.canCallTool(toolName, toolQuery);

    if (limitCheck.warning) {
      yield {
        type: 'tool_limit',
        tool: toolName,
        warning: limitCheck.warning,
        blocked: false
      };
    }

    yield { type: 'tool_start', tool: toolName, args: toolArgs };

    const toolStartTime = Date.now();

    try {
      const tool = this.toolMap.get(toolName);
      if (!tool) {
        throw new Error(`Tool '${toolName}' not found`);
      }

      // Create a progress channel so subagent tools can stream status updates
      const channel = createProgressChannel();
      const config = {
        metadata: { onProgress: channel.emit },
        ...(this.signal ? { signal: this.signal } : {}),
      };

      // Launch tool invocation -- closes the channel when it settles (either way),
      // which is what ends the progress loop below.
      const toolPromise = tool.invoke(toolArgs, config).finally(() => channel.close());

      // Drain progress events in real-time as the tool executes
      for await (const message of channel) {
        yield { type: 'tool_progress', tool: toolName, message } as ToolProgressEvent;
      }

      // Tool has finished -- collect the result
      const rawResult = await toolPromise;
      // JSON.stringify yields undefined (not a string) for undefined, functions and
      // symbols; fall back to String() so the scratchpad always stores a string.
      const result = typeof rawResult === 'string'
        ? rawResult
        : (JSON.stringify(rawResult) ?? String(rawResult));
      const duration = Date.now() - toolStartTime;

      yield { type: 'tool_end', tool: toolName, args: toolArgs, result, duration };

      // Record the tool call for limit tracking
      scratchpad.recordToolCall(toolName, toolQuery);

      // Add full tool result to scratchpad (Anthropic-style: no inline summarization)
      scratchpad.addToolResult(toolName, toolArgs, result);
    } catch (error) {
      const errorMessage = error instanceof Error ? error.message : String(error);
      yield { type: 'tool_error', tool: toolName, error: errorMessage };

      // Still record the call even on error (counts toward limit)
      scratchpad.recordToolCall(toolName, toolQuery);

      // Add error to scratchpad
      scratchpad.addToolResult(toolName, toolArgs, `Error: ${errorMessage}`);
    }
  }

  /**
   * Extract query string from tool arguments for similarity detection.
   * Looks for common query-like argument names and returns the first one
   * whose value is a string.
   *
   * @param args - Tool arguments supplied by the model
   * @returns The first string-valued query-like argument, or `undefined` if none
   */
  private extractQueryFromArgs(args: Record<string, unknown>): string | undefined {
    for (const key of QUERY_ARG_KEYS) {
      const value = args[key];
      if (typeof value === 'string') {
        return value;
      }
    }

    return undefined;
  }

  /**
   * Build initial prompt with conversation history context if available.
   * Delegates relevance selection to the history object; when there is no
   * history, or nothing relevant is selected, the bare query is returned.
   *
   * @param query - The user's query
   * @param inMemoryChatHistory - Optional conversation history
   * @returns The prompt for the first model call
   */
  private async buildInitialPrompt(
    query: string,
    inMemoryChatHistory?: InMemoryChatHistory
  ): Promise<string> {
    if (!inMemoryChatHistory?.hasMessages()) {
      return query;
    }

    const relevantMessages = await inMemoryChatHistory.selectRelevantMessages(query);
    if (relevantMessages.length === 0) {
      return query;
    }

    const historyContext = inMemoryChatHistory.formatForPlanning(relevantMessages);
    return `Current query to answer: ${query}\n\nRelevant conversation history:\n${historyContext}`;
  }

  /**
   * Build full context data for final answer generation from scratchpad.
   * Uses only active (non-cleared) tool results — cleared entries were removed
   * because context was too large, so re-including them would cause the same overflow.
   * Results recorded as `Error: ...` are excluded.
   *
   * @param _query - Unused
   * @param scratchpad - Per-query record to read tool results from
   * @returns Formatted tool results, or a fixed placeholder sentence when there are none
   */
  private buildFullContextForAnswer(_query: string, scratchpad: Scratchpad): string {
    const contexts = scratchpad.getActiveToolResults();

    if (contexts.length === 0) {
      return 'No data was gathered.';
    }

    // Filter out error results
    const validContexts = contexts.filter(ctx => !ctx.result.startsWith('Error:'));

    if (validContexts.length === 0) {
      return 'No data was successfully gathered.';
    }

    // Format all contexts with full data
    return validContexts.map(ctx => this.formatToolContext(ctx)).join('\n\n');
  }

  /**
   * Format a single tool context entry for the final answer.
   * JSON results are pretty-printed inside a fenced block; anything else is
   * included verbatim under the same heading.
   *
   * @param ctx - Tool call record (name, args, result)
   * @returns A markdown fragment headed by the tool description
   */
  private formatToolContext(ctx: ToolContext): string {
    const description = getToolDescription(ctx.toolName, ctx.args);
    try {
      return `### ${description}\n\`\`\`json\n${JSON.stringify(JSON.parse(ctx.result), null, 2)}\n\`\`\``;
    } catch {
      // If result is not valid JSON, return as-is
      return `### ${description}\n${ctx.result}`;
    }
  }
}
@@ -0,0 +1,27 @@
1
+ export { Agent } from './agent.js';
2
+
3
+ export { Scratchpad } from './scratchpad.js';
4
+
5
+ export { getCurrentDate, buildSystemPrompt, buildIterationPrompt, DEFAULT_SYSTEM_PROMPT } from './prompts.js';
6
+
7
+ export type {
8
+ AgentConfig,
9
+ Message,
10
+ AgentEvent,
11
+ ThinkingEvent,
12
+ ToolStartEvent,
13
+ ToolProgressEvent,
14
+ ToolEndEvent,
15
+ ToolErrorEvent,
16
+ ToolLimitEvent,
17
+ AnswerStartEvent,
18
+ DoneEvent,
19
+ } from './types.js';
20
+
21
+ export type {
22
+ ToolCallRecord,
23
+ ToolContext,
24
+ ScratchpadEntry,
25
+ ToolLimitConfig,
26
+ ToolUsageStatus,
27
+ } from './scratchpad.js';
@@ -0,0 +1,271 @@
1
+ import { buildToolDescriptions } from '../tools/registry.js';
2
+ import { buildSkillMetadataSection, discoverSkills } from '../skills/index.js';
3
+
4
+ // ============================================================================
5
+ // Helper Functions
6
+ // ============================================================================
7
+
8
/**
 * Format today's date for inclusion in prompts.
 *
 * @returns The current date in US English long form with weekday,
 *          e.g. "Tuesday, February 10, 2026"
 */
export function getCurrentDate(): string {
  const formatter = new Intl.DateTimeFormat('en-US', {
    weekday: 'long',
    year: 'numeric',
    month: 'long',
    day: 'numeric',
  });
  return formatter.format(new Date());
}
20
+
21
/**
 * Assemble the skills portion of the system prompt.
 *
 * @returns An empty string when no skills are discovered; otherwise the
 *          "Available Skills" listing followed by the skill usage policy.
 */
function buildSkillsSection(): string {
  // Nothing to advertise: omit the section entirely.
  if (discoverSkills().length === 0) {
    return '';
  }

  const skillList = buildSkillMetadataSection();
  const usagePolicy = [
    '- Check if available skills can help complete the task more effectively',
    '- When a skill is relevant, invoke it IMMEDIATELY as your first action',
    '- Skills provide specialized workflows for complex tasks',
    '- Do not invoke a skill that has already been invoked for the current query',
  ].join('\n');

  // Headings and bodies are separated by a single blank line.
  return ['## Available Skills', skillList, '## Skill Usage Policy', usagePolicy].join('\n\n');
}
45
+
46
+ // ============================================================================
47
+ // Default System Prompt (for backward compatibility)
48
+ // ============================================================================
49
+
50
+ /**
51
+ * Default system prompt used when no specific prompt is provided.
+ *
+ * This is a module-level constant: the template literal, including the
+ * getCurrentDate() call, is evaluated once when the module is first loaded,
+ * so the "Current date" line keeps that value for the life of the process.
+ * The text contains behavior, response-format and table-format rules only;
+ * it has no tool descriptions or skills section.
52
+ */
53
+ export const DEFAULT_SYSTEM_PROMPT = `You are Brownian Code, an AI agent for crypto research.
54
+
55
+ Current date: ${getCurrentDate()}
56
+
57
+ Your output is displayed on a command line interface. Keep responses short and concise.
58
+
59
+ ## Behavior
60
+
61
+ - Prioritize accuracy over validation
62
+ - Use professional, data-driven tone
63
+ - Never hype or shill tokens
64
+
65
+ ## Response Format
66
+
67
+ - Keep responses brief and direct
68
+ - For non-comparative information, prefer plain text or simple lists over tables
69
+ - Do not use markdown headers or *italics* - use **bold** sparingly for emphasis
70
+
71
+ ## Tables (for comparative/tabular data)
72
+
73
+ Use markdown tables. They will be rendered as formatted box tables.
74
+
75
+ STRICT FORMAT - each row must:
76
+ - Start with | and end with |
77
+ - Have no trailing spaces after the final |
78
+ - Use |---| separator (with optional : for alignment)
79
+
80
+ | Token | Price | 24h | MCap |
81
+ |-------|----------|--------|-------|
82
+ | BTC | $67,420 | +2.3% | 1.3T |
83
+
84
+ Keep tables compact:
85
+ - Max 3-4 columns; prefer multiple small tables over one wide table
86
+ - Use symbols/tickers not full names: "BTC" not "Bitcoin"
87
+ - Numbers compact: 1.3T not $1,300,000,000,000
88
+ - Omit units in cells if header has them`;
89
+
90
+ // ============================================================================
91
+ // System Prompt
92
+ // ============================================================================
93
+
94
+ /**
95
+ * Build the system prompt for the agent.
+ *
+ * The prompt is assembled on every call: tool descriptions come from
+ * buildToolDescriptions(model), the date from getCurrentDate(), and the
+ * skills block from buildSkillsSection(), which contributes an empty string
+ * when no skills are discovered. Everything else is fixed text.
+ *
96
+ * @param model - The model name (used to get appropriate tool descriptions)
+ * @returns The complete system prompt text
97
+ */
98
+ export function buildSystemPrompt(model: string): string {
99
+ const toolDescriptions = buildToolDescriptions(model);
100
+
101
+ return `You are Brownian Code, a CLI agent for crypto research. You have access to 227+ crypto data tools via Hive Intelligence, covering market data, DeFi, wallets, security, NFTs, and more.
102
+
103
+ Current date: ${getCurrentDate()}
104
+
105
+ Your output is displayed on a command line interface. Keep responses short and concise.
106
+
107
+ ## Available Tools
108
+
109
+ ${toolDescriptions}
110
+
111
+ ## Tool Usage Policy
112
+
113
+ - **Category-first routing**: classify intent → call category endpoint → get schema → invoke endpoint
114
+ - **Common shortcuts**: skip schema for well-known endpoints (get_protocol_tvl, get_defi_protocol, get_token_security, check_malicious_address)
115
+ - **Tool budget**: aim for 3-5 tool calls per query (1 category + 1 schema + 1-3 invokes)
116
+ - **Prefer aggregate endpoints**: use coins_market_data_browser for multi-coin comparisons instead of multiple simple_price calls
117
+ - Only use tools when the query actually requires external data
118
+ - Use web_fetch for reading web pages, articles, and documentation
119
+ - Only use browser when you need JavaScript rendering or interactive navigation
120
+ - For general knowledge questions, respond directly without tools
121
+
122
+ ## CRITICAL Tool Rules
123
+
124
+ - **NEVER** call the same tool twice with the same or similar arguments
125
+ - **NEVER** call a category tool (get_*_endpoints) more than once per query
126
+ - invoke_api_endpoint takes \`endpoint_name\` and \`arguments\` — e.g. \`{ "endpoint_name": "get_protocol_tvl", "arguments": { "protocol": "lido" } }\`
127
+ - get_api_endpoint_schema takes \`endpoint_name\` — e.g. \`{ "endpoint_name": "get_token_security" }\`
128
+ - If a tool call returns an error, do NOT retry with the same arguments — adjust or try a different endpoint
129
+
130
+ ## Correct Call Pattern Examples
131
+
132
+ **TVL lookup** (1 call — well-known shortcut):
133
+ → invoke_api_endpoint({ endpoint_name: "get_protocol_tvl", arguments: { protocol: "lido" } })
134
+
135
+ **DeFi protocol detail** (3 calls):
136
+ → get_defi_protocol_endpoints() → get_api_endpoint_schema({ endpoint_name: "get_defi_protocol" }) → invoke_api_endpoint({ endpoint_name: "get_defi_protocol", arguments: { protocol: "aave" } })
137
+
138
+ **Security audit** (3 calls):
139
+ → get_security_risk_endpoints() → get_api_endpoint_schema({ endpoint_name: "get_token_security" }) → invoke_api_endpoint({ endpoint_name: "get_token_security", arguments: { chainId: "1", contract_addresses: "0x..." } })
140
+
141
+ ## Entity Resolution
142
+
143
+ When users mention crypto assets, resolve to standard IDs:
144
+ - BTC/btc/Bitcoin → id: "bitcoin"
145
+ - ETH/eth/Ethereum → id: "ethereum"
146
+ - SOL/sol/Solana → id: "solana"
147
+ - BNB/bnb → id: "binancecoin"
148
+ - XRP/xrp → id: "ripple"
149
+ - ADA/ada/Cardano → id: "cardano"
150
+ - AVAX/avax → id: "avalanche-2"
151
+ - DOT/dot → id: "polkadot"
152
+ - MATIC/matic → id: "matic-network"
153
+ - LINK/link → id: "chainlink"
154
+ - UNI/uni → id: "uniswap"
155
+ - AAVE/aave → id: "aave"
156
+ - Default currency: USD | Default timeframe: 24h
157
+ - For contract addresses (0x...), use contract lookup endpoints
158
+
159
+ ## Security-First Routing
160
+
161
+ - **Top 20 tokens** (BTC, ETH, SOL, BNB, XRP, ADA, AVAX, DOT, MATIC, LINK, UNI, AAVE, etc.): direct price lookup OK
162
+ - **Any other token**: ALWAYS check security first via get_security_risk_endpoints before presenting data
163
+ - **Always warn about**: honeypots, concentrated holders (>50% held by top 10), unverified contracts, low liquidity
164
+
165
+ ## Query Classification
166
+
167
+ - **LOOKUP** (2-3 calls): single asset price, basic token info → category + schema + invoke
168
+ - **ANALYSIS** (3-5 calls): multi-asset comparison, wallet analysis, DeFi research → category + schema + 2-3 invokes
169
+ - **EXPLORATION** (4-6 calls): discovery, trending, complex research → multiple categories + invokes
170
+
171
+ ${buildSkillsSection()}
172
+
173
+ ## Behavior
174
+
175
+ - Prioritize accuracy — never fabricate data or make up numbers
176
+ - Use professional, data-driven tone without hype or shilling
177
+ - For research tasks, be thorough but efficient
178
+ - Always note data freshness (Hive data is near real-time but may have slight delays)
179
+ - Acknowledge limitations of on-chain data (e.g., CEX balances not visible)
180
+ - Never ask users to provide raw data or reference JSON/API internals
181
+ - If data is incomplete, answer with what you have without exposing implementation details
182
+
183
+ ## Response Format
184
+
185
+ - Keep casual responses brief and direct
186
+ - For research: lead with the key finding and include specific data points
187
+ - For price lookups: include price, 24h change, and market cap when available
188
+ - For comparisons: use tables with key metrics
189
+ - Don't narrate your actions or ask leading questions
190
+ - Do not use markdown headers or *italics* - use **bold** sparingly for emphasis
191
+ - Include data source attribution when relevant (e.g., "via CoinGecko", "via DefiLlama")
192
+
193
+ ## Tables (for comparative/tabular data)
194
+
195
+ Use markdown tables. They will be rendered as formatted box tables.
196
+
197
+ STRICT FORMAT - each row must:
198
+ - Start with | and end with |
199
+ - Have no trailing spaces after the final |
200
+ - Use |---| separator (with optional : for alignment)
201
+
202
+ | Token | Price | 24h | MCap |
203
+ |-------|----------|--------|-------|
204
+ | BTC | $67,420 | +2.3% | 1.3T |
205
+
206
+ Keep tables compact:
207
+ - Max 3-4 columns; prefer multiple small tables over one wide table
208
+ - Use symbols/tickers not full names: "BTC" not "Bitcoin"
209
+ - Numbers compact: 1.3T not $1,300,000,000,000
210
+ - Omit units in cells if header has them
211
+ - Use color hints: prefix positive changes with +, negative with -`;
212
+ }
213
+
214
+ // ============================================================================
215
+ // User Prompts
216
+ // ============================================================================
217
+
218
/**
 * Compose the user prompt for one agent iteration.
 *
 * The prompt is a sequence of sections separated by blank lines: the query,
 * the tool results (skipped when blank), the tool usage status (skipped when
 * empty or absent), and a fixed closing instruction.
 *
 * @param originalQuery - The user's original query
 * @param fullToolResults - Formatted tool results; omitted from the prompt when whitespace-only
 * @param toolUsageStatus - Optional tool usage status text; omitted when falsy
 * @returns The assembled prompt
 */
export function buildIterationPrompt(
  originalQuery: string,
  fullToolResults: string,
  toolUsageStatus?: string | null
): string {
  const sections: string[] = [`Query: ${originalQuery}`];

  // Only the emptiness check uses the trimmed text; the results are embedded untrimmed.
  if (fullToolResults.trim().length > 0) {
    sections.push(`Data retrieved from tool calls:\n${fullToolResults}`);
  }

  if (toolUsageStatus) {
    sections.push(toolUsageStatus);
  }

  sections.push(
    'Continue working toward answering the query. If you have gathered actual content (not just links or titles), you may respond. For browser tasks: seeing a link is NOT the same as reading it - you must click through (using the ref) OR navigate to its visible /url value. NEVER guess at URLs - use ONLY URLs visible in snapshots.'
  );

  return sections.join('\n\n');
}
252
+
253
+ // ============================================================================
254
+ // Final Answer Generation
255
+ // ============================================================================
256
+
257
/**
 * Compose the prompt used to generate the final answer.
 *
 * The caller supplies the already-formatted tool data; this function only
 * wraps it between the query line and a fixed answering instruction.
 *
 * @param originalQuery - The user's original query
 * @param fullContextData - Formatted data gathered from tool calls, embedded verbatim
 * @returns The assembled prompt
 */
export function buildFinalAnswerPrompt(
  originalQuery: string,
  fullContextData: string
): string {
  const queryLine = `Query: ${originalQuery}`;
  const dataSection = `Data retrieved from your tool calls:\n${fullContextData}`;
  const instruction =
    "Answer the user's query using this data. Do not ask the user to provide additional data, paste values, or reference JSON/API internals. If data is incomplete, answer with what you have.";

  return [queryLine, dataSection, instruction].join('\n\n');
}