brownian-code 2026.2.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120)
  1. package/LICENSE +21 -0
  2. package/README.md +97 -0
  3. package/bin/brownian +25 -0
  4. package/env.example +21 -0
  5. package/package.json +87 -0
  6. package/src/agent/agent.test.ts +414 -0
  7. package/src/agent/agent.ts +385 -0
  8. package/src/agent/index.ts +27 -0
  9. package/src/agent/prompts.ts +271 -0
  10. package/src/agent/scratchpad.test.ts +482 -0
  11. package/src/agent/scratchpad.ts +526 -0
  12. package/src/agent/token-counter.test.ts +59 -0
  13. package/src/agent/token-counter.ts +33 -0
  14. package/src/agent/types.ts +137 -0
  15. package/src/cli.tsx +385 -0
  16. package/src/commands/builtin.test.ts +271 -0
  17. package/src/commands/builtin.ts +200 -0
  18. package/src/commands/registry.test.ts +188 -0
  19. package/src/commands/registry.ts +111 -0
  20. package/src/commands/types.ts +64 -0
  21. package/src/components/AgentEventView.tsx +487 -0
  22. package/src/components/AnswerBox.tsx +81 -0
  23. package/src/components/ApiKeyPrompt.tsx +75 -0
  24. package/src/components/CommandMenu.test.tsx +64 -0
  25. package/src/components/CommandMenu.tsx +38 -0
  26. package/src/components/CursorText.tsx +43 -0
  27. package/src/components/DebugPanel.tsx +48 -0
  28. package/src/components/ErrorBox.test.tsx +58 -0
  29. package/src/components/ErrorBox.tsx +26 -0
  30. package/src/components/HelpView.test.tsx +70 -0
  31. package/src/components/HelpView.tsx +61 -0
  32. package/src/components/HistoryItemView.tsx +108 -0
  33. package/src/components/Input.tsx +193 -0
  34. package/src/components/Intro.test.tsx +59 -0
  35. package/src/components/Intro.tsx +35 -0
  36. package/src/components/ModelSelector.tsx +288 -0
  37. package/src/components/StatusBar.test.tsx +78 -0
  38. package/src/components/StatusBar.tsx +56 -0
  39. package/src/components/WorkingIndicator.tsx +133 -0
  40. package/src/components/index.ts +23 -0
  41. package/src/e2e/agent-flow.test.ts +378 -0
  42. package/src/evals/components/EvalApp.tsx +206 -0
  43. package/src/evals/components/EvalCurrentQuestion.tsx +42 -0
  44. package/src/evals/components/EvalProgress.tsx +33 -0
  45. package/src/evals/components/EvalRecentResults.tsx +63 -0
  46. package/src/evals/components/EvalStats.tsx +49 -0
  47. package/src/evals/components/index.ts +5 -0
  48. package/src/evals/dataset/crypto_agent.csv +16 -0
  49. package/src/evals/run.ts +355 -0
  50. package/src/gateway/channels/whatsapp/auth-store.ts +15 -0
  51. package/src/gateway/channels/whatsapp/inbound.ts +86 -0
  52. package/src/gateway/channels/whatsapp/login.ts +28 -0
  53. package/src/gateway/channels/whatsapp/outbound.ts +27 -0
  54. package/src/gateway/channels/whatsapp/session.ts +69 -0
  55. package/src/gateway/config.ts +81 -0
  56. package/src/gateway/index.ts +62 -0
  57. package/src/hooks/useAgentRunner.ts +317 -0
  58. package/src/hooks/useDebugLogs.ts +22 -0
  59. package/src/hooks/useInputHistory.ts +106 -0
  60. package/src/hooks/useModelSelection.ts +249 -0
  61. package/src/hooks/useTextBuffer.test.ts +121 -0
  62. package/src/hooks/useTextBuffer.ts +97 -0
  63. package/src/index.tsx +74 -0
  64. package/src/mcp/cache.ts +205 -0
  65. package/src/mcp/client.test.ts +126 -0
  66. package/src/mcp/client.ts +145 -0
  67. package/src/mcp/index.ts +2 -0
  68. package/src/model/llm.test.ts +158 -0
  69. package/src/model/llm.ts +233 -0
  70. package/src/providers.ts +94 -0
  71. package/src/skills/index.ts +17 -0
  72. package/src/skills/loader.ts +73 -0
  73. package/src/skills/registry.ts +125 -0
  74. package/src/skills/types.ts +31 -0
  75. package/src/test-utils/mocks.ts +110 -0
  76. package/src/theme.ts +21 -0
  77. package/src/tools/browser/browser.ts +357 -0
  78. package/src/tools/browser/index.ts +1 -0
  79. package/src/tools/crypto/hive-tools.ts +171 -0
  80. package/src/tools/crypto/index.ts +1 -0
  81. package/src/tools/descriptions/browser.ts +105 -0
  82. package/src/tools/descriptions/crypto-search.ts +58 -0
  83. package/src/tools/descriptions/index.ts +8 -0
  84. package/src/tools/descriptions/web-fetch.ts +44 -0
  85. package/src/tools/descriptions/web-search.ts +26 -0
  86. package/src/tools/fetch/cache.ts +95 -0
  87. package/src/tools/fetch/external-content.ts +200 -0
  88. package/src/tools/fetch/index.ts +1 -0
  89. package/src/tools/fetch/web-fetch-utils.ts +122 -0
  90. package/src/tools/fetch/web-fetch.ts +371 -0
  91. package/src/tools/index.ts +12 -0
  92. package/src/tools/registry.ts +130 -0
  93. package/src/tools/search/exa.ts +43 -0
  94. package/src/tools/search/index.ts +2 -0
  95. package/src/tools/search/tavily.ts +35 -0
  96. package/src/tools/skill.ts +62 -0
  97. package/src/tools/types.ts +53 -0
  98. package/src/utils/ai-message.ts +26 -0
  99. package/src/utils/config.ts +54 -0
  100. package/src/utils/cost-calculator.test.ts +101 -0
  101. package/src/utils/cost-calculator.ts +74 -0
  102. package/src/utils/env.ts +101 -0
  103. package/src/utils/error-classifier.test.ts +146 -0
  104. package/src/utils/error-classifier.ts +91 -0
  105. package/src/utils/in-memory-chat-history.test.ts +291 -0
  106. package/src/utils/in-memory-chat-history.ts +224 -0
  107. package/src/utils/index.ts +19 -0
  108. package/src/utils/input-key-handlers.test.ts +155 -0
  109. package/src/utils/input-key-handlers.ts +64 -0
  110. package/src/utils/logger.ts +67 -0
  111. package/src/utils/long-term-chat-history.ts +138 -0
  112. package/src/utils/markdown-table.ts +227 -0
  113. package/src/utils/ollama.ts +37 -0
  114. package/src/utils/progress-channel.ts +84 -0
  115. package/src/utils/text-navigation.test.ts +222 -0
  116. package/src/utils/text-navigation.ts +81 -0
  117. package/src/utils/thinking-verbs.ts +29 -0
  118. package/src/utils/tokens.test.ts +163 -0
  119. package/src/utils/tokens.ts +67 -0
  120. package/src/utils/tool-description.ts +88 -0
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Brownian
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,97 @@
1
+ # Brownian Code
2
+
3
+ Think Claude Code, but built specifically for crypto research.
4
+
5
+ [![npm version](https://img.shields.io/npm/v/brownian-code)](https://www.npmjs.com/package/brownian-code)
6
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
7
+ [![GitHub release](https://img.shields.io/github/v/release/brownian-xyz/brownian-code)](https://github.com/brownian-xyz/brownian-code/releases)
8
+
9
+ Brownian Code is an autonomous crypto research agent that thinks, plans, and learns as it works. It performs analysis using task planning, self-reflection, and real-time market data -- decomposing complex questions into research steps, gathering live data, validating its own work, and iterating until it has a confident, data-backed answer.
10
+
11
+ ## Install
12
+
13
+ **Quick install (macOS / Linux):**
14
+
15
+ ```bash
16
+ curl -fsSL https://brownian.xyz/install.sh | bash
17
+ ```
18
+
19
+ **npm:**
20
+
21
+ ```bash
22
+ npm install -g brownian-code
23
+ ```
24
+
25
+ **Homebrew:**
26
+
27
+ ```bash
28
+ brew tap brownian-xyz/brownian
29
+ brew install brownian
30
+ ```
31
+
32
+ **From source:**
33
+
34
+ ```bash
35
+ git clone https://github.com/brownian-xyz/brownian-code.git
36
+ cd brownian-code
37
+ bun install
38
+ bun start
39
+ ```
40
+
41
+ ## Quick Start
42
+
43
+ 1. Copy the example environment file and add at least one LLM API key:
44
+
45
+ ```bash
46
+ cp env.example .env
47
+ # Edit .env and add your API key(s)
48
+ ```
49
+
50
+ 2. Launch the agent:
51
+
52
+ ```bash
53
+ brownian
54
+ ```
55
+
56
+ 3. Ask a question:
57
+
58
+ ```
59
+ > Compare SOL vs ETH developer activity and TVL trends over the last quarter
60
+ ```
61
+
62
+ See [QUICKSTART.md](QUICKSTART.md) for a full first-run walkthrough.
63
+
64
+ ## Features
65
+
66
+ - **Autonomous task planning** -- Decomposes complex queries into structured research steps, executes them, and self-validates results.
67
+ - **Self-reflection** -- Checks its own work, detects gaps, and iterates until tasks are complete. Built-in loop detection and step limits prevent runaway execution.
68
+ - **Real-time crypto data** -- 227+ endpoints via Hive Intelligence covering prices, DeFi, wallets, security, NFTs, and more.
69
+ - **Multi-provider LLM support** -- OpenAI, Anthropic, Google, xAI, OpenRouter, Ollama (local). Switch models on the fly with `/model`.
70
+ - **Web search + browser** -- Exa/Tavily search and Playwright-based scraping for real-time web research.
71
+ - **Extensible skills** -- Define custom research workflows as SKILL.md files.
72
+ - **Conversation history** -- Persistent history with search, export, and context compaction.
73
+
74
+ ## Documentation
75
+
76
+ | Document | Description |
77
+ |----------|-------------|
78
+ | [INSTALL.md](INSTALL.md) | Detailed installation guide with all methods |
79
+ | [QUICKSTART.md](QUICKSTART.md) | First-run setup and usage walkthrough |
80
+ | [docs/COMMANDS.md](docs/COMMANDS.md) | All CLI commands and keyboard shortcuts |
81
+ | [docs/CONFIGURATION.md](docs/CONFIGURATION.md) | Environment variables and config files |
82
+ | [docs/TROUBLESHOOTING.md](docs/TROUBLESHOOTING.md) | Common issues and fixes |
83
+ | [AGENTS.md](AGENTS.md) | Repository guidelines for AI coding agents |
84
+
85
+ ## Contributing
86
+
87
+ 1. Fork the repository
88
+ 2. Create a feature branch
89
+ 3. Commit your changes
90
+ 4. Push to the branch
91
+ 5. Open a Pull Request
92
+
93
+ Please keep pull requests small and focused. Run `bun run typecheck && bun test` before submitting.
94
+
95
+ ## License
96
+
97
+ MIT -- see [LICENSE](LICENSE) for details.
package/bin/brownian ADDED
@@ -0,0 +1,25 @@
1
+ #!/usr/bin/env node
2
+
3
+ import { execFileSync } from "node:child_process";
4
+ import { fileURLToPath } from "node:url";
5
+ import { join, dirname } from "node:path";
6
+
7
+ // Check if bun is available
8
+ try {
9
+ execFileSync("bun", ["--version"], { stdio: "ignore" });
10
+ } catch {
11
+ console.error("Error: Bun runtime is required but not found on your PATH.");
12
+ console.error("");
13
+ console.error("Install bun: curl -fsSL https://bun.sh/install | bash");
14
+ console.error("More info: https://bun.sh");
15
+ process.exit(1);
16
+ }
17
+
18
+ // Launch the app via bun
19
+ const __dirname = dirname(fileURLToPath(import.meta.url));
20
+ const entry = join(__dirname, "..", "src", "index.tsx");
21
+ try {
22
+ execFileSync("bun", ["run", entry], { stdio: "inherit" });
23
+ } catch (e) {
24
+ process.exit(e.status ?? 1);
25
+ }
package/env.example ADDED
@@ -0,0 +1,21 @@
1
+ # LLM API Keys
2
+ OPENAI_API_KEY=your-api-key
3
+ ANTHROPIC_API_KEY=your-api-key
4
+ GOOGLE_API_KEY=your-api-key
5
+ XAI_API_KEY=your-api-key
6
+ OPENROUTER_API_KEY=your-api-key
7
+ MOONSHOT_API_KEY=your-api-key
8
+ DEEPSEEK_API_KEY=your-api-key
9
+
10
+ # Ollama (Local LLM)
11
+ OLLAMA_BASE_URL=http://127.0.0.1:11434
12
+
13
+ # Web Search API Keys (Exa preferred, Tavily fallback)
14
+ EXASEARCH_API_KEY=your-api-key
15
+ TAVILY_API_KEY=your-api-key
16
+
17
+ # LangSmith (optional - enable for tracing)
18
+ # LANGSMITH_API_KEY=your-api-key
19
+ # LANGSMITH_ENDPOINT=https://api.smith.langchain.com
20
+ # LANGSMITH_PROJECT=brownian-code
21
+ # LANGSMITH_TRACING=true
package/package.json ADDED
@@ -0,0 +1,87 @@
1
+ {
2
+ "name": "brownian-code",
3
+ "version": "2026.2.10",
4
+ "description": "Brownian Code - AI agent for crypto research",
5
+ "type": "module",
6
+ "main": "src/index.tsx",
7
+ "bin": {
8
+ "brownian": "./bin/brownian"
9
+ },
10
+ "files": [
11
+ "bin/",
12
+ "src/",
13
+ "env.example",
14
+ "README.md",
15
+ "LICENSE"
16
+ ],
17
+ "engines": {
18
+ "bun": ">=1.1.0"
19
+ },
20
+ "scripts": {
21
+ "start": "bun run src/index.tsx",
22
+ "dev": "bun --watch run src/index.tsx",
23
+ "typecheck": "tsc --noEmit",
24
+ "test": "bun test",
25
+ "test:watch": "bun test --watch",
26
+ "build:all": "bash scripts/build-all.sh",
27
+ "gateway": "bun run src/gateway/index.ts",
28
+ "gateway:login": "bun run src/gateway/channels/whatsapp/login.ts",
29
+ "prepublishOnly": "bun run typecheck && bun test"
30
+ },
31
+ "keywords": [
32
+ "crypto",
33
+ "research",
34
+ "ai",
35
+ "agent",
36
+ "cli",
37
+ "blockchain",
38
+ "defi",
39
+ "finance"
40
+ ],
41
+ "license": "MIT",
42
+ "repository": {
43
+ "type": "git",
44
+ "url": "git+https://github.com/brownian-xyz/brownian-code.git"
45
+ },
46
+ "homepage": "https://brownian.xyz",
47
+ "publishConfig": {
48
+ "access": "public"
49
+ },
50
+ "dependencies": {
51
+ "@langchain/anthropic": "^1.1.3",
52
+ "@langchain/core": "^1.1.0",
53
+ "@langchain/exa": "^1.0.1",
54
+ "@langchain/google-genai": "^2.0.0",
55
+ "@langchain/ollama": "^1.0.3",
56
+ "@langchain/openai": "^1.1.3",
57
+ "@langchain/tavily": "^1.0.1",
58
+ "@modelcontextprotocol/sdk": "^1.13.0",
59
+ "@mozilla/readability": "^0.6.0",
60
+ "@whiskeysockets/baileys": "^7.0.0-rc.9",
61
+ "dotenv": "^17.2.3",
62
+ "exa-js": "^2.2.0",
63
+ "figlet": "^1.10.0",
64
+ "gray-matter": "^4.0.3",
65
+ "ink": "^6.5.1",
66
+ "ink-spinner": "^5.0.0",
67
+ "ink-text-input": "^6.0.0",
68
+ "langsmith": "^0.4.10",
69
+ "linkedom": "^0.18.12",
70
+ "playwright": "^1.52.0",
71
+ "react": "^19.2.0",
72
+ "zod": "^4.1.13"
73
+ },
74
+ "devDependencies": {
75
+ "@babel/core": "^7.28.5",
76
+ "@babel/preset-env": "^7.28.5",
77
+ "@types/bun": "latest",
78
+ "@types/figlet": "^1.7.0",
79
+ "@types/jest": "^29.5.14",
80
+ "@types/react": "^19.2.7",
81
+ "babel-jest": "^30.2.0",
82
+ "ink-testing-library": "^4.0.0",
83
+ "jest": "^29.7.0",
84
+ "ts-jest": "^29.2.5",
85
+ "typescript": "^5.9.3"
86
+ }
87
+ }
package/src/agent/agent.test.ts ADDED
@@ -0,0 +1,414 @@
1
+ import { describe, test, expect, mock, beforeAll, afterAll, beforeEach } from 'bun:test';
2
+ import { createMockAIMessage, createMockTool, createTempDir } from '../test-utils/mocks.js';
3
+ import type { AgentEvent, DoneEvent } from './types.js';
4
+
5
+ // ---------------------------------------------------------------------------
6
+ // Mock dependencies before importing Agent
7
+ // ---------------------------------------------------------------------------
8
+
9
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
10
+ const mockCallLlm = mock(async (): Promise<any> => ({ // default stub: plain-string response with fixed usage counts
11
+ response: 'default response',
12
+ usage: { inputTokens: 100, outputTokens: 50, totalTokens: 150 },
13
+ }));
14
+
15
+ mock.module('../model/llm.js', () => ({ // stub the llm module's callLlm and DEFAULT_MODEL exports
16
+ callLlm: mockCallLlm,
17
+ DEFAULT_MODEL: 'claude-sonnet-4-5',
18
+ }));
19
+
20
+ const mockGetTools = mock(() => [ // default tool set: two mock tools named 'search' and 'api'
21
+ createMockTool('search', '{"results": "BTC is $65000"}'),
22
+ createMockTool('api', '{"data": "some api data"}'),
23
+ ]);
24
+
25
+ mock.module('../tools/registry.js', () => ({ // stub the tool registry module
26
+ getTools: mockGetTools,
27
+ buildToolDescriptions: () => 'Mock tool descriptions',
28
+ }));
29
+
30
+ mock.module('./prompts.js', () => ({ // stub the prompt builders with short fixed-format strings
31
+ buildSystemPrompt: () => 'You are a test agent.',
32
+ buildIterationPrompt: (query: string, results: string) => `Query: ${query}\nResults: ${results}`,
33
+ buildFinalAnswerPrompt: (query: string, context: string) => `Answer: ${query}\nContext: ${context}`,
34
+ DEFAULT_SYSTEM_PROMPT: 'Default test system prompt.',
35
+ }));
36
+
37
+ mock.module('../utils/tool-description.js', () => ({ // a tool's description is just its name
38
+ getToolDescription: (name: string) => name,
39
+ }));
40
+
41
+ mock.module('../skills/index.js', () => ({ // no skills are discovered in these tests
42
+ discoverSkills: () => [],
43
+ buildSkillMetadataSection: () => '',
44
+ }));
45
+
46
+ // Import Agent after mocks (dynamic import, so the mock.module registrations above run first)
47
+ const { Agent } = await import('./agent.js');
+
49
+ // ---------------------------------------------------------------------------
50
+ // Setup temp CWD for scratchpad files
51
+ // ---------------------------------------------------------------------------
52
+
53
+ let originalCwd: string; // cwd captured in beforeAll and restored in afterAll
54
+ let cleanup: () => void; // cleanup callback returned by createTempDir()
55
+
56
+ beforeAll(() => {
57
+ originalCwd = process.cwd();
58
+ const tmp = createTempDir();
59
+ cleanup = tmp.cleanup;
60
+ process.chdir(tmp.path); // the whole suite runs with the temp directory as cwd
61
+ });
62
+
63
+ afterAll(() => {
64
+ process.chdir(originalCwd); // switch back before invoking the temp-dir cleanup
65
+ cleanup();
66
+ });
67
+
68
+ beforeEach(() => { // give every test a fresh callLlm stub and the default two-tool registry
69
+ mockCallLlm.mockReset(); // unlike mockClear, also drops leftover *Once values and implementations set by earlier tests
70
+ mockCallLlm.mockImplementation(async () => ({ response: 'default response', usage: { inputTokens: 100, outputTokens: 50, totalTokens: 150 } })); // re-install the default stub
71
+ mockGetTools.mockClear();
72
+ mockGetTools.mockReturnValue([
73
+ createMockTool('search', '{"results": "BTC is $65000"}'),
74
+ createMockTool('api', '{"data": "some api data"}'),
75
+ ]);
76
+ });
77
+ // ---------------------------------------------------------------------------
78
+ // Helpers
79
+ // ---------------------------------------------------------------------------
80
+
81
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
82
+ async function collectEvents(agent: any, query: string): Promise<AgentEvent[]> { // drains agent.run(query) into an array
83
+ const events: AgentEvent[] = [];
84
+ for await (const event of agent.run(query)) { // agent.run(query) is consumed as an async iterable
85
+ events.push(event); // keep events in the order they were yielded
86
+ }
87
+ return events;
88
+ }
89
+
90
+ function findDone(events: AgentEvent[]): DoneEvent { // returns the first event whose type is 'done'
91
+ return events.find(e => e.type === 'done') as DoneEvent; // NOTE: the cast hides `undefined` when no 'done' event exists
92
+ }
93
+
94
+ // ---------------------------------------------------------------------------
95
+ // Simple query (no tools)
96
+ // ---------------------------------------------------------------------------
97
+
98
+ describe('Agent - simple query (no tools)', () => {
99
+ test('LLM returns string → done with text, iterations=1', async () => {
100
+ mockCallLlm.mockResolvedValue({ // persistent stub (not *Once): every callLlm call gets this plain-string reply
101
+ response: 'Hello! How can I help?',
102
+ usage: { inputTokens: 50, outputTokens: 20, totalTokens: 70 },
103
+ });
104
+
105
+ const agent = Agent.create({ model: 'claude-sonnet-4-5' });
106
+ const events = await collectEvents(agent, 'hi');
107
+
108
+ const done = findDone(events);
109
+ expect(done).toBeDefined();
110
+ expect(done.answer).toBe('Hello! How can I help?'); // the string response is expected verbatim as the answer
111
+ expect(done.iterations).toBe(1);
112
+ expect(done.toolCalls.length).toBe(0);
113
+ });
114
+ });
115
+
116
+ // ---------------------------------------------------------------------------
117
+ // Single tool call
118
+ // ---------------------------------------------------------------------------
119
+
120
+ describe('Agent - tool calls', () => {
121
+ test('single tool call flow: tool_start → tool_end → done', async () => {
122
+ // First call: LLM wants to call a tool
123
+ mockCallLlm.mockResolvedValueOnce({
124
+ response: createMockAIMessage('Let me search for that', [
125
+ { name: 'search', args: { query: 'BTC price' } },
126
+ ]),
127
+ usage: { inputTokens: 100, outputTokens: 50, totalTokens: 150 },
128
+ });
129
+ // Second call: LLM gives final answer (no tools)
130
+ mockCallLlm.mockResolvedValueOnce({
131
+ response: 'BTC is at $65,000',
132
+ usage: { inputTokens: 200, outputTokens: 100, totalTokens: 300 },
133
+ });
134
+ // Third call: final answer generation
135
+ mockCallLlm.mockResolvedValueOnce({
136
+ response: 'Bitcoin is currently priced at $65,000.',
137
+ usage: { inputTokens: 150, outputTokens: 80, totalTokens: 230 },
138
+ });
139
+
140
+ const agent = Agent.create({ model: 'claude-sonnet-4-5' });
141
+ const events = await collectEvents(agent, 'What is BTC price?');
142
+
143
+ const types = events.map(e => e.type);
144
+ expect(types).toContain('tool_start'); // only presence is checked, not the relative order of the events
145
+ expect(types).toContain('tool_end');
146
+ expect(types).toContain('done');
147
+
148
+ const done = findDone(events);
149
+ expect(done.toolCalls.length).toBeGreaterThan(0);
150
+ });
151
+
152
+ test('tool error: yields tool_error and agent continues', async () => {
153
+ const errorTool = createMockTool('broken', () => { // mock tool whose handler always throws
154
+ throw new Error('API timeout');
155
+ });
156
+ mockGetTools.mockReturnValue([errorTool]); // the failing tool is the only tool offered
157
+
158
+ // LLM calls the broken tool
159
+ mockCallLlm.mockResolvedValueOnce({
160
+ response: createMockAIMessage('', [
161
+ { name: 'broken', args: {} },
162
+ ]),
163
+ usage: { inputTokens: 100, outputTokens: 50, totalTokens: 150 },
164
+ });
165
+ // LLM gives answer after error
166
+ mockCallLlm.mockResolvedValueOnce({
167
+ response: 'I encountered an error.',
168
+ usage: { inputTokens: 100, outputTokens: 50, totalTokens: 150 },
169
+ });
170
+ // Final answer generation
171
+ mockCallLlm.mockResolvedValueOnce({
172
+ response: 'Sorry, there was an error getting the data.',
173
+ usage: { inputTokens: 100, outputTokens: 50, totalTokens: 150 },
174
+ });
175
+
176
+ const agent = Agent.create({ model: 'claude-sonnet-4-5' });
177
+ const events = await collectEvents(agent, 'test error');
178
+
179
+ const types = events.map(e => e.type);
180
+ expect(types).toContain('tool_error');
181
+ expect(types).toContain('done'); // a 'done' event after the error shows the run still completed
182
+ });
183
+ });
184
+
185
+ // ---------------------------------------------------------------------------
186
+ // Max iterations
187
+ // ---------------------------------------------------------------------------
188
+
189
+ describe('Agent - max iterations', () => {
190
+ test('stops at max iterations and still generates final answer', async () => {
191
+ // Always return tool calls to force max iterations
192
+ mockCallLlm.mockResolvedValue({ // NOTE(review): replaced by the mockImplementation below before the agent runs, so this appears to have no effect
193
+ response: createMockAIMessage('thinking...', [
194
+ { name: 'search', args: { query: 'loop' } },
195
+ ]),
196
+ usage: { inputTokens: 50, outputTokens: 25, totalTokens: 75 },
197
+ });
198
+
199
+ const agent = Agent.create({ model: 'claude-sonnet-4-5', maxIterations: 2 });
200
+
201
+ // Replace the stub: the first two calls request a tool, every later call returns the final answer text
202
+ let callCount = 0;
203
+ mockCallLlm.mockImplementation(async () => {
204
+ callCount++;
205
+ if (callCount <= 2) { // matches maxIterations: 2 passed to Agent.create above
206
+ return {
207
+ response: createMockAIMessage('', [
208
+ { name: 'search', args: { query: `attempt ${callCount}` } },
209
+ ]),
210
+ usage: { inputTokens: 50, outputTokens: 25, totalTokens: 75 },
211
+ };
212
+ }
213
+ // Final answer generation call
214
+ return {
215
+ response: 'Max iterations reached, here is what I found.',
216
+ usage: { inputTokens: 50, outputTokens: 25, totalTokens: 75 },
217
+ };
218
+ });
219
+
220
+ const events = await collectEvents(agent, 'loop test');
221
+ const done = findDone(events);
222
+ expect(done).toBeDefined();
223
+ expect(done.iterations).toBe(2);
224
+ });
225
+ });
226
+
227
+ // ---------------------------------------------------------------------------
228
+ // No tools available
229
+ // ---------------------------------------------------------------------------
230
+
231
+ describe('Agent - no tools', () => {
232
+ test('yields done with "No tools available" message', async () => {
233
+ mockGetTools.mockReturnValue([]); // empty tool registry for this test
234
+
235
+ const agent = Agent.create({ model: 'claude-sonnet-4-5' });
236
+ const events = await collectEvents(agent, 'test query');
237
+
238
+ const done = findDone(events);
239
+ expect(done.answer).toContain('No tools available');
240
+ expect(done.iterations).toBe(0); // the run is expected to finish without completing any iteration
241
+ });
242
+ });
243
+
244
+ // ---------------------------------------------------------------------------
245
+ // Skill deduplication
246
+ // ---------------------------------------------------------------------------
247
+
248
+ describe('Agent - skill deduplication', () => {
249
+ test('same skill called twice → second skipped', async () => {
250
+ const skillInvocations: string[] = []; // records the `skill` argument of every real invocation of the mock tool
251
+ const skillTool = createMockTool('skill', (args) => {
252
+ skillInvocations.push(args.skill as string);
253
+ return 'skill result';
254
+ });
255
+ mockGetTools.mockReturnValue([skillTool]);
256
+
257
+ // LLM requests same skill twice in one response
258
+ mockCallLlm.mockResolvedValueOnce({
259
+ response: createMockAIMessage('', [
260
+ { name: 'skill', args: { skill: 'price-lookup' } },
261
+ { name: 'skill', args: { skill: 'price-lookup' } },
262
+ ]),
263
+ usage: { inputTokens: 100, outputTokens: 50, totalTokens: 150 },
264
+ });
265
+ mockCallLlm.mockResolvedValueOnce({
266
+ response: 'Done.',
267
+ usage: { inputTokens: 50, outputTokens: 25, totalTokens: 75 },
268
+ });
269
+ mockCallLlm.mockResolvedValueOnce({
270
+ response: 'Final answer after skill.',
271
+ usage: { inputTokens: 50, outputTokens: 25, totalTokens: 75 },
272
+ });
273
+
274
+ const agent = Agent.create({ model: 'claude-sonnet-4-5' });
275
+ const events = await collectEvents(agent, 'run skill'); // `events` is not inspected; the run is awaited for its effect on skillInvocations
276
+
277
+ // Skill should only have been invoked once
278
+ expect(skillInvocations.length).toBe(1);
279
+ expect(skillInvocations[0]).toBe('price-lookup');
280
+ });
281
+ });
282
+
283
+ // ---------------------------------------------------------------------------
284
+ // Thinking events
285
+ // ---------------------------------------------------------------------------
286
+
287
+ describe('Agent - thinking events', () => {
288
+ test('emits thinking event when LLM includes text alongside tool calls', async () => {
289
+ mockCallLlm.mockResolvedValueOnce({ // first reply: text plus a tool call in the same message
290
+ response: createMockAIMessage('Let me search for Bitcoin data.', [
291
+ { name: 'search', args: { query: 'BTC' } },
292
+ ]),
293
+ usage: { inputTokens: 100, outputTokens: 50, totalTokens: 150 },
294
+ });
295
+ mockCallLlm.mockResolvedValueOnce({
296
+ response: 'BTC is $65k',
297
+ usage: { inputTokens: 50, outputTokens: 25, totalTokens: 75 },
298
+ });
299
+ mockCallLlm.mockResolvedValueOnce({
300
+ response: 'Bitcoin costs $65,000.',
301
+ usage: { inputTokens: 50, outputTokens: 25, totalTokens: 75 },
302
+ });
303
+
304
+ const agent = Agent.create({ model: 'claude-sonnet-4-5' });
305
+ const events = await collectEvents(agent, 'BTC price');
306
+
307
+ const thinking = events.find(e => e.type === 'thinking');
308
+ expect(thinking).toBeDefined();
309
+ if (thinking?.type === 'thinking') { // narrows the event union so `.message` is accessible to the type checker
310
+ expect(thinking.message).toContain('search for Bitcoin');
311
+ }
312
+ });
313
+ });
314
+
315
+ // ---------------------------------------------------------------------------
316
+ // Token usage tracking
317
+ // ---------------------------------------------------------------------------
318
+
319
+ describe('Agent - token usage', () => {
320
+ test('done event includes accumulated token usage', async () => {
321
+ mockCallLlm.mockResolvedValue({ // persistent stub (not *Once): every callLlm call reports 150 total tokens
322
+ response: 'Quick answer',
323
+ usage: { inputTokens: 100, outputTokens: 50, totalTokens: 150 },
324
+ });
325
+
326
+ const agent = Agent.create({ model: 'claude-sonnet-4-5' });
327
+ const events = await collectEvents(agent, 'simple');
328
+
329
+ const done = findDone(events);
330
+ expect(done.tokenUsage).toBeDefined();
331
+ expect(done.tokenUsage!.totalTokens).toBeGreaterThan(0); // only checks that usage is non-zero, not the exact sum
332
+ expect(done.totalTime).toBeGreaterThanOrEqual(0);
333
+ });
334
+ });
335
+
336
+ // ---------------------------------------------------------------------------
337
+ // buildFullContextForAnswer
338
+ // ---------------------------------------------------------------------------
339
+
340
+ describe('Agent - context building', () => {
341
+ test('filters out error results from final context', async () => {
342
+ const errorTool = createMockTool('flaky', (args) => { // throws only when called with a truthy `fail` argument
343
+ if (args.fail) throw new Error('timeout');
344
+ return '{"data": "good"}';
345
+ });
346
+ const goodTool = createMockTool('reliable', () => '{"data": "success"}');
347
+ mockGetTools.mockReturnValue([errorTool, goodTool]);
348
+
349
+ // First call: both tools, one will fail
350
+ mockCallLlm.mockResolvedValueOnce({
351
+ response: createMockAIMessage('', [
352
+ { name: 'flaky', args: { fail: true } },
353
+ { name: 'reliable', args: {} },
354
+ ]),
355
+ usage: { inputTokens: 100, outputTokens: 50, totalTokens: 150 },
356
+ });
357
+ // Second call: no more tools
358
+ mockCallLlm.mockResolvedValueOnce({
359
+ response: 'data gathered',
360
+ usage: { inputTokens: 100, outputTokens: 50, totalTokens: 150 },
361
+ });
362
+ // Final answer
363
+ mockCallLlm.mockResolvedValueOnce({
364
+ response: 'Here is the result from reliable tool.',
365
+ usage: { inputTokens: 100, outputTokens: 50, totalTokens: 150 },
366
+ });
367
+
368
+ const agent = Agent.create({ model: 'claude-sonnet-4-5' });
369
+ const events = await collectEvents(agent, 'mixed results');
370
+
371
+ const done = findDone(events);
372
+ expect(done.answer).toBeDefined();
373
+ // The final answer prompt should have been called with context that excludes errors
374
+ // We can verify the LLM was called with a prompt that doesn't include "Error:"
375
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
376
+ const lastCallArgs = mockCallLlm.mock.calls[mockCallLlm.mock.calls.length - 1] as any[];
377
+ const promptArg = lastCallArgs?.[0] as string; // assumes the prompt is callLlm's first positional argument — TODO confirm
378
+ if (promptArg) { // NOTE(review): when promptArg is undefined or empty the assertion is skipped and the test passes vacuously
379
+ expect(promptArg).not.toContain('Error: timeout');
380
+ }
381
+ });
382
+ });
383
+
384
+ // ---------------------------------------------------------------------------
385
+ // answer_start event
386
+ // ---------------------------------------------------------------------------
387
+
388
+ describe('Agent - answer_start event', () => {
389
+ test('emits answer_start before done', async () => {
390
+ mockCallLlm.mockResolvedValueOnce({ // first reply requests the 'search' tool
391
+ response: createMockAIMessage('', [
392
+ { name: 'search', args: { query: 'test' } },
393
+ ]),
394
+ usage: { inputTokens: 100, outputTokens: 50, totalTokens: 150 },
395
+ });
396
+ mockCallLlm.mockResolvedValueOnce({
397
+ response: 'done searching',
398
+ usage: { inputTokens: 50, outputTokens: 25, totalTokens: 75 },
399
+ });
400
+ mockCallLlm.mockResolvedValueOnce({
401
+ response: 'Final answer.',
402
+ usage: { inputTokens: 50, outputTokens: 25, totalTokens: 75 },
403
+ });
404
+
405
+ const agent = Agent.create({ model: 'claude-sonnet-4-5' });
406
+ const events = await collectEvents(agent, 'test');
407
+
408
+ const types = events.map(e => e.type);
409
+ const answerStartIdx = types.indexOf('answer_start'); // -1 when the event was never emitted
410
+ const doneIdx = types.indexOf('done');
411
+ expect(answerStartIdx).toBeGreaterThanOrEqual(0);
412
+ expect(doneIdx).toBeGreaterThan(answerStartIdx); // 'done' must come strictly after 'answer_start'
413
+ });
414
+ });