brownian-code 2026.2.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +97 -0
- package/bin/brownian +25 -0
- package/env.example +21 -0
- package/package.json +87 -0
- package/src/agent/agent.test.ts +414 -0
- package/src/agent/agent.ts +385 -0
- package/src/agent/index.ts +27 -0
- package/src/agent/prompts.ts +271 -0
- package/src/agent/scratchpad.test.ts +482 -0
- package/src/agent/scratchpad.ts +526 -0
- package/src/agent/token-counter.test.ts +59 -0
- package/src/agent/token-counter.ts +33 -0
- package/src/agent/types.ts +137 -0
- package/src/cli.tsx +385 -0
- package/src/commands/builtin.test.ts +271 -0
- package/src/commands/builtin.ts +200 -0
- package/src/commands/registry.test.ts +188 -0
- package/src/commands/registry.ts +111 -0
- package/src/commands/types.ts +64 -0
- package/src/components/AgentEventView.tsx +487 -0
- package/src/components/AnswerBox.tsx +81 -0
- package/src/components/ApiKeyPrompt.tsx +75 -0
- package/src/components/CommandMenu.test.tsx +64 -0
- package/src/components/CommandMenu.tsx +38 -0
- package/src/components/CursorText.tsx +43 -0
- package/src/components/DebugPanel.tsx +48 -0
- package/src/components/ErrorBox.test.tsx +58 -0
- package/src/components/ErrorBox.tsx +26 -0
- package/src/components/HelpView.test.tsx +70 -0
- package/src/components/HelpView.tsx +61 -0
- package/src/components/HistoryItemView.tsx +108 -0
- package/src/components/Input.tsx +193 -0
- package/src/components/Intro.test.tsx +59 -0
- package/src/components/Intro.tsx +35 -0
- package/src/components/ModelSelector.tsx +288 -0
- package/src/components/StatusBar.test.tsx +78 -0
- package/src/components/StatusBar.tsx +56 -0
- package/src/components/WorkingIndicator.tsx +133 -0
- package/src/components/index.ts +23 -0
- package/src/e2e/agent-flow.test.ts +378 -0
- package/src/evals/components/EvalApp.tsx +206 -0
- package/src/evals/components/EvalCurrentQuestion.tsx +42 -0
- package/src/evals/components/EvalProgress.tsx +33 -0
- package/src/evals/components/EvalRecentResults.tsx +63 -0
- package/src/evals/components/EvalStats.tsx +49 -0
- package/src/evals/components/index.ts +5 -0
- package/src/evals/dataset/crypto_agent.csv +16 -0
- package/src/evals/run.ts +355 -0
- package/src/gateway/channels/whatsapp/auth-store.ts +15 -0
- package/src/gateway/channels/whatsapp/inbound.ts +86 -0
- package/src/gateway/channels/whatsapp/login.ts +28 -0
- package/src/gateway/channels/whatsapp/outbound.ts +27 -0
- package/src/gateway/channels/whatsapp/session.ts +69 -0
- package/src/gateway/config.ts +81 -0
- package/src/gateway/index.ts +62 -0
- package/src/hooks/useAgentRunner.ts +317 -0
- package/src/hooks/useDebugLogs.ts +22 -0
- package/src/hooks/useInputHistory.ts +106 -0
- package/src/hooks/useModelSelection.ts +249 -0
- package/src/hooks/useTextBuffer.test.ts +121 -0
- package/src/hooks/useTextBuffer.ts +97 -0
- package/src/index.tsx +74 -0
- package/src/mcp/cache.ts +205 -0
- package/src/mcp/client.test.ts +126 -0
- package/src/mcp/client.ts +145 -0
- package/src/mcp/index.ts +2 -0
- package/src/model/llm.test.ts +158 -0
- package/src/model/llm.ts +233 -0
- package/src/providers.ts +94 -0
- package/src/skills/index.ts +17 -0
- package/src/skills/loader.ts +73 -0
- package/src/skills/registry.ts +125 -0
- package/src/skills/types.ts +31 -0
- package/src/test-utils/mocks.ts +110 -0
- package/src/theme.ts +21 -0
- package/src/tools/browser/browser.ts +357 -0
- package/src/tools/browser/index.ts +1 -0
- package/src/tools/crypto/hive-tools.ts +171 -0
- package/src/tools/crypto/index.ts +1 -0
- package/src/tools/descriptions/browser.ts +105 -0
- package/src/tools/descriptions/crypto-search.ts +58 -0
- package/src/tools/descriptions/index.ts +8 -0
- package/src/tools/descriptions/web-fetch.ts +44 -0
- package/src/tools/descriptions/web-search.ts +26 -0
- package/src/tools/fetch/cache.ts +95 -0
- package/src/tools/fetch/external-content.ts +200 -0
- package/src/tools/fetch/index.ts +1 -0
- package/src/tools/fetch/web-fetch-utils.ts +122 -0
- package/src/tools/fetch/web-fetch.ts +371 -0
- package/src/tools/index.ts +12 -0
- package/src/tools/registry.ts +130 -0
- package/src/tools/search/exa.ts +43 -0
- package/src/tools/search/index.ts +2 -0
- package/src/tools/search/tavily.ts +35 -0
- package/src/tools/skill.ts +62 -0
- package/src/tools/types.ts +53 -0
- package/src/utils/ai-message.ts +26 -0
- package/src/utils/config.ts +54 -0
- package/src/utils/cost-calculator.test.ts +101 -0
- package/src/utils/cost-calculator.ts +74 -0
- package/src/utils/env.ts +101 -0
- package/src/utils/error-classifier.test.ts +146 -0
- package/src/utils/error-classifier.ts +91 -0
- package/src/utils/in-memory-chat-history.test.ts +291 -0
- package/src/utils/in-memory-chat-history.ts +224 -0
- package/src/utils/index.ts +19 -0
- package/src/utils/input-key-handlers.test.ts +155 -0
- package/src/utils/input-key-handlers.ts +64 -0
- package/src/utils/logger.ts +67 -0
- package/src/utils/long-term-chat-history.ts +138 -0
- package/src/utils/markdown-table.ts +227 -0
- package/src/utils/ollama.ts +37 -0
- package/src/utils/progress-channel.ts +84 -0
- package/src/utils/text-navigation.test.ts +222 -0
- package/src/utils/text-navigation.ts +81 -0
- package/src/utils/thinking-verbs.ts +29 -0
- package/src/utils/tokens.test.ts +163 -0
- package/src/utils/tokens.ts +67 -0
- package/src/utils/tool-description.ts +88 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Brownian
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
# Brownian Code
|
|
2
|
+
|
|
3
|
+
Think Claude Code, but built specifically for crypto research.
|
|
4
|
+
|
|
5
|
+
[npm package](https://www.npmjs.com/package/brownian-code)
|
|
6
|
+
[License: MIT](LICENSE)
|
|
7
|
+
[Releases](https://github.com/brownian-xyz/brownian-code/releases)
|
|
8
|
+
|
|
9
|
+
Brownian Code is an autonomous crypto research agent that thinks, plans, and learns as it works. It performs analysis using task planning, self-reflection, and real-time market data -- decomposing complex questions into research steps, gathering live data, validating its own work, and iterating until it has a confident, data-backed answer.
|
|
10
|
+
|
|
11
|
+
## Install
|
|
12
|
+
|
|
13
|
+
**Quick install (macOS / Linux):**
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
curl -fsSL https://brownian.xyz/install.sh | bash
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
**npm:**
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
npm install -g brownian-code
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
**Homebrew:**
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
brew tap brownian-xyz/brownian
|
|
29
|
+
brew install brownian
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
**From source:**
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
git clone https://github.com/brownian-xyz/brownian-code.git
|
|
36
|
+
cd brownian-code
|
|
37
|
+
bun install
|
|
38
|
+
bun start
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
## Quick Start
|
|
42
|
+
|
|
43
|
+
1. Copy the example environment file and add at least one LLM API key:
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
cp env.example .env
|
|
47
|
+
# Edit .env and add your API key(s)
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
2. Launch the agent:
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
brownian
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
3. Ask a question:
|
|
57
|
+
|
|
58
|
+
```
|
|
59
|
+
> Compare SOL vs ETH developer activity and TVL trends over the last quarter
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
See [QUICKSTART.md](QUICKSTART.md) for a full first-run walkthrough.
|
|
63
|
+
|
|
64
|
+
## Features
|
|
65
|
+
|
|
66
|
+
- **Autonomous task planning** -- Decomposes complex queries into structured research steps, executes them, and self-validates results.
|
|
67
|
+
- **Self-reflection** -- Checks its own work, detects gaps, and iterates until tasks are complete. Built-in loop detection and step limits prevent runaway execution.
|
|
68
|
+
- **Real-time crypto data** -- 227+ endpoints via Hive Intelligence covering prices, DeFi, wallets, security, NFTs, and more.
|
|
69
|
+
- **Multi-provider LLM support** -- OpenAI, Anthropic, Google, xAI, OpenRouter, Ollama (local). Switch models on the fly with `/model`.
|
|
70
|
+
- **Web search + browser** -- Exa/Tavily search and Playwright-based scraping for real-time web research.
|
|
71
|
+
- **Extensible skills** -- Define custom research workflows as SKILL.md files.
|
|
72
|
+
- **Conversation history** -- Persistent history with search, export, and context compaction.
|
|
73
|
+
|
|
74
|
+
## Documentation
|
|
75
|
+
|
|
76
|
+
| Document | Description |
|
|
77
|
+
|----------|-------------|
|
|
78
|
+
| [INSTALL.md](INSTALL.md) | Detailed installation guide with all methods |
|
|
79
|
+
| [QUICKSTART.md](QUICKSTART.md) | First-run setup and usage walkthrough |
|
|
80
|
+
| [docs/COMMANDS.md](docs/COMMANDS.md) | All CLI commands and keyboard shortcuts |
|
|
81
|
+
| [docs/CONFIGURATION.md](docs/CONFIGURATION.md) | Environment variables and config files |
|
|
82
|
+
| [docs/TROUBLESHOOTING.md](docs/TROUBLESHOOTING.md) | Common issues and fixes |
|
|
83
|
+
| [AGENTS.md](AGENTS.md) | Repository guidelines for AI coding agents |
|
|
84
|
+
|
|
85
|
+
## Contributing
|
|
86
|
+
|
|
87
|
+
1. Fork the repository
|
|
88
|
+
2. Create a feature branch
|
|
89
|
+
3. Commit your changes
|
|
90
|
+
4. Push to the branch
|
|
91
|
+
5. Open a Pull Request
|
|
92
|
+
|
|
93
|
+
Please keep pull requests small and focused. Run `bun run typecheck && bun test` before submitting.
|
|
94
|
+
|
|
95
|
+
## License
|
|
96
|
+
|
|
97
|
+
MIT -- see [LICENSE](LICENSE) for details.
|
package/bin/brownian
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
import { execFileSync } from "node:child_process";
|
|
4
|
+
import { fileURLToPath } from "node:url";
|
|
5
|
+
import { join, dirname } from "node:path";
|
|
6
|
+
|
|
7
|
+
// Check if bun is available.
// `bun --version` is only a probe: its output is discarded, and any failure
// to run it is treated as "bun is not installed".
try {
  execFileSync("bun", ["--version"], { stdio: "ignore" });
} catch {
  console.error("Error: Bun runtime is required but not found on your PATH.");
  console.error("");
  console.error("Install bun: curl -fsSL https://bun.sh/install | bash");
  console.error("More info: https://bun.sh");
  process.exit(1);
}

// Launch the app via bun.
// The entry point is resolved relative to this file: <pkg>/bin/.. -> <pkg>/src/index.tsx.
const __dirname = dirname(fileURLToPath(import.meta.url));
const entry = join(__dirname, "..", "src", "index.tsx");

// Forward whatever the user typed after `brownian` to the app; without this
// the wrapper would silently drop every command-line argument.
const forwardedArgs = process.argv.slice(2);

try {
  execFileSync("bun", ["run", entry, ...forwardedArgs], { stdio: "inherit" });
} catch (e) {
  // execFileSync throws when the child exits with a non-zero code; mirror that
  // code so callers of `brownian` see the app's real exit status. Fall back
  // to 1 when no numeric status is available on the error.
  process.exit(e.status ?? 1);
}
|
package/env.example
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# LLM API Keys
|
|
2
|
+
OPENAI_API_KEY=your-api-key
|
|
3
|
+
ANTHROPIC_API_KEY=your-api-key
|
|
4
|
+
GOOGLE_API_KEY=your-api-key
|
|
5
|
+
XAI_API_KEY=your-api-key
|
|
6
|
+
OPENROUTER_API_KEY=your-api-key
|
|
7
|
+
MOONSHOT_API_KEY=your-api-key
|
|
8
|
+
DEEPSEEK_API_KEY=your-api-key
|
|
9
|
+
|
|
10
|
+
# Ollama (Local LLM)
|
|
11
|
+
OLLAMA_BASE_URL=http://127.0.0.1:11434
|
|
12
|
+
|
|
13
|
+
# Web Search API Keys (Exa preferred, Tavily fallback)
|
|
14
|
+
EXASEARCH_API_KEY=your-api-key
|
|
15
|
+
TAVILY_API_KEY=your-api-key
|
|
16
|
+
|
|
17
|
+
# LangSmith (optional - enable for tracing)
|
|
18
|
+
# LANGSMITH_API_KEY=your-api-key
|
|
19
|
+
# LANGSMITH_ENDPOINT=https://api.smith.langchain.com
|
|
20
|
+
# LANGSMITH_PROJECT=brownian-code
|
|
21
|
+
# LANGSMITH_TRACING=true
|
package/package.json
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "brownian-code",
|
|
3
|
+
"version": "2026.2.10",
|
|
4
|
+
"description": "Brownian Code - AI agent for crypto research",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"main": "src/index.tsx",
|
|
7
|
+
"bin": {
|
|
8
|
+
"brownian": "./bin/brownian"
|
|
9
|
+
},
|
|
10
|
+
"files": [
|
|
11
|
+
"bin/",
|
|
12
|
+
"src/",
|
|
13
|
+
"env.example",
|
|
14
|
+
"README.md",
|
|
15
|
+
"LICENSE"
|
|
16
|
+
],
|
|
17
|
+
"engines": {
|
|
18
|
+
"bun": ">=1.1.0"
|
|
19
|
+
},
|
|
20
|
+
"scripts": {
|
|
21
|
+
"start": "bun run src/index.tsx",
|
|
22
|
+
"dev": "bun --watch run src/index.tsx",
|
|
23
|
+
"typecheck": "tsc --noEmit",
|
|
24
|
+
"test": "bun test",
|
|
25
|
+
"test:watch": "bun test --watch",
|
|
26
|
+
"build:all": "bash scripts/build-all.sh",
|
|
27
|
+
"gateway": "bun run src/gateway/index.ts",
|
|
28
|
+
"gateway:login": "bun run src/gateway/channels/whatsapp/login.ts",
|
|
29
|
+
"prepublishOnly": "bun run typecheck && bun test"
|
|
30
|
+
},
|
|
31
|
+
"keywords": [
|
|
32
|
+
"crypto",
|
|
33
|
+
"research",
|
|
34
|
+
"ai",
|
|
35
|
+
"agent",
|
|
36
|
+
"cli",
|
|
37
|
+
"blockchain",
|
|
38
|
+
"defi",
|
|
39
|
+
"finance"
|
|
40
|
+
],
|
|
41
|
+
"license": "MIT",
|
|
42
|
+
"repository": {
|
|
43
|
+
"type": "git",
|
|
44
|
+
"url": "git+https://github.com/brownian-xyz/brownian-code.git"
|
|
45
|
+
},
|
|
46
|
+
"homepage": "https://brownian.xyz",
|
|
47
|
+
"publishConfig": {
|
|
48
|
+
"access": "public"
|
|
49
|
+
},
|
|
50
|
+
"dependencies": {
|
|
51
|
+
"@langchain/anthropic": "^1.1.3",
|
|
52
|
+
"@langchain/core": "^1.1.0",
|
|
53
|
+
"@langchain/exa": "^1.0.1",
|
|
54
|
+
"@langchain/google-genai": "^2.0.0",
|
|
55
|
+
"@langchain/ollama": "^1.0.3",
|
|
56
|
+
"@langchain/openai": "^1.1.3",
|
|
57
|
+
"@langchain/tavily": "^1.0.1",
|
|
58
|
+
"@modelcontextprotocol/sdk": "^1.13.0",
|
|
59
|
+
"@mozilla/readability": "^0.6.0",
|
|
60
|
+
"@whiskeysockets/baileys": "^7.0.0-rc.9",
|
|
61
|
+
"dotenv": "^17.2.3",
|
|
62
|
+
"exa-js": "^2.2.0",
|
|
63
|
+
"figlet": "^1.10.0",
|
|
64
|
+
"gray-matter": "^4.0.3",
|
|
65
|
+
"ink": "^6.5.1",
|
|
66
|
+
"ink-spinner": "^5.0.0",
|
|
67
|
+
"ink-text-input": "^6.0.0",
|
|
68
|
+
"langsmith": "^0.4.10",
|
|
69
|
+
"linkedom": "^0.18.12",
|
|
70
|
+
"playwright": "^1.52.0",
|
|
71
|
+
"react": "^19.2.0",
|
|
72
|
+
"zod": "^4.1.13"
|
|
73
|
+
},
|
|
74
|
+
"devDependencies": {
|
|
75
|
+
"@babel/core": "^7.28.5",
|
|
76
|
+
"@babel/preset-env": "^7.28.5",
|
|
77
|
+
"@types/bun": "latest",
|
|
78
|
+
"@types/figlet": "^1.7.0",
|
|
79
|
+
"@types/jest": "^29.5.14",
|
|
80
|
+
"@types/react": "^19.2.7",
|
|
81
|
+
"babel-jest": "^30.2.0",
|
|
82
|
+
"ink-testing-library": "^4.0.0",
|
|
83
|
+
"jest": "^29.7.0",
|
|
84
|
+
"ts-jest": "^29.2.5",
|
|
85
|
+
"typescript": "^5.9.3"
|
|
86
|
+
}
|
|
87
|
+
}
|
|
@@ -0,0 +1,414 @@
|
|
|
1
|
+
import { describe, test, expect, mock, beforeAll, afterAll, beforeEach } from 'bun:test';
|
|
2
|
+
import { createMockAIMessage, createMockTool, createTempDir } from '../test-utils/mocks.js';
|
|
3
|
+
import type { AgentEvent, DoneEvent } from './types.js';
|
|
4
|
+
|
|
5
|
+
// ---------------------------------------------------------------------------
|
|
6
|
+
// Mock dependencies before importing Agent
|
|
7
|
+
// ---------------------------------------------------------------------------
|
|
8
|
+
|
|
9
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
10
|
+
const mockCallLlm = mock(async (): Promise<any> => ({
|
|
11
|
+
response: 'default response',
|
|
12
|
+
usage: { inputTokens: 100, outputTokens: 50, totalTokens: 150 },
|
|
13
|
+
}));
|
|
14
|
+
|
|
15
|
+
mock.module('../model/llm.js', () => ({
|
|
16
|
+
callLlm: mockCallLlm,
|
|
17
|
+
DEFAULT_MODEL: 'claude-sonnet-4-5',
|
|
18
|
+
}));
|
|
19
|
+
|
|
20
|
+
const mockGetTools = mock(() => [
|
|
21
|
+
createMockTool('search', '{"results": "BTC is $65000"}'),
|
|
22
|
+
createMockTool('api', '{"data": "some api data"}'),
|
|
23
|
+
]);
|
|
24
|
+
|
|
25
|
+
mock.module('../tools/registry.js', () => ({
|
|
26
|
+
getTools: mockGetTools,
|
|
27
|
+
buildToolDescriptions: () => 'Mock tool descriptions',
|
|
28
|
+
}));
|
|
29
|
+
|
|
30
|
+
mock.module('./prompts.js', () => ({
|
|
31
|
+
buildSystemPrompt: () => 'You are a test agent.',
|
|
32
|
+
buildIterationPrompt: (query: string, results: string) => `Query: ${query}\nResults: ${results}`,
|
|
33
|
+
buildFinalAnswerPrompt: (query: string, context: string) => `Answer: ${query}\nContext: ${context}`,
|
|
34
|
+
DEFAULT_SYSTEM_PROMPT: 'Default test system prompt.',
|
|
35
|
+
}));
|
|
36
|
+
|
|
37
|
+
mock.module('../utils/tool-description.js', () => ({
|
|
38
|
+
getToolDescription: (name: string) => name,
|
|
39
|
+
}));
|
|
40
|
+
|
|
41
|
+
mock.module('../skills/index.js', () => ({
|
|
42
|
+
discoverSkills: () => [],
|
|
43
|
+
buildSkillMetadataSection: () => '',
|
|
44
|
+
}));
|
|
45
|
+
|
|
46
|
+
// Import Agent after mocks
|
|
47
|
+
const { Agent } = await import('./agent.js');
|
|
48
|
+
|
|
49
|
+
// ---------------------------------------------------------------------------
|
|
50
|
+
// Setup temp CWD for scratchpad files
|
|
51
|
+
// ---------------------------------------------------------------------------
|
|
52
|
+
|
|
53
|
+
let originalCwd: string;
|
|
54
|
+
let cleanup: () => void;
|
|
55
|
+
|
|
56
|
+
beforeAll(() => {
|
|
57
|
+
originalCwd = process.cwd();
|
|
58
|
+
const tmp = createTempDir();
|
|
59
|
+
cleanup = tmp.cleanup;
|
|
60
|
+
process.chdir(tmp.path);
|
|
61
|
+
});
|
|
62
|
+
|
|
63
|
+
afterAll(() => {
|
|
64
|
+
process.chdir(originalCwd);
|
|
65
|
+
cleanup();
|
|
66
|
+
});
|
|
67
|
+
|
|
68
|
+
beforeEach(() => {
|
|
69
|
+
mockCallLlm.mockClear();
|
|
70
|
+
mockGetTools.mockClear();
|
|
71
|
+
mockGetTools.mockReturnValue([
|
|
72
|
+
createMockTool('search', '{"results": "BTC is $65000"}'),
|
|
73
|
+
createMockTool('api', '{"data": "some api data"}'),
|
|
74
|
+
]);
|
|
75
|
+
});
|
|
76
|
+
|
|
77
|
+
// ---------------------------------------------------------------------------
|
|
78
|
+
// Helpers
|
|
79
|
+
// ---------------------------------------------------------------------------
|
|
80
|
+
|
|
81
|
+
/**
 * Drains the async event stream returned by `agent.run(query)` and returns
 * every emitted event, in emission order.
 *
 * @param agent - Object exposing a `run(query)` method that returns an async iterable.
 * @param query - The user query forwarded verbatim to `agent.run`.
 * @returns All events produced by the run.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
async function collectEvents(agent: any, query: string): Promise<AgentEvent[]> {
  const collected: AgentEvent[] = [];
  const stream = agent.run(query);
  for await (const emitted of stream) {
    collected[collected.length] = emitted;
  }
  return collected;
}
|
|
89
|
+
|
|
90
|
+
/**
 * Returns the first `done` event in a collected event list.
 *
 * @param events - Events gathered from an agent run.
 * @returns The first event whose `type` is `'done'`.
 * @throws Error when no `done` event is present, so a test fails with a clear
 *   message instead of a later "cannot read properties of undefined".
 */
function findDone(events: AgentEvent[]): DoneEvent {
  const done = events.find((e): e is DoneEvent => e.type === 'done');
  if (done === undefined) {
    const seen = events.map(e => e.type).join(', ') || '(none)';
    throw new Error(`Expected a 'done' event but none was emitted; saw: ${seen}`);
  }
  return done;
}
|
|
93
|
+
|
|
94
|
+
// ---------------------------------------------------------------------------
|
|
95
|
+
// Simple query (no tools)
|
|
96
|
+
// ---------------------------------------------------------------------------
|
|
97
|
+
|
|
98
|
+
describe('Agent - simple query (no tools)', () => {
|
|
99
|
+
test('LLM returns string → done with text, iterations=1', async () => {
|
|
100
|
+
mockCallLlm.mockResolvedValue({
|
|
101
|
+
response: 'Hello! How can I help?',
|
|
102
|
+
usage: { inputTokens: 50, outputTokens: 20, totalTokens: 70 },
|
|
103
|
+
});
|
|
104
|
+
|
|
105
|
+
const agent = Agent.create({ model: 'claude-sonnet-4-5' });
|
|
106
|
+
const events = await collectEvents(agent, 'hi');
|
|
107
|
+
|
|
108
|
+
const done = findDone(events);
|
|
109
|
+
expect(done).toBeDefined();
|
|
110
|
+
expect(done.answer).toBe('Hello! How can I help?');
|
|
111
|
+
expect(done.iterations).toBe(1);
|
|
112
|
+
expect(done.toolCalls.length).toBe(0);
|
|
113
|
+
});
|
|
114
|
+
});
|
|
115
|
+
|
|
116
|
+
// ---------------------------------------------------------------------------
|
|
117
|
+
// Single tool call
|
|
118
|
+
// ---------------------------------------------------------------------------
|
|
119
|
+
|
|
120
|
+
describe('Agent - tool calls', () => {
|
|
121
|
+
test('single tool call flow: tool_start → tool_end → done', async () => {
|
|
122
|
+
// First call: LLM wants to call a tool
|
|
123
|
+
mockCallLlm.mockResolvedValueOnce({
|
|
124
|
+
response: createMockAIMessage('Let me search for that', [
|
|
125
|
+
{ name: 'search', args: { query: 'BTC price' } },
|
|
126
|
+
]),
|
|
127
|
+
usage: { inputTokens: 100, outputTokens: 50, totalTokens: 150 },
|
|
128
|
+
});
|
|
129
|
+
// Second call: LLM gives final answer (no tools)
|
|
130
|
+
mockCallLlm.mockResolvedValueOnce({
|
|
131
|
+
response: 'BTC is at $65,000',
|
|
132
|
+
usage: { inputTokens: 200, outputTokens: 100, totalTokens: 300 },
|
|
133
|
+
});
|
|
134
|
+
// Third call: final answer generation
|
|
135
|
+
mockCallLlm.mockResolvedValueOnce({
|
|
136
|
+
response: 'Bitcoin is currently priced at $65,000.',
|
|
137
|
+
usage: { inputTokens: 150, outputTokens: 80, totalTokens: 230 },
|
|
138
|
+
});
|
|
139
|
+
|
|
140
|
+
const agent = Agent.create({ model: 'claude-sonnet-4-5' });
|
|
141
|
+
const events = await collectEvents(agent, 'What is BTC price?');
|
|
142
|
+
|
|
143
|
+
const types = events.map(e => e.type);
|
|
144
|
+
expect(types).toContain('tool_start');
|
|
145
|
+
expect(types).toContain('tool_end');
|
|
146
|
+
expect(types).toContain('done');
|
|
147
|
+
|
|
148
|
+
const done = findDone(events);
|
|
149
|
+
expect(done.toolCalls.length).toBeGreaterThan(0);
|
|
150
|
+
});
|
|
151
|
+
|
|
152
|
+
test('tool error: yields tool_error and agent continues', async () => {
|
|
153
|
+
const errorTool = createMockTool('broken', () => {
|
|
154
|
+
throw new Error('API timeout');
|
|
155
|
+
});
|
|
156
|
+
mockGetTools.mockReturnValue([errorTool]);
|
|
157
|
+
|
|
158
|
+
// LLM calls the broken tool
|
|
159
|
+
mockCallLlm.mockResolvedValueOnce({
|
|
160
|
+
response: createMockAIMessage('', [
|
|
161
|
+
{ name: 'broken', args: {} },
|
|
162
|
+
]),
|
|
163
|
+
usage: { inputTokens: 100, outputTokens: 50, totalTokens: 150 },
|
|
164
|
+
});
|
|
165
|
+
// LLM gives answer after error
|
|
166
|
+
mockCallLlm.mockResolvedValueOnce({
|
|
167
|
+
response: 'I encountered an error.',
|
|
168
|
+
usage: { inputTokens: 100, outputTokens: 50, totalTokens: 150 },
|
|
169
|
+
});
|
|
170
|
+
// Final answer generation
|
|
171
|
+
mockCallLlm.mockResolvedValueOnce({
|
|
172
|
+
response: 'Sorry, there was an error getting the data.',
|
|
173
|
+
usage: { inputTokens: 100, outputTokens: 50, totalTokens: 150 },
|
|
174
|
+
});
|
|
175
|
+
|
|
176
|
+
const agent = Agent.create({ model: 'claude-sonnet-4-5' });
|
|
177
|
+
const events = await collectEvents(agent, 'test error');
|
|
178
|
+
|
|
179
|
+
const types = events.map(e => e.type);
|
|
180
|
+
expect(types).toContain('tool_error');
|
|
181
|
+
expect(types).toContain('done');
|
|
182
|
+
});
|
|
183
|
+
});
|
|
184
|
+
|
|
185
|
+
// ---------------------------------------------------------------------------
|
|
186
|
+
// Max iterations
|
|
187
|
+
// ---------------------------------------------------------------------------
|
|
188
|
+
|
|
189
|
+
describe('Agent - max iterations', () => {
  test('stops at max iterations and still generates final answer', async () => {
    const agent = Agent.create({ model: 'claude-sonnet-4-5', maxIterations: 2 });

    // Script the LLM: the first two calls (one per allowed iteration) always
    // request a tool, which forces the agent up against its iteration cap.
    // Any later call is treated as the final-answer generation.
    let callCount = 0;
    mockCallLlm.mockImplementation(async () => {
      callCount++;
      if (callCount <= 2) {
        return {
          response: createMockAIMessage('', [
            { name: 'search', args: { query: `attempt ${callCount}` } },
          ]),
          usage: { inputTokens: 50, outputTokens: 25, totalTokens: 75 },
        };
      }
      // Final answer generation call
      return {
        response: 'Max iterations reached, here is what I found.',
        usage: { inputTokens: 50, outputTokens: 25, totalTokens: 75 },
      };
    });

    const events = await collectEvents(agent, 'loop test');
    const done = findDone(events);
    expect(done).toBeDefined();
    expect(done.iterations).toBe(2);
  });
});
|
|
226
|
+
|
|
227
|
+
// ---------------------------------------------------------------------------
|
|
228
|
+
// No tools available
|
|
229
|
+
// ---------------------------------------------------------------------------
|
|
230
|
+
|
|
231
|
+
describe('Agent - no tools', () => {
|
|
232
|
+
test('yields done with "No tools available" message', async () => {
|
|
233
|
+
mockGetTools.mockReturnValue([]);
|
|
234
|
+
|
|
235
|
+
const agent = Agent.create({ model: 'claude-sonnet-4-5' });
|
|
236
|
+
const events = await collectEvents(agent, 'test query');
|
|
237
|
+
|
|
238
|
+
const done = findDone(events);
|
|
239
|
+
expect(done.answer).toContain('No tools available');
|
|
240
|
+
expect(done.iterations).toBe(0);
|
|
241
|
+
});
|
|
242
|
+
});
|
|
243
|
+
|
|
244
|
+
// ---------------------------------------------------------------------------
|
|
245
|
+
// Skill deduplication
|
|
246
|
+
// ---------------------------------------------------------------------------
|
|
247
|
+
|
|
248
|
+
describe('Agent - skill deduplication', () => {
|
|
249
|
+
test('same skill called twice → second skipped', async () => {
|
|
250
|
+
const skillInvocations: string[] = [];
|
|
251
|
+
const skillTool = createMockTool('skill', (args) => {
|
|
252
|
+
skillInvocations.push(args.skill as string);
|
|
253
|
+
return 'skill result';
|
|
254
|
+
});
|
|
255
|
+
mockGetTools.mockReturnValue([skillTool]);
|
|
256
|
+
|
|
257
|
+
// LLM requests same skill twice in one response
|
|
258
|
+
mockCallLlm.mockResolvedValueOnce({
|
|
259
|
+
response: createMockAIMessage('', [
|
|
260
|
+
{ name: 'skill', args: { skill: 'price-lookup' } },
|
|
261
|
+
{ name: 'skill', args: { skill: 'price-lookup' } },
|
|
262
|
+
]),
|
|
263
|
+
usage: { inputTokens: 100, outputTokens: 50, totalTokens: 150 },
|
|
264
|
+
});
|
|
265
|
+
mockCallLlm.mockResolvedValueOnce({
|
|
266
|
+
response: 'Done.',
|
|
267
|
+
usage: { inputTokens: 50, outputTokens: 25, totalTokens: 75 },
|
|
268
|
+
});
|
|
269
|
+
mockCallLlm.mockResolvedValueOnce({
|
|
270
|
+
response: 'Final answer after skill.',
|
|
271
|
+
usage: { inputTokens: 50, outputTokens: 25, totalTokens: 75 },
|
|
272
|
+
});
|
|
273
|
+
|
|
274
|
+
const agent = Agent.create({ model: 'claude-sonnet-4-5' });
|
|
275
|
+
const events = await collectEvents(agent, 'run skill');
|
|
276
|
+
|
|
277
|
+
// Skill should only have been invoked once
|
|
278
|
+
expect(skillInvocations.length).toBe(1);
|
|
279
|
+
expect(skillInvocations[0]).toBe('price-lookup');
|
|
280
|
+
});
|
|
281
|
+
});
|
|
282
|
+
|
|
283
|
+
// ---------------------------------------------------------------------------
|
|
284
|
+
// Thinking events
|
|
285
|
+
// ---------------------------------------------------------------------------
|
|
286
|
+
|
|
287
|
+
describe('Agent - thinking events', () => {
|
|
288
|
+
test('emits thinking event when LLM includes text alongside tool calls', async () => {
|
|
289
|
+
mockCallLlm.mockResolvedValueOnce({
|
|
290
|
+
response: createMockAIMessage('Let me search for Bitcoin data.', [
|
|
291
|
+
{ name: 'search', args: { query: 'BTC' } },
|
|
292
|
+
]),
|
|
293
|
+
usage: { inputTokens: 100, outputTokens: 50, totalTokens: 150 },
|
|
294
|
+
});
|
|
295
|
+
mockCallLlm.mockResolvedValueOnce({
|
|
296
|
+
response: 'BTC is $65k',
|
|
297
|
+
usage: { inputTokens: 50, outputTokens: 25, totalTokens: 75 },
|
|
298
|
+
});
|
|
299
|
+
mockCallLlm.mockResolvedValueOnce({
|
|
300
|
+
response: 'Bitcoin costs $65,000.',
|
|
301
|
+
usage: { inputTokens: 50, outputTokens: 25, totalTokens: 75 },
|
|
302
|
+
});
|
|
303
|
+
|
|
304
|
+
const agent = Agent.create({ model: 'claude-sonnet-4-5' });
|
|
305
|
+
const events = await collectEvents(agent, 'BTC price');
|
|
306
|
+
|
|
307
|
+
const thinking = events.find(e => e.type === 'thinking');
|
|
308
|
+
expect(thinking).toBeDefined();
|
|
309
|
+
if (thinking?.type === 'thinking') {
|
|
310
|
+
expect(thinking.message).toContain('search for Bitcoin');
|
|
311
|
+
}
|
|
312
|
+
});
|
|
313
|
+
});
|
|
314
|
+
|
|
315
|
+
// ---------------------------------------------------------------------------
|
|
316
|
+
// Token usage tracking
|
|
317
|
+
// ---------------------------------------------------------------------------
|
|
318
|
+
|
|
319
|
+
describe('Agent - token usage', () => {
|
|
320
|
+
test('done event includes accumulated token usage', async () => {
|
|
321
|
+
mockCallLlm.mockResolvedValue({
|
|
322
|
+
response: 'Quick answer',
|
|
323
|
+
usage: { inputTokens: 100, outputTokens: 50, totalTokens: 150 },
|
|
324
|
+
});
|
|
325
|
+
|
|
326
|
+
const agent = Agent.create({ model: 'claude-sonnet-4-5' });
|
|
327
|
+
const events = await collectEvents(agent, 'simple');
|
|
328
|
+
|
|
329
|
+
const done = findDone(events);
|
|
330
|
+
expect(done.tokenUsage).toBeDefined();
|
|
331
|
+
expect(done.tokenUsage!.totalTokens).toBeGreaterThan(0);
|
|
332
|
+
expect(done.totalTime).toBeGreaterThanOrEqual(0);
|
|
333
|
+
});
|
|
334
|
+
});
|
|
335
|
+
|
|
336
|
+
// ---------------------------------------------------------------------------
|
|
337
|
+
// buildFullContextForAnswer
|
|
338
|
+
// ---------------------------------------------------------------------------
|
|
339
|
+
|
|
340
|
+
describe('Agent - context building', () => {
  test('filters out error results from final context', async () => {
    // `flaky` throws only when asked to; `reliable` always succeeds.
    const errorTool = createMockTool('flaky', (args) => {
      if (args.fail) throw new Error('timeout');
      return '{"data": "good"}';
    });
    const goodTool = createMockTool('reliable', () => '{"data": "success"}');
    mockGetTools.mockReturnValue([errorTool, goodTool]);

    // First call: both tools, one will fail
    mockCallLlm.mockResolvedValueOnce({
      response: createMockAIMessage('', [
        { name: 'flaky', args: { fail: true } },
        { name: 'reliable', args: {} },
      ]),
      usage: { inputTokens: 100, outputTokens: 50, totalTokens: 150 },
    });
    // Second call: no more tools
    mockCallLlm.mockResolvedValueOnce({
      response: 'data gathered',
      usage: { inputTokens: 100, outputTokens: 50, totalTokens: 150 },
    });
    // Final answer
    mockCallLlm.mockResolvedValueOnce({
      response: 'Here is the result from reliable tool.',
      usage: { inputTokens: 100, outputTokens: 50, totalTokens: 150 },
    });

    const agent = Agent.create({ model: 'claude-sonnet-4-5' });
    const events = await collectEvents(agent, 'mixed results');

    const done = findDone(events);
    expect(done.answer).toBeDefined();

    // The last LLM call is the final-answer generation; its prompt must not
    // carry the failed tool's error text. These checks are unconditional on
    // purpose: guarding them behind `if (promptArg)` would let the test pass
    // without verifying anything when the prompt argument is missing.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const lastCallArgs = mockCallLlm.mock.calls[mockCallLlm.mock.calls.length - 1] as any[];
    expect(lastCallArgs).toBeDefined();
    const promptArg = lastCallArgs[0];
    expect(typeof promptArg).toBe('string');
    expect(promptArg).not.toContain('Error: timeout');
  });
});
|
|
383
|
+
|
|
384
|
+
// ---------------------------------------------------------------------------
|
|
385
|
+
// answer_start event
|
|
386
|
+
// ---------------------------------------------------------------------------
|
|
387
|
+
|
|
388
|
+
describe('Agent - answer_start event', () => {
|
|
389
|
+
test('emits answer_start before done', async () => {
|
|
390
|
+
mockCallLlm.mockResolvedValueOnce({
|
|
391
|
+
response: createMockAIMessage('', [
|
|
392
|
+
{ name: 'search', args: { query: 'test' } },
|
|
393
|
+
]),
|
|
394
|
+
usage: { inputTokens: 100, outputTokens: 50, totalTokens: 150 },
|
|
395
|
+
});
|
|
396
|
+
mockCallLlm.mockResolvedValueOnce({
|
|
397
|
+
response: 'done searching',
|
|
398
|
+
usage: { inputTokens: 50, outputTokens: 25, totalTokens: 75 },
|
|
399
|
+
});
|
|
400
|
+
mockCallLlm.mockResolvedValueOnce({
|
|
401
|
+
response: 'Final answer.',
|
|
402
|
+
usage: { inputTokens: 50, outputTokens: 25, totalTokens: 75 },
|
|
403
|
+
});
|
|
404
|
+
|
|
405
|
+
const agent = Agent.create({ model: 'claude-sonnet-4-5' });
|
|
406
|
+
const events = await collectEvents(agent, 'test');
|
|
407
|
+
|
|
408
|
+
const types = events.map(e => e.type);
|
|
409
|
+
const answerStartIdx = types.indexOf('answer_start');
|
|
410
|
+
const doneIdx = types.indexOf('done');
|
|
411
|
+
expect(answerStartIdx).toBeGreaterThanOrEqual(0);
|
|
412
|
+
expect(doneIdx).toBeGreaterThan(answerStartIdx);
|
|
413
|
+
});
|
|
414
|
+
});
|