browse-ai 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,124 @@
1
+ # browse-ai
2
+
3
+ Open-source deep research MCP server for AI agents.
4
+
5
+ Turn any AI assistant into a research engine with real-time web search, evidence extraction, and structured citations.
6
+
7
+ ## What it does
8
+
9
+ Instead of letting your AI hallucinate, `browse-ai` gives it real-time access to the web with **structured, cited answers**:
10
+
11
+ ```
12
+ Your question → Web search → Fetch pages → Extract claims → Build evidence graph → Cited answer
13
+ ```
14
+
15
+ Every answer includes:
16
+ - **Claims** with source URLs
17
+ - **Confidence score** (0-1)
18
+ - **Source quotes** from actual web pages
19
+ - **Execution trace** with timing
20
+
21
+ ## Quick Start
22
+
23
+ ```bash
24
+ npx browse-ai setup
25
+ ```
26
+
27
+ This auto-configures Claude Desktop. You'll need:
28
+ - [Tavily API key](https://tavily.com) (free tier available)
29
+ - [OpenRouter API key](https://openrouter.ai)
30
+
31
+ ## Manual Setup
32
+
33
+ ### Claude Desktop
34
+
35
+ Add to `~/Library/Application Support/Claude/claude_desktop_config.json` (macOS; on Windows the file is at `%APPDATA%\Claude\claude_desktop_config.json`):
36
+
37
+ ```json
38
+ {
39
+ "mcpServers": {
40
+ "browse-ai": {
41
+ "command": "npx",
42
+ "args": ["-y", "browse-ai"],
43
+ "env": {
44
+ "SERP_API_KEY": "tvly-your-key",
45
+ "OPENROUTER_API_KEY": "your-openrouter-key"
46
+ }
47
+ }
48
+ }
49
+ }
50
+ ```
51
+
52
+ ### Cursor / Windsurf
53
+
54
+ Add to your MCP settings:
55
+
56
+ ```json
57
+ {
58
+ "browse-ai": {
59
+ "command": "npx",
60
+ "args": ["-y", "browse-ai"],
61
+ "env": {
62
+ "SERP_API_KEY": "tvly-your-key",
63
+ "OPENROUTER_API_KEY": "your-openrouter-key"
64
+ }
65
+ }
66
+ }
67
+ ```
68
+
69
+ ## MCP Tools
70
+
71
+ | Tool | Description |
72
+ |------|-------------|
73
+ | `browse_search` | Search the web via Tavily |
74
+ | `browse_open` | Fetch and parse a page into clean text |
75
+ | `browse_extract` | Extract structured knowledge from a page |
76
+ | `browse_answer` | Full pipeline: search + extract + cite |
77
+ | `browse_compare` | Compare raw LLM vs evidence-backed answer |
78
+
79
+ ## Example
80
+
81
+ Ask Claude: *"Use browse_answer to explain what causes aurora borealis"*
82
+
83
+ Response:
84
+ ```json
85
+ {
86
+ "answer": "Aurora borealis occurs when charged particles from the Sun...",
87
+ "claims": [
88
+ {
89
+ "claim": "Aurora borealis is caused by solar wind particles...",
90
+ "sources": ["https://en.wikipedia.org/wiki/Aurora"]
91
+ }
92
+ ],
93
+ "sources": [
94
+ {
95
+ "url": "https://en.wikipedia.org/wiki/Aurora",
96
+ "title": "Aurora - Wikipedia",
97
+ "domain": "en.wikipedia.org",
98
+ "quote": "An aurora is a natural light display..."
99
+ }
100
+ ],
101
+ "confidence": 0.92
102
+ }
103
+ ```
104
+
105
+ ## Why browse-ai?
106
+
107
+ | Feature | Raw LLM | browse-ai |
108
+ |---------|---------|-----------|
109
+ | Sources | None | Real URLs with quotes |
110
+ | Citations | Hallucinated | Verified from pages |
111
+ | Confidence | Unknown | 0-1 score |
112
+ | Freshness | Training data | Real-time web |
113
+ | Claims | Mixed in text | Structured + linked |
114
+
115
+ ## Tech Stack
116
+
117
+ - **Search**: Tavily API
118
+ - **Parsing**: @mozilla/readability + linkedom
119
+ - **AI**: OpenRouter (100+ models)
120
+ - **Protocol**: Model Context Protocol (MCP)
121
+
122
+ ## License
123
+
124
+ MIT
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env node
2
+ export {};
package/dist/index.js ADDED
@@ -0,0 +1,368 @@
1
+ #!/usr/bin/env node
2
+ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
3
+ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
4
+ import { z } from "zod";
5
+ import { Readability } from "@mozilla/readability";
6
+ import { parseHTML } from "linkedom";
7
// --- Constants (inlined for standalone npm package) ---
const VERSION = "0.1.0";
const LLM_MODEL = "google/gemini-2.5-flash";
const LLM_ENDPOINT = "https://openrouter.ai/api/v1/chat/completions";
const TAVILY_ENDPOINT = "https://api.tavily.com/search";
// Max characters of page text handed to the LLM per source.
const MAX_PAGE_CONTENT_LENGTH = 3000;
// --- CLI handling ---
const args = process.argv.slice(2);
if (args.includes("--help") || args.includes("-h")) {
    // Tool names below match the identifiers registered with server.tool()
    // (the previous help text listed dotted names like "browse.search",
    // which do not exist — the server registers underscore names).
    console.log(`
browse-ai v${VERSION}
Open-source deep research MCP server for AI agents

Usage:
  browse-ai              Start the MCP server (stdio transport)
  browse-ai setup        Auto-configure Claude Desktop
  browse-ai --help       Show this help
  browse-ai --version    Show version

Environment Variables:
  SERP_API_KEY           Tavily API key (get one at https://tavily.com)
  OPENROUTER_API_KEY     OpenRouter API key (get one at https://openrouter.ai)

MCP Tools:
  browse_search          Search the web for information
  browse_open            Fetch and parse a web page
  browse_extract         Extract structured knowledge from a page
  browse_answer          Full pipeline: search + extract + answer
  browse_compare         Compare raw LLM vs evidence-backed answer

Quick Setup:
  1. Get API keys: https://tavily.com + https://openrouter.ai
  2. Run: npx browse-ai setup
  3. Restart Claude Desktop
`);
    process.exit(0);
}
if (args.includes("--version") || args.includes("-v")) {
    console.log(VERSION);
    process.exit(0);
}
if (args[0] === "setup") {
    // Handle rejection explicitly: the previous code left this dynamic-import
    // promise floating, so a failure became an unhandled rejection.
    import("./setup.js")
        .then((m) => m.runSetup())
        .catch((err) => {
            console.error("browse-ai setup failed:", err);
            process.exit(1);
        });
}
else {
    // --- Start MCP server ---
    startServer();
}
55
// --- Env validation ---
/**
 * Read the two required API keys from the environment.
 * Exits the process (code 1) with setup instructions when either is missing.
 * @returns {{ SERP_API_KEY: string, OPENROUTER_API_KEY: string }}
 */
function getEnvKeys() {
    const { SERP_API_KEY, OPENROUTER_API_KEY } = process.env;
    // Happy path first: both keys present.
    if (SERP_API_KEY && OPENROUTER_API_KEY) {
        return { SERP_API_KEY, OPENROUTER_API_KEY };
    }
    console.error(`
browse-ai: Missing required environment variables

${!SERP_API_KEY ? " SERP_API_KEY - Get one at https://tavily.com" : " SERP_API_KEY - Set"}
${!OPENROUTER_API_KEY ? " OPENROUTER_API_KEY - Get one at https://openrouter.ai" : " OPENROUTER_API_KEY - Set"}

Quick fix: run 'npx browse-ai setup' to configure automatically.
`);
    process.exit(1);
}
72
// --- In-memory cache ---
// Process-lifetime TTL cache; entries are evicted lazily on read.
const cache = new Map();

/** Return the cached value for `key`, or null when absent or past its TTL. */
function cacheGet(key) {
    const hit = cache.get(key);
    if (hit !== undefined && Date.now() <= hit.expires) {
        return hit.value;
    }
    // Missing or stale: drop the entry so the map does not hold dead data.
    cache.delete(key);
    return null;
}

/** Store `value` under `key` for `ttl` seconds (default 5 minutes). */
function cacheSet(key, value, ttl = 300) {
    const expires = Date.now() + ttl * 1000;
    cache.set(key, { value, expires });
}
85
// --- Tavily search ---
/**
 * Search the web via the Tavily API.
 * Results are cached for 10 minutes per (query, limit) pair.
 * @param {string} query - Search query text.
 * @param {number} [limit=5] - Maximum number of results to request.
 * @returns {Promise<Array<{url: string, title: string, snippet: string, score: number}>>}
 * @throws {Error} when the HTTP request fails (non-2xx status).
 */
async function tavilySearch(query, limit = 5) {
    const { SERP_API_KEY } = getEnvKeys();
    const cacheKey = `search:${query}:${limit}`;
    const cached = cacheGet(cacheKey);
    if (cached)
        return JSON.parse(cached);
    const res = await fetch(TAVILY_ENDPOINT, {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
            api_key: SERP_API_KEY,
            query,
            max_results: limit,
            include_raw_content: false,
            search_depth: "basic",
        }),
    });
    if (!res.ok)
        throw new Error(`Tavily search failed: ${res.status}`);
    const data = await res.json();
    // Guard: a 2xx body without a `results` array previously crashed on .map;
    // treat it as zero hits instead.
    const results = (data.results ?? []).map((r) => ({
        url: r.url,
        title: r.title,
        snippet: r.content,
        score: r.score,
    }));
    cacheSet(cacheKey, JSON.stringify(results), 600);
    return results;
}
114
// --- Readability page fetch ---
/**
 * Fetch a URL and reduce it to readable text via Mozilla Readability.
 * Parsed pages are cached for 30 minutes; the fetch aborts after 10 seconds.
 * @param {string} url - Absolute URL to fetch.
 * @returns {Promise<{title: string, content: string, excerpt: string, siteName: string}>}
 * @throws {Error} on HTTP failure, timeout, or when Readability cannot parse the page.
 */
async function fetchPage(url) {
    const cached = cacheGet(`page:${url}`);
    if (cached)
        return JSON.parse(cached);
    const res = await fetch(url, {
        headers: {
            "User-Agent": "Mozilla/5.0 (compatible; BrowseAI/1.0)",
            Accept: "text/html,application/xhtml+xml",
        },
        signal: AbortSignal.timeout(10000),
    });
    if (!res.ok)
        throw new Error(`Failed to fetch ${url}: ${res.status}`);
    const html = await res.text();
    const { document } = parseHTML(html);
    const article = new Readability(document).parse();
    if (!article)
        throw new Error(`Could not parse ${url}`);
    const page = {
        title: article.title,
        // Readability's textContent can be null on degenerate documents;
        // previously that crashed on .slice — coerce to empty string instead.
        content: (article.textContent ?? "").slice(0, MAX_PAGE_CONTENT_LENGTH * 2),
        excerpt: article.excerpt || "",
        siteName: article.siteName,
    };
    cacheSet(`page:${url}`, JSON.stringify(page), 1800);
    return page;
}
143
// --- LLM knowledge extraction (via OpenRouter) ---
/**
 * Turn fetched web page text into structured, cited knowledge.
 * Forces a tool call ("return_knowledge") so the model must emit JSON that
 * matches the schema below rather than free-form prose.
 * @param {string} query - The user's question.
 * @param {string} pageContents - Concatenated "[Source N] URL/title/body" excerpts.
 * @returns {Promise<Object>} parsed tool arguments: { answer, confidence, claims, sources }.
 * @throws {Error} on HTTP failure or when the model returns no tool call.
 */
async function extractKnowledge(query, pageContents) {
    const { OPENROUTER_API_KEY } = getEnvKeys();
    const res = await fetch(LLM_ENDPOINT, {
        method: "POST",
        headers: {
            Authorization: `Bearer ${OPENROUTER_API_KEY}`,
            "Content-Type": "application/json",
        },
        body: JSON.stringify({
            model: LLM_MODEL,
            messages: [
                {
                    role: "system",
                    content: "You are a knowledge extraction engine. Given web page content, extract structured claims with source attribution and write a clear answer. Use only extracted evidence. Never invent sources. Preserve citations. Return a JSON object using the tool provided.",
                },
                {
                    role: "user",
                    content: `Question: ${query}\n\nWeb sources:\n${pageContents}`,
                },
            ],
            // JSON schema the model must satisfy when calling return_knowledge.
            tools: [
                {
                    type: "function",
                    function: {
                        name: "return_knowledge",
                        description: "Return extracted knowledge with claims, sources, answer, and confidence",
                        parameters: {
                            type: "object",
                            properties: {
                                answer: { type: "string" },
                                confidence: { type: "number" },
                                claims: {
                                    type: "array",
                                    items: {
                                        type: "object",
                                        properties: {
                                            claim: { type: "string" },
                                            sources: {
                                                type: "array",
                                                items: { type: "string" },
                                            },
                                        },
                                        required: ["claim", "sources"],
                                    },
                                },
                                sources: {
                                    type: "array",
                                    items: {
                                        type: "object",
                                        properties: {
                                            url: { type: "string" },
                                            title: { type: "string" },
                                            domain: { type: "string" },
                                            quote: { type: "string" },
                                        },
                                        required: ["url", "title", "domain", "quote"],
                                    },
                                },
                            },
                            required: ["answer", "confidence", "claims", "sources"],
                            additionalProperties: false,
                        },
                    },
                },
            ],
            // Require the model to answer via the tool (no plain-text replies).
            tool_choice: {
                type: "function",
                function: { name: "return_knowledge" },
            },
        }),
    });
    if (!res.ok)
        throw new Error(`LLM failed: ${res.status}`);
    const data = await res.json();
    const toolCall = data.choices?.[0]?.message?.tool_calls?.[0];
    if (!toolCall)
        throw new Error("LLM did not return structured output");
    // Tool-call arguments arrive as a JSON string; parse to the structured object.
    return JSON.parse(toolCall.function.arguments);
}
223
// --- Raw LLM call (no sources, for compare) ---
/**
 * Ask the LLM directly with no web evidence — used by browse_compare as the
 * "hallucination-prone" baseline against the grounded pipeline.
 * @param {string} query - The user's question.
 * @returns {Promise<string>} the model's plain-text answer.
 * @throws {Error} when the HTTP request fails.
 */
async function rawLLMAnswer(query) {
    const { OPENROUTER_API_KEY } = getEnvKeys();
    const payload = {
        model: LLM_MODEL,
        messages: [
            {
                role: "system",
                content: "Answer the question clearly and concisely.",
            },
            { role: "user", content: query },
        ],
    };
    const res = await fetch(LLM_ENDPOINT, {
        method: "POST",
        headers: {
            Authorization: `Bearer ${OPENROUTER_API_KEY}`,
            "Content-Type": "application/json",
        },
        body: JSON.stringify(payload),
    });
    if (!res.ok) {
        throw new Error(`LLM failed: ${res.status}`);
    }
    const data = await res.json();
    return data.choices?.[0]?.message?.content || "No response";
}
248
/**
 * Full research pipeline: search → fetch pages → extract cited knowledge.
 * @param {string} query - The user's question.
 * @returns {Promise<{answer: string, claims: Array, sources: Array, confidence: number, trace: Array}>}
 */
async function answerPipeline(query) {
    const trace = [];
    const searchStart = Date.now();
    const searchResults = await tavilySearch(query);
    trace.push({
        step: "Search Web",
        duration_ms: Date.now() - searchStart,
        detail: `${searchResults.length} results`,
    });
    const scrapeStart = Date.now();
    // Fetch pages in parallel, keeping each page paired with its own URL.
    // BUG FIX: the previous code filtered out failed fetches and then indexed
    // back into the unfiltered searchResults array, so any failed fetch
    // shifted citation URLs onto the wrong pages.
    const settled = await Promise.allSettled(searchResults.slice(0, 5).map(async (r) => ({
        url: r.url,
        page: await fetchPage(r.url),
    })));
    const successfulPages = settled
        .filter((p) => p.status === "fulfilled")
        .map((p) => p.value);
    trace.push({
        step: "Fetch Pages",
        duration_ms: Date.now() - scrapeStart,
        detail: `${successfulPages.length} pages`,
    });
    const pageContents = successfulPages
        .map(({ url, page }, i) => `[Source ${i + 1}] URL: ${url}\nTitle: ${page.title}\n\n${page.content.slice(0, MAX_PAGE_CONTENT_LENGTH)}`)
        .join("\n\n---\n\n");
    const llmStart = Date.now();
    const knowledge = await extractKnowledge(query, pageContents);
    const llmDuration = Date.now() - llmStart;
    // NOTE(review): the three steps below share a single LLM call; the
    // 40/10/50 split is a cosmetic breakdown, not separately measured timing.
    trace.push({
        step: "Extract Claims",
        duration_ms: Math.round(llmDuration * 0.4),
        detail: `${knowledge.claims?.length || 0} claims`,
    });
    trace.push({
        step: "Build Evidence Graph",
        duration_ms: Math.round(llmDuration * 0.1),
        detail: `${knowledge.sources?.length || 0} sources`,
    });
    trace.push({
        step: "Generate Answer",
        duration_ms: Math.round(llmDuration * 0.5),
        detail: "OpenRouter",
    });
    return {
        answer: knowledge.answer,
        claims: knowledge.claims || [],
        sources: knowledge.sources || [],
        confidence: knowledge.confidence || 0.85,
        trace,
    };
}
296
// --- MCP Server ---
// Registers the five browse_* tools and serves them over stdio transport.
// Exits early (via getEnvKeys) if the required API keys are not configured.
function startServer() {
    // Validate env before starting
    getEnvKeys();
    const server = new McpServer({
        name: "browse-ai",
        version: VERSION,
    });
    // browse_search: thin wrapper over Tavily search (defaults to 5 results).
    server.tool("browse_search", "Search the web for information on a topic. Returns URLs, titles, snippets, and relevance scores.", { query: z.string(), limit: z.number().optional() }, async ({ query, limit }) => {
        const results = await tavilySearch(query, limit ?? 5);
        return {
            content: [{ type: "text", text: JSON.stringify(results, null, 2) }],
        };
    });
    // browse_open: fetch one URL and return Readability-cleaned text.
    server.tool("browse_open", "Fetch and parse a web page into clean text using Readability. Strips ads, nav, and boilerplate.", { url: z.string() }, async ({ url }) => {
        const page = await fetchPage(url);
        return {
            content: [{ type: "text", text: JSON.stringify(page, null, 2) }],
        };
    });
    // browse_extract: single-page extraction; when no query is given, falls
    // back to asking for a summary of the page's domain.
    server.tool("browse_extract", "Extract structured knowledge (claims + sources + confidence) from a single web page using AI.", { url: z.string(), query: z.string().optional() }, async ({ url, query }) => {
        const page = await fetchPage(url);
        const domain = new URL(url).hostname;
        const pageContent = `[Source 1] URL: ${url}\nTitle: ${page.title}\n\n${page.content.slice(0, MAX_PAGE_CONTENT_LENGTH)}`;
        const q = query || `Summarize the content from ${domain}`;
        const result = await extractKnowledge(q, pageContent);
        return {
            content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
        };
    });
    // browse_answer: the full search → fetch → extract pipeline.
    server.tool("browse_answer", "Full deep research pipeline: search the web, fetch pages, extract claims, build evidence graph, and generate a structured answer with citations and confidence score.", { query: z.string() }, async ({ query }) => {
        const result = await answerPipeline(query);
        return {
            content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
        };
    });
    // browse_compare: run the ungrounded baseline and the grounded pipeline
    // in parallel and return them side by side.
    server.tool("browse_compare", "Compare a raw LLM answer (no sources) vs an evidence-backed answer. Shows the difference between hallucination-prone and grounded responses.", { query: z.string() }, async ({ query }) => {
        const [rawAnswer, evidenceResult] = await Promise.all([
            rawLLMAnswer(query),
            answerPipeline(query),
        ]);
        const comparison = {
            query,
            raw_llm: {
                answer: rawAnswer,
                sources: 0,
                claims: 0,
                confidence: null,
            },
            evidence_backed: {
                answer: evidenceResult.answer,
                sources: evidenceResult.sources.length,
                claims: evidenceResult.claims.length,
                confidence: evidenceResult.confidence,
                citations: evidenceResult.sources,
            },
        };
        return {
            content: [
                { type: "text", text: JSON.stringify(comparison, null, 2) },
            ],
        };
    });
    // Connect over stdio; log to stderr so stdout stays clean for MCP framing.
    async function run() {
        const transport = new StdioServerTransport();
        await server.connect(transport);
        console.error(`browse-ai v${VERSION} MCP server running on stdio`);
    }
    run().catch((err) => {
        console.error("Failed to start browse-ai:", err);
        process.exit(1);
    });
}
@@ -0,0 +1 @@
1
+ export declare function runSetup(): Promise<void>;
package/dist/setup.js ADDED
@@ -0,0 +1,82 @@
1
+ import { readFileSync, writeFileSync, existsSync, mkdirSync } from "fs";
2
+ import { join } from "path";
3
+ import { createInterface } from "readline";
4
// Interactive prompt shared across all setup questions.
const rl = createInterface({ input: process.stdin, output: process.stdout });

/** Promise wrapper around readline's callback-style question(). */
function ask(question) {
    return new Promise((resolve) => rl.question(question, resolve));
}

/**
 * Resolve the platform-specific Claude Desktop config file location.
 * @returns {string} absolute path to claude_desktop_config.json.
 */
function getConfigPath() {
    const home = process.env.HOME || process.env.USERPROFILE || "";
    switch (process.platform) {
        case "darwin":
            return join(home, "Library", "Application Support", "Claude", "claude_desktop_config.json");
        case "win32":
            return join(process.env.APPDATA || join(home, "AppData", "Roaming"), "Claude", "claude_desktop_config.json");
        default:
            // Linux and anything else: XDG-style dotfile location.
            return join(home, ".config", "claude", "claude_desktop_config.json");
    }
}
21
/**
 * Interactive setup: prompt for the two API keys and register browse-ai in
 * the Claude Desktop MCP config, merging with any existing configuration.
 * Exits with code 1 if either key is left blank.
 */
export async function runSetup() {
    console.log(`
browse-ai setup
================
Configure browse-ai for Claude Desktop / Cursor / Windsurf
`);
    const serpKey = await ask(" Tavily API key (get one at https://tavily.com): ");
    if (!serpKey.trim()) {
        console.log("\n Tavily API key is required. Get one at https://tavily.com\n");
        process.exit(1);
    }
    const openrouterKey = await ask(" OpenRouter API key (get one at https://openrouter.ai): ");
    if (!openrouterKey.trim()) {
        console.log("\n OpenRouter API key is required. Get one at https://openrouter.ai\n");
        process.exit(1);
    }
    rl.close();
    const mcpEntry = {
        command: "npx",
        args: ["-y", "browse-ai"],
        env: {
            SERP_API_KEY: serpKey.trim(),
            OPENROUTER_API_KEY: openrouterKey.trim(),
        },
    };
    const configPath = getConfigPath();
    console.log(`\n Config path: ${configPath}`);
    let config = { mcpServers: {} };
    if (existsSync(configPath)) {
        const raw = readFileSync(configPath, "utf-8");
        try {
            config = JSON.parse(raw);
            if (!config.mcpServers)
                config.mcpServers = {};
        }
        catch {
            // BUG FIX: previously an unparseable config was silently replaced,
            // destroying the user's other MCP server entries. Keep a backup of
            // the original file before writing a fresh one.
            const backupPath = `${configPath}.bak`;
            writeFileSync(backupPath, raw);
            console.log(` Could not parse existing config, backed it up to ${backupPath} and creating new one...`);
        }
    }
    else {
        // Strip the filename to get the directory (this build avoids importing dirname).
        const dir = configPath.replace(/[/\\][^/\\]+$/, "");
        mkdirSync(dir, { recursive: true });
    }
    config.mcpServers["browse-ai"] = mcpEntry;
    writeFileSync(configPath, JSON.stringify(config, null, 2));
    console.log(`
 Done! browse-ai has been configured.

 Next steps:
 1. Restart Claude Desktop
 2. You should see "browse-ai" in the MCP tools list
 3. Try asking: "Use browse_answer to explain quantum computing"

 Available tools:
   browse_search  - Search the web
   browse_open    - Fetch and parse a page
   browse_extract - Extract knowledge from a page
   browse_answer  - Full deep research pipeline
   browse_compare - Compare raw LLM vs evidence-backed answer

 Config written to: ${configPath}
`);
}
package/package.json ADDED
@@ -0,0 +1,41 @@
1
+ {
2
+ "name": "browse-ai",
3
+ "version": "0.1.0",
4
+ "type": "module",
5
+ "description": "Open-source deep research MCP server for AI agents. Search the web, extract claims, build evidence graphs, get structured answers with citations.",
6
+ "keywords": [
7
+ "mcp",
8
+ "claude",
9
+ "ai-agent",
10
+ "web-search",
11
+ "deep-research",
12
+ "model-context-protocol",
13
+ "cursor",
14
+ "windsurf"
15
+ ],
16
+ "license": "MIT",
17
+ "bin": {
18
+ "browse-ai": "dist/index.js"
19
+ },
20
+ "files": [
21
+ "dist",
22
+ "README.md"
23
+ ],
24
+ "scripts": {
25
+ "dev": "tsx src/index.ts",
26
+ "build": "tsc",
27
+ "start": "node dist/index.js",
28
+ "prepublishOnly": "tsc"
29
+ },
30
+ "dependencies": {
31
+ "@modelcontextprotocol/sdk": "^1.12.0",
32
+ "@mozilla/readability": "^0.5.0",
33
+ "linkedom": "^0.18.0",
34
+ "zod": "^3.25.76"
35
+ },
36
+ "devDependencies": {
37
+ "tsx": "^4.19.0",
38
+ "typescript": "^5.8.3",
39
+ "@types/node": "^22.16.5"
40
+ }
41
+ }