browse-ai 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,124 @@
1
+ # browse-ai
2
+
3
+ Open-source deep research MCP server for AI agents.
4
+
5
+ Turn any AI assistant into a research engine with real-time web search, evidence extraction, and structured citations.
6
+
7
+ ## What it does
8
+
9
+ Instead of letting your AI hallucinate, `browse-ai` gives it real-time access to the web with **structured, cited answers**:
10
+
11
+ ```
12
+ Your question → Web search → Fetch pages → Extract claims → Build evidence graph → Cited answer
13
+ ```
14
+
15
+ Every answer includes:
16
+ - **Claims** with source URLs
17
+ - **Confidence score** (0-1)
18
+ - **Source quotes** from actual web pages
19
+ - **Execution trace** with timing
20
+
21
+ ## Quick Start
22
+
23
+ ```bash
24
+ npx browse-ai setup
25
+ ```
26
+
27
+ This auto-configures Claude Desktop. You'll need:
28
+ - [Tavily API key](https://tavily.com) (free tier available)
29
+ - [OpenRouter API key](https://openrouter.ai)
30
+
31
+ ## Manual Setup
32
+
33
+ ### Claude Desktop
34
+
35
+ Add to `~/Library/Application Support/Claude/claude_desktop_config.json` (macOS; on Windows the file is at `%APPDATA%\Claude\claude_desktop_config.json`):
36
+
37
+ ```json
38
+ {
39
+ "mcpServers": {
40
+ "browse-ai": {
41
+ "command": "npx",
42
+ "args": ["-y", "browse-ai"],
43
+ "env": {
44
+ "SERP_API_KEY": "tvly-your-key",
45
+ "OPENROUTER_API_KEY": "your-openrouter-key"
46
+ }
47
+ }
48
+ }
49
+ }
50
+ ```
51
+
52
+ ### Cursor / Windsurf
53
+
54
+ Add to your MCP settings:
55
+
56
+ ```json
57
+ {
58
+ "browse-ai": {
59
+ "command": "npx",
60
+ "args": ["-y", "browse-ai"],
61
+ "env": {
62
+ "SERP_API_KEY": "tvly-your-key",
63
+ "OPENROUTER_API_KEY": "your-openrouter-key"
64
+ }
65
+ }
66
+ }
67
+ ```
68
+
69
+ ## MCP Tools
70
+
71
+ | Tool | Description |
72
+ |------|-------------|
73
+ | `browse_search` | Search the web via Tavily |
74
+ | `browse_open` | Fetch and parse a page into clean text |
75
+ | `browse_extract` | Extract structured knowledge from a page |
76
+ | `browse_answer` | Full pipeline: search + extract + cite |
77
+ | `browse_compare` | Compare raw LLM vs evidence-backed answer |
78
+
79
+ ## Example
80
+
81
+ Ask Claude: *"Use browse_answer to explain what causes aurora borealis"*
82
+
83
+ Response:
84
+ ```json
85
+ {
86
+ "answer": "Aurora borealis occurs when charged particles from the Sun...",
87
+ "claims": [
88
+ {
89
+ "claim": "Aurora borealis is caused by solar wind particles...",
90
+ "sources": ["https://en.wikipedia.org/wiki/Aurora"]
91
+ }
92
+ ],
93
+ "sources": [
94
+ {
95
+ "url": "https://en.wikipedia.org/wiki/Aurora",
96
+ "title": "Aurora - Wikipedia",
97
+ "domain": "en.wikipedia.org",
98
+ "quote": "An aurora is a natural light display..."
99
+ }
100
+ ],
101
+ "confidence": 0.92
102
+ }
103
+ ```
104
+
105
+ ## Why browse-ai?
106
+
107
+ | Feature | Raw LLM | browse-ai |
108
+ |---------|---------|-----------|
109
+ | Sources | None | Real URLs with quotes |
110
+ | Citations | Hallucinated | Verified from pages |
111
+ | Confidence | Unknown | 0-1 score |
112
+ | Freshness | Training data | Real-time web |
113
+ | Claims | Mixed in text | Structured + linked |
114
+
115
+ ## Tech Stack
116
+
117
+ - **Search**: Tavily API
118
+ - **Parsing**: @mozilla/readability + linkedom
119
+ - **AI**: OpenRouter (100+ models)
120
+ - **Protocol**: Model Context Protocol (MCP)
121
+
122
+ ## License
123
+
124
+ MIT
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env node
2
+ export {};
package/dist/index.js ADDED
@@ -0,0 +1,368 @@
1
+ #!/usr/bin/env node
2
+ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
3
+ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
4
+ import { z } from "zod";
5
+ import { Readability } from "@mozilla/readability";
6
+ import { parseHTML } from "linkedom";
7
// --- Constants (inlined for standalone npm package) ---
const VERSION = "0.1.0";
const LLM_MODEL = "google/gemini-2.5-flash";
const LLM_ENDPOINT = "https://openrouter.ai/api/v1/chat/completions";
const TAVILY_ENDPOINT = "https://api.tavily.com/search";
// Max characters of page text handed to the LLM per source.
const MAX_PAGE_CONTENT_LENGTH = 3000;
// --- CLI handling ---
const args = process.argv.slice(2);
if (args.includes("--help") || args.includes("-h")) {
    // Tool names below match the identifiers registered with server.tool()
    // (the previous help text listed dotted names like "browse.search",
    // which do not exist — the server registers underscore names).
    console.log(`
browse-ai v${VERSION}
Open-source deep research MCP server for AI agents

Usage:
  browse-ai              Start the MCP server (stdio transport)
  browse-ai setup        Auto-configure Claude Desktop
  browse-ai --help       Show this help
  browse-ai --version    Show version

Environment Variables:
  SERP_API_KEY           Tavily API key (get one at https://tavily.com)
  OPENROUTER_API_KEY     OpenRouter API key (get one at https://openrouter.ai)

MCP Tools:
  browse_search          Search the web for information
  browse_open            Fetch and parse a web page
  browse_extract         Extract structured knowledge from a page
  browse_answer          Full pipeline: search + extract + answer
  browse_compare         Compare raw LLM vs evidence-backed answer

Quick Setup:
  1. Get API keys: https://tavily.com + https://openrouter.ai
  2. Run: npx browse-ai setup
  3. Restart Claude Desktop
`);
    process.exit(0);
}
if (args.includes("--version") || args.includes("-v")) {
    console.log(VERSION);
    process.exit(0);
}
if (args[0] === "setup") {
    // Handle rejection explicitly: the previous code left this dynamic-import
    // promise floating, so a failure became an unhandled rejection.
    import("./setup.js")
        .then((m) => m.runSetup())
        .catch((err) => {
            console.error("browse-ai setup failed:", err);
            process.exit(1);
        });
}
else {
    // --- Start MCP server ---
    startServer();
}
55
// --- Env validation ---
/**
 * Read the two required API keys from the environment.
 * Exits the process (code 1) with setup instructions when either is missing.
 * @returns {{ SERP_API_KEY: string, OPENROUTER_API_KEY: string }}
 */
function getEnvKeys() {
    const { SERP_API_KEY, OPENROUTER_API_KEY } = process.env;
    // Happy path first: both keys present.
    if (SERP_API_KEY && OPENROUTER_API_KEY) {
        return { SERP_API_KEY, OPENROUTER_API_KEY };
    }
    console.error(`
browse-ai: Missing required environment variables

${!SERP_API_KEY ? " SERP_API_KEY - Get one at https://tavily.com" : " SERP_API_KEY - Set"}
${!OPENROUTER_API_KEY ? " OPENROUTER_API_KEY - Get one at https://openrouter.ai" : " OPENROUTER_API_KEY - Set"}

Quick fix: run 'npx browse-ai setup' to configure automatically.
`);
    process.exit(1);
}
72
// --- In-memory cache ---
// Process-lifetime TTL cache; entries are evicted lazily on read.
const cache = new Map();

/** Return the cached value for `key`, or null when absent or past its TTL. */
function cacheGet(key) {
    const hit = cache.get(key);
    if (hit !== undefined && Date.now() <= hit.expires) {
        return hit.value;
    }
    // Missing or stale: drop the entry so the map does not hold dead data.
    cache.delete(key);
    return null;
}

/** Store `value` under `key` for `ttl` seconds (default 5 minutes). */
function cacheSet(key, value, ttl = 300) {
    const expires = Date.now() + ttl * 1000;
    cache.set(key, { value, expires });
}
85
// --- Tavily search ---
/**
 * Search the web via the Tavily API.
 * Results are cached for 10 minutes per (query, limit) pair.
 * @param {string} query - Search query text.
 * @param {number} [limit=5] - Maximum number of results to request.
 * @returns {Promise<Array<{url: string, title: string, snippet: string, score: number}>>}
 * @throws {Error} when the HTTP request fails (non-2xx status).
 */
async function tavilySearch(query, limit = 5) {
    const { SERP_API_KEY } = getEnvKeys();
    const cacheKey = `search:${query}:${limit}`;
    const cached = cacheGet(cacheKey);
    if (cached)
        return JSON.parse(cached);
    const res = await fetch(TAVILY_ENDPOINT, {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
            api_key: SERP_API_KEY,
            query,
            max_results: limit,
            include_raw_content: false,
            search_depth: "basic",
        }),
    });
    if (!res.ok)
        throw new Error(`Tavily search failed: ${res.status}`);
    const data = await res.json();
    // Guard: a 2xx body without a `results` array previously crashed on .map;
    // treat it as zero hits instead.
    const results = (data.results ?? []).map((r) => ({
        url: r.url,
        title: r.title,
        snippet: r.content,
        score: r.score,
    }));
    cacheSet(cacheKey, JSON.stringify(results), 600);
    return results;
}
114
// --- Readability page fetch ---
/**
 * Fetch a URL and reduce it to readable text via Mozilla Readability.
 * Parsed pages are cached for 30 minutes; the fetch aborts after 10 seconds.
 * @param {string} url - Absolute URL to fetch.
 * @returns {Promise<{title: string, content: string, excerpt: string, siteName: string}>}
 * @throws {Error} on HTTP failure, timeout, or when Readability cannot parse the page.
 */
async function fetchPage(url) {
    const cached = cacheGet(`page:${url}`);
    if (cached)
        return JSON.parse(cached);
    const res = await fetch(url, {
        headers: {
            "User-Agent": "Mozilla/5.0 (compatible; BrowseAI/1.0)",
            Accept: "text/html,application/xhtml+xml",
        },
        signal: AbortSignal.timeout(10000),
    });
    if (!res.ok)
        throw new Error(`Failed to fetch ${url}: ${res.status}`);
    const html = await res.text();
    const { document } = parseHTML(html);
    const article = new Readability(document).parse();
    if (!article)
        throw new Error(`Could not parse ${url}`);
    const page = {
        title: article.title,
        // Readability's textContent can be null on degenerate documents;
        // previously that crashed on .slice — coerce to empty string instead.
        content: (article.textContent ?? "").slice(0, MAX_PAGE_CONTENT_LENGTH * 2),
        excerpt: article.excerpt || "",
        siteName: article.siteName,
    };
    cacheSet(`page:${url}`, JSON.stringify(page), 1800);
    return page;
}
143
// --- LLM knowledge extraction (via OpenRouter) ---
/**
 * Turn fetched web page text into structured, cited knowledge.
 * Forces a tool call ("return_knowledge") so the model must emit JSON that
 * matches the schema below rather than free-form prose.
 * @param {string} query - The user's question.
 * @param {string} pageContents - Concatenated "[Source N] URL/title/body" excerpts.
 * @returns {Promise<Object>} parsed tool arguments: { answer, confidence, claims, sources }.
 * @throws {Error} on HTTP failure or when the model returns no tool call.
 */
async function extractKnowledge(query, pageContents) {
    const { OPENROUTER_API_KEY } = getEnvKeys();
    const res = await fetch(LLM_ENDPOINT, {
        method: "POST",
        headers: {
            Authorization: `Bearer ${OPENROUTER_API_KEY}`,
            "Content-Type": "application/json",
        },
        body: JSON.stringify({
            model: LLM_MODEL,
            messages: [
                {
                    role: "system",
                    content: "You are a knowledge extraction engine. Given web page content, extract structured claims with source attribution and write a clear answer. Use only extracted evidence. Never invent sources. Preserve citations. Return a JSON object using the tool provided.",
                },
                {
                    role: "user",
                    content: `Question: ${query}\n\nWeb sources:\n${pageContents}`,
                },
            ],
            // JSON schema the model must satisfy when calling return_knowledge.
            tools: [
                {
                    type: "function",
                    function: {
                        name: "return_knowledge",
                        description: "Return extracted knowledge with claims, sources, answer, and confidence",
                        parameters: {
                            type: "object",
                            properties: {
                                answer: { type: "string" },
                                confidence: { type: "number" },
                                claims: {
                                    type: "array",
                                    items: {
                                        type: "object",
                                        properties: {
                                            claim: { type: "string" },
                                            sources: {
                                                type: "array",
                                                items: { type: "string" },
                                            },
                                        },
                                        required: ["claim", "sources"],
                                    },
                                },
                                sources: {
                                    type: "array",
                                    items: {
                                        type: "object",
                                        properties: {
                                            url: { type: "string" },
                                            title: { type: "string" },
                                            domain: { type: "string" },
                                            quote: { type: "string" },
                                        },
                                        required: ["url", "title", "domain", "quote"],
                                    },
                                },
                            },
                            required: ["answer", "confidence", "claims", "sources"],
                            additionalProperties: false,
                        },
                    },
                },
            ],
            // Require the model to answer via the tool (no plain-text replies).
            tool_choice: {
                type: "function",
                function: { name: "return_knowledge" },
            },
        }),
    });
    if (!res.ok)
        throw new Error(`LLM failed: ${res.status}`);
    const data = await res.json();
    const toolCall = data.choices?.[0]?.message?.tool_calls?.[0];
    if (!toolCall)
        throw new Error("LLM did not return structured output");
    // Tool-call arguments arrive as a JSON string; parse to the structured object.
    return JSON.parse(toolCall.function.arguments);
}
223
// --- Raw LLM call (no sources, for compare) ---
/**
 * Ask the LLM directly with no web evidence — used by browse_compare as the
 * "hallucination-prone" baseline against the grounded pipeline.
 * @param {string} query - The user's question.
 * @returns {Promise<string>} the model's plain-text answer.
 * @throws {Error} when the HTTP request fails.
 */
async function rawLLMAnswer(query) {
    const { OPENROUTER_API_KEY } = getEnvKeys();
    const payload = {
        model: LLM_MODEL,
        messages: [
            {
                role: "system",
                content: "Answer the question clearly and concisely.",
            },
            { role: "user", content: query },
        ],
    };
    const res = await fetch(LLM_ENDPOINT, {
        method: "POST",
        headers: {
            Authorization: `Bearer ${OPENROUTER_API_KEY}`,
            "Content-Type": "application/json",
        },
        body: JSON.stringify(payload),
    });
    if (!res.ok) {
        throw new Error(`LLM failed: ${res.status}`);
    }
    const data = await res.json();
    return data.choices?.[0]?.message?.content || "No response";
}
248
/**
 * Full research pipeline: search → fetch pages → extract cited knowledge.
 * @param {string} query - The user's question.
 * @returns {Promise<{answer: string, claims: Array, sources: Array, confidence: number, trace: Array}>}
 */
async function answerPipeline(query) {
    const trace = [];
    const searchStart = Date.now();
    const searchResults = await tavilySearch(query);
    trace.push({
        step: "Search Web",
        duration_ms: Date.now() - searchStart,
        detail: `${searchResults.length} results`,
    });
    const scrapeStart = Date.now();
    // Fetch pages in parallel, keeping each page paired with its own URL.
    // BUG FIX: the previous code filtered out failed fetches and then indexed
    // back into the unfiltered searchResults array, so any failed fetch
    // shifted citation URLs onto the wrong pages.
    const settled = await Promise.allSettled(searchResults.slice(0, 5).map(async (r) => ({
        url: r.url,
        page: await fetchPage(r.url),
    })));
    const successfulPages = settled
        .filter((p) => p.status === "fulfilled")
        .map((p) => p.value);
    trace.push({
        step: "Fetch Pages",
        duration_ms: Date.now() - scrapeStart,
        detail: `${successfulPages.length} pages`,
    });
    const pageContents = successfulPages
        .map(({ url, page }, i) => `[Source ${i + 1}] URL: ${url}\nTitle: ${page.title}\n\n${page.content.slice(0, MAX_PAGE_CONTENT_LENGTH)}`)
        .join("\n\n---\n\n");
    const llmStart = Date.now();
    const knowledge = await extractKnowledge(query, pageContents);
    const llmDuration = Date.now() - llmStart;
    // NOTE(review): the three steps below share a single LLM call; the
    // 40/10/50 split is a cosmetic breakdown, not separately measured timing.
    trace.push({
        step: "Extract Claims",
        duration_ms: Math.round(llmDuration * 0.4),
        detail: `${knowledge.claims?.length || 0} claims`,
    });
    trace.push({
        step: "Build Evidence Graph",
        duration_ms: Math.round(llmDuration * 0.1),
        detail: `${knowledge.sources?.length || 0} sources`,
    });
    trace.push({
        step: "Generate Answer",
        duration_ms: Math.round(llmDuration * 0.5),
        detail: "OpenRouter",
    });
    return {
        answer: knowledge.answer,
        claims: knowledge.claims || [],
        sources: knowledge.sources || [],
        confidence: knowledge.confidence || 0.85,
        trace,
    };
}
296
// --- MCP Server ---
// Registers the five browse_* tools and serves them over stdio transport.
// Exits early (via getEnvKeys) if the required API keys are not configured.
function startServer() {
    // Validate env before starting
    getEnvKeys();
    const server = new McpServer({
        name: "browse-ai",
        version: VERSION,
    });
    // browse_search: thin wrapper over Tavily search (defaults to 5 results).
    server.tool("browse_search", "Search the web for information on a topic. Returns URLs, titles, snippets, and relevance scores.", { query: z.string(), limit: z.number().optional() }, async ({ query, limit }) => {
        const results = await tavilySearch(query, limit ?? 5);
        return {
            content: [{ type: "text", text: JSON.stringify(results, null, 2) }],
        };
    });
    // browse_open: fetch one URL and return Readability-cleaned text.
    server.tool("browse_open", "Fetch and parse a web page into clean text using Readability. Strips ads, nav, and boilerplate.", { url: z.string() }, async ({ url }) => {
        const page = await fetchPage(url);
        return {
            content: [{ type: "text", text: JSON.stringify(page, null, 2) }],
        };
    });
    // browse_extract: single-page extraction; when no query is given, falls
    // back to asking for a summary of the page's domain.
    server.tool("browse_extract", "Extract structured knowledge (claims + sources + confidence) from a single web page using AI.", { url: z.string(), query: z.string().optional() }, async ({ url, query }) => {
        const page = await fetchPage(url);
        const domain = new URL(url).hostname;
        const pageContent = `[Source 1] URL: ${url}\nTitle: ${page.title}\n\n${page.content.slice(0, MAX_PAGE_CONTENT_LENGTH)}`;
        const q = query || `Summarize the content from ${domain}`;
        const result = await extractKnowledge(q, pageContent);
        return {
            content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
        };
    });
    // browse_answer: the full search → fetch → extract pipeline.
    server.tool("browse_answer", "Full deep research pipeline: search the web, fetch pages, extract claims, build evidence graph, and generate a structured answer with citations and confidence score.", { query: z.string() }, async ({ query }) => {
        const result = await answerPipeline(query);
        return {
            content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
        };
    });
    // browse_compare: run the ungrounded baseline and the grounded pipeline
    // in parallel and return them side by side.
    server.tool("browse_compare", "Compare a raw LLM answer (no sources) vs an evidence-backed answer. Shows the difference between hallucination-prone and grounded responses.", { query: z.string() }, async ({ query }) => {
        const [rawAnswer, evidenceResult] = await Promise.all([
            rawLLMAnswer(query),
            answerPipeline(query),
        ]);
        const comparison = {
            query,
            raw_llm: {
                answer: rawAnswer,
                sources: 0,
                claims: 0,
                confidence: null,
            },
            evidence_backed: {
                answer: evidenceResult.answer,
                sources: evidenceResult.sources.length,
                claims: evidenceResult.claims.length,
                confidence: evidenceResult.confidence,
                citations: evidenceResult.sources,
            },
        };
        return {
            content: [
                { type: "text", text: JSON.stringify(comparison, null, 2) },
            ],
        };
    });
    // Connect over stdio; log to stderr so stdout stays clean for MCP framing.
    async function run() {
        const transport = new StdioServerTransport();
        await server.connect(transport);
        console.error(`browse-ai v${VERSION} MCP server running on stdio`);
    }
    run().catch((err) => {
        console.error("Failed to start browse-ai:", err);
        process.exit(1);
    });
}
@@ -0,0 +1 @@
1
+ export declare function runSetup(): Promise<void>;
package/dist/setup.js ADDED
@@ -0,0 +1,82 @@
1
+ import { readFileSync, writeFileSync, existsSync, mkdirSync } from "fs";
2
+ import { join } from "path";
3
+ import { createInterface } from "readline";
4
// Interactive prompt shared across all setup questions.
const rl = createInterface({ input: process.stdin, output: process.stdout });

/** Promise wrapper around readline's callback-style question(). */
function ask(question) {
    return new Promise((resolve) => rl.question(question, resolve));
}

/**
 * Resolve the platform-specific Claude Desktop config file location.
 * @returns {string} absolute path to claude_desktop_config.json.
 */
function getConfigPath() {
    const home = process.env.HOME || process.env.USERPROFILE || "";
    switch (process.platform) {
        case "darwin":
            return join(home, "Library", "Application Support", "Claude", "claude_desktop_config.json");
        case "win32":
            return join(process.env.APPDATA || join(home, "AppData", "Roaming"), "Claude", "claude_desktop_config.json");
        default:
            // Linux and anything else: XDG-style dotfile location.
            return join(home, ".config", "claude", "claude_desktop_config.json");
    }
}
21
/**
 * Interactive setup: prompt for the two API keys and register browse-ai in
 * the Claude Desktop MCP config, merging with any existing configuration.
 * Exits with code 1 if either key is left blank.
 */
export async function runSetup() {
    console.log(`
browse-ai setup
================
Configure browse-ai for Claude Desktop / Cursor / Windsurf
`);
    const serpKey = await ask(" Tavily API key (get one at https://tavily.com): ");
    if (!serpKey.trim()) {
        console.log("\n Tavily API key is required. Get one at https://tavily.com\n");
        process.exit(1);
    }
    const openrouterKey = await ask(" OpenRouter API key (get one at https://openrouter.ai): ");
    if (!openrouterKey.trim()) {
        console.log("\n OpenRouter API key is required. Get one at https://openrouter.ai\n");
        process.exit(1);
    }
    rl.close();
    const mcpEntry = {
        command: "npx",
        args: ["-y", "browse-ai"],
        env: {
            SERP_API_KEY: serpKey.trim(),
            OPENROUTER_API_KEY: openrouterKey.trim(),
        },
    };
    const configPath = getConfigPath();
    console.log(`\n Config path: ${configPath}`);
    let config = { mcpServers: {} };
    if (existsSync(configPath)) {
        const raw = readFileSync(configPath, "utf-8");
        try {
            config = JSON.parse(raw);
            if (!config.mcpServers)
                config.mcpServers = {};
        }
        catch {
            // BUG FIX: previously an unparseable config was silently replaced,
            // destroying the user's other MCP server entries. Keep a backup of
            // the original file before writing a fresh one.
            const backupPath = `${configPath}.bak`;
            writeFileSync(backupPath, raw);
            console.log(` Could not parse existing config, backed it up to ${backupPath} and creating new one...`);
        }
    }
    else {
        // Strip the filename to get the directory (this build avoids importing dirname).
        const dir = configPath.replace(/[/\\][^/\\]+$/, "");
        mkdirSync(dir, { recursive: true });
    }
    config.mcpServers["browse-ai"] = mcpEntry;
    writeFileSync(configPath, JSON.stringify(config, null, 2));
    console.log(`
 Done! browse-ai has been configured.

 Next steps:
 1. Restart Claude Desktop
 2. You should see "browse-ai" in the MCP tools list
 3. Try asking: "Use browse_answer to explain quantum computing"

 Available tools:
   browse_search  - Search the web
   browse_open    - Fetch and parse a page
   browse_extract - Extract knowledge from a page
   browse_answer  - Full deep research pipeline
   browse_compare - Compare raw LLM vs evidence-backed answer

 Config written to: ${configPath}
`);
}
package/package.json ADDED
@@ -0,0 +1,41 @@
1
+ {
2
+ "name": "browse-ai",
3
+ "version": "0.1.0",
4
+ "type": "module",
5
+ "description": "Open-source deep research MCP server for AI agents. Search the web, extract claims, build evidence graphs, get structured answers with citations.",
6
+ "keywords": [
7
+ "mcp",
8
+ "claude",
9
+ "ai-agent",
10
+ "web-search",
11
+ "deep-research",
12
+ "model-context-protocol",
13
+ "cursor",
14
+ "windsurf"
15
+ ],
16
+ "license": "MIT",
17
+ "bin": {
18
+ "browse-ai": "dist/index.js"
19
+ },
20
+ "files": [
21
+ "dist",
22
+ "README.md"
23
+ ],
24
+ "scripts": {
25
+ "dev": "tsx src/index.ts",
26
+ "build": "tsc",
27
+ "start": "node dist/index.js",
28
+ "prepublishOnly": "tsc"
29
+ },
30
+ "dependencies": {
31
+ "@modelcontextprotocol/sdk": "^1.12.0",
32
+ "@mozilla/readability": "^0.5.0",
33
+ "linkedom": "^0.18.0",
34
+ "zod": "^3.25.76"
35
+ },
36
+ "devDependencies": {
37
+ "tsx": "^4.19.0",
38
+ "typescript": "^5.8.3",
39
+ "@types/node": "^22.16.5"
40
+ }
41
+ }