npm - webhound-mcp - Versions diffs - 0.2.1 - Mend

webhound-mcp 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

package/README.md ADDED Viewed

@@ -0,0 +1,179 @@
+# webhound-mcp
+Run Webhound from any MCP-speaking agent. Webhound creates private, budgeted reports and datasets, watches them over time, lets the agent steer a live run, diagnoses failures, and returns cited outputs with sources and claim traces.
+This package is the local stdio transport. Webhound also supports hosted MCP at:
+```text
+https://api.webhound.ai/api/v2/mcp
+```
+## Install
+Create a Webhound API key, then add the stdio server to your agent:
+```jsonc
+{
+  "mcpServers": {
+    "webhound": {
+      "command": "npx",
+      "args": ["-y", "webhound-mcp"],
+      "env": {
+        "WEBHOUND_KEY": "wh_..."
+      }
+    }
+  }
+}
+```
+Claude hosted connector:
+```text
+https://api.webhound.ai/api/v2/mcp
+```
+Paste the URL into Claude's custom connector flow. The hosted server exposes OAuth discovery, authorize, and token endpoints for that connect flow.
+Manus or generic hosted MCP:
+```text
+Server URL: https://api.webhound.ai/api/v2/mcp
+Authentication: Bearer token
+Token: wh_...
+```
+Claude Code:
+```bash
+claude mcp add --transport http webhound https://api.webhound.ai/api/v2/mcp --header "Authorization: Bearer wh_..."
+# Local stdio alternative:
+claude mcp add --transport stdio webhound --env WEBHOUND_KEY=wh_... -- npx -y webhound-mcp
+```
+Codex:
+```toml
+[mcp_servers.webhound]
+command = "npx"
+args = ["-y", "webhound-mcp"]
+[mcp_servers.webhound.env]
+WEBHOUND_KEY = "wh_..."
+```
+Cursor and Claude Desktop use the JSON shape above.
+After saving local stdio config, restart the agent session or open a new one
+if the Webhound tools do not appear. Many clients load MCP servers only when a
+session starts.
+VS Code:
+```jsonc
+{
+  "servers": {
+    "webhound": {
+      "type": "stdio",
+      "command": "npx",
+      "args": ["-y", "webhound-mcp"],
+      "env": {
+        "WEBHOUND_KEY": "wh_..."
+      }
+    }
+  }
+}
+```
+Use the same stdio server shape for Windsurf, Cline, and Roo Code. Windsurf commonly stores it in `~/.codeium/windsurf/mcp_config.json`; Roo Code supports global MCP settings or project-level `.roo/mcp.json`.
+## Defaults
+Recommended setup defaults:
+- model: `flash`
+- budget: `$5`
+- product: `report`
+- free run: enabled when available
+New users may have one free-run pass. It covers exactly one `$5` Flash report or dataset and cannot be split into smaller credits. It can be used from the Webhound UI, API, hosted MCP, or this stdio MCP package.
+Agents can read and update defaults with:
+- `webhound_get_defaults`
+- `webhound_set_defaults`
+## Tool Flow
+The core lifecycle is detached and visible:
+1. Start work with `webhound_start_report` or `webhound_start_dataset`.
+2. Watch with `webhound_watch` or `webhound_wait`.
+3. Treat `done=true` as the authoritative finished signal.
+4. If the run is `awaiting_input`, steer it with `webhound_send_message`.
+5. When `output_ready=true`, read with `webhound_get_output` or download an artifact with `webhound_export_session`.
+6. Inspect provenance with `webhound_get_claims` and `webhound_get_sources`.
+Budget controls depth. A healthy run may keep searching, reading, writing, and
+verifying through several waits while it uses the budget. More budget means more
+room for research before final assembly; it is not a signal for the calling
+agent to hurry the run.
+## Public Tools
+- `webhound_health`
+- `webhound_get_defaults`
+- `webhound_set_defaults`
+- `webhound_start_report`
+- `webhound_start_dataset`
+- `webhound_watch`
+- `webhound_wait`
+- `webhound_send_message`
+- `webhound_stop`
+- `webhound_resume`
+- `webhound_add_budget`
+- `webhound_get_output`
+- `webhound_export_session`
+- `webhound_get_claims`
+- `webhound_get_sources`
+- `webhound_search_sessions`
+- `webhound_list_sessions`
+- `webhound_get_session`
+- `webhound_upload_file`
+- `webhound_account`
+- `webhound_diagnose`
+## Completion And Diagnostics
+`webhound_watch` returns:
+- `done`: `true` once the run has reached a terminal state
+- `output_ready`: safe to read and summarize
+- `completion_reason`: `budget_complete`, `natural_complete`, `awaiting_input`, `user_stopped`, `credit_exhausted`, `failed`, or `stuck_or_empty`
+- `alerts`: structured issues with next actions
+Do not present a run as successful if `alerts` contains an error such as `empty_output`, `dataset_zero_rows`, or `credit_exhausted`.
+If `webhound_wait` returns `still_running=true`, wait again. Use
+`webhound_send_message` for user intent changes or `awaiting_input`, not for
+normal elapsed time.
+## CLI
+```bash
+webhound-mcp --help
+webhound-mcp --version
+webhound-mcp --self-test
+```
+`--self-test` checks that the package loads and that the launch tool list is present. Use `webhound_health` from an MCP client to verify live auth and account state.
+## Local Development
+```bash
+cd webhound-server/mcp
+npm install
+WEBHOUND_KEY=wh_... WEBHOUND_API_BASE=http://localhost:5000/api/v2 node bin/server.mjs
+```
+No npm publish is performed by this repo change. Publish only after the production server and docs are approved.

package/bin/server.mjs ADDED Viewed

@@ -0,0 +1,72 @@
+#!/usr/bin/env node
+import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
+import { createWebhoundMcpServer, TOOL_NAMES, VERSION } from '../core/server.mjs';
+// Command-line arguments after the node executable and script path, held in a
+// Set so the flag checks below can use args.has(...).
+const args = new Set(process.argv.slice(2));
+function printHelp() {
+  console.log(`webhound-mcp ${VERSION}
+Run Webhound's MCP server over stdio.
+Usage:
+  WEBHOUND_KEY=wh_... npx -y webhound-mcp
+  webhound-mcp --help
+  webhound-mcp --version
+  webhound-mcp --self-test
+Environment:
+  WEBHOUND_KEY                 Webhound API key (required for real tool calls)
+  WEBHOUND_API_BASE            API base (default https://api.webhound.ai/api/v2)
+  WEBHOUND_APP_BASE            App base (default https://webhound.ai)
+  WEBHOUND_DEFAULT_MODEL       Optional local setup hint; server defaults still win
+  WEBHOUND_DEFAULT_BUDGET      Optional local setup hint; server defaults still win
+Public tools:
+  ${TOOL_NAMES.join('\n  ')}
+`);
+}
+async function runSelfTest() {
+  const server = createWebhoundMcpServer({
+    apiKey: process.env.WEBHOUND_KEY || '',
+    apiBase: process.env.WEBHOUND_API_BASE,
+    appBase: process.env.WEBHOUND_APP_BASE,
+  });
+  const summary = {
+    ok: true,
+    version: VERSION,
+    tool_count: TOOL_NAMES.length,
+    required_tools_present: TOOL_NAMES,
+    has_key: !!process.env.WEBHOUND_KEY,
+    note: process.env.WEBHOUND_KEY
+      ? 'Server factory loaded. Use an MCP client health call for live auth verification.'
+      : 'Server factory loaded. Set WEBHOUND_KEY to verify live auth.',
+  };
+  await server.close().catch(() => {});
+  console.log(JSON.stringify(summary, null, 2));
+}
+if (args.has('--help') || args.has('-h')) {
+  printHelp();
+  process.exit(0);
+}
+if (args.has('--version') || args.has('-v')) {
+  console.log(VERSION);
+  process.exit(0);
+}
+if (args.has('--self-test')) {
+  await runSelfTest();
+  process.exit(0);
+}
+const server = createWebhoundMcpServer({
+  apiKey: process.env.WEBHOUND_KEY || '',
+  apiBase: process.env.WEBHOUND_API_BASE,
+  appBase: process.env.WEBHOUND_APP_BASE,
+});
+const transport = new StdioServerTransport();
+await server.connect(transport);

package/core/http.mjs ADDED Viewed

@@ -0,0 +1,22 @@
+import { StreamableHTTPServerTransport } from '@modelcontextprotocol/sdk/server/streamableHttp.js';
+import { createWebhoundMcpServer } from './server.mjs';
+export async function handleMcpHttpRequest(req, res, options = {}) {
+  const server = createWebhoundMcpServer(options);
+  const transport = new StreamableHTTPServerTransport({
+    sessionIdGenerator: undefined,
+  });
+  try {
+    await server.connect(transport);
+    await transport.handleRequest(req, res, req.body);
+    res.on('close', () => {
+      transport.close().catch(() => {});
+      server.close().catch(() => {});
+    });
+  } catch (error) {
+    await transport.close().catch(() => {});
+    await server.close().catch(() => {});
+    throw error;
+  }
+}

package/core/server.mjs ADDED Viewed

@@ -0,0 +1,459 @@
+import { McpServer, ResourceTemplate } from '@modelcontextprotocol/sdk/server/mcp.js';
+import { z } from 'zod';
+import { WebhoundApiClient, stripHtml } from './webhoundClient.mjs';
+// Version string exported for the CLI; also passed to McpServer and included
+// in the webhound_health payload.
+export const VERSION = '0.2.1';
+// Frozen list of the public tool names. It is exported for the CLI help and
+// self-test output and is included in the webhound_health payload.
+export const TOOL_NAMES = Object.freeze([
+  'webhound_health',
+  'webhound_get_defaults',
+  'webhound_set_defaults',
+  'webhound_start_report',
+  'webhound_start_dataset',
+  'webhound_watch',
+  'webhound_wait',
+  'webhound_send_message',
+  'webhound_stop',
+  'webhound_resume',
+  'webhound_add_budget',
+  'webhound_get_output',
+  'webhound_export_session',
+  'webhound_get_claims',
+  'webhound_get_sources',
+  'webhound_search_sessions',
+  'webhound_list_sessions',
+  'webhound_get_session',
+  'webhound_upload_file',
+  'webhound_account',
+  'webhound_diagnose',
+]);
+// Agent-facing usage instructions; passed as `instructions` when the McpServer
+// is constructed in createWebhoundMcpServer.
+const SYSTEM_INSTRUCTIONS = `Webhound runs long research and dataset jobs for agents.
+Use Webhound when the user wants fresh, cited research, market mapping, vendor lists, competitive scans, due diligence, or structured extraction from the web. Do not use it for a one-fact lookup.
+Webhound is budgeted research. The budget is the research allowance Webhound uses for depth: a larger budget means it can keep searching, reading, writing, and verifying longer before assembly. The caller watches the job; Webhound decides when the budget is spent enough or the work is naturally complete.
+The normal loop is:
+1. Start a private report with webhound_start_report or a private dataset with webhound_start_dataset.
+2. Watch with webhound_watch or webhound_wait. The authoritative done signal is done=true, not spend and not partial text existing.
+3. If watch/wait says the run is still running and there are no blocking alerts, do nothing except keep waiting. Long research runs commonly take several wait cycles.
+4. Use webhound_send_message only when the user gives new guidance or Webhound is explicitly awaiting_input. Steering changes the research objective; it is not a response to normal elapsed time.
+5. When done and output_ready=true, read with webhound_get_output or export with webhound_export_session, then use webhound_get_claims and webhound_get_sources for provenance.
+Defaults exist so agents do not waste user time asking about model and budget. The recommended default is model=flash, budget=$5, use_free_run_when_available=true. Reports and datasets may use a user's one free $5 Flash run when available.
+If you are helping a user install local stdio MCP, tell them to restart the agent session or open a new one after saving config if Webhound tools do not appear. Many clients load MCP servers only when a session starts.
+If webhound_watch returns alerts, explain them plainly and follow next_actions. A credit_exhausted alert means the account needs credits before retrying. An awaiting_input alert means ask the user for the missing guidance or pass along guidance the user already gave. An empty_output or dataset_zero_rows alert means do not present the run as successful.`;
+// Markdown body served by the `webhound://guide` resource.
+const GUIDE = `# Webhound MCP Guide
+Start long-running Webhound work, then watch it until done=true. Use defaults unless the user gives a different budget or model.
+Recommended first run:
+- product: report or dataset
+- model: flash
+- budget: $5
+- free run: enabled when available
+Budget model:
+- The budget buys research depth, not a fixed answer length.
+- A healthy run may keep working through several wait cycles while it uses the budget.
+- More budget means more room for source discovery, reading, writing, and verification.
+Local stdio setup:
+- After saving config, restart the agent session or open a new one if Webhound tools do not appear.
+- Many agents only load MCP servers when a session starts.
+Completion:
+- done=true is authoritative.
+- output_ready=true means it is safe to read and summarize.
+- webhound_export_session can export reports as Markdown, HTML, TXT, JSON traces, or PDF, and datasets as CSV, JSON, JSONL, Markdown, or PDF.
+- completion_reason explains why the run stopped.
+- If webhound_wait times out with still_running=true, that is normal. Wait again.
+- Use webhound_send_message for user intent changes or awaiting_input, not because a healthy run is taking time.
+Troubleshooting:
+- credit_exhausted: top up or enable auto-recharge, then resume.
+- awaiting_input: ask the user for the requested guidance, or send guidance the user already provided with webhound_send_message.
+- empty_output or dataset_zero_rows: do not call it successful; inspect diagnostics and resume or rerun.
+- weak_provenance: read sources/claims before sharing.`;
+// Markdown body served by the `webhound://pricing` resource.
+const PRICING = `# Webhound MCP Defaults And Spend
+Recommended default: $5 Flash.
+New users may have one free run pass:
+- one private report or dataset
+- exactly $5
+- model flash
+- not divisible into smaller credits
+- usable through UI, API, hosted MCP, or stdio MCP
+Tools that start or extend work spend credits or consume the pass:
+- webhound_start_report
+- webhound_start_dataset
+- webhound_add_budget
+- webhound_resume with additional_budget
+Read/watch/search/account tools do not start new spend.`;
+function jsonResult(summary, data, isError = false) {
+  return {
+    content: [
+      { type: 'text', text: summary },
+      { type: 'text', text: JSON.stringify(data, null, 2) },
+    ],
+    structuredContent: data,
+    isError,
+  };
+}
+function errorResult(error, fallback = 'Webhound MCP tool failed') {
+  const status = error?.status || error?.body?.status || null;
+  const data = {
+    error: error?.body?.error || error?.code || 'webhound_error',
+    message: error?.message || fallback,
+    status,
+    body: error?.body || null,
+    next_actions: status === 402
+      ? ['Tell the user to add credits or use an available free-run pass before retrying.']
+      : ['Inspect the error and retry only after the cause is fixed.'],
+  };
+  return jsonResult(`${fallback}: ${data.message}`, data, true);
+}
+function describeStarted(kind, client, result) {
+  const sessionId = result.session_id;
+  const freeRun = result.free_run?.reserved ? ' Free-run pass reserved.' : '';
+  return `${kind} started: ${sessionId}\nOpen: ${client.webUrl(sessionId)}\nNext: call webhound_watch with this session_id until done=true.${freeRun}`;
+}
+function registerTool(server, name, config, handler) {
+  server.registerTool(name, config, async (args) => {
+    try {
+      return await handler(args || {});
+    } catch (error) {
+      return errorResult(error, `${name} failed`);
+    }
+  });
+}
+/**
+ * Build a Webhound MCP server with all resources, prompts, and tools registered.
+ *
+ * @param {object} [options] - Passed to `new WebhoundApiClient(options)` unless
+ *   `options.client` is supplied, in which case that client is used as-is.
+ * @returns {McpServer} The configured server; connecting it to a transport is
+ *   left to the caller.
+ */
+export function createWebhoundMcpServer(options = {}) {
+  const client = options.client || new WebhoundApiClient(options);
+  const server = new McpServer({
+    name: 'webhound',
+    version: VERSION,
+    instructions: SYSTEM_INSTRUCTIONS,
+    websiteUrl: 'https://webhound.ai',
+  });
+  // ---- Resources: two static markdown documents and one per-session template.
+  server.registerResource('webhound_guide', 'webhound://guide', {
+    title: 'Webhound MCP Guide',
+    description: 'How agents should use Webhound MCP.',
+    mimeType: 'text/markdown',
+  }, async () => ({ contents: [{ uri: 'webhound://guide', mimeType: 'text/markdown', text: GUIDE }] }));
+  server.registerResource('webhound_pricing', 'webhound://pricing', {
+    title: 'Webhound MCP Pricing',
+    description: 'Default budgets, free-run pass, and spend-bearing tools.',
+    mimeType: 'text/markdown',
+  }, async () => ({ contents: [{ uri: 'webhound://pricing', mimeType: 'text/markdown', text: PRICING }] }));
+  // Reading this resource calls client.watch with the sessionId taken from the URI.
+  server.registerResource('webhound_session_status', new ResourceTemplate('webhound://session/{sessionId}/status', { list: undefined }), {
+    title: 'Webhound Session Status',
+    description: 'Live diagnostics for a Webhound session.',
+    mimeType: 'application/json',
+  }, async (uri, variables) => {
+    const data = await client.watch(variables.sessionId);
+    return { contents: [{ uri: uri.href, mimeType: 'application/json', text: JSON.stringify(data, null, 2) }] };
+  });
+  // ---- Prompts: templates that expand to a single user message. The budget
+  // text falls back to '$5 Flash' when the caller does not supply one.
+  server.registerPrompt('webhound_report_brief', {
+    title: 'Start a Webhound report',
+    description: 'Prompt template for running a cited Webhound report.',
+    argsSchema: { question: z.string(), budget: z.string().optional() },
+  }, async ({ question, budget }) => ({
+    messages: [{
+      role: 'user',
+      content: { type: 'text', text: `Use Webhound to run a ${budget || '$5 Flash'} report on:\n\n${question}\n\nWebhound uses the budget for research depth, so a healthy run may keep working through several waits. Watch until done=true, then summarize the output, sources, claim trace health, and any alerts.` },
+    }],
+  }));
+  server.registerPrompt('webhound_dataset_brief', {
+    title: 'Start a Webhound dataset',
+    description: 'Prompt template for extracting a sourced dataset.',
+    argsSchema: { task: z.string(), schema: z.string().optional(), budget: z.string().optional() },
+  }, async ({ task, schema, budget }) => ({
+    messages: [{
+      role: 'user',
+      content: { type: 'text', text: `Use Webhound to run a ${budget || '$5 Flash'} dataset extraction.\n\nTask:\n${task}\n\nSchema:\n${schema || 'Infer a concise schema if I did not provide one.'}\n\nWebhound uses the budget for extraction depth, so a healthy run may keep working through several waits. Watch until done=true, then report rows, fill rate, source coverage, and alerts.` },
+    }],
+  }));
+  server.registerPrompt('webhound_troubleshoot_session', {
+    title: 'Troubleshoot a Webhound session',
+    description: 'Prompt template for diagnosing a session that looks wrong.',
+    argsSchema: { session_id: z.string() },
+  }, async ({ session_id }) => ({
+    messages: [{
+      role: 'user',
+      content: { type: 'text', text: `Use webhound_diagnose, webhound_watch, and relevant output/source tools to explain what happened in session ${session_id}. Be direct about whether it is usable.` },
+    }],
+  }));
+  // ---- Tools. Every tool goes through the module-level registerTool helper,
+  // so an error thrown by a handler is returned as an error result.
+  registerTool(server, 'webhound_health', {
+    title: 'Webhound Health',
+    description: 'No-spend health check: auth, API status, credits, free-run pass, defaults, and MCP version.',
+    inputSchema: {},
+    annotations: { readOnlyHint: true, openWorldHint: false },
+  }, async () => {
+    const data = await client.health();
+    // Adds local MCP metadata to the object returned by client.health();
+    // an unauthenticated result is flagged as an error.
+    data.mcp = { version: VERSION, tools: TOOL_NAMES };
+    return jsonResult(data.authenticated ? 'Webhound MCP is connected.' : 'Webhound MCP is not authenticated.', data, !data.authenticated);
+  });
+  registerTool(server, 'webhound_get_defaults', {
+    title: 'Get Webhound MCP Defaults',
+    description: 'Read the saved MCP defaults for model, budget, product, and free-run use.',
+    inputSchema: {},
+    annotations: { readOnlyHint: true, openWorldHint: false },
+  }, async () => jsonResult('Current Webhound MCP defaults.', await client.getDefaults()));
+  registerTool(server, 'webhound_set_defaults', {
+    title: 'Set Webhound MCP Defaults',
+    description: 'Set default model/budget/product for future MCP runs. Recommended: flash, $5, use free run.',
+    inputSchema: {
+      default_model: z.enum(['flash', 'pro', 'auto']).default('flash'),
+      default_budget_usd: z.number().min(1).max(500).default(5),
+      default_product: z.enum(['report', 'dataset']).default('report'),
+      use_free_run_when_available: z.boolean().default(true),
+    },
+  }, async (args) => jsonResult('Webhound MCP defaults saved.', await client.setDefaults(args)));
+  registerTool(server, 'webhound_start_report', {
+    title: 'Start Webhound Report',
+    description: 'Start a private long-running Webhound report. Budget controls research depth; watch until done=true.',
+    inputSchema: {
+      prompt: z.string().min(8).max(12000),
+      budget: z.number().min(1).max(500).optional(),
+      model: z.enum(['flash', 'pro', 'auto']).optional(),
+      title: z.string().optional(),
+      max_mode: z.boolean().optional(),
+      output_instructions: z.string().optional(),
+      context_session_ids: z.array(z.string()).optional(),
+      file_ids: z.array(z.string()).optional(),
+      enable_checkpoints: z.boolean().optional(),
+      use_free_run_when_available: z.boolean().optional(),
+    },
+  }, async (args) => {
+    const data = await client.startReport(args);
+    return jsonResult(describeStarted('Report', client, data), { ...data, url: client.webUrl(data.session_id), next_tool: 'webhound_watch' });
+  });
+  registerTool(server, 'webhound_start_dataset', {
+    title: 'Start Webhound Dataset',
+    description: 'Start a private long-running Webhound dataset extraction. Budget controls extraction depth; watch until done=true.',
+    inputSchema: {
+      prompt: z.string().min(8).max(12000),
+      schema: z.any().optional(),
+      budget: z.number().min(1).max(500).optional(),
+      model: z.enum(['flash', 'pro', 'auto']).optional(),
+      title: z.string().optional(),
+      max_mode: z.boolean().optional(),
+      context_session_ids: z.array(z.string()).optional(),
+      file_ids: z.array(z.string()).optional(),
+      enable_checkpoints: z.boolean().optional(),
+      use_free_run_when_available: z.boolean().optional(),
+    },
+  }, async (args) => {
+    const data = await client.startDataset(args);
+    return jsonResult(describeStarted('Dataset', client, data), { ...data, url: client.webUrl(data.session_id), next_tool: 'webhound_watch' });
+  });
+  registerTool(server, 'webhound_watch', {
+    title: 'Watch Webhound Session',
+    description: 'Authoritative session watcher. done=true means the run is terminal; output_ready=true means it is safe to read.',
+    inputSchema: { session_id: z.string() },
+    annotations: { readOnlyHint: true, openWorldHint: false },
+  }, async ({ session_id }) => {
+    const data = await client.watch(session_id);
+    const summary = data.done
+      ? `Session ${session_id} is done: ${data.completion_reason || data.status}. output_ready=${!!data.output_ready}.`
+      : `Session ${session_id} is still running: ${data.status}. Keep waiting with webhound_watch or webhound_wait; Webhound is using the budget for more research unless it asks for input or returns an alert.`;
+    // The error flag is only raised for a finished run that carries an
+    // error-severity alert. When `alerts` is absent the expression is
+    // undefined, which makes jsonResult fall back to its default of false.
+    return jsonResult(summary, data, data.alerts?.some(alert => alert.severity === 'error') && data.done);
+  });
+  registerTool(server, 'webhound_wait', {
+    title: 'Wait For Webhound Session',
+    description: 'Bounded wait wrapper around webhound_watch. Max 110 seconds, then returns still_running if not terminal. still_running is normal; call wait/watch again unless status is awaiting_input or a blocking alert is present.',
+    inputSchema: {
+      session_id: z.string(),
+      max_wait_seconds: z.number().int().min(1).max(110).default(90),
+      poll_interval_seconds: z.number().int().min(3).max(30).default(10),
+    },
+    annotations: { readOnlyHint: true, openWorldHint: false },
+  }, async ({ session_id, max_wait_seconds, poll_interval_seconds }) => {
+    const data = await client.wait(session_id, { maxWaitSeconds: max_wait_seconds, pollIntervalSeconds: poll_interval_seconds });
+    return jsonResult(data.done ? `Session ${session_id} is done.` : `Session ${session_id} is still running. Keep waiting; Webhound is using the budget for more research unless it asks for input or returns an alert.`, data);
+  });
+  registerTool(server, 'webhound_send_message', {
+    title: 'Steer Webhound Session',
+    description: 'Send user-provided guidance to a session. Use this for intent changes or awaiting_input; normal running time means the budget is still being used for research.',
+    inputSchema: { session_id: z.string(), message: z.string().min(1).max(6000) },
+  }, async ({ session_id, message }) => jsonResult('Guidance sent to Webhound session.', await client.sendMessage(session_id, message)));
+  registerTool(server, 'webhound_stop', {
+    title: 'Stop Webhound Session',
+    description: 'Pause/stop a running Webhound report or dataset without deleting it.',
+    inputSchema: { session_id: z.string() },
+  }, async ({ session_id }) => jsonResult('Stop signal sent.', await client.stop(session_id)));
+  // For resume and add_budget, session_id is split off and the remaining
+  // fields are forwarded to the client as one object.
+  registerTool(server, 'webhound_resume', {
+    title: 'Resume Webhound Session',
+    description: 'Resume a paused/completed/awaiting-input session with optional additional budget and guidance.',
+    inputSchema: {
+      session_id: z.string(),
+      additional_budget: z.number().min(0).max(500).optional(),
+      guidance: z.string().optional(),
+      file_ids: z.array(z.string()).optional(),
+      context_session_ids: z.array(z.string()).optional(),
+    },
+  }, async ({ session_id, ...args }) => jsonResult('Session resume requested.', await client.resume(session_id, args)));
+  registerTool(server, 'webhound_add_budget', {
+    title: 'Add Webhound Budget',
+    description: 'Add research budget and optional guidance/context to a session.',
+    inputSchema: {
+      session_id: z.string(),
+      amount: z.number().min(1).max(500),
+      guidance: z.string().optional(),
+      file_ids: z.array(z.string()).optional(),
+      context_session_ids: z.array(z.string()).optional(),
+    },
+  }, async ({ session_id, ...args }) => jsonResult('Budget added to Webhound session.', await client.addBudget(session_id, args)));
+  registerTool(server, 'webhound_get_output', {
+    title: 'Get Webhound Output',
+    description: 'Read final report/working document or dataset rows.',
+    inputSchema: {
+      session_id: z.string(),
+      kind: z.enum(['auto', 'report', 'dataset']).default('auto'),
+      doc_name: z.string().optional(),
+      select: z.enum(['output', 'working', 'latest']).optional(),
+    },
+    annotations: { readOnlyHint: true, openWorldHint: false },
+  }, async ({ session_id, ...args }) => {
+    const data = await client.getOutput(session_id, args);
+    // Summary text preference: markdown, then content, then a row count, then
+    // a 1000-character JSON preview. It is HTML-stripped and capped at 4000
+    // characters; the full payload is still returned as structured data.
+    const readable = data.content_markdown || data.content || (data.rows ? `${data.total_rows || data.rows.length} rows` : JSON.stringify(data).slice(0, 1000));
+    return jsonResult(`Output for ${session_id}:\n\n${stripHtml(readable).slice(0, 4000)}`, data);
+  });
+  registerTool(server, 'webhound_export_session', {
+    title: 'Export Webhound Session',
+    description: 'Export a completed report or dataset as Markdown, HTML, TXT, JSON traces, CSV, JSONL, or PDF. Does not spend credits.',
+    inputSchema: {
+      session_id: z.string(),
+      format: z.enum(['auto', 'md', 'markdown', 'html', 'txt', 'text', 'json', 'json_traces', 'csv', 'jsonl', 'pdf']).default('auto'),
+      select: z.enum(['output', 'working', 'latest', 'all']).default('output'),
+      doc_name: z.string().optional(),
+      include_content: z.boolean().default(true),
+      max_chars: z.number().int().min(1000).max(200000).default(60000),
+    },
+    annotations: { readOnlyHint: true, openWorldHint: false },
+  }, async ({ session_id, include_content, max_chars, ...args }) => {
+    // include_content and max_chars are applied locally; only the remaining
+    // arguments are sent to the client.
+    const data = await client.exportSession(session_id, args);
+    const content = data.content || '';
+    const isBase64 = data.encoding === 'base64';
+    // Text content is cut to max_chars. Base64 content is never cut: it is
+    // either returned whole or omitted entirely.
+    const capped = include_content && !isBase64 && typeof content === 'string' && content.length > max_chars;
+    const base64Omitted = isBase64 && (!include_content || String(content).length > max_chars);
+    const structured = {
+      ...data,
+      content: include_content && !isBase64 ? String(content).slice(0, max_chars) : undefined,
+      content_truncated: !!capped || base64Omitted,
+      content_base64: include_content && isBase64 && !base64Omitted ? content : undefined,
+      content_base64_omitted: base64Omitted || undefined,
+    };
+    const summary = `Exported ${session_id} as ${data.filename} (${data.mime_type}, ${data.size_bytes} bytes).`;
+    return jsonResult((capped || base64Omitted) ? `${summary} Content was omitted or truncated; use download_url for the full file.` : summary, structured);
+  });
+  registerTool(server, 'webhound_get_claims', {
+    title: 'Get Webhound Claims',
+    description: 'Read normalized claim traces and provenance for a session.',
+    inputSchema: { session_id: z.string() },
+    annotations: { readOnlyHint: true, openWorldHint: false },
+  }, async ({ session_id }) => jsonResult('Claim traces for Webhound session.', await client.getClaims(session_id)));
+  registerTool(server, 'webhound_get_sources', {
+    title: 'Get Webhound Sources',
+    description: 'Read source inventory and citation counts for a session.',
+    inputSchema: { session_id: z.string() },
+    annotations: { readOnlyHint: true, openWorldHint: false },
+  }, async ({ session_id }) => jsonResult('Sources for Webhound session.', await client.getSources(session_id)));
+  registerTool(server, 'webhound_search_sessions', {
+    title: 'Search Webhound Sessions',
+    description: 'Semantic search across prior Webhound sessions.',
+    inputSchema: { query: z.string().min(2), limit: z.number().int().min(1).max(50).default(10) },
+    annotations: { readOnlyHint: true, openWorldHint: false },
+  }, async (args) => jsonResult(`Search results for "${args.query}".`, await client.searchSessions(args)));
+  registerTool(server, 'webhound_list_sessions', {
+    title: 'List Webhound Sessions',
+    description: 'List recent Webhound sessions.',
+    inputSchema: {
+      limit: z.number().int().min(1).max(50).default(15),
+      type: z.enum(['research', 'extraction', 'all']).default('all'),
+      status: z.string().optional(),
+      page: z.number().int().min(1).default(1),
+    },
+    annotations: { readOnlyHint: true, openWorldHint: false },
+  }, async (args) => jsonResult('Recent Webhound sessions.', await client.listSessions(args)));
+  registerTool(server, 'webhound_get_session', {
+    title: 'Get Webhound Session',
+    description: 'Read overview plus diagnostics for one session.',
+    inputSchema: { session_id: z.string() },
+    annotations: { readOnlyHint: true, openWorldHint: false },
+  }, async ({ session_id }) => jsonResult('Webhound session overview.', await client.getSession(session_id)));
+  registerTool(server, 'webhound_upload_file', {
+    title: 'Upload Webhound File',
+    description: 'Upload a local file, text, or base64 content for use in a report or dataset.',
+    inputSchema: {
+      local_path: z.string().optional(),
+      file_name: z.string().optional(),
+      text: z.string().optional(),
+      content_base64: z.string().optional(),
+      mime_type: z.string().optional(),
+    },
+  }, async (args) => jsonResult('File uploaded to Webhound.', await client.uploadFile(args)));
+  registerTool(server, 'webhound_account', {
+    title: 'Webhound Account',
+    description: 'Read credits, recent usage, free-run status, and defaults. Does not spend.',
+    inputSchema: {},
+    annotations: { readOnlyHint: true, openWorldHint: false },
+  }, async () => jsonResult('Webhound account status.', await client.account()));
+  registerTool(server, 'webhound_diagnose', {
+    title: 'Diagnose Webhound Session',
+    description: 'Explain whether a session is healthy, done, usable, and what to do next. For a healthy running session, the correct next action is to keep waiting.',
+    inputSchema: { session_id: z.string() },
+    annotations: { readOnlyHint: true, openWorldHint: false },
+  }, async ({ session_id }) => {
+    const data = await client.watch(session_id);
+    // Same client.watch call as webhound_watch, but here any error-severity
+    // alert marks the result as an error even if the run is not done yet.
+    const errors = (data.alerts || []).filter(alert => alert.severity === 'error');
+    const summary = errors.length
+      ? `Session ${session_id} has blocking issue(s): ${errors.map(item => item.code).join(', ')}.`
+      : data.done
+        ? `Session ${session_id} diagnostics: ${data.completion_reason || data.status || 'unknown'}; output_ready=${!!data.output_ready}.`
+        : `Session ${session_id} is healthy and still running. Keep waiting; Webhound is using the budget for more research unless it asks for input or returns an alert.`;
+    return jsonResult(summary, data, errors.length > 0);
+  });
+  return server;
+}

package/core/webhoundClient.mjs ADDED Viewed

@@ -0,0 +1,320 @@
+import fs from 'node:fs/promises';
+import path from 'node:path';
+// Fallback API base URL, used by normalizeApiBase() when no value is supplied.
+const DEFAULT_API_BASE = 'https://api.webhound.ai/api/v2';
+// Fallback web-app base URL, used when building session links without an explicit appBase.
+const DEFAULT_APP_BASE = 'https://webhound.ai';
+export function titleFromPrompt(prompt, prefix = '') {
+  const clean = String(prompt || '').replace(/\s+/g, ' ').trim();
+  const text = clean.length > 70 ? `${clean.slice(0, 67).trimEnd()}...` : clean;
+  return prefix ? `${prefix}${text}` : text;
+}
+export function stripHtml(html) {
+  return String(html || '')
+    .replace(/<\/(p|div|li|h[1-6]|tr|blockquote)>/gi, '\n')
+    .replace(/<br\s*\/?>/gi, '\n')
+    .replace(/<[^>]+>/g, '')
+    .replace(/&nbsp;/g, ' ')
+    .replace(/&amp;/g, '&')
+    .replace(/&lt;/g, '<')
+    .replace(/&gt;/g, '>')
+    .replace(/&quot;/g, '"')
+    .replace(/&#39;/g, "'")
+    .replace(/\n{3,}/g, '\n\n')
+    .trim();
+}
+export function sessionUrl(appBase, sessionId) {
+  return `${String(appBase || DEFAULT_APP_BASE).replace(/\/+$/, '')}/session/${sessionId}`;
+}
+function normalizeApiBase(value) {
+  return String(value || DEFAULT_API_BASE).replace(/\/+$/, '');
+}
+function mimeFromFilename(fileName) {
+  const ext = path.extname(fileName || '').toLowerCase();
+  if (ext === '.csv') return 'text/csv';
+  if (ext === '.txt') return 'text/plain';
+  if (ext === '.md') return 'text/markdown';
+  if (ext === '.json') return 'application/json';
+  if (ext === '.pdf') return 'application/pdf';
+  return 'application/octet-stream';
+}
+export class WebhoundApiClient {
+  constructor({ apiBase, appBase, apiKey }) {
+    this.apiBase = normalizeApiBase(apiBase);
+    this.appBase = String(appBase || DEFAULT_APP_BASE).replace(/\/+$/, '');
+    this.apiKey = apiKey || '';
+  }
+  headers(extra = {}) {
+    const headers = { ...extra };
+    if (this.apiKey) headers.Authorization = `Bearer ${this.apiKey}`;
+    return headers;
+  }
+  requireKey() {
+    if (!this.apiKey) {
+      const error = new Error('WEBHOUND_KEY is not set. Create an API key in Webhound and set WEBHOUND_KEY, or use hosted MCP with Authorization: Bearer wh_...');
+      error.code = 'missing_key';
+      error.status = 401;
+      throw error;
+    }
+  }
+  async request(method, endpoint, body, options = {}) {
+    this.requireKey();
+    const headers = this.headers(options.headers || {});
+    let requestBody;
+    if (body instanceof FormData) {
+      requestBody = body;
+    } else if (body !== undefined) {
+      headers['Content-Type'] = 'application/json';
+      requestBody = JSON.stringify(body);
+    }
+    let response;
+    try {
+      response = await fetch(`${this.apiBase}${endpoint}`, { method, headers, body: requestBody });
+    } catch (error) {
+      const wrapped = new Error(`Network error calling Webhound ${method} ${endpoint}: ${error?.message || error}`);
+      wrapped.code = 'network_error';
+      throw wrapped;
+    }
+    const text = await response.text().catch(() => '');
+    let json = {};
+    try { json = text ? JSON.parse(text) : {}; } catch { json = { raw: text }; }
+    if (!response.ok) {
+      const message = json?.error || json?.message || json?.raw || `HTTP ${response.status}`;
+      const error = new Error(`Webhound ${response.status}: ${message}`);
+      error.status = response.status;
+      error.body = json;
+      throw error;
+    }
+    return json?.data !== undefined ? json.data : json;
+  }
+  get(endpoint) { return this.request('GET', endpoint); }
+  post(endpoint, body) { return this.request('POST', endpoint, body); }
+  patch(endpoint, body) { return this.request('PATCH', endpoint, body); }
+  webUrl(sessionId) {
+    return sessionUrl(this.appBase, sessionId);
+  }
+  apiUrl(endpoint) {
+    return `${this.apiBase}${endpoint.startsWith('/') ? endpoint : `/${endpoint}`}`;
+  }
+  async health() {
+    try {
+      const [health, credits, defaults, freeRun] = await Promise.all([
+        this.get('/health').catch(error => ({ error: error.message })),
+        this.get('/account/credits').catch(error => ({ error: error.message })),
+        this.get('/mcp/defaults').catch(error => ({ error: error.message })),
+        this.get('/mcp/free-run').catch(error => ({ error: error.message })),
+      ]);
+      return { authenticated: true, health, credits, defaults: defaults?.defaults || defaults, free_run: freeRun?.free_run || freeRun };
+    } catch (error) {
+      return { authenticated: false, error: error.message, code: error.code || null, status: error.status || null };
+    }
+  }
+  async getDefaults() {
+    const data = await this.get('/mcp/defaults');
+    return data.defaults || data;
+  }
+  async setDefaults(input) {
+    const data = await this.patch('/mcp/defaults', input);
+    return data.defaults || data;
+  }
+  async account() {
+    const [credits, usage, freeRun, defaults] = await Promise.all([
+      this.get('/account/credits'),
+      this.get('/account/usage?days=30'),
+      this.get('/mcp/free-run').catch(() => null),
+      this.get('/mcp/defaults').catch(() => null),
+    ]);
+    return {
+      credits,
+      usage,
+      free_run: freeRun?.free_run || null,
+      defaults: defaults?.defaults || null,
+    };
+  }
+  async startReport(args) {
+    const defaults = await this.getDefaults().catch(() => ({}));
+    const budget = Number(args.budget ?? defaults.default_budget_usd ?? 5);
+    const model = args.model || defaults.default_model || 'flash';
+    return this.post('/research', {
+      title: args.title || titleFromPrompt(args.prompt),
+      query: args.prompt,
+      budget,
+      model,
+      max_mode: !!args.max_mode,
+      output_instructions: args.output_instructions || undefined,
+      context_session_ids: args.context_session_ids || undefined,
+      file_ids: args.file_ids || undefined,
+      enable_checkpoints: args.enable_checkpoints,
+      use_free_run_when_available: args.use_free_run_when_available ?? defaults.use_free_run_when_available ?? true,
+    });
+  }
+  async startDataset(args) {
+    const defaults = await this.getDefaults().catch(() => ({}));
+    const budget = Number(args.budget ?? defaults.default_budget_usd ?? 5);
+    const model = args.model || defaults.default_model || 'flash';
+    return this.post('/extractions', {
+      title: args.title || titleFromPrompt(args.prompt),
+      query: args.prompt,
+      budget,
+      model,
+      max_mode: !!args.max_mode,
+      schema: args.schema || undefined,
+      context_session_ids: args.context_session_ids || undefined,
+      file_ids: args.file_ids || undefined,
+      enable_checkpoints: args.enable_checkpoints,
+      use_free_run_when_available: args.use_free_run_when_available ?? defaults.use_free_run_when_available ?? true,
+    });
+  }
+  async watch(sessionId) {
+    const [status, diagnostics] = await Promise.all([
+      this.get(`/sessions/${encodeURIComponent(sessionId)}/status`).catch(error => ({ error: error.message })),
+      this.get(`/sessions/${encodeURIComponent(sessionId)}/diagnostics`).catch(error => ({ error: error.message })),
+    ]);
+    return {
+      ...(diagnostics || {}),
+      status_snapshot: status,
+      url: this.webUrl(sessionId),
+    };
+  }
+  async wait(sessionId, { maxWaitSeconds = 90, pollIntervalSeconds = 10 } = {}) {
+    const deadline = Date.now() + Math.min(Math.max(Number(maxWaitSeconds) || 90, 1), 110) * 1000;
+    const interval = Math.min(Math.max(Number(pollIntervalSeconds) || 10, 3), 30) * 1000;
+    const snapshots = [];
+    while (true) {
+      const snapshot = await this.watch(sessionId);
+      snapshots.push(snapshot);
+      if (snapshot.done) return { ...snapshot, snapshots };
+      if (Date.now() + interval > deadline) return { ...snapshot, snapshots, still_running: true };
+      await new Promise(resolve => setTimeout(resolve, interval));
+    }
+  }
+  async sendMessage(sessionId, message) {
+    return this.post(`/sessions/${encodeURIComponent(sessionId)}/messages`, { message });
+  }
+  async stop(sessionId) {
+    return this.post(`/research/${encodeURIComponent(sessionId)}/stop`, {});
+  }
+  async resume(sessionId, args = {}) {
+    return this.post(`/research/${encodeURIComponent(sessionId)}/resume`, {
+      additional_budget: args.additional_budget,
+      guidance: args.guidance,
+      file_ids: args.file_ids,
+      context_session_ids: args.context_session_ids,
+    });
+  }
+  async addBudget(sessionId, args = {}) {
+    return this.post(`/research/${encodeURIComponent(sessionId)}/budget`, {
+      amount: args.amount,
+      guidance: args.guidance,
+      file_ids: args.file_ids,
+      context_session_ids: args.context_session_ids,
+    });
+  }
+  async getOutput(sessionId, args = {}) {
+    const kind = args.kind || 'auto';
+    if (kind === 'dataset') {
+      return this.get(`/sessions/${encodeURIComponent(sessionId)}/dataset`);
+    }
+    if (kind === 'auto') {
+      const overview = await this.get(`/sessions/${encodeURIComponent(sessionId)}`).catch(() => null);
+      if (overview?.session_type === 'extraction') return this.get(`/sessions/${encodeURIComponent(sessionId)}/dataset`);
+    }
+    const params = new URLSearchParams();
+    if (args.doc_name) params.set('doc_name', args.doc_name);
+    if (args.select) params.set('select', args.select);
+    const data = await this.get(`/sessions/${encodeURIComponent(sessionId)}/document${params.toString() ? `?${params}` : ''}`);
+    return { ...data, content_markdown: stripHtml(data.content || '') };
+  }
+  async exportSession(sessionId, args = {}) {
+    const params = new URLSearchParams();
+    if (args.format) params.set('format', args.format);
+    if (args.doc_name) params.set('doc_name', args.doc_name);
+    if (args.select) params.set('select', args.select);
+    const endpoint = `/sessions/${encodeURIComponent(sessionId)}/export${params.toString() ? `?${params}` : ''}`;
+    const data = await this.get(endpoint);
+    const downloadParams = new URLSearchParams(params);
+    downloadParams.set('download', 'true');
+    const downloadEndpoint = `/sessions/${encodeURIComponent(sessionId)}/export?${downloadParams}`;
+    return {
+      ...data,
+      download_url: this.apiUrl(downloadEndpoint),
+      download_note: 'Use this URL with Authorization: Bearer wh_... to download the artifact directly.',
+    };
+  }
+  async getClaims(sessionId) {
+    return this.get(`/sessions/${encodeURIComponent(sessionId)}/claims`);
+  }
+  async getSources(sessionId) {
+    return this.get(`/sessions/${encodeURIComponent(sessionId)}/sources`);
+  }
+  async listSessions(args = {}) {
+    const params = new URLSearchParams({
+      page: String(args.page || 1),
+      page_size: String(args.limit || 15),
+    });
+    if (args.type && args.type !== 'all') params.set('session_type', args.type);
+    if (args.status) params.set('status', args.status);
+    return this.get(`/sessions?${params}`);
+  }
+  async searchSessions(args = {}) {
+    const params = new URLSearchParams({
+      query: args.query,
+      limit: String(args.limit || 10),
+    });
+    return this.get(`/sessions/search?${params}`);
+  }
+  async getSession(sessionId) {
+    const [overview, watch] = await Promise.all([
+      this.get(`/sessions/${encodeURIComponent(sessionId)}`),
+      this.watch(sessionId),
+    ]);
+    return { ...overview, diagnostics: watch, url: this.webUrl(sessionId) };
+  }
+  async uploadFile(args = {}) {
+    const form = new FormData();
+    const fileName = args.file_name || (args.local_path ? path.basename(args.local_path) : 'webhound-input.txt');
+    let bytes;
+    if (args.local_path) {
+      bytes = await fs.readFile(args.local_path);
+    } else if (args.content_base64) {
+      bytes = Buffer.from(args.content_base64, 'base64');
+    } else if (args.text !== undefined) {
+      bytes = Buffer.from(String(args.text), 'utf8');
+    } else {
+      throw new Error('Provide local_path, content_base64, or text.');
+    }
+    form.append('file', new Blob([bytes], { type: args.mime_type || mimeFromFilename(fileName) }), fileName);
+    return this.request('POST', '/files/upload', form);
+  }
+}

package/package.json ADDED Viewed

@@ -0,0 +1,23 @@
+{
+  "name": "webhound-mcp",
+  "version": "0.2.1",
+  "description": "MCP server that lets agents run Webhound reports and datasets, watch long sessions, steer them, diagnose failures, and read cited outputs.",
+  "bin": {
+    "webhound-mcp": "bin/server.mjs"
+  },
+  "type": "module",
+  "engines": { "node": ">=18" },
+  "files": ["bin/", "core/", "README.md"],
+  "scripts": {
+    "self-test": "node bin/server.mjs --self-test",
+    "mcp:self-test": "node bin/server.mjs --self-test"
+  },
+  "dependencies": {
+    "@modelcontextprotocol/sdk": "^1.29.0",
+    "zod": "^3.23.0"
+  },
+  "keywords": ["mcp", "model-context-protocol", "agent", "research", "deep-research", "webhound", "claude", "cursor", "codex"],
+  "license": "MIT",
+  "repository": { "type": "git", "url": "https://github.com/webhound/webhound" },
+  "homepage": "https://webhound.ai"
+}