npm - openscholar-mcp - Versions diffs - 1.0.0 - Mend

openscholar-mcp 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (2) hide show

package/mcp_server.js +424 -0
package/package.json +16 -0

package/mcp_server.js ADDED Viewed

@@ -0,0 +1,424 @@
+#!/usr/bin/env node
+"use strict";
+/*
+ * OpenScholar MCP Server (Node.js)
+ *
+ * Dependency-free stdio MCP wrapper for Claude Code. It exposes the same tools
+ * as mcp_server.py and forwards all tool calls to serve_openscholar.py:
+ *
+ *   Claude Code <-> this file (stdio/MCP) <-> http://127.0.0.1:8006/api/mcp/tool
+ *
+ * Requires Node.js 18+ for built-in fetch.
+ */
+/**
+ * Reads a positive integer setting from the environment.
+ *
+ * @param {string} name - Environment variable to read.
+ * @param {number} fallback - Value used when the variable is unset, empty,
+ *   not numeric, or smaller than 1.
+ * @returns {number} The parsed integer, or `fallback`.
+ */
+function positiveIntFromEnv(name, fallback) {
+  const parsed = Number.parseInt(process.env[name] ?? "", 10);
+  // NaN must not escape: JSON.stringify turns it into `null`, which would end
+  // up in the advertised schema defaults and in the payloads sent to the backend.
+  return Number.isInteger(parsed) && parsed >= 1 ? parsed : fallback;
+}
+// Base URL of the backend. Trailing slashes are removed so that
+// `${OPEN_SCHOLAR_URL}/api/...` never contains a double slash.
+const OPEN_SCHOLAR_URL = (process.env.OPEN_SCHOLAR_URL || "http://127.0.0.1:8006").replace(/\/+$/, "");
+// Default `probes` argument for the tools that accept it (env override: PROBES).
+const PROBES = positiveIntFromEnv("PROBES", 20);
+// Default `top_n` argument for the tools that accept it (env override: TOP_N).
+const TOP_N = positiveIntFromEnv("TOP_N", 8);
+// Timeout for backend POST requests when the caller does not pass its own.
+const REQUEST_TIMEOUT_MS = 120000;
+/**
+ * Writes one prefixed diagnostic line to stderr.
+ *
+ * @param {*} message - Text to log; it is converted to a string.
+ */
+function log(message) {
+  const line = "[openScholar MCP] ".concat(message, "\n");
+  process.stderr.write(line);
+}
+/**
+ * Serialises a value as compact JSON text.
+ *
+ * @param {*} value - Value to serialise.
+ * @returns {string|undefined} Whatever JSON.stringify produces for `value`.
+ */
+function jsonText(value) {
+  const serialized = JSON.stringify(value);
+  return serialized;
+}
+/**
+ * POSTs a JSON payload and returns the decoded JSON reply.
+ *
+ * @param {string} url - Endpoint to call.
+ * @param {*} payload - Value sent as the JSON request body.
+ * @param {number} [timeoutMs=REQUEST_TIMEOUT_MS] - Delay after which the request is aborted.
+ * @returns {Promise<*>} The parsed reply body, or `{}` when the body is empty.
+ * @throws {Error} When fetch is unavailable or the reply status is not ok;
+ *   fetch and JSON.parse errors propagate unchanged.
+ */
+async function postJson(url, payload, timeoutMs = REQUEST_TIMEOUT_MS) {
+  if (typeof fetch !== "function") {
+    throw new Error("Node.js 18+ is required because this server uses built-in fetch.");
+  }
+  const abort = new AbortController();
+  const deadline = setTimeout(() => abort.abort(), timeoutMs);
+  try {
+    const request = {
+      method: "POST",
+      headers: {"Content-Type": "application/json"},
+      body: JSON.stringify(payload),
+      signal: abort.signal,
+    };
+    const reply = await fetch(url, request);
+    const body = await reply.text();
+    if (!reply.ok) {
+      throw new Error(`HTTP ${reply.status} from ${url}: ${body}`);
+    }
+    if (!body) {
+      return {};
+    }
+    return JSON.parse(body);
+  } finally {
+    // Runs on success and on every failure path, so the timer never lingers.
+    clearTimeout(deadline);
+  }
+}
+/**
+ * Probes the backend health endpoint once and logs the outcome to stderr.
+ *
+ * Any 2xx status counts as reachable, so a health route that answers 200 is
+ * not reported as though something were wrong. Request failures, including
+ * the 5-second abort, are caught and logged rather than thrown.
+ *
+ * @returns {Promise<void>}
+ */
+async function checkBackend() {
+  if (typeof fetch !== "function") {
+    log("Node.js 18+ is required because this server uses built-in fetch.");
+    return;
+  }
+  const controller = new AbortController();
+  const timer = setTimeout(() => controller.abort(), 5000);
+  try {
+    const response = await fetch(`${OPEN_SCHOLAR_URL}/api/health`, {
+      method: "GET",
+      signal: controller.signal,
+    });
+    if (response.ok) {
+      log(`Backend service reachable at ${OPEN_SCHOLAR_URL}`);
+    } else {
+      log(`Backend service returned HTTP ${response.status} at ${OPEN_SCHOLAR_URL}`);
+    }
+  } catch (error) {
+    log(`Backend service not reachable at ${OPEN_SCHOLAR_URL}. Ensure serve_openscholar.py is running before using MCP tools.`);
+  } finally {
+    clearTimeout(timer);
+  }
+}
+/**
+ * Forwards one tool invocation to the backend's `/api/mcp/tool` endpoint.
+ *
+ * @param {string} tool - Tool name placed in the request body.
+ * @param {object} arguments_ - Tool arguments placed in the request body.
+ * @param {number} [timeoutMs=REQUEST_TIMEOUT_MS] - Timeout handed to postJson.
+ * @returns {Promise<*>} The `result` field of the backend reply. On any
+ *   failure, a JSON string of the form `{"error": "..."}` instead of a throw.
+ */
+async function backendTool(tool, arguments_, timeoutMs = REQUEST_TIMEOUT_MS) {
+  const endpoint = `${OPEN_SCHOLAR_URL}/api/mcp/tool`;
+  try {
+    const reply = await postJson(endpoint, {tool, arguments: arguments_}, timeoutMs);
+    return reply.result;
+  } catch (failure) {
+    let reason;
+    if (failure && failure.name === "AbortError") {
+      // The abort is triggered by postJson's own timer.
+      reason = `${tool} request timed out.`;
+    } else {
+      const detail = failure && failure.message ? failure.message : String(failure);
+      reason = `${tool} failed: ${detail}`;
+    }
+    return jsonText({error: reason});
+  }
+}
+/**
+ * Builds a JSON Schema object type that rejects unlisted properties.
+ *
+ * @param {object} properties - Property name to property schema.
+ * @param {string[]} [required=[]] - Names of mandatory properties.
+ * @returns {object} The schema object.
+ */
+function schema(properties, required = []) {
+  const definition = {type: "object"};
+  definition.properties = properties;
+  definition.required = required;
+  definition.additionalProperties = false;
+  return definition;
+}
+// Property schema for a string; `default` is included only when one is given.
+const stringSchema = (description, defaultValue) => (
+  defaultValue === undefined
+    ? {type: "string", description}
+    : {type: "string", description, default: defaultValue}
+);
+// Property schema for an integer; `default` is included only when one is given.
+const integerSchema = (description, defaultValue) => (
+  defaultValue === undefined
+    ? {type: "integer", description}
+    : {type: "integer", description, default: defaultValue}
+);
+// Property schema for a boolean; `default` is included only when one is given.
+const booleanSchema = (description, defaultValue) => (
+  defaultValue === undefined
+    ? {type: "boolean", description}
+    : {type: "boolean", description, default: defaultValue}
+);
+// Property schema for an array of strings, defaulting to an empty array.
+const stringArraySchema = (description) => {
+  const element = {type: "string"};
+  return {type: "array", items: element, description, default: []};
+};
+// Property schema for an array of integers, defaulting to an empty array.
+const integerArraySchema = (description) => {
+  const element = {type: "integer"};
+  return {type: "array", items: element, description, default: []};
+};
+// Tool registry. Every entry has a `name`, a `description`, an `inputSchema`
+// built with schema(), and a `call(args)` function that forwards the arguments
+// to backendTool() under the same tool name, filling in defaults for optional
+// arguments. Defaults applied with `??` are used only for null/undefined;
+// defaults applied with `||` also replace other falsy values such as "".
+const tools = [
+  // search_papers: uses backendTool's default timeout.
+  {
+    name: "search_papers",
+    description: `Search academic literature with dense retrieval + cross-encoder reranking.
+Retrieves passages from PES2O (scientific corpus), reranks them with a cross-encoder model, and returns top-N results with metadata.
+IMPORTANT: PES2O database is English-only. Translate Chinese or non-English queries into English before searching.`,
+    inputSchema: schema({
+      query: stringSchema("The search query in English. Translate non-English queries first."),
+      top_n: integerSchema("Number of top results to return.", TOP_N),
+      probes: integerSchema("ivfflat probes for PES2O retrieval; controls search width.", PROBES),
+    }, ["query"]),
+    call: (args) => backendTool("search_papers", {
+      query: args.query,
+      top_n: args.top_n ?? TOP_N,
+      probes: args.probes ?? PROBES,
+    }),
+  },
+  // get_paper_details: no required arguments; missing lists are sent as [].
+  // Passes a 30000 ms timeout to backendTool.
+  {
+    name: "get_paper_details",
+    description: `Get bibliographic metadata for formatting reference entries.
+Use this for standard reference formats such as GB/T 7714, APA, MLA, and BibTeX-style entries. It is not intended as evidence text for LLM generation.`,
+    inputSchema: schema({
+      corpus_ids: integerArraySchema("Paper corpus IDs from search results."),
+      dois: stringArraySchema("DOIs from search results; recommended for best metadata."),
+      titles: stringArraySchema("Paper titles from search results; recommended for best metadata."),
+    }),
+    call: (args) => backendTool("get_paper_details", {
+      corpus_ids: args.corpus_ids || [],
+      dois: args.dois || [],
+      titles: args.titles || [],
+    }, 30000),
+  },
+  // generate_review: uses backendTool's default timeout.
+  {
+    name: "generate_review",
+    description: `Generate a comprehensive literature review prompt with citations on a research topic.
+This is the main tool for writing academic reviews. It searches PES2O, reranks results, enriches metadata, and returns a complete prompt with references. Claude must then write the review from that prompt.`,
+    inputSchema: schema({
+      question: stringSchema("The research question to review. Can be Chinese or English."),
+      top_n: integerSchema("Number of top passages to include as references.", TOP_N),
+      probes: integerSchema("ivfflat probes for PES2O retrieval; controls search width.", PROBES),
+      language: stringSchema("Output language: en for English, cn for Chinese.", "en"),
+      zero_shot: booleanSchema("If true, skip few-shot examples.", false),
+    }, ["question"]),
+    call: (args) => backendTool("generate_review", {
+      question: args.question,
+      top_n: args.top_n ?? TOP_N,
+      probes: args.probes ?? PROBES,
+      language: args.language || "en",
+      zero_shot: args.zero_shot ?? false,
+    }),
+  },
+  // generate_related_work: uses backendTool's default timeout.
+  {
+    name: "generate_related_work",
+    description: `Search literature and build a prompt for generating a related work section.
+Given a paper abstract, this tool searches for related papers and returns a prompt for writing related work with citations.`,
+    inputSchema: schema({
+      abstract: stringSchema("The abstract of the paper. English is recommended for best retrieval."),
+      top_n: integerSchema("Number of related passages to include.", TOP_N),
+      probes: integerSchema("ivfflat probes for PES2O retrieval; controls search width.", PROBES),
+      language: stringSchema("Output language: en for English, cn for Chinese.", "en"),
+    }, ["abstract"]),
+    call: (args) => backendTool("generate_related_work", {
+      abstract: args.abstract,
+      top_n: args.top_n ?? TOP_N,
+      probes: args.probes ?? PROBES,
+      language: args.language || "en",
+    }),
+  },
+  // generate_feedback: passes a 30000 ms timeout to backendTool.
+  {
+    name: "generate_feedback",
+    description: "Build a prompt for evaluating an answer and generating improvement suggestions.",
+    inputSchema: schema({
+      question: stringSchema("The original research question."),
+      answer: stringSchema("The current answer to evaluate."),
+      language: stringSchema("Output language: en for English, cn for Chinese.", "en"),
+    }, ["question", "answer"]),
+    call: (args) => backendTool("generate_feedback", {
+      question: args.question,
+      answer: args.answer,
+      language: args.language || "en",
+    }, 30000),
+  },
+  // edit_with_feedback: `new_references` is optional and sent as "" when absent.
+  // Passes a 30000 ms timeout to backendTool.
+  {
+    name: "edit_with_feedback",
+    description: "Build a prompt to incorporate feedback and improve an answer.",
+    inputSchema: schema({
+      question: stringSchema("The original research question."),
+      answer: stringSchema("The current answer."),
+      feedback: stringSchema("The feedback to incorporate."),
+      new_references: stringSchema("Additional references from supplemental retrieval.", ""),
+      language: stringSchema("Output language: en for English, cn for Chinese.", "en"),
+    }, ["question", "answer", "feedback"]),
+    call: (args) => backendTool("edit_with_feedback", {
+      question: args.question,
+      answer: args.answer,
+      feedback: args.feedback,
+      new_references: args.new_references || "",
+      language: args.language || "en",
+    }, 30000),
+  },
+  // add_citations: passes a 30000 ms timeout to backendTool.
+  {
+    name: "add_citations",
+    description: "Build a prompt for post-hoc citation insertion into an uncited paragraph.",
+    inputSchema: schema({
+      paragraph: stringSchema("The paragraph without citations."),
+      references: stringSchema("Reference passages formatted as [N] Title: ... Text: ..."),
+      language: stringSchema("Output language: en for English, cn for Chinese.", "en"),
+    }, ["paragraph", "references"]),
+    call: (args) => backendTool("add_citations", {
+      paragraph: args.paragraph,
+      references: args.references,
+      language: args.language || "en",
+    }, 30000),
+  },
+  // refine_answer: passes a 30000 ms timeout to backendTool.
+  {
+    name: "refine_answer",
+    description: "Build a prompt for final answer refinement while preserving citations.",
+    inputSchema: schema({
+      question: stringSchema("The original research question."),
+      answer: stringSchema("The answer to refine."),
+      language: stringSchema("Output language: en for English, cn for Chinese.", "en"),
+    }, ["question", "answer"]),
+    call: (args) => backendTool("refine_answer", {
+      question: args.question,
+      answer: args.answer,
+      language: args.language || "en",
+    }, 30000),
+  },
+];
+// Lookup table from tool name to its registry entry; a later entry with the
+// same name replaces an earlier one.
+const toolsByName = new Map();
+for (const entry of tools) {
+  toolsByName.set(entry.name, entry);
+}
+/**
+ * Returns the public description of every registered tool.
+ *
+ * @returns {Array<{name: *, description: *, inputSchema: *}>} One object per
+ *   tool, without the internal `call` function.
+ */
+function listTools() {
+  return tools.map(({name, description, inputSchema}) => ({name, description, inputSchema}));
+}
+/**
+ * Builds a JSON-RPC 2.0 success envelope.
+ *
+ * @param {*} id - Identifier of the request being answered.
+ * @param {*} result - Result value.
+ * @returns {{jsonrpc: string, id: *, result: *}}
+ */
+function response(id, result) {
+  const envelope = {jsonrpc: "2.0"};
+  envelope.id = id;
+  envelope.result = result;
+  return envelope;
+}
+/**
+ * Builds a JSON-RPC 2.0 error envelope.
+ *
+ * @param {*} id - Identifier of the request being answered.
+ * @param {number} code - Error code.
+ * @param {string} message - Error description.
+ * @returns {{jsonrpc: string, id: *, error: {code: number, message: string}}}
+ */
+function errorResponse(id, code, message) {
+  const failure = {code, message};
+  const envelope = {jsonrpc: "2.0"};
+  envelope.id = id;
+  envelope.error = failure;
+  return envelope;
+}
+// Framing used for outgoing messages. parseMessages() updates it to mirror
+// whatever the client last sent:
+//   "header" - `Content-Length: N` header block followed by N bytes of JSON
+//   "line"   - one JSON document per line (newline-delimited JSON)
+// The MCP stdio transport is newline-delimited, so "line" is what a
+// spec-conforming client triggers; "header" stays the default so clients that
+// already use header framing see no change.
+let outputFraming = "header";
+/**
+ * Writes one JSON-RPC message to stdout using the current output framing.
+ *
+ * @param {object} message - JSON-RPC envelope to send.
+ */
+function writeMessage(message) {
+  const json = JSON.stringify(message);
+  if (outputFraming === "line") {
+    // JSON.stringify escapes line breaks inside strings, so this is one line.
+    process.stdout.write(`${json}\n`);
+    return;
+  }
+  const payload = Buffer.from(json, "utf8");
+  // Content-Length counts bytes, not characters, hence the Buffer length.
+  process.stdout.write(`Content-Length: ${payload.length}\r\n\r\n`);
+  process.stdout.write(payload);
+}
+/**
+ * Converts a tool result to the text placed in the MCP reply.
+ * Strings pass through; objects and arrays become JSON instead of
+ * "[object Object]"; everything else is converted with String().
+ *
+ * @param {*} result - Value returned by a tool's `call` function.
+ * @returns {string}
+ */
+function toolResultText(result) {
+  if (typeof result === "string") {
+    return result;
+  }
+  if (result !== null && typeof result === "object") {
+    return JSON.stringify(result);
+  }
+  return String(result);
+}
+/**
+ * Runs the tool named in a `tools/call` request.
+ *
+ * @param {{name?: string, arguments?: object}} params - Request parameters.
+ * @returns {Promise<{content: Array<{type: string, text: string}>, isError: boolean}>}
+ *   The tool output as a single text item. `isError` is true for an unknown
+ *   tool name or when the tool's `call` function throws.
+ */
+async function callTool(params) {
+  const name = params && params.name;
+  const args = (params && params.arguments) || {};
+  const tool = toolsByName.get(name);
+  if (!tool) {
+    return {
+      content: [{type: "text", text: `Unknown tool: ${name}`}],
+      isError: true,
+    };
+  }
+  try {
+    const result = await tool.call(args);
+    return {
+      content: [{type: "text", text: toolResultText(result)}],
+      isError: false,
+    };
+  } catch (error) {
+    log(`Tool ${name} failed: ${error && error.stack ? error.stack : error}`);
+    return {
+      content: [{type: "text", text: `${error && error.name ? error.name : "Error"}: ${error && error.message ? error.message : String(error)}`}],
+      isError: true,
+    };
+  }
+}
+/**
+ * Handles one decoded JSON-RPC message and writes the reply, if any.
+ *
+ * Messages without an `id` (notifications) and payloads that are not objects
+ * are ignored. Unknown methods get a -32601 error; an exception while
+ * handling gets a -32603 error.
+ *
+ * @param {*} message - Decoded JSON payload.
+ * @returns {Promise<void>}
+ */
+async function handleMessage(message) {
+  if (message === null || typeof message !== "object") {
+    return;
+  }
+  const id = message.id;
+  if (id === undefined || id === null) {
+    return;
+  }
+  try {
+    const params = message.params || {};
+    switch (message.method) {
+      case "initialize":
+        writeMessage(response(id, {
+          protocolVersion: params.protocolVersion || "2024-11-05",
+          capabilities: {tools: {listChanged: false}},
+          serverInfo: {name: "openScholar", version: "1.0.0-node"},
+        }));
+        break;
+      case "ping":
+        writeMessage(response(id, {}));
+        break;
+      case "tools/list":
+        writeMessage(response(id, {tools: listTools()}));
+        break;
+      case "tools/call":
+        writeMessage(response(id, await callTool(params)));
+        break;
+      case "resources/list":
+        writeMessage(response(id, {resources: []}));
+        break;
+      case "prompts/list":
+        writeMessage(response(id, {prompts: []}));
+        break;
+      default:
+        writeMessage(errorResponse(id, -32601, `Method not found: ${message.method}`));
+        break;
+    }
+  } catch (error) {
+    log(`Failed to handle MCP message: ${error && error.stack ? error.stack : error}`);
+    writeMessage(errorResponse(id, -32603, "Internal error"));
+  }
+}
+// Bytes received on stdin that have not been decoded into a message yet.
+let inputBuffer = Buffer.alloc(0);
+// Tail of the promise chain that processes messages strictly one at a time.
+let processingQueue = Promise.resolve();
+/**
+ * Appends a message to the processing chain. A failure in one message is
+ * logged and does not stop later messages from being handled.
+ *
+ * @param {*} message - Decoded JSON payload.
+ */
+function enqueueMessage(message) {
+  processingQueue = processingQueue
+    .then(() => handleMessage(message))
+    .catch((error) => {
+      log(`Failed to process queued MCP message: ${error && error.stack ? error.stack : error}`);
+    });
+}
+/**
+ * Decodes one JSON payload and queues it; malformed JSON is logged and dropped.
+ *
+ * @param {string} payload - JSON text of a single message.
+ */
+function dispatchPayload(payload) {
+  try {
+    enqueueMessage(JSON.parse(payload));
+  } catch (error) {
+    log(`Failed to parse MCP message: ${error && error.stack ? error.stack : error}`);
+  }
+}
+/**
+ * Extracts every complete message currently held in `inputBuffer`.
+ *
+ * Two framings are recognised, chosen per message from its first byte:
+ *   - `{` or `[`: newline-delimited JSON, one message per line;
+ *   - anything else: a header block containing `Content-Length`, terminated
+ *     by a blank line and followed by exactly that many bytes of JSON.
+ * The framing of each decoded message becomes the framing used for replies.
+ * Incomplete data is left in the buffer until more bytes arrive.
+ */
+function parseMessages() {
+  while (true) {
+    // Skip blank space left between messages (space, tab, CR, LF).
+    let start = 0;
+    while (start < inputBuffer.length) {
+      const byte = inputBuffer[start];
+      if (byte !== 0x20 && byte !== 0x09 && byte !== 0x0d && byte !== 0x0a) {
+        break;
+      }
+      start += 1;
+    }
+    if (start > 0) {
+      inputBuffer = inputBuffer.subarray(start);
+    }
+    if (inputBuffer.length === 0) {
+      return;
+    }
+    const first = inputBuffer[0];
+    if (first === 0x7b || first === 0x5b) {
+      // Newline-delimited JSON: the message ends at the next LF.
+      const lineEnd = inputBuffer.indexOf(0x0a);
+      if (lineEnd < 0) {
+        return;
+      }
+      const line = inputBuffer.subarray(0, lineEnd).toString("utf8");
+      inputBuffer = inputBuffer.subarray(lineEnd + 1);
+      outputFraming = "line";
+      dispatchPayload(line);
+      continue;
+    }
+    // Header framing: use whichever blank-line separator comes first.
+    const headerEnd = inputBuffer.indexOf("\r\n\r\n");
+    const altHeaderEnd = inputBuffer.indexOf("\n\n");
+    let separatorLength = 4;
+    let index = headerEnd;
+    if (index < 0 || (altHeaderEnd >= 0 && altHeaderEnd < index)) {
+      index = altHeaderEnd;
+      separatorLength = 2;
+    }
+    if (index < 0) {
+      return;
+    }
+    const headerText = inputBuffer.subarray(0, index).toString("utf8");
+    const match = headerText.match(/content-length:\s*(\d+)/i);
+    if (!match) {
+      // A header block without a length cannot be framed; discard it.
+      inputBuffer = inputBuffer.subarray(index + separatorLength);
+      continue;
+    }
+    const length = Number.parseInt(match[1], 10);
+    const messageStart = index + separatorLength;
+    const messageEnd = messageStart + length;
+    if (inputBuffer.length < messageEnd) {
+      return;
+    }
+    const payload = inputBuffer.subarray(messageStart, messageEnd).toString("utf8");
+    inputBuffer = inputBuffer.subarray(messageEnd);
+    outputFraming = "header";
+    dispatchPayload(payload);
+  }
+}
+// Accumulate incoming bytes and decode whatever complete messages they form.
+process.stdin.on("data", (bytes) => {
+  inputBuffer = Buffer.concat([inputBuffer, bytes]);
+  parseMessages();
+});
+// When stdin closes, let the queued messages finish before exiting.
+process.stdin.on("end", () => {
+  processingQueue.then(() => {
+    process.exit(0);
+  });
+});
+// Startup probe of the backend; it only logs.
+checkBackend();

package/package.json ADDED Viewed

@@ -0,0 +1,16 @@
+{
+  "name": "openscholar-mcp",
+  "version": "1.0.0",
+  "description": "OpenScholar MCP Server - academic literature search and review for Claude Code",
+  "main": "mcp_server.js",
+  "bin": {
+    "openscholar-mcp": "mcp_server.js"
+  },
+  "files": [
+    "mcp_server.js"
+  ],
+  "engines": {
+    "node": ">=18"
+  },
+  "license": "MIT"
+}