npm - openalex-mcp-server - Versions diffs - 1.0.1 - Mend

openalex-mcp-server 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30) hide show

package/.claude/README.md +680 -0
package/.claude/commands/prd.md +138 -0
package/.claude/commands/ralph-yolo.md +346 -0
package/.claude/commands/ralph.md +226 -0
package/.claude/ralph-config.json +17 -0
package/.claude/scripts/prompt.md +108 -0
package/.claude/scripts/ralph.sh +127 -0
package/.claude/skills/prd.md +270 -0
package/.claude/skills/ralph-yolo.md +613 -0
package/.claude/skills/ralph.md +315 -0
package/.claude/templates/prd.json.example +64 -0
package/.env.example +8 -0
package/.github/workflows/npm-publish.yml +48 -0
package/README.md +525 -0
package/config/mcp-config.json +77 -0
package/docs/PRD.md +897 -0
package/docs/api-document.md +973 -0
package/docs/document-mcp.txt +1 -0
package/package.json +49 -0
package/prd-progress.txt +66 -0
package/src/cache-manager.js +204 -0
package/src/cli.js +47 -0
package/src/fulltext-downloader.js +333 -0
package/src/index.js +603 -0
package/src/json-optimizer.js +153 -0
package/src/openalex-client.js +305 -0
package/src/types/pdf-parse.d.ts +13 -0
package/src/utils.js +90 -0
package/tests/cli.test.js +31 -0
package/tsconfig.json +22 -0

package/docs/document-mcp.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+ https://docs.openalex.org/~gitbook/mcp

package/package.json ADDED Viewed

@@ -0,0 +1,49 @@
+{
+  "name": "openalex-mcp-server",
+  "version": "1.0.1",
+  "description": "轻量化的 OpenAlex MCP 服务器，为 AI Agent 提供快速学术文献检索、详细摘要信息和全文下载能力",
+  "main": "src/index.js",
+  "type": "module",
+  "scripts": {
+    "start": "node src/cli.js",
+    "release:patch": "npm version patch -m \"chore(release): v%s\"",
+    "release:minor": "npm version minor -m \"chore(release): v%s\"",
+    "release:major": "npm version major -m \"chore(release): v%s\"",
+    "typecheck": "tsc --noEmit",
+    "test": "node --test"
+  },
+  "bin": {
+    "openalex-mcp-server": "src/cli.js",
+    "openalex-mcp": "src/cli.js"
+  },
+  "engines": {
+    "node": ">=18"
+  },
+  "keywords": [
+    "mcp",
+    "openalex",
+    "academic",
+    "research",
+    "papers"
+  ],
+  "author": "https://github.com/SecretRichGarden/openAlex-mcp",
+  "license": "ISC",
+  "repository": {
+    "type": "git",
+    "url": "git+https://github.com/SecretRichGarden/openAlex-mcp.git"
+  },
+  "bugs": {
+    "url": "https://github.com/SecretRichGarden/openAlex-mcp/issues"
+  },
+  "homepage": "https://github.com/SecretRichGarden/openAlex-mcp#readme",
+  "dependencies": {
+    "@modelcontextprotocol/sdk": "^1.0.4",
+    "node-fetch": "^3.3.2",
+    "dotenv": "^16.4.5",
+    "pdf-parse": "^1.1.1"
+  },
+  "devDependencies": {
+    "@types/node": "^22.10.5",
+    "typescript": "^5.7.3"
+  }
+}

package/prd-progress.txt ADDED Viewed

@@ -0,0 +1,66 @@
+# Ralph YOLO Progress Log
+Started: Sun, Jan 25, 2026  3:09:23 PM
+---
+## US-001: 项目初始化和基础结构 ✅
+- 完成时间: $(date)
+- Agent: Full-stack-developer
+- 状态: 已完成
+- 实现内容:
+  - 创建 package.json (项目名: openalex-mcp-server, 版本: 1.0.0)
+  - 安装依赖: @modelcontextprotocol/sdk, node-fetch, dotenv
+  - 创建目录: src/, config/, cache/papers/, cache/fulltext/
+  - 创建配置文件: .env.example, .gitignore, tsconfig.json
+- 文件修改: package.json, .env.example, .gitignore, tsconfig.json
+- 学习点: TypeScript 配置已就绪，等待源代码文件
+---
+## US-002: 工具函数模块 ✅
+- 完成时间: $(date)
+- Agent: Full-stack-developer
+- 提交哈希: 3b8ec91
+- 状态: 已完成
+- 实现内容:
+  - 创建 src/utils.js (111 行代码)
+  - 实现 rebuildAbstract() - 从倒排索引重建摘要
+  - 实现 simplifyOpenAlexId() - 简化 OpenAlex URL
+  - 实现 sleep() - 异步延迟
+  - 实现 validateWorkId() - 验证 Work ID 格式
+  - 更新 tsconfig.json 支持 JavaScript 类型检查
+- 文件修改: src/utils.js (新建), tsconfig.json (更新)
+- 学习点: 配置 TypeScript 检查 JavaScript 文件需要 allowJs 和 checkJs
+---
+## US-003: 缓存管理器模块 ✅
+- 完成时间: $(date)
+- Agent: Full-stack-developer
+- 提交哈希: ad32123
+- 状态: 已完成
+- 实现内容:
+  - 创建 src/cache-manager.js (204 行代码)
+  - 实现 CacheManager 类，支持 get/set/has/delete/getStats 方法
+  - 缓存存储在 cache/papers/ 目录，JSON 格式
+  - 自动创建缓存目录，自动过期检查机制
+  - 提供详细的缓存统计功能
+- 文件修改: src/cache-manager.js (新建)
+- 学习点: 使用 import.meta.url 和 fileURLToPath 获取文件路径，实现智能过期检查
+---
+## US-004: OpenAlex API 客户端 - 基础请求 ✅
+- 完成时间: $(date)
+- Agent: Full-stack-developer
+- 提交哈希: 6f5e5f8
+- 状态: 已完成
+- 实现内容:
+  - 创建 src/openalex-client.js (85 行代码)
+  - 实现 OpenAlexClient 类，支持可选 API key 认证
+  - 实现 _request 私有方法，处理 GET 请求和查询参数
+  - 基础 URL: https://api.openalex.org
+  - 完整的 HTTP 错误处理和网络错误处理
+- 文件修改: src/openalex-client.js (新建)
+- 学习点: TypeScript 严格类型检查要求 catch 块中使用 instanceof Error 进行类型守卫
+---

package/src/cache-manager.js ADDED Viewed

@@ -0,0 +1,204 @@
+/**
+ * 缓存管理器模块
+ * 提供文件系统缓存管理功能，用于缓存论文元数据和全文
+ */
+import { promises as fs } from 'fs';
+import path from 'path';
+import { fileURLToPath } from 'url';
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = path.dirname(__filename);
+/**
+ * 缓存管理器类
+ * 支持论文元数据和全文的缓存管理，避免重复 API 调用
+ */
+export class CacheManager {
+  /**
+   * 创建缓存管理器实例
+   * @param {string} [cacheDir='cache/papers'] - 缓存目录路径（相对于项目根目录）
+   */
+  constructor(cacheDir = 'cache/papers') {
+    // 从 src 目录回到项目根目录
+    this.cacheDir = path.resolve(__dirname, '..', cacheDir);
+    this._ensureCacheDirExists();
+  }
+  /**
+   * 确保缓存目录存在，如果不存在则自动创建
+   * @private
+   * @returns {Promise<void>}
+   */
+  async _ensureCacheDirExists() {
+    try {
+      await fs.access(this.cacheDir);
+    } catch (error) {
+      // 目录不存在，创建它
+      await fs.mkdir(this.cacheDir, { recursive: true });
+    }
+  }
+  /**
+   * 生成缓存文件路径
+   * @private
+   * @param {string} key - 缓存键
+   * @returns {string} 缓存文件的完整路径
+   */
+  _getCacheFilePath(key) {
+    // 将键转换为安全的文件名（移除特殊字符）
+    const safeKey = key.replace(/[^a-zA-Z0-9_-]/g, '_');
+    return path.join(this.cacheDir, `${safeKey}.json`);
+  }
+  /**
+   * 检查缓存是否存在且未过期
+   * @param {string} key - 缓存键
+   * @returns {Promise<boolean>} 缓存是否有效
+   */
+  async has(key) {
+    try {
+      const filePath = this._getCacheFilePath(key);
+      await fs.access(filePath);
+      // 读取缓存文件检查过期时间
+      const content = await fs.readFile(filePath, 'utf8');
+      const cache = JSON.parse(content);
+      // 检查是否过期
+      if (cache.expiresAt && new Date(cache.expiresAt) < new Date()) {
+        // 缓存已过期，删除它
+        await this.delete(key);
+        return false;
+      }
+      return true;
+    } catch (error) {
+      return false;
+    }
+  }
+  /**
+   * 从缓存中获取数据
+   * @param {string} key - 缓存键
+   * @returns {Promise<any|null>} 缓存的数据，如果不存在或已过期则返回 null
+   */
+  async get(key) {
+    try {
+      const filePath = this._getCacheFilePath(key);
+      const content = await fs.readFile(filePath, 'utf8');
+      const cache = JSON.parse(content);
+      // 检查是否过期
+      if (cache.expiresAt && new Date(cache.expiresAt) < new Date()) {
+        // 缓存已过期，删除它并返回 null
+        await this.delete(key);
+        return null;
+      }
+      return cache.data;
+    } catch (error) {
+      // 文件不存在或读取失败
+      return null;
+    }
+  }
+  /**
+   * 将数据存入缓存
+   * @param {string} key - 缓存键
+   * @param {any} data - 要缓存的数据
+   * @param {number} [expiryDays=30] - 缓存过期天数，默认 30 天
+   * @returns {Promise<void>}
+   */
+  async set(key, data, expiryDays = 30) {
+    await this._ensureCacheDirExists();
+    const filePath = this._getCacheFilePath(key);
+    // 计算过期时间
+    const expiresAt = new Date();
+    expiresAt.setDate(expiresAt.getDate() + expiryDays);
+    const cache = {
+      key,
+      data,
+      createdAt: new Date().toISOString(),
+      expiresAt: expiresAt.toISOString()
+    };
+    await fs.writeFile(filePath, JSON.stringify(cache, null, 2), 'utf8');
+  }
+  /**
+   * 删除指定的缓存
+   * @param {string} key - 缓存键
+   * @returns {Promise<boolean>} 是否成功删除
+   */
+  async delete(key) {
+    try {
+      const filePath = this._getCacheFilePath(key);
+      await fs.unlink(filePath);
+      return true;
+    } catch (error) {
+      // 文件不存在或删除失败
+      return false;
+    }
+  }
+  /**
+   * 获取缓存统计信息
+   * @returns {Promise<{totalFiles: number, totalSize: number, oldestCache: string|null, newestCache: string|null}>}
+   */
+  async getStats() {
+    try {
+      await this._ensureCacheDirExists();
+      const files = await fs.readdir(this.cacheDir);
+      const jsonFiles = files.filter(file => file.endsWith('.json'));
+      let totalSize = 0;
+      let oldestCache = null;
+      let newestCache = null;
+      let oldestTime = Infinity;
+      let newestTime = 0;
+      for (const file of jsonFiles) {
+        const filePath = path.join(this.cacheDir, file);
+        const stats = await fs.stat(filePath);
+        totalSize += stats.size;
+        const content = await fs.readFile(filePath, 'utf8');
+        try {
+          const cache = JSON.parse(content);
+          const createdTime = new Date(cache.createdAt).getTime();
+          if (createdTime < oldestTime) {
+            oldestTime = createdTime;
+            oldestCache = cache.createdAt;
+          }
+          if (createdTime > newestTime) {
+            newestTime = createdTime;
+            newestCache = cache.createdAt;
+          }
+        } catch (parseError) {
+          // 忽略无效的 JSON 文件
+        }
+      }
+      return {
+        totalFiles: jsonFiles.length,
+        totalSize,
+        oldestCache,
+        newestCache
+      };
+    } catch (error) {
+      return {
+        totalFiles: 0,
+        totalSize: 0,
+        oldestCache: null,
+        newestCache: null
+      };
+    }
+  }
+}

package/src/cli.js ADDED Viewed

@@ -0,0 +1,47 @@
+#!/usr/bin/env node
+/**
+ * CLI entry for OpenAlex MCP server (stdio).
+ */
+import { createRequire } from 'module';
+import path from 'path';
+import { fileURLToPath } from 'url';
+const require = createRequire(import.meta.url);
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = path.dirname(__filename);
+const pkg = require(path.resolve(__dirname, '..', 'package.json'));
+function printHelp() {
+  const helpText = `
+OpenAlex MCP Server (stdio)
+Usage:
+  openalex-mcp-server [--help] [--version]
+  openalex-mcp [--help] [--version]
+  npx -y openalex-mcp-server
+Environment:
+  OPENALEX_API_KEY   Optional. Higher rate limit when provided.
+  CACHE_ENABLED      Optional. Defaults to true.
+  ABSTRACT_MODE      Optional. Defaults to quick.
+`;
+  console.log(helpText.trim());
+}
+const args = process.argv.slice(2);
+if (args.includes('-h') || args.includes('--help')) {
+  printHelp();
+  process.exit(0);
+}
+if (args.includes('-v') || args.includes('--version')) {
+  console.log(pkg.version);
+  process.exit(0);
+}
+try {
+  await import('./index.js');
+} catch (error) {
+  console.error('Failed to start server:', error);
+  process.exit(1);
+}

package/src/fulltext-downloader.js ADDED Viewed

@@ -0,0 +1,333 @@
+/**
+ * 全文下载器模块
+ * 提供论文全文下载、文本提取和章节识别功能
+ */
+import fetch from 'node-fetch';
+import { promises as fs } from 'fs';
+import path from 'path';
+import { fileURLToPath } from 'url';
+import pdfParse from 'pdf-parse';
+const __filename = fileURLToPath(import.meta.url);
+const __dirname = path.dirname(__filename);
+/**
+ * 全文下载器类
+ * 支持检测、下载和提取 OA 论文的全文内容
+ */
+export class FulltextDownloader {
+  /**
+   * 创建全文下载器实例
+   * @param {string} [cacheDir='cache/fulltext'] - 缓存目录路径（相对于项目根目录）
+   */
+  constructor(cacheDir = 'cache/fulltext') {
+    this.cacheDir = path.resolve(__dirname, '..', cacheDir);
+    this._ensureCacheDirExists();
+  }
+  /**
+   * 确保缓存目录存在
+   * @private
+   * @returns {Promise<void>}
+   */
+  async _ensureCacheDirExists() {
+    try {
+      await fs.access(this.cacheDir);
+    } catch (error) {
+      await fs.mkdir(this.cacheDir, { recursive: true });
+    }
+  }
+  /**
+   * 获取缓存文件路径
+   * @private
+   * @param {string} workId - 论文 ID
+   * @param {string} extension - 文件扩展名
+   * @returns {string} 缓存文件路径
+   */
+  _getCachePath(workId, extension) {
+    const safeId = workId.replace(/[^a-zA-Z0-9_-]/g, '_');
+    return path.join(this.cacheDir, `${safeId}.${extension}`);
+  }
+  /**
+   * 检测论文是否有可用的 OA 全文 URL
+   * @param {Object} workData - 论文数据（优化后的格式）
+   * @returns {Promise<Object>} 检测结果
+   * @example
+   * const result = await downloader.detectFulltext(workData);
+   * // { work_id, is_oa, oa_status, oa_url, fulltext_available }
+   */
+  async detectFulltext(workData) {
+    if (!workData || !workData.id) {
+      throw new Error('无效的论文数据');
+    }
+    const result = {
+      work_id: workData.id,
+      is_oa: false,
+      oa_status: null,
+      oa_url: null,
+      fulltext_available: false
+    };
+    // 检查开放访问信息
+    if (workData.open_access) {
+      result.is_oa = workData.open_access.is_oa || false;
+      result.oa_status = workData.open_access.oa_status || null;
+      result.oa_url = workData.open_access.oa_url || null;
+    }
+    // 检查是否有最佳 PDF URL
+    if (workData.best_pdf_url) {
+      result.oa_url = workData.best_pdf_url;
+      result.fulltext_available = true;
+    } else if (result.oa_url) {
+      result.fulltext_available = true;
+    }
+    return result;
+  }
+  /**
+   * 下载论文全文 PDF
+   * @param {string} workId - 论文 ID
+   * @param {string} oaUrl - OA 全文 URL
+   * @param {boolean} [forceDownload=false] - 是否强制重新下载
+   * @returns {Promise<Object>} 下载状态
+   * @example
+   * const result = await downloader.downloadFulltext('W3128609807', 'https://arxiv.org/pdf/2301.xxxxx.pdf');
+   * // { work_id, status, cache_path, file_size }
+   */
+  async downloadFulltext(workId, oaUrl, forceDownload = false) {
+    if (!workId || !oaUrl) {
+      throw new Error('workId 和 oaUrl 是必需参数');
+    }
+    const pdfPath = this._getCachePath(workId, 'pdf');
+    // 检查是否已缓存
+    if (!forceDownload) {
+      try {
+        await fs.access(pdfPath);
+        const stats = await fs.stat(pdfPath);
+        return {
+          work_id: workId,
+          status: 'cached',
+          cache_path: pdfPath,
+          file_size: stats.size
+        };
+      } catch (error) {
+        // 文件不存在，继续下载
+      }
+    }
+    try {
+      // 下载 PDF
+      const response = await fetch(oaUrl, {
+        headers: {
+          'User-Agent': 'openalex-mcp-server/1.0.0'
+        }
+      });
+      if (!response.ok) {
+        throw new Error(`下载失败: HTTP ${response.status} ${response.statusText}`);
+      }
+      // 检查 Content-Type
+      const contentType = response.headers.get('content-type');
+      if (!contentType || !contentType.includes('application/pdf')) {
+        console.warn(`警告: URL 可能不是 PDF 文件: ${contentType}`);
+      }
+      // 保存到缓存
+      await this._ensureCacheDirExists();
+      const buffer = await response.buffer();
+      await fs.writeFile(pdfPath, buffer);
+      return {
+        work_id: workId,
+        status: 'downloaded',
+        cache_path: pdfPath,
+        file_size: buffer.length
+      };
+    } catch (error) {
+      const errorMessage = error instanceof Error ? error.message : String(error);
+      return {
+        work_id: workId,
+        status: 'failed',
+        cache_path: null,
+        file_size: 0,
+        error: errorMessage
+      };
+    }
+  }
+  /**
+   * 从 PDF 中提取文本内容
+   * @param {string} pdfPath - PDF 文件路径
+   * @returns {Promise<string>} 提取的文本内容
+   * @example
+   * const text = await downloader.extractText('/path/to/paper.pdf');
+   */
+  async extractText(pdfPath) {
+    try {
+      const dataBuffer = await fs.readFile(pdfPath);
+      const data = await pdfParse(dataBuffer);
+      return data.text;
+    } catch (error) {
+      const errorMessage = error instanceof Error ? error.message : String(error);
+      throw new Error(`PDF 文本提取失败: ${errorMessage}`);
+    }
+  }
+  /**
+   * 尝试识别并提取论文章节
+   * @param {string} text - 论文全文文本
+   * @returns {Object} 提取的章节内容
+   * @example
+   * const sections = await downloader.extractSections(fullText);
+   * // { abstract: '', introduction: '', methods: '', results: '', discussion: '' }
+   */
+  extractSections(text) {
+    const sections = {
+      abstract: null,
+      introduction: null,
+      methods: null,
+      results: null,
+      discussion: null,
+      conclusion: null,
+      references: null
+    };
+    // 常见的章节标题模式（不区分大小写）
+    const patterns = {
+      abstract: /(?:^|\n)\s*(?:abstract|摘要|resumen)\s*\n/i,
+      introduction: /(?:^|\n)\s*(?:introduction|引言|introducción)\s*\n/i,
+      methods: /(?:^|\n)\s*(?:materials?\s+and\s+methods?|methodology|methods|方法|material\s+y\s+métodos)\s*\n/i,
+      results: /(?:^|\n)\s*(?:results|结果|resultados)\s*\n/i,
+      discussion: /(?:^|\n)\s*(?:discussion|讨论|discusión)\s*\n/i,
+      conclusion: /(?:^|\n)\s*(?:conclusion[s]?|结论|conclusión)\s*\n/i,
+      references: /(?:^|\n)\s*(?:references|bibliography|参考文献|referencias)\s*\n/i
+    };
+    // 找到所有章节的位置
+    const matches = [];
+    for (const [section, pattern] of Object.entries(patterns)) {
+      const match = text.match(pattern);
+      if (match) {
+        matches.push({
+          section,
+          index: match.index + match[0].length
+        });
+      }
+    }
+    // 按位置排序
+    matches.sort((a, b) => a.index - b.index);
+    // 提取每个章节的内容
+    for (let i = 0; i < matches.length; i++) {
+      const current = matches[i];
+      const next = matches[i + 1];
+      if (next) {
+        sections[current.section] = text.substring(current.index, next.index).trim();
+      } else {
+        sections[current.section] = text.substring(current.index).trim();
+      }
+    }
+    // 移除空值
+    for (const key of Object.keys(sections)) {
+      if (!sections[key] || sections[key].length === 0) {
+        delete sections[key];
+      }
+    }
+    return sections;
+  }
+  /**
+   * 提取并保存论文章节
+   * @param {string} workId - 论文 ID
+   * @returns {Promise<Object>} 提取结果
+   * @example
+   * const result = await downloader.extractAndSaveSections('W3128609807');
+   */
+  async extractAndSaveSections(workId) {
+    const pdfPath = this._getCachePath(workId, 'pdf');
+    const textPath = this._getCachePath(workId, 'txt');
+    const sectionsPath = this._getCachePath(workId, 'sections.json');
+    try {
+      // 检查 PDF 是否存在
+      await fs.access(pdfPath);
+      // 提取文本
+      let text;
+      try {
+        await fs.access(textPath);
+        text = await fs.readFile(textPath, 'utf8');
+      } catch (error) {
+        text = await this.extractText(pdfPath);
+        await fs.writeFile(textPath, text, 'utf8');
+      }
+      // 提取章节
+      const sections = this.extractSections(text);
+      await fs.writeFile(sectionsPath, JSON.stringify(sections, null, 2), 'utf8');
+      return {
+        work_id: workId,
+        status: 'success',
+        text_path: textPath,
+        sections_path: sectionsPath,
+        sections: Object.keys(sections)
+      };
+    } catch (error) {
+      const errorMessage = error instanceof Error ? error.message : String(error);
+      return {
+        work_id: workId,
+        status: 'failed',
+        error: errorMessage
+      };
+    }
+  }
+  /**
+   * 获取已提取的章节内容
+   * @param {string} workId - 论文 ID
+   * @param {string[]} [sections] - 要获取的章节列表（可选，默认返回所有）
+   * @returns {Promise<Object>} 章节内容
+   * @example
+   * const sections = await downloader.getSections('W3128609807', ['abstract', 'introduction']);
+   */
+  async getSections(workId, sections = null) {
+    const sectionsPath = this._getCachePath(workId, 'sections.json');
+    try {
+      const content = await fs.readFile(sectionsPath, 'utf8');
+      const allSections = JSON.parse(content);
+      // 如果指定了章节，只返回请求的章节
+      if (sections && Array.isArray(sections)) {
+        const result = {};
+        for (const section of sections) {
+          if (allSections[section]) {
+            result[section] = allSections[section];
+          }
+        }
+        return result;
+      }
+      return allSections;
+    } catch (error) {
+      throw new Error(`无法读取章节文件: ${error instanceof Error ? error.message : String(error)}`);
+    }
+  }
+}