cnki-mcp-server 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,27 @@
1
# Builds the distribution with hatchling and uploads it to PyPI whenever a
# GitHub release is published.
name: Publish to PyPI

on:
  release:
    types: [published]

jobs:
  publish:
    runs-on: ubuntu-latest
    permissions:
      # Declaring any permission sets every unlisted scope to "none", so the
      # read access that actions/checkout needs is granted explicitly.
      contents: read
      # Lets the job request an OIDC token for PyPI trusted publishing.
      id-token: write

    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      - name: Build
        run: |
          pip install hatchling
          python -m hatchling build

      - name: Publish to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,36 @@
1
+ name: Test
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ test:
11
+ strategy:
12
+ fail-fast: false
13
+ matrix:
14
+ python-version: ["3.10", "3.11", "3.12", "3.13"]
15
+ os: [ubuntu-latest]
16
+
17
+ runs-on: ${{ matrix.os }}
18
+
19
+ steps:
20
+ - uses: actions/checkout@v4
21
+
22
+ - name: Set up Python ${{ matrix.python-version }}
23
+ uses: actions/setup-python@v5
24
+ with:
25
+ python-version: ${{ matrix.python-version }}
26
+
27
+ - name: Install dependencies
28
+ run: |
29
+ python -m pip install --upgrade pip
30
+ pip install -e ".[dev]"
31
+
32
+ - name: Lint
33
+ run: ruff check cnki_mcp/ tests/
34
+
35
+ - name: Unit tests
36
+ run: pytest tests/ -v
@@ -0,0 +1,12 @@
1
+ __pycache__/
2
+ *.pyc
3
+ *.egg-info/
4
+ dist/
5
+ build/
6
+ .env
7
+ .venv/
8
+ venv/
9
+ *.log
10
+ .pytest_cache/
11
+ .ruff_cache/
12
+ .playwright/
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 CNKI MCP Contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,129 @@
1
+ Metadata-Version: 2.4
2
+ Name: cnki-mcp-server
3
+ Version: 0.2.0
4
+ Summary: CNKI (中国知网) MCP Server — 通过 Model Context Protocol 检索中文学术论文
5
+ Author: CNKI MCP Contributors
6
+ License: MIT
7
+ License-File: LICENSE
8
+ Keywords: chinese-academic,cnki,fastmcp,mcp,playwright,知网
9
+ Classifier: Development Status :: 4 - Beta
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Programming Language :: Python :: 3.13
16
+ Classifier: Topic :: Internet :: WWW/HTTP
17
+ Classifier: Topic :: Scientific/Engineering
18
+ Requires-Python: >=3.10
19
+ Requires-Dist: fastmcp>=2.0.0
20
+ Requires-Dist: playwright>=1.40.0
21
+ Provides-Extra: dev
22
+ Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
23
+ Requires-Dist: pytest>=7.0; extra == 'dev'
24
+ Requires-Dist: ruff>=0.3.0; extra == 'dev'
25
+ Description-Content-Type: text/markdown
26
+
27
+ # CNKI MCP Server
28
+
29
+ [![PyPI version](https://img.shields.io/pypi/v/cnki-mcp-server.svg)](https://pypi.org/project/cnki-mcp-server/)
30
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
31
+
32
+ **CNKI (中国知网) MCP Server** — 通过 [Model Context Protocol (MCP)](https://modelcontextprotocol.io/) 为 AI Agent 提供中文学术论文检索能力。
33
+
34
+ ## 功能
35
+
36
+ | 工具 | 说明 | 需要浏览器 |
37
+ |------|------|-----------|
38
+ | `search_cnki` | 搜索 CNKI 论文,支持多页、多种搜索类型和排序 | 是 |
39
+ | `get_paper_detail` | 获取论文详情(标题、摘要、作者、关键词、DOI 等 17 字段) | 是 |
40
+ | `find_best_match` | 快速匹配论文标题,验证引用信息 | 是 |
41
+ | `format_citation` | 引文格式化(GB/T 7714, APA, MLA, Chicago, Vancouver) | 否 |
42
+ | `browse_journals` | 期刊浏览(学科分类、期刊搜索、最新文章) | 是 |
43
+ | `export_papers` | 批量导出(CSV, JSON, BibTeX, RIS) | 否 |
44
+
45
+ ### 搜索类型
46
+
47
+ 支持 15 种搜索类型:主题、关键词、篇名、作者、作者单位、全文、DOI、基金、摘要等(中英文别名均可)。
48
+
49
+ ### 排序方式
50
+
51
+ 相关度 / 发表时间 / 被引 / 下载 / 综合(支持英文别名:relevance, date, cited, download, composite)。
52
+
53
+ ## 安装
54
+
55
+ ```bash
56
+ pip install cnki-mcp-server
57
+ python -m playwright install chromium
58
+ ```
59
+
60
+ > **注意**: Playwright Chromium 约 300MB,首次安装需要下载,后续无需重复安装。
61
+
62
+ ## 使用
63
+
64
+ ### Claude Code
65
+
66
+ 在项目根目录的 `.mcp.json` 或 Claude Code 的 MCP 配置中添加:
67
+
68
+ ```json
69
+ {
70
+ "mcpServers": {
71
+ "cnki": {
72
+ "command": "python",
73
+ "args": ["-m", "cnki_mcp"]
74
+ }
75
+ }
76
+ }
77
+ ```
78
+
79
+ ### 命令行直接使用
80
+
81
+ ```bash
82
+ python -m cnki_mcp
83
+ ```
84
+
85
+ ## 要求
86
+
87
+ - Python >= 3.10
88
+ - Playwright Chromium(首次使用时自动安装)
89
+
90
+ ## 引文格式
91
+
92
+ | 风格 | 标准 | 适用场景 |
93
+ |------|------|----------|
94
+ | `gbt7714` | GB/T 7714-2015 | 中文学位论文、中文期刊 |
95
+ | `apa` | APA 7th Edition | 心理学、教育学、社会科学 |
96
+ | `mla` | MLA 9th Edition | 语言文学、人文学科 |
97
+ | `chicago` | Chicago Notes & Bibliography | 历史学、艺术学 |
98
+ | `vancouver` | Vancouver/ICMJE | 生物医学、临床医学 |
99
+
100
+ ## 导出格式
101
+
102
+ | 格式 | 适用软件 |
103
+ |------|----------|
104
+ | JSON | 编程处理、数据分析 |
105
+ | CSV | Excel、Google Sheets |
106
+ | BibTeX | LaTeX、Zotero、JabRef |
107
+ | RIS | EndNote、Mendeley、Zotero |
108
+
109
+ ## 技术实现
110
+
111
+ - **引擎**: Playwright(自带签名 Chromium,消除 macOS codesign 问题,跨平台零配置)
112
+ - **MCP 框架**: FastMCP
113
+ - **并发**: 原生 async/await
114
+ - **反检测**: 随机 User-Agent、模拟人类输入、navigator.webdriver 覆写
115
+ - **会话复用**: 共享 BrowserContext,Cookie 互通,避免 CNKI 验证码
116
+
117
+ ## 开发
118
+
119
+ ```bash
120
+ git clone https://github.com/xxxxchaos/cnki-mcp-server.git
121
+ cd cnki-mcp-server
122
+ pip install -e ".[dev]"
123
+ python -m playwright install chromium
124
+ pytest tests/ -v
125
+ ```
126
+
127
+ ## 许可
128
+
129
+ MIT License
@@ -0,0 +1,103 @@
1
+ # CNKI MCP Server
2
+
3
+ [![PyPI version](https://img.shields.io/pypi/v/cnki-mcp-server.svg)](https://pypi.org/project/cnki-mcp-server/)
4
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
5
+
6
+ **CNKI (中国知网) MCP Server** — 通过 [Model Context Protocol (MCP)](https://modelcontextprotocol.io/) 为 AI Agent 提供中文学术论文检索能力。
7
+
8
+ ## 功能
9
+
10
+ | 工具 | 说明 | 需要浏览器 |
11
+ |------|------|-----------|
12
+ | `search_cnki` | 搜索 CNKI 论文,支持多页、多种搜索类型和排序 | 是 |
13
+ | `get_paper_detail` | 获取论文详情(标题、摘要、作者、关键词、DOI 等 17 字段) | 是 |
14
+ | `find_best_match` | 快速匹配论文标题,验证引用信息 | 是 |
15
+ | `format_citation` | 引文格式化(GB/T 7714, APA, MLA, Chicago, Vancouver) | 否 |
16
+ | `browse_journals` | 期刊浏览(学科分类、期刊搜索、最新文章) | 是 |
17
+ | `export_papers` | 批量导出(CSV, JSON, BibTeX, RIS) | 否 |
18
+
19
+ ### 搜索类型
20
+
21
+ 支持 15 种搜索类型:主题、关键词、篇名、作者、作者单位、全文、DOI、基金、摘要等(中英文别名均可)。
22
+
23
+ ### 排序方式
24
+
25
+ 相关度 / 发表时间 / 被引 / 下载 / 综合(支持英文别名:relevance, date, cited, download, composite)。
26
+
27
+ ## 安装
28
+
29
+ ```bash
30
+ pip install cnki-mcp-server
31
+ python -m playwright install chromium
32
+ ```
33
+
34
+ > **注意**: Playwright Chromium 约 300MB,首次安装需要下载,后续无需重复安装。
35
+
36
+ ## 使用
37
+
38
+ ### Claude Code
39
+
40
+ 在项目根目录的 `.mcp.json` 或 Claude Code 的 MCP 配置中添加:
41
+
42
+ ```json
43
+ {
44
+ "mcpServers": {
45
+ "cnki": {
46
+ "command": "python",
47
+ "args": ["-m", "cnki_mcp"]
48
+ }
49
+ }
50
+ }
51
+ ```
52
+
53
+ ### 命令行直接使用
54
+
55
+ ```bash
56
+ python -m cnki_mcp
57
+ ```
58
+
59
+ ## 要求
60
+
61
+ - Python >= 3.10
62
+ - Playwright Chromium(首次使用时自动安装)
63
+
64
+ ## 引文格式
65
+
66
+ | 风格 | 标准 | 适用场景 |
67
+ |------|------|----------|
68
+ | `gbt7714` | GB/T 7714-2015 | 中文学位论文、中文期刊 |
69
+ | `apa` | APA 7th Edition | 心理学、教育学、社会科学 |
70
+ | `mla` | MLA 9th Edition | 语言文学、人文学科 |
71
+ | `chicago` | Chicago Notes & Bibliography | 历史学、艺术学 |
72
+ | `vancouver` | Vancouver/ICMJE | 生物医学、临床医学 |
73
+
74
+ ## 导出格式
75
+
76
+ | 格式 | 适用软件 |
77
+ |------|----------|
78
+ | JSON | 编程处理、数据分析 |
79
+ | CSV | Excel、Google Sheets |
80
+ | BibTeX | LaTeX、Zotero、JabRef |
81
+ | RIS | EndNote、Mendeley、Zotero |
82
+
83
+ ## 技术实现
84
+
85
+ - **引擎**: Playwright(自带签名 Chromium,消除 macOS codesign 问题,跨平台零配置)
86
+ - **MCP 框架**: FastMCP
87
+ - **并发**: 原生 async/await
88
+ - **反检测**: 随机 User-Agent、模拟人类输入、navigator.webdriver 覆写
89
+ - **会话复用**: 共享 BrowserContext,Cookie 互通,避免 CNKI 验证码
90
+
91
+ ## 开发
92
+
93
+ ```bash
94
+ git clone https://github.com/xxxxchaos/cnki-mcp-server.git
95
+ cd cnki-mcp-server
96
+ pip install -e ".[dev]"
97
+ python -m playwright install chromium
98
+ pytest tests/ -v
99
+ ```
100
+
101
+ ## 许可
102
+
103
+ MIT License
@@ -0,0 +1,5 @@
1
+ __version__ = "0.2.0"
2
+
3
+ from cnki_mcp.server import mcp, main
4
+
5
+ __all__ = ["mcp", "main", "__version__"]
@@ -0,0 +1,5 @@
1
+ """python -m cnki_mcp 入口"""
2
+
3
+ from cnki_mcp.server import main
4
+
5
+ main()
@@ -0,0 +1,169 @@
1
+ """
2
+ AsyncPlaywright 浏览器池管理。
3
+
4
+ 特性:
5
+ - 懒初始化:首次调用时才启动浏览器
6
+ - 实例复用:多次调用共享同一 browser 实例
7
+ - 共享 BrowserContext:同一 context 内创建 Page,Cookie 互通
8
+ - 空闲超时关闭(600s)
9
+ - asyncio.Lock 保护并发访问
10
+ """
11
+
12
+ import asyncio
13
+ import random
14
+ import subprocess
15
+ import sys
16
+ import time
17
+ from typing import Optional
18
+
19
+ from playwright.async_api import Browser, BrowserContext, Page, async_playwright
20
+
21
+ from cnki_mcp.config import (
22
+ BROWSER_TIMEOUT,
23
+ IDLE_TIMEOUT,
24
+ USER_AGENTS,
25
+ )
26
+ from cnki_mcp.exceptions import BrowserError
27
+
28
+
29
class AsyncBrowserPool:
    """Async Playwright browser pool built around one shared BrowserContext.

    A single headless Chromium instance and a single context are created
    lazily on first use and reused for every page, so all pages share
    cookies. The browser is torn down and relaunched when it has been idle
    for more than ``IDLE_TIMEOUT`` seconds or when its connection has dropped.
    """

    # Chromium flags used for every launch, including the retry that follows
    # an on-demand install, so both code paths start the browser identically.
    _LAUNCH_ARGS = (
        "--disable-blink-features=AutomationControlled",
        "--no-sandbox",
        "--disable-dev-shm-usage",
        "--disable-gpu",
        "--disable-infobars",
        "--disable-extensions",
    )

    def __init__(self) -> None:
        """Create an empty pool; nothing is started until a page is requested."""
        self._playwright = None
        self._browser: Optional[Browser] = None
        self._context: Optional[BrowserContext] = None
        self._last_used: float = 0
        # Serialises browser start-up and shutdown across concurrent callers.
        self._lock = asyncio.Lock()

    async def _ensure_browser(self) -> Browser:
        """Return a usable browser, launching it and its context when needed.

        Callers are expected to hold ``self._lock``.

        Returns:
            The connected browser instance.

        Raises:
            BrowserError: If Chromium cannot be launched or installed.
        """
        if self._browser is not None:
            idle_for = time.time() - self._last_used
            if idle_for > IDLE_TIMEOUT or not self._browser.is_connected():
                # Run the full teardown in both cases so the Playwright driver
                # is stopped too; just dropping the references would leak it
                # as soon as a new driver is started below.
                await self._close_browser()

        if self._browser is None:
            self._browser = await self._launch_browser()

        if self._browser is None:
            raise BrowserError("浏览器启动失败")

        if self._context is None:
            # One context for the lifetime of the browser keeps cookies shared
            # between all pages handed out by new_page().
            self._context = await self._browser.new_context(
                user_agent=random.choice(USER_AGENTS),
                viewport={"width": 1920, "height": 1080},
                locale="zh-CN",
            )
            # Hide the automation flag that pages can read from navigator.
            await self._context.add_init_script("""
                Object.defineProperty(navigator, 'webdriver', {
                    get: () => undefined,
                });
            """)

        self._last_used = time.time()
        return self._browser

    async def _launch_browser(self) -> Browser:
        """Launch Chromium, installing it first if the launch error suggests so.

        Raises:
            BrowserError: If the launch fails for another reason, if the
                install fails, or if the launch still fails after installing.
        """
        try:
            return await self._start_chromium()
        except Exception as e:
            message = str(e)
            # NOTE(review): the "playwright" substring match is broad; it is
            # kept unchanged so the same errors trigger an install as before.
            needs_install = (
                "Executable doesn't exist" in message
                or "playwright" in message.lower()
            )
            if not needs_install:
                raise BrowserError(f"浏览器启动失败: {e}") from e

        await self._install_browser()
        try:
            return await self._start_chromium()
        except Exception as e:
            raise BrowserError(f"浏览器启动失败: {e}") from e

    async def _start_chromium(self) -> Browser:
        """Start a Playwright driver and launch headless Chromium with it.

        The driver is stored on the pool only after a successful launch; on
        failure it is stopped again so no driver process is left behind.
        """
        driver = await async_playwright().start()
        try:
            browser = await driver.chromium.launch(
                headless=True,
                args=list(self._LAUNCH_ARGS),
            )
        except Exception:
            try:
                await driver.stop()
            except Exception:
                # Best-effort cleanup; the launch error is the one to report.
                pass
            raise
        self._playwright = driver
        return browser

    async def _install_browser(self) -> None:
        """Install Playwright's Chromium build using the current interpreter.

        The installer runs in a worker thread so the event loop stays
        responsive while it downloads.

        Raises:
            BrowserError: If the installer exits with an error or does not
                finish within the 120-second timeout.
        """
        try:
            await asyncio.to_thread(
                subprocess.check_call,
                [sys.executable, "-m", "playwright", "install", "chromium"],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                timeout=120,
            )
        except (subprocess.CalledProcessError, subprocess.TimeoutExpired) as e:
            raise BrowserError(
                "Playwright Chromium 安装失败。请手动运行: playwright install chromium"
            ) from e

    async def new_page(self) -> Page:
        """Open a new page in the shared context.

        Returns:
            A page whose default timeout is set to ``BROWSER_TIMEOUT``.

        Raises:
            BrowserError: If the browser or its context is unavailable.
        """
        async with self._lock:
            await self._ensure_browser()

            # Raised explicitly instead of asserted so the check also holds
            # when Python runs with -O.
            if self._context is None:
                raise BrowserError("浏览器上下文未初始化")
            page = await self._context.new_page()
            page.set_default_timeout(BROWSER_TIMEOUT)
            return page

    async def navigate_to_cnki(self, page: Page) -> None:
        """Load the CNKI home page, pause briefly, then dismiss any popup."""
        await page.goto("https://www.cnki.net/", wait_until="domcontentloaded")
        # Randomised 1-2 s pause before interacting with the page.
        await asyncio.sleep(random.uniform(1, 2))
        await self._dismiss_popups(page)

    async def _dismiss_popups(self, page: Page) -> bool:
        """Click the first visible close control among the known selectors.

        Returns:
            True if a control was clicked, False if none was found.
        """
        dismiss_selectors = [
            "#close",
            ".close",
            'div[class*="popup"] a:has-text("关闭")',
            'div[class*="modal"] button:has-text("关闭")',
            'div[class*="layui-layer"] a[class*="layui-layer-close"]',
        ]
        for selector in dismiss_selectors:
            try:
                elem = page.locator(selector).first
                if await elem.count() > 0 and await elem.is_visible():
                    await elem.click()
                    await asyncio.sleep(0.5)
                    return True
            except Exception:
                # Best effort: a selector that fails must not block the rest.
                continue
        return False

    async def _close_browser(self) -> None:
        """Close the context and browser and stop the driver, ignoring errors.

        Every reference is cleared even when its shutdown call fails, so the
        next request starts from a clean state.
        """
        if self._context:
            try:
                await self._context.close()
            except Exception:
                pass  # best-effort shutdown
            self._context = None
        if self._browser:
            try:
                await self._browser.close()
            except Exception:
                pass  # best-effort shutdown
            self._browser = None
        if self._playwright:
            try:
                await self._playwright.stop()
            except Exception:
                pass  # best-effort shutdown
            self._playwright = None

    async def close(self) -> None:
        """Shut the pool down, waiting for any in-progress start-up first."""
        async with self._lock:
            await self._close_browser()