cnki-mcp-server 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cnki_mcp_server-0.2.0/.github/workflows/publish.yml +27 -0
- cnki_mcp_server-0.2.0/.github/workflows/test.yml +36 -0
- cnki_mcp_server-0.2.0/.gitignore +12 -0
- cnki_mcp_server-0.2.0/LICENSE +21 -0
- cnki_mcp_server-0.2.0/PKG-INFO +129 -0
- cnki_mcp_server-0.2.0/README.md +103 -0
- cnki_mcp_server-0.2.0/cnki_mcp/__init__.py +5 -0
- cnki_mcp_server-0.2.0/cnki_mcp/__main__.py +5 -0
- cnki_mcp_server-0.2.0/cnki_mcp/browser.py +169 -0
- cnki_mcp_server-0.2.0/cnki_mcp/citation.py +257 -0
- cnki_mcp_server-0.2.0/cnki_mcp/config.py +169 -0
- cnki_mcp_server-0.2.0/cnki_mcp/detail.py +194 -0
- cnki_mcp_server-0.2.0/cnki_mcp/exceptions.py +36 -0
- cnki_mcp_server-0.2.0/cnki_mcp/export.py +172 -0
- cnki_mcp_server-0.2.0/cnki_mcp/journals.py +156 -0
- cnki_mcp_server-0.2.0/cnki_mcp/match.py +69 -0
- cnki_mcp_server-0.2.0/cnki_mcp/search.py +202 -0
- cnki_mcp_server-0.2.0/cnki_mcp/server.py +459 -0
- cnki_mcp_server-0.2.0/cnki_mcp/utils.py +59 -0
- cnki_mcp_server-0.2.0/pyproject.toml +50 -0
- cnki_mcp_server-0.2.0/tests/__init__.py +0 -0
- cnki_mcp_server-0.2.0/tests/conftest.py +17 -0
- cnki_mcp_server-0.2.0/tests/test_citation.py +125 -0
- cnki_mcp_server-0.2.0/tests/test_config.py +34 -0
- cnki_mcp_server-0.2.0/tests/test_export.py +115 -0
- cnki_mcp_server-0.2.0/tests/test_match.py +24 -0
- cnki_mcp_server-0.2.0/tests/test_utils.py +38 -0
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
release:
|
|
5
|
+
types: [published]
|
|
6
|
+
|
|
7
|
+
jobs:
|
|
8
|
+
publish:
|
|
9
|
+
runs-on: ubuntu-latest
|
|
10
|
+
permissions:
|
|
11
|
+
id-token: write
|
|
12
|
+
|
|
13
|
+
steps:
|
|
14
|
+
- uses: actions/checkout@v4
|
|
15
|
+
|
|
16
|
+
- name: Set up Python
|
|
17
|
+
uses: actions/setup-python@v5
|
|
18
|
+
with:
|
|
19
|
+
python-version: "3.11"
|
|
20
|
+
|
|
21
|
+
- name: Build
|
|
22
|
+
run: |
|
|
23
|
+
pip install hatchling
|
|
24
|
+
python -m hatchling build
|
|
25
|
+
|
|
26
|
+
- name: Publish to PyPI
|
|
27
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
name: Test
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
test:
|
|
11
|
+
strategy:
|
|
12
|
+
fail-fast: false
|
|
13
|
+
matrix:
|
|
14
|
+
python-version: ["3.10", "3.11", "3.12", "3.13"]
|
|
15
|
+
os: [ubuntu-latest]
|
|
16
|
+
|
|
17
|
+
runs-on: ${{ matrix.os }}
|
|
18
|
+
|
|
19
|
+
steps:
|
|
20
|
+
- uses: actions/checkout@v4
|
|
21
|
+
|
|
22
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
23
|
+
uses: actions/setup-python@v5
|
|
24
|
+
with:
|
|
25
|
+
python-version: ${{ matrix.python-version }}
|
|
26
|
+
|
|
27
|
+
- name: Install dependencies
|
|
28
|
+
run: |
|
|
29
|
+
python -m pip install --upgrade pip
|
|
30
|
+
pip install -e ".[dev]"
|
|
31
|
+
|
|
32
|
+
- name: Lint
|
|
33
|
+
run: ruff check cnki_mcp/ tests/
|
|
34
|
+
|
|
35
|
+
- name: Unit tests
|
|
36
|
+
run: pytest tests/ -v
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 CNKI MCP Contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: cnki-mcp-server
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: CNKI (中国知网) MCP Server — 通过 Model Context Protocol 检索中文学术论文
|
|
5
|
+
Author: CNKI MCP Contributors
|
|
6
|
+
License: MIT
|
|
7
|
+
License-File: LICENSE
|
|
8
|
+
Keywords: chinese-academic,cnki,fastmcp,mcp,playwright,知网
|
|
9
|
+
Classifier: Development Status :: 4 - Beta
|
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
16
|
+
Classifier: Topic :: Internet :: WWW/HTTP
|
|
17
|
+
Classifier: Topic :: Scientific/Engineering
|
|
18
|
+
Requires-Python: >=3.10
|
|
19
|
+
Requires-Dist: fastmcp>=2.0.0
|
|
20
|
+
Requires-Dist: playwright>=1.40.0
|
|
21
|
+
Provides-Extra: dev
|
|
22
|
+
Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
|
|
23
|
+
Requires-Dist: pytest>=7.0; extra == 'dev'
|
|
24
|
+
Requires-Dist: ruff>=0.3.0; extra == 'dev'
|
|
25
|
+
Description-Content-Type: text/markdown
|
|
26
|
+
|
|
27
|
+
# CNKI MCP Server
|
|
28
|
+
|
|
29
|
+
[PyPI](https://pypi.org/project/cnki-mcp-server/)
|
|
30
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
31
|
+
|
|
32
|
+
**CNKI (中国知网) MCP Server** — 通过 [Model Context Protocol (MCP)](https://modelcontextprotocol.io/) 为 AI Agent 提供中文学术论文检索能力。
|
|
33
|
+
|
|
34
|
+
## 功能
|
|
35
|
+
|
|
36
|
+
| 工具 | 说明 | 需要浏览器 |
|
|
37
|
+
|------|------|-----------|
|
|
38
|
+
| `search_cnki` | 搜索 CNKI 论文,支持多页、多种搜索类型和排序 | 是 |
|
|
39
|
+
| `get_paper_detail` | 获取论文详情(标题、摘要、作者、关键词、DOI 等 17 字段) | 是 |
|
|
40
|
+
| `find_best_match` | 快速匹配论文标题,验证引用信息 | 是 |
|
|
41
|
+
| `format_citation` | 引文格式化(GB/T 7714, APA, MLA, Chicago, Vancouver) | 否 |
|
|
42
|
+
| `browse_journals` | 期刊浏览(学科分类、期刊搜索、最新文章) | 是 |
|
|
43
|
+
| `export_papers` | 批量导出(CSV, JSON, BibTeX, RIS) | 否 |
|
|
44
|
+
|
|
45
|
+
### 搜索类型
|
|
46
|
+
|
|
47
|
+
支持 15 种搜索类型:主题、关键词、篇名、作者、作者单位、全文、DOI、基金、摘要等(中英文别名均可)。
|
|
48
|
+
|
|
49
|
+
### 排序方式
|
|
50
|
+
|
|
51
|
+
相关度 / 发表时间 / 被引 / 下载 / 综合(支持英文别名:relevance, date, cited, download, composite)。
|
|
52
|
+
|
|
53
|
+
## 安装
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
pip install cnki-mcp-server
|
|
57
|
+
python -m playwright install chromium
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
> **注意**: Playwright Chromium 约 300MB,首次安装需要下载,后续无需重复安装。
|
|
61
|
+
|
|
62
|
+
## 使用
|
|
63
|
+
|
|
64
|
+
### Claude Code
|
|
65
|
+
|
|
66
|
+
在 `.claude/settings.json` 或 Claude Code 的 MCP 配置中添加:
|
|
67
|
+
|
|
68
|
+
```json
|
|
69
|
+
{
|
|
70
|
+
"mcpServers": {
|
|
71
|
+
"cnki": {
|
|
72
|
+
"command": "python",
|
|
73
|
+
"args": ["-m", "cnki_mcp"]
|
|
74
|
+
}
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
### 命令行直接使用
|
|
80
|
+
|
|
81
|
+
```bash
|
|
82
|
+
python -m cnki_mcp
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
## 要求
|
|
86
|
+
|
|
87
|
+
- Python >= 3.10
|
|
88
|
+
- Playwright Chromium(首次使用时自动安装)
|
|
89
|
+
|
|
90
|
+
## 引文格式
|
|
91
|
+
|
|
92
|
+
| 风格 | 标准 | 适用场景 |
|
|
93
|
+
|------|------|----------|
|
|
94
|
+
| `gbt7714` | GB/T 7714-2015 | 中文学位论文、中文期刊 |
|
|
95
|
+
| `apa` | APA 7th Edition | 心理学、教育学、社会科学 |
|
|
96
|
+
| `mla` | MLA 9th Edition | 语言文学、人文学科 |
|
|
97
|
+
| `chicago` | Chicago Notes & Bibliography | 历史学、艺术学 |
|
|
98
|
+
| `vancouver` | Vancouver/ICMJE | 生物医学、临床医学 |
|
|
99
|
+
|
|
100
|
+
## 导出格式
|
|
101
|
+
|
|
102
|
+
| 格式 | 适用软件 |
|
|
103
|
+
|------|----------|
|
|
104
|
+
| JSON | 编程处理、数据分析 |
|
|
105
|
+
| CSV | Excel、Google Sheets |
|
|
106
|
+
| BibTeX | LaTeX、Zotero、JabRef |
|
|
107
|
+
| RIS | EndNote、Mendeley、Zotero |
|
|
108
|
+
|
|
109
|
+
## 技术实现
|
|
110
|
+
|
|
111
|
+
- **引擎**: Playwright(自带签名 Chromium,消除 macOS codesign 问题,跨平台零配置)
|
|
112
|
+
- **MCP 框架**: FastMCP
|
|
113
|
+
- **并发**: 原生 async/await
|
|
114
|
+
- **反检测**: 随机 User-Agent、模拟人类输入、navigator.webdriver 覆写
|
|
115
|
+
- **会话复用**: 共享 BrowserContext,Cookie 互通,避免 CNKI 验证码
|
|
116
|
+
|
|
117
|
+
## 开发
|
|
118
|
+
|
|
119
|
+
```bash
|
|
120
|
+
git clone https://github.com/xxxxchaos/cnki-mcp-server.git
|
|
121
|
+
cd cnki-mcp-server
|
|
122
|
+
pip install -e ".[dev]"
|
|
123
|
+
python -m playwright install chromium
|
|
124
|
+
pytest tests/ -v
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
## 许可
|
|
128
|
+
|
|
129
|
+
MIT License
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
# CNKI MCP Server
|
|
2
|
+
|
|
3
|
+
[PyPI](https://pypi.org/project/cnki-mcp-server/)
|
|
4
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
5
|
+
|
|
6
|
+
**CNKI (中国知网) MCP Server** — 通过 [Model Context Protocol (MCP)](https://modelcontextprotocol.io/) 为 AI Agent 提供中文学术论文检索能力。
|
|
7
|
+
|
|
8
|
+
## 功能
|
|
9
|
+
|
|
10
|
+
| 工具 | 说明 | 需要浏览器 |
|
|
11
|
+
|------|------|-----------|
|
|
12
|
+
| `search_cnki` | 搜索 CNKI 论文,支持多页、多种搜索类型和排序 | 是 |
|
|
13
|
+
| `get_paper_detail` | 获取论文详情(标题、摘要、作者、关键词、DOI 等 17 字段) | 是 |
|
|
14
|
+
| `find_best_match` | 快速匹配论文标题,验证引用信息 | 是 |
|
|
15
|
+
| `format_citation` | 引文格式化(GB/T 7714, APA, MLA, Chicago, Vancouver) | 否 |
|
|
16
|
+
| `browse_journals` | 期刊浏览(学科分类、期刊搜索、最新文章) | 是 |
|
|
17
|
+
| `export_papers` | 批量导出(CSV, JSON, BibTeX, RIS) | 否 |
|
|
18
|
+
|
|
19
|
+
### 搜索类型
|
|
20
|
+
|
|
21
|
+
支持 15 种搜索类型:主题、关键词、篇名、作者、作者单位、全文、DOI、基金、摘要等(中英文别名均可)。
|
|
22
|
+
|
|
23
|
+
### 排序方式
|
|
24
|
+
|
|
25
|
+
相关度 / 发表时间 / 被引 / 下载 / 综合(支持英文别名:relevance, date, cited, download, composite)。
|
|
26
|
+
|
|
27
|
+
## 安装
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
pip install cnki-mcp-server
|
|
31
|
+
python -m playwright install chromium
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
> **注意**: Playwright Chromium 约 300MB,首次安装需要下载,后续无需重复安装。
|
|
35
|
+
|
|
36
|
+
## 使用
|
|
37
|
+
|
|
38
|
+
### Claude Code
|
|
39
|
+
|
|
40
|
+
在 `.claude/settings.json` 或 Claude Code 的 MCP 配置中添加:
|
|
41
|
+
|
|
42
|
+
```json
|
|
43
|
+
{
|
|
44
|
+
"mcpServers": {
|
|
45
|
+
"cnki": {
|
|
46
|
+
"command": "python",
|
|
47
|
+
"args": ["-m", "cnki_mcp"]
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
### 命令行直接使用
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
python -m cnki_mcp
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
## 要求
|
|
60
|
+
|
|
61
|
+
- Python >= 3.10
|
|
62
|
+
- Playwright Chromium(首次使用时自动安装)
|
|
63
|
+
|
|
64
|
+
## 引文格式
|
|
65
|
+
|
|
66
|
+
| 风格 | 标准 | 适用场景 |
|
|
67
|
+
|------|------|----------|
|
|
68
|
+
| `gbt7714` | GB/T 7714-2015 | 中文学位论文、中文期刊 |
|
|
69
|
+
| `apa` | APA 7th Edition | 心理学、教育学、社会科学 |
|
|
70
|
+
| `mla` | MLA 9th Edition | 语言文学、人文学科 |
|
|
71
|
+
| `chicago` | Chicago Notes & Bibliography | 历史学、艺术学 |
|
|
72
|
+
| `vancouver` | Vancouver/ICMJE | 生物医学、临床医学 |
|
|
73
|
+
|
|
74
|
+
## 导出格式
|
|
75
|
+
|
|
76
|
+
| 格式 | 适用软件 |
|
|
77
|
+
|------|----------|
|
|
78
|
+
| JSON | 编程处理、数据分析 |
|
|
79
|
+
| CSV | Excel、Google Sheets |
|
|
80
|
+
| BibTeX | LaTeX、Zotero、JabRef |
|
|
81
|
+
| RIS | EndNote、Mendeley、Zotero |
|
|
82
|
+
|
|
83
|
+
## 技术实现
|
|
84
|
+
|
|
85
|
+
- **引擎**: Playwright(自带签名 Chromium,消除 macOS codesign 问题,跨平台零配置)
|
|
86
|
+
- **MCP 框架**: FastMCP
|
|
87
|
+
- **并发**: 原生 async/await
|
|
88
|
+
- **反检测**: 随机 User-Agent、模拟人类输入、navigator.webdriver 覆写
|
|
89
|
+
- **会话复用**: 共享 BrowserContext,Cookie 互通,避免 CNKI 验证码
|
|
90
|
+
|
|
91
|
+
## 开发
|
|
92
|
+
|
|
93
|
+
```bash
|
|
94
|
+
git clone https://github.com/xxxxchaos/cnki-mcp-server.git
|
|
95
|
+
cd cnki-mcp-server
|
|
96
|
+
pip install -e ".[dev]"
|
|
97
|
+
python -m playwright install chromium
|
|
98
|
+
pytest tests/ -v
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
## 许可
|
|
102
|
+
|
|
103
|
+
MIT License
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
"""
|
|
2
|
+
AsyncPlaywright 浏览器池管理。
|
|
3
|
+
|
|
4
|
+
特性:
|
|
5
|
+
- 懒初始化:首次调用时才启动浏览器
|
|
6
|
+
- 实例复用:多次调用共享同一 browser 实例
|
|
7
|
+
- 共享 BrowserContext:同一 context 内创建 Page,Cookie 互通
|
|
8
|
+
- 空闲超时关闭(600s)
|
|
9
|
+
- asyncio.Lock 保护并发访问
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import asyncio
|
|
13
|
+
import random
|
|
14
|
+
import subprocess
|
|
15
|
+
import sys
|
|
16
|
+
import time
|
|
17
|
+
from typing import Optional
|
|
18
|
+
|
|
19
|
+
from playwright.async_api import Browser, BrowserContext, Page, async_playwright
|
|
20
|
+
|
|
21
|
+
from cnki_mcp.config import (
|
|
22
|
+
BROWSER_TIMEOUT,
|
|
23
|
+
IDLE_TIMEOUT,
|
|
24
|
+
USER_AGENTS,
|
|
25
|
+
)
|
|
26
|
+
from cnki_mcp.exceptions import BrowserError
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class AsyncBrowserPool:
    """Lazily started, shared async Playwright Chromium instance.

    One headless Chromium browser and one ``BrowserContext`` are created on
    first use and reused by every subsequent ``new_page()`` call, so all pages
    share cookies. The browser is torn down and relaunched when it has been
    idle for longer than ``IDLE_TIMEOUT`` or when it has disconnected.
    Start-up and shutdown are serialized with an ``asyncio.Lock``.
    """

    # Chromium flags used for EVERY launch (first attempt and post-install
    # retry) so both code paths start the browser with the same configuration.
    _LAUNCH_ARGS = (
        "--disable-blink-features=AutomationControlled",
        "--no-sandbox",
        "--disable-dev-shm-usage",
        "--disable-gpu",
        "--disable-infobars",
        "--disable-extensions",
    )

    # Seconds allowed for ``playwright install chromium`` to finish.
    _INSTALL_TIMEOUT = 120

    # Injected into every page of the shared context: hides navigator.webdriver.
    _INIT_SCRIPT = """
        Object.defineProperty(navigator, 'webdriver', {
            get: () => undefined,
        });
    """

    def __init__(self) -> None:
        """Create an empty pool; nothing is launched until first use."""
        self._playwright = None
        self._browser: Optional[Browser] = None
        self._context: Optional[BrowserContext] = None
        self._last_used: float = 0
        self._lock = asyncio.Lock()

    async def _launch(self) -> Browser:
        """Start the Playwright driver if needed and launch headless Chromium.

        The driver is started at most once; a retry reuses the existing driver
        instead of spawning (and leaking) a second one.
        """
        if self._playwright is None:
            self._playwright = await async_playwright().start()
        return await self._playwright.chromium.launch(
            headless=True,
            args=list(self._LAUNCH_ARGS),
        )

    async def _start_browser(self) -> Browser:
        """Launch Chromium, installing it once and retrying if it is missing.

        Raises:
            BrowserError: if the launch fails for a reason other than a
                missing browser, if the installation fails, or if the retry
                after installation fails.
        """
        try:
            return await self._launch()
        except Exception as e:
            message = str(e)
            needs_install = (
                "Executable doesn't exist" in message
                or "playwright" in message.lower()
            )
            if not needs_install:
                raise BrowserError(f"浏览器启动失败: {e}") from e

        await self._install_browser()
        try:
            return await self._launch()
        except Exception as e:
            raise BrowserError(f"浏览器启动失败: {e}") from e

    async def _ensure_browser(self) -> Browser:
        """Return a live browser, (re)starting browser and context as needed.

        Callers are expected to hold ``self._lock``.

        Raises:
            BrowserError: if Chromium cannot be started.
        """
        if self._browser is not None:
            idle_expired = time.time() - self._last_used > IDLE_TIMEOUT
            if idle_expired or not self._browser.is_connected():
                # Full teardown in both cases so the Playwright driver of a
                # dead browser is stopped rather than orphaned.
                await self._close_browser()

        if self._browser is None:
            try:
                self._browser = await self._start_browser()
            except Exception:
                # Do not leave a half-started driver behind on failure.
                await self._close_browser()
                raise

        if self._browser is None:
            raise BrowserError("浏览器启动失败")

        if self._context is None:
            # One shared context per browser: pages created from it share cookies.
            self._context = await self._browser.new_context(
                user_agent=random.choice(USER_AGENTS),
                viewport={"width": 1920, "height": 1080},
                locale="zh-CN",
            )
            await self._context.add_init_script(self._INIT_SCRIPT)

        self._last_used = time.time()
        return self._browser

    async def _install_browser(self) -> None:
        """Install Playwright's Chromium build using the current interpreter.

        The installer runs in a worker thread so the event loop is not blocked
        while the download is in progress.

        Raises:
            BrowserError: if the installer exits non-zero, times out, or
                cannot be started.
        """
        command = [sys.executable, "-m", "playwright", "install", "chromium"]
        try:
            await asyncio.to_thread(
                subprocess.check_call,
                command,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                timeout=self._INSTALL_TIMEOUT,
            )
        except (
            subprocess.CalledProcessError,
            subprocess.TimeoutExpired,
            OSError,
        ) as e:
            raise BrowserError(
                "Playwright Chromium 安装失败。请手动运行: playwright install chromium"
            ) from e

    async def new_page(self) -> Page:
        """Open a new page in the shared context (cookies are shared).

        Raises:
            BrowserError: if the browser or its context cannot be created.
        """
        async with self._lock:
            await self._ensure_browser()

            # Explicit check instead of ``assert`` so it survives ``python -O``.
            if self._context is None:
                raise BrowserError("浏览器启动失败")

            page = await self._context.new_page()
            page.set_default_timeout(BROWSER_TIMEOUT)
            return page

    async def navigate_to_cnki(self, page: Page) -> None:
        """Open the CNKI home page, pause briefly, then dismiss any popup."""
        await page.goto("https://www.cnki.net/", wait_until="domcontentloaded")
        # Randomized 1-2 s pause before interacting with the page.
        await asyncio.sleep(random.uniform(1, 2))
        await self._dismiss_popups(page)

    async def _dismiss_popups(self, page: Page) -> bool:
        """Click the first visible close control among known popup selectors.

        Returns:
            True if a close control was clicked, False if none was found.
        """
        dismiss_selectors = (
            "#close",
            ".close",
            'div[class*="popup"] a:has-text("关闭")',
            'div[class*="modal"] button:has-text("关闭")',
            'div[class*="layui-layer"] a[class*="layui-layer-close"]',
        )
        for selector in dismiss_selectors:
            try:
                elem = page.locator(selector).first
                if await elem.count() > 0 and await elem.is_visible():
                    await elem.click()
                    await asyncio.sleep(0.5)
                    return True
            except Exception:
                # Best effort: a selector that fails must not abort navigation.
                continue
        return False

    async def _close_browser(self) -> None:
        """Close context, browser and Playwright driver, ignoring errors.

        Each resource is released independently and its reference is cleared
        even if closing it raises, so the pool always ends up in the
        "not started" state.
        """
        if self._context is not None:
            try:
                await self._context.close()
            except Exception:
                pass  # best-effort teardown
            self._context = None

        if self._browser is not None:
            try:
                await self._browser.close()
            except Exception:
                pass  # best-effort teardown
            self._browser = None

        if self._playwright is not None:
            try:
                await self._playwright.stop()
            except Exception:
                pass  # best-effort teardown
            self._playwright = None

    async def close(self) -> None:
        """Shut the pool down; a later ``new_page()`` relaunches the browser."""
        async with self._lock:
            await self._close_browser()
|