cnks 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cnks/__init__.py +39 -0
- cnks/server.py +762 -0
- cnks-0.1.0.dist-info/METADATA +841 -0
- cnks-0.1.0.dist-info/RECORD +6 -0
- cnks-0.1.0.dist-info/WHEEL +4 -0
- cnks-0.1.0.dist-info/entry_points.txt +2 -0
cnks/server.py
ADDED
@@ -0,0 +1,762 @@
|
|
1
|
+
import asyncio
|
2
|
+
import json
|
3
|
+
import os
|
4
|
+
import platform
|
5
|
+
import re
|
6
|
+
import subprocess
|
7
|
+
import sys
|
8
|
+
import time
|
9
|
+
import logging
|
10
|
+
from pathlib import Path
|
11
|
+
from urllib.parse import quote
|
12
|
+
|
13
|
+
from mcp.server.models import InitializationOptions
|
14
|
+
import mcp.types as types
|
15
|
+
from mcp.server import NotificationOptions, Server
|
16
|
+
from pydantic import AnyUrl
|
17
|
+
import mcp.server.stdio
|
18
|
+
|
19
|
+
# Configure logging.
# The server is run over stdin/stdout (see main()), so log output must never
# go to stdout. Logs are written to "cnks.log" in the working directory; if
# that file cannot be opened (read-only or missing working directory), fall
# back to stderr instead of failing at import time.
_LOG_FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
try:
    logging.basicConfig(
        level=logging.DEBUG,
        filename="cnks.log",
        filemode="a",
        format=_LOG_FORMAT,
    )
except OSError:
    logging.basicConfig(
        level=logging.DEBUG,
        stream=sys.stderr,
        format=_LOG_FORMAT,
    )
logger = logging.getLogger("cnks")

# Playwright is an optional dependency: without it the server can only open
# Chrome directly and cannot drive the search page.
try:
    from playwright.async_api import async_playwright
    PLAYWRIGHT_AVAILABLE = True
except ImportError:
    PLAYWRIGHT_AVAILABLE = False
    logger.warning("Playwright未安装,将使用传统方式打开Chrome")

# Module-level state shared by the handlers below.
page_content = ""             # text summary of the links found by the last search
current_url = ""              # URL the server believes is currently open
notes: dict[str, str] = {}    # note name -> note content
browser_instance = None       # Playwright browser kept alive between calls

server = Server("cnks")
|
43
|
+
|
44
|
+
def find_chrome_executable():
    """Locate a Chrome/Chromium executable on this machine.

    The well-known install locations for the current platform (Windows,
    macOS, Linux) are probed first. If none of them exists, the path given
    in the ``CHROME_PATH`` environment variable is used, provided it exists.

    On platforms without a list of default locations, ``CHROME_PATH`` is
    still honoured, so the advice given in the callers' error messages
    ("set CHROME_PATH") works everywhere.

    Returns:
        The path of the first existing candidate, or ``None`` when no
        executable could be found.
    """
    system = platform.system()

    # Candidate install locations per platform.
    if system == "Windows":
        chrome_paths = [
            r"C:\Program Files\Google\Chrome\Application\chrome.exe",
            r"C:\Program Files (x86)\Google\Chrome\Application\chrome.exe",
            os.path.expanduser(r"~\AppData\Local\Google\Chrome\Application\chrome.exe"),
        ]
    elif system == "Darwin":  # macOS
        chrome_paths = [
            "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
            os.path.expanduser("~/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"),
        ]
    elif system == "Linux":
        chrome_paths = [
            "/usr/bin/google-chrome",
            "/usr/bin/chromium-browser",
            "/usr/bin/chromium",
        ]
    else:
        # Unknown platform: no defaults to probe, but do not return yet --
        # the user may still have pointed CHROME_PATH at a usable browser.
        chrome_paths = []

    # Return the first default location that exists.
    for path in chrome_paths:
        if os.path.exists(path):
            return path

    # Fall back to the user-supplied location.
    chrome_env = os.environ.get("CHROME_PATH")
    if chrome_env and os.path.exists(chrome_env):
        return chrome_env

    return None
|
80
|
+
|
81
|
+
def open_chrome(url):
    """Launch Chrome as a separate process and point it at ``url``.

    Args:
        url: The address passed to Chrome on its command line.

    Returns:
        ``True`` when the process was started, otherwise a human-readable
        error string (Chrome not found, or the launch raised). Callers
        distinguish the two cases with ``result is True``.
    """
    try:
        chrome_path = find_chrome_executable()

        if not chrome_path:
            return "未找到Chrome可执行文件。请设置CHROME_PATH环境变量指向Chrome位置。"

        # This server is run over stdin/stdout. A child that inherits those
        # descriptors can write into the protocol stream, so detach all
        # three standard streams from the browser process.
        subprocess.Popen(
            [chrome_path, url],
            stdin=subprocess.DEVNULL,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
        time.sleep(2)  # fixed pause to give the page time to load
        return True
    except Exception as e:
        return f"打开Chrome时出错: {str(e)}"
|
97
|
+
|
98
|
+
async def _launch_chrome(chrome_path):
    """Start a Playwright driver and launch a visible Chrome window with it.

    The system Chrome channel is tried first, then ``chrome_path``. If both
    launches fail the driver is stopped again so it is not leaked, and the
    last exception propagates.
    """
    playwright = await async_playwright().start()
    try:
        try:
            logger.debug("[DEBUG] 尝试使用channel='chrome'启动浏览器")
            return await playwright.chromium.launch(
                headless=False,
                channel="chrome"
            )
        except Exception as e:
            logger.debug(f"[DEBUG] channel='chrome'方式失败: {str(e)}")
            logger.debug("[DEBUG] 尝试使用executable_path启动浏览器")
            # Fall back to the explicitly located executable.
            return await playwright.chromium.launch(
                headless=False,
                executable_path=chrome_path
            )
    except Exception:
        await playwright.stop()
        raise


async def _click_cssci_text(page, log_message):
    """Click the first element whose text is "CSSCI"; return whether one was found."""
    cssci_text = await page.query_selector(':text("CSSCI")')
    if not cssci_text:
        return False
    await cssci_text.click()
    logger.debug(log_message)
    return True


async def _filter_cssci(page):
    """Tick the CSSCI source-category filter, count the result links, return a status message."""
    try:
        # Pause so the page can finish rendering.
        await asyncio.sleep(2)

        logger.debug("[DEBUG] 尝试勾选CSSCI选项")

        # Look for the "source category" group first.
        source_category = await page.query_selector('div.group-item:has-text("来源类别")')

        if source_category:
            logger.debug("[DEBUG] 找到来源类别区域")
            not_found_message = "已完成搜索和设置每页显示50条,但未找到CSSCI选项。浏览器将保持打开状态。"

            # Prefer the checkbox next to the CSSCI label inside that group.
            cssci_checkbox = await source_category.query_selector('input[type="checkbox"]:near(:text("CSSCI"))')
            if cssci_checkbox:
                await cssci_checkbox.click()
                logger.debug("[DEBUG] 成功勾选CSSCI选项")
                clicked = True
            else:
                logger.debug("[DEBUG] 在来源类别区域未找到CSSCI选项")
                # Otherwise click the CSSCI text anywhere on the page.
                clicked = await _click_cssci_text(page, "[DEBUG] 通过文本找到并点击了CSSCI")
        else:
            logger.debug("[DEBUG] 未找到来源类别区域")
            not_found_message = "已完成搜索和设置每页显示50条,但未找到来源类别区域或CSSCI选项。浏览器将保持打开状态。"
            clicked = await _click_cssci_text(page, "[DEBUG] 直接找到并点击了CSSCI")

        if not clicked:
            return not_found_message

        # Wait for the filtered result list, then collect the abstract links.
        await page.wait_for_load_state("networkidle")
        links_count = await find_and_count_abstract_links(page)

        return f"已完成全部操作:搜索关键词、设置每页显示50条、勾选CSSCI来源类别。找到{links_count}条包含article/abstract?v=的链接。浏览器将保持打开状态。"
    except Exception as e:
        logger.debug(f"[DEBUG] 勾选CSSCI选项时出错: {str(e)}")
        return f"已完成搜索和设置每页显示50条,但勾选CSSCI时出错: {str(e)}。浏览器将保持打开状态。"


async def _set_page_size_and_filter(page):
    """Switch the result list to 50 items per page, then apply the CSSCI filter."""
    try:
        # Pause so the result page elements are present.
        await asyncio.sleep(2)

        logger.debug("[DEBUG] 尝试点击排序下拉框")
        sort_dropdown = await page.query_selector('div[class="sort"][id="perPageDiv"]')
        if not sort_dropdown:
            logger.debug("[DEBUG] 未找到排序下拉框")
            return "已搜索,但未找到排序下拉框。浏览器将保持打开状态。"

        await sort_dropdown.click()
        logger.debug("[DEBUG] 成功点击排序下拉框")

        # Give the drop-down menu time to appear.
        await asyncio.sleep(1)

        logger.debug("[DEBUG] 尝试点击'50'选项")
        option_50 = await page.query_selector('li[data-val="50"]')
        if not option_50:
            logger.debug("[DEBUG] 未找到'50'选项")
            return "已搜索并点击下拉框,但未找到'50'选项。浏览器将保持打开状态。"

        await option_50.click()
        logger.debug("[DEBUG] 成功点击'50'选项")
        await page.wait_for_load_state("networkidle")

        return await _filter_cssci(page)
    except Exception as e:
        logger.debug(f"[DEBUG] 点击下拉框或选项时出错: {str(e)}")
        return f"已搜索,但在点击下拉框或选项时出错: {str(e)}。浏览器将保持打开状态。"


async def _run_search(page, keywords):
    """Type ``keywords`` into the search box, submit, then refine the result list."""
    try:
        search_input = await page.query_selector('input.search-input')
        if not search_input:
            return f"未找到搜索框。已打开知网页面,请手动搜索: {keywords}"

        # Clear the box, then enter the keywords.
        await search_input.fill("")
        await search_input.fill(keywords)
        logger.debug(f"[DEBUG] 已在搜索框中输入: {keywords}")

        # Short pause so the user can see what was typed.
        await asyncio.sleep(1)

        search_button = await page.query_selector('.search-btn')
        if not search_button:
            return f"已填写搜索关键词: {keywords},但未找到搜索按钮。请手动点击搜索。"

        await search_button.click()
        logger.debug("[DEBUG] 已点击搜索按钮")
        await page.wait_for_load_state("networkidle")

        return await _set_page_size_and_filter(page)
    except Exception as e:
        logger.debug(f"[DEBUG] 填写搜索框或点击搜索按钮时出错: {str(e)}")
        return f"自动搜索过程中出错,请手动在页面中搜索: {keywords}"


async def search_with_playwright(keywords):
    """Search CNKI for ``keywords`` by driving Chrome through Playwright.

    Opens the CNKI search page, submits the keywords, switches the list to
    50 results per page, ticks the CSSCI source filter and records the
    abstract links through ``find_and_count_abstract_links``.

    The browser is deliberately left open when the function returns. It is
    stored in the module-level ``browser_instance`` and reused by later
    calls while it is still connected, so repeated searches no longer start
    an extra Playwright driver and browser each time.

    Args:
        keywords: Text typed into the CNKI search box.

    Returns:
        A status message describing how far the automation got. Failures
        are reported in the message; no exception is raised to the caller.
    """
    global browser_instance

    if not PLAYWRIGHT_AVAILABLE:
        return "需要安装playwright模块:uv add playwright"

    try:
        chrome_path = find_chrome_executable()
        if not chrome_path:
            return "未找到Chrome可执行文件。请设置CHROME_PATH环境变量指向Chrome位置。"

        logger.debug(f"[DEBUG] 使用Playwright搜索,Chrome路径: {chrome_path}")

        # Reuse the browser from an earlier call when it is still alive;
        # only launch a new one when there is none or it was closed.
        browser = browser_instance
        if browser is None or not browser.is_connected():
            browser = await _launch_chrome(chrome_path)
            browser_instance = browser

        page = await browser.new_page()

        # Navigate to the CNKI search page and wait for it to settle.
        await page.goto("https://kns.cnki.net/kns8s/search")
        logger.debug("[DEBUG] 成功打开知网搜索页面")
        await page.wait_for_load_state("networkidle")

        # Note: neither the browser nor the driver is closed here on purpose.
        return await _run_search(page, keywords)
    except Exception as e:
        error_msg = str(e)
        logger.debug(f"[DEBUG] Playwright错误: {error_msg}")

        # Give clearer guidance when Playwright's own browser is missing.
        if "Executable doesn't exist" in error_msg and "ms-playwright" in error_msg:
            return f"需要安装Playwright的浏览器: playwright install\n如果您想使用系统Chrome,请重新启动服务器。\n\n{error_msg}"

        return f"使用Playwright启动Chrome失败: {error_msg}。尝试使用传统方式打开浏览器。"
|
291
|
+
|
292
|
+
def search_with_direct_chrome(keywords):
    """Open the CNKI search page in Chrome without Playwright.

    Only the search page itself is opened; the keywords are not submitted
    (the original author notes that CNKI does not accept the query as a URL
    parameter). The returned message asks the user to type them in.

    Args:
        keywords: The search text, echoed back in the success message.

    Returns:
        A message reporting either that the page was opened or why Chrome
        could not be started.
    """
    logger.debug("[DEBUG] 正在使用search_with_direct_chrome函数")

    outcome = open_chrome("https://kns.cnki.net/kns8s/search")

    # open_chrome returns True on success and an error string otherwise.
    if outcome is not True:
        return f"打开Chrome浏览器失败: {outcome}"
    return f"已打开知网页面。请在搜索框中输入并搜索: {keywords}"
|
306
|
+
|
307
|
+
def get_page_content():
    """Describe the page the server believes is currently open.

    This is a simplified stand-in: no real page content is fetched. The
    text is derived solely from the module-level ``current_url``.

    Returns:
        A notice that nothing is open, a CNKI-specific description when the
        URL contains "cnki", or a generic "page opened" line otherwise.
    """
    if not current_url:
        return "尚未打开任何页面"

    # Any non-CNKI address gets the generic description.
    if "cnki" not in current_url:
        return f"已打开页面: {current_url}"

    return f"中国知网搜索页面\n当前URL: {current_url}\n可使用搜索工具查询文献。"
|
318
|
+
|
319
|
+
@server.list_resources()
async def handle_list_resources() -> list[types.Resource]:
    """List the resources this server exposes.

    Returns:
        Two fixed web-page resources (the current page and the CNKI search
        page) followed by one ``note://internal/<name>`` resource for every
        entry in ``notes``.
    """
    fixed_pages = [
        # The page currently open.
        types.Resource(
            uri=AnyUrl("webpage://current"),
            name="当前网页",
            description="当前打开的网页内容",
            mimeType="text/plain",
        ),
        # The CNKI search page.
        types.Resource(
            uri=AnyUrl("webpage://cnki/search"),
            name="知网搜索页",
            description="中国知网搜索页面",
            mimeType="text/plain",
        ),
    ]

    # One resource per stored note.
    note_entries = [
        types.Resource(
            uri=AnyUrl(f"note://internal/{note_name}"),
            name=f"笔记: {note_name}",
            description=f"笔记: {note_name}",
            mimeType="text/plain",
        )
        for note_name in notes
    ]

    return fixed_pages + note_entries
|
356
|
+
|
357
|
+
@server.read_resource()
async def handle_read_resource(uri: AnyUrl) -> str:
    """Return the content of one resource.

    Supported URIs:
        * ``webpage://current`` -- description of the current page.
        * ``webpage://cnki/search`` -- opens the CNKI search page in Chrome.
        * ``note://<host>/<name>`` -- content of the stored note ``name``.

    Args:
        uri: The resource URI requested by the client.

    Returns:
        The resource text, or the error string from ``open_chrome`` when
        the CNKI page could not be opened.

    Raises:
        ValueError: If the note does not exist, or the URI is not one of
            the supported forms.
    """
    global current_url
    # Local import: the file's top-level imports only bring in ``quote``.
    from urllib.parse import unquote

    scheme = uri.scheme

    if scheme == "webpage":
        path = uri.path if uri.path else ""
        host = uri.host if uri.host else ""

        if host == "current":
            return get_page_content()
        elif host == "cnki" and path == "/search":
            target = "https://kns.cnki.net/kns8s/search"
            result = open_chrome(target)
            if result is True:
                # Record the page only once it has actually been opened, so
                # a failed launch is not later reported as an open page.
                current_url = target
                return "已打开中国知网搜索页面,可使用搜索工具查询文献。"
            else:
                return result
    elif scheme == "note":
        name = uri.path
        if name is not None:
            name = name.lstrip("/")
            # The URL path may arrive percent-encoded (non-ASCII characters,
            # spaces). Try the raw form first for backward compatibility,
            # then the decoded form.
            for candidate in (name, unquote(name)):
                if candidate in notes:
                    return notes[candidate]
        raise ValueError(f"笔记未找到: {name}")

    raise ValueError(f"不支持的URI方案或资源未找到: {uri}")
|
387
|
+
|
388
|
+
@server.list_prompts()
async def handle_list_prompts() -> list[types.Prompt]:
    """List the prompt templates this server offers.

    Returns:
        Three prompts: ``search-literature`` (required ``keywords``),
        ``advanced-search`` (optional title/author/keywords/institution)
        and ``summarize-notes`` (optional ``style``).
    """

    def optional_arg(arg_name, arg_description):
        # Every argument except search-literature's keywords is optional.
        return types.PromptArgument(
            name=arg_name,
            description=arg_description,
            required=False,
        )

    search_literature = types.Prompt(
        name="search-literature",
        description="按主题搜索文献",
        arguments=[
            types.PromptArgument(
                name="keywords",
                description="搜索关键词",
                required=True,
            )
        ],
    )

    advanced_search = types.Prompt(
        name="advanced-search",
        description="高级文献搜索",
        arguments=[
            optional_arg("title", "论文标题"),
            optional_arg("author", "作者"),
            optional_arg("keywords", "关键词"),
            optional_arg("institution", "机构"),
        ],
    )

    summarize_notes = types.Prompt(
        name="summarize-notes",
        description="总结所有笔记",
        arguments=[
            optional_arg("style", "摘要风格 (brief/detailed)"),
        ],
    )

    return [search_literature, advanced_search, summarize_notes]
|
441
|
+
|
442
|
+
@server.get_prompt()
async def handle_get_prompt(
    name: str, arguments: dict[str, str] | None
) -> types.GetPromptResult:
    """Build the prompt named ``name`` from the supplied arguments.

    Args:
        name: One of ``search-literature``, ``advanced-search`` or
            ``summarize-notes``.
        arguments: Prompt arguments; ``None`` is treated as empty.

    Returns:
        A result holding a single user message with the generated text.

    Raises:
        ValueError: If ``name`` is not a known prompt.
    """
    params = arguments or {}

    def single_user_message(description, text):
        # Every prompt here consists of exactly one user text message.
        return types.GetPromptResult(
            description=description,
            messages=[
                types.PromptMessage(
                    role="user",
                    content=types.TextContent(type="text", text=text),
                )
            ],
        )

    if name == "search-literature":
        keywords = params.get("keywords", "")
        return single_user_message(
            "按主题搜索文献",
            f"请在中国知网搜索关于\"{keywords}\"的文献,并分析主要研究趋势。",
        )

    if name == "advanced-search":
        # (argument name, phrase placed before the quoted value), in output order.
        labelled_fields = (
            ("title", "标题包含"),
            ("author", "作者为"),
            ("keywords", "关键词包含"),
            ("institution", "机构为"),
        )
        # Only arguments with a truthy value contribute a criterion.
        search_terms = [
            f'{label}"{value}"'
            for field, label in labelled_fields
            if (value := params.get(field, ""))
        ]
        search_criteria = "、".join(search_terms)
        return single_user_message(
            "高级文献搜索",
            f"请在中国知网搜索{search_criteria}的文献,并总结相关研究成果。",
        )

    if name == "summarize-notes":
        style = params.get("style", "brief")
        if style == "detailed":
            detail_prompt = "请提供详细分析。"
        else:
            detail_prompt = ""
        note_lines = [
            f"- {note_name}: {note_body}"
            for note_name, note_body in notes.items()
        ]
        return single_user_message(
            "总结所有笔记",
            f"以下是需要总结的笔记:{detail_prompt}\n\n" + "\n".join(note_lines),
        )

    raise ValueError(f"未知提示: {name}")
|
513
|
+
|
514
|
+
@server.list_tools()
async def handle_list_tools() -> list[types.Tool]:
    """List the tools this server offers.

    Returns:
        Four tools: ``open-cnki`` and ``get-abstract-links`` (no
        arguments), ``search-keywords`` (``keywords``) and ``add-note``
        (``name`` and ``content``). All declared arguments are required
        strings.
    """

    def string_params(**descriptions):
        # Object schema in which every listed property is a required string.
        properties = {
            key: {"type": "string", "description": text}
            for key, text in descriptions.items()
        }
        return {
            "type": "object",
            "properties": properties,
            "required": list(properties),
        }

    open_cnki = types.Tool(
        name="open-cnki",
        description="打开中国知网搜索页面",
        inputSchema=string_params(),
    )
    search_keywords = types.Tool(
        name="search-keywords",
        description="在知网搜索关键词",
        inputSchema=string_params(keywords="搜索关键词"),
    )
    add_note = types.Tool(
        name="add-note",
        description="添加笔记",
        inputSchema=string_params(name="笔记名称", content="笔记内容"),
    )
    get_abstract_links = types.Tool(
        name="get-abstract-links",
        description="获取最近一次搜索找到的论文摘要链接",
        inputSchema=string_params(),
    )

    return [open_cnki, search_keywords, add_note, get_abstract_links]
|
560
|
+
|
561
|
+
@server.call_tool()
async def handle_call_tool(
    name: str, arguments: dict | None
) -> list[types.TextContent | types.ImageContent | types.EmbeddedResource]:
    """Execute the tool named ``name``.

    Tools:
        * ``open-cnki`` -- open the CNKI search page in Chrome.
        * ``search-keywords`` -- search CNKI, through Playwright when it is
          available, otherwise by just opening the page.
        * ``add-note`` -- store a note and notify the client that the
          resource list changed.
        * ``get-abstract-links`` -- return the links recorded by the most
          recent search.

    Args:
        name: Tool name.
        arguments: Tool arguments, or ``None``.

    Returns:
        A one-element list with the textual result.

    Raises:
        ValueError: If required arguments are missing or the tool is unknown.
    """
    global current_url, page_content

    def text_reply(message):
        # Every tool answers with a single text item.
        return [types.TextContent(type="text", text=message)]

    if name == "open-cnki":
        target = "https://kns.cnki.net/kns8s/search"
        result = open_chrome(target)
        if result is True:
            # Record the page only after it was actually opened, so a
            # failed launch is not later reported as an open page.
            current_url = target
            return text_reply("已打开中国知网搜索页面。")
        return text_reply(f"打开中国知网时出错: {result}")

    elif name == "search-keywords":
        if not arguments:
            raise ValueError("缺少参数")

        keywords = arguments.get("keywords")
        if not keywords:
            raise ValueError("缺少关键词")

        if PLAYWRIGHT_AVAILABLE:
            # Preferred path: drive the search page automatically.
            result = await search_with_playwright(keywords)
        else:
            # Without Playwright only the page can be opened; tell the user
            # how to enable automatic search.
            result = f"{search_with_direct_chrome(keywords)}。如需自动搜索功能,请安装: uv add playwright"

        # Both helpers report success or failure only as text, so the URL
        # is recorded unconditionally, as before.
        current_url = "https://kns.cnki.net/kns8s/search"
        return text_reply(result)

    elif name == "add-note":
        if not arguments:
            raise ValueError("缺少参数")

        note_name = arguments.get("name")
        content = arguments.get("content")

        if not note_name or not content:
            raise ValueError("缺少名称或内容")

        # Update server state, then tell the client the resource list changed.
        notes[note_name] = content
        await server.request_context.session.send_resource_list_changed()

        return text_reply(f"已添加笔记 '{note_name}': {content}")

    elif name == "get-abstract-links":
        # page_content is filled in by find_and_count_abstract_links and
        # starts with "找到" when a search has produced a result.
        if not page_content or "找到" not in page_content:
            return text_reply("尚未执行搜索或未找到链接。请先使用search-keywords工具搜索。")

        return text_reply(page_content)

    raise ValueError(f"未知工具: {name}")
|
657
|
+
|
658
|
+
async def find_and_count_abstract_links(page):
    """Collect the links on ``page`` whose href contains ``article/abstract?v=``.

    On success the module-level ``page_content`` is replaced with a summary
    line followed by one numbered href per line. On failure ``page_content``
    is cleared, so results from an earlier search are not served as if they
    belonged to this one.

    Args:
        page: The Playwright page showing the search results.

    Returns:
        The number of matching links, or ``0`` if anything went wrong.
    """
    global page_content

    try:
        logger.debug("[DEBUG] 开始查找所有包含article/abstract?v=的链接")

        # Pause so the result list can finish loading.
        await asyncio.sleep(2)

        all_links = await page.query_selector_all('a[href*="article/abstract?v="]')
        links_count = len(all_links)

        logger.debug(f"[DEBUG] 找到{links_count}条包含article/abstract?v=的链接")

        # Read and log the href of every link, numbered from 1.
        numbered_hrefs = []
        for index, link in enumerate(all_links, start=1):
            href = await link.get_attribute('href')
            numbered_hrefs.append(f"{index}. {href}")
            logger.debug(f"[DEBUG] 链接 {index}: {href}")

        # The search sets 50 results per page, so 50 is the expected count.
        if links_count == 50:
            logger.debug("[DEBUG] 链接数量正好是50条,符合预期")
        elif links_count < 50:
            logger.debug(f"[DEBUG] 链接数量为{links_count}条,少于预期的50条")
        else:
            logger.debug(f"[DEBUG] 链接数量为{links_count}条,多于预期的50条")

        # Store the result: index and href only, no titles.
        page_content = (
            f"找到{links_count}条包含article/abstract?v=的链接\n\n"
            + "\n".join(numbered_hrefs)
        )

        return links_count
    except Exception as e:
        logger.debug(f"[DEBUG] 查找链接时出错: {str(e)}")
        # Drop any stale result from a previous search.
        page_content = ""
        return 0
|
703
|
+
|
704
|
+
async def main():
    """Run the server over stdin/stdout until the streams close."""
    async with mcp.server.stdio.stdio_server() as (read_stream, write_stream):
        capabilities = server.get_capabilities(
            notification_options=NotificationOptions(),
            experimental_capabilities={},
        )
        init_options = InitializationOptions(
            server_name="cnks",
            server_version="0.1.0",
            capabilities=capabilities,
        )
        await server.run(read_stream, write_stream, init_options)
|
720
|
+
|
721
|
+
# 为符合README.md的要求,添加从FastMCP导出的接口
|
722
|
+
def create_fastmcp_server():
    """Build a FastMCP server exposing the same features as this module.

    Registers three tools (open the CNKI page, search keywords, return the
    recorded abstract links) and the ``webpage://current`` resource.

    Returns:
        The configured ``FastMCP`` instance, or ``None`` when an
        ``ImportError`` occurs (a warning is logged in that case).
    """
    try:
        from mcp.server.fastmcp import FastMCP
        app = FastMCP("知网搜索")

        # The inner docstrings are left untouched: FastMCP may expose a
        # function's docstring as the tool description (TODO confirm), so
        # they are treated as runtime text here.

        @app.tool()
        def open_cnki_search():
            """打开中国知网搜索页面"""
            return open_chrome("https://kns.cnki.net/kns8s/search")

        @app.tool()
        async def search_keywords(keywords: str) -> str:
            """在知网搜索关键词"""
            logger.debug("[DEBUG] 正在使用FastMCP的search_keywords函数")
            if not PLAYWRIGHT_AVAILABLE:
                # Plain Chrome can only open the page; add an install hint.
                outcome = search_with_direct_chrome(keywords)
                return f"{outcome}。如需自动搜索功能,请安装: uv add playwright"
            return await search_with_playwright(keywords)

        @app.tool()
        def get_abstract_links() -> str:
            """获取最近一次搜索找到的论文摘要链接"""
            if page_content and "找到" in page_content:
                return page_content
            return "尚未执行搜索或未找到链接。请先使用search_keywords工具搜索。"

        @app.resource("webpage://current")
        def get_current_webpage() -> str:
            """获取当前网页内容"""
            return get_page_content()

        return app
    except ImportError:
        logger.warning("警告: 无法导入FastMCP,请确保已安装最新版本的MCP")
        return None
|
760
|
+
|
761
|
+
if __name__ == "__main__":
|
762
|
+
asyncio.run(main())
|