cnks 0.2.5__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cnks-0.3.1.dist-info/METADATA +101 -0
- cnks-0.3.1.dist-info/RECORD +17 -0
- cnks-0.3.1.dist-info/entry_points.txt +5 -0
- src/ThisIsAServerSample.py +377 -0
- src/__init__.py +7 -0
- src/cache.py +451 -0
- src/citzer.py +868 -0
- src/click50.py +527 -0
- src/client.py +135 -0
- src/cssci.py +267 -0
- src/extractlink.py +262 -0
- src/ifverify.py +134 -0
- src/main.py +70 -0
- src/searcher.py +767 -0
- src/server.py +487 -0
- src/worker.py +219 -0
- cnks/__init__.py +0 -50
- cnks/server.py +0 -1876
- cnks-0.2.5.dist-info/METADATA +0 -181
- cnks-0.2.5.dist-info/RECORD +0 -6
- cnks-0.2.5.dist-info/entry_points.txt +0 -2
- {cnks-0.2.5.dist-info → cnks-0.3.1.dist-info}/WHEEL +0 -0
src/click50.py
ADDED
@@ -0,0 +1,527 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
|
4
|
+
"""
|
5
|
+
结果显示条数设置模块 (Page Results Display Count Selector)
|
6
|
+
|
7
|
+
这个模块负责设置知网搜索结果页面中每页显示的条目数量。
|
8
|
+
在搜索结果加载后,通过模拟用户点击设置每页显示50条结果。
|
9
|
+
|
10
|
+
主要职责:
|
11
|
+
1. 定位页面中的显示数量控制区域
|
12
|
+
2. 点击下拉菜单
|
13
|
+
3. 选择"50"条每页选项
|
14
|
+
"""
|
15
|
+
|
16
|
+
import logging
|
17
|
+
import traceback
|
18
|
+
import asyncio
|
19
|
+
from typing import Dict, Any
|
20
|
+
|
21
|
+
# Module-level logger; every message emitted by this module goes through the
# "cnks.click50" logger name.
logger = logging.getLogger("cnks.click50")
|
23
|
+
|
24
|
+
# JavaScript prelude prepended to every probe script evaluated in the page.
#
# Two hazards are handled here:
#   * document.querySelector()/querySelectorAll() throw a SyntaxError for
#     selectors the browser cannot parse (for example Playwright-only
#     pseudo-classes such as ":has-text()").  An uncaught throw aborts the whole
#     script, so lookups are guarded and an unparseable selector simply counts
#     as "no match".
#   * getBoundingClientRect() reports an empty box for elements that are not
#     rendered.  Clicking the centre of such a box hits nothing useful, so
#     click targets are restricted to elements with a non-empty box.
_CLICK50_JS_PRELUDE = """
    const queryAll = (selector) => {
        try {
            return Array.from(document.querySelectorAll(selector));
        } catch (e) {
            return [];
        }
    };
    const isVisible = (el) => {
        const rect = el.getBoundingClientRect();
        return rect.width > 0 && rect.height > 0;
    };
    const firstVisible = (selector) => queryAll(selector).find(isVisible) || null;
    const xpathAll = (expression) => {
        const snapshot = document.evaluate(
            expression,
            document,
            null,
            XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,
            null
        );
        const nodes = [];
        for (let i = 0; i < snapshot.snapshotLength; i++) {
            nodes.push(snapshot.snapshotItem(i));
        }
        return nodes;
    };
    const clickTarget = (el, extra) => {
        const rect = el.getBoundingClientRect();
        return Object.assign({
            found: true,
            x: rect.left + rect.width / 2,
            y: rect.top + rect.height / 2,
            text: el.textContent.trim()
        }, extra || {});
    };
"""


def _click50_script(body):
    """Wrap a JavaScript body, preceded by the shared prelude, in an arrow function."""
    return "() => {\n" + _CLICK50_JS_PRELUDE + body + "\n}"


# Stage 1: report whether the page has anything that looks like the
# "results per page" control.  Purely informational; nothing is clicked.
# Text-based lookups are done with XPath because ":has-text()" is not valid
# CSS for the DOM API.
_FIND_DISPLAY_AREA_JS = _click50_script("""
    const dropdownSelectors = [
        '#id_grid_display_num',
        '.page-show-count',
        'div[id*="display_num"]',
        'div[id*="pageSize"]',
        'div[class*="page-show"]',
        'div[class*="perPage"]',
        'div[class*="sort"]',
        'div.dropdown',
        'select.form-control',
        '.dropdown-toggle'
    ];

    for (const selector of dropdownSelectors) {
        const element = queryAll(selector)[0];
        // Only accept a candidate whose text contains a number (the current page size).
        if (element && element.textContent && /[0-9]+/.test(element.textContent)) {
            return {
                found: true,
                selector: selector,
                id: element.id || "",
                text: element.textContent.trim(),
                tagName: element.tagName.toLowerCase(),
                hasDropdown: !!element.querySelector('.dropdown-menu, select, option')
            };
        }
    }

    const labelled = xpathAll(
        '//*[contains(text(), "显示:") or contains(text(), "每页") or contains(text(), "条目")]'
    );
    if (labelled.length > 0) {
        const element = labelled[0];
        return {
            found: true,
            xpath: true,
            text: element.textContent.trim(),
            tagName: element.tagName.toLowerCase()
        };
    }

    const options50 = queryAll('li[data-val="50"], option[value="50"]');
    if (options50.length > 0) {
        return {
            found: true,
            option50Found: true,
            count: options50.length
        };
    }

    return { found: false };
""")

# Stage 2 (fallback): locate an already visible "50" entry to click directly.
_FIND_DIRECT_OPTION_JS = _click50_script("""
    const item50 = firstVisible('li[data-val="50"], a[onclick*="50"]');
    if (item50) {
        return clickTarget(item50, { tagName: item50.tagName.toLowerCase() });
    }
    return { found: false };
""")

# Stage 3: locate the element that opens the page-size dropdown.
_FIND_DROPDOWN_TRIGGER_JS = _click50_script("""
    const dropdownTriggers = [
        '#id_grid_display_num',
        '#pageSize',
        'div.page-show-count',
        'div[class*="sort"]',
        '.toolbar-opt span',
        '.sort-default',
        '.dropdown-toggle'
    ];

    for (const selector of dropdownTriggers) {
        const element = firstVisible(selector);
        if (element) {
            return clickTarget(element, {
                selector: selector,
                tagName: element.tagName.toLowerCase()
            });
        }
    }

    const wrapper = xpathAll(
        '//*[contains(text(), "显示")]/ancestor::*[contains(@class, "dropdown") or contains(@class, "sort")]'
    ).find(isVisible);
    if (wrapper) {
        return clickTarget(wrapper, { xpath: true });
    }

    return { found: false };
""")

# Stage 4: locate the "50" entry once the dropdown has been opened.
_FIND_OPTION_50_JS = _click50_script("""
    const options = [
        'li[data-val="50"]',
        'option[value="50"]',
        'ul[class*="sort-list"] li:nth-child(3)'
    ];

    for (const selector of options) {
        const option = firstVisible(selector);
        if (option) {
            return clickTarget(option, {
                selector: selector,
                tagName: option.tagName.toLowerCase()
            });
        }
    }

    const byText = xpathAll('//li[text()="50" or contains(text(), "50条")]').find(isVisible);
    if (byText) {
        return clickTarget(byText, { xpath: true });
    }

    return { found: false };
""")

# Stage 5: probe the specific markup (#id_grid_display_num / javascript:void(0)
# link carrying data-val="50").
_FIND_SPECIFIC_STRUCTURE_JS = _click50_script("""
    try {
        const dropdown = firstVisible('div[id="id_grid_display_num"]');
        if (dropdown) {
            return { found: true, dropdown: clickTarget(dropdown) };
        }

        const javascriptLink = firstVisible('a[href*="javascript:void(0)"][data-val="50"]');
        if (javascriptLink) {
            return { found: true, jsLink: clickTarget(javascriptLink) };
        }
    } catch (e) {
        return { found: false, error: e.toString() };
    }

    return { found: false };
""")

# Stage 5b: after the specific dropdown was opened, measure the option again;
# coordinates taken while the menu was still collapsed are not usable.
_FIND_VISIBLE_LI_50_JS = _click50_script("""
    const option50 = firstVisible('li[data-val="50"]');
    return option50 ? clickTarget(option50) : { found: false };
""")

# Stage 6 (last resort): trigger the change from inside the page, either via a
# DOM click (works on hidden elements too) or via page-defined functions.
_FORCE_PAGE_SIZE_JS = """
() => {
    try {
        const option50 = document.querySelector('li[data-val="50"], a[data-val="50"]');
        if (option50) {
            option50.click();
            return { success: true, method: "click" };
        }

        if (typeof changePageSize === 'function') {
            changePageSize(50);
            return { success: true, method: "changePageSize" };
        }

        if (typeof setPageSize === 'function') {
            setPageSize(50);
            return { success: true, method: "setPageSize" };
        }

        if (typeof setDisplayCount === 'function') {
            setDisplayCount(50);
            return { success: true, method: "setDisplayCount" };
        }

        const clickables = document.querySelectorAll('[onclick*="50"]');
        if (clickables.length > 0) {
            clickables[0].click();
            return { success: true, method: "onclick", element: clickables[0].tagName };
        }

        return { success: false };
    } catch (e) {
        return { success: false, error: e.toString() };
    }
}
"""


async def _click50_click_and_settle(page, target):
    """Click the centre of *target* (a dict with ``x``/``y``) and wait for network idle."""
    await page.mouse.click(target['x'], target['y'])
    await page.wait_for_load_state("networkidle", timeout=10000)


async def _click50_single_attempt(page):
    """Run every strategy once.

    Returns the success message of the first strategy that clicked a "50"
    option, or ``None`` when no strategy found anything to click.  Exceptions
    raised by Playwright propagate to the caller, which owns the retry policy.
    """
    # 1. Probe for the display-count control (diagnostic only).
    display_info = await page.evaluate(_FIND_DISPLAY_AREA_JS)
    logger.info(f"显示控制区域信息: {display_info}")

    if not display_info.get('found', False):
        logger.warning("未找到显示控制区域,尝试使用备用方法")

        # 2. No control found: click a visible "50" entry directly if one exists.
        item50_info = await page.evaluate(_FIND_DIRECT_OPTION_JS)
        if item50_info.get('found', False):
            logger.info(f"找到50选项元素: {item50_info}")
            await page.mouse.click(item50_info['x'], item50_info['y'])
            logger.info("已直接点击50选项元素")
            await page.wait_for_load_state("networkidle", timeout=10000)
            return "成功直接点击50选项"

    # 3. Open the dropdown through its trigger element.
    dropdown_info = await page.evaluate(_FIND_DROPDOWN_TRIGGER_JS)
    logger.info(f"下拉菜单触发元素信息: {dropdown_info}")

    if dropdown_info.get('found', False):
        logger.info(f"点击下拉菜单触发元素: {dropdown_info.get('text', '')}")
        await page.mouse.click(dropdown_info['x'], dropdown_info['y'])

        # Give the menu time to expand before measuring its entries.
        await asyncio.sleep(1)

        # 4. Pick the "50" entry from the opened menu.
        option50_info = await page.evaluate(_FIND_OPTION_50_JS)
        logger.info(f"50选项信息: {option50_info}")

        if option50_info.get('found', False):
            logger.info(f"点击50选项: {option50_info.get('text', '')}")
            await _click50_click_and_settle(page, option50_info)
            return "成功设置每页显示50条结果"
        logger.warning("未找到50选项")
    else:
        logger.warning("未找到下拉菜单触发元素")

    # 5. Try the specific known markup.
    specific_info = await page.evaluate(_FIND_SPECIFIC_STRUCTURE_JS)
    logger.info(f"特定结构信息: {specific_info}")

    if specific_info.get('found', False):
        if 'dropdown' in specific_info:
            logger.info(f"点击特定下拉菜单: {specific_info['dropdown'].get('text', '')}")
            await page.mouse.click(
                specific_info['dropdown']['x'],
                specific_info['dropdown']['y']
            )
            await asyncio.sleep(1)

            # Measure the option only now that the menu is open.
            specific_option = await page.evaluate(_FIND_VISIBLE_LI_50_JS)
            if specific_option.get('found', False):
                logger.info(f"点击特定50选项: {specific_option.get('text', '')}")
                await _click50_click_and_settle(page, specific_option)
                return "通过特定结构成功设置每页显示50条结果"

        if 'jsLink' in specific_info:
            logger.info(f"点击JavaScript链接: {specific_info['jsLink'].get('text', '')}")
            await _click50_click_and_settle(page, specific_info['jsLink'])
            return "通过JavaScript链接成功设置每页显示50条结果"

    # 6. Last resort: drive the change from inside the page.
    logger.info("尝试使用JavaScript直接修改每页显示数量")
    direct_result = await page.evaluate(_FORCE_PAGE_SIZE_JS)
    logger.info(f"直接JavaScript执行结果: {direct_result}")

    if direct_result.get('success', False):
        await page.wait_for_load_state("networkidle", timeout=10000)
        return f"使用JavaScript方法'{direct_result.get('method')}'成功设置每页显示条数"

    return None


async def set_results_per_page(page, attempts=5) -> Dict[str, Any]:
    """Switch the search-result page to 50 results per page.

    The function tries a sequence of strategies (direct click on a visible
    "50" entry, opening the dropdown and clicking the entry, a probe for one
    specific markup, and finally in-page JavaScript) and repeats the whole
    sequence up to *attempts* times, sleeping one second between rounds.

    Selectors handed to the DOM API are restricted to ones the browser can
    parse, and lookups are guarded, so one bad selector can no longer abort
    every later strategy.  Mouse clicks are only sent to elements that have a
    non-empty bounding box.

    Args:
        page: Playwright page object showing the search results.
        attempts: Maximum number of rounds to try. Defaults to 5.

    Returns:
        Dict with the keys ``success`` (bool), ``message`` (str) and
        ``setting_applied`` (bool).  ``success`` means a "50" option was
        clicked and the page reached network idle; the resulting page size is
        not re-read from the page.
    """
    logger.info("开始设置每页显示50条结果")
    result = {
        "success": False,
        "message": "",
        "setting_applied": False
    }

    for attempt in range(attempts):
        try:
            logger.info(f"第{attempt+1}次尝试设置每页显示50条结果")
            success_message = await _click50_single_attempt(page)

            if success_message is not None:
                result["success"] = True
                result["setting_applied"] = True
                result["message"] = success_message
                return result

            logger.warning(f"第{attempt+1}次尝试失败,等待后重试")
            await asyncio.sleep(1)

        except Exception as e:
            # Retry boundary: any Playwright failure (timeout, detached page,
            # script error) is logged and the next round is attempted.
            error_msg = f"设置每页显示条数时出错: {str(e)}"
            logger.warning(error_msg)
            logger.warning(traceback.format_exc())
            await asyncio.sleep(1)

    # Every round failed.
    result["success"] = False
    result["message"] = "多次尝试后未能设置每页显示50条结果"
    return result
|
481
|
+
|
482
|
+
# 用于独立测试的函数
|
483
|
+
async def test_set_results_per_page(page_url):
    """Manually exercise ``set_results_per_page`` against a live page.

    Launches a visible (non-headless) Chromium window, opens *page_url*, runs
    the page-size routine, prints its result, saves a screenshot to
    ``results_per_page_test.png`` and keeps the window open for five seconds
    before closing the browser.

    Args:
        page_url: URL of the page to test against.
    """
    from playwright.async_api import async_playwright

    async with async_playwright() as playwright:
        chromium = await playwright.chromium.launch(headless=False)
        tab = await chromium.new_page()

        try:
            # Load the page and let network activity settle first.
            await tab.goto(page_url, wait_until="domcontentloaded")
            await tab.wait_for_load_state("networkidle")

            outcome = await set_results_per_page(tab)
            print(f"测试结果: {outcome}")

            # Keep a screenshot of the final state for inspection.
            await tab.screenshot(path="results_per_page_test.png")
            print("已保存测试结果截图")

            # Leave the window open briefly so the result can be seen.
            await asyncio.sleep(5)
        finally:
            await chromium.close()
|
512
|
+
|
513
|
+
# 如果直接运行脚本
|
514
|
+
if __name__ == "__main__":
    import sys

    # Console logging for the stand-alone run.
    logging.basicConfig(
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
        level=logging.INFO,
    )

    # First command-line argument is the target URL; otherwise use the default search page.
    url = sys.argv[1] if sys.argv[1:] else "https://kns.cnki.net/kns8s/search"

    print(f"测试在页面 {url} 上设置每页显示50条结果")
    asyncio.run(test_set_results_per_page(url))
|
src/client.py
ADDED
@@ -0,0 +1,135 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
|
4
|
+
"""
|
5
|
+
MCP Client Module
|
6
|
+
|
7
|
+
该模块提供一个简单的命令行客户端,用于与CNKS MCP服务器交互。
|
8
|
+
Communicates with the server over a TCP connection to 127.0.0.1:8000.
|
9
|
+
"""
|
10
|
+
|
11
|
+
import argparse
|
12
|
+
import json
|
13
|
+
import logging
|
14
|
+
import sys
|
15
|
+
import time
|
16
|
+
import asyncio
|
17
|
+
import subprocess
|
18
|
+
import os
|
19
|
+
from typing import Dict, Any, Optional
|
20
|
+
|
21
|
+
# Basic logging configuration for the client: INFO level, messages tagged with
# a client marker, written to stdout through a StreamHandler.
logging.basicConfig(
    level=logging.INFO,
    format="[%(asctime)s] [%(levelname)s] [客户端] %(message)s",
    handlers=[logging.StreamHandler(sys.stdout)]  # send log output to the console
)
# Module-level logger used by every function in this file.
logger = logging.getLogger("mcp.client")
|
28
|
+
|
29
|
+
async def run_client(keyword: str, force_refresh: bool = False, timeout: float = 600.0):
    """Send one ``search_keyword`` tool call to the local server and print the reply.

    Opens a TCP connection to 127.0.0.1:8000, writes the request as a single
    JSON line, then waits up to *timeout* seconds for a single JSON line in
    response.  A non-error response is pretty-printed to stdout; all failures
    are logged rather than raised.

    Args:
        keyword: Search keyword forwarded to the server.
        force_refresh: Forwarded as the ``force_refresh`` request parameter.
        timeout: Maximum number of seconds to wait for the response line.

    Returns:
        None.
    """
    logger.info("尝试连接到CNKS服务器...")

    # The whole response arrives as one line.  The default StreamReader limit
    # is 64 KiB, and readline() raises when a line exceeds the limit, so allow
    # much larger responses.
    response_line_limit = 64 * 1024 * 1024

    writer = None

    try:
        # Connect to the server.
        reader, writer = await asyncio.open_connection(
            '127.0.0.1', 8000, limit=response_line_limit
        )
        logger.info("已连接到服务器")

        # Build the request.
        request = {
            "type": "tool_call",
            "tool": "search_keyword",
            "params": {
                "keyword": keyword,
                "force_refresh": force_refresh
            }
        }
        payload = json.dumps(request)

        # Send the request as one newline-terminated JSON document.
        logger.info(f"发送请求: {payload}")
        writer.write(payload.encode() + b"\n")
        await writer.drain()

        # Wait for the response.
        start_time = time.time()
        logger.info(f"等待响应(最长 {timeout} 秒)...")

        try:
            response_line = await asyncio.wait_for(reader.readline(), timeout=timeout)
        except asyncio.TimeoutError:
            logger.error(f"等待响应超时({timeout}秒)")
            return

        if not response_line:
            # readline() returned an empty bytes object: the connection hit EOF.
            logger.error("服务器关闭连接,未收到响应")
            return

        response_data = json.loads(response_line.decode().strip())

        processing_time = time.time() - start_time
        logger.info(f"收到响应,处理耗时: {processing_time:.2f}秒")

        # Check the status reported by the server.
        if response_data.get("status") == "error":
            logger.error(f"服务器返回错误: {response_data.get('message', '未知错误')}")
            return

        # Print the response.
        print(json.dumps(response_data, indent=2, ensure_ascii=False))

        # Log statistics when the result carries them; a non-dict result
        # simply has none.
        result = response_data.get("result", {})
        if isinstance(result, dict):
            processed = result.get("processed_count", 0)
            unprocessed = result.get("unprocessed_count", 0)
            newly_processed = result.get("newly_processed", 0)

            if processed or unprocessed or newly_processed:
                logger.info(f"已处理: {processed}, 未处理: {unprocessed}, 新处理: {newly_processed}")

    except ConnectionRefusedError:
        logger.error("无法连接到服务器。请确保服务器已运行。")
        print("错误: 无法连接到服务器。请先运行服务器(python src/server.py)")
    except Exception as e:
        # Top-level boundary of the client: log the error with its traceback.
        logger.exception(f"客户端操作过程中发生错误: {str(e)}")
    finally:
        # Close the connection.
        if writer is not None:
            writer.close()
            try:
                await writer.wait_closed()
            except Exception as close_error:
                # Best-effort shutdown: a failure while closing must not mask
                # the outcome above, but cancellation still propagates.
                logger.debug(f"关闭连接时出错(已忽略): {close_error}")
|
109
|
+
|
110
|
+
def main():
    """Command-line entry point.

    Parses the keyword, ``--timeout`` and ``--force-refresh`` arguments and
    runs the asynchronous client with them.
    """
    cli = argparse.ArgumentParser(description="向CNKS服务器发送搜索请求。")
    cli.add_argument("keyword", help="要搜索的关键词。")
    # Response timeout in seconds; defaults to 600.
    cli.add_argument(
        "--timeout",
        default=600.0,
        type=float,
        help="等待服务器响应的超时时间(秒)。",
    )
    cli.add_argument(
        "--force-refresh",
        help="强制刷新缓存,忽略已缓存的内容。",
        action="store_true",
    )

    options = cli.parse_args()

    # Drive the async client to completion on a fresh event loop.
    request = run_client(options.keyword, options.force_refresh, options.timeout)
    asyncio.run(request)
|
133
|
+
|
134
|
+
# Run the command-line client only when this file is executed as a script.
if __name__ == "__main__":
    main()
|