chatgpt-mirai-qq-bot-web-search 0.2.6__py3-none-any.whl → 0.2.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {chatgpt_mirai_qq_bot_web_search-0.2.6.dist-info → chatgpt_mirai_qq_bot_web_search-0.2.8.dist-info}/METADATA +1 -1
- {chatgpt_mirai_qq_bot_web_search-0.2.6.dist-info → chatgpt_mirai_qq_bot_web_search-0.2.8.dist-info}/RECORD +7 -7
- web_search/web_searcher.py +45 -27
- {chatgpt_mirai_qq_bot_web_search-0.2.6.dist-info → chatgpt_mirai_qq_bot_web_search-0.2.8.dist-info}/LICENSE +0 -0
- {chatgpt_mirai_qq_bot_web_search-0.2.6.dist-info → chatgpt_mirai_qq_bot_web_search-0.2.8.dist-info}/WHEEL +0 -0
- {chatgpt_mirai_qq_bot_web_search-0.2.6.dist-info → chatgpt_mirai_qq_bot_web_search-0.2.8.dist-info}/entry_points.txt +0 -0
- {chatgpt_mirai_qq_bot_web_search-0.2.6.dist-info → chatgpt_mirai_qq_bot_web_search-0.2.8.dist-info}/top_level.txt +0 -0
@@ -1,11 +1,11 @@
|
|
1
1
|
web_search/__init__.py,sha256=bpu1gpf9tq3mOAoaGMM-8S6GBw2GzRAG6ClHGlp-CVw,4607
|
2
2
|
web_search/blocks.py,sha256=QuXt3KMuY4hUW-ucleNYXFvW6YbUAB4Xu4m_SxdCd-U,9152
|
3
3
|
web_search/config.py,sha256=DhLiERBJR2V5Boglf7Aq9Rbc4vsvLIh67CrLDIPeqA0,398
|
4
|
-
web_search/web_searcher.py,sha256
|
4
|
+
web_search/web_searcher.py,sha256=-wNuPzG0oZAr9-ihI5EdAexB9QXLUvc6bGyoVkWTlvw,21814
|
5
5
|
web_search/example/roleplayWithWebSearch.yaml,sha256=C-dGy3z8gcRcmxzurssP-kPRLqMf1TYR-nnNUaJjISE,7468
|
6
|
-
chatgpt_mirai_qq_bot_web_search-0.2.6.dist-info/LICENSE,sha256=ILBn-G3jdarm2w8oOrLmXeJNU3czuJvVhDLBASWdhM8,34522
|
7
|
-
chatgpt_mirai_qq_bot_web_search-0.2.
|
8
|
-
chatgpt_mirai_qq_bot_web_search-0.2.6.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
|
9
|
-
chatgpt_mirai_qq_bot_web_search-0.2.6.dist-info/entry_points.txt,sha256=o3kRDSdSmSdnCKlK6qS57aN0WpI4ab-Nxub2NwUrjf0,64
|
10
|
-
chatgpt_mirai_qq_bot_web_search-0.2.6.dist-info/top_level.txt,sha256=PoNm8MJYw_y8RTMaNlY0ePLoNHxVUAE2IHDuL5fFubI,11
|
11
|
-
chatgpt_mirai_qq_bot_web_search-0.2.6.dist-info/RECORD,,
|
6
|
+
chatgpt_mirai_qq_bot_web_search-0.2.8.dist-info/LICENSE,sha256=ILBn-G3jdarm2w8oOrLmXeJNU3czuJvVhDLBASWdhM8,34522
|
7
|
+
chatgpt_mirai_qq_bot_web_search-0.2.8.dist-info/METADATA,sha256=8L4yju3nxv1fAs4phYBFXJjNpsC1jIh1rhB5P9aiovI,1734
|
8
|
+
chatgpt_mirai_qq_bot_web_search-0.2.8.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
|
9
|
+
chatgpt_mirai_qq_bot_web_search-0.2.8.dist-info/entry_points.txt,sha256=o3kRDSdSmSdnCKlK6qS57aN0WpI4ab-Nxub2NwUrjf0,64
|
10
|
+
chatgpt_mirai_qq_bot_web_search-0.2.8.dist-info/top_level.txt,sha256=PoNm8MJYw_y8RTMaNlY0ePLoNHxVUAE2IHDuL5fFubI,11
|
11
|
+
chatgpt_mirai_qq_bot_web_search-0.2.8.dist-info/RECORD,,
|
web_search/web_searcher.py
CHANGED
@@ -31,7 +31,7 @@ class WebSearcher:
|
|
31
31
|
self.video_ids = self._load_video_ids()
|
32
32
|
self.search_engines = {
|
33
33
|
'bing': {
|
34
|
-
'url': 'https://
|
34
|
+
'url': 'https://cn.bing.com/search?q={}',
|
35
35
|
'selectors': ['.b_algo', '#b_results .b_algo', 'main .b_algo'],
|
36
36
|
'title_selector': 'h2',
|
37
37
|
'link_selector': 'h2 a',
|
@@ -72,7 +72,7 @@ class WebSearcher:
|
|
72
72
|
|
73
73
|
# 合并所有选项到一个字典
|
74
74
|
context_options = {
|
75
|
-
'headless':
|
75
|
+
'headless': False,
|
76
76
|
'chromium_sandbox': False,
|
77
77
|
'slow_mo': 50, # 减慢操作速度,更像人类
|
78
78
|
'args': [
|
@@ -163,9 +163,9 @@ class WebSearcher:
|
|
163
163
|
# 创建新标签页获取内容
|
164
164
|
page = await context.new_page()
|
165
165
|
try:
|
166
|
-
#
|
166
|
+
# 修改资源加载策略,允许加载必要的CSS
|
167
167
|
await page.route("**/*", lambda route: route.abort()
|
168
|
-
if route.request.resource_type in ['image', '
|
168
|
+
if route.request.resource_type in ['image', 'font', 'media'] # 移除 'stylesheet'
|
169
169
|
else route.continue_())
|
170
170
|
|
171
171
|
# 使用 domcontentloaded 而不是 networkidle
|
@@ -263,46 +263,64 @@ class WebSearcher:
|
|
263
263
|
|
264
264
|
# 使用搜索引擎特定的选择器
|
265
265
|
results = None
|
266
|
+
# 等待页面稳定
|
267
|
+
await asyncio.sleep(2) # 添加短暂延迟
|
266
268
|
|
267
269
|
# 对于Google,让页面有更多时间加载
|
268
270
|
if engine == 'google':
|
269
271
|
await self.simulate_human_scroll(page)
|
270
272
|
|
273
|
+
selector_timeout = 5000
|
271
274
|
for selector in engine_config['selectors']:
|
272
275
|
try:
|
273
276
|
logger.info(f"Trying selector: {selector}")
|
274
|
-
await page.wait_for_selector(selector, timeout=
|
277
|
+
await page.wait_for_selector(selector, timeout=selector_timeout) # 增加等待时间
|
278
|
+
selector_timeout = 500
|
275
279
|
results = await page.query_selector_all(selector)
|
276
280
|
if results and len(results) > 0:
|
277
281
|
logger.info(f"Found {len(results)} results with selector {selector}")
|
278
282
|
break
|
279
283
|
except Exception as e:
|
284
|
+
selector_timeout = 500
|
280
285
|
logger.warning(f"Selector {selector} failed: {e}")
|
281
286
|
continue
|
282
287
|
|
283
288
|
if not results:
|
284
|
-
#
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
289
|
+
# 添加重试机制
|
290
|
+
retry_count = 0
|
291
|
+
while not results and retry_count < max_results:
|
292
|
+
logger.info(f"Retrying search, attempt {retry_count + 1}/{max_results}")
|
293
|
+
# 刷新页面重试
|
294
|
+
await page.goto(
|
295
|
+
engine_config['url'].format(encoded_query),
|
296
|
+
wait_until='load',
|
297
|
+
timeout=timeout * 1000
|
298
|
+
)
|
299
|
+
await self.simulate_human_scroll(page)
|
300
|
+
|
301
|
+
# 重新尝试所有选择器
|
302
|
+
selector_timeout = 5000
|
303
|
+
for selector in engine_config['selectors']:
|
304
|
+
try:
|
305
|
+
logger.info(f"Retrying selector: {selector}")
|
306
|
+
await page.wait_for_selector(selector, timeout=selector_timeout)
|
307
|
+
selector_timeout = 500
|
308
|
+
results = await page.query_selector_all(selector)
|
309
|
+
if results and len(results) > 0:
|
310
|
+
logger.info(f"Found {len(results)} results with selector {selector} on retry {retry_count + 1}")
|
311
|
+
break
|
312
|
+
except Exception as e:
|
313
|
+
selector_timeout = 500
|
314
|
+
logger.warning(f"Selector {selector} failed on retry {retry_count + 1}: {e}")
|
315
|
+
continue
|
316
|
+
|
317
|
+
retry_count += 1
|
318
|
+
|
319
|
+
|
320
|
+
# 如果所有重试都失败了,才返回错误
|
321
|
+
if not results:
|
322
|
+
logger.error("No search results found after all retries")
|
323
|
+
return "搜索结果加载失败"
|
306
324
|
|
307
325
|
logger.info(f"Found {len(results)} search results")
|
308
326
|
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|