chatgpt-mirai-qq-bot-web-search 0.2.7__py3-none-any.whl → 0.2.9__py3-none-any.whl
This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- {chatgpt_mirai_qq_bot_web_search-0.2.7.dist-info → chatgpt_mirai_qq_bot_web_search-0.2.9.dist-info}/METADATA +1 -1
- {chatgpt_mirai_qq_bot_web_search-0.2.7.dist-info → chatgpt_mirai_qq_bot_web_search-0.2.9.dist-info}/RECORD +7 -7
- web_search/web_searcher.py +18 -10
- {chatgpt_mirai_qq_bot_web_search-0.2.7.dist-info → chatgpt_mirai_qq_bot_web_search-0.2.9.dist-info}/LICENSE +0 -0
- {chatgpt_mirai_qq_bot_web_search-0.2.7.dist-info → chatgpt_mirai_qq_bot_web_search-0.2.9.dist-info}/WHEEL +0 -0
- {chatgpt_mirai_qq_bot_web_search-0.2.7.dist-info → chatgpt_mirai_qq_bot_web_search-0.2.9.dist-info}/entry_points.txt +0 -0
- {chatgpt_mirai_qq_bot_web_search-0.2.7.dist-info → chatgpt_mirai_qq_bot_web_search-0.2.9.dist-info}/top_level.txt +0 -0
@@ -1,11 +1,11 @@
|
|
1
1
|
web_search/__init__.py,sha256=bpu1gpf9tq3mOAoaGMM-8S6GBw2GzRAG6ClHGlp-CVw,4607
|
2
2
|
web_search/blocks.py,sha256=QuXt3KMuY4hUW-ucleNYXFvW6YbUAB4Xu4m_SxdCd-U,9152
|
3
3
|
web_search/config.py,sha256=DhLiERBJR2V5Boglf7Aq9Rbc4vsvLIh67CrLDIPeqA0,398
|
4
|
-
web_search/web_searcher.py,sha256=
|
4
|
+
web_search/web_searcher.py,sha256=wjhe6vqhnPvxhK4ssvmCAjvefQP6iE_7CpyhhHdY4a8,21813
|
5
5
|
web_search/example/roleplayWithWebSearch.yaml,sha256=C-dGy3z8gcRcmxzurssP-kPRLqMf1TYR-nnNUaJjISE,7468
|
6
|
-
chatgpt_mirai_qq_bot_web_search-0.2.
|
7
|
-
chatgpt_mirai_qq_bot_web_search-0.2.
|
8
|
-
chatgpt_mirai_qq_bot_web_search-0.2.
|
9
|
-
chatgpt_mirai_qq_bot_web_search-0.2.
|
10
|
-
chatgpt_mirai_qq_bot_web_search-0.2.
|
11
|
-
chatgpt_mirai_qq_bot_web_search-0.2.
|
6
|
+
chatgpt_mirai_qq_bot_web_search-0.2.9.dist-info/LICENSE,sha256=ILBn-G3jdarm2w8oOrLmXeJNU3czuJvVhDLBASWdhM8,34522
|
7
|
+
chatgpt_mirai_qq_bot_web_search-0.2.9.dist-info/METADATA,sha256=KeMLPnusHjs43C7z7zS-ImyZH5KVVWWuIIMhBzKNuVs,1734
|
8
|
+
chatgpt_mirai_qq_bot_web_search-0.2.9.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
|
9
|
+
chatgpt_mirai_qq_bot_web_search-0.2.9.dist-info/entry_points.txt,sha256=o3kRDSdSmSdnCKlK6qS57aN0WpI4ab-Nxub2NwUrjf0,64
|
10
|
+
chatgpt_mirai_qq_bot_web_search-0.2.9.dist-info/top_level.txt,sha256=PoNm8MJYw_y8RTMaNlY0ePLoNHxVUAE2IHDuL5fFubI,11
|
11
|
+
chatgpt_mirai_qq_bot_web_search-0.2.9.dist-info/RECORD,,
|
web_search/web_searcher.py
CHANGED
@@ -31,7 +31,7 @@ class WebSearcher:
|
|
31
31
|
self.video_ids = self._load_video_ids()
|
32
32
|
self.search_engines = {
|
33
33
|
'bing': {
|
34
|
-
'url': 'https://
|
34
|
+
'url': 'https://cn.bing.com/search?q={}',
|
35
35
|
'selectors': ['.b_algo', '#b_results .b_algo', 'main .b_algo'],
|
36
36
|
'title_selector': 'h2',
|
37
37
|
'link_selector': 'h2 a',
|
@@ -163,9 +163,9 @@ class WebSearcher:
|
|
163
163
|
# 创建新标签页获取内容
|
164
164
|
page = await context.new_page()
|
165
165
|
try:
|
166
|
-
#
|
166
|
+
# 修改资源加载策略,允许加载必要的CSS
|
167
167
|
await page.route("**/*", lambda route: route.abort()
|
168
|
-
if route.request.resource_type in ['image', '
|
168
|
+
if route.request.resource_type in ['image', 'font', 'media'] # 移除 'stylesheet'
|
169
169
|
else route.continue_())
|
170
170
|
|
171
171
|
# 使用 domcontentloaded 而不是 networkidle
|
@@ -263,22 +263,25 @@ class WebSearcher:
|
|
263
263
|
|
264
264
|
# 使用搜索引擎特定的选择器
|
265
265
|
results = None
|
266
|
+
# 等待页面稳定
|
267
|
+
await asyncio.sleep(2) # 添加短暂延迟
|
266
268
|
|
267
269
|
# 对于Google,让页面有更多时间加载
|
268
270
|
if engine == 'google':
|
269
271
|
await self.simulate_human_scroll(page)
|
270
272
|
|
271
|
-
|
273
|
+
selector_timeout = 5000
|
272
274
|
for selector in engine_config['selectors']:
|
273
275
|
try:
|
274
276
|
logger.info(f"Trying selector: {selector}")
|
275
|
-
await page.wait_for_selector(selector, timeout=
|
276
|
-
|
277
|
+
await page.wait_for_selector(selector, timeout=selector_timeout) # 增加等待时间
|
278
|
+
selector_timeout = 500
|
277
279
|
results = await page.query_selector_all(selector)
|
278
280
|
if results and len(results) > 0:
|
279
281
|
logger.info(f"Found {len(results)} results with selector {selector}")
|
280
282
|
break
|
281
283
|
except Exception as e:
|
284
|
+
selector_timeout = 500
|
282
285
|
logger.warning(f"Selector {selector} failed: {e}")
|
283
286
|
continue
|
284
287
|
|
@@ -288,21 +291,26 @@ class WebSearcher:
|
|
288
291
|
while not results and retry_count < max_results:
|
289
292
|
logger.info(f"Retrying search, attempt {retry_count + 1}/{max_results}")
|
290
293
|
# 刷新页面重试
|
291
|
-
await page.
|
294
|
+
await page.goto(
|
295
|
+
engine_config['url'].format(encoded_query),
|
296
|
+
wait_until='load',
|
297
|
+
timeout=timeout * 1000
|
298
|
+
)
|
292
299
|
await self.simulate_human_scroll(page)
|
293
300
|
|
294
301
|
# 重新尝试所有选择器
|
295
|
-
|
302
|
+
selector_timeout = 5000
|
296
303
|
for selector in engine_config['selectors']:
|
297
304
|
try:
|
298
305
|
logger.info(f"Retrying selector: {selector}")
|
299
|
-
await page.wait_for_selector(selector, timeout=
|
300
|
-
|
306
|
+
await page.wait_for_selector(selector, timeout=selector_timeout)
|
307
|
+
selector_timeout = 500
|
301
308
|
results = await page.query_selector_all(selector)
|
302
309
|
if results and len(results) > 0:
|
303
310
|
logger.info(f"Found {len(results)} results with selector {selector} on retry {retry_count + 1}")
|
304
311
|
break
|
305
312
|
except Exception as e:
|
313
|
+
selector_timeout = 500
|
306
314
|
logger.warning(f"Selector {selector} failed on retry {retry_count + 1}: {e}")
|
307
315
|
continue
|
308
316
|
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|