chatgpt-mirai-qq-bot-web-search 0.2.6__py3-none-any.whl → 0.2.8__py3-none-any.whl

This diff shows the differences between the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: chatgpt-mirai-qq-bot-web-search
3
- Version: 0.2.6
3
+ Version: 0.2.8
4
4
  Summary: WebSearch adapter for lss233/chatgpt-mirai-qq-bot
5
5
  Home-page: https://github.com/chuanSir123/web_search
6
6
  Author: chuanSir
@@ -1,11 +1,11 @@
1
1
  web_search/__init__.py,sha256=bpu1gpf9tq3mOAoaGMM-8S6GBw2GzRAG6ClHGlp-CVw,4607
2
2
  web_search/blocks.py,sha256=QuXt3KMuY4hUW-ucleNYXFvW6YbUAB4Xu4m_SxdCd-U,9152
3
3
  web_search/config.py,sha256=DhLiERBJR2V5Boglf7Aq9Rbc4vsvLIh67CrLDIPeqA0,398
4
- web_search/web_searcher.py,sha256=760WTBGusxYntigGi5Wsbm2rRHEWq3sTwfNlf3ew3b0,20919
4
+ web_search/web_searcher.py,sha256=-wNuPzG0oZAr9-ihI5EdAexB9QXLUvc6bGyoVkWTlvw,21814
5
5
  web_search/example/roleplayWithWebSearch.yaml,sha256=C-dGy3z8gcRcmxzurssP-kPRLqMf1TYR-nnNUaJjISE,7468
6
- chatgpt_mirai_qq_bot_web_search-0.2.6.dist-info/LICENSE,sha256=ILBn-G3jdarm2w8oOrLmXeJNU3czuJvVhDLBASWdhM8,34522
7
- chatgpt_mirai_qq_bot_web_search-0.2.6.dist-info/METADATA,sha256=88y-xtYK-se7dwVVU2_fcXx_rGCUEgIxWMJvjI4Dfwg,1734
8
- chatgpt_mirai_qq_bot_web_search-0.2.6.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
9
- chatgpt_mirai_qq_bot_web_search-0.2.6.dist-info/entry_points.txt,sha256=o3kRDSdSmSdnCKlK6qS57aN0WpI4ab-Nxub2NwUrjf0,64
10
- chatgpt_mirai_qq_bot_web_search-0.2.6.dist-info/top_level.txt,sha256=PoNm8MJYw_y8RTMaNlY0ePLoNHxVUAE2IHDuL5fFubI,11
11
- chatgpt_mirai_qq_bot_web_search-0.2.6.dist-info/RECORD,,
6
+ chatgpt_mirai_qq_bot_web_search-0.2.8.dist-info/LICENSE,sha256=ILBn-G3jdarm2w8oOrLmXeJNU3czuJvVhDLBASWdhM8,34522
7
+ chatgpt_mirai_qq_bot_web_search-0.2.8.dist-info/METADATA,sha256=8L4yju3nxv1fAs4phYBFXJjNpsC1jIh1rhB5P9aiovI,1734
8
+ chatgpt_mirai_qq_bot_web_search-0.2.8.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
9
+ chatgpt_mirai_qq_bot_web_search-0.2.8.dist-info/entry_points.txt,sha256=o3kRDSdSmSdnCKlK6qS57aN0WpI4ab-Nxub2NwUrjf0,64
10
+ chatgpt_mirai_qq_bot_web_search-0.2.8.dist-info/top_level.txt,sha256=PoNm8MJYw_y8RTMaNlY0ePLoNHxVUAE2IHDuL5fFubI,11
11
+ chatgpt_mirai_qq_bot_web_search-0.2.8.dist-info/RECORD,,
@@ -31,7 +31,7 @@ class WebSearcher:
31
31
  self.video_ids = self._load_video_ids()
32
32
  self.search_engines = {
33
33
  'bing': {
34
- 'url': 'https://www.bing.com/search?q={}',
34
+ 'url': 'https://cn.bing.com/search?q={}',
35
35
  'selectors': ['.b_algo', '#b_results .b_algo', 'main .b_algo'],
36
36
  'title_selector': 'h2',
37
37
  'link_selector': 'h2 a',
@@ -72,7 +72,7 @@ class WebSearcher:
72
72
 
73
73
  # 合并所有选项到一个字典
74
74
  context_options = {
75
- 'headless': True,
75
+ 'headless': False,
76
76
  'chromium_sandbox': False,
77
77
  'slow_mo': 50, # 减慢操作速度,更像人类
78
78
  'args': [
@@ -163,9 +163,9 @@ class WebSearcher:
163
163
  # 创建新标签页获取内容
164
164
  page = await context.new_page()
165
165
  try:
166
- # 设置更严格的资源加载策略
166
+ # 修改资源加载策略,允许加载必要的CSS
167
167
  await page.route("**/*", lambda route: route.abort()
168
- if route.request.resource_type in ['image', 'stylesheet', 'font', 'media']
168
+ if route.request.resource_type in ['image', 'font', 'media'] # 移除 'stylesheet'
169
169
  else route.continue_())
170
170
 
171
171
  # 使用 domcontentloaded 而不是 networkidle
@@ -263,46 +263,64 @@ class WebSearcher:
263
263
 
264
264
  # 使用搜索引擎特定的选择器
265
265
  results = None
266
+ # 等待页面稳定
267
+ await asyncio.sleep(2) # 添加短暂延迟
266
268
 
267
269
  # 对于Google,让页面有更多时间加载
268
270
  if engine == 'google':
269
271
  await self.simulate_human_scroll(page)
270
272
 
273
+ selector_timeout = 5000
271
274
  for selector in engine_config['selectors']:
272
275
  try:
273
276
  logger.info(f"Trying selector: {selector}")
274
- await page.wait_for_selector(selector, timeout=8000) # 增加等待时间
277
+ await page.wait_for_selector(selector, timeout=selector_timeout) # 增加等待时间
278
+ selector_timeout = 500
275
279
  results = await page.query_selector_all(selector)
276
280
  if results and len(results) > 0:
277
281
  logger.info(f"Found {len(results)} results with selector {selector}")
278
282
  break
279
283
  except Exception as e:
284
+ selector_timeout = 500
280
285
  logger.warning(f"Selector {selector} failed: {e}")
281
286
  continue
282
287
 
283
288
  if not results:
284
- # 尝试直接使用 JavaScript 获取元素
285
- if engine == 'google':
286
- try:
287
- # 使用更通用的JavaScript选择器尝试获取结果
288
- results = await page.evaluate("""
289
- () => {
290
- const elements = document.querySelectorAll('div[data-sokoban-container], div.g, .MjjYud');
291
- return Array.from(elements).length;
292
- }
293
- """)
294
- logger.info(f"JavaScript found {results} elements")
295
-
296
- # 如果找到了元素,使用evaluate来处理它们
297
- if results > 0:
298
- # 自定义处理逻辑...
299
- pass
300
- except Exception as e:
301
- logger.error(f"JavaScript evaluation failed: {e}")
302
-
303
- logger.error("No search results found with any selector")
304
- await page.screenshot(path=f'search_failed_{engine}.png')
305
- return "搜索结果加载失败"
289
+ # 添加重试机制
290
+ retry_count = 0
291
+ while not results and retry_count < max_results:
292
+ logger.info(f"Retrying search, attempt {retry_count + 1}/{max_results}")
293
+ # 刷新页面重试
294
+ await page.goto(
295
+ engine_config['url'].format(encoded_query),
296
+ wait_until='load',
297
+ timeout=timeout * 1000
298
+ )
299
+ await self.simulate_human_scroll(page)
300
+
301
+ # 重新尝试所有选择器
302
+ selector_timeout = 5000
303
+ for selector in engine_config['selectors']:
304
+ try:
305
+ logger.info(f"Retrying selector: {selector}")
306
+ await page.wait_for_selector(selector, timeout=selector_timeout)
307
+ selector_timeout = 500
308
+ results = await page.query_selector_all(selector)
309
+ if results and len(results) > 0:
310
+ logger.info(f"Found {len(results)} results with selector {selector} on retry {retry_count + 1}")
311
+ break
312
+ except Exception as e:
313
+ selector_timeout = 500
314
+ logger.warning(f"Selector {selector} failed on retry {retry_count + 1}: {e}")
315
+ continue
316
+
317
+ retry_count += 1
318
+
319
+
320
+ # 如果所有重试都失败了,才返回错误
321
+ if not results:
322
+ logger.error("No search results found after all retries")
323
+ return "搜索结果加载失败"
306
324
 
307
325
  logger.info(f"Found {len(results)} search results")
308
326