chatgpt-mirai-qq-bot-web-search 0.1.13__py3-none-any.whl → 0.1.15__py3-none-any.whl
This diff shows the contents of two publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
- {chatgpt_mirai_qq_bot_web_search-0.1.13.dist-info → chatgpt_mirai_qq_bot_web_search-0.1.15.dist-info}/METADATA +1 -1
- chatgpt_mirai_qq_bot_web_search-0.1.15.dist-info/RECORD +11 -0
- web_search/blocks.py +15 -7
- web_search/web_searcher.py +325 -237
- chatgpt_mirai_qq_bot_web_search-0.1.13.dist-info/RECORD +0 -11
- {chatgpt_mirai_qq_bot_web_search-0.1.13.dist-info → chatgpt_mirai_qq_bot_web_search-0.1.15.dist-info}/LICENSE +0 -0
- {chatgpt_mirai_qq_bot_web_search-0.1.13.dist-info → chatgpt_mirai_qq_bot_web_search-0.1.15.dist-info}/WHEEL +0 -0
- {chatgpt_mirai_qq_bot_web_search-0.1.13.dist-info → chatgpt_mirai_qq_bot_web_search-0.1.15.dist-info}/entry_points.txt +0 -0
- {chatgpt_mirai_qq_bot_web_search-0.1.13.dist-info → chatgpt_mirai_qq_bot_web_search-0.1.15.dist-info}/top_level.txt +0 -0
chatgpt_mirai_qq_bot_web_search-0.1.15.dist-info/RECORD
ADDED
@@ -0,0 +1,11 @@
+web_search/__init__.py,sha256=zVZLb5A-im5XETwohgxyE-UCxjSvYl6I2OC3LnEQhdQ,4360
+web_search/blocks.py,sha256=I0nq2JzWi1whkJCO8aS76qus_V9Z-TSvL1kjd9AZKa4,4093
+web_search/config.py,sha256=DhLiERBJR2V5Boglf7Aq9Rbc4vsvLIh67CrLDIPeqA0,398
+web_search/web_searcher.py,sha256=0zLgMsWCK71gStyWpFjup5WfxHx3tBTf3rGwM7Ae7Zs,13332
+web_search/example/roleplayWithWebSearch.yaml,sha256=C-dGy3z8gcRcmxzurssP-kPRLqMf1TYR-nnNUaJjISE,7468
+chatgpt_mirai_qq_bot_web_search-0.1.15.dist-info/LICENSE,sha256=ILBn-G3jdarm2w8oOrLmXeJNU3czuJvVhDLBASWdhM8,34522
+chatgpt_mirai_qq_bot_web_search-0.1.15.dist-info/METADATA,sha256=FE3NIJ3yYwk9bgVF55Q4vm8BnmAsqUkubuaOwY64XpU,1739
+chatgpt_mirai_qq_bot_web_search-0.1.15.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+chatgpt_mirai_qq_bot_web_search-0.1.15.dist-info/entry_points.txt,sha256=o3kRDSdSmSdnCKlK6qS57aN0WpI4ab-Nxub2NwUrjf0,64
+chatgpt_mirai_qq_bot_web_search-0.1.15.dist-info/top_level.txt,sha256=PoNm8MJYw_y8RTMaNlY0ePLoNHxVUAE2IHDuL5fFubI,11
+chatgpt_mirai_qq_bot_web_search-0.1.15.dist-info/RECORD,,
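Each RECORD line has the form `path,sha256=<digest>,<size-in-bytes>`, where the digest is the urlsafe base64 encoding of the file's SHA-256 hash with the trailing `=` padding stripped. A minimal sketch of recomputing one of these entries; the helper name `record_entry` is illustrative, not part of the package:

```python
import base64
import hashlib

def record_entry(path: str) -> str:
    """Build a wheel RECORD line: path,sha256=<urlsafe-b64 digest>,size."""
    with open(path, "rb") as f:
        data = f.read()
    # RECORD digests are urlsafe base64 with the '=' padding removed
    digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=")
    return f"{path},sha256={digest.decode()},{len(data)}"

# Run from an unpacked 0.1.15 wheel, record_entry("web_search/config.py")
# should reproduce the config.py line above; note that config.py carries the
# same hash in both the 0.1.13 and 0.1.15 RECORDs, i.e. it did not change.
```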
web_search/blocks.py
CHANGED
@@ -1,12 +1,14 @@
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional,Annotated
 import asyncio
-from …
-from framework.workflow.core.block.input_output import Input, Output
+from kirara_ai.workflow.core.block import Block, Input, Output, ParamMeta
 from .web_searcher import WebSearcher
 from .config import WebSearchConfig
-from …
-from …
+from kirara_ai.llm.format.message import LLMChatMessage
+from kirara_ai.llm.format.response import LLMChatResponse
+from kirara_ai.ioc.container import DependencyContainer
 
+def get_options_provider(container: DependencyContainer, block: Block) -> List[str]:
+    return ["bing", "google", "baidu"]
 class WebSearchBlock(Block):
     """Web search Block"""
     name = "web_search"
@@ -19,13 +21,16 @@ class WebSearchBlock(Block):
         "results": Output(name="results",label="搜索结果",data_type= str, description="搜索结果")
     }
 
-    def __init__(self, name: str = None, max_results: Optional[int] = None, timeout: Optional[int] = None, fetch_content: Optional[bool] = None):
+    def __init__(self, name: str = None, max_results: Optional[int] = None, timeout: Optional[int] = None, fetch_content: Optional[bool] = None
+                 ,engine: Annotated[Optional[str],ParamMeta(label="搜索引擎", description="要使用的搜索引擎", options_provider=get_options_provider),] = "bing", proxy: str = None,):
         super().__init__(name)
         self.searcher = None
         self.config = WebSearchConfig()
         self.max_results = max_results
         self.timeout = timeout
         self.fetch_content = fetch_content
+        self.engine=engine
+        self.proxy = proxy
 
     def _ensure_searcher(self):
         """Initialize the searcher synchronously"""
@@ -62,11 +67,14 @@ class WebSearchBlock(Block):
                     query=query,
                     max_results=max_results,
                     timeout=timeout,
-                    fetch_content=fetch_content
+                    fetch_content=fetch_content,
+                    engine=self.engine,
+                    proxy = self.proxy,
                 )
             )
             return {"results": "\n以下是联网搜索的结果:\n-- 搜索结果开始 --"+results+"\n-- 搜索结果结束 --"}
         except Exception as e:
+            print(e)
             return {"results": f"搜索失败: {str(e)}"}
 
 class AppendSystemPromptBlock(Block):
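The net effect on the block's public surface is two new constructor parameters: `engine`, restricted by `get_options_provider` to bing, google, or baidu and surfaced in the UI via `ParamMeta`, and `proxy`; both are forwarded to `WebSearcher.search()`. A minimal construction sketch based on the signature above; the proxy address is a placeholder:

```python
# Sketch only: assumes the kirara_ai runtime wires the block into a workflow;
# the keyword values mirror the __init__ signature added in 0.1.15.
block = WebSearchBlock(
    name="web_search",
    max_results=3,
    timeout=10,
    fetch_content=True,
    engine="google",                # must be one of get_options_provider()'s values
    proxy="http://127.0.0.1:7890",  # placeholder proxy URL, passed through to Playwright
)
```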
web_search/web_searcher.py
CHANGED
@@ -1,237 +1,325 @@
-from playwright.async_api import async_playwright
-import trafilatura
-import random
-import time
-import urllib.parse
-import asyncio
-import subprocess
-import sys
-from …
-[old lines 10-237 removed; their content is truncated in the source view]
+from playwright.async_api import async_playwright
+import trafilatura
+import random
+import time
+import urllib.parse
+import asyncio
+import subprocess
+import sys
+from kirara_ai.logger import get_logger
+import os
+
+logger = get_logger("WebSearchPlugin")
+
+class WebSearcher:
+    def __init__(self):
+        self.playwright = None
+        self.browser = None
+        self.context = None
+        self.search_engines = {
+            'bing': {
+                'url': 'https://www.bing.com/search?q={}',
+                'selectors': ['.b_algo', '#b_results .b_algo', 'main .b_algo'],
+                'title_selector': 'h2',
+                'link_selector': 'h2 a',
+                'snippet_selector': '.b_caption p'
+            },
+            'google': {
+                'url': 'https://www.google.com/search?q={}',
+                'selectors': ['.MjjYud', 'div.g', 'div[data-hveid]'],
+                'title_selector': 'h3.LC20lb',
+                'link_selector': 'a[jsname="UWckNb"], div.yuRUbf a',
+                'snippet_selector': 'div.VwiC3b'
+            },
+            'baidu': {
+                'url': 'https://www.baidu.com/s?wd={}',
+                'selectors': ['.result', '.result-op'],
+                'title_selector': 'h3',
+                'link_selector': 'h3 a',
+                'snippet_selector': '.content-right_8Zs40'
+            }
+        }
+
+    @classmethod
+    async def create(cls):
+        """Factory method that creates a WebSearcher instance"""
+        self = cls()
+        return self
+
+    async def _ensure_initialized(self,proxy):
+        """Ensure the browser is initialized"""
+        try:
+            self.playwright = await async_playwright().start()
+
+            # build the user data directory path
+            user_data_dir = os.path.join(os.path.expanduser("~"), ".playwright_user_data")
+            os.makedirs(user_data_dir, exist_ok=True)
+
+            # merge all options into a single dict
+            context_options = {
+                'headless': True,
+                'chromium_sandbox': False,
+                'slow_mo': 50,  # slow operations down to look more human
+                'args': [
+                    '--no-sandbox',
+                    '--disable-setuid-sandbox',
+                    '--disable-dev-shm-usage',
+                    '--disable-blink-features=AutomationControlled',  # hide automation-control traces
+                    '--disable-features=IsolateOrigins,site-per-process',
+                ],
+                'ignore_default_args': ['--enable-automation'],  # suppress the automation flag
+                'viewport': {'width': 1920, 'height': 1080},
+                'user_agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36',
+                'locale': 'zh-CN',
+                'timezone_id': 'Asia/Shanghai',
+                'color_scheme': 'dark',  # or 'light', to match user preference
+                'device_scale_factor': 1.75,  # high-DPI device
+                'has_touch': True,  # touch support
+                'is_mobile': False,
+                'reduced_motion': 'no-preference'
+            }
+
+            # add proxy settings if a proxy was supplied
+            if proxy:
+                context_options['proxy'] = {
+                    'server': proxy
+                }
+
+            try:
+                # use launch_persistent_context instead of separate launch and new_context
+                self.context = await self.playwright.chromium.launch_persistent_context(
+                    user_data_dir=user_data_dir,
+                    **context_options
+                )
+
+                self.browser = None  # a separate browser reference is no longer needed
+
+            except Exception as e:
+                if "Executable doesn't exist" in str(e):
+                    logger.info("Installing playwright browsers...")
+                    process = subprocess.Popen(
+                        [sys.executable, "-m", "playwright", "install", "chromium"],
+                        stdout=subprocess.PIPE,
+                        stderr=subprocess.PIPE
+                    )
+                    stdout, stderr = process.communicate()
+                    if process.returncode != 0:
+                        raise RuntimeError(f"Failed to install playwright browsers: {stderr.decode()}")
+
+                    # retry with launch_persistent_context
+                    self.context = await self.playwright.chromium.launch_persistent_context(
+                        user_data_dir=user_data_dir,
+                        **context_options
+                    )
+                else:
+                    raise
+
+            # inject a script that masks the webdriver flag
+            await self.context.add_init_script("""
+                Object.defineProperty(navigator, 'webdriver', {
+                    get: () => false,
+                });
+
+                // guard against iframe detection
+                window.parent.document;
+
+                // remove Chrome DevTools protocol markers
+                delete window.cdc_adoQpoasnfa76pfcZLmcfl_Array;
+                delete window.cdc_adoQpoasnfa76pfcZLmcfl_Promise;
+                delete window.cdc_adoQpoasnfa76pfcZLmcfl_Symbol;
+            """)
+
+            return self.context
+
+        except Exception as e:
+            logger.error(f"Failed to initialize WebSearcher: {e}")
+            await self.close()
+            raise
+
+    async def simulate_human_scroll(self, page):
+        """Simulate human-like scrolling"""
+        for _ in range(3):
+            await page.mouse.wheel(0, random.randint(300, 700))
+
+    async def get_webpage_content(self, url: str, timeout: int,context) -> str:
+        """Fetch a web page's content"""
+        start_time = time.time()
+        try:
+            # open a new tab to fetch the content
+            page = await context.new_page()
+            try:
+                # apply a stricter resource-loading policy
+                await page.route("**/*", lambda route: route.abort()
+                    if route.request.resource_type in ['image', 'stylesheet', 'font', 'media']
+                    else route.continue_())
+
+                # use domcontentloaded rather than networkidle
+                await page.goto(url, wait_until='domcontentloaded', timeout=timeout * 1000)
+
+                # wait for the page's main content, with a short timeout
+                try:
+                    await page.wait_for_load_state('domcontentloaded', timeout=5000)
+                except Exception as e:
+                    logger.warning(f"Load state timeout for {url}, continuing anyway: {e}")
+
+                await self.simulate_human_scroll(page)
+
+                content = await page.content()
+                text = trafilatura.extract(content)
+
+                await page.close()
+                logger.info(f"Content fetched - URL: {url} - Time: {time.time() - start_time:.2f}s")
+                return text or ""
+            except Exception as e:
+                await page.close()
+                logger.error(f"Failed to fetch content - URL: {url} - Error: {e}")
+                return ""
+        except Exception as e:
+            logger.error(f"Failed to create page - URL: {url} - Error: {e}")
+            return ""
+
+    async def process_search_result(self, result, idx: int, timeout: int, fetch_content: bool, context, engine='bing'):
+        """Process a single search result"""
+        try:
+            engine_config = self.search_engines[engine]
+            title_element = await result.query_selector(engine_config['title_selector'])
+            link_element = await result.query_selector(engine_config['link_selector'])
+            snippet_element = await result.query_selector(engine_config['snippet_selector'])
+
+            if not title_element or not link_element:
+                return None
+
+            title = await title_element.inner_text()
+            link = await link_element.get_attribute('href')
+
+            # Baidu result links need special handling
+            if engine == 'baidu':
+                try:
+                    # open a new page to resolve the real URL
+                    new_page = await context.new_page()
+                    await new_page.goto(link, wait_until='domcontentloaded', timeout=5000)
+                    real_url = new_page.url
+                    await new_page.close()
+                    link = real_url
+                except Exception as e:
+                    logger.warning(f"Failed to get real URL from Baidu: {e}")
+
+            snippet = await snippet_element.inner_text() if snippet_element else "无简介"
+
+            if not link:
+                return None
+
+            result_text = f"[{idx+1}] {title}\nURL: {link}\n搜索简介: {snippet}"
+
+            if fetch_content:
+
+                content = await self.get_webpage_content(link, timeout,context)
+                if content:
+                    result_text += f"\n内容详情:\n{content}"
+
+            return result_text
+
+        except Exception as e:
+            logger.error(f"Failed to process result {idx}: {e}")
+            return None
+
+    async def search(self, query: str, max_results: int = 3, timeout: int = 10, fetch_content: bool = True, engine: str = 'bing', proxy: str = None) -> str:
+        """Run a search"""
+        if engine not in self.search_engines:
+            return f"不支持的搜索引擎: {engine}"
+
+        # record the current search engine
+        self.current_engine = engine
+        context = await self._ensure_initialized(proxy)
+        engine_config = self.search_engines[engine]
+        search_start_time = time.time()
+        page = None
+
+        try:
+            encoded_query = urllib.parse.quote(query)
+            page = await context.new_page()
+
+            # Google-specific handling
+            await page.goto(
+                engine_config['url'].format(encoded_query),
+                wait_until='load',
+                timeout=timeout * 1000
+            )
+
+            # use the engine-specific selectors
+            results = None
+
+            # give Google pages more time to load
+            if engine == 'google':
+                await self.simulate_human_scroll(page)
+
+            for selector in engine_config['selectors']:
+                try:
+                    logger.info(f"Trying selector: {selector}")
+                    await page.wait_for_selector(selector, timeout=8000)  # longer wait
+                    results = await page.query_selector_all(selector)
+                    if results and len(results) > 0:
+                        logger.info(f"Found {len(results)} results with selector {selector}")
+                        break
+                except Exception as e:
+                    logger.warning(f"Selector {selector} failed: {e}")
+                    continue
+
+            if not results:
+                # try to fetch elements directly via JavaScript
+                if engine == 'google':
+                    try:
+                        # try more generic JavaScript selectors
+                        results = await page.evaluate("""
+                            () => {
+                                const elements = document.querySelectorAll('div[data-sokoban-container], div.g, .MjjYud');
+                                return Array.from(elements).length;
+                            }
+                        """)
+                        logger.info(f"JavaScript found {results} elements")
+
+                        # if elements were found, process them via evaluate
+                        if results > 0:
+                            # custom handling logic...
+                            pass
+                    except Exception as e:
+                        logger.error(f"JavaScript evaluation failed: {e}")
+
+                logger.error("No search results found with any selector")
+                await page.screenshot(path=f'search_failed_{engine}.png')
+                return "搜索结果加载失败"
+
+            logger.info(f"Found {len(results)} search results")
+
+            tasks = []
+            for idx, result in enumerate(results[:max_results]):
+                tasks.append(self.process_search_result(result, idx, timeout, fetch_content, context, engine))
+
+            detailed_results = []
+            completed_results = await asyncio.gather(*tasks)
+
+            for result in completed_results:
+                if result:
+                    detailed_results.append(result)
+
+            total_time = time.time() - search_start_time
+            results = "\n---\n".join(detailed_results) if detailed_results else "未找到相关结果"
+            logger.info(f"Search completed - Query: {query} - Time: {total_time:.2f}s - Found {len(detailed_results)} valid results")
+            return results
+
+        except Exception as e:
+            logger.error(f"Search failed - Query: {query} - Error: {e}", exc_info=True)
+            return f"搜索失败: {str(e)}"
+        finally:
+            if page:
+                try:
+                    await page.close()
+                except Exception as e:
+                    logger.error(f"Error closing page: {e}")
+
+    async def close(self):
+        """Close the browser"""
+        if self.context:
+            await self.context.close()
+        if self.playwright:
+            await self.playwright.stop()
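The rewritten class is driven entirely through `search()`: each call starts Playwright, opens a persistent Chromium context, and hands that context down to the result-processing helpers, with the per-result work fanned out through `asyncio.gather`. A minimal standalone driver, sketched from the API shown above; the query string is a placeholder:

```python
import asyncio

from web_search.web_searcher import WebSearcher

async def main():
    searcher = await WebSearcher.create()
    try:
        # engine and proxy mirror the parameters WebSearchBlock now forwards
        text = await searcher.search(
            "playwright automation detection",  # placeholder query
            max_results=3,
            timeout=10,
            fetch_content=True,
            engine="bing",
            proxy=None,
        )
        print(text)
    finally:
        # search() leaves the browser running; closing is the caller's job
        await searcher.close()

asyncio.run(main())
```

Note that `search()` calls `_ensure_initialized` unconditionally, so every invocation launches a fresh Playwright instance against the same `~/.playwright_user_data` profile; nothing tears the previous one down except an explicit `close()`.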
chatgpt_mirai_qq_bot_web_search-0.1.13.dist-info/RECORD
REMOVED
@@ -1,11 +0,0 @@
-web_search/__init__.py,sha256=zVZLb5A-im5XETwohgxyE-UCxjSvYl6I2OC3LnEQhdQ,4360
-web_search/blocks.py,sha256=bVLn5kg-OMqWQsDrJLvA43AoV_eMEcZ3nrjqponHHX4,3611
-web_search/config.py,sha256=DhLiERBJR2V5Boglf7Aq9Rbc4vsvLIh67CrLDIPeqA0,398
-web_search/web_searcher.py,sha256=dmN1R4iyFvaPNpyBjFLWujvQ6_I3oD1GRlRyC03egpo,9707
-web_search/example/roleplayWithWebSearch.yaml,sha256=C-dGy3z8gcRcmxzurssP-kPRLqMf1TYR-nnNUaJjISE,7468
-chatgpt_mirai_qq_bot_web_search-0.1.13.dist-info/LICENSE,sha256=ILBn-G3jdarm2w8oOrLmXeJNU3czuJvVhDLBASWdhM8,34522
-chatgpt_mirai_qq_bot_web_search-0.1.13.dist-info/METADATA,sha256=ICD-hdrx6ERNCfaT6U5RRGvysF2GarMtNpzWniMGwuo,1739
-chatgpt_mirai_qq_bot_web_search-0.1.13.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
-chatgpt_mirai_qq_bot_web_search-0.1.13.dist-info/entry_points.txt,sha256=o3kRDSdSmSdnCKlK6qS57aN0WpI4ab-Nxub2NwUrjf0,64
-chatgpt_mirai_qq_bot_web_search-0.1.13.dist-info/top_level.txt,sha256=PoNm8MJYw_y8RTMaNlY0ePLoNHxVUAE2IHDuL5fFubI,11
-chatgpt_mirai_qq_bot_web_search-0.1.13.dist-info/RECORD,,
{chatgpt_mirai_qq_bot_web_search-0.1.13.dist-info → chatgpt_mirai_qq_bot_web_search-0.1.15.dist-info}/LICENSE
File without changes
{chatgpt_mirai_qq_bot_web_search-0.1.13.dist-info → chatgpt_mirai_qq_bot_web_search-0.1.15.dist-info}/WHEEL
File without changes
{chatgpt_mirai_qq_bot_web_search-0.1.13.dist-info → chatgpt_mirai_qq_bot_web_search-0.1.15.dist-info}/entry_points.txt
File without changes
{chatgpt_mirai_qq_bot_web_search-0.1.13.dist-info → chatgpt_mirai_qq_bot_web_search-0.1.15.dist-info}/top_level.txt
File without changes