Jarvis-Brain 0.1.5.5__tar.gz → 0.1.10.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: Jarvis_Brain
3
- Version: 0.1.5.5
3
+ Version: 0.1.10.0
4
4
  Summary: Jarvis brain mcp
5
5
  Requires-Python: >=3.10
6
6
  Requires-Dist: beautifulsoup4
@@ -4,8 +4,8 @@
4
4
 
5
5
  一个基于 FastMCP 和 DrissionPage 的浏览器自动化 MCP 服务器
6
6
 
7
- [![Python Version](https://img.shields.io/badge/python-3.10~3.12-blue.svg)](https://www.python.org/downloads/)
8
- [![Version](https://img.shields.io/badge/version-0.1.4.5-green.svg)](https://github.com/yourusername/jarvis-mcp)
7
+ [![Python Version](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/)
8
+ [![Version](https://img.shields.io/badge/version-0.1.9.12-green.svg)](https://github.com/yourusername/jarvis-mcp)
9
9
 
10
10
  ## 📖 简介
11
11
 
@@ -0,0 +1,5 @@
1
+ test_str="""
2
+ \\"X-XSS-Protection\\": \\"1; mode=block\\", \\"Server\\": \\"-\\"}, \\"response_body_segment\\": \\"pename\\\\\\": \\\\\\"SpuItemAttribute\\\\\\"}, {\\\\\\"code\\\\\\": \\\\\\"imageVersion\\\\\\", \\\\\\"name\\\\\\": \\\\\\"imageVersion\\\\\\", \\\\\\"subAttrList\\\\\\": [{\\\\\\"attrName\\\\\\": \\\\\\"1\\\\\\", \\\\\\"attrValue\\\\\\": \\\\\\"1\\\\\\", \\\\\\"__typename\\\\\\": \\\\\\"SubAttribute\\\\\\"}], \\\\\\"__typename\\\\\\": \\\\\\"SpuItemAttribute\\\\\\"}, {\\\\\\"code\\\\\\": \\\\\\"PLPhover\\\\\\", \\\\\\"name\\\\\\": \\\\\\"PLPhover\\\\\\", \\\\\\"subAttrList\\\\\\": [{\\\\\\"attrName\\\\\\": \\\\\\"2\\\\\\", \\\\\\"attrValue\\\\\\": \\\\\\"2\\\\\\", \\\\\\"__typename\\\\\\": \\\\\\"SubAttribute\\\\\\"}], \\\\\\"__typename\\\\\\": \\\\\\"SpuItemAttribute\\\\\\"}, {\\\\\\"code\\\\\\": \\\\\\"官网\\\\\\", \\\\\\"name\\\\\\": \\\\\\"官网\\\\\\", \\\\\\"subAttrList\\\\\\": [{\\\\\\"attrName\\\\\\": \\\\\\"是\\\\\\", \\\\\\"attrValue\\\\\\": \\\\\\"true\\\\\\", \\\\\\"__typename\\\\\\": \\\\\\"SubAttribute\\\\\\"}], \\\\\\"__typename\\\\\\": \\\\\\"SpuItemAttribute\\\\\\"}, {\\\\\\"code\\\\\\": \\\\\\"小程序\\\\\\", \\\\\\"name\\\\\\": \\\\\\"小程序\\\\\\", \\\\\\"subAttrList\\\\\\": [{\\\\\\"attrName\\\\\\": \\\\\\"是\\\\\\", \\\\\\"attrValue\\\\\\": \\\\\\"true\\\\\\", \\\\\\"__typename\\\\\\": \\\\\\"SubAttribute\\\\\\"}], \\\\\\"__typename\\\\\\": \\\\\\"SpuItemAttribute\\\\\\"}], \\\\\\"__typename\\\\\\": \\\\\\"SpuAttribute\\\\\\"}, \\\\\\"labelList\\\\\\": [{\\\\\\"name\\\\\\": \\\\\\"new\\\\\\", \\\\\\"value\\\\\\": \\\\\\"新品\\\\\\", \\\\\\"excludeValue\\\\\\": null, \\\\\\"__typename\\\\\\": \\\\\\"BaseLabel\\\\\\"}], \\\\\\"__typename\\\\\\": \\\\\\"Product2\\\\\\"}, {\\\\\\"baseInfo\\\\\\": {\\\\\\"spuCode\\\\\\": \\\\\\"864428CVE0G1000\\\\\\", \\\\\\"title\\\\\\": \\\\\\"互扣式双G带扣窄版腰带\\\\\\", \\\\\\"salePrice\\\\\\": 4300.0, \\\\\\"style\\\\\\": \\\\\\"864428\\\\\\", \\\\\\"categorys\\\\\\": [{\\\\\\"navFrontName\\\\\\": \\\\\\"女士风尚-手工plp\\\\\\", 
\\\\\\"frontName\\\\\\": null, \\\\\\"code\\\\\\": \\\\\\"67d7d4c5eb9d2100016b4fe5\\\\\\", \\\\\\"parentCode\\\\\\": null, \\\\\\"businessCode\\\\\\": \\\\\\"women-fashion手工plp\\\\\\", \\\\\\"__typename\\\\\\": \\\\\\"Category\\\\\\"}, {\\\\\\"navFrontName\\\\\\": \\\\\\"女士-women>女士配饰-women-accessories>女士腰带-women-accessories-belts\\\\\\", \\\\\\"frontName\\\\\\": null, \\\\\\"code\\\\\\": \\\\\\"642670bf0ae2090001133cbe\\\\\\", \\\\\\"parentCode\\\\\\": null, \\\\\\"businessCode\\\\\\": \\\\\\"women-accessories-belts\\\\\\", \\\\\\"__typename\\\\\\": \\\\\\"Category\\\\\\"}, {\\\\\\"navFrontName\\\\\\": \\\\\\"女士-women>女士配饰-women-accessories\\\\\\", \\\\\\"frontName\\\\\\": null, \\\\\\"code\\\\\\": \\\\\\"642670bf0ae2090001133cbd\\\\\\", \\\\\\"parentCode\\\\\\": null, \\\\\\"businessCode\\\\\\": \\\\\\"women-accessories\\\\\\",
3
+ """
4
# Resolve the backslash escapes contained in test_str.
# Encoding with latin-1 + backslashreplace first turns every non-Latin-1
# character (the Chinese text) into an ASCII \uXXXX escape, so the
# unicode_escape decoder restores it intact. A plain .encode() would emit
# UTF-8 bytes, which unicode_escape reads as Latin-1 and turns into mojibake.
decoded = test_str.encode('latin-1', 'backslashreplace').decode('unicode_escape')
print(decoded)
@@ -0,0 +1,11 @@
1
+ """
2
+ 这个文件中提供的工具作为谷歌官方mcp:chrome-devtools-mcp的辅助工具,仅提供功能补充与浏览器实例管理
3
+ """
4
+ import hashlib
5
+ import json
6
+ import os
7
+ from typing import Any
8
+
9
+ from fastmcp import FastMCP
10
+
11
+ from tools.browser_manager import BrowserManager
@@ -0,0 +1,306 @@
1
+ """
2
+ 这个文件中提供的工具作为独立的Drissionpage mcp工具
3
+ """
4
+ import hashlib
5
+ import json
6
+ import os
7
+ from typing import Any
8
+
9
+ from fastmcp import FastMCP
10
+
11
+ from tools.browser_manager import BrowserManager
12
+ from tools.tools import compress_html, requests_html, dp_headless_html, assert_waf_cookie, dp_mcp_message_pack, \
13
+ compress_html_js
14
+ from tools.browser_proxy import DPProxyClient, DPProxyClientManager
15
+
16
+ html_source_code_local_save_path = os.path.join(os.getcwd(), "html-source-code")
17
+ waf_status_code_dict = {
18
+ 412: "瑞数",
19
+ 521: "加速乐"
20
+ }
21
+ # 一轮最大输入,以免单个html最大长度超过ai最大输入
22
+ one_turn_max_token = 8000
23
+
24
+
25
def register_visit_url(mcp: FastMCP, browser_manager: BrowserManager, client_manager: DPProxyClientManager):
    """Register the ``visit_url`` tool on ``mcp``.

    The tool creates a browser via ``browser_manager``, wraps its tab in a
    packet-listening client via ``client_manager`` and then opens the URL.
    """

    @mcp.tool(name="visit_url",
              description="使用Drissionpage打开url访问某个网站,并开始监听初始tab页的所有的XHR请求"
                          "当需要使用手机版浏览器Ua时use_mobile_user_agent为True"
                          "如果想要以域名对packet进行过滤,可以传入想要过滤的域名列表。默认是:None。"
                          "如果想要以method对packet进行过滤,可以传入想要过滤的method列表,默认是:['GET', 'POST']")
    async def visit_url(url: str, domain_filter: list = None, method_filter: list = ["GET", "POST"],
                        use_mobile_user_agent: bool = False) -> dict[str, Any]:
        """Open ``url`` in a new browser and start capturing its packets.

        :param url: address to open in the browser's tab
        :param domain_filter: domains to keep when capturing packets, or None for all
        :param method_filter: HTTP methods to keep when capturing packets
        :param use_mobile_user_agent: launch the browser with a mobile user agent
        :return: message pack carrying the tab id and the browser port
        """
        mobile_user_agent = None
        if use_mobile_user_agent:
            mobile_user_agent = "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Mobile Safari/537.36"
        port, _browser = browser_manager.create_browser(mobile_user_agent)
        tab = _browser.get_tab()
        packet_filter = {
            "domain_filter": domain_filter,
            # The signature default is kept so the tool schema still advertises
            # it, but the filter stored on the long-lived client must not alias
            # that shared list object, so a copy is stored instead.
            "method_filter": list(method_filter) if method_filter is not None else ["GET", "POST"],
        }
        # The client is created before navigation so the first page load is captured.
        client_manager.create_client(tab, packet_filter)
        tab.get(url)
        tab_id = tab.tab_id
        return dp_mcp_message_pack(
            f"已在[{port}]端口创建浏览器对象,并已打开链接:{url},打开的模式是:{'手机版' if use_mobile_user_agent else '电脑版'}",
            tab_id=tab_id,
            browser_port=port
        )
50
+
51
+
52
def register_get_new_tab(mcp: FastMCP, browser_manager, client_manager: DPProxyClientManager):
    """Register the ``get_new_tab`` tool on ``mcp``.

    The tool opens a new tab in an existing browser, wraps it in a
    packet-listening client via ``client_manager`` and then opens the URL.
    """

    @mcp.tool(name="get_new_tab",
              description="使用Drissionpage创建一个新的tab页,在新的tab页中打开url,并开始监听新的tab页的所有XHR请求"
                          "如果想要以域名对packet进行过滤,可以传入想要过滤的域名列表。默认是:None。"
                          "如果想要以method对packet进行过滤,可以传入想要过滤的method列表,默认是:['GET', 'POST']")
    async def get_new_tab(browser_port: int, url: str, domain_filter: list = None,
                          method_filter: list = ["GET", "POST"]) -> dict[str, Any]:
        """Open ``url`` in a new tab of the browser on ``browser_port``.

        :param browser_port: port identifying the browser in ``browser_manager``
        :param url: address to open in the new tab
        :param domain_filter: domains to keep when capturing packets, or None for all
        :param method_filter: HTTP methods to keep when capturing packets
        :return: message pack carrying the new tab id
        """
        _browser = browser_manager.get_browser(browser_port)
        tab = _browser.new_tab()
        packet_filter = {
            "domain_filter": domain_filter,
            # Store a copy so the client's filter never aliases the shared
            # default list object declared in the signature.
            "method_filter": list(method_filter) if method_filter is not None else ["GET", "POST"],
        }
        # The client is created before navigation so the first page load is captured.
        client_manager.create_client(tab, packet_filter)
        tab.get(url)
        _browser.activate_tab(tab)
        tab_id = tab.tab_id
        return dp_mcp_message_pack(f"已创建新的tab页,并打开链接:{url}", tab_id=tab_id)
70
+
71
+
72
def register_pop_first_packet(mcp: FastMCP, browser_manager, client_manager: DPProxyClientManager):
    """Register the ``pop_first_packet`` tool on ``mcp``.

    The tool pops the oldest captured packet segment from the queue of the
    client that listens on the given tab.
    """

    @mcp.tool(name="pop_first_packet",
              description="每调用一次就会弹出传入的tab页所监听到的数据包中的第一个packet_message,当一个packet_message的response body过长时会被切分成多个包,具体一个请求是否还有下一个包,可以参考body_completed字段")
    async def pop_first_packet(browser_port: int, tab_id: str) -> dict[str, Any]:
        """Pop one packet segment captured on ``tab_id``.

        :param browser_port: port identifying the browser in ``browser_manager``
        :param tab_id: id of the tab whose packet queue is read
        :return: message pack with ``packet_message`` (None when nothing was
                 popped) and ``current_queue_size``
        """
        # NOTE(review): the browser object itself is not used here; the lookup
        # is kept in case get_browser validates the port — confirm.
        _browser = browser_manager.get_browser(browser_port)

        # Only tabs opened through visit_url / get_new_tab have a client.
        # Check membership first so an unknown tab id yields a readable answer
        # instead of an AttributeError from the lookup.
        client = None
        if tab_id in client_manager.list_clients():
            client = client_manager.get_client(tab_id)
        if client is None:
            return dp_mcp_message_pack(
                f"tab页:【{tab_id}】,没有找到对应的监听客户端,请先使用visit_url或get_new_tab打开该tab页",
                browser_port=browser_port,
                tab_id=tab_id,
                packet_message=None,
                current_queue_size=0,
            )

        current_queue_size, packet_message = client.pop_first_packet()
        message = f"tab页:【{tab_id}】,暂时没有监听到XHR数据包"
        if packet_message:
            message = f"tab页:【{tab_id}】,监听到XHR数据包,当前数据包队列中还剩 {current_queue_size} 条数据,如果还剩数据为0,可以暂时稍后再次调用该方法"
        # Kept for a client that reports a filtered-out packet as None while
        # the queue still holds data.
        if (packet_message is None) and current_queue_size:
            message = f"tab页:【{tab_id}】,当前弹出的第一个数据包不符合过滤条件,当前数据包队列中还剩 {current_queue_size} 条数据,请不要改变条件,继续弹出下一个数据包"
        return dp_mcp_message_pack(
            message,
            browser_port=browser_port,
            tab_id=tab_id,
            packet_message=packet_message,
            current_queue_size=current_queue_size,
        )
91
+
92
+
93
def register_get_html(mcp: FastMCP, browser_manager):
    """Register the ``get_html`` tool on ``mcp``.

    The tool runs the ``compress_html_js`` script in the tab and saves the
    returned markup to a file under ``html_source_code_local_save_path``.
    """

    @mcp.tool(name="get_html", description="使用Drissionpage获取某一个tab页的html")
    async def get_html(browser_port: int, tab_id: str) -> dict[str, Any]:
        """Save the simplified markup of ``tab_id`` and return the file path list."""
        page = browser_manager.get_browser(browser_port).get_tab(tab_id)
        # The file name is derived from the MD5 of the title, not the raw
        # title, so it contains only hex characters.
        title_digest = hashlib.md5(str(page.title).encode('utf-8')).hexdigest()
        if not os.path.exists(html_source_code_local_save_path):
            os.makedirs(html_source_code_local_save_path)
        simplified_dom = page.run_js(compress_html_js)

        # The markup is currently written as one segment (index 0); the
        # list-shaped result leaves room for splitting into several files.
        html_file_list = []
        segment_name = title_digest + f"_{tab_id}_segment{0}.html"
        segment_path = os.path.join(html_source_code_local_save_path, segment_name)
        with open(segment_path, "w", encoding="utf-8") as out:
            out.write(simplified_dom)
        html_file_list.append(segment_path)

        message = f"已保存tab页:【{tab_id}】的html源码片段共{len(html_file_list)}个"
        return dp_mcp_message_pack(message, tab_id=tab_id, htmls_local_path=html_file_list)
113
+
114
+
115
def register_switch_tab(mcp: FastMCP, browser_manager):
    """Register the ``switch_tab`` tool on ``mcp``."""

    @mcp.tool(name="switch_tab", description="根据传入的tab_id切换到对应的tab页", )
    async def switch_tab(browser_port: int, tab_id: str) -> dict[str, Any]:
        """Activate ``tab_id`` in the browser registered on ``browser_port``."""
        browser_manager.get_browser(browser_port).activate_tab(tab_id)
        done_message = f"已将tab页:【{tab_id}】切换至最前端"
        return dp_mcp_message_pack(done_message)
121
+
122
+
123
def register_close_tab(mcp: FastMCP, browser_manager):
    """Register the ``close_tab`` tool on ``mcp``."""

    @mcp.tool(name="close_tab", description="根据传入的tab_id关闭tab页", )
    async def close_tab(browser_port: int, tab_id: str) -> dict[str, Any]:
        """Close ``tab_id`` in the browser registered on ``browser_port``."""
        browser_manager.get_browser(browser_port).close_tabs(tab_id)
        done_message = f"已将tab页:【{tab_id}】关闭"
        return dp_mcp_message_pack(done_message)
129
+
130
+
131
def register_check_selector(mcp: FastMCP, browser_manager):
    """Register the ``check_selector`` tool on ``mcp``.

    The tool looks up elements by CSS selector in a tab and returns one page
    of their text or attribute values, truncating over-long values.
    """

    @mcp.tool(name="check_selector",
              description="查找tab页中是否包含元素,并返回元素attr_name所对应的值。"
                          "当要选择的元素包含过多元素时,需要传入offset和page_size来分批查看元素,一般不建议调整page_size,更推荐你调整offset"
                          "同时如果单个元素属性值太长,函数会进行截断。一般的单个元素的属性值超过300个字符的就会触发截断,截断后会在最后拼接'...'")
    async def check_selector(browser_port: int, tab_id: str, css_selector: str, attr_name: str = "text",
                             offset: int = 0, page_size: int = 10) -> dict[
        str, Any]:
        """Report whether ``css_selector`` matches and return the matched values.

        :param browser_port: port identifying the browser in ``browser_manager``
        :param tab_id: id of the tab to inspect
        :param css_selector: CSS selector; the ``css:`` prefix is added if missing
        :param attr_name: ``"text"`` for element text, otherwise an attribute name
        :param offset: index of the first matched element to return
        :param page_size: maximum number of elements to return
        :return: message pack with ``selector_ele_exist`` and ``attr_output``
        """
        _browser = browser_manager.get_browser(browser_port)
        target_tab = _browser.get_tab(tab_id)
        # Only a leading "css:" marks the locator type; "css:" appearing inside
        # the selector text must not suppress the prefix.
        if not css_selector.startswith("css:"):
            css_selector = "css:" + css_selector
        target_eles = target_tab.eles(css_selector)
        exist_flag = len(target_eles) != 0

        # Always apply the requested window. Previously the offset was ignored
        # whenever the total match count did not exceed page_size.
        target_eles = target_eles[offset:offset + page_size]

        # Per-value budget: the per-turn budget divided by the number of
        # elements plus room for the "..." marker, two quotes and a comma,
        # with a floor of 300. max(..., 1) guards against a zero divisor.
        slice_seg = max(300, one_turn_max_token // max(page_size + 6, 1))

        def clip(value):
            # Append the marker only when something was actually cut off.
            return value[:slice_seg] + "..." if len(value) > slice_seg else value

        if attr_name == "text":
            ele_attr_list = [clip(ele.text.replace("\n", "")) for ele in target_eles]
            attr_output = "\n".join(ele_attr_list)
        else:
            raw_values = [ele.attr(attr_name) for ele in target_eles]
            # Elements without the attribute (or with an empty value) are omitted.
            ele_attr_list = [clip(value) for value in raw_values if value]
            attr_output = json.dumps(ele_attr_list, ensure_ascii=False)
        return dp_mcp_message_pack(
            f"已完成tab页:【{tab_id}】对:【{css_selector}】的检查",
            tab_id=tab_id,
            selector=css_selector,
            selector_ele_exist=exist_flag,
            page_size=page_size,
            offset=offset,
            attr_output=attr_output
        )
172
+
173
+
174
def register_quit_browser(mcp: FastMCP, browser_manager):
    """Register the ``quit_browser`` tool on ``mcp``."""

    @mcp.tool(name="quit_browser", description="退出浏览器会话,关闭浏览器")
    async def quit_browser(browser_port: int) -> dict[str, Any]:
        """Remove the browser on ``browser_port`` from the manager and quit it."""
        flag, browser = browser_manager.remove_page(browser_port)
        # quit() is only called when the manager reports a successful removal.
        if flag:
            browser.quit()
        summary = f"浏览器[{browser_port}],退出会话,关闭浏览器{'成功' if flag else '失败'}"
        return dp_mcp_message_pack(summary, browser_port=browser_port, quit_flag=flag)
185
+
186
+
187
def register_assert_waf(mcp: FastMCP, browser_manager):
    """Register the ``assert_waf`` tool on ``mcp``.

    The tool compares the compression rates of the HTML obtained through
    plain requests, the headed browser tab and a headless browser, and
    recommends which fetching strategy to use.
    """

    @mcp.tool(name="assert_waf",
              description="通过对比requests、有头浏览器、无头浏览器获取到的html,判断网页是否使用了waf以及是否为动态渲染的网页")
    async def assert_waf(browser_port: int, tab_id: str) -> dict[str, Any]:
        """Analyse ``tab_id`` and return the recommended fetcher plus rate differences.

        :param browser_port: port identifying the browser in ``browser_manager``
        :param tab_id: id of the tab to analyse
        :return: message pack with ``recommend_team`` and the three pairwise
                 compression-rate differences
        """
        _browser = browser_manager.get_browser(browser_port)
        target_tab = _browser.get_tab(tab_id)
        recommend_team = "drissionpage_head"
        head_cookies = target_tab.cookies()
        # Detect a known WAF from the cookies of the headed tab.
        waf_flag, _waf_type = assert_waf_cookie(head_cookies)
        head_html = target_tab.html
        _min_head_html, head_rate = compress_html(head_html, only_text=True)
        raw_html, status_code = requests_html(target_tab.url)
        _min_raw_html, raw_rate = compress_html(raw_html, only_text=True)
        r_h_rate_diff = abs(head_rate - raw_rate)
        # With a known WAF (by cookie or status code) the headless fetch and
        # its compression rate are skipped to save time.
        if waf_flag or status_code in waf_status_code_dict:
            return dp_mcp_message_pack(
                f"已完成tab页:【{tab_id}】的分析,该tab页存在waf",
                tab_id=tab_id,
                recommend_team=recommend_team,
                raw_head_rate_difference=r_h_rate_diff,
                raw_headless_rate_difference=0,
                head_headless_rate_difference=0
            )

        headless_html = dp_headless_html(target_tab.url)
        _min_headless_html, headless_rate = compress_html(headless_html, only_text=True)
        r_hless_rate_diff = abs(raw_rate - headless_rate)
        h_hless_rate_diff = abs(head_rate - headless_rate)
        # Best case: requests, headed and headless all agree -> treated as an
        # unprotected static page.
        if r_h_rate_diff < 40 and r_hless_rate_diff < 40 and h_hless_rate_diff < 40:
            recommend_team = "requests"
        # Headed and headless agree but requests differs -> requests is
        # blocked or the page is rendered dynamically.
        elif h_hless_rate_diff < 30 and r_hless_rate_diff > 40:
            recommend_team = "drissionpage_headless"
        # Remaining cases (headless deviates from headed, or all three differ)
        # keep the default "drissionpage_head" recommendation.
        # NOTE(review): the message below says a WAF exists even when
        # "requests" is recommended — confirm the intended wording.
        return dp_mcp_message_pack(
            f"已完成tab页:【{tab_id}】的分析,该tab页存在waf",
            tab_id=tab_id,
            recommend_team=recommend_team,
            raw_head_rate_difference=r_h_rate_diff,
            # Fixed: this field previously repeated the head/headless difference.
            raw_headless_rate_difference=r_hless_rate_diff,
            head_headless_rate_difference=h_hless_rate_diff
        )
235
+
236
+
237
def register_click_action(mcp: FastMCP, browser_manager):
    """Register the ``click_action`` tool on ``mcp``.

    The tool clicks an element only when the selector matches exactly one
    element, and reports whether it was clickable and whether the click worked.
    """

    @mcp.tool(name="click_action", description="尝试点击tab页中的元素,返回元素是否可以被点击,以及是否点击成功。")
    async def click_action(browser_port: int, tab_id: str, css_selector: str) -> dict[str, Any]:
        """Try to click the single element matched by ``css_selector``."""
        target_tab = browser_manager.get_browser(browser_port).get_tab(tab_id)
        if "css:" not in css_selector:
            css_selector = "css:" + css_selector
        target_eles = target_tab.eles(css_selector)

        click_success = False
        element_clickable = False
        if len(target_eles) != 1:
            # Zero or several matches: refuse to click and ask for a unique selector.
            message = f"tab页:【{tab_id}】传入的css_selector找到了{len(target_eles)}个元素,请确保传入的css_selector可以找到唯一的一个元素"
        else:
            only_element = target_eles[0]
            element_clickable = only_element.states.is_clickable
            # Best effort: any failure raised by click() is reported as an
            # unsuccessful click rather than propagated.
            try:
                only_element.click()
            except Exception:
                click_success = False
            else:
                click_success = True
            message = f"tab页:【{tab_id}】点击【{css_selector}】 {'成功' if click_success else '失败'} 了"

        if click_success:
            extra_message = "点击成功,页面可能有更新,请重新获取页面html,并重新分析页面Selector"
        else:
            extra_message = ""
        return dp_mcp_message_pack(
            message=message,
            browser_port=browser_port,
            tab_id=tab_id,
            css_selector=css_selector,
            element_clickable=element_clickable,
            click_success=click_success,
            extra_message=extra_message
        )
268
+
269
+
270
def register_scroll_action(mcp: FastMCP, browser_manager):
    """Register the ``scroll_action`` tool on ``mcp``.

    The tool scrolls a tab either by a pixel amount in one direction or,
    when no amount is given, to the direction's preset position.
    """

    @mcp.tool(name="scroll_action", description="尝试滚动tab页"
                                                "forward参数是滚动的方向:down、up、left、right"
                                                "pixel参数是滚动的像素值,默认为None。"
                                                "当forward为down且pixel为None,则将页面滚动到垂直中间位置,水平位置不变"
                                                "当forward为up且pixel为None,则将页面滚动到顶部,水平位置不变"
                                                "当forward为left且pixel为None,则将页面滚动到最左边,垂直位置不变"
                                                "当forward为right且pixel为None,则将页面滚动到最右边,垂直位置不变")
    async def scroll_action(browser_port: int, tab_id: str, forward: str = "down", pixel: int = None) -> dict[str, Any]:
        """Scroll ``tab_id`` in direction ``forward``.

        :param browser_port: port identifying the browser in ``browser_manager``
        :param tab_id: id of the tab to scroll
        :param forward: one of ``down``, ``up``, ``left``, ``right``
        :param pixel: distance in pixels; None jumps to the preset position
        :return: message pack confirming the scroll
        """
        _browser = browser_manager.get_browser(browser_port)
        target_tab = _browser.get_tab(tab_id)
        scroll = target_tab.scroll
        # direction -> (jump method used when pixel is None, step method)
        moves = {
            "down": ("to_half", "down"),
            "up": ("to_top", "up"),
            "left": ("to_leftmost", "left"),
            "right": ("to_rightmost", "right"),
        }
        if forward in moves:
            jump_name, step_name = moves[forward]
            if pixel is None:
                # Jump only. Previously the step method was also called with
                # pixel=None right after the jump.
                getattr(scroll, jump_name)()
            else:
                getattr(scroll, step_name)(pixel)
        else:
            # Unknown direction: unchanged fallback — optional jump to the
            # middle, then one downward step of the library's default size.
            if pixel is None:
                scroll.to_half()
            scroll.down()
        message = f"已完成对tab页:【{tab_id}】forward={forward} 的滑动"
        return dp_mcp_message_pack(
            message=message,
            browser_port=browser_port,
            tab_id=tab_id,
        )
@@ -2,6 +2,7 @@ from fastmcp import FastMCP
2
2
 
3
3
  from mcp_tools.dp_tools import *
4
4
  from tools.browser_manager import browser_manager
5
+ from tools.browser_proxy import client_manager
5
6
 
6
7
  mcp = FastMCP("Jarvis Brain Mcp Tools")
7
8
 
@@ -13,18 +14,22 @@ if "TeamNode-Dp" in enabled_modules:
13
14
  # 页面管理
14
15
  register_close_tab(mcp, browser_manager)
15
16
  register_switch_tab(mcp, browser_manager)
16
- register_get_new_tab(mcp, browser_manager)
17
- # 功能
18
- register_visit_url(mcp, browser_manager)
17
+ register_get_new_tab(mcp, browser_manager, client_manager)
18
+ # 基础功能
19
+ register_visit_url(mcp, browser_manager, client_manager)
19
20
  register_get_html(mcp, browser_manager)
20
21
  register_check_selector(mcp, browser_manager)
22
+ register_pop_first_packet(mcp, browser_manager, client_manager)
23
+ # 页面交互
24
+ register_click_action(mcp, browser_manager)
25
+ register_scroll_action(mcp, browser_manager)
21
26
 
22
27
  if "JarvisNode" in enabled_modules:
23
28
  register_assert_waf(mcp, browser_manager)
24
29
 
25
30
 
26
31
  def main():
27
- mcp.run(transport="stdio")
32
+ mcp.run(transport="stdio",show_banner=False)
28
33
 
29
34
 
30
35
  if __name__ == '__main__':
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "Jarvis_Brain" # 别人下载时用的名字,必须在 PyPI 上唯一
3
- version = "0.1.5.5"
3
+ version = "0.1.10.0"
4
4
  description = "Jarvis brain mcp"
5
5
  dependencies = [
6
6
  "fastmcp",
@@ -3,6 +3,10 @@ import random
3
3
  from typing import Optional, Tuple
4
4
  import os
5
5
  from DrissionPage import ChromiumPage, ChromiumOptions
6
+ import platform
7
+ from DrissionPage.common import Settings
8
+
9
+ Settings.set_raise_when_click_failed(True)
6
10
 
7
11
 
8
12
  class BrowserManager:
@@ -15,13 +19,17 @@ class BrowserManager:
15
19
  cls._instance.browser_pool = {}
16
20
  return cls._instance
17
21
 
18
- def create_browser(self) -> Tuple[int, ChromiumPage]:
22
+ def create_browser(self, user_agent: str = None) -> Tuple[int, ChromiumPage]:
19
23
  """创建新的浏览器实例"""
20
24
  random_port = random.randint(9223, 9934)
21
25
  while random_port in self.browser_pool:
22
26
  random_port = random.randint(9223, 9934)
23
27
 
24
28
  co = ChromiumOptions().set_local_port(random_port)
29
+ if user_agent:
30
+ co.set_user_agent(user_agent)
31
+ if platform.system() != 'Windows':
32
+ co.set_argument('--no-sandbox')
25
33
  custom_data_dir = os.path.join(os.path.expanduser('~'), 'DrissionPage', "userData", f"{random_port}")
26
34
  co.set_user_data_path(custom_data_dir) # 设置用户数据路径
27
35
  # if not os.path.exists(custom_data_dir):
@@ -0,0 +1,184 @@
1
+ import threading
2
+ from collections import deque
3
+ import time
4
+ from DrissionPage import ChromiumPage, ChromiumOptions
5
+ from DrissionPage._pages.chromium_tab import ChromiumTab
6
+ from DrissionPage._units.listener import DataPacket
7
+ from typing import Tuple, Optional
8
+ import json
9
+ from urllib.parse import urlparse, urlunparse
10
+
11
+ one_turn_max_token = 16000
12
+
13
+
14
class DPProxyClient:
    """Per-tab client that owns the proxied tab and the queue of captured packets."""

    def __init__(self, driver: ChromiumTab, packet_filter: dict, self_kill=False):
        self.tab_id = driver.tab_id
        # All access to the tab goes through the proxy so that listener calls
        # can be intercepted on behalf of this client.
        self.driver = ChromePageProxy(driver, self)
        self.thread = None
        self.self_kill = self_kill
        self.packet_filter = packet_filter
        self.packet_queue = deque()

    def get_driver(self, start_listen, count=None, timeout=10) -> ChromiumTab:
        """
        Return the proxied tab and start the background packet-consuming thread.

        :param start_listen: pass False when the calling code already started
            the framework's packet listening itself; otherwise it must be True
        :param count: total number of packets to capture, None for unlimited
        :param timeout: maximum wait in seconds between two packets, None for
            unlimited, 10 by default
        :return: the proxied tab
        """
        if start_listen:
            self.driver.listen.set_targets(res_type=('xhr', 'fetch'))
            self.driver.listen.start()
        worker = threading.Thread(target=self.start_listen, args=(count, timeout))
        self.thread = worker
        worker.start()
        return self.driver

    def start_listen(self, count=None, timeout=10):
        """Drain the listener's ``steps`` iterator; the proxy queues each packet."""
        packet_steps = self.driver.listen.steps(count=count, timeout=timeout, gap=1)
        # A zero-length deque consumes the iterator without keeping anything.
        deque(packet_steps, maxlen=0)

    def pop_first_packet(self):
        """Pop the oldest queued packet.

        :return: ``(remaining_queue_size, packet_json)``; ``(0, None)`` when the
            queue is empty. The JSON text has every backslash removed.
        """
        if not self.packet_queue:
            return 0, None
        oldest = self.packet_queue.popleft()
        remaining = len(self.packet_queue)
        compact_json = json.dumps(oldest, ensure_ascii=False, separators=(',', ':'))
        return remaining, compact_json.replace("\\", "")
50
+
51
+
52
class DPProxyClientManager:
    """Registry of per-tab proxy clients, implemented as a singleton.

    ``tab_pool`` maps a tab id to ``{"client": DPProxyClient, "driver": tab}``.
    """
    _instance = None

    def __new__(cls):
        # Every construction returns the same instance, created on first use.
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            cls._instance.tab_pool = {}
        return cls._instance

    def create_client(self, tab: "ChromiumTab", packet_filter: dict,
                      self_kill=False) -> "Tuple[str, DPProxyClient, ChromiumTab]":
        """Create a proxy client for ``tab``, start listening and register it.

        :param tab: tab to wrap
        :param packet_filter: filter settings stored on the new client
        :param self_kill: forwarded to the client
        :return: ``(tab_id, client, proxied_tab)``
        """
        client = DPProxyClient(tab, packet_filter, self_kill=self_kill)
        # The listener gives up after ten minutes without a new packet.
        tab = client.get_driver(True, timeout=60 * 10)
        tab_id = tab.tab_id
        self.tab_pool[tab_id] = {"client": client, "driver": tab}
        return tab_id, client, tab

    def get_client(self, tab_id: str) -> "Optional[DPProxyClient]":
        """Return the client registered for ``tab_id``, or None if unknown."""
        entry = self.tab_pool.get(tab_id)
        if not entry:
            return None
        return entry.get("client")

    def remove_client(self, tab_id: str) -> "Tuple[bool, Optional[dict]]":
        """Unregister ``tab_id``.

        :return: ``(found, entry)`` where ``entry`` is the removed
            ``{"client": ..., "driver": ...}`` dict, or None if not found
        """
        entry = self.tab_pool.pop(tab_id, None)
        return entry is not None, entry

    def list_clients(self) -> "list[str]":
        """Return the ids of all registered tabs."""
        return list(self.tab_pool)
83
+
84
+
85
class ChromePageProxy:
    """Transparent proxy around a page/tab that wraps its ``listen`` attribute.

    Every attribute is forwarded to the wrapped page; ``listen`` is returned
    wrapped in a ``DrissionPageListenerProxy`` bound to the owning client.
    """

    def __init__(self, page, client=None):
        self.__dict__['page'] = page
        self.__dict__['client'] = client

    def __getattr__(self, item):
        # Read from __dict__ directly: going through self.page would re-enter
        # __getattr__ forever if 'page' is not set yet.
        try:
            page = self.__dict__['page']
        except KeyError:
            raise AttributeError(item) from None
        attr = getattr(page, item)
        # No printing here: the server speaks JSON-RPC over stdout when run
        # with the stdio transport, so stray output corrupts the protocol.
        if item == 'listen':
            return DrissionPageListenerProxy(attr, self.__dict__['client'])
        return attr
97
+
98
+
99
class DrissionPageListenerProxy:
    """Proxy around a tab's listener that records every packet it delivers.

    ``wait`` and ``steps`` are wrapped so that each returned packet is also
    handed to ``check_data_packet`` for the owning client; all other
    attributes are forwarded unchanged.
    """

    def __init__(self, listener, client=None):
        self.listener = listener
        self.client = client

    def __getattr__(self, item):
        # Read from __dict__ to avoid unbounded recursion when 'listener'
        # has not been set on the instance.
        try:
            listener = self.__dict__['listener']
        except KeyError:
            raise AttributeError(item) from None
        attr = getattr(listener, item)

        if item == "wait":
            def wait_wrapper(*args, **kwargs):
                result = attr(*args, **kwargs)
                # NOTE(review): wait() is expected to return one packet, a
                # list of packets, or False on timeout — confirm against the
                # installed DrissionPage version. Only real packets are queued.
                if isinstance(result, (list, tuple)):
                    packets = result
                elif result is None or result is False:
                    packets = ()
                else:
                    packets = (result,)
                for packet in packets:
                    check_data_packet(packet, self.client)
                return result

            return wait_wrapper

        if item == "steps":
            def steps_wrapper(*args, **kwargs):
                # Validate eagerly so a bad call fails where it is made, not
                # on the first iteration.
                if kwargs.get("gap", 1) > 1:
                    raise Exception("暂不支持多包监控")

                def relay():
                    for step in attr(*args, **kwargs):
                        check_data_packet(step, self.client)
                        yield step

                return relay()

            return steps_wrapper

        return attr
127
+
128
+
129
def check_data_packet(packet: "DataPacket", client: "DPProxyClient", max_segment_len: int = None):
    """
    Wrap a captured packet and append it to the client's ``packet_queue``.

    The JSON-serialised response body is split into segments of at most
    ``max_segment_len`` characters; one queue entry is appended per segment
    and only the final segment carries ``body_completed=True``. Packets
    rejected by the client's domain or method filter are not queued.

    :param packet: captured packet (reads ``url``, ``request`` and ``response``)
    :param client: owner of ``packet_filter`` and ``packet_queue``
    :param max_segment_len: maximum characters per body segment; defaults to
        the module-level ``one_turn_max_token``
    :return: None
    """
    if max_segment_len is None:
        max_segment_len = one_turn_max_token

    url = packet.url
    method = packet.request.method
    domain = urlparse(url).netloc

    # Filters depend only on the packet, so they are evaluated once up front.
    packet_filter = client.packet_filter
    domain_filter = packet_filter.get("domain_filter", None)
    method_filter = packet_filter.get("method_filter", None)
    if method_filter is None:
        method_filter = ["GET", "POST"]
    # A domain filter is only applied when one was given.
    if domain_filter and domain not in domain_filter:
        return
    if method not in method_filter:
        return

    data = None
    if packet.request.hasPostData:
        data = packet.request.postData

    body = packet.response.body
    # json.dumps cannot serialise bytes; decode them leniently instead of
    # letting a binary response raise inside the listener thread.
    if isinstance(body, (bytes, bytearray)):
        body = bytes(body).decode("utf-8", errors="replace")
    body_str = json.dumps(body, ensure_ascii=False, separators=(',', ':'))
    segments = [body_str[i:i + max_segment_len] for i in range(0, len(body_str), max_segment_len)]

    last_index = len(segments) - 1
    for index, segment in enumerate(segments):
        client.packet_queue.append({
            "url": url,
            # Computed per segment: previously the flag stayed False for the
            # last segment once an earlier segment had cleared it.
            "body_completed": index == last_index,
            "method": method,
            "request_data": data,
            "request_headers": dict(packet.request.headers),
            "response_headers": dict(packet.response.headers),
            "response_body_segment": segment.replace("\\", ""),
        })
168
+
169
+
170
+ client_manager = DPProxyClientManager()
171
+
172
+ # if __name__ == '__main__':
173
+ # co = ChromiumOptions().set_user_agent(
174
+ # "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Mobile Safari/537.36")
175
+ # tab = ChromiumPage(co).latest_tab
176
+ # client = DPProxyClient(tab, self_kill=False)
177
+ # # client = CaptchaClient(tab, self_kill=True)
178
+ # tab = client.get_driver(True)
179
+ # url = "https://api.toutiaoapi.com/feoffline/hotspot_and_local/html/hot_list/index.html?client_extra_params=%7B%22custom_log_pb%22%3A%22%7B%5C%22style_id%5C%22%3A%5C%2240030%5C%22%2C%5C%22entrance_hotspot%5C%22%3A%5C%22search%5C%22%2C%5C%22location%5C%22%3A%5C%22hot_board%5C%22%2C%5C%22category_name%5C%22%3A%5C%22hotboard_light%5C%22%7D%22%7D&count=50&log_pb=%7B%22style_id%22%3A%2240030%22%2C%22entrance_hotspot%22%3A%22search%22%2C%22location%22%3A%22hot_board%22%2C%22category_name%22%3A%22hotboard_light%22%7D&only_hot_list=1&tab_name=stream&enter_keyword=%23%E7%BE%8E%E5%9B%BD%E9%80%80%E5%87%BA66%E4%B8%AA%E5%9B%BD%E9%99%85%E7%BB%84%E7%BB%87%23"
180
+ # tab.get(url)
181
+ # for _ in range(5056):
182
+ # new_packet = client.pop_first_packet()
183
+ # print(new_packet, "23")
184
+ # time.sleep(1)
@@ -7,6 +7,87 @@ from bs4 import BeautifulSoup
7
7
  from curl_cffi import requests
8
8
  from lxml import html, etree
9
9
 
10
+ compress_html_js = """
11
+ function getSimplifiedDOM(node) {
12
+ // 1. 处理文本节点
13
+ if (node.nodeType === Node.TEXT_NODE) {
14
+ const text = node.textContent.trim();
15
+ // 限制文本长度,避免大段文章消耗 token,保留前100个字符通常足够定位
16
+ return text ? text.slice(0, 100) + (text.length > 100 ? '...' : '') : null;
17
+ }
18
+
19
+ // 2. 过滤无用标签
20
+ const ignoreTags = ['SCRIPT', 'STYLE', 'NOSCRIPT', 'IFRAME', 'SVG', 'LINK', 'META'];
21
+ if (ignoreTags.includes(node.tagName)) return null;
22
+ if (node.nodeType !== Node.ELEMENT_NODE) return null;
23
+
24
+ // 3. 过滤不可见元素
25
+ const style = window.getComputedStyle(node);
26
+ if (style.display === 'none' || style.visibility === 'hidden' || style.opacity === '0') return null;
27
+ // 过滤宽高太小的元素(往往是埋点空像素)
28
+ const rect = node.getBoundingClientRect();
29
+ if (rect.width === 0 || rect.height === 0) return null;
30
+
31
+ // --- 开始构建标签字符串 ---
32
+ const tagName = node.tagName.toLowerCase();
33
+ let tagStr = tagName;
34
+
35
+ // A. 基础标识符 (ID 和 Class)
36
+ if (node.id) tagStr += `#${node.id}`;
37
+ if (node.className && typeof node.className === 'string') {
38
+ // 过滤掉 Tailwind 等太长且无语义的 class,保留有意义的业务 class
39
+ // 这里简单处理,全部保留,让 LLM 自己判断
40
+ const classes = node.className.trim().split(/\s+/);
41
+ if (classes.length > 0) tagStr += `.${classes.join('.')}`;
42
+ }
43
+
44
+ // B. 关键属性白名单 (这是你指出问题的核心修复)
45
+ const props = [];
46
+
47
+ // 通用重要属性
48
+ if (node.getAttribute('role')) props.push(`role="${node.getAttribute('role')}"`);
49
+ if (node.getAttribute('aria-label')) props.push(`aria-label="${node.getAttribute('aria-label')}"`);
50
+ if (node.getAttribute('title')) props.push(`title="${node.getAttribute('title')}"`);
51
+
52
+ // 特定标签的特定属性
53
+ if (tagName === 'a') {
54
+ const href = node.getAttribute('href');
55
+ // 只保留有意义的链接,忽略 javascript:;
56
+ if (href && !href.startsWith('javascript')) props.push(`href="${href}"`);
57
+ } else if (tagName === 'input' || tagName === 'textarea' || tagName === 'select') {
58
+ if (node.getAttribute('type')) props.push(`type="${node.getAttribute('type')}"`);
59
+ if (node.getAttribute('name')) props.push(`name="${node.getAttribute('name')}"`);
60
+ if (node.getAttribute('placeholder')) props.push(`placeholder="${node.getAttribute('placeholder')}"`);
61
+ if (node.disabled) props.push('disabled');
62
+ if (node.checked) props.push('checked');
63
+ } else if (tagName === 'button') {
64
+ if (node.getAttribute('type')) props.push(`type="${node.getAttribute('type')}"`);
65
+ } else if (tagName === 'img') {
66
+ if (node.getAttribute('alt')) props.push(`alt="${node.getAttribute('alt')}"`);
67
+ }
68
+
69
+ if (props.length > 0) {
70
+ tagStr += ` ${props.join(' ')}`;
71
+ }
72
+
73
+ // 4. 递归子节点
74
+ const children = Array.from(node.childNodes)
75
+ .map(getSimplifiedDOM)
76
+ .filter(n => n !== null);
77
+
78
+ // 5. 组装输出
79
+ // 如果没有子节点,也没有ID/Class,也不是输入框/图片/链接,那这个标签可能只是布局用的 div,可以考虑跳过它直接返回子节点内容
80
+ // 但为了保持结构完整,我们暂时保留它
81
+ if (children.length === 0) {
82
+ // 自闭合标签或空标签
83
+ return `<${tagStr} />`;
84
+ }
85
+ return `<${tagStr}>${children.join('')}</${tagName}>`; // 结束标签只保留 tagName 节省 token
86
+ }
87
+
88
+ return getSimplifiedDOM(document.body);
89
+ """
90
+
10
91
 
11
92
  # 使用requests获取html,用于测试是否使用了瑞数和jsl
12
93
  def requests_html(url):
@@ -53,6 +134,15 @@ def compress_html(content, only_text=False):
53
134
  for meta in doc.xpath('//meta'):
54
135
  meta.getparent().remove(meta)
55
136
 
137
+ for svg in doc.xpath('//svg'):
138
+ # 获取 SVG 内的文本内容
139
+ text_content = svg.text_content()
140
+ # 创建一个新的文本节点替换 SVG
141
+ parent = svg.getparent()
142
+ if parent is not None:
143
+ parent.text = (parent.text or '') + text_content
144
+ parent.remove(svg)
145
+
56
146
  # 删除 style 属性
57
147
  for element in doc.xpath('//*[@style]'):
58
148
  element.attrib.pop('style')
@@ -1,166 +0,0 @@
1
- import json
2
- import os
3
- from typing import Any
4
-
5
- from DrissionPage._elements.none_element import NoneElement
6
- from fastmcp import FastMCP
7
-
8
- from tools.tools import compress_html, requests_html, dp_headless_html, assert_waf_cookie, dp_mcp_message_pack
9
-
10
# Directory (under the current working directory) where the get_html tool
# stores compressed page sources.
html_source_code_local_save_path = os.path.join(os.getcwd(), "html-source-code")

# HTTP status codes checked by assert_waf, mapped to the WAF name for each.
waf_status_code_dict = {412: "瑞数", 521: "加速乐"}
15
-
16
-
17
def register_visit_url(mcp: FastMCP, browser_manager):
    """Register the ``visit_url`` tool on *mcp*.

    The tool asks *browser_manager* to create a browser, loads *url* in the
    tab returned by ``get_tab()`` and reports the tab id and browser port.
    """

    @mcp.tool(name="visit_url", description="使用Drissionpage打开url访问某个网站")
    async def visit_url(url: str) -> dict[str, Any]:
        port, browser = browser_manager.create_browser()
        current_tab = browser.get_tab()
        current_tab.get(url)
        message = f"已在[{port}]端口创建浏览器对象,并已打开链接:{url}"
        return dp_mcp_message_pack(message, tab_id=current_tab.tab_id, browser_port=port)
29
-
30
-
31
def register_get_html(mcp: FastMCP, browser_manager):
    """Register the ``get_html`` tool on *mcp*.

    The tool compresses the HTML of one tab with ``compress_html``, writes it
    into ``html_source_code_local_save_path`` and returns the saved path.
    """

    @mcp.tool(name="get_html", description="使用Drissionpage获取某一个tab页的html")
    async def get_html(browser_port: int, tab_id: str) -> dict[str, Any]:
        import re  # local import: only needed to sanitise the file name

        _browser = browser_manager.get_browser(browser_port)
        tab = _browser.get_tab(tab_id)
        # Page titles are arbitrary text. Path separators and characters that
        # are illegal in file names would make open() fail or place the file
        # outside the target directory, so they are replaced with "_".
        safe_title = re.sub(r'[\\/:*?"<>|\x00-\x1f]', "_", tab.title or "")
        file_name = f"{safe_title}_{tab_id}.html"
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(html_source_code_local_save_path, exist_ok=True)
        abs_path = os.path.join(html_source_code_local_save_path, file_name)
        # Compress before opening the file so that a failure here does not
        # leave an empty file behind.
        min_html, _compress_rate = compress_html(tab.html)
        with open(abs_path, "w", encoding="utf-8") as f:
            f.write(min_html)
        return dp_mcp_message_pack(f"已保存tab页:【{tab_id}】的html源码", tab_id=tab_id, html_local_path=abs_path)
44
-
45
-
46
def register_get_new_tab(mcp: FastMCP, browser_manager):
    """Register the ``get_new_tab`` tool on *mcp*.

    The tool opens *url* in a new tab of the browser on *browser_port*,
    activates that tab and returns its id.
    """

    @mcp.tool(name="get_new_tab", description="使用Drissionpage创建一个新的tab页,在新的tab页中打开url")
    async def get_new_tab(browser_port: int, url: str) -> dict[str, Any]:
        browser = browser_manager.get_browser(browser_port)
        new_tab = browser.new_tab(url)
        browser.activate_tab(new_tab)
        return dp_mcp_message_pack(f"已创建新的tab页,并打开链接:{url}", tab_id=new_tab.tab_id)
54
-
55
-
56
def register_switch_tab(mcp: FastMCP, browser_manager):
    """Register the ``switch_tab`` tool on *mcp*.

    The tool activates the tab identified by *tab_id* in the browser on
    *browser_port*.
    """

    @mcp.tool(name="switch_tab", description="根据传入的tab_id切换到对应的tab页", )
    async def switch_tab(browser_port: int, tab_id: str) -> dict[str, Any]:
        browser_manager.get_browser(browser_port).activate_tab(tab_id)
        message = f"已将tab页:【{tab_id}】切换至最前端"
        return dp_mcp_message_pack(message)
62
-
63
-
64
def register_close_tab(mcp: FastMCP, browser_manager):
    """Register the ``close_tab`` tool on *mcp*.

    The tool closes the tab identified by *tab_id* in the browser on
    *browser_port*.
    """

    @mcp.tool(name="close_tab", description="根据传入的tab_id关闭tab页", )
    # Parameters are annotated like every other tool in this module
    # (browser_port: int, tab_id: str) so the tool signature is consistent.
    async def close_tab(browser_port: int, tab_id: str) -> dict[str, Any]:
        _browser = browser_manager.get_browser(browser_port)
        _browser.close_tabs(tab_id)
        return dp_mcp_message_pack(f"已将tab页:【{tab_id}】关闭")
70
-
71
-
72
def register_check_selector(mcp: FastMCP, browser_manager):
    """Register the ``check_selector`` tool on *mcp*.

    The tool looks up all elements matching a CSS selector in one tab and
    reports whether any exist, together with either their text or the value
    of the requested attribute.
    """

    @mcp.tool(name="check_selector", description="查找tab页中是否包含元素,并返回元素attr_name所对应的值")
    async def check_selector(browser_port: int, tab_id: str, css_selector: str, attr_name: str = "text") -> dict[str, Any]:
        _browser = browser_manager.get_browser(browser_port)
        target_tab = _browser.get_tab(tab_id)
        # Add the "css:" locator prefix unless the caller already supplied it.
        # This must be a prefix test: a substring test would skip the prefix
        # for selectors that merely contain "css:", e.g. a[href*="css:"].
        if not css_selector.startswith("css:"):
            css_selector = "css:" + css_selector
        target_eles = target_tab.eles(css_selector)
        exist_flag = len(target_eles) != 0
        if attr_name == "text":
            # One line per element; newlines inside an element's text are removed.
            attr_output = "\n".join(ele.text.replace("\n", "") for ele in target_eles)
        else:
            attr_output = json.dumps([ele.attr(attr_name) for ele in target_eles])
        return dp_mcp_message_pack(
            f"已完成tab页:【{tab_id}】对:【{css_selector}】的检查",
            tab_id=tab_id,
            selector=css_selector,
            selector_ele_exist=exist_flag,
            attr_output=attr_output
        )
96
-
97
-
98
def register_quit_browser(mcp: FastMCP, browser_manager):
    """Register the ``quit_browser`` tool on *mcp*.

    The tool removes the browser on *browser_port* from *browser_manager*
    and, when that succeeds, quits it. The result carries the success flag.
    """

    @mcp.tool(name="quit_browser", description="退出浏览器会话,关闭浏览器")
    async def quit_browser(browser_port: int) -> dict[str, Any]:
        flag, removed_browser = browser_manager.remove_page(browser_port)
        if flag:
            removed_browser.quit()
        message = f"浏览器[{browser_port}],退出会话,关闭浏览器{'成功' if flag else '失败'}"
        return dp_mcp_message_pack(message, browser_port=browser_port, quit_flag=flag)
109
-
110
-
111
def register_assert_waf(mcp: FastMCP, browser_manager):
    """Register the ``assert_waf`` tool on *mcp*.

    The tool compares the text-only compression rates of the page as fetched
    by requests, by the headed tab and by a headless browser, and recommends
    which of the three to use ("requests", "drissionpage_headless" or the
    default "drissionpage_head").
    """

    @mcp.tool(name="assert_waf",
              description="通过对比requests、有头浏览器、无头浏览器获取到的html,判断网页是否使用了waf以及是否为动态渲染的网页")
    async def assert_waf(browser_port: int, tab_id: str) -> dict[str, Any]:
        _browser = browser_manager.get_browser(browser_port)
        target_tab = _browser.get_tab(tab_id)
        recommend_team = "drissionpage_head"
        # Use the headed tab's cookies to decide whether a WAF is present.
        waf_flag, _waf_type = assert_waf_cookie(target_tab.cookies())
        _, head_rate = compress_html(target_tab.html, only_text=True)
        raw_html, status_code = requests_html(target_tab.url)
        _, raw_rate = compress_html(raw_html, only_text=True)
        r_h_rate_diff = abs(head_rate - raw_rate)
        # A known WAF was identified: skip the headless fetch and comparison.
        if waf_flag or status_code in waf_status_code_dict:
            return dp_mcp_message_pack(
                f"已完成tab页:【{tab_id}】的分析,该tab页存在waf",
                tab_id=tab_id,
                recommend_team=recommend_team,
                raw_head_rate_difference=r_h_rate_diff,
                raw_headless_rate_difference=0,
                head_headless_rate_difference=0
            )

        headless_html = dp_headless_html(target_tab.url)
        _, headless_rate = compress_html(headless_html, only_text=True)
        r_hless_rate_diff = abs(raw_rate - headless_rate)
        h_hless_rate_diff = abs(head_rate - headless_rate)
        # Best case: requests, headed and headless results all roughly agree,
        # so the page is treated as an unprotected static page.
        if r_h_rate_diff < 40 and r_hless_rate_diff < 40 and h_hless_rate_diff < 40:
            recommend_team = "requests"
        # Headed and headless agree but requests differs: requests is being
        # blocked, or the page is rendered dynamically.
        elif h_hless_rate_diff < 30 and r_hless_rate_diff > 40:
            recommend_team = "drissionpage_headless"
        # Any other combination keeps the headed-browser default. The cookie
        # check above already covers an intermittent known WAF.
        return dp_mcp_message_pack(
            # This path is only reached when neither the cookie check nor the
            # status code matched a known WAF, so the message must not claim
            # that one exists.
            f"已完成tab页:【{tab_id}】的分析,未检测到已知waf特征",
            tab_id=tab_id,
            recommend_team=recommend_team,
            raw_head_rate_difference=r_h_rate_diff,
            # Fixed: report the requests-vs-headless difference here, not the
            # headed-vs-headless one.
            raw_headless_rate_difference=r_hless_rate_diff,
            head_headless_rate_difference=h_hless_rate_diff
        )
159
-
160
- # def register_highlight_element_captcha(mcp: FastMCP, browser_manager):
161
- # @mcp.tool(name="highlight_element_captcha",
162
- # description="将传入的Selector在页面上高亮,并截屏")
163
- # async def highlight_element_captcha(browser_port: int, tab_id: str, selector: str) -> dict[str, Any]:
164
- # _browser = browser_manager.get_browser(browser_port)
165
- # tab = _browser.get_tab(tab_id)
166
- # tab.ele
File without changes
File without changes