Jarvis-Brain 0.1.5.9__tar.gz → 0.1.5.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: Jarvis_Brain
3
- Version: 0.1.5.9
3
+ Version: 0.1.5.10
4
4
  Summary: Jarvis brain mcp
5
5
  Requires-Python: >=3.10
6
6
  Requires-Dist: beautifulsoup4
@@ -0,0 +1,176 @@
1
+ import hashlib
2
+ import json
3
+ import os
4
+ from typing import Any
5
+
6
+ from fastmcp import FastMCP
7
+
8
+ from tools.tools import compress_html, requests_html, dp_headless_html, assert_waf_cookie, dp_mcp_message_pack
9
+
10
+ html_source_code_local_save_path = os.path.join(os.getcwd(), "html-source-code")
11
+ waf_status_code_dict = {
12
+ 412: "瑞数",
13
+ 521: "加速乐"
14
+ }
15
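+ # Maximum size of one HTML segment (applied as a character count when slicing), so that a single segment never exceeds the AI's maximum input for one turn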
16
+ ont_turn_max_token = 25000
17
+
18
+
19
def register_visit_url(mcp: FastMCP, browser_manager):
    """Register the ``visit_url`` tool on *mcp*.

    The tool asks *browser_manager* for a new browser, loads the requested URL
    in the tab returned by ``get_tab()`` and reports the tab id together with
    the browser port through ``dp_mcp_message_pack``.
    """

    @mcp.tool(name="visit_url", description="使用Drissionpage打开url访问某个网站")
    async def visit_url(url: str) -> dict[str, Any]:
        # create_browser() hands back a (port, browser) pair.
        browser_port, browser = browser_manager.create_browser()
        current_tab = browser.get_tab()
        current_tab.get(url)
        opened_tab_id = current_tab.tab_id
        summary = f"已在[{browser_port}]端口创建浏览器对象,并已打开链接:{url}"
        return dp_mcp_message_pack(summary, tab_id=opened_tab_id, browser_port=browser_port)
31
+
32
+
33
def register_get_html(mcp: FastMCP, browser_manager):
    """Register the ``get_html`` tool on *mcp*.

    The tool compresses the HTML of one tab with ``compress_html``, cuts the
    result into segments of at most ``ont_turn_max_token`` characters and
    writes every segment to its own file below
    ``html_source_code_local_save_path``. The list of written paths is
    returned through ``dp_mcp_message_pack``.
    """
    # Characters that are not allowed in file names on common file systems;
    # each one is replaced with "_" when the tab title is used as a file name.
    unsafe_chars = str.maketrans({char: "_" for char in '\\/:*?"<>|'})

    @mcp.tool(name="get_html", description="使用Drissionpage获取某一个tab页的html")
    async def get_html(browser_port: int, tab_id: str) -> dict[str, Any]:
        _browser = browser_manager.get_browser(browser_port)
        tab = _browser.get_tab(tab_id)
        base_name = str(tab.title).translate(unsafe_chars)
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(html_source_code_local_save_path, exist_ok=True)
        min_html, _compress_rate = compress_html(tab.html)
        html_file_list = []
        # An empty document produces no segments and therefore no files.
        for index, start in enumerate(range(0, len(min_html), ont_turn_max_token)):
            # Always derive the name from the untouched base name. Re-assigning
            # the base inside the loop made every later segment inherit the
            # suffixes of all earlier ones.
            segment_name = f"{base_name}_{tab_id}_segment{index}.html"
            abs_path = os.path.join(html_source_code_local_save_path, segment_name)
            with open(abs_path, "w", encoding="utf-8") as f:
                f.write(min_html[start:start + ont_turn_max_token])
            html_file_list.append(abs_path)
        message = f"已保存tab页:【{tab_id}】的html源码片段共{len(html_file_list)}个"
        return dp_mcp_message_pack(message, tab_id=tab_id, htmls_local_path=html_file_list)
52
+
53
def register_get_new_tab(mcp: FastMCP, browser_manager):
    """Register the ``get_new_tab`` tool on *mcp*.

    The tool opens *url* in a new tab of the browser registered under
    *browser_port*, activates that tab and reports its tab id.
    """

    @mcp.tool(name="get_new_tab", description="使用Drissionpage创建一个新的tab页,在新的tab页中打开url")
    async def get_new_tab(browser_port: int, url: str) -> dict[str, Any]:
        browser = browser_manager.get_browser(browser_port)
        opened_tab = browser.new_tab(url)
        # Activate the tab that was just created.
        browser.activate_tab(opened_tab)
        new_tab_id = opened_tab.tab_id
        summary = f"已创建新的tab页,并打开链接:{url}"
        return dp_mcp_message_pack(summary, tab_id=new_tab_id)
61
+
62
def register_switch_tab(mcp: FastMCP, browser_manager):
    """Register the ``switch_tab`` tool on *mcp*.

    The tool activates the tab identified by *tab_id* in the browser that is
    registered under *browser_port*.
    """

    @mcp.tool(name="switch_tab", description="根据传入的tab_id切换到对应的tab页")
    async def switch_tab(browser_port: int, tab_id: str) -> dict[str, Any]:
        browser_manager.get_browser(browser_port).activate_tab(tab_id)
        summary = f"已将tab页:【{tab_id}】切换至最前端"
        return dp_mcp_message_pack(summary)
68
+
69
def register_close_tab(mcp: FastMCP, browser_manager):
    """Register the ``close_tab`` tool on *mcp*.

    The tool closes the tab identified by *tab_id* in the browser that is
    registered under *browser_port*.
    """

    @mcp.tool(name="close_tab", description="根据传入的tab_id关闭tab页")
    # The parameters are annotated like every sibling tool (int port, str tab
    # id) so the tool signature no longer exposes them as untyped values.
    async def close_tab(browser_port: int, tab_id: str) -> dict[str, Any]:
        _browser = browser_manager.get_browser(browser_port)
        _browser.close_tabs(tab_id)
        return dp_mcp_message_pack(f"已将tab页:【{tab_id}】关闭")
75
+
76
def register_check_selector(mcp: FastMCP, browser_manager):
    """Register the ``check_selector`` tool on *mcp*.

    The tool looks up all elements matching a CSS selector in one tab and
    reports whether any exist, together with either their text or the value
    of the attribute named by *attr_name*.
    """

    @mcp.tool(name="check_selector", description="查找tab页中是否包含元素,并返回元素attr_name所对应的值")
    async def check_selector(
            browser_port: int, tab_id: str, css_selector: str, attr_name: str = "text"
    ) -> dict[str, Any]:
        _browser = browser_manager.get_browser(browser_port)
        target_tab = _browser.get_tab(tab_id)
        # Only a leading "css:" counts as the locator prefix. A substring test
        # would skip the prefix for selectors that merely contain "css:"
        # somewhere, e.g. inside an attribute value.
        if not css_selector.startswith("css:"):
            css_selector = "css:" + css_selector
        target_eles = target_tab.eles(css_selector)
        exist_flag = len(target_eles) != 0
        # attr_output is computed unconditionally so it is always bound, even
        # when nothing matched ("" for text, "[]" for attributes).
        if attr_name == "text":
            attr_output = "\n".join(ele.text.replace("\n", "") for ele in target_eles)
        else:
            attr_output = json.dumps([ele.attr(attr_name) for ele in target_eles])
        return dp_mcp_message_pack(
            f"已完成tab页:【{tab_id}】对:【{css_selector}】的检查",
            tab_id=tab_id,
            selector=css_selector,
            selector_ele_exist=exist_flag,
            attr_output=attr_output
        )
101
+
102
def register_quit_browser(mcp: FastMCP, browser_manager):
    """Register the ``quit_browser`` tool on *mcp*.

    The tool removes the browser registered under *browser_port* from
    *browser_manager* and, when the removal succeeded, quits that browser.
    The outcome is reported as ``quit_flag``.
    """

    @mcp.tool(name="quit_browser", description="退出浏览器会话,关闭浏览器")
    async def quit_browser(browser_port: int) -> dict[str, Any]:
        # remove_page() yields a (success flag, browser) pair.
        removed, browser = browser_manager.remove_page(browser_port)
        if removed:
            browser.quit()
        outcome = '成功' if removed else '失败'
        summary = f"浏览器[{browser_port}],退出会话,关闭浏览器{outcome}"
        return dp_mcp_message_pack(summary, browser_port=browser_port, quit_flag=removed)
113
+
114
def register_assert_waf(mcp: FastMCP, browser_manager):
    """Register the ``assert_waf`` tool on *mcp*.

    The tool fetches the same page three ways (the already opened headed tab,
    ``requests_html`` and ``dp_headless_html``), runs each result through
    ``compress_html(..., only_text=True)`` and compares the returned rates
    pairwise. From those differences it recommends one of ``"requests"``,
    ``"drissionpage_headless"`` or ``"drissionpage_head"`` (the default).
    """

    @mcp.tool(name="assert_waf",
              description="通过对比requests、有头浏览器、无头浏览器获取到的html,判断网页是否使用了waf以及是否为动态渲染的网页")
    async def assert_waf(browser_port: int, tab_id: str) -> dict[str, Any]:
        _browser = browser_manager.get_browser(browser_port)
        target_tab = _browser.get_tab(tab_id)
        recommend_team = "drissionpage_head"
        # Check the cookies of the headed tab for a known WAF.
        waf_flag, _waf_type = assert_waf_cookie(target_tab.cookies())
        _, head_rate = compress_html(target_tab.html, only_text=True)
        raw_html, status_code = requests_html(target_tab.url)
        _, raw_rate = compress_html(raw_html, only_text=True)
        r_h_rate_diff = abs(head_rate - raw_rate)
        # A known WAF was recognised (by cookie or by status code): do not
        # spend time on the headless fetch and its rate comparison.
        if waf_flag or status_code in waf_status_code_dict:
            return dp_mcp_message_pack(
                f"已完成tab页:【{tab_id}】的分析,该tab页存在waf",
                tab_id=tab_id,
                recommend_team=recommend_team,
                raw_head_rate_difference=r_h_rate_diff,
                raw_headless_rate_difference=0,
                head_headless_rate_difference=0
            )

        headless_html = dp_headless_html(target_tab.url)
        _, headless_rate = compress_html(headless_html, only_text=True)
        r_hless_rate_diff = abs(raw_rate - headless_rate)
        h_hless_rate_diff = abs(head_rate - headless_rate)
        # Best case: requests, headed and headless results are all close to
        # each other, so the page is treated as an unprotected static page.
        if r_h_rate_diff < 40 and r_hless_rate_diff < 40 and h_hless_rate_diff < 40:
            recommend_team = "requests"
        # Headed and headless agree but requests does not: treated as
        # protection against requests or as a dynamically rendered page.
        elif h_hless_rate_diff < 30 and r_hless_rate_diff > 40:
            recommend_team = "drissionpage_headless"
        # Every other combination (e.g. headed and headless differ widely)
        # keeps the default recommendation "drissionpage_head".
        return dp_mcp_message_pack(
            # This path is only reached when no known WAF was recognised, so
            # the message must not claim that one exists.
            f"已完成tab页:【{tab_id}】的分析,该tab页未检测到已知waf",
            tab_id=tab_id,
            recommend_team=recommend_team,
            raw_head_rate_difference=r_h_rate_diff,
            # Report the requests-vs-headless difference here; the head-vs-
            # headless value was previously passed for both fields.
            raw_headless_rate_difference=r_hless_rate_diff,
            head_headless_rate_difference=h_hless_rate_diff
        )
162
+
163
+ def register_highlight_element_captcha(mcp: FastMCP, browser_manager):
164
+ @mcp.tool(name="highlight_element_captcha", description="将传入的Selector在页面上高亮,并截屏")
165
+ async def highlight_element_captcha(browser_port: int, tab_id: str, css_selector: str) -> dict[str, Any]:
166
+ _browser = browser_manager.get_browser(browser_port)
167
+ tab = _browser.get_tab(tab_id)
168
+ highlight_style = 'background-color: rgba(255, 255, 0, 0.2); outline: 2px solid yellow !important; outline-offset: 2px'
169
+ css_selector = css_selector
170
+ if "css:" not in css_selector:
171
+ css_selector = "css:" + css_selector
172
+
173
+ target_eles = tab.eles(css_selector)
174
+ exist_flag = False
175
+ if len(target_eles) != 0:
176
+ exist_flag = True
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "Jarvis_Brain" # 别人下载时用的名字,必须在 PyPI 上唯一
3
- version = "0.1.5.9"
3
+ version = "0.1.5.10"
4
4
  description = "Jarvis brain mcp"
5
5
  dependencies = [
6
6
  "fastmcp",
@@ -1,176 +0,0 @@
1
- import hashlib
2
- import json
3
- import os
4
- from typing import Any
5
-
6
- from fastmcp import FastMCP
7
-
8
- from tools.tools import compress_html, requests_html, dp_headless_html, assert_waf_cookie, dp_mcp_message_pack
9
-
10
- html_source_code_local_save_path = os.path.join(os.getcwd(), "html-source-code")
11
- waf_status_code_dict = {
12
- 412: "瑞数",
13
- 521: "加速乐"
14
- }
15
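- # Maximum size of one HTML segment (applied as a character count when slicing), so that a single segment never exceeds the AI's maximum input for one turn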
16
- ont_turn_max_token = 25000
17
-
18
-
19
def register_visit_url(mcp: FastMCP, browser_manager):
    """Register the ``visit_url`` tool on *mcp*.

    The tool asks *browser_manager* for a new browser, loads the requested URL
    in the tab returned by ``get_tab()`` and reports the tab id together with
    the browser port through ``dp_mcp_message_pack``.
    """

    @mcp.tool(name="visit_url", description="使用Drissionpage打开url访问某个网站")
    async def visit_url(url: str) -> dict[str, Any]:
        # create_browser() hands back a (port, browser) pair.
        browser_port, browser = browser_manager.create_browser()
        current_tab = browser.get_tab()
        current_tab.get(url)
        opened_tab_id = current_tab.tab_id
        summary = f"已在[{browser_port}]端口创建浏览器对象,并已打开链接:{url}"
        return dp_mcp_message_pack(summary, tab_id=opened_tab_id, browser_port=browser_port)
31
-
32
-
33
def register_get_html(mcp: FastMCP, browser_manager):
    """Register the ``get_html`` tool on *mcp*.

    The tool compresses the HTML of one tab with ``compress_html``, cuts the
    result into segments of at most ``ont_turn_max_token`` characters and
    writes every segment to its own file below
    ``html_source_code_local_save_path``. The list of written paths is
    returned through ``dp_mcp_message_pack``.
    """
    # Characters that are not allowed in file names on common file systems;
    # each one is replaced with "_" when the tab title is used as a file name.
    unsafe_chars = str.maketrans({char: "_" for char in '\\/:*?"<>|'})

    @mcp.tool(name="get_html", description="使用Drissionpage获取某一个tab页的html")
    async def get_html(browser_port: int, tab_id: str) -> dict[str, Any]:
        _browser = browser_manager.get_browser(browser_port)
        tab = _browser.get_tab(tab_id)
        base_name = str(tab.title).translate(unsafe_chars)
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(html_source_code_local_save_path, exist_ok=True)
        min_html, _compress_rate = compress_html(tab.html)
        html_file_list = []
        # An empty document produces no segments and therefore no files.
        for index, start in enumerate(range(0, len(min_html), ont_turn_max_token)):
            # Always derive the name from the untouched base name. Re-assigning
            # the base inside the loop made every later segment inherit the
            # suffixes of all earlier ones.
            segment_name = f"{base_name}_{tab_id}_segment{index}.html"
            abs_path = os.path.join(html_source_code_local_save_path, segment_name)
            with open(abs_path, "w", encoding="utf-8") as f:
                f.write(min_html[start:start + ont_turn_max_token])
            html_file_list.append(abs_path)
        message = f"已保存tab页:【{tab_id}】的html源码片段共{len(html_file_list)}个"
        return dp_mcp_message_pack(message, tab_id=tab_id, htmls_local_path=html_file_list)
52
-
53
def register_get_new_tab(mcp: FastMCP, browser_manager):
    """Register the ``get_new_tab`` tool on *mcp*.

    The tool opens *url* in a new tab of the browser registered under
    *browser_port*, activates that tab and reports its tab id.
    """

    @mcp.tool(name="get_new_tab", description="使用Drissionpage创建一个新的tab页,在新的tab页中打开url")
    async def get_new_tab(browser_port: int, url: str) -> dict[str, Any]:
        browser = browser_manager.get_browser(browser_port)
        opened_tab = browser.new_tab(url)
        # Activate the tab that was just created.
        browser.activate_tab(opened_tab)
        new_tab_id = opened_tab.tab_id
        summary = f"已创建新的tab页,并打开链接:{url}"
        return dp_mcp_message_pack(summary, tab_id=new_tab_id)
61
-
62
def register_switch_tab(mcp: FastMCP, browser_manager):
    """Register the ``switch_tab`` tool on *mcp*.

    The tool activates the tab identified by *tab_id* in the browser that is
    registered under *browser_port*.
    """

    @mcp.tool(name="switch_tab", description="根据传入的tab_id切换到对应的tab页")
    async def switch_tab(browser_port: int, tab_id: str) -> dict[str, Any]:
        browser_manager.get_browser(browser_port).activate_tab(tab_id)
        summary = f"已将tab页:【{tab_id}】切换至最前端"
        return dp_mcp_message_pack(summary)
68
-
69
def register_close_tab(mcp: FastMCP, browser_manager):
    """Register the ``close_tab`` tool on *mcp*.

    The tool closes the tab identified by *tab_id* in the browser that is
    registered under *browser_port*.
    """

    @mcp.tool(name="close_tab", description="根据传入的tab_id关闭tab页")
    # The parameters are annotated like every sibling tool (int port, str tab
    # id) so the tool signature no longer exposes them as untyped values.
    async def close_tab(browser_port: int, tab_id: str) -> dict[str, Any]:
        _browser = browser_manager.get_browser(browser_port)
        _browser.close_tabs(tab_id)
        return dp_mcp_message_pack(f"已将tab页:【{tab_id}】关闭")
75
-
76
def register_check_selector(mcp: FastMCP, browser_manager):
    """Register the ``check_selector`` tool on *mcp*.

    The tool looks up all elements matching a CSS selector in one tab and
    reports whether any exist, together with either their text or the value
    of the attribute named by *attr_name*.
    """

    @mcp.tool(name="check_selector", description="查找tab页中是否包含元素,并返回元素attr_name所对应的值")
    async def check_selector(
            browser_port: int, tab_id: str, css_selector: str, attr_name: str = "text"
    ) -> dict[str, Any]:
        _browser = browser_manager.get_browser(browser_port)
        target_tab = _browser.get_tab(tab_id)
        # Only a leading "css:" counts as the locator prefix. A substring test
        # would skip the prefix for selectors that merely contain "css:"
        # somewhere, e.g. inside an attribute value.
        if not css_selector.startswith("css:"):
            css_selector = "css:" + css_selector
        target_eles = target_tab.eles(css_selector)
        exist_flag = len(target_eles) != 0
        # attr_output is computed unconditionally so it is always bound, even
        # when nothing matched ("" for text, "[]" for attributes).
        if attr_name == "text":
            attr_output = "\n".join(ele.text.replace("\n", "") for ele in target_eles)
        else:
            attr_output = json.dumps([ele.attr(attr_name) for ele in target_eles])
        return dp_mcp_message_pack(
            f"已完成tab页:【{tab_id}】对:【{css_selector}】的检查",
            tab_id=tab_id,
            selector=css_selector,
            selector_ele_exist=exist_flag,
            attr_output=attr_output
        )
101
-
102
def register_quit_browser(mcp: FastMCP, browser_manager):
    """Register the ``quit_browser`` tool on *mcp*.

    The tool removes the browser registered under *browser_port* from
    *browser_manager* and, when the removal succeeded, quits that browser.
    The outcome is reported as ``quit_flag``.
    """

    @mcp.tool(name="quit_browser", description="退出浏览器会话,关闭浏览器")
    async def quit_browser(browser_port: int) -> dict[str, Any]:
        # remove_page() yields a (success flag, browser) pair.
        removed, browser = browser_manager.remove_page(browser_port)
        if removed:
            browser.quit()
        outcome = '成功' if removed else '失败'
        summary = f"浏览器[{browser_port}],退出会话,关闭浏览器{outcome}"
        return dp_mcp_message_pack(summary, browser_port=browser_port, quit_flag=removed)
113
-
114
def register_assert_waf(mcp: FastMCP, browser_manager):
    """Register the ``assert_waf`` tool on *mcp*.

    The tool fetches the same page three ways (the already opened headed tab,
    ``requests_html`` and ``dp_headless_html``), runs each result through
    ``compress_html(..., only_text=True)`` and compares the returned rates
    pairwise. From those differences it recommends one of ``"requests"``,
    ``"drissionpage_headless"`` or ``"drissionpage_head"`` (the default).
    """

    @mcp.tool(name="assert_waf",
              description="通过对比requests、有头浏览器、无头浏览器获取到的html,判断网页是否使用了waf以及是否为动态渲染的网页")
    async def assert_waf(browser_port: int, tab_id: str) -> dict[str, Any]:
        _browser = browser_manager.get_browser(browser_port)
        target_tab = _browser.get_tab(tab_id)
        recommend_team = "drissionpage_head"
        # Check the cookies of the headed tab for a known WAF.
        waf_flag, _waf_type = assert_waf_cookie(target_tab.cookies())
        _, head_rate = compress_html(target_tab.html, only_text=True)
        raw_html, status_code = requests_html(target_tab.url)
        _, raw_rate = compress_html(raw_html, only_text=True)
        r_h_rate_diff = abs(head_rate - raw_rate)
        # A known WAF was recognised (by cookie or by status code): do not
        # spend time on the headless fetch and its rate comparison.
        if waf_flag or status_code in waf_status_code_dict:
            return dp_mcp_message_pack(
                f"已完成tab页:【{tab_id}】的分析,该tab页存在waf",
                tab_id=tab_id,
                recommend_team=recommend_team,
                raw_head_rate_difference=r_h_rate_diff,
                raw_headless_rate_difference=0,
                head_headless_rate_difference=0
            )

        headless_html = dp_headless_html(target_tab.url)
        _, headless_rate = compress_html(headless_html, only_text=True)
        r_hless_rate_diff = abs(raw_rate - headless_rate)
        h_hless_rate_diff = abs(head_rate - headless_rate)
        # Best case: requests, headed and headless results are all close to
        # each other, so the page is treated as an unprotected static page.
        if r_h_rate_diff < 40 and r_hless_rate_diff < 40 and h_hless_rate_diff < 40:
            recommend_team = "requests"
        # Headed and headless agree but requests does not: treated as
        # protection against requests or as a dynamically rendered page.
        elif h_hless_rate_diff < 30 and r_hless_rate_diff > 40:
            recommend_team = "drissionpage_headless"
        # Every other combination (e.g. headed and headless differ widely)
        # keeps the default recommendation "drissionpage_head".
        return dp_mcp_message_pack(
            # This path is only reached when no known WAF was recognised, so
            # the message must not claim that one exists.
            f"已完成tab页:【{tab_id}】的分析,该tab页未检测到已知waf",
            tab_id=tab_id,
            recommend_team=recommend_team,
            raw_head_rate_difference=r_h_rate_diff,
            # Report the requests-vs-headless difference here; the head-vs-
            # headless value was previously passed for both fields.
            raw_headless_rate_difference=r_hless_rate_diff,
            head_headless_rate_difference=h_hless_rate_diff
        )
162
-
163
- def register_highlight_element_captcha(mcp: FastMCP, browser_manager):
164
- @mcp.tool(name="highlight_element_captcha", description="将传入的Selector在页面上高亮,并截屏")
165
- async def highlight_element_captcha(browser_port: int, tab_id: str, css_selector: str) -> dict[str, Any]:
166
- _browser = browser_manager.get_browser(browser_port)
167
- tab = _browser.get_tab(tab_id)
168
- highlight_style = 'background-color: rgba(255, 255, 0, 0.2); outline: 2px solid yellow !important; outline-offset: 2px'
169
- css_selector = css_selector
170
- if "css:" not in css_selector:
171
- css_selector = "css:" + css_selector
172
-
173
- target_eles = tab.eles(css_selector)
174
- exist_flag = False
175
- if len(target_eles) != 0:
176
- exist_flag = True
File without changes