Jarvis-Brain 0.1.9.15__py3-none-any.whl → 0.1.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {jarvis_brain-0.1.9.15.dist-info → jarvis_brain-0.1.11.0.dist-info}/METADATA +2 -1
- jarvis_brain-0.1.11.0.dist-info/RECORD +11 -0
- mcp_tools/dp_tools.py +31 -4
- mcp_tools/main.py +2 -1
- tools/browser_proxy.py +40 -0
- tools/tools.py +150 -0
- jarvis_brain-0.1.9.15.dist-info/RECORD +0 -11
- {jarvis_brain-0.1.9.15.dist-info → jarvis_brain-0.1.11.0.dist-info}/WHEEL +0 -0
- {jarvis_brain-0.1.9.15.dist-info → jarvis_brain-0.1.11.0.dist-info}/entry_points.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: Jarvis_Brain
|
|
3
|
-
Version: 0.1.9.15
|
|
3
|
+
Version: 0.1.11.0
|
|
4
4
|
Summary: Jarvis brain mcp
|
|
5
5
|
Requires-Python: >=3.10
|
|
6
6
|
Requires-Dist: beautifulsoup4
|
|
@@ -8,3 +8,4 @@ Requires-Dist: curl-cffi
|
|
|
8
8
|
Requires-Dist: drissionpage
|
|
9
9
|
Requires-Dist: fastmcp
|
|
10
10
|
Requires-Dist: minify-html
|
|
11
|
+
Requires-Dist: pillow
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
mcp_tools/__init__.py,sha256=_ZzedxbxmVkdRH00ONOjQR31pZW-B8WPn5P2qE6T-Iw,3012
|
|
2
|
+
mcp_tools/dp_tools.py,sha256=YTl2DEe02UI3qDRfVWxpqVjir6vq9SGw3wY9JFY3DV4,18370
|
|
3
|
+
mcp_tools/main.py,sha256=Fgq2PPuc7XCCwXCSVsyoADB0z1vI_gZ07lnLChcrjjg,1167
|
|
4
|
+
tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
5
|
+
tools/browser_manager.py,sha256=zeYcWuzxoohMdnYUoZbRH7axFC_VtV8MsncfN8y0yw0,2023
|
|
6
|
+
tools/browser_proxy.py,sha256=7QdLNSgYV5eYDhlmMiIQ0BkMvXwhPDjTRh4EWgIep6k,9255
|
|
7
|
+
tools/tools.py,sha256=dcxtuv1rKIrqqJud8e7sC8z-hJM8nx1WUUoeQYBIhiY,11098
|
|
8
|
+
jarvis_brain-0.1.11.0.dist-info/METADATA,sha256=D98jNkUXIpAa4IjSGq0bjFcypvADVoBykYxNNAST4gA,264
|
|
9
|
+
jarvis_brain-0.1.11.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
10
|
+
jarvis_brain-0.1.11.0.dist-info/entry_points.txt,sha256=YFQT4xpkUqt5dM5wlKPQQOqcjMuFrT9iuRAzIpAyH7U,51
|
|
11
|
+
jarvis_brain-0.1.11.0.dist-info/RECORD,,
|
mcp_tools/dp_tools.py
CHANGED
|
@@ -1,12 +1,17 @@
|
|
|
1
|
+
"""
|
|
2
|
+
这个文件中提供的工具作为独立的Drissionpage mcp工具
|
|
3
|
+
"""
|
|
1
4
|
import hashlib
|
|
2
5
|
import json
|
|
3
6
|
import os
|
|
7
|
+
import time
|
|
4
8
|
from typing import Any
|
|
5
9
|
|
|
6
10
|
from fastmcp import FastMCP
|
|
7
11
|
|
|
8
12
|
from tools.browser_manager import BrowserManager
|
|
9
|
-
from tools.tools import compress_html, requests_html, dp_headless_html, assert_waf_cookie, dp_mcp_message_pack
|
|
13
|
+
from tools.tools import compress_html, requests_html, dp_headless_html, assert_waf_cookie, dp_mcp_message_pack, \
|
|
14
|
+
compress_html_js, compress_image_bytes
|
|
10
15
|
from tools.browser_proxy import DPProxyClient, DPProxyClientManager
|
|
11
16
|
|
|
12
17
|
html_source_code_local_save_path = os.path.join(os.getcwd(), "html-source-code")
|
|
@@ -94,10 +99,11 @@ def register_get_html(mcp: FastMCP, browser_manager):
|
|
|
94
99
|
file_name_prefix = hashlib.md5(str(tab.title).encode('utf-8')).hexdigest()
|
|
95
100
|
if not os.path.exists(html_source_code_local_save_path):
|
|
96
101
|
os.makedirs(html_source_code_local_save_path)
|
|
97
|
-
min_html, compress_rate = compress_html(tab.html)
|
|
98
|
-
|
|
102
|
+
# min_html, compress_rate = compress_html(tab.html)
|
|
103
|
+
min_html = tab.run_js(compress_html_js)
|
|
104
|
+
# html_str_list = [min_html[i:i + one_turn_max_token] for i in range(0, len(min_html), one_turn_max_token)]
|
|
99
105
|
html_file_list = []
|
|
100
|
-
for index, html_str in enumerate(
|
|
106
|
+
for index, html_str in enumerate([min_html]):
|
|
101
107
|
file_name = file_name_prefix + f"_{tab_id}_segment{index}.html"
|
|
102
108
|
abs_path = os.path.join(html_source_code_local_save_path, file_name)
|
|
103
109
|
with open(abs_path, "w", encoding="utf-8") as f:
|
|
@@ -299,3 +305,24 @@ def register_scroll_action(mcp: FastMCP, browser_manager):
|
|
|
299
305
|
browser_port=browser_port,
|
|
300
306
|
tab_id=tab_id,
|
|
301
307
|
)
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
def register_get_screenshot(mcp: FastMCP, browser_manager):
    """
    Register the ``get_tab_screenshot`` tool on ``mcp``.

    The tool screenshots one tab, passes the bytes through
    ``compress_image_bytes`` and writes the result into
    ``html_source_code_local_save_path``.

    :param mcp: FastMCP instance the tool is attached to.
    :param browser_manager: object whose ``get_browser(port)`` returns the browser to use.
    """

    # The description used to advertise a PNG, but compress_image_bytes
    # re-encodes to JPEG, so both the description and the file suffix say jpg.
    @mcp.tool(name="get_tab_screenshot", description="尝试对传入tab页进行截图,并将截图压缩为1M大小jpg图片,会返回截图保存路径")
    async def get_tab_screenshot(browser_port: int, tab_id: str) -> dict[str, Any]:
        """Screenshot ``tab_id`` of the browser on ``browser_port`` and return the saved path."""
        _browser = browser_manager.get_browser(browser_port)
        target_tab = _browser.get_tab(tab_id)
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(html_source_code_local_save_path, exist_ok=True)
        # Millisecond timestamp keeps successive screenshots of the same tab apart.
        timestamp = int(time.time() * 1000)
        origin_png = target_tab.get_screenshot(as_bytes="png")
        compressed_image = compress_image_bytes(origin_png)
        image_path = os.path.join(
            html_source_code_local_save_path,
            f"{browser_port}_{tab_id}_{timestamp}.jpg",
        )
        with open(image_path, "wb") as f:
            f.write(compressed_image)
        return dp_mcp_message_pack(
            message=f"已完成对browser_port={browser_port},tab_id={tab_id}的截屏",
            browser_port=browser_port,
            tab_id=tab_id,
            screenshot_path=image_path
        )
|
mcp_tools/main.py
CHANGED
|
@@ -20,6 +20,7 @@ if "TeamNode-Dp" in enabled_modules:
|
|
|
20
20
|
register_get_html(mcp, browser_manager)
|
|
21
21
|
register_check_selector(mcp, browser_manager)
|
|
22
22
|
register_pop_first_packet(mcp, browser_manager, client_manager)
|
|
23
|
+
register_get_screenshot(mcp, browser_manager)
|
|
23
24
|
# 页面交互
|
|
24
25
|
register_click_action(mcp, browser_manager)
|
|
25
26
|
register_scroll_action(mcp, browser_manager)
|
|
@@ -29,7 +30,7 @@ if "JarvisNode" in enabled_modules:
|
|
|
29
30
|
|
|
30
31
|
|
|
31
32
|
def main():
|
|
32
|
-
mcp.run(transport="stdio",show_banner=False)
|
|
33
|
+
mcp.run(transport="stdio", show_banner=False)
|
|
33
34
|
|
|
34
35
|
|
|
35
36
|
if __name__ == '__main__':
|
tools/browser_proxy.py
CHANGED
|
@@ -126,6 +126,46 @@ class DrissionPageListenerProxy:
|
|
|
126
126
|
return attr
|
|
127
127
|
|
|
128
128
|
|
|
129
|
+
def check_data_packet(packet: DataPacket, client: DPProxyClient):
    """
    Wrap a captured packet and append it to ``client.packet_queue``.

    The response body is JSON-serialised and cut into slices of at most
    ``one_turn_max_token`` characters; one dict is queued per slice.
    A packet rejected by ``client.packet_filter`` queues nothing.

    :param packet: captured packet; its ``url``, ``request`` and ``response`` are read.
    :param client: proxy client supplying ``packet_filter`` and ``packet_queue``.
    :return: None
    """
    # NOTE(review): a second ``check_data_packet`` definition follows this one
    # in the module and would shadow it -- confirm which copy should remain.
    url = packet.url
    method = packet.request.method
    domain = urlparse(url).netloc

    # The filters do not depend on the segment, so evaluate them once and
    # bail out before the (possibly large) body is serialised.
    packet_filter = client.packet_filter
    domain_filter = packet_filter.get("domain_filter", None)
    method_filter = packet_filter.get("method_filter", ["GET", "POST"])
    # Skip the packet when a domain filter is given and the domain is not in it.
    if domain_filter and domain not in domain_filter:
        return
    # Skip the packet when its method is not in the method filter.
    if method not in method_filter:
        return

    data = packet.request.postData if packet.request.hasPostData else None

    body_str = json.dumps(packet.response.body, ensure_ascii=False, separators=(',', ':'))
    segments = [
        body_str[start:start + one_turn_max_token]
        for start in range(0, len(body_str), one_turn_max_token)
    ]
    last_index = len(segments) - 1
    for index, segment in enumerate(segments):
        client.packet_queue.append({
            "url": url,
            # True only on the final slice; previously the flag stayed False
            # for every slice once a body needed more than one.
            "body_completed": index == last_index,
            "method": method,
            "request_data": data,
            "request_headers": dict(packet.request.headers),
            "response_headers": dict(packet.response.headers),
            # Every backslash is stripped, so a segment is not guaranteed to
            # be valid JSON on its own.
            "response_body_segment": segment.replace("\\", ""),
        })
|
|
168
|
+
|
|
129
169
|
def check_data_packet(packet: DataPacket, client: DPProxyClient):
|
|
130
170
|
"""
|
|
131
171
|
封装监听到的数据包,并将其存放在client的packet_queue中
|
tools/tools.py
CHANGED
|
@@ -6,6 +6,90 @@ from DrissionPage import ChromiumPage, ChromiumOptions
|
|
|
6
6
|
from bs4 import BeautifulSoup
|
|
7
7
|
from curl_cffi import requests
|
|
8
8
|
from lxml import html, etree
|
|
9
|
+
import base64
|
|
10
|
+
from PIL import Image
|
|
11
|
+
import io
|
|
12
|
+
|
|
13
|
+
# JavaScript snippet that returns a simplified, tag-like text rendering of
# document.body: text nodes are trimmed and cut to 100 characters, the tags in
# `ignoreTags` and invisible / zero-sized elements are dropped, and only id,
# class and a small set of attributes are kept.
# Changes from the previous version:
#   * tag names are upper-cased before the ignore-list lookup, because
#     tagName is not upper-cased for SVG elements and 'SVG' never matched;
#   * empty class tokens are filtered so a blank class attribute no longer
#     produces a dangling '.';
#   * the regex backslash is escaped on the Python side ("\\s") so the
#     literal contains no invalid escape sequence; the JS text is unchanged.
compress_html_js = """
function getSimplifiedDOM(node) {
    // 1. 处理文本节点
    if (node.nodeType === Node.TEXT_NODE) {
        const text = node.textContent.trim();
        // 限制文本长度,避免大段文章消耗 token,保留前100个字符通常足够定位
        return text ? text.slice(0, 100) + (text.length > 100 ? '...' : '') : null;
    }

    // 2. 过滤无用标签
    const ignoreTags = ['SCRIPT', 'STYLE', 'NOSCRIPT', 'IFRAME', 'SVG', 'LINK', 'META'];
    if (node.nodeType !== Node.ELEMENT_NODE) return null;
    if (ignoreTags.includes(node.tagName.toUpperCase())) return null;

    // 3. 过滤不可见元素
    const style = window.getComputedStyle(node);
    if (style.display === 'none' || style.visibility === 'hidden' || style.opacity === '0') return null;
    // 过滤宽高太小的元素(往往是埋点空像素)
    const rect = node.getBoundingClientRect();
    if (rect.width === 0 || rect.height === 0) return null;

    // --- 开始构建标签字符串 ---
    const tagName = node.tagName.toLowerCase();
    let tagStr = tagName;

    // A. 基础标识符 (ID 和 Class)
    if (node.id) tagStr += `#${node.id}`;
    if (node.className && typeof node.className === 'string') {
        // 过滤掉 Tailwind 等太长且无语义的 class,保留有意义的业务 class
        // 这里简单处理,全部保留,让 LLM 自己判断
        const classes = node.className.trim().split(/\\s+/).filter(Boolean);
        if (classes.length > 0) tagStr += `.${classes.join('.')}`;
    }

    // B. 关键属性白名单 (这是你指出问题的核心修复)
    const props = [];

    // 通用重要属性
    if (node.getAttribute('role')) props.push(`role="${node.getAttribute('role')}"`);
    if (node.getAttribute('aria-label')) props.push(`aria-label="${node.getAttribute('aria-label')}"`);
    if (node.getAttribute('title')) props.push(`title="${node.getAttribute('title')}"`);

    // 特定标签的特定属性
    if (tagName === 'a') {
        const href = node.getAttribute('href');
        // 只保留有意义的链接,忽略 javascript:;
        if (href && !href.startsWith('javascript')) props.push(`href="${href}"`);
    } else if (tagName === 'input' || tagName === 'textarea' || tagName === 'select') {
        if (node.getAttribute('type')) props.push(`type="${node.getAttribute('type')}"`);
        if (node.getAttribute('name')) props.push(`name="${node.getAttribute('name')}"`);
        if (node.getAttribute('placeholder')) props.push(`placeholder="${node.getAttribute('placeholder')}"`);
        if (node.disabled) props.push('disabled');
        if (node.checked) props.push('checked');
    } else if (tagName === 'button') {
        if (node.getAttribute('type')) props.push(`type="${node.getAttribute('type')}"`);
    } else if (tagName === 'img') {
        if (node.getAttribute('alt')) props.push(`alt="${node.getAttribute('alt')}"`);
    }

    if (props.length > 0) {
        tagStr += ` ${props.join(' ')}`;
    }

    // 4. 递归子节点
    const children = Array.from(node.childNodes)
        .map(getSimplifiedDOM)
        .filter(n => n !== null);

    // 5. 组装输出
    // 如果没有子节点,也没有ID/Class,也不是输入框/图片/链接,那这个标签可能只是布局用的 div,可以考虑跳过它直接返回子节点内容
    // 但为了保持结构完整,我们暂时保留它
    if (children.length === 0) {
        // 自闭合标签或空标签
        return `<${tagStr} />`;
    }
    return `<${tagStr}>${children.join('')}</${tagName}>`; // 结束标签只保留 tagName 节省 token
}

return getSimplifiedDOM(document.body);
"""
|
|
9
93
|
|
|
10
94
|
|
|
11
95
|
# 使用requests获取html,用于测试是否使用了瑞数和jsl
|
|
@@ -107,6 +191,72 @@ def dp_mcp_message_pack(message: str, **kwargs):
|
|
|
107
191
|
}]
|
|
108
192
|
}
|
|
109
193
|
|
|
194
|
+
|
|
195
|
+
def btyes2Base64Img(target_byte, mime_type="image/png"):
    """
    Encode raw image bytes as a base64 ``data:`` URI for transport.

    :param target_byte: raw image bytes.
    :param mime_type: MIME type written into the URI prefix. Defaults to
        ``image/png`` (the previous hard-coded value); pass ``image/jpeg``
        for JPEG data.
    :return: string of the form ``data:<mime_type>;base64,<payload>``.
    """
    # Base64 output is pure ASCII, so decode with that codec explicitly.
    payload = base64.b64encode(target_byte).decode("ascii")
    return f"data:{mime_type};base64,{payload}"
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def compress_image_bytes(input_bytes, target_size_mb=1):
    """
    Re-encode image bytes as JPEG, shrinking them towards a target size.

    Quality is lowered from 95 in steps of 5 down to 10. If the result is
    still too large, the image is then downscaled by 10% per pass at the
    last quality until it fits or one side reaches a single pixel.

    :param input_bytes: encoded bytes of the source image.
    :param target_size_mb: size budget in MB (default 1).
    :return: JPEG-encoded bytes; may still exceed the budget if the image
        cannot be shrunk any further.
    """
    target_size = target_size_mb * 1024 * 1024  # budget in bytes

    img = Image.open(io.BytesIO(input_bytes))

    # Convert alpha / palette modes to RGB before JPEG encoding.
    if img.mode in ('RGBA', 'LA', 'P'):
        img = img.convert('RGB')

    def _encode(image, jpeg_quality):
        """Return ``image`` encoded as JPEG bytes at ``jpeg_quality``."""
        buffer = io.BytesIO()
        image.save(buffer, 'JPEG', quality=jpeg_quality, optimize=True)
        return buffer.getvalue()

    # Pass 1: lower the quality step by step.
    quality = 95
    output_bytes = _encode(img, quality)
    while len(output_bytes) > target_size and quality > 10:
        quality -= 5
        output_bytes = _encode(img, quality)

    # Pass 2: shrink the dimensions. The previous loop also required
    # ``quality > 10``, which is always false once pass 1 gives up, so the
    # resize fallback never ran. Bound it by image size instead.
    width, height = img.size
    while len(output_bytes) > target_size and min(width, height) > 1:
        width = max(1, int(width * 0.9))
        height = max(1, int(height * 0.9))
        # Always resample from the original to avoid compounding blur.
        resized = img.resize((width, height), Image.Resampling.LANCZOS)
        output_bytes = _encode(resized, quality)

    return output_bytes
|
|
259
|
+
|
|
110
260
|
# todo: 大致盘一下各种判定的逻辑【以下的所有压缩比之间的差距均取“绝对值”】
|
|
111
261
|
# 1. 如果requests、无头、有头获取到的压缩比之间从差距都在15%以内,则认定该页面是静态页面,此时优先使用requests请求
|
|
112
262
|
# 2. 如果requests的status_code为特定的412,或者521,则判定是瑞数和jsl。[此时还有一个特点:requests的压缩比会与其他两种方式获取到的压缩比差距非常大(一两千的那种)]
|
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
mcp_tools/__init__.py,sha256=_ZzedxbxmVkdRH00ONOjQR31pZW-B8WPn5P2qE6T-Iw,3012
|
|
2
|
-
mcp_tools/dp_tools.py,sha256=ugzHnOf43fJ2LutWlDrYplc4TPn8ZLP9Byb85yENvfM,17059
|
|
3
|
-
mcp_tools/main.py,sha256=k5YOuf2USW7vnlFEBxvLSfxPPHP7nrM1J8lLmVEcSuY,1116
|
|
4
|
-
tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
5
|
-
tools/browser_manager.py,sha256=zeYcWuzxoohMdnYUoZbRH7axFC_VtV8MsncfN8y0yw0,2023
|
|
6
|
-
tools/browser_proxy.py,sha256=YDBp1bDEX08ev-Bvbuv0TkWz_nkShe95GS7tfd8GBLc,7606
|
|
7
|
-
tools/tools.py,sha256=zYJCvwy7OUmYWKOhHbItSfttAmJLbC-ixXv9oW-TNTA,5033
|
|
8
|
-
jarvis_brain-0.1.9.15.dist-info/METADATA,sha256=g9Bp_gpYdKAXYMaiWgZGesmaLYpF5KxSJ9ijuwdUn8E,242
|
|
9
|
-
jarvis_brain-0.1.9.15.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
10
|
-
jarvis_brain-0.1.9.15.dist-info/entry_points.txt,sha256=YFQT4xpkUqt5dM5wlKPQQOqcjMuFrT9iuRAzIpAyH7U,51
|
|
11
|
-
jarvis_brain-0.1.9.15.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|