Jarvis-Brain 0.1.9.15__py3-none-any.whl → 0.1.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {jarvis_brain-0.1.9.15.dist-info → jarvis_brain-0.1.10.0.dist-info}/METADATA +1 -1
- jarvis_brain-0.1.10.0.dist-info/RECORD +12 -0
- mcp_tools/chrome_devtools_tools.py +11 -0
- mcp_tools/dp_tools.py +9 -4
- tools/tools.py +81 -0
- jarvis_brain-0.1.9.15.dist-info/RECORD +0 -11
- {jarvis_brain-0.1.9.15.dist-info → jarvis_brain-0.1.10.0.dist-info}/WHEEL +0 -0
- {jarvis_brain-0.1.9.15.dist-info → jarvis_brain-0.1.10.0.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
mcp_tools/__init__.py,sha256=_ZzedxbxmVkdRH00ONOjQR31pZW-B8WPn5P2qE6T-Iw,3012
|
|
2
|
+
mcp_tools/chrome_devtools_tools.py,sha256=LEcDb2gamjJdSzqA9_Wy-lCtx8B-9p1M97Ik9GScXrY,284
|
|
3
|
+
mcp_tools/dp_tools.py,sha256=_2ys1eedp2AxfRxeQ5U_o6qE_eII6J7mxAWLXaKy9Sg,17208
|
|
4
|
+
mcp_tools/main.py,sha256=k5YOuf2USW7vnlFEBxvLSfxPPHP7nrM1J8lLmVEcSuY,1116
|
|
5
|
+
tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
|
+
tools/browser_manager.py,sha256=zeYcWuzxoohMdnYUoZbRH7axFC_VtV8MsncfN8y0yw0,2023
|
|
7
|
+
tools/browser_proxy.py,sha256=YDBp1bDEX08ev-Bvbuv0TkWz_nkShe95GS7tfd8GBLc,7606
|
|
8
|
+
tools/tools.py,sha256=E2NyOwJtFh5hT18nvCwIYs23OipKyjJEHIfyZJ1Bsjg,8960
|
|
9
|
+
jarvis_brain-0.1.10.0.dist-info/METADATA,sha256=o6l0jL2sUiifn_rx2zEY4Xtma6Ajqi2NP7quCTtB0NQ,242
|
|
10
|
+
jarvis_brain-0.1.10.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
11
|
+
jarvis_brain-0.1.10.0.dist-info/entry_points.txt,sha256=YFQT4xpkUqt5dM5wlKPQQOqcjMuFrT9iuRAzIpAyH7U,51
|
|
12
|
+
jarvis_brain-0.1.10.0.dist-info/RECORD,,
|
mcp_tools/dp_tools.py
CHANGED
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
"""
|
|
2
|
+
这个文件中提供的工具作为独立的Drissionpage mcp工具
|
|
3
|
+
"""
|
|
1
4
|
import hashlib
|
|
2
5
|
import json
|
|
3
6
|
import os
|
|
@@ -6,7 +9,8 @@ from typing import Any
|
|
|
6
9
|
from fastmcp import FastMCP
|
|
7
10
|
|
|
8
11
|
from tools.browser_manager import BrowserManager
|
|
9
|
-
from tools.tools import compress_html, requests_html, dp_headless_html, assert_waf_cookie, dp_mcp_message_pack
|
|
12
|
+
from tools.tools import compress_html, requests_html, dp_headless_html, assert_waf_cookie, dp_mcp_message_pack, \
|
|
13
|
+
compress_html_js
|
|
10
14
|
from tools.browser_proxy import DPProxyClient, DPProxyClientManager
|
|
11
15
|
|
|
12
16
|
html_source_code_local_save_path = os.path.join(os.getcwd(), "html-source-code")
|
|
@@ -94,10 +98,11 @@ def register_get_html(mcp: FastMCP, browser_manager):
|
|
|
94
98
|
file_name_prefix = hashlib.md5(str(tab.title).encode('utf-8')).hexdigest()
|
|
95
99
|
if not os.path.exists(html_source_code_local_save_path):
|
|
96
100
|
os.makedirs(html_source_code_local_save_path)
|
|
97
|
-
min_html, compress_rate = compress_html(tab.html)
|
|
98
|
-
|
|
101
|
+
# min_html, compress_rate = compress_html(tab.html)
|
|
102
|
+
min_html = tab.run_js(compress_html_js)
|
|
103
|
+
# html_str_list = [min_html[i:i + one_turn_max_token] for i in range(0, len(min_html), one_turn_max_token)]
|
|
99
104
|
html_file_list = []
|
|
100
|
-
for index, html_str in enumerate(
|
|
105
|
+
for index, html_str in enumerate([min_html]):
|
|
101
106
|
file_name = file_name_prefix + f"_{tab_id}_segment{index}.html"
|
|
102
107
|
abs_path = os.path.join(html_source_code_local_save_path, file_name)
|
|
103
108
|
with open(abs_path, "w", encoding="utf-8") as f:
|
tools/tools.py
CHANGED
|
@@ -7,6 +7,87 @@ from bs4 import BeautifulSoup
|
|
|
7
7
|
from curl_cffi import requests
|
|
8
8
|
from lxml import html, etree
|
|
9
9
|
|
|
10
|
+
# JavaScript executed inside the page (via ``tab.run_js``) to produce a compact,
# selector-like rendering of the visible DOM instead of the full HTML source.
#
# Output shape: ``<tag#id.class1.class2 attr="value">children</tag>``; elements
# without surviving children are rendered as ``<tag ... />``.
#
# The literal is a *raw* string because the script contains regex escapes
# (``\s``) that are not valid Python escape sequences. The script ends with a
# top-level ``return``, so it must be run as a function body, not as a bare
# expression.
compress_html_js = r"""
// Tags that never help to locate an element; compared in upper case.
const IGNORE_TAGS = new Set(['SCRIPT', 'STYLE', 'NOSCRIPT', 'IFRAME', 'SVG', 'LINK', 'META']);
// Text nodes are cut to this many characters to keep long articles cheap.
const MAX_TEXT_LENGTH = 100;

// Render name="value", escaping double quotes so the value cannot end the quoting early.
function quoteAttr(name, value) {
    return `${name}="${String(value).replace(/"/g, '&quot;')}"`;
}

function getSimplifiedDOM(node) {
    // 1. Text nodes: keep the trimmed text, truncated with an ellipsis marker.
    if (node.nodeType === Node.TEXT_NODE) {
        const text = node.textContent.trim();
        if (!text) return null;
        return text.length > MAX_TEXT_LENGTH ? text.slice(0, MAX_TEXT_LENGTH) + '...' : text;
    }

    // 2. Drop everything that is not an element (comments, etc.) before touching tagName.
    if (node.nodeType !== Node.ELEMENT_NODE) return null;
    // tagName keeps its lower-case form for SVG/XML-namespaced elements,
    // so normalise it before matching against the ignore list.
    if (IGNORE_TAGS.has(node.tagName.toUpperCase())) return null;

    // 3. Drop invisible elements.
    const style = window.getComputedStyle(node);
    if (style.display === 'none' || style.visibility === 'hidden' || style.opacity === '0') return null;
    // Zero-sized elements are usually tracking pixels.
    const rect = node.getBoundingClientRect();
    if (rect.width === 0 || rect.height === 0) return null;

    // --- Build the opening tag string ---
    const tagName = node.tagName.toLowerCase();
    let tagStr = tagName;

    // A. Basic identifiers (id and classes). All classes are kept on purpose;
    //    the consumer decides which ones are meaningful.
    if (node.id) tagStr += `#${node.id}`;
    if (node.className && typeof node.className === 'string') {
        // filter(Boolean) removes the empty token left by a whitespace-only class attribute.
        const classes = node.className.trim().split(/\s+/).filter(Boolean);
        if (classes.length > 0) tagStr += `.${classes.join('.')}`;
    }

    // B. Whitelisted attributes.
    const props = [];
    const pushAttr = (name) => {
        const value = node.getAttribute(name);
        if (value) props.push(quoteAttr(name, value));
    };

    // Attributes that matter on any element.
    ['role', 'aria-label', 'title'].forEach(pushAttr);

    // Attributes that matter only on specific tags.
    if (tagName === 'a') {
        const href = node.getAttribute('href');
        // Skip only real javascript: pseudo-links; a relative URL that merely
        // starts with the word "javascript" is a normal link and is kept.
        if (href && !/^\s*javascript:/i.test(href)) props.push(quoteAttr('href', href));
    } else if (tagName === 'input' || tagName === 'textarea' || tagName === 'select') {
        ['type', 'name', 'placeholder'].forEach(pushAttr);
        if (node.disabled) props.push('disabled');
        if (node.checked) props.push('checked');
    } else if (tagName === 'button') {
        pushAttr('type');
    } else if (tagName === 'img') {
        pushAttr('alt');
    }

    if (props.length > 0) {
        tagStr += ` ${props.join(' ')}`;
    }

    // 4. Recurse into the children and discard the filtered ones.
    const children = Array.from(node.childNodes)
        .map(getSimplifiedDOM)
        .filter(n => n !== null);

    // 5. Assemble the output. Pure layout wrappers are kept so the structure stays intact.
    if (children.length === 0) {
        // Void or empty element.
        return `<${tagStr} />`;
    }
    // The closing tag carries only the tag name to save tokens.
    return `<${tagStr}>${children.join('')}</${tagName}>`;
}

// Always hand a string back to the caller, even when the page has no body
// or the body itself was filtered out as invisible.
const body = document.body;
return body ? (getSimplifiedDOM(body) || '') : '';
"""
|
|
90
|
+
|
|
10
91
|
|
|
11
92
|
# 使用requests获取html,用于测试是否使用了瑞数和jsl
|
|
12
93
|
def requests_html(url):
|
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
mcp_tools/__init__.py,sha256=_ZzedxbxmVkdRH00ONOjQR31pZW-B8WPn5P2qE6T-Iw,3012
|
|
2
|
-
mcp_tools/dp_tools.py,sha256=ugzHnOf43fJ2LutWlDrYplc4TPn8ZLP9Byb85yENvfM,17059
|
|
3
|
-
mcp_tools/main.py,sha256=k5YOuf2USW7vnlFEBxvLSfxPPHP7nrM1J8lLmVEcSuY,1116
|
|
4
|
-
tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
5
|
-
tools/browser_manager.py,sha256=zeYcWuzxoohMdnYUoZbRH7axFC_VtV8MsncfN8y0yw0,2023
|
|
6
|
-
tools/browser_proxy.py,sha256=YDBp1bDEX08ev-Bvbuv0TkWz_nkShe95GS7tfd8GBLc,7606
|
|
7
|
-
tools/tools.py,sha256=zYJCvwy7OUmYWKOhHbItSfttAmJLbC-ixXv9oW-TNTA,5033
|
|
8
|
-
jarvis_brain-0.1.9.15.dist-info/METADATA,sha256=g9Bp_gpYdKAXYMaiWgZGesmaLYpF5KxSJ9ijuwdUn8E,242
|
|
9
|
-
jarvis_brain-0.1.9.15.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
10
|
-
jarvis_brain-0.1.9.15.dist-info/entry_points.txt,sha256=YFQT4xpkUqt5dM5wlKPQQOqcjMuFrT9iuRAzIpAyH7U,51
|
|
11
|
-
jarvis_brain-0.1.9.15.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|