Jarvis-Brain 0.1.13.5__py3-none-any.whl → 0.1.13.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {jarvis_brain-0.1.13.5.dist-info → jarvis_brain-0.1.13.8.dist-info}/METADATA +1 -1
- {jarvis_brain-0.1.13.5.dist-info → jarvis_brain-0.1.13.8.dist-info}/RECORD +6 -6
- mcp_tools/dp_tools.py +12 -4
- tools/tools.py +96 -9
- {jarvis_brain-0.1.13.5.dist-info → jarvis_brain-0.1.13.8.dist-info}/WHEEL +0 -0
- {jarvis_brain-0.1.13.5.dist-info → jarvis_brain-0.1.13.8.dist-info}/entry_points.txt +0 -0
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
mcp_tools/__init__.py,sha256=_ZzedxbxmVkdRH00ONOjQR31pZW-B8WPn5P2qE6T-Iw,3012
|
|
2
|
-
mcp_tools/dp_tools.py,sha256=
|
|
2
|
+
mcp_tools/dp_tools.py,sha256=AzAb5W-3jI_JcU_zz1LhzFiAKQwUuoXkiMQcclhXeZk,25688
|
|
3
3
|
mcp_tools/main.py,sha256=hIiYV7QVT8XDEDihhTQTUmL7Ftlsd0DlfhKMHEqj244,1262
|
|
4
4
|
tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
5
5
|
tools/browser_manager.py,sha256=zeYcWuzxoohMdnYUoZbRH7axFC_VtV8MsncfN8y0yw0,2023
|
|
6
6
|
tools/browser_proxy.py,sha256=VAhuVrBIoXmKyXvCmJh4eRYCWpLIhbTYmFHK8l8eoTA,6590
|
|
7
|
-
tools/tools.py,sha256=
|
|
8
|
-
jarvis_brain-0.1.13.
|
|
9
|
-
jarvis_brain-0.1.13.
|
|
10
|
-
jarvis_brain-0.1.13.
|
|
11
|
-
jarvis_brain-0.1.13.
|
|
7
|
+
tools/tools.py,sha256=wp5U1PM8Fh2yAZPDmXmCG-svtGIf9n0yb-YL3KPewWI,12928
|
|
8
|
+
jarvis_brain-0.1.13.8.dist-info/METADATA,sha256=BBWTdcMJ8iof4QA3E3OxdueAD4jdRvuPO8fWdTGsH3g,264
|
|
9
|
+
jarvis_brain-0.1.13.8.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
10
|
+
jarvis_brain-0.1.13.8.dist-info/entry_points.txt,sha256=YFQT4xpkUqt5dM5wlKPQQOqcjMuFrT9iuRAzIpAyH7U,51
|
|
11
|
+
jarvis_brain-0.1.13.8.dist-info/RECORD,,
|
mcp_tools/dp_tools.py
CHANGED
|
@@ -357,7 +357,7 @@ def register_get_ele_info(mcp: FastMCP, browser_manager):
|
|
|
357
357
|
"返回值说明:"
|
|
358
358
|
"element_tag:此属性返回元素的标签名。"
|
|
359
359
|
"element_attrs_key:此属性以list的形式返回元素所有属性的key。"
|
|
360
|
-
"element_rect_size
|
|
360
|
+
"element_rect_size:此属性以元组形式返回元素的大小【如果元素没有位置及大小,则返回空元组】。"
|
|
361
361
|
"is_in_viewport:此属性以布尔值方式返回元素是否在视口中,以元素可以接受点击的点为判断。"
|
|
362
362
|
"is_whole_in_viewport:此属性以布尔值方式返回元素是否整个在视口中。"
|
|
363
363
|
"is_alive:此属性以布尔值形式返回当前元素是否仍可用。用于判断是否因页面刷新而导致元素失效。"
|
|
@@ -386,6 +386,14 @@ def register_get_ele_info(mcp: FastMCP, browser_manager):
|
|
|
386
386
|
css_selector=css_selector,
|
|
387
387
|
element_index=element_index,
|
|
388
388
|
)
|
|
389
|
+
# Read the element's rect state as a plain value. A trailing comma here would
# wrap it in a 1-tuple, which is always truthy and would defeat the check below.
# NOTE(review): presumably `states.has_rect` is falsy when the element has no
# position/size - confirm against the DrissionPage version in use.
has_rect = target_element.states.has_rect
# The tool description promises an empty tuple when the element has no
# position/size, so only read rect.size when a rect actually exists.
element_rect_size = tuple()
if has_rect:
    element_rect_size = target_element.rect.size
try:
    child_count = target_element.child_count
except Exception:
    # Best-effort: report zero children rather than failing the whole
    # element-info call when the child count cannot be read.
    child_count = 0
|
|
389
397
|
return dp_mcp_message_pack(
|
|
390
398
|
message="元素可以被正常的选择到,以下是元素相关的一系列信息",
|
|
391
399
|
browser_port=browser_port,
|
|
@@ -394,8 +402,8 @@ def register_get_ele_info(mcp: FastMCP, browser_manager):
|
|
|
394
402
|
element_index=element_index,
|
|
395
403
|
element_tag=target_element.tag,
|
|
396
404
|
element_attrs_key=list(target_element.attrs.keys()),
|
|
397
|
-
|
|
398
|
-
element_rect_size=
|
|
405
|
+
element_child_count=child_count,
|
|
406
|
+
element_rect_size=element_rect_size,
|
|
399
407
|
is_in_viewport=target_element.states.is_in_viewport,
|
|
400
408
|
is_whole_in_viewport=target_element.states.is_whole_in_viewport,
|
|
401
409
|
is_alive=target_element.states.is_alive,
|
|
@@ -405,7 +413,7 @@ def register_get_ele_info(mcp: FastMCP, browser_manager):
|
|
|
405
413
|
is_displayed=target_element.states.is_displayed,
|
|
406
414
|
is_covered=target_element.states.is_covered,
|
|
407
415
|
is_clickable=target_element.states.is_clickable,
|
|
408
|
-
has_rect=
|
|
416
|
+
has_rect=has_rect
|
|
409
417
|
)
|
|
410
418
|
|
|
411
419
|
|
tools/tools.py
CHANGED
|
@@ -10,7 +10,7 @@ import base64
|
|
|
10
10
|
from PIL import Image
|
|
11
11
|
import io
|
|
12
12
|
|
|
13
|
-
|
|
13
|
+
compress_html_js1 = """
|
|
14
14
|
function getSimplifiedDOM(node) {
|
|
15
15
|
// 1. 处理文本节点
|
|
16
16
|
if (node.nodeType === Node.TEXT_NODE) {
|
|
@@ -102,6 +102,101 @@ function getSimplifiedDOM(node) {
|
|
|
102
102
|
return getSimplifiedDOM(document.body);
|
|
103
103
|
"""
|
|
104
104
|
|
|
105
|
+
# Optimized DOM compressor, injected into the page as JavaScript. Rules:
#   * invisible elements are dropped;
#   * an attribute whose value is longer than MAX_ATTR_LEN (40) characters is
#     dropped, except src/href which are always kept;
#   * style and aria-label are always dropped; <path> keeps no attributes;
#   * id and class are abbreviated CSS-selector style (id => #, class => .),
#     and are dropped entirely when longer than MAX_ATTR_LEN.
# The regex backslash is written as "\\s" so the script receives "\s" without
# Python treating it as an invalid string escape sequence.
compress_html_js = """
function getSimplifiedDOM(node) {
    // 全局配置:最大属性值长度
    const MAX_ATTR_LEN = 40;

    // 1. 处理文本节点
    if (node.nodeType === Node.TEXT_NODE) {
        const text = node.textContent.trim();
        return text ? text.slice(0, 100) + (text.length > 100 ? '...' : '') : null;
    }

    // 2. 过滤无用标签
    const ignoreTags = ['SCRIPT', 'STYLE', 'NOSCRIPT', 'IFRAME', 'LINK', 'META', 'AUDIO', 'VIDEO', 'CANVAS'];
    if (ignoreTags.includes(node.tagName)) return null;
    if (node.nodeType !== Node.ELEMENT_NODE) return null;

    // 3. 过滤不可见元素
    const style = window.getComputedStyle(node);
    if (style.display === 'none' || style.visibility === 'hidden' || style.opacity === '0') return null;

    const rect = node.getBoundingClientRect();
    if ((rect.width === 0 || rect.height === 0) && style.overflow !== 'visible') return null;

    // --- 开始构建标签字符串 ---
    const tagName = node.tagName.toLowerCase();
    let tagStr = tagName;

    const id = node.id;
    const className = node.getAttribute('class');

    // A. 处理 ID 简写 (#id)
    // 限制提高到 40
    if (id && id.length <= MAX_ATTR_LEN) {
        tagStr += `#${id}`;
    }

    // B. 处理 Class 简写 (.class)
    // 限制提高到 40
    if (className && typeof className === 'string' && className.length <= MAX_ATTR_LEN) {
        // filter(Boolean): a whitespace-only class attribute splits to [''] and must not emit a bare '.'
        const classes = className.trim().split(/\\s+/).filter(Boolean);
        if (classes.length > 0) {
            tagStr += `.${classes.join('.')}`;
        }
    }

    let propsStr = '';

    // C. 处理属性
    if (node.hasAttributes()) {
        for (const attr of node.attributes) {
            const name = attr.name;
            const value = attr.value;

            // 1. 跳过 ID 和 Class (已在 tagStr 处理,或因过长被丢弃)
            if (name === 'id' || name === 'class') continue;

            // 2. 黑名单:直接删除 style 和 aria-label
            if (name === 'style' || name === 'aria-label') continue;

            // 3. 特殊标签:path 标签删除所有属性
            if (tagName === 'path') continue;

            // 4. 【长度与白名单逻辑】
            // 如果不是 src 且不是 href,同时长度又超过了 40,则删除
            const isLinkAttr = (name === 'src' || name === 'href');

            if (!isLinkAttr && value.length > MAX_ATTR_LEN) {
                continue;
            }

            // 5. 拼接保留的属性
            // Escape double quotes so the value cannot terminate the quoted attribute early.
            propsStr += ` ${name}="${value.replace(/"/g, '&quot;')}"`;
        }
    }

    // 4. 递归子节点
    let childNodes = Array.from(node.childNodes);
    if (node.shadowRoot) {
        childNodes = [...childNodes, ...Array.from(node.shadowRoot.childNodes)];
    }

    const children = childNodes
        .map(getSimplifiedDOM)
        .filter(n => n !== null);

    // 5. 组装输出
    if (children.length === 0) {
        return `<${tagStr}${propsStr} />`;
    }
    return `<${tagStr}${propsStr}>${children.join('')}</${tagName}>`;
}

return getSimplifiedDOM(document.body);
"""
|
|
105
200
|
|
|
106
201
|
# 使用requests获取html,用于测试是否使用了瑞数和jsl
|
|
107
202
|
def requests_html(url):
|
|
@@ -268,11 +363,3 @@ def compress_image_bytes(input_bytes, target_size_mb=1):
|
|
|
268
363
|
|
|
269
364
|
return output_bytes
|
|
270
365
|
|
|
271
|
-
# todo: 大致盘一下各种判定的逻辑【以下的所有压缩比之间的差距均取“绝对值”】
|
|
272
|
-
# 1. 如果requests、无头、有头获取到的压缩比之间从差距都在15%以内,则认定该页面是静态页面,此时优先使用requests请求
|
|
273
|
-
# 2. 如果requests的status_code为特定的412,或者521,则判定是瑞数和jsl。[此时还有一个特点:requests的压缩比会与其他两种方式获取到的压缩比差距非常大(一两千的那种)]
|
|
274
|
-
# 3. 如果requests、无头、有头获取到的压缩比之间差距都在40%以上,则判定该页面只可以用有头采集
|
|
275
|
-
# 4. 如果无头和有头获取到的压缩比之间差距小于15%,但是requests和无头的差距大于40%,则认定该页面可以使用无头浏览器采集
|
|
276
|
-
# 5. 如果requests和有头获取到的压缩比之间差距小于15%,但是无头和有头的差距大于40%,则认定该页面优先使用有头浏览器采集
|
|
277
|
-
# 【此时可能是:1.使用了别的检测无头的waf。2.网站使用瑞数,但是这次请求没有拦截requests(不知道是不是瑞数那边故意设置的),
|
|
278
|
-
# 此时如果想进一步判定是否是瑞数,可以使用有头浏览器取一下cookies,如果cookies里面存在瑞数的cookie,那么就可以断定是瑞数】
|
|
File without changes
|
|
File without changes
|