Jarvis-Brain 0.1.7.12__tar.gz → 0.1.9.15__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: Jarvis_Brain
3
- Version: 0.1.7.12
3
+ Version: 0.1.9.15
4
4
  Summary: Jarvis brain mcp
5
5
  Requires-Python: >=3.10
6
6
  Requires-Dist: beautifulsoup4
@@ -5,7 +5,7 @@
5
5
  一个基于 FastMCP 和 DrissionPage 的浏览器自动化 MCP 服务器
6
6
 
7
7
  [![Python Version](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/)
8
- [![Version](https://img.shields.io/badge/version-0.1.5.7-green.svg)](https://github.com/yourusername/jarvis-mcp)
8
+ [![Version](https://img.shields.io/badge/version-0.1.9.12-green.svg)](https://github.com/yourusername/jarvis-mcp)
9
9
 
10
10
  ## 📖 简介
11
11
 
@@ -0,0 +1,5 @@
1
+ test_str="""
2
+ \\"X-XSS-Protection\\": \\"1; mode=block\\", \\"Server\\": \\"-\\"}, \\"response_body_segment\\": \\"pename\\\\\\": \\\\\\"SpuItemAttribute\\\\\\"}, {\\\\\\"code\\\\\\": \\\\\\"imageVersion\\\\\\", \\\\\\"name\\\\\\": \\\\\\"imageVersion\\\\\\", \\\\\\"subAttrList\\\\\\": [{\\\\\\"attrName\\\\\\": \\\\\\"1\\\\\\", \\\\\\"attrValue\\\\\\": \\\\\\"1\\\\\\", \\\\\\"__typename\\\\\\": \\\\\\"SubAttribute\\\\\\"}], \\\\\\"__typename\\\\\\": \\\\\\"SpuItemAttribute\\\\\\"}, {\\\\\\"code\\\\\\": \\\\\\"PLPhover\\\\\\", \\\\\\"name\\\\\\": \\\\\\"PLPhover\\\\\\", \\\\\\"subAttrList\\\\\\": [{\\\\\\"attrName\\\\\\": \\\\\\"2\\\\\\", \\\\\\"attrValue\\\\\\": \\\\\\"2\\\\\\", \\\\\\"__typename\\\\\\": \\\\\\"SubAttribute\\\\\\"}], \\\\\\"__typename\\\\\\": \\\\\\"SpuItemAttribute\\\\\\"}, {\\\\\\"code\\\\\\": \\\\\\"官网\\\\\\", \\\\\\"name\\\\\\": \\\\\\"官网\\\\\\", \\\\\\"subAttrList\\\\\\": [{\\\\\\"attrName\\\\\\": \\\\\\"是\\\\\\", \\\\\\"attrValue\\\\\\": \\\\\\"true\\\\\\", \\\\\\"__typename\\\\\\": \\\\\\"SubAttribute\\\\\\"}], \\\\\\"__typename\\\\\\": \\\\\\"SpuItemAttribute\\\\\\"}, {\\\\\\"code\\\\\\": \\\\\\"小程序\\\\\\", \\\\\\"name\\\\\\": \\\\\\"小程序\\\\\\", \\\\\\"subAttrList\\\\\\": [{\\\\\\"attrName\\\\\\": \\\\\\"是\\\\\\", \\\\\\"attrValue\\\\\\": \\\\\\"true\\\\\\", \\\\\\"__typename\\\\\\": \\\\\\"SubAttribute\\\\\\"}], \\\\\\"__typename\\\\\\": \\\\\\"SpuItemAttribute\\\\\\"}], \\\\\\"__typename\\\\\\": \\\\\\"SpuAttribute\\\\\\"}, \\\\\\"labelList\\\\\\": [{\\\\\\"name\\\\\\": \\\\\\"new\\\\\\", \\\\\\"value\\\\\\": \\\\\\"新品\\\\\\", \\\\\\"excludeValue\\\\\\": null, \\\\\\"__typename\\\\\\": \\\\\\"BaseLabel\\\\\\"}], \\\\\\"__typename\\\\\\": \\\\\\"Product2\\\\\\"}, {\\\\\\"baseInfo\\\\\\": {\\\\\\"spuCode\\\\\\": \\\\\\"864428CVE0G1000\\\\\\", \\\\\\"title\\\\\\": \\\\\\"互扣式双G带扣窄版腰带\\\\\\", \\\\\\"salePrice\\\\\\": 4300.0, \\\\\\"style\\\\\\": \\\\\\"864428\\\\\\", \\\\\\"categorys\\\\\\": [{\\\\\\"navFrontName\\\\\\": \\\\\\"女士风尚-手工plp\\\\\\", 
\\\\\\"frontName\\\\\\": null, \\\\\\"code\\\\\\": \\\\\\"67d7d4c5eb9d2100016b4fe5\\\\\\", \\\\\\"parentCode\\\\\\": null, \\\\\\"businessCode\\\\\\": \\\\\\"women-fashion手工plp\\\\\\", \\\\\\"__typename\\\\\\": \\\\\\"Category\\\\\\"}, {\\\\\\"navFrontName\\\\\\": \\\\\\"女士-women>女士配饰-women-accessories>女士腰带-women-accessories-belts\\\\\\", \\\\\\"frontName\\\\\\": null, \\\\\\"code\\\\\\": \\\\\\"642670bf0ae2090001133cbe\\\\\\", \\\\\\"parentCode\\\\\\": null, \\\\\\"businessCode\\\\\\": \\\\\\"women-accessories-belts\\\\\\", \\\\\\"__typename\\\\\\": \\\\\\"Category\\\\\\"}, {\\\\\\"navFrontName\\\\\\": \\\\\\"女士-women>女士配饰-women-accessories\\\\\\", \\\\\\"frontName\\\\\\": null, \\\\\\"code\\\\\\": \\\\\\"642670bf0ae2090001133cbd\\\\\\", \\\\\\"parentCode\\\\\\": null, \\\\\\"businessCode\\\\\\": \\\\\\"women-accessories\\\\\\",
3
+ """
4
+ decoded = test_str.encode().decode('unicode_escape')
5
+ print(decoded)
@@ -15,19 +15,27 @@ waf_status_code_dict = {
15
15
  521: "加速乐"
16
16
  }
17
17
  # 一轮最大输入,以免单个html最大长度超过ai最大输入
18
- one_turn_max_token = 20000
18
+ one_turn_max_token = 8000
19
19
 
20
20
 
21
21
  def register_visit_url(mcp: FastMCP, browser_manager: BrowserManager, client_manager: DPProxyClientManager):
22
22
  @mcp.tool(name="visit_url",
23
- description="使用Drissionpage打开url访问某个网站,并开始监听初始tab页的所有的XHR请求,当需要使用手机版浏览器Ua时use_mobile_user_agent为True")
24
- async def visit_url(url: str, use_mobile_user_agent=False) -> dict[str, Any]:
23
+ description="使用Drissionpage打开url访问某个网站,并开始监听初始tab页的所有的XHR请求"
24
+ "当需要使用手机版浏览器Ua时use_mobile_user_agent为True"
25
+ "如果想要以域名对packet进行过滤,可以传入想要过滤的域名列表。默认是:None。"
26
+ "如果想要以method对packet进行过滤,可以传入想要过滤的method列表,默认是:['GET', 'POST']")
27
+ async def visit_url(url: str, domain_filter: list = None, method_filter: list = ["GET", "POST"],
28
+ use_mobile_user_agent: bool = False) -> dict[str, Any]:
25
29
  mobile_user_agent = None
26
30
  if use_mobile_user_agent:
27
31
  mobile_user_agent = "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/143.0.0.0 Mobile Safari/537.36"
28
32
  port, _browser = browser_manager.create_browser(mobile_user_agent)
29
33
  tab = _browser.get_tab()
30
- client_manager.create_client(tab)
34
+ packet_filter = {
35
+ "domain_filter": domain_filter,
36
+ "method_filter": method_filter,
37
+ }
38
+ client_manager.create_client(tab, packet_filter)
31
39
  tab.get(url)
32
40
  tab_id = tab.tab_id
33
41
  return dp_mcp_message_pack(
@@ -37,7 +45,48 @@ def register_visit_url(mcp: FastMCP, browser_manager: BrowserManager, client_man
37
45
  )
38
46
 
39
47
 
40
- def register_get_html(mcp: FastMCP, browser_manager, client_manager: DPProxyClientManager):
48
+ def register_get_new_tab(mcp: FastMCP, browser_manager, client_manager: DPProxyClientManager):
49
+ @mcp.tool(name="get_new_tab",
50
+ description="使用Drissionpage创建一个新的tab页,在新的tab页中打开url,并开始监听新的tab页的所有XHR请求"
51
+ "如果想要以域名对packet进行过滤,可以传入想要过滤的域名列表。默认是:None。"
52
+ "如果想要以method对packet进行过滤,可以传入想要过滤的method列表,默认是:['GET', 'POST']")
53
+ async def get_new_tab(browser_port: int, url: str, domain_filter: list = None,
54
+ method_filter: list = ["GET", "POST"]) -> dict[str, Any]:
55
+ _browser = browser_manager.get_browser(browser_port)
56
+ tab = _browser.new_tab()
57
+ packet_filter = {
58
+ "domain_filter": domain_filter,
59
+ "method_filter": method_filter,
60
+ }
61
+ client_manager.create_client(tab, packet_filter)
62
+ tab.get(url)
63
+ _browser.activate_tab(tab)
64
+ tab_id = tab.tab_id
65
+ return dp_mcp_message_pack(f"已创建新的tab页,并打开链接:{url}", tab_id=tab_id)
66
+
67
+
68
+ def register_pop_first_packet(mcp: FastMCP, browser_manager, client_manager: DPProxyClientManager):
69
+ @mcp.tool(name="pop_first_packet",
70
+ description="每调用一次就会弹出传入的tab页所监听到的数据包中的第一个packet_message,当一个packet_message的response body过长时会被切分成多个包,具体一个请求是否还有下一个包,可以参考body_completed字段")
71
+ async def pop_first_packet(browser_port: int, tab_id: str) -> dict[str, Any]:
72
+ _browser = browser_manager.get_browser(browser_port)
73
+ client = client_manager.get_client(tab_id)
74
+ current_queue_size, packet_message = client.pop_first_packet()
75
+ message = f"tab页:【{tab_id}】,暂时没有监听到XHR数据包"
76
+ if packet_message:
77
+ message = f"tab页:【{tab_id}】,监听到XHR数据包,当前数据包队列中还剩 {current_queue_size} 条数据,如果还剩数据为0,可以暂时稍后再次调用该方法"
78
+ if (packet_message is None) and current_queue_size:
79
+ message = f"tab页:【{tab_id}】,当前弹出的第一个数据包不符合过滤条件,当前数据包队列中还剩 {current_queue_size} 条数据,请不要改变条件,继续弹出下一个数据包"
80
+ return dp_mcp_message_pack(
81
+ message,
82
+ browser_port=browser_port,
83
+ tab_id=tab_id,
84
+ packet_message=packet_message,
85
+ current_queue_size=current_queue_size,
86
+ )
87
+
88
+
89
+ def register_get_html(mcp: FastMCP, browser_manager):
41
90
  @mcp.tool(name="get_html", description="使用Drissionpage获取某一个tab页的html")
42
91
  async def get_html(browser_port: int, tab_id: str) -> dict[str, Any]:
43
92
  _browser = browser_manager.get_browser(browser_port)
@@ -58,20 +107,7 @@ def register_get_html(mcp: FastMCP, browser_manager, client_manager: DPProxyClie
58
107
  return dp_mcp_message_pack(message, tab_id=tab_id, htmls_local_path=html_file_list)
59
108
 
60
109
 
61
- def register_get_new_tab(mcp: FastMCP, browser_manager, client_manager: DPProxyClientManager):
62
- @mcp.tool(name="get_new_tab",
63
- description="使用Drissionpage创建一个新的tab页,在新的tab页中打开url,并开始监听新的tab页的所有XHR请求")
64
- async def get_new_tab(browser_port: int, url: str) -> dict[str, Any]:
65
- _browser = browser_manager.get_browser(browser_port)
66
- tab = _browser.new_tab()
67
- client_manager.create_client(tab)
68
- tab.get(url)
69
- _browser.activate_tab(tab)
70
- tab_id = tab.tab_id
71
- return dp_mcp_message_pack(f"已创建新的tab页,并打开链接:{url}", tab_id=tab_id)
72
-
73
-
74
- def register_switch_tab(mcp: FastMCP, browser_manager, client_manager: DPProxyClientManager):
110
+ def register_switch_tab(mcp: FastMCP, browser_manager):
75
111
  @mcp.tool(name="switch_tab", description="根据传入的tab_id切换到对应的tab页", )
76
112
  async def switch_tab(browser_port: int, tab_id: str) -> dict[str, Any]:
77
113
  _browser = browser_manager.get_browser(browser_port)
@@ -79,15 +115,15 @@ def register_switch_tab(mcp: FastMCP, browser_manager, client_manager: DPProxyCl
79
115
  return dp_mcp_message_pack(f"已将tab页:【{tab_id}】切换至最前端")
80
116
 
81
117
 
82
- def register_close_tab(mcp: FastMCP, browser_manager, client_manager: DPProxyClientManager):
118
+ def register_close_tab(mcp: FastMCP, browser_manager):
83
119
  @mcp.tool(name="close_tab", description="根据传入的tab_id关闭tab页", )
84
- async def close_tab(browser_port, tab_id) -> dict[str, Any]:
120
+ async def close_tab(browser_port: int, tab_id: str) -> dict[str, Any]:
85
121
  _browser = browser_manager.get_browser(browser_port)
86
122
  _browser.close_tabs(tab_id)
87
123
  return dp_mcp_message_pack(f"已将tab页:【{tab_id}】关闭")
88
124
 
89
125
 
90
- def register_check_selector(mcp: FastMCP, browser_manager, client_manager: DPProxyClientManager):
126
+ def register_check_selector(mcp: FastMCP, browser_manager):
91
127
  @mcp.tool(name="check_selector",
92
128
  description="查找tab页中是否包含元素,并返回元素attr_name所对应的值。"
93
129
  "当要选择的元素包含过多元素时,需要传入offset和page_size来分批查看元素,一般不建议调整page_size,更推荐你调整offset"
@@ -106,14 +142,19 @@ def register_check_selector(mcp: FastMCP, browser_manager, client_manager: DPPro
106
142
  exist_flag = True
107
143
  if len(target_eles) > page_size:
108
144
  target_eles = target_eles[offset:offset + page_size]
145
+ slice_seg = max(300, one_turn_max_token // (page_size + 6))
109
146
  if attr_name == "text":
110
- ele_text_list = [i.text.replace("\n", "") for i in target_eles]
111
- attr_output = "\n".join(ele_text_list)
147
+ ele_attr_list = [i.text.replace("\n", "") for i in target_eles]
148
+ ele_attr_list = [attr_str[:slice_seg] for attr_str in ele_attr_list]
149
+ # 如果经过截断遍历后的字符串长度与截断长度相等,则默认截断了
150
+ ele_attr_list = [attr_str + "..." if len(attr_str) == slice_seg else attr_str for attr_str in ele_attr_list]
151
+ attr_output = "\n".join(ele_attr_list)
112
152
  else:
113
- attr_output = json.dumps([i.attr(attr_name) for i in target_eles])
153
+ ele_attr_list = [i.attr(attr_name) for i in target_eles]
154
+ ele_attr_list = [attr_str[:slice_seg] for attr_str in ele_attr_list if attr_str]
155
+ ele_attr_list = [attr_str + "..." if len(attr_str) == slice_seg else attr_str for attr_str in ele_attr_list]
156
+ attr_output = json.dumps(ele_attr_list, ensure_ascii=False)
114
157
  # 对attr_output逐个截断,截断的长度为:一轮最大token除以元素个数+3个点+两个引号和逗号
115
- slice_seg = max(300, one_turn_max_token // (page_size + 6))
116
- attr_output = [attr_str[:slice_seg] + "..." for attr_str in attr_output]
117
158
  return dp_mcp_message_pack(
118
159
  f"已完成tab页:【{tab_id}】对:【{css_selector}】的检查",
119
160
  tab_id=tab_id,
@@ -125,7 +166,7 @@ def register_check_selector(mcp: FastMCP, browser_manager, client_manager: DPPro
125
166
  )
126
167
 
127
168
 
128
- def register_quit_browser(mcp: FastMCP, browser_manager, client_manager: DPProxyClientManager):
169
+ def register_quit_browser(mcp: FastMCP, browser_manager):
129
170
  @mcp.tool(name="quit_browser", description="退出浏览器会话,关闭浏览器")
130
171
  async def quit_browser(browser_port: int) -> dict[str, Any]:
131
172
  flag, _browser = browser_manager.remove_page(browser_port)
@@ -138,25 +179,7 @@ def register_quit_browser(mcp: FastMCP, browser_manager, client_manager: DPProxy
138
179
  )
139
180
 
140
181
 
141
- def register_pop_first_packet(mcp: FastMCP, browser_manager, client_manager: DPProxyClientManager):
142
- @mcp.tool(name="pop_first_packet",
143
- description="每调用一次就会弹出传入的tab页所监听到的数据包中的第一个packet_message,当一个packet_message的response body过长时会被切分成多个包,具体一个请求是否还有下一个包,可以参考body_completed字段")
144
- async def pop_first_packet(browser_port: int, tab_id: str) -> dict[str, Any]:
145
- _browser = browser_manager.get_browser(browser_port)
146
- client = client_manager.get_client(tab_id)
147
- packet_message = client.pop_first_packet()
148
- message = f"tab页:【{tab_id}】,暂时没有监听到XHR数据包"
149
- if packet_message:
150
- message = f"tab页:【{tab_id}】,监听到XHR数据包",
151
- return dp_mcp_message_pack(
152
- message,
153
- browser_port=browser_port,
154
- tab_id=tab_id,
155
- packet_message=packet_message
156
- )
157
-
158
-
159
- def register_assert_waf(mcp: FastMCP, browser_manager, client_manager: DPProxyClientManager):
182
+ def register_assert_waf(mcp: FastMCP, browser_manager):
160
183
  @mcp.tool(name="assert_waf",
161
184
  description="通过对比requests、有头浏览器、无头浏览器获取到的html,判断网页是否使用了waf以及是否为动态渲染的网页")
162
185
  async def assert_waf(browser_port: int, tab_id: str) -> dict[str, Any]:
@@ -204,3 +227,75 @@ def register_assert_waf(mcp: FastMCP, browser_manager, client_manager: DPProxyCl
204
227
  raw_headless_rate_difference=h_hless_rate_diff,
205
228
  head_headless_rate_difference=h_hless_rate_diff
206
229
  )
230
+
231
+
232
+ def register_click_action(mcp: FastMCP, browser_manager):
233
+ @mcp.tool(name="click_action", description="尝试点击tab页中的元素,返回元素是否可以被点击,以及是否点击成功。")
234
+ async def click_action(browser_port: int, tab_id: str, css_selector: str) -> dict[str, Any]:
235
+ _browser = browser_manager.get_browser(browser_port)
236
+ target_tab = _browser.get_tab(tab_id)
237
+ css_selector = css_selector
238
+ if "css:" not in css_selector:
239
+ css_selector = "css:" + css_selector
240
+ target_eles = target_tab.eles(css_selector)
241
+ click_success = False
242
+ element_clickable = False
243
+ if len(target_eles) == 1:
244
+ target_element = target_eles[0]
245
+ element_clickable = target_element.states.is_clickable
246
+ try:
247
+ target_element.click()
248
+ click_success = True
249
+ except Exception as e:
250
+ click_success = False
251
+ message = f"tab页:【{tab_id}】点击【{css_selector}】 {'成功' if click_success else '失败'} 了"
252
+ else:
253
+ message = f"tab页:【{tab_id}】传入的css_selector找到了{len(target_eles)}个元素,请确保传入的css_selector可以找到唯一的一个元素"
254
+ return dp_mcp_message_pack(
255
+ message=message,
256
+ browser_port=browser_port,
257
+ tab_id=tab_id,
258
+ css_selector=css_selector,
259
+ element_clickable=element_clickable,
260
+ click_success=click_success,
261
+ extra_message="点击成功,页面可能有更新,请重新获取页面html,并重新分析页面Selector" if click_success else ""
262
+ )
263
+
264
+
265
+ def register_scroll_action(mcp: FastMCP, browser_manager):
266
+ @mcp.tool(name="scroll_action", description="尝试滚动tab页"
267
+ "forward参数是滚动的方向:down、up、left、right"
268
+ "pixel参数是滚动的像素值,默认为None。"
269
+ "当forward为down且pixel为None,则将页面滚动到垂直中间位置,水平位置不变"
270
+ "当forward为up且pixel为None,则将页面滚动到顶部,水平位置不变"
271
+ "当forward为left且pixel为None,则将页面滚动到最左边,垂直位置不变"
272
+ "当forward为right且pixel为None,则将页面滚动到最右边,垂直位置不变")
273
+ async def scroll_action(browser_port: int, tab_id: str, forward: str = "down", pixel: int = None) -> dict[str, Any]:
274
+ _browser = browser_manager.get_browser(browser_port)
275
+ target_tab = _browser.get_tab(tab_id)
276
+ if forward == "down":
277
+ if pixel is None:
278
+ target_tab.scroll.to_half()
279
+ target_tab.scroll.down(pixel)
280
+ elif forward == "up":
281
+ if pixel is None:
282
+ target_tab.scroll.to_top()
283
+ target_tab.scroll.up(pixel)
284
+ elif forward == "left":
285
+ if pixel is None:
286
+ target_tab.scroll.to_leftmost()
287
+ target_tab.scroll.left(pixel)
288
+ elif forward == "right":
289
+ if pixel is None:
290
+ target_tab.scroll.to_rightmost()
291
+ target_tab.scroll.right(pixel)
292
+ else:
293
+ if pixel is None:
294
+ target_tab.scroll.to_half()
295
+ target_tab.scroll.down()
296
+ message = f"已完成对tab页:【{tab_id}】forward={forward} 的滑动"
297
+ return dp_mcp_message_pack(
298
+ message=message,
299
+ browser_port=browser_port,
300
+ tab_id=tab_id,
301
+ )
@@ -12,21 +12,24 @@ base_cwd = os.getenv("BASE_CWD", os.path.expanduser('~'))
12
12
 
13
13
  if "TeamNode-Dp" in enabled_modules:
14
14
  # 页面管理
15
- register_close_tab(mcp, browser_manager, client_manager)
16
- register_switch_tab(mcp, browser_manager, client_manager)
15
+ register_close_tab(mcp, browser_manager)
16
+ register_switch_tab(mcp, browser_manager)
17
17
  register_get_new_tab(mcp, browser_manager, client_manager)
18
- # 功能
18
+ # 基础功能
19
19
  register_visit_url(mcp, browser_manager, client_manager)
20
- register_get_html(mcp, browser_manager, client_manager)
21
- register_check_selector(mcp, browser_manager, client_manager)
20
+ register_get_html(mcp, browser_manager)
21
+ register_check_selector(mcp, browser_manager)
22
22
  register_pop_first_packet(mcp, browser_manager, client_manager)
23
+ # 页面交互
24
+ register_click_action(mcp, browser_manager)
25
+ register_scroll_action(mcp, browser_manager)
23
26
 
24
27
  if "JarvisNode" in enabled_modules:
25
- register_assert_waf(mcp, browser_manager, client_manager)
28
+ register_assert_waf(mcp, browser_manager)
26
29
 
27
30
 
28
31
  def main():
29
- mcp.run(transport="stdio")
32
+ mcp.run(transport="stdio",show_banner=False)
30
33
 
31
34
 
32
35
  if __name__ == '__main__':
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "Jarvis_Brain" # 别人下载时用的名字,必须在 PyPI 上唯一
3
- version = "0.1.7.12"
3
+ version = "0.1.9.15"
4
4
  description = "Jarvis brain mcp"
5
5
  dependencies = [
6
6
  "fastmcp",
@@ -4,6 +4,9 @@ from typing import Optional, Tuple
4
4
  import os
5
5
  from DrissionPage import ChromiumPage, ChromiumOptions
6
6
  import platform
7
+ from DrissionPage.common import Settings
8
+
9
+ Settings.set_raise_when_click_failed(True)
7
10
 
8
11
 
9
12
  class BrowserManager:
@@ -6,17 +6,18 @@ from DrissionPage._pages.chromium_tab import ChromiumTab
6
6
  from DrissionPage._units.listener import DataPacket
7
7
  from typing import Tuple, Optional
8
8
  import json
9
+ from urllib.parse import urlparse, urlunparse
9
10
 
10
- one_turn_max_token = 20000
11
+ one_turn_max_token = 16000
11
12
 
12
13
 
13
14
  class DPProxyClient:
14
- def __init__(self, driver: ChromiumTab, self_kill=False):
15
+ def __init__(self, driver: ChromiumTab, packet_filter: dict, self_kill=False):
15
16
  self.tab_id = driver.tab_id
16
17
  self.driver = ChromePageProxy(driver, self)
17
18
  self.thread = None
18
19
  self.self_kill = self_kill
19
- # self.packet_list = []
20
+ self.packet_filter = packet_filter
20
21
  self.packet_queue = deque()
21
22
 
22
23
  def get_driver(self, start_listen, count=None, timeout=10) -> ChromiumTab:
@@ -28,7 +29,7 @@ class DPProxyClient:
28
29
  :return:
29
30
  """
30
31
  if start_listen:
31
- self.driver.listen.set_targets(res_type="XHR")
32
+ self.driver.listen.set_targets(res_type=('xhr', 'fetch'))
32
33
  self.driver.listen.start()
33
34
  self.thread = threading.Thread(target=self.start_listen, args=(count, timeout,))
34
35
  self.thread.start()
@@ -42,9 +43,10 @@ class DPProxyClient:
42
43
  def pop_first_packet(self):
43
44
  if self.packet_queue:
44
45
  result = self.packet_queue.popleft()
45
- return json.dumps(result, ensure_ascii=False)
46
+ current_queue_size = len(self.packet_queue)
47
+ return current_queue_size, json.dumps(result, ensure_ascii=False, separators=(',', ':')).replace("\\", "")
46
48
  else:
47
- return None
49
+ return 0, None
48
50
 
49
51
 
50
52
  class DPProxyClientManager:
@@ -57,10 +59,11 @@ class DPProxyClientManager:
57
59
  cls._instance.tab_pool = {}
58
60
  return cls._instance
59
61
 
60
- def create_client(self, tab: ChromiumTab, self_kill=False) -> Tuple[str, DPProxyClient, ChromiumTab]:
62
+ def create_client(self, tab: ChromiumTab, packet_filter: dict, self_kill=False) -> Tuple[
63
+ str, DPProxyClient, ChromiumTab]:
61
64
  """创建新的tab页面代理实例"""
62
- client = DPProxyClient(tab, self_kill=self_kill)
63
- tab = client.get_driver(True)
65
+ client = DPProxyClient(tab, packet_filter, self_kill=self_kill)
66
+ tab = client.get_driver(True, timeout=60 * 10)
64
67
  tab_id = tab.tab_id
65
68
  self.tab_pool[tab_id] = {"client": client, "driver": tab}
66
69
  return tab_id, client, tab
@@ -135,11 +138,21 @@ def check_data_packet(packet: DataPacket, client: DPProxyClient):
135
138
  data = None
136
139
  if packet.request.hasPostData:
137
140
  data = packet.request.postData
141
+ domain = urlparse(url).netloc
138
142
  body = packet.response.body
139
- body_str = json.dumps(body, ensure_ascii=False)
143
+ body_str = json.dumps(body, ensure_ascii=False, separators=(',', ':'))
140
144
  body_str_list = [body_str[i:i + one_turn_max_token] for i in range(0, len(body_str), one_turn_max_token)]
141
145
  body_completed = True
146
+ packet_filter = client.packet_filter
147
+ domain_filter = packet_filter.get("domain_filter", None)
148
+ method_filter = packet_filter.get("method_filter", ["GET", "POST"])
142
149
  for index, body_str in enumerate(body_str_list):
150
+ # 如果给了domain_filter并且domain没有在domain_filter中时跳过该数据包
151
+ if domain_filter and domain not in domain_filter:
152
+ continue
153
+ # 如果method没有在method_filter中,则跳过该数据包
154
+ if method not in method_filter:
155
+ continue
143
156
  if (index + 1) != len(body_str_list):
144
157
  body_completed = False
145
158
  temp_dict = {
@@ -149,7 +162,7 @@ def check_data_packet(packet: DataPacket, client: DPProxyClient):
149
162
  "request_data": data,
150
163
  "request_headers": dict(packet.request.headers),
151
164
  "response_headers": dict(packet.response.headers),
152
- "response_body_segment": body_str,
165
+ "response_body_segment": body_str.replace("\\", ""),
153
166
  }
154
167
  client.packet_queue.append(temp_dict)
155
168
 
@@ -53,6 +53,15 @@ def compress_html(content, only_text=False):
53
53
  for meta in doc.xpath('//meta'):
54
54
  meta.getparent().remove(meta)
55
55
 
56
+ for svg in doc.xpath('//svg'):
57
+ # 获取 SVG 内的文本内容
58
+ text_content = svg.text_content()
59
+ # 创建一个新的文本节点替换 SVG
60
+ parent = svg.getparent()
61
+ if parent is not None:
62
+ parent.text = (parent.text or '') + text_content
63
+ parent.remove(svg)
64
+
56
65
  # 删除 style 属性
57
66
  for element in doc.xpath('//*[@style]'):
58
67
  element.attrib.pop('style')
File without changes
File without changes