dp-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dp_cli/__init__.py +1 -0
- dp_cli/commands/__init__.py +12 -0
- dp_cli/commands/_utils.py +107 -0
- dp_cli/commands/browser.py +159 -0
- dp_cli/commands/element.py +259 -0
- dp_cli/commands/keyboard.py +126 -0
- dp_cli/commands/misc.py +136 -0
- dp_cli/commands/network.py +169 -0
- dp_cli/commands/page.py +204 -0
- dp_cli/commands/snapshot_cmd.py +391 -0
- dp_cli/commands/storage.py +222 -0
- dp_cli/commands/tab.py +203 -0
- dp_cli/main.py +47 -0
- dp_cli/output.py +97 -0
- dp_cli/session.py +201 -0
- dp_cli/snapshot/__init__.py +23 -0
- dp_cli/snapshot/a11y.py +671 -0
- dp_cli/snapshot/extract.py +158 -0
- dp_cli/snapshot/js_scripts.py +155 -0
- dp_cli/snapshot/utils.py +43 -0
- dp_cli-0.1.0.dist-info/METADATA +103 -0
- dp_cli-0.1.0.dist-info/RECORD +25 -0
- dp_cli-0.1.0.dist-info/WHEEL +5 -0
- dp_cli-0.1.0.dist-info/entry_points.txt +2 -0
- dp_cli-0.1.0.dist-info/top_level.txt +1 -0
dp_cli/commands/tab.py
ADDED
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
# -*- coding:utf-8 -*-
|
|
2
|
+
"""标签页管理命令: tab-list / tab-new / tab-select / tab-close"""
|
|
3
|
+
import click
|
|
4
|
+
|
|
5
|
+
from dp_cli.output import ok, error
|
|
6
|
+
from dp_cli.session import load_session, save_session
|
|
7
|
+
from dp_cli.commands._utils import session_option, _get_page, normalize_url
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def register(cli):
    """Attach the tab-management commands to the given click group.

    Defines four commands on ``cli``: ``tab-list``, ``tab-new``,
    ``tab-select`` and ``tab-close``. The "pinned" (bound) tab is persisted in
    the session record under the ``active_tab`` key.

    NOTE: the docstrings of the nested command functions are what click shows
    as ``--help`` text, i.e. they are user-facing output and are therefore
    kept verbatim in their original language.
    """

    @cli.command('tab-list')
    @session_option
    def tab_list(session):
        """列出所有标签页。

        \b
        绑定的标签页会显示 [pinned] 标记,后续所有 dp 命令只在该标签页中执行。
        """
        # _get_page lives in _utils (not visible here). A falsy result aborts
        # silently -- presumably the helper already reported the failure
        # (TODO confirm against _utils).
        page = _get_page(session, raw=True)
        if not page:
            return
        try:
            sess = load_session(session)
            pinned_id = sess.get('active_tab')
            tabs = []
            for i, tab_id in enumerate(page.tab_ids):
                tab = page.get_tab(tab_id)
                entry = {
                    'index': i,
                    'id': tab_id,
                    'url': tab.url,
                    'title': tab.title,
                }
                # Only the bound tab carries the 'pinned' key at all.
                if tab_id == pinned_id:
                    entry['pinned'] = True
                tabs.append(entry)
            ok({'tabs': tabs, 'count': len(tabs),
                'pinned': pinned_id or '(none)'})
        except Exception as e:
            error(f'获取标签页列表失败', code='TAB_FAILED', detail=str(e))

    @cli.command('tab-new')
    @click.argument('url', required=False)
    @session_option
    @click.option('--background', is_flag=True, help='在后台打开(不绑定)')
    @click.option('--new-window', is_flag=True, help='在新窗口中打开')
    def tab_new(url, session, background, new_window):
        """新建标签页并自动绑定。

        \b
        新标签页会自动绑定到当前会话,后续 dp 命令在该标签页中执行。
        使用 --background 时不绑定。
        使用 --new-window 在独立窗口中打开(适合自动化与手动浏览分离)。

        \b
        示例:
        dp tab-new https://example.com
        dp tab-new https://example.com --new-window
        dp tab-new https://example.com --background
        """
        page = _get_page(session, raw=True)
        if not page:
            return
        try:
            # An omitted URL is passed on as '' rather than None.
            new_tab = page.new_tab(url=normalize_url(url) if url else '',
                                   new_window=new_window,
                                   background=background)
            new_tid = new_tab.tab_id
            msg = '新标签页已创建'

            # Bind the new tab to the session unless --background was given.
            if not background:
                sess = load_session(session)
                sess['active_tab'] = new_tid
                save_session(session, sess)
                msg += '(已绑定,dp 命令将在此标签页执行)'

            ok({'id': new_tid, 'url': new_tab.url,
                'title': new_tab.title, 'pinned': not background},
               msg=msg)
        except Exception as e:
            error(f'创建标签页失败', code='TAB_FAILED', detail=str(e))

    @cli.command('tab-select')
    @click.argument('target')
    @session_option
    def tab_select(target, session):
        """绑定到指定标签页,后续 dp 命令在该标签页中执行。

        \b
        TARGET 支持:
        序号 dp tab-select 0 (按标签页序号)
        tab_id dp tab-select ABC123 (按标签页 ID)
        URL dp tab-select zhipin (按 URL 关键词匹配)
        none dp tab-select none (解除绑定,恢复默认行为)
        """
        # 'none' (any letter case) removes the binding; this path only touches
        # the session record and never contacts the browser.
        if target.lower() == 'none':
            sess = load_session(session)
            old = sess.pop('active_tab', None)
            save_session(session, sess)
            if old:
                ok(msg='已解除标签页绑定,后续命令将在浏览器活跃标签页执行')
            else:
                ok(msg='当前没有绑定的标签页')
            return

        page = _get_page(session, raw=True)
        if not page:
            return
        try:
            # _resolve_tab_target reports its own TAB_NOT_FOUND error and
            # returns '' when nothing matches, hence the bare return.
            tab_id = _resolve_tab_target(page, target)
            if not tab_id:
                return

            tab = page.get_tab(tab_id)
            # presumably brings the tab to the foreground in the browser UI --
            # confirm against the DrissionPage tab API
            tab.set.activate()
            sess = load_session(session)
            sess['active_tab'] = tab_id
            save_session(session, sess)
            ok({'id': tab_id, 'url': tab.url, 'title': tab.title},
               msg='已绑定,dp 命令将在此标签页执行')
        except Exception as e:
            error(f'切换标签页失败', code='TAB_FAILED', detail=str(e))

    @cli.command('tab-close')
    @click.argument('index_or_id', required=False)
    @session_option
    def tab_close(index_or_id, session):
        """关闭标签页(默认关闭绑定的标签页,无绑定则关闭当前页)。"""
        page = _get_page(session, raw=True)
        if not page:
            return
        try:
            sess = load_session(session)
            pinned_id = sess.get('active_tab')

            if index_or_id is None:
                # No explicit target: prefer closing the bound tab.
                if pinned_id:
                    tab = page.get_tab(pinned_id)
                    tab.close()
                    # The binding would now point at a closed tab; drop it.
                    sess.pop('active_tab', None)
                    save_session(session, sess)
                    ok({'id': pinned_id}, msg='绑定的标签页已关闭(绑定已解除)')
                else:
                    # No binding: close whatever the raw page object refers
                    # to (described as the current tab in the message below).
                    page.close()
                    ok(msg='当前标签页已关闭')
            else:
                tab_id = _resolve_tab_target(page, index_or_id)
                if not tab_id:
                    return
                tab = page.get_tab(tab_id)
                tab.close()
                # If the closed tab was the bound one, clear the binding too.
                if tab_id == pinned_id:
                    sess.pop('active_tab', None)
                    save_session(session, sess)
                ok({'id': tab_id}, msg='标签页已关闭')
        except Exception as e:
            error(f'关闭标签页失败', code='TAB_FAILED', detail=str(e))
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def _resolve_tab_target(page, target: str) -> str:
    """Resolve a user-supplied tab selector to a tab id.

    Strategies are tried in this order and the first hit wins:

    1. ``target`` parses as an integer: it is used as an index into
       ``page.tab_ids``. An out-of-range index is reported immediately and no
       other strategy is attempted.
    2. ``target`` equals one of the tab ids.
    3. ``target`` is a case-insensitive substring of some tab's URL.
    4. ``target`` is a case-insensitive substring of some tab's title.

    Tabs that raise while being inspected are skipped.

    Returns:
        The matching tab id, or ``''`` after ``error`` has been called with
        code ``TAB_NOT_FOUND``.
    """
    known_ids = page.tab_ids

    # Strategy 1: numeric index.
    try:
        position = int(target)
    except ValueError:
        position = None
    if position is not None:
        if 0 <= position < len(known_ids):
            return known_ids[position]
        error(f'标签页序号越界: {position}(共 {len(known_ids)} 个)',
              code='TAB_NOT_FOUND')
        return ''

    # Strategy 2: exact tab id.
    if target in known_ids:
        return target

    # Strategies 3 and 4: keyword search, all URLs first, then all titles.
    for field in ('url', 'title'):
        for candidate in known_ids:
            try:
                haystack = getattr(page.get_tab(candidate), field) or ''
                if target.lower() in haystack.lower():
                    return candidate
            except Exception:
                continue

    error(f'未找到匹配 "{target}" 的标签页', code='TAB_NOT_FOUND')
    return ''
|
dp_cli/main.py
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# -*- coding:utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
dp-cli —— DrissionPage 命令行工具
|
|
4
|
+
比 playwright-cli 更强大,充分利用 DrissionPage 的独特优势:
|
|
5
|
+
- 不基于 webdriver,天然反检测
|
|
6
|
+
- 支持浏览器模式 + HTTP 模式无缝切换
|
|
7
|
+
- 强大的定位语法(比 a11y ref 更稳定)
|
|
8
|
+
- lxml 高效批量解析,snapshot 一次 CDP 调用
|
|
9
|
+
- 支持 shadow-root / iframe 穿透
|
|
10
|
+
- 内置网络包监听能力
|
|
11
|
+
"""
|
|
12
|
+
import click
|
|
13
|
+
|
|
14
|
+
from dp_cli.commands import register_all
|
|
15
|
+
|
|
16
|
+
# Shared click context settings: accept -h as well as --help, and allow help
# output to be wrapped at up to 100 columns.
CONTEXT_SETTINGS = {
    'help_option_names': ['-h', '--help'],
    'max_content_width': 100,
}
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# Root command group of the CLI. The docstring below is rendered by click as
# the top-level --help text, so it is user-facing output and is left verbatim.
@click.group(context_settings=CONTEXT_SETTINGS, invoke_without_command=True)
@click.version_option(message='%(version)s')
@click.pass_context
def cli(ctx):
    """
    \b
    dp-cli —— DrissionPage 命令行工具

    \b
    快速开始:
    dp open https://example.com
    dp snapshot
    dp click "text:登录"
    dp fill "@name=username" admin
    dp close
    """
    # invoke_without_command=True lets this body run even when no subcommand
    # was given; in that case print the help text instead of doing nothing.
    if ctx.invoked_subcommand is None:
        click.echo(ctx.get_help())
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
# Import-time side effect: hand the root group to dp_cli.commands.register_all
# so the command modules can attach their commands to it.
register_all(cli)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def main():
    """Run the click command group (thin wrapper around ``cli()``)."""
    cli()
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
# Allow running this module directly as a script.
if __name__ == '__main__':
    main()
|
dp_cli/output.py
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
# -*- coding:utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
dp-cli 输出格式化模块
|
|
4
|
+
统一的 JSON 输出格式,便于 AI 工具解析。
|
|
5
|
+
"""
|
|
6
|
+
import json
|
|
7
|
+
import sys
|
|
8
|
+
from typing import Any, Optional
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def ok(data: Any = None, msg: str = None) -> None:
    """Print a success envelope as JSON.

    The envelope always contains ``"status": "ok"``. ``"message"`` is added
    only when ``msg`` is truthy, and ``"data"`` only when ``data`` is not
    ``None`` (so falsy payloads such as ``[]`` or ``0`` are still emitted).
    Key order is status, message, data.
    """
    envelope = {
        'status': 'ok',
        **({'message': msg} if msg else {}),
        **({'data': data} if data is not None else {}),
    }
    _print(envelope)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def error(msg: str, code: str = 'ERROR', detail: str = None) -> None:
    """Print an error envelope as JSON and terminate with exit status 1.

    The envelope holds ``status``, ``code`` and ``message``; ``detail`` is
    appended only when truthy. This function does not return normally:
    ``sys.exit(1)`` raises ``SystemExit``, which an ``except Exception``
    handler in a caller does not intercept.
    """
    envelope = {
        'status': 'error',
        'code': code,
        'message': msg,
        **({'detail': detail} if detail else {}),
    }
    _print(envelope)
    sys.exit(1)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _print(result: dict) -> None:
    """Write ``result`` to stdout as 2-space-indented JSON.

    Non-ASCII characters are emitted as-is rather than as ``\\uXXXX`` escapes.
    """
    rendered = json.dumps(result, indent=2, ensure_ascii=False)
    print(rendered)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def format_element(ele, include_rect: bool = False) -> dict:
    """Summarise a single element as a plain dict.

    The result has the keys ``tag``, ``text`` (stripped ``raw_text``, capped
    at 200 characters), ``attrs`` and ``loc`` (a suggested locator string).
    When ``include_rect`` is true a ``rect`` entry with ``location``, ``size``
    and ``midpoint`` lists is added on a best-effort basis: if reading the
    geometry raises, the key is simply omitted.
    """
    # Attribute access may fail on some elements; fall back to no attributes.
    try:
        attributes = ele.attrs
    except Exception:
        attributes = {}

    summary = dict(
        tag=ele.tag,
        text=(ele.raw_text or '').strip()[:200],
        attrs=attributes,
        loc=_suggest_locator(ele, attributes),
    )
    if not include_rect:
        return summary

    try:
        summary['rect'] = {
            part: list(getattr(ele.rect, part))
            for part in ('location', 'size', 'midpoint')
        }
    except Exception:
        # Geometry is optional extra information; ignore failures.
        pass
    return summary
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _suggest_locator(ele, attrs: dict) -> str:
    """Pick a locator string for ``ele``, most specific source first.

    Order of preference:

    1. ``#<id>`` when the element has a non-empty ``id``;
    2. ``@<attr>=<value>`` for the first non-empty attribute among
       data-testid, data-qa, aria-label, name, placeholder;
    3. ``.<first class name>`` (the first class is used; no uniqueness check
       is performed);
    4. ``text:<text>`` when the stripped text is non-empty and at most 30
       characters long;
    5. ``t:<tag>`` as the last resort.
    """
    element_id = attrs.get('id')
    if element_id:
        return f'#{element_id}'

    for key in ('data-testid', 'data-qa', 'aria-label', 'name', 'placeholder'):
        value = attrs.get(key)
        if value:
            return f'@{key}={value}'

    class_names = (attrs.get('class', '') or '').strip().split()
    if class_names:
        return f'.{class_names[0]}'

    # Reading the text can fail on some elements; treat that as "no text".
    try:
        snippet = (ele.raw_text or '').strip()
    except Exception:
        snippet = ''
    if snippet and len(snippet) <= 30:
        return f'text:{snippet}'

    return f't:{ele.tag}'
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def format_page_info(page) -> dict:
    """Return the page's ``url``, ``title`` and ``states.ready_state`` as a dict."""
    current_url = page.url
    current_title = page.title
    return dict(
        url=current_url,
        title=current_title,
        ready_state=page.states.ready_state,
    )
|
dp_cli/session.py
ADDED
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
# -*- coding:utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
dp-cli 会话管理模块
|
|
4
|
+
通过固定端口连接复用已运行的浏览器实例,实现跨命令状态共享。
|
|
5
|
+
"""
|
|
6
|
+
import json
|
|
7
|
+
import os
|
|
8
|
+
import sys
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from time import sleep, perf_counter
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# Default directory for session state files: <home>/.dp_cli/sessions
_SESSION_DIR = Path.home() / '.dp_cli' / 'sessions'
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def get_session_dir() -> Path:
    """Return the session directory, creating it (and parents) if needed."""
    session_dir = _SESSION_DIR
    session_dir.mkdir(parents=True, exist_ok=True)
    return session_dir
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def get_session_file(name: str) -> Path:
    """Return the path ``<session dir>/<name>.json``.

    The session directory is created as a side effect; the file itself is
    neither created nor checked for existence.
    """
    filename = f'{name}.json'
    return get_session_dir() / filename
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def load_session(name: str) -> dict:
    """Read the stored record of session ``name``.

    Returns:
        The decoded JSON object, or ``{}`` when the file is missing,
        unreadable, not valid UTF-8/JSON, or does not contain a JSON object.

    Callers use the result with ``.get`` / ``.pop`` / item assignment, so a
    file holding valid JSON of another type (``[]``, ``null``, a number) is
    treated like a corrupt file instead of being returned as-is.
    """
    session_file = get_session_file(name)
    try:
        # EAFP: a missing file raises FileNotFoundError (an OSError), so no
        # separate exists() check -- and no race between check and read.
        record = json.loads(session_file.read_text(encoding='utf-8'))
    except (OSError, ValueError):
        # OSError: missing or unreadable file.
        # ValueError: covers UnicodeDecodeError and json.JSONDecodeError.
        return {}
    return record if isinstance(record, dict) else {}
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def save_session(name: str, data: dict) -> None:
    """Write ``data`` as the record of session ``name``.

    The record is stored as UTF-8 JSON with 2-space indentation and
    unescaped non-ASCII characters, replacing any previous content.
    """
    destination = get_session_file(name)
    serialized = json.dumps(data, ensure_ascii=False, indent=2)
    destination.write_text(serialized, encoding='utf-8')
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def delete_session(name: str) -> bool:
    """Delete the record file of session ``name``.

    Returns:
        ``True`` if a file was removed, ``False`` if there was none.

    The file is unlinked directly instead of being checked with ``exists()``
    first: with the check-then-delete form, a file removed by another process
    between the two calls raised ``FileNotFoundError`` instead of returning
    ``False``. Other ``OSError``s (e.g. permission problems) still propagate.
    """
    try:
        get_session_file(name).unlink()
    except FileNotFoundError:
        return False
    return True
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def list_sessions() -> list:
    """Return one dict per readable ``*.json`` file in the session directory.

    Each dict starts with ``name`` (the file stem) followed by the stored
    fields; a stored ``name`` key therefore overrides the stem. Files that
    cannot be read, decoded or unpacked as a mapping are skipped silently.
    """
    found = []
    for session_file in get_session_dir().glob('*.json'):
        try:
            stored = json.loads(session_file.read_text(encoding='utf-8'))
            found.append({'name': session_file.stem, **stored})
        except Exception:
            # Best effort: one broken file must not hide the others.
            continue
    return found
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def get_browser(session_name: str = 'default', headless: bool = False,
                browser_path: str = None, user_data_dir: str = None,
                proxy: str = None, port: int = None):
    """Return a ``ChromiumPage`` for the session, attaching or launching.

    Connection priority:

    1. ``port`` given: attach to that port only. Failure raises
       ``ConnectionError``; there is no fallback. On success the session
       record is overwritten and flagged ``user_connected``.
    2. The session record has a ``port``: try to attach to it. On failure the
       record is deleted and control falls through to step 3.
    3. Start a new browser and store its port in the session record.

    ``browser_path``, ``user_data_dir``, ``proxy`` and ``headless`` are only
    applied to the browser options in step 3.
    """
    from DrissionPage import ChromiumPage
    from DrissionPage._configs.chromium_options import ChromiumOptions

    # === Case 1: explicit port (the user's own browser) ===
    if port:
        attach_opts = ChromiumOptions(read_file=False)
        attach_opts.set_local_port(port)
        # presumably "attach only, never launch" -- confirm in DrissionPage docs
        attach_opts.existing_only(True)
        try:
            page = ChromiumPage(attach_opts)
        except Exception as e:
            raise ConnectionError(
                f'无法连接到端口 {port} 的浏览器实例。\n'
                f'请确认浏览器已使用 --remote-debugging-port={port} 启动。\n'
                f'启动命令示例:\n'
                f' google-chrome --remote-debugging-port={port}\n'
                f' chromium --remote-debugging-port={port}\n'
                f'原始错误: {e}'
            ) from e
        # Remember the port so later commands need not pass it again.
        save_session(session_name, dict(
            port=port,
            headless=headless,
            user_data_dir=user_data_dir,
            user_connected=True,  # marks a browser owned by the user
        ))
        return page

    # === Case 2: reuse the port recorded for this session ===
    remembered_port = load_session(session_name).get('port')
    if remembered_port:
        try:
            reuse_opts = ChromiumOptions(read_file=False)
            reuse_opts.set_local_port(remembered_port)
            reuse_opts.existing_only(True)
            return ChromiumPage(reuse_opts)
        except Exception:
            # Stale record: drop it and fall through to a fresh launch.
            delete_session(session_name)

    # === Case 3: launch a new browser instance ===
    launch_opts = ChromiumOptions()
    if browser_path:
        launch_opts.set_browser_path(browser_path)

    if user_data_dir:
        profile_path = user_data_dir
    else:
        # Each session gets its own profile directory.
        profile_dir = get_session_dir() / 'profiles' / session_name
        profile_dir.mkdir(parents=True, exist_ok=True)
        profile_path = str(profile_dir)
    launch_opts.set_user_data_path(profile_path)

    if proxy:
        launch_opts.set_proxy(proxy)

    if headless:
        launch_opts.headless(True)

    # presumably lets DrissionPage choose a free debugging port -- confirm
    launch_opts.auto_port(True)

    page = ChromiumPage(launch_opts)

    # Persist the port the browser actually listens on (last ':' field of
    # the browser address).
    actual_port = int(page.browser.address.rsplit(':', 1)[-1])
    save_session(session_name, dict(
        port=actual_port,
        headless=headless,
        user_data_dir=(str(launch_opts.user_data_path)
                       if launch_opts.user_data_path else None),
    ))

    return page
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
# ── Ref 映射管理 ─────────────────────────────────────────────────────────────
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def save_refs(session_name: str, url: str, refs: dict) -> None:
    """Store a snapshot's ref mapping in ``<session dir>/refs/<session_name>.json``.

    The file holds ``url``, ``timestamp`` (local ``datetime.now()`` in ISO
    format) and ``refs``, written as compact UTF-8 JSON.
    """
    from datetime import datetime

    snapshot = dict(
        url=url,
        timestamp=datetime.now().isoformat(),
        refs=refs,
    )
    refs_dir = get_session_dir() / 'refs'
    refs_dir.mkdir(exist_ok=True)
    refs_file = refs_dir / f'{session_name}.json'
    refs_file.write_text(
        json.dumps(snapshot, ensure_ascii=False), encoding='utf-8'
    )
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def load_refs(session_name: str) -> dict:
    """Load the ``refs`` mapping stored for ``session_name``.

    Returns the value under the file's ``refs`` key, or ``{}`` when the file
    is missing, cannot be read or decoded, or has no such key. Per the
    original author's note the mapping has the form
    ``{ref_id: {locator, role, name, backendNodeId}}`` (not enforced here).
    """
    refs_file = get_session_dir() / 'refs' / f'{session_name}.json'
    if refs_file.exists():
        try:
            stored = json.loads(refs_file.read_text(encoding='utf-8'))
            return stored.get('refs', {})
        except Exception:
            # Any read or decode problem is treated as "no refs".
            pass
    return {}
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def close_browser(session_name: str = 'default', del_data: bool = False) -> bool:
    """Quit the browser recorded for ``session_name`` and drop its record.

    Returns ``False`` without doing anything when the session record has no
    ``port``. Otherwise tries to attach to that port and quit the browser
    (passing ``del_data`` through), ignores any failure while doing so,
    deletes the session record and returns ``True``.
    """
    from DrissionPage import ChromiumPage
    from DrissionPage._configs.chromium_options import ChromiumOptions

    recorded_port = load_session(session_name).get('port')
    if not recorded_port:
        return False

    try:
        attach_opts = ChromiumOptions(read_file=False)
        attach_opts.set_local_port(recorded_port)
        attach_opts.existing_only(True)
        ChromiumPage(attach_opts).browser.quit(del_data=del_data)
    except Exception:
        # Best effort: the browser may already be gone; the record is
        # removed either way.
        pass

    delete_session(session_name)
    return True
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# -*- coding:utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
dp-cli snapshot 模块
|
|
4
|
+
|
|
5
|
+
基于浏览器原生 a11y tree(无障碍树)的页面快照系统。
|
|
6
|
+
通过 CDP Accessibility API 获取,通用性极强,适用于 95%+ 的网站。
|
|
7
|
+
|
|
8
|
+
模块组成:
|
|
9
|
+
- a11y.py 核心:a11y tree 获取 + 多模式渲染(full/brief/text)
|
|
10
|
+
- extract.py 数据提取(extract_structured / query_elements)
|
|
11
|
+
- utils.py 共享工具(定位器生成等)
|
|
12
|
+
- js_scripts.py JS 降级脚本(CDP 不可用时的 fallback)
|
|
13
|
+
"""
|
|
14
|
+
from .a11y import take_a11y_snapshot, render_a11y_text, render_a11y_plain_text
|
|
15
|
+
from .extract import extract_structured, query_elements
|
|
16
|
+
|
|
17
|
+
# Public API of the snapshot package: exactly the names imported above from
# .a11y and .extract.
__all__ = [
    'take_a11y_snapshot',
    'render_a11y_text',
    'render_a11y_plain_text',
    'extract_structured',
    'query_elements',
]
|