dp-cli 0.6.0__tar.gz → 0.6.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dp_cli-0.6.0 → dp_cli-0.6.2}/PKG-INFO +14 -6
- {dp_cli-0.6.0 → dp_cli-0.6.2}/README.md +8 -5
- {dp_cli-0.6.0 → dp_cli-0.6.2}/dp_cli/bridge.py +23 -5
- {dp_cli-0.6.0 → dp_cli-0.6.2}/dp_cli/bridge_manager.py +88 -11
- {dp_cli-0.6.0 → dp_cli-0.6.2}/dp_cli/commands/_utils.py +1 -1
- {dp_cli-0.6.0 → dp_cli-0.6.2}/dp_cli/commands/keyboard.py +31 -47
- {dp_cli-0.6.0 → dp_cli-0.6.2}/dp_cli/commands/snapshot_cmd.py +40 -20
- {dp_cli-0.6.0 → dp_cli-0.6.2}/dp_cli/output.py +4 -37
- {dp_cli-0.6.0 → dp_cli-0.6.2}/dp_cli/recorder.py +61 -97
- {dp_cli-0.6.0 → dp_cli-0.6.2}/dp_cli/snapshot/a11y.py +297 -63
- {dp_cli-0.6.0 → dp_cli-0.6.2}/dp_cli/snapshot/clickable.py +13 -5
- {dp_cli-0.6.0 → dp_cli-0.6.2}/dp_cli/snapshot/extract.py +2 -2
- dp_cli-0.6.2/dp_cli/snapshot/utils.py +70 -0
- {dp_cli-0.6.0 → dp_cli-0.6.2}/dp_cli.egg-info/PKG-INFO +14 -6
- {dp_cli-0.6.0 → dp_cli-0.6.2}/dp_cli.egg-info/SOURCES.txt +6 -1
- dp_cli-0.6.2/dp_cli.egg-info/requires.txt +11 -0
- dp_cli-0.6.2/pyproject.toml +88 -0
- dp_cli-0.6.2/tests/test_a11y.py +661 -0
- {dp_cli-0.6.0 → dp_cli-0.6.2}/tests/test_bridge_manager.py +9 -6
- dp_cli-0.6.2/tests/test_commands.py +389 -0
- dp_cli-0.6.2/tests/test_recorder.py +473 -0
- dp_cli-0.6.2/tests/test_session.py +804 -0
- dp_cli-0.6.2/tests/test_snapshot_small.py +454 -0
- dp_cli-0.6.0/dp_cli/snapshot/utils.py +0 -43
- dp_cli-0.6.0/dp_cli.egg-info/requires.txt +0 -5
- dp_cli-0.6.0/pyproject.toml +0 -37
- {dp_cli-0.6.0 → dp_cli-0.6.2}/dp_cli/__init__.py +0 -0
- {dp_cli-0.6.0 → dp_cli-0.6.2}/dp_cli/commands/__init__.py +0 -0
- {dp_cli-0.6.0 → dp_cli-0.6.2}/dp_cli/commands/browser.py +0 -0
- {dp_cli-0.6.0 → dp_cli-0.6.2}/dp_cli/commands/element.py +0 -0
- {dp_cli-0.6.0 → dp_cli-0.6.2}/dp_cli/commands/misc.py +0 -0
- {dp_cli-0.6.0 → dp_cli-0.6.2}/dp_cli/commands/network.py +0 -0
- {dp_cli-0.6.0 → dp_cli-0.6.2}/dp_cli/commands/page.py +0 -0
- {dp_cli-0.6.0 → dp_cli-0.6.2}/dp_cli/commands/record.py +0 -0
- {dp_cli-0.6.0 → dp_cli-0.6.2}/dp_cli/commands/storage.py +0 -0
- {dp_cli-0.6.0 → dp_cli-0.6.2}/dp_cli/commands/tab.py +0 -0
- {dp_cli-0.6.0 → dp_cli-0.6.2}/dp_cli/locators/__init__.py +0 -0
- {dp_cli-0.6.0 → dp_cli-0.6.2}/dp_cli/locators/playwright.py +0 -0
- {dp_cli-0.6.0 → dp_cli-0.6.2}/dp_cli/locators/pw_js.py +0 -0
- {dp_cli-0.6.0 → dp_cli-0.6.2}/dp_cli/main.py +0 -0
- {dp_cli-0.6.0 → dp_cli-0.6.2}/dp_cli/session.py +0 -0
- {dp_cli-0.6.0 → dp_cli-0.6.2}/dp_cli/snapshot/__init__.py +0 -0
- {dp_cli-0.6.0 → dp_cli-0.6.2}/dp_cli/snapshot/clickable_js.py +0 -0
- {dp_cli-0.6.0 → dp_cli-0.6.2}/dp_cli/snapshot/js_scripts.py +0 -0
- {dp_cli-0.6.0 → dp_cli-0.6.2}/dp_cli/stealth.py +0 -0
- {dp_cli-0.6.0 → dp_cli-0.6.2}/dp_cli.egg-info/dependency_links.txt +0 -0
- {dp_cli-0.6.0 → dp_cli-0.6.2}/dp_cli.egg-info/entry_points.txt +0 -0
- {dp_cli-0.6.0 → dp_cli-0.6.2}/dp_cli.egg-info/top_level.txt +0 -0
- {dp_cli-0.6.0 → dp_cli-0.6.2}/setup.cfg +0 -0
- {dp_cli-0.6.0 → dp_cli-0.6.2}/tests/test_bridge_integration.py +0 -0
- {dp_cli-0.6.0 → dp_cli-0.6.2}/tests/test_clickable.py +0 -0
- {dp_cli-0.6.0 → dp_cli-0.6.2}/tests/test_pw_locator.py +0 -0
- {dp_cli-0.6.0 → dp_cli-0.6.2}/tests/test_resolve_locator.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dp-cli
|
|
3
|
-
Version: 0.6.
|
|
3
|
+
Version: 0.6.2
|
|
4
4
|
Summary: A powerful CLI for DrissionPage — browser automation, structured data extraction, network listening and more.
|
|
5
5
|
License: BSD-3-Clause
|
|
6
6
|
Project-URL: Homepage, https://github.com/mofanx/dp-cli
|
|
@@ -18,6 +18,11 @@ Requires-Dist: click>=8.0
|
|
|
18
18
|
Requires-Dist: aiohttp>=3.9
|
|
19
19
|
Requires-Dist: websockets>=12
|
|
20
20
|
Requires-Dist: requests>=2.28
|
|
21
|
+
Provides-Extra: test
|
|
22
|
+
Requires-Dist: pytest>=7.4.0; extra == "test"
|
|
23
|
+
Requires-Dist: pytest-cov>=4.1.0; extra == "test"
|
|
24
|
+
Requires-Dist: pytest-timeout>=2.1.0; extra == "test"
|
|
25
|
+
Requires-Dist: pytest-mock>=3.11.0; extra == "test"
|
|
21
26
|
|
|
22
27
|
# dp-cli
|
|
23
28
|
|
|
@@ -133,9 +138,12 @@ Every element gets an `[N]` ref usable in any command: `dp click "ref:5"`.
|
|
|
133
138
|
|
|
134
139
|
```bash
|
|
135
140
|
dp snapshot # a11y + clickable (default); high + medium markers
|
|
136
|
-
dp snapshot
|
|
137
|
-
dp snapshot
|
|
138
|
-
dp snapshot
|
|
141
|
+
dp snapshot -i # interactive mode: only interactive elements
|
|
142
|
+
dp snapshot -s ".main" # limit to specific area
|
|
143
|
+
dp snapshot -p "data-test" # custom locator priority
|
|
144
|
+
dp scan --viewport # only elements currently in viewport
|
|
145
|
+
dp scan --confidence all # include low-confidence heuristics
|
|
146
|
+
dp scan --confidence high # only the sure-thing clickables
|
|
139
147
|
```
|
|
140
148
|
|
|
141
149
|
### `dp scan` — fast clickable-only listing
|
|
@@ -239,7 +247,7 @@ GPU or Xvfb environment.
|
|
|
239
247
|
|
|
240
248
|
```bash
|
|
241
249
|
# 1. Discover CSS class names via noise-filtered content tree
|
|
242
|
-
dp snapshot
|
|
250
|
+
dp snapshot -i -s ".main"
|
|
243
251
|
|
|
244
252
|
# 2. Verify field selectors
|
|
245
253
|
dp query "css:.item-title" --fields "text,loc"
|
|
@@ -250,7 +258,7 @@ dp extract "css:.item-card" \
|
|
|
250
258
|
"price":"css:.item-price",
|
|
251
259
|
"tags":{"selector":"css:.tag","multi":true},
|
|
252
260
|
"url":{"selector":"css:a","attr":"href"}}' \
|
|
253
|
-
--limit 100
|
|
261
|
+
--limit 100 -o csv -f result.csv
|
|
254
262
|
```
|
|
255
263
|
|
|
256
264
|
## Project Structure
|
|
@@ -112,9 +112,12 @@ Every element gets an `[N]` ref usable in any command: `dp click "ref:5"`.
|
|
|
112
112
|
|
|
113
113
|
```bash
|
|
114
114
|
dp snapshot # a11y + clickable (default); high + medium markers
|
|
115
|
-
dp snapshot
|
|
116
|
-
dp snapshot
|
|
117
|
-
dp snapshot
|
|
115
|
+
dp snapshot -i # interactive mode: only interactive elements
|
|
116
|
+
dp snapshot -s ".main" # limit to specific area
|
|
117
|
+
dp snapshot -p "data-test" # custom locator priority
|
|
118
|
+
dp scan --viewport # only elements currently in viewport
|
|
119
|
+
dp scan --confidence all # include low-confidence heuristics
|
|
120
|
+
dp scan --confidence high # only the sure-thing clickables
|
|
118
121
|
```
|
|
119
122
|
|
|
120
123
|
### `dp scan` — fast clickable-only listing
|
|
@@ -218,7 +221,7 @@ GPU or Xvfb environment.
|
|
|
218
221
|
|
|
219
222
|
```bash
|
|
220
223
|
# 1. Discover CSS class names via noise-filtered content tree
|
|
221
|
-
dp snapshot
|
|
224
|
+
dp snapshot -i -s ".main"
|
|
222
225
|
|
|
223
226
|
# 2. Verify field selectors
|
|
224
227
|
dp query "css:.item-title" --fields "text,loc"
|
|
@@ -229,7 +232,7 @@ dp extract "css:.item-card" \
|
|
|
229
232
|
"price":"css:.item-price",
|
|
230
233
|
"tags":{"selector":"css:.tag","multi":true},
|
|
231
234
|
"url":{"selector":"css:a","attr":"href"}}' \
|
|
232
|
-
--limit 100
|
|
235
|
+
--limit 100 -o csv -f result.csv
|
|
233
236
|
```
|
|
234
237
|
|
|
235
238
|
## Project Structure
|
|
@@ -464,11 +464,29 @@ async def main_async(user_data_dir: Path, host: str, port: int) -> None:
|
|
|
464
464
|
# 等待终止信号
|
|
465
465
|
stop_evt = asyncio.Event()
|
|
466
466
|
loop = asyncio.get_running_loop()
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
467
|
+
|
|
468
|
+
if sys.platform == 'win32':
|
|
469
|
+
# Windows ProactorEventLoop 不支持 add_signal_handler。
|
|
470
|
+
# 改用 signal.signal 拦截 SIGINT / SIGBREAK(来自 CTRL_BREAK_EVENT)。
|
|
471
|
+
# signal handler 跑在主线程的信号上下文中,必须用 call_soon_threadsafe
|
|
472
|
+
# 通知 event loop。
|
|
473
|
+
def _win_signal_handler(signum, frame):
|
|
474
|
+
loop.call_soon_threadsafe(stop_evt.set)
|
|
475
|
+
|
|
476
|
+
for sig_name in ('SIGINT', 'SIGBREAK', 'SIGTERM'):
|
|
477
|
+
sig = getattr(signal, sig_name, None)
|
|
478
|
+
if sig is None:
|
|
479
|
+
continue
|
|
480
|
+
try:
|
|
481
|
+
signal.signal(sig, _win_signal_handler)
|
|
482
|
+
except (ValueError, OSError):
|
|
483
|
+
pass
|
|
484
|
+
else:
|
|
485
|
+
for sig in (signal.SIGINT, signal.SIGTERM):
|
|
486
|
+
try:
|
|
487
|
+
loop.add_signal_handler(sig, stop_evt.set)
|
|
488
|
+
except NotImplementedError:
|
|
489
|
+
pass
|
|
472
490
|
try:
|
|
473
491
|
await stop_evt.wait()
|
|
474
492
|
finally:
|
|
@@ -8,8 +8,10 @@ chrome://inspect 桥接进程生命周期管理
|
|
|
8
8
|
- start_bridge(user_data_dir): spawn `python -m dp_cli.bridge` 子进程,
|
|
9
9
|
等待其向 stdout 打印 "BRIDGE_READY host=... port=..." 标记后返回 (pid, port)。
|
|
10
10
|
|
|
11
|
-
- stop_bridge(pid):
|
|
12
|
-
|
|
11
|
+
- stop_bridge(pid): 向子进程发终止信号;如 2 秒未退出再强杀。
|
|
12
|
+
POSIX: SIGTERM → SIGKILL(针对整个进程组)。
|
|
13
|
+
Windows: CTRL_BREAK_EVENT → taskkill /F /T。
|
|
14
|
+
- is_bridge_alive(pid): OS 级存在性检查(Windows 走 OpenProcess)。
|
|
13
15
|
"""
|
|
14
16
|
|
|
15
17
|
from __future__ import annotations
|
|
@@ -22,6 +24,59 @@ import sys
|
|
|
22
24
|
import time
|
|
23
25
|
from pathlib import Path
|
|
24
26
|
|
|
27
|
+
IS_WINDOWS = sys.platform == 'win32'
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _detach_spawn_kwargs() -> dict:
|
|
31
|
+
"""让 bridge 子进程脱离父进程的信号/控制台分组。
|
|
32
|
+
|
|
33
|
+
POSIX: ``start_new_session=True`` → setsid,使 bridge 自成进程组,
|
|
34
|
+
父进程 Ctrl-C 不会传递过来。
|
|
35
|
+
Windows: ``CREATE_NEW_PROCESS_GROUP`` 让我们后续可以发 CTRL_BREAK_EVENT;
|
|
36
|
+
``CREATE_NO_WINDOW`` 避免在 GUI/服务环境弹出黑色控制台窗口。
|
|
37
|
+
"""
|
|
38
|
+
if IS_WINDOWS:
|
|
39
|
+
CREATE_NEW_PROCESS_GROUP = 0x00000200
|
|
40
|
+
CREATE_NO_WINDOW = 0x08000000
|
|
41
|
+
return {'creationflags': CREATE_NEW_PROCESS_GROUP | CREATE_NO_WINDOW}
|
|
42
|
+
return {'start_new_session': True}
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _win_pid_alive(pid: int) -> bool:
|
|
46
|
+
"""Windows: 用 OpenProcess + GetExitCodeProcess 判断进程是否存活。"""
|
|
47
|
+
import ctypes
|
|
48
|
+
from ctypes import wintypes
|
|
49
|
+
|
|
50
|
+
PROCESS_QUERY_LIMITED_INFORMATION = 0x1000
|
|
51
|
+
STILL_ACTIVE = 259
|
|
52
|
+
kernel32 = ctypes.windll.kernel32
|
|
53
|
+
kernel32.OpenProcess.restype = wintypes.HANDLE
|
|
54
|
+
kernel32.OpenProcess.argtypes = [wintypes.DWORD, wintypes.BOOL, wintypes.DWORD]
|
|
55
|
+
h = kernel32.OpenProcess(PROCESS_QUERY_LIMITED_INFORMATION, False, pid)
|
|
56
|
+
if not h:
|
|
57
|
+
return False
|
|
58
|
+
try:
|
|
59
|
+
code = wintypes.DWORD()
|
|
60
|
+
if not kernel32.GetExitCodeProcess(h, ctypes.byref(code)):
|
|
61
|
+
return False
|
|
62
|
+
return code.value == STILL_ACTIVE
|
|
63
|
+
finally:
|
|
64
|
+
kernel32.CloseHandle(h)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def _win_terminate(pid: int) -> None:
|
|
68
|
+
"""Windows: taskkill /F /T 强杀进程及其子进程树。"""
|
|
69
|
+
try:
|
|
70
|
+
subprocess.run(
|
|
71
|
+
['taskkill', '/F', '/T', '/PID', str(pid)],
|
|
72
|
+
stdout=subprocess.DEVNULL,
|
|
73
|
+
stderr=subprocess.DEVNULL,
|
|
74
|
+
check=False,
|
|
75
|
+
timeout=5,
|
|
76
|
+
)
|
|
77
|
+
except Exception:
|
|
78
|
+
pass
|
|
79
|
+
|
|
25
80
|
|
|
26
81
|
_READY_RE = re.compile(r'^BRIDGE_READY host=(?P<host>\S+) port=(?P<port>\d+)\s*$')
|
|
27
82
|
|
|
@@ -82,14 +137,15 @@ def start_bridge(user_data_dir: str | os.PathLike,
|
|
|
82
137
|
'--listen', str(listen_port),
|
|
83
138
|
'-v',
|
|
84
139
|
]
|
|
85
|
-
#
|
|
140
|
+
# 让 bridge 成为独立进程组,防止父进程 SIGINT/Ctrl-C 误杀。
|
|
141
|
+
# POSIX 走 start_new_session;Windows 走 CREATE_NEW_PROCESS_GROUP。
|
|
86
142
|
proc = subprocess.Popen(
|
|
87
143
|
cmd,
|
|
88
144
|
stdout=subprocess.PIPE,
|
|
89
145
|
stderr=subprocess.PIPE,
|
|
90
146
|
text=True,
|
|
91
|
-
start_new_session=True,
|
|
92
147
|
bufsize=1, # 行缓冲
|
|
148
|
+
**_detach_spawn_kwargs(),
|
|
93
149
|
)
|
|
94
150
|
|
|
95
151
|
# 立即提示用户:bridge 正在连接;若 Chrome 弹出授权框请点击。
|
|
@@ -173,6 +229,8 @@ def start_bridge(user_data_dir: str | os.PathLike,
|
|
|
173
229
|
def is_bridge_alive(pid: int) -> bool:
|
|
174
230
|
if pid <= 0:
|
|
175
231
|
return False
|
|
232
|
+
if IS_WINDOWS:
|
|
233
|
+
return _win_pid_alive(pid)
|
|
176
234
|
try:
|
|
177
235
|
os.kill(pid, 0)
|
|
178
236
|
return True
|
|
@@ -185,18 +243,37 @@ def is_bridge_alive(pid: int) -> bool:
|
|
|
185
243
|
|
|
186
244
|
|
|
187
245
|
def stop_bridge(pid: int, timeout: float = 2.0) -> bool:
|
|
188
|
-
"""停止 bridge 子进程。返回是否成功终止。
|
|
246
|
+
"""停止 bridge 子进程。返回是否成功终止。
|
|
247
|
+
|
|
248
|
+
POSIX: SIGTERM 整个进程组 → 等待 → SIGKILL。
|
|
249
|
+
Windows: CTRL_BREAK_EVENT(依赖 spawn 时的 CREATE_NEW_PROCESS_GROUP)
|
|
250
|
+
→ 等待 → taskkill /F /T 终止进程树。
|
|
251
|
+
"""
|
|
189
252
|
if not is_bridge_alive(pid):
|
|
190
253
|
return True
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
except (ProcessLookupError, PermissionError):
|
|
254
|
+
|
|
255
|
+
if IS_WINDOWS:
|
|
256
|
+
# 1) 优雅: 给整个进程组发 CTRL_BREAK_EVENT
|
|
195
257
|
try:
|
|
196
|
-
os.kill(pid, signal.
|
|
258
|
+
os.kill(pid, signal.CTRL_BREAK_EVENT)
|
|
197
259
|
except Exception:
|
|
198
260
|
pass
|
|
199
|
-
|
|
261
|
+
|
|
262
|
+
deadline = time.monotonic() + timeout
|
|
263
|
+
while time.monotonic() < deadline:
|
|
264
|
+
if not is_bridge_alive(pid):
|
|
265
|
+
return True
|
|
266
|
+
time.sleep(0.05)
|
|
267
|
+
|
|
268
|
+
# 2) 强杀: taskkill /F /T 终止进程树
|
|
269
|
+
_win_terminate(pid)
|
|
270
|
+
time.sleep(0.1)
|
|
271
|
+
return not is_bridge_alive(pid)
|
|
272
|
+
|
|
273
|
+
# POSIX: 先 SIGTERM 整个进程组(start_new_session 让 bridge 自成组)
|
|
274
|
+
try:
|
|
275
|
+
os.killpg(pid, signal.SIGTERM)
|
|
276
|
+
except (ProcessLookupError, PermissionError, OSError):
|
|
200
277
|
try:
|
|
201
278
|
os.kill(pid, signal.SIGTERM)
|
|
202
279
|
except Exception:
|
|
@@ -9,6 +9,20 @@ from dp_cli.commands._utils import (
|
|
|
9
9
|
session_option, _get_page, resolve_locator, wait_network_idle,
|
|
10
10
|
)
|
|
11
11
|
|
|
12
|
+
# ponytail: 提取重复的 findScrollable JS 函数
|
|
13
|
+
_FIND_SCROLLABLE_JS = """
|
|
14
|
+
function findScrollable(el) {
|
|
15
|
+
while (el && el !== document.body && el !== document.documentElement) {
|
|
16
|
+
const st = getComputedStyle(el);
|
|
17
|
+
const canY = /(auto|scroll|overlay)/.test(st.overflowY) && el.scrollHeight > el.clientHeight + 1;
|
|
18
|
+
const canX = /(auto|scroll|overlay)/.test(st.overflowX) && el.scrollWidth > el.clientWidth + 1;
|
|
19
|
+
if (canY || canX) return el;
|
|
20
|
+
el = el.parentElement;
|
|
21
|
+
}
|
|
22
|
+
return document.scrollingElement || document.documentElement;
|
|
23
|
+
}
|
|
24
|
+
"""
|
|
25
|
+
|
|
12
26
|
|
|
13
27
|
def register(cli):
|
|
14
28
|
|
|
@@ -142,32 +156,22 @@ def register(cli):
|
|
|
142
156
|
|
|
143
157
|
if mouse_x is not None and mouse_y is not None:
|
|
144
158
|
result = page.run_js(
|
|
145
|
-
"""
|
|
146
|
-
function findScrollable(el) {
|
|
147
|
-
while (el && el !== document.body && el !== document.documentElement) {
|
|
148
|
-
const st = getComputedStyle(el);
|
|
149
|
-
const canY = /(auto|scroll|overlay)/.test(st.overflowY) && el.scrollHeight > el.clientHeight + 1;
|
|
150
|
-
const canX = /(auto|scroll|overlay)/.test(st.overflowX) && el.scrollWidth > el.clientWidth + 1;
|
|
151
|
-
if (canY || canX) return el;
|
|
152
|
-
el = el.parentElement;
|
|
153
|
-
}
|
|
154
|
-
return document.scrollingElement || document.documentElement;
|
|
155
|
-
}
|
|
159
|
+
f"""{_FIND_SCROLLABLE_JS}
|
|
156
160
|
const start = document.elementFromPoint(arguments[2], arguments[3]);
|
|
157
161
|
const target = findScrollable(start);
|
|
158
|
-
const before = {scrollTop: target.scrollTop, scrollLeft: target.scrollLeft};
|
|
159
|
-
if (arguments[4]) {
|
|
162
|
+
const before = {{scrollTop: target.scrollTop, scrollLeft: target.scrollLeft}};
|
|
163
|
+
if (arguments[4]) {{
|
|
160
164
|
target.scrollTop = 0;
|
|
161
|
-
} else if (arguments[5]) {
|
|
165
|
+
}} else if (arguments[5]) {{
|
|
162
166
|
target.scrollTop = target.scrollHeight;
|
|
163
|
-
} else {
|
|
167
|
+
}} else {{
|
|
164
168
|
target.scrollTop += arguments[1];
|
|
165
169
|
target.scrollLeft += arguments[0];
|
|
166
|
-
}
|
|
167
|
-
target.dispatchEvent(new Event('scroll', {bubbles: true}));
|
|
168
|
-
return {
|
|
170
|
+
}}
|
|
171
|
+
target.dispatchEvent(new Event('scroll', {{bubbles: true}}));
|
|
172
|
+
return {{
|
|
169
173
|
before,
|
|
170
|
-
after: {scrollTop: target.scrollTop, scrollLeft: target.scrollLeft},
|
|
174
|
+
after: {{scrollTop: target.scrollTop, scrollLeft: target.scrollLeft}},
|
|
171
175
|
scrollHeight: target.scrollHeight,
|
|
172
176
|
clientHeight: target.clientHeight,
|
|
173
177
|
scrollWidth: target.scrollWidth,
|
|
@@ -176,8 +180,8 @@ def register(cli):
|
|
|
176
180
|
id: target.id || '',
|
|
177
181
|
className: target.className || '',
|
|
178
182
|
mode: 'mouse'
|
|
179
|
-
};
|
|
180
|
-
""",
|
|
183
|
+
}};
|
|
184
|
+
}}""",
|
|
181
185
|
x, y, mouse_x, mouse_y, top, bottom,
|
|
182
186
|
)
|
|
183
187
|
ok({'x': x, 'y': y, 'mouse': {'x': mouse_x, 'y': mouse_y},
|
|
@@ -292,19 +296,9 @@ def register(cli):
|
|
|
292
296
|
return int(target.run_js('return this.scrollHeight'))
|
|
293
297
|
if use_mouse_container:
|
|
294
298
|
return int(page.run_js(
|
|
295
|
-
"""
|
|
296
|
-
function findScrollable(el) {
|
|
297
|
-
while (el && el !== document.body && el !== document.documentElement) {
|
|
298
|
-
const st = getComputedStyle(el);
|
|
299
|
-
const canY = /(auto|scroll|overlay)/.test(st.overflowY) && el.scrollHeight > el.clientHeight + 1;
|
|
300
|
-
if (canY) return el;
|
|
301
|
-
el = el.parentElement;
|
|
302
|
-
}
|
|
303
|
-
return document.scrollingElement || document.documentElement;
|
|
304
|
-
}
|
|
299
|
+
f"""{_FIND_SCROLLABLE_JS}
|
|
305
300
|
const target = findScrollable(document.elementFromPoint(arguments[0], arguments[1]));
|
|
306
|
-
return target.scrollHeight;
|
|
307
|
-
""",
|
|
301
|
+
return target.scrollHeight;""",
|
|
308
302
|
mouse_x, mouse_y,
|
|
309
303
|
))
|
|
310
304
|
return int(page.run_js('return document.documentElement.scrollHeight'))
|
|
@@ -329,22 +323,13 @@ def register(cli):
|
|
|
329
323
|
)
|
|
330
324
|
elif use_mouse_container:
|
|
331
325
|
position = page.run_js(
|
|
332
|
-
"""
|
|
333
|
-
function findScrollable(el) {
|
|
334
|
-
while (el && el !== document.body && el !== document.documentElement) {
|
|
335
|
-
const st = getComputedStyle(el);
|
|
336
|
-
const canY = /(auto|scroll|overlay)/.test(st.overflowY) && el.scrollHeight > el.clientHeight + 1;
|
|
337
|
-
if (canY) return el;
|
|
338
|
-
el = el.parentElement;
|
|
339
|
-
}
|
|
340
|
-
return document.scrollingElement || document.documentElement;
|
|
341
|
-
}
|
|
326
|
+
f"""{_FIND_SCROLLABLE_JS}
|
|
342
327
|
const target = findScrollable(document.elementFromPoint(arguments[0], arguments[1]));
|
|
343
328
|
const before = target.scrollTop;
|
|
344
329
|
const delta = arguments[2] > 0 ? arguments[2] : Math.max(300, Math.floor(target.clientHeight * arguments[3]));
|
|
345
330
|
target.scrollTop += delta;
|
|
346
|
-
target.dispatchEvent(new Event('scroll', {bubbles: true}));
|
|
347
|
-
return {
|
|
331
|
+
target.dispatchEvent(new Event('scroll', {{bubbles: true}}));
|
|
332
|
+
return {{
|
|
348
333
|
before,
|
|
349
334
|
after: target.scrollTop,
|
|
350
335
|
delta,
|
|
@@ -354,8 +339,7 @@ def register(cli):
|
|
|
354
339
|
id: target.id || '',
|
|
355
340
|
className: target.className || '',
|
|
356
341
|
mode: 'mouse'
|
|
357
|
-
};
|
|
358
|
-
""",
|
|
342
|
+
}};""",
|
|
359
343
|
mouse_x, mouse_y, step, 3 if fast else 0.9,
|
|
360
344
|
)
|
|
361
345
|
else:
|
|
@@ -18,28 +18,33 @@ def register(cli):
|
|
|
18
18
|
|
|
19
19
|
@cli.command()
|
|
20
20
|
@session_option
|
|
21
|
-
@click.option('--mode',
|
|
22
|
-
type=click.Choice(['full', 'brief', 'text']),
|
|
21
|
+
@click.option('--mode', '-m',
|
|
22
|
+
type=click.Choice(['full', 'interactive', 'brief', 'text']),
|
|
23
23
|
default='full', show_default=True, help='快照模式')
|
|
24
|
-
@click.option('--
|
|
25
|
-
|
|
24
|
+
@click.option('--interactive', '-i', is_flag=True, default=False,
|
|
25
|
+
help='快捷方式:等价于 --mode interactive')
|
|
26
|
+
@click.option('--selector', '-s', default=None, help='限定快照范围的 CSS 选择器')
|
|
27
|
+
@click.option('--format', '-f', 'fmt', type=click.Choice(['json', 'text']),
|
|
26
28
|
default='text', show_default=True, help='输出格式')
|
|
27
|
-
@click.option('--filename', default=None, help='保存到文件路径')
|
|
29
|
+
@click.option('--filename', '-o', default=None, help='保存到文件路径')
|
|
28
30
|
@click.option('--no-clickables', is_flag=True, default=False,
|
|
29
31
|
help='禁用 Vimium 风格可交互元素补充探测(默认开启)')
|
|
30
32
|
@click.option('--include-low', is_flag=True, default=False,
|
|
31
33
|
help='包含 low 置信度元素(cursor:pointer / class 规则匹配,可能假阳性)')
|
|
32
34
|
@click.option('--viewport-only', is_flag=True, default=False,
|
|
33
35
|
help='补充探测只看视口内元素(省 token、更快)')
|
|
34
|
-
|
|
35
|
-
|
|
36
|
+
@click.option('--locator-priority', '-p', default=None,
|
|
37
|
+
help='自定义 locator 属性优先级(逗号分隔),如 "data-testid,data-test-id,id"')
|
|
38
|
+
def snapshot(session, mode, interactive, selector, fmt, filename,
|
|
39
|
+
no_clickables, include_low, viewport_only, locator_priority):
|
|
36
40
|
"""获取页面快照(a11y tree + Vimium 风格可交互元素补充)。
|
|
37
41
|
|
|
38
42
|
\b
|
|
39
43
|
模式说明(默认 full):
|
|
40
|
-
full
|
|
41
|
-
|
|
42
|
-
|
|
44
|
+
full 【默认】完整页面快照,包含所有内容和交互元素
|
|
45
|
+
interactive【推荐】只显示交互元素,适合脚本执行
|
|
46
|
+
brief interactive 的别名(向后兼容)
|
|
47
|
+
text 纯文本模式,按阅读顺序输出可见文本
|
|
43
48
|
|
|
44
49
|
\b
|
|
45
50
|
可交互元素补充探测(默认开启):
|
|
@@ -50,12 +55,24 @@ def register(cli):
|
|
|
50
55
|
\b
|
|
51
56
|
示例:
|
|
52
57
|
dp snapshot # 完整快照(默认含 clickable 补充)
|
|
53
|
-
dp snapshot
|
|
58
|
+
dp snapshot -i # 只显示交互元素(最简洁,推荐)
|
|
59
|
+
dp snapshot -m interactive # 只显示交互元素(明确)
|
|
60
|
+
dp snapshot -m brief # 精简模式(interactive 别名)
|
|
54
61
|
dp snapshot --viewport-only # 只扫视口内,更快
|
|
55
62
|
dp snapshot --include-low # 启用 low 置信度(可能假阳性)
|
|
56
63
|
dp snapshot --no-clickables # 纯 a11y tree,旧版本行为
|
|
57
|
-
dp snapshot
|
|
64
|
+
dp snapshot -s ".main" # 只获取指定区域
|
|
65
|
+
dp snapshot -p "data-testid,data-test-id,id" # 自定义属性优先级
|
|
58
66
|
"""
|
|
67
|
+
# -i 选项覆盖 mode
|
|
68
|
+
if interactive:
|
|
69
|
+
mode = 'interactive'
|
|
70
|
+
|
|
71
|
+
# 解析 locator_priority
|
|
72
|
+
attr_priority = None
|
|
73
|
+
if locator_priority:
|
|
74
|
+
attr_priority = [p.strip() for p in locator_priority.split(',') if p.strip()]
|
|
75
|
+
|
|
59
76
|
page = _get_page(session)
|
|
60
77
|
|
|
61
78
|
try:
|
|
@@ -64,6 +81,7 @@ def register(cli):
|
|
|
64
81
|
with_clickables=not no_clickables,
|
|
65
82
|
include_low=include_low,
|
|
66
83
|
viewport_only=viewport_only,
|
|
84
|
+
attr_priority=attr_priority,
|
|
67
85
|
)
|
|
68
86
|
except Exception as e:
|
|
69
87
|
error('获取页面快照失败', code='SNAPSHOT_FAILED', detail=str(e))
|
|
@@ -71,13 +89,15 @@ def register(cli):
|
|
|
71
89
|
|
|
72
90
|
# 收集 ref 映射(所有模式都收集,便于后续 ref:N 引用)
|
|
73
91
|
refs = {}
|
|
92
|
+
# brief 是 interactive 的别名,统一处理
|
|
93
|
+
mode_effective = 'interactive' if mode == 'brief' else mode
|
|
74
94
|
if fmt == 'json':
|
|
75
95
|
render_a11y_text(data, refs=refs) # 触发编号分配
|
|
76
96
|
output = json.dumps({'status': 'ok', 'data': data},
|
|
77
97
|
ensure_ascii=False, indent=2)
|
|
78
|
-
elif
|
|
98
|
+
elif mode_effective == 'text':
|
|
79
99
|
output = render_a11y_plain_text(data, refs=refs)
|
|
80
|
-
elif
|
|
100
|
+
elif mode_effective == 'interactive':
|
|
81
101
|
output = render_a11y_text(data, brief=True, refs=refs)
|
|
82
102
|
else:
|
|
83
103
|
output = render_a11y_text(data, refs=refs)
|
|
@@ -102,9 +122,9 @@ def register(cli):
|
|
|
102
122
|
'使用 "all" 等价于 high,medium,low')
|
|
103
123
|
@click.option('--max', 'max_elements', default=1000, show_default=True,
|
|
104
124
|
help='最多返回多少个元素')
|
|
105
|
-
@click.option('--format', 'fmt', type=click.Choice(['text', 'json']),
|
|
125
|
+
@click.option('--format', '-f', 'fmt', type=click.Choice(['text', 'json']),
|
|
106
126
|
default='text', show_default=True, help='输出格式')
|
|
107
|
-
@click.option('--filename', default=None, help='保存到文件路径')
|
|
127
|
+
@click.option('--filename', '-o', default=None, help='保存到文件路径')
|
|
108
128
|
@click.option('--verbose', '-v', is_flag=True, default=False,
|
|
109
129
|
help='显示 detection reason 和像素尺寸(调试用)')
|
|
110
130
|
def scan(session, viewport_only, confidence, max_elements, fmt, filename, verbose):
|
|
@@ -244,9 +264,9 @@ def register(cli):
|
|
|
244
264
|
@click.argument('container')
|
|
245
265
|
@click.argument('fields_json')
|
|
246
266
|
@click.option('--limit', type=int, default=None, help='最多提取多少条记录', show_default=True)
|
|
247
|
-
@click.option('--output', 'output_fmt', type=click.Choice(['json', 'csv']),
|
|
267
|
+
@click.option('--output', '-o', 'output_fmt', type=click.Choice(['json', 'csv']),
|
|
248
268
|
default='json', show_default=True, help='输出格式')
|
|
249
|
-
@click.option('--filename', default=None, help='保存结果到文件')
|
|
269
|
+
@click.option('--filename', '-f', default=None, help='保存结果到文件')
|
|
250
270
|
def cmd_extract(session, container, fields_json, limit, output_fmt, filename):
|
|
251
271
|
"""批量提取结构化数据(列表页核心工具)。
|
|
252
272
|
|
|
@@ -305,7 +325,7 @@ def register(cli):
|
|
|
305
325
|
@click.option('--fields', default='text,loc', show_default=True,
|
|
306
326
|
help='提取字段,逗号分隔')
|
|
307
327
|
@click.option('--limit', default=None, help='最多返回多少条', show_default=True)
|
|
308
|
-
@click.option('--filename', default=None, help='保存结果到 JSON 文件')
|
|
328
|
+
@click.option('--filename', '-o', default=None, help='保存结果到 JSON 文件')
|
|
309
329
|
def cmd_query(session, selector, fields, limit, filename):
|
|
310
330
|
"""按选择器查询元素,提取内容和定位器。支持动态渲染内容。
|
|
311
331
|
|
|
@@ -457,7 +477,7 @@ def register(cli):
|
|
|
457
477
|
first_cls = cls.strip().split()[0] if cls.strip() else ''
|
|
458
478
|
if first_cls:
|
|
459
479
|
label += f'.{first_cls}'
|
|
460
|
-
loc = suggest_locator(tag, attrs, text[:50])
|
|
480
|
+
loc = suggest_locator(tag, attrs, text[:50], attr_priority=None)
|
|
461
481
|
summary = {'tag': label, 'loc': loc}
|
|
462
482
|
if text:
|
|
463
483
|
summary['text'] = text[:max_text] + ('…' if len(text) > max_text else '')
|
|
@@ -6,6 +6,7 @@ dp-cli 输出格式化模块
|
|
|
6
6
|
import json
|
|
7
7
|
import sys
|
|
8
8
|
from typing import Any, Optional
|
|
9
|
+
from dp_cli.snapshot.utils import suggest_locator
|
|
9
10
|
|
|
10
11
|
|
|
11
12
|
def ok(data: Any = None, msg: str = None) -> None:
|
|
@@ -15,7 +16,7 @@ def ok(data: Any = None, msg: str = None) -> None:
|
|
|
15
16
|
result['message'] = msg
|
|
16
17
|
if data is not None:
|
|
17
18
|
result['data'] = data
|
|
18
|
-
|
|
19
|
+
print(json.dumps(result, ensure_ascii=False, indent=2))
|
|
19
20
|
|
|
20
21
|
|
|
21
22
|
def error(msg: str, code: str = 'ERROR', detail: str = None) -> None:
|
|
@@ -23,12 +24,8 @@ def error(msg: str, code: str = 'ERROR', detail: str = None) -> None:
|
|
|
23
24
|
result = {'status': 'error', 'code': code, 'message': msg}
|
|
24
25
|
if detail:
|
|
25
26
|
result['detail'] = detail
|
|
26
|
-
_print(result)
|
|
27
|
-
sys.exit(1)
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
def _print(result: dict) -> None:
|
|
31
27
|
print(json.dumps(result, ensure_ascii=False, indent=2))
|
|
28
|
+
sys.exit(1)
|
|
32
29
|
|
|
33
30
|
|
|
34
31
|
def format_element(ele, include_rect: bool = False) -> dict:
|
|
@@ -42,7 +39,7 @@ def format_element(ele, include_rect: bool = False) -> dict:
|
|
|
42
39
|
'tag': ele.tag,
|
|
43
40
|
'text': (ele.raw_text or '').strip()[:200],
|
|
44
41
|
'attrs': attrs,
|
|
45
|
-
'loc':
|
|
42
|
+
'loc': suggest_locator(ele.tag, attrs, (ele.raw_text or '').strip()[:50]),
|
|
46
43
|
}
|
|
47
44
|
|
|
48
45
|
if include_rect:
|
|
@@ -58,36 +55,6 @@ def format_element(ele, include_rect: bool = False) -> dict:
|
|
|
58
55
|
return info
|
|
59
56
|
|
|
60
57
|
|
|
61
|
-
def _suggest_locator(ele, attrs: dict) -> str:
|
|
62
|
-
"""为元素生成最优 DrissionPage 定位字符串"""
|
|
63
|
-
# 优先用 id
|
|
64
|
-
if attrs.get('id'):
|
|
65
|
-
return f'#{attrs["id"]}'
|
|
66
|
-
|
|
67
|
-
# data-testid / data-qa / aria-label 等语义属性
|
|
68
|
-
for semantic in ('data-testid', 'data-qa', 'aria-label', 'name', 'placeholder'):
|
|
69
|
-
if attrs.get(semantic):
|
|
70
|
-
return f'@{semantic}={attrs[semantic]}'
|
|
71
|
-
|
|
72
|
-
# 有唯一 class
|
|
73
|
-
cls = attrs.get('class', '')
|
|
74
|
-
if cls:
|
|
75
|
-
classes = cls.strip().split()
|
|
76
|
-
if classes:
|
|
77
|
-
return f'.{classes[0]}'
|
|
78
|
-
|
|
79
|
-
# 按文本
|
|
80
|
-
try:
|
|
81
|
-
txt = (ele.raw_text or '').strip()
|
|
82
|
-
if txt and len(txt) <= 30:
|
|
83
|
-
return f'text:{txt}'
|
|
84
|
-
except Exception:
|
|
85
|
-
pass
|
|
86
|
-
|
|
87
|
-
# 最后按 tag
|
|
88
|
-
return f't:{ele.tag}'
|
|
89
|
-
|
|
90
|
-
|
|
91
58
|
def format_page_info(page) -> dict:
|
|
92
59
|
"""格式化页面基本信息"""
|
|
93
60
|
return {
|