dp-cli 0.3.2__tar.gz → 0.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dp_cli-0.3.2 → dp_cli-0.5.0}/PKG-INFO +50 -2
- {dp_cli-0.3.2 → dp_cli-0.5.0}/README.md +49 -1
- {dp_cli-0.3.2 → dp_cli-0.5.0}/dp_cli/commands/__init__.py +2 -2
- {dp_cli-0.3.2 → dp_cli-0.5.0}/dp_cli/commands/_utils.py +63 -11
- {dp_cli-0.3.2 → dp_cli-0.5.0}/dp_cli/commands/browser.py +1 -1
- dp_cli-0.5.0/dp_cli/commands/keyboard.py +405 -0
- dp_cli-0.5.0/dp_cli/commands/record.py +204 -0
- {dp_cli-0.3.2 → dp_cli-0.5.0}/dp_cli/commands/snapshot_cmd.py +1 -1
- {dp_cli-0.3.2 → dp_cli-0.5.0}/dp_cli/commands/tab.py +6 -0
- dp_cli-0.5.0/dp_cli/locators/__init__.py +9 -0
- dp_cli-0.5.0/dp_cli/locators/playwright.py +236 -0
- dp_cli-0.5.0/dp_cli/locators/pw_js.py +395 -0
- dp_cli-0.5.0/dp_cli/recorder.py +799 -0
- {dp_cli-0.3.2 → dp_cli-0.5.0}/dp_cli.egg-info/PKG-INFO +50 -2
- {dp_cli-0.3.2 → dp_cli-0.5.0}/dp_cli.egg-info/SOURCES.txt +6 -0
- {dp_cli-0.3.2 → dp_cli-0.5.0}/pyproject.toml +1 -1
- dp_cli-0.5.0/tests/test_pw_locator.py +310 -0
- dp_cli-0.3.2/dp_cli/commands/keyboard.py +0 -225
- {dp_cli-0.3.2 → dp_cli-0.5.0}/dp_cli/__init__.py +0 -0
- {dp_cli-0.3.2 → dp_cli-0.5.0}/dp_cli/bridge.py +0 -0
- {dp_cli-0.3.2 → dp_cli-0.5.0}/dp_cli/bridge_manager.py +0 -0
- {dp_cli-0.3.2 → dp_cli-0.5.0}/dp_cli/commands/element.py +0 -0
- {dp_cli-0.3.2 → dp_cli-0.5.0}/dp_cli/commands/misc.py +0 -0
- {dp_cli-0.3.2 → dp_cli-0.5.0}/dp_cli/commands/network.py +0 -0
- {dp_cli-0.3.2 → dp_cli-0.5.0}/dp_cli/commands/page.py +0 -0
- {dp_cli-0.3.2 → dp_cli-0.5.0}/dp_cli/commands/storage.py +0 -0
- {dp_cli-0.3.2 → dp_cli-0.5.0}/dp_cli/main.py +0 -0
- {dp_cli-0.3.2 → dp_cli-0.5.0}/dp_cli/output.py +0 -0
- {dp_cli-0.3.2 → dp_cli-0.5.0}/dp_cli/session.py +0 -0
- {dp_cli-0.3.2 → dp_cli-0.5.0}/dp_cli/snapshot/__init__.py +0 -0
- {dp_cli-0.3.2 → dp_cli-0.5.0}/dp_cli/snapshot/a11y.py +0 -0
- {dp_cli-0.3.2 → dp_cli-0.5.0}/dp_cli/snapshot/clickable.py +0 -0
- {dp_cli-0.3.2 → dp_cli-0.5.0}/dp_cli/snapshot/clickable_js.py +0 -0
- {dp_cli-0.3.2 → dp_cli-0.5.0}/dp_cli/snapshot/extract.py +0 -0
- {dp_cli-0.3.2 → dp_cli-0.5.0}/dp_cli/snapshot/js_scripts.py +0 -0
- {dp_cli-0.3.2 → dp_cli-0.5.0}/dp_cli/snapshot/utils.py +0 -0
- {dp_cli-0.3.2 → dp_cli-0.5.0}/dp_cli/stealth.py +0 -0
- {dp_cli-0.3.2 → dp_cli-0.5.0}/dp_cli.egg-info/dependency_links.txt +0 -0
- {dp_cli-0.3.2 → dp_cli-0.5.0}/dp_cli.egg-info/entry_points.txt +0 -0
- {dp_cli-0.3.2 → dp_cli-0.5.0}/dp_cli.egg-info/requires.txt +0 -0
- {dp_cli-0.3.2 → dp_cli-0.5.0}/dp_cli.egg-info/top_level.txt +0 -0
- {dp_cli-0.3.2 → dp_cli-0.5.0}/setup.cfg +0 -0
- {dp_cli-0.3.2 → dp_cli-0.5.0}/tests/test_bridge_integration.py +0 -0
- {dp_cli-0.3.2 → dp_cli-0.5.0}/tests/test_bridge_manager.py +0 -0
- {dp_cli-0.3.2 → dp_cli-0.5.0}/tests/test_clickable.py +0 -0
- {dp_cli-0.3.2 → dp_cli-0.5.0}/tests/test_resolve_locator.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dp-cli
|
|
3
|
-
Version: 0.3.2
|
|
3
|
+
Version: 0.5.0
|
|
4
4
|
Summary: A powerful CLI for DrissionPage — browser automation, structured data extraction, network listening and more.
|
|
5
5
|
License: BSD-3-Clause
|
|
6
6
|
Project-URL: Homepage, https://github.com/mofanx/dp-cli
|
|
@@ -31,7 +31,8 @@ A powerful CLI for [DrissionPage](https://github.com/g1879/DrissionPage) — bro
|
|
|
31
31
|
and custom menu items the a11y tree misses; every element gets an `[N]` ref with
|
|
32
32
|
confidence markers (`⚡` medium, `?` low)
|
|
33
33
|
- **`dp scan`** — fast Vimium-style listing of interactive elements (viewport-only mode available)
|
|
34
|
-
- **Powerful locator syntax** — descriptive strings stable across navigation
|
|
34
|
+
- **Powerful locator syntax** — descriptive strings stable across navigation, plus
|
|
35
|
+
Playwright-style `pw:role=button[name="Submit"] >> nth=2` chains
|
|
35
36
|
- **Structured data extraction** — `extract` + `query` + `snapshot` for scraping list pages
|
|
36
37
|
- **Network listening** — capture XHR/Fetch requests and response bodies
|
|
37
38
|
- **Stealth patches** — `dp stealth` bypasses common automation detections
|
|
@@ -150,6 +151,53 @@ dp scan --confidence high # only the sure-thing clickables
|
|
|
150
151
|
Both `snapshot` and `scan` share the same `[N]` ref numbering per session, so
|
|
151
152
|
`dp click "ref:N"` works regardless of which one produced the snapshot.
|
|
152
153
|
|
|
154
|
+
## Playwright-style locators (`pw:` prefix)
|
|
155
|
+
|
|
156
|
+
Need semantic, role-based targeting on a fresh page (no snapshot required)?
|
|
157
|
+
Use the `pw:` prefix. Syntax mirrors Playwright, and chains with `>>`:
|
|
158
|
+
|
|
159
|
+
```bash
|
|
160
|
+
# By ARIA role (with accessible name — exact / substring / regex)
|
|
161
|
+
dp click 'pw:role=button[name="Submit"]'
|
|
162
|
+
dp click 'pw:role=button[name=/^Sign/i]'
|
|
163
|
+
dp click 'pw:role=link[name=More]' # substring
|
|
164
|
+
|
|
165
|
+
# By visible text (exact / substring / regex)
|
|
166
|
+
dp click 'pw:text="Login"' # exact
|
|
167
|
+
dp click 'pw:text=Login' # substring (case-insensitive)
|
|
168
|
+
dp click 'pw:text=/^log/i' # regex
|
|
169
|
+
|
|
170
|
+
# By form affordances
|
|
171
|
+
dp fill 'pw:placeholder=Search…' "chatgpt"
|
|
172
|
+
dp fill 'pw:label="Email"' "a@b.com"
|
|
173
|
+
dp click 'pw:alt="Logo"'
|
|
174
|
+
dp click 'pw:title="Close"'
|
|
175
|
+
dp click 'pw:testid=submit-btn' # data-testid / data-test-id / data-test
|
|
176
|
+
|
|
177
|
+
# Chain with >> (each step narrows the scope)
|
|
178
|
+
dp click 'pw:css=.sidebar >> role=listitem[name="Chat"] >> nth=2'
|
|
179
|
+
dp click 'pw:css=li >> has-text="Python"'
|
|
180
|
+
dp click 'pw:role=list >> nth=-1' # negative index = from end
|
|
181
|
+
|
|
182
|
+
# Raw css/xpath chunks mix freely
|
|
183
|
+
dp click 'pw:xpath=//nav >> role=link[name=Docs]'
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
**Matchers**: `role` / `text` / `label` / `placeholder` / `alt` / `title` /
|
|
187
|
+
`testid` / `css` / `xpath` / `nth` / `has-text` / `visible`
|
|
188
|
+
|
|
189
|
+
**Value forms**: `bare` = substring, `"quoted"` = exact, `/pattern/flags` = regex
|
|
190
|
+
|
|
191
|
+
**Visibility**: `role` / `text` / `has-text` automatically skip elements hidden
|
|
192
|
+
via `display:none`, `visibility:hidden`, `hidden` attribute, or
|
|
193
|
+
`aria-hidden="true"` (matches Playwright semantics).
|
|
194
|
+
|
|
195
|
+
**Shadow DOM**: open shadow roots are traversed automatically.
|
|
196
|
+
|
|
197
|
+
Under the hood the matcher chain is evaluated in-page as JS, the target element
|
|
198
|
+
is tagged with a one-shot `data-dp-ref` attribute, and DrissionPage resolves it
|
|
199
|
+
by that attribute — bypassing stale classes / CSS Modules / dynamic XPath.
|
|
200
|
+
|
|
153
201
|
## Anti-Detection (stealth)
|
|
154
202
|
|
|
155
203
|
Bypass `navigator.webdriver`, `HeadlessChrome` UA, empty `plugins`, SwiftShader WebGL,
|
|
@@ -10,7 +10,8 @@ A powerful CLI for [DrissionPage](https://github.com/g1879/DrissionPage) — bro
|
|
|
10
10
|
and custom menu items the a11y tree misses; every element gets an `[N]` ref with
|
|
11
11
|
confidence markers (`⚡` medium, `?` low)
|
|
12
12
|
- **`dp scan`** — fast Vimium-style listing of interactive elements (viewport-only mode available)
|
|
13
|
-
- **Powerful locator syntax** — descriptive strings stable across navigation
|
|
13
|
+
- **Powerful locator syntax** — descriptive strings stable across navigation, plus
|
|
14
|
+
Playwright-style `pw:role=button[name="Submit"] >> nth=2` chains
|
|
14
15
|
- **Structured data extraction** — `extract` + `query` + `snapshot` for scraping list pages
|
|
15
16
|
- **Network listening** — capture XHR/Fetch requests and response bodies
|
|
16
17
|
- **Stealth patches** — `dp stealth` bypasses common automation detections
|
|
@@ -129,6 +130,53 @@ dp scan --confidence high # only the sure-thing clickables
|
|
|
129
130
|
Both `snapshot` and `scan` share the same `[N]` ref numbering per session, so
|
|
130
131
|
`dp click "ref:N"` works regardless of which one produced the snapshot.
|
|
131
132
|
|
|
133
|
+
## Playwright-style locators (`pw:` prefix)
|
|
134
|
+
|
|
135
|
+
Need semantic, role-based targeting on a fresh page (no snapshot required)?
|
|
136
|
+
Use the `pw:` prefix. Syntax mirrors Playwright, and chains with `>>`:
|
|
137
|
+
|
|
138
|
+
```bash
|
|
139
|
+
# By ARIA role (with accessible name — exact / substring / regex)
|
|
140
|
+
dp click 'pw:role=button[name="Submit"]'
|
|
141
|
+
dp click 'pw:role=button[name=/^Sign/i]'
|
|
142
|
+
dp click 'pw:role=link[name=More]' # substring
|
|
143
|
+
|
|
144
|
+
# By visible text (exact / substring / regex)
|
|
145
|
+
dp click 'pw:text="Login"' # exact
|
|
146
|
+
dp click 'pw:text=Login' # substring (case-insensitive)
|
|
147
|
+
dp click 'pw:text=/^log/i' # regex
|
|
148
|
+
|
|
149
|
+
# By form affordances
|
|
150
|
+
dp fill 'pw:placeholder=Search…' "chatgpt"
|
|
151
|
+
dp fill 'pw:label="Email"' "a@b.com"
|
|
152
|
+
dp click 'pw:alt="Logo"'
|
|
153
|
+
dp click 'pw:title="Close"'
|
|
154
|
+
dp click 'pw:testid=submit-btn' # data-testid / data-test-id / data-test
|
|
155
|
+
|
|
156
|
+
# Chain with >> (each step narrows the scope)
|
|
157
|
+
dp click 'pw:css=.sidebar >> role=listitem[name="Chat"] >> nth=2'
|
|
158
|
+
dp click 'pw:css=li >> has-text="Python"'
|
|
159
|
+
dp click 'pw:role=list >> nth=-1' # negative index = from end
|
|
160
|
+
|
|
161
|
+
# Raw css/xpath chunks mix freely
|
|
162
|
+
dp click 'pw:xpath=//nav >> role=link[name=Docs]'
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
**Matchers**: `role` / `text` / `label` / `placeholder` / `alt` / `title` /
|
|
166
|
+
`testid` / `css` / `xpath` / `nth` / `has-text` / `visible`
|
|
167
|
+
|
|
168
|
+
**Value forms**: `bare` = substring, `"quoted"` = exact, `/pattern/flags` = regex
|
|
169
|
+
|
|
170
|
+
**Visibility**: `role` / `text` / `has-text` automatically skip elements hidden
|
|
171
|
+
via `display:none`, `visibility:hidden`, `hidden` attribute, or
|
|
172
|
+
`aria-hidden="true"` (matches Playwright semantics).
|
|
173
|
+
|
|
174
|
+
**Shadow DOM**: open shadow roots are traversed automatically.
|
|
175
|
+
|
|
176
|
+
Under the hood the matcher chain is evaluated in-page as JS, the target element
|
|
177
|
+
is tagged with a one-shot `data-dp-ref` attribute, and DrissionPage resolves it
|
|
178
|
+
by that attribute — bypassing stale classes / CSS Modules / dynamic XPath.
|
|
179
|
+
|
|
132
180
|
## Anti-Detection (stealth)
|
|
133
181
|
|
|
134
182
|
Bypass `navigator.webdriver`, `HeadlessChrome` UA, empty `plugins`, SwiftShader WebGL,
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
# -*- coding:utf-8 -*-
|
|
2
2
|
from dp_cli.commands import (
|
|
3
3
|
browser, snapshot_cmd, element, keyboard,
|
|
4
|
-
page, tab, storage, network, misc,
|
|
4
|
+
page, tab, storage, network, record, misc,
|
|
5
5
|
)
|
|
6
6
|
|
|
7
|
-
_MODULES = [browser, snapshot_cmd, element, keyboard, page, tab, storage, network, misc]
|
|
7
|
+
_MODULES = [browser, snapshot_cmd, element, keyboard, page, tab, storage, network, record, misc]
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
def register_all(cli):
|
|
@@ -25,11 +25,12 @@ def session_option(f):
|
|
|
25
25
|
help='会话名称,默认 default', show_default=True)(f)
|
|
26
26
|
|
|
27
27
|
|
|
28
|
-
def _get_page(session: str, raw: bool = False):
|
|
28
|
+
def _get_page(session: str, raw: bool = False, inject_recording: bool = True):
|
|
29
29
|
"""获取页面对象,失败则 error 退出。
|
|
30
30
|
|
|
31
31
|
:param raw: True 时始终返回 ChromiumPage(用于浏览器级操作如标签页管理)。
|
|
32
|
-
False 时返回绑定的标签页 ChromiumTab
|
|
32
|
+
False 时返回绑定的标签页 ChromiumTab(如有),否则返回当前激活标签页。
|
|
33
|
+
:param inject_recording: session 录制中时是否自动注入录制器。
|
|
33
34
|
"""
|
|
34
35
|
try:
|
|
35
36
|
page = get_browser(session)
|
|
@@ -53,7 +54,10 @@ def _get_page(session: str, raw: bool = False):
|
|
|
53
54
|
save_session(session, sess)
|
|
54
55
|
target = page
|
|
55
56
|
else:
|
|
56
|
-
|
|
57
|
+
try:
|
|
58
|
+
target = page.latest_tab or page
|
|
59
|
+
except Exception:
|
|
60
|
+
target = page
|
|
57
61
|
|
|
58
62
|
# 自动重新应用 stealth:CDP init_js 绑定到 CDP session,每个 dp 命令是独立
|
|
59
63
|
# Python 进程/独立 session,必须重新注册才能让下一次 navigation 生效。
|
|
@@ -71,11 +75,18 @@ def _get_page(session: str, raw: bool = False):
|
|
|
71
75
|
except Exception:
|
|
72
76
|
pass # 不能让 stealth 失败阻塞常规命令
|
|
73
77
|
|
|
78
|
+
if inject_recording and sess.get('recording'):
|
|
79
|
+
try:
|
|
80
|
+
from dp_cli.recorder import inject_recorder
|
|
81
|
+
inject_recorder(target)
|
|
82
|
+
except Exception:
|
|
83
|
+
pass
|
|
84
|
+
|
|
74
85
|
return target
|
|
75
86
|
|
|
76
87
|
|
|
77
88
|
_KNOWN_PREFIX = re.compile(
|
|
78
|
-
r'^(css[:=]|xpath[:=]|text[:=^$]|tag[:=^$]|@@?[\w]|ref:)', re.IGNORECASE)
|
|
89
|
+
r'^(css[:=]|xpath[:=]|text[:=^$]|tag[:=^$]|@@?[\w]|ref:|pw:)', re.IGNORECASE)
|
|
79
90
|
_CSS_ID_CLASS = re.compile(r'^[#.][\w-]') # #id .class
|
|
80
91
|
_CSS_TAG_SEL = re.compile(r'^[\w-]+[.#\[][\w-]') # div.class a[href] h1#title
|
|
81
92
|
_CSS_COMBINATOR = re.compile(r'[\[>+~]|::|:(?:nth|first|last|not|has)') # [attr] > + ~ ::pseudo :nth-child
|
|
@@ -145,17 +156,58 @@ def _mark_element_by_backend_id(page, backend_node_id: int) -> str:
|
|
|
145
156
|
return marker
|
|
146
157
|
|
|
147
158
|
|
|
148
|
-
def
|
|
149
|
-
"""
|
|
159
|
+
def _resolve_pw(expr: str, session: str, page) -> str:
|
|
160
|
+
"""解析 pw: 表达式 → 在页面上打标 → 返回 @data-dp-ref=<marker>。
|
|
161
|
+
|
|
162
|
+
失败(语法错 / 未匹配 / 浏览器不可用)会调用 error 并退出。
|
|
163
|
+
"""
|
|
164
|
+
from dp_cli.locators import parse_pw, build_pw_js, PwParseError
|
|
165
|
+
try:
|
|
166
|
+
matchers = parse_pw(expr)
|
|
167
|
+
except PwParseError as e:
|
|
168
|
+
error(f'pw 定位器语法错误: {e}', code='PW_SYNTAX')
|
|
169
|
+
raise SystemExit(1)
|
|
150
170
|
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
171
|
+
if page is None:
|
|
172
|
+
try:
|
|
173
|
+
page = _get_page(session)
|
|
174
|
+
except SystemExit:
|
|
175
|
+
raise
|
|
176
|
+
except Exception as e:
|
|
177
|
+
error('无法连接浏览器会话', code='SESSION_NOT_FOUND', detail=str(e))
|
|
178
|
+
raise SystemExit(1)
|
|
179
|
+
|
|
180
|
+
js = build_pw_js(matchers)
|
|
181
|
+
try:
|
|
182
|
+
marker = page.run_js(js)
|
|
183
|
+
except Exception as e:
|
|
184
|
+
error(f'pw 定位器求值失败', code='PW_EVAL_FAILED', detail=str(e))
|
|
185
|
+
raise SystemExit(1)
|
|
186
|
+
|
|
187
|
+
if not marker:
|
|
188
|
+
error(f'pw 定位器未匹配到元素: pw:{expr}', code='PW_NOT_FOUND')
|
|
189
|
+
raise SystemExit(1)
|
|
190
|
+
|
|
191
|
+
return f'@data-dp-ref={marker}'
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def resolve_locator(locator: str, session: str = 'default', page=None) -> str:
|
|
195
|
+
"""解析定位器:ref:N 展开 + pw: 表达式求值 + 智能前缀补全。
|
|
196
|
+
|
|
197
|
+
- pw:<expr>:Playwright 风格(role/text/label/placeholder/alt/title/
|
|
198
|
+
testid/css/xpath/nth/has-text/visible),支持 >> 链式。通过 JS 求值
|
|
199
|
+
+ 打标,返回 @data-dp-ref=<marker>。
|
|
200
|
+
- ref:N:从 session 的 refs 映射中查找。
|
|
201
|
+
· 有 backendNodeId 时:通过 CDP 现场打临时属性,返回 @data-dp-ref=
|
|
202
|
+
(最鲁棒,绕开 CSS Modules / 动态 class / xpath 变化)
|
|
203
|
+
· 无 backendNodeId 或打标失败时:回落到保存的 locator 字符串
|
|
204
|
+
· 再失败,用 name 作 text 定位器
|
|
205
|
+
- 其它:智能补全 css:/xpath: 前缀。
|
|
156
206
|
|
|
157
207
|
:param page: 可选,传入避免内部再调用 _get_page;为 None 时按需懒加载。
|
|
158
208
|
"""
|
|
209
|
+
if locator.startswith('pw:'):
|
|
210
|
+
return _resolve_pw(locator[3:], session, page)
|
|
159
211
|
if not locator.startswith('ref:'):
|
|
160
212
|
return normalize_locator(locator)
|
|
161
213
|
|
|
@@ -23,7 +23,7 @@ def register(cli):
|
|
|
23
23
|
@click.option('--profile', 'user_data_dir', default=None, help='用户数据目录')
|
|
24
24
|
@click.option('--proxy', default=None, help='代理服务器,如 http://127.0.0.1:7890')
|
|
25
25
|
@click.option('--port', type=int, default=None, help='连接指定端口的已有浏览器实例')
|
|
26
|
-
@click.option('--auto-connect', is_flag=True,
|
|
26
|
+
@click.option('--auto-connect', '-a', is_flag=True,
|
|
27
27
|
help='从用户常规启动的 Chrome 读取 DevToolsActivePort 自动发现端口'
|
|
28
28
|
'(需 Chrome 144+,用户在 chrome://inspect/#remote-debugging 启用)')
|
|
29
29
|
@click.option('--channel', type=click.Choice(['stable', 'beta', 'dev', 'canary', 'chromium']),
|