dp-cli 0.3.2__tar.gz → 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. {dp_cli-0.3.2 → dp_cli-0.5.0}/PKG-INFO +50 -2
  2. {dp_cli-0.3.2 → dp_cli-0.5.0}/README.md +49 -1
  3. {dp_cli-0.3.2 → dp_cli-0.5.0}/dp_cli/commands/__init__.py +2 -2
  4. {dp_cli-0.3.2 → dp_cli-0.5.0}/dp_cli/commands/_utils.py +63 -11
  5. {dp_cli-0.3.2 → dp_cli-0.5.0}/dp_cli/commands/browser.py +1 -1
  6. dp_cli-0.5.0/dp_cli/commands/keyboard.py +405 -0
  7. dp_cli-0.5.0/dp_cli/commands/record.py +204 -0
  8. {dp_cli-0.3.2 → dp_cli-0.5.0}/dp_cli/commands/snapshot_cmd.py +1 -1
  9. {dp_cli-0.3.2 → dp_cli-0.5.0}/dp_cli/commands/tab.py +6 -0
  10. dp_cli-0.5.0/dp_cli/locators/__init__.py +9 -0
  11. dp_cli-0.5.0/dp_cli/locators/playwright.py +236 -0
  12. dp_cli-0.5.0/dp_cli/locators/pw_js.py +395 -0
  13. dp_cli-0.5.0/dp_cli/recorder.py +799 -0
  14. {dp_cli-0.3.2 → dp_cli-0.5.0}/dp_cli.egg-info/PKG-INFO +50 -2
  15. {dp_cli-0.3.2 → dp_cli-0.5.0}/dp_cli.egg-info/SOURCES.txt +6 -0
  16. {dp_cli-0.3.2 → dp_cli-0.5.0}/pyproject.toml +1 -1
  17. dp_cli-0.5.0/tests/test_pw_locator.py +310 -0
  18. dp_cli-0.3.2/dp_cli/commands/keyboard.py +0 -225
  19. {dp_cli-0.3.2 → dp_cli-0.5.0}/dp_cli/__init__.py +0 -0
  20. {dp_cli-0.3.2 → dp_cli-0.5.0}/dp_cli/bridge.py +0 -0
  21. {dp_cli-0.3.2 → dp_cli-0.5.0}/dp_cli/bridge_manager.py +0 -0
  22. {dp_cli-0.3.2 → dp_cli-0.5.0}/dp_cli/commands/element.py +0 -0
  23. {dp_cli-0.3.2 → dp_cli-0.5.0}/dp_cli/commands/misc.py +0 -0
  24. {dp_cli-0.3.2 → dp_cli-0.5.0}/dp_cli/commands/network.py +0 -0
  25. {dp_cli-0.3.2 → dp_cli-0.5.0}/dp_cli/commands/page.py +0 -0
  26. {dp_cli-0.3.2 → dp_cli-0.5.0}/dp_cli/commands/storage.py +0 -0
  27. {dp_cli-0.3.2 → dp_cli-0.5.0}/dp_cli/main.py +0 -0
  28. {dp_cli-0.3.2 → dp_cli-0.5.0}/dp_cli/output.py +0 -0
  29. {dp_cli-0.3.2 → dp_cli-0.5.0}/dp_cli/session.py +0 -0
  30. {dp_cli-0.3.2 → dp_cli-0.5.0}/dp_cli/snapshot/__init__.py +0 -0
  31. {dp_cli-0.3.2 → dp_cli-0.5.0}/dp_cli/snapshot/a11y.py +0 -0
  32. {dp_cli-0.3.2 → dp_cli-0.5.0}/dp_cli/snapshot/clickable.py +0 -0
  33. {dp_cli-0.3.2 → dp_cli-0.5.0}/dp_cli/snapshot/clickable_js.py +0 -0
  34. {dp_cli-0.3.2 → dp_cli-0.5.0}/dp_cli/snapshot/extract.py +0 -0
  35. {dp_cli-0.3.2 → dp_cli-0.5.0}/dp_cli/snapshot/js_scripts.py +0 -0
  36. {dp_cli-0.3.2 → dp_cli-0.5.0}/dp_cli/snapshot/utils.py +0 -0
  37. {dp_cli-0.3.2 → dp_cli-0.5.0}/dp_cli/stealth.py +0 -0
  38. {dp_cli-0.3.2 → dp_cli-0.5.0}/dp_cli.egg-info/dependency_links.txt +0 -0
  39. {dp_cli-0.3.2 → dp_cli-0.5.0}/dp_cli.egg-info/entry_points.txt +0 -0
  40. {dp_cli-0.3.2 → dp_cli-0.5.0}/dp_cli.egg-info/requires.txt +0 -0
  41. {dp_cli-0.3.2 → dp_cli-0.5.0}/dp_cli.egg-info/top_level.txt +0 -0
  42. {dp_cli-0.3.2 → dp_cli-0.5.0}/setup.cfg +0 -0
  43. {dp_cli-0.3.2 → dp_cli-0.5.0}/tests/test_bridge_integration.py +0 -0
  44. {dp_cli-0.3.2 → dp_cli-0.5.0}/tests/test_bridge_manager.py +0 -0
  45. {dp_cli-0.3.2 → dp_cli-0.5.0}/tests/test_clickable.py +0 -0
  46. {dp_cli-0.3.2 → dp_cli-0.5.0}/tests/test_resolve_locator.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dp-cli
3
- Version: 0.3.2
3
+ Version: 0.5.0
4
4
  Summary: A powerful CLI for DrissionPage — browser automation, structured data extraction, network listening and more.
5
5
  License: BSD-3-Clause
6
6
  Project-URL: Homepage, https://github.com/mofanx/dp-cli
@@ -31,7 +31,8 @@ A powerful CLI for [DrissionPage](https://github.com/g1879/DrissionPage) — bro
31
31
  and custom menu items the a11y tree misses; every element gets an `[N]` ref with
32
32
  confidence markers (`⚡` medium, `?` low)
33
33
  - **`dp scan`** — fast Vimium-style listing of interactive elements (viewport-only mode available)
34
- - **Powerful locator syntax** — descriptive strings stable across navigation
34
+ - **Powerful locator syntax** — descriptive strings stable across navigation, plus
35
+ Playwright-style `pw:role=button[name="Submit"] >> nth=2` chains
35
36
  - **Structured data extraction** — `extract` + `query` + `snapshot` for scraping list pages
36
37
  - **Network listening** — capture XHR/Fetch requests and response bodies
37
38
  - **Stealth patches** — `dp stealth` bypasses common automation detections
@@ -150,6 +151,53 @@ dp scan --confidence high # only the sure-thing clickables
150
151
  Both `snapshot` and `scan` share the same `[N]` ref numbering per session, so
151
152
  `dp click "ref:N"` works regardless of which one produced the snapshot.
152
153
 
154
+ ## Playwright-style locators (`pw:` prefix)
155
+
156
+ Need semantic, role-based targeting on a fresh page (no snapshot required)?
157
+ Use the `pw:` prefix. Syntax mirrors Playwright, and chains with `>>`:
158
+
159
+ ```bash
160
+ # By ARIA role (with accessible name — exact / substring / regex)
161
+ dp click 'pw:role=button[name="Submit"]'
162
+ dp click 'pw:role=button[name=/^Sign/i]'
163
+ dp click 'pw:role=link[name=More]' # substring
164
+
165
+ # By visible text (exact / substring / regex)
166
+ dp click 'pw:text="Login"' # exact
167
+ dp click 'pw:text=Login' # substring (case-insensitive)
168
+ dp click 'pw:text=/^log/i' # regex
169
+
170
+ # By form affordances
171
+ dp fill 'pw:placeholder=Search…' "chatgpt"
172
+ dp fill 'pw:label="Email"' "a@b.com"
173
+ dp click 'pw:alt="Logo"'
174
+ dp click 'pw:title="Close"'
175
+ dp click 'pw:testid=submit-btn' # data-testid / data-test-id / data-test
176
+
177
+ # Chain with >> (each step narrows the scope)
178
+ dp click 'pw:css=.sidebar >> role=listitem[name="Chat"] >> nth=2'
179
+ dp click 'pw:css=li >> has-text="Python"'
180
+ dp click 'pw:role=list >> nth=-1' # negative index = from end
181
+
182
+ # Raw css/xpath chunks mix freely
183
+ dp click 'pw:xpath=//nav >> role=link[name=Docs]'
184
+ ```
185
+
186
+ **Matchers**: `role` / `text` / `label` / `placeholder` / `alt` / `title` /
187
+ `testid` / `css` / `xpath` / `nth` / `has-text` / `visible`
188
+
189
+ **Value forms**: `bare` = substring, `"quoted"` = exact, `/pattern/flags` = regex
190
+
191
+ **Visibility**: `role` / `text` / `has-text` automatically skip elements hidden
192
+ via `display:none`, `visibility:hidden`, `hidden` attribute, or
193
+ `aria-hidden="true"` (matches Playwright semantics).
194
+
195
+ **Shadow DOM**: open shadow roots are traversed automatically.
196
+
197
+ Under the hood the matcher chain is evaluated in-page as JS, the target element
198
+ is tagged with a one-shot `data-dp-ref` attribute, and DrissionPage resolves it
199
+ by that attribute — bypassing stale classes / CSS Modules / dynamic XPath.
200
+
153
201
  ## Anti-Detection (stealth)
154
202
 
155
203
  Bypass `navigator.webdriver`, `HeadlessChrome` UA, empty `plugins`, SwiftShader WebGL,
@@ -10,7 +10,8 @@ A powerful CLI for [DrissionPage](https://github.com/g1879/DrissionPage) — bro
10
10
  and custom menu items the a11y tree misses; every element gets an `[N]` ref with
11
11
  confidence markers (`⚡` medium, `?` low)
12
12
  - **`dp scan`** — fast Vimium-style listing of interactive elements (viewport-only mode available)
13
- - **Powerful locator syntax** — descriptive strings stable across navigation
13
+ - **Powerful locator syntax** — descriptive strings stable across navigation, plus
14
+ Playwright-style `pw:role=button[name="Submit"] >> nth=2` chains
14
15
  - **Structured data extraction** — `extract` + `query` + `snapshot` for scraping list pages
15
16
  - **Network listening** — capture XHR/Fetch requests and response bodies
16
17
  - **Stealth patches** — `dp stealth` bypasses common automation detections
@@ -129,6 +130,53 @@ dp scan --confidence high # only the sure-thing clickables
129
130
  Both `snapshot` and `scan` share the same `[N]` ref numbering per session, so
130
131
  `dp click "ref:N"` works regardless of which one produced the snapshot.
131
132
 
133
+ ## Playwright-style locators (`pw:` prefix)
134
+
135
+ Need semantic, role-based targeting on a fresh page (no snapshot required)?
136
+ Use the `pw:` prefix. Syntax mirrors Playwright, and chains with `>>`:
137
+
138
+ ```bash
139
+ # By ARIA role (with accessible name — exact / substring / regex)
140
+ dp click 'pw:role=button[name="Submit"]'
141
+ dp click 'pw:role=button[name=/^Sign/i]'
142
+ dp click 'pw:role=link[name=More]' # substring
143
+
144
+ # By visible text (exact / substring / regex)
145
+ dp click 'pw:text="Login"' # exact
146
+ dp click 'pw:text=Login' # substring (case-insensitive)
147
+ dp click 'pw:text=/^log/i' # regex
148
+
149
+ # By form affordances
150
+ dp fill 'pw:placeholder=Search…' "chatgpt"
151
+ dp fill 'pw:label="Email"' "a@b.com"
152
+ dp click 'pw:alt="Logo"'
153
+ dp click 'pw:title="Close"'
154
+ dp click 'pw:testid=submit-btn' # data-testid / data-test-id / data-test
155
+
156
+ # Chain with >> (each step narrows the scope)
157
+ dp click 'pw:css=.sidebar >> role=listitem[name="Chat"] >> nth=2'
158
+ dp click 'pw:css=li >> has-text="Python"'
159
+ dp click 'pw:role=list >> nth=-1' # negative index = from end
160
+
161
+ # Raw css/xpath chunks mix freely
162
+ dp click 'pw:xpath=//nav >> role=link[name=Docs]'
163
+ ```
164
+
165
+ **Matchers**: `role` / `text` / `label` / `placeholder` / `alt` / `title` /
166
+ `testid` / `css` / `xpath` / `nth` / `has-text` / `visible`
167
+
168
+ **Value forms**: `bare` = substring, `"quoted"` = exact, `/pattern/flags` = regex
169
+
170
+ **Visibility**: `role` / `text` / `has-text` automatically skip elements hidden
171
+ via `display:none`, `visibility:hidden`, `hidden` attribute, or
172
+ `aria-hidden="true"` (matches Playwright semantics).
173
+
174
+ **Shadow DOM**: open shadow roots are traversed automatically.
175
+
176
+ Under the hood the matcher chain is evaluated in-page as JS, the target element
177
+ is tagged with a one-shot `data-dp-ref` attribute, and DrissionPage resolves it
178
+ by that attribute — bypassing stale classes / CSS Modules / dynamic XPath.
179
+
132
180
  ## Anti-Detection (stealth)
133
181
 
134
182
  Bypass `navigator.webdriver`, `HeadlessChrome` UA, empty `plugins`, SwiftShader WebGL,
@@ -1,10 +1,10 @@
1
1
  # -*- coding:utf-8 -*-
2
2
  from dp_cli.commands import (
3
3
  browser, snapshot_cmd, element, keyboard,
4
- page, tab, storage, network, misc,
4
+ page, tab, storage, network, record, misc,
5
5
  )
6
6
 
7
- _MODULES = [browser, snapshot_cmd, element, keyboard, page, tab, storage, network, misc]
7
+ _MODULES = [browser, snapshot_cmd, element, keyboard, page, tab, storage, network, record, misc]
8
8
 
9
9
 
10
10
  def register_all(cli):
@@ -25,11 +25,12 @@ def session_option(f):
25
25
  help='会话名称,默认 default', show_default=True)(f)
26
26
 
27
27
 
28
- def _get_page(session: str, raw: bool = False):
28
+ def _get_page(session: str, raw: bool = False, inject_recording: bool = True):
29
29
  """获取页面对象,失败则 error 退出。
30
30
 
31
31
  :param raw: True 时始终返回 ChromiumPage(用于浏览器级操作如标签页管理)。
32
- False 时返回绑定的标签页 ChromiumTab(如有),否则返回 ChromiumPage。
32
+ False 时返回绑定的标签页 ChromiumTab(如有),否则返回当前激活标签页。
33
+ :param inject_recording: session 录制中时是否自动注入录制器。
33
34
  """
34
35
  try:
35
36
  page = get_browser(session)
@@ -53,7 +54,10 @@ def _get_page(session: str, raw: bool = False):
53
54
  save_session(session, sess)
54
55
  target = page
55
56
  else:
56
- target = page
57
+ try:
58
+ target = page.latest_tab or page
59
+ except Exception:
60
+ target = page
57
61
 
58
62
  # 自动重新应用 stealth:CDP init_js 绑定到 CDP session,每个 dp 命令是独立
59
63
  # Python 进程/独立 session,必须重新注册才能让下一次 navigation 生效。
@@ -71,11 +75,18 @@ def _get_page(session: str, raw: bool = False):
71
75
  except Exception:
72
76
  pass # 不能让 stealth 失败阻塞常规命令
73
77
 
78
+ if inject_recording and sess.get('recording'):
79
+ try:
80
+ from dp_cli.recorder import inject_recorder
81
+ inject_recorder(target)
82
+ except Exception:
83
+ pass
84
+
74
85
  return target
75
86
 
76
87
 
77
88
  _KNOWN_PREFIX = re.compile(
78
- r'^(css[:=]|xpath[:=]|text[:=^$]|tag[:=^$]|@@?[\w]|ref:)', re.IGNORECASE)
89
+ r'^(css[:=]|xpath[:=]|text[:=^$]|tag[:=^$]|@@?[\w]|ref:|pw:)', re.IGNORECASE)
79
90
  _CSS_ID_CLASS = re.compile(r'^[#.][\w-]') # #id .class
80
91
  _CSS_TAG_SEL = re.compile(r'^[\w-]+[.#\[][\w-]') # div.class a[href] h1#title
81
92
  _CSS_COMBINATOR = re.compile(r'[\[>+~]|::|:(?:nth|first|last|not|has)') # [attr] > + ~ ::pseudo :nth-child
@@ -145,17 +156,58 @@ def _mark_element_by_backend_id(page, backend_node_id: int) -> str:
145
156
  return marker
146
157
 
147
158
 
148
- def resolve_locator(locator: str, session: str = 'default', page=None) -> str:
149
- """解析定位器:ref:N 展开 + 智能前缀补全。
159
+ def _resolve_pw(expr: str, session: str, page) -> str:
160
+ """解析 pw: 表达式 → 在页面上打标 → 返回 @data-dp-ref=<marker>。
161
+
162
+ 失败(语法错 / 未匹配 / 浏览器不可用)会调用 error 并退出。
163
+ """
164
+ from dp_cli.locators import parse_pw, build_pw_js, PwParseError
165
+ try:
166
+ matchers = parse_pw(expr)
167
+ except PwParseError as e:
168
+ error(f'pw 定位器语法错误: {e}', code='PW_SYNTAX')
169
+ raise SystemExit(1)
150
170
 
151
- 如果 locator 以 'ref:' 开头,从 session 的 refs 映射中查找。
152
- - 有 backendNodeId 时:通过 CDP 现场打临时属性,返回 @data-dp-ref=<marker>
153
- (最鲁棒,绕开 CSS Modules / 动态 class / xpath 变化)
154
- - 无 backendNodeId 或打标失败时:回落到保存的 locator 字符串
155
- - 再失败,用 name 作 text 定位器
171
+ if page is None:
172
+ try:
173
+ page = _get_page(session)
174
+ except SystemExit:
175
+ raise
176
+ except Exception as e:
177
+ error('无法连接浏览器会话', code='SESSION_NOT_FOUND', detail=str(e))
178
+ raise SystemExit(1)
179
+
180
+ js = build_pw_js(matchers)
181
+ try:
182
+ marker = page.run_js(js)
183
+ except Exception as e:
184
+ error(f'pw 定位器求值失败', code='PW_EVAL_FAILED', detail=str(e))
185
+ raise SystemExit(1)
186
+
187
+ if not marker:
188
+ error(f'pw 定位器未匹配到元素: pw:{expr}', code='PW_NOT_FOUND')
189
+ raise SystemExit(1)
190
+
191
+ return f'@data-dp-ref={marker}'
192
+
193
+
194
+ def resolve_locator(locator: str, session: str = 'default', page=None) -> str:
195
+ """解析定位器:ref:N 展开 + pw: 表达式求值 + 智能前缀补全。
196
+
197
+ - pw:<expr>:Playwright 风格(role/text/label/placeholder/alt/title/
198
+ testid/css/xpath/nth/has-text/visible),支持 >> 链式。通过 JS 求值
199
+ + 打标,返回 @data-dp-ref=<marker>。
200
+ - ref:N:从 session 的 refs 映射中查找。
201
+ · 有 backendNodeId 时:通过 CDP 现场打临时属性,返回 @data-dp-ref=<marker>
202
+ (最鲁棒,绕开 CSS Modules / 动态 class / xpath 变化)
203
+ · 无 backendNodeId 或打标失败时:回落到保存的 locator 字符串
204
+ · 再失败,用 name 作 text 定位器
205
+ - 其它:智能补全 css:/xpath: 前缀。
156
206
 
157
207
  :param page: 可选,传入避免内部再调用 _get_page;为 None 时按需懒加载。
158
208
  """
209
+ if locator.startswith('pw:'):
210
+ return _resolve_pw(locator[3:], session, page)
159
211
  if not locator.startswith('ref:'):
160
212
  return normalize_locator(locator)
161
213
 
@@ -23,7 +23,7 @@ def register(cli):
23
23
  @click.option('--profile', 'user_data_dir', default=None, help='用户数据目录')
24
24
  @click.option('--proxy', default=None, help='代理服务器,如 http://127.0.0.1:7890')
25
25
  @click.option('--port', type=int, default=None, help='连接指定端口的已有浏览器实例')
26
- @click.option('--auto-connect', is_flag=True,
26
+ @click.option('--auto-connect', '-a', is_flag=True,
27
27
  help='从用户常规启动的 Chrome 读取 DevToolsActivePort 自动发现端口'
28
28
  '(需 Chrome 144+,用户在 chrome://inspect/#remote-debugging 启用)')
29
29
  @click.option('--channel', type=click.Choice(['stable', 'beta', 'dev', 'canary', 'chromium']),