dp-cli 0.3.2__tar.gz → 0.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dp_cli-0.3.2 → dp_cli-0.4.0}/PKG-INFO +50 -2
- {dp_cli-0.3.2 → dp_cli-0.4.0}/README.md +49 -1
- {dp_cli-0.3.2 → dp_cli-0.4.0}/dp_cli/commands/_utils.py +49 -8
- dp_cli-0.4.0/dp_cli/locators/__init__.py +9 -0
- dp_cli-0.4.0/dp_cli/locators/playwright.py +236 -0
- dp_cli-0.4.0/dp_cli/locators/pw_js.py +395 -0
- {dp_cli-0.3.2 → dp_cli-0.4.0}/dp_cli.egg-info/PKG-INFO +50 -2
- {dp_cli-0.3.2 → dp_cli-0.4.0}/dp_cli.egg-info/SOURCES.txt +4 -0
- {dp_cli-0.3.2 → dp_cli-0.4.0}/pyproject.toml +1 -1
- dp_cli-0.4.0/tests/test_pw_locator.py +310 -0
- {dp_cli-0.3.2 → dp_cli-0.4.0}/dp_cli/__init__.py +0 -0
- {dp_cli-0.3.2 → dp_cli-0.4.0}/dp_cli/bridge.py +0 -0
- {dp_cli-0.3.2 → dp_cli-0.4.0}/dp_cli/bridge_manager.py +0 -0
- {dp_cli-0.3.2 → dp_cli-0.4.0}/dp_cli/commands/__init__.py +0 -0
- {dp_cli-0.3.2 → dp_cli-0.4.0}/dp_cli/commands/browser.py +0 -0
- {dp_cli-0.3.2 → dp_cli-0.4.0}/dp_cli/commands/element.py +0 -0
- {dp_cli-0.3.2 → dp_cli-0.4.0}/dp_cli/commands/keyboard.py +0 -0
- {dp_cli-0.3.2 → dp_cli-0.4.0}/dp_cli/commands/misc.py +0 -0
- {dp_cli-0.3.2 → dp_cli-0.4.0}/dp_cli/commands/network.py +0 -0
- {dp_cli-0.3.2 → dp_cli-0.4.0}/dp_cli/commands/page.py +0 -0
- {dp_cli-0.3.2 → dp_cli-0.4.0}/dp_cli/commands/snapshot_cmd.py +0 -0
- {dp_cli-0.3.2 → dp_cli-0.4.0}/dp_cli/commands/storage.py +0 -0
- {dp_cli-0.3.2 → dp_cli-0.4.0}/dp_cli/commands/tab.py +0 -0
- {dp_cli-0.3.2 → dp_cli-0.4.0}/dp_cli/main.py +0 -0
- {dp_cli-0.3.2 → dp_cli-0.4.0}/dp_cli/output.py +0 -0
- {dp_cli-0.3.2 → dp_cli-0.4.0}/dp_cli/session.py +0 -0
- {dp_cli-0.3.2 → dp_cli-0.4.0}/dp_cli/snapshot/__init__.py +0 -0
- {dp_cli-0.3.2 → dp_cli-0.4.0}/dp_cli/snapshot/a11y.py +0 -0
- {dp_cli-0.3.2 → dp_cli-0.4.0}/dp_cli/snapshot/clickable.py +0 -0
- {dp_cli-0.3.2 → dp_cli-0.4.0}/dp_cli/snapshot/clickable_js.py +0 -0
- {dp_cli-0.3.2 → dp_cli-0.4.0}/dp_cli/snapshot/extract.py +0 -0
- {dp_cli-0.3.2 → dp_cli-0.4.0}/dp_cli/snapshot/js_scripts.py +0 -0
- {dp_cli-0.3.2 → dp_cli-0.4.0}/dp_cli/snapshot/utils.py +0 -0
- {dp_cli-0.3.2 → dp_cli-0.4.0}/dp_cli/stealth.py +0 -0
- {dp_cli-0.3.2 → dp_cli-0.4.0}/dp_cli.egg-info/dependency_links.txt +0 -0
- {dp_cli-0.3.2 → dp_cli-0.4.0}/dp_cli.egg-info/entry_points.txt +0 -0
- {dp_cli-0.3.2 → dp_cli-0.4.0}/dp_cli.egg-info/requires.txt +0 -0
- {dp_cli-0.3.2 → dp_cli-0.4.0}/dp_cli.egg-info/top_level.txt +0 -0
- {dp_cli-0.3.2 → dp_cli-0.4.0}/setup.cfg +0 -0
- {dp_cli-0.3.2 → dp_cli-0.4.0}/tests/test_bridge_integration.py +0 -0
- {dp_cli-0.3.2 → dp_cli-0.4.0}/tests/test_bridge_manager.py +0 -0
- {dp_cli-0.3.2 → dp_cli-0.4.0}/tests/test_clickable.py +0 -0
- {dp_cli-0.3.2 → dp_cli-0.4.0}/tests/test_resolve_locator.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dp-cli
|
|
3
|
-
Version: 0.3.2
|
|
3
|
+
Version: 0.4.0
|
|
4
4
|
Summary: A powerful CLI for DrissionPage — browser automation, structured data extraction, network listening and more.
|
|
5
5
|
License: BSD-3-Clause
|
|
6
6
|
Project-URL: Homepage, https://github.com/mofanx/dp-cli
|
|
@@ -31,7 +31,8 @@ A powerful CLI for [DrissionPage](https://github.com/g1879/DrissionPage) — bro
|
|
|
31
31
|
and custom menu items the a11y tree misses; every element gets an `[N]` ref with
|
|
32
32
|
confidence markers (`⚡` medium, `?` low)
|
|
33
33
|
- **`dp scan`** — fast Vimium-style listing of interactive elements (viewport-only mode available)
|
|
34
|
-
- **Powerful locator syntax** — descriptive strings stable across navigation
|
|
34
|
+
- **Powerful locator syntax** — descriptive strings stable across navigation, plus
|
|
35
|
+
Playwright-style `pw:role=button[name="Submit"] >> nth=2` chains
|
|
35
36
|
- **Structured data extraction** — `extract` + `query` + `snapshot` for scraping list pages
|
|
36
37
|
- **Network listening** — capture XHR/Fetch requests and response bodies
|
|
37
38
|
- **Stealth patches** — `dp stealth` bypasses common automation detections
|
|
@@ -150,6 +151,53 @@ dp scan --confidence high # only the sure-thing clickables
|
|
|
150
151
|
Both `snapshot` and `scan` share the same `[N]` ref numbering per session, so
|
|
151
152
|
`dp click "ref:N"` works regardless of which one produced the snapshot.
|
|
152
153
|
|
|
154
|
+
## Playwright-style locators (`pw:` prefix)
|
|
155
|
+
|
|
156
|
+
Need semantic, role-based targeting on a fresh page (no snapshot required)?
|
|
157
|
+
Use the `pw:` prefix. Syntax mirrors Playwright, and chains with `>>`:
|
|
158
|
+
|
|
159
|
+
```bash
|
|
160
|
+
# By ARIA role (with accessible name — exact / substring / regex)
|
|
161
|
+
dp click 'pw:role=button[name="Submit"]'
|
|
162
|
+
dp click 'pw:role=button[name=/^Sign/i]'
|
|
163
|
+
dp click 'pw:role=link[name=More]' # substring
|
|
164
|
+
|
|
165
|
+
# By visible text (exact / substring / regex)
|
|
166
|
+
dp click 'pw:text="Login"' # exact
|
|
167
|
+
dp click 'pw:text=Login' # substring (case-insensitive)
|
|
168
|
+
dp click 'pw:text=/^log/i' # regex
|
|
169
|
+
|
|
170
|
+
# By form affordances
|
|
171
|
+
dp fill 'pw:placeholder=Search…' "chatgpt"
|
|
172
|
+
dp fill 'pw:label="Email"' "a@b.com"
|
|
173
|
+
dp click 'pw:alt="Logo"'
|
|
174
|
+
dp click 'pw:title="Close"'
|
|
175
|
+
dp click 'pw:testid=submit-btn' # data-testid / data-test-id / data-test
|
|
176
|
+
|
|
177
|
+
# Chain with >> (each step narrows the scope)
|
|
178
|
+
dp click 'pw:css=.sidebar >> role=listitem[name="Chat"] >> nth=2'
|
|
179
|
+
dp click 'pw:css=li >> has-text="Python"'
|
|
180
|
+
dp click 'pw:role=list >> nth=-1' # negative index = from end
|
|
181
|
+
|
|
182
|
+
# Raw css/xpath chunks mix freely
|
|
183
|
+
dp click 'pw:xpath=//nav >> role=link[name=Docs]'
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
**Matchers**: `role` / `text` / `label` / `placeholder` / `alt` / `title` /
|
|
187
|
+
`testid` / `css` / `xpath` / `nth` / `has-text` / `visible`
|
|
188
|
+
|
|
189
|
+
**Value forms**: `bare` = substring, `"quoted"` = exact, `/pattern/flags` = regex
|
|
190
|
+
|
|
191
|
+
**Visibility**: `role` / `text` / `has-text` automatically skip elements hidden
|
|
192
|
+
via `display:none`, `visibility:hidden`, `hidden` attribute, or
|
|
193
|
+
`aria-hidden="true"` (matches Playwright semantics).
|
|
194
|
+
|
|
195
|
+
**Shadow DOM**: open shadow roots are traversed automatically.
|
|
196
|
+
|
|
197
|
+
Under the hood the matcher chain is evaluated in-page as JS, the target element
|
|
198
|
+
is tagged with a one-shot `data-dp-ref` attribute, and DrissionPage resolves it
|
|
199
|
+
by that attribute — bypassing stale classes / CSS Modules / dynamic XPath.
|
|
200
|
+
|
|
153
201
|
## Anti-Detection (stealth)
|
|
154
202
|
|
|
155
203
|
Bypass `navigator.webdriver`, `HeadlessChrome` UA, empty `plugins`, SwiftShader WebGL,
|
|
@@ -10,7 +10,8 @@ A powerful CLI for [DrissionPage](https://github.com/g1879/DrissionPage) — bro
|
|
|
10
10
|
and custom menu items the a11y tree misses; every element gets an `[N]` ref with
|
|
11
11
|
confidence markers (`⚡` medium, `?` low)
|
|
12
12
|
- **`dp scan`** — fast Vimium-style listing of interactive elements (viewport-only mode available)
|
|
13
|
-
- **Powerful locator syntax** — descriptive strings stable across navigation
|
|
13
|
+
- **Powerful locator syntax** — descriptive strings stable across navigation, plus
|
|
14
|
+
Playwright-style `pw:role=button[name="Submit"] >> nth=2` chains
|
|
14
15
|
- **Structured data extraction** — `extract` + `query` + `snapshot` for scraping list pages
|
|
15
16
|
- **Network listening** — capture XHR/Fetch requests and response bodies
|
|
16
17
|
- **Stealth patches** — `dp stealth` bypasses common automation detections
|
|
@@ -129,6 +130,53 @@ dp scan --confidence high # only the sure-thing clickables
|
|
|
129
130
|
Both `snapshot` and `scan` share the same `[N]` ref numbering per session, so
|
|
130
131
|
`dp click "ref:N"` works regardless of which one produced the snapshot.
|
|
131
132
|
|
|
133
|
+
## Playwright-style locators (`pw:` prefix)
|
|
134
|
+
|
|
135
|
+
Need semantic, role-based targeting on a fresh page (no snapshot required)?
|
|
136
|
+
Use the `pw:` prefix. Syntax mirrors Playwright, and chains with `>>`:
|
|
137
|
+
|
|
138
|
+
```bash
|
|
139
|
+
# By ARIA role (with accessible name — exact / substring / regex)
|
|
140
|
+
dp click 'pw:role=button[name="Submit"]'
|
|
141
|
+
dp click 'pw:role=button[name=/^Sign/i]'
|
|
142
|
+
dp click 'pw:role=link[name=More]' # substring
|
|
143
|
+
|
|
144
|
+
# By visible text (exact / substring / regex)
|
|
145
|
+
dp click 'pw:text="Login"' # exact
|
|
146
|
+
dp click 'pw:text=Login' # substring (case-insensitive)
|
|
147
|
+
dp click 'pw:text=/^log/i' # regex
|
|
148
|
+
|
|
149
|
+
# By form affordances
|
|
150
|
+
dp fill 'pw:placeholder=Search…' "chatgpt"
|
|
151
|
+
dp fill 'pw:label="Email"' "a@b.com"
|
|
152
|
+
dp click 'pw:alt="Logo"'
|
|
153
|
+
dp click 'pw:title="Close"'
|
|
154
|
+
dp click 'pw:testid=submit-btn' # data-testid / data-test-id / data-test
|
|
155
|
+
|
|
156
|
+
# Chain with >> (each step narrows the scope)
|
|
157
|
+
dp click 'pw:css=.sidebar >> role=listitem[name="Chat"] >> nth=2'
|
|
158
|
+
dp click 'pw:css=li >> has-text="Python"'
|
|
159
|
+
dp click 'pw:role=list >> nth=-1' # negative index = from end
|
|
160
|
+
|
|
161
|
+
# Raw css/xpath chunks mix freely
|
|
162
|
+
dp click 'pw:xpath=//nav >> role=link[name=Docs]'
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
**Matchers**: `role` / `text` / `label` / `placeholder` / `alt` / `title` /
|
|
166
|
+
`testid` / `css` / `xpath` / `nth` / `has-text` / `visible`
|
|
167
|
+
|
|
168
|
+
**Value forms**: `bare` = substring, `"quoted"` = exact, `/pattern/flags` = regex
|
|
169
|
+
|
|
170
|
+
**Visibility**: `role` / `text` / `has-text` automatically skip elements hidden
|
|
171
|
+
via `display:none`, `visibility:hidden`, `hidden` attribute, or
|
|
172
|
+
`aria-hidden="true"` (matches Playwright semantics).
|
|
173
|
+
|
|
174
|
+
**Shadow DOM**: open shadow roots are traversed automatically.
|
|
175
|
+
|
|
176
|
+
Under the hood the matcher chain is evaluated in-page as JS, the target element
|
|
177
|
+
is tagged with a one-shot `data-dp-ref` attribute, and DrissionPage resolves it
|
|
178
|
+
by that attribute — bypassing stale classes / CSS Modules / dynamic XPath.
|
|
179
|
+
|
|
132
180
|
## Anti-Detection (stealth)
|
|
133
181
|
|
|
134
182
|
Bypass `navigator.webdriver`, `HeadlessChrome` UA, empty `plugins`, SwiftShader WebGL,
|
|
@@ -75,7 +75,7 @@ def _get_page(session: str, raw: bool = False):
|
|
|
75
75
|
|
|
76
76
|
|
|
77
77
|
_KNOWN_PREFIX = re.compile(
|
|
78
|
-
r'^(css[:=]|xpath[:=]|text[:=^$]|tag[:=^$]|@@?[\w]|ref:)', re.IGNORECASE)
|
|
78
|
+
r'^(css[:=]|xpath[:=]|text[:=^$]|tag[:=^$]|@@?[\w]|ref:|pw:)', re.IGNORECASE)
|
|
79
79
|
_CSS_ID_CLASS = re.compile(r'^[#.][\w-]') # #id .class
|
|
80
80
|
_CSS_TAG_SEL = re.compile(r'^[\w-]+[.#\[][\w-]') # div.class a[href] h1#title
|
|
81
81
|
_CSS_COMBINATOR = re.compile(r'[\[>+~]|::|:(?:nth|first|last|not|has)') # [attr] > + ~ ::pseudo :nth-child
|
|
@@ -145,17 +145,58 @@ def _mark_element_by_backend_id(page, backend_node_id: int) -> str:
|
|
|
145
145
|
return marker
|
|
146
146
|
|
|
147
147
|
|
|
148
|
-
def
|
|
149
|
-
"""
|
|
148
|
+
def _resolve_pw(expr: str, session: str, page) -> str:
    """Resolve a ``pw:`` expression to a ``@data-dp-ref=<marker>`` locator.

    The expression is parsed into matchers, turned into a JS snippet with
    ``build_pw_js`` and executed through ``page.run_js``; the truthy value the
    script returns is used as the marker.

    :param expr: locator text with the leading ``pw:`` already removed.
    :param session: session name, used only to obtain a page when ``page``
        is None.
    :param page: optional page object; fetched via ``_get_page`` when None.
    :return: ``@data-dp-ref=<marker>``.
    :raises SystemExit: after reporting through ``error`` when the expression
        has a syntax error (``PW_SYNTAX``), the session cannot be reached
        (``SESSION_NOT_FOUND``), the script fails (``PW_EVAL_FAILED``) or
        nothing matches (``PW_NOT_FOUND``).
    """
    from dp_cli.locators import parse_pw, build_pw_js, PwParseError

    # Parse first so a syntax error is reported without touching the browser.
    try:
        matchers = parse_pw(expr)
    except PwParseError as e:
        error(f'pw 定位器语法错误: {e}', code='PW_SYNTAX')
        raise SystemExit(1) from e

    if page is None:
        # SystemExit is not an Exception subclass, so an exit raised inside
        # _get_page passes through this handler untouched.
        try:
            page = _get_page(session)
        except Exception as e:
            error('无法连接浏览器会话', code='SESSION_NOT_FOUND', detail=str(e))
            raise SystemExit(1) from e

    js = build_pw_js(matchers)
    try:
        marker = page.run_js(js)
    except Exception as e:
        error('pw 定位器求值失败', code='PW_EVAL_FAILED', detail=str(e))
        raise SystemExit(1) from e

    # A falsy result is treated as "no element matched".
    if not marker:
        error(f'pw 定位器未匹配到元素: pw:{expr}', code='PW_NOT_FOUND')
        raise SystemExit(1)

    return f'@data-dp-ref={marker}'
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def resolve_locator(locator: str, session: str = 'default', page=None) -> str:
|
|
184
|
+
"""解析定位器:ref:N 展开 + pw: 表达式求值 + 智能前缀补全。
|
|
185
|
+
|
|
186
|
+
- pw:<expr>:Playwright 风格(role/text/label/placeholder/alt/title/
|
|
187
|
+
testid/css/xpath/nth/has-text/visible),支持 >> 链式。通过 JS 求值
|
|
188
|
+
+ 打标,返回 @data-dp-ref=<marker>。
|
|
189
|
+
- ref:N:从 session 的 refs 映射中查找。
|
|
190
|
+
· 有 backendNodeId 时:通过 CDP 现场打临时属性,返回 @data-dp-ref=
|
|
191
|
+
(最鲁棒,绕开 CSS Modules / 动态 class / xpath 变化)
|
|
192
|
+
· 无 backendNodeId 或打标失败时:回落到保存的 locator 字符串
|
|
193
|
+
· 再失败,用 name 作 text 定位器
|
|
194
|
+
- 其它:智能补全 css:/xpath: 前缀。
|
|
156
195
|
|
|
157
196
|
:param page: 可选,传入避免内部再调用 _get_page;为 None 时按需懒加载。
|
|
158
197
|
"""
|
|
198
|
+
if locator.startswith('pw:'):
|
|
199
|
+
return _resolve_pw(locator[3:], session, page)
|
|
159
200
|
if not locator.startswith('ref:'):
|
|
160
201
|
return normalize_locator(locator)
|
|
161
202
|
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
# -*- coding:utf-8 -*-
|
|
2
|
+
"""Playwright 风格定位器(pw: 前缀)。
|
|
3
|
+
|
|
4
|
+
模块入口只暴露最常用的 API:
|
|
5
|
+
- parse_pw(expr): 解析 'css=.btn >> role=button[name="OK"]' → matcher 列表
|
|
6
|
+
- build_pw_js(matchers): 把 matcher 列表转成可执行的 JS 脚本
|
|
7
|
+
"""
|
|
8
|
+
from .playwright import parse_pw, PwParseError # noqa: F401
|
|
9
|
+
from .pw_js import build_pw_js # noqa: F401
|
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
# -*- coding:utf-8 -*-
|
|
2
|
+
"""Playwright 风格定位器解析器(纯 Python,不依赖浏览器)。
|
|
3
|
+
|
|
4
|
+
输入:去掉 'pw:' 前缀的表达式字符串,例如
|
|
5
|
+
css=.sidebar >> role=listitem[name="Chat"] >> nth=2
|
|
6
|
+
role=button[name=/^Sign/i]
|
|
7
|
+
text="Login" >> has-text="今天"
|
|
8
|
+
|
|
9
|
+
输出:matcher 列表(list[dict]),交给 JS 逐段求值。
|
|
10
|
+
|
|
11
|
+
matcher dict 结构示例:
|
|
12
|
+
{'type': 'role', 'role': 'button',
|
|
13
|
+
'name': {'kind': 'exact', 'value': 'Submit'}} # 或 None
|
|
14
|
+
{'type': 'text', 'value': {'kind': 'substr', 'value': 'Login'}}
|
|
15
|
+
{'type': 'label', 'value': {'kind': 'exact', 'value': 'Email'}}
|
|
16
|
+
{'type': 'placeholder', 'value': {'kind': 'substr', 'value': 'search'}}
|
|
17
|
+
{'type': 'alt' | 'title' | 'testid', 'value': {...}}
|
|
18
|
+
{'type': 'css', 'value': '.btn'}
|
|
19
|
+
{'type': 'xpath', 'value': '//div[@id="foo"]'}
|
|
20
|
+
{'type': 'nth', 'index': 2}
|
|
21
|
+
{'type': 'has-text', 'value': {'kind': 'substr', 'value': 'Price'}}
|
|
22
|
+
{'type': 'visible', 'value': True}
|
|
23
|
+
|
|
24
|
+
值规格(value spec):
|
|
25
|
+
{'kind': 'exact', 'value': 'Submit'} # "Submit" / 'Submit'
|
|
26
|
+
{'kind': 'substr', 'value': 'Sub'} # Submit(裸值)
|
|
27
|
+
{'kind': 'regex', 'value': '^Sign', 'flags': 'i'} # /^Sign/i
|
|
28
|
+
"""
|
|
29
|
+
import re
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class PwParseError(ValueError):
    """Raised when a ``pw:`` locator expression is syntactically invalid."""
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
# Chunk types allowed at the top level (the ``>>`` separator is not a type).
# Value matchers: their right-hand side is parsed into a value spec.
_VALUE_TYPES = (
    'text',
    'label',
    'placeholder',
    'alt',
    'title',
    'testid',
    'has-text',
)
# Raw selectors: the right-hand side is kept verbatim, never parsed further.
_RAW_TYPES = ('css', 'xpath')
_ALL_TYPES = (*_VALUE_TYPES, *_RAW_TYPES, 'role', 'nth', 'visible')
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def parse_pw(expr: str) -> list:
    """Parse a ``pw:`` expression (prefix already removed) into matchers.

    The expression is split into chunks by ``_split_chunks`` and each chunk
    is converted to a matcher dict by ``_parse_chunk``.

    :param expr: expression text such as ``css=.btn >> nth=0``.
    :return: list of matcher dicts, one per chunk, in chain order.
    :raises PwParseError: if the expression is empty or blank, yields no
        chunk, or contains a chunk that cannot be parsed.
    """
    if not (expr and expr.strip()):
        raise PwParseError('空的 pw 表达式')

    pieces = _split_chunks(expr)
    if not pieces:
        raise PwParseError(f'未找到有效的 chunk: {expr!r}')

    matchers = []
    for piece in pieces:
        matchers.append(_parse_chunk(piece))
    return matchers
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
58
|
+
# chunk 切分:按 ' >> ' 分段,尊重引号和正则字面量
|
|
59
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
60
|
+
|
|
61
|
+
def _split_chunks(expr: str) -> list:
    """Split a pw expression into chunks on the ``>>`` separator.

    Rules:
      - ``>>`` inside a quoted string (``'`` or ``"``) does not split.
      - ``>>`` inside a regex literal ``/.../flags`` does not split. A regex
        literal starts at a ``/`` whose previous non-space character is ``=``.
      - Regex literals are only recognised in value chunks. In raw ``css=`` /
        ``xpath=`` chunks a ``/`` is ordinary selector text, so
        ``xpath=/html >> nth=0`` splits normally instead of being rejected
        as an unclosed regex.
      - Spaces around ``>>`` are optional.

    :param expr: expression text without the ``pw:`` prefix.
    :return: non-empty chunk strings, stripped, in order.
    :raises PwParseError: if a quote or a regex literal is left unclosed.
    """
    # Same keys as the module's raw chunk types (css / xpath).
    raw_prefixes = ('css=', 'xpath=')

    parts = []
    buf = []
    i = 0
    n = len(expr)
    in_quote = None    # None, or the quote character that opened the string
    in_regex = False
    while i < n:
        c = expr[i]

        if in_quote or in_regex:
            buf.append(c)
            # A backslash escapes the following character in both states.
            if c == '\\' and i + 1 < n:
                buf.append(expr[i + 1])
                i += 2
                continue
            i += 1
            if in_quote:
                if c == in_quote:
                    in_quote = None
            elif c == '/':
                # Closing slash: the letters right after it are the flags.
                while i < n and expr[i].isalpha():
                    buf.append(expr[i])
                    i += 1
                in_regex = False
            continue

        # Neutral state: neither inside a quote nor inside a regex literal.
        if c in ('"', "'"):
            in_quote = c
        elif c == '/':
            head = ''.join(buf)
            if (head.rstrip(' ').endswith('=')
                    and not head.lstrip().startswith(raw_prefixes)):
                in_regex = True
        elif c == '>' and expr.startswith('>>', i):
            parts.append(''.join(buf).strip())
            buf = []
            i += 2
            continue
        buf.append(c)
        i += 1

    if in_quote:
        raise PwParseError(f'引号未闭合: {expr!r}')
    if in_regex:
        raise PwParseError(f'正则字面量未闭合: {expr!r}')

    tail = ''.join(buf).strip()
    if tail:
        parts.append(tail)
    # Drop empty segments produced by leading or doubled separators.
    return [p for p in parts if p]
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
142
|
+
# 单个 chunk 解析
|
|
143
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
144
|
+
|
|
145
|
+
# role=<name> optionally followed by a filter suffix (captured as group 2).
_ROLE_RE = re.compile(r'^role=([a-zA-Z][\w-]*)(.*)$')
# The only supported role suffix: [name=<value spec>].
_ROLE_NAME_RE = re.compile(r'^\[name=(.+)\]$')
# nth=<integer>; a leading minus sign is accepted.
_NTH_RE = re.compile(r'^nth=(-?\d+)$')


def _parse_chunk(chunk: str) -> dict:
    """Convert a single chunk of a pw expression into a matcher dict.

    Recognised forms, tried in this order: ``visible`` / ``visible=true`` /
    ``visible=false``, ``nth=N``, ``role=X`` with an optional ``[name=...]``
    filter, the value matchers listed in ``_VALUE_TYPES``, and the raw
    ``css=`` / ``xpath=`` selectors listed in ``_RAW_TYPES``.

    :param chunk: one segment of the expression; surrounding whitespace is
        ignored.
    :return: matcher dict whose ``type`` key names the matcher.
    :raises PwParseError: if the chunk is empty, has an unsupported role
        filter, has an empty raw selector, or matches no known form.
    """
    text = chunk.strip()
    if not text:
        raise PwParseError('空 chunk')

    # Visibility filter.
    if text in ('visible', 'visible=true'):
        return {'type': 'visible', 'value': True}
    if text == 'visible=false':
        return {'type': 'visible', 'value': False}

    # Positional pick.
    nth_match = _NTH_RE.match(text)
    if nth_match:
        return {'type': 'nth', 'index': int(nth_match.group(1))}

    # Role, optionally narrowed by accessible name.
    role_match = _ROLE_RE.match(text)
    if role_match:
        role = role_match.group(1)
        suffix = role_match.group(2).strip()
        name_spec = None
        if suffix:
            name_match = _ROLE_NAME_RE.match(suffix)
            if not name_match:
                raise PwParseError(
                    f'role= 后只支持 [name=...] 过滤: {chunk!r}')
            name_spec = _parse_value(name_match.group(1))
        return {'type': 'role', 'role': role, 'name': name_spec}

    # Everything else is key=payload; no key contains '=', so splitting at
    # the first '=' identifies the same prefix a startswith() check would.
    key, sep, payload = text.partition('=')
    if sep and key in _VALUE_TYPES:
        return {'type': key, 'value': _parse_value(payload)}
    if sep and key in _RAW_TYPES:
        selector = payload.strip()
        if not selector:
            raise PwParseError(f'{key}= 后面不能为空: {chunk!r}')
        return {'type': key, 'value': selector}

    allowed = ", ".join(_ALL_TYPES)
    raise PwParseError(
        f'无法识别的 pw chunk: {chunk!r};合法类型: {allowed}')
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
202
|
+
# 值规格解析(value spec)
|
|
203
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
204
|
+
|
|
205
|
+
# /pattern/flags — the pattern is non-empty and the flags are lowercase letters.
_REGEX_RE = re.compile(r'^/(.+)/([a-z]*)$', re.DOTALL)


def _parse_value(raw: str) -> dict:
    r"""Parse a matcher value string into a value spec.

    Forms, checked in priority order:
      ``"..."`` or ``'...'``  -> ``{'kind': 'exact', 'value': ...}``; a
                                 backslash-escaped quote and ``\\`` are
                                 unescaped
      ``/pattern/flags``      -> ``{'kind': 'regex', 'value': ..., 'flags': ...}``
      anything else           -> ``{'kind': 'substr', 'value': ...}``

    :param raw: value text; surrounding whitespace is ignored.
    :return: value spec dict.
    :raises PwParseError: if the value is empty, or a regex literal carries
        a flag outside ``gimsuy`` or repeats a flag.
    """
    s = raw.strip()
    if not s:
        raise PwParseError('值不能为空')

    # Quoted -> exact match.
    if len(s) >= 2 and s[0] == s[-1] and s[0] in ('"', "'"):
        quote = s[0]
        inner = s[1:-1]
        # Unescape the quote character first, then doubled backslashes.
        inner = inner.replace('\\' + quote, quote).replace('\\\\', '\\')
        return {'kind': 'exact', 'value': inner}

    # /re/flags -> regex.
    m = _REGEX_RE.match(s)
    if m:
        pattern = m.group(1)
        flags = m.group(2) or ''
        # Accept only flags a JS RegExp allows. JS also rejects a repeated
        # flag, so report it here as a syntax error rather than letting it
        # fail later during evaluation.
        seen = set()
        for f in flags:
            if f not in 'gimsuy':
                raise PwParseError(f'非法的正则 flag: {f!r} in {raw!r}')
            if f in seen:
                raise PwParseError(f'重复的正则 flag: {f!r} in {raw!r}')
            seen.add(f)
        return {'kind': 'regex', 'value': pattern, 'flags': flags}

    # Bare value -> substring match.
    return {'kind': 'substr', 'value': s}
|