dp-cli 0.3.2__tar.gz → 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. {dp_cli-0.3.2 → dp_cli-0.4.0}/PKG-INFO +50 -2
  2. {dp_cli-0.3.2 → dp_cli-0.4.0}/README.md +49 -1
  3. {dp_cli-0.3.2 → dp_cli-0.4.0}/dp_cli/commands/_utils.py +49 -8
  4. dp_cli-0.4.0/dp_cli/locators/__init__.py +9 -0
  5. dp_cli-0.4.0/dp_cli/locators/playwright.py +236 -0
  6. dp_cli-0.4.0/dp_cli/locators/pw_js.py +395 -0
  7. {dp_cli-0.3.2 → dp_cli-0.4.0}/dp_cli.egg-info/PKG-INFO +50 -2
  8. {dp_cli-0.3.2 → dp_cli-0.4.0}/dp_cli.egg-info/SOURCES.txt +4 -0
  9. {dp_cli-0.3.2 → dp_cli-0.4.0}/pyproject.toml +1 -1
  10. dp_cli-0.4.0/tests/test_pw_locator.py +310 -0
  11. {dp_cli-0.3.2 → dp_cli-0.4.0}/dp_cli/__init__.py +0 -0
  12. {dp_cli-0.3.2 → dp_cli-0.4.0}/dp_cli/bridge.py +0 -0
  13. {dp_cli-0.3.2 → dp_cli-0.4.0}/dp_cli/bridge_manager.py +0 -0
  14. {dp_cli-0.3.2 → dp_cli-0.4.0}/dp_cli/commands/__init__.py +0 -0
  15. {dp_cli-0.3.2 → dp_cli-0.4.0}/dp_cli/commands/browser.py +0 -0
  16. {dp_cli-0.3.2 → dp_cli-0.4.0}/dp_cli/commands/element.py +0 -0
  17. {dp_cli-0.3.2 → dp_cli-0.4.0}/dp_cli/commands/keyboard.py +0 -0
  18. {dp_cli-0.3.2 → dp_cli-0.4.0}/dp_cli/commands/misc.py +0 -0
  19. {dp_cli-0.3.2 → dp_cli-0.4.0}/dp_cli/commands/network.py +0 -0
  20. {dp_cli-0.3.2 → dp_cli-0.4.0}/dp_cli/commands/page.py +0 -0
  21. {dp_cli-0.3.2 → dp_cli-0.4.0}/dp_cli/commands/snapshot_cmd.py +0 -0
  22. {dp_cli-0.3.2 → dp_cli-0.4.0}/dp_cli/commands/storage.py +0 -0
  23. {dp_cli-0.3.2 → dp_cli-0.4.0}/dp_cli/commands/tab.py +0 -0
  24. {dp_cli-0.3.2 → dp_cli-0.4.0}/dp_cli/main.py +0 -0
  25. {dp_cli-0.3.2 → dp_cli-0.4.0}/dp_cli/output.py +0 -0
  26. {dp_cli-0.3.2 → dp_cli-0.4.0}/dp_cli/session.py +0 -0
  27. {dp_cli-0.3.2 → dp_cli-0.4.0}/dp_cli/snapshot/__init__.py +0 -0
  28. {dp_cli-0.3.2 → dp_cli-0.4.0}/dp_cli/snapshot/a11y.py +0 -0
  29. {dp_cli-0.3.2 → dp_cli-0.4.0}/dp_cli/snapshot/clickable.py +0 -0
  30. {dp_cli-0.3.2 → dp_cli-0.4.0}/dp_cli/snapshot/clickable_js.py +0 -0
  31. {dp_cli-0.3.2 → dp_cli-0.4.0}/dp_cli/snapshot/extract.py +0 -0
  32. {dp_cli-0.3.2 → dp_cli-0.4.0}/dp_cli/snapshot/js_scripts.py +0 -0
  33. {dp_cli-0.3.2 → dp_cli-0.4.0}/dp_cli/snapshot/utils.py +0 -0
  34. {dp_cli-0.3.2 → dp_cli-0.4.0}/dp_cli/stealth.py +0 -0
  35. {dp_cli-0.3.2 → dp_cli-0.4.0}/dp_cli.egg-info/dependency_links.txt +0 -0
  36. {dp_cli-0.3.2 → dp_cli-0.4.0}/dp_cli.egg-info/entry_points.txt +0 -0
  37. {dp_cli-0.3.2 → dp_cli-0.4.0}/dp_cli.egg-info/requires.txt +0 -0
  38. {dp_cli-0.3.2 → dp_cli-0.4.0}/dp_cli.egg-info/top_level.txt +0 -0
  39. {dp_cli-0.3.2 → dp_cli-0.4.0}/setup.cfg +0 -0
  40. {dp_cli-0.3.2 → dp_cli-0.4.0}/tests/test_bridge_integration.py +0 -0
  41. {dp_cli-0.3.2 → dp_cli-0.4.0}/tests/test_bridge_manager.py +0 -0
  42. {dp_cli-0.3.2 → dp_cli-0.4.0}/tests/test_clickable.py +0 -0
  43. {dp_cli-0.3.2 → dp_cli-0.4.0}/tests/test_resolve_locator.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dp-cli
3
- Version: 0.3.2
3
+ Version: 0.4.0
4
4
  Summary: A powerful CLI for DrissionPage — browser automation, structured data extraction, network listening and more.
5
5
  License: BSD-3-Clause
6
6
  Project-URL: Homepage, https://github.com/mofanx/dp-cli
@@ -31,7 +31,8 @@ A powerful CLI for [DrissionPage](https://github.com/g1879/DrissionPage) — bro
31
31
  and custom menu items the a11y tree misses; every element gets an `[N]` ref with
32
32
  confidence markers (`⚡` medium, `?` low)
33
33
  - **`dp scan`** — fast Vimium-style listing of interactive elements (viewport-only mode available)
34
- - **Powerful locator syntax** — descriptive strings stable across navigation
34
+ - **Powerful locator syntax** — descriptive strings stable across navigation, plus
35
+ Playwright-style `pw:role=button[name="Submit"] >> nth=2` chains
35
36
  - **Structured data extraction** — `extract` + `query` + `snapshot` for scraping list pages
36
37
  - **Network listening** — capture XHR/Fetch requests and response bodies
37
38
  - **Stealth patches** — `dp stealth` bypasses common automation detections
@@ -150,6 +151,53 @@ dp scan --confidence high # only the sure-thing clickables
150
151
  Both `snapshot` and `scan` share the same `[N]` ref numbering per session, so
151
152
  `dp click "ref:N"` works regardless of which one produced the snapshot.
152
153
 
154
+ ## Playwright-style locators (`pw:` prefix)
155
+
156
+ Need semantic, role-based targeting on a fresh page (no snapshot required)?
157
+ Use the `pw:` prefix. Syntax mirrors Playwright, and chains with `>>`:
158
+
159
+ ```bash
160
+ # By ARIA role (with accessible name — exact / substring / regex)
161
+ dp click 'pw:role=button[name="Submit"]'
162
+ dp click 'pw:role=button[name=/^Sign/i]'
163
+ dp click 'pw:role=link[name=More]' # substring
164
+
165
+ # By visible text (exact / substring / regex)
166
+ dp click 'pw:text="Login"' # exact
167
+ dp click 'pw:text=Login' # substring (case-insensitive)
168
+ dp click 'pw:text=/^log/i' # regex
169
+
170
+ # By form affordances
171
+ dp fill 'pw:placeholder=Search…' "chatgpt"
172
+ dp fill 'pw:label="Email"' "a@b.com"
173
+ dp click 'pw:alt="Logo"'
174
+ dp click 'pw:title="Close"'
175
+ dp click 'pw:testid=submit-btn' # data-testid / data-test-id / data-test
176
+
177
+ # Chain with >> (each step narrows the scope)
178
+ dp click 'pw:css=.sidebar >> role=listitem[name="Chat"] >> nth=2'
179
+ dp click 'pw:css=li >> has-text="Python"'
180
+ dp click 'pw:role=list >> nth=-1' # negative index = from end
181
+
182
+ # Raw css/xpath chunks mix freely
183
+ dp click 'pw:xpath=//nav >> role=link[name=Docs]'
184
+ ```
185
+
186
+ **Matchers**: `role` / `text` / `label` / `placeholder` / `alt` / `title` /
187
+ `testid` / `css` / `xpath` / `nth` / `has-text` / `visible`
188
+
189
+ **Value forms**: `bare` = substring, `"quoted"` = exact, `/pattern/flags` = regex
190
+
191
+ **Visibility**: `role` / `text` / `has-text` automatically skip elements hidden
192
+ via `display:none`, `visibility:hidden`, `hidden` attribute, or
193
+ `aria-hidden="true"` (matches Playwright semantics).
194
+
195
+ **Shadow DOM**: open shadow roots are traversed automatically.
196
+
197
+ Under the hood the matcher chain is evaluated in-page as JS, the target element
198
+ is tagged with a one-shot `data-dp-ref` attribute, and DrissionPage resolves it
199
+ by that attribute — bypassing stale classes / CSS Modules / dynamic XPath.
200
+
153
201
  ## Anti-Detection (stealth)
154
202
 
155
203
  Bypass `navigator.webdriver`, `HeadlessChrome` UA, empty `plugins`, SwiftShader WebGL,
@@ -10,7 +10,8 @@ A powerful CLI for [DrissionPage](https://github.com/g1879/DrissionPage) — bro
10
10
  and custom menu items the a11y tree misses; every element gets an `[N]` ref with
11
11
  confidence markers (`⚡` medium, `?` low)
12
12
  - **`dp scan`** — fast Vimium-style listing of interactive elements (viewport-only mode available)
13
- - **Powerful locator syntax** — descriptive strings stable across navigation
13
+ - **Powerful locator syntax** — descriptive strings stable across navigation, plus
14
+ Playwright-style `pw:role=button[name="Submit"] >> nth=2` chains
14
15
  - **Structured data extraction** — `extract` + `query` + `snapshot` for scraping list pages
15
16
  - **Network listening** — capture XHR/Fetch requests and response bodies
16
17
  - **Stealth patches** — `dp stealth` bypasses common automation detections
@@ -129,6 +130,53 @@ dp scan --confidence high # only the sure-thing clickables
129
130
  Both `snapshot` and `scan` share the same `[N]` ref numbering per session, so
130
131
  `dp click "ref:N"` works regardless of which one produced the snapshot.
131
132
 
133
+ ## Playwright-style locators (`pw:` prefix)
134
+
135
+ Need semantic, role-based targeting on a fresh page (no snapshot required)?
136
+ Use the `pw:` prefix. Syntax mirrors Playwright, and chains with `>>`:
137
+
138
+ ```bash
139
+ # By ARIA role (with accessible name — exact / substring / regex)
140
+ dp click 'pw:role=button[name="Submit"]'
141
+ dp click 'pw:role=button[name=/^Sign/i]'
142
+ dp click 'pw:role=link[name=More]' # substring
143
+
144
+ # By visible text (exact / substring / regex)
145
+ dp click 'pw:text="Login"' # exact
146
+ dp click 'pw:text=Login' # substring (case-insensitive)
147
+ dp click 'pw:text=/^log/i' # regex
148
+
149
+ # By form affordances
150
+ dp fill 'pw:placeholder=Search…' "chatgpt"
151
+ dp fill 'pw:label="Email"' "a@b.com"
152
+ dp click 'pw:alt="Logo"'
153
+ dp click 'pw:title="Close"'
154
+ dp click 'pw:testid=submit-btn' # data-testid / data-test-id / data-test
155
+
156
+ # Chain with >> (each step narrows the scope)
157
+ dp click 'pw:css=.sidebar >> role=listitem[name="Chat"] >> nth=2'
158
+ dp click 'pw:css=li >> has-text="Python"'
159
+ dp click 'pw:role=list >> nth=-1' # negative index = from end
160
+
161
+ # Raw css/xpath chunks mix freely
162
+ dp click 'pw:xpath=//nav >> role=link[name=Docs]'
163
+ ```
164
+
165
+ **Matchers**: `role` / `text` / `label` / `placeholder` / `alt` / `title` /
166
+ `testid` / `css` / `xpath` / `nth` / `has-text` / `visible`
167
+
168
+ **Value forms**: `bare` = substring, `"quoted"` = exact, `/pattern/flags` = regex
169
+
170
+ **Visibility**: `role` / `text` / `has-text` automatically skip elements hidden
171
+ via `display:none`, `visibility:hidden`, `hidden` attribute, or
172
+ `aria-hidden="true"` (matches Playwright semantics).
173
+
174
+ **Shadow DOM**: open shadow roots are traversed automatically.
175
+
176
+ Under the hood the matcher chain is evaluated in-page as JS, the target element
177
+ is tagged with a one-shot `data-dp-ref` attribute, and DrissionPage resolves it
178
+ by that attribute — bypassing stale classes / CSS Modules / dynamic XPath.
179
+
132
180
  ## Anti-Detection (stealth)
133
181
 
134
182
  Bypass `navigator.webdriver`, `HeadlessChrome` UA, empty `plugins`, SwiftShader WebGL,
@@ -75,7 +75,7 @@ def _get_page(session: str, raw: bool = False):
75
75
 
76
76
 
77
77
  _KNOWN_PREFIX = re.compile(
78
- r'^(css[:=]|xpath[:=]|text[:=^$]|tag[:=^$]|@@?[\w]|ref:)', re.IGNORECASE)
78
+ r'^(css[:=]|xpath[:=]|text[:=^$]|tag[:=^$]|@@?[\w]|ref:|pw:)', re.IGNORECASE)
79
79
  _CSS_ID_CLASS = re.compile(r'^[#.][\w-]') # #id .class
80
80
  _CSS_TAG_SEL = re.compile(r'^[\w-]+[.#\[][\w-]') # div.class a[href] h1#title
81
81
  _CSS_COMBINATOR = re.compile(r'[\[>+~]|::|:(?:nth|first|last|not|has)') # [attr] > + ~ ::pseudo :nth-child
@@ -145,17 +145,58 @@ def _mark_element_by_backend_id(page, backend_node_id: int) -> str:
145
145
  return marker
146
146
 
147
147
 
148
- def resolve_locator(locator: str, session: str = 'default', page=None) -> str:
149
- """解析定位器:ref:N 展开 + 智能前缀补全。
148
def _resolve_pw(expr: str, session: str, page) -> str:
    """Resolve a ``pw:`` expression to a ``@data-dp-ref=<marker>`` locator.

    The expression is parsed with ``parse_pw``, turned into a script by
    ``build_pw_js`` and run through ``page.run_js``; the truthy value that
    call returns is used as the marker.

    :param expr: the expression with the ``pw:`` prefix already removed.
    :param session: session name, used to obtain a page when ``page`` is None.
    :param page: optional page object; fetched via ``_get_page`` when None.
    :return: the locator string ``@data-dp-ref=<marker>``.
    :raises SystemExit: after reporting through ``error`` when the expression
        has a syntax error, the browser session cannot be reached, the script
        fails to evaluate, or no marker comes back.
    """
    from dp_cli.locators import parse_pw, build_pw_js, PwParseError

    try:
        matchers = parse_pw(expr)
    except PwParseError as exc:
        error(f'pw 定位器语法错误: {exc}', code='PW_SYNTAX')
        raise SystemExit(1)

    if page is None:
        # SystemExit derives from BaseException, so a SystemExit raised by
        # _get_page passes through ``except Exception`` untouched.
        try:
            page = _get_page(session)
        except Exception as exc:
            error('无法连接浏览器会话', code='SESSION_NOT_FOUND', detail=str(exc))
            raise SystemExit(1)

    script = build_pw_js(matchers)
    try:
        marker = page.run_js(script)
    except Exception as exc:
        error('pw 定位器求值失败', code='PW_EVAL_FAILED', detail=str(exc))
        raise SystemExit(1)

    if not marker:
        error(f'pw 定位器未匹配到元素: pw:{expr}', code='PW_NOT_FOUND')
        raise SystemExit(1)

    return f'@data-dp-ref={marker}'
181
+
182
+
183
+ def resolve_locator(locator: str, session: str = 'default', page=None) -> str:
184
+ """解析定位器:ref:N 展开 + pw: 表达式求值 + 智能前缀补全。
185
+
186
+ - pw:<expr>:Playwright 风格(role/text/label/placeholder/alt/title/
187
+ testid/css/xpath/nth/has-text/visible),支持 >> 链式。通过 JS 求值
188
+ + 打标,返回 @data-dp-ref=<marker>。
189
+ - ref:N:从 session 的 refs 映射中查找。
190
+ · 有 backendNodeId 时:通过 CDP 现场打临时属性,返回 @data-dp-ref=
191
+ (最鲁棒,绕开 CSS Modules / 动态 class / xpath 变化)
192
+ · 无 backendNodeId 或打标失败时:回落到保存的 locator 字符串
193
+ · 再失败,用 name 作 text 定位器
194
+ - 其它:智能补全 css:/xpath: 前缀。
156
195
 
157
196
  :param page: 可选,传入避免内部再调用 _get_page;为 None 时按需懒加载。
158
197
  """
198
+ if locator.startswith('pw:'):
199
+ return _resolve_pw(locator[3:], session, page)
159
200
  if not locator.startswith('ref:'):
160
201
  return normalize_locator(locator)
161
202
 
@@ -0,0 +1,9 @@
1
+ # -*- coding:utf-8 -*-
2
+ """Playwright 风格定位器(pw: 前缀)。
3
+
4
+ 模块入口只暴露最常用的 API:
5
+ - parse_pw(expr): 解析 'css=.btn >> role=button[name="OK"]' → matcher 列表
6
+ - build_pw_js(matchers): 把 matcher 列表转成可执行的 JS 脚本
7
+ """
8
+ from .playwright import parse_pw, PwParseError # noqa: F401
9
+ from .pw_js import build_pw_js # noqa: F401
@@ -0,0 +1,236 @@
1
+ # -*- coding:utf-8 -*-
2
+ """Playwright 风格定位器解析器(纯 Python,不依赖浏览器)。
3
+
4
+ 输入:去掉 'pw:' 前缀的表达式字符串,例如
5
+ css=.sidebar >> role=listitem[name="Chat"] >> nth=2
6
+ role=button[name=/^Sign/i]
7
+ text="Login" >> has-text="今天"
8
+
9
+ 输出:matcher 列表(list[dict]),交给 JS 逐段求值。
10
+
11
+ matcher dict 结构示例:
12
+ {'type': 'role', 'role': 'button',
13
+ 'name': {'kind': 'exact', 'value': 'Submit'}} # 或 None
14
+ {'type': 'text', 'value': {'kind': 'substr', 'value': 'Login'}}
15
+ {'type': 'label', 'value': {'kind': 'exact', 'value': 'Email'}}
16
+ {'type': 'placeholder', 'value': {'kind': 'substr', 'value': 'search'}}
17
+ {'type': 'alt' | 'title' | 'testid', 'value': {...}}
18
+ {'type': 'css', 'value': '.btn'}
19
+ {'type': 'xpath', 'value': '//div[@id="foo"]'}
20
+ {'type': 'nth', 'index': 2}
21
+ {'type': 'has-text', 'value': {'kind': 'substr', 'value': 'Price'}}
22
+ {'type': 'visible', 'value': True}
23
+
24
+ 值规格(value spec):
25
+ {'kind': 'exact', 'value': 'Submit'} # "Submit" / 'Submit'
26
+ {'kind': 'substr', 'value': 'Sub'} # Submit(裸值)
27
+ {'kind': 'regex', 'value': '^Sign', 'flags': 'i'} # /^Sign/i
28
+ """
29
+ import re
30
+
31
+
32
class PwParseError(ValueError):
    """Raised when a ``pw:`` expression is syntactically invalid."""
34
+
35
+
36
+ # 允许的顶层 chunk 类型(不含 >> 分段符)
37
+ _VALUE_TYPES = (
38
+ 'text', 'label', 'placeholder', 'alt', 'title', 'testid', 'has-text'
39
+ )
40
+ _RAW_TYPES = ('css', 'xpath') # 保留原值,不再解析
41
+ _ALL_TYPES = _VALUE_TYPES + _RAW_TYPES + ('role', 'nth', 'visible')
42
+
43
+
44
def parse_pw(expr: str) -> list:
    """Parse a pw expression (``pw:`` prefix removed) into a matcher list.

    :param expr: for example ``css=.btn >> role=button[name="OK"]``.
    :return: one matcher dict per ``>>``-separated chunk, in source order.
    :raises PwParseError: if the expression is empty or blank, produces no
        chunk, or contains a chunk that cannot be parsed.
    """
    is_blank = not expr or not expr.strip()
    if is_blank:
        raise PwParseError('空的 pw 表达式')

    pieces = _split_chunks(expr)
    if not pieces:
        raise PwParseError(f'未找到有效的 chunk: {expr!r}')

    matchers = []
    for piece in pieces:
        matchers.append(_parse_chunk(piece))
    return matchers
55
+
56
+
57
+ # ─────────────────────────────────────────────────────────────────────────────
58
+ # chunk 切分:按 ' >> ' 分段,尊重引号和正则字面量
59
+ # ─────────────────────────────────────────────────────────────────────────────
60
+
61
+ def _split_chunks(expr: str) -> list:
62
+ """把表达式按 >> 切成若干 chunk。
63
+
64
+ 规则:
65
+ - 引号(' / ")内的 >> 不切
66
+ - 正则字面量 /.../[flags] 内的 >> 不切
67
+ - >> 前后可以有空格,也可以没有(但建议有)
68
+ """
69
+ parts = []
70
+ buf = []
71
+ i = 0
72
+ n = len(expr)
73
+ in_quote = None # None | '"' | "'"
74
+ in_regex = False
75
+ while i < n:
76
+ c = expr[i]
77
+ if in_quote:
78
+ buf.append(c)
79
+ # 处理反斜杠转义
80
+ if c == '\\' and i + 1 < n:
81
+ buf.append(expr[i + 1])
82
+ i += 2
83
+ continue
84
+ if c == in_quote:
85
+ in_quote = None
86
+ i += 1
87
+ continue
88
+ if in_regex:
89
+ buf.append(c)
90
+ if c == '\\' and i + 1 < n:
91
+ buf.append(expr[i + 1])
92
+ i += 2
93
+ continue
94
+ if c == '/':
95
+ # 结束正则,继续吃 flags
96
+ j = i + 1
97
+ while j < n and expr[j].isalpha():
98
+ buf.append(expr[j])
99
+ j += 1
100
+ in_regex = False
101
+ i = j
102
+ continue
103
+ i += 1
104
+ continue
105
+ # 非引号、非正则态
106
+ if c in ('"', "'"):
107
+ in_quote = c
108
+ buf.append(c)
109
+ i += 1
110
+ continue
111
+ # 识别正则起始:=/.../ 这种,简化判断为前一个非空字符是 =
112
+ if c == '/':
113
+ # 往前看非空格字符
114
+ k = len(buf) - 1
115
+ while k >= 0 and buf[k] == ' ':
116
+ k -= 1
117
+ if k >= 0 and buf[k] == '=':
118
+ in_regex = True
119
+ buf.append(c)
120
+ i += 1
121
+ continue
122
+ if c == '>' and i + 1 < n and expr[i + 1] == '>':
123
+ parts.append(''.join(buf).strip())
124
+ buf = []
125
+ i += 2
126
+ continue
127
+ buf.append(c)
128
+ i += 1
129
+
130
+ if in_quote:
131
+ raise PwParseError(f'引号未闭合: {expr!r}')
132
+ if in_regex:
133
+ raise PwParseError(f'正则字面量未闭合: {expr!r}')
134
+
135
+ tail = ''.join(buf).strip()
136
+ if tail:
137
+ parts.append(tail)
138
+ return [p for p in parts if p]
139
+
140
+
141
+ # ─────────────────────────────────────────────────────────────────────────────
142
+ # 单个 chunk 解析
143
+ # ─────────────────────────────────────────────────────────────────────────────
144
+
145
+ _ROLE_RE = re.compile(r'^role=([a-zA-Z][\w-]*)(.*)$')
146
+ _ROLE_NAME_RE = re.compile(r'^\[name=(.+)\]$')
147
+ _NTH_RE = re.compile(r'^nth=(-?\d+)$')
148
+
149
+
150
+ def _parse_chunk(chunk: str) -> dict:
151
+ s = chunk.strip()
152
+ if not s:
153
+ raise PwParseError('空 chunk')
154
+
155
+ # visible / visible=true / visible=false
156
+ if s == 'visible' or s == 'visible=true':
157
+ return {'type': 'visible', 'value': True}
158
+ if s == 'visible=false':
159
+ return {'type': 'visible', 'value': False}
160
+
161
+ # nth=N
162
+ m = _NTH_RE.match(s)
163
+ if m:
164
+ return {'type': 'nth', 'index': int(m.group(1))}
165
+
166
+ # role=X 或 role=X[name=...]
167
+ m = _ROLE_RE.match(s)
168
+ if m:
169
+ role = m.group(1)
170
+ rest = m.group(2).strip()
171
+ if not rest:
172
+ return {'type': 'role', 'role': role, 'name': None}
173
+ nm = _ROLE_NAME_RE.match(rest)
174
+ if not nm:
175
+ raise PwParseError(
176
+ f'role= 后只支持 [name=...] 过滤: {chunk!r}')
177
+ name_spec = _parse_value(nm.group(1))
178
+ return {'type': 'role', 'role': role, 'name': name_spec}
179
+
180
+ # 文本类过滤:text= / label= / placeholder= / alt= / title= / testid= / has-text=
181
+ for t in _VALUE_TYPES:
182
+ prefix = t + '='
183
+ if s.startswith(prefix):
184
+ spec = _parse_value(s[len(prefix):])
185
+ return {'type': t, 'value': spec}
186
+
187
+ # css= / xpath=:原样保留
188
+ for t in _RAW_TYPES:
189
+ prefix = t + '='
190
+ if s.startswith(prefix):
191
+ raw = s[len(prefix):].strip()
192
+ if not raw:
193
+ raise PwParseError(f'{t}= 后面不能为空: {chunk!r}')
194
+ return {'type': t, 'value': raw}
195
+
196
+ raise PwParseError(
197
+ f'无法识别的 pw chunk: {chunk!r};'
198
+ f'合法类型: {", ".join(_ALL_TYPES)}')
199
+
200
+
201
+ # ─────────────────────────────────────────────────────────────────────────────
202
+ # 值规格解析(value spec)
203
+ # ─────────────────────────────────────────────────────────────────────────────
204
+
205
+ _REGEX_RE = re.compile(r'^/(.+)/([a-z]*)$', re.DOTALL)
206
+
207
+
208
+ def _parse_value(raw: str) -> dict:
209
+ """解析值字符串。
210
+
211
+ 规则(按优先级):
212
+ "..." 或 '...' → exact 精确匹配,支持 \\ 转义引号
213
+ /pattern/[flags] → regex 正则(Playwright 风格)
214
+ 其它(裸值) → substr 子串匹配
215
+ """
216
+ s = raw.strip()
217
+ if not s:
218
+ raise PwParseError('值不能为空')
219
+ # 引号包裹 → exact
220
+ if len(s) >= 2 and s[0] == s[-1] and s[0] in ('"', "'"):
221
+ inner = s[1:-1]
222
+ # 去反斜杠转义
223
+ inner = inner.replace('\\' + s[0], s[0]).replace('\\\\', '\\')
224
+ return {'kind': 'exact', 'value': inner}
225
+ # /re/flags → regex
226
+ m = _REGEX_RE.match(s)
227
+ if m:
228
+ pattern = m.group(1)
229
+ flags = m.group(2) or ''
230
+ # 校验 flags(JS 允许的)
231
+ for f in flags:
232
+ if f not in 'gimsuy':
233
+ raise PwParseError(f'非法的正则 flag: {f!r} in {raw!r}')
234
+ return {'kind': 'regex', 'value': pattern, 'flags': flags}
235
+ # 裸值 → substr
236
+ return {'kind': 'substr', 'value': s}