dp-cli 0.2.0__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. {dp_cli-0.2.0 → dp_cli-0.3.0}/PKG-INFO +50 -4
  2. {dp_cli-0.2.0 → dp_cli-0.3.0}/README.md +49 -3
  3. {dp_cli-0.2.0 → dp_cli-0.3.0}/dp_cli/commands/snapshot_cmd.py +185 -8
  4. dp_cli-0.3.0/dp_cli/snapshot/__init__.py +28 -0
  5. {dp_cli-0.2.0 → dp_cli-0.3.0}/dp_cli/snapshot/a11y.py +126 -9
  6. dp_cli-0.3.0/dp_cli/snapshot/clickable.py +197 -0
  7. dp_cli-0.3.0/dp_cli/snapshot/clickable_js.py +273 -0
  8. {dp_cli-0.2.0 → dp_cli-0.3.0}/dp_cli.egg-info/PKG-INFO +50 -4
  9. {dp_cli-0.2.0 → dp_cli-0.3.0}/dp_cli.egg-info/SOURCES.txt +4 -1
  10. {dp_cli-0.2.0 → dp_cli-0.3.0}/pyproject.toml +1 -1
  11. dp_cli-0.3.0/tests/test_clickable.py +130 -0
  12. dp_cli-0.2.0/dp_cli/snapshot/__init__.py +0 -23
  13. {dp_cli-0.2.0 → dp_cli-0.3.0}/dp_cli/__init__.py +0 -0
  14. {dp_cli-0.2.0 → dp_cli-0.3.0}/dp_cli/bridge.py +0 -0
  15. {dp_cli-0.2.0 → dp_cli-0.3.0}/dp_cli/bridge_manager.py +0 -0
  16. {dp_cli-0.2.0 → dp_cli-0.3.0}/dp_cli/commands/__init__.py +0 -0
  17. {dp_cli-0.2.0 → dp_cli-0.3.0}/dp_cli/commands/_utils.py +0 -0
  18. {dp_cli-0.2.0 → dp_cli-0.3.0}/dp_cli/commands/browser.py +0 -0
  19. {dp_cli-0.2.0 → dp_cli-0.3.0}/dp_cli/commands/element.py +0 -0
  20. {dp_cli-0.2.0 → dp_cli-0.3.0}/dp_cli/commands/keyboard.py +0 -0
  21. {dp_cli-0.2.0 → dp_cli-0.3.0}/dp_cli/commands/misc.py +0 -0
  22. {dp_cli-0.2.0 → dp_cli-0.3.0}/dp_cli/commands/network.py +0 -0
  23. {dp_cli-0.2.0 → dp_cli-0.3.0}/dp_cli/commands/page.py +0 -0
  24. {dp_cli-0.2.0 → dp_cli-0.3.0}/dp_cli/commands/storage.py +0 -0
  25. {dp_cli-0.2.0 → dp_cli-0.3.0}/dp_cli/commands/tab.py +0 -0
  26. {dp_cli-0.2.0 → dp_cli-0.3.0}/dp_cli/main.py +0 -0
  27. {dp_cli-0.2.0 → dp_cli-0.3.0}/dp_cli/output.py +0 -0
  28. {dp_cli-0.2.0 → dp_cli-0.3.0}/dp_cli/session.py +0 -0
  29. {dp_cli-0.2.0 → dp_cli-0.3.0}/dp_cli/snapshot/extract.py +0 -0
  30. {dp_cli-0.2.0 → dp_cli-0.3.0}/dp_cli/snapshot/js_scripts.py +0 -0
  31. {dp_cli-0.2.0 → dp_cli-0.3.0}/dp_cli/snapshot/utils.py +0 -0
  32. {dp_cli-0.2.0 → dp_cli-0.3.0}/dp_cli/stealth.py +0 -0
  33. {dp_cli-0.2.0 → dp_cli-0.3.0}/dp_cli.egg-info/dependency_links.txt +0 -0
  34. {dp_cli-0.2.0 → dp_cli-0.3.0}/dp_cli.egg-info/entry_points.txt +0 -0
  35. {dp_cli-0.2.0 → dp_cli-0.3.0}/dp_cli.egg-info/requires.txt +0 -0
  36. {dp_cli-0.2.0 → dp_cli-0.3.0}/dp_cli.egg-info/top_level.txt +0 -0
  37. {dp_cli-0.2.0 → dp_cli-0.3.0}/setup.cfg +0 -0
  38. {dp_cli-0.2.0 → dp_cli-0.3.0}/tests/test_bridge_integration.py +0 -0
  39. {dp_cli-0.2.0 → dp_cli-0.3.0}/tests/test_bridge_manager.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dp-cli
3
- Version: 0.2.0
3
+ Version: 0.3.0
4
4
  Summary: A powerful CLI for DrissionPage — browser automation, structured data extraction, network listening and more.
5
5
  License: BSD-3-Clause
6
6
  Project-URL: Homepage, https://github.com/mofanx/dp-cli
@@ -26,10 +26,15 @@ A powerful CLI for [DrissionPage](https://github.com/g1879/DrissionPage) — bro
26
26
  ## Features
27
27
 
28
28
  - **Anti-detection by default** — not based on webdriver, `navigator.webdriver` is `false`
29
- - **Reuse your own browser** — connect to a running Chrome via `--port`, keeping login state and cookies
30
- - **Powerful locator syntax** — descriptive strings stable across navigation (no ephemeral refs)
31
- - **Structured data extraction** `extract` + `query` + `snapshot --mode content` for scraping list pages
29
+ - **Reuse your own browser** — `--auto-connect` (Chrome 144+, no CLI flag needed) or `--port`
30
+ - **Hybrid snapshot** — a11y tree + Vimium-style clickable detection, catches icon-only buttons
31
+ and custom menu items the a11y tree misses; every element gets an `[N]` ref with
32
+ confidence markers (`⚡` medium, `?` low)
33
+ - **`dp scan`** — fast Vimium-style listing of interactive elements (viewport-only mode available)
34
+ - **Powerful locator syntax** — descriptive strings stable across navigation
35
+ - **Structured data extraction** — `extract` + `query` + `snapshot` for scraping list pages
32
36
  - **Network listening** — capture XHR/Fetch requests and response bodies
37
+ - **Stealth patches** — `dp stealth` bypasses common automation detections
33
38
  - **Dual mode** — browser control + pure HTTP requests
34
39
  - **Shadow-root / iframe** — traverse directly without switching context
35
40
  - **JSON output** — all commands output JSON, AI-friendly
@@ -99,6 +104,47 @@ the bridge automatically and never quits your Chrome (it's your browser, not dp'
99
104
  - Works with whatever profile Chrome is actually using — same cookies, logins, history.
100
105
  - Classic `--remote-debugging-port=9222` mode still works unchanged via `dp open --port 9222`.
101
106
 
107
+ ## Hybrid Snapshot (a11y + Vimium-style)
108
+
109
+ The default `dp snapshot` fuses two element-discovery paths:
110
+
111
+ 1. **Browser a11y tree** via CDP — the structural skeleton (headings, lists, form roles,
112
+ explicit `<a>`/`<button>`, any `role="..."` element).
113
+ 2. **Vimium-style clickable detection** — a JS probe that flags icon-only buttons,
114
+ `<div onclick>`, `[tabindex>=0]`, `aria-selected`, `cursor:pointer` elements, etc.
115
+ that the a11y tree misses.
116
+
117
+ Results are deduplicated by `backendNodeId` and rendered with confidence markers:
118
+
119
+ | Marker | Confidence | Triggers |
120
+ |--------|-----------|----------|
121
+ | none | **high** | `<a href>`, `<button>`, `<input>`, `role=button/link/...`, `contenteditable` |
122
+ | `⚡` | **medium** | `onclick` / `jsaction` / `tabindex>=0` / `aria-selected` / `<audio>/<video>` |
123
+ | `?` | **low** | `cursor:pointer` / class keyword match (`btn` / `click` / `toggle` / …) |
124
+
125
+ Every element gets an `[N]` ref usable in any command: `dp click "ref:5"`.
126
+
127
+ ```bash
128
+ dp snapshot # a11y + clickable (default); high + medium markers
129
+ dp snapshot --viewport-only # clickable probe limited to viewport (faster)
130
+ dp snapshot --include-low # also surface `?` low-confidence heuristics
131
+ dp snapshot --no-clickables # a11y tree only (legacy behavior)
132
+ ```
133
+
134
+ ### `dp scan` — fast clickable-only listing
135
+
136
+ When you only need "what can I click next?" without the full a11y tree:
137
+
138
+ ```bash
139
+ dp scan # full page, high+medium
140
+ dp scan --viewport # only elements currently in viewport
141
+ dp scan --confidence all # include low-confidence heuristics
142
+ dp scan --confidence high # only the sure-thing clickables
143
+ ```
144
+
145
+ Both `snapshot` and `scan` number refs from `[1]` and save them to the same session,
146
+ so `dp click "ref:N"` works after either command; use the `N` from the most recent output.
147
+
102
148
  ## Anti-Detection (stealth)
103
149
 
104
150
  Bypass `navigator.webdriver`, `HeadlessChrome` UA, empty `plugins`, SwiftShader WebGL,
@@ -5,10 +5,15 @@ A powerful CLI for [DrissionPage](https://github.com/g1879/DrissionPage) — bro
5
5
  ## Features
6
6
 
7
7
  - **Anti-detection by default** — not based on webdriver, `navigator.webdriver` is `false`
8
- - **Reuse your own browser** — connect to a running Chrome via `--port`, keeping login state and cookies
9
- - **Powerful locator syntax** — descriptive strings stable across navigation (no ephemeral refs)
10
- - **Structured data extraction** `extract` + `query` + `snapshot --mode content` for scraping list pages
8
+ - **Reuse your own browser** — `--auto-connect` (Chrome 144+, no CLI flag needed) or `--port`
9
+ - **Hybrid snapshot** — a11y tree + Vimium-style clickable detection, catches icon-only buttons
10
+ and custom menu items the a11y tree misses; every element gets an `[N]` ref with
11
+ confidence markers (`⚡` medium, `?` low)
12
+ - **`dp scan`** — fast Vimium-style listing of interactive elements (viewport-only mode available)
13
+ - **Powerful locator syntax** — descriptive strings stable across navigation
14
+ - **Structured data extraction** — `extract` + `query` + `snapshot` for scraping list pages
11
15
  - **Network listening** — capture XHR/Fetch requests and response bodies
16
+ - **Stealth patches** — `dp stealth` bypasses common automation detections
12
17
  - **Dual mode** — browser control + pure HTTP requests
13
18
  - **Shadow-root / iframe** — traverse directly without switching context
14
19
  - **JSON output** — all commands output JSON, AI-friendly
@@ -78,6 +83,47 @@ the bridge automatically and never quits your Chrome (it's your browser, not dp'
78
83
  - Works with whatever profile Chrome is actually using — same cookies, logins, history.
79
84
  - Classic `--remote-debugging-port=9222` mode still works unchanged via `dp open --port 9222`.
80
85
 
86
+ ## Hybrid Snapshot (a11y + Vimium-style)
87
+
88
+ The default `dp snapshot` fuses two element-discovery paths:
89
+
90
+ 1. **Browser a11y tree** via CDP — the structural skeleton (headings, lists, form roles,
91
+ explicit `<a>`/`<button>`, any `role="..."` element).
92
+ 2. **Vimium-style clickable detection** — a JS probe that flags icon-only buttons,
93
+ `<div onclick>`, `[tabindex>=0]`, `aria-selected`, `cursor:pointer` elements, etc.
94
+ that the a11y tree misses.
95
+
96
+ Results are deduplicated by `backendNodeId` and rendered with confidence markers:
97
+
98
+ | Marker | Confidence | Triggers |
99
+ |--------|-----------|----------|
100
+ | none | **high** | `<a href>`, `<button>`, `<input>`, `role=button/link/...`, `contenteditable` |
101
+ | `⚡` | **medium** | `onclick` / `jsaction` / `tabindex>=0` / `aria-selected` / `<audio>/<video>` |
102
+ | `?` | **low** | `cursor:pointer` / class keyword match (`btn` / `click` / `toggle` / …) |
103
+
104
+ Every element gets an `[N]` ref usable in any command: `dp click "ref:5"`.
105
+
106
+ ```bash
107
+ dp snapshot # a11y + clickable (default); high + medium markers
108
+ dp snapshot --viewport-only # clickable probe limited to viewport (faster)
109
+ dp snapshot --include-low # also surface `?` low-confidence heuristics
110
+ dp snapshot --no-clickables # a11y tree only (legacy behavior)
111
+ ```
112
+
113
+ ### `dp scan` — fast clickable-only listing
114
+
115
+ When you only need "what can I click next?" without the full a11y tree:
116
+
117
+ ```bash
118
+ dp scan # full page, high+medium
119
+ dp scan --viewport # only elements currently in viewport
120
+ dp scan --confidence all # include low-confidence heuristics
121
+ dp scan --confidence high # only the sure-thing clickables
122
+ ```
123
+
124
+ Both `snapshot` and `scan` number refs from `[1]` and save them to the same session,
125
+ so `dp click "ref:N"` works after either command; use the `N` from the most recent output.
126
+
81
127
  ## Anti-Detection (stealth)
82
128
 
83
129
  Bypass `navigator.webdriver`, `HeadlessChrome` UA, empty `plugins`, SwiftShader WebGL,
@@ -9,7 +9,7 @@ from dp_cli.output import ok, error
9
9
  from dp_cli.session import save_refs
10
10
  from dp_cli.snapshot import (extract_structured, query_elements,
11
11
  take_a11y_snapshot, render_a11y_text,
12
- render_a11y_plain_text)
12
+ render_a11y_plain_text, detect_clickables)
13
13
  from dp_cli.snapshot.utils import suggest_locator
14
14
  from dp_cli.commands._utils import session_option, _get_page, records_to_csv, resolve_locator
15
15
 
@@ -25,8 +25,15 @@ def register(cli):
25
25
  @click.option('--format', 'fmt', type=click.Choice(['json', 'text']),
26
26
  default='text', show_default=True, help='输出格式')
27
27
  @click.option('--filename', default=None, help='保存到文件路径')
28
- def snapshot(session, mode, selector, fmt, filename):
29
- """获取页面快照(基于浏览器原生 a11y tree,通用性极强)。
28
+ @click.option('--no-clickables', is_flag=True, default=False,
29
+ help='禁用 Vimium 风格可交互元素补充探测(默认开启)')
30
+ @click.option('--include-low', is_flag=True, default=False,
31
+ help='包含 low 置信度元素(cursor:pointer / class 规则匹配,可能假阳性)')
32
+ @click.option('--viewport-only', is_flag=True, default=False,
33
+ help='补充探测只看视口内元素(省 token、更快)')
34
+ def snapshot(session, mode, selector, fmt, filename,
35
+ no_clickables, include_low, viewport_only):
36
+ """获取页面快照(a11y tree + Vimium 风格可交互元素补充)。
30
37
 
31
38
  \b
32
39
  模式说明(默认 full):
@@ -34,18 +41,30 @@ def register(cli):
34
41
  brief 精简模式,保留结构+交互,截断长文本(省 token)
35
42
  text 纯文本模式,按阅读顺序输出可见文本
36
43
 
44
+ \b
45
+ 可交互元素补充探测(默认开启):
46
+ a11y tree 会漏掉纯图标按钮、弹窗菜单项等未标 ARIA role 的元素。
47
+ 补充探测会扫描 DOM,按 Vimium 规则识别这类元素并给出 ref:N 引用。
48
+ 输出中以 ⚡ 标记 medium 置信度,? 标记 low 置信度。
49
+
37
50
  \b
38
51
  示例:
39
- dp snapshot # 完整快照(推荐首次调用)
52
+ dp snapshot # 完整快照(默认含 clickable 补充)
40
53
  dp snapshot --mode brief # 精简模式(省 token,适合循环调用)
41
- dp snapshot --mode text # 纯文本(全量文字内容)
42
- dp snapshot --selector ".main" # 只获取指定区域
43
- dp snapshot --format json # JSON 格式输出
54
+ dp snapshot --viewport-only # 只扫视口内,更快
55
+ dp snapshot --include-low # 启用 low 置信度(可能假阳性)
56
+ dp snapshot --no-clickables # a11y tree,旧版本行为
57
+ dp snapshot --selector ".main" # 只获取指定区域
44
58
  """
45
59
  page = _get_page(session)
46
60
 
47
61
  try:
48
- data = take_a11y_snapshot(page, selector=selector)
62
+ data = take_a11y_snapshot(
63
+ page, selector=selector,
64
+ with_clickables=not no_clickables,
65
+ include_low=include_low,
66
+ viewport_only=viewport_only,
67
+ )
49
68
  except Exception as e:
50
69
  error('获取页面快照失败', code='SNAPSHOT_FAILED', detail=str(e))
51
70
  return
@@ -74,6 +93,164 @@ def register(cli):
74
93
  else:
75
94
  click.echo(output)
76
95
 
96
+ @cli.command('scan')
97
+ @session_option
98
+ @click.option('--viewport', 'viewport_only', is_flag=True, default=False,
99
+ help='只扫描视口内元素(更快、更少结果)')
100
+ @click.option('--confidence', default='high,medium', show_default=True,
101
+ help='逗号分隔的置信度过滤,可选 high / medium / low;'
102
+ '使用 "all" 等价于 high,medium,low')
103
+ @click.option('--max', 'max_elements', default=1000, show_default=True,
104
+ help='最多返回多少个元素')
105
+ @click.option('--format', 'fmt', type=click.Choice(['text', 'json']),
106
+ default='text', show_default=True, help='输出格式')
107
+ @click.option('--filename', default=None, help='保存到文件路径')
108
+ def scan(session, viewport_only, confidence, max_elements, fmt, filename):
109
+ """Vimium 风格扫描当前页面的可交互元素(纯 DOM 遍历,不依赖 a11y tree)。
110
+
111
+ \b
112
+ 与 dp snapshot 的区别:
113
+ snapshot 返回完整 a11y tree + clickable 补充(大、慢、全面)
114
+ scan 只返回可交互元素清单(小、快、适合执行脚本)
115
+
116
+ \b
117
+ 置信度分级:
118
+ high 明确可点击(<a href>, <button>, role=button 等)
119
+ medium 很可能可点击(onclick / jsaction / tabindex / aria-selected)
120
+ low 启发式匹配(cursor:pointer / class 含 btn/click/… 关键词)
121
+ 默认只返回 high + medium;用 --confidence all 看全部
122
+
123
+ \b
124
+ 输出的每个元素都有 [N] 编号,可直接 dp click "ref:N" 引用。
125
+ 输出中 ⚡ 标记 medium 置信度,? 标记 low 置信度。
126
+
127
+ \b
128
+ 示例:
129
+ dp scan # 扫全页,high + medium
130
+ dp scan --viewport # 只扫视口内
131
+ dp scan --confidence high # 只要高置信度
132
+ dp scan --confidence all # 包含 low 置信度
133
+ dp scan --format json # JSON 输出
134
+ """
135
+ # 解析 confidence 过滤
136
+ conf_str = (confidence or '').strip().lower()
137
+ if conf_str == 'all':
138
+ wanted = {'high', 'medium', 'low'}
139
+ else:
140
+ wanted = {c.strip() for c in conf_str.split(',') if c.strip()}
141
+ unknown = wanted - {'high', 'medium', 'low'}
142
+ if unknown:
143
+ error('无效的置信度值', code='INVALID_CONFIDENCE',
144
+ detail=f'unknown: {sorted(unknown)}; 可选: high, medium, low, all')
145
+ return
146
+ include_low = 'low' in wanted
147
+
148
+ page = _get_page(session)
149
+ try:
150
+ data = detect_clickables(
151
+ page,
152
+ viewport_only=viewport_only,
153
+ max_elements=max_elements,
154
+ include_low=include_low,
155
+ )
156
+ except Exception as e:
157
+ error('扫描失败', code='SCAN_FAILED', detail=str(e))
158
+ return
159
+
160
+ if data.get('method') == 'failed':
161
+ error('扫描失败', code='SCAN_FAILED',
162
+ detail=data.get('error', '未知'))
163
+ return
164
+
165
+ # 置信度过滤(JS 的 include_low 控制是否 *生成* low;
166
+ # 这里再按 wanted 精确过滤——例如用户只要 high)
167
+ filtered = [e for e in data.get('elements', [])
168
+ if e.get('confidence') in wanted]
169
+
170
+ # 分配 ref:N 并保存到 session
171
+ refs = {}
172
+ url = page.url
173
+ title = page.title
174
+ rendered_lines = []
175
+
176
+ # 头部
177
+ mode_parts = []
178
+ if viewport_only:
179
+ mode_parts.append('viewport')
180
+ mode_parts.append(f"confidence={','.join(sorted(wanted))}")
181
+ mode_tag = ', '.join(mode_parts)
182
+
183
+ rendered_lines.append(f'### Clickable Scan ({mode_tag})')
184
+ rendered_lines.append(f'- URL: {url}')
185
+ rendered_lines.append(f'- Title: {title}')
186
+ rendered_lines.append(
187
+ f'- Detected: {data.get("total", 0)} total, '
188
+ f'{len(filtered)} after filter'
189
+ + (' (truncated)' if data.get('truncated') else '')
190
+ )
191
+ rendered_lines.append('- 用 ref:N 引用元素(⚡ = medium, ? = low)')
192
+ rendered_lines.append('')
193
+
194
+ # 置信度标记
195
+ marker_map = {'high': '', 'medium': '⚡ ', 'low': '? '}
196
+
197
+ for i, rec in enumerate(filtered, start=1):
198
+ refs[str(i)] = {
199
+ 'locator': rec.get('locator') or '',
200
+ 'role': f"clickable/{rec.get('tag', '')}",
201
+ 'name': (rec.get('text') or '')[:100],
202
+ 'backendNodeId': rec.get('backendNodeId'),
203
+ 'confidence': rec.get('confidence'),
204
+ 'reason': rec.get('reason'),
205
+ }
206
+ marker = marker_map.get(rec.get('confidence'), '')
207
+ tag = rec.get('tag', '')
208
+ text = (rec.get('text') or '').strip()
209
+ reason = rec.get('reason') or ''
210
+ rect = rec.get('rect') or {}
211
+ loc = rec.get('locator') or ''
212
+
213
+ parts = [f'- [{i}] {marker}{tag}']
214
+ if text:
215
+ disp = text[:80] + '…' if len(text) > 80 else text
216
+ parts.append(f'"{disp}"')
217
+ meta_bits = [reason]
218
+ if rect.get('w'):
219
+ meta_bits.append(f'{rect["w"]}x{rect["h"]}')
220
+ meta_bits.append(f'@{rect.get("x", 0)},{rect.get("y", 0)}')
221
+ parts.append(f'({", ".join(meta_bits)})')
222
+ if loc:
223
+ parts.append(f'→ {loc}')
224
+ rendered_lines.append(' '.join(parts))
225
+
226
+ # 保存 refs 到 session
227
+ if refs:
228
+ save_refs(session, url, refs)
229
+
230
+ if fmt == 'json':
231
+ output = json.dumps({
232
+ 'status': 'ok',
233
+ 'data': {
234
+ 'page': {'url': url, 'title': title},
235
+ 'meta': {
236
+ 'total_detected': data.get('total'),
237
+ 'after_filter': len(filtered),
238
+ 'truncated': data.get('truncated'),
239
+ 'viewport_only': viewport_only,
240
+ 'confidence_filter': sorted(wanted),
241
+ },
242
+ 'elements': filtered,
243
+ }
244
+ }, ensure_ascii=False, indent=2)
245
+ else:
246
+ output = '\n'.join(rendered_lines)
247
+
248
+ if filename:
249
+ Path(filename).write_text(output, encoding='utf-8')
250
+ ok(msg=f'扫描结果已保存到 {filename}')
251
+ else:
252
+ click.echo(output)
253
+
77
254
  @cli.command('extract')
78
255
  @session_option
79
256
  @click.argument('container')
@@ -0,0 +1,28 @@
1
+ # -*- coding:utf-8 -*-
2
+ """
3
+ dp-cli snapshot 模块
4
+
5
+ 基于浏览器原生 a11y tree(无障碍树)的页面快照系统。
6
+ 通过 CDP Accessibility API 获取,通用性极强,适用于 95%+ 的网站。
7
+
8
+ 模块组成:
9
+ - a11y.py 核心:a11y tree 获取 + 多模式渲染(full/brief/text)
10
+ - clickable.py Vimium 风格可交互元素探测(补充 a11y tree 覆盖盲区)
11
+ - clickable_js.py 注入浏览器的 JS 探测脚本
12
+ - extract.py 数据提取(extract_structured / query_elements)
13
+ - utils.py 共享工具(定位器生成等)
14
+ - js_scripts.py JS 降级脚本(CDP 不可用时的 fallback)
15
+ """
16
+ from .a11y import take_a11y_snapshot, render_a11y_text, render_a11y_plain_text
17
+ from .clickable import detect_clickables, format_clickable_record
18
+ from .extract import extract_structured, query_elements
19
+
20
+ __all__ = [
21
+ 'take_a11y_snapshot',
22
+ 'render_a11y_text',
23
+ 'render_a11y_plain_text',
24
+ 'detect_clickables',
25
+ 'format_clickable_record',
26
+ 'extract_structured',
27
+ 'query_elements',
28
+ ]
@@ -50,14 +50,24 @@ _REF_CONTENT_ROLES = frozenset({
50
50
  })
51
51
 
52
52
 
53
- def take_a11y_snapshot(page, selector=None, max_depth=None) -> dict:
53
+ def take_a11y_snapshot(page, selector=None, max_depth=None,
54
+ with_clickables: bool = True,
55
+ include_low: bool = False,
56
+ viewport_only: bool = False) -> dict:
54
57
  """
55
- 获取页面 a11y tree
58
+ 获取页面 a11y tree,并(可选)合并 Vimium 风格的可点击元素探测。
56
59
 
57
60
  :param page: DrissionPage 的 ChromiumPage 对象
58
61
  :param selector: CSS 选择器,限定子树范围(可选)
59
62
  :param max_depth: 最大深度限制(可选,传给 CDP)
60
- :return: 标准化的 a11y tree 数据
63
+ :param with_clickables: True 时额外运行 clickable 探测并合并到快照;
64
+ 收集 a11y tree 漏掉的可交互元素(如纯图标按钮、
65
+ 弹窗菜单项等)
66
+ :param include_low: with_clickables=True 时,是否包含 low 置信度元素
67
+ (cursor:pointer 或 class-pattern 启发式匹配)
68
+ :param viewport_only: with_clickables=True 时,是否只探测视口内可见元素
69
+ :return: 标准化的 a11y tree 数据;若 with_clickables=True,
70
+ 额外带 'clickable_extras' 字段(补充 a11y tree 未覆盖的可交互元素)
61
71
  """
62
72
  page.wait.doc_loaded()
63
73
  page_info = {'url': page.url, 'title': page.title}
@@ -91,6 +101,42 @@ def take_a11y_snapshot(page, selector=None, max_depth=None) -> dict:
91
101
  }
92
102
  if selector_warning:
93
103
  result['warning'] = selector_warning
104
+
105
+ # ── 可选:合并 clickable 探测结果 ──
106
+ # 注意:clickable 必须自己建 bid_map —— 它的 JS 会给元素加
107
+ # data-dp-scan-id 临时属性,bid_map 必须在那之后再建才能包含 scan-id
108
+ if with_clickables:
109
+ try:
110
+ from .clickable import detect_clickables
111
+ clk = detect_clickables(
112
+ page,
113
+ viewport_only=viewport_only,
114
+ include_low=include_low,
115
+ )
116
+ # 收集 a11y tree 已覆盖的 backendNodeId(有 locator 的交互节点)
117
+ covered = {n['backendNodeId'] for n in normalized
118
+ if n.get('backendNodeId')
119
+ and n.get('locator')
120
+ and n['role'] in _INTERACTIVE_ROLES}
121
+ # 过滤出 a11y 未覆盖的元素
122
+ extras = [e for e in clk.get('elements', [])
123
+ if not (e.get('backendNodeId')
124
+ and e['backendNodeId'] in covered)]
125
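+ # Filter: drop elements that have no rect or whose width is < 2 (the JS probe already filters by size; this is a second safeguard). Height is not checked here.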
126
+ extras = [e for e in extras
127
+ if e.get('rect') and e['rect'].get('w', 0) >= 2]
128
+ result['clickable_extras'] = extras
129
+ result['clickable_meta'] = {
130
+ 'total_detected': clk.get('total', 0),
131
+ 'covered_by_a11y': clk.get('total', 0) - len(extras),
132
+ 'extras': len(extras),
133
+ 'truncated': clk.get('truncated', False),
134
+ 'viewport_only': viewport_only,
135
+ 'include_low': include_low,
136
+ }
137
+ except Exception as ce:
138
+ result['clickable_warning'] = f'clickable 探测失败(非致命):{ce}'
139
+
94
140
  return result
95
141
  except Exception as cdp_err:
96
142
  cdp_error_msg = str(cdp_err)
@@ -170,12 +216,69 @@ def render_a11y_text(snapshot: dict, verbose: bool = False,
170
216
  else:
171
217
  lines.append('(a11y tree 为空)')
172
218
 
219
+ # ── 追加 clickable_extras(a11y tree 漏掉的可交互元素)──
220
+ extras = snapshot.get('clickable_extras') or []
221
+ if extras:
222
+ from .clickable import CONFIDENCE_MARKER
223
+ lines.append('')
224
+ meta = snapshot.get('clickable_meta') or {}
225
+ header_suffix = []
226
+ if meta.get('viewport_only'):
227
+ header_suffix.append('viewport-only')
228
+ if meta.get('include_low'):
229
+ header_suffix.append('include-low')
230
+ suffix_str = (f' — {", ".join(header_suffix)}'
231
+ if header_suffix else '')
232
+ lines.append(f'### Additional Interactive Elements'
233
+ f' (Vimium-style, not in a11y tree){suffix_str}')
234
+ lines.append(f'- 共 {len(extras)} 个;⚡ = medium 置信, ? = low 置信;'
235
+ f'用 ref:N 引用')
236
+ lines.append('')
237
+ for rec in extras:
238
+ ctx['counter'] += 1
239
+ rid = ctx['counter']
240
+ marker = CONFIDENCE_MARKER.get(rec.get('confidence'), '')
241
+ tag = rec.get('tag', '')
242
+ text = (rec.get('text') or '').strip()
243
+ reason = rec.get('reason') or ''
244
+ loc = rec.get('locator') or ''
245
+ rect = rec.get('rect') or {}
246
+
247
+ parts = [f'- [{rid}] {marker}{tag}']
248
+ if text:
249
+ display_text = text[:80] + '…' if len(text) > 80 else text
250
+ parts.append(f'"{display_text}"')
251
+ meta_parts = [reason]
252
+ if rect.get('w'):
253
+ meta_parts.append(f'{rect["w"]}x{rect["h"]}')
254
+ parts.append(f'({", ".join(meta_parts)})')
255
+ if loc:
256
+ parts.append(f'→ {loc}')
257
+ lines.append(' '.join(parts))
258
+
259
+ # 记入 refs 以便 click/fill 引用
260
+ ctx['refs'][str(rid)] = {
261
+ 'locator': loc,
262
+ 'role': f'clickable/{tag}',
263
+ 'name': text[:100],
264
+ 'backendNodeId': rec.get('backendNodeId'),
265
+ 'confidence': rec.get('confidence'),
266
+ 'reason': reason,
267
+ }
268
+
269
+ if snapshot.get('clickable_warning'):
270
+ lines.append('')
271
+ lines.append(f"⚠ {snapshot['clickable_warning']}")
272
+
173
273
  # 回填头部:包含 ref 统计
174
274
  ref_count = ctx['counter']
175
275
  lines[header_idx] = f'### Page Snapshot ({mode_label})'
176
- lines[stats_idx] = (f"- Nodes: {stats.get('total', 0)} total, "
177
- f"{stats.get('interactive', 0)} interactive, "
178
- f"{ref_count} refs")
276
+ stats_line = (f"- Nodes: {stats.get('total', 0)} total, "
277
+ f"{stats.get('interactive', 0)} interactive, "
278
+ f"{ref_count} refs")
279
+ if extras:
280
+ stats_line += f" (含 {len(extras)} 个 a11y 外可交互)"
281
+ lines[stats_idx] = stats_line
179
282
  if ref_count > 0:
180
283
  lines[stats_idx] += f" — 使用 ref:N 引用元素,如 dp click \"ref:1\""
181
284
 
@@ -195,15 +298,29 @@ def render_a11y_plain_text(snapshot: dict, refs: dict = None) -> str:
195
298
  :return: 纯文本字符串
196
299
  """
197
300
  tree = snapshot.get('tree', {})
198
- if not tree:
199
- return ''
200
301
 
201
302
  # 如果需要收集 refs,在纯文本渲染过程中顺便收集
202
303
  if refs is not None:
203
304
  ctx = {'counter': 0, 'refs': refs}
204
- _collect_refs_only(tree, ctx)
305
+ if tree:
306
+ _collect_refs_only(tree, ctx)
307
+ # 合并 clickable_extras 的 refs(与 full/brief 保持编号一致)
308
+ for rec in snapshot.get('clickable_extras') or []:
309
+ ctx['counter'] += 1
310
+ rid = ctx['counter']
311
+ ctx['refs'][str(rid)] = {
312
+ 'locator': rec.get('locator') or '',
313
+ 'role': f"clickable/{rec.get('tag', '')}",
314
+ 'name': (rec.get('text') or '')[:100],
315
+ 'backendNodeId': rec.get('backendNodeId'),
316
+ 'confidence': rec.get('confidence'),
317
+ 'reason': rec.get('reason'),
318
+ }
205
319
  refs.update(ctx['refs'])
206
320
 
321
+ if not tree:
322
+ return ''
323
+
207
324
  parts = []
208
325
  _collect_plain_text(tree, parts)
209
326
  return '\n'.join(parts)