dp-cli 0.2.0__tar.gz → 0.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. {dp_cli-0.2.0 → dp_cli-0.3.1}/PKG-INFO +55 -4
  2. {dp_cli-0.2.0 → dp_cli-0.3.1}/README.md +54 -3
  3. {dp_cli-0.2.0 → dp_cli-0.3.1}/dp_cli/commands/snapshot_cmd.py +173 -8
  4. dp_cli-0.3.1/dp_cli/snapshot/__init__.py +28 -0
  5. {dp_cli-0.2.0 → dp_cli-0.3.1}/dp_cli/snapshot/a11y.py +111 -9
  6. dp_cli-0.3.1/dp_cli/snapshot/clickable.py +210 -0
  7. dp_cli-0.3.1/dp_cli/snapshot/clickable_js.py +381 -0
  8. {dp_cli-0.2.0 → dp_cli-0.3.1}/dp_cli.egg-info/PKG-INFO +55 -4
  9. {dp_cli-0.2.0 → dp_cli-0.3.1}/dp_cli.egg-info/SOURCES.txt +4 -1
  10. {dp_cli-0.2.0 → dp_cli-0.3.1}/pyproject.toml +1 -1
  11. dp_cli-0.3.1/tests/test_clickable.py +174 -0
  12. dp_cli-0.2.0/dp_cli/snapshot/__init__.py +0 -23
  13. {dp_cli-0.2.0 → dp_cli-0.3.1}/dp_cli/__init__.py +0 -0
  14. {dp_cli-0.2.0 → dp_cli-0.3.1}/dp_cli/bridge.py +0 -0
  15. {dp_cli-0.2.0 → dp_cli-0.3.1}/dp_cli/bridge_manager.py +0 -0
  16. {dp_cli-0.2.0 → dp_cli-0.3.1}/dp_cli/commands/__init__.py +0 -0
  17. {dp_cli-0.2.0 → dp_cli-0.3.1}/dp_cli/commands/_utils.py +0 -0
  18. {dp_cli-0.2.0 → dp_cli-0.3.1}/dp_cli/commands/browser.py +0 -0
  19. {dp_cli-0.2.0 → dp_cli-0.3.1}/dp_cli/commands/element.py +0 -0
  20. {dp_cli-0.2.0 → dp_cli-0.3.1}/dp_cli/commands/keyboard.py +0 -0
  21. {dp_cli-0.2.0 → dp_cli-0.3.1}/dp_cli/commands/misc.py +0 -0
  22. {dp_cli-0.2.0 → dp_cli-0.3.1}/dp_cli/commands/network.py +0 -0
  23. {dp_cli-0.2.0 → dp_cli-0.3.1}/dp_cli/commands/page.py +0 -0
  24. {dp_cli-0.2.0 → dp_cli-0.3.1}/dp_cli/commands/storage.py +0 -0
  25. {dp_cli-0.2.0 → dp_cli-0.3.1}/dp_cli/commands/tab.py +0 -0
  26. {dp_cli-0.2.0 → dp_cli-0.3.1}/dp_cli/main.py +0 -0
  27. {dp_cli-0.2.0 → dp_cli-0.3.1}/dp_cli/output.py +0 -0
  28. {dp_cli-0.2.0 → dp_cli-0.3.1}/dp_cli/session.py +0 -0
  29. {dp_cli-0.2.0 → dp_cli-0.3.1}/dp_cli/snapshot/extract.py +0 -0
  30. {dp_cli-0.2.0 → dp_cli-0.3.1}/dp_cli/snapshot/js_scripts.py +0 -0
  31. {dp_cli-0.2.0 → dp_cli-0.3.1}/dp_cli/snapshot/utils.py +0 -0
  32. {dp_cli-0.2.0 → dp_cli-0.3.1}/dp_cli/stealth.py +0 -0
  33. {dp_cli-0.2.0 → dp_cli-0.3.1}/dp_cli.egg-info/dependency_links.txt +0 -0
  34. {dp_cli-0.2.0 → dp_cli-0.3.1}/dp_cli.egg-info/entry_points.txt +0 -0
  35. {dp_cli-0.2.0 → dp_cli-0.3.1}/dp_cli.egg-info/requires.txt +0 -0
  36. {dp_cli-0.2.0 → dp_cli-0.3.1}/dp_cli.egg-info/top_level.txt +0 -0
  37. {dp_cli-0.2.0 → dp_cli-0.3.1}/setup.cfg +0 -0
  38. {dp_cli-0.2.0 → dp_cli-0.3.1}/tests/test_bridge_integration.py +0 -0
  39. {dp_cli-0.2.0 → dp_cli-0.3.1}/tests/test_bridge_manager.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dp-cli
3
- Version: 0.2.0
3
+ Version: 0.3.1
4
4
  Summary: A powerful CLI for DrissionPage — browser automation, structured data extraction, network listening and more.
5
5
  License: BSD-3-Clause
6
6
  Project-URL: Homepage, https://github.com/mofanx/dp-cli
@@ -26,10 +26,15 @@ A powerful CLI for [DrissionPage](https://github.com/g1879/DrissionPage) — bro
26
26
  ## Features
27
27
 
28
28
  - **Anti-detection by default** — not based on webdriver, `navigator.webdriver` is `false`
29
- - **Reuse your own browser** — connect to a running Chrome via `--port`, keeping login state and cookies
30
- - **Powerful locator syntax** — descriptive strings stable across navigation (no ephemeral refs)
31
- - **Structured data extraction** `extract` + `query` + `snapshot --mode content` for scraping list pages
29
+ - **Reuse your own browser** — `--auto-connect` (Chrome 144+, no CLI flag needed) or `--port`
30
+ - **Hybrid snapshot** — a11y tree + Vimium-style clickable detection, catches icon-only buttons
31
+ and custom menu items the a11y tree misses; every element gets an `[N]` ref with
32
+ confidence markers (`⚡` medium, `?` low)
33
+ - **`dp scan`** — fast Vimium-style listing of interactive elements (viewport-only mode available)
34
+ - **Powerful locator syntax** — descriptive strings stable across navigation
35
+ - **Structured data extraction** — `extract` + `query` + `snapshot` for scraping list pages
32
36
  - **Network listening** — capture XHR/Fetch requests and response bodies
37
+ - **Stealth patches** — `dp stealth` bypasses common automation detections
33
38
  - **Dual mode** — browser control + pure HTTP requests
34
39
  - **Shadow-root / iframe** — traverse directly without switching context
35
40
  - **JSON output** — all commands output JSON, AI-friendly
@@ -99,6 +104,52 @@ the bridge automatically and never quits your Chrome (it's your browser, not dp'
99
104
  - Works with whatever profile Chrome is actually using — same cookies, logins, history.
100
105
  - Classic `--remote-debugging-port=9222` mode still works unchanged via `dp open --port 9222`.
101
106
 
107
+ ## Hybrid Snapshot (a11y + Vimium-style)
108
+
109
+ The default `dp snapshot` fuses two element-discovery paths:
110
+
111
+ 1. **Browser a11y tree** via CDP — the structural skeleton (headings, lists, form roles,
112
+ explicit `<a>`/`<button>`, any `role="..."` element).
113
+ 2. **Vimium-style clickable detection** — a JS probe that flags icon-only buttons,
114
+ `<div onclick>`, `[tabindex>=0]`, `aria-selected`, `cursor:pointer` elements, etc.
115
+ that the a11y tree misses.
116
+
117
+ Results are deduplicated by `backendNodeId` and rendered with confidence markers:
118
+
119
+ | Marker | Confidence | Triggers |
120
+ |--------|-----------|----------|
121
+ | none | **high** | `<a href>`, `<button>`, `<input>`, `role=button/link/...`, `contenteditable` |
122
+ | `⚡` | **medium** | `onclick` / `jsaction` / `tabindex>=0` / `aria-selected` / `<audio>/<video>`, or `cursor:pointer` + heuristic (aria-label / icon child / small square / class keyword) — **catches most React/Vue icon buttons** |
123
+ | `?` | **low** | bare `cursor:pointer` / class keyword only (no other signals); hidden unless `--include-low` |
124
+
125
+ Output includes helpful context:
126
+ - `@top-left`, `@top-right`, `@center`, `@bottom` … — position in the 9-region viewport grid
127
+ - `(icon)` — icon-only button (no visible label, has `<svg>` / `<img>` child)
128
+ - Shadow DOM is traversed automatically (open shadow roots)
129
+
130
+ Every element gets an `[N]` ref usable in any command: `dp click "ref:5"`.
131
+
132
+ ```bash
133
+ dp snapshot # a11y + clickable (default); high + medium markers
134
+ dp snapshot --viewport-only # clickable probe limited to viewport (faster)
135
+ dp snapshot --include-low # also surface `?` low-confidence heuristics
136
+ dp snapshot --no-clickables # a11y tree only (legacy behavior)
137
+ ```
138
+
139
+ ### `dp scan` — fast clickable-only listing
140
+
141
+ When you only need "what can I click next?" without the full a11y tree:
142
+
143
+ ```bash
144
+ dp scan # full page, high+medium
145
+ dp scan --viewport # only elements currently in viewport
146
+ dp scan --confidence all # include low-confidence heuristics
147
+ dp scan --confidence high # only the sure-thing clickables
148
+ ```
149
+
150
+ Both `snapshot` and `scan` share the same `[N]` ref numbering per session, so
151
+ `dp click "ref:N"` works regardless of which one produced the snapshot.
152
+
102
153
  ## Anti-Detection (stealth)
103
154
 
104
155
  Bypass `navigator.webdriver`, `HeadlessChrome` UA, empty `plugins`, SwiftShader WebGL,
@@ -5,10 +5,15 @@ A powerful CLI for [DrissionPage](https://github.com/g1879/DrissionPage) — bro
5
5
  ## Features
6
6
 
7
7
  - **Anti-detection by default** — not based on webdriver, `navigator.webdriver` is `false`
8
- - **Reuse your own browser** — connect to a running Chrome via `--port`, keeping login state and cookies
9
- - **Powerful locator syntax** — descriptive strings stable across navigation (no ephemeral refs)
10
- - **Structured data extraction** `extract` + `query` + `snapshot --mode content` for scraping list pages
8
+ - **Reuse your own browser** — `--auto-connect` (Chrome 144+, no CLI flag needed) or `--port`
9
+ - **Hybrid snapshot** — a11y tree + Vimium-style clickable detection, catches icon-only buttons
10
+ and custom menu items the a11y tree misses; every element gets an `[N]` ref with
11
+ confidence markers (`⚡` medium, `?` low)
12
+ - **`dp scan`** — fast Vimium-style listing of interactive elements (viewport-only mode available)
13
+ - **Powerful locator syntax** — descriptive strings stable across navigation
14
+ - **Structured data extraction** — `extract` + `query` + `snapshot` for scraping list pages
11
15
  - **Network listening** — capture XHR/Fetch requests and response bodies
16
+ - **Stealth patches** — `dp stealth` bypasses common automation detections
12
17
  - **Dual mode** — browser control + pure HTTP requests
13
18
  - **Shadow-root / iframe** — traverse directly without switching context
14
19
  - **JSON output** — all commands output JSON, AI-friendly
@@ -78,6 +83,52 @@ the bridge automatically and never quits your Chrome (it's your browser, not dp'
78
83
  - Works with whatever profile Chrome is actually using — same cookies, logins, history.
79
84
  - Classic `--remote-debugging-port=9222` mode still works unchanged via `dp open --port 9222`.
80
85
 
86
+ ## Hybrid Snapshot (a11y + Vimium-style)
87
+
88
+ The default `dp snapshot` fuses two element-discovery paths:
89
+
90
+ 1. **Browser a11y tree** via CDP — the structural skeleton (headings, lists, form roles,
91
+ explicit `<a>`/`<button>`, any `role="..."` element).
92
+ 2. **Vimium-style clickable detection** — a JS probe that flags icon-only buttons,
93
+ `<div onclick>`, `[tabindex>=0]`, `aria-selected`, `cursor:pointer` elements, etc.
94
+ that the a11y tree misses.
95
+
96
+ Results are deduplicated by `backendNodeId` and rendered with confidence markers:
97
+
98
+ | Marker | Confidence | Triggers |
99
+ |--------|-----------|----------|
100
+ | none | **high** | `<a href>`, `<button>`, `<input>`, `role=button/link/...`, `contenteditable` |
101
+ | `⚡` | **medium** | `onclick` / `jsaction` / `tabindex>=0` / `aria-selected` / `<audio>/<video>`, or `cursor:pointer` + heuristic (aria-label / icon child / small square / class keyword) — **catches most React/Vue icon buttons** |
102
+ | `?` | **low** | bare `cursor:pointer` / class keyword only (no other signals); hidden unless `--include-low` |
103
+
104
+ Output includes helpful context:
105
+ - `@top-left`, `@top-right`, `@center`, `@bottom` … — position in the 9-region viewport grid
106
+ - `(icon)` — icon-only button (no visible label, has `<svg>` / `<img>` child)
107
+ - Shadow DOM is traversed automatically (open shadow roots)
108
+
109
+ Every element gets an `[N]` ref usable in any command: `dp click "ref:5"`.
110
+
111
+ ```bash
112
+ dp snapshot # a11y + clickable (default); high + medium markers
113
+ dp snapshot --viewport-only # clickable probe limited to viewport (faster)
114
+ dp snapshot --include-low # also surface `?` low-confidence heuristics
115
+ dp snapshot --no-clickables # a11y tree only (legacy behavior)
116
+ ```
117
+
118
+ ### `dp scan` — fast clickable-only listing
119
+
120
+ When you only need "what can I click next?" without the full a11y tree:
121
+
122
+ ```bash
123
+ dp scan # full page, high+medium
124
+ dp scan --viewport # only elements currently in viewport
125
+ dp scan --confidence all # include low-confidence heuristics
126
+ dp scan --confidence high # only the sure-thing clickables
127
+ ```
128
+
129
+ Both `snapshot` and `scan` share the same `[N]` ref numbering per session, so
130
+ `dp click "ref:N"` works regardless of which one produced the snapshot.
131
+
81
132
  ## Anti-Detection (stealth)
82
133
 
83
134
  Bypass `navigator.webdriver`, `HeadlessChrome` UA, empty `plugins`, SwiftShader WebGL,
@@ -9,7 +9,7 @@ from dp_cli.output import ok, error
9
9
  from dp_cli.session import save_refs
10
10
  from dp_cli.snapshot import (extract_structured, query_elements,
11
11
  take_a11y_snapshot, render_a11y_text,
12
- render_a11y_plain_text)
12
+ render_a11y_plain_text, detect_clickables)
13
13
  from dp_cli.snapshot.utils import suggest_locator
14
14
  from dp_cli.commands._utils import session_option, _get_page, records_to_csv, resolve_locator
15
15
 
@@ -25,8 +25,15 @@ def register(cli):
25
25
  @click.option('--format', 'fmt', type=click.Choice(['json', 'text']),
26
26
  default='text', show_default=True, help='输出格式')
27
27
  @click.option('--filename', default=None, help='保存到文件路径')
28
- def snapshot(session, mode, selector, fmt, filename):
29
- """获取页面快照(基于浏览器原生 a11y tree,通用性极强)。
28
+ @click.option('--no-clickables', is_flag=True, default=False,
29
+ help='禁用 Vimium 风格可交互元素补充探测(默认开启)')
30
+ @click.option('--include-low', is_flag=True, default=False,
31
+ help='包含 low 置信度元素(cursor:pointer / class 规则匹配,可能假阳性)')
32
+ @click.option('--viewport-only', is_flag=True, default=False,
33
+ help='补充探测只看视口内元素(省 token、更快)')
34
+ def snapshot(session, mode, selector, fmt, filename,
35
+ no_clickables, include_low, viewport_only):
36
+ """获取页面快照(a11y tree + Vimium 风格可交互元素补充)。
30
37
 
31
38
  \b
32
39
  模式说明(默认 full):
@@ -34,18 +41,30 @@ def register(cli):
34
41
  brief 精简模式,保留结构+交互,截断长文本(省 token)
35
42
  text 纯文本模式,按阅读顺序输出可见文本
36
43
 
44
+ \b
45
+ 可交互元素补充探测(默认开启):
46
+ a11y tree 会漏掉纯图标按钮、弹窗菜单项等未标 ARIA role 的元素。
47
+ 补充探测会扫描 DOM,按 Vimium 规则识别这类元素并给出 ref:N 引用。
48
+ 输出中以 ⚡ 标记 medium 置信度,? 标记 low 置信度。
49
+
37
50
  \b
38
51
  示例:
39
- dp snapshot # 完整快照(推荐首次调用)
52
+ dp snapshot # 完整快照(默认含 clickable 补充)
40
53
  dp snapshot --mode brief # 精简模式(省 token,适合循环调用)
41
- dp snapshot --mode text # 纯文本(全量文字内容)
42
- dp snapshot --selector ".main" # 只获取指定区域
43
- dp snapshot --format json # JSON 格式输出
54
+ dp snapshot --viewport-only # 只扫视口内,更快
55
+ dp snapshot --include-low # 启用 low 置信度(可能假阳性)
56
+ dp snapshot --no-clickables # a11y tree,旧版本行为
57
+ dp snapshot --selector ".main" # 只获取指定区域
44
58
  """
45
59
  page = _get_page(session)
46
60
 
47
61
  try:
48
- data = take_a11y_snapshot(page, selector=selector)
62
+ data = take_a11y_snapshot(
63
+ page, selector=selector,
64
+ with_clickables=not no_clickables,
65
+ include_low=include_low,
66
+ viewport_only=viewport_only,
67
+ )
49
68
  except Exception as e:
50
69
  error('获取页面快照失败', code='SNAPSHOT_FAILED', detail=str(e))
51
70
  return
@@ -74,6 +93,152 @@ def register(cli):
74
93
  else:
75
94
  click.echo(output)
76
95
 
96
+ @cli.command('scan')
97
+ @session_option
98
+ @click.option('--viewport', 'viewport_only', is_flag=True, default=False,
99
+ help='只扫描视口内元素(更快、更少结果)')
100
+ @click.option('--confidence', default='high,medium', show_default=True,
101
+ help='逗号分隔的置信度过滤,可选 high / medium / low;'
102
+ '使用 "all" 等价于 high,medium,low')
103
+ @click.option('--max', 'max_elements', default=1000, show_default=True,
104
+ help='最多返回多少个元素')
105
+ @click.option('--format', 'fmt', type=click.Choice(['text', 'json']),
106
+ default='text', show_default=True, help='输出格式')
107
+ @click.option('--filename', default=None, help='保存到文件路径')
108
+ @click.option('--verbose', '-v', is_flag=True, default=False,
109
+ help='显示 detection reason 和像素尺寸(调试用)')
110
+ def scan(session, viewport_only, confidence, max_elements, fmt, filename, verbose):
111
+ """Vimium 风格扫描当前页面的可交互元素(纯 DOM 遍历,不依赖 a11y tree)。
112
+
113
+ \b
114
+ 与 dp snapshot 的区别:
115
+ snapshot 返回完整 a11y tree + clickable 补充(大、慢、全面)
116
+ scan 只返回可交互元素清单(小、快、适合执行脚本)
117
+
118
+ \b
119
+ 置信度分级:
120
+ high 明确可点击(<a href>, <button>, role=button 等)
121
+ medium 很可能可点击(onclick / jsaction / tabindex / aria-selected)
122
+ low 启发式匹配(cursor:pointer / class 含 btn/click/… 关键词)
123
+ 默认只返回 high + medium;用 --confidence all 看全部
124
+
125
+ \b
126
+ 输出的每个元素都有 [N] 编号,可直接 dp click "ref:N" 引用。
127
+ 输出中 ⚡ 标记 medium 置信度,? 标记 low 置信度。
128
+
129
+ \b
130
+ 示例:
131
+ dp scan # 扫全页,high + medium
132
+ dp scan --viewport # 只扫视口内
133
+ dp scan --confidence high # 只要高置信度
134
+ dp scan --confidence all # 包含 low 置信度
135
+ dp scan --format json # JSON 输出
136
+ """
137
+ # 解析 confidence 过滤
138
+ conf_str = (confidence or '').strip().lower()
139
+ if conf_str == 'all':
140
+ wanted = {'high', 'medium', 'low'}
141
+ else:
142
+ wanted = {c.strip() for c in conf_str.split(',') if c.strip()}
143
+ unknown = wanted - {'high', 'medium', 'low'}
144
+ if unknown:
145
+ error('无效的置信度值', code='INVALID_CONFIDENCE',
146
+ detail=f'unknown: {sorted(unknown)}; 可选: high, medium, low, all')
147
+ return
148
+ include_low = 'low' in wanted
149
+
150
+ page = _get_page(session)
151
+ try:
152
+ data = detect_clickables(
153
+ page,
154
+ viewport_only=viewport_only,
155
+ max_elements=max_elements,
156
+ include_low=include_low,
157
+ )
158
+ except Exception as e:
159
+ error('扫描失败', code='SCAN_FAILED', detail=str(e))
160
+ return
161
+
162
+ if data.get('method') == 'failed':
163
+ error('扫描失败', code='SCAN_FAILED',
164
+ detail=data.get('error', '未知'))
165
+ return
166
+
167
+ # 置信度过滤(JS 的 include_low 控制是否 *生成* low;
168
+ # 这里再按 wanted 精确过滤——例如用户只要 high)
169
+ filtered = [e for e in data.get('elements', [])
170
+ if e.get('confidence') in wanted]
171
+
172
+ # 分配 ref:N 并保存到 session
173
+ refs = {}
174
+ url = page.url
175
+ title = page.title
176
+ rendered_lines = []
177
+
178
+ # 头部
179
+ mode_parts = []
180
+ if viewport_only:
181
+ mode_parts.append('viewport')
182
+ mode_parts.append(f"confidence={','.join(sorted(wanted))}")
183
+ mode_tag = ', '.join(mode_parts)
184
+
185
+ rendered_lines.append(f'### Clickable Scan ({mode_tag})')
186
+ rendered_lines.append(f'- URL: {url}')
187
+ rendered_lines.append(f'- Title: {title}')
188
+ rendered_lines.append(
189
+ f'- Detected: {data.get("total", 0)} total, '
190
+ f'{len(filtered)} after filter'
191
+ + (' (truncated)' if data.get('truncated') else '')
192
+ )
193
+ rendered_lines.append(
194
+ '- ⚡ = medium, ? = low;@zone = 位置区域(top-left/top-right/… 9 宫格);'
195
+ '(icon) = 仅图标按钮;用 ref:N 引用'
196
+ )
197
+ rendered_lines.append('')
198
+
199
+ from ..snapshot.clickable import format_clickable_record
200
+
201
+ for i, rec in enumerate(filtered, start=1):
202
+ refs[str(i)] = {
203
+ 'locator': rec.get('locator') or '',
204
+ 'role': f"clickable/{rec.get('tag', '')}",
205
+ 'name': (rec.get('label') or rec.get('text') or '')[:100],
206
+ 'backendNodeId': rec.get('backendNodeId'),
207
+ 'confidence': rec.get('confidence'),
208
+ 'reason': rec.get('reason'),
209
+ 'zone': rec.get('zone'),
210
+ 'iconOnly': bool(rec.get('iconOnly')),
211
+ }
212
+ rendered_lines.append('- ' + format_clickable_record(rec, i, verbose=verbose))
213
+
214
+ # 保存 refs 到 session
215
+ if refs:
216
+ save_refs(session, url, refs)
217
+
218
+ if fmt == 'json':
219
+ output = json.dumps({
220
+ 'status': 'ok',
221
+ 'data': {
222
+ 'page': {'url': url, 'title': title},
223
+ 'meta': {
224
+ 'total_detected': data.get('total'),
225
+ 'after_filter': len(filtered),
226
+ 'truncated': data.get('truncated'),
227
+ 'viewport_only': viewport_only,
228
+ 'confidence_filter': sorted(wanted),
229
+ },
230
+ 'elements': filtered,
231
+ }
232
+ }, ensure_ascii=False, indent=2)
233
+ else:
234
+ output = '\n'.join(rendered_lines)
235
+
236
+ if filename:
237
+ Path(filename).write_text(output, encoding='utf-8')
238
+ ok(msg=f'扫描结果已保存到 {filename}')
239
+ else:
240
+ click.echo(output)
241
+
77
242
  @cli.command('extract')
78
243
  @session_option
79
244
  @click.argument('container')
@@ -0,0 +1,28 @@
1
+ # -*- coding:utf-8 -*-
2
+ """
3
+ dp-cli snapshot 模块
4
+
5
+ 基于浏览器原生 a11y tree(无障碍树)的页面快照系统。
6
+ 通过 CDP Accessibility API 获取,通用性极强,适用于 95%+ 的网站。
7
+
8
+ 模块组成:
9
+ - a11y.py 核心:a11y tree 获取 + 多模式渲染(full/brief/text)
10
+ - clickable.py Vimium 风格可交互元素探测(补充 a11y tree 覆盖盲区)
11
+ - clickable_js.py 注入浏览器的 JS 探测脚本
12
+ - extract.py 数据提取(extract_structured / query_elements)
13
+ - utils.py 共享工具(定位器生成等)
14
+ - js_scripts.py JS 降级脚本(CDP 不可用时的 fallback)
15
+ """
16
+ from .a11y import take_a11y_snapshot, render_a11y_text, render_a11y_plain_text
17
+ from .clickable import detect_clickables, format_clickable_record
18
+ from .extract import extract_structured, query_elements
19
+
20
+ __all__ = [
21
+ 'take_a11y_snapshot',
22
+ 'render_a11y_text',
23
+ 'render_a11y_plain_text',
24
+ 'detect_clickables',
25
+ 'format_clickable_record',
26
+ 'extract_structured',
27
+ 'query_elements',
28
+ ]
@@ -50,14 +50,24 @@ _REF_CONTENT_ROLES = frozenset({
50
50
  })
51
51
 
52
52
 
53
- def take_a11y_snapshot(page, selector=None, max_depth=None) -> dict:
53
+ def take_a11y_snapshot(page, selector=None, max_depth=None,
54
+ with_clickables: bool = True,
55
+ include_low: bool = False,
56
+ viewport_only: bool = False) -> dict:
54
57
  """
55
- 获取页面 a11y tree
58
+ 获取页面 a11y tree,并(可选)合并 Vimium 风格的可点击元素探测。
56
59
 
57
60
  :param page: DrissionPage 的 ChromiumPage 对象
58
61
  :param selector: CSS 选择器,限定子树范围(可选)
59
62
  :param max_depth: 最大深度限制(可选,传给 CDP)
60
- :return: 标准化的 a11y tree 数据
63
+ :param with_clickables: True 时额外运行 clickable 探测并合并到快照;
64
+ 收集 a11y tree 漏掉的可交互元素(如纯图标按钮、
65
+ 弹窗菜单项等)
66
+ :param include_low: with_clickables=True 时,是否包含 low 置信度元素
67
+ (cursor:pointer 或 class-pattern 启发式匹配)
68
+ :param viewport_only: with_clickables=True 时,是否只探测视口内可见元素
69
+ :return: 标准化的 a11y tree 数据;若 with_clickables=True,
70
+ 额外带 'clickable_extras' 字段(补充 a11y tree 未覆盖的可交互元素)
61
71
  """
62
72
  page.wait.doc_loaded()
63
73
  page_info = {'url': page.url, 'title': page.title}
@@ -91,6 +101,42 @@ def take_a11y_snapshot(page, selector=None, max_depth=None) -> dict:
91
101
  }
92
102
  if selector_warning:
93
103
  result['warning'] = selector_warning
104
+
105
+ # ── 可选:合并 clickable 探测结果 ──
106
+ # 注意:clickable 必须自己建 bid_map —— 它的 JS 会给元素加
107
+ # data-dp-scan-id 临时属性,bid_map 必须在那之后再建才能包含 scan-id
108
+ if with_clickables:
109
+ try:
110
+ from .clickable import detect_clickables
111
+ clk = detect_clickables(
112
+ page,
113
+ viewport_only=viewport_only,
114
+ include_low=include_low,
115
+ )
116
+ # 收集 a11y tree 已覆盖的 backendNodeId(有 locator 的交互节点)
117
+ covered = {n['backendNodeId'] for n in normalized
118
+ if n.get('backendNodeId')
119
+ and n.get('locator')
120
+ and n['role'] in _INTERACTIVE_ROLES}
121
+ # 过滤出 a11y 未覆盖的元素
122
+ extras = [e for e in clk.get('elements', [])
123
+ if not (e.get('backendNodeId')
124
+ and e['backendNodeId'] in covered)]
125
+ # 过滤策略:如果有 rect 且 w/h < 2,跳过(已在 JS 过滤过,双保险)
126
+ extras = [e for e in extras
127
+ if e.get('rect') and e['rect'].get('w', 0) >= 2]
128
+ result['clickable_extras'] = extras
129
+ result['clickable_meta'] = {
130
+ 'total_detected': clk.get('total', 0),
131
+ 'covered_by_a11y': clk.get('total', 0) - len(extras),
132
+ 'extras': len(extras),
133
+ 'truncated': clk.get('truncated', False),
134
+ 'viewport_only': viewport_only,
135
+ 'include_low': include_low,
136
+ }
137
+ except Exception as ce:
138
+ result['clickable_warning'] = f'clickable 探测失败(非致命):{ce}'
139
+
94
140
  return result
95
141
  except Exception as cdp_err:
96
142
  cdp_error_msg = str(cdp_err)
@@ -170,12 +216,54 @@ def render_a11y_text(snapshot: dict, verbose: bool = False,
170
216
  else:
171
217
  lines.append('(a11y tree 为空)')
172
218
 
219
+ # ── 追加 clickable_extras(a11y tree 漏掉的可交互元素)──
220
+ extras = snapshot.get('clickable_extras') or []
221
+ if extras:
222
+ from .clickable import format_clickable_record
223
+ lines.append('')
224
+ meta = snapshot.get('clickable_meta') or {}
225
+ header_suffix = []
226
+ if meta.get('viewport_only'):
227
+ header_suffix.append('viewport-only')
228
+ if meta.get('include_low'):
229
+ header_suffix.append('include-low')
230
+ suffix_str = (f' — {", ".join(header_suffix)}'
231
+ if header_suffix else '')
232
+ lines.append(f'### Additional Interactive Elements'
233
+ f' (Vimium-style, not in a11y tree){suffix_str}')
234
+ lines.append(f'- 共 {len(extras)} 个;⚡ = medium 置信, ? = low 置信;'
235
+ f'@zone=位置区域(top-left/top-right/center/… 9 宫格);'
236
+ f'(icon)=仅图标;用 ref:N 引用')
237
+ lines.append('')
238
+ for rec in extras:
239
+ ctx['counter'] += 1
240
+ rid = ctx['counter']
241
+ lines.append('- ' + format_clickable_record(rec, rid))
242
+ # 记入 refs 以便 click/fill 引用
243
+ ctx['refs'][str(rid)] = {
244
+ 'locator': rec.get('locator') or '',
245
+ 'role': f"clickable/{rec.get('tag', '')}",
246
+ 'name': (rec.get('label') or rec.get('text') or '')[:100],
247
+ 'backendNodeId': rec.get('backendNodeId'),
248
+ 'confidence': rec.get('confidence'),
249
+ 'reason': rec.get('reason'),
250
+ 'zone': rec.get('zone'),
251
+ 'iconOnly': bool(rec.get('iconOnly')),
252
+ }
253
+
254
+ if snapshot.get('clickable_warning'):
255
+ lines.append('')
256
+ lines.append(f"⚠ {snapshot['clickable_warning']}")
257
+
173
258
  # 回填头部:包含 ref 统计
174
259
  ref_count = ctx['counter']
175
260
  lines[header_idx] = f'### Page Snapshot ({mode_label})'
176
- lines[stats_idx] = (f"- Nodes: {stats.get('total', 0)} total, "
177
- f"{stats.get('interactive', 0)} interactive, "
178
- f"{ref_count} refs")
261
+ stats_line = (f"- Nodes: {stats.get('total', 0)} total, "
262
+ f"{stats.get('interactive', 0)} interactive, "
263
+ f"{ref_count} refs")
264
+ if extras:
265
+ stats_line += f" (含 {len(extras)} 个 a11y 外可交互)"
266
+ lines[stats_idx] = stats_line
179
267
  if ref_count > 0:
180
268
  lines[stats_idx] += f" — 使用 ref:N 引用元素,如 dp click \"ref:1\""
181
269
 
@@ -195,15 +283,29 @@ def render_a11y_plain_text(snapshot: dict, refs: dict = None) -> str:
195
283
  :return: 纯文本字符串
196
284
  """
197
285
  tree = snapshot.get('tree', {})
198
- if not tree:
199
- return ''
200
286
 
201
287
  # 如果需要收集 refs,在纯文本渲染过程中顺便收集
202
288
  if refs is not None:
203
289
  ctx = {'counter': 0, 'refs': refs}
204
- _collect_refs_only(tree, ctx)
290
+ if tree:
291
+ _collect_refs_only(tree, ctx)
292
+ # 合并 clickable_extras 的 refs(与 full/brief 保持编号一致)
293
+ for rec in snapshot.get('clickable_extras') or []:
294
+ ctx['counter'] += 1
295
+ rid = ctx['counter']
296
+ ctx['refs'][str(rid)] = {
297
+ 'locator': rec.get('locator') or '',
298
+ 'role': f"clickable/{rec.get('tag', '')}",
299
+ 'name': (rec.get('text') or '')[:100],
300
+ 'backendNodeId': rec.get('backendNodeId'),
301
+ 'confidence': rec.get('confidence'),
302
+ 'reason': rec.get('reason'),
303
+ }
205
304
  refs.update(ctx['refs'])
206
305
 
306
+ if not tree:
307
+ return ''
308
+
207
309
  parts = []
208
310
  _collect_plain_text(tree, parts)
209
311
  return '\n'.join(parts)