dp-cli 0.1.1__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. dp_cli-0.3.0/PKG-INFO +232 -0
  2. dp_cli-0.3.0/README.md +211 -0
  3. dp_cli-0.3.0/dp_cli/bridge.py +500 -0
  4. dp_cli-0.3.0/dp_cli/bridge_manager.py +219 -0
  5. dp_cli-0.3.0/dp_cli/commands/_utils.py +197 -0
  6. dp_cli-0.3.0/dp_cli/commands/browser.py +316 -0
  7. {dp_cli-0.1.1 → dp_cli-0.3.0}/dp_cli/commands/element.py +35 -1
  8. dp_cli-0.3.0/dp_cli/commands/keyboard.py +225 -0
  9. {dp_cli-0.1.1 → dp_cli-0.3.0}/dp_cli/commands/page.py +36 -12
  10. {dp_cli-0.1.1 → dp_cli-0.3.0}/dp_cli/commands/snapshot_cmd.py +187 -10
  11. {dp_cli-0.1.1 → dp_cli-0.3.0}/dp_cli/commands/tab.py +1 -1
  12. dp_cli-0.3.0/dp_cli/session.py +414 -0
  13. dp_cli-0.3.0/dp_cli/snapshot/__init__.py +28 -0
  14. {dp_cli-0.1.1 → dp_cli-0.3.0}/dp_cli/snapshot/a11y.py +126 -9
  15. dp_cli-0.3.0/dp_cli/snapshot/clickable.py +197 -0
  16. dp_cli-0.3.0/dp_cli/snapshot/clickable_js.py +273 -0
  17. dp_cli-0.3.0/dp_cli/stealth.py +368 -0
  18. dp_cli-0.3.0/dp_cli.egg-info/PKG-INFO +232 -0
  19. {dp_cli-0.1.1 → dp_cli-0.3.0}/dp_cli.egg-info/SOURCES.txt +9 -1
  20. dp_cli-0.3.0/dp_cli.egg-info/requires.txt +5 -0
  21. {dp_cli-0.1.1 → dp_cli-0.3.0}/pyproject.toml +4 -1
  22. dp_cli-0.3.0/tests/test_bridge_integration.py +210 -0
  23. dp_cli-0.3.0/tests/test_bridge_manager.py +166 -0
  24. dp_cli-0.3.0/tests/test_clickable.py +130 -0
  25. dp_cli-0.1.1/PKG-INFO +0 -103
  26. dp_cli-0.1.1/README.md +0 -85
  27. dp_cli-0.1.1/dp_cli/commands/_utils.py +0 -107
  28. dp_cli-0.1.1/dp_cli/commands/browser.py +0 -159
  29. dp_cli-0.1.1/dp_cli/commands/keyboard.py +0 -126
  30. dp_cli-0.1.1/dp_cli/session.py +0 -218
  31. dp_cli-0.1.1/dp_cli/snapshot/__init__.py +0 -23
  32. dp_cli-0.1.1/dp_cli.egg-info/PKG-INFO +0 -103
  33. dp_cli-0.1.1/dp_cli.egg-info/requires.txt +0 -2
  34. {dp_cli-0.1.1 → dp_cli-0.3.0}/dp_cli/__init__.py +0 -0
  35. {dp_cli-0.1.1 → dp_cli-0.3.0}/dp_cli/commands/__init__.py +0 -0
  36. {dp_cli-0.1.1 → dp_cli-0.3.0}/dp_cli/commands/misc.py +0 -0
  37. {dp_cli-0.1.1 → dp_cli-0.3.0}/dp_cli/commands/network.py +0 -0
  38. {dp_cli-0.1.1 → dp_cli-0.3.0}/dp_cli/commands/storage.py +0 -0
  39. {dp_cli-0.1.1 → dp_cli-0.3.0}/dp_cli/main.py +0 -0
  40. {dp_cli-0.1.1 → dp_cli-0.3.0}/dp_cli/output.py +0 -0
  41. {dp_cli-0.1.1 → dp_cli-0.3.0}/dp_cli/snapshot/extract.py +0 -0
  42. {dp_cli-0.1.1 → dp_cli-0.3.0}/dp_cli/snapshot/js_scripts.py +0 -0
  43. {dp_cli-0.1.1 → dp_cli-0.3.0}/dp_cli/snapshot/utils.py +0 -0
  44. {dp_cli-0.1.1 → dp_cli-0.3.0}/dp_cli.egg-info/dependency_links.txt +0 -0
  45. {dp_cli-0.1.1 → dp_cli-0.3.0}/dp_cli.egg-info/entry_points.txt +0 -0
  46. {dp_cli-0.1.1 → dp_cli-0.3.0}/dp_cli.egg-info/top_level.txt +0 -0
  47. {dp_cli-0.1.1 → dp_cli-0.3.0}/setup.cfg +0 -0
dp_cli-0.3.0/PKG-INFO ADDED
@@ -0,0 +1,232 @@
1
+ Metadata-Version: 2.4
2
+ Name: dp-cli
3
+ Version: 0.3.0
4
+ Summary: A powerful CLI for DrissionPage — browser automation, structured data extraction, network listening and more.
5
+ License: BSD-3-Clause
6
+ Project-URL: Homepage, https://github.com/mofanx/dp-cli
7
+ Project-URL: Repository, https://github.com/mofanx/dp-cli
8
+ Keywords: drissionpage,browser,automation,cli,web-scraping
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Environment :: Console
12
+ Classifier: Topic :: Utilities
13
+ Classifier: Topic :: Internet :: WWW/HTTP :: Browsers
14
+ Requires-Python: >=3.8
15
+ Description-Content-Type: text/markdown
16
+ Requires-Dist: DrissionPage>=4.0
17
+ Requires-Dist: click>=8.0
18
+ Requires-Dist: aiohttp>=3.9
19
+ Requires-Dist: websockets>=12
20
+ Requires-Dist: requests>=2.28
21
+
22
+ # dp-cli
23
+
24
+ A powerful CLI for [DrissionPage](https://github.com/g1879/DrissionPage) — browser automation, structured data extraction, network listening and more.
25
+
26
+ ## Features
27
+
28
+ - **Anti-detection by default** — not based on webdriver, `navigator.webdriver` is `false`
29
+ - **Reuse your own browser** — `--auto-connect` (Chrome 144+, no CLI flag needed) or `--port`
30
+ - **Hybrid snapshot** — a11y tree + Vimium-style clickable detection, catches icon-only buttons
31
+ and custom menu items the a11y tree misses; every element gets an `[N]` ref with
32
+ confidence markers (`⚡` medium, `?` low)
33
+ - **`dp scan`** — fast Vimium-style listing of interactive elements (viewport-only mode available)
34
+ - **Powerful locator syntax** — descriptive strings stable across navigation
35
+ - **Structured data extraction** — `extract` + `query` + `snapshot` for scraping list pages
36
+ - **Network listening** — capture XHR/Fetch requests and response bodies
37
+ - **Stealth patches** — `dp stealth` bypasses common automation detections
38
+ - **Dual mode** — browser control + pure HTTP requests
39
+ - **Shadow-root / iframe** — traverse directly without switching context
40
+ - **JSON output** — all commands output JSON, AI-friendly
41
+
42
+ ## Installation
43
+
44
+ ```bash
45
+ pip install dp-cli
46
+ dp --help
47
+ ```
48
+
49
+ ## Quick Start
50
+
51
+ ```bash
52
+ # Auto-managed browser
53
+ dp open https://example.com
54
+ dp snapshot
55
+ dp click "text:Login"
56
+ dp fill "@name=username" admin
57
+ dp press Enter
58
+ dp close
59
+
60
+ # Connect to your own logged-in browser
61
+ google-chrome --remote-debugging-port=9222
62
+ dp open https://example.com --port 9222
63
+ dp snapshot
64
+ ```
65
+
66
+ ## Connect to a Normally-Launched Chrome (Chrome 144+)
67
+
68
+ No `--remote-debugging-port` required. Chrome 144+ exposes opt-in remote debugging
69
+ via `chrome://inspect`:
70
+
71
+ 1. Open your Chrome as usual (no special flags)
72
+ 2. Visit `chrome://inspect/#remote-debugging`
73
+ 3. Check **"Allow remote debugging for this browser instance"**
74
+ 4. Run `dp open --auto-connect`
75
+
76
+ ```bash
77
+ dp open --auto-connect # stable channel, default profile
78
+ dp open --auto-connect --channel beta # pick a different channel
79
+ dp open --auto-connect --probe-dir ~/my-profile # custom user-data-dir
80
+ ```
81
+
82
+ ### How it works
83
+
84
+ Chrome 144+ in this mode exposes **only** a browser-level WebSocket and omits the HTTP
85
+ REST API (`/json`, `/json/version`, ...) that DrissionPage / puppeteer / Playwright
86
+ depend on. `dp-cli` transparently handles this:
87
+
88
+ 1. Reads `DevToolsActivePort` from the user-data-dir → real CDP port
89
+ 2. Probes the port — if `/json/version` is missing, identifies this as inspect mode
90
+ 3. Spawns a local bridge (`python -m dp_cli.bridge`) that:
91
+ - Synthesizes the missing HTTP endpoints from CDP calls
92
+ - Multiplexes page-level CDP traffic over a single browser-level WebSocket
93
+ via `Target.attachToTarget(flatten=True)`
94
+ 4. Points DrissionPage at the bridge. Subsequent `dp` commands reuse the same bridge.
95
+
96
+ The bridge subprocess and its port are tracked in the session file; `dp close` stops
97
+ the bridge automatically and never quits your Chrome (it's your browser, not dp's).
98
+
99
+ ### Caveats
100
+
101
+ - Chrome always shows an **"Allow remote debugging"** dialog per new WebSocket client.
102
+ Since the bridge maintains one WebSocket and dp commands share it, you confirm at most
103
+ once per `dp open --auto-connect`.
104
+ - Works with whatever profile Chrome is actually using — same cookies, logins, history.
105
+ - Classic `--remote-debugging-port=9222` mode still works unchanged via `dp open --port 9222`.
106
+
107
+ ## Hybrid Snapshot (a11y + Vimium-style)
108
+
109
+ The default `dp snapshot` fuses two element-discovery paths:
110
+
111
+ 1. **Browser a11y tree** via CDP — the structural skeleton (headings, lists, form roles,
112
+ explicit `<a>`/`<button>`, any `role="..."` element).
113
+ 2. **Vimium-style clickable detection** — a JS probe that flags icon-only buttons,
114
+ `<div onclick>`, `[tabindex>=0]`, `aria-selected`, `cursor:pointer` elements, etc.
115
+ that the a11y tree misses.
116
+
117
+ Results are deduplicated by `backendNodeId` and rendered with confidence markers:
118
+
119
+ | Marker | Confidence | Triggers |
120
+ |--------|-----------|----------|
121
+ | none | **high** | `<a href>`, `<button>`, `<input>`, `role=button/link/...`, `contenteditable` |
122
+ | `⚡` | **medium** | `onclick` / `jsaction` / `tabindex>=0` / `aria-selected` / `<audio>/<video>` |
123
+ | `?` | **low** | `cursor:pointer` / class keyword match (`btn` / `click` / `toggle` / …) |
124
+
125
+ Every element gets an `[N]` ref usable in any command: `dp click "ref:5"`.
126
+
127
+ ```bash
128
+ dp snapshot # a11y + clickable (default); high + medium markers
129
+ dp snapshot --viewport-only # clickable probe limited to viewport (faster)
130
+ dp snapshot --include-low # also surface `?` low-confidence heuristics
131
+ dp snapshot --no-clickables # a11y tree only (legacy behavior)
132
+ ```
133
+
134
+ ### `dp scan` — fast clickable-only listing
135
+
136
+ When you only need "what can I click next?" without the full a11y tree:
137
+
138
+ ```bash
139
+ dp scan # full page, high+medium
140
+ dp scan --viewport # only elements currently in viewport
141
+ dp scan --confidence all # include low-confidence heuristics
142
+ dp scan --confidence high # only the sure-thing clickables
143
+ ```
144
+
145
+ Both `snapshot` and `scan` share the same `[N]` ref numbering per session, so
146
+ `dp click "ref:N"` works regardless of which one produced the snapshot.
147
+
148
+ ## Anti-Detection (stealth)
149
+
150
+ Bypass `navigator.webdriver`, `HeadlessChrome` UA, empty `plugins`, SwiftShader WebGL,
151
+ missing `chrome.runtime`, and other common automation fingerprints.
152
+
153
+ ```bash
154
+ # One-shot: connect + apply full stealth patches
155
+ dp open --port 9322 --stealth
156
+ dp goto https://bot.sannysoft.com/
157
+
158
+ # Or apply manually on an existing session (full preset by default)
159
+ dp stealth
160
+ dp stealth --preset mild # webdriver + UA only
161
+ dp stealth --ua "Mozilla/5.0 ..." # custom UA
162
+ dp stealth --feature webdriver --feature webgl # fine-grained
163
+ ```
164
+
165
+ ### Recommended VPS Chrome flags (when connecting via SSH tunnel)
166
+
167
+ ```bash
168
+ google-chrome --headless=new --remote-debugging-port=9222 \
169
+ --no-sandbox --disable-dev-shm-usage \
170
+ --disable-blink-features=AutomationControlled \
171
+ --user-data-dir="$HOME/.config/google-chrome"
172
+ # Then on local:
173
+ ssh -NL 9322:127.0.0.1:9222 vps
174
+ dp open --port 9322 --stealth
175
+ ```
176
+
177
+ Patched features (full preset): `webdriver`, `UA`, `chrome.runtime`, `permissions`,
178
+ `plugins`, `languages`, `WebGL VENDOR/RENDERER`, `window.outerWidth/Height`.
179
+
180
+ Patches are injected via `Page.addScriptToEvaluateOnNewDocument` — they persist across
181
+ navigations and frames. Advanced fingerprints (Canvas/Audio/font list) require a real
182
+ GPU or Xvfb environment.
183
+
184
+ ## Data Extraction (3-step workflow)
185
+
186
+ ```bash
187
+ # 1. Discover CSS class names via noise-filtered content tree
188
+ dp snapshot --mode content --max-text 40
189
+
190
+ # 2. Verify field selectors
191
+ dp query "css:.item-title" --fields "text,loc"
192
+
193
+ # 3. Batch extract to CSV
194
+ dp extract "css:.item-card" \
195
+ '{"title":"css:.item-title",
196
+ "price":"css:.item-price",
197
+ "tags":{"selector":"css:.tag","multi":true},
198
+ "url":{"selector":"css:a","attr":"href"}}' \
199
+ --limit 100 --output csv --filename result.csv
200
+ ```
201
+
202
+ ## Project Structure
203
+
204
+ ```
205
+ dp_cli/
206
+ ├── main.py # CLI entry point (~47 lines)
207
+ ├── session.py # Browser session management + auto-connect bridge glue
208
+ ├── bridge.py # chrome://inspect mode CDP bridge (python -m dp_cli.bridge)
209
+ ├── bridge_manager.py # Bridge subprocess lifecycle + inspect-mode detection
210
+ ├── stealth.py # Anti-detection JS patches (applied via CDP)
211
+ ├── snapshot/ # a11y-tree snapshot & data extraction engine
212
+ ├── output.py # JSON output helpers
213
+ └── commands/
214
+ ├── _utils.py # Shared decorators & helpers
215
+ ├── browser.py # open / goto / reload / close / list / stealth
216
+ ├── snapshot_cmd.py # snapshot / extract / query / find / inspect
217
+ ├── element.py # click / fill / select / hover / drag / check / upload / count
218
+ ├── keyboard.py # press / type / scroll / scroll-to / autoscroll
219
+ ├── page.py # screenshot / pdf / eval / wait (idle/loaded/url/title) / dialog
220
+ ├── tab.py # tab-list / tab-new / tab-select / tab-close
221
+ ├── storage.py # cookie-* / localstorage-* / sessionstorage-*
222
+ ├── network.py # listen / listen-stop / http-get / http-post
223
+ └── misc.py # resize / maximize / state-save / state-load / config-set
224
+ ```
225
+
226
+ ## Documentation
227
+
228
+ See [`skills/SKILL.md`](skills/SKILL.md) for the full workflow guide and [`skills/references/commands.md`](skills/references/commands.md) for the complete command reference.
229
+
230
+ ## License
231
+
232
+ BSD-3-Clause
dp_cli-0.3.0/README.md ADDED
@@ -0,0 +1,211 @@
1
+ # dp-cli
2
+
3
+ A powerful CLI for [DrissionPage](https://github.com/g1879/DrissionPage) — browser automation, structured data extraction, network listening and more.
4
+
5
+ ## Features
6
+
7
+ - **Anti-detection by default** — not based on webdriver, `navigator.webdriver` is `false`
8
+ - **Reuse your own browser** — `--auto-connect` (Chrome 144+, no CLI flag needed) or `--port`
9
+ - **Hybrid snapshot** — a11y tree + Vimium-style clickable detection, catches icon-only buttons
10
+ and custom menu items the a11y tree misses; every element gets an `[N]` ref with
11
+ confidence markers (`⚡` medium, `?` low)
12
+ - **`dp scan`** — fast Vimium-style listing of interactive elements (viewport-only mode available)
13
+ - **Powerful locator syntax** — descriptive strings stable across navigation
14
+ - **Structured data extraction** — `extract` + `query` + `snapshot` for scraping list pages
15
+ - **Network listening** — capture XHR/Fetch requests and response bodies
16
+ - **Stealth patches** — `dp stealth` bypasses common automation detections
17
+ - **Dual mode** — browser control + pure HTTP requests
18
+ - **Shadow-root / iframe** — traverse directly without switching context
19
+ - **JSON output** — all commands output JSON, AI-friendly
20
+
21
+ ## Installation
22
+
23
+ ```bash
24
+ pip install dp-cli
25
+ dp --help
26
+ ```
27
+
28
+ ## Quick Start
29
+
30
+ ```bash
31
+ # Auto-managed browser
32
+ dp open https://example.com
33
+ dp snapshot
34
+ dp click "text:Login"
35
+ dp fill "@name=username" admin
36
+ dp press Enter
37
+ dp close
38
+
39
+ # Connect to your own logged-in browser
40
+ google-chrome --remote-debugging-port=9222
41
+ dp open https://example.com --port 9222
42
+ dp snapshot
43
+ ```
44
+
45
+ ## Connect to a Normally-Launched Chrome (Chrome 144+)
46
+
47
+ No `--remote-debugging-port` required. Chrome 144+ exposes opt-in remote debugging
48
+ via `chrome://inspect`:
49
+
50
+ 1. Open your Chrome as usual (no special flags)
51
+ 2. Visit `chrome://inspect/#remote-debugging`
52
+ 3. Check **"Allow remote debugging for this browser instance"**
53
+ 4. Run `dp open --auto-connect`
54
+
55
+ ```bash
56
+ dp open --auto-connect # stable channel, default profile
57
+ dp open --auto-connect --channel beta # pick a different channel
58
+ dp open --auto-connect --probe-dir ~/my-profile # custom user-data-dir
59
+ ```
60
+
61
+ ### How it works
62
+
63
+ Chrome 144+ in this mode exposes **only** a browser-level WebSocket and omits the HTTP
64
+ REST API (`/json`, `/json/version`, ...) that DrissionPage / puppeteer / Playwright
65
+ depend on. `dp-cli` transparently handles this:
66
+
67
+ 1. Reads `DevToolsActivePort` from the user-data-dir → real CDP port
68
+ 2. Probes the port — if `/json/version` is missing, identifies this as inspect mode
69
+ 3. Spawns a local bridge (`python -m dp_cli.bridge`) that:
70
+ - Synthesizes the missing HTTP endpoints from CDP calls
71
+ - Multiplexes page-level CDP traffic over a single browser-level WebSocket
72
+ via `Target.attachToTarget(flatten=True)`
73
+ 4. Points DrissionPage at the bridge. Subsequent `dp` commands reuse the same bridge.
74
+
75
+ The bridge subprocess and its port are tracked in the session file; `dp close` stops
76
+ the bridge automatically and never quits your Chrome (it's your browser, not dp's).
77
+
78
+ ### Caveats
79
+
80
+ - Chrome always shows an **"Allow remote debugging"** dialog per new WebSocket client.
81
+ Since the bridge maintains one WebSocket and dp commands share it, you confirm at most
82
+ once per `dp open --auto-connect`.
83
+ - Works with whatever profile Chrome is actually using — same cookies, logins, history.
84
+ - Classic `--remote-debugging-port=9222` mode still works unchanged via `dp open --port 9222`.
85
+
86
+ ## Hybrid Snapshot (a11y + Vimium-style)
87
+
88
+ The default `dp snapshot` fuses two element-discovery paths:
89
+
90
+ 1. **Browser a11y tree** via CDP — the structural skeleton (headings, lists, form roles,
91
+ explicit `<a>`/`<button>`, any `role="..."` element).
92
+ 2. **Vimium-style clickable detection** — a JS probe that flags icon-only buttons,
93
+ `<div onclick>`, `[tabindex>=0]`, `aria-selected`, `cursor:pointer` elements, etc.
94
+ that the a11y tree misses.
95
+
96
+ Results are deduplicated by `backendNodeId` and rendered with confidence markers:
97
+
98
+ | Marker | Confidence | Triggers |
99
+ |--------|-----------|----------|
100
+ | none | **high** | `<a href>`, `<button>`, `<input>`, `role=button/link/...`, `contenteditable` |
101
+ | `⚡` | **medium** | `onclick` / `jsaction` / `tabindex>=0` / `aria-selected` / `<audio>/<video>` |
102
+ | `?` | **low** | `cursor:pointer` / class keyword match (`btn` / `click` / `toggle` / …) |
103
+
104
+ Every element gets an `[N]` ref usable in any command: `dp click "ref:5"`.
105
+
106
+ ```bash
107
+ dp snapshot # a11y + clickable (default); high + medium markers
108
+ dp snapshot --viewport-only # clickable probe limited to viewport (faster)
109
+ dp snapshot --include-low # also surface `?` low-confidence heuristics
110
+ dp snapshot --no-clickables # a11y tree only (legacy behavior)
111
+ ```
112
+
113
+ ### `dp scan` — fast clickable-only listing
114
+
115
+ When you only need "what can I click next?" without the full a11y tree:
116
+
117
+ ```bash
118
+ dp scan # full page, high+medium
119
+ dp scan --viewport # only elements currently in viewport
120
+ dp scan --confidence all # include low-confidence heuristics
121
+ dp scan --confidence high # only the sure-thing clickables
122
+ ```
123
+
124
+ Both `snapshot` and `scan` share the same `[N]` ref numbering per session, so
125
+ `dp click "ref:N"` works regardless of which one produced the snapshot.
126
+
127
+ ## Anti-Detection (stealth)
128
+
129
+ Bypass `navigator.webdriver`, `HeadlessChrome` UA, empty `plugins`, SwiftShader WebGL,
130
+ missing `chrome.runtime`, and other common automation fingerprints.
131
+
132
+ ```bash
133
+ # One-shot: connect + apply full stealth patches
134
+ dp open --port 9322 --stealth
135
+ dp goto https://bot.sannysoft.com/
136
+
137
+ # Or apply manually on an existing session (full preset by default)
138
+ dp stealth
139
+ dp stealth --preset mild # webdriver + UA only
140
+ dp stealth --ua "Mozilla/5.0 ..." # custom UA
141
+ dp stealth --feature webdriver --feature webgl # fine-grained
142
+ ```
143
+
144
+ ### Recommended VPS Chrome flags (when connecting via SSH tunnel)
145
+
146
+ ```bash
147
+ google-chrome --headless=new --remote-debugging-port=9222 \
148
+ --no-sandbox --disable-dev-shm-usage \
149
+ --disable-blink-features=AutomationControlled \
150
+ --user-data-dir="$HOME/.config/google-chrome"
151
+ # Then on local:
152
+ ssh -NL 9322:127.0.0.1:9222 vps
153
+ dp open --port 9322 --stealth
154
+ ```
155
+
156
+ Patched features (full preset): `webdriver`, `UA`, `chrome.runtime`, `permissions`,
157
+ `plugins`, `languages`, `WebGL VENDOR/RENDERER`, `window.outerWidth/Height`.
158
+
159
+ Patches are injected via `Page.addScriptToEvaluateOnNewDocument` — they persist across
160
+ navigations and frames. Advanced fingerprints (Canvas/Audio/font list) require a real
161
+ GPU or Xvfb environment.
162
+
163
+ ## Data Extraction (3-step workflow)
164
+
165
+ ```bash
166
+ # 1. Discover CSS class names via noise-filtered content tree
167
+ dp snapshot --mode content --max-text 40
168
+
169
+ # 2. Verify field selectors
170
+ dp query "css:.item-title" --fields "text,loc"
171
+
172
+ # 3. Batch extract to CSV
173
+ dp extract "css:.item-card" \
174
+ '{"title":"css:.item-title",
175
+ "price":"css:.item-price",
176
+ "tags":{"selector":"css:.tag","multi":true},
177
+ "url":{"selector":"css:a","attr":"href"}}' \
178
+ --limit 100 --output csv --filename result.csv
179
+ ```
180
+
181
+ ## Project Structure
182
+
183
+ ```
184
+ dp_cli/
185
+ ├── main.py # CLI entry point (~47 lines)
186
+ ├── session.py # Browser session management + auto-connect bridge glue
187
+ ├── bridge.py # chrome://inspect mode CDP bridge (python -m dp_cli.bridge)
188
+ ├── bridge_manager.py # Bridge subprocess lifecycle + inspect-mode detection
189
+ ├── stealth.py # Anti-detection JS patches (applied via CDP)
190
+ ├── snapshot/ # a11y-tree snapshot & data extraction engine
191
+ ├── output.py # JSON output helpers
192
+ └── commands/
193
+ ├── _utils.py # Shared decorators & helpers
194
+ ├── browser.py # open / goto / reload / close / list / stealth
195
+ ├── snapshot_cmd.py # snapshot / extract / query / find / inspect
196
+ ├── element.py # click / fill / select / hover / drag / check / upload / count
197
+ ├── keyboard.py # press / type / scroll / scroll-to / autoscroll
198
+ ├── page.py # screenshot / pdf / eval / wait (idle/loaded/url/title) / dialog
199
+ ├── tab.py # tab-list / tab-new / tab-select / tab-close
200
+ ├── storage.py # cookie-* / localstorage-* / sessionstorage-*
201
+ ├── network.py # listen / listen-stop / http-get / http-post
202
+ └── misc.py # resize / maximize / state-save / state-load / config-set
203
+ ```
204
+
205
+ ## Documentation
206
+
207
+ See [`skills/SKILL.md`](skills/SKILL.md) for the full workflow guide and [`skills/references/commands.md`](skills/references/commands.md) for the complete command reference.
208
+
209
+ ## License
210
+
211
+ BSD-3-Clause