@silbercue/chrome 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +229 -0
  3. package/build/cache/a11y-tree.d.ts +252 -0
  4. package/build/cache/a11y-tree.js +1956 -0
  5. package/build/cache/index.d.ts +8 -0
  6. package/build/cache/index.js +4 -0
  7. package/build/cache/selector-cache.d.ts +47 -0
  8. package/build/cache/selector-cache.js +119 -0
  9. package/build/cache/session-defaults.d.ts +27 -0
  10. package/build/cache/session-defaults.js +130 -0
  11. package/build/cache/tab-state-cache.d.ts +39 -0
  12. package/build/cache/tab-state-cache.js +171 -0
  13. package/build/cdp/cdp-client.d.ts +25 -0
  14. package/build/cdp/cdp-client.js +146 -0
  15. package/build/cdp/chrome-launcher.d.ts +85 -0
  16. package/build/cdp/chrome-launcher.js +502 -0
  17. package/build/cdp/console-collector.d.ts +53 -0
  18. package/build/cdp/console-collector.js +147 -0
  19. package/build/cdp/debug.d.ts +1 -0
  20. package/build/cdp/debug.js +6 -0
  21. package/build/cdp/dialog-handler.d.ts +54 -0
  22. package/build/cdp/dialog-handler.js +129 -0
  23. package/build/cdp/dom-watcher.d.ts +45 -0
  24. package/build/cdp/dom-watcher.js +195 -0
  25. package/build/cdp/emulation.d.ts +12 -0
  26. package/build/cdp/emulation.js +17 -0
  27. package/build/cdp/index.d.ts +11 -0
  28. package/build/cdp/index.js +6 -0
  29. package/build/cdp/network-collector.d.ts +77 -0
  30. package/build/cdp/network-collector.js +257 -0
  31. package/build/cdp/protocol.d.ts +20 -0
  32. package/build/cdp/protocol.js +1 -0
  33. package/build/cdp/session-manager.d.ts +62 -0
  34. package/build/cdp/session-manager.js +205 -0
  35. package/build/cdp/settle.d.ts +16 -0
  36. package/build/cdp/settle.js +71 -0
  37. package/build/cli/license-commands.d.ts +19 -0
  38. package/build/cli/license-commands.js +199 -0
  39. package/build/cli/top-level-commands.d.ts +49 -0
  40. package/build/cli/top-level-commands.js +222 -0
  41. package/build/hooks/index.d.ts +2 -0
  42. package/build/hooks/index.js +1 -0
  43. package/build/hooks/pro-hooks.d.ts +126 -0
  44. package/build/hooks/pro-hooks.js +17 -0
  45. package/build/index.d.ts +4 -0
  46. package/build/index.js +86 -0
  47. package/build/license/free-tier-config.d.ts +14 -0
  48. package/build/license/free-tier-config.js +18 -0
  49. package/build/license/index.d.ts +4 -0
  50. package/build/license/index.js +2 -0
  51. package/build/license/license-status.d.ts +15 -0
  52. package/build/license/license-status.js +9 -0
  53. package/build/overlay/session-overlay.d.ts +22 -0
  54. package/build/overlay/session-overlay.js +372 -0
  55. package/build/plan/index.d.ts +7 -0
  56. package/build/plan/index.js +4 -0
  57. package/build/plan/plan-conditions.d.ts +12 -0
  58. package/build/plan/plan-conditions.js +242 -0
  59. package/build/plan/plan-executor.d.ts +49 -0
  60. package/build/plan/plan-executor.js +259 -0
  61. package/build/plan/plan-state-store.d.ts +24 -0
  62. package/build/plan/plan-state-store.js +43 -0
  63. package/build/plan/plan-variables.d.ts +16 -0
  64. package/build/plan/plan-variables.js +71 -0
  65. package/build/registry.d.ts +124 -0
  66. package/build/registry.js +884 -0
  67. package/build/server.d.ts +1 -0
  68. package/build/server.js +245 -0
  69. package/build/tools/click.d.ts +34 -0
  70. package/build/tools/click.js +293 -0
  71. package/build/tools/configure-session.d.ts +15 -0
  72. package/build/tools/configure-session.js +45 -0
  73. package/build/tools/console-logs.d.ts +18 -0
  74. package/build/tools/console-logs.js +44 -0
  75. package/build/tools/dom-snapshot.d.ts +13 -0
  76. package/build/tools/dom-snapshot.js +259 -0
  77. package/build/tools/element-utils.d.ts +23 -0
  78. package/build/tools/element-utils.js +133 -0
  79. package/build/tools/error-utils.d.ts +8 -0
  80. package/build/tools/error-utils.js +27 -0
  81. package/build/tools/evaluate.d.ts +34 -0
  82. package/build/tools/evaluate.js +217 -0
  83. package/build/tools/file-upload.d.ts +20 -0
  84. package/build/tools/file-upload.js +174 -0
  85. package/build/tools/fill-form.d.ts +39 -0
  86. package/build/tools/fill-form.js +256 -0
  87. package/build/tools/handle-dialog.d.ts +15 -0
  88. package/build/tools/handle-dialog.js +48 -0
  89. package/build/tools/index.d.ts +35 -0
  90. package/build/tools/index.js +18 -0
  91. package/build/tools/navigate.d.ts +18 -0
  92. package/build/tools/navigate.js +111 -0
  93. package/build/tools/network-monitor.d.ts +18 -0
  94. package/build/tools/network-monitor.js +66 -0
  95. package/build/tools/observe.d.ts +44 -0
  96. package/build/tools/observe.js +339 -0
  97. package/build/tools/press-key.d.ts +33 -0
  98. package/build/tools/press-key.js +155 -0
  99. package/build/tools/read-page.d.ts +22 -0
  100. package/build/tools/read-page.js +100 -0
  101. package/build/tools/run-plan.d.ts +205 -0
  102. package/build/tools/run-plan.js +215 -0
  103. package/build/tools/screenshot.d.ts +16 -0
  104. package/build/tools/screenshot.js +283 -0
  105. package/build/tools/scroll.d.ts +28 -0
  106. package/build/tools/scroll.js +143 -0
  107. package/build/tools/switch-tab.d.ts +26 -0
  108. package/build/tools/switch-tab.js +355 -0
  109. package/build/tools/tab-status.d.ts +7 -0
  110. package/build/tools/tab-status.js +50 -0
  111. package/build/tools/type.d.ts +31 -0
  112. package/build/tools/type.js +247 -0
  113. package/build/tools/virtual-desk.d.ts +7 -0
  114. package/build/tools/virtual-desk.js +108 -0
  115. package/build/tools/visual-constants.d.ts +3 -0
  116. package/build/tools/visual-constants.js +10 -0
  117. package/build/tools/wait-for.d.ts +26 -0
  118. package/build/tools/wait-for.js +323 -0
  119. package/build/transport/index.d.ts +3 -0
  120. package/build/transport/index.js +2 -0
  121. package/build/transport/pipe-transport.d.ts +18 -0
  122. package/build/transport/pipe-transport.js +63 -0
  123. package/build/transport/transport.d.ts +8 -0
  124. package/build/transport/transport.js +1 -0
  125. package/build/transport/websocket-transport.d.ts +22 -0
  126. package/build/transport/websocket-transport.js +200 -0
  127. package/build/types.d.ts +21 -0
  128. package/build/types.js +1 -0
  129. package/package.json +62 -0
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Silbercue
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,229 @@
1
+ # SilbercueChrome
2
+
3
+ [![GitHub Release](https://img.shields.io/github/v/release/Silbercue/silbercuechrome)](https://github.com/Silbercue/silbercuechrome/releases)
4
+ [![npm version](https://img.shields.io/npm/v/@silbercue%2Fchrome)](https://www.npmjs.com/package/@silbercue/chrome)
5
+ [![Free — 18 tools](https://img.shields.io/badge/Free-18_tools-brightgreen)](https://github.com/Silbercue/silbercuechrome#free-vs-pro)
6
+ [![Pro — 21+ tools](https://img.shields.io/badge/Pro-21%2B_tools-blueviolet)](https://polar.sh/silbercuechrome)
7
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
8
+ [![Node >= 18](https://img.shields.io/badge/node-%3E%3D18-brightgreen)](https://nodejs.org)
9
+
10
+ The fastest, most token-efficient MCP server for Chrome browser automation. Direct CDP, a11y-tree refs, multi-tab ready. **24/24 on the hardest benchmark at 20s scripted, beating Playwright MCP and claude-in-chrome.**
11
+
12
+ Built for [Claude Code](https://claude.ai/claude-code), [Cursor](https://cursor.sh), and any MCP-compatible client.
13
+
14
+ > **Looking for an alternative to Playwright MCP, Browser MCP, or claude-in-chrome?** SilbercueChrome talks to Chrome directly via the DevTools Protocol — no Playwright dependency, no Chrome extension bridge, no single-tab limit. One command to install, zero config, and the best benchmark score in the category. [See comparison below](#benchmarks).
15
+
16
+ ## Why SilbercueChrome?
17
+
18
+ Every Chrome MCP server has the same problem: **too many tokens, too few reliable refs.** Screenshots eat 10-30x more tokens than text trees. Selector-based refs break the second the DOM rerenders. Extension bridges (Browser MCP) get stuck on the connected tab. Playwright wrappers spin up a new browser instance for every session.
19
+
20
+ SilbercueChrome fixes this. It talks directly to Chrome via CDP (same protocol Playwright and Puppeteer use internally), returns an accessibility-tree-based reference map, and caches it across calls so `click(ref: 'e5')` and `type(ref: 'e7', ...)` survive scrolls and DOM updates.
21
+
22
+ | What you get | Playwright MCP | Browser MCP | claude-in-chrome | browser-use | **SilbercueChrome** |
23
+ |---|---|---|---|---|---|
24
+ | Hardest benchmark (24 tests, LLM-driven) | 24/24 (~570s) | **cannot finish** | 24/24 (1140s) | 17/24 (~1049s) | **24/24 Pro: 555s · Free: 755-900s** |
25
+ | Scripted benchmark (24 tests) | — | — | — | — | **24/24 in ~20s** |
26
+ | Multi-tab support | Yes | **No (single tab)** | Yes | Partial | **Yes** |
27
+ | Connection | New browser | Extension bridge | Extension | Subprocess | **Direct CDP (pipe or WebSocket)** |
28
+ | Ref system | Playwright refs | Playwright refs | CSS selectors | Screenshots | **A11y-tree refs (stable across DOM changes)** |
29
+ | Read page | Screenshot + DOM | Snapshot | DOM dump | Screenshot-heavy | **`read_page` — 10-30x fewer tokens** |
30
+ | Drag & drop | Yes | No | Partial | No | **Yes (native CDP mouse events)** |
31
+ | Shadow DOM + iframe | Yes | Yes | Partial | No | **Yes (with OOPIF session support)** |
32
+ | Keyboard shortcuts | Yes | Yes | Partial | No | **Yes (`press_key` with real CDP keyboard events)** |
33
+ | localStorage/cookies | Yes | No | Partial | No | **Yes (via `evaluate`)** |
34
+ | Multi-step plan execution | — | — | — | — | **`run_plan` — server-side plan executor with variables, conditions, suspend/resume** |
35
+ | Zero-config install | Yes | Yes | Built-in | Yes | **Yes (one `claude mcp add` line)** |
36
+
37
+ ### Where SilbercueChrome really shines
38
+
39
+ > ![killer feat](https://img.shields.io/badge/killer%20feat-%23FFD700?style=flat-square) **24/24 on the hardest benchmark in 20 seconds (scripted), 555s LLM-driven** — beats every alternative in the category
40
+
41
+ The test-hardest suite covers 24 patterns that break most browser MCPs: infinite scroll, Shadow DOM, nested iframes, drag & drop, canvas clicks, keyboard shortcuts, contenteditable, async timing races, 10K-element DOMs, localStorage chains, mutation observers, modal form chains. SilbercueChrome Pro clears all 24 in ~555s LLM-driven (scripted: ~21s). Free tier runs 20s scripted / 755-900s LLM. Playwright MCP takes ~570s LLM. claude-in-chrome takes 1140s. browser-use fails 7 tests architecturally.
42
+
43
+ > ![killer feat](https://img.shields.io/badge/killer%20feat-%23FFD700?style=flat-square) **`read_page` with a11y-tree refs — 10-30x cheaper than screenshots**
44
+
45
+ Instead of pushing an 800KB screenshot every turn, `read_page` returns the accessibility tree with stable `e5`-style refs. Agents read text, find elements, and chain `click(ref: 'e5')` / `type(ref: 'e7', ...)` — all in 30KB responses. Screenshots stay available for visual verification via `screenshot`, but the LLM stops defaulting to them for element discovery.
46
+
47
+ > ![killer feat](https://img.shields.io/badge/killer%20feat-%23FFD700?style=flat-square) **True multi-tab — `virtual_desk`, `switch_tab`, parallel tabs in `run_plan`** <img src="https://img.shields.io/badge/Pro-blueviolet?style=flat-square" align="center">
48
+
49
+ Browser MCP binds to a single "connected" tab via its Chrome extension — cross-tab operations are architecturally impossible. SilbercueChrome uses CDP `Target` API to enumerate, open, close, and switch between tabs. `virtual_desk` lists every open tab with stable IDs. `switch_tab` moves between them without touching the user's active tab. `run_plan` even supports parallel tab execution.
50
+
51
+ > ![strong](https://img.shields.io/badge/strong-%23C0C0C0?style=flat-square) **`fill_form` — one call for a complete form**
52
+
53
+ Other MCPs make you emit N `type` calls for an N-field form. `fill_form` takes a single `fields[]` array with refs and values, handles text inputs, `<select>` (by value or label), checkboxes, and radios in one CDP round-trip, and reports per-field status.
54
+
55
+ > ![strong](https://img.shields.io/badge/strong-%23C0C0C0?style=flat-square) **`observe` — watch DOM changes without writing JavaScript**
56
+
57
+ Two modes: `collect` (watch for N ms, return every text/attribute change) and `until` (wait for a condition, then auto-click). Use `click_first` to trigger the action that causes changes — the observer is set up *before* the click, so nothing is missed. Replaces the typical `setInterval`/`MutationObserver`/`evaluate` dance.
58
+
59
+ > ![strong](https://img.shields.io/badge/strong-%23C0C0C0?style=flat-square) **`run_plan` — server-side multi-step automation**
60
+
61
+ Execute a sequence of tool steps server-side with variables (`$varName`), conditions (`if`), `saveAs`, error strategies (`abort`/`continue`/`screenshot`), and suspend/resume for long-running workflows. Parallel tab execution is a Pro feature.
62
+
63
+ ## Quick Start
64
+
65
+ ### Install in Claude Code
66
+
67
+ One command — installs globally for all projects:
68
+
69
+ ```bash
70
+ claude mcp add --scope user silbercuechrome npx -y @silbercue/chrome@latest
71
+ ```
72
+
73
+ Restart Claude Code. First tool call auto-launches Chrome **visible** (no headless, no port setup). Done.
74
+
75
+ ### Install in Cursor
76
+
77
+ Add to `~/.cursor/mcp.json`:
78
+
79
+ ```json
80
+ {
81
+ "mcpServers": {
82
+ "silbercuechrome": {
83
+ "command": "npx",
84
+ "args": ["-y", "@silbercue/chrome@latest"]
85
+ }
86
+ }
87
+ }
88
+ ```
89
+
90
+ ### Install in other MCP clients
91
+
92
+ Any client that supports stdio MCP servers: `npx -y @silbercue/chrome@latest` with no arguments.
93
+
94
+ ### Uninstall
95
+
96
+ ```bash
97
+ claude mcp remove --scope user silbercuechrome
98
+ ```
99
+
100
+ ## Free vs Pro
101
+
102
+ The Free tier gives you 18 tools covering 24/24 benchmark tests in the scripted runner. Pro adds `virtual_desk`, `switch_tab`, `dom_snapshot`, and advanced `run_plan` features (parallel tabs, operator hooks, ambient context) plus faster internals.
103
+
104
+ | | Free | Pro |
105
+ |---|---|---|
106
+ | Tools | 18 | 21+ |
107
+ | Page understanding | `read_page` | `read_page` + `dom_snapshot` (spatial queries) |
108
+ | Tab management | `navigate`, `tab_status` | + `virtual_desk`, `switch_tab`, parallel tabs in `run_plan` |
109
+ | Interaction | `click`, `type`, `fill_form`, `press_key`, `scroll`, `file_upload`, `handle_dialog` | Same |
110
+ | Observation | `screenshot`, `wait_for`, `observe`, `console_logs`, `network_monitor` | Same + ambient page context hooks |
111
+ | Scripting | `run_plan` (sequential) | `run_plan` (sequential + parallel + operator hooks) |
112
+ | Last resort | `evaluate` | `evaluate` + anti-pattern scanner hints |
113
+ | Benchmark score | 24/24 | 24/24 |
114
+ | Benchmark time (scripted) | ~20s | ~21s |
115
+ | Benchmark time (LLM-driven) | 755-900s | ~555s |
116
+
117
+ Pro costs $19 USD one-time. [Get a license on Polar.sh](https://polar.sh/silbercuechrome), then activate via the built-in license command or `SILBERCUECHROME_LICENSE_KEY=SC-PRO-...` env var.
118
+
119
+ ## Tools
120
+
121
+ ### Reading & Observation
122
+
123
+ | Tool | Description |
124
+ |---|---|
125
+ | `read_page` | Accessibility tree with stable `e`-refs — primary way to understand the page. 10-30x cheaper than screenshots. Filter by `interactive` (default) or `all` (include static text). |
126
+ | `screenshot` | WebP capture, max 800px, <100KB. Use for visual verification only — you cannot use screenshots to drive click/type, refs come from `read_page`. |
127
+ | `console_logs` | Retrieve browser console output with level/pattern filters |
128
+ | `network_monitor` | Start/stop/query network requests with filtering |
129
+ | `observe` | Watch DOM changes: `collect` (buffer changes over time) or `until` (wait for condition, then auto-click) |
130
+ | `wait_for` | Wait for element visible, network idle, or JS expression true |
131
+ | `tab_status` | Active tab's cached URL/title/ready/errors — mid-workflow sanity check |
132
+
133
+ ### Interaction
134
+
135
+ | Tool | Description |
136
+ |---|---|
137
+ | `click` | Real CDP mouse events (mouseMoved/Pressed/Released). Click by ref, selector, text, or `x`+`y` coordinates. Response includes DOM diff (NEW/REMOVED/CHANGED). |
138
+ | `type` | Type into an input by ref/selector |
139
+ | `fill_form` | Fill a complete form in one call — text, `<select>`, checkbox, radio. Per-field status, partial errors don't abort. |
140
+ | `press_key` | Real CDP keyboard events — Enter, Escape, Tab, arrows, shortcuts (Ctrl+K, etc.) |
141
+ | `scroll` | Scroll page, element into view, or inside a specific container (sidebar, modal body) |
142
+ | `file_upload` | Upload file(s) to an `<input type="file">` |
143
+ | `handle_dialog` | Configure `alert`/`confirm`/`prompt` handling before triggering actions |
144
+
145
+ ### Navigation
146
+
147
+ | Tool | Description |
148
+ |---|---|
149
+ | `navigate` | Load a URL in the active tab. Waits for settle. First call per session is auto-redirected to `virtual_desk` to prevent blindly overwriting the user's tab. |
150
+
151
+ ### Scripting
152
+
153
+ | Tool | Description |
154
+ |---|---|
155
+ | `run_plan` | Execute a multi-step plan server-side. Variables (`$varName`), conditions (`if`), `saveAs`, error strategies (`abort`/`continue`/`screenshot`), suspend/resume. Parallel tabs require Pro. |
156
+ | `configure_session` | View/set session defaults (tab, timeout) and accept auto-promote suggestions |
157
+ | `evaluate` | Execute JS in the page context. Use for COMPUTE or side effects no tool covers — not for element discovery (use `read_page` instead). Anti-pattern scanner warns when you reach for `querySelector` or `.click()`. |
158
+
159
+ ### Pro tier (additional)
160
+
161
+ | Tool | Description |
162
+ |---|---|
163
+ | `virtual_desk` <img src="https://img.shields.io/badge/Pro-blueviolet?style=flat-square" align="center"> | Lists all tabs with stable IDs. Call first in every session. |
164
+ | `switch_tab` <img src="https://img.shields.io/badge/Pro-blueviolet?style=flat-square" align="center"> | Open, switch to, or close tabs by ID from `virtual_desk` |
165
+ | `dom_snapshot` <img src="https://img.shields.io/badge/Pro-blueviolet?style=flat-square" align="center"> | Bounding boxes, computed styles, paint order, colors. For spatial questions `read_page` cannot answer. |
166
+
167
+ ## Benchmarks
168
+
169
+ Measured on `https://mcp-test.second-truth.com` — 24 tests in 4 levels (Basics, Intermediate, Advanced, Hardest). Each run is independent, values on the benchmark page are randomized per page-load, and all runs started in a fresh Claude Code session out of `/tmp` (no project context bias).
170
+
171
+ | MCP | Passed | Time (LLM) | Time (scripted) |
172
+ |---|---|---|---|
173
+ | **SilbercueChrome Pro** | **24/24** | **555s** | **21s** |
174
+ | **SilbercueChrome Free** | **24/24** | **755-900s** | **20s** |
175
+ | Playwright MCP | 24/24 | ~570s | — |
176
+ | claude-in-chrome | 24/24 | 1140s | — |
177
+ | browser-use | 17/24 | ~1049s | — |
178
+ | Browser MCP | — | could not complete | — |
179
+
180
+ browser-use fails 7 tests architecturally: infinite scroll (no container-internal scrolling), drag & drop, canvas click, keyboard shortcuts, contenteditable bold, localStorage+cookie chain, mutation observer. Browser MCP's single-tab extension bridge cannot complete the `Tab switch, read, return` pattern and becomes unstable over longer runs. See [`test-hardest/BENCHMARK-PROTOCOL.md`](test-hardest/BENCHMARK-PROTOCOL.md) for the full protocol and raw JSON runs.
181
+
182
+ ## Architecture
183
+
184
+ ```
185
+ SilbercueChrome (Node.js MCP server, @silbercue/chrome)
186
+ ├── @modelcontextprotocol/sdk (stdio transport)
187
+ ├── CDP Client
188
+ │ ├── WebSocket transport (existing Chrome on :9222)
189
+ │ └── Pipe transport (auto-launched Chrome with --remote-debugging-pipe)
190
+ ├── Auto-Launch: Chrome + optimal flags, visible by default
191
+ ├── A11y-tree cache + Selector cache
192
+ ├── Session Manager (OOPIF support for iframes and Shadow DOM)
193
+ ├── Tab State Cache (URL/title/ready across tabs)
194
+ └── 18 Free-tier tools + 3+ Pro-tier tools
195
+ Reading · Interaction · Navigation · Scripting · Observation
196
+ ```
197
+
198
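+ Connection modes (a Chrome already listening on `--remote-debugging-port=9222` is used when present; otherwise auto-launch applies):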
199
+ 1. **Auto-Launch (default, zero-config)** — starts Chrome as a child process via `--remote-debugging-pipe`, visible as a window, with all flags set for reliable screenshots and keyboard focus.
200
+ 2. **WebSocket (optional)** — if you already run Chrome with `--remote-debugging-port=9222`, SilbercueChrome connects to that instead. Use this to control your own browser with its extensions and login sessions.
201
+
202
+ ## Requirements
203
+
204
+ - Node.js >= 18
205
+ - Google Chrome, Chromium, or any Chromium-based browser (auto-detected on macOS/Linux/Windows; override with `CHROME_PATH`)
206
+
207
+ ## Environment Variables
208
+
209
+ | Variable | Values | Default | Description |
210
+ |---|---|---|---|
211
+ | `SILBERCUE_CHROME_AUTO_LAUNCH` | `true` / `false` | `true` | Auto-launch Chrome if no running instance found |
212
+ | `SILBERCUE_CHROME_HEADLESS` | `true` / `false` | `false` | Opt-in headless mode for CI/server environments |
213
+ | `SILBERCUE_CHROME_PROFILE` | path | — | Chrome user profile directory (auto-launch only) |
214
+ | `CHROME_PATH` | path | — | Path to Chrome binary (overrides auto-detection) |
215
+ | `SILBERCUECHROME_LICENSE_KEY` | license key | — | Pro license key (e.g. `SC-PRO-...`) |
216
+
217
+ ## License
218
+
219
+ The core server and all 18 Free-tier tools are **MIT licensed** — see [LICENSE](LICENSE). Use them however you want, commercially or otherwise.
220
+
221
+ Pro tools (3+ gated tools, parallel tab execution, ambient context, operator hooks, faster internals) require a [paid license](https://polar.sh/silbercuechrome). The license validation code is in the separate private Pro repository.
222
+
223
+ ## Contributing
224
+
225
+ Issues and pull requests welcome at [github.com/Silbercue/silbercuechrome](https://github.com/Silbercue/silbercuechrome).
226
+
227
+ ## Privacy
228
+
229
+ SilbercueChrome runs entirely on your machine. All browser automation happens locally via CDP. No telemetry, no remote calls, no data sent to any third party.
package/build/cache/a11y-tree.d.ts ADDED
@@ -0,0 +1,252 @@
1
+ import type { CdpClient } from "../cdp/cdp-client.js";
2
+ import type { SessionManager } from "../cdp/session-manager.js";
3
+ interface AXValue {
4
+ type: string;
5
+ value: unknown;
6
+ }
7
+ interface AXProperty {
8
+ name: string;
9
+ value: AXValue;
10
+ }
11
+ export interface AXNode {
12
+ nodeId: string;
13
+ ignored: boolean;
14
+ role?: AXValue;
15
+ name?: AXValue;
16
+ description?: AXValue;
17
+ value?: AXValue;
18
+ properties?: AXProperty[];
19
+ childIds?: string[];
20
+ parentId?: string;
21
+ backendDOMNodeId?: number;
22
+ frameId?: string;
23
+ }
24
+ export interface TreeOptions {
25
+ depth?: number;
26
+ ref?: string;
27
+ filter?: "interactive" | "all" | "landmark" | "visual";
28
+ max_tokens?: number;
29
+ /** Bypass precomputed cache and fetch fresh data from CDP (fixes stale data after SPA navigation) */
30
+ fresh?: boolean;
31
+ }
32
+ export interface TreeResult {
33
+ text: string;
34
+ refCount: number;
35
+ depth: number;
36
+ tokenCount: number;
37
+ pageUrl: string;
38
+ hasVisualData?: boolean;
39
+ downsampled?: boolean;
40
+ originalTokens?: number;
41
+ downsampleLevel?: number;
42
+ /** FR-022: Number of content nodes (StaticText, paragraph, cell, etc.) with visible text that were hidden by filter:interactive. */
43
+ hiddenContentCount?: number;
44
+ }
45
+ export type ElementClassification = "widget-state" | "clickable" | "disabled" | "static";
46
+ export interface DOMChange {
47
+ type: "added" | "removed" | "changed";
48
+ ref: string;
49
+ role: string;
50
+ before?: string;
51
+ after: string;
52
+ }
53
+ export type SnapshotMap = Map<number, string>;
54
+ export interface ClosestRefSuggestion {
55
+ ref: string;
56
+ role: string;
57
+ name: string;
58
+ }
59
+ export declare class A11yTreeProcessor {
60
+ private refMap;
61
+ private reverseMap;
62
+ private nodeInfoMap;
63
+ private sessionNodeMap;
64
+ private nextRef;
65
+ private lastUrl;
66
+ private _precomputedNodes;
67
+ private _precomputedUrl;
68
+ private _precomputedSessionId;
69
+ private _precomputedDepth;
70
+ private _cacheVersion;
71
+ /** Story 13.1: Current cache version — increments on every state change */
72
+ get cacheVersion(): number;
73
+ reset(): void;
74
+ /** Invalidates the precomputed cache (e.g. after navigation or reconnect) */
75
+ invalidatePrecomputed(): void;
76
+ /** Background refresh: loads the A11y tree and stores it as the cache */
77
+ refreshPrecomputed(cdpClient: CdpClient, sessionId: string, sessionManager?: SessionManager): Promise<void>;
78
+ /** Checks whether a valid precomputed cache exists */
79
+ hasPrecomputed(sessionId: string): boolean;
80
+ /**
81
+ * H1: Remove all node references for a detached OOPIF session.
82
+ * Called when an OOPIF frame navigates away or is destroyed.
83
+ */
84
+ removeNodesForSession(sessionId: string): void;
85
+ resolveRef(ref: string): number | undefined;
86
+ getNodeInfo(backendNodeId: number): {
87
+ role: string;
88
+ name: string;
89
+ } | undefined;
90
+ /**
91
+ * UX-001: Find an element by visible text (name). Returns ref string and backendNodeId.
92
+ * Matching priority: exact → case-insensitive exact → partial substring.
93
+ * Within each tier, interactive roles (button, link, etc.) are preferred.
94
+ */
95
+ findByText(text: string): {
96
+ ref: string;
97
+ backendNodeId: number;
98
+ } | null;
99
+ /** Returns true if the ref map has been populated (i.e. getTree was called at least once). */
100
+ hasRefs(): boolean;
101
+ /** Number of currently assigned refs (for DOM fingerprinting, Story 7.5) */
102
+ get refCount(): number;
103
+ /** Current page URL (for on-the-fly fingerprint computation, Story 7.5 H1 fix) */
104
+ get currentUrl(): string;
105
+ getRefForBackendNodeId(backendNodeId: number): string | undefined;
106
+ private fetchVisualData;
107
+ /**
108
+ * FR-H5: Enrich nodeInfoMap with HTML attributes (IDs, onclick) and event listeners.
109
+ * Phase 1: DOM.describeNode for HTML IDs + inline onclick detection
110
+ * Phase 2: DOMDebugger.getEventListeners for non-interactive nodes (mousedown, click, pointerdown)
111
+ * Called from both refreshPrecomputed() and getTree() so read_page always has full data.
112
+ */
113
+ private _enrichNodeMetadata;
114
+ private computeIsClickable;
115
+ private getSnapshotString;
116
+ findClosestRef(ref: string, roleFilter?: Set<string>): ClosestRefSuggestion | null;
117
+ getTree(cdpClient: CdpClient, sessionId: string, options?: TreeOptions, sessionManager?: SessionManager): Promise<TreeResult>;
118
+ private downsampleTree;
119
+ private truncateToFit;
120
+ /** C2: Trim body lines from the end (content first) until budget is met */
121
+ private trimBodyToFit;
122
+ private renderNodeDownsampled;
123
+ private renderChildrenDownsampled;
124
+ private countVisibleChildren;
125
+ private getVisibleChildren;
126
+ private countDescendantElements;
127
+ /** H3: Check if a node has any interactive descendants (recursive) */
128
+ private hasInteractiveDescendants;
129
+ private shortContainerRole;
130
+ private formatDownsampledHeader;
131
+ private getSubtree;
132
+ /** H2: Downsample a subtree (same algorithm as downsampleTree but for a single root) */
133
+ private downsampleSubtree;
134
+ /** BUG-001: Find the nearest heading sibling before this node in parent's children */
135
+ private findSectionHeading;
136
+ private renderNode;
137
+ /**
138
+ * Ticket-1 / Token-Aggregation: minimum number of same-class leaf elements
139
+ * inside the rendered subtree before they are collapsed into one summary
140
+ * line at the first occurrence. Set to 10 so we never aggregate small or
141
+ * medium lists (button bars, nav menus, dialog actions) but reliably catch
142
+ * large generated lists like the 240-button benchmark page, even when they
143
+ * are interleaved with headings/paragraphs/links.
144
+ */
145
+ private static readonly AGGREGATE_MIN_COUNT;
146
+ /**
147
+ * Ticket-1: Per-render state built by {@link prepareAggregateGroups}. Keys
148
+ * the first backendDOMNodeId of a ≥10-member aggregation bucket to the
149
+ * info needed to emit the summary line. Null when no aggregation pass has
150
+ * been executed (e.g. during subtree renders or tests that bypass getTree).
151
+ */
152
+ private _aggregateAnchors;
153
+ /**
154
+ * Ticket-1: All non-first member backendDOMNodeIds for ≥10-member buckets.
155
+ * renderNode skips any node whose backendDOMNodeId is in this set — the
156
+ * line they would have produced is already covered by the anchor's
157
+ * summary line.
158
+ */
159
+ private _aggregateSuppressed;
160
+ /**
161
+ * Ticket-1: Build a stable aggregation key for a leaf element. Two
162
+ * sibling leaves share an aggregation class iff their keys are equal:
163
+ *
164
+ * - Identical role.
165
+ * - Either an identical name (e.g. 50× "Submit") OR an identical
166
+ * name prefix once a trailing run of digits is stripped
167
+ * (e.g. "Action 1" / "Action 240" → key "button::Action ").
168
+ *
169
+ * Returns null when the element shouldn't participate in aggregation
170
+ * (no role at all). Empty/missing names are treated as their own key
171
+ * so unnamed buttons within a row still group together.
172
+ */
173
+ private aggregationKey;
174
+ /**
175
+ * Ticket-1: A child is a "renderable leaf" for aggregation purposes if
176
+ * it would emit exactly one line under the current filter and carries
177
+ * no descendants that would also render. We only need to look one level
178
+ * deep — text wrappers like <span> / <strong> inside a <button> are
179
+ * either ignored or non-interactive and never produce their own line.
180
+ */
181
+ private isRenderableLeaf;
182
+ /**
183
+ * Ticket-1: Walk the renderable subtree (main + OOPIFs) and compute which
184
+ * leaves should be collapsed into summary lines. Leaves are bucketed by
185
+ * aggregation key; any bucket with ≥{@link AGGREGATE_MIN_COUNT} members
186
+ * becomes a collapse group. The first member in DOM order becomes the
187
+ * "anchor" (its position emits the summary line) and the rest land in
188
+ * the suppressed set so renderNode skips them.
189
+ *
190
+ * This runs independently of the ≥10-consecutive-siblings assumption,
191
+ * which is why it catches the T4.7 benchmark case where 120 "Action N"
192
+ * buttons are interleaved with headings, paragraphs, and inputs inside
193
+ * 60 sections that share a single DOM parent.
194
+ */
195
+ private prepareAggregateGroups;
196
+ /** Reset the per-render aggregation state set up by prepareAggregateGroups. */
197
+ private clearAggregateGroups;
198
+ /**
199
+ * Ticket-1: Emit the summary line for a collapse-group anchor. Format is
200
+ * intentionally compact and still carries the addressable ref band so the
201
+ * LLM can click({ ref: "eN" }) on any individual element inside it.
202
+ */
203
+ private emitAggregateLine;
204
+ private renderChildren;
205
+ /** FR-022: Count content nodes with visible text that would be hidden by filter:interactive.
206
+ * Used to append a hint in read-page.ts that points the LLM at filter:'all' instead of evaluate. */
207
+ private countHiddenContentNodes;
208
+ private getRole;
209
+ private passesFilter;
210
+ private formatLine;
211
+ private formatHeader;
212
+ private getPageTitle;
213
+ private getAvailableRefsRange;
214
+ private suggestClosestRef;
215
+ /**
216
+ * Story 13a.2: Classify a ref for pre-click ambient context decision.
217
+ * Returns classification based on cached AXNode properties (0 CDP calls).
218
+ */
219
+ classifyRef(ref: string): ElementClassification;
220
+ /**
221
+ * FR-008: Return a compact list of known interactive elements for error hints.
222
+ * Used when a CSS selector fails to provide the LLM with actionable alternatives.
223
+ * ZERO CDP calls — purely in-memory from cached nodeInfoMap.
224
+ */
225
+ getInteractiveElements(limit?: number): string[];
226
+ /**
227
+ * FR-002: Lightweight snapshot map for DOM-Diff.
228
+ * Returns Map<refNum, "role\0name"> for all nodes with a name.
229
+ * ZERO CDP calls — purely in-memory.
230
+ */
231
+ getSnapshotMap(): SnapshotMap;
232
+ /**
233
+ * FR-002: Compute diff between two snapshot maps.
234
+ * Returns only meaningful changes (role+name), ignoring nodes without names.
235
+ */
236
+ static diffSnapshots(before: SnapshotMap, after: SnapshotMap): DOMChange[];
237
+ /**
238
+ * FR-002: Format DOM changes as compact context string for LLM.
239
+ * Prioritizes alerts/status, then shows changes near the action, caps at ~30 lines.
240
+ */
241
+ static formatDomDiff(changes: DOMChange[], url?: string): string | null;
242
+ /**
243
+ * Story 13a.2: Enriched compact snapshot with headings, alerts, status
244
+ * plus interactive elements. ZERO CDP calls — purely in-memory.
245
+ */
246
+ getCompactSnapshot(maxTokens?: number): string | null;
247
+ }
248
+ export declare class RefNotFoundError extends Error {
249
+ constructor(message: string);
250
+ }
251
+ export declare const a11yTree: A11yTreeProcessor;
252
+ export {};