@silbercue/chrome 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129)
  1. package/LICENSE +21 -0
  2. package/README.md +229 -0
  3. package/build/cache/a11y-tree.d.ts +252 -0
  4. package/build/cache/a11y-tree.js +1956 -0
  5. package/build/cache/index.d.ts +8 -0
  6. package/build/cache/index.js +4 -0
  7. package/build/cache/selector-cache.d.ts +47 -0
  8. package/build/cache/selector-cache.js +119 -0
  9. package/build/cache/session-defaults.d.ts +27 -0
  10. package/build/cache/session-defaults.js +130 -0
  11. package/build/cache/tab-state-cache.d.ts +39 -0
  12. package/build/cache/tab-state-cache.js +171 -0
  13. package/build/cdp/cdp-client.d.ts +25 -0
  14. package/build/cdp/cdp-client.js +146 -0
  15. package/build/cdp/chrome-launcher.d.ts +85 -0
  16. package/build/cdp/chrome-launcher.js +502 -0
  17. package/build/cdp/console-collector.d.ts +53 -0
  18. package/build/cdp/console-collector.js +147 -0
  19. package/build/cdp/debug.d.ts +1 -0
  20. package/build/cdp/debug.js +6 -0
  21. package/build/cdp/dialog-handler.d.ts +54 -0
  22. package/build/cdp/dialog-handler.js +129 -0
  23. package/build/cdp/dom-watcher.d.ts +45 -0
  24. package/build/cdp/dom-watcher.js +195 -0
  25. package/build/cdp/emulation.d.ts +12 -0
  26. package/build/cdp/emulation.js +17 -0
  27. package/build/cdp/index.d.ts +11 -0
  28. package/build/cdp/index.js +6 -0
  29. package/build/cdp/network-collector.d.ts +77 -0
  30. package/build/cdp/network-collector.js +257 -0
  31. package/build/cdp/protocol.d.ts +20 -0
  32. package/build/cdp/protocol.js +1 -0
  33. package/build/cdp/session-manager.d.ts +62 -0
  34. package/build/cdp/session-manager.js +205 -0
  35. package/build/cdp/settle.d.ts +16 -0
  36. package/build/cdp/settle.js +71 -0
  37. package/build/cli/license-commands.d.ts +19 -0
  38. package/build/cli/license-commands.js +199 -0
  39. package/build/cli/top-level-commands.d.ts +49 -0
  40. package/build/cli/top-level-commands.js +222 -0
  41. package/build/hooks/index.d.ts +2 -0
  42. package/build/hooks/index.js +1 -0
  43. package/build/hooks/pro-hooks.d.ts +126 -0
  44. package/build/hooks/pro-hooks.js +17 -0
  45. package/build/index.d.ts +4 -0
  46. package/build/index.js +86 -0
  47. package/build/license/free-tier-config.d.ts +14 -0
  48. package/build/license/free-tier-config.js +18 -0
  49. package/build/license/index.d.ts +4 -0
  50. package/build/license/index.js +2 -0
  51. package/build/license/license-status.d.ts +15 -0
  52. package/build/license/license-status.js +9 -0
  53. package/build/overlay/session-overlay.d.ts +22 -0
  54. package/build/overlay/session-overlay.js +372 -0
  55. package/build/plan/index.d.ts +7 -0
  56. package/build/plan/index.js +4 -0
  57. package/build/plan/plan-conditions.d.ts +12 -0
  58. package/build/plan/plan-conditions.js +242 -0
  59. package/build/plan/plan-executor.d.ts +49 -0
  60. package/build/plan/plan-executor.js +259 -0
  61. package/build/plan/plan-state-store.d.ts +24 -0
  62. package/build/plan/plan-state-store.js +43 -0
  63. package/build/plan/plan-variables.d.ts +16 -0
  64. package/build/plan/plan-variables.js +71 -0
  65. package/build/registry.d.ts +124 -0
  66. package/build/registry.js +884 -0
  67. package/build/server.d.ts +1 -0
  68. package/build/server.js +245 -0
  69. package/build/tools/click.d.ts +34 -0
  70. package/build/tools/click.js +293 -0
  71. package/build/tools/configure-session.d.ts +15 -0
  72. package/build/tools/configure-session.js +45 -0
  73. package/build/tools/console-logs.d.ts +18 -0
  74. package/build/tools/console-logs.js +44 -0
  75. package/build/tools/dom-snapshot.d.ts +13 -0
  76. package/build/tools/dom-snapshot.js +259 -0
  77. package/build/tools/element-utils.d.ts +23 -0
  78. package/build/tools/element-utils.js +133 -0
  79. package/build/tools/error-utils.d.ts +8 -0
  80. package/build/tools/error-utils.js +27 -0
  81. package/build/tools/evaluate.d.ts +34 -0
  82. package/build/tools/evaluate.js +217 -0
  83. package/build/tools/file-upload.d.ts +20 -0
  84. package/build/tools/file-upload.js +174 -0
  85. package/build/tools/fill-form.d.ts +39 -0
  86. package/build/tools/fill-form.js +256 -0
  87. package/build/tools/handle-dialog.d.ts +15 -0
  88. package/build/tools/handle-dialog.js +48 -0
  89. package/build/tools/index.d.ts +35 -0
  90. package/build/tools/index.js +18 -0
  91. package/build/tools/navigate.d.ts +18 -0
  92. package/build/tools/navigate.js +111 -0
  93. package/build/tools/network-monitor.d.ts +18 -0
  94. package/build/tools/network-monitor.js +66 -0
  95. package/build/tools/observe.d.ts +44 -0
  96. package/build/tools/observe.js +339 -0
  97. package/build/tools/press-key.d.ts +33 -0
  98. package/build/tools/press-key.js +155 -0
  99. package/build/tools/read-page.d.ts +22 -0
  100. package/build/tools/read-page.js +100 -0
  101. package/build/tools/run-plan.d.ts +205 -0
  102. package/build/tools/run-plan.js +215 -0
  103. package/build/tools/screenshot.d.ts +16 -0
  104. package/build/tools/screenshot.js +283 -0
  105. package/build/tools/scroll.d.ts +28 -0
  106. package/build/tools/scroll.js +143 -0
  107. package/build/tools/switch-tab.d.ts +26 -0
  108. package/build/tools/switch-tab.js +355 -0
  109. package/build/tools/tab-status.d.ts +7 -0
  110. package/build/tools/tab-status.js +50 -0
  111. package/build/tools/type.d.ts +31 -0
  112. package/build/tools/type.js +247 -0
  113. package/build/tools/virtual-desk.d.ts +7 -0
  114. package/build/tools/virtual-desk.js +108 -0
  115. package/build/tools/visual-constants.d.ts +3 -0
  116. package/build/tools/visual-constants.js +10 -0
  117. package/build/tools/wait-for.d.ts +26 -0
  118. package/build/tools/wait-for.js +323 -0
  119. package/build/transport/index.d.ts +3 -0
  120. package/build/transport/index.js +2 -0
  121. package/build/transport/pipe-transport.d.ts +18 -0
  122. package/build/transport/pipe-transport.js +63 -0
  123. package/build/transport/transport.d.ts +8 -0
  124. package/build/transport/transport.js +1 -0
  125. package/build/transport/websocket-transport.d.ts +22 -0
  126. package/build/transport/websocket-transport.js +200 -0
  127. package/build/types.d.ts +21 -0
  128. package/build/types.js +1 -0
  129. package/package.json +62 -0
@@ -0,0 +1 @@
1
/**
 * Boots the SilbercueChrome MCP server: connects to Chrome, attaches to a
 * page target, registers the tools and connects the stdio transport.
 *
 * @returns A promise that resolves once the stdio transport is connected and
 *   the SIGINT/SIGTERM shutdown handlers are installed. Failures during setup
 *   are not caught by the function and reject the promise.
 */
+ export declare function startServer(): Promise<void>;
@@ -0,0 +1,245 @@
1
+ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
2
+ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
3
+ import { ChromeLauncher, resolveAutoLaunch } from "./cdp/chrome-launcher.js";
4
+ import { SessionManager } from "./cdp/session-manager.js";
5
+ import { DialogHandler } from "./cdp/dialog-handler.js";
6
+ import { ConsoleCollector } from "./cdp/console-collector.js";
7
+ import { NetworkCollector } from "./cdp/network-collector.js";
8
+ import { DomWatcher } from "./cdp/dom-watcher.js";
9
+ import { DEVICE_METRICS_OVERRIDE, EMULATED_WIDTH, EMULATED_HEIGHT, setHeadless } from "./cdp/emulation.js";
10
+ import { ToolRegistry } from "./registry.js";
11
+ import { injectOverlay, removeOverlay, setTierLabel, setLicenseInfo } from "./overlay/session-overlay.js";
12
+ import { TabStateCache } from "./cache/tab-state-cache.js";
13
+ import { SessionDefaults } from "./cache/session-defaults.js";
14
+ import { a11yTree } from "./cache/a11y-tree.js";
15
+ import { selectorCache } from "./cache/selector-cache.js";
16
+ import { FreeTierLicenseStatus } from "./license/license-status.js";
17
+ import { loadFreeTierConfig } from "./license/free-tier-config.js";
18
+ import { getProHooks } from "./hooks/pro-hooks.js";
19
/**
 * Picks the first existing page target (creating an `about:blank` page when
 * there is none) and attaches to it with a flattened session.
 *
 * @param client Browser-level CDP client.
 * @returns The chosen page target and the id of the session attached to it.
 */
async function attachToPageTarget(client) {
    const { targetInfos } = await client.send("Target.getTargets");
    let pageTarget = targetInfos.find((t) => t.type === "page");
    if (!pageTarget) {
        const { targetId } = await client.send("Target.createTarget", {
            url: "about:blank",
        });
        pageTarget = { targetId, type: "page", url: "about:blank" };
    }
    const { sessionId } = await client.send("Target.attachToTarget", {
        targetId: pageTarget.targetId,
        flatten: true,
    });
    return { pageTarget, sessionId };
}
/**
 * Enables the CDP domains every page session needs and, in headed mode,
 * turns on focus emulation.
 *
 * @param client CDP client to send the commands on.
 * @param sessionId Page session the commands are addressed to.
 * @param headless Whether Chrome runs headless.
 */
async function enablePageDomains(client, sessionId, headless) {
    await client.send("Runtime.enable", {}, sessionId);
    await client.send("Page.enable", {}, sessionId);
    await client.send("Page.setLifecycleEventsEnabled", { enabled: true }, sessionId);
    await client.send("Accessibility.enable", {}, sessionId);
    // BUG-015 fix: Keep renderer alive when window is occluded on macOS.
    // setFocusEmulationEnabled calls WebContents::IncrementCapturerCount(stay_hidden=false),
    // which keeps visible_capturer_count_ > 0 → renderer stays in kVisible state.
    if (!headless) {
        await client.send("Emulation.setFocusEmulationEnabled", { enabled: true }, sessionId);
    }
}
/**
 * (Re)binds the DomWatcher callbacks so that their closures capture the given
 * client and session instead of a previous, possibly dead, connection.
 *
 * @param domWatcher Watcher whose callbacks are replaced.
 * @param client CDP client the refresh callback talks to.
 * @param sessionId Page session the refresh callback evaluates in.
 * @param sessionManager Session manager forwarded to the A11y-tree refresh.
 */
function bindDomWatcherCallbacks(domWatcher, client, sessionId, sessionManager) {
    domWatcher.onRefresh(async () => {
        await a11yTree.refreshPrecomputed(client, sessionId, sessionManager);
        // Update selector cache fingerprint after tree refresh (Story 7.5)
        const urlResult = await client.send("Runtime.evaluate", { expression: "document.URL", returnByValue: true }, sessionId);
        const fp = selectorCache.computeFingerprint(urlResult.result.value, a11yTree.refCount);
        selectorCache.updateFingerprint(fp);
    });
    domWatcher.onInvalidate(() => {
        a11yTree.invalidatePrecomputed();
        // H2 fix: Invalidate selector cache on navigation (not on every DOM mutation).
        // DOM mutations use fingerprint mismatch for self-healing instead.
        selectorCache.invalidate();
    });
    // BUG-010: Invalidate precomputed A11y-tree immediately on DOM mutations
    // (selector cache uses fingerprint self-healing, so only A11y cache needs immediate invalidation)
    domWatcher.onMutationInvalidate(() => {
        a11yTree.invalidatePrecomputed();
    });
}
/**
 * Boots the MCP server: connects to Chrome, attaches to a page target, wires
 * up the collectors/caches, registers all tools, installs the reconnect
 * handler, connects the stdio transport and installs SIGINT/SIGTERM handlers.
 *
 * Setup failures are not caught here; they reject the returned promise.
 *
 * @returns Resolves once the stdio transport is connected and the shutdown
 *   handlers are registered.
 */
export async function startServer() {
    // 1. Connect to Chrome (Story 1.3: WebSocket first, then Auto-Launch)
    const profilePath = process.env.SILBERCUE_CHROME_PROFILE || undefined;
    const headlessEnv = process.env.SILBERCUE_CHROME_HEADLESS === "true";
    const autoLaunch = resolveAutoLaunch(process.env, headlessEnv);
    const launcher = new ChromeLauncher({ profilePath, headless: headlessEnv, autoLaunch });
    const connection = await launcher.connect();
    const { cdpClient } = connection;
    // Use detected headless from connection (auto-detected from Chrome's /json/version for WebSocket)
    const headless = connection.headless;
    setHeadless(headless);
    if (profilePath) {
        if (connection.transportType === "pipe") {
            console.error(`SilbercueChrome using Chrome profile: ${profilePath}`);
        }
        else {
            console.error(`SilbercueChrome warning: profilePath "${profilePath}" ignored — connected via WebSocket to existing Chrome`);
        }
    }
    // 2. Attach to a page target (browser-level connection needs a page session)
    const { pageTarget, sessionId } = await attachToPageTarget(cdpClient);
    // The client/session that is currently live. Updated by the reconnect
    // handler so that shutdown never talks to a connection that was replaced.
    let activeCdpClient = cdpClient;
    let activeSessionId = sessionId;
    // 3. Activate CDP domains on the page session
    await enablePageDomains(cdpClient, sessionId, headless);
    if (headless) {
        await cdpClient.send("Emulation.setDeviceMetricsOverride", DEVICE_METRICS_OVERRIDE, sessionId);
    }
    else {
        // Headed mode: resize browser window instead of emulating viewport.
        // Emulation.setDeviceMetricsOverride causes a gray bar below the content.
        try {
            const { windowId } = await cdpClient.send("Browser.getWindowForTarget", { targetId: pageTarget.targetId });
            await cdpClient.send("Browser.setWindowBounds", {
                windowId,
                bounds: { width: EMULATED_WIDTH, height: EMULATED_HEIGHT + 85 }, // +85 for Chrome UI (tabs, address bar)
            });
        }
        catch {
            // Fallback to emulation if Browser.setWindowBounds fails (e.g. remote connection)
            await cdpClient.send("Emulation.setDeviceMetricsOverride", DEVICE_METRICS_OVERRIDE, sessionId);
        }
    }
    // 3b. Inject session overlay (visual indicator for controlled tab)
    await injectOverlay(cdpClient, sessionId);
    // 4. Create TabStateCache and attach to CDP events
    const tabStateCache = new TabStateCache({ ttlMs: 30_000 });
    tabStateCache.setActiveTarget(pageTarget.targetId);
    tabStateCache.attachToClient(cdpClient, sessionId);
    // 4b. Create SessionManager for OOPIF support
    const sessionManager = new SessionManager(cdpClient, sessionId);
    // H1: Wire up OOPIF detach callback to clean A11yTreeProcessor ref-maps
    sessionManager.onOopifDetach((detachedSessionId) => {
        a11yTree.removeNodesForSession(detachedSessionId);
    });
    await sessionManager.init();
    // 4c. Create DialogHandler for automatic dialog handling (Story 6.1)
    const dialogHandler = new DialogHandler(cdpClient, sessionId);
    dialogHandler.init();
    // 4d. Create ConsoleCollector for console log buffering (Story 7.1)
    const consoleCollector = new ConsoleCollector(cdpClient, sessionId);
    consoleCollector.init();
    // 4e. Create NetworkCollector for network request monitoring (Story 7.2)
    // NOT started here — on-demand via action: "start"
    const networkCollector = new NetworkCollector(cdpClient, sessionId);
    // 4f. Create SessionDefaults for session parameter defaults (Story 7.3)
    const sessionDefaults = new SessionDefaults();
    // 4g. Create DomWatcher for precomputed A11y-Tree (Story 7.4)
    const domWatcher = new DomWatcher(cdpClient, sessionId, { debounceMs: 500 });
    bindDomWatcherCallbacks(domWatcher, cdpClient, sessionId, sessionManager);
    await domWatcher.init();
    // 6. Create MCP server and register tools
    // NOTE(review): the package is published as 0.2.0 while this string still
    // reads 0.1.0 — confirm whether it is meant to track package.json.
    const server = new McpServer({
        name: "silbercuechrome",
        version: "0.1.0",
    }, {
        instructions: [
            "SilbercueChrome controls a real Chrome browser via CDP.",
            "",
            "Workflow: virtual_desk → switch_tab (or navigate) → read_page → click/type/fill_form using refs.",
            "",
            "Token-efficiency rules:",
            "- read_page (accessibility tree with refs like 'e5') is 10-30x cheaper than screenshot.",
            "- Screenshots CANNOT drive click/type — only read_page returns usable element refs.",
            "- fill_form beats multiple type calls for any form with 2+ fields.",
            "- evaluate is a last resort — prefer read_page, click, type, fill_form, observe.",
        ].join("\n"),
    });
    // Story 15.5: License status via ProHooks (the Pro repo injects a LicenseValidator)
    const hooks = getProHooks();
    let licenseStatus = new FreeTierLicenseStatus();
    if (hooks.provideLicenseStatus) {
        try {
            const provided = await hooks.provideLicenseStatus();
            // A hook that resolves to nothing must not replace the Free Tier
            // status, otherwise licenseStatus.isPro() below would throw.
            if (provided) {
                licenseStatus = provided;
            }
        }
        catch {
            // Fallback to Free Tier
        }
    }
    const freeTierConfig = loadFreeTierConfig();
    // Set overlay tier label and license info
    setTierLabel(licenseStatus.isPro());
    setLicenseInfo(undefined, undefined, undefined);
    // Story 13a.2: Pass waitForAXChange callback to Registry for post-click detection
    const registry = new ToolRegistry(server, cdpClient, sessionId, tabStateCache, () => connection.status, sessionManager, dialogHandler, licenseStatus, freeTierConfig, consoleCollector, networkCollector, sessionDefaults, (ms) => domWatcher.waitForAXChange(ms));
    registry.registerAll();
    // 5. Register reconnect handler for automatic re-wiring (Story 5.2)
    // H1 fix: Registered AFTER registry creation to avoid TDZ reference
    connection.onReconnect(async (reconn) => {
        const newCdpClient = reconn.cdpClient;
        // 1. Attach to page target (same as initial startup)
        const { pageTarget: newPageTarget, sessionId: newSessionId } = await attachToPageTarget(newCdpClient);
        // 2. Enable CDP domains on the new session (includes the BUG-015 focus emulation)
        await enablePageDomains(newCdpClient, newSessionId, headless);
        if (headless) {
            await newCdpClient.send("Emulation.setDeviceMetricsOverride", DEVICE_METRICS_OVERRIDE, newSessionId);
        }
        // 2b. Re-inject session overlay after reconnect
        await injectOverlay(newCdpClient, newSessionId);
        // From here on the overlay lives in the new session, so shutdown must
        // remove it through the new client.
        activeCdpClient = newCdpClient;
        activeSessionId = newSessionId;
        // 3. Re-wire TabStateCache: detach from old, attach to new
        tabStateCache.detachFromClient();
        tabStateCache.setActiveTarget(newPageTarget.targetId);
        tabStateCache.attachToClient(newCdpClient, newSessionId);
        // 4. Re-wire ToolRegistry
        registry.updateClient(newCdpClient, newSessionId);
        // 5. Re-initialize SessionManager for OOPIF support
        await sessionManager.reinit(newCdpClient, newSessionId);
        // 6. Re-initialize DialogHandler for dialog handling
        dialogHandler.reinit(newCdpClient, newSessionId);
        // 7. Re-initialize ConsoleCollector for console log buffering
        consoleCollector.reinit(newCdpClient, newSessionId);
        // 8. Re-initialize NetworkCollector for network request monitoring
        networkCollector.reinit(newCdpClient, newSessionId);
        // 9. Re-initialize DomWatcher for precomputed A11y-Tree (Story 7.4)
        // H3: Rebind callbacks BEFORE reinit() to avoid race condition where
        // reinit()->init() fires events that invoke stale closures
        a11yTree.invalidatePrecomputed();
        // 10. Invalidate SelectorCache on reconnect (Story 7.5)
        selectorCache.invalidate();
        bindDomWatcherCallbacks(domWatcher, newCdpClient, newSessionId, sessionManager);
        await domWatcher.reinit(newCdpClient, newSessionId);
    });
    // 7. Start stdio transport
    const transport = new StdioServerTransport();
    await server.connect(transport);
    console.error("SilbercueChrome MCP server running on stdio");
    // 8. Graceful shutdown
    let shuttingDown = false;
    const shutdown = async () => {
        // A second signal while cleanup is in flight must not run it twice.
        if (shuttingDown) {
            return;
        }
        shuttingDown = true;
        let exitCode = 0;
        try {
            try {
                await removeOverlay(activeCdpClient, activeSessionId);
            }
            catch {
                // Best-effort: the tab or the connection may already be gone.
            }
            domWatcher.detach();
            networkCollector.detach();
            consoleCollector.detach();
            dialogHandler.detach();
            sessionManager.detach();
            tabStateCache.detachFromClient();
            await server.close();
            await connection.close();
        }
        catch (err) {
            exitCode = 1;
            console.error(`SilbercueChrome shutdown error: ${err instanceof Error ? err.message : String(err)}`);
        }
        finally {
            // Always terminate, even when a cleanup step failed.
            process.exit(exitCode);
        }
    };
    process.on("SIGINT", shutdown);
    process.on("SIGTERM", shutdown);
}
@@ -0,0 +1,34 @@
1
+ import { z } from "zod";
2
+ import type { CdpClient } from "../cdp/cdp-client.js";
3
+ import type { SessionManager } from "../cdp/session-manager.js";
4
+ import type { ToolResponse } from "../types.js";
5
/**
 * Zod schema describing the parameters of the click tool.
 * All five fields (`ref`, `selector`, `text`, `x`, `y`) are optional and the
 * object uses zod's "strip" mode for unknown keys.
 */
+ export declare const clickSchema: z.ZodObject<{
6
+ ref: z.ZodOptional<z.ZodString>;
7
+ selector: z.ZodOptional<z.ZodString>;
8
+ text: z.ZodOptional<z.ZodString>;
9
+ x: z.ZodOptional<z.ZodNumber>;
10
+ y: z.ZodOptional<z.ZodNumber>;
11
+ }, "strip", z.ZodTypeAny, {
12
+ text?: string | undefined;
13
+ x?: number | undefined;
14
+ y?: number | undefined;
15
+ ref?: string | undefined;
16
+ selector?: string | undefined;
17
+ }, {
18
+ text?: string | undefined;
19
+ x?: number | undefined;
20
+ y?: number | undefined;
21
+ ref?: string | undefined;
22
+ selector?: string | undefined;
23
+ }>;
24
/** Parameter object of the click tool, inferred from `clickSchema`. */
+ export type ClickParams = z.infer<typeof clickSchema>;
25
/**
 * How a click was carried out:
 * - "cdp": mouse events at the center of the element's content quad,
 * - "js-rect": mouse events at the center of `getBoundingClientRect()`,
 * - "js-click": `element.click()` invoked in the page, no coordinates,
 * - "coordinates": mouse events at caller-supplied x/y.
 */
+ export type ClickMethod = "cdp" | "js-rect" | "js-click" | "coordinates";
26
+ /**
27
+ * Story 16.5: Optional human-mouse-move callback injected via the
28
+ * `enhanceTool` Pro-Hook. When present, this replaces the raw
29
+ * `Input.dispatchMouseEvent("mouseMoved",...)` with a Bezier-curve mouse
30
+ * movement from the Pro-Repo Human Touch module. The Free-Repo does NOT
31
+ * contain any Human-Touch logic — it only knows how to delegate.
+ *
+ * Arguments: the CDP client and session to dispatch on, a start point
+ * (`fromX`, `fromY`) and a destination point (`toX`, `toY`). The click
+ * implementation in this package passes `0, 0` as the start point and the
+ * click coordinates as the destination. The returned promise is awaited
+ * before the press/release events are dispatched.
32
+ */
33
+ export type HumanMouseMoveFn = (cdpClient: CdpClient, sessionId: string, fromX: number, fromY: number, toX: number, toY: number) => Promise<void>;
34
/**
 * Handler of the click tool. Clicks either at explicit coordinates (`x` and
 * `y` both set) or on an element addressed by `ref`, `selector` or `text`.
 *
 * @param params Click parameters; at least one addressing mode must be given,
 *   otherwise an error response is returned.
 * @param cdpClient CDP client used to dispatch the commands.
 * @param sessionId Page session the commands are addressed to.
 * @param sessionManager Forwarded to element resolution (OOPIF routing).
 * @returns A tool response; failures are reported with `isError: true` in the
 *   response rather than by throwing.
 */
+ export declare function clickHandler(params: ClickParams, cdpClient: CdpClient, sessionId?: string, sessionManager?: SessionManager): Promise<ToolResponse>;
@@ -0,0 +1,293 @@
1
+ import { z } from "zod";
2
+ import { resolveElement, buildRefNotFoundError, RefNotFoundError } from "./element-utils.js";
3
+ import { wrapCdpError } from "./error-utils.js";
4
+ import { a11yTree } from "../cache/a11y-tree.js";
5
+ import { isHeadless } from "../cdp/emulation.js";
6
+ // --- Schema (Task 2) ---
7
/** Optional string field carrying the given description. */
const optionalText = (description) => z.string().optional().describe(description);
/** Optional numeric field carrying the given description. */
const optionalNumber = (description) => z.number().optional().describe(description);
/**
 * Parameters of the click tool. Every field is optional; the handler decides
 * which addressing mode (ref, selector, text or x/y) applies.
 */
export const clickSchema = z.object({
    ref: optionalText("A11y-Tree element ref (e.g. 'e5') — preferred over selector"),
    selector: optionalText("CSS selector (e.g. '#submit-btn') — fallback when ref is not available"),
    text: optionalText("Visible text to match (e.g. 'Submit'). Finds element by name in the A11y tree — no prior read_page needed. Prefers interactive elements (buttons, links)."),
    x: optionalNumber("X coordinate (viewport pixels) — for canvas or pixel-precise clicks. Use with y instead of ref/selector."),
    y: optionalNumber("Y coordinate (viewport pixels) — for canvas or pixel-precise clicks. Use with x instead of ref/selector."),
});
29
/**
 * Center of the element's first content quad, or `null` when the quads cannot
 * be obtained or are empty.
 */
async function centerFromContentQuads(cdpClient, sessionId, backendNodeId) {
    try {
        const { quads } = await cdpClient.send("DOM.getContentQuads", { backendNodeId }, sessionId);
        if (!quads || quads.length === 0) {
            throw new Error("Element has no visible layout quads");
        }
        // Quad is [x1,y1, x2,y2, x3,y3, x4,y4] — the center is the mean of the corners
        const [x1, y1, x2, y2, x3, y3, x4, y4] = quads[0];
        return { method: "cdp", x: (x1 + x2 + x3 + x4) / 4, y: (y1 + y2 + y3 + y4) / 4 };
    }
    catch {
        return null;
    }
}
/**
 * Center of the element's bounding client rect, computed in the page, or
 * `null` when the call fails.
 * Handles Shadow-DOM nodes and post-mutation stale layouts (BUG-005, BUG-007, BUG-012).
 */
async function centerFromBoundingRect(cdpClient, sessionId, objectId) {
    try {
        const rectResult = await cdpClient.send("Runtime.callFunctionOn", {
            functionDeclaration: `function() {
        var rect = this.getBoundingClientRect();
        return { x: rect.left + rect.width / 2, y: rect.top + rect.height / 2 };
      }`,
            objectId,
            returnByValue: true,
        }, sessionId);
        const { x, y } = rectResult.result.value;
        return { method: "js-rect", x, y };
    }
    catch {
        return null;
    }
}
/**
 * Clicks a resolved element.
 *
 * Scrolls the page to the origin, scrolls the element into view, determines a
 * click point (content quads first, bounding rect second) and dispatches
 * mouseMoved → mousePressed → mouseReleased there. When no click point can be
 * determined the element's own `click()` is invoked in the page instead.
 *
 * @param cdpClient CDP client used for all commands.
 * @param sessionId Session the element lives in (main page or OOPIF).
 * @param backendNodeId Backend node id of the element.
 * @param objectId Remote object id of the element (used by the JS fallbacks).
 * @param humanMouseMove Optional callback that performs the mouse move instead
 *   of the raw `mouseMoved` event (Story 16.5).
 * @returns The method that was used and the click coordinates (`0, 0` for the
 *   pure JS click).
 */
async function dispatchClick(cdpClient, sessionId, backendNodeId, objectId, humanMouseMove) {
    // With Emulation.setDeviceMetricsOverride active, Input.dispatchMouseEvent
    // hit-tests at document coordinates (viewport + scrollY). Starting from
    // scroll 0 makes viewport coords equal document coords.
    await cdpClient.send("Runtime.evaluate", { expression: "window.scrollTo(0,0)" }, sessionId);
    await cdpClient.send("DOM.scrollIntoViewIfNeeded", { backendNodeId }, sessionId);
    const point = (await centerFromContentQuads(cdpClient, sessionId, backendNodeId))
        ?? (await centerFromBoundingRect(cdpClient, sessionId, objectId));
    if (!point) {
        // Last resort: pure JS click — no coordinates needed
        await cdpClient.send("Runtime.callFunctionOn", {
            functionDeclaration: `function() { this.click(); }`,
            objectId,
            returnByValue: false,
        }, sessionId);
        return { method: "js-click", x: 0, y: 0 };
    }
    const { method, x, y } = point;
    // mouseMoved establishes mouseenter/mouseover context (BUG-002); the
    // injected Pro-Hook callback takes over the move when present.
    if (humanMouseMove) {
        await humanMouseMove(cdpClient, sessionId, 0, 0, x, y);
    }
    else {
        await cdpClient.send("Input.dispatchMouseEvent", { type: "mouseMoved", x, y, button: "none", buttons: 0 }, sessionId);
    }
    await cdpClient.send("Input.dispatchMouseEvent", { type: "mousePressed", x, y, button: "left", buttons: 1, clickCount: 1 }, sessionId);
    await cdpClient.send("Input.dispatchMouseEvent", { type: "mouseReleased", x, y, button: "left", buttons: 0, clickCount: 1 }, sessionId);
    return { method, x, y };
}
91
+ // --- Main handler (Task 6) ---
92
/**
 * Best-effort snapshot of the ids of all currently open page targets, used
 * for new-tab detection (FR-E).
 *
 * @returns A set of target ids, or `undefined` when the targets cannot be
 *   listed (non-critical).
 */
async function snapshotPageTabIds(cdpClient) {
    try {
        const { targetInfos } = await cdpClient.send("Target.getTargets");
        return new Set(targetInfos.filter((t) => t.type === "page").map((t) => t.targetId));
    }
    catch {
        return undefined;
    }
}
/**
 * FR-D: Coordinate-based click — no element resolution involved.
 *
 * @param x X coordinate supplied by the caller.
 * @param y Y coordinate supplied by the caller.
 * @param cdpClient CDP client used for all commands.
 * @param sessionId Page session the commands are addressed to.
 * @param humanMouseMove Optional injected mouse-move callback (Story 16.5).
 * @param start `performance.now()` timestamp taken when the tool call began.
 * @returns A tool response; CDP failures are reported with `isError: true`.
 */
async function clickAtCoordinates(x, y, cdpClient, sessionId, humanMouseMove, start) {
    try {
        // Snapshot tab count before click (FR-E: new tab detection)
        const beforeTabIds = await snapshotPageTabIds(cdpClient);
        // FR-01: Auto-scroll + headless adjustment in a single atomic evaluate.
        // If coordinates exceed viewport dimensions, the LLM passed document/page
        // coordinates (e.g. from a full_page screenshot). Scroll to center the target,
        // then return the final scroll position for coordinate adjustment.
        const snap = await cdpClient.send("Runtime.evaluate", {
            expression: `((x,y)=>{const w=window.innerWidth,h=window.innerHeight,oob=x<0||y<0||x>=w||y>=h;if(oob)window.scrollTo(Math.max(0,x-Math.round(w/2)),Math.max(0,y-Math.round(h/2)));return{sx:Math.round(window.scrollX),sy:Math.round(window.scrollY),w,h,oob}})(${x},${y})`,
            returnByValue: true,
        }, sessionId);
        const { sx, sy, w: vw, h: vh, oob: autoScrolled } = snap.result.value;
        // Viewport-relative coordinates (subtract scroll offset applied by auto-scroll)
        const viewportX = autoScrolled ? x - sx : x;
        const viewportY = autoScrolled ? y - sy : y;
        // FR-01 fallback: if viewport coords are still out of bounds after scroll
        // (page shorter than expected, overflow:hidden, etc.), warn immediately.
        if (autoScrolled && (viewportX < 0 || viewportY < 0 || viewportX >= vw || viewportY >= vh)) {
            const elapsedMs = Math.round(performance.now() - start);
            return {
                content: [{ type: "text", text: `click at (${x}, ${y}) failed: coordinates are outside page bounds (page scrolled to ${sy}px but target is at ${y}px). The page may be shorter than expected — use read_page or screenshot to verify element positions.` }],
                isError: true,
                _meta: { elapsedMs, method: "click", clickMethod: "coordinates", autoScrolled },
            };
        }
        // FR-H: Headless mode — Emulation.setDeviceMetricsOverride causes hit-testing
        // at document coords, so add scroll offset to convert viewport → document space.
        let dispatchX = viewportX;
        let dispatchY = viewportY;
        if (isHeadless()) {
            dispatchX += sx;
            dispatchY += sy;
        }
        // Dispatch mouse events at coordinates via CDP
        // Story 16.5: If humanMouseMove callback is injected, delegate the move.
        if (humanMouseMove) {
            await humanMouseMove(cdpClient, sessionId, 0, 0, dispatchX, dispatchY);
        }
        else {
            await cdpClient.send("Input.dispatchMouseEvent", { type: "mouseMoved", x: dispatchX, y: dispatchY, button: "none", buttons: 0 }, sessionId);
        }
        await cdpClient.send("Input.dispatchMouseEvent", { type: "mousePressed", x: dispatchX, y: dispatchY, button: "left", buttons: 1, clickCount: 1 }, sessionId);
        await cdpClient.send("Input.dispatchMouseEvent", { type: "mouseReleased", x: dispatchX, y: dispatchY, button: "left", buttons: 0, clickCount: 1 }, sessionId);
        // FR-E: Check for new tabs after click
        const newTabHint = await detectNewTab(cdpClient, beforeTabIds);
        const elapsedMs = Math.round(performance.now() - start);
        const scrollHint = autoScrolled ? ` (auto-scrolled from page position)` : "";
        return {
            content: [{ type: "text", text: `Clicked at (${x}, ${y})${scrollHint}${newTabHint}` }],
            _meta: { elapsedMs, method: "click", clickMethod: "coordinates", autoScrolled },
        };
    }
    catch (err) {
        const elapsedMs = Math.round(performance.now() - start);
        return {
            content: [{ type: "text", text: wrapCdpError(err, "click", `(${x}, ${y})`) }],
            isError: true,
            _meta: { elapsedMs, method: "click" },
        };
    }
}
// --- Main handler (Task 6) ---
/**
 * Handler of the click tool.
 *
 * Addressing modes, in order of precedence: explicit coordinates (`x` and `y`
 * both set), then `ref`, then `selector`; `text` is resolved to a ref through
 * the A11y tree when neither `ref` nor `selector` is given.
 *
 * The `params` object passed in is never modified: the injected
 * `humanMouseMove` callback is split off into a local, and a ref resolved
 * from `text` is kept in a local as well.
 *
 * @param params Click parameters (plus the optional, non-schema
 *   `humanMouseMove` callback injected by the `enhanceTool` Pro-Hook).
 * @param cdpClient CDP client used for all commands.
 * @param sessionId Page session the commands are addressed to.
 * @param sessionManager Forwarded to tree lookup and element resolution.
 * @returns A tool response; failures are reported with `isError: true`
 *   instead of being thrown.
 */
export async function clickHandler(params, cdpClient, sessionId, sessionManager) {
    const start = performance.now();
    // Story 16.5: `humanMouseMove` is NOT part of the Zod schema. Split it off
    // so it never leaks into CDP calls; `request` is a shallow copy, which
    // also keeps the caller's object untouched.
    const { humanMouseMove: maybeHuman, ...request } = params;
    const humanMouseMove = typeof maybeHuman === "function" ? maybeHuman : undefined;
    // FR-D: Coordinate-based click — skip element resolution entirely
    if (request.x !== undefined && request.y !== undefined) {
        return clickAtCoordinates(request.x, request.y, cdpClient, sessionId, humanMouseMove, start);
    }
    let ref = request.ref;
    const selector = request.selector;
    // UX-001: Resolve text to ref before validation
    if (request.text && !ref && !selector) {
        // Ensure a11y tree is populated — fetch fresh if needed
        if (!a11yTree.hasRefs()) {
            try {
                await a11yTree.getTree(cdpClient, sessionId, { depth: 3, filter: "interactive", fresh: true }, sessionManager);
            }
            catch { /* best-effort — findByText will return null */ }
        }
        const match = a11yTree.findByText(request.text);
        if (!match) {
            const elements = a11yTree.getInteractiveElements(8);
            const hint = elements.length > 0
                ? "\nAvailable interactive elements:\n " + elements.join("\n ")
                : "\nNo interactive elements found — try read_page first.";
            return {
                content: [{ type: "text", text: `No element found with text "${request.text}".${hint}` }],
                isError: true,
                _meta: { elapsedMs: Math.round(performance.now() - start), method: "click" },
            };
        }
        ref = match.ref;
    }
    // Validation (Task 2.4)
    if (!ref && !selector) {
        return {
            content: [
                {
                    type: "text",
                    text: "click requires either 'ref' (e.g. 'e5'), 'selector' (e.g. '#submit-btn'), 'text' (e.g. 'Submit'), or coordinates (x + y)",
                },
            ],
            isError: true,
            // Nothing has been awaited on this path, so no time is reported.
            _meta: { elapsedMs: 0, method: "click" },
        };
    }
    try {
        // FR-E: Snapshot tab count before click (new tab detection)
        const beforeTabIds = await snapshotPageTabIds(cdpClient);
        // Resolve element via shared utility (with OOPIF routing)
        const target = ref ? { ref } : { selector };
        const element = await resolveElement(cdpClient, sessionId, target, sessionManager);
        // Dispatch click using the resolved session (may be OOPIF or main)
        const clickResult = await dispatchClick(cdpClient, element.resolvedSessionId, element.backendNodeId, element.objectId, humanMouseMove);
        // FR-E: Check for new tabs after click
        const newTabHint = await detectNewTab(cdpClient, beforeTabIds);
        // Story 13a.2: Classify clicked element for ambient context decision
        const elementClass = ref ? a11yTree.classifyRef(ref) : "clickable";
        const elapsedMs = Math.round(performance.now() - start);
        const suffix = clickResult.method !== "cdp" ? `, fallback: ${clickResult.method}` : "";
        return {
            content: [
                {
                    type: "text",
                    text: `Clicked ${ref ?? selector} (${element.resolvedVia}${suffix})${newTabHint}`,
                },
            ],
            _meta: {
                elapsedMs,
                method: "click",
                resolvedVia: element.resolvedVia,
                clickMethod: clickResult.method,
                clickX: clickResult.x,
                clickY: clickResult.y,
                elementClass,
            },
        };
    }
    catch (err) {
        const elapsedMs = Math.round(performance.now() - start);
        if (err instanceof RefNotFoundError && ref) {
            return {
                content: [{ type: "text", text: buildRefNotFoundError(ref) }],
                isError: true,
                // Resolution involved awaited work, so report the real duration.
                _meta: { elapsedMs, method: "click" },
            };
        }
        const elementHint = ref ?? selector;
        let errorText = wrapCdpError(err, "click", elementHint);
        // FR-008: When CSS selector not found, suggest available interactive elements
        const message = err instanceof Error ? err.message : String(err);
        if (selector && message.includes("Element not found for selector")) {
            const elements = a11yTree.getInteractiveElements(8);
            if (elements.length > 0) {
                errorText += "\nAvailable interactive elements:\n " + elements.join("\n ");
            }
        }
        return {
            content: [{ type: "text", text: errorText }],
            isError: true,
            _meta: { elapsedMs, method: "click" },
        };
    }
}
279
+ // --- FR-E: New tab detection ---
280
/**
 * FR-E: Reports a page target that was not open before the click.
 *
 * @param cdpClient CDP client used to list the current targets.
 * @param beforeTabIds Ids of the page targets that existed before the click;
 *   when missing, detection is skipped.
 * @returns A hint line (starting with a newline) for the first new page
 *   target, or an empty string when there is none or the lookup fails.
 */
async function detectNewTab(cdpClient, beforeTabIds) {
    let hint = "";
    if (beforeTabIds) {
        try {
            const { targetInfos } = await cdpClient.send("Target.getTargets");
            const [firstNewTab] = targetInfos.filter((info) => info.type === "page" && !beforeTabIds.has(info.targetId));
            if (firstNewTab !== undefined) {
                hint = `\n⮕ New tab opened: ${firstNewTab.url || "about:blank"} — use switch_tab to access it`;
            }
        }
        catch {
            // non-critical: a failed lookup simply yields no hint
        }
    }
    return hint;
}
@@ -0,0 +1,15 @@
1
+ import { z } from "zod";
2
+ import type { SessionDefaults } from "../cache/session-defaults.js";
3
+ import type { ToolResponse } from "../types.js";
4
+ export declare const configureSessionSchema: z.ZodObject<{ // Zod object schema for the params accepted by configureSessionHandler; this first type argument is the field shape.
5
+ defaults: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>; // optional map from string keys to values of unknown type
6
+ autoPromote: z.ZodOptional<z.ZodBoolean>; // optional boolean flag
7
+ }, "strip", z.ZodTypeAny, { // "strip" is the unknown-keys policy; the object type opening here is the parsed (output) type
8
+ defaults?: Record<string, unknown> | undefined;
9
+ autoPromote?: boolean | undefined;
10
+ }, { // final type argument: the accepted input type (identical to the output type, since no field has a default)
11
+ defaults?: Record<string, unknown> | undefined;
12
+ autoPromote?: boolean | undefined;
13
+ }>;
14
+ export type ConfigureSessionParams = z.infer<typeof configureSessionSchema>; // Parameter type derived from configureSessionSchema via z.infer, so it stays in sync with the schema.
15
+ export declare function configureSessionHandler(params: ConfigureSessionParams, sessionDefaults: SessionDefaults): Promise<ToolResponse>; // Async handler: takes schema-typed params plus the SessionDefaults instance to operate on and resolves to a ToolResponse.
@@ -0,0 +1,45 @@
1
+ import { z } from "zod";
2
+ export const configureSessionSchema = z.object({ // Zod schema for configureSessionHandler's params; both fields are optional.
3
+ defaults: z.record(z.unknown()) // record whose values are not validated (unknown)
4
+ .optional()
5
+ .describe("Set session defaults. Keys: param names (tab, timeout, etc.). Values: default values. null removes a default."),
6
+ autoPromote: z.boolean() // opt-in flag; its meaning is spelled out in the description string below
7
+ .optional()
8
+ .describe("If true, apply all current auto-promote suggestions as defaults"),
9
+ });
10
/**
 * Handles the "configure_session" tool call.
 *
 * Two modes, selected by the params:
 *  - Update: `defaults` is provided and/or `autoPromote` is truthy. Each entry
 *    of `defaults` is passed to `sessionDefaults.setDefault`, then (if
 *    requested) `sessionDefaults.applyAllSuggestions()` is invoked. Both steps
 *    run independently of each other. The response carries the resulting
 *    defaults and, when suggestions were applied, an `applied` field.
 *  - Query: neither is provided. The response carries the current defaults
 *    together with the current suggestions under `autoPromote`.
 *
 * @param params          Parsed tool params (`defaults`, `autoPromote`).
 * @param sessionDefaults Store exposing `setDefault`, `applyAllSuggestions`,
 *                        `getAllDefaults` and `getSuggestions`.
 * @returns A response with a single JSON text item and `_meta` timing info.
 */
export async function configureSessionHandler(params, sessionDefaults) {
    const startedAt = performance.now();
    // Single place that wraps a JSON body into the tool response envelope.
    const respond = (body) => ({
        content: [{ type: "text", text: JSON.stringify(body) }],
        _meta: { elapsedMs: Math.round(performance.now() - startedAt), method: "configure_session" },
    });
    const { defaults: requestedDefaults, autoPromote } = params;
    // Step 1: write every explicitly requested default.
    if (requestedDefaults) {
        Object.entries(requestedDefaults).forEach(([name, value]) => {
            sessionDefaults.setDefault(name, value);
        });
    }
    // Step 2: optionally promote all pending suggestions to defaults.
    const promoted = autoPromote ? sessionDefaults.applyAllSuggestions() : undefined;
    // Nothing requested: report the current defaults and the open suggestions.
    if (requestedDefaults === undefined && !autoPromote) {
        return respond({
            defaults: sessionDefaults.getAllDefaults(),
            autoPromote: sessionDefaults.getSuggestions(),
        });
    }
    // Something was changed: report the resulting defaults, plus what was promoted.
    const current = { defaults: sessionDefaults.getAllDefaults() };
    return respond(promoted === undefined ? current : { ...current, applied: promoted });
}
@@ -0,0 +1,18 @@
1
+ import { z } from "zod";
2
+ import type { ConsoleCollector } from "../cdp/console-collector.js";
3
+ import type { ToolResponse } from "../types.js";
4
+ export declare const consoleLogsSchema: z.ZodObject<{ // Zod object schema for the params accepted by consoleLogsHandler; this first type argument is the field shape.
5
+ level: z.ZodOptional<z.ZodEnum<["info", "warning", "error", "debug"]>>; // optional; one of the four listed level names
6
+ pattern: z.ZodOptional<z.ZodString>; // optional string
7
+ clear: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>; // optional boolean with a schema-level default (the default value is not visible in this declaration)
8
+ }, "strip", z.ZodTypeAny, { // "strip" is the unknown-keys policy; the object type opening here is the parsed (output) type
9
+ clear: boolean; // always present after parsing because of the default
10
+ level?: "error" | "info" | "warning" | "debug" | undefined;
11
+ pattern?: string | undefined;
12
+ }, { // final type argument: the accepted input type, where every field may be omitted
13
+ level?: "error" | "info" | "warning" | "debug" | undefined;
14
+ clear?: boolean | undefined;
15
+ pattern?: string | undefined;
16
+ }>;
17
+ export type ConsoleLogsParams = z.infer<typeof consoleLogsSchema>; // Parameter type derived from consoleLogsSchema via z.infer, so it stays in sync with the schema.
18
+ export declare function consoleLogsHandler(params: ConsoleLogsParams, consoleCollector: ConsoleCollector): Promise<ToolResponse>; // Async handler: takes schema-typed params plus the ConsoleCollector instance to read from and resolves to a ToolResponse.