screenhand 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. package/LICENSE +661 -21
  2. package/README.md +208 -38
  3. package/dist/.audit-log.jsonl +55 -0
  4. package/dist/.screenhand/memory/.lock +1 -0
  5. package/dist/.screenhand/memory/actions.jsonl +85 -0
  6. package/dist/.screenhand/memory/errors.jsonl +5 -0
  7. package/dist/.screenhand/memory/errors.jsonl.bak +4 -0
  8. package/dist/.screenhand/memory/state.json +35 -0
  9. package/dist/.screenhand/memory/state.json.bak +35 -0
  10. package/dist/.screenhand/memory/strategies.jsonl +12 -0
  11. package/dist/agent/cli.js +73 -0
  12. package/dist/agent/loop.js +258 -0
  13. package/dist/index.js +1 -0
  14. package/dist/mcp/mcp-stdio-server.js +164 -0
  15. package/dist/mcp-desktop.js +2731 -0
  16. package/dist/mcp-entry.js +7 -10
  17. package/dist/monitor/codex-monitor.js +377 -0
  18. package/dist/monitor/task-queue.js +84 -0
  19. package/dist/monitor/types.js +49 -0
  20. package/dist/native/bridge-client.js +2 -1
  21. package/dist/npm-publish-helper.js +117 -0
  22. package/dist/npm-token-cdp.js +113 -0
  23. package/dist/npm-token-create.js +135 -0
  24. package/dist/npm-token-finish.js +126 -0
  25. package/dist/playbook/engine.js +193 -0
  26. package/dist/playbook/index.js +4 -0
  27. package/dist/playbook/recorder.js +519 -0
  28. package/dist/playbook/runner.js +392 -0
  29. package/dist/playbook/store.js +166 -0
  30. package/dist/playbook/types.js +4 -0
  31. package/dist/scripts/codex-monitor-daemon.js +335 -0
  32. package/dist/scripts/supervisor-daemon.js +272 -0
  33. package/dist/scripts/worker-daemon.js +228 -0
  34. package/dist/src/agent/cli.js +82 -0
  35. package/dist/src/agent/loop.js +274 -0
  36. package/dist/src/config.js +25 -0
  37. package/dist/src/index.js +72 -0
  38. package/dist/src/jobs/manager.js +237 -0
  39. package/dist/src/jobs/runner.js +683 -0
  40. package/dist/src/jobs/store.js +102 -0
  41. package/dist/src/jobs/types.js +30 -0
  42. package/dist/src/jobs/worker.js +97 -0
  43. package/dist/src/logging/timeline-logger.js +45 -0
  44. package/dist/src/mcp/mcp-stdio-server.js +464 -0
  45. package/dist/src/mcp/server.js +363 -0
  46. package/dist/src/mcp-entry.js +60 -0
  47. package/dist/src/memory/recall.js +170 -0
  48. package/dist/src/memory/research.js +104 -0
  49. package/dist/src/memory/seeds.js +101 -0
  50. package/dist/src/memory/service.js +421 -0
  51. package/dist/src/memory/session.js +169 -0
  52. package/dist/src/memory/store.js +422 -0
  53. package/dist/src/memory/types.js +17 -0
  54. package/dist/src/monitor/codex-monitor.js +382 -0
  55. package/dist/src/monitor/task-queue.js +97 -0
  56. package/dist/src/monitor/types.js +62 -0
  57. package/dist/src/native/bridge-client.js +190 -0
  58. package/dist/src/native/macos-bridge-client.js +21 -0
  59. package/dist/src/playbook/engine.js +201 -0
  60. package/dist/src/playbook/index.js +20 -0
  61. package/dist/src/playbook/recorder.js +535 -0
  62. package/dist/src/playbook/runner.js +408 -0
  63. package/dist/src/playbook/store.js +183 -0
  64. package/dist/src/playbook/types.js +17 -0
  65. package/dist/src/runtime/accessibility-adapter.js +393 -0
  66. package/dist/src/runtime/app-adapter.js +64 -0
  67. package/dist/src/runtime/applescript-adapter.js +299 -0
  68. package/dist/src/runtime/ax-role-map.js +96 -0
  69. package/dist/src/runtime/browser-adapter.js +52 -0
  70. package/dist/src/runtime/cdp-chrome-adapter.js +521 -0
  71. package/dist/src/runtime/composite-adapter.js +221 -0
  72. package/dist/src/runtime/execution-contract.js +159 -0
  73. package/dist/src/runtime/executor.js +266 -0
  74. package/dist/src/runtime/locator-cache.js +28 -0
  75. package/dist/src/runtime/planning-loop.js +63 -0
  76. package/dist/src/runtime/service.js +388 -0
  77. package/dist/src/runtime/session-manager.js +60 -0
  78. package/dist/src/runtime/state-observer.js +121 -0
  79. package/dist/src/runtime/vision-adapter.js +224 -0
  80. package/dist/src/supervisor/locks.js +186 -0
  81. package/dist/src/supervisor/supervisor.js +403 -0
  82. package/dist/src/supervisor/types.js +30 -0
  83. package/dist/src/test-mcp-protocol.js +154 -0
  84. package/dist/src/types.js +17 -0
  85. package/dist/src/util/atomic-write.js +118 -0
  86. package/package.json +12 -9
@@ -0,0 +1,2731 @@
1
+ #!/usr/bin/env npx tsx
2
+ // Copyright (C) 2025 Clazro Technology Private Limited
3
+ // SPDX-License-Identifier: AGPL-3.0-only
4
+ //
5
+ // This file is part of ScreenHand.
6
+ //
7
+ // ScreenHand is free software: you can redistribute it and/or modify
8
+ // it under the terms of the GNU Affero General Public License as
9
+ // published by the Free Software Foundation, version 3.
10
+ //
11
+ // ScreenHand is distributed in the hope that it will be useful,
12
+ // but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ // GNU Affero General Public License for more details.
15
+ //
16
+ // You should have received a copy of the GNU Affero General Public License
17
+ // along with ScreenHand. If not, see <https://www.gnu.org/licenses/>.
18
+ /**
19
+ * ScreenHand — MCP Server for Desktop Automation
20
+ * Controls any macOS/Windows app + Chrome browser via CDP.
21
+ *
22
+ * Setup — add to ~/.claude/settings.json or project .mcp.json:
23
+ * {
24
+ * "mcpServers": {
25
+ * "screenhand": {
26
+ * "command": "npx",
27
+ * "args": ["tsx", "/path/to/screenhand/mcp-desktop.ts"]
28
+ * }
29
+ * }
30
+ * }
31
+ */
32
+ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
33
+ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
34
+ import { z } from "zod";
35
+ import path from "node:path";
36
+ import { fileURLToPath } from "node:url";
37
+ import { execSync } from "node:child_process";
38
+ import fs from "node:fs";
39
+ import { BridgeClient } from "./src/native/bridge-client.js";
40
+ import { writeFileAtomicSync, readJsonWithRecovery } from "./src/util/atomic-write.js";
41
+ import { MemoryService } from "./src/memory/service.js";
42
+ import { backgroundResearch } from "./src/memory/research.js";
43
+ import { SessionSupervisor, LeaseManager } from "./src/supervisor/supervisor.js";
44
+ import { JobManager } from "./src/jobs/manager.js";
45
+ import { JobRunner } from "./src/jobs/runner.js";
46
+ import { getWorkerLiveStatus, getWorkerDaemonPid, WORKER_LOG_FILE } from "./src/jobs/worker.js";
47
+ import { PlaybookEngine } from "./src/playbook/engine.js";
48
+ import { PlaybookStore } from "./src/playbook/store.js";
49
+ import { AccessibilityAdapter } from "./src/runtime/accessibility-adapter.js";
50
+ import { AutomationRuntimeService } from "./src/runtime/service.js";
51
+ import { TimelineLogger } from "./src/logging/timeline-logger.js";
52
+ import { spawn } from "node:child_process";
53
+ import os from "node:os";
54
+ const __dirname = path.dirname(fileURLToPath(import.meta.url));
55
+ // ── Audit logging for dangerous tools ──
56
+ const AUDIT_LOG_PATH = path.resolve(__dirname, ".audit-log.jsonl");
57
/**
 * Append one JSON line describing a sensitive tool invocation to the audit log.
 *
 * The record holds an ISO timestamp, the tool name, the supplied params and
 * this process's pid. Serialization and the file append are best-effort: any
 * failure there is ignored so auditing can never break the call being audited.
 *
 * @param {string} tool - Name of the tool being invoked.
 * @param {*} params - Parameters to record alongside the tool name.
 * @returns {void}
 */
function auditLog(tool, params) {
    const record = { timestamp: new Date().toISOString(), tool, params, pid: process.pid };
    try {
        const line = `${JSON.stringify(record)}\n`;
        fs.appendFileSync(AUDIT_LOG_PATH, line);
    }
    catch {
        // Deliberately ignored: a failed audit write must not crash the server.
    }
}
71
// Location of the native bridge binary relative to this file: the Windows
// build on win32, the macOS build on every other platform.
const bridgeRelativePath = process.platform === "win32"
    ? "native/windows-bridge/bin/Release/net8.0-windows/windows-bridge.exe"
    : "native/macos-bridge/.build/release/macos-bridge";
const bridgePath = path.resolve(__dirname, bridgeRelativePath);
// Single shared client used by every native (non-CDP) tool below.
const bridge = new BridgeClient(bridgePath);
75
// True once bridge.start() has completed successfully.
let bridgeReady = false;
// Promise of the start attempt currently in progress, or null when none is
// pending. Shared so overlapping callers wait on one start instead of racing.
let bridgeStartInFlight = null;
/**
 * Make sure the native bridge has been started before it is used.
 *
 * Tool handlers are async and may overlap; without sharing the in-flight
 * attempt, every caller that arrives before the first start finishes would
 * see `bridgeReady === false` and call `bridge.start()` again.
 *
 * A failed start is not cached: the rejection reaches every caller waiting on
 * that attempt, and the next call tries again.
 *
 * @returns {Promise<void>} Resolves once the bridge is ready.
 * @throws Whatever `bridge.start()` rejects with.
 */
async function ensureBridge() {
    if (bridgeReady) {
        return;
    }
    if (bridgeStartInFlight === null) {
        // Promise.resolve().then(...) turns a synchronous throw from start()
        // into a rejection, so the cleanup below always runs after assignment.
        const attempt = Promise.resolve()
            .then(() => bridge.start())
            .then(() => {
            bridgeReady = true;
        });
        bridgeStartInFlight = attempt;
        const clearInFlight = () => {
            if (bridgeStartInFlight === attempt) {
                bridgeStartInFlight = null;
            }
        };
        // Both handlers are supplied so this derived promise never rejects.
        attempt.then(clearInFlight, clearInFlight);
    }
    await bridgeStartInFlight;
}
82
// CDP connection cache: the last debugging port that answered, and the lazily
// imported chrome-remote-interface module.
let cdpPort = null;
let CDP = null;
/**
 * Locate a Chrome instance that exposes the remote-debugging endpoint.
 *
 * Loads chrome-remote-interface on first use, re-checks the cached port if
 * there is one, then probes ports 9222, 9223 and 9224 in that order. The first
 * port that answers `CDP.Version` is cached and returned.
 *
 * @returns {Promise<{CDP: *, port: number}>} The CDP module and the live port.
 * @throws {Error} When no probed port answers.
 */
async function ensureCDP() {
    if (!CDP) {
        const mod = await import("chrome-remote-interface");
        CDP = mod.default;
    }
    // A port counts as live when the Version request succeeds; any failure
    // simply means "not this port".
    const answers = async (port) => {
        try {
            await CDP.Version({ port });
            return true;
        }
        catch {
            return false;
        }
    };
    if (cdpPort && (await answers(cdpPort))) {
        return { CDP, port: cdpPort };
    }
    const commonPorts = [9222, 9223, 9224];
    for (const candidate of commonPorts) {
        if (await answers(candidate)) {
            cdpPort = candidate;
            return { CDP, port: candidate };
        }
    }
    throw new Error("Chrome not running with --remote-debugging-port. Launch with: /Applications/Google\\ Chrome.app/Contents/MacOS/Google\\ Chrome --remote-debugging-port=9222 --user-data-dir=/tmp/chrome-debug");
}
106
// MCP server instance on which every tool in this file is registered.
const server = new McpServer({ name: "screenhand", version: "2.0.0" });
// ═══════════════════════════════════════════════
// LEARNING MEMORY — cached, auto-recall, non-blocking
// ═══════════════════════════════════════════════
const memory = new MemoryService(__dirname);
// One-time disk read at startup.
memory.init();
// Supervisor — manages session leases and stall detection.
const supervisor = new SessionSupervisor();
// Job manager — persistent multi-step automation jobs, stored under the
// user's home directory.
const JOB_DIR = path.join(os.homedir(), ".screenhand", "jobs");
const jobManager = new JobManager({ jobDir: JOB_DIR, memory, supervisor });
jobManager.init();
// Direct lease manager that shares the filesystem lock dir with the daemon.
const LOCK_DIR = path.join(os.homedir(), ".screenhand", "locks");
const leaseManager = new LeaseManager(LOCK_DIR);
// Tools whose calls the interceptor passes straight through without logging
// or recall. Despite the name this covers the memory tools and also the
// session, supervisor, recovery-queue, job and worker tools.
const MEMORY_TOOLS = new Set([
    // memory
    ...[
        "memory_snapshot", "memory_recall", "memory_save", "memory_record_error",
        "memory_record_learning", "memory_query_patterns", "memory_errors",
        "memory_stats", "memory_clear",
    ],
    // sessions
    ...["session_claim", "session_heartbeat", "session_release"],
    // supervisor
    ...[
        "supervisor_status", "supervisor_start", "supervisor_stop", "supervisor_pause", "supervisor_resume",
        "supervisor_install", "supervisor_uninstall",
    ],
    // recovery queue
    ...["recovery_queue_add", "recovery_queue_list"],
    // jobs
    ...[
        "job_create", "job_status", "job_list", "job_transition",
        "job_step_done", "job_step_fail", "job_resume", "job_dequeue", "job_remove",
        "job_run", "job_run_all",
    ],
    // worker
    ...["worker_start", "worker_stop", "worker_status"],
]);
// Fingerprint of the strategy currently being followed (drives the
// success/failure feedback loop); null when no strategy is active.
let activeStrategyFingerprint = null;
// Keep the real registration function so the interceptor can delegate to it.
const originalTool = server.tool.bind(server);
139
/**
 * Flatten a tool result into a short plain-text summary.
 *
 * Collects the `text` of every content item whose `type` is "text", joins
 * them with newlines and keeps the first 500 characters.
 *
 * @param {{content?: Array<{type: string, text?: string}>}|null|undefined} result
 *   Tool result as returned by a handler.
 * @returns {string} The truncated text, or "" when there is no content.
 */
function extractText(result) {
    const items = result?.content;
    if (!items) {
        return "";
    }
    const texts = items.reduce((collected, item) => {
        if (item.type === "text") {
            collected.push(item.text);
        }
        return collected;
    }, []);
    const joined = texts.join("\n");
    return joined.slice(0, 500);
}
148
/**
 * Replacement for `server.tool` that wraps every registered handler with
 * memory bookkeeping, then delegates to the real registration function.
 *
 * For each call of a wrapped tool (tools in MEMORY_TOOLS pass straight through):
 *   - before: look up a known error for the tool;
 *   - on success: record the action, build recall hints (known-error warning,
 *     next-step strategy suggestion) and attach them as `_meta.memoryHints`;
 *   - on failure: record the action and an error pattern, mark an active
 *     strategy as failed, start background research when no resolution is
 *     stored, then rethrow the tool's own error.
 *
 * Bookkeeping after the handler has run is isolated from the tool outcome. By
 * then the tool's side effect has already happened, so a memory failure must
 * neither turn a successful call into a reported failure nor replace the
 * tool's real error. Such failures are reported on stderr only.
 *
 * Registrations without a function argument are forwarded untouched.
 */
server.tool = (...args) => {
    const handlerIdx = args.findIndex((a) => typeof a === "function");
    if (handlerIdx === -1)
        return originalTool(...args);
    const originalHandler = args[handlerIdx];
    const toolName = args[0];
    // Short time-ordered id with a random suffix, e.g. "a_lx3k2f9q1z".
    const makeId = (prefix) => prefix + Date.now().toString(36) + Math.random().toString(36).slice(2, 6);
    // stderr only: stdout carries the MCP stdio protocol.
    const reportBookkeepingFailure = (phase, err) => {
        console.error(`[screenhand] memory bookkeeping (${phase}) failed for "${toolName}": ${err?.message ?? String(err)}`);
    };
    // Success-side bookkeeping; returns the hints to attach to the result.
    const recordSuccess = (sessionId, safeParams, durationMs, knownError, result) => {
        memory.recordEvent({
            id: makeId("a_"),
            timestamp: new Date().toISOString(),
            sessionId,
            tool: toolName,
            params: safeParams,
            durationMs,
            success: true,
            result: extractText(result),
            error: null,
        });
        const hints = [];
        // Warn about known errors for this tool
        if (knownError) {
            hints.push(`⚡ Memory: "${toolName}" has failed before: "${knownError.error}" (${knownError.occurrences}x). Fix: ${knownError.resolution}`);
        }
        // Suggest next step if we're mid-strategy
        const recentTools = memory.getRecentToolNames();
        const strategyHint = memory.quickStrategyHint(recentTools);
        if (strategyHint) {
            activeStrategyFingerprint = strategyHint.fingerprint;
            const nextParams = Object.keys(strategyHint.nextStep.params).length > 0
                ? `(${JSON.stringify(strategyHint.nextStep.params)})`
                : "";
            hints.push(`💡 Memory: This matches strategy "${strategyHint.strategy.task}" (${strategyHint.strategy.successCount} wins, ${strategyHint.strategy.failCount ?? 0} fails). Next step: ${strategyHint.nextStep.tool}${nextParams}`);
        }
        else if (activeStrategyFingerprint && recentTools.length > 0) {
            // We were following a strategy but the sequence no longer matches:
            // the agent finished it or moved on, so count it as a success.
            memory.recordStrategyOutcome(activeStrategyFingerprint, true);
            activeStrategyFingerprint = null;
        }
        return hints;
    };
    // Failure-side bookkeeping.
    const recordFailure = (sessionId, safeParams, durationMs, errorMsg) => {
        memory.recordEvent({
            id: makeId("a_"),
            timestamp: new Date().toISOString(),
            sessionId,
            tool: toolName,
            params: safeParams,
            durationMs,
            success: false,
            result: null,
            error: errorMsg,
        });
        // Record strategy failure if we were following one
        if (activeStrategyFingerprint) {
            memory.recordStrategyOutcome(activeStrategyFingerprint, false);
            activeStrategyFingerprint = null;
        }
        memory.appendError({
            id: makeId("err_"),
            tool: toolName,
            params: safeParams,
            error: errorMsg,
            resolution: null,
            occurrences: 1,
            lastSeen: new Date().toISOString(),
        });
        // Background research: search for a fix if no resolution exists
        const hasResolution = memory
            .readErrors()
            .some((e) => e.tool === toolName && e.error === errorMsg && e.resolution);
        if (!hasResolution) {
            // Fire-and-forget; Promise.resolve covers both sync and async
            // implementations and the catch prevents an unhandled rejection.
            Promise.resolve(backgroundResearch(memory, toolName, safeParams, errorMsg))
                .catch((err) => reportBookkeepingFailure("research", err));
        }
    };
    const wrappedHandler = async (params, extra) => {
        // Skip intercepting memory tools to avoid recursion
        if (MEMORY_TOOLS.has(toolName)) {
            return originalHandler(params, extra);
        }
        const sessionId = memory.getSessionId();
        const safeParams = typeof params === "object" && params !== null ? params : {};
        const start = Date.now();
        // PRE-CALL: check for known error warnings
        const knownError = memory.quickErrorCheck(toolName);
        let result;
        try {
            result = await originalHandler(params, extra);
        }
        catch (err) {
            const errorMsg = err?.message ?? String(err);
            try {
                recordFailure(sessionId, safeParams, Date.now() - start, errorMsg);
            }
            catch (bookkeepingErr) {
                reportBookkeepingFailure("failure", bookkeepingErr);
            }
            throw err;
        }
        let hints = [];
        try {
            hints = recordSuccess(sessionId, safeParams, Date.now() - start, knownError, result);
        }
        catch (bookkeepingErr) {
            reportBookkeepingFailure("success", bookkeepingErr);
        }
        // Attach hints as _meta (doesn't pollute tool output for MCP clients)
        if (hints.length > 0) {
            return {
                ...result,
                _meta: { ...(result?._meta ?? {}), memoryHints: hints },
            };
        }
        return result;
    };
    const newArgs = [...args];
    newArgs[handlerIdx] = wrappedHandler;
    return originalTool(...newArgs);
};
260
+ // ═══════════════════════════════════════════════
261
+ // APPS — discover and manage running applications
262
+ // ═══════════════════════════════════════════════
263
// Tool "apps": asks the bridge for the running applications ("app.list") and
// prints one line per app, flagging the active one.
server.tool("apps", "List all running applications with bundle IDs and PIDs", {}, async () => {
    await ensureBridge();
    const running = await bridge.call("app.list");
    const describeApp = (app) => {
        const activeMark = app.isActive ? " ← active" : "";
        return `${app.name} (${app.bundleId}) pid=${app.pid}${activeMark}`;
    };
    const text = running.map((app) => describeApp(app)).join("\n");
    return { content: [{ type: "text", text }] };
});
269
// Tool "windows": lists windows reported by the bridge ("app.windows") as
// `[id] app "title" (x,y) WxH`. Missing bounds or coordinates print as 0.
server.tool("windows", "List all visible windows with IDs, positions, and sizes", {}, async () => {
    await ensureBridge();
    const windowList = await bridge.call("app.windows");
    const px = (value) => Math.round(value || 0);
    const describeWindow = (win) => {
        const { x, y, width, height } = win.bounds || {};
        return `[${win.windowId}] ${win.appName} "${win.title}" (${px(x)},${px(y)}) ${px(width)}x${px(height)}`;
    };
    const text = windowList.map((win) => describeWindow(win)).join("\n");
    return { content: [{ type: "text", text }] };
});
278
// Tool "focus": activates the app with the given bundle ID via "app.focus".
server.tool("focus", "Focus/activate an application", {
    bundleId: z.string().describe("App bundle ID, e.g. com.apple.Safari"),
}, async ({ bundleId }) => {
    await ensureBridge();
    await bridge.call("app.focus", { bundleId });
    const text = `Focused ${bundleId}`;
    return { content: [{ type: "text", text }] };
});
285
// Tool "launch": starts the app with the given bundle ID via "app.launch" and
// reports the name and pid returned by the bridge.
server.tool("launch", "Launch an application", {
    bundleId: z.string().describe("App bundle ID"),
}, async ({ bundleId }) => {
    await ensureBridge();
    const launched = await bridge.call("app.launch", { bundleId });
    const text = `Launched ${launched.appName} pid=${launched.pid}`;
    return { content: [{ type: "text", text }] };
});
292
+ // ═══════════════════════════════════════════════
293
+ // INSPECT — see what's on screen (debugging/design)
294
+ // ═══════════════════════════════════════════════
295
// Tool "screenshot": captures one window (when windowId is given) or the
// whole screen, runs OCR on the captured image and returns the image size,
// its path and the recognized text.
server.tool("screenshot", "Take a screenshot and OCR it. Returns all visible text. NOTE: For finding/clicking UI elements, ui_tree + ui_press is 10x faster.", {
    windowId: z.number().optional().describe("Window ID. Omit for full screen."),
}, async ({ windowId }) => {
    await ensureBridge();
    const capture = windowId
        ? await bridge.call("cg.captureWindow", { windowId })
        : await bridge.call("cg.captureScreen");
    const recognized = await bridge.call("vision.ocr", { imagePath: capture.path });
    const header = `Screenshot: ${capture.width}x${capture.height} (${capture.path})`;
    return { content: [{ type: "text", text: `${header}\n\n${recognized.text}` }] };
});
309
// Tool "screenshot_file": captures one window (when windowId is given) or the
// whole screen and returns only the path of the captured image.
server.tool("screenshot_file", "Take a screenshot and return the file path (for viewing the actual image)", {
    windowId: z.number().optional().describe("Window ID. Omit for full screen."),
}, async ({ windowId }) => {
    await ensureBridge();
    const capture = windowId
        ? await bridge.call("cg.captureWindow", { windowId })
        : await bridge.call("cg.captureScreen");
    return { content: [{ type: "text", text: capture.path }] };
});
322
// Tool "ocr": captures one window (when windowId is given) or the whole
// screen, runs OCR and returns JSON with the image info, the window bounds and
// one `"text" (x,y) WxH` entry per recognized region.
//
// `window` is always present in the JSON: it is null for a full-screen
// capture and also when the requested window is not in the bridge's window
// list (previously the key was silently dropped in that case).
server.tool("ocr", "OCR a window with element positions. SLOW — prefer ui_tree for structured element discovery. Use OCR only for reading visual/canvas content.", {
    windowId: z.number().optional().describe("Window ID. Omit for full screen."),
}, async ({ windowId }) => {
    await ensureBridge();
    const shot = windowId
        ? await bridge.call("cg.captureWindow", { windowId })
        : await bridge.call("cg.captureScreen");
    const ocr = await bridge.call("vision.ocr", { imagePath: shot.path });
    let winBounds = null;
    if (windowId) {
        const wins = await bridge.call("app.windows");
        const win = wins.find((w) => w.windowId === windowId);
        winBounds = win?.bounds ?? null;
    }
    // Tolerate an OCR result without a regions list (reported as 0 elements).
    const regions = (ocr.regions ?? []).map((r) => `"${r.text}" (${Math.round(r.bounds.x)},${Math.round(r.bounds.y)}) ${Math.round(r.bounds.width)}x${Math.round(r.bounds.height)}`);
    return {
        content: [{
                type: "text",
                text: JSON.stringify({
                    image: { width: shot.width, height: shot.height, path: shot.path },
                    window: winBounds,
                    elementCount: regions.length,
                    elements: regions,
                }, null, 2),
            }],
    };
});
353
+ // ═══════════════════════════════════════════════
354
+ // ACCESSIBILITY — structured UI inspection (instant, no OCR)
355
+ // ═══════════════════════════════════════════════
356
// Tool "ui_tree": fetches an app's accessibility tree ("ax.getElementTree")
// and renders it as indented text, one element per line:
//   role "title" =value (x,y WxH)
// Title, value and bounds are only printed when present. A maxDepth that is
// omitted (or 0) falls back to 4.
server.tool("ui_tree", "PREFERRED: Get the full UI element tree of an app via Accessibility. ~50ms, no screenshot/OCR. Use this FIRST to find elements — returns titles, roles, and bounds. Then use ui_press/ui_find to interact.", {
    pid: z.number().describe("Process ID of the app"),
    maxDepth: z.number().optional().describe("Max depth (default 4). Use 2 for overview, 6+ for deep inspection."),
}, async ({ pid, maxDepth }) => {
    await ensureBridge();
    const tree = await bridge.call("ax.getElementTree", { pid, maxDepth: maxDepth || 4 });
    // A value is worth printing unless it is absent or an empty string.
    // Falsy-but-meaningful values such as 0 or false (e.g. an unchecked
    // checkbox or a slider at zero) must still be shown.
    const hasValue = (value) => value !== undefined && value !== null && value !== "";
    function format(node, depth) {
        let line = " ".repeat(depth) + (node.role || "?");
        if (node.title)
            line += ` "${node.title}"`;
        if (hasValue(node.value))
            line += ` =${String(node.value).slice(0, 60)}`; // cap long values at 60 chars
        if (node.bounds)
            line += ` (${Math.round(node.bounds.x)},${Math.round(node.bounds.y)} ${Math.round(node.bounds.width)}x${Math.round(node.bounds.height)})`;
        let result = line;
        if (node.children) {
            for (const c of node.children)
                result += "\n" + format(c, depth + 1);
        }
        return result;
    }
    return { content: [{ type: "text", text: format(tree, 0) }] };
});
379
// Tool "ui_find": looks up an element by title with a non-exact match
// ("ax.findElement", exact: false) and returns the bridge's answer as
// pretty-printed JSON.
server.tool("ui_find", "Find a specific UI element by text/title. Returns its role, bounds, and path.", {
    pid: z.number().describe("Process ID"),
    title: z.string().describe("Text to search for (partial match)"),
}, async ({ pid, title }) => {
    await ensureBridge();
    const found = await bridge.call("ax.findElement", { pid, title, exact: false });
    const text = JSON.stringify(found, null, 2);
    return { content: [{ type: "text", text }] };
});
387
// Tool "ui_press": finds an element by title (non-exact match) and performs
// the "AXPress" action on the element path the bridge returned.
server.tool("ui_press", "PREFERRED: Find and press/click a UI element by its title via Accessibility. Faster and more reliable than click_text — no screenshot needed.", {
    pid: z.number().describe("Process ID"),
    title: z.string().describe("Element title to find and press"),
}, async ({ pid, title }) => {
    await ensureBridge();
    const element = await bridge.call("ax.findElement", { pid, title, exact: false });
    const { elementPath } = element;
    await bridge.call("ax.performAction", { pid, elementPath, action: "AXPress" });
    const text = `Pressed "${element.title}" (${element.role})`;
    return { content: [{ type: "text", text }] };
});
396
// Tool "ui_set_value": finds an element by title (non-exact match) and sets
// its value through "ax.setElementValue".
server.tool("ui_set_value", "Set the value of a UI element (text field, slider, etc.)", {
    pid: z.number().describe("Process ID"),
    title: z.string().describe("Element title to find"),
    value: z.string().describe("Value to set"),
}, async ({ pid, title, value }) => {
    await ensureBridge();
    const element = await bridge.call("ax.findElement", { pid, title, exact: false });
    const { elementPath } = element;
    await bridge.call("ax.setElementValue", { pid, elementPath, value });
    const text = `Set "${element.title}" = "${value}"`;
    return { content: [{ type: "text", text }] };
});
406
// Tool "menu_click": splits the "/"-separated menu path into segments and
// hands them to the bridge ("ax.menuClick").
server.tool("menu_click", "Click a menu item in an app's menu bar", {
    pid: z.number().describe("Process ID"),
    menuPath: z.string().describe("Menu path separated by /. e.g. 'File/New', 'View/Show Sidebar'"),
}, async ({ pid, menuPath }) => {
    await ensureBridge();
    const segments = menuPath.split("/");
    await bridge.call("ax.menuClick", { pid, menuPath: segments });
    return { content: [{ type: "text", text: `Menu: ${menuPath}` }] };
});
414
+ // ═══════════════════════════════════════════════
415
+ // INPUT — interact with the screen
416
+ // ═══════════════════════════════════════════════
417
// Tool "click": moves the pointer to (x, y), waits 50 ms, then clicks at the
// same point.
server.tool("click", "Click at screen coordinates", {
    x: z.number().describe("Screen X"),
    y: z.number().describe("Screen Y"),
}, async ({ x, y }) => {
    await ensureBridge();
    const point = { x, y };
    await bridge.call("cg.mouseMove", point);
    await new Promise((resolve) => {
        setTimeout(resolve, 50);
    });
    await bridge.call("cg.mouseClick", { x, y });
    return { content: [{ type: "text", text: `Clicked (${x}, ${y})` }] };
});
427
// Tool "click_text": OCRs a window capture, finds the first region whose text
// contains the query (case-insensitive) and clicks the centre of that region.
//
// Returns a plain message instead of throwing when the window or the text is
// not found; the "not found" message lists up to 20 recognized texts.
//
// NOTE(review): the coordinate conversion divides image pixels by 2 and
// subtracts a margin derived from (image size - 2 * window size), which looks
// like it assumes a 2x-scaled capture with a shadow border — confirm before
// relying on it for other display scales.
server.tool("click_text", "SLOW fallback: Find text on screen via OCR and click it. Use ui_press instead when possible — it's 10x faster. Only use this for canvas/image content where Accessibility doesn't work.", {
    windowId: z.number().describe("Window ID"),
    text: z.string().describe("Text to find and click"),
    offset_y: z.number().optional().describe("Y offset from text center (e.g. -25 for icon above label)"),
}, async ({ windowId, text, offset_y }) => {
    await ensureBridge();
    const allWindows = await bridge.call("app.windows");
    const target = allWindows.find((candidate) => candidate.windowId === windowId);
    if (!target) {
        return { content: [{ type: "text", text: "Window not found" }] };
    }
    const frame = target.bounds;
    const capture = await bridge.call("cg.captureWindow", { windowId });
    const recognized = await bridge.call("vision.ocr", { imagePath: capture.path });
    const needle = text.toLowerCase();
    const hit = recognized.regions.find((region) => region.text.toLowerCase().includes(needle));
    if (!hit) {
        const seen = recognized.regions.map((region) => region.text).slice(0, 20).join(", ");
        return { content: [{ type: "text", text: `"${text}" not found. Available: ${seen}` }] };
    }
    // Margins between the captured image edge and the window content.
    const marginLeft = (capture.width - frame.width * 2) / 2;
    const marginTop = (capture.height - frame.height * 2) / 3;
    // Centre of the matched region in image pixels.
    const centerX = hit.bounds.x + hit.bounds.width / 2;
    const centerY = hit.bounds.y + hit.bounds.height / 2;
    // Image pixels -> screen coordinates, plus the optional vertical offset.
    const screenX = frame.x + (centerX - marginLeft) / 2;
    const screenY = frame.y + (centerY - marginTop) / 2 + (offset_y || 0);
    await bridge.call("cg.mouseMove", { x: screenX, y: screenY });
    await new Promise((resolve) => {
        setTimeout(resolve, 50);
    });
    await bridge.call("cg.mouseClick", { x: screenX, y: screenY });
    const summary = `Clicked "${hit.text}" at (${Math.round(screenX)}, ${Math.round(screenY)})`;
    return { content: [{ type: "text", text: summary }] };
});
453
// Tool "type_text": sends the text to the bridge's keyboard typing call
// ("cg.typeText") and echoes what was typed.
server.tool("type_text", "Type text using the keyboard", {
    text: z.string().describe("Text to type"),
}, async ({ text }) => {
    await ensureBridge();
    await bridge.call("cg.typeText", { text });
    const reply = `Typed: ${text}`;
    return { content: [{ type: "text", text: reply }] };
});
460
// Tool "key": splits the combo on "+" and passes the resulting key names to
// the bridge ("cg.keyCombo").
server.tool("key", "Press a key combination", {
    combo: z.string().describe("Key combo: 'cmd+c', 'enter', 'cmd+shift+n', 'space'. Use + to separate."),
}, async ({ combo }) => {
    await ensureBridge();
    const keys = combo.split("+");
    await bridge.call("cg.keyCombo", { keys });
    return { content: [{ type: "text", text: `Key: ${combo}` }] };
});
467
// Tool "drag": forwards the start and end points to the bridge's drag call
// ("cg.mouseDrag").
server.tool("drag", "Drag from one point to another", {
    fromX: z.number(), fromY: z.number(),
    toX: z.number(), toY: z.number(),
}, async ({ fromX, fromY, toX, toY }) => {
    await ensureBridge();
    const gesture = { fromX, fromY, toX, toY };
    await bridge.call("cg.mouseDrag", gesture);
    const summary = `Dragged (${fromX},${fromY}) → (${toX},${toY})`;
    return { content: [{ type: "text", text: summary }] };
});
475
// Tool "scroll": scrolls at (x, y) via "cg.scroll"; an omitted horizontal
// delta is sent as 0.
server.tool("scroll", "Scroll at a position", {
    x: z.number(), y: z.number(),
    deltaX: z.number().optional().describe("Horizontal scroll (default 0)"),
    deltaY: z.number().describe("Vertical scroll (negative = down)"),
}, async ({ x, y, deltaX, deltaY }) => {
    await ensureBridge();
    const horizontal = deltaX || 0;
    await bridge.call("cg.scroll", { x, y, deltaX: horizontal, deltaY });
    return { content: [{ type: "text", text: "Scrolled" }] };
});
484
+ // ── CDP helper: get client for a tab ──
485
/**
 * Open a CDP client for a Chrome tab.
 *
 * When no tab ID is given, the first target of type "page" in the list that
 * Chrome returns is used. The caller owns the returned client and is
 * responsible for closing it.
 *
 * @param {string} [tabId] - Target ID of the tab; omit to pick the first page.
 * @returns {Promise<{client: *, targetId: string, CDP: *, port: number}>}
 *   The connected client, the resolved target ID, the CDP module and the port.
 * @throws {Error} "No tabs open" when no tab ID is given and no page exists.
 */
async function getCDPClient(tabId) {
    const connection = await ensureCDP();
    const cdp = connection.CDP;
    const port = connection.port;
    const pickFirstPage = async () => {
        const targets = await cdp.List({ port });
        const firstPage = targets.find((target) => target.type === "page");
        if (!firstPage) {
            throw new Error("No tabs open");
        }
        return firstPage.id;
    };
    const targetId = tabId ? tabId : await pickFirstPage();
    const client = await cdp({ port, target: targetId });
    return { client, targetId, CDP: cdp, port };
}
498
+ // ── Random delay helper ──
499
/**
 * Wait for a random duration between `min` and `max` milliseconds.
 *
 * @param {number} min - Lower bound of the delay in ms.
 * @param {number} max - Upper bound of the delay in ms.
 * @returns {Promise<void>} Resolves once the timer fires.
 */
function randomDelay(min, max) {
    return new Promise((resolve) => {
        const span = max - min;
        const waitMs = min + Math.random() * span;
        setTimeout(resolve, waitMs);
    });
}
502
+ // ═══════════════════════════════════════════════
503
+ // BROWSER — control Chrome pages via CDP (10ms, not OCR)
504
+ // ═══════════════════════════════════════════════
505
// Tool "browser_tabs": lists Chrome targets of type "page" as
// `[id] title — url`, or "No tabs open" when there are none.
server.tool("browser_tabs", "List all open Chrome tabs", {}, async () => {
    const { CDP: cdp, port } = await ensureCDP();
    const allTargets = await cdp.List({ port });
    const rows = allTargets
        .filter((target) => target.type === "page")
        .map((tab) => `[${tab.id}] ${tab.title} — ${tab.url}`);
    const listing = rows.join("\n");
    return { content: [{ type: "text", text: listing || "No tabs open" }] };
});
512
// Tool "browser_open": creates a new Chrome tab for the URL (CDP "New") and
// reports the new target's ID.
server.tool("browser_open", "Open a URL in Chrome (creates new tab)", {
    url: z.string().describe("URL to open"),
}, async ({ url }) => {
    const connection = await ensureCDP();
    const created = await connection.CDP.New({ port: connection.port, url });
    const text = `Opened: ${created.id} — ${url}`;
    return { content: [{ type: "text", text }] };
});
519
// Tool "browser_navigate": navigates a tab to the URL, polls
// document.readyState every 200 ms for up to 10 s until it is "interactive"
// or "complete", then reports the page title. If the deadline passes the
// title is reported anyway.
//
// The tab is resolved through getCDPClient (first page when tabId is
// omitted). The client is closed in `finally`, so a failing
// enable/navigate/evaluate call no longer leaks the CDP connection.
server.tool("browser_navigate", "Navigate the active Chrome tab to a URL", {
    url: z.string().describe("URL to navigate to"),
    tabId: z.string().optional().describe("Tab ID (from browser_tabs). Omit for most recent tab."),
}, async ({ url, tabId }) => {
    const { client } = await getCDPClient(tabId);
    try {
        await client.Page.enable();
        await client.Page.navigate({ url });
        // Wait for load
        const deadline = Date.now() + 10000;
        while (Date.now() < deadline) {
            const state = await client.Runtime.evaluate({ expression: "document.readyState", returnByValue: true });
            if (state.result.value === "complete" || state.result.value === "interactive")
                break;
            await new Promise((resolve) => setTimeout(resolve, 200));
        }
        const title = await client.Runtime.evaluate({ expression: "document.title", returnByValue: true });
        return { content: [{ type: "text", text: `Navigated to: ${title.result.value}` }] };
    }
    finally {
        await client.close();
    }
});
547
// Tool "browser_js": evaluates caller-supplied JavaScript in a tab and
// returns the result as text (objects as pretty-printed JSON, a missing value
// as "undefined"). A script exception is returned as a "JS Error: ..."
// message rather than thrown.
//
// SECURITY: this runs arbitrary code in the page by design. Every call is
// written to the audit log first, with the code truncated to 500 characters.
//
// The client is closed in `finally`, so a failing Runtime call no longer
// leaks the CDP connection.
server.tool("browser_js", "Execute JavaScript in a Chrome tab. Returns the result. WARNING: This runs arbitrary JS in the browser context — avoid on sensitive pages (banking, email). All executions are audit-logged.", {
    code: z.string().describe("JavaScript to execute. Must be an expression that returns a value. Use (() => { ... })() for multi-line."),
    tabId: z.string().optional().describe("Tab ID. Omit for most recent tab."),
}, async ({ code, tabId }) => {
    auditLog("browser_js", { code: code.slice(0, 500), tabId });
    const { client } = await getCDPClient(tabId);
    let result;
    try {
        await client.Runtime.enable();
        result = await client.Runtime.evaluate({
            expression: code,
            awaitPromise: true,
            returnByValue: true,
        });
    }
    finally {
        await client.close();
    }
    if (result.exceptionDetails) {
        return { content: [{ type: "text", text: `JS Error: ${result.exceptionDetails.text}\n${result.exceptionDetails.exception?.description || ""}` }] };
    }
    const val = result.result.value;
    const text = typeof val === "object" ? JSON.stringify(val, null, 2) : String(val ?? "undefined");
    return { content: [{ type: "text", text }] };
});
576
// Tool: browser_dom — run document.querySelectorAll(selector) in a Chrome tab and summarise the matches.
// Input: `selector`, optional `tabId` (defaults to the first "page" target), optional `limit` (default 20).
// Output: JSON text listing tag/id/class/text/href/src/value/rect for each match, or a selector error message.
server.tool("browser_dom", "Query the DOM of a Chrome page. Returns matching elements' text, attributes, and structure.", {
    selector: z.string().describe("CSS selector, e.g. 'button', '.nav a', '#main h2'"),
    tabId: z.string().optional().describe("Tab ID. Omit for most recent tab."),
    limit: z.number().optional().describe("Max results (default 20)"),
}, async ({ selector, tabId, limit }) => {
    const { CDP: cdp, port } = await ensureCDP();
    let targetId = tabId;
    if (!targetId) {
        const targets = await cdp.List({ port });
        const page = targets.find((t) => t.type === "page");
        if (!page)
            throw new Error("No tabs open");
        targetId = page.id;
    }
    // The limit is spliced into page-side source, so only a positive finite integer is allowed through;
    // anything else (missing, 0, negative, NaN, Infinity) falls back to the documented default.
    const maxResults = Number.isFinite(limit) && limit >= 1 ? Math.floor(limit) : 20;
    const client = await cdp({ port, target: targetId });
    let result;
    try {
        await client.Runtime.enable();
        result = await client.Runtime.evaluate({
            expression: `(() => {
                const els = Array.from(document.querySelectorAll(${JSON.stringify(selector)})).slice(0, ${maxResults});
                return els.map((el, i) => ({
                    index: i,
                    tag: el.tagName.toLowerCase(),
                    id: el.id || undefined,
                    class: el.className?.toString()?.slice(0, 100) || undefined,
                    text: el.textContent?.trim()?.slice(0, 200),
                    href: el.href || undefined,
                    src: el.src || undefined,
                    value: el.value || undefined,
                    rect: (() => { const r = el.getBoundingClientRect(); return { x: Math.round(r.x), y: Math.round(r.y), w: Math.round(r.width), h: Math.round(r.height) }; })(),
                }));
            })()`,
            returnByValue: true,
        });
    }
    finally {
        // Always release the CDP connection, even when the protocol call rejects.
        await client.close();
    }
    if (result.exceptionDetails) {
        // e.g. querySelectorAll throwing on an invalid selector: there is no value to serialise.
        return { content: [{ type: "text", text: `Selector error: ${result.exceptionDetails.exception?.description || result.exceptionDetails.text}` }] };
    }
    return { content: [{ type: "text", text: JSON.stringify(result.result.value ?? [], null, 2) }] };
});
613
// Tool: browser_click — locate an element by CSS selector, scroll it into view and click its centre
// with a mouseMoved → mousePressed → mouseReleased sequence sent through CDP Input.dispatchMouseEvent.
// Output: a text item describing the click, or the "not found" reason.
server.tool("browser_click", "Click an element in Chrome by CSS selector. Uses CDP Input.dispatchMouseEvent for realistic mouse events.", {
    selector: z.string().describe("CSS selector of element to click"),
    tabId: z.string().optional().describe("Tab ID. Omit for most recent tab."),
}, async ({ selector, tabId }) => {
    const { client } = await getCDPClient(tabId);
    try {
        await client.Runtime.enable();
        // Both the selector and the failure message are embedded as JSON string literals so that
        // quotes, backslashes or newlines in the selector cannot break the page-side expression.
        const notFoundReason = JSON.stringify(`Element not found: ${selector}`);
        const result = await client.Runtime.evaluate({
            expression: `(() => {
                const el = document.querySelector(${JSON.stringify(selector)});
                if (!el) return { ok: false, reason: ${notFoundReason} };
                el.scrollIntoView({ block: "center" });
                const r = el.getBoundingClientRect();
                return { ok: true, x: r.x + r.width / 2, y: r.y + r.height / 2, text: el.textContent?.trim()?.slice(0, 100) };
            })()`,
            returnByValue: true,
        });
        const val = result.result.value;
        if (!val?.ok) {
            return { content: [{ type: "text", text: val?.reason || "Element not found" }] };
        }
        const { x, y } = val;
        await client.Input.dispatchMouseEvent({ type: "mouseMoved", x, y });
        await randomDelay(30, 60);
        await client.Input.dispatchMouseEvent({ type: "mousePressed", x, y, button: "left", clickCount: 1 });
        await randomDelay(30, 80);
        await client.Input.dispatchMouseEvent({ type: "mouseReleased", x, y, button: "left", clickCount: 1 });
        return { content: [{ type: "text", text: `Clicked: "${val.text}" at (${Math.round(x)}, ${Math.round(y)})` }] };
    }
    finally {
        // Single exit point for the connection: covers success, "not found" and thrown errors.
        await client.close();
    }
});
643
// Tool: browser_type — focus an input by CSS selector and type text into it one character at a time
// using CDP Input.dispatchKeyEvent. Unless `clear` is explicitly false, the field is emptied first
// with select-all followed by Backspace.
// Output: a text item confirming what was typed, or the "not found" reason.
server.tool("browser_type", "Type into an input field in Chrome. Uses CDP Input.dispatchKeyEvent for real keyboard events (works with React/Angular).", {
    selector: z.string().describe("CSS selector of the input"),
    text: z.string().describe("Text to type"),
    clear: z.boolean().optional().describe("Clear field first (default true)"),
    tabId: z.string().optional().describe("Tab ID"),
}, async ({ selector, text, clear, tabId }) => {
    const { client } = await getCDPClient(tabId);
    try {
        await client.Runtime.enable();
        // Focus the element
        const focusResult = await client.Runtime.evaluate({
            expression: `(() => {
                const el = document.querySelector(${JSON.stringify(selector)});
                if (!el) return { ok: false, reason: "Input not found" };
                el.scrollIntoView({ block: "center" });
                el.focus();
                return { ok: true };
            })()`,
            returnByValue: true,
        });
        if (!focusResult.result.value?.ok) {
            return { content: [{ type: "text", text: focusResult.result.value?.reason || "Input not found" }] };
        }
        // Clear if needed: select all + delete
        if (clear !== false) {
            // CDP modifier bit field: 4 = Meta/Command (macOS), 2 = Ctrl (elsewhere).
            const selectAllModifier = process.platform === "darwin" ? 4 : 2;
            await client.Input.dispatchKeyEvent({ type: "keyDown", key: "a", code: "KeyA", modifiers: selectAllModifier });
            await client.Input.dispatchKeyEvent({ type: "keyUp", key: "a", code: "KeyA", modifiers: selectAllModifier });
            await client.Input.dispatchKeyEvent({ type: "keyDown", key: "Backspace", code: "Backspace" });
            await client.Input.dispatchKeyEvent({ type: "keyUp", key: "Backspace", code: "Backspace" });
            await randomDelay(30, 80);
        }
        // Type character by character with random delays
        for (const char of text) {
            await client.Input.dispatchKeyEvent({ type: "keyDown", text: char, key: char, unmodifiedText: char });
            await client.Input.dispatchKeyEvent({ type: "keyUp", text: char, key: char, unmodifiedText: char });
            await randomDelay(30, 80);
        }
        return { content: [{ type: "text", text: `Typed "${text}"` }] };
    }
    finally {
        // Release the connection on every path, including a rejected key dispatch.
        await client.close();
    }
});
684
// Tool: browser_wait — poll a caller-supplied JS expression in a Chrome tab roughly every 300 ms
// until it is truthy or the timeout (default 10000 ms) elapses.
// Output: "Condition met" or "Timeout — condition not met".
server.tool("browser_wait", "Wait for a condition on a Chrome page", {
    condition: z.string().describe("JS expression that returns truthy when ready. e.g. 'document.querySelector(\".loaded\")'"),
    timeoutMs: z.number().optional().describe("Timeout in ms (default 10000)"),
    tabId: z.string().optional().describe("Tab ID"),
}, async ({ condition, timeoutMs, tabId }) => {
    const { CDP: cdp, port } = await ensureCDP();
    let targetId = tabId;
    if (!targetId) {
        const targets = await cdp.List({ port });
        const page = targets.find((t) => t.type === "page");
        if (!page)
            throw new Error("No tabs open");
        targetId = page.id;
    }
    const client = await cdp({ port, target: targetId });
    let met = false;
    try {
        await client.Runtime.enable();
        // `||` is kept on purpose: a timeout of 0 continues to mean "use the default".
        const deadline = Date.now() + (timeoutMs || 10000);
        while (Date.now() < deadline) {
            // A condition that throws in the page yields no value and is treated as "not yet".
            const r = await client.Runtime.evaluate({ expression: `!!(${condition})`, returnByValue: true });
            if (r.result.value) {
                met = true;
                break;
            }
            await new Promise((resolve) => setTimeout(resolve, 300));
        }
    }
    finally {
        // Release the connection even if a protocol call rejects mid-poll.
        await client.close();
    }
    return { content: [{ type: "text", text: met ? "Condition met" : "Timeout — condition not met" }] };
});
713
// Tool: browser_page_info — report the title, URL and the first 2000 characters of body text of a Chrome tab.
// Input: optional `tabId` (defaults to the first "page" target listed by CDP).
// Output: a text item containing the JSON-encoded { title, url, text } object.
server.tool("browser_page_info", "Get current page title, URL, and text content summary", {
    tabId: z.string().optional().describe("Tab ID"),
}, async ({ tabId }) => {
    const { CDP: cdp, port } = await ensureCDP();
    let targetId = tabId;
    if (!targetId) {
        const targets = await cdp.List({ port });
        const page = targets.find((t) => t.type === "page");
        if (!page)
            throw new Error("No tabs open");
        targetId = page.id;
    }
    const client = await cdp({ port, target: targetId });
    let result;
    try {
        await client.Runtime.enable();
        result = await client.Runtime.evaluate({
            expression: `(() => ({
                title: document.title,
                url: location.href,
                text: document.body?.innerText?.slice(0, 2000) || "",
            }))()`,
            returnByValue: true,
        });
    }
    finally {
        // Always release the CDP connection, even when the protocol call rejects.
        await client.close();
    }
    return { content: [{ type: "text", text: JSON.stringify(result.result.value, null, 2) }] };
});
738
+ // ═══════════════════════════════════════════════
739
+ // BROWSER STEALTH — anti-detection patches
740
+ // ═══════════════════════════════════════════════
741
// Page-side anti-detection patches, kept as source text because the script is evaluated inside the
// browser rather than in Node. It must therefore be plain JavaScript: TypeScript casts or parameter
// annotations make the page reject the entire script with a SyntaxError.
// The body is wrapped in an IIFE and the navigator overrides are configurable so that the script can
// be evaluated more than once against the same document without throwing.
const STEALTH_SCRIPT = `
(() => {
  // Hide navigator.webdriver flag
  Object.defineProperty(navigator, 'webdriver', { get: () => undefined, configurable: true });

  // Delete ChromeDriver leak variables
  for (const key of Object.keys(window)) {
    if (key.match(/^cdc_/)) delete window[key];
  }

  // Realistic plugins array
  Object.defineProperty(navigator, 'plugins', {
    configurable: true,
    get: () => [
      { name: 'Chrome PDF Plugin', filename: 'internal-pdf-viewer', description: 'Portable Document Format' },
      { name: 'Chrome PDF Viewer', filename: 'mhjfbmdgcfjbbpaeojofohoefgiehjai', description: '' },
      { name: 'Native Client', filename: 'internal-nacl-plugin', description: '' },
    ],
  });

  // Realistic languages
  Object.defineProperty(navigator, 'languages', { configurable: true, get: () => ['en-US', 'en'] });

  // Patch chrome.runtime to look realistic (not headless)
  if (!window.chrome) window.chrome = {};
  if (!window.chrome.runtime) window.chrome.runtime = { connect: () => {}, sendMessage: () => {} };

  // Patch Permissions.query for notifications
  const origQuery = window.Permissions?.prototype?.query;
  if (origQuery) {
    window.Permissions.prototype.query = function (params) {
      if (params && params.name === 'notifications') {
        return Promise.resolve({ state: 'denied', onchange: null });
      }
      return origQuery.call(this, params);
    };
  }
})();
`;
777
// Tool: browser_stealth — register STEALTH_SCRIPT to run on every new document in the tab and also
// evaluate it immediately in the current document.
// Output: a confirmation text item, or a failure message when the immediate evaluation raised.
server.tool("browser_stealth", "Inject anti-detection patches into Chrome page. Call once after navigating to a protected site. Hides webdriver flag, patches plugins/languages/permissions.", {
    tabId: z.string().optional().describe("Tab ID. Omit for most recent tab."),
}, async ({ tabId }) => {
    const { client } = await getCDPClient(tabId);
    let outcome;
    try {
        await client.Page.enable();
        await client.Page.addScriptToEvaluateOnNewDocument({ source: STEALTH_SCRIPT });
        // Also evaluate immediately on current page
        await client.Runtime.enable();
        outcome = await client.Runtime.evaluate({ expression: STEALTH_SCRIPT, returnByValue: true });
    }
    finally {
        // Release the connection on every path, including a rejected protocol call.
        await client.close();
    }
    if (outcome.exceptionDetails) {
        // Do not claim success when the page rejected or threw while running the script.
        const detail = outcome.exceptionDetails.exception?.description || outcome.exceptionDetails.text;
        return { content: [{ type: "text", text: `Stealth injection failed on the current page: ${detail}` }] };
    }
    return { content: [{ type: "text", text: "Stealth patches injected: webdriver hidden, plugins/languages/permissions patched." }] };
});
789
+ // ═══════════════════════════════════════════════
790
+ // BROWSER HUMAN-LIKE INPUT — anti-detection tools
791
+ // ═══════════════════════════════════════════════
792
// Tool: browser_fill_form — focus a form field by CSS selector and type text into it one character
// at a time via CDP Input.dispatchKeyEvent, pausing a random interval around `delayMs` between keys.
// Unless `clear` is explicitly false, the field is emptied first with select-all followed by Backspace.
// Output: a text item confirming what was typed, or the "not found" reason.
server.tool("browser_fill_form", "Fill a form field with human-like typing (anti-detection). Uses real keyboard events via CDP Input domain.", {
    selector: z.string().describe("CSS selector of the input"),
    text: z.string().describe("Text to type"),
    clear: z.boolean().optional().describe("Clear field first (default true)"),
    delayMs: z.number().optional().describe("Avg delay between keystrokes in ms (default 50)"),
    tabId: z.string().optional().describe("Tab ID"),
}, async ({ selector, text, clear, delayMs, tabId }) => {
    const { client } = await getCDPClient(tabId);
    try {
        await client.Runtime.enable();
        // Both the selector and the failure message are embedded as JSON string literals so that
        // quotes, backslashes or newlines in the selector cannot break the page-side expression.
        const notFoundReason = JSON.stringify(`Element not found: ${selector}`);
        // Focus the element
        const focusResult = await client.Runtime.evaluate({
            expression: `(() => {
                const el = document.querySelector(${JSON.stringify(selector)});
                if (!el) return { ok: false, reason: ${notFoundReason} };
                el.scrollIntoView({ block: "center" });
                el.focus();
                return { ok: true };
            })()`,
            returnByValue: true,
        });
        if (!focusResult.result.value?.ok) {
            return { content: [{ type: "text", text: focusResult.result.value?.reason || "Element not found" }] };
        }
        // Clear if needed
        if (clear !== false) {
            // CDP modifier bit field: 4 = Meta/Command (macOS), 2 = Ctrl (elsewhere).
            const selectAllModifier = process.platform === "darwin" ? 4 : 2;
            await client.Input.dispatchKeyEvent({ type: "keyDown", key: "a", code: "KeyA", modifiers: selectAllModifier });
            await client.Input.dispatchKeyEvent({ type: "keyUp", key: "a", code: "KeyA", modifiers: selectAllModifier });
            await client.Input.dispatchKeyEvent({ type: "keyDown", key: "Backspace", code: "Backspace" });
            await client.Input.dispatchKeyEvent({ type: "keyUp", key: "Backspace", code: "Backspace" });
            await randomDelay(30, 80);
        }
        // Type character by character with random delays
        const avgDelay = delayMs ?? 50;
        const minDelay = Math.max(10, avgDelay - 20);
        // Keep the window well-formed (max >= min) even for tiny or negative delayMs values.
        const maxDelay = Math.max(minDelay, avgDelay + 30);
        for (const char of text) {
            await client.Input.dispatchKeyEvent({ type: "keyDown", text: char, key: char, unmodifiedText: char });
            await client.Input.dispatchKeyEvent({ type: "keyUp", text: char, key: char, unmodifiedText: char });
            await randomDelay(minDelay, maxDelay);
        }
        return { content: [{ type: "text", text: `Typed "${text}" (${text.length} chars, human-like)` }] };
    }
    finally {
        // Release the connection on every path, including a rejected key dispatch.
        await client.close();
    }
});
837
// Tool: browser_human_click — locate an element by CSS selector, scroll it into view and click its
// centre with a mouseMoved → mousePressed → mouseReleased sequence separated by short random pauses.
// Output: a text item describing the click, or the "not found" reason.
server.tool("browser_human_click", "Click an element with realistic mouse events (anti-detection). Dispatches mouseMoved → mousePressed → mouseReleased at element coordinates.", {
    selector: z.string().describe("CSS selector of element to click"),
    tabId: z.string().optional().describe("Tab ID. Omit for most recent tab."),
}, async ({ selector, tabId }) => {
    const { client } = await getCDPClient(tabId);
    try {
        await client.Runtime.enable();
        // Both the selector and the failure message are embedded as JSON string literals so that
        // quotes, backslashes or newlines in the selector cannot break the page-side expression.
        const notFoundReason = JSON.stringify(`Element not found: ${selector}`);
        // Get element center coordinates
        const rectResult = await client.Runtime.evaluate({
            expression: `(() => {
                const el = document.querySelector(${JSON.stringify(selector)});
                if (!el) return { ok: false, reason: ${notFoundReason} };
                el.scrollIntoView({ block: "center" });
                const r = el.getBoundingClientRect();
                return { ok: true, x: r.x + r.width / 2, y: r.y + r.height / 2, text: el.textContent?.trim()?.slice(0, 100) };
            })()`,
            returnByValue: true,
        });
        const val = rectResult.result.value;
        if (!val?.ok) {
            return { content: [{ type: "text", text: val?.reason || "Element not found" }] };
        }
        const { x, y } = val;
        // Simulate realistic mouse event sequence
        await client.Input.dispatchMouseEvent({ type: "mouseMoved", x, y });
        await randomDelay(30, 60);
        await client.Input.dispatchMouseEvent({ type: "mousePressed", x, y, button: "left", clickCount: 1 });
        await randomDelay(30, 80);
        await client.Input.dispatchMouseEvent({ type: "mouseReleased", x, y, button: "left", clickCount: 1 });
        return { content: [{ type: "text", text: `Clicked: "${val.text}" at (${Math.round(x)}, ${Math.round(y)})` }] };
    }
    finally {
        // Single exit point for the connection: covers success, "not found" and thrown errors.
        await client.close();
    }
});
869
+ // ═══════════════════════════════════════════════
870
+ // PLATFORM PLAYBOOKS — lazy-loaded site knowledge
871
+ // ═══════════════════════════════════════════════
872
// Directory holding one `<platform>.json` playbook per supported site, next to this module.
const playbooksDir = path.resolve(__dirname, "playbooks");
// Tool: platform_guide — load `<platform>.json` from playbooksDir and return either the whole
// playbook or one section of it ("urls", "flows", "selectors", "errors", "detection").
// Output: a single text item; when no playbook matches, the text lists the available names.
server.tool("platform_guide", "Get automation guide for a platform (selectors, URLs, flows, error solutions). Available: devpost. Zero cost — only loads when called.", {
    platform: z.string().describe("Platform name, e.g. 'devpost'"),
    section: z.enum(["all", "urls", "flows", "selectors", "errors", "detection"]).optional().describe("Section to return (default: all). Use 'errors' for just error+solution pairs."),
}, async ({ platform, section }) => {
    const fileName = `${platform.toLowerCase()}.json`;
    const filePath = path.resolve(playbooksDir, fileName);
    // A name containing path separators (e.g. "../../x") would escape playbooksDir; treat it as unknown.
    const isPlainName = path.basename(fileName) === fileName;
    if (!isPlainName || !fs.existsSync(filePath)) {
        const suffix = ".json";
        const available = fs.existsSync(playbooksDir)
            ? fs.readdirSync(playbooksDir).filter((f) => f.endsWith(suffix)).map((f) => f.slice(0, -suffix.length))
            : [];
        return { content: [{ type: "text", text: `No playbook for "${platform}". Available: ${available.join(", ") || "none"}` }] };
    }
    const data = JSON.parse(fs.readFileSync(filePath, "utf-8"));
    const wanted = section || "all";
    if (wanted === "errors") {
        const errors = data.errors || [];
        const text = errors.map((e, i) => `${i + 1}. [${e.severity}] ${e.error}\n Context: ${e.context}\n Solution: ${e.solution}`).join("\n\n");
        return { content: [{ type: "text", text: text || "No errors documented." }] };
    }
    if (wanted === "urls") {
        // Fall back to an empty object so the text field is always a string.
        return { content: [{ type: "text", text: JSON.stringify(data.urls ?? {}, null, 2) }] };
    }
    if (wanted === "detection") {
        return { content: [{ type: "text", text: JSON.stringify(data.detection ?? {}, null, 2) }] };
    }
    if (wanted === "flows") {
        const flows = data.flows || {};
        const text = Object.entries(flows).map(([name, flow]) => {
            const steps = (flow.steps || []).map((step, i) => ` ${i + 1}. ${step}`).join("\n");
            const tips = (flow.tips || []).map((tip) => ` TIP: ${tip}`).join("\n");
            return `### ${name}\n${steps}${tips ? "\n" + tips : ""}`;
        }).join("\n\n");
        return { content: [{ type: "text", text }] };
    }
    if (wanted === "selectors") {
        const flows = data.flows || {};
        const text = Object.entries(flows).map(([name, flow]) => {
            const sels = flow.selectors || {};
            const lines = Object.entries(sels).map(([k, v]) => ` ${k}: ${v}`).join("\n");
            return `### ${name}\n${lines}`;
        }).join("\n\n");
        return { content: [{ type: "text", text }] };
    }
    // "all" — return full playbook
    return { content: [{ type: "text", text: JSON.stringify(data, null, 2) }] };
});
918
// Tool: export_playbook — assemble a playbook JSON for `platform` from the memory store (actions,
// errors, strategies filtered by `domain`) plus a best-effort scan of the current page for form
// selectors, write it to `<playbooksDir>/<platform>.json`, and return a summary with the JSON.
server.tool("export_playbook", "Generate a playbook JSON from your session. Extracts URLs, selectors, errors+solutions from memory. Share the output with ScreenHand to help others automate this platform.", {
    platform: z.string().describe("Platform name, e.g. 'linkedin', 'twitter'"),
    domain: z.string().describe("Domain to filter actions by, e.g. 'linkedin.com'"),
    description: z.string().optional().describe("Short description of the platform"),
    tabId: z.string().optional().describe("Tab ID to scan current page for selectors"),
}, async ({ platform, domain, description, tabId }) => {
    const platformLower = platform.toLowerCase();
    const fileName = `${platformLower}.json`;
    // The platform name becomes a file name: refuse anything that would write outside playbooksDir.
    if (path.basename(fileName) !== fileName) {
        return { content: [{ type: "text", text: `Invalid platform name "${platform}": it must not contain path separators.` }] };
    }
    // 1. Pull URLs and errors from memory store
    const actions = memory.readActions();
    const errors = memory.readErrors();
    const strategies = memory.readStrategies();
    const domainLower = domain.toLowerCase();
    const mentionsDomain = (value) => typeof value === "string" && value.toLowerCase().includes(domainLower);
    // Actions whose `url` parameter touched the domain, and the set of tools they used (computed once
    // so the error filter below does not rescan every action per error).
    const domainActions = actions.filter((a) => mentionsDomain((a.params || {}).url));
    const domainTools = new Set(domainActions.map((a) => a.tool));
    // Extract unique URLs from actions that touched this domain
    const urlSet = new Set();
    for (const a of actions) {
        const url = (a.params || {}).url;
        if (mentionsDomain(url)) {
            urlSet.add(url);
        }
        // Results are only scanned when they are text; other shapes have no URLs to match.
        const resultText = typeof a.result === "string" ? a.result : "";
        for (const u of resultText.match(/https?:\/\/[^\s"]+/g) ?? []) {
            if (mentionsDomain(u))
                urlSet.add(u);
        }
    }
    // Extract errors related to this domain's tools
    const domainErrors = errors
        .filter((e) => {
            const params = e.params || {};
            return mentionsDomain(params.url || params.selector || "") || domainTools.has(e.tool);
        })
        .map((e) => ({
            error: e.error,
            tool: e.tool,
            resolution: e.resolution,
            occurrences: e.occurrences,
        }));
    // Extract relevant strategies
    const domainStrategies = strategies.filter((s) => {
        const task = String(s.task ?? "").toLowerCase();
        return task.includes(domainLower) ||
            task.includes(platformLower) ||
            (s.tags || []).some((t) => String(t).toLowerCase().includes(platformLower));
    });
    // 2. Scan the current page for selectors (best effort; runs whether or not tabId was given)
    let pageSelectors = {};
    try {
        const { client } = await getCDPClient(tabId);
        try {
            await client.Runtime.enable();
            const scanResult = await client.Runtime.evaluate({
                expression: `(() => {
                    const url = location.href;
                    if (!url.toLowerCase().includes(${JSON.stringify(domainLower)})) return { match: false, url };
                    const inputs = Array.from(document.querySelectorAll('input,select,textarea,button[type="submit"]'));
                    const selectors = {};
                    for (const el of inputs) {
                        const id = el.id;
                        const name = el.name || el.getAttribute('aria-label') || el.placeholder || el.type || el.tagName.toLowerCase();
                        const key = (id || name || '').replace(/[^a-zA-Z0-9_]/g, '_').toLowerCase();
                        if (!key) continue;
                        if (id) selectors[key] = '#' + id;
                        else if (el.name) selectors[key] = '[name="' + el.name + '"]';
                        else if (el.getAttribute('aria-label')) selectors[key] = '[aria-label="' + el.getAttribute('aria-label') + '"]';
                    }
                    return { match: true, url, selectors };
                })()`,
                returnByValue: true,
            });
            if (scanResult.result.value?.match) {
                pageSelectors = scanResult.result.value.selectors || {};
            }
        }
        finally {
            // Close the connection even when the scan itself fails.
            await client.close();
        }
    }
    catch {
        // No browser or wrong page — skip selector scan
    }
    // 3. Build playbook JSON
    const urlEntries = [];
    for (const u of Array.from(urlSet).sort()) {
        let parsed;
        try {
            parsed = new URL(u);
        }
        catch {
            // Text captured by the URL regex is not always a valid URL; leave it out of the map.
            continue;
        }
        const pathKey = parsed.pathname.replace(/^\//, "").replace(/\//g, "_").replace(/[^a-zA-Z0-9_]/g, "") || "home";
        urlEntries.push([pathKey, u]);
    }
    const playbook = {
        platform: platformLower,
        version: "1.0.0",
        updated: new Date().toISOString().slice(0, 10),
        description: description || `Automation playbook for ${platform}`,
        urls: Object.fromEntries(urlEntries),
        flows: {
            discovered: {
                steps: domainStrategies.length > 0
                    ? (domainStrategies[0].steps || []).map((s) => `${s.tool}(${JSON.stringify(s.params)})`)
                    : ["No strategies recorded yet. Use the platform, then call export_playbook again."],
                selectors: pageSelectors,
            },
        },
        detection: {
            is_logged_in: "// Add detection JS for logged-in state",
        },
        errors: domainErrors.map((e) => ({
            error: e.error,
            context: `Tool: ${e.tool} (${e.occurrences}x)`,
            solution: e.resolution || "No resolution recorded yet. Fix it and call memory_save.",
            severity: e.occurrences >= 3 ? "high" : "medium",
        })),
        _meta: {
            exported_from: "screenhand",
            actions_count: domainActions.length,
            strategies_count: domainStrategies.length,
        },
    };
    // 4. Save to playbooks dir
    const outPath = path.resolve(playbooksDir, fileName);
    const exists = fs.existsSync(outPath);
    // `recursive: true` makes this a no-op when the directory is already present.
    fs.mkdirSync(playbooksDir, { recursive: true });
    fs.writeFileSync(outPath, JSON.stringify(playbook, null, 2));
    return {
        content: [{
                type: "text",
                text: `${exists ? "Updated" : "Created"} playbook: playbooks/${platformLower}.json\n\n` +
                    `URLs found: ${urlSet.size}\n` +
                    `Selectors found: ${Object.keys(pageSelectors).length}\n` +
                    `Errors documented: ${domainErrors.length}\n` +
                    `Strategies: ${domainStrategies.length}\n\n` +
                    `Share this file to help others automate ${platform}.\n\n` +
                    JSON.stringify(playbook, null, 2),
            }],
    };
});
1059
+ // ═══════════════════════════════════════════════
1060
+ // APPLESCRIPT — control scriptable apps directly
1061
+ // ═══════════════════════════════════════════════
1062
// Tool: applescript — audit-log the script, then run it through `osascript -e` with a 15 s timeout.
// On win32 the tool returns an explanatory message instead of executing anything.
// Output: the trimmed stdout (or "(no output)"), or "Error: ..." built from stderr / the error message.
server.tool("applescript", "Run an AppleScript command. For controlling Finder, Safari, Mail, Notes, etc. (macOS only). WARNING: Executes arbitrary AppleScript — can perform destructive actions (delete files, send emails). All executions are audit-logged.", {
    script: z.string().describe("AppleScript code to execute"),
}, async ({ script }) => {
    const asText = (text) => ({ content: [{ type: "text", text }] });
    auditLog("applescript", { script: script.slice(0, 500) });
    if (process.platform === "win32") {
        return asText("AppleScript is not supported on Windows. Use ui_tree, ui_press, and other accessibility tools instead.");
    }
    try {
        // Each single quote becomes: close quote, escaped quote, reopen quote.
        const quoted = script.replace(/'/g, "'\\''");
        const output = execSync(`osascript -e '${quoted}'`, {
            encoding: "utf-8",
            timeout: 15000,
        });
        return asText(output.trim() || "(no output)");
    }
    catch (e) {
        return asText("Error: " + (e.stderr || e.message));
    }
});
1080
+ // ═══════════════════════════════════════════════
1081
+ // MEMORY — recall past strategies and error patterns
1082
+ // ═══════════════════════════════════════════════
1083
// Tool: memory_snapshot — return memory.getSnapshot() pretty-printed as JSON text.
originalTool("memory_snapshot", "Get current memory state snapshot — session info, mission, health metrics, known patterns, and policy.", {}, async () => {
    const text = JSON.stringify(memory.getSnapshot(), null, 2);
    return { content: [{ type: "text", text }] };
});
1087
// Tool: memory_recall — look up stored strategies for `task` (at most `limit`, default 5) and
// render each match with its usage count, score and numbered steps.
originalTool("memory_recall", "Have I done something like this before? Searches past successful strategies by keyword similarity.", {
    task: z.string().describe("Describe the task you want to accomplish"),
    limit: z.number().optional().describe("Max results (default 5)"),
}, async ({ task, limit }) => {
    const found = memory.recallStrategies(task, limit ?? 5);
    if (found.length === 0) {
        return { content: [{ type: "text", text: "No matching strategies found. Try memory_save after completing a task to build up knowledge." }] };
    }
    const sections = [];
    found.forEach((match, idx) => {
        const stepLines = match.steps.map((step, n) => ` ${n + 1}. ${step.tool}(${JSON.stringify(step.params)})`);
        sections.push(`${idx + 1}. "${match.task}" (used ${match.successCount}x, score: ${match.score.toFixed(2)})\n${stepLines.join("\n")}`);
    });
    return { content: [{ type: "text", text: sections.join("\n\n") }] };
});
1101
// Tool: memory_save — ask the memory store to save a strategy under `task` with optional `tags`.
// When saveStrategy returns a falsy value, the caller is told that no actions were recorded.
originalTool("memory_save", "This approach worked — remember it. Saves the current session's action sequence as a reusable strategy.", {
    task: z.string().describe("Short description of the task that was accomplished"),
    tags: z.array(z.string()).optional().describe("Optional tags for easier recall"),
}, async ({ task, tags }) => {
    const saved = memory.saveStrategy(task, tags);
    const text = saved
        ? `Saved strategy "${task}" with ${saved.steps.length} steps. Tags: ${saved.tags.join(", ")}`
        : "No actions recorded in the current session. Perform some tool calls first, then save.";
    return { content: [{ type: "text", text }] };
});
1111
// Tool: memory_record_error — store an error pattern for `tool`, with an optional fix and scope.
// A missing `fix` is passed to the memory store as null.
originalTool("memory_record_error", "Record a known error pattern with an optional fix. Helps future sessions avoid the same problem.", {
    tool: z.string().describe("Tool that failed"),
    error: z.string().describe("Error message or description"),
    fix: z.string().optional().describe("How to fix or work around this error"),
    scope: z.string().optional().describe("Scope of the error (e.g., 'chrome/github.com', 'vscode/terminal')"),
}, async ({ tool, error, fix, scope }) => {
    memory.recordError(tool, error, fix ?? null, scope);
    let fixNote = "";
    if (fix) {
        fixNote = `\nFix: ${fix}`;
    }
    return { content: [{ type: "text", text: `Error pattern recorded for "${tool}": "${error}"${fixNote}` }] };
});
1120
// Tool: memory_record_learning — record one observation (scope, pattern, method, confidence) as a
// learning entry. A success counts as successCount 1 / failCount 0, a failure as the reverse;
// lastSeen is stamped with the current ISO time and a missing `fix` is stored as null.
originalTool("memory_record_learning", "Record a verified pattern — what works, what fails, and how to fix it. Builds the knowledge base for future sessions.", {
    scope: z.string().describe("Scope (e.g., 'chrome/github.com', 'slack/desktop', 'vscode/terminal')"),
    pattern: z.string().describe("What worked or failed"),
    method: z.enum(["ax", "cdp", "ocr", "coordinates"]).describe("Which execution method was used"),
    confidence: z.number().min(0).max(1).describe("Confidence level 0-1"),
    success: z.boolean().describe("Was this a success or failure?"),
    fix: z.string().optional().describe("Fix or workaround if it was a failure"),
}, async ({ scope, pattern, method, confidence, success, fix }) => {
    let successCount = 0;
    let failCount = 1;
    if (success) {
        successCount = 1;
        failCount = 0;
    }
    const entry = {
        scope,
        pattern,
        method,
        confidence,
        successCount,
        failCount,
        lastSeen: new Date().toISOString(),
        fix: fix ?? null,
    };
    memory.recordLearning(entry);
    return { content: [{ type: "text", text: `Learning recorded: ${scope} — "${pattern}" (${method}, confidence=${confidence})` }] };
});
1140
// Tool: memory_query_patterns — list learnings returned by memory.queryPatterns(scope, method),
// one numbered line per pattern with its confidence, success/fail counts and optional fix.
originalTool("memory_query_patterns", "Search verified learnings by scope and/or execution method.", {
    scope: z.string().optional().describe("Filter by scope (e.g., 'chrome', 'vscode')"),
    method: z.enum(["ax", "cdp", "ocr", "coordinates"]).optional().describe("Filter by execution method"),
}, async ({ scope, method }) => {
    const found = memory.queryPatterns(scope, method);
    if (found.length === 0) {
        return { content: [{ type: "text", text: "No matching patterns found." }] };
    }
    const rows = [];
    found.forEach((entry, idx) => {
        const fixNote = entry.fix ? `\n Fix: ${entry.fix}` : "";
        rows.push(`${idx + 1}. [${entry.method}] ${entry.scope}: "${entry.pattern}" (confidence=${entry.confidence.toFixed(2)}, ${entry.successCount}✓ ${entry.failCount}✗)${fixNote}`);
    });
    return { content: [{ type: "text", text: rows.join("\n") }] };
});
1151
// Tool: memory_errors — list error patterns from memory.queryErrors(tool), one numbered line each,
// including the occurrence count and the resolution when one is stored.
originalTool("memory_errors", "What goes wrong with this tool? Shows known error patterns and resolutions.", {
    tool: z.string().optional().describe("Tool name to filter by (omit for all errors)"),
}, async ({ tool }) => {
    const known = memory.queryErrors(tool);
    if (known.length === 0) {
        const emptyText = tool ? `No known error patterns for "${tool}".` : "No error patterns recorded yet.";
        return { content: [{ type: "text", text: emptyText }] };
    }
    const rows = [];
    known.forEach((entry, idx) => {
        const fixNote = entry.resolution ? `\n Fix: ${entry.resolution}` : "";
        rows.push(`${idx + 1}. ${entry.tool}: "${entry.error}" (${entry.occurrences}x)${fixNote}`);
    });
    return { content: [{ type: "text", text: rows.join("\n") }] };
});
1161
// Tool: memory_stats — format the counters from memory.getStats() as a short multi-line report,
// followed by a "Top tools" section when stats.topTools is non-empty.
originalTool("memory_stats", "How much have I learned? Shows total actions, strategies, error patterns, and success rates.", {}, async () => {
    const stats = memory.getStats();
    const successPct = (stats.successRate * 100).toFixed(1);
    const diskKb = (stats.diskUsageBytes / 1024).toFixed(1);
    const summary = [
        `Actions logged: ${stats.totalActions}`,
        `Strategies saved: ${stats.totalStrategies}`,
        `Error patterns: ${stats.totalErrors}`,
        `Success rate: ${successPct}%`,
        `Disk usage: ${diskKb} KB`,
    ];
    const toolSection = stats.topTools.length > 0
        ? ["", "Top tools:", ...stats.topTools.map((entry) => ` ${entry.tool}: ${entry.count} calls`)]
        : [];
    return { content: [{ type: "text", text: [...summary, ...toolSection].join("\n") }] };
});
1178
// Tool: memory_clear — call memory.clear(what) for the chosen category and confirm what was cleared.
originalTool("memory_clear", "Forget everything or just a specific category. Clears stored memory data.", {
    what: z.enum(["all", "actions", "strategies", "errors", "learnings"]).describe("What to clear"),
}, async ({ what }) => {
    memory.clear(what);
    let label = what;
    if (what === "all") {
        label = "all memory data";
    }
    return { content: [{ type: "text", text: `Cleared ${label}.` }] };
});
1184
+ // ═══════════════════════════════════════════════
1185
+ // SESSION SUPERVISOR — lease management, stall detection, recovery
1186
+ // ═══════════════════════════════════════════════
1187
// session_claim — takes a lease on (app, windowId) through the filesystem-backed
// lease manager, which is shared with the supervisor daemon.
originalTool(
    "session_claim",
    "Claim exclusive control of an app window. Prevents other clients from acting on the same window.",
    {
        clientId: z.string().describe("Your client identifier (e.g., 'claude_abc123')"),
        clientType: z.enum(["claude", "codex", "cursor", "openclaw"]).describe("Client type"),
        app: z.string().describe("Bundle ID of the app (e.g., 'com.google.Chrome')"),
        windowId: z.number().describe("Window ID to claim (get from 'windows' tool)"),
    },
    async ({ clientId, clientType, app, windowId }) => {
        const client = { id: clientId, type: clientType, startedAt: new Date().toISOString() };
        const lease = leaseManager.claim(client, app, windowId);
        if (lease) {
            const message = [
                "Session claimed!",
                `Session ID: ${lease.sessionId}`,
                `App: ${app}`,
                `Window: ${windowId}`,
                `Expires: ${lease.expiresAt}`,
                "",
                "Call session_heartbeat every 60s to keep the lease alive.",
            ].join("\n");
            return { content: [{ type: "text", text: message }] };
        }
        // Claim refused: report who currently holds the window.
        const holder = leaseManager.isLocked(app, windowId);
        const holderType = holder?.client.type ?? "unknown";
        return { content: [{ type: "text", text: `Window already claimed by ${holderType} (session=${holder?.sessionId}). Release it first or wait for expiry.` }] };
    },
);
1201
// session_heartbeat — refreshes a lease via the filesystem-backed lease manager
// (shared with the supervisor daemon).
originalTool(
    "session_heartbeat",
    "Keep your session lease alive. Call every 60 seconds. Lease expires after 5 minutes without heartbeat.",
    {
        sessionId: z.string().describe("Session ID from session_claim"),
    },
    async ({ sessionId }) => {
        const renewed = leaseManager.heartbeat(sessionId);
        const text = renewed
            ? `Heartbeat OK for ${sessionId}.`
            : `Session ${sessionId} not found or expired. Re-claim with session_claim.`;
        return { content: [{ type: "text", text }] };
    },
);
1211
// session_release — drops a lease via the filesystem-backed lease manager
// (shared with the supervisor daemon).
originalTool(
    "session_release",
    "Release your session lease so other clients can use the window.",
    {
        sessionId: z.string().describe("Session ID to release"),
    },
    async ({ sessionId }) => {
        let text = `Session ${sessionId} not found.`;
        if (leaseManager.release(sessionId)) {
            text = `Session ${sessionId} released.`;
        }
        return { content: [{ type: "text", text }] };
    },
);
1218
// supervisor_status — reports daemon liveness, active leases and, optionally,
// the tail of the supervisor log.
originalTool("supervisor_status", "Get supervisor state — active sessions, health metrics, stall detection.", {
    tail_log: z.number().optional().describe("Show last N lines of supervisor log (default: 0, max: 50)"),
}, async ({ tail_log }) => {
    const { running: daemonRunning, pid: daemonPid } = isSupervisorDaemonRunning();
    // Always read active sessions from the shared filesystem lock dir (source of truth)
    const activeSessions = leaseManager.getActive();
    // Read daemon health counters if available, otherwise show minimal info
    const defaultHealth = { uptimeMs: 0, totalSessions: 0, expiredLeases: 0, stallsDetected: 0, recoveriesAttempted: 0 };
    let health = defaultHealth;
    if (daemonRunning && fs.existsSync(SUPERVISOR_STATE_FILE)) {
        try {
            const daemonState = JSON.parse(fs.readFileSync(SUPERVISOR_STATE_FILE, "utf-8"));
            const reported = daemonState?.health;
            // Merge over the defaults so a partially written state file never prints NaN/undefined.
            if (reported !== null && typeof reported === "object") {
                health = { ...defaultHealth, ...reported };
            }
        }
        catch { /* unreadable or corrupt state file — keep defaults */ }
    }
    const lines = [
        `Supervisor: ${daemonRunning ? "DAEMON RUNNING" : "STOPPED"} (pid=${daemonPid ?? "n/a"})`,
        `Active sessions: ${activeSessions.length} (from lock files)`,
    ];
    if (daemonRunning) {
        lines.push(`Uptime: ${Math.round(health.uptimeMs / 60000)}m`, `Expired leases: ${health.expiredLeases}`, `Stalls detected: ${health.stallsDetected}`, `Recoveries attempted: ${health.recoveriesAttempted}`);
    }
    if (activeSessions.length > 0) {
        lines.push("", "Active sessions:");
        for (const s of activeSessions) {
            lines.push(` ${s.sessionId}: ${s.client.type} → ${s.app} (window=${s.windowId}, heartbeat=${s.lastHeartbeat})`);
        }
    }
    // Normalise to a whole number first: a fractional request such as 0.5 would
    // otherwise become slice(-0.5) === slice(0) and dump the entire log.
    const requestedLines = Number.isFinite(tail_log) ? Math.min(Math.floor(tail_log), 50) : 0;
    if (requestedLines > 0) {
        try {
            const logContent = fs.readFileSync(SUPERVISOR_LOG_FILE, "utf-8");
            const logLines = logContent.trim().split("\n").slice(-requestedLines);
            lines.push("", "--- Supervisor Log ---");
            lines.push(logLines.join("\n"));
        }
        catch {
            lines.push("\n(no log file found)");
        }
    }
    return { content: [{ type: "text", text: lines.join("\n") }] };
});
1259
// Filesystem locations used by the supervisor tools. Everything except the
// daemon script lives under ~/.screenhand/supervisor.
const SUPERVISOR_DIR = path.join(os.homedir(), ".screenhand", "supervisor");

// PID written for the running daemon; read by isSupervisorDaemonRunning().
const SUPERVISOR_PID_FILE = path.join(SUPERVISOR_DIR, "supervisor.pid");

// JSON state file; supervisor_status reads its `health` field.
const SUPERVISOR_STATE_FILE = path.join(SUPERVISOR_DIR, "state.json");

// Text log tailed by supervisor_status.
const SUPERVISOR_LOG_FILE = path.join(SUPERVISOR_DIR, "supervisor.log");

// Recovery queue shared between these tools and the daemon.
const SUPERVISOR_RECOVERIES_FILE = path.join(SUPERVISOR_DIR, "recoveries.json");

// TypeScript daemon entry point, used for the `npx tsx` (dev mode) fallback.
const SUPERVISOR_DAEMON_SCRIPT = path.resolve(__dirname, "scripts", "supervisor-daemon.ts");
1265
/**
 * Read the recovery queue from the daemon's filesystem state (with
 * corrupt-file recovery).
 *
 * @returns {Array<object>} The stored recoveries. An empty array is returned
 *   when nothing is stored or when the stored value is not an array, so
 *   callers can always `.push()` / `.filter()` the result.
 */
function readDaemonRecoveries() {
    const stored = readJsonWithRecovery(SUPERVISOR_RECOVERIES_FILE);
    return Array.isArray(stored) ? stored : [];
}
1269
/**
 * Persist the recovery queue to the daemon's filesystem state.
 *
 * The supervisor directory is created on demand, and the pretty-printed JSON
 * is handed to writeFileAtomicSync.
 *
 * @param {Array<object>} recoveries - Full queue to store (replaces the file).
 */
function writeDaemonRecoveries(recoveries) {
    const serialized = JSON.stringify(recoveries, null, 2);
    fs.mkdirSync(SUPERVISOR_DIR, { recursive: true });
    writeFileAtomicSync(SUPERVISOR_RECOVERIES_FILE, serialized);
}
1274
/**
 * Check whether the supervisor daemon recorded in the PID file is alive.
 *
 * @returns {{running: boolean, pid: number|null}} `running: true` with the
 *   daemon's pid when the PID file holds a positive integer and that process
 *   accepts signal 0; otherwise `{ running: false, pid: null }`.
 */
function isSupervisorDaemonRunning() {
    try {
        if (!fs.existsSync(SUPERVISOR_PID_FILE)) {
            return { running: false, pid: null };
        }
        const pid = Number(fs.readFileSync(SUPERVISOR_PID_FILE, "utf-8").trim());
        // An empty file parses to 0, and pid <= 0 addresses process groups rather
        // than a single process. Treating that as "running" would let callers
        // SIGTERM their own process group, so reject anything but a real pid.
        if (!Number.isInteger(pid) || pid <= 0) {
            return { running: false, pid: null };
        }
        // Signal 0 performs the existence/permission check without delivering a signal.
        process.kill(pid, 0);
        return { running: true, pid };
    }
    catch {
        // Unreadable PID file or no such process: report as not running.
        return { running: false, pid: null };
    }
}
1286
// supervisor_start — launches the supervisor daemon as a detached background
// process, then confirms it came up by checking its PID file.
originalTool("supervisor_start", "Start the supervisor as a background daemon. Survives Claude Code restarts. Monitors sessions, detects stalls, executes recovery actions via native bridge.", {
    pollMs: z.number().optional().describe("Poll interval in ms (default: 5000)"),
    stallMs: z.number().optional().describe("Stall threshold in ms (default: 300000 = 5 min)"),
    dryRun: z.boolean().optional().describe("Log recovery actions without executing them (default: false)"),
}, async ({ pollMs, stallMs, dryRun }) => {
    const { running, pid } = isSupervisorDaemonRunning();
    if (running) {
        return { content: [{ type: "text", text: `Supervisor daemon already running (pid=${pid}). Use supervisor_stop first.` }] };
    }
    // Try compiled JS first (reliable), fall back to tsx (dev mode).
    // Running from dist/: the script is a sibling at scripts/supervisor-daemon.js.
    // Running from the source root: it is at dist/scripts/supervisor-daemon.js.
    const siblingScript = path.resolve(__dirname, "scripts", "supervisor-daemon.js");
    const compiledPath = fs.existsSync(siblingScript)
        ? siblingScript
        : path.resolve(__dirname, "dist", "scripts", "supervisor-daemon.js");
    const usedCompiled = fs.existsSync(compiledPath);
    // Daemon flags are identical for both launch modes.
    const daemonFlags = [];
    if (pollMs)
        daemonFlags.push("--poll", String(pollMs));
    if (stallMs)
        daemonFlags.push("--stall", String(stallMs));
    if (dryRun)
        daemonFlags.push("--dry-run");
    const command = usedCompiled ? "node" : "npx";
    const commandArgs = usedCompiled
        ? [compiledPath, ...daemonFlags]
        : ["tsx", SUPERVISOR_DAEMON_SCRIPT, ...daemonFlags];
    const child = spawn(command, commandArgs, {
        detached: true,
        stdio: "ignore",
        cwd: __dirname,
    });
    // A failed spawn (e.g. `node`/`npx` not on PATH) is reported through the
    // 'error' event; with no listener it would surface as an unhandled error
    // event and take the whole server down.
    let spawnError = null;
    child.on("error", (err) => {
        spawnError = err;
    });
    child.unref();
    const daemonPid = child.pid;
    // Wait briefly, then verify the daemon actually started by checking PID file
    await new Promise((r) => setTimeout(r, 2000));
    const verify = isSupervisorDaemonRunning();
    const mode = usedCompiled ? "compiled" : "tsx";
    if (!verify.running) {
        const spawnNote = spawnError ? `\nSpawn error: ${spawnError.message}` : "";
        return { content: [{ type: "text", text: `Supervisor daemon failed to start (spawned pid=${daemonPid}, mode=${mode}).${spawnNote}\nCheck log: ${SUPERVISOR_LOG_FILE}\n\nIf running in a restricted environment, ensure 'npx tsx' or 'node' can spawn processes.\nYou can also run the daemon manually: npx tsx scripts/supervisor-daemon.ts` }] };
    }
    const dryNote = dryRun ? "\n⚠️ DRY RUN mode — recovery actions are logged but not executed." : "";
    return { content: [{ type: "text", text: `Supervisor daemon started (pid=${verify.pid}, mode=${mode}).\nPoll: ${pollMs ?? 5000}ms | Stall threshold: ${stallMs ?? 300000}ms\nLog: ${SUPERVISOR_LOG_FILE}${dryNote}\n\nThe daemon runs independently — survives Claude Code restarts.\nUse supervisor_status to check health.` }] };
});
1343
// supervisor_stop — sends SIGTERM to the daemon named in the PID file and
// reports whether it actually went away.
originalTool("supervisor_stop", "Stop the supervisor background daemon.", {}, async () => {
    const { running, pid } = isSupervisorDaemonRunning();
    if (!running) {
        // Also stop in-process supervisor if it was started
        await supervisor.stop();
        return { content: [{ type: "text", text: "No supervisor daemon running." }] };
    }
    // Never signal pid <= 0: those values address process groups (including our own).
    if (!Number.isInteger(pid) || pid <= 0) {
        return { content: [{ type: "text", text: `Failed to stop: invalid daemon pid (${pid}).` }] };
    }
    try {
        process.kill(pid, "SIGTERM");
        await new Promise((r) => setTimeout(r, 1000));
        // Confirm the process is gone instead of assuming SIGTERM was honoured.
        if (isSupervisorDaemonRunning().running) {
            return { content: [{ type: "text", text: `Sent SIGTERM to supervisor daemon (pid=${pid}), but it is still running. Check again with supervisor_status.` }] };
        }
        return { content: [{ type: "text", text: `Supervisor daemon stopped (pid=${pid}).` }] };
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        return { content: [{ type: "text", text: `Failed to stop: ${reason}` }] };
    }
});
1359
// supervisor_pause — queues one pending "escalate" recovery per active lease.
// Leases themselves are left untouched.
originalTool(
    "supervisor_pause",
    "Pause all automation — keeps leases but signals clients to stop acting.",
    {
        reason: z.string().optional().describe("Why automation is being paused"),
    },
    async ({ reason }) => {
        // Active sessions come from the shared filesystem lock dir (source of truth).
        const activeLeases = leaseManager.getActive();
        // The escalations are appended to the daemon's on-disk recovery queue.
        const queue = readDaemonRecoveries();
        activeLeases.forEach((lease) => {
            const recoveryId = "recv_" + Date.now().toString(36) + "_" + Math.random().toString(36).slice(2, 8);
            queue.push({
                id: recoveryId,
                sessionId: lease.sessionId,
                type: "escalate",
                instruction: reason ?? "Automation paused by operator.",
                status: "pending",
                createdAt: new Date().toISOString(),
                attemptedAt: null,
                result: null,
            });
        });
        writeDaemonRecoveries(queue);
        const text = `Paused. ${activeLeases.length} session(s) notified. Leases held — call supervisor_resume to continue.`;
        return { content: [{ type: "text", text }] };
    },
);
1381
// supervisor_resume — marks every pending "escalate" recovery in the daemon's
// on-disk queue as succeeded, then writes the queue back.
originalTool("supervisor_resume", "Resume automation after a pause.", {}, async () => {
    const queue = readDaemonRecoveries();
    const pendingEscalations = queue.filter((entry) => entry.type === "escalate" && entry.status === "pending");
    pendingEscalations.forEach((entry) => {
        entry.status = "succeeded";
        entry.result = "Resumed by operator.";
    });
    writeDaemonRecoveries(queue);
    const text = `Resumed. ${pendingEscalations.length} pause escalation(s) cleared. Clients can continue.`;
    return { content: [{ type: "text", text }] };
});
1395
// recovery_queue_add — appends one pending recovery to the daemon's on-disk
// queue so the daemon picks it up.
originalTool(
    "recovery_queue_add",
    "Add a manual recovery instruction for a stalled session.",
    {
        sessionId: z.string().describe("Session ID that needs recovery"),
        type: z.enum(["nudge", "restart", "escalate", "custom"]).describe("Recovery type"),
        instruction: z.string().describe("What to do (e.g., 'Click the login button', 'Restart Chrome')"),
    },
    async ({ sessionId, type, instruction }) => {
        const timePart = Date.now().toString(36);
        const randomPart = Math.random().toString(36).slice(2, 8);
        const entry = {
            id: "recv_" + timePart + "_" + randomPart,
            sessionId,
            type,
            instruction,
            status: "pending",
            createdAt: new Date().toISOString(),
            attemptedAt: null,
            result: null,
        };
        const queue = readDaemonRecoveries();
        queue.push(entry);
        writeDaemonRecoveries(queue);
        return { content: [{ type: "text", text: `Recovery queued: ${entry.id} (type=${type})` }] };
    },
);
1416
// recovery_queue_list — renders the daemon's on-disk recovery queue,
// optionally restricted to one status.
originalTool("recovery_queue_list", "List recovery actions, optionally filtered by status.", {
    status: z.enum(["pending", "attempted", "succeeded", "failed"]).optional().describe("Filter by status"),
}, async ({ status }) => {
    // Read from daemon's filesystem state
    let recoveries = readDaemonRecoveries();
    if (status) {
        recoveries = recoveries.filter((r) => r.status === status);
    }
    if (recoveries.length === 0) {
        // Build the message per case so an absent filter does not leave a double space.
        const emptyText = status ? `No ${status} recovery actions.` : "No recovery actions.";
        return { content: [{ type: "text", text: emptyText }] };
    }
    const text = recoveries.map((r, i) => {
        // The queue file is shared with another process and may be hand-edited;
        // coerce the fields that get string methods applied so one malformed
        // record cannot make the whole listing throw.
        const statusLabel = String(r.status ?? "unknown").toUpperCase();
        const instruction = String(r.instruction ?? "").slice(0, 80);
        const resultNote = r.result ? `\n Result: ${r.result}` : "";
        return `${i + 1}. [${statusLabel}] ${r.type}: "${instruction}"\n Session: ${r.sessionId} | Created: ${r.createdAt}${resultNote}`;
    }).join("\n\n");
    return { content: [{ type: "text", text }] };
});
1430
+ // ── Service install / auto-start (launchd on macOS) ──
1431
// launchd job label; also the basename of the plist file.
const LAUNCHD_LABEL = "com.screenhand.supervisor";
// Per-user LaunchAgents plist: ~/Library/LaunchAgents/<label>.plist
const LAUNCHD_PLIST_PATH = path.join(
    os.homedir(),
    "Library",
    "LaunchAgents",
    `${LAUNCHD_LABEL}.plist`,
);
1433
/**
 * Pick the Node.js executable used to launch the daemon.
 *
 * @returns {string} Path of the binary running the current process, which is
 *   therefore known to exist.
 */
function findNodeBinary() {
    const { execPath } = process;
    return execPath;
}
1437
/**
 * Locate the compiled supervisor daemon script.
 *
 * Candidates are checked in order, relative to this module's directory:
 * `scripts/supervisor-daemon.js` (running from dist/), then
 * `dist/scripts/supervisor-daemon.js` (running from the source root).
 *
 * @returns {string|null} Absolute path of the first candidate that exists,
 *   or null when neither does.
 */
function findDaemonScript() {
    const candidateSegments = [
        ["scripts", "supervisor-daemon.js"],
        ["dist", "scripts", "supervisor-daemon.js"],
    ];
    for (const segments of candidateSegments) {
        const candidate = path.resolve(__dirname, ...segments);
        if (fs.existsSync(candidate)) {
            return candidate;
        }
    }
    return null;
}
1448
/**
 * Build the launchd property list that runs the supervisor daemon.
 *
 * Every interpolated value is XML-escaped, so paths or a PATH containing
 * `&`, `<` or `>` still produce a well-formed plist.
 *
 * @param {string} nodeBin - Node.js executable to launch.
 * @param {string} daemonScript - Path of the compiled daemon script.
 * @param {{pollMs?: number, stallMs?: number}} [opts] - Optional daemon flags;
 *   a flag is emitted only when its value is truthy.
 * @returns {string} Plist XML, terminated by a newline.
 */
function generatePlist(nodeBin, daemonScript, opts = {}) {
    // `&` must be replaced first so the entities added afterwards are not re-escaped.
    const escapeXml = (value) => String(value)
        .replace(/&/g, "&amp;")
        .replace(/</g, "&lt;")
        .replace(/>/g, "&gt;");
    const args = [nodeBin, daemonScript];
    if (opts.pollMs)
        args.push("--poll", String(opts.pollMs));
    if (opts.stallMs)
        args.push("--stall", String(opts.stallMs));
    const programArgs = args.map((a) => `    <string>${escapeXml(a)}</string>`);
    // Inherit PATH so native bridge binary and node can be found
    const envPath = process.env.PATH ?? "/usr/local/bin:/usr/bin:/bin";
    // Strip a trailing /dist/scripts (or /dist) from the script's directory.
    const workingDirectory = path.dirname(daemonScript)
        .replace(/\/dist\/scripts$/, "")
        .replace(/\/dist$/, "");
    const supervisorDir = escapeXml(SUPERVISOR_DIR);
    const lines = [
        '<?xml version="1.0" encoding="UTF-8"?>',
        '<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN"',
        '  "http://www.apple.com/DTDs/PropertyList-1.0.dtd">',
        '<plist version="1.0">',
        "<dict>",
        "  <key>Label</key>",
        `  <string>${escapeXml(LAUNCHD_LABEL)}</string>`,
        "",
        "  <key>ProgramArguments</key>",
        "  <array>",
        ...programArgs,
        "  </array>",
        "",
        "  <key>WorkingDirectory</key>",
        `  <string>${escapeXml(workingDirectory)}</string>`,
        "",
        "  <key>RunAtLoad</key>",
        "  <true/>",
        "",
        "  <key>KeepAlive</key>",
        "  <dict>",
        "    <key>SuccessfulExit</key>",
        "    <false/>",
        "  </dict>",
        "",
        "  <key>ThrottleInterval</key>",
        "  <integer>10</integer>",
        "",
        "  <key>StandardOutPath</key>",
        `  <string>${supervisorDir}/launchd-stdout.log</string>`,
        "",
        "  <key>StandardErrorPath</key>",
        `  <string>${supervisorDir}/launchd-stderr.log</string>`,
        "",
        "  <key>EnvironmentVariables</key>",
        "  <dict>",
        "    <key>PATH</key>",
        `    <string>${escapeXml(envPath)}</string>`,
        "  </dict>",
        "</dict>",
        "</plist>",
        "",
    ];
    return lines.join("\n");
}
1500
/**
 * Report whether the launchd plist for the supervisor exists on disk.
 *
 * @returns {boolean} True when a file is present at LAUNCHD_PLIST_PATH.
 */
function isServiceInstalled() {
    const plistPresent = fs.existsSync(LAUNCHD_PLIST_PATH);
    return plistPresent;
}
1503
// supervisor_install — writes a launchd plist for the daemon and loads it
// with launchctl (macOS only).
originalTool("supervisor_install", "Install the supervisor as a system service (launchd on macOS). Starts automatically on login and restarts on crash.", {
    pollMs: z.number().optional().describe("Poll interval in ms (default: 5000)"),
    stallMs: z.number().optional().describe("Stall threshold in ms (default: 300000 = 5 min)"),
}, async ({ pollMs, stallMs }) => {
    if (process.platform !== "darwin") {
        return { content: [{ type: "text", text: "Service install is currently macOS-only (launchd). Windows Task Scheduler support coming soon." }] };
    }
    const daemonScript = findDaemonScript();
    if (!daemonScript) {
        return { content: [{ type: "text", text: "Cannot find compiled daemon script. Run `npx tsc` first to build dist/scripts/supervisor-daemon.js." }] };
    }
    const nodeBin = findNodeBinary();
    // Imported once; used for both the unload and the load below.
    const { execFileSync } = await import("node:child_process");
    // Stop existing daemon if running (will be managed by launchd now)
    const { running, pid } = isSupervisorDaemonRunning();
    if (running && pid) {
        try {
            process.kill(pid, "SIGTERM");
        }
        catch { /* best effort — the process may already have exited */ }
        await new Promise((r) => setTimeout(r, 1000));
    }
    // Unload existing plist if present
    if (isServiceInstalled()) {
        try {
            execFileSync("launchctl", ["unload", LAUNCHD_PLIST_PATH], { stdio: "ignore" });
        }
        catch { /* best effort — the job may not be loaded */ }
    }
    // The plist points StandardOutPath/StandardErrorPath into SUPERVISOR_DIR;
    // create it up front so the log targets have a parent directory.
    fs.mkdirSync(SUPERVISOR_DIR, { recursive: true });
    // Write plist
    const plist = generatePlist(nodeBin, daemonScript, { pollMs, stallMs });
    fs.mkdirSync(path.dirname(LAUNCHD_PLIST_PATH), { recursive: true });
    fs.writeFileSync(LAUNCHD_PLIST_PATH, plist);
    // Load the service
    try {
        execFileSync("launchctl", ["load", LAUNCHD_PLIST_PATH]);
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        return { content: [{ type: "text", text: `Plist written to ${LAUNCHD_PLIST_PATH} but launchctl load failed: ${reason}\nTry manually: launchctl load "${LAUNCHD_PLIST_PATH}"` }] };
    }
    // Verify it started
    await new Promise((r) => setTimeout(r, 2000));
    const verify = isSupervisorDaemonRunning();
    const lines = [
        `Service installed and loaded.`,
        ` Plist: ${LAUNCHD_PLIST_PATH}`,
        ` Node: ${nodeBin}`,
        ` Script: ${daemonScript}`,
        ` Poll: ${pollMs ?? 5000}ms | Stall: ${stallMs ?? 300000}ms`,
        ` Status: ${verify.running ? `running (pid=${verify.pid})` : "starting..."}`,
        ``,
        `The supervisor will:`,
        ` - Start automatically on login`,
        ` - Restart automatically if it crashes`,
        ` - Survive reboots`,
        ``,
        `Use supervisor_uninstall to remove.`,
    ];
    return { content: [{ type: "text", text: lines.join("\n") }] };
});
1564
// supervisor_uninstall — unloads the launchd job, then removes the plist and
// the PID file. Every cleanup step is best-effort.
originalTool("supervisor_uninstall", "Uninstall the supervisor system service. Stops the daemon and removes the launchd plist.", {}, async () => {
    if (process.platform !== "darwin") {
        return { content: [{ type: "text", text: "Service uninstall is currently macOS-only." }] };
    }
    if (!isServiceInstalled()) {
        return { content: [{ type: "text", text: "No service installed (no plist at " + LAUNCHD_PLIST_PATH + ")." }] };
    }
    // Unloading the job is what stops the daemon; it may already be unloaded.
    try {
        const childProcess = await import("node:child_process");
        childProcess.execFileSync("launchctl", ["unload", LAUNCHD_PLIST_PATH]);
    }
    catch { /* ignore — may already be unloaded */ }
    // Remove the plist first, then the PID file; a failure on one does not block the other.
    for (const leftover of [LAUNCHD_PLIST_PATH, SUPERVISOR_PID_FILE]) {
        try {
            fs.unlinkSync(leftover);
        }
        catch { /* ignore */ }
    }
    const summary = `Service uninstalled.\n Removed: ${LAUNCHD_PLIST_PATH}\n Daemon stopped.\n\nState files in ~/.screenhand/ are preserved (logs, leases, recoveries).`;
    return { content: [{ type: "text", text: summary }] };
});
1589
+ // ═══════════════════════════════════════════════
1590
+ // EXECUTION CONTRACT — canonical fallback chain
1591
+ // ═══════════════════════════════════════════════
1592
+ import { METHOD_CAPABILITIES, DEFAULT_RETRY_POLICY, planExecution, executeWithFallback, } from "./src/runtime/execution-contract.js";
1593
// execution_plan — shows the ordered fallback chain planExecution() returns
// for an action, plus the default retry policy.
originalTool(
    "execution_plan",
    "Show the execution plan for an action type. Returns the ordered fallback chain based on available infrastructure.",
    {
        action: z.enum(["click", "type", "read", "locate", "select", "scroll"]).describe("Action type"),
    },
    async ({ action }) => {
        const methods = planExecution(action, { hasBridge: true, hasCDP: cdpPort !== null });
        const steps = [];
        methods.forEach((method, index) => {
            const capability = METHOD_CAPABILITIES[method];
            // Only the first entry of the chain is tagged as primary.
            const primaryTag = index === 0 ? " ← primary" : "";
            steps.push(`${index + 1}. ${method} (~${capability.avgLatencyMs}ms)${primaryTag}`);
        });
        steps.push("", `Retry policy: ${DEFAULT_RETRY_POLICY.maxRetriesPerMethod}/method, ${DEFAULT_RETRY_POLICY.maxTotalRetries} total, escalate after ${DEFAULT_RETRY_POLICY.escalateAfter}`);
        const body = steps.join("\n");
        return { content: [{ type: "text", text: `Execution plan for "${action}":\n${body}` }] };
    },
);
1604
+ // ── Shared helpers for resilient action tools ──
1605
/**
 * Resolve the process id the action tools should target.
 *
 * When `bundleId` is given, the app is focused via the bridge ("app.focus")
 * and its pid is used. If that fails or yields no pid, the frontmost app's
 * pid ("app.frontmost") is used instead.
 *
 * @param {string} [bundleId] - Bundle ID of the app to focus; optional.
 * @returns {Promise<number>} The resolved pid, or 0 when neither lookup
 *   produced one (callers handle pid=0).
 */
async function resolvePid(bundleId) {
    let pid = 0;
    if (bundleId) {
        try {
            const appInfo = await bridge.call("app.focus", { bundleId });
            pid = appInfo?.pid ?? 0;
        }
        catch { /* fall through to the frontmost app */ }
    }
    if (pid === 0) {
        try {
            const front = await bridge.call("app.frontmost", {});
            // Keep the "0 means unresolved" contract even when the reply carries no pid.
            pid = front?.pid ?? 0;
        }
        catch { /* caller will handle pid=0 */ }
    }
    return pid;
}
1623
/**
 * Describe the infrastructure available to the execution planner.
 *
 * @returns {{hasBridge: boolean, hasCDP: boolean}} `hasBridge` is always
 *   true; `hasCDP` is true whenever `cdpPort` is not null.
 */
function infra() {
    const cdpAvailable = cdpPort !== null;
    return { hasBridge: true, hasCDP: cdpAvailable };
}
1626
/**
 * Turn an execution result into the tool's text response.
 *
 * @param {string} action - Verb phrase used in the message (e.g. "Clicked").
 * @param {string} target - Target as requested by the caller.
 * @param {object} result - Execution result; reads `ok`, `target`, `method`,
 *   `fallbackFrom`, `durationMs` and `error`.
 * @returns {{content: Array<{type: string, text: string}>}} A single text item.
 */
function formatResult(action, target, result) {
    let text;
    if (!result.ok) {
        text = `Failed to ${action} "${target}" — all methods exhausted. Last error: ${result.error}`;
    }
    else {
        // Prefer the target reported by the method itself when it supplied one.
        const shownTarget = result.target ?? target;
        let fallbackNote = "";
        if (result.fallbackFrom) {
            fallbackNote = ` (fell back from ${result.fallbackFrom})`;
        }
        text = `${action} "${shownTarget}" via ${result.method}${fallbackNote} in ${result.durationMs}ms`;
    }
    return { content: [{ type: "text", text }] };
}
1633
+ // ── click_with_fallback ──
1634
// click_with_fallback — clicks a text target, trying each planned method in
// turn; every attempt reports a result object instead of throwing.
originalTool("click_with_fallback", "Click a target by text using the canonical fallback chain: AX → CDP → OCR. Automatically retries and falls through methods.", {
    target: z.string().describe("Text, title, or identifier of the element to click"),
    bundleId: z.string().optional().describe("App bundle ID (for AX path)"),
}, async ({ target, bundleId }) => {
    await ensureBridge();
    // The "coordinates" method is removed from the plan for this tool.
    const methods = planExecution("click", infra()).filter((candidate) => candidate !== "coordinates");
    const targetPid = await resolvePid(bundleId);

    // Each strategy receives `succeed(label)` to build the success result, and
    // throws when it cannot perform the click.
    const strategies = new Map();

    // AX: find the element by title, then perform the AXPress action on it.
    strategies.set("ax", async (succeed) => {
        const located = await bridge.call("ax.findElement", {
            pid: targetPid,
            title: target,
            exact: false,
        });
        await bridge.call("ax.performAction", {
            pid: targetPid,
            elementPath: located.elementPath,
            action: "AXPress",
        });
        return succeed(target);
    });

    // CDP: click the first element whose trimmed text or aria-label equals the target.
    strategies.set("cdp", async (succeed) => {
        if (!cdpPort) {
            throw new Error("CDP not available");
        }
        const { CDP: CDPClient, port } = await ensureCDP();
        const client = await CDPClient({ port });
        try {
            const needle = JSON.stringify(target);
            const expression = [
                "(() => {",
                "const el = Array.from(document.querySelectorAll('*')).find(e =>",
                `e.textContent?.trim() === ${needle} ||`,
                `e.getAttribute('aria-label') === ${needle}`,
                ");",
                "if (el) { el.click(); return 'clicked'; }",
                "return null;",
                "})()",
            ].join("\n");
            const evaluation = await client.Runtime.evaluate({ expression, returnByValue: true });
            if (evaluation.result?.value !== "clicked") {
                throw new Error("Element not found via CDP");
            }
            return succeed(target);
        }
        finally {
            await client.close();
        }
    });

    // OCR: capture the screen, find the text, click the centre of the first match.
    strategies.set("ocr", async (succeed) => {
        const shot = await bridge.call("cg.captureScreen", {});
        const matches = await bridge.call("vision.findText", {
            imagePath: shot.path,
            searchText: target,
        });
        const firstMatch = Array.isArray(matches) ? matches[0] : null;
        if (!firstMatch || !firstMatch.bounds) {
            throw new Error("Target not found via OCR");
        }
        const x = firstMatch.bounds.x + firstMatch.bounds.width / 2;
        const y = firstMatch.bounds.y + firstMatch.bounds.height / 2;
        await bridge.call("cg.mouseClick", { x, y });
        return succeed(`${target} at (${Math.round(x)},${Math.round(y)})`);
    });

    const outcome = await executeWithFallback("click", methods, DEFAULT_RETRY_POLICY, async (method, attempt) => {
        const startedAt = Date.now();
        const succeed = (label) => ({ ok: true, method, durationMs: Date.now() - startedAt, fallbackFrom: null, retries: attempt, error: null, target: label });
        try {
            const strategy = strategies.get(method);
            if (!strategy) {
                throw new Error(`Unknown method: ${method}`);
            }
            return await strategy(succeed);
        }
        catch (err) {
            const message = err instanceof Error ? err.message : String(err);
            return { ok: false, method, durationMs: Date.now() - startedAt, fallbackFrom: null, retries: attempt, error: message, target };
        }
    });
    return formatResult("Clicked", target, outcome);
});
1712
+ // ── type_with_fallback ──
1713
// type_with_fallback — types text into a field, trying each planned method in
// turn. Only the "ax" and "cdp" methods are implemented here; any other
// planned method reports a failure result so the chain can move on.
originalTool("type_with_fallback", "Type text into a target field using the canonical fallback chain: AX → CDP → coordinates. Finds the field by label/placeholder, focuses it, then types.", {
    target: z.string().describe("Label, placeholder, or title of the field to type into"),
    text: z.string().describe("Text to type"),
    bundleId: z.string().optional().describe("App bundle ID"),
    clearFirst: z.boolean().optional().describe("Select-all and clear the field before typing (default: false)"),
}, async ({ target, text, bundleId, clearFirst }) => {
    await ensureBridge();
    const plan = planExecution("type", infra());
    const targetPid = await resolvePid(bundleId);
    const result = await executeWithFallback("type", plan, DEFAULT_RETRY_POLICY, async (method, attempt) => {
        const start = Date.now();
        try {
            switch (method) {
                case "ax": {
                    const found = await bridge.call("ax.findElement", {
                        pid: targetPid,
                        title: target,
                        exact: false,
                    });
                    if (clearFirst) {
                        await bridge.call("ax.setElementValue", { pid: targetPid, elementPath: found.elementPath, value: "" });
                    }
                    await bridge.call("ax.setElementValue", { pid: targetPid, elementPath: found.elementPath, value: text });
                    return { ok: true, method, durationMs: Date.now() - start, fallbackFrom: null, retries: attempt, error: null, target };
                }
                case "cdp": {
                    if (!cdpPort)
                        throw new Error("CDP not available");
                    const { CDP: CDPClient, port } = await ensureCDP();
                    const client = await CDPClient({ port });
                    try {
                        const { Runtime, Input } = client;
                        const needle = JSON.stringify(target);
                        // Focus the first editable element whose placeholder, aria-label,
                        // name or associated <label> text equals the target.
                        const evalResult = await Runtime.evaluate({
                            expression: `(() => {
const el = Array.from(document.querySelectorAll('input, textarea, [contenteditable]')).find(e =>
e.getAttribute('placeholder') === ${needle} ||
e.getAttribute('aria-label') === ${needle} ||
e.getAttribute('name') === ${needle} ||
(e.labels && Array.from(e.labels).some(l => l.textContent?.trim() === ${needle}))
);
if (el) { el.focus(); return true; }
return false;
})()`,
                            returnByValue: true,
                        });
                        if (!evalResult.result?.value)
                            throw new Error("Field not found via CDP");
                        if (clearFirst) {
                            // CDP modifier bits: Ctrl=2, Meta/Command=4. macOS uses Cmd+A, and
                            // synthesized shortcuts do not trigger native editing there, so the
                            // explicit "selectAll" editing command is sent with the key event.
                            // The typed text then replaces the selection.
                            const selectAllModifier = process.platform === "darwin" ? 4 : 2;
                            await Input.dispatchKeyEvent({ type: "keyDown", key: "a", code: "KeyA", modifiers: selectAllModifier, commands: ["selectAll"] });
                            await Input.dispatchKeyEvent({ type: "keyUp", key: "a", code: "KeyA", modifiers: selectAllModifier });
                        }
                        for (const char of text) {
                            await Input.dispatchKeyEvent({ type: "keyDown", key: char, text: char });
                            await Input.dispatchKeyEvent({ type: "keyUp", key: char });
                        }
                        return { ok: true, method, durationMs: Date.now() - start, fallbackFrom: null, retries: attempt, error: null, target };
                    }
                    finally {
                        await client.close();
                    }
                }
            }
            throw new Error(`Method ${method} does not support type`);
        }
        catch (err) {
            return { ok: false, method, durationMs: Date.now() - start, fallbackFrom: null, retries: attempt, error: err instanceof Error ? err.message : String(err), target };
        }
    });
    return formatResult("Typed into", target, result);
});
1783
+ // ── read_with_fallback ──
1784
// read_with_fallback — reads text for a target (or everything when the target
// is omitted), trying each planned method in turn. The text that was read is
// carried in the result's `target` field.
originalTool("read_with_fallback", "Read text content from the screen or a specific element using the canonical fallback chain: AX → CDP → OCR. Returns the text found.", {
    target: z.string().optional().describe("Element label/title to read from (omit for full-screen OCR)"),
    bundleId: z.string().optional().describe("App bundle ID"),
}, async ({ target, bundleId }) => {
    await ensureBridge();
    const methods = planExecution("read", infra());
    const targetPid = await resolvePid(bundleId);

    // Each reader receives `succeed(text)` to build the success result, and
    // throws when it cannot read.
    const readers = new Map();

    // AX: value of the matching element, or a dump of the element tree.
    readers.set("ax", async (succeed) => {
        if (target) {
            const located = await bridge.call("ax.findElement", {
                pid: targetPid,
                title: target,
                exact: false,
            });
            const val = await bridge.call("ax.getElementValue", {
                pid: targetPid,
                elementPath: located.elementPath,
            });
            return succeed(val.value ?? "");
        }
        // No specific target — get the full element tree text
        const tree = await bridge.call("ax.getElementTree", {
            pid: targetPid,
            maxDepth: 4,
        });
        return succeed(tree.description ?? JSON.stringify(tree).slice(0, 2000));
    });

    // CDP: value/text of the matching DOM element, or the page's innerText.
    readers.set("cdp", async (succeed) => {
        if (!cdpPort) {
            throw new Error("CDP not available");
        }
        const { CDP: CDPClient, port } = await ensureCDP();
        const client = await CDPClient({ port });
        try {
            if (!target) {
                // Full page text
                const pageEval = await client.Runtime.evaluate({
                    expression: "document.body?.innerText?.slice(0, 4000) ?? ''",
                    returnByValue: true,
                });
                return succeed(String(pageEval.result?.value ?? ""));
            }
            const needle = JSON.stringify(target);
            const expression = [
                "(() => {",
                "const el = Array.from(document.querySelectorAll('*')).find(e =>",
                `e.getAttribute('aria-label') === ${needle} ||`,
                `e.textContent?.trim() === ${needle}`,
                ");",
                "return el ? (el.value ?? el.textContent ?? '').trim() : null;",
                "})()",
            ].join("\n");
            const elementEval = await client.Runtime.evaluate({ expression, returnByValue: true });
            if (elementEval.result?.value == null) {
                throw new Error("Element not found via CDP");
            }
            return succeed(String(elementEval.result.value));
        }
        finally {
            await client.close();
        }
    });

    // OCR: text of the first match for the target, or the full-screen OCR text.
    readers.set("ocr", async (succeed) => {
        const shot = await bridge.call("cg.captureScreen", {});
        if (!target) {
            const ocr = await bridge.call("vision.ocr", { imagePath: shot.path });
            return succeed(ocr.text?.slice(0, 4000) ?? "");
        }
        const matches = await bridge.call("vision.findText", {
            imagePath: shot.path,
            searchText: target,
        });
        const firstMatch = Array.isArray(matches) ? matches[0] : null;
        if (!firstMatch) {
            throw new Error("Text not found via OCR");
        }
        return succeed(firstMatch.text);
    });

    const outcome = await executeWithFallback("read", methods, DEFAULT_RETRY_POLICY, async (method, attempt) => {
        const startedAt = Date.now();
        const succeed = (readText) => ({ ok: true, method, durationMs: Date.now() - startedAt, fallbackFrom: null, retries: attempt, error: null, target: readText });
        try {
            const reader = readers.get(method);
            if (!reader) {
                throw new Error(`Method ${method} does not support read`);
            }
            return await reader(succeed);
        }
        catch (err) {
            const message = err instanceof Error ? err.message : String(err);
            return { ok: false, method, durationMs: Date.now() - startedAt, fallbackFrom: null, retries: attempt, error: message, target: null };
        }
    });

    if (!outcome.ok) {
        const targetNote = target ? ` "${target}"` : "";
        return { content: [{ type: "text", text: `Failed to read${targetNote} — all methods exhausted. Last error: ${outcome.error}` }] };
    }
    const fallbackNote = outcome.fallbackFrom ? ` (fell back from ${outcome.fallbackFrom})` : "";
    return { content: [{ type: "text", text: `Read via ${outcome.method}${fallbackNote} in ${outcome.durationMs}ms:\n\n${outcome.target}` }] };
});
1876
+ // ── locate_with_fallback ──
1877
// locate_with_fallback: resolve a target's on-screen rectangle.
// Each method planned by planExecution("locate", ...) is attempted through
// executeWithFallback; every attempt is reported as a result record
// ({ ok, method, durationMs, fallbackFrom, retries, error, target }) and a
// thrown error is converted into an `ok: false` record rather than propagated.
originalTool("locate_with_fallback", "Find an element's position on screen using the canonical fallback chain: AX → CDP → OCR. Returns bounds (x, y, width, height).", {
    target: z.string().describe("Text, title, or identifier of the element to locate"),
    bundleId: z.string().optional().describe("App bundle ID"),
}, async ({ target, bundleId }) => {
    await ensureBridge();
    const plan = planExecution("locate", infra());
    const targetPid = await resolvePid(bundleId);

    // Textual form of a rectangle, as placed in the result's `target` field.
    const describeBounds = (rect) => `${target} at (${rect.x},${rect.y} ${rect.width}x${rect.height})`;

    // Accessibility lookup: non-exact title match within the resolved process.
    const locateViaAx = async () => {
        const element = await bridge.call("ax.findElement", {
            pid: targetPid,
            title: target,
            exact: false,
        });
        if (!element.bounds) {
            throw new Error("Element found but has no bounds");
        }
        return describeBounds(element.bounds);
    };

    // DOM lookup: first element whose trimmed text or aria-label equals the target.
    const locateViaCdp = async () => {
        if (!cdpPort) {
            throw new Error("CDP not available");
        }
        const { CDP: CDPClient, port } = await ensureCDP();
        const client = await CDPClient({ port });
        try {
            const evaluation = await client.Runtime.evaluate({
                expression: `(() => {
                    const el = Array.from(document.querySelectorAll('*')).find(e =>
                        e.textContent?.trim() === ${JSON.stringify(target)} ||
                        e.getAttribute('aria-label') === ${JSON.stringify(target)}
                    );
                    if (!el) return null;
                    const r = el.getBoundingClientRect();
                    return { x: Math.round(r.x), y: Math.round(r.y), width: Math.round(r.width), height: Math.round(r.height) };
                })()`,
                returnByValue: true,
            });
            const rect = evaluation.result?.value;
            if (!rect) {
                throw new Error("Element not found via CDP");
            }
            return describeBounds(rect);
        }
        finally {
            // Always release the CDP connection, even when the lookup fails.
            await client.close();
        }
    };

    // OCR lookup: capture the screen and use the first text match that has bounds.
    const locateViaOcr = async () => {
        const capture = await bridge.call("cg.captureScreen", {});
        const hits = await bridge.call("vision.findText", {
            imagePath: capture.path,
            searchText: target,
        });
        const firstHit = Array.isArray(hits) ? hits[0] : null;
        if (!firstHit?.bounds) {
            throw new Error("Target not found via OCR");
        }
        return describeBounds(firstHit.bounds);
    };

    const result = await executeWithFallback("locate", plan, DEFAULT_RETRY_POLICY, async (method, attempt) => {
        const startedAt = Date.now();
        try {
            let located;
            if (method === "ax") {
                located = await locateViaAx();
            }
            else if (method === "cdp") {
                located = await locateViaCdp();
            }
            else if (method === "ocr") {
                located = await locateViaOcr();
            }
            else {
                throw new Error(`Method ${method} does not support locate`);
            }
            return { ok: true, method, durationMs: Date.now() - startedAt, fallbackFrom: null, retries: attempt, error: null, target: located };
        }
        catch (err) {
            const message = err instanceof Error ? err.message : String(err);
            return { ok: false, method, durationMs: Date.now() - startedAt, fallbackFrom: null, retries: attempt, error: message, target: null };
        }
    });
    return formatResult("Located", target, result);
});
1948
+ // ── select_with_fallback ──
1949
// select_with_fallback: choose an option in a dropdown/menu control.
// The AX path presses the control, waits briefly, then presses the option;
// the CDP path sets the value of a matching <select> and dispatches "change".
// Each attempt yields a result record; thrown errors become `ok: false`.
originalTool("select_with_fallback", "Select an option from a dropdown/menu using the canonical fallback chain: AX → CDP. Finds the control, opens it, and picks the specified option.", {
    target: z.string().describe("Label or title of the dropdown/menu control"),
    option: z.string().describe("Text of the option to select"),
    bundleId: z.string().optional().describe("App bundle ID"),
}, async ({ target, option, bundleId }) => {
    await ensureBridge();
    const plan = planExecution("select", infra());
    const targetPid = await resolvePid(bundleId);
    const selectionLabel = `${target} → ${option}`;

    // Press an accessibility element identified by its element path.
    const pressElement = (elementPath) => bridge.call("ax.performAction", { pid: targetPid, elementPath, action: "AXPress" });

    const selectViaAx = async () => {
        // Locate the popup button / combo box by (non-exact) title and open it.
        const control = await bridge.call("ax.findElement", {
            pid: targetPid,
            title: target,
            exact: false,
        });
        await pressElement(control.elementPath);
        // Pause before searching for the menu item (the menu was just opened).
        await new Promise((resolve) => setTimeout(resolve, 300));
        const menuItem = await bridge.call("ax.findElement", {
            pid: targetPid,
            title: option,
            exact: false,
        });
        await pressElement(menuItem.elementPath);
    };

    const selectViaCdp = async () => {
        if (!cdpPort) {
            throw new Error("CDP not available");
        }
        const { CDP: CDPClient, port } = await ensureCDP();
        const client = await CDPClient({ port });
        try {
            // The page script returns null (no <select>), 'no_option', or 'selected'.
            const evaluation = await client.Runtime.evaluate({
                expression: `(() => {
                    const sel = Array.from(document.querySelectorAll('select')).find(s =>
                        s.getAttribute('aria-label') === ${JSON.stringify(target)} ||
                        s.getAttribute('name') === ${JSON.stringify(target)} ||
                        (s.labels && Array.from(s.labels).some(l => l.textContent?.trim() === ${JSON.stringify(target)}))
                    );
                    if (!sel) return null;
                    const opt = Array.from(sel.options).find(o => o.text.trim() === ${JSON.stringify(option)} || o.value === ${JSON.stringify(option)});
                    if (!opt) return 'no_option';
                    sel.value = opt.value;
                    sel.dispatchEvent(new Event('change', { bubbles: true }));
                    return 'selected';
                })()`,
                returnByValue: true,
            });
            const outcome = evaluation.result?.value;
            if (outcome === "selected") {
                return;
            }
            if (outcome === "no_option") {
                throw new Error(`Option "${option}" not found in select`);
            }
            throw new Error("Select element not found via CDP");
        }
        finally {
            // Always release the CDP connection, even when selection fails.
            await client.close();
        }
    };

    const result = await executeWithFallback("select", plan, DEFAULT_RETRY_POLICY, async (method, attempt) => {
        const startedAt = Date.now();
        try {
            if (method === "ax") {
                await selectViaAx();
            }
            else if (method === "cdp") {
                await selectViaCdp();
            }
            else {
                throw new Error(`Method ${method} does not support select`);
            }
            return { ok: true, method, durationMs: Date.now() - startedAt, fallbackFrom: null, retries: attempt, error: null, target: selectionLabel };
        }
        catch (err) {
            const message = err instanceof Error ? err.message : String(err);
            return { ok: false, method, durationMs: Date.now() - startedAt, fallbackFrom: null, retries: attempt, error: message, target: null };
        }
    });
    return formatResult("Selected", selectionLabel, result);
});
2023
+ // ── scroll_with_fallback ──
2024
// scroll_with_fallback: scroll by a fixed amount, or scroll until text is visible.
//
// Parameters:
//   direction - "up" | "down" | "left" | "right"
//   amount    - distance passed to the scroll call (defaults to 300)
//   target    - when set, scroll repeatedly (at most 10 times) until OCR finds this text
//   bundleId  - optional app bundle ID, resolved to a pid for the AX path
//
// Returns an MCP text result. In fixed-amount mode each planned method is tried
// through executeWithFallback and failures are reported as `ok: false` records.
originalTool("scroll_with_fallback", "Scroll within an element or the active window using the canonical fallback chain: AX → CDP → coordinates. Scrolls until target text is visible, or by a fixed amount.", {
    direction: z.enum(["up", "down", "left", "right"]).describe("Scroll direction"),
    amount: z.number().optional().describe("Scroll amount in pixels (default: 300)"),
    target: z.string().optional().describe("Scroll until this text is visible (overrides amount)"),
    bundleId: z.string().optional().describe("App bundle ID"),
}, async ({ direction, amount, target, bundleId }) => {
    await ensureBridge();
    const plan = planExecution("scroll", infra());
    const targetPid = await resolvePid(bundleId);
    const scrollAmount = amount ?? 300;
    // The deltas depend only on the arguments, so compute them once for both modes.
    const deltaX = direction === "left" ? -scrollAmount : direction === "right" ? scrollAmount : 0;
    const deltaY = direction === "up" ? -scrollAmount : direction === "down" ? scrollAmount : 0;

    if (target) {
        const MAX_SCROLLS = 10;
        // Best-effort visibility probe: an OCR/capture failure counts as
        // "not visible yet" so that scrolling continues.
        const isTargetVisible = async () => {
            try {
                const shot = await bridge.call("cg.captureScreen", {});
                const matches = await bridge.call("vision.findText", {
                    imagePath: shot.path,
                    searchText: target,
                });
                return Array.isArray(matches) && matches.length > 0;
            }
            catch {
                return false;
            }
        };
        // Check before every scroll AND once more after the last one; otherwise
        // text revealed by the final scroll would be reported as "not found".
        for (let scrolls = 0; scrolls <= MAX_SCROLLS; scrolls++) {
            if (await isTargetVisible()) {
                return { content: [{ type: "text", text: `"${target}" is visible after ${scrolls} scroll(s).` }] };
            }
            if (scrolls === MAX_SCROLLS) {
                break;
            }
            await bridge.call("cg.scroll", { deltaX, deltaY });
            // Pause before re-capturing the screen for the next check.
            await new Promise((r) => setTimeout(r, 400));
        }
        return { content: [{ type: "text", text: `Scrolled ${direction} 10 times but "${target}" not found.` }] };
    }

    // Fixed-amount scroll via fallback chain
    const result = await executeWithFallback("scroll", plan, DEFAULT_RETRY_POLICY, async (method, attempt) => {
        const start = Date.now();
        try {
            switch (method) {
                case "ax": {
                    // Query the AX tree first (result unused): if the call throws,
                    // this attempt fails and the fallback chain moves on. The scroll
                    // itself is issued through cg.scroll since AX scroll is less reliable.
                    await bridge.call("ax.getElementTree", {
                        pid: targetPid,
                        maxDepth: 1,
                    });
                    await bridge.call("cg.scroll", { deltaX, deltaY });
                    return { ok: true, method, durationMs: Date.now() - start, fallbackFrom: null, retries: attempt, error: null, target: `${direction} ${scrollAmount}px` };
                }
                case "cdp": {
                    if (!cdpPort)
                        throw new Error("CDP not available");
                    const { CDP: CDPClient, port } = await ensureCDP();
                    const client = await CDPClient({ port });
                    try {
                        const { Runtime } = client;
                        await Runtime.evaluate({
                            expression: `window.scrollBy(${deltaX}, ${deltaY})`,
                        });
                        return { ok: true, method, durationMs: Date.now() - start, fallbackFrom: null, retries: attempt, error: null, target: `${direction} ${scrollAmount}px` };
                    }
                    finally {
                        // Always release the CDP connection.
                        await client.close();
                    }
                }
                case "coordinates": {
                    await bridge.call("cg.scroll", { deltaX, deltaY });
                    return { ok: true, method, durationMs: Date.now() - start, fallbackFrom: null, retries: attempt, error: null, target: `${direction} ${scrollAmount}px` };
                }
            }
            throw new Error(`Method ${method} does not support scroll`);
        }
        catch (err) {
            return { ok: false, method, durationMs: Date.now() - start, fallbackFrom: null, retries: attempt, error: err instanceof Error ? err.message : String(err), target: null };
        }
    });
    return formatResult("Scrolled", `${direction} ${scrollAmount}px`, result);
});
2103
+ // ── wait_for_state ──
2104
// wait_for_state: poll until a screen condition holds or the timeout elapses.
//
// Parameters:
//   condition - "text_appears" | "text_disappears" | "element_exists"
//   target    - text (OCR / page text) or element title (AX) to watch for
//   timeoutMs - maximum wait in ms (defaults to 10000)
//   pollMs    - delay between polls in ms (defaults to 1000)
//   bundleId  - optional app bundle ID, resolved to a pid for the AX lookup
//
// Detection: "element_exists" uses ax.findElement; the text conditions use a
// screen capture + vision.findText, then a CDP page-text check when CDP is
// configured and the text was not found. Returns an MCP text result describing
// either the satisfied condition or the timeout.
originalTool("wait_for_state", "Wait until a condition is met on screen: text appears, text disappears, or element becomes available. Polls at intervals using the fallback chain.", {
    condition: z.enum(["text_appears", "text_disappears", "element_exists"]).describe("What to wait for"),
    target: z.string().describe("Text or element to watch for"),
    timeoutMs: z.number().optional().describe("Maximum wait time in ms (default: 10000)"),
    pollMs: z.number().optional().describe("Poll interval in ms (default: 1000)"),
    bundleId: z.string().optional().describe("App bundle ID"),
}, async ({ condition, target, timeoutMs, pollMs, bundleId }) => {
    await ensureBridge();
    const timeout = timeoutMs ?? 10000;
    const poll = pollMs ?? 1000;
    const startedAt = Date.now();
    const deadline = startedAt + timeout;
    const targetPid = await resolvePid(bundleId);
    while (Date.now() < deadline) {
        let found = false;
        // True once at least one text probe completed without throwing. A probe
        // that throws gives no information, so it must not count as proof that
        // the text is gone.
        // NOTE(review): assumes vision.findText resolves with an empty result
        // (rather than throwing) when the text is simply absent, as the
        // Array.isArray/length check below implies; confirm against the bridge.
        let probed = false;
        try {
            if (condition === "element_exists") {
                // Reaching the next line means the lookup did not throw.
                await bridge.call("ax.findElement", { pid: targetPid, title: target, exact: false });
                found = true;
            }
            else {
                // Text-based: OCR over a fresh screen capture.
                const shot = await bridge.call("cg.captureScreen", {});
                const matches = await bridge.call("vision.findText", {
                    imagePath: shot.path,
                    searchText: target,
                });
                found = Array.isArray(matches) && matches.length > 0;
                probed = true;
            }
        }
        catch {
            found = false;
        }
        // Also try CDP if available and text-based
        if (!found && cdpPort && condition !== "element_exists") {
            try {
                const { CDP: CDPClient, port } = await ensureCDP();
                const client = await CDPClient({ port });
                try {
                    const { Runtime } = client;
                    const evalResult = await Runtime.evaluate({
                        expression: `document.body?.innerText?.includes(${JSON.stringify(target)}) ?? false`,
                        returnByValue: true,
                    });
                    found = !!evalResult.result?.value;
                    probed = true;
                }
                finally {
                    await client.close();
                }
            }
            catch { /* CDP unavailable */ }
        }
        const elapsed = `${Date.now() - startedAt}ms`;
        if (condition === "text_appears" && found) {
            return { content: [{ type: "text", text: `"${target}" appeared after ${elapsed}.` }] };
        }
        // Only report disappearance when a probe actually ran and saw no text.
        if (condition === "text_disappears" && !found && probed) {
            return { content: [{ type: "text", text: `"${target}" disappeared after ${elapsed}.` }] };
        }
        if (condition === "element_exists" && found) {
            return { content: [{ type: "text", text: `Element "${target}" found after ${elapsed}.` }] };
        }
        // Never sleep past the deadline, so the wait honours timeoutMs.
        const remaining = deadline - Date.now();
        if (remaining <= 0) {
            break;
        }
        await new Promise((r) => setTimeout(r, Math.min(poll, remaining)));
    }
    return { content: [{ type: "text", text: `Timeout: "${target}" — condition "${condition}" not met after ${timeout}ms.` }] };
});
2172
+ // ═══════════════════════════════════════════════
2173
+ // JOBS — persistent multi-step automation with resume
2174
+ // ═══════════════════════════════════════════════
2175
// job_create: register a new job with the job manager.
// Only arguments that were actually supplied are forwarded to
// jobManager.create; the reply summarises the created job.
originalTool("job_create", "Create a new automation job. Jobs persist across restarts and can be resumed from the last successful step.", {
    task: z.string().describe("Human-readable description of what this job should do"),
    playbookId: z.string().optional().describe("Playbook ID to drive this job (optional — AI-only if omitted)"),
    bundleId: z.string().optional().describe("Target application bundle ID (e.g., 'com.apple.Safari'). Omit for app-agnostic jobs."),
    windowId: z.number().optional().describe("Target window ID within the application. Omit for app-agnostic jobs."),
    steps: z.array(z.object({
        action: z.string().describe("Action name (e.g., navigate, click, type_text, screenshot, key)"),
        target: z.string().optional().describe("Target element or URL"),
        description: z.string().optional().describe("Human-readable description"),
        text: z.string().optional().describe("Text payload for type_text/type_into actions"),
        keys: z.string().optional().describe("Key combo string for key/key_combo actions (e.g., 'cmd+a')"),
        value: z.string().optional().describe("Value payload for set_value actions"),
    })).optional().describe("Ordered steps for this job (can be populated from a playbook)"),
    tags: z.array(z.string()).optional().describe("Tags for filtering/grouping"),
    priority: z.number().optional().describe("Priority (lower = higher priority, default: 10)"),
    maxRetries: z.number().optional().describe("Max retry attempts on failure (default: 3)"),
    sessionId: z.string().optional().describe("Bind to an existing supervisor session"),
}, async ({ task, playbookId, bundleId, windowId, steps, tags, priority, maxRetries, sessionId }) => {
    // Copy each optional argument only when it is defined, so absent keys stay
    // absent (not `undefined`) in the options object.
    const optionalArgs = { playbookId, bundleId, windowId, steps, tags, priority, maxRetries, sessionId };
    const createOpts = { task };
    for (const [key, value] of Object.entries(optionalArgs)) {
        if (value !== undefined) {
            createOpts[key] = value;
        }
    }
    const job = jobManager.create(createOpts);
    const windowSuffix = job.windowId != null ? ` window ${job.windowId}` : "";
    const summary = [
        `Job created: ${job.id}`,
        `Task: ${job.task}`,
        `State: ${job.state}`,
        `Steps: ${job.steps.length}`,
        `Priority: ${job.priority}`,
        `Target: ${job.bundleId ?? "(any app)"}${windowSuffix}`,
    ].join("\n");
    return { content: [{ type: "text", text: summary }] };
});
2213
// job_status: render a multi-line report for one job: header fields, step
// counts by status, the resume point, optional error/timing fields, and a
// per-step listing. Replies "not found" when the job ID is unknown.
originalTool("job_status", "Get detailed status of a job including step progress and resume point.", {
    jobId: z.string().describe("Job ID"),
}, async ({ jobId }) => {
    const job = jobManager.get(jobId);
    if (!job) {
        return { content: [{ type: "text", text: `Job ${jobId} not found.` }] };
    }
    const countSteps = (status) => job.steps.filter((step) => step.status === status).length;
    const completed = countSteps("done");
    const failed = countSteps("failed");
    const pending = countSteps("pending");
    const resume = jobManager.getResumePoint(jobId);
    const windowSuffix = job.windowId != null ? ` window ${job.windowId}` : "";
    const resumeLabel = resume
        ? `step ${resume.stepIndex} — ${resume.step.description ?? resume.step.action}`
        : "(none — all done or no pending steps)";
    const lines = [
        `Job: ${job.id}`,
        `Task: ${job.task}`,
        `State: ${job.state}`,
        `Playbook: ${job.playbookId ?? "(none)"}`,
        `Target: ${job.bundleId ?? "(any app)"}${windowSuffix}`,
        `Session: ${job.sessionId ?? "(unbound)"}`,
        `Steps: ${completed} done, ${failed} failed, ${pending} pending (${job.steps.length} total)`,
        `Last completed step: ${job.lastStep}`,
        `Resume point: ${resumeLabel}`,
        `Retries: ${job.retries}/${job.maxRetries}`,
    ];
    // Optional fields are listed only when they hold a truthy value.
    const optionalFields = [
        ["Block reason", job.blockReason],
        ["Last error", job.lastError],
        ["Started", job.startedAt],
        ["Completed", job.completedAt],
    ];
    for (const [label, value] of optionalFields) {
        if (value) {
            lines.push(`${label}: ${value}`);
        }
    }
    if (job.steps.length > 0) {
        // Status glyphs; any status not listed here is drawn as "○".
        const statusIcons = new Map([["done", "✓"], ["failed", "✗"], ["skipped", "–"]]);
        lines.push("", "Steps:");
        for (const step of job.steps) {
            const icon = statusIcons.get(step.status) ?? "○";
            const errorNote = step.error ? ` (${step.error})` : "";
            const durationNote = step.durationMs != null ? ` ${step.durationMs}ms` : "";
            lines.push(` ${icon} [${step.index}] ${step.description ?? step.action}${errorNote}${durationNote}`);
        }
    }
    return { content: [{ type: "text", text: lines.join("\n") }] };
});
2252
// job_list: print overall per-state totals from jobManager.summary() followed
// by up to 50 of the jobs returned by jobManager.list(state).
originalTool("job_list", "List all jobs, optionally filtered by state. Shows summary counts and job details.", {
    state: z.enum(["queued", "running", "blocked", "waiting_human", "done", "failed"]).optional().describe("Filter by state"),
}, async ({ state }) => {
    const MAX_LISTED = 50;
    const jobs = jobManager.list(state);
    const sum = jobManager.summary();
    const counts = sum.byState;
    const lines = [
        `Jobs: ${sum.total} total — queued:${counts.queued} running:${counts.running} blocked:${counts.blocked} waiting_human:${counts.waiting_human} done:${counts.done} failed:${counts.failed}`,
    ];
    if (sum.oldestQueued) {
        lines.push(`Oldest queued: ${sum.oldestQueued}`);
    }
    if (sum.runningJobIds.length > 0) {
        lines.push(`Running: ${sum.runningJobIds.join(", ")}`);
    }
    if (jobs.length > 0) {
        lines.push("");
        jobs.slice(0, MAX_LISTED).forEach((job) => {
            const doneSteps = job.steps.filter((step) => step.status === "done").length;
            // The task text is cut to 60 characters to keep one line per job.
            lines.push(`[${job.state}] ${job.id} — ${job.task.slice(0, 60)} (${doneSteps}/${job.steps.length} steps, pri=${job.priority})`);
        });
        const hidden = jobs.length - MAX_LISTED;
        if (hidden > 0) {
            lines.push(` ... and ${hidden} more`);
        }
    }
    return { content: [{ type: "text", text: lines.join("\n") }] };
});
2275
// job_transition: ask the job manager to move a job to another state.
// `reason` is forwarded as `blockReason` for blocked/waiting_human and as
// `error` for failed; for other target states it only appears in the reply.
originalTool("job_transition", "Move a job to a new state. Validates the transition is allowed by the state machine.", {
    jobId: z.string().describe("Job ID"),
    to: z.enum(["queued", "running", "blocked", "waiting_human", "done", "failed"]).describe("Target state"),
    reason: z.string().optional().describe("Block/failure reason"),
    sessionId: z.string().optional().describe("Session ID (when transitioning to running)"),
}, async ({ jobId, to, reason, sessionId }) => {
    const transOpts = {};
    if (reason) {
        const isBlockingState = to === "blocked" || to === "waiting_human";
        if (isBlockingState) {
            transOpts.blockReason = reason;
        }
        else if (to === "failed") {
            transOpts.error = reason;
        }
    }
    if (sessionId !== undefined) {
        transOpts.sessionId = sessionId;
    }
    const outcome = jobManager.transition(jobId, to, transOpts);
    // The manager signals a rejected transition with an `error` property.
    if ("error" in outcome) {
        return { content: [{ type: "text", text: `Error: ${outcome.error}` }] };
    }
    const reasonSuffix = reason ? ` (${reason})` : "";
    return { content: [{ type: "text", text: `Job ${jobId} → ${to}${reasonSuffix}` }] };
});
2293
// job_step_done: record a step as completed, then report the next resume
// point (or that no steps remain).
originalTool("job_step_done", "Mark a step as completed and advance the job's resume point.", {
    jobId: z.string().describe("Job ID"),
    stepIndex: z.number().describe("Step index to mark done"),
    durationMs: z.number().optional().describe("How long the step took"),
}, async ({ jobId, stepIndex, durationMs }) => {
    // Forward durationMs only when the caller supplied it.
    const stepOpts = durationMs !== undefined ? { durationMs } : {};
    const outcome = jobManager.completeStep(jobId, stepIndex, stepOpts);
    if ("error" in outcome) {
        return { content: [{ type: "text", text: `Error: ${outcome.error}` }] };
    }
    const resume = jobManager.getResumePoint(jobId);
    let followUp = " All steps complete.";
    if (resume) {
        followUp = ` Next: step ${resume.stepIndex} — ${resume.step.description ?? resume.step.action}`;
    }
    return { content: [{ type: "text", text: `Step ${stepIndex} done.${followUp}` }] };
});
2307
// job_step_fail: record a step failure via jobManager.failStep and echo the
// error back; a manager-side rejection is reported as "Error: ...".
originalTool("job_step_fail", "Mark a step as failed. The job stays running — caller decides whether to retry, block, or fail the job.", {
    jobId: z.string().describe("Job ID"),
    stepIndex: z.number().describe("Step index that failed"),
    error: z.string().describe("Error message"),
}, async ({ jobId, stepIndex, error }) => {
    const outcome = jobManager.failStep(jobId, stepIndex, error);
    const text = "error" in outcome
        ? `Error: ${outcome.error}`
        : `Step ${stepIndex} failed: ${error}`;
    return { content: [{ type: "text", text }] };
});
2317
// job_resume: describe where a job would continue: the step index, its
// description (or action name), the action, and the target when present.
originalTool("job_resume", "Get the resume point for a job — the next pending step after the last successful one.", {
    jobId: z.string().describe("Job ID"),
}, async ({ jobId }) => {
    const reply = (text) => ({ content: [{ type: "text", text }] });
    const job = jobManager.get(jobId);
    if (!job) {
        return reply(`Job ${jobId} not found.`);
    }
    const resume = jobManager.getResumePoint(jobId);
    if (!resume) {
        return reply(`No pending steps. Last completed: ${job.lastStep}. State: ${job.state}.`);
    }
    const { step, stepIndex } = resume;
    const targetLine = step.target ? `\nTarget: ${step.target}` : "";
    return reply(`Resume at step ${stepIndex}: ${step.description ?? step.action}\nAction: ${step.action}${targetLine}`);
});
2329
// job_dequeue: take the next job from jobManager.dequeue (optionally bound to
// a session) and report its task, step count, and resume position.
originalTool("job_dequeue", "Pop the highest-priority queued job and transition it to running.", {
    sessionId: z.string().optional().describe("Session ID to bind the job to"),
}, async ({ sessionId }) => {
    const job = jobManager.dequeue(sessionId);
    if (!job) {
        return { content: [{ type: "text", text: "No queued jobs." }] };
    }
    const resume = jobManager.getResumePoint(job.id);
    const resumeLabel = resume ? `step ${resume.stepIndex}` : "start";
    const report = [
        `Dequeued: ${job.id}`,
        `Task: ${job.task}`,
        `Steps: ${job.steps.length}`,
        `Resume: ${resumeLabel}`,
    ].join("\n");
    return { content: [{ type: "text", text: report }] };
});
2338
// job_remove: delete a job via jobManager.remove and report whether a job
// with that ID was actually removed.
originalTool("job_remove", "Remove a job entirely (any state).", {
    jobId: z.string().describe("Job ID"),
}, async ({ jobId }) => {
    const removed = jobManager.remove(jobId);
    let text;
    if (removed) {
        text = `Job ${jobId} removed.`;
    }
    else {
        text = `Job ${jobId} not found.`;
    }
    return { content: [{ type: "text", text }] };
});
2344
+ // ── Job Runner + Worker ─────────────────────────
2345
// Directory handed to the PlaybookStore: ~/.screenhand/playbooks
const PLAYBOOKS_DIR = path.join(os.homedir(), ".screenhand", "playbooks");
// Lazily created JobRunner, reused by every later getJobRunner() call.
let activeJobRunner = null;
/**
 * Return the shared JobRunner, constructing it on first use.
 *
 * Construction wires adapter → runtime service → playbook engine, loads the
 * playbook store, and captures the CDP settings as they are at that moment;
 * later changes to `cdpPort` do not affect an already-built runner.
 */
function getJobRunner() {
    if (activeJobRunner) {
        return activeJobRunner;
    }
    const adapter = new AccessibilityAdapter(bridge);
    const logger = new TimelineLogger();
    const runtimeService = new AutomationRuntimeService(adapter, logger);
    const playbookEngine = new PlaybookEngine(runtimeService);
    const playbookStore = new PlaybookStore(PLAYBOOKS_DIR);
    playbookStore.load();

    const runnerConfig = {
        hasCDP: cdpPort !== null,
        playbookEngine,
        playbookStore,
        runtimeService,
    };
    if (cdpPort) {
        // Connection factory exposing only the Runtime and Input domains plus close().
        runnerConfig.cdpConnect = async () => {
            const { CDP: CDPClient, port } = await ensureCDP();
            const client = await CDPClient({ port });
            return { Runtime: client.Runtime, Input: client.Input, close: () => client.close() };
        };
    }
    activeJobRunner = new JobRunner(bridge, jobManager, leaseManager, supervisor, runnerConfig);
    return activeJobRunner;
}
2375
// job_run: run one job through the shared JobRunner and report its final
// state, step progress, duration, and error (if any). A falsy runner result
// is reported as an empty queue.
originalTool("job_run", "Execute the next queued job: dequeue → claim session → run steps through fallback chain → auto-transition. Returns when the job completes, blocks, or fails.", {}, async () => {
    await ensureBridge();
    const outcome = await getJobRunner().run();
    if (!outcome) {
        return { content: [{ type: "text", text: "No queued jobs." }] };
    }
    const report = [
        `Job: ${outcome.jobId}`,
        `Final state: ${outcome.finalState}`,
        `Steps: ${outcome.stepsCompleted}/${outcome.totalSteps}`,
        `Duration: ${outcome.durationMs}ms`,
        ...(outcome.error ? [`Error: ${outcome.error}`] : []),
    ];
    return { content: [{ type: "text", text: report.join("\n") }] };
});
2391
// job_run_all: run jobs one after another until the runner yields nothing or
// `maxJobs` results have been collected, then print one line per job plus a
// done/failed/blocked tally.
// NOTE(review): the tool description says processing stops when a job
// blocks/fails, but this loop only stops on an empty queue or the maxJobs cap.
// Confirm which behaviour is intended.
originalTool("job_run_all", "Process all queued jobs sequentially until the queue is empty or a job blocks/fails. Each job gets its own session.", {
    maxJobs: z.number().optional().describe("Max jobs to process (default: unlimited)"),
}, async ({ maxJobs }) => {
    await ensureBridge();
    const runner = getJobRunner();
    const limit = maxJobs ?? Infinity;
    const results = [];
    while (results.length < limit) {
        const outcome = await runner.run();
        if (!outcome) {
            break;
        }
        results.push(outcome);
    }
    if (results.length === 0) {
        return { content: [{ type: "text", text: "No queued jobs." }] };
    }
    const tally = { done: 0, failed: 0, blocked: 0 };
    const lines = [`Processed ${results.length} job(s):`];
    for (const r of results) {
        const errorNote = r.error ? ` — ${r.error}` : "";
        lines.push(` ${r.jobId}: ${r.finalState} (${r.stepsCompleted}/${r.totalSteps} steps, ${r.durationMs}ms)${errorNote}`);
        if (r.finalState === "done") {
            tally.done += 1;
        }
        else if (r.finalState === "failed") {
            tally.failed += 1;
        }
        else if (r.finalState === "blocked" || r.finalState === "waiting_human") {
            // waiting_human is counted together with blocked in the summary.
            tally.blocked += 1;
        }
    }
    lines.push(`\nSummary: ${tally.done} done, ${tally.failed} failed, ${tally.blocked} blocked`);
    return { content: [{ type: "text", text: lines.join("\n") }] };
});
2416
+ // ── Job Worker Daemon (separate process, survives restarts) ───
2417
// Entry point of the worker daemon, resolved relative to this file and run
// through `npx tsx`.
// NOTE(review): this points at a .ts file; confirm that the TypeScript source
// actually ships at this location alongside the compiled output.
const WORKER_DAEMON_PATH = path.resolve(__dirname, "scripts/worker-daemon.ts");
// worker_start: launch the worker daemon as a detached child process.
//
// Parameters:
//   pollMs  - forwarded as `--poll <ms>` when supplied
//   maxJobs - forwarded as `--max-jobs <n>` when supplied (0 = unlimited)
//
// Returns an MCP text result: already running, started (with pid), spawn
// failed, or spawn attempted but the pid is not yet confirmed.
originalTool("worker_start", "Start the job worker daemon as a detached background process. Survives MCP/client restarts. Continuously processes the job queue.", {
    pollMs: z.number().optional().describe("Poll interval when queue is empty (default: 3000ms)"),
    maxJobs: z.number().optional().describe("Max jobs to process before auto-stopping (0 = unlimited, default: 0)"),
}, async ({ pollMs, maxJobs }) => {
    const existingPid = getWorkerDaemonPid();
    if (existingPid !== null) {
        return { content: [{ type: "text", text: `Worker daemon is already running (pid=${existingPid}).` }] };
    }
    const daemonArgs = ["tsx", WORKER_DAEMON_PATH];
    if (pollMs !== undefined)
        daemonArgs.push("--poll", String(pollMs));
    if (maxJobs !== undefined)
        daemonArgs.push("--max-jobs", String(maxJobs));
    const child = spawn("npx", daemonArgs, {
        detached: true,
        stdio: "ignore",
        env: { ...process.env },
    });
    // A failed spawn (e.g. `npx` not on PATH) is delivered as an 'error' event.
    // Without a listener that event would be thrown as an uncaught exception
    // in this process, so capture it and report it to the caller instead.
    let spawnError = null;
    child.on("error", (err) => {
        spawnError = err;
    });
    // Detach the child from this process's event loop so it can outlive us.
    child.unref();
    // Wait briefly for PID file to appear
    await new Promise((r) => setTimeout(r, 1500));
    if (spawnError) {
        const reason = spawnError instanceof Error ? spawnError.message : String(spawnError);
        return { content: [{ type: "text", text: `Failed to start worker daemon: ${reason}` }] };
    }
    const pid = getWorkerDaemonPid();
    // The schema defines 0 as "unlimited", so display it that way (this also
    // matches how worker_status prints the limit).
    const maxJobsLabel = maxJobs || "unlimited";
    return { content: [{ type: "text", text: pid
                ? `Worker daemon started (pid=${pid}).\nPoll: ${pollMs ?? 3000}ms | Max jobs: ${maxJobsLabel}\nLog: ${WORKER_LOG_FILE}`
                : `Worker daemon spawn attempted but PID not yet confirmed. Check log: ${WORKER_LOG_FILE}` }] };
});
2444
// worker_stop: send SIGTERM to the worker daemon, wait two seconds, then
// report whether a daemon pid is still registered, together with the job
// counters read from the worker status.
originalTool("worker_stop", "Stop the worker daemon. Sends SIGTERM for graceful shutdown — current job finishes before exit.", {}, async () => {
    const reply = (text) => ({ content: [{ type: "text", text }] });
    const pid = getWorkerDaemonPid();
    if (pid === null) {
        return reply("Worker daemon is not running.");
    }
    try {
        process.kill(pid, "SIGTERM");
    }
    catch {
        return reply(`Failed to send SIGTERM to pid=${pid}. Process may have already exited.`);
    }
    // Give the daemon a moment to exit before re-checking.
    await new Promise((resolve) => setTimeout(resolve, 2000));
    const remainingPid = getWorkerDaemonPid();
    const status = getWorkerLiveStatus();
    const summary = `Jobs processed: ${status.jobsProcessed} (${status.jobsDone} done, ${status.jobsFailed} failed, ${status.jobsBlocked} blocked)`;
    if (remainingPid) {
        return reply(`SIGTERM sent to pid=${pid} but process is still running. It may be finishing a job.\n${summary}`);
    }
    return reply(`Worker daemon stopped (was pid=${pid}).\n${summary}`);
});
2464
// Tool: worker_status
// Formats the object returned by getWorkerLiveStatus() as a plain-text report:
// run state, timing, job counters, the last job, up to ten recent results and
// the log file location.
originalTool("worker_status", "Get the current status of the worker daemon (reads persisted state from disk).", {}, async () => {
    const status = getWorkerLiveStatus();
    const pidSuffix = status.pid ? ` (pid=${status.pid})` : "";
    const report = [
        `Running: ${status.running}${pidSuffix}`,
        `Started: ${status.startedAt ?? "(not started)"}`,
        `Uptime: ${Math.round(status.uptimeMs / 1000)}s`,
        `Poll: ${status.pollMs}ms | Max jobs: ${status.maxJobs || "unlimited"}`,
        `Jobs processed: ${status.jobsProcessed}`,
        ` Done: ${status.jobsDone}`,
        ` Failed: ${status.jobsFailed}`,
        ` Blocked: ${status.jobsBlocked}`,
    ];
    if (status.lastJobId) {
        report.push(`Last job: ${status.lastJobId} → ${status.lastJobState}`);
    }
    const recent = status.recentResults;
    if (recent.length > 0) {
        report.push("", `Recent (last ${Math.min(recent.length, 10)}):`);
        // Only the ten newest entries are listed.
        const recentLines = recent
            .slice(-10)
            .map((r) => ` ${r.jobId}: ${r.finalState} (${r.stepsCompleted}/${r.totalSteps}, ${r.durationMs}ms)`);
        report.push(...recentLines);
    }
    report.push("", `Log: ${WORKER_LOG_FILE}`);
    return { content: [{ type: "text", text: report.join("\n") }] };
});
2487
+ // ═══════════════════════════════════════════════
2488
+ // CODEX MONITOR — watch VS Code terminals, auto-assign tasks
2489
+ // ═══════════════════════════════════════════════
2490
+ // Daemon state directory: every monitor file below lives under <home>/.screenhand/monitor
2491
+ const MONITOR_DIR = path.join(os.homedir(), ".screenhand", "monitor");
2492
+ const MONITOR_STATE = path.join(MONITOR_DIR, "state.json"); // parsed by readDaemonState()
2493
+ const MONITOR_TASKS = path.join(MONITOR_DIR, "tasks.json"); // read by readDaemonTasks(), written by writeDaemonTasks()
2494
+ const MONITOR_PID = path.join(MONITOR_DIR, "daemon.pid"); // read by isDaemonRunning()
2495
+ const MONITOR_LOG = path.join(MONITOR_DIR, "daemon.log"); // tailed by codex_monitor_status
2496
+ const DAEMON_SCRIPT = path.resolve(__dirname, "scripts", "codex-monitor-daemon.ts"); // script handed to `npx tsx` by codex_monitor_start
2497
/**
 * Reports whether the process recorded in the monitor PID file is alive.
 *
 * The PID file must contain a positive integer. Empty, zero, negative or
 * non-numeric content is treated as "not running": `process.kill(0, ...)` and
 * negative PIDs address process groups rather than a single process, so
 * accepting them would make a later SIGTERM hit this server's own group.
 *
 * @returns {{ running: boolean, pid: number | null }} `pid` is null whenever
 *   `running` is false.
 */
function isDaemonRunning() {
    const notRunning = { running: false, pid: null };
    try {
        if (!fs.existsSync(MONITOR_PID)) {
            return notRunning;
        }
        const pid = Number(fs.readFileSync(MONITOR_PID, "utf-8").trim());
        if (!Number.isSafeInteger(pid) || pid <= 0) {
            return notRunning;
        }
        // Signal 0 performs the existence check without delivering a signal.
        process.kill(pid, 0);
        return { running: true, pid };
    }
    catch {
        // Unreadable PID file, or the process could not be signalled.
        return notRunning;
    }
}
2510
/**
 * Loads the monitor state file.
 *
 * @returns {*} The parsed JSON content of MONITOR_STATE, or null when the file
 *   is missing, unreadable or not valid JSON.
 */
function readDaemonState() {
    let snapshot = null;
    try {
        if (fs.existsSync(MONITOR_STATE)) {
            const raw = fs.readFileSync(MONITOR_STATE, "utf-8");
            snapshot = JSON.parse(raw);
        }
    }
    catch {
        snapshot = null;
    }
    return snapshot;
}
2520
/**
 * Loads the task queue from MONITOR_TASKS.
 *
 * Callers use the result as an array right away (`.filter`, `.push`, `.sort`),
 * so anything that is not a JSON array is treated like a missing file.
 *
 * @returns {Array} The parsed task list, or an empty array when the file is
 *   missing, unreadable, not valid JSON, or does not contain an array.
 */
function readDaemonTasks() {
    try {
        if (!fs.existsSync(MONITOR_TASKS)) {
            return [];
        }
        const parsed = JSON.parse(fs.readFileSync(MONITOR_TASKS, "utf-8"));
        return Array.isArray(parsed) ? parsed : [];
    }
    catch {
        return [];
    }
}
2530
/**
 * Persists the task queue to MONITOR_TASKS as 2-space-indented JSON.
 *
 * @param {*} tasks - Value to serialise; callers pass the task array.
 */
function writeDaemonTasks(tasks) {
    // The monitor directory may not exist before the first write.
    fs.mkdirSync(MONITOR_DIR, { recursive: true });
    const payload = JSON.stringify(tasks, null, 2);
    fs.writeFileSync(MONITOR_TASKS, payload);
}
2534
// Tool: codex_monitor_start
// Spawns the monitor daemon script through `npx tsx` as a detached process,
// waits three seconds, then reports the daemon PID and the first terminal id
// found in the state file.
//   vscodePid      - forwarded as `--pid`.
//   windowId       - forwarded as `--window` when truthy.
//   label          - forwarded as `--label` when truthy.
//   pollIntervalMs - forwarded as `--poll` when truthy.
//   autoAssign     - `--no-auto-assign` is added only when explicitly false.
server.tool("codex_monitor_start", "Start a background daemon that monitors VS Code terminals for Codex/AI agent activity. Runs independently — survives Claude Code restarts. Watches terminal output via OCR, detects running/idle/done.", {
    vscodePid: z.number().describe("Process ID of VS Code (get from 'apps' tool)"),
    windowId: z.number().optional().describe("Window ID of the VS Code window (get from 'windows' tool). Auto-detected if omitted."),
    label: z.string().optional().describe("Label for this terminal (default: 'Terminal')"),
    pollIntervalMs: z.number().optional().describe("How often to poll in ms (default: 3000)"),
    autoAssign: z.boolean().optional().describe("Auto-assign queued tasks when terminal goes idle (default: true)"),
}, async ({ vscodePid, windowId, label, pollIntervalMs, autoAssign }) => {
    const { running, pid } = isDaemonRunning();
    if (running) {
        return { content: [{ type: "text", text: `Daemon already running (pid=${pid}). Use codex_monitor_stop first to restart.` }] };
    }
    // Build daemon args
    const daemonArgs = ["tsx", DAEMON_SCRIPT, "--pid", String(vscodePid)];
    if (windowId) {
        daemonArgs.push("--window", String(windowId));
    }
    if (pollIntervalMs) {
        daemonArgs.push("--poll", String(pollIntervalMs));
    }
    if (label) {
        daemonArgs.push("--label", label);
    }
    if (autoAssign === false) {
        daemonArgs.push("--no-auto-assign");
    }
    // Spawn detached daemon
    const child = spawn("npx", daemonArgs, {
        detached: true,
        stdio: "ignore",
        cwd: __dirname,
    });
    // A ChildProcess without an 'error' listener throws when the spawn fails
    // (e.g. `npx` is not on PATH), which would take this server down.
    let spawnError = null;
    child.once("error", (err) => {
        spawnError = err;
    });
    child.unref();
    // Wait a moment for daemon to start and write state
    await new Promise((r) => setTimeout(r, 3000));
    if (spawnError !== null) {
        return { content: [{ type: "text", text: `Failed to start daemon: ${spawnError.message}` }] };
    }
    // child.pid is the `npx` launcher's PID. Prefer the PID from the PID file
    // (the one codex_monitor_stop signals) and fall back to the launcher's.
    const started = isDaemonRunning();
    const daemonPid = started.running ? started.pid : child.pid;
    const state = readDaemonState();
    const terminalId = state?.terminals?.[0]?.id ?? "pending";
    return {
        content: [{
                type: "text",
                text: `Background daemon started!\n` +
                    `Daemon PID: ${daemonPid}\n` +
                    `Terminal ID: ${terminalId}\n` +
                    `VS Code PID: ${vscodePid}\n` +
                    `Window ID: ${windowId ?? "auto-detecting"}\n` +
                    `Poll interval: ${pollIntervalMs ?? 3000}ms\n` +
                    `Auto-assign: ${autoAssign !== false}\n` +
                    `Log: ${MONITOR_LOG}\n` +
                    `State: ${MONITOR_STATE}\n\n` +
                    `The daemon runs independently — survives Claude Code restarts.\n` +
                    `Use codex_monitor_status to check on it anytime.`,
            }],
    };
});
2584
// Tool: codex_monitor_status
// Builds a plain-text report from the PID check, the state file and the task
// file: daemon liveness, one section per terminal in the state, task counts by
// status and, when tail_log > 0, the last lines of the daemon log (at most 50).
server.tool("codex_monitor_status", "Get status of the background monitor daemon. Shows terminal status, agent activity, task queue, and daemon health.", {
    tail_log: z.number().optional().describe("Show last N lines of daemon log (default: 0, max: 50)"),
}, async ({ tail_log }) => {
    const daemon = isDaemonRunning();
    const snapshot = readDaemonState();
    const queue = readDaemonTasks();
    const report = [`Daemon: ${daemon.running ? "RUNNING" : "STOPPED"} (pid=${daemon.pid ?? "none"})`];
    if (snapshot?.terminals) {
        for (const term of snapshot.terminals) {
            // Only the last five lines of captured output are shown.
            const outputTail = (term.lastOutput || "").split("\n").slice(-5).join("\n").trim();
            report.push(
                "",
                `--- ${term.id} ---`,
                ` Status: ${(term.status || "unknown").toUpperCase()}`,
                ` VS Code PID: ${term.vscodePid}`,
                ` Window ID: ${term.windowId ?? "unknown"}`,
                ` Current task: ${term.lastTask ?? "none"}`,
                ` Tasks completed: ${term.tasksCompleted}`,
                ` Last poll: ${term.lastPollAt}`,
                ` Last output (tail):`,
                ` ${outputTail.split("\n").join("\n ")}`,
            );
        }
    }
    else if (!daemon.running) {
        report.push("\nNo monitor running. Use codex_monitor_start first.");
    }
    const countWithStatus = (wanted) => queue.reduce((n, task) => (task.status === wanted ? n + 1 : n), 0);
    const queued = countWithStatus("queued");
    const runningTasks = countWithStatus("running");
    const completed = countWithStatus("completed");
    report.push("", `Tasks: ${queued} queued, ${runningTasks} running, ${completed} completed`);
    // Optionally show daemon log tail
    if (tail_log && tail_log > 0) {
        let logTail = null;
        try {
            const keep = Math.min(tail_log, 50);
            logTail = fs.readFileSync(MONITOR_LOG, "utf-8").trim().split("\n").slice(-keep).join("\n");
        }
        catch {
            logTail = null;
        }
        if (logTail === null) {
            report.push("\n(no log file found)");
        }
        else {
            report.push("", "--- Daemon Log ---", logTail);
        }
    }
    return { content: [{ type: "text", text: report.join("\n") }] };
});
2630
// Tool: codex_monitor_add_task
// Appends a new "queued" task to the task file, keeps the list ordered by
// ascending priority, and reports the new task together with the number of
// queued tasks.
//   prompt     - text stored on the task.
//   priority   - optional; 10 when omitted.
//   terminalId - optional; stored as null when omitted.
server.tool("codex_monitor_add_task", "Add a task to the daemon's queue. When a monitored terminal goes idle, the next task is automatically typed in and executed.", {
    prompt: z.string().describe("The prompt/command to send to Codex when a terminal is available"),
    priority: z.number().optional().describe("Priority (lower = higher priority, default: 10)"),
    terminalId: z.string().optional().describe("Assign to a specific terminal (omit for any available)"),
}, async ({ prompt, priority, terminalId }) => {
    const queue = readDaemonTasks();
    // Timestamp plus a short random suffix, both base-36.
    const idSuffix = Date.now().toString(36) + Math.random().toString(36).slice(2, 6);
    const entry = {
        id: "task_" + idSuffix,
        prompt,
        priority: priority ?? 10,
        terminalId: terminalId ?? null,
        status: "queued",
        createdAt: new Date().toISOString(),
        assignedAt: null,
        completedAt: null,
        result: null,
    };
    queue.push(entry);
    queue.sort((left, right) => left.priority - right.priority);
    writeDaemonTasks(queue);
    const queuedCount = queue.filter((item) => item.status === "queued").length;
    const ellipsis = prompt.length > 100 ? "..." : "";
    const summary = [
        `Task queued!`,
        `ID: ${entry.id}`,
        `Prompt: "${prompt.slice(0, 100)}${ellipsis}"`,
        `Priority: ${entry.priority}`,
        `Target terminal: ${entry.terminalId ?? "any available"}`,
        `Queue size: ${queuedCount}`,
    ].join("\n");
    return { content: [{ type: "text", text: summary }] };
});
2663
// Tool: codex_monitor_tasks
// Lists the tasks stored in the task file, optionally filtered by status, as a
// numbered plain-text list.
//   status - optional filter; "all" or omitted lists every task.
server.tool("codex_monitor_tasks", "List all tasks in the daemon's queue with their status.", {
    status: z.enum(["all", "queued", "running", "completed", "failed"]).optional().describe("Filter by status (default: all)"),
}, async ({ status }) => {
    const filterActive = Boolean(status) && status !== "all";
    let tasks = readDaemonTasks();
    if (filterActive) {
        tasks = tasks.filter((t) => t.status === status);
    }
    if (tasks.length === 0) {
        // Avoid "No  tasks." / "No all tasks." when no real filter is applied.
        const qualifier = filterActive ? `${status} ` : "";
        return { content: [{ type: "text", text: `No ${qualifier}tasks.` }] };
    }
    const lines = tasks.map((t, i) => {
        // The file is shared with the daemon, so don't assume field types.
        const statusLabel = String(t.status ?? "unknown").toUpperCase();
        const promptPreview = String(t.prompt || "").slice(0, 80);
        const parts = [
            `${i + 1}. [${statusLabel}] "${promptPreview}"`,
            ` ID: ${t.id} | Priority: ${t.priority}`,
            ` Terminal: ${t.terminalId ?? "any"}`,
            ` Created: ${t.createdAt}`,
        ];
        if (t.assignedAt) {
            parts.push(` Assigned: ${t.assignedAt}`);
        }
        if (t.completedAt) {
            parts.push(` Completed: ${t.completedAt}`);
        }
        if (t.result) {
            // `result` may be a non-string value; stringify before slicing.
            const resultText = typeof t.result === "string" ? t.result : JSON.stringify(t.result);
            parts.push(` Result: ${resultText.slice(0, 100)}`);
        }
        return parts.join("\n");
    });
    return { content: [{ type: "text", text: lines.join("\n\n") }] };
});
2690
// Tool: codex_monitor_assign_now
// Focuses the app with bundle id com.microsoft.VSCode through the bridge, types
// the prompt, and presses Enter, with short pauses between the steps. Bridge
// failures are reported as text rather than thrown.
//   prompt - text to type.
server.tool("codex_monitor_assign_now", "Immediately type a prompt into the VS Code terminal (bypasses queue). Focuses VS Code, types, presses Enter.", {
    prompt: z.string().describe("The prompt/command to type into the terminal"),
}, async ({ prompt }) => {
    const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
    const reply = (text) => ({ content: [{ type: "text", text }] });
    await ensureBridge();
    try {
        await bridge.call("app.focus", { bundleId: "com.microsoft.VSCode" });
        await pause(300);
        await bridge.call("cg.typeText", { text: prompt });
        await pause(100);
        await bridge.call("cg.keyCombo", { keys: ["enter"] });
    }
    catch (err) {
        return reply(`Failed: ${err.message}`);
    }
    return reply(`Typed and sent: "${prompt.slice(0, 100)}"`);
});
2706
// Tool: codex_monitor_stop
// Sends SIGTERM to the PID reported by isDaemonRunning(), waits one second,
// then re-checks liveness so the reply reflects what actually happened
// (mirrors the verification worker_stop performs).
server.tool("codex_monitor_stop", "Stop the background monitor daemon.", {}, async () => {
    const { running, pid } = isDaemonRunning();
    if (!running) {
        return { content: [{ type: "text", text: "No daemon running." }] };
    }
    try {
        process.kill(pid, "SIGTERM");
    }
    catch (err) {
        return { content: [{ type: "text", text: `Failed to stop daemon: ${err.message}` }] };
    }
    // Wait for it to clean up
    await new Promise((r) => setTimeout(r, 1000));
    const after = isDaemonRunning();
    if (after.running && after.pid === pid) {
        return { content: [{ type: "text", text: `SIGTERM sent to pid=${pid} but the daemon is still running. It may still be shutting down; check codex_monitor_status.` }] };
    }
    return { content: [{ type: "text", text: `Daemon stopped (pid=${pid}).` }] };
});
2721
+ // ═══════════════════════════════════════════════
2722
+ // START
2723
+ // ═══════════════════════════════════════════════
2724
/**
 * Entry point: connects the MCP server to a freshly created stdio transport.
 *
 * @returns {Promise<void>} Settles when `server.connect` settles.
 */
async function main() {
    await server.connect(new StdioServerTransport());
}
2728
// Start the server; any startup failure is written to stderr and the process
// exits with status 1.
main().catch((fatal) => {
    const line = "MCP server error: " + fatal.message + "\n";
    process.stderr.write(line);
    process.exit(1);
});