screenhand 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +458 -93
- package/dist/.audit-log.jsonl +55 -0
- package/dist/.screenhand/memory/.lock +1 -0
- package/dist/.screenhand/memory/actions.jsonl +85 -0
- package/dist/.screenhand/memory/errors.jsonl +5 -0
- package/dist/.screenhand/memory/errors.jsonl.bak +4 -0
- package/dist/.screenhand/memory/state.json +35 -0
- package/dist/.screenhand/memory/state.json.bak +35 -0
- package/dist/.screenhand/memory/strategies.jsonl +12 -0
- package/dist/agent/cli.js +73 -0
- package/dist/agent/loop.js +258 -0
- package/dist/config.js +9 -0
- package/dist/index.js +56 -0
- package/dist/logging/timeline-logger.js +29 -0
- package/dist/mcp/mcp-stdio-server.js +448 -0
- package/dist/mcp/server.js +347 -0
- package/dist/mcp-desktop.js +2731 -0
- package/dist/mcp-entry.js +59 -0
- package/dist/memory/recall.js +160 -0
- package/dist/memory/research.js +98 -0
- package/dist/memory/seeds.js +89 -0
- package/dist/memory/session.js +161 -0
- package/dist/memory/store.js +391 -0
- package/dist/memory/types.js +4 -0
- package/dist/monitor/codex-monitor.js +377 -0
- package/dist/monitor/task-queue.js +84 -0
- package/dist/monitor/types.js +49 -0
- package/dist/native/bridge-client.js +174 -0
- package/dist/native/macos-bridge-client.js +5 -0
- package/dist/npm-publish-helper.js +117 -0
- package/dist/npm-token-cdp.js +113 -0
- package/dist/npm-token-create.js +135 -0
- package/dist/npm-token-finish.js +126 -0
- package/dist/playbook/engine.js +193 -0
- package/dist/playbook/index.js +4 -0
- package/dist/playbook/recorder.js +519 -0
- package/dist/playbook/runner.js +392 -0
- package/dist/playbook/store.js +166 -0
- package/dist/playbook/types.js +4 -0
- package/dist/runtime/accessibility-adapter.js +377 -0
- package/dist/runtime/app-adapter.js +48 -0
- package/dist/runtime/applescript-adapter.js +283 -0
- package/dist/runtime/ax-role-map.js +80 -0
- package/dist/runtime/browser-adapter.js +36 -0
- package/dist/runtime/cdp-chrome-adapter.js +505 -0
- package/dist/runtime/composite-adapter.js +205 -0
- package/dist/runtime/executor.js +250 -0
- package/dist/runtime/locator-cache.js +12 -0
- package/dist/runtime/planning-loop.js +47 -0
- package/dist/runtime/service.js +372 -0
- package/dist/runtime/session-manager.js +28 -0
- package/dist/runtime/state-observer.js +105 -0
- package/dist/runtime/vision-adapter.js +208 -0
- package/dist/scripts/codex-monitor-daemon.js +335 -0
- package/dist/scripts/supervisor-daemon.js +272 -0
- package/dist/scripts/worker-daemon.js +228 -0
- package/dist/src/agent/cli.js +82 -0
- package/dist/src/agent/loop.js +274 -0
- package/{src/config.ts → dist/src/config.js} +5 -10
- package/{src/index.ts → dist/src/index.js} +32 -52
- package/dist/src/jobs/manager.js +237 -0
- package/dist/src/jobs/runner.js +683 -0
- package/dist/src/jobs/store.js +102 -0
- package/dist/src/jobs/types.js +30 -0
- package/dist/src/jobs/worker.js +97 -0
- package/dist/src/logging/timeline-logger.js +45 -0
- package/dist/src/mcp/mcp-stdio-server.js +464 -0
- package/dist/src/mcp/server.js +363 -0
- package/dist/src/mcp-entry.js +60 -0
- package/dist/src/memory/recall.js +170 -0
- package/dist/src/memory/research.js +104 -0
- package/dist/src/memory/seeds.js +101 -0
- package/dist/src/memory/service.js +421 -0
- package/dist/src/memory/session.js +169 -0
- package/dist/src/memory/store.js +422 -0
- package/dist/src/memory/types.js +17 -0
- package/dist/src/monitor/codex-monitor.js +382 -0
- package/dist/src/monitor/task-queue.js +97 -0
- package/dist/src/monitor/types.js +62 -0
- package/dist/src/native/bridge-client.js +190 -0
- package/{src/native/macos-bridge-client.ts → dist/src/native/macos-bridge-client.js} +0 -1
- package/dist/src/playbook/engine.js +201 -0
- package/dist/src/playbook/index.js +20 -0
- package/dist/src/playbook/recorder.js +535 -0
- package/dist/src/playbook/runner.js +408 -0
- package/dist/src/playbook/store.js +183 -0
- package/dist/src/playbook/types.js +17 -0
- package/dist/src/runtime/accessibility-adapter.js +393 -0
- package/dist/src/runtime/app-adapter.js +64 -0
- package/dist/src/runtime/applescript-adapter.js +299 -0
- package/dist/src/runtime/ax-role-map.js +96 -0
- package/dist/src/runtime/browser-adapter.js +52 -0
- package/dist/src/runtime/cdp-chrome-adapter.js +521 -0
- package/dist/src/runtime/composite-adapter.js +221 -0
- package/dist/src/runtime/execution-contract.js +159 -0
- package/dist/src/runtime/executor.js +266 -0
- package/{src/runtime/locator-cache.ts → dist/src/runtime/locator-cache.js} +10 -15
- package/dist/src/runtime/planning-loop.js +63 -0
- package/dist/src/runtime/service.js +388 -0
- package/dist/src/runtime/session-manager.js +60 -0
- package/dist/src/runtime/state-observer.js +121 -0
- package/dist/src/runtime/vision-adapter.js +224 -0
- package/dist/src/supervisor/locks.js +186 -0
- package/dist/src/supervisor/supervisor.js +403 -0
- package/dist/src/supervisor/types.js +30 -0
- package/dist/src/test-mcp-protocol.js +154 -0
- package/dist/src/types.js +17 -0
- package/dist/src/util/atomic-write.js +118 -0
- package/dist/test-mcp-protocol.js +138 -0
- package/dist/types.js +1 -0
- package/package.json +18 -4
- package/.claude/commands/automate.md +0 -28
- package/.claude/commands/debug-ui.md +0 -19
- package/.claude/commands/screenshot.md +0 -15
- package/.github/FUNDING.yml +0 -1
- package/.github/ISSUE_TEMPLATE/bug_report.md +0 -27
- package/.github/ISSUE_TEMPLATE/feature_request.md +0 -20
- package/.mcp.json +0 -8
- package/DESKTOP_MCP_GUIDE.md +0 -92
- package/SECURITY.md +0 -44
- package/docs/architecture.md +0 -47
- package/install-skills.sh +0 -19
- package/mcp-bridge.ts +0 -271
- package/mcp-desktop.ts +0 -1221
- package/native/macos-bridge/Package.swift +0 -21
- package/native/macos-bridge/Sources/AccessibilityBridge.swift +0 -261
- package/native/macos-bridge/Sources/AppManagement.swift +0 -129
- package/native/macos-bridge/Sources/CoreGraphicsBridge.swift +0 -242
- package/native/macos-bridge/Sources/ObserverBridge.swift +0 -120
- package/native/macos-bridge/Sources/VisionBridge.swift +0 -80
- package/native/macos-bridge/Sources/main.swift +0 -345
- package/native/windows-bridge/AppManagement.cs +0 -234
- package/native/windows-bridge/InputBridge.cs +0 -436
- package/native/windows-bridge/Program.cs +0 -265
- package/native/windows-bridge/ScreenCapture.cs +0 -329
- package/native/windows-bridge/UIAutomationBridge.cs +0 -571
- package/native/windows-bridge/WindowsBridge.csproj +0 -17
- package/playbooks/devpost.json +0 -186
- package/playbooks/instagram.json +0 -41
- package/playbooks/instagram_v2.json +0 -201
- package/playbooks/x_v1.json +0 -211
- package/scripts/devpost-live-loop.mjs +0 -421
- package/src/logging/timeline-logger.ts +0 -55
- package/src/mcp/server.ts +0 -449
- package/src/memory/recall.ts +0 -191
- package/src/memory/research.ts +0 -146
- package/src/memory/seeds.ts +0 -123
- package/src/memory/session.ts +0 -201
- package/src/memory/store.ts +0 -434
- package/src/memory/types.ts +0 -69
- package/src/native/bridge-client.ts +0 -239
- package/src/runtime/accessibility-adapter.ts +0 -487
- package/src/runtime/app-adapter.ts +0 -169
- package/src/runtime/applescript-adapter.ts +0 -376
- package/src/runtime/ax-role-map.ts +0 -102
- package/src/runtime/browser-adapter.ts +0 -129
- package/src/runtime/cdp-chrome-adapter.ts +0 -676
- package/src/runtime/composite-adapter.ts +0 -274
- package/src/runtime/executor.ts +0 -396
- package/src/runtime/planning-loop.ts +0 -81
- package/src/runtime/service.ts +0 -448
- package/src/runtime/session-manager.ts +0 -50
- package/src/runtime/state-observer.ts +0 -136
- package/src/runtime/vision-adapter.ts +0 -297
- package/src/types.ts +0 -297
- package/tests/bridge-client.test.ts +0 -176
- package/tests/browser-stealth.test.ts +0 -210
- package/tests/composite-adapter.test.ts +0 -64
- package/tests/mcp-server.test.ts +0 -151
- package/tests/memory-recall.test.ts +0 -339
- package/tests/memory-research.test.ts +0 -159
- package/tests/memory-seeds.test.ts +0 -120
- package/tests/memory-store.test.ts +0 -392
- package/tests/types.test.ts +0 -92
- package/tsconfig.check.json +0 -17
- package/tsconfig.json +0 -19
- package/vitest.config.ts +0 -8
package/mcp-desktop.ts
DELETED
|
@@ -1,1221 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env npx tsx
|
|
2
|
-
/**
|
|
3
|
-
* ScreenHand — MCP Server for Desktop Automation
|
|
4
|
-
* Controls any macOS/Windows app + Chrome browser via CDP.
|
|
5
|
-
*
|
|
6
|
-
* Setup — add to ~/.claude/settings.json or project .mcp.json:
|
|
7
|
-
* {
|
|
8
|
-
* "mcpServers": {
|
|
9
|
-
* "screenhand": {
|
|
10
|
-
* "command": "npx",
|
|
11
|
-
* "args": ["tsx", "/path/to/screenhand/mcp-desktop.ts"]
|
|
12
|
-
* }
|
|
13
|
-
* }
|
|
14
|
-
* }
|
|
15
|
-
*/
|
|
16
|
-
|
|
17
|
-
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
18
|
-
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
19
|
-
import { z } from "zod";
|
|
20
|
-
import path from "node:path";
|
|
21
|
-
import { fileURLToPath } from "node:url";
|
|
22
|
-
import { execSync } from "node:child_process";
|
|
23
|
-
import fs from "node:fs";
|
|
24
|
-
import { BridgeClient } from "./src/native/bridge-client.js";
|
|
25
|
-
import { MemoryStore } from "./src/memory/store.js";
|
|
26
|
-
import { SessionTracker } from "./src/memory/session.js";
|
|
27
|
-
import { RecallEngine } from "./src/memory/recall.js";
|
|
28
|
-
import type { ActionEntry, ErrorPattern } from "./src/memory/types.js";
|
|
29
|
-
import { backgroundResearch } from "./src/memory/research.js";
|
|
30
|
-
|
|
31
|
-
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
32
|
-
|
|
33
|
-
// ── Audit logging for dangerous tools ──
|
|
34
|
-
const AUDIT_LOG_PATH = path.resolve(__dirname, ".audit-log.jsonl");
|
|
35
|
-
|
|
36
|
-
function auditLog(tool: string, params: Record<string, unknown>) {
|
|
37
|
-
const entry = {
|
|
38
|
-
timestamp: new Date().toISOString(),
|
|
39
|
-
tool,
|
|
40
|
-
params,
|
|
41
|
-
pid: process.pid,
|
|
42
|
-
};
|
|
43
|
-
try {
|
|
44
|
-
fs.appendFileSync(AUDIT_LOG_PATH, JSON.stringify(entry) + "\n");
|
|
45
|
-
} catch {
|
|
46
|
-
// Non-critical — don't crash if log write fails
|
|
47
|
-
}
|
|
48
|
-
}
|
|
49
|
-
const bridgePath = process.platform === "win32"
|
|
50
|
-
? path.resolve(__dirname, "native/windows-bridge/bin/Release/net8.0-windows/windows-bridge.exe")
|
|
51
|
-
: path.resolve(__dirname, "native/macos-bridge/.build/release/macos-bridge");
|
|
52
|
-
const bridge = new BridgeClient(bridgePath);
|
|
53
|
-
let bridgeReady = false;
|
|
54
|
-
|
|
55
|
-
async function ensureBridge() {
|
|
56
|
-
if (!bridgeReady) { await bridge.start(); bridgeReady = true; }
|
|
57
|
-
}
|
|
58
|
-
|
|
59
|
-
// CDP connection cache
|
|
60
|
-
let cdpPort: number | null = null;
|
|
61
|
-
let CDP: any = null;
|
|
62
|
-
|
|
63
|
-
async function ensureCDP(): Promise<{ CDP: any; port: number }> {
|
|
64
|
-
if (!CDP) CDP = (await import("chrome-remote-interface")).default;
|
|
65
|
-
if (cdpPort) {
|
|
66
|
-
try { await CDP.Version({ port: cdpPort }); return { CDP, port: cdpPort }; } catch {}
|
|
67
|
-
}
|
|
68
|
-
// Try common ports
|
|
69
|
-
for (const p of [9222, 9223, 9224]) {
|
|
70
|
-
try { await CDP.Version({ port: p }); cdpPort = p; return { CDP, port: p }; } catch {}
|
|
71
|
-
}
|
|
72
|
-
throw new Error("Chrome not running with --remote-debugging-port. Launch with: /Applications/Google\\ Chrome.app/Contents/MacOS/Google\\ Chrome --remote-debugging-port=9222 --user-data-dir=/tmp/chrome-debug");
|
|
73
|
-
}
|
|
74
|
-
|
|
75
|
-
const server = new McpServer({ name: "screenhand", version: "2.0.0" });
|
|
76
|
-
|
|
77
|
-
// ═══════════════════════════════════════════════
|
|
78
|
-
// LEARNING MEMORY — cached, auto-recall, non-blocking
|
|
79
|
-
// ═══════════════════════════════════════════════
|
|
80
|
-
|
|
81
|
-
const memoryStore = new MemoryStore(__dirname);
|
|
82
|
-
memoryStore.init(); // One-time disk read at startup
|
|
83
|
-
const sessionTracker = new SessionTracker(memoryStore);
|
|
84
|
-
const recallEngine = new RecallEngine(memoryStore);
|
|
85
|
-
|
|
86
|
-
// Skip logging for memory tools themselves
|
|
87
|
-
const MEMORY_TOOLS = new Set(["memory_recall", "memory_save", "memory_errors", "memory_stats", "memory_clear"]);
|
|
88
|
-
|
|
89
|
-
// Track the strategy we're currently following (for feedback loop)
|
|
90
|
-
let activeStrategyFingerprint: string | null = null;
|
|
91
|
-
|
|
92
|
-
// Intercept all tool registrations to auto-log + auto-recall
|
|
93
|
-
const originalTool = server.tool.bind(server);
|
|
94
|
-
type ToolArgs = Parameters<typeof server.tool>;
|
|
95
|
-
|
|
96
|
-
function extractText(result: any): string {
|
|
97
|
-
if (!result?.content) return "";
|
|
98
|
-
return result.content
|
|
99
|
-
.filter((c: any) => c.type === "text")
|
|
100
|
-
.map((c: any) => c.text)
|
|
101
|
-
.join("\n")
|
|
102
|
-
.slice(0, 500);
|
|
103
|
-
}
|
|
104
|
-
|
|
105
|
-
(server as any).tool = (...args: ToolArgs) => {
|
|
106
|
-
const handlerIdx = args.findIndex((a) => typeof a === "function");
|
|
107
|
-
if (handlerIdx === -1) return (originalTool as any)(...args);
|
|
108
|
-
|
|
109
|
-
const originalHandler = args[handlerIdx] as Function;
|
|
110
|
-
const toolName = args[0] as string;
|
|
111
|
-
|
|
112
|
-
const wrappedHandler = async (params: any, extra: any) => {
|
|
113
|
-
// Skip intercepting memory tools to avoid recursion
|
|
114
|
-
if (MEMORY_TOOLS.has(toolName)) {
|
|
115
|
-
return originalHandler(params, extra);
|
|
116
|
-
}
|
|
117
|
-
|
|
118
|
-
const sessionId = sessionTracker.getSessionId();
|
|
119
|
-
const safeParams = typeof params === "object" && params !== null ? params : {};
|
|
120
|
-
const start = Date.now();
|
|
121
|
-
|
|
122
|
-
// ── PRE-CALL: check for known error warnings (~0ms, in-memory) ──
|
|
123
|
-
const knownError = recallEngine.quickErrorCheck(toolName);
|
|
124
|
-
|
|
125
|
-
try {
|
|
126
|
-
const result = await originalHandler(params, extra);
|
|
127
|
-
const durationMs = Date.now() - start;
|
|
128
|
-
|
|
129
|
-
// ── POST-CALL: log action (async, non-blocking) ──
|
|
130
|
-
const entry: ActionEntry = {
|
|
131
|
-
id: "a_" + Date.now().toString(36) + Math.random().toString(36).slice(2, 6),
|
|
132
|
-
timestamp: new Date().toISOString(),
|
|
133
|
-
sessionId,
|
|
134
|
-
tool: toolName,
|
|
135
|
-
params: safeParams,
|
|
136
|
-
durationMs,
|
|
137
|
-
success: true,
|
|
138
|
-
result: extractText(result),
|
|
139
|
-
error: null,
|
|
140
|
-
};
|
|
141
|
-
memoryStore.appendAction(entry); // non-blocking
|
|
142
|
-
sessionTracker.recordAction(entry); // in-memory only
|
|
143
|
-
|
|
144
|
-
// ── POST-CALL: auto-recall hints (~0ms, in-memory) ──
|
|
145
|
-
const hints: string[] = [];
|
|
146
|
-
|
|
147
|
-
// Warn about known errors for this tool
|
|
148
|
-
if (knownError) {
|
|
149
|
-
hints.push(`⚡ Memory: "${toolName}" has failed before: "${knownError.error}" (${knownError.occurrences}x). Fix: ${knownError.resolution}`);
|
|
150
|
-
}
|
|
151
|
-
|
|
152
|
-
// Suggest next step if we're mid-strategy
|
|
153
|
-
const recentTools = sessionTracker.getRecentToolNames();
|
|
154
|
-
const strategyHint = recallEngine.quickStrategyHint(recentTools);
|
|
155
|
-
if (strategyHint) {
|
|
156
|
-
activeStrategyFingerprint = strategyHint.fingerprint;
|
|
157
|
-
const nextParams = Object.keys(strategyHint.nextStep.params).length > 0
|
|
158
|
-
? `(${JSON.stringify(strategyHint.nextStep.params)})`
|
|
159
|
-
: "";
|
|
160
|
-
hints.push(`💡 Memory: This matches strategy "${strategyHint.strategy.task}" (${strategyHint.strategy.successCount} wins, ${strategyHint.strategy.failCount ?? 0} fails). Next step: ${strategyHint.nextStep.tool}${nextParams}`);
|
|
161
|
-
|
|
162
|
-
// If this was the last step of the strategy, record success
|
|
163
|
-
if (recentTools.length === strategyHint.strategy.steps.length - 1) {
|
|
164
|
-
// Next call will be the final step — but this call completing means we're on track
|
|
165
|
-
}
|
|
166
|
-
} else if (activeStrategyFingerprint && recentTools.length > 0) {
|
|
167
|
-
// We were following a strategy but the sequence diverged — record success
|
|
168
|
-
// (the agent completed the strategy or went its own way after it)
|
|
169
|
-
memoryStore.recordStrategyOutcome(activeStrategyFingerprint, true);
|
|
170
|
-
activeStrategyFingerprint = null;
|
|
171
|
-
}
|
|
172
|
-
|
|
173
|
-
// Attach hints as _meta (doesn't pollute tool output for MCP clients)
|
|
174
|
-
if (hints.length > 0) {
|
|
175
|
-
return {
|
|
176
|
-
...result,
|
|
177
|
-
_meta: { ...(result?._meta ?? {}), memoryHints: hints },
|
|
178
|
-
};
|
|
179
|
-
}
|
|
180
|
-
|
|
181
|
-
return result;
|
|
182
|
-
} catch (err: any) {
|
|
183
|
-
const durationMs = Date.now() - start;
|
|
184
|
-
const errorMsg = err?.message ?? String(err);
|
|
185
|
-
|
|
186
|
-
// Log failed action (non-blocking)
|
|
187
|
-
const entry: ActionEntry = {
|
|
188
|
-
id: "a_" + Date.now().toString(36) + Math.random().toString(36).slice(2, 6),
|
|
189
|
-
timestamp: new Date().toISOString(),
|
|
190
|
-
sessionId,
|
|
191
|
-
tool: toolName,
|
|
192
|
-
params: safeParams,
|
|
193
|
-
durationMs,
|
|
194
|
-
success: false,
|
|
195
|
-
result: null,
|
|
196
|
-
error: errorMsg,
|
|
197
|
-
};
|
|
198
|
-
memoryStore.appendAction(entry); // non-blocking
|
|
199
|
-
sessionTracker.recordAction(entry); // in-memory only
|
|
200
|
-
|
|
201
|
-
// Record strategy failure if we were following one
|
|
202
|
-
if (activeStrategyFingerprint) {
|
|
203
|
-
memoryStore.recordStrategyOutcome(activeStrategyFingerprint, false);
|
|
204
|
-
activeStrategyFingerprint = null;
|
|
205
|
-
}
|
|
206
|
-
|
|
207
|
-
// Record error pattern (updates cache + async write)
|
|
208
|
-
const errorPattern: ErrorPattern = {
|
|
209
|
-
id: "err_" + Date.now().toString(36) + Math.random().toString(36).slice(2, 6),
|
|
210
|
-
tool: toolName,
|
|
211
|
-
params: safeParams,
|
|
212
|
-
error: errorMsg,
|
|
213
|
-
resolution: null,
|
|
214
|
-
occurrences: 1,
|
|
215
|
-
lastSeen: new Date().toISOString(),
|
|
216
|
-
};
|
|
217
|
-
memoryStore.appendError(errorPattern);
|
|
218
|
-
|
|
219
|
-
// Background research: search for a fix if no resolution exists
|
|
220
|
-
const existingErrors = memoryStore.readErrors();
|
|
221
|
-
const hasResolution = existingErrors.some(
|
|
222
|
-
(e) => e.tool === toolName && e.error === errorMsg && e.resolution
|
|
223
|
-
);
|
|
224
|
-
if (!hasResolution) {
|
|
225
|
-
backgroundResearch(memoryStore, toolName, safeParams, errorMsg);
|
|
226
|
-
}
|
|
227
|
-
|
|
228
|
-
throw err;
|
|
229
|
-
}
|
|
230
|
-
};
|
|
231
|
-
|
|
232
|
-
const newArgs = [...args];
|
|
233
|
-
newArgs[handlerIdx] = wrappedHandler;
|
|
234
|
-
return (originalTool as any)(...newArgs);
|
|
235
|
-
};
|
|
236
|
-
|
|
237
|
-
// ═══════════════════════════════════════════════
|
|
238
|
-
// APPS — discover and manage running applications
|
|
239
|
-
// ═══════════════════════════════════════════════
|
|
240
|
-
|
|
241
|
-
server.tool("apps", "List all running applications with bundle IDs and PIDs", {}, async () => {
|
|
242
|
-
await ensureBridge();
|
|
243
|
-
const apps = await bridge.call<any[]>("app.list");
|
|
244
|
-
const lines = apps.map((a: any) =>
|
|
245
|
-
`${a.name} (${a.bundleId}) pid=${a.pid}${a.isActive ? " ← active" : ""}`
|
|
246
|
-
);
|
|
247
|
-
return { content: [{ type: "text", text: lines.join("\n") }] };
|
|
248
|
-
});
|
|
249
|
-
|
|
250
|
-
server.tool("windows", "List all visible windows with IDs, positions, and sizes", {}, async () => {
|
|
251
|
-
await ensureBridge();
|
|
252
|
-
const wins = await bridge.call<any[]>("app.windows");
|
|
253
|
-
const lines = wins.map((w: any) => {
|
|
254
|
-
const b = w.bounds || {};
|
|
255
|
-
return `[${w.windowId}] ${w.appName} "${w.title}" (${Math.round(b.x||0)},${Math.round(b.y||0)}) ${Math.round(b.width||0)}x${Math.round(b.height||0)}`;
|
|
256
|
-
});
|
|
257
|
-
return { content: [{ type: "text", text: lines.join("\n") }] };
|
|
258
|
-
});
|
|
259
|
-
|
|
260
|
-
server.tool("focus", "Focus/activate an application", {
|
|
261
|
-
bundleId: z.string().describe("App bundle ID, e.g. com.apple.Safari"),
|
|
262
|
-
}, async ({ bundleId }) => {
|
|
263
|
-
await ensureBridge();
|
|
264
|
-
await bridge.call("app.focus", { bundleId });
|
|
265
|
-
return { content: [{ type: "text", text: "Focused " + bundleId }] };
|
|
266
|
-
});
|
|
267
|
-
|
|
268
|
-
server.tool("launch", "Launch an application", {
|
|
269
|
-
bundleId: z.string().describe("App bundle ID"),
|
|
270
|
-
}, async ({ bundleId }) => {
|
|
271
|
-
await ensureBridge();
|
|
272
|
-
const r = await bridge.call<any>("app.launch", { bundleId });
|
|
273
|
-
return { content: [{ type: "text", text: `Launched ${r.appName} pid=${r.pid}` }] };
|
|
274
|
-
});
|
|
275
|
-
|
|
276
|
-
// ═══════════════════════════════════════════════
|
|
277
|
-
// INSPECT — see what's on screen (debugging/design)
|
|
278
|
-
// ═══════════════════════════════════════════════
|
|
279
|
-
|
|
280
|
-
server.tool("screenshot", "Take a screenshot and OCR it. Returns all visible text. NOTE: For finding/clicking UI elements, ui_tree + ui_press is 10x faster.", {
|
|
281
|
-
windowId: z.number().optional().describe("Window ID. Omit for full screen."),
|
|
282
|
-
}, async ({ windowId }) => {
|
|
283
|
-
await ensureBridge();
|
|
284
|
-
let shot: any;
|
|
285
|
-
if (windowId) {
|
|
286
|
-
shot = await bridge.call<any>("cg.captureWindow", { windowId });
|
|
287
|
-
} else {
|
|
288
|
-
shot = await bridge.call<any>("cg.captureScreen");
|
|
289
|
-
}
|
|
290
|
-
const ocr = await bridge.call<any>("vision.ocr", { imagePath: shot.path });
|
|
291
|
-
return { content: [{ type: "text", text: `Screenshot: ${shot.width}x${shot.height} (${shot.path})\n\n${ocr.text}` }] };
|
|
292
|
-
});
|
|
293
|
-
|
|
294
|
-
server.tool("screenshot_file", "Take a screenshot and return the file path (for viewing the actual image)", {
|
|
295
|
-
windowId: z.number().optional().describe("Window ID. Omit for full screen."),
|
|
296
|
-
}, async ({ windowId }) => {
|
|
297
|
-
await ensureBridge();
|
|
298
|
-
let shot: any;
|
|
299
|
-
if (windowId) {
|
|
300
|
-
shot = await bridge.call<any>("cg.captureWindow", { windowId });
|
|
301
|
-
} else {
|
|
302
|
-
shot = await bridge.call<any>("cg.captureScreen");
|
|
303
|
-
}
|
|
304
|
-
return { content: [{ type: "text", text: shot.path }] };
|
|
305
|
-
});
|
|
306
|
-
|
|
307
|
-
server.tool("ocr", "OCR a window with element positions. SLOW — prefer ui_tree for structured element discovery. Use OCR only for reading visual/canvas content.", {
|
|
308
|
-
windowId: z.number().optional().describe("Window ID. Omit for full screen."),
|
|
309
|
-
}, async ({ windowId }) => {
|
|
310
|
-
await ensureBridge();
|
|
311
|
-
let shot: any;
|
|
312
|
-
if (windowId) {
|
|
313
|
-
shot = await bridge.call<any>("cg.captureWindow", { windowId });
|
|
314
|
-
} else {
|
|
315
|
-
shot = await bridge.call<any>("cg.captureScreen");
|
|
316
|
-
}
|
|
317
|
-
const ocr = await bridge.call<any>("vision.ocr", { imagePath: shot.path });
|
|
318
|
-
|
|
319
|
-
let winBounds: any = null;
|
|
320
|
-
if (windowId) {
|
|
321
|
-
const wins = await bridge.call<any[]>("app.windows");
|
|
322
|
-
const win = wins.find((w: any) => w.windowId === windowId);
|
|
323
|
-
winBounds = win?.bounds;
|
|
324
|
-
}
|
|
325
|
-
|
|
326
|
-
const regions = ocr.regions.map((r: any) => `"${r.text}" (${Math.round(r.bounds.x)},${Math.round(r.bounds.y)}) ${Math.round(r.bounds.width)}x${Math.round(r.bounds.height)}`);
|
|
327
|
-
|
|
328
|
-
return {
|
|
329
|
-
content: [{
|
|
330
|
-
type: "text",
|
|
331
|
-
text: JSON.stringify({
|
|
332
|
-
image: { width: shot.width, height: shot.height, path: shot.path },
|
|
333
|
-
window: winBounds,
|
|
334
|
-
elementCount: regions.length,
|
|
335
|
-
elements: regions,
|
|
336
|
-
}, null, 2),
|
|
337
|
-
}],
|
|
338
|
-
};
|
|
339
|
-
});
|
|
340
|
-
|
|
341
|
-
// ═══════════════════════════════════════════════
|
|
342
|
-
// ACCESSIBILITY — structured UI inspection (instant, no OCR)
|
|
343
|
-
// ═══════════════════════════════════════════════
|
|
344
|
-
|
|
345
|
-
server.tool("ui_tree", "PREFERRED: Get the full UI element tree of an app via Accessibility. ~50ms, no screenshot/OCR. Use this FIRST to find elements — returns titles, roles, and bounds. Then use ui_press/ui_find to interact.", {
|
|
346
|
-
pid: z.number().describe("Process ID of the app"),
|
|
347
|
-
maxDepth: z.number().optional().describe("Max depth (default 4). Use 2 for overview, 6+ for deep inspection."),
|
|
348
|
-
}, async ({ pid, maxDepth }) => {
|
|
349
|
-
await ensureBridge();
|
|
350
|
-
const tree = await bridge.call<any>("ax.getElementTree", { pid, maxDepth: maxDepth || 4 });
|
|
351
|
-
|
|
352
|
-
function format(node: any, depth: number): string {
|
|
353
|
-
let line = " ".repeat(depth) + (node.role || "?");
|
|
354
|
-
if (node.title) line += ` "${node.title}"`;
|
|
355
|
-
if (node.value) line += ` =${String(node.value).slice(0, 60)}`;
|
|
356
|
-
if (node.bounds) line += ` (${Math.round(node.bounds.x)},${Math.round(node.bounds.y)} ${Math.round(node.bounds.width)}x${Math.round(node.bounds.height)})`;
|
|
357
|
-
let result = line;
|
|
358
|
-
if (node.children) {
|
|
359
|
-
for (const c of node.children) result += "\n" + format(c, depth + 1);
|
|
360
|
-
}
|
|
361
|
-
return result;
|
|
362
|
-
}
|
|
363
|
-
|
|
364
|
-
return { content: [{ type: "text", text: format(tree, 0) }] };
|
|
365
|
-
});
|
|
366
|
-
|
|
367
|
-
server.tool("ui_find", "Find a specific UI element by text/title. Returns its role, bounds, and path.", {
|
|
368
|
-
pid: z.number().describe("Process ID"),
|
|
369
|
-
title: z.string().describe("Text to search for (partial match)"),
|
|
370
|
-
}, async ({ pid, title }) => {
|
|
371
|
-
await ensureBridge();
|
|
372
|
-
const r = await bridge.call<any>("ax.findElement", { pid, title, exact: false });
|
|
373
|
-
return { content: [{ type: "text", text: JSON.stringify(r, null, 2) }] };
|
|
374
|
-
});
|
|
375
|
-
|
|
376
|
-
server.tool("ui_press", "PREFERRED: Find and press/click a UI element by its title via Accessibility. Faster and more reliable than click_text — no screenshot needed.", {
|
|
377
|
-
pid: z.number().describe("Process ID"),
|
|
378
|
-
title: z.string().describe("Element title to find and press"),
|
|
379
|
-
}, async ({ pid, title }) => {
|
|
380
|
-
await ensureBridge();
|
|
381
|
-
const el = await bridge.call<any>("ax.findElement", { pid, title, exact: false });
|
|
382
|
-
await bridge.call("ax.performAction", { pid, elementPath: el.elementPath, action: "AXPress" });
|
|
383
|
-
return { content: [{ type: "text", text: `Pressed "${el.title}" (${el.role})` }] };
|
|
384
|
-
});
|
|
385
|
-
|
|
386
|
-
server.tool("ui_set_value", "Set the value of a UI element (text field, slider, etc.)", {
|
|
387
|
-
pid: z.number().describe("Process ID"),
|
|
388
|
-
title: z.string().describe("Element title to find"),
|
|
389
|
-
value: z.string().describe("Value to set"),
|
|
390
|
-
}, async ({ pid, title, value }) => {
|
|
391
|
-
await ensureBridge();
|
|
392
|
-
const el = await bridge.call<any>("ax.findElement", { pid, title, exact: false });
|
|
393
|
-
await bridge.call("ax.setElementValue", { pid, elementPath: el.elementPath, value });
|
|
394
|
-
return { content: [{ type: "text", text: `Set "${el.title}" = "${value}"` }] };
|
|
395
|
-
});
|
|
396
|
-
|
|
397
|
-
server.tool("menu_click", "Click a menu item in an app's menu bar", {
|
|
398
|
-
pid: z.number().describe("Process ID"),
|
|
399
|
-
menuPath: z.string().describe("Menu path separated by /. e.g. 'File/New', 'View/Show Sidebar'"),
|
|
400
|
-
}, async ({ pid, menuPath }) => {
|
|
401
|
-
await ensureBridge();
|
|
402
|
-
await bridge.call("ax.menuClick", { pid, menuPath: menuPath.split("/") });
|
|
403
|
-
return { content: [{ type: "text", text: "Menu: " + menuPath }] };
|
|
404
|
-
});
|
|
405
|
-
|
|
406
|
-
// ═══════════════════════════════════════════════
|
|
407
|
-
// INPUT — interact with the screen
|
|
408
|
-
// ═══════════════════════════════════════════════
|
|
409
|
-
|
|
410
|
-
server.tool("click", "Click at screen coordinates", {
|
|
411
|
-
x: z.number().describe("Screen X"),
|
|
412
|
-
y: z.number().describe("Screen Y"),
|
|
413
|
-
}, async ({ x, y }) => {
|
|
414
|
-
await ensureBridge();
|
|
415
|
-
await bridge.call("cg.mouseMove", { x, y });
|
|
416
|
-
await new Promise(r => setTimeout(r, 50));
|
|
417
|
-
await bridge.call("cg.mouseClick", { x, y });
|
|
418
|
-
return { content: [{ type: "text", text: `Clicked (${x}, ${y})` }] };
|
|
419
|
-
});
|
|
420
|
-
|
|
421
|
-
server.tool("click_text", "SLOW fallback: Find text on screen via OCR and click it. Use ui_press instead when possible — it's 10x faster. Only use this for canvas/image content where Accessibility doesn't work.", {
|
|
422
|
-
windowId: z.number().describe("Window ID"),
|
|
423
|
-
text: z.string().describe("Text to find and click"),
|
|
424
|
-
offset_y: z.number().optional().describe("Y offset from text center (e.g. -25 for icon above label)"),
|
|
425
|
-
}, async ({ windowId, text, offset_y }) => {
|
|
426
|
-
await ensureBridge();
|
|
427
|
-
const wins = await bridge.call<any[]>("app.windows");
|
|
428
|
-
const win = wins.find((w: any) => w.windowId === windowId);
|
|
429
|
-
if (!win) return { content: [{ type: "text", text: "Window not found" }] };
|
|
430
|
-
const wb = win.bounds;
|
|
431
|
-
|
|
432
|
-
const shot = await bridge.call<any>("cg.captureWindow", { windowId });
|
|
433
|
-
const ocr = await bridge.call<any>("vision.ocr", { imagePath: shot.path });
|
|
434
|
-
const match = ocr.regions.find((r: any) => r.text.toLowerCase().includes(text.toLowerCase()));
|
|
435
|
-
if (!match) {
|
|
436
|
-
return { content: [{ type: "text", text: `"${text}" not found. Available: ${ocr.regions.map((r:any) => r.text).slice(0, 20).join(", ")}` }] };
|
|
437
|
-
}
|
|
438
|
-
|
|
439
|
-
const shadowL = (shot.width - wb.width * 2) / 2;
|
|
440
|
-
const shadowT = (shot.height - wb.height * 2) / 3;
|
|
441
|
-
const sx = wb.x + (match.bounds.x + match.bounds.width / 2 - shadowL) / 2;
|
|
442
|
-
const sy = wb.y + (match.bounds.y + match.bounds.height / 2 - shadowT) / 2 + (offset_y || 0);
|
|
443
|
-
|
|
444
|
-
await bridge.call("cg.mouseMove", { x: sx, y: sy });
|
|
445
|
-
await new Promise(r => setTimeout(r, 50));
|
|
446
|
-
await bridge.call("cg.mouseClick", { x: sx, y: sy });
|
|
447
|
-
|
|
448
|
-
return { content: [{ type: "text", text: `Clicked "${match.text}" at (${Math.round(sx)}, ${Math.round(sy)})` }] };
|
|
449
|
-
});
|
|
450
|
-
|
|
451
|
-
server.tool("type_text", "Type text using the keyboard", {
|
|
452
|
-
text: z.string().describe("Text to type"),
|
|
453
|
-
}, async ({ text }) => {
|
|
454
|
-
await ensureBridge();
|
|
455
|
-
await bridge.call("cg.typeText", { text });
|
|
456
|
-
return { content: [{ type: "text", text: "Typed: " + text }] };
|
|
457
|
-
});
|
|
458
|
-
|
|
459
|
-
server.tool("key", "Press a key combination", {
|
|
460
|
-
combo: z.string().describe("Key combo: 'cmd+c', 'enter', 'cmd+shift+n', 'space'. Use + to separate."),
|
|
461
|
-
}, async ({ combo }) => {
|
|
462
|
-
await ensureBridge();
|
|
463
|
-
await bridge.call("cg.keyCombo", { keys: combo.split("+") });
|
|
464
|
-
return { content: [{ type: "text", text: "Key: " + combo }] };
|
|
465
|
-
});
|
|
466
|
-
|
|
467
|
-
server.tool("drag", "Drag from one point to another", {
|
|
468
|
-
fromX: z.number(), fromY: z.number(),
|
|
469
|
-
toX: z.number(), toY: z.number(),
|
|
470
|
-
}, async ({ fromX, fromY, toX, toY }) => {
|
|
471
|
-
await ensureBridge();
|
|
472
|
-
await bridge.call("cg.mouseDrag", { fromX, fromY, toX, toY });
|
|
473
|
-
return { content: [{ type: "text", text: `Dragged (${fromX},${fromY}) → (${toX},${toY})` }] };
|
|
474
|
-
});
|
|
475
|
-
|
|
476
|
-
server.tool("scroll", "Scroll at a position", {
|
|
477
|
-
x: z.number(), y: z.number(),
|
|
478
|
-
deltaX: z.number().optional().describe("Horizontal scroll (default 0)"),
|
|
479
|
-
deltaY: z.number().describe("Vertical scroll (negative = down)"),
|
|
480
|
-
}, async ({ x, y, deltaX, deltaY }) => {
|
|
481
|
-
await ensureBridge();
|
|
482
|
-
await bridge.call("cg.scroll", { x, y, deltaX: deltaX || 0, deltaY });
|
|
483
|
-
return { content: [{ type: "text", text: "Scrolled" }] };
|
|
484
|
-
});
|
|
485
|
-
|
|
486
|
-
// ── CDP helper: get client for a tab ──
|
|
487
|
-
/**
 * Open a CDP client attached to a Chrome tab.
 *
 * @param tabId Target id of the tab. When omitted (or empty), the first target
 *              of type "page" returned by `CDP.List` is used.
 * @returns The connected client together with the resolved target id, the CDP
 *          module and the port obtained from `ensureCDP()`.
 * @throws Error("No tabs open") when no tab id is given and no page target exists.
 */
async function getCDPClient(tabId?: string): Promise<{ client: any; targetId: string; CDP: any; port: number }> {
  const { CDP, port } = await ensureCDP();

  const resolveTargetId = async (): Promise<string> => {
    if (tabId) return tabId;
    const targets = await CDP.List({ port });
    const firstPage = targets.find((t: any) => t.type === "page");
    if (!firstPage) throw new Error("No tabs open");
    return firstPage.id;
  };

  const targetId = await resolveTargetId();
  const client = await CDP({ port, target: targetId });
  return { client, targetId, CDP, port };
}
|
|
499
|
-
|
|
500
|
-
// ── Random delay helper ──
|
|
501
|
-
/**
 * Wait for a randomly chosen duration.
 *
 * @param min Lower bound of the pause in milliseconds.
 * @param max Upper bound of the pause in milliseconds.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
function randomDelay(min: number, max: number): Promise<void> {
  const span = max - min;
  const waitMs = min + Math.random() * span;
  return new Promise<void>((resolve) => {
    setTimeout(resolve, waitMs);
  });
}
|
|
504
|
-
|
|
505
|
-
// ═══════════════════════════════════════════════
|
|
506
|
-
// BROWSER — control Chrome pages via CDP (10ms, not OCR)
|
|
507
|
-
// ═══════════════════════════════════════════════
|
|
508
|
-
|
|
509
|
-
// Tool "browser_tabs": lists every CDP target of type "page" as
// "[id] title — url", one per line.
server.tool("browser_tabs", "List all open Chrome tabs", {}, async () => {
  const { CDP: cdp, port } = await ensureCDP();
  const allTargets = await cdp.List({ port });

  const lines: string[] = [];
  for (const target of allTargets) {
    if (target.type !== "page") continue;
    lines.push(`[${target.id}] ${target.title} — ${target.url}`);
  }

  const listing = lines.join("\n");
  return { content: [{ type: "text", text: listing || "No tabs open" }] };
});
|
|
516
|
-
|
|
517
|
-
// Tool "browser_open": asks CDP to create a new target for the URL and
// reports the new target's id.
server.tool(
  "browser_open",
  "Open a URL in Chrome (creates new tab)",
  {
    url: z.string().describe("URL to open"),
  },
  async ({ url }) => {
    const { CDP: cdp, port } = await ensureCDP();
    const created = await cdp.New({ port, url });
    const message = `Opened: ${created.id} — ${url}`;
    return { content: [{ type: "text", text: message }] };
  },
);
|
|
524
|
-
|
|
525
|
-
// Tool "browser_navigate": navigates a tab to a URL, polls document.readyState
// for up to 10 s, and reports the resulting page title.
//
// The tab is resolved through getCDPClient (same lookup the other browser tools
// use) and the client is closed in `finally`, so a failing CDP call no longer
// leaks the connection.
server.tool("browser_navigate", "Navigate the active Chrome tab to a URL", {
  url: z.string().describe("URL to navigate to"),
  tabId: z.string().optional().describe("Tab ID (from browser_tabs). Omit for most recent tab."),
}, async ({ url, tabId }) => {
  const { client } = await getCDPClient(tabId);
  try {
    await client.Page.enable();

    // Page.navigate reports failures (DNS errors, refused connections, ...) via
    // `errorText` instead of throwing; surface them rather than reporting success.
    const nav = await client.Page.navigate({ url });
    if (nav?.errorText) {
      return { content: [{ type: "text", text: `Navigation failed: ${nav.errorText}` }] };
    }

    // Wait for load: poll until the document is at least interactive.
    const deadline = Date.now() + 10000;
    while (Date.now() < deadline) {
      const state = await client.Runtime.evaluate({ expression: "document.readyState", returnByValue: true });
      if (state.result.value === "complete" || state.result.value === "interactive") break;
      await new Promise((resolve) => setTimeout(resolve, 200));
    }

    const title = await client.Runtime.evaluate({ expression: "document.title", returnByValue: true });
    return { content: [{ type: "text", text: `Navigated to: ${title.result.value}` }] };
  } finally {
    await client.close();
  }
});
|
|
551
|
-
|
|
552
|
-
// Tool "browser_js": evaluates caller-supplied JavaScript in a tab and returns
// the value (objects are pretty-printed as JSON). The first 500 characters of
// the code are written to the audit log before anything runs.
//
// NOTE(review): this executes arbitrary code in the page by design; the tool
// description carries the warning. The client is closed in `finally` so an
// evaluate failure does not leak the CDP connection.
server.tool("browser_js", "Execute JavaScript in a Chrome tab. Returns the result. WARNING: This runs arbitrary JS in the browser context — avoid on sensitive pages (banking, email). All executions are audit-logged.", {
  code: z.string().describe("JavaScript to execute. Must be an expression that returns a value. Use (() => { ... })() for multi-line."),
  tabId: z.string().optional().describe("Tab ID. Omit for most recent tab."),
}, async ({ code, tabId }) => {
  auditLog("browser_js", { code: code.slice(0, 500), tabId });

  const { client } = await getCDPClient(tabId);
  let result: any;
  try {
    await client.Runtime.enable();
    result = await client.Runtime.evaluate({
      expression: code,
      awaitPromise: true,
      returnByValue: true,
    });
  } finally {
    await client.close();
  }

  // A thrown exception in the page comes back as exceptionDetails, not as a rejection.
  if (result.exceptionDetails) {
    return { content: [{ type: "text", text: `JS Error: ${result.exceptionDetails.text}\n${result.exceptionDetails.exception?.description || ""}` }] };
  }

  const val = result.result.value;
  const text = typeof val === "object" ? JSON.stringify(val, null, 2) : String(val ?? "undefined");
  return { content: [{ type: "text", text }] };
});
|
|
582
|
-
|
|
583
|
-
// Tool "browser_dom": runs querySelectorAll(selector) in a tab and returns, for
// up to `limit` matches, the tag, id, class, text, href/src/value and bounding
// rect of each element as JSON.
//
// Fixes over the previous version:
//  - the CDP client is always closed (try/finally);
//  - an invalid selector (which throws inside the page) is reported instead of
//    producing an undefined text payload;
//  - `limit` is normalised to a positive integer before being spliced into the
//    page-side script.
server.tool("browser_dom", "Query the DOM of a Chrome page. Returns matching elements' text, attributes, and structure.", {
  selector: z.string().describe("CSS selector, e.g. 'button', '.nav a', '#main h2'"),
  tabId: z.string().optional().describe("Tab ID. Omit for most recent tab."),
  limit: z.number().optional().describe("Max results (default 20)"),
}, async ({ selector, tabId, limit }) => {
  const maxResults =
    typeof limit === "number" && Number.isFinite(limit) && limit >= 1 ? Math.floor(limit) : 20;

  const { client } = await getCDPClient(tabId);
  let result: any;
  try {
    await client.Runtime.enable();
    result = await client.Runtime.evaluate({
      expression: `(() => {
        const els = Array.from(document.querySelectorAll(${JSON.stringify(selector)})).slice(0, ${maxResults});
        return els.map((el, i) => ({
          index: i,
          tag: el.tagName.toLowerCase(),
          id: el.id || undefined,
          class: el.className?.toString()?.slice(0, 100) || undefined,
          text: el.textContent?.trim()?.slice(0, 200),
          href: el.href || undefined,
          src: el.src || undefined,
          value: el.value || undefined,
          rect: (() => { const r = el.getBoundingClientRect(); return { x: Math.round(r.x), y: Math.round(r.y), w: Math.round(r.width), h: Math.round(r.height) }; })(),
        }));
      })()`,
      returnByValue: true,
    });
  } finally {
    await client.close();
  }

  if (result.exceptionDetails) {
    const detail = result.exceptionDetails.exception?.description || result.exceptionDetails.text;
    return { content: [{ type: "text", text: `DOM query failed: ${detail}` }] };
  }

  return { content: [{ type: "text", text: JSON.stringify(result.result.value ?? [], null, 2) }] };
});
|
|
620
|
-
|
|
621
|
-
// Tool "browser_click": locates an element by CSS selector, scrolls it into
// view, and clicks its centre with a mouseMoved → mousePressed → mouseReleased
// sequence separated by short random pauses.
//
// Fixes over the previous version:
//  - the "not found" message is embedded with JSON.stringify; previously only
//    double quotes were escaped, so a selector containing a backslash or a
//    newline broke (or injected into) the page-side script;
//  - the CDP client is closed in `finally`, also when a dispatch call throws.
server.tool("browser_click", "Click an element in Chrome by CSS selector. Uses CDP Input.dispatchMouseEvent for realistic mouse events.", {
  selector: z.string().describe("CSS selector of element to click"),
  tabId: z.string().optional().describe("Tab ID. Omit for most recent tab."),
}, async ({ selector, tabId }) => {
  const { client } = await getCDPClient(tabId);
  try {
    await client.Runtime.enable();

    const result = await client.Runtime.evaluate({
      expression: `(() => {
        const el = document.querySelector(${JSON.stringify(selector)});
        if (!el) return { ok: false, reason: ${JSON.stringify(`Element not found: ${selector}`)} };
        el.scrollIntoView({ block: "center" });
        const r = el.getBoundingClientRect();
        return { ok: true, x: r.x + r.width / 2, y: r.y + r.height / 2, text: el.textContent?.trim()?.slice(0, 100) };
      })()`,
      returnByValue: true,
    });

    const val = result.result.value;
    if (!val?.ok) {
      return { content: [{ type: "text", text: val?.reason || "Element not found" }] };
    }

    const { x, y } = val;
    await client.Input.dispatchMouseEvent({ type: "mouseMoved", x, y });
    await randomDelay(30, 60);
    await client.Input.dispatchMouseEvent({ type: "mousePressed", x, y, button: "left", clickCount: 1 });
    await randomDelay(30, 80);
    await client.Input.dispatchMouseEvent({ type: "mouseReleased", x, y, button: "left", clickCount: 1 });

    return { content: [{ type: "text", text: `Clicked: "${val.text}" at (${Math.round(x)}, ${Math.round(y)})` }] };
  } finally {
    await client.close();
  }
});
|
|
655
|
-
|
|
656
|
-
// Tool "browser_type": focuses an input by CSS selector, optionally clears it
// (select-all + Backspace), then types the text one character at a time with a
// 30–80 ms random pause between characters.
//
// The CDP client is now closed in `finally`, so a failing key dispatch no
// longer leaks the connection.
server.tool("browser_type", "Type into an input field in Chrome. Uses CDP Input.dispatchKeyEvent for real keyboard events (works with React/Angular).", {
  selector: z.string().describe("CSS selector of the input"),
  text: z.string().describe("Text to type"),
  clear: z.boolean().optional().describe("Clear field first (default true)"),
  tabId: z.string().optional().describe("Tab ID"),
}, async ({ selector, text, clear, tabId }) => {
  const { client } = await getCDPClient(tabId);
  try {
    await client.Runtime.enable();

    // Focus the element
    const focusResult = await client.Runtime.evaluate({
      expression: `(() => {
        const el = document.querySelector(${JSON.stringify(selector)});
        if (!el) return { ok: false, reason: "Input not found" };
        el.scrollIntoView({ block: "center" });
        el.focus();
        return { ok: true };
      })()`,
      returnByValue: true,
    });

    if (!focusResult.result.value?.ok) {
      return { content: [{ type: "text", text: focusResult.result.value?.reason || "Input not found" }] };
    }

    // Clear if needed: select all + delete. Clearing is the default; only an
    // explicit `clear: false` skips it.
    if (clear !== false) {
      // CDP modifier bit field: 4 = Meta (Command), 2 = Ctrl.
      const selectAllModifier = process.platform === "darwin" ? 4 : 2;
      await client.Input.dispatchKeyEvent({ type: "keyDown", key: "a", code: "KeyA", modifiers: selectAllModifier });
      await client.Input.dispatchKeyEvent({ type: "keyUp", key: "a", code: "KeyA", modifiers: selectAllModifier });
      await client.Input.dispatchKeyEvent({ type: "keyDown", key: "Backspace", code: "Backspace" });
      await client.Input.dispatchKeyEvent({ type: "keyUp", key: "Backspace", code: "Backspace" });
      await randomDelay(30, 80);
    }

    // Type character by character with random delays
    for (const char of text) {
      await client.Input.dispatchKeyEvent({ type: "keyDown", text: char, key: char, unmodifiedText: char });
      await client.Input.dispatchKeyEvent({ type: "keyUp", text: char, key: char, unmodifiedText: char });
      await randomDelay(30, 80);
    }

    return { content: [{ type: "text", text: `Typed "${text}"` }] };
  } finally {
    await client.close();
  }
});
|
|
702
|
-
|
|
703
|
-
// Tool "browser_wait": re-evaluates a caller-supplied JS expression in a tab
// every 300 ms until it is truthy or the timeout (default 10 s) elapses.
//
// The tab is resolved through getCDPClient and the client is closed in
// `finally`, so a failing evaluate no longer leaks the connection.
server.tool("browser_wait", "Wait for a condition on a Chrome page", {
  condition: z.string().describe("JS expression that returns truthy when ready. e.g. 'document.querySelector(\".loaded\")'"),
  timeoutMs: z.number().optional().describe("Timeout in ms (default 10000)"),
  tabId: z.string().optional().describe("Tab ID"),
}, async ({ condition, timeoutMs, tabId }) => {
  const { client } = await getCDPClient(tabId);
  let met = false;
  try {
    await client.Runtime.enable();
    const deadline = Date.now() + (timeoutMs || 10000);
    while (Date.now() < deadline) {
      // `!!( … )` coerces to a boolean so the value can be returned by value.
      const probe = await client.Runtime.evaluate({ expression: `!!(${condition})`, returnByValue: true });
      if (probe.result.value) {
        met = true;
        break;
      }
      await new Promise((resolve) => setTimeout(resolve, 300));
    }
  } finally {
    await client.close();
  }
  return { content: [{ type: "text", text: met ? "Condition met" : "Timeout — condition not met" }] };
});
|
|
728
|
-
|
|
729
|
-
// Tool "browser_page_info": returns the tab's title, URL and the first 2000
// characters of document.body.innerText as JSON.
//
// The tab is resolved through getCDPClient and the client is closed in
// `finally`, so a failing evaluate no longer leaks the connection.
server.tool("browser_page_info", "Get current page title, URL, and text content summary", {
  tabId: z.string().optional().describe("Tab ID"),
}, async ({ tabId }) => {
  const { client } = await getCDPClient(tabId);
  let result: any;
  try {
    await client.Runtime.enable();
    result = await client.Runtime.evaluate({
      expression: `(() => ({
        title: document.title,
        url: location.href,
        text: document.body?.innerText?.slice(0, 2000) || "",
      }))()`,
      returnByValue: true,
    });
  } finally {
    await client.close();
  }
  return { content: [{ type: "text", text: JSON.stringify(result.result.value ?? {}, null, 2) }] };
});
|
|
753
|
-
|
|
754
|
-
// ═══════════════════════════════════════════════
|
|
755
|
-
// BROWSER STEALTH — anti-detection patches
|
|
756
|
-
// ═══════════════════════════════════════════════
|
|
757
|
-
|
|
758
|
-
// Page-side script injected by the browser_stealth tool.
//
// IMPORTANT: this string is evaluated by the browser as plain JavaScript, so it
// must not contain TypeScript syntax. The previous version used `as any`,
// `params: any` and `as PermissionStatus` inside the string, which made the
// whole script a SyntaxError and silently disabled every patch.
//
// The body is wrapped in an IIFE and the navigator overrides are configurable,
// so the script can be evaluated more than once in the same document without
// throwing (re-declared `const`, "Cannot redefine property").
const STEALTH_SCRIPT = `
(() => {
  // Hide navigator.webdriver flag
  Object.defineProperty(navigator, 'webdriver', { get: () => undefined, configurable: true });

  // Delete ChromeDriver leak variables
  for (const key of Object.keys(window)) {
    if (/^cdc_/.test(key)) delete window[key];
  }

  // Realistic plugins array
  Object.defineProperty(navigator, 'plugins', {
    configurable: true,
    get: () => [
      { name: 'Chrome PDF Plugin', filename: 'internal-pdf-viewer', description: 'Portable Document Format' },
      { name: 'Chrome PDF Viewer', filename: 'mhjfbmdgcfjbbpaeojofohoefgiehjai', description: '' },
      { name: 'Native Client', filename: 'internal-nacl-plugin', description: '' },
    ],
  });

  // Realistic languages
  Object.defineProperty(navigator, 'languages', { get: () => ['en-US', 'en'], configurable: true });

  // Patch chrome.runtime to look realistic (not headless)
  if (!window.chrome) window.chrome = {};
  if (!window.chrome.runtime) window.chrome.runtime = { connect: () => {}, sendMessage: () => {} };

  // Patch Permissions.query for notifications
  const origQuery = window.Permissions?.prototype?.query;
  if (origQuery) {
    window.Permissions.prototype.query = function (params) {
      if (params?.name === 'notifications') {
        return Promise.resolve({ state: 'denied', onchange: null });
      }
      return origQuery.call(this, params);
    };
  }
})();
`;
|
|
794
|
-
|
|
795
|
-
// Tool "browser_stealth": registers STEALTH_SCRIPT to run on every new document
// in the tab and also evaluates it once on the current page.
//
// Fixes over the previous version:
//  - the immediate evaluation's exceptionDetails are checked, so a script that
//    fails in the page is reported instead of being announced as a success;
//  - the CDP client is closed in `finally`.
server.tool("browser_stealth", "Inject anti-detection patches into Chrome page. Call once after navigating to a protected site. Hides webdriver flag, patches plugins/languages/permissions.", {
  tabId: z.string().optional().describe("Tab ID. Omit for most recent tab."),
}, async ({ tabId }) => {
  const { client } = await getCDPClient(tabId);
  try {
    await client.Page.enable();
    await client.Page.addScriptToEvaluateOnNewDocument({ source: STEALTH_SCRIPT });

    // Also evaluate immediately on current page
    await client.Runtime.enable();
    const outcome = await client.Runtime.evaluate({ expression: STEALTH_SCRIPT, returnByValue: true });
    if (outcome?.exceptionDetails) {
      const detail = outcome.exceptionDetails.exception?.description || outcome.exceptionDetails.text;
      return { content: [{ type: "text", text: `Stealth injection failed on the current page: ${detail}` }] };
    }

    return { content: [{ type: "text", text: "Stealth patches injected: webdriver hidden, plugins/languages/permissions patched." }] };
  } finally {
    await client.close();
  }
});
|
|
807
|
-
|
|
808
|
-
// ═══════════════════════════════════════════════
|
|
809
|
-
// BROWSER HUMAN-LIKE INPUT — anti-detection tools
|
|
810
|
-
// ═══════════════════════════════════════════════
|
|
811
|
-
|
|
812
|
-
// Tool "browser_fill_form": like browser_type, but the pause between keystrokes
// is configurable. With an average of `delayMs` (default 50), each pause is
// drawn from [max(10, avg - 20), avg + 30] milliseconds.
//
// Fixes over the previous version:
//  - the "not found" message is embedded with JSON.stringify; previously only
//    double quotes were escaped, so a selector containing a backslash or a
//    newline broke (or injected into) the page-side script;
//  - the CDP client is closed in `finally`;
//  - a negative `delayMs` is clamped to 0 so the delay range cannot invert.
server.tool("browser_fill_form", "Fill a form field with human-like typing (anti-detection). Uses real keyboard events via CDP Input domain.", {
  selector: z.string().describe("CSS selector of the input"),
  text: z.string().describe("Text to type"),
  clear: z.boolean().optional().describe("Clear field first (default true)"),
  delayMs: z.number().optional().describe("Avg delay between keystrokes in ms (default 50)"),
  tabId: z.string().optional().describe("Tab ID"),
}, async ({ selector, text, clear, delayMs, tabId }) => {
  const { client } = await getCDPClient(tabId);
  try {
    await client.Runtime.enable();

    // Focus the element
    const focusResult = await client.Runtime.evaluate({
      expression: `(() => {
        const el = document.querySelector(${JSON.stringify(selector)});
        if (!el) return { ok: false, reason: ${JSON.stringify(`Element not found: ${selector}`)} };
        el.scrollIntoView({ block: "center" });
        el.focus();
        return { ok: true };
      })()`,
      returnByValue: true,
    });
    if (!focusResult.result.value?.ok) {
      return { content: [{ type: "text", text: focusResult.result.value?.reason || "Element not found" }] };
    }

    // Clear if needed (default); only an explicit `clear: false` skips it.
    if (clear !== false) {
      // CDP modifier bit field: 4 = Meta (Command), 2 = Ctrl.
      const selectAllModifier = process.platform === "darwin" ? 4 : 2;
      await client.Input.dispatchKeyEvent({ type: "keyDown", key: "a", code: "KeyA", modifiers: selectAllModifier });
      await client.Input.dispatchKeyEvent({ type: "keyUp", key: "a", code: "KeyA", modifiers: selectAllModifier });
      await client.Input.dispatchKeyEvent({ type: "keyDown", key: "Backspace", code: "Backspace" });
      await client.Input.dispatchKeyEvent({ type: "keyUp", key: "Backspace", code: "Backspace" });
      await randomDelay(30, 80);
    }

    // Type character by character with random delays
    const avgDelay = Math.max(0, delayMs ?? 50);
    const minDelay = Math.max(10, avgDelay - 20);
    const maxDelay = avgDelay + 30;

    for (const char of text) {
      await client.Input.dispatchKeyEvent({ type: "keyDown", text: char, key: char, unmodifiedText: char });
      await client.Input.dispatchKeyEvent({ type: "keyUp", text: char, key: char, unmodifiedText: char });
      await randomDelay(minDelay, maxDelay);
    }

    return { content: [{ type: "text", text: `Typed "${text}" (${text.length} chars, human-like)` }] };
  } finally {
    await client.close();
  }
});
|
|
862
|
-
|
|
863
|
-
// Tool "browser_human_click": locates an element by CSS selector, scrolls it
// into view, and clicks its centre with a mouseMoved → mousePressed →
// mouseReleased sequence separated by short random pauses.
//
// Fixes over the previous version:
//  - the "not found" message is embedded with JSON.stringify; previously only
//    double quotes were escaped, so a selector containing a backslash or a
//    newline broke (or injected into) the page-side script;
//  - the CDP client is closed in `finally`, also when a dispatch call throws.
server.tool("browser_human_click", "Click an element with realistic mouse events (anti-detection). Dispatches mouseMoved → mousePressed → mouseReleased at element coordinates.", {
  selector: z.string().describe("CSS selector of element to click"),
  tabId: z.string().optional().describe("Tab ID. Omit for most recent tab."),
}, async ({ selector, tabId }) => {
  const { client } = await getCDPClient(tabId);
  try {
    await client.Runtime.enable();

    // Get element center coordinates
    const rectResult = await client.Runtime.evaluate({
      expression: `(() => {
        const el = document.querySelector(${JSON.stringify(selector)});
        if (!el) return { ok: false, reason: ${JSON.stringify(`Element not found: ${selector}`)} };
        el.scrollIntoView({ block: "center" });
        const r = el.getBoundingClientRect();
        return { ok: true, x: r.x + r.width / 2, y: r.y + r.height / 2, text: el.textContent?.trim()?.slice(0, 100) };
      })()`,
      returnByValue: true,
    });

    const val = rectResult.result.value;
    if (!val?.ok) {
      return { content: [{ type: "text", text: val?.reason || "Element not found" }] };
    }

    const { x, y } = val;

    // Simulate realistic mouse event sequence
    await client.Input.dispatchMouseEvent({ type: "mouseMoved", x, y });
    await randomDelay(30, 60);
    await client.Input.dispatchMouseEvent({ type: "mousePressed", x, y, button: "left", clickCount: 1 });
    await randomDelay(30, 80);
    await client.Input.dispatchMouseEvent({ type: "mouseReleased", x, y, button: "left", clickCount: 1 });

    return { content: [{ type: "text", text: `Clicked: "${val.text}" at (${Math.round(x)}, ${Math.round(y)})` }] };
  } finally {
    await client.close();
  }
});
|
|
900
|
-
|
|
901
|
-
// ═══════════════════════════════════════════════
|
|
902
|
-
// PLATFORM PLAYBOOKS — lazy-loaded site knowledge
|
|
903
|
-
// ═══════════════════════════════════════════════
|
|
904
|
-
|
|
905
|
-
// Directory holding one "<platform>.json" playbook per platform.
const playbooksDir = path.resolve(__dirname, "playbooks");

// Tool "platform_guide": loads "<platform>.json" from the playbooks directory
// and returns either the whole document or one section of it.
//
// Fixes over the previous version:
//  - `platform` is rejected when it is not a plain file name, so values such as
//    "../../secrets" can no longer read JSON files outside the playbooks dir;
//  - a malformed playbook file yields an error message instead of an unhandled
//    JSON.parse exception;
//  - a playbook without `urls` / `detection` returns "{}" instead of an
//    undefined text payload (JSON.stringify(undefined) is undefined).
server.tool("platform_guide", "Get automation guide for a platform (selectors, URLs, flows, error solutions). Available: devpost. Zero cost — only loads when called.", {
  platform: z.string().describe("Platform name, e.g. 'devpost'"),
  section: z.enum(["all", "urls", "flows", "selectors", "errors", "detection"]).optional().describe("Section to return (default: all). Use 'errors' for just error+solution pairs."),
}, async ({ platform, section }) => {
  const reply = (text: string) => ({ content: [{ type: "text" as const, text }] });

  // Names of the playbooks currently on disk (file names without ".json").
  const listAvailable = (): string[] =>
    fs.existsSync(playbooksDir)
      ? fs.readdirSync(playbooksDir).filter((f) => f.endsWith(".json")).map((f) => path.basename(f, ".json"))
      : [];
  const notFound = () =>
    reply(`No playbook for "${platform}". Available: ${listAvailable().join(", ") || "none"}`);

  const name = platform.toLowerCase();
  // A name that differs from its own basename contains a path component.
  if (!name || path.basename(name) !== name) return notFound();

  const filePath = path.resolve(playbooksDir, `${name}.json`);
  if (!fs.existsSync(filePath)) return notFound();

  let data: any;
  try {
    data = JSON.parse(fs.readFileSync(filePath, "utf-8"));
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    return reply(`Playbook "${name}" could not be read: ${reason}`);
  }

  switch (section || "all") {
    case "errors": {
      const errors = data.errors || [];
      const text = errors.map((e: any, i: number) =>
        `${i + 1}. [${e.severity}] ${e.error}\n   Context: ${e.context}\n   Solution: ${e.solution}`
      ).join("\n\n");
      return reply(text || "No errors documented.");
    }

    case "urls":
      return reply(JSON.stringify(data.urls ?? {}, null, 2));

    case "detection":
      return reply(JSON.stringify(data.detection ?? {}, null, 2));

    case "flows": {
      const flows = data.flows || {};
      const text = Object.entries(flows).map(([flowName, flow]: [string, any]) => {
        const steps = (flow.steps || []).map((step: string, i: number) => `  ${i + 1}. ${step}`).join("\n");
        const tips = (flow.tips || []).map((tip: string) => `  TIP: ${tip}`).join("\n");
        return `### ${flowName}\n${steps}${tips ? "\n" + tips : ""}`;
      }).join("\n\n");
      return reply(text);
    }

    case "selectors": {
      const flows = data.flows || {};
      const text = Object.entries(flows).map(([flowName, flow]: [string, any]) => {
        const sels = flow.selectors || {};
        const lines = Object.entries(sels).map(([k, v]) => `  ${k}: ${v}`).join("\n");
        return `### ${flowName}\n${lines}`;
      }).join("\n\n");
      return reply(text);
    }

    default:
      // "all" — return full playbook
      return reply(JSON.stringify(data, null, 2));
  }
});
|
|
961
|
-
|
|
962
|
-
// Tool "export_playbook": builds a playbook JSON for a platform from the memory
// store (actions, errors, strategies) plus a best-effort scan of the current
// page for form selectors, writes it to "<playbooksDir>/<platform>.json", and
// returns a summary followed by the JSON.
//
// Fixes over the previous version:
//  - `platform` must be a plain file name; it is used to build the output path,
//    so path components would have allowed writing outside the playbooks dir;
//  - URLs scraped from action results by regex are parsed defensively — one
//    malformed match no longer aborts the whole export;
//  - a non-string `result` on an action no longer crashes the URL scan;
//  - the always-true `if (tabId || true)` guard is removed and the CDP client
//    used for the selector scan is closed even when the scan throws;
//  - error relevance uses a precomputed set of tools seen on the domain instead
//    of rescanning every action for every error.
server.tool("export_playbook", "Generate a playbook JSON from your session. Extracts URLs, selectors, errors+solutions from memory. Share the output with ScreenHand to help others automate this platform.", {
  platform: z.string().describe("Platform name, e.g. 'linkedin', 'twitter'"),
  domain: z.string().describe("Domain to filter actions by, e.g. 'linkedin.com'"),
  description: z.string().optional().describe("Short description of the platform"),
  tabId: z.string().optional().describe("Tab ID to scan current page for selectors"),
}, async ({ platform, domain, description, tabId }) => {
  const platformLower = platform.toLowerCase();
  // A name that differs from its own basename contains a path component.
  if (!platformLower || path.basename(platformLower) !== platformLower) {
    return { content: [{ type: "text", text: `Invalid platform name "${platform}". Use a plain name without path separators, e.g. 'linkedin'.` }] };
  }

  const domainLower = domain.toLowerCase();
  const mentionsDomain = (value: unknown): value is string =>
    typeof value === "string" && value.toLowerCase().includes(domainLower);

  // 1. Pull URLs and errors from memory store
  const actions = memoryStore.readActions();
  const errors = memoryStore.readErrors();
  const strategies = memoryStore.readStrategies();

  // Single pass over the actions: collect URLs on this domain, remember which
  // tools were used against it, and count the matching actions.
  const urlSet = new Set<string>();
  const domainTools = new Set<unknown>();
  let domainActionCount = 0;
  for (const a of actions) {
    const params = (a.params as Record<string, any>) || {};
    if (mentionsDomain(params.url)) {
      urlSet.add(params.url);
      domainTools.add(a.tool);
      domainActionCount++;
    }
    const resultText = typeof a.result === "string" ? a.result : "";
    for (const u of resultText.match(/https?:\/\/[^\s"]+/g) ?? []) {
      if (mentionsDomain(u)) urlSet.add(u);
    }
  }

  // Extract errors related to this domain's tools: either the error's own
  // url/selector mentions the domain, or its tool was used on the domain.
  const domainErrors: Array<{ error: string; tool: string; resolution: string | null; occurrences: number }> = [];
  for (const e of errors) {
    const params = (e.params as Record<string, any>) || {};
    const subject = params.url || params.selector || "";
    if (mentionsDomain(subject) || domainTools.has(e.tool)) {
      domainErrors.push({
        error: e.error,
        tool: e.tool,
        resolution: e.resolution,
        occurrences: e.occurrences,
      });
    }
  }

  // Extract relevant strategies
  const domainStrategies = strategies.filter(s =>
    s.task.toLowerCase().includes(domainLower) ||
    s.task.toLowerCase().includes(platformLower) ||
    s.tags.some(t => t.toLowerCase().includes(platformLower))
  );

  // 2. Scan the current page for selectors. Deliberately best-effort: with no
  // browser, or a page on another domain, the playbook is exported without them.
  let pageSelectors: Record<string, string> = {};
  let client: any;
  try {
    ({ client } = await getCDPClient(tabId));
    await client.Runtime.enable();
    const scanResult = await client.Runtime.evaluate({
      expression: `(() => {
        const url = location.href;
        if (!url.toLowerCase().includes(${JSON.stringify(domainLower)})) return { match: false, url };
        const inputs = Array.from(document.querySelectorAll('input,select,textarea,button[type="submit"]'));
        const selectors = {};
        for (const el of inputs) {
          const id = el.id;
          const name = el.name || el.getAttribute('aria-label') || el.placeholder || el.type || el.tagName.toLowerCase();
          const key = (id || name || '').replace(/[^a-zA-Z0-9_]/g, '_').toLowerCase();
          if (!key) continue;
          if (id) selectors[key] = '#' + id;
          else if (el.name) selectors[key] = '[name="' + el.name + '"]';
          else if (el.getAttribute('aria-label')) selectors[key] = '[aria-label="' + el.getAttribute('aria-label') + '"]';
        }
        return { match: true, url, selectors };
      })()`,
      returnByValue: true,
    });
    if (scanResult.result.value?.match) {
      pageSelectors = scanResult.result.value.selectors || {};
    }
  } catch {
    // No browser or wrong page — skip selector scan
  } finally {
    if (client) {
      try {
        await client.close();
      } catch {
        // Closing is best-effort too; the export does not depend on it.
      }
    }
  }

  // Key each URL by its sanitised path ("home" for the root). Strings that do
  // not parse as URLs are skipped; on key collisions the later URL wins.
  const urlEntries: Array<[string, string]> = [];
  for (const u of Array.from(urlSet).sort()) {
    let pathname: string;
    try {
      pathname = new URL(u).pathname;
    } catch {
      continue;
    }
    const pathKey = pathname.replace(/^\//, "").replace(/\//g, "_").replace(/[^a-zA-Z0-9_]/g, "") || "home";
    urlEntries.push([pathKey, u]);
  }

  // 3. Build playbook JSON
  const playbook = {
    platform: platformLower,
    version: "1.0.0",
    updated: new Date().toISOString().slice(0, 10),
    description: description || `Automation playbook for ${platform}`,
    urls: Object.fromEntries(urlEntries),
    flows: {
      discovered: {
        steps: domainStrategies.length > 0
          ? domainStrategies[0]!.steps.map((s: any) => `${s.tool}(${JSON.stringify(s.params)})`)
          : ["No strategies recorded yet. Use the platform, then call export_playbook again."],
        selectors: pageSelectors,
      },
    },
    detection: {
      is_logged_in: "// Add detection JS for logged-in state",
    },
    errors: domainErrors.map(e => ({
      error: e.error,
      context: `Tool: ${e.tool} (${e.occurrences}x)`,
      solution: e.resolution || "No resolution recorded yet. Fix it and call memory_save.",
      severity: e.occurrences >= 3 ? "high" : "medium",
    })),
    _meta: {
      exported_from: "screenhand",
      actions_count: domainActionCount,
      strategies_count: domainStrategies.length,
    },
  };

  // 4. Save to playbooks dir
  const outPath = path.resolve(playbooksDir, `${platformLower}.json`);
  const exists = fs.existsSync(outPath);

  fs.mkdirSync(playbooksDir, { recursive: true });
  fs.writeFileSync(outPath, JSON.stringify(playbook, null, 2));

  return {
    content: [{
      type: "text",
      text: `${exists ? "Updated" : "Created"} playbook: playbooks/${platformLower}.json\n\n` +
        `URLs found: ${urlSet.size}\n` +
        `Selectors found: ${Object.keys(pageSelectors).length}\n` +
        `Errors documented: ${domainErrors.length}\n` +
        `Strategies: ${domainStrategies.length}\n\n` +
        `Share this file to help others automate ${platform}.\n\n` +
        JSON.stringify(playbook, null, 2),
    }],
  };
});
|
|
1113
|
-
|
|
1114
|
-
// ═══════════════════════════════════════════════
|
|
1115
|
-
// APPLESCRIPT — control scriptable apps directly
|
|
1116
|
-
// ═══════════════════════════════════════════════
|
|
1117
|
-
|
|
1118
|
-
// Tool "applescript": runs the caller-supplied script through `osascript -e`
// in a shell and replies with the trimmed standard output.
//   script — AppleScript source text to execute.
// The reply is always a single text item: the output, "(no output)" when the
// output is empty, or "Error: …" when the command throws. On Windows the
// command is never run and a fixed explanatory message is returned instead.
server.tool("applescript", "Run an AppleScript command. For controlling Finder, Safari, Mail, Notes, etc. (macOS only). WARNING: Executes arbitrary AppleScript — can perform destructive actions (delete files, send emails). All executions are audit-logged.", {
  script: z.string().describe("AppleScript code to execute"),
}, async ({ script }) => {
  // Audit entry is written first, so the Windows early-exit below is logged too.
  // Only the first 500 characters of the script are recorded.
  auditLog("applescript", { script: script.slice(0, 500) });

  const onWindows = process.platform === "win32";
  if (onWindows) {
    const unsupported = "AppleScript is not supported on Windows. Use ui_tree, ui_press, and other accessibility tools instead.";
    return { content: [{ type: "text", text: unsupported }] };
  }

  // The script travels inside a single-quoted shell word. Each embedded single
  // quote is rewritten as '\'' (close the quote, emit an escaped quote, reopen).
  const shellSafeScript = script.split("'").join("'\\''");
  const command = `osascript -e '${shellSafeScript}'`;

  let text: string;
  try {
    const stdout = execSync(command, { encoding: "utf-8", timeout: 15000 });
    const trimmed = stdout.trim();
    text = trimmed || "(no output)";
  } catch (e: any) {
    // Prefer the process's stderr; fall back to the exception message.
    text = "Error: " + (e.stderr || e.message);
  }
  return { content: [{ type: "text", text }] };
});
|
|
1135
|
-
|
|
1136
|
-
// ═══════════════════════════════════════════════
|
|
1137
|
-
// MEMORY — recall past strategies and error patterns
|
|
1138
|
-
// ═══════════════════════════════════════════════
|
|
1139
|
-
|
|
1140
|
-
// Tool "memory_recall": asks recallEngine.recallStrategies for strategies
// matching `task` and renders them as a numbered text listing.
//   task  — description of what the caller wants to do.
//   limit — optional cap on results; 5 is used when it is null/undefined.
// Replies with a fixed hint message when nothing matches.
originalTool("memory_recall", "Have I done something like this before? Searches past successful strategies by keyword similarity.", {
  task: z.string().describe("Describe the task you want to accomplish"),
  limit: z.number().optional().describe("Max results (default 5)"),
}, async ({ task, limit }) => {
  const maxResults = limit ?? 5;
  const found = recallEngine.recallStrategies(task, maxResults);

  if (found.length === 0) {
    const hint = "No matching strategies found. Try memory_save after completing a task to build up knowledge.";
    return { content: [{ type: "text" as const, text: hint }] };
  }

  // One section per match: a header line followed by its numbered steps.
  const sections: string[] = [];
  found.forEach((match, rank) => {
    const numberedSteps = match.steps
      .map((step, n) => ` ${n + 1}. ${step.tool}(${JSON.stringify(step.params)})`)
      .join("\n");
    const header = `${rank + 1}. "${match.task}" (used ${match.successCount}x, score: ${match.score.toFixed(2)})`;
    sections.push(`${header}\n${numberedSteps}`);
  });

  // Sections are separated by a blank line.
  return { content: [{ type: "text" as const, text: sections.join("\n\n") }] };
});
|
|
1154
|
-
|
|
1155
|
-
// Tool "memory_save": ends the current session as successful via
// sessionTracker.endSession(true, task) and reports the resulting strategy.
//   task — short description of what was accomplished.
//   tags — optional extra tags; merged (de-duplicated, existing tags first)
//          into the strategy's own tags.
// Replies with a fixed message when endSession yields no strategy.
originalTool("memory_save", "This approach worked — remember it. Saves the current session's action sequence as a reusable strategy.", {
  task: z.string().describe("Short description of the task that was accomplished"),
  tags: z.array(z.string()).optional().describe("Optional tags for easier recall"),
}, async ({ task, tags }) => {
  const saved = sessionTracker.endSession(true, task);
  if (!saved) {
    const nothingRecorded = "No actions recorded in the current session. Perform some tool calls first, then save.";
    return { content: [{ type: "text" as const, text: nothingRecorded }] };
  }

  if (tags?.length) {
    // A Set keeps first-seen order, so existing tags stay ahead of the new ones.
    const merged = new Set(saved.tags);
    for (const tag of tags) {
      merged.add(tag);
    }
    saved.tags = Array.from(merged);
    // Persist again so the stored copy carries the merged tags.
    // NOTE(review): presumably endSession already stored this strategy once —
    // confirm appendStrategy does not leave a duplicate entry.
    memoryStore.appendStrategy(saved);
  }

  const summary = `Saved strategy "${task}" with ${saved.steps.length} steps. Tags: ${saved.tags.join(", ")}`;
  return { content: [{ type: "text" as const, text: summary }] };
});
|
|
1170
|
-
|
|
1171
|
-
// Tool "memory_errors": lists the error patterns returned by
// recallEngine.recallErrors, one numbered line per pattern, with the recorded
// resolution (if any) on a following "Fix:" line.
//   tool — optional tool name passed straight through to recallErrors.
// Replies with a fixed message when the list is empty; the wording depends on
// whether a tool name was given.
originalTool("memory_errors", "What goes wrong with this tool? Shows known error patterns and resolutions.", {
  tool: z.string().optional().describe("Tool name to filter by (omit for all errors)"),
}, async ({ tool }) => {
  const known = recallEngine.recallErrors(tool);

  if (known.length === 0) {
    let emptyMessage: string;
    if (tool) {
      emptyMessage = `No known error patterns for "${tool}".`;
    } else {
      emptyMessage = "No error patterns recorded yet.";
    }
    return { content: [{ type: "text" as const, text: emptyMessage }] };
  }

  const rows = known.map((entry, idx) => {
    let row = `${idx + 1}. ${entry.tool}: "${entry.error}" (${entry.occurrences}x)`;
    if (entry.resolution) {
      row += `\n Fix: ${entry.resolution}`;
    }
    return row;
  });

  return { content: [{ type: "text" as const, text: rows.join("\n") }] };
});
|
|
1183
|
-
|
|
1184
|
-
// Tool "memory_stats": formats the object returned by memoryStore.getStats()
// as a plain-text report. Takes no arguments.
// The report has five summary lines and, when topTools is non-empty, a blank
// line plus a "Top tools:" section with one line per tool.
originalTool("memory_stats", "How much have I learned? Shows total actions, strategies, error patterns, and success rates.", {}, async () => {
  const {
    totalActions,
    totalStrategies,
    totalErrors,
    successRate,
    diskUsageBytes,
    topTools,
  } = memoryStore.getStats();

  // successRate is scaled by 100 for display; bytes are divided by 1024 for KB.
  const successPercent = (successRate * 100).toFixed(1);
  const diskKilobytes = (diskUsageBytes / 1024).toFixed(1);

  const report = [
    `Actions logged: ${totalActions}`,
    `Strategies saved: ${totalStrategies}`,
    `Error patterns: ${totalErrors}`,
    `Success rate: ${successPercent}%`,
    `Disk usage: ${diskKilobytes} KB`,
  ];

  if (topTools.length > 0) {
    const toolLines = topTools.map(({ tool, count }) => ` ${tool}: ${count} calls`);
    report.push("", "Top tools:", ...toolLines);
  }

  return { content: [{ type: "text" as const, text: report.join("\n") }] };
});
|
|
1201
|
-
|
|
1202
|
-
// Tool "memory_clear": forwards the chosen category to memoryStore.clear and
// confirms what was cleared.
//   what — one of "all", "actions", "strategies", "errors".
originalTool("memory_clear", "Forget everything or just a specific category. Clears stored memory data.", {
  what: z.enum(["all", "actions", "strategies", "errors"]).describe("What to clear"),
}, async ({ what }) => {
  memoryStore.clear(what);

  // "all" gets a friendlier label; every other category is echoed verbatim.
  const clearedLabel = what === "all" ? "all memory data" : what;
  return { content: [{ type: "text" as const, text: `Cleared ${clearedLabel}.` }] };
});
|
|
1208
|
-
|
|
1209
|
-
// ═══════════════════════════════════════════════
|
|
1210
|
-
// START
|
|
1211
|
-
// ═══════════════════════════════════════════════
|
|
1212
|
-
|
|
1213
|
-
// Entry point: creates a StdioServerTransport and connects `server` to it.
// Resolves once server.connect() resolves; any rejection propagates to the caller.
async function main() {
  await server.connect(new StdioServerTransport());
}
|
|
1217
|
-
|
|
1218
|
-
// Start the server. If main() rejects, write the reason to stderr and exit
// with status 1.
main().catch((err) => {
  // A rejection value is not guaranteed to be an Error: it may be a string,
  // null, or any other value. Use its message when it has one; otherwise
  // stringify the value itself so the log never reads "undefined" and the
  // handler cannot throw before process.exit runs.
  const reason = err?.message ?? String(err);
  process.stderr.write("MCP server error: " + reason + "\n");
  process.exit(1);
});
|