screenhand 0.2.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +165 -446
- package/bin/darwin-arm64/macos-bridge +0 -0
- package/dist/mcp-desktop.js +3615 -400
- package/dist/scripts/export-help-center.js +112 -0
- package/dist/scripts/marketing-loop.js +117 -0
- package/dist/scripts/observer-daemon.js +288 -0
- package/dist/scripts/orchestrator-daemon.js +399 -0
- package/dist/scripts/threads-campaign.js +208 -0
- package/dist/src/community/fetcher.js +109 -0
- package/dist/src/community/index.js +6 -0
- package/dist/src/community/publisher.js +191 -0
- package/dist/src/community/remote-api.js +121 -0
- package/dist/src/community/types.js +3 -0
- package/dist/src/community/validator.js +95 -0
- package/dist/src/context-tracker.js +489 -0
- package/dist/src/ingestion/coverage-auditor.js +233 -0
- package/dist/src/ingestion/doc-parser.js +164 -0
- package/dist/src/ingestion/index.js +8 -0
- package/dist/src/ingestion/menu-scanner.js +152 -0
- package/dist/src/ingestion/reference-merger.js +186 -0
- package/dist/src/ingestion/shortcut-extractor.js +180 -0
- package/dist/src/ingestion/tutorial-extractor.js +170 -0
- package/dist/src/ingestion/types.js +3 -0
- package/dist/src/jobs/manager.js +82 -14
- package/dist/src/jobs/runner.js +138 -15
- package/dist/src/learning/engine.js +356 -0
- package/dist/src/learning/index.js +9 -0
- package/dist/src/learning/locator-policy.js +120 -0
- package/dist/src/learning/pattern-policy.js +89 -0
- package/dist/src/learning/recovery-policy.js +116 -0
- package/dist/src/learning/sensor-policy.js +115 -0
- package/dist/src/learning/timing-model.js +204 -0
- package/dist/src/learning/topology-policy.js +90 -0
- package/dist/src/learning/types.js +9 -0
- package/dist/src/logging/timeline-logger.js +4 -1
- package/dist/src/memory/playbook-seeds.js +200 -0
- package/dist/src/memory/recall.js +60 -8
- package/dist/src/memory/service.js +30 -5
- package/dist/src/memory/store.js +34 -5
- package/dist/src/native/bridge-client.js +253 -31
- package/dist/src/observer/state.js +199 -0
- package/dist/src/observer/types.js +43 -0
- package/dist/src/orchestrator/state.js +68 -0
- package/dist/src/orchestrator/types.js +22 -0
- package/dist/src/perception/ax-source.js +162 -0
- package/dist/src/perception/cdp-source.js +162 -0
- package/dist/src/perception/coordinator.js +771 -0
- package/dist/src/perception/frame-differ.js +287 -0
- package/dist/src/perception/index.js +22 -0
- package/dist/src/perception/manager.js +199 -0
- package/dist/src/perception/types.js +47 -0
- package/dist/src/perception/vision-source.js +399 -0
- package/dist/src/planner/deterministic.js +298 -0
- package/dist/src/planner/executor.js +870 -0
- package/dist/src/planner/goal-store.js +92 -0
- package/dist/src/planner/index.js +21 -0
- package/dist/src/planner/planner.js +520 -0
- package/dist/src/planner/tool-registry.js +71 -0
- package/dist/src/planner/types.js +22 -0
- package/dist/src/platform/explorer.js +213 -0
- package/dist/src/platform/help-center-markdown.js +527 -0
- package/dist/src/platform/learner.js +257 -0
- package/dist/src/playbook/engine.js +296 -11
- package/dist/src/playbook/mcp-recorder.js +204 -0
- package/dist/src/playbook/recorder.js +3 -2
- package/dist/src/playbook/runner.js +1 -1
- package/dist/src/playbook/store.js +139 -10
- package/dist/src/recovery/detectors.js +156 -0
- package/dist/src/recovery/engine.js +327 -0
- package/dist/src/recovery/index.js +20 -0
- package/dist/src/recovery/strategies.js +274 -0
- package/dist/src/recovery/types.js +20 -0
- package/dist/src/runtime/accessibility-adapter.js +55 -18
- package/dist/src/runtime/applescript-adapter.js +8 -2
- package/dist/src/runtime/cdp-chrome-adapter.js +1 -1
- package/dist/src/runtime/executor.js +23 -3
- package/dist/src/runtime/locator-cache.js +24 -2
- package/dist/src/runtime/service.js +59 -15
- package/dist/src/runtime/session-manager.js +4 -1
- package/dist/src/runtime/vision-adapter.js +2 -1
- package/dist/src/state/app-map-types.js +72 -0
- package/dist/src/state/app-map.js +1974 -0
- package/dist/src/state/entity-tracker.js +108 -0
- package/dist/src/state/fusion.js +96 -0
- package/dist/src/state/index.js +21 -0
- package/dist/src/state/ladder-generator.js +236 -0
- package/dist/src/state/persistence.js +156 -0
- package/dist/src/state/types.js +17 -0
- package/dist/src/state/world-model.js +1456 -0
- package/dist/src/util/atomic-write.js +19 -4
- package/dist/src/util/sanitize.js +146 -0
- package/dist-app-maps/com.figma.Desktop.json +959 -0
- package/dist-app-maps/com.hnc.Discord.json +1146 -0
- package/dist-app-maps/notion.id.json +2831 -0
- package/dist-playbooks/canva-screenhand-carousel.json +445 -0
- package/dist-playbooks/codex-desktop.json +76 -0
- package/dist-playbooks/competitor-research-stack.json +122 -0
- package/dist-playbooks/davinci-color-grade.json +153 -0
- package/dist-playbooks/davinci-edit-timeline.json +162 -0
- package/dist-playbooks/davinci-render.json +114 -0
- package/dist-playbooks/devto.json +52 -0
- package/dist-playbooks/discord.json +41 -0
- package/dist-playbooks/google-flow-create-project.json +59 -0
- package/dist-playbooks/google-flow-edit-image.json +90 -0
- package/dist-playbooks/google-flow-edit-video.json +90 -0
- package/dist-playbooks/google-flow-generate-image.json +68 -0
- package/dist-playbooks/google-flow-generate-video.json +191 -0
- package/dist-playbooks/google-flow-open-project.json +48 -0
- package/dist-playbooks/google-flow-open-scenebuilder.json +64 -0
- package/dist-playbooks/google-flow-search-assets.json +64 -0
- package/dist-playbooks/instagram.json +57 -0
- package/dist-playbooks/linkedin.json +52 -0
- package/dist-playbooks/n8n.json +43 -0
- package/dist-playbooks/reddit.json +52 -0
- package/dist-playbooks/threads.json +59 -0
- package/dist-playbooks/x-twitter.json +59 -0
- package/dist-playbooks/youtube.json +59 -0
- package/dist-references/canva.json +646 -0
- package/dist-references/codex-desktop.json +305 -0
- package/dist-references/davinci-resolve-keyboard.json +594 -0
- package/dist-references/davinci-resolve-menu-map.json +1139 -0
- package/dist-references/davinci-resolve-menus-batch1.json +116 -0
- package/dist-references/davinci-resolve-menus-batch2.json +372 -0
- package/dist-references/davinci-resolve-menus-batch3.json +330 -0
- package/dist-references/davinci-resolve-menus-batch4.json +297 -0
- package/dist-references/davinci-resolve-shortcuts.json +333 -0
- package/dist-references/devpost.json +186 -0
- package/dist-references/devto.json +317 -0
- package/dist-references/discord.json +549 -0
- package/dist-references/figma.json +1186 -0
- package/dist-references/finder.json +146 -0
- package/dist-references/google-ads-transparency.json +95 -0
- package/dist-references/google-flow.json +649 -0
- package/dist-references/instagram.json +341 -0
- package/dist-references/linkedin.json +324 -0
- package/dist-references/meta-ad-library.json +86 -0
- package/dist-references/n8n.json +387 -0
- package/dist-references/notes.json +27 -0
- package/dist-references/notion.json +163 -0
- package/dist-references/reddit.json +341 -0
- package/dist-references/threads.json +337 -0
- package/dist-references/x-twitter.json +403 -0
- package/dist-references/youtube.json +373 -0
- package/native/macos-bridge/Package.swift +22 -0
- package/native/macos-bridge/Sources/AccessibilityBridge.swift +482 -0
- package/native/macos-bridge/Sources/AppManagement.swift +339 -0
- package/native/macos-bridge/Sources/CoreGraphicsBridge.swift +537 -0
- package/native/macos-bridge/Sources/ObserverBridge.swift +120 -0
- package/native/macos-bridge/Sources/StreamCapture.swift +136 -0
- package/native/macos-bridge/Sources/VisionBridge.swift +238 -0
- package/native/macos-bridge/Sources/main.swift +498 -0
- package/native/windows-bridge/AppManagement.cs +234 -0
- package/native/windows-bridge/InputBridge.cs +436 -0
- package/native/windows-bridge/Program.cs +270 -0
- package/native/windows-bridge/ScreenCapture.cs +453 -0
- package/native/windows-bridge/UIAutomationBridge.cs +571 -0
- package/native/windows-bridge/WindowsBridge.csproj +17 -0
- package/package.json +12 -1
- package/scripts/postinstall.cjs +127 -0
- package/dist/.audit-log.jsonl +0 -55
- package/dist/.screenhand/memory/.lock +0 -1
- package/dist/.screenhand/memory/actions.jsonl +0 -85
- package/dist/.screenhand/memory/errors.jsonl +0 -5
- package/dist/.screenhand/memory/errors.jsonl.bak +0 -4
- package/dist/.screenhand/memory/state.json +0 -35
- package/dist/.screenhand/memory/state.json.bak +0 -35
- package/dist/.screenhand/memory/strategies.jsonl +0 -12
- package/dist/agent/cli.js +0 -73
- package/dist/agent/loop.js +0 -258
- package/dist/config.js +0 -9
- package/dist/index.js +0 -56
- package/dist/logging/timeline-logger.js +0 -29
- package/dist/mcp/mcp-stdio-server.js +0 -448
- package/dist/mcp/server.js +0 -347
- package/dist/mcp-entry.js +0 -59
- package/dist/memory/recall.js +0 -160
- package/dist/memory/research.js +0 -98
- package/dist/memory/seeds.js +0 -89
- package/dist/memory/session.js +0 -161
- package/dist/memory/store.js +0 -391
- package/dist/memory/types.js +0 -4
- package/dist/monitor/codex-monitor.js +0 -377
- package/dist/monitor/task-queue.js +0 -84
- package/dist/monitor/types.js +0 -49
- package/dist/native/bridge-client.js +0 -174
- package/dist/native/macos-bridge-client.js +0 -5
- package/dist/npm-publish-helper.js +0 -117
- package/dist/npm-token-cdp.js +0 -113
- package/dist/npm-token-create.js +0 -135
- package/dist/npm-token-finish.js +0 -126
- package/dist/playbook/engine.js +0 -193
- package/dist/playbook/index.js +0 -4
- package/dist/playbook/recorder.js +0 -519
- package/dist/playbook/runner.js +0 -392
- package/dist/playbook/store.js +0 -166
- package/dist/playbook/types.js +0 -4
- package/dist/runtime/accessibility-adapter.js +0 -377
- package/dist/runtime/app-adapter.js +0 -48
- package/dist/runtime/applescript-adapter.js +0 -283
- package/dist/runtime/ax-role-map.js +0 -80
- package/dist/runtime/browser-adapter.js +0 -36
- package/dist/runtime/cdp-chrome-adapter.js +0 -505
- package/dist/runtime/composite-adapter.js +0 -205
- package/dist/runtime/executor.js +0 -250
- package/dist/runtime/locator-cache.js +0 -12
- package/dist/runtime/planning-loop.js +0 -47
- package/dist/runtime/service.js +0 -372
- package/dist/runtime/session-manager.js +0 -28
- package/dist/runtime/state-observer.js +0 -105
- package/dist/runtime/vision-adapter.js +0 -208
- package/dist/test-mcp-protocol.js +0 -138
- package/dist/types.js +0 -1
|
@@ -1,448 +0,0 @@
|
|
|
1
|
-
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
2
|
-
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
3
|
-
import { z } from "zod";
|
|
4
|
-
// ── Schema building blocks ──
|
|
5
|
-
const TargetSchema = z.union([
|
|
6
|
-
z.string().describe("Shorthand: text to find, or 'css=...' / 'text=...' / 'ax_id=...' prefix"),
|
|
7
|
-
z.object({
|
|
8
|
-
text: z.string(),
|
|
9
|
-
exact: z.boolean().optional(),
|
|
10
|
-
}).describe("Find by visible text"),
|
|
11
|
-
z.object({
|
|
12
|
-
role: z.string(),
|
|
13
|
-
name: z.string(),
|
|
14
|
-
exact: z.boolean().optional(),
|
|
15
|
-
}).describe("Find by ARIA/AX role and accessible name"),
|
|
16
|
-
z.object({
|
|
17
|
-
selector: z.string(),
|
|
18
|
-
}).describe("Find by CSS selector (browser) or AX identifier (desktop)"),
|
|
19
|
-
z.object({
|
|
20
|
-
x: z.number(),
|
|
21
|
-
y: z.number(),
|
|
22
|
-
}).describe("Click at screen coordinates"),
|
|
23
|
-
z.object({
|
|
24
|
-
attribute: z.string(),
|
|
25
|
-
value: z.string(),
|
|
26
|
-
}).describe("Find by accessibility attribute"),
|
|
27
|
-
]);
|
|
28
|
-
const WaitConditionSchema = z.object({
|
|
29
|
-
type: z.enum([
|
|
30
|
-
"selector_visible",
|
|
31
|
-
"selector_hidden",
|
|
32
|
-
"url_matches",
|
|
33
|
-
"text_appears",
|
|
34
|
-
"spinner_disappears",
|
|
35
|
-
"element_exists",
|
|
36
|
-
"element_gone",
|
|
37
|
-
"window_title_matches",
|
|
38
|
-
"app_idle",
|
|
39
|
-
]),
|
|
40
|
-
selector: z.string().optional(),
|
|
41
|
-
regex: z.string().optional(),
|
|
42
|
-
text: z.string().optional(),
|
|
43
|
-
target: TargetSchema.optional(),
|
|
44
|
-
bundleId: z.string().optional(),
|
|
45
|
-
timeoutMs: z.number().optional(),
|
|
46
|
-
}).describe("Condition to wait for");
|
|
47
|
-
const RegionSchema = z.object({
|
|
48
|
-
x: z.number(),
|
|
49
|
-
y: z.number(),
|
|
50
|
-
width: z.number(),
|
|
51
|
-
height: z.number(),
|
|
52
|
-
});
|
|
53
|
-
// ── Target parser ──
|
|
54
|
-
function parseTarget(input) {
|
|
55
|
-
if (typeof input === "string") {
|
|
56
|
-
if (input.startsWith("css="))
|
|
57
|
-
return { type: "selector", value: input.slice(4) };
|
|
58
|
-
if (input.startsWith("text="))
|
|
59
|
-
return { type: "text", value: input.slice(5), exact: true };
|
|
60
|
-
if (input.startsWith("ax_id="))
|
|
61
|
-
return { type: "ax_attribute", attribute: "identifier", value: input.slice(6) };
|
|
62
|
-
return { type: "text", value: input };
|
|
63
|
-
}
|
|
64
|
-
const obj = input;
|
|
65
|
-
if (typeof obj.selector === "string")
|
|
66
|
-
return { type: "selector", value: obj.selector };
|
|
67
|
-
if (typeof obj.text === "string")
|
|
68
|
-
return { type: "text", value: obj.text, exact: obj.exact === true };
|
|
69
|
-
if (typeof obj.role === "string" && typeof obj.name === "string")
|
|
70
|
-
return { type: "role", role: obj.role, name: obj.name, exact: obj.exact === true };
|
|
71
|
-
if (typeof obj.x === "number" && typeof obj.y === "number")
|
|
72
|
-
return { type: "coordinates", x: obj.x, y: obj.y };
|
|
73
|
-
if (typeof obj.attribute === "string" && typeof obj.value === "string")
|
|
74
|
-
return { type: "ax_attribute", attribute: obj.attribute, value: obj.value };
|
|
75
|
-
throw new Error("Invalid target");
|
|
76
|
-
}
|
|
77
|
-
function parseWaitCondition(input) {
|
|
78
|
-
const obj = input;
|
|
79
|
-
const type = obj.type;
|
|
80
|
-
switch (type) {
|
|
81
|
-
case "selector_visible": return { type: "selector_visible", selector: obj.selector };
|
|
82
|
-
case "selector_hidden": return { type: "selector_hidden", selector: obj.selector };
|
|
83
|
-
case "url_matches": return { type: "url_matches", regex: obj.regex };
|
|
84
|
-
case "text_appears": return { type: "text_appears", text: obj.text };
|
|
85
|
-
case "spinner_disappears": return { type: "spinner_disappears", selector: obj.selector };
|
|
86
|
-
case "element_exists": return { type: "element_exists", target: parseTarget(obj.target) };
|
|
87
|
-
case "element_gone": return { type: "element_gone", target: parseTarget(obj.target) };
|
|
88
|
-
case "window_title_matches": return { type: "window_title_matches", regex: obj.regex };
|
|
89
|
-
case "app_idle": {
|
|
90
|
-
const cond = { type: "app_idle", bundleId: obj.bundleId };
|
|
91
|
-
if (typeof obj.timeoutMs === "number")
|
|
92
|
-
cond.timeoutMs = obj.timeoutMs;
|
|
93
|
-
return cond;
|
|
94
|
-
}
|
|
95
|
-
default: throw new Error(`Unknown wait condition type: ${type}`);
|
|
96
|
-
}
|
|
97
|
-
}
|
|
98
|
-
// ── Helpers ──
|
|
99
|
-
function ok(data) {
|
|
100
|
-
return { content: [{ type: "text", text: JSON.stringify(data, null, 2) }] };
|
|
101
|
-
}
|
|
102
|
-
function err(message) {
|
|
103
|
-
return { content: [{ type: "text", text: message }], isError: true };
|
|
104
|
-
}
|
|
105
|
-
// ── Server builder ──
|
|
106
|
-
export function createMcpStdioServer(runtime) {
|
|
107
|
-
const mcp = new McpServer({ name: "screenhand", version: "0.1.0" }, {
|
|
108
|
-
capabilities: { tools: {} },
|
|
109
|
-
instructions: "ScreenHand gives AI agents eyes and hands on the desktop. Use session_start to begin, then call tools to control apps.",
|
|
110
|
-
});
|
|
111
|
-
// ── session_start ──
|
|
112
|
-
mcp.tool("session_start", "Start a new automation session. Returns a sessionId needed by all other tools. Automatically attaches to the frontmost app.", { profile: z.string().optional().describe("Session profile name (default: 'automation')") }, async ({ profile }) => {
|
|
113
|
-
try {
|
|
114
|
-
const session = await runtime.sessionStart(profile);
|
|
115
|
-
return ok(session);
|
|
116
|
-
}
|
|
117
|
-
catch (e) {
|
|
118
|
-
return err(`Failed to start session: ${e instanceof Error ? e.message : String(e)}`);
|
|
119
|
-
}
|
|
120
|
-
});
|
|
121
|
-
// ── press ──
|
|
122
|
-
mcp.tool("press", "Click/press a UI element. Finds the element by text, role, selector, or coordinates, then clicks it.", {
|
|
123
|
-
sessionId: z.string().describe("Session ID from session_start"),
|
|
124
|
-
target: TargetSchema.describe("What to click — text string, {role, name}, {selector}, or {x, y}"),
|
|
125
|
-
verify: WaitConditionSchema.optional().describe("Optional condition to verify after clicking"),
|
|
126
|
-
}, async ({ sessionId, target, verify }) => {
|
|
127
|
-
const input = { sessionId, target: parseTarget(target) };
|
|
128
|
-
if (verify)
|
|
129
|
-
input.verify = parseWaitCondition(verify);
|
|
130
|
-
const result = await runtime.press(input);
|
|
131
|
-
return result.ok ? ok(result) : err(result.error.message);
|
|
132
|
-
});
|
|
133
|
-
// ── type_into ──
|
|
134
|
-
mcp.tool("type_into", "Type text into a UI element (text field, search box, etc). Locates the field, optionally clears it, then types.", {
|
|
135
|
-
sessionId: z.string(),
|
|
136
|
-
target: TargetSchema.describe("The input field to type into"),
|
|
137
|
-
text: z.string().describe("Text to type"),
|
|
138
|
-
clear: z.boolean().optional().describe("Clear the field first (default: true)"),
|
|
139
|
-
verify: WaitConditionSchema.optional(),
|
|
140
|
-
}, async ({ sessionId, target, text, clear, verify }) => {
|
|
141
|
-
const input = { sessionId, target: parseTarget(target), text };
|
|
142
|
-
if (typeof clear === "boolean")
|
|
143
|
-
input.clear = clear;
|
|
144
|
-
if (verify)
|
|
145
|
-
input.verify = parseWaitCondition(verify);
|
|
146
|
-
const result = await runtime.typeInto(input);
|
|
147
|
-
return result.ok ? ok(result) : err(result.error.message);
|
|
148
|
-
});
|
|
149
|
-
// ── navigate ──
|
|
150
|
-
mcp.tool("navigate", "Navigate a browser to a URL, or open an app via 'app://com.bundle.id'.", {
|
|
151
|
-
sessionId: z.string(),
|
|
152
|
-
url: z.string().describe("URL to navigate to, or 'app://bundleId' to launch an app"),
|
|
153
|
-
timeoutMs: z.number().optional().describe("Navigation timeout in ms (default: 10000)"),
|
|
154
|
-
}, async ({ sessionId, url, timeoutMs }) => {
|
|
155
|
-
const input = { sessionId, url };
|
|
156
|
-
if (typeof timeoutMs === "number")
|
|
157
|
-
input.timeoutMs = timeoutMs;
|
|
158
|
-
const result = await runtime.navigate(input);
|
|
159
|
-
return result.ok ? ok(result) : err(result.error.message);
|
|
160
|
-
});
|
|
161
|
-
// ── wait_for ──
|
|
162
|
-
mcp.tool("wait_for", "Wait for a condition: element appears/disappears, text appears, URL changes, window title matches, etc.", {
|
|
163
|
-
sessionId: z.string(),
|
|
164
|
-
condition: WaitConditionSchema,
|
|
165
|
-
timeoutMs: z.number().optional().describe("Timeout in ms (default: 2000)"),
|
|
166
|
-
}, async ({ sessionId, condition, timeoutMs }) => {
|
|
167
|
-
const input = { sessionId, condition: parseWaitCondition(condition) };
|
|
168
|
-
if (typeof timeoutMs === "number")
|
|
169
|
-
input.timeoutMs = timeoutMs;
|
|
170
|
-
const result = await runtime.waitFor(input);
|
|
171
|
-
return result.ok ? ok(result) : err(result.error.message);
|
|
172
|
-
});
|
|
173
|
-
// ── extract ──
|
|
174
|
-
mcp.tool("extract", "Extract data from a UI element. Returns text content, table data, or structured JSON from the element.", {
|
|
175
|
-
sessionId: z.string(),
|
|
176
|
-
target: TargetSchema,
|
|
177
|
-
format: z.enum(["text", "table", "json"]).describe("Output format"),
|
|
178
|
-
}, async ({ sessionId, target, format }) => {
|
|
179
|
-
const result = await runtime.extract({
|
|
180
|
-
sessionId,
|
|
181
|
-
target: parseTarget(target),
|
|
182
|
-
format: format,
|
|
183
|
-
});
|
|
184
|
-
return result.ok ? ok(result) : err(result.error.message);
|
|
185
|
-
});
|
|
186
|
-
// ── screenshot ──
|
|
187
|
-
mcp.tool("screenshot", "Capture a screenshot of the current app window or a specific screen region. Returns the file path.", {
|
|
188
|
-
sessionId: z.string(),
|
|
189
|
-
region: RegionSchema.optional().describe("Optional screen region to capture"),
|
|
190
|
-
}, async ({ sessionId, region }) => {
|
|
191
|
-
const input = { sessionId };
|
|
192
|
-
if (region)
|
|
193
|
-
input.region = region;
|
|
194
|
-
const result = await runtime.screenshot(input);
|
|
195
|
-
return result.ok ? ok(result) : err(result.error.message);
|
|
196
|
-
});
|
|
197
|
-
// ── app_launch ──
|
|
198
|
-
mcp.tool("app_launch", "Launch a macOS/Windows application by bundle ID (e.g., 'com.apple.Safari', 'com.google.Chrome').", {
|
|
199
|
-
sessionId: z.string(),
|
|
200
|
-
bundleId: z.string().describe("macOS bundle ID or Windows process name"),
|
|
201
|
-
}, async ({ sessionId, bundleId }) => {
|
|
202
|
-
const result = await runtime.appLaunch({ sessionId, bundleId });
|
|
203
|
-
return result.ok ? ok(result) : err(result.error.message);
|
|
204
|
-
});
|
|
205
|
-
// ── app_focus ──
|
|
206
|
-
mcp.tool("app_focus", "Bring a running application to the foreground.", {
|
|
207
|
-
sessionId: z.string(),
|
|
208
|
-
bundleId: z.string(),
|
|
209
|
-
}, async ({ sessionId, bundleId }) => {
|
|
210
|
-
const result = await runtime.appFocus({ sessionId, bundleId });
|
|
211
|
-
return result.ok ? ok(result) : err(result.error.message);
|
|
212
|
-
});
|
|
213
|
-
// ── app_list ──
|
|
214
|
-
mcp.tool("app_list", "List all running applications with their bundle IDs, names, and PIDs.", { sessionId: z.string() }, async ({ sessionId }) => {
|
|
215
|
-
const result = await runtime.appList(sessionId);
|
|
216
|
-
return result.ok ? ok(result) : err(result.error.message);
|
|
217
|
-
});
|
|
218
|
-
// ── window_list ──
|
|
219
|
-
mcp.tool("window_list", "List all visible windows with their titles, positions, and sizes.", { sessionId: z.string() }, async ({ sessionId }) => {
|
|
220
|
-
const result = await runtime.windowList(sessionId);
|
|
221
|
-
return result.ok ? ok(result) : err(result.error.message);
|
|
222
|
-
});
|
|
223
|
-
// ── menu_click ──
|
|
224
|
-
mcp.tool("menu_click", "Click a menu item by path. For example ['File', 'Save As...'] clicks File → Save As.", {
|
|
225
|
-
sessionId: z.string(),
|
|
226
|
-
menuPath: z.array(z.string()).describe("Menu path, e.g. ['File', 'New Window']"),
|
|
227
|
-
}, async ({ sessionId, menuPath }) => {
|
|
228
|
-
const result = await runtime.menuClick({ sessionId, menuPath });
|
|
229
|
-
return result.ok ? ok(result) : err(result.error.message);
|
|
230
|
-
});
|
|
231
|
-
// ── key_combo ──
|
|
232
|
-
mcp.tool("key_combo", "Send a keyboard shortcut. Keys: 'cmd', 'ctrl', 'alt', 'shift', plus any character. E.g. ['cmd', 'c'] for copy.", {
|
|
233
|
-
sessionId: z.string(),
|
|
234
|
-
keys: z.array(z.string()).describe("Key combination, e.g. ['cmd', 's']"),
|
|
235
|
-
}, async ({ sessionId, keys }) => {
|
|
236
|
-
const result = await runtime.keyCombo({ sessionId, keys });
|
|
237
|
-
return result.ok ? ok(result) : err(result.error.message);
|
|
238
|
-
});
|
|
239
|
-
// ── element_tree ──
|
|
240
|
-
mcp.tool("element_tree", "Get the accessibility element tree of the current app. Useful for understanding the UI structure and finding elements to interact with.", {
|
|
241
|
-
sessionId: z.string(),
|
|
242
|
-
maxDepth: z.number().optional().describe("Max tree depth (default: 5)"),
|
|
243
|
-
}, async ({ sessionId, maxDepth }) => {
|
|
244
|
-
const input = { sessionId };
|
|
245
|
-
if (typeof maxDepth === "number")
|
|
246
|
-
input.maxDepth = maxDepth;
|
|
247
|
-
const result = await runtime.elementTree(input);
|
|
248
|
-
return result.ok ? ok(result) : err(result.error.message);
|
|
249
|
-
});
|
|
250
|
-
// ── drag ──
|
|
251
|
-
mcp.tool("drag", "Drag from one UI element to another.", {
|
|
252
|
-
sessionId: z.string(),
|
|
253
|
-
from: TargetSchema.describe("Element to drag from"),
|
|
254
|
-
to: TargetSchema.describe("Element to drag to"),
|
|
255
|
-
}, async ({ sessionId, from, to }) => {
|
|
256
|
-
const result = await runtime.drag({
|
|
257
|
-
sessionId,
|
|
258
|
-
from: parseTarget(from),
|
|
259
|
-
to: parseTarget(to),
|
|
260
|
-
});
|
|
261
|
-
return result.ok ? ok(result) : err(result.error.message);
|
|
262
|
-
});
|
|
263
|
-
// ── scroll ──
|
|
264
|
-
mcp.tool("scroll", "Scroll in a direction, optionally targeting a specific element.", {
|
|
265
|
-
sessionId: z.string(),
|
|
266
|
-
direction: z.enum(["up", "down", "left", "right"]),
|
|
267
|
-
amount: z.number().optional().describe("Scroll amount (default: 3)"),
|
|
268
|
-
target: TargetSchema.optional().describe("Element to scroll within"),
|
|
269
|
-
}, async ({ sessionId, direction, amount, target }) => {
|
|
270
|
-
const input = { sessionId, direction };
|
|
271
|
-
if (typeof amount === "number")
|
|
272
|
-
input.amount = amount;
|
|
273
|
-
if (target)
|
|
274
|
-
input.target = parseTarget(target);
|
|
275
|
-
const result = await runtime.scroll(input);
|
|
276
|
-
return result.ok ? ok(result) : err(result.error.message);
|
|
277
|
-
});
|
|
278
|
-
// ── task_run ──
|
|
279
|
-
mcp.tool("task_run", "Run a complete task autonomously. Starts an observe→decide→act loop that uses the accessibility tree (not screenshots) to see the UI and Claude to decide each action. The loop continues until the task is fully done or max steps reached. Returns a summary of all actions taken.", {
|
|
280
|
-
task: z.string().describe("Natural language description of the task to complete"),
|
|
281
|
-
sessionId: z.string().optional().describe("Existing session ID (auto-creates if not provided)"),
|
|
282
|
-
maxSteps: z.number().optional().describe("Max actions before stopping (default: 50)"),
|
|
283
|
-
model: z.string().optional().describe("Claude model for decisions (default: claude-sonnet-4-20250514)"),
|
|
284
|
-
}, async ({ task, sessionId, maxSteps, model }) => {
|
|
285
|
-
try {
|
|
286
|
-
const { runAgentLoop } = await import("../agent/loop.js");
|
|
287
|
-
// Auto-create session if not provided
|
|
288
|
-
let sid = sessionId;
|
|
289
|
-
if (!sid) {
|
|
290
|
-
const session = await runtime.sessionStart();
|
|
291
|
-
sid = session.sessionId;
|
|
292
|
-
}
|
|
293
|
-
const result = await runAgentLoop(runtime, sid, task, {
|
|
294
|
-
maxSteps: maxSteps ?? 50,
|
|
295
|
-
...(model ? { model } : {}),
|
|
296
|
-
onStep: (step) => {
|
|
297
|
-
process.stderr.write(`[step ${step.index}] ${step.reasoning.slice(0, 80)} → ${step.action?.tool ?? "none"} (${step.durationMs}ms)\n`);
|
|
298
|
-
},
|
|
299
|
-
});
|
|
300
|
-
return ok({
|
|
301
|
-
success: result.success,
|
|
302
|
-
summary: result.summary,
|
|
303
|
-
totalSteps: result.steps.length,
|
|
304
|
-
totalMs: result.totalMs,
|
|
305
|
-
steps: result.steps.map(s => ({
|
|
306
|
-
reasoning: s.reasoning,
|
|
307
|
-
action: s.action,
|
|
308
|
-
result: s.result,
|
|
309
|
-
durationMs: s.durationMs,
|
|
310
|
-
})),
|
|
311
|
-
});
|
|
312
|
-
}
|
|
313
|
-
catch (e) {
|
|
314
|
-
return err(`Agent loop failed: ${e instanceof Error ? e.message : String(e)}`);
|
|
315
|
-
}
|
|
316
|
-
});
|
|
317
|
-
// ── playbook_run ──
|
|
318
|
-
mcp.tool("playbook_run", "Execute a saved playbook by ID or auto-match by task description. Playbooks run deterministically without AI calls. If a step fails, AI automatically recovers and patches the playbook for next time.", {
|
|
319
|
-
sessionId: z.string(),
|
|
320
|
-
task: z.string().optional().describe("Natural language task — auto-matches best playbook"),
|
|
321
|
-
playbookId: z.string().optional().describe("Specific playbook ID to run"),
|
|
322
|
-
}, async ({ sessionId, task, playbookId }) => {
|
|
323
|
-
try {
|
|
324
|
-
const { PlaybookRunner } = await import("../playbook/runner.js");
|
|
325
|
-
const playbookDir = new URL("../../playbooks", import.meta.url).pathname;
|
|
326
|
-
const runner = new PlaybookRunner(runtime, playbookDir, {
|
|
327
|
-
onLog: (msg) => process.stderr.write(`${msg}\n`),
|
|
328
|
-
});
|
|
329
|
-
if (playbookId) {
|
|
330
|
-
const playbook = runner.listPlaybooks().find(p => p.id === playbookId);
|
|
331
|
-
if (!playbook)
|
|
332
|
-
return err(`Playbook "${playbookId}" not found`);
|
|
333
|
-
const { PlaybookEngine } = await import("../playbook/engine.js");
|
|
334
|
-
const engine = new PlaybookEngine(runtime);
|
|
335
|
-
const result = await engine.run(sessionId, playbook, {
|
|
336
|
-
onStep: (i, step, res) => {
|
|
337
|
-
process.stderr.write(`[playbook step ${i + 1}] ${step.description ?? step.action} → ${res}\n`);
|
|
338
|
-
},
|
|
339
|
-
});
|
|
340
|
-
return ok(result);
|
|
341
|
-
}
|
|
342
|
-
if (task) {
|
|
343
|
-
const result = await runner.execute(sessionId, task);
|
|
344
|
-
return ok(result);
|
|
345
|
-
}
|
|
346
|
-
return err("Provide either task or playbookId");
|
|
347
|
-
}
|
|
348
|
-
catch (e) {
|
|
349
|
-
return err(`Playbook failed: ${e instanceof Error ? e.message : String(e)}`);
|
|
350
|
-
}
|
|
351
|
-
});
|
|
352
|
-
// ── playbook_list ──
|
|
353
|
-
mcp.tool("playbook_list", "List all available playbooks with their IDs, names, platforms, and success rates.", {}, async () => {
|
|
354
|
-
try {
|
|
355
|
-
const { PlaybookStore } = await import("../playbook/store.js");
|
|
356
|
-
const playbookDir = new URL("../../playbooks", import.meta.url).pathname;
|
|
357
|
-
const store = new PlaybookStore(playbookDir);
|
|
358
|
-
store.load();
|
|
359
|
-
const playbooks = store.getAll().map(p => ({
|
|
360
|
-
id: p.id,
|
|
361
|
-
name: p.name,
|
|
362
|
-
platform: p.platform,
|
|
363
|
-
description: p.description,
|
|
364
|
-
stepsCount: p.steps.length,
|
|
365
|
-
successCount: p.successCount,
|
|
366
|
-
failCount: p.failCount,
|
|
367
|
-
tags: p.tags,
|
|
368
|
-
lastRun: p.lastRun,
|
|
369
|
-
}));
|
|
370
|
-
return ok({ playbooks, total: playbooks.length });
|
|
371
|
-
}
|
|
372
|
-
catch (e) {
|
|
373
|
-
return err(`Failed to list playbooks: ${e instanceof Error ? e.message : String(e)}`);
|
|
374
|
-
}
|
|
375
|
-
});
|
|
376
|
-
// ── recording_start ──
|
|
377
|
-
let activeRecorder = null;
|
|
378
|
-
mcp.tool("recording_start", "Start recording user actions to auto-generate a playbook. Do the task manually while recording, then call recording_stop to save.", {
|
|
379
|
-
sessionId: z.string(),
|
|
380
|
-
}, async ({ sessionId }) => {
|
|
381
|
-
try {
|
|
382
|
-
const { PlaybookRecorder } = await import("../playbook/recorder.js");
|
|
383
|
-
const playbookDir = new URL("../../playbooks", import.meta.url).pathname;
|
|
384
|
-
const recorder = new PlaybookRecorder(runtime, playbookDir, {
|
|
385
|
-
onLog: (msg) => process.stderr.write(`${msg}\n`),
|
|
386
|
-
});
|
|
387
|
-
await recorder.start(sessionId);
|
|
388
|
-
activeRecorder = recorder;
|
|
389
|
-
return ok({ status: "recording", message: "Recording started. Do the task manually, then call recording_stop." });
|
|
390
|
-
}
|
|
391
|
-
catch (e) {
|
|
392
|
-
return err(`Failed to start recording: ${e instanceof Error ? e.message : String(e)}`);
|
|
393
|
-
}
|
|
394
|
-
});
|
|
395
|
-
// ── recording_stop ──
|
|
396
|
-
mcp.tool("recording_stop", "Stop recording and save the captured actions as a new playbook.", {
|
|
397
|
-
name: z.string().describe("Name for the playbook (e.g. 'Change X profile picture')"),
|
|
398
|
-
description: z.string().optional().describe("What the playbook does"),
|
|
399
|
-
platform: z.string().describe("Platform name (e.g. 'x', 'instagram', 'gmail')"),
|
|
400
|
-
}, async ({ name, description, platform }) => {
|
|
401
|
-
try {
|
|
402
|
-
const recorder = activeRecorder;
|
|
403
|
-
if (!recorder || !recorder.isRecording) {
|
|
404
|
-
return err("No active recording. Call recording_start first.");
|
|
405
|
-
}
|
|
406
|
-
const playbook = await recorder.stop(name, description ?? name, platform);
|
|
407
|
-
activeRecorder = null;
|
|
408
|
-
return ok({
|
|
409
|
-
status: "saved",
|
|
410
|
-
playbookId: playbook.id,
|
|
411
|
-
name: playbook.name,
|
|
412
|
-
stepsCount: playbook.steps.length,
|
|
413
|
-
steps: playbook.steps.map((s, i) => `${i + 1}. ${s.description ?? s.action}`),
|
|
414
|
-
});
|
|
415
|
-
}
|
|
416
|
-
catch (e) {
|
|
417
|
-
return err(`Failed to stop recording: ${e instanceof Error ? e.message : String(e)}`);
|
|
418
|
-
}
|
|
419
|
-
});
|
|
420
|
-
// ── recording_cancel ──
|
|
421
|
-
mcp.tool("recording_cancel", "Cancel the current recording without saving.", {}, async () => {
|
|
422
|
-
const recorder = activeRecorder;
|
|
423
|
-
if (!recorder || !recorder.isRecording) {
|
|
424
|
-
return err("No active recording.");
|
|
425
|
-
}
|
|
426
|
-
recorder.cancel();
|
|
427
|
-
activeRecorder = null;
|
|
428
|
-
return ok({ status: "cancelled" });
|
|
429
|
-
});
|
|
430
|
-
// ── recording_status ──
|
|
431
|
-
mcp.tool("recording_status", "Check if recording is active and how many events captured so far.", {}, async () => {
|
|
432
|
-
const recorder = activeRecorder;
|
|
433
|
-
if (!recorder || !recorder.isRecording) {
|
|
434
|
-
return ok({ recording: false, eventCount: 0 });
|
|
435
|
-
}
|
|
436
|
-
return ok({
|
|
437
|
-
recording: true,
|
|
438
|
-
eventCount: recorder.eventCount,
|
|
439
|
-
events: recorder.getEvents().map((e) => `[${e.type}] ${JSON.stringify(e.details).slice(0, 80)}`),
|
|
440
|
-
});
|
|
441
|
-
});
|
|
442
|
-
return mcp;
|
|
443
|
-
}
|
|
444
|
-
export async function startMcpStdioServer(runtime) {
|
|
445
|
-
const mcp = createMcpStdioServer(runtime);
|
|
446
|
-
const transport = new StdioServerTransport();
|
|
447
|
-
await mcp.connect(transport);
|
|
448
|
-
}
|