screenhand 0.1.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (241) hide show
  1. package/README.md +193 -109
  2. package/bin/darwin-arm64/macos-bridge +0 -0
  3. package/dist/mcp-desktop.js +5876 -0
  4. package/dist/scripts/codex-monitor-daemon.js +335 -0
  5. package/dist/scripts/export-help-center.js +112 -0
  6. package/dist/scripts/marketing-loop.js +117 -0
  7. package/dist/scripts/observer-daemon.js +288 -0
  8. package/dist/scripts/orchestrator-daemon.js +399 -0
  9. package/dist/scripts/supervisor-daemon.js +272 -0
  10. package/dist/scripts/threads-campaign.js +208 -0
  11. package/dist/scripts/worker-daemon.js +228 -0
  12. package/dist/src/agent/cli.js +82 -0
  13. package/dist/src/agent/loop.js +274 -0
  14. package/dist/src/community/fetcher.js +109 -0
  15. package/dist/src/community/index.js +6 -0
  16. package/dist/src/community/publisher.js +191 -0
  17. package/dist/src/community/remote-api.js +121 -0
  18. package/dist/src/community/types.js +3 -0
  19. package/dist/src/community/validator.js +95 -0
  20. package/{src/config.ts → dist/src/config.js} +5 -10
  21. package/dist/src/context-tracker.js +489 -0
  22. package/{src/index.ts → dist/src/index.js} +32 -52
  23. package/dist/src/ingestion/coverage-auditor.js +233 -0
  24. package/dist/src/ingestion/doc-parser.js +164 -0
  25. package/dist/src/ingestion/index.js +8 -0
  26. package/dist/src/ingestion/menu-scanner.js +152 -0
  27. package/dist/src/ingestion/reference-merger.js +186 -0
  28. package/dist/src/ingestion/shortcut-extractor.js +180 -0
  29. package/dist/src/ingestion/tutorial-extractor.js +170 -0
  30. package/dist/src/ingestion/types.js +3 -0
  31. package/dist/src/jobs/manager.js +305 -0
  32. package/dist/src/jobs/runner.js +806 -0
  33. package/dist/src/jobs/store.js +102 -0
  34. package/dist/src/jobs/types.js +30 -0
  35. package/dist/src/jobs/worker.js +97 -0
  36. package/dist/src/learning/engine.js +356 -0
  37. package/dist/src/learning/index.js +9 -0
  38. package/dist/src/learning/locator-policy.js +120 -0
  39. package/dist/src/learning/pattern-policy.js +89 -0
  40. package/dist/src/learning/recovery-policy.js +116 -0
  41. package/dist/src/learning/sensor-policy.js +115 -0
  42. package/dist/src/learning/timing-model.js +204 -0
  43. package/dist/src/learning/topology-policy.js +90 -0
  44. package/dist/src/learning/types.js +9 -0
  45. package/dist/src/logging/timeline-logger.js +48 -0
  46. package/dist/src/mcp/mcp-stdio-server.js +464 -0
  47. package/dist/src/mcp/server.js +363 -0
  48. package/dist/src/mcp-entry.js +60 -0
  49. package/dist/src/memory/playbook-seeds.js +200 -0
  50. package/dist/src/memory/recall.js +222 -0
  51. package/dist/src/memory/research.js +104 -0
  52. package/dist/src/memory/seeds.js +101 -0
  53. package/dist/src/memory/service.js +446 -0
  54. package/dist/src/memory/session.js +169 -0
  55. package/dist/src/memory/store.js +451 -0
  56. package/{src/runtime/locator-cache.ts → dist/src/memory/types.js} +1 -17
  57. package/dist/src/monitor/codex-monitor.js +382 -0
  58. package/dist/src/monitor/task-queue.js +97 -0
  59. package/dist/src/monitor/types.js +62 -0
  60. package/dist/src/native/bridge-client.js +412 -0
  61. package/{src/native/macos-bridge-client.ts → dist/src/native/macos-bridge-client.js} +0 -1
  62. package/dist/src/observer/state.js +199 -0
  63. package/dist/src/observer/types.js +43 -0
  64. package/dist/src/orchestrator/state.js +68 -0
  65. package/dist/src/orchestrator/types.js +22 -0
  66. package/dist/src/perception/ax-source.js +162 -0
  67. package/dist/src/perception/cdp-source.js +162 -0
  68. package/dist/src/perception/coordinator.js +771 -0
  69. package/dist/src/perception/frame-differ.js +287 -0
  70. package/dist/src/perception/index.js +22 -0
  71. package/dist/src/perception/manager.js +199 -0
  72. package/dist/src/perception/types.js +47 -0
  73. package/dist/src/perception/vision-source.js +399 -0
  74. package/dist/src/planner/deterministic.js +298 -0
  75. package/dist/src/planner/executor.js +870 -0
  76. package/dist/src/planner/goal-store.js +92 -0
  77. package/dist/src/planner/index.js +21 -0
  78. package/dist/src/planner/planner.js +520 -0
  79. package/dist/src/planner/tool-registry.js +71 -0
  80. package/dist/src/planner/types.js +22 -0
  81. package/dist/src/platform/explorer.js +213 -0
  82. package/dist/src/platform/help-center-markdown.js +527 -0
  83. package/dist/src/platform/learner.js +257 -0
  84. package/dist/src/playbook/engine.js +486 -0
  85. package/dist/src/playbook/index.js +20 -0
  86. package/dist/src/playbook/mcp-recorder.js +204 -0
  87. package/dist/src/playbook/recorder.js +536 -0
  88. package/dist/src/playbook/runner.js +408 -0
  89. package/dist/src/playbook/store.js +312 -0
  90. package/dist/src/playbook/types.js +17 -0
  91. package/dist/src/recovery/detectors.js +156 -0
  92. package/dist/src/recovery/engine.js +327 -0
  93. package/dist/src/recovery/index.js +20 -0
  94. package/dist/src/recovery/strategies.js +274 -0
  95. package/dist/src/recovery/types.js +20 -0
  96. package/dist/src/runtime/accessibility-adapter.js +430 -0
  97. package/dist/src/runtime/app-adapter.js +64 -0
  98. package/dist/src/runtime/applescript-adapter.js +305 -0
  99. package/dist/src/runtime/ax-role-map.js +96 -0
  100. package/dist/src/runtime/browser-adapter.js +52 -0
  101. package/dist/src/runtime/cdp-chrome-adapter.js +521 -0
  102. package/dist/src/runtime/composite-adapter.js +221 -0
  103. package/dist/src/runtime/execution-contract.js +159 -0
  104. package/dist/src/runtime/executor.js +286 -0
  105. package/dist/src/runtime/locator-cache.js +50 -0
  106. package/dist/src/runtime/planning-loop.js +63 -0
  107. package/dist/src/runtime/service.js +432 -0
  108. package/dist/src/runtime/session-manager.js +63 -0
  109. package/dist/src/runtime/state-observer.js +121 -0
  110. package/dist/src/runtime/vision-adapter.js +225 -0
  111. package/dist/src/state/app-map-types.js +72 -0
  112. package/dist/src/state/app-map.js +1974 -0
  113. package/dist/src/state/entity-tracker.js +108 -0
  114. package/dist/src/state/fusion.js +96 -0
  115. package/dist/src/state/index.js +21 -0
  116. package/dist/src/state/ladder-generator.js +236 -0
  117. package/dist/src/state/persistence.js +156 -0
  118. package/dist/src/state/types.js +17 -0
  119. package/dist/src/state/world-model.js +1456 -0
  120. package/dist/src/supervisor/locks.js +186 -0
  121. package/dist/src/supervisor/supervisor.js +403 -0
  122. package/dist/src/supervisor/types.js +30 -0
  123. package/dist/src/test-mcp-protocol.js +154 -0
  124. package/dist/src/types.js +17 -0
  125. package/dist/src/util/atomic-write.js +133 -0
  126. package/dist/src/util/sanitize.js +146 -0
  127. package/dist-app-maps/com.figma.Desktop.json +959 -0
  128. package/dist-app-maps/com.hnc.Discord.json +1146 -0
  129. package/dist-app-maps/notion.id.json +2831 -0
  130. package/dist-playbooks/canva-screenhand-carousel.json +445 -0
  131. package/dist-playbooks/codex-desktop.json +76 -0
  132. package/dist-playbooks/competitor-research-stack.json +122 -0
  133. package/dist-playbooks/davinci-color-grade.json +153 -0
  134. package/dist-playbooks/davinci-edit-timeline.json +162 -0
  135. package/dist-playbooks/davinci-render.json +114 -0
  136. package/dist-playbooks/devto.json +52 -0
  137. package/dist-playbooks/discord.json +41 -0
  138. package/dist-playbooks/google-flow-create-project.json +59 -0
  139. package/dist-playbooks/google-flow-edit-image.json +90 -0
  140. package/dist-playbooks/google-flow-edit-video.json +90 -0
  141. package/dist-playbooks/google-flow-generate-image.json +68 -0
  142. package/dist-playbooks/google-flow-generate-video.json +191 -0
  143. package/dist-playbooks/google-flow-open-project.json +48 -0
  144. package/dist-playbooks/google-flow-open-scenebuilder.json +64 -0
  145. package/dist-playbooks/google-flow-search-assets.json +64 -0
  146. package/dist-playbooks/instagram.json +57 -0
  147. package/dist-playbooks/linkedin.json +52 -0
  148. package/dist-playbooks/n8n.json +43 -0
  149. package/dist-playbooks/reddit.json +52 -0
  150. package/dist-playbooks/threads.json +59 -0
  151. package/dist-playbooks/x-twitter.json +59 -0
  152. package/dist-playbooks/youtube.json +59 -0
  153. package/dist-references/canva.json +646 -0
  154. package/dist-references/codex-desktop.json +305 -0
  155. package/dist-references/davinci-resolve-keyboard.json +594 -0
  156. package/dist-references/davinci-resolve-menu-map.json +1139 -0
  157. package/dist-references/davinci-resolve-menus-batch1.json +116 -0
  158. package/dist-references/davinci-resolve-menus-batch2.json +372 -0
  159. package/dist-references/davinci-resolve-menus-batch3.json +330 -0
  160. package/dist-references/davinci-resolve-menus-batch4.json +297 -0
  161. package/dist-references/davinci-resolve-shortcuts.json +333 -0
  162. package/dist-references/devto.json +317 -0
  163. package/dist-references/discord.json +549 -0
  164. package/dist-references/figma.json +1186 -0
  165. package/dist-references/finder.json +146 -0
  166. package/dist-references/google-ads-transparency.json +95 -0
  167. package/dist-references/google-flow.json +649 -0
  168. package/dist-references/instagram.json +341 -0
  169. package/dist-references/linkedin.json +324 -0
  170. package/dist-references/meta-ad-library.json +86 -0
  171. package/dist-references/n8n.json +387 -0
  172. package/dist-references/notes.json +27 -0
  173. package/dist-references/notion.json +163 -0
  174. package/dist-references/reddit.json +341 -0
  175. package/dist-references/threads.json +337 -0
  176. package/dist-references/x-twitter.json +403 -0
  177. package/dist-references/youtube.json +373 -0
  178. package/native/macos-bridge/Package.swift +1 -0
  179. package/native/macos-bridge/Sources/AccessibilityBridge.swift +257 -36
  180. package/native/macos-bridge/Sources/AppManagement.swift +212 -2
  181. package/native/macos-bridge/Sources/CoreGraphicsBridge.swift +348 -53
  182. package/native/macos-bridge/Sources/StreamCapture.swift +136 -0
  183. package/native/macos-bridge/Sources/VisionBridge.swift +165 -7
  184. package/native/macos-bridge/Sources/main.swift +169 -16
  185. package/native/windows-bridge/Program.cs +5 -0
  186. package/native/windows-bridge/ScreenCapture.cs +124 -0
  187. package/package.json +29 -4
  188. package/scripts/postinstall.cjs +127 -0
  189. package/.claude/commands/automate.md +0 -28
  190. package/.claude/commands/debug-ui.md +0 -19
  191. package/.claude/commands/screenshot.md +0 -15
  192. package/.github/FUNDING.yml +0 -1
  193. package/.github/ISSUE_TEMPLATE/bug_report.md +0 -27
  194. package/.github/ISSUE_TEMPLATE/feature_request.md +0 -20
  195. package/.mcp.json +0 -8
  196. package/DESKTOP_MCP_GUIDE.md +0 -92
  197. package/SECURITY.md +0 -44
  198. package/docs/architecture.md +0 -47
  199. package/install-skills.sh +0 -19
  200. package/mcp-bridge.ts +0 -271
  201. package/mcp-desktop.ts +0 -1221
  202. package/playbooks/instagram.json +0 -41
  203. package/playbooks/instagram_v2.json +0 -201
  204. package/playbooks/x_v1.json +0 -211
  205. package/scripts/devpost-live-loop.mjs +0 -421
  206. package/src/logging/timeline-logger.ts +0 -55
  207. package/src/mcp/server.ts +0 -449
  208. package/src/memory/recall.ts +0 -191
  209. package/src/memory/research.ts +0 -146
  210. package/src/memory/seeds.ts +0 -123
  211. package/src/memory/session.ts +0 -201
  212. package/src/memory/store.ts +0 -434
  213. package/src/memory/types.ts +0 -69
  214. package/src/native/bridge-client.ts +0 -239
  215. package/src/runtime/accessibility-adapter.ts +0 -487
  216. package/src/runtime/app-adapter.ts +0 -169
  217. package/src/runtime/applescript-adapter.ts +0 -376
  218. package/src/runtime/ax-role-map.ts +0 -102
  219. package/src/runtime/browser-adapter.ts +0 -129
  220. package/src/runtime/cdp-chrome-adapter.ts +0 -676
  221. package/src/runtime/composite-adapter.ts +0 -274
  222. package/src/runtime/executor.ts +0 -396
  223. package/src/runtime/planning-loop.ts +0 -81
  224. package/src/runtime/service.ts +0 -448
  225. package/src/runtime/session-manager.ts +0 -50
  226. package/src/runtime/state-observer.ts +0 -136
  227. package/src/runtime/vision-adapter.ts +0 -297
  228. package/src/types.ts +0 -297
  229. package/tests/bridge-client.test.ts +0 -176
  230. package/tests/browser-stealth.test.ts +0 -210
  231. package/tests/composite-adapter.test.ts +0 -64
  232. package/tests/mcp-server.test.ts +0 -151
  233. package/tests/memory-recall.test.ts +0 -339
  234. package/tests/memory-research.test.ts +0 -159
  235. package/tests/memory-seeds.test.ts +0 -120
  236. package/tests/memory-store.test.ts +0 -392
  237. package/tests/types.test.ts +0 -92
  238. package/tsconfig.check.json +0 -17
  239. package/tsconfig.json +0 -19
  240. package/vitest.config.ts +0 -8
  241. /package/{playbooks → dist-references}/devpost.json +0 -0
@@ -0,0 +1,90 @@
1
+ // Copyright (C) 2025 Clazro Technology Private Limited
2
+ // SPDX-License-Identifier: AGPL-3.0-only
3
+ /**
4
+ * TopologyPolicy — learns which navigation edges are reliable per app.
5
+ *
6
+ * Persisted to `topology.jsonl`. Each entry tracks success/fail counts
7
+ * for a specific bundleId×fromNode×action×toNode quad, scored with
8
+ * Bayesian averaging.
9
+ *
10
+ * Used alongside the AppMap to determine which navigation paths
11
+ * through an application are verified and reliable.
12
+ */
13
export class TopologyPolicy {
    /** Per-edge statistics, keyed by `bundleId::fromNode::action::toNode`. */
    entries = new Map();
    /** Pseudo-count added to both outcomes when computing an edge's score. */
    priorStrength;

    /**
     * @param {number} [priorStrength=2] Weight of the prior in the score formula.
     */
    constructor(priorStrength = 2) {
        this.priorStrength = priorStrength;
    }

    /**
     * Register one observed navigation attempt and refresh the edge's score.
     *
     * @param {object} outcome Carries `bundleId`, `fromNode`, `action`,
     *   `toNode` and a truthy/falsy `success` flag.
     */
    record(outcome) {
        const { bundleId, fromNode, action, toNode } = outcome;
        const key = `${bundleId}::${fromNode}::${action}::${toNode}`;
        if (!this.entries.get(key)) {
            // First sighting of this edge: start from a neutral 0.5 score.
            this.entries.set(key, {
                key,
                bundleId,
                fromNode,
                action,
                toNode,
                successCount: 0,
                failCount: 0,
                score: 0.5,
                lastUsed: new Date().toISOString(),
            });
        }
        const edge = this.entries.get(key);
        const counter = outcome.success ? "successCount" : "failCount";
        edge[counter]++;
        edge.score = this.bayesianScore(edge.successCount, edge.failCount);
        edge.lastUsed = new Date().toISOString();
    }

    /**
     * List the edges recorded for an app, highest score first.
     *
     * @param {string} bundleId App whose edges are wanted.
     * @param {string} [fromNode] When truthy, keep only edges leaving this node.
     * @returns {object[]} Matching entries (the stored objects, not copies).
     */
    query(bundleId, fromNode) {
        const matches = [...this.entries.values()].filter((edge) => edge.bundleId === bundleId && (!fromNode || edge.fromNode === fromNode));
        return matches.sort((left, right) => right.score - left.score);
    }

    /**
     * Pick the highest-scored edge that has enough samples and a score above 0.5.
     *
     * @param {string} bundleId App to look in.
     * @param {string} [fromNode] Optional source-node filter, as in `query`.
     * @param {number} [minSamples=3] Minimum successes + failures required.
     * @returns {object|null} The chosen entry, or null when none qualifies.
     */
    recommend(bundleId, fromNode, minSamples = 3) {
        const best = this.query(bundleId, fromNode).find((edge) => edge.successCount + edge.failCount >= minSamples && edge.score > 0.5);
        return best ?? null;
    }

    /** Forget every recorded edge. */
    clear() {
        this.entries.clear();
    }

    /** @returns {object[]} All stored entries in insertion order. */
    getAllEntries() {
        return Array.from(this.entries.values());
    }

    /**
     * Insert (or overwrite) entries by their `key`, storing a shallow copy of each.
     *
     * @param {Iterable<object>} entries Entries to load.
     */
    loadEntries(entries) {
        for (const item of entries) {
            this.entries.set(item.key, { ...item });
        }
    }

    /**
     * Smoothed success ratio: (successes + prior) / (total + 2 * prior).
     *
     * @param {number} successes
     * @param {number} failures
     * @returns {number}
     */
    bayesianScore(successes, failures) {
        const prior = this.priorStrength;
        const numerator = successes + prior;
        const denominator = successes + failures + 2 * prior;
        return numerator / denominator;
    }
}
@@ -0,0 +1,9 @@
1
+ // Copyright (C) 2025 Clazro Technology Private Limited
2
+ // SPDX-License-Identifier: AGPL-3.0-only
3
/**
 * Baseline settings for the learning subsystem.
 * NOTE(review): the per-field notes below are inferred from the field names
 * only; confirm against the code that consumes this object.
 */
export const DEFAULT_LEARNING_CONFIG = {
    // Empty by default; presumably filled in by the caller.
    dataDir: "",
    // Presumably the sample count required before a learned value is trusted.
    minSamplesForConfidence: 5,
    // Presumably the prior weight handed to the scoring policies.
    priorStrength: 2,
    // Presumably a cap on entries kept per persisted file.
    maxEntriesPerFile: 5_000,
    // Presumably a cap on retained timing samples.
    maxTimingSamples: 100,
};
@@ -0,0 +1,48 @@
1
+ // Copyright (C) 2025 Clazro Technology Private Limited
2
+ // SPDX-License-Identifier: AGPL-3.0-only
3
+ //
4
+ // This file is part of ScreenHand.
5
+ //
6
+ // ScreenHand is free software: you can redistribute it and/or modify
7
+ // it under the terms of the GNU Affero General Public License as
8
+ // published by the Free Software Foundation, version 3.
9
+ //
10
+ // ScreenHand is distributed in the hope that it will be useful,
11
+ // but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ // GNU Affero General Public License for more details.
14
+ //
15
+ // You should have received a copy of the GNU Affero General Public License
16
+ // along with ScreenHand. If not, see <https://www.gnu.org/licenses/>.
17
/**
 * In-memory log of per-action telemetry records.
 *
 * `start` creates a fresh telemetry record, `finish` stamps it with an end
 * time, total duration and status and appends it to `timeline`, and
 * `getRecent` reads back the tail of that list.
 */
export class TimelineLogger {
    /** Finalized telemetry records, oldest first. */
    timeline = [];

    /**
     * Create a telemetry record for an action that is about to run.
     * The record is NOT stored until `finish` is called with it.
     *
     * @param {string} action Name of the action being timed.
     * @param {string} sessionId Session the action belongs to.
     * @returns {object} Record with zeroed phase timers and retry counter.
     */
    start(action, sessionId) {
        return {
            action,
            sessionId,
            startedAt: new Date().toISOString(),
            locateMs: 0,
            actMs: 0,
            verifyMs: 0,
            retries: 0,
        };
    }

    /**
     * Finalize a telemetry record and append it to the timeline.
     *
     * @param {object} telemetry Record from `start` (possibly updated by the caller).
     * @param {string} status Final status to store on the record.
     * @returns {object} A copy of `telemetry` extended with `finishedAt`,
     *   `totalMs` and `status`.
     */
    finish(telemetry, status) {
        const end = new Date();
        const startTime = new Date(telemetry.startedAt).getTime();
        // An unparseable `startedAt` yields NaN; report 0 rather than NaN.
        const totalMs = Number.isFinite(startTime) ? end.getTime() - startTime : 0;
        const finalized = {
            ...telemetry,
            finishedAt: end.toISOString(),
            totalMs,
            status,
        };
        this.timeline.push(finalized);
        return finalized;
    }

    /**
     * Return the most recent finalized records, oldest first.
     *
     * @param {number} [limit=50] Maximum number of records to return.
     * @returns {object[]} Up to `limit` records; empty when `limit` floors to
     *   zero or less, or is not a number.
     */
    getRecent(limit = 50) {
        const count = Math.floor(limit);
        // slice(-0) is slice(0) and would return the WHOLE timeline, and a
        // negative limit would turn into a positive start index. Both mean
        // "no records" here.
        if (!(count > 0)) {
            return [];
        }
        return this.timeline.slice(-count);
    }
}
@@ -0,0 +1,464 @@
1
+ // Copyright (C) 2025 Clazro Technology Private Limited
2
+ // SPDX-License-Identifier: AGPL-3.0-only
3
+ //
4
+ // This file is part of ScreenHand.
5
+ //
6
+ // ScreenHand is free software: you can redistribute it and/or modify
7
+ // it under the terms of the GNU Affero General Public License as
8
+ // published by the Free Software Foundation, version 3.
9
+ //
10
+ // ScreenHand is distributed in the hope that it will be useful,
11
+ // but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ // GNU Affero General Public License for more details.
14
+ //
15
+ // You should have received a copy of the GNU Affero General Public License
16
+ // along with ScreenHand. If not, see <https://www.gnu.org/licenses/>.
17
+ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
18
+ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
19
+ import { z } from "zod";
20
+ // ── Schema building blocks ──
21
// Every accepted way of addressing a UI element: a shorthand string, or one
// of five object shapes. Member order is kept as-is because a union is
// matched member by member.
const TargetSchema = z.union([
    z.string()
        .describe("Shorthand: text to find, or 'css=...' / 'text=...' / 'ax_id=...' prefix"),
    z.object({ text: z.string(), exact: z.boolean().optional() })
        .describe("Find by visible text"),
    z.object({ role: z.string(), name: z.string(), exact: z.boolean().optional() })
        .describe("Find by ARIA/AX role and accessible name"),
    z.object({ selector: z.string() })
        .describe("Find by CSS selector (browser) or AX identifier (desktop)"),
    z.object({ x: z.number(), y: z.number() })
        .describe("Click at screen coordinates"),
    z.object({ attribute: z.string(), value: z.string() })
        .describe("Find by accessibility attribute"),
]);
44
// A wait condition is a `type` tag plus optional detail fields. The schema
// itself does not tie a given `type` to the fields it needs; every detail
// field is optional here.
const WaitConditionSchema = z
    .object({
        type: z.enum([
            "selector_visible",
            "selector_hidden",
            "url_matches",
            "text_appears",
            "spinner_disappears",
            "element_exists",
            "element_gone",
            "window_title_matches",
            "app_idle",
        ]),
        selector: z.string().optional(),
        regex: z.string().optional(),
        text: z.string().optional(),
        target: TargetSchema.optional(),
        bundleId: z.string().optional(),
        timeoutMs: z.number().optional(),
    })
    .describe("Condition to wait for");
63
// Rectangle given by an origin (x, y) and a size (width, height); all four
// fields are required numbers.
const RegionSchema = z.object({ x: z.number(), y: z.number(), width: z.number(), height: z.number() });
69
+ // ── Target parser ──
70
/**
 * Convert a tool-level target (shorthand string or object) into the tagged
 * target object handed to the runtime.
 *
 * String shorthands:
 *   - `css=<sel>`   -> selector target
 *   - `text=<txt>`  -> exact text target
 *   - `ax_id=<id>`  -> accessibility attribute `identifier`
 *   - anything else -> text target (no `exact` flag)
 *
 * Object inputs are matched in this order: `selector`, `text`,
 * `role` + `name`, `x` + `y`, `attribute` + `value`.
 *
 * @param {string|object} input Raw target value.
 * @returns {object} Target object tagged with a `type` field.
 * @throws {Error} "Invalid target" when the input is null/undefined or
 *   matches none of the shapes above.
 */
function parseTarget(input) {
    if (typeof input === "string") {
        if (input.startsWith("css="))
            return { type: "selector", value: input.slice(4) };
        if (input.startsWith("text="))
            return { type: "text", value: input.slice(5), exact: true };
        if (input.startsWith("ax_id="))
            return { type: "ax_attribute", attribute: "identifier", value: input.slice(6) };
        return { type: "text", value: input };
    }
    // A missing target (e.g. an optional field that was omitted) must fail
    // with the same error as any other bad target, not with a
    // property-access TypeError.
    if (input == null)
        throw new Error("Invalid target");
    const obj = input;
    if (typeof obj.selector === "string")
        return { type: "selector", value: obj.selector };
    if (typeof obj.text === "string")
        return { type: "text", value: obj.text, exact: obj.exact === true };
    if (typeof obj.role === "string" && typeof obj.name === "string")
        return { type: "role", role: obj.role, name: obj.name, exact: obj.exact === true };
    if (typeof obj.x === "number" && typeof obj.y === "number")
        return { type: "coordinates", x: obj.x, y: obj.y };
    if (typeof obj.attribute === "string" && typeof obj.value === "string")
        return { type: "ax_attribute", attribute: obj.attribute, value: obj.value };
    throw new Error("Invalid target");
}
93
/**
 * Reduce a raw wait-condition object to the fields relevant for its `type`.
 *
 * Selector-based types keep `selector`, regex-based types keep `regex`,
 * `text_appears` keeps `text`, element types get their `target` run through
 * `parseTarget`, and `app_idle` keeps `bundleId` plus `timeoutMs` when that
 * is a number.
 *
 * @param {object} input Raw condition carrying a `type` tag.
 * @returns {object} Condition object containing only the relevant fields.
 * @throws {Error} When `type` is not one of the known condition types.
 */
function parseWaitCondition(input) {
    const { type } = input;
    switch (type) {
        case "selector_visible":
        case "selector_hidden":
        case "spinner_disappears":
            return { type, selector: input.selector };
        case "url_matches":
        case "window_title_matches":
            return { type, regex: input.regex };
        case "text_appears":
            return { type, text: input.text };
        case "element_exists":
        case "element_gone":
            return { type, target: parseTarget(input.target) };
        case "app_idle": {
            const { bundleId, timeoutMs } = input;
            // Only carry a timeout through when one was actually supplied.
            return typeof timeoutMs === "number"
                ? { type, bundleId, timeoutMs }
                : { type, bundleId };
        }
        default:
            throw new Error(`Unknown wait condition type: ${type}`);
    }
}
114
+ // ── Helpers ──
115
/**
 * Wrap a value as a successful tool result with a single text content item
 * holding the value as pretty-printed JSON.
 *
 * @param {*} data Value to report back.
 * @returns {{content: Array<{type: string, text: string}>}} Tool result.
 */
function ok(data) {
    // JSON.stringify yields undefined (not a string) for undefined, functions
    // and symbols; fall back to "null" so `text` is always a string.
    const text = JSON.stringify(data, null, 2) ?? "null";
    return { content: [{ type: "text", text }] };
}
118
/**
 * Wrap a message as a failed tool result: one text content item plus the
 * `isError` flag.
 *
 * @param {string} message Error text to report back.
 * @returns {{content: Array<{type: string, text: string}>, isError: boolean}}
 */
function err(message) {
    const content = [{ type: "text", text: message }];
    return { content, isError: true };
}
121
+ // ── Server builder ──
122
+ export function createMcpStdioServer(runtime) {
123
+ const mcp = new McpServer({ name: "screenhand", version: "0.1.0" }, {
124
+ capabilities: { tools: {} },
125
+ instructions: "ScreenHand gives AI agents eyes and hands on the desktop. Use session_start to begin, then call tools to control apps.",
126
+ });
127
+ // ── session_start ──
128
+ mcp.tool("session_start", "Start a new automation session. Returns a sessionId needed by all other tools. Automatically attaches to the frontmost app.", { profile: z.string().optional().describe("Session profile name (default: 'automation')") }, async ({ profile }) => {
129
+ try {
130
+ const session = await runtime.sessionStart(profile);
131
+ return ok(session);
132
+ }
133
+ catch (e) {
134
+ return err(`Failed to start session: ${e instanceof Error ? e.message : String(e)}`);
135
+ }
136
+ });
137
+ // ── press ──
138
+ mcp.tool("press", "Click/press a UI element. Finds the element by text, role, selector, or coordinates, then clicks it.", {
139
+ sessionId: z.string().describe("Session ID from session_start"),
140
+ target: TargetSchema.describe("What to click — text string, {role, name}, {selector}, or {x, y}"),
141
+ verify: WaitConditionSchema.optional().describe("Optional condition to verify after clicking"),
142
+ }, async ({ sessionId, target, verify }) => {
143
+ const input = { sessionId, target: parseTarget(target) };
144
+ if (verify)
145
+ input.verify = parseWaitCondition(verify);
146
+ const result = await runtime.press(input);
147
+ return result.ok ? ok(result) : err(result.error.message);
148
+ });
149
+ // ── type_into ──
150
+ mcp.tool("type_into", "Type text into a UI element (text field, search box, etc). Locates the field, optionally clears it, then types.", {
151
+ sessionId: z.string(),
152
+ target: TargetSchema.describe("The input field to type into"),
153
+ text: z.string().describe("Text to type"),
154
+ clear: z.boolean().optional().describe("Clear the field first (default: true)"),
155
+ verify: WaitConditionSchema.optional(),
156
+ }, async ({ sessionId, target, text, clear, verify }) => {
157
+ const input = { sessionId, target: parseTarget(target), text };
158
+ if (typeof clear === "boolean")
159
+ input.clear = clear;
160
+ if (verify)
161
+ input.verify = parseWaitCondition(verify);
162
+ const result = await runtime.typeInto(input);
163
+ return result.ok ? ok(result) : err(result.error.message);
164
+ });
165
+ // ── navigate ──
166
+ mcp.tool("navigate", "Navigate a browser to a URL, or open an app via 'app://com.bundle.id'.", {
167
+ sessionId: z.string(),
168
+ url: z.string().describe("URL to navigate to, or 'app://bundleId' to launch an app"),
169
+ timeoutMs: z.number().optional().describe("Navigation timeout in ms (default: 10000)"),
170
+ }, async ({ sessionId, url, timeoutMs }) => {
171
+ const input = { sessionId, url };
172
+ if (typeof timeoutMs === "number")
173
+ input.timeoutMs = timeoutMs;
174
+ const result = await runtime.navigate(input);
175
+ return result.ok ? ok(result) : err(result.error.message);
176
+ });
177
+ // ── wait_for ──
178
+ mcp.tool("wait_for", "Wait for a condition: element appears/disappears, text appears, URL changes, window title matches, etc.", {
179
+ sessionId: z.string(),
180
+ condition: WaitConditionSchema,
181
+ timeoutMs: z.number().optional().describe("Timeout in ms (default: 2000)"),
182
+ }, async ({ sessionId, condition, timeoutMs }) => {
183
+ const input = { sessionId, condition: parseWaitCondition(condition) };
184
+ if (typeof timeoutMs === "number")
185
+ input.timeoutMs = timeoutMs;
186
+ const result = await runtime.waitFor(input);
187
+ return result.ok ? ok(result) : err(result.error.message);
188
+ });
189
+ // ── extract ──
190
+ mcp.tool("extract", "Extract data from a UI element. Returns text content, table data, or structured JSON from the element.", {
191
+ sessionId: z.string(),
192
+ target: TargetSchema,
193
+ format: z.enum(["text", "table", "json"]).describe("Output format"),
194
+ }, async ({ sessionId, target, format }) => {
195
+ const result = await runtime.extract({
196
+ sessionId,
197
+ target: parseTarget(target),
198
+ format: format,
199
+ });
200
+ return result.ok ? ok(result) : err(result.error.message);
201
+ });
202
+ // ── screenshot ──
203
+ mcp.tool("screenshot", "Capture a screenshot of the current app window or a specific screen region. Returns the file path.", {
204
+ sessionId: z.string(),
205
+ region: RegionSchema.optional().describe("Optional screen region to capture"),
206
+ }, async ({ sessionId, region }) => {
207
+ const input = { sessionId };
208
+ if (region)
209
+ input.region = region;
210
+ const result = await runtime.screenshot(input);
211
+ return result.ok ? ok(result) : err(result.error.message);
212
+ });
213
+ // ── app_launch ──
214
+ mcp.tool("app_launch", "Launch a macOS/Windows application by bundle ID (e.g., 'com.apple.Safari', 'com.google.Chrome').", {
215
+ sessionId: z.string(),
216
+ bundleId: z.string().describe("macOS bundle ID or Windows process name"),
217
+ }, async ({ sessionId, bundleId }) => {
218
+ const result = await runtime.appLaunch({ sessionId, bundleId });
219
+ return result.ok ? ok(result) : err(result.error.message);
220
+ });
221
+ // ── app_focus ──
222
+ mcp.tool("app_focus", "Bring a running application to the foreground.", {
223
+ sessionId: z.string(),
224
+ bundleId: z.string(),
225
+ }, async ({ sessionId, bundleId }) => {
226
+ const result = await runtime.appFocus({ sessionId, bundleId });
227
+ return result.ok ? ok(result) : err(result.error.message);
228
+ });
229
+ // ── app_list ──
230
+ mcp.tool("app_list", "List all running applications with their bundle IDs, names, and PIDs.", { sessionId: z.string() }, async ({ sessionId }) => {
231
+ const result = await runtime.appList(sessionId);
232
+ return result.ok ? ok(result) : err(result.error.message);
233
+ });
234
+ // ── window_list ──
235
+ mcp.tool("window_list", "List all visible windows with their titles, positions, and sizes.", { sessionId: z.string() }, async ({ sessionId }) => {
236
+ const result = await runtime.windowList(sessionId);
237
+ return result.ok ? ok(result) : err(result.error.message);
238
+ });
239
+ // ── menu_click ──
240
+ mcp.tool("menu_click", "Click a menu item by path. For example ['File', 'Save As...'] clicks File → Save As.", {
241
+ sessionId: z.string(),
242
+ menuPath: z.array(z.string()).describe("Menu path, e.g. ['File', 'New Window']"),
243
+ }, async ({ sessionId, menuPath }) => {
244
+ const result = await runtime.menuClick({ sessionId, menuPath });
245
+ return result.ok ? ok(result) : err(result.error.message);
246
+ });
247
+ // ── key_combo ──
248
+ mcp.tool("key_combo", "Send a keyboard shortcut. Keys: 'cmd', 'ctrl', 'alt', 'shift', plus any character. E.g. ['cmd', 'c'] for copy.", {
249
+ sessionId: z.string(),
250
+ keys: z.array(z.string()).describe("Key combination, e.g. ['cmd', 's']"),
251
+ }, async ({ sessionId, keys }) => {
252
+ const result = await runtime.keyCombo({ sessionId, keys });
253
+ return result.ok ? ok(result) : err(result.error.message);
254
+ });
255
+ // ── element_tree ──
256
+ mcp.tool("element_tree", "Get the accessibility element tree of the current app. Useful for understanding the UI structure and finding elements to interact with.", {
257
+ sessionId: z.string(),
258
+ maxDepth: z.number().optional().describe("Max tree depth (default: 5)"),
259
+ }, async ({ sessionId, maxDepth }) => {
260
+ const input = { sessionId };
261
+ if (typeof maxDepth === "number")
262
+ input.maxDepth = maxDepth;
263
+ const result = await runtime.elementTree(input);
264
+ return result.ok ? ok(result) : err(result.error.message);
265
+ });
266
+ // ── drag ──
267
+ mcp.tool("drag", "Drag from one UI element to another.", {
268
+ sessionId: z.string(),
269
+ from: TargetSchema.describe("Element to drag from"),
270
+ to: TargetSchema.describe("Element to drag to"),
271
+ }, async ({ sessionId, from, to }) => {
272
+ const result = await runtime.drag({
273
+ sessionId,
274
+ from: parseTarget(from),
275
+ to: parseTarget(to),
276
+ });
277
+ return result.ok ? ok(result) : err(result.error.message);
278
+ });
279
+ // ── scroll ──
280
+ mcp.tool("scroll", "Scroll in a direction, optionally targeting a specific element.", {
281
+ sessionId: z.string(),
282
+ direction: z.enum(["up", "down", "left", "right"]),
283
+ amount: z.number().optional().describe("Scroll amount (default: 3)"),
284
+ target: TargetSchema.optional().describe("Element to scroll within"),
285
+ }, async ({ sessionId, direction, amount, target }) => {
286
+ const input = { sessionId, direction };
287
+ if (typeof amount === "number")
288
+ input.amount = amount;
289
+ if (target)
290
+ input.target = parseTarget(target);
291
+ const result = await runtime.scroll(input);
292
+ return result.ok ? ok(result) : err(result.error.message);
293
+ });
294
+ // ── task_run ──
295
// Registers the "task_run" tool. The handler lazily loads the agent loop,
// starts a session via runtime.sessionStart() when no sessionId was given,
// runs the loop, and returns a summary of the run. Any thrown error is
// reported through err() with an "Agent loop failed" prefix.
mcp.tool(
    "task_run",
    "Run a complete task autonomously. Starts an observe→decide→act loop that uses the accessibility tree (not screenshots) to see the UI and Claude to decide each action. The loop continues until the task is fully done or max steps reached. Returns a summary of all actions taken.",
    {
        task: z.string().describe("Natural language description of the task to complete"),
        sessionId: z.string().optional().describe("Existing session ID (auto-creates if not provided)"),
        maxSteps: z.number().optional().describe("Max actions before stopping (default: 50)"),
        model: z.string().optional().describe("Claude model for decisions (default: claude-sonnet-4-20250514)"),
    },
    async ({ task, sessionId, maxSteps, model }) => {
        try {
            const { runAgentLoop } = await import("../agent/loop.js");

            // Reuse the caller's session, or create one on demand.
            const activeSessionId = sessionId
                ? sessionId
                : (await runtime.sessionStart()).sessionId;

            // Only pass a model override when one was requested.
            const loopOptions = { maxSteps: maxSteps ?? 50 };
            if (model) {
                loopOptions.model = model;
            }
            // Progress goes to stderr, one line per step.
            loopOptions.onStep = (step) => {
                process.stderr.write(`[step ${step.index}] ${step.reasoning.slice(0, 80)} → ${step.action?.tool ?? "none"} (${step.durationMs}ms)\n`);
            };

            const run = await runAgentLoop(runtime, activeSessionId, task, loopOptions);

            return ok({
                success: run.success,
                summary: run.summary,
                totalSteps: run.steps.length,
                totalMs: run.totalMs,
                // Return only the reported fields of each step.
                steps: run.steps.map(({ reasoning, action, result, durationMs }) => ({
                    reasoning,
                    action,
                    result,
                    durationMs,
                })),
            });
        }
        catch (e) {
            const detail = e instanceof Error ? e.message : String(e);
            return err(`Agent loop failed: ${detail}`);
        }
    },
);
333
+ // ── playbook_run ──
334
// Registers the "playbook_run" tool. With a playbookId, the matching playbook
// is looked up through PlaybookRunner.listPlaybooks() and executed with
// PlaybookEngine; with only a task, PlaybookRunner.execute() is used. When
// neither is given, an error result is returned. Any thrown error is reported
// through err() with a "Playbook failed" prefix.
mcp.tool(
    "playbook_run",
    "Execute a saved playbook by ID or auto-match by task description. Playbooks run deterministically without AI calls. If a step fails, AI automatically recovers and patches the playbook for next time.",
    {
        sessionId: z.string(),
        task: z.string().optional().describe("Natural language task — auto-matches best playbook"),
        playbookId: z.string().optional().describe("Specific playbook ID to run"),
    },
    async ({ sessionId, task, playbookId }) => {
        try {
            // Reject an empty request before loading any playbook code.
            if (!playbookId && !task) {
                return err("Provide either task or playbookId");
            }

            const { PlaybookRunner } = await import("../playbook/runner.js");
            // URL#pathname is percent-encoded (a space becomes "%20") and is
            // not a valid path on Windows; fileURLToPath yields a real
            // filesystem path.
            const { fileURLToPath } = await import("node:url");
            const playbookDir = fileURLToPath(new URL("../../playbooks", import.meta.url));
            const runner = new PlaybookRunner(runtime, playbookDir, {
                onLog: (msg) => process.stderr.write(`${msg}\n`),
            });

            if (playbookId) {
                const playbook = runner.listPlaybooks().find((p) => p.id === playbookId);
                if (!playbook) {
                    return err(`Playbook "${playbookId}" not found`);
                }
                const { PlaybookEngine } = await import("../playbook/engine.js");
                const engine = new PlaybookEngine(runtime);
                const result = await engine.run(sessionId, playbook, {
                    onStep: (i, step, res) => {
                        process.stderr.write(`[playbook step ${i + 1}] ${step.description ?? step.action} → ${res}\n`);
                    },
                });
                return ok(result);
            }

            // Only a task was supplied: let the runner choose the playbook.
            const result = await runner.execute(sessionId, task);
            return ok(result);
        }
        catch (e) {
            return err(`Playbook failed: ${e instanceof Error ? e.message : String(e)}`);
        }
    },
);
368
+ // ── playbook_list ──
369
// Registers the "playbook_list" tool. It loads a PlaybookStore from the
// playbooks directory and returns a summary entry per playbook plus the total
// count. Any thrown error is reported through err().
mcp.tool(
    "playbook_list",
    "List all available playbooks with their IDs, names, platforms, and success rates.",
    {},
    async () => {
        try {
            const { PlaybookStore } = await import("../playbook/store.js");
            // URL#pathname is percent-encoded (a space becomes "%20") and is
            // not a valid path on Windows; fileURLToPath yields a real
            // filesystem path.
            const { fileURLToPath } = await import("node:url");
            const playbookDir = fileURLToPath(new URL("../../playbooks", import.meta.url));
            const store = new PlaybookStore(playbookDir);
            store.load();
            const playbooks = store.getAll().map((p) => ({
                id: p.id,
                name: p.name,
                platform: p.platform,
                description: p.description,
                stepsCount: p.steps.length,
                successCount: p.successCount,
                failCount: p.failCount,
                tags: p.tags,
                lastRun: p.lastRun,
            }));
            return ok({ playbooks, total: playbooks.length });
        }
        catch (e) {
            return err(`Failed to list playbooks: ${e instanceof Error ? e.message : String(e)}`);
        }
    },
);
392
+ // ── recording_start ──
393
// The recorder for the recording currently in progress, or null when idle.
// Shared by the recording_start / recording_stop / recording_cancel /
// recording_status tools.
let activeRecorder = null;
// Registers the "recording_start" tool. It creates a PlaybookRecorder, starts
// it for the given session, and stores it in activeRecorder only after start()
// has succeeded. Any thrown error is reported through err().
mcp.tool(
    "recording_start",
    "Start recording user actions to auto-generate a playbook. Do the task manually while recording, then call recording_stop to save.",
    {
        sessionId: z.string(),
    },
    async ({ sessionId }) => {
        try {
            // Replacing a live recorder would leave it running with no
            // reference left to stop or cancel it, so refuse instead.
            if (activeRecorder && activeRecorder.isRecording) {
                return err("Recording already in progress. Call recording_stop or recording_cancel first.");
            }

            const { PlaybookRecorder } = await import("../playbook/recorder.js");
            // URL#pathname is percent-encoded (a space becomes "%20") and is
            // not a valid path on Windows; fileURLToPath yields a real
            // filesystem path.
            const { fileURLToPath } = await import("node:url");
            const playbookDir = fileURLToPath(new URL("../../playbooks", import.meta.url));
            const recorder = new PlaybookRecorder(runtime, playbookDir, {
                onLog: (msg) => process.stderr.write(`${msg}\n`),
            });
            await recorder.start(sessionId);
            activeRecorder = recorder;
            return ok({ status: "recording", message: "Recording started. Do the task manually, then call recording_stop." });
        }
        catch (e) {
            return err(`Failed to start recording: ${e instanceof Error ? e.message : String(e)}`);
        }
    },
);
411
+ // ── recording_stop ──
412
// Registers the "recording_stop" tool. It stops the active recorder, which
// returns the new playbook, clears activeRecorder, and reports the playbook's
// id, name and numbered step list. The description falls back to the name
// when omitted. Any thrown error is reported through err().
mcp.tool(
    "recording_stop",
    "Stop recording and save the captured actions as a new playbook.",
    {
        name: z.string().describe("Name for the playbook (e.g. 'Change X profile picture')"),
        description: z.string().optional().describe("What the playbook does"),
        platform: z.string().describe("Platform name (e.g. 'x', 'instagram', 'gmail')"),
    },
    async ({ name, description, platform }) => {
        try {
            const current = activeRecorder;
            const isLive = Boolean(current && current.isRecording);
            if (!isLive) {
                return err("No active recording. Call recording_start first.");
            }

            const saved = await current.stop(name, description ?? name, platform);
            // Cleared only after stop() succeeds.
            activeRecorder = null;

            const numberedSteps = saved.steps.map(
                (step, index) => `${index + 1}. ${step.description ?? step.action}`,
            );
            return ok({
                status: "saved",
                playbookId: saved.id,
                name: saved.name,
                stepsCount: saved.steps.length,
                steps: numberedSteps,
            });
        }
        catch (e) {
            const detail = e instanceof Error ? e.message : String(e);
            return err(`Failed to stop recording: ${detail}`);
        }
    },
);
436
+ // ── recording_cancel ──
437
// Registers the "recording_cancel" tool. It cancels the active recorder and
// clears activeRecorder; when nothing is recording it returns an error result.
mcp.tool(
    "recording_cancel",
    "Cancel the current recording without saving.",
    {},
    async () => {
        const current = activeRecorder;
        if (current && current.isRecording) {
            current.cancel();
            activeRecorder = null;
            return ok({ status: "cancelled" });
        }
        return err("No active recording.");
    },
);
446
+ // ── recording_status ──
447
// Registers the "recording_status" tool. While a recording is active it
// reports the recorder's event count and a one-line preview per event;
// otherwise it reports recording: false with a zero count.
mcp.tool(
    "recording_status",
    "Check if recording is active and how many events captured so far.",
    {},
    async () => {
        const current = activeRecorder;
        if (current && current.isRecording) {
            const eventCount = current.eventCount;
            // Each preview shows the event type and at most 80 characters of
            // its JSON-serialised details.
            const events = current
                .getEvents()
                .map((event) => `[${event.type}] ${JSON.stringify(event.details).slice(0, 80)}`);
            return ok({ recording: true, eventCount, events });
        }
        return ok({ recording: false, eventCount: 0 });
    },
);
458
+ return mcp;
459
+ }
460
/**
 * Builds the MCP server for the given runtime and connects it to a stdio
 * transport.
 *
 * @param runtime - Runtime object passed through to createMcpStdioServer.
 * @returns {Promise<void>} Resolves once the server's connect() call completes.
 */
export async function startMcpStdioServer(runtime) {
    const server = createMcpStdioServer(runtime);
    const stdio = new StdioServerTransport();
    await server.connect(stdio);
}