screenhand 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (177) hide show
  1. package/README.md +458 -93
  2. package/dist/.audit-log.jsonl +55 -0
  3. package/dist/.screenhand/memory/.lock +1 -0
  4. package/dist/.screenhand/memory/actions.jsonl +85 -0
  5. package/dist/.screenhand/memory/errors.jsonl +5 -0
  6. package/dist/.screenhand/memory/errors.jsonl.bak +4 -0
  7. package/dist/.screenhand/memory/state.json +35 -0
  8. package/dist/.screenhand/memory/state.json.bak +35 -0
  9. package/dist/.screenhand/memory/strategies.jsonl +12 -0
  10. package/dist/agent/cli.js +73 -0
  11. package/dist/agent/loop.js +258 -0
  12. package/dist/config.js +9 -0
  13. package/dist/index.js +56 -0
  14. package/dist/logging/timeline-logger.js +29 -0
  15. package/dist/mcp/mcp-stdio-server.js +448 -0
  16. package/dist/mcp/server.js +347 -0
  17. package/dist/mcp-desktop.js +2731 -0
  18. package/dist/mcp-entry.js +59 -0
  19. package/dist/memory/recall.js +160 -0
  20. package/dist/memory/research.js +98 -0
  21. package/dist/memory/seeds.js +89 -0
  22. package/dist/memory/session.js +161 -0
  23. package/dist/memory/store.js +391 -0
  24. package/dist/memory/types.js +4 -0
  25. package/dist/monitor/codex-monitor.js +377 -0
  26. package/dist/monitor/task-queue.js +84 -0
  27. package/dist/monitor/types.js +49 -0
  28. package/dist/native/bridge-client.js +174 -0
  29. package/dist/native/macos-bridge-client.js +5 -0
  30. package/dist/npm-publish-helper.js +117 -0
  31. package/dist/npm-token-cdp.js +113 -0
  32. package/dist/npm-token-create.js +135 -0
  33. package/dist/npm-token-finish.js +126 -0
  34. package/dist/playbook/engine.js +193 -0
  35. package/dist/playbook/index.js +4 -0
  36. package/dist/playbook/recorder.js +519 -0
  37. package/dist/playbook/runner.js +392 -0
  38. package/dist/playbook/store.js +166 -0
  39. package/dist/playbook/types.js +4 -0
  40. package/dist/runtime/accessibility-adapter.js +377 -0
  41. package/dist/runtime/app-adapter.js +48 -0
  42. package/dist/runtime/applescript-adapter.js +283 -0
  43. package/dist/runtime/ax-role-map.js +80 -0
  44. package/dist/runtime/browser-adapter.js +36 -0
  45. package/dist/runtime/cdp-chrome-adapter.js +505 -0
  46. package/dist/runtime/composite-adapter.js +205 -0
  47. package/dist/runtime/executor.js +250 -0
  48. package/dist/runtime/locator-cache.js +12 -0
  49. package/dist/runtime/planning-loop.js +47 -0
  50. package/dist/runtime/service.js +372 -0
  51. package/dist/runtime/session-manager.js +28 -0
  52. package/dist/runtime/state-observer.js +105 -0
  53. package/dist/runtime/vision-adapter.js +208 -0
  54. package/dist/scripts/codex-monitor-daemon.js +335 -0
  55. package/dist/scripts/supervisor-daemon.js +272 -0
  56. package/dist/scripts/worker-daemon.js +228 -0
  57. package/dist/src/agent/cli.js +82 -0
  58. package/dist/src/agent/loop.js +274 -0
  59. package/{src/config.ts → dist/src/config.js} +5 -10
  60. package/{src/index.ts → dist/src/index.js} +32 -52
  61. package/dist/src/jobs/manager.js +237 -0
  62. package/dist/src/jobs/runner.js +683 -0
  63. package/dist/src/jobs/store.js +102 -0
  64. package/dist/src/jobs/types.js +30 -0
  65. package/dist/src/jobs/worker.js +97 -0
  66. package/dist/src/logging/timeline-logger.js +45 -0
  67. package/dist/src/mcp/mcp-stdio-server.js +464 -0
  68. package/dist/src/mcp/server.js +363 -0
  69. package/dist/src/mcp-entry.js +60 -0
  70. package/dist/src/memory/recall.js +170 -0
  71. package/dist/src/memory/research.js +104 -0
  72. package/dist/src/memory/seeds.js +101 -0
  73. package/dist/src/memory/service.js +421 -0
  74. package/dist/src/memory/session.js +169 -0
  75. package/dist/src/memory/store.js +422 -0
  76. package/dist/src/memory/types.js +17 -0
  77. package/dist/src/monitor/codex-monitor.js +382 -0
  78. package/dist/src/monitor/task-queue.js +97 -0
  79. package/dist/src/monitor/types.js +62 -0
  80. package/dist/src/native/bridge-client.js +190 -0
  81. package/{src/native/macos-bridge-client.ts → dist/src/native/macos-bridge-client.js} +0 -1
  82. package/dist/src/playbook/engine.js +201 -0
  83. package/dist/src/playbook/index.js +20 -0
  84. package/dist/src/playbook/recorder.js +535 -0
  85. package/dist/src/playbook/runner.js +408 -0
  86. package/dist/src/playbook/store.js +183 -0
  87. package/dist/src/playbook/types.js +17 -0
  88. package/dist/src/runtime/accessibility-adapter.js +393 -0
  89. package/dist/src/runtime/app-adapter.js +64 -0
  90. package/dist/src/runtime/applescript-adapter.js +299 -0
  91. package/dist/src/runtime/ax-role-map.js +96 -0
  92. package/dist/src/runtime/browser-adapter.js +52 -0
  93. package/dist/src/runtime/cdp-chrome-adapter.js +521 -0
  94. package/dist/src/runtime/composite-adapter.js +221 -0
  95. package/dist/src/runtime/execution-contract.js +159 -0
  96. package/dist/src/runtime/executor.js +266 -0
  97. package/{src/runtime/locator-cache.ts → dist/src/runtime/locator-cache.js} +10 -15
  98. package/dist/src/runtime/planning-loop.js +63 -0
  99. package/dist/src/runtime/service.js +388 -0
  100. package/dist/src/runtime/session-manager.js +60 -0
  101. package/dist/src/runtime/state-observer.js +121 -0
  102. package/dist/src/runtime/vision-adapter.js +224 -0
  103. package/dist/src/supervisor/locks.js +186 -0
  104. package/dist/src/supervisor/supervisor.js +403 -0
  105. package/dist/src/supervisor/types.js +30 -0
  106. package/dist/src/test-mcp-protocol.js +154 -0
  107. package/dist/src/types.js +17 -0
  108. package/dist/src/util/atomic-write.js +118 -0
  109. package/dist/test-mcp-protocol.js +138 -0
  110. package/dist/types.js +1 -0
  111. package/package.json +18 -4
  112. package/.claude/commands/automate.md +0 -28
  113. package/.claude/commands/debug-ui.md +0 -19
  114. package/.claude/commands/screenshot.md +0 -15
  115. package/.github/FUNDING.yml +0 -1
  116. package/.github/ISSUE_TEMPLATE/bug_report.md +0 -27
  117. package/.github/ISSUE_TEMPLATE/feature_request.md +0 -20
  118. package/.mcp.json +0 -8
  119. package/DESKTOP_MCP_GUIDE.md +0 -92
  120. package/SECURITY.md +0 -44
  121. package/docs/architecture.md +0 -47
  122. package/install-skills.sh +0 -19
  123. package/mcp-bridge.ts +0 -271
  124. package/mcp-desktop.ts +0 -1221
  125. package/native/macos-bridge/Package.swift +0 -21
  126. package/native/macos-bridge/Sources/AccessibilityBridge.swift +0 -261
  127. package/native/macos-bridge/Sources/AppManagement.swift +0 -129
  128. package/native/macos-bridge/Sources/CoreGraphicsBridge.swift +0 -242
  129. package/native/macos-bridge/Sources/ObserverBridge.swift +0 -120
  130. package/native/macos-bridge/Sources/VisionBridge.swift +0 -80
  131. package/native/macos-bridge/Sources/main.swift +0 -345
  132. package/native/windows-bridge/AppManagement.cs +0 -234
  133. package/native/windows-bridge/InputBridge.cs +0 -436
  134. package/native/windows-bridge/Program.cs +0 -265
  135. package/native/windows-bridge/ScreenCapture.cs +0 -329
  136. package/native/windows-bridge/UIAutomationBridge.cs +0 -571
  137. package/native/windows-bridge/WindowsBridge.csproj +0 -17
  138. package/playbooks/devpost.json +0 -186
  139. package/playbooks/instagram.json +0 -41
  140. package/playbooks/instagram_v2.json +0 -201
  141. package/playbooks/x_v1.json +0 -211
  142. package/scripts/devpost-live-loop.mjs +0 -421
  143. package/src/logging/timeline-logger.ts +0 -55
  144. package/src/mcp/server.ts +0 -449
  145. package/src/memory/recall.ts +0 -191
  146. package/src/memory/research.ts +0 -146
  147. package/src/memory/seeds.ts +0 -123
  148. package/src/memory/session.ts +0 -201
  149. package/src/memory/store.ts +0 -434
  150. package/src/memory/types.ts +0 -69
  151. package/src/native/bridge-client.ts +0 -239
  152. package/src/runtime/accessibility-adapter.ts +0 -487
  153. package/src/runtime/app-adapter.ts +0 -169
  154. package/src/runtime/applescript-adapter.ts +0 -376
  155. package/src/runtime/ax-role-map.ts +0 -102
  156. package/src/runtime/browser-adapter.ts +0 -129
  157. package/src/runtime/cdp-chrome-adapter.ts +0 -676
  158. package/src/runtime/composite-adapter.ts +0 -274
  159. package/src/runtime/executor.ts +0 -396
  160. package/src/runtime/planning-loop.ts +0 -81
  161. package/src/runtime/service.ts +0 -448
  162. package/src/runtime/session-manager.ts +0 -50
  163. package/src/runtime/state-observer.ts +0 -136
  164. package/src/runtime/vision-adapter.ts +0 -297
  165. package/src/types.ts +0 -297
  166. package/tests/bridge-client.test.ts +0 -176
  167. package/tests/browser-stealth.test.ts +0 -210
  168. package/tests/composite-adapter.test.ts +0 -64
  169. package/tests/mcp-server.test.ts +0 -151
  170. package/tests/memory-recall.test.ts +0 -339
  171. package/tests/memory-research.test.ts +0 -159
  172. package/tests/memory-seeds.test.ts +0 -120
  173. package/tests/memory-store.test.ts +0 -392
  174. package/tests/types.test.ts +0 -92
  175. package/tsconfig.check.json +0 -17
  176. package/tsconfig.json +0 -19
  177. package/vitest.config.ts +0 -8
package/mcp-bridge.ts DELETED
@@ -1,271 +0,0 @@
1
- #!/usr/bin/env npx tsx
2
- /**
3
- * ScreenHand Bridge — exposes the native bridge as MCP tools.
4
- * Claude Code can call these directly as tool calls — no API key needed.
5
- *
6
- * Add to .claude/settings.json:
7
- * {
8
- * "mcpServers": {
9
- * "screenhand": {
10
- * "command": "npx",
11
- * "args": ["tsx", "/path/to/screenhand/mcp-bridge.ts"]
12
- * }
13
- * }
14
- * }
15
- */
16
-
17
- import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
18
- import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
19
- import { z } from "zod";
20
- import path from "node:path";
21
- import { BridgeClient } from "./src/native/bridge-client.js";
22
-
23
- const bridgePath = process.platform === "win32"
24
- ? path.resolve(import.meta.dirname ?? process.cwd(), "native/windows-bridge/bin/Release/net8.0-windows/windows-bridge.exe")
25
- : path.resolve(import.meta.dirname ?? process.cwd(), "native/macos-bridge/.build/release/macos-bridge");
26
-
27
- const bridge = new BridgeClient(bridgePath);
28
- let bridgeReady = false;
29
-
30
- async function ensureBridge() {
31
- if (!bridgeReady) {
32
- await bridge.start();
33
- bridgeReady = true;
34
- }
35
- }
36
-
37
- const server = new McpServer({
38
- name: "screenhand",
39
- version: "1.0.0",
40
- });
41
-
42
- // ── Apps ──
43
-
44
- server.tool("apps", "List all running applications", {}, async () => {
45
- await ensureBridge();
46
- const apps = await bridge.call<any[]>("app.list");
47
- return { content: [{ type: "text", text: JSON.stringify(apps, null, 2) }] };
48
- });
49
-
50
- server.tool("windows", "List all visible windows with IDs and positions", {}, async () => {
51
- await ensureBridge();
52
- const wins = await bridge.call<any[]>("app.windows");
53
- return { content: [{ type: "text", text: JSON.stringify(wins, null, 2) }] };
54
- });
55
-
56
- server.tool("focus", "Focus/activate an application by bundle ID", {
57
- bundleId: z.string().describe("App bundle ID, e.g. com.apple.Safari"),
58
- }, async ({ bundleId }) => {
59
- await ensureBridge();
60
- await bridge.call("app.focus", { bundleId });
61
- return { content: [{ type: "text", text: "Focused " + bundleId }] };
62
- });
63
-
64
- server.tool("launch", "Launch an application by bundle ID", {
65
- bundleId: z.string().describe("App bundle ID"),
66
- }, async ({ bundleId }) => {
67
- await ensureBridge();
68
- const r = await bridge.call<any>("app.launch", { bundleId });
69
- return { content: [{ type: "text", text: JSON.stringify(r) }] };
70
- });
71
-
72
- // ── Screenshot + OCR ──
73
-
74
- server.tool("screenshot", "Screenshot a window (or full screen) and OCR it. Returns visible text.", {
75
- windowId: z.number().optional().describe("Window ID to capture. Omit for full screen."),
76
- }, async ({ windowId }) => {
77
- await ensureBridge();
78
- let shotPath: string;
79
- if (windowId) {
80
- const s = await bridge.call<any>("cg.captureWindow", { windowId });
81
- shotPath = s.path;
82
- } else {
83
- const s = await bridge.call<any>("cg.captureScreen");
84
- shotPath = s.path;
85
- }
86
- const ocr = await bridge.call<any>("vision.ocr", { imagePath: shotPath });
87
- return { content: [{ type: "text", text: ocr.text }] };
88
- });
89
-
90
- server.tool("ocr_regions", "Screenshot + OCR with detailed region positions (bounds, confidence)", {
91
- windowId: z.number().optional().describe("Window ID. Omit for full screen."),
92
- }, async ({ windowId }) => {
93
- await ensureBridge();
94
- let shotPath: string;
95
- let imgW: number, imgH: number;
96
- if (windowId) {
97
- const s = await bridge.call<any>("cg.captureWindow", { windowId });
98
- shotPath = s.path; imgW = s.width; imgH = s.height;
99
- } else {
100
- const s = await bridge.call<any>("cg.captureScreen");
101
- shotPath = s.path; imgW = s.width; imgH = s.height;
102
- }
103
- const ocr = await bridge.call<any>("vision.ocr", { imagePath: shotPath });
104
-
105
- // Also get window bounds if windowId provided (for coordinate mapping)
106
- let winInfo: any = null;
107
- if (windowId) {
108
- const wins = await bridge.call<any[]>("app.windows");
109
- winInfo = wins.find((w: any) => w.windowId === windowId);
110
- }
111
-
112
- return {
113
- content: [{
114
- type: "text",
115
- text: JSON.stringify({
116
- text: ocr.text,
117
- regions: ocr.regions,
118
- image: { width: imgW, height: imgH },
119
- window: winInfo?.bounds || null,
120
- }, null, 2),
121
- }],
122
- };
123
- });
124
-
125
- // ── Input ──
126
-
127
- server.tool("click", "Click at screen coordinates", {
128
- x: z.number().describe("Screen X coordinate"),
129
- y: z.number().describe("Screen Y coordinate"),
130
- }, async ({ x, y }) => {
131
- await ensureBridge();
132
- await bridge.call("cg.mouseClick", { x, y });
133
- return { content: [{ type: "text", text: "Clicked (" + x + ", " + y + ")" }] };
134
- });
135
-
136
- server.tool("click_text", "Find text on a window via OCR and click it. Handles Retina + shadow coordinate mapping.", {
137
- windowId: z.number().describe("Window ID to search in"),
138
- text: z.string().describe("Text to find and click"),
139
- offset_y: z.number().optional().describe("Y offset in screen points from text center. Use -25 to click icon above a label."),
140
- }, async ({ windowId, text, offset_y }) => {
141
- await ensureBridge();
142
-
143
- const wins = await bridge.call<any[]>("app.windows");
144
- const win = wins.find((w: any) => w.windowId === windowId);
145
- if (!win) return { content: [{ type: "text", text: "Window " + windowId + " not found" }] };
146
- const wb = win.bounds;
147
-
148
- const shot = await bridge.call<any>("cg.captureWindow", { windowId });
149
- const ocr = await bridge.call<any>("vision.ocr", { imagePath: shot.path });
150
-
151
- const match = ocr.regions.find((r: any) =>
152
- r.text.toLowerCase().includes(text.toLowerCase())
153
- );
154
- if (!match) {
155
- const available = ocr.regions.map((r: any) => r.text).join(", ");
156
- return { content: [{ type: "text", text: "'" + text + "' not found. Available: " + available }] };
157
- }
158
-
159
- // Shadow-corrected Retina coordinate mapping
160
- const contentW = wb.width * 2;
161
- const contentH = wb.height * 2;
162
- const shadowL = (shot.width - contentW) / 2;
163
- const shadowT = (shot.height - contentH) / 3;
164
- const imgCx = match.bounds.x + match.bounds.width / 2;
165
- const imgCy = match.bounds.y + match.bounds.height / 2;
166
- const sx = wb.x + (imgCx - shadowL) / 2;
167
- const sy = wb.y + (imgCy - shadowT) / 2 + (offset_y || 0);
168
-
169
- await bridge.call("cg.mouseMove", { x: sx, y: sy });
170
- await new Promise(r => setTimeout(r, 100));
171
- await bridge.call("cg.mouseClick", { x: sx, y: sy });
172
-
173
- return { content: [{ type: "text", text: "Clicked '" + match.text + "' at (" + Math.round(sx) + ", " + Math.round(sy) + ")" }] };
174
- });
175
-
176
- server.tool("type_text", "Type text using keyboard", {
177
- text: z.string().describe("Text to type"),
178
- }, async ({ text }) => {
179
- await ensureBridge();
180
- await bridge.call("cg.typeText", { text });
181
- return { content: [{ type: "text", text: "Typed: " + text }] };
182
- });
183
-
184
- server.tool("key", "Press a key combination", {
185
- keys: z.string().describe("Key combo like 'cmd+c', 'enter', 'cmd+shift+n'. Use + to separate."),
186
- }, async ({ keys }) => {
187
- await ensureBridge();
188
- await bridge.call("cg.keyCombo", { keys: keys.split("+") });
189
- return { content: [{ type: "text", text: "Key: " + keys }] };
190
- });
191
-
192
- // ── Gestures ──
193
-
194
- server.tool("drag", "Drag from one point to another (slow, smooth)", {
195
- fromX: z.number(), fromY: z.number(),
196
- toX: z.number(), toY: z.number(),
197
- }, async ({ fromX, fromY, toX, toY }) => {
198
- await ensureBridge();
199
- await bridge.call("cg.mouseDrag", { fromX, fromY, toX, toY });
200
- return { content: [{ type: "text", text: "Dragged (" + fromX + "," + fromY + ") → (" + toX + "," + toY + ")" }] };
201
- });
202
-
203
- server.tool("flick", "Fast swipe/flick gesture (for iOS home gesture etc)", {
204
- fromX: z.number(), fromY: z.number(),
205
- toX: z.number(), toY: z.number(),
206
- }, async ({ fromX, fromY, toX, toY }) => {
207
- await ensureBridge();
208
- await bridge.call("cg.mouseFlick", { fromX, fromY, toX, toY });
209
- return { content: [{ type: "text", text: "Flicked (" + fromX + "," + fromY + ") → (" + toX + "," + toY + ")" }] };
210
- });
211
-
212
- server.tool("scroll", "Scroll at a position", {
213
- x: z.number(), y: z.number(),
214
- deltaX: z.number().describe("Horizontal scroll amount"),
215
- deltaY: z.number().describe("Vertical scroll amount (negative = scroll down)"),
216
- }, async ({ x, y, deltaX, deltaY }) => {
217
- await ensureBridge();
218
- await bridge.call("cg.scroll", { x, y, deltaX, deltaY });
219
- return { content: [{ type: "text", text: "Scrolled" }] };
220
- });
221
-
222
- // ── Accessibility ──
223
-
224
- server.tool("ax_tree", "Get the accessibility UI tree of an app", {
225
- pid: z.number().describe("Process ID of the app"),
226
- maxDepth: z.number().optional().describe("Max tree depth (default 3)"),
227
- }, async ({ pid, maxDepth }) => {
228
- await ensureBridge();
229
- const tree = await bridge.call<any>("ax.getElementTree", { pid, maxDepth: maxDepth || 3 });
230
- return { content: [{ type: "text", text: JSON.stringify(tree, null, 2) }] };
231
- });
232
-
233
- server.tool("ax_find", "Find a UI element by text/title in an app", {
234
- pid: z.number().describe("Process ID"),
235
- title: z.string().describe("Text to search for"),
236
- }, async ({ pid, title }) => {
237
- await ensureBridge();
238
- const r = await bridge.call<any>("ax.findElement", { pid, title, exact: false });
239
- return { content: [{ type: "text", text: JSON.stringify(r, null, 2) }] };
240
- });
241
-
242
- server.tool("ax_press", "Find a UI element by title and press/click it via accessibility", {
243
- pid: z.number().describe("Process ID"),
244
- title: z.string().describe("Element title to find and press"),
245
- }, async ({ pid, title }) => {
246
- await ensureBridge();
247
- const el = await bridge.call<any>("ax.findElement", { pid, title, exact: false });
248
- await bridge.call("ax.performAction", { pid, elementPath: el.elementPath, action: "AXPress" });
249
- return { content: [{ type: "text", text: "Pressed '" + el.title + "' (" + el.role + ")" }] };
250
- });
251
-
252
- server.tool("menu_click", "Click a menu item in an app's menu bar", {
253
- pid: z.number().describe("Process ID"),
254
- menuPath: z.string().describe("Menu path like 'File/New' or 'View/Home Screen'"),
255
- }, async ({ pid, menuPath }) => {
256
- await ensureBridge();
257
- await bridge.call("ax.menuClick", { pid, menuPath: menuPath.split("/") });
258
- return { content: [{ type: "text", text: "Menu: " + menuPath }] };
259
- });
260
-
261
- // ── Start ──
262
-
263
- async function main() {
264
- const transport = new StdioServerTransport();
265
- await server.connect(transport);
266
- }
267
-
268
- main().catch((err) => {
269
- console.error("MCP server error:", err);
270
- process.exit(1);
271
- });