screenhand 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (177)
  1. package/README.md +458 -93
  2. package/dist/.audit-log.jsonl +55 -0
  3. package/dist/.screenhand/memory/.lock +1 -0
  4. package/dist/.screenhand/memory/actions.jsonl +85 -0
  5. package/dist/.screenhand/memory/errors.jsonl +5 -0
  6. package/dist/.screenhand/memory/errors.jsonl.bak +4 -0
  7. package/dist/.screenhand/memory/state.json +35 -0
  8. package/dist/.screenhand/memory/state.json.bak +35 -0
  9. package/dist/.screenhand/memory/strategies.jsonl +12 -0
  10. package/dist/agent/cli.js +73 -0
  11. package/dist/agent/loop.js +258 -0
  12. package/dist/config.js +9 -0
  13. package/dist/index.js +56 -0
  14. package/dist/logging/timeline-logger.js +29 -0
  15. package/dist/mcp/mcp-stdio-server.js +448 -0
  16. package/dist/mcp/server.js +347 -0
  17. package/dist/mcp-desktop.js +2731 -0
  18. package/dist/mcp-entry.js +59 -0
  19. package/dist/memory/recall.js +160 -0
  20. package/dist/memory/research.js +98 -0
  21. package/dist/memory/seeds.js +89 -0
  22. package/dist/memory/session.js +161 -0
  23. package/dist/memory/store.js +391 -0
  24. package/dist/memory/types.js +4 -0
  25. package/dist/monitor/codex-monitor.js +377 -0
  26. package/dist/monitor/task-queue.js +84 -0
  27. package/dist/monitor/types.js +49 -0
  28. package/dist/native/bridge-client.js +174 -0
  29. package/dist/native/macos-bridge-client.js +5 -0
  30. package/dist/npm-publish-helper.js +117 -0
  31. package/dist/npm-token-cdp.js +113 -0
  32. package/dist/npm-token-create.js +135 -0
  33. package/dist/npm-token-finish.js +126 -0
  34. package/dist/playbook/engine.js +193 -0
  35. package/dist/playbook/index.js +4 -0
  36. package/dist/playbook/recorder.js +519 -0
  37. package/dist/playbook/runner.js +392 -0
  38. package/dist/playbook/store.js +166 -0
  39. package/dist/playbook/types.js +4 -0
  40. package/dist/runtime/accessibility-adapter.js +377 -0
  41. package/dist/runtime/app-adapter.js +48 -0
  42. package/dist/runtime/applescript-adapter.js +283 -0
  43. package/dist/runtime/ax-role-map.js +80 -0
  44. package/dist/runtime/browser-adapter.js +36 -0
  45. package/dist/runtime/cdp-chrome-adapter.js +505 -0
  46. package/dist/runtime/composite-adapter.js +205 -0
  47. package/dist/runtime/executor.js +250 -0
  48. package/dist/runtime/locator-cache.js +12 -0
  49. package/dist/runtime/planning-loop.js +47 -0
  50. package/dist/runtime/service.js +372 -0
  51. package/dist/runtime/session-manager.js +28 -0
  52. package/dist/runtime/state-observer.js +105 -0
  53. package/dist/runtime/vision-adapter.js +208 -0
  54. package/dist/scripts/codex-monitor-daemon.js +335 -0
  55. package/dist/scripts/supervisor-daemon.js +272 -0
  56. package/dist/scripts/worker-daemon.js +228 -0
  57. package/dist/src/agent/cli.js +82 -0
  58. package/dist/src/agent/loop.js +274 -0
  59. package/{src/config.ts → dist/src/config.js} +5 -10
  60. package/{src/index.ts → dist/src/index.js} +32 -52
  61. package/dist/src/jobs/manager.js +237 -0
  62. package/dist/src/jobs/runner.js +683 -0
  63. package/dist/src/jobs/store.js +102 -0
  64. package/dist/src/jobs/types.js +30 -0
  65. package/dist/src/jobs/worker.js +97 -0
  66. package/dist/src/logging/timeline-logger.js +45 -0
  67. package/dist/src/mcp/mcp-stdio-server.js +464 -0
  68. package/dist/src/mcp/server.js +363 -0
  69. package/dist/src/mcp-entry.js +60 -0
  70. package/dist/src/memory/recall.js +170 -0
  71. package/dist/src/memory/research.js +104 -0
  72. package/dist/src/memory/seeds.js +101 -0
  73. package/dist/src/memory/service.js +421 -0
  74. package/dist/src/memory/session.js +169 -0
  75. package/dist/src/memory/store.js +422 -0
  76. package/dist/src/memory/types.js +17 -0
  77. package/dist/src/monitor/codex-monitor.js +382 -0
  78. package/dist/src/monitor/task-queue.js +97 -0
  79. package/dist/src/monitor/types.js +62 -0
  80. package/dist/src/native/bridge-client.js +190 -0
  81. package/{src/native/macos-bridge-client.ts → dist/src/native/macos-bridge-client.js} +0 -1
  82. package/dist/src/playbook/engine.js +201 -0
  83. package/dist/src/playbook/index.js +20 -0
  84. package/dist/src/playbook/recorder.js +535 -0
  85. package/dist/src/playbook/runner.js +408 -0
  86. package/dist/src/playbook/store.js +183 -0
  87. package/dist/src/playbook/types.js +17 -0
  88. package/dist/src/runtime/accessibility-adapter.js +393 -0
  89. package/dist/src/runtime/app-adapter.js +64 -0
  90. package/dist/src/runtime/applescript-adapter.js +299 -0
  91. package/dist/src/runtime/ax-role-map.js +96 -0
  92. package/dist/src/runtime/browser-adapter.js +52 -0
  93. package/dist/src/runtime/cdp-chrome-adapter.js +521 -0
  94. package/dist/src/runtime/composite-adapter.js +221 -0
  95. package/dist/src/runtime/execution-contract.js +159 -0
  96. package/dist/src/runtime/executor.js +266 -0
  97. package/{src/runtime/locator-cache.ts → dist/src/runtime/locator-cache.js} +10 -15
  98. package/dist/src/runtime/planning-loop.js +63 -0
  99. package/dist/src/runtime/service.js +388 -0
  100. package/dist/src/runtime/session-manager.js +60 -0
  101. package/dist/src/runtime/state-observer.js +121 -0
  102. package/dist/src/runtime/vision-adapter.js +224 -0
  103. package/dist/src/supervisor/locks.js +186 -0
  104. package/dist/src/supervisor/supervisor.js +403 -0
  105. package/dist/src/supervisor/types.js +30 -0
  106. package/dist/src/test-mcp-protocol.js +154 -0
  107. package/dist/src/types.js +17 -0
  108. package/dist/src/util/atomic-write.js +118 -0
  109. package/dist/test-mcp-protocol.js +138 -0
  110. package/dist/types.js +1 -0
  111. package/package.json +18 -4
  112. package/.claude/commands/automate.md +0 -28
  113. package/.claude/commands/debug-ui.md +0 -19
  114. package/.claude/commands/screenshot.md +0 -15
  115. package/.github/FUNDING.yml +0 -1
  116. package/.github/ISSUE_TEMPLATE/bug_report.md +0 -27
  117. package/.github/ISSUE_TEMPLATE/feature_request.md +0 -20
  118. package/.mcp.json +0 -8
  119. package/DESKTOP_MCP_GUIDE.md +0 -92
  120. package/SECURITY.md +0 -44
  121. package/docs/architecture.md +0 -47
  122. package/install-skills.sh +0 -19
  123. package/mcp-bridge.ts +0 -271
  124. package/mcp-desktop.ts +0 -1221
  125. package/native/macos-bridge/Package.swift +0 -21
  126. package/native/macos-bridge/Sources/AccessibilityBridge.swift +0 -261
  127. package/native/macos-bridge/Sources/AppManagement.swift +0 -129
  128. package/native/macos-bridge/Sources/CoreGraphicsBridge.swift +0 -242
  129. package/native/macos-bridge/Sources/ObserverBridge.swift +0 -120
  130. package/native/macos-bridge/Sources/VisionBridge.swift +0 -80
  131. package/native/macos-bridge/Sources/main.swift +0 -345
  132. package/native/windows-bridge/AppManagement.cs +0 -234
  133. package/native/windows-bridge/InputBridge.cs +0 -436
  134. package/native/windows-bridge/Program.cs +0 -265
  135. package/native/windows-bridge/ScreenCapture.cs +0 -329
  136. package/native/windows-bridge/UIAutomationBridge.cs +0 -571
  137. package/native/windows-bridge/WindowsBridge.csproj +0 -17
  138. package/playbooks/devpost.json +0 -186
  139. package/playbooks/instagram.json +0 -41
  140. package/playbooks/instagram_v2.json +0 -201
  141. package/playbooks/x_v1.json +0 -211
  142. package/scripts/devpost-live-loop.mjs +0 -421
  143. package/src/logging/timeline-logger.ts +0 -55
  144. package/src/mcp/server.ts +0 -449
  145. package/src/memory/recall.ts +0 -191
  146. package/src/memory/research.ts +0 -146
  147. package/src/memory/seeds.ts +0 -123
  148. package/src/memory/session.ts +0 -201
  149. package/src/memory/store.ts +0 -434
  150. package/src/memory/types.ts +0 -69
  151. package/src/native/bridge-client.ts +0 -239
  152. package/src/runtime/accessibility-adapter.ts +0 -487
  153. package/src/runtime/app-adapter.ts +0 -169
  154. package/src/runtime/applescript-adapter.ts +0 -376
  155. package/src/runtime/ax-role-map.ts +0 -102
  156. package/src/runtime/browser-adapter.ts +0 -129
  157. package/src/runtime/cdp-chrome-adapter.ts +0 -676
  158. package/src/runtime/composite-adapter.ts +0 -274
  159. package/src/runtime/executor.ts +0 -396
  160. package/src/runtime/planning-loop.ts +0 -81
  161. package/src/runtime/service.ts +0 -448
  162. package/src/runtime/session-manager.ts +0 -50
  163. package/src/runtime/state-observer.ts +0 -136
  164. package/src/runtime/vision-adapter.ts +0 -297
  165. package/src/types.ts +0 -297
  166. package/tests/bridge-client.test.ts +0 -176
  167. package/tests/browser-stealth.test.ts +0 -210
  168. package/tests/composite-adapter.test.ts +0 -64
  169. package/tests/mcp-server.test.ts +0 -151
  170. package/tests/memory-recall.test.ts +0 -339
  171. package/tests/memory-research.test.ts +0 -159
  172. package/tests/memory-seeds.test.ts +0 -120
  173. package/tests/memory-store.test.ts +0 -392
  174. package/tests/types.test.ts +0 -92
  175. package/tsconfig.check.json +0 -17
  176. package/tsconfig.json +0 -19
  177. package/vitest.config.ts +0 -8
@@ -0,0 +1,117 @@
1
+ #!/usr/bin/env tsx
2
+ /**
3
+ * Helper script: Uses ScreenHand to navigate npm tokens page in Safari.
4
+ * Spawns the MCP server, starts a session, and automates Safari.
5
+ */
6
+ import { spawn } from "node:child_process";
7
+ import { createInterface } from "node:readline";
8
+ import path from "node:path";
9
+ import { fileURLToPath } from "node:url";
const __dirname = path.dirname(fileURLToPath(import.meta.url));
const projectRoot = path.resolve(__dirname, "..");
const tsxBin = path.join(projectRoot, "node_modules", ".bin", "tsx");
/** Maximum time to wait for one line of server output, in milliseconds. */
const TIMEOUT_MS = 15_000;

// Run the MCP server as a child process; JSON-RPC messages travel over its
// stdin/stdout as newline-delimited JSON, diagnostics arrive on stderr.
const proc = spawn(tsxBin, [path.join(projectRoot, "src/mcp-entry.ts")], {
  stdio: ["pipe", "pipe", "pipe"],
  env: { ...process.env, SCREENHAND_ADAPTER: "accessibility" },
  cwd: projectRoot,
});

/** Stop the server child process and terminate this script with `code`. */
function shutdown(code) {
  proc.kill();
  process.exit(code);
}

// Without a listener, a spawn failure (e.g. missing tsx binary) is thrown as
// an uncaught exception instead of a readable message.
proc.on("error", (err) => {
  console.error("Error:", `Failed to start MCP server: ${err.message}`);
  shutdown(1);
});
// Writes to a server that already exited surface here (EPIPE); the pending
// read then times out and reports the failure.
proc.stdin.on("error", (err) => {
  process.stderr.write(`stdin error: ${err.message}\n`);
});

// Mirror server stderr and keep a copy so timeouts can show its tail.
let stderrBuf = "";
proc.stderr.on("data", (d) => {
  stderrBuf += d.toString();
  process.stderr.write(d);
});

let msgId = 0;

/** Write one newline-delimited JSON-RPC message to the server's stdin. */
function send(msg) {
  proc.stdin.write(`${JSON.stringify(msg)}\n`);
}

/** Resolve after `ms` milliseconds. */
function sleep(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

// Lines are handed to the single pending reader if there is one, otherwise
// queued until the next readResponse() call.
const rl = createInterface({ input: proc.stdout });
const lineQueue = [];
let lineWaiter = null;
rl.on("line", (line) => {
  // Blank lines carry no message; queueing them would break parsing later.
  if (line.trim() === "") {
    return;
  }
  if (lineWaiter) {
    const w = lineWaiter;
    lineWaiter = null;
    w(line);
  } else {
    lineQueue.push(line);
  }
});

/**
 * Read and parse the next line of server output.
 *
 * @returns {Promise<object>} The parsed JSON message.
 * @throws {Error} (as a rejection) on timeout or when the line is not JSON.
 */
function readResponse() {
  return new Promise((resolve, reject) => {
    const timer = setTimeout(() => {
      lineWaiter = null;
      reject(new Error(`Timeout. stderr: ${stderrBuf.slice(-500)}`));
    }, TIMEOUT_MS);
    const handle = (line) => {
      clearTimeout(timer);
      // Parse inside try/catch: this may run from the "line" event handler,
      // where a throw would be an uncaught exception rather than a rejection.
      try {
        resolve(JSON.parse(line));
      } catch (err) {
        reject(new Error(`Invalid JSON from server: ${line.slice(0, 200)}`, { cause: err }));
      }
    };
    if (lineQueue.length > 0) {
      handle(lineQueue.shift());
    } else {
      lineWaiter = handle;
    }
  });
}

/**
 * Send a JSON-RPC request and wait for the response carrying the same id.
 * Messages with a different id (or none, i.e. notifications) are skipped.
 *
 * @param {string} method - JSON-RPC method name.
 * @param {object} params - Request parameters.
 * @returns {Promise<object>} The matching response message.
 */
async function request(method, params) {
  const id = ++msgId;
  send({ jsonrpc: "2.0", id, method, params });
  for (;;) {
    const resp = await readResponse();
    if (resp.id === id) {
      return resp;
    }
  }
}

/**
 * Call an MCP tool and return its first text content item.
 *
 * @param {string} name - Tool name.
 * @param {object} args - Tool arguments.
 * @returns {Promise<unknown>} Parsed JSON when the text is JSON, the raw text
 *   when it is not, or null when there is no text or the call failed at the
 *   protocol level. Tool errors are logged and do not throw.
 */
async function callTool(name, args) {
  const resp = await request("tools/call", { name, arguments: args });
  if (resp.error) {
    console.error(` ERROR: ${resp.error.message ?? JSON.stringify(resp.error)}`);
    return null;
  }
  const result = resp.result;
  const text = result?.content?.[0]?.text;
  if (result?.isError) {
    console.error(` ERROR: ${text}`);
  }
  if (!text) {
    return null;
  }
  // Error texts are usually plain prose, so a failed parse is not fatal.
  try {
    return JSON.parse(text);
  } catch {
    return text;
  }
}

try {
  // Wait for server to start
  await sleep(2000);
  // Initialize MCP
  const init = await request("initialize", {
    protocolVersion: "2024-11-05",
    capabilities: {},
    clientInfo: { name: "npm-helper", version: "1.0" },
  });
  if (init.error) {
    throw new Error(`initialize failed: ${init.error.message ?? JSON.stringify(init.error)}`);
  }
  send({ jsonrpc: "2.0", method: "notifications/initialized" });
  await sleep(300);
  // Start session
  console.log("Starting session...");
  const session = await callTool("session_start", {});
  const sid = session?.sessionId;
  if (!sid) {
    // Every later call needs the session id; continuing would only produce
    // a cascade of unrelated failures.
    throw new Error("Failed to start session");
  }
  console.log(` Session: ${sid}`);
  // Launch Safari
  console.log("\nLaunching Safari...");
  await callTool("app_launch", { sessionId: sid, bundleId: "com.apple.Safari" });
  await sleep(1500);
  // Focus Safari
  console.log("Focusing Safari...");
  await callTool("app_focus", { sessionId: sid, bundleId: "com.apple.Safari" });
  await sleep(500);
  // Navigate to npm tokens page
  console.log("Navigating to npm tokens page...");
  await callTool("navigate", { sessionId: sid, url: "https://www.npmjs.com/settings/ai2hum/tokens" });
  await sleep(3000);
  // Take screenshot to see the page
  console.log("\nTaking screenshot...");
  const screenshot = await callTool("screenshot", { sessionId: sid });
  console.log(" Screenshot:", JSON.stringify(screenshot));
  // Get element tree to see what's on the page
  console.log("\nGetting element tree...");
  const tree = await callTool("element_tree", { sessionId: sid, maxDepth: 4 });
  console.log(" Tree:", JSON.stringify(tree, null, 2).slice(0, 3000));
  console.log("\n--- Page loaded. Inspect the output above to determine next steps. ---");
  shutdown(0);
} catch (e) {
  console.error("Error:", e instanceof Error ? e.message : String(e));
  shutdown(1);
}
@@ -0,0 +1,113 @@
1
+ #!/usr/bin/env tsx
2
+ /**
3
+ * Uses ScreenHand with CDP adapter to create npm token in Chrome.
4
+ */
5
+ import { spawn } from "node:child_process";
6
+ import { createInterface } from "node:readline";
7
+ import path from "node:path";
8
+ import { fileURLToPath } from "node:url";
const __dirname = path.dirname(fileURLToPath(import.meta.url));
const projectRoot = path.resolve(__dirname, "..");
const tsxBin = path.join(projectRoot, "node_modules", ".bin", "tsx");
/** Maximum time to wait for one line of server output, in milliseconds. */
const RESPONSE_TIMEOUT_MS = 30000;

// Run the MCP server (CDP adapter) as a child process; JSON-RPC messages
// travel over its stdin/stdout as newline-delimited JSON.
const proc = spawn(tsxBin, [path.join(projectRoot, "src/mcp-entry.ts")], {
  stdio: ["pipe", "pipe", "pipe"],
  env: { ...process.env, SCREENHAND_ADAPTER: "cdp" },
  cwd: projectRoot,
});

/** Stop the server child process and terminate this script with `code`. */
function shutdown(code) {
  proc.kill();
  process.exit(code);
}

// Without a listener, a spawn failure (e.g. missing tsx binary) is thrown as
// an uncaught exception instead of a readable message.
proc.on("error", (err) => {
  console.error("Error:", `Failed to start MCP server: ${err.message}`);
  shutdown(1);
});
// Writes to a server that already exited surface here (EPIPE); the pending
// read then times out and reports the failure.
proc.stdin.on("error", (err) => {
  process.stderr.write(`stdin error: ${err.message}\n`);
});
proc.stderr.on("data", (d) => process.stderr.write(d));

let msgId = 0;

/** Write one newline-delimited JSON-RPC message to the server's stdin. */
function send(msg) {
  proc.stdin.write(`${JSON.stringify(msg)}\n`);
}

// Lines are handed to the single pending reader if there is one, otherwise
// queued until the next readResponse() call.
const rl = createInterface({ input: proc.stdout });
const lineQueue = [];
let lineWaiter = null;
rl.on("line", (line) => {
  // Blank lines carry no message; queueing them would break parsing later.
  if (line.trim() === "") {
    return;
  }
  if (lineWaiter) {
    const w = lineWaiter;
    lineWaiter = null;
    w(line);
  } else {
    lineQueue.push(line);
  }
});

/**
 * Read and parse the next line of server output.
 *
 * @returns {Promise<object>} The parsed JSON message.
 * @throws {Error} (as a rejection) on timeout or when the line is not JSON.
 */
function readResponse() {
  return new Promise((resolve, reject) => {
    const timer = setTimeout(() => {
      lineWaiter = null;
      reject(new Error("Timeout"));
    }, RESPONSE_TIMEOUT_MS);
    const handle = (line) => {
      clearTimeout(timer);
      // Parse inside try/catch: this may run from the "line" event handler,
      // where a throw would be an uncaught exception rather than a rejection.
      try {
        resolve(JSON.parse(line));
      } catch (err) {
        reject(new Error(`Invalid JSON from server: ${line.slice(0, 200)}`, { cause: err }));
      }
    };
    if (lineQueue.length > 0) {
      handle(lineQueue.shift());
    } else {
      lineWaiter = handle;
    }
  });
}

/**
 * Send a JSON-RPC request and wait for the response carrying the same id.
 * Messages with a different id (or none, i.e. notifications) are skipped.
 *
 * @param {string} method - JSON-RPC method name.
 * @param {object} params - Request parameters.
 * @returns {Promise<object>} The matching response message.
 */
async function request(method, params) {
  const id = ++msgId;
  send({ jsonrpc: "2.0", id, method, params });
  for (;;) {
    const resp = await readResponse();
    if (resp.id === id) {
      return resp;
    }
  }
}

/**
 * Call an MCP tool and return its first text content item.
 *
 * @param {string} name - Tool name.
 * @param {object} args - Tool arguments.
 * @returns {Promise<unknown>} Parsed JSON when the text is JSON, the raw text
 *   when it is not, or null when the tool or the protocol reported an error
 *   (the error is logged).
 */
async function callTool(name, args) {
  const resp = await request("tools/call", { name, arguments: args });
  if (resp.error) {
    console.error(` ERROR: ${resp.error.message ?? JSON.stringify(resp.error)}`);
    return null;
  }
  const result = resp.result;
  const text = result?.content?.[0]?.text ?? "{}";
  if (result?.isError) {
    console.error(` ERROR: ${text}`);
    return null;
  }
  try {
    return JSON.parse(text);
  } catch {
    return text;
  }
}

/** Resolve after `ms` milliseconds. */
async function sleep(ms) {
  await new Promise((resolve) => setTimeout(resolve, ms));
}

try {
  await sleep(3000);
  const init = await request("initialize", {
    protocolVersion: "2024-11-05",
    capabilities: {},
    clientInfo: { name: "npm-cdp", version: "1.0" },
  });
  if (init.error) {
    throw new Error(`initialize failed: ${init.error.message ?? JSON.stringify(init.error)}`);
  }
  send({ jsonrpc: "2.0", method: "notifications/initialized" });
  await sleep(300);
  const session = await callTool("session_start", {});
  const sid = session?.sessionId;
  if (!sid) {
    console.error("Failed to start session");
    // Kill the server before exiting so it is not left running.
    shutdown(1);
  }
  console.log(`Session: ${sid}`);
  // Navigate to npm tokens page
  console.log("\nNavigating to npm tokens page...");
  await callTool("navigate", { sessionId: sid, url: "https://www.npmjs.com/settings/ai2hum/tokens/new" });
  await sleep(3000);
  // Take screenshot
  const ss1 = await callTool("screenshot", { sessionId: sid });
  console.log("Screenshot:", JSON.stringify(ss1).slice(0, 200));
  // Type token name
  console.log("\nTyping token name...");
  await callTool("type_into", { sessionId: sid, target: "css=#token-name", text: "screenhand-publish" });
  await sleep(500);
  // Check bypass 2FA
  console.log("\nChecking bypass 2FA...");
  await callTool("press", { sessionId: sid, target: "css=#bypass-2fa" });
  await sleep(500);
  // Set packages permission to "Read and write"
  console.log("\nSetting packages permission...");
  // Find the packages permissions select and change it
  await callTool("press", { sessionId: sid, target: "css=select[name='packages-permission']" });
  await sleep(300);
  // Screenshot to see current state
  const ss2 = await callTool("screenshot", { sessionId: sid });
  console.log("Screenshot:", JSON.stringify(ss2).slice(0, 200));
  // Try to get page HTML to understand the form structure
  console.log("\nGetting page structure...");
  const html = await callTool("extract", { sessionId: sid, target: "css=form", format: "text" });
  console.log("Form text:", JSON.stringify(html).slice(0, 2000));
  shutdown(0);
} catch (e) {
  console.error("Error:", e instanceof Error ? e.message : String(e));
  shutdown(1);
}
@@ -0,0 +1,135 @@
1
+ #!/usr/bin/env tsx
2
+ import { spawn } from "node:child_process";
3
+ import { createInterface } from "node:readline";
4
+ import path from "node:path";
5
+ import { fileURLToPath } from "node:url";
const __dirname = path.dirname(fileURLToPath(import.meta.url));
const projectRoot = path.resolve(__dirname, "..");
const tsxBin = path.join(projectRoot, "node_modules", ".bin", "tsx");
/** Maximum time to wait for one line of server output, in milliseconds. */
const RESPONSE_TIMEOUT_MS = 15000;

// Run the MCP server (accessibility adapter) as a child process; JSON-RPC
// messages travel over its stdin/stdout as newline-delimited JSON.
const proc = spawn(tsxBin, [path.join(projectRoot, "src/mcp-entry.ts")], {
  stdio: ["pipe", "pipe", "pipe"],
  env: { ...process.env, SCREENHAND_ADAPTER: "accessibility" },
  cwd: projectRoot,
});

/** Stop the server child process and terminate this script with `code`. */
function shutdown(code) {
  proc.kill();
  process.exit(code);
}

// Without a listener, a spawn failure (e.g. missing tsx binary) is thrown as
// an uncaught exception instead of a readable message.
proc.on("error", (err) => {
  console.error("Error:", `Failed to start MCP server: ${err.message}`);
  shutdown(1);
});
// Writes to a server that already exited surface here (EPIPE); the pending
// read then times out and reports the failure.
proc.stdin.on("error", (err) => {
  process.stderr.write(`stdin error: ${err.message}\n`);
});
proc.stderr.on("data", (d) => process.stderr.write(d));

let msgId = 0;

/** Write one newline-delimited JSON-RPC message to the server's stdin. */
function send(msg) {
  proc.stdin.write(`${JSON.stringify(msg)}\n`);
}

// Lines are handed to the single pending reader if there is one, otherwise
// queued until the next readResponse() call.
const rl = createInterface({ input: proc.stdout });
const lineQueue = [];
let lineWaiter = null;
rl.on("line", (line) => {
  // Blank lines carry no message; queueing them would break parsing later.
  if (line.trim() === "") {
    return;
  }
  if (lineWaiter) {
    const w = lineWaiter;
    lineWaiter = null;
    w(line);
  } else {
    lineQueue.push(line);
  }
});

/**
 * Read and parse the next line of server output.
 *
 * @returns {Promise<object>} The parsed JSON message.
 * @throws {Error} (as a rejection) on timeout or when the line is not JSON.
 */
function readResponse() {
  return new Promise((resolve, reject) => {
    const timer = setTimeout(() => {
      lineWaiter = null;
      reject(new Error("Timeout"));
    }, RESPONSE_TIMEOUT_MS);
    const handle = (line) => {
      clearTimeout(timer);
      // Parse inside try/catch: this may run from the "line" event handler,
      // where a throw would be an uncaught exception rather than a rejection.
      try {
        resolve(JSON.parse(line));
      } catch (err) {
        reject(new Error(`Invalid JSON from server: ${line.slice(0, 200)}`, { cause: err }));
      }
    };
    if (lineQueue.length > 0) {
      handle(lineQueue.shift());
    } else {
      lineWaiter = handle;
    }
  });
}

/**
 * Send a JSON-RPC request and wait for the response carrying the same id.
 * Messages with a different id (or none, i.e. notifications) are skipped.
 *
 * @param {string} method - JSON-RPC method name.
 * @param {object} params - Request parameters.
 * @returns {Promise<object>} The matching response message.
 */
async function request(method, params) {
  const id = ++msgId;
  send({ jsonrpc: "2.0", id, method, params });
  for (;;) {
    const resp = await readResponse();
    if (resp.id === id) {
      return resp;
    }
  }
}

/**
 * Call an MCP tool and return its first text content item.
 * Tool errors are logged but do not abort the script.
 *
 * @param {string} name - Tool name.
 * @param {object} args - Tool arguments.
 * @returns {Promise<unknown>} Parsed JSON when the text is JSON, the raw text
 *   when it is not, or null on a protocol-level error.
 */
async function callTool(name, args) {
  const resp = await request("tools/call", { name, arguments: args });
  if (resp.error) {
    console.error(` ERROR: ${resp.error.message ?? JSON.stringify(resp.error)}`);
    return null;
  }
  const result = resp.result;
  const text = result?.content?.[0]?.text ?? "{}";
  if (result?.isError) {
    console.error(` ERROR: ${text}`);
  }
  try {
    return JSON.parse(text);
  } catch {
    return text;
  }
}

/**
 * Take a screenshot of the session.
 *
 * @param {string} sid - Session id.
 * @returns {Promise<string>} The `data.path` field of the tool result, or ""
 *   when the result has none.
 */
async function screenshot(sid) {
  const result = await callTool("screenshot", { sessionId: sid });
  return result?.data?.path ?? "";
}

/** Resolve after `ms` milliseconds. */
async function sleep(ms) {
  await new Promise((resolve) => setTimeout(resolve, ms));
}

try {
  await sleep(2000);
  // Init MCP
  const init = await request("initialize", {
    protocolVersion: "2024-11-05",
    capabilities: {},
    clientInfo: { name: "npm-helper", version: "1.0" },
  });
  if (init.error) {
    throw new Error(`initialize failed: ${init.error.message ?? JSON.stringify(init.error)}`);
  }
  send({ jsonrpc: "2.0", method: "notifications/initialized" });
  await sleep(300);
  // Start session
  console.log("Starting session...");
  const session = await callTool("session_start", {});
  const sid = session?.sessionId;
  if (!sid) {
    // Every later call needs the session id; continuing would only click
    // around with an undefined session.
    throw new Error("Failed to start session");
  }
  console.log(` Session: ${sid}`);
  // Focus Safari (should already be on the tokens page)
  console.log("\nFocusing Safari...");
  await callTool("app_focus", { sessionId: sid, bundleId: "com.apple.Safari" });
  await sleep(500);
  // Step 1: Type token name
  console.log("\nTyping token name...");
  await callTool("press", { sessionId: sid, target: "Token name" });
  await sleep(300);
  await callTool("type_into", { sessionId: sid, target: "Token name", text: "screenhand-publish" });
  await sleep(500);
  // Step 2: Check "Bypass two-factor authentication (2FA)"
  console.log("\nChecking bypass 2FA...");
  await callTool("press", { sessionId: sid, target: "Bypass two-factor authentication (2FA)" });
  await sleep(500);
  // Step 3: Scroll down to see more options
  console.log("\nScrolling down...");
  await callTool("scroll", { sessionId: sid, direction: "down", amount: 5 });
  await sleep(1000);
  // Screenshot to see current state
  console.log("\nScreenshot after scroll...");
  const path1 = await screenshot(sid);
  console.log(` Saved: ${path1}`);
  // Step 4: Look for packages section - need to set permissions
  console.log("\nScrolling down more...");
  await callTool("scroll", { sessionId: sid, direction: "down", amount: 5 });
  await sleep(1000);
  const path2 = await screenshot(sid);
  console.log(` Screenshot: ${path2}`);
  // Try to find and click "Read and write" for packages permission
  console.log("\nLooking for package permissions...");
  await callTool("press", { sessionId: sid, target: "Read and write" });
  await sleep(500);
  // Scroll down to find Generate Token button
  console.log("\nScrolling to Generate Token...");
  await callTool("scroll", { sessionId: sid, direction: "down", amount: 5 });
  await sleep(1000);
  const path3 = await screenshot(sid);
  console.log(` Screenshot: ${path3}`);
  // Click Generate Token
  console.log("\nClicking Generate Token...");
  await callTool("press", { sessionId: sid, target: "Generate Token" });
  await sleep(3000);
  // Take final screenshot to see the token
  console.log("\nFinal screenshot...");
  const pathFinal = await screenshot(sid);
  console.log(` Screenshot: ${pathFinal}`);
  // Try to extract the token text from the page
  console.log("\nExtracting page content...");
  const tree = await callTool("element_tree", { sessionId: sid, maxDepth: 6 });
  console.log(JSON.stringify(tree, null, 2).slice(0, 5000));
  shutdown(0);
} catch (e) {
  console.error("Error:", e instanceof Error ? e.message : String(e));
  shutdown(1);
}
@@ -0,0 +1,126 @@
1
+ #!/usr/bin/env tsx
2
+ import { spawn } from "node:child_process";
3
+ import { createInterface } from "node:readline";
4
+ import path from "node:path";
5
+ import { fileURLToPath } from "node:url";
const __dirname = path.dirname(fileURLToPath(import.meta.url));
const projectRoot = path.resolve(__dirname, "..");
const tsxBin = path.join(projectRoot, "node_modules", ".bin", "tsx");
/** Maximum time to wait for one line of server output, in milliseconds. */
const RESPONSE_TIMEOUT_MS = 15000;

// Run the MCP server (accessibility adapter) as a child process; JSON-RPC
// messages travel over its stdin/stdout as newline-delimited JSON.
const proc = spawn(tsxBin, [path.join(projectRoot, "src/mcp-entry.ts")], {
  stdio: ["pipe", "pipe", "pipe"],
  env: { ...process.env, SCREENHAND_ADAPTER: "accessibility" },
  cwd: projectRoot,
});

/** Stop the server child process and terminate this script with `code`. */
function shutdown(code) {
  proc.kill();
  process.exit(code);
}

// Without a listener, a spawn failure (e.g. missing tsx binary) is thrown as
// an uncaught exception instead of a readable message.
proc.on("error", (err) => {
  console.error("Error:", `Failed to start MCP server: ${err.message}`);
  shutdown(1);
});
// Writes to a server that already exited surface here (EPIPE); the pending
// read then times out and reports the failure.
proc.stdin.on("error", (err) => {
  process.stderr.write(`stdin error: ${err.message}\n`);
});
proc.stderr.on("data", (d) => process.stderr.write(d));

let msgId = 0;

/** Write one newline-delimited JSON-RPC message to the server's stdin. */
function send(msg) {
  proc.stdin.write(`${JSON.stringify(msg)}\n`);
}

// Lines are handed to the single pending reader if there is one, otherwise
// queued until the next readResponse() call.
const rl = createInterface({ input: proc.stdout });
const lineQueue = [];
let lineWaiter = null;
rl.on("line", (line) => {
  // Blank lines carry no message; queueing them would break parsing later.
  if (line.trim() === "") {
    return;
  }
  if (lineWaiter) {
    const w = lineWaiter;
    lineWaiter = null;
    w(line);
  } else {
    lineQueue.push(line);
  }
});

/**
 * Read and parse the next line of server output.
 *
 * @returns {Promise<object>} The parsed JSON message.
 * @throws {Error} (as a rejection) on timeout or when the line is not JSON.
 */
function readResponse() {
  return new Promise((resolve, reject) => {
    const timer = setTimeout(() => {
      lineWaiter = null;
      reject(new Error("Timeout"));
    }, RESPONSE_TIMEOUT_MS);
    const handle = (line) => {
      clearTimeout(timer);
      // Parse inside try/catch: this may run from the "line" event handler,
      // where a throw would be an uncaught exception rather than a rejection.
      try {
        resolve(JSON.parse(line));
      } catch (err) {
        reject(new Error(`Invalid JSON from server: ${line.slice(0, 200)}`, { cause: err }));
      }
    };
    if (lineQueue.length > 0) {
      handle(lineQueue.shift());
    } else {
      lineWaiter = handle;
    }
  });
}

/**
 * Send a JSON-RPC request and wait for the response carrying the same id.
 * Messages with a different id (or none, i.e. notifications) are skipped.
 *
 * @param {string} method - JSON-RPC method name.
 * @param {object} params - Request parameters.
 * @returns {Promise<object>} The matching response message.
 */
async function request(method, params) {
  const id = ++msgId;
  send({ jsonrpc: "2.0", id, method, params });
  for (;;) {
    const resp = await readResponse();
    if (resp.id === id) {
      return resp;
    }
  }
}

/**
 * Call an MCP tool and return its first text content item.
 * Tool errors are logged but do not abort the script.
 *
 * @param {string} name - Tool name.
 * @param {object} args - Tool arguments.
 * @returns {Promise<unknown>} Parsed JSON when the text is JSON, the raw text
 *   when it is not, or null on a protocol-level error.
 */
async function callTool(name, args) {
  const resp = await request("tools/call", { name, arguments: args });
  if (resp.error) {
    console.error(` ERROR: ${resp.error.message ?? JSON.stringify(resp.error)}`);
    return null;
  }
  const result = resp.result;
  const text = result?.content?.[0]?.text ?? "{}";
  if (result?.isError) {
    console.error(` ERROR: ${text}`);
  }
  try {
    return JSON.parse(text);
  } catch {
    return text;
  }
}

/**
 * Take a screenshot and log its path under `label`.
 *
 * @param {string} sid - Session id.
 * @param {string} label - Tag printed in front of the path.
 * @returns {Promise<string>} The `data.path` field of the tool result, or ""
 *   when the result has none.
 */
async function screenshot(sid, label) {
  const result = await callTool("screenshot", { sessionId: sid });
  const p = result?.data?.path ?? "";
  console.log(` [${label}] ${p}`);
  return p;
}

/** Resolve after `ms` milliseconds. */
async function sleep(ms) {
  await new Promise((resolve) => setTimeout(resolve, ms));
}

try {
  await sleep(2000);
  const init = await request("initialize", {
    protocolVersion: "2024-11-05",
    capabilities: {},
    clientInfo: { name: "npm-helper", version: "1.0" },
  });
  if (init.error) {
    throw new Error(`initialize failed: ${init.error.message ?? JSON.stringify(init.error)}`);
  }
  send({ jsonrpc: "2.0", method: "notifications/initialized" });
  await sleep(300);
  const session = await callTool("session_start", {});
  const sid = session?.sessionId;
  if (!sid) {
    // Every later call needs the session id; continuing would only click
    // around with an undefined session.
    throw new Error("Failed to start session");
  }
  console.log(`Session: ${sid}`);
  // Focus Safari
  await callTool("app_focus", { sessionId: sid, bundleId: "com.apple.Safari" });
  await sleep(500);
  // Click the Packages "No access" dropdown to change permissions
  console.log("\n1. Clicking Packages permissions dropdown...");
  // The dropdown shows "No access" under "Packages and scopes"
  await callTool("press", { sessionId: sid, target: { x: 220, y: 280 } });
  await sleep(500);
  await screenshot(sid, "after clicking packages dropdown");
  // Try selecting "Read and write" from the dropdown
  console.log("\n2. Selecting Read and write...");
  await callTool("press", { sessionId: sid, target: "Read and write" });
  await sleep(500);
  await screenshot(sid, "after selecting read and write");
  // Now we need to select which package - type "screenhand"
  console.log("\n3. Looking for package selector...");
  await callTool("scroll", { sessionId: sid, direction: "down", amount: 3 });
  await sleep(500);
  await screenshot(sid, "after scroll");
  // Try to find a package input or "select packages" area
  console.log("\n4. Trying to add screenhand package...");
  await callTool("press", { sessionId: sid, target: "select" });
  await sleep(300);
  // Type screenhand in whatever input appeared
  // NOTE(review): key_combo is given one key per letter; if the tool treats
  // the list as a simultaneous chord this will not type the word — confirm
  // against the tool's contract (type_into may be the intended call).
  await callTool("key_combo", { sessionId: sid, keys: ["s", "c", "r", "e", "e", "n", "h", "a", "n", "d"] });
  await sleep(1000);
  await screenshot(sid, "after typing screenhand");
  // Try to select it from autocomplete
  await callTool("press", { sessionId: sid, target: "screenhand" });
  await sleep(500);
  // Scroll down to Generate Token
  console.log("\n5. Scrolling to Generate Token...");
  await callTool("scroll", { sessionId: sid, direction: "down", amount: 10 });
  await sleep(500);
  await screenshot(sid, "before generate");
  // Click Generate Token
  console.log("\n6. Clicking Generate Token...");
  await callTool("press", { sessionId: sid, target: "Generate Token" });
  await sleep(3000);
  // Final screenshot - should show the token
  console.log("\n7. Final result...");
  await screenshot(sid, "FINAL");
  shutdown(0);
} catch (e) {
  console.error("Error:", e instanceof Error ? e.message : String(e));
  shutdown(1);
}
@@ -0,0 +1,193 @@
1
+ /**
2
+ * Playbook Engine — executes playbooks step-by-step
3
+ *
4
+ * Known path → playbook (fast, deterministic, no AI)
5
+ * Unknown state → AI fallback (slow, adaptive, learns)
6
+ *
7
+ * After AI recovers, the recovery steps get saved back into the playbook.
8
+ */
/** Default time to wait for a step's `verify` selector, in milliseconds. */
const DEFAULT_VERIFY_TIMEOUT = 5000;
/** Pause after each successfully executed step, in milliseconds. */
const STEP_DELAY_MS = 300;

/**
 * Throw when a runtime call reports failure.
 *
 * @param {{ ok: boolean, error?: { message?: string } }} result - Runtime result.
 * @throws {Error} With the runtime's error message, or a generic message when
 *   the result carries none.
 */
function ensureOk(result) {
  if (!result.ok) {
    throw new Error(result.error?.message ?? "Runtime call failed");
  }
}

/**
 * Executes playbooks step by step against a runtime object that exposes
 * `navigate`, `press`, `typeInto`, `extract`, `keyCombo`, `scroll`,
 * `screenshot` and `waitFor`.
 */
export class PlaybookEngine {
  runtime;

  /**
   * @param {object} runtime - Object providing the runtime calls listed on the class.
   */
  constructor(runtime) {
    this.runtime = runtime;
  }

  /**
   * Execute a playbook against a live session.
   *
   * A step that throws, or whose verification fails, stops the run unless the
   * step is marked `optional`. `stepsCompleted` counts each step at most once:
   * steps whose action ran, plus optional steps that were skipped.
   *
   * @param {string} sessionId - Session to run against.
   * @param {{ id: string, steps: object[] }} playbook - Playbook to execute.
   * @param {{ onStep?: (index: number, step: object, result: string) => void }} [options]
   *   `onStep` is called after each executed step and again with a
   *   "Skipped (optional): ..." message when an optional step throws.
   * @returns {Promise<object>} Result with `playbook`, `success`,
   *   `stepsCompleted`, `totalSteps`, `failedAtStep` (-1 on success),
   *   `durationMs`, and `error` on failure.
   */
  async run(sessionId, playbook, options = {}) {
    const start = Date.now();
    let stepsCompleted = 0;
    const failure = (index, error) => ({
      playbook: playbook.id,
      success: false,
      stepsCompleted,
      totalSteps: playbook.steps.length,
      failedAtStep: index,
      error,
      durationMs: Date.now() - start,
    });

    for (let i = 0; i < playbook.steps.length; i++) {
      const step = playbook.steps[i];
      // Tracks whether this step is already in stepsCompleted, so an error
      // thrown after the action ran (e.g. during verification) cannot count
      // the same optional step twice.
      let counted = false;
      try {
        const result = await this.executeStep(sessionId, step);
        stepsCompleted++;
        counted = true;
        options.onStep?.(i, step, result);
        // Verify step if needed
        if (step.verify) {
          const verified = await this.verifyStep(sessionId, step);
          if (!verified && !step.optional) {
            return failure(i, `Verification failed at step ${i}: ${step.description ?? step.action}`);
          }
        }
        // Small delay between steps for UI to settle
        await sleep(STEP_DELAY_MS);
      } catch (err) {
        const message = err instanceof Error ? err.message : String(err);
        if (step.optional) {
          if (!counted) {
            stepsCompleted++;
          }
          options.onStep?.(i, step, `Skipped (optional): ${message}`);
          continue;
        }
        return failure(i, message);
      }
    }

    return {
      playbook: playbook.id,
      success: true,
      stepsCompleted,
      totalSteps: playbook.steps.length,
      failedAtStep: -1,
      durationMs: Date.now() - start,
    };
  }

  /**
   * Execute a single playbook step.
   *
   * @param {string} sessionId - Session to act on.
   * @param {object} step - Step with an `action` and the fields that action needs.
   * @returns {Promise<string>} Human-readable summary of what was done.
   * @throws {Error} When a required field is missing, the action is unknown,
   *   or the runtime reports failure.
   */
  async executeStep(sessionId, step) {
    const target = this.resolveTarget(step.target);
    switch (step.action) {
      case "navigate": {
        if (!step.url) {
          throw new Error("navigate step missing url");
        }
        ensureOk(await this.runtime.navigate({ sessionId, url: step.url }));
        return `Navigated to ${step.url}`;
      }
      case "press": {
        if (!target) {
          throw new Error("press step missing target");
        }
        ensureOk(await this.runtime.press({ sessionId, target }));
        return `Pressed ${JSON.stringify(step.target)}`;
      }
      case "type_into": {
        if (!target) {
          throw new Error("type_into step missing target");
        }
        if (!step.text) {
          throw new Error("type_into step missing text");
        }
        ensureOk(await this.runtime.typeInto({ sessionId, target, text: step.text }));
        return `Typed "${step.text}" into ${JSON.stringify(step.target)}`;
      }
      case "extract": {
        if (!target) {
          throw new Error("extract step missing target");
        }
        const r = await this.runtime.extract({
          sessionId,
          target,
          format: step.format ?? "text",
        });
        ensureOk(r);
        return `Extracted: ${JSON.stringify(r.data).slice(0, 200)}`;
      }
      case "key_combo": {
        if (!step.keys || step.keys.length === 0) {
          throw new Error("key_combo step missing keys");
        }
        ensureOk(await this.runtime.keyCombo({ sessionId, keys: step.keys }));
        return `Key combo: ${step.keys.join("+")}`;
      }
      case "scroll": {
        const input = {
          sessionId,
          direction: step.direction ?? "down",
        };
        // Only forward `amount` when the step sets it.
        if (step.amount != null) {
          input.amount = step.amount;
        }
        ensureOk(await this.runtime.scroll(input));
        return `Scrolled ${step.direction ?? "down"}`;
      }
      case "wait": {
        await sleep(step.ms ?? 1000);
        return `Waited ${step.ms ?? 1000}ms`;
      }
      case "screenshot": {
        ensureOk(await this.runtime.screenshot({ sessionId }));
        return `Screenshot taken`;
      }
      default:
        throw new Error(`Unknown action: ${step.action}`);
    }
  }

  /**
   * Verify a step's postcondition by waiting for its `verify` selector to
   * become visible.
   *
   * @param {string} sessionId - Session to check.
   * @param {object} step - Step; `verify` is the selector, `verifyTimeoutMs`
   *   overrides the default timeout.
   * @returns {Promise<boolean>} True when the step has no `verify` or the
   *   runtime reports a match.
   */
  async verifyStep(sessionId, step) {
    if (!step.verify) {
      return true;
    }
    const timeout = step.verifyTimeoutMs ?? DEFAULT_VERIFY_TIMEOUT;
    const r = await this.runtime.waitFor({
      sessionId,
      condition: { type: "selector_visible", selector: step.verify },
      timeoutMs: timeout,
    });
    return Boolean(r.ok && r.data?.matched);
  }

  /**
   * Convert playbook target format to runtime Target format.
   *
   * Strings starting with `[`, `#`, `.` or `css=` become selector targets
   * (the `css=` prefix is stripped); other strings become text targets.
   * Objects with `selector`, or with both `x` and `y`, map to selector and
   * coordinate targets.
   *
   * @param {string | object | undefined} target - Playbook target.
   * @returns {object | undefined} Runtime target, or undefined when the input
   *   is empty or not a recognised shape.
   */
  resolveTarget(target) {
    if (!target) {
      return undefined;
    }
    if (typeof target === "string") {
      // CSS selector if starts with common patterns, else treat as text
      const looksLikeSelector = ["[", "#", ".", "css="].some((prefix) => target.startsWith(prefix));
      if (looksLikeSelector) {
        return { type: "selector", value: target.replace(/^css=/, "") };
      }
      return { type: "text", value: target };
    }
    // `in` throws on primitives (numbers, booleans); treat them as unrecognised.
    if (typeof target !== "object") {
      return undefined;
    }
    if ("selector" in target) {
      return { type: "selector", value: target.selector };
    }
    if ("x" in target && "y" in target) {
      return { type: "coordinates", x: target.x, y: target.y };
    }
    return undefined;
  }
}

/** Resolve after `ms` milliseconds. */
function sleep(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}
@@ -0,0 +1,4 @@
1
+ export { PlaybookEngine } from "./engine.js";
2
+ export { PlaybookStore } from "./store.js";
3
+ export { PlaybookRunner } from "./runner.js";
4
+ export { PlaybookRecorder } from "./recorder.js";