screenhand 0.1.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (241) hide show
  1. package/README.md +193 -109
  2. package/bin/darwin-arm64/macos-bridge +0 -0
  3. package/dist/mcp-desktop.js +5876 -0
  4. package/dist/scripts/codex-monitor-daemon.js +335 -0
  5. package/dist/scripts/export-help-center.js +112 -0
  6. package/dist/scripts/marketing-loop.js +117 -0
  7. package/dist/scripts/observer-daemon.js +288 -0
  8. package/dist/scripts/orchestrator-daemon.js +399 -0
  9. package/dist/scripts/supervisor-daemon.js +272 -0
  10. package/dist/scripts/threads-campaign.js +208 -0
  11. package/dist/scripts/worker-daemon.js +228 -0
  12. package/dist/src/agent/cli.js +82 -0
  13. package/dist/src/agent/loop.js +274 -0
  14. package/dist/src/community/fetcher.js +109 -0
  15. package/dist/src/community/index.js +6 -0
  16. package/dist/src/community/publisher.js +191 -0
  17. package/dist/src/community/remote-api.js +121 -0
  18. package/dist/src/community/types.js +3 -0
  19. package/dist/src/community/validator.js +95 -0
  20. package/{src/config.ts → dist/src/config.js} +5 -10
  21. package/dist/src/context-tracker.js +489 -0
  22. package/{src/index.ts → dist/src/index.js} +32 -52
  23. package/dist/src/ingestion/coverage-auditor.js +233 -0
  24. package/dist/src/ingestion/doc-parser.js +164 -0
  25. package/dist/src/ingestion/index.js +8 -0
  26. package/dist/src/ingestion/menu-scanner.js +152 -0
  27. package/dist/src/ingestion/reference-merger.js +186 -0
  28. package/dist/src/ingestion/shortcut-extractor.js +180 -0
  29. package/dist/src/ingestion/tutorial-extractor.js +170 -0
  30. package/dist/src/ingestion/types.js +3 -0
  31. package/dist/src/jobs/manager.js +305 -0
  32. package/dist/src/jobs/runner.js +806 -0
  33. package/dist/src/jobs/store.js +102 -0
  34. package/dist/src/jobs/types.js +30 -0
  35. package/dist/src/jobs/worker.js +97 -0
  36. package/dist/src/learning/engine.js +356 -0
  37. package/dist/src/learning/index.js +9 -0
  38. package/dist/src/learning/locator-policy.js +120 -0
  39. package/dist/src/learning/pattern-policy.js +89 -0
  40. package/dist/src/learning/recovery-policy.js +116 -0
  41. package/dist/src/learning/sensor-policy.js +115 -0
  42. package/dist/src/learning/timing-model.js +204 -0
  43. package/dist/src/learning/topology-policy.js +90 -0
  44. package/dist/src/learning/types.js +9 -0
  45. package/dist/src/logging/timeline-logger.js +48 -0
  46. package/dist/src/mcp/mcp-stdio-server.js +464 -0
  47. package/dist/src/mcp/server.js +363 -0
  48. package/dist/src/mcp-entry.js +60 -0
  49. package/dist/src/memory/playbook-seeds.js +200 -0
  50. package/dist/src/memory/recall.js +222 -0
  51. package/dist/src/memory/research.js +104 -0
  52. package/dist/src/memory/seeds.js +101 -0
  53. package/dist/src/memory/service.js +446 -0
  54. package/dist/src/memory/session.js +169 -0
  55. package/dist/src/memory/store.js +451 -0
  56. package/{src/runtime/locator-cache.ts → dist/src/memory/types.js} +1 -17
  57. package/dist/src/monitor/codex-monitor.js +382 -0
  58. package/dist/src/monitor/task-queue.js +97 -0
  59. package/dist/src/monitor/types.js +62 -0
  60. package/dist/src/native/bridge-client.js +412 -0
  61. package/{src/native/macos-bridge-client.ts → dist/src/native/macos-bridge-client.js} +0 -1
  62. package/dist/src/observer/state.js +199 -0
  63. package/dist/src/observer/types.js +43 -0
  64. package/dist/src/orchestrator/state.js +68 -0
  65. package/dist/src/orchestrator/types.js +22 -0
  66. package/dist/src/perception/ax-source.js +162 -0
  67. package/dist/src/perception/cdp-source.js +162 -0
  68. package/dist/src/perception/coordinator.js +771 -0
  69. package/dist/src/perception/frame-differ.js +287 -0
  70. package/dist/src/perception/index.js +22 -0
  71. package/dist/src/perception/manager.js +199 -0
  72. package/dist/src/perception/types.js +47 -0
  73. package/dist/src/perception/vision-source.js +399 -0
  74. package/dist/src/planner/deterministic.js +298 -0
  75. package/dist/src/planner/executor.js +870 -0
  76. package/dist/src/planner/goal-store.js +92 -0
  77. package/dist/src/planner/index.js +21 -0
  78. package/dist/src/planner/planner.js +520 -0
  79. package/dist/src/planner/tool-registry.js +71 -0
  80. package/dist/src/planner/types.js +22 -0
  81. package/dist/src/platform/explorer.js +213 -0
  82. package/dist/src/platform/help-center-markdown.js +527 -0
  83. package/dist/src/platform/learner.js +257 -0
  84. package/dist/src/playbook/engine.js +486 -0
  85. package/dist/src/playbook/index.js +20 -0
  86. package/dist/src/playbook/mcp-recorder.js +204 -0
  87. package/dist/src/playbook/recorder.js +536 -0
  88. package/dist/src/playbook/runner.js +408 -0
  89. package/dist/src/playbook/store.js +312 -0
  90. package/dist/src/playbook/types.js +17 -0
  91. package/dist/src/recovery/detectors.js +156 -0
  92. package/dist/src/recovery/engine.js +327 -0
  93. package/dist/src/recovery/index.js +20 -0
  94. package/dist/src/recovery/strategies.js +274 -0
  95. package/dist/src/recovery/types.js +20 -0
  96. package/dist/src/runtime/accessibility-adapter.js +430 -0
  97. package/dist/src/runtime/app-adapter.js +64 -0
  98. package/dist/src/runtime/applescript-adapter.js +305 -0
  99. package/dist/src/runtime/ax-role-map.js +96 -0
  100. package/dist/src/runtime/browser-adapter.js +52 -0
  101. package/dist/src/runtime/cdp-chrome-adapter.js +521 -0
  102. package/dist/src/runtime/composite-adapter.js +221 -0
  103. package/dist/src/runtime/execution-contract.js +159 -0
  104. package/dist/src/runtime/executor.js +286 -0
  105. package/dist/src/runtime/locator-cache.js +50 -0
  106. package/dist/src/runtime/planning-loop.js +63 -0
  107. package/dist/src/runtime/service.js +432 -0
  108. package/dist/src/runtime/session-manager.js +63 -0
  109. package/dist/src/runtime/state-observer.js +121 -0
  110. package/dist/src/runtime/vision-adapter.js +225 -0
  111. package/dist/src/state/app-map-types.js +72 -0
  112. package/dist/src/state/app-map.js +1974 -0
  113. package/dist/src/state/entity-tracker.js +108 -0
  114. package/dist/src/state/fusion.js +96 -0
  115. package/dist/src/state/index.js +21 -0
  116. package/dist/src/state/ladder-generator.js +236 -0
  117. package/dist/src/state/persistence.js +156 -0
  118. package/dist/src/state/types.js +17 -0
  119. package/dist/src/state/world-model.js +1456 -0
  120. package/dist/src/supervisor/locks.js +186 -0
  121. package/dist/src/supervisor/supervisor.js +403 -0
  122. package/dist/src/supervisor/types.js +30 -0
  123. package/dist/src/test-mcp-protocol.js +154 -0
  124. package/dist/src/types.js +17 -0
  125. package/dist/src/util/atomic-write.js +133 -0
  126. package/dist/src/util/sanitize.js +146 -0
  127. package/dist-app-maps/com.figma.Desktop.json +959 -0
  128. package/dist-app-maps/com.hnc.Discord.json +1146 -0
  129. package/dist-app-maps/notion.id.json +2831 -0
  130. package/dist-playbooks/canva-screenhand-carousel.json +445 -0
  131. package/dist-playbooks/codex-desktop.json +76 -0
  132. package/dist-playbooks/competitor-research-stack.json +122 -0
  133. package/dist-playbooks/davinci-color-grade.json +153 -0
  134. package/dist-playbooks/davinci-edit-timeline.json +162 -0
  135. package/dist-playbooks/davinci-render.json +114 -0
  136. package/dist-playbooks/devto.json +52 -0
  137. package/dist-playbooks/discord.json +41 -0
  138. package/dist-playbooks/google-flow-create-project.json +59 -0
  139. package/dist-playbooks/google-flow-edit-image.json +90 -0
  140. package/dist-playbooks/google-flow-edit-video.json +90 -0
  141. package/dist-playbooks/google-flow-generate-image.json +68 -0
  142. package/dist-playbooks/google-flow-generate-video.json +191 -0
  143. package/dist-playbooks/google-flow-open-project.json +48 -0
  144. package/dist-playbooks/google-flow-open-scenebuilder.json +64 -0
  145. package/dist-playbooks/google-flow-search-assets.json +64 -0
  146. package/dist-playbooks/instagram.json +57 -0
  147. package/dist-playbooks/linkedin.json +52 -0
  148. package/dist-playbooks/n8n.json +43 -0
  149. package/dist-playbooks/reddit.json +52 -0
  150. package/dist-playbooks/threads.json +59 -0
  151. package/dist-playbooks/x-twitter.json +59 -0
  152. package/dist-playbooks/youtube.json +59 -0
  153. package/dist-references/canva.json +646 -0
  154. package/dist-references/codex-desktop.json +305 -0
  155. package/dist-references/davinci-resolve-keyboard.json +594 -0
  156. package/dist-references/davinci-resolve-menu-map.json +1139 -0
  157. package/dist-references/davinci-resolve-menus-batch1.json +116 -0
  158. package/dist-references/davinci-resolve-menus-batch2.json +372 -0
  159. package/dist-references/davinci-resolve-menus-batch3.json +330 -0
  160. package/dist-references/davinci-resolve-menus-batch4.json +297 -0
  161. package/dist-references/davinci-resolve-shortcuts.json +333 -0
  162. package/dist-references/devto.json +317 -0
  163. package/dist-references/discord.json +549 -0
  164. package/dist-references/figma.json +1186 -0
  165. package/dist-references/finder.json +146 -0
  166. package/dist-references/google-ads-transparency.json +95 -0
  167. package/dist-references/google-flow.json +649 -0
  168. package/dist-references/instagram.json +341 -0
  169. package/dist-references/linkedin.json +324 -0
  170. package/dist-references/meta-ad-library.json +86 -0
  171. package/dist-references/n8n.json +387 -0
  172. package/dist-references/notes.json +27 -0
  173. package/dist-references/notion.json +163 -0
  174. package/dist-references/reddit.json +341 -0
  175. package/dist-references/threads.json +337 -0
  176. package/dist-references/x-twitter.json +403 -0
  177. package/dist-references/youtube.json +373 -0
  178. package/native/macos-bridge/Package.swift +1 -0
  179. package/native/macos-bridge/Sources/AccessibilityBridge.swift +257 -36
  180. package/native/macos-bridge/Sources/AppManagement.swift +212 -2
  181. package/native/macos-bridge/Sources/CoreGraphicsBridge.swift +348 -53
  182. package/native/macos-bridge/Sources/StreamCapture.swift +136 -0
  183. package/native/macos-bridge/Sources/VisionBridge.swift +165 -7
  184. package/native/macos-bridge/Sources/main.swift +169 -16
  185. package/native/windows-bridge/Program.cs +5 -0
  186. package/native/windows-bridge/ScreenCapture.cs +124 -0
  187. package/package.json +29 -4
  188. package/scripts/postinstall.cjs +127 -0
  189. package/.claude/commands/automate.md +0 -28
  190. package/.claude/commands/debug-ui.md +0 -19
  191. package/.claude/commands/screenshot.md +0 -15
  192. package/.github/FUNDING.yml +0 -1
  193. package/.github/ISSUE_TEMPLATE/bug_report.md +0 -27
  194. package/.github/ISSUE_TEMPLATE/feature_request.md +0 -20
  195. package/.mcp.json +0 -8
  196. package/DESKTOP_MCP_GUIDE.md +0 -92
  197. package/SECURITY.md +0 -44
  198. package/docs/architecture.md +0 -47
  199. package/install-skills.sh +0 -19
  200. package/mcp-bridge.ts +0 -271
  201. package/mcp-desktop.ts +0 -1221
  202. package/playbooks/instagram.json +0 -41
  203. package/playbooks/instagram_v2.json +0 -201
  204. package/playbooks/x_v1.json +0 -211
  205. package/scripts/devpost-live-loop.mjs +0 -421
  206. package/src/logging/timeline-logger.ts +0 -55
  207. package/src/mcp/server.ts +0 -449
  208. package/src/memory/recall.ts +0 -191
  209. package/src/memory/research.ts +0 -146
  210. package/src/memory/seeds.ts +0 -123
  211. package/src/memory/session.ts +0 -201
  212. package/src/memory/store.ts +0 -434
  213. package/src/memory/types.ts +0 -69
  214. package/src/native/bridge-client.ts +0 -239
  215. package/src/runtime/accessibility-adapter.ts +0 -487
  216. package/src/runtime/app-adapter.ts +0 -169
  217. package/src/runtime/applescript-adapter.ts +0 -376
  218. package/src/runtime/ax-role-map.ts +0 -102
  219. package/src/runtime/browser-adapter.ts +0 -129
  220. package/src/runtime/cdp-chrome-adapter.ts +0 -676
  221. package/src/runtime/composite-adapter.ts +0 -274
  222. package/src/runtime/executor.ts +0 -396
  223. package/src/runtime/planning-loop.ts +0 -81
  224. package/src/runtime/service.ts +0 -448
  225. package/src/runtime/session-manager.ts +0 -50
  226. package/src/runtime/state-observer.ts +0 -136
  227. package/src/runtime/vision-adapter.ts +0 -297
  228. package/src/types.ts +0 -297
  229. package/tests/bridge-client.test.ts +0 -176
  230. package/tests/browser-stealth.test.ts +0 -210
  231. package/tests/composite-adapter.test.ts +0 -64
  232. package/tests/mcp-server.test.ts +0 -151
  233. package/tests/memory-recall.test.ts +0 -339
  234. package/tests/memory-research.test.ts +0 -159
  235. package/tests/memory-seeds.test.ts +0 -120
  236. package/tests/memory-store.test.ts +0 -392
  237. package/tests/types.test.ts +0 -92
  238. package/tsconfig.check.json +0 -17
  239. package/tsconfig.json +0 -19
  240. package/vitest.config.ts +0 -8
  241. /package/{playbooks → dist-references}/devpost.json +0 -0
@@ -0,0 +1,486 @@
1
+ // Copyright (C) 2025 Clazro Technology Private Limited
2
+ // SPDX-License-Identifier: AGPL-3.0-only
3
+ //
4
+ // This file is part of ScreenHand.
5
+ //
6
+ // ScreenHand is free software: you can redistribute it and/or modify
7
+ // it under the terms of the GNU Affero General Public License as
8
+ // published by the Free Software Foundation, version 3.
9
+ //
10
+ // ScreenHand is distributed in the hope that it will be useful,
11
+ // but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ // GNU Affero General Public License for more details.
14
+ //
15
+ // You should have received a copy of the GNU Affero General Public License
16
+ // along with ScreenHand. If not, see <https://www.gnu.org/licenses/>.
17
+ import { readObserverState, getObserverPopup } from "../observer/state.js";
18
+ const DEFAULT_VERIFY_TIMEOUT = 5000; // ms verifyStep waits for the selector when a step sets no verifyTimeoutMs
19
+ const STEP_DELAY_MS = 300; // ms pause after each successfully executed step so the UI can settle
20
/**
 * Runs playbooks (ordered lists of UI-automation steps) against a live session.
 *
 * Desktop-level actions are delegated to the injected `runtime`. The
 * `browser_*` and `cdp_key_event` actions use a CDP client obtained from the
 * factory registered through setCDPConnect().
 */
export class PlaybookEngine {
    /** Runtime service that performs the non-CDP actions (navigate, press, typeInto, ...). */
    runtime;
    /** CDP client factory; stays undefined until setCDPConnect() is called. */
    cdpConnect;
    /** Enable observer-based popup checks before each step */
    popupCheckEnabled = false;
    /**
     * @param runtime Object whose async methods return `{ ok, data?, error? }` results.
     */
    constructor(runtime) {
        this.runtime = runtime;
    }
    /** Enable/disable pre-step popup detection via observer daemon */
    setPopupCheck(enabled) {
        this.popupCheckEnabled = enabled;
    }
    /** Set CDP connection factory for browser_js and cdp_key_event actions. Factory accepts optional port override. */
    setCDPConnect(factory) {
        this.cdpConnect = factory;
    }
    /**
     * Execute a playbook against a live session.
     * Returns result with success/failure and which step broke.
     *
     * @param sessionId Session identifier forwarded to every runtime call.
     * @param playbook Object with `id`, `steps` and an optional `cdpPort`.
     * @param options Optional `vars` (values for {NAME} placeholders) and an
     *   `onStep(index, step, result)` callback invoked after each executed or skipped step.
     * @returns Record with `playbook`, `success`, `stepsCompleted`, `totalSteps`,
     *   `failedAtStep` (-1 on success), `durationMs`, plus `error` on failure.
     */
    async run(sessionId, playbook, options = {}) {
        const start = Date.now();
        let stepsCompleted = 0;
        // Single place that shapes a failure result; reads the live counters at call time.
        const failure = (index, error) => ({
            playbook: playbook.id,
            success: false,
            stepsCompleted,
            totalSteps: playbook.steps.length,
            failedAtStep: index,
            error,
            durationMs: Date.now() - start,
        });
        for (let i = 0; i < playbook.steps.length; i++) {
            let step = options.vars ? this.substituteVars(playbook.steps[i], options.vars) : playbook.steps[i];
            try {
                // Pre-step: check for popups via observer (if enabled, non-blocking)
                if (this.popupCheckEnabled) {
                    await this.dismissPopupIfPresent(sessionId);
                }
                // OCR-based locate: resolve locateByOcr to coordinates before execution
                if (step.locateByOcr) {
                    const coords = this.resolveOcrTarget(step.locateByOcr, step.offsetX ?? 0, step.offsetY ?? 0);
                    if (coords) {
                        step = { ...step, target: { x: coords.x, y: coords.y } };
                    }
                }
                const result = await this.executeStep(sessionId, step, playbook.cdpPort);
                // The step counts as completed once it executed, even if verification fails below.
                stepsCompleted++;
                if (options.onStep) {
                    options.onStep(i, step, result);
                }
                // Verify step if needed
                if (step.verify) {
                    const verified = await this.verifyStep(sessionId, step);
                    if (!verified && !step.optional) {
                        return failure(i, `Verification failed at step ${i}: ${step.description ?? step.action}`);
                    }
                }
                // Small delay between steps for UI to settle
                await sleep(STEP_DELAY_MS);
            }
            catch (err) {
                const message = err instanceof Error ? err.message : String(err);
                if (step.optional) {
                    // Optional steps never abort the run; they are reported as skipped.
                    stepsCompleted++;
                    if (options.onStep) {
                        options.onStep(i, step, `Skipped (optional): ${message}`);
                    }
                    continue;
                }
                return failure(i, message);
            }
        }
        return {
            playbook: playbook.id,
            success: true,
            stepsCompleted,
            totalSteps: playbook.steps.length,
            failedAtStep: -1,
            durationMs: Date.now() - start,
        };
    }
    /**
     * Execute a single playbook step.
     *
     * @param sessionId Session identifier forwarded to the runtime.
     * @param step Step object; `step.action` selects the behaviour.
     * @param cdpPort Optional port forwarded to the CDP factory for browser/CDP actions.
     * @returns Human-readable description of what was done.
     * @throws Error when a required field is missing, the runtime reports `ok: false`,
     *   CDP is required but not configured, or the action is unknown.
     */
    async executeStep(sessionId, step, cdpPort) {
        const target = this.resolveTarget(step.target);
        switch (step.action) {
            case "navigate": {
                if (!step.url)
                    throw new Error("navigate step missing url");
                const r = await this.runtime.navigate({ sessionId, url: step.url });
                if (!r.ok)
                    throw new Error(r.error.message);
                return `Navigated to ${step.url}`;
            }
            case "press": {
                if (!target)
                    throw new Error("press step missing target");
                const r = await this.runtime.press({ sessionId, target });
                if (!r.ok)
                    throw new Error(r.error.message);
                return `Pressed ${JSON.stringify(step.target)}`;
            }
            case "type_into": {
                if (!step.text)
                    throw new Error("type_into step missing text");
                if (target) {
                    const r = await this.runtime.typeInto({ sessionId, target, text: step.text });
                    if (!r.ok)
                        throw new Error(r.error.message);
                    return `Typed "${step.text}" into ${JSON.stringify(step.target)}`;
                }
                // No target — type into focused element character by character via key events
                for (const char of step.text) {
                    const r = await this.runtime.keyCombo({ sessionId, keys: [char] });
                    if (!r.ok)
                        throw new Error(r.error?.message ?? "key event failed");
                }
                return `Typed "${step.text}" into focused element`;
            }
            case "extract": {
                if (!target)
                    throw new Error("extract step missing target");
                const r = await this.runtime.extract({
                    sessionId,
                    target,
                    format: step.format ?? "text",
                });
                if (!r.ok)
                    throw new Error(r.error.message);
                // Only the first 200 characters of the serialized data go into the summary.
                return `Extracted: ${JSON.stringify(r.data).slice(0, 200)}`;
            }
            case "key":
            case "key_combo": {
                if (!step.keys || step.keys.length === 0)
                    throw new Error(`${step.action} step missing keys`);
                const r = await this.runtime.keyCombo({ sessionId, keys: step.keys });
                if (!r.ok)
                    throw new Error(r.error.message);
                return `${step.action === "key" ? "Key" : "Key combo"}: ${step.keys.join("+")}`;
            }
            case "menu_click": {
                if (!step.menuPath || step.menuPath.length === 0)
                    throw new Error("menu_click step missing menuPath");
                const r = await this.runtime.menuClick({ sessionId, menuPath: step.menuPath });
                if (!r.ok)
                    throw new Error(r.error.message);
                return `Menu click: ${step.menuPath.join(" > ")}`;
            }
            case "scroll": {
                const input = {
                    sessionId,
                    direction: step.direction ?? "down",
                };
                // `amount` is only sent when the step specifies it.
                if (step.amount != null)
                    input.amount = step.amount;
                const r = await this.runtime.scroll(input);
                if (!r.ok)
                    throw new Error(r.error.message);
                return `Scrolled ${step.direction ?? "down"}`;
            }
            case "wait": {
                await sleep(step.ms ?? 1000);
                return `Waited ${step.ms ?? 1000}ms`;
            }
            case "screenshot": {
                const r = await this.runtime.screenshot({ sessionId });
                if (!r.ok)
                    throw new Error(r.error.message);
                return `Screenshot taken`;
            }
            case "browser_js": {
                if (!step.code)
                    throw new Error("browser_js step missing code");
                return this.withCdpClient(step.action, cdpPort, async (client) => {
                    const result = await client.Runtime.evaluate({
                        expression: step.code,
                        awaitPromise: true,
                        returnByValue: true,
                    });
                    if (result.exceptionDetails) {
                        throw new Error(`JS Error: ${result.exceptionDetails.text ?? result.exceptionDetails.exception?.description ?? "unknown"}`);
                    }
                    const val = result.result?.value;
                    return `browser_js: ${typeof val === "object" ? JSON.stringify(val) : String(val ?? "undefined")}`;
                });
            }
            case "browser_click":
            case "browser_human_click": {
                // Selector is resolved first so a missing selector is reported before a missing CDP factory.
                const selector = this.getBrowserSelector(step);
                return this.withCdpClient(step.action, cdpPort, async (client) => {
                    const point = await this.resolveBrowserClickPoint(client, selector);
                    await this.dispatchMouseClick(client, point.x, point.y);
                    return `${step.action}: clicked ${selector}`;
                });
            }
            case "browser_type": {
                const selector = this.getBrowserSelector(step);
                if (!step.text)
                    throw new Error("browser_type step missing text");
                return this.withCdpClient(step.action, cdpPort, async (client) => {
                    await this.focusBrowserElement(client, selector);
                    // Existing content is always cleared (select all + Backspace) before typing.
                    await this.dispatchSelectAll(client);
                    await this.dispatchKey(client, "Backspace", "Backspace");
                    await sleep(50);
                    for (const char of step.text) {
                        await this.dispatchTextChar(client, char);
                        await sleep(50);
                    }
                    return `browser_type: typed ${step.text.length} chars into ${selector}`;
                });
            }
            case "cdp_key_event": {
                if (!step.keyEvent)
                    throw new Error("cdp_key_event step missing keyEvent");
                return this.withCdpClient(step.action, cdpPort, async (client) => {
                    const { key, code, modifiers, windowsVirtualKeyCode } = step.keyEvent;
                    // The same virtual key code is sent as both the Windows and the native code.
                    const baseParams = { key, code, modifiers: modifiers ?? 0, windowsVirtualKeyCode: windowsVirtualKeyCode ?? 0, nativeVirtualKeyCode: windowsVirtualKeyCode ?? 0 };
                    await client.Input.dispatchKeyEvent({ type: "keyDown", ...baseParams });
                    await client.Input.dispatchKeyEvent({ type: "keyUp", ...baseParams });
                    return `cdp_key_event: ${modifiers ? `mod${modifiers}+` : ""}${key}`;
                });
            }
            default:
                throw new Error(`Unknown action: ${step.action}`);
        }
    }
    /**
     * Open a CDP client, run `task` with it and always close the client afterwards.
     *
     * @param action Action name used in the "requires CDP" error message.
     * @param cdpPort Optional port forwarded to the CDP factory.
     * @param task Async callback receiving the connected client.
     * @returns Whatever `task` resolves to.
     * @throws Error when no CDP factory has been registered via setCDPConnect().
     */
    async withCdpClient(action, cdpPort, task) {
        if (!this.cdpConnect)
            throw new Error(`${action} requires CDP — call setCDPConnect() first`);
        const client = await this.cdpConnect(cdpPort);
        try {
            return await task(client);
        }
        finally {
            await client.close();
        }
    }
    /**
     * Substitute {VAR_NAME} placeholders in step string fields with actual values.
     *
     * Covers `code`, `text`, `url`, `description`, `verify` and every entry of
     * `menuPath`; all other fields are copied unchanged. The input step is not mutated.
     *
     * @param step Step to copy.
     * @param vars Map of placeholder name to replacement value.
     * @returns Shallow copy of `step` with placeholders replaced.
     */
    substituteVars(step, vars) {
        const sub = (s) => {
            let result = s;
            for (const [key, val] of Object.entries(vars)) {
                // A replacer function inserts the value literally; a plain replacement
                // string would have "$&", "$$", "$1"... interpreted as special patterns.
                result = result.replaceAll(`{${key}}`, () => String(val));
            }
            return result;
        };
        const result = { ...step };
        if (result.code)
            result.code = sub(result.code);
        if (result.text)
            result.text = sub(result.text);
        if (result.url)
            result.url = sub(result.url);
        if (result.description)
            result.description = sub(result.description);
        if (result.verify)
            result.verify = sub(result.verify);
        if (result.menuPath)
            result.menuPath = result.menuPath.map(sub);
        return result;
    }
    /**
     * Verify a step's postcondition via CSS selector check.
     *
     * Waits (up to `step.verifyTimeoutMs`, or DEFAULT_VERIFY_TIMEOUT) for the
     * `step.verify` selector to become visible. Steps without `verify` pass trivially.
     *
     * @returns Truthy when the runtime reports the condition matched.
     */
    async verifyStep(sessionId, step) {
        if (!step.verify)
            return true;
        const timeout = step.verifyTimeoutMs ?? DEFAULT_VERIFY_TIMEOUT;
        const r = await this.runtime.waitFor({
            sessionId,
            condition: { type: "selector_visible", selector: step.verify },
            timeoutMs: timeout,
        });
        return r.ok && r.data.matched;
    }
    /**
     * Dismiss a popup detected by the observer daemon.
     * Reads observer state, if popup found, sends the appropriate dismiss action.
     * Non-fatal — if observer isn't running or no popup, silently returns.
     */
    async dismissPopupIfPresent(sessionId) {
        let popup;
        try {
            popup = getObserverPopup();
        }
        catch {
            return; // Observer not running or state unreadable
        }
        if (!popup)
            return;
        try {
            switch (popup.dismissAction) {
                case "press_escape":
                    await this.runtime.keyCombo({ sessionId, keys: ["escape"] });
                    break;
                case "click_ok":
                case "click_cancel":
                case "click_close":
                case "click_allow":
                case "click_deny": {
                    // Map action to button text
                    const buttonMap = {
                        click_ok: "OK",
                        click_cancel: "Cancel",
                        click_close: "Close",
                        click_allow: "Allow",
                        click_deny: "Don't Allow",
                    };
                    const buttonText = buttonMap[popup.dismissAction] ?? "OK";
                    // Try to click the button by text
                    await this.runtime.press({ sessionId, target: { type: "text", value: buttonText } });
                    break;
                }
                case "unknown":
                default:
                    // Don't auto-dismiss unknown popups. Nothing was sent, so there is
                    // no close animation to wait for — skip the settle delay below.
                    return;
            }
            // Wait briefly for popup to close
            await sleep(500);
        }
        catch {
            // Popup dismiss failed — non-fatal, continue with step
        }
    }
    /**
     * Resolve an OCR text target to screen coordinates using observer state.
     *
     * The observer's OCR text is only used as a visibility check: when the text is
     * present and the caller supplied a non-zero offset, that offset is returned
     * as the coordinates. Returns null when the observer state is unreadable or
     * not running, the text is absent, or both offsets are 0.
     */
    resolveOcrTarget(searchText, offsetX, offsetY) {
        let state;
        try {
            state = readObserverState();
        }
        catch {
            return null;
        }
        if (!state?.running || !state.lastFrame?.ocrText)
            return null;
        // Simple text search in OCR output
        // The native OCR (vision.ocr) returns bounding boxes when available.
        // For now we use a fallback: if the observer has the text, we know
        // the element is visible. The caller should provide approximate
        // coordinates via offsetX/offsetY relative to a known anchor.
        const ocrText = state.lastFrame.ocrText;
        if (!ocrText.toLowerCase().includes(searchText.toLowerCase())) {
            return null; // Text not found on screen
        }
        // Text found — return offset coordinates (caller provides absolute offsets
        // or relative to screen center as a basic heuristic)
        if (offsetX !== 0 || offsetY !== 0) {
            return { x: offsetX, y: offsetY };
        }
        // No explicit coordinates — can't determine position from plain OCR text alone
        return null;
    }
    /**
     * Convert playbook target format to runtime Target format.
     *
     * Strings starting with "[", "#", "." or "css=" become selector targets (the
     * "css=" prefix is stripped); other strings become text targets. Objects with
     * `selector` or with both `x` and `y` map to selector / coordinates targets.
     *
     * @returns Runtime target, or undefined when `target` is missing or unrecognised.
     */
    resolveTarget(target) {
        if (!target)
            return undefined;
        if (typeof target === "string") {
            // CSS selector if starts with common patterns, else treat as text
            if (target.startsWith("[") || target.startsWith("#") || target.startsWith(".") || target.startsWith("css=")) {
                return { type: "selector", value: target.replace(/^css=/, "") };
            }
            return { type: "text", value: target };
        }
        if ("selector" in target) {
            return { type: "selector", value: target.selector };
        }
        if ("x" in target && "y" in target) {
            return { type: "coordinates", x: target.x, y: target.y };
        }
        return undefined;
    }
    /**
     * Pick the CSS selector for a browser_* step: a string target, then
     * `target.selector`, then the step's `verify` selector as a last resort.
     *
     * @throws Error when none of the three is available.
     */
    getBrowserSelector(step) {
        if (typeof step.target === "string")
            return step.target;
        if (step.target && "selector" in step.target)
            return step.target.selector;
        if (step.verify)
            return step.verify;
        throw new Error(`${step.action} step missing selector target`);
    }
    /**
     * Scroll the element matching `selector` into view and focus it in the page.
     *
     * @throws Error "Element not found: <selector>" when no HTMLElement matches.
     */
    async focusBrowserElement(client, selector) {
        // Both values are embedded as JSON string literals so quotes, backslashes
        // and newlines in the selector cannot break or alter the page expression.
        const selectorLiteral = JSON.stringify(selector);
        const reasonLiteral = JSON.stringify(`Element not found: ${selector}`);
        const result = await client.Runtime.evaluate({
            expression: `(() => {
                const el = document.querySelector(${selectorLiteral});
                if (!(el instanceof HTMLElement)) return { ok: false, reason: ${reasonLiteral} };
                el.scrollIntoView({ block: "center" });
                el.focus();
                return { ok: true };
            })()`,
            returnByValue: true,
        });
        const value = result.result?.value;
        if (!value?.ok) {
            throw new Error(value?.reason || `Element not found: ${selector}`);
        }
    }
    /**
     * Scroll the element matching `selector` into view and return the centre of
     * its bounding client rect.
     *
     * @returns `{ x, y }` centre point of the element.
     * @throws Error "Element not found: <selector>" when no HTMLElement matches.
     */
    async resolveBrowserClickPoint(client, selector) {
        // Both values are embedded as JSON string literals so quotes, backslashes
        // and newlines in the selector cannot break or alter the page expression.
        const selectorLiteral = JSON.stringify(selector);
        const reasonLiteral = JSON.stringify(`Element not found: ${selector}`);
        const result = await client.Runtime.evaluate({
            expression: `(() => {
                const el = document.querySelector(${selectorLiteral});
                if (!(el instanceof HTMLElement)) return { ok: false, reason: ${reasonLiteral} };
                el.scrollIntoView({ block: "center" });
                const r = el.getBoundingClientRect();
                return { ok: true, x: r.x + r.width / 2, y: r.y + r.height / 2 };
            })()`,
            returnByValue: true,
        });
        const value = result.result?.value;
        if (!value?.ok) {
            throw new Error(value?.reason || `Element not found: ${selector}`);
        }
        return { x: value.x, y: value.y };
    }
    /** Move to (x, y), then press and release the left button, with 40 ms pauses between events. */
    async dispatchMouseClick(client, x, y) {
        await client.Input.dispatchMouseEvent({ type: "mouseMoved", x, y });
        await sleep(40);
        await client.Input.dispatchMouseEvent({ type: "mousePressed", x, y, button: "left", clickCount: 1 });
        await sleep(40);
        await client.Input.dispatchMouseEvent({ type: "mouseReleased", x, y, button: "left", clickCount: 1 });
    }
    /** Send an "a" key press with modifier bit 4 on macOS and 2 elsewhere (select-all shortcut). */
    async dispatchSelectAll(client) {
        const metaModifier = process.platform === "darwin" ? 4 : 2;
        await client.Input.dispatchKeyEvent({ type: "keyDown", key: "a", code: "KeyA", modifiers: metaModifier });
        await client.Input.dispatchKeyEvent({ type: "keyUp", key: "a", code: "KeyA", modifiers: metaModifier });
    }
    /** Send a keyDown/keyUp pair for a non-text key identified by `key` and `code`. */
    async dispatchKey(client, key, code) {
        await client.Input.dispatchKeyEvent({ type: "keyDown", key, code });
        await client.Input.dispatchKeyEvent({ type: "keyUp", key, code });
    }
    /** Send a keyDown/keyUp pair that carries `char` as the typed text. */
    async dispatchTextChar(client, char) {
        await client.Input.dispatchKeyEvent({ type: "keyDown", text: char, key: char, unmodifiedText: char });
        await client.Input.dispatchKeyEvent({ type: "keyUp", text: char, key: char, unmodifiedText: char });
    }
}
484
+ function sleep(ms) { // Returns a promise that resolves after `ms` milliseconds; used to pace steps and CDP input events.
485
+ return new Promise((resolve) => setTimeout(resolve, ms));
486
+ }
@@ -0,0 +1,20 @@
1
+ // Copyright (C) 2025 Clazro Technology Private Limited
2
+ // SPDX-License-Identifier: AGPL-3.0-only
3
+ //
4
+ // This file is part of ScreenHand.
5
+ //
6
+ // ScreenHand is free software: you can redistribute it and/or modify
7
+ // it under the terms of the GNU Affero General Public License as
8
+ // published by the Free Software Foundation, version 3.
9
+ //
10
+ // ScreenHand is distributed in the hope that it will be useful,
11
+ // but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ // GNU Affero General Public License for more details.
14
+ //
15
+ // You should have received a copy of the GNU Affero General Public License
16
+ // along with ScreenHand. If not, see <https://www.gnu.org/licenses/>.
17
+ export { PlaybookEngine } from "./engine.js";
18
+ export { PlaybookStore } from "./store.js";
19
+ export { PlaybookRunner } from "./runner.js";
20
+ export { PlaybookRecorder } from "./recorder.js";