screenhand 0.2.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (212)
  1. package/README.md +165 -446
  2. package/bin/darwin-arm64/macos-bridge +0 -0
  3. package/dist/mcp-desktop.js +3615 -400
  4. package/dist/scripts/export-help-center.js +112 -0
  5. package/dist/scripts/marketing-loop.js +117 -0
  6. package/dist/scripts/observer-daemon.js +288 -0
  7. package/dist/scripts/orchestrator-daemon.js +399 -0
  8. package/dist/scripts/threads-campaign.js +208 -0
  9. package/dist/src/community/fetcher.js +109 -0
  10. package/dist/src/community/index.js +6 -0
  11. package/dist/src/community/publisher.js +191 -0
  12. package/dist/src/community/remote-api.js +121 -0
  13. package/dist/src/community/types.js +3 -0
  14. package/dist/src/community/validator.js +95 -0
  15. package/dist/src/context-tracker.js +489 -0
  16. package/dist/src/ingestion/coverage-auditor.js +233 -0
  17. package/dist/src/ingestion/doc-parser.js +164 -0
  18. package/dist/src/ingestion/index.js +8 -0
  19. package/dist/src/ingestion/menu-scanner.js +152 -0
  20. package/dist/src/ingestion/reference-merger.js +186 -0
  21. package/dist/src/ingestion/shortcut-extractor.js +180 -0
  22. package/dist/src/ingestion/tutorial-extractor.js +170 -0
  23. package/dist/src/ingestion/types.js +3 -0
  24. package/dist/src/jobs/manager.js +82 -14
  25. package/dist/src/jobs/runner.js +138 -15
  26. package/dist/src/learning/engine.js +356 -0
  27. package/dist/src/learning/index.js +9 -0
  28. package/dist/src/learning/locator-policy.js +120 -0
  29. package/dist/src/learning/pattern-policy.js +89 -0
  30. package/dist/src/learning/recovery-policy.js +116 -0
  31. package/dist/src/learning/sensor-policy.js +115 -0
  32. package/dist/src/learning/timing-model.js +204 -0
  33. package/dist/src/learning/topology-policy.js +90 -0
  34. package/dist/src/learning/types.js +9 -0
  35. package/dist/src/logging/timeline-logger.js +4 -1
  36. package/dist/src/memory/playbook-seeds.js +200 -0
  37. package/dist/src/memory/recall.js +60 -8
  38. package/dist/src/memory/service.js +30 -5
  39. package/dist/src/memory/store.js +34 -5
  40. package/dist/src/native/bridge-client.js +253 -31
  41. package/dist/src/observer/state.js +199 -0
  42. package/dist/src/observer/types.js +43 -0
  43. package/dist/src/orchestrator/state.js +68 -0
  44. package/dist/src/orchestrator/types.js +22 -0
  45. package/dist/src/perception/ax-source.js +162 -0
  46. package/dist/src/perception/cdp-source.js +162 -0
  47. package/dist/src/perception/coordinator.js +771 -0
  48. package/dist/src/perception/frame-differ.js +287 -0
  49. package/dist/src/perception/index.js +22 -0
  50. package/dist/src/perception/manager.js +199 -0
  51. package/dist/src/perception/types.js +47 -0
  52. package/dist/src/perception/vision-source.js +399 -0
  53. package/dist/src/planner/deterministic.js +298 -0
  54. package/dist/src/planner/executor.js +870 -0
  55. package/dist/src/planner/goal-store.js +92 -0
  56. package/dist/src/planner/index.js +21 -0
  57. package/dist/src/planner/planner.js +520 -0
  58. package/dist/src/planner/tool-registry.js +71 -0
  59. package/dist/src/planner/types.js +22 -0
  60. package/dist/src/platform/explorer.js +213 -0
  61. package/dist/src/platform/help-center-markdown.js +527 -0
  62. package/dist/src/platform/learner.js +257 -0
  63. package/dist/src/playbook/engine.js +296 -11
  64. package/dist/src/playbook/mcp-recorder.js +204 -0
  65. package/dist/src/playbook/recorder.js +3 -2
  66. package/dist/src/playbook/runner.js +1 -1
  67. package/dist/src/playbook/store.js +139 -10
  68. package/dist/src/recovery/detectors.js +156 -0
  69. package/dist/src/recovery/engine.js +327 -0
  70. package/dist/src/recovery/index.js +20 -0
  71. package/dist/src/recovery/strategies.js +274 -0
  72. package/dist/src/recovery/types.js +20 -0
  73. package/dist/src/runtime/accessibility-adapter.js +55 -18
  74. package/dist/src/runtime/applescript-adapter.js +8 -2
  75. package/dist/src/runtime/cdp-chrome-adapter.js +1 -1
  76. package/dist/src/runtime/executor.js +23 -3
  77. package/dist/src/runtime/locator-cache.js +24 -2
  78. package/dist/src/runtime/service.js +59 -15
  79. package/dist/src/runtime/session-manager.js +4 -1
  80. package/dist/src/runtime/vision-adapter.js +2 -1
  81. package/dist/src/state/app-map-types.js +72 -0
  82. package/dist/src/state/app-map.js +1974 -0
  83. package/dist/src/state/entity-tracker.js +108 -0
  84. package/dist/src/state/fusion.js +96 -0
  85. package/dist/src/state/index.js +21 -0
  86. package/dist/src/state/ladder-generator.js +236 -0
  87. package/dist/src/state/persistence.js +156 -0
  88. package/dist/src/state/types.js +17 -0
  89. package/dist/src/state/world-model.js +1456 -0
  90. package/dist/src/util/atomic-write.js +19 -4
  91. package/dist/src/util/sanitize.js +146 -0
  92. package/dist-app-maps/com.figma.Desktop.json +959 -0
  93. package/dist-app-maps/com.hnc.Discord.json +1146 -0
  94. package/dist-app-maps/notion.id.json +2831 -0
  95. package/dist-playbooks/canva-screenhand-carousel.json +445 -0
  96. package/dist-playbooks/codex-desktop.json +76 -0
  97. package/dist-playbooks/competitor-research-stack.json +122 -0
  98. package/dist-playbooks/davinci-color-grade.json +153 -0
  99. package/dist-playbooks/davinci-edit-timeline.json +162 -0
  100. package/dist-playbooks/davinci-render.json +114 -0
  101. package/dist-playbooks/devto.json +52 -0
  102. package/dist-playbooks/discord.json +41 -0
  103. package/dist-playbooks/google-flow-create-project.json +59 -0
  104. package/dist-playbooks/google-flow-edit-image.json +90 -0
  105. package/dist-playbooks/google-flow-edit-video.json +90 -0
  106. package/dist-playbooks/google-flow-generate-image.json +68 -0
  107. package/dist-playbooks/google-flow-generate-video.json +191 -0
  108. package/dist-playbooks/google-flow-open-project.json +48 -0
  109. package/dist-playbooks/google-flow-open-scenebuilder.json +64 -0
  110. package/dist-playbooks/google-flow-search-assets.json +64 -0
  111. package/dist-playbooks/instagram.json +57 -0
  112. package/dist-playbooks/linkedin.json +52 -0
  113. package/dist-playbooks/n8n.json +43 -0
  114. package/dist-playbooks/reddit.json +52 -0
  115. package/dist-playbooks/threads.json +59 -0
  116. package/dist-playbooks/x-twitter.json +59 -0
  117. package/dist-playbooks/youtube.json +59 -0
  118. package/dist-references/canva.json +646 -0
  119. package/dist-references/codex-desktop.json +305 -0
  120. package/dist-references/davinci-resolve-keyboard.json +594 -0
  121. package/dist-references/davinci-resolve-menu-map.json +1139 -0
  122. package/dist-references/davinci-resolve-menus-batch1.json +116 -0
  123. package/dist-references/davinci-resolve-menus-batch2.json +372 -0
  124. package/dist-references/davinci-resolve-menus-batch3.json +330 -0
  125. package/dist-references/davinci-resolve-menus-batch4.json +297 -0
  126. package/dist-references/davinci-resolve-shortcuts.json +333 -0
  127. package/dist-references/devpost.json +186 -0
  128. package/dist-references/devto.json +317 -0
  129. package/dist-references/discord.json +549 -0
  130. package/dist-references/figma.json +1186 -0
  131. package/dist-references/finder.json +146 -0
  132. package/dist-references/google-ads-transparency.json +95 -0
  133. package/dist-references/google-flow.json +649 -0
  134. package/dist-references/instagram.json +341 -0
  135. package/dist-references/linkedin.json +324 -0
  136. package/dist-references/meta-ad-library.json +86 -0
  137. package/dist-references/n8n.json +387 -0
  138. package/dist-references/notes.json +27 -0
  139. package/dist-references/notion.json +163 -0
  140. package/dist-references/reddit.json +341 -0
  141. package/dist-references/threads.json +337 -0
  142. package/dist-references/x-twitter.json +403 -0
  143. package/dist-references/youtube.json +373 -0
  144. package/native/macos-bridge/Package.swift +22 -0
  145. package/native/macos-bridge/Sources/AccessibilityBridge.swift +482 -0
  146. package/native/macos-bridge/Sources/AppManagement.swift +339 -0
  147. package/native/macos-bridge/Sources/CoreGraphicsBridge.swift +537 -0
  148. package/native/macos-bridge/Sources/ObserverBridge.swift +120 -0
  149. package/native/macos-bridge/Sources/StreamCapture.swift +136 -0
  150. package/native/macos-bridge/Sources/VisionBridge.swift +238 -0
  151. package/native/macos-bridge/Sources/main.swift +498 -0
  152. package/native/windows-bridge/AppManagement.cs +234 -0
  153. package/native/windows-bridge/InputBridge.cs +436 -0
  154. package/native/windows-bridge/Program.cs +270 -0
  155. package/native/windows-bridge/ScreenCapture.cs +453 -0
  156. package/native/windows-bridge/UIAutomationBridge.cs +571 -0
  157. package/native/windows-bridge/WindowsBridge.csproj +17 -0
  158. package/package.json +12 -1
  159. package/scripts/postinstall.cjs +127 -0
  160. package/dist/.audit-log.jsonl +0 -55
  161. package/dist/.screenhand/memory/.lock +0 -1
  162. package/dist/.screenhand/memory/actions.jsonl +0 -85
  163. package/dist/.screenhand/memory/errors.jsonl +0 -5
  164. package/dist/.screenhand/memory/errors.jsonl.bak +0 -4
  165. package/dist/.screenhand/memory/state.json +0 -35
  166. package/dist/.screenhand/memory/state.json.bak +0 -35
  167. package/dist/.screenhand/memory/strategies.jsonl +0 -12
  168. package/dist/agent/cli.js +0 -73
  169. package/dist/agent/loop.js +0 -258
  170. package/dist/config.js +0 -9
  171. package/dist/index.js +0 -56
  172. package/dist/logging/timeline-logger.js +0 -29
  173. package/dist/mcp/mcp-stdio-server.js +0 -448
  174. package/dist/mcp/server.js +0 -347
  175. package/dist/mcp-entry.js +0 -59
  176. package/dist/memory/recall.js +0 -160
  177. package/dist/memory/research.js +0 -98
  178. package/dist/memory/seeds.js +0 -89
  179. package/dist/memory/session.js +0 -161
  180. package/dist/memory/store.js +0 -391
  181. package/dist/memory/types.js +0 -4
  182. package/dist/monitor/codex-monitor.js +0 -377
  183. package/dist/monitor/task-queue.js +0 -84
  184. package/dist/monitor/types.js +0 -49
  185. package/dist/native/bridge-client.js +0 -174
  186. package/dist/native/macos-bridge-client.js +0 -5
  187. package/dist/npm-publish-helper.js +0 -117
  188. package/dist/npm-token-cdp.js +0 -113
  189. package/dist/npm-token-create.js +0 -135
  190. package/dist/npm-token-finish.js +0 -126
  191. package/dist/playbook/engine.js +0 -193
  192. package/dist/playbook/index.js +0 -4
  193. package/dist/playbook/recorder.js +0 -519
  194. package/dist/playbook/runner.js +0 -392
  195. package/dist/playbook/store.js +0 -166
  196. package/dist/playbook/types.js +0 -4
  197. package/dist/runtime/accessibility-adapter.js +0 -377
  198. package/dist/runtime/app-adapter.js +0 -48
  199. package/dist/runtime/applescript-adapter.js +0 -283
  200. package/dist/runtime/ax-role-map.js +0 -80
  201. package/dist/runtime/browser-adapter.js +0 -36
  202. package/dist/runtime/cdp-chrome-adapter.js +0 -505
  203. package/dist/runtime/composite-adapter.js +0 -205
  204. package/dist/runtime/executor.js +0 -250
  205. package/dist/runtime/locator-cache.js +0 -12
  206. package/dist/runtime/planning-loop.js +0 -47
  207. package/dist/runtime/service.js +0 -372
  208. package/dist/runtime/session-manager.js +0 -28
  209. package/dist/runtime/state-observer.js +0 -105
  210. package/dist/runtime/vision-adapter.js +0 -208
  211. package/dist/test-mcp-protocol.js +0 -138
  212. package/dist/types.js +0 -1
@@ -0,0 +1,257 @@
1
+ // Copyright (C) 2025 Clazro Technology Private Limited
2
+ // SPDX-License-Identifier: AGPL-3.0-only
3
+ /**
4
+ * PlatformLearner — scrape official docs, help center, shortcuts for a platform.
5
+ *
6
+ * Crawls documentation pages via CDP, extracts structured data,
7
+ * and saves as a reference JSON.
8
+ */
9
+ import fs from "node:fs";
10
+ import path from "node:path";
11
+ import { writeFileAtomicSync } from "../util/atomic-write.js";
12
/**
 * Build the list of candidate documentation URLs to probe for a platform.
 *
 * @param {string} platform - Platform slug. Used for the default origin
 *   (`https://<platform>.com`) and for the `help.` / `support.` / `docs.`
 *   subdomain guesses.
 * @param {string} [rootUrl] - Explicit site root. Overrides the default origin
 *   only; the subdomain guesses are still derived from `platform`.
 * @returns {string[]} Candidate URLs in probe order, without duplicates.
 */
export function buildDocUrls(platform, rootUrl) {
    const base = rootUrl ?? `https://${platform}.com`;
    // Strip every trailing slash so "https://x.com//" cannot yield "https://x.com//help".
    const origin = base.replace(/\/+$/, "");
    const candidates = [
        origin,
        `${origin}/help`,
        `${origin}/support`,
        `${origin}/docs`,
        `${origin}/keyboard-shortcuts`,
        `${origin}/shortcuts`,
        `https://help.${platform}.com`,
        `https://support.${platform}.com`,
        `https://docs.${platform}.com`,
        `${origin}/developers`,
        `${origin}/api`,
        `${origin}/changelog`,
        `${origin}/whats-new`,
    ];
    // A rootUrl such as "https://help.<platform>.com" would otherwise appear twice.
    return [...new Set(candidates)];
}
32
/**
 * Extract keyboard shortcuts from the page currently loaded in the browser.
 *
 * The in-page script collects two-cell table rows where either cell looks like
 * a key combination, plus every `<kbd>` element paired with the text of its
 * closest `li`/`tr`/`p`/`div` ancestor (truncated to 80 characters).
 *
 * @param {(expression: string) => Promise<{ result?: { value?: unknown } } | null | undefined>} cdpEvaluate
 *   Evaluates a JavaScript expression in the page and resolves to a
 *   `Runtime.evaluate`-style response.
 * @returns {Promise<Record<string, string>>} Extracted mapping; `{}` when the
 *   evaluation produced no object value.
 */
export async function extractShortcuts(cdpEvaluate) {
    const result = await cdpEvaluate(`(() => {
        const shortcuts = {};
        // Look for common shortcut table patterns
        const tables = document.querySelectorAll('table');
        for (const table of tables) {
            const rows = table.querySelectorAll('tr');
            for (const row of rows) {
                const cells = row.querySelectorAll('td, th');
                if (cells.length >= 2) {
                    const text0 = (cells[0].textContent || '').trim();
                    const text1 = (cells[1].textContent || '').trim();
                    // Check if either cell contains key combos
                    if (text0.match(/[⌘⌥⇧⌃]|ctrl|cmd|alt|shift/i) || text1.match(/[⌘⌥⇧⌃]|ctrl|cmd|alt|shift/i)) {
                        shortcuts[text0] = text1;
                    }
                }
            }
        }
        // Also check kbd elements
        const kbds = document.querySelectorAll('kbd');
        for (const kbd of kbds) {
            const parent = kbd.closest('li, tr, p, div');
            if (parent) {
                const keyText = kbd.textContent.trim();
                const descText = parent.textContent.replace(keyText, '').trim().substring(0, 80);
                if (keyText && descText) shortcuts[keyText] = descText;
            }
        }
        return shortcuts;
    })()`);
    // The evaluator may resolve to nothing, or the in-page script may have
    // thrown (no `value`); fall back to an empty mapping in both cases.
    const value = result?.result?.value;
    return value !== null && typeof value === "object" ? value : {};
}
66
/**
 * Extract the current page's title, headings, links and visible text.
 *
 * The in-page script returns h1–h3 heading texts, the first 100 `a[href]`
 * links (text truncated to 80 characters) and the first 8000 characters of
 * the body's inner text.
 *
 * @param {(expression: string) => Promise<{ result?: { value?: unknown } } | null | undefined>} cdpEvaluate
 *   Evaluates a JavaScript expression in the page and resolves to a
 *   `Runtime.evaluate`-style response.
 * @returns {Promise<{ title: string, headings: string[], links: Array<{ text: string, href: string }>, text: string }>}
 *   Page content. Every field is always present; missing or malformed fields
 *   fall back to an empty string / empty array.
 */
export async function extractPageContent(cdpEvaluate) {
    const result = await cdpEvaluate(`(() => {
        const headings = Array.from(document.querySelectorAll('h1, h2, h3')).map(h => h.textContent.trim()).filter(Boolean);
        const links = Array.from(document.querySelectorAll('a[href]')).slice(0, 100).map(a => ({
            text: (a.textContent || '').trim().substring(0, 80),
            href: a.href,
        })).filter(l => l.text && l.href);
        return {
            title: document.title,
            headings,
            links,
            // document.body can be null (e.g. non-HTML documents); do not lose
            // the title/headings/links because of it.
            text: (document.body?.innerText ?? '').substring(0, 8000),
        };
    })()`);
    const value = result?.result?.value;
    if (value === null || typeof value !== "object") {
        return { title: "", headings: [], links: [], text: "" };
    }
    // Normalise field by field so consumers can iterate headings/links and
    // split text without their own null checks.
    return {
        ...value,
        title: typeof value.title === "string" ? value.title : "",
        headings: Array.isArray(value.headings) ? value.headings : [],
        links: Array.isArray(value.links) ? value.links : [],
        text: typeof value.text === "string" ? value.text : "",
    };
}
83
/**
 * Extract CSS selectors for interactive elements on the current page.
 *
 * The in-page script inspects up to 50 elements carrying `data-testid`,
 * `aria-label`, or a button/tab/menuitem role. Each element is keyed by its
 * test id, else its aria-label, else the first 30 characters of its text.
 * The selector prefers `data-testid`, then `id`, then `aria-label`; elements
 * with none of these are skipped.
 *
 * @param {(expression: string) => Promise<{ result?: { value?: unknown } } | null | undefined>} cdpEvaluate
 *   Evaluates a JavaScript expression in the page and resolves to a
 *   `Runtime.evaluate`-style response.
 * @returns {Promise<Record<string, string>>} Map of element key to CSS
 *   selector; `{}` when the evaluation produced no object value.
 */
export async function extractSelectors(cdpEvaluate) {
    // String.raw keeps the backslashes below exactly as the page must see them.
    const result = await cdpEvaluate(String.raw`(() => {
        // Quote a value for use inside a CSS attribute selector: a raw '"' or '\'
        // in the value would otherwise terminate or corrupt the selector.
        const quote = (value) => '"' + value.replace(/["\\]/g, '\\$&') + '"';
        const selectors = {};
        const elements = document.querySelectorAll('[data-testid], [aria-label], [role="button"], [role="tab"], [role="menuitem"]');
        for (const el of Array.from(elements).slice(0, 50)) {
            const testId = el.getAttribute('data-testid');
            const label = el.getAttribute('aria-label');
            const key = testId || label || el.textContent?.trim().substring(0, 30) || '';
            if (!key) continue;

            let selector = '';
            if (testId) selector = '[data-testid=' + quote(testId) + ']';
            // Ids may start with a digit or contain ':' / '.', which are not valid in a bare #id selector.
            else if (el.id) selector = '#' + CSS.escape(el.id);
            else if (label) selector = '[aria-label=' + quote(label) + ']';

            if (selector) selectors[key] = selector;
        }
        return selectors;
    })()`);
    const value = result?.result?.value;
    return value !== null && typeof value === "object" ? value : {};
}
105
/**
 * Crawl one page via CDP: navigate, wait for load, then extract content,
 * shortcuts and selectors.
 *
 * @param {object} cdpClient - CDP client exposing `Page.navigate`,
 *   `Page.loadEventFired` (promise-returning when called without a callback)
 *   and `Runtime.evaluate`.
 * @param {string} url - URL to load.
 * @param {number} [timeoutMs=10000] - Maximum time to wait for the load event.
 *   Extraction proceeds anyway once this elapses.
 * @param {number} [settleMs=2000] - Extra delay after load so client-rendered
 *   (SPA) content can appear.
 * @returns {Promise<{ success: true, content: object, shortcuts: object, selectors: object } | { success: false, error: string }>}
 *   Never rejects; failures are reported through `success: false`.
 */
export async function crawlPage(cdpClient, url, timeoutMs = 10000, settleMs = 2000) {
    try {
        // Subscribe BEFORE navigating: the load event can fire before
        // Page.navigate's promise is observed, and a late subscription would
        // then sit out the full timeout for nothing.
        const loadFired = cdpClient.Page.loadEventFired().catch(() => { });
        const navigation = await cdpClient.Page.navigate({ url });
        // CDP reports failed navigations (DNS failure, refused connection, …)
        // via errorText instead of rejecting; do not scrape the error page.
        if (navigation?.errorText) {
            throw new Error(`Navigation to ${url} failed: ${navigation.errorText}`);
        }
        let timer;
        const timedOut = new Promise((resolve) => {
            timer = setTimeout(resolve, timeoutMs);
        });
        try {
            await Promise.race([loadFired, timedOut]);
        }
        finally {
            clearTimeout(timer);
        }
        // Extra wait for SPA content
        if (settleMs > 0) {
            await new Promise((resolve) => setTimeout(resolve, settleMs));
        }
        const evaluate = (expression) => cdpClient.Runtime.evaluate({ expression, returnByValue: true, awaitPromise: true });
        const content = await extractPageContent(evaluate);
        const shortcuts = await extractShortcuts(evaluate);
        const selectors = await extractSelectors(evaluate);
        return { success: true, content, shortcuts, selectors };
    }
    catch (err) {
        return { success: false, error: err instanceof Error ? err.message : String(err) };
    }
}
129
/** Matches a numbered step line such as "1. Click…" or "2) Enter…". */
const STEP_LINE = /^(\d+)[.)]\s+(.+)/;

/**
 * Store a finished flow if it has at least two steps and its key is unused.
 * The first flow recorded under a key wins.
 */
function storeFlow(flows, flow) {
    if (!flow || flow.steps.length < 2)
        return;
    const key = flow.name.toLowerCase().replace(/\s+/g, "_");
    if (!flows.has(key)) {
        flows.set(key, { description: flow.name, steps: flow.steps });
    }
}

/** Scan page text for numbered step sequences and record each one as a flow. */
function collectFlows(text, flows) {
    const lines = text.split("\n");
    let current = null;
    for (let i = 0; i < lines.length; i++) {
        const line = lines[i].trim();
        const stepMatch = line.match(STEP_LINE);
        if (stepMatch) {
            const stepNum = Number.parseInt(stepMatch[1], 10);
            const stepText = stepMatch[2].trim();
            if (stepNum === 1 && stepText.length > 5) {
                // A new list may start right after the previous one; save the
                // previous flow instead of silently discarding it.
                storeFlow(flows, current);
                // Name the flow after the first non-numbered line within the three lines above it.
                const heading = lines
                    .slice(Math.max(0, i - 3), i)
                    .map((l) => l.trim())
                    .find((l) => l.length > 3 && !/^\d/.test(l));
                const fallback = `flow_${flows.size + 1}`;
                // Headings made only of stripped characters (e.g. non-ASCII)
                // would otherwise produce an empty name and key.
                const name = (heading ?? fallback).replace(/[^a-zA-Z0-9_ ]/g, "").substring(0, 50).trim() || fallback;
                current = { name, steps: [stepText] };
            }
            else if (current && stepNum > 1) {
                current.steps.push(stepText);
            }
        }
        else if (current && current.steps.length >= 2) {
            // End of step sequence — save the flow
            storeFlow(flows, current);
            current = null;
        }
        else if (line.length > 0 && !/^\d/.test(line)) {
            current = null;
        }
    }
    // Save any trailing flow
    storeFlow(flows, current);
}

/**
 * Compile crawl results into a single learn result.
 *
 * @param {string} platform - Platform name stored in the result.
 * @param {Array<{ url: string, content?: { title?: string, headings?: string[], links?: Array<{ text: string, href: string }>, text?: string }, shortcuts?: Record<string, string>, selectors?: Record<string, string> }>} crawledPages
 *   One entry per crawled page.
 * @returns {{ platform: string, learnedAt: string, sourceUrls: string[], shortcuts: object, features: string[], selectors: object, flows: object, apiEndpoints: string[], knownLimitations: string[], tips: string[] }}
 *   Aggregated result. `features` is capped at 50 entries, `apiEndpoints` and
 *   `knownLimitations` at 20 each, all de-duplicated. `tips` is always empty.
 */
export function compileLearnResult(platform, crawledPages) {
    const allShortcuts = {};
    // Maps keep page-derived keys (titles, headings) from touching Object.prototype.
    const selectorsByPage = new Map();
    const flows = new Map();
    const features = [];
    const tips = [];
    const sourceUrls = [];
    const apiEndpoints = [];
    const knownLimitations = [];
    for (const page of crawledPages) {
        sourceUrls.push(page.url);
        if (page.shortcuts) {
            Object.assign(allShortcuts, page.shortcuts);
        }
        if (page.selectors && Object.keys(page.selectors).length > 0) {
            const title = typeof page.content?.title === "string" ? page.content.title : "";
            // `||` rather than `??`: an empty title must also fall back to "page".
            const pageName = title.replace(/[^a-zA-Z0-9]/g, "_").substring(0, 30) || "page";
            // Merge instead of overwrite so pages sharing a title keep all selectors.
            selectorsByPage.set(pageName, { ...(selectorsByPage.get(pageName) ?? {}), ...page.selectors });
        }
        if (!page.content)
            continue;
        const headings = page.content.headings ?? [];
        const links = page.content.links ?? [];
        const text = page.content.text ?? "";
        // Extract features from headings
        for (const h of headings) {
            if (h.length > 3 && h.length < 80)
                features.push(h);
        }
        // Look for API-related links
        for (const link of links) {
            if (/api|developer|endpoint|sdk|integration/i.test(link.text)) {
                apiEndpoints.push(`${link.text}: ${link.href}`);
            }
        }
        // Extract flows from numbered step sequences (e.g. "1. Click..." "2. Enter..." "3. Submit...")
        collectFlows(text, flows);
        // Look for limitation/known-issue mentions. Each line is tested
        // directly so every phrase in the pattern is honoured.
        for (const line of text.split("\n")) {
            if (/limitation|known issue|not supported|doesn't support|won't work/i.test(line)) {
                knownLimitations.push(line.trim().substring(0, 200));
            }
        }
    }
    return {
        platform,
        learnedAt: new Date().toISOString(),
        sourceUrls,
        shortcuts: allShortcuts,
        features: [...new Set(features)].slice(0, 50),
        selectors: Object.fromEntries(selectorsByPage),
        flows: Object.fromEntries(flows),
        apiEndpoints: [...new Set(apiEndpoints)].slice(0, 20),
        knownLimitations: [...new Set(knownLimitations)].slice(0, 20),
        tips,
    };
}
223
/**
 * Save a learn result as a reference JSON file named
 * `<platform>-learned.json` inside `referencesDir`.
 *
 * @param {string} referencesDir - Target directory; created (recursively) if missing.
 * @param {object} result - Learn result with `platform`, `learnedAt`,
 *   `sourceUrls`, `shortcuts`, `features`, `selectors`, `flows`,
 *   `apiEndpoints`, `knownLimitations`, `tips` and optionally `bundleId`.
 * @returns {string} Path of the written file.
 */
export function saveLearnResult(referencesDir, result) {
    // Recursive mkdir succeeds when the directory already exists, so no
    // separate existence check (and no check-then-create race) is needed.
    fs.mkdirSync(referencesDir, { recursive: true });
    // The platform name becomes part of a file name: neutralise path
    // separators so a value like "../../x" cannot escape referencesDir.
    const safePlatform = String(result.platform).replace(/[\\/\0]/g, "_");
    const filePath = path.join(referencesDir, `${safePlatform}-learned.json`);
    const reference = {
        id: `${result.platform}-learned`,
        name: `${result.platform} — Auto-Learned from Docs`,
        description: `Scraped ${result.sourceUrls.length} documentation pages. Found ${Object.keys(result.shortcuts).length} shortcuts, ${result.features.length} features.`,
        platform: result.platform,
        bundleId: result.bundleId ?? null,
        version: "1.0.0",
        tags: [result.platform, "auto-learned"],
        successCount: 0,
        failCount: 0,
        urls: Object.fromEntries(result.sourceUrls.map((u, i) => [`doc_${i}`, u])),
        selectors: result.selectors,
        shortcuts: result.shortcuts,
        flows: result.flows,
        detection: {},
        errors: [],
        policyNotes: {},
        _meta: {
            learnedAt: result.learnedAt,
            sourceUrls: result.sourceUrls,
            features: result.features,
            apiEndpoints: result.apiEndpoints,
            knownLimitations: result.knownLimitations,
            tips: result.tips,
        },
    };
    writeFileAtomicSync(filePath, JSON.stringify(reference, null, 2));
    return filePath;
}
@@ -14,13 +14,25 @@
14
14
  //
15
15
  // You should have received a copy of the GNU Affero General Public License
16
16
  // along with ScreenHand. If not, see <https://www.gnu.org/licenses/>.
17
+ import { readObserverState, getObserverPopup } from "../observer/state.js";
17
18
  const DEFAULT_VERIFY_TIMEOUT = 5000;
18
19
  const STEP_DELAY_MS = 300;
19
20
  export class PlaybookEngine {
20
21
  runtime;
22
+ cdpConnect;
23
+ /** Enable observer-based popup checks before each step */
24
+ popupCheckEnabled = false;
21
25
  /** Store the runtime that this engine's step actions are dispatched to. */
  constructor(runtime) {
22
26
  this.runtime = runtime;
23
27
  }
28
+ /** Enable/disable pre-step popup detection via observer daemon */
29
+ setPopupCheck(enabled) {
30
+ this.popupCheckEnabled = enabled;
31
+ }
32
+ /** Set CDP connection factory for browser_js and cdp_key_event actions. Factory accepts optional port override. */
33
+ setCDPConnect(factory) {
34
+ this.cdpConnect = factory;
35
+ }
24
36
  /**
25
37
  * Execute a playbook against a live session.
26
38
  * Returns result with success/failure and which step broke.
@@ -29,9 +41,20 @@ export class PlaybookEngine {
29
41
  const start = Date.now();
30
42
  let stepsCompleted = 0;
31
43
  for (let i = 0; i < playbook.steps.length; i++) {
32
- const step = playbook.steps[i];
44
+ let step = options.vars ? this.substituteVars(playbook.steps[i], options.vars) : playbook.steps[i];
33
45
  try {
34
- const result = await this.executeStep(sessionId, step);
46
+ // Pre-step: check for popups via observer (if enabled, non-blocking)
47
+ if (this.popupCheckEnabled) {
48
+ await this.dismissPopupIfPresent(sessionId);
49
+ }
50
+ // OCR-based locate: resolve locateByOcr to coordinates before execution
51
+ if (step.locateByOcr) {
52
+ const coords = this.resolveOcrTarget(step.locateByOcr, step.offsetX ?? 0, step.offsetY ?? 0);
53
+ if (coords) {
54
+ step = { ...step, target: { x: coords.x, y: coords.y } };
55
+ }
56
+ }
57
+ const result = await this.executeStep(sessionId, step, playbook.cdpPort);
35
58
  stepsCompleted++;
36
59
  if (options.onStep) {
37
60
  options.onStep(i, step, result);
@@ -85,7 +108,7 @@ export class PlaybookEngine {
85
108
  /**
86
109
  * Execute a single playbook step.
87
110
  */
88
- async executeStep(sessionId, step) {
111
+ async executeStep(sessionId, step, cdpPort) {
89
112
  const target = this.resolveTarget(step.target);
90
113
  switch (step.action) {
91
114
  case "navigate": {
@@ -105,14 +128,21 @@ export class PlaybookEngine {
105
128
  return `Pressed ${JSON.stringify(step.target)}`;
106
129
  }
107
130
  case "type_into": {
108
- if (!target)
109
- throw new Error("type_into step missing target");
110
131
  if (!step.text)
111
132
  throw new Error("type_into step missing text");
112
- const r = await this.runtime.typeInto({ sessionId, target, text: step.text });
113
- if (!r.ok)
114
- throw new Error(r.error.message);
115
- return `Typed "${step.text}" into ${JSON.stringify(step.target)}`;
133
+ if (target) {
134
+ const r = await this.runtime.typeInto({ sessionId, target, text: step.text });
135
+ if (!r.ok)
136
+ throw new Error(r.error.message);
137
+ return `Typed "${step.text}" into ${JSON.stringify(step.target)}`;
138
+ }
139
+ // No target — type into focused element character by character via key events
140
+ for (const char of step.text) {
141
+ const r = await this.runtime.keyCombo({ sessionId, keys: [char] });
142
+ if (!r.ok)
143
+ throw new Error(r.error?.message ?? "key event failed");
144
+ }
145
+ return `Typed "${step.text}" into focused element`;
116
146
  }
117
147
  case "extract": {
118
148
  if (!target)
@@ -126,13 +156,22 @@ export class PlaybookEngine {
126
156
  throw new Error(r.error.message);
127
157
  return `Extracted: ${JSON.stringify(r.data).slice(0, 200)}`;
128
158
  }
159
+ case "key":
129
160
  case "key_combo": {
130
161
  if (!step.keys || step.keys.length === 0)
131
- throw new Error("key_combo step missing keys");
162
+ throw new Error(`${step.action} step missing keys`);
132
163
  const r = await this.runtime.keyCombo({ sessionId, keys: step.keys });
133
164
  if (!r.ok)
134
165
  throw new Error(r.error.message);
135
- return `Key combo: ${step.keys.join("+")}`;
166
+ return `${step.action === "key" ? "Key" : "Key combo"}: ${step.keys.join("+")}`;
167
+ }
168
+ case "menu_click": {
169
+ if (!step.menuPath || step.menuPath.length === 0)
170
+ throw new Error("menu_click step missing menuPath");
171
+ const r = await this.runtime.menuClick({ sessionId, menuPath: step.menuPath });
172
+ if (!r.ok)
173
+ throw new Error(r.error.message);
174
+ return `Menu click: ${step.menuPath.join(" > ")}`;
136
175
  }
137
176
  case "scroll": {
138
177
  const input = {
@@ -156,10 +195,115 @@ export class PlaybookEngine {
156
195
  throw new Error(r.error.message);
157
196
  return `Screenshot taken`;
158
197
  }
198
+ case "browser_js": {
199
+ if (!step.code)
200
+ throw new Error("browser_js step missing code");
201
+ if (!this.cdpConnect)
202
+ throw new Error("browser_js requires CDP — call setCDPConnect() first");
203
+ const client = await this.cdpConnect(cdpPort);
204
+ try {
205
+ const result = await client.Runtime.evaluate({
206
+ expression: step.code,
207
+ awaitPromise: true,
208
+ returnByValue: true,
209
+ });
210
+ if (result.exceptionDetails) {
211
+ throw new Error(`JS Error: ${result.exceptionDetails.text ?? result.exceptionDetails.exception?.description ?? "unknown"}`);
212
+ }
213
+ const val = result.result?.value;
214
+ return `browser_js: ${typeof val === "object" ? JSON.stringify(val) : String(val ?? "undefined")}`;
215
+ }
216
+ finally {
217
+ await client.close();
218
+ }
219
+ }
220
+ case "browser_click":
221
+ case "browser_human_click": {
222
+ const selector = this.getBrowserSelector(step);
223
+ if (!this.cdpConnect)
224
+ throw new Error(`${step.action} requires CDP — call setCDPConnect() first`);
225
+ const client = await this.cdpConnect(cdpPort);
226
+ try {
227
+ const point = await this.resolveBrowserClickPoint(client, selector);
228
+ await this.dispatchMouseClick(client, point.x, point.y);
229
+ return `${step.action}: clicked ${selector}`;
230
+ }
231
+ finally {
232
+ await client.close();
233
+ }
234
+ }
235
+ case "browser_type": {
236
+ const selector = this.getBrowserSelector(step);
237
+ if (!step.text)
238
+ throw new Error("browser_type step missing text");
239
+ if (!this.cdpConnect)
240
+ throw new Error("browser_type requires CDP — call setCDPConnect() first");
241
+ const client = await this.cdpConnect(cdpPort);
242
+ try {
243
+ await this.focusBrowserElement(client, selector);
244
+ const shouldClear = step.text !== undefined;
245
+ if (shouldClear) {
246
+ await this.dispatchSelectAll(client);
247
+ await this.dispatchKey(client, "Backspace", "Backspace");
248
+ await sleep(50);
249
+ }
250
+ for (const char of step.text) {
251
+ await this.dispatchTextChar(client, char);
252
+ await sleep(50);
253
+ }
254
+ return `browser_type: typed ${step.text.length} chars into ${selector}`;
255
+ }
256
+ finally {
257
+ await client.close();
258
+ }
259
+ }
260
+ case "cdp_key_event": {
261
+ if (!step.keyEvent)
262
+ throw new Error("cdp_key_event step missing keyEvent");
263
+ if (!this.cdpConnect)
264
+ throw new Error("cdp_key_event requires CDP — call setCDPConnect() first");
265
+ const client = await this.cdpConnect(cdpPort);
266
+ try {
267
+ const { key, code, modifiers, windowsVirtualKeyCode } = step.keyEvent;
268
+ const baseParams = { key, code, modifiers: modifiers ?? 0, windowsVirtualKeyCode: windowsVirtualKeyCode ?? 0, nativeVirtualKeyCode: windowsVirtualKeyCode ?? 0 };
269
+ await client.Input.dispatchKeyEvent({ type: "keyDown", ...baseParams });
270
+ await client.Input.dispatchKeyEvent({ type: "keyUp", ...baseParams });
271
+ return `cdp_key_event: ${modifiers ? `mod${modifiers}+` : ""}${key}`;
272
+ }
273
+ finally {
274
+ await client.close();
275
+ }
276
+ }
159
277
  default:
160
278
  throw new Error(`Unknown action: ${step.action}`);
161
279
  }
162
280
  }
281
+ /**
282
+ * Substitute {VAR_NAME} placeholders in step string fields with actual values.
283
+ */
284
+ substituteVars(step, vars) {
285
+ const sub = (s) => {
286
+ let result = s;
287
+ for (const [key, val] of Object.entries(vars)) {
288
+ result = result.replaceAll(`{${key}}`, val);
289
+ }
290
+ return result;
291
+ };
292
+ const result = { ...step };
293
+ if (result.code)
294
+ result.code = sub(result.code);
295
+ if (result.text)
296
+ result.text = sub(result.text);
297
+ if (result.url)
298
+ result.url = sub(result.url);
299
+ if (result.description)
300
+ result.description = sub(result.description);
301
+ if (result.verify)
302
+ result.verify = sub(result.verify);
303
+ if (result.menuPath)
304
+ result.menuPath = result.menuPath.map(sub);
305
+ return result;
306
+ }
163
307
  /**
164
308
  * Verify a step's postcondition via CSS selector check.
165
309
  */
@@ -174,6 +318,85 @@ export class PlaybookEngine {
174
318
  });
175
319
  return r.ok && r.data.matched;
176
320
  }
321
+ /**
322
+ * Dismiss a popup detected by the observer daemon.
323
+ * Reads observer state, if popup found, sends the appropriate dismiss action.
324
+ * Non-fatal — if observer isn't running or no popup, silently returns.
325
+ */
326
+ async dismissPopupIfPresent(sessionId) {
327
+ let popup;
328
+ try {
329
+ popup = getObserverPopup();
330
+ }
331
+ catch {
332
+ return; // Observer not running or state unreadable
333
+ }
334
+ if (!popup)
335
+ return;
336
+ try {
337
+ switch (popup.dismissAction) {
338
+ case "press_escape":
339
+ await this.runtime.keyCombo({ sessionId, keys: ["escape"] });
340
+ break;
341
+ case "click_ok":
342
+ case "click_cancel":
343
+ case "click_close":
344
+ case "click_allow":
345
+ case "click_deny": {
346
+ // Map action to button text
347
+ const buttonMap = {
348
+ click_ok: "OK",
349
+ click_cancel: "Cancel",
350
+ click_close: "Close",
351
+ click_allow: "Allow",
352
+ click_deny: "Don't Allow",
353
+ };
354
+ const buttonText = buttonMap[popup.dismissAction] ?? "OK";
355
+ // Try to click the button by text
356
+ await this.runtime.press({ sessionId, target: { type: "text", value: buttonText } });
357
+ break;
358
+ }
359
+ case "unknown":
360
+ break; // Don't auto-dismiss unknown popups
361
+ }
362
+ // Wait briefly for popup to close
363
+ await sleep(500);
364
+ }
365
+ catch {
366
+ // Popup dismiss failed — non-fatal, continue with step
367
+ }
368
+ }
369
+ /**
370
+ * Resolve an OCR text target to screen coordinates using observer state.
371
+ * Returns center coordinates of the matched text + offsets, or null if not found.
372
+ */
373
+ resolveOcrTarget(searchText, offsetX, offsetY) {
374
+ let state;
375
+ try {
376
+ state = readObserverState();
377
+ }
378
+ catch {
379
+ return null;
380
+ }
381
+ if (!state?.running || !state.lastFrame?.ocrText)
382
+ return null;
383
+ // Simple text search in OCR output
384
+ // The native OCR (vision.ocr) returns bounding boxes when available.
385
+ // For now we use a fallback: if the observer has the text, we know
386
+ // the element is visible. The caller should provide approximate
387
+ // coordinates via offsetX/offsetY relative to a known anchor.
388
+ const ocrText = state.lastFrame.ocrText;
389
+ if (!ocrText.toLowerCase().includes(searchText.toLowerCase())) {
390
+ return null; // Text not found on screen
391
+ }
392
+ // Text found — return offset coordinates (caller provides absolute offsets
393
+ // or relative to screen center as a basic heuristic)
394
+ if (offsetX !== 0 || offsetY !== 0) {
395
+ return { x: offsetX, y: offsetY };
396
+ }
397
+ // No explicit coordinates — can't determine position from plain OCR text alone
398
+ return null;
399
+ }
177
400
  /**
178
401
  * Convert playbook target format to runtime Target format.
179
402
  */
@@ -195,6 +418,68 @@ export class PlaybookEngine {
195
418
  }
196
419
  return undefined;
197
420
  }
421
+ getBrowserSelector(step) {
422
+ if (typeof step.target === "string")
423
+ return step.target;
424
+ if (step.target && "selector" in step.target)
425
+ return step.target.selector;
426
+ if (step.verify)
427
+ return step.verify;
428
+ throw new Error(`${step.action} step missing selector target`);
429
+ }
430
+ async focusBrowserElement(client, selector) {
431
+ const result = await client.Runtime.evaluate({
432
+ expression: `(() => {
433
+ const el = document.querySelector(${JSON.stringify(selector)});
434
+ if (!(el instanceof HTMLElement)) return { ok: false, reason: "Element not found: ${selector.replace(/"/g, '\\"')}" };
435
+ el.scrollIntoView({ block: "center" });
436
+ el.focus();
437
+ return { ok: true };
438
+ })()`,
439
+ returnByValue: true,
440
+ });
441
+ const value = result.result?.value;
442
+ if (!value?.ok) {
443
+ throw new Error(value?.reason || `Element not found: ${selector}`);
444
+ }
445
+ }
446
+ async resolveBrowserClickPoint(client, selector) {
447
+ const result = await client.Runtime.evaluate({
448
+ expression: `(() => {
449
+ const el = document.querySelector(${JSON.stringify(selector)});
450
+ if (!(el instanceof HTMLElement)) return { ok: false, reason: "Element not found: ${selector.replace(/"/g, '\\"')}" };
451
+ el.scrollIntoView({ block: "center" });
452
+ const r = el.getBoundingClientRect();
453
+ return { ok: true, x: r.x + r.width / 2, y: r.y + r.height / 2 };
454
+ })()`,
455
+ returnByValue: true,
456
+ });
457
+ const value = result.result?.value;
458
+ if (!value?.ok) {
459
+ throw new Error(value?.reason || `Element not found: ${selector}`);
460
+ }
461
+ return { x: value.x, y: value.y };
462
+ }
463
+ async dispatchMouseClick(client, x, y) {
464
+ await client.Input.dispatchMouseEvent({ type: "mouseMoved", x, y });
465
+ await sleep(40);
466
+ await client.Input.dispatchMouseEvent({ type: "mousePressed", x, y, button: "left", clickCount: 1 });
467
+ await sleep(40);
468
+ await client.Input.dispatchMouseEvent({ type: "mouseReleased", x, y, button: "left", clickCount: 1 });
469
+ }
470
+ async dispatchSelectAll(client) {
471
+ const metaModifier = process.platform === "darwin" ? 4 : 2;
472
+ await client.Input.dispatchKeyEvent({ type: "keyDown", key: "a", code: "KeyA", modifiers: metaModifier });
473
+ await client.Input.dispatchKeyEvent({ type: "keyUp", key: "a", code: "KeyA", modifiers: metaModifier });
474
+ }
475
+ async dispatchKey(client, key, code) {
476
+ await client.Input.dispatchKeyEvent({ type: "keyDown", key, code });
477
+ await client.Input.dispatchKeyEvent({ type: "keyUp", key, code });
478
+ }
479
+ async dispatchTextChar(client, char) {
480
+ await client.Input.dispatchKeyEvent({ type: "keyDown", text: char, key: char, unmodifiedText: char });
481
+ await client.Input.dispatchKeyEvent({ type: "keyUp", text: char, key: char, unmodifiedText: char });
482
+ }
198
483
  }
199
484
  function sleep(ms) {
200
485
  return new Promise((resolve) => setTimeout(resolve, ms));