screenhand 0.1.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (241)
  1. package/README.md +193 -109
  2. package/bin/darwin-arm64/macos-bridge +0 -0
  3. package/dist/mcp-desktop.js +5876 -0
  4. package/dist/scripts/codex-monitor-daemon.js +335 -0
  5. package/dist/scripts/export-help-center.js +112 -0
  6. package/dist/scripts/marketing-loop.js +117 -0
  7. package/dist/scripts/observer-daemon.js +288 -0
  8. package/dist/scripts/orchestrator-daemon.js +399 -0
  9. package/dist/scripts/supervisor-daemon.js +272 -0
  10. package/dist/scripts/threads-campaign.js +208 -0
  11. package/dist/scripts/worker-daemon.js +228 -0
  12. package/dist/src/agent/cli.js +82 -0
  13. package/dist/src/agent/loop.js +274 -0
  14. package/dist/src/community/fetcher.js +109 -0
  15. package/dist/src/community/index.js +6 -0
  16. package/dist/src/community/publisher.js +191 -0
  17. package/dist/src/community/remote-api.js +121 -0
  18. package/dist/src/community/types.js +3 -0
  19. package/dist/src/community/validator.js +95 -0
  20. package/{src/config.ts → dist/src/config.js} +5 -10
  21. package/dist/src/context-tracker.js +489 -0
  22. package/{src/index.ts → dist/src/index.js} +32 -52
  23. package/dist/src/ingestion/coverage-auditor.js +233 -0
  24. package/dist/src/ingestion/doc-parser.js +164 -0
  25. package/dist/src/ingestion/index.js +8 -0
  26. package/dist/src/ingestion/menu-scanner.js +152 -0
  27. package/dist/src/ingestion/reference-merger.js +186 -0
  28. package/dist/src/ingestion/shortcut-extractor.js +180 -0
  29. package/dist/src/ingestion/tutorial-extractor.js +170 -0
  30. package/dist/src/ingestion/types.js +3 -0
  31. package/dist/src/jobs/manager.js +305 -0
  32. package/dist/src/jobs/runner.js +806 -0
  33. package/dist/src/jobs/store.js +102 -0
  34. package/dist/src/jobs/types.js +30 -0
  35. package/dist/src/jobs/worker.js +97 -0
  36. package/dist/src/learning/engine.js +356 -0
  37. package/dist/src/learning/index.js +9 -0
  38. package/dist/src/learning/locator-policy.js +120 -0
  39. package/dist/src/learning/pattern-policy.js +89 -0
  40. package/dist/src/learning/recovery-policy.js +116 -0
  41. package/dist/src/learning/sensor-policy.js +115 -0
  42. package/dist/src/learning/timing-model.js +204 -0
  43. package/dist/src/learning/topology-policy.js +90 -0
  44. package/dist/src/learning/types.js +9 -0
  45. package/dist/src/logging/timeline-logger.js +48 -0
  46. package/dist/src/mcp/mcp-stdio-server.js +464 -0
  47. package/dist/src/mcp/server.js +363 -0
  48. package/dist/src/mcp-entry.js +60 -0
  49. package/dist/src/memory/playbook-seeds.js +200 -0
  50. package/dist/src/memory/recall.js +222 -0
  51. package/dist/src/memory/research.js +104 -0
  52. package/dist/src/memory/seeds.js +101 -0
  53. package/dist/src/memory/service.js +446 -0
  54. package/dist/src/memory/session.js +169 -0
  55. package/dist/src/memory/store.js +451 -0
  56. package/{src/runtime/locator-cache.ts → dist/src/memory/types.js} +1 -17
  57. package/dist/src/monitor/codex-monitor.js +382 -0
  58. package/dist/src/monitor/task-queue.js +97 -0
  59. package/dist/src/monitor/types.js +62 -0
  60. package/dist/src/native/bridge-client.js +412 -0
  61. package/{src/native/macos-bridge-client.ts → dist/src/native/macos-bridge-client.js} +0 -1
  62. package/dist/src/observer/state.js +199 -0
  63. package/dist/src/observer/types.js +43 -0
  64. package/dist/src/orchestrator/state.js +68 -0
  65. package/dist/src/orchestrator/types.js +22 -0
  66. package/dist/src/perception/ax-source.js +162 -0
  67. package/dist/src/perception/cdp-source.js +162 -0
  68. package/dist/src/perception/coordinator.js +771 -0
  69. package/dist/src/perception/frame-differ.js +287 -0
  70. package/dist/src/perception/index.js +22 -0
  71. package/dist/src/perception/manager.js +199 -0
  72. package/dist/src/perception/types.js +47 -0
  73. package/dist/src/perception/vision-source.js +399 -0
  74. package/dist/src/planner/deterministic.js +298 -0
  75. package/dist/src/planner/executor.js +870 -0
  76. package/dist/src/planner/goal-store.js +92 -0
  77. package/dist/src/planner/index.js +21 -0
  78. package/dist/src/planner/planner.js +520 -0
  79. package/dist/src/planner/tool-registry.js +71 -0
  80. package/dist/src/planner/types.js +22 -0
  81. package/dist/src/platform/explorer.js +213 -0
  82. package/dist/src/platform/help-center-markdown.js +527 -0
  83. package/dist/src/platform/learner.js +257 -0
  84. package/dist/src/playbook/engine.js +486 -0
  85. package/dist/src/playbook/index.js +20 -0
  86. package/dist/src/playbook/mcp-recorder.js +204 -0
  87. package/dist/src/playbook/recorder.js +536 -0
  88. package/dist/src/playbook/runner.js +408 -0
  89. package/dist/src/playbook/store.js +312 -0
  90. package/dist/src/playbook/types.js +17 -0
  91. package/dist/src/recovery/detectors.js +156 -0
  92. package/dist/src/recovery/engine.js +327 -0
  93. package/dist/src/recovery/index.js +20 -0
  94. package/dist/src/recovery/strategies.js +274 -0
  95. package/dist/src/recovery/types.js +20 -0
  96. package/dist/src/runtime/accessibility-adapter.js +430 -0
  97. package/dist/src/runtime/app-adapter.js +64 -0
  98. package/dist/src/runtime/applescript-adapter.js +305 -0
  99. package/dist/src/runtime/ax-role-map.js +96 -0
  100. package/dist/src/runtime/browser-adapter.js +52 -0
  101. package/dist/src/runtime/cdp-chrome-adapter.js +521 -0
  102. package/dist/src/runtime/composite-adapter.js +221 -0
  103. package/dist/src/runtime/execution-contract.js +159 -0
  104. package/dist/src/runtime/executor.js +286 -0
  105. package/dist/src/runtime/locator-cache.js +50 -0
  106. package/dist/src/runtime/planning-loop.js +63 -0
  107. package/dist/src/runtime/service.js +432 -0
  108. package/dist/src/runtime/session-manager.js +63 -0
  109. package/dist/src/runtime/state-observer.js +121 -0
  110. package/dist/src/runtime/vision-adapter.js +225 -0
  111. package/dist/src/state/app-map-types.js +72 -0
  112. package/dist/src/state/app-map.js +1974 -0
  113. package/dist/src/state/entity-tracker.js +108 -0
  114. package/dist/src/state/fusion.js +96 -0
  115. package/dist/src/state/index.js +21 -0
  116. package/dist/src/state/ladder-generator.js +236 -0
  117. package/dist/src/state/persistence.js +156 -0
  118. package/dist/src/state/types.js +17 -0
  119. package/dist/src/state/world-model.js +1456 -0
  120. package/dist/src/supervisor/locks.js +186 -0
  121. package/dist/src/supervisor/supervisor.js +403 -0
  122. package/dist/src/supervisor/types.js +30 -0
  123. package/dist/src/test-mcp-protocol.js +154 -0
  124. package/dist/src/types.js +17 -0
  125. package/dist/src/util/atomic-write.js +133 -0
  126. package/dist/src/util/sanitize.js +146 -0
  127. package/dist-app-maps/com.figma.Desktop.json +959 -0
  128. package/dist-app-maps/com.hnc.Discord.json +1146 -0
  129. package/dist-app-maps/notion.id.json +2831 -0
  130. package/dist-playbooks/canva-screenhand-carousel.json +445 -0
  131. package/dist-playbooks/codex-desktop.json +76 -0
  132. package/dist-playbooks/competitor-research-stack.json +122 -0
  133. package/dist-playbooks/davinci-color-grade.json +153 -0
  134. package/dist-playbooks/davinci-edit-timeline.json +162 -0
  135. package/dist-playbooks/davinci-render.json +114 -0
  136. package/dist-playbooks/devto.json +52 -0
  137. package/dist-playbooks/discord.json +41 -0
  138. package/dist-playbooks/google-flow-create-project.json +59 -0
  139. package/dist-playbooks/google-flow-edit-image.json +90 -0
  140. package/dist-playbooks/google-flow-edit-video.json +90 -0
  141. package/dist-playbooks/google-flow-generate-image.json +68 -0
  142. package/dist-playbooks/google-flow-generate-video.json +191 -0
  143. package/dist-playbooks/google-flow-open-project.json +48 -0
  144. package/dist-playbooks/google-flow-open-scenebuilder.json +64 -0
  145. package/dist-playbooks/google-flow-search-assets.json +64 -0
  146. package/dist-playbooks/instagram.json +57 -0
  147. package/dist-playbooks/linkedin.json +52 -0
  148. package/dist-playbooks/n8n.json +43 -0
  149. package/dist-playbooks/reddit.json +52 -0
  150. package/dist-playbooks/threads.json +59 -0
  151. package/dist-playbooks/x-twitter.json +59 -0
  152. package/dist-playbooks/youtube.json +59 -0
  153. package/dist-references/canva.json +646 -0
  154. package/dist-references/codex-desktop.json +305 -0
  155. package/dist-references/davinci-resolve-keyboard.json +594 -0
  156. package/dist-references/davinci-resolve-menu-map.json +1139 -0
  157. package/dist-references/davinci-resolve-menus-batch1.json +116 -0
  158. package/dist-references/davinci-resolve-menus-batch2.json +372 -0
  159. package/dist-references/davinci-resolve-menus-batch3.json +330 -0
  160. package/dist-references/davinci-resolve-menus-batch4.json +297 -0
  161. package/dist-references/davinci-resolve-shortcuts.json +333 -0
  162. package/dist-references/devto.json +317 -0
  163. package/dist-references/discord.json +549 -0
  164. package/dist-references/figma.json +1186 -0
  165. package/dist-references/finder.json +146 -0
  166. package/dist-references/google-ads-transparency.json +95 -0
  167. package/dist-references/google-flow.json +649 -0
  168. package/dist-references/instagram.json +341 -0
  169. package/dist-references/linkedin.json +324 -0
  170. package/dist-references/meta-ad-library.json +86 -0
  171. package/dist-references/n8n.json +387 -0
  172. package/dist-references/notes.json +27 -0
  173. package/dist-references/notion.json +163 -0
  174. package/dist-references/reddit.json +341 -0
  175. package/dist-references/threads.json +337 -0
  176. package/dist-references/x-twitter.json +403 -0
  177. package/dist-references/youtube.json +373 -0
  178. package/native/macos-bridge/Package.swift +1 -0
  179. package/native/macos-bridge/Sources/AccessibilityBridge.swift +257 -36
  180. package/native/macos-bridge/Sources/AppManagement.swift +212 -2
  181. package/native/macos-bridge/Sources/CoreGraphicsBridge.swift +348 -53
  182. package/native/macos-bridge/Sources/StreamCapture.swift +136 -0
  183. package/native/macos-bridge/Sources/VisionBridge.swift +165 -7
  184. package/native/macos-bridge/Sources/main.swift +169 -16
  185. package/native/windows-bridge/Program.cs +5 -0
  186. package/native/windows-bridge/ScreenCapture.cs +124 -0
  187. package/package.json +29 -4
  188. package/scripts/postinstall.cjs +127 -0
  189. package/.claude/commands/automate.md +0 -28
  190. package/.claude/commands/debug-ui.md +0 -19
  191. package/.claude/commands/screenshot.md +0 -15
  192. package/.github/FUNDING.yml +0 -1
  193. package/.github/ISSUE_TEMPLATE/bug_report.md +0 -27
  194. package/.github/ISSUE_TEMPLATE/feature_request.md +0 -20
  195. package/.mcp.json +0 -8
  196. package/DESKTOP_MCP_GUIDE.md +0 -92
  197. package/SECURITY.md +0 -44
  198. package/docs/architecture.md +0 -47
  199. package/install-skills.sh +0 -19
  200. package/mcp-bridge.ts +0 -271
  201. package/mcp-desktop.ts +0 -1221
  202. package/playbooks/instagram.json +0 -41
  203. package/playbooks/instagram_v2.json +0 -201
  204. package/playbooks/x_v1.json +0 -211
  205. package/scripts/devpost-live-loop.mjs +0 -421
  206. package/src/logging/timeline-logger.ts +0 -55
  207. package/src/mcp/server.ts +0 -449
  208. package/src/memory/recall.ts +0 -191
  209. package/src/memory/research.ts +0 -146
  210. package/src/memory/seeds.ts +0 -123
  211. package/src/memory/session.ts +0 -201
  212. package/src/memory/store.ts +0 -434
  213. package/src/memory/types.ts +0 -69
  214. package/src/native/bridge-client.ts +0 -239
  215. package/src/runtime/accessibility-adapter.ts +0 -487
  216. package/src/runtime/app-adapter.ts +0 -169
  217. package/src/runtime/applescript-adapter.ts +0 -376
  218. package/src/runtime/ax-role-map.ts +0 -102
  219. package/src/runtime/browser-adapter.ts +0 -129
  220. package/src/runtime/cdp-chrome-adapter.ts +0 -676
  221. package/src/runtime/composite-adapter.ts +0 -274
  222. package/src/runtime/executor.ts +0 -396
  223. package/src/runtime/planning-loop.ts +0 -81
  224. package/src/runtime/service.ts +0 -448
  225. package/src/runtime/session-manager.ts +0 -50
  226. package/src/runtime/state-observer.ts +0 -136
  227. package/src/runtime/vision-adapter.ts +0 -297
  228. package/src/types.ts +0 -297
  229. package/tests/bridge-client.test.ts +0 -176
  230. package/tests/browser-stealth.test.ts +0 -210
  231. package/tests/composite-adapter.test.ts +0 -64
  232. package/tests/mcp-server.test.ts +0 -151
  233. package/tests/memory-recall.test.ts +0 -339
  234. package/tests/memory-research.test.ts +0 -159
  235. package/tests/memory-seeds.test.ts +0 -120
  236. package/tests/memory-store.test.ts +0 -392
  237. package/tests/types.test.ts +0 -92
  238. package/tsconfig.check.json +0 -17
  239. package/tsconfig.json +0 -19
  240. package/vitest.config.ts +0 -8
  241. /package/{playbooks → dist-references}/devpost.json +0 -0
@@ -1,274 +0,0 @@
1
- // Copyright (C) 2025 Clazro Technology Private Limited
2
- // SPDX-License-Identifier: AGPL-3.0-only
3
- //
4
- // This file is part of ScreenHand.
5
- //
6
- // ScreenHand is free software: you can redistribute it and/or modify
7
- // it under the terms of the GNU Affero General Public License as
8
- // published by the Free Software Foundation, version 3.
9
- //
10
- // ScreenHand is distributed in the hope that it will be useful,
11
- // but WITHOUT ANY WARRANTY; without even the implied warranty of
12
- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
- // GNU Affero General Public License for more details.
14
- //
15
- // You should have received a copy of the GNU Affero General Public License
16
- // along with ScreenHand. If not, see <https://www.gnu.org/licenses/>.
17
-
18
- import type {
19
- AXNode,
20
- AppContext,
21
- ExtractFormat,
22
- LocatedElement,
23
- PageMeta,
24
- RunningApp,
25
- SessionInfo,
26
- Target,
27
- WaitCondition,
28
- WindowInfo,
29
- } from "../types.js";
30
- import type { AppAdapter } from "./app-adapter.js";
31
- import type { BridgeClient } from "../native/bridge-client.js";
32
- // Legacy alias for backward compatibility
33
- type MacOSBridgeClient = BridgeClient;
34
- import { AccessibilityAdapter } from "./accessibility-adapter.js";
35
- import { AppleScriptAdapter } from "./applescript-adapter.js";
36
- import { CdpChromeAdapter, type CdpChromeAdapterOptions } from "./cdp-chrome-adapter.js";
37
- import { VisionAdapter } from "./vision-adapter.js";
38
-
39
/**
 * Bundle identifiers of the macOS apps whose sessions are handed to the CDP adapter.
 * Matching is an exact, case-sensitive `Set` lookup.
 */
const BROWSER_BUNDLES = new Set<string>([
  // Google Chrome
  "com.google.Chrome",
  "com.google.Chrome.canary",
  // Other Chromium-based browsers
  "com.brave.Browser",
  "com.microsoft.edgemac",
  "com.vivaldi.Vivaldi",
  "org.chromium.Chromium",
]);
48
-
49
/**
 * Windows process names whose sessions are handed to the CDP adapter.
 * Every browser is listed both bare and with its `.exe` suffix, all lowercase.
 */
const BROWSER_PROCESS_NAMES = new Set<string>(
  ["chrome", "brave", "msedge", "vivaldi", "chromium"].flatMap((baseName) => [
    baseName,
    `${baseName}.exe`,
  ]),
);
62
-
63
/** True when the current process runs on Windows (`process.platform` is "win32"). */
const isWindows: boolean = process.platform === "win32";
64
-
65
/** Routing decision recorded for one session. */
interface SessionRouting {
  /** Adapter instance that serves calls for the session. */
  adapter: AppAdapter;
  /** Short label identifying which adapter was chosen (e.g. "cdp", "accessibility"). */
  adapterName: string;
}
69
-
70
/**
 * Composite adapter that auto-selects the best backend for each session:
 * - Chromium browsers → CDP
 * - Scriptable apps → AppleScript
 * - Everything else → Accessibility
 * - Vision → fallback for `locate` when the routed adapter cannot find the target
 *
 * A session starts on the accessibility adapter (see `attach`) and is re-routed
 * whenever `launchApp` or `focusApp` names the app being driven. Optional desktop
 * methods that the routed adapter does not implement are served by the
 * accessibility adapter instead.
 */
export class CompositeAdapter implements AppAdapter {
  private readonly cdp: CdpChromeAdapter;
  private readonly accessibility: AccessibilityAdapter;
  private readonly applescript: AppleScriptAdapter;
  private readonly vision: VisionAdapter;

  /** Adapter chosen per session ID; sessions without an entry use accessibility. */
  private readonly sessionRouting = new Map<string, SessionRouting>();

  /**
   * @param bridge Native bridge client shared by the accessibility and vision adapters.
   * @param cdpOptions Optional settings forwarded to the CDP adapter.
   */
  constructor(
    private readonly bridge: MacOSBridgeClient,
    cdpOptions?: CdpChromeAdapterOptions,
  ) {
    this.cdp = new CdpChromeAdapter(cdpOptions);
    this.accessibility = new AccessibilityAdapter(bridge);
    this.applescript = new AppleScriptAdapter();
    this.vision = new VisionAdapter(bridge);
  }

  /**
   * Opens a session through the accessibility adapter and registers it for routing.
   * The returned info reports `adapterType: "composite"`.
   */
  async attach(profile: string): Promise<SessionInfo> {
    // The target app is not known yet, so start on accessibility; launchApp/focusApp re-route.
    const info = await this.accessibility.attach(profile);
    this.sessionRouting.set(info.sessionId, {
      adapter: this.accessibility,
      adapterName: "accessibility",
    });

    return { ...info, adapterType: "composite" };
  }

  async getAppContext(sessionId: string): Promise<AppContext> {
    return this.getAdapter(sessionId).getAppContext(sessionId);
  }

  async getPageMeta(sessionId: string): Promise<PageMeta> {
    return this.getAdapter(sessionId).getPageMeta(sessionId);
  }

  async navigate(sessionId: string, url: string, timeoutMs: number): Promise<PageMeta> {
    return this.getAdapter(sessionId).navigate(sessionId, url, timeoutMs);
  }

  /**
   * Locates `target` with the routed adapter, falling back to the vision adapter when
   * the routed adapter returns nothing or throws.
   *
   * The fallback is skipped for sessions routed to "cdp" or "vision" and for sessions
   * with no routing entry. The vision attempt is capped at 2000 ms.
   *
   * @returns The located element, or `null` when no adapter found it.
   * @throws The routed adapter's own error when it threw and vision could not locate the target.
   */
  async locate(sessionId: string, target: Target, timeoutMs: number): Promise<LocatedElement | null> {
    let primaryThrew = false;
    let primaryError: unknown;

    try {
      const found = await this.getAdapter(sessionId).locate(sessionId, target, timeoutMs);
      if (found) return found;
    } catch (error) {
      // Hold the error: a failing primary adapter is exactly when the fallback is needed.
      primaryThrew = true;
      primaryError = error;
    }

    const routing = this.sessionRouting.get(sessionId);
    const visionEligible =
      routing !== undefined && routing.adapterName !== "vision" && routing.adapterName !== "cdp";

    if (visionEligible) {
      try {
        const viaVision = await this.vision.locate(sessionId, target, Math.min(timeoutMs, 2000));
        if (viaVision) return viaVision;
      } catch {
        // Vision is best-effort; the primary outcome below decides what the caller sees.
      }
    }

    if (primaryThrew) throw primaryError;
    return null;
  }

  /**
   * Clicks `element`. Elements located by vision (locator prefixed "vision:" and carrying
   * coordinates) are clicked through the vision adapter; all others use the routed adapter.
   */
  async click(sessionId: string, element: LocatedElement): Promise<void> {
    if (element.locatorUsed.startsWith("vision:") && element.coordinates) {
      return this.vision.click(sessionId, element);
    }
    return this.getAdapter(sessionId).click(sessionId, element);
  }

  async setValue(sessionId: string, element: LocatedElement, text: string, clear: boolean): Promise<void> {
    return this.getAdapter(sessionId).setValue(sessionId, element, text, clear);
  }

  async getValue(sessionId: string, element: LocatedElement): Promise<string> {
    return this.getAdapter(sessionId).getValue(sessionId, element);
  }

  async waitFor(sessionId: string, condition: WaitCondition, timeoutMs: number): Promise<boolean> {
    return this.getAdapter(sessionId).waitFor(sessionId, condition, timeoutMs);
  }

  async extract(sessionId: string, target: Target, format: ExtractFormat): Promise<unknown> {
    return this.getAdapter(sessionId).extract(sessionId, target, format);
  }

  async screenshot(sessionId: string, region?: { x: number; y: number; width: number; height: number }): Promise<string> {
    return this.getAdapter(sessionId).screenshot(sessionId, region);
  }

  // ── Desktop methods (delegate to the best adapter that supports them) ──

  /** Routes the session according to `bundleId`, then launches the app via the routed adapter. */
  async launchApp(sessionId: string, bundleId: string): Promise<AppContext> {
    this.routeSession(sessionId, bundleId);

    const adapter = this.getAdapter(sessionId);
    return adapter.launchApp
      ? adapter.launchApp(sessionId, bundleId)
      : this.accessibility.launchApp(sessionId, bundleId);
  }

  /** Routes the session according to `bundleId`, then focuses the app via the routed adapter. */
  async focusApp(sessionId: string, bundleId: string): Promise<void> {
    this.routeSession(sessionId, bundleId);

    const adapter = this.getAdapter(sessionId);
    return adapter.focusApp
      ? adapter.focusApp(sessionId, bundleId)
      : this.accessibility.focusApp(sessionId, bundleId);
  }

  /** Always answered by the accessibility adapter, regardless of routing. */
  async listApps(sessionId: string): Promise<RunningApp[]> {
    return this.accessibility.listApps(sessionId);
  }

  /** Always answered by the accessibility adapter, regardless of routing. */
  async listWindows(sessionId: string): Promise<WindowInfo[]> {
    return this.accessibility.listWindows(sessionId);
  }

  async menuClick(sessionId: string, menuPath: string[]): Promise<void> {
    const adapter = this.getAdapter(sessionId);
    return adapter.menuClick
      ? adapter.menuClick(sessionId, menuPath)
      : this.accessibility.menuClick(sessionId, menuPath);
  }

  async keyCombo(sessionId: string, keys: string[]): Promise<void> {
    const adapter = this.getAdapter(sessionId);
    return adapter.keyCombo
      ? adapter.keyCombo(sessionId, keys)
      : this.accessibility.keyCombo(sessionId, keys);
  }

  async elementTree(sessionId: string, maxDepth?: number, root?: Target): Promise<AXNode> {
    const adapter = this.getAdapter(sessionId);
    return adapter.elementTree
      ? adapter.elementTree(sessionId, maxDepth, root)
      : this.accessibility.elementTree(sessionId, maxDepth, root);
  }

  async drag(sessionId: string, from: LocatedElement, to: LocatedElement): Promise<void> {
    const adapter = this.getAdapter(sessionId);
    return adapter.drag
      ? adapter.drag(sessionId, from, to)
      : this.accessibility.drag(sessionId, from, to);
  }

  async scroll(sessionId: string, direction: "up" | "down" | "left" | "right", amount: number, element?: LocatedElement): Promise<void> {
    const adapter = this.getAdapter(sessionId);
    return adapter.scroll
      ? adapter.scroll(sessionId, direction, amount, element)
      : this.accessibility.scroll(sessionId, direction, amount, element);
  }

  // ── Routing logic ──

  /** Records which adapter should serve `sessionId` from now on, based on the app identifier. */
  private routeSession(sessionId: string, bundleId: string): void {
    this.sessionRouting.set(sessionId, this.selectRouting(bundleId));
  }

  /**
   * Chooses the adapter for an app identifier.
   *
   * On Windows the identifier is treated as a process name: any directory part and a
   * trailing ".exe" are dropped and the rest is compared case-insensitively. On other
   * platforms it is treated as a bundle ID and compared exactly.
   */
  private selectRouting(bundleId: string): SessionRouting {
    if (isWindows) {
      // Accept full executable paths as well as bare process names.
      const fileName = bundleId.split(/[\\/]/).pop() ?? bundleId;
      const processName = fileName.toLowerCase().replace(/\.exe$/, "");
      if (BROWSER_PROCESS_NAMES.has(processName)) {
        return { adapter: this.cdp, adapterName: "cdp" };
      }
      // No AppleScript on Windows — always use accessibility (UI Automation)
      return { adapter: this.accessibility, adapterName: "accessibility" };
    }

    if (BROWSER_BUNDLES.has(bundleId)) {
      return { adapter: this.cdp, adapterName: "cdp" };
    }
    if (AppleScriptAdapter.isScriptable(bundleId)) {
      return { adapter: this.applescript, adapterName: "applescript" };
    }
    return { adapter: this.accessibility, adapterName: "accessibility" };
  }

  /** Returns the adapter routed for `sessionId`, defaulting to accessibility for unknown sessions. */
  private getAdapter(sessionId: string): AppAdapter {
    return this.sessionRouting.get(sessionId)?.adapter ?? this.accessibility;
  }
}
@@ -1,396 +0,0 @@
1
- // Copyright (C) 2025 Clazro Technology Private Limited
2
- // SPDX-License-Identifier: AGPL-3.0-only
3
- //
4
- // This file is part of ScreenHand.
5
- //
6
- // ScreenHand is free software: you can redistribute it and/or modify
7
- // it under the terms of the GNU Affero General Public License as
8
- // published by the Free Software Foundation, version 3.
9
- //
10
- // ScreenHand is distributed in the hope that it will be useful,
11
- // but WITHOUT ANY WARRANTY; without even the implied warranty of
12
- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
- // GNU Affero General Public License for more details.
14
- //
15
- // You should have received a copy of the GNU Affero General Public License
16
- // along with ScreenHand. If not, see <https://www.gnu.org/licenses/>.
17
-
18
- import { DEFAULT_ACTION_BUDGET } from "../config.js";
19
- import type { TimelineLogger } from "../logging/timeline-logger.js";
20
- import type {
21
- ActionBudget,
22
- ActionTelemetry,
23
- LocatedElement,
24
- LocatorAttempt,
25
- PageMeta,
26
- PressInput,
27
- RuntimeError,
28
- Target,
29
- ToolResult,
30
- TypeIntoInput,
31
- } from "../types.js";
32
- import type { AppAdapter } from "./app-adapter.js";
33
- import type { LocatorCache } from "./locator-cache.js";
34
-
35
/** Outcome of a successful locate pass. */
interface LocateResult {
  /** The element that was found. */
  element: LocatedElement;
  /** Every locator attempt made during the pass, in the order tried. */
  attempts: LocatorAttempt[];
}
39
-
40
/**
 * Runs high-level UI actions (`press`, `typeInto`) against an `AppAdapter`.
 *
 * Each action goes through locate → act → verify, with every adapter call bounded by
 * the matching entry of the action budget. Selector locators that matched are stored
 * in the locator cache, and the outcome is returned as a `ToolResult` carrying
 * telemetry with the wall-clock time spent in each phase.
 */
export class Executor {
  constructor(
    private readonly adapter: AppAdapter,
    private readonly cache: LocatorCache,
    private readonly logger: TimelineLogger,
  ) {}

  /**
   * Locates `input.target`, clicks it, optionally waits for `input.verify`, and returns
   * the page metadata read afterwards.
   *
   * The whole sequence is retried up to `budget.maxRetries` additional times; retries
   * bypass the locator cache. Never throws: failures are returned as `ok: false` with
   * the last error encountered.
   */
  async press(input: PressInput): Promise<ToolResult<PageMeta>> {
    const telemetry = this.logger.start("press", input.sessionId);
    const budget = this.resolveBudget(input.budget);
    const attempts: LocatorAttempt[] = [];
    let lastError: RuntimeError | undefined;

    for (let retry = 0; retry <= budget.maxRetries; retry += 1) {
      telemetry.retries = retry;
      try {
        const siteKey = await this.currentSiteKey(input.sessionId);
        const actionKey = this.targetToKey(input.target);

        const located = await this.elapsed(
          () =>
            this.locateWithBudget(
              input.sessionId,
              siteKey,
              actionKey,
              input.target,
              budget.locateMs,
              retry > 0, // a cached locator may be what failed last time
            ),
          (ms) => {
            telemetry.locateMs += ms;
          },
        );
        attempts.push(...located.attempts);

        await this.elapsed(
          () =>
            this.timed(
              budget.actMs,
              () => this.adapter.click(input.sessionId, located.element),
              "ACTION_FAILED",
            ),
          (ms) => {
            telemetry.actMs += ms;
          },
        );

        if (input.verify) {
          await this.verifyCondition(input.sessionId, input.verify, budget, telemetry);
        }

        const page = await this.adapter.getPageMeta(input.sessionId);
        return this.success(page, telemetry);
      } catch (error) {
        lastError = this.asRuntimeError(error, attempts);
      }
    }

    return this.failure(
      lastError ??
        this.runtimeError("ACTION_FAILED", "Press failed with unknown runtime error."),
      telemetry,
    );
  }

  /**
   * Locates `input.target`, sets its value to `input.text` (clearing first unless
   * `input.clear` is false), reads the value back unless `input.verifyValue` is false,
   * optionally waits for `input.verify`, and returns the page metadata read afterwards.
   *
   * Runs once (no retries). Never throws: failures are returned as `ok: false`.
   */
  async typeInto(input: TypeIntoInput): Promise<ToolResult<PageMeta>> {
    const telemetry = this.logger.start("type_into", input.sessionId);
    const budget = this.resolveBudget(input.budget);
    const attempts: LocatorAttempt[] = [];

    try {
      const siteKey = await this.currentSiteKey(input.sessionId);
      const actionKey = `type:${this.targetToKey(input.target)}`;

      const located = await this.elapsed(
        () =>
          this.locateWithBudget(
            input.sessionId,
            siteKey,
            actionKey,
            input.target,
            budget.locateMs,
            false,
          ),
        (ms) => {
          telemetry.locateMs += ms;
        },
      );
      attempts.push(...located.attempts);

      await this.elapsed(
        () =>
          this.timed(
            budget.actMs,
            () =>
              this.adapter.setValue(
                input.sessionId,
                located.element,
                input.text,
                input.clear ?? true,
              ),
            "ACTION_FAILED",
          ),
        (ms) => {
          telemetry.actMs += ms;
        },
      );

      if (input.verifyValue ?? true) {
        // The read-back is part of verification, so it runs under the verify budget.
        const read = await this.elapsed(
          () =>
            this.timed(
              budget.verifyMs,
              () => this.adapter.getValue(input.sessionId, located.element),
              "VERIFY_FAILED",
            ),
          (ms) => {
            telemetry.verifyMs += ms;
          },
        );
        if (read !== input.text) {
          throw this.runtimeError(
            "VERIFY_FAILED",
            `Field value mismatch. Expected "${input.text}", got "${read}".`,
          );
        }
      }

      if (input.verify) {
        await this.verifyCondition(input.sessionId, input.verify, budget, telemetry);
      }

      const page = await this.adapter.getPageMeta(input.sessionId);
      return this.success(page, telemetry);
    } catch (error) {
      return this.failure(this.asRuntimeError(error, attempts), telemetry);
    }
  }

  /**
   * Waits for `condition` within the verify budget and adds the time spent to
   * `telemetry.verifyMs`.
   *
   * @throws A VERIFY_FAILED runtime error when the adapter reports the condition as not met.
   */
  private async verifyCondition(
    sessionId: string,
    condition: Parameters<AppAdapter["waitFor"]>[1],
    budget: ActionBudget,
    telemetry: ActionTelemetry,
  ): Promise<void> {
    const verified = await this.elapsed(
      () =>
        this.timed(
          budget.verifyMs,
          () => this.adapter.waitFor(sessionId, condition, budget.verifyMs),
          "VERIFY_FAILED",
        ),
      (ms) => {
        telemetry.verifyMs += ms;
      },
    );
    if (!verified) {
      throw this.runtimeError("VERIFY_FAILED", "Verification condition not met.");
    }
  }

  /**
   * Runs `operation` and passes the wall-clock milliseconds it took to `record`,
   * whether it resolved or rejected.
   */
  private async elapsed<T>(
    operation: () => Promise<T>,
    record: (ms: number) => void,
  ): Promise<T> {
    const startedAt = Date.now();
    try {
      return await operation();
    } finally {
      record(Date.now() - startedAt);
    }
  }

  /**
   * Finds `target`, trying the cached locator first (unless `skipCache`) and then each
   * strategy produced by `expandTargetStrategies`, in order.
   *
   * A strategy that matches with a selector target is written to the cache under
   * (`siteKey`, `actionKey`).
   *
   * @throws A LOCATE_FAILED runtime error carrying all attempts when nothing matched.
   */
  private async locateWithBudget(
    sessionId: string,
    siteKey: string,
    actionKey: string,
    target: Target,
    locateBudgetMs: number,
    skipCache: boolean,
  ): Promise<LocateResult> {
    const attempts: LocatorAttempt[] = [];
    // NOTE(review): every attempt gets a third of the budget (min 50 ms), so a cache probe
    // plus three strategies can take longer than locateBudgetMs in total — confirm intended.
    const strategyBudget = Math.max(50, Math.floor(locateBudgetMs / 3));

    if (!skipCache) {
      const cachedLocator = this.cache.get(siteKey, actionKey);
      if (cachedLocator) {
        const cachedTarget: Target = { type: "selector", value: cachedLocator };
        const match = await this.tryLocate(
          sessionId,
          "cache",
          cachedTarget,
          strategyBudget,
          attempts,
        );
        if (match) {
          return { element: match, attempts };
        }
      }
    }

    for (const strategy of this.expandTargetStrategies(target)) {
      const match = await this.tryLocate(
        sessionId,
        strategy.strategy,
        strategy.target,
        strategyBudget,
        attempts,
      );
      if (match) {
        if (strategy.target.type === "selector") {
          this.cache.set(siteKey, actionKey, strategy.target.value);
        }
        return { element: match, attempts };
      }
    }

    throw this.runtimeError("LOCATE_FAILED", "Could not locate target.", attempts);
  }

  /**
   * Makes one bounded locate call and appends its outcome to `attempts`.
   * Errors are recorded as a non-matching attempt (with `reason` when the error is an
   * `Error`) and reported as `null` rather than thrown.
   */
  private async tryLocate(
    sessionId: string,
    strategyName: string,
    target: Target,
    timeoutMs: number,
    attempts: LocatorAttempt[],
  ): Promise<LocatedElement | null> {
    try {
      const found = await this.timed(
        timeoutMs,
        () => this.adapter.locate(sessionId, target, timeoutMs),
        "LOCATE_FAILED",
      );
      attempts.push({
        strategy: strategyName,
        target: this.targetToKey(target),
        timeoutMs,
        matched: Boolean(found),
      });
      return found;
    } catch (error) {
      attempts.push({
        strategy: strategyName,
        target: this.targetToKey(target),
        timeoutMs,
        matched: false,
        reason: error instanceof Error ? error.message : "Unknown locate error",
      });
      return null;
    }
  }

  /**
   * Expands a target into the ordered list of locate strategies to try:
   * selector → itself; text → exact then fuzzy; role → exact, fuzzy, then a text
   * search for the name; any other type → itself.
   */
  private expandTargetStrategies(
    target: Target,
  ): Array<{ strategy: string; target: Target }> {
    if (target.type === "selector") {
      return [{ strategy: "selector", target }];
    }
    if (target.type === "text") {
      return [
        { strategy: "text_exact", target: { type: "text", value: target.value, exact: true } },
        { strategy: "text_fuzzy", target: { type: "text", value: target.value, exact: false } },
      ];
    }
    if (target.type === "role") {
      return [
        { strategy: "role_name_exact", target: { type: "role", role: target.role, name: target.name, exact: true } },
        { strategy: "role_name_fuzzy", target: { type: "role", role: target.role, name: target.name, exact: false } },
        { strategy: "fallback_text", target: { type: "text", value: target.name } },
      ];
    }
    // For new target types (ax_path, ax_attribute, coordinates, image), pass through directly
    return [{ strategy: target.type, target }];
  }

  /**
   * Builds the cache namespace for the session's current location.
   *
   * Uses the host of the app context URL when it parses (falling back to the bundle ID
   * for an empty host), otherwise `bundleId::windowTitle`. If the app context cannot be
   * read, uses the host of the page URL, and finally "unknown-site".
   */
  private async currentSiteKey(sessionId: string): Promise<string> {
    try {
      const ctx = await this.adapter.getAppContext(sessionId);
      if (ctx.url) {
        try {
          return new URL(ctx.url).host || ctx.bundleId;
        } catch {
          // URL parsing failed, use bundleId + windowTitle
        }
      }
      return `${ctx.bundleId}::${ctx.windowTitle}`;
    } catch {
      // App context unavailable; fall back to page meta.
      try {
        const page = await this.adapter.getPageMeta(sessionId);
        return new URL(page.url).host || "unknown-site";
      } catch {
        return "unknown-site";
      }
    }
  }

  /** Overlays caller-supplied budget fields on top of `DEFAULT_ACTION_BUDGET`. */
  private resolveBudget(input?: Partial<ActionBudget>): ActionBudget {
    return {
      ...DEFAULT_ACTION_BUDGET,
      ...input,
    };
  }

  /**
   * Races `operation` against a `timeoutMs` timer.
   *
   * @returns The operation's result when it settles first.
   * @throws A TIMEOUT runtime error when the timer fires first; the operation's own
   *   error when it already looks like a runtime error; otherwise a runtime error with
   *   `errorCode` wrapping the original message.
   */
  private async timed<T>(
    timeoutMs: number,
    operation: () => Promise<T>,
    errorCode: RuntimeError["code"],
  ): Promise<T> {
    let timer: ReturnType<typeof setTimeout> | undefined;
    const timeout = new Promise<never>((_, reject) => {
      timer = setTimeout(() => {
        reject(this.runtimeError("TIMEOUT", `Timed out after ${timeoutMs}ms.`));
      }, timeoutMs);
    });

    try {
      return await Promise.race([operation(), timeout]);
    } catch (error) {
      if (this.isRuntimeError(error)) {
        throw error;
      }
      throw this.runtimeError(
        errorCode,
        error instanceof Error ? error.message : "Unexpected runtime error",
      );
    } finally {
      // Without this the timer stays scheduled after the operation has already settled.
      clearTimeout(timer);
    }
  }

  /** Serializes a target into the string used for cache keys and attempt records. */
  private targetToKey(target: Target): string {
    switch (target.type) {
      case "selector":
        return `selector:${target.value}`;
      case "text":
        return `text:${target.value}`;
      case "role":
        return `role:${target.role}|name:${target.name}`;
      case "ax_path":
        return `ax_path:${target.path.join("/")}`;
      case "ax_attribute":
        return `ax_attr:${target.attribute}=${target.value}`;
      case "coordinates":
        return `coords:${target.x},${target.y}`;
      case "image":
        // Only a short prefix of the image data is kept in the key.
        return `image:${target.base64.slice(0, 20)}`;
    }
  }

  /** Closes the telemetry record as "success" and wraps `data` in an ok result. */
  private success<T>(data: T, telemetry: ActionTelemetry): ToolResult<T> {
    return {
      ok: true,
      data,
      telemetry: this.logger.finish(telemetry, "success"),
    };
  }

  /** Closes the telemetry record as "failed" and wraps `error` in a failed result. */
  private failure<T>(error: RuntimeError, telemetry: ActionTelemetry): ToolResult<T> {
    return {
      ok: false,
      error,
      telemetry: this.logger.finish(telemetry, "failed"),
    };
  }

  /** Builds a runtime error object; `attempts` is attached only when non-empty. */
  private runtimeError(
    code: RuntimeError["code"],
    message: string,
    attempts?: LocatorAttempt[],
  ): RuntimeError {
    const error: RuntimeError = { code, message };
    if (attempts && attempts.length > 0) {
      error.attempts = attempts;
    }
    return error;
  }

  /** Structural check: any non-null object with both `code` and `message` properties. */
  private isRuntimeError(error: unknown): error is RuntimeError {
    if (typeof error !== "object" || error === null) {
      return false;
    }
    return "code" in error && "message" in error;
  }

  /**
   * Normalizes any thrown value to a runtime error. Existing runtime errors keep their
   * own attempts; otherwise the supplied `attempts` are attached. Non-runtime errors
   * become ACTION_FAILED.
   */
  private asRuntimeError(error: unknown, attempts?: LocatorAttempt[]): RuntimeError {
    if (this.isRuntimeError(error)) {
      if (error.attempts || !attempts || attempts.length === 0) {
        return error;
      }
      return {
        ...error,
        attempts,
      };
    }
    return this.runtimeError(
      "ACTION_FAILED",
      error instanceof Error ? error.message : "Unexpected runtime error",
      attempts,
    );
  }
}