screenhand 0.1.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (241)
  1. package/README.md +193 -109
  2. package/bin/darwin-arm64/macos-bridge +0 -0
  3. package/dist/mcp-desktop.js +5876 -0
  4. package/dist/scripts/codex-monitor-daemon.js +335 -0
  5. package/dist/scripts/export-help-center.js +112 -0
  6. package/dist/scripts/marketing-loop.js +117 -0
  7. package/dist/scripts/observer-daemon.js +288 -0
  8. package/dist/scripts/orchestrator-daemon.js +399 -0
  9. package/dist/scripts/supervisor-daemon.js +272 -0
  10. package/dist/scripts/threads-campaign.js +208 -0
  11. package/dist/scripts/worker-daemon.js +228 -0
  12. package/dist/src/agent/cli.js +82 -0
  13. package/dist/src/agent/loop.js +274 -0
  14. package/dist/src/community/fetcher.js +109 -0
  15. package/dist/src/community/index.js +6 -0
  16. package/dist/src/community/publisher.js +191 -0
  17. package/dist/src/community/remote-api.js +121 -0
  18. package/dist/src/community/types.js +3 -0
  19. package/dist/src/community/validator.js +95 -0
  20. package/{src/config.ts → dist/src/config.js} +5 -10
  21. package/dist/src/context-tracker.js +489 -0
  22. package/{src/index.ts → dist/src/index.js} +32 -52
  23. package/dist/src/ingestion/coverage-auditor.js +233 -0
  24. package/dist/src/ingestion/doc-parser.js +164 -0
  25. package/dist/src/ingestion/index.js +8 -0
  26. package/dist/src/ingestion/menu-scanner.js +152 -0
  27. package/dist/src/ingestion/reference-merger.js +186 -0
  28. package/dist/src/ingestion/shortcut-extractor.js +180 -0
  29. package/dist/src/ingestion/tutorial-extractor.js +170 -0
  30. package/dist/src/ingestion/types.js +3 -0
  31. package/dist/src/jobs/manager.js +305 -0
  32. package/dist/src/jobs/runner.js +806 -0
  33. package/dist/src/jobs/store.js +102 -0
  34. package/dist/src/jobs/types.js +30 -0
  35. package/dist/src/jobs/worker.js +97 -0
  36. package/dist/src/learning/engine.js +356 -0
  37. package/dist/src/learning/index.js +9 -0
  38. package/dist/src/learning/locator-policy.js +120 -0
  39. package/dist/src/learning/pattern-policy.js +89 -0
  40. package/dist/src/learning/recovery-policy.js +116 -0
  41. package/dist/src/learning/sensor-policy.js +115 -0
  42. package/dist/src/learning/timing-model.js +204 -0
  43. package/dist/src/learning/topology-policy.js +90 -0
  44. package/dist/src/learning/types.js +9 -0
  45. package/dist/src/logging/timeline-logger.js +48 -0
  46. package/dist/src/mcp/mcp-stdio-server.js +464 -0
  47. package/dist/src/mcp/server.js +363 -0
  48. package/dist/src/mcp-entry.js +60 -0
  49. package/dist/src/memory/playbook-seeds.js +200 -0
  50. package/dist/src/memory/recall.js +222 -0
  51. package/dist/src/memory/research.js +104 -0
  52. package/dist/src/memory/seeds.js +101 -0
  53. package/dist/src/memory/service.js +446 -0
  54. package/dist/src/memory/session.js +169 -0
  55. package/dist/src/memory/store.js +451 -0
  56. package/{src/runtime/locator-cache.ts → dist/src/memory/types.js} +1 -17
  57. package/dist/src/monitor/codex-monitor.js +382 -0
  58. package/dist/src/monitor/task-queue.js +97 -0
  59. package/dist/src/monitor/types.js +62 -0
  60. package/dist/src/native/bridge-client.js +412 -0
  61. package/{src/native/macos-bridge-client.ts → dist/src/native/macos-bridge-client.js} +0 -1
  62. package/dist/src/observer/state.js +199 -0
  63. package/dist/src/observer/types.js +43 -0
  64. package/dist/src/orchestrator/state.js +68 -0
  65. package/dist/src/orchestrator/types.js +22 -0
  66. package/dist/src/perception/ax-source.js +162 -0
  67. package/dist/src/perception/cdp-source.js +162 -0
  68. package/dist/src/perception/coordinator.js +771 -0
  69. package/dist/src/perception/frame-differ.js +287 -0
  70. package/dist/src/perception/index.js +22 -0
  71. package/dist/src/perception/manager.js +199 -0
  72. package/dist/src/perception/types.js +47 -0
  73. package/dist/src/perception/vision-source.js +399 -0
  74. package/dist/src/planner/deterministic.js +298 -0
  75. package/dist/src/planner/executor.js +870 -0
  76. package/dist/src/planner/goal-store.js +92 -0
  77. package/dist/src/planner/index.js +21 -0
  78. package/dist/src/planner/planner.js +520 -0
  79. package/dist/src/planner/tool-registry.js +71 -0
  80. package/dist/src/planner/types.js +22 -0
  81. package/dist/src/platform/explorer.js +213 -0
  82. package/dist/src/platform/help-center-markdown.js +527 -0
  83. package/dist/src/platform/learner.js +257 -0
  84. package/dist/src/playbook/engine.js +486 -0
  85. package/dist/src/playbook/index.js +20 -0
  86. package/dist/src/playbook/mcp-recorder.js +204 -0
  87. package/dist/src/playbook/recorder.js +536 -0
  88. package/dist/src/playbook/runner.js +408 -0
  89. package/dist/src/playbook/store.js +312 -0
  90. package/dist/src/playbook/types.js +17 -0
  91. package/dist/src/recovery/detectors.js +156 -0
  92. package/dist/src/recovery/engine.js +327 -0
  93. package/dist/src/recovery/index.js +20 -0
  94. package/dist/src/recovery/strategies.js +274 -0
  95. package/dist/src/recovery/types.js +20 -0
  96. package/dist/src/runtime/accessibility-adapter.js +430 -0
  97. package/dist/src/runtime/app-adapter.js +64 -0
  98. package/dist/src/runtime/applescript-adapter.js +305 -0
  99. package/dist/src/runtime/ax-role-map.js +96 -0
  100. package/dist/src/runtime/browser-adapter.js +52 -0
  101. package/dist/src/runtime/cdp-chrome-adapter.js +521 -0
  102. package/dist/src/runtime/composite-adapter.js +221 -0
  103. package/dist/src/runtime/execution-contract.js +159 -0
  104. package/dist/src/runtime/executor.js +286 -0
  105. package/dist/src/runtime/locator-cache.js +50 -0
  106. package/dist/src/runtime/planning-loop.js +63 -0
  107. package/dist/src/runtime/service.js +432 -0
  108. package/dist/src/runtime/session-manager.js +63 -0
  109. package/dist/src/runtime/state-observer.js +121 -0
  110. package/dist/src/runtime/vision-adapter.js +225 -0
  111. package/dist/src/state/app-map-types.js +72 -0
  112. package/dist/src/state/app-map.js +1974 -0
  113. package/dist/src/state/entity-tracker.js +108 -0
  114. package/dist/src/state/fusion.js +96 -0
  115. package/dist/src/state/index.js +21 -0
  116. package/dist/src/state/ladder-generator.js +236 -0
  117. package/dist/src/state/persistence.js +156 -0
  118. package/dist/src/state/types.js +17 -0
  119. package/dist/src/state/world-model.js +1456 -0
  120. package/dist/src/supervisor/locks.js +186 -0
  121. package/dist/src/supervisor/supervisor.js +403 -0
  122. package/dist/src/supervisor/types.js +30 -0
  123. package/dist/src/test-mcp-protocol.js +154 -0
  124. package/dist/src/types.js +17 -0
  125. package/dist/src/util/atomic-write.js +133 -0
  126. package/dist/src/util/sanitize.js +146 -0
  127. package/dist-app-maps/com.figma.Desktop.json +959 -0
  128. package/dist-app-maps/com.hnc.Discord.json +1146 -0
  129. package/dist-app-maps/notion.id.json +2831 -0
  130. package/dist-playbooks/canva-screenhand-carousel.json +445 -0
  131. package/dist-playbooks/codex-desktop.json +76 -0
  132. package/dist-playbooks/competitor-research-stack.json +122 -0
  133. package/dist-playbooks/davinci-color-grade.json +153 -0
  134. package/dist-playbooks/davinci-edit-timeline.json +162 -0
  135. package/dist-playbooks/davinci-render.json +114 -0
  136. package/dist-playbooks/devto.json +52 -0
  137. package/dist-playbooks/discord.json +41 -0
  138. package/dist-playbooks/google-flow-create-project.json +59 -0
  139. package/dist-playbooks/google-flow-edit-image.json +90 -0
  140. package/dist-playbooks/google-flow-edit-video.json +90 -0
  141. package/dist-playbooks/google-flow-generate-image.json +68 -0
  142. package/dist-playbooks/google-flow-generate-video.json +191 -0
  143. package/dist-playbooks/google-flow-open-project.json +48 -0
  144. package/dist-playbooks/google-flow-open-scenebuilder.json +64 -0
  145. package/dist-playbooks/google-flow-search-assets.json +64 -0
  146. package/dist-playbooks/instagram.json +57 -0
  147. package/dist-playbooks/linkedin.json +52 -0
  148. package/dist-playbooks/n8n.json +43 -0
  149. package/dist-playbooks/reddit.json +52 -0
  150. package/dist-playbooks/threads.json +59 -0
  151. package/dist-playbooks/x-twitter.json +59 -0
  152. package/dist-playbooks/youtube.json +59 -0
  153. package/dist-references/canva.json +646 -0
  154. package/dist-references/codex-desktop.json +305 -0
  155. package/dist-references/davinci-resolve-keyboard.json +594 -0
  156. package/dist-references/davinci-resolve-menu-map.json +1139 -0
  157. package/dist-references/davinci-resolve-menus-batch1.json +116 -0
  158. package/dist-references/davinci-resolve-menus-batch2.json +372 -0
  159. package/dist-references/davinci-resolve-menus-batch3.json +330 -0
  160. package/dist-references/davinci-resolve-menus-batch4.json +297 -0
  161. package/dist-references/davinci-resolve-shortcuts.json +333 -0
  162. package/dist-references/devto.json +317 -0
  163. package/dist-references/discord.json +549 -0
  164. package/dist-references/figma.json +1186 -0
  165. package/dist-references/finder.json +146 -0
  166. package/dist-references/google-ads-transparency.json +95 -0
  167. package/dist-references/google-flow.json +649 -0
  168. package/dist-references/instagram.json +341 -0
  169. package/dist-references/linkedin.json +324 -0
  170. package/dist-references/meta-ad-library.json +86 -0
  171. package/dist-references/n8n.json +387 -0
  172. package/dist-references/notes.json +27 -0
  173. package/dist-references/notion.json +163 -0
  174. package/dist-references/reddit.json +341 -0
  175. package/dist-references/threads.json +337 -0
  176. package/dist-references/x-twitter.json +403 -0
  177. package/dist-references/youtube.json +373 -0
  178. package/native/macos-bridge/Package.swift +1 -0
  179. package/native/macos-bridge/Sources/AccessibilityBridge.swift +257 -36
  180. package/native/macos-bridge/Sources/AppManagement.swift +212 -2
  181. package/native/macos-bridge/Sources/CoreGraphicsBridge.swift +348 -53
  182. package/native/macos-bridge/Sources/StreamCapture.swift +136 -0
  183. package/native/macos-bridge/Sources/VisionBridge.swift +165 -7
  184. package/native/macos-bridge/Sources/main.swift +169 -16
  185. package/native/windows-bridge/Program.cs +5 -0
  186. package/native/windows-bridge/ScreenCapture.cs +124 -0
  187. package/package.json +29 -4
  188. package/scripts/postinstall.cjs +127 -0
  189. package/.claude/commands/automate.md +0 -28
  190. package/.claude/commands/debug-ui.md +0 -19
  191. package/.claude/commands/screenshot.md +0 -15
  192. package/.github/FUNDING.yml +0 -1
  193. package/.github/ISSUE_TEMPLATE/bug_report.md +0 -27
  194. package/.github/ISSUE_TEMPLATE/feature_request.md +0 -20
  195. package/.mcp.json +0 -8
  196. package/DESKTOP_MCP_GUIDE.md +0 -92
  197. package/SECURITY.md +0 -44
  198. package/docs/architecture.md +0 -47
  199. package/install-skills.sh +0 -19
  200. package/mcp-bridge.ts +0 -271
  201. package/mcp-desktop.ts +0 -1221
  202. package/playbooks/instagram.json +0 -41
  203. package/playbooks/instagram_v2.json +0 -201
  204. package/playbooks/x_v1.json +0 -211
  205. package/scripts/devpost-live-loop.mjs +0 -421
  206. package/src/logging/timeline-logger.ts +0 -55
  207. package/src/mcp/server.ts +0 -449
  208. package/src/memory/recall.ts +0 -191
  209. package/src/memory/research.ts +0 -146
  210. package/src/memory/seeds.ts +0 -123
  211. package/src/memory/session.ts +0 -201
  212. package/src/memory/store.ts +0 -434
  213. package/src/memory/types.ts +0 -69
  214. package/src/native/bridge-client.ts +0 -239
  215. package/src/runtime/accessibility-adapter.ts +0 -487
  216. package/src/runtime/app-adapter.ts +0 -169
  217. package/src/runtime/applescript-adapter.ts +0 -376
  218. package/src/runtime/ax-role-map.ts +0 -102
  219. package/src/runtime/browser-adapter.ts +0 -129
  220. package/src/runtime/cdp-chrome-adapter.ts +0 -676
  221. package/src/runtime/composite-adapter.ts +0 -274
  222. package/src/runtime/executor.ts +0 -396
  223. package/src/runtime/planning-loop.ts +0 -81
  224. package/src/runtime/service.ts +0 -448
  225. package/src/runtime/session-manager.ts +0 -50
  226. package/src/runtime/state-observer.ts +0 -136
  227. package/src/runtime/vision-adapter.ts +0 -297
  228. package/src/types.ts +0 -297
  229. package/tests/bridge-client.test.ts +0 -176
  230. package/tests/browser-stealth.test.ts +0 -210
  231. package/tests/composite-adapter.test.ts +0 -64
  232. package/tests/mcp-server.test.ts +0 -151
  233. package/tests/memory-recall.test.ts +0 -339
  234. package/tests/memory-research.test.ts +0 -159
  235. package/tests/memory-seeds.test.ts +0 -120
  236. package/tests/memory-store.test.ts +0 -392
  237. package/tests/types.test.ts +0 -92
  238. package/tsconfig.check.json +0 -17
  239. package/tsconfig.json +0 -19
  240. package/vitest.config.ts +0 -8
  241. /package/{playbooks → dist-references}/devpost.json +0 -0
@@ -1,487 +0,0 @@
1
- // Copyright (C) 2025 Clazro Technology Private Limited
2
- // SPDX-License-Identifier: AGPL-3.0-only
3
- //
4
- // This file is part of ScreenHand.
5
- //
6
- // ScreenHand is free software: you can redistribute it and/or modify
7
- // it under the terms of the GNU Affero General Public License as
8
- // published by the Free Software Foundation, version 3.
9
- //
10
- // ScreenHand is distributed in the hope that it will be useful,
11
- // but WITHOUT ANY WARRANTY; without even the implied warranty of
12
- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
- // GNU Affero General Public License for more details.
14
- //
15
- // You should have received a copy of the GNU Affero General Public License
16
- // along with ScreenHand. If not, see <https://www.gnu.org/licenses/>.
17
-
18
- import type {
19
- AXNode,
20
- AppContext,
21
- ExtractFormat,
22
- LocatedElement,
23
- PageMeta,
24
- RunningApp,
25
- SessionInfo,
26
- Target,
27
- WaitCondition,
28
- WindowInfo,
29
- } from "../types.js";
30
- import type { AppAdapter } from "./app-adapter.js";
31
- import type { MacOSBridgeClient } from "../native/macos-bridge-client.js";
32
- import { toAXRole } from "./ax-role-map.js";
33
-
34
/** Pause, in milliseconds, between successive polling attempts while waiting on the UI. */
const POLL_INTERVAL_MS = 100;
35
-
36
/**
 * Mutable bookkeeping for one accessibility session: the public session
 * descriptor plus the application the session is currently pointed at.
 */
interface AXSessionState {
  /** Descriptor handed back to the caller when the session was attached. */
  info: SessionInfo;
  /** Process ID of the application currently targeted by this session. */
  pid: number;
  /** Bundle identifier of the targeted application. */
  bundleId: string;
  /** Display name of the targeted application. */
  appName: string;
}
42
-
43
/** Shape of an element record returned by the bridge's `ax.findElement` call. */
interface BridgeElement {
  /** Opaque handle for the element; later parsed back into an element path when it has the `ax_` form. */
  handleId: string;
  /** Accessibility role reported for the element. */
  role: string;
  /** Element title; surfaced to callers as the located element's label. */
  title: string;
  /** Index path of the element — presumably child indices from the app root; confirm against the bridge. */
  elementPath: number[];
  /** Current value, when the bridge reports one. */
  value?: string;
  /** Accessibility identifier, when the bridge reports one. */
  identifier?: string;
  /** On-screen rectangle, when known; copied into the located element's coordinates. */
  bounds?: { x: number; y: number; width: number; height: number };
}
52
-
53
/**
 * AppAdapter implementation that drives desktop applications through the
 * accessibility (`ax.*`), CoreGraphics (`cg.*`) and app-management (`app.*`)
 * calls exposed by the native bridge client.
 *
 * Each attached profile owns one session; a session tracks the pid, bundle ID
 * and name of the application it currently targets.
 */
export class AccessibilityAdapter implements AppAdapter {
  /** Sessions keyed by session ID. */
  private readonly sessions = new Map<string, AXSessionState>();
  /** The same session objects keyed by profile, so re-attaching a profile reuses its session. */
  private readonly sessionsByProfile = new Map<string, AXSessionState>();

  constructor(private readonly bridge: MacOSBridgeClient) {}

  /**
   * Attach to (or reuse) the session for `profile`.
   *
   * Starts the bridge, verifies accessibility permission, and initially
   * targets whichever application is frontmost.
   *
   * @param profile Profile name identifying the session.
   * @returns The session descriptor.
   * @throws Error when accessibility permission has not been granted.
   */
  async attach(profile: string): Promise<SessionInfo> {
    const existing = this.sessionsByProfile.get(profile);
    if (existing) return existing.info;

    // Ensure bridge is started
    await this.bridge.start();

    // Check accessibility permissions
    const perms = await this.bridge.checkPermissions();
    if (!perms.trusted) {
      throw new Error(
        "Accessibility permission not granted. Go to System Settings → Privacy & Security → Accessibility and enable this app.",
      );
    }

    const info: SessionInfo = {
      sessionId: `ax_session_${profile}_${Date.now()}`,
      profile,
      createdAt: new Date().toISOString(),
      adapterType: "accessibility",
    };

    // Default to frontmost app
    const frontmost = await this.bridge.call<{ bundleId: string; name: string; pid: number }>(
      "app.frontmost",
    );

    const state: AXSessionState = {
      info,
      pid: frontmost.pid,
      bundleId: frontmost.bundleId,
      appName: frontmost.name,
    };

    this.sessions.set(info.sessionId, state);
    this.sessionsByProfile.set(profile, state);
    return info;
  }

  /**
   * Describe the application the session currently targets.
   *
   * The window title is read from the first `AXWindow` child of a depth-1
   * element tree; if the tree cannot be fetched the title is left empty.
   *
   * @throws Error when the session ID is unknown.
   */
  async getAppContext(sessionId: string): Promise<AppContext> {
    const state = this.requireSession(sessionId);
    // Get window title from AX tree
    let windowTitle = "";
    try {
      const tree = await this.bridge.call<AXNode>("ax.getElementTree", {
        pid: state.pid,
        maxDepth: 1,
      });
      const window = tree.children?.find((c) => c.role === "AXWindow");
      windowTitle = window?.title ?? "";
    } catch {
      // Best effort: a missing tree only costs us the window title.
    }

    return {
      bundleId: state.bundleId,
      appName: state.appName,
      pid: state.pid,
      windowTitle,
    };
  }

  /**
   * Page-style metadata for the targeted app: an `app://<bundleId>` URL
   * (unless the context carries its own URL) and the window title, falling
   * back to the app name when the title is empty.
   */
  async getPageMeta(sessionId: string): Promise<PageMeta> {
    const ctx = await this.getAppContext(sessionId);
    return {
      url: ctx.url ?? `app://${ctx.bundleId}`,
      title: ctx.windowTitle || ctx.appName,
    };
  }

  /**
   * "Navigate" a desktop session: an `app://<bundleId>` URL launches that app
   * and retargets the session to it. Any other URL leaves the session
   * untouched. The timeout argument is not used by this adapter.
   *
   * @returns Page metadata for the app targeted after the call.
   */
  async navigate(sessionId: string, url: string, _timeoutMs: number): Promise<PageMeta> {
    // For desktop apps, "navigate" means launching/focusing an app by bundle ID
    const state = this.requireSession(sessionId);
    if (url.startsWith("app://")) {
      const bundleId = url.slice(6);
      const result = await this.bridge.call<{ pid: number; appName: string; bundleId: string }>(
        "app.launch",
        { bundleId },
      );
      state.pid = result.pid;
      state.bundleId = result.bundleId;
      state.appName = result.appName;
    }
    return this.getPageMeta(sessionId);
  }

  /**
   * Poll the bridge for an element matching `target`.
   *
   * The bridge is always queried at least once, even when `timeoutMs` is zero
   * or negative. Target kinds this adapter cannot express (`coordinates`,
   * `image`) can never match, so they yield `null` immediately instead of
   * burning the whole timeout.
   *
   * @param timeoutMs Maximum time to keep retrying, in milliseconds.
   * @returns The located element, or `null` if nothing matched in time.
   * @throws Error when the session ID is unknown.
   */
  async locate(sessionId: string, target: Target, timeoutMs: number): Promise<LocatedElement | null> {
    const state = this.requireSession(sessionId);

    let params: Record<string, unknown>;
    try {
      params = this.buildFindParams(target, state.pid);
    } catch {
      // Unsupported target kind: retrying cannot change the outcome.
      return null;
    }

    const deadline = Date.now() + timeoutMs;
    for (;;) {
      // The session may be retargeted to another app while we are polling.
      params.pid = state.pid;
      try {
        const result = await this.bridge.call<BridgeElement>("ax.findElement", params);
        if (result) {
          const located: LocatedElement = {
            handleId: result.handleId,
            locatorUsed: `ax:${target.type}`,
            role: result.role,
            label: result.title,
          };
          if (result.bounds) {
            located.coordinates = result.bounds;
          }
          return located;
        }
      } catch {
        // Element not found yet, keep polling
      }

      const remaining = deadline - Date.now();
      // Written as a negated ">" so a NaN timeout also terminates the loop.
      if (!(remaining > 0)) return null;
      await sleep(Math.min(POLL_INTERVAL_MS, remaining));
    }
  }

  /**
   * Click an element: press it through accessibility when its handle encodes
   * an element path, otherwise click the centre of its coordinates.
   *
   * @throws Error when the element has neither a usable path nor coordinates.
   */
  async click(sessionId: string, element: LocatedElement): Promise<void> {
    const state = this.requireSession(sessionId);
    const elementPath = this.parseElementPath(element.handleId);

    if (elementPath) {
      await this.bridge.call("ax.performAction", {
        pid: state.pid,
        elementPath,
        action: "AXPress",
      });
    } else if (element.coordinates) {
      // Fallback to coordinate click
      const cx = element.coordinates.x + element.coordinates.width / 2;
      const cy = element.coordinates.y + element.coordinates.height / 2;
      await this.bridge.call("cg.mouseClick", { x: cx, y: cy });
    } else {
      throw new Error("Cannot click: no element path or coordinates");
    }
  }

  /**
   * Put `text` into an element.
   *
   * When `clear` is set and the element has a path, the value is first set
   * directly through accessibility. If that is not possible (or fails), the
   * element is clicked to focus it, its content is selected with cmd+a when
   * clearing, and the text is typed.
   */
  async setValue(sessionId: string, element: LocatedElement, text: string, clear: boolean): Promise<void> {
    const state = this.requireSession(sessionId);
    const elementPath = this.parseElementPath(element.handleId);

    if (clear && elementPath) {
      // Try AX value set first
      try {
        await this.bridge.call("ax.setElementValue", {
          pid: state.pid,
          elementPath,
          value: text,
        });
        return;
      } catch {
        // Fallback: click, select all, type
      }
    }

    // Fallback: click to focus, select all if clearing, then type
    await this.click(sessionId, element);
    await sleep(50);
    if (clear) {
      await this.bridge.call("cg.keyCombo", { keys: ["cmd", "a"] });
      await sleep(50);
    }
    await this.bridge.call("cg.typeText", { text });
  }

  /**
   * Read an element's value through accessibility.
   *
   * @returns The value, or an empty string when the handle carries no element
   *   path or the bridge reports no value.
   */
  async getValue(sessionId: string, element: LocatedElement): Promise<string> {
    const state = this.requireSession(sessionId);
    const elementPath = this.parseElementPath(element.handleId);
    if (!elementPath) return "";

    const result = await this.bridge.call<{ value: string }>("ax.getElementValue", {
      pid: state.pid,
      elementPath,
    });
    // Keep the declared string contract even if the bridge omits the value.
    return result?.value ?? "";
  }

  /**
   * Poll until `condition` holds or `timeoutMs` elapses.
   *
   * @returns `true` as soon as the condition is met, `false` on timeout.
   */
  async waitFor(sessionId: string, condition: WaitCondition, timeoutMs: number): Promise<boolean> {
    const deadline = Date.now() + timeoutMs;

    while (Date.now() < deadline) {
      const met = await this.checkCondition(sessionId, condition);
      if (met) return true;
      await sleep(POLL_INTERVAL_MS);
    }
    return false;
  }

  /**
   * Extract data from the targeted app.
   *
   * - `"text"`: locate `target` and return its value, else its label, else "".
   * - `"json"`: return the element tree (depth 3).
   * - anything else (table): return `{ headers, rows }` built from a flattened
   *   depth-2 element tree.
   *
   * @throws Error when a text target cannot be located.
   */
  async extract(sessionId: string, target: Target, format: ExtractFormat): Promise<unknown> {
    const state = this.requireSession(sessionId);

    if (format === "text") {
      // Get element tree and extract text content
      const element = await this.locate(sessionId, target, 1500);
      if (!element) throw new Error("Extract target not found");
      const result = await this.getValue(sessionId, element);
      return result || element.label || "";
    }

    if (format === "json") {
      // Return the AX tree as JSON
      const tree = await this.bridge.call<AXNode>("ax.getElementTree", {
        pid: state.pid,
        maxDepth: 3,
      });
      return tree;
    }

    // table format: return element tree in tabular form
    const tree = await this.bridge.call<AXNode>("ax.getElementTree", {
      pid: state.pid,
      maxDepth: 2,
    });
    return {
      headers: ["role", "title", "value"],
      rows: this.flattenTree(tree).map((n) => [n.role, n.title ?? "", n.value ?? ""]),
    };
  }

  /**
   * Capture an image and return the path reported by the bridge.
   *
   * If a window belonging to the targeted process is found, that window is
   * captured and `region` is not applied; `region` only takes effect in the
   * screen-capture fallback.
   */
  async screenshot(sessionId: string, region?: { x: number; y: number; width: number; height: number }): Promise<string> {
    const state = this.requireSession(sessionId);
    // Get window list to find the window ID for the app
    const windows = await this.bridge.call<WindowInfo[]>("app.windows");
    const appWindow = windows.find((w) => w.pid === state.pid);

    if (appWindow) {
      const result = await this.bridge.call<{ path: string }>(
        "cg.captureWindow",
        { windowId: appWindow.windowId },
      );
      return result.path;
    }

    // Fallback to screen capture with region
    const result = await this.bridge.call<{ path: string }>(
      "cg.captureScreen",
      region ? { region } : {},
    );
    return result.path;
  }

  // ── Desktop-specific methods ──

  /**
   * Launch an app by bundle ID and retarget the session to it.
   *
   * @returns Context for the launched app; the window title is left empty.
   */
  async launchApp(sessionId: string, bundleId: string): Promise<AppContext> {
    const state = this.requireSession(sessionId);
    const result = await this.bridge.call<{ bundleId: string; appName: string; pid: number }>(
      "app.launch",
      { bundleId },
    );
    // Update session to track new app
    state.pid = result.pid;
    state.bundleId = result.bundleId;
    state.appName = result.appName;

    return {
      bundleId: result.bundleId,
      appName: result.appName,
      pid: result.pid,
      windowTitle: "",
    };
  }

  /**
   * Focus an app by bundle ID. When it differs from the currently targeted
   * app and appears in the running-app list, the session is retargeted to it.
   */
  async focusApp(sessionId: string, bundleId: string): Promise<void> {
    const state = this.requireSession(sessionId);
    await this.bridge.call("app.focus", { bundleId });
    // Update PID if different app
    if (bundleId !== state.bundleId) {
      const apps = await this.bridge.call<RunningApp[]>("app.list");
      const app = apps.find((a) => a.bundleId === bundleId);
      if (app) {
        state.pid = app.pid;
        state.bundleId = bundleId;
        state.appName = app.name;
      }
    }
  }

  /** List running applications as reported by the bridge. */
  async listApps(_sessionId: string): Promise<RunningApp[]> {
    return this.bridge.call<RunningApp[]>("app.list");
  }

  /** List windows as reported by the bridge. */
  async listWindows(_sessionId: string): Promise<WindowInfo[]> {
    return this.bridge.call<WindowInfo[]>("app.windows");
  }

  /** Activate a menu item of the targeted app, addressed by its menu path. */
  async menuClick(sessionId: string, menuPath: string[]): Promise<void> {
    const state = this.requireSession(sessionId);
    await this.bridge.call("ax.menuClick", { pid: state.pid, menuPath });
  }

  /** Send a key combination through the bridge. */
  async keyCombo(_sessionId: string, keys: string[]): Promise<void> {
    await this.bridge.call("cg.keyCombo", { keys });
  }

  /**
   * Fetch the targeted app's element tree.
   *
   * @param maxDepth Depth limit passed to the bridge; defaults to 5.
   * @param _root Accepted for interface compatibility; not used here.
   */
  async elementTree(sessionId: string, maxDepth?: number, _root?: Target): Promise<AXNode> {
    const state = this.requireSession(sessionId);
    return this.bridge.call<AXNode>("ax.getElementTree", {
      pid: state.pid,
      maxDepth: maxDepth ?? 5,
    });
  }

  /**
   * Drag from the centre of one element to the centre of another.
   *
   * @throws Error when either element lacks coordinates.
   */
  async drag(sessionId: string, from: LocatedElement, to: LocatedElement): Promise<void> {
    if (!from.coordinates || !to.coordinates) {
      throw new Error("Drag requires elements with coordinates");
    }
    const fromX = from.coordinates.x + from.coordinates.width / 2;
    const fromY = from.coordinates.y + from.coordinates.height / 2;
    const toX = to.coordinates.x + to.coordinates.width / 2;
    const toY = to.coordinates.y + to.coordinates.height / 2;

    await this.bridge.call("cg.mouseDrag", { fromX, fromY, toX, toY });
  }

  /**
   * Scroll by `amount` in `direction`, at the centre of `element` when it has
   * coordinates and otherwise at the fixed point (500, 400).
   */
  async scroll(_sessionId: string, direction: "up" | "down" | "left" | "right", amount: number, element?: LocatedElement): Promise<void> {
    let x = 500;
    let y = 400;

    if (element?.coordinates) {
      x = element.coordinates.x + element.coordinates.width / 2;
      y = element.coordinates.y + element.coordinates.height / 2;
    }

    const deltaMap = {
      up: { deltaX: 0, deltaY: -amount },
      down: { deltaX: 0, deltaY: amount },
      left: { deltaX: -amount, deltaY: 0 },
      right: { deltaX: amount, deltaY: 0 },
    };

    const delta = deltaMap[direction];
    await this.bridge.call("cg.scroll", { x, y, ...delta });
  }

  // ── Private helpers ──

  /**
   * Look up session state.
   *
   * @throws Error when the session ID is unknown.
   */
  private requireSession(sessionId: string): AXSessionState {
    const state = this.sessions.get(sessionId);
    if (!state) throw new Error(`Session not found: ${sessionId}`);
    return state;
  }

  /**
   * Translate a target into the parameter bag for `ax.findElement`.
   *
   * @throws Error for target kinds this adapter cannot resolve
   *   (`coordinates`, `image`).
   */
  private buildFindParams(target: Target, pid: number): Record<string, unknown> {
    const params: Record<string, unknown> = { pid };

    switch (target.type) {
      case "role":
        params.role = toAXRole(target.role);
        params.title = target.name;
        params.exact = target.exact ?? true;
        break;
      case "text":
        params.title = target.value;
        params.exact = target.exact ?? true;
        break;
      case "selector":
        // For AX, treat selector as an identifier
        params.identifier = target.value;
        break;
      case "ax_path":
        // Only the last path segment is used, as the role to search for.
        params.role = target.path[target.path.length - 1];
        break;
      case "ax_attribute":
        params[target.attribute] = target.value;
        break;
      case "coordinates":
        // Can't find by coordinates via AX, will fallback to vision
        throw new Error("Cannot locate by coordinates using accessibility adapter");
      case "image":
        throw new Error("Cannot locate by image using accessibility adapter");
    }

    return params;
  }

  /**
   * Decode a bridge handle of the form `ax_0_1_2` into its index path.
   *
   * Every segment after the `ax_` prefix must be a plain non-negative decimal
   * integer. A handle with any other segment is rejected as a whole rather
   * than partially decoded, because a path with segments dropped (or an empty
   * segment read as 0) would address a different element.
   *
   * @returns The index path, or `null` when the handle is not a valid AX path.
   */
  private parseElementPath(handleId: string): number[] | null {
    // Handle IDs from the bridge are formatted as "ax_0_1_2"
    if (!handleId.startsWith("ax_")) return null;

    const indices: number[] = [];
    for (const segment of handleId.slice(3).split("_")) {
      if (!/^\d+$/.test(segment)) return null;
      indices.push(Number(segment));
    }
    return indices.length > 0 ? indices : null;
  }

  /**
   * Evaluate a wait condition once.
   *
   * Element and text checks use a short (100 ms) locate. Browser-specific and
   * unrecognised condition types are reported as not met.
   */
  private async checkCondition(sessionId: string, condition: WaitCondition): Promise<boolean> {
    switch (condition.type) {
      case "element_exists": {
        const found = await this.locate(sessionId, condition.target, 100);
        return found !== null;
      }
      case "element_gone": {
        const found = await this.locate(sessionId, condition.target, 100);
        return found === null;
      }
      case "window_title_matches": {
        const ctx = await this.getAppContext(sessionId);
        return new RegExp(condition.regex).test(ctx.windowTitle);
      }
      case "text_appears": {
        const found = await this.locate(
          sessionId,
          { type: "text", value: condition.text },
          100,
        );
        return found !== null;
      }
      case "app_idle":
        // Simplified: idleness is not measured; the app is always treated as idle.
        return true;
      case "selector_visible":
      case "selector_hidden":
      case "url_matches":
      case "spinner_disappears":
      default:
        // Browser-specific conditions not fully supported
        return false;
    }
  }

  /**
   * Flatten a tree into a pre-order list (node first, then its descendants).
   *
   * @param node Root of the subtree to flatten.
   * @param out Accumulator shared across the recursion, so each node is
   *   appended exactly once instead of being re-copied at every level.
   */
  private flattenTree(node: AXNode, out: AXNode[] = []): AXNode[] {
    out.push(node);
    if (node.children) {
      for (const child of node.children) {
        this.flattenTree(child, out);
      }
    }
    return out;
  }
}
484
-
485
/**
 * Promise-based delay.
 *
 * @param ms Delay in milliseconds before the returned promise resolves.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
@@ -1,169 +0,0 @@
1
- // Copyright (C) 2025 Clazro Technology Private Limited
2
- // SPDX-License-Identifier: AGPL-3.0-only
3
- //
4
- // This file is part of ScreenHand.
5
- //
6
- // ScreenHand is free software: you can redistribute it and/or modify
7
- // it under the terms of the GNU Affero General Public License as
8
- // published by the Free Software Foundation, version 3.
9
- //
10
- // ScreenHand is distributed in the hope that it will be useful,
11
- // but WITHOUT ANY WARRANTY; without even the implied warranty of
12
- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
- // GNU Affero General Public License for more details.
14
- //
15
- // You should have received a copy of the GNU Affero General Public License
16
- // along with ScreenHand. If not, see <https://www.gnu.org/licenses/>.
17
-
18
- import type {
19
- AXNode,
20
- AppContext,
21
- ExtractFormat,
22
- LocatedElement,
23
- PageMeta,
24
- RunningApp,
25
- SessionInfo,
26
- Target,
27
- WaitCondition,
28
- WindowInfo,
29
- } from "../types.js";
30
-
31
/**
 * Contract implemented by every application driver.
 *
 * It is not limited to browsers: native desktop apps can be driven through
 * accessibility APIs, AppleScript, vision, and similar mechanisms.
 */
export interface AppAdapter {
  /** Attaches to (or opens) an application session for the given profile. */
  attach(profile: string): Promise<SessionInfo>;

  /** Returns the current application context; the app-level counterpart of the browser-only PageMeta. */
  getAppContext(sessionId: string): Promise<AppContext>;

  /** Returns page metadata: URL and title for browser adapters, the window title for desktop adapters. */
  getPageMeta(sessionId: string): Promise<PageMeta>;

  /** Opens a URL (browser) or a path/resource (desktop). */
  navigate(sessionId: string, url: string, timeoutMs: number): Promise<PageMeta>;

  /** Finds an element on screen; resolves to null when nothing is located. */
  locate(
    sessionId: string,
    target: Target,
    timeoutMs: number,
  ): Promise<LocatedElement | null>;

  /** Clicks or presses a previously located element. */
  click(sessionId: string, element: LocatedElement): Promise<void>;

  /** Writes `text` into an input element; `clear` is the flag for clearing the existing value. */
  setValue(
    sessionId: string,
    element: LocatedElement,
    text: string,
    clear: boolean,
  ): Promise<void>;

  /** Reads the current value of an element. */
  getValue(sessionId: string, element: LocatedElement): Promise<string>;

  /** Waits for a condition to be met, bounded by `timeoutMs`. */
  waitFor(
    sessionId: string,
    condition: WaitCondition,
    timeoutMs: number,
  ): Promise<boolean>;

  /** Extracts data from the targeted element in the requested format. */
  extract(sessionId: string, target: Target, format: ExtractFormat): Promise<unknown>;

  /** Captures a screenshot, optionally restricted to a rectangular region. */
  screenshot(sessionId: string, region?: { x: number; y: number; width: number; height: number }): Promise<string>;

  // ── Optional desktop-only capabilities (an adapter may omit them or throw NOT_IMPLEMENTED) ──

  /** Launches an application identified by its bundle ID. */
  launchApp?(sessionId: string, bundleId: string): Promise<AppContext>;

  /** Brings an application to the front and gives it focus. */
  focusApp?(sessionId: string, bundleId: string): Promise<void>;

  /** Lists all running applications. */
  listApps?(sessionId: string): Promise<RunningApp[]>;

  /** Lists all windows. */
  listWindows?(sessionId: string): Promise<WindowInfo[]>;

  /** Clicks a menu item addressed by its path, e.g. ["File", "New Window"]. */
  menuClick?(sessionId: string, menuPath: string[]): Promise<void>;

  /** Sends a keyboard shortcut, e.g. ["cmd", "c"]. */
  keyCombo?(sessionId: string, keys: string[]): Promise<void>;

  /** Returns the accessibility element tree, optionally limited in depth and rooted at a target. */
  elementTree?(sessionId: string, maxDepth?: number, root?: Target): Promise<AXNode>;

  /** Drags from one located element to another. */
  drag?(sessionId: string, from: LocatedElement, to: LocatedElement): Promise<void>;

  /** Scrolls by `amount` in the given direction, optionally on a specific element. */
  scroll?(
    sessionId: string,
    direction: "up" | "down" | "left" | "right",
    amount: number,
    element?: LocatedElement,
  ): Promise<void>;
}
111
-
112
/**
 * Stub adapter for tests or for running without a real adapter configured.
 *
 * Session, context, page-meta and navigation calls resolve with canned
 * values; every element-level operation rejects with an
 * "App adapter not implemented: <method>" error.
 */
export class PlaceholderAppAdapter implements AppAdapter {
  /**
   * Class-wide sequence number appended to session IDs. A millisecond
   * timestamp alone is not unique: two attaches for the same profile inside
   * one millisecond would otherwise receive the same sessionId.
   */
  private static sessionSequence = 0;

  /**
   * Creates a synthetic session for `profile`.
   *
   * @returns Session info whose id has the form
   *   `session_<profile>_<epoch ms>_<sequence>` and whose `createdAt` is an
   *   ISO-8601 timestamp.
   */
  async attach(profile: string): Promise<SessionInfo> {
    const now = new Date();
    PlaceholderAppAdapter.sessionSequence += 1;
    return {
      sessionId: `session_${profile}_${now.getTime()}_${PlaceholderAppAdapter.sessionSequence}`,
      profile,
      createdAt: now.toISOString(),
    };
  }

  /** Resolves with a fixed placeholder application context. */
  async getAppContext(_sessionId: string): Promise<AppContext> {
    return {
      bundleId: "com.placeholder",
      appName: "Placeholder",
      pid: 0,
      windowTitle: "Placeholder Session",
    };
  }

  /** Resolves with a fixed blank-page descriptor. */
  async getPageMeta(_sessionId: string): Promise<PageMeta> {
    return { url: "about:blank", title: "Placeholder Session" };
  }

  /** Echoes the requested URL back with a fixed title; nothing is actually opened. */
  async navigate(_sessionId: string, url: string, _timeoutMs: number): Promise<PageMeta> {
    return { url, title: "Placeholder Navigation" };
  }

  /** Not supported by the placeholder. @throws Error always (as a rejected promise). */
  async locate(_sessionId: string, _target: Target, _timeoutMs: number): Promise<LocatedElement | null> {
    throw new Error("App adapter not implemented: locate");
  }

  /** Not supported by the placeholder. @throws Error always (as a rejected promise). */
  async click(_sessionId: string, _element: LocatedElement): Promise<void> {
    throw new Error("App adapter not implemented: click");
  }

  /** Not supported by the placeholder. @throws Error always (as a rejected promise). */
  async setValue(_sessionId: string, _element: LocatedElement, _text: string, _clear: boolean): Promise<void> {
    throw new Error("App adapter not implemented: setValue");
  }

  /** Not supported by the placeholder. @throws Error always (as a rejected promise). */
  async getValue(_sessionId: string, _element: LocatedElement): Promise<string> {
    throw new Error("App adapter not implemented: getValue");
  }

  /** Not supported by the placeholder. @throws Error always (as a rejected promise). */
  async waitFor(_sessionId: string, _condition: WaitCondition, _timeoutMs: number): Promise<boolean> {
    throw new Error("App adapter not implemented: waitFor");
  }

  /** Not supported by the placeholder. @throws Error always (as a rejected promise). */
  async extract(_sessionId: string, _target: Target, _format: ExtractFormat): Promise<unknown> {
    throw new Error("App adapter not implemented: extract");
  }

  /** Not supported by the placeholder. @throws Error always (as a rejected promise). */
  async screenshot(_sessionId: string, _region?: { x: number; y: number; width: number; height: number }): Promise<string> {
    throw new Error("App adapter not implemented: screenshot");
  }
}