screenhand 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (177) hide show
  1. package/README.md +458 -93
  2. package/dist/.audit-log.jsonl +55 -0
  3. package/dist/.screenhand/memory/.lock +1 -0
  4. package/dist/.screenhand/memory/actions.jsonl +85 -0
  5. package/dist/.screenhand/memory/errors.jsonl +5 -0
  6. package/dist/.screenhand/memory/errors.jsonl.bak +4 -0
  7. package/dist/.screenhand/memory/state.json +35 -0
  8. package/dist/.screenhand/memory/state.json.bak +35 -0
  9. package/dist/.screenhand/memory/strategies.jsonl +12 -0
  10. package/dist/agent/cli.js +73 -0
  11. package/dist/agent/loop.js +258 -0
  12. package/dist/config.js +9 -0
  13. package/dist/index.js +56 -0
  14. package/dist/logging/timeline-logger.js +29 -0
  15. package/dist/mcp/mcp-stdio-server.js +448 -0
  16. package/dist/mcp/server.js +347 -0
  17. package/dist/mcp-desktop.js +2731 -0
  18. package/dist/mcp-entry.js +59 -0
  19. package/dist/memory/recall.js +160 -0
  20. package/dist/memory/research.js +98 -0
  21. package/dist/memory/seeds.js +89 -0
  22. package/dist/memory/session.js +161 -0
  23. package/dist/memory/store.js +391 -0
  24. package/dist/memory/types.js +4 -0
  25. package/dist/monitor/codex-monitor.js +377 -0
  26. package/dist/monitor/task-queue.js +84 -0
  27. package/dist/monitor/types.js +49 -0
  28. package/dist/native/bridge-client.js +174 -0
  29. package/dist/native/macos-bridge-client.js +5 -0
  30. package/dist/npm-publish-helper.js +117 -0
  31. package/dist/npm-token-cdp.js +113 -0
  32. package/dist/npm-token-create.js +135 -0
  33. package/dist/npm-token-finish.js +126 -0
  34. package/dist/playbook/engine.js +193 -0
  35. package/dist/playbook/index.js +4 -0
  36. package/dist/playbook/recorder.js +519 -0
  37. package/dist/playbook/runner.js +392 -0
  38. package/dist/playbook/store.js +166 -0
  39. package/dist/playbook/types.js +4 -0
  40. package/dist/runtime/accessibility-adapter.js +377 -0
  41. package/dist/runtime/app-adapter.js +48 -0
  42. package/dist/runtime/applescript-adapter.js +283 -0
  43. package/dist/runtime/ax-role-map.js +80 -0
  44. package/dist/runtime/browser-adapter.js +36 -0
  45. package/dist/runtime/cdp-chrome-adapter.js +505 -0
  46. package/dist/runtime/composite-adapter.js +205 -0
  47. package/dist/runtime/executor.js +250 -0
  48. package/dist/runtime/locator-cache.js +12 -0
  49. package/dist/runtime/planning-loop.js +47 -0
  50. package/dist/runtime/service.js +372 -0
  51. package/dist/runtime/session-manager.js +28 -0
  52. package/dist/runtime/state-observer.js +105 -0
  53. package/dist/runtime/vision-adapter.js +208 -0
  54. package/dist/scripts/codex-monitor-daemon.js +335 -0
  55. package/dist/scripts/supervisor-daemon.js +272 -0
  56. package/dist/scripts/worker-daemon.js +228 -0
  57. package/dist/src/agent/cli.js +82 -0
  58. package/dist/src/agent/loop.js +274 -0
  59. package/{src/config.ts → dist/src/config.js} +5 -10
  60. package/{src/index.ts → dist/src/index.js} +32 -52
  61. package/dist/src/jobs/manager.js +237 -0
  62. package/dist/src/jobs/runner.js +683 -0
  63. package/dist/src/jobs/store.js +102 -0
  64. package/dist/src/jobs/types.js +30 -0
  65. package/dist/src/jobs/worker.js +97 -0
  66. package/dist/src/logging/timeline-logger.js +45 -0
  67. package/dist/src/mcp/mcp-stdio-server.js +464 -0
  68. package/dist/src/mcp/server.js +363 -0
  69. package/dist/src/mcp-entry.js +60 -0
  70. package/dist/src/memory/recall.js +170 -0
  71. package/dist/src/memory/research.js +104 -0
  72. package/dist/src/memory/seeds.js +101 -0
  73. package/dist/src/memory/service.js +421 -0
  74. package/dist/src/memory/session.js +169 -0
  75. package/dist/src/memory/store.js +422 -0
  76. package/dist/src/memory/types.js +17 -0
  77. package/dist/src/monitor/codex-monitor.js +382 -0
  78. package/dist/src/monitor/task-queue.js +97 -0
  79. package/dist/src/monitor/types.js +62 -0
  80. package/dist/src/native/bridge-client.js +190 -0
  81. package/{src/native/macos-bridge-client.ts → dist/src/native/macos-bridge-client.js} +0 -1
  82. package/dist/src/playbook/engine.js +201 -0
  83. package/dist/src/playbook/index.js +20 -0
  84. package/dist/src/playbook/recorder.js +535 -0
  85. package/dist/src/playbook/runner.js +408 -0
  86. package/dist/src/playbook/store.js +183 -0
  87. package/dist/src/playbook/types.js +17 -0
  88. package/dist/src/runtime/accessibility-adapter.js +393 -0
  89. package/dist/src/runtime/app-adapter.js +64 -0
  90. package/dist/src/runtime/applescript-adapter.js +299 -0
  91. package/dist/src/runtime/ax-role-map.js +96 -0
  92. package/dist/src/runtime/browser-adapter.js +52 -0
  93. package/dist/src/runtime/cdp-chrome-adapter.js +521 -0
  94. package/dist/src/runtime/composite-adapter.js +221 -0
  95. package/dist/src/runtime/execution-contract.js +159 -0
  96. package/dist/src/runtime/executor.js +266 -0
  97. package/{src/runtime/locator-cache.ts → dist/src/runtime/locator-cache.js} +10 -15
  98. package/dist/src/runtime/planning-loop.js +63 -0
  99. package/dist/src/runtime/service.js +388 -0
  100. package/dist/src/runtime/session-manager.js +60 -0
  101. package/dist/src/runtime/state-observer.js +121 -0
  102. package/dist/src/runtime/vision-adapter.js +224 -0
  103. package/dist/src/supervisor/locks.js +186 -0
  104. package/dist/src/supervisor/supervisor.js +403 -0
  105. package/dist/src/supervisor/types.js +30 -0
  106. package/dist/src/test-mcp-protocol.js +154 -0
  107. package/dist/src/types.js +17 -0
  108. package/dist/src/util/atomic-write.js +118 -0
  109. package/dist/test-mcp-protocol.js +138 -0
  110. package/dist/types.js +1 -0
  111. package/package.json +18 -4
  112. package/.claude/commands/automate.md +0 -28
  113. package/.claude/commands/debug-ui.md +0 -19
  114. package/.claude/commands/screenshot.md +0 -15
  115. package/.github/FUNDING.yml +0 -1
  116. package/.github/ISSUE_TEMPLATE/bug_report.md +0 -27
  117. package/.github/ISSUE_TEMPLATE/feature_request.md +0 -20
  118. package/.mcp.json +0 -8
  119. package/DESKTOP_MCP_GUIDE.md +0 -92
  120. package/SECURITY.md +0 -44
  121. package/docs/architecture.md +0 -47
  122. package/install-skills.sh +0 -19
  123. package/mcp-bridge.ts +0 -271
  124. package/mcp-desktop.ts +0 -1221
  125. package/native/macos-bridge/Package.swift +0 -21
  126. package/native/macos-bridge/Sources/AccessibilityBridge.swift +0 -261
  127. package/native/macos-bridge/Sources/AppManagement.swift +0 -129
  128. package/native/macos-bridge/Sources/CoreGraphicsBridge.swift +0 -242
  129. package/native/macos-bridge/Sources/ObserverBridge.swift +0 -120
  130. package/native/macos-bridge/Sources/VisionBridge.swift +0 -80
  131. package/native/macos-bridge/Sources/main.swift +0 -345
  132. package/native/windows-bridge/AppManagement.cs +0 -234
  133. package/native/windows-bridge/InputBridge.cs +0 -436
  134. package/native/windows-bridge/Program.cs +0 -265
  135. package/native/windows-bridge/ScreenCapture.cs +0 -329
  136. package/native/windows-bridge/UIAutomationBridge.cs +0 -571
  137. package/native/windows-bridge/WindowsBridge.csproj +0 -17
  138. package/playbooks/devpost.json +0 -186
  139. package/playbooks/instagram.json +0 -41
  140. package/playbooks/instagram_v2.json +0 -201
  141. package/playbooks/x_v1.json +0 -211
  142. package/scripts/devpost-live-loop.mjs +0 -421
  143. package/src/logging/timeline-logger.ts +0 -55
  144. package/src/mcp/server.ts +0 -449
  145. package/src/memory/recall.ts +0 -191
  146. package/src/memory/research.ts +0 -146
  147. package/src/memory/seeds.ts +0 -123
  148. package/src/memory/session.ts +0 -201
  149. package/src/memory/store.ts +0 -434
  150. package/src/memory/types.ts +0 -69
  151. package/src/native/bridge-client.ts +0 -239
  152. package/src/runtime/accessibility-adapter.ts +0 -487
  153. package/src/runtime/app-adapter.ts +0 -169
  154. package/src/runtime/applescript-adapter.ts +0 -376
  155. package/src/runtime/ax-role-map.ts +0 -102
  156. package/src/runtime/browser-adapter.ts +0 -129
  157. package/src/runtime/cdp-chrome-adapter.ts +0 -676
  158. package/src/runtime/composite-adapter.ts +0 -274
  159. package/src/runtime/executor.ts +0 -396
  160. package/src/runtime/planning-loop.ts +0 -81
  161. package/src/runtime/service.ts +0 -448
  162. package/src/runtime/session-manager.ts +0 -50
  163. package/src/runtime/state-observer.ts +0 -136
  164. package/src/runtime/vision-adapter.ts +0 -297
  165. package/src/types.ts +0 -297
  166. package/tests/bridge-client.test.ts +0 -176
  167. package/tests/browser-stealth.test.ts +0 -210
  168. package/tests/composite-adapter.test.ts +0 -64
  169. package/tests/mcp-server.test.ts +0 -151
  170. package/tests/memory-recall.test.ts +0 -339
  171. package/tests/memory-research.test.ts +0 -159
  172. package/tests/memory-seeds.test.ts +0 -120
  173. package/tests/memory-store.test.ts +0 -392
  174. package/tests/types.test.ts +0 -92
  175. package/tsconfig.check.json +0 -17
  176. package/tsconfig.json +0 -19
  177. package/vitest.config.ts +0 -8
@@ -0,0 +1,63 @@
1
+ // Copyright (C) 2025 Clazro Technology Private Limited
2
+ // SPDX-License-Identifier: AGPL-3.0-only
3
+ //
4
+ // This file is part of ScreenHand.
5
+ //
6
+ // ScreenHand is free software: you can redistribute it and/or modify
7
+ // it under the terms of the GNU Affero General Public License as
8
+ // published by the Free Software Foundation, version 3.
9
+ //
10
+ // ScreenHand is distributed in the hope that it will be useful,
11
+ // but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ // GNU Affero General Public License for more details.
14
+ //
15
+ // You should have received a copy of the GNU Affero General Public License
16
+ // along with ScreenHand. If not, see <https://www.gnu.org/licenses/>.
17
/**
 * Planning loop that sits between the LLM and the UI: it hands out buffered
 * UI events together with a snapshot of the current state so the LLM can
 * react to what happened after each action.
 */
export class PlanningLoop {
    observer;
    adapter;
    /** Most recent action result, keyed by session id. */
    lastActionResults = new Map();

    /**
     * @param observer Event source; this class uses its `drainEvents`,
     *   `peekEvents`, `startObserving`, `stopObserving` and `isObserving`.
     * @param adapter Automation adapter; only `getAppContext` is used here.
     */
    constructor(observer, adapter) {
        this.observer = observer;
        this.adapter = adapter;
    }

    /**
     * Build the post-action snapshot for a session.
     *
     * Draining the observer empties its buffer, so each event is reported
     * in at most one snapshot.
     *
     * @param sessionId Session whose context and last result are reported.
     * @returns `{ recentEvents, appContext, lastActionResult, observing, timestamp }`.
     */
    async getStateSnapshot(sessionId) {
        // Best effort: a failing context lookup (e.g. no active session)
        // yields null instead of failing the whole snapshot.
        const readContext = async () => {
            try {
                return await this.adapter.getAppContext(sessionId);
            }
            catch {
                return null;
            }
        };
        const drained = this.observer.drainEvents();
        const context = await readContext();
        const previous = this.lastActionResults.get(sessionId);
        return {
            recentEvents: drained,
            appContext: context,
            lastActionResult: previous ?? null,
            observing: this.observer.isObserving,
            timestamp: new Date().toISOString(),
        };
    }

    /** Remember `result` as the latest action outcome for `sessionId`. */
    recordActionResult(sessionId, result) {
        this.lastActionResults.set(sessionId, result);
    }

    /** Begin observing process `pid`; `sessionId` is accepted but not used. */
    async startObserving(sessionId, pid) {
        await this.observer.startObserving(pid);
    }

    /** Stop observing process `pid`; `sessionId` is accepted but not used. */
    async stopObserving(sessionId, pid) {
        await this.observer.stopObserving(pid);
    }

    /** Return up to `limit` recent events, leaving the buffer intact. */
    peekEvents(limit = 50) {
        return this.observer.peekEvents(limit);
    }
}
@@ -0,0 +1,388 @@
1
+ // Copyright (C) 2025 Clazro Technology Private Limited
2
+ // SPDX-License-Identifier: AGPL-3.0-only
3
+ //
4
+ // This file is part of ScreenHand.
5
+ //
6
+ // ScreenHand is free software: you can redistribute it and/or modify
7
+ // it under the terms of the GNU Affero General Public License as
8
+ // published by the Free Software Foundation, version 3.
9
+ //
10
+ // ScreenHand is distributed in the hope that it will be useful,
11
+ // but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ // GNU Affero General Public License for more details.
14
+ //
15
+ // You should have received a copy of the GNU Affero General Public License
16
+ // along with ScreenHand. If not, see <https://www.gnu.org/licenses/>.
17
+ import { DEFAULT_NAVIGATE_TIMEOUT_MS, DEFAULT_PROFILE, DEFAULT_WAIT_TIMEOUT_MS, } from "../config.js";
18
+ import { Executor } from "./executor.js";
19
+ import { LocatorCache } from "./locator-cache.js";
20
+ import { SessionManager } from "./session-manager.js";
21
/**
 * Facade over an automation adapter. Each action resolves (or re-attaches)
 * the session, delegates to the adapter, and returns a result envelope:
 *
 *   success: `{ ok: true, data, telemetry }`
 *   failure: `{ ok: false, error: { code: "ACTION_FAILED", message }, telemetry }`
 *
 * The repeated telemetry / session / try-catch scaffolding lives in
 * `runTrackedAction` below so every action reports outcomes identically.
 */
export class AutomationRuntimeService {
    adapter;
    logger;
    sessions;
    executor;

    /**
     * @param adapter Automation backend. Desktop-only capabilities
     *   (`launchApp`, `focusApp`, `listApps`, ...) are optional and are
     *   checked before use.
     * @param logger Timeline logger; `start`, `finish` and `getRecent` are used.
     * @param cache Locator cache handed to the executor.
     */
    constructor(adapter, logger, cache = new LocatorCache()) {
        this.adapter = adapter;
        this.logger = logger;
        this.sessions = new SessionManager(adapter);
        this.executor = new Executor(adapter, cache, logger);
    }

    /** Start a session for `profile` (or return the one already registered). */
    async sessionStart(profile = DEFAULT_PROFILE) {
        return this.sessions.sessionStart(profile);
    }

    /** Navigate to `input.url`; `input.timeoutMs` falls back to the configured default. */
    async navigate(input) {
        return runTrackedAction(this, "navigate", input.sessionId, "Navigate failed", () => this.adapter.navigate(input.sessionId, input.url, input.timeoutMs ?? DEFAULT_NAVIGATE_TIMEOUT_MS));
    }

    /** Wait for `input.condition`; resolves with `data: { matched }`. */
    async waitFor(input) {
        return runTrackedAction(this, "wait_for", input.sessionId, "Wait failed", async () => {
            const matched = await this.adapter.waitFor(input.sessionId, input.condition, input.timeoutMs ?? DEFAULT_WAIT_TIMEOUT_MS);
            return { matched };
        });
    }

    /**
     * Press a target via the executor.
     *
     * Unlike the adapter-backed actions, the session lookup here is not
     * wrapped: a re-attach failure rejects instead of producing an
     * ACTION_FAILED envelope. The result shape is whatever the executor returns.
     */
    async press(input) {
        await this.sessions.requireSessionResilent(input.sessionId);
        return this.executor.press(input);
    }

    /** Type into a target via the executor; same error behaviour as `press`. */
    async typeInto(input) {
        await this.sessions.requireSessionResilent(input.sessionId);
        return this.executor.typeInto(input);
    }

    /** Extract `input.target` in `input.format`. */
    async extract(input) {
        return runTrackedAction(this, "extract", input.sessionId, "Extract failed", () => this.adapter.extract(input.sessionId, input.target, input.format));
    }

    /** Capture a screenshot (optionally of `input.region`); resolves with `data: { path }`. */
    async screenshot(input) {
        return runTrackedAction(this, "screenshot", input.sessionId, "Screenshot failed", async () => {
            const path = await this.adapter.screenshot(input.sessionId, input.region);
            return { path };
        });
    }

    // ── Desktop-specific methods ──

    /** Launch the app identified by `input.bundleId`; `data` is the adapter's return value. */
    async appLaunch(input) {
        return runTrackedAction(this, "app_launch", input.sessionId, "App launch failed", () => {
            requireAdapterMethod(this.adapter, "launchApp");
            return this.adapter.launchApp(input.sessionId, input.bundleId);
        });
    }

    /** Bring the app identified by `input.bundleId` to the foreground. */
    async appFocus(input) {
        return runTrackedAction(this, "app_focus", input.sessionId, "App focus failed", async () => {
            requireAdapterMethod(this.adapter, "focusApp");
            await this.adapter.focusApp(input.sessionId, input.bundleId);
        });
    }

    /** List apps as reported by the adapter. */
    async appList(sessionId) {
        return runTrackedAction(this, "app_list", sessionId, "App list failed", () => {
            requireAdapterMethod(this.adapter, "listApps");
            return this.adapter.listApps(sessionId);
        });
    }

    /** List windows as reported by the adapter. */
    async windowList(sessionId) {
        return runTrackedAction(this, "window_list", sessionId, "Window list failed", () => {
            requireAdapterMethod(this.adapter, "listWindows");
            return this.adapter.listWindows(sessionId);
        });
    }

    /** Click the menu item addressed by `input.menuPath`. */
    async menuClick(input) {
        return runTrackedAction(this, "menu_click", input.sessionId, "Menu click failed", async () => {
            requireAdapterMethod(this.adapter, "menuClick");
            await this.adapter.menuClick(input.sessionId, input.menuPath);
        });
    }

    /** Send the key combination `input.keys`. */
    async keyCombo(input) {
        return runTrackedAction(this, "key_combo", input.sessionId, "Key combo failed", async () => {
            requireAdapterMethod(this.adapter, "keyCombo");
            await this.adapter.keyCombo(input.sessionId, input.keys);
        });
    }

    /** Fetch the element tree, passing `input.maxDepth` and `input.root` through. */
    async elementTree(input) {
        return runTrackedAction(this, "element_tree", input.sessionId, "Element tree failed", () => {
            requireAdapterMethod(this.adapter, "elementTree");
            return this.adapter.elementTree(input.sessionId, input.maxDepth, input.root);
        });
    }

    /** Drag from `input.from` to `input.to`; fails if either end cannot be located. */
    async drag(input) {
        return runTrackedAction(this, "drag", input.sessionId, "Drag failed", async () => {
            requireAdapterMethod(this.adapter, "drag");
            // Located one after the other, source first, as before.
            const fromEl = await this.adapter.locate(input.sessionId, input.from, 800);
            const toEl = await this.adapter.locate(input.sessionId, input.to, 800);
            if (!fromEl || !toEl) {
                throw new Error("Could not locate drag source or destination");
            }
            await this.adapter.drag(input.sessionId, fromEl, toEl);
        });
    }

    /**
     * Scroll in `input.direction` by `input.amount` (default 3).
     * When `input.target` is given but cannot be located, the scroll still
     * happens, just without an element argument.
     */
    async scroll(input) {
        return runTrackedAction(this, "scroll", input.sessionId, "Scroll failed", async () => {
            requireAdapterMethod(this.adapter, "scroll");
            let element;
            if (input.target) {
                const found = await this.adapter.locate(input.sessionId, input.target, 800);
                if (found) {
                    element = found;
                }
            }
            await this.adapter.scroll(input.sessionId, input.direction, input.amount ?? 3, element);
        });
    }

    /**
     * Placeholder: records telemetry and reports success without starting
     * any observer (implemented in Phase 4 when StateObserver is available).
     */
    async observeStart(_input) {
        const telemetry = this.logger.start("observe_start", _input.sessionId);
        return {
            ok: true,
            data: undefined,
            telemetry: this.logger.finish(telemetry, "success"),
        };
    }

    /** Placeholder counterpart of `observeStart`; records telemetry only. */
    async observeStop(_input) {
        const telemetry = this.logger.start("observe_stop", _input.sessionId);
        return {
            ok: true,
            data: undefined,
            telemetry: this.logger.finish(telemetry, "success"),
        };
    }

    /** Return the logger's most recent `limit` timeline entries. */
    getTimeline(limit = 100) {
        return this.logger.getRecent(limit);
    }
}

/**
 * Shared scaffold for adapter-backed actions: open a telemetry span, make
 * sure the session exists, run `operation`, and wrap the outcome in the
 * service's result envelope.
 *
 * @param service Object exposing `logger` and `sessions`.
 * @param action Telemetry action name (e.g. "navigate").
 * @param sessionId Session the action runs against.
 * @param fallbackMessage Error message used when the thrown value is not an `Error`.
 * @param operation Callback producing the `data` payload; may be sync or async.
 */
async function runTrackedAction(service, action, sessionId, fallbackMessage, operation) {
    const telemetry = service.logger.start(action, sessionId);
    try {
        await service.sessions.requireSessionResilent(sessionId);
        const data = await operation();
        return {
            ok: true,
            data,
            telemetry: service.logger.finish(telemetry, "success"),
        };
    }
    catch (error) {
        return {
            ok: false,
            error: {
                code: "ACTION_FAILED",
                message: error instanceof Error ? error.message : fallbackMessage,
            },
            telemetry: service.logger.finish(telemetry, "failed"),
        };
    }
}

/** Throw when the adapter lacks the optional capability `method`. */
function requireAdapterMethod(adapter, method) {
    if (!adapter[method]) {
        throw new Error(`Adapter does not support ${method}`);
    }
}
@@ -0,0 +1,60 @@
1
+ // Copyright (C) 2025 Clazro Technology Private Limited
2
+ // SPDX-License-Identifier: AGPL-3.0-only
3
+ //
4
+ // This file is part of ScreenHand.
5
+ //
6
+ // ScreenHand is free software: you can redistribute it and/or modify
7
+ // it under the terms of the GNU Affero General Public License as
8
+ // published by the Free Software Foundation, version 3.
9
+ //
10
+ // ScreenHand is distributed in the hope that it will be useful,
11
+ // but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ // GNU Affero General Public License for more details.
14
+ //
15
+ // You should have received a copy of the GNU Affero General Public License
16
+ // along with ScreenHand. If not, see <https://www.gnu.org/licenses/>.
17
/**
 * In-memory registry of adapter sessions, indexed both by profile name and
 * by session id.
 */
export class SessionManager {
    adapter;
    sessionsByProfile = new Map();
    sessionsById = new Map();
    /**
     * Attach calls currently awaiting the adapter, keyed by what they were
     * requested for. Lets concurrent callers share one attach instead of
     * each creating a session of their own.
     */
    pendingAttaches = new Map();

    /** @param adapter Backend whose `attach(profile, sessionId?)` creates sessions. */
    constructor(adapter) {
        this.adapter = adapter;
    }

    /**
     * Return the session registered for `profile`, attaching one if needed.
     * Concurrent calls for the same profile resolve to the same session.
     */
    async sessionStart(profile) {
        const existing = this.sessionsByProfile.get(profile);
        if (existing) {
            return existing;
        }
        return this.attachOnce(`profile:${profile}`, profile, () => this.adapter.attach(profile));
    }

    /** Look up a session by id; `undefined` when unknown. */
    getSession(sessionId) {
        return this.sessionsById.get(sessionId);
    }

    /**
     * Look up a session by id.
     * @throws {Error} `Session not found: <id>` when the id is unknown.
     */
    requireSession(sessionId) {
        const session = this.getSession(sessionId);
        if (!session) {
            throw new Error(`Session not found: ${sessionId}`);
        }
        return session;
    }

    /**
     * Like requireSession but auto-recreates expired/missing sessions.
     * MCP servers restart between tool calls, losing in-memory state.
     * This re-attaches transparently so the caller's sessionId stays valid.
     *
     * The profile is recovered from ids shaped like
     * `<ax|cdp|as|vision|composite>_session_<profile>_<digits>`; any other
     * id falls back to the "automation" profile.
     *
     * @throws {TypeError} when `sessionId` is not a string.
     */
    async requireSessionResilent(sessionId) {
        if (typeof sessionId !== "string") {
            // Previously surfaced as an opaque "cannot read properties of
            // undefined (reading 'match')" TypeError.
            throw new TypeError(`sessionId must be a string, got ${sessionId === null ? "null" : typeof sessionId}`);
        }
        const existing = this.getSession(sessionId);
        if (existing) {
            return existing;
        }
        const match = sessionId.match(/^(?:ax|cdp|as|vision|composite)_session_(.+)_\d+$/);
        const profile = match?.[1] ?? "automation";
        return this.attachOnce(`session:${sessionId}`, profile, () => this.adapter.attach(profile, sessionId));
    }

    /**
     * Run `attach` unless an attach for `key` is already in flight, then
     * register the created session under `profile` and its own id.
     * The in-flight entry is dropped once the attach settles, so a failed
     * attach can be retried by a later call.
     */
    attachOnce(key, profile, attach) {
        const inFlight = this.pendingAttaches.get(key);
        if (inFlight) {
            return inFlight;
        }
        const attempt = (async () => {
            const created = await attach();
            this.sessionsByProfile.set(profile, created);
            this.sessionsById.set(created.sessionId, created);
            return created;
        })().finally(() => {
            // Runs asynchronously, i.e. always after the set() below.
            this.pendingAttaches.delete(key);
        });
        this.pendingAttaches.set(key, attempt);
        return attempt;
    }
}
@@ -0,0 +1,121 @@
1
+ // Copyright (C) 2025 Clazro Technology Private Limited
2
+ // SPDX-License-Identifier: AGPL-3.0-only
3
+ //
4
+ // This file is part of ScreenHand.
5
+ //
6
+ // ScreenHand is free software: you can redistribute it and/or modify
7
+ // it under the terms of the GNU Affero General Public License as
8
+ // published by the Free Software Foundation, version 3.
9
+ //
10
+ // ScreenHand is distributed in the hope that it will be useful,
11
+ // but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ // GNU Affero General Public License for more details.
14
+ //
15
+ // You should have received a copy of the GNU Affero General Public License
16
+ // along with ScreenHand. If not, see <https://www.gnu.org/licenses/>.
17
+ import { EventEmitter } from "node:events";
18
/**
 * Wraps the native bridge's AX observer events into typed UIEvent objects.
 * Buffers events for consumption by the planning loop.
 *
 * Emits `"event"` with each parsed UIEvent.
 */
export class StateObserver extends EventEmitter {
    bridge;
    observedPids = new Set();
    eventBuffer = [];
    maxBufferSize;

    /**
     * @param bridge Native bridge; must emit `"ax-event"` and expose `call(method, params)`.
     * @param maxBufferSize Number of events kept; the oldest is dropped on overflow.
     */
    constructor(bridge, maxBufferSize = 200) {
        super();
        this.bridge = bridge;
        this.maxBufferSize = maxBufferSize;
        // Listen for AX events from the bridge
        this.bridge.on("ax-event", (raw) => {
            const event = this.parseEvent(raw);
            if (!event) {
                return;
            }
            this.eventBuffer.push(event);
            if (this.eventBuffer.length > this.maxBufferSize) {
                this.eventBuffer.shift();
            }
            this.emit("event", event);
        });
    }

    /**
     * Ask the bridge to observe `pid`; a no-op when it is already observed.
     * @param pid Process id to observe.
     * @param eventTypes Optional event type names; unknown names are dropped.
     *   When omitted, `notifications` is sent as `undefined`.
     */
    async startObserving(pid, eventTypes) {
        if (this.observedPids.has(pid)) {
            return;
        }
        const notifications = eventTypes
            ? this.mapEventTypesToNotifications(eventTypes)
            : undefined;
        await this.bridge.call("observer.start", {
            pid,
            notifications,
        });
        // Only marked as observed once the bridge call has succeeded.
        this.observedPids.add(pid);
    }

    /** Ask the bridge to stop observing `pid`; a no-op when it is not observed. */
    async stopObserving(pid) {
        if (!this.observedPids.has(pid)) {
            return;
        }
        await this.bridge.call("observer.stop", { pid });
        this.observedPids.delete(pid);
    }

    /** Get and clear the event buffer. */
    drainEvents() {
        const events = [...this.eventBuffer];
        this.eventBuffer = [];
        return events;
    }

    /**
     * Get the most recent events without clearing the buffer.
     * @param limit Maximum number of events; zero, negative or NaN yields `[]`.
     */
    peekEvents(limit = 50) {
        // slice(-0) is slice(0), which would return the whole buffer, and
        // a negative limit would turn into a prefix slice.
        if (!(limit > 0)) {
            return [];
        }
        return this.eventBuffer.slice(-limit);
    }

    /** Clear the event buffer. */
    clearEvents() {
        this.eventBuffer = [];
    }

    /** True while at least one pid is being observed. */
    get isObserving() {
        return this.observedPids.size > 0;
    }

    /** Snapshot array of the observed pids. */
    get observedProcesses() {
        return [...this.observedPids];
    }

    /**
     * Convert a raw bridge payload into a UIEvent.
     * @returns The event, or `null` when the payload is missing or has no `type`.
     */
    parseEvent(raw) {
        // A null/undefined payload must not throw inside the bridge listener.
        const type = raw?.type;
        if (!type) {
            return null;
        }
        const event = {
            type,
            timestamp: raw.timestamp ?? new Date().toISOString(),
            pid: raw.pid ?? 0,
        };
        // Optional fields are copied only when they are strings.
        const optionalFields = ["bundleId", "elementRole", "elementLabel", "oldValue", "newValue", "windowTitle"];
        for (const field of optionalFields) {
            if (typeof raw[field] === "string") {
                event[field] = raw[field];
            }
        }
        return event;
    }

    /**
     * Translate event type names into AX notification names, dropping
     * names that have no mapping.
     */
    mapEventTypesToNotifications(types) {
        // A Map, so names such as "constructor" cannot resolve to inherited
        // Object.prototype members.
        const notificationByType = new Map([
            ["value_changed", "AXValueChanged"],
            ["focus_changed", "AXFocusedUIElementChanged"],
            ["window_created", "AXWindowCreated"],
            ["window_closed", "AXUIElementDestroyed"],
            ["title_changed", "AXTitleChanged"],
            ["menu_opened", "AXMenuOpened"],
            ["layout_changed", "AXLayoutChanged"],
            ["dialog_appeared", "AXSheetCreated"],
            ["app_activated", "AXApplicationActivated"],
            ["app_deactivated", "AXApplicationDeactivated"],
        ]);
        return types
            .map((t) => notificationByType.get(t))
            .filter((n) => n !== undefined);
    }
}