screenhand 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (177) hide show
  1. package/README.md +458 -93
  2. package/dist/.audit-log.jsonl +55 -0
  3. package/dist/.screenhand/memory/.lock +1 -0
  4. package/dist/.screenhand/memory/actions.jsonl +85 -0
  5. package/dist/.screenhand/memory/errors.jsonl +5 -0
  6. package/dist/.screenhand/memory/errors.jsonl.bak +4 -0
  7. package/dist/.screenhand/memory/state.json +35 -0
  8. package/dist/.screenhand/memory/state.json.bak +35 -0
  9. package/dist/.screenhand/memory/strategies.jsonl +12 -0
  10. package/dist/agent/cli.js +73 -0
  11. package/dist/agent/loop.js +258 -0
  12. package/dist/config.js +9 -0
  13. package/dist/index.js +56 -0
  14. package/dist/logging/timeline-logger.js +29 -0
  15. package/dist/mcp/mcp-stdio-server.js +448 -0
  16. package/dist/mcp/server.js +347 -0
  17. package/dist/mcp-desktop.js +2731 -0
  18. package/dist/mcp-entry.js +59 -0
  19. package/dist/memory/recall.js +160 -0
  20. package/dist/memory/research.js +98 -0
  21. package/dist/memory/seeds.js +89 -0
  22. package/dist/memory/session.js +161 -0
  23. package/dist/memory/store.js +391 -0
  24. package/dist/memory/types.js +4 -0
  25. package/dist/monitor/codex-monitor.js +377 -0
  26. package/dist/monitor/task-queue.js +84 -0
  27. package/dist/monitor/types.js +49 -0
  28. package/dist/native/bridge-client.js +174 -0
  29. package/dist/native/macos-bridge-client.js +5 -0
  30. package/dist/npm-publish-helper.js +117 -0
  31. package/dist/npm-token-cdp.js +113 -0
  32. package/dist/npm-token-create.js +135 -0
  33. package/dist/npm-token-finish.js +126 -0
  34. package/dist/playbook/engine.js +193 -0
  35. package/dist/playbook/index.js +4 -0
  36. package/dist/playbook/recorder.js +519 -0
  37. package/dist/playbook/runner.js +392 -0
  38. package/dist/playbook/store.js +166 -0
  39. package/dist/playbook/types.js +4 -0
  40. package/dist/runtime/accessibility-adapter.js +377 -0
  41. package/dist/runtime/app-adapter.js +48 -0
  42. package/dist/runtime/applescript-adapter.js +283 -0
  43. package/dist/runtime/ax-role-map.js +80 -0
  44. package/dist/runtime/browser-adapter.js +36 -0
  45. package/dist/runtime/cdp-chrome-adapter.js +505 -0
  46. package/dist/runtime/composite-adapter.js +205 -0
  47. package/dist/runtime/executor.js +250 -0
  48. package/dist/runtime/locator-cache.js +12 -0
  49. package/dist/runtime/planning-loop.js +47 -0
  50. package/dist/runtime/service.js +372 -0
  51. package/dist/runtime/session-manager.js +28 -0
  52. package/dist/runtime/state-observer.js +105 -0
  53. package/dist/runtime/vision-adapter.js +208 -0
  54. package/dist/scripts/codex-monitor-daemon.js +335 -0
  55. package/dist/scripts/supervisor-daemon.js +272 -0
  56. package/dist/scripts/worker-daemon.js +228 -0
  57. package/dist/src/agent/cli.js +82 -0
  58. package/dist/src/agent/loop.js +274 -0
  59. package/{src/config.ts → dist/src/config.js} +5 -10
  60. package/{src/index.ts → dist/src/index.js} +32 -52
  61. package/dist/src/jobs/manager.js +237 -0
  62. package/dist/src/jobs/runner.js +683 -0
  63. package/dist/src/jobs/store.js +102 -0
  64. package/dist/src/jobs/types.js +30 -0
  65. package/dist/src/jobs/worker.js +97 -0
  66. package/dist/src/logging/timeline-logger.js +45 -0
  67. package/dist/src/mcp/mcp-stdio-server.js +464 -0
  68. package/dist/src/mcp/server.js +363 -0
  69. package/dist/src/mcp-entry.js +60 -0
  70. package/dist/src/memory/recall.js +170 -0
  71. package/dist/src/memory/research.js +104 -0
  72. package/dist/src/memory/seeds.js +101 -0
  73. package/dist/src/memory/service.js +421 -0
  74. package/dist/src/memory/session.js +169 -0
  75. package/dist/src/memory/store.js +422 -0
  76. package/dist/src/memory/types.js +17 -0
  77. package/dist/src/monitor/codex-monitor.js +382 -0
  78. package/dist/src/monitor/task-queue.js +97 -0
  79. package/dist/src/monitor/types.js +62 -0
  80. package/dist/src/native/bridge-client.js +190 -0
  81. package/{src/native/macos-bridge-client.ts → dist/src/native/macos-bridge-client.js} +0 -1
  82. package/dist/src/playbook/engine.js +201 -0
  83. package/dist/src/playbook/index.js +20 -0
  84. package/dist/src/playbook/recorder.js +535 -0
  85. package/dist/src/playbook/runner.js +408 -0
  86. package/dist/src/playbook/store.js +183 -0
  87. package/dist/src/playbook/types.js +17 -0
  88. package/dist/src/runtime/accessibility-adapter.js +393 -0
  89. package/dist/src/runtime/app-adapter.js +64 -0
  90. package/dist/src/runtime/applescript-adapter.js +299 -0
  91. package/dist/src/runtime/ax-role-map.js +96 -0
  92. package/dist/src/runtime/browser-adapter.js +52 -0
  93. package/dist/src/runtime/cdp-chrome-adapter.js +521 -0
  94. package/dist/src/runtime/composite-adapter.js +221 -0
  95. package/dist/src/runtime/execution-contract.js +159 -0
  96. package/dist/src/runtime/executor.js +266 -0
  97. package/{src/runtime/locator-cache.ts → dist/src/runtime/locator-cache.js} +10 -15
  98. package/dist/src/runtime/planning-loop.js +63 -0
  99. package/dist/src/runtime/service.js +388 -0
  100. package/dist/src/runtime/session-manager.js +60 -0
  101. package/dist/src/runtime/state-observer.js +121 -0
  102. package/dist/src/runtime/vision-adapter.js +224 -0
  103. package/dist/src/supervisor/locks.js +186 -0
  104. package/dist/src/supervisor/supervisor.js +403 -0
  105. package/dist/src/supervisor/types.js +30 -0
  106. package/dist/src/test-mcp-protocol.js +154 -0
  107. package/dist/src/types.js +17 -0
  108. package/dist/src/util/atomic-write.js +118 -0
  109. package/dist/test-mcp-protocol.js +138 -0
  110. package/dist/types.js +1 -0
  111. package/package.json +18 -4
  112. package/.claude/commands/automate.md +0 -28
  113. package/.claude/commands/debug-ui.md +0 -19
  114. package/.claude/commands/screenshot.md +0 -15
  115. package/.github/FUNDING.yml +0 -1
  116. package/.github/ISSUE_TEMPLATE/bug_report.md +0 -27
  117. package/.github/ISSUE_TEMPLATE/feature_request.md +0 -20
  118. package/.mcp.json +0 -8
  119. package/DESKTOP_MCP_GUIDE.md +0 -92
  120. package/SECURITY.md +0 -44
  121. package/docs/architecture.md +0 -47
  122. package/install-skills.sh +0 -19
  123. package/mcp-bridge.ts +0 -271
  124. package/mcp-desktop.ts +0 -1221
  125. package/native/macos-bridge/Package.swift +0 -21
  126. package/native/macos-bridge/Sources/AccessibilityBridge.swift +0 -261
  127. package/native/macos-bridge/Sources/AppManagement.swift +0 -129
  128. package/native/macos-bridge/Sources/CoreGraphicsBridge.swift +0 -242
  129. package/native/macos-bridge/Sources/ObserverBridge.swift +0 -120
  130. package/native/macos-bridge/Sources/VisionBridge.swift +0 -80
  131. package/native/macos-bridge/Sources/main.swift +0 -345
  132. package/native/windows-bridge/AppManagement.cs +0 -234
  133. package/native/windows-bridge/InputBridge.cs +0 -436
  134. package/native/windows-bridge/Program.cs +0 -265
  135. package/native/windows-bridge/ScreenCapture.cs +0 -329
  136. package/native/windows-bridge/UIAutomationBridge.cs +0 -571
  137. package/native/windows-bridge/WindowsBridge.csproj +0 -17
  138. package/playbooks/devpost.json +0 -186
  139. package/playbooks/instagram.json +0 -41
  140. package/playbooks/instagram_v2.json +0 -201
  141. package/playbooks/x_v1.json +0 -211
  142. package/scripts/devpost-live-loop.mjs +0 -421
  143. package/src/logging/timeline-logger.ts +0 -55
  144. package/src/mcp/server.ts +0 -449
  145. package/src/memory/recall.ts +0 -191
  146. package/src/memory/research.ts +0 -146
  147. package/src/memory/seeds.ts +0 -123
  148. package/src/memory/session.ts +0 -201
  149. package/src/memory/store.ts +0 -434
  150. package/src/memory/types.ts +0 -69
  151. package/src/native/bridge-client.ts +0 -239
  152. package/src/runtime/accessibility-adapter.ts +0 -487
  153. package/src/runtime/app-adapter.ts +0 -169
  154. package/src/runtime/applescript-adapter.ts +0 -376
  155. package/src/runtime/ax-role-map.ts +0 -102
  156. package/src/runtime/browser-adapter.ts +0 -129
  157. package/src/runtime/cdp-chrome-adapter.ts +0 -676
  158. package/src/runtime/composite-adapter.ts +0 -274
  159. package/src/runtime/executor.ts +0 -396
  160. package/src/runtime/planning-loop.ts +0 -81
  161. package/src/runtime/service.ts +0 -448
  162. package/src/runtime/session-manager.ts +0 -50
  163. package/src/runtime/state-observer.ts +0 -136
  164. package/src/runtime/vision-adapter.ts +0 -297
  165. package/src/types.ts +0 -297
  166. package/tests/bridge-client.test.ts +0 -176
  167. package/tests/browser-stealth.test.ts +0 -210
  168. package/tests/composite-adapter.test.ts +0 -64
  169. package/tests/mcp-server.test.ts +0 -151
  170. package/tests/memory-recall.test.ts +0 -339
  171. package/tests/memory-research.test.ts +0 -159
  172. package/tests/memory-seeds.test.ts +0 -120
  173. package/tests/memory-store.test.ts +0 -392
  174. package/tests/types.test.ts +0 -92
  175. package/tsconfig.check.json +0 -17
  176. package/tsconfig.json +0 -19
  177. package/vitest.config.ts +0 -8
@@ -0,0 +1,274 @@
1
+ // Copyright (C) 2025 Clazro Technology Private Limited
2
+ // SPDX-License-Identifier: AGPL-3.0-only
3
+ //
4
+ // This file is part of ScreenHand.
5
+ //
6
+ // ScreenHand is free software: you can redistribute it and/or modify
7
+ // it under the terms of the GNU Affero General Public License as
8
+ // published by the Free Software Foundation, version 3.
9
+ //
10
+ // ScreenHand is distributed in the hope that it will be useful,
11
+ // but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ // GNU Affero General Public License for more details.
14
+ //
15
+ // You should have received a copy of the GNU Affero General Public License
16
+ // along with ScreenHand. If not, see <https://www.gnu.org/licenses/>.
17
+ /**
18
+ * ScreenHand Agent Loop
19
+ *
20
+ * Continuous observe → decide → act loop powered by Claude.
21
+ * Uses element_tree (accessibility tree) as the primary observation — not screenshots.
22
+ * ~50ms per observe, ~50ms per action. Only the LLM call adds latency.
23
+ */
24
+ import Anthropic from "@anthropic-ai/sdk";
25
/**
 * Compact AX tree representation for LLM consumption.
 *
 * Converts an AXNode tree into concise text, one node per line, indented by
 * one space per depth level:
 *   [button] "Send" (350,200)
 *   [textfield] "Search" value="hello" (100,50)
 *
 * Roles have their leading "AX" removed and are lower-cased. Unlabelled
 * `group` / `splitgroup` / `scrollarea` nodes that carry no value and are not
 * focused are treated as noise: they are omitted and their children are
 * rendered at the parent's depth.
 *
 * @param {object} node - AX node with `role` and optional `title`,
 *   `description`, `identifier`, `value`, `position`, `focused`, `enabled`
 *   and `children`.
 * @param {number} [depth=0] - Current depth; also the indent width.
 * @param {number} [maxDepth=5] - Nodes deeper than this are dropped.
 * @returns {string} Newline-joined description, or "" when nothing is rendered.
 */
function compactTree(node, depth = 0, maxDepth = 5) {
    // A missing node (e.g. an empty tree result) renders as nothing instead of throwing.
    if (!node || depth > maxDepth)
        return "";
    const indent = " ".repeat(depth);
    const parts = [];
    // Role — strip only the leading "AX" prefix, tolerate a missing role.
    const role = String(node.role ?? "unknown").replace(/^AX/, "").toLowerCase();
    // Label — prefer title, then description, then identifier
    const label = node.title || node.description || node.identifier || "";
    // Value — AX values may be numbers or booleans (sliders, checkboxes), so
    // coerce before slicing; only null/undefined/"" count as "no value".
    const hasValue = node.value !== undefined && node.value !== null && node.value !== "";
    const val = hasValue ? ` value="${String(node.value).slice(0, 50)}"` : "";
    // Position
    const pos = node.position ? ` (${Math.round(node.position.x)},${Math.round(node.position.y)})` : "";
    // Focused/enabled markers
    const markers = [];
    if (node.focused)
        markers.push("focused");
    if (node.enabled === false)
        markers.push("disabled");
    const markerStr = markers.length ? ` [${markers.join(",")}]` : "";
    // Skip noise nodes with no useful info
    const isNoise = !label && !val && !node.focused && (role === "group" || role === "splitgroup" || role === "scrollarea");
    if (!isNoise) {
        parts.push(`${indent}[${role}] "${label}"${val}${pos}${markerStr}`);
    }
    if (node.children) {
        // Children of a skipped node stay at the current depth so the output
        // does not show an indentation jump.
        const childDepth = isNoise ? depth : depth + 1;
        for (const child of node.children) {
            const childStr = compactTree(child, childDepth, maxDepth);
            if (childStr)
                parts.push(childStr);
        }
    }
    return parts.join("\n");
}
65
/**
 * System prompt sent with every model request made by the agent loop.
 * It fixes the JSON reply format, lists the available tools and states the
 * rules for choosing the next action. Kept as one entry per prompt line.
 */
const SYSTEM_PROMPT = [
    `You are a desktop automation agent. You control a computer through ScreenHand tools.`,
    ``,
    `On each turn you receive the current UI state as an accessibility tree. You must decide the SINGLE next action to take.`,
    ``,
    `Respond in this exact JSON format (no markdown, no explanation outside the JSON):`,
    `{`,
    `"reasoning": "Brief explanation of what you see and why you're taking this action",`,
    `"action": { "tool": "...", ... },`,
    `"done": false`,
    `}`,
    ``,
    `When the task is fully complete, respond with:`,
    `{`,
    `"reasoning": "Task is complete because ...",`,
    `"action": { "tool": "done", "summary": "What was accomplished" },`,
    `"done": true`,
    `}`,
    ``,
    `Available actions:`,
    `- {"tool": "press", "target": "Button text or element name"}`,
    `- {"tool": "type_into", "target": "Field name", "text": "text to type"}`,
    `- {"tool": "navigate", "url": "https://..."}`,
    `- {"tool": "scroll", "direction": "up|down|left|right", "amount": 3}`,
    `- {"tool": "key_combo", "keys": ["cmd", "c"]}`,
    `- {"tool": "menu_click", "menuPath": ["File", "Save"]}`,
    `- {"tool": "app_launch", "bundleId": "com.apple.Safari"}`,
    `- {"tool": "app_focus", "bundleId": "com.apple.Safari"}`,
    `- {"tool": "extract", "target": "element name", "format": "text"}`,
    `- {"tool": "wait", "ms": 1000}`,
    `- {"tool": "done", "summary": "what was accomplished"}`,
    ``,
    `Rules:`,
    `- Take ONE action per turn. After each action you'll see the updated UI.`,
    `- Use the accessibility tree to find elements — look for roles and labels.`,
    `- Target elements by their visible text/label, not coordinates (unless no label exists).`,
    `- If an action fails, try an alternative approach — don't repeat the same failed action.`,
    `- If you're stuck after 3 attempts, explain what's blocking you and mark done.`,
    `- Be efficient. Don't take unnecessary actions.`,
].join("\n");
103
/**
 * Run the observe → decide → act loop until the model reports completion or
 * `maxSteps` is reached.
 *
 * Each step reads the accessibility tree, asks the model for ONE JSON action,
 * executes it, and feeds the textual result back on the next turn.
 *
 * @param {object} runtime - Automation runtime; `elementTree`, `appList` and
 *   (when `screenshotOnStart` is set) `screenshot` are called here, the
 *   remaining calls happen inside `executeAction`.
 * @param {string} sessionId - Session the actions run in.
 * @param {string} task - Natural-language task, sent with the first turn.
 * @param {object} [options]
 * @param {number} [options.maxSteps=50] - Upper bound on loop iterations.
 * @param {string} [options.model] - Model identifier passed to the API.
 * @param {number} [options.maxTokens=1024] - `max_tokens` per request.
 * @param {(step: object) => void} [options.onStep] - Called after every step.
 * @param {boolean} [options.screenshotOnStart=false] - Take one screenshot first.
 * @param {object} [options.client] - Object exposing `messages.create`; a new
 *   `Anthropic()` client is created when omitted.
 * @returns {Promise<{success: boolean, summary: string, steps: object[], totalMs: number}>}
 */
export async function runAgentLoop(runtime, sessionId, task, options = {}) {
    const { maxSteps = 50, model = "claude-sonnet-4-20250514", maxTokens = 1024, onStep, screenshotOnStart = false, client: providedClient, } = options;
    // Allow callers to inject a pre-configured (or fake) client.
    const client = providedClient ?? new Anthropic();
    const steps = [];
    const messages = [];
    const startTime = Date.now();
    // Optional initial screenshot for context
    if (screenshotOnStart) {
        await runtime.screenshot({ sessionId });
    }
    for (let i = 0; i < maxSteps; i++) {
        const stepStart = Date.now();
        // 1. OBSERVE — get accessibility tree
        const treeResult = await runtime.elementTree({ sessionId, maxDepth: 5 });
        let observation;
        if (treeResult.ok) {
            observation = compactTree(treeResult.data);
            // Truncate if too large to keep tokens manageable
            if (observation.length > 8000) {
                observation = observation.slice(0, 8000) + "\n... (truncated)";
            }
        }
        else {
            observation = `[Error getting UI tree: ${treeResult.error.message}]`;
        }
        // Also get app context. Best-effort: a failure here only means the
        // prompt lacks the "Active app" line.
        let contextLine = "";
        try {
            const apps = await runtime.appList(sessionId);
            if (apps.ok) {
                const active = apps.data.find(a => a.isActive);
                if (active)
                    contextLine = `Active app: ${active.name} (${active.bundleId})`;
            }
        }
        catch { /* ignore */ }
        // 2. BUILD prompt
        const userMsg = i === 0
            ? `Task: ${task}\n\nCurrent UI state:\n${contextLine}\n${observation}`
            : `Action result: ${steps[i - 1].result}\n\nUpdated UI state:\n${contextLine}\n${observation}`;
        messages.push({ role: "user", content: userMsg });
        // 3. DECIDE — ask the model what to do next
        let reasoning = "";
        let action = null;
        let done = false;
        // Why no action could be derived from the reply; reported back to the
        // model so it can correct its output format on the next turn.
        let noActionReason = "";
        try {
            const resp = await client.messages.create({
                model,
                max_tokens: maxTokens,
                system: SYSTEM_PROMPT,
                messages,
            });
            // Use the first text block rather than assuming it is content[0].
            const textBlock = resp.content.find((block) => block.type === "text");
            const text = textBlock?.text ?? "";
            // Never store a blank assistant turn: it would make every later
            // request in this conversation invalid.
            if (text.trim() !== "") {
                messages.push({ role: "assistant", content: text });
            }
            else {
                noActionReason = "model returned no text";
            }
            // Parse JSON response
            const jsonMatch = text.match(/\{[\s\S]*\}/);
            if (jsonMatch) {
                try {
                    const parsed = JSON.parse(jsonMatch[0]);
                    reasoning = parsed.reasoning ?? "";
                    action = parsed.action ?? null;
                    done = parsed.done === true;
                }
                catch (parseErr) {
                    // Malformed JSON is a formatting problem, not an API failure.
                    reasoning = text;
                    noActionReason = `response was not valid JSON (${parseErr instanceof Error ? parseErr.message : String(parseErr)})`;
                }
            }
            else {
                reasoning = text;
                if (!noActionReason)
                    noActionReason = "response contained no JSON object";
            }
        }
        catch (e) {
            reasoning = `LLM error: ${e instanceof Error ? e.message : String(e)}`;
        }
        // 4. ACT — execute the action
        let result = "";
        if (action) {
            try {
                result = await executeAction(runtime, sessionId, action);
            }
            catch (e) {
                result = `Error: ${e instanceof Error ? e.message : String(e)}`;
            }
        }
        else {
            result = noActionReason ? `No action taken: ${noActionReason}` : "No action taken";
        }
        // Record step
        const step = {
            index: i,
            observation: observation.slice(0, 500),
            reasoning,
            action,
            result,
            done,
            durationMs: Date.now() - stepStart,
        };
        steps.push(step);
        if (onStep)
            onStep(step);
        if (done)
            break;
    }
    const lastStep = steps[steps.length - 1];
    const summary = lastStep?.action?.tool === "done"
        ? lastStep.action.summary
        : `Stopped after ${steps.length} steps`;
    return {
        success: lastStep?.done ?? false,
        summary,
        steps,
        totalMs: Date.now() - startTime,
    };
}
212
/**
 * Execute one model-chosen action against the runtime and describe the outcome.
 *
 * `action` comes from parsed model output, so the fields each tool needs are
 * checked before the runtime is called. A malformed action yields a
 * "Failed: ..." string instead of reaching the runtime or throwing a TypeError.
 *
 * @param {object} runtime - Automation runtime exposing press, typeInto,
 *   navigate, scroll, keyCombo, menuClick, appLaunch, appFocus and extract.
 * @param {string} sessionId - Session the action runs in.
 * @param {object} action - `{ tool, ... }` as described in the system prompt.
 * @returns {Promise<string>} Human-readable result fed back to the model.
 */
async function executeAction(runtime, sessionId, action) {
    if (action === null || typeof action !== "object") {
        return "Failed: action must be an object";
    }
    const isText = (v) => typeof v === "string" && v.length > 0;
    const isTextList = (v) => Array.isArray(v) && v.length > 0 && v.every(isText);
    const invalid = (field, expected) => `Failed: "${action.tool}" requires "${field}" (${expected})`;
    // Shared success/failure formatting for runtime results.
    const outcome = (r, okMessage) => (r.ok ? okMessage : `Failed: ${r.error.message}`);
    switch (action.tool) {
        case "press": {
            if (!isText(action.target))
                return invalid("target", "non-empty string");
            const r = await runtime.press({
                sessionId,
                target: { type: "text", value: action.target },
            });
            return outcome(r, `Pressed "${action.target}"`);
        }
        case "type_into": {
            if (!isText(action.target))
                return invalid("target", "non-empty string");
            if (typeof action.text !== "string")
                return invalid("text", "string");
            const r = await runtime.typeInto({
                sessionId,
                target: { type: "text", value: action.target },
                text: action.text,
            });
            return outcome(r, `Typed "${action.text}" into "${action.target}"`);
        }
        case "navigate": {
            if (!isText(action.url))
                return invalid("url", "non-empty string");
            const r = await runtime.navigate({ sessionId, url: action.url });
            return outcome(r, `Navigated to ${action.url}`);
        }
        case "scroll": {
            if (!isText(action.direction))
                return invalid("direction", "non-empty string");
            const input = { sessionId, direction: action.direction };
            // amount is optional; only forward it when it is a number.
            if (typeof action.amount === "number")
                input.amount = action.amount;
            const r = await runtime.scroll(input);
            return outcome(r, `Scrolled ${action.direction}`);
        }
        case "key_combo": {
            if (!isTextList(action.keys))
                return invalid("keys", "non-empty array of strings");
            const r = await runtime.keyCombo({ sessionId, keys: action.keys });
            return outcome(r, `Key combo: ${action.keys.join("+")}`);
        }
        case "menu_click": {
            if (!isTextList(action.menuPath))
                return invalid("menuPath", "non-empty array of strings");
            const r = await runtime.menuClick({ sessionId, menuPath: action.menuPath });
            return outcome(r, `Menu: ${action.menuPath.join(" → ")}`);
        }
        case "app_launch": {
            if (!isText(action.bundleId))
                return invalid("bundleId", "non-empty string");
            const r = await runtime.appLaunch({ sessionId, bundleId: action.bundleId });
            return outcome(r, `Launched ${action.bundleId}`);
        }
        case "app_focus": {
            if (!isText(action.bundleId))
                return invalid("bundleId", "non-empty string");
            const r = await runtime.appFocus({ sessionId, bundleId: action.bundleId });
            return outcome(r, `Focused ${action.bundleId}`);
        }
        case "extract": {
            if (!isText(action.target))
                return invalid("target", "non-empty string");
            const r = await runtime.extract({
                sessionId,
                target: { type: "text", value: action.target },
                format: action.format,
            });
            // The payload is only serialised on success.
            return r.ok ? `Extracted: ${JSON.stringify(r.data).slice(0, 500)}` : `Failed: ${r.error.message}`;
        }
        case "wait": {
            // A missing, negative or non-numeric delay becomes 0 rather than "undefinedms".
            const ms = Number.isFinite(action.ms) && action.ms > 0 ? action.ms : 0;
            await new Promise(resolve => setTimeout(resolve, ms));
            return `Waited ${ms}ms`;
        }
        case "done": {
            return `Task complete: ${action.summary}`;
        }
        default:
            return `Unknown action: ${action.tool}`;
    }
}
@@ -14,17 +14,12 @@
14
14
  //
15
15
  // You should have received a copy of the GNU Affero General Public License
16
16
  // along with ScreenHand. If not, see <https://www.gnu.org/licenses/>.
17
-
18
- import type { ActionBudget } from "./types.js";
19
-
20
- export const DEFAULT_ACTION_BUDGET: ActionBudget = {
21
- locateMs: 800,
22
- actMs: 200,
23
- verifyMs: 2000,
24
- maxRetries: 1,
17
+ export const DEFAULT_ACTION_BUDGET = {
18
+ locateMs: 800,
19
+ actMs: 200,
20
+ verifyMs: 2000,
21
+ maxRetries: 1,
25
22
  };
26
-
27
23
  export const DEFAULT_NAVIGATE_TIMEOUT_MS = 10_000;
28
24
  export const DEFAULT_WAIT_TIMEOUT_MS = 2_000;
29
25
  export const DEFAULT_PROFILE = "automation";
30
-
@@ -14,18 +14,11 @@
14
14
  //
15
15
  // You should have received a copy of the GNU Affero General Public License
16
16
  // along with ScreenHand. If not, see <https://www.gnu.org/licenses/>.
17
-
18
17
  import { TimelineLogger } from "./logging/timeline-logger.js";
19
18
  import { MvpMcpServer } from "./mcp/server.js";
20
- import {
21
- type AppAdapter,
22
- PlaceholderAppAdapter,
23
- } from "./runtime/app-adapter.js";
19
+ import { PlaceholderAppAdapter, } from "./runtime/app-adapter.js";
24
20
  import { CdpChromeAdapter } from "./runtime/cdp-chrome-adapter.js";
25
21
  import { AutomationRuntimeService } from "./runtime/service.js";
26
-
27
- // Re-export types and adapters for external use
28
- export type { AppAdapter } from "./runtime/app-adapter.js";
29
22
  export { PlaceholderAppAdapter } from "./runtime/app-adapter.js";
30
23
  export { CdpChromeAdapter } from "./runtime/cdp-chrome-adapter.js";
31
24
  export { AccessibilityAdapter } from "./runtime/accessibility-adapter.js";
@@ -37,56 +30,43 @@ export { StateObserver } from "./runtime/state-observer.js";
37
30
  export { PlanningLoop } from "./runtime/planning-loop.js";
38
31
  export { AutomationRuntimeService } from "./runtime/service.js";
39
32
  export { MvpMcpServer } from "./mcp/server.js";
40
-
41
- export interface RuntimeApp {
42
- runtime: AutomationRuntimeService;
43
- mcp: MvpMcpServer;
33
+ export { createMcpStdioServer, startMcpStdioServer } from "./mcp/mcp-stdio-server.js";
34
+ export { runAgentLoop } from "./agent/loop.js";
35
+ export function createRuntimeApp(adapter) {
36
+ const logger = new TimelineLogger();
37
+ const runtime = new AutomationRuntimeService(adapter, logger);
38
+ const mcp = new MvpMcpServer(runtime);
39
+ return { runtime, mcp };
44
40
  }
45
-
46
- export function createRuntimeApp(adapter: AppAdapter): RuntimeApp {
47
- const logger = new TimelineLogger();
48
- const runtime = new AutomationRuntimeService(adapter, logger);
49
- const mcp = new MvpMcpServer(runtime);
50
- return { runtime, mcp };
51
- }
52
-
53
- async function createDefaultAdapter(): Promise<AppAdapter> {
54
- if (process.env.AUTOMATOR_ADAPTER === "placeholder") {
55
- return new PlaceholderAppAdapter();
56
- }
57
- if (process.env.AUTOMATOR_ADAPTER === "composite") {
58
- // Lazy import to avoid requiring Swift bridge for CDP-only usage
59
- const { MacOSBridgeClient } = await import("./native/macos-bridge-client.js");
60
- const { CompositeAdapter } = await import("./runtime/composite-adapter.js");
61
- const bridge = new MacOSBridgeClient();
62
- return new CompositeAdapter(bridge, {
63
- headless: process.env.AUTOMATOR_HEADLESS === "1",
41
+ async function createDefaultAdapter() {
42
+ if (process.env.AUTOMATOR_ADAPTER === "placeholder") {
43
+ return new PlaceholderAppAdapter();
44
+ }
45
+ if (process.env.AUTOMATOR_ADAPTER === "composite") {
46
+ // Lazy import to avoid requiring Swift bridge for CDP-only usage
47
+ const { MacOSBridgeClient } = await import("./native/macos-bridge-client.js");
48
+ const { CompositeAdapter } = await import("./runtime/composite-adapter.js");
49
+ const bridge = new MacOSBridgeClient();
50
+ return new CompositeAdapter(bridge, {
51
+ headless: process.env.AUTOMATOR_HEADLESS === "1",
52
+ });
53
+ }
54
+ if (process.env.AUTOMATOR_ADAPTER === "accessibility") {
55
+ const { MacOSBridgeClient } = await import("./native/macos-bridge-client.js");
56
+ const { AccessibilityAdapter } = await import("./runtime/accessibility-adapter.js");
57
+ const bridge = new MacOSBridgeClient();
58
+ return new AccessibilityAdapter(bridge);
59
+ }
60
+ return new CdpChromeAdapter({
61
+ headless: process.env.AUTOMATOR_HEADLESS === "1",
64
62
  });
65
- }
66
- if (process.env.AUTOMATOR_ADAPTER === "accessibility") {
67
- const { MacOSBridgeClient } = await import("./native/macos-bridge-client.js");
68
- const { AccessibilityAdapter } = await import("./runtime/accessibility-adapter.js");
69
- const bridge = new MacOSBridgeClient();
70
- return new AccessibilityAdapter(bridge);
71
- }
72
- return new CdpChromeAdapter({
73
- headless: process.env.AUTOMATOR_HEADLESS === "1",
74
- });
75
63
  }
76
-
77
64
  const app = createRuntimeApp(await createDefaultAdapter());
78
-
79
65
  if (process.argv.includes("--healthcheck")) {
80
- const session = await app.runtime.sessionStart("automation");
81
- console.log(
82
- JSON.stringify(
83
- {
66
+ const session = await app.runtime.sessionStart("automation");
67
+ console.log(JSON.stringify({
84
68
  status: "ok",
85
69
  session,
86
70
  note: "Runtime loaded with universal adapter support.",
87
- },
88
- null,
89
- 2,
90
- ),
91
- );
71
+ }, null, 2));
92
72
  }
@@ -0,0 +1,237 @@
1
+ // Copyright (C) 2025 Clazro Technology Private Limited
2
+ // SPDX-License-Identifier: AGPL-3.0-only
3
+ //
4
+ // This file is part of ScreenHand.
5
+ //
6
+ // ScreenHand is free software: you can redistribute it and/or modify
7
+ // it under the terms of the GNU Affero General Public License as
8
+ // published by the Free Software Foundation, version 3.
9
+ //
10
+ // ScreenHand is distributed in the hope that it will be useful,
11
+ // but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ // GNU Affero General Public License for more details.
14
+ //
15
+ // You should have received a copy of the GNU Affero General Public License
16
+ // along with ScreenHand. If not, see <https://www.gnu.org/licenses/>.
17
+ import { VALID_TRANSITIONS, JOB_STATES } from "./types.js";
18
+ import { JobStore } from "./store.js";
19
/**
 * Tracks automation jobs on top of a `JobStore`: creation, state transitions,
 * per-step bookkeeping, resume points and simple queue queries.
 *
 * When a memory service is supplied, the completed steps of a job are
 * appended to it as a strategy each time the job reaches "done" or "failed".
 */
export class JobManager {
    store;
    memory;
    supervisor;
    /**
     * @param {object} opts
     * @param {string} opts.jobDir - Passed to the `JobStore` constructor.
     * @param {object} [opts.memory] - Object with `appendStrategy`; optional.
     * @param {object} [opts.supervisor] - Stored as-is; not used by this class.
     */
    constructor(opts) {
        this.store = new JobStore(opts.jobDir);
        this.memory = opts.memory ?? null;
        this.supervisor = opts.supervisor ?? null;
    }
    /** Initialise the underlying store. */
    init() {
        this.store.init();
    }
    // ── Create ──────────────────────────────────────
    /**
     * Create a job in state "queued" and add it to the store.
     * @param {object} opts - `task` plus optional `steps`, `playbookId`,
     *   `sessionId`, `bundleId`, `windowId`, `maxRetries` (default 3),
     *   `tags` (default []) and `priority` (default 10).
     * @returns {object} The stored job record.
     */
    create(opts) {
        const now = new Date().toISOString();
        const id = "job_" + Date.now().toString(36) + "_" + Math.random().toString(36).slice(2, 8);
        // Optional step fields are copied only when they were provided.
        const optionalFields = ["target", "description", "text", "keys", "value"];
        const steps = (opts.steps ?? []).map((s, i) => {
            const step = { index: i, action: s.action, status: "pending" };
            for (const field of optionalFields) {
                if (s[field] !== undefined)
                    step[field] = s[field];
            }
            return step;
        });
        const job = {
            id,
            task: opts.task,
            state: "queued",
            playbookId: opts.playbookId ?? null,
            sessionId: opts.sessionId ?? null,
            bundleId: opts.bundleId ?? null,
            windowId: opts.windowId ?? null,
            lastStep: -1,
            steps,
            blockReason: null,
            retries: 0,
            maxRetries: opts.maxRetries ?? 3,
            lastError: null,
            tags: opts.tags ?? [],
            priority: opts.priority ?? 10,
            createdAt: now,
            updatedAt: now,
            startedAt: null,
            completedAt: null,
        };
        this.store.add(job);
        return job;
    }
    // ── State transitions ───────────────────────────
    /**
     * Move a job to state `to` if `VALID_TRANSITIONS` allows it.
     *
     * Re-queuing a failed job counts as a retry: the retry counter is bumped,
     * error fields are cleared and failed steps are reset to "pending" so the
     * resume point lands on the step that failed instead of skipping it.
     *
     * @param {string} id - Job id.
     * @param {string} to - Target state.
     * @param {object} [opts] - Optional `sessionId`, `blockReason`, `error`.
     * @returns {object} The updated job, or `{ error }` when the job is
     *   missing, the transition is not allowed, retries are exhausted or the
     *   store update fails.
     */
    transition(id, to, opts) {
        const job = this.store.get(id);
        if (!job)
            return { error: `Job ${id} not found` };
        // An unrecognised current state simply allows no transitions.
        const allowed = VALID_TRANSITIONS[job.state] ?? [];
        if (!allowed.includes(to)) {
            return { error: `Cannot transition from "${job.state}" to "${to}". Allowed: [${allowed.join(", ")}]` };
        }
        // Reject an exhausted retry before anything else is prepared.
        const isRetry = job.state === "failed" && to === "queued";
        if (isRetry && job.retries + 1 > job.maxRetries) {
            return { error: `Job ${id} has exceeded max retries (${job.maxRetries})` };
        }
        const patch = { state: to };
        if (to === "running") {
            if (!job.startedAt)
                patch.startedAt = new Date().toISOString();
            patch.blockReason = null;
            if (opts?.sessionId)
                patch.sessionId = opts.sessionId;
        }
        if (to === "blocked" || to === "waiting_human") {
            patch.blockReason = opts?.blockReason ?? null;
        }
        if (to === "failed") {
            patch.lastError = opts?.error ?? job.lastError;
        }
        if (to === "done") {
            patch.completedAt = new Date().toISOString();
        }
        if (isRetry) {
            patch.retries = job.retries + 1;
            patch.lastError = null;
            patch.blockReason = null;
            // getResumePoint only returns "pending" steps, so failed steps must
            // be reset or the retry would skip exactly the step that failed.
            patch.steps = job.steps.map((s) => {
                if (s.status !== "failed")
                    return s;
                const { error: _previousError, ...rest } = s;
                return { ...rest, status: "pending" };
            });
        }
        const updated = this.store.update(id, patch);
        if (!updated)
            return { error: `Failed to update job ${id}` };
        // Record the outcome only once the transition has actually been stored.
        if (to === "failed")
            this.recordOutcomeToMemory(job, false);
        else if (to === "done")
            this.recordOutcomeToMemory(job, true);
        return updated;
    }
    // ── Step tracking ───────────────────────────────
    /** Mark a step as completed and advance lastStep. Requires the job to be running. */
    completeStep(jobId, stepIndex, opts) {
        const job = this.store.get(jobId);
        if (!job)
            return { error: `Job ${jobId} not found` };
        if (job.state !== "running")
            return { error: `Job is not running (state=${job.state})` };
        const step = job.steps[stepIndex];
        if (!step)
            return { error: `Step ${stepIndex} does not exist (total: ${job.steps.length})` };
        step.status = "done";
        step.completedAt = new Date().toISOString();
        if (opts?.durationMs !== undefined)
            step.durationMs = opts.durationMs;
        // lastStep never moves backwards, even if steps complete out of order.
        const newLastStep = Math.max(job.lastStep, stepIndex);
        return this.store.update(jobId, { lastStep: newLastStep, steps: job.steps }) ?? { error: "Update failed" };
    }
    /** Mark a step as failed. Does NOT transition the job — caller decides (retry vs block vs fail). */
    failStep(jobId, stepIndex, error) {
        const job = this.store.get(jobId);
        if (!job)
            return { error: `Job ${jobId} not found` };
        const step = job.steps[stepIndex];
        if (!step)
            return { error: `Step ${stepIndex} does not exist` };
        step.status = "failed";
        step.error = error;
        return this.store.update(jobId, { steps: job.steps, lastError: error }) ?? { error: "Update failed" };
    }
    /** Skip a step (e.g., optional step or already done). */
    skipStep(jobId, stepIndex) {
        const job = this.store.get(jobId);
        if (!job)
            return { error: `Job ${jobId} not found` };
        const step = job.steps[stepIndex];
        if (!step)
            return { error: `Step ${stepIndex} does not exist` };
        step.status = "skipped";
        return this.store.update(jobId, { steps: job.steps }) ?? { error: "Update failed" };
    }
    // ── Resume ──────────────────────────────────────
    /**
     * Get the resume point: next pending step after lastStep.
     * @returns {{stepIndex: number, step: object} | null} null when the job is
     *   unknown or no pending step remains.
     */
    getResumePoint(jobId) {
        const job = this.store.get(jobId);
        if (!job)
            return null;
        for (let i = job.lastStep + 1; i < job.steps.length; i++) {
            if (job.steps[i].status === "pending") {
                return { stepIndex: i, step: job.steps[i] };
            }
        }
        return null;
    }
    // ── Queries ─────────────────────────────────────
    /** Look up a job by id (delegates to the store). */
    get(id) {
        return this.store.get(id);
    }
    /** List jobs, optionally filtered by state (delegates to the store). */
    list(state) {
        return this.store.list(state);
    }
    /**
     * Pop the next queued job and transition it to running.
     * @returns {object | null} The running job, or null when nothing is queued
     *   or the transition was rejected.
     */
    dequeue(sessionId) {
        const next = this.store.nextQueued();
        if (!next)
            return null;
        const opts = {};
        if (sessionId !== undefined)
            opts.sessionId = sessionId;
        const result = this.transition(next.id, "running", opts);
        if ("error" in result)
            return null;
        return result;
    }
    /** Counts per state, the oldest queued creation time and the ids of running jobs. */
    summary() {
        const all = this.store.list();
        const byState = Object.fromEntries(JOB_STATES.map((s) => [s, 0]));
        for (const j of all)
            byState[j.state]++;
        const queued = all
            .filter((j) => j.state === "queued")
            .sort((a, b) => new Date(a.createdAt).getTime() - new Date(b.createdAt).getTime());
        return {
            total: all.length,
            byState,
            oldestQueued: queued[0]?.createdAt ?? null,
            runningJobIds: all.filter((j) => j.state === "running").map((j) => j.id),
        };
    }
    /** Remove a job entirely. */
    remove(id) {
        return this.store.remove(id);
    }
    /** Prune old terminal jobs. */
    prune() {
        return this.store.prune();
    }
    // ── Private ─────────────────────────────────────
    /**
     * Append the job's completed steps to memory as a strategy record.
     * No-op without a memory service or without any completed step.
     */
    recordOutcomeToMemory(job, success) {
        if (!this.memory)
            return;
        try {
            const completedSteps = job.steps.filter((s) => s.status === "done");
            if (completedSteps.length === 0)
                return;
            this.memory.appendStrategy({
                id: "strat_" + job.id,
                task: job.task,
                steps: completedSteps.map((s) => ({
                    tool: s.action,
                    params: s.target ? { target: s.target } : {},
                })),
                totalDurationMs: completedSteps.reduce((sum, s) => sum + (s.durationMs ?? 0), 0),
                successCount: success ? 1 : 0,
                failCount: success ? 0 : 1,
                lastUsed: new Date().toISOString(),
                tags: job.tags,
                fingerprint: completedSteps.map((s) => s.action).join("→"),
            });
        }
        catch {
            // Non-critical — don't let memory failures break job flow
        }
    }
}