screenhand 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +661 -21
- package/README.md +208 -38
- package/dist/.audit-log.jsonl +55 -0
- package/dist/.screenhand/memory/.lock +1 -0
- package/dist/.screenhand/memory/actions.jsonl +85 -0
- package/dist/.screenhand/memory/errors.jsonl +5 -0
- package/dist/.screenhand/memory/errors.jsonl.bak +4 -0
- package/dist/.screenhand/memory/state.json +35 -0
- package/dist/.screenhand/memory/state.json.bak +35 -0
- package/dist/.screenhand/memory/strategies.jsonl +12 -0
- package/dist/agent/cli.js +73 -0
- package/dist/agent/loop.js +258 -0
- package/dist/index.js +1 -0
- package/dist/mcp/mcp-stdio-server.js +164 -0
- package/dist/mcp-desktop.js +2731 -0
- package/dist/mcp-entry.js +7 -10
- package/dist/monitor/codex-monitor.js +377 -0
- package/dist/monitor/task-queue.js +84 -0
- package/dist/monitor/types.js +49 -0
- package/dist/native/bridge-client.js +2 -1
- package/dist/npm-publish-helper.js +117 -0
- package/dist/npm-token-cdp.js +113 -0
- package/dist/npm-token-create.js +135 -0
- package/dist/npm-token-finish.js +126 -0
- package/dist/playbook/engine.js +193 -0
- package/dist/playbook/index.js +4 -0
- package/dist/playbook/recorder.js +519 -0
- package/dist/playbook/runner.js +392 -0
- package/dist/playbook/store.js +166 -0
- package/dist/playbook/types.js +4 -0
- package/dist/scripts/codex-monitor-daemon.js +335 -0
- package/dist/scripts/supervisor-daemon.js +272 -0
- package/dist/scripts/worker-daemon.js +228 -0
- package/dist/src/agent/cli.js +82 -0
- package/dist/src/agent/loop.js +274 -0
- package/dist/src/config.js +25 -0
- package/dist/src/index.js +72 -0
- package/dist/src/jobs/manager.js +237 -0
- package/dist/src/jobs/runner.js +683 -0
- package/dist/src/jobs/store.js +102 -0
- package/dist/src/jobs/types.js +30 -0
- package/dist/src/jobs/worker.js +97 -0
- package/dist/src/logging/timeline-logger.js +45 -0
- package/dist/src/mcp/mcp-stdio-server.js +464 -0
- package/dist/src/mcp/server.js +363 -0
- package/dist/src/mcp-entry.js +60 -0
- package/dist/src/memory/recall.js +170 -0
- package/dist/src/memory/research.js +104 -0
- package/dist/src/memory/seeds.js +101 -0
- package/dist/src/memory/service.js +421 -0
- package/dist/src/memory/session.js +169 -0
- package/dist/src/memory/store.js +422 -0
- package/dist/src/memory/types.js +17 -0
- package/dist/src/monitor/codex-monitor.js +382 -0
- package/dist/src/monitor/task-queue.js +97 -0
- package/dist/src/monitor/types.js +62 -0
- package/dist/src/native/bridge-client.js +190 -0
- package/dist/src/native/macos-bridge-client.js +21 -0
- package/dist/src/playbook/engine.js +201 -0
- package/dist/src/playbook/index.js +20 -0
- package/dist/src/playbook/recorder.js +535 -0
- package/dist/src/playbook/runner.js +408 -0
- package/dist/src/playbook/store.js +183 -0
- package/dist/src/playbook/types.js +17 -0
- package/dist/src/runtime/accessibility-adapter.js +393 -0
- package/dist/src/runtime/app-adapter.js +64 -0
- package/dist/src/runtime/applescript-adapter.js +299 -0
- package/dist/src/runtime/ax-role-map.js +96 -0
- package/dist/src/runtime/browser-adapter.js +52 -0
- package/dist/src/runtime/cdp-chrome-adapter.js +521 -0
- package/dist/src/runtime/composite-adapter.js +221 -0
- package/dist/src/runtime/execution-contract.js +159 -0
- package/dist/src/runtime/executor.js +266 -0
- package/dist/src/runtime/locator-cache.js +28 -0
- package/dist/src/runtime/planning-loop.js +63 -0
- package/dist/src/runtime/service.js +388 -0
- package/dist/src/runtime/session-manager.js +60 -0
- package/dist/src/runtime/state-observer.js +121 -0
- package/dist/src/runtime/vision-adapter.js +224 -0
- package/dist/src/supervisor/locks.js +186 -0
- package/dist/src/supervisor/supervisor.js +403 -0
- package/dist/src/supervisor/types.js +30 -0
- package/dist/src/test-mcp-protocol.js +154 -0
- package/dist/src/types.js +17 -0
- package/dist/src/util/atomic-write.js +118 -0
- package/package.json +12 -9
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
// Copyright (C) 2025 Clazro Technology Private Limited
|
|
2
|
+
// SPDX-License-Identifier: AGPL-3.0-only
|
|
3
|
+
//
|
|
4
|
+
// This file is part of ScreenHand.
|
|
5
|
+
//
|
|
6
|
+
// ScreenHand is free software: you can redistribute it and/or modify
|
|
7
|
+
// it under the terms of the GNU Affero General Public License as
|
|
8
|
+
// published by the Free Software Foundation, version 3.
|
|
9
|
+
//
|
|
10
|
+
// ScreenHand is distributed in the hope that it will be useful,
|
|
11
|
+
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
12
|
+
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
13
|
+
// GNU Affero General Public License for more details.
|
|
14
|
+
//
|
|
15
|
+
// You should have received a copy of the GNU Affero General Public License
|
|
16
|
+
// along with ScreenHand. If not, see <https://www.gnu.org/licenses/>.
|
|
17
|
+
import { AccessibilityAdapter } from "./accessibility-adapter.js";
|
|
18
|
+
import { AppleScriptAdapter } from "./applescript-adapter.js";
|
|
19
|
+
import { CdpChromeAdapter } from "./cdp-chrome-adapter.js";
|
|
20
|
+
import { VisionAdapter } from "./vision-adapter.js";
|
|
21
|
+
/** macOS bundle IDs of Chromium-based browsers; sessions for these apps are routed to CDP. */
const BROWSER_BUNDLES = new Set([
    "com.google.Chrome",
    "com.google.Chrome.canary",
    "com.brave.Browser",
    "com.microsoft.edgemac",
    "com.vivaldi.Vivaldi",
    "org.chromium.Chromium",
]);
/**
 * Windows process names routed to CDP.
 * Every browser is registered twice: as the bare process name and with the
 * ".exe" suffix, in that order.
 */
const BROWSER_PROCESS_NAMES = new Set(["chrome", "brave", "msedge", "vivaldi", "chromium"]
    .flatMap((baseName) => [baseName, `${baseName}.exe`]));
/** True when the current process runs on Windows (process.platform is "win32"). */
const isWindows = process.platform === "win32";
|
|
44
|
+
/**
 * Composite adapter that auto-selects the best adapter per app:
 * - Chromium browsers → CDP
 * - Scriptable apps → AppleScript (with AX fallback)
 * - Default → Accessibility
 * - Fallback → Vision (if the routed non-CDP adapter locates nothing)
 *
 * Routing is stored per session: attach() starts every session on the
 * accessibility adapter, and launchApp()/focusApp() re-route the session
 * according to the app they target.
 */
export class CompositeAdapter {
    bridge;
    cdp;
    accessibility;
    applescript;
    vision;
    /** Maps sessionId → { adapter, adapterName } for the currently routed adapter. */
    sessionRouting = new Map();
    /**
     * @param bridge Native bridge handed to the accessibility and vision adapters.
     * @param cdpOptions Options forwarded unchanged to the CDP adapter.
     */
    constructor(bridge, cdpOptions) {
        this.bridge = bridge;
        this.cdp = new CdpChromeAdapter(cdpOptions);
        this.accessibility = new AccessibilityAdapter(bridge);
        this.applescript = new AppleScriptAdapter();
        this.vision = new VisionAdapter(bridge);
    }
    /**
     * Attaches through the accessibility adapter and registers it as the
     * session's initial route; the real route is chosen once an app is known.
     * @returns The accessibility session info with adapterType forced to "composite".
     */
    async attach(profile, reuseSessionId) {
        const info = await this.accessibility.attach(profile, reuseSessionId);
        this.sessionRouting.set(info.sessionId, {
            adapter: this.accessibility,
            adapterName: "accessibility",
        });
        // Callers see a single "composite" adapter regardless of the route.
        return { ...info, adapterType: "composite" };
    }
    async getAppContext(sessionId) {
        return this.getAdapter(sessionId).getAppContext(sessionId);
    }
    async getPageMeta(sessionId) {
        return this.getAdapter(sessionId).getPageMeta(sessionId);
    }
    async navigate(sessionId, url, timeoutMs) {
        return this.getAdapter(sessionId).navigate(sessionId, url, timeoutMs);
    }
    /**
     * Locates a target with the routed adapter, then falls back to vision
     * when that adapter returns nothing and the route is neither "vision"
     * nor "cdp".
     * @returns The located element, or null when nothing matched.
     */
    async locate(sessionId, target, timeoutMs) {
        const primary = this.getAdapter(sessionId);
        const result = await primary.locate(sessionId, target, timeoutMs);
        if (result) {
            return result;
        }
        const routing = this.sessionRouting.get(sessionId);
        if (routing && routing.adapterName !== "vision" && routing.adapterName !== "cdp") {
            // Cap the vision attempt at 2s. When the caller omitted timeoutMs,
            // use the cap itself: Math.min(undefined, 2000) would be NaN.
            const visionTimeoutMs = Math.min(timeoutMs ?? 2000, 2000);
            try {
                return await this.vision.locate(sessionId, target, visionTimeoutMs);
            }
            catch {
                // Best effort: a vision failure is reported as "not found".
            }
        }
        return null;
    }
    /**
     * Clicks an element. Elements located by vision (locatorUsed starts with
     * "vision:" and coordinates are present) are clicked through the vision
     * adapter; everything else goes to the routed adapter.
     */
    async click(sessionId, element) {
        // Guard the type so elements without a locatorUsed string do not throw here.
        const locatedByVision = typeof element.locatorUsed === "string"
            && element.locatorUsed.startsWith("vision:");
        if (locatedByVision && element.coordinates) {
            return this.vision.click(sessionId, element);
        }
        return this.getAdapter(sessionId).click(sessionId, element);
    }
    async setValue(sessionId, element, text, clear) {
        return this.getAdapter(sessionId).setValue(sessionId, element, text, clear);
    }
    async getValue(sessionId, element) {
        return this.getAdapter(sessionId).getValue(sessionId, element);
    }
    async waitFor(sessionId, condition, timeoutMs) {
        return this.getAdapter(sessionId).waitFor(sessionId, condition, timeoutMs);
    }
    async extract(sessionId, target, format) {
        return this.getAdapter(sessionId).extract(sessionId, target, format);
    }
    async screenshot(sessionId, region) {
        return this.getAdapter(sessionId).screenshot(sessionId, region);
    }
    // ── Desktop methods (delegate to the best adapter that supports them) ──
    /** Routes the session by the app being launched, then launches it. */
    async launchApp(sessionId, bundleId) {
        this.routeSession(sessionId, bundleId);
        return this.delegateDesktop(sessionId, "launchApp", [sessionId, bundleId]);
    }
    /** Routes the session by the app being focused, then focuses it. */
    async focusApp(sessionId, bundleId) {
        this.routeSession(sessionId, bundleId);
        return this.delegateDesktop(sessionId, "focusApp", [sessionId, bundleId]);
    }
    async listApps(sessionId) {
        return this.accessibility.listApps(sessionId);
    }
    async listWindows(sessionId) {
        return this.accessibility.listWindows(sessionId);
    }
    async menuClick(sessionId, menuPath) {
        return this.delegateDesktop(sessionId, "menuClick", [sessionId, menuPath]);
    }
    async keyCombo(sessionId, keys) {
        return this.delegateDesktop(sessionId, "keyCombo", [sessionId, keys]);
    }
    async elementTree(sessionId, maxDepth, root) {
        return this.delegateDesktop(sessionId, "elementTree", [sessionId, maxDepth, root]);
    }
    async drag(sessionId, from, to) {
        return this.delegateDesktop(sessionId, "drag", [sessionId, from, to]);
    }
    async scroll(sessionId, direction, amount, element) {
        return this.delegateDesktop(sessionId, "scroll", [sessionId, direction, amount, element]);
    }
    /**
     * Calls an optional desktop method on the routed adapter, or on the
     * accessibility adapter when the routed adapter does not provide it.
     * @param sessionId Session whose route selects the adapter.
     * @param methodName Name of the desktop method to invoke.
     * @param args Arguments passed through unchanged.
     */
    delegateDesktop(sessionId, methodName, args) {
        const routed = this.getAdapter(sessionId);
        const receiver = routed[methodName] ? routed : this.accessibility;
        return receiver[methodName](...args);
    }
    // ── Routing logic ──
    /**
     * Picks the adapter for a session from the app identifier and stores it.
     * Windows: known browser process names → CDP, everything else → accessibility.
     * Otherwise: known browser bundle IDs → CDP, scriptable apps → AppleScript,
     * everything else → accessibility.
     */
    routeSession(sessionId, bundleId) {
        let adapter;
        let adapterName;
        if (isWindows) {
            // On Windows the identifier is a process name, matched with or without ".exe".
            const lowered = bundleId.toLowerCase();
            const processName = lowered.replace(/\.exe$/, "");
            if (BROWSER_PROCESS_NAMES.has(processName) || BROWSER_PROCESS_NAMES.has(lowered)) {
                adapter = this.cdp;
                adapterName = "cdp";
            }
            else {
                // No AppleScript on Windows — always use accessibility (UI Automation)
                adapter = this.accessibility;
                adapterName = "accessibility";
            }
        }
        else if (BROWSER_BUNDLES.has(bundleId)) {
            adapter = this.cdp;
            adapterName = "cdp";
        }
        else if (AppleScriptAdapter.isScriptable(bundleId)) {
            adapter = this.applescript;
            adapterName = "applescript";
        }
        else {
            adapter = this.accessibility;
            adapterName = "accessibility";
        }
        this.sessionRouting.set(sessionId, { adapter, adapterName });
    }
    /** Returns the routed adapter for a session, defaulting to accessibility for unknown sessions. */
    getAdapter(sessionId) {
        const routing = this.sessionRouting.get(sessionId);
        return routing?.adapter ?? this.accessibility;
    }
}
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
// Copyright (C) 2025 Clazro Technology Private Limited
|
|
2
|
+
// SPDX-License-Identifier: AGPL-3.0-only
|
|
3
|
+
//
|
|
4
|
+
// This file is part of ScreenHand.
|
|
5
|
+
//
|
|
6
|
+
// ScreenHand is free software: you can redistribute it and/or modify
|
|
7
|
+
// it under the terms of the GNU Affero General Public License as
|
|
8
|
+
// published by the Free Software Foundation, version 3.
|
|
9
|
+
//
|
|
10
|
+
// ScreenHand is distributed in the hope that it will be useful,
|
|
11
|
+
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
12
|
+
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
13
|
+
// GNU Affero General Public License for more details.
|
|
14
|
+
//
|
|
15
|
+
// You should have received a copy of the GNU Affero General Public License
|
|
16
|
+
// along with ScreenHand. If not, see <https://www.gnu.org/licenses/>.
|
|
17
|
+
/**
|
|
18
|
+
* Canonical execution contract for ScreenHand.
|
|
19
|
+
*
|
|
20
|
+
* Defines the fallback chain of execution methods, the result contract
|
|
21
|
+
* every action must satisfy, execution planning, retry policy, and the
|
|
22
|
+
* fallback runner that ties them together.
|
|
23
|
+
*/
|
|
24
|
+
// ── 1. Fallback Chain ──────────────────────────────────────────────────
|
|
25
|
+
/** Execution methods in canonical fallback order, from fastest/most reliable to slowest/least reliable. */
const EXECUTION_METHODS = ["ax", "cdp", "ocr", "coordinates"];
/**
 * Capability record per execution method, keyed by method name.
 * Each record lists which actions the method supports, its average latency
 * in milliseconds, and which infrastructure (bridge, CDP) it requires.
 */
const METHOD_CAPABILITIES = Object.fromEntries([
    // method, canClick, canType, canRead, canLocate, canSelect, canScroll, avgLatencyMs, requiresBridge, requiresCDP
    ["ax", true, true, true, true, true, true, 50, true, false],
    ["cdp", true, true, true, true, true, true, 10, false, true],
    ["ocr", false, false, true, true, false, false, 600, true, false],
    ["coordinates", true, false, false, false, false, true, 50, true, false],
].map(([method, canClick, canType, canRead, canLocate, canSelect, canScroll, avgLatencyMs, requiresBridge, requiresCDP]) => [
    method,
    {
        method,
        canClick,
        canType,
        canRead,
        canLocate,
        canSelect,
        canScroll,
        avgLatencyMs,
        requiresBridge,
        requiresCDP,
    },
]));
/** Maps an action name to the capability flag that must be true for a method to perform it ("click" → "canClick"). */
const ACTION_TO_CAPABILITY = Object.fromEntries(["click", "type", "read", "locate", "select", "scroll"]
    .map((actionName) => [actionName, `can${actionName[0].toUpperCase()}${actionName.slice(1)}`]));
|
|
85
|
+
/**
 * Builds the ordered list of execution methods to try for an action.
 *
 * A method from EXECUTION_METHODS is kept only when it:
 *   1. supports the requested action, and
 *   2. has every piece of infrastructure it requires available.
 * The canonical order (ax -> cdp -> ocr -> coordinates) is preserved.
 *
 * @param action Action name looked up in ACTION_TO_CAPABILITY.
 * @param available Object with hasBridge / hasCDP flags.
 * @returns A new array of method names; empty when nothing qualifies.
 */
function planExecution(action, available) {
    const capabilityFlag = ACTION_TO_CAPABILITY[action];
    const plan = [];
    for (const method of EXECUTION_METHODS) {
        const capability = METHOD_CAPABILITIES[method];
        // Checks short-circuit: `available` is only read for methods that
        // support the action and declare the matching requirement.
        const usable = capability[capabilityFlag]
            && !(capability.requiresBridge && !available.hasBridge)
            && !(capability.requiresCDP && !available.hasCDP);
        if (usable) {
            plan.push(method);
        }
    }
    return plan;
}
|
|
109
|
+
/**
 * Default retry policy for executeWithFallback.
 */
const DEFAULT_RETRY_POLICY = {
    // executeWithFallback loops `attempt` from 0 through this value inclusive,
    // so each method gets its first attempt plus up to this many retries.
    maxRetriesPerMethod: 2,
    // Budget across all methods: once this many attempts have failed,
    // executeWithFallback returns the last result.
    maxTotalRetries: 5,
    // Pause, in milliseconds, before every attempt after the first failure.
    delayBetweenRetriesMs: 500,
    // NOTE(review): not read by executeWithFallback in this file; presumably
    // consumed by a caller. Confirm before relying on it.
    escalateAfter: 3,
};
|
|
115
|
+
// ── 5. Execution Runner ────────────────────────────────────────────────
|
|
116
|
+
/**
 * Sleeps for the given number of milliseconds.
 * @param ms Delay passed straight to setTimeout.
 * @returns A promise that fulfils with undefined once the timer fires.
 */
function delay(ms) {
    return new Promise((wake) => {
        setTimeout(wake, ms);
    });
}
|
|
122
|
+
/**
 * Runs an action through the fallback chain.
 * Tries each method of the plan in order, with retries per method, and
 * returns the result from whichever attempt succeeded (or the last failure).
 *
 * @param action Action name. The runner itself does not read it; it is part
 *   of the call contract only.
 * @param plan Ordered method names to try.
 * @param policy Retry policy. Reads maxRetriesPerMethod, maxTotalRetries and
 *   delayBetweenRetriesMs.
 * @param executor Called as executor(method, attempt). Must resolve to a
 *   result object with an `ok` flag.
 * @returns The first result with ok === true, otherwise the last failed
 *   result. Returns null when no attempt was made (empty plan or zero budget).
 */
async function executeWithFallback(action, plan, policy, executor) {
    let totalRetries = 0;
    let lastResult = null;
    let previousMethod = null;
    for (const method of plan) {
        for (let attempt = 0; attempt <= policy.maxRetriesPerMethod; attempt++) {
            if (totalRetries >= policy.maxTotalRetries) {
                // Exhausted total retry budget — return whatever we have
                return lastResult;
            }
            // Delay between retries (not before the very first attempt)
            if (totalRetries > 0) {
                await delay(policy.delayBetweenRetriesMs);
            }
            const result = await executor(method, attempt);
            // Stamp fallbackFrom if we fell through from a higher-priority method.
            // `== null` also covers executors that omit the field entirely.
            // An explicit value set by the executor is never overwritten.
            if (previousMethod !== null && result.fallbackFrom == null) {
                result.fallbackFrom = previousMethod;
            }
            lastResult = result;
            if (result.ok) {
                return result;
            }
            totalRetries++;
        }
        // This method is exhausted — record it so the next method knows
        previousMethod = method;
    }
    // All methods exhausted
    return lastResult;
}
|
|
158
|
+
// ── Exports ────────────────────────────────────────────────────────────
|
|
159
|
+
export { EXECUTION_METHODS, METHOD_CAPABILITIES, DEFAULT_RETRY_POLICY, planExecution, executeWithFallback, };
|
|
@@ -0,0 +1,266 @@
|
|
|
1
|
+
// Copyright (C) 2025 Clazro Technology Private Limited
|
|
2
|
+
// SPDX-License-Identifier: AGPL-3.0-only
|
|
3
|
+
//
|
|
4
|
+
// This file is part of ScreenHand.
|
|
5
|
+
//
|
|
6
|
+
// ScreenHand is free software: you can redistribute it and/or modify
|
|
7
|
+
// it under the terms of the GNU Affero General Public License as
|
|
8
|
+
// published by the Free Software Foundation, version 3.
|
|
9
|
+
//
|
|
10
|
+
// ScreenHand is distributed in the hope that it will be useful,
|
|
11
|
+
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
12
|
+
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
13
|
+
// GNU Affero General Public License for more details.
|
|
14
|
+
//
|
|
15
|
+
// You should have received a copy of the GNU Affero General Public License
|
|
16
|
+
// along with ScreenHand. If not, see <https://www.gnu.org/licenses/>.
|
|
17
|
+
import { DEFAULT_ACTION_BUDGET } from "../config.js";
|
|
18
|
+
/**
 * Runs high-level UI actions (press, type) against an adapter under a time
 * budget: locate the target, act on it, optionally verify, and report the
 * outcome together with telemetry.
 *
 * Errors are plain objects of the form { code, message, attempts? }.
 */
export class Executor {
    adapter;
    cache;
    logger;
    /**
     * @param adapter UI adapter. Uses locate, click, setValue, getValue,
     *   waitFor, getAppContext and getPageMeta.
     * @param cache Locator cache exposing get(siteKey, actionKey) and
     *   set(siteKey, actionKey, locator).
     * @param logger Telemetry logger exposing start(action, sessionId) and
     *   finish(telemetry, status).
     */
    constructor(adapter, cache, logger) {
        this.adapter = adapter;
        this.cache = cache;
        this.logger = logger;
    }
    /**
     * Locates the target and clicks it, retrying the whole sequence up to
     * budget.maxRetries additional times.
     * @param input { sessionId, target, budget?, verify? }
     * @returns { ok: true, data: pageMeta, telemetry } on success, otherwise
     *   { ok: false, error, telemetry } carrying the last error seen.
     */
    async press(input) {
        const telemetry = this.logger.start("press", input.sessionId);
        const budget = this.resolveBudget(input.budget);
        const attempts = [];
        let lastError;
        for (let retry = 0; retry <= budget.maxRetries; retry += 1) {
            telemetry.retries = retry;
            try {
                const siteKey = await this.currentSiteKey(input.sessionId);
                const actionKey = this.targetToKey(input.target);
                // On retries the cached locator is skipped (skipCache = retry > 0).
                const locateResult = await this.locateWithBudget(input.sessionId, siteKey, actionKey, input.target, budget.locateMs, retry > 0);
                attempts.push(...locateResult.attempts);
                // Telemetry accumulates the budgeted timeout of each attempt, not measured time.
                telemetry.locateMs += locateResult.attempts.reduce((sum, attempt) => sum + attempt.timeoutMs, 0);
                await this.timed(budget.actMs, async () => {
                    await this.adapter.click(input.sessionId, locateResult.element);
                }, "ACTION_FAILED");
                telemetry.actMs += budget.actMs;
                if (input.verify) {
                    const verified = await this.timed(budget.verifyMs, () => this.adapter.waitFor(input.sessionId, input.verify, budget.verifyMs), "VERIFY_FAILED");
                    telemetry.verifyMs += budget.verifyMs;
                    if (!verified) {
                        throw this.runtimeError("VERIFY_FAILED", "Verification condition not met.");
                    }
                }
                const page = await this.adapter.getPageMeta(input.sessionId);
                return this.success(page, telemetry);
            }
            catch (error) {
                lastError = this.asRuntimeError(error, attempts);
            }
        }
        return this.failure(lastError ??
            this.runtimeError("ACTION_FAILED", "Press failed with unknown runtime error."), telemetry);
    }
    /**
     * Locates the target and sets its value in a single attempt (no retry loop).
     * @param input { sessionId, target, text, clear?, verifyValue?, verify?, budget? }.
     *   clear and verifyValue both default to true. With verifyValue the field
     *   is read back and must equal input.text exactly.
     * @returns Same result shape as press().
     */
    async typeInto(input) {
        const telemetry = this.logger.start("type_into", input.sessionId);
        const budget = this.resolveBudget(input.budget);
        const attempts = [];
        try {
            const siteKey = await this.currentSiteKey(input.sessionId);
            const actionKey = `type:${this.targetToKey(input.target)}`;
            const locateResult = await this.locateWithBudget(input.sessionId, siteKey, actionKey, input.target, budget.locateMs, false);
            attempts.push(...locateResult.attempts);
            telemetry.locateMs += budget.locateMs;
            await this.timed(budget.actMs, async () => {
                await this.adapter.setValue(input.sessionId, locateResult.element, input.text, input.clear ?? true);
            }, "ACTION_FAILED");
            telemetry.actMs += budget.actMs;
            if (input.verifyValue ?? true) {
                const read = await this.adapter.getValue(input.sessionId, locateResult.element);
                if (read !== input.text) {
                    throw this.runtimeError("VERIFY_FAILED", `Field value mismatch. Expected "${input.text}", got "${read}".`);
                }
            }
            if (input.verify) {
                const verified = await this.timed(budget.verifyMs, () => this.adapter.waitFor(input.sessionId, input.verify, budget.verifyMs), "VERIFY_FAILED");
                telemetry.verifyMs += budget.verifyMs;
                if (!verified) {
                    throw this.runtimeError("VERIFY_FAILED", "Verification condition not met.");
                }
            }
            const page = await this.adapter.getPageMeta(input.sessionId);
            return this.success(page, telemetry);
        }
        catch (error) {
            return this.failure(this.asRuntimeError(error, attempts), telemetry);
        }
    }
    /**
     * Tries the cached locator (unless skipCache), then each expanded
     * strategy in order. Every attempt gets a third of the locate budget,
     * with a floor of 50ms.
     * @returns { element, attempts } for the first strategy that matches.
     * @throws A LOCATE_FAILED runtime error carrying all attempts when nothing matches.
     */
    async locateWithBudget(sessionId, siteKey, actionKey, target, locateBudgetMs, skipCache) {
        const attempts = [];
        const strategyBudget = Math.max(50, Math.floor(locateBudgetMs / 3));
        if (!skipCache) {
            const cachedLocator = this.cache.get(siteKey, actionKey);
            if (cachedLocator) {
                const cachedTarget = { type: "selector", value: cachedLocator };
                const match = await this.tryLocate(sessionId, "cache", cachedTarget, strategyBudget, attempts);
                if (match) {
                    return { element: match, attempts };
                }
            }
        }
        const strategies = this.expandTargetStrategies(target);
        for (const strategy of strategies) {
            const match = await this.tryLocate(sessionId, strategy.strategy, strategy.target, strategyBudget, attempts);
            if (match) {
                // Only selector targets are cacheable: the cache stores a selector string.
                if (strategy.target.type === "selector") {
                    this.cache.set(siteKey, actionKey, strategy.target.value);
                }
                return { element: match, attempts };
            }
        }
        throw this.runtimeError("LOCATE_FAILED", "Could not locate target.", attempts);
    }
    /**
     * Runs one locate attempt under a timeout and appends its outcome to `attempts`.
     * @returns The located element, or null when nothing matched or the attempt errored.
     */
    async tryLocate(sessionId, strategyName, target, timeoutMs, attempts) {
        try {
            const found = await this.timed(timeoutMs, () => this.adapter.locate(sessionId, target, timeoutMs), "LOCATE_FAILED");
            attempts.push({
                strategy: strategyName,
                target: this.targetToKey(target),
                timeoutMs,
                matched: Boolean(found),
            });
            return found;
        }
        catch (error) {
            attempts.push({
                strategy: strategyName,
                target: this.targetToKey(target),
                timeoutMs,
                matched: false,
                reason: error instanceof Error ? error.message : "Unknown locate error",
            });
            return null;
        }
    }
    /**
     * Expands a target into the ordered list of { strategy, target } pairs to try.
     * Text targets try exact then fuzzy; role targets try exact, fuzzy, then
     * plain text on the name; all other types are passed through unchanged.
     */
    expandTargetStrategies(target) {
        if (target.type === "selector") {
            return [{ strategy: "selector", target }];
        }
        if (target.type === "text") {
            return [
                { strategy: "text_exact", target: { type: "text", value: target.value, exact: true } },
                { strategy: "text_fuzzy", target: { type: "text", value: target.value, exact: false } },
            ];
        }
        if (target.type === "role") {
            return [
                { strategy: "role_name_exact", target: { type: "role", role: target.role, name: target.name, exact: true } },
                { strategy: "role_name_fuzzy", target: { type: "role", role: target.role, name: target.name, exact: false } },
                { strategy: "fallback_text", target: { type: "text", value: target.name } },
            ];
        }
        // For new target types (ax_path, ax_attribute, coordinates, image), pass through directly
        return [{ strategy: target.type, target }];
    }
    /**
     * Derives the cache scope for the session.
     * Uses the host of the app context URL when there is one, otherwise
     * "bundleId::windowTitle". If the app context is unavailable, falls back
     * to the page URL host, and finally to "unknown-site".
     */
    async currentSiteKey(sessionId) {
        // Try app context first for desktop apps, fall back to page URL for browsers
        try {
            const ctx = await this.adapter.getAppContext(sessionId);
            if (ctx.url) {
                try {
                    return new URL(ctx.url).host || ctx.bundleId;
                }
                catch {
                    // URL parsing failed, use bundleId + windowTitle
                }
            }
            return `${ctx.bundleId}::${ctx.windowTitle}`;
        }
        catch {
            // Fallback to page meta
            try {
                const page = await this.adapter.getPageMeta(sessionId);
                return new URL(page.url).host || "unknown-site";
            }
            catch {
                return "unknown-site";
            }
        }
    }
    /** Merges caller overrides over DEFAULT_ACTION_BUDGET; caller keys win. */
    resolveBudget(input) {
        return {
            ...DEFAULT_ACTION_BUDGET,
            ...input,
        };
    }
    /**
     * Races an operation against a timeout.
     * @param timeoutMs Time allowed before rejecting with a TIMEOUT runtime error.
     * @param operation Zero-argument function returning the value or a promise of it.
     * @param errorCode Code used to wrap errors that are not already runtime errors.
     * @returns The operation's result.
     * @throws A runtime error: TIMEOUT, the operation's own runtime error, or
     *   { code: errorCode, message }.
     */
    async timed(timeoutMs, operation, errorCode) {
        let timer;
        const timeout = new Promise((_, reject) => {
            timer = setTimeout(() => {
                reject(this.runtimeError("TIMEOUT", `Timed out after ${timeoutMs}ms.`));
            }, timeoutMs);
        });
        try {
            return await Promise.race([operation(), timeout]);
        }
        catch (error) {
            if (this.isRuntimeError(error)) {
                throw error;
            }
            throw this.runtimeError(errorCode, error instanceof Error ? error.message : "Unexpected runtime error");
        }
        finally {
            // Always release the timer. A pending timer would otherwise keep the
            // event loop alive for up to timeoutMs after the operation settled.
            clearTimeout(timer);
        }
    }
    /**
     * Serialises a target into a string key used for caching and attempt logs.
     * Image targets use only the first 20 characters of their base64 payload.
     */
    targetToKey(target) {
        switch (target.type) {
            case "selector":
                return `selector:${target.value}`;
            case "text":
                return `text:${target.value}`;
            case "role":
                return `role:${target.role}|name:${target.name}`;
            case "ax_path":
                return `ax_path:${target.path.join("/")}`;
            case "ax_attribute":
                return `ax_attr:${target.attribute}=${target.value}`;
            case "coordinates":
                return `coords:${target.x},${target.y}`;
            case "image":
                return `image:${target.base64.slice(0, 20)}`;
            default:
                // expandTargetStrategies passes unknown types through, so give them
                // a readable key instead of undefined.
                return String(target.type);
        }
    }
    /** Builds a success result and closes telemetry with status "success". */
    success(data, telemetry) {
        return {
            ok: true,
            data,
            telemetry: this.logger.finish(telemetry, "success"),
        };
    }
    /** Builds a failure result and closes telemetry with status "failed". */
    failure(error, telemetry) {
        return {
            ok: false,
            error,
            telemetry: this.logger.finish(telemetry, "failed"),
        };
    }
    /** Creates a runtime error object; `attempts` is attached only when non-empty. */
    runtimeError(code, message, attempts) {
        const error = { code, message };
        if (attempts && attempts.length > 0) {
            error.attempts = attempts;
        }
        return error;
    }
    /** True for any non-null object that has both a `code` and a `message` property. */
    isRuntimeError(error) {
        if (typeof error !== "object" || error === null) {
            return false;
        }
        return "code" in error && "message" in error;
    }
    /**
     * Normalises anything thrown into a runtime error. Existing runtime errors
     * keep their own attempts; otherwise the given attempts are attached.
     * Non-runtime errors become ACTION_FAILED.
     */
    asRuntimeError(error, attempts) {
        if (this.isRuntimeError(error)) {
            if (error.attempts || !attempts || attempts.length === 0) {
                return error;
            }
            return {
                ...error,
                attempts,
            };
        }
        return this.runtimeError("ACTION_FAILED", error instanceof Error ? error.message : "Unexpected runtime error", attempts);
    }
}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
// Copyright (C) 2025 Clazro Technology Private Limited
|
|
2
|
+
// SPDX-License-Identifier: AGPL-3.0-only
|
|
3
|
+
//
|
|
4
|
+
// This file is part of ScreenHand.
|
|
5
|
+
//
|
|
6
|
+
// ScreenHand is free software: you can redistribute it and/or modify
|
|
7
|
+
// it under the terms of the GNU Affero General Public License as
|
|
8
|
+
// published by the Free Software Foundation, version 3.
|
|
9
|
+
//
|
|
10
|
+
// ScreenHand is distributed in the hope that it will be useful,
|
|
11
|
+
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
12
|
+
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
13
|
+
// GNU Affero General Public License for more details.
|
|
14
|
+
//
|
|
15
|
+
// You should have received a copy of the GNU Affero General Public License
|
|
16
|
+
// along with ScreenHand. If not, see <https://www.gnu.org/licenses/>.
|
|
17
|
+
/**
 * In-memory cache of locators, scoped by site and action.
 * Entries live in a Map keyed by the string produced by key().
 */
export class LocatorCache {
    store = new Map();
    /** Returns the locator stored for the site/action pair, or undefined when absent. */
    get(siteKey, actionKey) {
        const entryKey = this.key(siteKey, actionKey);
        return this.store.get(entryKey);
    }
    /** Stores (or overwrites) the locator for the site/action pair. */
    set(siteKey, actionKey, locator) {
        const entryKey = this.key(siteKey, actionKey);
        this.store.set(entryKey, locator);
    }
    /** Builds the composite map key: the two parts as strings, joined by "::". */
    key(siteKey, actionKey) {
        return "".concat(siteKey, "::", actionKey);
    }
}
|