screenhand 0.2.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +165 -446
- package/bin/darwin-arm64/macos-bridge +0 -0
- package/dist/mcp-desktop.js +3615 -400
- package/dist/scripts/export-help-center.js +112 -0
- package/dist/scripts/marketing-loop.js +117 -0
- package/dist/scripts/observer-daemon.js +288 -0
- package/dist/scripts/orchestrator-daemon.js +399 -0
- package/dist/scripts/threads-campaign.js +208 -0
- package/dist/src/community/fetcher.js +109 -0
- package/dist/src/community/index.js +6 -0
- package/dist/src/community/publisher.js +191 -0
- package/dist/src/community/remote-api.js +121 -0
- package/dist/src/community/types.js +3 -0
- package/dist/src/community/validator.js +95 -0
- package/dist/src/context-tracker.js +489 -0
- package/dist/src/ingestion/coverage-auditor.js +233 -0
- package/dist/src/ingestion/doc-parser.js +164 -0
- package/dist/src/ingestion/index.js +8 -0
- package/dist/src/ingestion/menu-scanner.js +152 -0
- package/dist/src/ingestion/reference-merger.js +186 -0
- package/dist/src/ingestion/shortcut-extractor.js +180 -0
- package/dist/src/ingestion/tutorial-extractor.js +170 -0
- package/dist/src/ingestion/types.js +3 -0
- package/dist/src/jobs/manager.js +82 -14
- package/dist/src/jobs/runner.js +138 -15
- package/dist/src/learning/engine.js +356 -0
- package/dist/src/learning/index.js +9 -0
- package/dist/src/learning/locator-policy.js +120 -0
- package/dist/src/learning/pattern-policy.js +89 -0
- package/dist/src/learning/recovery-policy.js +116 -0
- package/dist/src/learning/sensor-policy.js +115 -0
- package/dist/src/learning/timing-model.js +204 -0
- package/dist/src/learning/topology-policy.js +90 -0
- package/dist/src/learning/types.js +9 -0
- package/dist/src/logging/timeline-logger.js +4 -1
- package/dist/src/memory/playbook-seeds.js +200 -0
- package/dist/src/memory/recall.js +60 -8
- package/dist/src/memory/service.js +30 -5
- package/dist/src/memory/store.js +34 -5
- package/dist/src/native/bridge-client.js +253 -31
- package/dist/src/observer/state.js +199 -0
- package/dist/src/observer/types.js +43 -0
- package/dist/src/orchestrator/state.js +68 -0
- package/dist/src/orchestrator/types.js +22 -0
- package/dist/src/perception/ax-source.js +162 -0
- package/dist/src/perception/cdp-source.js +162 -0
- package/dist/src/perception/coordinator.js +771 -0
- package/dist/src/perception/frame-differ.js +287 -0
- package/dist/src/perception/index.js +22 -0
- package/dist/src/perception/manager.js +199 -0
- package/dist/src/perception/types.js +47 -0
- package/dist/src/perception/vision-source.js +399 -0
- package/dist/src/planner/deterministic.js +298 -0
- package/dist/src/planner/executor.js +870 -0
- package/dist/src/planner/goal-store.js +92 -0
- package/dist/src/planner/index.js +21 -0
- package/dist/src/planner/planner.js +520 -0
- package/dist/src/planner/tool-registry.js +71 -0
- package/dist/src/planner/types.js +22 -0
- package/dist/src/platform/explorer.js +213 -0
- package/dist/src/platform/help-center-markdown.js +527 -0
- package/dist/src/platform/learner.js +257 -0
- package/dist/src/playbook/engine.js +296 -11
- package/dist/src/playbook/mcp-recorder.js +204 -0
- package/dist/src/playbook/recorder.js +3 -2
- package/dist/src/playbook/runner.js +1 -1
- package/dist/src/playbook/store.js +139 -10
- package/dist/src/recovery/detectors.js +156 -0
- package/dist/src/recovery/engine.js +327 -0
- package/dist/src/recovery/index.js +20 -0
- package/dist/src/recovery/strategies.js +274 -0
- package/dist/src/recovery/types.js +20 -0
- package/dist/src/runtime/accessibility-adapter.js +55 -18
- package/dist/src/runtime/applescript-adapter.js +8 -2
- package/dist/src/runtime/cdp-chrome-adapter.js +1 -1
- package/dist/src/runtime/executor.js +23 -3
- package/dist/src/runtime/locator-cache.js +24 -2
- package/dist/src/runtime/service.js +59 -15
- package/dist/src/runtime/session-manager.js +4 -1
- package/dist/src/runtime/vision-adapter.js +2 -1
- package/dist/src/state/app-map-types.js +72 -0
- package/dist/src/state/app-map.js +1974 -0
- package/dist/src/state/entity-tracker.js +108 -0
- package/dist/src/state/fusion.js +96 -0
- package/dist/src/state/index.js +21 -0
- package/dist/src/state/ladder-generator.js +236 -0
- package/dist/src/state/persistence.js +156 -0
- package/dist/src/state/types.js +17 -0
- package/dist/src/state/world-model.js +1456 -0
- package/dist/src/util/atomic-write.js +19 -4
- package/dist/src/util/sanitize.js +146 -0
- package/dist-app-maps/com.figma.Desktop.json +959 -0
- package/dist-app-maps/com.hnc.Discord.json +1146 -0
- package/dist-app-maps/notion.id.json +2831 -0
- package/dist-playbooks/canva-screenhand-carousel.json +445 -0
- package/dist-playbooks/codex-desktop.json +76 -0
- package/dist-playbooks/competitor-research-stack.json +122 -0
- package/dist-playbooks/davinci-color-grade.json +153 -0
- package/dist-playbooks/davinci-edit-timeline.json +162 -0
- package/dist-playbooks/davinci-render.json +114 -0
- package/dist-playbooks/devto.json +52 -0
- package/dist-playbooks/discord.json +41 -0
- package/dist-playbooks/google-flow-create-project.json +59 -0
- package/dist-playbooks/google-flow-edit-image.json +90 -0
- package/dist-playbooks/google-flow-edit-video.json +90 -0
- package/dist-playbooks/google-flow-generate-image.json +68 -0
- package/dist-playbooks/google-flow-generate-video.json +191 -0
- package/dist-playbooks/google-flow-open-project.json +48 -0
- package/dist-playbooks/google-flow-open-scenebuilder.json +64 -0
- package/dist-playbooks/google-flow-search-assets.json +64 -0
- package/dist-playbooks/instagram.json +57 -0
- package/dist-playbooks/linkedin.json +52 -0
- package/dist-playbooks/n8n.json +43 -0
- package/dist-playbooks/reddit.json +52 -0
- package/dist-playbooks/threads.json +59 -0
- package/dist-playbooks/x-twitter.json +59 -0
- package/dist-playbooks/youtube.json +59 -0
- package/dist-references/canva.json +646 -0
- package/dist-references/codex-desktop.json +305 -0
- package/dist-references/davinci-resolve-keyboard.json +594 -0
- package/dist-references/davinci-resolve-menu-map.json +1139 -0
- package/dist-references/davinci-resolve-menus-batch1.json +116 -0
- package/dist-references/davinci-resolve-menus-batch2.json +372 -0
- package/dist-references/davinci-resolve-menus-batch3.json +330 -0
- package/dist-references/davinci-resolve-menus-batch4.json +297 -0
- package/dist-references/davinci-resolve-shortcuts.json +333 -0
- package/dist-references/devpost.json +186 -0
- package/dist-references/devto.json +317 -0
- package/dist-references/discord.json +549 -0
- package/dist-references/figma.json +1186 -0
- package/dist-references/finder.json +146 -0
- package/dist-references/google-ads-transparency.json +95 -0
- package/dist-references/google-flow.json +649 -0
- package/dist-references/instagram.json +341 -0
- package/dist-references/linkedin.json +324 -0
- package/dist-references/meta-ad-library.json +86 -0
- package/dist-references/n8n.json +387 -0
- package/dist-references/notes.json +27 -0
- package/dist-references/notion.json +163 -0
- package/dist-references/reddit.json +341 -0
- package/dist-references/threads.json +337 -0
- package/dist-references/x-twitter.json +403 -0
- package/dist-references/youtube.json +373 -0
- package/native/macos-bridge/Package.swift +22 -0
- package/native/macos-bridge/Sources/AccessibilityBridge.swift +482 -0
- package/native/macos-bridge/Sources/AppManagement.swift +339 -0
- package/native/macos-bridge/Sources/CoreGraphicsBridge.swift +537 -0
- package/native/macos-bridge/Sources/ObserverBridge.swift +120 -0
- package/native/macos-bridge/Sources/StreamCapture.swift +136 -0
- package/native/macos-bridge/Sources/VisionBridge.swift +238 -0
- package/native/macos-bridge/Sources/main.swift +498 -0
- package/native/windows-bridge/AppManagement.cs +234 -0
- package/native/windows-bridge/InputBridge.cs +436 -0
- package/native/windows-bridge/Program.cs +270 -0
- package/native/windows-bridge/ScreenCapture.cs +453 -0
- package/native/windows-bridge/UIAutomationBridge.cs +571 -0
- package/native/windows-bridge/WindowsBridge.csproj +17 -0
- package/package.json +12 -1
- package/scripts/postinstall.cjs +127 -0
- package/dist/.audit-log.jsonl +0 -55
- package/dist/.screenhand/memory/.lock +0 -1
- package/dist/.screenhand/memory/actions.jsonl +0 -85
- package/dist/.screenhand/memory/errors.jsonl +0 -5
- package/dist/.screenhand/memory/errors.jsonl.bak +0 -4
- package/dist/.screenhand/memory/state.json +0 -35
- package/dist/.screenhand/memory/state.json.bak +0 -35
- package/dist/.screenhand/memory/strategies.jsonl +0 -12
- package/dist/agent/cli.js +0 -73
- package/dist/agent/loop.js +0 -258
- package/dist/config.js +0 -9
- package/dist/index.js +0 -56
- package/dist/logging/timeline-logger.js +0 -29
- package/dist/mcp/mcp-stdio-server.js +0 -448
- package/dist/mcp/server.js +0 -347
- package/dist/mcp-entry.js +0 -59
- package/dist/memory/recall.js +0 -160
- package/dist/memory/research.js +0 -98
- package/dist/memory/seeds.js +0 -89
- package/dist/memory/session.js +0 -161
- package/dist/memory/store.js +0 -391
- package/dist/memory/types.js +0 -4
- package/dist/monitor/codex-monitor.js +0 -377
- package/dist/monitor/task-queue.js +0 -84
- package/dist/monitor/types.js +0 -49
- package/dist/native/bridge-client.js +0 -174
- package/dist/native/macos-bridge-client.js +0 -5
- package/dist/npm-publish-helper.js +0 -117
- package/dist/npm-token-cdp.js +0 -113
- package/dist/npm-token-create.js +0 -135
- package/dist/npm-token-finish.js +0 -126
- package/dist/playbook/engine.js +0 -193
- package/dist/playbook/index.js +0 -4
- package/dist/playbook/recorder.js +0 -519
- package/dist/playbook/runner.js +0 -392
- package/dist/playbook/store.js +0 -166
- package/dist/playbook/types.js +0 -4
- package/dist/runtime/accessibility-adapter.js +0 -377
- package/dist/runtime/app-adapter.js +0 -48
- package/dist/runtime/applescript-adapter.js +0 -283
- package/dist/runtime/ax-role-map.js +0 -80
- package/dist/runtime/browser-adapter.js +0 -36
- package/dist/runtime/cdp-chrome-adapter.js +0 -505
- package/dist/runtime/composite-adapter.js +0 -205
- package/dist/runtime/executor.js +0 -250
- package/dist/runtime/locator-cache.js +0 -12
- package/dist/runtime/planning-loop.js +0 -47
- package/dist/runtime/service.js +0 -372
- package/dist/runtime/session-manager.js +0 -28
- package/dist/runtime/state-observer.js +0 -105
- package/dist/runtime/vision-adapter.js +0 -208
- package/dist/test-mcp-protocol.js +0 -138
- package/dist/types.js +0 -1
|
@@ -14,12 +14,14 @@
|
|
|
14
14
|
//
|
|
15
15
|
// You should have received a copy of the GNU Affero General Public License
|
|
16
16
|
// along with ScreenHand. If not, see <https://www.gnu.org/licenses/>.
|
|
17
|
+
import { randomUUID } from "node:crypto";
|
|
17
18
|
import { toAXRole } from "./ax-role-map.js";
|
|
18
19
|
const POLL_INTERVAL_MS = 100;
|
|
19
20
|
export class AccessibilityAdapter {
|
|
20
21
|
bridge;
|
|
21
22
|
sessions = new Map();
|
|
22
23
|
sessionsByProfile = new Map();
|
|
24
|
+
lastPidRefresh = 0;
|
|
23
25
|
constructor(bridge) {
|
|
24
26
|
this.bridge = bridge;
|
|
25
27
|
}
|
|
@@ -35,7 +37,7 @@ export class AccessibilityAdapter {
|
|
|
35
37
|
throw new Error("Accessibility permission not granted. Go to System Settings → Privacy & Security → Accessibility and enable this app.");
|
|
36
38
|
}
|
|
37
39
|
const info = {
|
|
38
|
-
sessionId: reuseSessionId ?? `ax_session_${profile}_${Date.now()}`,
|
|
40
|
+
sessionId: reuseSessionId ?? `ax_session_${profile}_${Date.now()}_${randomUUID().slice(0, 8)}`,
|
|
39
41
|
profile,
|
|
40
42
|
createdAt: new Date().toISOString(),
|
|
41
43
|
adapterType: "accessibility",
|
|
@@ -122,6 +124,7 @@ export class AccessibilityAdapter {
|
|
|
122
124
|
}
|
|
123
125
|
async click(sessionId, element) {
|
|
124
126
|
const state = this.requireSession(sessionId);
|
|
127
|
+
await this.refreshPidIfNeeded(state);
|
|
125
128
|
const elementPath = this.parseElementPath(element.handleId);
|
|
126
129
|
if (elementPath) {
|
|
127
130
|
await this.bridge.call("ax.performAction", {
|
|
@@ -134,7 +137,7 @@ export class AccessibilityAdapter {
|
|
|
134
137
|
// Fallback to coordinate click
|
|
135
138
|
const cx = element.coordinates.x + element.coordinates.width / 2;
|
|
136
139
|
const cy = element.coordinates.y + element.coordinates.height / 2;
|
|
137
|
-
await this.bridge.call("cg.mouseClick", { x: cx, y: cy });
|
|
140
|
+
await this.bridge.call("cg.mouseClick", { x: cx, y: cy, targetPid: state.pid });
|
|
138
141
|
}
|
|
139
142
|
else {
|
|
140
143
|
throw new Error("Cannot click: no element path or coordinates");
|
|
@@ -142,6 +145,7 @@ export class AccessibilityAdapter {
|
|
|
142
145
|
}
|
|
143
146
|
async setValue(sessionId, element, text, clear) {
|
|
144
147
|
const state = this.requireSession(sessionId);
|
|
148
|
+
await this.refreshPidIfNeeded(state);
|
|
145
149
|
const elementPath = this.parseElementPath(element.handleId);
|
|
146
150
|
if (clear && elementPath) {
|
|
147
151
|
// Try AX value set first
|
|
@@ -158,13 +162,15 @@ export class AccessibilityAdapter {
|
|
|
158
162
|
}
|
|
159
163
|
}
|
|
160
164
|
// Fallback: click to focus, select all if clearing, then type
|
|
165
|
+
// Use PID-targeted events to prevent keystrokes going to wrong app
|
|
166
|
+
const targetPid = state.pid;
|
|
161
167
|
await this.click(sessionId, element);
|
|
162
168
|
await sleep(50);
|
|
163
169
|
if (clear) {
|
|
164
|
-
await this.bridge.call("cg.keyCombo", { keys: ["cmd", "a"] });
|
|
170
|
+
await this.bridge.call("cg.keyCombo", { keys: ["cmd", "a"], targetPid });
|
|
165
171
|
await sleep(50);
|
|
166
172
|
}
|
|
167
|
-
await this.bridge.call("cg.typeText", { text });
|
|
173
|
+
await this.bridge.call("cg.typeText", { text, targetPid });
|
|
168
174
|
}
|
|
169
175
|
async getValue(sessionId, element) {
|
|
170
176
|
const state = this.requireSession(sessionId);
|
|
@@ -246,16 +252,18 @@ export class AccessibilityAdapter {
|
|
|
246
252
|
async focusApp(sessionId, bundleId) {
|
|
247
253
|
const state = this.requireSession(sessionId);
|
|
248
254
|
await this.bridge.call("app.focus", { bundleId });
|
|
249
|
-
//
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
state.bundleId = bundleId;
|
|
256
|
-
state.appName = app.name;
|
|
257
|
-
}
|
|
255
|
+
// Verify focus was achieved by checking frontmost app
|
|
256
|
+
let frontmost = await this.bridge.call("app.frontmost");
|
|
257
|
+
// If focus didn't take, retry once
|
|
258
|
+
if (frontmost.bundleId !== bundleId) {
|
|
259
|
+
await this.bridge.call("app.focus", { bundleId });
|
|
260
|
+
frontmost = await this.bridge.call("app.frontmost");
|
|
258
261
|
}
|
|
262
|
+
// Update state based on actual frontmost app, not optimistic assumption
|
|
263
|
+
state.pid = frontmost.pid;
|
|
264
|
+
state.bundleId = frontmost.bundleId;
|
|
265
|
+
state.appName = frontmost.name;
|
|
266
|
+
this.lastPidRefresh = Date.now();
|
|
259
267
|
}
|
|
260
268
|
async listApps(_sessionId) {
|
|
261
269
|
return this.bridge.call("app.list");
|
|
@@ -265,10 +273,12 @@ export class AccessibilityAdapter {
|
|
|
265
273
|
}
|
|
266
274
|
async menuClick(sessionId, menuPath) {
|
|
267
275
|
const state = this.requireSession(sessionId);
|
|
276
|
+
await this.refreshPidIfNeeded(state);
|
|
268
277
|
await this.bridge.call("ax.menuClick", { pid: state.pid, menuPath });
|
|
269
278
|
}
|
|
270
|
-
async keyCombo(
|
|
271
|
-
|
|
279
|
+
async keyCombo(sessionId, keys) {
|
|
280
|
+
const state = this.requireSession(sessionId);
|
|
281
|
+
await this.bridge.call("cg.keyCombo", { keys, targetPid: state.pid });
|
|
272
282
|
}
|
|
273
283
|
async elementTree(sessionId, maxDepth, _root) {
|
|
274
284
|
const state = this.requireSession(sessionId);
|
|
@@ -285,9 +295,10 @@ export class AccessibilityAdapter {
|
|
|
285
295
|
const fromY = from.coordinates.y + from.coordinates.height / 2;
|
|
286
296
|
const toX = to.coordinates.x + to.coordinates.width / 2;
|
|
287
297
|
const toY = to.coordinates.y + to.coordinates.height / 2;
|
|
288
|
-
|
|
298
|
+
const state = this.requireSession(sessionId);
|
|
299
|
+
await this.bridge.call("cg.mouseDrag", { fromX, fromY, toX, toY, targetPid: state.pid });
|
|
289
300
|
}
|
|
290
|
-
async scroll(
|
|
301
|
+
async scroll(sessionId, direction, amount, element) {
|
|
291
302
|
let x = 500;
|
|
292
303
|
let y = 400;
|
|
293
304
|
if (element?.coordinates) {
|
|
@@ -301,9 +312,35 @@ export class AccessibilityAdapter {
|
|
|
301
312
|
right: { deltaX: amount, deltaY: 0 },
|
|
302
313
|
};
|
|
303
314
|
const delta = deltaMap[direction];
|
|
304
|
-
|
|
315
|
+
const state = this.requireSession(sessionId);
|
|
316
|
+
await this.bridge.call("cg.scroll", { x, y, ...delta, targetPid: state.pid });
|
|
317
|
+
}
|
|
318
|
+
async isFrontmost() {
|
|
319
|
+
// Check if *any* session's bundleId matches the current frontmost app.
|
|
320
|
+
// Used by the executor to verify focus before acting.
|
|
321
|
+
const frontmost = await this.bridge.call("app.frontmost");
|
|
322
|
+
for (const state of this.sessions.values()) {
|
|
323
|
+
if (state.bundleId === frontmost.bundleId) {
|
|
324
|
+
return true;
|
|
325
|
+
}
|
|
326
|
+
}
|
|
327
|
+
return false;
|
|
305
328
|
}
|
|
306
329
|
// ── Private helpers ──
|
|
330
|
+
async refreshPidIfNeeded(state) {
|
|
331
|
+
if (Date.now() - this.lastPidRefresh < 500)
|
|
332
|
+
return;
|
|
333
|
+
try {
|
|
334
|
+
const frontmost = await this.bridge.call("app.frontmost");
|
|
335
|
+
if (frontmost.bundleId === state.bundleId) {
|
|
336
|
+
state.pid = frontmost.pid;
|
|
337
|
+
this.lastPidRefresh = Date.now();
|
|
338
|
+
}
|
|
339
|
+
}
|
|
340
|
+
catch {
|
|
341
|
+
// Best-effort refresh; don't break the caller
|
|
342
|
+
}
|
|
343
|
+
}
|
|
307
344
|
requireSession(sessionId) {
|
|
308
345
|
const state = this.sessions.get(sessionId);
|
|
309
346
|
if (!state)
|
|
@@ -15,6 +15,7 @@
|
|
|
15
15
|
// You should have received a copy of the GNU Affero General Public License
|
|
16
16
|
// along with ScreenHand. If not, see <https://www.gnu.org/licenses/>.
|
|
17
17
|
import { execFile } from "node:child_process";
|
|
18
|
+
import { randomUUID } from "node:crypto";
|
|
18
19
|
import { promisify } from "node:util";
|
|
19
20
|
const execFileAsync = promisify(execFile);
|
|
20
21
|
const POLL_INTERVAL_MS = 100;
|
|
@@ -49,7 +50,7 @@ export class AppleScriptAdapter {
|
|
|
49
50
|
if (existing)
|
|
50
51
|
return existing.info;
|
|
51
52
|
const info = {
|
|
52
|
-
sessionId: reuseSessionId ?? `as_session_${profile}_${Date.now()}`,
|
|
53
|
+
sessionId: reuseSessionId ?? `as_session_${profile}_${Date.now()}_${randomUUID().slice(0, 8)}`,
|
|
53
54
|
profile,
|
|
54
55
|
createdAt: new Date().toISOString(),
|
|
55
56
|
adapterType: "applescript",
|
|
@@ -291,7 +292,12 @@ export class AppleScriptAdapter {
|
|
|
291
292
|
throw new Error(`AppleScript adapter does not support target type: ${target.type}`);
|
|
292
293
|
}
|
|
293
294
|
escapeAS(str) {
|
|
294
|
-
return str
|
|
295
|
+
return str
|
|
296
|
+
.replace(/\\/g, "\\\\")
|
|
297
|
+
.replace(/"/g, '\\"')
|
|
298
|
+
.replace(/\n/g, "\\n")
|
|
299
|
+
.replace(/\r/g, "\\r")
|
|
300
|
+
.replace(/\0/g, "");
|
|
295
301
|
}
|
|
296
302
|
}
|
|
297
303
|
function sleep(ms) {
|
|
@@ -47,7 +47,7 @@ export class CdpChromeAdapter {
|
|
|
47
47
|
const client = await CDP({ port: chrome.port, target: targetId });
|
|
48
48
|
await Promise.all([client.Page.enable(), client.Runtime.enable()]);
|
|
49
49
|
const info = {
|
|
50
|
-
sessionId: reuseSessionId ?? `cdp_session_${profile}_${Date.now()}`,
|
|
50
|
+
sessionId: reuseSessionId ?? `cdp_session_${profile}_${Date.now()}_${randomUUID().slice(0, 8)}`,
|
|
51
51
|
profile,
|
|
52
52
|
createdAt: new Date().toISOString(),
|
|
53
53
|
adapterType: "cdp",
|
|
@@ -37,6 +37,14 @@ export class Executor {
|
|
|
37
37
|
const locateResult = await this.locateWithBudget(input.sessionId, siteKey, actionKey, input.target, budget.locateMs, retry > 0);
|
|
38
38
|
attempts.push(...locateResult.attempts);
|
|
39
39
|
telemetry.locateMs += locateResult.attempts.reduce((sum, attempt) => sum + attempt.timeoutMs, 0);
|
|
40
|
+
// Re-validate focus before acting — app may have lost focus during locate
|
|
41
|
+
if (this.adapter.isFrontmost) {
|
|
42
|
+
const front = await this.adapter.isFrontmost();
|
|
43
|
+
if (!front && this.adapter.focusApp) {
|
|
44
|
+
const ctx = await this.adapter.getAppContext(input.sessionId);
|
|
45
|
+
await this.adapter.focusApp(input.sessionId, ctx.bundleId);
|
|
46
|
+
}
|
|
47
|
+
}
|
|
40
48
|
await this.timed(budget.actMs, async () => {
|
|
41
49
|
await this.adapter.click(input.sessionId, locateResult.element);
|
|
42
50
|
}, "ACTION_FAILED");
|
|
@@ -68,6 +76,14 @@ export class Executor {
|
|
|
68
76
|
const locateResult = await this.locateWithBudget(input.sessionId, siteKey, actionKey, input.target, budget.locateMs, false);
|
|
69
77
|
attempts.push(...locateResult.attempts);
|
|
70
78
|
telemetry.locateMs += budget.locateMs;
|
|
79
|
+
// Re-validate focus before acting — app may have lost focus during locate
|
|
80
|
+
if (this.adapter.isFrontmost) {
|
|
81
|
+
const front = await this.adapter.isFrontmost();
|
|
82
|
+
if (!front && this.adapter.focusApp) {
|
|
83
|
+
const ctx = await this.adapter.getAppContext(input.sessionId);
|
|
84
|
+
await this.adapter.focusApp(input.sessionId, ctx.bundleId);
|
|
85
|
+
}
|
|
86
|
+
}
|
|
71
87
|
await this.timed(budget.actMs, async () => {
|
|
72
88
|
await this.adapter.setValue(input.sessionId, locateResult.element, input.text, input.clear ?? true);
|
|
73
89
|
}, "ACTION_FAILED");
|
|
@@ -171,7 +187,7 @@ export class Executor {
|
|
|
171
187
|
// URL parsing failed, use bundleId + windowTitle
|
|
172
188
|
}
|
|
173
189
|
}
|
|
174
|
-
return
|
|
190
|
+
return ctx.bundleId;
|
|
175
191
|
}
|
|
176
192
|
catch {
|
|
177
193
|
// Fallback to page meta
|
|
@@ -191,15 +207,19 @@ export class Executor {
|
|
|
191
207
|
};
|
|
192
208
|
}
|
|
193
209
|
async timed(timeoutMs, operation, errorCode) {
|
|
210
|
+
let timerId;
|
|
194
211
|
const timeout = new Promise((_, reject) => {
|
|
195
|
-
setTimeout(() => {
|
|
212
|
+
timerId = setTimeout(() => {
|
|
196
213
|
reject(this.runtimeError("TIMEOUT", `Timed out after ${timeoutMs}ms.`));
|
|
197
214
|
}, timeoutMs);
|
|
198
215
|
});
|
|
199
216
|
try {
|
|
200
|
-
|
|
217
|
+
const result = await Promise.race([operation(), timeout]);
|
|
218
|
+
clearTimeout(timerId);
|
|
219
|
+
return result;
|
|
201
220
|
}
|
|
202
221
|
catch (error) {
|
|
222
|
+
clearTimeout(timerId);
|
|
203
223
|
if (this.isRuntimeError(error)) {
|
|
204
224
|
throw error;
|
|
205
225
|
}
|
|
@@ -16,13 +16,35 @@
|
|
|
16
16
|
// along with ScreenHand. If not, see <https://www.gnu.org/licenses/>.
|
|
17
17
|
export class LocatorCache {
|
|
18
18
|
store = new Map();
|
|
19
|
+
learningEngine = null;
|
|
20
|
+
/**
|
|
21
|
+
* Inject the learning engine for fallback on cache miss.
|
|
22
|
+
* Called after both are constructed to avoid circular dependencies.
|
|
23
|
+
*/
|
|
24
|
+
setLearningEngine(engine) {
|
|
25
|
+
this.learningEngine = engine;
|
|
26
|
+
}
|
|
19
27
|
get(siteKey, actionKey) {
|
|
20
|
-
|
|
28
|
+
// 1. Check in-memory cache first
|
|
29
|
+
const cached = this.store.get(this.key(siteKey, actionKey));
|
|
30
|
+
if (cached)
|
|
31
|
+
return cached;
|
|
32
|
+
// 2. Fallback: ask learning engine for a proven locator
|
|
33
|
+
if (this.learningEngine) {
|
|
34
|
+
const learned = this.learningEngine.recommendLocator(siteKey, actionKey);
|
|
35
|
+
if (learned) {
|
|
36
|
+
// Promote to cache for fast subsequent lookups
|
|
37
|
+
this.store.set(this.key(siteKey, actionKey), learned.locator);
|
|
38
|
+
return learned.locator;
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
return undefined;
|
|
21
42
|
}
|
|
22
43
|
set(siteKey, actionKey, locator) {
|
|
23
44
|
this.store.set(this.key(siteKey, actionKey), locator);
|
|
24
45
|
}
|
|
25
46
|
key(siteKey, actionKey) {
|
|
26
|
-
|
|
47
|
+
// Use length-prefixed format to avoid collision when keys contain the separator
|
|
48
|
+
return `${siteKey.length}:${siteKey}\0${actionKey}`;
|
|
27
49
|
}
|
|
28
50
|
}
|
|
@@ -23,20 +23,57 @@ export class AutomationRuntimeService {
|
|
|
23
23
|
logger;
|
|
24
24
|
sessions;
|
|
25
25
|
executor;
|
|
26
|
+
worldModel = null;
|
|
26
27
|
constructor(adapter, logger, cache = new LocatorCache()) {
|
|
27
28
|
this.adapter = adapter;
|
|
28
29
|
this.logger = logger;
|
|
29
30
|
this.sessions = new SessionManager(adapter);
|
|
30
31
|
this.executor = new Executor(adapter, cache, logger);
|
|
31
32
|
}
|
|
33
|
+
/**
|
|
34
|
+
* Inject the WorldModel so runtime actions update shared state.
|
|
35
|
+
*/
|
|
36
|
+
setWorldModel(model) {
|
|
37
|
+
this.worldModel = model;
|
|
38
|
+
}
|
|
32
39
|
async sessionStart(profile = DEFAULT_PROFILE) {
|
|
33
40
|
return this.sessions.sessionStart(profile);
|
|
34
41
|
}
|
|
42
|
+
/**
|
|
43
|
+
* Ensure session exists (re-attaches if lost after MCP restart).
|
|
44
|
+
* Also reloads the world model from disk when re-attaching so world
|
|
45
|
+
* state survives across MCP server restarts.
|
|
46
|
+
*/
|
|
47
|
+
async ensureSession(sessionId) {
|
|
48
|
+
const hadSession = !!this.sessions.getSession(sessionId);
|
|
49
|
+
const session = await this.sessions.requireSessionResilent(sessionId);
|
|
50
|
+
// If we had to re-attach, reload persisted world state
|
|
51
|
+
if (!hadSession && this.worldModel) {
|
|
52
|
+
this.worldModel.init(sessionId);
|
|
53
|
+
}
|
|
54
|
+
return session;
|
|
55
|
+
}
|
|
56
|
+
// L2-74 fix: Centralized URL protocol validation for all navigate paths
|
|
57
|
+
static BLOCKED_URL_PROTOCOLS = ["javascript:", "data:", "blob:", "vbscript:"];
|
|
35
58
|
async navigate(input) {
|
|
36
59
|
const telemetry = this.logger.start("navigate", input.sessionId);
|
|
37
60
|
try {
|
|
38
|
-
|
|
61
|
+
// L2-74 fix: Block dangerous URL protocols at the service level
|
|
62
|
+
const urlLower = input.url.trim().toLowerCase();
|
|
63
|
+
for (const proto of AutomationRuntimeService.BLOCKED_URL_PROTOCOLS) {
|
|
64
|
+
if (urlLower.startsWith(proto)) {
|
|
65
|
+
throw new Error(`Blocked: "${proto}" URLs are not allowed for security reasons`);
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
await this.ensureSession(input.sessionId);
|
|
39
69
|
const page = await this.adapter.navigate(input.sessionId, input.url, input.timeoutMs ?? DEFAULT_NAVIGATE_TIMEOUT_MS);
|
|
70
|
+
// Feed navigation result to world model for domain state tracking
|
|
71
|
+
if (this.worldModel) {
|
|
72
|
+
const bundleId = this.worldModel.getState().focusedApp?.bundleId;
|
|
73
|
+
if (bundleId) {
|
|
74
|
+
this.worldModel.ingestCDPSnapshot(bundleId, input.url, page.title ?? "");
|
|
75
|
+
}
|
|
76
|
+
}
|
|
40
77
|
return {
|
|
41
78
|
ok: true,
|
|
42
79
|
data: page,
|
|
@@ -57,7 +94,7 @@ export class AutomationRuntimeService {
|
|
|
57
94
|
async waitFor(input) {
|
|
58
95
|
const telemetry = this.logger.start("wait_for", input.sessionId);
|
|
59
96
|
try {
|
|
60
|
-
await this.
|
|
97
|
+
await this.ensureSession(input.sessionId);
|
|
61
98
|
const matched = await this.adapter.waitFor(input.sessionId, input.condition, input.timeoutMs ?? DEFAULT_WAIT_TIMEOUT_MS);
|
|
62
99
|
return {
|
|
63
100
|
ok: true,
|
|
@@ -77,17 +114,17 @@ export class AutomationRuntimeService {
|
|
|
77
114
|
}
|
|
78
115
|
}
|
|
79
116
|
async press(input) {
|
|
80
|
-
await this.
|
|
117
|
+
await this.ensureSession(input.sessionId);
|
|
81
118
|
return this.executor.press(input);
|
|
82
119
|
}
|
|
83
120
|
async typeInto(input) {
|
|
84
|
-
await this.
|
|
121
|
+
await this.ensureSession(input.sessionId);
|
|
85
122
|
return this.executor.typeInto(input);
|
|
86
123
|
}
|
|
87
124
|
async extract(input) {
|
|
88
125
|
const telemetry = this.logger.start("extract", input.sessionId);
|
|
89
126
|
try {
|
|
90
|
-
await this.
|
|
127
|
+
await this.ensureSession(input.sessionId);
|
|
91
128
|
const data = await this.adapter.extract(input.sessionId, input.target, input.format);
|
|
92
129
|
return {
|
|
93
130
|
ok: true,
|
|
@@ -109,7 +146,7 @@ export class AutomationRuntimeService {
|
|
|
109
146
|
async screenshot(input) {
|
|
110
147
|
const telemetry = this.logger.start("screenshot", input.sessionId);
|
|
111
148
|
try {
|
|
112
|
-
await this.
|
|
149
|
+
await this.ensureSession(input.sessionId);
|
|
113
150
|
const path = await this.adapter.screenshot(input.sessionId, input.region);
|
|
114
151
|
return {
|
|
115
152
|
ok: true,
|
|
@@ -132,11 +169,12 @@ export class AutomationRuntimeService {
|
|
|
132
169
|
async appLaunch(input) {
|
|
133
170
|
const telemetry = this.logger.start("app_launch", input.sessionId);
|
|
134
171
|
try {
|
|
135
|
-
await this.
|
|
172
|
+
await this.ensureSession(input.sessionId);
|
|
136
173
|
if (!this.adapter.launchApp) {
|
|
137
174
|
throw new Error("Adapter does not support launchApp");
|
|
138
175
|
}
|
|
139
176
|
const ctx = await this.adapter.launchApp(input.sessionId, input.bundleId);
|
|
177
|
+
this.worldModel?.updateFocusedApp(ctx);
|
|
140
178
|
return {
|
|
141
179
|
ok: true,
|
|
142
180
|
data: ctx,
|
|
@@ -157,11 +195,17 @@ export class AutomationRuntimeService {
|
|
|
157
195
|
async appFocus(input) {
|
|
158
196
|
const telemetry = this.logger.start("app_focus", input.sessionId);
|
|
159
197
|
try {
|
|
160
|
-
await this.
|
|
198
|
+
await this.ensureSession(input.sessionId);
|
|
161
199
|
if (!this.adapter.focusApp) {
|
|
162
200
|
throw new Error("Adapter does not support focusApp");
|
|
163
201
|
}
|
|
164
202
|
await this.adapter.focusApp(input.sessionId, input.bundleId);
|
|
203
|
+
this.worldModel?.updateFocusedApp({
|
|
204
|
+
bundleId: input.bundleId,
|
|
205
|
+
appName: input.bundleId,
|
|
206
|
+
pid: 0,
|
|
207
|
+
windowTitle: "",
|
|
208
|
+
});
|
|
165
209
|
return {
|
|
166
210
|
ok: true,
|
|
167
211
|
data: undefined,
|
|
@@ -182,7 +226,7 @@ export class AutomationRuntimeService {
|
|
|
182
226
|
async appList(sessionId) {
|
|
183
227
|
const telemetry = this.logger.start("app_list", sessionId);
|
|
184
228
|
try {
|
|
185
|
-
await this.
|
|
229
|
+
await this.ensureSession(sessionId);
|
|
186
230
|
if (!this.adapter.listApps) {
|
|
187
231
|
throw new Error("Adapter does not support listApps");
|
|
188
232
|
}
|
|
@@ -207,7 +251,7 @@ export class AutomationRuntimeService {
|
|
|
207
251
|
async windowList(sessionId) {
|
|
208
252
|
const telemetry = this.logger.start("window_list", sessionId);
|
|
209
253
|
try {
|
|
210
|
-
await this.
|
|
254
|
+
await this.ensureSession(sessionId);
|
|
211
255
|
if (!this.adapter.listWindows) {
|
|
212
256
|
throw new Error("Adapter does not support listWindows");
|
|
213
257
|
}
|
|
@@ -232,7 +276,7 @@ export class AutomationRuntimeService {
|
|
|
232
276
|
async menuClick(input) {
|
|
233
277
|
const telemetry = this.logger.start("menu_click", input.sessionId);
|
|
234
278
|
try {
|
|
235
|
-
await this.
|
|
279
|
+
await this.ensureSession(input.sessionId);
|
|
236
280
|
if (!this.adapter.menuClick) {
|
|
237
281
|
throw new Error("Adapter does not support menuClick");
|
|
238
282
|
}
|
|
@@ -257,7 +301,7 @@ export class AutomationRuntimeService {
|
|
|
257
301
|
async keyCombo(input) {
|
|
258
302
|
const telemetry = this.logger.start("key_combo", input.sessionId);
|
|
259
303
|
try {
|
|
260
|
-
await this.
|
|
304
|
+
await this.ensureSession(input.sessionId);
|
|
261
305
|
if (!this.adapter.keyCombo) {
|
|
262
306
|
throw new Error("Adapter does not support keyCombo");
|
|
263
307
|
}
|
|
@@ -282,7 +326,7 @@ export class AutomationRuntimeService {
|
|
|
282
326
|
async elementTree(input) {
|
|
283
327
|
const telemetry = this.logger.start("element_tree", input.sessionId);
|
|
284
328
|
try {
|
|
285
|
-
await this.
|
|
329
|
+
await this.ensureSession(input.sessionId);
|
|
286
330
|
if (!this.adapter.elementTree) {
|
|
287
331
|
throw new Error("Adapter does not support elementTree");
|
|
288
332
|
}
|
|
@@ -307,7 +351,7 @@ export class AutomationRuntimeService {
|
|
|
307
351
|
async drag(input) {
|
|
308
352
|
const telemetry = this.logger.start("drag", input.sessionId);
|
|
309
353
|
try {
|
|
310
|
-
await this.
|
|
354
|
+
await this.ensureSession(input.sessionId);
|
|
311
355
|
if (!this.adapter.drag) {
|
|
312
356
|
throw new Error("Adapter does not support drag");
|
|
313
357
|
}
|
|
@@ -337,7 +381,7 @@ export class AutomationRuntimeService {
|
|
|
337
381
|
async scroll(input) {
|
|
338
382
|
const telemetry = this.logger.start("scroll", input.sessionId);
|
|
339
383
|
try {
|
|
340
|
-
await this.
|
|
384
|
+
await this.ensureSession(input.sessionId);
|
|
341
385
|
if (!this.adapter.scroll) {
|
|
342
386
|
throw new Error("Adapter does not support scroll");
|
|
343
387
|
}
|
|
@@ -50,7 +50,10 @@ export class SessionManager {
|
|
|
50
50
|
const existing = this.getSession(sessionId);
|
|
51
51
|
if (existing)
|
|
52
52
|
return existing;
|
|
53
|
-
|
|
53
|
+
// Session IDs: {prefix}_session_{profile}_{timestamp}_{random8} (new)
|
|
54
|
+
// or legacy: {prefix}_session_{profile}_{timestamp}
|
|
55
|
+
// Use greedy .+ so profiles with digits (e.g. "user_1234567890") capture fully
|
|
56
|
+
const match = sessionId.match(/^(?:ax|cdp|as|vision|composite)_session_(.+)_\d{13,}(?:_[a-f0-9]{8})?$/);
|
|
54
57
|
const profile = match?.[1] ?? "automation";
|
|
55
58
|
const created = await this.adapter.attach(profile, sessionId);
|
|
56
59
|
this.sessionsByProfile.set(profile, created);
|
|
@@ -14,6 +14,7 @@
|
|
|
14
14
|
//
|
|
15
15
|
// You should have received a copy of the GNU Affero General Public License
|
|
16
16
|
// along with ScreenHand. If not, see <https://www.gnu.org/licenses/>.
|
|
17
|
+
import { randomUUID } from "node:crypto";
|
|
17
18
|
const POLL_INTERVAL_MS = 200;
|
|
18
19
|
/**
|
|
19
20
|
* Vision-based adapter for apps with poor/no accessibility support.
|
|
@@ -33,7 +34,7 @@ export class VisionAdapter {
|
|
|
33
34
|
await this.bridge.start();
|
|
34
35
|
const frontmost = await this.bridge.call("app.frontmost");
|
|
35
36
|
const info = {
|
|
36
|
-
sessionId: reuseSessionId ?? `vision_session_${profile}_${Date.now()}`,
|
|
37
|
+
sessionId: reuseSessionId ?? `vision_session_${profile}_${Date.now()}_${randomUUID().slice(0, 8)}`,
|
|
37
38
|
profile,
|
|
38
39
|
createdAt: new Date().toISOString(),
|
|
39
40
|
adapterType: "vision",
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
// Copyright (C) 2025 Clazro Technology Private Limited
|
|
2
|
+
// SPDX-License-Identifier: AGPL-3.0-only
|
|
3
|
+
// ── Rating System (F → 0) ──────────────────────────────────────────
|
|
4
|
+
//
|
|
5
|
+
// Game-style rating: F E D C B A S SS SSS 0
|
|
6
|
+
// Each grade has 3 sub-tiers: e.g. B1 (entry), B2 (mid), B3 (top)
|
|
7
|
+
// Graded by 10 weighted factors scored 0-100
|
|
8
|
+
//
|
|
9
|
+
// Years-equivalent mapping:
|
|
10
|
+
// F = just opened the app
|
|
11
|
+
// E = ~1 week user
|
|
12
|
+
// D = ~1-3 months
|
|
13
|
+
// C = ~6-12 months
|
|
14
|
+
// B = ~1-3 years (consistent daily user)
|
|
15
|
+
// A = ~3-5 years (power user / team lead)
|
|
16
|
+
// S = ~5-10 years (department architect)
|
|
17
|
+
// SS = ~10-20 years (platform expert, builds systems)
|
|
18
|
+
// SSS = ~20+ years (framework builder, trains others)
|
|
19
|
+
// 0 = Class Zero — transcendent mastery, all 10 factors maxed
|
|
20
|
+
/** Rating grades from lowest to highest */
|
|
21
|
+
export const RATING_GRADES = ["F", "E", "D", "C", "B", "A", "S", "SS", "SSS", "0"];
|
|
22
|
+
/** Factor weights — hard-to-fake signals dominate (57%), session-gated evidence (43%) */
|
|
23
|
+
export const RATING_FACTOR_WEIGHTS = {
|
|
24
|
+
consistency: 20, // THE core signal — can't fake showing up 50+ times
|
|
25
|
+
platformKnowledge: 15, // shortcuts, deep features — proves real knowledge
|
|
26
|
+
edgeCaseHandling: 12, // surviving unexpected states — proves resilience
|
|
27
|
+
teachingAbility: 10, // exporting playbooks — proves codifiable mastery
|
|
28
|
+
featureCoverage: 10, // breadth of features used (session-gated)
|
|
29
|
+
workflowDepth: 8, // multi-step workflows completed (session-gated)
|
|
30
|
+
outcomeVerification: 8, // verified outcomes (session-gated)
|
|
31
|
+
errorRecovery: 7, // healing from failures — honest if it happens
|
|
32
|
+
crossFeatureChains: 5, // combining features end-to-end
|
|
33
|
+
speedEfficiency: 5, // repeat mastery across sessions
|
|
34
|
+
};
|
|
35
|
+
export const DEFAULT_APP_MAP_CONFIG = {
|
|
36
|
+
mapsDir: "",
|
|
37
|
+
staleThresholdDays: 7,
|
|
38
|
+
versionDecayFactor: 0.5,
|
|
39
|
+
pruneSessionThreshold: 10,
|
|
40
|
+
maxZonesPerApp: 50,
|
|
41
|
+
maxElementsPerZone: 100,
|
|
42
|
+
maxEdges: 200,
|
|
43
|
+
maxHistoryEntries: 100,
|
|
44
|
+
maxHierarchyEntriesPerZone: 50,
|
|
45
|
+
maxContractsPerZone: 30,
|
|
46
|
+
maxOutcomesPerContract: 5,
|
|
47
|
+
maxStateDimensions: 30,
|
|
48
|
+
maxStateTransitions: 100,
|
|
49
|
+
maxVisibilityConditions: 200,
|
|
50
|
+
maxTimingProfiles: 100,
|
|
51
|
+
maxReadySignals: 50,
|
|
52
|
+
};
|
|
53
|
+
// ── Rating Utility ──────────────────────────────────────────────────
|
|
54
|
+
/** Convert Rating to display string: "B2", "SS3", "0" */
|
|
55
|
+
export function ratingToString(r) {
|
|
56
|
+
if (r.grade === "0")
|
|
57
|
+
return "0"; // Class Zero has no sub-tier display
|
|
58
|
+
return `${r.grade}${r.subTier}`;
|
|
59
|
+
}
|
|
60
|
+
/** Grade thresholds: weighted score needed for each grade (0-100 scale) */
|
|
61
|
+
export const GRADE_THRESHOLDS = [
|
|
62
|
+
{ grade: "0", minScore: 97 }, // Class Zero — near-perfect across all factors
|
|
63
|
+
{ grade: "SSS", minScore: 90 }, // 20+ years equivalent
|
|
64
|
+
{ grade: "SS", minScore: 82 }, // 10-20 years
|
|
65
|
+
{ grade: "S", minScore: 73 }, // 5-10 years
|
|
66
|
+
{ grade: "A", minScore: 62 }, // 3-5 years
|
|
67
|
+
{ grade: "B", minScore: 50 }, // 1-3 years
|
|
68
|
+
{ grade: "C", minScore: 38 }, // 6-12 months
|
|
69
|
+
{ grade: "D", minScore: 25 }, // 1-3 months
|
|
70
|
+
{ grade: "E", minScore: 12 }, // ~1 week
|
|
71
|
+
{ grade: "F", minScore: 0 }, // just opened the app
|
|
72
|
+
];
|