screenhand 0.1.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +193 -109
- package/bin/darwin-arm64/macos-bridge +0 -0
- package/dist/mcp-desktop.js +5876 -0
- package/dist/scripts/codex-monitor-daemon.js +335 -0
- package/dist/scripts/export-help-center.js +112 -0
- package/dist/scripts/marketing-loop.js +117 -0
- package/dist/scripts/observer-daemon.js +288 -0
- package/dist/scripts/orchestrator-daemon.js +399 -0
- package/dist/scripts/supervisor-daemon.js +272 -0
- package/dist/scripts/threads-campaign.js +208 -0
- package/dist/scripts/worker-daemon.js +228 -0
- package/dist/src/agent/cli.js +82 -0
- package/dist/src/agent/loop.js +274 -0
- package/dist/src/community/fetcher.js +109 -0
- package/dist/src/community/index.js +6 -0
- package/dist/src/community/publisher.js +191 -0
- package/dist/src/community/remote-api.js +121 -0
- package/dist/src/community/types.js +3 -0
- package/dist/src/community/validator.js +95 -0
- package/{src/config.ts → dist/src/config.js} +5 -10
- package/dist/src/context-tracker.js +489 -0
- package/{src/index.ts → dist/src/index.js} +32 -52
- package/dist/src/ingestion/coverage-auditor.js +233 -0
- package/dist/src/ingestion/doc-parser.js +164 -0
- package/dist/src/ingestion/index.js +8 -0
- package/dist/src/ingestion/menu-scanner.js +152 -0
- package/dist/src/ingestion/reference-merger.js +186 -0
- package/dist/src/ingestion/shortcut-extractor.js +180 -0
- package/dist/src/ingestion/tutorial-extractor.js +170 -0
- package/dist/src/ingestion/types.js +3 -0
- package/dist/src/jobs/manager.js +305 -0
- package/dist/src/jobs/runner.js +806 -0
- package/dist/src/jobs/store.js +102 -0
- package/dist/src/jobs/types.js +30 -0
- package/dist/src/jobs/worker.js +97 -0
- package/dist/src/learning/engine.js +356 -0
- package/dist/src/learning/index.js +9 -0
- package/dist/src/learning/locator-policy.js +120 -0
- package/dist/src/learning/pattern-policy.js +89 -0
- package/dist/src/learning/recovery-policy.js +116 -0
- package/dist/src/learning/sensor-policy.js +115 -0
- package/dist/src/learning/timing-model.js +204 -0
- package/dist/src/learning/topology-policy.js +90 -0
- package/dist/src/learning/types.js +9 -0
- package/dist/src/logging/timeline-logger.js +48 -0
- package/dist/src/mcp/mcp-stdio-server.js +464 -0
- package/dist/src/mcp/server.js +363 -0
- package/dist/src/mcp-entry.js +60 -0
- package/dist/src/memory/playbook-seeds.js +200 -0
- package/dist/src/memory/recall.js +222 -0
- package/dist/src/memory/research.js +104 -0
- package/dist/src/memory/seeds.js +101 -0
- package/dist/src/memory/service.js +446 -0
- package/dist/src/memory/session.js +169 -0
- package/dist/src/memory/store.js +451 -0
- package/{src/runtime/locator-cache.ts → dist/src/memory/types.js} +1 -17
- package/dist/src/monitor/codex-monitor.js +382 -0
- package/dist/src/monitor/task-queue.js +97 -0
- package/dist/src/monitor/types.js +62 -0
- package/dist/src/native/bridge-client.js +412 -0
- package/{src/native/macos-bridge-client.ts → dist/src/native/macos-bridge-client.js} +0 -1
- package/dist/src/observer/state.js +199 -0
- package/dist/src/observer/types.js +43 -0
- package/dist/src/orchestrator/state.js +68 -0
- package/dist/src/orchestrator/types.js +22 -0
- package/dist/src/perception/ax-source.js +162 -0
- package/dist/src/perception/cdp-source.js +162 -0
- package/dist/src/perception/coordinator.js +771 -0
- package/dist/src/perception/frame-differ.js +287 -0
- package/dist/src/perception/index.js +22 -0
- package/dist/src/perception/manager.js +199 -0
- package/dist/src/perception/types.js +47 -0
- package/dist/src/perception/vision-source.js +399 -0
- package/dist/src/planner/deterministic.js +298 -0
- package/dist/src/planner/executor.js +870 -0
- package/dist/src/planner/goal-store.js +92 -0
- package/dist/src/planner/index.js +21 -0
- package/dist/src/planner/planner.js +520 -0
- package/dist/src/planner/tool-registry.js +71 -0
- package/dist/src/planner/types.js +22 -0
- package/dist/src/platform/explorer.js +213 -0
- package/dist/src/platform/help-center-markdown.js +527 -0
- package/dist/src/platform/learner.js +257 -0
- package/dist/src/playbook/engine.js +486 -0
- package/dist/src/playbook/index.js +20 -0
- package/dist/src/playbook/mcp-recorder.js +204 -0
- package/dist/src/playbook/recorder.js +536 -0
- package/dist/src/playbook/runner.js +408 -0
- package/dist/src/playbook/store.js +312 -0
- package/dist/src/playbook/types.js +17 -0
- package/dist/src/recovery/detectors.js +156 -0
- package/dist/src/recovery/engine.js +327 -0
- package/dist/src/recovery/index.js +20 -0
- package/dist/src/recovery/strategies.js +274 -0
- package/dist/src/recovery/types.js +20 -0
- package/dist/src/runtime/accessibility-adapter.js +430 -0
- package/dist/src/runtime/app-adapter.js +64 -0
- package/dist/src/runtime/applescript-adapter.js +305 -0
- package/dist/src/runtime/ax-role-map.js +96 -0
- package/dist/src/runtime/browser-adapter.js +52 -0
- package/dist/src/runtime/cdp-chrome-adapter.js +521 -0
- package/dist/src/runtime/composite-adapter.js +221 -0
- package/dist/src/runtime/execution-contract.js +159 -0
- package/dist/src/runtime/executor.js +286 -0
- package/dist/src/runtime/locator-cache.js +50 -0
- package/dist/src/runtime/planning-loop.js +63 -0
- package/dist/src/runtime/service.js +432 -0
- package/dist/src/runtime/session-manager.js +63 -0
- package/dist/src/runtime/state-observer.js +121 -0
- package/dist/src/runtime/vision-adapter.js +225 -0
- package/dist/src/state/app-map-types.js +72 -0
- package/dist/src/state/app-map.js +1974 -0
- package/dist/src/state/entity-tracker.js +108 -0
- package/dist/src/state/fusion.js +96 -0
- package/dist/src/state/index.js +21 -0
- package/dist/src/state/ladder-generator.js +236 -0
- package/dist/src/state/persistence.js +156 -0
- package/dist/src/state/types.js +17 -0
- package/dist/src/state/world-model.js +1456 -0
- package/dist/src/supervisor/locks.js +186 -0
- package/dist/src/supervisor/supervisor.js +403 -0
- package/dist/src/supervisor/types.js +30 -0
- package/dist/src/test-mcp-protocol.js +154 -0
- package/dist/src/types.js +17 -0
- package/dist/src/util/atomic-write.js +133 -0
- package/dist/src/util/sanitize.js +146 -0
- package/dist-app-maps/com.figma.Desktop.json +959 -0
- package/dist-app-maps/com.hnc.Discord.json +1146 -0
- package/dist-app-maps/notion.id.json +2831 -0
- package/dist-playbooks/canva-screenhand-carousel.json +445 -0
- package/dist-playbooks/codex-desktop.json +76 -0
- package/dist-playbooks/competitor-research-stack.json +122 -0
- package/dist-playbooks/davinci-color-grade.json +153 -0
- package/dist-playbooks/davinci-edit-timeline.json +162 -0
- package/dist-playbooks/davinci-render.json +114 -0
- package/dist-playbooks/devto.json +52 -0
- package/dist-playbooks/discord.json +41 -0
- package/dist-playbooks/google-flow-create-project.json +59 -0
- package/dist-playbooks/google-flow-edit-image.json +90 -0
- package/dist-playbooks/google-flow-edit-video.json +90 -0
- package/dist-playbooks/google-flow-generate-image.json +68 -0
- package/dist-playbooks/google-flow-generate-video.json +191 -0
- package/dist-playbooks/google-flow-open-project.json +48 -0
- package/dist-playbooks/google-flow-open-scenebuilder.json +64 -0
- package/dist-playbooks/google-flow-search-assets.json +64 -0
- package/dist-playbooks/instagram.json +57 -0
- package/dist-playbooks/linkedin.json +52 -0
- package/dist-playbooks/n8n.json +43 -0
- package/dist-playbooks/reddit.json +52 -0
- package/dist-playbooks/threads.json +59 -0
- package/dist-playbooks/x-twitter.json +59 -0
- package/dist-playbooks/youtube.json +59 -0
- package/dist-references/canva.json +646 -0
- package/dist-references/codex-desktop.json +305 -0
- package/dist-references/davinci-resolve-keyboard.json +594 -0
- package/dist-references/davinci-resolve-menu-map.json +1139 -0
- package/dist-references/davinci-resolve-menus-batch1.json +116 -0
- package/dist-references/davinci-resolve-menus-batch2.json +372 -0
- package/dist-references/davinci-resolve-menus-batch3.json +330 -0
- package/dist-references/davinci-resolve-menus-batch4.json +297 -0
- package/dist-references/davinci-resolve-shortcuts.json +333 -0
- package/dist-references/devto.json +317 -0
- package/dist-references/discord.json +549 -0
- package/dist-references/figma.json +1186 -0
- package/dist-references/finder.json +146 -0
- package/dist-references/google-ads-transparency.json +95 -0
- package/dist-references/google-flow.json +649 -0
- package/dist-references/instagram.json +341 -0
- package/dist-references/linkedin.json +324 -0
- package/dist-references/meta-ad-library.json +86 -0
- package/dist-references/n8n.json +387 -0
- package/dist-references/notes.json +27 -0
- package/dist-references/notion.json +163 -0
- package/dist-references/reddit.json +341 -0
- package/dist-references/threads.json +337 -0
- package/dist-references/x-twitter.json +403 -0
- package/dist-references/youtube.json +373 -0
- package/native/macos-bridge/Package.swift +1 -0
- package/native/macos-bridge/Sources/AccessibilityBridge.swift +257 -36
- package/native/macos-bridge/Sources/AppManagement.swift +212 -2
- package/native/macos-bridge/Sources/CoreGraphicsBridge.swift +348 -53
- package/native/macos-bridge/Sources/StreamCapture.swift +136 -0
- package/native/macos-bridge/Sources/VisionBridge.swift +165 -7
- package/native/macos-bridge/Sources/main.swift +169 -16
- package/native/windows-bridge/Program.cs +5 -0
- package/native/windows-bridge/ScreenCapture.cs +124 -0
- package/package.json +29 -4
- package/scripts/postinstall.cjs +127 -0
- package/.claude/commands/automate.md +0 -28
- package/.claude/commands/debug-ui.md +0 -19
- package/.claude/commands/screenshot.md +0 -15
- package/.github/FUNDING.yml +0 -1
- package/.github/ISSUE_TEMPLATE/bug_report.md +0 -27
- package/.github/ISSUE_TEMPLATE/feature_request.md +0 -20
- package/.mcp.json +0 -8
- package/DESKTOP_MCP_GUIDE.md +0 -92
- package/SECURITY.md +0 -44
- package/docs/architecture.md +0 -47
- package/install-skills.sh +0 -19
- package/mcp-bridge.ts +0 -271
- package/mcp-desktop.ts +0 -1221
- package/playbooks/instagram.json +0 -41
- package/playbooks/instagram_v2.json +0 -201
- package/playbooks/x_v1.json +0 -211
- package/scripts/devpost-live-loop.mjs +0 -421
- package/src/logging/timeline-logger.ts +0 -55
- package/src/mcp/server.ts +0 -449
- package/src/memory/recall.ts +0 -191
- package/src/memory/research.ts +0 -146
- package/src/memory/seeds.ts +0 -123
- package/src/memory/session.ts +0 -201
- package/src/memory/store.ts +0 -434
- package/src/memory/types.ts +0 -69
- package/src/native/bridge-client.ts +0 -239
- package/src/runtime/accessibility-adapter.ts +0 -487
- package/src/runtime/app-adapter.ts +0 -169
- package/src/runtime/applescript-adapter.ts +0 -376
- package/src/runtime/ax-role-map.ts +0 -102
- package/src/runtime/browser-adapter.ts +0 -129
- package/src/runtime/cdp-chrome-adapter.ts +0 -676
- package/src/runtime/composite-adapter.ts +0 -274
- package/src/runtime/executor.ts +0 -396
- package/src/runtime/planning-loop.ts +0 -81
- package/src/runtime/service.ts +0 -448
- package/src/runtime/session-manager.ts +0 -50
- package/src/runtime/state-observer.ts +0 -136
- package/src/runtime/vision-adapter.ts +0 -297
- package/src/types.ts +0 -297
- package/tests/bridge-client.test.ts +0 -176
- package/tests/browser-stealth.test.ts +0 -210
- package/tests/composite-adapter.test.ts +0 -64
- package/tests/mcp-server.test.ts +0 -151
- package/tests/memory-recall.test.ts +0 -339
- package/tests/memory-research.test.ts +0 -159
- package/tests/memory-seeds.test.ts +0 -120
- package/tests/memory-store.test.ts +0 -392
- package/tests/types.test.ts +0 -92
- package/tsconfig.check.json +0 -17
- package/tsconfig.json +0 -19
- package/vitest.config.ts +0 -8
- /package/{playbooks → dist-references}/devpost.json +0 -0
|
@@ -0,0 +1,486 @@
|
|
|
1
|
+
// Copyright (C) 2025 Clazro Technology Private Limited
|
|
2
|
+
// SPDX-License-Identifier: AGPL-3.0-only
|
|
3
|
+
//
|
|
4
|
+
// This file is part of ScreenHand.
|
|
5
|
+
//
|
|
6
|
+
// ScreenHand is free software: you can redistribute it and/or modify
|
|
7
|
+
// it under the terms of the GNU Affero General Public License as
|
|
8
|
+
// published by the Free Software Foundation, version 3.
|
|
9
|
+
//
|
|
10
|
+
// ScreenHand is distributed in the hope that it will be useful,
|
|
11
|
+
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
12
|
+
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
13
|
+
// GNU Affero General Public License for more details.
|
|
14
|
+
//
|
|
15
|
+
// You should have received a copy of the GNU Affero General Public License
|
|
16
|
+
// along with ScreenHand. If not, see <https://www.gnu.org/licenses/>.
|
|
17
|
+
import { readObserverState, getObserverPopup } from "../observer/state.js";
|
|
18
|
+
/** Default time (ms) to wait for a step's `verify` selector when the step sets no `verifyTimeoutMs`. */
const DEFAULT_VERIFY_TIMEOUT = 5000;
/** Default pause (ms) after each successfully executed step so the UI can settle. */
const STEP_DELAY_MS = 300;
/** Pause (ms) after a popup-dismiss action has been sent, before the step continues. */
const POPUP_SETTLE_MS = 500;
/** Button text pressed for each click-style popup dismiss action. */
const POPUP_BUTTON_TEXT = new Map([
    ["click_ok", "OK"],
    ["click_cancel", "Cancel"],
    ["click_close", "Close"],
    ["click_allow", "Allow"],
    ["click_deny", "Don't Allow"],
]);

/** Turn any thrown value into a printable message. */
function describeError(err) {
    return err instanceof Error ? err.message : String(err);
}

/**
 * Throw when a runtime call reports failure.
 *
 * @param {{ ok: boolean, error?: { message?: string } }} result - Value returned by a runtime method.
 * @param {string} fallbackMessage - Used when the result carries no error message.
 * @returns The same result, for chaining.
 * @throws {Error} When `result.ok` is falsy.
 */
function ensureOk(result, fallbackMessage) {
    if (!result.ok) {
        throw new Error(result.error?.message ?? fallbackMessage);
    }
    return result;
}

/**
 * Open a CDP client through the engine's factory, run `task` with it, and close it afterwards.
 * When `task` fails the client is still closed, and a failing `close()` does not replace the
 * task's error (the step failure is the one the caller needs to see).
 *
 * @param {PlaybookEngine} engine - Engine whose `cdpConnect` factory is used.
 * @param {string} action - Step action name, used in the "requires CDP" error.
 * @param {number|undefined} cdpPort - Optional port override handed to the factory.
 * @param {(client: object) => Promise<string>} task - Work to perform with the client.
 * @returns {Promise<string>} Whatever `task` resolves to.
 */
async function withCdpClient(engine, action, cdpPort, task) {
    if (!engine.cdpConnect) {
        throw new Error(`${action} requires CDP — call setCDPConnect() first`);
    }
    const client = await engine.cdpConnect(cdpPort);
    let outcome;
    try {
        outcome = await task(client);
    }
    catch (err) {
        try {
            await client.close();
        }
        catch {
            // Intentionally ignored: the task error below is the more useful one.
        }
        throw err;
    }
    await client.close();
    return outcome;
}

/**
 * Evaluate a small script against the element matched by `selector` in the page.
 * The script runs after the element was found and scrolled into view; it can refer to the
 * element as `el` and must `return` an object with `ok: true`.
 *
 * Both the selector and the "not found" text are embedded with JSON.stringify so that quotes,
 * backslashes and newlines in the selector cannot break (or inject into) the evaluated source.
 *
 * @param {object} client - CDP client exposing `Runtime.evaluate`.
 * @param {string} selector - CSS selector passed to `document.querySelector`.
 * @param {string} elementScript - JavaScript statements executed with `el` in scope.
 * @returns {Promise<object>} The object returned by the script.
 * @throws {Error} When no HTMLElement matches or the script does not report `ok`.
 */
async function evaluateOnElement(client, selector, elementScript) {
    const notFound = `Element not found: ${selector}`;
    const response = await client.Runtime.evaluate({
        expression: `(() => {
    const el = document.querySelector(${JSON.stringify(selector)});
    if (!(el instanceof HTMLElement)) return { ok: false, reason: ${JSON.stringify(notFound)} };
    el.scrollIntoView({ block: "center" });
    ${elementScript}
})()`,
        returnByValue: true,
    });
    const value = response.result?.value;
    if (!value?.ok) {
        throw new Error(value?.reason || notFound);
    }
    return value;
}

/**
 * Executes playbooks (ordered lists of UI automation steps) against a runtime session.
 * Desktop actions go through the injected `runtime`; `browser_*` and `cdp_key_event`
 * actions go through a CDP client obtained from the factory set with setCDPConnect().
 */
export class PlaybookEngine {
    /** Runtime service used for navigate/press/type/extract/scroll/etc. */
    runtime;
    /** Optional factory `(port?) => Promise<client>` for CDP-based actions. */
    cdpConnect;
    /** Enable observer-based popup checks before each step */
    popupCheckEnabled = false;

    /**
     * @param {object} runtime - Object exposing the async runtime methods used by executeStep.
     */
    constructor(runtime) {
        this.runtime = runtime;
    }

    /** Enable/disable pre-step popup detection via observer daemon */
    setPopupCheck(enabled) {
        this.popupCheckEnabled = enabled;
    }

    /** Set CDP connection factory for browser_js and cdp_key_event actions. Factory accepts optional port override. */
    setCDPConnect(factory) {
        this.cdpConnect = factory;
    }

    /**
     * Execute a playbook against a live session.
     * Returns result with success/failure and which step broke.
     *
     * @param {string} sessionId - Session passed to every runtime call.
     * @param {{ id: string, steps: object[], cdpPort?: number }} playbook - Playbook to run.
     * @param {object} [options]
     * @param {Record<string, string>} [options.vars] - Values for `{NAME}` placeholders.
     * @param {(index: number, step: object, result: string) => void} [options.onStep] -
     *   Called after each executed (or skipped optional) step.
     * @param {number} [options.stepDelayMs] - Pause after each executed step; defaults to 300 ms.
     * @returns {Promise<object>} `{ playbook, success, stepsCompleted, totalSteps, failedAtStep,
     *   error?, durationMs }`; `failedAtStep` is -1 on success.
     */
    async run(sessionId, playbook, options = {}) {
        const start = Date.now();
        const stepDelayMs = options.stepDelayMs ?? STEP_DELAY_MS;
        let stepsCompleted = 0;
        const report = (success, failedAtStep, error) => {
            const outcome = {
                playbook: playbook.id,
                success,
                stepsCompleted,
                totalSteps: playbook.steps.length,
                failedAtStep,
            };
            if (error !== undefined) {
                outcome.error = error;
            }
            outcome.durationMs = Date.now() - start;
            return outcome;
        };
        for (let i = 0; i < playbook.steps.length; i++) {
            let step = options.vars ? this.substituteVars(playbook.steps[i], options.vars) : playbook.steps[i];
            try {
                // Pre-step: check for popups via observer (if enabled, non-blocking)
                if (this.popupCheckEnabled) {
                    await this.dismissPopupIfPresent(sessionId);
                }
                // OCR-based locate: resolve locateByOcr to coordinates before execution.
                // When nothing is resolved the step keeps whatever target it already had.
                if (step.locateByOcr) {
                    const coords = this.resolveOcrTarget(step.locateByOcr, step.offsetX ?? 0, step.offsetY ?? 0);
                    if (coords) {
                        step = { ...step, target: { x: coords.x, y: coords.y } };
                    }
                }
                const result = await this.executeStep(sessionId, step, playbook.cdpPort);
                stepsCompleted++;
                if (options.onStep) {
                    options.onStep(i, step, result);
                }
                // Verify step if needed; a failed check on an optional step is tolerated
                if (step.verify) {
                    const verified = await this.verifyStep(sessionId, step);
                    if (!verified && !step.optional) {
                        return report(false, i, `Verification failed at step ${i}: ${step.description ?? step.action}`);
                    }
                }
                // Small delay between steps for UI to settle
                await sleep(stepDelayMs);
            }
            catch (err) {
                if (!step.optional) {
                    return report(false, i, describeError(err));
                }
                // Optional steps count as completed even when they fail
                stepsCompleted++;
                if (options.onStep) {
                    options.onStep(i, step, `Skipped (optional): ${describeError(err)}`);
                }
            }
        }
        return report(true, -1);
    }

    /**
     * Execute a single playbook step.
     *
     * @param {string} sessionId - Session passed to the runtime.
     * @param {object} step - Step with an `action` and the fields that action requires.
     * @param {number} [cdpPort] - Optional port override for CDP-based actions.
     * @returns {Promise<string>} Human-readable summary of what was done.
     * @throws {Error} When a required field is missing, the action is unknown, or the call fails.
     */
    async executeStep(sessionId, step, cdpPort) {
        const { action } = step;
        const target = this.resolveTarget(step.target);
        switch (action) {
            case "navigate": {
                if (!step.url) {
                    throw new Error("navigate step missing url");
                }
                ensureOk(await this.runtime.navigate({ sessionId, url: step.url }), "navigate failed");
                return `Navigated to ${step.url}`;
            }
            case "press": {
                if (!target) {
                    throw new Error("press step missing target");
                }
                ensureOk(await this.runtime.press({ sessionId, target }), "press failed");
                return `Pressed ${JSON.stringify(step.target)}`;
            }
            case "type_into": {
                if (!step.text) {
                    throw new Error("type_into step missing text");
                }
                if (target) {
                    ensureOk(await this.runtime.typeInto({ sessionId, target, text: step.text }), "type_into failed");
                    return `Typed "${step.text}" into ${JSON.stringify(step.target)}`;
                }
                // No target — type into focused element character by character via key events
                for (const char of step.text) {
                    ensureOk(await this.runtime.keyCombo({ sessionId, keys: [char] }), "key event failed");
                }
                return `Typed "${step.text}" into focused element`;
            }
            case "extract": {
                if (!target) {
                    throw new Error("extract step missing target");
                }
                const extracted = ensureOk(await this.runtime.extract({
                    sessionId,
                    target,
                    format: step.format ?? "text",
                }), "extract failed");
                // JSON.stringify yields undefined for undefined data; keep the summary printable
                const preview = JSON.stringify(extracted.data) ?? "undefined";
                return `Extracted: ${preview.slice(0, 200)}`;
            }
            case "key":
            case "key_combo": {
                if (!step.keys || step.keys.length === 0) {
                    throw new Error(`${action} step missing keys`);
                }
                ensureOk(await this.runtime.keyCombo({ sessionId, keys: step.keys }), `${action} failed`);
                return `${action === "key" ? "Key" : "Key combo"}: ${step.keys.join("+")}`;
            }
            case "menu_click": {
                if (!step.menuPath || step.menuPath.length === 0) {
                    throw new Error("menu_click step missing menuPath");
                }
                ensureOk(await this.runtime.menuClick({ sessionId, menuPath: step.menuPath }), "menu_click failed");
                return `Menu click: ${step.menuPath.join(" > ")}`;
            }
            case "scroll": {
                const direction = step.direction ?? "down";
                const input = { sessionId, direction };
                if (step.amount != null) {
                    input.amount = step.amount;
                }
                ensureOk(await this.runtime.scroll(input), "scroll failed");
                return `Scrolled ${direction}`;
            }
            case "wait": {
                const ms = step.ms ?? 1000;
                await sleep(ms);
                return `Waited ${ms}ms`;
            }
            case "screenshot": {
                ensureOk(await this.runtime.screenshot({ sessionId }), "screenshot failed");
                return `Screenshot taken`;
            }
            case "browser_js": {
                if (!step.code) {
                    throw new Error("browser_js step missing code");
                }
                return withCdpClient(this, action, cdpPort, async (client) => {
                    const evaluation = await client.Runtime.evaluate({
                        expression: step.code,
                        awaitPromise: true,
                        returnByValue: true,
                    });
                    const details = evaluation.exceptionDetails;
                    if (details) {
                        throw new Error(`JS Error: ${details.text ?? details.exception?.description ?? "unknown"}`);
                    }
                    const val = evaluation.result?.value;
                    return `browser_js: ${typeof val === "object" ? JSON.stringify(val) : String(val ?? "undefined")}`;
                });
            }
            case "browser_click":
            case "browser_human_click": {
                const selector = this.getBrowserSelector(step);
                return withCdpClient(this, action, cdpPort, async (client) => {
                    const point = await this.resolveBrowserClickPoint(client, selector);
                    await this.dispatchMouseClick(client, point.x, point.y);
                    return `${action}: clicked ${selector}`;
                });
            }
            case "browser_type": {
                const selector = this.getBrowserSelector(step);
                if (!step.text) {
                    throw new Error("browser_type step missing text");
                }
                return withCdpClient(this, action, cdpPort, async (client) => {
                    await this.focusBrowserElement(client, selector);
                    // Existing content is always cleared first (select all + Backspace)
                    await this.dispatchSelectAll(client);
                    await this.dispatchKey(client, "Backspace", "Backspace");
                    await sleep(50);
                    for (const char of step.text) {
                        await this.dispatchTextChar(client, char);
                        await sleep(50);
                    }
                    return `browser_type: typed ${step.text.length} chars into ${selector}`;
                });
            }
            case "cdp_key_event": {
                if (!step.keyEvent) {
                    throw new Error("cdp_key_event step missing keyEvent");
                }
                return withCdpClient(this, action, cdpPort, async (client) => {
                    const { key, code, modifiers, windowsVirtualKeyCode } = step.keyEvent;
                    const virtualKeyCode = windowsVirtualKeyCode ?? 0;
                    const baseParams = {
                        key,
                        code,
                        modifiers: modifiers ?? 0,
                        windowsVirtualKeyCode: virtualKeyCode,
                        nativeVirtualKeyCode: virtualKeyCode,
                    };
                    await client.Input.dispatchKeyEvent({ type: "keyDown", ...baseParams });
                    await client.Input.dispatchKeyEvent({ type: "keyUp", ...baseParams });
                    return `cdp_key_event: ${modifiers ? `mod${modifiers}+` : ""}${key}`;
                });
            }
            default:
                throw new Error(`Unknown action: ${action}`);
        }
    }

    /**
     * Substitute {VAR_NAME} placeholders in step string fields with actual values.
     * Handles `code`, `text`, `url`, `description`, `verify` and every `menuPath` entry.
     * Values are inserted literally: `$` sequences such as `$$` or `$&` are NOT treated as
     * replacement patterns.
     *
     * @param {object} step - Step to copy; the original object is not modified.
     * @param {Record<string, string>} vars - Placeholder name → value.
     * @returns {object} Shallow copy of `step` with placeholders replaced.
     */
    substituteVars(step, vars) {
        const entries = Object.entries(vars);
        const sub = (s) => {
            let result = s;
            for (const [key, val] of entries) {
                // A replacer function keeps "$"-patterns in the value from being interpreted
                result = result.replaceAll(`{${key}}`, () => String(val));
            }
            return result;
        };
        const result = { ...step };
        for (const field of ["code", "text", "url", "description", "verify"]) {
            if (result[field]) {
                result[field] = sub(result[field]);
            }
        }
        if (result.menuPath) {
            result.menuPath = result.menuPath.map(sub);
        }
        return result;
    }

    /**
     * Verify a step's postcondition via CSS selector check.
     *
     * @returns {Promise<boolean>} True when the step has no `verify` selector or the runtime
     *   reports the selector as matched within the timeout.
     */
    async verifyStep(sessionId, step) {
        if (!step.verify) {
            return true;
        }
        const r = await this.runtime.waitFor({
            sessionId,
            condition: { type: "selector_visible", selector: step.verify },
            timeoutMs: step.verifyTimeoutMs ?? DEFAULT_VERIFY_TIMEOUT,
        });
        return Boolean(r.ok && r.data?.matched);
    }

    /**
     * Dismiss a popup detected by the observer daemon.
     * Reads observer state, if popup found, sends the appropriate dismiss action.
     * Non-fatal — if observer isn't running or no popup, silently returns.
     */
    async dismissPopupIfPresent(sessionId) {
        let popup;
        try {
            popup = getObserverPopup();
        }
        catch {
            return; // Observer not running or state unreadable
        }
        if (!popup) {
            return;
        }
        const action = popup.dismissAction;
        const buttonText = POPUP_BUTTON_TEXT.get(action);
        if (action !== "press_escape" && buttonText === undefined) {
            return; // Don't auto-dismiss unknown popups (and don't wait for nothing)
        }
        try {
            if (action === "press_escape") {
                await this.runtime.keyCombo({ sessionId, keys: ["escape"] });
            }
            else {
                // Try to click the button by text
                await this.runtime.press({ sessionId, target: { type: "text", value: buttonText } });
            }
            // Wait briefly for popup to close
            await sleep(POPUP_SETTLE_MS);
        }
        catch {
            // Popup dismiss failed — deliberately non-fatal, continue with step
        }
    }

    /**
     * Resolve an OCR text target to screen coordinates using observer state.
     * The observer's OCR text is only used as a visibility check: when `searchText` occurs in it
     * (case-insensitive) and at least one offset is non-zero, the offsets themselves are returned
     * as the coordinates. Returns null in every other case.
     *
     * @param {string} searchText - Text expected in the observer's last-frame OCR output.
     * @param {number} offsetX - X coordinate to return when the text is visible.
     * @param {number} offsetY - Y coordinate to return when the text is visible.
     * @returns {{ x: number, y: number } | null}
     */
    resolveOcrTarget(searchText, offsetX, offsetY) {
        let state;
        try {
            state = readObserverState();
        }
        catch {
            return null;
        }
        const ocrText = state?.running ? state.lastFrame?.ocrText : undefined;
        if (!ocrText) {
            return null;
        }
        if (!ocrText.toLowerCase().includes(searchText.toLowerCase())) {
            return null; // Text not found on screen
        }
        // No explicit coordinates — can't determine position from plain OCR text alone
        if (offsetX === 0 && offsetY === 0) {
            return null;
        }
        return { x: offsetX, y: offsetY };
    }

    /**
     * Convert playbook target format to runtime Target format.
     * Strings starting with `[`, `#`, `.` or `css=` become selector targets (the `css=` prefix
     * is dropped); any other string is a text target. Objects with `selector`, or with both
     * `x` and `y`, map to selector / coordinates targets.
     *
     * @returns {object | undefined} Runtime target, or undefined when `target` is missing or
     *   has an unsupported shape.
     */
    resolveTarget(target) {
        if (!target) {
            return undefined;
        }
        if (typeof target === "string") {
            const looksLikeCss = ["[", "#", ".", "css="].some((prefix) => target.startsWith(prefix));
            return looksLikeCss
                ? { type: "selector", value: target.replace(/^css=/, "") }
                : { type: "text", value: target };
        }
        if ("selector" in target) {
            return { type: "selector", value: target.selector };
        }
        if ("x" in target && "y" in target) {
            return { type: "coordinates", x: target.x, y: target.y };
        }
        return undefined;
    }

    /**
     * Pick the CSS selector a browser_* step operates on: a string target (with an optional
     * `css=` prefix removed, matching resolveTarget), a `{ selector }` target, or — as a last
     * resort — the step's `verify` selector.
     *
     * @throws {Error} When none of these is available.
     */
    getBrowserSelector(step) {
        if (typeof step.target === "string") {
            return step.target.replace(/^css=/, "");
        }
        if (step.target && "selector" in step.target) {
            return step.target.selector;
        }
        if (step.verify) {
            return step.verify;
        }
        throw new Error(`${step.action} step missing selector target`);
    }

    /**
     * Scroll the element matching `selector` into view and focus it.
     * @throws {Error} When no HTMLElement matches the selector.
     */
    async focusBrowserElement(client, selector) {
        await evaluateOnElement(client, selector, `el.focus();
    return { ok: true };`);
    }

    /**
     * Scroll the element matching `selector` into view and return the center of its
     * bounding client rect.
     *
     * @returns {Promise<{ x: number, y: number }>}
     * @throws {Error} When no HTMLElement matches the selector.
     */
    async resolveBrowserClickPoint(client, selector) {
        const value = await evaluateOnElement(client, selector, `const r = el.getBoundingClientRect();
    return { ok: true, x: r.x + r.width / 2, y: r.y + r.height / 2 };`);
        return { x: value.x, y: value.y };
    }

    /** Move the mouse to (x, y), then press and release the left button, with 40 ms pauses. */
    async dispatchMouseClick(client, x, y) {
        await client.Input.dispatchMouseEvent({ type: "mouseMoved", x, y });
        await sleep(40);
        await client.Input.dispatchMouseEvent({ type: "mousePressed", x, y, button: "left", clickCount: 1 });
        await sleep(40);
        await client.Input.dispatchMouseEvent({ type: "mouseReleased", x, y, button: "left", clickCount: 1 });
    }

    /** Send the "A" key with modifier 4 on macOS and 2 elsewhere (select-all shortcut). */
    async dispatchSelectAll(client) {
        const metaModifier = process.platform === "darwin" ? 4 : 2;
        const keyA = { key: "a", code: "KeyA", modifiers: metaModifier };
        await client.Input.dispatchKeyEvent({ type: "keyDown", ...keyA });
        await client.Input.dispatchKeyEvent({ type: "keyUp", ...keyA });
    }

    /** Send a keyDown/keyUp pair for a non-text key such as Backspace. */
    async dispatchKey(client, key, code) {
        await client.Input.dispatchKeyEvent({ type: "keyDown", key, code });
        await client.Input.dispatchKeyEvent({ type: "keyUp", key, code });
    }

    /** Send a keyDown/keyUp pair carrying `char` as the event's text. */
    async dispatchTextChar(client, char) {
        const textEvent = { text: char, key: char, unmodifiedText: char };
        await client.Input.dispatchKeyEvent({ type: "keyDown", ...textEvent });
        await client.Input.dispatchKeyEvent({ type: "keyUp", ...textEvent });
    }
}

/** Resolve after `ms` milliseconds. */
function sleep(ms) {
    return new Promise((resolve) => setTimeout(resolve, ms));
}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
// Copyright (C) 2025 Clazro Technology Private Limited
|
|
2
|
+
// SPDX-License-Identifier: AGPL-3.0-only
|
|
3
|
+
//
|
|
4
|
+
// This file is part of ScreenHand.
|
|
5
|
+
//
|
|
6
|
+
// ScreenHand is free software: you can redistribute it and/or modify
|
|
7
|
+
// it under the terms of the GNU Affero General Public License as
|
|
8
|
+
// published by the Free Software Foundation, version 3.
|
|
9
|
+
//
|
|
10
|
+
// ScreenHand is distributed in the hope that it will be useful,
|
|
11
|
+
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
12
|
+
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
13
|
+
// GNU Affero General Public License for more details.
|
|
14
|
+
//
|
|
15
|
+
// You should have received a copy of the GNU Affero General Public License
|
|
16
|
+
// along with ScreenHand. If not, see <https://www.gnu.org/licenses/>.
|
|
17
|
+
export { PlaybookEngine } from "./engine.js";
|
|
18
|
+
export { PlaybookStore } from "./store.js";
|
|
19
|
+
export { PlaybookRunner } from "./runner.js";
|
|
20
|
+
export { PlaybookRecorder } from "./recorder.js";
|