screenhand 0.2.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +165 -446
- package/bin/darwin-arm64/macos-bridge +0 -0
- package/dist/mcp-desktop.js +3615 -400
- package/dist/scripts/export-help-center.js +112 -0
- package/dist/scripts/marketing-loop.js +117 -0
- package/dist/scripts/observer-daemon.js +288 -0
- package/dist/scripts/orchestrator-daemon.js +399 -0
- package/dist/scripts/threads-campaign.js +208 -0
- package/dist/src/community/fetcher.js +109 -0
- package/dist/src/community/index.js +6 -0
- package/dist/src/community/publisher.js +191 -0
- package/dist/src/community/remote-api.js +121 -0
- package/dist/src/community/types.js +3 -0
- package/dist/src/community/validator.js +95 -0
- package/dist/src/context-tracker.js +489 -0
- package/dist/src/ingestion/coverage-auditor.js +233 -0
- package/dist/src/ingestion/doc-parser.js +164 -0
- package/dist/src/ingestion/index.js +8 -0
- package/dist/src/ingestion/menu-scanner.js +152 -0
- package/dist/src/ingestion/reference-merger.js +186 -0
- package/dist/src/ingestion/shortcut-extractor.js +180 -0
- package/dist/src/ingestion/tutorial-extractor.js +170 -0
- package/dist/src/ingestion/types.js +3 -0
- package/dist/src/jobs/manager.js +82 -14
- package/dist/src/jobs/runner.js +138 -15
- package/dist/src/learning/engine.js +356 -0
- package/dist/src/learning/index.js +9 -0
- package/dist/src/learning/locator-policy.js +120 -0
- package/dist/src/learning/pattern-policy.js +89 -0
- package/dist/src/learning/recovery-policy.js +116 -0
- package/dist/src/learning/sensor-policy.js +115 -0
- package/dist/src/learning/timing-model.js +204 -0
- package/dist/src/learning/topology-policy.js +90 -0
- package/dist/src/learning/types.js +9 -0
- package/dist/src/logging/timeline-logger.js +4 -1
- package/dist/src/memory/playbook-seeds.js +200 -0
- package/dist/src/memory/recall.js +60 -8
- package/dist/src/memory/service.js +30 -5
- package/dist/src/memory/store.js +34 -5
- package/dist/src/native/bridge-client.js +253 -31
- package/dist/src/observer/state.js +199 -0
- package/dist/src/observer/types.js +43 -0
- package/dist/src/orchestrator/state.js +68 -0
- package/dist/src/orchestrator/types.js +22 -0
- package/dist/src/perception/ax-source.js +162 -0
- package/dist/src/perception/cdp-source.js +162 -0
- package/dist/src/perception/coordinator.js +771 -0
- package/dist/src/perception/frame-differ.js +287 -0
- package/dist/src/perception/index.js +22 -0
- package/dist/src/perception/manager.js +199 -0
- package/dist/src/perception/types.js +47 -0
- package/dist/src/perception/vision-source.js +399 -0
- package/dist/src/planner/deterministic.js +298 -0
- package/dist/src/planner/executor.js +870 -0
- package/dist/src/planner/goal-store.js +92 -0
- package/dist/src/planner/index.js +21 -0
- package/dist/src/planner/planner.js +520 -0
- package/dist/src/planner/tool-registry.js +71 -0
- package/dist/src/planner/types.js +22 -0
- package/dist/src/platform/explorer.js +213 -0
- package/dist/src/platform/help-center-markdown.js +527 -0
- package/dist/src/platform/learner.js +257 -0
- package/dist/src/playbook/engine.js +296 -11
- package/dist/src/playbook/mcp-recorder.js +204 -0
- package/dist/src/playbook/recorder.js +3 -2
- package/dist/src/playbook/runner.js +1 -1
- package/dist/src/playbook/store.js +139 -10
- package/dist/src/recovery/detectors.js +156 -0
- package/dist/src/recovery/engine.js +327 -0
- package/dist/src/recovery/index.js +20 -0
- package/dist/src/recovery/strategies.js +274 -0
- package/dist/src/recovery/types.js +20 -0
- package/dist/src/runtime/accessibility-adapter.js +55 -18
- package/dist/src/runtime/applescript-adapter.js +8 -2
- package/dist/src/runtime/cdp-chrome-adapter.js +1 -1
- package/dist/src/runtime/executor.js +23 -3
- package/dist/src/runtime/locator-cache.js +24 -2
- package/dist/src/runtime/service.js +59 -15
- package/dist/src/runtime/session-manager.js +4 -1
- package/dist/src/runtime/vision-adapter.js +2 -1
- package/dist/src/state/app-map-types.js +72 -0
- package/dist/src/state/app-map.js +1974 -0
- package/dist/src/state/entity-tracker.js +108 -0
- package/dist/src/state/fusion.js +96 -0
- package/dist/src/state/index.js +21 -0
- package/dist/src/state/ladder-generator.js +236 -0
- package/dist/src/state/persistence.js +156 -0
- package/dist/src/state/types.js +17 -0
- package/dist/src/state/world-model.js +1456 -0
- package/dist/src/util/atomic-write.js +19 -4
- package/dist/src/util/sanitize.js +146 -0
- package/dist-app-maps/com.figma.Desktop.json +959 -0
- package/dist-app-maps/com.hnc.Discord.json +1146 -0
- package/dist-app-maps/notion.id.json +2831 -0
- package/dist-playbooks/canva-screenhand-carousel.json +445 -0
- package/dist-playbooks/codex-desktop.json +76 -0
- package/dist-playbooks/competitor-research-stack.json +122 -0
- package/dist-playbooks/davinci-color-grade.json +153 -0
- package/dist-playbooks/davinci-edit-timeline.json +162 -0
- package/dist-playbooks/davinci-render.json +114 -0
- package/dist-playbooks/devto.json +52 -0
- package/dist-playbooks/discord.json +41 -0
- package/dist-playbooks/google-flow-create-project.json +59 -0
- package/dist-playbooks/google-flow-edit-image.json +90 -0
- package/dist-playbooks/google-flow-edit-video.json +90 -0
- package/dist-playbooks/google-flow-generate-image.json +68 -0
- package/dist-playbooks/google-flow-generate-video.json +191 -0
- package/dist-playbooks/google-flow-open-project.json +48 -0
- package/dist-playbooks/google-flow-open-scenebuilder.json +64 -0
- package/dist-playbooks/google-flow-search-assets.json +64 -0
- package/dist-playbooks/instagram.json +57 -0
- package/dist-playbooks/linkedin.json +52 -0
- package/dist-playbooks/n8n.json +43 -0
- package/dist-playbooks/reddit.json +52 -0
- package/dist-playbooks/threads.json +59 -0
- package/dist-playbooks/x-twitter.json +59 -0
- package/dist-playbooks/youtube.json +59 -0
- package/dist-references/canva.json +646 -0
- package/dist-references/codex-desktop.json +305 -0
- package/dist-references/davinci-resolve-keyboard.json +594 -0
- package/dist-references/davinci-resolve-menu-map.json +1139 -0
- package/dist-references/davinci-resolve-menus-batch1.json +116 -0
- package/dist-references/davinci-resolve-menus-batch2.json +372 -0
- package/dist-references/davinci-resolve-menus-batch3.json +330 -0
- package/dist-references/davinci-resolve-menus-batch4.json +297 -0
- package/dist-references/davinci-resolve-shortcuts.json +333 -0
- package/dist-references/devpost.json +186 -0
- package/dist-references/devto.json +317 -0
- package/dist-references/discord.json +549 -0
- package/dist-references/figma.json +1186 -0
- package/dist-references/finder.json +146 -0
- package/dist-references/google-ads-transparency.json +95 -0
- package/dist-references/google-flow.json +649 -0
- package/dist-references/instagram.json +341 -0
- package/dist-references/linkedin.json +324 -0
- package/dist-references/meta-ad-library.json +86 -0
- package/dist-references/n8n.json +387 -0
- package/dist-references/notes.json +27 -0
- package/dist-references/notion.json +163 -0
- package/dist-references/reddit.json +341 -0
- package/dist-references/threads.json +337 -0
- package/dist-references/x-twitter.json +403 -0
- package/dist-references/youtube.json +373 -0
- package/native/macos-bridge/Package.swift +22 -0
- package/native/macos-bridge/Sources/AccessibilityBridge.swift +482 -0
- package/native/macos-bridge/Sources/AppManagement.swift +339 -0
- package/native/macos-bridge/Sources/CoreGraphicsBridge.swift +537 -0
- package/native/macos-bridge/Sources/ObserverBridge.swift +120 -0
- package/native/macos-bridge/Sources/StreamCapture.swift +136 -0
- package/native/macos-bridge/Sources/VisionBridge.swift +238 -0
- package/native/macos-bridge/Sources/main.swift +498 -0
- package/native/windows-bridge/AppManagement.cs +234 -0
- package/native/windows-bridge/InputBridge.cs +436 -0
- package/native/windows-bridge/Program.cs +270 -0
- package/native/windows-bridge/ScreenCapture.cs +453 -0
- package/native/windows-bridge/UIAutomationBridge.cs +571 -0
- package/native/windows-bridge/WindowsBridge.csproj +17 -0
- package/package.json +12 -1
- package/scripts/postinstall.cjs +127 -0
- package/dist/.audit-log.jsonl +0 -55
- package/dist/.screenhand/memory/.lock +0 -1
- package/dist/.screenhand/memory/actions.jsonl +0 -85
- package/dist/.screenhand/memory/errors.jsonl +0 -5
- package/dist/.screenhand/memory/errors.jsonl.bak +0 -4
- package/dist/.screenhand/memory/state.json +0 -35
- package/dist/.screenhand/memory/state.json.bak +0 -35
- package/dist/.screenhand/memory/strategies.jsonl +0 -12
- package/dist/agent/cli.js +0 -73
- package/dist/agent/loop.js +0 -258
- package/dist/config.js +0 -9
- package/dist/index.js +0 -56
- package/dist/logging/timeline-logger.js +0 -29
- package/dist/mcp/mcp-stdio-server.js +0 -448
- package/dist/mcp/server.js +0 -347
- package/dist/mcp-entry.js +0 -59
- package/dist/memory/recall.js +0 -160
- package/dist/memory/research.js +0 -98
- package/dist/memory/seeds.js +0 -89
- package/dist/memory/session.js +0 -161
- package/dist/memory/store.js +0 -391
- package/dist/memory/types.js +0 -4
- package/dist/monitor/codex-monitor.js +0 -377
- package/dist/monitor/task-queue.js +0 -84
- package/dist/monitor/types.js +0 -49
- package/dist/native/bridge-client.js +0 -174
- package/dist/native/macos-bridge-client.js +0 -5
- package/dist/npm-publish-helper.js +0 -117
- package/dist/npm-token-cdp.js +0 -113
- package/dist/npm-token-create.js +0 -135
- package/dist/npm-token-finish.js +0 -126
- package/dist/playbook/engine.js +0 -193
- package/dist/playbook/index.js +0 -4
- package/dist/playbook/recorder.js +0 -519
- package/dist/playbook/runner.js +0 -392
- package/dist/playbook/store.js +0 -166
- package/dist/playbook/types.js +0 -4
- package/dist/runtime/accessibility-adapter.js +0 -377
- package/dist/runtime/app-adapter.js +0 -48
- package/dist/runtime/applescript-adapter.js +0 -283
- package/dist/runtime/ax-role-map.js +0 -80
- package/dist/runtime/browser-adapter.js +0 -36
- package/dist/runtime/cdp-chrome-adapter.js +0 -505
- package/dist/runtime/composite-adapter.js +0 -205
- package/dist/runtime/executor.js +0 -250
- package/dist/runtime/locator-cache.js +0 -12
- package/dist/runtime/planning-loop.js +0 -47
- package/dist/runtime/service.js +0 -372
- package/dist/runtime/session-manager.js +0 -28
- package/dist/runtime/state-observer.js +0 -105
- package/dist/runtime/vision-adapter.js +0 -208
- package/dist/test-mcp-protocol.js +0 -138
- package/dist/types.js +0 -1
|
@@ -0,0 +1,257 @@
|
|
|
1
|
+
// Copyright (C) 2025 Clazro Technology Private Limited
|
|
2
|
+
// SPDX-License-Identifier: AGPL-3.0-only
|
|
3
|
+
/**
|
|
4
|
+
* PlatformLearner — scrape official docs, help center, shortcuts for a platform.
|
|
5
|
+
*
|
|
6
|
+
* Crawls documentation pages via CDP, extracts structured data,
|
|
7
|
+
* and saves as a reference JSON.
|
|
8
|
+
*/
|
|
9
|
+
import fs from "node:fs";
|
|
10
|
+
import path from "node:path";
|
|
11
|
+
import { writeFileAtomicSync } from "../util/atomic-write.js";
|
|
12
|
+
/**
 * Build the list of candidate documentation URLs to probe for a platform.
 *
 * The list combines well-known paths under the site origin (help, support,
 * docs, shortcuts, developers, api, changelog, ...) with the conventional
 * `help.`, `support.` and `docs.` subdomains of `<platform>.com`.
 *
 * @param {string} platform - Platform slug; used for the default origin
 *   (`https://<platform>.com`) and for the subdomain candidates.
 * @param {string} [rootUrl] - Optional site root overriding the default origin.
 *   Trailing slashes are ignored. An empty string is treated as "not provided"
 *   so the result never contains relative URLs such as "/help".
 * @returns {string[]} Unique candidate URLs in probing order.
 */
export function buildDocUrls(platform, rootUrl) {
    const base = rootUrl || `https://${platform}.com`;
    // Strip every trailing slash so "https://x.com//" and "https://x.com" agree.
    const origin = base.replace(/\/+$/, "");
    const candidates = [
        origin,
        `${origin}/help`,
        `${origin}/support`,
        `${origin}/docs`,
        `${origin}/keyboard-shortcuts`,
        `${origin}/shortcuts`,
        `https://help.${platform}.com`,
        `https://support.${platform}.com`,
        `https://docs.${platform}.com`,
        `${origin}/developers`,
        `${origin}/api`,
        `${origin}/changelog`,
        `${origin}/whats-new`,
    ];
    // A rootUrl equal to one of the subdomain candidates would otherwise be listed twice.
    return [...new Set(candidates)];
}
|
|
32
|
+
/**
 * Extract keyboard shortcuts from the page currently loaded in the browser.
 *
 * Runs a script in the page (through `cdpEvaluate`) that collects:
 *  - table rows whose first or second cell mentions a modifier key
 *    (⌘ ⌥ ⇧ ⌃, ctrl, cmd, alt, shift): stored as `firstCell -> secondCell`;
 *  - `<kbd>` elements: stored as `kbdText -> surrounding text` (the text of the
 *    closest li/tr/p/div with the key removed, capped at 80 characters).
 *
 * @param {(expression: string) => Promise<any>} cdpEvaluate - Evaluates a JS
 *   expression in the page and resolves to an object whose `result.value`
 *   carries the returned value.
 * @returns {Promise<Record<string, string>>} Shortcut map; `{}` when the
 *   evaluation yields nothing usable.
 */
export async function extractShortcuts(cdpEvaluate) {
    const result = await cdpEvaluate(`(() => {
        const shortcuts = {};
        const keyPattern = /[⌘⌥⇧⌃]|ctrl|cmd|alt|shift/i;
        // Look for common shortcut table patterns
        for (const table of document.querySelectorAll('table')) {
            for (const row of table.querySelectorAll('tr')) {
                const cells = row.querySelectorAll('td, th');
                if (cells.length < 2) continue;
                const text0 = (cells[0].textContent || '').trim();
                const text1 = (cells[1].textContent || '').trim();
                // A row with an empty cell would create an '' key or an empty value
                if (!text0 || !text1) continue;
                // Either cell may be the one holding the key combo
                if (keyPattern.test(text0) || keyPattern.test(text1)) {
                    shortcuts[text0] = text1;
                }
            }
        }
        // Also check kbd elements
        for (const kbd of document.querySelectorAll('kbd')) {
            const parent = kbd.closest('li, tr, p, div');
            if (!parent) continue;
            const keyText = (kbd.textContent || '').trim();
            const descText = (parent.textContent || '').replace(keyText, '').trim().substring(0, 80);
            if (keyText && descText) shortcuts[keyText] = descText;
        }
        return shortcuts;
    })()`);
    // The evaluator may resolve to undefined, or to a result without a value
    // (e.g. when the page script threw); fall back to an empty map in both cases.
    const value = result?.result?.value;
    return value !== null && typeof value === "object" ? value : {};
}
|
|
66
|
+
/**
 * Extract the current page's content as structured text.
 *
 * Runs a script in the page (through `cdpEvaluate`) that returns the document
 * title, the trimmed non-empty h1/h2/h3 texts, up to 100 links that have both
 * text and href (link text capped at 80 characters), and the first 8000
 * characters of the body's inner text.
 *
 * @param {(expression: string) => Promise<any>} cdpEvaluate - Evaluates a JS
 *   expression in the page and resolves to an object whose `result.value`
 *   carries the returned value.
 * @returns {Promise<{title: string, headings: string[], links: {text: string, href: string}[], text: string}>}
 *   The extracted content, or an empty record when the evaluation yields nothing.
 */
export async function extractPageContent(cdpEvaluate) {
    const result = await cdpEvaluate(`(() => {
        const headings = Array.from(document.querySelectorAll('h1, h2, h3'))
            .map(h => (h.textContent || '').trim())
            .filter(Boolean);
        // Filter before capping so unusable anchors don't eat into the 100-link budget
        const links = Array.from(document.querySelectorAll('a[href]'))
            .map(a => ({
                text: (a.textContent || '').trim().substring(0, 80),
                href: a.href,
            }))
            .filter(l => l.text && l.href)
            .slice(0, 100);
        return {
            title: document.title,
            headings,
            links,
            // Documents without a body must not discard the title/headings/links
            text: (document.body?.innerText ?? '').substring(0, 8000),
        };
    })()`);
    // Tolerate an evaluator that resolves to undefined/null
    return result?.result?.value ?? { title: "", headings: [], links: [], text: "" };
}
|
|
83
|
+
/**
 * Extract CSS selectors for interactive elements on the current page.
 *
 * Runs a script in the page (through `cdpEvaluate`) that inspects the first 50
 * elements carrying `data-testid`, `aria-label`, or a button/tab/menuitem role.
 * Each element is keyed by its test id, else its aria-label, else the first 30
 * characters of its text. The selector preference is:
 * `[data-testid="…"]`, then `#id`, then `[aria-label="…"]`; elements with none
 * of these are skipped.
 *
 * Ids are escaped with `CSS.escape` and attribute values are quoted with `"`
 * and `\` escaped, so ids such as "1st" or ":r1:" and labels containing quotes
 * still produce valid selectors.
 *
 * @param {(expression: string) => Promise<any>} cdpEvaluate - Evaluates a JS
 *   expression in the page and resolves to an object whose `result.value`
 *   carries the returned value.
 * @returns {Promise<Record<string, string>>} Map of element key to CSS
 *   selector; `{}` when the evaluation yields nothing.
 */
export async function extractSelectors(cdpEvaluate) {
    // String.raw keeps the backslashes below intact in the script sent to the page.
    const result = await cdpEvaluate(String.raw`(() => {
        const selectors = {};
        // Quote an attribute value as a CSS string, escaping only the characters that would end or corrupt it
        const quoted = (value) => '"' + String(value).replace(/["\\]/g, '\\$&') + '"';
        const elements = document.querySelectorAll('[data-testid], [aria-label], [role="button"], [role="tab"], [role="menuitem"]');
        for (const el of Array.from(elements).slice(0, 50)) {
            const testId = el.getAttribute('data-testid');
            const label = el.getAttribute('aria-label');
            const key = testId || label || el.textContent?.trim().substring(0, 30) || '';
            if (!key) continue;

            let selector = '';
            if (testId) selector = '[data-testid=' + quoted(testId) + ']';
            else if (el.id) selector = '#' + CSS.escape(el.id);
            else if (label) selector = '[aria-label=' + quoted(label) + ']';

            if (selector) selectors[key] = selector;
        }
        return selectors;
    })()`);
    // Tolerate an evaluator that resolves to undefined/null
    return result?.result?.value ?? {};
}
|
|
105
|
+
/**
 * Crawl a single page via CDP: navigate, wait for load, then extract content,
 * shortcuts and selectors.
 *
 * Never throws: every failure is reported as `{ success: false, error }`.
 *
 * @param {object} cdpClient - CDP client exposing `Page.navigate`,
 *   `Page.loadEventFired` and `Runtime.evaluate`.
 * @param {string} url - URL to navigate to.
 * @param {number} [timeoutMs=10000] - Maximum time to wait for the load event;
 *   when it elapses the crawl continues with whatever has loaded.
 * @param {number} [settleMs=2000] - Extra delay after load so client-rendered
 *   (SPA) content can appear; pass 0 to skip it.
 * @returns {Promise<{success: true, content: object, shortcuts: object, selectors: object} | {success: false, error: string}>}
 */
export async function crawlPage(cdpClient, url, timeoutMs = 10000, settleMs = 2000) {
    let loadTimer;
    try {
        // Subscribe BEFORE navigating: a load event that fires while the
        // navigate call is still resolving would otherwise be missed and the
        // full timeout would be waited for nothing.
        const loaded = new Promise((resolve) => {
            loadTimer = setTimeout(resolve, timeoutMs);
            // A rejected subscription is treated like a load: proceed with extraction.
            Promise.resolve(cdpClient.Page.loadEventFired()).then(() => resolve(), () => resolve());
        });
        const navigation = await cdpClient.Page.navigate({ url });
        // Page.navigate reports failures (DNS, TLS, ...) through errorText
        // rather than rejecting; scraping the browser's error page would
        // pollute the learned reference.
        if (navigation?.errorText) {
            return { success: false, error: `Navigation failed: ${navigation.errorText}` };
        }
        await loaded;
        // Extra wait for SPA content
        if (settleMs > 0) {
            await new Promise((resolve) => setTimeout(resolve, settleMs));
        }
        const evaluate = (expression) => cdpClient.Runtime.evaluate({ expression, returnByValue: true, awaitPromise: true });
        const content = await extractPageContent(evaluate);
        const shortcuts = await extractShortcuts(evaluate);
        const selectors = await extractSelectors(evaluate);
        return { success: true, content, shortcuts, selectors };
    }
    catch (err) {
        return { success: false, error: err instanceof Error ? err.message : String(err) };
    }
    finally {
        // Never leave the load timer pending once the crawl has finished
        clearTimeout(loadTimer);
    }
}
|
|
129
|
+
/**
 * Compile per-page crawl results into a single learn result.
 *
 * For every crawled page this:
 *  - records the page URL;
 *  - merges its shortcuts into one map (later pages win on key conflicts);
 *  - groups its selectors under a name derived from the page title (pages
 *    sharing a name are merged; an empty or missing title falls back to "page");
 *  - collects headings of 4–79 characters as features;
 *  - collects links whose text mentions api/developer/endpoint/sdk/integration;
 *  - extracts flows from numbered step sequences ("1. …", "2. …") in the text;
 *  - collects lines mentioning limitations / unsupported behaviour.
 *
 * @param {string} platform - Platform name copied into the result.
 * @param {Array<{url: string, shortcuts?: object, selectors?: object, content?: {title?: string, headings?: string[], links?: {text: string, href: string}[], text?: string}}>} crawledPages
 * @returns {{platform: string, learnedAt: string, sourceUrls: string[], shortcuts: object, features: string[], selectors: object, flows: object, apiEndpoints: string[], knownLimitations: string[], tips: string[]}}
 *   Features are de-duplicated and capped at 50; apiEndpoints and
 *   knownLimitations are de-duplicated and capped at 20. `tips` is always empty here.
 */
export function compileLearnResult(platform, crawledPages) {
    const allShortcuts = {};
    const allSelectors = {};
    const features = [];
    const tips = [];
    const sourceUrls = [];
    const flows = {};
    const apiEndpoints = [];
    const knownLimitations = [];
    const limitationPattern = /limitation|known issue|not supported|doesn't support|won't work/i;

    // Store a finished flow. Sequences shorter than two steps are noise, and
    // the first flow registered under a key wins.
    const commitFlow = (flow) => {
        if (!flow || flow.steps.length < 2) return;
        const key = flow.name.toLowerCase().replace(/\s+/g, "_");
        if (!flows[key]) {
            flows[key] = { description: flow.name, steps: flow.steps };
        }
    };

    for (const page of crawledPages ?? []) {
        sourceUrls.push(page.url);
        if (page.shortcuts) {
            Object.assign(allShortcuts, page.shortcuts);
        }
        if (page.selectors && Object.keys(page.selectors).length > 0) {
            const sanitizedTitle = String(page.content?.title ?? "").replace(/[^a-zA-Z0-9]/g, "_").substring(0, 30);
            // An empty title must not produce an "" key
            const pageName = sanitizedTitle || "page";
            // Merge rather than overwrite: several pages can share a title
            allSelectors[pageName] = { ...allSelectors[pageName], ...page.selectors };
        }
        if (!page.content) continue;
        const { headings = [], links = [], text = "" } = page.content;

        // Extract features from headings
        for (const h of headings) {
            if (h.length > 3 && h.length < 80) features.push(h);
        }
        // Look for API-related links
        for (const link of links) {
            if (/api|developer|endpoint|sdk|integration/i.test(link.text)) {
                apiEndpoints.push(`${link.text}: ${link.href}`);
            }
        }

        // Extract flows from numbered step sequences (e.g. "1. Click..." "2. Enter..." "3. Submit...")
        const contentLines = text.split("\n");
        let currentFlow = null;
        for (let i = 0; i < contentLines.length; i++) {
            const line = contentLines[i].trim();
            const stepMatch = line.match(/^(\d+)[.)]\s+(.+)/);
            if (stepMatch) {
                const stepNum = parseInt(stepMatch[1], 10);
                const stepText = stepMatch[2].trim();
                if (stepNum === 1) {
                    // A new list begins: keep what was collected so far instead
                    // of discarding it or letting later steps leak into it.
                    commitFlow(currentFlow);
                    currentFlow = null;
                    if (stepText.length > 5) {
                        // Name the flow after a nearby preceding non-numbered line (up to 3 lines back)
                        const heading = contentLines
                            .slice(Math.max(0, i - 3), i)
                            .find(l => l.trim().length > 3 && !l.trim().match(/^\d/));
                        const cleaned = (heading?.trim() ?? "").replace(/[^a-zA-Z0-9_ ]/g, "").substring(0, 50).trim();
                        // Headings made only of stripped characters (e.g. non-Latin text) fall back to a generated name
                        const flowName = cleaned || `flow_${Object.keys(flows).length + 1}`;
                        currentFlow = { name: flowName, steps: [stepText] };
                    }
                }
                else if (currentFlow && stepNum > 1) {
                    currentFlow.steps.push(stepText);
                }
            }
            else if (currentFlow && currentFlow.steps.length >= 2) {
                // End of step sequence — save the flow
                commitFlow(currentFlow);
                currentFlow = null;
            }
            else if (line.length > 0 && !line.match(/^\d/)) {
                currentFlow = null;
            }
        }
        // Save any trailing flow
        commitFlow(currentFlow);

        // Look for limitation/known-issue mentions. One pattern decides, so
        // every phrase it lists ("doesn't support", "won't work", ...) is honoured.
        for (const rawLine of contentLines) {
            if (limitationPattern.test(rawLine)) {
                knownLimitations.push(rawLine.trim().substring(0, 200));
            }
        }
    }
    return {
        platform,
        learnedAt: new Date().toISOString(),
        sourceUrls,
        shortcuts: allShortcuts,
        features: [...new Set(features)].slice(0, 50),
        selectors: allSelectors,
        flows,
        apiEndpoints: [...new Set(apiEndpoints)].slice(0, 20),
        knownLimitations: [...new Set(knownLimitations)].slice(0, 20),
        tips,
    };
}
|
|
223
|
+
/**
 * Save a learn result as a reference JSON file named
 * `<platform>-learned.json` inside `referencesDir`.
 *
 * The directory is created when missing. The platform name is used for the
 * file name with path separators replaced by "_", so a platform value can
 * never place the file outside `referencesDir`.
 *
 * @param {string} referencesDir - Directory that holds reference JSON files.
 * @param {object} result - Learn result providing `platform`, `sourceUrls`,
 *   `shortcuts`, `features`, `selectors`, `flows`, `learnedAt`, `apiEndpoints`,
 *   `knownLimitations`, `tips` and optionally `bundleId`.
 * @returns {string} Path of the written file.
 */
export function saveLearnResult(referencesDir, result) {
    // Recursive mkdir succeeds when the directory already exists, so no prior existence check is needed.
    fs.mkdirSync(referencesDir, { recursive: true });
    // Keep the file inside referencesDir even if the platform name contains separators.
    const fileStem = String(result.platform).replace(/[\/\\\0]/g, "_");
    const filePath = path.join(referencesDir, `${fileStem}-learned.json`);
    const reference = {
        id: `${result.platform}-learned`,
        name: `${result.platform} — Auto-Learned from Docs`,
        description: `Scraped ${result.sourceUrls.length} documentation pages. Found ${Object.keys(result.shortcuts).length} shortcuts, ${result.features.length} features.`,
        platform: result.platform,
        bundleId: result.bundleId ?? null,
        version: "1.0.0",
        tags: [result.platform, "auto-learned"],
        successCount: 0,
        failCount: 0,
        // Source URLs keyed doc_0, doc_1, ... in crawl order
        urls: Object.fromEntries(result.sourceUrls.map((u, i) => [`doc_${i}`, u])),
        selectors: result.selectors,
        shortcuts: result.shortcuts,
        flows: result.flows,
        detection: {},
        errors: [],
        policyNotes: {},
        _meta: {
            learnedAt: result.learnedAt,
            sourceUrls: result.sourceUrls,
            features: result.features,
            apiEndpoints: result.apiEndpoints,
            knownLimitations: result.knownLimitations,
            tips: result.tips,
        },
    };
    writeFileAtomicSync(filePath, JSON.stringify(reference, null, 2));
    return filePath;
}
|
|
@@ -14,13 +14,25 @@
|
|
|
14
14
|
//
|
|
15
15
|
// You should have received a copy of the GNU Affero General Public License
|
|
16
16
|
// along with ScreenHand. If not, see <https://www.gnu.org/licenses/>.
|
|
17
|
+
import { readObserverState, getObserverPopup } from "../observer/state.js";
|
|
17
18
|
const DEFAULT_VERIFY_TIMEOUT = 5000;
|
|
18
19
|
const STEP_DELAY_MS = 300;
|
|
19
20
|
export class PlaybookEngine {
|
|
20
21
|
runtime;
|
|
22
|
+
cdpConnect;
|
|
23
|
+
/** Enable observer-based popup checks before each step */
|
|
24
|
+
popupCheckEnabled = false;
|
|
21
25
|
constructor(runtime) {
|
|
22
26
|
this.runtime = runtime;
|
|
23
27
|
}
|
|
28
|
+
/**
 * Enable/disable pre-step popup detection via observer daemon.
 * Only stores the flag on `popupCheckEnabled`; nothing else is changed here.
 */
|
|
29
|
+
setPopupCheck(enabled) {
|
|
30
|
+
this.popupCheckEnabled = enabled;
|
|
31
|
+
}
|
|
32
|
+
/**
 * Set CDP connection factory for browser_js and cdp_key_event actions.
 * Factory accepts optional port override. The factory is only stored on
 * `cdpConnect` here; it is not invoked by this setter.
 */
|
|
33
|
+
setCDPConnect(factory) {
|
|
34
|
+
this.cdpConnect = factory;
|
|
35
|
+
}
|
|
24
36
|
/**
|
|
25
37
|
* Execute a playbook against a live session.
|
|
26
38
|
* Returns result with success/failure and which step broke.
|
|
@@ -29,9 +41,20 @@ export class PlaybookEngine {
|
|
|
29
41
|
const start = Date.now();
|
|
30
42
|
let stepsCompleted = 0;
|
|
31
43
|
for (let i = 0; i < playbook.steps.length; i++) {
|
|
32
|
-
|
|
44
|
+
let step = options.vars ? this.substituteVars(playbook.steps[i], options.vars) : playbook.steps[i];
|
|
33
45
|
try {
|
|
34
|
-
|
|
46
|
+
// Pre-step: check for popups via observer (if enabled, non-blocking)
|
|
47
|
+
if (this.popupCheckEnabled) {
|
|
48
|
+
await this.dismissPopupIfPresent(sessionId);
|
|
49
|
+
}
|
|
50
|
+
// OCR-based locate: resolve locateByOcr to coordinates before execution
|
|
51
|
+
if (step.locateByOcr) {
|
|
52
|
+
const coords = this.resolveOcrTarget(step.locateByOcr, step.offsetX ?? 0, step.offsetY ?? 0);
|
|
53
|
+
if (coords) {
|
|
54
|
+
step = { ...step, target: { x: coords.x, y: coords.y } };
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
const result = await this.executeStep(sessionId, step, playbook.cdpPort);
|
|
35
58
|
stepsCompleted++;
|
|
36
59
|
if (options.onStep) {
|
|
37
60
|
options.onStep(i, step, result);
|
|
@@ -85,7 +108,7 @@ export class PlaybookEngine {
|
|
|
85
108
|
/**
|
|
86
109
|
* Execute a single playbook step.
|
|
87
110
|
*/
|
|
88
|
-
async executeStep(sessionId, step) {
|
|
111
|
+
async executeStep(sessionId, step, cdpPort) {
|
|
89
112
|
const target = this.resolveTarget(step.target);
|
|
90
113
|
switch (step.action) {
|
|
91
114
|
case "navigate": {
|
|
@@ -105,14 +128,21 @@ export class PlaybookEngine {
|
|
|
105
128
|
return `Pressed ${JSON.stringify(step.target)}`;
|
|
106
129
|
}
|
|
107
130
|
case "type_into": {
|
|
108
|
-
if (!target)
|
|
109
|
-
throw new Error("type_into step missing target");
|
|
110
131
|
if (!step.text)
|
|
111
132
|
throw new Error("type_into step missing text");
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
133
|
+
if (target) {
|
|
134
|
+
const r = await this.runtime.typeInto({ sessionId, target, text: step.text });
|
|
135
|
+
if (!r.ok)
|
|
136
|
+
throw new Error(r.error.message);
|
|
137
|
+
return `Typed "${step.text}" into ${JSON.stringify(step.target)}`;
|
|
138
|
+
}
|
|
139
|
+
// No target — type into focused element character by character via key events
|
|
140
|
+
for (const char of step.text) {
|
|
141
|
+
const r = await this.runtime.keyCombo({ sessionId, keys: [char] });
|
|
142
|
+
if (!r.ok)
|
|
143
|
+
throw new Error(r.error?.message ?? "key event failed");
|
|
144
|
+
}
|
|
145
|
+
return `Typed "${step.text}" into focused element`;
|
|
116
146
|
}
|
|
117
147
|
case "extract": {
|
|
118
148
|
if (!target)
|
|
@@ -126,13 +156,22 @@ export class PlaybookEngine {
|
|
|
126
156
|
throw new Error(r.error.message);
|
|
127
157
|
return `Extracted: ${JSON.stringify(r.data).slice(0, 200)}`;
|
|
128
158
|
}
|
|
159
|
+
case "key":
|
|
129
160
|
case "key_combo": {
|
|
130
161
|
if (!step.keys || step.keys.length === 0)
|
|
131
|
-
throw new Error(
|
|
162
|
+
throw new Error(`${step.action} step missing keys`);
|
|
132
163
|
const r = await this.runtime.keyCombo({ sessionId, keys: step.keys });
|
|
133
164
|
if (!r.ok)
|
|
134
165
|
throw new Error(r.error.message);
|
|
135
|
-
return
|
|
166
|
+
return `${step.action === "key" ? "Key" : "Key combo"}: ${step.keys.join("+")}`;
|
|
167
|
+
}
|
|
168
|
+
case "menu_click": {
|
|
169
|
+
if (!step.menuPath || step.menuPath.length === 0)
|
|
170
|
+
throw new Error("menu_click step missing menuPath");
|
|
171
|
+
const r = await this.runtime.menuClick({ sessionId, menuPath: step.menuPath });
|
|
172
|
+
if (!r.ok)
|
|
173
|
+
throw new Error(r.error.message);
|
|
174
|
+
return `Menu click: ${step.menuPath.join(" > ")}`;
|
|
136
175
|
}
|
|
137
176
|
case "scroll": {
|
|
138
177
|
const input = {
|
|
@@ -156,10 +195,115 @@ export class PlaybookEngine {
|
|
|
156
195
|
throw new Error(r.error.message);
|
|
157
196
|
return `Screenshot taken`;
|
|
158
197
|
}
|
|
198
|
+
case "browser_js": {
|
|
199
|
+
if (!step.code)
|
|
200
|
+
throw new Error("browser_js step missing code");
|
|
201
|
+
if (!this.cdpConnect)
|
|
202
|
+
throw new Error("browser_js requires CDP — call setCDPConnect() first");
|
|
203
|
+
const client = await this.cdpConnect(cdpPort);
|
|
204
|
+
try {
|
|
205
|
+
const result = await client.Runtime.evaluate({
|
|
206
|
+
expression: step.code,
|
|
207
|
+
awaitPromise: true,
|
|
208
|
+
returnByValue: true,
|
|
209
|
+
});
|
|
210
|
+
if (result.exceptionDetails) {
|
|
211
|
+
throw new Error(`JS Error: ${result.exceptionDetails.text ?? result.exceptionDetails.exception?.description ?? "unknown"}`);
|
|
212
|
+
}
|
|
213
|
+
const val = result.result?.value;
|
|
214
|
+
return `browser_js: ${typeof val === "object" ? JSON.stringify(val) : String(val ?? "undefined")}`;
|
|
215
|
+
}
|
|
216
|
+
finally {
|
|
217
|
+
await client.close();
|
|
218
|
+
}
|
|
219
|
+
}
|
|
220
|
+
case "browser_click":
|
|
221
|
+
case "browser_human_click": {
|
|
222
|
+
const selector = this.getBrowserSelector(step);
|
|
223
|
+
if (!this.cdpConnect)
|
|
224
|
+
throw new Error(`${step.action} requires CDP — call setCDPConnect() first`);
|
|
225
|
+
const client = await this.cdpConnect(cdpPort);
|
|
226
|
+
try {
|
|
227
|
+
const point = await this.resolveBrowserClickPoint(client, selector);
|
|
228
|
+
await this.dispatchMouseClick(client, point.x, point.y);
|
|
229
|
+
return `${step.action}: clicked ${selector}`;
|
|
230
|
+
}
|
|
231
|
+
finally {
|
|
232
|
+
await client.close();
|
|
233
|
+
}
|
|
234
|
+
}
|
|
235
|
+
case "browser_type": {
|
|
236
|
+
const selector = this.getBrowserSelector(step);
|
|
237
|
+
if (!step.text)
|
|
238
|
+
throw new Error("browser_type step missing text");
|
|
239
|
+
if (!this.cdpConnect)
|
|
240
|
+
throw new Error("browser_type requires CDP — call setCDPConnect() first");
|
|
241
|
+
const client = await this.cdpConnect(cdpPort);
|
|
242
|
+
try {
|
|
243
|
+
await this.focusBrowserElement(client, selector);
|
|
244
|
+
const shouldClear = step.text !== undefined;
|
|
245
|
+
if (shouldClear) {
|
|
246
|
+
await this.dispatchSelectAll(client);
|
|
247
|
+
await this.dispatchKey(client, "Backspace", "Backspace");
|
|
248
|
+
await sleep(50);
|
|
249
|
+
}
|
|
250
|
+
for (const char of step.text) {
|
|
251
|
+
await this.dispatchTextChar(client, char);
|
|
252
|
+
await sleep(50);
|
|
253
|
+
}
|
|
254
|
+
return `browser_type: typed ${step.text.length} chars into ${selector}`;
|
|
255
|
+
}
|
|
256
|
+
finally {
|
|
257
|
+
await client.close();
|
|
258
|
+
}
|
|
259
|
+
}
|
|
260
|
+
case "cdp_key_event": {
|
|
261
|
+
if (!step.keyEvent)
|
|
262
|
+
throw new Error("cdp_key_event step missing keyEvent");
|
|
263
|
+
if (!this.cdpConnect)
|
|
264
|
+
throw new Error("cdp_key_event requires CDP — call setCDPConnect() first");
|
|
265
|
+
const client = await this.cdpConnect(cdpPort);
|
|
266
|
+
try {
|
|
267
|
+
const { key, code, modifiers, windowsVirtualKeyCode } = step.keyEvent;
|
|
268
|
+
const baseParams = { key, code, modifiers: modifiers ?? 0, windowsVirtualKeyCode: windowsVirtualKeyCode ?? 0, nativeVirtualKeyCode: windowsVirtualKeyCode ?? 0 };
|
|
269
|
+
await client.Input.dispatchKeyEvent({ type: "keyDown", ...baseParams });
|
|
270
|
+
await client.Input.dispatchKeyEvent({ type: "keyUp", ...baseParams });
|
|
271
|
+
return `cdp_key_event: ${modifiers ? `mod${modifiers}+` : ""}${key}`;
|
|
272
|
+
}
|
|
273
|
+
finally {
|
|
274
|
+
await client.close();
|
|
275
|
+
}
|
|
276
|
+
}
|
|
159
277
|
default:
|
|
160
278
|
throw new Error(`Unknown action: ${step.action}`);
|
|
161
279
|
}
|
|
162
280
|
}
|
|
281
|
+
/**
|
|
282
|
+
* Substitute {VAR_NAME} placeholders in step string fields with actual values.
|
|
283
|
+
*/
|
|
284
|
+
substituteVars(step, vars) {
|
|
285
|
+
const sub = (s) => {
|
|
286
|
+
let result = s;
|
|
287
|
+
for (const [key, val] of Object.entries(vars)) {
|
|
288
|
+
result = result.replaceAll(`{${key}}`, val);
|
|
289
|
+
}
|
|
290
|
+
return result;
|
|
291
|
+
};
|
|
292
|
+
const result = { ...step };
|
|
293
|
+
if (result.code)
|
|
294
|
+
result.code = sub(result.code);
|
|
295
|
+
if (result.text)
|
|
296
|
+
result.text = sub(result.text);
|
|
297
|
+
if (result.url)
|
|
298
|
+
result.url = sub(result.url);
|
|
299
|
+
if (result.description)
|
|
300
|
+
result.description = sub(result.description);
|
|
301
|
+
if (result.verify)
|
|
302
|
+
result.verify = sub(result.verify);
|
|
303
|
+
if (result.menuPath)
|
|
304
|
+
result.menuPath = result.menuPath.map(sub);
|
|
305
|
+
return result;
|
|
306
|
+
}
|
|
163
307
|
/**
|
|
164
308
|
* Verify a step's postcondition via CSS selector check.
|
|
165
309
|
*/
|
|
@@ -174,6 +318,85 @@ export class PlaybookEngine {
|
|
|
174
318
|
});
|
|
175
319
|
return r.ok && r.data.matched;
|
|
176
320
|
}
|
|
321
|
+
/**
|
|
322
|
+
* Dismiss a popup detected by the observer daemon.
|
|
323
|
+
* Reads observer state, if popup found, sends the appropriate dismiss action.
|
|
324
|
+
* Non-fatal — if observer isn't running or no popup, silently returns.
|
|
325
|
+
*/
|
|
326
|
+
async dismissPopupIfPresent(sessionId) {
|
|
327
|
+
let popup;
|
|
328
|
+
try {
|
|
329
|
+
popup = getObserverPopup();
|
|
330
|
+
}
|
|
331
|
+
catch {
|
|
332
|
+
return; // Observer not running or state unreadable
|
|
333
|
+
}
|
|
334
|
+
if (!popup)
|
|
335
|
+
return;
|
|
336
|
+
try {
|
|
337
|
+
switch (popup.dismissAction) {
|
|
338
|
+
case "press_escape":
|
|
339
|
+
await this.runtime.keyCombo({ sessionId, keys: ["escape"] });
|
|
340
|
+
break;
|
|
341
|
+
case "click_ok":
|
|
342
|
+
case "click_cancel":
|
|
343
|
+
case "click_close":
|
|
344
|
+
case "click_allow":
|
|
345
|
+
case "click_deny": {
|
|
346
|
+
// Map action to button text
|
|
347
|
+
const buttonMap = {
|
|
348
|
+
click_ok: "OK",
|
|
349
|
+
click_cancel: "Cancel",
|
|
350
|
+
click_close: "Close",
|
|
351
|
+
click_allow: "Allow",
|
|
352
|
+
click_deny: "Don't Allow",
|
|
353
|
+
};
|
|
354
|
+
const buttonText = buttonMap[popup.dismissAction] ?? "OK";
|
|
355
|
+
// Try to click the button by text
|
|
356
|
+
await this.runtime.press({ sessionId, target: { type: "text", value: buttonText } });
|
|
357
|
+
break;
|
|
358
|
+
}
|
|
359
|
+
case "unknown":
|
|
360
|
+
break; // Don't auto-dismiss unknown popups
|
|
361
|
+
}
|
|
362
|
+
// Wait briefly for popup to close
|
|
363
|
+
await sleep(500);
|
|
364
|
+
}
|
|
365
|
+
catch {
|
|
366
|
+
// Popup dismiss failed — non-fatal, continue with step
|
|
367
|
+
}
|
|
368
|
+
}
|
|
369
|
+
/**
|
|
370
|
+
* Resolve an OCR text target to screen coordinates using observer state.
|
|
371
|
+
* Returns center coordinates of the matched text + offsets, or null if not found.
|
|
372
|
+
*/
|
|
373
|
+
resolveOcrTarget(searchText, offsetX, offsetY) {
|
|
374
|
+
let state;
|
|
375
|
+
try {
|
|
376
|
+
state = readObserverState();
|
|
377
|
+
}
|
|
378
|
+
catch {
|
|
379
|
+
return null;
|
|
380
|
+
}
|
|
381
|
+
if (!state?.running || !state.lastFrame?.ocrText)
|
|
382
|
+
return null;
|
|
383
|
+
// Simple text search in OCR output
|
|
384
|
+
// The native OCR (vision.ocr) returns bounding boxes when available.
|
|
385
|
+
// For now we use a fallback: if the observer has the text, we know
|
|
386
|
+
// the element is visible. The caller should provide approximate
|
|
387
|
+
// coordinates via offsetX/offsetY relative to a known anchor.
|
|
388
|
+
const ocrText = state.lastFrame.ocrText;
|
|
389
|
+
if (!ocrText.toLowerCase().includes(searchText.toLowerCase())) {
|
|
390
|
+
return null; // Text not found on screen
|
|
391
|
+
}
|
|
392
|
+
// Text found — return offset coordinates (caller provides absolute offsets
|
|
393
|
+
// or relative to screen center as a basic heuristic)
|
|
394
|
+
if (offsetX !== 0 || offsetY !== 0) {
|
|
395
|
+
return { x: offsetX, y: offsetY };
|
|
396
|
+
}
|
|
397
|
+
// No explicit coordinates — can't determine position from plain OCR text alone
|
|
398
|
+
return null;
|
|
399
|
+
}
|
|
177
400
|
/**
|
|
178
401
|
* Convert playbook target format to runtime Target format.
|
|
179
402
|
*/
|
|
@@ -195,6 +418,68 @@ export class PlaybookEngine {
|
|
|
195
418
|
}
|
|
196
419
|
return undefined;
|
|
197
420
|
}
|
|
421
|
+
getBrowserSelector(step) {
|
|
422
|
+
if (typeof step.target === "string")
|
|
423
|
+
return step.target;
|
|
424
|
+
if (step.target && "selector" in step.target)
|
|
425
|
+
return step.target.selector;
|
|
426
|
+
if (step.verify)
|
|
427
|
+
return step.verify;
|
|
428
|
+
throw new Error(`${step.action} step missing selector target`);
|
|
429
|
+
}
|
|
430
|
+
async focusBrowserElement(client, selector) {
|
|
431
|
+
const result = await client.Runtime.evaluate({
|
|
432
|
+
expression: `(() => {
|
|
433
|
+
const el = document.querySelector(${JSON.stringify(selector)});
|
|
434
|
+
if (!(el instanceof HTMLElement)) return { ok: false, reason: "Element not found: ${selector.replace(/"/g, '\\"')}" };
|
|
435
|
+
el.scrollIntoView({ block: "center" });
|
|
436
|
+
el.focus();
|
|
437
|
+
return { ok: true };
|
|
438
|
+
})()`,
|
|
439
|
+
returnByValue: true,
|
|
440
|
+
});
|
|
441
|
+
const value = result.result?.value;
|
|
442
|
+
if (!value?.ok) {
|
|
443
|
+
throw new Error(value?.reason || `Element not found: ${selector}`);
|
|
444
|
+
}
|
|
445
|
+
}
|
|
446
|
+
async resolveBrowserClickPoint(client, selector) {
|
|
447
|
+
const result = await client.Runtime.evaluate({
|
|
448
|
+
expression: `(() => {
|
|
449
|
+
const el = document.querySelector(${JSON.stringify(selector)});
|
|
450
|
+
if (!(el instanceof HTMLElement)) return { ok: false, reason: "Element not found: ${selector.replace(/"/g, '\\"')}" };
|
|
451
|
+
el.scrollIntoView({ block: "center" });
|
|
452
|
+
const r = el.getBoundingClientRect();
|
|
453
|
+
return { ok: true, x: r.x + r.width / 2, y: r.y + r.height / 2 };
|
|
454
|
+
})()`,
|
|
455
|
+
returnByValue: true,
|
|
456
|
+
});
|
|
457
|
+
const value = result.result?.value;
|
|
458
|
+
if (!value?.ok) {
|
|
459
|
+
throw new Error(value?.reason || `Element not found: ${selector}`);
|
|
460
|
+
}
|
|
461
|
+
return { x: value.x, y: value.y };
|
|
462
|
+
}
|
|
463
|
+
async dispatchMouseClick(client, x, y) {
|
|
464
|
+
await client.Input.dispatchMouseEvent({ type: "mouseMoved", x, y });
|
|
465
|
+
await sleep(40);
|
|
466
|
+
await client.Input.dispatchMouseEvent({ type: "mousePressed", x, y, button: "left", clickCount: 1 });
|
|
467
|
+
await sleep(40);
|
|
468
|
+
await client.Input.dispatchMouseEvent({ type: "mouseReleased", x, y, button: "left", clickCount: 1 });
|
|
469
|
+
}
|
|
470
|
+
async dispatchSelectAll(client) {
|
|
471
|
+
const metaModifier = process.platform === "darwin" ? 4 : 2;
|
|
472
|
+
await client.Input.dispatchKeyEvent({ type: "keyDown", key: "a", code: "KeyA", modifiers: metaModifier });
|
|
473
|
+
await client.Input.dispatchKeyEvent({ type: "keyUp", key: "a", code: "KeyA", modifiers: metaModifier });
|
|
474
|
+
}
|
|
475
|
+
async dispatchKey(client, key, code) {
|
|
476
|
+
await client.Input.dispatchKeyEvent({ type: "keyDown", key, code });
|
|
477
|
+
await client.Input.dispatchKeyEvent({ type: "keyUp", key, code });
|
|
478
|
+
}
|
|
479
|
+
async dispatchTextChar(client, char) {
|
|
480
|
+
await client.Input.dispatchKeyEvent({ type: "keyDown", text: char, key: char, unmodifiedText: char });
|
|
481
|
+
await client.Input.dispatchKeyEvent({ type: "keyUp", text: char, key: char, unmodifiedText: char });
|
|
482
|
+
}
|
|
198
483
|
}
|
|
199
484
|
function sleep(ms) {
|
|
200
485
|
return new Promise((resolve) => setTimeout(resolve, ms));
|