screenhand 0.4.9 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/mcp-desktop.js +297 -46
- package/dist/src/community/publisher.js +4 -2
- package/dist/src/context-tracker.js +36 -6
- package/dist/src/ingestion/reference-merger.js +33 -0
- package/dist/src/memory/recall.js +65 -1
- package/dist/src/memory/research.js +1 -1
- package/dist/src/memory/service.js +26 -5
- package/dist/src/memory/store.js +42 -23
- package/dist/src/native/bridge-client.js +3 -3
- package/dist/src/perception/coordinator.js +62 -15
- package/dist/src/perception/manager.js +65 -1
- package/dist/src/planner/executor.js +6 -2
- package/dist/src/planner/plan-refiner.js +213 -0
- package/dist/src/playbook/engine.js +18 -3
- package/dist/src/playbook/recorder.js +24 -8
- package/dist/src/playbook/runner.js +9 -3
- package/dist/src/playbook/store.js +8 -0
- package/dist/src/recovery/engine.js +9 -3
- package/dist/src/state/app-map.js +6 -2
- package/dist/src/state/state-watcher.js +144 -0
- package/dist/src/supervisor/supervisor.js +1 -1
- package/dist-app-maps/com.apple.iphonesimulator.json +714 -223
- package/dist-references/simulator.json +48 -2
- package/package.json +1 -1
package/dist/mcp-desktop.js
CHANGED
|
@@ -52,8 +52,10 @@ import { PlaybookStore } from "./src/playbook/store.js";
|
|
|
52
52
|
import { ContextTracker } from "./src/context-tracker.js";
|
|
53
53
|
import { McpPlaybookRecorder } from "./src/playbook/mcp-recorder.js";
|
|
54
54
|
import { WorldModel } from "./src/state/index.js";
|
|
55
|
+
import { StateWatcher } from "./src/state/state-watcher.js";
|
|
55
56
|
import { PerceptionManager } from "./src/perception/index.js";
|
|
56
57
|
import { Planner, PlanExecutor, GoalStore, ToolRegistry } from "./src/planner/index.js";
|
|
58
|
+
import { PlanRefiner } from "./src/planner/plan-refiner.js";
|
|
57
59
|
import { RecoveryEngine } from "./src/recovery/index.js";
|
|
58
60
|
import { LearningEngine, LocatorPolicy } from "./src/learning/index.js";
|
|
59
61
|
import { discoverWebElements, testWebElement, compileReference, saveExploreResult, discoverNativeElements } from "./src/platform/explorer.js";
|
|
@@ -531,17 +533,24 @@ const leaseManager = new LeaseManager(LOCK_DIR);
|
|
|
531
533
|
// Playbooks dir holds only executable step sequences for job_create
|
|
532
534
|
// Resolution order: local dev paths → npm dist paths → ~/.screenhand/ user paths
|
|
533
535
|
function resolveDataDir(name) {
|
|
534
|
-
|
|
536
|
+
const hasJson = (dir) => fs.existsSync(dir) && fs.readdirSync(dir).some(f => f.endsWith(".json"));
|
|
537
|
+
// 1. Local dev path (when running from source: references/, playbooks/)
|
|
535
538
|
const local = path.resolve(__dirname, name);
|
|
536
|
-
if (
|
|
539
|
+
if (hasJson(local))
|
|
537
540
|
return local;
|
|
538
|
-
|
|
539
|
-
// 2. npm dist path (when installed via npx/npm)
|
|
541
|
+
// 2. npm dist path — same level (dist-references/ next to dist/)
|
|
540
542
|
const dist = path.resolve(__dirname, `dist-${name}`);
|
|
541
|
-
if (
|
|
543
|
+
if (hasJson(dist))
|
|
542
544
|
return dist;
|
|
543
|
-
|
|
544
|
-
|
|
545
|
+
// 3. npm dist path — parent level (when __dirname is dist/, check ../dist-references/)
|
|
546
|
+
const parentDist = path.resolve(__dirname, "..", `dist-${name}`);
|
|
547
|
+
if (hasJson(parentDist))
|
|
548
|
+
return parentDist;
|
|
549
|
+
// 4. Parent level plain name (../references/)
|
|
550
|
+
const parentLocal = path.resolve(__dirname, "..", name);
|
|
551
|
+
if (hasJson(parentLocal))
|
|
552
|
+
return parentLocal;
|
|
553
|
+
// 5. User home path (always available for user-generated content)
|
|
545
554
|
const userDir = path.join(os.homedir(), ".screenhand", name);
|
|
546
555
|
if (!fs.existsSync(userDir)) {
|
|
547
556
|
fs.mkdirSync(userDir, { recursive: true });
|
|
@@ -563,6 +572,9 @@ const seedAppMapsDir = (() => {
|
|
|
563
572
|
const dist = path.resolve(__dirname, "dist-app-maps");
|
|
564
573
|
if (fs.existsSync(dist))
|
|
565
574
|
return dist;
|
|
575
|
+
const parentDist = path.resolve(__dirname, "..", "dist-app-maps");
|
|
576
|
+
if (fs.existsSync(parentDist))
|
|
577
|
+
return parentDist;
|
|
566
578
|
const local = path.resolve(__dirname, "seed-app-maps");
|
|
567
579
|
if (fs.existsSync(local))
|
|
568
580
|
return local;
|
|
@@ -599,6 +611,7 @@ catch { /* dir may not exist */ }
|
|
|
599
611
|
const planner = new Planner(_executablePlaybookStore, memory, contextTracker, worldModel, learningEngine);
|
|
600
612
|
const goalStore = new GoalStore(path.join(os.homedir(), ".screenhand", "planner"));
|
|
601
613
|
goalStore.init();
|
|
614
|
+
const planRefiner = new PlanRefiner(path.join(os.homedir(), ".screenhand", "planner"));
|
|
602
615
|
const toolRegistry = new ToolRegistry();
|
|
603
616
|
const recoveryEngine = new RecoveryEngine(worldModel, toolRegistry.toExecutor(), memory);
|
|
604
617
|
recoveryEngine.setLearningEngine(learningEngine);
|
|
@@ -606,6 +619,7 @@ recoveryEngine.setAppMap(appMap);
|
|
|
606
619
|
planner.setToolRegistry(toolRegistry);
|
|
607
620
|
planner.setAppMap(appMap);
|
|
608
621
|
perceptionManager.setLearningEngine(learningEngine);
|
|
622
|
+
const stateWatcher = new StateWatcher(worldModel, toolRegistry.toExecutor(), 2_000);
|
|
609
623
|
// ── Reactive event loop: wire perception events to automatic responses ──
|
|
610
624
|
// These fire at perception speed (100-300ms), not LLM speed (~2-3s).
|
|
611
625
|
perceptionManager.on("dialog_detected", (event) => {
|
|
@@ -639,6 +653,28 @@ perceptionManager.on("app_switched", (event) => {
|
|
|
639
653
|
// Log for observability
|
|
640
654
|
console.error(`[reactive] App switched to ${event.bundleId} (pid=${event.pid})`);
|
|
641
655
|
});
|
|
656
|
+
// ── Perception-triggered recovery: focus loss, app crash, stall ──
|
|
657
|
+
// Reactive recovery for focus drift: log the drift and ask the tool executor
// to re-activate the expected app. Fire-and-forget — the handler itself never
// throws, and every failure mode ends up on stderr.
perceptionManager.on("focus_lost", (event) => {
    console.error(`[reactive] Focus lost: expected ${event.expectedBundleId}, got ${event.actualBundleId} — auto-refocusing`);
    // Auto-refocus the expected app
    toolRegistry.toExecutor()("focus", { bundleId: event.expectedBundleId })
        .then((outcome) => {
            // Other callers in this file read `.ok` / `.error` off the resolved
            // value, so a failure may arrive as a resolved result rather than a
            // rejection. Report that case too instead of dropping it.
            if (outcome?.ok === false) {
                console.error(`[reactive] Auto-refocus failed: ${outcome.error ?? "unknown"}`);
            }
        })
        .catch((err) => {
            console.error(`[reactive] Auto-refocus failed: ${err instanceof Error ? err.message : err}`);
        });
});
|
|
664
|
+
// Reactive recovery for a crashed app: log the crash and ask the tool executor
// to launch the same bundle again. Fire-and-forget — failures are only logged.
// NOTE(review): nothing here rate-limits relaunches; if the app crashes on
// startup this could relaunch repeatedly — confirm perception de-duplicates
// "app_crash" events.
perceptionManager.on("app_crash", (event) => {
    console.error(`[reactive] App crash detected: ${event.bundleId} (pid=${event.pid}) — auto-relaunching`);
    // Auto-relaunch the crashed app
    toolRegistry.toExecutor()("launch", { bundleId: event.bundleId })
        .then((outcome) => {
            // The executor's resolved value carries `.ok` / `.error` elsewhere in
            // this file; surface a resolved failure instead of ignoring it.
            if (outcome?.ok === false) {
                console.error(`[reactive] Auto-relaunch failed: ${outcome.error ?? "unknown"}`);
            }
        })
        .catch((err) => {
            console.error(`[reactive] Auto-relaunch failed: ${err instanceof Error ? err.message : err}`);
        });
});
|
|
671
|
+
// Reactive diagnosis for a stalled UI: log how long the app has been idle and
// trigger a screenshot through the tool executor. Fire-and-forget.
// NOTE(review): the screenshot result is not stored by this handler; it is
// presumably picked up via the executor's own side effects — confirm.
perceptionManager.on("stall_detected", (event) => {
    console.error(`[reactive] UI stall detected: ${event.bundleId} — no changes for ${(event.stallMs / 1000).toFixed(0)}s — taking screenshot for diagnosis`);
    // Take a screenshot so the next LLM call can see what's on screen
    toolRegistry.toExecutor()("screenshot", {})
        .then((outcome) => {
            // A failure can come back as a resolved { ok: false } result (see how
            // other callers in this file read `.ok`); log it rather than drop it.
            if (outcome?.ok === false) {
                console.error(`[reactive] Stall screenshot failed: ${outcome.error ?? "unknown"}`);
            }
        })
        .catch((err) => {
            console.error(`[reactive] Stall screenshot failed: ${err instanceof Error ? err.message : err}`);
        });
});
|
|
642
678
|
const mcpRecorder = new McpPlaybookRecorder(playbooksDir);
|
|
643
679
|
const referenceMerger = new ReferenceMerger(referencesDir);
|
|
644
680
|
const communityPublisher = new PlaybookPublisher();
|
|
@@ -671,6 +707,7 @@ const MEMORY_TOOLS = new Set([
|
|
|
671
707
|
]);
|
|
672
708
|
// Track the strategy we're currently following (for feedback loop)
|
|
673
709
|
let activeStrategyFingerprint = null;
|
|
710
|
+
let autoExecutionInProgress = false; // guard against concurrent auto-execution
|
|
674
711
|
let currentAdaptiveBudget = null;
|
|
675
712
|
// Intercept all tool registrations to auto-log + auto-recall
|
|
676
713
|
const _rawOriginalTool = server.tool.bind(server);
|
|
@@ -760,7 +797,7 @@ server.tool = (...args) => {
|
|
|
760
797
|
if (!perceptionManager.isRunning && bridgeReady) {
|
|
761
798
|
const focusApp = worldModel.getState().focusedApp;
|
|
762
799
|
if (focusApp?.bundleId && focusApp?.pid) {
|
|
763
|
-
perceptionManager.tryAutoStart(focusApp, bridge).catch(() => { });
|
|
800
|
+
perceptionManager.tryAutoStart(focusApp, bridge).catch((e) => { process.stderr.write(`[screenhand] perception auto-start failed: ${e instanceof Error ? e.message : String(e)}\n`); });
|
|
764
801
|
installSafariEnricher(focusApp.bundleId);
|
|
765
802
|
}
|
|
766
803
|
}
|
|
@@ -811,7 +848,7 @@ server.tool = (...args) => {
|
|
|
811
848
|
"type_with_fallback", "select_with_fallback", "scroll_with_fallback",
|
|
812
849
|
]);
|
|
813
850
|
try {
|
|
814
|
-
|
|
851
|
+
let result = await originalHandler(params, extra);
|
|
815
852
|
const durationMs = Date.now() - start;
|
|
816
853
|
// ── POST-CALL: log action (async, non-blocking) ──
|
|
817
854
|
const entry = {
|
|
@@ -863,7 +900,9 @@ server.tool = (...args) => {
|
|
|
863
900
|
try {
|
|
864
901
|
appMap.recordPageTransition(postBundleIdForCtx, pageTransition.from, pageTransition.to, toolName);
|
|
865
902
|
}
|
|
866
|
-
catch {
|
|
903
|
+
catch (e) {
|
|
904
|
+
process.stderr.write(`[screenhand] nav tracking failed: ${e instanceof Error ? e.message : String(e)}\n`);
|
|
905
|
+
}
|
|
867
906
|
}
|
|
868
907
|
// ── POST-CALL: detect focus drift ──
|
|
869
908
|
const postBundleId = worldModel.getState().focusedApp?.bundleId ?? null;
|
|
@@ -937,7 +976,9 @@ server.tool = (...args) => {
|
|
|
937
976
|
}
|
|
938
977
|
}
|
|
939
978
|
}
|
|
940
|
-
catch {
|
|
979
|
+
catch (e) {
|
|
980
|
+
process.stderr.write(`[screenhand] app map feature learning failed: ${e instanceof Error ? e.message : String(e)}\n`);
|
|
981
|
+
}
|
|
941
982
|
}
|
|
942
983
|
if (!resultIsError && learnBundleId !== "unknown") {
|
|
943
984
|
try {
|
|
@@ -1359,7 +1400,9 @@ server.tool = (...args) => {
|
|
|
1359
1400
|
}
|
|
1360
1401
|
}
|
|
1361
1402
|
}
|
|
1362
|
-
catch {
|
|
1403
|
+
catch (e) {
|
|
1404
|
+
process.stderr.write(`[screenhand] hierarchy extraction failed: ${e instanceof Error ? e.message : String(e)}\n`);
|
|
1405
|
+
}
|
|
1363
1406
|
}
|
|
1364
1407
|
}
|
|
1365
1408
|
// ── Conditional UI visibility tracking (throttled) ──
|
|
@@ -1407,7 +1450,9 @@ server.tool = (...args) => {
|
|
|
1407
1450
|
}
|
|
1408
1451
|
}
|
|
1409
1452
|
}
|
|
1410
|
-
catch {
|
|
1453
|
+
catch (e) {
|
|
1454
|
+
process.stderr.write(`[screenhand] visibility tracking failed: ${e instanceof Error ? e.message : String(e)}\n`);
|
|
1455
|
+
}
|
|
1411
1456
|
}
|
|
1412
1457
|
}
|
|
1413
1458
|
// ── Timing recording: track tool response times per element ──
|
|
@@ -1501,25 +1546,65 @@ server.tool = (...args) => {
|
|
|
1501
1546
|
if (knownError) {
|
|
1502
1547
|
hints.push(`⚡ Memory: "${toolName}" has failed before: "${knownError.error}" (${knownError.occurrences}x). Fix: ${knownError.resolution}`);
|
|
1503
1548
|
}
|
|
1504
|
-
//
|
|
1549
|
+
// ── Strategy matching: auto-execute proven strategies OR hint unproven ones ──
|
|
1505
1550
|
const recentTools = memory.getRecentToolNames();
|
|
1506
|
-
const
|
|
1507
|
-
|
|
1508
|
-
|
|
1509
|
-
|
|
1510
|
-
|
|
1511
|
-
|
|
1512
|
-
|
|
1513
|
-
|
|
1514
|
-
|
|
1515
|
-
|
|
1551
|
+
const currentBundleForStrategy = worldModel.getState().focusedApp?.bundleId;
// Try auto-execution first (10+ successes, 0 failures)
// Guard: skip if another auto-execution is already in progress
const autoExec = autoExecutionInProgress ? null : memory.getAutoExecutableStrategy(recentTools, currentBundleForStrategy);
if (autoExec) {
    autoExecutionInProgress = true;
    activeStrategyFingerprint = autoExec.fingerprint;
    const autoResults = [];
    let allOk = true;
    try {
        hints.push(`🚀 Auto-executing proven strategy "${autoExec.strategy.task}" (${autoExec.strategy.successCount} wins) — ${autoExec.remainingSteps.length} steps remaining`);
        for (const step of autoExec.remainingSteps) {
            try {
                const stepResult = await toolRegistry.toExecutor()(step.tool, step.params);
                autoResults.push({ tool: step.tool, ...stepResult });
                // Record outcome for learning. The "target" is the first string
                // found among params.target, params.title and params.text.
                const target = typeof step.params.target === "string" ? step.params.target
                    : typeof step.params.title === "string" ? step.params.title
                        : typeof step.params.text === "string" ? step.params.text
                            : null;
                contextTracker.recordOutcome(step.tool, { target, text: typeof step.params.text === "string" ? step.params.text : null }, stepResult.ok, stepResult.ok ? null : (stepResult.error ?? null));
                if (!stepResult.ok) {
                    allOk = false;
                    hints.push(` ✗ ${step.tool} failed: ${stepResult.error ?? "unknown"}`);
                    break; // Stop auto-execution on first failure
                }
                hints.push(` ✓ ${step.tool} — ok`);
            }
            catch (err) {
                allOk = false;
                const message = err instanceof Error ? err.message : String(err);
                // Keep the thrown step in the summary so the appended
                // AUTO-EXECUTED section does not hide the failure.
                autoResults.push({ tool: step.tool, ok: false, error: message });
                hints.push(` ✗ ${step.tool} threw: ${message}`);
                break;
            }
        }
        // Record strategy outcome
        memory.recordStrategyOutcome(autoExec.fingerprint, allOk);
    }
    finally {
        // Always release the guard: without this, an exception while recording
        // the outcome would leave autoExecutionInProgress stuck at true and
        // silently disable auto-execution for the rest of the process.
        activeStrategyFingerprint = null;
        autoExecutionInProgress = false;
    }
    // Append auto-execution results to the response
    const autoSummary = autoResults.map((r) => `${r.tool}: ${r.ok ? "ok" : (r.error ?? "unknown")}`).join("\n");
    const resultContent = Array.isArray(result?.content) ? result.content : [];
    resultContent.push({ type: "text", text: `\n── AUTO-EXECUTED (${autoResults.length} steps) ──\n${autoSummary}` });
    result = { ...result, content: resultContent };
}
else {
    // Fall back to strategy hint (suggest but don't execute)
    const strategyHint = memory.quickStrategyHint(recentTools, currentBundleForStrategy);
    if (strategyHint) {
        activeStrategyFingerprint = strategyHint.fingerprint;
        const nextParams = Object.keys(strategyHint.nextStep.params).length > 0
            ? `(${JSON.stringify(strategyHint.nextStep.params)})`
            : "";
        hints.push(`💡 Memory: This matches strategy "${strategyHint.strategy.task}" (${strategyHint.strategy.successCount} wins, ${strategyHint.strategy.failCount ?? 0} fails). Next step: ${strategyHint.nextStep.tool}${nextParams}`);
    }
    else if (activeStrategyFingerprint && recentTools.length > 0) {
        // A strategy was being followed and no longer matches: count it as a
        // success and stop tracking it.
        memory.recordStrategyOutcome(activeStrategyFingerprint, true);
        activeStrategyFingerprint = null;
    }
}
|
|
1524
1609
|
// Attach hints in BOTH content (visible) and _meta (for programmatic access)
|
|
1525
1610
|
if (hints.length > 0) {
|
|
@@ -1735,7 +1820,9 @@ server.tool("focus", "Focus/activate an application (or a specific window by win
|
|
|
1735
1820
|
targetApp = { bundleId, name: appWin.appName, pid: appWin.pid || appWin.ownerPid };
|
|
1736
1821
|
}
|
|
1737
1822
|
}
|
|
1738
|
-
catch {
|
|
1823
|
+
catch (e) {
|
|
1824
|
+
process.stderr.write(`[screenhand] focus window check for ${bundleId} failed: ${e instanceof Error ? e.message : String(e)}\n`);
|
|
1825
|
+
}
|
|
1739
1826
|
if (!targetApp) {
|
|
1740
1827
|
return { content: [{ type: "text", text: `Error: ${bundleId} is not running. Use launch("${bundleId}") first.` }], isError: true };
|
|
1741
1828
|
}
|
|
@@ -1806,10 +1893,14 @@ server.tool("focus", "Focus/activate an application (or a specific window by win
|
|
|
1806
1893
|
await perceptionManager.ensureStarted(ctx);
|
|
1807
1894
|
installSafariEnricher(bundleId);
|
|
1808
1895
|
}
|
|
1809
|
-
catch {
|
|
1896
|
+
catch (e) {
|
|
1897
|
+
process.stderr.write(`[screenhand] perception ensureStarted in focus failed: ${e instanceof Error ? e.message : String(e)}\n`);
|
|
1898
|
+
}
|
|
1810
1899
|
}
|
|
1811
1900
|
}
|
|
1812
|
-
catch {
|
|
1901
|
+
catch (e) {
|
|
1902
|
+
process.stderr.write(`[screenhand] focus world-model update failed: ${e instanceof Error ? e.message : String(e)}\n`);
|
|
1903
|
+
}
|
|
1813
1904
|
return { content: [{ type: "text", text: focusMsg }] };
|
|
1814
1905
|
}
|
|
1815
1906
|
finally {
|
|
@@ -1873,7 +1964,9 @@ server.tool("launch", "Launch an application. Chrome/Chromium browsers are launc
|
|
|
1873
1964
|
await perceptionManager.ensureStarted({ bundleId, appName: r.appName ?? bundleId, pid: r.pid, windowTitle: "", ...(windowId != null ? { windowId } : {}) });
|
|
1874
1965
|
installSafariEnricher(bundleId);
|
|
1875
1966
|
}
|
|
1876
|
-
catch {
|
|
1967
|
+
catch (e) {
|
|
1968
|
+
process.stderr.write(`[screenhand] perception start after launch failed: ${e instanceof Error ? e.message : String(e)}\n`);
|
|
1969
|
+
}
|
|
1877
1970
|
let msg = `Launched ${r.appName} pid=${r.pid}`;
|
|
1878
1971
|
if (chromeAppName) {
|
|
1879
1972
|
const port = cdpPort ?? 9222;
|
|
@@ -2102,7 +2195,9 @@ server.tool("ui_press", "PREFERRED: Find and press/click a UI element by its tit
|
|
|
2102
2195
|
return { content: [{ type: "text", text: `Element "${title}" not found in PID ${pid}. A system dialog from "${front.name}" (${front.bundleId}, PID ${front.pid}) may be blocking. Dismiss it first, or use click(x, y) to interact with the dialog directly.` }], isError: true };
|
|
2103
2196
|
}
|
|
2104
2197
|
}
|
|
2105
|
-
catch {
|
|
2198
|
+
catch (e) {
|
|
2199
|
+
process.stderr.write(`[screenhand] frontmost check in ui_press failed: ${e instanceof Error ? e.message : String(e)}\n`);
|
|
2200
|
+
}
|
|
2106
2201
|
throw new Error(`Element "${title}" not found (searched title, value, and description)`);
|
|
2107
2202
|
}
|
|
2108
2203
|
}
|
|
@@ -2323,7 +2418,9 @@ server.tool("type_text", "Type text using the keyboard. Auto-detects Electron ap
|
|
|
2323
2418
|
catch { /* not available on this port */ }
|
|
2324
2419
|
}
|
|
2325
2420
|
}
|
|
2326
|
-
catch {
|
|
2421
|
+
catch (e) {
|
|
2422
|
+
process.stderr.write(`[screenhand] CDP auto-detect failed: ${e instanceof Error ? e.message : String(e)}\n`);
|
|
2423
|
+
}
|
|
2327
2424
|
}
|
|
2328
2425
|
if (electronCdpPort) {
|
|
2329
2426
|
// CDP path: click editor to ensure focus, then type via key events
|
|
@@ -2386,7 +2483,9 @@ server.tool("key", "Press a key combination", {
|
|
|
2386
2483
|
const front = await bridge.call("app.frontmost", {});
|
|
2387
2484
|
targetPid = front.pid;
|
|
2388
2485
|
}
|
|
2389
|
-
catch {
|
|
2486
|
+
catch (e) {
|
|
2487
|
+
process.stderr.write(`[screenhand] key frontmost PID resolve failed: ${e instanceof Error ? e.message : String(e)}\n`);
|
|
2488
|
+
}
|
|
2390
2489
|
}
|
|
2391
2490
|
const keys = combo.split("+");
|
|
2392
2491
|
const hasModifier = keys.some(k => ["cmd", "ctrl", "alt", "shift"].includes(k.toLowerCase()));
|
|
@@ -2456,7 +2555,9 @@ async function getCDPClient(tabId, overridePort) {
|
|
|
2456
2555
|
try {
|
|
2457
2556
|
perceptionManager.activateCDP(client);
|
|
2458
2557
|
}
|
|
2459
|
-
catch {
|
|
2558
|
+
catch (e) {
|
|
2559
|
+
process.stderr.write(`[screenhand] perception CDP activate failed: ${e instanceof Error ? e.message : String(e)}\n`);
|
|
2560
|
+
}
|
|
2460
2561
|
return { client, targetId: targetId, CDP: cdp, port };
|
|
2461
2562
|
}
|
|
2462
2563
|
// ── Random delay helper ──
|
|
@@ -3393,6 +3494,12 @@ server.tool("platform_explore", "Autonomously explore an app or website. Maps al
|
|
|
3393
3494
|
// Compile and save
|
|
3394
3495
|
const result = compileReference(platform, "web", tested, url);
|
|
3395
3496
|
const filePath = saveExploreResult(referencesDir, result);
|
|
3497
|
+
// Auto-merge explore selectors into main reference so data isn't fragmented
|
|
3498
|
+
if (result.selectors && Object.keys(result.selectors).length > 0) {
|
|
3499
|
+
referenceMerger.mergeExploreSelectors(result.selectors, result.errors, "", platform);
|
|
3500
|
+
}
|
|
3501
|
+
// Hot-reload: make new data immediately available to context tracker
|
|
3502
|
+
_playbookStoreForContext.reload();
|
|
3396
3503
|
return { content: [{ type: "text", text: `Exploration complete: ${filePath}\n\nElements found: ${elements.length}\nTested: ${result.testedElements}\nWorking selectors: ${result.workingSelectors}\nErrors: ${result.errors.length}\n\nKey discoveries:\n${result.keyDiscoveries.map(d => ` - ${d}`).join("\n")}` }] };
|
|
3397
3504
|
}
|
|
3398
3505
|
else if (bundleId) {
|
|
@@ -3414,6 +3521,12 @@ server.tool("platform_explore", "Autonomously explore an app or website. Maps al
|
|
|
3414
3521
|
...el, clickWorked: true, result: "discovered_not_tested",
|
|
3415
3522
|
})), undefined, bundleId);
|
|
3416
3523
|
const filePath = saveExploreResult(referencesDir, result);
|
|
3524
|
+
// Auto-merge explore selectors into main reference so data isn't fragmented
|
|
3525
|
+
if (result.selectors && Object.keys(result.selectors).length > 0) {
|
|
3526
|
+
referenceMerger.mergeExploreSelectors(result.selectors, result.errors, bundleId, platform);
|
|
3527
|
+
}
|
|
3528
|
+
// Hot-reload: make new data immediately available to context tracker
|
|
3529
|
+
_playbookStoreForContext.reload();
|
|
3417
3530
|
return { content: [{ type: "text", text: `Native app exploration complete: ${filePath}\n\nElements discovered: ${elements.length}\n(Native elements discovered but not auto-clicked for safety. Use playbook_record to test interactively.)` }] };
|
|
3418
3531
|
}
|
|
3419
3532
|
else {
|
|
@@ -5027,7 +5140,9 @@ server.tool("scroll_with_fallback", "Scroll within an element or the active wind
|
|
|
5027
5140
|
return { content: [{ type: "text", text: `"${target}" is visible after ${i} scroll(s).` }] };
|
|
5028
5141
|
}
|
|
5029
5142
|
}
|
|
5030
|
-
catch {
|
|
5143
|
+
catch (e) {
|
|
5144
|
+
process.stderr.write(`[screenhand] OCR during scroll search failed: ${e instanceof Error ? e.message : String(e)}\n`);
|
|
5145
|
+
}
|
|
5031
5146
|
// Scroll once
|
|
5032
5147
|
const deltaX = direction === "left" ? -scrollAmount : direction === "right" ? scrollAmount : 0;
|
|
5033
5148
|
const deltaY = direction === "up" ? -scrollAmount : direction === "down" ? scrollAmount : 0;
|
|
@@ -5154,7 +5269,9 @@ server.tool("wait_for_state", "Wait until a condition is met on screen: text app
|
|
|
5154
5269
|
await client.close();
|
|
5155
5270
|
}
|
|
5156
5271
|
}
|
|
5157
|
-
catch {
|
|
5272
|
+
catch (e) {
|
|
5273
|
+
process.stderr.write(`[screenhand] wait_for_state CDP check failed: ${e instanceof Error ? e.message : String(e)}\n`);
|
|
5274
|
+
}
|
|
5158
5275
|
}
|
|
5159
5276
|
const elapsed = Date.now() - (deadline - timeout);
|
|
5160
5277
|
lastCheck = `${elapsed}ms`;
|
|
@@ -5440,6 +5557,18 @@ function getJobRunner() {
|
|
|
5440
5557
|
timeout: 15000,
|
|
5441
5558
|
}).trim();
|
|
5442
5559
|
});
|
|
5560
|
+
// Wire learning feedback: PlaybookEngine reports step outcomes to context tracker + AppMap
|
|
5561
|
+
playbookEngine.setOutcomeCallback((step, success, error) => {
    // Only string targets are recorded; anything else is reported as null.
    const target = typeof step.target === "string" ? step.target : null;
    contextTracker.recordOutcome(step.action, { target, text: step.text }, success, error);
    // Prefer the currently focused app; fall back to the last bundle seen.
    const bid = worldModel.getState().focusedApp?.bundleId ?? lastKnownBundleId;
    if (bid && target) {
        try {
            appMap.recordElementOutcome(bid, "auto", target, success);
        }
        catch (e) {
            // Non-critical: a learning failure must not break playbook execution,
            // but log it so it is diagnosable like the other best-effort paths.
            process.stderr.write(`[screenhand] playbook element outcome recording failed: ${e instanceof Error ? e.message : String(e)}\n`);
        }
    }
});
|
|
5443
5572
|
activeJobRunner = new JobRunner(bridge, jobManager, leaseManager, supervisor, (() => {
|
|
5444
5573
|
const cfg = {
|
|
5445
5574
|
hasCDP: cdpPort !== null,
|
|
@@ -5616,10 +5745,22 @@ originalTool("plan_execute", "Run a plan automatically. Known steps (from playbo
|
|
|
5616
5745
|
if (!goal) {
|
|
5617
5746
|
return { content: [{ type: "text", text: `Goal not found: ${goalId}` }] };
|
|
5618
5747
|
}
|
|
5619
|
-
const
|
|
5748
|
+
const focusedBundleId = worldModel.getState().focusedApp?.bundleId ?? "unknown";
|
|
5749
|
+
const adaptiveBudget = learningEngine.getAdaptiveBudget(focusedBundleId);
|
|
5620
5750
|
const executor = new PlanExecutor(worldModel, planner, toolRegistry.toExecutor(), { postconditionWaitMs: adaptiveBudget.verifyMs, defaultStepTimeout: Math.max(30_000, adaptiveBudget.actMs * 2) }, recoveryEngine, learningEngine);
|
|
5621
5751
|
executor.setAppMap(appMap);
|
|
5622
|
-
|
|
5752
|
+
// Enable perception-triggered recovery during plan execution
|
|
5753
|
+
perceptionManager.setExpectedApp(focusedBundleId);
|
|
5754
|
+
perceptionManager.startStallDetection(30_000);
|
|
5755
|
+
let result;
|
|
5756
|
+
try {
|
|
5757
|
+
result = await executor.executeGoal(goal);
|
|
5758
|
+
}
|
|
5759
|
+
finally {
|
|
5760
|
+
// Disable reactive recovery after plan completes
|
|
5761
|
+
perceptionManager.setExpectedApp(null);
|
|
5762
|
+
perceptionManager.stopStallDetection();
|
|
5763
|
+
}
|
|
5623
5764
|
goalStore.update(goalId, goal);
|
|
5624
5765
|
// Check if paused at an LLM step
|
|
5625
5766
|
if ("paused" in result) {
|
|
@@ -5658,7 +5799,25 @@ originalTool("plan_execute", "Run a plan automatically. Known steps (from playbo
|
|
|
5658
5799
|
}
|
|
5659
5800
|
}
|
|
5660
5801
|
}
|
|
5661
|
-
catch {
|
|
5802
|
+
catch (e) {
|
|
5803
|
+
process.stderr.write(`[screenhand] strategy recording failed: ${e instanceof Error ? e.message : String(e)}\n`);
|
|
5804
|
+
}
|
|
5805
|
+
// Self-improving plans: refine and check for graduation
|
|
5806
|
+
try {
|
|
5807
|
+
const refinement = planRefiner.refine(goal, result);
|
|
5808
|
+
if (refinement.refinementCount > 0) {
|
|
5809
|
+
process.stderr.write(`[plan-refiner] Refined plan for "${goal.description}" (${refinement.refinementCount}x)\n`);
|
|
5810
|
+
}
|
|
5811
|
+
// Check graduation to playbook (3+ refinements)
|
|
5812
|
+
const playbook = planRefiner.checkGraduation(goal.description, focusedBundleId, worldModel.getState().focusedApp?.appName ?? focusedBundleId);
|
|
5813
|
+
if (playbook) {
|
|
5814
|
+
_playbookStoreForContext.save(playbook);
|
|
5815
|
+
process.stderr.write(`[plan-refiner] Plan GRADUATED to playbook: ${playbook.id}\n`);
|
|
5816
|
+
}
|
|
5817
|
+
}
|
|
5818
|
+
catch (e) {
|
|
5819
|
+
process.stderr.write(`[plan-refiner] Refinement failed: ${e instanceof Error ? e.message : String(e)}\n`);
|
|
5820
|
+
}
|
|
5662
5821
|
}
|
|
5663
5822
|
const lines = [
|
|
5664
5823
|
result.success ? "Goal completed successfully." : `Goal failed: ${result.error}`,
|
|
@@ -6419,6 +6578,88 @@ server.tool("observer_ocr_roi", "Submit a targeted ROI OCR command to the runnin
|
|
|
6419
6578
|
return { content: [{ type: "text", text: `ROI OCR command submitted: ${id}\nRegion: (${x}, ${y}, ${width}×${height})\nThe daemon will process this on its next cycle. Call observer_ocr_roi with commandId="${id}" to poll the result.` }] };
|
|
6420
6579
|
});
|
|
6421
6580
|
// ═══════════════════════════════════════════════
|
|
6581
|
+
// STATE WATCHER — Continuous observation event bus
|
|
6582
|
+
// ═══════════════════════════════════════════════
|
|
6583
|
+
// Tool: watch_start — starts the state watcher and reports how many rules
// are currently registered.
server.tool("watch_start", "Start the state watcher polling loop. Evaluates registered watch rules every 2s against the world model.", {}, async () => {
    stateWatcher.start();
    const registeredCount = stateWatcher.getRules().length;
    const text = `State watcher started. ${registeredCount} rules registered.`;
    return { content: [{ type: "text", text }] };
});
|
|
6588
|
+
// Tool: watch_stop — stops the state watcher and confirms.
server.tool("watch_stop", "Stop the state watcher polling loop.", {}, async () => {
    stateWatcher.stop();
    const text = "State watcher stopped.";
    return { content: [{ type: "text", text }] };
});
|
|
6592
|
+
// Tool: watch_register — registers an element watch rule on the state watcher.
// Rejects tools on the block list, tools unknown to the registry, and an
// invalid maxFires before anything is registered. Returns an MCP text result;
// validation failures are returned with isError: true rather than thrown.
server.tool("watch_register", "Register a watch rule: when element with matching title appears, execute an action. Use for automated responses to known UI states.", {
    id: z.string().describe("Unique rule ID"),
    elementTitle: z.string().describe("UI element title/label to watch for (case-insensitive substring match)"),
    actionTool: z.string().describe("Tool to execute when element appears (e.g. click_text, key)"),
    actionParams: z.record(z.string(), z.unknown()).describe("Params for the action tool"),
    bundleId: z.string().optional().describe("Only match when this app is focused"),
    maxFires: z.number().optional().describe("Max times to fire (0=unlimited, default=1)"),
}, async ({ id, elementTitle, actionTool, actionParams, bundleId, maxFires }) => {
    // Validate tool exists and is safe for automated execution
    const BLOCKED_WATCH_TOOLS = new Set(["applescript", "browser_js", "browser_stealth"]);
    if (BLOCKED_WATCH_TOOLS.has(actionTool)) {
        return { content: [{ type: "text", text: `Tool "${actionTool}" is not allowed in watch rules (security: prevents arbitrary code execution)` }], isError: true };
    }
    if (!toolRegistry.has(actionTool)) {
        return { content: [{ type: "text", text: `Unknown tool: "${actionTool}"` }], isError: true };
    }
    // maxFires is documented as a fire count (0 = unlimited); refuse values
    // that are not a non-negative integer instead of storing them on the rule.
    if (maxFires !== undefined && !(Number.isInteger(maxFires) && maxFires >= 0)) {
        return { content: [{ type: "text", text: `Invalid maxFires: ${maxFires} (expected a non-negative integer, 0=unlimited)` }], isError: true };
    }
    stateWatcher.watchForElement(id, elementTitle, { tool: actionTool, params: actionParams }, bundleId);
    if (maxFires !== undefined) {
        // Update maxFires on the registered rule. watchForElement takes no
        // maxFires argument here, so the stored rule is patched directly.
        const ruleState = stateWatcher.rules.get(id);
        if (ruleState) {
            ruleState.rule.maxFires = maxFires;
        }
    }
    return { content: [{ type: "text", text: `Watch rule "${id}" registered: when "${elementTitle}" appears → ${actionTool}(${JSON.stringify(actionParams)})` }] };
});
|
|
6621
|
+
// Tool: watch_dialog — registers a dialog watch rule keyed by a
// case-insensitive regex over dialog titles. Applies the same action-tool
// validation as watch_register, so a dialog rule cannot be used to reach
// tools that are blocked for automated execution. Returns an MCP text result;
// validation failures are returned with isError: true rather than thrown.
server.tool("watch_dialog", "Register a dialog watch rule: when a dialog matching the pattern appears, auto-execute an action.", {
    id: z.string().describe("Unique rule ID"),
    titlePattern: z.string().describe("Regex pattern to match dialog titles"),
    actionTool: z.string().describe("Tool to execute (e.g. click_text, key)"),
    actionParams: z.record(z.string(), z.unknown()).describe("Params for the action tool"),
}, async ({ id, titlePattern, actionTool, actionParams }) => {
    // Validate tool exists and is safe for automated execution
    const BLOCKED_WATCH_TOOLS = new Set(["applescript", "browser_js", "browser_stealth"]);
    if (BLOCKED_WATCH_TOOLS.has(actionTool)) {
        return { content: [{ type: "text", text: `Tool "${actionTool}" is not allowed in watch rules (security: prevents arbitrary code execution)` }], isError: true };
    }
    if (!toolRegistry.has(actionTool)) {
        return { content: [{ type: "text", text: `Unknown tool: "${actionTool}"` }], isError: true };
    }
    // Validate regex — reject patterns that could cause ReDoS
    let regex;
    try {
        regex = new RegExp(titlePattern, "i");
        // Quick sanity check — if it takes >50ms on a test string, reject.
        // NOTE(review): this probe is a run of "a" only, so it times one input
        // shape; a pattern that is slow only on other inputs still passes.
        const testStr = "a".repeat(100);
        const t0 = Date.now();
        regex.test(testStr);
        if (Date.now() - t0 > 50) {
            return { content: [{ type: "text", text: `Rejected: regex pattern "${titlePattern}" is too expensive (potential ReDoS)` }], isError: true };
        }
    }
    catch (e) {
        return { content: [{ type: "text", text: `Invalid regex: ${e instanceof Error ? e.message : String(e)}` }], isError: true };
    }
    stateWatcher.watchForDialog(id, regex, { tool: actionTool, params: actionParams });
    return { content: [{ type: "text", text: `Dialog watch "${id}" registered: /${titlePattern}/i → ${actionTool}(${JSON.stringify(actionParams)})` }] };
});
|
|
6645
|
+
// Tool: watch_unregister — removes one rule by ID and reports whether the
// state watcher actually had it.
server.tool("watch_unregister", "Remove a watch rule by ID.", {
    id: z.string().describe("Rule ID to remove"),
}, async ({ id }) => {
    let text;
    if (stateWatcher.unregister(id)) {
        text = `Rule "${id}" removed.`;
    }
    else {
        text = `Rule "${id}" not found.`;
    }
    return { content: [{ type: "text", text }] };
});
|
|
6651
|
+
/**
 * MCP tool "watch_status": returns a plain-text report with the watcher's
 * running state, the number of registered rules, and one line per rule
 * showing its ID, description and fire count.
 */
server.tool("watch_status", "Get all registered watch rules and their fire counts.", {}, async () => {
    const registered = stateWatcher.getRules();
    const active = stateWatcher.isRunning;
    // Header section, followed by a blank separator line.
    const header = [
        `State watcher: ${active ? "running" : "stopped"}`,
        `Rules: ${registered.length}`,
        "",
    ];
    const ruleLines = registered.map((rule) => ` [${rule.id}] ${rule.description} (fired ${rule.fireCount}x)`);
    const report = header.concat(ruleLines).join("\n");
    return { content: [{ type: "text", text: report }] };
});
|
|
6662
|
+
// ═══════════════════════════════════════════════
|
|
6422
6663
|
// PHASE 6: TOOL MASTERY — Ingestion + Community
|
|
6423
6664
|
// ═══════════════════════════════════════════════
|
|
6424
6665
|
server.tool("scan_menu_bar", "Scan an app's menu bar via AX tree. Extracts all menu paths, keyboard shortcuts, and enabled/disabled states. Automatically merges discovered shortcuts into the reference file.", {
|
|
@@ -6480,6 +6721,8 @@ server.tool("scan_menu_bar", "Scan an app's menu bar via AX tree. Extracts all m
|
|
|
6480
6721
|
});
|
|
6481
6722
|
}
|
|
6482
6723
|
}
|
|
6724
|
+
// Hot-reload: make new reference data immediately available to context tracker
|
|
6725
|
+
_playbookStoreForContext.reload();
|
|
6483
6726
|
let output = lines.join("\n") + bootstrapInfo;
|
|
6484
6727
|
output = redactUsername(output);
|
|
6485
6728
|
output = output.replace(/Log Out [^\n:]+/g, "Log Out [USER]");
|
|
@@ -6543,6 +6786,8 @@ server.tool("ingest_documentation", "Parse a documentation page (HTML, markdown,
|
|
|
6543
6786
|
}
|
|
6544
6787
|
}
|
|
6545
6788
|
}
|
|
6789
|
+
// Hot-reload: make new reference data immediately available to context tracker
|
|
6790
|
+
_playbookStoreForContext.reload();
|
|
6546
6791
|
return { content: [{ type: "text", text: lines.join("\n") }] };
|
|
6547
6792
|
});
|
|
6548
6793
|
server.tool("ingest_tutorial", "Extract structured playbook steps from a video transcript (e.g. YouTube captions). Converts tutorial narration into actionable automation steps with tool mappings.", {
|
|
@@ -6633,6 +6878,8 @@ server.tool("discover_features", "Extract features from an app's official websit
|
|
|
6633
6878
|
lines.push(` [${f.category}] ${f.name}: ${f.description}`);
|
|
6634
6879
|
}
|
|
6635
6880
|
}
|
|
6881
|
+
// Hot-reload: make new reference data immediately available to context tracker
|
|
6882
|
+
_playbookStoreForContext.reload();
|
|
6636
6883
|
return { content: [{ type: "text", text: lines.join("\n") }] };
|
|
6637
6884
|
});
|
|
6638
6885
|
server.tool("coverage_report", "Check what ScreenHand knows about an app: shortcuts, selectors, flows, playbooks, error patterns, and stability %. Useful before complex workflows to decide strategy: learn first (if empty), go fast (if high coverage), or use fallback tools (if error patterns exist). Optional for quick actions.", {
|
|
@@ -6741,10 +6988,14 @@ originalTool("community_fetch", "Search community playbooks for a platform or wo
|
|
|
6741
6988
|
// START
|
|
6742
6989
|
// ═══════════════════════════════════════════════
|
|
6743
6990
|
/**
 * Entry point: installs shutdown hooks that stop the watchers and flush all
 * learned state, then connects the MCP server to a stdio transport.
 */
async function main() {
    // Flush all learned state on shutdown (signals, stdin EOF, or normal exit)
    const reportShutdownError = (err) => {
        process.stderr.write(`[screenhand] Shutdown step failed: ${err instanceof Error ? err.message : String(err)}\n`);
    };
    // Run-once guard: several triggers can fire for the same shutdown, and
    // "beforeExit" is emitted again whenever a listener schedules more async
    // work — which flushAll does by starting perceptionManager.stop().
    let flushed = false;
    const flushAll = () => {
        if (flushed) {
            return;
        }
        flushed = true;
        const steps = [
            // stop() is started but not awaited; attach a handler so a rejection is not left unhandled.
            () => { void Promise.resolve(perceptionManager.stop()).catch(reportShutdownError); },
            () => perceptionManager.stopStallDetection(),
            () => stateWatcher.stop(),
            () => contextTracker.flush(),
            () => learningEngine.flush(),
            () => appMap.flush(),
        ];
        // Isolate each step so one failing flush does not prevent the others from running.
        for (const step of steps) {
            try {
                step();
            }
            catch (err) {
                reportShutdownError(err);
            }
        }
    };
    const flushAndExit = () => { flushAll(); process.exit(0); };
    process.on("SIGINT", flushAndExit);
    process.on("SIGTERM", flushAndExit);
    process.on("beforeExit", flushAll);
    // MCP clients often close stdin without sending a signal — flush on stdin end too
    process.stdin.on("end", flushAndExit);
    process.stdin.on("close", flushAndExit);
    const transport = new StdioServerTransport();
    await server.connect(transport);
}
|
|
@@ -75,9 +75,11 @@ export class PlaybookPublisher {
|
|
|
75
75
|
return null;
|
|
76
76
|
}
|
|
77
77
|
writeFileAtomicSync(filePath, JSON.stringify(shared, null, 2) + "\n");
|
|
78
|
-
// Best-effort sync to remote API
|
|
78
|
+
// Best-effort sync to remote API — log failures so user knows data didn't leave machine
|
|
79
79
|
if (this.remote) {
|
|
80
|
-
void this.remote.publish(shared).catch(() => {
|
|
80
|
+
void this.remote.publish(shared).catch((err) => {
|
|
81
|
+
process.stderr.write(`[screenhand] Remote publish failed: ${err instanceof Error ? err.message : String(err)}\n`);
|
|
82
|
+
});
|
|
81
83
|
}
|
|
82
84
|
return shared;
|
|
83
85
|
}
|