screenhand 0.5.0 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/mcp-desktop.js +280 -39
- package/dist/src/community/publisher.js +4 -2
- package/dist/src/context-tracker.js +36 -6
- package/dist/src/ingestion/reference-merger.js +33 -0
- package/dist/src/memory/recall.js +65 -1
- package/dist/src/memory/research.js +1 -1
- package/dist/src/memory/service.js +26 -5
- package/dist/src/memory/store.js +42 -23
- package/dist/src/native/bridge-client.js +3 -3
- package/dist/src/perception/coordinator.js +62 -15
- package/dist/src/perception/manager.js +65 -1
- package/dist/src/planner/executor.js +6 -2
- package/dist/src/planner/plan-refiner.js +213 -0
- package/dist/src/playbook/engine.js +18 -3
- package/dist/src/playbook/recorder.js +24 -8
- package/dist/src/playbook/runner.js +9 -3
- package/dist/src/playbook/store.js +8 -0
- package/dist/src/recovery/engine.js +9 -3
- package/dist/src/state/app-map.js +6 -2
- package/dist/src/state/state-watcher.js +144 -0
- package/dist/src/supervisor/supervisor.js +1 -1
- package/dist-app-maps/com.apple.iphonesimulator.json +714 -223
- package/dist-references/simulator.json +48 -2
- package/package.json +1 -1
package/dist/mcp-desktop.js
CHANGED
|
@@ -52,8 +52,10 @@ import { PlaybookStore } from "./src/playbook/store.js";
|
|
|
52
52
|
import { ContextTracker } from "./src/context-tracker.js";
|
|
53
53
|
import { McpPlaybookRecorder } from "./src/playbook/mcp-recorder.js";
|
|
54
54
|
import { WorldModel } from "./src/state/index.js";
|
|
55
|
+
import { StateWatcher } from "./src/state/state-watcher.js";
|
|
55
56
|
import { PerceptionManager } from "./src/perception/index.js";
|
|
56
57
|
import { Planner, PlanExecutor, GoalStore, ToolRegistry } from "./src/planner/index.js";
|
|
58
|
+
import { PlanRefiner } from "./src/planner/plan-refiner.js";
|
|
57
59
|
import { RecoveryEngine } from "./src/recovery/index.js";
|
|
58
60
|
import { LearningEngine, LocatorPolicy } from "./src/learning/index.js";
|
|
59
61
|
import { discoverWebElements, testWebElement, compileReference, saveExploreResult, discoverNativeElements } from "./src/platform/explorer.js";
|
|
@@ -609,6 +611,7 @@ catch { /* dir may not exist */ }
|
|
|
609
611
|
const planner = new Planner(_executablePlaybookStore, memory, contextTracker, worldModel, learningEngine);
|
|
610
612
|
const goalStore = new GoalStore(path.join(os.homedir(), ".screenhand", "planner"));
|
|
611
613
|
goalStore.init();
|
|
614
|
+
const planRefiner = new PlanRefiner(path.join(os.homedir(), ".screenhand", "planner"));
|
|
612
615
|
const toolRegistry = new ToolRegistry();
|
|
613
616
|
const recoveryEngine = new RecoveryEngine(worldModel, toolRegistry.toExecutor(), memory);
|
|
614
617
|
recoveryEngine.setLearningEngine(learningEngine);
|
|
@@ -616,6 +619,7 @@ recoveryEngine.setAppMap(appMap);
|
|
|
616
619
|
planner.setToolRegistry(toolRegistry);
|
|
617
620
|
planner.setAppMap(appMap);
|
|
618
621
|
perceptionManager.setLearningEngine(learningEngine);
|
|
622
|
+
const stateWatcher = new StateWatcher(worldModel, toolRegistry.toExecutor(), 2_000);
|
|
619
623
|
// ── Reactive event loop: wire perception events to automatic responses ──
|
|
620
624
|
// These fire at perception speed (100-300ms), not LLM speed (~2-3s).
|
|
621
625
|
perceptionManager.on("dialog_detected", (event) => {
|
|
@@ -649,6 +653,28 @@ perceptionManager.on("app_switched", (event) => {
|
|
|
649
653
|
// Log for observability
|
|
650
654
|
console.error(`[reactive] App switched to ${event.bundleId} (pid=${event.pid})`);
|
|
651
655
|
});
|
|
656
|
+
// ── Perception-triggered recovery: focus loss, app crash, stall ──
|
|
657
|
+
perceptionManager.on("focus_lost", (event) => {
|
|
658
|
+
console.error(`[reactive] Focus lost: expected ${event.expectedBundleId}, got ${event.actualBundleId} — auto-refocusing`);
|
|
659
|
+
// Auto-refocus the expected app
|
|
660
|
+
toolRegistry.toExecutor()("focus", { bundleId: event.expectedBundleId }).catch((err) => {
|
|
661
|
+
console.error(`[reactive] Auto-refocus failed: ${err instanceof Error ? err.message : err}`);
|
|
662
|
+
});
|
|
663
|
+
});
|
|
664
|
+
perceptionManager.on("app_crash", (event) => {
|
|
665
|
+
console.error(`[reactive] App crash detected: ${event.bundleId} (pid=${event.pid}) — auto-relaunching`);
|
|
666
|
+
// Auto-relaunch the crashed app
|
|
667
|
+
toolRegistry.toExecutor()("launch", { bundleId: event.bundleId }).catch((err) => {
|
|
668
|
+
console.error(`[reactive] Auto-relaunch failed: ${err instanceof Error ? err.message : err}`);
|
|
669
|
+
});
|
|
670
|
+
});
|
|
671
|
+
perceptionManager.on("stall_detected", (event) => {
|
|
672
|
+
console.error(`[reactive] UI stall detected: ${event.bundleId} — no changes for ${(event.stallMs / 1000).toFixed(0)}s — taking screenshot for diagnosis`);
|
|
673
|
+
// Take a screenshot so the next LLM call can see what's on screen
|
|
674
|
+
toolRegistry.toExecutor()("screenshot", {}).catch((err) => {
|
|
675
|
+
console.error(`[reactive] Stall screenshot failed: ${err instanceof Error ? err.message : err}`);
|
|
676
|
+
});
|
|
677
|
+
});
|
|
652
678
|
const mcpRecorder = new McpPlaybookRecorder(playbooksDir);
|
|
653
679
|
const referenceMerger = new ReferenceMerger(referencesDir);
|
|
654
680
|
const communityPublisher = new PlaybookPublisher();
|
|
@@ -681,6 +707,7 @@ const MEMORY_TOOLS = new Set([
|
|
|
681
707
|
]);
|
|
682
708
|
// Track the strategy we're currently following (for feedback loop)
|
|
683
709
|
let activeStrategyFingerprint = null;
|
|
710
|
+
let autoExecutionInProgress = false; // guard against concurrent auto-execution
|
|
684
711
|
let currentAdaptiveBudget = null;
|
|
685
712
|
// Intercept all tool registrations to auto-log + auto-recall
|
|
686
713
|
const _rawOriginalTool = server.tool.bind(server);
|
|
@@ -770,7 +797,7 @@ server.tool = (...args) => {
|
|
|
770
797
|
if (!perceptionManager.isRunning && bridgeReady) {
|
|
771
798
|
const focusApp = worldModel.getState().focusedApp;
|
|
772
799
|
if (focusApp?.bundleId && focusApp?.pid) {
|
|
773
|
-
perceptionManager.tryAutoStart(focusApp, bridge).catch(() => { });
|
|
800
|
+
perceptionManager.tryAutoStart(focusApp, bridge).catch((e) => { process.stderr.write(`[screenhand] perception auto-start failed: ${e instanceof Error ? e.message : String(e)}\n`); });
|
|
774
801
|
installSafariEnricher(focusApp.bundleId);
|
|
775
802
|
}
|
|
776
803
|
}
|
|
@@ -821,7 +848,7 @@ server.tool = (...args) => {
|
|
|
821
848
|
"type_with_fallback", "select_with_fallback", "scroll_with_fallback",
|
|
822
849
|
]);
|
|
823
850
|
try {
|
|
824
|
-
|
|
851
|
+
let result = await originalHandler(params, extra);
|
|
825
852
|
const durationMs = Date.now() - start;
|
|
826
853
|
// ── POST-CALL: log action (async, non-blocking) ──
|
|
827
854
|
const entry = {
|
|
@@ -873,7 +900,9 @@ server.tool = (...args) => {
|
|
|
873
900
|
try {
|
|
874
901
|
appMap.recordPageTransition(postBundleIdForCtx, pageTransition.from, pageTransition.to, toolName);
|
|
875
902
|
}
|
|
876
|
-
catch {
|
|
903
|
+
catch (e) {
|
|
904
|
+
process.stderr.write(`[screenhand] nav tracking failed: ${e instanceof Error ? e.message : String(e)}\n`);
|
|
905
|
+
}
|
|
877
906
|
}
|
|
878
907
|
// ── POST-CALL: detect focus drift ──
|
|
879
908
|
const postBundleId = worldModel.getState().focusedApp?.bundleId ?? null;
|
|
@@ -947,7 +976,9 @@ server.tool = (...args) => {
|
|
|
947
976
|
}
|
|
948
977
|
}
|
|
949
978
|
}
|
|
950
|
-
catch {
|
|
979
|
+
catch (e) {
|
|
980
|
+
process.stderr.write(`[screenhand] app map feature learning failed: ${e instanceof Error ? e.message : String(e)}\n`);
|
|
981
|
+
}
|
|
951
982
|
}
|
|
952
983
|
if (!resultIsError && learnBundleId !== "unknown") {
|
|
953
984
|
try {
|
|
@@ -1369,7 +1400,9 @@ server.tool = (...args) => {
|
|
|
1369
1400
|
}
|
|
1370
1401
|
}
|
|
1371
1402
|
}
|
|
1372
|
-
catch {
|
|
1403
|
+
catch (e) {
|
|
1404
|
+
process.stderr.write(`[screenhand] hierarchy extraction failed: ${e instanceof Error ? e.message : String(e)}\n`);
|
|
1405
|
+
}
|
|
1373
1406
|
}
|
|
1374
1407
|
}
|
|
1375
1408
|
// ── Conditional UI visibility tracking (throttled) ──
|
|
@@ -1417,7 +1450,9 @@ server.tool = (...args) => {
|
|
|
1417
1450
|
}
|
|
1418
1451
|
}
|
|
1419
1452
|
}
|
|
1420
|
-
catch {
|
|
1453
|
+
catch (e) {
|
|
1454
|
+
process.stderr.write(`[screenhand] visibility tracking failed: ${e instanceof Error ? e.message : String(e)}\n`);
|
|
1455
|
+
}
|
|
1421
1456
|
}
|
|
1422
1457
|
}
|
|
1423
1458
|
// ── Timing recording: track tool response times per element ──
|
|
@@ -1511,25 +1546,65 @@ server.tool = (...args) => {
|
|
|
1511
1546
|
if (knownError) {
|
|
1512
1547
|
hints.push(`⚡ Memory: "${toolName}" has failed before: "${knownError.error}" (${knownError.occurrences}x). Fix: ${knownError.resolution}`);
|
|
1513
1548
|
}
|
|
1514
|
-
//
|
|
1549
|
+
// ── Strategy matching: auto-execute proven strategies OR hint unproven ones ──
|
|
1515
1550
|
const recentTools = memory.getRecentToolNames();
|
|
1516
|
-
const
|
|
1517
|
-
|
|
1518
|
-
|
|
1519
|
-
|
|
1520
|
-
|
|
1521
|
-
|
|
1522
|
-
|
|
1523
|
-
|
|
1524
|
-
|
|
1525
|
-
|
|
1551
|
+
const currentBundleForStrategy = worldModel.getState().focusedApp?.bundleId;
|
|
1552
|
+
// Try auto-execution first (10+ successes, 0 failures)
|
|
1553
|
+
// Guard: skip if another auto-execution is already in progress
|
|
1554
|
+
const autoExec = autoExecutionInProgress ? null : memory.getAutoExecutableStrategy(recentTools, currentBundleForStrategy);
|
|
1555
|
+
if (autoExec) {
|
|
1556
|
+
autoExecutionInProgress = true;
|
|
1557
|
+
activeStrategyFingerprint = autoExec.fingerprint;
|
|
1558
|
+
const autoResults = [];
|
|
1559
|
+
let allOk = true;
|
|
1560
|
+
hints.push(`🚀 Auto-executing proven strategy "${autoExec.strategy.task}" (${autoExec.strategy.successCount} wins) — ${autoExec.remainingSteps.length} steps remaining`);
|
|
1561
|
+
for (const step of autoExec.remainingSteps) {
|
|
1562
|
+
try {
|
|
1563
|
+
const stepResult = await toolRegistry.toExecutor()(step.tool, step.params);
|
|
1564
|
+
autoResults.push({ tool: step.tool, ...stepResult });
|
|
1565
|
+
// Record outcome for learning
|
|
1566
|
+
const target = typeof step.params.target === "string" ? step.params.target
|
|
1567
|
+
: typeof step.params.title === "string" ? step.params.title
|
|
1568
|
+
: typeof step.params.text === "string" ? step.params.text
|
|
1569
|
+
: null;
|
|
1570
|
+
contextTracker.recordOutcome(step.tool, { target, text: typeof step.params.text === "string" ? step.params.text : null }, stepResult.ok, stepResult.ok ? null : (stepResult.error ?? null));
|
|
1571
|
+
if (!stepResult.ok) {
|
|
1572
|
+
allOk = false;
|
|
1573
|
+
hints.push(` ✗ ${step.tool} failed: ${stepResult.error ?? "unknown"}`);
|
|
1574
|
+
break; // Stop auto-execution on first failure
|
|
1575
|
+
}
|
|
1576
|
+
hints.push(` ✓ ${step.tool} — ok`);
|
|
1577
|
+
}
|
|
1578
|
+
catch (err) {
|
|
1579
|
+
allOk = false;
|
|
1580
|
+
hints.push(` ✗ ${step.tool} threw: ${err instanceof Error ? err.message : String(err)}`);
|
|
1581
|
+
break;
|
|
1582
|
+
}
|
|
1526
1583
|
}
|
|
1527
|
-
|
|
1528
|
-
|
|
1529
|
-
// We were following a strategy but the sequence diverged — record success
|
|
1530
|
-
// (the agent completed the strategy or went its own way after it)
|
|
1531
|
-
memory.recordStrategyOutcome(activeStrategyFingerprint, true);
|
|
1584
|
+
// Record strategy outcome
|
|
1585
|
+
memory.recordStrategyOutcome(autoExec.fingerprint, allOk);
|
|
1532
1586
|
activeStrategyFingerprint = null;
|
|
1587
|
+
autoExecutionInProgress = false;
|
|
1588
|
+
// Append auto-execution results to the response
|
|
1589
|
+
const autoSummary = autoResults.map((r) => `${r.tool}: ${r.ok ? "ok" : r.error}`).join("\n");
|
|
1590
|
+
const resultContent = Array.isArray(result?.content) ? result.content : [];
|
|
1591
|
+
resultContent.push({ type: "text", text: `\n── AUTO-EXECUTED (${autoResults.length} steps) ──\n${autoSummary}` });
|
|
1592
|
+
result = { ...result, content: resultContent };
|
|
1593
|
+
}
|
|
1594
|
+
else {
|
|
1595
|
+
// Fall back to strategy hint (suggest but don't execute)
|
|
1596
|
+
const strategyHint = memory.quickStrategyHint(recentTools, currentBundleForStrategy);
|
|
1597
|
+
if (strategyHint) {
|
|
1598
|
+
activeStrategyFingerprint = strategyHint.fingerprint;
|
|
1599
|
+
const nextParams = Object.keys(strategyHint.nextStep.params).length > 0
|
|
1600
|
+
? `(${JSON.stringify(strategyHint.nextStep.params)})`
|
|
1601
|
+
: "";
|
|
1602
|
+
hints.push(`💡 Memory: This matches strategy "${strategyHint.strategy.task}" (${strategyHint.strategy.successCount} wins, ${strategyHint.strategy.failCount ?? 0} fails). Next step: ${strategyHint.nextStep.tool}${nextParams}`);
|
|
1603
|
+
}
|
|
1604
|
+
else if (activeStrategyFingerprint && recentTools.length > 0) {
|
|
1605
|
+
memory.recordStrategyOutcome(activeStrategyFingerprint, true);
|
|
1606
|
+
activeStrategyFingerprint = null;
|
|
1607
|
+
}
|
|
1533
1608
|
}
|
|
1534
1609
|
// Attach hints in BOTH content (visible) and _meta (for programmatic access)
|
|
1535
1610
|
if (hints.length > 0) {
|
|
@@ -1745,7 +1820,9 @@ server.tool("focus", "Focus/activate an application (or a specific window by win
|
|
|
1745
1820
|
targetApp = { bundleId, name: appWin.appName, pid: appWin.pid || appWin.ownerPid };
|
|
1746
1821
|
}
|
|
1747
1822
|
}
|
|
1748
|
-
catch {
|
|
1823
|
+
catch (e) {
|
|
1824
|
+
process.stderr.write(`[screenhand] focus window check for ${bundleId} failed: ${e instanceof Error ? e.message : String(e)}\n`);
|
|
1825
|
+
}
|
|
1749
1826
|
if (!targetApp) {
|
|
1750
1827
|
return { content: [{ type: "text", text: `Error: ${bundleId} is not running. Use launch("${bundleId}") first.` }], isError: true };
|
|
1751
1828
|
}
|
|
@@ -1816,10 +1893,14 @@ server.tool("focus", "Focus/activate an application (or a specific window by win
|
|
|
1816
1893
|
await perceptionManager.ensureStarted(ctx);
|
|
1817
1894
|
installSafariEnricher(bundleId);
|
|
1818
1895
|
}
|
|
1819
|
-
catch {
|
|
1896
|
+
catch (e) {
|
|
1897
|
+
process.stderr.write(`[screenhand] perception ensureStarted in focus failed: ${e instanceof Error ? e.message : String(e)}\n`);
|
|
1898
|
+
}
|
|
1820
1899
|
}
|
|
1821
1900
|
}
|
|
1822
|
-
catch {
|
|
1901
|
+
catch (e) {
|
|
1902
|
+
process.stderr.write(`[screenhand] focus world-model update failed: ${e instanceof Error ? e.message : String(e)}\n`);
|
|
1903
|
+
}
|
|
1823
1904
|
return { content: [{ type: "text", text: focusMsg }] };
|
|
1824
1905
|
}
|
|
1825
1906
|
finally {
|
|
@@ -1883,7 +1964,9 @@ server.tool("launch", "Launch an application. Chrome/Chromium browsers are launc
|
|
|
1883
1964
|
await perceptionManager.ensureStarted({ bundleId, appName: r.appName ?? bundleId, pid: r.pid, windowTitle: "", ...(windowId != null ? { windowId } : {}) });
|
|
1884
1965
|
installSafariEnricher(bundleId);
|
|
1885
1966
|
}
|
|
1886
|
-
catch {
|
|
1967
|
+
catch (e) {
|
|
1968
|
+
process.stderr.write(`[screenhand] perception start after launch failed: ${e instanceof Error ? e.message : String(e)}\n`);
|
|
1969
|
+
}
|
|
1887
1970
|
let msg = `Launched ${r.appName} pid=${r.pid}`;
|
|
1888
1971
|
if (chromeAppName) {
|
|
1889
1972
|
const port = cdpPort ?? 9222;
|
|
@@ -2112,7 +2195,9 @@ server.tool("ui_press", "PREFERRED: Find and press/click a UI element by its tit
|
|
|
2112
2195
|
return { content: [{ type: "text", text: `Element "${title}" not found in PID ${pid}. A system dialog from "${front.name}" (${front.bundleId}, PID ${front.pid}) may be blocking. Dismiss it first, or use click(x, y) to interact with the dialog directly.` }], isError: true };
|
|
2113
2196
|
}
|
|
2114
2197
|
}
|
|
2115
|
-
catch {
|
|
2198
|
+
catch (e) {
|
|
2199
|
+
process.stderr.write(`[screenhand] frontmost check in ui_press failed: ${e instanceof Error ? e.message : String(e)}\n`);
|
|
2200
|
+
}
|
|
2116
2201
|
throw new Error(`Element "${title}" not found (searched title, value, and description)`);
|
|
2117
2202
|
}
|
|
2118
2203
|
}
|
|
@@ -2333,7 +2418,9 @@ server.tool("type_text", "Type text using the keyboard. Auto-detects Electron ap
|
|
|
2333
2418
|
catch { /* not available on this port */ }
|
|
2334
2419
|
}
|
|
2335
2420
|
}
|
|
2336
|
-
catch {
|
|
2421
|
+
catch (e) {
|
|
2422
|
+
process.stderr.write(`[screenhand] CDP auto-detect failed: ${e instanceof Error ? e.message : String(e)}\n`);
|
|
2423
|
+
}
|
|
2337
2424
|
}
|
|
2338
2425
|
if (electronCdpPort) {
|
|
2339
2426
|
// CDP path: click editor to ensure focus, then type via key events
|
|
@@ -2396,7 +2483,9 @@ server.tool("key", "Press a key combination", {
|
|
|
2396
2483
|
const front = await bridge.call("app.frontmost", {});
|
|
2397
2484
|
targetPid = front.pid;
|
|
2398
2485
|
}
|
|
2399
|
-
catch {
|
|
2486
|
+
catch (e) {
|
|
2487
|
+
process.stderr.write(`[screenhand] key frontmost PID resolve failed: ${e instanceof Error ? e.message : String(e)}\n`);
|
|
2488
|
+
}
|
|
2400
2489
|
}
|
|
2401
2490
|
const keys = combo.split("+");
|
|
2402
2491
|
const hasModifier = keys.some(k => ["cmd", "ctrl", "alt", "shift"].includes(k.toLowerCase()));
|
|
@@ -2466,7 +2555,9 @@ async function getCDPClient(tabId, overridePort) {
|
|
|
2466
2555
|
try {
|
|
2467
2556
|
perceptionManager.activateCDP(client);
|
|
2468
2557
|
}
|
|
2469
|
-
catch {
|
|
2558
|
+
catch (e) {
|
|
2559
|
+
process.stderr.write(`[screenhand] perception CDP activate failed: ${e instanceof Error ? e.message : String(e)}\n`);
|
|
2560
|
+
}
|
|
2470
2561
|
return { client, targetId: targetId, CDP: cdp, port };
|
|
2471
2562
|
}
|
|
2472
2563
|
// ── Random delay helper ──
|
|
@@ -3403,6 +3494,12 @@ server.tool("platform_explore", "Autonomously explore an app or website. Maps al
|
|
|
3403
3494
|
// Compile and save
|
|
3404
3495
|
const result = compileReference(platform, "web", tested, url);
|
|
3405
3496
|
const filePath = saveExploreResult(referencesDir, result);
|
|
3497
|
+
// Auto-merge explore selectors into main reference so data isn't fragmented
|
|
3498
|
+
if (result.selectors && Object.keys(result.selectors).length > 0) {
|
|
3499
|
+
referenceMerger.mergeExploreSelectors(result.selectors, result.errors, "", platform);
|
|
3500
|
+
}
|
|
3501
|
+
// Hot-reload: make new data immediately available to context tracker
|
|
3502
|
+
_playbookStoreForContext.reload();
|
|
3406
3503
|
return { content: [{ type: "text", text: `Exploration complete: ${filePath}\n\nElements found: ${elements.length}\nTested: ${result.testedElements}\nWorking selectors: ${result.workingSelectors}\nErrors: ${result.errors.length}\n\nKey discoveries:\n${result.keyDiscoveries.map(d => ` - ${d}`).join("\n")}` }] };
|
|
3407
3504
|
}
|
|
3408
3505
|
else if (bundleId) {
|
|
@@ -3424,6 +3521,12 @@ server.tool("platform_explore", "Autonomously explore an app or website. Maps al
|
|
|
3424
3521
|
...el, clickWorked: true, result: "discovered_not_tested",
|
|
3425
3522
|
})), undefined, bundleId);
|
|
3426
3523
|
const filePath = saveExploreResult(referencesDir, result);
|
|
3524
|
+
// Auto-merge explore selectors into main reference so data isn't fragmented
|
|
3525
|
+
if (result.selectors && Object.keys(result.selectors).length > 0) {
|
|
3526
|
+
referenceMerger.mergeExploreSelectors(result.selectors, result.errors, bundleId, platform);
|
|
3527
|
+
}
|
|
3528
|
+
// Hot-reload: make new data immediately available to context tracker
|
|
3529
|
+
_playbookStoreForContext.reload();
|
|
3427
3530
|
return { content: [{ type: "text", text: `Native app exploration complete: ${filePath}\n\nElements discovered: ${elements.length}\n(Native elements discovered but not auto-clicked for safety. Use playbook_record to test interactively.)` }] };
|
|
3428
3531
|
}
|
|
3429
3532
|
else {
|
|
@@ -5037,7 +5140,9 @@ server.tool("scroll_with_fallback", "Scroll within an element or the active wind
|
|
|
5037
5140
|
return { content: [{ type: "text", text: `"${target}" is visible after ${i} scroll(s).` }] };
|
|
5038
5141
|
}
|
|
5039
5142
|
}
|
|
5040
|
-
catch {
|
|
5143
|
+
catch (e) {
|
|
5144
|
+
process.stderr.write(`[screenhand] OCR during scroll search failed: ${e instanceof Error ? e.message : String(e)}\n`);
|
|
5145
|
+
}
|
|
5041
5146
|
// Scroll once
|
|
5042
5147
|
const deltaX = direction === "left" ? -scrollAmount : direction === "right" ? scrollAmount : 0;
|
|
5043
5148
|
const deltaY = direction === "up" ? -scrollAmount : direction === "down" ? scrollAmount : 0;
|
|
@@ -5164,7 +5269,9 @@ server.tool("wait_for_state", "Wait until a condition is met on screen: text app
|
|
|
5164
5269
|
await client.close();
|
|
5165
5270
|
}
|
|
5166
5271
|
}
|
|
5167
|
-
catch {
|
|
5272
|
+
catch (e) {
|
|
5273
|
+
process.stderr.write(`[screenhand] wait_for_state CDP check failed: ${e instanceof Error ? e.message : String(e)}\n`);
|
|
5274
|
+
}
|
|
5168
5275
|
}
|
|
5169
5276
|
const elapsed = Date.now() - (deadline - timeout);
|
|
5170
5277
|
lastCheck = `${elapsed}ms`;
|
|
@@ -5450,6 +5557,18 @@ function getJobRunner() {
|
|
|
5450
5557
|
timeout: 15000,
|
|
5451
5558
|
}).trim();
|
|
5452
5559
|
});
|
|
5560
|
+
// Wire learning feedback: PlaybookEngine reports step outcomes to context tracker + AppMap
|
|
5561
|
+
playbookEngine.setOutcomeCallback((step, success, error) => {
|
|
5562
|
+
const target = typeof step.target === "string" ? step.target : null;
|
|
5563
|
+
contextTracker.recordOutcome(step.action, { target, text: step.text }, success, error);
|
|
5564
|
+
const bid = worldModel.getState().focusedApp?.bundleId ?? lastKnownBundleId;
|
|
5565
|
+
if (bid && target) {
|
|
5566
|
+
try {
|
|
5567
|
+
appMap.recordElementOutcome(bid, "auto", target, success);
|
|
5568
|
+
}
|
|
5569
|
+
catch { /* non-critical */ }
|
|
5570
|
+
}
|
|
5571
|
+
});
|
|
5453
5572
|
activeJobRunner = new JobRunner(bridge, jobManager, leaseManager, supervisor, (() => {
|
|
5454
5573
|
const cfg = {
|
|
5455
5574
|
hasCDP: cdpPort !== null,
|
|
@@ -5626,10 +5745,22 @@ originalTool("plan_execute", "Run a plan automatically. Known steps (from playbo
|
|
|
5626
5745
|
if (!goal) {
|
|
5627
5746
|
return { content: [{ type: "text", text: `Goal not found: ${goalId}` }] };
|
|
5628
5747
|
}
|
|
5629
|
-
const
|
|
5748
|
+
const focusedBundleId = worldModel.getState().focusedApp?.bundleId ?? "unknown";
|
|
5749
|
+
const adaptiveBudget = learningEngine.getAdaptiveBudget(focusedBundleId);
|
|
5630
5750
|
const executor = new PlanExecutor(worldModel, planner, toolRegistry.toExecutor(), { postconditionWaitMs: adaptiveBudget.verifyMs, defaultStepTimeout: Math.max(30_000, adaptiveBudget.actMs * 2) }, recoveryEngine, learningEngine);
|
|
5631
5751
|
executor.setAppMap(appMap);
|
|
5632
|
-
|
|
5752
|
+
// Enable perception-triggered recovery during plan execution
|
|
5753
|
+
perceptionManager.setExpectedApp(focusedBundleId);
|
|
5754
|
+
perceptionManager.startStallDetection(30_000);
|
|
5755
|
+
let result;
|
|
5756
|
+
try {
|
|
5757
|
+
result = await executor.executeGoal(goal);
|
|
5758
|
+
}
|
|
5759
|
+
finally {
|
|
5760
|
+
// Disable reactive recovery after plan completes
|
|
5761
|
+
perceptionManager.setExpectedApp(null);
|
|
5762
|
+
perceptionManager.stopStallDetection();
|
|
5763
|
+
}
|
|
5633
5764
|
goalStore.update(goalId, goal);
|
|
5634
5765
|
// Check if paused at an LLM step
|
|
5635
5766
|
if ("paused" in result) {
|
|
@@ -5668,7 +5799,25 @@ originalTool("plan_execute", "Run a plan automatically. Known steps (from playbo
|
|
|
5668
5799
|
}
|
|
5669
5800
|
}
|
|
5670
5801
|
}
|
|
5671
|
-
catch {
|
|
5802
|
+
catch (e) {
|
|
5803
|
+
process.stderr.write(`[screenhand] strategy recording failed: ${e instanceof Error ? e.message : String(e)}\n`);
|
|
5804
|
+
}
|
|
5805
|
+
// Self-improving plans: refine and check for graduation
|
|
5806
|
+
try {
|
|
5807
|
+
const refinement = planRefiner.refine(goal, result);
|
|
5808
|
+
if (refinement.refinementCount > 0) {
|
|
5809
|
+
process.stderr.write(`[plan-refiner] Refined plan for "${goal.description}" (${refinement.refinementCount}x)\n`);
|
|
5810
|
+
}
|
|
5811
|
+
// Check graduation to playbook (3+ refinements)
|
|
5812
|
+
const playbook = planRefiner.checkGraduation(goal.description, focusedBundleId, worldModel.getState().focusedApp?.appName ?? focusedBundleId);
|
|
5813
|
+
if (playbook) {
|
|
5814
|
+
_playbookStoreForContext.save(playbook);
|
|
5815
|
+
process.stderr.write(`[plan-refiner] Plan GRADUATED to playbook: ${playbook.id}\n`);
|
|
5816
|
+
}
|
|
5817
|
+
}
|
|
5818
|
+
catch (e) {
|
|
5819
|
+
process.stderr.write(`[plan-refiner] Refinement failed: ${e instanceof Error ? e.message : String(e)}\n`);
|
|
5820
|
+
}
|
|
5672
5821
|
}
|
|
5673
5822
|
const lines = [
|
|
5674
5823
|
result.success ? "Goal completed successfully." : `Goal failed: ${result.error}`,
|
|
@@ -6429,6 +6578,88 @@ server.tool("observer_ocr_roi", "Submit a targeted ROI OCR command to the runnin
|
|
|
6429
6578
|
return { content: [{ type: "text", text: `ROI OCR command submitted: ${id}\nRegion: (${x}, ${y}, ${width}×${height})\nThe daemon will process this on its next cycle. Call observer_ocr_roi with commandId="${id}" to poll the result.` }] };
|
|
6430
6579
|
});
|
|
6431
6580
|
// ═══════════════════════════════════════════════
|
|
6581
|
+
// STATE WATCHER — Continuous observation event bus
|
|
6582
|
+
// ═══════════════════════════════════════════════
|
|
6583
|
+
server.tool("watch_start", "Start the state watcher polling loop. Evaluates registered watch rules every 2s against the world model.", {}, async () => {
|
|
6584
|
+
stateWatcher.start();
|
|
6585
|
+
const rules = stateWatcher.getRules();
|
|
6586
|
+
return { content: [{ type: "text", text: `State watcher started. ${rules.length} rules registered.` }] };
|
|
6587
|
+
});
|
|
6588
|
+
server.tool("watch_stop", "Stop the state watcher polling loop.", {}, async () => {
|
|
6589
|
+
stateWatcher.stop();
|
|
6590
|
+
return { content: [{ type: "text", text: "State watcher stopped." }] };
|
|
6591
|
+
});
|
|
6592
|
+
server.tool("watch_register", "Register a watch rule: when element with matching title appears, execute an action. Use for automated responses to known UI states.", {
|
|
6593
|
+
id: z.string().describe("Unique rule ID"),
|
|
6594
|
+
elementTitle: z.string().describe("UI element title/label to watch for (case-insensitive substring match)"),
|
|
6595
|
+
actionTool: z.string().describe("Tool to execute when element appears (e.g. click_text, key)"),
|
|
6596
|
+
actionParams: z.record(z.string(), z.unknown()).describe("Params for the action tool"),
|
|
6597
|
+
bundleId: z.string().optional().describe("Only match when this app is focused"),
|
|
6598
|
+
maxFires: z.number().optional().describe("Max times to fire (0=unlimited, default=1)"),
|
|
6599
|
+
}, async ({ id, elementTitle, actionTool, actionParams, bundleId, maxFires }) => {
|
|
6600
|
+
// Validate tool exists and is safe for automated execution
|
|
6601
|
+
const BLOCKED_WATCH_TOOLS = new Set(["applescript", "browser_js", "browser_stealth"]);
|
|
6602
|
+
if (BLOCKED_WATCH_TOOLS.has(actionTool)) {
|
|
6603
|
+
return { content: [{ type: "text", text: `Tool "${actionTool}" is not allowed in watch rules (security: prevents arbitrary code execution)` }], isError: true };
|
|
6604
|
+
}
|
|
6605
|
+
if (!toolRegistry.has(actionTool)) {
|
|
6606
|
+
return { content: [{ type: "text", text: `Unknown tool: "${actionTool}"` }], isError: true };
|
|
6607
|
+
}
|
|
6608
|
+
stateWatcher.watchForElement(id, elementTitle, { tool: actionTool, params: actionParams }, bundleId);
|
|
6609
|
+
if (maxFires !== undefined) {
|
|
6610
|
+
const rules = stateWatcher.getRules();
|
|
6611
|
+
const rule = rules.find((r) => r.id === id);
|
|
6612
|
+
if (rule) {
|
|
6613
|
+
// Update maxFires on the registered rule
|
|
6614
|
+
const ruleState = stateWatcher.rules.get(id);
|
|
6615
|
+
if (ruleState)
|
|
6616
|
+
ruleState.rule.maxFires = maxFires;
|
|
6617
|
+
}
|
|
6618
|
+
}
|
|
6619
|
+
return { content: [{ type: "text", text: `Watch rule "${id}" registered: when "${elementTitle}" appears → ${actionTool}(${JSON.stringify(actionParams)})` }] };
|
|
6620
|
+
});
|
|
6621
|
+
server.tool("watch_dialog", "Register a dialog watch rule: when a dialog matching the pattern appears, auto-execute an action.", {
|
|
6622
|
+
id: z.string().describe("Unique rule ID"),
|
|
6623
|
+
titlePattern: z.string().describe("Regex pattern to match dialog titles"),
|
|
6624
|
+
actionTool: z.string().describe("Tool to execute (e.g. click_text, key)"),
|
|
6625
|
+
actionParams: z.record(z.string(), z.unknown()).describe("Params for the action tool"),
|
|
6626
|
+
}, async ({ id, titlePattern, actionTool, actionParams }) => {
|
|
6627
|
+
// Validate regex — reject patterns that could cause ReDoS
|
|
6628
|
+
let regex;
|
|
6629
|
+
try {
|
|
6630
|
+
regex = new RegExp(titlePattern, "i");
|
|
6631
|
+
// Quick sanity check — if it takes >50ms on a test string, reject
|
|
6632
|
+
const testStr = "a".repeat(100);
|
|
6633
|
+
const t0 = Date.now();
|
|
6634
|
+
regex.test(testStr);
|
|
6635
|
+
if (Date.now() - t0 > 50) {
|
|
6636
|
+
return { content: [{ type: "text", text: `Rejected: regex pattern "${titlePattern}" is too expensive (potential ReDoS)` }], isError: true };
|
|
6637
|
+
}
|
|
6638
|
+
}
|
|
6639
|
+
catch (e) {
|
|
6640
|
+
return { content: [{ type: "text", text: `Invalid regex: ${e instanceof Error ? e.message : String(e)}` }], isError: true };
|
|
6641
|
+
}
|
|
6642
|
+
stateWatcher.watchForDialog(id, regex, { tool: actionTool, params: actionParams });
|
|
6643
|
+
return { content: [{ type: "text", text: `Dialog watch "${id}" registered: /${titlePattern}/i → ${actionTool}(${JSON.stringify(actionParams)})` }] };
|
|
6644
|
+
});
|
|
6645
|
+
server.tool("watch_unregister", "Remove a watch rule by ID.", {
|
|
6646
|
+
id: z.string().describe("Rule ID to remove"),
|
|
6647
|
+
}, async ({ id }) => {
|
|
6648
|
+
const removed = stateWatcher.unregister(id);
|
|
6649
|
+
return { content: [{ type: "text", text: removed ? `Rule "${id}" removed.` : `Rule "${id}" not found.` }] };
|
|
6650
|
+
});
|
|
6651
|
+
server.tool("watch_status", "Get all registered watch rules and their fire counts.", {}, async () => {
|
|
6652
|
+
const rules = stateWatcher.getRules();
|
|
6653
|
+
const running = stateWatcher.isRunning;
|
|
6654
|
+
const lines = [
|
|
6655
|
+
`State watcher: ${running ? "running" : "stopped"}`,
|
|
6656
|
+
`Rules: ${rules.length}`,
|
|
6657
|
+
"",
|
|
6658
|
+
...rules.map((r) => ` [${r.id}] ${r.description} (fired ${r.fireCount}x)`),
|
|
6659
|
+
];
|
|
6660
|
+
return { content: [{ type: "text", text: lines.join("\n") }] };
|
|
6661
|
+
});
|
|
6662
|
+
// ═══════════════════════════════════════════════
|
|
6432
6663
|
// PHASE 6: TOOL MASTERY — Ingestion + Community
|
|
6433
6664
|
// ═══════════════════════════════════════════════
|
|
6434
6665
|
server.tool("scan_menu_bar", "Scan an app's menu bar via AX tree. Extracts all menu paths, keyboard shortcuts, and enabled/disabled states. Automatically merges discovered shortcuts into the reference file.", {
|
|
@@ -6490,6 +6721,8 @@ server.tool("scan_menu_bar", "Scan an app's menu bar via AX tree. Extracts all m
|
|
|
6490
6721
|
});
|
|
6491
6722
|
}
|
|
6492
6723
|
}
|
|
6724
|
+
// Hot-reload: make new reference data immediately available to context tracker
|
|
6725
|
+
_playbookStoreForContext.reload();
|
|
6493
6726
|
let output = lines.join("\n") + bootstrapInfo;
|
|
6494
6727
|
output = redactUsername(output);
|
|
6495
6728
|
output = output.replace(/Log Out [^\n:]+/g, "Log Out [USER]");
|
|
@@ -6553,6 +6786,8 @@ server.tool("ingest_documentation", "Parse a documentation page (HTML, markdown,
|
|
|
6553
6786
|
}
|
|
6554
6787
|
}
|
|
6555
6788
|
}
|
|
6789
|
+
// Hot-reload: make new reference data immediately available to context tracker
|
|
6790
|
+
_playbookStoreForContext.reload();
|
|
6556
6791
|
return { content: [{ type: "text", text: lines.join("\n") }] };
|
|
6557
6792
|
});
|
|
6558
6793
|
server.tool("ingest_tutorial", "Extract structured playbook steps from a video transcript (e.g. YouTube captions). Converts tutorial narration into actionable automation steps with tool mappings.", {
|
|
@@ -6643,6 +6878,8 @@ server.tool("discover_features", "Extract features from an app's official websit
|
|
|
6643
6878
|
lines.push(` [${f.category}] ${f.name}: ${f.description}`);
|
|
6644
6879
|
}
|
|
6645
6880
|
}
|
|
6881
|
+
// Hot-reload: make new reference data immediately available to context tracker
|
|
6882
|
+
_playbookStoreForContext.reload();
|
|
6646
6883
|
return { content: [{ type: "text", text: lines.join("\n") }] };
|
|
6647
6884
|
});
|
|
6648
6885
|
server.tool("coverage_report", "Check what ScreenHand knows about an app: shortcuts, selectors, flows, playbooks, error patterns, and stability %. Useful before complex workflows to decide strategy: learn first (if empty), go fast (if high coverage), or use fallback tools (if error patterns exist). Optional for quick actions.", {
|
|
@@ -6751,10 +6988,14 @@ originalTool("community_fetch", "Search community playbooks for a platform or wo
|
|
|
6751
6988
|
// START
|
|
6752
6989
|
// ═══════════════════════════════════════════════
|
|
6753
6990
|
async function main() {
|
|
6754
|
-
// Flush
|
|
6755
|
-
|
|
6756
|
-
process.on("
|
|
6757
|
-
process.on("
|
|
6991
|
+
// Flush all learned state on shutdown (signals, stdin EOF, or normal exit)
|
|
6992
|
+
// Stop background watchers and persist all learned state before the process exits.
// Every step runs in isolation: a throw from one subsystem must not stop the remaining
// stores from being flushed, nor keep the caller from reaching its process.exit().
const flushAll = () => {
    // Failures are reported on stderr; the server talks to its client over stdio.
    const report = (label, err) => {
        process.stderr.write(`[screenhand] Shutdown step ${label} failed: ${err instanceof Error ? err.message : String(err)}\n`);
    };
    const steps = [
        // stop() is fire-and-forget; attach a handler so a late rejection is logged
        // instead of surfacing as an unhandled rejection during shutdown.
        ["perceptionManager.stop", () => { void Promise.resolve(perceptionManager.stop()).catch((err) => report("perceptionManager.stop", err)); }],
        ["perceptionManager.stopStallDetection", () => { perceptionManager.stopStallDetection(); }],
        ["stateWatcher.stop", () => { stateWatcher.stop(); }],
        ["contextTracker.flush", () => { contextTracker.flush(); }],
        ["learningEngine.flush", () => { learningEngine.flush(); }],
        ["appMap.flush", () => { appMap.flush(); }],
    ];
    for (const [label, step] of steps) {
        try {
            step();
        }
        catch (err) {
            report(label, err);
        }
    }
};
|
|
6993
|
+
process.on("SIGINT", () => { flushAll(); process.exit(0); });
|
|
6994
|
+
process.on("SIGTERM", () => { flushAll(); process.exit(0); });
|
|
6995
|
+
process.on("beforeExit", flushAll);
|
|
6996
|
+
// MCP clients often close stdin without sending a signal — flush on stdin end too
|
|
6997
|
+
process.stdin.on("end", () => { flushAll(); process.exit(0); });
|
|
6998
|
+
process.stdin.on("close", () => { flushAll(); process.exit(0); });
|
|
6758
6999
|
const transport = new StdioServerTransport();
|
|
6759
7000
|
await server.connect(transport);
|
|
6760
7001
|
}
|
|
@@ -75,9 +75,11 @@ export class PlaybookPublisher {
|
|
|
75
75
|
return null;
|
|
76
76
|
}
|
|
77
77
|
writeFileAtomicSync(filePath, JSON.stringify(shared, null, 2) + "\n");
|
|
78
|
-
// Best-effort sync to remote API
|
|
78
|
+
// Best-effort sync to remote API — log failures so user knows data didn't leave machine
|
|
79
79
|
if (this.remote) {
|
|
80
|
-
void this.remote.publish(shared).catch(() => {
|
|
80
|
+
void this.remote.publish(shared).catch((err) => {
|
|
81
|
+
process.stderr.write(`[screenhand] Remote publish failed: ${err instanceof Error ? err.message : String(err)}\n`);
|
|
82
|
+
});
|
|
81
83
|
}
|
|
82
84
|
return shared;
|
|
83
85
|
}
|
|
@@ -45,7 +45,7 @@ const BUNDLE_ID_TOOLS = new Set([
|
|
|
45
45
|
]);
|
|
46
46
|
// Tools that carry a target/selector in their params
|
|
47
47
|
const TARGET_PARAM_NAMES = ["selector", "target", "text", "label", "placeholder"];
|
|
48
|
-
const FLUSH_THRESHOLD =
|
|
48
|
+
const FLUSH_THRESHOLD = 5;
|
|
49
49
|
const MIN_OCCURRENCES_TO_PROMOTE = 2;
|
|
50
50
|
export class ContextTracker {
|
|
51
51
|
store;
|
|
@@ -176,8 +176,10 @@ export class ContextTracker {
|
|
|
176
176
|
// Only for known browser bundleIds
|
|
177
177
|
const BROWSER_BUNDLE_IDS = new Set([
|
|
178
178
|
"com.apple.Safari", "com.brave.Browser",
|
|
179
|
+
"com.google.Chrome", "com.google.Chrome.canary",
|
|
179
180
|
"org.chromium.Chromium", "com.vivaldi.Vivaldi",
|
|
180
|
-
"com.operasoftware.Opera",
|
|
181
|
+
"com.operasoftware.Opera", "company.thebrowser.Browser",
|
|
182
|
+
"org.mozilla.firefox", "org.mozilla.firefoxdeveloperedition",
|
|
181
183
|
]);
|
|
182
184
|
if (!BROWSER_BUNDLE_IDS.has(bundleId))
|
|
183
185
|
return;
|
|
@@ -346,18 +348,46 @@ export class ContextTracker {
|
|
|
346
348
|
flush() {
|
|
347
349
|
if (this.learnings.length === 0)
|
|
348
350
|
return;
|
|
349
|
-
if (!this.context
|
|
351
|
+
if (!this.context) {
|
|
350
352
|
this.learnings = [];
|
|
351
353
|
this.actionCount = 0;
|
|
352
354
|
return;
|
|
353
355
|
}
|
|
356
|
+
// If no playbook matched, create a stub so learnings aren't discarded.
|
|
357
|
+
// This is the fix for "train on unknown app → restart → everything gone".
|
|
358
|
+
if (!this.context.playbook) {
|
|
359
|
+
const domain = this.context.domain;
|
|
360
|
+
const platform = domain.replace(/^native:/, "").split(".").pop() ?? domain;
|
|
361
|
+
const isNative = domain.startsWith("native:");
|
|
362
|
+
const stub = {
|
|
363
|
+
id: platform + "-learned",
|
|
364
|
+
name: `${platform} — Auto-Learned`,
|
|
365
|
+
description: `Selectors and errors learned from live interaction with ${platform}`,
|
|
366
|
+
platform,
|
|
367
|
+
...(isNative ? { bundleId: domain.replace(/^native:/, "") } : {}),
|
|
368
|
+
version: "1.0.0",
|
|
369
|
+
steps: [],
|
|
370
|
+
tags: [platform, "auto-learned"],
|
|
371
|
+
successCount: 0,
|
|
372
|
+
failCount: 0,
|
|
373
|
+
selectors: {},
|
|
374
|
+
errors: [],
|
|
375
|
+
};
|
|
376
|
+
this.store.save(stub);
|
|
377
|
+
this.context.playbook = stub;
|
|
378
|
+
this.context.allSelectors = new Map();
|
|
379
|
+
}
|
|
354
380
|
const playbook = this.context.playbook;
|
|
355
381
|
let changed = false;
|
|
356
|
-
// ── Promote
|
|
382
|
+
// ── Promote targets that worked 2+ times ──
|
|
383
|
+
// Accepts CSS selectors AND AX targets (plain text labels like "New Note").
|
|
384
|
+
// Only rejects strings that look like event handlers or raw coordinates.
|
|
357
385
|
const selectorSuccessCount = new Map();
|
|
358
386
|
for (const l of this.learnings) {
|
|
359
|
-
if (l.success && l.target &&
|
|
360
|
-
!/\bon\w+\s*=/i.test(l.target)
|
|
387
|
+
if (l.success && l.target &&
|
|
388
|
+
!/\bon\w+\s*=/i.test(l.target) &&
|
|
389
|
+
!/^\d+,\d+$/.test(l.target) &&
|
|
390
|
+
l.target.length >= 2 && l.target.length <= 200) {
|
|
361
391
|
const key = l.target;
|
|
362
392
|
selectorSuccessCount.set(key, (selectorSuccessCount.get(key) ?? 0) + 1);
|
|
363
393
|
}
|