screenhand 0.5.0 → 0.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/mcp-desktop.js +463 -39
- package/dist/src/community/publisher.js +4 -2
- package/dist/src/context-tracker.js +62 -6
- package/dist/src/ingestion/reference-merger.js +33 -0
- package/dist/src/memory/recall.js +65 -1
- package/dist/src/memory/research.js +1 -1
- package/dist/src/memory/service.js +26 -5
- package/dist/src/memory/store.js +42 -23
- package/dist/src/native/bridge-client.js +3 -3
- package/dist/src/perception/coordinator.js +94 -15
- package/dist/src/perception/manager.js +65 -1
- package/dist/src/planner/executor.js +6 -2
- package/dist/src/planner/plan-refiner.js +213 -0
- package/dist/src/playbook/engine.js +18 -3
- package/dist/src/playbook/recorder.js +24 -8
- package/dist/src/playbook/runner.js +9 -3
- package/dist/src/playbook/store.js +8 -0
- package/dist/src/recovery/engine.js +9 -3
- package/dist/src/state/app-map.js +212 -2
- package/dist/src/state/state-watcher.js +144 -0
- package/dist/src/state/visual-mapper.js +325 -0
- package/dist/src/state/world-model.js +30 -1
- package/dist/src/supervisor/supervisor.js +1 -1
- package/dist-app-maps/com.apple.Notes.json +2328 -2201
- package/dist-app-maps/com.apple.Terminal.json +331 -343
- package/dist-app-maps/com.apple.iCal.json +3 -3
- package/dist-app-maps/com.apple.iphonesimulator.json +714 -223
- package/dist-app-maps/com.apple.mail.json +3 -3
- package/dist-app-maps/com.apple.reminders.json +2 -2
- package/dist-app-maps/net.whatsapp.WhatsApp.json +27 -27
- package/dist-references/notes.json +53 -16
- package/dist-references/simulator.json +48 -2
- package/package.json +1 -1
package/dist/mcp-desktop.js
CHANGED
|
@@ -52,8 +52,10 @@ import { PlaybookStore } from "./src/playbook/store.js";
|
|
|
52
52
|
import { ContextTracker } from "./src/context-tracker.js";
|
|
53
53
|
import { McpPlaybookRecorder } from "./src/playbook/mcp-recorder.js";
|
|
54
54
|
import { WorldModel } from "./src/state/index.js";
|
|
55
|
+
import { StateWatcher } from "./src/state/state-watcher.js";
|
|
55
56
|
import { PerceptionManager } from "./src/perception/index.js";
|
|
56
57
|
import { Planner, PlanExecutor, GoalStore, ToolRegistry } from "./src/planner/index.js";
|
|
58
|
+
import { PlanRefiner } from "./src/planner/plan-refiner.js";
|
|
57
59
|
import { RecoveryEngine } from "./src/recovery/index.js";
|
|
58
60
|
import { LearningEngine, LocatorPolicy } from "./src/learning/index.js";
|
|
59
61
|
import { discoverWebElements, testWebElement, compileReference, saveExploreResult, discoverNativeElements } from "./src/platform/explorer.js";
|
|
@@ -70,6 +72,7 @@ import { MenuScanner } from "./src/ingestion/menu-scanner.js";
|
|
|
70
72
|
import { DocParser } from "./src/ingestion/doc-parser.js";
|
|
71
73
|
import { TutorialExtractor } from "./src/ingestion/tutorial-extractor.js";
|
|
72
74
|
import { extractFeaturesFromHTML } from "./src/ingestion/feature-extractor.js";
|
|
75
|
+
import { quickScan, llmEnrich, buildVisualMeta, isSensitiveApp } from "./src/state/visual-mapper.js";
|
|
73
76
|
import { CoverageAuditor } from "./src/ingestion/coverage-auditor.js";
|
|
74
77
|
import { ReferenceMerger } from "./src/ingestion/reference-merger.js";
|
|
75
78
|
import { PlaybookPublisher } from "./src/community/publisher.js";
|
|
@@ -609,6 +612,7 @@ catch { /* dir may not exist */ }
|
|
|
609
612
|
const planner = new Planner(_executablePlaybookStore, memory, contextTracker, worldModel, learningEngine);
|
|
610
613
|
const goalStore = new GoalStore(path.join(os.homedir(), ".screenhand", "planner"));
|
|
611
614
|
goalStore.init();
|
|
615
|
+
const planRefiner = new PlanRefiner(path.join(os.homedir(), ".screenhand", "planner"));
|
|
612
616
|
const toolRegistry = new ToolRegistry();
|
|
613
617
|
const recoveryEngine = new RecoveryEngine(worldModel, toolRegistry.toExecutor(), memory);
|
|
614
618
|
recoveryEngine.setLearningEngine(learningEngine);
|
|
@@ -616,6 +620,7 @@ recoveryEngine.setAppMap(appMap);
|
|
|
616
620
|
planner.setToolRegistry(toolRegistry);
|
|
617
621
|
planner.setAppMap(appMap);
|
|
618
622
|
perceptionManager.setLearningEngine(learningEngine);
|
|
623
|
+
const stateWatcher = new StateWatcher(worldModel, toolRegistry.toExecutor(), 2_000);
|
|
619
624
|
// ── Reactive event loop: wire perception events to automatic responses ──
|
|
620
625
|
// These fire at perception speed (100-300ms), not LLM speed (~2-3s).
|
|
621
626
|
perceptionManager.on("dialog_detected", (event) => {
|
|
@@ -649,6 +654,28 @@ perceptionManager.on("app_switched", (event) => {
|
|
|
649
654
|
// Log for observability
|
|
650
655
|
console.error(`[reactive] App switched to ${event.bundleId} (pid=${event.pid})`);
|
|
651
656
|
});
|
|
657
|
+
// ── Perception-triggered recovery: focus loss, app crash, stall ──
|
|
658
|
+
perceptionManager.on("focus_lost", (event) => {
|
|
659
|
+
console.error(`[reactive] Focus lost: expected ${event.expectedBundleId}, got ${event.actualBundleId} — auto-refocusing`);
|
|
660
|
+
// Auto-refocus the expected app
|
|
661
|
+
toolRegistry.toExecutor()("focus", { bundleId: event.expectedBundleId }).catch((err) => {
|
|
662
|
+
console.error(`[reactive] Auto-refocus failed: ${err instanceof Error ? err.message : err}`);
|
|
663
|
+
});
|
|
664
|
+
});
|
|
665
|
+
perceptionManager.on("app_crash", (event) => {
|
|
666
|
+
console.error(`[reactive] App crash detected: ${event.bundleId} (pid=${event.pid}) — auto-relaunching`);
|
|
667
|
+
// Auto-relaunch the crashed app
|
|
668
|
+
toolRegistry.toExecutor()("launch", { bundleId: event.bundleId }).catch((err) => {
|
|
669
|
+
console.error(`[reactive] Auto-relaunch failed: ${err instanceof Error ? err.message : err}`);
|
|
670
|
+
});
|
|
671
|
+
});
|
|
672
|
+
perceptionManager.on("stall_detected", (event) => {
|
|
673
|
+
console.error(`[reactive] UI stall detected: ${event.bundleId} — no changes for ${(event.stallMs / 1000).toFixed(0)}s — taking screenshot for diagnosis`);
|
|
674
|
+
// Take a screenshot so the next LLM call can see what's on screen
|
|
675
|
+
toolRegistry.toExecutor()("screenshot", {}).catch((err) => {
|
|
676
|
+
console.error(`[reactive] Stall screenshot failed: ${err instanceof Error ? err.message : err}`);
|
|
677
|
+
});
|
|
678
|
+
});
|
|
652
679
|
const mcpRecorder = new McpPlaybookRecorder(playbooksDir);
|
|
653
680
|
const referenceMerger = new ReferenceMerger(referencesDir);
|
|
654
681
|
const communityPublisher = new PlaybookPublisher();
|
|
@@ -681,6 +708,7 @@ const MEMORY_TOOLS = new Set([
|
|
|
681
708
|
]);
|
|
682
709
|
// Track the strategy we're currently following (for feedback loop)
|
|
683
710
|
let activeStrategyFingerprint = null;
|
|
711
|
+
let autoExecutionInProgress = false; // guard against concurrent auto-execution
|
|
684
712
|
let currentAdaptiveBudget = null;
|
|
685
713
|
// Intercept all tool registrations to auto-log + auto-recall
|
|
686
714
|
const _rawOriginalTool = server.tool.bind(server);
|
|
@@ -770,7 +798,7 @@ server.tool = (...args) => {
|
|
|
770
798
|
if (!perceptionManager.isRunning && bridgeReady) {
|
|
771
799
|
const focusApp = worldModel.getState().focusedApp;
|
|
772
800
|
if (focusApp?.bundleId && focusApp?.pid) {
|
|
773
|
-
perceptionManager.tryAutoStart(focusApp, bridge).catch(() => { });
|
|
801
|
+
perceptionManager.tryAutoStart(focusApp, bridge).catch((e) => { process.stderr.write(`[screenhand] perception auto-start failed: ${e instanceof Error ? e.message : String(e)}\n`); });
|
|
774
802
|
installSafariEnricher(focusApp.bundleId);
|
|
775
803
|
}
|
|
776
804
|
}
|
|
@@ -821,7 +849,7 @@ server.tool = (...args) => {
|
|
|
821
849
|
"type_with_fallback", "select_with_fallback", "scroll_with_fallback",
|
|
822
850
|
]);
|
|
823
851
|
try {
|
|
824
|
-
|
|
852
|
+
let result = await originalHandler(params, extra);
|
|
825
853
|
const durationMs = Date.now() - start;
|
|
826
854
|
// ── POST-CALL: log action (async, non-blocking) ──
|
|
827
855
|
const entry = {
|
|
@@ -873,7 +901,9 @@ server.tool = (...args) => {
|
|
|
873
901
|
try {
|
|
874
902
|
appMap.recordPageTransition(postBundleIdForCtx, pageTransition.from, pageTransition.to, toolName);
|
|
875
903
|
}
|
|
876
|
-
catch {
|
|
904
|
+
catch (e) {
|
|
905
|
+
process.stderr.write(`[screenhand] nav tracking failed: ${e instanceof Error ? e.message : String(e)}\n`);
|
|
906
|
+
}
|
|
877
907
|
}
|
|
878
908
|
// ── POST-CALL: detect focus drift ──
|
|
879
909
|
const postBundleId = worldModel.getState().focusedApp?.bundleId ?? null;
|
|
@@ -947,7 +977,9 @@ server.tool = (...args) => {
|
|
|
947
977
|
}
|
|
948
978
|
}
|
|
949
979
|
}
|
|
950
|
-
catch {
|
|
980
|
+
catch (e) {
|
|
981
|
+
process.stderr.write(`[screenhand] app map feature learning failed: ${e instanceof Error ? e.message : String(e)}\n`);
|
|
982
|
+
}
|
|
951
983
|
}
|
|
952
984
|
if (!resultIsError && learnBundleId !== "unknown") {
|
|
953
985
|
try {
|
|
@@ -1369,7 +1401,9 @@ server.tool = (...args) => {
|
|
|
1369
1401
|
}
|
|
1370
1402
|
}
|
|
1371
1403
|
}
|
|
1372
|
-
catch {
|
|
1404
|
+
catch (e) {
|
|
1405
|
+
process.stderr.write(`[screenhand] hierarchy extraction failed: ${e instanceof Error ? e.message : String(e)}\n`);
|
|
1406
|
+
}
|
|
1373
1407
|
}
|
|
1374
1408
|
}
|
|
1375
1409
|
// ── Conditional UI visibility tracking (throttled) ──
|
|
@@ -1417,7 +1451,9 @@ server.tool = (...args) => {
|
|
|
1417
1451
|
}
|
|
1418
1452
|
}
|
|
1419
1453
|
}
|
|
1420
|
-
catch {
|
|
1454
|
+
catch (e) {
|
|
1455
|
+
process.stderr.write(`[screenhand] visibility tracking failed: ${e instanceof Error ? e.message : String(e)}\n`);
|
|
1456
|
+
}
|
|
1421
1457
|
}
|
|
1422
1458
|
}
|
|
1423
1459
|
// ── Timing recording: track tool response times per element ──
|
|
@@ -1511,25 +1547,65 @@ server.tool = (...args) => {
|
|
|
1511
1547
|
if (knownError) {
|
|
1512
1548
|
hints.push(`⚡ Memory: "${toolName}" has failed before: "${knownError.error}" (${knownError.occurrences}x). Fix: ${knownError.resolution}`);
|
|
1513
1549
|
}
|
|
1514
|
-
//
|
|
1550
|
+
// ── Strategy matching: auto-execute proven strategies OR hint unproven ones ──
|
|
1515
1551
|
const recentTools = memory.getRecentToolNames();
|
|
1516
|
-
const
|
|
1517
|
-
|
|
1518
|
-
|
|
1519
|
-
|
|
1520
|
-
|
|
1521
|
-
|
|
1522
|
-
|
|
1523
|
-
|
|
1524
|
-
|
|
1525
|
-
|
|
1552
|
+
const currentBundleForStrategy = worldModel.getState().focusedApp?.bundleId;
|
|
1553
|
+
// Try auto-execution first (10+ successes, 0 failures)
|
|
1554
|
+
// Guard: skip if another auto-execution is already in progress
|
|
1555
|
+
const autoExec = autoExecutionInProgress ? null : memory.getAutoExecutableStrategy(recentTools, currentBundleForStrategy);
|
|
1556
|
+
if (autoExec) {
|
|
1557
|
+
autoExecutionInProgress = true;
|
|
1558
|
+
activeStrategyFingerprint = autoExec.fingerprint;
|
|
1559
|
+
const autoResults = [];
|
|
1560
|
+
let allOk = true;
|
|
1561
|
+
hints.push(`🚀 Auto-executing proven strategy "${autoExec.strategy.task}" (${autoExec.strategy.successCount} wins) — ${autoExec.remainingSteps.length} steps remaining`);
|
|
1562
|
+
for (const step of autoExec.remainingSteps) {
|
|
1563
|
+
try {
|
|
1564
|
+
const stepResult = await toolRegistry.toExecutor()(step.tool, step.params);
|
|
1565
|
+
autoResults.push({ tool: step.tool, ...stepResult });
|
|
1566
|
+
// Record outcome for learning
|
|
1567
|
+
const target = typeof step.params.target === "string" ? step.params.target
|
|
1568
|
+
: typeof step.params.title === "string" ? step.params.title
|
|
1569
|
+
: typeof step.params.text === "string" ? step.params.text
|
|
1570
|
+
: null;
|
|
1571
|
+
contextTracker.recordOutcome(step.tool, { target, text: typeof step.params.text === "string" ? step.params.text : null }, stepResult.ok, stepResult.ok ? null : (stepResult.error ?? null));
|
|
1572
|
+
if (!stepResult.ok) {
|
|
1573
|
+
allOk = false;
|
|
1574
|
+
hints.push(` ✗ ${step.tool} failed: ${stepResult.error ?? "unknown"}`);
|
|
1575
|
+
break; // Stop auto-execution on first failure
|
|
1576
|
+
}
|
|
1577
|
+
hints.push(` ✓ ${step.tool} — ok`);
|
|
1578
|
+
}
|
|
1579
|
+
catch (err) {
|
|
1580
|
+
allOk = false;
|
|
1581
|
+
hints.push(` ✗ ${step.tool} threw: ${err instanceof Error ? err.message : String(err)}`);
|
|
1582
|
+
break;
|
|
1583
|
+
}
|
|
1526
1584
|
}
|
|
1527
|
-
|
|
1528
|
-
|
|
1529
|
-
// We were following a strategy but the sequence diverged — record success
|
|
1530
|
-
// (the agent completed the strategy or went its own way after it)
|
|
1531
|
-
memory.recordStrategyOutcome(activeStrategyFingerprint, true);
|
|
1585
|
+
// Record strategy outcome
|
|
1586
|
+
memory.recordStrategyOutcome(autoExec.fingerprint, allOk);
|
|
1532
1587
|
activeStrategyFingerprint = null;
|
|
1588
|
+
autoExecutionInProgress = false;
|
|
1589
|
+
// Append auto-execution results to the response
|
|
1590
|
+
const autoSummary = autoResults.map((r) => `${r.tool}: ${r.ok ? "ok" : r.error}`).join("\n");
|
|
1591
|
+
const resultContent = Array.isArray(result?.content) ? result.content : [];
|
|
1592
|
+
resultContent.push({ type: "text", text: `\n── AUTO-EXECUTED (${autoResults.length} steps) ──\n${autoSummary}` });
|
|
1593
|
+
result = { ...result, content: resultContent };
|
|
1594
|
+
}
|
|
1595
|
+
else {
|
|
1596
|
+
// Fall back to strategy hint (suggest but don't execute)
|
|
1597
|
+
const strategyHint = memory.quickStrategyHint(recentTools, currentBundleForStrategy);
|
|
1598
|
+
if (strategyHint) {
|
|
1599
|
+
activeStrategyFingerprint = strategyHint.fingerprint;
|
|
1600
|
+
const nextParams = Object.keys(strategyHint.nextStep.params).length > 0
|
|
1601
|
+
? `(${JSON.stringify(strategyHint.nextStep.params)})`
|
|
1602
|
+
: "";
|
|
1603
|
+
hints.push(`💡 Memory: This matches strategy "${strategyHint.strategy.task}" (${strategyHint.strategy.successCount} wins, ${strategyHint.strategy.failCount ?? 0} fails). Next step: ${strategyHint.nextStep.tool}${nextParams}`);
|
|
1604
|
+
}
|
|
1605
|
+
else if (activeStrategyFingerprint && recentTools.length > 0) {
|
|
1606
|
+
memory.recordStrategyOutcome(activeStrategyFingerprint, true);
|
|
1607
|
+
activeStrategyFingerprint = null;
|
|
1608
|
+
}
|
|
1533
1609
|
}
|
|
1534
1610
|
// Attach hints in BOTH content (visible) and _meta (for programmatic access)
|
|
1535
1611
|
if (hints.length > 0) {
|
|
@@ -1745,7 +1821,9 @@ server.tool("focus", "Focus/activate an application (or a specific window by win
|
|
|
1745
1821
|
targetApp = { bundleId, name: appWin.appName, pid: appWin.pid || appWin.ownerPid };
|
|
1746
1822
|
}
|
|
1747
1823
|
}
|
|
1748
|
-
catch {
|
|
1824
|
+
catch (e) {
|
|
1825
|
+
process.stderr.write(`[screenhand] focus window check for ${bundleId} failed: ${e instanceof Error ? e.message : String(e)}\n`);
|
|
1826
|
+
}
|
|
1749
1827
|
if (!targetApp) {
|
|
1750
1828
|
return { content: [{ type: "text", text: `Error: ${bundleId} is not running. Use launch("${bundleId}") first.` }], isError: true };
|
|
1751
1829
|
}
|
|
@@ -1816,10 +1894,14 @@ server.tool("focus", "Focus/activate an application (or a specific window by win
|
|
|
1816
1894
|
await perceptionManager.ensureStarted(ctx);
|
|
1817
1895
|
installSafariEnricher(bundleId);
|
|
1818
1896
|
}
|
|
1819
|
-
catch {
|
|
1897
|
+
catch (e) {
|
|
1898
|
+
process.stderr.write(`[screenhand] perception ensureStarted in focus failed: ${e instanceof Error ? e.message : String(e)}\n`);
|
|
1899
|
+
}
|
|
1820
1900
|
}
|
|
1821
1901
|
}
|
|
1822
|
-
catch {
|
|
1902
|
+
catch (e) {
|
|
1903
|
+
process.stderr.write(`[screenhand] focus world-model update failed: ${e instanceof Error ? e.message : String(e)}\n`);
|
|
1904
|
+
}
|
|
1823
1905
|
return { content: [{ type: "text", text: focusMsg }] };
|
|
1824
1906
|
}
|
|
1825
1907
|
finally {
|
|
@@ -1883,7 +1965,9 @@ server.tool("launch", "Launch an application. Chrome/Chromium browsers are launc
|
|
|
1883
1965
|
await perceptionManager.ensureStarted({ bundleId, appName: r.appName ?? bundleId, pid: r.pid, windowTitle: "", ...(windowId != null ? { windowId } : {}) });
|
|
1884
1966
|
installSafariEnricher(bundleId);
|
|
1885
1967
|
}
|
|
1886
|
-
catch {
|
|
1968
|
+
catch (e) {
|
|
1969
|
+
process.stderr.write(`[screenhand] perception start after launch failed: ${e instanceof Error ? e.message : String(e)}\n`);
|
|
1970
|
+
}
|
|
1887
1971
|
let msg = `Launched ${r.appName} pid=${r.pid}`;
|
|
1888
1972
|
if (chromeAppName) {
|
|
1889
1973
|
const port = cdpPort ?? 9222;
|
|
@@ -2112,7 +2196,9 @@ server.tool("ui_press", "PREFERRED: Find and press/click a UI element by its tit
|
|
|
2112
2196
|
return { content: [{ type: "text", text: `Element "${title}" not found in PID ${pid}. A system dialog from "${front.name}" (${front.bundleId}, PID ${front.pid}) may be blocking. Dismiss it first, or use click(x, y) to interact with the dialog directly.` }], isError: true };
|
|
2113
2197
|
}
|
|
2114
2198
|
}
|
|
2115
|
-
catch {
|
|
2199
|
+
catch (e) {
|
|
2200
|
+
process.stderr.write(`[screenhand] frontmost check in ui_press failed: ${e instanceof Error ? e.message : String(e)}\n`);
|
|
2201
|
+
}
|
|
2116
2202
|
throw new Error(`Element "${title}" not found (searched title, value, and description)`);
|
|
2117
2203
|
}
|
|
2118
2204
|
}
|
|
@@ -2333,7 +2419,9 @@ server.tool("type_text", "Type text using the keyboard. Auto-detects Electron ap
|
|
|
2333
2419
|
catch { /* not available on this port */ }
|
|
2334
2420
|
}
|
|
2335
2421
|
}
|
|
2336
|
-
catch {
|
|
2422
|
+
catch (e) {
|
|
2423
|
+
process.stderr.write(`[screenhand] CDP auto-detect failed: ${e instanceof Error ? e.message : String(e)}\n`);
|
|
2424
|
+
}
|
|
2337
2425
|
}
|
|
2338
2426
|
if (electronCdpPort) {
|
|
2339
2427
|
// CDP path: click editor to ensure focus, then type via key events
|
|
@@ -2396,7 +2484,9 @@ server.tool("key", "Press a key combination", {
|
|
|
2396
2484
|
const front = await bridge.call("app.frontmost", {});
|
|
2397
2485
|
targetPid = front.pid;
|
|
2398
2486
|
}
|
|
2399
|
-
catch {
|
|
2487
|
+
catch (e) {
|
|
2488
|
+
process.stderr.write(`[screenhand] key frontmost PID resolve failed: ${e instanceof Error ? e.message : String(e)}\n`);
|
|
2489
|
+
}
|
|
2400
2490
|
}
|
|
2401
2491
|
const keys = combo.split("+");
|
|
2402
2492
|
const hasModifier = keys.some(k => ["cmd", "ctrl", "alt", "shift"].includes(k.toLowerCase()));
|
|
@@ -2466,7 +2556,9 @@ async function getCDPClient(tabId, overridePort) {
|
|
|
2466
2556
|
try {
|
|
2467
2557
|
perceptionManager.activateCDP(client);
|
|
2468
2558
|
}
|
|
2469
|
-
catch {
|
|
2559
|
+
catch (e) {
|
|
2560
|
+
process.stderr.write(`[screenhand] perception CDP activate failed: ${e instanceof Error ? e.message : String(e)}\n`);
|
|
2561
|
+
}
|
|
2470
2562
|
return { client, targetId: targetId, CDP: cdp, port };
|
|
2471
2563
|
}
|
|
2472
2564
|
// ── Random delay helper ──
|
|
@@ -3403,6 +3495,12 @@ server.tool("platform_explore", "Autonomously explore an app or website. Maps al
|
|
|
3403
3495
|
// Compile and save
|
|
3404
3496
|
const result = compileReference(platform, "web", tested, url);
|
|
3405
3497
|
const filePath = saveExploreResult(referencesDir, result);
|
|
3498
|
+
// Auto-merge explore selectors into main reference so data isn't fragmented
|
|
3499
|
+
if (result.selectors && Object.keys(result.selectors).length > 0) {
|
|
3500
|
+
referenceMerger.mergeExploreSelectors(result.selectors, result.errors, "", platform);
|
|
3501
|
+
}
|
|
3502
|
+
// Hot-reload: make new data immediately available to context tracker
|
|
3503
|
+
_playbookStoreForContext.reload();
|
|
3406
3504
|
return { content: [{ type: "text", text: `Exploration complete: ${filePath}\n\nElements found: ${elements.length}\nTested: ${result.testedElements}\nWorking selectors: ${result.workingSelectors}\nErrors: ${result.errors.length}\n\nKey discoveries:\n${result.keyDiscoveries.map(d => ` - ${d}`).join("\n")}` }] };
|
|
3407
3505
|
}
|
|
3408
3506
|
else if (bundleId) {
|
|
@@ -3424,6 +3522,12 @@ server.tool("platform_explore", "Autonomously explore an app or website. Maps al
|
|
|
3424
3522
|
...el, clickWorked: true, result: "discovered_not_tested",
|
|
3425
3523
|
})), undefined, bundleId);
|
|
3426
3524
|
const filePath = saveExploreResult(referencesDir, result);
|
|
3525
|
+
// Auto-merge explore selectors into main reference so data isn't fragmented
|
|
3526
|
+
if (result.selectors && Object.keys(result.selectors).length > 0) {
|
|
3527
|
+
referenceMerger.mergeExploreSelectors(result.selectors, result.errors, bundleId, platform);
|
|
3528
|
+
}
|
|
3529
|
+
// Hot-reload: make new data immediately available to context tracker
|
|
3530
|
+
_playbookStoreForContext.reload();
|
|
3427
3531
|
return { content: [{ type: "text", text: `Native app exploration complete: ${filePath}\n\nElements discovered: ${elements.length}\n(Native elements discovered but not auto-clicked for safety. Use playbook_record to test interactively.)` }] };
|
|
3428
3532
|
}
|
|
3429
3533
|
else {
|
|
@@ -5037,7 +5141,9 @@ server.tool("scroll_with_fallback", "Scroll within an element or the active wind
|
|
|
5037
5141
|
return { content: [{ type: "text", text: `"${target}" is visible after ${i} scroll(s).` }] };
|
|
5038
5142
|
}
|
|
5039
5143
|
}
|
|
5040
|
-
catch {
|
|
5144
|
+
catch (e) {
|
|
5145
|
+
process.stderr.write(`[screenhand] OCR during scroll search failed: ${e instanceof Error ? e.message : String(e)}\n`);
|
|
5146
|
+
}
|
|
5041
5147
|
// Scroll once
|
|
5042
5148
|
const deltaX = direction === "left" ? -scrollAmount : direction === "right" ? scrollAmount : 0;
|
|
5043
5149
|
const deltaY = direction === "up" ? -scrollAmount : direction === "down" ? scrollAmount : 0;
|
|
@@ -5164,7 +5270,9 @@ server.tool("wait_for_state", "Wait until a condition is met on screen: text app
|
|
|
5164
5270
|
await client.close();
|
|
5165
5271
|
}
|
|
5166
5272
|
}
|
|
5167
|
-
catch {
|
|
5273
|
+
catch (e) {
|
|
5274
|
+
process.stderr.write(`[screenhand] wait_for_state CDP check failed: ${e instanceof Error ? e.message : String(e)}\n`);
|
|
5275
|
+
}
|
|
5168
5276
|
}
|
|
5169
5277
|
const elapsed = Date.now() - (deadline - timeout);
|
|
5170
5278
|
lastCheck = `${elapsed}ms`;
|
|
@@ -5450,6 +5558,18 @@ function getJobRunner() {
|
|
|
5450
5558
|
timeout: 15000,
|
|
5451
5559
|
}).trim();
|
|
5452
5560
|
});
|
|
5561
|
+
// Wire learning feedback: PlaybookEngine reports step outcomes to context tracker + AppMap
|
|
5562
|
+
playbookEngine.setOutcomeCallback((step, success, error) => {
|
|
5563
|
+
const target = typeof step.target === "string" ? step.target : null;
|
|
5564
|
+
contextTracker.recordOutcome(step.action, { target, text: step.text }, success, error);
|
|
5565
|
+
const bid = worldModel.getState().focusedApp?.bundleId ?? lastKnownBundleId;
|
|
5566
|
+
if (bid && target) {
|
|
5567
|
+
try {
|
|
5568
|
+
appMap.recordElementOutcome(bid, "auto", target, success);
|
|
5569
|
+
}
|
|
5570
|
+
catch { /* non-critical */ }
|
|
5571
|
+
}
|
|
5572
|
+
});
|
|
5453
5573
|
activeJobRunner = new JobRunner(bridge, jobManager, leaseManager, supervisor, (() => {
|
|
5454
5574
|
const cfg = {
|
|
5455
5575
|
hasCDP: cdpPort !== null,
|
|
@@ -5626,10 +5746,22 @@ originalTool("plan_execute", "Run a plan automatically. Known steps (from playbo
|
|
|
5626
5746
|
if (!goal) {
|
|
5627
5747
|
return { content: [{ type: "text", text: `Goal not found: ${goalId}` }] };
|
|
5628
5748
|
}
|
|
5629
|
-
const
|
|
5749
|
+
const focusedBundleId = worldModel.getState().focusedApp?.bundleId ?? "unknown";
|
|
5750
|
+
const adaptiveBudget = learningEngine.getAdaptiveBudget(focusedBundleId);
|
|
5630
5751
|
const executor = new PlanExecutor(worldModel, planner, toolRegistry.toExecutor(), { postconditionWaitMs: adaptiveBudget.verifyMs, defaultStepTimeout: Math.max(30_000, adaptiveBudget.actMs * 2) }, recoveryEngine, learningEngine);
|
|
5631
5752
|
executor.setAppMap(appMap);
|
|
5632
|
-
|
|
5753
|
+
// Enable perception-triggered recovery during plan execution
|
|
5754
|
+
perceptionManager.setExpectedApp(focusedBundleId);
|
|
5755
|
+
perceptionManager.startStallDetection(30_000);
|
|
5756
|
+
let result;
|
|
5757
|
+
try {
|
|
5758
|
+
result = await executor.executeGoal(goal);
|
|
5759
|
+
}
|
|
5760
|
+
finally {
|
|
5761
|
+
// Disable reactive recovery after plan completes
|
|
5762
|
+
perceptionManager.setExpectedApp(null);
|
|
5763
|
+
perceptionManager.stopStallDetection();
|
|
5764
|
+
}
|
|
5633
5765
|
goalStore.update(goalId, goal);
|
|
5634
5766
|
// Check if paused at an LLM step
|
|
5635
5767
|
if ("paused" in result) {
|
|
@@ -5668,7 +5800,25 @@ originalTool("plan_execute", "Run a plan automatically. Known steps (from playbo
|
|
|
5668
5800
|
}
|
|
5669
5801
|
}
|
|
5670
5802
|
}
|
|
5671
|
-
catch {
|
|
5803
|
+
catch (e) {
|
|
5804
|
+
process.stderr.write(`[screenhand] strategy recording failed: ${e instanceof Error ? e.message : String(e)}\n`);
|
|
5805
|
+
}
|
|
5806
|
+
// Self-improving plans: refine and check for graduation
|
|
5807
|
+
try {
|
|
5808
|
+
const refinement = planRefiner.refine(goal, result);
|
|
5809
|
+
if (refinement.refinementCount > 0) {
|
|
5810
|
+
process.stderr.write(`[plan-refiner] Refined plan for "${goal.description}" (${refinement.refinementCount}x)\n`);
|
|
5811
|
+
}
|
|
5812
|
+
// Check graduation to playbook (3+ refinements)
|
|
5813
|
+
const playbook = planRefiner.checkGraduation(goal.description, focusedBundleId, worldModel.getState().focusedApp?.appName ?? focusedBundleId);
|
|
5814
|
+
if (playbook) {
|
|
5815
|
+
_playbookStoreForContext.save(playbook);
|
|
5816
|
+
process.stderr.write(`[plan-refiner] Plan GRADUATED to playbook: ${playbook.id}\n`);
|
|
5817
|
+
}
|
|
5818
|
+
}
|
|
5819
|
+
catch (e) {
|
|
5820
|
+
process.stderr.write(`[plan-refiner] Refinement failed: ${e instanceof Error ? e.message : String(e)}\n`);
|
|
5821
|
+
}
|
|
5672
5822
|
}
|
|
5673
5823
|
const lines = [
|
|
5674
5824
|
result.success ? "Goal completed successfully." : `Goal failed: ${result.error}`,
|
|
@@ -6429,6 +6579,88 @@ server.tool("observer_ocr_roi", "Submit a targeted ROI OCR command to the runnin
|
|
|
6429
6579
|
return { content: [{ type: "text", text: `ROI OCR command submitted: ${id}\nRegion: (${x}, ${y}, ${width}×${height})\nThe daemon will process this on its next cycle. Call observer_ocr_roi with commandId="${id}" to poll the result.` }] };
|
|
6430
6580
|
});
|
|
6431
6581
|
// ═══════════════════════════════════════════════
|
|
6582
|
+
// STATE WATCHER — Continuous observation event bus
|
|
6583
|
+
// ═══════════════════════════════════════════════
|
|
6584
|
+
server.tool("watch_start", "Start the state watcher polling loop. Evaluates registered watch rules every 2s against the world model.", {}, async () => {
|
|
6585
|
+
stateWatcher.start();
|
|
6586
|
+
const rules = stateWatcher.getRules();
|
|
6587
|
+
return { content: [{ type: "text", text: `State watcher started. ${rules.length} rules registered.` }] };
|
|
6588
|
+
});
|
|
6589
|
+
server.tool("watch_stop", "Stop the state watcher polling loop.", {}, async () => {
|
|
6590
|
+
stateWatcher.stop();
|
|
6591
|
+
return { content: [{ type: "text", text: "State watcher stopped." }] };
|
|
6592
|
+
});
|
|
6593
|
+
server.tool("watch_register", "Register a watch rule: when element with matching title appears, execute an action. Use for automated responses to known UI states.", {
|
|
6594
|
+
id: z.string().describe("Unique rule ID"),
|
|
6595
|
+
elementTitle: z.string().describe("UI element title/label to watch for (case-insensitive substring match)"),
|
|
6596
|
+
actionTool: z.string().describe("Tool to execute when element appears (e.g. click_text, key)"),
|
|
6597
|
+
actionParams: z.record(z.string(), z.unknown()).describe("Params for the action tool"),
|
|
6598
|
+
bundleId: z.string().optional().describe("Only match when this app is focused"),
|
|
6599
|
+
maxFires: z.number().optional().describe("Max times to fire (0=unlimited, default=1)"),
|
|
6600
|
+
}, async ({ id, elementTitle, actionTool, actionParams, bundleId, maxFires }) => {
|
|
6601
|
+
// Validate tool exists and is safe for automated execution
|
|
6602
|
+
const BLOCKED_WATCH_TOOLS = new Set(["applescript", "browser_js", "browser_stealth"]);
|
|
6603
|
+
if (BLOCKED_WATCH_TOOLS.has(actionTool)) {
|
|
6604
|
+
return { content: [{ type: "text", text: `Tool "${actionTool}" is not allowed in watch rules (security: prevents arbitrary code execution)` }], isError: true };
|
|
6605
|
+
}
|
|
6606
|
+
if (!toolRegistry.has(actionTool)) {
|
|
6607
|
+
return { content: [{ type: "text", text: `Unknown tool: "${actionTool}"` }], isError: true };
|
|
6608
|
+
}
|
|
6609
|
+
stateWatcher.watchForElement(id, elementTitle, { tool: actionTool, params: actionParams }, bundleId);
|
|
6610
|
+
if (maxFires !== undefined) {
|
|
6611
|
+
const rules = stateWatcher.getRules();
|
|
6612
|
+
const rule = rules.find((r) => r.id === id);
|
|
6613
|
+
if (rule) {
|
|
6614
|
+
// Update maxFires on the registered rule
|
|
6615
|
+
const ruleState = stateWatcher.rules.get(id);
|
|
6616
|
+
if (ruleState)
|
|
6617
|
+
ruleState.rule.maxFires = maxFires;
|
|
6618
|
+
}
|
|
6619
|
+
}
|
|
6620
|
+
return { content: [{ type: "text", text: `Watch rule "${id}" registered: when "${elementTitle}" appears → ${actionTool}(${JSON.stringify(actionParams)})` }] };
|
|
6621
|
+
});
|
|
6622
|
+
server.tool("watch_dialog", "Register a dialog watch rule: when a dialog matching the pattern appears, auto-execute an action.", {
|
|
6623
|
+
id: z.string().describe("Unique rule ID"),
|
|
6624
|
+
titlePattern: z.string().describe("Regex pattern to match dialog titles"),
|
|
6625
|
+
actionTool: z.string().describe("Tool to execute (e.g. click_text, key)"),
|
|
6626
|
+
actionParams: z.record(z.string(), z.unknown()).describe("Params for the action tool"),
|
|
6627
|
+
}, async ({ id, titlePattern, actionTool, actionParams }) => {
|
|
6628
|
+
// Validate regex — reject patterns that could cause ReDoS
|
|
6629
|
+
let regex;
|
|
6630
|
+
try {
|
|
6631
|
+
regex = new RegExp(titlePattern, "i");
|
|
6632
|
+
// Quick sanity check — if it takes >50ms on a test string, reject
|
|
6633
|
+
const testStr = "a".repeat(100);
|
|
6634
|
+
const t0 = Date.now();
|
|
6635
|
+
regex.test(testStr);
|
|
6636
|
+
if (Date.now() - t0 > 50) {
|
|
6637
|
+
return { content: [{ type: "text", text: `Rejected: regex pattern "${titlePattern}" is too expensive (potential ReDoS)` }], isError: true };
|
|
6638
|
+
}
|
|
6639
|
+
}
|
|
6640
|
+
catch (e) {
|
|
6641
|
+
return { content: [{ type: "text", text: `Invalid regex: ${e instanceof Error ? e.message : String(e)}` }], isError: true };
|
|
6642
|
+
}
|
|
6643
|
+
stateWatcher.watchForDialog(id, regex, { tool: actionTool, params: actionParams });
|
|
6644
|
+
return { content: [{ type: "text", text: `Dialog watch "${id}" registered: /${titlePattern}/i → ${actionTool}(${JSON.stringify(actionParams)})` }] };
|
|
6645
|
+
});
|
|
6646
|
+
server.tool("watch_unregister", "Remove a watch rule by ID.", {
|
|
6647
|
+
id: z.string().describe("Rule ID to remove"),
|
|
6648
|
+
}, async ({ id }) => {
|
|
6649
|
+
const removed = stateWatcher.unregister(id);
|
|
6650
|
+
return { content: [{ type: "text", text: removed ? `Rule "${id}" removed.` : `Rule "${id}" not found.` }] };
|
|
6651
|
+
});
|
|
6652
|
+
server.tool("watch_status", "Get all registered watch rules and their fire counts.", {}, async () => {
|
|
6653
|
+
const rules = stateWatcher.getRules();
|
|
6654
|
+
const running = stateWatcher.isRunning;
|
|
6655
|
+
const lines = [
|
|
6656
|
+
`State watcher: ${running ? "running" : "stopped"}`,
|
|
6657
|
+
`Rules: ${rules.length}`,
|
|
6658
|
+
"",
|
|
6659
|
+
...rules.map((r) => ` [${r.id}] ${r.description} (fired ${r.fireCount}x)`),
|
|
6660
|
+
];
|
|
6661
|
+
return { content: [{ type: "text", text: lines.join("\n") }] };
|
|
6662
|
+
});
|
|
6663
|
+
// ═══════════════════════════════════════════════
|
|
6432
6664
|
// PHASE 6: TOOL MASTERY — Ingestion + Community
|
|
6433
6665
|
// ═══════════════════════════════════════════════
|
|
6434
6666
|
server.tool("scan_menu_bar", "Scan an app's menu bar via AX tree. Extracts all menu paths, keyboard shortcuts, and enabled/disabled states. Automatically merges discovered shortcuts into the reference file.", {
|
|
@@ -6490,6 +6722,8 @@ server.tool("scan_menu_bar", "Scan an app's menu bar via AX tree. Extracts all m
|
|
|
6490
6722
|
});
|
|
6491
6723
|
}
|
|
6492
6724
|
}
|
|
6725
|
+
// Hot-reload: make new reference data immediately available to context tracker
|
|
6726
|
+
_playbookStoreForContext.reload();
|
|
6493
6727
|
let output = lines.join("\n") + bootstrapInfo;
|
|
6494
6728
|
output = redactUsername(output);
|
|
6495
6729
|
output = output.replace(/Log Out [^\n:]+/g, "Log Out [USER]");
|
|
@@ -6553,6 +6787,8 @@ server.tool("ingest_documentation", "Parse a documentation page (HTML, markdown,
|
|
|
6553
6787
|
}
|
|
6554
6788
|
}
|
|
6555
6789
|
}
|
|
6790
|
+
// Hot-reload: make new reference data immediately available to context tracker
|
|
6791
|
+
_playbookStoreForContext.reload();
|
|
6556
6792
|
return { content: [{ type: "text", text: lines.join("\n") }] };
|
|
6557
6793
|
});
|
|
6558
6794
|
server.tool("ingest_tutorial", "Extract structured playbook steps from a video transcript (e.g. YouTube captions). Converts tutorial narration into actionable automation steps with tool mappings.", {
|
|
@@ -6643,6 +6879,190 @@ server.tool("discover_features", "Extract features from an app's official websit
|
|
|
6643
6879
|
lines.push(` [${f.category}] ${f.name}: ${f.description}`);
|
|
6644
6880
|
}
|
|
6645
6881
|
}
|
|
6882
|
+
// Hot-reload: make new reference data immediately available to context tracker
|
|
6883
|
+
_playbookStoreForContext.reload();
|
|
6884
|
+
return { content: [{ type: "text", text: lines.join("\n") }] };
|
|
6885
|
+
});
|
|
6886
|
+
// ── Visual App Mapping (Phase 3) ─────────────────────────────────
|
|
6887
|
+
// MCP tool "map_app": builds (or refreshes) the visual map of a running app.
//
// Flow:
//   1. Refuse sensitive apps; reuse an existing non-stale map unless `force`.
//   2. Resolve the app's PID and (best-effort) its main window title/bounds.
//   3. Phase A — run quickScan() inline and store the result in appMap.
//   4. Phase B — when depth === "full" and ANTHROPIC_API_KEY is set, start an
//      LLM enrichment pass in the background; the tool response does not wait
//      for it, and its outcome is only reported on stderr.
//
// Params: bundleId, appName, force (optional), depth ("quick" | "full", optional).
// Returns a single text content item describing the outcome.
server.tool("map_app", "Visually map an app's UI by taking a screenshot, running OCR to identify interactive elements and zones with coordinates. Makes subsequent tool calls faster and more accurate. Runs quick scan (~500ms) inline, optional LLM enrichment in background if ANTHROPIC_API_KEY is set.", {
    bundleId: z.string().describe("macOS bundle ID (e.g. com.apple.Notes)"),
    appName: z.string().describe("Human-readable app name"),
    force: z.boolean().optional().describe("Re-map even if a recent map exists (default: false)"),
    depth: z.enum(["quick", "full"]).optional().describe("'quick' = OCR only (~500ms). 'full' = OCR + LLM enrichment (~15s). Default: quick"),
}, async ({ bundleId, appName, force, depth }) => {
    if (isSensitiveApp(bundleId)) {
        return { content: [{ type: "text", text: `Blocked: ${bundleId} is a sensitive app (password manager, banking, etc.). Visual mapping is not allowed for privacy reasons.` }] };
    }
    // Reuse an existing map that is not stale unless the caller forces a re-scan.
    if (!force) {
        const existingMeta = appMap.getVisualMeta(bundleId);
        if (existingMeta && !appMap.isVisualMapStale(bundleId)) {
            return { content: [{ type: "text", text: `Visual map for ${appName} already exists (${existingMeta.screensMapped.length} screens, confidence: ${existingMeta.confidence.toFixed(2)}). Use force: true to re-map.` }] };
        }
    }
    await ensureBridge();
    // Resolve the running app's PID by matching its bundle ID in the app list.
    const apps = await bridge.call("app.list", {});
    const matchedApp = apps?.find((a) => a.bundleId === bundleId);
    if (!matchedApp?.pid) {
        return { content: [{ type: "text", text: `App ${bundleId} is not running. Launch it first with focus("${bundleId}") or launch("${bundleId}").` }] };
    }
    const pid = matchedApp.pid;
    // Window title/bounds are best-effort: on failure quickScan() receives
    // undefined bounds and the title stays empty.
    let windowTitle = "";
    let windowBounds;
    try {
        const wins = await bridge.call("window.list", {});
        const appWins = wins?.filter((w) => w.pid === pid);
        // Prefer the focused/frontmost/main window, else the first one listed.
        const mainWin = appWins?.find((w) => w.focused || w.frontmost || w.isMain) ?? appWins?.[0];
        if (mainWin) {
            windowTitle = mainWin.title ?? "";
            // Some window records carry geometry under `bounds`; otherwise the
            // record itself is passed through as the bounds object.
            windowBounds = mainWin.bounds ?? mainWin;
        }
    }
    catch { /* use defaults */ }
    // Phase A: Quick scan (OCR)
    const scanResult = await quickScan(bridge, pid, windowBounds);
    if (!scanResult) {
        return { content: [{ type: "text", text: `Failed to capture screenshot of ${appName}. Make sure the app window is visible.` }] };
    }
    const { scan } = scanResult;
    // App version is recorded in the map metadata for staleness tracking.
    let appVersion = "unknown";
    try {
        const infoResult = await bridge.call("app.info", { bundleId });
        appVersion = infoResult?.version ?? infoResult?.shortVersion ?? "unknown";
    }
    catch { /* use default */ }
    // Display scale factor; falls back to 2 (Retina) when it cannot be read.
    let scaleFactor = 2;
    try {
        const screenInfo = await bridge.call("screen.info", {});
        scaleFactor = screenInfo?.scaleFactor ?? 2;
    }
    catch { /* default to Retina */ }
    const meta = buildVisualMeta(scanResult.hash, scanResult.captureSize, windowTitle, appVersion, scan.confidence, scaleFactor);
    // Populate into AppMap
    const { added, updated } = appMap.populateFromVisualScan(bundleId, appName, scan, meta);
    const lines = [
        `Visual map for ${appName} (${bundleId}):`,
        ` Zones identified: ${scan.zones.length}`,
        ` Elements mapped: ${scan.elements.length} (${added} new, ${updated} updated)`,
        ` Map confidence: ${scan.confidence.toFixed(2)}`,
        ` App version: ${appVersion}`,
    ];
    if (scan.zones.length > 0) {
        // Tally elements per zone label in one pass instead of re-filtering
        // the full element list for every zone.
        const elementsPerZone = new Map();
        for (const element of scan.elements) {
            elementsPerZone.set(element.zone, (elementsPerZone.get(element.zone) ?? 0) + 1);
        }
        lines.push(" Zones:");
        // Loop variable is deliberately not named `z`, which is the zod import.
        for (const zone of scan.zones) {
            lines.push(` ${zone.label} (${zone.type}): ${elementsPerZone.get(zone.label) ?? 0} elements`);
        }
    }
    // Phase B: LLM enrichment (background, if depth=full and API key exists)
    if (depth === "full") {
        if (process.env.ANTHROPIC_API_KEY) {
            lines.push(" LLM enrichment: starting in background...");
            // Fire and forget — don't block the response
            (async () => {
                try {
                    // NOTE(review): this captures the whole screen via cg.captureScreen,
                    // while the size handed to llmEnrich() is the quick scan's capture
                    // size — confirm the two agree and that a full-screen capture is
                    // intended here.
                    const screenshotShot = await bridge.call("cg.captureScreen", {});
                    if (!screenshotShot?.path)
                        return;
                    // Asynchronous read: this runs alongside live tool calls, so a
                    // synchronous read of the image would stall the event loop.
                    const { readFile } = await import("node:fs/promises");
                    const screenshotBase64 = (await readFile(screenshotShot.path)).toString("base64");
                    // AX tree (depth 3, truncated to 3000 chars) for cross-reference.
                    let axTree = "";
                    try {
                        const tree = await bridge.call("ax.tree", { pid, depth: 3 });
                        axTree = JSON.stringify(tree, null, 1).slice(0, 3000);
                    }
                    catch { /* proceed without AX */ }
                    const enrichment = await llmEnrich(screenshotBase64, axTree, appName, bundleId, windowTitle, scanResult.captureSize);
                    if (enrichment) {
                        // LLM output is a hypothesis: element confidence is capped at
                        // 0.5 and the overall scan/meta confidence at 0.6.
                        const cappedConfidence = Math.min(enrichment.confidence, 0.6);
                        const llmScan = {
                            zones: enrichment.zones,
                            elements: enrichment.elements.map((e) => ({
                                ...e,
                                confidence: Math.min(e.confidence, 0.5),
                            })),
                            confidence: cappedConfidence,
                        };
                        appMap.populateFromVisualScan(bundleId, appName, llmScan, {
                            ...meta,
                            confidence: cappedConfidence,
                        });
                        process.stderr.write(`[visual-mapper] LLM enrichment complete for ${appName}: ${enrichment.elements.length} elements, ${enrichment.zones.length} zones\n`);
                    }
                }
                catch (err) {
                    process.stderr.write(`[visual-mapper] Background enrichment failed: ${err instanceof Error ? err.message : String(err)}\n`);
                }
            })().catch(() => { });
        }
        else {
            lines.push(" LLM enrichment: skipped (no ANTHROPIC_API_KEY set)");
        }
    }
    return { content: [{ type: "text", text: lines.join("\n") }] };
});
|
|
7008
|
+
// MCP tool "map_status": plain-text health report for an app's visual map —
// scan age, app version, confidence, staleness, mapped screens, capture
// geometry, element counts by label source, position match rate and a
// per-zone breakdown (first 15 zones).
originalTool("map_status", "Check the health of an app's visual map. Shows zones, element counts, confidence, staleness, and failure rates. Useful for debugging click failures.", {
    bundleId: z.string().describe("macOS bundle ID"),
}, async ({ bundleId }) => {
    const visualMeta = appMap.getVisualMeta(bundleId);
    // Use the in-memory map when present, otherwise load it.
    const mapData = appMap.getLoaded(bundleId) ?? appMap.load(bundleId);
    if (!visualMeta) {
        const text = `No visual map exists for ${bundleId}. Run map_app to create one.`;
        return { content: [{ type: "text", text }] };
    }
    const stale = appMap.isVisualMapStale(bundleId);
    const elapsedMs = Date.now() - new Date(visualMeta.lastScannedAt).getTime();
    const hoursSinceScan = Math.round(elapsedMs / 3_600_000);
    const report = [];
    report.push(`Visual Map Status: ${bundleId}`);
    report.push(` Last scanned: ${visualMeta.lastScannedAt} (${hoursSinceScan}h ago)`);
    report.push(` App version: ${visualMeta.appVersion}`);
    report.push(` Confidence: ${visualMeta.confidence.toFixed(2)}`);
    report.push(` Staleness: ${stale ? "STALE — consider re-mapping" : "fresh"}`);
    report.push(` Screens mapped: ${visualMeta.screensMapped.join(", ") || "(none)"}`);
    report.push(` Scale factor: ${visualMeta.scaleFactor}x`);
    report.push(` Capture size: ${visualMeta.captureSize.w}x${visualMeta.captureSize.h}`);
    if (mapData) {
        // Tally elements by where their label came from. Validation and
        // mismatch counters are only accumulated for OCR/LLM-labelled elements.
        const tally = { visual: 0, ax: 0, matches: 0, mismatches: 0 };
        for (const { elements } of Object.values(mapData.zones)) {
            for (const element of elements) {
                switch (element.labelSource) {
                    case "ocr":
                    case "llm":
                        tally.visual += 1;
                        tally.matches += element.validationCount ?? 0;
                        tally.mismatches += element.mismatchCount ?? 0;
                        break;
                    case "ax":
                    case "manual":
                        tally.ax += 1;
                        break;
                    default:
                        // Any other label source is not counted in either bucket.
                        break;
                }
            }
        }
        report.push(` Visual-scan elements: ${tally.visual}`);
        report.push(` AX-confirmed elements: ${tally.ax}`);
        const observations = tally.matches + tally.mismatches;
        if (observations > 0) {
            const matchRate = tally.matches / observations;
            report.push(` Position match rate: ${(matchRate * 100).toFixed(1)}% (${tally.matches} matches, ${tally.mismatches} mismatches)`);
        }
        // Zone breakdown, limited to the first 15 zones to keep the report short.
        const zoneNames = Object.keys(mapData.zones);
        if (zoneNames.length > 0) {
            report.push(" Zones:");
            zoneNames.slice(0, 15).forEach((zoneName) => {
                const zoneEntry = mapData.zones[zoneName];
                report.push(` ${zoneName} (${zoneEntry.type}): ${zoneEntry.elements.length} elements`);
            });
            const hiddenZones = zoneNames.length - 15;
            if (hiddenZones > 0) {
                report.push(` ... and ${hiddenZones} more`);
            }
        }
    }
    return { content: [{ type: "text", text: report.join("\n") }] };
});
|
|
6648
7068
|
server.tool("coverage_report", "Check what ScreenHand knows about an app: shortcuts, selectors, flows, playbooks, error patterns, and stability %. Useful before complex workflows to decide strategy: learn first (if empty), go fast (if high coverage), or use fallback tools (if error patterns exist). Optional for quick actions.", {
|
|
@@ -6751,10 +7171,14 @@ originalTool("community_fetch", "Search community playbooks for a platform or wo
|
|
|
6751
7171
|
// START
|
|
6752
7172
|
// ═══════════════════════════════════════════════
|
|
6753
7173
|
/**
 * Entry point: installs shutdown hooks that persist learned state, then
 * connects the MCP server over stdio.
 *
 * Shutdown is triggered by SIGINT, SIGTERM, stdin "end"/"close" (MCP clients
 * often close stdin without sending a signal) or a normal "beforeExit".
 * Several of these can fire for one shutdown, so the flush is run at most
 * once, and each step is isolated so one failing step cannot prevent the
 * remaining state from being flushed or the process from exiting.
 *
 * @returns {Promise<void>} resolves once server.connect() has completed.
 */
async function main() {
    const reportShutdownError = (step, err) => {
        process.stderr.write(`[shutdown] ${step} failed: ${err instanceof Error ? err.message : String(err)}\n`);
    };
    // Ordered shutdown steps: stop background activity first, then flush stores.
    const shutdownSteps = [
        ["perceptionManager.stop", () => {
            // The original discarded this call's result with `void`, so it is
            // presumably a promise. It cannot be awaited on a synchronous exit
            // path, but a rejection must not go unhandled.
            Promise.resolve(perceptionManager.stop()).catch((err) => reportShutdownError("perceptionManager.stop", err));
        }],
        ["perceptionManager.stopStallDetection", () => perceptionManager.stopStallDetection()],
        ["stateWatcher.stop", () => stateWatcher.stop()],
        ["contextTracker.flush", () => contextTracker.flush()],
        ["learningEngine.flush", () => learningEngine.flush()],
        ["appMap.flush", () => appMap.flush()],
    ];
    let hasFlushed = false;
    // Flush all learned state on shutdown (signals, stdin EOF, or normal exit)
    const flushAll = () => {
        if (hasFlushed) {
            return;
        }
        hasFlushed = true;
        for (const [step, run] of shutdownSteps) {
            try {
                run();
            }
            catch (err) {
                reportShutdownError(step, err);
            }
        }
    };
    const flushAndExit = () => {
        flushAll();
        process.exit(0);
    };
    process.on("SIGINT", flushAndExit);
    process.on("SIGTERM", flushAndExit);
    // "beforeExit" can fire repeatedly if a listener schedules more work; the
    // run-once guard in flushAll keeps that from re-running the shutdown steps.
    process.on("beforeExit", flushAll);
    // MCP clients often close stdin without sending a signal — flush on stdin end too
    process.stdin.on("end", flushAndExit);
    process.stdin.on("close", flushAndExit);
    const transport = new StdioServerTransport();
    await server.connect(transport);
}
|