screenhand 0.4.9 → 0.5.2

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -52,8 +52,10 @@ import { PlaybookStore } from "./src/playbook/store.js";
52
52
  import { ContextTracker } from "./src/context-tracker.js";
53
53
  import { McpPlaybookRecorder } from "./src/playbook/mcp-recorder.js";
54
54
  import { WorldModel } from "./src/state/index.js";
55
+ import { StateWatcher } from "./src/state/state-watcher.js";
55
56
  import { PerceptionManager } from "./src/perception/index.js";
56
57
  import { Planner, PlanExecutor, GoalStore, ToolRegistry } from "./src/planner/index.js";
58
+ import { PlanRefiner } from "./src/planner/plan-refiner.js";
57
59
  import { RecoveryEngine } from "./src/recovery/index.js";
58
60
  import { LearningEngine, LocatorPolicy } from "./src/learning/index.js";
59
61
  import { discoverWebElements, testWebElement, compileReference, saveExploreResult, discoverNativeElements } from "./src/platform/explorer.js";
@@ -531,17 +533,24 @@ const leaseManager = new LeaseManager(LOCK_DIR);
531
533
  // Playbooks dir holds only executable step sequences for job_create
532
534
  // Resolution order: local dev paths → npm dist paths → ~/.screenhand/ user paths
533
535
  function resolveDataDir(name) {
534
- // 1. Local dev path (when running from source)
536
+ const hasJson = (dir) => fs.existsSync(dir) && fs.readdirSync(dir).some(f => f.endsWith(".json"));
537
+ // 1. Local dev path (when running from source: references/, playbooks/)
535
538
  const local = path.resolve(__dirname, name);
536
- if (fs.existsSync(local) && fs.readdirSync(local).some(f => f.endsWith(".json"))) {
539
+ if (hasJson(local))
537
540
  return local;
538
- }
539
- // 2. npm dist path (when installed via npx/npm)
541
+ // 2. npm dist path — same level (dist-references/ next to dist/)
540
542
  const dist = path.resolve(__dirname, `dist-${name}`);
541
- if (fs.existsSync(dist) && fs.readdirSync(dist).some(f => f.endsWith(".json"))) {
543
+ if (hasJson(dist))
542
544
  return dist;
543
- }
544
- // 3. User home path (always available for user-generated content)
545
+ // 3. npm dist path — parent level (when __dirname is dist/, check ../dist-references/)
546
+ const parentDist = path.resolve(__dirname, "..", `dist-${name}`);
547
+ if (hasJson(parentDist))
548
+ return parentDist;
549
+ // 4. Parent level plain name (../references/)
550
+ const parentLocal = path.resolve(__dirname, "..", name);
551
+ if (hasJson(parentLocal))
552
+ return parentLocal;
553
+ // 5. User home path (always available for user-generated content)
545
554
  const userDir = path.join(os.homedir(), ".screenhand", name);
546
555
  if (!fs.existsSync(userDir)) {
547
556
  fs.mkdirSync(userDir, { recursive: true });
@@ -563,6 +572,9 @@ const seedAppMapsDir = (() => {
563
572
  const dist = path.resolve(__dirname, "dist-app-maps");
564
573
  if (fs.existsSync(dist))
565
574
  return dist;
575
+ const parentDist = path.resolve(__dirname, "..", "dist-app-maps");
576
+ if (fs.existsSync(parentDist))
577
+ return parentDist;
566
578
  const local = path.resolve(__dirname, "seed-app-maps");
567
579
  if (fs.existsSync(local))
568
580
  return local;
@@ -599,6 +611,7 @@ catch { /* dir may not exist */ }
599
611
  const planner = new Planner(_executablePlaybookStore, memory, contextTracker, worldModel, learningEngine);
600
612
  const goalStore = new GoalStore(path.join(os.homedir(), ".screenhand", "planner"));
601
613
  goalStore.init();
614
+ const planRefiner = new PlanRefiner(path.join(os.homedir(), ".screenhand", "planner"));
602
615
  const toolRegistry = new ToolRegistry();
603
616
  const recoveryEngine = new RecoveryEngine(worldModel, toolRegistry.toExecutor(), memory);
604
617
  recoveryEngine.setLearningEngine(learningEngine);
@@ -606,6 +619,7 @@ recoveryEngine.setAppMap(appMap);
606
619
  planner.setToolRegistry(toolRegistry);
607
620
  planner.setAppMap(appMap);
608
621
  perceptionManager.setLearningEngine(learningEngine);
622
+ const stateWatcher = new StateWatcher(worldModel, toolRegistry.toExecutor(), 2_000);
609
623
  // ── Reactive event loop: wire perception events to automatic responses ──
610
624
  // These fire at perception speed (100-300ms), not LLM speed (~2-3s).
611
625
  perceptionManager.on("dialog_detected", (event) => {
@@ -639,6 +653,28 @@ perceptionManager.on("app_switched", (event) => {
639
653
  // Log for observability
640
654
  console.error(`[reactive] App switched to ${event.bundleId} (pid=${event.pid})`);
641
655
  });
656
+ // ── Perception-triggered recovery: focus loss, app crash, stall ──
657
+ perceptionManager.on("focus_lost", (event) => {
658
+ console.error(`[reactive] Focus lost: expected ${event.expectedBundleId}, got ${event.actualBundleId} — auto-refocusing`);
659
+ // Auto-refocus the expected app
660
+ toolRegistry.toExecutor()("focus", { bundleId: event.expectedBundleId }).catch((err) => {
661
+ console.error(`[reactive] Auto-refocus failed: ${err instanceof Error ? err.message : err}`);
662
+ });
663
+ });
664
+ perceptionManager.on("app_crash", (event) => {
665
+ console.error(`[reactive] App crash detected: ${event.bundleId} (pid=${event.pid}) — auto-relaunching`);
666
+ // Auto-relaunch the crashed app
667
+ toolRegistry.toExecutor()("launch", { bundleId: event.bundleId }).catch((err) => {
668
+ console.error(`[reactive] Auto-relaunch failed: ${err instanceof Error ? err.message : err}`);
669
+ });
670
+ });
671
+ perceptionManager.on("stall_detected", (event) => {
672
+ console.error(`[reactive] UI stall detected: ${event.bundleId} — no changes for ${(event.stallMs / 1000).toFixed(0)}s — taking screenshot for diagnosis`);
673
+ // Take a screenshot so the next LLM call can see what's on screen
674
+ toolRegistry.toExecutor()("screenshot", {}).catch((err) => {
675
+ console.error(`[reactive] Stall screenshot failed: ${err instanceof Error ? err.message : err}`);
676
+ });
677
+ });
642
678
  const mcpRecorder = new McpPlaybookRecorder(playbooksDir);
643
679
  const referenceMerger = new ReferenceMerger(referencesDir);
644
680
  const communityPublisher = new PlaybookPublisher();
@@ -671,6 +707,7 @@ const MEMORY_TOOLS = new Set([
671
707
  ]);
672
708
  // Track the strategy we're currently following (for feedback loop)
673
709
  let activeStrategyFingerprint = null;
710
+ let autoExecutionInProgress = false; // guard against concurrent auto-execution
674
711
  let currentAdaptiveBudget = null;
675
712
  // Intercept all tool registrations to auto-log + auto-recall
676
713
  const _rawOriginalTool = server.tool.bind(server);
@@ -760,7 +797,7 @@ server.tool = (...args) => {
760
797
  if (!perceptionManager.isRunning && bridgeReady) {
761
798
  const focusApp = worldModel.getState().focusedApp;
762
799
  if (focusApp?.bundleId && focusApp?.pid) {
763
- perceptionManager.tryAutoStart(focusApp, bridge).catch(() => { });
800
+ perceptionManager.tryAutoStart(focusApp, bridge).catch((e) => { process.stderr.write(`[screenhand] perception auto-start failed: ${e instanceof Error ? e.message : String(e)}\n`); });
764
801
  installSafariEnricher(focusApp.bundleId);
765
802
  }
766
803
  }
@@ -811,7 +848,7 @@ server.tool = (...args) => {
811
848
  "type_with_fallback", "select_with_fallback", "scroll_with_fallback",
812
849
  ]);
813
850
  try {
814
- const result = await originalHandler(params, extra);
851
+ let result = await originalHandler(params, extra);
815
852
  const durationMs = Date.now() - start;
816
853
  // ── POST-CALL: log action (async, non-blocking) ──
817
854
  const entry = {
@@ -863,7 +900,9 @@ server.tool = (...args) => {
863
900
  try {
864
901
  appMap.recordPageTransition(postBundleIdForCtx, pageTransition.from, pageTransition.to, toolName);
865
902
  }
866
- catch { /* non-critical — don't break tool execution for nav tracking */ }
903
+ catch (e) {
904
+ process.stderr.write(`[screenhand] nav tracking failed: ${e instanceof Error ? e.message : String(e)}\n`);
905
+ }
867
906
  }
868
907
  // ── POST-CALL: detect focus drift ──
869
908
  const postBundleId = worldModel.getState().focusedApp?.bundleId ?? null;
@@ -937,7 +976,9 @@ server.tool = (...args) => {
937
976
  }
938
977
  }
939
978
  }
940
- catch { /* non-fatal */ }
979
+ catch (e) {
980
+ process.stderr.write(`[screenhand] app map feature learning failed: ${e instanceof Error ? e.message : String(e)}\n`);
981
+ }
941
982
  }
942
983
  if (!resultIsError && learnBundleId !== "unknown") {
943
984
  try {
@@ -1359,7 +1400,9 @@ server.tool = (...args) => {
1359
1400
  }
1360
1401
  }
1361
1402
  }
1362
- catch { /* hierarchy extraction non-fatal */ }
1403
+ catch (e) {
1404
+ process.stderr.write(`[screenhand] hierarchy extraction failed: ${e instanceof Error ? e.message : String(e)}\n`);
1405
+ }
1363
1406
  }
1364
1407
  }
1365
1408
  // ── Conditional UI visibility tracking (throttled) ──
@@ -1407,7 +1450,9 @@ server.tool = (...args) => {
1407
1450
  }
1408
1451
  }
1409
1452
  }
1410
- catch { /* visibility tracking non-fatal */ }
1453
+ catch (e) {
1454
+ process.stderr.write(`[screenhand] visibility tracking failed: ${e instanceof Error ? e.message : String(e)}\n`);
1455
+ }
1411
1456
  }
1412
1457
  }
1413
1458
  // ── Timing recording: track tool response times per element ──
@@ -1501,25 +1546,65 @@ server.tool = (...args) => {
1501
1546
  if (knownError) {
1502
1547
  hints.push(`⚡ Memory: "${toolName}" has failed before: "${knownError.error}" (${knownError.occurrences}x). Fix: ${knownError.resolution}`);
1503
1548
  }
1504
- // Suggest next step if we're mid-strategy
1549
+ // ── Strategy matching: auto-execute proven strategies OR hint unproven ones ──
1505
1550
  const recentTools = memory.getRecentToolNames();
1506
- const strategyHint = memory.quickStrategyHint(recentTools, worldModel.getState().focusedApp?.bundleId);
1507
- if (strategyHint) {
1508
- activeStrategyFingerprint = strategyHint.fingerprint;
1509
- const nextParams = Object.keys(strategyHint.nextStep.params).length > 0
1510
- ? `(${JSON.stringify(strategyHint.nextStep.params)})`
1511
- : "";
1512
- hints.push(`💡 Memory: This matches strategy "${strategyHint.strategy.task}" (${strategyHint.strategy.successCount} wins, ${strategyHint.strategy.failCount ?? 0} fails). Next step: ${strategyHint.nextStep.tool}${nextParams}`);
1513
- // If this was the last step of the strategy, record success
1514
- if (recentTools.length === strategyHint.strategy.steps.length - 1) {
1515
- // Next call will be the final step but this call completing means we're on track
1551
+ const currentBundleForStrategy = worldModel.getState().focusedApp?.bundleId;
1552
+ // Try auto-execution first (10+ successes, 0 failures)
1553
+ // Guard: skip if another auto-execution is already in progress
1554
+ const autoExec = autoExecutionInProgress ? null : memory.getAutoExecutableStrategy(recentTools, currentBundleForStrategy);
1555
+ if (autoExec) {
1556
+ autoExecutionInProgress = true;
1557
+ activeStrategyFingerprint = autoExec.fingerprint;
1558
+ const autoResults = [];
1559
+ let allOk = true;
1560
+ hints.push(`🚀 Auto-executing proven strategy "${autoExec.strategy.task}" (${autoExec.strategy.successCount} wins)${autoExec.remainingSteps.length} steps remaining`);
1561
+ for (const step of autoExec.remainingSteps) {
1562
+ try {
1563
+ const stepResult = await toolRegistry.toExecutor()(step.tool, step.params);
1564
+ autoResults.push({ tool: step.tool, ...stepResult });
1565
+ // Record outcome for learning
1566
+ const target = typeof step.params.target === "string" ? step.params.target
1567
+ : typeof step.params.title === "string" ? step.params.title
1568
+ : typeof step.params.text === "string" ? step.params.text
1569
+ : null;
1570
+ contextTracker.recordOutcome(step.tool, { target, text: typeof step.params.text === "string" ? step.params.text : null }, stepResult.ok, stepResult.ok ? null : (stepResult.error ?? null));
1571
+ if (!stepResult.ok) {
1572
+ allOk = false;
1573
+ hints.push(` ✗ ${step.tool} failed: ${stepResult.error ?? "unknown"}`);
1574
+ break; // Stop auto-execution on first failure
1575
+ }
1576
+ hints.push(` ✓ ${step.tool} — ok`);
1577
+ }
1578
+ catch (err) {
1579
+ allOk = false;
1580
+ hints.push(` ✗ ${step.tool} threw: ${err instanceof Error ? err.message : String(err)}`);
1581
+ break;
1582
+ }
1516
1583
  }
1517
- }
1518
- else if (activeStrategyFingerprint && recentTools.length > 0) {
1519
- // We were following a strategy but the sequence diverged — record success
1520
- // (the agent completed the strategy or went its own way after it)
1521
- memory.recordStrategyOutcome(activeStrategyFingerprint, true);
1584
+ // Record strategy outcome
1585
+ memory.recordStrategyOutcome(autoExec.fingerprint, allOk);
1522
1586
  activeStrategyFingerprint = null;
1587
+ autoExecutionInProgress = false;
1588
+ // Append auto-execution results to the response
1589
+ const autoSummary = autoResults.map((r) => `${r.tool}: ${r.ok ? "ok" : r.error}`).join("\n");
1590
+ const resultContent = Array.isArray(result?.content) ? result.content : [];
1591
+ resultContent.push({ type: "text", text: `\n── AUTO-EXECUTED (${autoResults.length} steps) ──\n${autoSummary}` });
1592
+ result = { ...result, content: resultContent };
1593
+ }
1594
+ else {
1595
+ // Fall back to strategy hint (suggest but don't execute)
1596
+ const strategyHint = memory.quickStrategyHint(recentTools, currentBundleForStrategy);
1597
+ if (strategyHint) {
1598
+ activeStrategyFingerprint = strategyHint.fingerprint;
1599
+ const nextParams = Object.keys(strategyHint.nextStep.params).length > 0
1600
+ ? `(${JSON.stringify(strategyHint.nextStep.params)})`
1601
+ : "";
1602
+ hints.push(`💡 Memory: This matches strategy "${strategyHint.strategy.task}" (${strategyHint.strategy.successCount} wins, ${strategyHint.strategy.failCount ?? 0} fails). Next step: ${strategyHint.nextStep.tool}${nextParams}`);
1603
+ }
1604
+ else if (activeStrategyFingerprint && recentTools.length > 0) {
1605
+ memory.recordStrategyOutcome(activeStrategyFingerprint, true);
1606
+ activeStrategyFingerprint = null;
1607
+ }
1523
1608
  }
1524
1609
  // Attach hints in BOTH content (visible) and _meta (for programmatic access)
1525
1610
  if (hints.length > 0) {
@@ -1735,7 +1820,9 @@ server.tool("focus", "Focus/activate an application (or a specific window by win
1735
1820
  targetApp = { bundleId, name: appWin.appName, pid: appWin.pid || appWin.ownerPid };
1736
1821
  }
1737
1822
  }
1738
- catch { /* ignore */ }
1823
+ catch (e) {
1824
+ process.stderr.write(`[screenhand] focus window check for ${bundleId} failed: ${e instanceof Error ? e.message : String(e)}\n`);
1825
+ }
1739
1826
  if (!targetApp) {
1740
1827
  return { content: [{ type: "text", text: `Error: ${bundleId} is not running. Use launch("${bundleId}") first.` }], isError: true };
1741
1828
  }
@@ -1806,10 +1893,14 @@ server.tool("focus", "Focus/activate an application (or a specific window by win
1806
1893
  await perceptionManager.ensureStarted(ctx);
1807
1894
  installSafariEnricher(bundleId);
1808
1895
  }
1809
- catch { /* best-effort */ }
1896
+ catch (e) {
1897
+ process.stderr.write(`[screenhand] perception ensureStarted in focus failed: ${e instanceof Error ? e.message : String(e)}\n`);
1898
+ }
1810
1899
  }
1811
1900
  }
1812
- catch { /* app.list failed — world model update is best-effort */ }
1901
+ catch (e) {
1902
+ process.stderr.write(`[screenhand] focus world-model update failed: ${e instanceof Error ? e.message : String(e)}\n`);
1903
+ }
1813
1904
  return { content: [{ type: "text", text: focusMsg }] };
1814
1905
  }
1815
1906
  finally {
@@ -1873,7 +1964,9 @@ server.tool("launch", "Launch an application. Chrome/Chromium browsers are launc
1873
1964
  await perceptionManager.ensureStarted({ bundleId, appName: r.appName ?? bundleId, pid: r.pid, windowTitle: "", ...(windowId != null ? { windowId } : {}) });
1874
1965
  installSafariEnricher(bundleId);
1875
1966
  }
1876
- catch { /* perception start is best-effort */ }
1967
+ catch (e) {
1968
+ process.stderr.write(`[screenhand] perception start after launch failed: ${e instanceof Error ? e.message : String(e)}\n`);
1969
+ }
1877
1970
  let msg = `Launched ${r.appName} pid=${r.pid}`;
1878
1971
  if (chromeAppName) {
1879
1972
  const port = cdpPort ?? 9222;
@@ -2102,7 +2195,9 @@ server.tool("ui_press", "PREFERRED: Find and press/click a UI element by its tit
2102
2195
  return { content: [{ type: "text", text: `Element "${title}" not found in PID ${pid}. A system dialog from "${front.name}" (${front.bundleId}, PID ${front.pid}) may be blocking. Dismiss it first, or use click(x, y) to interact with the dialog directly.` }], isError: true };
2103
2196
  }
2104
2197
  }
2105
- catch { /* ignore frontmost check failure */ }
2198
+ catch (e) {
2199
+ process.stderr.write(`[screenhand] frontmost check in ui_press failed: ${e instanceof Error ? e.message : String(e)}\n`);
2200
+ }
2106
2201
  throw new Error(`Element "${title}" not found (searched title, value, and description)`);
2107
2202
  }
2108
2203
  }
@@ -2323,7 +2418,9 @@ server.tool("type_text", "Type text using the keyboard. Auto-detects Electron ap
2323
2418
  catch { /* not available on this port */ }
2324
2419
  }
2325
2420
  }
2326
- catch { /* auto-detect is best-effort */ }
2421
+ catch (e) {
2422
+ process.stderr.write(`[screenhand] CDP auto-detect failed: ${e instanceof Error ? e.message : String(e)}\n`);
2423
+ }
2327
2424
  }
2328
2425
  if (electronCdpPort) {
2329
2426
  // CDP path: click editor to ensure focus, then type via key events
@@ -2386,7 +2483,9 @@ server.tool("key", "Press a key combination", {
2386
2483
  const front = await bridge.call("app.frontmost", {});
2387
2484
  targetPid = front.pid;
2388
2485
  }
2389
- catch { /* fallback to global posting */ }
2486
+ catch (e) {
2487
+ process.stderr.write(`[screenhand] key frontmost PID resolve failed: ${e instanceof Error ? e.message : String(e)}\n`);
2488
+ }
2390
2489
  }
2391
2490
  const keys = combo.split("+");
2392
2491
  const hasModifier = keys.some(k => ["cmd", "ctrl", "alt", "shift"].includes(k.toLowerCase()));
@@ -2456,7 +2555,9 @@ async function getCDPClient(tabId, overridePort) {
2456
2555
  try {
2457
2556
  perceptionManager.activateCDP(client);
2458
2557
  }
2459
- catch { /* best-effort */ }
2558
+ catch (e) {
2559
+ process.stderr.write(`[screenhand] perception CDP activate failed: ${e instanceof Error ? e.message : String(e)}\n`);
2560
+ }
2460
2561
  return { client, targetId: targetId, CDP: cdp, port };
2461
2562
  }
2462
2563
  // ── Random delay helper ──
@@ -3393,6 +3494,12 @@ server.tool("platform_explore", "Autonomously explore an app or website. Maps al
3393
3494
  // Compile and save
3394
3495
  const result = compileReference(platform, "web", tested, url);
3395
3496
  const filePath = saveExploreResult(referencesDir, result);
3497
+ // Auto-merge explore selectors into main reference so data isn't fragmented
3498
+ if (result.selectors && Object.keys(result.selectors).length > 0) {
3499
+ referenceMerger.mergeExploreSelectors(result.selectors, result.errors, "", platform);
3500
+ }
3501
+ // Hot-reload: make new data immediately available to context tracker
3502
+ _playbookStoreForContext.reload();
3396
3503
  return { content: [{ type: "text", text: `Exploration complete: ${filePath}\n\nElements found: ${elements.length}\nTested: ${result.testedElements}\nWorking selectors: ${result.workingSelectors}\nErrors: ${result.errors.length}\n\nKey discoveries:\n${result.keyDiscoveries.map(d => ` - ${d}`).join("\n")}` }] };
3397
3504
  }
3398
3505
  else if (bundleId) {
@@ -3414,6 +3521,12 @@ server.tool("platform_explore", "Autonomously explore an app or website. Maps al
3414
3521
  ...el, clickWorked: true, result: "discovered_not_tested",
3415
3522
  })), undefined, bundleId);
3416
3523
  const filePath = saveExploreResult(referencesDir, result);
3524
+ // Auto-merge explore selectors into main reference so data isn't fragmented
3525
+ if (result.selectors && Object.keys(result.selectors).length > 0) {
3526
+ referenceMerger.mergeExploreSelectors(result.selectors, result.errors, bundleId, platform);
3527
+ }
3528
+ // Hot-reload: make new data immediately available to context tracker
3529
+ _playbookStoreForContext.reload();
3417
3530
  return { content: [{ type: "text", text: `Native app exploration complete: ${filePath}\n\nElements discovered: ${elements.length}\n(Native elements discovered but not auto-clicked for safety. Use playbook_record to test interactively.)` }] };
3418
3531
  }
3419
3532
  else {
@@ -5027,7 +5140,9 @@ server.tool("scroll_with_fallback", "Scroll within an element or the active wind
5027
5140
  return { content: [{ type: "text", text: `"${target}" is visible after ${i} scroll(s).` }] };
5028
5141
  }
5029
5142
  }
5030
- catch { /* OCR failed, keep scrolling */ }
5143
+ catch (e) {
5144
+ process.stderr.write(`[screenhand] OCR during scroll search failed: ${e instanceof Error ? e.message : String(e)}\n`);
5145
+ }
5031
5146
  // Scroll once
5032
5147
  const deltaX = direction === "left" ? -scrollAmount : direction === "right" ? scrollAmount : 0;
5033
5148
  const deltaY = direction === "up" ? -scrollAmount : direction === "down" ? scrollAmount : 0;
@@ -5154,7 +5269,9 @@ server.tool("wait_for_state", "Wait until a condition is met on screen: text app
5154
5269
  await client.close();
5155
5270
  }
5156
5271
  }
5157
- catch { /* CDP unavailable */ }
5272
+ catch (e) {
5273
+ process.stderr.write(`[screenhand] wait_for_state CDP check failed: ${e instanceof Error ? e.message : String(e)}\n`);
5274
+ }
5158
5275
  }
5159
5276
  const elapsed = Date.now() - (deadline - timeout);
5160
5277
  lastCheck = `${elapsed}ms`;
@@ -5440,6 +5557,18 @@ function getJobRunner() {
5440
5557
  timeout: 15000,
5441
5558
  }).trim();
5442
5559
  });
5560
+ // Wire learning feedback: PlaybookEngine reports step outcomes to context tracker + AppMap
5561
+ playbookEngine.setOutcomeCallback((step, success, error) => {
5562
+ const target = typeof step.target === "string" ? step.target : null;
5563
+ contextTracker.recordOutcome(step.action, { target, text: step.text }, success, error);
5564
+ const bid = worldModel.getState().focusedApp?.bundleId ?? lastKnownBundleId;
5565
+ if (bid && target) {
5566
+ try {
5567
+ appMap.recordElementOutcome(bid, "auto", target, success);
5568
+ }
5569
+ catch { /* non-critical */ }
5570
+ }
5571
+ });
5443
5572
  activeJobRunner = new JobRunner(bridge, jobManager, leaseManager, supervisor, (() => {
5444
5573
  const cfg = {
5445
5574
  hasCDP: cdpPort !== null,
@@ -5616,10 +5745,22 @@ originalTool("plan_execute", "Run a plan automatically. Known steps (from playbo
5616
5745
  if (!goal) {
5617
5746
  return { content: [{ type: "text", text: `Goal not found: ${goalId}` }] };
5618
5747
  }
5619
- const adaptiveBudget = learningEngine.getAdaptiveBudget(worldModel.getState().focusedApp?.bundleId ?? "unknown");
5748
+ const focusedBundleId = worldModel.getState().focusedApp?.bundleId ?? "unknown";
5749
+ const adaptiveBudget = learningEngine.getAdaptiveBudget(focusedBundleId);
5620
5750
  const executor = new PlanExecutor(worldModel, planner, toolRegistry.toExecutor(), { postconditionWaitMs: adaptiveBudget.verifyMs, defaultStepTimeout: Math.max(30_000, adaptiveBudget.actMs * 2) }, recoveryEngine, learningEngine);
5621
5751
  executor.setAppMap(appMap);
5622
- const result = await executor.executeGoal(goal);
5752
+ // Enable perception-triggered recovery during plan execution
5753
+ perceptionManager.setExpectedApp(focusedBundleId);
5754
+ perceptionManager.startStallDetection(30_000);
5755
+ let result;
5756
+ try {
5757
+ result = await executor.executeGoal(goal);
5758
+ }
5759
+ finally {
5760
+ // Disable reactive recovery after plan completes
5761
+ perceptionManager.setExpectedApp(null);
5762
+ perceptionManager.stopStallDetection();
5763
+ }
5623
5764
  goalStore.update(goalId, goal);
5624
5765
  // Check if paused at an LLM step
5625
5766
  if ("paused" in result) {
@@ -5658,7 +5799,25 @@ originalTool("plan_execute", "Run a plan automatically. Known steps (from playbo
5658
5799
  }
5659
5800
  }
5660
5801
  }
5661
- catch { /* strategy recording is best-effort */ }
5802
+ catch (e) {
5803
+ process.stderr.write(`[screenhand] strategy recording failed: ${e instanceof Error ? e.message : String(e)}\n`);
5804
+ }
5805
+ // Self-improving plans: refine and check for graduation
5806
+ try {
5807
+ const refinement = planRefiner.refine(goal, result);
5808
+ if (refinement.refinementCount > 0) {
5809
+ process.stderr.write(`[plan-refiner] Refined plan for "${goal.description}" (${refinement.refinementCount}x)\n`);
5810
+ }
5811
+ // Check graduation to playbook (3+ refinements)
5812
+ const playbook = planRefiner.checkGraduation(goal.description, focusedBundleId, worldModel.getState().focusedApp?.appName ?? focusedBundleId);
5813
+ if (playbook) {
5814
+ _playbookStoreForContext.save(playbook);
5815
+ process.stderr.write(`[plan-refiner] Plan GRADUATED to playbook: ${playbook.id}\n`);
5816
+ }
5817
+ }
5818
+ catch (e) {
5819
+ process.stderr.write(`[plan-refiner] Refinement failed: ${e instanceof Error ? e.message : String(e)}\n`);
5820
+ }
5662
5821
  }
5663
5822
  const lines = [
5664
5823
  result.success ? "Goal completed successfully." : `Goal failed: ${result.error}`,
@@ -6419,6 +6578,88 @@ server.tool("observer_ocr_roi", "Submit a targeted ROI OCR command to the runnin
6419
6578
  return { content: [{ type: "text", text: `ROI OCR command submitted: ${id}\nRegion: (${x}, ${y}, ${width}×${height})\nThe daemon will process this on its next cycle. Call observer_ocr_roi with commandId="${id}" to poll the result.` }] };
6420
6579
  });
6421
6580
  // ═══════════════════════════════════════════════
6581
+ // STATE WATCHER — Continuous observation event bus
6582
+ // ═══════════════════════════════════════════════
6583
+ server.tool("watch_start", "Start the state watcher polling loop. Evaluates registered watch rules every 2s against the world model.", {}, async () => {
6584
+ stateWatcher.start();
6585
+ const rules = stateWatcher.getRules();
6586
+ return { content: [{ type: "text", text: `State watcher started. ${rules.length} rules registered.` }] };
6587
+ });
6588
+ server.tool("watch_stop", "Stop the state watcher polling loop.", {}, async () => {
6589
+ stateWatcher.stop();
6590
+ return { content: [{ type: "text", text: "State watcher stopped." }] };
6591
+ });
6592
+ server.tool("watch_register", "Register a watch rule: when element with matching title appears, execute an action. Use for automated responses to known UI states.", {
6593
+ id: z.string().describe("Unique rule ID"),
6594
+ elementTitle: z.string().describe("UI element title/label to watch for (case-insensitive substring match)"),
6595
+ actionTool: z.string().describe("Tool to execute when element appears (e.g. click_text, key)"),
6596
+ actionParams: z.record(z.string(), z.unknown()).describe("Params for the action tool"),
6597
+ bundleId: z.string().optional().describe("Only match when this app is focused"),
6598
+ maxFires: z.number().optional().describe("Max times to fire (0=unlimited, default=1)"),
6599
+ }, async ({ id, elementTitle, actionTool, actionParams, bundleId, maxFires }) => {
6600
+ // Validate tool exists and is safe for automated execution
6601
+ const BLOCKED_WATCH_TOOLS = new Set(["applescript", "browser_js", "browser_stealth"]);
6602
+ if (BLOCKED_WATCH_TOOLS.has(actionTool)) {
6603
+ return { content: [{ type: "text", text: `Tool "${actionTool}" is not allowed in watch rules (security: prevents arbitrary code execution)` }], isError: true };
6604
+ }
6605
+ if (!toolRegistry.has(actionTool)) {
6606
+ return { content: [{ type: "text", text: `Unknown tool: "${actionTool}"` }], isError: true };
6607
+ }
6608
+ stateWatcher.watchForElement(id, elementTitle, { tool: actionTool, params: actionParams }, bundleId);
6609
+ if (maxFires !== undefined) {
6610
+ const rules = stateWatcher.getRules();
6611
+ const rule = rules.find((r) => r.id === id);
6612
+ if (rule) {
6613
+ // Update maxFires on the registered rule
6614
+ const ruleState = stateWatcher.rules.get(id);
6615
+ if (ruleState)
6616
+ ruleState.rule.maxFires = maxFires;
6617
+ }
6618
+ }
6619
+ return { content: [{ type: "text", text: `Watch rule "${id}" registered: when "${elementTitle}" appears → ${actionTool}(${JSON.stringify(actionParams)})` }] };
6620
+ });
6621
+ server.tool("watch_dialog", "Register a dialog watch rule: when a dialog matching the pattern appears, auto-execute an action.", {
6622
+ id: z.string().describe("Unique rule ID"),
6623
+ titlePattern: z.string().describe("Regex pattern to match dialog titles"),
6624
+ actionTool: z.string().describe("Tool to execute (e.g. click_text, key)"),
6625
+ actionParams: z.record(z.string(), z.unknown()).describe("Params for the action tool"),
6626
+ }, async ({ id, titlePattern, actionTool, actionParams }) => {
6627
+ // Validate regex — reject patterns that could cause ReDoS
6628
+ let regex;
6629
+ try {
6630
+ regex = new RegExp(titlePattern, "i");
6631
+ // Quick sanity check — if it takes >50ms on a test string, reject
6632
+ const testStr = "a".repeat(100);
6633
+ const t0 = Date.now();
6634
+ regex.test(testStr);
6635
+ if (Date.now() - t0 > 50) {
6636
+ return { content: [{ type: "text", text: `Rejected: regex pattern "${titlePattern}" is too expensive (potential ReDoS)` }], isError: true };
6637
+ }
6638
+ }
6639
+ catch (e) {
6640
+ return { content: [{ type: "text", text: `Invalid regex: ${e instanceof Error ? e.message : String(e)}` }], isError: true };
6641
+ }
6642
+ stateWatcher.watchForDialog(id, regex, { tool: actionTool, params: actionParams });
6643
+ return { content: [{ type: "text", text: `Dialog watch "${id}" registered: /${titlePattern}/i → ${actionTool}(${JSON.stringify(actionParams)})` }] };
6644
+ });
6645
+ server.tool("watch_unregister", "Remove a watch rule by ID.", {
6646
+ id: z.string().describe("Rule ID to remove"),
6647
+ }, async ({ id }) => {
6648
+ const removed = stateWatcher.unregister(id);
6649
+ return { content: [{ type: "text", text: removed ? `Rule "${id}" removed.` : `Rule "${id}" not found.` }] };
6650
+ });
6651
+ server.tool("watch_status", "Get all registered watch rules and their fire counts.", {}, async () => {
6652
+ const rules = stateWatcher.getRules();
6653
+ const running = stateWatcher.isRunning;
6654
+ const lines = [
6655
+ `State watcher: ${running ? "running" : "stopped"}`,
6656
+ `Rules: ${rules.length}`,
6657
+ "",
6658
+ ...rules.map((r) => ` [${r.id}] ${r.description} (fired ${r.fireCount}x)`),
6659
+ ];
6660
+ return { content: [{ type: "text", text: lines.join("\n") }] };
6661
+ });
6662
+ // ═══════════════════════════════════════════════
6422
6663
  // PHASE 6: TOOL MASTERY — Ingestion + Community
6423
6664
  // ═══════════════════════════════════════════════
6424
6665
  server.tool("scan_menu_bar", "Scan an app's menu bar via AX tree. Extracts all menu paths, keyboard shortcuts, and enabled/disabled states. Automatically merges discovered shortcuts into the reference file.", {
@@ -6480,6 +6721,8 @@ server.tool("scan_menu_bar", "Scan an app's menu bar via AX tree. Extracts all m
6480
6721
  });
6481
6722
  }
6482
6723
  }
6724
+ // Hot-reload: make new reference data immediately available to context tracker
6725
+ _playbookStoreForContext.reload();
6483
6726
  let output = lines.join("\n") + bootstrapInfo;
6484
6727
  output = redactUsername(output);
6485
6728
  output = output.replace(/Log Out [^\n:]+/g, "Log Out [USER]");
@@ -6543,6 +6786,8 @@ server.tool("ingest_documentation", "Parse a documentation page (HTML, markdown,
6543
6786
  }
6544
6787
  }
6545
6788
  }
6789
+ // Hot-reload: make new reference data immediately available to context tracker
6790
+ _playbookStoreForContext.reload();
6546
6791
  return { content: [{ type: "text", text: lines.join("\n") }] };
6547
6792
  });
6548
6793
  server.tool("ingest_tutorial", "Extract structured playbook steps from a video transcript (e.g. YouTube captions). Converts tutorial narration into actionable automation steps with tool mappings.", {
@@ -6633,6 +6878,8 @@ server.tool("discover_features", "Extract features from an app's official websit
6633
6878
  lines.push(` [${f.category}] ${f.name}: ${f.description}`);
6634
6879
  }
6635
6880
  }
6881
+ // Hot-reload: make new reference data immediately available to context tracker
6882
+ _playbookStoreForContext.reload();
6636
6883
  return { content: [{ type: "text", text: lines.join("\n") }] };
6637
6884
  });
6638
6885
  server.tool("coverage_report", "Check what ScreenHand knows about an app: shortcuts, selectors, flows, playbooks, error patterns, and stability %. Useful before complex workflows to decide strategy: learn first (if empty), go fast (if high coverage), or use fallback tools (if error patterns exist). Optional for quick actions.", {
@@ -6741,10 +6988,14 @@ originalTool("community_fetch", "Search community playbooks for a platform or wo
6741
6988
  // START
6742
6989
  // ═══════════════════════════════════════════════
6743
6990
  async function main() {
6744
- // Flush playbook learnings on graceful shutdown
6745
- process.on("SIGINT", () => { void perceptionManager.stop(); contextTracker.flush(); learningEngine.flush(); appMap.flush(); process.exit(0); });
6746
- process.on("SIGTERM", () => { void perceptionManager.stop(); contextTracker.flush(); learningEngine.flush(); appMap.flush(); process.exit(0); });
6747
- process.on("beforeExit", () => { void perceptionManager.stop(); contextTracker.flush(); learningEngine.flush(); appMap.flush(); });
6991
+ // Flush all learned state on shutdown (signals, stdin EOF, or normal exit)
6992
+ const flushAll = () => { void perceptionManager.stop(); perceptionManager.stopStallDetection(); stateWatcher.stop(); contextTracker.flush(); learningEngine.flush(); appMap.flush(); };
6993
+ process.on("SIGINT", () => { flushAll(); process.exit(0); });
6994
+ process.on("SIGTERM", () => { flushAll(); process.exit(0); });
6995
+ process.on("beforeExit", flushAll);
6996
+ // MCP clients often close stdin without sending a signal — flush on stdin end too
6997
+ process.stdin.on("end", () => { flushAll(); process.exit(0); });
6998
+ process.stdin.on("close", () => { flushAll(); process.exit(0); });
6748
6999
  const transport = new StdioServerTransport();
6749
7000
  await server.connect(transport);
6750
7001
  }
@@ -75,9 +75,11 @@ export class PlaybookPublisher {
75
75
  return null;
76
76
  }
77
77
  writeFileAtomicSync(filePath, JSON.stringify(shared, null, 2) + "\n");
78
- // Best-effort sync to remote API
78
+ // Best-effort sync to remote API — log failures so user knows data didn't leave machine
79
79
  if (this.remote) {
80
- void this.remote.publish(shared).catch(() => { });
80
+ void this.remote.publish(shared).catch((err) => {
81
+ process.stderr.write(`[screenhand] Remote publish failed: ${err instanceof Error ? err.message : String(err)}\n`);
82
+ });
81
83
  }
82
84
  return shared;
83
85
  }