screenhand 0.5.0 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -52,8 +52,10 @@ import { PlaybookStore } from "./src/playbook/store.js";
52
52
  import { ContextTracker } from "./src/context-tracker.js";
53
53
  import { McpPlaybookRecorder } from "./src/playbook/mcp-recorder.js";
54
54
  import { WorldModel } from "./src/state/index.js";
55
+ import { StateWatcher } from "./src/state/state-watcher.js";
55
56
  import { PerceptionManager } from "./src/perception/index.js";
56
57
  import { Planner, PlanExecutor, GoalStore, ToolRegistry } from "./src/planner/index.js";
58
+ import { PlanRefiner } from "./src/planner/plan-refiner.js";
57
59
  import { RecoveryEngine } from "./src/recovery/index.js";
58
60
  import { LearningEngine, LocatorPolicy } from "./src/learning/index.js";
59
61
  import { discoverWebElements, testWebElement, compileReference, saveExploreResult, discoverNativeElements } from "./src/platform/explorer.js";
@@ -609,6 +611,7 @@ catch { /* dir may not exist */ }
609
611
  const planner = new Planner(_executablePlaybookStore, memory, contextTracker, worldModel, learningEngine);
610
612
  const goalStore = new GoalStore(path.join(os.homedir(), ".screenhand", "planner"));
611
613
  goalStore.init();
614
+ const planRefiner = new PlanRefiner(path.join(os.homedir(), ".screenhand", "planner"));
612
615
  const toolRegistry = new ToolRegistry();
613
616
  const recoveryEngine = new RecoveryEngine(worldModel, toolRegistry.toExecutor(), memory);
614
617
  recoveryEngine.setLearningEngine(learningEngine);
@@ -616,6 +619,7 @@ recoveryEngine.setAppMap(appMap);
616
619
  planner.setToolRegistry(toolRegistry);
617
620
  planner.setAppMap(appMap);
618
621
  perceptionManager.setLearningEngine(learningEngine);
622
+ const stateWatcher = new StateWatcher(worldModel, toolRegistry.toExecutor(), 2_000);
619
623
  // ── Reactive event loop: wire perception events to automatic responses ──
620
624
  // These fire at perception speed (100-300ms), not LLM speed (~2-3s).
621
625
  perceptionManager.on("dialog_detected", (event) => {
@@ -649,6 +653,28 @@ perceptionManager.on("app_switched", (event) => {
649
653
  // Log for observability
650
654
  console.error(`[reactive] App switched to ${event.bundleId} (pid=${event.pid})`);
651
655
  });
656
// ── Perception-triggered recovery: focus loss, app crash, stall ──
// Each handler logs the event to stderr, then fires one tool through the
// registry executor without awaiting it; a rejected call is only logged.

// Focus drifted away from the expected app: ask the "focus" tool to bring it back.
perceptionManager.on("focus_lost", ({ expectedBundleId, actualBundleId }) => {
    console.error(`[reactive] Focus lost: expected ${expectedBundleId}, got ${actualBundleId} — auto-refocusing`);
    const runTool = toolRegistry.toExecutor();
    const refocus = runTool("focus", { bundleId: expectedBundleId });
    refocus.catch((reason) => {
        const detail = reason instanceof Error ? reason.message : reason;
        console.error(`[reactive] Auto-refocus failed: ${detail}`);
    });
});

// The watched app went away: ask the "launch" tool to start it again.
perceptionManager.on("app_crash", ({ bundleId, pid }) => {
    console.error(`[reactive] App crash detected: ${bundleId} (pid=${pid}) — auto-relaunching`);
    const runTool = toolRegistry.toExecutor();
    const relaunch = runTool("launch", { bundleId });
    relaunch.catch((reason) => {
        const detail = reason instanceof Error ? reason.message : reason;
        console.error(`[reactive] Auto-relaunch failed: ${detail}`);
    });
});

// No UI changes for a while: capture a screenshot so the next LLM call can see the screen.
perceptionManager.on("stall_detected", ({ bundleId, stallMs }) => {
    const stallSeconds = (stallMs / 1000).toFixed(0);
    console.error(`[reactive] UI stall detected: ${bundleId} — no changes for ${stallSeconds}s — taking screenshot for diagnosis`);
    const runTool = toolRegistry.toExecutor();
    const capture = runTool("screenshot", {});
    capture.catch((reason) => {
        const detail = reason instanceof Error ? reason.message : reason;
        console.error(`[reactive] Stall screenshot failed: ${detail}`);
    });
});
652
678
  const mcpRecorder = new McpPlaybookRecorder(playbooksDir);
653
679
  const referenceMerger = new ReferenceMerger(referencesDir);
654
680
  const communityPublisher = new PlaybookPublisher();
@@ -681,6 +707,7 @@ const MEMORY_TOOLS = new Set([
681
707
  ]);
682
708
  // Track the strategy we're currently following (for feedback loop)
683
709
  let activeStrategyFingerprint = null;
710
+ let autoExecutionInProgress = false; // guard against concurrent auto-execution
684
711
  let currentAdaptiveBudget = null;
685
712
  // Intercept all tool registrations to auto-log + auto-recall
686
713
  const _rawOriginalTool = server.tool.bind(server);
@@ -770,7 +797,7 @@ server.tool = (...args) => {
770
797
  if (!perceptionManager.isRunning && bridgeReady) {
771
798
  const focusApp = worldModel.getState().focusedApp;
772
799
  if (focusApp?.bundleId && focusApp?.pid) {
773
- perceptionManager.tryAutoStart(focusApp, bridge).catch(() => { });
800
+ perceptionManager.tryAutoStart(focusApp, bridge).catch((e) => { process.stderr.write(`[screenhand] perception auto-start failed: ${e instanceof Error ? e.message : String(e)}\n`); });
774
801
  installSafariEnricher(focusApp.bundleId);
775
802
  }
776
803
  }
@@ -821,7 +848,7 @@ server.tool = (...args) => {
821
848
  "type_with_fallback", "select_with_fallback", "scroll_with_fallback",
822
849
  ]);
823
850
  try {
824
- const result = await originalHandler(params, extra);
851
+ let result = await originalHandler(params, extra);
825
852
  const durationMs = Date.now() - start;
826
853
  // ── POST-CALL: log action (async, non-blocking) ──
827
854
  const entry = {
@@ -873,7 +900,9 @@ server.tool = (...args) => {
873
900
  try {
874
901
  appMap.recordPageTransition(postBundleIdForCtx, pageTransition.from, pageTransition.to, toolName);
875
902
  }
876
- catch { /* non-critical — don't break tool execution for nav tracking */ }
903
+ catch (e) {
904
+ process.stderr.write(`[screenhand] nav tracking failed: ${e instanceof Error ? e.message : String(e)}\n`);
905
+ }
877
906
  }
878
907
  // ── POST-CALL: detect focus drift ──
879
908
  const postBundleId = worldModel.getState().focusedApp?.bundleId ?? null;
@@ -947,7 +976,9 @@ server.tool = (...args) => {
947
976
  }
948
977
  }
949
978
  }
950
- catch { /* non-fatal */ }
979
+ catch (e) {
980
+ process.stderr.write(`[screenhand] app map feature learning failed: ${e instanceof Error ? e.message : String(e)}\n`);
981
+ }
951
982
  }
952
983
  if (!resultIsError && learnBundleId !== "unknown") {
953
984
  try {
@@ -1369,7 +1400,9 @@ server.tool = (...args) => {
1369
1400
  }
1370
1401
  }
1371
1402
  }
1372
- catch { /* hierarchy extraction non-fatal */ }
1403
+ catch (e) {
1404
+ process.stderr.write(`[screenhand] hierarchy extraction failed: ${e instanceof Error ? e.message : String(e)}\n`);
1405
+ }
1373
1406
  }
1374
1407
  }
1375
1408
  // ── Conditional UI visibility tracking (throttled) ──
@@ -1417,7 +1450,9 @@ server.tool = (...args) => {
1417
1450
  }
1418
1451
  }
1419
1452
  }
1420
- catch { /* visibility tracking non-fatal */ }
1453
+ catch (e) {
1454
+ process.stderr.write(`[screenhand] visibility tracking failed: ${e instanceof Error ? e.message : String(e)}\n`);
1455
+ }
1421
1456
  }
1422
1457
  }
1423
1458
  // ── Timing recording: track tool response times per element ──
@@ -1511,25 +1546,65 @@ server.tool = (...args) => {
1511
1546
  if (knownError) {
1512
1547
  hints.push(`⚡ Memory: "${toolName}" has failed before: "${knownError.error}" (${knownError.occurrences}x). Fix: ${knownError.resolution}`);
1513
1548
  }
1514
- // Suggest next step if we're mid-strategy
1549
// ── Strategy matching: auto-execute proven strategies OR hint unproven ones ──
// If memory reports an auto-executable strategy for the recent tool sequence,
// run its remaining steps right here and append a summary to the tool result.
// Otherwise fall back to a textual "next step" hint.
const recentTools = memory.getRecentToolNames();
const currentBundleForStrategy = worldModel.getState().focusedApp?.bundleId;
// Guard: never start a second auto-execution while one is already in progress.
const autoExec = autoExecutionInProgress ? null : memory.getAutoExecutableStrategy(recentTools, currentBundleForStrategy);
if (autoExec) {
    autoExecutionInProgress = true;
    activeStrategyFingerprint = autoExec.fingerprint;
    const autoResults = [];
    let allOk = true;
    try {
        hints.push(`🚀 Auto-executing proven strategy "${autoExec.strategy.task}" (${autoExec.strategy.successCount} wins) — ${autoExec.remainingSteps.length} steps remaining`);
        for (const step of autoExec.remainingSteps) {
            let entry;
            try {
                const stepResult = await toolRegistry.toExecutor()(step.tool, step.params);
                entry = { tool: step.tool, ...stepResult };
                // Record outcome for learning; the target is the first string among target/title/text.
                const target = typeof step.params.target === "string" ? step.params.target
                    : typeof step.params.title === "string" ? step.params.title
                        : typeof step.params.text === "string" ? step.params.text
                            : null;
                contextTracker.recordOutcome(step.tool, { target, text: typeof step.params.text === "string" ? step.params.text : null }, stepResult.ok, stepResult.ok ? null : (stepResult.error ?? null));
                if (stepResult.ok) {
                    hints.push(` ✓ ${step.tool} — ok`);
                }
                else {
                    allOk = false;
                    hints.push(` ✗ ${step.tool} failed: ${stepResult.error ?? "unknown"}`);
                }
            }
            catch (err) {
                const message = err instanceof Error ? err.message : String(err);
                allOk = false;
                // Keep the thrown step in the summary instead of silently dropping it.
                entry = { tool: step.tool, ok: false, error: message };
                hints.push(` ✗ ${step.tool} threw: ${message}`);
            }
            autoResults.push(entry);
            if (!allOk) {
                break; // Stop auto-execution on first failure
            }
        }
        // Record strategy outcome
        memory.recordStrategyOutcome(autoExec.fingerprint, allOk);
    }
    finally {
        // Always release the guard, even if recording the outcome throws;
        // otherwise auto-execution would stay disabled for the rest of the process.
        activeStrategyFingerprint = null;
        autoExecutionInProgress = false;
    }
    // Append auto-execution results to the response without mutating the handler's own array.
    const autoSummary = autoResults.map((r) => `${r.tool}: ${r.ok ? "ok" : r.error}`).join("\n");
    const priorContent = Array.isArray(result?.content) ? result.content : [];
    result = {
        ...result,
        content: [...priorContent, { type: "text", text: `\n── AUTO-EXECUTED (${autoResults.length} steps) ──\n${autoSummary}` }],
    };
}
else {
    // Fall back to strategy hint (suggest but don't execute)
    const strategyHint = memory.quickStrategyHint(recentTools, currentBundleForStrategy);
    if (strategyHint) {
        activeStrategyFingerprint = strategyHint.fingerprint;
        const nextParams = Object.keys(strategyHint.nextStep.params).length > 0
            ? `(${JSON.stringify(strategyHint.nextStep.params)})`
            : "";
        hints.push(`💡 Memory: This matches strategy "${strategyHint.strategy.task}" (${strategyHint.strategy.successCount} wins, ${strategyHint.strategy.failCount ?? 0} fails). Next step: ${strategyHint.nextStep.tool}${nextParams}`);
    }
    else if (activeStrategyFingerprint && recentTools.length > 0) {
        // A strategy was being followed and no longer matches: record it as a success and clear it.
        memory.recordStrategyOutcome(activeStrategyFingerprint, true);
        activeStrategyFingerprint = null;
    }
}
1534
1609
  // Attach hints in BOTH content (visible) and _meta (for programmatic access)
1535
1610
  if (hints.length > 0) {
@@ -1745,7 +1820,9 @@ server.tool("focus", "Focus/activate an application (or a specific window by win
1745
1820
  targetApp = { bundleId, name: appWin.appName, pid: appWin.pid || appWin.ownerPid };
1746
1821
  }
1747
1822
  }
1748
- catch { /* ignore */ }
1823
+ catch (e) {
1824
+ process.stderr.write(`[screenhand] focus window check for ${bundleId} failed: ${e instanceof Error ? e.message : String(e)}\n`);
1825
+ }
1749
1826
  if (!targetApp) {
1750
1827
  return { content: [{ type: "text", text: `Error: ${bundleId} is not running. Use launch("${bundleId}") first.` }], isError: true };
1751
1828
  }
@@ -1816,10 +1893,14 @@ server.tool("focus", "Focus/activate an application (or a specific window by win
1816
1893
  await perceptionManager.ensureStarted(ctx);
1817
1894
  installSafariEnricher(bundleId);
1818
1895
  }
1819
- catch { /* best-effort */ }
1896
+ catch (e) {
1897
+ process.stderr.write(`[screenhand] perception ensureStarted in focus failed: ${e instanceof Error ? e.message : String(e)}\n`);
1898
+ }
1820
1899
  }
1821
1900
  }
1822
- catch { /* app.list failed — world model update is best-effort */ }
1901
+ catch (e) {
1902
+ process.stderr.write(`[screenhand] focus world-model update failed: ${e instanceof Error ? e.message : String(e)}\n`);
1903
+ }
1823
1904
  return { content: [{ type: "text", text: focusMsg }] };
1824
1905
  }
1825
1906
  finally {
@@ -1883,7 +1964,9 @@ server.tool("launch", "Launch an application. Chrome/Chromium browsers are launc
1883
1964
  await perceptionManager.ensureStarted({ bundleId, appName: r.appName ?? bundleId, pid: r.pid, windowTitle: "", ...(windowId != null ? { windowId } : {}) });
1884
1965
  installSafariEnricher(bundleId);
1885
1966
  }
1886
- catch { /* perception start is best-effort */ }
1967
+ catch (e) {
1968
+ process.stderr.write(`[screenhand] perception start after launch failed: ${e instanceof Error ? e.message : String(e)}\n`);
1969
+ }
1887
1970
  let msg = `Launched ${r.appName} pid=${r.pid}`;
1888
1971
  if (chromeAppName) {
1889
1972
  const port = cdpPort ?? 9222;
@@ -2112,7 +2195,9 @@ server.tool("ui_press", "PREFERRED: Find and press/click a UI element by its tit
2112
2195
  return { content: [{ type: "text", text: `Element "${title}" not found in PID ${pid}. A system dialog from "${front.name}" (${front.bundleId}, PID ${front.pid}) may be blocking. Dismiss it first, or use click(x, y) to interact with the dialog directly.` }], isError: true };
2113
2196
  }
2114
2197
  }
2115
- catch { /* ignore frontmost check failure */ }
2198
+ catch (e) {
2199
+ process.stderr.write(`[screenhand] frontmost check in ui_press failed: ${e instanceof Error ? e.message : String(e)}\n`);
2200
+ }
2116
2201
  throw new Error(`Element "${title}" not found (searched title, value, and description)`);
2117
2202
  }
2118
2203
  }
@@ -2333,7 +2418,9 @@ server.tool("type_text", "Type text using the keyboard. Auto-detects Electron ap
2333
2418
  catch { /* not available on this port */ }
2334
2419
  }
2335
2420
  }
2336
- catch { /* auto-detect is best-effort */ }
2421
+ catch (e) {
2422
+ process.stderr.write(`[screenhand] CDP auto-detect failed: ${e instanceof Error ? e.message : String(e)}\n`);
2423
+ }
2337
2424
  }
2338
2425
  if (electronCdpPort) {
2339
2426
  // CDP path: click editor to ensure focus, then type via key events
@@ -2396,7 +2483,9 @@ server.tool("key", "Press a key combination", {
2396
2483
  const front = await bridge.call("app.frontmost", {});
2397
2484
  targetPid = front.pid;
2398
2485
  }
2399
- catch { /* fallback to global posting */ }
2486
+ catch (e) {
2487
+ process.stderr.write(`[screenhand] key frontmost PID resolve failed: ${e instanceof Error ? e.message : String(e)}\n`);
2488
+ }
2400
2489
  }
2401
2490
  const keys = combo.split("+");
2402
2491
  const hasModifier = keys.some(k => ["cmd", "ctrl", "alt", "shift"].includes(k.toLowerCase()));
@@ -2466,7 +2555,9 @@ async function getCDPClient(tabId, overridePort) {
2466
2555
  try {
2467
2556
  perceptionManager.activateCDP(client);
2468
2557
  }
2469
- catch { /* best-effort */ }
2558
+ catch (e) {
2559
+ process.stderr.write(`[screenhand] perception CDP activate failed: ${e instanceof Error ? e.message : String(e)}\n`);
2560
+ }
2470
2561
  return { client, targetId: targetId, CDP: cdp, port };
2471
2562
  }
2472
2563
  // ── Random delay helper ──
@@ -3403,6 +3494,12 @@ server.tool("platform_explore", "Autonomously explore an app or website. Maps al
3403
3494
  // Compile and save
3404
3495
  const result = compileReference(platform, "web", tested, url);
3405
3496
  const filePath = saveExploreResult(referencesDir, result);
3497
+ // Auto-merge explore selectors into main reference so data isn't fragmented
3498
+ if (result.selectors && Object.keys(result.selectors).length > 0) {
3499
+ referenceMerger.mergeExploreSelectors(result.selectors, result.errors, "", platform);
3500
+ }
3501
+ // Hot-reload: make new data immediately available to context tracker
3502
+ _playbookStoreForContext.reload();
3406
3503
  return { content: [{ type: "text", text: `Exploration complete: ${filePath}\n\nElements found: ${elements.length}\nTested: ${result.testedElements}\nWorking selectors: ${result.workingSelectors}\nErrors: ${result.errors.length}\n\nKey discoveries:\n${result.keyDiscoveries.map(d => ` - ${d}`).join("\n")}` }] };
3407
3504
  }
3408
3505
  else if (bundleId) {
@@ -3424,6 +3521,12 @@ server.tool("platform_explore", "Autonomously explore an app or website. Maps al
3424
3521
  ...el, clickWorked: true, result: "discovered_not_tested",
3425
3522
  })), undefined, bundleId);
3426
3523
  const filePath = saveExploreResult(referencesDir, result);
3524
+ // Auto-merge explore selectors into main reference so data isn't fragmented
3525
+ if (result.selectors && Object.keys(result.selectors).length > 0) {
3526
+ referenceMerger.mergeExploreSelectors(result.selectors, result.errors, bundleId, platform);
3527
+ }
3528
+ // Hot-reload: make new data immediately available to context tracker
3529
+ _playbookStoreForContext.reload();
3427
3530
  return { content: [{ type: "text", text: `Native app exploration complete: ${filePath}\n\nElements discovered: ${elements.length}\n(Native elements discovered but not auto-clicked for safety. Use playbook_record to test interactively.)` }] };
3428
3531
  }
3429
3532
  else {
@@ -5037,7 +5140,9 @@ server.tool("scroll_with_fallback", "Scroll within an element or the active wind
5037
5140
  return { content: [{ type: "text", text: `"${target}" is visible after ${i} scroll(s).` }] };
5038
5141
  }
5039
5142
  }
5040
- catch { /* OCR failed, keep scrolling */ }
5143
+ catch (e) {
5144
+ process.stderr.write(`[screenhand] OCR during scroll search failed: ${e instanceof Error ? e.message : String(e)}\n`);
5145
+ }
5041
5146
  // Scroll once
5042
5147
  const deltaX = direction === "left" ? -scrollAmount : direction === "right" ? scrollAmount : 0;
5043
5148
  const deltaY = direction === "up" ? -scrollAmount : direction === "down" ? scrollAmount : 0;
@@ -5164,7 +5269,9 @@ server.tool("wait_for_state", "Wait until a condition is met on screen: text app
5164
5269
  await client.close();
5165
5270
  }
5166
5271
  }
5167
- catch { /* CDP unavailable */ }
5272
+ catch (e) {
5273
+ process.stderr.write(`[screenhand] wait_for_state CDP check failed: ${e instanceof Error ? e.message : String(e)}\n`);
5274
+ }
5168
5275
  }
5169
5276
  const elapsed = Date.now() - (deadline - timeout);
5170
5277
  lastCheck = `${elapsed}ms`;
@@ -5450,6 +5557,18 @@ function getJobRunner() {
5450
5557
  timeout: 15000,
5451
5558
  }).trim();
5452
5559
  });
5560
// Wire learning feedback: PlaybookEngine reports step outcomes to context tracker + AppMap.
// The callback receives the executed step, whether it succeeded, and the error (if any).
playbookEngine.setOutcomeCallback((step, success, error) => {
    const target = typeof step.target === "string" ? step.target : null;
    contextTracker.recordOutcome(step.action, { target, text: step.text }, success, error);
    // Prefer the currently focused app; fall back to the last bundle id seen.
    const bid = worldModel.getState().focusedApp?.bundleId ?? lastKnownBundleId;
    if (bid && target) {
        try {
            appMap.recordElementOutcome(bid, "auto", target, success);
        }
        catch (e) {
            // Non-critical: never fail the playbook step over bookkeeping, but do not hide the failure.
            process.stderr.write(`[screenhand] playbook element outcome recording failed: ${e instanceof Error ? e.message : String(e)}\n`);
        }
    }
});
5453
5572
  activeJobRunner = new JobRunner(bridge, jobManager, leaseManager, supervisor, (() => {
5454
5573
  const cfg = {
5455
5574
  hasCDP: cdpPort !== null,
@@ -5626,10 +5745,22 @@ originalTool("plan_execute", "Run a plan automatically. Known steps (from playbo
5626
5745
  if (!goal) {
5627
5746
  return { content: [{ type: "text", text: `Goal not found: ${goalId}` }] };
5628
5747
  }
5629
- const adaptiveBudget = learningEngine.getAdaptiveBudget(worldModel.getState().focusedApp?.bundleId ?? "unknown");
5748
+ const focusedBundleId = worldModel.getState().focusedApp?.bundleId ?? "unknown";
5749
+ const adaptiveBudget = learningEngine.getAdaptiveBudget(focusedBundleId);
5630
5750
  const executor = new PlanExecutor(worldModel, planner, toolRegistry.toExecutor(), { postconditionWaitMs: adaptiveBudget.verifyMs, defaultStepTimeout: Math.max(30_000, adaptiveBudget.actMs * 2) }, recoveryEngine, learningEngine);
5631
5751
  executor.setAppMap(appMap);
5632
- const result = await executor.executeGoal(goal);
5752
+ // Enable perception-triggered recovery during plan execution
5753
+ perceptionManager.setExpectedApp(focusedBundleId);
5754
+ perceptionManager.startStallDetection(30_000);
5755
+ let result;
5756
+ try {
5757
+ result = await executor.executeGoal(goal);
5758
+ }
5759
+ finally {
5760
+ // Disable reactive recovery after plan completes
5761
+ perceptionManager.setExpectedApp(null);
5762
+ perceptionManager.stopStallDetection();
5763
+ }
5633
5764
  goalStore.update(goalId, goal);
5634
5765
  // Check if paused at an LLM step
5635
5766
  if ("paused" in result) {
@@ -5668,7 +5799,25 @@ originalTool("plan_execute", "Run a plan automatically. Known steps (from playbo
5668
5799
  }
5669
5800
  }
5670
5801
  }
5671
- catch { /* strategy recording is best-effort */ }
5802
+ catch (e) {
5803
+ process.stderr.write(`[screenhand] strategy recording failed: ${e instanceof Error ? e.message : String(e)}\n`);
5804
+ }
5805
+ // Self-improving plans: refine and check for graduation
5806
+ try {
5807
+ const refinement = planRefiner.refine(goal, result);
5808
+ if (refinement.refinementCount > 0) {
5809
+ process.stderr.write(`[plan-refiner] Refined plan for "${goal.description}" (${refinement.refinementCount}x)\n`);
5810
+ }
5811
+ // Check graduation to playbook (3+ refinements)
5812
+ const playbook = planRefiner.checkGraduation(goal.description, focusedBundleId, worldModel.getState().focusedApp?.appName ?? focusedBundleId);
5813
+ if (playbook) {
5814
+ _playbookStoreForContext.save(playbook);
5815
+ process.stderr.write(`[plan-refiner] Plan GRADUATED to playbook: ${playbook.id}\n`);
5816
+ }
5817
+ }
5818
+ catch (e) {
5819
+ process.stderr.write(`[plan-refiner] Refinement failed: ${e instanceof Error ? e.message : String(e)}\n`);
5820
+ }
5672
5821
  }
5673
5822
  const lines = [
5674
5823
  result.success ? "Goal completed successfully." : `Goal failed: ${result.error}`,
@@ -6429,6 +6578,88 @@ server.tool("observer_ocr_roi", "Submit a targeted ROI OCR command to the runnin
6429
6578
  return { content: [{ type: "text", text: `ROI OCR command submitted: ${id}\nRegion: (${x}, ${y}, ${width}×${height})\nThe daemon will process this on its next cycle. Call observer_ocr_roi with commandId="${id}" to poll the result.` }] };
6430
6579
  });
6431
6580
  // ═══════════════════════════════════════════════
6581
+ // STATE WATCHER — Continuous observation event bus
6582
+ // ═══════════════════════════════════════════════
6583
// watch_start: starts the state watcher and reports how many rules are registered.
server.tool("watch_start", "Start the state watcher polling loop. Evaluates registered watch rules every 2s against the world model.", {}, async () => {
    stateWatcher.start();
    const { length: ruleCount } = stateWatcher.getRules();
    const text = `State watcher started. ${ruleCount} rules registered.`;
    return { content: [{ type: "text", text }] };
});
6588
// watch_stop: stops the state watcher and confirms it in the reply.
server.tool("watch_stop", "Stop the state watcher polling loop.", {}, async () => {
    stateWatcher.stop();
    const text = "State watcher stopped.";
    return { content: [{ type: "text", text }] };
});
6592
// watch_register: registers an element watch rule after checking that the
// action tool is allowed and known to the tool registry. An optional maxFires
// value is written onto the stored rule after registration.
server.tool("watch_register", "Register a watch rule: when element with matching title appears, execute an action. Use for automated responses to known UI states.", {
    id: z.string().describe("Unique rule ID"),
    elementTitle: z.string().describe("UI element title/label to watch for (case-insensitive substring match)"),
    actionTool: z.string().describe("Tool to execute when element appears (e.g. click_text, key)"),
    actionParams: z.record(z.string(), z.unknown()).describe("Params for the action tool"),
    bundleId: z.string().optional().describe("Only match when this app is focused"),
    maxFires: z.number().optional().describe("Max times to fire (0=unlimited, default=1)"),
}, async (args) => {
    const { id, elementTitle, actionTool, actionParams, bundleId, maxFires } = args;
    const reject = (text) => ({ content: [{ type: "text", text }], isError: true });
    // Validate tool exists and is safe for automated execution
    const BLOCKED_WATCH_TOOLS = new Set(["applescript", "browser_js", "browser_stealth"]);
    if (BLOCKED_WATCH_TOOLS.has(actionTool)) {
        return reject(`Tool "${actionTool}" is not allowed in watch rules (security: prevents arbitrary code execution)`);
    }
    if (!toolRegistry.has(actionTool)) {
        return reject(`Unknown tool: "${actionTool}"`);
    }
    const action = { tool: actionTool, params: actionParams };
    stateWatcher.watchForElement(id, elementTitle, action, bundleId);
    if (maxFires !== undefined) {
        // Only touch the stored rule when the watcher also lists it.
        const isListed = stateWatcher.getRules().some((r) => r.id === id);
        // NOTE(review): this writes through stateWatcher.rules directly — confirm
        // the watcher has no public way to set maxFires.
        const ruleState = isListed ? stateWatcher.rules.get(id) : undefined;
        if (ruleState) {
            ruleState.rule.maxFires = maxFires;
        }
    }
    const summary = `Watch rule "${id}" registered: when "${elementTitle}" appears → ${actionTool}(${JSON.stringify(actionParams)})`;
    return { content: [{ type: "text", text: summary }] };
});
6621
// watch_dialog: registers a dialog watch rule whose title pattern is a
// case-insensitive regular expression. The action tool goes through the same
// checks as watch_register (blocked tools, unknown tools) before anything is
// registered, so a dialog rule cannot be used to auto-run code-execution tools.
server.tool("watch_dialog", "Register a dialog watch rule: when a dialog matching the pattern appears, auto-execute an action.", {
    id: z.string().describe("Unique rule ID"),
    titlePattern: z.string().describe("Regex pattern to match dialog titles"),
    actionTool: z.string().describe("Tool to execute (e.g. click_text, key)"),
    actionParams: z.record(z.string(), z.unknown()).describe("Params for the action tool"),
}, async ({ id, titlePattern, actionTool, actionParams }) => {
    // Validate tool exists and is safe for automated execution
    const BLOCKED_WATCH_TOOLS = new Set(["applescript", "browser_js", "browser_stealth"]);
    if (BLOCKED_WATCH_TOOLS.has(actionTool)) {
        return { content: [{ type: "text", text: `Tool "${actionTool}" is not allowed in watch rules (security: prevents arbitrary code execution)` }], isError: true };
    }
    if (!toolRegistry.has(actionTool)) {
        return { content: [{ type: "text", text: `Unknown tool: "${actionTool}"` }], isError: true };
    }
    // Validate regex — reject patterns that are invalid or measurably slow
    let regex;
    try {
        regex = new RegExp(titlePattern, "i");
        // Quick sanity check — if it takes >50ms on a test string, reject.
        // NOTE(review): the probe runs synchronously on 100 "a" characters, so it
        // only measures cost on that one input and cannot interrupt a pattern
        // that backtracks for a long time on it.
        const testStr = "a".repeat(100);
        const t0 = Date.now();
        regex.test(testStr);
        if (Date.now() - t0 > 50) {
            return { content: [{ type: "text", text: `Rejected: regex pattern "${titlePattern}" is too expensive (potential ReDoS)` }], isError: true };
        }
    }
    catch (e) {
        return { content: [{ type: "text", text: `Invalid regex: ${e instanceof Error ? e.message : String(e)}` }], isError: true };
    }
    stateWatcher.watchForDialog(id, regex, { tool: actionTool, params: actionParams });
    return { content: [{ type: "text", text: `Dialog watch "${id}" registered: /${titlePattern}/i → ${actionTool}(${JSON.stringify(actionParams)})` }] };
});
6645
// watch_unregister: removes a rule by id and reports whether one was removed.
server.tool("watch_unregister", "Remove a watch rule by ID.", {
    id: z.string().describe("Rule ID to remove"),
}, async ({ id }) => {
    let text;
    if (stateWatcher.unregister(id)) {
        text = `Rule "${id}" removed.`;
    }
    else {
        text = `Rule "${id}" not found.`;
    }
    return { content: [{ type: "text", text }] };
});
6651
// watch_status: lists whether the watcher is running, the rule count, and one
// line per rule with its id, description and fire count.
server.tool("watch_status", "Get all registered watch rules and their fire counts.", {}, async () => {
    const registered = stateWatcher.getRules();
    const stateLabel = stateWatcher.isRunning ? "running" : "stopped";
    const header = [
        `State watcher: ${stateLabel}`,
        `Rules: ${registered.length}`,
        "",
    ];
    const perRule = registered.map(({ id, description, fireCount }) => ` [${id}] ${description} (fired ${fireCount}x)`);
    const text = header.concat(perRule).join("\n");
    return { content: [{ type: "text", text }] };
});
6662
+ // ═══════════════════════════════════════════════
6432
6663
  // PHASE 6: TOOL MASTERY — Ingestion + Community
6433
6664
  // ═══════════════════════════════════════════════
6434
6665
  server.tool("scan_menu_bar", "Scan an app's menu bar via AX tree. Extracts all menu paths, keyboard shortcuts, and enabled/disabled states. Automatically merges discovered shortcuts into the reference file.", {
@@ -6490,6 +6721,8 @@ server.tool("scan_menu_bar", "Scan an app's menu bar via AX tree. Extracts all m
6490
6721
  });
6491
6722
  }
6492
6723
  }
6724
+ // Hot-reload: make new reference data immediately available to context tracker
6725
+ _playbookStoreForContext.reload();
6493
6726
  let output = lines.join("\n") + bootstrapInfo;
6494
6727
  output = redactUsername(output);
6495
6728
  output = output.replace(/Log Out [^\n:]+/g, "Log Out [USER]");
@@ -6553,6 +6786,8 @@ server.tool("ingest_documentation", "Parse a documentation page (HTML, markdown,
6553
6786
  }
6554
6787
  }
6555
6788
  }
6789
+ // Hot-reload: make new reference data immediately available to context tracker
6790
+ _playbookStoreForContext.reload();
6556
6791
  return { content: [{ type: "text", text: lines.join("\n") }] };
6557
6792
  });
6558
6793
  server.tool("ingest_tutorial", "Extract structured playbook steps from a video transcript (e.g. YouTube captions). Converts tutorial narration into actionable automation steps with tool mappings.", {
@@ -6643,6 +6878,8 @@ server.tool("discover_features", "Extract features from an app's official websit
6643
6878
  lines.push(` [${f.category}] ${f.name}: ${f.description}`);
6644
6879
  }
6645
6880
  }
6881
+ // Hot-reload: make new reference data immediately available to context tracker
6882
+ _playbookStoreForContext.reload();
6646
6883
  return { content: [{ type: "text", text: lines.join("\n") }] };
6647
6884
  });
6648
6885
  server.tool("coverage_report", "Check what ScreenHand knows about an app: shortcuts, selectors, flows, playbooks, error patterns, and stability %. Useful before complex workflows to decide strategy: learn first (if empty), go fast (if high coverage), or use fallback tools (if error patterns exist). Optional for quick actions.", {
@@ -6751,10 +6988,14 @@ originalTool("community_fetch", "Search community playbooks for a platform or wo
6751
6988
  // START
6752
6989
  // ═══════════════════════════════════════════════
6753
6990
/**
 * Entry point: installs shutdown hooks, then connects the MCP server over stdio.
 *
 * Shutdown hooks run on SIGINT, SIGTERM, `beforeExit`, and when stdin ends or
 * closes (MCP clients often close stdin without sending a signal). Each
 * shutdown step runs in isolation so that one failing flush cannot prevent the
 * remaining state from being flushed or the process from exiting.
 *
 * @returns {Promise<void>} resolves once `server.connect` has completed.
 */
async function main() {
    const logShutdownError = (label, err) => {
        process.stderr.write(`[screenhand] shutdown step "${label}" failed: ${err instanceof Error ? err.message : String(err)}\n`);
    };
    // Flush all learned state on shutdown (signals, stdin EOF, or normal exit)
    const flushAll = () => {
        const steps = [
            // stop() is not awaited (the process may be about to exit); a rejection is logged, not left unhandled.
            ["perception stop", () => { Promise.resolve(perceptionManager.stop()).catch((err) => logShutdownError("perception stop", err)); }],
            ["stall detection stop", () => { perceptionManager.stopStallDetection(); }],
            ["state watcher stop", () => { stateWatcher.stop(); }],
            ["context tracker flush", () => { contextTracker.flush(); }],
            ["learning engine flush", () => { learningEngine.flush(); }],
            ["app map flush", () => { appMap.flush(); }],
        ];
        for (const [label, run] of steps) {
            try {
                run();
            }
            catch (err) {
                logShutdownError(label, err);
            }
        }
    };
    const flushAndExit = () => { flushAll(); process.exit(0); };
    process.on("SIGINT", flushAndExit);
    process.on("SIGTERM", flushAndExit);
    process.on("beforeExit", flushAll);
    // MCP clients often close stdin without sending a signal — flush on stdin end too
    process.stdin.on("end", flushAndExit);
    process.stdin.on("close", flushAndExit);
    const transport = new StdioServerTransport();
    await server.connect(transport);
}
@@ -75,9 +75,11 @@ export class PlaybookPublisher {
75
75
  return null;
76
76
  }
77
77
  writeFileAtomicSync(filePath, JSON.stringify(shared, null, 2) + "\n");
78
- // Best-effort sync to remote API
78
+ // Best-effort sync to remote API — log failures so user knows data didn't leave machine
79
79
  if (this.remote) {
80
- void this.remote.publish(shared).catch(() => { });
80
+ void this.remote.publish(shared).catch((err) => {
81
+ process.stderr.write(`[screenhand] Remote publish failed: ${err instanceof Error ? err.message : String(err)}\n`);
82
+ });
81
83
  }
82
84
  return shared;
83
85
  }
@@ -45,7 +45,7 @@ const BUNDLE_ID_TOOLS = new Set([
45
45
  ]);
46
46
  // Names of tool params that may carry a target/selector
47
47
  const TARGET_PARAM_NAMES = ["selector", "target", "text", "label", "placeholder"];
48
- const FLUSH_THRESHOLD = 50;
48
+ const FLUSH_THRESHOLD = 5; // lowered from 50; presumably the tracked-action count that triggers a flush — usage not shown in this hunk, confirm
49
49
  const MIN_OCCURRENCES_TO_PROMOTE = 2; // presumably the "worked 2+ times" bar for promoting a target — confirm against flush()
50
50
  export class ContextTracker {
51
51
  store;
@@ -176,8 +176,10 @@ export class ContextTracker {
176
176
  // Only for known browser bundleIds
177
177
  const BROWSER_BUNDLE_IDS = new Set([
178
178
  "com.apple.Safari", "com.brave.Browser",
179
+ "com.google.Chrome", "com.google.Chrome.canary",
179
180
  "org.chromium.Chromium", "com.vivaldi.Vivaldi",
180
- "com.operasoftware.Opera",
181
+ "com.operasoftware.Opera", "company.thebrowser.Browser",
182
+ "org.mozilla.firefox", "org.mozilla.firefoxdeveloperedition",
181
183
  ]);
182
184
  if (!BROWSER_BUNDLE_IDS.has(bundleId))
183
185
  return;
@@ -346,18 +348,46 @@ export class ContextTracker {
346
348
  flush() {
347
349
  if (this.learnings.length === 0)
348
350
  return;
349
- if (!this.context?.playbook) {
351
+ if (!this.context) {
350
352
  this.learnings = [];
351
353
  this.actionCount = 0;
352
354
  return;
353
355
  }
356
+ // If no playbook matched, create a stub so learnings aren't discarded.
357
+ // This is the fix for "train on unknown app → restart → everything gone".
358
+ if (!this.context.playbook) {
359
+ const domain = this.context.domain;
360
+ const platform = domain.replace(/^native:/, "").split(".").pop() ?? domain;
361
+ const isNative = domain.startsWith("native:");
362
+ const stub = {
363
+ id: platform + "-learned",
364
+ name: `${platform} — Auto-Learned`,
365
+ description: `Selectors and errors learned from live interaction with ${platform}`,
366
+ platform,
367
+ ...(isNative ? { bundleId: domain.replace(/^native:/, "") } : {}),
368
+ version: "1.0.0",
369
+ steps: [],
370
+ tags: [platform, "auto-learned"],
371
+ successCount: 0,
372
+ failCount: 0,
373
+ selectors: {},
374
+ errors: [],
375
+ };
376
+ this.store.save(stub);
377
+ this.context.playbook = stub;
378
+ this.context.allSelectors = new Map();
379
+ }
354
380
  const playbook = this.context.playbook;
355
381
  let changed = false;
356
- // ── Promote selectors that worked 2+ times ──
382
+ // ── Promote targets that worked 2+ times ──
383
+ // Accepts CSS selectors AND AX targets (plain text labels like "New Note").
384
+ // Only rejects strings that look like event handlers or raw coordinates.
357
385
  const selectorSuccessCount = new Map();
358
386
  for (const l of this.learnings) {
359
- if (l.success && l.target && /^[#.\[]|^[a-z]+[\[.#\s>+~]/.test(l.target) &&
360
- !/\bon\w+\s*=/i.test(l.target)) {
387
+ if (l.success && l.target &&
388
+ !/\bon\w+\s*=/i.test(l.target) &&
389
+ !/^\d+,\d+$/.test(l.target) &&
390
+ l.target.length >= 2 && l.target.length <= 200) {
361
391
  const key = l.target;
362
392
  selectorSuccessCount.set(key, (selectorSuccessCount.get(key) ?? 0) + 1);
363
393
  }