screenhand 0.3.2 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -55,7 +55,7 @@ import { WorldModel } from "./src/state/index.js";
55
55
  import { PerceptionManager } from "./src/perception/index.js";
56
56
  import { Planner, PlanExecutor, GoalStore, ToolRegistry } from "./src/planner/index.js";
57
57
  import { RecoveryEngine } from "./src/recovery/index.js";
58
- import { LearningEngine } from "./src/learning/index.js";
58
+ import { LearningEngine, LocatorPolicy } from "./src/learning/index.js";
59
59
  import { discoverWebElements, testWebElement, compileReference, saveExploreResult, discoverNativeElements } from "./src/platform/explorer.js";
60
60
  import { buildDocUrls, crawlPage, compileLearnResult, saveLearnResult } from "./src/platform/learner.js";
61
61
  import { AccessibilityAdapter } from "./src/runtime/accessibility-adapter.js";
@@ -234,6 +234,10 @@ let CDP = null;
234
234
  async function ensureCDP(overridePort) {
235
235
  if (!CDP)
236
236
  CDP = (await import("chrome-remote-interface")).default;
237
+ // Validate port range (defense in depth — Zod validates at MCP boundary, this catches internal callers)
238
+ if (overridePort && (overridePort < 9222 || overridePort > 9999)) {
239
+ throw new Error(`Invalid CDP port ${overridePort} — must be 9222-9999`);
240
+ }
237
241
  // If caller specified a port, use it directly (e.g. 9333 for Electron apps)
238
242
  if (overridePort) {
239
243
  try {
@@ -411,6 +415,17 @@ let lastSuccessfulToolName = "unknown";
411
415
  let lastKnownBundleId = null;
412
416
  contextTracker.setAppMap(appMap);
413
417
  perceptionManager.setAppMap(appMap);
418
+ // Wire F10: connect ContextTracker to PerceptionCoordinator for per-app perception config
419
+ perceptionManager.setContextTracker(contextTracker);
420
+ // Wire #11: connect TopologyPolicy to AppMap for unified edge scoring
421
+ appMap.setTopologyPolicy(learningEngine.topology);
422
+ // Wire #14: seed TimingModel from AppMap's stored timing profiles (cold-start bootstrap)
423
+ learningEngine.seedTimingFromAppMap(appMap);
424
+ // Wire F5-F7: Cold-start bootstrap — seed all learning policies from AppMap data
425
+ learningEngine.seedLocatorsFromAppMap(appMap);
426
+ learningEngine.seedSensorsFromReadySignals(appMap);
427
+ learningEngine.seedPatternsFromAppMap(appMap);
428
+ learningEngine.seedRecoveryFromContracts(appMap);
414
429
  const _executablePlaybookStore = new PlaybookStore(playbooksDir);
415
430
  try {
416
431
  _executablePlaybookStore.load();
@@ -422,7 +437,9 @@ goalStore.init();
422
437
  const toolRegistry = new ToolRegistry();
423
438
  const recoveryEngine = new RecoveryEngine(worldModel, toolRegistry.toExecutor(), memory);
424
439
  recoveryEngine.setLearningEngine(learningEngine);
440
+ recoveryEngine.setAppMap(appMap);
425
441
  planner.setToolRegistry(toolRegistry);
442
+ planner.setAppMap(appMap);
426
443
  perceptionManager.setLearningEngine(learningEngine);
427
444
  const mcpRecorder = new McpPlaybookRecorder(playbooksDir);
428
445
  const referenceMerger = new ReferenceMerger(referencesDir);
@@ -513,6 +530,20 @@ server.tool = (...args) => {
513
530
  perceptionManager.notifyToolCall();
514
531
  // ── PRE-CALL: check for known error warnings (~0ms, in-memory) ──
515
532
  const knownError = memory.quickErrorCheck(toolName);
533
+ // Wire F11: Block execution for tools that fail repeatedly with known resolution (L2→L1)
534
+ // Exclude playbook-seeded errors (id starts with pb_err_) — those are generic platform warnings,
535
+ // not errors observed in this session. Only block on real runtime failures.
536
+ // Also exclude errors injected via memory_record_error API (empty params) — only runtime errors
537
+ // from the intelligence wrapper (which always have populated params) should trigger blocks.
538
+ const isRuntimeError = knownError && typeof knownError.params === "object" && knownError.params !== null && Object.keys(knownError.params).length > 0;
539
+ if (knownError && knownError.occurrences >= 5 && knownError.resolution && !knownError.id.startsWith("pb_err_") && isRuntimeError) {
540
+ return {
541
+ content: [{
542
+ type: "text",
543
+ text: `⛔ Blocked: "${toolName}" has failed ${knownError.occurrences}x with: "${knownError.error}". Known fix: ${knownError.resolution}. Apply the fix first, then retry.`,
544
+ }],
545
+ };
546
+ }
516
547
  // ── PRE-CALL: auto-start perception if not running ──
517
548
  if (!perceptionManager.isRunning && bridgeReady) {
518
549
  const focusApp = worldModel.getState().focusedApp;
@@ -548,6 +579,9 @@ server.tool = (...args) => {
548
579
  else if (typeof paramBundleId === "string" && paramBundleId) {
549
580
  lastKnownBundleId = paramBundleId;
550
581
  }
582
+ // Snapshot the bundleId for this tool's POST-CALL, so concurrent PRE-CALL
583
+ // overwrites of lastKnownBundleId don't contaminate this tool's context
584
+ const postCallBundleId = preBundleId ?? lastKnownBundleId;
551
585
  // Capture pre-call window title for navigation edge tracking
552
586
  const preWindowTitle = worldModel.getFocusedWindow()?.title.value ?? null;
553
587
  // Action tools = actually doing something. Navigation = just clicking around.
@@ -578,7 +612,7 @@ server.tool = (...args) => {
578
612
  contextTracker.recordOutcome(toolName, safeParams, true, null);
579
613
  // ── POST-CALL: Safari context gap + page context update ──
580
614
  const postFocusApp = worldModel.getState().focusedApp;
581
- const postBundleIdForCtx = postFocusApp?.bundleId ?? lastKnownBundleId;
615
+ const postBundleIdForCtx = postFocusApp?.bundleId ?? postCallBundleId;
582
616
  if (postBundleIdForCtx) {
583
617
  lastKnownBundleId = postBundleIdForCtx;
584
618
  // Try focused window first, then search all windows for matching bundleId
@@ -622,7 +656,7 @@ server.tool = (...args) => {
622
656
  }
623
657
  }
624
658
  // ── POST-CALL: feed learning engine (timing + locator outcomes) ──
625
- const learnBundleId = worldModel.getState().focusedApp?.bundleId ?? lastKnownBundleId ?? "unknown";
659
+ const learnBundleId = worldModel.getState().focusedApp?.bundleId ?? postCallBundleId ?? "unknown";
626
660
  learningEngine.recordToolTiming({ tool: toolName, bundleId: learnBundleId, durationMs, success: true });
627
661
  // Record locator outcome if the tool used a target/selector
628
662
  const locatorTarget = safeParams.target ?? safeParams.selector ?? safeParams.locator
@@ -901,14 +935,17 @@ server.tool = (...args) => {
901
935
  if (fromNode !== toNode) {
902
936
  appMap.addNavNode(learnBundleId, fromNode, { type: "window", description: fromNode });
903
937
  appMap.addNavNode(learnBundleId, toNode, { type: "window", description: toNode });
904
- appMap.recordEdgeOutcome(learnBundleId, fromNode, locatorTarget ?? toolName, toNode, true);
938
+ const locatorSlug = locatorTarget ? String(locatorTarget).slice(0, 80) : null;
939
+ const edgeAction = locatorSlug ? `${toolName}:${locatorSlug}` : toolName;
940
+ // Wire #11: record topology FIRST so AppMap can read the updated Bayesian score
905
941
  learningEngine.recordTopologyOutcome({
906
942
  bundleId: learnBundleId,
907
943
  fromNode,
908
- action: locatorTarget ?? toolName,
944
+ action: edgeAction,
909
945
  toNode,
910
946
  success: true,
911
947
  });
948
+ appMap.recordEdgeOutcome(learnBundleId, fromNode, edgeAction, toNode, true);
912
949
  }
913
950
  }
914
951
  // ── State machine: detect state changes from tool results ──
@@ -1266,7 +1303,7 @@ server.tool = (...args) => {
1266
1303
  // ── Record failure for playbook learning (in-memory only) ──
1267
1304
  contextTracker.recordOutcome(toolName, safeParams, false, errorMsg);
1268
1305
  // ── Feed learning engine (failure timing + locator) ──
1269
- const learnBundleIdErr = worldModel.getState().focusedApp?.bundleId ?? lastKnownBundleId ?? "unknown";
1306
+ const learnBundleIdErr = worldModel.getState().focusedApp?.bundleId ?? postCallBundleId ?? "unknown";
1270
1307
  learningEngine.recordToolTiming({ tool: toolName, bundleId: learnBundleIdErr, durationMs, success: false });
1271
1308
  const failedLocator = safeParams.target ?? safeParams.selector ?? safeParams.locator
1272
1309
  ?? (toolName === "click_text" ? safeParams.text : undefined);
@@ -1422,7 +1459,7 @@ server.tool("windows", "List all visible windows with IDs, positions, and sizes"
1422
1459
  return { content: [{ type: "text", text: lines.join("\n") }] };
1423
1460
  });
1424
1461
  server.tool("focus", "Focus/activate an application (or a specific window by windowId)", {
1425
- bundleId: z.string().describe("App bundle ID, e.g. com.apple.Safari"),
1462
+ bundleId: z.string().regex(/^[a-zA-Z0-9._-]+$/, "Invalid bundleId format").describe("App bundle ID, e.g. com.apple.Safari"),
1426
1463
  windowId: z.number().optional().describe("Specific window ID from windows() — raises that exact window. Use when multiple instances of the same app exist."),
1427
1464
  }, async ({ bundleId, windowId }) => {
1428
1465
  await ensureBridge();
@@ -1528,8 +1565,8 @@ server.tool("focus", "Focus/activate an application (or a specific window by win
1528
1565
  }
1529
1566
  });
1530
1567
  server.tool("launch", "Launch an application. Chrome/Chromium browsers are launched with CDP enabled (port 9222) for browser_* tools.", {
1531
- bundleId: z.string().describe("App bundle ID"),
1532
- cdpPort: z.number().optional().describe("CDP port for Chrome/Chromium (default: 9222). Ignored for non-browser apps."),
1568
+ bundleId: z.string().regex(/^[a-zA-Z0-9._-]+$/, "Invalid bundleId format").describe("App bundle ID"),
1569
+ cdpPort: z.number().min(9222).max(9999).optional().describe("CDP port for Chrome/Chromium (default: 9222). Ignored for non-browser apps."),
1533
1570
  }, async ({ bundleId, cdpPort }) => {
1534
1571
  await ensureBridge();
1535
1572
  const riskyBundleIds = {
@@ -1930,7 +1967,7 @@ server.tool("click_text", "SLOW fallback: Find text on screen via OCR and click
1930
1967
  server.tool("type_text", "Type text using the keyboard. Auto-detects Electron apps and routes through CDP for reliable editor input.", {
1931
1968
  text: z.string().describe("Text to type"),
1932
1969
  pid: z.number().optional().describe("Target process ID for PID-targeted event delivery"),
1933
- cdpPort: z.number().optional().describe("CDP port for Electron apps (e.g. 9229). When set, types via CDP instead of AX — fixes Copilot/panel focus theft."),
1970
+ cdpPort: z.number().min(9222).max(9999).optional().describe("CDP port for Electron apps (e.g. 9229). When set, types via CDP instead of AX — fixes Copilot/panel focus theft."),
1934
1971
  }, async ({ text, pid, cdpPort: portOverride }) => {
1935
1972
  await ensureBridge();
1936
1973
  // Auto-resolve frontmost PID when none provided — global HID posting
@@ -2178,7 +2215,7 @@ function randomDelay(min, max) {
2178
2215
  // BROWSER — control Chrome pages via CDP (10ms, not OCR)
2179
2216
  // ═══════════════════════════════════════════════
2180
2217
  server.tool("browser_tabs", "List all open Chrome/Electron tabs. Use cdpPort to connect to a specific app (e.g. 9333 for Codex Desktop).", {
2181
- cdpPort: z.number().optional().describe("CDP port override (e.g. 9333 for Electron apps). Omit to auto-detect."),
2218
+ cdpPort: z.number().min(9222).max(9999).optional().describe("CDP port override (e.g. 9333 for Electron apps). Omit to auto-detect."),
2182
2219
  }, async ({ cdpPort: portOverride }) => {
2183
2220
  const { CDP: cdp, port } = await ensureCDP(portOverride);
2184
2221
  const targets = await cdp.List({ port });
@@ -2188,7 +2225,7 @@ server.tool("browser_tabs", "List all open Chrome/Electron tabs. Use cdpPort to
2188
2225
  });
2189
2226
  server.tool("browser_open", "Open a URL in Chrome/Electron (creates new tab)", {
2190
2227
  url: z.string().describe("URL to open"),
2191
- cdpPort: z.number().optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
2228
+ cdpPort: z.number().min(9222).max(9999).optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
2192
2229
  }, async ({ url, cdpPort: portOverride }) => {
2193
2230
  // L2-71 fix: Block dangerous URL protocols
2194
2231
  const BLOCKED_PROTOCOLS = ["javascript:", "data:", "blob:", "vbscript:"];
@@ -2212,7 +2249,7 @@ server.tool("browser_open", "Open a URL in Chrome/Electron (creates new tab)", {
2212
2249
  server.tool("browser_navigate", "Navigate the active Chrome/Electron tab to a URL", {
2213
2250
  url: z.string().describe("URL to navigate to"),
2214
2251
  tabId: z.string().optional().describe("Tab ID (from browser_tabs). Omit for most recent tab."),
2215
- cdpPort: z.number().optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
2252
+ cdpPort: z.number().min(9222).max(9999).optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
2216
2253
  }, async ({ url, tabId, cdpPort: portOverride }) => {
2217
2254
  // L2-71 fix: Block dangerous URL protocols that could execute arbitrary code
2218
2255
  const BLOCKED_PROTOCOLS = ["javascript:", "data:", "blob:", "vbscript:"];
@@ -2257,7 +2294,7 @@ server.tool("browser_navigate", "Navigate the active Chrome/Electron tab to a UR
2257
2294
  server.tool("browser_js", "Execute JavaScript in a Chrome/Electron tab. Returns the result. WARNING: This runs arbitrary JS in the browser context — avoid on sensitive pages (banking, email). All executions are audit-logged.", {
2258
2295
  code: z.string().describe("JavaScript to execute. Must be an expression that returns a value. Use (() => { ... })() for multi-line."),
2259
2296
  tabId: z.string().optional().describe("Tab ID. Omit for most recent tab."),
2260
- cdpPort: z.number().optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
2297
+ cdpPort: z.number().min(9222).max(9999).optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
2261
2298
  }, async ({ code, tabId, cdpPort: portOverride }) => {
2262
2299
  auditLog("browser_js", { code, tabId });
2263
2300
  const { CDP: cdp, port } = await ensureCDP(portOverride);
@@ -2291,7 +2328,7 @@ server.tool("browser_dom", "Query the DOM of a Chrome/Electron page. Returns mat
2291
2328
  selector: z.string().describe("CSS selector, e.g. 'button', '.nav a', '#main h2'"),
2292
2329
  tabId: z.string().optional().describe("Tab ID. Omit for most recent tab."),
2293
2330
  limit: z.number().optional().describe("Max results (default 20)"),
2294
- cdpPort: z.number().optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
2331
+ cdpPort: z.number().min(9222).max(9999).optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
2295
2332
  }, async ({ selector, tabId, limit, cdpPort: portOverride }) => {
2296
2333
  // Capture bundleId before any async CDP calls to avoid race condition
2297
2334
  const browserBundleId = worldModel.getState().focusedApp?.bundleId ?? "com.google.Chrome";
@@ -2342,7 +2379,7 @@ server.tool("browser_dom", "Query the DOM of a Chrome/Electron page. Returns mat
2342
2379
  server.tool("browser_click", "Click an element in Chrome/Electron by CSS selector. Uses CDP Input.dispatchMouseEvent for realistic mouse events.", {
2343
2380
  selector: z.string().describe("CSS selector of element to click"),
2344
2381
  tabId: z.string().optional().describe("Tab ID. Omit for most recent tab."),
2345
- cdpPort: z.number().optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
2382
+ cdpPort: z.number().min(9222).max(9999).optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
2346
2383
  }, async ({ selector, tabId, cdpPort: portOverride }) => {
2347
2384
  const { client } = await getCDPClient(tabId, portOverride);
2348
2385
  await client.Runtime.enable();
@@ -2375,7 +2412,7 @@ server.tool("browser_type", "Type into an input field in Chrome/Electron. Uses C
2375
2412
  text: z.string().describe("Text to type"),
2376
2413
  clear: z.boolean().optional().describe("Clear field first (default true)"),
2377
2414
  tabId: z.string().optional().describe("Tab ID"),
2378
- cdpPort: z.number().optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
2415
+ cdpPort: z.number().min(9222).max(9999).optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
2379
2416
  }, async ({ selector, text, clear, tabId, cdpPort: portOverride }) => {
2380
2417
  const { client } = await getCDPClient(tabId, portOverride);
2381
2418
  await client.Runtime.enable();
@@ -2416,7 +2453,7 @@ server.tool("browser_wait", "Wait for a condition on a Chrome/Electron page", {
2416
2453
  condition: z.string().describe("JS expression that returns truthy when ready. e.g. 'document.querySelector(\".loaded\")'"),
2417
2454
  timeoutMs: z.number().optional().describe("Timeout in ms (default 10000)"),
2418
2455
  tabId: z.string().optional().describe("Tab ID"),
2419
- cdpPort: z.number().optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
2456
+ cdpPort: z.number().min(9222).max(9999).optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
2420
2457
  }, async ({ condition, timeoutMs, tabId, cdpPort: portOverride }) => {
2421
2458
  const { CDP: cdp, port } = await ensureCDP(portOverride);
2422
2459
  let targetId = tabId;
@@ -2444,7 +2481,7 @@ server.tool("browser_wait", "Wait for a condition on a Chrome/Electron page", {
2444
2481
  });
2445
2482
  server.tool("browser_page_info", "Get current page title, URL, and text content summary", {
2446
2483
  tabId: z.string().optional().describe("Tab ID"),
2447
- cdpPort: z.number().optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
2484
+ cdpPort: z.number().min(9222).max(9999).optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
2448
2485
  }, async ({ tabId, cdpPort: portOverride }) => {
2449
2486
  // Capture bundleId BEFORE CDP call to prevent focus-change race
2450
2487
  const browserBundleId = worldModel.getState().focusedApp?.bundleId ?? "com.google.Chrome";
@@ -2519,7 +2556,7 @@ if (origQuery) {
2519
2556
  `;
2520
2557
  server.tool("browser_stealth", "Inject anti-detection patches into Chrome/Electron page. Call once after navigating to a protected site. Hides webdriver flag, patches plugins/languages/permissions.", {
2521
2558
  tabId: z.string().optional().describe("Tab ID. Omit for most recent tab."),
2522
- cdpPort: z.number().optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
2559
+ cdpPort: z.number().min(9222).max(9999).optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
2523
2560
  }, async ({ tabId, cdpPort: portOverride }) => {
2524
2561
  const { client } = await getCDPClient(tabId, portOverride);
2525
2562
  await client.Page.enable();
@@ -2539,7 +2576,7 @@ server.tool("browser_fill_form", "Fill a form field with human-like typing (anti
2539
2576
  clear: z.boolean().optional().describe("Clear field first (default true)"),
2540
2577
  delayMs: z.number().optional().describe("Avg delay between keystrokes in ms (default 50)"),
2541
2578
  tabId: z.string().optional().describe("Tab ID"),
2542
- cdpPort: z.number().optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
2579
+ cdpPort: z.number().min(9222).max(9999).optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
2543
2580
  }, async ({ selector, text, clear, delayMs, tabId, cdpPort: portOverride }) => {
2544
2581
  const { client } = await getCDPClient(tabId, portOverride);
2545
2582
  await client.Runtime.enable();
@@ -2583,7 +2620,7 @@ server.tool("browser_fill_form", "Fill a form field with human-like typing (anti
2583
2620
  server.tool("browser_human_click", "Alias for browser_click — both use realistic mouseMoved → mousePressed → mouseReleased events. Prefer browser_click directly.", {
2584
2621
  selector: z.string().describe("CSS selector of element to click"),
2585
2622
  tabId: z.string().optional().describe("Tab ID. Omit for most recent tab."),
2586
- cdpPort: z.number().optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
2623
+ cdpPort: z.number().min(9222).max(9999).optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
2587
2624
  }, async ({ selector, tabId, cdpPort: portOverride }) => {
2588
2625
  const { client } = await getCDPClient(tabId, portOverride);
2589
2626
  await client.Runtime.enable();
@@ -2991,7 +3028,7 @@ server.tool("playbook_record", "Macro recorder: start recording, do the flow, st
2991
3028
  platform: z.string().optional().describe("Platform name (required for start)"),
2992
3029
  name: z.string().optional().describe("Playbook name (required for stop)"),
2993
3030
  description: z.string().optional().describe("Playbook description (for stop)"),
2994
- cdpPort: z.number().optional().describe("CDP port if needed for browser_js steps (e.g. 9333 for Codex)"),
3031
+ cdpPort: z.number().min(9222).max(9999).optional().describe("CDP port if needed for browser_js steps (e.g. 9333 for Codex)"),
2995
3032
  }, async ({ action, platform, name, description, cdpPort }) => {
2996
3033
  switch (action) {
2997
3034
  case "start": {
@@ -3162,6 +3199,27 @@ server.tool("applescript", "Run an AppleScript command. For controlling Finder,
3162
3199
  if (process.platform === "win32") {
3163
3200
  return { content: [{ type: "text", text: "AppleScript is not supported on Windows. Use ui_tree, ui_press, and other accessibility tools instead." }] };
3164
3201
  }
3202
+ // Block shell execution vectors in AppleScript — denylist of known-dangerous patterns for safety-critical commands
3203
+ const scriptLower = script.toLowerCase();
3204
+ const BLOCKED_PATTERNS = [
3205
+ /do\s+shell\s+script/i, // direct shell execution
3206
+ /run\s+shell\s+script/i, // variant
3207
+ /run\s+script/i, // dynamic AppleScript eval (can construct blocked commands)
3208
+ /do\s+script/i, // Terminal.app shell execution
3209
+ /«class\s/i, // raw Apple Event codes (bypass text-level blocks)
3210
+ /system\s+events.*process/i, // process spawning via System Events
3211
+ /NSAppleScript/i, // Objective-C bridge
3212
+ /ObjC\.import/i, // JXA Objective-C bridge
3213
+ /\bshell\b/i, // catch-all for shell-related commands
3214
+ /do\s+JavaScript/i, // JXA execution
3215
+ ];
3216
+ if (BLOCKED_PATTERNS.some(p => p.test(script))) {
3217
+ return { content: [{ type: "text", text: "Blocked: this AppleScript contains a restricted command (shell execution, dynamic eval, or process spawning). Use the Bash tool for shell commands." }] };
3218
+ }
3219
+ // Block string concatenation that could reassemble blocked commands
3220
+ if (/&/.test(script) && (/script/i.test(script) || /shell/i.test(script))) {
3221
+ return { content: [{ type: "text", text: "Blocked: AppleScript with string concatenation containing 'script' or 'shell' — potential bypass attempt." }] };
3222
+ }
3165
3223
  try {
3166
3224
  const result = execSync(`osascript -e '${script.replace(/'/g, "'\\''")}'`, {
3167
3225
  encoding: "utf-8",
@@ -3722,7 +3780,7 @@ import { METHOD_CAPABILITIES, DEFAULT_RETRY_POLICY, planExecution, executeWithFa
3722
3780
  server.tool("execution_plan", "Show the execution plan for an action type. Returns the ordered fallback chain based on available infrastructure.", {
3723
3781
  action: z.enum(["click", "type", "read", "locate", "select", "scroll"]).describe("Action type"),
3724
3782
  }, async ({ action }) => {
3725
- const plan = planExecution(action, { hasBridge: true, hasCDP: cdpPort !== null });
3783
+ const plan = planExecution(action, { hasBridge: true, hasCDP: cdpPort !== null }, getSensorRanking());
3726
3784
  const lines = plan.map((method, i) => {
3727
3785
  const cap = METHOD_CAPABILITIES[method];
3728
3786
  return `${i + 1}. ${method} (~${cap.avgLatencyMs}ms)${i === 0 ? " ← primary" : ""}`;
@@ -3764,26 +3822,161 @@ function infra() {
3764
3822
  return { hasBridge: true, hasCDP: cdpPort !== null };
3765
3823
  }
3766
3824
  /**
3767
- * Get a retry policy adapted by the learning engine's adaptive budgets.
3768
- * If the learning engine shows the current app responds quickly, reduce retry delays.
3825
+ * Get sensor rankings for the current app from the learning engine.
3826
+ * Used by planExecution() to reorder fallback methods based on learned success rates.
3827
+ * Returns undefined if no bundleId is known or the learning engine returns no rankings for it (falls back to canonical order).
3828
+ */
3829
+ function getSensorRanking(overrideBundleId) {
3830
+ // Use override bundleId when provided (from tool params), else worldModel, else lastKnown
3831
+ const bundleId = overrideBundleId ?? worldModel.getState().focusedApp?.bundleId ?? lastKnownBundleId;
3832
+ if (!bundleId)
3833
+ return undefined;
3834
+ const ranked = learningEngine.rankSensors(bundleId);
3835
+ return ranked.length > 0 ? ranked : undefined;
3836
+ }
3837
+ /**
3838
+ * Get a retry policy adapted by the learning engine's adaptive budgets
3839
+ * AND the AppMap's timing profiles (L7→L1).
3840
+ *
3841
+ * Priority: AppMap timing > Learning budget > Default
3842
+ * AppMap stores per-tool/per-action avg durations from real executions.
3843
+ * Learning budget stores per-app adaptive budgets from outcome stats.
3769
3844
  */
3770
- function getAdaptedRetryPolicy() {
3771
- if (!currentAdaptiveBudget)
3845
+ function getAdaptedRetryPolicy(toolName, overrideBundleId) {
3846
+ let typicalMs = null;
3847
+ // L7→L1: Check AppMap timing profiles for the action type.
3848
+ // Timing keys are stored as "click::Submit", "click_text::Login", etc.
3849
+ // Fallback tools pass "click_with_fallback" — extract the action prefix to match.
3850
+ const bundleId = overrideBundleId ?? worldModel.getState().focusedApp?.bundleId ?? lastKnownBundleId;
3851
+ if (bundleId && toolName) {
3852
+ const actionPrefix = toolName.replace(/_with_fallback$/, "");
3853
+ // Get all timing profiles for this app, then filter by action prefix
3854
+ const allTimings = appMap.getTimingProfile(bundleId);
3855
+ const matchingTimings = allTimings.filter((t) => t.key.startsWith(actionPrefix + "::") || t.key === actionPrefix);
3856
+ if (matchingTimings.length > 0) {
3857
+ // Prefer element_response entries: take the median of their avgMs; if there are none, use the first matching entry's avgMs
3858
+ const responseTimes = matchingTimings
3859
+ .filter((t) => t.type === "element_response")
3860
+ .map((t) => t.avgMs);
3861
+ if (responseTimes.length > 0) {
3862
+ responseTimes.sort((a, b) => a - b);
3863
+ const mid = Math.floor(responseTimes.length / 2);
3864
+ typicalMs = responseTimes.length % 2 === 1
3865
+ ? responseTimes[mid]
3866
+ : (responseTimes[mid - 1] + responseTimes[mid]) / 2;
3867
+ }
3868
+ else {
3869
+ typicalMs = matchingTimings[0].avgMs;
3870
+ }
3871
+ }
3872
+ }
3873
+ // Fall back to L5 adaptive budget
3874
+ if (typicalMs == null && currentAdaptiveBudget) {
3875
+ typicalMs = Math.max(currentAdaptiveBudget.locateMs, currentAdaptiveBudget.actMs);
3876
+ }
3877
+ if (typicalMs == null)
3772
3878
  return DEFAULT_RETRY_POLICY;
3773
- // Use the max of locate+act as a guide for retry delay — faster apps need shorter delays
3774
- const typicalMs = Math.max(currentAdaptiveBudget.locateMs, currentAdaptiveBudget.actMs);
3775
3879
  // Retry delay = max(100ms, typical * 1.5), capped at the default
3776
3880
  const adaptedDelay = Math.min(DEFAULT_RETRY_POLICY.delayBetweenRetriesMs, Math.max(100, Math.ceil(typicalMs * 1.5)));
3777
3881
  if (adaptedDelay === DEFAULT_RETRY_POLICY.delayBetweenRetriesMs)
3778
3882
  return DEFAULT_RETRY_POLICY;
3779
3883
  return { ...DEFAULT_RETRY_POLICY, delayBetweenRetriesMs: adaptedDelay };
3780
3884
  }
3781
- function formatResult(action, target, result) {
3885
+ function formatResult(action, target, result, preCheckWarnings) {
3886
+ const prefix = preCheckWarnings && preCheckWarnings.length > 0
3887
+ ? preCheckWarnings.join("\n") + "\n"
3888
+ : "";
3782
3889
  if (result.ok) {
3783
3890
  const fallbackNote = result.fallbackFrom ? ` (fell back from ${result.fallbackFrom})` : "";
3784
- return { content: [{ type: "text", text: `${action} "${result.target ?? target}" via ${result.method}${fallbackNote} in ${result.durationMs}ms` }] };
3891
+ return { content: [{ type: "text", text: `${prefix}${action} "${result.target ?? target}" via ${result.method}${fallbackNote} in ${result.durationMs}ms` }] };
3785
3892
  }
3786
- return { content: [{ type: "text", text: `Failed to ${action} "${target}" — all methods exhausted. Last error: ${result.error}` }] };
3893
+ return { content: [{ type: "text", text: `${prefix}Failed to ${action} "${target}" — all methods exhausted. Last error: ${result.error}` }] };
3894
+ }
3895
+ /**
3896
+ * L3→L1: Pre-execution worldModel check.
3897
+ * Verifies the target app is focused and not blocked by dialogs.
3898
+ * Auto-focuses the app if it's in the background. Returns warnings
3899
+ * that should be prepended to the result.
3900
+ */
3901
+ async function preExecutionCheck(bundleId) {
3902
+ const warnings = [];
3903
+ try {
3904
+ const state = worldModel.getState();
3905
+ const targetBundleId = bundleId ?? lastKnownBundleId ?? state.focusedApp?.bundleId;
3906
+ if (!targetBundleId)
3907
+ return warnings;
3908
+ // Check if target app is focused — use correct bridge method "app.focus"
3909
+ if (state.focusedApp && state.focusedApp.bundleId !== targetBundleId) {
3910
+ warnings.push(`[L3→L1] Target app ${targetBundleId} is not focused (current: ${state.focusedApp.bundleId}). Auto-focusing...`);
3911
+ try {
3912
+ await bridge.call("app.focus", { bundleId: targetBundleId });
3913
+ }
3914
+ catch {
3915
+ warnings.push(`[L3→L1] Auto-focus failed — proceeding anyway`);
3916
+ }
3917
+ }
3918
+ // Re-fetch state after auto-focus to get current focused app
3919
+ const postFocusState = worldModel.getState();
3920
+ // Check for blocking dialogs — scoped to target app only.
3921
+ // Observer-sourced dialogs have windowId=0 (no real window ID),
3922
+ // so fall back to checking if the focused app matches.
3923
+ const relevantDialogs = postFocusState.activeDialogs.filter((d) => {
3924
+ if (d.windowId === 0) {
3925
+ return postFocusState.focusedApp?.bundleId === targetBundleId;
3926
+ }
3927
+ const win = postFocusState.windows.get(d.windowId);
3928
+ return win?.bundleId === targetBundleId;
3929
+ });
3930
+ if (relevantDialogs.length > 0) {
3931
+ const dialogTitles = relevantDialogs
3932
+ .map((d) => d.title || d.type)
3933
+ .join(", ");
3934
+ warnings.push(`[L3→L1] Active dialog(s) detected: ${dialogTitles} — may block interaction`);
3935
+ }
3936
+ // Check if target window is off-screen
3937
+ for (const [, win] of state.windows) {
3938
+ if (win.bundleId === targetBundleId && !win.isOnScreen) {
3939
+ warnings.push(`[L3→L1] Window "${win.title.value}" is off-screen or minimized`);
3940
+ }
3941
+ }
3942
+ // Check if world state is stale (>10s since last update AND confidence below 0.5)
3943
+ const staleThresholdMs = 10_000;
3944
+ const lastUpdate = new Date(state.updatedAt).getTime();
3945
+ if (!Number.isNaN(lastUpdate) && Date.now() - lastUpdate > staleThresholdMs && state.confidence < 0.5) {
3946
+ warnings.push(`[L3→L1] World state is stale (${Math.round((Date.now() - lastUpdate) / 1000)}s old, confidence ${state.confidence.toFixed(2)}) — screen may have changed`);
3947
+ }
3948
+ }
3949
+ catch {
3950
+ // Pre-check is best-effort advisory — never crash the tool call
3951
+ }
3952
+ return warnings;
3953
+ }
3954
+ /**
3955
+ * L7→L1: Try to resolve an element's position from the AppMap.
3956
+ * Returns known screen coordinates if the map has a position for this label
3957
+ * AND we can get the current window bounds. Returns null otherwise.
3958
+ */
3959
+ function resolveMapPosition(target, bundleId) {
3960
+ const bid = bundleId ?? worldModel.getState().focusedApp?.bundleId ?? lastKnownBundleId;
3961
+ if (!bid)
3962
+ return null;
3963
+ // Get window bounds from worldModel for coordinate conversion
3964
+ const state = worldModel.getState();
3965
+ const focusedWinId = state.focusedWindowId;
3966
+ if (focusedWinId == null)
3967
+ return null;
3968
+ const win = state.windows.get(focusedWinId);
3969
+ if (!win || win.bundleId !== bid)
3970
+ return null;
3971
+ const bounds = win.bounds.value;
3972
+ // Guard: reject stale bounds (>5s old) to prevent clicking at wrong position after window move
3973
+ const boundsAge = Date.now() - new Date(win.bounds.updatedAt).getTime();
3974
+ if (boundsAge > 5000 || boundsAge < 0)
3975
+ return null; // stale or future timestamp
3976
+ // Guard: reject uninitialized or implausibly small bounds (width or height under 50) to prevent clicking at (0,0)
3977
+ if (bounds.width < 50 || bounds.height < 50)
3978
+ return null;
3979
+ return appMap.resolvePosition(bid, target, bounds);
3787
3980
  }
3788
3981
  // ── click_with_fallback ──
3789
3982
  server.tool("click_with_fallback", "Click a target by text using the canonical fallback chain: AX → CDP → OCR. Automatically retries and falls through methods.", {
@@ -3791,10 +3984,37 @@ server.tool("click_with_fallback", "Click a target by text using the canonical f
3791
3984
  bundleId: z.string().optional().describe("App bundle ID (for AX path)"),
3792
3985
  }, async ({ target, bundleId }) => {
3793
3986
  await ensureBridge();
3794
- const plan = planExecution("click", infra())
3987
+ const preCheckWarnings = await preExecutionCheck(bundleId);
3988
+ // L7→L1: If AppMap knows this element's position, try coordinates first.
3989
+ // WARNING: Coordinate clicks are unverified — if the window moved or a modal
3990
+ // appeared, the click may hit the wrong target. On failure, falls through to
3991
+ // the standard AX/CDP/OCR chain which verifies element identity.
3992
+ // Skip map-guided shortcut if precheck detected blocking conditions (dialogs, off-screen)
3993
+ const hasBlockingCondition = preCheckWarnings.some((w) => w.includes("dialog") || w.includes("off-screen") || w.includes("not frontmost"));
3994
+ const mapPos = !hasBlockingCondition ? resolveMapPosition(target, bundleId) : null;
3995
+ if (mapPos) {
3996
+ try {
3997
+ const start = Date.now();
3998
+ await bridge.call("cg.mouseClick", { x: mapPos.x, y: mapPos.y });
3999
+ preCheckWarnings.push(`[L7→L1] Used map position (${mapPos.x}, ${mapPos.y}) for "${target}" — UNVERIFIED coordinate click`);
4000
+ return formatResult("Clicked", target, {
4001
+ ok: true, method: "coordinates", durationMs: Date.now() - start,
4002
+ fallbackFrom: null, retries: 0, error: null, target: `${target} at (${mapPos.x},${mapPos.y}) [map-guided, unverified]`,
4003
+ }, preCheckWarnings);
4004
+ }
4005
+ catch {
4006
+ preCheckWarnings.push(`[L7→L1] Map position click failed — falling back to standard chain`);
4007
+ }
4008
+ }
4009
+ const plan = planExecution("click", infra(), getSensorRanking())
3795
4010
  .filter((m) => m !== "coordinates");
3796
4011
  const targetPid = await resolvePid(bundleId);
3797
- const result = await executeWithFallback("click", plan, getAdaptedRetryPolicy(), async (method, attempt) => {
4012
+ // L2→L1: Resolve known selector from references for direct injection
4013
+ const knownSelector = contextTracker.getSelector(target);
4014
+ if (knownSelector) {
4015
+ preCheckWarnings.push(`[L2→L1] Injecting known selector: ${knownSelector}`);
4016
+ }
4017
+ const result = await executeWithFallback("click", plan, getAdaptedRetryPolicy("click_with_fallback"), async (method, attempt) => {
3798
4018
  const start = Date.now();
3799
4019
  try {
3800
4020
  switch (method) {
@@ -3829,15 +4049,28 @@ server.tool("click_with_fallback", "Click a target by text using the canonical f
3829
4049
  const client = await CDPClient({ port });
3830
4050
  try {
3831
4051
  const { Runtime } = client;
3832
- const evalResult = await Runtime.evaluate({
3833
- expression: `(() => {
3834
- const el = Array.from(document.querySelectorAll('*')).find(e =>
4052
+ // L2→L1: Try known selector first (wrapped in try/catch to handle
4053
+ // invalid selectors gracefully), then fall back to text search.
4054
+ const textSearchExpr = `Array.from(document.querySelectorAll('*')).find(e =>
3835
4055
  e.textContent?.trim() === ${JSON.stringify(target)} ||
3836
- e.getAttribute('aria-label') === ${JSON.stringify(target)}
3837
- );
4056
+ e.getAttribute('aria-label') === ${JSON.stringify(target)})`;
4057
+ const selectorExpr = knownSelector
4058
+ ? `(() => {
4059
+ try {
4060
+ const el = document.querySelector(${JSON.stringify(knownSelector)});
4061
+ if (el) { el.click(); return 'clicked'; }
4062
+ } catch(e) { /* invalid selector — fall through to text search */ }
4063
+ const fallback = ${textSearchExpr};
4064
+ if (fallback) { fallback.click(); return 'clicked'; }
4065
+ return null;
4066
+ })()`
4067
+ : `(() => {
4068
+ const el = ${textSearchExpr};
3838
4069
  if (el) { el.click(); return 'clicked'; }
3839
4070
  return null;
3840
- })()`,
4071
+ })()`;
4072
+ const evalResult = await Runtime.evaluate({
4073
+ expression: selectorExpr,
3841
4074
  returnByValue: true,
3842
4075
  });
3843
4076
  if (evalResult.result?.value === "clicked") {
@@ -3872,7 +4105,7 @@ server.tool("click_with_fallback", "Click a target by text using the canonical f
3872
4105
  return { ok: false, method, durationMs: Date.now() - start, fallbackFrom: null, retries: attempt, error: err instanceof Error ? err.message : String(err), target };
3873
4106
  }
3874
4107
  });
3875
- return formatResult("Clicked", target, result);
4108
+ return formatResult("Clicked", target, result, preCheckWarnings);
3876
4109
  });
3877
4110
  // ── type_with_fallback ──
3878
4111
  server.tool("type_with_fallback", "Type text into a target field using the canonical fallback chain: AX → CDP → coordinates. Finds the field by label/placeholder, focuses it, then types.", {
@@ -3882,9 +4115,12 @@ server.tool("type_with_fallback", "Type text into a target field using the canon
3882
4115
  clearFirst: z.boolean().optional().describe("Select-all and clear the field before typing (default: false)"),
3883
4116
  }, async ({ target, text, bundleId, clearFirst }) => {
3884
4117
  await ensureBridge();
3885
- const plan = planExecution("type", infra());
4118
+ const preCheckWarnings = await preExecutionCheck(bundleId);
4119
+ const plan = planExecution("type", infra(), getSensorRanking());
3886
4120
  const targetPid = await resolvePid(bundleId);
3887
- const result = await executeWithFallback("type", plan, getAdaptedRetryPolicy(), async (method, attempt) => {
4121
+ // L2→L1: Resolve known selector for direct injection
4122
+ const knownSelector = contextTracker.getSelector(target);
4123
+ const result = await executeWithFallback("type", plan, getAdaptedRetryPolicy("type_with_fallback"), async (method, attempt) => {
3888
4124
  const start = Date.now();
3889
4125
  try {
3890
4126
  switch (method) {
@@ -3972,17 +4208,30 @@ server.tool("type_with_fallback", "Type text into a target field using the canon
3972
4208
  const client = await CDPClient({ port });
3973
4209
  try {
3974
4210
  const { Runtime, DOM, Input } = client;
3975
- const evalResult = await Runtime.evaluate({
3976
- expression: `(() => {
3977
- const el = Array.from(document.querySelectorAll('input, textarea, [contenteditable]')).find(e =>
4211
+ // L2→L1: Try known selector first (with try/catch for invalid selectors),
4212
+ // then fall back to attribute search.
4213
+ const fieldSearchExpr = `Array.from(document.querySelectorAll('input, textarea, [contenteditable]')).find(e =>
3978
4214
  e.getAttribute('placeholder') === ${JSON.stringify(target)} ||
3979
4215
  e.getAttribute('aria-label') === ${JSON.stringify(target)} ||
3980
4216
  e.getAttribute('name') === ${JSON.stringify(target)} ||
3981
- (e.labels && Array.from(e.labels).some(l => l.textContent?.trim() === ${JSON.stringify(target)}))
3982
- );
4217
+ (e.labels && Array.from(e.labels).some(l => l.textContent?.trim() === ${JSON.stringify(target)})))`;
4218
+ const fieldExpr = knownSelector
4219
+ ? `(() => {
4220
+ try {
4221
+ const el = document.querySelector(${JSON.stringify(knownSelector)});
4222
+ if (el) { el.focus(); return true; }
4223
+ } catch(e) { /* invalid selector — fall through */ }
4224
+ const fallback = ${fieldSearchExpr};
4225
+ if (fallback) { fallback.focus(); return true; }
4226
+ return false;
4227
+ })()`
4228
+ : `(() => {
4229
+ const el = ${fieldSearchExpr};
3983
4230
  if (el) { el.focus(); return true; }
3984
4231
  return false;
3985
- })()`,
4232
+ })()`;
4233
+ const evalResult = await Runtime.evaluate({
4234
+ expression: fieldExpr,
3986
4235
  returnByValue: true,
3987
4236
  });
3988
4237
  if (!evalResult.result?.value)
@@ -4009,7 +4258,7 @@ server.tool("type_with_fallback", "Type text into a target field using the canon
4009
4258
  return { ok: false, method, durationMs: Date.now() - start, fallbackFrom: null, retries: attempt, error: err instanceof Error ? err.message : String(err), target };
4010
4259
  }
4011
4260
  });
4012
- return formatResult("Typed into", target, result);
4261
+ return formatResult("Typed into", target, result, preCheckWarnings);
4013
4262
  });
4014
4263
  // ── read_with_fallback ──
4015
4264
  server.tool("read_with_fallback", "Read text content from the screen or a specific element using the canonical fallback chain: AX → CDP → OCR. Returns the text found.", {
@@ -4017,9 +4266,15 @@ server.tool("read_with_fallback", "Read text content from the screen or a specif
4017
4266
  bundleId: z.string().optional().describe("App bundle ID"),
4018
4267
  }, async ({ target, bundleId }) => {
4019
4268
  await ensureBridge();
4020
- const plan = planExecution("read", infra());
4269
+ const preCheckWarnings = await preExecutionCheck(bundleId);
4270
+ const plan = planExecution("read", infra(), getSensorRanking());
4021
4271
  const targetPid = await resolvePid(bundleId);
4022
- const result = await executeWithFallback("read", plan, getAdaptedRetryPolicy(), async (method, attempt) => {
4272
+ // L2→L1: Resolve known selector from references for direct injection
4273
+ const knownSelector = target ? contextTracker.getSelector(target) : null;
4274
+ if (knownSelector) {
4275
+ preCheckWarnings.push(`[L2→L1] Injecting known selector: ${knownSelector}`);
4276
+ }
4277
+ const result = await executeWithFallback("read", plan, getAdaptedRetryPolicy("read_with_fallback"), async (method, attempt) => {
4023
4278
  const start = Date.now();
4024
4279
  try {
4025
4280
  switch (method) {
@@ -4126,14 +4381,25 @@ server.tool("read_with_fallback", "Read text content from the screen or a specif
4126
4381
  try {
4127
4382
  const { Runtime } = client;
4128
4383
  if (target) {
4129
- const evalResult = await Runtime.evaluate({
4130
- expression: `(() => {
4131
- const el = Array.from(document.querySelectorAll('*')).find(e =>
4384
+ // L2→L1: Try known selector first, then fall back to text search
4385
+ const textSearch = `Array.from(document.querySelectorAll('*')).find(e =>
4132
4386
  e.getAttribute('aria-label') === ${JSON.stringify(target)} ||
4133
- e.textContent?.trim() === ${JSON.stringify(target)}
4134
- );
4135
- return el ? (el.value ?? el.textContent ?? '').trim() : null;
4136
- })()`,
4387
+ e.textContent?.trim() === ${JSON.stringify(target)})`;
4388
+ const expr = knownSelector
4389
+ ? `(() => {
4390
+ try {
4391
+ const el = document.querySelector(${JSON.stringify(knownSelector)});
4392
+ if (el) return (el.value ?? el.textContent ?? '').trim();
4393
+ } catch(e) {}
4394
+ const fallback = ${textSearch};
4395
+ return fallback ? (fallback.value ?? fallback.textContent ?? '').trim() : null;
4396
+ })()`
4397
+ : `(() => {
4398
+ const el = ${textSearch};
4399
+ return el ? (el.value ?? el.textContent ?? '').trim() : null;
4400
+ })()`;
4401
+ const evalResult = await Runtime.evaluate({
4402
+ expression: expr,
4137
4403
  returnByValue: true,
4138
4404
  });
4139
4405
  if (evalResult.result?.value == null)
@@ -4173,11 +4439,13 @@ server.tool("read_with_fallback", "Read text content from the screen or a specif
4173
4439
  return { ok: false, method, durationMs: Date.now() - start, fallbackFrom: null, retries: attempt, error: err instanceof Error ? err.message : String(err), target: null };
4174
4440
  }
4175
4441
  });
4442
+ // Custom format (not formatResult) — read results include content inline
4443
+ const prefix = preCheckWarnings.length > 0 ? preCheckWarnings.join("\n") + "\n" : "";
4176
4444
  if (result.ok) {
4177
4445
  const fallbackNote = result.fallbackFrom ? ` (fell back from ${result.fallbackFrom})` : "";
4178
- return { content: [{ type: "text", text: `Read via ${result.method}${fallbackNote} in ${result.durationMs}ms:\n\n${result.target}` }] };
4446
+ return { content: [{ type: "text", text: `${prefix}Read via ${result.method}${fallbackNote} in ${result.durationMs}ms:\n\n${result.target}` }] };
4179
4447
  }
4180
- return { content: [{ type: "text", text: `Failed to read${target ? ` "${target}"` : ""} — all methods exhausted. Last error: ${result.error}` }] };
4448
+ return { content: [{ type: "text", text: `${prefix}Failed to read${target ? ` "${target}"` : ""} — all methods exhausted. Last error: ${result.error}` }] };
4181
4449
  });
4182
4450
  // ── locate_with_fallback ──
4183
4451
  server.tool("locate_with_fallback", "Find an element's position on screen using the canonical fallback chain: AX → CDP → OCR. Returns bounds (x, y, width, height).", {
@@ -4185,9 +4453,22 @@ server.tool("locate_with_fallback", "Find an element's position on screen using
4185
4453
  bundleId: z.string().optional().describe("App bundle ID"),
4186
4454
  }, async ({ target, bundleId }) => {
4187
4455
  await ensureBridge();
4188
- const plan = planExecution("locate", infra());
4456
+ const preCheckWarnings = await preExecutionCheck(bundleId);
4457
+ // L7→L1: If AppMap knows this element's position, record it as a hint (the locate chain below still runs)
4458
+ const mapPos = resolveMapPosition(target, bundleId);
4459
+ if (mapPos) {
4460
+ // Map provides center point only — use as hint, not authoritative bounds.
4461
+ // Fall through to full locate chain for accurate bounds.
4462
+ preCheckWarnings.push(`[L7→L1] Map hint: "${target}" expected near (${mapPos.x}, ${mapPos.y}) — verifying via locate chain`);
4463
+ }
4464
+ const plan = planExecution("locate", infra(), getSensorRanking());
4189
4465
  const targetPid = await resolvePid(bundleId);
4190
- const result = await executeWithFallback("locate", plan, getAdaptedRetryPolicy(), async (method, attempt) => {
4466
+ // L2→L1: Resolve known selector from references for direct injection
4467
+ const knownSelector = contextTracker.getSelector(target);
4468
+ if (knownSelector) {
4469
+ preCheckWarnings.push(`[L2→L1] Injecting known selector: ${knownSelector}`);
4470
+ }
4471
+ const result = await executeWithFallback("locate", plan, getAdaptedRetryPolicy("locate_with_fallback"), async (method, attempt) => {
4191
4472
  const start = Date.now();
4192
4473
  try {
4193
4474
  switch (method) {
@@ -4220,16 +4501,29 @@ server.tool("locate_with_fallback", "Find an element's position on screen using
4220
4501
  const client = await CDPClient({ port });
4221
4502
  try {
4222
4503
  const { Runtime } = client;
4223
- const evalResult = await Runtime.evaluate({
4224
- expression: `(() => {
4225
- const el = Array.from(document.querySelectorAll('*')).find(e =>
4504
+ // L2→L1: Try known selector first, then fall back to text search
4505
+ const textSearch = `Array.from(document.querySelectorAll('*')).find(e =>
4226
4506
  e.textContent?.trim() === ${JSON.stringify(target)} ||
4227
- e.getAttribute('aria-label') === ${JSON.stringify(target)}
4228
- );
4229
- if (!el) return null;
4230
- const r = el.getBoundingClientRect();
4231
- return { x: Math.round(r.x), y: Math.round(r.y), width: Math.round(r.width), height: Math.round(r.height) };
4232
- })()`,
4507
+ e.getAttribute('aria-label') === ${JSON.stringify(target)})`;
4508
+ const expr = knownSelector
4509
+ ? `(() => {
4510
+ try {
4511
+ const el = document.querySelector(${JSON.stringify(knownSelector)});
4512
+ if (el) { const r = el.getBoundingClientRect(); return { x: Math.round(r.x), y: Math.round(r.y), width: Math.round(r.width), height: Math.round(r.height) }; }
4513
+ } catch(e) {}
4514
+ const fallback = ${textSearch};
4515
+ if (!fallback) return null;
4516
+ const r = fallback.getBoundingClientRect();
4517
+ return { x: Math.round(r.x), y: Math.round(r.y), width: Math.round(r.width), height: Math.round(r.height) };
4518
+ })()`
4519
+ : `(() => {
4520
+ const el = ${textSearch};
4521
+ if (!el) return null;
4522
+ const r = el.getBoundingClientRect();
4523
+ return { x: Math.round(r.x), y: Math.round(r.y), width: Math.round(r.width), height: Math.round(r.height) };
4524
+ })()`;
4525
+ const evalResult = await Runtime.evaluate({
4526
+ expression: expr,
4233
4527
  returnByValue: true,
4234
4528
  });
4235
4529
  const bounds = evalResult.result?.value;
@@ -4260,7 +4554,7 @@ server.tool("locate_with_fallback", "Find an element's position on screen using
4260
4554
  return { ok: false, method, durationMs: Date.now() - start, fallbackFrom: null, retries: attempt, error: err instanceof Error ? err.message : String(err), target: null };
4261
4555
  }
4262
4556
  });
4263
- return formatResult("Located", target, result);
4557
+ return formatResult("Located", target, result, preCheckWarnings);
4264
4558
  });
4265
4559
  // ── select_with_fallback ──
4266
4560
  server.tool("select_with_fallback", "Select an option from a dropdown/menu using the canonical fallback chain: AX → CDP. Finds the control, opens it, and picks the specified option.", {
@@ -4269,9 +4563,15 @@ server.tool("select_with_fallback", "Select an option from a dropdown/menu using
4269
4563
  bundleId: z.string().optional().describe("App bundle ID"),
4270
4564
  }, async ({ target, option, bundleId }) => {
4271
4565
  await ensureBridge();
4272
- const plan = planExecution("select", infra());
4566
+ const preCheckWarnings = await preExecutionCheck(bundleId);
4567
+ const plan = planExecution("select", infra(), getSensorRanking());
4273
4568
  const targetPid = await resolvePid(bundleId);
4274
- const result = await executeWithFallback("select", plan, getAdaptedRetryPolicy(), async (method, attempt) => {
4569
+ // L2→L1: Resolve known selector from references for direct injection
4570
+ const knownSelector = contextTracker.getSelector(target);
4571
+ if (knownSelector) {
4572
+ preCheckWarnings.push(`[L2→L1] Injecting known selector: ${knownSelector}`);
4573
+ }
4574
+ const result = await executeWithFallback("select", plan, getAdaptedRetryPolicy("select_with_fallback"), async (method, attempt) => {
4275
4575
  const start = Date.now();
4276
4576
  try {
4277
4577
  switch (method) {
@@ -4301,20 +4601,34 @@ server.tool("select_with_fallback", "Select an option from a dropdown/menu using
4301
4601
  const client = await CDPClient({ port });
4302
4602
  try {
4303
4603
  const { Runtime } = client;
4304
- const evalResult = await Runtime.evaluate({
4305
- expression: `(() => {
4306
- const sel = Array.from(document.querySelectorAll('select')).find(s =>
4604
+ // L2→L1: Try known selector first for the select element
4605
+ const textSearch = `Array.from(document.querySelectorAll('select')).find(s =>
4307
4606
  s.getAttribute('aria-label') === ${JSON.stringify(target)} ||
4308
4607
  s.getAttribute('name') === ${JSON.stringify(target)} ||
4309
- (s.labels && Array.from(s.labels).some(l => l.textContent?.trim() === ${JSON.stringify(target)}))
4310
- );
4311
- if (!sel) return null;
4312
- const opt = Array.from(sel.options).find(o => o.text.trim() === ${JSON.stringify(option)} || o.value === ${JSON.stringify(option)});
4313
- if (!opt) return 'no_option';
4314
- sel.value = opt.value;
4315
- sel.dispatchEvent(new Event('change', { bubbles: true }));
4316
- return 'selected';
4317
- })()`,
4608
+ (s.labels && Array.from(s.labels).some(l => l.textContent?.trim() === ${JSON.stringify(target)})))`;
4609
+ const selectExpr = knownSelector
4610
+ ? `(() => {
4611
+ let sel = null;
4612
+ try { sel = document.querySelector(${JSON.stringify(knownSelector)}); } catch(e) {}
4613
+ if (!sel || sel.tagName !== 'SELECT') sel = ${textSearch};
4614
+ if (!sel) return null;
4615
+ const opt = Array.from(sel.options).find(o => o.text.trim() === ${JSON.stringify(option)} || o.value === ${JSON.stringify(option)});
4616
+ if (!opt) return 'no_option';
4617
+ sel.value = opt.value;
4618
+ sel.dispatchEvent(new Event('change', { bubbles: true }));
4619
+ return 'selected';
4620
+ })()`
4621
+ : `(() => {
4622
+ const sel = ${textSearch};
4623
+ if (!sel) return null;
4624
+ const opt = Array.from(sel.options).find(o => o.text.trim() === ${JSON.stringify(option)} || o.value === ${JSON.stringify(option)});
4625
+ if (!opt) return 'no_option';
4626
+ sel.value = opt.value;
4627
+ sel.dispatchEvent(new Event('change', { bubbles: true }));
4628
+ return 'selected';
4629
+ })()`;
4630
+ const evalResult = await Runtime.evaluate({
4631
+ expression: selectExpr,
4318
4632
  returnByValue: true,
4319
4633
  });
4320
4634
  if (evalResult.result?.value === "selected") {
@@ -4335,7 +4649,7 @@ server.tool("select_with_fallback", "Select an option from a dropdown/menu using
4335
4649
  return { ok: false, method, durationMs: Date.now() - start, fallbackFrom: null, retries: attempt, error: err instanceof Error ? err.message : String(err), target: null };
4336
4650
  }
4337
4651
  });
4338
- return formatResult("Selected", `${target} → ${option}`, result);
4652
+ return formatResult("Selected", `${target} → ${option}`, result, preCheckWarnings);
4339
4653
  });
4340
4654
  // ── scroll_with_fallback ──
4341
4655
  server.tool("scroll_with_fallback", "Scroll within an element or the active window using the canonical fallback chain: AX → CDP → coordinates. Scrolls until target text is visible, or by a fixed amount.", {
@@ -4345,9 +4659,15 @@ server.tool("scroll_with_fallback", "Scroll within an element or the active wind
4345
4659
  bundleId: z.string().optional().describe("App bundle ID"),
4346
4660
  }, async ({ direction, amount, target, bundleId }) => {
4347
4661
  await ensureBridge();
4348
- const plan = planExecution("scroll", infra());
4662
+ const preCheckWarnings = await preExecutionCheck(bundleId);
4663
+ const plan = planExecution("scroll", infra(), getSensorRanking());
4349
4664
  const targetPid = await resolvePid(bundleId);
4350
4665
  const scrollAmount = amount ?? 300;
4666
+ // L2→L1: Resolve known selector from references for scroll container
4667
+ const knownSelector = target ? contextTracker.getSelector(target) : null;
4668
+ if (knownSelector) {
4669
+ preCheckWarnings.push(`[L2→L1] Injecting known selector: ${knownSelector}`);
4670
+ }
4351
4671
  // Resolve scroll coordinates — center of the frontmost window
4352
4672
  let scrollX = 400, scrollY = 400;
4353
4673
  try {
@@ -4383,7 +4703,7 @@ server.tool("scroll_with_fallback", "Scroll within an element or the active wind
4383
4703
  return { content: [{ type: "text", text: `Scrolled ${direction} 10 times but "${target}" not found.` }] };
4384
4704
  }
4385
4705
  // Fixed-amount scroll via fallback chain
4386
- const result = await executeWithFallback("scroll", plan, getAdaptedRetryPolicy(), async (method, attempt) => {
4706
+ const result = await executeWithFallback("scroll", plan, getAdaptedRetryPolicy("scroll_with_fallback"), async (method, attempt) => {
4387
4707
  const start = Date.now();
4388
4708
  try {
4389
4709
  const deltaX = direction === "left" ? -scrollAmount : direction === "right" ? scrollAmount : 0;
@@ -4401,9 +4721,18 @@ server.tool("scroll_with_fallback", "Scroll within an element or the active wind
4401
4721
  const client = await CDPClient({ port });
4402
4722
  try {
4403
4723
  const { Runtime } = client;
4404
- await Runtime.evaluate({
4405
- expression: `window.scrollBy(${deltaX}, ${deltaY})`,
4406
- });
4724
+ // L2→L1: Try scrolling known selector container first
4725
+ const scrollExpr = knownSelector
4726
+ ? `(() => {
4727
+ try {
4728
+ const el = document.querySelector(${JSON.stringify(knownSelector)});
4729
+ if (el) { el.scrollBy(${deltaX}, ${deltaY}); return 'scrolled'; }
4730
+ } catch(e) {}
4731
+ window.scrollBy(${deltaX}, ${deltaY});
4732
+ return 'scrolled';
4733
+ })()`
4734
+ : `window.scrollBy(${deltaX}, ${deltaY})`;
4735
+ await Runtime.evaluate({ expression: scrollExpr });
4407
4736
  return { ok: true, method, durationMs: Date.now() - start, fallbackFrom: null, retries: attempt, error: null, target: `${direction} ${scrollAmount}px` };
4408
4737
  }
4409
4738
  finally {
@@ -4421,7 +4750,7 @@ server.tool("scroll_with_fallback", "Scroll within an element or the active wind
4421
4750
  return { ok: false, method, durationMs: Date.now() - start, fallbackFrom: null, retries: attempt, error: err instanceof Error ? err.message : String(err), target: null };
4422
4751
  }
4423
4752
  });
4424
- return formatResult("Scrolled", `${direction} ${scrollAmount}px`, result);
4753
+ return formatResult("Scrolled", `${direction} ${scrollAmount}px`, result, preCheckWarnings);
4425
4754
  });
4426
4755
  // ── wait_for_state ──
4427
4756
  server.tool("wait_for_state", "Wait until a condition is met on screen: text appears, text disappears, or element becomes available. Polls at intervals using the fallback chain.", {
@@ -4751,6 +5080,8 @@ function getJobRunner() {
4751
5080
  const locCache = new LocatorCache();
4752
5081
  locCache.setLearningEngine(learningEngine);
4753
5082
  const runtimeService = new AutomationRuntimeService(adapter, logger, locCache);
5083
+ // Wire #15: connect AppMap to Executor for skip-verify optimization
5084
+ runtimeService.setAppMap(appMap);
4754
5085
  const playbookEngine = new PlaybookEngine(runtimeService);
4755
5086
  activePlaybookEngine = playbookEngine;
4756
5087
  // Wire CDP into playbook engine for browser_js / cdp_key_event steps
@@ -4943,6 +5274,7 @@ originalTool("plan_execute", "Run a plan automatically. Known steps (from playbo
4943
5274
  }
4944
5275
  const adaptiveBudget = learningEngine.getAdaptiveBudget(worldModel.getState().focusedApp?.bundleId ?? "unknown");
4945
5276
  const executor = new PlanExecutor(worldModel, planner, toolRegistry.toExecutor(), { postconditionWaitMs: adaptiveBudget.verifyMs, defaultStepTimeout: Math.max(30_000, adaptiveBudget.actMs * 2) }, recoveryEngine, learningEngine);
5277
+ executor.setAppMap(appMap);
4946
5278
  const result = await executor.executeGoal(goal);
4947
5279
  goalStore.update(goalId, goal);
4948
5280
  // Check if paused at an LLM step
@@ -5004,6 +5336,7 @@ originalTool("plan_step", "Execute the next single step of a goal. For increment
5004
5336
  }
5005
5337
  const adaptiveBudget = learningEngine.getAdaptiveBudget(worldModel.getState().focusedApp?.bundleId ?? "unknown");
5006
5338
  const executor = new PlanExecutor(worldModel, planner, toolRegistry.toExecutor(), { postconditionWaitMs: adaptiveBudget.verifyMs, defaultStepTimeout: Math.max(30_000, adaptiveBudget.actMs * 2) }, recoveryEngine, learningEngine);
5339
+ executor.setAppMap(appMap);
5007
5340
  const result = await executor.executeNextStep(goal);
5008
5341
  goalStore.update(goalId, goal);
5009
5342
  if ("paused" in result) {
@@ -5047,6 +5380,7 @@ originalTool("plan_step_resolve", "Resolve a paused LLM step by providing the to
5047
5380
  }
5048
5381
  const adaptiveBudget = learningEngine.getAdaptiveBudget(worldModel.getState().focusedApp?.bundleId ?? "unknown");
5049
5382
  const executor = new PlanExecutor(worldModel, planner, toolRegistry.toExecutor(), { postconditionWaitMs: adaptiveBudget.verifyMs, defaultStepTimeout: Math.max(30_000, adaptiveBudget.actMs * 2) }, recoveryEngine, learningEngine);
5383
+ executor.setAppMap(appMap);
5050
5384
  const result = await executor.resolveStep(goal, tool, params ?? {});
5051
5385
  goalStore.update(goalId, goal);
5052
5386
  return {
@@ -5287,6 +5621,10 @@ originalTool("perception_start", "Start continuous screen monitoring — ScreenH
5287
5621
  return { content: [{ type: "text", text: `Perception already running (started ${stats.startedAt}). Use perception_stop first to restart, or pass bundleId to switch target.` }] };
5288
5622
  }
5289
5623
  let app = worldModel.getState().focusedApp;
5624
+ // Validate bundleId format before it touches AppleScript/exec
5625
+ if (overrideBundleId && !/^[a-zA-Z0-9._-]+$/.test(overrideBundleId)) {
5626
+ return { content: [{ type: "text", text: "Error: Invalid bundleId format. Only alphanumeric characters, dots, hyphens, and underscores are allowed." }] };
5627
+ }
5290
5628
  // If bundleId override provided, try to resolve app info via bridge or AppleScript
5291
5629
  if (overrideBundleId && (!app || app.bundleId !== overrideBundleId)) {
5292
5630
  try {
@@ -5768,7 +6106,37 @@ server.tool("scan_menu_bar", "Scan an app's menu bar via AX tree. Extracts all m
5768
6106
  safePath = safePath.replace(/Log Out [^\n:]+/g, "Log Out [USER]");
5769
6107
  lines.push(` ${safePath}: ${keys}`);
5770
6108
  }
5771
- let output = lines.join("\n");
6109
+ // Wire #12: L6→L7 — bootstrap AppMap zones from menu scan
6110
+ let bootstrapInfo = "";
6111
+ if (appMap) {
6112
+ const bootstrapped = appMap.bootstrapFromMenuScan(bundleId, appName, result);
6113
+ // Clear hint unconditionally — the scan was attempted regardless of bootstrap outcome
6114
+ contextTracker.clearMenuScanHint();
6115
+ if (bootstrapped) {
6116
+ bootstrapInfo = `\nAppMap: bootstrapped zones from menu structure (new app)`;
6117
+ }
6118
+ }
6119
+ // Wire F8: Seed learning from menu scan shortcuts (L6→L5)
6120
+ // Use successCount=5 and score=0.6 so seeds pass recommend() thresholds
6121
+ // (minSamples=5 for locators, score > 0.5 for patterns)
6122
+ if (learningEngine && result.shortcuts) {
6123
+ for (const [menuPath, keys] of Object.entries(result.shortcuts)) {
6124
+ const key = LocatorPolicy.makeKey(bundleId, "key");
6125
+ learningEngine.locators.seedEntry({
6126
+ key, locator: keys, method: "ax",
6127
+ successCount: 5, failCount: 0, score: 0.6,
6128
+ lastUsed: new Date().toISOString(),
6129
+ });
6130
+ // Also seed as pattern: menu_click with the menu path
6131
+ learningEngine.patterns.seedEntry({
6132
+ key: `${bundleId}::menu_click::${menuPath}`,
6133
+ bundleId, tool: "menu_click", locator: menuPath,
6134
+ method: "ax", successCount: 3, failCount: 0, score: 0.6,
6135
+ lastSeen: new Date().toISOString(),
6136
+ });
6137
+ }
6138
+ }
6139
+ let output = lines.join("\n") + bootstrapInfo;
5772
6140
  output = redactUsername(output);
5773
6141
  output = output.replace(/Log Out [^\n:]+/g, "Log Out [USER]");
5774
6142
  return { content: [{ type: "text", text: output }] };
@@ -5813,6 +6181,24 @@ server.tool("ingest_documentation", "Parse a documentation page (HTML, markdown,
5813
6181
  lines.push(` - ${t}`);
5814
6182
  }
5815
6183
  }
6184
+ // Wire F8: Seed learning from ingested documentation flows (L6→L5)
6185
+ if (learningEngine && result.flows) {
6186
+ for (const flow of result.flows) {
6187
+ for (const step of flow.steps) {
6188
+ if (!step.tool)
6189
+ continue;
6190
+ const target = (step.params?.text ?? step.params?.title ?? step.params?.target ?? step.description);
6191
+ if (target) {
6192
+ learningEngine.patterns.seedEntry({
6193
+ key: `${bundleId}::${step.tool}::${target}`,
6194
+ bundleId, tool: step.tool, locator: String(target),
6195
+ method: "ax", successCount: 3, failCount: 0, score: 0.6,
6196
+ lastSeen: new Date().toISOString(),
6197
+ });
6198
+ }
6199
+ }
6200
+ }
6201
+ }
5816
6202
  return { content: [{ type: "text", text: lines.join("\n") }] };
5817
6203
  });
5818
6204
  server.tool("ingest_tutorial", "Extract structured playbook steps from a video transcript (e.g. YouTube captions). Converts tutorial narration into actionable automation steps with tool mappings.", {
@@ -5937,6 +6323,14 @@ originalTool("community_fetch", "Search community playbooks for a platform or wo
5937
6323
  lines.push(` Score: ${pb.ratings.score} | By: ${pb.metadata.author}`);
5938
6324
  lines.push("");
5939
6325
  }
6326
+ // Wire F9: Import community playbooks into AppMap (L6→L7)
6327
+ if (appMap) {
6328
+ for (const pb of results) {
6329
+ if (pb.bundleId && pb.steps.length > 0) {
6330
+ appMap.importFromPlaybook(pb.bundleId, pb.name, pb.steps);
6331
+ }
6332
+ }
6333
+ }
5940
6334
  return { content: [{ type: "text", text: lines.join("\n") }] };
5941
6335
  });
5942
6336
  // ═══════════════════════════════════════════════