screenhand 0.3.2 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/dist/mcp-desktop.js +490 -96
- package/dist/src/community/fetcher.js +32 -2
- package/dist/src/community/validator.js +15 -1
- package/dist/src/context-tracker.js +115 -43
- package/dist/src/ingestion/reference-merger.js +3 -1
- package/dist/src/learning/engine.js +225 -7
- package/dist/src/learning/locator-policy.js +16 -0
- package/dist/src/learning/pattern-policy.js +9 -0
- package/dist/src/learning/recovery-policy.js +16 -0
- package/dist/src/learning/sensor-policy.js +9 -0
- package/dist/src/learning/timing-model.js +62 -0
- package/dist/src/memory/research.js +7 -1
- package/dist/src/memory/store.js +18 -7
- package/dist/src/perception/coordinator.js +304 -4
- package/dist/src/perception/manager.js +13 -0
- package/dist/src/perception/vision-source.js +14 -4
- package/dist/src/planner/executor.js +125 -2
- package/dist/src/planner/planner.js +509 -10
- package/dist/src/playbook/engine.js +10 -0
- package/dist/src/recovery/engine.js +50 -3
- package/dist/src/runtime/execution-contract.js +67 -5
- package/dist/src/runtime/executor.js +41 -1
- package/dist/src/runtime/service.js +7 -0
- package/dist/src/state/app-map.js +307 -17
- package/dist/src/util/atomic-write.js +25 -4
- package/dist-references/reddit.json +2 -2
- package/package.json +1 -1
package/dist/mcp-desktop.js
CHANGED
|
@@ -55,7 +55,7 @@ import { WorldModel } from "./src/state/index.js";
|
|
|
55
55
|
import { PerceptionManager } from "./src/perception/index.js";
|
|
56
56
|
import { Planner, PlanExecutor, GoalStore, ToolRegistry } from "./src/planner/index.js";
|
|
57
57
|
import { RecoveryEngine } from "./src/recovery/index.js";
|
|
58
|
-
import { LearningEngine } from "./src/learning/index.js";
|
|
58
|
+
import { LearningEngine, LocatorPolicy } from "./src/learning/index.js";
|
|
59
59
|
import { discoverWebElements, testWebElement, compileReference, saveExploreResult, discoverNativeElements } from "./src/platform/explorer.js";
|
|
60
60
|
import { buildDocUrls, crawlPage, compileLearnResult, saveLearnResult } from "./src/platform/learner.js";
|
|
61
61
|
import { AccessibilityAdapter } from "./src/runtime/accessibility-adapter.js";
|
|
@@ -234,6 +234,10 @@ let CDP = null;
|
|
|
234
234
|
async function ensureCDP(overridePort) {
|
|
235
235
|
if (!CDP)
|
|
236
236
|
CDP = (await import("chrome-remote-interface")).default;
|
|
237
|
+
// Validate port range (defense in depth — Zod validates at MCP boundary, this catches internal callers)
|
|
238
|
+
if (overridePort && (overridePort < 9222 || overridePort > 9999)) {
|
|
239
|
+
throw new Error(`Invalid CDP port ${overridePort} — must be 9222-9999`);
|
|
240
|
+
}
|
|
237
241
|
// If caller specified a port, use it directly (e.g. 9333 for Electron apps)
|
|
238
242
|
if (overridePort) {
|
|
239
243
|
try {
|
|
@@ -411,6 +415,17 @@ let lastSuccessfulToolName = "unknown";
|
|
|
411
415
|
let lastKnownBundleId = null;
|
|
412
416
|
contextTracker.setAppMap(appMap);
|
|
413
417
|
perceptionManager.setAppMap(appMap);
|
|
418
|
+
// Wire F10: connect ContextTracker to PerceptionCoordinator for per-app perception config
|
|
419
|
+
perceptionManager.setContextTracker(contextTracker);
|
|
420
|
+
// Wire #11: connect TopologyPolicy to AppMap for unified edge scoring
|
|
421
|
+
appMap.setTopologyPolicy(learningEngine.topology);
|
|
422
|
+
// Wire #14: seed TimingModel from AppMap's stored timing profiles (cold-start bootstrap)
|
|
423
|
+
learningEngine.seedTimingFromAppMap(appMap);
|
|
424
|
+
// Wire F5-F7: Cold-start bootstrap — seed all learning policies from AppMap data
|
|
425
|
+
learningEngine.seedLocatorsFromAppMap(appMap);
|
|
426
|
+
learningEngine.seedSensorsFromReadySignals(appMap);
|
|
427
|
+
learningEngine.seedPatternsFromAppMap(appMap);
|
|
428
|
+
learningEngine.seedRecoveryFromContracts(appMap);
|
|
414
429
|
const _executablePlaybookStore = new PlaybookStore(playbooksDir);
|
|
415
430
|
try {
|
|
416
431
|
_executablePlaybookStore.load();
|
|
@@ -422,7 +437,9 @@ goalStore.init();
|
|
|
422
437
|
const toolRegistry = new ToolRegistry();
|
|
423
438
|
const recoveryEngine = new RecoveryEngine(worldModel, toolRegistry.toExecutor(), memory);
|
|
424
439
|
recoveryEngine.setLearningEngine(learningEngine);
|
|
440
|
+
recoveryEngine.setAppMap(appMap);
|
|
425
441
|
planner.setToolRegistry(toolRegistry);
|
|
442
|
+
planner.setAppMap(appMap);
|
|
426
443
|
perceptionManager.setLearningEngine(learningEngine);
|
|
427
444
|
const mcpRecorder = new McpPlaybookRecorder(playbooksDir);
|
|
428
445
|
const referenceMerger = new ReferenceMerger(referencesDir);
|
|
@@ -513,6 +530,20 @@ server.tool = (...args) => {
|
|
|
513
530
|
perceptionManager.notifyToolCall();
|
|
514
531
|
// ── PRE-CALL: check for known error warnings (~0ms, in-memory) ──
|
|
515
532
|
const knownError = memory.quickErrorCheck(toolName);
|
|
533
|
+
// Wire F11: Block execution for tools that fail repeatedly with known resolution (L2→L1)
|
|
534
|
+
// Exclude playbook-seeded errors (id starts with pb_err_) — those are generic platform warnings,
|
|
535
|
+
// not errors observed in this session. Only block on real runtime failures.
|
|
536
|
+
// Also exclude errors injected via memory_record_error API (empty params) — only runtime errors
|
|
537
|
+
// from the intelligence wrapper (which always have populated params) should trigger blocks.
|
|
538
|
+
const isRuntimeError = knownError && typeof knownError.params === "object" && knownError.params !== null && Object.keys(knownError.params).length > 0;
|
|
539
|
+
if (knownError && knownError.occurrences >= 5 && knownError.resolution && !knownError.id.startsWith("pb_err_") && isRuntimeError) {
|
|
540
|
+
return {
|
|
541
|
+
content: [{
|
|
542
|
+
type: "text",
|
|
543
|
+
text: `⛔ Blocked: "${toolName}" has failed ${knownError.occurrences}x with: "${knownError.error}". Known fix: ${knownError.resolution}. Apply the fix first, then retry.`,
|
|
544
|
+
}],
|
|
545
|
+
};
|
|
546
|
+
}
|
|
516
547
|
// ── PRE-CALL: auto-start perception if not running ──
|
|
517
548
|
if (!perceptionManager.isRunning && bridgeReady) {
|
|
518
549
|
const focusApp = worldModel.getState().focusedApp;
|
|
@@ -548,6 +579,9 @@ server.tool = (...args) => {
|
|
|
548
579
|
else if (typeof paramBundleId === "string" && paramBundleId) {
|
|
549
580
|
lastKnownBundleId = paramBundleId;
|
|
550
581
|
}
|
|
582
|
+
// Snapshot the bundleId for this tool's POST-CALL, so concurrent PRE-CALL
|
|
583
|
+
// overwrites of lastKnownBundleId don't contaminate this tool's context
|
|
584
|
+
const postCallBundleId = preBundleId ?? lastKnownBundleId;
|
|
551
585
|
// Capture pre-call window title for navigation edge tracking
|
|
552
586
|
const preWindowTitle = worldModel.getFocusedWindow()?.title.value ?? null;
|
|
553
587
|
// Action tools = actually doing something. Navigation = just clicking around.
|
|
@@ -578,7 +612,7 @@ server.tool = (...args) => {
|
|
|
578
612
|
contextTracker.recordOutcome(toolName, safeParams, true, null);
|
|
579
613
|
// ── POST-CALL: Safari context gap + page context update ──
|
|
580
614
|
const postFocusApp = worldModel.getState().focusedApp;
|
|
581
|
-
const postBundleIdForCtx = postFocusApp?.bundleId ??
|
|
615
|
+
const postBundleIdForCtx = postFocusApp?.bundleId ?? postCallBundleId;
|
|
582
616
|
if (postBundleIdForCtx) {
|
|
583
617
|
lastKnownBundleId = postBundleIdForCtx;
|
|
584
618
|
// Try focused window first, then search all windows for matching bundleId
|
|
@@ -622,7 +656,7 @@ server.tool = (...args) => {
|
|
|
622
656
|
}
|
|
623
657
|
}
|
|
624
658
|
// ── POST-CALL: feed learning engine (timing + locator outcomes) ──
|
|
625
|
-
const learnBundleId = worldModel.getState().focusedApp?.bundleId ??
|
|
659
|
+
const learnBundleId = worldModel.getState().focusedApp?.bundleId ?? postCallBundleId ?? "unknown";
|
|
626
660
|
learningEngine.recordToolTiming({ tool: toolName, bundleId: learnBundleId, durationMs, success: true });
|
|
627
661
|
// Record locator outcome if the tool used a target/selector
|
|
628
662
|
const locatorTarget = safeParams.target ?? safeParams.selector ?? safeParams.locator
|
|
@@ -901,14 +935,17 @@ server.tool = (...args) => {
|
|
|
901
935
|
if (fromNode !== toNode) {
|
|
902
936
|
appMap.addNavNode(learnBundleId, fromNode, { type: "window", description: fromNode });
|
|
903
937
|
appMap.addNavNode(learnBundleId, toNode, { type: "window", description: toNode });
|
|
904
|
-
|
|
938
|
+
const locatorSlug = locatorTarget ? String(locatorTarget).slice(0, 80) : null;
|
|
939
|
+
const edgeAction = locatorSlug ? `${toolName}:${locatorSlug}` : toolName;
|
|
940
|
+
// Wire #11: record topology FIRST so AppMap can read the updated Bayesian score
|
|
905
941
|
learningEngine.recordTopologyOutcome({
|
|
906
942
|
bundleId: learnBundleId,
|
|
907
943
|
fromNode,
|
|
908
|
-
action:
|
|
944
|
+
action: edgeAction,
|
|
909
945
|
toNode,
|
|
910
946
|
success: true,
|
|
911
947
|
});
|
|
948
|
+
appMap.recordEdgeOutcome(learnBundleId, fromNode, edgeAction, toNode, true);
|
|
912
949
|
}
|
|
913
950
|
}
|
|
914
951
|
// ── State machine: detect state changes from tool results ──
|
|
@@ -1266,7 +1303,7 @@ server.tool = (...args) => {
|
|
|
1266
1303
|
// ── Record failure for playbook learning (in-memory only) ──
|
|
1267
1304
|
contextTracker.recordOutcome(toolName, safeParams, false, errorMsg);
|
|
1268
1305
|
// ── Feed learning engine (failure timing + locator) ──
|
|
1269
|
-
const learnBundleIdErr = worldModel.getState().focusedApp?.bundleId ??
|
|
1306
|
+
const learnBundleIdErr = worldModel.getState().focusedApp?.bundleId ?? postCallBundleId ?? "unknown";
|
|
1270
1307
|
learningEngine.recordToolTiming({ tool: toolName, bundleId: learnBundleIdErr, durationMs, success: false });
|
|
1271
1308
|
const failedLocator = safeParams.target ?? safeParams.selector ?? safeParams.locator
|
|
1272
1309
|
?? (toolName === "click_text" ? safeParams.text : undefined);
|
|
@@ -1422,7 +1459,7 @@ server.tool("windows", "List all visible windows with IDs, positions, and sizes"
|
|
|
1422
1459
|
return { content: [{ type: "text", text: lines.join("\n") }] };
|
|
1423
1460
|
});
|
|
1424
1461
|
server.tool("focus", "Focus/activate an application (or a specific window by windowId)", {
|
|
1425
|
-
bundleId: z.string().describe("App bundle ID, e.g. com.apple.Safari"),
|
|
1462
|
+
bundleId: z.string().regex(/^[a-zA-Z0-9._-]+$/, "Invalid bundleId format").describe("App bundle ID, e.g. com.apple.Safari"),
|
|
1426
1463
|
windowId: z.number().optional().describe("Specific window ID from windows() — raises that exact window. Use when multiple instances of the same app exist."),
|
|
1427
1464
|
}, async ({ bundleId, windowId }) => {
|
|
1428
1465
|
await ensureBridge();
|
|
@@ -1528,8 +1565,8 @@ server.tool("focus", "Focus/activate an application (or a specific window by win
|
|
|
1528
1565
|
}
|
|
1529
1566
|
});
|
|
1530
1567
|
server.tool("launch", "Launch an application. Chrome/Chromium browsers are launched with CDP enabled (port 9222) for browser_* tools.", {
|
|
1531
|
-
bundleId: z.string().describe("App bundle ID"),
|
|
1532
|
-
cdpPort: z.number().optional().describe("CDP port for Chrome/Chromium (default: 9222). Ignored for non-browser apps."),
|
|
1568
|
+
bundleId: z.string().regex(/^[a-zA-Z0-9._-]+$/, "Invalid bundleId format").describe("App bundle ID"),
|
|
1569
|
+
cdpPort: z.number().min(9222).max(9999).optional().describe("CDP port for Chrome/Chromium (default: 9222). Ignored for non-browser apps."),
|
|
1533
1570
|
}, async ({ bundleId, cdpPort }) => {
|
|
1534
1571
|
await ensureBridge();
|
|
1535
1572
|
const riskyBundleIds = {
|
|
@@ -1930,7 +1967,7 @@ server.tool("click_text", "SLOW fallback: Find text on screen via OCR and click
|
|
|
1930
1967
|
server.tool("type_text", "Type text using the keyboard. Auto-detects Electron apps and routes through CDP for reliable editor input.", {
|
|
1931
1968
|
text: z.string().describe("Text to type"),
|
|
1932
1969
|
pid: z.number().optional().describe("Target process ID for PID-targeted event delivery"),
|
|
1933
|
-
cdpPort: z.number().optional().describe("CDP port for Electron apps (e.g. 9229). When set, types via CDP instead of AX — fixes Copilot/panel focus theft."),
|
|
1970
|
+
cdpPort: z.number().min(9222).max(9999).optional().describe("CDP port for Electron apps (e.g. 9229). When set, types via CDP instead of AX — fixes Copilot/panel focus theft."),
|
|
1934
1971
|
}, async ({ text, pid, cdpPort: portOverride }) => {
|
|
1935
1972
|
await ensureBridge();
|
|
1936
1973
|
// Auto-resolve frontmost PID when none provided — global HID posting
|
|
@@ -2178,7 +2215,7 @@ function randomDelay(min, max) {
|
|
|
2178
2215
|
// BROWSER — control Chrome pages via CDP (10ms, not OCR)
|
|
2179
2216
|
// ═══════════════════════════════════════════════
|
|
2180
2217
|
server.tool("browser_tabs", "List all open Chrome/Electron tabs. Use cdpPort to connect to a specific app (e.g. 9333 for Codex Desktop).", {
|
|
2181
|
-
cdpPort: z.number().optional().describe("CDP port override (e.g. 9333 for Electron apps). Omit to auto-detect."),
|
|
2218
|
+
cdpPort: z.number().min(9222).max(9999).optional().describe("CDP port override (e.g. 9333 for Electron apps). Omit to auto-detect."),
|
|
2182
2219
|
}, async ({ cdpPort: portOverride }) => {
|
|
2183
2220
|
const { CDP: cdp, port } = await ensureCDP(portOverride);
|
|
2184
2221
|
const targets = await cdp.List({ port });
|
|
@@ -2188,7 +2225,7 @@ server.tool("browser_tabs", "List all open Chrome/Electron tabs. Use cdpPort to
|
|
|
2188
2225
|
});
|
|
2189
2226
|
server.tool("browser_open", "Open a URL in Chrome/Electron (creates new tab)", {
|
|
2190
2227
|
url: z.string().describe("URL to open"),
|
|
2191
|
-
cdpPort: z.number().optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
|
|
2228
|
+
cdpPort: z.number().min(9222).max(9999).optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
|
|
2192
2229
|
}, async ({ url, cdpPort: portOverride }) => {
|
|
2193
2230
|
// L2-71 fix: Block dangerous URL protocols
|
|
2194
2231
|
const BLOCKED_PROTOCOLS = ["javascript:", "data:", "blob:", "vbscript:"];
|
|
@@ -2212,7 +2249,7 @@ server.tool("browser_open", "Open a URL in Chrome/Electron (creates new tab)", {
|
|
|
2212
2249
|
server.tool("browser_navigate", "Navigate the active Chrome/Electron tab to a URL", {
|
|
2213
2250
|
url: z.string().describe("URL to navigate to"),
|
|
2214
2251
|
tabId: z.string().optional().describe("Tab ID (from browser_tabs). Omit for most recent tab."),
|
|
2215
|
-
cdpPort: z.number().optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
|
|
2252
|
+
cdpPort: z.number().min(9222).max(9999).optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
|
|
2216
2253
|
}, async ({ url, tabId, cdpPort: portOverride }) => {
|
|
2217
2254
|
// L2-71 fix: Block dangerous URL protocols that could execute arbitrary code
|
|
2218
2255
|
const BLOCKED_PROTOCOLS = ["javascript:", "data:", "blob:", "vbscript:"];
|
|
@@ -2257,7 +2294,7 @@ server.tool("browser_navigate", "Navigate the active Chrome/Electron tab to a UR
|
|
|
2257
2294
|
server.tool("browser_js", "Execute JavaScript in a Chrome/Electron tab. Returns the result. WARNING: This runs arbitrary JS in the browser context — avoid on sensitive pages (banking, email). All executions are audit-logged.", {
|
|
2258
2295
|
code: z.string().describe("JavaScript to execute. Must be an expression that returns a value. Use (() => { ... })() for multi-line."),
|
|
2259
2296
|
tabId: z.string().optional().describe("Tab ID. Omit for most recent tab."),
|
|
2260
|
-
cdpPort: z.number().optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
|
|
2297
|
+
cdpPort: z.number().min(9222).max(9999).optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
|
|
2261
2298
|
}, async ({ code, tabId, cdpPort: portOverride }) => {
|
|
2262
2299
|
auditLog("browser_js", { code, tabId });
|
|
2263
2300
|
const { CDP: cdp, port } = await ensureCDP(portOverride);
|
|
@@ -2291,7 +2328,7 @@ server.tool("browser_dom", "Query the DOM of a Chrome/Electron page. Returns mat
|
|
|
2291
2328
|
selector: z.string().describe("CSS selector, e.g. 'button', '.nav a', '#main h2'"),
|
|
2292
2329
|
tabId: z.string().optional().describe("Tab ID. Omit for most recent tab."),
|
|
2293
2330
|
limit: z.number().optional().describe("Max results (default 20)"),
|
|
2294
|
-
cdpPort: z.number().optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
|
|
2331
|
+
cdpPort: z.number().min(9222).max(9999).optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
|
|
2295
2332
|
}, async ({ selector, tabId, limit, cdpPort: portOverride }) => {
|
|
2296
2333
|
// Capture bundleId before any async CDP calls to avoid race condition
|
|
2297
2334
|
const browserBundleId = worldModel.getState().focusedApp?.bundleId ?? "com.google.Chrome";
|
|
@@ -2342,7 +2379,7 @@ server.tool("browser_dom", "Query the DOM of a Chrome/Electron page. Returns mat
|
|
|
2342
2379
|
server.tool("browser_click", "Click an element in Chrome/Electron by CSS selector. Uses CDP Input.dispatchMouseEvent for realistic mouse events.", {
|
|
2343
2380
|
selector: z.string().describe("CSS selector of element to click"),
|
|
2344
2381
|
tabId: z.string().optional().describe("Tab ID. Omit for most recent tab."),
|
|
2345
|
-
cdpPort: z.number().optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
|
|
2382
|
+
cdpPort: z.number().min(9222).max(9999).optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
|
|
2346
2383
|
}, async ({ selector, tabId, cdpPort: portOverride }) => {
|
|
2347
2384
|
const { client } = await getCDPClient(tabId, portOverride);
|
|
2348
2385
|
await client.Runtime.enable();
|
|
@@ -2375,7 +2412,7 @@ server.tool("browser_type", "Type into an input field in Chrome/Electron. Uses C
|
|
|
2375
2412
|
text: z.string().describe("Text to type"),
|
|
2376
2413
|
clear: z.boolean().optional().describe("Clear field first (default true)"),
|
|
2377
2414
|
tabId: z.string().optional().describe("Tab ID"),
|
|
2378
|
-
cdpPort: z.number().optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
|
|
2415
|
+
cdpPort: z.number().min(9222).max(9999).optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
|
|
2379
2416
|
}, async ({ selector, text, clear, tabId, cdpPort: portOverride }) => {
|
|
2380
2417
|
const { client } = await getCDPClient(tabId, portOverride);
|
|
2381
2418
|
await client.Runtime.enable();
|
|
@@ -2416,7 +2453,7 @@ server.tool("browser_wait", "Wait for a condition on a Chrome/Electron page", {
|
|
|
2416
2453
|
condition: z.string().describe("JS expression that returns truthy when ready. e.g. 'document.querySelector(\".loaded\")'"),
|
|
2417
2454
|
timeoutMs: z.number().optional().describe("Timeout in ms (default 10000)"),
|
|
2418
2455
|
tabId: z.string().optional().describe("Tab ID"),
|
|
2419
|
-
cdpPort: z.number().optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
|
|
2456
|
+
cdpPort: z.number().min(9222).max(9999).optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
|
|
2420
2457
|
}, async ({ condition, timeoutMs, tabId, cdpPort: portOverride }) => {
|
|
2421
2458
|
const { CDP: cdp, port } = await ensureCDP(portOverride);
|
|
2422
2459
|
let targetId = tabId;
|
|
@@ -2444,7 +2481,7 @@ server.tool("browser_wait", "Wait for a condition on a Chrome/Electron page", {
|
|
|
2444
2481
|
});
|
|
2445
2482
|
server.tool("browser_page_info", "Get current page title, URL, and text content summary", {
|
|
2446
2483
|
tabId: z.string().optional().describe("Tab ID"),
|
|
2447
|
-
cdpPort: z.number().optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
|
|
2484
|
+
cdpPort: z.number().min(9222).max(9999).optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
|
|
2448
2485
|
}, async ({ tabId, cdpPort: portOverride }) => {
|
|
2449
2486
|
// Capture bundleId BEFORE CDP call to prevent focus-change race
|
|
2450
2487
|
const browserBundleId = worldModel.getState().focusedApp?.bundleId ?? "com.google.Chrome";
|
|
@@ -2519,7 +2556,7 @@ if (origQuery) {
|
|
|
2519
2556
|
`;
|
|
2520
2557
|
server.tool("browser_stealth", "Inject anti-detection patches into Chrome/Electron page. Call once after navigating to a protected site. Hides webdriver flag, patches plugins/languages/permissions.", {
|
|
2521
2558
|
tabId: z.string().optional().describe("Tab ID. Omit for most recent tab."),
|
|
2522
|
-
cdpPort: z.number().optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
|
|
2559
|
+
cdpPort: z.number().min(9222).max(9999).optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
|
|
2523
2560
|
}, async ({ tabId, cdpPort: portOverride }) => {
|
|
2524
2561
|
const { client } = await getCDPClient(tabId, portOverride);
|
|
2525
2562
|
await client.Page.enable();
|
|
@@ -2539,7 +2576,7 @@ server.tool("browser_fill_form", "Fill a form field with human-like typing (anti
|
|
|
2539
2576
|
clear: z.boolean().optional().describe("Clear field first (default true)"),
|
|
2540
2577
|
delayMs: z.number().optional().describe("Avg delay between keystrokes in ms (default 50)"),
|
|
2541
2578
|
tabId: z.string().optional().describe("Tab ID"),
|
|
2542
|
-
cdpPort: z.number().optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
|
|
2579
|
+
cdpPort: z.number().min(9222).max(9999).optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
|
|
2543
2580
|
}, async ({ selector, text, clear, delayMs, tabId, cdpPort: portOverride }) => {
|
|
2544
2581
|
const { client } = await getCDPClient(tabId, portOverride);
|
|
2545
2582
|
await client.Runtime.enable();
|
|
@@ -2583,7 +2620,7 @@ server.tool("browser_fill_form", "Fill a form field with human-like typing (anti
|
|
|
2583
2620
|
server.tool("browser_human_click", "Alias for browser_click — both use realistic mouseMoved → mousePressed → mouseReleased events. Prefer browser_click directly.", {
|
|
2584
2621
|
selector: z.string().describe("CSS selector of element to click"),
|
|
2585
2622
|
tabId: z.string().optional().describe("Tab ID. Omit for most recent tab."),
|
|
2586
|
-
cdpPort: z.number().optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
|
|
2623
|
+
cdpPort: z.number().min(9222).max(9999).optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
|
|
2587
2624
|
}, async ({ selector, tabId, cdpPort: portOverride }) => {
|
|
2588
2625
|
const { client } = await getCDPClient(tabId, portOverride);
|
|
2589
2626
|
await client.Runtime.enable();
|
|
@@ -2991,7 +3028,7 @@ server.tool("playbook_record", "Macro recorder: start recording, do the flow, st
|
|
|
2991
3028
|
platform: z.string().optional().describe("Platform name (required for start)"),
|
|
2992
3029
|
name: z.string().optional().describe("Playbook name (required for stop)"),
|
|
2993
3030
|
description: z.string().optional().describe("Playbook description (for stop)"),
|
|
2994
|
-
cdpPort: z.number().optional().describe("CDP port if needed for browser_js steps (e.g. 9333 for Codex)"),
|
|
3031
|
+
cdpPort: z.number().min(9222).max(9999).optional().describe("CDP port if needed for browser_js steps (e.g. 9333 for Codex)"),
|
|
2995
3032
|
}, async ({ action, platform, name, description, cdpPort }) => {
|
|
2996
3033
|
switch (action) {
|
|
2997
3034
|
case "start": {
|
|
@@ -3162,6 +3199,27 @@ server.tool("applescript", "Run an AppleScript command. For controlling Finder,
|
|
|
3162
3199
|
if (process.platform === "win32") {
|
|
3163
3200
|
return { content: [{ type: "text", text: "AppleScript is not supported on Windows. Use ui_tree, ui_press, and other accessibility tools instead." }] };
|
|
3164
3201
|
}
|
|
3202
|
+
// Block shell execution vectors in AppleScript — allowlist approach for safety-critical commands
|
|
3203
|
+
const scriptLower = script.toLowerCase();
|
|
3204
|
+
const BLOCKED_PATTERNS = [
|
|
3205
|
+
/do\s+shell\s+script/i, // direct shell execution
|
|
3206
|
+
/run\s+shell\s+script/i, // variant
|
|
3207
|
+
/run\s+script/i, // dynamic AppleScript eval (can construct blocked commands)
|
|
3208
|
+
/do\s+script/i, // Terminal.app shell execution
|
|
3209
|
+
/«class\s/i, // raw Apple Event codes (bypass text-level blocks)
|
|
3210
|
+
/system\s+events.*process/i, // process spawning via System Events
|
|
3211
|
+
/NSAppleScript/i, // Objective-C bridge
|
|
3212
|
+
/ObjC\.import/i, // JXA Objective-C bridge
|
|
3213
|
+
/\bshell\b/i, // catch-all for shell-related commands
|
|
3214
|
+
/do\s+JavaScript/i, // JXA execution
|
|
3215
|
+
];
|
|
3216
|
+
if (BLOCKED_PATTERNS.some(p => p.test(script))) {
|
|
3217
|
+
return { content: [{ type: "text", text: "Blocked: this AppleScript contains a restricted command (shell execution, dynamic eval, or process spawning). Use the Bash tool for shell commands." }] };
|
|
3218
|
+
}
|
|
3219
|
+
// Block string concatenation that could reassemble blocked commands
|
|
3220
|
+
if (/&/.test(script) && (/script/i.test(script) || /shell/i.test(script))) {
|
|
3221
|
+
return { content: [{ type: "text", text: "Blocked: AppleScript with string concatenation containing 'script' or 'shell' — potential bypass attempt." }] };
|
|
3222
|
+
}
|
|
3165
3223
|
try {
|
|
3166
3224
|
const result = execSync(`osascript -e '${script.replace(/'/g, "'\\''")}'`, {
|
|
3167
3225
|
encoding: "utf-8",
|
|
@@ -3722,7 +3780,7 @@ import { METHOD_CAPABILITIES, DEFAULT_RETRY_POLICY, planExecution, executeWithFa
|
|
|
3722
3780
|
server.tool("execution_plan", "Show the execution plan for an action type. Returns the ordered fallback chain based on available infrastructure.", {
|
|
3723
3781
|
action: z.enum(["click", "type", "read", "locate", "select", "scroll"]).describe("Action type"),
|
|
3724
3782
|
}, async ({ action }) => {
|
|
3725
|
-
const plan = planExecution(action, { hasBridge: true, hasCDP: cdpPort !== null });
|
|
3783
|
+
const plan = planExecution(action, { hasBridge: true, hasCDP: cdpPort !== null }, getSensorRanking());
|
|
3726
3784
|
const lines = plan.map((method, i) => {
|
|
3727
3785
|
const cap = METHOD_CAPABILITIES[method];
|
|
3728
3786
|
return `${i + 1}. ${method} (~${cap.avgLatencyMs}ms)${i === 0 ? " ← primary" : ""}`;
|
|
@@ -3764,26 +3822,161 @@ function infra() {
|
|
|
3764
3822
|
return { hasBridge: true, hasCDP: cdpPort !== null };
|
|
3765
3823
|
}
|
|
3766
3824
|
/**
|
|
3767
|
-
* Get
|
|
3768
|
-
*
|
|
3825
|
+
* Get sensor rankings for the current app from the learning engine.
|
|
3826
|
+
* Used by planExecution() to reorder fallback methods based on learned success rates.
|
|
3827
|
+
* Returns undefined if no bundleId is known (falls back to canonical order).
|
|
3828
|
+
*/
|
|
3829
|
+
function getSensorRanking(overrideBundleId) {
|
|
3830
|
+
// Use override bundleId when provided (from tool params), else worldModel, else lastKnown
|
|
3831
|
+
const bundleId = overrideBundleId ?? worldModel.getState().focusedApp?.bundleId ?? lastKnownBundleId;
|
|
3832
|
+
if (!bundleId)
|
|
3833
|
+
return undefined;
|
|
3834
|
+
const ranked = learningEngine.rankSensors(bundleId);
|
|
3835
|
+
return ranked.length > 0 ? ranked : undefined;
|
|
3836
|
+
}
|
|
3837
|
+
/**
|
|
3838
|
+
* Get a retry policy adapted by the learning engine's adaptive budgets
|
|
3839
|
+
* AND the AppMap's timing profiles (L7→L1).
|
|
3840
|
+
*
|
|
3841
|
+
* Priority: AppMap timing > Learning budget > Default
|
|
3842
|
+
* AppMap stores per-tool/per-action avg durations from real executions.
|
|
3843
|
+
* Learning budget stores per-app adaptive budgets from outcome stats.
|
|
3769
3844
|
*/
|
|
3770
|
-
function getAdaptedRetryPolicy() {
|
|
3771
|
-
|
|
3845
|
+
function getAdaptedRetryPolicy(toolName, overrideBundleId) {
|
|
3846
|
+
let typicalMs = null;
|
|
3847
|
+
// L7→L1: Check AppMap timing profiles for the action type.
|
|
3848
|
+
// Timing keys are stored as "click::Submit", "click_text::Login", etc.
|
|
3849
|
+
// Fallback tools pass "click_with_fallback" — extract the action prefix to match.
|
|
3850
|
+
const bundleId = overrideBundleId ?? worldModel.getState().focusedApp?.bundleId ?? lastKnownBundleId;
|
|
3851
|
+
if (bundleId && toolName) {
|
|
3852
|
+
const actionPrefix = toolName.replace(/_with_fallback$/, "");
|
|
3853
|
+
// Get all timing profiles for this app, then filter by action prefix
|
|
3854
|
+
const allTimings = appMap.getTimingProfile(bundleId);
|
|
3855
|
+
const matchingTimings = allTimings.filter((t) => t.key.startsWith(actionPrefix + "::") || t.key === actionPrefix);
|
|
3856
|
+
if (matchingTimings.length > 0) {
|
|
3857
|
+
// Use element_response type if available, compute median avgMs across all matching entries
|
|
3858
|
+
const responseTimes = matchingTimings
|
|
3859
|
+
.filter((t) => t.type === "element_response")
|
|
3860
|
+
.map((t) => t.avgMs);
|
|
3861
|
+
if (responseTimes.length > 0) {
|
|
3862
|
+
responseTimes.sort((a, b) => a - b);
|
|
3863
|
+
const mid = Math.floor(responseTimes.length / 2);
|
|
3864
|
+
typicalMs = responseTimes.length % 2 === 1
|
|
3865
|
+
? responseTimes[mid]
|
|
3866
|
+
: (responseTimes[mid - 1] + responseTimes[mid]) / 2;
|
|
3867
|
+
}
|
|
3868
|
+
else {
|
|
3869
|
+
typicalMs = matchingTimings[0].avgMs;
|
|
3870
|
+
}
|
|
3871
|
+
}
|
|
3872
|
+
}
|
|
3873
|
+
// Fall back to L5 adaptive budget
|
|
3874
|
+
if (typicalMs == null && currentAdaptiveBudget) {
|
|
3875
|
+
typicalMs = Math.max(currentAdaptiveBudget.locateMs, currentAdaptiveBudget.actMs);
|
|
3876
|
+
}
|
|
3877
|
+
if (typicalMs == null)
|
|
3772
3878
|
return DEFAULT_RETRY_POLICY;
|
|
3773
|
-
// Use the max of locate+act as a guide for retry delay — faster apps need shorter delays
|
|
3774
|
-
const typicalMs = Math.max(currentAdaptiveBudget.locateMs, currentAdaptiveBudget.actMs);
|
|
3775
3879
|
// Retry delay = max(100ms, typical * 1.5), capped at the default
|
|
3776
3880
|
const adaptedDelay = Math.min(DEFAULT_RETRY_POLICY.delayBetweenRetriesMs, Math.max(100, Math.ceil(typicalMs * 1.5)));
|
|
3777
3881
|
if (adaptedDelay === DEFAULT_RETRY_POLICY.delayBetweenRetriesMs)
|
|
3778
3882
|
return DEFAULT_RETRY_POLICY;
|
|
3779
3883
|
return { ...DEFAULT_RETRY_POLICY, delayBetweenRetriesMs: adaptedDelay };
|
|
3780
3884
|
}
|
|
3781
|
-
function formatResult(action, target, result) {
|
|
3885
|
+
function formatResult(action, target, result, preCheckWarnings) {
|
|
3886
|
+
const prefix = preCheckWarnings && preCheckWarnings.length > 0
|
|
3887
|
+
? preCheckWarnings.join("\n") + "\n"
|
|
3888
|
+
: "";
|
|
3782
3889
|
if (result.ok) {
|
|
3783
3890
|
const fallbackNote = result.fallbackFrom ? ` (fell back from ${result.fallbackFrom})` : "";
|
|
3784
|
-
return { content: [{ type: "text", text: `${action} "${result.target ?? target}" via ${result.method}${fallbackNote} in ${result.durationMs}ms` }] };
|
|
3891
|
+
return { content: [{ type: "text", text: `${prefix}${action} "${result.target ?? target}" via ${result.method}${fallbackNote} in ${result.durationMs}ms` }] };
|
|
3785
3892
|
}
|
|
3786
|
-
return { content: [{ type: "text", text:
|
|
3893
|
+
return { content: [{ type: "text", text: `${prefix}Failed to ${action} "${target}" — all methods exhausted. Last error: ${result.error}` }] };
|
|
3894
|
+
}
|
|
3895
|
+
/**
 * L3→L1: Pre-execution worldModel check.
 *
 * Verifies the target app is focused and not blocked by dialogs, and
 * auto-focuses the app through the bridge when another app is in front.
 * Every check after the focus attempt reads the re-fetched world state, so
 * the warnings describe the screen as it is after auto-focus.
 *
 * The check is best-effort: any failure is swallowed and the warnings
 * collected so far are returned.
 *
 * NOTE: click_with_fallback substring-matches these warnings ("dialog",
 * "off-screen", "not frontmost") to decide whether to skip its map-guided
 * shortcut, so keep the wording stable. No warning produced here contains
 * "not frontmost".
 *
 * @param {string} [bundleId] - Target app bundle ID. Falls back to
 *   `lastKnownBundleId`, then to the currently focused app.
 * @returns {Promise<string[]>} Warnings to prepend to the tool result
 *   (empty when nothing noteworthy was found).
 */
async function preExecutionCheck(bundleId) {
    const warnings = [];
    try {
        const state = worldModel.getState();
        const targetBundleId = bundleId ?? lastKnownBundleId ?? state.focusedApp?.bundleId;
        if (!targetBundleId) {
            return warnings;
        }
        // Check if the target app is focused; if not, try to bring it forward.
        if (state.focusedApp && state.focusedApp.bundleId !== targetBundleId) {
            warnings.push(`[L3→L1] Target app ${targetBundleId} is not focused (current: ${state.focusedApp.bundleId}). Auto-focusing...`);
            try {
                // "app.focus" is the bridge method for focusing an app by bundle ID.
                await bridge.call("app.focus", { bundleId: targetBundleId });
            }
            catch {
                warnings.push(`[L3→L1] Auto-focus failed — proceeding anyway`);
            }
        }
        // Re-fetch state after auto-focus; all remaining checks use this snapshot.
        const postFocusState = worldModel.getState();
        // Check for blocking dialogs — scoped to target app only.
        // Observer-sourced dialogs have windowId=0 (no real window ID),
        // so fall back to checking if the focused app matches.
        const relevantDialogs = postFocusState.activeDialogs.filter((d) => {
            if (d.windowId === 0) {
                return postFocusState.focusedApp?.bundleId === targetBundleId;
            }
            const win = postFocusState.windows.get(d.windowId);
            return win?.bundleId === targetBundleId;
        });
        if (relevantDialogs.length > 0) {
            const dialogTitles = relevantDialogs
                .map((d) => d.title || d.type)
                .join(", ");
            warnings.push(`[L3→L1] Active dialog(s) detected: ${dialogTitles} — may block interaction`);
        }
        // Check if a target window is off-screen (post-focus snapshot, so a
        // window raised by auto-focus is not reported from outdated data).
        for (const [, win] of postFocusState.windows) {
            if (win.bundleId === targetBundleId && !win.isOnScreen) {
                // A missing title must not abort the remaining checks.
                const title = win.title?.value ?? "(untitled)";
                warnings.push(`[L3→L1] Window "${title}" is off-screen or minimized`);
            }
        }
        // Warn only when the state is both old (>10s since last update) and
        // low-confidence (<0.5); an unparseable timestamp is skipped.
        const staleThresholdMs = 10_000;
        const lastUpdate = new Date(postFocusState.updatedAt).getTime();
        const ageMs = Date.now() - lastUpdate;
        if (!Number.isNaN(lastUpdate) && ageMs > staleThresholdMs && postFocusState.confidence < 0.5) {
            warnings.push(`[L3→L1] World state is stale (${Math.round(ageMs / 1000)}s old, confidence ${postFocusState.confidence.toFixed(2)}) — screen may have changed`);
        }
    }
    catch {
        // Pre-check is best-effort advisory — never crash the tool call
    }
    return warnings;
}
|
|
3954
|
+
/**
 * L7→L1: Try to resolve an element's position from the AppMap.
 *
 * Delegates to `appMap.resolvePosition` only when the focused window belongs
 * to the target app and its bounds are fresh and plausibly sized; otherwise
 * returns null so callers fall back to their normal locate chain.
 *
 * @param {string} target - Element label to look up in the AppMap.
 * @param {string} [bundleId] - Target app bundle ID. Falls back to the focused
 *   app, then to `lastKnownBundleId`.
 * @returns {*} Whatever `appMap.resolvePosition(bundleId, target, bounds)`
 *   returns, or null when any guard rejects the lookup.
 */
function resolveMapPosition(target, bundleId) {
    // One snapshot for both the bundle-ID fallback and the window lookup.
    const state = worldModel.getState();
    const bid = bundleId ?? state.focusedApp?.bundleId ?? lastKnownBundleId;
    if (!bid) {
        return null;
    }
    // Get window bounds from worldModel for coordinate conversion
    const focusedWinId = state.focusedWindowId;
    if (focusedWinId == null) {
        return null;
    }
    const win = state.windows.get(focusedWinId);
    if (!win || win.bundleId !== bid) {
        return null;
    }
    const bounds = win.bounds?.value;
    if (bounds == null) {
        return null;
    }
    // Guard: reject stale bounds (>5s old) to prevent clicking at wrong position after window move.
    // A missing or unparseable timestamp gives NaN, which fails every numeric
    // comparison, so it is rejected explicitly.
    const boundsAge = Date.now() - new Date(win.bounds.updatedAt).getTime();
    if (!Number.isFinite(boundsAge) || boundsAge > 5000 || boundsAge < 0) {
        return null; // stale, future, or unknown timestamp
    }
    // Guard: reject uninitialized/zero-size bounds to prevent clicking at (0,0).
    // Written as !(x >= 50) so NaN or undefined dimensions are rejected too.
    if (!(bounds.width >= 50) || !(bounds.height >= 50)) {
        return null;
    }
    return appMap.resolvePosition(bid, target, bounds);
}
|
|
3788
3981
|
// ── click_with_fallback ──
|
|
3789
3982
|
server.tool("click_with_fallback", "Click a target by text using the canonical fallback chain: AX → CDP → OCR. Automatically retries and falls through methods.", {
|
|
@@ -3791,10 +3984,37 @@ server.tool("click_with_fallback", "Click a target by text using the canonical f
|
|
|
3791
3984
|
bundleId: z.string().optional().describe("App bundle ID (for AX path)"),
|
|
3792
3985
|
}, async ({ target, bundleId }) => {
|
|
3793
3986
|
await ensureBridge();
|
|
3794
|
-
const
|
|
3987
|
+
const preCheckWarnings = await preExecutionCheck(bundleId);
|
|
3988
|
+
// L7→L1: If AppMap knows this element's position, try coordinates first.
|
|
3989
|
+
// WARNING: Coordinate clicks are unverified — if the window moved or a modal
|
|
3990
|
+
// appeared, the click may hit the wrong target. On failure, falls through to
|
|
3991
|
+
// the standard AX/CDP/OCR chain which verifies element identity.
|
|
3992
|
+
// Skip map-guided shortcut if precheck detected blocking conditions (dialogs, off-screen)
|
|
3993
|
+
const hasBlockingCondition = preCheckWarnings.some((w) => w.includes("dialog") || w.includes("off-screen") || w.includes("not frontmost"));
|
|
3994
|
+
const mapPos = !hasBlockingCondition ? resolveMapPosition(target, bundleId) : null;
|
|
3995
|
+
if (mapPos) {
|
|
3996
|
+
try {
|
|
3997
|
+
const start = Date.now();
|
|
3998
|
+
await bridge.call("cg.mouseClick", { x: mapPos.x, y: mapPos.y });
|
|
3999
|
+
preCheckWarnings.push(`[L7→L1] Used map position (${mapPos.x}, ${mapPos.y}) for "${target}" — UNVERIFIED coordinate click`);
|
|
4000
|
+
return formatResult("Clicked", target, {
|
|
4001
|
+
ok: true, method: "coordinates", durationMs: Date.now() - start,
|
|
4002
|
+
fallbackFrom: null, retries: 0, error: null, target: `${target} at (${mapPos.x},${mapPos.y}) [map-guided, unverified]`,
|
|
4003
|
+
}, preCheckWarnings);
|
|
4004
|
+
}
|
|
4005
|
+
catch {
|
|
4006
|
+
preCheckWarnings.push(`[L7→L1] Map position click failed — falling back to standard chain`);
|
|
4007
|
+
}
|
|
4008
|
+
}
|
|
4009
|
+
const plan = planExecution("click", infra(), getSensorRanking())
|
|
3795
4010
|
.filter((m) => m !== "coordinates");
|
|
3796
4011
|
const targetPid = await resolvePid(bundleId);
|
|
3797
|
-
|
|
4012
|
+
// L2→L1: Resolve known selector from references for direct injection
|
|
4013
|
+
const knownSelector = contextTracker.getSelector(target);
|
|
4014
|
+
if (knownSelector) {
|
|
4015
|
+
preCheckWarnings.push(`[L2→L1] Injecting known selector: ${knownSelector}`);
|
|
4016
|
+
}
|
|
4017
|
+
const result = await executeWithFallback("click", plan, getAdaptedRetryPolicy("click_with_fallback"), async (method, attempt) => {
|
|
3798
4018
|
const start = Date.now();
|
|
3799
4019
|
try {
|
|
3800
4020
|
switch (method) {
|
|
@@ -3829,15 +4049,28 @@ server.tool("click_with_fallback", "Click a target by text using the canonical f
|
|
|
3829
4049
|
const client = await CDPClient({ port });
|
|
3830
4050
|
try {
|
|
3831
4051
|
const { Runtime } = client;
|
|
3832
|
-
|
|
3833
|
-
|
|
3834
|
-
|
|
4052
|
+
// L2→L1: Try known selector first (wrapped in try/catch to handle
|
|
4053
|
+
// invalid selectors gracefully), then fall back to text search.
|
|
4054
|
+
const textSearchExpr = `Array.from(document.querySelectorAll('*')).find(e =>
|
|
3835
4055
|
e.textContent?.trim() === ${JSON.stringify(target)} ||
|
|
3836
|
-
e.getAttribute('aria-label') === ${JSON.stringify(target)}
|
|
3837
|
-
|
|
4056
|
+
e.getAttribute('aria-label') === ${JSON.stringify(target)})`;
|
|
4057
|
+
const selectorExpr = knownSelector
|
|
4058
|
+
? `(() => {
|
|
4059
|
+
try {
|
|
4060
|
+
const el = document.querySelector(${JSON.stringify(knownSelector)});
|
|
4061
|
+
if (el) { el.click(); return 'clicked'; }
|
|
4062
|
+
} catch(e) { /* invalid selector — fall through to text search */ }
|
|
4063
|
+
const fallback = ${textSearchExpr};
|
|
4064
|
+
if (fallback) { fallback.click(); return 'clicked'; }
|
|
4065
|
+
return null;
|
|
4066
|
+
})()`
|
|
4067
|
+
: `(() => {
|
|
4068
|
+
const el = ${textSearchExpr};
|
|
3838
4069
|
if (el) { el.click(); return 'clicked'; }
|
|
3839
4070
|
return null;
|
|
3840
|
-
})()
|
|
4071
|
+
})()`;
|
|
4072
|
+
const evalResult = await Runtime.evaluate({
|
|
4073
|
+
expression: selectorExpr,
|
|
3841
4074
|
returnByValue: true,
|
|
3842
4075
|
});
|
|
3843
4076
|
if (evalResult.result?.value === "clicked") {
|
|
@@ -3872,7 +4105,7 @@ server.tool("click_with_fallback", "Click a target by text using the canonical f
|
|
|
3872
4105
|
return { ok: false, method, durationMs: Date.now() - start, fallbackFrom: null, retries: attempt, error: err instanceof Error ? err.message : String(err), target };
|
|
3873
4106
|
}
|
|
3874
4107
|
});
|
|
3875
|
-
return formatResult("Clicked", target, result);
|
|
4108
|
+
return formatResult("Clicked", target, result, preCheckWarnings);
|
|
3876
4109
|
});
|
|
3877
4110
|
// ── type_with_fallback ──
|
|
3878
4111
|
server.tool("type_with_fallback", "Type text into a target field using the canonical fallback chain: AX → CDP → coordinates. Finds the field by label/placeholder, focuses it, then types.", {
|
|
@@ -3882,9 +4115,12 @@ server.tool("type_with_fallback", "Type text into a target field using the canon
|
|
|
3882
4115
|
clearFirst: z.boolean().optional().describe("Select-all and clear the field before typing (default: false)"),
|
|
3883
4116
|
}, async ({ target, text, bundleId, clearFirst }) => {
|
|
3884
4117
|
await ensureBridge();
|
|
3885
|
-
const
|
|
4118
|
+
const preCheckWarnings = await preExecutionCheck(bundleId);
|
|
4119
|
+
const plan = planExecution("type", infra(), getSensorRanking());
|
|
3886
4120
|
const targetPid = await resolvePid(bundleId);
|
|
3887
|
-
|
|
4121
|
+
// L2→L1: Resolve known selector for direct injection
|
|
4122
|
+
const knownSelector = contextTracker.getSelector(target);
|
|
4123
|
+
const result = await executeWithFallback("type", plan, getAdaptedRetryPolicy("type_with_fallback"), async (method, attempt) => {
|
|
3888
4124
|
const start = Date.now();
|
|
3889
4125
|
try {
|
|
3890
4126
|
switch (method) {
|
|
@@ -3972,17 +4208,30 @@ server.tool("type_with_fallback", "Type text into a target field using the canon
|
|
|
3972
4208
|
const client = await CDPClient({ port });
|
|
3973
4209
|
try {
|
|
3974
4210
|
const { Runtime, DOM, Input } = client;
|
|
3975
|
-
|
|
3976
|
-
|
|
3977
|
-
|
|
4211
|
+
// L2→L1: Try known selector first (with try/catch for invalid selectors),
|
|
4212
|
+
// then fall back to attribute search.
|
|
4213
|
+
const fieldSearchExpr = `Array.from(document.querySelectorAll('input, textarea, [contenteditable]')).find(e =>
|
|
3978
4214
|
e.getAttribute('placeholder') === ${JSON.stringify(target)} ||
|
|
3979
4215
|
e.getAttribute('aria-label') === ${JSON.stringify(target)} ||
|
|
3980
4216
|
e.getAttribute('name') === ${JSON.stringify(target)} ||
|
|
3981
|
-
(e.labels && Array.from(e.labels).some(l => l.textContent?.trim() === ${JSON.stringify(target)}))
|
|
3982
|
-
|
|
4217
|
+
(e.labels && Array.from(e.labels).some(l => l.textContent?.trim() === ${JSON.stringify(target)})))`;
|
|
4218
|
+
const fieldExpr = knownSelector
|
|
4219
|
+
? `(() => {
|
|
4220
|
+
try {
|
|
4221
|
+
const el = document.querySelector(${JSON.stringify(knownSelector)});
|
|
4222
|
+
if (el) { el.focus(); return true; }
|
|
4223
|
+
} catch(e) { /* invalid selector — fall through */ }
|
|
4224
|
+
const fallback = ${fieldSearchExpr};
|
|
4225
|
+
if (fallback) { fallback.focus(); return true; }
|
|
4226
|
+
return false;
|
|
4227
|
+
})()`
|
|
4228
|
+
: `(() => {
|
|
4229
|
+
const el = ${fieldSearchExpr};
|
|
3983
4230
|
if (el) { el.focus(); return true; }
|
|
3984
4231
|
return false;
|
|
3985
|
-
})()
|
|
4232
|
+
})()`;
|
|
4233
|
+
const evalResult = await Runtime.evaluate({
|
|
4234
|
+
expression: fieldExpr,
|
|
3986
4235
|
returnByValue: true,
|
|
3987
4236
|
});
|
|
3988
4237
|
if (!evalResult.result?.value)
|
|
@@ -4009,7 +4258,7 @@ server.tool("type_with_fallback", "Type text into a target field using the canon
|
|
|
4009
4258
|
return { ok: false, method, durationMs: Date.now() - start, fallbackFrom: null, retries: attempt, error: err instanceof Error ? err.message : String(err), target };
|
|
4010
4259
|
}
|
|
4011
4260
|
});
|
|
4012
|
-
return formatResult("Typed into", target, result);
|
|
4261
|
+
return formatResult("Typed into", target, result, preCheckWarnings);
|
|
4013
4262
|
});
|
|
4014
4263
|
// ── read_with_fallback ──
|
|
4015
4264
|
server.tool("read_with_fallback", "Read text content from the screen or a specific element using the canonical fallback chain: AX → CDP → OCR. Returns the text found.", {
|
|
@@ -4017,9 +4266,15 @@ server.tool("read_with_fallback", "Read text content from the screen or a specif
|
|
|
4017
4266
|
bundleId: z.string().optional().describe("App bundle ID"),
|
|
4018
4267
|
}, async ({ target, bundleId }) => {
|
|
4019
4268
|
await ensureBridge();
|
|
4020
|
-
const
|
|
4269
|
+
const preCheckWarnings = await preExecutionCheck(bundleId);
|
|
4270
|
+
const plan = planExecution("read", infra(), getSensorRanking());
|
|
4021
4271
|
const targetPid = await resolvePid(bundleId);
|
|
4022
|
-
|
|
4272
|
+
// L2→L1: Resolve known selector from references for direct injection
|
|
4273
|
+
const knownSelector = target ? contextTracker.getSelector(target) : null;
|
|
4274
|
+
if (knownSelector) {
|
|
4275
|
+
preCheckWarnings.push(`[L2→L1] Injecting known selector: ${knownSelector}`);
|
|
4276
|
+
}
|
|
4277
|
+
const result = await executeWithFallback("read", plan, getAdaptedRetryPolicy("read_with_fallback"), async (method, attempt) => {
|
|
4023
4278
|
const start = Date.now();
|
|
4024
4279
|
try {
|
|
4025
4280
|
switch (method) {
|
|
@@ -4126,14 +4381,25 @@ server.tool("read_with_fallback", "Read text content from the screen or a specif
|
|
|
4126
4381
|
try {
|
|
4127
4382
|
const { Runtime } = client;
|
|
4128
4383
|
if (target) {
|
|
4129
|
-
|
|
4130
|
-
|
|
4131
|
-
const el = Array.from(document.querySelectorAll('*')).find(e =>
|
|
4384
|
+
// L2→L1: Try known selector first, then fall back to text search
|
|
4385
|
+
const textSearch = `Array.from(document.querySelectorAll('*')).find(e =>
|
|
4132
4386
|
e.getAttribute('aria-label') === ${JSON.stringify(target)} ||
|
|
4133
|
-
e.textContent?.trim() === ${JSON.stringify(target)}
|
|
4134
|
-
|
|
4135
|
-
|
|
4136
|
-
|
|
4387
|
+
e.textContent?.trim() === ${JSON.stringify(target)})`;
|
|
4388
|
+
const expr = knownSelector
|
|
4389
|
+
? `(() => {
|
|
4390
|
+
try {
|
|
4391
|
+
const el = document.querySelector(${JSON.stringify(knownSelector)});
|
|
4392
|
+
if (el) return (el.value ?? el.textContent ?? '').trim();
|
|
4393
|
+
} catch(e) {}
|
|
4394
|
+
const fallback = ${textSearch};
|
|
4395
|
+
return fallback ? (fallback.value ?? fallback.textContent ?? '').trim() : null;
|
|
4396
|
+
})()`
|
|
4397
|
+
: `(() => {
|
|
4398
|
+
const el = ${textSearch};
|
|
4399
|
+
return el ? (el.value ?? el.textContent ?? '').trim() : null;
|
|
4400
|
+
})()`;
|
|
4401
|
+
const evalResult = await Runtime.evaluate({
|
|
4402
|
+
expression: expr,
|
|
4137
4403
|
returnByValue: true,
|
|
4138
4404
|
});
|
|
4139
4405
|
if (evalResult.result?.value == null)
|
|
@@ -4173,11 +4439,13 @@ server.tool("read_with_fallback", "Read text content from the screen or a specif
|
|
|
4173
4439
|
return { ok: false, method, durationMs: Date.now() - start, fallbackFrom: null, retries: attempt, error: err instanceof Error ? err.message : String(err), target: null };
|
|
4174
4440
|
}
|
|
4175
4441
|
});
|
|
4442
|
+
// Custom format (not formatResult) — read results include content inline
|
|
4443
|
+
const prefix = preCheckWarnings.length > 0 ? preCheckWarnings.join("\n") + "\n" : "";
|
|
4176
4444
|
if (result.ok) {
|
|
4177
4445
|
const fallbackNote = result.fallbackFrom ? ` (fell back from ${result.fallbackFrom})` : "";
|
|
4178
|
-
return { content: [{ type: "text", text:
|
|
4446
|
+
return { content: [{ type: "text", text: `${prefix}Read via ${result.method}${fallbackNote} in ${result.durationMs}ms:\n\n${result.target}` }] };
|
|
4179
4447
|
}
|
|
4180
|
-
return { content: [{ type: "text", text:
|
|
4448
|
+
return { content: [{ type: "text", text: `${prefix}Failed to read${target ? ` "${target}"` : ""} — all methods exhausted. Last error: ${result.error}` }] };
|
|
4181
4449
|
});
|
|
4182
4450
|
// ── locate_with_fallback ──
|
|
4183
4451
|
server.tool("locate_with_fallback", "Find an element's position on screen using the canonical fallback chain: AX → CDP → OCR. Returns bounds (x, y, width, height).", {
|
|
@@ -4185,9 +4453,22 @@ server.tool("locate_with_fallback", "Find an element's position on screen using
|
|
|
4185
4453
|
bundleId: z.string().optional().describe("App bundle ID"),
|
|
4186
4454
|
}, async ({ target, bundleId }) => {
|
|
4187
4455
|
await ensureBridge();
|
|
4188
|
-
const
|
|
4456
|
+
const preCheckWarnings = await preExecutionCheck(bundleId);
|
|
4457
|
+
// L7→L1: If AppMap knows this element's position, surface it as a hint (the locate chain below still runs)
|
|
4458
|
+
const mapPos = resolveMapPosition(target, bundleId);
|
|
4459
|
+
if (mapPos) {
|
|
4460
|
+
// Map provides center point only — use as hint, not authoritative bounds.
|
|
4461
|
+
// Fall through to full locate chain for accurate bounds.
|
|
4462
|
+
preCheckWarnings.push(`[L7→L1] Map hint: "${target}" expected near (${mapPos.x}, ${mapPos.y}) — verifying via locate chain`);
|
|
4463
|
+
}
|
|
4464
|
+
const plan = planExecution("locate", infra(), getSensorRanking());
|
|
4189
4465
|
const targetPid = await resolvePid(bundleId);
|
|
4190
|
-
|
|
4466
|
+
// L2→L1: Resolve known selector from references for direct injection
|
|
4467
|
+
const knownSelector = contextTracker.getSelector(target);
|
|
4468
|
+
if (knownSelector) {
|
|
4469
|
+
preCheckWarnings.push(`[L2→L1] Injecting known selector: ${knownSelector}`);
|
|
4470
|
+
}
|
|
4471
|
+
const result = await executeWithFallback("locate", plan, getAdaptedRetryPolicy("locate_with_fallback"), async (method, attempt) => {
|
|
4191
4472
|
const start = Date.now();
|
|
4192
4473
|
try {
|
|
4193
4474
|
switch (method) {
|
|
@@ -4220,16 +4501,29 @@ server.tool("locate_with_fallback", "Find an element's position on screen using
|
|
|
4220
4501
|
const client = await CDPClient({ port });
|
|
4221
4502
|
try {
|
|
4222
4503
|
const { Runtime } = client;
|
|
4223
|
-
|
|
4224
|
-
|
|
4225
|
-
const el = Array.from(document.querySelectorAll('*')).find(e =>
|
|
4504
|
+
// L2→L1: Try known selector first, then fall back to text search
|
|
4505
|
+
const textSearch = `Array.from(document.querySelectorAll('*')).find(e =>
|
|
4226
4506
|
e.textContent?.trim() === ${JSON.stringify(target)} ||
|
|
4227
|
-
e.getAttribute('aria-label') === ${JSON.stringify(target)}
|
|
4228
|
-
|
|
4229
|
-
|
|
4230
|
-
|
|
4231
|
-
|
|
4232
|
-
|
|
4507
|
+
e.getAttribute('aria-label') === ${JSON.stringify(target)})`;
|
|
4508
|
+
const expr = knownSelector
|
|
4509
|
+
? `(() => {
|
|
4510
|
+
try {
|
|
4511
|
+
const el = document.querySelector(${JSON.stringify(knownSelector)});
|
|
4512
|
+
if (el) { const r = el.getBoundingClientRect(); return { x: Math.round(r.x), y: Math.round(r.y), width: Math.round(r.width), height: Math.round(r.height) }; }
|
|
4513
|
+
} catch(e) {}
|
|
4514
|
+
const fallback = ${textSearch};
|
|
4515
|
+
if (!fallback) return null;
|
|
4516
|
+
const r = fallback.getBoundingClientRect();
|
|
4517
|
+
return { x: Math.round(r.x), y: Math.round(r.y), width: Math.round(r.width), height: Math.round(r.height) };
|
|
4518
|
+
})()`
|
|
4519
|
+
: `(() => {
|
|
4520
|
+
const el = ${textSearch};
|
|
4521
|
+
if (!el) return null;
|
|
4522
|
+
const r = el.getBoundingClientRect();
|
|
4523
|
+
return { x: Math.round(r.x), y: Math.round(r.y), width: Math.round(r.width), height: Math.round(r.height) };
|
|
4524
|
+
})()`;
|
|
4525
|
+
const evalResult = await Runtime.evaluate({
|
|
4526
|
+
expression: expr,
|
|
4233
4527
|
returnByValue: true,
|
|
4234
4528
|
});
|
|
4235
4529
|
const bounds = evalResult.result?.value;
|
|
@@ -4260,7 +4554,7 @@ server.tool("locate_with_fallback", "Find an element's position on screen using
|
|
|
4260
4554
|
return { ok: false, method, durationMs: Date.now() - start, fallbackFrom: null, retries: attempt, error: err instanceof Error ? err.message : String(err), target: null };
|
|
4261
4555
|
}
|
|
4262
4556
|
});
|
|
4263
|
-
return formatResult("Located", target, result);
|
|
4557
|
+
return formatResult("Located", target, result, preCheckWarnings);
|
|
4264
4558
|
});
|
|
4265
4559
|
// ── select_with_fallback ──
|
|
4266
4560
|
server.tool("select_with_fallback", "Select an option from a dropdown/menu using the canonical fallback chain: AX → CDP. Finds the control, opens it, and picks the specified option.", {
|
|
@@ -4269,9 +4563,15 @@ server.tool("select_with_fallback", "Select an option from a dropdown/menu using
|
|
|
4269
4563
|
bundleId: z.string().optional().describe("App bundle ID"),
|
|
4270
4564
|
}, async ({ target, option, bundleId }) => {
|
|
4271
4565
|
await ensureBridge();
|
|
4272
|
-
const
|
|
4566
|
+
const preCheckWarnings = await preExecutionCheck(bundleId);
|
|
4567
|
+
const plan = planExecution("select", infra(), getSensorRanking());
|
|
4273
4568
|
const targetPid = await resolvePid(bundleId);
|
|
4274
|
-
|
|
4569
|
+
// L2→L1: Resolve known selector from references for direct injection
|
|
4570
|
+
const knownSelector = contextTracker.getSelector(target);
|
|
4571
|
+
if (knownSelector) {
|
|
4572
|
+
preCheckWarnings.push(`[L2→L1] Injecting known selector: ${knownSelector}`);
|
|
4573
|
+
}
|
|
4574
|
+
const result = await executeWithFallback("select", plan, getAdaptedRetryPolicy("select_with_fallback"), async (method, attempt) => {
|
|
4275
4575
|
const start = Date.now();
|
|
4276
4576
|
try {
|
|
4277
4577
|
switch (method) {
|
|
@@ -4301,20 +4601,34 @@ server.tool("select_with_fallback", "Select an option from a dropdown/menu using
|
|
|
4301
4601
|
const client = await CDPClient({ port });
|
|
4302
4602
|
try {
|
|
4303
4603
|
const { Runtime } = client;
|
|
4304
|
-
|
|
4305
|
-
|
|
4306
|
-
const sel = Array.from(document.querySelectorAll('select')).find(s =>
|
|
4604
|
+
// L2→L1: Try known selector first for the select element
|
|
4605
|
+
const textSearch = `Array.from(document.querySelectorAll('select')).find(s =>
|
|
4307
4606
|
s.getAttribute('aria-label') === ${JSON.stringify(target)} ||
|
|
4308
4607
|
s.getAttribute('name') === ${JSON.stringify(target)} ||
|
|
4309
|
-
(s.labels && Array.from(s.labels).some(l => l.textContent?.trim() === ${JSON.stringify(target)}))
|
|
4310
|
-
|
|
4311
|
-
|
|
4312
|
-
|
|
4313
|
-
|
|
4314
|
-
|
|
4315
|
-
|
|
4316
|
-
|
|
4317
|
-
|
|
4608
|
+
(s.labels && Array.from(s.labels).some(l => l.textContent?.trim() === ${JSON.stringify(target)})))`;
|
|
4609
|
+
const selectExpr = knownSelector
|
|
4610
|
+
? `(() => {
|
|
4611
|
+
let sel = null;
|
|
4612
|
+
try { sel = document.querySelector(${JSON.stringify(knownSelector)}); } catch(e) {}
|
|
4613
|
+
if (!sel || sel.tagName !== 'SELECT') sel = ${textSearch};
|
|
4614
|
+
if (!sel) return null;
|
|
4615
|
+
const opt = Array.from(sel.options).find(o => o.text.trim() === ${JSON.stringify(option)} || o.value === ${JSON.stringify(option)});
|
|
4616
|
+
if (!opt) return 'no_option';
|
|
4617
|
+
sel.value = opt.value;
|
|
4618
|
+
sel.dispatchEvent(new Event('change', { bubbles: true }));
|
|
4619
|
+
return 'selected';
|
|
4620
|
+
})()`
|
|
4621
|
+
: `(() => {
|
|
4622
|
+
const sel = ${textSearch};
|
|
4623
|
+
if (!sel) return null;
|
|
4624
|
+
const opt = Array.from(sel.options).find(o => o.text.trim() === ${JSON.stringify(option)} || o.value === ${JSON.stringify(option)});
|
|
4625
|
+
if (!opt) return 'no_option';
|
|
4626
|
+
sel.value = opt.value;
|
|
4627
|
+
sel.dispatchEvent(new Event('change', { bubbles: true }));
|
|
4628
|
+
return 'selected';
|
|
4629
|
+
})()`;
|
|
4630
|
+
const evalResult = await Runtime.evaluate({
|
|
4631
|
+
expression: selectExpr,
|
|
4318
4632
|
returnByValue: true,
|
|
4319
4633
|
});
|
|
4320
4634
|
if (evalResult.result?.value === "selected") {
|
|
@@ -4335,7 +4649,7 @@ server.tool("select_with_fallback", "Select an option from a dropdown/menu using
|
|
|
4335
4649
|
return { ok: false, method, durationMs: Date.now() - start, fallbackFrom: null, retries: attempt, error: err instanceof Error ? err.message : String(err), target: null };
|
|
4336
4650
|
}
|
|
4337
4651
|
});
|
|
4338
|
-
return formatResult("Selected", `${target} → ${option}`, result);
|
|
4652
|
+
return formatResult("Selected", `${target} → ${option}`, result, preCheckWarnings);
|
|
4339
4653
|
});
|
|
4340
4654
|
// ── scroll_with_fallback ──
|
|
4341
4655
|
server.tool("scroll_with_fallback", "Scroll within an element or the active window using the canonical fallback chain: AX → CDP → coordinates. Scrolls until target text is visible, or by a fixed amount.", {
|
|
@@ -4345,9 +4659,15 @@ server.tool("scroll_with_fallback", "Scroll within an element or the active wind
|
|
|
4345
4659
|
bundleId: z.string().optional().describe("App bundle ID"),
|
|
4346
4660
|
}, async ({ direction, amount, target, bundleId }) => {
|
|
4347
4661
|
await ensureBridge();
|
|
4348
|
-
const
|
|
4662
|
+
const preCheckWarnings = await preExecutionCheck(bundleId);
|
|
4663
|
+
const plan = planExecution("scroll", infra(), getSensorRanking());
|
|
4349
4664
|
const targetPid = await resolvePid(bundleId);
|
|
4350
4665
|
const scrollAmount = amount ?? 300;
|
|
4666
|
+
// L2→L1: Resolve known selector from references for scroll container
|
|
4667
|
+
const knownSelector = target ? contextTracker.getSelector(target) : null;
|
|
4668
|
+
if (knownSelector) {
|
|
4669
|
+
preCheckWarnings.push(`[L2→L1] Injecting known selector: ${knownSelector}`);
|
|
4670
|
+
}
|
|
4351
4671
|
// Resolve scroll coordinates — center of the frontmost window
|
|
4352
4672
|
let scrollX = 400, scrollY = 400;
|
|
4353
4673
|
try {
|
|
@@ -4383,7 +4703,7 @@ server.tool("scroll_with_fallback", "Scroll within an element or the active wind
|
|
|
4383
4703
|
return { content: [{ type: "text", text: `Scrolled ${direction} 10 times but "${target}" not found.` }] };
|
|
4384
4704
|
}
|
|
4385
4705
|
// Fixed-amount scroll via fallback chain
|
|
4386
|
-
const result = await executeWithFallback("scroll", plan, getAdaptedRetryPolicy(), async (method, attempt) => {
|
|
4706
|
+
const result = await executeWithFallback("scroll", plan, getAdaptedRetryPolicy("scroll_with_fallback"), async (method, attempt) => {
|
|
4387
4707
|
const start = Date.now();
|
|
4388
4708
|
try {
|
|
4389
4709
|
const deltaX = direction === "left" ? -scrollAmount : direction === "right" ? scrollAmount : 0;
|
|
@@ -4401,9 +4721,18 @@ server.tool("scroll_with_fallback", "Scroll within an element or the active wind
|
|
|
4401
4721
|
const client = await CDPClient({ port });
|
|
4402
4722
|
try {
|
|
4403
4723
|
const { Runtime } = client;
|
|
4404
|
-
|
|
4405
|
-
|
|
4406
|
-
|
|
4724
|
+
// L2→L1: Try scrolling known selector container first
|
|
4725
|
+
const scrollExpr = knownSelector
|
|
4726
|
+
? `(() => {
|
|
4727
|
+
try {
|
|
4728
|
+
const el = document.querySelector(${JSON.stringify(knownSelector)});
|
|
4729
|
+
if (el) { el.scrollBy(${deltaX}, ${deltaY}); return 'scrolled'; }
|
|
4730
|
+
} catch(e) {}
|
|
4731
|
+
window.scrollBy(${deltaX}, ${deltaY});
|
|
4732
|
+
return 'scrolled';
|
|
4733
|
+
})()`
|
|
4734
|
+
: `window.scrollBy(${deltaX}, ${deltaY})`;
|
|
4735
|
+
await Runtime.evaluate({ expression: scrollExpr });
|
|
4407
4736
|
return { ok: true, method, durationMs: Date.now() - start, fallbackFrom: null, retries: attempt, error: null, target: `${direction} ${scrollAmount}px` };
|
|
4408
4737
|
}
|
|
4409
4738
|
finally {
|
|
@@ -4421,7 +4750,7 @@ server.tool("scroll_with_fallback", "Scroll within an element or the active wind
|
|
|
4421
4750
|
return { ok: false, method, durationMs: Date.now() - start, fallbackFrom: null, retries: attempt, error: err instanceof Error ? err.message : String(err), target: null };
|
|
4422
4751
|
}
|
|
4423
4752
|
});
|
|
4424
|
-
return formatResult("Scrolled", `${direction} ${scrollAmount}px`, result);
|
|
4753
|
+
return formatResult("Scrolled", `${direction} ${scrollAmount}px`, result, preCheckWarnings);
|
|
4425
4754
|
});
|
|
4426
4755
|
// ── wait_for_state ──
|
|
4427
4756
|
server.tool("wait_for_state", "Wait until a condition is met on screen: text appears, text disappears, or element becomes available. Polls at intervals using the fallback chain.", {
|
|
@@ -4751,6 +5080,8 @@ function getJobRunner() {
|
|
|
4751
5080
|
const locCache = new LocatorCache();
|
|
4752
5081
|
locCache.setLearningEngine(learningEngine);
|
|
4753
5082
|
const runtimeService = new AutomationRuntimeService(adapter, logger, locCache);
|
|
5083
|
+
// Wire #15: connect AppMap to Executor for skip-verify optimization
|
|
5084
|
+
runtimeService.setAppMap(appMap);
|
|
4754
5085
|
const playbookEngine = new PlaybookEngine(runtimeService);
|
|
4755
5086
|
activePlaybookEngine = playbookEngine;
|
|
4756
5087
|
// Wire CDP into playbook engine for browser_js / cdp_key_event steps
|
|
@@ -4943,6 +5274,7 @@ originalTool("plan_execute", "Run a plan automatically. Known steps (from playbo
|
|
|
4943
5274
|
}
|
|
4944
5275
|
const adaptiveBudget = learningEngine.getAdaptiveBudget(worldModel.getState().focusedApp?.bundleId ?? "unknown");
|
|
4945
5276
|
const executor = new PlanExecutor(worldModel, planner, toolRegistry.toExecutor(), { postconditionWaitMs: adaptiveBudget.verifyMs, defaultStepTimeout: Math.max(30_000, adaptiveBudget.actMs * 2) }, recoveryEngine, learningEngine);
|
|
5277
|
+
executor.setAppMap(appMap);
|
|
4946
5278
|
const result = await executor.executeGoal(goal);
|
|
4947
5279
|
goalStore.update(goalId, goal);
|
|
4948
5280
|
// Check if paused at an LLM step
|
|
@@ -5004,6 +5336,7 @@ originalTool("plan_step", "Execute the next single step of a goal. For increment
|
|
|
5004
5336
|
}
|
|
5005
5337
|
const adaptiveBudget = learningEngine.getAdaptiveBudget(worldModel.getState().focusedApp?.bundleId ?? "unknown");
|
|
5006
5338
|
const executor = new PlanExecutor(worldModel, planner, toolRegistry.toExecutor(), { postconditionWaitMs: adaptiveBudget.verifyMs, defaultStepTimeout: Math.max(30_000, adaptiveBudget.actMs * 2) }, recoveryEngine, learningEngine);
|
|
5339
|
+
executor.setAppMap(appMap);
|
|
5007
5340
|
const result = await executor.executeNextStep(goal);
|
|
5008
5341
|
goalStore.update(goalId, goal);
|
|
5009
5342
|
if ("paused" in result) {
|
|
@@ -5047,6 +5380,7 @@ originalTool("plan_step_resolve", "Resolve a paused LLM step by providing the to
|
|
|
5047
5380
|
}
|
|
5048
5381
|
const adaptiveBudget = learningEngine.getAdaptiveBudget(worldModel.getState().focusedApp?.bundleId ?? "unknown");
|
|
5049
5382
|
const executor = new PlanExecutor(worldModel, planner, toolRegistry.toExecutor(), { postconditionWaitMs: adaptiveBudget.verifyMs, defaultStepTimeout: Math.max(30_000, adaptiveBudget.actMs * 2) }, recoveryEngine, learningEngine);
|
|
5383
|
+
executor.setAppMap(appMap);
|
|
5050
5384
|
const result = await executor.resolveStep(goal, tool, params ?? {});
|
|
5051
5385
|
goalStore.update(goalId, goal);
|
|
5052
5386
|
return {
|
|
@@ -5287,6 +5621,10 @@ originalTool("perception_start", "Start continuous screen monitoring — ScreenH
|
|
|
5287
5621
|
return { content: [{ type: "text", text: `Perception already running (started ${stats.startedAt}). Use perception_stop first to restart, or pass bundleId to switch target.` }] };
|
|
5288
5622
|
}
|
|
5289
5623
|
let app = worldModel.getState().focusedApp;
|
|
5624
|
+
// Validate bundleId format before it touches AppleScript/exec
|
|
5625
|
+
if (overrideBundleId && !/^[a-zA-Z0-9._-]+$/.test(overrideBundleId)) {
|
|
5626
|
+
return { content: [{ type: "text", text: "Error: Invalid bundleId format. Only alphanumeric characters, dots, hyphens, and underscores are allowed." }] };
|
|
5627
|
+
}
|
|
5290
5628
|
// If bundleId override provided, try to resolve app info via bridge or AppleScript
|
|
5291
5629
|
if (overrideBundleId && (!app || app.bundleId !== overrideBundleId)) {
|
|
5292
5630
|
try {
|
|
@@ -5768,7 +6106,37 @@ server.tool("scan_menu_bar", "Scan an app's menu bar via AX tree. Extracts all m
|
|
|
5768
6106
|
safePath = safePath.replace(/Log Out [^\n:]+/g, "Log Out [USER]");
|
|
5769
6107
|
lines.push(` ${safePath}: ${keys}`);
|
|
5770
6108
|
}
|
|
5771
|
-
|
|
6109
|
+
// Wire #12: L6→L7 — bootstrap AppMap zones from menu scan
|
|
6110
|
+
let bootstrapInfo = "";
|
|
6111
|
+
if (appMap) {
|
|
6112
|
+
const bootstrapped = appMap.bootstrapFromMenuScan(bundleId, appName, result);
|
|
6113
|
+
// Clear hint unconditionally — the scan was attempted regardless of bootstrap outcome
|
|
6114
|
+
contextTracker.clearMenuScanHint();
|
|
6115
|
+
if (bootstrapped) {
|
|
6116
|
+
bootstrapInfo = `\nAppMap: bootstrapped zones from menu structure (new app)`;
|
|
6117
|
+
}
|
|
6118
|
+
}
|
|
6119
|
+
// Wire F8: Seed learning from menu scan shortcuts (L6→L5)
|
|
6120
|
+
// Use successCount=5 and score=0.6 so seeds pass recommend() thresholds
|
|
6121
|
+
// (minSamples=5 for locators, score > 0.5 for patterns)
|
|
6122
|
+
if (learningEngine && result.shortcuts) {
|
|
6123
|
+
for (const [menuPath, keys] of Object.entries(result.shortcuts)) {
|
|
6124
|
+
const key = LocatorPolicy.makeKey(bundleId, "key");
|
|
6125
|
+
learningEngine.locators.seedEntry({
|
|
6126
|
+
key, locator: keys, method: "ax",
|
|
6127
|
+
successCount: 5, failCount: 0, score: 0.6,
|
|
6128
|
+
lastUsed: new Date().toISOString(),
|
|
6129
|
+
});
|
|
6130
|
+
// Also seed as pattern: menu_click with the menu path
|
|
6131
|
+
learningEngine.patterns.seedEntry({
|
|
6132
|
+
key: `${bundleId}::menu_click::${menuPath}`,
|
|
6133
|
+
bundleId, tool: "menu_click", locator: menuPath,
|
|
6134
|
+
method: "ax", successCount: 3, failCount: 0, score: 0.6,
|
|
6135
|
+
lastSeen: new Date().toISOString(),
|
|
6136
|
+
});
|
|
6137
|
+
}
|
|
6138
|
+
}
|
|
6139
|
+
let output = lines.join("\n") + bootstrapInfo;
|
|
5772
6140
|
output = redactUsername(output);
|
|
5773
6141
|
output = output.replace(/Log Out [^\n:]+/g, "Log Out [USER]");
|
|
5774
6142
|
return { content: [{ type: "text", text: output }] };
|
|
@@ -5813,6 +6181,24 @@ server.tool("ingest_documentation", "Parse a documentation page (HTML, markdown,
|
|
|
5813
6181
|
lines.push(` - ${t}`);
|
|
5814
6182
|
}
|
|
5815
6183
|
}
|
|
6184
|
+
// Wire F8: Seed learning from ingested documentation flows (L6→L5)
|
|
6185
|
+
if (learningEngine && result.flows) {
|
|
6186
|
+
for (const flow of result.flows) {
|
|
6187
|
+
for (const step of flow.steps) {
|
|
6188
|
+
if (!step.tool)
|
|
6189
|
+
continue;
|
|
6190
|
+
const target = (step.params?.text ?? step.params?.title ?? step.params?.target ?? step.description);
|
|
6191
|
+
if (target) {
|
|
6192
|
+
learningEngine.patterns.seedEntry({
|
|
6193
|
+
key: `${bundleId}::${step.tool}::${target}`,
|
|
6194
|
+
bundleId, tool: step.tool, locator: String(target),
|
|
6195
|
+
method: "ax", successCount: 3, failCount: 0, score: 0.6,
|
|
6196
|
+
lastSeen: new Date().toISOString(),
|
|
6197
|
+
});
|
|
6198
|
+
}
|
|
6199
|
+
}
|
|
6200
|
+
}
|
|
6201
|
+
}
|
|
5816
6202
|
return { content: [{ type: "text", text: lines.join("\n") }] };
|
|
5817
6203
|
});
|
|
5818
6204
|
server.tool("ingest_tutorial", "Extract structured playbook steps from a video transcript (e.g. YouTube captions). Converts tutorial narration into actionable automation steps with tool mappings.", {
|
|
@@ -5937,6 +6323,14 @@ originalTool("community_fetch", "Search community playbooks for a platform or wo
|
|
|
5937
6323
|
lines.push(` Score: ${pb.ratings.score} | By: ${pb.metadata.author}`);
|
|
5938
6324
|
lines.push("");
|
|
5939
6325
|
}
|
|
6326
|
+
// Wire F9: Import community playbooks into AppMap (L6→L7)
|
|
6327
|
+
if (appMap) {
|
|
6328
|
+
for (const pb of results) {
|
|
6329
|
+
if (pb.bundleId && pb.steps.length > 0) {
|
|
6330
|
+
appMap.importFromPlaybook(pb.bundleId, pb.name, pb.steps);
|
|
6331
|
+
}
|
|
6332
|
+
}
|
|
6333
|
+
}
|
|
5940
6334
|
return { content: [{ type: "text", text: lines.join("\n") }] };
|
|
5941
6335
|
});
|
|
5942
6336
|
// ═══════════════════════════════════════════════
|