screenhand 0.3.1 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/dist/mcp-desktop.js +502 -98
- package/dist/src/community/fetcher.js +32 -2
- package/dist/src/community/validator.js +15 -1
- package/dist/src/context-tracker.js +115 -43
- package/dist/src/ingestion/reference-merger.js +3 -1
- package/dist/src/learning/engine.js +225 -7
- package/dist/src/learning/locator-policy.js +16 -0
- package/dist/src/learning/pattern-policy.js +9 -0
- package/dist/src/learning/recovery-policy.js +16 -0
- package/dist/src/learning/sensor-policy.js +9 -0
- package/dist/src/learning/timing-model.js +62 -0
- package/dist/src/memory/research.js +7 -1
- package/dist/src/memory/store.js +18 -7
- package/dist/src/perception/coordinator.js +304 -4
- package/dist/src/perception/manager.js +13 -0
- package/dist/src/perception/vision-source.js +14 -4
- package/dist/src/planner/executor.js +125 -2
- package/dist/src/planner/planner.js +509 -10
- package/dist/src/playbook/engine.js +10 -0
- package/dist/src/recovery/engine.js +50 -3
- package/dist/src/runtime/execution-contract.js +67 -5
- package/dist/src/runtime/executor.js +41 -1
- package/dist/src/runtime/service.js +7 -0
- package/dist/src/state/app-map.js +307 -17
- package/dist/src/util/atomic-write.js +25 -4
- package/dist-references/reddit.json +2 -2
- package/package.json +1 -1
package/dist/mcp-desktop.js
CHANGED
|
@@ -55,7 +55,7 @@ import { WorldModel } from "./src/state/index.js";
|
|
|
55
55
|
import { PerceptionManager } from "./src/perception/index.js";
|
|
56
56
|
import { Planner, PlanExecutor, GoalStore, ToolRegistry } from "./src/planner/index.js";
|
|
57
57
|
import { RecoveryEngine } from "./src/recovery/index.js";
|
|
58
|
-
import { LearningEngine } from "./src/learning/index.js";
|
|
58
|
+
import { LearningEngine, LocatorPolicy } from "./src/learning/index.js";
|
|
59
59
|
import { discoverWebElements, testWebElement, compileReference, saveExploreResult, discoverNativeElements } from "./src/platform/explorer.js";
|
|
60
60
|
import { buildDocUrls, crawlPage, compileLearnResult, saveLearnResult } from "./src/platform/learner.js";
|
|
61
61
|
import { AccessibilityAdapter } from "./src/runtime/accessibility-adapter.js";
|
|
@@ -234,6 +234,10 @@ let CDP = null;
|
|
|
234
234
|
async function ensureCDP(overridePort) {
|
|
235
235
|
if (!CDP)
|
|
236
236
|
CDP = (await import("chrome-remote-interface")).default;
|
|
237
|
+
// Validate port range (defense in depth — Zod validates at MCP boundary, this catches internal callers)
|
|
238
|
+
if (overridePort && (overridePort < 9222 || overridePort > 9999)) {
|
|
239
|
+
throw new Error(`Invalid CDP port ${overridePort} — must be 9222-9999`);
|
|
240
|
+
}
|
|
237
241
|
// If caller specified a port, use it directly (e.g. 9333 for Electron apps)
|
|
238
242
|
if (overridePort) {
|
|
239
243
|
try {
|
|
@@ -318,6 +322,16 @@ const server = new McpServer({ name: "screenhand", version: "3.0.0" }, {
|
|
|
318
322
|
- **supervisor_start()** — background daemon that detects stalled agents and recovers.
|
|
319
323
|
- Pattern: session_claim() → do work → session_heartbeat() periodically → session_release()
|
|
320
324
|
|
|
325
|
+
### Planning (let ScreenHand figure out the steps)
|
|
326
|
+
- **plan_goal("Export video as H.264")** — describe WHAT you want, ScreenHand generates a step-by-step plan. It searches playbooks, saved strategies, and reference knowledge to build the plan. Does NOT execute — returns the plan for review.
|
|
327
|
+
- **plan_execute(goalId)** — run the plan automatically. Deterministic steps (known selectors/flows) run internally. Pauses at LLM steps where your judgment is needed — resolve them with plan_step_resolve().
|
|
328
|
+
- **plan_step(goalId)** — execute one step at a time (for more control than plan_execute).
|
|
329
|
+
- **plan_step_resolve(goalId, tool, params)** — when a plan pauses at an LLM step, YOU decide which tool and params to use. The server executes it, verifies postconditions, and advances.
|
|
330
|
+
- **plan_status(goalId)** — check progress: which step you're on, what's done, what's left.
|
|
331
|
+
- **plan_list()** — see all goals (active, completed, failed).
|
|
332
|
+
- **plan_cancel(goalId)** — abort a goal.
|
|
333
|
+
- Pattern: plan_goal("do X") → review steps → plan_execute() → resolve LLM steps as they pause → on success, strategy auto-saved to memory
|
|
334
|
+
|
|
321
335
|
## Tool Selection Priority
|
|
322
336
|
1. **ui_tree + ui_press** for native app elements (fastest, most reliable)
|
|
323
337
|
2. **browser_* tools** for web content in Chrome/Electron
|
|
@@ -401,6 +415,17 @@ let lastSuccessfulToolName = "unknown";
|
|
|
401
415
|
let lastKnownBundleId = null;
|
|
402
416
|
contextTracker.setAppMap(appMap);
|
|
403
417
|
perceptionManager.setAppMap(appMap);
|
|
418
|
+
// Wire F10: connect ContextTracker to PerceptionCoordinator for per-app perception config
|
|
419
|
+
perceptionManager.setContextTracker(contextTracker);
|
|
420
|
+
// Wire #11: connect TopologyPolicy to AppMap for unified edge scoring
|
|
421
|
+
appMap.setTopologyPolicy(learningEngine.topology);
|
|
422
|
+
// Wire #14: seed TimingModel from AppMap's stored timing profiles (cold-start bootstrap)
|
|
423
|
+
learningEngine.seedTimingFromAppMap(appMap);
|
|
424
|
+
// Wire F5-F7: Cold-start bootstrap — seed all learning policies from AppMap data
|
|
425
|
+
learningEngine.seedLocatorsFromAppMap(appMap);
|
|
426
|
+
learningEngine.seedSensorsFromReadySignals(appMap);
|
|
427
|
+
learningEngine.seedPatternsFromAppMap(appMap);
|
|
428
|
+
learningEngine.seedRecoveryFromContracts(appMap);
|
|
404
429
|
const _executablePlaybookStore = new PlaybookStore(playbooksDir);
|
|
405
430
|
try {
|
|
406
431
|
_executablePlaybookStore.load();
|
|
@@ -412,7 +437,9 @@ goalStore.init();
|
|
|
412
437
|
const toolRegistry = new ToolRegistry();
|
|
413
438
|
const recoveryEngine = new RecoveryEngine(worldModel, toolRegistry.toExecutor(), memory);
|
|
414
439
|
recoveryEngine.setLearningEngine(learningEngine);
|
|
440
|
+
recoveryEngine.setAppMap(appMap);
|
|
415
441
|
planner.setToolRegistry(toolRegistry);
|
|
442
|
+
planner.setAppMap(appMap);
|
|
416
443
|
perceptionManager.setLearningEngine(learningEngine);
|
|
417
444
|
const mcpRecorder = new McpPlaybookRecorder(playbooksDir);
|
|
418
445
|
const referenceMerger = new ReferenceMerger(referencesDir);
|
|
@@ -503,6 +530,20 @@ server.tool = (...args) => {
|
|
|
503
530
|
perceptionManager.notifyToolCall();
|
|
504
531
|
// ── PRE-CALL: check for known error warnings (~0ms, in-memory) ──
|
|
505
532
|
const knownError = memory.quickErrorCheck(toolName);
|
|
533
|
+
// Wire F11: Block execution for tools that fail repeatedly with known resolution (L2→L1)
|
|
534
|
+
// Exclude playbook-seeded errors (id starts with pb_err_) — those are generic platform warnings,
|
|
535
|
+
// not errors observed in this session. Only block on real runtime failures.
|
|
536
|
+
// Also exclude errors injected via memory_record_error API (empty params) — only runtime errors
|
|
537
|
+
// from the intelligence wrapper (which always have populated params) should trigger blocks.
|
|
538
|
+
const isRuntimeError = knownError && typeof knownError.params === "object" && knownError.params !== null && Object.keys(knownError.params).length > 0;
|
|
539
|
+
if (knownError && knownError.occurrences >= 5 && knownError.resolution && !knownError.id.startsWith("pb_err_") && isRuntimeError) {
|
|
540
|
+
return {
|
|
541
|
+
content: [{
|
|
542
|
+
type: "text",
|
|
543
|
+
text: `⛔ Blocked: "${toolName}" has failed ${knownError.occurrences}x with: "${knownError.error}". Known fix: ${knownError.resolution}. Apply the fix first, then retry.`,
|
|
544
|
+
}],
|
|
545
|
+
};
|
|
546
|
+
}
|
|
506
547
|
// ── PRE-CALL: auto-start perception if not running ──
|
|
507
548
|
if (!perceptionManager.isRunning && bridgeReady) {
|
|
508
549
|
const focusApp = worldModel.getState().focusedApp;
|
|
@@ -538,6 +579,9 @@ server.tool = (...args) => {
|
|
|
538
579
|
else if (typeof paramBundleId === "string" && paramBundleId) {
|
|
539
580
|
lastKnownBundleId = paramBundleId;
|
|
540
581
|
}
|
|
582
|
+
// Snapshot the bundleId for this tool's POST-CALL, so concurrent PRE-CALL
|
|
583
|
+
// overwrites of lastKnownBundleId don't contaminate this tool's context
|
|
584
|
+
const postCallBundleId = preBundleId ?? lastKnownBundleId;
|
|
541
585
|
// Capture pre-call window title for navigation edge tracking
|
|
542
586
|
const preWindowTitle = worldModel.getFocusedWindow()?.title.value ?? null;
|
|
543
587
|
// Action tools = actually doing something. Navigation = just clicking around.
|
|
@@ -568,7 +612,7 @@ server.tool = (...args) => {
|
|
|
568
612
|
contextTracker.recordOutcome(toolName, safeParams, true, null);
|
|
569
613
|
// ── POST-CALL: Safari context gap + page context update ──
|
|
570
614
|
const postFocusApp = worldModel.getState().focusedApp;
|
|
571
|
-
const postBundleIdForCtx = postFocusApp?.bundleId ??
|
|
615
|
+
const postBundleIdForCtx = postFocusApp?.bundleId ?? postCallBundleId;
|
|
572
616
|
if (postBundleIdForCtx) {
|
|
573
617
|
lastKnownBundleId = postBundleIdForCtx;
|
|
574
618
|
// Try focused window first, then search all windows for matching bundleId
|
|
@@ -612,7 +656,7 @@ server.tool = (...args) => {
|
|
|
612
656
|
}
|
|
613
657
|
}
|
|
614
658
|
// ── POST-CALL: feed learning engine (timing + locator outcomes) ──
|
|
615
|
-
const learnBundleId = worldModel.getState().focusedApp?.bundleId ??
|
|
659
|
+
const learnBundleId = worldModel.getState().focusedApp?.bundleId ?? postCallBundleId ?? "unknown";
|
|
616
660
|
learningEngine.recordToolTiming({ tool: toolName, bundleId: learnBundleId, durationMs, success: true });
|
|
617
661
|
// Record locator outcome if the tool used a target/selector
|
|
618
662
|
const locatorTarget = safeParams.target ?? safeParams.selector ?? safeParams.locator
|
|
@@ -891,14 +935,17 @@ server.tool = (...args) => {
|
|
|
891
935
|
if (fromNode !== toNode) {
|
|
892
936
|
appMap.addNavNode(learnBundleId, fromNode, { type: "window", description: fromNode });
|
|
893
937
|
appMap.addNavNode(learnBundleId, toNode, { type: "window", description: toNode });
|
|
894
|
-
|
|
938
|
+
const locatorSlug = locatorTarget ? String(locatorTarget).slice(0, 80) : null;
|
|
939
|
+
const edgeAction = locatorSlug ? `${toolName}:${locatorSlug}` : toolName;
|
|
940
|
+
// Wire #11: record topology FIRST so AppMap can read the updated Bayesian score
|
|
895
941
|
learningEngine.recordTopologyOutcome({
|
|
896
942
|
bundleId: learnBundleId,
|
|
897
943
|
fromNode,
|
|
898
|
-
action:
|
|
944
|
+
action: edgeAction,
|
|
899
945
|
toNode,
|
|
900
946
|
success: true,
|
|
901
947
|
});
|
|
948
|
+
appMap.recordEdgeOutcome(learnBundleId, fromNode, edgeAction, toNode, true);
|
|
902
949
|
}
|
|
903
950
|
}
|
|
904
951
|
// ── State machine: detect state changes from tool results ──
|
|
@@ -1256,7 +1303,7 @@ server.tool = (...args) => {
|
|
|
1256
1303
|
// ── Record failure for playbook learning (in-memory only) ──
|
|
1257
1304
|
contextTracker.recordOutcome(toolName, safeParams, false, errorMsg);
|
|
1258
1305
|
// ── Feed learning engine (failure timing + locator) ──
|
|
1259
|
-
const learnBundleIdErr = worldModel.getState().focusedApp?.bundleId ??
|
|
1306
|
+
const learnBundleIdErr = worldModel.getState().focusedApp?.bundleId ?? postCallBundleId ?? "unknown";
|
|
1260
1307
|
learningEngine.recordToolTiming({ tool: toolName, bundleId: learnBundleIdErr, durationMs, success: false });
|
|
1261
1308
|
const failedLocator = safeParams.target ?? safeParams.selector ?? safeParams.locator
|
|
1262
1309
|
?? (toolName === "click_text" ? safeParams.text : undefined);
|
|
@@ -1412,7 +1459,7 @@ server.tool("windows", "List all visible windows with IDs, positions, and sizes"
|
|
|
1412
1459
|
return { content: [{ type: "text", text: lines.join("\n") }] };
|
|
1413
1460
|
});
|
|
1414
1461
|
server.tool("focus", "Focus/activate an application (or a specific window by windowId)", {
|
|
1415
|
-
bundleId: z.string().describe("App bundle ID, e.g. com.apple.Safari"),
|
|
1462
|
+
bundleId: z.string().regex(/^[a-zA-Z0-9._-]+$/, "Invalid bundleId format").describe("App bundle ID, e.g. com.apple.Safari"),
|
|
1416
1463
|
windowId: z.number().optional().describe("Specific window ID from windows() — raises that exact window. Use when multiple instances of the same app exist."),
|
|
1417
1464
|
}, async ({ bundleId, windowId }) => {
|
|
1418
1465
|
await ensureBridge();
|
|
@@ -1518,8 +1565,8 @@ server.tool("focus", "Focus/activate an application (or a specific window by win
|
|
|
1518
1565
|
}
|
|
1519
1566
|
});
|
|
1520
1567
|
server.tool("launch", "Launch an application. Chrome/Chromium browsers are launched with CDP enabled (port 9222) for browser_* tools.", {
|
|
1521
|
-
bundleId: z.string().describe("App bundle ID"),
|
|
1522
|
-
cdpPort: z.number().optional().describe("CDP port for Chrome/Chromium (default: 9222). Ignored for non-browser apps."),
|
|
1568
|
+
bundleId: z.string().regex(/^[a-zA-Z0-9._-]+$/, "Invalid bundleId format").describe("App bundle ID"),
|
|
1569
|
+
cdpPort: z.number().min(9222).max(9999).optional().describe("CDP port for Chrome/Chromium (default: 9222). Ignored for non-browser apps."),
|
|
1523
1570
|
}, async ({ bundleId, cdpPort }) => {
|
|
1524
1571
|
await ensureBridge();
|
|
1525
1572
|
const riskyBundleIds = {
|
|
@@ -1920,7 +1967,7 @@ server.tool("click_text", "SLOW fallback: Find text on screen via OCR and click
|
|
|
1920
1967
|
server.tool("type_text", "Type text using the keyboard. Auto-detects Electron apps and routes through CDP for reliable editor input.", {
|
|
1921
1968
|
text: z.string().describe("Text to type"),
|
|
1922
1969
|
pid: z.number().optional().describe("Target process ID for PID-targeted event delivery"),
|
|
1923
|
-
cdpPort: z.number().optional().describe("CDP port for Electron apps (e.g. 9229). When set, types via CDP instead of AX — fixes Copilot/panel focus theft."),
|
|
1970
|
+
cdpPort: z.number().min(9222).max(9999).optional().describe("CDP port for Electron apps (e.g. 9229). When set, types via CDP instead of AX — fixes Copilot/panel focus theft."),
|
|
1924
1971
|
}, async ({ text, pid, cdpPort: portOverride }) => {
|
|
1925
1972
|
await ensureBridge();
|
|
1926
1973
|
// Auto-resolve frontmost PID when none provided — global HID posting
|
|
@@ -2168,7 +2215,7 @@ function randomDelay(min, max) {
|
|
|
2168
2215
|
// BROWSER — control Chrome pages via CDP (10ms, not OCR)
|
|
2169
2216
|
// ═══════════════════════════════════════════════
|
|
2170
2217
|
server.tool("browser_tabs", "List all open Chrome/Electron tabs. Use cdpPort to connect to a specific app (e.g. 9333 for Codex Desktop).", {
|
|
2171
|
-
cdpPort: z.number().optional().describe("CDP port override (e.g. 9333 for Electron apps). Omit to auto-detect."),
|
|
2218
|
+
cdpPort: z.number().min(9222).max(9999).optional().describe("CDP port override (e.g. 9333 for Electron apps). Omit to auto-detect."),
|
|
2172
2219
|
}, async ({ cdpPort: portOverride }) => {
|
|
2173
2220
|
const { CDP: cdp, port } = await ensureCDP(portOverride);
|
|
2174
2221
|
const targets = await cdp.List({ port });
|
|
@@ -2178,7 +2225,7 @@ server.tool("browser_tabs", "List all open Chrome/Electron tabs. Use cdpPort to
|
|
|
2178
2225
|
});
|
|
2179
2226
|
server.tool("browser_open", "Open a URL in Chrome/Electron (creates new tab)", {
|
|
2180
2227
|
url: z.string().describe("URL to open"),
|
|
2181
|
-
cdpPort: z.number().optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
|
|
2228
|
+
cdpPort: z.number().min(9222).max(9999).optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
|
|
2182
2229
|
}, async ({ url, cdpPort: portOverride }) => {
|
|
2183
2230
|
// L2-71 fix: Block dangerous URL protocols
|
|
2184
2231
|
const BLOCKED_PROTOCOLS = ["javascript:", "data:", "blob:", "vbscript:"];
|
|
@@ -2202,7 +2249,7 @@ server.tool("browser_open", "Open a URL in Chrome/Electron (creates new tab)", {
|
|
|
2202
2249
|
server.tool("browser_navigate", "Navigate the active Chrome/Electron tab to a URL", {
|
|
2203
2250
|
url: z.string().describe("URL to navigate to"),
|
|
2204
2251
|
tabId: z.string().optional().describe("Tab ID (from browser_tabs). Omit for most recent tab."),
|
|
2205
|
-
cdpPort: z.number().optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
|
|
2252
|
+
cdpPort: z.number().min(9222).max(9999).optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
|
|
2206
2253
|
}, async ({ url, tabId, cdpPort: portOverride }) => {
|
|
2207
2254
|
// L2-71 fix: Block dangerous URL protocols that could execute arbitrary code
|
|
2208
2255
|
const BLOCKED_PROTOCOLS = ["javascript:", "data:", "blob:", "vbscript:"];
|
|
@@ -2247,7 +2294,7 @@ server.tool("browser_navigate", "Navigate the active Chrome/Electron tab to a UR
|
|
|
2247
2294
|
server.tool("browser_js", "Execute JavaScript in a Chrome/Electron tab. Returns the result. WARNING: This runs arbitrary JS in the browser context — avoid on sensitive pages (banking, email). All executions are audit-logged.", {
|
|
2248
2295
|
code: z.string().describe("JavaScript to execute. Must be an expression that returns a value. Use (() => { ... })() for multi-line."),
|
|
2249
2296
|
tabId: z.string().optional().describe("Tab ID. Omit for most recent tab."),
|
|
2250
|
-
cdpPort: z.number().optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
|
|
2297
|
+
cdpPort: z.number().min(9222).max(9999).optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
|
|
2251
2298
|
}, async ({ code, tabId, cdpPort: portOverride }) => {
|
|
2252
2299
|
auditLog("browser_js", { code, tabId });
|
|
2253
2300
|
const { CDP: cdp, port } = await ensureCDP(portOverride);
|
|
@@ -2281,7 +2328,7 @@ server.tool("browser_dom", "Query the DOM of a Chrome/Electron page. Returns mat
|
|
|
2281
2328
|
selector: z.string().describe("CSS selector, e.g. 'button', '.nav a', '#main h2'"),
|
|
2282
2329
|
tabId: z.string().optional().describe("Tab ID. Omit for most recent tab."),
|
|
2283
2330
|
limit: z.number().optional().describe("Max results (default 20)"),
|
|
2284
|
-
cdpPort: z.number().optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
|
|
2331
|
+
cdpPort: z.number().min(9222).max(9999).optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
|
|
2285
2332
|
}, async ({ selector, tabId, limit, cdpPort: portOverride }) => {
|
|
2286
2333
|
// Capture bundleId before any async CDP calls to avoid race condition
|
|
2287
2334
|
const browserBundleId = worldModel.getState().focusedApp?.bundleId ?? "com.google.Chrome";
|
|
@@ -2332,7 +2379,7 @@ server.tool("browser_dom", "Query the DOM of a Chrome/Electron page. Returns mat
|
|
|
2332
2379
|
server.tool("browser_click", "Click an element in Chrome/Electron by CSS selector. Uses CDP Input.dispatchMouseEvent for realistic mouse events.", {
|
|
2333
2380
|
selector: z.string().describe("CSS selector of element to click"),
|
|
2334
2381
|
tabId: z.string().optional().describe("Tab ID. Omit for most recent tab."),
|
|
2335
|
-
cdpPort: z.number().optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
|
|
2382
|
+
cdpPort: z.number().min(9222).max(9999).optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
|
|
2336
2383
|
}, async ({ selector, tabId, cdpPort: portOverride }) => {
|
|
2337
2384
|
const { client } = await getCDPClient(tabId, portOverride);
|
|
2338
2385
|
await client.Runtime.enable();
|
|
@@ -2365,7 +2412,7 @@ server.tool("browser_type", "Type into an input field in Chrome/Electron. Uses C
|
|
|
2365
2412
|
text: z.string().describe("Text to type"),
|
|
2366
2413
|
clear: z.boolean().optional().describe("Clear field first (default true)"),
|
|
2367
2414
|
tabId: z.string().optional().describe("Tab ID"),
|
|
2368
|
-
cdpPort: z.number().optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
|
|
2415
|
+
cdpPort: z.number().min(9222).max(9999).optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
|
|
2369
2416
|
}, async ({ selector, text, clear, tabId, cdpPort: portOverride }) => {
|
|
2370
2417
|
const { client } = await getCDPClient(tabId, portOverride);
|
|
2371
2418
|
await client.Runtime.enable();
|
|
@@ -2406,7 +2453,7 @@ server.tool("browser_wait", "Wait for a condition on a Chrome/Electron page", {
|
|
|
2406
2453
|
condition: z.string().describe("JS expression that returns truthy when ready. e.g. 'document.querySelector(\".loaded\")'"),
|
|
2407
2454
|
timeoutMs: z.number().optional().describe("Timeout in ms (default 10000)"),
|
|
2408
2455
|
tabId: z.string().optional().describe("Tab ID"),
|
|
2409
|
-
cdpPort: z.number().optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
|
|
2456
|
+
cdpPort: z.number().min(9222).max(9999).optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
|
|
2410
2457
|
}, async ({ condition, timeoutMs, tabId, cdpPort: portOverride }) => {
|
|
2411
2458
|
const { CDP: cdp, port } = await ensureCDP(portOverride);
|
|
2412
2459
|
let targetId = tabId;
|
|
@@ -2434,7 +2481,7 @@ server.tool("browser_wait", "Wait for a condition on a Chrome/Electron page", {
|
|
|
2434
2481
|
});
|
|
2435
2482
|
server.tool("browser_page_info", "Get current page title, URL, and text content summary", {
|
|
2436
2483
|
tabId: z.string().optional().describe("Tab ID"),
|
|
2437
|
-
cdpPort: z.number().optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
|
|
2484
|
+
cdpPort: z.number().min(9222).max(9999).optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
|
|
2438
2485
|
}, async ({ tabId, cdpPort: portOverride }) => {
|
|
2439
2486
|
// Capture bundleId BEFORE CDP call to prevent focus-change race
|
|
2440
2487
|
const browserBundleId = worldModel.getState().focusedApp?.bundleId ?? "com.google.Chrome";
|
|
@@ -2509,7 +2556,7 @@ if (origQuery) {
|
|
|
2509
2556
|
`;
|
|
2510
2557
|
server.tool("browser_stealth", "Inject anti-detection patches into Chrome/Electron page. Call once after navigating to a protected site. Hides webdriver flag, patches plugins/languages/permissions.", {
|
|
2511
2558
|
tabId: z.string().optional().describe("Tab ID. Omit for most recent tab."),
|
|
2512
|
-
cdpPort: z.number().optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
|
|
2559
|
+
cdpPort: z.number().min(9222).max(9999).optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
|
|
2513
2560
|
}, async ({ tabId, cdpPort: portOverride }) => {
|
|
2514
2561
|
const { client } = await getCDPClient(tabId, portOverride);
|
|
2515
2562
|
await client.Page.enable();
|
|
@@ -2529,7 +2576,7 @@ server.tool("browser_fill_form", "Fill a form field with human-like typing (anti
|
|
|
2529
2576
|
clear: z.boolean().optional().describe("Clear field first (default true)"),
|
|
2530
2577
|
delayMs: z.number().optional().describe("Avg delay between keystrokes in ms (default 50)"),
|
|
2531
2578
|
tabId: z.string().optional().describe("Tab ID"),
|
|
2532
|
-
cdpPort: z.number().optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
|
|
2579
|
+
cdpPort: z.number().min(9222).max(9999).optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
|
|
2533
2580
|
}, async ({ selector, text, clear, delayMs, tabId, cdpPort: portOverride }) => {
|
|
2534
2581
|
const { client } = await getCDPClient(tabId, portOverride);
|
|
2535
2582
|
await client.Runtime.enable();
|
|
@@ -2573,7 +2620,7 @@ server.tool("browser_fill_form", "Fill a form field with human-like typing (anti
|
|
|
2573
2620
|
server.tool("browser_human_click", "Alias for browser_click — both use realistic mouseMoved → mousePressed → mouseReleased events. Prefer browser_click directly.", {
|
|
2574
2621
|
selector: z.string().describe("CSS selector of element to click"),
|
|
2575
2622
|
tabId: z.string().optional().describe("Tab ID. Omit for most recent tab."),
|
|
2576
|
-
cdpPort: z.number().optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
|
|
2623
|
+
cdpPort: z.number().min(9222).max(9999).optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
|
|
2577
2624
|
}, async ({ selector, tabId, cdpPort: portOverride }) => {
|
|
2578
2625
|
const { client } = await getCDPClient(tabId, portOverride);
|
|
2579
2626
|
await client.Runtime.enable();
|
|
@@ -2981,7 +3028,7 @@ server.tool("playbook_record", "Macro recorder: start recording, do the flow, st
|
|
|
2981
3028
|
platform: z.string().optional().describe("Platform name (required for start)"),
|
|
2982
3029
|
name: z.string().optional().describe("Playbook name (required for stop)"),
|
|
2983
3030
|
description: z.string().optional().describe("Playbook description (for stop)"),
|
|
2984
|
-
cdpPort: z.number().optional().describe("CDP port if needed for browser_js steps (e.g. 9333 for Codex)"),
|
|
3031
|
+
cdpPort: z.number().min(9222).max(9999).optional().describe("CDP port if needed for browser_js steps (e.g. 9333 for Codex)"),
|
|
2985
3032
|
}, async ({ action, platform, name, description, cdpPort }) => {
|
|
2986
3033
|
switch (action) {
|
|
2987
3034
|
case "start": {
|
|
@@ -3152,6 +3199,27 @@ server.tool("applescript", "Run an AppleScript command. For controlling Finder,
|
|
|
3152
3199
|
if (process.platform === "win32") {
|
|
3153
3200
|
return { content: [{ type: "text", text: "AppleScript is not supported on Windows. Use ui_tree, ui_press, and other accessibility tools instead." }] };
|
|
3154
3201
|
}
|
|
3202
|
+
// Block shell execution vectors in AppleScript — denylist approach: reject any script matching a known-dangerous pattern
|
|
3203
|
+
const scriptLower = script.toLowerCase();
|
|
3204
|
+
const BLOCKED_PATTERNS = [
|
|
3205
|
+
/do\s+shell\s+script/i, // direct shell execution
|
|
3206
|
+
/run\s+shell\s+script/i, // variant
|
|
3207
|
+
/run\s+script/i, // dynamic AppleScript eval (can construct blocked commands)
|
|
3208
|
+
/do\s+script/i, // Terminal.app shell execution
|
|
3209
|
+
/«class\s/i, // raw Apple Event codes (bypass text-level blocks)
|
|
3210
|
+
/system\s+events.*process/i, // process spawning via System Events
|
|
3211
|
+
/NSAppleScript/i, // Objective-C bridge
|
|
3212
|
+
/ObjC\.import/i, // JXA Objective-C bridge
|
|
3213
|
+
/\bshell\b/i, // catch-all for shell-related commands
|
|
3214
|
+
/do\s+JavaScript/i, // JXA execution
|
|
3215
|
+
];
|
|
3216
|
+
if (BLOCKED_PATTERNS.some(p => p.test(script))) {
|
|
3217
|
+
return { content: [{ type: "text", text: "Blocked: this AppleScript contains a restricted command (shell execution, dynamic eval, or process spawning). Use the Bash tool for shell commands." }] };
|
|
3218
|
+
}
|
|
3219
|
+
// Block string concatenation that could reassemble blocked commands
|
|
3220
|
+
if (/&/.test(script) && (/script/i.test(script) || /shell/i.test(script))) {
|
|
3221
|
+
return { content: [{ type: "text", text: "Blocked: AppleScript with string concatenation containing 'script' or 'shell' — potential bypass attempt." }] };
|
|
3222
|
+
}
|
|
3155
3223
|
try {
|
|
3156
3224
|
const result = execSync(`osascript -e '${script.replace(/'/g, "'\\''")}'`, {
|
|
3157
3225
|
encoding: "utf-8",
|
|
@@ -3712,7 +3780,7 @@ import { METHOD_CAPABILITIES, DEFAULT_RETRY_POLICY, planExecution, executeWithFa
|
|
|
3712
3780
|
server.tool("execution_plan", "Show the execution plan for an action type. Returns the ordered fallback chain based on available infrastructure.", {
|
|
3713
3781
|
action: z.enum(["click", "type", "read", "locate", "select", "scroll"]).describe("Action type"),
|
|
3714
3782
|
}, async ({ action }) => {
|
|
3715
|
-
const plan = planExecution(action, { hasBridge: true, hasCDP: cdpPort !== null });
|
|
3783
|
+
const plan = planExecution(action, { hasBridge: true, hasCDP: cdpPort !== null }, getSensorRanking());
|
|
3716
3784
|
const lines = plan.map((method, i) => {
|
|
3717
3785
|
const cap = METHOD_CAPABILITIES[method];
|
|
3718
3786
|
return `${i + 1}. ${method} (~${cap.avgLatencyMs}ms)${i === 0 ? " ← primary" : ""}`;
|
|
@@ -3754,26 +3822,161 @@ function infra() {
|
|
|
3754
3822
|
return { hasBridge: true, hasCDP: cdpPort !== null };
|
|
3755
3823
|
}
|
|
3756
3824
|
/**
|
|
3757
|
-
* Get
|
|
3758
|
-
*
|
|
3825
|
+
* Get sensor rankings for the current app from the learning engine.
|
|
3826
|
+
* Used by planExecution() to reorder fallback methods based on learned success rates.
|
|
3827
|
+
* Returns undefined if no bundleId is known or the learning engine has no rankings for it (falls back to canonical order).
|
|
3828
|
+
*/
|
|
3829
|
+
function getSensorRanking(overrideBundleId) {
|
|
3830
|
+
// Use override bundleId when provided (from tool params), else worldModel, else lastKnown
|
|
3831
|
+
const bundleId = overrideBundleId ?? worldModel.getState().focusedApp?.bundleId ?? lastKnownBundleId;
|
|
3832
|
+
if (!bundleId)
|
|
3833
|
+
return undefined;
|
|
3834
|
+
const ranked = learningEngine.rankSensors(bundleId);
|
|
3835
|
+
return ranked.length > 0 ? ranked : undefined;
|
|
3836
|
+
}
|
|
3837
|
+
/**
|
|
3838
|
+
* Get a retry policy adapted by the learning engine's adaptive budgets
|
|
3839
|
+
* AND the AppMap's timing profiles (L7→L1).
|
|
3840
|
+
*
|
|
3841
|
+
* Priority: AppMap timing > Learning budget > Default
|
|
3842
|
+
* AppMap stores per-tool/per-action avg durations from real executions.
|
|
3843
|
+
* Learning budget stores per-app adaptive budgets from outcome stats.
|
|
3759
3844
|
*/
|
|
3760
|
-
function getAdaptedRetryPolicy() {
|
|
3761
|
-
|
|
3845
|
+
function getAdaptedRetryPolicy(toolName, overrideBundleId) {
|
|
3846
|
+
let typicalMs = null;
|
|
3847
|
+
// L7→L1: Check AppMap timing profiles for the action type.
|
|
3848
|
+
// Timing keys are stored as "click::Submit", "click_text::Login", etc.
|
|
3849
|
+
// Fallback tools pass "click_with_fallback" — extract the action prefix to match.
|
|
3850
|
+
const bundleId = overrideBundleId ?? worldModel.getState().focusedApp?.bundleId ?? lastKnownBundleId;
|
|
3851
|
+
if (bundleId && toolName) {
|
|
3852
|
+
const actionPrefix = toolName.replace(/_with_fallback$/, "");
|
|
3853
|
+
// Get all timing profiles for this app, then filter by action prefix
|
|
3854
|
+
const allTimings = appMap.getTimingProfile(bundleId);
|
|
3855
|
+
const matchingTimings = allTimings.filter((t) => t.key.startsWith(actionPrefix + "::") || t.key === actionPrefix);
|
|
3856
|
+
if (matchingTimings.length > 0) {
|
|
3857
|
+
// Prefer element_response entries: use the median of their avgMs; otherwise fall back to the first matching entry's avgMs
|
|
3858
|
+
const responseTimes = matchingTimings
|
|
3859
|
+
.filter((t) => t.type === "element_response")
|
|
3860
|
+
.map((t) => t.avgMs);
|
|
3861
|
+
if (responseTimes.length > 0) {
|
|
3862
|
+
responseTimes.sort((a, b) => a - b);
|
|
3863
|
+
const mid = Math.floor(responseTimes.length / 2);
|
|
3864
|
+
typicalMs = responseTimes.length % 2 === 1
|
|
3865
|
+
? responseTimes[mid]
|
|
3866
|
+
: (responseTimes[mid - 1] + responseTimes[mid]) / 2;
|
|
3867
|
+
}
|
|
3868
|
+
else {
|
|
3869
|
+
typicalMs = matchingTimings[0].avgMs;
|
|
3870
|
+
}
|
|
3871
|
+
}
|
|
3872
|
+
}
|
|
3873
|
+
// Fall back to L5 adaptive budget
|
|
3874
|
+
if (typicalMs == null && currentAdaptiveBudget) {
|
|
3875
|
+
typicalMs = Math.max(currentAdaptiveBudget.locateMs, currentAdaptiveBudget.actMs);
|
|
3876
|
+
}
|
|
3877
|
+
if (typicalMs == null)
|
|
3762
3878
|
return DEFAULT_RETRY_POLICY;
|
|
3763
|
-
// Use the max of locate+act as a guide for retry delay — faster apps need shorter delays
|
|
3764
|
-
const typicalMs = Math.max(currentAdaptiveBudget.locateMs, currentAdaptiveBudget.actMs);
|
|
3765
3879
|
// Retry delay = max(100ms, typical * 1.5), capped at the default
|
|
3766
3880
|
const adaptedDelay = Math.min(DEFAULT_RETRY_POLICY.delayBetweenRetriesMs, Math.max(100, Math.ceil(typicalMs * 1.5)));
|
|
3767
3881
|
if (adaptedDelay === DEFAULT_RETRY_POLICY.delayBetweenRetriesMs)
|
|
3768
3882
|
return DEFAULT_RETRY_POLICY;
|
|
3769
3883
|
return { ...DEFAULT_RETRY_POLICY, delayBetweenRetriesMs: adaptedDelay };
|
|
3770
3884
|
}
|
|
3771
|
-
/**
 * Render the outcome of a fallback-chain action as an MCP text response.
 *
 * @param {string} action Verb shown to the caller (e.g. "Clicked").
 * @param {string} target Target as requested by the caller.
 * @param {object} result Outcome object; reads `ok`, `target`, `method`,
 *   `fallbackFrom`, `durationMs` and `error`.
 * @param {string[]} [preCheckWarnings] Advisory lines placed, one per line,
 *   ahead of the message.
 * @returns {{content: Array<{type: string, text: string}>}} Single-item text content.
 */
function formatResult(action, target, result, preCheckWarnings) {
    let header = "";
    if (preCheckWarnings?.length > 0) {
        header = `${preCheckWarnings.join("\n")}\n`;
    }
    let message;
    if (!result.ok) {
        message = `${header}Failed to ${action} "${target}" — all methods exhausted. Last error: ${result.error}`;
    }
    else {
        // Prefer the target label reported by the method that succeeded.
        const shownTarget = result.target ?? target;
        const fallbackNote = result.fallbackFrom ? ` (fell back from ${result.fallbackFrom})` : "";
        message = `${header}${action} "${shownTarget}" via ${result.method}${fallbackNote} in ${result.durationMs}ms`;
    }
    return { content: [{ type: "text", text: message }] };
}
|
|
3895
|
+
/**
 * L3→L1: Pre-execution worldModel check.
 * Verifies the target app is focused and not blocked by dialogs.
 * Auto-focuses the app if it's in the background. Returns warnings
 * that should be prepended to the result.
 *
 * Every check that runs after the auto-focus attempt reads the re-fetched
 * state, so a window that was just brought forward is not reported using
 * pre-focus data.
 *
 * @param {string} [bundleId] Target app; when omitted, the last-known bundle
 *   ID is used, then the currently focused app.
 * @returns {Promise<string[]>} Advisory warnings (possibly empty). Never
 *   rejects — any unexpected error ends the check and returns what was
 *   collected so far.
 */
async function preExecutionCheck(bundleId) {
    const warnings = [];
    try {
        const state = worldModel.getState();
        const targetBundleId = bundleId ?? lastKnownBundleId ?? state.focusedApp?.bundleId;
        if (!targetBundleId)
            return warnings;
        // Check if target app is focused — use correct bridge method "app.focus"
        if (state.focusedApp && state.focusedApp.bundleId !== targetBundleId) {
            warnings.push(`[L3→L1] Target app ${targetBundleId} is not focused (current: ${state.focusedApp.bundleId}). Auto-focusing...`);
            try {
                await bridge.call("app.focus", { bundleId: targetBundleId });
            }
            catch {
                warnings.push(`[L3→L1] Auto-focus failed — proceeding anyway`);
            }
        }
        // Re-fetch state after auto-focus; all remaining checks use this snapshot
        const postFocusState = worldModel.getState();
        // Check for blocking dialogs — scoped to target app only.
        // Observer-sourced dialogs have windowId=0 (no real window ID),
        // so fall back to checking if the focused app matches.
        const relevantDialogs = (postFocusState.activeDialogs ?? []).filter((d) => {
            if (d.windowId === 0) {
                return postFocusState.focusedApp?.bundleId === targetBundleId;
            }
            const win = postFocusState.windows.get(d.windowId);
            return win?.bundleId === targetBundleId;
        });
        if (relevantDialogs.length > 0) {
            const dialogTitles = relevantDialogs
                .map((d) => d.title || d.type)
                .join(", ");
            warnings.push(`[L3→L1] Active dialog(s) detected: ${dialogTitles} — may block interaction`);
        }
        // Check if target window is off-screen
        for (const [, win] of postFocusState.windows) {
            if (win.bundleId === targetBundleId && !win.isOnScreen) {
                // A window without a tracked title must not abort the remaining checks
                const title = win.title?.value ?? "(untitled)";
                warnings.push(`[L3→L1] Window "${title}" is off-screen or minimized`);
            }
        }
        // Check if world state is stale (>10s since last update)
        const staleThresholdMs = 10_000;
        const lastUpdate = new Date(postFocusState.updatedAt).getTime();
        const ageMs = Date.now() - lastUpdate;
        if (!Number.isNaN(lastUpdate) && ageMs > staleThresholdMs && postFocusState.confidence < 0.5) {
            warnings.push(`[L3→L1] World state is stale (${Math.round(ageMs / 1000)}s old, confidence ${postFocusState.confidence.toFixed(2)}) — screen may have changed`);
        }
    }
    catch {
        // Pre-check is best-effort advisory — never crash the tool call
    }
    return warnings;
}
|
|
3954
|
+
/**
 * L7→L1: Try to resolve an element's position from the AppMap.
 * Returns known screen coordinates if the map has a position for this label
 * AND we can get the current window bounds. Returns null otherwise.
 *
 * Bounds are only trusted when the focused window belongs to the target app,
 * its bounds carry a parseable timestamp no older than 5 seconds, and both
 * sides are at least 50px. Bounds of unknown age or with non-numeric size are
 * rejected, because the result drives an unverified coordinate click.
 *
 * @param {string} target Element label to look up in the AppMap.
 * @param {string} [bundleId] Target app; defaults to the focused app, then
 *   the last-known bundle ID.
 * @returns {*} Whatever `appMap.resolvePosition` returns for the label, or
 *   null when the bounds cannot be trusted.
 */
function resolveMapPosition(target, bundleId) {
    const MAX_BOUNDS_AGE_MS = 5000;
    const MIN_WINDOW_SIDE_PX = 50;
    const state = worldModel.getState();
    const bid = bundleId ?? state.focusedApp?.bundleId ?? lastKnownBundleId;
    if (!bid)
        return null;
    // Get window bounds from worldModel for coordinate conversion
    const focusedWinId = state.focusedWindowId;
    if (focusedWinId == null)
        return null;
    const win = state.windows.get(focusedWinId);
    if (!win || win.bundleId !== bid)
        return null;
    const bounds = win.bounds?.value;
    if (!bounds)
        return null; // bounds not tracked for this window
    // Guard: reject stale bounds (>5s old) to prevent clicking at wrong position after window move.
    // An unparseable timestamp yields NaN, which passes plain </> comparisons — reject it explicitly.
    const boundsAge = Date.now() - new Date(win.bounds.updatedAt).getTime();
    if (!Number.isFinite(boundsAge) || boundsAge > MAX_BOUNDS_AGE_MS || boundsAge < 0)
        return null; // stale, future, or unknown timestamp
    // Guard: reject uninitialized/zero-size bounds to prevent clicking at (0,0).
    // Written as a negated >= so that NaN/undefined sizes are rejected too.
    if (!(bounds.width >= MIN_WINDOW_SIDE_PX) || !(bounds.height >= MIN_WINDOW_SIDE_PX))
        return null;
    return appMap.resolvePosition(bid, target, bounds);
}
|
|
3778
3981
|
// ── click_with_fallback ──
|
|
3779
3982
|
server.tool("click_with_fallback", "Click a target by text using the canonical fallback chain: AX → CDP → OCR. Automatically retries and falls through methods.", {
|
|
@@ -3781,10 +3984,37 @@ server.tool("click_with_fallback", "Click a target by text using the canonical f
|
|
|
3781
3984
|
bundleId: z.string().optional().describe("App bundle ID (for AX path)"),
|
|
3782
3985
|
}, async ({ target, bundleId }) => {
|
|
3783
3986
|
await ensureBridge();
|
|
3784
|
-
const
|
|
3987
|
+
const preCheckWarnings = await preExecutionCheck(bundleId);
|
|
3988
|
+
// L7→L1: If AppMap knows this element's position, try coordinates first.
|
|
3989
|
+
// WARNING: Coordinate clicks are unverified — if the window moved or a modal
|
|
3990
|
+
// appeared, the click may hit the wrong target. On failure, falls through to
|
|
3991
|
+
// the standard AX/CDP/OCR chain which verifies element identity.
|
|
3992
|
+
// Skip map-guided shortcut if precheck detected blocking conditions (dialogs, off-screen)
|
|
3993
|
+
const hasBlockingCondition = preCheckWarnings.some((w) => w.includes("dialog") || w.includes("off-screen") || w.includes("not frontmost"));
|
|
3994
|
+
const mapPos = !hasBlockingCondition ? resolveMapPosition(target, bundleId) : null;
|
|
3995
|
+
if (mapPos) {
|
|
3996
|
+
try {
|
|
3997
|
+
const start = Date.now();
|
|
3998
|
+
await bridge.call("cg.mouseClick", { x: mapPos.x, y: mapPos.y });
|
|
3999
|
+
preCheckWarnings.push(`[L7→L1] Used map position (${mapPos.x}, ${mapPos.y}) for "${target}" — UNVERIFIED coordinate click`);
|
|
4000
|
+
return formatResult("Clicked", target, {
|
|
4001
|
+
ok: true, method: "coordinates", durationMs: Date.now() - start,
|
|
4002
|
+
fallbackFrom: null, retries: 0, error: null, target: `${target} at (${mapPos.x},${mapPos.y}) [map-guided, unverified]`,
|
|
4003
|
+
}, preCheckWarnings);
|
|
4004
|
+
}
|
|
4005
|
+
catch {
|
|
4006
|
+
preCheckWarnings.push(`[L7→L1] Map position click failed — falling back to standard chain`);
|
|
4007
|
+
}
|
|
4008
|
+
}
|
|
4009
|
+
const plan = planExecution("click", infra(), getSensorRanking())
|
|
3785
4010
|
.filter((m) => m !== "coordinates");
|
|
3786
4011
|
const targetPid = await resolvePid(bundleId);
|
|
3787
|
-
|
|
4012
|
+
// L2→L1: Resolve known selector from references for direct injection
|
|
4013
|
+
const knownSelector = contextTracker.getSelector(target);
|
|
4014
|
+
if (knownSelector) {
|
|
4015
|
+
preCheckWarnings.push(`[L2→L1] Injecting known selector: ${knownSelector}`);
|
|
4016
|
+
}
|
|
4017
|
+
const result = await executeWithFallback("click", plan, getAdaptedRetryPolicy("click_with_fallback"), async (method, attempt) => {
|
|
3788
4018
|
const start = Date.now();
|
|
3789
4019
|
try {
|
|
3790
4020
|
switch (method) {
|
|
@@ -3819,15 +4049,28 @@ server.tool("click_with_fallback", "Click a target by text using the canonical f
|
|
|
3819
4049
|
const client = await CDPClient({ port });
|
|
3820
4050
|
try {
|
|
3821
4051
|
const { Runtime } = client;
|
|
3822
|
-
|
|
3823
|
-
|
|
3824
|
-
|
|
4052
|
+
// L2→L1: Try known selector first (wrapped in try/catch to handle
|
|
4053
|
+
// invalid selectors gracefully), then fall back to text search.
|
|
4054
|
+
const textSearchExpr = `Array.from(document.querySelectorAll('*')).find(e =>
|
|
3825
4055
|
e.textContent?.trim() === ${JSON.stringify(target)} ||
|
|
3826
|
-
e.getAttribute('aria-label') === ${JSON.stringify(target)}
|
|
3827
|
-
|
|
4056
|
+
e.getAttribute('aria-label') === ${JSON.stringify(target)})`;
|
|
4057
|
+
const selectorExpr = knownSelector
|
|
4058
|
+
? `(() => {
|
|
4059
|
+
try {
|
|
4060
|
+
const el = document.querySelector(${JSON.stringify(knownSelector)});
|
|
4061
|
+
if (el) { el.click(); return 'clicked'; }
|
|
4062
|
+
} catch(e) { /* invalid selector — fall through to text search */ }
|
|
4063
|
+
const fallback = ${textSearchExpr};
|
|
4064
|
+
if (fallback) { fallback.click(); return 'clicked'; }
|
|
4065
|
+
return null;
|
|
4066
|
+
})()`
|
|
4067
|
+
: `(() => {
|
|
4068
|
+
const el = ${textSearchExpr};
|
|
3828
4069
|
if (el) { el.click(); return 'clicked'; }
|
|
3829
4070
|
return null;
|
|
3830
|
-
})()
|
|
4071
|
+
})()`;
|
|
4072
|
+
const evalResult = await Runtime.evaluate({
|
|
4073
|
+
expression: selectorExpr,
|
|
3831
4074
|
returnByValue: true,
|
|
3832
4075
|
});
|
|
3833
4076
|
if (evalResult.result?.value === "clicked") {
|
|
@@ -3862,7 +4105,7 @@ server.tool("click_with_fallback", "Click a target by text using the canonical f
|
|
|
3862
4105
|
return { ok: false, method, durationMs: Date.now() - start, fallbackFrom: null, retries: attempt, error: err instanceof Error ? err.message : String(err), target };
|
|
3863
4106
|
}
|
|
3864
4107
|
});
|
|
3865
|
-
return formatResult("Clicked", target, result);
|
|
4108
|
+
return formatResult("Clicked", target, result, preCheckWarnings);
|
|
3866
4109
|
});
|
|
3867
4110
|
// ── type_with_fallback ──
|
|
3868
4111
|
server.tool("type_with_fallback", "Type text into a target field using the canonical fallback chain: AX → CDP → coordinates. Finds the field by label/placeholder, focuses it, then types.", {
|
|
@@ -3872,9 +4115,12 @@ server.tool("type_with_fallback", "Type text into a target field using the canon
|
|
|
3872
4115
|
clearFirst: z.boolean().optional().describe("Select-all and clear the field before typing (default: false)"),
|
|
3873
4116
|
}, async ({ target, text, bundleId, clearFirst }) => {
|
|
3874
4117
|
await ensureBridge();
|
|
3875
|
-
const
|
|
4118
|
+
const preCheckWarnings = await preExecutionCheck(bundleId);
|
|
4119
|
+
const plan = planExecution("type", infra(), getSensorRanking());
|
|
3876
4120
|
const targetPid = await resolvePid(bundleId);
|
|
3877
|
-
|
|
4121
|
+
// L2→L1: Resolve known selector for direct injection
|
|
4122
|
+
const knownSelector = contextTracker.getSelector(target);
|
|
4123
|
+
const result = await executeWithFallback("type", plan, getAdaptedRetryPolicy("type_with_fallback"), async (method, attempt) => {
|
|
3878
4124
|
const start = Date.now();
|
|
3879
4125
|
try {
|
|
3880
4126
|
switch (method) {
|
|
@@ -3962,17 +4208,30 @@ server.tool("type_with_fallback", "Type text into a target field using the canon
|
|
|
3962
4208
|
const client = await CDPClient({ port });
|
|
3963
4209
|
try {
|
|
3964
4210
|
const { Runtime, DOM, Input } = client;
|
|
3965
|
-
|
|
3966
|
-
|
|
3967
|
-
|
|
4211
|
+
// L2→L1: Try known selector first (with try/catch for invalid selectors),
|
|
4212
|
+
// then fall back to attribute search.
|
|
4213
|
+
const fieldSearchExpr = `Array.from(document.querySelectorAll('input, textarea, [contenteditable]')).find(e =>
|
|
3968
4214
|
e.getAttribute('placeholder') === ${JSON.stringify(target)} ||
|
|
3969
4215
|
e.getAttribute('aria-label') === ${JSON.stringify(target)} ||
|
|
3970
4216
|
e.getAttribute('name') === ${JSON.stringify(target)} ||
|
|
3971
|
-
(e.labels && Array.from(e.labels).some(l => l.textContent?.trim() === ${JSON.stringify(target)}))
|
|
3972
|
-
|
|
4217
|
+
(e.labels && Array.from(e.labels).some(l => l.textContent?.trim() === ${JSON.stringify(target)})))`;
|
|
4218
|
+
const fieldExpr = knownSelector
|
|
4219
|
+
? `(() => {
|
|
4220
|
+
try {
|
|
4221
|
+
const el = document.querySelector(${JSON.stringify(knownSelector)});
|
|
4222
|
+
if (el) { el.focus(); return true; }
|
|
4223
|
+
} catch(e) { /* invalid selector — fall through */ }
|
|
4224
|
+
const fallback = ${fieldSearchExpr};
|
|
4225
|
+
if (fallback) { fallback.focus(); return true; }
|
|
4226
|
+
return false;
|
|
4227
|
+
})()`
|
|
4228
|
+
: `(() => {
|
|
4229
|
+
const el = ${fieldSearchExpr};
|
|
3973
4230
|
if (el) { el.focus(); return true; }
|
|
3974
4231
|
return false;
|
|
3975
|
-
})()
|
|
4232
|
+
})()`;
|
|
4233
|
+
const evalResult = await Runtime.evaluate({
|
|
4234
|
+
expression: fieldExpr,
|
|
3976
4235
|
returnByValue: true,
|
|
3977
4236
|
});
|
|
3978
4237
|
if (!evalResult.result?.value)
|
|
@@ -3999,7 +4258,7 @@ server.tool("type_with_fallback", "Type text into a target field using the canon
|
|
|
3999
4258
|
return { ok: false, method, durationMs: Date.now() - start, fallbackFrom: null, retries: attempt, error: err instanceof Error ? err.message : String(err), target };
|
|
4000
4259
|
}
|
|
4001
4260
|
});
|
|
4002
|
-
return formatResult("Typed into", target, result);
|
|
4261
|
+
return formatResult("Typed into", target, result, preCheckWarnings);
|
|
4003
4262
|
});
|
|
4004
4263
|
// ── read_with_fallback ──
|
|
4005
4264
|
server.tool("read_with_fallback", "Read text content from the screen or a specific element using the canonical fallback chain: AX → CDP → OCR. Returns the text found.", {
|
|
@@ -4007,9 +4266,15 @@ server.tool("read_with_fallback", "Read text content from the screen or a specif
|
|
|
4007
4266
|
bundleId: z.string().optional().describe("App bundle ID"),
|
|
4008
4267
|
}, async ({ target, bundleId }) => {
|
|
4009
4268
|
await ensureBridge();
|
|
4010
|
-
const
|
|
4269
|
+
const preCheckWarnings = await preExecutionCheck(bundleId);
|
|
4270
|
+
const plan = planExecution("read", infra(), getSensorRanking());
|
|
4011
4271
|
const targetPid = await resolvePid(bundleId);
|
|
4012
|
-
|
|
4272
|
+
// L2→L1: Resolve known selector from references for direct injection
|
|
4273
|
+
const knownSelector = target ? contextTracker.getSelector(target) : null;
|
|
4274
|
+
if (knownSelector) {
|
|
4275
|
+
preCheckWarnings.push(`[L2→L1] Injecting known selector: ${knownSelector}`);
|
|
4276
|
+
}
|
|
4277
|
+
const result = await executeWithFallback("read", plan, getAdaptedRetryPolicy("read_with_fallback"), async (method, attempt) => {
|
|
4013
4278
|
const start = Date.now();
|
|
4014
4279
|
try {
|
|
4015
4280
|
switch (method) {
|
|
@@ -4116,14 +4381,25 @@ server.tool("read_with_fallback", "Read text content from the screen or a specif
|
|
|
4116
4381
|
try {
|
|
4117
4382
|
const { Runtime } = client;
|
|
4118
4383
|
if (target) {
|
|
4119
|
-
|
|
4120
|
-
|
|
4121
|
-
const el = Array.from(document.querySelectorAll('*')).find(e =>
|
|
4384
|
+
// L2→L1: Try known selector first, then fall back to text search
|
|
4385
|
+
const textSearch = `Array.from(document.querySelectorAll('*')).find(e =>
|
|
4122
4386
|
e.getAttribute('aria-label') === ${JSON.stringify(target)} ||
|
|
4123
|
-
e.textContent?.trim() === ${JSON.stringify(target)}
|
|
4124
|
-
|
|
4125
|
-
|
|
4126
|
-
|
|
4387
|
+
e.textContent?.trim() === ${JSON.stringify(target)})`;
|
|
4388
|
+
const expr = knownSelector
|
|
4389
|
+
? `(() => {
|
|
4390
|
+
try {
|
|
4391
|
+
const el = document.querySelector(${JSON.stringify(knownSelector)});
|
|
4392
|
+
if (el) return (el.value ?? el.textContent ?? '').trim();
|
|
4393
|
+
} catch(e) {}
|
|
4394
|
+
const fallback = ${textSearch};
|
|
4395
|
+
return fallback ? (fallback.value ?? fallback.textContent ?? '').trim() : null;
|
|
4396
|
+
})()`
|
|
4397
|
+
: `(() => {
|
|
4398
|
+
const el = ${textSearch};
|
|
4399
|
+
return el ? (el.value ?? el.textContent ?? '').trim() : null;
|
|
4400
|
+
})()`;
|
|
4401
|
+
const evalResult = await Runtime.evaluate({
|
|
4402
|
+
expression: expr,
|
|
4127
4403
|
returnByValue: true,
|
|
4128
4404
|
});
|
|
4129
4405
|
if (evalResult.result?.value == null)
|
|
@@ -4163,11 +4439,13 @@ server.tool("read_with_fallback", "Read text content from the screen or a specif
|
|
|
4163
4439
|
return { ok: false, method, durationMs: Date.now() - start, fallbackFrom: null, retries: attempt, error: err instanceof Error ? err.message : String(err), target: null };
|
|
4164
4440
|
}
|
|
4165
4441
|
});
|
|
4442
|
+
// Custom format (not formatResult) — read results include content inline
|
|
4443
|
+
const prefix = preCheckWarnings.length > 0 ? preCheckWarnings.join("\n") + "\n" : "";
|
|
4166
4444
|
if (result.ok) {
|
|
4167
4445
|
const fallbackNote = result.fallbackFrom ? ` (fell back from ${result.fallbackFrom})` : "";
|
|
4168
|
-
return { content: [{ type: "text", text:
|
|
4446
|
+
return { content: [{ type: "text", text: `${prefix}Read via ${result.method}${fallbackNote} in ${result.durationMs}ms:\n\n${result.target}` }] };
|
|
4169
4447
|
}
|
|
4170
|
-
return { content: [{ type: "text", text:
|
|
4448
|
+
return { content: [{ type: "text", text: `${prefix}Failed to read${target ? ` "${target}"` : ""} — all methods exhausted. Last error: ${result.error}` }] };
|
|
4171
4449
|
});
|
|
4172
4450
|
// ── locate_with_fallback ──
|
|
4173
4451
|
server.tool("locate_with_fallback", "Find an element's position on screen using the canonical fallback chain: AX → CDP → OCR. Returns bounds (x, y, width, height).", {
|
|
@@ -4175,9 +4453,22 @@ server.tool("locate_with_fallback", "Find an element's position on screen using
|
|
|
4175
4453
|
bundleId: z.string().optional().describe("App bundle ID"),
|
|
4176
4454
|
}, async ({ target, bundleId }) => {
|
|
4177
4455
|
await ensureBridge();
|
|
4178
|
-
const
|
|
4456
|
+
const preCheckWarnings = await preExecutionCheck(bundleId);
|
|
4457
|
+
// L7→L1: If AppMap knows this element's position, return it immediately
|
|
4458
|
+
const mapPos = resolveMapPosition(target, bundleId);
|
|
4459
|
+
if (mapPos) {
|
|
4460
|
+
// Map provides center point only — use as hint, not authoritative bounds.
|
|
4461
|
+
// Fall through to full locate chain for accurate bounds.
|
|
4462
|
+
preCheckWarnings.push(`[L7→L1] Map hint: "${target}" expected near (${mapPos.x}, ${mapPos.y}) — verifying via locate chain`);
|
|
4463
|
+
}
|
|
4464
|
+
const plan = planExecution("locate", infra(), getSensorRanking());
|
|
4179
4465
|
const targetPid = await resolvePid(bundleId);
|
|
4180
|
-
|
|
4466
|
+
// L2→L1: Resolve known selector from references for direct injection
|
|
4467
|
+
const knownSelector = contextTracker.getSelector(target);
|
|
4468
|
+
if (knownSelector) {
|
|
4469
|
+
preCheckWarnings.push(`[L2→L1] Injecting known selector: ${knownSelector}`);
|
|
4470
|
+
}
|
|
4471
|
+
const result = await executeWithFallback("locate", plan, getAdaptedRetryPolicy("locate_with_fallback"), async (method, attempt) => {
|
|
4181
4472
|
const start = Date.now();
|
|
4182
4473
|
try {
|
|
4183
4474
|
switch (method) {
|
|
@@ -4210,16 +4501,29 @@ server.tool("locate_with_fallback", "Find an element's position on screen using
|
|
|
4210
4501
|
const client = await CDPClient({ port });
|
|
4211
4502
|
try {
|
|
4212
4503
|
const { Runtime } = client;
|
|
4213
|
-
|
|
4214
|
-
|
|
4215
|
-
const el = Array.from(document.querySelectorAll('*')).find(e =>
|
|
4504
|
+
// L2→L1: Try known selector first, then fall back to text search
|
|
4505
|
+
const textSearch = `Array.from(document.querySelectorAll('*')).find(e =>
|
|
4216
4506
|
e.textContent?.trim() === ${JSON.stringify(target)} ||
|
|
4217
|
-
e.getAttribute('aria-label') === ${JSON.stringify(target)}
|
|
4218
|
-
|
|
4219
|
-
|
|
4220
|
-
|
|
4221
|
-
|
|
4222
|
-
|
|
4507
|
+
e.getAttribute('aria-label') === ${JSON.stringify(target)})`;
|
|
4508
|
+
const expr = knownSelector
|
|
4509
|
+
? `(() => {
|
|
4510
|
+
try {
|
|
4511
|
+
const el = document.querySelector(${JSON.stringify(knownSelector)});
|
|
4512
|
+
if (el) { const r = el.getBoundingClientRect(); return { x: Math.round(r.x), y: Math.round(r.y), width: Math.round(r.width), height: Math.round(r.height) }; }
|
|
4513
|
+
} catch(e) {}
|
|
4514
|
+
const fallback = ${textSearch};
|
|
4515
|
+
if (!fallback) return null;
|
|
4516
|
+
const r = fallback.getBoundingClientRect();
|
|
4517
|
+
return { x: Math.round(r.x), y: Math.round(r.y), width: Math.round(r.width), height: Math.round(r.height) };
|
|
4518
|
+
})()`
|
|
4519
|
+
: `(() => {
|
|
4520
|
+
const el = ${textSearch};
|
|
4521
|
+
if (!el) return null;
|
|
4522
|
+
const r = el.getBoundingClientRect();
|
|
4523
|
+
return { x: Math.round(r.x), y: Math.round(r.y), width: Math.round(r.width), height: Math.round(r.height) };
|
|
4524
|
+
})()`;
|
|
4525
|
+
const evalResult = await Runtime.evaluate({
|
|
4526
|
+
expression: expr,
|
|
4223
4527
|
returnByValue: true,
|
|
4224
4528
|
});
|
|
4225
4529
|
const bounds = evalResult.result?.value;
|
|
@@ -4250,7 +4554,7 @@ server.tool("locate_with_fallback", "Find an element's position on screen using
|
|
|
4250
4554
|
return { ok: false, method, durationMs: Date.now() - start, fallbackFrom: null, retries: attempt, error: err instanceof Error ? err.message : String(err), target: null };
|
|
4251
4555
|
}
|
|
4252
4556
|
});
|
|
4253
|
-
return formatResult("Located", target, result);
|
|
4557
|
+
return formatResult("Located", target, result, preCheckWarnings);
|
|
4254
4558
|
});
|
|
4255
4559
|
// ── select_with_fallback ──
|
|
4256
4560
|
server.tool("select_with_fallback", "Select an option from a dropdown/menu using the canonical fallback chain: AX → CDP. Finds the control, opens it, and picks the specified option.", {
|
|
@@ -4259,9 +4563,15 @@ server.tool("select_with_fallback", "Select an option from a dropdown/menu using
|
|
|
4259
4563
|
bundleId: z.string().optional().describe("App bundle ID"),
|
|
4260
4564
|
}, async ({ target, option, bundleId }) => {
|
|
4261
4565
|
await ensureBridge();
|
|
4262
|
-
const
|
|
4566
|
+
const preCheckWarnings = await preExecutionCheck(bundleId);
|
|
4567
|
+
const plan = planExecution("select", infra(), getSensorRanking());
|
|
4263
4568
|
const targetPid = await resolvePid(bundleId);
|
|
4264
|
-
|
|
4569
|
+
// L2→L1: Resolve known selector from references for direct injection
|
|
4570
|
+
const knownSelector = contextTracker.getSelector(target);
|
|
4571
|
+
if (knownSelector) {
|
|
4572
|
+
preCheckWarnings.push(`[L2→L1] Injecting known selector: ${knownSelector}`);
|
|
4573
|
+
}
|
|
4574
|
+
const result = await executeWithFallback("select", plan, getAdaptedRetryPolicy("select_with_fallback"), async (method, attempt) => {
|
|
4265
4575
|
const start = Date.now();
|
|
4266
4576
|
try {
|
|
4267
4577
|
switch (method) {
|
|
@@ -4291,20 +4601,34 @@ server.tool("select_with_fallback", "Select an option from a dropdown/menu using
|
|
|
4291
4601
|
const client = await CDPClient({ port });
|
|
4292
4602
|
try {
|
|
4293
4603
|
const { Runtime } = client;
|
|
4294
|
-
|
|
4295
|
-
|
|
4296
|
-
const sel = Array.from(document.querySelectorAll('select')).find(s =>
|
|
4604
|
+
// L2→L1: Try known selector first for the select element
|
|
4605
|
+
const textSearch = `Array.from(document.querySelectorAll('select')).find(s =>
|
|
4297
4606
|
s.getAttribute('aria-label') === ${JSON.stringify(target)} ||
|
|
4298
4607
|
s.getAttribute('name') === ${JSON.stringify(target)} ||
|
|
4299
|
-
(s.labels && Array.from(s.labels).some(l => l.textContent?.trim() === ${JSON.stringify(target)}))
|
|
4300
|
-
|
|
4301
|
-
|
|
4302
|
-
|
|
4303
|
-
|
|
4304
|
-
|
|
4305
|
-
|
|
4306
|
-
|
|
4307
|
-
|
|
4608
|
+
(s.labels && Array.from(s.labels).some(l => l.textContent?.trim() === ${JSON.stringify(target)})))`;
|
|
4609
|
+
const selectExpr = knownSelector
|
|
4610
|
+
? `(() => {
|
|
4611
|
+
let sel = null;
|
|
4612
|
+
try { sel = document.querySelector(${JSON.stringify(knownSelector)}); } catch(e) {}
|
|
4613
|
+
if (!sel || sel.tagName !== 'SELECT') sel = ${textSearch};
|
|
4614
|
+
if (!sel) return null;
|
|
4615
|
+
const opt = Array.from(sel.options).find(o => o.text.trim() === ${JSON.stringify(option)} || o.value === ${JSON.stringify(option)});
|
|
4616
|
+
if (!opt) return 'no_option';
|
|
4617
|
+
sel.value = opt.value;
|
|
4618
|
+
sel.dispatchEvent(new Event('change', { bubbles: true }));
|
|
4619
|
+
return 'selected';
|
|
4620
|
+
})()`
|
|
4621
|
+
: `(() => {
|
|
4622
|
+
const sel = ${textSearch};
|
|
4623
|
+
if (!sel) return null;
|
|
4624
|
+
const opt = Array.from(sel.options).find(o => o.text.trim() === ${JSON.stringify(option)} || o.value === ${JSON.stringify(option)});
|
|
4625
|
+
if (!opt) return 'no_option';
|
|
4626
|
+
sel.value = opt.value;
|
|
4627
|
+
sel.dispatchEvent(new Event('change', { bubbles: true }));
|
|
4628
|
+
return 'selected';
|
|
4629
|
+
})()`;
|
|
4630
|
+
const evalResult = await Runtime.evaluate({
|
|
4631
|
+
expression: selectExpr,
|
|
4308
4632
|
returnByValue: true,
|
|
4309
4633
|
});
|
|
4310
4634
|
if (evalResult.result?.value === "selected") {
|
|
@@ -4325,7 +4649,7 @@ server.tool("select_with_fallback", "Select an option from a dropdown/menu using
|
|
|
4325
4649
|
return { ok: false, method, durationMs: Date.now() - start, fallbackFrom: null, retries: attempt, error: err instanceof Error ? err.message : String(err), target: null };
|
|
4326
4650
|
}
|
|
4327
4651
|
});
|
|
4328
|
-
return formatResult("Selected", `${target} → ${option}`, result);
|
|
4652
|
+
return formatResult("Selected", `${target} → ${option}`, result, preCheckWarnings);
|
|
4329
4653
|
});
|
|
4330
4654
|
// ── scroll_with_fallback ──
|
|
4331
4655
|
server.tool("scroll_with_fallback", "Scroll within an element or the active window using the canonical fallback chain: AX → CDP → coordinates. Scrolls until target text is visible, or by a fixed amount.", {
|
|
@@ -4335,9 +4659,15 @@ server.tool("scroll_with_fallback", "Scroll within an element or the active wind
|
|
|
4335
4659
|
bundleId: z.string().optional().describe("App bundle ID"),
|
|
4336
4660
|
}, async ({ direction, amount, target, bundleId }) => {
|
|
4337
4661
|
await ensureBridge();
|
|
4338
|
-
const
|
|
4662
|
+
const preCheckWarnings = await preExecutionCheck(bundleId);
|
|
4663
|
+
const plan = planExecution("scroll", infra(), getSensorRanking());
|
|
4339
4664
|
const targetPid = await resolvePid(bundleId);
|
|
4340
4665
|
const scrollAmount = amount ?? 300;
|
|
4666
|
+
// L2→L1: Resolve known selector from references for scroll container
|
|
4667
|
+
const knownSelector = target ? contextTracker.getSelector(target) : null;
|
|
4668
|
+
if (knownSelector) {
|
|
4669
|
+
preCheckWarnings.push(`[L2→L1] Injecting known selector: ${knownSelector}`);
|
|
4670
|
+
}
|
|
4341
4671
|
// Resolve scroll coordinates — center of the frontmost window
|
|
4342
4672
|
let scrollX = 400, scrollY = 400;
|
|
4343
4673
|
try {
|
|
@@ -4373,7 +4703,7 @@ server.tool("scroll_with_fallback", "Scroll within an element or the active wind
|
|
|
4373
4703
|
return { content: [{ type: "text", text: `Scrolled ${direction} 10 times but "${target}" not found.` }] };
|
|
4374
4704
|
}
|
|
4375
4705
|
// Fixed-amount scroll via fallback chain
|
|
4376
|
-
const result = await executeWithFallback("scroll", plan, getAdaptedRetryPolicy(), async (method, attempt) => {
|
|
4706
|
+
const result = await executeWithFallback("scroll", plan, getAdaptedRetryPolicy("scroll_with_fallback"), async (method, attempt) => {
|
|
4377
4707
|
const start = Date.now();
|
|
4378
4708
|
try {
|
|
4379
4709
|
const deltaX = direction === "left" ? -scrollAmount : direction === "right" ? scrollAmount : 0;
|
|
@@ -4391,9 +4721,18 @@ server.tool("scroll_with_fallback", "Scroll within an element or the active wind
|
|
|
4391
4721
|
const client = await CDPClient({ port });
|
|
4392
4722
|
try {
|
|
4393
4723
|
const { Runtime } = client;
|
|
4394
|
-
|
|
4395
|
-
|
|
4396
|
-
|
|
4724
|
+
// L2→L1: Try scrolling known selector container first
|
|
4725
|
+
const scrollExpr = knownSelector
|
|
4726
|
+
? `(() => {
|
|
4727
|
+
try {
|
|
4728
|
+
const el = document.querySelector(${JSON.stringify(knownSelector)});
|
|
4729
|
+
if (el) { el.scrollBy(${deltaX}, ${deltaY}); return 'scrolled'; }
|
|
4730
|
+
} catch(e) {}
|
|
4731
|
+
window.scrollBy(${deltaX}, ${deltaY});
|
|
4732
|
+
return 'scrolled';
|
|
4733
|
+
})()`
|
|
4734
|
+
: `window.scrollBy(${deltaX}, ${deltaY})`;
|
|
4735
|
+
await Runtime.evaluate({ expression: scrollExpr });
|
|
4397
4736
|
return { ok: true, method, durationMs: Date.now() - start, fallbackFrom: null, retries: attempt, error: null, target: `${direction} ${scrollAmount}px` };
|
|
4398
4737
|
}
|
|
4399
4738
|
finally {
|
|
@@ -4411,7 +4750,7 @@ server.tool("scroll_with_fallback", "Scroll within an element or the active wind
|
|
|
4411
4750
|
return { ok: false, method, durationMs: Date.now() - start, fallbackFrom: null, retries: attempt, error: err instanceof Error ? err.message : String(err), target: null };
|
|
4412
4751
|
}
|
|
4413
4752
|
});
|
|
4414
|
-
return formatResult("Scrolled", `${direction} ${scrollAmount}px`, result);
|
|
4753
|
+
return formatResult("Scrolled", `${direction} ${scrollAmount}px`, result, preCheckWarnings);
|
|
4415
4754
|
});
|
|
4416
4755
|
// ── wait_for_state ──
|
|
4417
4756
|
server.tool("wait_for_state", "Wait until a condition is met on screen: text appears, text disappears, or element becomes available. Polls at intervals using the fallback chain.", {
|
|
@@ -4741,6 +5080,8 @@ function getJobRunner() {
|
|
|
4741
5080
|
const locCache = new LocatorCache();
|
|
4742
5081
|
locCache.setLearningEngine(learningEngine);
|
|
4743
5082
|
const runtimeService = new AutomationRuntimeService(adapter, logger, locCache);
|
|
5083
|
+
// Wire #15: connect AppMap to Executor for skip-verify optimization
|
|
5084
|
+
runtimeService.setAppMap(appMap);
|
|
4744
5085
|
const playbookEngine = new PlaybookEngine(runtimeService);
|
|
4745
5086
|
activePlaybookEngine = playbookEngine;
|
|
4746
5087
|
// Wire CDP into playbook engine for browser_js / cdp_key_event steps
|
|
@@ -4889,7 +5230,7 @@ originalTool("worker_status", "Get the current status of the worker daemon (read
|
|
|
4889
5230
|
// ═══════════════════════════════════════════════
|
|
4890
5231
|
// PLANNER — goal-oriented planning
|
|
4891
5232
|
// ═══════════════════════════════════════════════
|
|
4892
|
-
originalTool("plan_goal", "
|
|
5233
|
+
originalTool("plan_goal", "Describe WHAT you want to achieve — ScreenHand builds a step-by-step plan by searching playbooks, saved strategies, and platform references. Returns steps with confidence scores. Does NOT execute — review the plan, then use plan_execute() or plan_step() to run it. Use for complex multi-step workflows instead of figuring out each step yourself.", {
|
|
4893
5234
|
goal: z.string().describe("What you want to achieve (e.g. 'Export Premiere Pro timeline as H.264')"),
|
|
4894
5235
|
}, async ({ goal: goalDescription }) => {
|
|
4895
5236
|
const goal = planner.createGoal(goalDescription);
|
|
@@ -4924,7 +5265,7 @@ originalTool("plan_goal", "Create a goal and generate an execution plan. Returns
|
|
|
4924
5265
|
_meta: { goalId: goal.id, plan },
|
|
4925
5266
|
};
|
|
4926
5267
|
});
|
|
4927
|
-
originalTool("plan_execute", "
|
|
5268
|
+
originalTool("plan_execute", "Run a plan automatically. Known steps (from playbooks/references) execute internally at full speed. Pauses at LLM steps where YOUR judgment is needed — call plan_step_resolve() to provide the tool+params. On completion, the successful strategy is auto-saved to memory for future reuse.", {
|
|
4928
5269
|
goalId: z.string().describe("Goal ID from plan_goal"),
|
|
4929
5270
|
}, async ({ goalId }) => {
|
|
4930
5271
|
const goal = goalStore.get(goalId);
|
|
@@ -4933,6 +5274,7 @@ originalTool("plan_execute", "Execute a goal's plan automatically. Runs determin
|
|
|
4933
5274
|
}
|
|
4934
5275
|
const adaptiveBudget = learningEngine.getAdaptiveBudget(worldModel.getState().focusedApp?.bundleId ?? "unknown");
|
|
4935
5276
|
const executor = new PlanExecutor(worldModel, planner, toolRegistry.toExecutor(), { postconditionWaitMs: adaptiveBudget.verifyMs, defaultStepTimeout: Math.max(30_000, adaptiveBudget.actMs * 2) }, recoveryEngine, learningEngine);
|
|
5277
|
+
executor.setAppMap(appMap);
|
|
4936
5278
|
const result = await executor.executeGoal(goal);
|
|
4937
5279
|
goalStore.update(goalId, goal);
|
|
4938
5280
|
// Check if paused at an LLM step
|
|
@@ -4994,6 +5336,7 @@ originalTool("plan_step", "Execute the next single step of a goal. For increment
|
|
|
4994
5336
|
}
|
|
4995
5337
|
const adaptiveBudget = learningEngine.getAdaptiveBudget(worldModel.getState().focusedApp?.bundleId ?? "unknown");
|
|
4996
5338
|
const executor = new PlanExecutor(worldModel, planner, toolRegistry.toExecutor(), { postconditionWaitMs: adaptiveBudget.verifyMs, defaultStepTimeout: Math.max(30_000, adaptiveBudget.actMs * 2) }, recoveryEngine, learningEngine);
|
|
5339
|
+
executor.setAppMap(appMap);
|
|
4997
5340
|
const result = await executor.executeNextStep(goal);
|
|
4998
5341
|
goalStore.update(goalId, goal);
|
|
4999
5342
|
if ("paused" in result) {
|
|
@@ -5037,6 +5380,7 @@ originalTool("plan_step_resolve", "Resolve a paused LLM step by providing the to
|
|
|
5037
5380
|
}
|
|
5038
5381
|
const adaptiveBudget = learningEngine.getAdaptiveBudget(worldModel.getState().focusedApp?.bundleId ?? "unknown");
|
|
5039
5382
|
const executor = new PlanExecutor(worldModel, planner, toolRegistry.toExecutor(), { postconditionWaitMs: adaptiveBudget.verifyMs, defaultStepTimeout: Math.max(30_000, adaptiveBudget.actMs * 2) }, recoveryEngine, learningEngine);
|
|
5383
|
+
executor.setAppMap(appMap);
|
|
5040
5384
|
const result = await executor.resolveStep(goal, tool, params ?? {});
|
|
5041
5385
|
goalStore.update(goalId, goal);
|
|
5042
5386
|
return {
|
|
@@ -5277,6 +5621,10 @@ originalTool("perception_start", "Start continuous screen monitoring — ScreenH
|
|
|
5277
5621
|
return { content: [{ type: "text", text: `Perception already running (started ${stats.startedAt}). Use perception_stop first to restart, or pass bundleId to switch target.` }] };
|
|
5278
5622
|
}
|
|
5279
5623
|
let app = worldModel.getState().focusedApp;
|
|
5624
|
+
// Validate bundleId format before it touches AppleScript/exec
|
|
5625
|
+
if (overrideBundleId && !/^[a-zA-Z0-9._-]+$/.test(overrideBundleId)) {
|
|
5626
|
+
return { content: [{ type: "text", text: "Error: Invalid bundleId format. Only alphanumeric characters, dots, hyphens, and underscores are allowed." }] };
|
|
5627
|
+
}
|
|
5280
5628
|
// If bundleId override provided, try to resolve app info via bridge or AppleScript
|
|
5281
5629
|
if (overrideBundleId && (!app || app.bundleId !== overrideBundleId)) {
|
|
5282
5630
|
try {
|
|
@@ -5758,7 +6106,37 @@ server.tool("scan_menu_bar", "Scan an app's menu bar via AX tree. Extracts all m
|
|
|
5758
6106
|
safePath = safePath.replace(/Log Out [^\n:]+/g, "Log Out [USER]");
|
|
5759
6107
|
lines.push(` ${safePath}: ${keys}`);
|
|
5760
6108
|
}
|
|
5761
|
-
|
|
6109
|
+
// Wire #12: L6→L7 — bootstrap AppMap zones from menu scan
|
|
6110
|
+
let bootstrapInfo = "";
|
|
6111
|
+
if (appMap) {
|
|
6112
|
+
const bootstrapped = appMap.bootstrapFromMenuScan(bundleId, appName, result);
|
|
6113
|
+
// Clear hint unconditionally — the scan was attempted regardless of bootstrap outcome
|
|
6114
|
+
contextTracker.clearMenuScanHint();
|
|
6115
|
+
if (bootstrapped) {
|
|
6116
|
+
bootstrapInfo = `\nAppMap: bootstrapped zones from menu structure (new app)`;
|
|
6117
|
+
}
|
|
6118
|
+
}
|
|
6119
|
+
// Wire F8: Seed learning from menu scan shortcuts (L6→L5)
|
|
6120
|
+
// Use successCount=5 and score=0.6 so seeds pass recommend() thresholds
|
|
6121
|
+
// (minSamples=5 for locators, score > 0.5 for patterns)
|
|
6122
|
+
if (learningEngine && result.shortcuts) {
|
|
6123
|
+
for (const [menuPath, keys] of Object.entries(result.shortcuts)) {
|
|
6124
|
+
const key = LocatorPolicy.makeKey(bundleId, "key");
|
|
6125
|
+
learningEngine.locators.seedEntry({
|
|
6126
|
+
key, locator: keys, method: "ax",
|
|
6127
|
+
successCount: 5, failCount: 0, score: 0.6,
|
|
6128
|
+
lastUsed: new Date().toISOString(),
|
|
6129
|
+
});
|
|
6130
|
+
// Also seed as pattern: menu_click with the menu path
|
|
6131
|
+
learningEngine.patterns.seedEntry({
|
|
6132
|
+
key: `${bundleId}::menu_click::${menuPath}`,
|
|
6133
|
+
bundleId, tool: "menu_click", locator: menuPath,
|
|
6134
|
+
method: "ax", successCount: 3, failCount: 0, score: 0.6,
|
|
6135
|
+
lastSeen: new Date().toISOString(),
|
|
6136
|
+
});
|
|
6137
|
+
}
|
|
6138
|
+
}
|
|
6139
|
+
let output = lines.join("\n") + bootstrapInfo;
|
|
5762
6140
|
output = redactUsername(output);
|
|
5763
6141
|
output = output.replace(/Log Out [^\n:]+/g, "Log Out [USER]");
|
|
5764
6142
|
return { content: [{ type: "text", text: output }] };
|
|
@@ -5803,6 +6181,24 @@ server.tool("ingest_documentation", "Parse a documentation page (HTML, markdown,
|
|
|
5803
6181
|
lines.push(` - ${t}`);
|
|
5804
6182
|
}
|
|
5805
6183
|
}
|
|
6184
|
+
// Wire F8: Seed learning from ingested documentation flows (L6→L5)
|
|
6185
|
+
if (learningEngine && result.flows) {
|
|
6186
|
+
for (const flow of result.flows) {
|
|
6187
|
+
for (const step of flow.steps) {
|
|
6188
|
+
if (!step.tool)
|
|
6189
|
+
continue;
|
|
6190
|
+
const target = (step.params?.text ?? step.params?.title ?? step.params?.target ?? step.description);
|
|
6191
|
+
if (target) {
|
|
6192
|
+
learningEngine.patterns.seedEntry({
|
|
6193
|
+
key: `${bundleId}::${step.tool}::${target}`,
|
|
6194
|
+
bundleId, tool: step.tool, locator: String(target),
|
|
6195
|
+
method: "ax", successCount: 3, failCount: 0, score: 0.6,
|
|
6196
|
+
lastSeen: new Date().toISOString(),
|
|
6197
|
+
});
|
|
6198
|
+
}
|
|
6199
|
+
}
|
|
6200
|
+
}
|
|
6201
|
+
}
|
|
5806
6202
|
return { content: [{ type: "text", text: lines.join("\n") }] };
|
|
5807
6203
|
});
|
|
5808
6204
|
server.tool("ingest_tutorial", "Extract structured playbook steps from a video transcript (e.g. YouTube captions). Converts tutorial narration into actionable automation steps with tool mappings.", {
|
|
@@ -5927,6 +6323,14 @@ originalTool("community_fetch", "Search community playbooks for a platform or wo
|
|
|
5927
6323
|
lines.push(` Score: ${pb.ratings.score} | By: ${pb.metadata.author}`);
|
|
5928
6324
|
lines.push("");
|
|
5929
6325
|
}
|
|
6326
|
+
// Wire F9: Import community playbooks into AppMap (L6→L7)
|
|
6327
|
+
if (appMap) {
|
|
6328
|
+
for (const pb of results) {
|
|
6329
|
+
if (pb.bundleId && pb.steps.length > 0) {
|
|
6330
|
+
appMap.importFromPlaybook(pb.bundleId, pb.name, pb.steps);
|
|
6331
|
+
}
|
|
6332
|
+
}
|
|
6333
|
+
}
|
|
5930
6334
|
return { content: [{ type: "text", text: lines.join("\n") }] };
|
|
5931
6335
|
});
|
|
5932
6336
|
// ═══════════════════════════════════════════════
|