screenhand 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +165 -446
- package/bin/darwin-arm64/macos-bridge +0 -0
- package/dist/mcp-desktop.js +3549 -404
- package/dist/scripts/export-help-center.js +112 -0
- package/dist/scripts/marketing-loop.js +117 -0
- package/dist/scripts/observer-daemon.js +288 -0
- package/dist/scripts/orchestrator-daemon.js +399 -0
- package/dist/scripts/threads-campaign.js +208 -0
- package/dist/src/community/fetcher.js +109 -0
- package/dist/src/community/index.js +6 -0
- package/dist/src/community/publisher.js +191 -0
- package/dist/src/community/remote-api.js +121 -0
- package/dist/src/community/types.js +3 -0
- package/dist/src/community/validator.js +95 -0
- package/dist/src/context-tracker.js +489 -0
- package/dist/src/ingestion/coverage-auditor.js +233 -0
- package/dist/src/ingestion/doc-parser.js +164 -0
- package/dist/src/ingestion/index.js +8 -0
- package/dist/src/ingestion/menu-scanner.js +152 -0
- package/dist/src/ingestion/reference-merger.js +186 -0
- package/dist/src/ingestion/shortcut-extractor.js +180 -0
- package/dist/src/ingestion/tutorial-extractor.js +170 -0
- package/dist/src/ingestion/types.js +3 -0
- package/dist/src/jobs/manager.js +82 -14
- package/dist/src/jobs/runner.js +138 -15
- package/dist/src/learning/engine.js +356 -0
- package/dist/src/learning/index.js +9 -0
- package/dist/src/learning/locator-policy.js +120 -0
- package/dist/src/learning/pattern-policy.js +89 -0
- package/dist/src/learning/recovery-policy.js +116 -0
- package/dist/src/learning/sensor-policy.js +115 -0
- package/dist/src/learning/timing-model.js +204 -0
- package/dist/src/learning/topology-policy.js +90 -0
- package/dist/src/learning/types.js +9 -0
- package/dist/src/logging/timeline-logger.js +4 -1
- package/dist/src/memory/playbook-seeds.js +200 -0
- package/dist/src/memory/recall.js +60 -8
- package/dist/src/memory/service.js +30 -5
- package/dist/src/memory/store.js +34 -5
- package/dist/src/native/bridge-client.js +253 -31
- package/dist/src/observer/state.js +199 -0
- package/dist/src/observer/types.js +43 -0
- package/dist/src/orchestrator/state.js +68 -0
- package/dist/src/orchestrator/types.js +22 -0
- package/dist/src/perception/ax-source.js +162 -0
- package/dist/src/perception/cdp-source.js +162 -0
- package/dist/src/perception/coordinator.js +771 -0
- package/dist/src/perception/frame-differ.js +287 -0
- package/dist/src/perception/index.js +22 -0
- package/dist/src/perception/manager.js +199 -0
- package/dist/src/perception/types.js +47 -0
- package/dist/src/perception/vision-source.js +399 -0
- package/dist/src/planner/deterministic.js +298 -0
- package/dist/src/planner/executor.js +870 -0
- package/dist/src/planner/goal-store.js +92 -0
- package/dist/src/planner/index.js +21 -0
- package/dist/src/planner/planner.js +520 -0
- package/dist/src/planner/tool-registry.js +71 -0
- package/dist/src/planner/types.js +22 -0
- package/dist/src/platform/explorer.js +213 -0
- package/dist/src/platform/help-center-markdown.js +527 -0
- package/dist/src/platform/learner.js +257 -0
- package/dist/src/playbook/engine.js +296 -11
- package/dist/src/playbook/mcp-recorder.js +204 -0
- package/dist/src/playbook/recorder.js +3 -2
- package/dist/src/playbook/runner.js +1 -1
- package/dist/src/playbook/store.js +139 -10
- package/dist/src/recovery/detectors.js +156 -0
- package/dist/src/recovery/engine.js +327 -0
- package/dist/src/recovery/index.js +20 -0
- package/dist/src/recovery/strategies.js +274 -0
- package/dist/src/recovery/types.js +20 -0
- package/dist/src/runtime/accessibility-adapter.js +55 -18
- package/dist/src/runtime/applescript-adapter.js +8 -2
- package/dist/src/runtime/cdp-chrome-adapter.js +1 -1
- package/dist/src/runtime/executor.js +23 -3
- package/dist/src/runtime/locator-cache.js +24 -2
- package/dist/src/runtime/service.js +59 -15
- package/dist/src/runtime/session-manager.js +4 -1
- package/dist/src/runtime/vision-adapter.js +2 -1
- package/dist/src/state/app-map-types.js +72 -0
- package/dist/src/state/app-map.js +1974 -0
- package/dist/src/state/entity-tracker.js +108 -0
- package/dist/src/state/fusion.js +96 -0
- package/dist/src/state/index.js +21 -0
- package/dist/src/state/ladder-generator.js +236 -0
- package/dist/src/state/persistence.js +156 -0
- package/dist/src/state/types.js +17 -0
- package/dist/src/state/world-model.js +1456 -0
- package/dist/src/util/atomic-write.js +19 -4
- package/dist/src/util/sanitize.js +146 -0
- package/dist-app-maps/com.figma.Desktop.json +959 -0
- package/dist-app-maps/com.hnc.Discord.json +1146 -0
- package/dist-app-maps/notion.id.json +2831 -0
- package/dist-playbooks/canva-screenhand-carousel.json +445 -0
- package/dist-playbooks/codex-desktop.json +76 -0
- package/dist-playbooks/competitor-research-stack.json +122 -0
- package/dist-playbooks/davinci-color-grade.json +153 -0
- package/dist-playbooks/davinci-edit-timeline.json +162 -0
- package/dist-playbooks/davinci-render.json +114 -0
- package/dist-playbooks/devto.json +52 -0
- package/dist-playbooks/discord.json +41 -0
- package/dist-playbooks/google-flow-create-project.json +59 -0
- package/dist-playbooks/google-flow-edit-image.json +90 -0
- package/dist-playbooks/google-flow-edit-video.json +90 -0
- package/dist-playbooks/google-flow-generate-image.json +68 -0
- package/dist-playbooks/google-flow-generate-video.json +191 -0
- package/dist-playbooks/google-flow-open-project.json +48 -0
- package/dist-playbooks/google-flow-open-scenebuilder.json +64 -0
- package/dist-playbooks/google-flow-search-assets.json +64 -0
- package/dist-playbooks/instagram.json +57 -0
- package/dist-playbooks/linkedin.json +52 -0
- package/dist-playbooks/n8n.json +43 -0
- package/dist-playbooks/reddit.json +52 -0
- package/dist-playbooks/threads.json +59 -0
- package/dist-playbooks/x-twitter.json +59 -0
- package/dist-playbooks/youtube.json +59 -0
- package/dist-references/canva.json +646 -0
- package/dist-references/codex-desktop.json +305 -0
- package/dist-references/davinci-resolve-keyboard.json +594 -0
- package/dist-references/davinci-resolve-menu-map.json +1139 -0
- package/dist-references/davinci-resolve-menus-batch1.json +116 -0
- package/dist-references/davinci-resolve-menus-batch2.json +372 -0
- package/dist-references/davinci-resolve-menus-batch3.json +330 -0
- package/dist-references/davinci-resolve-menus-batch4.json +297 -0
- package/dist-references/davinci-resolve-shortcuts.json +333 -0
- package/dist-references/devpost.json +186 -0
- package/dist-references/devto.json +317 -0
- package/dist-references/discord.json +549 -0
- package/dist-references/figma.json +1186 -0
- package/dist-references/finder.json +146 -0
- package/dist-references/google-ads-transparency.json +95 -0
- package/dist-references/google-flow.json +649 -0
- package/dist-references/instagram.json +341 -0
- package/dist-references/linkedin.json +324 -0
- package/dist-references/meta-ad-library.json +86 -0
- package/dist-references/n8n.json +387 -0
- package/dist-references/notes.json +27 -0
- package/dist-references/notion.json +163 -0
- package/dist-references/reddit.json +341 -0
- package/dist-references/threads.json +337 -0
- package/dist-references/x-twitter.json +403 -0
- package/dist-references/youtube.json +373 -0
- package/native/macos-bridge/Package.swift +22 -0
- package/native/macos-bridge/Sources/AccessibilityBridge.swift +482 -0
- package/native/macos-bridge/Sources/AppManagement.swift +339 -0
- package/native/macos-bridge/Sources/CoreGraphicsBridge.swift +537 -0
- package/native/macos-bridge/Sources/ObserverBridge.swift +120 -0
- package/native/macos-bridge/Sources/StreamCapture.swift +136 -0
- package/native/macos-bridge/Sources/VisionBridge.swift +238 -0
- package/native/macos-bridge/Sources/main.swift +498 -0
- package/native/windows-bridge/AppManagement.cs +234 -0
- package/native/windows-bridge/InputBridge.cs +436 -0
- package/native/windows-bridge/Program.cs +270 -0
- package/native/windows-bridge/ScreenCapture.cs +453 -0
- package/native/windows-bridge/UIAutomationBridge.cs +571 -0
- package/native/windows-bridge/WindowsBridge.csproj +17 -0
- package/package.json +12 -1
- package/scripts/postinstall.cjs +127 -0
- package/dist/.audit-log.jsonl +0 -55
- package/dist/.screenhand/memory/.lock +0 -1
- package/dist/.screenhand/memory/actions.jsonl +0 -85
- package/dist/.screenhand/memory/errors.jsonl +0 -5
- package/dist/.screenhand/memory/errors.jsonl.bak +0 -4
- package/dist/.screenhand/memory/state.json +0 -35
- package/dist/.screenhand/memory/state.json.bak +0 -35
- package/dist/.screenhand/memory/strategies.jsonl +0 -12
- package/dist/agent/cli.js +0 -73
- package/dist/agent/loop.js +0 -258
- package/dist/config.js +0 -9
- package/dist/index.js +0 -56
- package/dist/logging/timeline-logger.js +0 -29
- package/dist/mcp/mcp-stdio-server.js +0 -448
- package/dist/mcp/server.js +0 -347
- package/dist/mcp-entry.js +0 -59
- package/dist/memory/recall.js +0 -160
- package/dist/memory/research.js +0 -98
- package/dist/memory/seeds.js +0 -89
- package/dist/memory/session.js +0 -161
- package/dist/memory/store.js +0 -391
- package/dist/memory/types.js +0 -4
- package/dist/monitor/codex-monitor.js +0 -377
- package/dist/monitor/task-queue.js +0 -84
- package/dist/monitor/types.js +0 -49
- package/dist/native/bridge-client.js +0 -174
- package/dist/native/macos-bridge-client.js +0 -5
- package/dist/npm-publish-helper.js +0 -117
- package/dist/npm-token-cdp.js +0 -113
- package/dist/npm-token-create.js +0 -135
- package/dist/npm-token-finish.js +0 -126
- package/dist/playbook/engine.js +0 -193
- package/dist/playbook/index.js +0 -4
- package/dist/playbook/recorder.js +0 -519
- package/dist/playbook/runner.js +0 -392
- package/dist/playbook/store.js +0 -166
- package/dist/playbook/types.js +0 -4
- package/dist/runtime/accessibility-adapter.js +0 -377
- package/dist/runtime/app-adapter.js +0 -48
- package/dist/runtime/applescript-adapter.js +0 -283
- package/dist/runtime/ax-role-map.js +0 -80
- package/dist/runtime/browser-adapter.js +0 -36
- package/dist/runtime/cdp-chrome-adapter.js +0 -505
- package/dist/runtime/composite-adapter.js +0 -205
- package/dist/runtime/executor.js +0 -250
- package/dist/runtime/locator-cache.js +0 -12
- package/dist/runtime/planning-loop.js +0 -47
- package/dist/runtime/service.js +0 -372
- package/dist/runtime/session-manager.js +0 -28
- package/dist/runtime/state-observer.js +0 -105
- package/dist/runtime/vision-adapter.js +0 -208
- package/dist/test-mcp-protocol.js +0 -138
- package/dist/types.js +0 -1
package/dist/mcp-desktop.js
CHANGED
|
@@ -34,10 +34,13 @@ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"
|
|
|
34
34
|
import { z } from "zod";
|
|
35
35
|
import path from "node:path";
|
|
36
36
|
import { fileURLToPath } from "node:url";
|
|
37
|
-
import { execSync } from "node:child_process";
|
|
37
|
+
import { execSync, exec } from "node:child_process";
|
|
38
|
+
import { promisify } from "node:util";
|
|
39
|
+
const execAsync = promisify(exec);
|
|
38
40
|
import fs from "node:fs";
|
|
39
41
|
import { BridgeClient } from "./src/native/bridge-client.js";
|
|
40
42
|
import { writeFileAtomicSync, readJsonWithRecovery } from "./src/util/atomic-write.js";
|
|
43
|
+
import { sanitizeUrl, redactSensitiveLabel, redactUsername, redactPII } from "./src/util/sanitize.js";
|
|
41
44
|
import { MemoryService } from "./src/memory/service.js";
|
|
42
45
|
import { backgroundResearch } from "./src/memory/research.js";
|
|
43
46
|
import { SessionSupervisor, LeaseManager } from "./src/supervisor/supervisor.js";
|
|
@@ -46,11 +49,30 @@ import { JobRunner } from "./src/jobs/runner.js";
|
|
|
46
49
|
import { getWorkerLiveStatus, getWorkerDaemonPid, WORKER_LOG_FILE } from "./src/jobs/worker.js";
|
|
47
50
|
import { PlaybookEngine } from "./src/playbook/engine.js";
|
|
48
51
|
import { PlaybookStore } from "./src/playbook/store.js";
|
|
52
|
+
import { ContextTracker } from "./src/context-tracker.js";
|
|
53
|
+
import { McpPlaybookRecorder } from "./src/playbook/mcp-recorder.js";
|
|
54
|
+
import { WorldModel } from "./src/state/index.js";
|
|
55
|
+
import { PerceptionManager } from "./src/perception/index.js";
|
|
56
|
+
import { Planner, PlanExecutor, GoalStore, ToolRegistry } from "./src/planner/index.js";
|
|
57
|
+
import { RecoveryEngine } from "./src/recovery/index.js";
|
|
58
|
+
import { LearningEngine } from "./src/learning/index.js";
|
|
59
|
+
import { discoverWebElements, testWebElement, compileReference, saveExploreResult, discoverNativeElements } from "./src/platform/explorer.js";
|
|
60
|
+
import { buildDocUrls, crawlPage, compileLearnResult, saveLearnResult } from "./src/platform/learner.js";
|
|
49
61
|
import { AccessibilityAdapter } from "./src/runtime/accessibility-adapter.js";
|
|
50
62
|
import { AutomationRuntimeService } from "./src/runtime/service.js";
|
|
63
|
+
import { LocatorCache } from "./src/runtime/locator-cache.js";
|
|
51
64
|
import { TimelineLogger } from "./src/logging/timeline-logger.js";
|
|
65
|
+
import { readObserverState, getObserverDaemonPid, submitObserverCommand, getObserverCommand } from "./src/observer/state.js";
|
|
66
|
+
import { OBSERVER_LOG_FILE } from "./src/observer/types.js";
|
|
52
67
|
import { spawn } from "node:child_process";
|
|
53
68
|
import os from "node:os";
|
|
69
|
+
import { MenuScanner } from "./src/ingestion/menu-scanner.js";
|
|
70
|
+
import { DocParser } from "./src/ingestion/doc-parser.js";
|
|
71
|
+
import { TutorialExtractor } from "./src/ingestion/tutorial-extractor.js";
|
|
72
|
+
import { CoverageAuditor } from "./src/ingestion/coverage-auditor.js";
|
|
73
|
+
import { ReferenceMerger } from "./src/ingestion/reference-merger.js";
|
|
74
|
+
import { PlaybookPublisher } from "./src/community/publisher.js";
|
|
75
|
+
import { PlaybookFetcher } from "./src/community/fetcher.js";
|
|
54
76
|
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
55
77
|
// ── Audit logging for dangerous tools ──
|
|
56
78
|
const AUDIT_LOG_PATH = path.resolve(__dirname, ".audit-log.jsonl");
|
|
@@ -73,18 +95,155 @@ const bridgePath = process.platform === "win32"
|
|
|
73
95
|
: path.resolve(__dirname, "native/macos-bridge/.build/release/macos-bridge");
|
|
74
96
|
const bridge = new BridgeClient(bridgePath);
|
|
75
97
|
let bridgeReady = false;
|
|
98
|
+
// Focus mutex — only one focus() call runs at a time since only one app can be frontmost.
|
|
99
|
+
// Prevents N concurrent focus calls from generating N*5 bridge calls that overwhelm the bridge.
|
|
100
|
+
let focusLock = Promise.resolve();
|
|
76
101
|
async function ensureBridge() {
|
|
77
102
|
if (!bridgeReady) {
|
|
78
103
|
await bridge.start();
|
|
79
104
|
bridgeReady = true;
|
|
105
|
+
perceptionManager.createSources(bridge);
|
|
80
106
|
}
|
|
81
107
|
}
|
|
108
|
+
/** Window titles that indicate auxiliary/utility windows — deprioritize these */
|
|
109
|
+
const AUXILIARY_WINDOW_TITLES = new Set([
|
|
110
|
+
"Privacy Report", "Downloads", "Extensions", "Bookmarks",
|
|
111
|
+
"History", "Preferences", "Settings", "Web Inspector",
|
|
112
|
+
]);
|
|
113
|
+
/**
|
|
114
|
+
* L3-04 fix: Check if a PID is running — checks app.list first, then falls back to
|
|
115
|
+
* app.frontmost and window list. Some Electron apps (Slack, Discord) don't appear in
|
|
116
|
+
* NSWorkspace.runningApplications but are visible via CGWindowList and frontmost checks.
|
|
117
|
+
*/
|
|
118
|
+
async function isPidRunning(pid) {
|
|
119
|
+
try {
|
|
120
|
+
const apps = await bridge.call("app.list", {});
|
|
121
|
+
if (apps?.some((a) => a.pid === pid))
|
|
122
|
+
return true;
|
|
123
|
+
}
|
|
124
|
+
catch { /* ignore */ }
|
|
125
|
+
// Fallback 1: check frontmost
|
|
126
|
+
try {
|
|
127
|
+
const front = await bridge.call("app.frontmost", {});
|
|
128
|
+
if (front.pid === pid)
|
|
129
|
+
return true;
|
|
130
|
+
}
|
|
131
|
+
catch { /* ignore */ }
|
|
132
|
+
// Fallback 2: check window list
|
|
133
|
+
try {
|
|
134
|
+
const wins = await bridge.call("app.windows");
|
|
135
|
+
if (wins?.some((w) => (w.pid || w.ownerPid) === pid))
|
|
136
|
+
return true;
|
|
137
|
+
}
|
|
138
|
+
catch { /* ignore */ }
|
|
139
|
+
return false;
|
|
140
|
+
}
|
|
141
|
+
/** Resolve the native windowId for a given PID via the AX bridge. */
|
|
142
|
+
async function resolveWindowId(pid) {
|
|
143
|
+
// Prefer AX-enriched window.list — returns focused/isMain fields from AX API
|
|
144
|
+
try {
|
|
145
|
+
const wins = await bridge.call("window.list", {});
|
|
146
|
+
const matching = wins?.filter((w) => w.pid === pid);
|
|
147
|
+
if (matching && matching.length > 0) {
|
|
148
|
+
// Filter out auxiliary windows (Privacy Report, Downloads, etc.)
|
|
149
|
+
const contentWindows = matching.filter((w) => !AUXILIARY_WINDOW_TITLES.has(w.title) && w.subrole !== "AXFloatingWindow");
|
|
150
|
+
const candidates = contentWindows.length > 0 ? contentWindows : matching;
|
|
151
|
+
// Prefer focused > isMain > first content window
|
|
152
|
+
const focused = candidates.find((w) => w.focused);
|
|
153
|
+
if (focused?.windowId != null)
|
|
154
|
+
return focused.windowId;
|
|
155
|
+
const main = candidates.find((w) => w.isMain);
|
|
156
|
+
if (main?.windowId != null)
|
|
157
|
+
return main.windowId;
|
|
158
|
+
const win = candidates[0];
|
|
159
|
+
if (win?.windowId != null)
|
|
160
|
+
return win.windowId;
|
|
161
|
+
}
|
|
162
|
+
}
|
|
163
|
+
catch { /* fall through */ }
|
|
164
|
+
try {
|
|
165
|
+
// Fallback to CG-based app.windows (no focused/isMain, may crash on GPU-heavy windows)
|
|
166
|
+
const wins = await bridge.call("app.windows");
|
|
167
|
+
const matching = wins?.filter((w) => w.pid === pid);
|
|
168
|
+
if (matching && matching.length > 0) {
|
|
169
|
+
// Still filter auxiliary windows even in fallback path
|
|
170
|
+
const content = matching.filter((w) => !AUXILIARY_WINDOW_TITLES.has(w.title));
|
|
171
|
+
const win = content.length > 0 ? content[0] : matching[0];
|
|
172
|
+
if (win?.windowId != null)
|
|
173
|
+
return win.windowId;
|
|
174
|
+
}
|
|
175
|
+
}
|
|
176
|
+
catch { /* ignore */ }
|
|
177
|
+
return undefined;
|
|
178
|
+
}
|
|
179
|
+
/** Check if the focused app is a browser — used to enable safeCLI capture mode */
|
|
180
|
+
function isBrowserApp() {
|
|
181
|
+
const bundleId = worldModel.getState().focusedApp?.bundleId ?? "";
|
|
182
|
+
return /^com\.(apple\.Safari|google\.Chrome|microsoft\.edgemac)$|^org\.mozilla\.firefox$/.test(bundleId);
|
|
183
|
+
}
|
|
184
|
+
/**
|
|
185
|
+
* Install async Safari browser enricher on the perception coordinator.
|
|
186
|
+
* Non-blocking — uses async exec instead of execSync.
|
|
187
|
+
* Only installs if bundleId is Safari; clears enricher otherwise.
|
|
188
|
+
*/
|
|
189
|
+
function installSafariEnricher(bundleId) {
|
|
190
|
+
const coord = perceptionManager.getCoordinator();
|
|
191
|
+
if (!coord)
|
|
192
|
+
return;
|
|
193
|
+
if (bundleId !== "com.apple.Safari") {
|
|
194
|
+
coord.setBrowserEnricher(null);
|
|
195
|
+
return;
|
|
196
|
+
}
|
|
197
|
+
coord.setBrowserEnricher(async () => {
|
|
198
|
+
const script = `tell application "Safari"
|
|
199
|
+
set t to current tab of front window
|
|
200
|
+
set tabInfo to name of t & "|" & URL of t
|
|
201
|
+
set tabList to ""
|
|
202
|
+
set tabIdx to 1
|
|
203
|
+
repeat with w in windows
|
|
204
|
+
repeat with tb in tabs of w
|
|
205
|
+
set isActive to (tb = current tab of w) as string
|
|
206
|
+
set tabList to tabList & tabIdx & "|" & name of tb & "|" & URL of tb & "|" & isActive & "\\n"
|
|
207
|
+
set tabIdx to tabIdx + 1
|
|
208
|
+
end repeat
|
|
209
|
+
end repeat
|
|
210
|
+
return tabInfo & "\\n---\\n" & tabList
|
|
211
|
+
end tell`;
|
|
212
|
+
const { stdout } = await execAsync(`osascript -e '${script.replace(/'/g, "'\\''")}'`, {
|
|
213
|
+
encoding: "utf-8",
|
|
214
|
+
timeout: 5000,
|
|
215
|
+
});
|
|
216
|
+
const result = (stdout ?? "").trim();
|
|
217
|
+
if (result) {
|
|
218
|
+
const [currentLine, , ...tabLines] = result.split("\n");
|
|
219
|
+
const [title, url] = (currentLine ?? "").split("|");
|
|
220
|
+
const tabs = tabLines
|
|
221
|
+
.filter((l) => l.includes("|"))
|
|
222
|
+
.map((l) => {
|
|
223
|
+
const [idx, tTitle, tUrl, active] = l.split("|");
|
|
224
|
+
return { index: parseInt(idx ?? "0", 10), title: tTitle ?? "", url: tUrl ?? "", isActive: active === "true" };
|
|
225
|
+
});
|
|
226
|
+
if (url)
|
|
227
|
+
worldModel.ingestSafariBrowserState(url, title ?? "", tabs.length > 0 ? tabs : undefined);
|
|
228
|
+
}
|
|
229
|
+
});
|
|
230
|
+
}
|
|
82
231
|
// CDP connection cache
|
|
83
232
|
let cdpPort = null;
|
|
84
233
|
let CDP = null;
|
|
85
|
-
async function ensureCDP() {
|
|
234
|
+
async function ensureCDP(overridePort) {
|
|
86
235
|
if (!CDP)
|
|
87
236
|
CDP = (await import("chrome-remote-interface")).default;
|
|
237
|
+
// If caller specified a port, use it directly (e.g. 9333 for Electron apps)
|
|
238
|
+
if (overridePort) {
|
|
239
|
+
try {
|
|
240
|
+
await CDP.Version({ port: overridePort });
|
|
241
|
+
return { CDP, port: overridePort };
|
|
242
|
+
}
|
|
243
|
+
catch {
|
|
244
|
+
throw new Error(`CDP not available on port ${overridePort}. Ensure the app is running with --remote-debugging-port=${overridePort}`);
|
|
245
|
+
}
|
|
246
|
+
}
|
|
88
247
|
if (cdpPort) {
|
|
89
248
|
try {
|
|
90
249
|
await CDP.Version({ port: cdpPort });
|
|
@@ -92,8 +251,8 @@ async function ensureCDP() {
|
|
|
92
251
|
}
|
|
93
252
|
catch { }
|
|
94
253
|
}
|
|
95
|
-
// Try common ports
|
|
96
|
-
for (const p of [9222, 9223, 9224]) {
|
|
254
|
+
// Try common ports (9222-9224 = Chrome, 9333 = Codex desktop)
|
|
255
|
+
for (const p of [9222, 9223, 9224, 9333]) {
|
|
97
256
|
try {
|
|
98
257
|
await CDP.Version({ port: p });
|
|
99
258
|
cdpPort = p;
|
|
@@ -103,7 +262,7 @@ async function ensureCDP() {
|
|
|
103
262
|
}
|
|
104
263
|
throw new Error("Chrome not running with --remote-debugging-port. Launch with: /Applications/Google\\ Chrome.app/Contents/MacOS/Google\\ Chrome --remote-debugging-port=9222 --user-data-dir=/tmp/chrome-debug");
|
|
105
264
|
}
|
|
106
|
-
const server = new McpServer({ name: "screenhand", version: "
|
|
265
|
+
const server = new McpServer({ name: "screenhand", version: "3.0.0" });
|
|
107
266
|
// ═══════════════════════════════════════════════
|
|
108
267
|
// LEARNING MEMORY — cached, auto-recall, non-blocking
|
|
109
268
|
// ═══════════════════════════════════════════════
|
|
@@ -118,7 +277,82 @@ jobManager.init();
|
|
|
118
277
|
// Direct lease manager that shares the filesystem lock dir with the daemon
|
|
119
278
|
const LOCK_DIR = path.join(os.homedir(), ".screenhand", "locks");
|
|
120
279
|
const leaseManager = new LeaseManager(LOCK_DIR);
|
|
121
|
-
//
|
|
280
|
+
// ── Context tracker — connects tool execution to playbook knowledge ──
|
|
281
|
+
// References dir holds curated platform knowledge (selectors, flows, errors)
|
|
282
|
+
// Playbooks dir holds only executable step sequences for job_create
|
|
283
|
+
// Resolution order: local dev paths → npm dist paths → ~/.screenhand/ user paths
|
|
284
|
+
function resolveDataDir(name) {
|
|
285
|
+
// 1. Local dev path (when running from source)
|
|
286
|
+
const local = path.resolve(__dirname, name);
|
|
287
|
+
if (fs.existsSync(local) && fs.readdirSync(local).some(f => f.endsWith(".json"))) {
|
|
288
|
+
return local;
|
|
289
|
+
}
|
|
290
|
+
// 2. npm dist path (when installed via npx/npm)
|
|
291
|
+
const dist = path.resolve(__dirname, `dist-${name}`);
|
|
292
|
+
if (fs.existsSync(dist) && fs.readdirSync(dist).some(f => f.endsWith(".json"))) {
|
|
293
|
+
return dist;
|
|
294
|
+
}
|
|
295
|
+
// 3. User home path (always available for user-generated content)
|
|
296
|
+
const userDir = path.join(os.homedir(), ".screenhand", name);
|
|
297
|
+
if (!fs.existsSync(userDir)) {
|
|
298
|
+
fs.mkdirSync(userDir, { recursive: true });
|
|
299
|
+
}
|
|
300
|
+
return userDir;
|
|
301
|
+
}
|
|
302
|
+
const referencesDir = resolveDataDir("references");
|
|
303
|
+
const _playbookStoreForContext = new PlaybookStore(referencesDir);
|
|
304
|
+
_playbookStoreForContext.load();
|
|
305
|
+
const playbooksDir = resolveDataDir("playbooks");
|
|
306
|
+
const contextTracker = new ContextTracker(_playbookStoreForContext, playbooksDir);
|
|
307
|
+
const worldModel = new WorldModel();
|
|
308
|
+
const perceptionManager = new PerceptionManager(worldModel);
|
|
309
|
+
const learningEngine = new LearningEngine();
|
|
310
|
+
learningEngine.init();
|
|
311
|
+
import { AppMap } from "./src/state/app-map.js";
|
|
312
|
+
// Seed app maps: check npm dist path first, then local dev path
|
|
313
|
+
const seedAppMapsDir = (() => {
|
|
314
|
+
const dist = path.resolve(__dirname, "dist-app-maps");
|
|
315
|
+
if (fs.existsSync(dist))
|
|
316
|
+
return dist;
|
|
317
|
+
const local = path.resolve(__dirname, "seed-app-maps");
|
|
318
|
+
if (fs.existsSync(local))
|
|
319
|
+
return local;
|
|
320
|
+
return undefined;
|
|
321
|
+
})();
|
|
322
|
+
const appMap = new AppMap(seedAppMapsDir ? { seedDir: seedAppMapsDir } : undefined);
|
|
323
|
+
appMap.init();
|
|
324
|
+
// Cross-feature workflow tracking: per-app buffer of distinct features hit by action tools
|
|
325
|
+
const crossFeatureBuffer = new Map();
|
|
326
|
+
// Visibility tracking throttle: run conditional UI check every 10th tool call
|
|
327
|
+
let visibilityCheckCounter = 0;
|
|
328
|
+
// Previous tool name for ready-signal recording (what action preceded a wait)
|
|
329
|
+
let lastSuccessfulToolName = "unknown";
|
|
330
|
+
// Last known bundleId — survives focusedApp being nulled by app_deactivated events
|
|
331
|
+
let lastKnownBundleId = null;
|
|
332
|
+
contextTracker.setAppMap(appMap);
|
|
333
|
+
perceptionManager.setAppMap(appMap);
|
|
334
|
+
const _executablePlaybookStore = new PlaybookStore(playbooksDir);
|
|
335
|
+
try {
|
|
336
|
+
_executablePlaybookStore.load();
|
|
337
|
+
}
|
|
338
|
+
catch { /* dir may not exist */ }
|
|
339
|
+
const planner = new Planner(_executablePlaybookStore, memory, contextTracker, worldModel, learningEngine);
|
|
340
|
+
const goalStore = new GoalStore(path.join(os.homedir(), ".screenhand", "planner"));
|
|
341
|
+
goalStore.init();
|
|
342
|
+
const toolRegistry = new ToolRegistry();
|
|
343
|
+
const recoveryEngine = new RecoveryEngine(worldModel, toolRegistry.toExecutor(), memory);
|
|
344
|
+
recoveryEngine.setLearningEngine(learningEngine);
|
|
345
|
+
planner.setToolRegistry(toolRegistry);
|
|
346
|
+
perceptionManager.setLearningEngine(learningEngine);
|
|
347
|
+
const mcpRecorder = new McpPlaybookRecorder(playbooksDir);
|
|
348
|
+
const referenceMerger = new ReferenceMerger(referencesDir);
|
|
349
|
+
const communityPublisher = new PlaybookPublisher();
|
|
350
|
+
const communityFetcher = new PlaybookFetcher();
|
|
351
|
+
// Tools excluded from the intelligence wrapper (memory/context hints).
|
|
352
|
+
// Memory, supervisor, job, and daemon lifecycle tools skip the wrapper to avoid recursion
|
|
353
|
+
// and because they don't benefit from playbook hints.
|
|
354
|
+
// NOTE: platform knowledge tools (platform_guide, playbook_preflight, export_playbook)
|
|
355
|
+
// are NOT excluded — they benefit from context-aware hints.
|
|
122
356
|
const MEMORY_TOOLS = new Set([
|
|
123
357
|
"memory_snapshot", "memory_recall", "memory_save", "memory_record_error",
|
|
124
358
|
"memory_record_learning", "memory_query_patterns", "memory_errors",
|
|
@@ -131,19 +365,49 @@ const MEMORY_TOOLS = new Set([
|
|
|
131
365
|
"job_step_done", "job_step_fail", "job_resume", "job_dequeue", "job_remove",
|
|
132
366
|
"job_run", "job_run_all",
|
|
133
367
|
"worker_start", "worker_stop", "worker_status",
|
|
368
|
+
"job_create_chain",
|
|
369
|
+
"observer_start", "observer_stop", "observer_status", "observer_ocr_roi",
|
|
370
|
+
"orchestrator_start", "orchestrator_stop", "orchestrator_submit", "orchestrator_status",
|
|
371
|
+
"world_state", "world_state_diff", "perception_status", "perception_start", "perception_stop",
|
|
372
|
+
"learning_status", "learning_reset",
|
|
373
|
+
"plan_goal", "plan_execute", "plan_step", "plan_step_resolve", "plan_status", "plan_list", "plan_cancel",
|
|
374
|
+
"recovery_status", "recovery_configure",
|
|
375
|
+
"community_publish", "community_fetch",
|
|
134
376
|
]);
|
|
135
377
|
// Track the strategy we're currently following (for feedback loop)
|
|
136
378
|
let activeStrategyFingerprint = null;
|
|
379
|
+
let currentAdaptiveBudget = null;
|
|
137
380
|
// Intercept all tool registrations to auto-log + auto-recall
|
|
138
|
-
const
|
|
381
|
+
const _rawOriginalTool = server.tool.bind(server);
|
|
382
|
+
// Wrap originalTool to also register handlers in the tool registry
|
|
383
|
+
const originalTool = ((...args) => {
|
|
384
|
+
const handlerIdx = args.findIndex((a) => typeof a === "function");
|
|
385
|
+
if (handlerIdx !== -1) {
|
|
386
|
+
const name = args[0];
|
|
387
|
+
const handler = args[handlerIdx];
|
|
388
|
+
// Wrap handler to ensure world model session rebinding (same as server.tool wrapper)
|
|
389
|
+
const wrappedHandler = async (params, extra) => {
|
|
390
|
+
const sessionId = memory.getSessionId();
|
|
391
|
+
if (sessionId && worldModel.getState().sessionId !== sessionId) {
|
|
392
|
+
worldModel.init(sessionId);
|
|
393
|
+
}
|
|
394
|
+
return handler(params, extra);
|
|
395
|
+
};
|
|
396
|
+
args[handlerIdx] = wrappedHandler;
|
|
397
|
+
toolRegistry.register(name, (params) => handler(params, {}));
|
|
398
|
+
}
|
|
399
|
+
return _rawOriginalTool(...args);
|
|
400
|
+
});
|
|
139
401
|
function extractText(result) {
|
|
140
402
|
if (!result?.content)
|
|
141
403
|
return "";
|
|
142
|
-
|
|
404
|
+
const full = result.content
|
|
143
405
|
.filter((c) => c.type === "text")
|
|
144
406
|
.map((c) => c.text)
|
|
145
|
-
.join("\n")
|
|
146
|
-
|
|
407
|
+
.join("\n");
|
|
408
|
+
if (full.length > 500)
|
|
409
|
+
return full.slice(0, 500) + " [TRUNCATED]";
|
|
410
|
+
return full;
|
|
147
411
|
}
|
|
148
412
|
server.tool = (...args) => {
|
|
149
413
|
const handlerIdx = args.findIndex((a) => typeof a === "function");
|
|
@@ -151,6 +415,8 @@ server.tool = (...args) => {
|
|
|
151
415
|
return originalTool(...args);
|
|
152
416
|
const originalHandler = args[handlerIdx];
|
|
153
417
|
const toolName = args[0];
|
|
418
|
+
// Register the original (unwrapped) handler for internal tool dispatch
|
|
419
|
+
toolRegistry.register(toolName, (params) => originalHandler(params, {}));
|
|
154
420
|
const wrappedHandler = async (params, extra) => {
|
|
155
421
|
// Skip intercepting memory tools to avoid recursion
|
|
156
422
|
if (MEMORY_TOOLS.has(toolName)) {
|
|
@@ -159,8 +425,59 @@ server.tool = (...args) => {
|
|
|
159
425
|
const sessionId = memory.getSessionId();
|
|
160
426
|
const safeParams = typeof params === "object" && params !== null ? params : {};
|
|
161
427
|
const start = Date.now();
|
|
428
|
+
// ── PRE-CALL: lazy-init world model on first session ──
|
|
429
|
+
if (sessionId && worldModel.getState().sessionId !== sessionId) {
|
|
430
|
+
worldModel.init(sessionId);
|
|
431
|
+
}
|
|
432
|
+
// ── PRE-CALL: notify perception to stay active (idle gating) ──
|
|
433
|
+
perceptionManager.notifyToolCall();
|
|
162
434
|
// ── PRE-CALL: check for known error warnings (~0ms, in-memory) ──
|
|
163
435
|
const knownError = memory.quickErrorCheck(toolName);
|
|
436
|
+
// ── PRE-CALL: auto-start perception if not running ──
|
|
437
|
+
if (!perceptionManager.isRunning && bridgeReady) {
|
|
438
|
+
const focusApp = worldModel.getState().focusedApp;
|
|
439
|
+
if (focusApp?.bundleId && focusApp?.pid) {
|
|
440
|
+
perceptionManager.tryAutoStart(focusApp, bridge).catch(() => { });
|
|
441
|
+
installSafariEnricher(focusApp.bundleId);
|
|
442
|
+
}
|
|
443
|
+
}
|
|
444
|
+
// ── PRE-CALL: update context tracker (fires playbook lookup only on domain change) ──
|
|
445
|
+
contextTracker.updateContext(toolName, safeParams);
|
|
446
|
+
const playbookHints = contextTracker.getHints(toolName, safeParams);
|
|
447
|
+
// ── PRE-CALL: compute adaptive budget from learning engine ──
|
|
448
|
+
const budgetBundleId = worldModel.getState().focusedApp?.bundleId;
|
|
449
|
+
if (budgetBundleId) {
|
|
450
|
+
const budget = learningEngine.getAdaptiveBudget(budgetBundleId);
|
|
451
|
+
if (budget.locateMs !== 800 || budget.actMs !== 200 || budget.verifyMs !== 2000) {
|
|
452
|
+
currentAdaptiveBudget = budget;
|
|
453
|
+
}
|
|
454
|
+
else {
|
|
455
|
+
currentAdaptiveBudget = null;
|
|
456
|
+
}
|
|
457
|
+
}
|
|
458
|
+
else {
|
|
459
|
+
currentAdaptiveBudget = null;
|
|
460
|
+
}
|
|
461
|
+
// Capture pre-call focused app for focus drift detection
|
|
462
|
+
const preBundleId = worldModel.getState().focusedApp?.bundleId ?? null;
|
|
463
|
+
// Update last known bundleId from world model, tool params, or context tracker
|
|
464
|
+
const paramBundleId = safeParams.bundleId ?? safeParams.pid;
|
|
465
|
+
if (preBundleId) {
|
|
466
|
+
lastKnownBundleId = preBundleId;
|
|
467
|
+
}
|
|
468
|
+
else if (typeof paramBundleId === "string" && paramBundleId) {
|
|
469
|
+
lastKnownBundleId = paramBundleId;
|
|
470
|
+
}
|
|
471
|
+
// Capture pre-call window title for navigation edge tracking
|
|
472
|
+
const preWindowTitle = worldModel.getFocusedWindow()?.title.value ?? null;
|
|
473
|
+
// Action tools = actually doing something. Navigation = just clicking around.
|
|
474
|
+
const ACTION_TOOLS = new Set([
|
|
475
|
+
"type_text", "key", "drag", "scroll", "menu_click", "applescript",
|
|
476
|
+
"ui_set_value", "ui_press",
|
|
477
|
+
"browser_type", "browser_click", "browser_fill_form", "browser_human_click",
|
|
478
|
+
"browser_js", "browser_navigate",
|
|
479
|
+
"type_with_fallback", "select_with_fallback", "scroll_with_fallback",
|
|
480
|
+
]);
|
|
164
481
|
try {
|
|
165
482
|
const result = await originalHandler(params, extra);
|
|
166
483
|
const durationMs = Date.now() - start;
|
|
@@ -177,15 +494,647 @@ server.tool = (...args) => {
|
|
|
177
494
|
error: null,
|
|
178
495
|
};
|
|
179
496
|
memory.recordEvent(entry); // non-blocking write + session tracking
|
|
497
|
+
// ── POST-CALL: record success for playbook learning (in-memory only) ──
|
|
498
|
+
contextTracker.recordOutcome(toolName, safeParams, true, null);
|
|
499
|
+
// ── POST-CALL: Safari context gap + page context update ──
|
|
500
|
+
const postFocusApp = worldModel.getState().focusedApp;
|
|
501
|
+
const postBundleIdForCtx = postFocusApp?.bundleId ?? lastKnownBundleId;
|
|
502
|
+
if (postBundleIdForCtx) {
|
|
503
|
+
lastKnownBundleId = postBundleIdForCtx;
|
|
504
|
+
// Try focused window first, then search all windows for matching bundleId
|
|
505
|
+
let winTitle = null;
|
|
506
|
+
const focWin = worldModel.getFocusedWindow();
|
|
507
|
+
if (focWin?.title.value) {
|
|
508
|
+
winTitle = focWin.title.value;
|
|
509
|
+
}
|
|
510
|
+
else if (postFocusApp?.pid) {
|
|
511
|
+
// Focused window lost — search state for any window from this app
|
|
512
|
+
for (const [, win] of worldModel.getState().windows) {
|
|
513
|
+
if (win.pid === postFocusApp.pid && win.title.value) {
|
|
514
|
+
winTitle = win.title.value;
|
|
515
|
+
break;
|
|
516
|
+
}
|
|
517
|
+
}
|
|
518
|
+
}
|
|
519
|
+
if (winTitle) {
|
|
520
|
+
contextTracker.updateContextFromWindowTitle(postBundleIdForCtx, winTitle);
|
|
521
|
+
contextTracker.updatePageContext(winTitle);
|
|
522
|
+
}
|
|
523
|
+
else {
|
|
524
|
+
// Don't null out page context if we just can't find the window —
|
|
525
|
+
// keep the last known page context to avoid losing it on transient events
|
|
526
|
+
}
|
|
527
|
+
}
|
|
528
|
+
// ── POST-CALL: record page transitions for navigation graph ──
|
|
529
|
+
const pageTransition = contextTracker.consumePageTransition();
|
|
530
|
+
if (pageTransition && postBundleIdForCtx) {
|
|
531
|
+
try {
|
|
532
|
+
appMap.recordPageTransition(postBundleIdForCtx, pageTransition.from, pageTransition.to, toolName);
|
|
533
|
+
}
|
|
534
|
+
catch { /* non-critical — don't break tool execution for nav tracking */ }
|
|
535
|
+
}
|
|
536
|
+
// ── POST-CALL: detect focus drift ──
|
|
537
|
+
const postBundleId = worldModel.getState().focusedApp?.bundleId ?? null;
|
|
538
|
+
if (preBundleId && postBundleId && preBundleId !== postBundleId) {
|
|
539
|
+
const driftWarning = `⚠ Focus changed: ${preBundleId} → ${postBundleId}. Use \`focus\` to return.`;
|
|
540
|
+
if (result?.content && Array.isArray(result.content)) {
|
|
541
|
+
result.content.unshift({ type: "text", text: driftWarning });
|
|
542
|
+
}
|
|
543
|
+
}
|
|
544
|
+
// ── POST-CALL: feed learning engine (timing + locator outcomes) ──
|
|
545
|
+
const learnBundleId = worldModel.getState().focusedApp?.bundleId ?? lastKnownBundleId ?? "unknown";
|
|
546
|
+
learningEngine.recordToolTiming({ tool: toolName, bundleId: learnBundleId, durationMs, success: true });
|
|
547
|
+
// Record locator outcome if the tool used a target/selector
|
|
548
|
+
const locatorTarget = safeParams.target ?? safeParams.selector ?? safeParams.locator
|
|
549
|
+
?? (toolName === "click_text" ? safeParams.text : undefined);
|
|
550
|
+
if (typeof locatorTarget === "string" && locatorTarget) {
|
|
551
|
+
const method = toolName.startsWith("browser_") ? "cdp"
|
|
552
|
+
: toolName.includes("ocr") ? "ocr"
|
|
553
|
+
: "ax";
|
|
554
|
+
learningEngine.recordLocatorOutcome({
|
|
555
|
+
bundleId: learnBundleId,
|
|
556
|
+
actionKey: toolName,
|
|
557
|
+
locator: locatorTarget,
|
|
558
|
+
method,
|
|
559
|
+
success: true,
|
|
560
|
+
});
|
|
561
|
+
// Auto-record verified pattern to patterns.jsonl via learning engine
|
|
562
|
+
learningEngine.recordPattern({
|
|
563
|
+
bundleId: learnBundleId,
|
|
564
|
+
tool: toolName,
|
|
565
|
+
locator: locatorTarget,
|
|
566
|
+
method,
|
|
567
|
+
success: true,
|
|
568
|
+
});
|
|
569
|
+
}
|
|
570
|
+
// ── POST-CALL: update app mastery map from successful action ──
|
|
571
|
+
// Check if the result signals an error (e.g. click_text "not found" returns isError: true)
|
|
572
|
+
const resultIsError = !!result?.isError;
|
|
573
|
+
const isActionTool = ACTION_TOOLS.has(toolName);
|
|
574
|
+
if (resultIsError && learnBundleId !== "unknown") {
|
|
575
|
+
// Redirect to failure mastery recording + count as edge case handled
|
|
576
|
+
try {
|
|
577
|
+
const failedLocatorSoft = safeParams.target ?? safeParams.selector ?? safeParams.locator
|
|
578
|
+
?? (toolName === "click_text" ? safeParams.text : undefined);
|
|
579
|
+
if (typeof failedLocatorSoft === "string" && failedLocatorSoft) {
|
|
580
|
+
appMap.recordElementOutcome(learnBundleId, "auto", failedLocatorSoft, false, contextTracker.currentPageContext ?? undefined);
|
|
581
|
+
}
|
|
582
|
+
if (isActionTool) {
|
|
583
|
+
appMap.recordActionOutcome(learnBundleId, false);
|
|
584
|
+
}
|
|
585
|
+
// Track as edge case: encountering an error is an unexpected state
|
|
586
|
+
const edgeMapData = appMap.getLoaded(learnBundleId);
|
|
587
|
+
if (edgeMapData) {
|
|
588
|
+
edgeMapData.edgeCasesHandled = (edgeMapData.edgeCasesHandled ?? 0) + 1;
|
|
589
|
+
appMap.save(edgeMapData, true);
|
|
590
|
+
}
|
|
591
|
+
const failMapDataSoft = appMap.getLoaded(learnBundleId);
|
|
592
|
+
if (failMapDataSoft?.featureLadder) {
|
|
593
|
+
const failSignalSoft = [toolName, typeof failedLocatorSoft === "string" ? failedLocatorSoft : ""].join(" ").toLowerCase();
|
|
594
|
+
const failGenSignalsSoft = appMap.getGeneratedSignals(learnBundleId) ?? {};
|
|
595
|
+
for (const feature of failMapDataSoft.featureLadder) {
|
|
596
|
+
const fm = failMapDataSoft.featureMastery?.[feature.id];
|
|
597
|
+
if (!fm || fm.depth === 0)
|
|
598
|
+
continue;
|
|
599
|
+
const featureInSignal = failSignalSoft.includes(feature.id.replace(/_/g, " "));
|
|
600
|
+
const keywords = failGenSignalsSoft[feature.id];
|
|
601
|
+
const keywordMatch = keywords?.some((kw) => failSignalSoft.includes(kw));
|
|
602
|
+
if (featureInSignal || keywordMatch) {
|
|
603
|
+
appMap.recordFeatureSignal(learnBundleId, feature.id, fm.depth, false);
|
|
604
|
+
}
|
|
605
|
+
}
|
|
606
|
+
}
|
|
607
|
+
}
|
|
608
|
+
catch { /* non-fatal */ }
|
|
609
|
+
}
|
|
610
|
+
if (!resultIsError && learnBundleId !== "unknown") {
|
|
611
|
+
try {
|
|
612
|
+
if (!appMap.load(learnBundleId)) {
|
|
613
|
+
const focApp = worldModel.getState().focusedApp;
|
|
614
|
+
appMap.createEmpty(learnBundleId, focApp?.appName ?? learnBundleId);
|
|
615
|
+
}
|
|
616
|
+
// Record element outcome for tools with a locator target
|
|
617
|
+
if (typeof locatorTarget === "string" && locatorTarget) {
|
|
618
|
+
appMap.recordElementOutcome(learnBundleId, "auto", locatorTarget, true, contextTracker.currentPageContext ?? undefined);
|
|
619
|
+
// Write relative position from click coordinates
|
|
620
|
+
const resultText = extractText(result);
|
|
621
|
+
const screenMatch = resultText.match(/at screen \((\d+),\s*(\d+)\)/);
|
|
622
|
+
const windowMatch = resultText.match(/\[window: \((\d+),\s*(\d+)\) (\d+)[x×](\d+)\]/);
|
|
623
|
+
if (screenMatch && windowMatch) {
|
|
624
|
+
const sx = parseInt(screenMatch[1], 10);
|
|
625
|
+
const sy = parseInt(screenMatch[2], 10);
|
|
626
|
+
const wx = parseInt(windowMatch[1], 10);
|
|
627
|
+
const wy = parseInt(windowMatch[2], 10);
|
|
628
|
+
const ww = parseInt(windowMatch[3], 10);
|
|
629
|
+
const wh = parseInt(windowMatch[4], 10);
|
|
630
|
+
if (ww > 0 && wh > 0) {
|
|
631
|
+
const relX = Math.max(0, Math.min(1, (sx - wx) / ww));
|
|
632
|
+
const relY = Math.max(0, Math.min(1, (sy - wy) / wh));
|
|
633
|
+
appMap.updateElementPosition(learnBundleId, "auto_discovered", locatorTarget, relX, relY);
|
|
634
|
+
}
|
|
635
|
+
}
|
|
636
|
+
}
|
|
637
|
+
// Record action outcome (only for tools that DO something, not navigation)
|
|
638
|
+
if (isActionTool) {
|
|
639
|
+
appMap.recordActionOutcome(learnBundleId, true);
|
|
640
|
+
}
|
|
641
|
+
// ── Record input/output contract for element interaction tools ──
|
|
642
|
+
{
|
|
643
|
+
const CONTRACT_TOOLS = new Set(["click", "click_text", "type_text", "key", "menu_click"]);
|
|
644
|
+
if (CONTRACT_TOOLS.has(toolName) && typeof locatorTarget === "string" && locatorTarget) {
|
|
645
|
+
// Use "auto" to search all zones — page-specific zones may not exist yet
|
|
646
|
+
appMap.recordContract(learnBundleId, "auto", locatorTarget, toolName, ["action succeeded"]);
|
|
647
|
+
}
|
|
648
|
+
}
|
|
649
|
+
// ── Track shortcut usage (keyboard combos with modifier keys) ──
|
|
650
|
+
if (toolName === "key" && typeof safeParams.combo === "string") {
|
|
651
|
+
const combo = safeParams.combo.toLowerCase();
|
|
652
|
+
if (combo.includes("cmd+") || combo.includes("ctrl+") || combo.includes("alt+") || combo.includes("shift+")) {
|
|
653
|
+
const mapDataShortcut = appMap.getLoaded(learnBundleId);
|
|
654
|
+
if (mapDataShortcut) {
|
|
655
|
+
mapDataShortcut.shortcutsUsed = (mapDataShortcut.shortcutsUsed ?? 0) + 1;
|
|
656
|
+
appMap.save(mapDataShortcut, true);
|
|
657
|
+
}
|
|
658
|
+
}
|
|
659
|
+
}
|
|
660
|
+
// ── Track edge case handling (escape = dialog/popup dismissal) ──
|
|
661
|
+
if (toolName === "key" && safeParams.combo === "escape") {
|
|
662
|
+
const mapDataEdge = appMap.getLoaded(learnBundleId);
|
|
663
|
+
if (mapDataEdge) {
|
|
664
|
+
mapDataEdge.edgeCasesHandled = (mapDataEdge.edgeCasesHandled ?? 0) + 1;
|
|
665
|
+
appMap.save(mapDataEdge, true);
|
|
666
|
+
}
|
|
667
|
+
}
|
|
668
|
+
// ── Auto-detect feature depth from tool usage signals ──
|
|
669
|
+
// Depth: 1=navigated (screenshot/focus), 2=basic action (click/type),
|
|
670
|
+
// 3=multi-step workflow (action tools in sequence), 4=verified outcome
|
|
671
|
+
{
|
|
672
|
+
const mapData = appMap.getLoaded(learnBundleId);
|
|
673
|
+
if (mapData?.featureLadder) {
|
|
674
|
+
const signalText = [
|
|
675
|
+
toolName,
|
|
676
|
+
typeof locatorTarget === "string" ? locatorTarget : "",
|
|
677
|
+
typeof safeParams.text === "string" ? safeParams.text : "",
|
|
678
|
+
preWindowTitle ?? "",
|
|
679
|
+
worldModel.getFocusedWindow()?.title.value ?? "",
|
|
680
|
+
].join(" ").toLowerCase();
|
|
681
|
+
// Determine depth from tool type and history:
|
|
682
|
+
// depth 1 = navigated (screenshot/focus/ocr)
|
|
683
|
+
// depth 2 = basic action (click/type/key on the feature)
|
|
684
|
+
// depth 3 = multi-step workflow (already at depth 2, hit again with different action tool)
|
|
685
|
+
// depth 4 = verified outcome (at depth 3, then verified via screenshot/ocr)
|
|
686
|
+
const NAV_TOOLS = new Set(["screenshot", "screenshot_file", "focus", "ocr", "ui_tree", "ui_find", "windows", "apps", "browser_tabs", "browser_page_info", "browser_dom"]);
|
|
687
|
+
const VERIFY_TOOLS = new Set(["screenshot", "screenshot_file", "ocr", "ui_tree", "ui_find", "browser_dom", "browser_page_info"]);
|
|
688
|
+
const isNavTool = NAV_TOOLS.has(toolName);
|
|
689
|
+
const isVerifyTool = VERIFY_TOOLS.has(toolName);
|
|
690
|
+
// Keyword map: featureId → keywords that signal the feature was used
|
|
691
|
+
// Hardcoded signals for apps with BUILTIN_LADDERS
|
|
692
|
+
const BUILTIN_FEATURE_SIGNALS = {
|
|
693
|
+
// Discord
|
|
694
|
+
browse_channels: ["channel", "server", "sidebar", "lounge", "information"],
|
|
695
|
+
send_message: ["message", "type_text", "browser_type", "chatter", "chat"],
|
|
696
|
+
direct_messages: ["direct message", "dm", "group chat", "friends"],
|
|
697
|
+
voice_video: ["voice", "stage", "listen", "audio", "video", "call", "screen share", "activity"],
|
|
698
|
+
threads_forums: ["thread", "forum", "post", "topic", "discussion"],
|
|
699
|
+
roles_permissions: ["role", "permission", "override", "hidden channel"],
|
|
700
|
+
notification_control: ["notification", "mention", "mute", "suppress"],
|
|
701
|
+
events_stage: ["event", "stage", "trivia", "interested", "schedule"],
|
|
702
|
+
onboarding_funnel: ["onboarding", "welcome", "get started", "rules screening", "starter", "channels & roles", "customize", "browse channels", "choose your channels"],
|
|
703
|
+
moderation_system: ["moderation", "automod", "ban", "modmail", "audit", "report", "rules", "safety", "raid"],
|
|
704
|
+
bot_ecosystem: ["bot", "automod", "integration", "app directory", "slash command", "verification", "add app", "add to server", "mee6", "webhook"],
|
|
705
|
+
server_architecture: ["category", "channel taxonomy", "channels & roles", "server guide", "server settings"],
|
|
706
|
+
community_growth: ["announcement", "event", "reward", "retention", "engagement"],
|
|
707
|
+
analytics_health: ["analytics", "insights", "server insights", "activity", "member count"],
|
|
708
|
+
monetization_membership: ["premium", "boost", "subscription", "tier", "monetiz"],
|
|
709
|
+
crisis_handling: ["raid", "spam", "harassment", "lockdown", "ban wave"],
|
|
710
|
+
cross_platform: ["github", "notion", "twitch", "stripe", "zapier", "webhook"],
|
|
711
|
+
staff_system: ["moderator", "staff", "escalation", "internal", "mod channel"],
|
|
712
|
+
brand_culture: ["community", "identity", "ritual", "culture", "recognition"],
|
|
713
|
+
governance_policy: ["rules", "policy", "enforcement", "appeal", "governance"],
|
|
714
|
+
// Safari
|
|
715
|
+
browse_navigate: ["navigate", "browser_navigate", "browser_open", "url"],
|
|
716
|
+
tabs_windows: ["tab", "browser_tabs", "window"],
|
|
717
|
+
bookmarks: ["bookmark", "reading list"],
|
|
718
|
+
history_search: ["history", "search"],
|
|
719
|
+
tab_groups: ["tab group", "profile"],
|
|
720
|
+
extensions: ["extension"],
|
|
721
|
+
dev_tools: ["inspector", "developer", "console", "browser_js"],
|
|
722
|
+
privacy_settings: ["privacy", "cookie", "blocker"],
|
|
723
|
+
web_apps: ["add to dock", "web app"],
|
|
724
|
+
// Finder
|
|
725
|
+
browse_files: ["finder", "file", "folder", "browse"],
|
|
726
|
+
copy_move: ["copy", "move", "rename", "delete", "trash"],
|
|
727
|
+
search: ["search", "spotlight"],
|
|
728
|
+
views_sort: ["view", "sort", "column", "icon", "list"],
|
|
729
|
+
tags_favorites: ["tag", "favorite", "sidebar"],
|
|
730
|
+
quick_actions: ["quick look", "quick action", "service"],
|
|
731
|
+
automator_scripts: ["automator", "terminal", "script", "applescript"],
|
|
732
|
+
// Generic (fallback for apps with generic ladders)
|
|
733
|
+
basic_navigation: ["navigate", "open", "browse", "launch"],
|
|
734
|
+
core_action: ["type_text", "click", "press", "key"],
|
|
735
|
+
settings: ["settings", "preferences", "config"],
|
|
736
|
+
advanced_features: ["advanced", "power", "shortcut", "automation"],
|
|
737
|
+
};
|
|
738
|
+
// Auto-generate ladder from reference if no builtin exists
|
|
739
|
+
if (!appMap.hasGeneratedLadder(learnBundleId)) {
|
|
740
|
+
const ref = _playbookStoreForContext.matchByBundleId(learnBundleId);
|
|
741
|
+
if (ref?.selectors && Object.keys(ref.selectors).length >= 2) {
|
|
742
|
+
const generated = appMap.generateLadderFromRef(learnBundleId, ref);
|
|
743
|
+
if (generated) {
|
|
744
|
+
// Reload mapData with new ladder
|
|
745
|
+
const refreshed = appMap.getLoaded(learnBundleId);
|
|
746
|
+
if (refreshed) {
|
|
747
|
+
Object.assign(mapData, refreshed);
|
|
748
|
+
}
|
|
749
|
+
}
|
|
750
|
+
}
|
|
751
|
+
}
|
|
752
|
+
// Merge auto-generated signals with builtins (generated takes priority)
|
|
753
|
+
const generatedSignals = appMap.getGeneratedSignals(learnBundleId);
|
|
754
|
+
const mergedSignals = { ...BUILTIN_FEATURE_SIGNALS };
|
|
755
|
+
if (generatedSignals) {
|
|
756
|
+
for (const [fid, kws] of Object.entries(generatedSignals)) {
|
|
757
|
+
mergedSignals[fid] = kws;
|
|
758
|
+
}
|
|
759
|
+
}
|
|
760
|
+
const hitFeatures = [];
|
|
761
|
+
for (const feature of mapData.featureLadder) {
|
|
762
|
+
const keywords = mergedSignals[feature.id];
|
|
763
|
+
if (!keywords)
|
|
764
|
+
continue;
|
|
765
|
+
if (keywords.some((kw) => signalText.includes(kw))) {
|
|
766
|
+
// Compute depth based on current state + tool type
|
|
767
|
+
const existing = mapData.featureMastery?.[feature.id];
|
|
768
|
+
const currentDepth = existing?.depth ?? 0;
|
|
769
|
+
let signalDepth;
|
|
770
|
+
if (isVerifyTool && currentDepth >= 3) {
|
|
771
|
+
// Verifying after a workflow = verified outcome (depth 4)
|
|
772
|
+
signalDepth = 4;
|
|
773
|
+
}
|
|
774
|
+
else if (!isNavTool && currentDepth >= 2 && (existing?.repeatCount ?? 0) >= 3) {
|
|
775
|
+
// Repeated action tool on a feature we've already actioned = workflow (depth 3)
|
|
776
|
+
signalDepth = 3;
|
|
777
|
+
}
|
|
778
|
+
else if (isNavTool) {
|
|
779
|
+
signalDepth = 1;
|
|
780
|
+
}
|
|
781
|
+
else {
|
|
782
|
+
signalDepth = 2;
|
|
783
|
+
}
|
|
784
|
+
appMap.recordFeatureSignal(learnBundleId, feature.id, signalDepth, true);
|
|
785
|
+
// Healing detection: success after prior failure = recovery
|
|
786
|
+
if (existing && existing.failCount > (existing.healingCount ?? 0)) {
|
|
787
|
+
appMap.recordHealing(learnBundleId, feature.id);
|
|
788
|
+
}
|
|
789
|
+
if (!isNavTool)
|
|
790
|
+
hitFeatures.push(feature.id);
|
|
791
|
+
}
|
|
792
|
+
}
|
|
793
|
+
// Cross-feature workflow detection: track distinct features hit by action tools.
|
|
794
|
+
// When 3+ distinct features are hit in a rolling window, record a cross-feature workflow.
|
|
795
|
+
if (!crossFeatureBuffer.has(learnBundleId)) {
|
|
796
|
+
crossFeatureBuffer.set(learnBundleId, { features: [], lastRecordedAt: 0 });
|
|
797
|
+
}
|
|
798
|
+
const cfBuf = crossFeatureBuffer.get(learnBundleId);
|
|
799
|
+
for (const fid of hitFeatures) {
|
|
800
|
+
if (!cfBuf.features.includes(fid))
|
|
801
|
+
cfBuf.features.push(fid);
|
|
802
|
+
}
|
|
803
|
+
// Trim to last 10 features
|
|
804
|
+
if (cfBuf.features.length > 10)
|
|
805
|
+
cfBuf.features = cfBuf.features.slice(-10);
|
|
806
|
+
// Record a cross-feature workflow every 3 distinct features (throttled)
|
|
807
|
+
if (cfBuf.features.length >= 3 && Date.now() - cfBuf.lastRecordedAt > 30_000) {
|
|
808
|
+
appMap.recordCrossFeatureWorkflow(learnBundleId);
|
|
809
|
+
cfBuf.lastRecordedAt = Date.now();
|
|
810
|
+
cfBuf.features = []; // Reset for next workflow
|
|
811
|
+
}
|
|
812
|
+
}
|
|
813
|
+
}
|
|
814
|
+
// Record navigation edge when window title changes (screen transition)
|
|
815
|
+
const postWindowTitle = worldModel.getFocusedWindow()?.title.value ?? null;
|
|
816
|
+
if (preWindowTitle && postWindowTitle && preWindowTitle !== postWindowTitle) {
|
|
817
|
+
const appName = worldModel.getState().focusedApp?.appName ?? "";
|
|
818
|
+
const titleSuffix = appName ? new RegExp(` - ${appName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")}$`) : null;
|
|
819
|
+
const fromNode = titleSuffix ? preWindowTitle.replace(titleSuffix, "") : preWindowTitle;
|
|
820
|
+
const toNode = titleSuffix ? postWindowTitle.replace(titleSuffix, "") : postWindowTitle;
|
|
821
|
+
if (fromNode !== toNode) {
|
|
822
|
+
appMap.addNavNode(learnBundleId, fromNode, { type: "window", description: fromNode });
|
|
823
|
+
appMap.addNavNode(learnBundleId, toNode, { type: "window", description: toNode });
|
|
824
|
+
appMap.recordEdgeOutcome(learnBundleId, fromNode, locatorTarget ?? toolName, toNode, true);
|
|
825
|
+
learningEngine.recordTopologyOutcome({
|
|
826
|
+
bundleId: learnBundleId,
|
|
827
|
+
fromNode,
|
|
828
|
+
action: locatorTarget ?? toolName,
|
|
829
|
+
toNode,
|
|
830
|
+
success: true,
|
|
831
|
+
});
|
|
832
|
+
}
|
|
833
|
+
}
|
|
834
|
+
// ── State machine: detect state changes from tool results ──
|
|
835
|
+
// Two detection paths:
|
|
836
|
+
// 1. Keyword matching on result text (original regex patterns)
|
|
837
|
+
// 2. Structural detection: key combos that open/close UI elements
|
|
838
|
+
{
|
|
839
|
+
const stateResultText = extractText(result).toLowerCase();
|
|
840
|
+
const stateTrigger = locatorTarget ?? toolName;
|
|
841
|
+
// --- Structural state detection from tool + combo patterns ---
|
|
842
|
+
// Keyboard shortcuts that toggle UI state (works even when result text has no keywords)
|
|
843
|
+
if (toolName === "key" && typeof safeParams.combo === "string") {
|
|
844
|
+
const combo = safeParams.combo.toLowerCase();
|
|
845
|
+
// Cmd+K / Ctrl+K / Cmd+P = search/command palette (dialog open)
|
|
846
|
+
if (combo === "cmd+k" || combo === "ctrl+k" || combo === "cmd+p" || combo === "ctrl+p") {
|
|
847
|
+
const prevState = appMap.getCurrentState(learnBundleId);
|
|
848
|
+
const from = prevState["modal_state"] ?? "closed";
|
|
849
|
+
appMap.recordStateChange(learnBundleId, "modal_state", from, "open", combo);
|
|
850
|
+
}
|
|
851
|
+
// Escape = dismiss dialog/modal
|
|
852
|
+
if (combo === "escape") {
|
|
853
|
+
const prevState = appMap.getCurrentState(learnBundleId);
|
|
854
|
+
if (prevState["modal_state"] === "open") {
|
|
855
|
+
appMap.recordStateChange(learnBundleId, "modal_state", "open", "closed", combo);
|
|
856
|
+
}
|
|
857
|
+
}
|
|
858
|
+
// Cmd+\ or Cmd+Shift+S = sidebar toggle (common pattern)
|
|
859
|
+
if (combo === "cmd+\\" || combo === "ctrl+\\" || combo === "cmd+shift+s") {
|
|
860
|
+
const prevState = appMap.getCurrentState(learnBundleId);
|
|
861
|
+
const currentSidebar = prevState["sidebar_state"] ?? "expanded";
|
|
862
|
+
const newSidebar = currentSidebar === "expanded" ? "collapsed" : "expanded";
|
|
863
|
+
appMap.recordStateChange(learnBundleId, "sidebar_state", currentSidebar, newSidebar, combo);
|
|
864
|
+
}
|
|
865
|
+
}
|
|
866
|
+
// --- Keyword matching on result text (original patterns) ---
|
|
867
|
+
// Modal/dialog state
|
|
868
|
+
// V4: Require noun+verb proximity to prevent false injection from element labels.
|
|
869
|
+
if (/\b(modal|dialog|popup|alert|sheet|search|command palette)\s+\w*\s*\b(opened|appeared|shown|displayed|presented)\b/.test(stateResultText) ||
|
|
870
|
+
/\b(opened|appeared|shown|displayed|presented)\s+\w*\s*\b(modal|dialog|popup|alert|sheet)\b/.test(stateResultText) ||
|
|
871
|
+
/\b(modal|dialog|popup|alert|sheet)\s+(is|was|has been)\s+(opened|shown|displayed|presented)\b/.test(stateResultText)) {
|
|
872
|
+
const prevState = appMap.getCurrentState(learnBundleId);
|
|
873
|
+
const from = prevState["modal_state"] ?? "closed";
|
|
874
|
+
appMap.recordStateChange(learnBundleId, "modal_state", from, "open", stateTrigger);
|
|
875
|
+
}
|
|
876
|
+
else if (/\b(modal|dialog|popup|alert|sheet)\s+\w*\s*\b(closed|dismissed|hidden|disappeared)\b/.test(stateResultText) ||
|
|
877
|
+
/\b(closed|dismissed|hidden|disappeared)\s+\w*\s*\b(modal|dialog|popup|alert|sheet)\b/.test(stateResultText) ||
|
|
878
|
+
/\b(modal|dialog|popup|alert|sheet)\s+(is|was|has been)\s+(closed|dismissed|hidden)\b/.test(stateResultText)) {
|
|
879
|
+
const prevState = appMap.getCurrentState(learnBundleId);
|
|
880
|
+
const from = prevState["modal_state"] ?? "open";
|
|
881
|
+
appMap.recordStateChange(learnBundleId, "modal_state", from, "closed", stateTrigger);
|
|
882
|
+
}
|
|
883
|
+
// Sidebar/panel state
|
|
884
|
+
if (/\b(sidebar|panel)\s+\w*\s*\b(collapsed|hidden|closed|minimized)\b/.test(stateResultText) ||
|
|
885
|
+
/\b(collapsed|hidden|closed|minimized)\s+\w*\s*\b(sidebar|panel)\b/.test(stateResultText) ||
|
|
886
|
+
/\b(sidebar|panel)\s+(is|was|has been)\s+(collapsed|hidden|closed|minimized)\b/.test(stateResultText)) {
|
|
887
|
+
const prevState = appMap.getCurrentState(learnBundleId);
|
|
888
|
+
const from = prevState["sidebar_state"] ?? "expanded";
|
|
889
|
+
appMap.recordStateChange(learnBundleId, "sidebar_state", from, "collapsed", stateTrigger);
|
|
890
|
+
}
|
|
891
|
+
else if (/\b(sidebar|panel)\s+\w*\s*\b(expanded|shown|opened|visible|maximized)\b/.test(stateResultText) ||
|
|
892
|
+
/\b(expanded|shown|opened|visible|maximized)\s+\w*\s*\b(sidebar|panel)\b/.test(stateResultText) ||
|
|
893
|
+
/\b(sidebar|panel)\s+(is|was|has been)\s+(expanded|shown|opened|visible|maximized)\b/.test(stateResultText)) {
|
|
894
|
+
const prevState = appMap.getCurrentState(learnBundleId);
|
|
895
|
+
const from = prevState["sidebar_state"] ?? "collapsed";
|
|
896
|
+
appMap.recordStateChange(learnBundleId, "sidebar_state", from, "expanded", stateTrigger);
|
|
897
|
+
}
|
|
898
|
+
// View mode state (e.g., board/list/table/grid/timeline)
|
|
899
|
+
const viewModeMatch = stateResultText.match(/\b(board|list|table|grid|timeline|calendar|gallery|kanban)\s*view\b/);
|
|
900
|
+
if (!viewModeMatch) {
|
|
901
|
+
const altViewMatch = stateResultText.match(/(?:switched\s+to|view:\s*)\s*(board|list|table|grid|timeline|calendar|gallery|kanban)\b/);
|
|
902
|
+
if (altViewMatch) {
|
|
903
|
+
const newView = altViewMatch[1];
|
|
904
|
+
const prevState = appMap.getCurrentState(learnBundleId);
|
|
905
|
+
const from = prevState["view_mode"] ?? "unknown";
|
|
906
|
+
if (from !== newView) {
|
|
907
|
+
appMap.recordStateChange(learnBundleId, "view_mode", from, newView, stateTrigger);
|
|
908
|
+
}
|
|
909
|
+
}
|
|
910
|
+
}
|
|
911
|
+
else {
|
|
912
|
+
const newView = viewModeMatch[1];
|
|
913
|
+
const prevState = appMap.getCurrentState(learnBundleId);
|
|
914
|
+
const from = prevState["view_mode"] ?? "unknown";
|
|
915
|
+
if (from !== newView) {
|
|
916
|
+
appMap.recordStateChange(learnBundleId, "view_mode", from, newView, stateTrigger);
|
|
917
|
+
}
|
|
918
|
+
}
|
|
919
|
+
}
|
|
920
|
+
// ── Hierarchy extraction from UI inspection tools ──
|
|
921
|
+
// Extract parent/child containment from any tool that reveals structure
|
|
922
|
+
{
|
|
923
|
+
const HIERARCHY_TOOLS = new Set(["ui_tree", "ui_find", "screenshot", "ocr"]);
|
|
924
|
+
if (HIERARCHY_TOOLS.has(toolName)) {
|
|
925
|
+
try {
|
|
926
|
+
const treeText = extractText(result);
|
|
927
|
+
if (treeText) {
|
|
928
|
+
const lines = treeText.split("\n");
|
|
929
|
+
const hierarchyZone = contextTracker.currentPageContext
|
|
930
|
+
? `page::${contextTracker.currentPageContext}` : "auto_discovered";
|
|
931
|
+
if (toolName === "ui_tree" || toolName === "ui_find") {
|
|
932
|
+
// Parse indented AX tree: depth 0 = root, depth 1 = top containers, depth 2 = children
|
|
933
|
+
// Format: " ".repeat(depth) + role "title" ...
|
|
934
|
+
const containers = [];
|
|
935
|
+
for (const line of lines) {
|
|
936
|
+
const stripped = line.replace(/\s+$/, "");
|
|
937
|
+
const indent = stripped.length - stripped.trimStart().length;
|
|
938
|
+
const depth = Math.floor(indent / 2);
|
|
939
|
+
const titleMatch = stripped.match(/"([^"]+)"/);
|
|
940
|
+
if (!titleMatch)
|
|
941
|
+
continue;
|
|
942
|
+
const label = titleMatch[1];
|
|
943
|
+
if (!label || label.length > 200)
|
|
944
|
+
continue;
|
|
945
|
+
if (depth <= 1) {
|
|
946
|
+
containers.push({ label, depth, children: [] });
|
|
947
|
+
}
|
|
948
|
+
else if (depth === 2 && containers.length > 0) {
|
|
949
|
+
const parent = containers[containers.length - 1];
|
|
950
|
+
if (parent && parent.children.length < 50) {
|
|
951
|
+
parent.children.push(label);
|
|
952
|
+
}
|
|
953
|
+
}
|
|
954
|
+
}
|
|
955
|
+
for (const container of containers) {
|
|
956
|
+
if (container.children.length > 0) {
|
|
957
|
+
appMap.recordHierarchy(learnBundleId, hierarchyZone, container.label, container.children, "ax_tree");
|
|
958
|
+
}
|
|
959
|
+
}
|
|
960
|
+
}
|
|
961
|
+
else {
|
|
962
|
+
// screenshot/ocr: extract spatial grouping from OCR lines
|
|
963
|
+
// OCR text is top-to-bottom — consecutive lines within the same
|
|
964
|
+
// vertical region (heading followed by items) form parent/child
|
|
965
|
+
const ocrLabels = [];
|
|
966
|
+
for (const line of lines) {
|
|
967
|
+
const trimmed = line.trim();
|
|
968
|
+
if (trimmed && trimmed.length >= 2 && trimmed.length <= 100) {
|
|
969
|
+
ocrLabels.push(trimmed);
|
|
970
|
+
}
|
|
971
|
+
}
|
|
972
|
+
// Heuristic: detect section headings from OCR text.
|
|
973
|
+
// A heading is a short label (1-2 words, <=20 chars) followed by 2+ lines,
|
|
974
|
+
// or a title-case label followed by bullet-prefixed items.
|
|
975
|
+
// Catches "Recents", "Private", "Tasks Tracker" in Notion, etc.
|
|
976
|
+
let currentParent = null;
|
|
977
|
+
let currentChildren = [];
|
|
978
|
+
const flushGroup = () => {
|
|
979
|
+
if (currentParent && currentChildren.length > 0) {
|
|
980
|
+
appMap.recordHierarchy(learnBundleId, hierarchyZone, currentParent, currentChildren.slice(0, 50), "ocr_spatial");
|
|
981
|
+
}
|
|
982
|
+
currentParent = null;
|
|
983
|
+
currentChildren = [];
|
|
984
|
+
};
|
|
985
|
+
for (let i = 0; i < ocrLabels.length; i++) {
|
|
986
|
+
const label = ocrLabels[i];
|
|
987
|
+
const isAllCaps = /^[A-Z][A-Z\s]{2,}$/.test(label);
|
|
988
|
+
const hasColon = label.endsWith(":");
|
|
989
|
+
// Short single/double-word section name (e.g. "Recents", "Private", "New database")
|
|
990
|
+
const isShortSection = /^[A-Z][a-z]+(\s+[a-z]+)?$/.test(label) && label.length <= 20;
|
|
991
|
+
// Title-case heading: 1-4 words
|
|
992
|
+
const isTitleCase = /^[A-Z][a-zA-Z]+(\s+[A-Za-z]+){0,3}$/.test(label) && label.length <= 30;
|
|
993
|
+
const hasFollowingContent = i + 2 < ocrLabels.length;
|
|
994
|
+
// Bullet/icon items (strong signal)
|
|
995
|
+
const nextHasBullet = (idx) => {
|
|
996
|
+
const next = ocrLabels[idx];
|
|
997
|
+
return next != null && /^[•\*\+\-\u2022\u25CF※®=¿]/.test(next);
|
|
998
|
+
};
|
|
999
|
+
const followedByBullets = hasFollowingContent && nextHasBullet(i + 1);
|
|
1000
|
+
const isHeading = isAllCaps || hasColon || (isShortSection && hasFollowingContent) || (isTitleCase && followedByBullets);
|
|
1001
|
+
if (isHeading) {
|
|
1002
|
+
flushGroup();
|
|
1003
|
+
currentParent = label.replace(/:$/, "");
|
|
1004
|
+
}
|
|
1005
|
+
else if (currentParent) {
|
|
1006
|
+
currentChildren.push(label);
|
|
1007
|
+
}
|
|
1008
|
+
}
|
|
1009
|
+
flushGroup();
|
|
1010
|
+
}
|
|
1011
|
+
}
|
|
1012
|
+
}
|
|
1013
|
+
catch { /* hierarchy extraction non-fatal */ }
|
|
1014
|
+
}
|
|
1015
|
+
}
|
|
1016
|
+
// ── Conditional UI visibility tracking (throttled) ──
|
|
1017
|
+
// Every 3rd inspection-like tool call, compare discovered elements against
|
|
1018
|
+
// known map elements to detect which appear/disappear by page context.
|
|
1019
|
+
{
|
|
1020
|
+
const VISIBILITY_TOOLS = new Set([
|
|
1021
|
+
"ui_tree", "ocr", "ui_find", "screenshot", "click_text",
|
|
1022
|
+
"windows", "browser_dom", "browser_page_info",
|
|
1023
|
+
]);
|
|
1024
|
+
if (VISIBILITY_TOOLS.has(toolName)) {
|
|
1025
|
+
visibilityCheckCounter++;
|
|
1026
|
+
}
|
|
1027
|
+
if (visibilityCheckCounter % 3 === 0 && VISIBILITY_TOOLS.has(toolName)) {
|
|
1028
|
+
try {
|
|
1029
|
+
const visMapData = appMap.getLoaded(learnBundleId);
|
|
1030
|
+
const visPageCtx = contextTracker.currentPageContext ?? "";
|
|
1031
|
+
if (visMapData && visPageCtx) {
|
|
1032
|
+
// Collect element labels from the result text
|
|
1033
|
+
const visResultText = extractText(result);
|
|
1034
|
+
const discoveredLabels = new Set();
|
|
1035
|
+
// Extract quoted labels (from ui_tree/ui_find format)
|
|
1036
|
+
const labelMatches = visResultText.matchAll(/"([^"]{1,100})"/g);
|
|
1037
|
+
for (const m of labelMatches) {
|
|
1038
|
+
if (m[1])
|
|
1039
|
+
discoveredLabels.add(m[1]);
|
|
1040
|
+
}
|
|
1041
|
+
// Also extract unquoted OCR/screenshot text lines as potential labels
|
|
1042
|
+
for (const line of visResultText.split("\n")) {
|
|
1043
|
+
const trimmed = line.trim();
|
|
1044
|
+
if (trimmed && trimmed.length >= 2 && trimmed.length <= 80 && !/^[\[\(]/.test(trimmed)) {
|
|
1045
|
+
discoveredLabels.add(trimmed);
|
|
1046
|
+
}
|
|
1047
|
+
}
|
|
1048
|
+
// For known elements in the map, record whether they were seen or absent
|
|
1049
|
+
const knownElements = new Set();
|
|
1050
|
+
for (const zone of Object.values(visMapData.zones)) {
|
|
1051
|
+
for (const el of zone.elements) {
|
|
1052
|
+
knownElements.add(el.label);
|
|
1053
|
+
}
|
|
1054
|
+
}
|
|
1055
|
+
for (const label of knownElements) {
|
|
1056
|
+
const seen = discoveredLabels.has(label);
|
|
1057
|
+
appMap.recordElementVisibility(learnBundleId, label, visPageCtx, seen);
|
|
1058
|
+
}
|
|
1059
|
+
}
|
|
1060
|
+
}
|
|
1061
|
+
catch { /* visibility tracking non-fatal */ }
|
|
1062
|
+
}
|
|
1063
|
+
}
|
|
1064
|
+
// ── Timing recording: track tool response times per element ──
|
|
1065
|
+
{
|
|
1066
|
+
const TIMING_TOOLS = new Set([
|
|
1067
|
+
"click", "click_text", "type_text", "key", "menu_click",
|
|
1068
|
+
"browser_click", "browser_type",
|
|
1069
|
+
]);
|
|
1070
|
+
if (TIMING_TOOLS.has(toolName)) {
|
|
1071
|
+
const timingLabel = locatorTarget ?? toolName;
|
|
1072
|
+
appMap.recordTiming(learnBundleId, toolName + "::" + timingLabel, "element_response", durationMs);
|
|
1073
|
+
}
|
|
1074
|
+
// Ready-signal recording
|
|
1075
|
+
// 1. Explicit wait tools
|
|
1076
|
+
if (toolName === "browser_wait" || toolName === "wait_for_state") {
|
|
1077
|
+
appMap.recordReadySignal(learnBundleId, lastSuccessfulToolName, "wait_completed", durationMs);
|
|
1078
|
+
}
|
|
1079
|
+
// 2. Any interaction tool that took notably long (>1.5s) = implicit wait
|
|
1080
|
+
// This captures slow page loads, animation waits, network-bound actions
|
|
1081
|
+
if (durationMs > 1500 && TIMING_TOOLS.has(toolName)) {
|
|
1082
|
+
appMap.recordReadySignal(learnBundleId, toolName, "slow_response", durationMs);
|
|
1083
|
+
}
|
|
1084
|
+
// 3. Screenshot/OCR after a navigation click = page-ready signal
|
|
1085
|
+
if ((toolName === "screenshot" || toolName === "ocr") && lastSuccessfulToolName === "click_text") {
|
|
1086
|
+
appMap.recordReadySignal(learnBundleId, "click_text", "page_ready", durationMs);
|
|
1087
|
+
}
|
|
1088
|
+
}
|
|
1089
|
+
// Refresh mastery level after updates
|
|
1090
|
+
appMap.refreshMastery(learnBundleId);
|
|
1091
|
+
}
|
|
1092
|
+
catch { /* app map update non-fatal */ }
|
|
1093
|
+
}
|
|
1094
|
+
// Track last successful tool name for ready-signal context
|
|
1095
|
+
lastSuccessfulToolName = toolName;
|
|
1096
|
+
// ── POST-CALL: capture for playbook recording if active ──
|
|
1097
|
+
if (mcpRecorder.isRecording) {
|
|
1098
|
+
const fullResultText = Array.isArray(result?.content) ? result.content.map((c) => c.text ?? "").join(" ") : "";
|
|
1099
|
+
const resultText = fullResultText.length > 500 ? fullResultText.substring(0, 500) + " [TRUNCATED]" : fullResultText;
|
|
1100
|
+
mcpRecorder.captureToolCall(toolName, safeParams, true, resultText, durationMs);
|
|
1101
|
+
}
|
|
180
1102
|
// ── POST-CALL: auto-recall hints (~0ms, in-memory) ──
|
|
181
1103
|
const hints = [];
|
|
182
|
-
//
|
|
1104
|
+
// Playbook-aware hints (errors, selectors, job suggestions)
|
|
1105
|
+
for (const h of playbookHints) {
|
|
1106
|
+
hints.push(h);
|
|
1107
|
+
}
|
|
1108
|
+
// World model summary (window/control state)
|
|
1109
|
+
const wmSummary = worldModel.toSummary();
|
|
1110
|
+
if (wmSummary && worldModel.getState().windows.size > 0) {
|
|
1111
|
+
hints.push(`World: ${wmSummary.split("\n")[0]}`);
|
|
1112
|
+
}
|
|
1113
|
+
// Perception freshness
|
|
1114
|
+
if (perceptionManager.isRunning) {
|
|
1115
|
+
hints.push(perceptionManager.getFreshnessSummary());
|
|
1116
|
+
}
|
|
1117
|
+
// Learning engine recommendations
|
|
1118
|
+
const patternRec = learningEngine.recommendPattern(learnBundleId, toolName);
|
|
1119
|
+
if (patternRec) {
|
|
1120
|
+
const rate = ((patternRec.successCount / Math.max(1, patternRec.successCount + patternRec.failCount)) * 100).toFixed(0);
|
|
1121
|
+
hints.push(`Pattern: "${patternRec.locator}" (${patternRec.method}, ${rate}% over ${patternRec.successCount + patternRec.failCount} uses)`);
|
|
1122
|
+
}
|
|
1123
|
+
const learnLocator = learningEngine.recommendLocator(learnBundleId, toolName);
|
|
1124
|
+
if (learnLocator) {
|
|
1125
|
+
hints.push(`Learning: best locator for ${toolName} → "${learnLocator.locator}" (${learnLocator.method}, ${learnLocator.score.toFixed(2)} score, ${learnLocator.successCount}/${learnLocator.successCount + learnLocator.failCount} success)`);
|
|
1126
|
+
}
|
|
1127
|
+
const adaptiveBudget = learningEngine.getAdaptiveBudget(learnBundleId);
|
|
1128
|
+
if (adaptiveBudget.locateMs !== 800 || adaptiveBudget.actMs !== 200 || adaptiveBudget.verifyMs !== 2000) {
|
|
1129
|
+
hints.push(`Learning: adaptive budgets → locate=${adaptiveBudget.locateMs}ms, act=${adaptiveBudget.actMs}ms, verify=${adaptiveBudget.verifyMs}ms`);
|
|
1130
|
+
}
|
|
1131
|
+
// Warn about known errors for this tool (from memory)
|
|
183
1132
|
if (knownError) {
|
|
184
1133
|
hints.push(`⚡ Memory: "${toolName}" has failed before: "${knownError.error}" (${knownError.occurrences}x). Fix: ${knownError.resolution}`);
|
|
185
1134
|
}
|
|
186
1135
|
// Suggest next step if we're mid-strategy
|
|
187
1136
|
const recentTools = memory.getRecentToolNames();
|
|
188
|
-
const strategyHint = memory.quickStrategyHint(recentTools);
|
|
1137
|
+
const strategyHint = memory.quickStrategyHint(recentTools, worldModel.getState().focusedApp?.bundleId);
|
|
189
1138
|
if (strategyHint) {
|
|
190
1139
|
activeStrategyFingerprint = strategyHint.fingerprint;
|
|
191
1140
|
const nextParams = Object.keys(strategyHint.nextStep.params).length > 0
|
|
@@ -203,10 +1152,16 @@ server.tool = (...args) => {
|
|
|
203
1152
|
memory.recordStrategyOutcome(activeStrategyFingerprint, true);
|
|
204
1153
|
activeStrategyFingerprint = null;
|
|
205
1154
|
}
|
|
206
|
-
// Attach hints
|
|
1155
|
+
// Attach hints in BOTH content (visible) and _meta (for programmatic access)
|
|
207
1156
|
if (hints.length > 0) {
|
|
1157
|
+
const hintText = hints.join("\n");
|
|
1158
|
+
const resultContent = Array.isArray(result?.content) ? result.content : [];
|
|
208
1159
|
return {
|
|
209
1160
|
...result,
|
|
1161
|
+
content: [
|
|
1162
|
+
...resultContent,
|
|
1163
|
+
{ type: "text", text: `\n---\n${hintText}` },
|
|
1164
|
+
],
|
|
210
1165
|
_meta: { ...(result?._meta ?? {}), memoryHints: hints },
|
|
211
1166
|
};
|
|
212
1167
|
}
|
|
@@ -228,6 +1183,69 @@ server.tool = (...args) => {
|
|
|
228
1183
|
error: errorMsg,
|
|
229
1184
|
};
|
|
230
1185
|
memory.recordEvent(entry); // non-blocking write + session tracking
|
|
1186
|
+
// ── Record failure for playbook learning (in-memory only) ──
|
|
1187
|
+
contextTracker.recordOutcome(toolName, safeParams, false, errorMsg);
|
|
1188
|
+
// ── Feed learning engine (failure timing + locator) ──
|
|
1189
|
+
const learnBundleIdErr = worldModel.getState().focusedApp?.bundleId ?? lastKnownBundleId ?? "unknown";
|
|
1190
|
+
learningEngine.recordToolTiming({ tool: toolName, bundleId: learnBundleIdErr, durationMs, success: false });
|
|
1191
|
+
const failedLocator = safeParams.target ?? safeParams.selector ?? safeParams.locator
|
|
1192
|
+
?? (toolName === "click_text" ? safeParams.text : undefined);
|
|
1193
|
+
if (typeof failedLocator === "string" && failedLocator) {
|
|
1194
|
+
const method = toolName.startsWith("browser_") ? "cdp"
|
|
1195
|
+
: toolName.includes("ocr") ? "ocr"
|
|
1196
|
+
: "ax";
|
|
1197
|
+
learningEngine.recordLocatorOutcome({
|
|
1198
|
+
bundleId: learnBundleIdErr,
|
|
1199
|
+
actionKey: toolName,
|
|
1200
|
+
locator: failedLocator,
|
|
1201
|
+
method,
|
|
1202
|
+
success: false,
|
|
1203
|
+
});
|
|
1204
|
+
// Record failed pattern to patterns.jsonl
|
|
1205
|
+
learningEngine.recordPattern({
|
|
1206
|
+
bundleId: learnBundleIdErr,
|
|
1207
|
+
tool: toolName,
|
|
1208
|
+
locator: failedLocator,
|
|
1209
|
+
method,
|
|
1210
|
+
success: false,
|
|
1211
|
+
});
|
|
1212
|
+
}
|
|
1213
|
+
// ── POST-CALL: record failure in app mastery map ──
|
|
1214
|
+
if (learnBundleIdErr !== "unknown") {
|
|
1215
|
+
try {
|
|
1216
|
+
if (typeof failedLocator === "string" && failedLocator) {
|
|
1217
|
+
appMap.recordElementOutcome(learnBundleIdErr, "auto", failedLocator, false, contextTracker.currentPageContext ?? undefined);
|
|
1218
|
+
}
|
|
1219
|
+
// Record action failure
|
|
1220
|
+
const isFailedAction = ACTION_TOOLS.has(toolName);
|
|
1221
|
+
if (isFailedAction) {
|
|
1222
|
+
appMap.recordActionOutcome(learnBundleIdErr, false);
|
|
1223
|
+
}
|
|
1224
|
+
// Record feature signal failure (affects confidence and reliability)
|
|
1225
|
+
const failMapData = appMap.getLoaded(learnBundleIdErr);
|
|
1226
|
+
if (failMapData?.featureLadder) {
|
|
1227
|
+
const failSignal = [toolName, typeof failedLocator === "string" ? failedLocator : ""].join(" ").toLowerCase();
|
|
1228
|
+
const failGeneratedSignals = appMap.getGeneratedSignals(learnBundleIdErr) ?? {};
|
|
1229
|
+
for (const feature of failMapData.featureLadder) {
|
|
1230
|
+
const fm = failMapData.featureMastery?.[feature.id];
|
|
1231
|
+
if (!fm || fm.depth === 0)
|
|
1232
|
+
continue; // Only track failures on features we've seen
|
|
1233
|
+
// Check feature ID match OR keyword match (same as success path)
|
|
1234
|
+
const featureInSignal = failSignal.includes(feature.id.replace(/_/g, " "));
|
|
1235
|
+
const keywords = failGeneratedSignals[feature.id];
|
|
1236
|
+
const keywordMatch = keywords?.some((kw) => failSignal.includes(kw));
|
|
1237
|
+
if (featureInSignal || keywordMatch) {
|
|
1238
|
+
appMap.recordFeatureSignal(learnBundleIdErr, feature.id, fm.depth, false);
|
|
1239
|
+
}
|
|
1240
|
+
}
|
|
1241
|
+
}
|
|
1242
|
+
}
|
|
1243
|
+
catch { /* app map update non-fatal */ }
|
|
1244
|
+
}
|
|
1245
|
+
// ── Capture failure for playbook recording ──
|
|
1246
|
+
if (mcpRecorder.isRecording) {
|
|
1247
|
+
mcpRecorder.captureToolCall(toolName, safeParams, false, errorMsg, durationMs);
|
|
1248
|
+
}
|
|
231
1249
|
// Record strategy failure if we were following one
|
|
232
1250
|
if (activeStrategyFingerprint) {
|
|
233
1251
|
memory.recordStrategyOutcome(activeStrategyFingerprint, false);
|
|
@@ -252,6 +1270,9 @@ server.tool = (...args) => {
|
|
|
252
1270
|
}
|
|
253
1271
|
throw err;
|
|
254
1272
|
}
|
|
1273
|
+
finally {
|
|
1274
|
+
currentAdaptiveBudget = null;
|
|
1275
|
+
}
|
|
255
1276
|
};
|
|
256
1277
|
const newArgs = [...args];
|
|
257
1278
|
newArgs[handlerIdx] = wrappedHandler;
|
|
@@ -263,31 +1284,236 @@ server.tool = (...args) => {
|
|
|
263
1284
|
// Tool "apps": lists applications as "name (bundleId) pid=N" lines, one per app.
// Starts from the bridge's app.list result and augments it with the frontmost
// app and with owners of sizeable windows that the list does not contain.
server.tool("apps", "List all running applications with bundle IDs and PIDs", {}, async () => {
    await ensureBridge();
    const running = await bridge.call("app.list");
    // L3-04 fix: some Electron apps (Slack, Discord) are missing from the
    // running-application list even though they have visible windows, so the
    // frontmost app is appended when its pid is not already present.
    try {
        const frontmost = await bridge.call("app.frontmost", {});
        const missingFromList = frontmost.pid && !running.some((entry) => entry.pid === frontmost.pid);
        if (missingFromList) {
            running.push({ ...frontmost, isActive: true });
        }
    }
    catch { /* ignore */ }
    // Second augmentation pass: owners of windows reported by app.windows.
    try {
        const windows = await bridge.call("app.windows");
        // Tracks every pid already in the output, including ones added below,
        // so each window owner is appended at most once.
        const knownPids = new Set(running.map((entry) => entry.pid));
        for (const win of windows) {
            const ownerPid = win.pid || win.ownerPid;
            const bundle = win.bundleId || "";
            if (!ownerPid || knownPids.has(ownerPid)) {
                continue;
            }
            // XPC services, loginwindow and unidentified owners are not user apps.
            const isSystemHelper = bundle.includes(".xpc.") || ["com.apple.loginwindow", "unknown", ""].includes(bundle);
            if (isSystemHelper) {
                continue;
            }
            // Windows narrower or shorter than 50 are treated as overlays and skipped.
            const frame = win.bounds || {};
            const tooSmall = (frame.width || 0) < 50 || (frame.height || 0) < 50;
            if (tooSmall) {
                continue;
            }
            knownPids.add(ownerPid);
            running.push({
                bundleId: bundle,
                name: win.appName || "Unknown",
                pid: ownerPid,
                isActive: false,
            });
        }
    }
    catch { /* ignore */ }
    const describeApp = (entry) => {
        const activeMarker = entry.isActive ? " ← active" : "";
        return `${entry.name} (${entry.bundleId}) pid=${entry.pid}${activeMarker}`;
    };
    const text = running.map((entry) => describeApp(entry)).join("\n");
    return { content: [{ type: "text", text }] };
});
|
|
269
1327
|
// Tool "windows": lists windows as
//   [windowId] appName "title" (x,y) WxH[ [minimized]]
// one per line. A window is listed when it has a non-empty title or is larger
// than 50x50; everything else is dropped as noise.
server.tool("windows", "List all visible windows with IDs, positions, and sizes", {}, async () => {
    await ensureBridge();
    const wins = await bridge.call("app.windows");
    const lines = [];
    for (const w of wins) {
        const b = w.bounds || {};
        const hasTitle = w.title && w.title.length > 0;
        const hasSize = (b.width || 0) > 50 && (b.height || 0) > 50;
        if (!hasTitle && !hasSize) {
            continue;
        }
        // A window can pass on size alone, so the title may be missing here.
        // Render it as an empty string instead of the literal "undefined"/"null".
        const title = w.title ?? "";
        // Same fallback the "apps" tool uses for window owners without a name.
        const appName = w.appName || "Unknown";
        const onScreen = w.isOnScreen === false ? " [minimized]" : "";
        lines.push(`[${w.windowId}] ${appName} "${title}" (${Math.round(b.x || 0)},${Math.round(b.y || 0)}) ${Math.round(b.width || 0)}x${Math.round(b.height || 0)}${onScreen}`);
    }
    return { content: [{ type: "text", text: lines.join("\n") }] };
});
|
|
278
|
-
server.tool("focus", "Focus/activate an application", {
|
|
1344
|
+
// Tool "focus": brings an application (or one specific window) to the front.
//
// Params:
//   bundleId - bundle ID of the app to activate.
//   windowId - optional window ID; when given, that window is raised instead
//              of activating the app by bundle ID.
// Returns a text result: "Focused <bundleId>", a "Warning: ..." message when
// another app is still frontmost, or an isError result when the app has
// neither an app.list entry nor a window.
server.tool("focus", "Focus/activate an application (or a specific window by windowId)", {
    bundleId: z.string().describe("App bundle ID, e.g. com.apple.Safari"),
    windowId: z.number().optional().describe("Specific window ID from windows() — raises that exact window. Use when multiple instances of the same app exist."),
}, async ({ bundleId, windowId }) => {
    await ensureBridge();
    // Serialize focus calls — only one app can be frontmost, and concurrent
    // focus() calls multiply the number of bridge calls in flight. Each call
    // chains onto the previous call's promise and releases its own in `finally`.
    let releaseLock;
    const previousLock = focusLock;
    focusLock = new Promise((r) => { releaseLock = r; });
    await previousLock;
    try {
        // Step 0: verify the app is actually running — fail fast with error content.
        const runningApps = await bridge.call("app.list", {});
        let targetApp = runningApps?.find((a) => a.bundleId === bundleId);
        if (!targetApp) {
            // L3-04 fix: some Electron apps (Slack, Discord) don't appear in app.list.
            // Accept the app if it owns a window before rejecting.
            try {
                const wins = await bridge.call("app.windows");
                const appWin = wins?.find((w) => w.bundleId === bundleId);
                if (appWin) {
                    targetApp = { bundleId, name: appWin.appName, pid: appWin.pid || appWin.ownerPid };
                }
            }
            catch { /* ignore */ }
            if (!targetApp) {
                return { content: [{ type: "text", text: `Error: ${bundleId} is not running. Use launch("${bundleId}") first.` }], isError: true };
            }
        }
        // Step 1: focus. An explicit windowId targets that exact window (L3-01 fix),
        // which matters for multi-instance apps where bundleId-based focus can
        // raise the wrong window. Failures are remembered, not thrown, so the
        // verification below can still run.
        let bridgeFocusError;
        try {
            if (windowId != null) {
                await bridge.call("window.focus", { windowId });
            }
            else {
                await bridge.call("app.focus", { bundleId });
            }
        }
        catch (e) {
            bridgeFocusError = e?.message ?? String(e);
        }
        // Step 2: verify after a 150ms settle delay (50ms was too short on cold start).
        await new Promise((r) => setTimeout(r, 150));
        let focusMsg = "Focused " + bundleId;
        const notFrontmostWarning = (actual) => `Warning: focus requested for ${bundleId} but ${actual.bundleId} (${actual.name}) is frontmost. Try again or use launch() first.`;
        try {
            const front = await bridge.call("app.frontmost", {});
            if (front.bundleId !== bundleId) {
                // MCP-level retry: AppleScript activation as final fallback.
                try {
                    // Escape backslashes and double quotes so the bundle ID cannot
                    // terminate the AppleScript string literal it is embedded in.
                    const scriptSafeId = bundleId.replace(/\\/g, "\\\\").replace(/"/g, '\\"');
                    await bridge.call("as.run", { script: `tell application id "${scriptSafeId}" to activate` });
                    await new Promise((r) => setTimeout(r, 200));
                    const front2 = await bridge.call("app.frontmost", {});
                    if (front2.bundleId !== bundleId) {
                        focusMsg = notFrontmostWarning(front2);
                    }
                }
                catch {
                    // The fallback itself failed; report what was frontmost before it.
                    focusMsg = notFrontmostWarning(front);
                }
            }
        }
        catch {
            // Could not query the frontmost app; surface the Step 1 error if any.
            if (bridgeFocusError) {
                focusMsg = `Warning: ${bridgeFocusError}. Call apps() to check if ${bundleId} is running.`;
            }
        }
        // Step 3: world model + perception (best-effort, after verification).
        try {
            const apps = await bridge.call("app.list", {});
            // Fall back to the Step 0 entry so apps that are only discoverable via
            // their windows still get their world-model / perception update.
            const app = apps?.find((a) => a.bundleId === bundleId) ?? targetApp;
            if (app) {
                // Prefer the caller's explicit window. Only resolve one from the pid
                // when none was requested, so this step never replaces the window
                // the caller asked for with a different window of the same app.
                let effectiveWindowId = windowId;
                if (effectiveWindowId == null) {
                    try {
                        effectiveWindowId = await resolveWindowId(app.pid);
                    }
                    catch { /* best-effort */ }
                }
                if (effectiveWindowId != null) {
                    try {
                        await bridge.call("window.focus", { windowId: effectiveWindowId });
                    }
                    catch { /* best-effort */ }
                }
                const ctx = {
                    bundleId,
                    appName: app.name ?? bundleId,
                    pid: app.pid,
                    windowTitle: "",
                    ...(effectiveWindowId != null ? { windowId: effectiveWindowId } : {}),
                };
                worldModel.updateFocusedApp(ctx);
                lastKnownBundleId = bundleId;
                try {
                    await perceptionManager.ensureStarted(ctx);
                    installSafariEnricher(bundleId);
                }
                catch { /* best-effort */ }
            }
        }
        catch { /* app.list failed — world model update is best-effort */ }
        return { content: [{ type: "text", text: focusMsg }] };
    }
    finally {
        // Always release the lock so queued focus() calls can proceed.
        releaseLock();
    }
});
|
|
285
|
-
server.tool("launch", "Launch an application", {
|
|
1450
|
+
// Tool "launch": starts an application by bundle ID.
//
// Params:
//   bundleId - bundle ID of the app to launch.
//   cdpPort  - optional remote-debugging port for the known Chromium-based
//              browsers below (default 9222); unused for other apps.
// Known Chromium-based browsers are spawned directly with
// --remote-debugging-port and a dedicated user-data-dir; everything else (and
// any failed CDP launch) goes through the bridge's app.launch.
// Returns a text result "Launched <name> pid=<pid>" plus optional CDP and
// risky-app notes.
server.tool("launch", "Launch an application. Chrome/Chromium browsers are launched with CDP enabled (port 9222) for browser_* tools.", {
    bundleId: z.string().describe("App bundle ID"),
    cdpPort: z.number().optional().describe("CDP port for Chrome/Chromium (default: 9222). Ignored for non-browser apps."),
}, async ({ bundleId, cdpPort }) => {
    await ensureBridge();
    // Maps (not plain objects) so a bundleId such as "constructor" or "toString"
    // cannot match an inherited Object.prototype property.
    const riskyBundleIds = new Map([
        ["com.apple.Terminal", "Terminal"],
        ["com.apple.ScriptEditor2", "Script Editor"],
        ["com.googlecode.iterm2", "iTerm"],
        ["com.apple.ActivityMonitor", "Activity Monitor"],
    ]);
    // Chrome/Chromium: launch with CDP enabled so browser_* tools work immediately.
    const chromeBundleIds = new Map([
        ["com.google.Chrome", "Google Chrome"],
        ["com.google.Chrome.canary", "Google Chrome Canary"],
        ["com.brave.Browser", "Brave Browser"],
        ["com.microsoft.edgemac", "Microsoft Edge"],
        ["org.chromium.Chromium", "Chromium"],
    ]);
    const chromeAppName = chromeBundleIds.get(bundleId);
    const port = cdpPort ?? 9222;
    // True only once the browser binary was actually spawned with the CDP flag,
    // so the result message never advertises CDP after a plain fallback launch.
    let cdpEnabled = false;
    let r;
    if (chromeAppName) {
        try {
            // Spawn the browser binary directly with --remote-debugging-port.
            // A dedicated user-data-dir is used because Chrome ignores the CDP flag
            // when the default profile is already locked by a previous instance.
            const { spawn } = await import("child_process");
            const os = await import("os");
            const chromeBinary = `/Applications/${chromeAppName}.app/Contents/MacOS/${chromeAppName}`;
            const cdpProfile = `${os.tmpdir()}/screenhand-cdp-${port}`;
            const proc = spawn(chromeBinary, [
                `--remote-debugging-port=${port}`,
                `--user-data-dir=${cdpProfile}`,
            ], { detached: true, stdio: "ignore" });
            // spawn() reports failures (e.g. binary not installed) through an
            // asynchronous 'error' event, not a throw. Without a listener that
            // event becomes an uncaught exception, so record it and re-throw it
            // below to reach the fallback in `catch`.
            let spawnError;
            proc.on("error", (err) => { spawnError = err; });
            proc.unref();
            // Wait for the browser to start, then look up its app entry.
            await new Promise((res) => setTimeout(res, 1500));
            if (spawnError) {
                throw spawnError;
            }
            cdpEnabled = true;
            const apps = await bridge.call("app.list", {});
            // Prefer the process we just spawned; fall back to any instance with
            // this bundle ID (assumes app.list entries carry `pid` and `bundleId`,
            // as the rest of this file relies on).
            const chromeApp = apps?.find((a) => a.pid === proc.pid)
                ?? apps?.find((a) => a.bundleId === bundleId);
            r = { pid: chromeApp?.pid ?? 0, appName: chromeApp?.name ?? bundleId };
        }
        catch {
            // Fallback to normal launch if CDP launch fails.
            r = await bridge.call("app.launch", { bundleId });
        }
    }
    else {
        r = await bridge.call("app.launch", { bundleId });
    }
    // Auto-start perception for the launched app.
    try {
        const windowId = await resolveWindowId(r.pid);
        await perceptionManager.ensureStarted({ bundleId, appName: r.appName ?? bundleId, pid: r.pid, windowTitle: "", ...(windowId != null ? { windowId } : {}) });
        installSafariEnricher(bundleId);
    }
    catch { /* perception start is best-effort */ }
    let msg = `Launched ${r.appName} pid=${r.pid}`;
    if (cdpEnabled) {
        msg += `\nCDP enabled on port ${port} — browser_* tools ready`;
    }
    const riskyName = riskyBundleIds.get(bundleId);
    if (riskyName) {
        msg += `\nWarning: launching ${riskyName} \u2014 this app can execute arbitrary commands`;
    }
    return { content: [{ type: "text", text: msg }] };
});
|
|
292
1518
|
// ═══════════════════════════════════════════════
|
|
293
1519
|
// INSPECT — see what's on screen (debugging/design)
|
|
@@ -298,12 +1524,27 @@ server.tool("screenshot", "Take a screenshot and OCR it. Returns all visible tex
|
|
|
298
1524
|
await ensureBridge();
|
|
299
1525
|
let shot;
|
|
300
1526
|
if (windowId) {
|
|
301
|
-
shot = await bridge.call("cg.captureWindow", { windowId });
|
|
1527
|
+
shot = await bridge.call("cg.captureWindow", { windowId, safeCLI: isBrowserApp() });
|
|
302
1528
|
}
|
|
303
1529
|
else {
|
|
304
1530
|
shot = await bridge.call("cg.captureScreen");
|
|
305
1531
|
}
|
|
306
1532
|
const ocr = await bridge.call("vision.ocr", { imagePath: shot.path });
|
|
1533
|
+
// Feed OCR regions into world model
|
|
1534
|
+
try {
|
|
1535
|
+
if (windowId && Array.isArray(ocr.regions) && ocr.regions.length > 0) {
|
|
1536
|
+
worldModel.ingestOCRRegions(windowId, ocr.regions.map((r) => ({
|
|
1537
|
+
text: r.text,
|
|
1538
|
+
bounds: {
|
|
1539
|
+
x: r.bounds.x,
|
|
1540
|
+
y: r.bounds.y,
|
|
1541
|
+
width: r.bounds.width,
|
|
1542
|
+
height: r.bounds.height,
|
|
1543
|
+
},
|
|
1544
|
+
})));
|
|
1545
|
+
}
|
|
1546
|
+
}
|
|
1547
|
+
catch { /* world model update is best-effort */ }
|
|
307
1548
|
return { content: [{ type: "text", text: `Screenshot: ${shot.width}x${shot.height} (${shot.path})\n\n${ocr.text}` }] };
|
|
308
1549
|
});
|
|
309
1550
|
server.tool("screenshot_file", "Take a screenshot and return the file path (for viewing the actual image)", {
|
|
@@ -312,7 +1553,7 @@ server.tool("screenshot_file", "Take a screenshot and return the file path (for
|
|
|
312
1553
|
await ensureBridge();
|
|
313
1554
|
let shot;
|
|
314
1555
|
if (windowId) {
|
|
315
|
-
shot = await bridge.call("cg.captureWindow", { windowId });
|
|
1556
|
+
shot = await bridge.call("cg.captureWindow", { windowId, safeCLI: isBrowserApp() });
|
|
316
1557
|
}
|
|
317
1558
|
else {
|
|
318
1559
|
shot = await bridge.call("cg.captureScreen");
|
|
@@ -325,7 +1566,7 @@ server.tool("ocr", "OCR a window with element positions. SLOW — prefer ui_tree
|
|
|
325
1566
|
await ensureBridge();
|
|
326
1567
|
let shot;
|
|
327
1568
|
if (windowId) {
|
|
328
|
-
shot = await bridge.call("cg.captureWindow", { windowId });
|
|
1569
|
+
shot = await bridge.call("cg.captureWindow", { windowId, safeCLI: isBrowserApp() });
|
|
329
1570
|
}
|
|
330
1571
|
else {
|
|
331
1572
|
shot = await bridge.call("cg.captureScreen");
|
|
@@ -337,7 +1578,28 @@ server.tool("ocr", "OCR a window with element positions. SLOW — prefer ui_tree
|
|
|
337
1578
|
const win = wins.find((w) => w.windowId === windowId);
|
|
338
1579
|
winBounds = win?.bounds;
|
|
339
1580
|
}
|
|
340
|
-
const regions = ocr.regions.map((r) =>
|
|
1581
|
+
const regions = ocr.regions.map((r) => {
|
|
1582
|
+
let text = redactSensitiveLabel(r.text);
|
|
1583
|
+
text = redactUsername(text);
|
|
1584
|
+
// Redact URLs in OCR text
|
|
1585
|
+
text = text.replace(/https?:\/\/[^\s"'`]+/g, (url) => sanitizeUrl(url));
|
|
1586
|
+
return `"${text}" (${Math.round(r.bounds.x)},${Math.round(r.bounds.y)}) ${Math.round(r.bounds.width)}x${Math.round(r.bounds.height)}`;
|
|
1587
|
+
});
|
|
1588
|
+
// Feed OCR regions into world model
|
|
1589
|
+
try {
|
|
1590
|
+
if (windowId && Array.isArray(ocr.regions) && ocr.regions.length > 0) {
|
|
1591
|
+
worldModel.ingestOCRRegions(windowId, ocr.regions.map((r) => ({
|
|
1592
|
+
text: r.text,
|
|
1593
|
+
bounds: {
|
|
1594
|
+
x: r.bounds.x,
|
|
1595
|
+
y: r.bounds.y,
|
|
1596
|
+
width: r.bounds.width,
|
|
1597
|
+
height: r.bounds.height,
|
|
1598
|
+
},
|
|
1599
|
+
})));
|
|
1600
|
+
}
|
|
1601
|
+
}
|
|
1602
|
+
catch { /* world model update is best-effort */ }
|
|
341
1603
|
return {
|
|
342
1604
|
content: [{
|
|
343
1605
|
type: "text",
|
|
@@ -358,13 +1620,32 @@ server.tool("ui_tree", "PREFERRED: Get the full UI element tree of an app via Ac
|
|
|
358
1620
|
maxDepth: z.number().optional().describe("Max depth (default 4). Use 2 for overview, 6+ for deep inspection."),
|
|
359
1621
|
}, async ({ pid, maxDepth }) => {
|
|
360
1622
|
await ensureBridge();
|
|
1623
|
+
// Check if PID is running before querying AX tree (L3-04: uses fallback checks)
|
|
1624
|
+
if (!(await isPidRunning(pid))) {
|
|
1625
|
+
return { content: [{ type: "text", text: `PID ${pid} is not running. Call apps() to get current PIDs.` }] };
|
|
1626
|
+
}
|
|
361
1627
|
const tree = await bridge.call("ax.getElementTree", { pid, maxDepth: maxDepth || 4 });
|
|
1628
|
+
// Feed AX tree into world model for state tracking
|
|
1629
|
+
try {
|
|
1630
|
+
const wins = await bridge.call("window.list", {});
|
|
1631
|
+
const win = wins?.find((w) => w.pid === pid);
|
|
1632
|
+
if (win) {
|
|
1633
|
+
worldModel.ingestAXTree(win.windowId, tree, {
|
|
1634
|
+
bundleId: win.bundleId ?? "",
|
|
1635
|
+
appName: win.bundleId ?? "",
|
|
1636
|
+
pid,
|
|
1637
|
+
windowTitle: win.title ?? "",
|
|
1638
|
+
windowId: win.windowId,
|
|
1639
|
+
});
|
|
1640
|
+
}
|
|
1641
|
+
}
|
|
1642
|
+
catch { /* ignore — world model update is best-effort */ }
|
|
362
1643
|
function format(node, depth) {
|
|
363
1644
|
let line = " ".repeat(depth) + (node.role || "?");
|
|
364
1645
|
if (node.title)
|
|
365
1646
|
line += ` "${node.title}"`;
|
|
366
1647
|
if (node.value)
|
|
367
|
-
line += ` =${String(node.value).slice(0,
|
|
1648
|
+
line += ` =${String(node.value).slice(0, 200)}`;
|
|
368
1649
|
if (node.bounds)
|
|
369
1650
|
line += ` (${Math.round(node.bounds.x)},${Math.round(node.bounds.y)} ${Math.round(node.bounds.width)}x${Math.round(node.bounds.height)})`;
|
|
370
1651
|
let result = line;
|
|
@@ -374,34 +1655,107 @@ server.tool("ui_tree", "PREFERRED: Get the full UI element tree of an app via Ac
|
|
|
374
1655
|
}
|
|
375
1656
|
return result;
|
|
376
1657
|
}
|
|
377
|
-
return { content: [{ type: "text", text: format(tree, 0) }] };
|
|
1658
|
+
return { content: [{ type: "text", text: redactUsername(format(tree, 0)) }] };
|
|
378
1659
|
});
|
|
379
|
-
server.tool("ui_find", "Find a specific UI element by text
|
|
1660
|
+
// Tool "ui_find": looks up one accessibility element of a process.
//
// Params:
//   pid   - process to search.
//   title - text to match; tried as the element title first, then as its value.
//   role  - optional AX role filter, forwarded to the bridge only when set.
//   exact - forwarded to the bridge as-is (schema default: false).
// Returns the bridge's element as pretty-printed JSON, or a plain text notice
// when the pid is not running.
server.tool("ui_find", "Find a specific UI element by text, title, or value. Falls back to value search if title match fails (e.g. finds Safari URL bar by URL).", {
    pid: z.number().describe("Process ID"),
    title: z.string().describe("Text to search for — matches title first, then value (partial match)"),
    role: z.string().optional().describe("AX role filter, e.g. AXButton, AXMenuItem, AXTextField"),
    exact: z.boolean().optional().default(false).describe("Exact title match (default: partial)"),
}, async ({ pid, title, role, exact }) => {
    await ensureBridge();
    const alive = await isPidRunning(pid);
    if (!alive) {
        return { content: [{ type: "text", text: `PID ${pid} is not running. Call apps() to get current PIDs.` }] };
    }
    const roleFilter = role ? { role } : {};
    let found;
    try {
        found = await bridge.call("ax.findElement", { pid, title, exact, ...roleFilter });
    }
    catch {
        // Title search failed — retry searching by value (e.g. AXTextField with URL as value).
        found = await bridge.call("ax.findElement", { pid, value: title, exact, ...roleFilter });
    }
    // Feed the found element into the world model as a minimal AX subtree.
    // Best-effort: any failure here leaves the tool result untouched.
    try {
        if (found?.role) {
            const windowList = await bridge.call("window.list", {});
            const owner = windowList?.find((w) => w.pid === pid);
            if (owner) {
                const { bounds } = found;
                const subtree = {
                    role: found.role,
                    title: found.title ?? null,
                    value: found.value ?? null,
                    enabled: found.enabled ?? true,
                    focused: found.focused ?? false,
                    children: found.children ?? [],
                    // position/size are only present when the element reported bounds.
                    ...(bounds
                        ? {
                            position: { x: bounds.x, y: bounds.y },
                            size: { width: bounds.width, height: bounds.height },
                        }
                        : {}),
                };
                const appContext = {
                    bundleId: owner.bundleId ?? "",
                    // NOTE(review): appName is filled from bundleId here — confirm
                    // whether window.list entries expose a real app name.
                    appName: owner.bundleId ?? "",
                    pid,
                    windowTitle: owner.title ?? "",
                    windowId: owner.windowId,
                };
                worldModel.ingestAXTree(owner.windowId, subtree, appContext);
            }
        }
    }
    catch { /* world model update is best-effort */ }
    return { content: [{ type: "text", text: JSON.stringify(found, null, 2) }] };
});
|
|
387
1709
|
// ui_press — find an element through Accessibility and perform AXPress on it.
// Lookup tries the text as a title, then as a value. When both bridge calls
// reject, the frontmost app is inspected: if it belongs to a different PID the
// caller gets an isError result hinting at a blocking dialog; otherwise an
// Error is thrown.
server.tool("ui_press", "PREFERRED: Find and press/click a UI element by its title via Accessibility. Faster and more reliable than click_text — no screenshot needed.", {
    pid: z.number().describe("Process ID"),
    title: z.string().describe("Element title to find and press"),
    role: z.string().optional().describe("AX role filter, e.g. AXButton, AXMenuItem, AXTextField"),
    exact: z.boolean().optional().default(false).describe("Exact title match (default: partial)"),
}, async ({ pid, title, role, exact }) => {
    await ensureBridge();
    const alive = await isPidRunning(pid);
    if (!alive) {
        return { content: [{ type: "text", text: `PID ${pid} is not running. Call apps() to get current PIDs.` }] };
    }
    let el;
    let located = false;
    // "title" first, then "value" (buttons/controls may have value instead of title).
    for (const field of ["title", "value"]) {
        try {
            el = await bridge.call("ax.findElement", { pid, [field]: title, exact, ...(role ? { role } : {}) });
            located = true;
            break;
        }
        catch { /* this field did not match — try the next one */ }
    }
    if (!located) {
        // Check if a system dialog is blocking — different process owns the frontmost window
        let front = null;
        try {
            front = await bridge.call("app.frontmost", {});
        }
        catch { /* ignore frontmost check failure */ }
        if (front && front.pid !== pid) {
            return { content: [{ type: "text", text: `Element "${title}" not found in PID ${pid}. A system dialog from "${front.name}" (${front.bundleId}, PID ${front.pid}) may be blocking. Dismiss it first, or use click(x, y) to interact with the dialog directly.` }], isError: true };
        }
        throw new Error(`Element "${title}" not found (searched title, value, and description)`);
    }
    await bridge.call("ax.performAction", { pid, elementPath: el.elementPath, action: "AXPress" });
    // Label the result with whichever identifying text the element actually has.
    const label = el.title || el.description || el.value;
    return { content: [{ type: "text", text: `Pressed "${label}" (${el.role})` }] };
});
|
|
396
|
-
// ui_set_value — find an element through Accessibility and set its value.
//   pid   : process whose AX tree is searched
//   title : text matched (partially) against titles; if that bridge call
//           rejects, the same text is matched against element values
//   value : string written via ax.setElementValue
// Like ui_find and ui_press, a PID that is no longer running is reported with
// a hint to refresh PIDs instead of surfacing a raw bridge failure.
server.tool("ui_set_value", "Set the value of a UI element (text field, slider, etc.). Searches by title first, falls back to value match.", {
    pid: z.number().describe("Process ID"),
    title: z.string().describe("Element title to find"),
    value: z.string().describe("Value to set"),
}, async ({ pid, title, value }) => {
    await ensureBridge();
    if (!(await isPidRunning(pid))) {
        return { content: [{ type: "text", text: `PID ${pid} is not running. Call apps() to get current PIDs.` }] };
    }
    let el;
    try {
        el = await bridge.call("ax.findElement", { pid, title, exact: false });
    }
    catch {
        // Fallback: search by value (combo boxes, text fields often have no title)
        el = await bridge.call("ax.findElement", { pid, value: title, exact: false });
    }
    await bridge.call("ax.setElementValue", { pid, elementPath: el.elementPath, value });
    // Elements found through the value fallback may have no title, so fall back to the value for the label.
    return { content: [{ type: "text", text: `Set "${el.title || el.value}" = "${value}"` }] };
});
|
|
406
1760
|
server.tool("menu_click", "Click a menu item in an app's menu bar", {
|
|
407
1761
|
pid: z.number().describe("Process ID"),
|
|
@@ -417,73 +1771,309 @@ server.tool("menu_click", "Click a menu item in an app's menu bar", {
|
|
|
417
1771
|
// click — move the pointer to (x, y), pause briefly, then click there.
// button / clickCount / modifiers fall back to "left" / 1 / [] when falsy, and
// pid (possibly undefined) is forwarded to the bridge as targetPid.
// The reply lists only the non-default extras that were used.
server.tool("click", "Click at screen coordinates", {
    x: z.number().describe("Screen X"),
    y: z.number().describe("Screen Y"),
    button: z.enum(["left", "right", "middle"]).optional().default("left").describe("Mouse button (default: left)"),
    clickCount: z.number().optional().default(1).describe("Click count: 1=single, 2=double (word select), 3=triple (line select)"),
    modifiers: z.array(z.enum(["cmd", "shift", "alt", "ctrl"])).optional().describe("Hold modifier keys during click (e.g. ['cmd'] for cmd+click, ['shift'] for shift+click)"),
    pid: z.number().optional().describe("Target process ID for PID-targeted event delivery"),
}, async ({ x, y, button, clickCount, modifiers, pid }) => {
    await ensureBridge();
    await bridge.call("cg.mouseMove", { x, y, targetPid: pid });
    // Short settle time between the move and the click.
    await new Promise((resolve) => setTimeout(resolve, 50));
    const clickParams = {
        x,
        y,
        button: button || "left",
        clickCount: clickCount || 1,
        modifiers: modifiers || [],
        targetPid: pid,
    };
    await bridge.call("cg.mouseClick", clickParams);
    const notes = [];
    if (modifiers?.length) {
        notes.push(modifiers.join("+"));
    }
    if (button && button !== "left") {
        notes.push(button);
    }
    if (clickCount && clickCount > 1) {
        notes.push(clickCount === 2 ? "double" : `${clickCount}x`);
    }
    const suffix = notes.length ? ` [${notes.join(", ")}]` : "";
    return { content: [{ type: "text", text: `Clicked (${x}, ${y})${suffix}` }] };
});
|
|
427
1792
|
// click_text — OCR a window capture, find a region containing `text`
// (case-insensitive substring), and click the region's center.
//   windowId : window to capture; must appear in app.windows
//   text     : text to look for
//   offset_y : added to the computed screen Y before clamping
//   prefer   : which hit to use when several match ("first" keeps OCR order)
// Both failure results (unknown window, no OCR hit) are flagged with isError.
server.tool("click_text", "SLOW fallback: Find text on screen via OCR and click it. Use ui_press instead when possible — it's 10x faster. Only use this for canvas/image content where Accessibility doesn't work.", {
    windowId: z.number().describe("Window ID"),
    text: z.string().describe("Text to find and click"),
    offset_y: z.number().optional().describe("Y offset from text center (e.g. -25 for icon above label)"),
    prefer: z.enum(["first", "largest", "topmost", "leftmost"]).optional().default("first").describe("Match preference when multiple OCR hits: largest (headers), topmost, leftmost (sidebar), first (OCR order)"),
}, async ({ windowId, text, offset_y, prefer }) => {
    await ensureBridge();
    const wins = await bridge.call("app.windows");
    const win = wins?.find((w) => w.windowId === windowId);
    if (!win)
        return { content: [{ type: "text", text: "Window not found" }], isError: true };
    const wb = win.bounds;
    const shot = await bridge.call("cg.captureWindow", { windowId, safeCLI: isBrowserApp() });
    const ocr = await bridge.call("vision.ocr", { imagePath: shot.path });
    // Tolerate an OCR reply without a regions array and regions without text.
    const regions = Array.isArray(ocr?.regions) ? ocr.regions : [];
    const needle = text.toLowerCase();
    const allMatches = regions.filter((r) => typeof r?.text === "string" && r.text.toLowerCase().includes(needle));
    if (allMatches.length === 0) {
        return { content: [{ type: "text", text: `"${text}" not found. Available: ${regions.map((r) => r.text).slice(0, 20).join(", ")}` }], isError: true };
    }
    // Sort by preference strategy ("first" leaves OCR order untouched)
    if (prefer === "largest") {
        allMatches.sort((a, b) => (b.bounds.width * b.bounds.height) - (a.bounds.width * a.bounds.height));
    }
    else if (prefer === "topmost") {
        allMatches.sort((a, b) => a.bounds.y - b.bounds.y);
    }
    else if (prefer === "leftmost") {
        allMatches.sort((a, b) => a.bounds.x - b.bounds.x);
    }
    const match = allMatches[0];
    // Convert OCR pixel coordinates to screen coordinates.
    // shot.width/height are in pixels; wb.width/height are in screen points.
    // The scale factor handles both Retina (2x) and non-Retina (1x) displays.
    //
    // L3-05 fix: Window captures now use boundsIgnoreFraming to exclude shadow,
    // so image dimensions match window bounds × backing scale (2x on Retina).
    // Simple ratio mapping: OCR pixels → screen points.
    const scaleX = shot.width > 0 ? wb.width / shot.width : 1;
    const scaleY = shot.height > 0 ? wb.height / shot.height : 1;
    const centerPixelX = match.bounds.x + match.bounds.width / 2;
    const centerPixelY = match.bounds.y + match.bounds.height / 2;
    let sx = Math.round(wb.x + centerPixelX * scaleX);
    let sy = Math.round(wb.y + centerPixelY * scaleY + (offset_y || 0));
    // Clamp to window bounds — OCR boxes can extend slightly beyond the window
    sx = Math.max(wb.x + 2, Math.min(sx, wb.x + wb.width - 2));
    sy = Math.max(wb.y + 2, Math.min(sy, wb.y + wb.height - 2));
    await bridge.call("cg.mouseMove", { x: sx, y: sy });
    await new Promise(r => setTimeout(r, 80)); // 80ms dwell — longer than 50ms helps dense UIs register hover
    await bridge.call("cg.mouseClick", { x: sx, y: sy });
    let response = `Clicked "${match.text}" at screen (${Math.round(sx)}, ${Math.round(sy)}) ` +
        `[OCR pixel: (${Math.round(match.bounds.x)}, ${Math.round(match.bounds.y)}) ${match.bounds.width}×${match.bounds.height}] ` +
        `[window: (${wb.x}, ${wb.y}) ${wb.width}×${wb.height}] ` +
        `[scale: ${scaleX.toFixed(3)}×${scaleY.toFixed(3)}]`;
    if (allMatches.length > 1) {
        response += ` [${allMatches.length} matches, used prefer="${prefer}"]`;
        response += `\n⚠ ${allMatches.length} matches found. Use prefer param or offset_y to disambiguate.`;
    }
    return { content: [{ type: "text", text: response }] };
});
|
|
453
|
-
// type_text — type a string into an application.
//   text    : text to type
//   pid     : optional target PID; when omitted the frontmost app's PID is used
//   cdpPort : optional CDP port; when set (or auto-detected on 9229/9333 and
//             matched to the target app) characters are sent through CDP
//             Input.dispatchKeyEvent instead of the native bridge
// Flow: resolve PID → validate app/windows (best-effort) → raise its window
// (best-effort) → try CDP → otherwise type through cg.typeText in chunks.
// If CDP breaks part-way, only the not-yet-delivered remainder is typed via
// the bridge so the text is not duplicated.
server.tool("type_text", "Type text using the keyboard. Auto-detects Electron apps and routes through CDP for reliable editor input.", {
    text: z.string().describe("Text to type"),
    pid: z.number().optional().describe("Target process ID for PID-targeted event delivery"),
    cdpPort: z.number().optional().describe("CDP port for Electron apps (e.g. 9229). When set, types via CDP instead of AX — fixes Copilot/panel focus theft."),
}, async ({ text, pid, cdpPort: portOverride }) => {
    await ensureBridge();
    // Auto-resolve frontmost PID when none provided — global HID posting
    // fails silently in NSTextView apps (TextEdit, etc.), but PID-targeted
    // delivery works reliably in all apps.
    let targetPid = pid;
    if (!targetPid) {
        try {
            const front = await bridge.call("app.frontmost", {});
            targetPid = front.pid;
        }
        catch {
            // Fallback to global posting if frontmost detection fails
        }
    }
    // Lowercased name of the target app; filled in during validation when
    // possible so CDP auto-detection does not have to list apps a second time.
    let targetAppName = "";
    // Verify the target process exists and has windows
    if (targetPid) {
        try {
            const apps = await bridge.call("app.list", {});
            let app = apps?.find((a) => a.pid === targetPid);
            if (!app) {
                // L3-04 fix: Some Electron apps (Slack, Discord) don't appear in NSWorkspace.runningApplications
                // despite being frontmost. Check app.frontmost as fallback before rejecting.
                try {
                    const front = await bridge.call("app.frontmost", {});
                    if (front.pid === targetPid) {
                        app = front;
                    }
                }
                catch { /* ignore */ }
                if (!app) {
                    return { content: [{ type: "text", text: `PID ${targetPid} is not running. Call apps() to get current PIDs.` }] };
                }
            }
            targetAppName = (app.name || "").toLowerCase();
            const wins = await bridge.call("window.list", { pid: targetPid });
            if (!wins || wins.length === 0) {
                return { content: [{ type: "text", text: `Warning: PID ${targetPid} (${app.name}) has no windows. Keystrokes may be lost. Open a document first.` }] };
            }
        }
        catch {
            // Best-effort check — proceed with typing if validation fails
        }
    }
    // L3-02 fix: Raise the specific window before typing to ensure keystrokes land correctly.
    // Without this, Electron apps with multiple instances can lose keystrokes to the wrong window,
    // or text can go to a non-editor area (e.g. Walkthrough tab instead of editor).
    if (targetPid) {
        try {
            const winId = await resolveWindowId(targetPid);
            if (winId != null) {
                await bridge.call("window.focus", { windowId: winId });
            }
        }
        catch { /* best-effort — proceed with typing */ }
    }
    // L3-02 fix: Electron CDP typing — routes through CDP Input.dispatchKeyEvent
    // when cdpPort is specified or auto-detected. Solves Copilot chat / panel focus
    // theft where AX keystrokes go to chat input instead of Monaco editor.
    let electronCdpPort = portOverride;
    if (!electronCdpPort && targetPid) {
        // Auto-detect: probe Electron-common CDP ports, but ONLY use if the CDP target
        // belongs to the same app we're targeting. Without this check, typing to Slack
        // could get routed through VS Code's CDP port 9229.
        try {
            // Look up the target app name only if validation above could not provide it
            if (!targetAppName) {
                try {
                    const apps = await bridge.call("app.list", {});
                    const app = apps?.find((a) => a.pid === targetPid);
                    targetAppName = (app?.name || "").toLowerCase();
                    if (!targetAppName) {
                        const front = await bridge.call("app.frontmost", {});
                        if (front.pid === targetPid)
                            targetAppName = (front.name || "").toLowerCase();
                    }
                }
                catch { /* ignore */ }
            }
            for (const p of [9229, 9333]) {
                try {
                    if (!CDP)
                        CDP = (await import("chrome-remote-interface")).default;
                    const version = await CDP.Version({ port: p });
                    // Verify the CDP target matches the target app — check if the browser name
                    // or any page title contains the app name (e.g. "Code" in VS Code page titles)
                    const browserName = (version?.Browser || "").toLowerCase();
                    if (targetAppName && !browserName.includes(targetAppName)) {
                        // Double-check against page titles
                        try {
                            const targets = await CDP.List({ port: p });
                            const titleMatch = targets?.some((t) => (t.title || "").toLowerCase().includes(targetAppName));
                            if (!titleMatch)
                                continue; // CDP doesn't belong to target app — skip
                        }
                        catch {
                            continue;
                        }
                    }
                    electronCdpPort = p;
                    break;
                }
                catch { /* not available on this port */ }
            }
        }
        catch { /* auto-detect is best-effort */ }
    }
    // Number of UTF-16 code units whose keyDown was already dispatched via CDP.
    // Used to avoid retyping them if CDP fails part-way and we fall back to AX.
    let deliveredViaCdp = 0;
    if (electronCdpPort) {
        // CDP path: click editor to ensure focus, then type via key events
        try {
            const { client } = await getCDPClient(undefined, electronCdpPort);
            try {
                // Click the editor area to grab focus from Copilot/panels
                await client.Runtime.evaluate({
                    expression: `(() => {
                        const editor = document.querySelector('.monaco-editor .view-lines');
                        if (editor) { editor.click(); return true; }
                        // Generic fallback: focus the first contenteditable or active editor context
                        const editable = document.querySelector('[contenteditable="true"]') || document.querySelector('.native-edit-context');
                        if (editable) { editable.focus(); return true; }
                        return false;
                    })()`,
                    returnByValue: true,
                });
                await randomDelay(30, 60);
                // Type character by character via CDP Input.dispatchKeyEvent
                for (const char of text) {
                    await client.Input.dispatchKeyEvent({ type: "keyDown", text: char, key: char, unmodifiedText: char });
                    // keyDown carries the text, so the character counts as delivered from here on.
                    deliveredViaCdp += char.length;
                    await client.Input.dispatchKeyEvent({ type: "keyUp", text: char, key: char, unmodifiedText: char });
                    await randomDelay(10, 30);
                }
            }
            finally {
                // Always release the CDP connection; a failing close must not
                // turn a completed CDP run into an AX retry.
                try {
                    await client.close();
                }
                catch { /* connection already gone */ }
            }
            const msg = `Typed via CDP (port ${electronCdpPort}): "${text}"`;
            return { content: [{ type: "text", text: msg }] };
        }
        catch {
            // CDP failed — fall through to AX typing for whatever was not delivered
        }
    }
    // AX path: standard cg.typeText via native bridge
    const remaining = text.slice(deliveredViaCdp);
    // L2-66 fix: Auto-chunk long text to prevent bridge timeout.
    // cg.typeText simulates individual keystrokes, so >500 chars can be slow.
    const CHUNK_SIZE = 500;
    if (remaining.length > CHUNK_SIZE) {
        let start = 0;
        while (start < remaining.length) {
            let end = Math.min(start + CHUNK_SIZE, remaining.length);
            if (end < remaining.length) {
                // Never cut between the two halves of a surrogate pair (emoji etc.).
                const lastUnit = remaining.charCodeAt(end - 1);
                if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
                    end += 1;
                }
            }
            await bridge.call("cg.typeText", { text: remaining.slice(start, end), targetPid });
            start = end;
        }
    }
    else if (remaining.length > 0 || deliveredViaCdp === 0) {
        // Skip the call only when CDP already delivered every character.
        await bridge.call("cg.typeText", { text: remaining, targetPid });
    }
    const msg = targetPid ? `Typed to PID ${targetPid}: "${text}"` : "Typed: " + text;
    return { content: [{ type: "text", text: msg }] };
});
|
|
2006
|
+
// key — press a key combination, or hold a single key.
//   combo  : keys joined with "+", e.g. "cmd+shift+n"; whitespace around the
//            individual keys is ignored ("cmd + c" behaves like "cmd+c")
//   holdMs : when set for a single non-modifier key, the key is held that long
//            via cg.keyPressAndHold; otherwise it is ignored and a tap is sent
//   pid    : optional target PID; when omitted the frontmost app's PID is used
server.tool("key", "Press a key combination", {
    combo: z.string().describe("Key combo: 'cmd+c', 'enter', 'cmd+shift+n', 'space'. Use + to separate."),
    holdMs: z.number().optional().describe("Hold the key for this many ms (for accent picker, long-press menus). Default: tap."),
    pid: z.number().optional().describe("Target process ID for PID-targeted event delivery"),
}, async ({ combo, holdMs, pid }) => {
    await ensureBridge();
    // Auto-resolve frontmost PID when none provided — ensures keystrokes
    // reach the correct app (same pattern as type_text auto-PID).
    let targetPid = pid;
    if (!targetPid) {
        try {
            const front = await bridge.call("app.frontmost", {});
            targetPid = front.pid;
        }
        catch { /* fallback to global posting */ }
    }
    // Strip stray whitespace so "cmd + c" yields ["cmd", "c"]; a part that is
    // nothing but whitespace is kept untouched rather than turned into "".
    const keys = combo.split("+").map((part) => part.trim() || part);
    const hasModifier = keys.some(k => ["cmd", "ctrl", "alt", "shift"].includes(k.toLowerCase()));
    // macOS only processes modifier shortcuts (cmd+c, cmd+n, etc.) for the frontmost app.
    // When pid is targeted with modifiers, raise the specific window first.
    // L3-01 fix: use window.focus(windowId) instead of app.focus(bundleId) to avoid
    // targeting the wrong instance when multiple Electron apps share the same bundleId.
    if (targetPid && hasModifier) {
        try {
            const winId = await resolveWindowId(targetPid);
            if (winId != null) {
                await bridge.call("window.focus", { windowId: winId });
            }
            else {
                // Fallback to bundleId-based focus if no window found
                const apps = await bridge.call("app.list", {});
                const target = apps?.find(a => a.pid === targetPid);
                if (target) {
                    await bridge.call("app.focus", { bundleId: target.bundleId });
                }
            }
        }
        catch { /* focus is best-effort */ }
    }
    // Press-and-hold mode for accent picker / long-press menus
    if (holdMs && !hasModifier && keys.length === 1) {
        await bridge.call("cg.keyPressAndHold", { key: keys[0], durationMs: holdMs, targetPid });
        return { content: [{ type: "text", text: `Key held: ${combo} (${holdMs}ms)` + (targetPid ? ` (PID ${targetPid})` : "") }] };
    }
    await bridge.call("cg.keyCombo", { keys, targetPid });
    return { content: [{ type: "text", text: `Key: ${combo}` + (targetPid ? ` (PID ${targetPid})` : "") }] };
});
|
|
2053
|
+
// drag — drag the pointer from (fromX, fromY) to (toX, toY) via cg.mouseDrag.
// modifiers defaults to [] when omitted; pid (possibly undefined) is forwarded
// as targetPid. The reply echoes both endpoints and any modifiers held.
server.tool("drag", "Drag from one point to another", {
    fromX: z.number(), fromY: z.number(),
    toX: z.number(), toY: z.number(),
    modifiers: z.array(z.enum(["cmd", "shift", "alt", "ctrl"])).optional().describe("Hold modifier keys during drag (e.g. ['alt'] for option+drag copy in Finder)"),
    pid: z.number().optional().describe("Target process ID for PID-targeted event delivery"),
}, async ({ fromX, fromY, toX, toY, modifiers, pid }) => {
    await ensureBridge();
    const dragParams = {
        fromX,
        fromY,
        toX,
        toY,
        modifiers: modifiers || [],
        targetPid: pid,
    };
    await bridge.call("cg.mouseDrag", dragParams);
    let modStr = "";
    if (modifiers?.length) {
        modStr = ` [${modifiers.join("+")}]`;
    }
    return { content: [{ type: "text", text: `Dragged (${fromX},${fromY}) → (${toX},${toY})${modStr}` }] };
});
|
|
2064
|
+
// scroll — send a scroll event at (x, y) via cg.scroll.
// deltaX falls back to 0 when falsy; deltaY is required; pid (possibly
// undefined) is forwarded as targetPid.
server.tool("scroll", "Scroll at a position", {
    x: z.number(), y: z.number(),
    deltaX: z.number().optional().describe("Horizontal scroll (default 0)"),
    deltaY: z.number().describe("Vertical scroll (negative = down)"),
    pid: z.number().optional().describe("Target process ID for PID-targeted event delivery"),
}, async ({ x, y, deltaX, deltaY, pid }) => {
    await ensureBridge();
    const scrollParams = {
        x,
        y,
        deltaX: deltaX || 0,
        deltaY,
        targetPid: pid,
    };
    await bridge.call("cg.scroll", scrollParams);
    return { content: [{ type: "text", text: "Scrolled" }] };
});
|
|
484
2074
|
// ── CDP helper: get client for a tab ──
|
|
485
|
-
async function getCDPClient(tabId) {
|
|
486
|
-
const { CDP: cdp, port } = await ensureCDP();
|
|
2075
|
+
async function getCDPClient(tabId, overridePort) {
|
|
2076
|
+
const { CDP: cdp, port } = await ensureCDP(overridePort);
|
|
487
2077
|
let targetId = tabId;
|
|
488
2078
|
if (!targetId) {
|
|
489
2079
|
const targets = await cdp.List({ port });
|
|
@@ -493,6 +2083,11 @@ async function getCDPClient(tabId) {
|
|
|
493
2083
|
targetId = page.id;
|
|
494
2084
|
}
|
|
495
2085
|
const client = await cdp({ port, target: targetId });
|
|
2086
|
+
// Activate CDP source in perception when a browser connection is established
|
|
2087
|
+
try {
|
|
2088
|
+
perceptionManager.activateCDP(client);
|
|
2089
|
+
}
|
|
2090
|
+
catch { /* best-effort */ }
|
|
496
2091
|
return { client, targetId: targetId, CDP: cdp, port };
|
|
497
2092
|
}
|
|
498
2093
|
// ── Random delay helper ──
|
|
@@ -502,25 +2097,54 @@ function randomDelay(min, max) {
|
|
|
502
2097
|
// ═══════════════════════════════════════════════
|
|
503
2098
|
// BROWSER — control Chrome pages via CDP (10ms, not OCR)
|
|
504
2099
|
// ═══════════════════════════════════════════════
|
|
505
|
-
// browser_tabs — list CDP targets of type "page" as "[id] title — url" lines,
// one per line, or "No tabs open" when there are none.
// cdpPort is handed to ensureCDP as a port override.
server.tool("browser_tabs", "List all open Chrome/Electron tabs. Use cdpPort to connect to a specific app (e.g. 9333 for Codex Desktop).", {
    cdpPort: z.number().optional().describe("CDP port override (e.g. 9333 for Electron apps). Omit to auto-detect."),
}, async ({ cdpPort: portOverride }) => {
    const { CDP: cdp, port } = await ensureCDP(portOverride);
    const targets = await cdp.List({ port });
    const lines = [];
    for (const t of targets) {
        // Skip workers, extensions and other non-page targets.
        if (t.type === "page") {
            lines.push(`[${t.id}] ${t.title} — ${t.url}`);
        }
    }
    const listing = lines.join("\n");
    return { content: [{ type: "text", text: listing || "No tabs open" }] };
});
|
|
512
|
-
// browser_open — open `url` in a new tab through CDP and report the new
// target's id.
//   url     : URL to open; javascript:, data:, blob: and vbscript: URLs are
//             rejected with an Error
//   cdpPort : optional port override handed to ensureCDP
// The new tab is also recorded in the world model (best-effort).
server.tool("browser_open", "Open a URL in Chrome/Electron (creates new tab)", {
    url: z.string().describe("URL to open"),
    cdpPort: z.number().optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
}, async ({ url, cdpPort: portOverride }) => {
    // L2-71 fix: Block dangerous URL protocols
    const BLOCKED_PROTOCOLS = ["javascript:", "data:", "blob:", "vbscript:"];
    // Normalize the way URL parsers do before reading the scheme: ASCII
    // tab/newline characters are dropped anywhere in the input and leading
    // C0 control characters / spaces are stripped. Without this,
    // "java\tscript:..." or "\x01javascript:..." would slip past the check.
    const urlLower = url
        .replace(/[\t\n\r]/g, "")
        .trim()
        .replace(/^[\u0000-\u0020]+/, "")
        .toLowerCase();
    for (const proto of BLOCKED_PROTOCOLS) {
        if (urlLower.startsWith(proto)) {
            throw new Error(`Blocked: "${proto}" URLs are not allowed in browser_open for security reasons.`);
        }
    }
    // Capture bundleId BEFORE CDP call to prevent focus-change race
    const browserBundleId = worldModel.getState().focusedApp?.bundleId ?? "com.google.Chrome";
    const { CDP: cdp, port } = await ensureCDP(portOverride);
    const target = await cdp.New({ port, url });
    // Feed new tab into world model
    try {
        worldModel.ingestCDPSnapshot(browserBundleId, url, target.title ?? url);
    }
    catch { /* world model update is best-effort */ }
    return { content: [{ type: "text", text: `Opened: ${target.id} — ${url}` }] };
});
|
|
519
|
-
server.tool("browser_navigate", "Navigate the active Chrome tab to a URL", {
|
|
2132
|
+
server.tool("browser_navigate", "Navigate the active Chrome/Electron tab to a URL", {
|
|
520
2133
|
url: z.string().describe("URL to navigate to"),
|
|
521
2134
|
tabId: z.string().optional().describe("Tab ID (from browser_tabs). Omit for most recent tab."),
|
|
522
|
-
|
|
523
|
-
|
|
2135
|
+
cdpPort: z.number().optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
|
|
2136
|
+
}, async ({ url, tabId, cdpPort: portOverride }) => {
|
|
2137
|
+
// L2-71 fix: Block dangerous URL protocols that could execute arbitrary code
|
|
2138
|
+
const BLOCKED_PROTOCOLS = ["javascript:", "data:", "blob:", "vbscript:"];
|
|
2139
|
+
const urlLower = url.trim().toLowerCase();
|
|
2140
|
+
for (const proto of BLOCKED_PROTOCOLS) {
|
|
2141
|
+
if (urlLower.startsWith(proto)) {
|
|
2142
|
+
throw new Error(`Blocked: "${proto}" URLs are not allowed in browser_navigate for security reasons. Use browser_js for JavaScript execution.`);
|
|
2143
|
+
}
|
|
2144
|
+
}
|
|
2145
|
+
// Capture bundleId BEFORE CDP call to prevent focus-change race
|
|
2146
|
+
const browserBundleId = worldModel.getState().focusedApp?.bundleId ?? "com.google.Chrome";
|
|
2147
|
+
const { CDP: cdp, port } = await ensureCDP(portOverride);
|
|
524
2148
|
let targetId = tabId;
|
|
525
2149
|
if (!targetId) {
|
|
526
2150
|
const targets = await cdp.List({ port });
|
|
@@ -540,16 +2164,23 @@ server.tool("browser_navigate", "Navigate the active Chrome tab to a URL", {
|
|
|
540
2164
|
break;
|
|
541
2165
|
await new Promise(r => setTimeout(r, 200));
|
|
542
2166
|
}
|
|
543
|
-
const
|
|
2167
|
+
const titleResult = await client.Runtime.evaluate({ expression: "document.title", returnByValue: true });
|
|
2168
|
+
const pageTitle = titleResult.result.value ?? "";
|
|
544
2169
|
await client.close();
|
|
545
|
-
|
|
2170
|
+
// Feed navigation result into world model
|
|
2171
|
+
try {
|
|
2172
|
+
worldModel.ingestCDPSnapshot(browserBundleId, url, pageTitle);
|
|
2173
|
+
}
|
|
2174
|
+
catch { /* world model update is best-effort */ }
|
|
2175
|
+
return { content: [{ type: "text", text: `Navigated to: ${pageTitle}` }] };
|
|
546
2176
|
});
|
|
547
|
-
server.tool("browser_js", "Execute JavaScript in a Chrome tab. Returns the result. WARNING: This runs arbitrary JS in the browser context — avoid on sensitive pages (banking, email). All executions are audit-logged.", {
|
|
2177
|
+
server.tool("browser_js", "Execute JavaScript in a Chrome/Electron tab. Returns the result. WARNING: This runs arbitrary JS in the browser context — avoid on sensitive pages (banking, email). All executions are audit-logged.", {
|
|
548
2178
|
code: z.string().describe("JavaScript to execute. Must be an expression that returns a value. Use (() => { ... })() for multi-line."),
|
|
549
2179
|
tabId: z.string().optional().describe("Tab ID. Omit for most recent tab."),
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
2180
|
+
cdpPort: z.number().optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
|
|
2181
|
+
}, async ({ code, tabId, cdpPort: portOverride }) => {
|
|
2182
|
+
auditLog("browser_js", { code, tabId });
|
|
2183
|
+
const { CDP: cdp, port } = await ensureCDP(portOverride);
|
|
553
2184
|
let targetId = tabId;
|
|
554
2185
|
if (!targetId) {
|
|
555
2186
|
const targets = await cdp.List({ port });
|
|
@@ -570,15 +2201,21 @@ server.tool("browser_js", "Execute JavaScript in a Chrome tab. Returns the resul
|
|
|
570
2201
|
return { content: [{ type: "text", text: `JS Error: ${result.exceptionDetails.text}\n${result.exceptionDetails.exception?.description || ""}` }] };
|
|
571
2202
|
}
|
|
572
2203
|
const val = result.result.value;
|
|
573
|
-
|
|
2204
|
+
let text = typeof val === "object" ? JSON.stringify(val, null, 2) : String(val ?? "undefined");
|
|
2205
|
+
// Redact sensitive URLs and tokens in JS output
|
|
2206
|
+
text = text.replace(/https?:\/\/[^\s"'`]+/g, (url) => sanitizeUrl(url));
|
|
2207
|
+
text = redactSensitiveLabel(text);
|
|
574
2208
|
return { content: [{ type: "text", text }] };
|
|
575
2209
|
});
|
|
576
|
-
server.tool("browser_dom", "Query the DOM of a Chrome page. Returns matching elements' text, attributes, and structure.", {
|
|
2210
|
+
server.tool("browser_dom", "Query the DOM of a Chrome/Electron page. Returns matching elements' text, attributes, and structure.", {
|
|
577
2211
|
selector: z.string().describe("CSS selector, e.g. 'button', '.nav a', '#main h2'"),
|
|
578
2212
|
tabId: z.string().optional().describe("Tab ID. Omit for most recent tab."),
|
|
579
2213
|
limit: z.number().optional().describe("Max results (default 20)"),
|
|
580
|
-
|
|
581
|
-
|
|
2214
|
+
cdpPort: z.number().optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
|
|
2215
|
+
}, async ({ selector, tabId, limit, cdpPort: portOverride }) => {
|
|
2216
|
+
// Capture bundleId before any async CDP calls to avoid race condition
|
|
2217
|
+
const browserBundleId = worldModel.getState().focusedApp?.bundleId ?? "com.google.Chrome";
|
|
2218
|
+
const { CDP: cdp, port } = await ensureCDP(portOverride);
|
|
582
2219
|
let targetId = tabId;
|
|
583
2220
|
if (!targetId) {
|
|
584
2221
|
const targets = await cdp.List({ port });
|
|
@@ -607,14 +2244,27 @@ server.tool("browser_dom", "Query the DOM of a Chrome page. Returns matching ele
|
|
|
607
2244
|
})()`,
|
|
608
2245
|
returnByValue: true,
|
|
609
2246
|
});
|
|
2247
|
+
// Feed page info into world model while client is still open
|
|
2248
|
+
try {
|
|
2249
|
+
const pageInfo = await client.Runtime.evaluate({
|
|
2250
|
+
expression: `({ url: location.href, title: document.title })`,
|
|
2251
|
+
returnByValue: true,
|
|
2252
|
+
});
|
|
2253
|
+
const info = pageInfo.result.value;
|
|
2254
|
+
if (info?.url) {
|
|
2255
|
+
worldModel.ingestCDPSnapshot(browserBundleId, info.url, info.title ?? "");
|
|
2256
|
+
}
|
|
2257
|
+
}
|
|
2258
|
+
catch { /* world model update is best-effort */ }
|
|
610
2259
|
await client.close();
|
|
611
2260
|
return { content: [{ type: "text", text: JSON.stringify(result.result.value, null, 2) }] };
|
|
612
2261
|
});
|
|
613
|
-
server.tool("browser_click", "Click an element in Chrome by CSS selector. Uses CDP Input.dispatchMouseEvent for realistic mouse events.", {
|
|
2262
|
+
server.tool("browser_click", "Click an element in Chrome/Electron by CSS selector. Uses CDP Input.dispatchMouseEvent for realistic mouse events.", {
|
|
614
2263
|
selector: z.string().describe("CSS selector of element to click"),
|
|
615
2264
|
tabId: z.string().optional().describe("Tab ID. Omit for most recent tab."),
|
|
616
|
-
|
|
617
|
-
|
|
2265
|
+
cdpPort: z.number().optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
|
|
2266
|
+
}, async ({ selector, tabId, cdpPort: portOverride }) => {
|
|
2267
|
+
const { client } = await getCDPClient(tabId, portOverride);
|
|
618
2268
|
await client.Runtime.enable();
|
|
619
2269
|
const result = await client.Runtime.evaluate({
|
|
620
2270
|
expression: `(() => {
|
|
@@ -640,13 +2290,14 @@ server.tool("browser_click", "Click an element in Chrome by CSS selector. Uses C
|
|
|
640
2290
|
await client.close();
|
|
641
2291
|
return { content: [{ type: "text", text: `Clicked: "${val.text}" at (${Math.round(x)}, ${Math.round(y)})` }] };
|
|
642
2292
|
});
|
|
643
|
-
server.tool("browser_type", "Type into an input field in Chrome. Uses CDP Input.dispatchKeyEvent for real keyboard events (works with React/Angular).", {
|
|
2293
|
+
server.tool("browser_type", "Type into an input field in Chrome/Electron. Uses CDP Input.dispatchKeyEvent for real keyboard events (works with React/Angular).", {
|
|
644
2294
|
selector: z.string().describe("CSS selector of the input"),
|
|
645
2295
|
text: z.string().describe("Text to type"),
|
|
646
2296
|
clear: z.boolean().optional().describe("Clear field first (default true)"),
|
|
647
2297
|
tabId: z.string().optional().describe("Tab ID"),
|
|
648
|
-
|
|
649
|
-
|
|
2298
|
+
cdpPort: z.number().optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
|
|
2299
|
+
}, async ({ selector, text, clear, tabId, cdpPort: portOverride }) => {
|
|
2300
|
+
const { client } = await getCDPClient(tabId, portOverride);
|
|
650
2301
|
await client.Runtime.enable();
|
|
651
2302
|
// Focus the element
|
|
652
2303
|
const focusResult = await client.Runtime.evaluate({
|
|
@@ -681,12 +2332,13 @@ server.tool("browser_type", "Type into an input field in Chrome. Uses CDP Input.
|
|
|
681
2332
|
await client.close();
|
|
682
2333
|
return { content: [{ type: "text", text: `Typed "${text}"` }] };
|
|
683
2334
|
});
|
|
684
|
-
server.tool("browser_wait", "Wait for a condition on a Chrome page", {
|
|
2335
|
+
server.tool("browser_wait", "Wait for a condition on a Chrome/Electron page", {
|
|
685
2336
|
condition: z.string().describe("JS expression that returns truthy when ready. e.g. 'document.querySelector(\".loaded\")'"),
|
|
686
2337
|
timeoutMs: z.number().optional().describe("Timeout in ms (default 10000)"),
|
|
687
2338
|
tabId: z.string().optional().describe("Tab ID"),
|
|
688
|
-
|
|
689
|
-
|
|
2339
|
+
cdpPort: z.number().optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
|
|
2340
|
+
}, async ({ condition, timeoutMs, tabId, cdpPort: portOverride }) => {
|
|
2341
|
+
const { CDP: cdp, port } = await ensureCDP(portOverride);
|
|
690
2342
|
let targetId = tabId;
|
|
691
2343
|
if (!targetId) {
|
|
692
2344
|
const targets = await cdp.List({ port });
|
|
@@ -712,8 +2364,11 @@ server.tool("browser_wait", "Wait for a condition on a Chrome page", {
|
|
|
712
2364
|
});
|
|
713
2365
|
server.tool("browser_page_info", "Get current page title, URL, and text content summary", {
|
|
714
2366
|
tabId: z.string().optional().describe("Tab ID"),
|
|
715
|
-
|
|
716
|
-
|
|
2367
|
+
cdpPort: z.number().optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
|
|
2368
|
+
}, async ({ tabId, cdpPort: portOverride }) => {
|
|
2369
|
+
// Capture bundleId BEFORE CDP call to prevent focus-change race
|
|
2370
|
+
const browserBundleId = worldModel.getState().focusedApp?.bundleId ?? "com.google.Chrome";
|
|
2371
|
+
const { CDP: cdp, port } = await ensureCDP(portOverride);
|
|
717
2372
|
let targetId = tabId;
|
|
718
2373
|
if (!targetId) {
|
|
719
2374
|
const targets = await cdp.List({ port });
|
|
@@ -733,6 +2388,14 @@ server.tool("browser_page_info", "Get current page title, URL, and text content
|
|
|
733
2388
|
returnByValue: true,
|
|
734
2389
|
});
|
|
735
2390
|
await client.close();
|
|
2391
|
+
// Feed page info into world model
|
|
2392
|
+
try {
|
|
2393
|
+
const info = result.result.value;
|
|
2394
|
+
if (info?.url) {
|
|
2395
|
+
worldModel.ingestCDPSnapshot(browserBundleId, info.url, info.title ?? "");
|
|
2396
|
+
}
|
|
2397
|
+
}
|
|
2398
|
+
catch { /* world model update is best-effort */ }
|
|
736
2399
|
return { content: [{ type: "text", text: JSON.stringify(result.result.value, null, 2) }] };
|
|
737
2400
|
});
|
|
738
2401
|
// ═══════════════════════════════════════════════
|
|
@@ -774,10 +2437,11 @@ if (origQuery) {
|
|
|
774
2437
|
};
|
|
775
2438
|
}
|
|
776
2439
|
`;
|
|
777
|
-
server.tool("browser_stealth", "Inject anti-detection patches into Chrome page. Call once after navigating to a protected site. Hides webdriver flag, patches plugins/languages/permissions.", {
|
|
2440
|
+
server.tool("browser_stealth", "Inject anti-detection patches into Chrome/Electron page. Call once after navigating to a protected site. Hides webdriver flag, patches plugins/languages/permissions.", {
|
|
778
2441
|
tabId: z.string().optional().describe("Tab ID. Omit for most recent tab."),
|
|
779
|
-
|
|
780
|
-
|
|
2442
|
+
cdpPort: z.number().optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
|
|
2443
|
+
}, async ({ tabId, cdpPort: portOverride }) => {
|
|
2444
|
+
const { client } = await getCDPClient(tabId, portOverride);
|
|
781
2445
|
await client.Page.enable();
|
|
782
2446
|
await client.Page.addScriptToEvaluateOnNewDocument({ source: STEALTH_SCRIPT });
|
|
783
2447
|
// Also evaluate immediately on current page
|
|
@@ -795,8 +2459,9 @@ server.tool("browser_fill_form", "Fill a form field with human-like typing (anti
|
|
|
795
2459
|
clear: z.boolean().optional().describe("Clear field first (default true)"),
|
|
796
2460
|
delayMs: z.number().optional().describe("Avg delay between keystrokes in ms (default 50)"),
|
|
797
2461
|
tabId: z.string().optional().describe("Tab ID"),
|
|
798
|
-
|
|
799
|
-
|
|
2462
|
+
cdpPort: z.number().optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
|
|
2463
|
+
}, async ({ selector, text, clear, delayMs, tabId, cdpPort: portOverride }) => {
|
|
2464
|
+
const { client } = await getCDPClient(tabId, portOverride);
|
|
800
2465
|
await client.Runtime.enable();
|
|
801
2466
|
// Focus the element
|
|
802
2467
|
const focusResult = await client.Runtime.evaluate({
|
|
@@ -834,14 +2499,15 @@ server.tool("browser_fill_form", "Fill a form field with human-like typing (anti
|
|
|
834
2499
|
await client.close();
|
|
835
2500
|
return { content: [{ type: "text", text: `Typed "${text}" (${text.length} chars, human-like)` }] };
|
|
836
2501
|
});
|
|
837
|
-
|
|
2502
|
+
// browser_human_click — alias for browser_click (both already use realistic mouse events)
|
|
2503
|
+
server.tool("browser_human_click", "Alias for browser_click — both use realistic mouseMoved → mousePressed → mouseReleased events. Prefer browser_click directly.", {
|
|
838
2504
|
selector: z.string().describe("CSS selector of element to click"),
|
|
839
2505
|
tabId: z.string().optional().describe("Tab ID. Omit for most recent tab."),
|
|
840
|
-
|
|
841
|
-
|
|
2506
|
+
cdpPort: z.number().optional().describe("CDP port override (e.g. 9333 for Electron apps)"),
|
|
2507
|
+
}, async ({ selector, tabId, cdpPort: portOverride }) => {
|
|
2508
|
+
const { client } = await getCDPClient(tabId, portOverride);
|
|
842
2509
|
await client.Runtime.enable();
|
|
843
|
-
|
|
844
|
-
const rectResult = await client.Runtime.evaluate({
|
|
2510
|
+
const result = await client.Runtime.evaluate({
|
|
845
2511
|
expression: `(() => {
|
|
846
2512
|
const el = document.querySelector(${JSON.stringify(selector)});
|
|
847
2513
|
if (!el) return { ok: false, reason: "Element not found: ${selector.replace(/"/g, '\\"')}" };
|
|
@@ -851,13 +2517,12 @@ server.tool("browser_human_click", "Click an element with realistic mouse events
|
|
|
851
2517
|
})()`,
|
|
852
2518
|
returnByValue: true,
|
|
853
2519
|
});
|
|
854
|
-
const val =
|
|
2520
|
+
const val = result.result.value;
|
|
855
2521
|
if (!val?.ok) {
|
|
856
2522
|
await client.close();
|
|
857
2523
|
return { content: [{ type: "text", text: val?.reason || "Element not found" }] };
|
|
858
2524
|
}
|
|
859
2525
|
const { x, y } = val;
|
|
860
|
-
// Simulate realistic mouse event sequence
|
|
861
2526
|
await client.Input.dispatchMouseEvent({ type: "mouseMoved", x, y });
|
|
862
2527
|
await randomDelay(30, 60);
|
|
863
2528
|
await client.Input.dispatchMouseEvent({ type: "mousePressed", x, y, button: "left", clickCount: 1 });
|
|
@@ -869,19 +2534,30 @@ server.tool("browser_human_click", "Click an element with realistic mouse events
|
|
|
869
2534
|
// ═══════════════════════════════════════════════
|
|
870
2535
|
// PLATFORM PLAYBOOKS — lazy-loaded site knowledge
|
|
871
2536
|
// ═══════════════════════════════════════════════
|
|
872
|
-
const
|
|
873
|
-
server.tool("platform_guide", "Get automation guide for a platform (selectors, URLs, flows, error solutions).
|
|
874
|
-
platform: z.string().describe("Platform name, e.g. 'devpost'"),
|
|
2537
|
+
const coverageAuditor = new CoverageAuditor(referencesDir, playbooksDir, learningEngine, goalStore);
|
|
2538
|
+
server.tool("platform_guide", "Get automation guide for a platform (selectors, URLs, flows, error solutions). Reads from references/ (curated knowledge). Zero cost — only loads when called.", {
|
|
2539
|
+
platform: z.string().describe("Platform name, e.g. 'figma', 'x-twitter', 'devpost'"),
|
|
875
2540
|
section: z.enum(["all", "urls", "flows", "selectors", "errors", "detection"]).optional().describe("Section to return (default: all). Use 'errors' for just error+solution pairs."),
|
|
876
2541
|
}, async ({ platform, section }) => {
|
|
877
|
-
const
|
|
2542
|
+
const safePlatName = platform.toLowerCase().replace(/[^a-z0-9_\-]/g, "_").slice(0, 100);
|
|
2543
|
+
const filePath = path.resolve(referencesDir, `${safePlatName}.json`);
|
|
2544
|
+
if (!filePath.startsWith(path.resolve(referencesDir))) {
|
|
2545
|
+
return { content: [{ type: "text", text: `Error: invalid platform name "${platform}"` }] };
|
|
2546
|
+
}
|
|
878
2547
|
if (!fs.existsSync(filePath)) {
|
|
879
|
-
const available = fs.existsSync(
|
|
880
|
-
? fs.readdirSync(
|
|
2548
|
+
const available = fs.existsSync(referencesDir)
|
|
2549
|
+
? fs.readdirSync(referencesDir).filter(f => f.endsWith(".json")).map(f => f.replace(".json", ""))
|
|
881
2550
|
: [];
|
|
882
2551
|
return { content: [{ type: "text", text: `No playbook for "${platform}". Available: ${available.join(", ") || "none"}` }] };
|
|
883
2552
|
}
|
|
884
|
-
|
|
2553
|
+
// L2-73 fix: Gracefully handle malformed reference JSON
|
|
2554
|
+
let data;
|
|
2555
|
+
try {
|
|
2556
|
+
data = JSON.parse(fs.readFileSync(filePath, "utf-8"));
|
|
2557
|
+
}
|
|
2558
|
+
catch (parseErr) {
|
|
2559
|
+
return { content: [{ type: "text", text: `Warning: reference file for "${platform}" is malformed and was skipped. Error: ${parseErr instanceof Error ? parseErr.message : String(parseErr)}` }] };
|
|
2560
|
+
}
|
|
885
2561
|
const s = section || "all";
|
|
886
2562
|
if (s === "errors") {
|
|
887
2563
|
const errors = data.errors || [];
|
|
@@ -915,6 +2591,152 @@ server.tool("platform_guide", "Get automation guide for a platform (selectors, U
|
|
|
915
2591
|
// "all" — return full playbook
|
|
916
2592
|
return { content: [{ type: "text", text: JSON.stringify(data, null, 2) }] };
|
|
917
2593
|
});
|
|
2594
|
+
server.tool("playbook_preflight", "Quick feasibility check before automating a platform. Scans the page for known blockers (captchas, WebGL, iframes), checks against playbook errors, tests selector availability. Returns go/yellow/red.", {
|
|
2595
|
+
url: z.string().describe("URL to check, e.g. 'https://x.com'"),
|
|
2596
|
+
task: z.string().optional().describe("What you want to automate, e.g. 'post a tweet'"),
|
|
2597
|
+
tabId: z.string().optional().describe("Tab ID if page is already open"),
|
|
2598
|
+
}, async ({ url, task, tabId }) => {
|
|
2599
|
+
const issues = [];
|
|
2600
|
+
const warnings = [];
|
|
2601
|
+
const good = [];
|
|
2602
|
+
// 1. Extract domain and find matching playbook
|
|
2603
|
+
let domain;
|
|
2604
|
+
try {
|
|
2605
|
+
domain = new URL(url).hostname.replace(/^www\./, "");
|
|
2606
|
+
}
|
|
2607
|
+
catch {
|
|
2608
|
+
return { content: [{ type: "text", text: `❌ Invalid URL: ${url}` }] };
|
|
2609
|
+
}
|
|
2610
|
+
// Check references/ for curated knowledge
|
|
2611
|
+
const reference = _playbookStoreForContext.matchByDomain(domain);
|
|
2612
|
+
if (reference) {
|
|
2613
|
+
good.push(`Found reference: "${reference.id}" (${reference.successCount} successes, ${reference.failCount} failures)`);
|
|
2614
|
+
// Check known errors
|
|
2615
|
+
if (reference.errors && reference.errors.length > 0) {
|
|
2616
|
+
for (const err of reference.errors) {
|
|
2617
|
+
if (err.severity === "high") {
|
|
2618
|
+
issues.push(`🔴 ${err.error} → ${err.solution}`);
|
|
2619
|
+
}
|
|
2620
|
+
else {
|
|
2621
|
+
warnings.push(`🟡 ${err.error} → ${err.solution}`);
|
|
2622
|
+
}
|
|
2623
|
+
}
|
|
2624
|
+
}
|
|
2625
|
+
// Check selector availability
|
|
2626
|
+
if (reference.selectors) {
|
|
2627
|
+
const selectorCount = Object.values(reference.selectors).reduce((sum, group) => sum + Object.keys(group).length, 0);
|
|
2628
|
+
good.push(`${selectorCount} selectors documented in reference`);
|
|
2629
|
+
}
|
|
2630
|
+
if (reference.flows && Object.keys(reference.flows).length > 0) {
|
|
2631
|
+
good.push(`${Object.keys(reference.flows).length} flows documented`);
|
|
2632
|
+
}
|
|
2633
|
+
}
|
|
2634
|
+
else {
|
|
2635
|
+
warnings.push(`🟡 No playbook exists for ${domain} — first-time automation, expect trial and error`);
|
|
2636
|
+
}
|
|
2637
|
+
// Check playbooks/ for executable steps
|
|
2638
|
+
const execPlaybookPath = path.resolve(playbooksDir, `${reference?.id ?? domain.split(".")[0]}.json`);
|
|
2639
|
+
if (fs.existsSync(execPlaybookPath)) {
|
|
2640
|
+
try {
|
|
2641
|
+
const execPb = JSON.parse(fs.readFileSync(execPlaybookPath, "utf-8"));
|
|
2642
|
+
if (Array.isArray(execPb.steps) && execPb.steps.length > 0) {
|
|
2643
|
+
good.push(`Executable playbook found: ${execPb.steps.length} steps — use job_create(playbookId="${execPb.id}") for auto-run`);
|
|
2644
|
+
}
|
|
2645
|
+
}
|
|
2646
|
+
catch { /* skip */ }
|
|
2647
|
+
}
|
|
2648
|
+
else if (reference) {
|
|
2649
|
+
warnings.push(`🟡 Reference exists but no executable playbook — manual execution needed`);
|
|
2650
|
+
}
|
|
2651
|
+
// 2. Scan the page if we have CDP access
|
|
2652
|
+
try {
|
|
2653
|
+
const { CDP: cdp, port } = await ensureCDP();
|
|
2654
|
+
let targetId = tabId;
|
|
2655
|
+
if (!targetId) {
|
|
2656
|
+
const targets = await cdp.List({ port });
|
|
2657
|
+
const page = targets.find((t) => t.type === "page" && t.url?.includes(domain));
|
|
2658
|
+
targetId = page?.id;
|
|
2659
|
+
}
|
|
2660
|
+
if (targetId) {
|
|
2661
|
+
const client = await cdp({ port, target: targetId });
|
|
2662
|
+
// Check for common blockers
|
|
2663
|
+
const checks = await client.Runtime.evaluate({
|
|
2664
|
+
expression: `(() => {
|
|
2665
|
+
const results = {};
|
|
2666
|
+
// Captcha detection
|
|
2667
|
+
results.hasCaptcha = !!(
|
|
2668
|
+
document.querySelector('[class*="captcha"]') ||
|
|
2669
|
+
document.querySelector('[class*="recaptcha"]') ||
|
|
2670
|
+
document.querySelector('[data-sitekey]') ||
|
|
2671
|
+
document.querySelector('iframe[src*="captcha"]') ||
|
|
2672
|
+
document.querySelector('iframe[src*="recaptcha"]')
|
|
2673
|
+
);
|
|
2674
|
+
// WebGL canvas (can't click via DOM)
|
|
2675
|
+
results.hasWebGL = !!(document.querySelector('canvas[data-engine]') || document.querySelector('canvas.webgl'));
|
|
2676
|
+
// Shadow DOM
|
|
2677
|
+
const allEls = document.querySelectorAll('*');
|
|
2678
|
+
let shadowCount = 0;
|
|
2679
|
+
for (const el of allEls) { if (el.shadowRoot) shadowCount++; }
|
|
2680
|
+
results.shadowDomCount = shadowCount;
|
|
2681
|
+
// Iframes
|
|
2682
|
+
results.iframeCount = document.querySelectorAll('iframe').length;
|
|
2683
|
+
// React/SPA detection
|
|
2684
|
+
results.isReact = !!(window.__REACT_DEVTOOLS_GLOBAL_HOOK__ || document.querySelector('[data-reactroot]'));
|
|
2685
|
+
results.isNextJs = !!document.querySelector('#__next');
|
|
2686
|
+
results.pageTitle = document.title;
|
|
2687
|
+
results.url = location.href;
|
|
2688
|
+
return results;
|
|
2689
|
+
})()`,
|
|
2690
|
+
returnByValue: true,
|
|
2691
|
+
});
|
|
2692
|
+
await client.close();
|
|
2693
|
+
const r = checks.result.value;
|
|
2694
|
+
if (r) {
|
|
2695
|
+
good.push(`Page loaded: "${r.pageTitle}"`);
|
|
2696
|
+
if (r.hasCaptcha)
|
|
2697
|
+
issues.push(`🔴 CAPTCHA detected — cannot be automated, needs manual solve`);
|
|
2698
|
+
if (r.hasWebGL)
|
|
2699
|
+
warnings.push(`🟡 WebGL canvas detected — DOM clicks won't work, use Input.dispatchMouseEvent or coordinates`);
|
|
2700
|
+
if (r.shadowDomCount > 0)
|
|
2701
|
+
warnings.push(`🟡 ${r.shadowDomCount} Shadow DOM elements — standard selectors may not reach them`);
|
|
2702
|
+
if (r.iframeCount > 0)
|
|
2703
|
+
warnings.push(`🟡 ${r.iframeCount} iframes — may need to switch context`);
|
|
2704
|
+
if (r.isReact)
|
|
2705
|
+
warnings.push(`🟡 React app — el.value assignment may not work, use browser_fill_form instead`);
|
|
2706
|
+
}
|
|
2707
|
+
}
|
|
2708
|
+
else {
|
|
2709
|
+
warnings.push(`🟡 Page not open in Chrome — open ${url} first for deeper scan`);
|
|
2710
|
+
}
|
|
2711
|
+
}
|
|
2712
|
+
catch {
|
|
2713
|
+
warnings.push(`🟡 Chrome CDP not available — can't scan page. Launch Chrome with --remote-debugging-port=9222`);
|
|
2714
|
+
}
|
|
2715
|
+
// 3. Check memory for past errors on this domain
|
|
2716
|
+
const memErrors = memory.readErrors();
|
|
2717
|
+
const domainErrors = memErrors.filter(e => {
|
|
2718
|
+
const paramStr = JSON.stringify(e.params ?? {});
|
|
2719
|
+
return paramStr.includes(domain);
|
|
2720
|
+
});
|
|
2721
|
+
if (domainErrors.length > 0) {
|
|
2722
|
+
warnings.push(`🟡 ${domainErrors.length} past error(s) recorded for ${domain} in memory`);
|
|
2723
|
+
}
|
|
2724
|
+
// 4. Build verdict
|
|
2725
|
+
const rating = issues.length > 0 ? "🔴 RED" : warnings.length > 2 ? "🟡 YELLOW" : "🟢 GREEN";
|
|
2726
|
+
const lines = [
|
|
2727
|
+
`# Preflight: ${domain}`,
|
|
2728
|
+
`Rating: ${rating}`,
|
|
2729
|
+
"",
|
|
2730
|
+
...good.map(g => `✅ ${g}`),
|
|
2731
|
+
...(issues.length > 0 ? ["", "## Blockers", ...issues] : []),
|
|
2732
|
+
...(warnings.length > 0 ? ["", "## Warnings", ...warnings] : []),
|
|
2733
|
+
"",
|
|
2734
|
+
issues.length > 0
|
|
2735
|
+
? "⛔ Some tasks may not be fully automatable. Review blockers above."
|
|
2736
|
+
: "✅ Looks feasible. Proceed with automation.",
|
|
2737
|
+
];
|
|
2738
|
+
return { content: [{ type: "text", text: lines.join("\n") }] };
|
|
2739
|
+
});
|
|
918
2740
|
server.tool("export_playbook", "Generate a playbook JSON from your session. Extracts URLs, selectors, errors+solutions from memory. Share the output with ScreenHand to help others automate this platform.", {
|
|
919
2741
|
platform: z.string().describe("Platform name, e.g. 'linkedin', 'twitter'"),
|
|
920
2742
|
domain: z.string().describe("Domain to filter actions by, e.g. 'linkedin.com'"),
|
|
@@ -968,7 +2790,7 @@ server.tool("export_playbook", "Generate a playbook JSON from your session. Extr
|
|
|
968
2790
|
s.tags.some(t => t.toLowerCase().includes(platform.toLowerCase())));
|
|
969
2791
|
// 2. Scan current page for selectors if tab is available
|
|
970
2792
|
let pageSelectors = {};
|
|
971
|
-
if (tabId
|
|
2793
|
+
if (tabId) {
|
|
972
2794
|
try {
|
|
973
2795
|
const { client } = await getCDPClient(tabId);
|
|
974
2796
|
await client.Runtime.enable();
|
|
@@ -1008,13 +2830,24 @@ server.tool("export_playbook", "Generate a playbook JSON from your session. Extr
|
|
|
1008
2830
|
description: description || `Automation playbook for ${platform}`,
|
|
1009
2831
|
urls: Object.fromEntries(Array.from(urlSet).sort().map((u, i) => {
|
|
1010
2832
|
const urlObj = new URL(u);
|
|
2833
|
+
// L2-69 fix: Redact sensitive query params before exporting
|
|
2834
|
+
const sensitiveParams = new Set(["code", "token", "access_token", "refresh_token", "id_token",
|
|
2835
|
+
"secret", "password", "key", "api_key", "apikey", "auth",
|
|
2836
|
+
"session", "session_id", "sessionid", "state", "nonce"]);
|
|
2837
|
+
for (const paramName of urlObj.searchParams.keys()) {
|
|
2838
|
+
if (sensitiveParams.has(paramName.toLowerCase())) {
|
|
2839
|
+
urlObj.searchParams.set(paramName, "[REDACTED]");
|
|
2840
|
+
}
|
|
2841
|
+
}
|
|
2842
|
+
const safeUrl = urlObj.toString();
|
|
1011
2843
|
const pathKey = urlObj.pathname.replace(/^\//, "").replace(/\//g, "_").replace(/[^a-zA-Z0-9_]/g, "") || "home";
|
|
1012
|
-
return [pathKey,
|
|
2844
|
+
return [pathKey, safeUrl];
|
|
1013
2845
|
})),
|
|
1014
2846
|
flows: {
|
|
1015
2847
|
discovered: {
|
|
2848
|
+
// S75 Option C: Redact PII from exported strategy steps
|
|
1016
2849
|
steps: domainStrategies.length > 0
|
|
1017
|
-
? domainStrategies[0].steps.map((s) => `${s.tool}(${JSON.stringify(s.params)})`)
|
|
2850
|
+
? domainStrategies[0].steps.map((s) => redactPII(`${s.tool}(${JSON.stringify(s.params)})`))
|
|
1018
2851
|
: ["No strategies recorded yet. Use the platform, then call export_playbook again."],
|
|
1019
2852
|
selectors: pageSelectors,
|
|
1020
2853
|
},
|
|
@@ -1037,16 +2870,30 @@ server.tool("export_playbook", "Generate a playbook JSON from your session. Extr
|
|
|
1037
2870
|
strategies_count: domainStrategies.length,
|
|
1038
2871
|
},
|
|
1039
2872
|
};
|
|
1040
|
-
// 4. Save to
|
|
1041
|
-
const
|
|
2873
|
+
// 4. Save to references dir (curated knowledge, not executable steps)
|
|
2874
|
+
const safePlatformName = platform.toLowerCase().replace(/[^a-z0-9_\-]/g, "_").slice(0, 100);
|
|
2875
|
+
const outPath = path.resolve(referencesDir, `${safePlatformName}.json`);
|
|
2876
|
+
// Guard: refuse to write outside references dir
|
|
2877
|
+
if (!outPath.startsWith(path.resolve(referencesDir))) {
|
|
2878
|
+
return { content: [{ type: "text", text: `Error: invalid platform name "${platform}" — path traversal detected` }] };
|
|
2879
|
+
}
|
|
1042
2880
|
const exists = fs.existsSync(outPath);
|
|
1043
|
-
if (!fs.existsSync(
|
|
1044
|
-
fs.mkdirSync(
|
|
2881
|
+
if (!fs.existsSync(referencesDir))
|
|
2882
|
+
fs.mkdirSync(referencesDir, { recursive: true });
|
|
1045
2883
|
fs.writeFileSync(outPath, JSON.stringify(playbook, null, 2));
|
|
2884
|
+
// Track playbook export for teaching ability rating factor
|
|
2885
|
+
const expBundleId = worldModel.getState().focusedApp?.bundleId;
|
|
2886
|
+
if (expBundleId) {
|
|
2887
|
+
const expMapData = appMap.getLoaded(expBundleId);
|
|
2888
|
+
if (expMapData) {
|
|
2889
|
+
expMapData.playbooksExported = (expMapData.playbooksExported ?? 0) + 1;
|
|
2890
|
+
appMap.save(expMapData, true);
|
|
2891
|
+
}
|
|
2892
|
+
}
|
|
1046
2893
|
return {
|
|
1047
2894
|
content: [{
|
|
1048
2895
|
type: "text",
|
|
1049
|
-
text: `${exists ? "Updated" : "Created"}
|
|
2896
|
+
text: `${exists ? "Updated" : "Created"} reference: references/${platform.toLowerCase()}.json\n\n` +
|
|
1050
2897
|
`URLs found: ${urlSet.size}\n` +
|
|
1051
2898
|
`Selectors found: ${Object.keys(pageSelectors).length}\n` +
|
|
1052
2899
|
`Errors documented: ${domainErrors.length}\n` +
|
|
@@ -1057,12 +2904,181 @@ server.tool("export_playbook", "Generate a playbook JSON from your session. Extr
|
|
|
1057
2904
|
};
|
|
1058
2905
|
});
|
|
1059
2906
|
// ═══════════════════════════════════════════════
|
|
2907
|
+
// PLAYBOOK RECORD — macro recorder for MCP tool calls
|
|
2908
|
+
// ═══════════════════════════════════════════════
|
|
2909
|
+
server.tool("playbook_record", "Macro recorder: start recording, do the flow, stop to save as executable playbook. Captures every click/type/navigate tool call as a PlaybookStep.", {
|
|
2910
|
+
action: z.enum(["start", "stop", "cancel", "status"]).describe("start/stop/cancel/status"),
|
|
2911
|
+
platform: z.string().optional().describe("Platform name (required for start)"),
|
|
2912
|
+
name: z.string().optional().describe("Playbook name (required for stop)"),
|
|
2913
|
+
description: z.string().optional().describe("Playbook description (for stop)"),
|
|
2914
|
+
cdpPort: z.number().optional().describe("CDP port if needed for browser_js steps (e.g. 9333 for Codex)"),
|
|
2915
|
+
}, async ({ action, platform, name, description, cdpPort }) => {
|
|
2916
|
+
switch (action) {
|
|
2917
|
+
case "start": {
|
|
2918
|
+
if (!platform)
|
|
2919
|
+
return { content: [{ type: "text", text: "Error: platform is required for start" }] };
|
|
2920
|
+
if (mcpRecorder.isRecording)
|
|
2921
|
+
return { content: [{ type: "text", text: "Already recording. Call stop or cancel first." }] };
|
|
2922
|
+
mcpRecorder.start(platform, cdpPort ?? undefined);
|
|
2923
|
+
return { content: [{ type: "text", text: `Recording started for "${platform}". All subsequent tool calls will be captured.\nCall playbook_record(action="stop", name="...") when done.` }] };
|
|
2924
|
+
}
|
|
2925
|
+
case "stop": {
|
|
2926
|
+
if (!mcpRecorder.isRecording)
|
|
2927
|
+
return { content: [{ type: "text", text: "No active recording." }] };
|
|
2928
|
+
if (!name)
|
|
2929
|
+
return { content: [{ type: "text", text: "Error: name is required for stop" }] };
|
|
2930
|
+
const playbook = mcpRecorder.stop(name, description ?? name);
|
|
2931
|
+
// Track playbook export for teaching ability rating factor
|
|
2932
|
+
const pbBundleId = worldModel.getState().focusedApp?.bundleId;
|
|
2933
|
+
if (pbBundleId) {
|
|
2934
|
+
const pbMapData = appMap.getLoaded(pbBundleId);
|
|
2935
|
+
if (pbMapData) {
|
|
2936
|
+
pbMapData.playbooksExported = (pbMapData.playbooksExported ?? 0) + 1;
|
|
2937
|
+
appMap.save(pbMapData, true);
|
|
2938
|
+
}
|
|
2939
|
+
}
|
|
2940
|
+
const stepList = playbook.steps.map((s, i) => ` ${i + 1}. [${s.action}] ${s.description ?? ""}`).join("\n");
|
|
2941
|
+
return { content: [{ type: "text", text: `Playbook saved: playbooks/${playbook.id}.json (${playbook.steps.length} steps)\n\n${stepList}` }] };
|
|
2942
|
+
}
|
|
2943
|
+
case "cancel": {
|
|
2944
|
+
mcpRecorder.cancel();
|
|
2945
|
+
return { content: [{ type: "text", text: "Recording cancelled." }] };
|
|
2946
|
+
}
|
|
2947
|
+
case "status": {
|
|
2948
|
+
if (!mcpRecorder.isRecording)
|
|
2949
|
+
return { content: [{ type: "text", text: "Not recording." }] };
|
|
2950
|
+
const steps = mcpRecorder.getSteps().map((s, i) => ` ${i + 1}. [${s.action}] ${s.description ?? ""}`).join("\n");
|
|
2951
|
+
return { content: [{ type: "text", text: `Recording active: ${mcpRecorder.stepCount} steps captured\n${steps}` }] };
|
|
2952
|
+
}
|
|
2953
|
+
}
|
|
2954
|
+
});
|
|
2955
|
+
// ═══════════════════════════════════════════════
|
|
2956
|
+
// PLATFORM EXPLORE — autonomous app exploration
|
|
2957
|
+
// ═══════════════════════════════════════════════
|
|
2958
|
+
/**
 * MCP tool: platform_explore.
 *
 * Web mode (url and/or tabId given): attaches to a Chrome tab over CDP,
 * discovers up to `maxElements` interactive elements, tests each one, then
 * compiles and saves a reference file.
 * Native mode (bundleId given): launches the app if it is not already listed,
 * discovers elements through the bridge and records them WITHOUT clicking.
 *
 * Returns an MCP text result. Throws if no Chrome page tab is available or the
 * native app cannot be found after launch.
 */
server.tool("platform_explore", "Autonomously explore an app or website. Maps all interactive elements, tries each one, records working selectors and broken paths. Outputs a reference JSON.", {
    platform: z.string().describe("Platform name for the output file, e.g. 'figma', 'canva'"),
    url: z.string().optional().describe("URL for web app. Requires Chrome with --remote-debugging-port."),
    bundleId: z.string().optional().describe("macOS bundle ID for native app, e.g. 'com.figma.Desktop'"),
    maxElements: z.number().optional().describe("Max elements to test (default: 30)"),
    tabId: z.string().optional().describe("Existing Chrome tab ID if page is already open"),
}, async ({ platform, url, bundleId, maxElements, tabId }) => {
    const max = maxElements ?? 30;
    const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
    if (url || tabId) {
        // Web exploration via CDP
        const { CDP: cdp, port } = await ensureCDP();
        let targetId = tabId;
        if (!targetId && url) {
            // No tab was specified: reuse the first open page tab and navigate it to the URL.
            const targets = await cdp.List({ port });
            const page = targets.find((t) => t.type === "page");
            if (!page)
                throw new Error("No Chrome tabs open");
            targetId = page.id;
            const navClient = await cdp({ port, target: targetId });
            try {
                await navClient.Page.enable();
                await navClient.Page.navigate({ url });
                // Fixed settle time for the page to load before discovery starts.
                await sleep(3000);
            }
            finally {
                // Always release the CDP connection, even when navigation fails.
                await navClient.close();
            }
        }
        if (!targetId)
            throw new Error("No tab available");
        const client = await cdp({ port, target: targetId });
        let elements;
        const tested = [];
        try {
            await client.Runtime.enable();
            const evaluate = async (expr) => {
                return client.Runtime.evaluate({ expression: expr, returnByValue: true, awaitPromise: true });
            };
            // Discover elements
            elements = await discoverWebElements(evaluate, max);
            // Test each element, with a randomized pause between interactions
            for (const el of elements) {
                tested.push(await testWebElement(evaluate, el));
                await sleep(300 + Math.random() * 500);
            }
        }
        finally {
            // Close the connection on both success and failure so it is never leaked.
            await client.close();
        }
        // Compile and save
        const result = compileReference(platform, "web", tested, url);
        const filePath = saveExploreResult(referencesDir, result);
        return { content: [{ type: "text", text: `Exploration complete: ${filePath}\n\nElements found: ${elements.length}\nTested: ${result.testedElements}\nWorking selectors: ${result.workingSelectors}\nErrors: ${result.errors.length}\n\nKey discoveries:\n${result.keyDiscoveries.map(d => ` - ${d}`).join("\n")}` }] };
    }
    else if (bundleId) {
        // Native app exploration via bridge
        await ensureBridge();
        const findApp = async () => {
            const apps = await bridge.call("app.list");
            return apps.find((a) => a.bundleId === bundleId);
        };
        let target = await findApp();
        if (!target) {
            // Not running yet: launch it, give it time to start, then look again.
            await bridge.call("app.launch", { bundleId });
            await sleep(3000);
            target = await findApp();
        }
        if (!target)
            throw new Error(`App ${bundleId} not running`);
        const elements = await discoverNativeElements(bridge, target.pid, max);
        // For native apps, we record discovery but don't auto-click (too risky)
        const result = compileReference(platform, "native", elements.map((el) => ({
            ...el, clickWorked: true, result: "discovered_not_tested",
        })), undefined, bundleId);
        const filePath = saveExploreResult(referencesDir, result);
        return { content: [{ type: "text", text: `Native app exploration complete: ${filePath}\n\nElements discovered: ${elements.length}\n(Native elements discovered but not auto-clicked for safety. Use playbook_record to test interactively.)` }] };
    }
    else {
        return { content: [{ type: "text", text: "Error: Provide either url (for web apps) or bundleId (for native apps)." }] };
    }
});
|
|
3032
|
+
// ═══════════════════════════════════════════════
|
|
3033
|
+
// PLATFORM LEARN — scrape docs/help/shortcuts
|
|
3034
|
+
// ═══════════════════════════════════════════════
|
|
3035
|
+
/**
 * MCP tool: platform_learn.
 *
 * Builds a list of candidate documentation URLs for `platform`, crawls them
 * one at a time in the first open Chrome page tab over CDP, and keeps pages
 * whose extracted text is longer than 100 characters. Stops once `maxPages`
 * pages (default 5) have been kept. The kept pages are compiled into a learn
 * result and saved under the references directory.
 *
 * Returns an MCP text result. Throws if no Chrome page tab is open.
 */
server.tool("platform_learn", "Scrape official docs, help center, keyboard shortcuts for a platform. Crawls pages via Chrome and extracts structured data into a reference JSON.", {
    platform: z.string().describe("Platform name, e.g. 'figma', 'notion', 'slack'"),
    url: z.string().optional().describe("Root URL to start from. If omitted, guesses from platform name."),
    maxPages: z.number().optional().describe("Max pages to crawl (default: 5)"),
}, async ({ platform, url, maxPages }) => {
    const max = maxPages ?? 5;
    const urls = buildDocUrls(platform, url);
    const { CDP: cdp, port } = await ensureCDP();
    const targets = await cdp.List({ port });
    const page = targets.find((t) => t.type === "page");
    if (!page)
        throw new Error("No Chrome tabs open. Open Chrome first.");
    const client = await cdp({ port, target: page.id });
    const crawled = [];
    try {
        await client.Runtime.enable();
        await client.Page.enable();
        for (const docUrl of urls) {
            // Only pages that produced usable content count toward the limit.
            if (crawled.length >= max)
                break;
            try {
                const result = await crawlPage(client, docUrl, 8000);
                if (result.success && result.content && result.content.text.length > 100) {
                    crawled.push({ url: docUrl, content: result.content, ...(result.shortcuts ? { shortcuts: result.shortcuts } : {}), ...(result.selectors ? { selectors: result.selectors } : {}) });
                }
            }
            catch {
                // Best-effort crawl: a failing candidate URL is skipped so the
                // remaining candidates still get a chance.
            }
            // Randomized 1-2s pause between page loads.
            await new Promise((resolve) => setTimeout(resolve, 1000 + Math.random() * 1000));
        }
    }
    finally {
        // Release the CDP connection even if enabling a domain or the loop throws.
        await client.close();
    }
    if (crawled.length === 0) {
        return { content: [{ type: "text", text: `No documentation pages found for "${platform}". Try providing a specific URL.` }] };
    }
    const result = compileLearnResult(platform, crawled);
    const filePath = saveLearnResult(referencesDir, result);
    return { content: [{ type: "text", text: `Learning complete: ${filePath}\n\nPages crawled: ${crawled.length}\nShortcuts found: ${Object.keys(result.shortcuts).length}\nFeatures found: ${result.features.length}\nSelectors found: ${Object.values(result.selectors).reduce((n, g) => n + Object.keys(g).length, 0)}\nAPI endpoints: ${result.apiEndpoints.length}\nKnown limitations: ${result.knownLimitations.length}` }] };
});
|
|
3075
|
+
// ═══════════════════════════════════════════════
|
|
1060
3076
|
// APPLESCRIPT — control scriptable apps directly
|
|
1061
3077
|
// ═══════════════════════════════════════════════
|
|
1062
3078
|
server.tool("applescript", "Run an AppleScript command. For controlling Finder, Safari, Mail, Notes, etc. (macOS only). WARNING: Executes arbitrary AppleScript — can perform destructive actions (delete files, send emails). All executions are audit-logged.", {
|
|
1063
3079
|
script: z.string().describe("AppleScript code to execute"),
|
|
1064
3080
|
}, async ({ script }) => {
|
|
1065
|
-
auditLog("applescript", { script
|
|
3081
|
+
auditLog("applescript", { script });
|
|
1066
3082
|
if (process.platform === "win32") {
|
|
1067
3083
|
return { content: [{ type: "text", text: "AppleScript is not supported on Windows. Use ui_tree, ui_press, and other accessibility tools instead." }] };
|
|
1068
3084
|
}
|
|
@@ -1190,6 +3206,15 @@ originalTool("session_claim", "Claim exclusive control of an app window. Prevent
|
|
|
1190
3206
|
app: z.string().describe("Bundle ID of the app (e.g., 'com.google.Chrome')"),
|
|
1191
3207
|
windowId: z.number().describe("Window ID to claim (get from 'windows' tool)"),
|
|
1192
3208
|
}, async ({ clientId, clientType, app, windowId }) => {
|
|
3209
|
+
// Validate window ID exists
|
|
3210
|
+
try {
|
|
3211
|
+
await ensureBridge();
|
|
3212
|
+
const wins = await bridge.call("window.list", {});
|
|
3213
|
+
if (wins && !wins.some((w) => w.windowId === windowId)) {
|
|
3214
|
+
return { content: [{ type: "text", text: `Window ${windowId} does not exist. Use the windows() tool to get valid window IDs.` }] };
|
|
3215
|
+
}
|
|
3216
|
+
}
|
|
3217
|
+
catch { /* best-effort validation — proceed if bridge unavailable */ }
|
|
1193
3218
|
// Use filesystem-backed lease manager directly (shared with daemon)
|
|
1194
3219
|
const lease = leaseManager.claim({ id: clientId, type: clientType, startedAt: new Date().toISOString() }, app, windowId);
|
|
1195
3220
|
if (!lease) {
|
|
@@ -1211,6 +3236,8 @@ originalTool("session_heartbeat", "Keep your session lease alive. Call every 60
|
|
|
1211
3236
|
originalTool("session_release", "Release your session lease so other clients can use the window.", {
|
|
1212
3237
|
sessionId: z.string().describe("Session ID to release"),
|
|
1213
3238
|
}, async ({ sessionId }) => {
|
|
3239
|
+
// Flush playbook learnings before releasing session
|
|
3240
|
+
contextTracker.flush();
|
|
1214
3241
|
// Use filesystem-backed lease manager directly (shared with daemon)
|
|
1215
3242
|
const released = leaseManager.release(sessionId);
|
|
1216
3243
|
return { content: [{ type: "text", text: released ? `Session ${sessionId} released.` : `Session ${sessionId} not found.` }] };
|
|
@@ -1397,6 +3424,18 @@ originalTool("recovery_queue_add", "Add a manual recovery instruction for a stal
|
|
|
1397
3424
|
type: z.enum(["nudge", "restart", "escalate", "custom"]).describe("Recovery type"),
|
|
1398
3425
|
instruction: z.string().describe("What to do (e.g., 'Click the login button', 'Restart Chrome')"),
|
|
1399
3426
|
}, async ({ sessionId, type, instruction }) => {
|
|
3427
|
+
// Validate that the session ID looks reasonable (basic format check)
|
|
3428
|
+
// Accept both lease-style (lease_*) and generic session IDs
|
|
3429
|
+
if (!sessionId || sessionId.length < 3 || sessionId.length > 200) {
|
|
3430
|
+
return { content: [{ type: "text", text: `Error: Invalid session ID "${sessionId}". Must be 3-200 characters.` }] };
|
|
3431
|
+
}
|
|
3432
|
+
// Validate session is active — reject orphaned recovery instructions
|
|
3433
|
+
const activeSessions = leaseManager.getActive();
|
|
3434
|
+
const isActive = activeSessions.some(s => s.sessionId === sessionId);
|
|
3435
|
+
if (!isActive) {
|
|
3436
|
+
return { content: [{ type: "text", text: `Session "${sessionId}" is not active. Use supervisor_status to find active sessions.` }] };
|
|
3437
|
+
}
|
|
3438
|
+
const warning = "";
|
|
1400
3439
|
const recovery = {
|
|
1401
3440
|
id: "recv_" + Date.now().toString(36) + "_" + Math.random().toString(36).slice(2, 8),
|
|
1402
3441
|
sessionId,
|
|
@@ -1409,9 +3448,19 @@ originalTool("recovery_queue_add", "Add a manual recovery instruction for a stal
|
|
|
1409
3448
|
};
|
|
1410
3449
|
// Write to daemon's filesystem state so the daemon picks it up
|
|
1411
3450
|
const recoveries = readDaemonRecoveries();
|
|
1412
|
-
|
|
1413
|
-
|
|
1414
|
-
|
|
3451
|
+
// Prune old completed/failed entries (keep last 50, drop entries older than 24h)
|
|
3452
|
+
const MAX_QUEUE_SIZE = 50;
|
|
3453
|
+
const MAX_AGE_MS = 24 * 60 * 60 * 1000;
|
|
3454
|
+
const cutoff = Date.now() - MAX_AGE_MS;
|
|
3455
|
+
const pruned = recoveries.filter((r) => {
|
|
3456
|
+
if (r.status === "pending")
|
|
3457
|
+
return true; // always keep pending
|
|
3458
|
+
const age = new Date(r.createdAt).getTime();
|
|
3459
|
+
return age > cutoff;
|
|
3460
|
+
}).slice(-MAX_QUEUE_SIZE);
|
|
3461
|
+
pruned.push(recovery);
|
|
3462
|
+
writeDaemonRecoveries(pruned);
|
|
3463
|
+
return { content: [{ type: "text", text: `Recovery queued: ${recovery.id} (type=${type})${warning}` }] };
|
|
1415
3464
|
});
|
|
1416
3465
|
originalTool("recovery_queue_list", "List recovery actions, optionally filtered by status.", {
|
|
1417
3466
|
status: z.enum(["pending", "attempted", "succeeded", "failed"]).optional().describe("Filter by status"),
|
|
@@ -1590,7 +3639,7 @@ originalTool("supervisor_uninstall", "Uninstall the supervisor system service. S
|
|
|
1590
3639
|
// EXECUTION CONTRACT — canonical fallback chain
|
|
1591
3640
|
// ═══════════════════════════════════════════════
|
|
1592
3641
|
import { METHOD_CAPABILITIES, DEFAULT_RETRY_POLICY, planExecution, executeWithFallback, } from "./src/runtime/execution-contract.js";
|
|
1593
|
-
|
|
3642
|
+
server.tool("execution_plan", "Show the execution plan for an action type. Returns the ordered fallback chain based on available infrastructure.", {
|
|
1594
3643
|
action: z.enum(["click", "type", "read", "locate", "select", "scroll"]).describe("Action type"),
|
|
1595
3644
|
}, async ({ action }) => {
|
|
1596
3645
|
const plan = planExecution(action, { hasBridge: true, hasCDP: cdpPort !== null });
|
|
@@ -1598,7 +3647,18 @@ originalTool("execution_plan", "Show the execution plan for an action type. Retu
|
|
|
1598
3647
|
const cap = METHOD_CAPABILITIES[method];
|
|
1599
3648
|
return `${i + 1}. ${method} (~${cap.avgLatencyMs}ms)${i === 0 ? " ← primary" : ""}`;
|
|
1600
3649
|
});
|
|
1601
|
-
|
|
3650
|
+
const policy = getAdaptedRetryPolicy();
|
|
3651
|
+
lines.push("", `Retry policy: ${policy.maxRetriesPerMethod}/method, ${policy.maxTotalRetries} total, escalate after ${policy.escalateAfter}, delay ${policy.delayBetweenRetriesMs}ms`);
|
|
3652
|
+
const appBundleId = worldModel.getState().focusedApp?.bundleId;
|
|
3653
|
+
if (appBundleId) {
|
|
3654
|
+
const budget = learningEngine.getAdaptiveBudget(appBundleId);
|
|
3655
|
+
lines.push(`Adaptive budgets: locate=${budget.locateMs}ms, act=${budget.actMs}ms, verify=${budget.verifyMs}ms`);
|
|
3656
|
+
}
|
|
3657
|
+
// Include app-specific hints from reference files and context tracker
|
|
3658
|
+
const hints = contextTracker.getHints(action, {});
|
|
3659
|
+
if (hints.length > 0) {
|
|
3660
|
+
lines.push("", "App-specific context:", ...hints.slice(0, 5));
|
|
3661
|
+
}
|
|
1602
3662
|
return { content: [{ type: "text", text: `Execution plan for "${action}":\n${lines.join("\n")}` }] };
|
|
1603
3663
|
});
|
|
1604
3664
|
// ── Shared helpers for resilient action tools ──
|
|
@@ -1623,6 +3683,21 @@ async function resolvePid(bundleId) {
|
|
|
1623
3683
|
function infra() {
|
|
1624
3684
|
return { hasBridge: true, hasCDP: cdpPort !== null };
|
|
1625
3685
|
}
|
|
3686
|
+
/**
 * Get the retry policy to use for fallback execution, adapted to the current
 * adaptive budget when one is set.
 *
 * Reads the module-level `currentAdaptiveBudget` and `DEFAULT_RETRY_POLICY`.
 * The adapted delay is 1.5x the larger of the locate/act budgets, with a floor
 * of 100ms, and is never longer than the default delay — so faster apps get
 * shorter pauses between retries.
 *
 * @returns {object} `DEFAULT_RETRY_POLICY` itself when nothing changes (no
 *   budget, unusable budget numbers, or the computed delay equals the
 *   default); otherwise a shallow copy with `delayBetweenRetriesMs` replaced.
 */
function getAdaptedRetryPolicy() {
    if (!currentAdaptiveBudget)
        return DEFAULT_RETRY_POLICY;
    const { locateMs, actMs } = currentAdaptiveBudget;
    const typicalMs = Math.max(locateMs, actMs);
    // A missing or non-numeric budget field makes Math.max return NaN, which
    // would otherwise propagate into the delay. Fall back to the default instead.
    if (!Number.isFinite(typicalMs))
        return DEFAULT_RETRY_POLICY;
    const defaultDelayMs = DEFAULT_RETRY_POLICY.delayBetweenRetriesMs;
    // Retry delay = max(100ms, typical * 1.5), capped at the default
    const adaptedDelayMs = Math.min(defaultDelayMs, Math.max(100, Math.ceil(typicalMs * 1.5)));
    if (adaptedDelayMs === defaultDelayMs)
        return DEFAULT_RETRY_POLICY;
    return { ...DEFAULT_RETRY_POLICY, delayBetweenRetriesMs: adaptedDelayMs };
}
|
|
1626
3701
|
function formatResult(action, target, result) {
|
|
1627
3702
|
if (result.ok) {
|
|
1628
3703
|
const fallbackNote = result.fallbackFrom ? ` (fell back from ${result.fallbackFrom})` : "";
|
|
@@ -1631,7 +3706,7 @@ function formatResult(action, target, result) {
|
|
|
1631
3706
|
return { content: [{ type: "text", text: `Failed to ${action} "${target}" — all methods exhausted. Last error: ${result.error}` }] };
|
|
1632
3707
|
}
|
|
1633
3708
|
// ── click_with_fallback ──
|
|
1634
|
-
|
|
3709
|
+
server.tool("click_with_fallback", "Click a target by text using the canonical fallback chain: AX → CDP → OCR. Automatically retries and falls through methods.", {
|
|
1635
3710
|
target: z.string().describe("Text, title, or identifier of the element to click"),
|
|
1636
3711
|
bundleId: z.string().optional().describe("App bundle ID (for AX path)"),
|
|
1637
3712
|
}, async ({ target, bundleId }) => {
|
|
@@ -1639,17 +3714,27 @@ originalTool("click_with_fallback", "Click a target by text using the canonical
|
|
|
1639
3714
|
const plan = planExecution("click", infra())
|
|
1640
3715
|
.filter((m) => m !== "coordinates");
|
|
1641
3716
|
const targetPid = await resolvePid(bundleId);
|
|
1642
|
-
const result = await executeWithFallback("click", plan,
|
|
3717
|
+
const result = await executeWithFallback("click", plan, getAdaptedRetryPolicy(), async (method, attempt) => {
|
|
1643
3718
|
const start = Date.now();
|
|
1644
3719
|
try {
|
|
1645
3720
|
switch (method) {
|
|
1646
3721
|
case "ax": {
|
|
1647
|
-
//
|
|
1648
|
-
|
|
1649
|
-
|
|
1650
|
-
|
|
1651
|
-
|
|
1652
|
-
|
|
3722
|
+
// L2-65 fix: Try exact match first to avoid wrong-window match on minimized windows
|
|
3723
|
+
let found;
|
|
3724
|
+
try {
|
|
3725
|
+
found = await bridge.call("ax.findElement", {
|
|
3726
|
+
pid: targetPid,
|
|
3727
|
+
title: target,
|
|
3728
|
+
exact: true,
|
|
3729
|
+
});
|
|
3730
|
+
}
|
|
3731
|
+
catch {
|
|
3732
|
+
found = await bridge.call("ax.findElement", {
|
|
3733
|
+
pid: targetPid,
|
|
3734
|
+
title: target,
|
|
3735
|
+
exact: false,
|
|
3736
|
+
});
|
|
3737
|
+
}
|
|
1653
3738
|
await bridge.call("ax.performAction", {
|
|
1654
3739
|
pid: targetPid,
|
|
1655
3740
|
elementPath: found.elementPath,
|
|
@@ -1710,7 +3795,7 @@ originalTool("click_with_fallback", "Click a target by text using the canonical
|
|
|
1710
3795
|
return formatResult("Clicked", target, result);
|
|
1711
3796
|
});
|
|
1712
3797
|
// ── type_with_fallback ──
|
|
1713
|
-
|
|
3798
|
+
server.tool("type_with_fallback", "Type text into a target field using the canonical fallback chain: AX → CDP → coordinates. Finds the field by label/placeholder, focuses it, then types.", {
|
|
1714
3799
|
target: z.string().describe("Label, placeholder, or title of the field to type into"),
|
|
1715
3800
|
text: z.string().describe("Text to type"),
|
|
1716
3801
|
bundleId: z.string().optional().describe("App bundle ID"),
|
|
@@ -1719,16 +3804,81 @@ originalTool("type_with_fallback", "Type text into a target field using the cano
|
|
|
1719
3804
|
await ensureBridge();
|
|
1720
3805
|
const plan = planExecution("type", infra());
|
|
1721
3806
|
const targetPid = await resolvePid(bundleId);
|
|
1722
|
-
const result = await executeWithFallback("type", plan,
|
|
3807
|
+
const result = await executeWithFallback("type", plan, getAdaptedRetryPolicy(), async (method, attempt) => {
|
|
1723
3808
|
const start = Date.now();
|
|
1724
3809
|
try {
|
|
1725
3810
|
switch (method) {
|
|
1726
3811
|
case "ax": {
|
|
1727
|
-
|
|
1728
|
-
|
|
1729
|
-
|
|
1730
|
-
|
|
1731
|
-
|
|
3812
|
+
// L2-65 fix: Try exact match first to avoid wrong-window match on minimized windows
|
|
3813
|
+
let found;
|
|
3814
|
+
try {
|
|
3815
|
+
found = await bridge.call("ax.findElement", {
|
|
3816
|
+
pid: targetPid,
|
|
3817
|
+
title: target,
|
|
3818
|
+
exact: true,
|
|
3819
|
+
});
|
|
3820
|
+
}
|
|
3821
|
+
catch {
|
|
3822
|
+
found = await bridge.call("ax.findElement", {
|
|
3823
|
+
pid: targetPid,
|
|
3824
|
+
title: target,
|
|
3825
|
+
exact: false,
|
|
3826
|
+
});
|
|
3827
|
+
}
|
|
3828
|
+
// L2-62+L2-68 fix: If matched element is a window (short elementPath), find
|
|
3829
|
+
// the child AXTextArea/AXTextField SCOPED to the target window.
|
|
3830
|
+
const isLikelyWindow = found.elementPath.length <= 1;
|
|
3831
|
+
if (isLikelyWindow) {
|
|
3832
|
+
// Try window-scoped search first via getElementTree
|
|
3833
|
+
let scopedFound = false;
|
|
3834
|
+
try {
|
|
3835
|
+
const wins = await bridge.call("app.windows");
|
|
3836
|
+
const matchWin = wins.find((w) => w.title === target) ?? wins.find((w) => w.title?.includes(target));
|
|
3837
|
+
if (matchWin?.windowId) {
|
|
3838
|
+
const windowTree = await bridge.call("ax.getElementTree", {
|
|
3839
|
+
pid: targetPid,
|
|
3840
|
+
windowId: matchWin.windowId,
|
|
3841
|
+
maxDepth: 8,
|
|
3842
|
+
});
|
|
3843
|
+
const findInTree = (node, path) => {
|
|
3844
|
+
if (node?.role && (node.role === "AXTextArea" || node.role === "AXTextField")) {
|
|
3845
|
+
return path;
|
|
3846
|
+
}
|
|
3847
|
+
if (node?.children && Array.isArray(node.children)) {
|
|
3848
|
+
for (let i = 0; i < node.children.length; i++) {
|
|
3849
|
+
const r = findInTree(node.children[i], [...path, i]);
|
|
3850
|
+
if (r)
|
|
3851
|
+
return r;
|
|
3852
|
+
}
|
|
3853
|
+
}
|
|
3854
|
+
return null;
|
|
3855
|
+
};
|
|
3856
|
+
const textPath = findInTree(windowTree, found.elementPath);
|
|
3857
|
+
if (textPath) {
|
|
3858
|
+
found = found.bounds
|
|
3859
|
+
? { elementPath: textPath, bounds: found.bounds }
|
|
3860
|
+
: { elementPath: textPath };
|
|
3861
|
+
scopedFound = true;
|
|
3862
|
+
}
|
|
3863
|
+
}
|
|
3864
|
+
}
|
|
3865
|
+
catch { /* fall through to unscoped search */ }
|
|
3866
|
+
// Fallback: unscoped search (original L2-62 behavior)
|
|
3867
|
+
if (!scopedFound) {
|
|
3868
|
+
for (const role of ["AXTextArea", "AXTextField"]) {
|
|
3869
|
+
try {
|
|
3870
|
+
const textEl = await bridge.call("ax.findElement", {
|
|
3871
|
+
pid: targetPid,
|
|
3872
|
+
role,
|
|
3873
|
+
maxDepth: 10,
|
|
3874
|
+
});
|
|
3875
|
+
found = textEl;
|
|
3876
|
+
break;
|
|
3877
|
+
}
|
|
3878
|
+
catch { /* try next role */ }
|
|
3879
|
+
}
|
|
3880
|
+
}
|
|
3881
|
+
}
|
|
1732
3882
|
if (clearFirst) {
|
|
1733
3883
|
await bridge.call("ax.setElementValue", { pid: targetPid, elementPath: found.elementPath, value: "" });
|
|
1734
3884
|
}
|
|
@@ -1758,8 +3908,9 @@ originalTool("type_with_fallback", "Type text into a target field using the cano
|
|
|
1758
3908
|
if (!evalResult.result?.value)
|
|
1759
3909
|
throw new Error("Field not found via CDP");
|
|
1760
3910
|
if (clearFirst) {
|
|
1761
|
-
|
|
1762
|
-
await Input.dispatchKeyEvent({ type: "
|
|
3911
|
+
const selectAllMod = process.platform === "darwin" ? 4 : 2; // Cmd on macOS, Ctrl on Windows/Linux
|
|
3912
|
+
await Input.dispatchKeyEvent({ type: "keyDown", key: "a", code: "KeyA", modifiers: selectAllMod });
|
|
3913
|
+
await Input.dispatchKeyEvent({ type: "keyUp", key: "a", code: "KeyA", modifiers: selectAllMod });
|
|
1763
3914
|
}
|
|
1764
3915
|
for (const char of text) {
|
|
1765
3916
|
await Input.dispatchKeyEvent({ type: "keyDown", key: char, text: char });
|
|
@@ -1781,28 +3932,103 @@ originalTool("type_with_fallback", "Type text into a target field using the cano
|
|
|
1781
3932
|
return formatResult("Typed into", target, result);
|
|
1782
3933
|
});
|
|
1783
3934
|
// ── read_with_fallback ──
|
|
1784
|
-
|
|
3935
|
+
server.tool("read_with_fallback", "Read text content from the screen or a specific element using the canonical fallback chain: AX → CDP → OCR. Returns the text found.", {
|
|
1785
3936
|
target: z.string().optional().describe("Element label/title to read from (omit for full-screen OCR)"),
|
|
1786
3937
|
bundleId: z.string().optional().describe("App bundle ID"),
|
|
1787
3938
|
}, async ({ target, bundleId }) => {
|
|
1788
3939
|
await ensureBridge();
|
|
1789
3940
|
const plan = planExecution("read", infra());
|
|
1790
3941
|
const targetPid = await resolvePid(bundleId);
|
|
1791
|
-
const result = await executeWithFallback("read", plan,
|
|
3942
|
+
const result = await executeWithFallback("read", plan, getAdaptedRetryPolicy(), async (method, attempt) => {
|
|
1792
3943
|
const start = Date.now();
|
|
1793
3944
|
try {
|
|
1794
3945
|
switch (method) {
|
|
1795
3946
|
case "ax": {
|
|
1796
3947
|
if (target) {
|
|
1797
|
-
|
|
1798
|
-
|
|
1799
|
-
|
|
1800
|
-
|
|
1801
|
-
|
|
3948
|
+
// L2-65 fix: Try exact match first to avoid reading from the wrong
|
|
3949
|
+
// window when multiple windows share a title prefix (e.g. "Untitled 39" vs "Untitled 40").
|
|
3950
|
+
// Minimized windows may be skipped by the bridge search, so an inexact match
|
|
3951
|
+
// can silently return a sibling window's content with no warning.
|
|
3952
|
+
let found;
|
|
3953
|
+
try {
|
|
3954
|
+
found = await bridge.call("ax.findElement", {
|
|
3955
|
+
pid: targetPid,
|
|
3956
|
+
title: target,
|
|
3957
|
+
exact: true,
|
|
3958
|
+
});
|
|
3959
|
+
}
|
|
3960
|
+
catch {
|
|
3961
|
+
// Exact match failed — fall back to fuzzy match
|
|
3962
|
+
found = await bridge.call("ax.findElement", {
|
|
3963
|
+
pid: targetPid,
|
|
3964
|
+
title: target,
|
|
3965
|
+
exact: false,
|
|
3966
|
+
});
|
|
3967
|
+
}
|
|
1802
3968
|
const val = await bridge.call("ax.getElementValue", {
|
|
1803
3969
|
pid: targetPid,
|
|
1804
3970
|
elementPath: found.elementPath,
|
|
1805
3971
|
});
|
|
3972
|
+
// L2-59+L2-61+L2-68 fix: If matched element has no value (e.g. AXWindow), find a
|
|
3973
|
+
// text-bearing child element SCOPED to the target window.
|
|
3974
|
+
// L2-68: Previously used unscoped ax.findElement(role) which returned AXTextArea from
|
|
3975
|
+
// ANY window. Now uses ax.getElementTree(windowId) to scope the search.
|
|
3976
|
+
if (!val.value) {
|
|
3977
|
+
// Try to find the matching CG windowId by title
|
|
3978
|
+
let windowTree = null;
|
|
3979
|
+
try {
|
|
3980
|
+
const wins = await bridge.call("app.windows");
|
|
3981
|
+
const matchWin = wins.find((w) => w.title === target) ?? wins.find((w) => w.title?.includes(target));
|
|
3982
|
+
if (matchWin?.windowId) {
|
|
3983
|
+
windowTree = await bridge.call("ax.getElementTree", {
|
|
3984
|
+
pid: targetPid,
|
|
3985
|
+
windowId: matchWin.windowId,
|
|
3986
|
+
maxDepth: 8,
|
|
3987
|
+
});
|
|
3988
|
+
}
|
|
3989
|
+
}
|
|
3990
|
+
catch { /* fall through to unscoped search */ }
|
|
3991
|
+
// Walk the window tree to find first text-bearing element
|
|
3992
|
+
const textRoles = new Set(["AXTextArea", "AXTextField", "AXWebArea"]);
|
|
3993
|
+
const findTextInTree = (node, path) => {
|
|
3994
|
+
if (node?.role && textRoles.has(node.role) && node.value) {
|
|
3995
|
+
return { value: node.value, path };
|
|
3996
|
+
}
|
|
3997
|
+
if (node?.children && Array.isArray(node.children)) {
|
|
3998
|
+
for (let i = 0; i < node.children.length; i++) {
|
|
3999
|
+
const result = findTextInTree(node.children[i], [...path, i]);
|
|
4000
|
+
if (result)
|
|
4001
|
+
return result;
|
|
4002
|
+
}
|
|
4003
|
+
}
|
|
4004
|
+
return null;
|
|
4005
|
+
};
|
|
4006
|
+
if (windowTree) {
|
|
4007
|
+
const textNode = findTextInTree(windowTree, found.elementPath);
|
|
4008
|
+
if (textNode?.value) {
|
|
4009
|
+
return { ok: true, method, durationMs: Date.now() - start, fallbackFrom: null, retries: attempt, error: null, target: textNode.value };
|
|
4010
|
+
}
|
|
4011
|
+
}
|
|
4012
|
+
// Fallback: unscoped search (original L2-59 behavior) if window-scoped search fails
|
|
4013
|
+
const fallbackRoles = ["AXTextArea", "AXTextField", "AXWebArea"];
|
|
4014
|
+
for (const role of fallbackRoles) {
|
|
4015
|
+
try {
|
|
4016
|
+
const textEl = await bridge.call("ax.findElement", {
|
|
4017
|
+
pid: targetPid,
|
|
4018
|
+
role,
|
|
4019
|
+
maxDepth: 10,
|
|
4020
|
+
});
|
|
4021
|
+
const textVal = await bridge.call("ax.getElementValue", {
|
|
4022
|
+
pid: targetPid,
|
|
4023
|
+
elementPath: textEl.elementPath,
|
|
4024
|
+
});
|
|
4025
|
+
if (textVal.value) {
|
|
4026
|
+
return { ok: true, method, durationMs: Date.now() - start, fallbackFrom: null, retries: attempt, error: null, target: textVal.value };
|
|
4027
|
+
}
|
|
4028
|
+
}
|
|
4029
|
+
catch { /* try next role */ }
|
|
4030
|
+
}
|
|
4031
|
+
}
|
|
1806
4032
|
return { ok: true, method, durationMs: Date.now() - start, fallbackFrom: null, retries: attempt, error: null, target: val.value ?? "" };
|
|
1807
4033
|
}
|
|
1808
4034
|
// No specific target — get the full element tree text
|
|
@@ -1874,23 +4100,34 @@ originalTool("read_with_fallback", "Read text content from the screen or a speci
|
|
|
1874
4100
|
return { content: [{ type: "text", text: `Failed to read${target ? ` "${target}"` : ""} — all methods exhausted. Last error: ${result.error}` }] };
|
|
1875
4101
|
});
|
|
1876
4102
|
// ── locate_with_fallback ──
|
|
1877
|
-
|
|
4103
|
+
server.tool("locate_with_fallback", "Find an element's position on screen using the canonical fallback chain: AX → CDP → OCR. Returns bounds (x, y, width, height).", {
|
|
1878
4104
|
target: z.string().describe("Text, title, or identifier of the element to locate"),
|
|
1879
4105
|
bundleId: z.string().optional().describe("App bundle ID"),
|
|
1880
4106
|
}, async ({ target, bundleId }) => {
|
|
1881
4107
|
await ensureBridge();
|
|
1882
4108
|
const plan = planExecution("locate", infra());
|
|
1883
4109
|
const targetPid = await resolvePid(bundleId);
|
|
1884
|
-
const result = await executeWithFallback("locate", plan,
|
|
4110
|
+
const result = await executeWithFallback("locate", plan, getAdaptedRetryPolicy(), async (method, attempt) => {
|
|
1885
4111
|
const start = Date.now();
|
|
1886
4112
|
try {
|
|
1887
4113
|
switch (method) {
|
|
1888
4114
|
case "ax": {
|
|
1889
|
-
|
|
1890
|
-
|
|
1891
|
-
|
|
1892
|
-
|
|
1893
|
-
|
|
4115
|
+
// L2-65 fix: Try exact match first
|
|
4116
|
+
let found;
|
|
4117
|
+
try {
|
|
4118
|
+
found = await bridge.call("ax.findElement", {
|
|
4119
|
+
pid: targetPid,
|
|
4120
|
+
title: target,
|
|
4121
|
+
exact: true,
|
|
4122
|
+
});
|
|
4123
|
+
}
|
|
4124
|
+
catch {
|
|
4125
|
+
found = await bridge.call("ax.findElement", {
|
|
4126
|
+
pid: targetPid,
|
|
4127
|
+
title: target,
|
|
4128
|
+
exact: false,
|
|
4129
|
+
});
|
|
4130
|
+
}
|
|
1894
4131
|
if (!found.bounds)
|
|
1895
4132
|
throw new Error("Element found but has no bounds");
|
|
1896
4133
|
const b = found.bounds;
|
|
@@ -1946,7 +4183,7 @@ originalTool("locate_with_fallback", "Find an element's position on screen using
|
|
|
1946
4183
|
return formatResult("Located", target, result);
|
|
1947
4184
|
});
|
|
1948
4185
|
// ── select_with_fallback ──
|
|
1949
|
-
|
|
4186
|
+
server.tool("select_with_fallback", "Select an option from a dropdown/menu using the canonical fallback chain: AX → CDP. Finds the control, opens it, and picks the specified option.", {
|
|
1950
4187
|
target: z.string().describe("Label or title of the dropdown/menu control"),
|
|
1951
4188
|
option: z.string().describe("Text of the option to select"),
|
|
1952
4189
|
bundleId: z.string().optional().describe("App bundle ID"),
|
|
@@ -1954,7 +4191,7 @@ originalTool("select_with_fallback", "Select an option from a dropdown/menu usin
|
|
|
1954
4191
|
await ensureBridge();
|
|
1955
4192
|
const plan = planExecution("select", infra());
|
|
1956
4193
|
const targetPid = await resolvePid(bundleId);
|
|
1957
|
-
const result = await executeWithFallback("select", plan,
|
|
4194
|
+
const result = await executeWithFallback("select", plan, getAdaptedRetryPolicy(), async (method, attempt) => {
|
|
1958
4195
|
const start = Date.now();
|
|
1959
4196
|
try {
|
|
1960
4197
|
switch (method) {
|
|
@@ -2021,7 +4258,7 @@ originalTool("select_with_fallback", "Select an option from a dropdown/menu usin
|
|
|
2021
4258
|
return formatResult("Selected", `${target} → ${option}`, result);
|
|
2022
4259
|
});
|
|
2023
4260
|
// ── scroll_with_fallback ──
|
|
2024
|
-
|
|
4261
|
+
server.tool("scroll_with_fallback", "Scroll within an element or the active window using the canonical fallback chain: AX → CDP → coordinates. Scrolls until target text is visible, or by a fixed amount.", {
|
|
2025
4262
|
direction: z.enum(["up", "down", "left", "right"]).describe("Scroll direction"),
|
|
2026
4263
|
amount: z.number().optional().describe("Scroll amount in pixels (default: 300)"),
|
|
2027
4264
|
target: z.string().optional().describe("Scroll until this text is visible (overrides amount)"),
|
|
@@ -2031,6 +4268,17 @@ originalTool("scroll_with_fallback", "Scroll within an element or the active win
|
|
|
2031
4268
|
const plan = planExecution("scroll", infra());
|
|
2032
4269
|
const targetPid = await resolvePid(bundleId);
|
|
2033
4270
|
const scrollAmount = amount ?? 300;
|
|
4271
|
+
// Resolve scroll coordinates — center of the frontmost window
|
|
4272
|
+
let scrollX = 400, scrollY = 400;
|
|
4273
|
+
try {
|
|
4274
|
+
const wins = await bridge.call("cg.windows", {});
|
|
4275
|
+
if (wins && wins.length > 0) {
|
|
4276
|
+
const w = wins[0];
|
|
4277
|
+
scrollX = Math.round(w.x + w.width / 2);
|
|
4278
|
+
scrollY = Math.round(w.y + w.height / 2);
|
|
4279
|
+
}
|
|
4280
|
+
}
|
|
4281
|
+
catch { /* fallback to default coords */ }
|
|
2034
4282
|
// If target is specified, scroll in a loop until text is visible (max 10 scrolls)
|
|
2035
4283
|
if (target) {
|
|
2036
4284
|
for (let i = 0; i < 10; i++) {
|
|
@@ -2049,26 +4297,21 @@ originalTool("scroll_with_fallback", "Scroll within an element or the active win
|
|
|
2049
4297
|
// Scroll once
|
|
2050
4298
|
const deltaX = direction === "left" ? -scrollAmount : direction === "right" ? scrollAmount : 0;
|
|
2051
4299
|
const deltaY = direction === "up" ? -scrollAmount : direction === "down" ? scrollAmount : 0;
|
|
2052
|
-
await bridge.call("cg.scroll", { deltaX, deltaY });
|
|
4300
|
+
await bridge.call("cg.scroll", { x: scrollX, y: scrollY, deltaX, deltaY });
|
|
2053
4301
|
await new Promise((r) => setTimeout(r, 400));
|
|
2054
4302
|
}
|
|
2055
4303
|
return { content: [{ type: "text", text: `Scrolled ${direction} 10 times but "${target}" not found.` }] };
|
|
2056
4304
|
}
|
|
2057
4305
|
// Fixed-amount scroll via fallback chain
|
|
2058
|
-
const result = await executeWithFallback("scroll", plan,
|
|
4306
|
+
const result = await executeWithFallback("scroll", plan, getAdaptedRetryPolicy(), async (method, attempt) => {
|
|
2059
4307
|
const start = Date.now();
|
|
2060
4308
|
try {
|
|
2061
4309
|
const deltaX = direction === "left" ? -scrollAmount : direction === "right" ? scrollAmount : 0;
|
|
2062
4310
|
const deltaY = direction === "up" ? -scrollAmount : direction === "down" ? scrollAmount : 0;
|
|
2063
4311
|
switch (method) {
|
|
2064
4312
|
case "ax": {
|
|
2065
|
-
//
|
|
2066
|
-
|
|
2067
|
-
pid: targetPid,
|
|
2068
|
-
maxDepth: 1,
|
|
2069
|
-
});
|
|
2070
|
-
// Fall through to cg.scroll since AX scroll is less reliable
|
|
2071
|
-
await bridge.call("cg.scroll", { deltaX, deltaY });
|
|
4313
|
+
// AX scroll is unreliable — use CG scroll directly (works on the focused app)
|
|
4314
|
+
await bridge.call("cg.scroll", { x: scrollX, y: scrollY, deltaX, deltaY });
|
|
2072
4315
|
return { ok: true, method, durationMs: Date.now() - start, fallbackFrom: null, retries: attempt, error: null, target: `${direction} ${scrollAmount}px` };
|
|
2073
4316
|
}
|
|
2074
4317
|
case "cdp": {
|
|
@@ -2088,7 +4331,7 @@ originalTool("scroll_with_fallback", "Scroll within an element or the active win
|
|
|
2088
4331
|
}
|
|
2089
4332
|
}
|
|
2090
4333
|
case "coordinates": {
|
|
2091
|
-
await bridge.call("cg.scroll", { deltaX, deltaY });
|
|
4334
|
+
await bridge.call("cg.scroll", { x: scrollX, y: scrollY, deltaX, deltaY });
|
|
2092
4335
|
return { ok: true, method, durationMs: Date.now() - start, fallbackFrom: null, retries: attempt, error: null, target: `${direction} ${scrollAmount}px` };
|
|
2093
4336
|
}
|
|
2094
4337
|
}
|
|
@@ -2101,7 +4344,7 @@ originalTool("scroll_with_fallback", "Scroll within an element or the active win
|
|
|
2101
4344
|
return formatResult("Scrolled", `${direction} ${scrollAmount}px`, result);
|
|
2102
4345
|
});
|
|
2103
4346
|
// ── wait_for_state ──
|
|
2104
|
-
|
|
4347
|
+
server.tool("wait_for_state", "Wait until a condition is met on screen: text appears, text disappears, or element becomes available. Polls at intervals using the fallback chain.", {
|
|
2105
4348
|
condition: z.enum(["text_appears", "text_disappears", "element_exists"]).describe("What to wait for"),
|
|
2106
4349
|
target: z.string().describe("Text or element to watch for"),
|
|
2107
4350
|
timeoutMs: z.number().optional().describe("Maximum wait time in ms (default: 10000)"),
|
|
@@ -2123,13 +4366,29 @@ originalTool("wait_for_state", "Wait until a condition is met on screen: text ap
|
|
|
2123
4366
|
found = true;
|
|
2124
4367
|
}
|
|
2125
4368
|
else {
|
|
2126
|
-
//
|
|
2127
|
-
|
|
2128
|
-
|
|
2129
|
-
|
|
2130
|
-
|
|
2131
|
-
}
|
|
2132
|
-
|
|
4369
|
+
// L2-67 fix: Try AX text search first (works for non-frontmost apps),
|
|
4370
|
+
// then fall back to OCR if AX doesn't find it.
|
|
4371
|
+
try {
|
|
4372
|
+
const axEl = await bridge.call("ax.findElement", { pid: targetPid, title: target, exact: false });
|
|
4373
|
+
found = true;
|
|
4374
|
+
}
|
|
4375
|
+
catch {
|
|
4376
|
+
// AX title search failed — also try reading text content via AX tree
|
|
4377
|
+
try {
|
|
4378
|
+
const tree = await bridge.call("ax.getElementTree", { pid: targetPid, maxDepth: 4 });
|
|
4379
|
+
const desc = tree.description ?? JSON.stringify(tree);
|
|
4380
|
+
found = desc.includes(target);
|
|
4381
|
+
}
|
|
4382
|
+
catch {
|
|
4383
|
+
// AX unavailable — fall back to OCR
|
|
4384
|
+
const shot = await bridge.call("cg.captureScreen", {});
|
|
4385
|
+
const matches = await bridge.call("vision.findText", {
|
|
4386
|
+
imagePath: shot.path,
|
|
4387
|
+
searchText: target,
|
|
4388
|
+
});
|
|
4389
|
+
found = Array.isArray(matches) && matches.length > 0;
|
|
4390
|
+
}
|
|
4391
|
+
}
|
|
2133
4392
|
}
|
|
2134
4393
|
}
|
|
2135
4394
|
catch {
|
|
@@ -2172,13 +4431,13 @@ originalTool("wait_for_state", "Wait until a condition is met on screen: text ap
|
|
|
2172
4431
|
// ═══════════════════════════════════════════════
|
|
2173
4432
|
// JOBS — persistent multi-step automation with resume
|
|
2174
4433
|
// ═══════════════════════════════════════════════
|
|
2175
|
-
originalTool("job_create", "Create a new automation job. Jobs persist across restarts and can be resumed from the last successful step.", {
|
|
4434
|
+
originalTool("job_create", "Create a new automation job. Jobs persist across restarts and can be resumed from the last successful step. Supports chaining: set dependsOn to wait for another job, and vars for template substitution (e.g. {PROMPT_TEXT}).", {
|
|
2176
4435
|
task: z.string().describe("Human-readable description of what this job should do"),
|
|
2177
4436
|
playbookId: z.string().optional().describe("Playbook ID to drive this job (optional — AI-only if omitted)"),
|
|
2178
4437
|
bundleId: z.string().optional().describe("Target application bundle ID (e.g., 'com.apple.Safari'). Omit for app-agnostic jobs."),
|
|
2179
4438
|
windowId: z.number().optional().describe("Target window ID within the application. Omit for app-agnostic jobs."),
|
|
2180
4439
|
steps: z.array(z.object({
|
|
2181
|
-
action: z.string().describe("Action name (e.g., navigate, click, type_text, screenshot, key)"),
|
|
4440
|
+
action: z.string().describe("Action name (e.g., navigate, click, type_text, screenshot, key, browser_js, cdp_key_event)"),
|
|
2182
4441
|
target: z.string().optional().describe("Target element or URL"),
|
|
2183
4442
|
description: z.string().optional().describe("Human-readable description"),
|
|
2184
4443
|
text: z.string().optional().describe("Text payload for type_text/type_into actions"),
|
|
@@ -2189,7 +4448,10 @@ originalTool("job_create", "Create a new automation job. Jobs persist across res
|
|
|
2189
4448
|
priority: z.number().optional().describe("Priority (lower = higher priority, default: 10)"),
|
|
2190
4449
|
maxRetries: z.number().optional().describe("Max retry attempts on failure (default: 3)"),
|
|
2191
4450
|
sessionId: z.string().optional().describe("Bind to an existing supervisor session"),
|
|
2192
|
-
|
|
4451
|
+
chainId: z.string().optional().describe("Chain ID to group linked jobs into a flow"),
|
|
4452
|
+
dependsOn: z.string().optional().describe("Job ID this job depends on — won't run until dependency is done"),
|
|
4453
|
+
vars: z.record(z.string(), z.string()).optional().describe("Variables for template substitution in playbook steps (e.g. {PROMPT_TEXT} → 'hello world'). Use {prev.outputKey} to reference outputs from dependsOn job."),
|
|
4454
|
+
}, async ({ task, playbookId, bundleId, windowId, steps, tags, priority, maxRetries, sessionId, chainId, dependsOn, vars }) => {
|
|
2193
4455
|
const createOpts = { task };
|
|
2194
4456
|
if (playbookId !== undefined)
|
|
2195
4457
|
createOpts.playbookId = playbookId;
|
|
@@ -2207,8 +4469,50 @@ originalTool("job_create", "Create a new automation job. Jobs persist across res
|
|
|
2207
4469
|
createOpts.maxRetries = maxRetries;
|
|
2208
4470
|
if (sessionId !== undefined)
|
|
2209
4471
|
createOpts.sessionId = sessionId;
|
|
4472
|
+
if (chainId !== undefined)
|
|
4473
|
+
createOpts.chainId = chainId;
|
|
4474
|
+
if (dependsOn !== undefined)
|
|
4475
|
+
createOpts.dependsOn = dependsOn;
|
|
4476
|
+
if (vars !== undefined)
|
|
4477
|
+
createOpts.vars = vars;
|
|
2210
4478
|
const job = jobManager.create(createOpts);
|
|
2211
|
-
|
|
4479
|
+
const extra = [];
|
|
4480
|
+
if (job.chainId)
|
|
4481
|
+
extra.push(`Chain: ${job.chainId}`);
|
|
4482
|
+
if (job.dependsOn)
|
|
4483
|
+
extra.push(`Depends on: ${job.dependsOn}`);
|
|
4484
|
+
if (job.vars && Object.keys(job.vars).length > 0)
|
|
4485
|
+
extra.push(`Vars: ${Object.keys(job.vars).join(", ")}`);
|
|
4486
|
+
return { content: [{ type: "text", text: `Job created: ${job.id}\nTask: ${job.task}\nState: ${job.state}\nSteps: ${job.steps.length}\nPriority: ${job.priority}\nTarget: ${job.bundleId ?? "(any app)"}${job.windowId != null ? ` window ${job.windowId}` : ""}${extra.length > 0 ? "\n" + extra.join("\n") : ""}` }] };
|
|
4487
|
+
});
|
|
4488
|
+
originalTool("job_create_chain", "Create a chain of linked jobs that run sequentially. Each job waits for the previous one to finish. Use vars with {prev.outputKey} to pass data between jobs.", {
|
|
4489
|
+
jobs: z.array(z.object({
|
|
4490
|
+
task: z.string().describe("What this job does"),
|
|
4491
|
+
playbookId: z.string().optional().describe("Playbook ID"),
|
|
4492
|
+
bundleId: z.string().optional().describe("Target app bundle ID"),
|
|
4493
|
+
vars: z.record(z.string(), z.string()).optional().describe("Variables — use {prev.Read_Codex_response} to get output from prior job step"),
|
|
4494
|
+
tags: z.array(z.string()).optional(),
|
|
4495
|
+
})).describe("Ordered list of jobs to chain"),
|
|
4496
|
+
}, async ({ jobs }) => {
|
|
4497
|
+
const cleanJobs = jobs.map(j => {
|
|
4498
|
+
const clean = { task: j.task };
|
|
4499
|
+
if (j.playbookId)
|
|
4500
|
+
clean.playbookId = j.playbookId;
|
|
4501
|
+
if (j.bundleId)
|
|
4502
|
+
clean.bundleId = j.bundleId;
|
|
4503
|
+
if (j.vars)
|
|
4504
|
+
clean.vars = j.vars;
|
|
4505
|
+
if (j.tags)
|
|
4506
|
+
clean.tags = j.tags;
|
|
4507
|
+
return clean;
|
|
4508
|
+
});
|
|
4509
|
+
const chain = jobManager.createChain({ jobs: cleanJobs });
|
|
4510
|
+
const lines = [`Chain created: ${chain[0]?.chainId ?? "unknown"} (${chain.length} jobs)`];
|
|
4511
|
+
for (const job of chain) {
|
|
4512
|
+
lines.push(` ${job.id}: ${job.task}${job.dependsOn ? ` (after ${job.dependsOn})` : " (first)"}`);
|
|
4513
|
+
}
|
|
4514
|
+
lines.push("", "Run with: job_run_all() to execute the full chain sequentially.");
|
|
4515
|
+
return { content: [{ type: "text", text: lines.join("\n") }] };
|
|
2212
4516
|
});
|
|
2213
4517
|
originalTool("job_status", "Get detailed status of a job including step progress and resume point.", {
|
|
2214
4518
|
jobId: z.string().describe("Job ID"),
|
|
@@ -2232,6 +4536,12 @@ originalTool("job_status", "Get detailed status of a job including step progress
|
|
|
2232
4536
|
`Resume point: ${resume ? `step ${resume.stepIndex} — ${resume.step.description ?? resume.step.action}` : "(none — all done or no pending steps)"}`,
|
|
2233
4537
|
`Retries: ${job.retries}/${job.maxRetries}`,
|
|
2234
4538
|
];
|
|
4539
|
+
if (job.chainId)
|
|
4540
|
+
lines.push(`Chain: ${job.chainId}`);
|
|
4541
|
+
if (job.dependsOn)
|
|
4542
|
+
lines.push(`Depends on: ${job.dependsOn}`);
|
|
4543
|
+
if (job.vars && Object.keys(job.vars).length > 0)
|
|
4544
|
+
lines.push(`Vars: ${JSON.stringify(job.vars)}`);
|
|
2235
4545
|
if (job.blockReason)
|
|
2236
4546
|
lines.push(`Block reason: ${job.blockReason}`);
|
|
2237
4547
|
if (job.lastError)
|
|
@@ -2245,6 +4555,8 @@ originalTool("job_status", "Get detailed status of a job including step progress
|
|
|
2245
4555
|
for (const s of job.steps) {
|
|
2246
4556
|
const icon = s.status === "done" ? "✓" : s.status === "failed" ? "✗" : s.status === "skipped" ? "–" : "○";
|
|
2247
4557
|
lines.push(` ${icon} [${s.index}] ${s.description ?? s.action}${s.error ? ` (${s.error})` : ""}${s.durationMs != null ? ` ${s.durationMs}ms` : ""}`);
|
|
4558
|
+
if (s.output)
|
|
4559
|
+
lines.push(` → ${s.output.substring(0, 200)}${s.output.length > 200 ? "..." : ""}`);
|
|
2248
4560
|
}
|
|
2249
4561
|
}
|
|
2250
4562
|
return { content: [{ type: "text", text: lines.join("\n") }] };
|
|
@@ -2342,22 +4654,42 @@ originalTool("job_remove", "Remove a job entirely (any state).", {
|
|
|
2342
4654
|
return { content: [{ type: "text", text: ok ? `Job ${jobId} removed.` : `Job ${jobId} not found.` }] };
|
|
2343
4655
|
});
|
|
2344
4656
|
// ── Job Runner + Worker ─────────────────────────
|
|
2345
|
-
const PLAYBOOKS_DIR =
|
|
4657
|
+
const PLAYBOOKS_DIR = playbooksDir; // Use same dir as recorder (project-local ./playbooks/)
|
|
2346
4658
|
let activeJobRunner = null;
|
|
4659
|
+
let activePlaybookStore = null;
|
|
4660
|
+
let activePlaybookEngine = null;
|
|
2347
4661
|
function getJobRunner() {
|
|
4662
|
+
// Always reload playbooks from disk (new files may have been added)
|
|
4663
|
+
if (!activePlaybookStore) {
|
|
4664
|
+
activePlaybookStore = new PlaybookStore(PLAYBOOKS_DIR);
|
|
4665
|
+
}
|
|
4666
|
+
activePlaybookStore.load();
|
|
2348
4667
|
if (!activeJobRunner) {
|
|
2349
4668
|
// Build playbook engine stack: adapter → runtime → engine
|
|
2350
4669
|
const adapter = new AccessibilityAdapter(bridge);
|
|
2351
4670
|
const logger = new TimelineLogger();
|
|
2352
|
-
const
|
|
4671
|
+
const locCache = new LocatorCache();
|
|
4672
|
+
locCache.setLearningEngine(learningEngine);
|
|
4673
|
+
const runtimeService = new AutomationRuntimeService(adapter, logger, locCache);
|
|
2353
4674
|
const playbookEngine = new PlaybookEngine(runtimeService);
|
|
2354
|
-
|
|
2355
|
-
|
|
4675
|
+
activePlaybookEngine = playbookEngine;
|
|
4676
|
+
// Wire CDP into playbook engine for browser_js / cdp_key_event steps
|
|
4677
|
+
playbookEngine.setCDPConnect(async (overridePort) => {
|
|
4678
|
+
if (overridePort) {
|
|
4679
|
+
if (!CDP)
|
|
4680
|
+
CDP = (await import("chrome-remote-interface")).default;
|
|
4681
|
+
const client = await CDP({ port: overridePort });
|
|
4682
|
+
return { Runtime: client.Runtime, Input: client.Input, close: () => client.close() };
|
|
4683
|
+
}
|
|
4684
|
+
const { CDP: CDPClient, port } = await ensureCDP();
|
|
4685
|
+
const client = await CDPClient({ port });
|
|
4686
|
+
return { Runtime: client.Runtime, Input: client.Input, close: () => client.close() };
|
|
4687
|
+
});
|
|
2356
4688
|
activeJobRunner = new JobRunner(bridge, jobManager, leaseManager, supervisor, (() => {
|
|
2357
4689
|
const cfg = {
|
|
2358
4690
|
hasCDP: cdpPort !== null,
|
|
2359
4691
|
playbookEngine,
|
|
2360
|
-
playbookStore,
|
|
4692
|
+
playbookStore: activePlaybookStore,
|
|
2361
4693
|
runtimeService,
|
|
2362
4694
|
};
|
|
2363
4695
|
if (cdpPort) {
|
|
@@ -2485,243 +4817,1056 @@ originalTool("worker_status", "Get the current status of the worker daemon (read
|
|
|
2485
4817
|
return { content: [{ type: "text", text: lines.join("\n") }] };
|
|
2486
4818
|
});
|
|
2487
4819
|
// ═══════════════════════════════════════════════
|
|
2488
|
-
//
|
|
4820
|
+
// PLANNER — goal-oriented planning
|
|
2489
4821
|
// ═══════════════════════════════════════════════
|
|
2490
|
-
|
|
2491
|
-
|
|
2492
|
-
|
|
2493
|
-
const
|
|
2494
|
-
|
|
2495
|
-
|
|
2496
|
-
const
|
|
2497
|
-
|
|
2498
|
-
|
|
2499
|
-
|
|
2500
|
-
return { running: false, pid: null };
|
|
2501
|
-
const pid = Number(fs.readFileSync(MONITOR_PID, "utf-8").trim());
|
|
2502
|
-
// Check if process is alive
|
|
2503
|
-
process.kill(pid, 0);
|
|
2504
|
-
return { running: true, pid };
|
|
4822
|
+
originalTool("plan_goal", "Create a goal and generate an execution plan. Returns the plan source (playbook/strategy/llm), steps, and confidence. Does NOT execute — use the returned plan for review or pass to job system.", {
|
|
4823
|
+
goal: z.string().describe("What you want to achieve (e.g. 'Export Premiere Pro timeline as H.264')"),
|
|
4824
|
+
}, async ({ goal: goalDescription }) => {
|
|
4825
|
+
const goal = planner.createGoal(goalDescription);
|
|
4826
|
+
await planner.planGoal(goal);
|
|
4827
|
+
goalStore.add(goal);
|
|
4828
|
+
const sg = goal.subgoals[0];
|
|
4829
|
+
const plan = sg.plan;
|
|
4830
|
+
if (!plan) {
|
|
4831
|
+
return { content: [{ type: "text", text: "No plan could be generated." }] };
|
|
2505
4832
|
}
|
|
2506
|
-
|
|
2507
|
-
|
|
4833
|
+
const lines = [
|
|
4834
|
+
`Goal: ${goalDescription}`,
|
|
4835
|
+
`Plan source: ${plan.source}${plan.sourceId ? ` (${plan.sourceId})` : ""}`,
|
|
4836
|
+
`Confidence: ${(plan.confidence * 100).toFixed(0)}%`,
|
|
4837
|
+
`Steps: ${plan.steps.length}`,
|
|
4838
|
+
"",
|
|
4839
|
+
];
|
|
4840
|
+
for (let i = 0; i < plan.steps.length; i++) {
|
|
4841
|
+
const step = plan.steps[i];
|
|
4842
|
+
const params = Object.keys(step.params).length > 0
|
|
4843
|
+
? ` ${JSON.stringify(step.params)}`
|
|
4844
|
+
: "";
|
|
4845
|
+
const llmTag = step.requiresLLM ? " [LLM]" : "";
|
|
4846
|
+
const postcond = step.expectedPostcondition
|
|
4847
|
+
? ` → verify: ${step.expectedPostcondition.type}(${step.expectedPostcondition.target})`
|
|
4848
|
+
: "";
|
|
4849
|
+
lines.push(` ${i + 1}. ${step.tool || step.description}${params}${llmTag}${postcond}`);
|
|
2508
4850
|
}
|
|
2509
|
-
}
|
|
2510
|
-
|
|
2511
|
-
|
|
2512
|
-
|
|
2513
|
-
|
|
2514
|
-
|
|
4851
|
+
lines.push("", `Goal ID: ${goal.id}`);
|
|
4852
|
+
return {
|
|
4853
|
+
content: [{ type: "text", text: lines.join("\n") }],
|
|
4854
|
+
_meta: { goalId: goal.id, plan },
|
|
4855
|
+
};
|
|
4856
|
+
});
|
|
4857
|
+
originalTool("plan_execute", "Execute a goal's plan automatically. Runs deterministic steps internally. Pauses at LLM steps and returns the step description for you to resolve with plan_step_resolve. On completion, saves the strategy to memory for future reuse.", {
|
|
4858
|
+
goalId: z.string().describe("Goal ID from plan_goal"),
|
|
4859
|
+
}, async ({ goalId }) => {
|
|
4860
|
+
const goal = goalStore.get(goalId);
|
|
4861
|
+
if (!goal) {
|
|
4862
|
+
return { content: [{ type: "text", text: `Goal not found: ${goalId}` }] };
|
|
2515
4863
|
}
|
|
2516
|
-
|
|
2517
|
-
|
|
4864
|
+
const adaptiveBudget = learningEngine.getAdaptiveBudget(worldModel.getState().focusedApp?.bundleId ?? "unknown");
|
|
4865
|
+
const executor = new PlanExecutor(worldModel, planner, toolRegistry.toExecutor(), { postconditionWaitMs: adaptiveBudget.verifyMs, defaultStepTimeout: Math.max(30_000, adaptiveBudget.actMs * 2) }, recoveryEngine, learningEngine);
|
|
4866
|
+
const result = await executor.executeGoal(goal);
|
|
4867
|
+
goalStore.update(goalId, goal);
|
|
4868
|
+
// Check if paused at an LLM step
|
|
4869
|
+
if ("paused" in result) {
|
|
4870
|
+
const pause = result;
|
|
4871
|
+
return {
|
|
4872
|
+
content: [{ type: "text", text: [
|
|
4873
|
+
`PAUSED at step ${pause.stepIndex + 1}/${pause.totalSteps} — requires your interpretation.`,
|
|
4874
|
+
`Step: ${pause.stepDescription}`,
|
|
4875
|
+
"",
|
|
4876
|
+
"Use plan_step_resolve to provide the tool + params for this step,",
|
|
4877
|
+
"then call plan_execute again to continue.",
|
|
4878
|
+
].join("\n") }],
|
|
4879
|
+
_meta: { goalId, paused: true, stepIndex: pause.stepIndex },
|
|
4880
|
+
};
|
|
2518
4881
|
}
|
|
2519
|
-
|
|
2520
|
-
|
|
2521
|
-
|
|
2522
|
-
|
|
2523
|
-
|
|
2524
|
-
|
|
4882
|
+
// Completed — save strategy to memory if successful
|
|
4883
|
+
if (result.success) {
|
|
4884
|
+
try {
|
|
4885
|
+
const sg = goal.subgoals.find((s) => s.status === "completed");
|
|
4886
|
+
if (sg?.plan) {
|
|
4887
|
+
const steps = sg.plan.steps
|
|
4888
|
+
.filter((s) => s.status === "completed" && s.tool)
|
|
4889
|
+
.map((s) => ({ tool: s.tool, params: s.params }));
|
|
4890
|
+
if (steps.length > 0) {
|
|
4891
|
+
memory.appendStrategy({
|
|
4892
|
+
id: "str_plan_" + Date.now().toString(36),
|
|
4893
|
+
task: goal.description,
|
|
4894
|
+
steps,
|
|
4895
|
+
totalDurationMs: result.durationMs,
|
|
4896
|
+
successCount: 1,
|
|
4897
|
+
failCount: 0,
|
|
4898
|
+
lastUsed: new Date().toISOString(),
|
|
4899
|
+
tags: ["auto-plan", sg.plan.source],
|
|
4900
|
+
fingerprint: "",
|
|
4901
|
+
});
|
|
4902
|
+
}
|
|
4903
|
+
}
|
|
4904
|
+
}
|
|
4905
|
+
catch { /* strategy recording is best-effort */ }
|
|
2525
4906
|
}
|
|
2526
|
-
|
|
2527
|
-
|
|
4907
|
+
const lines = [
|
|
4908
|
+
result.success ? "Goal completed successfully." : `Goal failed: ${result.error}`,
|
|
4909
|
+
`Steps: ${result.stepsExecuted} executed, ${result.replans} replans`,
|
|
4910
|
+
`Duration: ${result.durationMs}ms`,
|
|
4911
|
+
`Subgoals: ${result.subgoalsCompleted}/${result.totalSubgoals} completed`,
|
|
4912
|
+
"",
|
|
4913
|
+
"── EXECUTION LOG ──",
|
|
4914
|
+
...("executionLog" in result ? result.executionLog : []),
|
|
4915
|
+
];
|
|
4916
|
+
return { content: [{ type: "text", text: lines.join("\n") }] };
|
|
4917
|
+
});
|
|
4918
|
+
originalTool("plan_step", "Execute the next single step of a goal. For incremental client-driven execution. Returns the step result, or pauses at LLM steps for you to interpret.", {
|
|
4919
|
+
goalId: z.string().describe("Goal ID from plan_goal"),
|
|
4920
|
+
}, async ({ goalId }) => {
|
|
4921
|
+
const goal = goalStore.get(goalId);
|
|
4922
|
+
if (!goal) {
|
|
4923
|
+
return { content: [{ type: "text", text: `Goal not found: ${goalId}` }] };
|
|
2528
4924
|
}
|
|
2529
|
-
|
|
2530
|
-
|
|
2531
|
-
|
|
2532
|
-
|
|
2533
|
-
|
|
2534
|
-
|
|
2535
|
-
|
|
2536
|
-
|
|
2537
|
-
|
|
2538
|
-
|
|
2539
|
-
|
|
2540
|
-
|
|
2541
|
-
|
|
2542
|
-
|
|
2543
|
-
|
|
2544
|
-
|
|
2545
|
-
|
|
2546
|
-
|
|
2547
|
-
|
|
2548
|
-
|
|
2549
|
-
|
|
2550
|
-
|
|
2551
|
-
|
|
2552
|
-
|
|
2553
|
-
|
|
2554
|
-
daemonArgs.push("--no-auto-assign");
|
|
2555
|
-
// Spawn detached daemon
|
|
2556
|
-
const child = spawn("npx", daemonArgs, {
|
|
2557
|
-
detached: true,
|
|
2558
|
-
stdio: "ignore",
|
|
2559
|
-
cwd: __dirname,
|
|
2560
|
-
});
|
|
2561
|
-
child.unref();
|
|
2562
|
-
const daemonPid = child.pid;
|
|
2563
|
-
// Wait a moment for daemon to start and write state
|
|
2564
|
-
await new Promise((r) => setTimeout(r, 3000));
|
|
2565
|
-
const state = readDaemonState();
|
|
2566
|
-
const terminalId = state?.terminals?.[0]?.id ?? "pending";
|
|
4925
|
+
const adaptiveBudget = learningEngine.getAdaptiveBudget(worldModel.getState().focusedApp?.bundleId ?? "unknown");
|
|
4926
|
+
const executor = new PlanExecutor(worldModel, planner, toolRegistry.toExecutor(), { postconditionWaitMs: adaptiveBudget.verifyMs, defaultStepTimeout: Math.max(30_000, adaptiveBudget.actMs * 2) }, recoveryEngine, learningEngine);
|
|
4927
|
+
const result = await executor.executeNextStep(goal);
|
|
4928
|
+
goalStore.update(goalId, goal);
|
|
4929
|
+
if ("paused" in result) {
|
|
4930
|
+
const pause = result;
|
|
4931
|
+
return {
|
|
4932
|
+
content: [{ type: "text", text: [
|
|
4933
|
+
`Step ${pause.stepIndex + 1}/${pause.totalSteps} requires LLM interpretation:`,
|
|
4934
|
+
` ${pause.stepDescription}`,
|
|
4935
|
+
"",
|
|
4936
|
+
"Use plan_step_resolve to provide tool + params, or execute the step yourself and call plan_step again.",
|
|
4937
|
+
].join("\n") }],
|
|
4938
|
+
};
|
|
4939
|
+
}
|
|
4940
|
+
if ("goalId" in result) {
|
|
4941
|
+
// PlanResult — goal completed
|
|
4942
|
+
return {
|
|
4943
|
+
content: [{ type: "text", text: result.success
|
|
4944
|
+
? `Goal completed: ${result.subgoalsCompleted}/${result.totalSubgoals} subgoals done.`
|
|
4945
|
+
: `Goal failed: ${result.error}` }],
|
|
4946
|
+
};
|
|
4947
|
+
}
|
|
4948
|
+
// StepResult
|
|
4949
|
+
const sr = result;
|
|
2567
4950
|
return {
|
|
2568
|
-
content: [{
|
|
2569
|
-
|
|
2570
|
-
|
|
2571
|
-
|
|
2572
|
-
|
|
2573
|
-
|
|
2574
|
-
`Window ID: ${windowId ?? "auto-detecting"}\n` +
|
|
2575
|
-
`Poll interval: ${pollIntervalMs ?? 3000}ms\n` +
|
|
2576
|
-
`Auto-assign: ${autoAssign !== false}\n` +
|
|
2577
|
-
`Log: ${MONITOR_LOG}\n` +
|
|
2578
|
-
`State: ${MONITOR_STATE}\n\n` +
|
|
2579
|
-
`The daemon runs independently — survives Claude Code restarts.\n` +
|
|
2580
|
-
`Use codex_monitor_status to check on it anytime.`,
|
|
2581
|
-
}],
|
|
4951
|
+
content: [{ type: "text", text: [
|
|
4952
|
+
sr.success ? `Step completed: ${sr.step.tool}` : `Step failed: ${sr.error}`,
|
|
4953
|
+
`Duration: ${sr.durationMs}ms`,
|
|
4954
|
+
sr.usedFallback ? "(used fallback tool)" : "",
|
|
4955
|
+
sr.postconditionMet ? "" : "Warning: postcondition not met",
|
|
4956
|
+
].filter(Boolean).join("\n") }],
|
|
2582
4957
|
};
|
|
2583
4958
|
});
|
|
2584
|
-
|
|
2585
|
-
|
|
2586
|
-
|
|
2587
|
-
|
|
2588
|
-
|
|
2589
|
-
const
|
|
4959
|
+
originalTool("plan_step_resolve", "Resolve a paused LLM step by providing the tool and params to use. The server executes the tool, verifies postconditions, and advances the plan.", {
|
|
4960
|
+
goalId: z.string().describe("Goal ID"),
|
|
4961
|
+
tool: z.string().describe("MCP tool name to execute for this step"),
|
|
4962
|
+
params: z.record(z.string(), z.unknown()).optional().describe("Tool parameters"),
|
|
4963
|
+
}, async ({ goalId, tool, params }) => {
|
|
4964
|
+
const goal = goalStore.get(goalId);
|
|
4965
|
+
if (!goal) {
|
|
4966
|
+
return { content: [{ type: "text", text: `Goal not found: ${goalId}` }] };
|
|
4967
|
+
}
|
|
4968
|
+
const adaptiveBudget = learningEngine.getAdaptiveBudget(worldModel.getState().focusedApp?.bundleId ?? "unknown");
|
|
4969
|
+
const executor = new PlanExecutor(worldModel, planner, toolRegistry.toExecutor(), { postconditionWaitMs: adaptiveBudget.verifyMs, defaultStepTimeout: Math.max(30_000, adaptiveBudget.actMs * 2) }, recoveryEngine, learningEngine);
|
|
4970
|
+
const result = await executor.resolveStep(goal, tool, params ?? {});
|
|
4971
|
+
goalStore.update(goalId, goal);
|
|
4972
|
+
return {
|
|
4973
|
+
content: [{ type: "text", text: result.success
|
|
4974
|
+
? `Step resolved and completed: ${tool}`
|
|
4975
|
+
: `Step failed: ${result.error}` }],
|
|
4976
|
+
};
|
|
4977
|
+
});
|
|
4978
|
+
originalTool("plan_status", "Check the current status of a goal: subgoal progress, current step, completion state.", {
|
|
4979
|
+
goalId: z.string().describe("Goal ID"),
|
|
4980
|
+
}, async ({ goalId }) => {
|
|
4981
|
+
const goal = goalStore.get(goalId);
|
|
4982
|
+
if (!goal) {
|
|
4983
|
+
return { content: [{ type: "text", text: `Goal not found: ${goalId}` }] };
|
|
4984
|
+
}
|
|
4985
|
+
const lines = [
|
|
4986
|
+
`Goal: ${goal.description}`,
|
|
4987
|
+
`Status: ${goal.status}`,
|
|
4988
|
+
`Created: ${goal.createdAt}`,
|
|
4989
|
+
goal.completedAt ? `Completed: ${goal.completedAt}` : "",
|
|
4990
|
+
"",
|
|
4991
|
+
].filter(Boolean);
|
|
4992
|
+
for (let i = 0; i < goal.subgoals.length; i++) {
|
|
4993
|
+
const sg = goal.subgoals[i];
|
|
4994
|
+
const plan = sg.plan;
|
|
4995
|
+
const progress = plan
|
|
4996
|
+
? `${plan.currentStepIndex}/${plan.steps.length} steps`
|
|
4997
|
+
: "no plan";
|
|
4998
|
+
lines.push(` Subgoal ${i + 1}: ${sg.status} (${progress}, ${sg.attempts} attempts)`);
|
|
4999
|
+
if (sg.lastError)
|
|
5000
|
+
lines.push(` Error: ${sg.lastError}`);
|
|
5001
|
+
}
|
|
5002
|
+
if (goal.pausedAt) {
|
|
5003
|
+
lines.push("", `Paused at: subgoal ${goal.pausedAt.subgoalIndex + 1}, step ${goal.pausedAt.stepIndex + 1}`);
|
|
5004
|
+
}
|
|
5005
|
+
return { content: [{ type: "text", text: lines.join("\n") }] };
|
|
5006
|
+
});
|
|
5007
|
+
originalTool("plan_list", "List all goals (active, completed, failed). Optionally filter by status.", {
|
|
5008
|
+
status: z.string().optional().describe("Filter by status: pending, active, completed, failed"),
|
|
5009
|
+
}, async ({ status }) => {
|
|
5010
|
+
const goals = status
|
|
5011
|
+
? goalStore.list(status)
|
|
5012
|
+
: goalStore.list();
|
|
5013
|
+
if (goals.length === 0) {
|
|
5014
|
+
return { content: [{ type: "text", text: "No goals found." }] };
|
|
5015
|
+
}
|
|
5016
|
+
const lines = goals.map((g) => {
|
|
5017
|
+
const sgDone = g.subgoals.filter((s) => s.status === "completed").length;
|
|
5018
|
+
return ` ${g.id}: ${g.status} — "${g.description}" (${sgDone}/${g.subgoals.length} subgoals, ${g.createdAt})`;
|
|
5019
|
+
});
|
|
5020
|
+
return { content: [{ type: "text", text: [`${goals.length} goal(s):`, ...lines].join("\n") }] };
|
|
5021
|
+
});
|
|
5022
|
+
// ═══════════════════════════════════════════════
|
|
5023
|
+
// PERCEPTION + WORLD MODEL — continuous state tracking
|
|
5024
|
+
// ═══════════════════════════════════════════════
|
|
5025
|
+
originalTool("perception_status", "Get continuous perception status: multi-rate loop stats, freshness of AX/CDP/vision sources, and event counts.", {}, async () => {
|
|
5026
|
+
const stats = perceptionManager.getStats();
|
|
5027
|
+
const freshness = perceptionManager.getFreshnessSummary();
|
|
5028
|
+
const lines = [
|
|
5029
|
+
freshness,
|
|
5030
|
+
`Running: ${perceptionManager.isRunning}`,
|
|
5031
|
+
];
|
|
5032
|
+
if (stats.started) {
|
|
5033
|
+
lines.push(`Started: ${stats.startedAt}`);
|
|
5034
|
+
lines.push("");
|
|
5035
|
+
const pcConfig = perceptionManager.getConfig();
|
|
5036
|
+
lines.push("Loop cycles:");
|
|
5037
|
+
lines.push(` Fast (${pcConfig?.fastIntervalMs ?? 100}ms): ${stats.fastCycles} cycles`);
|
|
5038
|
+
lines.push(` Medium (${pcConfig?.mediumIntervalMs ?? 500}ms): ${stats.mediumCycles} cycles`);
|
|
5039
|
+
lines.push(` Slow (${pcConfig?.slowIntervalMs ?? 2000}ms): ${stats.slowCycles} cycles`);
|
|
5040
|
+
lines.push("");
|
|
5041
|
+
lines.push("Events processed:");
|
|
5042
|
+
lines.push(` AX events: ${stats.axEventsProcessed}`);
|
|
5043
|
+
lines.push(` AX tree polls: ${stats.axTreePolls}`);
|
|
5044
|
+
lines.push(` CDP mutations: ${stats.cdpMutationsProcessed}`);
|
|
5045
|
+
lines.push(` CDP snapshots: ${stats.cdpSnapshots}`);
|
|
5046
|
+
lines.push(` Vision diffs: ${stats.visionDiffs}`);
|
|
5047
|
+
lines.push(` Vision OCRs: ${stats.visionOCRs}`);
|
|
5048
|
+
}
|
|
5049
|
+
return { content: [{ type: "text", text: lines.join("\n") }] };
|
|
5050
|
+
});
|
|
5051
|
+
// world_state — render the world model as a plain-text report.
// The summary and session id are always present; the focused-window, dialog,
// browser-domain and tracked-entity sections appear only when there is data.
// With verbose=true every control of every window is listed, grouped by role.
originalTool("world_state", "Get the current world model state: focused app, window/control counts, active dialogs, and last scan age. Use verbose=true to dump all controls.", {
    verbose: z.boolean().optional().default(false).describe("Dump all controls with roles, labels, positions, and confidence"),
}, async ({ verbose }) => {
    const state = worldModel.getState();
    const summary = worldModel.toSummary();
    const focused = worldModel.getFocusedWindow();
    const dialogs = worldModel.getActiveDialogs();
    const report = [];
    // No windows and no focused app: explain why, depending on whether perception runs.
    const modelIsEmpty = state.windows.size === 0 && !state.focusedApp;
    if (modelIsEmpty) {
        report.push(perceptionManager.isRunning
            ? "World model is empty — perception is running but no data received yet."
            : "Warning: World model is empty. Run perception_start or use focus()/ui_tree to populate state.");
        report.push("");
    }
    report.push(summary);
    if (focused) {
        report.push(`\nFocused window: "${focused.title.value}" (id=${focused.windowId}, ${focused.controls.size} controls, confidence=${focused.title.confidence.toFixed(2)})`);
    }
    if (dialogs.length > 0) {
        report.push("\nActive dialogs:");
        dialogs.forEach((d) => {
            report.push(` - ${d.type}: "${d.title}" (${d.controls.size} controls, detected ${d.detectedAt})`);
        });
    }
    report.push(`\nSession: ${state.sessionId || "(not initialized)"}`);
    // Browser domains: URL, title and tab list, skipped when neither URL nor title is known.
    for (const [bid, domain] of state.appDomains) {
        if (domain.family !== "browser") {
            continue;
        }
        const url = domain.url?.value;
        const title = domain.title?.value;
        if (!url && !title) {
            continue;
        }
        report.push(`\nBrowser (${bid}):`);
        if (url) {
            report.push(` URL: ${url}`);
        }
        if (title) {
            report.push(` Title: ${title}`);
        }
        const tabs = domain.tabs;
        if (tabs && tabs.length > 0) {
            report.push(` Tabs (${tabs.length}):`);
            for (const tab of tabs) {
                const marker = tab.isActive ? "▸ " : " ";
                report.push(` ${tab.index}. ${marker}${tab.title} | ${tab.url}`);
            }
        }
    }
    // Tracked entities, each with its most recent position (if any).
    const entities = worldModel.getTrackedEntities();
    if (entities.size > 0) {
        report.push(`\nTracked entities (${entities.size}):`);
        for (const entity of entities.values()) {
            const { positions } = entity;
            const latest = positions[positions.length - 1];
            const where = latest ? `(${latest.x},${latest.y})` : "";
            report.push(` - ${entity.type}: "${entity.label}" ${where} (seen ${positions.length}x, since ${entity.firstSeen})`);
        }
    }
    if (verbose) {
        report.push("\n── ALL CONTROLS ──");
        for (const [winId, win] of state.windows) {
            report.push(`\nWindow ${winId}: "${win.title.value}" (${win.bundleId ?? "?"})`);
            const fe = win.focusedElement;
            if (fe) {
                report.push(` Focused: ${fe.role} "${fe.label.value}" @ (${fe.position.x}, ${fe.position.y})`);
            }
            // Bucket pre-formatted rows by role; the largest roles are printed first.
            const rowsByRole = new Map();
            for (const ctrl of win.controls.values()) {
                const focusMark = ctrl.focused ? " *FOCUSED*" : "";
                const row = ` "${ctrl.label.value || "(no label)"}" @ (${Math.round(ctrl.position.x)},${Math.round(ctrl.position.y)}) ${ctrl.size.width}x${ctrl.size.height} conf=${ctrl.label.confidence.toFixed(2)}${focusMark}`;
                const bucket = rowsByRole.get(ctrl.role);
                if (bucket) {
                    bucket.push(row);
                }
                else {
                    rowsByRole.set(ctrl.role, [row]);
                }
            }
            const ordered = [...rowsByRole].sort((a, b) => b[1].length - a[1].length);
            for (const [role, rows] of ordered) {
                report.push(` [${role}] (${rows.length})`);
                // Cap the listing at 50 rows per role and note how many were omitted.
                for (const row of rows.slice(0, 50)) {
                    report.push(row);
                }
                if (rows.length > 50) {
                    report.push(` ... +${rows.length - 50} more`);
                }
            }
        }
    }
    return { content: [{ type: "text", text: report.join("\n") }] };
});
|
|
5143
|
+
// world_state_diff — list controls the world model reports as stale.
// An empty result is split into "nothing tracked at all" and "everything is fresh";
// otherwise up to 20 stale controls are printed with their age in seconds.
originalTool("world_state_diff", "Get stale UI controls that haven't been refreshed within a threshold. Useful for finding controls whose state may be outdated.", {
    thresholdMs: z.number().optional().describe("Stale threshold in ms (default: 5 minutes)"),
}, async ({ thresholdMs }) => {
    const asText = (text) => ({ content: [{ type: "text", text }] });
    const stale = worldModel.getStaleControls(thresholdMs);
    if (stale.length > 0) {
        const report = [`${stale.length} stale control(s):`];
        for (const ctrl of stale.slice(0, 20)) {
            const ageSeconds = Math.round((Date.now() - new Date(ctrl.value.updatedAt).getTime()) / 1000);
            report.push(` ${ctrl.stableId} ${ctrl.role} "${ctrl.label.value}" — ${ageSeconds}s old`);
        }
        if (stale.length > 20) {
            report.push(` ... and ${stale.length - 20} more`);
        }
        return asText(report.join("\n"));
    }
    // Nothing stale: count tracked controls to tell "no data" apart from "all fresh".
    let trackedControls = 0;
    for (const win of worldModel.getState().windows.values()) {
        trackedControls += win.controls.size;
    }
    if (trackedControls !== 0) {
        return asText("No stale controls — all state is fresh.");
    }
    const hint = perceptionManager.isRunning
        ? "Perception is running but no controls tracked yet."
        : "Run perception_start or ui_tree to populate state.";
    return asText(`World model has no tracked controls. ${hint}`);
});
|
|
5167
|
+
// learning_status — print learning-engine statistics for one app.
// The app is the explicit bundleId, else the world model's focused app, else "unknown".
originalTool("learning_status", "Get learning engine stats: locator preferences, recovery strategy rankings, adaptive budgets, and sensor preferences for a given app.", {
    bundleId: z.string().optional().describe("App bundle ID to query (default: currently focused app)"),
}, async ({ bundleId }) => {
    const targetApp = bundleId ?? worldModel.getState().focusedApp?.bundleId ?? "unknown";
    const stats = learningEngine.getAppSummary(targetApp);
    const report = [];
    report.push(`Learning stats for ${targetApp}:`);
    report.push(` Locator entries: ${stats.locatorEntries}`);
    report.push(` Recovery entries: ${stats.recoveryEntries}`);
    report.push(` Timing samples: ${stats.timingSamples}`);
    report.push(` Sensor entries: ${stats.sensorEntries}`);
    // The "best" lines are optional; the summary may not have a winner yet.
    if (stats.topLocatorMethod) {
        report.push(` Best locator method: ${stats.topLocatorMethod}`);
    }
    if (stats.topSensor) {
        report.push(` Best sensor: ${stats.topSensor}`);
    }
    const budget = stats.adaptiveBudget;
    report.push("", "Adaptive budgets:", ` Locate: ${budget.locateMs}ms`, ` Act: ${budget.actMs}ms`, ` Verify: ${budget.verifyMs}ms`);
    const ranking = learningEngine.rankSensors(targetApp);
    if (ranking.length > 0) {
        report.push("", "Sensor ranking:");
        ranking.forEach((sensor) => {
            report.push(` ${sensor.sourceType}: score=${sensor.score.toFixed(3)}, avg=${Math.round(sensor.avgLatencyMs)}ms`);
        });
    }
    return { content: [{ type: "text", text: report.join("\n") }] };
});
|
|
5200
|
+
// ── Perception lifecycle ──
|
|
5201
|
+
// perception_start — start the continuous perception loop for an app.
//
// Target resolution:
//   1. With a bundleId: use the world model's focused app if it matches, else look the
//      app up via the bridge's "app.list", else ask System Events for the process pid.
//      If none of these finds it, an error is returned rather than silently falling
//      back to the frontmost app.
//   2. Without a bundleId: use the world model's focused app, else ask System Events
//      for the frontmost process.
// After perception is started, browser apps get a CDP client attached to the perception
// coordinator, together with a reconnect factory.
// Returns an MCP text result; failures are reported as text, not thrown.
originalTool("perception_start", "Start continuous perception for the currently focused app (or specify bundleId). Begins multi-rate AX/CDP/vision polling loop: FAST (100ms AX events), MEDIUM (300ms AX/CDP poll), SLOW (1000ms vision/OCR).", {
    bundleId: z.string().optional().describe("Optional: specify app bundle ID directly instead of using focused app"),
}, async ({ bundleId: overrideBundleId }) => {
    const reply = (text) => ({ content: [{ type: "text", text }] });
    // Run an AppleScript via osascript and return trimmed stdout. The script is passed
    // inside single quotes, so embedded single quotes are closed, escaped and reopened.
    const runAppleScript = async (script) => {
        const { stdout } = await execAsync(`osascript -e '${script.replace(/'/g, "'\\''")}'`, { encoding: "utf-8", timeout: 5000 });
        return (stdout ?? "").trim();
    };
    // Quote a value as an AppleScript string literal. Backslash and double quote are
    // escaped so caller-supplied text cannot terminate the literal and inject script.
    const toAppleScriptString = (value) => `"${value.replace(/\\/g, "\\\\").replace(/"/g, '\\"')}"`;
    // Already running check
    if (perceptionManager.isRunning && !overrideBundleId) {
        const stats = perceptionManager.getStats();
        return reply(`Perception already running (started ${stats.startedAt}). Use perception_stop first to restart, or pass bundleId to switch target.`);
    }
    let app = worldModel.getState().focusedApp;
    // Record a resolved app locally and mirror it into the world model.
    const adoptApp = (bundleId, appName, pid) => {
        app = { bundleId, appName, pid };
        worldModel.updateFocusedApp({ bundleId, appName, pid, windowTitle: "" });
    };
    const overrideUnresolved = () => Boolean(overrideBundleId) && (!app || app.bundleId !== overrideBundleId);
    // If bundleId override provided, try to resolve app info via bridge or AppleScript
    if (overrideUnresolved()) {
        try {
            await ensureBridge();
            const apps = await bridge.call("app.list", {});
            const found = apps?.find((a) => a.bundleId === overrideBundleId);
            if (found) {
                adoptApp(overrideBundleId, found.name ?? overrideBundleId, found.pid);
            }
        }
        catch { /* Bridge unavailable — fall through to AppleScript */ }
        // AppleScript fallback: bridge may not list windowless apps (e.g. freshly launched/killed TextEdit)
        if (overrideUnresolved()) {
            try {
                const out = await runAppleScript(`tell application "System Events" to get unix id of (first process whose bundle identifier is ${toAppleScriptString(overrideBundleId)})`);
                const pid = Number.parseInt(out, 10);
                if (!Number.isNaN(pid)) {
                    adoptApp(overrideBundleId, overrideBundleId, pid);
                }
            }
            catch { /* AppleScript also failed — app truly not running */ }
        }
    }
    // If bundleId was explicitly provided but we couldn't find the app, error out
    // instead of silently falling back to the frontmost app
    if (overrideUnresolved()) {
        return reply(`Error: App with bundleId "${overrideBundleId}" is not running. Launch it first with launch(bundleId: "${overrideBundleId}").`);
    }
    // If still no app, try AppleScript to detect frontmost app
    if (!app) {
        try {
            const asScript = `tell application "System Events"
set fp to first process whose frontmost is true
return (bundle identifier of fp) & "|" & (name of fp) & "|" & (unix id of fp)
end tell`;
            // Output is "bundleId|name|pid". Take the first and last fields so an app
            // name that itself contains "|" cannot shift the pid out of position.
            const fields = (await runAppleScript(asScript)).split("|");
            if (fields.length >= 3) {
                const bid = fields[0];
                const name = fields.slice(1, -1).join("|");
                const pid = Number.parseInt(fields[fields.length - 1], 10);
                if (bid && !Number.isNaN(pid)) {
                    // Fall back to the bundle id when the process name is empty.
                    adoptApp(bid, name || bid, pid);
                }
            }
        }
        catch { /* AppleScript fallback failed */ }
    }
    if (!app) {
        return reply("Error: No focused app detected. Focus an app with focus() first, or pass bundleId directly.");
    }
    let bridgeAvailable = false;
    try {
        await ensureBridge();
        bridgeAvailable = true;
    }
    catch { /* bridge unavailable — proceed without AX/vision */ }
    let windowId;
    if (bridgeAvailable) {
        try {
            windowId = await resolveWindowId(app.pid);
        }
        catch { /* best-effort */ }
    }
    const ctx = { bundleId: app.bundleId, appName: app.appName, pid: app.pid, windowTitle: "", ...(windowId != null ? { windowId } : {}) };
    await perceptionManager.ensureStarted(ctx);
    // Auto-connect CDP for browser apps — pass a connect factory so the
    // perception coordinator can reconnect when the WebSocket drops
    let cdpStatus = "skipped (not browser)";
    const isBrowser = isBrowserApp();
    console.error(`[perception_start] app=${app.bundleId} pid=${app.pid} windowId=${windowId} isBrowser=${isBrowser}`);
    if (isBrowser) {
        try {
            console.error("[perception_start] calling ensureCDP...");
            const { CDP: cdp, port } = await ensureCDP();
            console.error(`[perception_start] ensureCDP ok, port=${port}`);
            const connectFn = async () => {
                const targets = await cdp.List({ port });
                const page = targets.find((t) => t.type === "page");
                if (!page)
                    throw new Error("No CDP page target");
                return cdp({ port, target: page.id });
            };
            const client = await connectFn();
            console.error(`[perception_start] CDP client created, client keys: ${Object.keys(client).slice(0, 5).join(",")}`);
            const coordinator = perceptionManager.getCoordinator();
            console.error(`[perception_start] coordinator exists: ${!!coordinator}, isRunning: ${coordinator?.isRunning}`);
            if (coordinator) {
                coordinator.activateCDP(client, connectFn);
                cdpStatus = `connected (port ${port})`;
            }
            else {
                cdpStatus = "no coordinator";
            }
        }
        catch (e) {
            // CDP is optional: perception keeps running and the failure is reported in the reply.
            cdpStatus = `failed: ${e?.message ?? e}`;
            console.error(`[perception_start] CDP error: ${cdpStatus}`);
        }
    }
    console.error(`[perception_start] CDP status: ${cdpStatus}`);
    // Set up Safari browser enricher (or clear it for non-Safari)
    installSafariEnricher(app.bundleId);
    return reply(`Perception started for ${app.bundleId} (${app.appName}). CDP: ${cdpStatus}`);
});
|
|
5315
|
+
// perception_stop — stop the perception loop and report what it processed.
// Stats are read before stopping so the counters of the run being ended are shown.
originalTool("perception_stop", "Stop continuous perception loop.", {}, async () => {
    const asText = (text) => ({ content: [{ type: "text", text }] });
    if (!perceptionManager.isRunning) {
        return asText("Perception was not running.");
    }
    const finalStats = perceptionManager.getStats();
    await perceptionManager.stop();
    const report = ["Perception stopped."];
    if (finalStats.started) {
        const { axEventsProcessed, cdpSnapshots, visionDiffs, visionOCRs, fastCycles, mediumCycles, slowCycles } = finalStats;
        report.push(`Processed: ${axEventsProcessed} AX events, ${cdpSnapshots} CDP snapshots, ${visionDiffs} vision diffs, ${visionOCRs} OCRs.`, `Cycles: ${fastCycles} fast, ${mediumCycles} medium, ${slowCycles} slow.`);
    }
    return asText(report.join("\n"));
});
|
|
2630
|
-
|
|
2631
|
-
|
|
2632
|
-
|
|
2633
|
-
|
|
2634
|
-
|
|
2635
|
-
|
|
2636
|
-
|
|
2637
|
-
|
|
2638
|
-
|
|
2639
|
-
|
|
2640
|
-
|
|
2641
|
-
|
|
2642
|
-
createdAt: new Date().toISOString(),
|
|
2643
|
-
assignedAt: null,
|
|
2644
|
-
completedAt: null,
|
|
2645
|
-
result: null,
|
|
2646
|
-
};
|
|
2647
|
-
tasks.push(task);
|
|
2648
|
-
tasks.sort((a, b) => a.priority - b.priority);
|
|
2649
|
-
writeDaemonTasks(tasks);
|
|
2650
|
-
const queued = tasks.filter((t) => t.status === "queued").length;
|
|
2651
|
-
return {
|
|
2652
|
-
content: [{
|
|
2653
|
-
type: "text",
|
|
2654
|
-
text: `Task queued!\n` +
|
|
2655
|
-
`ID: ${task.id}\n` +
|
|
2656
|
-
`Prompt: "${prompt.slice(0, 100)}${prompt.length > 100 ? "..." : ""}"\n` +
|
|
2657
|
-
`Priority: ${task.priority}\n` +
|
|
2658
|
-
`Target terminal: ${task.terminalId ?? "any available"}\n` +
|
|
2659
|
-
`Queue size: ${queued}`,
|
|
2660
|
-
}],
|
|
2661
|
-
};
|
|
5328
|
+
// ── Plan lifecycle ──
|
|
5329
|
+
// plan_cancel — mark a stored goal as failed and stamp its completion time.
// NOTE(review): the goal's current status is not checked, so an already finished goal
// would also be rewritten as failed — confirm whether that is intended.
originalTool("plan_cancel", "Cancel an active goal, marking it as failed.", {
    goalId: z.string().describe("Goal ID to cancel"),
}, async ({ goalId }) => {
    const asText = (text) => ({ content: [{ type: "text", text }] });
    const target = goalStore.get(goalId);
    if (target) {
        target.status = "failed";
        target.completedAt = new Date().toISOString();
        goalStore.update(goalId, target);
        return asText(`Goal cancelled: ${goalId}`);
    }
    return asText(`Goal not found: ${goalId}`);
});
|
|
2663
|
-
|
|
2664
|
-
|
|
2665
|
-
|
|
2666
|
-
|
|
2667
|
-
|
|
2668
|
-
|
|
2669
|
-
|
|
2670
|
-
|
|
2671
|
-
|
|
2672
|
-
}
|
|
2673
|
-
const lines = tasks.map((t, i) => {
|
|
2674
|
-
const parts = [
|
|
2675
|
-
`${i + 1}. [${t.status.toUpperCase()}] "${(t.prompt || "").slice(0, 80)}"`,
|
|
2676
|
-
` ID: ${t.id} | Priority: ${t.priority}`,
|
|
2677
|
-
` Terminal: ${t.terminalId ?? "any"}`,
|
|
2678
|
-
` Created: ${t.createdAt}`,
|
|
2679
|
-
];
|
|
2680
|
-
if (t.assignedAt)
|
|
2681
|
-
parts.push(` Assigned: ${t.assignedAt}`);
|
|
2682
|
-
if (t.completedAt)
|
|
2683
|
-
parts.push(` Completed: ${t.completedAt}`);
|
|
2684
|
-
if (t.result)
|
|
2685
|
-
parts.push(` Result: ${(t.result || "").slice(0, 100)}`);
|
|
2686
|
-
return parts.join("\n");
|
|
2687
|
-
});
|
|
2688
|
-
return { content: [{ type: "text", text: lines.join("\n\n") }] };
|
|
5341
|
+
// ── Recovery status + configure ──
|
|
5342
|
+
// recovery_status — print the recovery engine's cooldown count, reference-cache size
// and whether a learning engine is connected.
originalTool("recovery_status", "Get recovery engine status: cooldowns, reference cache, learning engine connection.", {}, async () => {
    const { cooldownCount, referenceCacheSize, learningEngineConnected } = recoveryEngine.getStatus();
    const text = [
        "Recovery Engine Status:",
        ` Active cooldowns: ${cooldownCount}`,
        ` Reference cache entries: ${referenceCacheSize}`,
        ` Learning engine connected: ${learningEngineConnected}`,
    ].join("\n");
    return { content: [{ type: "text", text }] };
});
|
|
2690
|
-
|
|
2691
|
-
|
|
2692
|
-
|
|
2693
|
-
|
|
5352
|
+
// recovery_configure — forward the supplied budget fields to the recovery engine.
// Only fields the caller actually passed are included in the update object.
originalTool("recovery_configure", "Update recovery engine default budget configuration.", {
    maxRecoveryTimeMs: z.number().optional().describe("Max time for recovery attempts in ms"),
    maxStrategies: z.number().optional().describe("Max number of strategies to try"),
}, async ({ maxRecoveryTimeMs, maxStrategies }) => {
    const changes = {
        ...(maxRecoveryTimeMs !== undefined ? { maxRecoveryTimeMs } : {}),
        ...(maxStrategies !== undefined ? { maxStrategies } : {}),
    };
    recoveryEngine.configure(changes);
    const text = `Recovery config updated: ${JSON.stringify(changes)}`;
    return { content: [{ type: "text", text }] };
});
|
|
5364
|
+
// ── Learning lifecycle ──
|
|
5365
|
+
// learning_reset — wipe the learning engine's data, guarded by an explicit confirm flag.
originalTool("learning_reset", "Clear ALL learning data (locators, recovery, timing, sensors). Requires confirm=true.", {
    confirm: z.boolean().describe("Must be true to proceed"),
}, async ({ confirm }) => {
    let text = "Aborted: set confirm=true to clear all learning data.";
    if (confirm) {
        learningEngine.reset();
        text = "All learning data cleared and flushed to disk.";
    }
    return { content: [{ type: "text", text }] };
});
|
|
5374
|
+
// ═══════════════════════════════════════════════
|
|
5375
|
+
// ORCHESTRATOR — multi-agent task routing
|
|
5376
|
+
// ═══════════════════════════════════════════════
|
|
5377
|
+
// TypeScript entry point of the orchestrator daemon, used when no compiled build exists.
const ORCHESTRATOR_DAEMON_SCRIPT = path.resolve(__dirname, "scripts", "orchestrator-daemon.ts");
// orchestrator_start — launch the orchestrator daemon as a detached process.
// Uses the compiled script under scripts/ (or dist/scripts/) when present, otherwise
// runs the TypeScript source through `npx tsx`. After a 3s wait the daemon pid is
// re-read to confirm the launch.
server.tool("orchestrator_start", "Start the multi-agent orchestrator daemon. Manages parallel worker slots: web tasks (CDP) run in parallel, native tasks (AX/keyboard) are serialized per-app. Survives restarts.", {
    webSlots: z.number().optional().describe("Number of parallel web worker slots (default: 4)"),
    nativeSlots: z.number().optional().describe("Number of native worker slots (default: 1)"),
    pollMs: z.number().optional().describe("Poll interval in ms (default: 1000)"),
}, async ({ webSlots, nativeSlots, pollMs }) => {
    const asText = (text) => ({ content: [{ type: "text", text }] });
    const runningPid = getOrchestratorPid();
    if (runningPid !== null) {
        return asText(`Orchestrator already running (pid=${runningPid}). Use orchestrator_stop first.`);
    }
    const siblingBuild = path.resolve(__dirname, "scripts", "orchestrator-daemon.js");
    const compiledPath = fs.existsSync(siblingBuild)
        ? siblingBuild
        : path.resolve(__dirname, "dist", "scripts", "orchestrator-daemon.js");
    // CLI flags are the same for both launch modes; unset (or zero) options are omitted.
    const flags = [];
    if (webSlots) {
        flags.push("--web-slots", String(webSlots));
    }
    if (nativeSlots) {
        flags.push("--native-slots", String(nativeSlots));
    }
    if (pollMs) {
        flags.push("--poll", String(pollMs));
    }
    const spawnOptions = { detached: true, stdio: "ignore", cwd: __dirname };
    const usedCompiled = fs.existsSync(compiledPath);
    const child = usedCompiled
        ? spawn("node", [compiledPath, ...flags], spawnOptions)
        : spawn("npx", ["tsx", ORCHESTRATOR_DAEMON_SCRIPT, ...flags], spawnOptions);
    // Detach so this process does not wait on the daemon.
    child.unref();
    await new Promise((resolve) => setTimeout(resolve, 3000));
    const verifyPid = getOrchestratorPid();
    if (verifyPid) {
        return asText(`Orchestrator started (pid=${verifyPid}).\nWeb slots: ${webSlots ?? 4} (parallel CDP) | Native slots: ${nativeSlots ?? 1} (serialized per-app)\nPoll: ${pollMs ?? 1000}ms\nLog: ${ORCH_LOG_FILE}\n\nSubmit tasks with orchestrator_submit. Web tasks run in parallel, native tasks queue per-app.`);
    }
    return asText(`Orchestrator failed to start (mode=${usedCompiled ? "compiled" : "tsx"}).\nCheck log: ${ORCH_LOG_FILE}`);
});
|
|
5422
|
+
// orchestrator_stop — send SIGTERM to the orchestrator daemon and wait 2s before replying.
// A failure to signal the process is reported as text.
server.tool("orchestrator_stop", "Stop the orchestrator daemon. Running tasks finish before exit.", {}, async () => {
    const asText = (text) => ({ content: [{ type: "text", text }] });
    const daemonPid = getOrchestratorPid();
    if (!daemonPid) {
        return asText("No orchestrator daemon running.");
    }
    try {
        process.kill(daemonPid, "SIGTERM");
        await new Promise((resolve) => setTimeout(resolve, 2000));
    }
    catch (err) {
        return asText(`Failed to stop: ${err.message}`);
    }
    return asText(`Orchestrator stopped (pid=${daemonPid}).`);
});
|
|
2706
|
-
server.tool("
|
|
2707
|
-
|
|
2708
|
-
|
|
2709
|
-
|
|
5436
|
+
// orchestrator_submit — append a task to the persisted orchestrator state.
// The mode defaults to detectMode(playbookId, bundleId); optional fields are set only
// when the caller supplied them. The state is written back immediately.
server.tool("orchestrator_submit", "Submit a task to the orchestrator. Web tasks (CDP) run in parallel, native tasks queue per-app. Returns immediately — task is processed asynchronously.", {
    task: z.string().describe("What to do"),
    mode: z.enum(["web", "native", "mixed"]).optional().describe("Execution mode: web (parallel CDP), native (serialized AX/keyboard), mixed (default: auto-detect)"),
    playbookId: z.string().optional().describe("Playbook to execute"),
    bundleId: z.string().optional().describe("Target app bundle ID (required for native tasks)"),
    windowId: z.number().optional().describe("Target window ID"),
    vars: z.record(z.string(), z.string()).optional().describe("Variables for playbook substitution"),
    priority: z.number().optional().describe("Priority: lower = higher (default: 10)"),
}, async ({ task, mode, playbookId, bundleId, windowId, vars, priority }) => {
    const asText = (text) => ({ content: [{ type: "text", text }] });
    const orchState = readOrchState();
    if (!orchState?.running) {
        return asText("Orchestrator not running. Use orchestrator_start first.");
    }
    // Assemble the task options in a fixed key order, leaving out anything not supplied.
    const options = { mode: mode ?? detectMode(playbookId, bundleId) };
    if (playbookId !== undefined) {
        options.playbookId = playbookId;
    }
    if (bundleId !== undefined) {
        options.bundleId = bundleId;
    }
    if (windowId !== undefined) {
        options.windowId = windowId;
    }
    if (vars) {
        options.vars = vars;
    }
    if (priority !== undefined) {
        options.priority = priority;
    }
    const submitted = createOrchestratorTask(task, options);
    orchState.tasks.push(submitted);
    orchState.totalSubmitted++;
    writeOrchState(orchState);
    let slotInfo;
    if (submitted.mode === "web") {
        slotInfo = `→ will run on next free web slot (${orchState.webSlots} available)`;
    }
    else {
        slotInfo = `→ will run on native slot (serialized for ${bundleId ?? "unknown app"})`;
    }
    return asText(`Task submitted: ${submitted.id}\nMode: ${submitted.mode} ${slotInfo}\nPriority: ${submitted.priority}\n\nThe orchestrator will pick it up on the next poll cycle.`);
});
|
|
5465
|
+
// orchestrator_status — print the persisted orchestrator state: header, worker slots,
// task counters, then short previews of running, queued, completed and failed tasks,
// the native app locks, and the log file path.
server.tool("orchestrator_status", "Get orchestrator status — worker slots, task queue, active/completed tasks.", {}, async () => {
    const orchState = readOrchState();
    if (!orchState) {
        return { content: [{ type: "text", text: "Orchestrator not running. Use orchestrator_start first." }] };
    }
    const report = [];
    report.push(`Running: ${orchState.running}${orchState.pid ? ` (pid=${orchState.pid})` : ""}`);
    report.push(`Started: ${orchState.startedAt}`);
    report.push(`Slots: ${orchState.webSlots} web (parallel) + ${orchState.nativeSlots} native (per-app serial)`);
    report.push("", "Workers:");
    for (const worker of orchState.workers) {
        const activity = worker.busy ? `BUSY → ${worker.currentTaskId}` : "idle";
        report.push(` [${worker.id}] ${worker.type} — ${activity} (done: ${worker.tasksCompleted}, failed: ${worker.tasksFailed})`);
    }
    // One pass over the task list; "assigned" tasks are counted as running.
    const queued = [];
    const running = [];
    const done = [];
    const failed = [];
    const blocked = [];
    for (const t of orchState.tasks) {
        switch (t.status) {
            case "queued":
                queued.push(t);
                break;
            case "running":
            case "assigned":
                running.push(t);
                break;
            case "done":
                done.push(t);
                break;
            case "failed":
                failed.push(t);
                break;
            case "blocked":
                blocked.push(t);
                break;
            default:
                break;
        }
    }
    const preview = (t) => `"${t.task.slice(0, 60)}"`;
    report.push("", `Tasks: ${orchState.totalSubmitted} submitted, ${orchState.totalCompleted} done, ${orchState.totalFailed} failed`);
    report.push(`Queue: ${queued.length} queued, ${running.length} running, ${blocked.length} blocked`);
    if (running.length > 0) {
        report.push("", "Running:");
        running.forEach((t) => {
            report.push(` ${t.id}: ${preview(t)} [${t.mode}] → slot ${t.assignedWorker}`);
        });
    }
    if (queued.length > 0) {
        report.push("", `Queued (next ${Math.min(queued.length, 5)}):`);
        queued.slice(0, 5).forEach((t) => {
            report.push(` ${t.id}: ${preview(t)} [${t.mode}] priority=${t.priority}`);
        });
    }
    if (done.length > 0) {
        report.push("", `Recent completed (last ${Math.min(done.length, 5)}):`);
        done.slice(-5).forEach((t) => {
            report.push(` ${t.id}: ${preview(t)} → ${t.result?.slice(0, 80) ?? "ok"}`);
        });
    }
    if (failed.length > 0) {
        report.push("", `Recent failed (last ${Math.min(failed.length, 3)}):`);
        failed.slice(-3).forEach((t) => {
            report.push(` ${t.id}: ${preview(t)} → ${t.error?.slice(0, 80) ?? "unknown"}`);
        });
    }
    const lockEntries = Object.entries(orchState.nativeLocks);
    if (lockEntries.length > 0) {
        report.push("", "Native app locks:");
        for (const [lockedApp, slot] of lockEntries) {
            report.push(` ${lockedApp} → slot ${slot}`);
        }
    }
    report.push("", `Log: ${ORCH_LOG_FILE}`);
    return { content: [{ type: "text", text: report.join("\n") }] };
});
|
|
5521
|
+
// Helper aliases to keep tool code concise
|
|
5522
|
+
import { readOrchestratorState as readOrchState, writeOrchestratorState as writeOrchState, getOrchestratorDaemonPid as getOrchestratorPid, createTask as createOrchestratorTask, detectTaskMode as detectMode } from "./src/orchestrator/state.js";
|
|
5523
|
+
import { ORCHESTRATOR_LOG_FILE as ORCH_LOG_FILE } from "./src/orchestrator/types.js";
|
|
5524
|
+
// ═══════════════════════════════════════════════
|
|
5525
|
+
// OBSERVER — background app-level visual monitor
|
|
5526
|
+
// ═══════════════════════════════════════════════
|
|
5527
|
+
// TypeScript entry point of the observer daemon, used when no compiled build exists.
const OBSERVER_DAEMON_SCRIPT = path.resolve(__dirname, "scripts", "observer-daemon.ts");
// observer_start — launch the observer daemon as a detached process for one app window.
// Uses the compiled script under scripts/ (or dist/scripts/) when present, otherwise
// runs the TypeScript source through `npx tsx`. After a 2s wait the daemon pid is
// re-read to confirm the launch, then popup checks are switched on in the playbook engine.
server.tool("observer_start", "Start the observer daemon to continuously watch an app window. Captures frames via CGWindowListCreateImage, runs OCR only when pixels change, detects popups. Zero overhead on engine — reads a JSON file.", {
    bundleId: z.string().describe("Bundle ID of the app to watch (e.g. com.blackmagic-design.DaVinciResolve)"),
    windowId: z.number().describe("Window ID to capture (get from the 'windows' tool)"),
    intervalMs: z.number().optional().describe("Capture interval in ms (default: 2000). Lower = more responsive but more CPU"),
}, async ({ bundleId, windowId, intervalMs }) => {
    const asText = (text) => ({ content: [{ type: "text", text }] });
    const runningPid = getObserverDaemonPid();
    if (runningPid !== null) {
        return asText(`Observer daemon already running (pid=${runningPid}). Use observer_stop first.`);
    }
    const siblingBuild = path.resolve(__dirname, "scripts", "observer-daemon.js");
    const compiledPath = fs.existsSync(siblingBuild)
        ? siblingBuild
        : path.resolve(__dirname, "dist", "scripts", "observer-daemon.js");
    // CLI flags are the same for both launch modes; an unset (or zero) interval is omitted.
    const flags = ["--bundleId", bundleId, "--windowId", String(windowId)];
    if (intervalMs) {
        flags.push("--interval", String(intervalMs));
    }
    const spawnOptions = { detached: true, stdio: "ignore", cwd: __dirname };
    const usedCompiled = fs.existsSync(compiledPath);
    const child = usedCompiled
        ? spawn("node", [compiledPath, ...flags], spawnOptions)
        : spawn("npx", ["tsx", OBSERVER_DAEMON_SCRIPT, ...flags], spawnOptions);
    // Detach so this process does not wait on the daemon.
    child.unref();
    await new Promise((resolve) => setTimeout(resolve, 2000));
    const verifyPid = getObserverDaemonPid();
    if (!verifyPid) {
        return asText(`Observer daemon failed to start (mode=${usedCompiled ? "compiled" : "tsx"}).\nCheck log: ${OBSERVER_LOG_FILE}`);
    }
    // Enable popup checks in the playbook engine (lazy-init if needed)
    if (!activePlaybookEngine) {
        getJobRunner(); // initializes activePlaybookEngine as a side effect
    }
    if (activePlaybookEngine) {
        activePlaybookEngine.setPopupCheck(true);
    }
    return asText(`Observer daemon started (pid=${verifyPid}).\nWatching: ${bundleId} (window ${windowId})\nInterval: ${intervalMs ?? 2000}ms\nLog: ${OBSERVER_LOG_FILE}\n\nPopup auto-dismiss enabled in playbook engine.\nUse observer_status to check frames/popups.`);
});
|
|
5570
|
+
// MCP tool "observer_stop": signals the observer daemon to terminate and turns
// off popup checks in the active playbook engine (when one exists).
// Always resolves to a single text content item describing the outcome.
server.tool("observer_stop", "Stop the observer daemon.", {}, async () => {
    const pid = getObserverDaemonPid();
    if (!pid) {
        return { content: [{ type: "text", text: "No observer daemon running." }] };
    }
    try {
        let alreadyGone = false;
        try {
            process.kill(pid, "SIGTERM");
        }
        catch (killErr) {
            // ESRCH means the recorded PID no longer exists. That is the state the
            // caller asked for, so fall through and still disable popup checks
            // instead of reporting a failure and leaving them enabled.
            if (!killErr || killErr.code !== "ESRCH") {
                throw killErr;
            }
            alreadyGone = true;
        }
        if (!alreadyGone) {
            // Give the daemon a moment to handle SIGTERM before reporting.
            await new Promise((r) => setTimeout(r, 1000));
        }
        if (activePlaybookEngine) {
            activePlaybookEngine.setPopupCheck(false);
        }
        const text = alreadyGone
            ? `Observer daemon was already gone (stale pid=${pid}).`
            : `Observer daemon stopped (pid=${pid}).`;
        return { content: [{ type: "text", text }] };
    }
    catch (err) {
        // Non-Error throwables have no .message; stringify them instead.
        const reason = err instanceof Error ? err.message : String(err);
        return { content: [{ type: "text", text: `Failed to stop: ${reason}` }] };
    }
});
|
|
5586
|
+
// MCP tool "observer_status": formats the state returned by readObserverState()
// into a plain-text report (run state, watched window, frame counters, and the
// optional last-frame / popup / last-error sections), followed by the log path.
server.tool("observer_status", "Get observer daemon status — frames captured, OCR text, popup detection.", {}, async () => {
    const asText = (text) => ({ content: [{ type: "text", text }] });
    const state = readObserverState();
    if (!state) {
        return asText("Observer not running. Use observer_start to begin watching an app.");
    }
    const pidSuffix = state.pid ? ` (pid=${state.pid})` : "";
    const report = [];
    report.push(`Running: ${state.running}${pidSuffix}`);
    report.push(`Watching: ${state.bundleId} (window ${state.windowId})`);
    report.push(`Interval: ${state.intervalMs}ms`);
    report.push(`Frames: ${state.framesCaptured} captured, ${state.framesChanged} changed, ${state.ocrRuns} OCR runs`);
    if (state.lastFrame) {
        const frame = state.lastFrame;
        report.push(`Last frame: ${frame.capturedAt} (changed: ${frame.changed})`);
        // Only the first 500 characters of OCR text are shown.
        report.push(`OCR text (first 500 chars):\n${frame.ocrText.substring(0, 500)}`);
    }
    if (state.popup) {
        const popup = state.popup;
        report.push(`\nPOPUP DETECTED: "${popup.pattern}"`, ` Action: ${popup.dismissAction}`, ` Detected: ${popup.detectedAt}`);
    }
    if (state.lastError) {
        report.push(`\nLast error: ${state.lastError}`);
    }
    report.push(`\nLog: ${OBSERVER_LOG_FILE}`);
    return asText(report.join("\n"));
});
|
|
5613
|
+
// MCP tool "observer_ocr_roi": two modes in one tool.
//   - Poll mode (commandId given): looks the command up with getObserverCommand()
//     and reports its status, error, or OCR result.
//   - Submit mode (no commandId): queues an "ocr_roi" command for the daemon via
//     submitObserverCommand() and returns the new command id.
// The region parameters are optional in the schema so that a poll call can be
// made with only commandId, as the submit response instructs; submit mode
// validates that all four are present.
server.tool("observer_ocr_roi", "Submit a targeted ROI OCR command to the running observer daemon. The daemon captures the window region, runs OCR, and stores the result. Non-blocking — returns a command ID you can poll with a second call.", {
    x: z.number().optional().describe("X offset of the region (window-relative); required when submitting"),
    y: z.number().optional().describe("Y offset of the region (window-relative); required when submitting"),
    width: z.number().optional().describe("Width of the region; required when submitting"),
    height: z.number().optional().describe("Height of the region; required when submitting"),
    windowId: z.number().optional().describe("Window ID (defaults to daemon's watched window)"),
    commandId: z.string().optional().describe("If provided, poll an existing command instead of submitting a new one"),
}, async ({ x, y, width, height, windowId, commandId }) => {
    const reply = (text) => ({ content: [{ type: "text", text }] });
    // Poll mode — check result of a previously submitted command
    if (commandId) {
        const existing = getObserverCommand(commandId);
        if (!existing) {
            return reply(`Command ${commandId} not found.`);
        }
        if (existing.status === "pending" || existing.status === "running") {
            return reply(`Command ${commandId}: ${existing.status} — call again to poll.`);
        }
        if (existing.status === "error") {
            return reply(`Command ${commandId} failed: ${existing.error}`);
        }
        // Any other status is treated as done.
        const r = existing.result;
        if (!r) {
            // Avoid a TypeError when a finished command carries no result payload.
            return reply(`Command ${commandId}: ${existing.status} — no result recorded.`);
        }
        const lines = [
            `Command ${commandId}: done at ${r.completedAt}`,
            `Text: ${r.text.substring(0, 1000)}`,
            `Regions: ${r.regions.length}`,
        ];
        // Cap the listing at 20 regions to keep the response small.
        for (const region of r.regions.slice(0, 20)) {
            lines.push(` "${region.text}" @ (${region.bounds.x}, ${region.bounds.y}, ${region.bounds.width}×${region.bounds.height})`);
        }
        return reply(lines.join("\n"));
    }
    // Submit mode — create a new command
    const missing = Object.entries({ x, y, width, height })
        .filter(([, value]) => value === undefined)
        .map(([key]) => key);
    if (missing.length > 0) {
        return reply(`Missing region parameter(s): ${missing.join(", ")}. Provide x, y, width and height to submit a command, or commandId to poll one.`);
    }
    const pid = getObserverDaemonPid();
    if (!pid) {
        return reply("Observer daemon not running. Use observer_start first.");
    }
    const cmd = {
        type: "ocr_roi",
        roi: { x, y, width, height },
    };
    // Only set windowId when the caller supplied one.
    if (windowId !== undefined) {
        cmd.windowId = windowId;
    }
    const id = submitObserverCommand(cmd);
    return reply(`ROI OCR command submitted: ${id}\nRegion: (${x}, ${y}, ${width}×${height})\nThe daemon will process this on its next cycle. Call observer_ocr_roi with commandId="${id}" to poll the result.`);
});
|
|
5659
|
+
// ═══════════════════════════════════════════════
|
|
5660
|
+
// PHASE 6: TOOL MASTERY — Ingestion + Community
|
|
5661
|
+
// ═══════════════════════════════════════════════
|
|
5662
|
+
// MCP tool "scan_menu_bar": runs MenuScanner.scan() against the given process,
// optionally merges the result into the reference file through
// referenceMerger.mergeMenuScan(), and returns a text summary listing every
// discovered shortcut. Menu paths and the final output are passed through
// redactUsername() and a "Log Out <name>" replacement before being returned.
server.tool("scan_menu_bar", "Scan an app's menu bar via AX tree. Extracts all menu paths, keyboard shortcuts, and enabled/disabled states. Automatically merges discovered shortcuts into the reference file.", {
    pid: z.number().describe("Process ID of the running app"),
    bundleId: z.string().describe("macOS bundle ID (e.g. com.adobe.Photoshop)"),
    appName: z.string().describe("Human-readable app name (e.g. Photoshop)"),
    mergeToReference: z.boolean().optional().describe("Merge discovered shortcuts into the reference file (default true)"),
}, async ({ pid, bundleId, appName, mergeToReference }) => {
    // Username redaction followed by masking of the "Log Out <name>" menu item.
    const scrub = (text) => redactUsername(text).replace(/Log Out [^\n:]+/g, "Log Out [USER]");
    await ensureBridge();
    const result = await new MenuScanner(bridge).scan(pid, bundleId, appName);
    // Merging is on unless the caller passes mergeToReference: false.
    let mergeNote = "";
    if (mergeToReference !== false) {
        const { filePath, added, updated } = referenceMerger.mergeMenuScan(result);
        mergeNote = `\nReference updated: ${filePath} (${added} added, ${updated} updated)`;
    }
    const header = [
        `Menu scan: ${result.appName} (${result.bundleId})`,
        `Total menus: ${result.totalMenus}, Total items: ${result.totalItems}`,
        `Shortcuts found: ${Object.keys(result.shortcuts).length}`,
        mergeNote,
        "",
        "Shortcuts:",
    ];
    const shortcutRows = Object.entries(result.shortcuts)
        .map(([menuPath, keys]) => ` ${scrub(menuPath)}: ${keys}`);
    // Second pass over the whole text also covers the header and merge note.
    const output = scrub(header.concat(shortcutRows).join("\n"));
    return { content: [{ type: "text", text: output }] };
});
|
|
5696
|
+
// MCP tool "ingest_documentation": parses the supplied content with DocParser,
// optionally merges shortcuts and flows into the reference file through
// referenceMerger, and returns a text summary with capped previews
// (30 shortcuts, 10 workflows, 10 tips).
server.tool("ingest_documentation", "Parse a documentation page (HTML, markdown, or text) and extract shortcuts, workflows, and tips. Merges extracted knowledge into the app's reference file.", {
    content: z.string().describe("The documentation content (HTML, markdown, or plain text)"),
    url: z.string().describe("Source URL of the documentation"),
    format: z.enum(["html", "markdown", "text"]).optional().describe("Content format (default html)"),
    bundleId: z.string().describe("macOS bundle ID for the app this documentation covers"),
    appName: z.string().describe("Human-readable app name"),
    mergeToReference: z.boolean().optional().describe("Merge extracted knowledge into reference file (default true)"),
}, async ({ content, url, format, bundleId, appName, mergeToReference }) => {
    const result = new DocParser().parse(content, url, format ?? "html");
    // Merging is on unless the caller passes mergeToReference: false.
    let mergeNote = "";
    if (mergeToReference !== false) {
        const shortcutMerge = referenceMerger.mergeDocShortcuts(result.shortcuts, bundleId, appName);
        const flowMerge = referenceMerger.mergeDocFlows(result, bundleId, appName);
        mergeNote = `\nReference updated: ${shortcutMerge.filePath}\n Shortcuts: ${shortcutMerge.added} added, ${shortcutMerge.updated} updated\n Flows: ${flowMerge.added} added`;
    }
    const report = [
        `Documentation parsed: ${result.title}`,
        `Source: ${result.url}`,
        `Shortcuts: ${result.shortcuts.length}, Flows: ${result.flows.length}, Tips: ${result.tips.length}`,
        mergeNote,
    ];
    // Appends a blank line, a heading and the rows, but only for non-empty sections.
    const addSection = (heading, rows) => {
        if (rows.length > 0) {
            report.push("", heading, ...rows);
        }
    };
    addSection("Shortcuts:", result.shortcuts.slice(0, 30)
        .map((s) => ` ${s.name}: ${s.keys}${s.category ? ` (${s.category})` : ""}`));
    addSection("Workflows:", result.flows.slice(0, 10)
        .map((f) => ` ${f.name} (${f.steps.length} steps)`));
    addSection("Tips:", result.tips.slice(0, 10)
        .map((t) => ` - ${t}`));
    return { content: [{ type: "text", text: report.join("\n") }] };
});
|
|
5738
|
+
// MCP tool "ingest_tutorial": feeds transcript segments to TutorialExtractor,
// converts the extraction to playbook steps, and returns a numbered text list
// of the extracted steps (tool name in brackets, "?" when a step has none).
server.tool("ingest_tutorial", "Extract structured playbook steps from a video transcript (e.g. YouTube captions). Converts tutorial narration into actionable automation steps with tool mappings.", {
    segments: z.array(z.object({
        text: z.string(),
        startTime: z.number(),
        duration: z.number(),
    })).describe("Transcript segments (text + timing from YouTube captions or similar)"),
    title: z.string().describe("Video title"),
    platform: z.string().describe("Target platform name (e.g. davinci-resolve, figma)"),
}, async ({ segments, title, platform }) => {
    const extractor = new TutorialExtractor();
    const result = extractor.extract(segments, title, platform);
    const playbookSteps = extractor.toPlaybookSteps(result);
    const summary = [
        `Tutorial extracted: ${result.title}`,
        `Platform: ${result.platform}`,
        `Raw segments: ${result.rawSegments}, Action steps: ${result.actionSegments}`,
        `Playbook-ready steps: ${playbookSteps.length}`,
        "",
        "Steps:",
    ];
    // The listing uses the raw extracted steps, numbered from 1.
    const numbered = result.steps.map((step, index) => ` ${index + 1}. [${step.tool ?? "?"}] ${step.description}`);
    const text = summary.concat(numbered).join("\n");
    return { content: [{ type: "text", text }] };
});
|
|
5769
|
+
// MCP tool "coverage_report": asks coverageAuditor.audit() for a knowledge
// inventory of the app and renders it as text. When includeLiveMenuScan is set
// and a pid is supplied, a live MenuScanner result is passed to the audit.
// If the live scan was requested without a pid it cannot run; the report then
// says so explicitly instead of silently omitting the comparison.
server.tool("coverage_report", "Generate a coverage report for an app — shows what knowledge we have (shortcuts, selectors, flows, playbooks, errors) and identifies gaps with recommendations.", {
    bundleId: z.string().describe("macOS bundle ID (e.g. com.blackmagic-design.DaVinciResolveLite)"),
    appName: z.string().describe("Human-readable app name"),
    includeLiveMenuScan: z.boolean().optional().describe("Also scan the live menu bar for comparison (requires app to be running, needs pid)"),
    pid: z.number().optional().describe("Process ID (required if includeLiveMenuScan is true)"),
}, async ({ bundleId, appName, includeLiveMenuScan, pid }) => {
    let menuScan;
    let scanNote = "";
    if (includeLiveMenuScan && pid) {
        await ensureBridge();
        const scanner = new MenuScanner(bridge);
        menuScan = await scanner.scan(pid, bundleId, appName);
    }
    else if (includeLiveMenuScan) {
        scanNote = "Note: live menu scan skipped — includeLiveMenuScan requires a pid.";
    }
    const report = coverageAuditor.audit(bundleId, appName, menuScan);
    const lines = [
        `Coverage Report: ${report.app} (${report.bundleId})`,
        "",
        "Knowledge inventory:",
        ` Shortcuts: ${report.shortcutsKnown}`,
        ` Selectors: ${report.selectorsKnown}`,
        ` Flows: ${report.flowsKnown}`,
        ` Playbooks: ${report.playbooksAvailable}`,
        ` Error patterns: ${report.errorsDocumented}`,
    ];
    // The stability line is shown only for a positive score.
    if (report.selectorStabilityScore > 0) {
        lines.push(` Selector stability: ${(report.selectorStabilityScore * 100).toFixed(0)}%`);
    }
    if (report.highValueGaps.length > 0) {
        lines.push("", "High-value gaps:");
        for (const gap of report.highValueGaps) {
            lines.push(` - ${gap}`);
        }
    }
    if (report.shortcutsNotInReference.length > 0) {
        lines.push("", `Undocumented shortcuts (${report.shortcutsNotInReference.length}):`);
        // The heading shows the full count; the listing is capped at 20.
        for (const s of report.shortcutsNotInReference.slice(0, 20)) {
            lines.push(` ${s}`);
        }
    }
    if (report.workflowsWithNoPlaybook.length > 0) {
        lines.push("", `Missing playbooks for common workflows: ${report.workflowsWithNoPlaybook.join(", ")}`);
    }
    if (scanNote) {
        lines.push("", scanNote);
    }
    return { content: [{ type: "text", text: lines.join("\n") }] };
});
|
|
5812
|
+
// Tool "community_publish" (registered through originalTool): looks the playbook
// up in _executablePlaybookStore, hands it to communityPublisher.publish(), and
// on success invalidates the communityFetcher cache and reports the published
// entry. A falsy publish() result is reported as "not published" together with
// the playbook's own successCount/failCount figures.
// The schema declares minRuns, but the handler does not read it.
originalTool("community_publish", "Publish a validated local playbook to the community repository. Requires the playbook to have been executed successfully multiple times. Strips sensitive data (passwords, file paths).", {
    playbookId: z.string().describe("ID of the local playbook to publish"),
    successRate: z.number().min(0).max(1).describe("Success rate from testing (0.0-1.0)"),
    executionCount: z.number().describe("Number of times the playbook has been executed"),
    minRuns: z.number().optional().describe("Minimum successful runs required (default 3)"),
}, async ({ playbookId, successRate, executionCount }) => {
    const respond = (text) => ({ content: [{ type: "text", text }] });
    const playbook = _executablePlaybookStore.get(playbookId);
    if (!playbook) {
        return respond(`Playbook "${playbookId}" not found. Use export_playbook to list available playbooks.`);
    }
    // Server enforces minimum of 3 runs using playbook's own tracked data — client values are ignored
    // NOTE(review): successRate/executionCount are still forwarded here; confirm in the publisher that they are unused.
    const published = communityPublisher.publish(playbook, successRate, executionCount);
    if (published) {
        communityFetcher.invalidateCache();
        return respond(`Published to community: ${published.id}\nName: ${published.name}\nSteps: ${published.steps.length}\nSuccess rate: ${(published.metadata.successRate * 100).toFixed(0)}%`);
    }
    const trackedRuns = playbook.successCount + playbook.failCount;
    // Guard against division by zero when nothing has been tracked yet.
    const successPercent = trackedRuns > 0
        ? ((playbook.successCount / trackedRuns) * 100).toFixed(0)
        : 0;
    return respond(`Playbook not published. Requirements: at least 3 tracked executions and >50% success rate. Actual: ${trackedRuns} tracked runs, ${successPercent}% success.`);
});
|
|
5832
|
+
// Tool "community_fetch" (registered through originalTool): builds a query from
// whichever filters were supplied, calls communityFetcher.fetchWithRemote(), and
// renders each returned playbook as a short multi-line entry.
originalTool("community_fetch", "Search community playbooks for a platform or workflow. Returns ranked results by success rate.", {
    platform: z.string().optional().describe("Filter by platform name"),
    bundleId: z.string().optional().describe("Filter by macOS bundle ID"),
    workflow: z.string().optional().describe("Search by workflow name/description"),
    limit: z.number().optional().describe("Max results (default 20)"),
}, async ({ platform, bundleId, workflow, limit }) => {
    // Keep only the filters the caller actually provided.
    const query = Object.fromEntries(Object.entries({ platform, bundleId, workflow, limit })
        .filter(([, value]) => value !== undefined));
    const results = await communityFetcher.fetchWithRemote(query);
    if (results.length === 0) {
        return { content: [{ type: "text", text: "No community playbooks found matching the query." }] };
    }
    const entries = results.flatMap((pb) => [
        ` ${pb.id}`,
        ` Name: ${pb.name}`,
        ` Platform: ${pb.platform} | Steps: ${pb.steps.length}`,
        ` Success: ${(pb.metadata.successRate * 100).toFixed(0)}% (${pb.metadata.executionCount} runs)`,
        ` Score: ${pb.ratings.score} | By: ${pb.metadata.author}`,
        "",
    ]);
    const text = [`Community playbooks (${results.length} results):`, "", ...entries].join("\n");
    return { content: [{ type: "text", text }] };
});
|
|
2721
5862
|
// ═══════════════════════════════════════════════
|
|
2722
5863
|
// START
|
|
2723
5864
|
// ═══════════════════════════════════════════════
|
|
2724
5865
|
// Entry point: installs shutdown hooks, then connects the MCP server over stdio.
async function main() {
    // Shared shutdown sequence: request a perception stop (not awaited) and
    // synchronously flush the context tracker, learning engine and app map.
    const flushState = () => {
        void perceptionManager.stop();
        contextTracker.flush();
        learningEngine.flush();
        appMap.flush();
    };
    // Flush playbook learnings on graceful shutdown, then exit explicitly:
    // installing a signal listener removes Node's default exit-on-signal.
    for (const signal of ["SIGINT", "SIGTERM"]) {
        process.on(signal, () => {
            flushState();
            process.exit(0);
        });
    }
    // The process is already exiting here, so only the flush is needed.
    process.on("beforeExit", () => {
        flushState();
    });
    const transport = new StdioServerTransport();
    await server.connect(transport);
}
|