codeloop-mcp-server 0.1.46 → 0.1.48
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/auth/critical_floors.d.ts.map +1 -1
- package/dist/auth/critical_floors.js +8 -0
- package/dist/auth/critical_floors.js.map +1 -1
- package/dist/evidence/interaction_coverage.d.ts +15 -0
- package/dist/evidence/interaction_coverage.d.ts.map +1 -1
- package/dist/evidence/interaction_coverage.js +59 -1
- package/dist/evidence/interaction_coverage.js.map +1 -1
- package/dist/index.js +345 -31
- package/dist/index.js.map +1 -1
- package/dist/runners/window_manager.d.ts +34 -0
- package/dist/runners/window_manager.d.ts.map +1 -1
- package/dist/runners/window_manager.js +226 -0
- package/dist/runners/window_manager.js.map +1 -1
- package/dist/tools/design_compare.d.ts +43 -0
- package/dist/tools/design_compare.d.ts.map +1 -1
- package/dist/tools/design_compare.js +185 -66
- package/dist/tools/design_compare.js.map +1 -1
- package/dist/tools/gate_check.js +29 -7
- package/dist/tools/gate_check.js.map +1 -1
- package/dist/tools/verify.d.ts.map +1 -1
- package/dist/tools/verify.js +20 -0
- package/dist/tools/verify.js.map +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -135,6 +135,12 @@ const server = new McpServer({
|
|
|
135
135
|
async function withAuth(fn, tracker) {
|
|
136
136
|
const started = Date.now();
|
|
137
137
|
let outcome = { success: false };
|
|
138
|
+
// Photometry-DB E2E 8 regression: when the agent passes
|
|
139
|
+
// `project_dir`/`workspace_root` to a tool, remember that dir for
|
|
140
|
+
// the lifetime of this MCP process so the init-hint check no
|
|
141
|
+
// longer false-positives later calls where the call site didn't
|
|
142
|
+
// forward `dir` to `withInitHint`.
|
|
143
|
+
rememberInitializedDir(tracker?.cwd);
|
|
138
144
|
try {
|
|
139
145
|
// Local / self-hosted mode (CODELOOP_MODE=local): skip API-key validation
|
|
140
146
|
// entirely. All cloud-side checks are bypassed; usage events are queued
|
|
@@ -286,6 +292,26 @@ function buildVersionBanner() {
|
|
|
286
292
|
text: `[CodeLoop server v${v}]`,
|
|
287
293
|
};
|
|
288
294
|
}
|
|
295
|
+
/**
 * Last project directory observed to be initialized via an actual tool
 * call. `withAuth` feeds its tracker's `.cwd` into
 * `rememberInitializedDir`, which is the only writer visible here.
 *
 * Background (Photometry-DB E2E session 8 regression): the agent passed
 * `project_dir: "D:\\Work\\Photometry DB"` (which IS initialized) on
 * every call, but the init hint was checking the server's startup
 * `projectDir` (C:\Users\jiq on Windows) and incorrectly prepending
 * "This project has not been initialized" to every response. With this
 * cache, the first authenticated call that hits an initialized dir
 * silences the hint for the rest of the session, independent of whether
 * the specific call site forwarded `dir` to `withInitHint`.
 *
 * @type {string | null}
 */
let lastInitializedDir = null;

/**
 * Remember `dir` as the session's known-initialized project directory.
 *
 * The value is only stored when `isProjectInitialized(dir)` reports true;
 * an uninitialized dir never overwrites a previously remembered one.
 *
 * @param {unknown} dir Candidate project directory (normally the
 *   tracker's `cwd`). Anything that is not a non-empty string is ignored,
 *   matching the candidate filter used by `withInitHint`.
 * @returns {void}
 */
function rememberInitializedDir(dir) {
    if (typeof dir !== "string" || dir.length === 0) {
        return;
    }
    // This runs on every authenticated tool call. Re-probing a directory
    // that is already cached cannot change the outcome, so skip the check.
    if (dir === lastInitializedDir) {
        return;
    }
    if (isProjectInitialized(dir)) {
        lastInitializedDir = dir;
    }
}
|
|
289
315
|
function withInitHint(content, dir) {
|
|
290
316
|
// Order matters:
|
|
291
317
|
// 1. Update notice (most actionable signal — CRITICAL stays at top).
|
|
@@ -296,7 +322,22 @@ function withInitHint(content, dir) {
|
|
|
296
322
|
const banner = buildVersionBanner();
|
|
297
323
|
const withUpdate = withUpdateNotice(content);
|
|
298
324
|
const head = [];
|
|
299
|
-
|
|
325
|
+
// Candidate dirs in priority order:
|
|
326
|
+
// 1. Explicit `dir` arg from the call site (when the handler
|
|
327
|
+
// bothered to thread it through — the verify / capture /
|
|
328
|
+
// design_compare etc. handlers do).
|
|
329
|
+
// 2. `lastInitializedDir` — any dir the agent has already passed
|
|
330
|
+
// to *any* CodeLoop tool this session that resolved to an
|
|
331
|
+
// initialized config. This catches the long tail of handlers
|
|
332
|
+
// that don't forward `dir` (e.g. `recommend_tool`,
|
|
333
|
+
// `release_readiness`, `section_status`) so they no longer
|
|
334
|
+
// false-positive the hint after init has succeeded once.
|
|
335
|
+
// 3. The MCP server's startup `projectDir` (often the user's
|
|
336
|
+
// home folder on Windows / Cursor — see CODELOOP_PROJECT_DIR
|
|
337
|
+
// auto-injection notes in setup-project.ts).
|
|
338
|
+
const candidates = [dir, lastInitializedDir, projectDir].filter((d) => typeof d === "string" && d.length > 0);
|
|
339
|
+
const anyInitialized = candidates.some((d) => isProjectInitialized(d));
|
|
340
|
+
if (!anyInitialized) {
|
|
300
341
|
head.push({ type: "text", text: INIT_HINT });
|
|
301
342
|
}
|
|
302
343
|
const result = [...head, ...withUpdate];
|
|
@@ -305,6 +346,42 @@ function withInitHint(content, dir) {
|
|
|
305
346
|
return result;
|
|
306
347
|
}
|
|
307
348
|
const TOOL_BOOTSTRAP = "[CodeLoop] You MUST call codeloop_verify after every code change. If .codeloop/config.json is missing, call codeloop_init_project FIRST.\n\n";
|
|
349
|
+
/**
 * Synonym table behind the `target_type` preprocessor.
 *
 * Photometry-DB E2E 8 regression: the agent (reasonably) tried
 * `target_type: "windows_desktop"` because the project is a WPF
 * Windows app, and got a hard schema-validation rejection. Same
 * pattern for `mac_desktop`, `linux_desktop`, `web`, `android`,
 * `ios`. Each key maps to one of the four canonical values
 * (`desktop`, `browser`, `android_emulator`, `ios_simulator`) so the
 * agent isn't blocked by a thin naming convention.
 *
 * Keys are lower-case with `_` separators; `normalizeTargetType`
 * canonicalizes its input to that form before the lookup.
 */
const TARGET_TYPE_SYNONYMS = {
    desktop: "desktop",
    windows_desktop: "desktop",
    win_desktop: "desktop",
    win32_desktop: "desktop",
    mac_desktop: "desktop",
    macos_desktop: "desktop",
    osx_desktop: "desktop",
    linux_desktop: "desktop",
    native_desktop: "desktop",
    browser: "browser",
    web: "browser",
    webapp: "browser",
    chrome: "browser",
    android_emulator: "android_emulator",
    android: "android_emulator",
    ios_simulator: "ios_simulator",
    ios: "ios_simulator",
};

/**
 * Coerce a user-supplied `target_type` to its canonical spelling.
 *
 * @param {unknown} v Raw value from the tool call.
 * @returns {unknown} The canonical target type when `v` is a known
 *   synonym; otherwise `v` unchanged so the downstream schema can
 *   reject it with its normal message.
 */
function normalizeTargetType(v) {
    if (typeof v !== "string") {
        return v;
    }
    // Treat "Windows-Desktop" / "mac desktop" the same as "windows_desktop".
    const key = v.trim().toLowerCase().replace(/[\s-]+/g, "_");
    // Own-property lookup only: a plain `TABLE[key]` would resolve
    // inherited names such as "constructor" or "__proto__" to
    // Object.prototype members and return a non-string.
    return Object.prototype.hasOwnProperty.call(TARGET_TYPE_SYNONYMS, key)
        ? TARGET_TYPE_SYNONYMS[key]
        : v;
}
|
|
384
|
+
// Shared `target_type` schema: the raw input is first passed through
// `normalizeTargetType`, then validated against the four canonical values.
const targetTypeSchema = z.preprocess(normalizeTargetType, z.enum(["desktop", "browser", "android_emulator", "ios_simulator"]));
|
|
308
385
|
// ── Implemented Tools ────────────────────────────────────────────
|
|
309
386
|
server.tool("codeloop_verify", TOOL_BOOTSTRAP + `Run the CodeLoop verification suite on the current project. Use this tool when:
|
|
310
387
|
- You have implemented or modified code and need to check if it works correctly
|
|
@@ -1093,9 +1170,12 @@ server.tool("codeloop_capture_screenshot", TOOL_BOOTSTRAP + `Capture a screensho
|
|
|
1093
1170
|
- You want to capture a specific page/screen of the app for visual analysis
|
|
1094
1171
|
- You are navigating through the app to capture all pages for complete visual coverage
|
|
1095
1172
|
- You want to add a screenshot to an existing verification run
|
|
1096
|
-
Provide app_name to capture ONLY that app's window
|
|
1097
|
-
|
|
1098
|
-
|
|
1173
|
+
Provide app_name to capture ONLY that app's window. The app is automatically brought to the
|
|
1174
|
+
front before capture, and the IDE is restored to the front after. When app_name is omitted on
|
|
1175
|
+
a desktop-app project (WPF/.NET, native Xcode, Android Gradle host), this tool first falls
|
|
1176
|
+
back to evidence.target_app from .codeloop/config.json; if that's also missing it REFUSES to
|
|
1177
|
+
silently grab the full screen (which would otherwise capture the IDE) and returns an
|
|
1178
|
+
actionable error pointing at the exact config key to set.
|
|
1099
1179
|
Returns: confirmation + the captured image as an MCP ImageContent block so you can see what was captured.`, {
|
|
1100
1180
|
screen_name: z.string(),
|
|
1101
1181
|
app_name: z.string().optional(),
|
|
@@ -1116,9 +1196,45 @@ Returns: confirmation + the captured image as an MCP ImageContent block so you c
|
|
|
1116
1196
|
const { runDir } = createRunDir(undefined, join(cwd, "artifacts", "runs"));
|
|
1117
1197
|
screenshotsDir = join(runDir, "screenshots");
|
|
1118
1198
|
}
|
|
1119
|
-
|
|
1199
|
+
// Mirror codeloop_verify's auto-capture honesty: when the
|
|
1200
|
+
// project is a desktop app (WPF/.NET, native Xcode, Android
|
|
1201
|
+
// Gradle host) and the agent didn't pass an explicit app_name,
|
|
1202
|
+
// fall back to `evidence.target_app` from the project config
|
|
1203
|
+
// and turn on `desktopAppMode` so captureScreenshot refuses
|
|
1204
|
+
// a silent full-screen grab of the IDE. Previously this
|
|
1205
|
+
// tool would happily save a 4K PNG of Cursor whenever the
|
|
1206
|
+
// agent forgot app_name — and the auto-fix loop would then
|
|
1207
|
+
// burn cycles trying to "fix design diffs" against a
|
|
1208
|
+
// screenshot of the editor.
|
|
1209
|
+
const { detectPlatform } = await import("./tools/verify.js");
|
|
1210
|
+
const { loadConfig } = await import("./config.js");
|
|
1211
|
+
const platform = detectPlatform(cwd);
|
|
1212
|
+
const isDesktopAppProject = platform === "dotnet" || platform === "xcode" || platform === "android";
|
|
1213
|
+
const cfg = loadConfig(cwd);
|
|
1214
|
+
const targetApp = params.app_name ?? cfg.evidence?.target_app;
|
|
1215
|
+
const result = await captureScreenshot(screenshotsDir, params.screen_name, targetApp, undefined, { desktopAppMode: isDesktopAppProject });
|
|
1216
|
+
// Photometry-DB E2E 8 follow-on: when we capture a desktop app
|
|
1217
|
+
// window, also resolve its on-screen bounds so the agent can
|
|
1218
|
+
// (a) compute window-relative coords from the returned image
|
|
1219
|
+
// dimensions, and
|
|
1220
|
+
// (b) pass coords:"window" to codeloop_interact to get them
|
|
1221
|
+
// translated to screen-absolute automatically.
|
|
1222
|
+
// Without this, agents reasoned from a downscaled vision view
|
|
1223
|
+
// of the image and clicked tens or hundreds of pixels off the
|
|
1224
|
+
// intended target.
|
|
1225
|
+
let windowBounds = null;
|
|
1226
|
+
if (isDesktopAppProject && targetApp && result.captured) {
|
|
1227
|
+
try {
|
|
1228
|
+
const wm = await import("./runners/window_manager.js");
|
|
1229
|
+
const b = await wm.getWindowBounds(targetApp);
|
|
1230
|
+
if (b && b.width > 0 && b.height > 0) {
|
|
1231
|
+
windowBounds = { x: b.x, y: b.y, width: b.width, height: b.height };
|
|
1232
|
+
}
|
|
1233
|
+
}
|
|
1234
|
+
catch { /* best-effort */ }
|
|
1235
|
+
}
|
|
1120
1236
|
await trackUsage(apiKey, "visual_review");
|
|
1121
|
-
return result;
|
|
1237
|
+
return { ...result, windowBounds };
|
|
1122
1238
|
}, { tool: "codeloop_capture_screenshot", cwd: (params.project_dir || params.workspace_root || projectDir), input: params });
|
|
1123
1239
|
if (typeof authResult === "object" && authResult !== null && "error" in authResult) {
|
|
1124
1240
|
return { content: [{ type: "text", text: JSON.stringify(authResult, null, 2) }] };
|
|
@@ -1126,12 +1242,18 @@ Returns: confirmation + the captured image as an MCP ImageContent block so you c
|
|
|
1126
1242
|
const result = authResult;
|
|
1127
1243
|
const content = [];
|
|
1128
1244
|
if (result.captured && result.paths.length > 0) {
|
|
1129
|
-
|
|
1130
|
-
|
|
1131
|
-
|
|
1132
|
-
|
|
1133
|
-
|
|
1134
|
-
|
|
1245
|
+
const payload = {
|
|
1246
|
+
captured: true,
|
|
1247
|
+
screen_name: params.screen_name,
|
|
1248
|
+
path: result.paths[0],
|
|
1249
|
+
method: result.method,
|
|
1250
|
+
};
|
|
1251
|
+
if (result.windowBounds) {
|
|
1252
|
+
payload.window_bounds = result.windowBounds;
|
|
1253
|
+
payload.coordinate_hint =
|
|
1254
|
+
"This screenshot captures the named window. When you compute click coordinates from the image, pass them to codeloop_interact with `coords: \"window\"` so they're translated to screen-absolute automatically. (Default `coords: \"auto\"` also works when the coord fits inside the window — but `\"window\"` is unambiguous.)";
|
|
1255
|
+
}
|
|
1256
|
+
content.push({ type: "text", text: JSON.stringify(payload, null, 2) });
|
|
1135
1257
|
const data = readImageAsBase64(result.paths[0]);
|
|
1136
1258
|
if (data) {
|
|
1137
1259
|
content.push({ type: "image", data, mimeType: mimeForPath(result.paths[0]) });
|
|
@@ -1302,6 +1424,45 @@ After recording, call codeloop_interaction_replay to extract frames and analyze
|
|
|
1302
1424
|
content: withInitHint([{ type: "text", text: JSON.stringify(result, null, 2) }]),
|
|
1303
1425
|
};
|
|
1304
1426
|
});
|
|
1427
|
+
// Registers the `codeloop_launch_app` tool: resolves the app name from the
// call or from `evidence.target_app` in the project config, then delegates
// the launch to `launchDesktopApp` in the window manager.
server.tool("codeloop_launch_app", TOOL_BOOTSTRAP + `Launch a desktop application by name from a project's build output. Use when:
- You need the app under test to be running before codeloop_start_recording / codeloop_interact.
- The agent doesn't know where the executable lives and shouldn't have to hand-roll Start-Process / open -a.

This tool is the canonical replacement for hand-coding PowerShell Start-Process / osascript / xdg-open
from the agent. For Android / iOS, use codeloop_interact action="launch_app" with package_id instead.

Search order on Windows: publish/**/*.exe → bin/Release/**/*.exe → bin/Debug/**/*.exe (newest first).
On macOS: publish/**/*.app → build/**/*.app → /Applications/<name>.app → open -a.
On Linux: build/**/<name> → bin/**/<name> → dist/**/<name>.

If app_name is omitted, falls back to evidence.target_app from .codeloop/config.json (auto-detected at
init for .NET/Xcode/Android projects via detect-target-app).`, {
    app_name: z.string().optional().describe("Window title / executable name of the app to launch. Defaults to evidence.target_app from .codeloop/config.json. Required if target_app is unset."),
    project_dir: z.string().optional().describe("Absolute path to the project root. Defaults to CODELOOP_PROJECT_DIR / discovered project dir."),
    workspace_root: z.string().optional().describe("[Alias for project_dir]"),
}, async (params) => {
    // Explicit dir from the call (either alias), if any.
    const requestedDir = () => params.project_dir || params.workspace_root;
    // Working dir: explicit dir first, then the server-level `projectDir`.
    // Kept as a thunk so each use evaluates at the same point the original did.
    const resolveCwd = () => requestedDir() || projectDir;

    const launch = async () => {
        const windowManager = await import("./runners/window_manager.js");
        const configModule = await import("./config.js");
        const workingDir = resolveCwd();
        // The config is loaded even when app_name was passed explicitly.
        const config = configModule.loadConfig(workingDir);
        const resolvedName = params.app_name || config.evidence?.target_app;
        if (resolvedName) {
            const launchOutcome = await windowManager.launchDesktopApp(resolvedName, workingDir);
            return { app_name: resolvedName, ...launchOutcome };
        }
        // Nothing to launch: report why instead of guessing a name.
        return {
            launched: false,
            reason: "No app_name provided and evidence.target_app is unset in .codeloop/config.json. Set it (e.g. \"evidence\": { \"target_app\": \"My App\" }) or pass app_name explicitly. For .NET/Xcode/Android projects, codeloop_init_project auto-detects this — re-run init or edit the config by hand.",
        };
    };

    const authResult = await withAuth(launch, { tool: "codeloop_launch_app", cwd: resolveCwd(), input: params });
    const body = [{ type: "text", text: JSON.stringify(authResult, null, 2) }];
    const isAuthError = typeof authResult === "object" && authResult !== null && "error" in authResult;
    // Auth failures are returned bare; only successful results get the init hint.
    return isAuthError
        ? { content: body }
        : { content: withInitHint(body, requestedDir()) };
});
|
|
1305
1466
|
server.tool("codeloop_start_recording", TOOL_BOOTSTRAP + `Start recording the app window in the background. The app is brought to the front automatically
|
|
1306
1467
|
(un-minimized if needed). Recording continues while you interact with the app. Call codeloop_stop_recording when done.
|
|
1307
1468
|
This is the PREFERRED recording method because it lets you actively operate the app during capture.
|
|
@@ -1328,11 +1489,12 @@ Flow: start_recording → codeloop_interact with ALL app elements → stop_recor
|
|
|
1328
1489
|
Supports desktop apps, Android emulator, iOS Simulator, and browser targets.
|
|
1329
1490
|
Multi-monitor: on macOS, automatically detects which screen the app window is on.
|
|
1330
1491
|
App logs (stdout, logcat, simctl log) are automatically captured alongside the video.`, {
|
|
1331
|
-
app_name: z.string().describe("The name of the app to record (used to find and focus its window)"),
|
|
1492
|
+
app_name: z.string().optional().describe("The name of the app to record (used to find and focus its window). For desktop projects, defaults to evidence.target_app from .codeloop/config.json — set during init via detect-target-app for .NET/Xcode/Android projects, or settable manually."),
|
|
1332
1493
|
run_id: z.string().optional().describe("Existing run ID to store the video in"),
|
|
1333
1494
|
max_duration_seconds: z.number().default(120).describe("Safety timeout — recording stops automatically after this many seconds"),
|
|
1334
|
-
target_type:
|
|
1495
|
+
target_type: targetTypeSchema.optional()
|
|
1335
1496
|
.describe("Capture method. Auto-detected from project if omitted. desktop=ffmpeg screen, android_emulator=adb screenrecord, ios_simulator=simctl recordVideo, browser=ffmpeg/Playwright"),
|
|
1497
|
+
auto_launch: z.boolean().default(true).describe("When target_type=desktop and the app isn't already running, auto-launch it from the project's build output via evidence.target_app. Set false to skip (e.g. when the app is started by another process)."),
|
|
1336
1498
|
project_dir: z.string().optional().describe("Absolute path to the project root. Defaults to CODELOOP_PROJECT_DIR env var or auto-discovered project directory. MUST be an actual project folder — passing the user's home directory is rejected. If your IDE launches the MCP server from the wrong cwd (common on Windows where Cursor uses C:\\Users\\<name> as cwd), set CODELOOP_PROJECT_DIR or pass this param explicitly."),
|
|
1337
1499
|
workspace_root: z.string().optional().describe("[Alias for project_dir] Same semantics; accepted because many agents reach for this conventional name. Pass either `project_dir` OR `workspace_root` — they're equivalent."),
|
|
1338
1500
|
}, async (params) => {
|
|
@@ -1340,6 +1502,7 @@ App logs (stdout, logcat, simctl log) are automatically captured alongside the v
|
|
|
1340
1502
|
const { startBackgroundRecording } = await import("./runners/video_recorder.js");
|
|
1341
1503
|
const { createRunDir, getRunDir, getArtifactsBaseDir } = await import("./evidence/artifacts.js");
|
|
1342
1504
|
const { detectTargetType } = await import("./runners/platform_detect.js");
|
|
1505
|
+
const { loadConfig } = await import("./config.js");
|
|
1343
1506
|
const cwd = (params.project_dir || params.workspace_root || projectDir);
|
|
1344
1507
|
let videosDir;
|
|
1345
1508
|
if (params.run_id) {
|
|
@@ -1351,7 +1514,11 @@ App logs (stdout, logcat, simctl log) are automatically captured alongside the v
|
|
|
1351
1514
|
videosDir = join(runDir, "videos");
|
|
1352
1515
|
}
|
|
1353
1516
|
const targetType = params.target_type || (await detectTargetType(cwd));
|
|
1517
|
+
const cfg = loadConfig(cwd);
|
|
1354
1518
|
let appName = params.app_name;
|
|
1519
|
+
if (!appName && (targetType === "desktop")) {
|
|
1520
|
+
appName = cfg.evidence?.target_app;
|
|
1521
|
+
}
|
|
1355
1522
|
if (targetType === "browser") {
|
|
1356
1523
|
const bi = await import("./runners/browser_interaction.js");
|
|
1357
1524
|
await bi.ensureBrowserPage();
|
|
@@ -1360,7 +1527,42 @@ App logs (stdout, logcat, simctl log) are automatically captured alongside the v
|
|
|
1360
1527
|
appName = pwAppName;
|
|
1361
1528
|
}
|
|
1362
1529
|
}
|
|
1363
|
-
|
|
1530
|
+
// Photometry-DB E2E 8: agents spent many turns manually
|
|
1531
|
+
// probing `Get-Process` / `Start-Process` to launch the app
|
|
1532
|
+
// because nothing in CodeLoop did it for them. Now, when
|
|
1533
|
+
// recording a desktop app, we auto-launch from the build
|
|
1534
|
+
// output if the app isn't already running.
|
|
1535
|
+
const wm = await import("./runners/window_manager.js");
|
|
1536
|
+
let autoLaunchSummary;
|
|
1537
|
+
if (targetType === "desktop" && params.auto_launch !== false && appName) {
|
|
1538
|
+
try {
|
|
1539
|
+
const bounds = await wm.getWindowBounds(appName);
|
|
1540
|
+
if (!bounds) {
|
|
1541
|
+
const r = await wm.launchDesktopApp(appName, cwd);
|
|
1542
|
+
autoLaunchSummary = {
|
|
1543
|
+
attempted: true,
|
|
1544
|
+
launched: r.launched,
|
|
1545
|
+
command: r.command,
|
|
1546
|
+
reason: r.reason,
|
|
1547
|
+
};
|
|
1548
|
+
if (r.launched) {
|
|
1549
|
+
// Give the window time to appear / paint before the
|
|
1550
|
+
// recorder starts capturing frames.
|
|
1551
|
+
await new Promise((res) => setTimeout(res, 2000));
|
|
1552
|
+
}
|
|
1553
|
+
}
|
|
1554
|
+
else {
|
|
1555
|
+
autoLaunchSummary = { attempted: false, launched: true, reason: "already running" };
|
|
1556
|
+
}
|
|
1557
|
+
}
|
|
1558
|
+
catch (e) {
|
|
1559
|
+
autoLaunchSummary = { attempted: true, launched: false, reason: e.message };
|
|
1560
|
+
}
|
|
1561
|
+
}
|
|
1562
|
+
const result = await startBackgroundRecording(videosDir, appName ?? "", params.max_duration_seconds, targetType);
|
|
1563
|
+
if (autoLaunchSummary) {
|
|
1564
|
+
result.auto_launch = autoLaunchSummary;
|
|
1565
|
+
}
|
|
1364
1566
|
await trackUsage(apiKey, "visual_review");
|
|
1365
1567
|
return result;
|
|
1366
1568
|
}, { tool: "codeloop_start_recording", cwd: (params.project_dir || params.workspace_root || projectDir), input: params });
|
|
@@ -1936,8 +2138,8 @@ MANDATORY for web apps: You MUST type into form fields, fill login/signup forms,
|
|
|
1936
2138
|
validation errors, and click submit buttons. Just navigating pages is NOT enough.
|
|
1937
2139
|
Wait 1-2 seconds between interactions so video frames capture state changes.`, {
|
|
1938
2140
|
action: z.string().describe("Action to perform: click, double_click, right_click, hover, type, keystroke, hotkey, scroll, drag_drop, long_press, type_and_submit, type_and_tab, fill_form, select_option, toggle, upload_file, navigate_url, navigate_back, navigate_forward, wait, sequence, swipe, back_button, home_button, deep_link, grant_permission, rotate_device, biometric_auth, launch_app, clear_app_data, mock_location, simulate_network, maestro_flow, win_ui_inspect, win_ui_automate"),
|
|
1939
|
-
target_type:
|
|
1940
|
-
.describe("Interaction target. Auto-detected if omitted."),
|
|
2141
|
+
target_type: targetTypeSchema.optional()
|
|
2142
|
+
.describe("Interaction target. Auto-detected if omitted. Accepts synonyms: `windows_desktop`/`mac_desktop`/`linux_desktop` → `desktop`; `web` → `browser`; `android` → `android_emulator`; `ios` → `ios_simulator`."),
|
|
1941
2143
|
x: z.number().optional().describe("X coordinate for click/scroll/drag/swipe"),
|
|
1942
2144
|
y: z.number().optional().describe("Y coordinate for click/scroll/drag/swipe"),
|
|
1943
2145
|
x2: z.number().optional().describe("End X for drag_drop/swipe"),
|
|
@@ -1968,7 +2170,7 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
|
|
|
1968
2170
|
action: z.string(),
|
|
1969
2171
|
params: z.record(z.unknown()).optional(),
|
|
1970
2172
|
delay_ms: z.number().optional(),
|
|
1971
|
-
})).optional().describe("Steps for sequence action"),
|
|
2173
|
+
}).passthrough()).optional().describe("Steps for sequence action. Accepts BOTH nested form `{ action, params: { x, y, … }, delay_ms? }` and flat form `{ action, x, y, … }` — the flat form is what agents naturally write (mirrors the top-level codeloop_interact shape). Supports inside desktop sequences: click, double_click, right_click, hover, scroll, type, hotkey, keystroke, navigate_url, wait, win_ui_automate."),
|
|
1972
2174
|
maestro_steps: z.array(z.string()).optional().describe("High-level steps for maestro_flow"),
|
|
1973
2175
|
automation_action: z.enum(["invoke", "setValue", "toggle", "select", "scroll"]).optional()
|
|
1974
2176
|
.describe("For win_ui_automate"),
|
|
@@ -1978,6 +2180,7 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
|
|
|
1978
2180
|
description: z.string().optional().describe("[Alias for intent] Same semantics."),
|
|
1979
2181
|
purpose: z.string().optional().describe("[Alias for intent] Same semantics."),
|
|
1980
2182
|
step: z.string().optional().describe("Plan-step name when this interaction is driving a codeloop_plan_user_journey arc (e.g. 'edit', 'delete', 'create', 'save', 'verify'). Logged alongside `intent` and read by the CRUD classifier."),
|
|
2183
|
+
coords: z.enum(["auto", "window", "screen"]).optional().describe("How to interpret x/y for desktop click/double_click/right_click/hover/scroll/drag/long_press. `auto` (default): if `app_name` resolves to a visible window AND (x, y) fits inside the window's client area, treat as window-relative and auto-offset by the window origin; otherwise leave as raw screen-absolute coords. `window`: ALWAYS add the window origin offset (errors if the window isn't found). `screen`: ALWAYS pass through (legacy behaviour, matches CGEvent / user32.dll / xdotool semantics). Fixes the Photometry-DB E2E 8 failure mode where the agent captured a 1600×900 window screenshot, computed click coords against the image, and missed the sidebar because the window's actual top-left was (286, 286) on a 5120×1440 screen."),
|
|
1981
2184
|
project_dir: z.string().optional().describe("Absolute path to project root."),
|
|
1982
2185
|
workspace_root: z.string().optional().describe("[Alias for project_dir] Pass either; they're equivalent."),
|
|
1983
2186
|
}, async (params) => {
|
|
@@ -2007,13 +2210,58 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
|
|
|
2007
2210
|
await bi.ensureBrowserPage();
|
|
2008
2211
|
}
|
|
2009
2212
|
// Bring the app to front before desktop interactions (non-browser, non-mobile).
|
|
2213
|
+
let windowOriginOffset = null;
|
|
2010
2214
|
if (tt === "desktop") {
|
|
2011
2215
|
const appName = params.app_name || vr.getActiveRecordingAppName();
|
|
2012
2216
|
if (appName && action !== "wait") {
|
|
2013
2217
|
await wm.bringAppToFront(appName);
|
|
2014
2218
|
await new Promise(r => setTimeout(r, 300));
|
|
2219
|
+
// Photometry-DB E2E 8: agents commonly compute click coords
|
|
2220
|
+
// from a window-cropped screenshot (which is what
|
|
2221
|
+
// codeloop_capture_screenshot returns when `app_name` is
|
|
2222
|
+
// set), then pass those coords to codeloop_interact —
|
|
2223
|
+
// which expects raw SCREEN coordinates. On a multi-monitor
|
|
2224
|
+
// / DPI-scaled setup that mismatch silently dropped clicks
|
|
2225
|
+
// 100s of pixels off-target. When `coords` is `auto` (the
|
|
2226
|
+
// default) we look up the window's actual screen origin
|
|
2227
|
+
// and add it to x/y, but ONLY if (x, y) fits inside the
|
|
2228
|
+
// window — that keeps legacy callers passing raw screen
|
|
2229
|
+
// coords working unchanged. `coords: "window"` forces the
|
|
2230
|
+
// offset; `coords: "screen"` opts out.
|
|
2231
|
+
const coordsMode = params.coords ?? "auto";
|
|
2232
|
+
if (coordsMode !== "screen") {
|
|
2233
|
+
try {
|
|
2234
|
+
const b = await wm.getWindowBounds(appName);
|
|
2235
|
+
if (b && b.width > 0 && b.height > 0) {
|
|
2236
|
+
windowOriginOffset = { dx: b.x, dy: b.y, width: b.width, height: b.height };
|
|
2237
|
+
}
|
|
2238
|
+
}
|
|
2239
|
+
catch { /* best-effort */ }
|
|
2240
|
+
}
|
|
2015
2241
|
}
|
|
2016
2242
|
}
|
|
2243
|
+
// Coordinate translator shared by the coordinate-driven desktop actions
// below. Returns `{ x, y }` either untouched or shifted by the window
// origin stored in `windowOriginOffset`, depending on `params.coords`.
const translateXY = (x, y) => {
    const untouched = { x, y };
    const applicable = tt === "desktop" && x != null && y != null && Boolean(windowOriginOffset);
    if (!applicable) {
        return untouched;
    }
    const { dx, dy, width, height } = windowOriginOffset;
    const shifted = { x: x + dx, y: y + dy };
    switch (params.coords ?? "auto") {
        case "screen":
            // Caller opted out of translation.
            return untouched;
        case "window":
            // Caller asserted window-relative coordinates.
            return shifted;
        default: {
            // auto: a point that fits inside the window's extent is assumed
            // to come from a window-cropped screenshot and is shifted;
            // anything else is passed through as a raw screen coordinate.
            const fitsHorizontally = x >= 0 && x <= width;
            const fitsVertically = y >= 0 && y <= height;
            return fitsHorizontally && fitsVertically ? shifted : untouched;
        }
    }
};
|
|
2017
2265
|
switch (action) {
|
|
2018
2266
|
case "click":
|
|
2019
2267
|
if (tt === "browser" && params.selector) {
|
|
@@ -2035,7 +2283,8 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
|
|
|
2035
2283
|
}
|
|
2036
2284
|
}
|
|
2037
2285
|
else if (params.x != null && params.y != null) {
|
|
2038
|
-
|
|
2286
|
+
const t = translateXY(params.x, params.y);
|
|
2287
|
+
success = await wm.clickAtPosition(t.x, t.y);
|
|
2039
2288
|
}
|
|
2040
2289
|
detail = `click at ${params.selector || `(${params.x},${params.y})`}`;
|
|
2041
2290
|
break;
|
|
@@ -2044,7 +2293,8 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
|
|
|
2044
2293
|
success = await bi.browserDoubleClick(params.selector);
|
|
2045
2294
|
}
|
|
2046
2295
|
else if (params.x != null && params.y != null) {
|
|
2047
|
-
|
|
2296
|
+
const t = translateXY(params.x, params.y);
|
|
2297
|
+
success = await wm.doubleClickAtPosition(t.x, t.y);
|
|
2048
2298
|
}
|
|
2049
2299
|
detail = `double_click at ${params.selector || `(${params.x},${params.y})`}`;
|
|
2050
2300
|
break;
|
|
@@ -2053,7 +2303,8 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
|
|
|
2053
2303
|
success = await bi.browserRightClick(params.selector);
|
|
2054
2304
|
}
|
|
2055
2305
|
else if (params.x != null && params.y != null) {
|
|
2056
|
-
|
|
2306
|
+
const t = translateXY(params.x, params.y);
|
|
2307
|
+
success = await wm.rightClickAtPosition(t.x, t.y);
|
|
2057
2308
|
}
|
|
2058
2309
|
detail = `right_click at ${params.selector || `(${params.x},${params.y})`}`;
|
|
2059
2310
|
break;
|
|
@@ -2062,7 +2313,8 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
|
|
|
2062
2313
|
success = await bi.browserHover(params.selector);
|
|
2063
2314
|
}
|
|
2064
2315
|
else if (params.x != null && params.y != null) {
|
|
2065
|
-
|
|
2316
|
+
const t = translateXY(params.x, params.y);
|
|
2317
|
+
success = await wm.hoverAtPosition(t.x, t.y);
|
|
2066
2318
|
}
|
|
2067
2319
|
detail = `hover at ${params.selector || `(${params.x},${params.y})`}`;
|
|
2068
2320
|
break;
|
|
@@ -2147,7 +2399,8 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
|
|
|
2147
2399
|
success = await wm.simctlSwipe(sx, sy, ex, ey);
|
|
2148
2400
|
}
|
|
2149
2401
|
else {
|
|
2150
|
-
|
|
2402
|
+
const t = translateXY(params.x || 500, params.y || 400);
|
|
2403
|
+
success = await wm.scrollAtPosition(t.x, t.y, params.direction || "down", params.amount || 3);
|
|
2151
2404
|
}
|
|
2152
2405
|
detail = `scroll ${params.direction || "down"}`;
|
|
2153
2406
|
break;
|
|
@@ -2160,7 +2413,9 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
|
|
|
2160
2413
|
success = await wm.adbSwipe(params.x, params.y, params.x2, params.y2, params.duration_ms || 500);
|
|
2161
2414
|
}
|
|
2162
2415
|
else {
|
|
2163
|
-
|
|
2416
|
+
const a = translateXY(params.x, params.y);
|
|
2417
|
+
const b = translateXY(params.x2, params.y2);
|
|
2418
|
+
success = await wm.dragDrop(a.x, a.y, b.x, b.y, params.duration_ms || 500);
|
|
2164
2419
|
}
|
|
2165
2420
|
}
|
|
2166
2421
|
detail = `drag_drop`;
|
|
@@ -2170,7 +2425,8 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
|
|
|
2170
2425
|
success = await wm.adbLongPress(params.x, params.y, params.duration_ms || 1000);
|
|
2171
2426
|
}
|
|
2172
2427
|
else if (params.x != null && params.y != null) {
|
|
2173
|
-
|
|
2428
|
+
const t = translateXY(params.x, params.y);
|
|
2429
|
+
success = await wm.longPressAtPosition(t.x, t.y, params.duration_ms || 1000);
|
|
2174
2430
|
}
|
|
2175
2431
|
detail = `long_press at (${params.x},${params.y})`;
|
|
2176
2432
|
break;
|
|
@@ -2352,11 +2608,37 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
|
|
|
2352
2608
|
if (tt === "android_emulator" && params.package_id) {
|
|
2353
2609
|
const r = await import("./runners/base.js").then(m => m.runCommand("adb", ["shell", "am", "start", "-n", params.package_id], process.cwd()));
|
|
2354
2610
|
success = r.exit_code === 0;
|
|
2611
|
+
detail = `launch_app "${params.package_id}"`;
|
|
2355
2612
|
}
|
|
2356
2613
|
else if (tt === "ios_simulator" && params.package_id) {
|
|
2357
2614
|
success = await wm.simctlLaunch(params.package_id);
|
|
2615
|
+
detail = `launch_app "${params.package_id}"`;
|
|
2616
|
+
}
|
|
2617
|
+
else if (tt === "desktop") {
|
|
2618
|
+
// Photometry-DB E2E 8: desktop launch was completely
|
|
2619
|
+
// missing — agents had to hand-roll PowerShell
|
|
2620
|
+
// Start-Process / `open -a` calls. Now resolves via
|
|
2621
|
+
// evidence.target_app from the project config when
|
|
2622
|
+
// app_name is omitted.
|
|
2623
|
+
const { loadConfig } = await import("./config.js");
|
|
2624
|
+
const cfg = loadConfig(cwd);
|
|
2625
|
+
const appName = params.app_name || cfg.evidence?.target_app;
|
|
2626
|
+
if (!appName) {
|
|
2627
|
+
success = false;
|
|
2628
|
+
detail = "launch_app desktop: no app_name provided and evidence.target_app is unset in .codeloop/config.json. Set it (e.g. \"target_app\": \"Photometry DB\") or pass app_name explicitly.";
|
|
2629
|
+
}
|
|
2630
|
+
else {
|
|
2631
|
+
const r = await wm.launchDesktopApp(appName, cwd);
|
|
2632
|
+
success = r.launched;
|
|
2633
|
+
detail = r.launched
|
|
2634
|
+
? `launch_app "${appName}" via ${r.command}${r.pid ? ` (pid ${r.pid})` : ""}`
|
|
2635
|
+
: `launch_app "${appName}" failed: ${r.reason || "unknown error"}`;
|
|
2636
|
+
}
|
|
2637
|
+
}
|
|
2638
|
+
else {
|
|
2639
|
+
success = false;
|
|
2640
|
+
detail = `launch_app "${params.package_id || params.app_name || ""}": target ${tt} not supported in this action`;
|
|
2358
2641
|
}
|
|
2359
|
-
detail = `launch_app "${params.package_id}"`;
|
|
2360
2642
|
break;
|
|
2361
2643
|
case "clear_app_data":
|
|
2362
2644
|
if (tt === "android_emulator" && params.package_id) {
|
|
@@ -2420,7 +2702,22 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
|
|
|
2420
2702
|
if (step.delay_ms)
|
|
2421
2703
|
await new Promise(r => setTimeout(r, step.delay_ms));
|
|
2422
2704
|
const stepAction = step.action;
|
|
2423
|
-
|
|
2705
|
+
// Photometry-DB E2E 8: agents naturally write steps in
|
|
2706
|
+
// FLAT form `{ action, x, y, ms, … }` because that
|
|
2707
|
+
// mirrors the top-level codeloop_interact shape. The
|
|
2708
|
+
// schema documented the NESTED form `{ action,
|
|
2709
|
+
// params: { … } }`. Now we accept both: prefer
|
|
2710
|
+
// `step.params` if it is non-empty, otherwise fall back to the
|
|
2711
|
+
// step object itself minus the wrapper keys.
|
|
2712
|
+
const stepObj = step;
|
|
2713
|
+
const nested = (step.params || {});
|
|
2714
|
+
const sp = Object.keys(nested).length > 0
|
|
2715
|
+
? nested
|
|
2716
|
+
: Object.fromEntries(Object.entries(stepObj).filter(([k]) => k !== "action" && k !== "params" && k !== "delay_ms"));
|
|
2717
|
+
// Convenient aliases: agents wrote `ms` for wait
|
|
2718
|
+
// duration in the log; accept that as `duration_ms`.
|
|
2719
|
+
if (sp.ms != null && sp.duration_ms == null)
|
|
2720
|
+
sp.duration_ms = sp.ms;
|
|
2424
2721
|
let stepOk = false;
|
|
2425
2722
|
let stepReason;
|
|
2426
2723
|
try {
|
|
@@ -2428,7 +2725,20 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
|
|
|
2428
2725
|
stepOk = await bi.browserClick(sp.selector);
|
|
2429
2726
|
}
|
|
2430
2727
|
else if (stepAction === "click" && sp.x != null && sp.y != null) {
|
|
2431
|
-
|
|
2728
|
+
const t = translateXY(sp.x, sp.y);
|
|
2729
|
+
stepOk = await wm.clickAtPosition(t.x, t.y);
|
|
2730
|
+
}
|
|
2731
|
+
else if (stepAction === "double_click" && tt !== "browser" && sp.x != null && sp.y != null) {
|
|
2732
|
+
const t = translateXY(sp.x, sp.y);
|
|
2733
|
+
stepOk = await wm.doubleClickAtPosition(t.x, t.y);
|
|
2734
|
+
}
|
|
2735
|
+
else if (stepAction === "right_click" && tt !== "browser" && sp.x != null && sp.y != null) {
|
|
2736
|
+
const t = translateXY(sp.x, sp.y);
|
|
2737
|
+
stepOk = await wm.rightClickAtPosition(t.x, t.y);
|
|
2738
|
+
}
|
|
2739
|
+
else if (stepAction === "hover" && tt !== "browser" && sp.x != null && sp.y != null) {
|
|
2740
|
+
const t = translateXY(sp.x, sp.y);
|
|
2741
|
+
stepOk = await wm.hoverAtPosition(t.x, t.y);
|
|
2432
2742
|
}
|
|
2433
2743
|
else if (stepAction === "type" && tt === "browser" && sp.selector && sp.text) {
|
|
2434
2744
|
stepOk = await bi.browserType(sp.selector, sp.text);
|
|
@@ -2464,9 +2774,13 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
|
|
|
2464
2774
|
stepOk = tt === "browser" ? await bi.browserKeystroke(sp.key) : await wm.sendKeyByName(sp.key);
|
|
2465
2775
|
}
|
|
2466
2776
|
else if (stepAction === "scroll") {
|
|
2467
|
-
|
|
2468
|
-
|
|
2469
|
-
|
|
2777
|
+
if (tt === "browser") {
|
|
2778
|
+
stepOk = await bi.browserScroll(sp.direction || "down", sp.amount || 300);
|
|
2779
|
+
}
|
|
2780
|
+
else {
|
|
2781
|
+
const t = translateXY(sp.x || 500, sp.y || 400);
|
|
2782
|
+
stepOk = await wm.scrollAtPosition(t.x, t.y, sp.direction || "down", sp.amount || 3);
|
|
2783
|
+
}
|
|
2470
2784
|
}
|
|
2471
2785
|
else if (stepAction === "wait") {
|
|
2472
2786
|
await new Promise(r => setTimeout(r, sp.duration_ms || 1000));
|