codeloop-mcp-server 0.1.47 → 0.1.48
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/auth/critical_floors.d.ts.map +1 -1
- package/dist/auth/critical_floors.js +4 -0
- package/dist/auth/critical_floors.js.map +1 -1
- package/dist/index.js +322 -27
- package/dist/index.js.map +1 -1
- package/dist/runners/window_manager.d.ts +34 -0
- package/dist/runners/window_manager.d.ts.map +1 -1
- package/dist/runners/window_manager.js +226 -0
- package/dist/runners/window_manager.js.map +1 -1
- package/dist/tools/design_compare.d.ts +43 -0
- package/dist/tools/design_compare.d.ts.map +1 -1
- package/dist/tools/design_compare.js +185 -66
- package/dist/tools/design_compare.js.map +1 -1
- package/dist/tools/gate_check.js +29 -7
- package/dist/tools/gate_check.js.map +1 -1
- package/package.json +1 -1
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"critical_floors.d.ts","sourceRoot":"","sources":["../../src/auth/critical_floors.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AAEH,MAAM,WAAW,aAAa;IAC5B,4DAA4D;IAC5D,WAAW,EAAE,MAAM,CAAC;IACpB,wDAAwD;IACxD,MAAM,EAAE,MAAM,CAAC;CAChB;AAED;;;;;;;;GAQG;AACH,eAAO,MAAM,eAAe,EAAE,aAAa,
|
|
1
|
+
{"version":3,"file":"critical_floors.d.ts","sourceRoot":"","sources":["../../src/auth/critical_floors.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AAEH,MAAM,WAAW,aAAa;IAC5B,4DAA4D;IAC5D,WAAW,EAAE,MAAM,CAAC;IACpB,wDAAwD;IACxD,MAAM,EAAE,MAAM,CAAC;CAChB;AAED;;;;;;;;GAQG;AACH,eAAO,MAAM,eAAe,EAAE,aAAa,EAyB1C,CAAC"}
|
|
@@ -56,5 +56,9 @@ export const CRITICAL_FLOORS = [
|
|
|
56
56
|
min_version: "0.1.47",
|
|
57
57
|
reason: "Desktop-app capture-screenshot honesty + auto-detected target_app — pre-0.1.47 builds applied the desktop-app honesty refusal ONLY inside codeloop_verify's auto-capture path, so the standalone codeloop_capture_screenshot tool still silently grabbed the IDE when called without app_name during recording. Also pre-0.1.47, evidence.target_app required manual user setup — codeloop_init_project now auto-extracts it from .csproj AssemblyName / Xcode PRODUCT_NAME / AndroidManifest android:label so desktop projects work without manual config",
|
|
58
58
|
},
|
|
59
|
+
{
|
|
60
|
+
min_version: "0.1.48",
|
|
61
|
+
reason: "Desktop interaction reliability — pre-0.1.48 builds rejected target_type: 'windows_desktop' / 'mac_desktop' / 'linux_desktop' with a hard Zod error (agents had to know to type 'desktop' instead), didn't translate window-relative click coordinates to screen-absolute (so coords computed from a captured window-screenshot missed targets by hundreds of pixels), had no codeloop_launch_app tool / auto-launch in start_recording (agents had to hand-roll Get-Process + Start-Process PowerShell every recording), refused agent-natural flat sequence step shapes like { action: 'click', x: 100, y: 200 }, and treated cross-run weak design_compare matches as critical 0% failures of the gate-run screen. All four broke the autonomous loop on Photometry-DB E2E session 8",
|
|
62
|
+
},
|
|
59
63
|
];
|
|
60
64
|
//# sourceMappingURL=critical_floors.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"critical_floors.js","sourceRoot":"","sources":["../../src/auth/critical_floors.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AASH;;;;;;;;GAQG;AACH,MAAM,CAAC,MAAM,eAAe,GAAoB;IAC9C;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EAAE,8JAA8J;KACvK;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EAAE,yNAAyN;KAClO;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EAAE,oYAAoY;KAC7Y;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EAAE,sbAAsb;KAC/b;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EAAE,4hBAA4hB;KACriB;CACF,CAAC"}
|
|
1
|
+
{"version":3,"file":"critical_floors.js","sourceRoot":"","sources":["../../src/auth/critical_floors.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AASH;;;;;;;;GAQG;AACH,MAAM,CAAC,MAAM,eAAe,GAAoB;IAC9C;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EAAE,8JAA8J;KACvK;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EAAE,yNAAyN;KAClO;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EAAE,oYAAoY;KAC7Y;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EAAE,sbAAsb;KAC/b;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EAAE,4hBAA4hB;KACriB;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EAAE,yvBAAyvB;KAClwB;CACF,CAAC"}
|
package/dist/index.js
CHANGED
|
@@ -135,6 +135,12 @@ const server = new McpServer({
|
|
|
135
135
|
async function withAuth(fn, tracker) {
|
|
136
136
|
const started = Date.now();
|
|
137
137
|
let outcome = { success: false };
|
|
138
|
+
// Photometry-DB E2E 8 regression: when the agent passes
|
|
139
|
+
// `project_dir`/`workspace_root` to a tool, remember that dir for
|
|
140
|
+
// the lifetime of this MCP process so the init-hint check no
|
|
141
|
+
// longer false-positives later calls where the call site didn't
|
|
142
|
+
// forward `dir` to `withInitHint`.
|
|
143
|
+
rememberInitializedDir(tracker?.cwd);
|
|
138
144
|
try {
|
|
139
145
|
// Local / self-hosted mode (CODELOOP_MODE=local): skip API-key validation
|
|
140
146
|
// entirely. All cloud-side checks are bypassed; usage events are queued
|
|
@@ -286,6 +292,26 @@ function buildVersionBanner() {
|
|
|
286
292
|
text: `[CodeLoop server v${v}]`,
|
|
287
293
|
};
|
|
288
294
|
}
|
|
295
|
+
/**
 * Most recent project directory that a tool call proved to be initialized,
 * or `null` when none has been seen yet in this MCP process.
 *
 * Background (Photometry-DB E2E session 8): the agent passed an initialized
 * `project_dir` on every call, but the init hint was evaluated against the
 * server's startup `projectDir` (the user's home folder on Windows), so
 * "This project has not been initialized" was prepended to every response.
 * `withAuth` feeds this cache from its tracker's `cwd`, and `withInitHint`
 * consults it, so one successful observation silences the hint for the rest
 * of the session even for handlers that do not forward `dir`.
 */
let lastInitializedDir = null;
/**
 * Record `dir` as the session's known-initialized project directory.
 *
 * The cache is only ever set, never cleared: a dir that fails the check
 * leaves the previous value in place.
 *
 * @param {unknown} dir Candidate project directory (typically `tracker?.cwd`).
 *   Anything that is not a non-empty string is ignored, matching the filter
 *   `withInitHint` applies to its candidate list.
 * @returns {void}
 */
function rememberInitializedDir(dir) {
    if (typeof dir !== "string" || dir.length === 0) {
        return;
    }
    // This runs on every authenticated tool call; re-checking the dir that is
    // already cached cannot change the cache, so skip the redundant probe.
    if (dir === lastInitializedDir) {
        return;
    }
    if (isProjectInitialized(dir)) {
        lastInitializedDir = dir;
    }
}
|
|
289
315
|
function withInitHint(content, dir) {
|
|
290
316
|
// Order matters:
|
|
291
317
|
// 1. Update notice (most actionable signal — CRITICAL stays at top).
|
|
@@ -296,7 +322,22 @@ function withInitHint(content, dir) {
|
|
|
296
322
|
const banner = buildVersionBanner();
|
|
297
323
|
const withUpdate = withUpdateNotice(content);
|
|
298
324
|
const head = [];
|
|
299
|
-
|
|
325
|
+
// Candidate dirs in priority order:
|
|
326
|
+
// 1. Explicit `dir` arg from the call site (when the handler
|
|
327
|
+
// bothered to thread it through — the verify / capture /
|
|
328
|
+
// design_compare etc. handlers do).
|
|
329
|
+
// 2. `lastInitializedDir` — any dir the agent has already passed
|
|
330
|
+
// to *any* CodeLoop tool this session that resolved to an
|
|
331
|
+
// initialized config. This catches the long tail of handlers
|
|
332
|
+
// that don't forward `dir` (e.g. `recommend_tool`,
|
|
333
|
+
// `release_readiness`, `section_status`) so they no longer
|
|
334
|
+
// false-positive the hint after init has succeeded once.
|
|
335
|
+
// 3. The MCP server's startup `projectDir` (often the user's
|
|
336
|
+
// home folder on Windows / Cursor — see CODELOOP_PROJECT_DIR
|
|
337
|
+
// auto-injection notes in setup-project.ts).
|
|
338
|
+
const candidates = [dir, lastInitializedDir, projectDir].filter((d) => typeof d === "string" && d.length > 0);
|
|
339
|
+
const anyInitialized = candidates.some((d) => isProjectInitialized(d));
|
|
340
|
+
if (!anyInitialized) {
|
|
300
341
|
head.push({ type: "text", text: INIT_HINT });
|
|
301
342
|
}
|
|
302
343
|
const result = [...head, ...withUpdate];
|
|
@@ -305,6 +346,42 @@ function withInitHint(content, dir) {
|
|
|
305
346
|
return result;
|
|
306
347
|
}
|
|
307
348
|
const TOOL_BOOTSTRAP = "[CodeLoop] You MUST call codeloop_verify after every code change. If .codeloop/config.json is missing, call codeloop_init_project FIRST.\n\n";
|
|
349
|
+
/**
 * Synonym table for the `target_type` tool parameter.
 *
 * Keys are lower-case, underscore-separated spellings an agent might
 * reasonably send; values are the four canonical target types
 * ("desktop", "browser", "android_emulator", "ios_simulator").
 *
 * Photometry-DB E2E 8 regression: the agent tried
 * `target_type: "windows_desktop"` for a WPF app and got a hard
 * schema-validation rejection. Coercing the common variants keeps the agent
 * from being blocked by a thin naming convention.
 */
const TARGET_TYPE_SYNONYMS = {
    desktop: "desktop",
    windows_desktop: "desktop",
    win_desktop: "desktop",
    win32_desktop: "desktop",
    mac_desktop: "desktop",
    macos_desktop: "desktop",
    osx_desktop: "desktop",
    linux_desktop: "desktop",
    native_desktop: "desktop",
    browser: "browser",
    web: "browser",
    webapp: "browser",
    chrome: "browser",
    android_emulator: "android_emulator",
    android: "android_emulator",
    ios_simulator: "ios_simulator",
    ios: "ios_simulator",
};
/**
 * Coerce a raw `target_type` value to its canonical spelling.
 *
 * Matching ignores case and surrounding whitespace, and treats runs of
 * spaces, hyphens and underscores as a single underscore, so
 * "Windows-Desktop" and "iOS Simulator" resolve like their underscore forms.
 *
 * @param {unknown} v Raw value supplied by the caller.
 * @returns {unknown} The canonical target type when `v` is a known synonym;
 *   otherwise `v` unchanged, so downstream validation reports the caller's
 *   original input.
 */
function normalizeTargetType(v) {
    if (typeof v !== "string")
        return v;
    const key = v.trim().toLowerCase().replace(/[\s_-]+/g, "_");
    // Own-property check: a bare `TABLE[key]` lookup would resolve inherited
    // names such as "constructor" or "__proto__" to non-string values.
    return Object.prototype.hasOwnProperty.call(TARGET_TYPE_SYNONYMS, key)
        ? TARGET_TYPE_SYNONYMS[key]
        : v;
}
|
|
384
|
+
// Shared `target_type` schema: the raw input is passed through
// normalizeTargetType first, then validated against the four canonical values.
const targetTypeSchema = z.preprocess(normalizeTargetType, z.enum(["desktop", "browser", "android_emulator", "ios_simulator"]));
|
|
308
385
|
// ── Implemented Tools ────────────────────────────────────────────
|
|
309
386
|
server.tool("codeloop_verify", TOOL_BOOTSTRAP + `Run the CodeLoop verification suite on the current project. Use this tool when:
|
|
310
387
|
- You have implemented or modified code and need to check if it works correctly
|
|
@@ -1136,8 +1213,28 @@ Returns: confirmation + the captured image as an MCP ImageContent block so you c
|
|
|
1136
1213
|
const cfg = loadConfig(cwd);
|
|
1137
1214
|
const targetApp = params.app_name ?? cfg.evidence?.target_app;
|
|
1138
1215
|
const result = await captureScreenshot(screenshotsDir, params.screen_name, targetApp, undefined, { desktopAppMode: isDesktopAppProject });
|
|
1216
|
+
// Photometry-DB E2E 8 follow-on: when we capture a desktop app
|
|
1217
|
+
// window, also resolve its on-screen bounds so the agent can
|
|
1218
|
+
// (a) compute window-relative coords from the returned image
|
|
1219
|
+
// dimensions, and
|
|
1220
|
+
// (b) pass coords:"window" to codeloop_interact to get them
|
|
1221
|
+
// translated to screen-absolute automatically.
|
|
1222
|
+
// Without this, agents reasoned from a downscaled vision view
|
|
1223
|
+
// of the image and clicked tens or hundreds of pixels off the
|
|
1224
|
+
// intended target.
|
|
1225
|
+
let windowBounds = null;
|
|
1226
|
+
if (isDesktopAppProject && targetApp && result.captured) {
|
|
1227
|
+
try {
|
|
1228
|
+
const wm = await import("./runners/window_manager.js");
|
|
1229
|
+
const b = await wm.getWindowBounds(targetApp);
|
|
1230
|
+
if (b && b.width > 0 && b.height > 0) {
|
|
1231
|
+
windowBounds = { x: b.x, y: b.y, width: b.width, height: b.height };
|
|
1232
|
+
}
|
|
1233
|
+
}
|
|
1234
|
+
catch { /* best-effort */ }
|
|
1235
|
+
}
|
|
1139
1236
|
await trackUsage(apiKey, "visual_review");
|
|
1140
|
-
return result;
|
|
1237
|
+
return { ...result, windowBounds };
|
|
1141
1238
|
}, { tool: "codeloop_capture_screenshot", cwd: (params.project_dir || params.workspace_root || projectDir), input: params });
|
|
1142
1239
|
if (typeof authResult === "object" && authResult !== null && "error" in authResult) {
|
|
1143
1240
|
return { content: [{ type: "text", text: JSON.stringify(authResult, null, 2) }] };
|
|
@@ -1145,12 +1242,18 @@ Returns: confirmation + the captured image as an MCP ImageContent block so you c
|
|
|
1145
1242
|
const result = authResult;
|
|
1146
1243
|
const content = [];
|
|
1147
1244
|
if (result.captured && result.paths.length > 0) {
|
|
1148
|
-
|
|
1149
|
-
|
|
1150
|
-
|
|
1151
|
-
|
|
1152
|
-
|
|
1153
|
-
|
|
1245
|
+
const payload = {
|
|
1246
|
+
captured: true,
|
|
1247
|
+
screen_name: params.screen_name,
|
|
1248
|
+
path: result.paths[0],
|
|
1249
|
+
method: result.method,
|
|
1250
|
+
};
|
|
1251
|
+
if (result.windowBounds) {
|
|
1252
|
+
payload.window_bounds = result.windowBounds;
|
|
1253
|
+
payload.coordinate_hint =
|
|
1254
|
+
"This screenshot captures the named window. When you compute click coordinates from the image, pass them to codeloop_interact with `coords: \"window\"` so they're translated to screen-absolute automatically. (Default `coords: \"auto\"` also works when the coord fits inside the window — but `\"window\"` is unambiguous.)";
|
|
1255
|
+
}
|
|
1256
|
+
content.push({ type: "text", text: JSON.stringify(payload, null, 2) });
|
|
1154
1257
|
const data = readImageAsBase64(result.paths[0]);
|
|
1155
1258
|
if (data) {
|
|
1156
1259
|
content.push({ type: "image", data, mimeType: mimeForPath(result.paths[0]) });
|
|
@@ -1321,6 +1424,45 @@ After recording, call codeloop_interaction_replay to extract frames and analyze
|
|
|
1321
1424
|
content: withInitHint([{ type: "text", text: JSON.stringify(result, null, 2) }]),
|
|
1322
1425
|
};
|
|
1323
1426
|
});
|
|
1427
|
+
// Registers the `codeloop_launch_app` tool: resolves the app name (explicit
// argument first, then evidence.target_app from the project config) and asks
// the window manager to launch it from the project directory.
server.tool("codeloop_launch_app", TOOL_BOOTSTRAP + `Launch a desktop application by name from a project's build output. Use when:
- You need the app under test to be running before codeloop_start_recording / codeloop_interact.
- The agent doesn't know where the executable lives and shouldn't have to hand-roll Start-Process / open -a.

This tool is the canonical replacement for hand-coding PowerShell Start-Process / osascript / xdg-open
from the agent. For Android / iOS, use codeloop_interact action="launch_app" with package_id instead.

Search order on Windows: publish/**/*.exe → bin/Release/**/*.exe → bin/Debug/**/*.exe (newest first).
On macOS: publish/**/*.app → build/**/*.app → /Applications/<name>.app → open -a.
On Linux: build/**/<name> → bin/**/<name> → dist/**/<name>.

If app_name is omitted, falls back to evidence.target_app from .codeloop/config.json (auto-detected at
init for .NET/Xcode/Android projects via detect-target-app).`, {
    app_name: z.string().optional().describe("Window title / executable name of the app to launch. Defaults to evidence.target_app from .codeloop/config.json. Required if target_app is unset."),
    project_dir: z.string().optional().describe("Absolute path to the project root. Defaults to CODELOOP_PROJECT_DIR / discovered project dir."),
    workspace_root: z.string().optional().describe("[Alias for project_dir]"),
}, async (args) => {
    // Explicit dir (either alias) wins; otherwise fall back to the server's
    // startup project dir. The same value feeds the launch and the tracker.
    const explicitDir = args.project_dir || args.workspace_root;
    const cwd = (explicitDir || projectDir);
    const launchFromProject = async () => {
        const windowManager = await import("./runners/window_manager.js");
        const { loadConfig } = await import("./config.js");
        const config = loadConfig(cwd);
        const appName = args.app_name || config.evidence?.target_app;
        if (appName) {
            const launch = await windowManager.launchDesktopApp(appName, cwd);
            // `app_name` goes first so fields from the launch result win on a clash.
            return { app_name: appName, ...launch };
        }
        // Nothing to launch: report why instead of throwing.
        return {
            launched: false,
            reason: "No app_name provided and evidence.target_app is unset in .codeloop/config.json. Set it (e.g. \"evidence\": { \"target_app\": \"My App\" }) or pass app_name explicitly. For .NET/Xcode/Android projects, codeloop_init_project auto-detects this — re-run init or edit the config by hand.",
        };
    };
    const outcome = await withAuth(launchFromProject, { tool: "codeloop_launch_app", cwd, input: args });
    const body = [{ type: "text", text: JSON.stringify(outcome, null, 2) }];
    // An object carrying an `error` key is returned as-is, without the init hint.
    const isErrorOutcome = typeof outcome === "object" && outcome !== null && "error" in outcome;
    if (isErrorOutcome) {
        return { content: body };
    }
    return {
        content: withInitHint(body, explicitDir),
    };
});
|
|
1324
1466
|
server.tool("codeloop_start_recording", TOOL_BOOTSTRAP + `Start recording the app window in the background. The app is brought to the front automatically
|
|
1325
1467
|
(un-minimized if needed). Recording continues while you interact with the app. Call codeloop_stop_recording when done.
|
|
1326
1468
|
This is the PREFERRED recording method because it lets you actively operate the app during capture.
|
|
@@ -1347,11 +1489,12 @@ Flow: start_recording → codeloop_interact with ALL app elements → stop_recor
|
|
|
1347
1489
|
Supports desktop apps, Android emulator, iOS Simulator, and browser targets.
|
|
1348
1490
|
Multi-monitor: on macOS, automatically detects which screen the app window is on.
|
|
1349
1491
|
App logs (stdout, logcat, simctl log) are automatically captured alongside the video.`, {
|
|
1350
|
-
app_name: z.string().describe("The name of the app to record (used to find and focus its window)"),
|
|
1492
|
+
app_name: z.string().optional().describe("The name of the app to record (used to find and focus its window). For desktop projects, defaults to evidence.target_app from .codeloop/config.json — set during init via detect-target-app for .NET/Xcode/Android projects, or settable manually."),
|
|
1351
1493
|
run_id: z.string().optional().describe("Existing run ID to store the video in"),
|
|
1352
1494
|
max_duration_seconds: z.number().default(120).describe("Safety timeout — recording stops automatically after this many seconds"),
|
|
1353
|
-
target_type:
|
|
1495
|
+
target_type: targetTypeSchema.optional()
|
|
1354
1496
|
.describe("Capture method. Auto-detected from project if omitted. desktop=ffmpeg screen, android_emulator=adb screenrecord, ios_simulator=simctl recordVideo, browser=ffmpeg/Playwright"),
|
|
1497
|
+
auto_launch: z.boolean().default(true).describe("When target_type=desktop and the app isn't already running, auto-launch it from the project's build output via evidence.target_app. Set false to skip (e.g. when the app is started by another process)."),
|
|
1355
1498
|
project_dir: z.string().optional().describe("Absolute path to the project root. Defaults to CODELOOP_PROJECT_DIR env var or auto-discovered project directory. MUST be an actual project folder — passing the user's home directory is rejected. If your IDE launches the MCP server from the wrong cwd (common on Windows where Cursor uses C:\\Users\\<name> as cwd), set CODELOOP_PROJECT_DIR or pass this param explicitly."),
|
|
1356
1499
|
workspace_root: z.string().optional().describe("[Alias for project_dir] Same semantics; accepted because many agents reach for this conventional name. Pass either `project_dir` OR `workspace_root` — they're equivalent."),
|
|
1357
1500
|
}, async (params) => {
|
|
@@ -1359,6 +1502,7 @@ App logs (stdout, logcat, simctl log) are automatically captured alongside the v
|
|
|
1359
1502
|
const { startBackgroundRecording } = await import("./runners/video_recorder.js");
|
|
1360
1503
|
const { createRunDir, getRunDir, getArtifactsBaseDir } = await import("./evidence/artifacts.js");
|
|
1361
1504
|
const { detectTargetType } = await import("./runners/platform_detect.js");
|
|
1505
|
+
const { loadConfig } = await import("./config.js");
|
|
1362
1506
|
const cwd = (params.project_dir || params.workspace_root || projectDir);
|
|
1363
1507
|
let videosDir;
|
|
1364
1508
|
if (params.run_id) {
|
|
@@ -1370,7 +1514,11 @@ App logs (stdout, logcat, simctl log) are automatically captured alongside the v
|
|
|
1370
1514
|
videosDir = join(runDir, "videos");
|
|
1371
1515
|
}
|
|
1372
1516
|
const targetType = params.target_type || (await detectTargetType(cwd));
|
|
1517
|
+
const cfg = loadConfig(cwd);
|
|
1373
1518
|
let appName = params.app_name;
|
|
1519
|
+
if (!appName && (targetType === "desktop")) {
|
|
1520
|
+
appName = cfg.evidence?.target_app;
|
|
1521
|
+
}
|
|
1374
1522
|
if (targetType === "browser") {
|
|
1375
1523
|
const bi = await import("./runners/browser_interaction.js");
|
|
1376
1524
|
await bi.ensureBrowserPage();
|
|
@@ -1379,7 +1527,42 @@ App logs (stdout, logcat, simctl log) are automatically captured alongside the v
|
|
|
1379
1527
|
appName = pwAppName;
|
|
1380
1528
|
}
|
|
1381
1529
|
}
|
|
1382
|
-
|
|
1530
|
+
// Photometry-DB E2E 8: agents spent many turns manually
|
|
1531
|
+
// probing `Get-Process` / `Start-Process` to launch the app
|
|
1532
|
+
// because nothing in CodeLoop did it for them. Now, when
|
|
1533
|
+
// recording a desktop app, we auto-launch from the build
|
|
1534
|
+
// output if the app isn't already running.
|
|
1535
|
+
const wm = await import("./runners/window_manager.js");
|
|
1536
|
+
let autoLaunchSummary;
|
|
1537
|
+
if (targetType === "desktop" && params.auto_launch !== false && appName) {
|
|
1538
|
+
try {
|
|
1539
|
+
const bounds = await wm.getWindowBounds(appName);
|
|
1540
|
+
if (!bounds) {
|
|
1541
|
+
const r = await wm.launchDesktopApp(appName, cwd);
|
|
1542
|
+
autoLaunchSummary = {
|
|
1543
|
+
attempted: true,
|
|
1544
|
+
launched: r.launched,
|
|
1545
|
+
command: r.command,
|
|
1546
|
+
reason: r.reason,
|
|
1547
|
+
};
|
|
1548
|
+
if (r.launched) {
|
|
1549
|
+
// Give the window time to appear / paint before the
|
|
1550
|
+
// recorder starts capturing frames.
|
|
1551
|
+
await new Promise((res) => setTimeout(res, 2000));
|
|
1552
|
+
}
|
|
1553
|
+
}
|
|
1554
|
+
else {
|
|
1555
|
+
autoLaunchSummary = { attempted: false, launched: true, reason: "already running" };
|
|
1556
|
+
}
|
|
1557
|
+
}
|
|
1558
|
+
catch (e) {
|
|
1559
|
+
autoLaunchSummary = { attempted: true, launched: false, reason: e.message };
|
|
1560
|
+
}
|
|
1561
|
+
}
|
|
1562
|
+
const result = await startBackgroundRecording(videosDir, appName ?? "", params.max_duration_seconds, targetType);
|
|
1563
|
+
if (autoLaunchSummary) {
|
|
1564
|
+
result.auto_launch = autoLaunchSummary;
|
|
1565
|
+
}
|
|
1383
1566
|
await trackUsage(apiKey, "visual_review");
|
|
1384
1567
|
return result;
|
|
1385
1568
|
}, { tool: "codeloop_start_recording", cwd: (params.project_dir || params.workspace_root || projectDir), input: params });
|
|
@@ -1955,8 +2138,8 @@ MANDATORY for web apps: You MUST type into form fields, fill login/signup forms,
|
|
|
1955
2138
|
validation errors, and click submit buttons. Just navigating pages is NOT enough.
|
|
1956
2139
|
Wait 1-2 seconds between interactions so video frames capture state changes.`, {
|
|
1957
2140
|
action: z.string().describe("Action to perform: click, double_click, right_click, hover, type, keystroke, hotkey, scroll, drag_drop, long_press, type_and_submit, type_and_tab, fill_form, select_option, toggle, upload_file, navigate_url, navigate_back, navigate_forward, wait, sequence, swipe, back_button, home_button, deep_link, grant_permission, rotate_device, biometric_auth, launch_app, clear_app_data, mock_location, simulate_network, maestro_flow, win_ui_inspect, win_ui_automate"),
|
|
1958
|
-
target_type:
|
|
1959
|
-
.describe("Interaction target. Auto-detected if omitted."),
|
|
2141
|
+
target_type: targetTypeSchema.optional()
|
|
2142
|
+
.describe("Interaction target. Auto-detected if omitted. Accepts synonyms: `windows_desktop`/`mac_desktop`/`linux_desktop` → `desktop`; `web` → `browser`; `android` → `android_emulator`; `ios` → `ios_simulator`."),
|
|
1960
2143
|
x: z.number().optional().describe("X coordinate for click/scroll/drag/swipe"),
|
|
1961
2144
|
y: z.number().optional().describe("Y coordinate for click/scroll/drag/swipe"),
|
|
1962
2145
|
x2: z.number().optional().describe("End X for drag_drop/swipe"),
|
|
@@ -1987,7 +2170,7 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
|
|
|
1987
2170
|
action: z.string(),
|
|
1988
2171
|
params: z.record(z.unknown()).optional(),
|
|
1989
2172
|
delay_ms: z.number().optional(),
|
|
1990
|
-
})).optional().describe("Steps for sequence action"),
|
|
2173
|
+
}).passthrough()).optional().describe("Steps for sequence action. Accepts BOTH nested form `{ action, params: { x, y, … }, delay_ms? }` and flat form `{ action, x, y, … }` — the flat form is what agents naturally write (mirrors the top-level codeloop_interact shape). Supports inside desktop sequences: click, double_click, right_click, hover, scroll, type, hotkey, keystroke, navigate_url, wait, win_ui_automate."),
|
|
1991
2174
|
maestro_steps: z.array(z.string()).optional().describe("High-level steps for maestro_flow"),
|
|
1992
2175
|
automation_action: z.enum(["invoke", "setValue", "toggle", "select", "scroll"]).optional()
|
|
1993
2176
|
.describe("For win_ui_automate"),
|
|
@@ -1997,6 +2180,7 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
|
|
|
1997
2180
|
description: z.string().optional().describe("[Alias for intent] Same semantics."),
|
|
1998
2181
|
purpose: z.string().optional().describe("[Alias for intent] Same semantics."),
|
|
1999
2182
|
step: z.string().optional().describe("Plan-step name when this interaction is driving a codeloop_plan_user_journey arc (e.g. 'edit', 'delete', 'create', 'save', 'verify'). Logged alongside `intent` and read by the CRUD classifier."),
|
|
2183
|
+
coords: z.enum(["auto", "window", "screen"]).optional().describe("How to interpret x/y for desktop click/double_click/right_click/hover/scroll/drag/long_press. `auto` (default): if `app_name` resolves to a visible window AND (x, y) fits inside the window's client area, treat as window-relative and auto-offset by the window origin; otherwise leave as raw screen-absolute coords. `window`: ALWAYS add the window origin offset (errors if the window isn't found). `screen`: ALWAYS pass through (legacy behaviour, matches CGEvent / user32.dll / xdotool semantics). Fixes the Photometry-DB E2E 8 failure mode where the agent captured a 1600×900 window screenshot, computed click coords against the image, and missed the sidebar because the window's actual top-left was (286, 286) on a 5120×1440 screen."),
|
|
2000
2184
|
project_dir: z.string().optional().describe("Absolute path to project root."),
|
|
2001
2185
|
workspace_root: z.string().optional().describe("[Alias for project_dir] Pass either; they're equivalent."),
|
|
2002
2186
|
}, async (params) => {
|
|
@@ -2026,13 +2210,58 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
|
|
|
2026
2210
|
await bi.ensureBrowserPage();
|
|
2027
2211
|
}
|
|
2028
2212
|
// Bring the app to front before desktop interactions (non-browser, non-mobile).
|
|
2213
|
+
let windowOriginOffset = null;
|
|
2029
2214
|
if (tt === "desktop") {
|
|
2030
2215
|
const appName = params.app_name || vr.getActiveRecordingAppName();
|
|
2031
2216
|
if (appName && action !== "wait") {
|
|
2032
2217
|
await wm.bringAppToFront(appName);
|
|
2033
2218
|
await new Promise(r => setTimeout(r, 300));
|
|
2219
|
+
// Photometry-DB E2E 8: agents commonly compute click coords
|
|
2220
|
+
// from a window-cropped screenshot (which is what
|
|
2221
|
+
// codeloop_capture_screenshot returns when `app_name` is
|
|
2222
|
+
// set), then pass those coords to codeloop_interact —
|
|
2223
|
+
// which expects raw SCREEN coordinates. On a multi-monitor
|
|
2224
|
+
// / DPI-scaled setup that mismatch silently dropped clicks
|
|
2225
|
+
// 100s of pixels off-target. When `coords` is `auto` (the
|
|
2226
|
+
// default) we look up the window's actual screen origin
|
|
2227
|
+
// and add it to x/y, but ONLY if (x, y) fits inside the
|
|
2228
|
+
// window — that keeps legacy callers passing raw screen
|
|
2229
|
+
// coords working unchanged. `coords: "window"` forces the
|
|
2230
|
+
// offset; `coords: "screen"` opts out.
|
|
2231
|
+
const coordsMode = params.coords ?? "auto";
|
|
2232
|
+
if (coordsMode !== "screen") {
|
|
2233
|
+
try {
|
|
2234
|
+
const b = await wm.getWindowBounds(appName);
|
|
2235
|
+
if (b && b.width > 0 && b.height > 0) {
|
|
2236
|
+
windowOriginOffset = { dx: b.x, dy: b.y, width: b.width, height: b.height };
|
|
2237
|
+
}
|
|
2238
|
+
}
|
|
2239
|
+
catch { /* best-effort */ }
|
|
2240
|
+
}
|
|
2034
2241
|
}
|
|
2035
2242
|
}
|
|
2243
|
+
// Helper used by every coordinate-driven desktop action below.
|
|
2244
|
+
/**
 * Convert a caller-supplied (x, y) into the coordinates handed to the desktop
 * input helpers, honouring `params.coords` ("auto" when unset).
 *
 * - Non-desktop targets, missing coordinates, or no resolved window origin:
 *   the pair is returned untouched.
 * - "screen": returned untouched.
 * - "window": the window origin is always added.
 * - anything else (auto): the origin is added only when the point lies within
 *   0..width / 0..height (inclusive) of the resolved window; otherwise the
 *   pair is assumed to already be screen-absolute.
 *
 * NOTE(review): with `coords: "window"` and no resolved window this passes the
 * raw pair through silently, whereas the `coords` parameter description says
 * that case errors — confirm which behaviour is intended.
 */
const translateXY = (x, y) => {
    const untouched = { x, y };
    const origin = windowOriginOffset;
    const canTranslate = tt === "desktop" && x != null && y != null && Boolean(origin);
    if (!canTranslate) {
        return untouched;
    }
    const shifted = { x: x + origin.dx, y: y + origin.dy };
    const within = (value, limit) => value >= 0 && value <= limit;
    switch (params.coords ?? "auto") {
        case "screen":
            return untouched;
        case "window":
            return shifted;
        default:
            // auto: a point that fits inside the window is taken to be
            // window-relative (computed from a window-cropped screenshot).
            return within(x, origin.width) && within(y, origin.height)
                ? shifted
                : untouched;
    }
};
|
|
2036
2265
|
switch (action) {
|
|
2037
2266
|
case "click":
|
|
2038
2267
|
if (tt === "browser" && params.selector) {
|
|
@@ -2054,7 +2283,8 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
|
|
|
2054
2283
|
}
|
|
2055
2284
|
}
|
|
2056
2285
|
else if (params.x != null && params.y != null) {
|
|
2057
|
-
|
|
2286
|
+
const t = translateXY(params.x, params.y);
|
|
2287
|
+
success = await wm.clickAtPosition(t.x, t.y);
|
|
2058
2288
|
}
|
|
2059
2289
|
detail = `click at ${params.selector || `(${params.x},${params.y})`}`;
|
|
2060
2290
|
break;
|
|
@@ -2063,7 +2293,8 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
|
|
|
2063
2293
|
success = await bi.browserDoubleClick(params.selector);
|
|
2064
2294
|
}
|
|
2065
2295
|
else if (params.x != null && params.y != null) {
|
|
2066
|
-
|
|
2296
|
+
const t = translateXY(params.x, params.y);
|
|
2297
|
+
success = await wm.doubleClickAtPosition(t.x, t.y);
|
|
2067
2298
|
}
|
|
2068
2299
|
detail = `double_click at ${params.selector || `(${params.x},${params.y})`}`;
|
|
2069
2300
|
break;
|
|
@@ -2072,7 +2303,8 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
|
|
|
2072
2303
|
success = await bi.browserRightClick(params.selector);
|
|
2073
2304
|
}
|
|
2074
2305
|
else if (params.x != null && params.y != null) {
|
|
2075
|
-
|
|
2306
|
+
const t = translateXY(params.x, params.y);
|
|
2307
|
+
success = await wm.rightClickAtPosition(t.x, t.y);
|
|
2076
2308
|
}
|
|
2077
2309
|
detail = `right_click at ${params.selector || `(${params.x},${params.y})`}`;
|
|
2078
2310
|
break;
|
|
@@ -2081,7 +2313,8 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
|
|
|
2081
2313
|
success = await bi.browserHover(params.selector);
|
|
2082
2314
|
}
|
|
2083
2315
|
else if (params.x != null && params.y != null) {
|
|
2084
|
-
|
|
2316
|
+
const t = translateXY(params.x, params.y);
|
|
2317
|
+
success = await wm.hoverAtPosition(t.x, t.y);
|
|
2085
2318
|
}
|
|
2086
2319
|
detail = `hover at ${params.selector || `(${params.x},${params.y})`}`;
|
|
2087
2320
|
break;
|
|
@@ -2166,7 +2399,8 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
|
|
|
2166
2399
|
success = await wm.simctlSwipe(sx, sy, ex, ey);
|
|
2167
2400
|
}
|
|
2168
2401
|
else {
|
|
2169
|
-
|
|
2402
|
+
const t = translateXY(params.x || 500, params.y || 400);
|
|
2403
|
+
success = await wm.scrollAtPosition(t.x, t.y, params.direction || "down", params.amount || 3);
|
|
2170
2404
|
}
|
|
2171
2405
|
detail = `scroll ${params.direction || "down"}`;
|
|
2172
2406
|
break;
|
|
@@ -2179,7 +2413,9 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
|
|
|
2179
2413
|
success = await wm.adbSwipe(params.x, params.y, params.x2, params.y2, params.duration_ms || 500);
|
|
2180
2414
|
}
|
|
2181
2415
|
else {
|
|
2182
|
-
|
|
2416
|
+
const a = translateXY(params.x, params.y);
|
|
2417
|
+
const b = translateXY(params.x2, params.y2);
|
|
2418
|
+
success = await wm.dragDrop(a.x, a.y, b.x, b.y, params.duration_ms || 500);
|
|
2183
2419
|
}
|
|
2184
2420
|
}
|
|
2185
2421
|
detail = `drag_drop`;
|
|
@@ -2189,7 +2425,8 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
|
|
|
2189
2425
|
success = await wm.adbLongPress(params.x, params.y, params.duration_ms || 1000);
|
|
2190
2426
|
}
|
|
2191
2427
|
else if (params.x != null && params.y != null) {
|
|
2192
|
-
|
|
2428
|
+
const t = translateXY(params.x, params.y);
|
|
2429
|
+
success = await wm.longPressAtPosition(t.x, t.y, params.duration_ms || 1000);
|
|
2193
2430
|
}
|
|
2194
2431
|
detail = `long_press at (${params.x},${params.y})`;
|
|
2195
2432
|
break;
|
|
@@ -2371,11 +2608,37 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
|
|
|
2371
2608
|
if (tt === "android_emulator" && params.package_id) {
|
|
2372
2609
|
const r = await import("./runners/base.js").then(m => m.runCommand("adb", ["shell", "am", "start", "-n", params.package_id], process.cwd()));
|
|
2373
2610
|
success = r.exit_code === 0;
|
|
2611
|
+
detail = `launch_app "${params.package_id}"`;
|
|
2374
2612
|
}
|
|
2375
2613
|
else if (tt === "ios_simulator" && params.package_id) {
|
|
2376
2614
|
success = await wm.simctlLaunch(params.package_id);
|
|
2615
|
+
detail = `launch_app "${params.package_id}"`;
|
|
2616
|
+
}
|
|
2617
|
+
else if (tt === "desktop") {
|
|
2618
|
+
// Photometry-DB E2E 8: desktop launch was completely
|
|
2619
|
+
// missing — agents had to hand-roll PowerShell
|
|
2620
|
+
// Start-Process / `open -a` calls. Now resolves via
|
|
2621
|
+
// evidence.target_app from the project config when
|
|
2622
|
+
// app_name is omitted.
|
|
2623
|
+
const { loadConfig } = await import("./config.js");
|
|
2624
|
+
const cfg = loadConfig(cwd);
|
|
2625
|
+
const appName = params.app_name || cfg.evidence?.target_app;
|
|
2626
|
+
if (!appName) {
|
|
2627
|
+
success = false;
|
|
2628
|
+
detail = "launch_app desktop: no app_name provided and evidence.target_app is unset in .codeloop/config.json. Set it (e.g. \"target_app\": \"Photometry DB\") or pass app_name explicitly.";
|
|
2629
|
+
}
|
|
2630
|
+
else {
|
|
2631
|
+
const r = await wm.launchDesktopApp(appName, cwd);
|
|
2632
|
+
success = r.launched;
|
|
2633
|
+
detail = r.launched
|
|
2634
|
+
? `launch_app "${appName}" via ${r.command}${r.pid ? ` (pid ${r.pid})` : ""}`
|
|
2635
|
+
: `launch_app "${appName}" failed: ${r.reason || "unknown error"}`;
|
|
2636
|
+
}
|
|
2637
|
+
}
|
|
2638
|
+
else {
|
|
2639
|
+
success = false;
|
|
2640
|
+
detail = `launch_app "${params.package_id || params.app_name || ""}": target ${tt} not supported in this action`;
|
|
2377
2641
|
}
|
|
2378
|
-
detail = `launch_app "${params.package_id}"`;
|
|
2379
2642
|
break;
|
|
2380
2643
|
case "clear_app_data":
|
|
2381
2644
|
if (tt === "android_emulator" && params.package_id) {
|
|
@@ -2439,7 +2702,22 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
|
|
|
2439
2702
|
if (step.delay_ms)
|
|
2440
2703
|
await new Promise(r => setTimeout(r, step.delay_ms));
|
|
2441
2704
|
const stepAction = step.action;
|
|
2442
|
-
|
|
2705
|
+
// Photometry-DB E2E 8: agents naturally write steps in
|
|
2706
|
+
// FLAT form `{ action, x, y, ms, … }` because that
|
|
2707
|
+
// mirrors the top-level codeloop_interact shape. The
|
|
2708
|
+
// schema documented the NESTED form `{ action,
|
|
2709
|
+
// params: { … } }`. Now we accept both: prefer
|
|
2710
|
+
// `step.params` if present, otherwise fall back to the
|
|
2711
|
+
// step object itself minus the wrapper keys.
|
|
2712
|
+
const stepObj = step;
|
|
2713
|
+
const nested = (step.params || {});
|
|
2714
|
+
const sp = Object.keys(nested).length > 0
|
|
2715
|
+
? nested
|
|
2716
|
+
: Object.fromEntries(Object.entries(stepObj).filter(([k]) => k !== "action" && k !== "params" && k !== "delay_ms"));
|
|
2717
|
+
// Convenient aliases: agents wrote `ms` for wait
|
|
2718
|
+
// duration in the log; accept that as `duration_ms`.
|
|
2719
|
+
if (sp.ms != null && sp.duration_ms == null)
|
|
2720
|
+
sp.duration_ms = sp.ms;
|
|
2443
2721
|
let stepOk = false;
|
|
2444
2722
|
let stepReason;
|
|
2445
2723
|
try {
|
|
@@ -2447,7 +2725,20 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
|
|
|
2447
2725
|
stepOk = await bi.browserClick(sp.selector);
|
|
2448
2726
|
}
|
|
2449
2727
|
else if (stepAction === "click" && sp.x != null && sp.y != null) {
|
|
2450
|
-
|
|
2728
|
+
const t = translateXY(sp.x, sp.y);
|
|
2729
|
+
stepOk = await wm.clickAtPosition(t.x, t.y);
|
|
2730
|
+
}
|
|
2731
|
+
else if (stepAction === "double_click" && tt !== "browser" && sp.x != null && sp.y != null) {
|
|
2732
|
+
const t = translateXY(sp.x, sp.y);
|
|
2733
|
+
stepOk = await wm.doubleClickAtPosition(t.x, t.y);
|
|
2734
|
+
}
|
|
2735
|
+
else if (stepAction === "right_click" && tt !== "browser" && sp.x != null && sp.y != null) {
|
|
2736
|
+
const t = translateXY(sp.x, sp.y);
|
|
2737
|
+
stepOk = await wm.rightClickAtPosition(t.x, t.y);
|
|
2738
|
+
}
|
|
2739
|
+
else if (stepAction === "hover" && tt !== "browser" && sp.x != null && sp.y != null) {
|
|
2740
|
+
const t = translateXY(sp.x, sp.y);
|
|
2741
|
+
stepOk = await wm.hoverAtPosition(t.x, t.y);
|
|
2451
2742
|
}
|
|
2452
2743
|
else if (stepAction === "type" && tt === "browser" && sp.selector && sp.text) {
|
|
2453
2744
|
stepOk = await bi.browserType(sp.selector, sp.text);
|
|
@@ -2483,9 +2774,13 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
|
|
|
2483
2774
|
stepOk = tt === "browser" ? await bi.browserKeystroke(sp.key) : await wm.sendKeyByName(sp.key);
|
|
2484
2775
|
}
|
|
2485
2776
|
else if (stepAction === "scroll") {
|
|
2486
|
-
|
|
2487
|
-
|
|
2488
|
-
|
|
2777
|
+
if (tt === "browser") {
|
|
2778
|
+
stepOk = await bi.browserScroll(sp.direction || "down", sp.amount || 300);
|
|
2779
|
+
}
|
|
2780
|
+
else {
|
|
2781
|
+
const t = translateXY(sp.x || 500, sp.y || 400);
|
|
2782
|
+
stepOk = await wm.scrollAtPosition(t.x, t.y, sp.direction || "down", sp.amount || 3);
|
|
2783
|
+
}
|
|
2489
2784
|
}
|
|
2490
2785
|
else if (stepAction === "wait") {
|
|
2491
2786
|
await new Promise(r => setTimeout(r, sp.duration_ms || 1000));
|