codeloop-mcp-server 0.1.47 → 0.1.49

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. package/dist/auth/critical_floors.d.ts +8 -4
  2. package/dist/auth/critical_floors.d.ts.map +1 -1
  3. package/dist/auth/critical_floors.js +16 -16
  4. package/dist/auth/critical_floors.js.map +1 -1
  5. package/dist/auth/init_hint_cache.d.ts +35 -0
  6. package/dist/auth/init_hint_cache.d.ts.map +1 -0
  7. package/dist/auth/init_hint_cache.js +143 -0
  8. package/dist/auth/init_hint_cache.js.map +1 -0
  9. package/dist/evidence/screenshot_diff.d.ts +23 -0
  10. package/dist/evidence/screenshot_diff.d.ts.map +1 -1
  11. package/dist/evidence/screenshot_diff.js +46 -13
  12. package/dist/evidence/screenshot_diff.js.map +1 -1
  13. package/dist/index.js +484 -32
  14. package/dist/index.js.map +1 -1
  15. package/dist/runners/csproj_output_path.d.ts +22 -0
  16. package/dist/runners/csproj_output_path.d.ts.map +1 -0
  17. package/dist/runners/csproj_output_path.js +108 -0
  18. package/dist/runners/csproj_output_path.js.map +1 -0
  19. package/dist/runners/png_dims.d.ts +20 -0
  20. package/dist/runners/png_dims.d.ts.map +1 -0
  21. package/dist/runners/png_dims.js +58 -0
  22. package/dist/runners/png_dims.js.map +1 -0
  23. package/dist/runners/window_manager.d.ts +51 -4
  24. package/dist/runners/window_manager.d.ts.map +1 -1
  25. package/dist/runners/window_manager.js +348 -9
  26. package/dist/runners/window_manager.js.map +1 -1
  27. package/dist/tools/design_compare.d.ts +43 -0
  28. package/dist/tools/design_compare.d.ts.map +1 -1
  29. package/dist/tools/design_compare.js +199 -66
  30. package/dist/tools/design_compare.js.map +1 -1
  31. package/dist/tools/desktop_app_mode.d.ts +48 -0
  32. package/dist/tools/desktop_app_mode.d.ts.map +1 -0
  33. package/dist/tools/desktop_app_mode.js +86 -0
  34. package/dist/tools/desktop_app_mode.js.map +1 -0
  35. package/dist/tools/gate_check.js +29 -7
  36. package/dist/tools/gate_check.js.map +1 -1
  37. package/dist/tools/self_test.d.ts +40 -0
  38. package/dist/tools/self_test.d.ts.map +1 -0
  39. package/dist/tools/self_test.js +205 -0
  40. package/dist/tools/self_test.js.map +1 -0
  41. package/dist/tools/verify.d.ts.map +1 -1
  42. package/dist/tools/verify.js +4 -5
  43. package/dist/tools/verify.js.map +1 -1
  44. package/package.json +1 -1
package/dist/index.js CHANGED
@@ -2,7 +2,7 @@
2
2
  import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
3
3
  import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
4
4
  import { z } from "zod";
5
- import { readFileSync, writeFileSync, existsSync, readdirSync } from "fs";
5
+ import { readFileSync, writeFileSync, existsSync, readdirSync, statSync } from "fs";
6
6
  function dirHasFile(dir, predicate) {
7
7
  try {
8
8
  if (!existsSync(dir))
@@ -20,6 +20,7 @@ import { loadConfig } from "./config.js";
20
20
  import { validateApiKey, isActivationRequired } from "./auth/api_key.js";
21
21
  import { identifyKeySource, buildRevokedKeyDiagnostic } from "./auth/key_source.js";
22
22
  import { warmCliCache } from "./auth/cli_cache_warmer.js";
23
+ import { recordInitialisedDir, wasInitialisedAtPath, } from "./auth/init_hint_cache.js";
23
24
  import { startUpdateCheck, getUpdateInfo, formatUpdateNotice, getRunningVersion, } from "./auth/update_check.js";
24
25
  import { applyUpdate, applyUpdateInputSchema, } from "./tools/apply_update.js";
25
26
  import { trackUsage } from "./auth/usage_tracker.js";
@@ -87,6 +88,49 @@ if (!process.env.CODELOOP_PROJECT_DIR &&
87
88
  `or set CODELOOP_PROJECT_DIR in your MCP config so future calls auto-resolve. ` +
88
89
  `codeloop_init_project will REFUSE to scaffold here.`);
89
90
  }
91
+ // 0.1.49 — stale CODELOOP_PROJECT_DIR detection.
92
+ //
93
+ // When init writes a workspace pin into .cursor/mcp.json, it bakes
94
+ // the absolute path of the workspace at the time. If the user later
95
+ // renames or moves the workspace folder (common on Windows when a
96
+ // project graduates from D:\Work\<name> to D:\Repos\<name>), the pin
97
+ // keeps pointing at the old path that no longer exists, and every
98
+ // MCP boot resolves projectDir to a non-existent directory — which
99
+ // silently turns init/verify/gate into no-ops because every "does
100
+ // the .codeloop/ folder exist?" check returns false.
101
+ //
102
+ // We log a single, loud, agent-readable line on stderr so the agent
103
+ // knows to re-run `npx codeloop init` (which rewrites the pin to
104
+ // the workspace's current absolute path — see G8 in the CLI).
105
+ {
106
+ const pinned = process.env.CODELOOP_PROJECT_DIR;
107
+ if (pinned) {
108
+ let stale = false;
109
+ let reason = "";
110
+ try {
111
+ if (!existsSync(pinned)) {
112
+ stale = true;
113
+ reason = "path does not exist";
114
+ }
115
+ else if (!statSync(pinned).isDirectory()) {
116
+ stale = true;
117
+ reason = "path is not a directory";
118
+ }
119
+ else if (!existsSync(join(pinned, ".codeloop", "config.json"))) {
120
+ stale = true;
121
+ reason = "no .codeloop/config.json under the pinned path";
122
+ }
123
+ }
124
+ catch (e) {
125
+ stale = true;
126
+ reason = e.message;
127
+ }
128
+ if (stale) {
129
+ console.error(`[CodeLoop] ⚠ CODELOOP_PROJECT_DIR=${pinned} is stale (${reason}) — falling back to discovery. ` +
130
+ `Re-run \`npx codeloop init\` from the workspace's current location to rewrite the pin.`);
131
+ }
132
+ }
133
+ }
90
134
  const config = loadConfig(projectDir);
91
135
  const apiKey = process.env.CODELOOP_API_KEY || config.api_key;
92
136
  // Pre-warm the npx cache for the `codeloop` CLI in the background so
@@ -135,6 +179,12 @@ const server = new McpServer({
135
179
  async function withAuth(fn, tracker) {
136
180
  const started = Date.now();
137
181
  let outcome = { success: false };
182
+ // Photometry-DB E2E 8 regression: when the agent passes
183
+ // `project_dir`/`workspace_root` to a tool, remember that dir for
184
+ // the lifetime of this MCP process so the init-hint check no
185
+ // longer false-positives on later calls where the call site didn't
186
+ // forward `dir` to `withInitHint`.
187
+ rememberInitializedDir(tracker?.cwd);
138
188
  try {
139
189
  // Local / self-hosted mode (CODELOOP_MODE=local): skip API-key validation
140
190
  // entirely. All cloud-side checks are bypassed; usage events are queued
@@ -286,6 +336,33 @@ function buildVersionBanner() {
286
336
  text: `[CodeLoop server v${v}]`,
287
337
  };
288
338
  }
339
+ /**
340
+ * Last project directory observed to be initialized via an actual
341
+ * tool call (set by `withAuth`'s tracker — see the .cwd field). The
342
+ * Photometry-DB E2E session 8 regression was that the agent passed
343
+ * `project_dir: "D:\\Work\\Photometry DB"` (which IS initialized) on
344
+ * every call, but the init hint was checking the server's startup
345
+ * `projectDir` (C:\Users\jiq on Windows) and incorrectly prepending
346
+ * "This project has not been initialized" to every response. With
347
+ * this cache, the first authenticated call that hits an initialized
348
+ * dir silences the hint for the rest of the session — independent of
349
+ * whether the specific call site forwarded `dir` to `withInitHint`.
350
+ */
351
+ let lastInitializedDir = null;
352
+ function rememberInitializedDir(dir) {
353
+ if (!dir)
354
+ return;
355
+ if (isProjectInitialized(dir)) {
356
+ lastInitializedDir = dir;
357
+ // 0.1.49 — also persist to ~/.codeloop/init-hint-cache.json so
358
+ // the next MCP server boot (every IDE restart) doesn't false-
359
+ // positive the "project not initialized" hint until the agent
360
+ // has happened to forward `dir` to a handler that calls back
361
+ // into this function. Best-effort; failures swallowed inside
362
+ // recordInitialisedDir.
363
+ recordInitialisedDir(dir);
364
+ }
365
+ }
289
366
  function withInitHint(content, dir) {
290
367
  // Order matters:
291
368
  // 1. Update notice (most actionable signal — CRITICAL stays at top).
@@ -296,7 +373,27 @@ function withInitHint(content, dir) {
296
373
  const banner = buildVersionBanner();
297
374
  const withUpdate = withUpdateNotice(content);
298
375
  const head = [];
299
- if (!isProjectInitialized(dir || projectDir)) {
376
+ // Candidate dirs in priority order:
377
+ // 1. Explicit `dir` arg from the call site (when the handler
378
+ // bothered to thread it through — the verify / capture /
379
+ // design_compare etc. handlers do).
380
+ // 2. `lastInitializedDir` — any dir the agent has already passed
381
+ // to *any* CodeLoop tool this session that resolved to an
382
+ // initialized config. This catches the long tail of handlers
383
+ // that don't forward `dir` (e.g. `recommend_tool`,
384
+ // `release_readiness`, `section_status`) so they no longer
385
+ // false-positive the hint after init has succeeded once.
386
+ // 3. The MCP server's startup `projectDir` (often the user's
387
+ // home folder on Windows / Cursor — see CODELOOP_PROJECT_DIR
388
+ // auto-injection notes in setup-project.ts).
389
+ const candidates = [dir, lastInitializedDir, projectDir].filter((d) => typeof d === "string" && d.length > 0);
390
+ // 0.1.49 — also consult the persistent cache so the very first
391
+ // tool call after an IDE restart doesn't false-positive the hint
392
+ // when `dir` wasn't passed and `lastInitializedDir` is empty (the
393
+ // session's not warmed up yet).
394
+ const anyInitialized = candidates.some((d) => isProjectInitialized(d)) ||
395
+ candidates.some((d) => wasInitialisedAtPath(d));
396
+ if (!anyInitialized) {
300
397
  head.push({ type: "text", text: INIT_HINT });
301
398
  }
302
399
  const result = [...head, ...withUpdate];
@@ -305,6 +402,42 @@ function withInitHint(content, dir) {
305
402
  return result;
306
403
  }
307
404
  const TOOL_BOOTSTRAP = "[CodeLoop] You MUST call codeloop_verify after every code change. If .codeloop/config.json is missing, call codeloop_init_project FIRST.\n\n";
405
+ /**
406
+ * Zod preprocessor for `target_type` that accepts common synonyms.
407
+ *
408
+ * Photometry-DB E2E 8 regression: the agent (reasonably) tried
409
+ * `target_type: "windows_desktop"` because the project is a WPF
410
+ * Windows app, and got a hard schema-validation rejection. Same
411
+ * pattern for `mac_desktop`, `linux_desktop`, `web`, `android`,
412
+ * `ios`. Coerce them to the canonical 4-value enum so the agent
413
+ * isn't blocked by a thin naming convention.
414
+ */
415
+ const TARGET_TYPE_SYNONYMS = {
416
+ desktop: "desktop",
417
+ windows_desktop: "desktop",
418
+ win_desktop: "desktop",
419
+ win32_desktop: "desktop",
420
+ mac_desktop: "desktop",
421
+ macos_desktop: "desktop",
422
+ osx_desktop: "desktop",
423
+ linux_desktop: "desktop",
424
+ native_desktop: "desktop",
425
+ browser: "browser",
426
+ web: "browser",
427
+ webapp: "browser",
428
+ chrome: "browser",
429
+ android_emulator: "android_emulator",
430
+ android: "android_emulator",
431
+ ios_simulator: "ios_simulator",
432
+ ios: "ios_simulator",
433
+ };
434
+ function normalizeTargetType(v) {
435
+ if (typeof v !== "string")
436
+ return v;
437
+ const key = v.toLowerCase().trim();
438
+ return TARGET_TYPE_SYNONYMS[key] ?? v;
439
+ }
440
+ const targetTypeSchema = z.preprocess(normalizeTargetType, z.enum(["desktop", "browser", "android_emulator", "ios_simulator"]));
308
441
  // ── Implemented Tools ────────────────────────────────────────────
309
442
  server.tool("codeloop_verify", TOOL_BOOTSTRAP + `Run the CodeLoop verification suite on the current project. Use this tool when:
310
443
  - You have implemented or modified code and need to check if it works correctly
@@ -1129,15 +1262,34 @@ Returns: confirmation + the captured image as an MCP ImageContent block so you c
1129
1262
  // agent forgot app_name — and the auto-fix loop would then
1130
1263
  // burn cycles trying to "fix design diffs" against a
1131
1264
  // screenshot of the editor.
1132
- const { detectPlatform } = await import("./tools/verify.js");
1133
1265
  const { loadConfig } = await import("./config.js");
1134
- const platform = detectPlatform(cwd);
1135
- const isDesktopAppProject = platform === "dotnet" || platform === "xcode" || platform === "android";
1266
+ const { isDesktopAppProject } = await import("./tools/desktop_app_mode.js");
1267
+ const desktopApp = isDesktopAppProject(cwd);
1136
1268
  const cfg = loadConfig(cwd);
1137
1269
  const targetApp = params.app_name ?? cfg.evidence?.target_app;
1138
- const result = await captureScreenshot(screenshotsDir, params.screen_name, targetApp, undefined, { desktopAppMode: isDesktopAppProject });
1270
+ const result = await captureScreenshot(screenshotsDir, params.screen_name, targetApp, undefined, { desktopAppMode: desktopApp });
1271
+ // Photometry-DB E2E 8 follow-on: when we capture a desktop app
1272
+ // window, also resolve its on-screen bounds so the agent can
1273
+ // (a) compute window-relative coords from the returned image
1274
+ // dimensions, and
1275
+ // (b) pass coords:"window" to codeloop_interact to get them
1276
+ // translated to screen-absolute automatically.
1277
+ // Without this, agents reasoned from a downscaled vision view
1278
+ // of the image and clicked tens or hundreds of pixels off the
1279
+ // intended target.
1280
+ let windowBounds = null;
1281
+ if (desktopApp && targetApp && result.captured) {
1282
+ try {
1283
+ const wm = await import("./runners/window_manager.js");
1284
+ const b = await wm.getWindowBounds(targetApp);
1285
+ if (b && b.width > 0 && b.height > 0) {
1286
+ windowBounds = { x: b.x, y: b.y, width: b.width, height: b.height };
1287
+ }
1288
+ }
1289
+ catch { /* best-effort */ }
1290
+ }
1139
1291
  await trackUsage(apiKey, "visual_review");
1140
- return result;
1292
+ return { ...result, windowBounds };
1141
1293
  }, { tool: "codeloop_capture_screenshot", cwd: (params.project_dir || params.workspace_root || projectDir), input: params });
1142
1294
  if (typeof authResult === "object" && authResult !== null && "error" in authResult) {
1143
1295
  return { content: [{ type: "text", text: JSON.stringify(authResult, null, 2) }] };
@@ -1145,12 +1297,18 @@ Returns: confirmation + the captured image as an MCP ImageContent block so you c
1145
1297
  const result = authResult;
1146
1298
  const content = [];
1147
1299
  if (result.captured && result.paths.length > 0) {
1148
- content.push({ type: "text", text: JSON.stringify({
1149
- captured: true,
1150
- screen_name: params.screen_name,
1151
- path: result.paths[0],
1152
- method: result.method,
1153
- }, null, 2) });
1300
+ const payload = {
1301
+ captured: true,
1302
+ screen_name: params.screen_name,
1303
+ path: result.paths[0],
1304
+ method: result.method,
1305
+ };
1306
+ if (result.windowBounds) {
1307
+ payload.window_bounds = result.windowBounds;
1308
+ payload.coordinate_hint =
1309
+ "This screenshot captures the named window. When you compute click coordinates from the image, pass them to codeloop_interact with `coords: \"window\"` so they're translated to screen-absolute automatically. (Default `coords: \"auto\"` also works when the coord fits inside the window — but `\"window\"` is unambiguous.)";
1310
+ }
1311
+ content.push({ type: "text", text: JSON.stringify(payload, null, 2) });
1154
1312
  const data = readImageAsBase64(result.paths[0]);
1155
1313
  if (data) {
1156
1314
  content.push({ type: "image", data, mimeType: mimeForPath(result.paths[0]) });
@@ -1321,6 +1479,45 @@ After recording, call codeloop_interaction_replay to extract frames and analyze
1321
1479
  content: withInitHint([{ type: "text", text: JSON.stringify(result, null, 2) }]),
1322
1480
  };
1323
1481
  });
1482
+ server.tool("codeloop_launch_app", TOOL_BOOTSTRAP + `Launch a desktop application by name from a project's build output. Use when:
1483
+ - You need the app under test to be running before codeloop_start_recording / codeloop_interact.
1484
+ - The agent doesn't know where the executable lives and shouldn't have to hand-roll Start-Process / open -a.
1485
+
1486
+ This tool is the canonical replacement for hand-coding PowerShell Start-Process / osascript / xdg-open
1487
+ from the agent. For Android / iOS, use codeloop_interact action="launch_app" with package_id instead.
1488
+
1489
+ Search order on Windows: publish/**/*.exe → bin/Release/**/*.exe → bin/Debug/**/*.exe (newest first).
1490
+ On macOS: publish/**/*.app → build/**/*.app → /Applications/<name>.app → open -a.
1491
+ On Linux: build/**/<name> → bin/**/<name> → dist/**/<name>.
1492
+
1493
+ If app_name is omitted, falls back to evidence.target_app from .codeloop/config.json (auto-detected at
1494
+ init for .NET/Xcode/Android projects via detect-target-app).`, {
1495
+ app_name: z.string().optional().describe("Window title / executable name of the app to launch. Defaults to evidence.target_app from .codeloop/config.json. Required if target_app is unset."),
1496
+ project_dir: z.string().optional().describe("Absolute path to the project root. Defaults to CODELOOP_PROJECT_DIR / discovered project dir."),
1497
+ workspace_root: z.string().optional().describe("[Alias for project_dir]"),
1498
+ }, async (params) => {
1499
+ const authResult = await withAuth(async () => {
1500
+ const wm = await import("./runners/window_manager.js");
1501
+ const { loadConfig } = await import("./config.js");
1502
+ const cwd = (params.project_dir || params.workspace_root || projectDir);
1503
+ const cfg = loadConfig(cwd);
1504
+ const appName = params.app_name || cfg.evidence?.target_app;
1505
+ if (!appName) {
1506
+ return {
1507
+ launched: false,
1508
+ reason: "No app_name provided and evidence.target_app is unset in .codeloop/config.json. Set it (e.g. \"evidence\": { \"target_app\": \"My App\" }) or pass app_name explicitly. For .NET/Xcode/Android projects, codeloop_init_project auto-detects this — re-run init or edit the config by hand.",
1509
+ };
1510
+ }
1511
+ const r = await wm.launchDesktopApp(appName, cwd);
1512
+ return { app_name: appName, ...r };
1513
+ }, { tool: "codeloop_launch_app", cwd: (params.project_dir || params.workspace_root || projectDir), input: params });
1514
+ if (typeof authResult === "object" && authResult !== null && "error" in authResult) {
1515
+ return { content: [{ type: "text", text: JSON.stringify(authResult, null, 2) }] };
1516
+ }
1517
+ return {
1518
+ content: withInitHint([{ type: "text", text: JSON.stringify(authResult, null, 2) }], params.project_dir || params.workspace_root),
1519
+ };
1520
+ });
1324
1521
  server.tool("codeloop_start_recording", TOOL_BOOTSTRAP + `Start recording the app window in the background. The app is brought to the front automatically
1325
1522
  (un-minimized if needed). Recording continues while you interact with the app. Call codeloop_stop_recording when done.
1326
1523
  This is the PREFERRED recording method because it lets you actively operate the app during capture.
@@ -1347,11 +1544,12 @@ Flow: start_recording → codeloop_interact with ALL app elements → stop_recor
1347
1544
  Supports desktop apps, Android emulator, iOS Simulator, and browser targets.
1348
1545
  Multi-monitor: on macOS, automatically detects which screen the app window is on.
1349
1546
  App logs (stdout, logcat, simctl log) are automatically captured alongside the video.`, {
1350
- app_name: z.string().describe("The name of the app to record (used to find and focus its window)"),
1547
+ app_name: z.string().optional().describe("The name of the app to record (used to find and focus its window). For desktop projects, defaults to evidence.target_app from .codeloop/config.json — set during init via detect-target-app for .NET/Xcode/Android projects, or settable manually."),
1351
1548
  run_id: z.string().optional().describe("Existing run ID to store the video in"),
1352
1549
  max_duration_seconds: z.number().default(120).describe("Safety timeout — recording stops automatically after this many seconds"),
1353
- target_type: z.enum(["desktop", "android_emulator", "ios_simulator", "browser"]).optional()
1550
+ target_type: targetTypeSchema.optional()
1354
1551
  .describe("Capture method. Auto-detected from project if omitted. desktop=ffmpeg screen, android_emulator=adb screenrecord, ios_simulator=simctl recordVideo, browser=ffmpeg/Playwright"),
1552
+ auto_launch: z.boolean().default(true).describe("When target_type=desktop and the app isn't already running, auto-launch it from the project's build output via evidence.target_app. Set false to skip (e.g. when the app is started by another process)."),
1355
1553
  project_dir: z.string().optional().describe("Absolute path to the project root. Defaults to CODELOOP_PROJECT_DIR env var or auto-discovered project directory. MUST be an actual project folder — passing the user's home directory is rejected. If your IDE launches the MCP server from the wrong cwd (common on Windows where Cursor uses C:\\Users\\<name> as cwd), set CODELOOP_PROJECT_DIR or pass this param explicitly."),
1356
1554
  workspace_root: z.string().optional().describe("[Alias for project_dir] Same semantics; accepted because many agents reach for this conventional name. Pass either `project_dir` OR `workspace_root` — they're equivalent."),
1357
1555
  }, async (params) => {
@@ -1359,6 +1557,7 @@ App logs (stdout, logcat, simctl log) are automatically captured alongside the v
1359
1557
  const { startBackgroundRecording } = await import("./runners/video_recorder.js");
1360
1558
  const { createRunDir, getRunDir, getArtifactsBaseDir } = await import("./evidence/artifacts.js");
1361
1559
  const { detectTargetType } = await import("./runners/platform_detect.js");
1560
+ const { loadConfig } = await import("./config.js");
1362
1561
  const cwd = (params.project_dir || params.workspace_root || projectDir);
1363
1562
  let videosDir;
1364
1563
  if (params.run_id) {
@@ -1370,7 +1569,11 @@ App logs (stdout, logcat, simctl log) are automatically captured alongside the v
1370
1569
  videosDir = join(runDir, "videos");
1371
1570
  }
1372
1571
  const targetType = params.target_type || (await detectTargetType(cwd));
1572
+ const cfg = loadConfig(cwd);
1373
1573
  let appName = params.app_name;
1574
+ if (!appName && (targetType === "desktop")) {
1575
+ appName = cfg.evidence?.target_app;
1576
+ }
1374
1577
  if (targetType === "browser") {
1375
1578
  const bi = await import("./runners/browser_interaction.js");
1376
1579
  await bi.ensureBrowserPage();
@@ -1379,7 +1582,42 @@ App logs (stdout, logcat, simctl log) are automatically captured alongside the v
1379
1582
  appName = pwAppName;
1380
1583
  }
1381
1584
  }
1382
- const result = await startBackgroundRecording(videosDir, appName, params.max_duration_seconds, targetType);
1585
+ // Photometry-DB E2E 8: agents spent many turns manually
1586
+ // probing `Get-Process` / `Start-Process` to launch the app
1587
+ // because nothing in CodeLoop did it for them. Now, when
1588
+ // recording a desktop app, we auto-launch from the build
1589
+ // output if the app isn't already running.
1590
+ const wm = await import("./runners/window_manager.js");
1591
+ let autoLaunchSummary;
1592
+ if (targetType === "desktop" && params.auto_launch !== false && appName) {
1593
+ try {
1594
+ const bounds = await wm.getWindowBounds(appName);
1595
+ if (!bounds) {
1596
+ const r = await wm.launchDesktopApp(appName, cwd);
1597
+ autoLaunchSummary = {
1598
+ attempted: true,
1599
+ launched: r.launched,
1600
+ command: r.command,
1601
+ reason: r.reason,
1602
+ };
1603
+ if (r.launched) {
1604
+ // Give the window time to appear / paint before the
1605
+ // recorder starts capturing frames.
1606
+ await new Promise((res) => setTimeout(res, 2000));
1607
+ }
1608
+ }
1609
+ else {
1610
+ autoLaunchSummary = { attempted: false, launched: true, reason: "already running" };
1611
+ }
1612
+ }
1613
+ catch (e) {
1614
+ autoLaunchSummary = { attempted: true, launched: false, reason: e.message };
1615
+ }
1616
+ }
1617
+ const result = await startBackgroundRecording(videosDir, appName ?? "", params.max_duration_seconds, targetType);
1618
+ if (autoLaunchSummary) {
1619
+ result.auto_launch = autoLaunchSummary;
1620
+ }
1383
1621
  await trackUsage(apiKey, "visual_review");
1384
1622
  return result;
1385
1623
  }, { tool: "codeloop_start_recording", cwd: (params.project_dir || params.workspace_root || projectDir), input: params });
@@ -1829,10 +2067,25 @@ Returns: checklist of completed and pending verification steps.`, {
1829
2067
  const verdict = evaluateDepth(coverage, minimums, discoverySnapshot);
1830
2068
  const b = coverage.buckets;
1831
2069
  const breakdown = `click=${b.click}, navigation=${b.navigation}, input=${b.input}, commit=${b.commit}, toggle=${b.toggle}, gesture=${b.gesture}, upload=${b.upload}, keystroke=${b.keystroke}, inspect=${b.inspect}`;
2070
+ // 0.1.49: coordinate_clicks_without_intent is now a HARD
2071
+ // step-7 PENDING blocker so the agent sees the gap BEFORE
2072
+ // gate_check, not after. Pre-0.1.49 this only surfaced as
2073
+ // a verify postscript note, which agents commonly ignored
2074
+ // until the user_journey_evidence gate failed at the
2075
+ // bottom of a long verify→capture→video→gate cycle —
2076
+ // wasting the entire UI-evidence loop.
2077
+ const coordsWithoutIntent = coverage.coordinate_clicks_without_intent;
1832
2078
  if (!minimums.enabled) {
1833
2079
  depthStatus = "n/a";
1834
2080
  depthDetail = `Depth gate disabled in .codeloop/config.json. Observed buckets: ${breakdown}.`;
1835
2081
  }
2082
+ else if (coordsWithoutIntent > 0) {
2083
+ depthStatus = "PENDING";
2084
+ depthDetail =
2085
+ `${coverage.successful} successful interactions across ${runs.length} run(s) (${breakdown}). ` +
2086
+ `BLOCKER: ${coordsWithoutIntent} coordinate-only click(s) dispatched without intent / description / purpose / step fields — the CRUD classifier in user_journey_evidence cannot credit them as edit/delete/create. ` +
2087
+ `Re-run codeloop_interact for those clicks WITH \`intent\` (e.g. intent="confirm delete dialog", intent="save form"). Otherwise gate_check will return continue_fixing on user_journey_evidence even after the rest of the workflow is green.`;
2088
+ }
1836
2089
  else if (verdict.passed) {
1837
2090
  depthStatus = "done";
1838
2091
  depthDetail = `${coverage.successful} successful interactions across ${runs.length} run(s) (${breakdown}). Depth minimums met.`;
@@ -1955,8 +2208,8 @@ MANDATORY for web apps: You MUST type into form fields, fill login/signup forms,
1955
2208
  validation errors, and click submit buttons. Just navigating pages is NOT enough.
1956
2209
  Wait 1-2 seconds between interactions so video frames capture state changes.`, {
1957
2210
  action: z.string().describe("Action to perform: click, double_click, right_click, hover, type, keystroke, hotkey, scroll, drag_drop, long_press, type_and_submit, type_and_tab, fill_form, select_option, toggle, upload_file, navigate_url, navigate_back, navigate_forward, wait, sequence, swipe, back_button, home_button, deep_link, grant_permission, rotate_device, biometric_auth, launch_app, clear_app_data, mock_location, simulate_network, maestro_flow, win_ui_inspect, win_ui_automate"),
1958
- target_type: z.enum(["desktop", "browser", "android_emulator", "ios_simulator"]).optional()
1959
- .describe("Interaction target. Auto-detected if omitted."),
2211
+ target_type: targetTypeSchema.optional()
2212
+ .describe("Interaction target. Auto-detected if omitted. Accepts synonyms: `windows_desktop`/`mac_desktop`/`linux_desktop` → `desktop`; `web` → `browser`; `android` → `android_emulator`; `ios` → `ios_simulator`."),
1960
2213
  x: z.number().optional().describe("X coordinate for click/scroll/drag/swipe"),
1961
2214
  y: z.number().optional().describe("Y coordinate for click/scroll/drag/swipe"),
1962
2215
  x2: z.number().optional().describe("End X for drag_drop/swipe"),
@@ -1987,7 +2240,7 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
1987
2240
  action: z.string(),
1988
2241
  params: z.record(z.unknown()).optional(),
1989
2242
  delay_ms: z.number().optional(),
1990
- })).optional().describe("Steps for sequence action"),
2243
+ }).passthrough()).optional().describe("Steps for sequence action. Accepts BOTH nested form `{ action, params: { x, y, … }, delay_ms? }` and flat form `{ action, x, y, … }` — the flat form is what agents naturally write (mirrors the top-level codeloop_interact shape). Supports inside desktop sequences: click, double_click, right_click, hover, scroll, type, hotkey, keystroke, navigate_url, wait, win_ui_automate."),
1991
2244
  maestro_steps: z.array(z.string()).optional().describe("High-level steps for maestro_flow"),
1992
2245
  automation_action: z.enum(["invoke", "setValue", "toggle", "select", "scroll"]).optional()
1993
2246
  .describe("For win_ui_automate"),
@@ -1997,6 +2250,8 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
1997
2250
  description: z.string().optional().describe("[Alias for intent] Same semantics."),
1998
2251
  purpose: z.string().optional().describe("[Alias for intent] Same semantics."),
1999
2252
  step: z.string().optional().describe("Plan-step name when this interaction is driving a codeloop_plan_user_journey arc (e.g. 'edit', 'delete', 'create', 'save', 'verify'). Logged alongside `intent` and read by the CRUD classifier."),
2253
+ coords: z.enum(["auto", "window", "screen", "screenshot"]).optional().describe("How to interpret x/y for desktop click/double_click/right_click/hover/scroll/drag/long_press. `auto` (default): if `app_name` resolves to a visible window AND (x, y) fits inside the window's client area, treat as window-relative and auto-offset by the window origin; otherwise leave as raw screen-absolute coords. `window`: ALWAYS add the window origin offset (errors if the window isn't found). `screen`: ALWAYS pass through (legacy behaviour, matches CGEvent / user32.dll / xdotool semantics). `screenshot` (most accurate for vision-driven agents): treat (x, y) as coordinates against a captured screenshot — provide `screenshot_path` so the runner can read the image's actual width/height, scale (x, y) to the window's true pixel dimensions, then add the window origin and apply DPI. Use this whenever you computed coords from the image returned by codeloop_capture_screenshot, especially when the MCP transport may have downscaled the PNG. Fixes the Photometry-DB E2E 8 failure mode where the agent captured a 1600×900 window screenshot, computed click coords against the image, and missed the sidebar because the window's actual top-left was (286, 286) on a 5120×1440 screen."),
2254
+ screenshot_path: z.string().optional().describe("Absolute path to the screenshot PNG that x/y were computed against. Used with `coords: \"screenshot\"` to scale agent-supplied coords from the captured image dimensions to the window's actual pixel dimensions before applying the window origin and DPI factor. Pass the `path` field returned by codeloop_capture_screenshot."),
2000
2255
  project_dir: z.string().optional().describe("Absolute path to project root."),
2001
2256
  workspace_root: z.string().optional().describe("[Alias for project_dir] Pass either; they're equivalent."),
2002
2257
  }, async (params) => {
@@ -2026,13 +2281,108 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
2026
2281
  await bi.ensureBrowserPage();
2027
2282
  }
2028
2283
  // Bring the app to front before desktop interactions (non-browser, non-mobile).
2284
+ let windowOriginOffset = null;
2285
+ let screenshotDims = null;
2029
2286
  if (tt === "desktop") {
2030
2287
  const appName = params.app_name || vr.getActiveRecordingAppName();
2031
2288
  if (appName && action !== "wait") {
2032
2289
  await wm.bringAppToFront(appName);
2033
2290
  await new Promise(r => setTimeout(r, 300));
2291
+ // Photometry-DB E2E 8: agents commonly compute click coords
2292
+ // from a window-cropped screenshot (which is what
2293
+ // codeloop_capture_screenshot returns when `app_name` is
2294
+ // set), then pass those coords to codeloop_interact —
2295
+ // which expects raw SCREEN coordinates. On a multi-monitor
2296
+ // / DPI-scaled setup that mismatch silently dropped clicks
2297
+ // 100s of pixels off-target. When `coords` is `auto` (the
2298
+ // default) we look up the window's actual screen origin
2299
+ // and add it to x/y, but ONLY if (x, y) fits inside the
2300
+ // window — that keeps legacy callers passing raw screen
2301
+ // coords working unchanged. `coords: "window"` forces the
2302
+ // offset; `coords: "screen"` opts out.
2303
+ const coordsMode = params.coords ?? "auto";
2304
+ if (coordsMode !== "screen") {
2305
+ try {
2306
+ const b = await wm.getWindowBounds(appName);
2307
+ if (b && b.width > 0 && b.height > 0) {
2308
+ windowOriginOffset = {
2309
+ dx: b.x,
2310
+ dy: b.y,
2311
+ width: b.width,
2312
+ height: b.height,
2313
+ dpiX: b.dpi_x,
2314
+ dpiY: b.dpi_y,
2315
+ };
2316
+ }
2317
+ }
2318
+ catch { /* best-effort */ }
2319
+ }
2320
+ // For coords:"screenshot", load the actual PNG dims so we
2321
+ // can scale agent-supplied (x, y) up from the (possibly
2322
+ // MCP-downscaled) image to the window's true pixel size.
2323
+ if (coordsMode === "screenshot" && params.screenshot_path) {
2324
+ try {
2325
+ const { readPngDims } = await import("./runners/png_dims.js");
2326
+ screenshotDims = readPngDims(params.screenshot_path);
2327
+ }
2328
+ catch { /* best-effort */ }
2329
+ }
2034
2330
  }
2035
2331
  }
2332
+ // Helper used by every coordinate-driven desktop action below.
2333
+ // Photometry-DB E2E 8 + 0.1.49 hardening: handles four modes
2334
+ // (auto / window / screen / screenshot) plus an optional DPI
2335
+ // factor on the window bounds so high-DPI Windows / Retina
2336
+ // displays don't drop clicks 100s of pixels off-target.
2337
+ const translateXY = (x, y) => {
2338
+ if (tt !== "desktop" || x == null || y == null || !windowOriginOffset) {
2339
+ return { x, y };
2340
+ }
2341
+ const mode = params.coords ?? "auto";
2342
+ if (mode === "screen")
2343
+ return { x, y };
2344
+ const applyDpi = (px, py) => {
2345
+ // window_manager records DPI in physical-pixel-per-logical
2346
+ // form (1.0 = 96 DPI baseline; 2.0 = 200% / Retina). When
2347
+ // the screenshot was captured in logical pixels but the
2348
+ // OS click API expects physical pixels (Win32 user32.dll
2349
+ // and modern macOS CGEvent both expect physical), scale up.
2350
+ const dpiX = windowOriginOffset.dpiX ?? 1;
2351
+ const dpiY = windowOriginOffset.dpiY ?? 1;
2352
+ if (dpiX === 1 && dpiY === 1)
2353
+ return { x: px, y: py };
2354
+ return { x: px * dpiX, y: py * dpiY };
2355
+ };
2356
+ if (mode === "screenshot") {
2357
+ // Scale (x, y) from screenshot dims → window dims,
2358
+ // then add the window origin, then DPI.
2359
+ let sx = x;
2360
+ let sy = y;
2361
+ if (screenshotDims && screenshotDims.width > 0 && screenshotDims.height > 0) {
2362
+ const ratioX = windowOriginOffset.width / screenshotDims.width;
2363
+ const ratioY = windowOriginOffset.height / screenshotDims.height;
2364
+ sx = x * ratioX;
2365
+ sy = y * ratioY;
2366
+ }
2367
+ const dpi = applyDpi(sx, sy);
2368
+ return { x: dpi.x + windowOriginOffset.dx, y: dpi.y + windowOriginOffset.dy };
2369
+ }
2370
+ if (mode === "window") {
2371
+ const dpi = applyDpi(x, y);
2372
+ return { x: dpi.x + windowOriginOffset.dx, y: dpi.y + windowOriginOffset.dy };
2373
+ }
2374
+ // auto: if (x, y) fits inside the window's client area,
2375
+ // assume the agent computed against a window-cropped
2376
+ // screenshot and add the origin. Otherwise pass through
2377
+ // (likely a raw screen coord from a manual workflow).
2378
+ const inside = x >= 0 && x <= windowOriginOffset.width &&
2379
+ y >= 0 && y <= windowOriginOffset.height;
2380
+ if (inside) {
2381
+ const dpi = applyDpi(x, y);
2382
+ return { x: dpi.x + windowOriginOffset.dx, y: dpi.y + windowOriginOffset.dy };
2383
+ }
2384
+ return { x, y };
2385
+ };
2036
2386
  switch (action) {
2037
2387
  case "click":
2038
2388
  if (tt === "browser" && params.selector) {
@@ -2054,7 +2404,8 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
2054
2404
  }
2055
2405
  }
2056
2406
  else if (params.x != null && params.y != null) {
2057
- success = await wm.clickAtPosition(params.x, params.y);
2407
+ const t = translateXY(params.x, params.y);
2408
+ success = await wm.clickAtPosition(t.x, t.y);
2058
2409
  }
2059
2410
  detail = `click at ${params.selector || `(${params.x},${params.y})`}`;
2060
2411
  break;
@@ -2063,7 +2414,8 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
2063
2414
  success = await bi.browserDoubleClick(params.selector);
2064
2415
  }
2065
2416
  else if (params.x != null && params.y != null) {
2066
- success = await wm.doubleClickAtPosition(params.x, params.y);
2417
+ const t = translateXY(params.x, params.y);
2418
+ success = await wm.doubleClickAtPosition(t.x, t.y);
2067
2419
  }
2068
2420
  detail = `double_click at ${params.selector || `(${params.x},${params.y})`}`;
2069
2421
  break;
@@ -2072,7 +2424,8 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
2072
2424
  success = await bi.browserRightClick(params.selector);
2073
2425
  }
2074
2426
  else if (params.x != null && params.y != null) {
2075
- success = await wm.rightClickAtPosition(params.x, params.y);
2427
+ const t = translateXY(params.x, params.y);
2428
+ success = await wm.rightClickAtPosition(t.x, t.y);
2076
2429
  }
2077
2430
  detail = `right_click at ${params.selector || `(${params.x},${params.y})`}`;
2078
2431
  break;
@@ -2081,7 +2434,8 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
2081
2434
  success = await bi.browserHover(params.selector);
2082
2435
  }
2083
2436
  else if (params.x != null && params.y != null) {
2084
- success = await wm.hoverAtPosition(params.x, params.y);
2437
+ const t = translateXY(params.x, params.y);
2438
+ success = await wm.hoverAtPosition(t.x, t.y);
2085
2439
  }
2086
2440
  detail = `hover at ${params.selector || `(${params.x},${params.y})`}`;
2087
2441
  break;
@@ -2166,7 +2520,8 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
2166
2520
  success = await wm.simctlSwipe(sx, sy, ex, ey);
2167
2521
  }
2168
2522
  else {
2169
- success = await wm.scrollAtPosition(params.x || 500, params.y || 400, params.direction || "down", params.amount || 3);
2523
+ const t = translateXY(params.x || 500, params.y || 400);
2524
+ success = await wm.scrollAtPosition(t.x, t.y, params.direction || "down", params.amount || 3);
2170
2525
  }
2171
2526
  detail = `scroll ${params.direction || "down"}`;
2172
2527
  break;
@@ -2179,7 +2534,9 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
2179
2534
  success = await wm.adbSwipe(params.x, params.y, params.x2, params.y2, params.duration_ms || 500);
2180
2535
  }
2181
2536
  else {
2182
- success = await wm.dragDrop(params.x, params.y, params.x2, params.y2, params.duration_ms || 500);
2537
+ const a = translateXY(params.x, params.y);
2538
+ const b = translateXY(params.x2, params.y2);
2539
+ success = await wm.dragDrop(a.x, a.y, b.x, b.y, params.duration_ms || 500);
2183
2540
  }
2184
2541
  }
2185
2542
  detail = `drag_drop`;
@@ -2189,7 +2546,8 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
2189
2546
  success = await wm.adbLongPress(params.x, params.y, params.duration_ms || 1000);
2190
2547
  }
2191
2548
  else if (params.x != null && params.y != null) {
2192
- success = await wm.longPressAtPosition(params.x, params.y, params.duration_ms || 1000);
2549
+ const t = translateXY(params.x, params.y);
2550
+ success = await wm.longPressAtPosition(t.x, t.y, params.duration_ms || 1000);
2193
2551
  }
2194
2552
  detail = `long_press at (${params.x},${params.y})`;
2195
2553
  break;
@@ -2371,11 +2729,37 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
2371
2729
  if (tt === "android_emulator" && params.package_id) {
2372
2730
  const r = await import("./runners/base.js").then(m => m.runCommand("adb", ["shell", "am", "start", "-n", params.package_id], process.cwd()));
2373
2731
  success = r.exit_code === 0;
2732
+ detail = `launch_app "${params.package_id}"`;
2374
2733
  }
2375
2734
  else if (tt === "ios_simulator" && params.package_id) {
2376
2735
  success = await wm.simctlLaunch(params.package_id);
2736
+ detail = `launch_app "${params.package_id}"`;
2737
+ }
2738
+ else if (tt === "desktop") {
2739
+ // Photometry-DB E2E 8: desktop launch was completely
2740
+ // missing — agents had to hand-roll PowerShell
2741
+ // Start-Process / `open -a` calls. Now resolves via
2742
+ // evidence.target_app from the project config when
2743
+ // app_name is omitted.
2744
+ const { loadConfig } = await import("./config.js");
2745
+ const cfg = loadConfig(cwd);
2746
+ const appName = params.app_name || cfg.evidence?.target_app;
2747
+ if (!appName) {
2748
+ success = false;
2749
+ detail = "launch_app desktop: no app_name provided and evidence.target_app is unset in .codeloop/config.json. Set it (e.g. \"target_app\": \"Photometry DB\") or pass app_name explicitly.";
2750
+ }
2751
+ else {
2752
+ const r = await wm.launchDesktopApp(appName, cwd);
2753
+ success = r.launched;
2754
+ detail = r.launched
2755
+ ? `launch_app "${appName}" via ${r.command}${r.pid ? ` (pid ${r.pid})` : ""}`
2756
+ : `launch_app "${appName}" failed: ${r.reason || "unknown error"}`;
2757
+ }
2758
+ }
2759
+ else {
2760
+ success = false;
2761
+ detail = `launch_app "${params.package_id || params.app_name || ""}": target ${tt} not supported in this action`;
2377
2762
  }
2378
- detail = `launch_app "${params.package_id}"`;
2379
2763
  break;
2380
2764
  case "clear_app_data":
2381
2765
  if (tt === "android_emulator" && params.package_id) {
@@ -2439,7 +2823,22 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
2439
2823
  if (step.delay_ms)
2440
2824
  await new Promise(r => setTimeout(r, step.delay_ms));
2441
2825
  const stepAction = step.action;
2442
- const sp = (step.params || {});
2826
+ // Photometry-DB E2E 8: agents naturally write steps in
2827
+ // FLAT form `{ action, x, y, ms, … }` because that
2828
+ // mirrors the top-level codeloop_interact shape. The
2829
+ // schema documented the NESTED form `{ action,
2830
+ // params: { … } }`. Now we accept both: prefer
2831
+ // `step.params` if present, otherwise fall back to the
2832
+ // step object itself minus the wrapper keys.
2833
+ const stepObj = step;
2834
+ const nested = (step.params || {});
2835
+ const sp = Object.keys(nested).length > 0
2836
+ ? nested
2837
+ : Object.fromEntries(Object.entries(stepObj).filter(([k]) => k !== "action" && k !== "params" && k !== "delay_ms"));
2838
+ // Convenient aliases: agents wrote `ms` for wait
2839
+ // duration in the log; accept that as `duration_ms`.
2840
+ if (sp.ms != null && sp.duration_ms == null)
2841
+ sp.duration_ms = sp.ms;
2443
2842
  let stepOk = false;
2444
2843
  let stepReason;
2445
2844
  try {
@@ -2447,7 +2846,20 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
2447
2846
  stepOk = await bi.browserClick(sp.selector);
2448
2847
  }
2449
2848
  else if (stepAction === "click" && sp.x != null && sp.y != null) {
2450
- stepOk = await wm.clickAtPosition(sp.x, sp.y);
2849
+ const t = translateXY(sp.x, sp.y);
2850
+ stepOk = await wm.clickAtPosition(t.x, t.y);
2851
+ }
2852
+ else if (stepAction === "double_click" && tt !== "browser" && sp.x != null && sp.y != null) {
2853
+ const t = translateXY(sp.x, sp.y);
2854
+ stepOk = await wm.doubleClickAtPosition(t.x, t.y);
2855
+ }
2856
+ else if (stepAction === "right_click" && tt !== "browser" && sp.x != null && sp.y != null) {
2857
+ const t = translateXY(sp.x, sp.y);
2858
+ stepOk = await wm.rightClickAtPosition(t.x, t.y);
2859
+ }
2860
+ else if (stepAction === "hover" && tt !== "browser" && sp.x != null && sp.y != null) {
2861
+ const t = translateXY(sp.x, sp.y);
2862
+ stepOk = await wm.hoverAtPosition(t.x, t.y);
2451
2863
  }
2452
2864
  else if (stepAction === "type" && tt === "browser" && sp.selector && sp.text) {
2453
2865
  stepOk = await bi.browserType(sp.selector, sp.text);
@@ -2483,9 +2895,13 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
2483
2895
  stepOk = tt === "browser" ? await bi.browserKeystroke(sp.key) : await wm.sendKeyByName(sp.key);
2484
2896
  }
2485
2897
  else if (stepAction === "scroll") {
2486
- stepOk = tt === "browser"
2487
- ? await bi.browserScroll(sp.direction || "down", sp.amount || 300)
2488
- : await wm.scrollAtPosition(sp.x || 500, sp.y || 400, sp.direction || "down", sp.amount || 3);
2898
+ if (tt === "browser") {
2899
+ stepOk = await bi.browserScroll(sp.direction || "down", sp.amount || 300);
2900
+ }
2901
+ else {
2902
+ const t = translateXY(sp.x || 500, sp.y || 400);
2903
+ stepOk = await wm.scrollAtPosition(t.x, t.y, sp.direction || "down", sp.amount || 3);
2904
+ }
2489
2905
  }
2490
2906
  else if (stepAction === "wait") {
2491
2907
  await new Promise(r => setTimeout(r, sp.duration_ms || 1000));
@@ -2818,6 +3234,42 @@ No project_dir / workspace_root required — this tool is workspace-independent.
2818
3234
  ]),
2819
3235
  };
2820
3236
  });
3237
+ server.tool("codeloop_self_test", TOOL_BOOTSTRAP + `Pre-flight smoke test for CodeLoop on the current workspace. Run this on any NEW project BEFORE your first verify cycle, or whenever something looks off (silent IDE captures, "no .exe found", design_compare returning 0%, etc.).
3238
+
3239
+ It validates every critical pre-condition synthetically (no live build, no live screenshot, no network past the platform sniff):
3240
+ - Workspace exists on disk and is a directory
3241
+ - codeloop_init_project has been run (.codeloop/config.json present)
3242
+ - Platform detection produced a known platform
3243
+ - isDesktopAppProject correctly identifies the project type (so captureScreenshot won't silently fall back to fullscreen)
3244
+ - evidence.target_app is set when desktop-app mode is ON (so launchDesktopApp + captureScreenshot can resolve a window)
3245
+ - PNG decoder skip path is wired (corrupt PNGs become skip warnings, not 0% match)
3246
+ - Coordinate translation round-trips on a synthetic high-DPI fixture (so clicks land where the agent expects on Retina / 200%-DPI displays)
3247
+
3248
+ Returns a structured pass/fail report with per-check fix suggestions and a single \`next_step\` directive.
3249
+
3250
+ Use this tool FIRST when:
3251
+ - The user reports CodeLoop "isn't working" or evidence is missing
3252
+ - Switching to a project / repo you've never run CodeLoop against
3253
+ - Debugging unexplained gate failures that don't match the agent's mental model
3254
+
3255
+ Idempotent and free — safe to call as the first step of every new chat.`, {
3256
+ project_dir: z.string().optional().describe("Absolute path to the project root. Defaults to CODELOOP_PROJECT_DIR env var or auto-discovered project directory."),
3257
+ workspace_root: z.string().optional().describe("[Alias for project_dir] Same semantics."),
3258
+ }, async (params) => {
3259
+ const result = await withAuth(async () => {
3260
+ const cwd = (params.project_dir || params.workspace_root || projectDir);
3261
+ const { runSelfTest } = await import("./tools/self_test.js");
3262
+ return runSelfTest(cwd);
3263
+ }, { tool: "codeloop_self_test", cwd: (params.project_dir || params.workspace_root || projectDir), input: params });
3264
+ if (typeof result === "object" && result !== null && "error" in result) {
3265
+ return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
3266
+ }
3267
+ return {
3268
+ content: withInitHint([
3269
+ { type: "text", text: JSON.stringify(result, null, 2) },
3270
+ ]),
3271
+ };
3272
+ });
2821
3273
  server.tool("codeloop_apply_update", TOOL_BOOTSTRAP + `Apply a pending CodeLoop MCP server update to the current chat session — without asking the user to restart their IDE.
2822
3274
 
2823
3275
  Use this tool when: