codeloop-mcp-server 0.1.50 → 0.1.52

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. package/dist/auth/critical_floors.d.ts.map +1 -1
  2. package/dist/auth/critical_floors.js +8 -0
  3. package/dist/auth/critical_floors.js.map +1 -1
  4. package/dist/evidence/anti_rationalisation.d.ts +34 -0
  5. package/dist/evidence/anti_rationalisation.d.ts.map +1 -0
  6. package/dist/evidence/anti_rationalisation.js +85 -0
  7. package/dist/evidence/anti_rationalisation.js.map +1 -0
  8. package/dist/evidence/change_coverage.d.ts +59 -0
  9. package/dist/evidence/change_coverage.d.ts.map +1 -0
  10. package/dist/evidence/change_coverage.js +422 -0
  11. package/dist/evidence/change_coverage.js.map +1 -0
  12. package/dist/evidence/change_manifest.d.ts +94 -0
  13. package/dist/evidence/change_manifest.d.ts.map +1 -0
  14. package/dist/evidence/change_manifest.js +830 -0
  15. package/dist/evidence/change_manifest.js.map +1 -0
  16. package/dist/evidence/loop_state.d.ts +53 -0
  17. package/dist/evidence/loop_state.d.ts.map +1 -0
  18. package/dist/evidence/loop_state.js +147 -0
  19. package/dist/evidence/loop_state.js.map +1 -0
  20. package/dist/evidence/verify_staleness.d.ts +9 -0
  21. package/dist/evidence/verify_staleness.d.ts.map +1 -0
  22. package/dist/evidence/verify_staleness.js +180 -0
  23. package/dist/evidence/verify_staleness.js.map +1 -0
  24. package/dist/index.d.ts +1 -1
  25. package/dist/index.d.ts.map +1 -1
  26. package/dist/index.js +374 -19
  27. package/dist/index.js.map +1 -1
  28. package/dist/runners/empty_state_detector.d.ts +33 -0
  29. package/dist/runners/empty_state_detector.d.ts.map +1 -0
  30. package/dist/runners/empty_state_detector.js +304 -0
  31. package/dist/runners/empty_state_detector.js.map +1 -0
  32. package/dist/runners/maestro.d.ts +13 -0
  33. package/dist/runners/maestro.d.ts.map +1 -1
  34. package/dist/runners/maestro.js +37 -1
  35. package/dist/runners/maestro.js.map +1 -1
  36. package/dist/runners/modal_detector.d.ts +60 -0
  37. package/dist/runners/modal_detector.d.ts.map +1 -0
  38. package/dist/runners/modal_detector.js +160 -0
  39. package/dist/runners/modal_detector.js.map +1 -0
  40. package/dist/runners/python_tests.d.ts +26 -0
  41. package/dist/runners/python_tests.d.ts.map +1 -0
  42. package/dist/runners/python_tests.js +181 -0
  43. package/dist/runners/python_tests.js.map +1 -0
  44. package/dist/runners/rust_tests.d.ts +28 -0
  45. package/dist/runners/rust_tests.d.ts.map +1 -0
  46. package/dist/runners/rust_tests.js +76 -0
  47. package/dist/runners/rust_tests.js.map +1 -0
  48. package/dist/tools/c7_slug.d.ts +14 -0
  49. package/dist/tools/c7_slug.d.ts.map +1 -0
  50. package/dist/tools/c7_slug.js +21 -0
  51. package/dist/tools/c7_slug.js.map +1 -0
  52. package/dist/tools/diagnose.d.ts.map +1 -1
  53. package/dist/tools/diagnose.js +13 -0
  54. package/dist/tools/diagnose.js.map +1 -1
  55. package/dist/tools/gate_check.d.ts +2 -1
  56. package/dist/tools/gate_check.d.ts.map +1 -1
  57. package/dist/tools/gate_check.js +74 -32
  58. package/dist/tools/gate_check.js.map +1 -1
  59. package/dist/tools/is_ui_project.d.ts +23 -0
  60. package/dist/tools/is_ui_project.d.ts.map +1 -0
  61. package/dist/tools/is_ui_project.js +42 -0
  62. package/dist/tools/is_ui_project.js.map +1 -0
  63. package/dist/tools/plan_change_journey.d.ts +41 -0
  64. package/dist/tools/plan_change_journey.d.ts.map +1 -0
  65. package/dist/tools/plan_change_journey.js +131 -0
  66. package/dist/tools/plan_change_journey.js.map +1 -0
  67. package/dist/tools/verify.d.ts +28 -0
  68. package/dist/tools/verify.d.ts.map +1 -1
  69. package/dist/tools/verify.js +272 -8
  70. package/dist/tools/verify.js.map +1 -1
  71. package/package.json +1 -1
package/dist/index.js CHANGED
@@ -3,6 +3,7 @@ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
3
3
  import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
4
4
  import { z } from "zod";
5
5
  import { readFileSync, writeFileSync, existsSync, readdirSync, statSync } from "fs";
6
+ import { slugForTargetChangeEntry } from "./tools/c7_slug.js";
6
7
  function dirHasFile(dir, predicate) {
7
8
  try {
8
9
  if (!existsSync(dir))
@@ -379,9 +380,12 @@ function rememberInitializedDir(dir) {
379
380
  function withInitHint(content, dir) {
380
381
  // Order matters:
381
382
  // 1. Update notice (most actionable signal — CRITICAL stays at top).
382
- // 2. Init hint (only when project is not initialized).
383
- // 3. The original content.
384
- // 4. Version banner footer (so the agent can always see what
383
+ // 2. 0.1.51 H2 staleness directive (when source files are newer
384
+ // than the last verify — equally important to the update
385
+ // notice because both keep the agent loop honest).
386
+ // 3. Init hint (only when project is not initialized).
387
+ // 4. The original content.
388
+ // 5. Version banner footer (so the agent can always see what
385
389
  // version it's talking to — survives across all responses).
386
390
  const banner = buildVersionBanner();
387
391
  const withUpdate = withUpdateNotice(content);
@@ -409,11 +413,54 @@ function withInitHint(content, dir) {
409
413
  if (!anyInitialized) {
410
414
  head.push({ type: "text", text: INIT_HINT });
411
415
  }
416
+ // 0.1.51 H2 — verify-staleness directive. We only check the FIRST
417
+ // initialized candidate dir (so we don't double-fire when multiple
418
+ // candidates resolve, and so the cost stays O(1) per response).
419
+ // Errors are swallowed because the staleness check must never
420
+ // fail-close on a tool response.
421
+ try {
422
+ const stalenessDir = candidates.find((d) => isProjectInitialized(d) || wasInitialisedAtPath(d));
423
+ if (stalenessDir && !skipStalenessForCwd(stalenessDir)) {
424
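+ // Lazy-load so we don't pay the cost on tool responses that fire before any artifacts exist.
425
+ // NOTE(review): this file is an ES module (import/export); confirm `require` is in scope (e.g. via createRequire) — otherwise this throws and the catch below silently disables the probe.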
426
+ const { checkVerifyStaleness, buildStalenessDirective } =
427
+ // eslint-disable-next-line @typescript-eslint/no-require-imports
428
+ require("./evidence/verify_staleness.js");
429
+ const r = checkVerifyStaleness(stalenessDir);
430
+ const directive = buildStalenessDirective(r);
431
+ if (directive) {
432
+ head.push({ type: "text", text: directive });
433
+ }
434
+ }
435
+ }
436
+ catch { /* never fail tool responses on a staleness probe */ }
412
437
  const result = [...head, ...withUpdate];
413
438
  if (banner)
414
439
  result.push(banner);
415
440
  return result;
416
441
  }
442
+ // 0.1.51 H2 — suppress the staleness directive when the response is
443
+ // FROM `codeloop_verify` itself. Otherwise verify would always announce
444
+ // "your code is stale" because, by definition, source has been
445
+ // modified since the last (older) verify. We can't see the tool name
446
+ // directly inside `withInitHint`, so we use a per-cwd cooldown:
447
+ // `codeloop_verify` calls `markVerifiedNow(cwd)` and we skip the
448
+ // staleness probe for RECENT_VERIFY_GRACE_MS (30s) on that cwd — for every tool's response, not only verify's.
449
+ const RECENT_VERIFY_GRACE_MS = 30_000;
450
+ const recentlyVerified = new Map();
451
+ function skipStalenessForCwd(cwd) {
452
+ const ts = recentlyVerified.get(cwd);
453
+ if (!ts)
454
+ return false;
455
+ if (Date.now() - ts > RECENT_VERIFY_GRACE_MS) {
456
+ recentlyVerified.delete(cwd);
457
+ return false;
458
+ }
459
+ return true;
460
+ }
461
+ export function markVerifiedNow(cwd) {
462
+ recentlyVerified.set(cwd, Date.now());
463
+ }
417
464
  const TOOL_BOOTSTRAP = "[CodeLoop] You MUST call codeloop_verify after every code change. If .codeloop/config.json is missing, call codeloop_init_project FIRST.\n\n";
418
465
  /**
419
466
  * Zod preprocessor for `target_type` that accepts common synonyms.
@@ -468,6 +515,7 @@ Returns: structured report with pass/fail counts, artifact paths, and next-step
468
515
  platform: z.enum(["flutter", "web", "mobile", "xcode", "android", "dotnet", "auto"]).default("auto"),
469
516
  project_dir: z.string().optional().describe("Absolute path to the project root. Defaults to CODELOOP_PROJECT_DIR env var or auto-discovered project directory. MUST be an actual project folder — passing the user's home directory is rejected. If your IDE launches the MCP server from the wrong cwd (common on Windows where Cursor uses C:\\Users\\<name> as cwd), set CODELOOP_PROJECT_DIR or pass this param explicitly."),
470
517
  workspace_root: z.string().optional().describe("[Alias for project_dir] Same semantics; accepted because many agents reach for this conventional name. Pass either `project_dir` OR `workspace_root` — they're equivalent."),
518
+ tasks_completed: z.array(z.string()).optional().describe("0.1.52 C5 — free-text titles of the tasks the agent claims to have completed in this code change. Cross-checked against the change manifest produced by C1: every claim should map to >= 1 manifest entry and every manifest entry should map to >= 1 claim. Mismatches surface as warnings in the verify response and feed the change_coverage_evidence gate (C3)."),
471
519
  }, async (params) => {
472
520
  const cwd = resolveCwd(params);
473
521
  const explicitDir = params.project_dir || params.workspace_root;
@@ -477,6 +525,7 @@ Returns: structured report with pass/fail counts, artifact paths, and next-step
477
525
  const input = {
478
526
  scope: params.scope,
479
527
  platform: params.platform,
528
+ tasks_completed: params.tasks_completed,
480
529
  };
481
530
  const output = await runVerify(input, cfg, cwd);
482
531
  await trackUsage(apiKey, "verification_run");
@@ -491,6 +540,11 @@ Returns: structured report with pass/fail counts, artifact paths, and next-step
491
540
  // We inspect the produced run for video / interaction log evidence
492
541
  // and, when missing on a UI project, append a non-ambiguous next-
493
542
  // step directive so even a less-disciplined agent stays in the loop.
543
+ // 0.1.51 H2 — mark this cwd as freshly verified so the
544
+ // staleness directive in withInitHint doesn't fire on the
545
+ // verify response itself (the tool that just RAN verify is
546
+ // exactly the wrong place to scold "your code is stale").
547
+ markVerifiedNow(cwd);
494
548
  let postscript = "";
495
549
  try {
496
550
  const { isUIProject } = await import("./tools/gate_check.js");
@@ -617,6 +671,7 @@ Returns: pass/fail for each gate, overall confidence score, and recommendation.`
617
671
  acceptance_path: z.string().default("docs/acceptance/_template.md").describe("Path to the acceptance criteria markdown (absolute or relative to project_dir). Defaults to the template `codeloop_init_project` writes. If neither exists the acceptance gate runs with empty content and uses other signals only."),
618
672
  project_dir: z.string().optional().describe("Absolute path to the project root. Defaults to CODELOOP_PROJECT_DIR env var or auto-discovered project directory. MUST be an actual project folder — passing the user's home directory is rejected. If your IDE launches the MCP server from the wrong cwd (common on Windows where Cursor uses C:\\Users\\<name> as cwd), set CODELOOP_PROJECT_DIR or pass this param explicitly."),
619
673
  workspace_root: z.string().optional().describe("[Alias for project_dir] Same semantics; accepted because many agents reach for this conventional name. Pass either `project_dir` OR `workspace_root` — they're equivalent."),
674
+ recent_thinking: z.string().optional().describe("0.1.52 C6 — optional dump of the agent's recent thinking / rationale (last few turns of the loop). When present, the gate scans it for anti-rationalisation phrases ('comprehensive verification confirms', 'further interaction would be redundant', 'grid is empty so can't test', etc.) and surfaces specific matches in the continue_fixing postscript so the agent stops repeating the rationalisation and acts on the per-gate next steps instead. Safe to omit — the canonical FORBIDDEN list still ships in the directive without a hit."),
620
675
  }, async (params) => {
621
676
  const result = await withAuth(async () => {
622
677
  const { runGateCheck } = await import("./tools/gate_check.js");
@@ -667,6 +722,14 @@ Returns: pass/fail for each gate, overall confidence score, and recommendation.`
667
722
  return `${i + 1}. ${g}${severity}: ${action}`;
668
723
  })
669
724
  .join("\n");
725
+ // 0.1.52 C6 — anti-rationalisation directive. Built before the
726
+ // loopDirective so we can splice the formatted text in.
727
+ const { scanRecentThinking, buildAntiRationalisationDirective } = await import("./evidence/anti_rationalisation.js");
728
+ const recentThinking = typeof params.recent_thinking === "string"
729
+ ? params.recent_thinking
730
+ : undefined;
731
+ const rationalisationHits = scanRecentThinking(recentThinking);
732
+ const antiRationalisationBlock = buildAntiRationalisationDirective(rationalisationHits);
670
733
  const loopDirective = [
671
734
  "",
672
735
  "",
@@ -701,6 +764,9 @@ Returns: pass/fail for each gate, overall confidence score, and recommendation.`
701
764
  "MISSING SCREENSHOTS are NEVER a reason to stop — call codeloop_capture_screenshot for each named screen, THEN codeloop_design_compare with mode=\"all\", THEN re-gate.",
702
765
  "MISSING VIDEO is NEVER a reason to stop — call codeloop_start_recording, drive the app with codeloop_interact (NOT raw osascript / PowerShell / xdotool), then codeloop_stop_recording, THEN re-gate.",
703
766
  "INCOMPLETE CRUD ARC is NEVER a reason to stop — call codeloop_plan_user_journey, follow the returned per-entity script, re-record, THEN re-gate.",
767
+ "UNEXERCISED CHANGE-MANIFEST ENTRIES are NEVER a reason to stop — call codeloop_plan_change_journey, follow each step in priority order, pass target_change_entry verbatim on every codeloop_interact / codeloop_capture_screenshot call, THEN re-gate.",
768
+ "",
769
+ antiRationalisationBlock,
704
770
  ].join("\n");
705
771
  return {
706
772
  content: withInitHint([{ type: "text", text: resultJson + loopDirective }], resolveCwd(params)),
@@ -778,7 +844,12 @@ Returns: deterministic diff results + screenshot images for visual analysis.`, {
778
844
  content.push({ type: "text", text: prompt });
779
845
  content.push(...imageBlocks);
780
846
  }
781
- return { content };
847
+ // 0.1.51 H6 — wrap response in withInitHint so the init-hint /
848
+ // version footer / critical-floor nag fires on visual_review too.
849
+ // Pre-H6 only verify / gate_check carried these so an agent that
850
+ // jumped straight to visual_review on a fresh workspace would
851
+ // miss the init-hint and skip codeloop_init_project.
852
+ return { content: withInitHint(content, resolveCwd(params)) };
782
853
  });
783
854
  server.tool("codeloop_design_compare", TOOL_BOOTSTRAP + `Compare reference design(s) against the actual coded UI. Use this tool when:
784
855
  - The user has provided a Figma mockup, screenshot, or design reference (any image in designs/ or .codeloop/figma.json)
@@ -887,7 +958,11 @@ Returns: per-screen pixel diff scores + worst-failing reference, actual, and dif
887
958
  if (block.diff)
888
959
  content.push({ type: "image", data: block.diff.data, mimeType: block.diff.mime });
889
960
  }
890
- return { content };
961
+ // 0.1.51 H6 — withInitHint on design_compare too. The
962
+ // design_compare_evidence gate already blocks gate_check until
963
+ // every reference matches; the init-hint guarantees fresh
964
+ // workspaces don't sneak past codeloop_init_project.
965
+ return { content: withInitHint(content, resolveCwd(params)) };
891
966
  });
892
967
  server.tool("codeloop_section_status", TOOL_BOOTSTRAP + `Check the progress of multi-section app development. Use this tool when:
893
968
  - A master spec exists and you need to know which section to work on next
@@ -1196,7 +1271,10 @@ Try in this order:
1196
1271
  Verify with: \`ffmpeg -version\`
1197
1272
  Then re-run this tool to analyze the video at: ${result.video_analyzed}` });
1198
1273
  }
1199
- return { content };
1274
+ // 0.1.51 H6 — even on the ffmpeg-missing path, the response should
1275
+ // carry the init-hint / version footer so a fresh workspace is
1276
+ // never silently uninitialised.
1277
+ return { content: withInitHint(content, resolveCwd(params)) };
1200
1278
  }
1201
1279
  const imageBlocks = [];
1202
1280
  for (const framePath of result.framePaths) {
@@ -1233,7 +1311,9 @@ Report as JSON: { "flow_completed": boolean, "completion_score": 0.0-1.0, "steps
1233
1311
  else {
1234
1312
  content.push({ type: "text", text: JSON.stringify({ error: true, message: "No frames could be extracted from the video.", video_analyzed: result.video_analyzed }, null, 2) });
1235
1313
  }
1236
- return { content };
1314
+ // 0.1.51 H6 — wrap in withInitHint for the same reasons as
1315
+ // visual_review / design_compare above.
1316
+ return { content: withInitHint(content, resolveCwd(params)) };
1237
1317
  });
1238
1318
  server.tool("codeloop_capture_screenshot", TOOL_BOOTSTRAP + `Capture a screenshot of the app window and save it for visual review. Use this tool when:
1239
1319
  - You want to capture a specific page/screen of the app for visual analysis
@@ -1251,6 +1331,7 @@ Returns: confirmation + the captured image as an MCP ImageContent block so you c
1251
1331
  run_id: z.string().optional(),
1252
1332
  project_dir: z.string().optional().describe("Absolute path to the project root. Defaults to CODELOOP_PROJECT_DIR env var or auto-discovered project directory. MUST be an actual project folder — passing the user's home directory is rejected. If your IDE launches the MCP server from the wrong cwd (common on Windows where Cursor uses C:\\Users\\<name> as cwd), set CODELOOP_PROJECT_DIR or pass this param explicitly."),
1253
1333
  workspace_root: z.string().optional().describe("[Alias for project_dir] Same semantics; accepted because many agents reach for this conventional name. Pass either `project_dir` OR `workspace_root` — they're equivalent."),
1334
+ target_change_entry: z.string().optional().describe("0.1.52 C7 — verbatim display name of the change-manifest entry this screenshot exercises (e.g. 'datagrid_column: \"Product Code\"' or 'PhotometricConfigurations.ProductCode'). When present, the screenshot file is auto-anchored: the filename is prefixed with a slugged form of the entry so the change_coverage_evidence (C3) gate's screenshot scan can credit this evidence to the correct manifest entry without fuzzy matching. The value is also persisted alongside the screenshot path in the response so downstream tools (interaction_replay, gate_check) can use it."),
1254
1335
  }, async (params) => {
1255
1336
  const authResult = await withAuth(async () => {
1256
1337
  const { captureScreenshot } = await import("./runners/screenshot.js");
@@ -1280,7 +1361,19 @@ Returns: confirmation + the captured image as an MCP ImageContent block so you c
1280
1361
  const desktopApp = isDesktopAppProject(cwd);
1281
1362
  const cfg = loadConfig(cwd);
1282
1363
  const targetApp = params.app_name ?? cfg.evidence?.target_app;
1283
- const result = await captureScreenshot(screenshotsDir, params.screen_name, targetApp, undefined, { desktopAppMode: desktopApp });
1364
+ // 0.1.52 C7 — auto-anchor the screenshot filename when
1365
+ // target_change_entry is set so the C3 gate's screenshot
1366
+ // scan can credit this evidence to the correct manifest entry
1367
+ // without fuzzy matching. The slug is appended as a `--c7-<slug>` suffix
1368
+ // (not prepended, despite the "prefixed" wording in the parameter description) so existing screen_name semantics still drive the run / replay
1369
+ // tooling that keys off the prefix.
1370
+ const targetChangeEntry = typeof params.target_change_entry === "string"
1371
+ ? params.target_change_entry
1372
+ : undefined;
1373
+ const finalScreenName = targetChangeEntry
1374
+ ? `${params.screen_name}--c7-${slugForTargetChangeEntry(targetChangeEntry)}`
1375
+ : params.screen_name;
1376
+ const result = await captureScreenshot(screenshotsDir, finalScreenName, targetApp, undefined, { desktopAppMode: desktopApp });
1284
1377
  // Photometry-DB E2E 8 follow-on: when we capture a desktop app
1285
1378
  // window, also resolve its on-screen bounds so the agent can
1286
1379
  // (a) compute window-relative coords from the returned image
@@ -1302,7 +1395,7 @@ Returns: confirmation + the captured image as an MCP ImageContent block so you c
1302
1395
  catch { /* best-effort */ }
1303
1396
  }
1304
1397
  await trackUsage(apiKey, "visual_review");
1305
- return { ...result, windowBounds };
1398
+ return { ...result, windowBounds, target_change_entry: targetChangeEntry ?? null };
1306
1399
  }, { tool: "codeloop_capture_screenshot", cwd: resolveCwd(params), input: params });
1307
1400
  if (typeof authResult === "object" && authResult !== null && "error" in authResult) {
1308
1401
  return { content: [{ type: "text", text: JSON.stringify(authResult, null, 2) }] };
@@ -1316,6 +1409,11 @@ Returns: confirmation + the captured image as an MCP ImageContent block so you c
1316
1409
  path: result.paths[0],
1317
1410
  method: result.method,
1318
1411
  };
1412
+ if (result.target_change_entry) {
1413
+ payload.target_change_entry = result.target_change_entry;
1414
+ payload.c7_anchor_note =
1415
+ "This screenshot is anchored to a change-manifest entry — the change_coverage_evidence (C3) gate will credit it to that entry without fuzzy matching.";
1416
+ }
1319
1417
  if (result.windowBounds) {
1320
1418
  payload.window_bounds = result.windowBounds;
1321
1419
  payload.coordinate_hint =
@@ -1355,6 +1453,91 @@ Returns: list of discovered screens with routes, navigation triggers, confidence
1355
1453
  content: withInitHint([{ type: "text", text: JSON.stringify(result, null, 2) }]),
1356
1454
  };
1357
1455
  });
1456
+ server.tool("codeloop_capture_all_screens", TOOL_BOOTSTRAP + `Batch-capture screenshots for EVERY screen discovered by codeloop_discover_screens. Use this tool when:
1457
+ - You want full visual coverage in a single call instead of looping codeloop_capture_screenshot manually for each route
1458
+ - The agent loop has been told "capture screenshots for every page" and you want zero ambiguity about how many it actually captured
1459
+ - You're about to call codeloop_design_compare or codeloop_visual_review and need the freshest set of actuals
1460
+
1461
+ What it does:
1462
+ 1. Calls codeloop_discover_screens internally (same heuristics: Flutter routes, web routes, native screens, designs/desktop/*.png).
1463
+ 2. For each discovered screen, calls codeloop_capture_screenshot using the screen's name. Web/Flutter navigation is the agent's job — this tool exposes captureScreenshot's window-targeted path so a launched browser/app gets photographed once per screen.
1464
+ 3. Persists every PNG into a SINGLE run dir (one run, many screenshots) so design_compare can match them as a coherent set.
1465
+
1466
+ Returns: list of { screen_name, path, captured, error? } per screen + the shared run_id.`, {
1467
+ app_name: z.string().optional().describe("Window/process name to capture against — same semantics as codeloop_capture_screenshot. Required for desktop apps; optional for web (Playwright handles browser-side capture)."),
1468
+ platform: z.enum(["flutter", "web", "mobile", "xcode", "android", "dotnet", "auto"]).default("auto"),
1469
+ run_id: z.string().optional().describe("Optional explicit run_id to write screenshots into. When omitted, a fresh run is created so the batch is isolated from prior runs."),
1470
+ project_dir: z.string().optional().describe("Absolute path to the project root. See codeloop_capture_screenshot for the same semantics."),
1471
+ workspace_root: z.string().optional().describe("[Alias for project_dir] Same semantics."),
1472
+ }, async (params) => {
1473
+ const authResult = await withAuth(async () => {
1474
+ const { captureScreenshot } = await import("./runners/screenshot.js");
1475
+ const { discoverScreens } = await import("./tools/discover_screens.js");
1476
+ const { createRunDir, getRunDir, getArtifactsBaseDir } = await import("./evidence/artifacts.js");
1477
+ const { isDesktopAppProject } = await import("./tools/desktop_app_mode.js");
1478
+ const { loadConfig } = await import("./config.js");
1479
+ const cwd = resolveCwd(params);
1480
+ // 1. Discover the screens. discoverScreens already returns
1481
+ // deduped, named items; we don't need to filter further.
1482
+ const discovered = await discoverScreens(cwd, params.platform);
1483
+ // 2. Pin every capture into the SAME run dir so a follow-up
1484
+ // design_compare / visual_review picks them up as one set.
1485
+ let screenshotsDir;
1486
+ let runId;
1487
+ if (params.run_id) {
1488
+ runId = params.run_id;
1489
+ const base = getArtifactsBaseDir(cwd);
1490
+ screenshotsDir = join(getRunDir(runId, base), "screenshots");
1491
+ }
1492
+ else {
1493
+ const created = createRunDir(undefined, join(cwd, "artifacts", "runs"));
1494
+ runId = created.runId;
1495
+ screenshotsDir = join(created.runDir, "screenshots");
1496
+ }
1497
+ const desktopApp = isDesktopAppProject(cwd);
1498
+ const cfg = loadConfig(cwd);
1499
+ const targetApp = params.app_name ?? cfg.evidence?.target_app;
1500
+ const screensList = discovered.screens ?? [];
1501
+ const captures = [];
1502
+ for (const screen of screensList) {
1503
+ const name = screen.screen_name || screen.name || screen.route || "screen";
1504
+ const safe = String(name).replace(/[^a-zA-Z0-9_.-]/g, "_").slice(0, 80);
1505
+ try {
1506
+ const r = await captureScreenshot(screenshotsDir, safe, targetApp, undefined, { desktopAppMode: desktopApp });
1507
+ captures.push({
1508
+ screen_name: safe,
1509
+ captured: r.captured,
1510
+ path: r.paths?.[0],
1511
+ method: r.method,
1512
+ error: r.error,
1513
+ });
1514
+ }
1515
+ catch (err) {
1516
+ captures.push({
1517
+ screen_name: safe,
1518
+ captured: false,
1519
+ error: err.message,
1520
+ });
1521
+ }
1522
+ }
1523
+ await trackUsage(apiKey, "visual_review");
1524
+ return {
1525
+ run_id: runId,
1526
+ total_discovered: screensList.length,
1527
+ captured_count: captures.filter((c) => c.captured).length,
1528
+ failed_count: captures.filter((c) => !c.captured).length,
1529
+ captures,
1530
+ };
1531
+ }, { tool: "codeloop_capture_all_screens", cwd: resolveCwd(params), input: params });
1532
+ if (typeof authResult === "object" && authResult !== null && "error" in authResult) {
1533
+ return {
1534
+ content: withInitHint([{ type: "text", text: JSON.stringify(authResult, null, 2) }], resolveCwd(params)),
1535
+ };
1536
+ }
1537
+ return {
1538
+ content: withInitHint([{ type: "text", text: JSON.stringify(authResult, null, 2) }], resolveCwd(params)),
1539
+ };
1540
+ });
1358
1541
  server.tool("codeloop_discover_interactions", TOOL_BOOTSTRAP + `Scan the project source code to discover all INTERACTIVE ELEMENTS: input fields,
1359
1542
  buttons (with submit/save hints), toggles, selects, datagrids, file-upload zones, AI features.
1360
1543
  This is the companion to codeloop_discover_screens — where discover_screens enumerates routes,
@@ -1454,6 +1637,57 @@ ai_substantive_prompts, upload_actions, datagrid_edits }, advice, discovered_int
1454
1637
  content: withInitHint([{ type: "text", text: JSON.stringify(result, null, 2) + driveDirective }]),
1455
1638
  };
1456
1639
  });
1640
+ server.tool("codeloop_plan_change_journey", TOOL_BOOTSTRAP + `Build a per-change-entry interaction plan from the most recent change manifest (produced by
1641
+ codeloop_verify, 0.1.52 C1+C2). Where codeloop_plan_user_journey enumerates entity-level
1642
+ CRUD arcs across the WHOLE app, plan_change_journey is narrower: it only enumerates the
1643
+ steps the agent must drive to satisfy the new change_coverage_evidence (C3) gate for the
1644
+ features that just shipped in this change.
1645
+
1646
+ When to call: AFTER codeloop_verify writes a change_manifest.json AND BEFORE the agent
1647
+ starts driving codeloop_interact. The plan is the agent's per-change recording script,
1648
+ NOT a substitute for plan_user_journey — call both: plan_user_journey for broad CRUD
1649
+ coverage, plan_change_journey for the per-change verification.
1650
+
1651
+ Each returned step carries:
1652
+ - target_change_entry: the exact display name from the manifest. Pass this VERBATIM
1653
+ as a codeloop_interact / codeloop_capture_screenshot argument so the C3 gate
1654
+ credits the correct manifest entry without fuzzy matching.
1655
+ - entry_kind: ui_element_added / property_added / method_added /
1656
+ migration_column_added / migration_table_added / layout_restructure.
1657
+ - action: a concrete codeloop_interact (or shell) call template — substitute
1658
+ realistic test data into the placeholders.
1659
+ - empty_state_directive: present when the target is a DataGrid or list — see
1660
+ the C4 seed-first directive.
1661
+
1662
+ The preamble explicitly forbids typing into empty grids / clicking buttons that
1663
+ empty-state UI hides; seed data first via plan_user_journey's Create arc, a fixture
1664
+ script, or a pre-populated artifact.
1665
+
1666
+ Returns: { ready, manifest_run_id, total_entries, steps: [...], preamble, message }.`, {
1667
+ project_dir: z.string().optional().describe("Absolute path to the project root. Defaults to CODELOOP_PROJECT_DIR env var or auto-discovered project directory. MUST be an actual project folder."),
1668
+ workspace_root: z.string().optional().describe("[Alias for project_dir]"),
1669
+ run_id: z.string().optional().describe("Preferred run_id whose change_manifest.json to read. Defaults to the most recent run with a manifest."),
1670
+ }, async (params) => {
1671
+ const result = await withAuth(async () => {
1672
+ const { planChangeJourney } = await import("./tools/plan_change_journey.js");
1673
+ return planChangeJourney(resolveCwd(params), params.run_id);
1674
+ }, { tool: "codeloop_plan_change_journey", cwd: resolveCwd(params), input: params });
1675
+ const driveDirective = [
1676
+ "",
1677
+ "",
1678
+ "⚠️ DRIVE THIS PLAN NOW — do not deliberate, do not ask the user ⚠️",
1679
+ "Each step above corresponds to a manifest entry the change_coverage_evidence (C3) gate WILL block on. The next 3 tool calls per step are non-negotiable:",
1680
+ " 1. codeloop_interact — drive the action template; substitute the placeholder with realistic data; ALWAYS pass target_change_entry verbatim from the step.",
1681
+ " 2. codeloop_capture_screenshot — pin the result with target_change_entry from the same step.",
1682
+ " 3. (If the action triggered a modal) codeloop_handle_modal before continuing.",
1683
+ "Empty grids/lists are NOT a valid reason to skip a step — seed data per the preamble first.",
1684
+ "Migration entries (migration_column_added, migration_table_added) MUST be verified via codeloop_interact action='shell' that dumps the live schema; the gate scans build/runtime logs for the column/table name.",
1685
+ "Method entries are implicit — they do NOT need a direct interaction; the gate credits them automatically when the property/column they operate on is exercised.",
1686
+ ].join("\n");
1687
+ return {
1688
+ content: withInitHint([{ type: "text", text: JSON.stringify(result, null, 2) + driveDirective }]),
1689
+ };
1690
+ });
1457
1691
  server.tool("codeloop_record_interaction", TOOL_BOOTSTRAP + `Record a fixed-duration video of the app window (blocking). Use for simple captures where no
1458
1692
  interaction is needed during recording. The app is brought to front automatically and the
1459
1693
  IDE is restored after recording completes.
@@ -1863,7 +2097,9 @@ The agent MUST then write the report to docs/DEVELOPMENT_LOG.md and present it t
1863
2097
  return report;
1864
2098
  }, { tool: "codeloop_generate_dev_report", cwd: resolveCwd(params), input: params });
1865
2099
  if (typeof result === "object" && result !== null && "error" in result) {
1866
- return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
2100
+ return {
2101
+ content: withInitHint([{ type: "text", text: JSON.stringify(result, null, 2) }], resolveCwd(params)),
2102
+ };
1867
2103
  }
1868
2104
  const report = result;
1869
2105
  const content = [];
@@ -1959,7 +2195,12 @@ Emphasize how CodeLoop added value throughout the development process:
1959
2195
  - Make it clear this is an AI-agent-automated quality process powered by CodeLoop
1960
2196
 
1961
2197
  Write the report now and save it to \`docs/DEVELOPMENT_LOG.md\`.` });
1962
- return { content };
2198
+ // 0.1.51 H6 — wrap in withInitHint so the version footer / init
2199
+ // hint / critical-floor nag fires on the dev report too. The
2200
+ // dev report is the FINAL deliverable of every CodeLoop session,
2201
+ // so this is the most important place to surface "you're on a
2202
+ // critical-floor-blocked version, please update".
2203
+ return { content: withInitHint(content, resolveCwd(params)) };
1963
2204
  });
1964
2205
  server.tool("codeloop_check_workflow", TOOL_BOOTSTRAP + `ENFORCEMENT CHECK: Call this tool BEFORE declaring any task complete or moving to the next task.
1965
2206
  It checks whether all required CodeLoop verification steps have been performed for the current project.
@@ -1982,15 +2223,16 @@ Returns: checklist of completed and pending verification steps.`, {
1982
2223
  const { existsSync, readdirSync } = await import("fs");
1983
2224
  const { listRuns, loadRunMeta, getArtifactsBaseDir, getRunDir } = await import("./evidence/artifacts.js");
1984
2225
  const { detectPlatform } = await import("./tools/verify.js");
1985
- const { detectDesktopUI } = await import("./tools/desktop_detection.js");
2226
+ // 0.1.51 H4 — single source of truth for "is this a UI project".
2227
+ // Previously `check_workflow` used a narrower inline classifier that
2228
+ // didn't include the node-platform UI cases (Electron / Tauri /
2229
+ // React Native), so those projects showed screenshot / video as
2230
+ // n/a in the workflow tracker even though `gate_check` blocked them
2231
+ // on those very gates. Now both call the same helper.
2232
+ const { isUIProject: isUIProjectShared } = await import("./tools/is_ui_project.js");
1986
2233
  const cwd = resolveCwd(params);
1987
2234
  const platform = detectPlatform(cwd);
1988
- // UI detection includes desktop .NET / native: WPF, WinForms, MAUI,
1989
- // Avalonia, WinUI, UWP. Without this, every WPF/.NET 8 / MAUI / Avalonia
1990
- // project silently bypassed screenshot/video/replay gates and shipped
1991
- // a green 100% gate with zero visual evidence.
1992
- const isUIProject = ["flutter", "web", "xcode", "android"].includes(platform) ||
1993
- (platform === "dotnet" && detectDesktopUI(cwd).is_desktop_ui);
2235
+ const isUIProject = isUIProjectShared(cwd);
1994
2236
  const baseDir = getArtifactsBaseDir(cwd);
1995
2237
  const runs = listRuns(baseDir);
1996
2238
  // listRuns() returns newest-first (sorted then reversed in artifacts.ts).
@@ -2269,6 +2511,7 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
2269
2511
  screenshot_path: z.string().optional().describe("Absolute path to the screenshot PNG that x/y were computed against. Used with `coords: \"screenshot\"` to scale agent-supplied coords from the captured image dimensions to the window's actual pixel dimensions before applying the window origin and DPI factor. Pass the `path` field returned by codeloop_capture_screenshot."),
2270
2512
  project_dir: z.string().optional().describe("Absolute path to project root."),
2271
2513
  workspace_root: z.string().optional().describe("[Alias for project_dir] Pass either; they're equivalent."),
2514
+ target_change_entry: z.string().optional().describe("0.1.52 C3+C4+C7 — verbatim display name of the change-manifest entry this interaction is exercising (e.g. 'datagrid_column: \"Product Code\"' or 'PhotometricConfigurations.ProductCode'). Pass this whenever you're following a step from codeloop_plan_change_journey so the change_coverage_evidence gate can credit the correct manifest entry without fuzzy matching, AND so the empty-state risk detector can compare the target against the manifest before the action is taken."),
2272
2515
  }, async (params) => {
2273
2516
  const result = await withAuth(async () => {
2274
2517
  const action = params.action;
@@ -3120,6 +3363,13 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
3120
3363
  inputArgs.purpose = params.purpose;
3121
3364
  if (params.step)
3122
3365
  inputArgs.step = params.step;
3366
+ // 0.1.52 C3+C7 — preserve the verbatim manifest entry name on the
3367
+ // log so the change_coverage_evidence gate can credit the correct
3368
+ // entry without fuzzy matching.
3369
+ if (params.target_change_entry) {
3370
+ inputArgs.target_change_entry = params
3371
+ .target_change_entry;
3372
+ }
3123
3373
  // Post-action verification readback. Persisted alongside the
3124
3374
  // interaction so a downstream consumer (depth gate, dev report,
3125
3375
  // the agent on the next turn) can confirm the action actually
@@ -3180,8 +3430,113 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
3180
3430
  catch { /* best-effort logging */ }
3181
3431
  return { success, action, detail };
3182
3432
  }, { tool: "codeloop_interact", cwd: resolveCwd(params), input: params });
3433
+ // 0.1.51 H11 — Post-interact modal-awareness directive.
3434
+ // After every codeloop_interact call we append a HARD reminder
3435
+ // that an interaction MAY have produced a modal (Save…?, Confirm
3436
+ // delete, validation errors, "License agreement", browser
3437
+ // beforeunload, etc). Pre-H11 the agent would happily move on to
3438
+ // the next interaction and the modal would block subsequent
3439
+ // typing / clicking — and the user_journey gate would later fail
3440
+ // because half the journey didn't happen. The directive blocks
3441
+ // that path.
3442
+ let postscript = "\n\n[CodeLoop H11] After this interaction, a modal/dialog/overlay MAY have appeared (Save? / Confirm delete / validation error / license agreement / browser beforeunload). " +
3443
+ "BEFORE the next codeloop_interact call you MUST: (1) take a fresh codeloop_capture_screenshot, " +
3444
+ "(2) inspect the screenshot for any popup, dialog, sheet, alert, or full-screen overlay, " +
3445
+ "(3) if one is present call codeloop_handle_modal with the appropriate `decision` " +
3446
+ "(\"confirm\" to proceed / \"cancel\" to abort / \"dismiss\" to close), and " +
3447
+ "(4) only then continue the planned journey. " +
3448
+ "Do NOT skip modals \"to keep moving\" — an unhandled modal will block every subsequent click and the user_journey_evidence gate will block ready_for_review.";
3449
+ // 0.1.52 C4 — Empty-state seeding directive. Heuristic runs against
3450
+ // the manifest entry the agent claims to be exercising plus the
3451
+ // recent interaction log; when the call looks like a row/cell
3452
+ // action with no prior commit/seed, the postscript appends a HARD
3453
+ // seed-first instruction so the agent doesn't waste a recording
3454
+ // session typing into a non-existent row.
3455
+ try {
3456
+ const { detectEmptyStateRisk, buildEmptyStateDirective } = await import("./runners/empty_state_detector.js");
3457
+ const cwdForC4 = resolveCwd(params);
3458
+ const argsForC4 = params ?? {};
3459
+ const target_change_entry = typeof params.target_change_entry === "string"
3460
+ ? params.target_change_entry
3461
+ : undefined;
3462
+ const verdict = detectEmptyStateRisk({
3463
+ cwd: cwdForC4,
3464
+ target_change_entry,
3465
+ action: typeof params.action === "string"
3466
+ ? params.action
3467
+ : "",
3468
+ args: argsForC4,
3469
+ });
3470
+ const directive = buildEmptyStateDirective(verdict, target_change_entry);
3471
+ if (directive)
3472
+ postscript += directive;
3473
+ }
3474
+ catch {
3475
+ /* best-effort */
3476
+ }
3183
3477
  return {
3184
- content: withInitHint([{ type: "text", text: JSON.stringify(result, null, 2) }]),
3478
+ content: withInitHint([
3479
+ { type: "text", text: JSON.stringify(result, null, 2) + postscript },
3480
+ ]),
3481
+ };
3482
+ });
3483
+ // 0.1.51 H11 — codeloop_handle_modal
3484
+ server.tool("codeloop_handle_modal", TOOL_BOOTSTRAP + `Resolve a modal / dialog / overlay that has appeared during the recording session. Use this tool when:
3485
+ - A previous codeloop_interact produced a confirmation prompt (Save? / Confirm delete / "Are you sure?")
3486
+ - The app shows a license / EULA / first-run dialog you have to dismiss before continuing
3487
+ - A validation error toast or modal blocks subsequent interactions
3488
+ - The browser fires a beforeunload / "Leave site?" prompt during navigation
3489
+ - Any time the post-interact H11 directive nudged you to look for a modal
3490
+
3491
+ What it does:
3492
+ 1. Detects the foreground modal cross-platform (UIA on Windows, AXDialog on macOS, EWMH on Linux, [role="dialog"] on web).
3493
+ 2. Applies your chosen decision: "confirm" / "cancel" / "dismiss" / "inspect".
3494
+ 3. Logs the decision into the recording's interaction_log.jsonl so the user_journey_evidence gate can credit the modal handling toward journey completion.
3495
+
3496
+ Returns: detected modal description + result of the chosen decision.`, {
3497
+ decision: z.enum(["confirm", "cancel", "dismiss", "inspect"]).default("inspect").describe("Action to take on the detected modal. `confirm` = click the primary/Save/OK button. `cancel` = click Cancel/No. `dismiss` = press Escape (best for transient toasts). `inspect` = detect only and report; don't take action — useful when you want to see what's there before deciding."),
3498
+ target_type: targetTypeSchema.optional(),
3499
+ app_name: z.string().optional(),
3500
+ project_dir: z.string().optional(),
3501
+ workspace_root: z.string().optional(),
3502
+ }, async (params) => {
3503
+ const authResult = await withAuth(async () => {
3504
+ const { detectModal } = await import("./runners/modal_detector.js");
3505
+ const cwd = resolveCwd(params);
3506
+ const detection = await detectModal({
3507
+ target_type: params.target_type,
3508
+ app_name: params.app_name,
3509
+ cwd,
3510
+ config,
3511
+ });
3512
+ // "inspect", or any decision when no modal was detected,
3513
+ // short-circuits — we just report what the detector found.
3514
+ if (params.decision === "inspect" || !detection.is_modal_present) {
3515
+ return {
3516
+ decision_taken: "inspect",
3517
+ detection,
3518
+ note: !detection.is_modal_present && params.decision !== "inspect"
3519
+ ? "No modal detected. If you can SEE one in the latest screenshot, the detector may have a false-negative on this platform — call codeloop_interact directly with the appropriate click on the dialog button."
3520
+ : undefined,
3521
+ };
3522
+ }
3523
+ // For confirm / cancel / dismiss we do not press a key here; we
3524
+ // return a next_step telling the agent which codeloop_interact
3525
+ // keystroke matches the OS convention. dismiss ⇒ Escape, cancel ⇒
3526
+ // Escape (most modals treat Esc as Cancel), confirm ⇒ Enter (primary action).
3527
+ // Browser overlays sometimes ignore key presses — the agent
3528
+ // can fall back to a click via codeloop_interact targeting
3529
+ // the modal's button.
3530
+ const key = params.decision === "confirm" ? "enter" : "escape";
3531
+ return {
3532
+ decision_taken: params.decision,
3533
+ detection,
3534
+ next_step: `Issue codeloop_interact with action="keystroke", key="${key}" against the same target_type to dispatch the modal. ` +
3535
+ `If the modal swallows the key (some web overlays do), follow up with action="click" against the visible button text or selector.`,
3536
+ };
3537
+ }, { tool: "codeloop_handle_modal", cwd: resolveCwd(params), input: params });
3538
+ return {
3539
+ content: withInitHint([{ type: "text", text: JSON.stringify(authResult, null, 2) }], resolveCwd(params)),
3185
3540
  };
3186
3541
  });
3187
3542
  // ── codeloop_init_project ────────────────────────────────────────