codeloop-mcp-server 0.1.49 → 0.1.51

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. package/dist/auth/critical_floors.d.ts.map +1 -1
  2. package/dist/auth/critical_floors.js +8 -0
  3. package/dist/auth/critical_floors.js.map +1 -1
  4. package/dist/evidence/loop_state.d.ts +53 -0
  5. package/dist/evidence/loop_state.d.ts.map +1 -0
  6. package/dist/evidence/loop_state.js +147 -0
  7. package/dist/evidence/loop_state.js.map +1 -0
  8. package/dist/evidence/verify_staleness.d.ts +9 -0
  9. package/dist/evidence/verify_staleness.d.ts.map +1 -0
  10. package/dist/evidence/verify_staleness.js +180 -0
  11. package/dist/evidence/verify_staleness.js.map +1 -0
  12. package/dist/index.d.ts +1 -1
  13. package/dist/index.d.ts.map +1 -1
  14. package/dist/index.js +377 -61
  15. package/dist/index.js.map +1 -1
  16. package/dist/runners/maestro.d.ts +13 -0
  17. package/dist/runners/maestro.d.ts.map +1 -1
  18. package/dist/runners/maestro.js +37 -1
  19. package/dist/runners/maestro.js.map +1 -1
  20. package/dist/runners/modal_detector.d.ts +60 -0
  21. package/dist/runners/modal_detector.d.ts.map +1 -0
  22. package/dist/runners/modal_detector.js +160 -0
  23. package/dist/runners/modal_detector.js.map +1 -0
  24. package/dist/runners/python_tests.d.ts +26 -0
  25. package/dist/runners/python_tests.d.ts.map +1 -0
  26. package/dist/runners/python_tests.js +181 -0
  27. package/dist/runners/python_tests.js.map +1 -0
  28. package/dist/runners/resolve_project_dir.d.ts +67 -0
  29. package/dist/runners/resolve_project_dir.d.ts.map +1 -0
  30. package/dist/runners/resolve_project_dir.js +82 -0
  31. package/dist/runners/resolve_project_dir.js.map +1 -0
  32. package/dist/runners/rust_tests.d.ts +28 -0
  33. package/dist/runners/rust_tests.d.ts.map +1 -0
  34. package/dist/runners/rust_tests.js +76 -0
  35. package/dist/runners/rust_tests.js.map +1 -0
  36. package/dist/runners/screenshot.d.ts.map +1 -1
  37. package/dist/runners/screenshot.js +17 -2
  38. package/dist/runners/screenshot.js.map +1 -1
  39. package/dist/runners/uia_resolver.d.ts +70 -0
  40. package/dist/runners/uia_resolver.d.ts.map +1 -0
  41. package/dist/runners/uia_resolver.js +210 -0
  42. package/dist/runners/uia_resolver.js.map +1 -0
  43. package/dist/runners/window_manager.d.ts +28 -0
  44. package/dist/runners/window_manager.d.ts.map +1 -1
  45. package/dist/runners/window_manager.js +119 -4
  46. package/dist/runners/window_manager.js.map +1 -1
  47. package/dist/tools/design_compare.d.ts.map +1 -1
  48. package/dist/tools/design_compare.js +71 -33
  49. package/dist/tools/design_compare.js.map +1 -1
  50. package/dist/tools/diagnose.d.ts.map +1 -1
  51. package/dist/tools/diagnose.js +45 -1
  52. package/dist/tools/diagnose.js.map +1 -1
  53. package/dist/tools/discover_screens.d.ts.map +1 -1
  54. package/dist/tools/discover_screens.js +94 -2
  55. package/dist/tools/discover_screens.js.map +1 -1
  56. package/dist/tools/gate_check.d.ts +2 -1
  57. package/dist/tools/gate_check.d.ts.map +1 -1
  58. package/dist/tools/gate_check.js +46 -32
  59. package/dist/tools/gate_check.js.map +1 -1
  60. package/dist/tools/is_ui_project.d.ts +23 -0
  61. package/dist/tools/is_ui_project.d.ts.map +1 -0
  62. package/dist/tools/is_ui_project.js +42 -0
  63. package/dist/tools/is_ui_project.js.map +1 -0
  64. package/dist/tools/verify.d.ts +28 -0
  65. package/dist/tools/verify.d.ts.map +1 -1
  66. package/dist/tools/verify.js +159 -7
  67. package/dist/tools/verify.js.map +1 -1
  68. package/package.json +1 -1
package/dist/index.js CHANGED
@@ -26,6 +26,7 @@ import { applyUpdate, applyUpdateInputSchema, } from "./tools/apply_update.js";
26
26
  import { trackUsage } from "./auth/usage_tracker.js";
27
27
  import { isLocalMode } from "./auth/local_mode.js";
28
28
  import { discoverProjectDir } from "./project-discovery.js";
29
+ import { resolveProjectDirPath } from "./runners/resolve_project_dir.js";
29
30
  function readImageAsBase64(path) {
30
31
  if (!existsSync(path))
31
32
  return null;
@@ -61,6 +62,18 @@ function mimeForPath(path) {
61
62
  // when the server's auto-discovered fallback is uninitialized.
62
63
  const discovery = discoverProjectDir();
63
64
  const projectDir = discovery.projectDir;
65
/**
 * Resolve the directory a tool handler should operate on (0.1.50 H4).
 *
 * Forwards the caller-supplied `project_dir` / `workspace_root` to
 * `resolveProjectDirPath`, with the auto-discovered `projectDir` as the
 * `default_dir`. Per the original note, that helper applies the precedence
 * ladder (explicit > workspace_root > active recording > env > walked_up
 * > default). Every capture / interact / record / replay handler goes
 * through this one function so they cannot drift back to the
 * Photometry-DB regression, where a missing project_dir wrote artifacts
 * to the user's HOME folder.
 *
 * @param {{ project_dir?: string, workspace_root?: string }} params
 *   Tool-call parameters as received by the handler.
 * @returns Whatever `resolveProjectDirPath` returns for these inputs.
 */
function resolveCwd(params) {
    const { project_dir, workspace_root } = params;
    return resolveProjectDirPath({ project_dir, workspace_root, default_dir: projectDir });
}
64
77
  if (discovery.source !== "cwd" && discovery.source !== "env") {
65
78
  console.error(`[CodeLoop] Auto-discovered project at: ${projectDir} (via ${discovery.source} search)`);
66
79
  }
@@ -366,9 +379,12 @@ function rememberInitializedDir(dir) {
366
379
  function withInitHint(content, dir) {
367
380
  // Order matters:
368
381
  // 1. Update notice (most actionable signal — CRITICAL stays at top).
369
- // 2. Init hint (only when project is not initialized).
370
- // 3. The original content.
371
- // 4. Version banner footer (so the agent can always see what
382
+ // 2. Init hint (only when project is not initialized).
383
+ // 3. 0.1.51 H2 staleness directive (when source files are newer
384
+ // than the last verify — equally important to the update
385
+ // notice because both keep the agent loop honest).
386
+ // 4. The original content.
387
+ // 5. Version banner footer (so the agent can always see what
372
388
  // version it's talking to — survives across all responses).
373
389
  const banner = buildVersionBanner();
374
390
  const withUpdate = withUpdateNotice(content);
@@ -396,11 +412,54 @@ function withInitHint(content, dir) {
396
412
  if (!anyInitialized) {
397
413
  head.push({ type: "text", text: INIT_HINT });
398
414
  }
415
+ // 0.1.51 H2 — verify-staleness directive. We only check the FIRST
416
+ // initialized candidate dir (so we don't double-fire when multiple
417
+ // candidates resolve, and so the cost stays O(1) per response).
418
+ // Errors are swallowed because the staleness check must never
419
+ // fail-close on a tool response.
420
+ try {
421
+ const stalenessDir = candidates.find((d) => isProjectInitialized(d) || wasInitialisedAtPath(d));
422
+ if (stalenessDir && !skipStalenessForCwd(stalenessDir)) {
423
+ // Lazy-load so we don't pay the cost on tool responses that
424
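+ // fire before any artifacts exist. NOTE(review): this file is an ES module (it uses `import`), where a bare `require` is undefined unless one is created via `createRequire` — confirm; if it throws, the catch below swallows the ReferenceError and this directive never fires.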
425
+ const { checkVerifyStaleness, buildStalenessDirective } =
426
+ // eslint-disable-next-line @typescript-eslint/no-require-imports
427
+ require("./evidence/verify_staleness.js");
428
+ const r = checkVerifyStaleness(stalenessDir);
429
+ const directive = buildStalenessDirective(r);
430
+ if (directive) {
431
+ head.push({ type: "text", text: directive });
432
+ }
433
+ }
434
+ }
435
+ catch { /* never fail tool responses on a staleness probe */ }
399
436
  const result = [...head, ...withUpdate];
400
437
  if (banner)
401
438
  result.push(banner);
402
439
  return result;
403
440
  }
441
// 0.1.51 H2 — suppress the staleness directive when the response is
// FROM `codeloop_verify` itself. Otherwise verify would always announce
// "your code is stale" because, by definition, source has been
// modified since the last (older) verify. We can't see the tool name
// directly inside `withInitHint`, so we use a per-cwd cooldown:
// `codeloop_verify` calls `markVerifiedNow(cwd)` and we skip the
// staleness probe for 30s on that cwd.

/** Length of the post-verify grace window, in milliseconds. */
const RECENT_VERIFY_GRACE_MS = 30_000;

/** Maps a cwd string to the epoch-ms timestamp of its latest `markVerifiedNow` call. */
const recentlyVerified = new Map();

/**
 * Report whether the staleness probe should be skipped for `cwd` because
 * a verify was recorded for it within the grace window.
 *
 * Expired entries are removed from the map as a side effect.
 *
 * @param {string} cwd - Directory key, compared by exact string equality.
 * @param {number} [now=Date.now()] - Current time in epoch ms; injectable
 *   so the window can be exercised deterministically.
 * @returns {boolean} `true` while inside the grace window, otherwise `false`.
 */
function skipStalenessForCwd(cwd, now = Date.now()) {
    const verifiedAt = recentlyVerified.get(cwd);
    // Compare against `undefined` rather than truthiness so a stored
    // timestamp of 0 is not mistaken for "never verified".
    if (verifiedAt === undefined) {
        return false;
    }
    const elapsed = now - verifiedAt;
    // A negative elapsed time means the wall clock moved backwards after
    // the verify was recorded. Treat that as expired; otherwise the
    // suppression would last until the clock caught up again, which can
    // be far longer than the intended 30s.
    if (elapsed < 0 || elapsed > RECENT_VERIFY_GRACE_MS) {
        recentlyVerified.delete(cwd);
        return false;
    }
    return true;
}

/**
 * Record that `cwd` has just been verified, opening its grace window.
 *
 * @param {string} cwd - Directory key; must match the string later passed
 *   to `skipStalenessForCwd`.
 * @param {number} [now=Date.now()] - Timestamp to record, in epoch ms.
 */
export function markVerifiedNow(cwd, now = Date.now()) {
    recentlyVerified.set(cwd, now);
}
404
463
  const TOOL_BOOTSTRAP = "[CodeLoop] You MUST call codeloop_verify after every code change. If .codeloop/config.json is missing, call codeloop_init_project FIRST.\n\n";
405
464
  /**
406
465
  * Zod preprocessor for `target_type` that accepts common synonyms.
@@ -456,7 +515,7 @@ Returns: structured report with pass/fail counts, artifact paths, and next-step
456
515
  project_dir: z.string().optional().describe("Absolute path to the project root. Defaults to CODELOOP_PROJECT_DIR env var or auto-discovered project directory. MUST be an actual project folder — passing the user's home directory is rejected. If your IDE launches the MCP server from the wrong cwd (common on Windows where Cursor uses C:\\Users\\<name> as cwd), set CODELOOP_PROJECT_DIR or pass this param explicitly."),
457
516
  workspace_root: z.string().optional().describe("[Alias for project_dir] Same semantics; accepted because many agents reach for this conventional name. Pass either `project_dir` OR `workspace_root` — they're equivalent."),
458
517
  }, async (params) => {
459
- const cwd = (params.project_dir || params.workspace_root || projectDir);
518
+ const cwd = resolveCwd(params);
460
519
  const explicitDir = params.project_dir || params.workspace_root;
461
520
  const cfg = explicitDir ? loadConfig(explicitDir) : config;
462
521
  const result = await withAuth(async () => {
@@ -478,6 +537,11 @@ Returns: structured report with pass/fail counts, artifact paths, and next-step
478
537
  // We inspect the produced run for video / interaction log evidence
479
538
  // and, when missing on a UI project, append a non-ambiguous next-
480
539
  // step directive so even a less-disciplined agent stays in the loop.
540
+ // 0.1.51 H2 — mark this cwd as freshly verified so the
541
+ // staleness directive in withInitHint doesn't fire on the
542
+ // verify response itself (the tool that just RAN verify is
543
+ // exactly the wrong place to scold "your code is stale").
544
+ markVerifiedNow(cwd);
481
545
  let postscript = "";
482
546
  try {
483
547
  const { isUIProject } = await import("./tools/gate_check.js");
@@ -544,11 +608,11 @@ Returns: categorized issues with severity, evidence, root cause, and actionable
544
608
  run_id: params.run_id,
545
609
  focus_files: params.focus_files,
546
610
  };
547
- const cwd = (params.project_dir || params.workspace_root || projectDir);
611
+ const cwd = resolveCwd(params);
548
612
  const output = await runDiagnose(input, config, cwd);
549
613
  await trackUsage(apiKey, "verification_run");
550
614
  return output;
551
- }, { tool: "codeloop_diagnose", cwd: (params.project_dir || params.workspace_root || projectDir), input: params });
615
+ }, { tool: "codeloop_diagnose", cwd: resolveCwd(params), input: params });
552
616
  // Auto-fix-loop directive. Diagnose is only useful when it leads
553
617
  // to a fix + re-verify, not when it leads to a long deliberation
554
618
  // over which repair to do first. The repair_tasks array in the
@@ -612,7 +676,7 @@ Returns: pass/fail for each gate, overall confidence score, and recommendation.`
612
676
  spec_path: params.spec_path,
613
677
  acceptance_path: params.acceptance_path,
614
678
  };
615
- const cwd = (params.project_dir || params.workspace_root || projectDir);
679
+ const cwd = resolveCwd(params);
616
680
  const output = await runGateCheck(input, config, cwd);
617
681
  // Persist gate_result and confidence to meta.json
618
682
  try {
@@ -632,7 +696,7 @@ Returns: pass/fail for each gate, overall confidence score, and recommendation.`
632
696
  catch { /* best-effort persistence */ }
633
697
  await trackUsage(apiKey, "verification_run");
634
698
  return output;
635
- }, { tool: "codeloop_gate_check", cwd: (params.project_dir || params.workspace_root || projectDir), input: params });
699
+ }, { tool: "codeloop_gate_check", cwd: resolveCwd(params), input: params });
636
700
  const resultJson = JSON.stringify(result, null, 2);
637
701
  const gateResult = result;
638
702
  if (gateResult.recommendation === "continue_fixing") {
@@ -690,11 +754,11 @@ Returns: pass/fail for each gate, overall confidence score, and recommendation.`
690
754
  "INCOMPLETE CRUD ARC is NEVER a reason to stop — call codeloop_plan_user_journey, follow the returned per-entity script, re-record, THEN re-gate.",
691
755
  ].join("\n");
692
756
  return {
693
- content: withInitHint([{ type: "text", text: resultJson + loopDirective }], (params.project_dir || params.workspace_root || projectDir)),
757
+ content: withInitHint([{ type: "text", text: resultJson + loopDirective }], resolveCwd(params)),
694
758
  };
695
759
  }
696
760
  return {
697
- content: withInitHint([{ type: "text", text: resultJson }], (params.project_dir || params.workspace_root || projectDir)),
761
+ content: withInitHint([{ type: "text", text: resultJson }], resolveCwd(params)),
698
762
  };
699
763
  });
700
764
  // ── Vision Tools (agent-delegated: returns images for AI agent analysis) ──
@@ -721,11 +785,11 @@ Returns: deterministic diff results + screenshot images for visual analysis.`, {
721
785
  ux_checklist_path: params.ux_checklist_path,
722
786
  viewport_sizes: params.viewport_sizes,
723
787
  };
724
- const cwd = (params.project_dir || params.workspace_root || projectDir);
788
+ const cwd = resolveCwd(params);
725
789
  const result = await runVisualReview(input, config, cwd);
726
790
  await trackUsage(apiKey, "visual_review");
727
791
  return result;
728
- }, { tool: "codeloop_visual_review", cwd: (params.project_dir || params.workspace_root || projectDir), input: params });
792
+ }, { tool: "codeloop_visual_review", cwd: resolveCwd(params), input: params });
729
793
  if (typeof authResult === "object" && authResult !== null && "error" in authResult) {
730
794
  return { content: [{ type: "text", text: JSON.stringify(authResult, null, 2) }] };
731
795
  }
@@ -765,7 +829,12 @@ Returns: deterministic diff results + screenshot images for visual analysis.`, {
765
829
  content.push({ type: "text", text: prompt });
766
830
  content.push(...imageBlocks);
767
831
  }
768
- return { content };
832
+ // 0.1.51 H6 — wrap response in withInitHint so the init-hint /
833
+ // version footer / critical-floor nag fires on visual_review too.
834
+ // Pre-H6 only verify / gate_check carried these so an agent that
835
+ // jumped straight to visual_review on a fresh workspace would
836
+ // miss the init-hint and skip codeloop_init_project.
837
+ return { content: withInitHint(content, resolveCwd(params)) };
769
838
  });
770
839
  server.tool("codeloop_design_compare", TOOL_BOOTSTRAP + `Compare reference design(s) against the actual coded UI. Use this tool when:
771
840
  - The user has provided a Figma mockup, screenshot, or design reference (any image in designs/ or .codeloop/figma.json)
@@ -806,11 +875,11 @@ Returns: per-screen pixel diff scores + worst-failing reference, actual, and dif
806
875
  designs_dir: params.designs_dir,
807
876
  run_id: params.run_id,
808
877
  };
809
- const cwd = (params.project_dir || params.workspace_root || projectDir);
878
+ const cwd = resolveCwd(params);
810
879
  const result = await runDesignCompare(input, config, cwd);
811
880
  await trackUsage(apiKey, "visual_review");
812
881
  return result;
813
- }, { tool: "codeloop_design_compare", cwd: (params.project_dir || params.workspace_root || projectDir), input: params });
882
+ }, { tool: "codeloop_design_compare", cwd: resolveCwd(params), input: params });
814
883
  if (typeof authResult === "object" && authResult !== null && "error" in authResult) {
815
884
  return { content: [{ type: "text", text: JSON.stringify(authResult, null, 2) }] };
816
885
  }
@@ -874,7 +943,11 @@ Returns: per-screen pixel diff scores + worst-failing reference, actual, and dif
874
943
  if (block.diff)
875
944
  content.push({ type: "image", data: block.diff.data, mimeType: block.diff.mime });
876
945
  }
877
- return { content };
946
+ // 0.1.51 H6 — withInitHint on design_compare too. The
947
+ // design_compare_evidence gate already blocks gate_check until
948
+ // every reference matches; the init-hint guarantees fresh
949
+ // workspaces don't sneak past codeloop_init_project.
950
+ return { content: withInitHint(content, resolveCwd(params)) };
878
951
  });
879
952
  server.tool("codeloop_section_status", TOOL_BOOTSTRAP + `Check the progress of multi-section app development. Use this tool when:
880
953
  - A master spec exists and you need to know which section to work on next
@@ -1125,7 +1198,7 @@ Returns: extracted key frames as images + expected flow description + app logs f
1125
1198
  }, async (params) => {
1126
1199
  const authResult = await withAuth(async () => {
1127
1200
  const { runInteractionReplay } = await import("./tools/interaction_replay.js");
1128
- const cwd = (params.project_dir || params.workspace_root || projectDir);
1201
+ const cwd = resolveCwd(params);
1129
1202
  const output = await runInteractionReplay({
1130
1203
  video_path: params.video_path,
1131
1204
  run_id: params.run_id,
@@ -1133,7 +1206,7 @@ Returns: extracted key frames as images + expected flow description + app logs f
1133
1206
  }, config, cwd);
1134
1207
  await trackUsage(apiKey, "visual_review");
1135
1208
  return output;
1136
- }, { tool: "codeloop_interaction_replay", cwd: (params.project_dir || params.workspace_root || projectDir), input: params });
1209
+ }, { tool: "codeloop_interaction_replay", cwd: resolveCwd(params), input: params });
1137
1210
  if (typeof authResult === "object" && authResult !== null && "error" in authResult) {
1138
1211
  return { content: [{ type: "text", text: JSON.stringify(authResult, null, 2) }] };
1139
1212
  }
@@ -1183,7 +1256,10 @@ Try in this order:
1183
1256
  Verify with: \`ffmpeg -version\`
1184
1257
  Then re-run this tool to analyze the video at: ${result.video_analyzed}` });
1185
1258
  }
1186
- return { content };
1259
+ // 0.1.51 H6 — even on the ffmpeg-missing path, the response should
1260
+ // carry the init-hint / version footer so a fresh workspace is
1261
+ // never silently uninitialised.
1262
+ return { content: withInitHint(content, resolveCwd(params)) };
1187
1263
  }
1188
1264
  const imageBlocks = [];
1189
1265
  for (const framePath of result.framePaths) {
@@ -1220,7 +1296,9 @@ Report as JSON: { "flow_completed": boolean, "completion_score": 0.0-1.0, "steps
1220
1296
  else {
1221
1297
  content.push({ type: "text", text: JSON.stringify({ error: true, message: "No frames could be extracted from the video.", video_analyzed: result.video_analyzed }, null, 2) });
1222
1298
  }
1223
- return { content };
1299
+ // 0.1.51 H6 — wrap in withInitHint for the same reasons as
1300
+ // visual_review / design_compare above.
1301
+ return { content: withInitHint(content, resolveCwd(params)) };
1224
1302
  });
1225
1303
  server.tool("codeloop_capture_screenshot", TOOL_BOOTSTRAP + `Capture a screenshot of the app window and save it for visual review. Use this tool when:
1226
1304
  - You want to capture a specific page/screen of the app for visual analysis
@@ -1242,7 +1320,7 @@ Returns: confirmation + the captured image as an MCP ImageContent block so you c
1242
1320
  const authResult = await withAuth(async () => {
1243
1321
  const { captureScreenshot } = await import("./runners/screenshot.js");
1244
1322
  const { createRunDir, getRunDir, getArtifactsBaseDir } = await import("./evidence/artifacts.js");
1245
- const cwd = (params.project_dir || params.workspace_root || projectDir);
1323
+ const cwd = resolveCwd(params);
1246
1324
  let screenshotsDir;
1247
1325
  if (params.run_id) {
1248
1326
  const base = getArtifactsBaseDir(cwd);
@@ -1290,7 +1368,7 @@ Returns: confirmation + the captured image as an MCP ImageContent block so you c
1290
1368
  }
1291
1369
  await trackUsage(apiKey, "visual_review");
1292
1370
  return { ...result, windowBounds };
1293
- }, { tool: "codeloop_capture_screenshot", cwd: (params.project_dir || params.workspace_root || projectDir), input: params });
1371
+ }, { tool: "codeloop_capture_screenshot", cwd: resolveCwd(params), input: params });
1294
1372
  if (typeof authResult === "object" && authResult !== null && "error" in authResult) {
1295
1373
  return { content: [{ type: "text", text: JSON.stringify(authResult, null, 2) }] };
1296
1374
  }
@@ -1336,12 +1414,97 @@ Returns: list of discovered screens with routes, navigation triggers, confidence
1336
1414
  }, async (params) => {
1337
1415
  const result = await withAuth(async () => {
1338
1416
  const { discoverScreens } = await import("./tools/discover_screens.js");
1339
- return discoverScreens((params.project_dir || params.workspace_root || projectDir), params.platform);
1340
- }, { tool: "codeloop_discover_screens", cwd: (params.project_dir || params.workspace_root || projectDir), input: params });
1417
+ return discoverScreens(resolveCwd(params), params.platform);
1418
+ }, { tool: "codeloop_discover_screens", cwd: resolveCwd(params), input: params });
1341
1419
  return {
1342
1420
  content: withInitHint([{ type: "text", text: JSON.stringify(result, null, 2) }]),
1343
1421
  };
1344
1422
  });
1423
// Registers the `codeloop_capture_all_screens` tool: discovers the
// project's screens, then takes one screenshot per discovered screen into
// a single run directory and reports per-screen results.
server.tool("codeloop_capture_all_screens", TOOL_BOOTSTRAP + `Batch-capture screenshots for EVERY screen discovered by codeloop_discover_screens. Use this tool when:
- You want full visual coverage in a single call instead of looping codeloop_capture_screenshot manually for each route
- The agent loop has been told "capture screenshots for every page" and you want zero ambiguity about how many it actually captured
- You're about to call codeloop_design_compare or codeloop_visual_review and need the freshest set of actuals

What it does:
1. Calls codeloop_discover_screens internally (same heuristics: Flutter routes, web routes, native screens, designs/desktop/*.png).
2. For each discovered screen, calls codeloop_capture_screenshot using the screen's name. Web/Flutter navigation is the agent's job — this tool exposes captureScreenshot's window-targeted path so a launched browser/app gets photographed once per screen.
3. Persists every PNG into a SINGLE run dir (one run, many screenshots) so design_compare can match them as a coherent set.

Returns: list of { screen_name, path, captured, error? } per screen + the shared run_id.`, {
    app_name: z.string().optional().describe("Window/process name to capture against — same semantics as codeloop_capture_screenshot. Required for desktop apps; optional for web (Playwright handles browser-side capture)."),
    platform: z.enum(["flutter", "web", "mobile", "xcode", "android", "dotnet", "auto"]).default("auto"),
    run_id: z.string().optional().describe("Optional explicit run_id to write screenshots into. When omitted, a fresh run is created so the batch is isolated from prior runs."),
    project_dir: z.string().optional().describe("Absolute path to the project root. See codeloop_capture_screenshot for the same semantics."),
    workspace_root: z.string().optional().describe("[Alias for project_dir] Same semantics."),
}, async (params) => {
    // Resolve the working directory once so discovery, artifact paths,
    // the auth context and the response wrapper all use the same value.
    const cwd = resolveCwd(params);
    const authResult = await withAuth(async () => {
        const { captureScreenshot } = await import("./runners/screenshot.js");
        const { discoverScreens } = await import("./tools/discover_screens.js");
        const { createRunDir, getRunDir, getArtifactsBaseDir } = await import("./evidence/artifacts.js");
        const { isDesktopAppProject } = await import("./tools/desktop_app_mode.js");
        const { loadConfig } = await import("./config.js");
        // 1. Discover the screens. discoverScreens already returns
        //    deduped, named items; we don't need to filter further.
        const discovered = await discoverScreens(cwd, params.platform);
        // 2. Pin every capture into the SAME run dir so a follow-up
        //    design_compare / visual_review picks them up as one set.
        let screenshotsDir;
        let runId;
        if (params.run_id) {
            runId = params.run_id;
            const base = getArtifactsBaseDir(cwd);
            screenshotsDir = join(getRunDir(runId, base), "screenshots");
        }
        else {
            // NOTE(review): this branch hard-codes <cwd>/artifacts/runs while
            // the run_id branch asks getArtifactsBaseDir(cwd) — confirm the
            // two always name the same directory.
            const created = createRunDir(undefined, join(cwd, "artifacts", "runs"));
            runId = created.runId;
            screenshotsDir = join(created.runDir, "screenshots");
        }
        const desktopApp = isDesktopAppProject(cwd);
        const cfg = loadConfig(cwd);
        const targetApp = params.app_name ?? cfg.evidence?.target_app;
        const screensList = discovered.screens ?? [];
        const captures = [];
        // File names already handed out in this batch. Distinct screens can
        // collapse to the same sanitised name (e.g. "/a/b" and "/a_b", or
        // several unnamed screens that all fall back to "screen"); without
        // de-duplication a later capture would overwrite an earlier PNG
        // while both were still reported as captured.
        const usedNames = new Set();
        for (const screen of screensList) {
            const rawName = screen.screen_name || screen.name || screen.route || "screen";
            const baseName = String(rawName).replace(/[^a-zA-Z0-9_.-]/g, "_").slice(0, 80);
            let safe = baseName;
            for (let suffix = 2; usedNames.has(safe); suffix += 1) {
                safe = `${baseName}_${suffix}`;
            }
            usedNames.add(safe);
            try {
                const r = await captureScreenshot(screenshotsDir, safe, targetApp, undefined, { desktopAppMode: desktopApp });
                captures.push({
                    screen_name: safe,
                    captured: r.captured,
                    path: r.paths?.[0],
                    method: r.method,
                    error: r.error,
                });
            }
            catch (err) {
                // One failed capture must not abort the batch. Non-Error
                // throwables have no `.message`, so stringify those.
                captures.push({
                    screen_name: safe,
                    captured: false,
                    error: err instanceof Error ? err.message : String(err),
                });
            }
        }
        await trackUsage(apiKey, "visual_review");
        return {
            run_id: runId,
            total_discovered: screensList.length,
            captured_count: captures.filter((c) => c.captured).length,
            failed_count: captures.filter((c) => !c.captured).length,
            captures,
        };
    }, { tool: "codeloop_capture_all_screens", cwd, input: params });
    // The auth-error payload and the success payload are serialised the
    // same way and both go through withInitHint, so a single return
    // covers both cases.
    return {
        content: withInitHint([{ type: "text", text: JSON.stringify(authResult, null, 2) }], cwd),
    };
});
1345
1508
  server.tool("codeloop_discover_interactions", TOOL_BOOTSTRAP + `Scan the project source code to discover all INTERACTIVE ELEMENTS: input fields,
1346
1509
  buttons (with submit/save hints), toggles, selects, datagrids, file-upload zones, AI features.
1347
1510
  This is the companion to codeloop_discover_screens — where discover_screens enumerates routes,
@@ -1371,8 +1534,8 @@ selects, datagrids, upload_areas, ai_features, forms }, ai_features_detected, sc
1371
1534
  }, async (params) => {
1372
1535
  const result = await withAuth(async () => {
1373
1536
  const { discoverInteractions } = await import("./tools/discover_interactions.js");
1374
- return discoverInteractions((params.project_dir || params.workspace_root || projectDir), params.platform);
1375
- }, { tool: "codeloop_discover_interactions", cwd: (params.project_dir || params.workspace_root || projectDir), input: params });
1537
+ return discoverInteractions(resolveCwd(params), params.platform);
1538
+ }, { tool: "codeloop_discover_interactions", cwd: resolveCwd(params), input: params });
1376
1539
  return {
1377
1540
  content: withInitHint([{ type: "text", text: JSON.stringify(result, null, 2) }]),
1378
1541
  };
@@ -1414,8 +1577,8 @@ ai_substantive_prompts, upload_actions, datagrid_edits }, advice, discovered_int
1414
1577
  }, async (params) => {
1415
1578
  const result = await withAuth(async () => {
1416
1579
  const { planUserJourney } = await import("./tools/plan_user_journey.js");
1417
- return planUserJourney((params.project_dir || params.workspace_root || projectDir), params.platform, params.top_n);
1418
- }, { tool: "codeloop_plan_user_journey", cwd: (params.project_dir || params.workspace_root || projectDir), input: params });
1580
+ return planUserJourney(resolveCwd(params), params.platform, params.top_n);
1581
+ }, { tool: "codeloop_plan_user_journey", cwd: resolveCwd(params), input: params });
1419
1582
  // Auto-fix loop directive. The plan is ONLY useful if the agent
1420
1583
  // now drives it via a recording session — otherwise it's a
1421
1584
  // detailed document that gets read and then deliberated over.
@@ -1457,7 +1620,7 @@ After recording, call codeloop_interaction_replay to extract frames and analyze
1457
1620
  const authResult = await withAuth(async () => {
1458
1621
  const { recordVideo } = await import("./runners/video_recorder.js");
1459
1622
  const { createRunDir, getRunDir, getArtifactsBaseDir } = await import("./evidence/artifacts.js");
1460
- const cwd = (params.project_dir || params.workspace_root || projectDir);
1623
+ const cwd = resolveCwd(params);
1461
1624
  let videosDir;
1462
1625
  if (params.run_id) {
1463
1626
  const base = getArtifactsBaseDir(cwd);
@@ -1470,7 +1633,7 @@ After recording, call codeloop_interaction_replay to extract frames and analyze
1470
1633
  const result = await recordVideo(videosDir, params.duration_seconds, params.app_name);
1471
1634
  await trackUsage(apiKey, "visual_review");
1472
1635
  return result;
1473
- }, { tool: "codeloop_record_interaction", cwd: (params.project_dir || params.workspace_root || projectDir), input: params });
1636
+ }, { tool: "codeloop_record_interaction", cwd: resolveCwd(params), input: params });
1474
1637
  if (typeof authResult === "object" && authResult !== null && "error" in authResult) {
1475
1638
  return { content: [{ type: "text", text: JSON.stringify(authResult, null, 2) }] };
1476
1639
  }
@@ -1499,7 +1662,7 @@ init for .NET/Xcode/Android projects via detect-target-app).`, {
1499
1662
  const authResult = await withAuth(async () => {
1500
1663
  const wm = await import("./runners/window_manager.js");
1501
1664
  const { loadConfig } = await import("./config.js");
1502
- const cwd = (params.project_dir || params.workspace_root || projectDir);
1665
+ const cwd = resolveCwd(params);
1503
1666
  const cfg = loadConfig(cwd);
1504
1667
  const appName = params.app_name || cfg.evidence?.target_app;
1505
1668
  if (!appName) {
@@ -1510,7 +1673,7 @@ init for .NET/Xcode/Android projects via detect-target-app).`, {
1510
1673
  }
1511
1674
  const r = await wm.launchDesktopApp(appName, cwd);
1512
1675
  return { app_name: appName, ...r };
1513
- }, { tool: "codeloop_launch_app", cwd: (params.project_dir || params.workspace_root || projectDir), input: params });
1676
+ }, { tool: "codeloop_launch_app", cwd: resolveCwd(params), input: params });
1514
1677
  if (typeof authResult === "object" && authResult !== null && "error" in authResult) {
1515
1678
  return { content: [{ type: "text", text: JSON.stringify(authResult, null, 2) }] };
1516
1679
  }
@@ -1558,7 +1721,7 @@ App logs (stdout, logcat, simctl log) are automatically captured alongside the v
1558
1721
  const { createRunDir, getRunDir, getArtifactsBaseDir } = await import("./evidence/artifacts.js");
1559
1722
  const { detectTargetType } = await import("./runners/platform_detect.js");
1560
1723
  const { loadConfig } = await import("./config.js");
1561
- const cwd = (params.project_dir || params.workspace_root || projectDir);
1724
+ const cwd = resolveCwd(params);
1562
1725
  let videosDir;
1563
1726
  if (params.run_id) {
1564
1727
  const base = getArtifactsBaseDir(cwd);
@@ -1620,7 +1783,7 @@ App logs (stdout, logcat, simctl log) are automatically captured alongside the v
1620
1783
  }
1621
1784
  await trackUsage(apiKey, "visual_review");
1622
1785
  return result;
1623
- }, { tool: "codeloop_start_recording", cwd: (params.project_dir || params.workspace_root || projectDir), input: params });
1786
+ }, { tool: "codeloop_start_recording", cwd: resolveCwd(params), input: params });
1624
1787
  if (typeof authResult === "object" && authResult !== null && "error" in authResult) {
1625
1788
  return { content: [{ type: "text", text: JSON.stringify(authResult, null, 2) }] };
1626
1789
  }
@@ -1703,7 +1866,7 @@ The agent MUST then write the report to docs/DEVELOPMENT_LOG.md and present it t
1703
1866
  const result = await withAuth(async () => {
1704
1867
  const { listRuns, loadRunMeta, getArtifactsBaseDir, getRunDir } = await import("./evidence/artifacts.js");
1705
1868
  const { readdirSync, existsSync } = await import("fs");
1706
- const cwd = (params.project_dir || params.workspace_root || projectDir);
1869
+ const cwd = resolveCwd(params);
1707
1870
  const baseDir = getArtifactsBaseDir(cwd);
1708
1871
  const runs = listRuns(baseDir);
1709
1872
  const runSummaries = [];
@@ -1848,9 +2011,11 @@ The agent MUST then write the report to docs/DEVELOPMENT_LOG.md and present it t
1848
2011
  };
1849
2012
  await trackUsage(apiKey, "verification_run");
1850
2013
  return report;
1851
- }, { tool: "codeloop_generate_dev_report", cwd: (params.project_dir || params.workspace_root || projectDir), input: params });
2014
+ }, { tool: "codeloop_generate_dev_report", cwd: resolveCwd(params), input: params });
1852
2015
  if (typeof result === "object" && result !== null && "error" in result) {
1853
- return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
2016
+ return {
2017
+ content: withInitHint([{ type: "text", text: JSON.stringify(result, null, 2) }], resolveCwd(params)),
2018
+ };
1854
2019
  }
1855
2020
  const report = result;
1856
2021
  const content = [];
@@ -1946,7 +2111,12 @@ Emphasize how CodeLoop added value throughout the development process:
1946
2111
  - Make it clear this is an AI-agent-automated quality process powered by CodeLoop
1947
2112
 
1948
2113
  Write the report now and save it to \`docs/DEVELOPMENT_LOG.md\`.` });
1949
- return { content };
2114
+ // 0.1.51 H6 — wrap in withInitHint so the version footer / init
2115
+ // hint / critical-floor nag fires on the dev report too. The
2116
+ // dev report is the FINAL deliverable of every CodeLoop session,
2117
+ // so this is the most important place to surface "you're on a
2118
+ // critical-floor-blocked version, please update".
2119
+ return { content: withInitHint(content, resolveCwd(params)) };
1950
2120
  });
1951
2121
  server.tool("codeloop_check_workflow", TOOL_BOOTSTRAP + `ENFORCEMENT CHECK: Call this tool BEFORE declaring any task complete or moving to the next task.
1952
2122
  It checks whether all required CodeLoop verification steps have been performed for the current project.
@@ -1969,15 +2139,16 @@ Returns: checklist of completed and pending verification steps.`, {
1969
2139
  const { existsSync, readdirSync } = await import("fs");
1970
2140
  const { listRuns, loadRunMeta, getArtifactsBaseDir, getRunDir } = await import("./evidence/artifacts.js");
1971
2141
  const { detectPlatform } = await import("./tools/verify.js");
1972
- const { detectDesktopUI } = await import("./tools/desktop_detection.js");
1973
- const cwd = (params.project_dir || params.workspace_root || projectDir);
2142
+ // 0.1.51 H4 — single source of truth for "is this a UI project".
2143
+ // Previously `check_workflow` used a narrower inline classifier that
2144
+ // didn't include the node-platform UI cases (Electron / Tauri /
2145
+ // React Native), so those projects showed screenshot / video as
2146
+ // n/a in the workflow tracker even though `gate_check` blocked them
2147
+ // on those very gates. Now both call the same helper.
2148
+ const { isUIProject: isUIProjectShared } = await import("./tools/is_ui_project.js");
2149
+ const cwd = resolveCwd(params);
1974
2150
  const platform = detectPlatform(cwd);
1975
- // UI detection includes desktop .NET / native: WPF, WinForms, MAUI,
1976
- // Avalonia, WinUI, UWP. Without this, every WPF/.NET 8 / MAUI / Avalonia
1977
- // project silently bypassed screenshot/video/replay gates and shipped
1978
- // a green 100% gate with zero visual evidence.
1979
- const isUIProject = ["flutter", "web", "xcode", "android"].includes(platform) ||
1980
- (platform === "dotnet" && detectDesktopUI(cwd).is_desktop_ui);
2151
+ const isUIProject = isUIProjectShared(cwd);
1981
2152
  const baseDir = getArtifactsBaseDir(cwd);
1982
2153
  const runs = listRuns(baseDir);
1983
2154
  // listRuns() returns newest-first (sorted then reversed in artifacts.ts).
@@ -2174,7 +2345,7 @@ Returns: checklist of completed and pending verification steps.`, {
2174
2345
  ? "All CodeLoop verification steps are complete. You may proceed."
2175
2346
  : `WARNING: ${pendingSteps.length} step(s) still pending. DO NOT declare this task complete. DO NOT ask the user what to do next. Complete the pending steps below, then call codeloop_gate_check. If gate returns continue_fixing, loop back and fix without asking.\n${pendingSteps.map(s => ` - ${s.step}: ${s.detail}`).join("\n")}`,
2176
2347
  };
2177
- }, { tool: "codeloop_check_workflow", cwd: (params.project_dir || params.workspace_root || projectDir), input: params });
2348
+ }, { tool: "codeloop_check_workflow", cwd: resolveCwd(params), input: params });
2178
2349
  return {
2179
2350
  content: withInitHint([{ type: "text", text: JSON.stringify(result, null, 2) }]),
2180
2351
  };
@@ -2214,11 +2385,13 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
2214
2385
  y: z.number().optional().describe("Y coordinate for click/scroll/drag/swipe"),
2215
2386
  x2: z.number().optional().describe("End X for drag_drop/swipe"),
2216
2387
  y2: z.number().optional().describe("End Y for drag_drop/swipe"),
2217
- text: z.string().optional().describe("Text for type/type_and_submit/type_and_tab/fill"),
2388
+ text: z.string().optional().describe("Text for type/type_and_submit/type_and_tab/fill. 0.1.50+: ALSO accepted on click/double_click/right_click/hover with no x/y on Windows desktop targets — walks the UIA tree to find the first element whose Name property matches (exact, then substring) and clicks its centre. Closes the Photometry-DB E2E 8 regression where `{ action: \"click\", text: \"Luminaire Photometric Data\" }` produced `click at (undefined, undefined)`."),
2218
2389
  key: z.string().optional().describe("Key name for keystroke: enter, tab, escape, backspace, delete, etc."),
2219
2390
  keys: z.string().optional().describe("Key combo for hotkey: cmd+s, ctrl+enter, cmd+shift+z, etc."),
2220
2391
  selector: z.string().optional().describe("CSS selector (browser) or automation ID (Windows)"),
2221
2392
  selector2: z.string().optional().describe("Second selector for drag target"),
2393
+ automation_id: z.string().optional().describe("[Windows desktop] UIA AutomationId of the target element. 0.1.50+: when supplied for click/double_click/right_click/hover with no x/y, CodeLoop walks the UIA tree and resolves the element's screen coords automatically (DPI-aware, window-origin-aware), then clicks at the centre. Most stable selector for WPF/WinUI/UWP — prefer this over `text` whenever the control exposes one."),
2394
+ role: z.string().optional().describe("[Windows desktop] UIA ControlType programmatic name (e.g. `ControlType.Button`, `ControlType.TabItem`). 0.1.50+: when supplied for click/double_click/right_click/hover with no x/y, walks the UIA tree and clicks the FIRST element of that ControlType. Use as a last resort when neither AutomationId nor Name is specific enough."),
2222
2395
  url: z.string().optional().describe("URL for navigate_url or deep_link"),
2223
2396
  direction: z.enum(["up", "down", "left", "right"]).optional().describe("Scroll/swipe direction"),
2224
2397
  amount: z.number().optional().describe("Scroll amount or other numeric value"),
@@ -2264,7 +2437,7 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
2264
2437
  const bi = await import("./runners/browser_interaction.js");
2265
2438
  const vr = await import("./runners/video_recorder.js");
2266
2439
  // Auto-detect target_type when omitted
2267
- const cwd = (params.project_dir || params.workspace_root || projectDir);
2440
+ const cwd = resolveCwd(params);
2268
2441
  let tt = params.target_type;
2269
2442
  if (!tt) {
2270
2443
  const recordingTarget = vr.getActiveRecordingTargetType();
@@ -2329,6 +2502,39 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
2329
2502
  }
2330
2503
  }
2331
2504
  }
2505
+ // 0.1.50 H1 — UIA selector fallback for the desktop click-family actions (click / double_click / right_click / hover).
2506
+ // When the agent passes `automation_id` / `text` / `role` without both x and y, ask resolveSelectorToXY
2507
+ // (./runners/uia_resolver.js) for the centre of the matching element. Resolves to { x, y, foundBy } on a hit, or to
2508
+ // null when: the target is not a Windows desktop, both x and y were supplied, no app name is known, no selector
2509
+ // was passed, or the resolver found nothing / threw. Per the original note the coords are screen-absolute, so callers pass them straight to wm.*AtPosition and skip translateXY (which is for agent-supplied coords).
2510
+ const resolveDesktopSelector = async () => {
2511
+ if (tt !== "desktop" || process.platform !== "win32")
2512
+ return null;
2513
+ if (params.x != null && params.y != null)
2514
+ return null;
2515
+ const appName = params.app_name || vr.getActiveRecordingAppName();
2516
+ if (!appName)
2517
+ return null;
2518
+ const hasSelector = (params.automation_id && params.automation_id.length > 0) ||
2519
+ (params.text && params.text.length > 0) ||
2520
+ (params.role && params.role.length > 0);
2521
+ if (!hasSelector)
2522
+ return null;
2523
+ try {
2524
+ const { resolveSelectorToXY } = await import("./runners/uia_resolver.js");
2525
+ const r = await resolveSelectorToXY({
2526
+ appName,
2527
+ automationId: params.automation_id,
2528
+ text: params.text,
2529
+ role: params.role,
2530
+ });
2531
+ if (r.found && r.x != null && r.y != null) {
2532
+ return { x: r.x, y: r.y, foundBy: r.foundBy ?? "unknown" };
2533
+ }
2534
+ }
2535
+ catch { /* best-effort: an import or resolver failure is deliberately ignored and falls through to the null return below, so the caller reports its generic "<action> at ..." detail instead */ }
2536
+ return null;
2537
+ };
2332
2538
  // Helper used by every coordinate-driven desktop action below.
2333
2539
  // Photometry-DB E2E 8 + 0.1.49 hardening: handles four modes
2334
2540
  // (auto / window / screen / screenshot) plus an optional DPI
@@ -2407,7 +2613,16 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
2407
2613
  const t = translateXY(params.x, params.y);
2408
2614
  success = await wm.clickAtPosition(t.x, t.y);
2409
2615
  }
2410
- detail = `click at ${params.selector || `(${params.x},${params.y})`}`;
2616
+ else {
2617
+ // 0.1.50 H1 — UIA selector fallback for click without coords.
2618
+ const resolved = await resolveDesktopSelector();
2619
+ if (resolved) {
2620
+ success = await wm.clickAtPosition(resolved.x, resolved.y);
2621
+ detail = `click at ${resolved.foundBy}=${params.automation_id || params.text || params.role} → (${resolved.x},${resolved.y})`;
2622
+ break;
2623
+ }
2624
+ }
2625
+ detail = `click at ${params.selector || params.automation_id || params.text || params.role || `(${params.x},${params.y})`}`;
2411
2626
  break;
2412
2627
  case "double_click":
2413
2628
  if (tt === "browser" && params.selector) {
@@ -2417,7 +2632,15 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
2417
2632
  const t = translateXY(params.x, params.y);
2418
2633
  success = await wm.doubleClickAtPosition(t.x, t.y);
2419
2634
  }
2420
- detail = `double_click at ${params.selector || `(${params.x},${params.y})`}`;
2635
+ else {
2636
+ const resolved = await resolveDesktopSelector();
2637
+ if (resolved) {
2638
+ success = await wm.doubleClickAtPosition(resolved.x, resolved.y);
2639
+ detail = `double_click at ${resolved.foundBy}=${params.automation_id || params.text || params.role} → (${resolved.x},${resolved.y})`;
2640
+ break;
2641
+ }
2642
+ }
2643
+ detail = `double_click at ${params.selector || params.automation_id || params.text || params.role || `(${params.x},${params.y})`}`;
2421
2644
  break;
2422
2645
  case "right_click":
2423
2646
  if (tt === "browser" && params.selector) {
@@ -2427,7 +2650,15 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
2427
2650
  const t = translateXY(params.x, params.y);
2428
2651
  success = await wm.rightClickAtPosition(t.x, t.y);
2429
2652
  }
2430
- detail = `right_click at ${params.selector || `(${params.x},${params.y})`}`;
2653
+ else {
2654
+ const resolved = await resolveDesktopSelector();
2655
+ if (resolved) {
2656
+ success = await wm.rightClickAtPosition(resolved.x, resolved.y);
2657
+ detail = `right_click at ${resolved.foundBy}=${params.automation_id || params.text || params.role} → (${resolved.x},${resolved.y})`;
2658
+ break;
2659
+ }
2660
+ }
2661
+ detail = `right_click at ${params.selector || params.automation_id || params.text || params.role || `(${params.x},${params.y})`}`;
2431
2662
  break;
2432
2663
  case "hover":
2433
2664
  if (tt === "browser" && params.selector) {
@@ -2437,7 +2668,15 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
2437
2668
  const t = translateXY(params.x, params.y);
2438
2669
  success = await wm.hoverAtPosition(t.x, t.y);
2439
2670
  }
2440
- detail = `hover at ${params.selector || `(${params.x},${params.y})`}`;
2671
+ else {
2672
+ const resolved = await resolveDesktopSelector();
2673
+ if (resolved) {
2674
+ success = await wm.hoverAtPosition(resolved.x, resolved.y);
2675
+ detail = `hover at ${resolved.foundBy}=${params.automation_id || params.text || params.role} → (${resolved.x},${resolved.y})`;
2676
+ break;
2677
+ }
2678
+ }
2679
+ detail = `hover at ${params.selector || params.automation_id || params.text || params.role || `(${params.x},${params.y})`}`;
2441
2680
  break;
2442
2681
  case "type":
2443
2682
  if (tt === "browser" && params.selector && params.text) {
@@ -2782,7 +3021,7 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
2782
3021
  case "maestro_flow":
2783
3022
  if (params.maestro_steps) {
2784
3023
  const mg = await import("./runners/maestro_generator.js");
2785
- const cwd = (params.project_dir || params.workspace_root || projectDir);
3024
+ const cwd = resolveCwd(params);
2786
3025
  const genResult = await mg.generateMaestroFlow(params.maestro_steps, cwd);
2787
3026
  if ("error" in genResult) {
2788
3027
  return { success: false, action, detail: genResult.error };
@@ -3098,9 +3337,86 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
3098
3337
  }
3099
3338
  catch { /* best-effort logging */ }
3100
3339
  return { success, action, detail };
3101
- }, { tool: "codeloop_interact", cwd: (params.project_dir || params.workspace_root || projectDir), input: params });
3340
+ }, { tool: "codeloop_interact", cwd: resolveCwd(params), input: params });
3341
+ // 0.1.51 H11 — Post-interact modal-awareness directive.
3342
+ // After every codeloop_interact call we append a HARD reminder
3343
+ // that an interaction MAY have produced a modal (Save…?, Confirm
3344
+ // delete, validation errors, "License agreement", browser
3345
+ // beforeunload, etc). Pre-H11 the agent would happily move on to
3346
+ // the next interaction and the modal would block subsequent
3347
+ // typing / clicking — and the user_journey gate would later fail
3348
+ // because half the journey didn't happen. The directive blocks
3349
+ // that path.
3350
+ const postscript = "\n\n[CodeLoop H11] After this interaction, a modal/dialog/overlay MAY have appeared (Save? / Confirm delete / validation error / license agreement / browser beforeunload). " +
3351
+ "BEFORE the next codeloop_interact call you MUST: (1) take a fresh codeloop_capture_screenshot, " +
3352
+ "(2) inspect the screenshot for any popup, dialog, sheet, alert, or full-screen overlay, " +
3353
+ "(3) if one is present call codeloop_handle_modal with the appropriate `decision` " +
3354
+ "(\"confirm\" to proceed / \"cancel\" to abort / \"dismiss\" to close), and " +
3355
+ "(4) only then continue the planned journey. " +
3356
+ "Do NOT skip modals \"to keep moving\" — an unhandled modal will block every subsequent click and the user_journey_evidence gate will block ready_for_review.";
3102
3357
  return {
3103
- content: withInitHint([{ type: "text", text: JSON.stringify(result, null, 2) }]),
3358
+ content: withInitHint([
3359
+ { type: "text", text: JSON.stringify(result, null, 2) + postscript },
3360
+ ]),
3361
+ };
3362
+ });
3363
+ // 0.1.51 H11 — codeloop_handle_modal: runs detectModal and reports the result; for confirm / cancel / dismiss it returns a next_step instruction naming the key to send. NOTE(review): the description below promises the decision is applied and logged to interaction_log.jsonl, but nothing in this handler sends input or writes a log — confirm whether detectModal / withAuth covers that or the description needs updating.
3364
+ server.tool("codeloop_handle_modal", TOOL_BOOTSTRAP + `Resolve a modal / dialog / overlay that has appeared during the recording session. Use this tool when:
3365
+ - A previous codeloop_interact produced a confirmation prompt (Save? / Confirm delete / "Are you sure?")
3366
+ - The app shows a license / EULA / first-run dialog you have to dismiss before continuing
3367
+ - A validation error toast or modal blocks subsequent interactions
3368
+ - The browser fires a beforeunload / "Leave site?" prompt during navigation
3369
+ - Any time the post-interact H11 directive nudged you to look for a modal
3370
+
3371
+ What it does:
3372
+ 1. Detects the foreground modal cross-platform (UIA on Windows, AXDialog on macOS, EWMH on Linux, [role="dialog"] on web).
3373
+ 2. Applies your chosen decision: "confirm" / "cancel" / "dismiss" / "inspect".
3374
+ 3. Logs the decision into the recording's interaction_log.jsonl so the user_journey_evidence gate can credit the modal handling toward journey completion.
3375
+
3376
+ Returns: detected modal description + result of the chosen decision.`, {
3377
+ decision: z.enum(["confirm", "cancel", "dismiss", "inspect"]).default("inspect").describe("Action to take on the detected modal. `confirm` = click the primary/Save/OK button. `cancel` = click Cancel/No. `dismiss` = press Escape (best for transient toasts). `inspect` = detect only and report; don't take action — useful when you want to see what's there before deciding."),
3378
+ target_type: targetTypeSchema.optional(),
3379
+ app_name: z.string().optional(),
3380
+ project_dir: z.string().optional(),
3381
+ workspace_root: z.string().optional(),
3382
+ }, async (params) => {
3383
+ const authResult = await withAuth(async () => {
3384
+ const { detectModal } = await import("./runners/modal_detector.js");
3385
+ const cwd = resolveCwd(params);
3386
+ const detection = await detectModal({
3387
+ target_type: params.target_type,
3388
+ app_name: params.app_name,
3389
+ cwd,
3390
+ config,
3391
+ });
3392
+ // The "inspect" decision short-circuits, and so does ANY decision when
3393
+ // no modal was detected — both paths report decision_taken: "inspect" plus the raw detection (the note is set only in the no-modal, non-inspect case).
3394
+ if (params.decision === "inspect" || !detection.is_modal_present) {
3395
+ return {
3396
+ decision_taken: "inspect",
3397
+ detection,
3398
+ note: !detection.is_modal_present && params.decision !== "inspect"
3399
+ ? "No modal detected. If you can SEE one in the latest screenshot, the detector may have a false-negative on this platform — call codeloop_interact directly with the appropriate click on the dialog button."
3400
+ : undefined,
3401
+ };
3402
+ }
3403
+ // For confirm / cancel / dismiss this handler does NOT send any
3404
+ // input itself. It maps the decision to a key name by OS
3405
+ // convention — dismiss ⇒ Escape, cancel ⇒ Escape (most modals
3406
+ // treat Esc as Cancel), confirm ⇒ Enter (primary action) — and
3407
+ // returns a next_step telling the agent to issue that keystroke via codeloop_interact.
3408
+ // Browser overlays sometimes ignore key presses — the agent
3409
+ // can fall back to a click via codeloop_interact targeting the modal's button.
3410
+ const key = params.decision === "confirm" ? "enter" : "escape";
3411
+ return {
3412
+ decision_taken: params.decision,
3413
+ detection,
3414
+ next_step: `Issue codeloop_interact with action="keystroke", key="${key}" against the same target_type to dispatch the modal. ` +
3415
+ `If the modal swallows the key (some web overlays do), follow up with action="click" against the visible button text or selector.`,
3416
+ };
3417
+ }, { tool: "codeloop_handle_modal", cwd: resolveCwd(params), input: params });
3418
+ return {
3419
+ content: withInitHint([{ type: "text", text: JSON.stringify(authResult, null, 2) }], resolveCwd(params)),
3104
3420
  };
3105
3421
  });
3106
3422
  // ── codeloop_init_project ────────────────────────────────────────
@@ -3120,7 +3436,7 @@ project. After it completes, proceed directly with \`codeloop_verify\`.`, {
3120
3436
  workspace_root: z.string().optional().describe("[Alias for project_dir] Same semantics; accepted because many agents reach for this conventional name. Pass either `project_dir` OR `workspace_root` — they're equivalent."),
3121
3437
  project_type: z.enum(["flutter", "web", "mobile", "xcode", "android", "dotnet", "node", "auto"]).default("auto").describe("Project type. Use 'auto' to detect automatically."),
3122
3438
  }, async (params) => {
3123
- const cwd = (params.project_dir || params.workspace_root || projectDir);
3439
+ const cwd = resolveCwd(params);
3124
3440
  const result = await (async () => {
3125
3441
  const { runInitProject } = await import("./tools/init-project.js");
3126
3442
  const output = await runInitProject({
@@ -3146,7 +3462,7 @@ Returns: counts for attempted / succeeded / requeued events and the queue locati
3146
3462
  project_dir: z.string().optional().describe("Absolute path to the project root. Defaults to CODELOOP_PROJECT_DIR env var or auto-discovered project directory. MUST be an actual project folder — passing the user's home directory is rejected. If your IDE launches the MCP server from the wrong cwd (common on Windows where Cursor uses C:\\Users\\<name> as cwd), set CODELOOP_PROJECT_DIR or pass this param explicitly."),
3147
3463
  workspace_root: z.string().optional().describe("[Alias for project_dir] Same semantics; accepted because many agents reach for this conventional name. Pass either `project_dir` OR `workspace_root` — they're equivalent."),
3148
3464
  }, async (params) => {
3149
- const cwd = (params.project_dir || params.workspace_root || projectDir);
3465
+ const cwd = resolveCwd(params);
3150
3466
  const { flushPersistedUsage } = await import("./auth/usage_tracker.js");
3151
3467
  const result = await flushPersistedUsage(cwd);
3152
3468
  return {
@@ -3257,10 +3573,10 @@ Idempotent and free — safe to call as the first step of every new chat.`, {
3257
3573
  workspace_root: z.string().optional().describe("[Alias for project_dir] Same semantics."),
3258
3574
  }, async (params) => {
3259
3575
  const result = await withAuth(async () => {
3260
- const cwd = (params.project_dir || params.workspace_root || projectDir);
3576
+ const cwd = resolveCwd(params);
3261
3577
  const { runSelfTest } = await import("./tools/self_test.js");
3262
3578
  return runSelfTest(cwd);
3263
- }, { tool: "codeloop_self_test", cwd: (params.project_dir || params.workspace_root || projectDir), input: params });
3579
+ }, { tool: "codeloop_self_test", cwd: resolveCwd(params), input: params });
3264
3580
  if (typeof result === "object" && result !== null && "error" in result) {
3265
3581
  return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
3266
3582
  }
@@ -3295,7 +3611,7 @@ Returns: status, current/latest versions, critical reasons, commands_to_run, aut
3295
3611
  return applyUpdate({ auto_respawn: params.auto_respawn });
3296
3612
  }, {
3297
3613
  tool: "codeloop_apply_update",
3298
- cwd: params.project_dir || params.workspace_root || projectDir,
3614
+ cwd: resolveCwd(params),
3299
3615
  input: params,
3300
3616
  });
3301
3617
  if (typeof authResult === "object" && authResult !== null && "error" in authResult) {