codeloop-mcp-server 0.1.53 → 0.1.55

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. package/dist/auth/critical_floors.d.ts.map +1 -1
  2. package/dist/auth/critical_floors.js +8 -0
  3. package/dist/auth/critical_floors.js.map +1 -1
  4. package/dist/evidence/anti_rationalisation.d.ts.map +1 -1
  5. package/dist/evidence/anti_rationalisation.js +15 -0
  6. package/dist/evidence/anti_rationalisation.js.map +1 -1
  7. package/dist/evidence/binary_freshness.d.ts +21 -0
  8. package/dist/evidence/binary_freshness.d.ts.map +1 -0
  9. package/dist/evidence/binary_freshness.js +168 -0
  10. package/dist/evidence/binary_freshness.js.map +1 -0
  11. package/dist/evidence/change_coverage.d.ts.map +1 -1
  12. package/dist/evidence/change_coverage.js +22 -1
  13. package/dist/evidence/change_coverage.js.map +1 -1
  14. package/dist/evidence/cycle_issues.d.ts +99 -0
  15. package/dist/evidence/cycle_issues.d.ts.map +1 -0
  16. package/dist/evidence/cycle_issues.js +120 -0
  17. package/dist/evidence/cycle_issues.js.map +1 -0
  18. package/dist/evidence/interaction_coverage.d.ts +15 -0
  19. package/dist/evidence/interaction_coverage.d.ts.map +1 -1
  20. package/dist/evidence/interaction_coverage.js +53 -4
  21. package/dist/evidence/interaction_coverage.js.map +1 -1
  22. package/dist/index.d.ts.map +1 -1
  23. package/dist/index.js +360 -5
  24. package/dist/index.js.map +1 -1
  25. package/dist/runners/modal_close_strategies.d.ts +82 -0
  26. package/dist/runners/modal_close_strategies.d.ts.map +1 -0
  27. package/dist/runners/modal_close_strategies.js +226 -0
  28. package/dist/runners/modal_close_strategies.js.map +1 -0
  29. package/dist/runners/modal_detector.d.ts +17 -0
  30. package/dist/runners/modal_detector.d.ts.map +1 -1
  31. package/dist/runners/modal_detector.js +95 -22
  32. package/dist/runners/modal_detector.js.map +1 -1
  33. package/dist/tools/gate_check.d.ts.map +1 -1
  34. package/dist/tools/gate_check.js +57 -0
  35. package/dist/tools/gate_check.js.map +1 -1
  36. package/package.json +1 -1
package/dist/index.js CHANGED
@@ -162,6 +162,18 @@ warmCliCache();
162
162
  // to restart their IDE. See auth/update_check.ts for the full
163
163
  // rationale and opt-out env vars.
164
164
  startUpdateCheck();
165
+ /**
166
+ * 0.1.55 F1/F3 — In-process tracker for "is the same modal description
167
+ * surviving consecutive desktop interactions?". Keyed by the lower-cased
168
+ * app_name (else the active recording's app name, else "<default>"). Cleared for that key whenever detectModal
169
+ * returns is_modal_present: false.
170
+ *
171
+ * Lives only as long as the MCP server process. We deliberately do NOT
172
+ * persist this across restarts because a server restart is itself a
173
+ * meaningful event that resets the recurrence-class signal — the cycle
174
+ * issues we've already written to disk continue to gate ready_for_review.
175
+ */
176
+ const modalPersistenceTracker = new Map();
165
177
  const server = new McpServer({
166
178
  name: "codeloop",
167
179
  version: "0.1.14",
@@ -1861,10 +1873,40 @@ App logs (stdout, logcat, simctl log) are automatically captured alongside the v
1861
1873
  autoLaunchSummary = { attempted: true, launched: false, reason: e.message };
1862
1874
  }
1863
1875
  }
1876
+ // 0.1.54 E4 — Binary-vs-source mtime check.
1877
+ // Photometry-DB E2E #11 had bin/Release rebuilt but the recording
1878
+ // ran against publish/PhotometryDB-Beta with stale DLLs. We surface
1879
+ // a HARD directive AND record a binary_mismatch cycle issue so the
1880
+ // gate can block ready_for_review until rebuild + re-record.
1881
+ let binaryFreshnessWarning;
1882
+ let binaryFreshnessDetails;
1883
+ if (targetType === "desktop" && appName) {
1884
+ try {
1885
+ const { checkBinaryFreshness } = await import("./evidence/binary_freshness.js");
1886
+ binaryFreshnessDetails = checkBinaryFreshness({ app_name: appName, cwd });
1887
+ if (binaryFreshnessDetails.stale) {
1888
+ binaryFreshnessWarning = binaryFreshnessDetails.reason;
1889
+ const { recordCycleIssue } = await import("./evidence/cycle_issues.js");
1890
+ await recordCycleIssue(cwd, {
1891
+ kind: "binary_mismatch",
1892
+ target_app: binaryFreshnessDetails.binary_path ?? appName,
1893
+ binary_mtime: binaryFreshnessDetails.binary_mtime ?? "unknown",
1894
+ newest_source_mtime: binaryFreshnessDetails.newest_source_mtime ?? "unknown",
1895
+ newest_source_path: binaryFreshnessDetails.newest_source_path,
1896
+ lag_seconds: binaryFreshnessDetails.lag_seconds ?? 0,
1897
+ });
1898
+ }
1899
+ }
1900
+ catch { /* best-effort */ }
1901
+ }
1864
1902
  const result = await startBackgroundRecording(videosDir, appName ?? "", params.max_duration_seconds, targetType);
1865
1903
  if (autoLaunchSummary) {
1866
1904
  result.auto_launch = autoLaunchSummary;
1867
1905
  }
1906
+ if (binaryFreshnessWarning) {
1907
+ result.binary_freshness_warning = binaryFreshnessWarning;
1908
+ result.binary_freshness = binaryFreshnessDetails;
1909
+ }
1868
1910
  await trackUsage(apiKey, "visual_review");
1869
1911
  return result;
1870
1912
  }, { tool: "codeloop_start_recording", cwd: resolveCwd(params), input: params });
@@ -2043,9 +2085,20 @@ The agent MUST then write the report to docs/DEVELOPMENT_LOG.md and present it t
2043
2085
  }
2044
2086
  }
2045
2087
  }
2046
- // Parse interaction_log.jsonl from each run for the interaction history
2088
+ // Parse interaction_log.jsonl from each run for the interaction history.
2089
+ // 0.1.54 E5 — only count entries that look like real interactions
2090
+ // (must have `action: string` AND `input_args: object`). Photometry-DB
2091
+ // E2E #11 had 27% of action buckets land under "undefined" because
2092
+ // the log mixes interaction entries with replay-frame markers and
2093
+ // browser console_error rollups. Entries failing this check are skipped outright; the
2094
+ // "unclassified" bucket is only a defensive fallback so the dev-report breakdown stays honest.
2047
2095
  const interactionHistory = [];
2048
2096
  const interactionSummary = { total: 0, succeeded: 0, failed: 0, actions: {}, console_errors: 0 };
2097
+ const isInteractionEntry = (e) => {
2098
+ const hasArgs = e.input_args !== null && typeof e.input_args === "object";
2099
+ const hasAction = typeof e.action === "string" && e.action.length > 0;
2100
+ return hasArgs && hasAction;
2101
+ };
2049
2102
  for (const runId of runs) {
2050
2103
  const runDir = getRunDir(runId, baseDir);
2051
2104
  const iLogPath = join(runDir, "logs", "interaction_log.jsonl");
@@ -2054,15 +2107,21 @@ The agent MUST then write the report to docs/DEVELOPMENT_LOG.md and present it t
2054
2107
  for (const line of lines) {
2055
2108
  try {
2056
2109
  const entry = JSON.parse(line);
2110
+ if (!isInteractionEntry(entry))
2111
+ continue;
2057
2112
  interactionHistory.push({ run_id: runId, ...entry });
2058
2113
  interactionSummary.total++;
2059
2114
  if (entry.success)
2060
2115
  interactionSummary.succeeded++;
2061
2116
  else
2062
2117
  interactionSummary.failed++;
2063
- interactionSummary.actions[entry.action] = (interactionSummary.actions[entry.action] || 0) + 1;
2064
- if (entry.console_errors?.length)
2065
- interactionSummary.console_errors += entry.console_errors.length;
2118
+ const bucket = typeof entry.action === "string" && entry.action.length > 0
2119
+ ? entry.action
2120
+ : "unclassified";
2121
+ interactionSummary.actions[bucket] = (interactionSummary.actions[bucket] || 0) + 1;
2122
+ const ce = entry.console_errors;
2123
+ if (Array.isArray(ce))
2124
+ interactionSummary.console_errors += ce.length;
2066
2125
  }
2067
2126
  catch { /* skip malformed lines */ }
2068
2127
  }
@@ -2071,6 +2130,42 @@ The agent MUST then write the report to docs/DEVELOPMENT_LOG.md and present it t
2071
2130
  // Check for interaction replay results
2072
2131
  const replayDir = join(baseDir, "replay_frames");
2073
2132
  const hasReplayFrames = existsSync(replayDir) && readdirSync(replayDir).length > 0;
2133
+ // 0.1.55 F5 — surface cycle_issues to the dev report so the agent's
2134
+ // generated DEVELOPMENT_LOG.md can't open with "All gates green"
2135
+ // when recurrence-class issues happened mid-cycle. We expose:
2136
+ // - cycle_issues_summary: per-kind counts and resolved/unresolved breakdown
2137
+ // - cycle_issues: full per-issue list (with summaries pre-rendered)
2138
+ // - cycle_issues_directive: a HARD instruction that the report
2139
+ // MUST include a "Bugs Found & Fixed" row per unresolved issue
2140
+ const { loadCycleIssues, summariseCycleIssue } = await import("./evidence/cycle_issues.js");
2141
+ const ci = await loadCycleIssues(cwd);
2142
+ const cycleSummary = ci.issues.reduce((acc, issue) => {
2143
+ const k = issue.kind;
2144
+ if (!acc[k])
2145
+ acc[k] = { total: 0, unresolved: 0 };
2146
+ acc[k].total++;
2147
+ if (ci.unresolved.some((u) => u === issue))
2148
+ acc[k].unresolved++;
2149
+ return acc;
2150
+ }, {});
2151
+ const cycleIssuesEntries = ci.issues.map((issue) => ({
2152
+ kind: issue.kind,
2153
+ summary: summariseCycleIssue(issue),
2154
+ resolved: !ci.unresolved.includes(issue),
2155
+ raw: issue,
2156
+ }));
2157
+ const cycleIssuesDirective = ci.issues.length === 0
2158
+ ? undefined
2159
+ : `[CodeLoop F5] HARD: ${ci.issues.length} recurrence-class issue(s) were recorded during this cycle (` +
2160
+ Object.entries(cycleSummary)
2161
+ .map(([k, v]) => `${k}: ${v.total} total / ${v.unresolved} unresolved`)
2162
+ .join(", ") +
2163
+ `). The DEVELOPMENT_LOG.md you produce MUST include a "CodeLoop Cycle Issues" subsection under "Bugs Found & Fixed" that lists each entry from cycle_issues with: kind, summary, resolution (or "UNRESOLVED — escalated"). ` +
2164
+ `Do NOT open the report with "all gates green" / "everything looks good" — those phrases match the C6 anti-rationalisation scan when cycle_issues exist and will fail the cycle_issues_acknowledged gate. ` +
2165
+ `Specifically enumerate: ${ci.issues
2166
+ .slice(0, 5)
2167
+ .map((i) => `(${i.kind}) ${summariseCycleIssue(i)}`)
2168
+ .join(" | ")}${ci.issues.length > 5 ? " | … (see logs/cycle_issues.jsonl for the full list)" : ""}`;
2074
2169
  const report = {
2075
2170
  project_name: params.project_name,
2076
2171
  project_description: params.project_description || "",
@@ -2092,6 +2187,9 @@ The agent MUST then write the report to docs/DEVELOPMENT_LOG.md and present it t
2092
2187
  interaction_summary: interactionSummary,
2093
2188
  interaction_history: interactionHistory.slice(-50),
2094
2189
  run_timeline: runSummaries,
2190
+ cycle_issues_summary: cycleSummary,
2191
+ cycle_issues: cycleIssuesEntries,
2192
+ cycle_issues_directive: cycleIssuesDirective,
2095
2193
  };
2096
2194
  await trackUsage(apiKey, "verification_run");
2097
2195
  return report;
@@ -3400,6 +3498,145 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
3400
3498
  }
3401
3499
  // Drain browser console errors that occurred during this interaction
3402
3500
  const consoleErrors = tt === "browser" ? bi.drainRecentConsoleErrors() : [];
3501
+ // 0.1.54 E3 + 0.1.55 F1/F3/F4 — Click-effect verification.
3502
+ //
3503
+ // Photometry-DB E2E #11 had:
3504
+ // - clicks marked `success: true` (the dispatch landed) that
3505
+ // nonetheless missed the X close button on a stuck
3506
+ // OpenFolderDialog,
3507
+ // - the same OpenFolderDialog persisting across MULTIPLE clicks
3508
+ // without ever triggering codeloop_handle_modal,
3509
+ // - many of those clicks coming through with NO intent /
3510
+ // description field so the 0.1.54 E3 close-keyword check
3511
+ // silently never fired.
3512
+ //
3513
+ // 0.1.55 widens the verification to fire on EVERY desktop click /
3514
+ // double-click / right-click / escape-keystroke during recording,
3515
+ // tracks per-app modal persistence in-process, and:
3516
+ // 1. Always attaches a `verification` record (`modal_still_present`, description, kind, consecutive count) to the log entry when
3517
+ // a modal is detected after the action so the user-journey
3518
+ // gate can score the signal.
3519
+ // 2. Overrides success=false when the click's intent IS close-y or the action is an Escape keystroke
3520
+ // (preserves 0.1.54 E3 behaviour).
3521
+ // 3. Records a `click_missed_target` cycle issue WHENEVER the
3522
+ // same modal description survives consecutive interactions —
3523
+ // 2 in a row writes the issue regardless of intent, because
3524
+ // two clicks against an unchanged modal is the recurrence
3525
+ // signature from E2E #11.
3526
+ // 4. Records a `modal_close_failed` cycle issue at 3+ consecutive
3527
+ // persistences so cycle_issues_acknowledged blocks the gate
3528
+ // even if the agent never called codeloop_handle_modal.
3529
+ // 5. Escalates the post-interact directive (built later) to a
3530
+ // HARD instruction so the agent must call codeloop_handle_modal
3531
+ // or codeloop_kill_modal_window before the next interact.
3532
+ //
3533
+ // Scope: desktop only. Browser and mobile flows have their own
3534
+ // signal channels (Playwright console errors, adb dumpsys).
3535
+ let clickEffectVerification;
3536
+ let modalPersistenceDirective;
3537
+ const closingIntent = (params.intent ?? params.description ?? params.purpose ?? params.step ?? "").toLowerCase();
3538
+ const isClosingIntent = /\b(close|dismiss|cancel|escape|exit)\b/.test(closingIntent);
3539
+ const isClickyAction = action === "click" || action === "double_click" || action === "right_click";
3540
+ const isEscapeKeystroke = action === "keystroke" && (params.key ?? "").toLowerCase() === "escape";
3541
+ const shouldVerifyClickEffect = success && tt === "desktop" && (isClickyAction || isEscapeKeystroke);
3542
+ if (shouldVerifyClickEffect) {
3543
+ try {
3544
+ await new Promise((resolve) => setTimeout(resolve, 500));
3545
+ const { detectModal } = await import("./runners/modal_detector.js");
3546
+ const trackerKey = (params.app_name || vr.getActiveRecordingAppName() || "<default>").toLowerCase();
3547
+ const detection = await detectModal({
3548
+ target_type: "desktop",
3549
+ app_name: params.app_name || vr.getActiveRecordingAppName() || undefined,
3550
+ cwd,
3551
+ });
3552
+ const { recordCycleIssue } = await import("./evidence/cycle_issues.js");
3553
+ if (detection.is_modal_present) {
3554
+ const desc = detection.modal_description ?? "(unnamed)";
3555
+ const existing = modalPersistenceTracker.get(trackerKey);
3556
+ let consecutive;
3557
+ if (existing && existing.description === desc) {
3558
+ consecutive = existing.consecutive + 1;
3559
+ modalPersistenceTracker.set(trackerKey, {
3560
+ description: desc,
3561
+ consecutive,
3562
+ firstSeenMs: existing.firstSeenMs,
3563
+ modal_kind: detection.modal_kind,
3564
+ });
3565
+ }
3566
+ else {
3567
+ consecutive = 1;
3568
+ modalPersistenceTracker.set(trackerKey, {
3569
+ description: desc,
3570
+ consecutive,
3571
+ firstSeenMs: Date.now(),
3572
+ modal_kind: detection.modal_kind,
3573
+ });
3574
+ }
3575
+ clickEffectVerification = {
3576
+ intent: closingIntent,
3577
+ modal_still_present: true,
3578
+ modal_description: detection.modal_description,
3579
+ modal_kind: detection.modal_kind,
3580
+ consecutive_persistences: consecutive,
3581
+ };
3582
+ // 0.1.54 E3 — close-intent override (unchanged).
3583
+ const isCloseAction = (isClickyAction && isClosingIntent) || isEscapeKeystroke;
3584
+ if (isCloseAction) {
3585
+ success = false;
3586
+ detail = `${detail} | verification: modal still present after intended-close action (${desc})`;
3587
+ await recordCycleIssue(cwd, {
3588
+ kind: "click_missed_target",
3589
+ selector: params.selector,
3590
+ coords: params.x != null && params.y != null ? [params.x, params.y] : undefined,
3591
+ description: closingIntent || `persisted modal: ${desc}`,
3592
+ modal_kind: detection.modal_kind,
3593
+ });
3594
+ }
3595
+ else if (consecutive >= 2) {
3596
+ // 0.1.55 F1 — same modal survived two interactions in a row
3597
+ // even though the agent didn't tag the click with a close
3598
+ // intent. That's the E2E #11 signature: many coordinate
3599
+ // clicks with no intent against a stuck OpenFolderDialog.
3600
+ await recordCycleIssue(cwd, {
3601
+ kind: "click_missed_target",
3602
+ selector: params.selector,
3603
+ coords: params.x != null && params.y != null ? [params.x, params.y] : undefined,
3604
+ description: `persisted modal across ${consecutive} consecutive interactions: ${desc}`,
3605
+ modal_kind: detection.modal_kind,
3606
+ });
3607
+ }
3608
+ // 0.1.55 F3 — at 3+ consecutive persistences, treat the modal
3609
+ // as actively stuck and write a modal_close_failed entry so
3610
+ // the cycle_issues_acknowledged gate fails even when the
3611
+ // agent never called codeloop_handle_modal.
3612
+ if (consecutive >= 3) {
3613
+ await recordCycleIssue(cwd, {
3614
+ kind: "modal_close_failed",
3615
+ modal_kind: detection.modal_kind ?? "custom",
3616
+ modal_description: desc,
3617
+ strategies_tried: [
3618
+ "raw_codeloop_interact_clicks_or_escape_x" + String(consecutive),
3619
+ ],
3620
+ hwnd: detection.hwnd,
3621
+ });
3622
+ }
3623
+ // 0.1.55 F4 — HARD directive for the post-interact postscript.
3624
+ modalPersistenceDirective =
3625
+ `\n\n[CodeLoop F4] HARD: A ${detection.modal_kind ?? "modal"} dialog (${desc}) is STILL present after this interaction ` +
3626
+ `(${consecutive} consecutive interactions have not cleared it). ` +
3627
+ `Stop sending raw clicks / Escape keystrokes against it and call codeloop_handle_modal with decision: "cancel" or "dismiss" — ` +
3628
+ `the multi-strategy ladder (Escape → Alt+F4 → UIA Invoke "Close" → EndDialog) handles file dialogs the keystroke path cannot. ` +
3629
+ `If codeloop_handle_modal returns escalation: "kill_window_required", call codeloop_kill_modal_window with the returned hwnd. ` +
3630
+ `Continuing to ignore this modal will fail the cycle_issues_acknowledged gate and block ready_for_review.`;
3631
+ }
3632
+ else {
3633
+ // Modal cleared — reset the tracker for this app so the next
3634
+ // appearance is counted from 1.
3635
+ modalPersistenceTracker.delete(trackerKey);
3636
+ }
3637
+ }
3638
+ catch { /* best-effort verification */ }
3639
+ }
3403
3640
  // Log interaction result for post-recording analysis
3404
3641
  const interactionEntry = {
3405
3642
  timestamp: new Date().toISOString(),
@@ -3411,6 +3648,8 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
3411
3648
  };
3412
3649
  if (consoleErrors.length > 0)
3413
3650
  interactionEntry.console_errors = consoleErrors;
3651
+ if (clickEffectVerification)
3652
+ interactionEntry.verification = clickEffectVerification;
3414
3653
  try {
3415
3654
  const activeIds = vr.getActiveRecordingIds();
3416
3655
  if (activeIds.length > 0) {
@@ -3428,7 +3667,17 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
3428
3667
  }
3429
3668
  }
3430
3669
  catch { /* best-effort logging */ }
3431
- return { success, action, detail };
3670
+ // 0.1.55 F4 surface the modal-persistence directive as a
3671
+ // side-channel so the postscript builder outside withAuth can
3672
+ // append it to the agent-visible response.
3673
+ const ret = {
3674
+ success,
3675
+ action,
3676
+ detail,
3677
+ };
3678
+ if (modalPersistenceDirective)
3679
+ ret._f4_directive = modalPersistenceDirective;
3680
+ return ret;
3432
3681
  }, { tool: "codeloop_interact", cwd: resolveCwd(params), input: params });
3433
3682
  // 0.1.51 H11 — Post-interact modal-awareness directive.
3434
3683
  // After every codeloop_interact call we append a HARD reminder
@@ -3446,6 +3695,20 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
3446
3695
  "(\"confirm\" to proceed / \"cancel\" to abort / \"dismiss\" to close), and " +
3447
3696
  "(4) only then continue the planned journey. " +
3448
3697
  "Do NOT skip modals \"to keep moving\" — an unhandled modal will block every subsequent click and the user_journey_evidence gate will block ready_for_review.";
3698
+ // 0.1.55 F4 — when desktop click-effect verification detected a
3699
+ // modal that has now persisted across one or more interactions, the
3700
+ // F4 directive is stronger than the soft H11 reminder. Append it
3701
+ // and remove the side-channel field from the JSON-serialised result
3702
+ // so the agent doesn't see internals.
3703
+ {
3704
+ const r = result;
3705
+ const f4 = r && typeof r === "object" ? r._f4_directive : undefined;
3706
+ if (f4) {
3707
+ postscript += f4;
3708
+ if (r)
3709
+ delete r._f4_directive;
3710
+ }
3711
+ }
3449
3712
  // 0.1.52 C4 — Empty-state seeding directive. Heuristic runs against
3450
3713
  // the manifest entry the agent claims to be exercising plus the
3451
3714
  // recent interaction log; when the call looks like a row/cell
@@ -3520,6 +3783,51 @@ Returns: detected modal description + result of the chosen decision.`, {
3520
3783
  : undefined,
3521
3784
  };
3522
3785
  }
3786
+ // 0.1.54 E1 — file dialogs get the multi-strategy close ladder.
3787
+ // Photometry-DB E2E #11 had three .NET 8 OpenFolderDialog instances
3788
+ // that ignored single-keystroke close attempts; the ladder runs
3789
+ // Escape → Alt+F4 → UIA Invoke "Close" → EndDialog and re-detects
3790
+ // between steps. If all 4 fail it surfaces a HARD escalation hint
3791
+ // pointing at codeloop_kill_modal_window.
3792
+ if ((params.decision === "cancel" || params.decision === "dismiss") &&
3793
+ detection.modal_kind === "file_dialog" &&
3794
+ detection.target_type === "desktop") {
3795
+ const { closeModalWithStrategies } = await import("./runners/modal_close_strategies.js");
3796
+ const { recordCycleIssue } = await import("./evidence/cycle_issues.js");
3797
+ const close = await closeModalWithStrategies({
3798
+ initial_detection: detection,
3799
+ app_name: params.app_name,
3800
+ cwd,
3801
+ });
3802
+ if (!close.closed) {
3803
+ await recordCycleIssue(cwd, {
3804
+ kind: "modal_close_failed",
3805
+ modal_kind: detection.modal_kind ?? "custom",
3806
+ modal_description: detection.modal_description,
3807
+ strategies_tried: close.strategies_tried.map((a) => a.strategy),
3808
+ hwnd: close.hwnd,
3809
+ });
3810
+ return {
3811
+ decision_taken: params.decision,
3812
+ detection,
3813
+ close_attempt: close,
3814
+ escalation: "kill_window_required",
3815
+ next_step: `HARD: All 4 close strategies failed (escape, alt_f4, uia_invoke_close, end_dialog). ` +
3816
+ `Call codeloop_kill_modal_window with hwnd="${close.hwnd ?? "<rerun_detect_modal>"}" ` +
3817
+ `to escalate to PostMessage(WM_CLOSE) → TerminateProcess. After kill-window succeeds, ` +
3818
+ `you MUST also call codeloop_start_recording again because the app process was terminated. ` +
3819
+ `This stuck-modal cycle has been written to cycle_issues.jsonl and the gate-check will require you to acknowledge it before reporting ready_for_review.`,
3820
+ };
3821
+ }
3822
+ return {
3823
+ decision_taken: params.decision,
3824
+ detection,
3825
+ close_attempt: close,
3826
+ escalation: "none",
3827
+ next_step: `Modal closed successfully via strategy "${close.strategies_tried.find((a) => a.success)?.strategy ?? "unknown"}". ` +
3828
+ `Continue with codeloop_interact against the application.`,
3829
+ };
3830
+ }
3523
3831
  // For confirm / cancel / dismiss we delegate to codeloop_interact
3524
3832
  // semantics by issuing a key press that maps to the right OS
3525
3833
  // convention. dismiss ⇒ Escape, cancel ⇒ Escape (most modals
@@ -3539,6 +3847,53 @@ Returns: detected modal description + result of the chosen decision.`, {
3539
3847
  content: withInitHint([{ type: "text", text: JSON.stringify(authResult, null, 2) }], resolveCwd(params)),
3540
3848
  };
3541
3849
  });
3850
+ // 0.1.54 E1 — codeloop_kill_modal_window: last-resort modal escalation
3851
+ server.tool("codeloop_kill_modal_window", TOOL_BOOTSTRAP + `Last-resort escalation to kill a stuck modal that codeloop_handle_modal could not close with its 4-strategy ladder (Escape → Alt+F4 → UIA Invoke "Close" → EndDialog).
3852
+
3853
+ This tool is ONLY for the case where codeloop_handle_modal returned escalation: "kill_window_required". It will:
3854
+ 1. PostMessage(WM_CLOSE) to the specific HWND.
3855
+ 2. Wait 2 s and re-detect.
3856
+ 3. If the modal is still up, call TerminateProcess on the owning PID.
3857
+
3858
+ CRITICAL: TerminateProcess will kill the app you are recording. After this tool succeeds you MUST:
3859
+ - Call codeloop_stop_recording (the recording will be partial but evidence is preserved).
3860
+ - Rebuild + re-launch the app and call codeloop_start_recording again.
3861
+ - Resume the user-journey from the start of the affected flow.
3862
+
3863
+ This is a Windows-only tool. macOS / Linux modal handling does not need this escalation.`, {
3864
+ hwnd: z.string().describe("Win32 HWND captured by detectModal / codeloop_handle_modal. Pass the value from detection.hwnd or close_attempt.hwnd."),
3865
+ target_type: targetTypeSchema.optional(),
3866
+ app_name: z.string().optional(),
3867
+ project_dir: z.string().optional(),
3868
+ workspace_root: z.string().optional(),
3869
+ }, async (params) => {
3870
+ const authResult = await withAuth(async () => {
3871
+ const { killModalWindow } = await import("./runners/modal_close_strategies.js");
3872
+ const { recordCycleIssue } = await import("./evidence/cycle_issues.js");
3873
+ const cwd = resolveCwd(params);
3874
+ const result = await killModalWindow({
3875
+ hwnd: params.hwnd,
3876
+ app_name: params.app_name,
3877
+ cwd,
3878
+ });
3879
+ if (result.steps.some((s) => s.step === "terminate_process" && s.success)) {
3880
+ await recordCycleIssue(cwd, {
3881
+ kind: "app_restart_during_recording",
3882
+ reason: "TerminateProcess used to clear stuck modal — recording will be partial",
3883
+ hwnd: params.hwnd,
3884
+ });
3885
+ }
3886
+ return {
3887
+ ...result,
3888
+ next_step: result.closed
3889
+ ? "Modal cleared. Call codeloop_stop_recording (current recording is partial), then rebuild + relaunch the app + codeloop_start_recording before resuming the user-journey."
3890
+ : "WARNING: even after PostMessage(WM_CLOSE) + TerminateProcess the modal is still detected. The host UI may be wedged at the OS level. Consider rebooting the workstation or escalating to manual intervention.",
3891
+ };
3892
+ }, { tool: "codeloop_kill_modal_window", cwd: resolveCwd(params), input: params });
3893
+ return {
3894
+ content: withInitHint([{ type: "text", text: JSON.stringify(authResult, null, 2) }], resolveCwd(params)),
3895
+ };
3896
+ });
3542
3897
  // ── codeloop_init_project ────────────────────────────────────────
3543
3898
  server.tool("codeloop_init_project", TOOL_BOOTSTRAP + `Initialize CodeLoop in a project that hasn't been set up yet. Creates
3544
3899
  \`.codeloop/config.json\`, agent rules, MCP config, \`artifacts/\`, and