codeloop-mcp-server 0.1.54 → 0.1.56

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -162,6 +162,18 @@ warmCliCache();
162
162
  // to restart their IDE. See auth/update_check.ts for the full
163
163
  // rationale and opt-out env vars.
164
164
  startUpdateCheck();
165
+ /**
166
+ * 0.1.55 F1/F3 — In-process tracker for "is the same modal description
167
+ * surviving consecutive desktop interactions?". Keyed by the lower-cased
168
+ * app_name (falling back to the active recording's app name, then "<default>"). Cleared whenever detectModal
169
+ * returns is_modal_present: false.
170
+ *
171
+ * Lives only as long as the MCP server process. We deliberately do NOT
172
+ * persist this across restarts because a server restart is itself a
173
+ * meaningful event that resets the recurrence-class signal — the cycle
174
+ * issues we've already written to disk continue to gate ready_for_review.
175
+ */
176
+ const modalPersistenceTracker = new Map();
165
177
  const server = new McpServer({
166
178
  name: "codeloop",
167
179
  version: "0.1.14",
@@ -2118,6 +2130,42 @@ The agent MUST then write the report to docs/DEVELOPMENT_LOG.md and present it t
2118
2130
  // Check for interaction replay results
2119
2131
  const replayDir = join(baseDir, "replay_frames");
2120
2132
  const hasReplayFrames = existsSync(replayDir) && readdirSync(replayDir).length > 0;
2133
+ // 0.1.55 F5 — surface cycle_issues to the dev report so the agent's
2134
+ // generated DEVELOPMENT_LOG.md can't open with "All gates green"
2135
+ // when recurrence-class issues happened mid-cycle. We expose:
2136
+ // - cycle_issues_summary: per-kind counts and resolved/unresolved breakdown
2137
+ // - cycle_issues: full per-issue list (with summaries pre-rendered)
2138
+ // - cycle_issues_directive: a HARD instruction that the report
2139
+ // MUST include a "Bugs Found & Fixed" row per unresolved issue
2140
+ const { loadCycleIssues, summariseCycleIssue } = await import("./evidence/cycle_issues.js");
2141
+ const ci = await loadCycleIssues(cwd);
2142
+ const cycleSummary = ci.issues.reduce((acc, issue) => {
2143
+ const k = issue.kind;
2144
+ if (!acc[k])
2145
+ acc[k] = { total: 0, unresolved: 0 };
2146
+ acc[k].total++;
2147
+ if (ci.unresolved.some((u) => u === issue))
2148
+ acc[k].unresolved++;
2149
+ return acc;
2150
+ }, {});
2151
+ const cycleIssuesEntries = ci.issues.map((issue) => ({
2152
+ kind: issue.kind,
2153
+ summary: summariseCycleIssue(issue),
2154
+ resolved: !ci.unresolved.includes(issue),
2155
+ raw: issue,
2156
+ }));
2157
+ const cycleIssuesDirective = ci.issues.length === 0
2158
+ ? undefined
2159
+ : `[CodeLoop F5] HARD: ${ci.issues.length} recurrence-class issue(s) were recorded during this cycle (` +
2160
+ Object.entries(cycleSummary)
2161
+ .map(([k, v]) => `${k}: ${v.total} total / ${v.unresolved} unresolved`)
2162
+ .join(", ") +
2163
+ `). The DEVELOPMENT_LOG.md you produce MUST include a "CodeLoop Cycle Issues" subsection under "Bugs Found & Fixed" that lists each entry from cycle_issues with: kind, summary, resolution (or "UNRESOLVED — escalated"). ` +
2164
+ `Do NOT open the report with "all gates green" / "everything looks good" — those phrases match the C6 anti-rationalisation scan when cycle_issues exist and will fail the cycle_issues_acknowledged gate. ` +
2165
+ `Specifically enumerate: ${ci.issues
2166
+ .slice(0, 5)
2167
+ .map((i) => `(${i.kind}) ${summariseCycleIssue(i)}`)
2168
+ .join(" | ")}${ci.issues.length > 5 ? " | … (see logs/cycle_issues.jsonl for the full list)" : ""}`;
2121
2169
  const report = {
2122
2170
  project_name: params.project_name,
2123
2171
  project_description: params.project_description || "",
@@ -2139,6 +2187,9 @@ The agent MUST then write the report to docs/DEVELOPMENT_LOG.md and present it t
2139
2187
  interaction_summary: interactionSummary,
2140
2188
  interaction_history: interactionHistory.slice(-50),
2141
2189
  run_timeline: runSummaries,
2190
+ cycle_issues_summary: cycleSummary,
2191
+ cycle_issues: cycleIssuesEntries,
2192
+ cycle_issues_directive: cycleIssuesDirective,
2142
2193
  };
2143
2194
  await trackUsage(apiKey, "verification_run");
2144
2195
  return report;
@@ -3447,54 +3498,141 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
3447
3498
  }
3448
3499
  // Drain browser console errors that occurred during this interaction
3449
3500
  const consoleErrors = tt === "browser" ? bi.drainRecentConsoleErrors() : [];
3450
- // 0.1.54 E3 — Click-effect verification.
3501
+ // 0.1.54 E3 + 0.1.55 F1/F3/F4 — Click-effect verification.
3451
3502
  //
3452
- // Photometry-DB E2E #11 had clicks marked `success: true` (they
3453
- // dispatched cleanly) that nonetheless missed the "X" close button
3454
- // on a stuck OpenFolderDialog the agent then thought the modal
3455
- // was gone and burned cycles trying to interact with what was
3456
- // really a still-blocked main window.
3503
+ // Photometry-DB E2E #11 had:
3504
+ // - clicks marked `success: true` (the dispatch landed) that
3505
+ // nonetheless missed the X close button on a stuck
3506
+ // OpenFolderDialog,
3507
+ // - the same OpenFolderDialog persisting across MULTIPLE clicks
3508
+ // without ever triggering codeloop_handle_modal,
3509
+ // - many of those clicks coming through with NO intent /
3510
+ // description field so the 0.1.54 E3 close-keyword check
3511
+ // silently never fired.
3457
3512
  //
3458
- // When the agent's intent / description / purpose / step explicitly
3459
- // says "close" / "dismiss" / "cancel" we re-detect the modal after
3460
- // a 500ms grace window. If a modal is still present (and matches
3461
- // the description if we had one), we override `success` to false,
3462
- // attach a verification field to the log entry, and record a
3463
- // `click_missed_target` cycle issue so the gate-check picks it up.
3513
+ // 0.1.55 widens the verification to fire on EVERY desktop click /
3514
+ // double-click / right-click / escape-keystroke during recording,
3515
+ // tracks per-app modal persistence in-process, and:
3516
+ // 1. Always attaches `modal_present_after` to the log entry when
3517
+ // a modal is detected after the action so the user-journey
3518
+ // gate can score the signal.
3519
+ // 2. Overrides success=false when the agent's intent IS close-y
3520
+ // (preserves 0.1.54 E3 behaviour).
3521
+ // 3. Records a `click_missed_target` cycle issue WHENEVER the
3522
+ // same modal description survives consecutive interactions —
3523
+ // 2 in a row writes the issue regardless of intent, because
3524
+ // two clicks against an unchanged modal is the recurrence
3525
+ // signature from E2E #11.
3526
+ // 4. Records a `modal_close_failed` cycle issue at 3+ consecutive
3527
+ // persistences so cycle_issues_acknowledged blocks the gate
3528
+ // even if the agent never called codeloop_handle_modal.
3529
+ // 5. Escalates the post-interact directive (built later) to a
3530
+ // HARD instruction so the agent must call codeloop_handle_modal
3531
+ // or codeloop_kill_modal_window before the next interact.
3464
3532
  //
3465
- // Scope: desktop click + escape-keystroke during recording. Browser
3466
- // and mobile flows already get rich post-action signal from
3467
- // Playwright / adb so we don't double-fire there.
3533
+ // Scope: desktop only. Browser and mobile flows have their own
3534
+ // signal channels (Playwright console errors, adb dumpsys).
3468
3535
  let clickEffectVerification;
3536
+ let modalPersistenceDirective;
3469
3537
  const closingIntent = (params.intent ?? params.description ?? params.purpose ?? params.step ?? "").toLowerCase();
3470
3538
  const isClosingIntent = /\b(close|dismiss|cancel|escape|exit)\b/.test(closingIntent);
3471
- const isCloseAction = (action === "click" && isClosingIntent) ||
3472
- (action === "keystroke" && (params.key ?? "").toLowerCase() === "escape");
3473
- if (success && isCloseAction && tt === "desktop") {
3539
+ const isClickyAction = action === "click" || action === "double_click" || action === "right_click";
3540
+ const isEscapeKeystroke = action === "keystroke" && (params.key ?? "").toLowerCase() === "escape";
3541
+ const shouldVerifyClickEffect = success && tt === "desktop" && (isClickyAction || isEscapeKeystroke);
3542
+ if (shouldVerifyClickEffect) {
3474
3543
  try {
3475
3544
  await new Promise((resolve) => setTimeout(resolve, 500));
3476
3545
  const { detectModal } = await import("./runners/modal_detector.js");
3546
+ const trackerKey = (params.app_name || vr.getActiveRecordingAppName() || "<default>").toLowerCase();
3477
3547
  const detection = await detectModal({
3478
3548
  target_type: "desktop",
3479
3549
  app_name: params.app_name || vr.getActiveRecordingAppName() || undefined,
3480
3550
  cwd,
3481
3551
  });
3552
+ const { recordCycleIssue } = await import("./evidence/cycle_issues.js");
3482
3553
  if (detection.is_modal_present) {
3554
+ const desc = detection.modal_description ?? "(unnamed)";
3555
+ const existing = modalPersistenceTracker.get(trackerKey);
3556
+ let consecutive;
3557
+ if (existing && existing.description === desc) {
3558
+ consecutive = existing.consecutive + 1;
3559
+ modalPersistenceTracker.set(trackerKey, {
3560
+ description: desc,
3561
+ consecutive,
3562
+ firstSeenMs: existing.firstSeenMs,
3563
+ modal_kind: detection.modal_kind,
3564
+ });
3565
+ }
3566
+ else {
3567
+ consecutive = 1;
3568
+ modalPersistenceTracker.set(trackerKey, {
3569
+ description: desc,
3570
+ consecutive,
3571
+ firstSeenMs: Date.now(),
3572
+ modal_kind: detection.modal_kind,
3573
+ });
3574
+ }
3483
3575
  clickEffectVerification = {
3484
3576
  intent: closingIntent,
3485
3577
  modal_still_present: true,
3486
3578
  modal_description: detection.modal_description,
3487
- };
3488
- success = false;
3489
- detail = `${detail} | verification: modal still present after intended-close action (${detection.modal_description ?? "unnamed"})`;
3490
- const { recordCycleIssue } = await import("./evidence/cycle_issues.js");
3491
- await recordCycleIssue(cwd, {
3492
- kind: "click_missed_target",
3493
- selector: params.selector,
3494
- coords: params.x != null && params.y != null ? [params.x, params.y] : undefined,
3495
- description: closingIntent,
3496
3579
  modal_kind: detection.modal_kind,
3497
- });
3580
+ consecutive_persistences: consecutive,
3581
+ };
3582
+ // 0.1.54 E3 — close-intent override (now also applies to double-click / right-click).
3583
+ const isCloseAction = (isClickyAction && isClosingIntent) || isEscapeKeystroke;
3584
+ if (isCloseAction) {
3585
+ success = false;
3586
+ detail = `${detail} | verification: modal still present after intended-close action (${desc})`;
3587
+ await recordCycleIssue(cwd, {
3588
+ kind: "click_missed_target",
3589
+ selector: params.selector,
3590
+ coords: params.x != null && params.y != null ? [params.x, params.y] : undefined,
3591
+ description: closingIntent || `persisted modal: ${desc}`,
3592
+ modal_kind: detection.modal_kind,
3593
+ });
3594
+ }
3595
+ else if (consecutive >= 2) {
3596
+ // 0.1.55 F1 — same modal survived two interactions in a row
3597
+ // even though the agent didn't tag the click with a close
3598
+ // intent. That's the E2E #11 signature: many coordinate
3599
+ // clicks with no intent against a stuck OpenFolderDialog.
3600
+ await recordCycleIssue(cwd, {
3601
+ kind: "click_missed_target",
3602
+ selector: params.selector,
3603
+ coords: params.x != null && params.y != null ? [params.x, params.y] : undefined,
3604
+ description: `persisted modal across ${consecutive} consecutive interactions: ${desc}`,
3605
+ modal_kind: detection.modal_kind,
3606
+ });
3607
+ }
3608
+ // 0.1.55 F3 — at 3+ consecutive persistences, treat the modal
3609
+ // as actively stuck and write a modal_close_failed entry so
3610
+ // the cycle_issues_acknowledged gate fails even when the
3611
+ // agent never called codeloop_handle_modal.
3612
+ if (consecutive >= 3) {
3613
+ await recordCycleIssue(cwd, {
3614
+ kind: "modal_close_failed",
3615
+ modal_kind: detection.modal_kind ?? "custom",
3616
+ modal_description: desc,
3617
+ strategies_tried: [
3618
+ "raw_codeloop_interact_clicks_or_escape_x" + String(consecutive),
3619
+ ],
3620
+ hwnd: detection.hwnd,
3621
+ });
3622
+ }
3623
+ // 0.1.55 F4 — HARD directive for the post-interact postscript.
3624
+ modalPersistenceDirective =
3625
+ `\n\n[CodeLoop F4] HARD: A ${detection.modal_kind ?? "modal"} dialog (${desc}) is STILL present after this interaction ` +
3626
+ `(${consecutive} consecutive interactions have not cleared it). ` +
3627
+ `Stop sending raw clicks / Escape keystrokes against it and call codeloop_handle_modal with decision: "cancel" or "dismiss" — ` +
3628
+ `the multi-strategy ladder (Escape → Alt+F4 → UIA Invoke "Close" → EndDialog) handles file dialogs the keystroke path cannot. ` +
3629
+ `If codeloop_handle_modal returns escalation: "kill_window_required", call codeloop_kill_modal_window with the returned hwnd. ` +
3630
+ `Continuing to ignore this modal will fail the cycle_issues_acknowledged gate and block ready_for_review.`;
3631
+ }
3632
+ else {
3633
+ // Modal cleared — reset the tracker for this app so the next
3634
+ // appearance is counted from 1.
3635
+ modalPersistenceTracker.delete(trackerKey);
3498
3636
  }
3499
3637
  }
3500
3638
  catch { /* best-effort verification */ }
@@ -3529,7 +3667,17 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
3529
3667
  }
3530
3668
  }
3531
3669
  catch { /* best-effort logging */ }
3532
- return { success, action, detail };
3670
+ // 0.1.55 F4 — surface the modal-persistence directive as a
3671
+ // side-channel so the postscript builder outside withAuth can
3672
+ // append it to the agent-visible response.
3673
+ const ret = {
3674
+ success,
3675
+ action,
3676
+ detail,
3677
+ };
3678
+ if (modalPersistenceDirective)
3679
+ ret._f4_directive = modalPersistenceDirective;
3680
+ return ret;
3533
3681
  }, { tool: "codeloop_interact", cwd: resolveCwd(params), input: params });
3534
3682
  // 0.1.51 H11 — Post-interact modal-awareness directive.
3535
3683
  // After every codeloop_interact call we append a HARD reminder
@@ -3547,6 +3695,20 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
3547
3695
  "(\"confirm\" to proceed / \"cancel\" to abort / \"dismiss\" to close), and " +
3548
3696
  "(4) only then continue the planned journey. " +
3549
3697
  "Do NOT skip modals \"to keep moving\" — an unhandled modal will block every subsequent click and the user_journey_evidence gate will block ready_for_review.";
3698
+ // 0.1.55 F4 — when desktop click-effect verification detected a
3699
+ // modal that has now persisted across one or more interactions, the
3700
+ // F4 directive is stronger than the generic H11 reminder. Append it
3701
+ // and remove the side-channel field from the JSON-serialised result
3702
+ // so the agent doesn't see internals.
3703
+ {
3704
+ const r = result;
3705
+ const f4 = r && typeof r === "object" ? r._f4_directive : undefined;
3706
+ if (f4) {
3707
+ postscript += f4;
3708
+ if (r)
3709
+ delete r._f4_directive;
3710
+ }
3711
+ }
3550
3712
  // 0.1.52 C4 — Empty-state seeding directive. Heuristic runs against
3551
3713
  // the manifest entry the agent claims to be exercising plus the
3552
3714
  // recent interaction log; when the call looks like a row/cell