codeloop-mcp-server 0.1.54 → 0.1.56
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/auth/critical_floors.d.ts.map +1 -1
- package/dist/auth/critical_floors.js +8 -0
- package/dist/auth/critical_floors.js.map +1 -1
- package/dist/evidence/binary_freshness.d.ts.map +1 -1
- package/dist/evidence/binary_freshness.js +76 -21
- package/dist/evidence/binary_freshness.js.map +1 -1
- package/dist/evidence/change_coverage.d.ts +6 -0
- package/dist/evidence/change_coverage.d.ts.map +1 -1
- package/dist/evidence/change_coverage.js +97 -12
- package/dist/evidence/change_coverage.js.map +1 -1
- package/dist/evidence/change_manifest.d.ts +33 -1
- package/dist/evidence/change_manifest.d.ts.map +1 -1
- package/dist/evidence/change_manifest.js +214 -3
- package/dist/evidence/change_manifest.js.map +1 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +191 -29
- package/dist/index.js.map +1 -1
- package/dist/tools/plan_change_journey.d.ts.map +1 -1
- package/dist/tools/plan_change_journey.js +15 -0
- package/dist/tools/plan_change_journey.js.map +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -162,6 +162,18 @@ warmCliCache();
|
|
|
162
162
|
// to restart their IDE. See auth/update_check.ts for the full
|
|
163
163
|
// rationale and opt-out env vars.
|
|
164
164
|
startUpdateCheck();
|
|
165
|
+
/**
|
|
166
|
+
* 0.1.55 F1/F3 — In-process tracker for "is the same modal description
|
|
167
|
+
* surviving consecutive desktop interactions?". Keyed by the lower-cased
|
|
168
|
+
* app_name (or "<default>" when none). Cleared whenever detectModal
|
|
169
|
+
* returns is_modal_present: false.
|
|
170
|
+
*
|
|
171
|
+
* Lives only as long as the MCP server process. We deliberately do NOT
|
|
172
|
+
* persist this across restarts because a server restart is itself a
|
|
173
|
+
* meaningful event that resets the recurrence-class signal — the cycle
|
|
174
|
+
* issues we've already written to disk continue to gate ready_for_review.
|
|
175
|
+
*/
|
|
176
|
+
const modalPersistenceTracker = new Map();
|
|
165
177
|
const server = new McpServer({
|
|
166
178
|
name: "codeloop",
|
|
167
179
|
version: "0.1.14",
|
|
@@ -2118,6 +2130,42 @@ The agent MUST then write the report to docs/DEVELOPMENT_LOG.md and present it t
|
|
|
2118
2130
|
// Check for interaction replay results
|
|
2119
2131
|
const replayDir = join(baseDir, "replay_frames");
|
|
2120
2132
|
const hasReplayFrames = existsSync(replayDir) && readdirSync(replayDir).length > 0;
|
|
2133
|
+
// 0.1.55 F5 — surface cycle_issues to the dev report so the agent's
|
|
2134
|
+
// generated DEVELOPMENT_LOG.md can't open with "All gates green"
|
|
2135
|
+
// when recurrence-class issues happened mid-cycle. We expose:
|
|
2136
|
+
// - cycle_issues_summary: per-kind counts and resolved/unresolved breakdown
|
|
2137
|
+
// - cycle_issues: full per-issue list (with summaries pre-rendered)
|
|
2138
|
+
// - cycle_issues_directive: a HARD instruction that the report
|
|
2139
|
+
// MUST include a "Bugs Found & Fixed" row per recorded issue (resolved or not)
|
|
2140
|
+
const { loadCycleIssues, summariseCycleIssue } = await import("./evidence/cycle_issues.js");
|
|
2141
|
+
const ci = await loadCycleIssues(cwd);
|
|
2142
|
+
const cycleSummary = ci.issues.reduce((acc, issue) => {
|
|
2143
|
+
const k = issue.kind;
|
|
2144
|
+
if (!acc[k])
|
|
2145
|
+
acc[k] = { total: 0, unresolved: 0 };
|
|
2146
|
+
acc[k].total++;
|
|
2147
|
+
if (ci.unresolved.some((u) => u === issue))
|
|
2148
|
+
acc[k].unresolved++;
|
|
2149
|
+
return acc;
|
|
2150
|
+
}, {});
|
|
2151
|
+
const cycleIssuesEntries = ci.issues.map((issue) => ({
|
|
2152
|
+
kind: issue.kind,
|
|
2153
|
+
summary: summariseCycleIssue(issue),
|
|
2154
|
+
resolved: !ci.unresolved.includes(issue),
|
|
2155
|
+
raw: issue,
|
|
2156
|
+
}));
|
|
2157
|
+
const cycleIssuesDirective = ci.issues.length === 0
|
|
2158
|
+
? undefined
|
|
2159
|
+
: `[CodeLoop F5] HARD: ${ci.issues.length} recurrence-class issue(s) were recorded during this cycle (` +
|
|
2160
|
+
Object.entries(cycleSummary)
|
|
2161
|
+
.map(([k, v]) => `${k}: ${v.total} total / ${v.unresolved} unresolved`)
|
|
2162
|
+
.join(", ") +
|
|
2163
|
+
`). The DEVELOPMENT_LOG.md you produce MUST include a "CodeLoop Cycle Issues" subsection under "Bugs Found & Fixed" that lists each entry from cycle_issues with: kind, summary, resolution (or "UNRESOLVED — escalated"). ` +
|
|
2164
|
+
`Do NOT open the report with "all gates green" / "everything looks good" — those phrases match the C6 anti-rationalisation scan when cycle_issues exist and will fail the cycle_issues_acknowledged gate. ` +
|
|
2165
|
+
`Specifically enumerate: ${ci.issues
|
|
2166
|
+
.slice(0, 5)
|
|
2167
|
+
.map((i) => `(${i.kind}) ${summariseCycleIssue(i)}`)
|
|
2168
|
+
.join(" | ")}${ci.issues.length > 5 ? " | … (see logs/cycle_issues.jsonl for the full list)" : ""}`;
|
|
2121
2169
|
const report = {
|
|
2122
2170
|
project_name: params.project_name,
|
|
2123
2171
|
project_description: params.project_description || "",
|
|
@@ -2139,6 +2187,9 @@ The agent MUST then write the report to docs/DEVELOPMENT_LOG.md and present it t
|
|
|
2139
2187
|
interaction_summary: interactionSummary,
|
|
2140
2188
|
interaction_history: interactionHistory.slice(-50),
|
|
2141
2189
|
run_timeline: runSummaries,
|
|
2190
|
+
cycle_issues_summary: cycleSummary,
|
|
2191
|
+
cycle_issues: cycleIssuesEntries,
|
|
2192
|
+
cycle_issues_directive: cycleIssuesDirective,
|
|
2142
2193
|
};
|
|
2143
2194
|
await trackUsage(apiKey, "verification_run");
|
|
2144
2195
|
return report;
|
|
@@ -3447,54 +3498,141 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
|
|
|
3447
3498
|
}
|
|
3448
3499
|
// Drain browser console errors that occurred during this interaction
|
|
3449
3500
|
const consoleErrors = tt === "browser" ? bi.drainRecentConsoleErrors() : [];
|
|
3450
|
-
// 0.1.54 E3 — Click-effect verification.
|
|
3501
|
+
// 0.1.54 E3 + 0.1.55 F1/F3/F4 — Click-effect verification.
|
|
3451
3502
|
//
|
|
3452
|
-
// Photometry-DB E2E #11 had
|
|
3453
|
-
//
|
|
3454
|
-
//
|
|
3455
|
-
//
|
|
3456
|
-
//
|
|
3503
|
+
// Photometry-DB E2E #11 had:
|
|
3504
|
+
// - clicks marked `success: true` (the dispatch landed) that
|
|
3505
|
+
// nonetheless missed the X close button on a stuck
|
|
3506
|
+
// OpenFolderDialog,
|
|
3507
|
+
// - the same OpenFolderDialog persisting across MULTIPLE clicks
|
|
3508
|
+
// without ever triggering codeloop_handle_modal,
|
|
3509
|
+
// - many of those clicks coming through with NO intent /
|
|
3510
|
+
// description field so the 0.1.54 E3 close-keyword check
|
|
3511
|
+
// silently never fired.
|
|
3457
3512
|
//
|
|
3458
|
-
//
|
|
3459
|
-
//
|
|
3460
|
-
//
|
|
3461
|
-
//
|
|
3462
|
-
//
|
|
3463
|
-
//
|
|
3513
|
+
// 0.1.55 widens the verification to fire on EVERY desktop click /
|
|
3514
|
+
// double-click / right-click / escape-keystroke during recording,
|
|
3515
|
+
// tracks per-app modal persistence in-process, and:
|
|
3516
|
+
// 1. Always attaches `modal_present_after` to the log entry when
|
|
3517
|
+
// a modal is detected after the action so the user-journey
|
|
3518
|
+
// gate can score the signal.
|
|
3519
|
+
// 2. Overrides success to false when the agent's intent IS close-y
|
|
3520
|
+
// (preserves 0.1.54 E3 behaviour).
|
|
3521
|
+
// 3. Records a `click_missed_target` cycle issue WHENEVER the
|
|
3522
|
+
// same modal description survives consecutive interactions —
|
|
3523
|
+
// 2 in a row writes the issue regardless of intent, because
|
|
3524
|
+
// two clicks against an unchanged modal is the recurrence
|
|
3525
|
+
// signature from E2E #11.
|
|
3526
|
+
// 4. Records a `modal_close_failed` cycle issue at 3+ consecutive
|
|
3527
|
+
// persistences so cycle_issues_acknowledged blocks the gate
|
|
3528
|
+
// even if the agent never called codeloop_handle_modal.
|
|
3529
|
+
// 5. Escalates the post-interact directive (built later) to a
|
|
3530
|
+
// HARD instruction so the agent must call codeloop_handle_modal
|
|
3531
|
+
// or codeloop_kill_modal_window before the next interact.
|
|
3464
3532
|
//
|
|
3465
|
-
// Scope: desktop
|
|
3466
|
-
//
|
|
3467
|
-
// Playwright / adb so we don't double-fire there.
|
|
3533
|
+
// Scope: desktop only. Browser and mobile flows have their own
|
|
3534
|
+
// signal channels (Playwright console errors, adb dumpsys).
|
|
3468
3535
|
let clickEffectVerification;
|
|
3536
|
+
let modalPersistenceDirective;
|
|
3469
3537
|
const closingIntent = (params.intent ?? params.description ?? params.purpose ?? params.step ?? "").toLowerCase();
|
|
3470
3538
|
const isClosingIntent = /\b(close|dismiss|cancel|escape|exit)\b/.test(closingIntent);
|
|
3471
|
-
const
|
|
3472
|
-
|
|
3473
|
-
|
|
3539
|
+
const isClickyAction = action === "click" || action === "double_click" || action === "right_click";
|
|
3540
|
+
const isEscapeKeystroke = action === "keystroke" && (params.key ?? "").toLowerCase() === "escape";
|
|
3541
|
+
const shouldVerifyClickEffect = success && tt === "desktop" && (isClickyAction || isEscapeKeystroke);
|
|
3542
|
+
if (shouldVerifyClickEffect) {
|
|
3474
3543
|
try {
|
|
3475
3544
|
await new Promise((resolve) => setTimeout(resolve, 500));
|
|
3476
3545
|
const { detectModal } = await import("./runners/modal_detector.js");
|
|
3546
|
+
const trackerKey = (params.app_name || vr.getActiveRecordingAppName() || "<default>").toLowerCase();
|
|
3477
3547
|
const detection = await detectModal({
|
|
3478
3548
|
target_type: "desktop",
|
|
3479
3549
|
app_name: params.app_name || vr.getActiveRecordingAppName() || undefined,
|
|
3480
3550
|
cwd,
|
|
3481
3551
|
});
|
|
3552
|
+
const { recordCycleIssue } = await import("./evidence/cycle_issues.js");
|
|
3482
3553
|
if (detection.is_modal_present) {
|
|
3554
|
+
const desc = detection.modal_description ?? "(unnamed)";
|
|
3555
|
+
const existing = modalPersistenceTracker.get(trackerKey);
|
|
3556
|
+
let consecutive;
|
|
3557
|
+
if (existing && existing.description === desc) {
|
|
3558
|
+
consecutive = existing.consecutive + 1;
|
|
3559
|
+
modalPersistenceTracker.set(trackerKey, {
|
|
3560
|
+
description: desc,
|
|
3561
|
+
consecutive,
|
|
3562
|
+
firstSeenMs: existing.firstSeenMs,
|
|
3563
|
+
modal_kind: detection.modal_kind,
|
|
3564
|
+
});
|
|
3565
|
+
}
|
|
3566
|
+
else {
|
|
3567
|
+
consecutive = 1;
|
|
3568
|
+
modalPersistenceTracker.set(trackerKey, {
|
|
3569
|
+
description: desc,
|
|
3570
|
+
consecutive,
|
|
3571
|
+
firstSeenMs: Date.now(),
|
|
3572
|
+
modal_kind: detection.modal_kind,
|
|
3573
|
+
});
|
|
3574
|
+
}
|
|
3483
3575
|
clickEffectVerification = {
|
|
3484
3576
|
intent: closingIntent,
|
|
3485
3577
|
modal_still_present: true,
|
|
3486
3578
|
modal_description: detection.modal_description,
|
|
3487
|
-
};
|
|
3488
|
-
success = false;
|
|
3489
|
-
detail = `${detail} | verification: modal still present after intended-close action (${detection.modal_description ?? "unnamed"})`;
|
|
3490
|
-
const { recordCycleIssue } = await import("./evidence/cycle_issues.js");
|
|
3491
|
-
await recordCycleIssue(cwd, {
|
|
3492
|
-
kind: "click_missed_target",
|
|
3493
|
-
selector: params.selector,
|
|
3494
|
-
coords: params.x != null && params.y != null ? [params.x, params.y] : undefined,
|
|
3495
|
-
description: closingIntent,
|
|
3496
3579
|
modal_kind: detection.modal_kind,
|
|
3497
|
-
|
|
3580
|
+
consecutive_persistences: consecutive,
|
|
3581
|
+
};
|
|
3582
|
+
// 0.1.54 E3 — close-intent override (unchanged).
|
|
3583
|
+
const isCloseAction = (isClickyAction && isClosingIntent) || isEscapeKeystroke;
|
|
3584
|
+
if (isCloseAction) {
|
|
3585
|
+
success = false;
|
|
3586
|
+
detail = `${detail} | verification: modal still present after intended-close action (${desc})`;
|
|
3587
|
+
await recordCycleIssue(cwd, {
|
|
3588
|
+
kind: "click_missed_target",
|
|
3589
|
+
selector: params.selector,
|
|
3590
|
+
coords: params.x != null && params.y != null ? [params.x, params.y] : undefined,
|
|
3591
|
+
description: closingIntent || `persisted modal: ${desc}`,
|
|
3592
|
+
modal_kind: detection.modal_kind,
|
|
3593
|
+
});
|
|
3594
|
+
}
|
|
3595
|
+
else if (consecutive >= 2) {
|
|
3596
|
+
// 0.1.55 F1 — same modal survived two interactions in a row
|
|
3597
|
+
// even though the agent didn't tag the click with a close
|
|
3598
|
+
// intent. That's the E2E #11 signature: many coordinate
|
|
3599
|
+
// clicks with no intent against a stuck OpenFolderDialog.
|
|
3600
|
+
await recordCycleIssue(cwd, {
|
|
3601
|
+
kind: "click_missed_target",
|
|
3602
|
+
selector: params.selector,
|
|
3603
|
+
coords: params.x != null && params.y != null ? [params.x, params.y] : undefined,
|
|
3604
|
+
description: `persisted modal across ${consecutive} consecutive interactions: ${desc}`,
|
|
3605
|
+
modal_kind: detection.modal_kind,
|
|
3606
|
+
});
|
|
3607
|
+
}
|
|
3608
|
+
// 0.1.55 F3 — at 3+ consecutive persistences, treat the modal
|
|
3609
|
+
// as actively stuck and write a modal_close_failed entry so
|
|
3610
|
+
// the cycle_issues_acknowledged gate fails even when the
|
|
3611
|
+
// agent never called codeloop_handle_modal.
|
|
3612
|
+
if (consecutive >= 3) {
|
|
3613
|
+
await recordCycleIssue(cwd, {
|
|
3614
|
+
kind: "modal_close_failed",
|
|
3615
|
+
modal_kind: detection.modal_kind ?? "custom",
|
|
3616
|
+
modal_description: desc,
|
|
3617
|
+
strategies_tried: [
|
|
3618
|
+
"raw_codeloop_interact_clicks_or_escape_x" + String(consecutive),
|
|
3619
|
+
],
|
|
3620
|
+
hwnd: detection.hwnd,
|
|
3621
|
+
});
|
|
3622
|
+
}
|
|
3623
|
+
// 0.1.55 F4 — HARD directive for the post-interact postscript.
|
|
3624
|
+
modalPersistenceDirective =
|
|
3625
|
+
`\n\n[CodeLoop F4] HARD: A ${detection.modal_kind ?? "modal"} dialog (${desc}) is STILL present after this interaction ` +
|
|
3626
|
+
`(${consecutive} consecutive interactions have not cleared it). ` +
|
|
3627
|
+
`Stop sending raw clicks / Escape keystrokes against it and call codeloop_handle_modal with decision: "cancel" or "dismiss" — ` +
|
|
3628
|
+
`the multi-strategy ladder (Escape → Alt+F4 → UIA Invoke "Close" → EndDialog) handles file dialogs the keystroke path cannot. ` +
|
|
3629
|
+
`If codeloop_handle_modal returns escalation: "kill_window_required", call codeloop_kill_modal_window with the returned hwnd. ` +
|
|
3630
|
+
`Continuing to ignore this modal will fail the cycle_issues_acknowledged gate and block ready_for_review.`;
|
|
3631
|
+
}
|
|
3632
|
+
else {
|
|
3633
|
+
// Modal cleared — reset the tracker for this app so the next
|
|
3634
|
+
// appearance is counted from 1.
|
|
3635
|
+
modalPersistenceTracker.delete(trackerKey);
|
|
3498
3636
|
}
|
|
3499
3637
|
}
|
|
3500
3638
|
catch { /* best-effort verification */ }
|
|
@@ -3529,7 +3667,17 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
|
|
|
3529
3667
|
}
|
|
3530
3668
|
}
|
|
3531
3669
|
catch { /* best-effort logging */ }
|
|
3532
|
-
|
|
3670
|
+
// 0.1.55 F4 — surface the modal-persistence directive as a
|
|
3671
|
+
// side-channel so the postscript builder outside withAuth can
|
|
3672
|
+
// append it to the agent-visible response.
|
|
3673
|
+
const ret = {
|
|
3674
|
+
success,
|
|
3675
|
+
action,
|
|
3676
|
+
detail,
|
|
3677
|
+
};
|
|
3678
|
+
if (modalPersistenceDirective)
|
|
3679
|
+
ret._f4_directive = modalPersistenceDirective;
|
|
3680
|
+
return ret;
|
|
3533
3681
|
}, { tool: "codeloop_interact", cwd: resolveCwd(params), input: params });
|
|
3534
3682
|
// 0.1.51 H11 — Post-interact modal-awareness directive.
|
|
3535
3683
|
// After every codeloop_interact call we append a HARD reminder
|
|
@@ -3547,6 +3695,20 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
|
|
|
3547
3695
|
"(\"confirm\" to proceed / \"cancel\" to abort / \"dismiss\" to close), and " +
|
|
3548
3696
|
"(4) only then continue the planned journey. " +
|
|
3549
3697
|
"Do NOT skip modals \"to keep moving\" — an unhandled modal will block every subsequent click and the user_journey_evidence gate will block ready_for_review.";
|
|
3698
|
+
// 0.1.55 F4 — when desktop click-effect verification detected a
|
|
3699
|
+
// modal that has now persisted across one or more interactions, the
|
|
3700
|
+
// F4 directive is stronger than the generic H11 reminder. Append it
|
|
3701
|
+
// and remove the side-channel field from the JSON-serialised result
|
|
3702
|
+
// so the agent doesn't see internals.
|
|
3703
|
+
{
|
|
3704
|
+
const r = result;
|
|
3705
|
+
const f4 = r && typeof r === "object" ? r._f4_directive : undefined;
|
|
3706
|
+
if (f4) {
|
|
3707
|
+
postscript += f4;
|
|
3708
|
+
if (r)
|
|
3709
|
+
delete r._f4_directive;
|
|
3710
|
+
}
|
|
3711
|
+
}
|
|
3550
3712
|
// 0.1.52 C4 — Empty-state seeding directive. Heuristic runs against
|
|
3551
3713
|
// the manifest entry the agent claims to be exercising plus the
|
|
3552
3714
|
// recent interaction log; when the call looks like a row/cell
|