codeloop-mcp-server 0.1.53 → 0.1.55
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/auth/critical_floors.d.ts.map +1 -1
- package/dist/auth/critical_floors.js +8 -0
- package/dist/auth/critical_floors.js.map +1 -1
- package/dist/evidence/anti_rationalisation.d.ts.map +1 -1
- package/dist/evidence/anti_rationalisation.js +15 -0
- package/dist/evidence/anti_rationalisation.js.map +1 -1
- package/dist/evidence/binary_freshness.d.ts +21 -0
- package/dist/evidence/binary_freshness.d.ts.map +1 -0
- package/dist/evidence/binary_freshness.js +168 -0
- package/dist/evidence/binary_freshness.js.map +1 -0
- package/dist/evidence/change_coverage.d.ts.map +1 -1
- package/dist/evidence/change_coverage.js +22 -1
- package/dist/evidence/change_coverage.js.map +1 -1
- package/dist/evidence/cycle_issues.d.ts +99 -0
- package/dist/evidence/cycle_issues.d.ts.map +1 -0
- package/dist/evidence/cycle_issues.js +120 -0
- package/dist/evidence/cycle_issues.js.map +1 -0
- package/dist/evidence/interaction_coverage.d.ts +15 -0
- package/dist/evidence/interaction_coverage.d.ts.map +1 -1
- package/dist/evidence/interaction_coverage.js +53 -4
- package/dist/evidence/interaction_coverage.js.map +1 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +360 -5
- package/dist/index.js.map +1 -1
- package/dist/runners/modal_close_strategies.d.ts +82 -0
- package/dist/runners/modal_close_strategies.d.ts.map +1 -0
- package/dist/runners/modal_close_strategies.js +226 -0
- package/dist/runners/modal_close_strategies.js.map +1 -0
- package/dist/runners/modal_detector.d.ts +17 -0
- package/dist/runners/modal_detector.d.ts.map +1 -1
- package/dist/runners/modal_detector.js +95 -22
- package/dist/runners/modal_detector.js.map +1 -1
- package/dist/tools/gate_check.d.ts.map +1 -1
- package/dist/tools/gate_check.js +57 -0
- package/dist/tools/gate_check.js.map +1 -1
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -162,6 +162,18 @@ warmCliCache();
|
|
|
162
162
|
// to restart their IDE. See auth/update_check.ts for the full
|
|
163
163
|
// rationale and opt-out env vars.
|
|
164
164
|
startUpdateCheck();
|
|
165
|
+
/**
|
|
166
|
+
* 0.1.55 F1/F3 — In-process tracker for "is the same modal description
|
|
167
|
+
* surviving consecutive desktop interactions?". Keyed by the lower-cased
|
|
168
|
+
* app_name (or "<default>" when none). Cleared whenever detectModal
|
|
169
|
+
* returns is_modal_present: false.
|
|
170
|
+
*
|
|
171
|
+
* Lives only as long as the MCP server process. We deliberately do NOT
|
|
172
|
+
* persist this across restarts because a server restart is itself a
|
|
173
|
+
* meaningful event that resets the recurrence-class signal — the cycle
|
|
174
|
+
* issues we've already written to disk continue to gate ready_for_review.
|
|
175
|
+
*/
|
|
176
|
+
const modalPersistenceTracker = new Map();
|
|
165
177
|
const server = new McpServer({
|
|
166
178
|
name: "codeloop",
|
|
167
179
|
version: "0.1.14",
|
|
@@ -1861,10 +1873,40 @@ App logs (stdout, logcat, simctl log) are automatically captured alongside the v
|
|
|
1861
1873
|
autoLaunchSummary = { attempted: true, launched: false, reason: e.message };
|
|
1862
1874
|
}
|
|
1863
1875
|
}
|
|
1876
|
+
// 0.1.54 E4 — Binary-vs-source mtime check.
|
|
1877
|
+
// Photometry-DB E2E #11 had bin/Release rebuilt but the recording
|
|
1878
|
+
// ran against publish/PhotometryDB-Beta with stale DLLs. We surface
|
|
1879
|
+
// a HARD directive AND record a binary_mismatch cycle issue so the
|
|
1880
|
+
// gate can block ready_for_review until rebuild + re-record.
|
|
1881
|
+
let binaryFreshnessWarning;
|
|
1882
|
+
let binaryFreshnessDetails;
|
|
1883
|
+
if (targetType === "desktop" && appName) {
|
|
1884
|
+
try {
|
|
1885
|
+
const { checkBinaryFreshness } = await import("./evidence/binary_freshness.js");
|
|
1886
|
+
binaryFreshnessDetails = checkBinaryFreshness({ app_name: appName, cwd });
|
|
1887
|
+
if (binaryFreshnessDetails.stale) {
|
|
1888
|
+
binaryFreshnessWarning = binaryFreshnessDetails.reason;
|
|
1889
|
+
const { recordCycleIssue } = await import("./evidence/cycle_issues.js");
|
|
1890
|
+
await recordCycleIssue(cwd, {
|
|
1891
|
+
kind: "binary_mismatch",
|
|
1892
|
+
target_app: binaryFreshnessDetails.binary_path ?? appName,
|
|
1893
|
+
binary_mtime: binaryFreshnessDetails.binary_mtime ?? "unknown",
|
|
1894
|
+
newest_source_mtime: binaryFreshnessDetails.newest_source_mtime ?? "unknown",
|
|
1895
|
+
newest_source_path: binaryFreshnessDetails.newest_source_path,
|
|
1896
|
+
lag_seconds: binaryFreshnessDetails.lag_seconds ?? 0,
|
|
1897
|
+
});
|
|
1898
|
+
}
|
|
1899
|
+
}
|
|
1900
|
+
catch { /* best-effort */ }
|
|
1901
|
+
}
|
|
1864
1902
|
const result = await startBackgroundRecording(videosDir, appName ?? "", params.max_duration_seconds, targetType);
|
|
1865
1903
|
if (autoLaunchSummary) {
|
|
1866
1904
|
result.auto_launch = autoLaunchSummary;
|
|
1867
1905
|
}
|
|
1906
|
+
if (binaryFreshnessWarning) {
|
|
1907
|
+
result.binary_freshness_warning = binaryFreshnessWarning;
|
|
1908
|
+
result.binary_freshness = binaryFreshnessDetails;
|
|
1909
|
+
}
|
|
1868
1910
|
await trackUsage(apiKey, "visual_review");
|
|
1869
1911
|
return result;
|
|
1870
1912
|
}, { tool: "codeloop_start_recording", cwd: resolveCwd(params), input: params });
|
|
@@ -2043,9 +2085,20 @@ The agent MUST then write the report to docs/DEVELOPMENT_LOG.md and present it t
|
|
|
2043
2085
|
}
|
|
2044
2086
|
}
|
|
2045
2087
|
}
|
|
2046
|
-
// Parse interaction_log.jsonl from each run for the interaction history
|
|
2088
|
+
// Parse interaction_log.jsonl from each run for the interaction history.
|
|
2089
|
+
// 0.1.54 E5 — only count entries that look like real interactions
|
|
2090
|
+
// (must have `action: string` AND `input_args: object`). Photometry-DB
|
|
2091
|
+
// E2E #11 had 27% of action buckets land under "undefined" because
|
|
2092
|
+
// the log mixes interaction entries with replay-frame markers and
|
|
2093
|
+
// browser console_error rollups. Entries failing that check are skipped;
|
|
2094
|
+
// the "unclassified" bucket is only a fallback, so the breakdown stays honest.
|
|
2047
2095
|
const interactionHistory = [];
|
|
2048
2096
|
const interactionSummary = { total: 0, succeeded: 0, failed: 0, actions: {}, console_errors: 0 };
|
|
2097
|
+
/**
 * Decide whether one parsed interaction_log.jsonl entry is a real interaction.
 *
 * An entry qualifies only when it has a non-empty string `action` AND a
 * non-null `input_args` of type "object". Anything else (for example
 * replay-frame markers or console-error rollups that lack those fields)
 * is rejected so it cannot skew the per-action breakdown.
 *
 * @param {unknown} e - Value produced by JSON.parse for a single log line.
 * @returns {boolean} true when the entry should be counted as an interaction.
 */
const isInteractionEntry = (e) => {
    // JSON.parse can yield null or a primitive for a syntactically valid line;
    // reject those up front instead of throwing on the property reads below.
    if (e === null || typeof e !== "object")
        return false;
    // typeof null === "object", hence the explicit null check.
    const hasArgs = e.input_args !== null && typeof e.input_args === "object";
    const hasAction = typeof e.action === "string" && e.action.length > 0;
    return hasArgs && hasAction;
};
|
|
2049
2102
|
for (const runId of runs) {
|
|
2050
2103
|
const runDir = getRunDir(runId, baseDir);
|
|
2051
2104
|
const iLogPath = join(runDir, "logs", "interaction_log.jsonl");
|
|
@@ -2054,15 +2107,21 @@ The agent MUST then write the report to docs/DEVELOPMENT_LOG.md and present it t
|
|
|
2054
2107
|
for (const line of lines) {
|
|
2055
2108
|
try {
|
|
2056
2109
|
const entry = JSON.parse(line);
|
|
2110
|
+
if (!isInteractionEntry(entry))
|
|
2111
|
+
continue;
|
|
2057
2112
|
interactionHistory.push({ run_id: runId, ...entry });
|
|
2058
2113
|
interactionSummary.total++;
|
|
2059
2114
|
if (entry.success)
|
|
2060
2115
|
interactionSummary.succeeded++;
|
|
2061
2116
|
else
|
|
2062
2117
|
interactionSummary.failed++;
|
|
2063
|
-
|
|
2064
|
-
|
|
2065
|
-
|
|
2118
|
+
const bucket = typeof entry.action === "string" && entry.action.length > 0
|
|
2119
|
+
? entry.action
|
|
2120
|
+
: "unclassified";
|
|
2121
|
+
interactionSummary.actions[bucket] = (interactionSummary.actions[bucket] || 0) + 1;
|
|
2122
|
+
const ce = entry.console_errors;
|
|
2123
|
+
if (Array.isArray(ce))
|
|
2124
|
+
interactionSummary.console_errors += ce.length;
|
|
2066
2125
|
}
|
|
2067
2126
|
catch { /* skip malformed lines */ }
|
|
2068
2127
|
}
|
|
@@ -2071,6 +2130,42 @@ The agent MUST then write the report to docs/DEVELOPMENT_LOG.md and present it t
|
|
|
2071
2130
|
// Check for interaction replay results
|
|
2072
2131
|
const replayDir = join(baseDir, "replay_frames");
|
|
2073
2132
|
const hasReplayFrames = existsSync(replayDir) && readdirSync(replayDir).length > 0;
|
|
2133
|
+
// 0.1.55 F5 — surface cycle_issues to the dev report so the agent's
|
|
2134
|
+
// generated DEVELOPMENT_LOG.md can't open with "All gates green"
|
|
2135
|
+
// when recurrence-class issues happened mid-cycle. We expose:
|
|
2136
|
+
// - cycle_issues_summary: per-kind counts and resolved/unresolved breakdown
|
|
2137
|
+
// - cycle_issues: full per-issue list (with summaries pre-rendered)
|
|
2138
|
+
// - cycle_issues_directive: a HARD instruction that the report
|
|
2139
|
+
// MUST include a "Bugs Found & Fixed" row per unresolved issue
|
|
2140
|
+
const { loadCycleIssues, summariseCycleIssue } = await import("./evidence/cycle_issues.js");
|
|
2141
|
+
const ci = await loadCycleIssues(cwd);
|
|
2142
|
+
const cycleSummary = ci.issues.reduce((acc, issue) => {
|
|
2143
|
+
const k = issue.kind;
|
|
2144
|
+
if (!acc[k])
|
|
2145
|
+
acc[k] = { total: 0, unresolved: 0 };
|
|
2146
|
+
acc[k].total++;
|
|
2147
|
+
if (ci.unresolved.some((u) => u === issue))
|
|
2148
|
+
acc[k].unresolved++;
|
|
2149
|
+
return acc;
|
|
2150
|
+
}, {});
|
|
2151
|
+
const cycleIssuesEntries = ci.issues.map((issue) => ({
|
|
2152
|
+
kind: issue.kind,
|
|
2153
|
+
summary: summariseCycleIssue(issue),
|
|
2154
|
+
resolved: !ci.unresolved.includes(issue),
|
|
2155
|
+
raw: issue,
|
|
2156
|
+
}));
|
|
2157
|
+
const cycleIssuesDirective = ci.issues.length === 0
|
|
2158
|
+
? undefined
|
|
2159
|
+
: `[CodeLoop F5] HARD: ${ci.issues.length} recurrence-class issue(s) were recorded during this cycle (` +
|
|
2160
|
+
Object.entries(cycleSummary)
|
|
2161
|
+
.map(([k, v]) => `${k}: ${v.total} total / ${v.unresolved} unresolved`)
|
|
2162
|
+
.join(", ") +
|
|
2163
|
+
`). The DEVELOPMENT_LOG.md you produce MUST include a "CodeLoop Cycle Issues" subsection under "Bugs Found & Fixed" that lists each entry from cycle_issues with: kind, summary, resolution (or "UNRESOLVED — escalated"). ` +
|
|
2164
|
+
`Do NOT open the report with "all gates green" / "everything looks good" — those phrases match the C6 anti-rationalisation scan when cycle_issues exist and will fail the cycle_issues_acknowledged gate. ` +
|
|
2165
|
+
`Specifically enumerate: ${ci.issues
|
|
2166
|
+
.slice(0, 5)
|
|
2167
|
+
.map((i) => `(${i.kind}) ${summariseCycleIssue(i)}`)
|
|
2168
|
+
.join(" | ")}${ci.issues.length > 5 ? " | … (see logs/cycle_issues.jsonl for the full list)" : ""}`;
|
|
2074
2169
|
const report = {
|
|
2075
2170
|
project_name: params.project_name,
|
|
2076
2171
|
project_description: params.project_description || "",
|
|
@@ -2092,6 +2187,9 @@ The agent MUST then write the report to docs/DEVELOPMENT_LOG.md and present it t
|
|
|
2092
2187
|
interaction_summary: interactionSummary,
|
|
2093
2188
|
interaction_history: interactionHistory.slice(-50),
|
|
2094
2189
|
run_timeline: runSummaries,
|
|
2190
|
+
cycle_issues_summary: cycleSummary,
|
|
2191
|
+
cycle_issues: cycleIssuesEntries,
|
|
2192
|
+
cycle_issues_directive: cycleIssuesDirective,
|
|
2095
2193
|
};
|
|
2096
2194
|
await trackUsage(apiKey, "verification_run");
|
|
2097
2195
|
return report;
|
|
@@ -3400,6 +3498,145 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
|
|
|
3400
3498
|
}
|
|
3401
3499
|
// Drain browser console errors that occurred during this interaction
|
|
3402
3500
|
const consoleErrors = tt === "browser" ? bi.drainRecentConsoleErrors() : [];
|
|
3501
|
+
// 0.1.54 E3 + 0.1.55 F1/F3/F4 — Click-effect verification.
|
|
3502
|
+
//
|
|
3503
|
+
// Photometry-DB E2E #11 had:
|
|
3504
|
+
// - clicks marked `success: true` (the dispatch landed) that
|
|
3505
|
+
// nonetheless missed the X close button on a stuck
|
|
3506
|
+
// OpenFolderDialog,
|
|
3507
|
+
// - the same OpenFolderDialog persisting across MULTIPLE clicks
|
|
3508
|
+
// without ever triggering codeloop_handle_modal,
|
|
3509
|
+
// - many of those clicks coming through with NO intent /
|
|
3510
|
+
// description field so the 0.1.54 E3 close-keyword check
|
|
3511
|
+
// silently never fired.
|
|
3512
|
+
//
|
|
3513
|
+
// 0.1.55 widens the verification to fire on EVERY desktop click /
|
|
3514
|
+
// double-click / right-click / escape-keystroke during recording,
|
|
3515
|
+
// tracks per-app modal persistence in-process, and:
|
|
3516
|
+
// 1. Always attaches `modal_present_after` to the log entry when
|
|
3517
|
+
// a modal is detected after the action so the user-journey
|
|
3518
|
+
// gate can score the signal.
|
|
3519
|
+
// 2. Overrides success=false when the modal survives a close attempt
|
|
3520
|
+
// (close-y click intent or any Escape keystroke) — preserves 0.1.54 E3.
|
|
3521
|
+
// 3. Records a `click_missed_target` cycle issue WHENEVER the
|
|
3522
|
+
// same modal description survives consecutive interactions —
|
|
3523
|
+
// 2 in a row writes the issue regardless of intent, because
|
|
3524
|
+
// two clicks against an unchanged modal is the recurrence
|
|
3525
|
+
// signature from E2E #11.
|
|
3526
|
+
// 4. Records a `modal_close_failed` cycle issue at 3+ consecutive
|
|
3527
|
+
// persistences so cycle_issues_acknowledged blocks the gate
|
|
3528
|
+
// even if the agent never called codeloop_handle_modal.
|
|
3529
|
+
// 5. Escalates the post-interact directive (built later) to a
|
|
3530
|
+
// HARD instruction so the agent must call codeloop_handle_modal
|
|
3531
|
+
// or codeloop_kill_modal_window before the next interact.
|
|
3532
|
+
//
|
|
3533
|
+
// Scope: desktop only. Browser and mobile flows have their own
|
|
3534
|
+
// signal channels (Playwright console errors, adb dumpsys).
|
|
3535
|
+
let clickEffectVerification;
|
|
3536
|
+
let modalPersistenceDirective;
|
|
3537
|
+
const closingIntent = (params.intent ?? params.description ?? params.purpose ?? params.step ?? "").toLowerCase();
|
|
3538
|
+
const isClosingIntent = /\b(close|dismiss|cancel|escape|exit)\b/.test(closingIntent);
|
|
3539
|
+
const isClickyAction = action === "click" || action === "double_click" || action === "right_click";
|
|
3540
|
+
const isEscapeKeystroke = action === "keystroke" && (params.key ?? "").toLowerCase() === "escape";
|
|
3541
|
+
const shouldVerifyClickEffect = success && tt === "desktop" && (isClickyAction || isEscapeKeystroke);
|
|
3542
|
+
if (shouldVerifyClickEffect) {
|
|
3543
|
+
try {
|
|
3544
|
+
await new Promise((resolve) => setTimeout(resolve, 500));
|
|
3545
|
+
const { detectModal } = await import("./runners/modal_detector.js");
|
|
3546
|
+
const trackerKey = (params.app_name || vr.getActiveRecordingAppName() || "<default>").toLowerCase();
|
|
3547
|
+
const detection = await detectModal({
|
|
3548
|
+
target_type: "desktop",
|
|
3549
|
+
app_name: params.app_name || vr.getActiveRecordingAppName() || undefined,
|
|
3550
|
+
cwd,
|
|
3551
|
+
});
|
|
3552
|
+
const { recordCycleIssue } = await import("./evidence/cycle_issues.js");
|
|
3553
|
+
if (detection.is_modal_present) {
|
|
3554
|
+
const desc = detection.modal_description ?? "(unnamed)";
|
|
3555
|
+
const existing = modalPersistenceTracker.get(trackerKey);
|
|
3556
|
+
let consecutive;
|
|
3557
|
+
if (existing && existing.description === desc) {
|
|
3558
|
+
consecutive = existing.consecutive + 1;
|
|
3559
|
+
modalPersistenceTracker.set(trackerKey, {
|
|
3560
|
+
description: desc,
|
|
3561
|
+
consecutive,
|
|
3562
|
+
firstSeenMs: existing.firstSeenMs,
|
|
3563
|
+
modal_kind: detection.modal_kind,
|
|
3564
|
+
});
|
|
3565
|
+
}
|
|
3566
|
+
else {
|
|
3567
|
+
consecutive = 1;
|
|
3568
|
+
modalPersistenceTracker.set(trackerKey, {
|
|
3569
|
+
description: desc,
|
|
3570
|
+
consecutive,
|
|
3571
|
+
firstSeenMs: Date.now(),
|
|
3572
|
+
modal_kind: detection.modal_kind,
|
|
3573
|
+
});
|
|
3574
|
+
}
|
|
3575
|
+
clickEffectVerification = {
|
|
3576
|
+
intent: closingIntent,
|
|
3577
|
+
modal_still_present: true,
|
|
3578
|
+
modal_description: detection.modal_description,
|
|
3579
|
+
modal_kind: detection.modal_kind,
|
|
3580
|
+
consecutive_persistences: consecutive,
|
|
3581
|
+
};
|
|
3582
|
+
// 0.1.54 E3 — close-intent override (unchanged).
|
|
3583
|
+
const isCloseAction = (isClickyAction && isClosingIntent) || isEscapeKeystroke;
|
|
3584
|
+
if (isCloseAction) {
|
|
3585
|
+
success = false;
|
|
3586
|
+
detail = `${detail} | verification: modal still present after intended-close action (${desc})`;
|
|
3587
|
+
await recordCycleIssue(cwd, {
|
|
3588
|
+
kind: "click_missed_target",
|
|
3589
|
+
selector: params.selector,
|
|
3590
|
+
coords: params.x != null && params.y != null ? [params.x, params.y] : undefined,
|
|
3591
|
+
description: closingIntent || `persisted modal: ${desc}`,
|
|
3592
|
+
modal_kind: detection.modal_kind,
|
|
3593
|
+
});
|
|
3594
|
+
}
|
|
3595
|
+
else if (consecutive >= 2) {
|
|
3596
|
+
// 0.1.55 F1 — same modal survived two interactions in a row
|
|
3597
|
+
// even though the agent didn't tag the click with a close
|
|
3598
|
+
// intent. That's the E2E #11 signature: many coordinate
|
|
3599
|
+
// clicks with no intent against a stuck OpenFolderDialog.
|
|
3600
|
+
await recordCycleIssue(cwd, {
|
|
3601
|
+
kind: "click_missed_target",
|
|
3602
|
+
selector: params.selector,
|
|
3603
|
+
coords: params.x != null && params.y != null ? [params.x, params.y] : undefined,
|
|
3604
|
+
description: `persisted modal across ${consecutive} consecutive interactions: ${desc}`,
|
|
3605
|
+
modal_kind: detection.modal_kind,
|
|
3606
|
+
});
|
|
3607
|
+
}
|
|
3608
|
+
// 0.1.55 F3 — at 3+ consecutive persistences, treat the modal
|
|
3609
|
+
// as actively stuck and write a modal_close_failed entry so
|
|
3610
|
+
// the cycle_issues_acknowledged gate fails even when the
|
|
3611
|
+
// agent never called codeloop_handle_modal.
|
|
3612
|
+
if (consecutive >= 3) {
|
|
3613
|
+
await recordCycleIssue(cwd, {
|
|
3614
|
+
kind: "modal_close_failed",
|
|
3615
|
+
modal_kind: detection.modal_kind ?? "custom",
|
|
3616
|
+
modal_description: desc,
|
|
3617
|
+
strategies_tried: [
|
|
3618
|
+
"raw_codeloop_interact_clicks_or_escape_x" + String(consecutive),
|
|
3619
|
+
],
|
|
3620
|
+
hwnd: detection.hwnd,
|
|
3621
|
+
});
|
|
3622
|
+
}
|
|
3623
|
+
// 0.1.55 F4 — HARD directive for the post-interact postscript.
|
|
3624
|
+
modalPersistenceDirective =
|
|
3625
|
+
`\n\n[CodeLoop F4] HARD: A ${detection.modal_kind ?? "modal"} dialog (${desc}) is STILL present after this interaction ` +
|
|
3626
|
+
`(${consecutive} consecutive interactions have not cleared it). ` +
|
|
3627
|
+
`Stop sending raw clicks / Escape keystrokes against it and call codeloop_handle_modal with decision: "cancel" or "dismiss" — ` +
|
|
3628
|
+
`the multi-strategy ladder (Escape → Alt+F4 → UIA Invoke "Close" → EndDialog) handles file dialogs the keystroke path cannot. ` +
|
|
3629
|
+
`If codeloop_handle_modal returns escalation: "kill_window_required", call codeloop_kill_modal_window with the returned hwnd. ` +
|
|
3630
|
+
`Continuing to ignore this modal will fail the cycle_issues_acknowledged gate and block ready_for_review.`;
|
|
3631
|
+
}
|
|
3632
|
+
else {
|
|
3633
|
+
// Modal cleared — reset the tracker for this app so the next
|
|
3634
|
+
// appearance is counted from 1.
|
|
3635
|
+
modalPersistenceTracker.delete(trackerKey);
|
|
3636
|
+
}
|
|
3637
|
+
}
|
|
3638
|
+
catch { /* best-effort verification */ }
|
|
3639
|
+
}
|
|
3403
3640
|
// Log interaction result for post-recording analysis
|
|
3404
3641
|
const interactionEntry = {
|
|
3405
3642
|
timestamp: new Date().toISOString(),
|
|
@@ -3411,6 +3648,8 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
|
|
|
3411
3648
|
};
|
|
3412
3649
|
if (consoleErrors.length > 0)
|
|
3413
3650
|
interactionEntry.console_errors = consoleErrors;
|
|
3651
|
+
if (clickEffectVerification)
|
|
3652
|
+
interactionEntry.verification = clickEffectVerification;
|
|
3414
3653
|
try {
|
|
3415
3654
|
const activeIds = vr.getActiveRecordingIds();
|
|
3416
3655
|
if (activeIds.length > 0) {
|
|
@@ -3428,7 +3667,17 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
|
|
|
3428
3667
|
}
|
|
3429
3668
|
}
|
|
3430
3669
|
catch { /* best-effort logging */ }
|
|
3431
|
-
|
|
3670
|
+
// 0.1.55 F4 — surface the modal-persistence directive as a
|
|
3671
|
+
// side-channel so the postscript builder outside withAuth can
|
|
3672
|
+
// append it to the agent-visible response.
|
|
3673
|
+
const ret = {
|
|
3674
|
+
success,
|
|
3675
|
+
action,
|
|
3676
|
+
detail,
|
|
3677
|
+
};
|
|
3678
|
+
if (modalPersistenceDirective)
|
|
3679
|
+
ret._f4_directive = modalPersistenceDirective;
|
|
3680
|
+
return ret;
|
|
3432
3681
|
}, { tool: "codeloop_interact", cwd: resolveCwd(params), input: params });
|
|
3433
3682
|
// 0.1.51 H11 — Post-interact modal-awareness directive.
|
|
3434
3683
|
// After every codeloop_interact call we append a HARD reminder
|
|
@@ -3446,6 +3695,20 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
|
|
|
3446
3695
|
"(\"confirm\" to proceed / \"cancel\" to abort / \"dismiss\" to close), and " +
|
|
3447
3696
|
"(4) only then continue the planned journey. " +
|
|
3448
3697
|
"Do NOT skip modals \"to keep moving\" — an unhandled modal will block every subsequent click and the user_journey_evidence gate will block ready_for_review.";
|
|
3698
|
+
// 0.1.55 F4 — when desktop click-effect verification detected a
|
|
3699
|
+
// modal that has now persisted across one or more interactions, the
|
|
3700
|
+
// F4 directive is stronger than the soft H11 reminder. Append it
|
|
3701
|
+
// and remove the side-channel field from the JSON-serialised result
|
|
3702
|
+
// so the agent doesn't see internals.
|
|
3703
|
+
{
|
|
3704
|
+
const r = result;
|
|
3705
|
+
const f4 = r && typeof r === "object" ? r._f4_directive : undefined;
|
|
3706
|
+
if (f4) {
|
|
3707
|
+
postscript += f4;
|
|
3708
|
+
if (r)
|
|
3709
|
+
delete r._f4_directive;
|
|
3710
|
+
}
|
|
3711
|
+
}
|
|
3449
3712
|
// 0.1.52 C4 — Empty-state seeding directive. Heuristic runs against
|
|
3450
3713
|
// the manifest entry the agent claims to be exercising plus the
|
|
3451
3714
|
// recent interaction log; when the call looks like a row/cell
|
|
@@ -3520,6 +3783,51 @@ Returns: detected modal description + result of the chosen decision.`, {
|
|
|
3520
3783
|
: undefined,
|
|
3521
3784
|
};
|
|
3522
3785
|
}
|
|
3786
|
+
// 0.1.54 E1 — file dialogs get the multi-strategy close ladder.
|
|
3787
|
+
// Photometry-DB E2E #11 had three .NET 8 OpenFolderDialog instances
|
|
3788
|
+
// that ignored single-keystroke close attempts; the ladder runs
|
|
3789
|
+
// Escape → Alt+F4 → UIA Invoke "Close" → EndDialog and re-detects
|
|
3790
|
+
// between steps. If all 4 fail it surfaces a HARD escalation hint
|
|
3791
|
+
// pointing at codeloop_kill_modal_window.
|
|
3792
|
+
if ((params.decision === "cancel" || params.decision === "dismiss") &&
|
|
3793
|
+
detection.modal_kind === "file_dialog" &&
|
|
3794
|
+
detection.target_type === "desktop") {
|
|
3795
|
+
const { closeModalWithStrategies } = await import("./runners/modal_close_strategies.js");
|
|
3796
|
+
const { recordCycleIssue } = await import("./evidence/cycle_issues.js");
|
|
3797
|
+
const close = await closeModalWithStrategies({
|
|
3798
|
+
initial_detection: detection,
|
|
3799
|
+
app_name: params.app_name,
|
|
3800
|
+
cwd,
|
|
3801
|
+
});
|
|
3802
|
+
if (!close.closed) {
|
|
3803
|
+
await recordCycleIssue(cwd, {
|
|
3804
|
+
kind: "modal_close_failed",
|
|
3805
|
+
modal_kind: detection.modal_kind ?? "custom",
|
|
3806
|
+
modal_description: detection.modal_description,
|
|
3807
|
+
strategies_tried: close.strategies_tried.map((a) => a.strategy),
|
|
3808
|
+
hwnd: close.hwnd,
|
|
3809
|
+
});
|
|
3810
|
+
return {
|
|
3811
|
+
decision_taken: params.decision,
|
|
3812
|
+
detection,
|
|
3813
|
+
close_attempt: close,
|
|
3814
|
+
escalation: "kill_window_required",
|
|
3815
|
+
next_step: `HARD: All 4 close strategies failed (escape, alt_f4, uia_invoke_close, end_dialog). ` +
|
|
3816
|
+
`Call codeloop_kill_modal_window with hwnd="${close.hwnd ?? "<rerun_detect_modal>"}" ` +
|
|
3817
|
+
`to escalate to PostMessage(WM_CLOSE) → TerminateProcess. After kill-window succeeds, ` +
|
|
3818
|
+
`you MUST also call codeloop_start_recording again because the app process was terminated. ` +
|
|
3819
|
+
`This stuck-modal cycle has been written to cycle_issues.jsonl and the gate-check will require you to acknowledge it before reporting ready_for_review.`,
|
|
3820
|
+
};
|
|
3821
|
+
}
|
|
3822
|
+
return {
|
|
3823
|
+
decision_taken: params.decision,
|
|
3824
|
+
detection,
|
|
3825
|
+
close_attempt: close,
|
|
3826
|
+
escalation: "none",
|
|
3827
|
+
next_step: `Modal closed successfully via strategy "${close.strategies_tried.find((a) => a.success)?.strategy ?? "unknown"}". ` +
|
|
3828
|
+
`Continue with codeloop_interact against the application.`,
|
|
3829
|
+
};
|
|
3830
|
+
}
|
|
3523
3831
|
// For confirm / cancel / dismiss we delegate to codeloop_interact
|
|
3524
3832
|
// semantics by issuing a key press that maps to the right OS
|
|
3525
3833
|
// convention. dismiss ⇒ Escape, cancel ⇒ Escape (most modals
|
|
@@ -3539,6 +3847,53 @@ Returns: detected modal description + result of the chosen decision.`, {
|
|
|
3539
3847
|
content: withInitHint([{ type: "text", text: JSON.stringify(authResult, null, 2) }], resolveCwd(params)),
|
|
3540
3848
|
};
|
|
3541
3849
|
});
|
|
3850
|
+
// 0.1.54 E1 — codeloop_kill_modal_window: last-resort modal escalation
|
|
3851
|
+
server.tool("codeloop_kill_modal_window", TOOL_BOOTSTRAP + `Last-resort escalation to kill a stuck modal that codeloop_handle_modal could not close with its 4-strategy ladder (Escape → Alt+F4 → UIA Invoke "Close" → EndDialog).
|
|
3852
|
+
|
|
3853
|
+
This tool is ONLY for the case where codeloop_handle_modal returned escalation: "kill_window_required". It will:
|
|
3854
|
+
1. PostMessage(WM_CLOSE) to the specific HWND.
|
|
3855
|
+
2. Wait 2 s and re-detect.
|
|
3856
|
+
3. If the modal is still up, call TerminateProcess on the owning PID.
|
|
3857
|
+
|
|
3858
|
+
CRITICAL: TerminateProcess will kill the app you are recording. After this tool succeeds you MUST:
|
|
3859
|
+
- Call codeloop_stop_recording (the recording will be partial but evidence is preserved).
|
|
3860
|
+
- Rebuild + re-launch the app and call codeloop_start_recording again.
|
|
3861
|
+
- Resume the user-journey from the start of the affected flow.
|
|
3862
|
+
|
|
3863
|
+
This is a Windows-only tool. macOS / Linux modal handling does not need this escalation.`, {
|
|
3864
|
+
hwnd: z.string().describe("Win32 HWND captured by detectModal / codeloop_handle_modal. Pass the value from detection.hwnd or close_attempt.hwnd."),
|
|
3865
|
+
target_type: targetTypeSchema.optional(),
|
|
3866
|
+
app_name: z.string().optional(),
|
|
3867
|
+
project_dir: z.string().optional(),
|
|
3868
|
+
workspace_root: z.string().optional(),
|
|
3869
|
+
}, async (params) => {
|
|
3870
|
+
const authResult = await withAuth(async () => {
|
|
3871
|
+
const { killModalWindow } = await import("./runners/modal_close_strategies.js");
|
|
3872
|
+
const { recordCycleIssue } = await import("./evidence/cycle_issues.js");
|
|
3873
|
+
const cwd = resolveCwd(params);
|
|
3874
|
+
const result = await killModalWindow({
|
|
3875
|
+
hwnd: params.hwnd,
|
|
3876
|
+
app_name: params.app_name,
|
|
3877
|
+
cwd,
|
|
3878
|
+
});
|
|
3879
|
+
if (result.steps.some((s) => s.step === "terminate_process" && s.success)) {
|
|
3880
|
+
await recordCycleIssue(cwd, {
|
|
3881
|
+
kind: "app_restart_during_recording",
|
|
3882
|
+
reason: "TerminateProcess used to clear stuck modal — recording will be partial",
|
|
3883
|
+
hwnd: params.hwnd,
|
|
3884
|
+
});
|
|
3885
|
+
}
|
|
3886
|
+
return {
|
|
3887
|
+
...result,
|
|
3888
|
+
next_step: result.closed
|
|
3889
|
+
? "Modal cleared. Call codeloop_stop_recording (current recording is partial), then rebuild + relaunch the app + codeloop_start_recording before resuming the user-journey."
|
|
3890
|
+
: "WARNING: even after PostMessage(WM_CLOSE) + TerminateProcess the modal is still detected. The host UI may be wedged at the OS level. Consider rebooting the workstation or escalating to manual intervention.",
|
|
3891
|
+
};
|
|
3892
|
+
}, { tool: "codeloop_kill_modal_window", cwd: resolveCwd(params), input: params });
|
|
3893
|
+
return {
|
|
3894
|
+
content: withInitHint([{ type: "text", text: JSON.stringify(authResult, null, 2) }], resolveCwd(params)),
|
|
3895
|
+
};
|
|
3896
|
+
});
|
|
3542
3897
|
// ── codeloop_init_project ────────────────────────────────────────
|
|
3543
3898
|
server.tool("codeloop_init_project", TOOL_BOOTSTRAP + `Initialize CodeLoop in a project that hasn't been set up yet. Creates
|
|
3544
3899
|
\`.codeloop/config.json\`, agent rules, MCP config, \`artifacts/\`, and
|