npm - codeloop-mcp-server - Versions diffs - 0.1.71 → 0.1.72 - Mend

codeloop-mcp-server 0.1.71 → 0.1.72

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (11)

package/dist/auth/critical_floors.d.ts.map +1 -1
package/dist/auth/critical_floors.js +4 -0
package/dist/auth/critical_floors.js.map +1 -1
package/dist/index.d.ts.map +1 -1
package/dist/index.js +36 -3
package/dist/index.js.map +1 -1
package/dist/tools/verify.d.ts +67 -1
package/dist/tools/verify.d.ts.map +1 -1
package/dist/tools/verify.js +153 -1
package/dist/tools/verify.js.map +1 -1
package/package.json +1 -1

package/dist/auth/critical_floors.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"critical_floors.d.ts","sourceRoot":"","sources":["../../src/auth/critical_floors.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AAEH,MAAM,WAAW,aAAa;IAC5B,4DAA4D;IAC5D,WAAW,EAAE,MAAM,CAAC;IACpB,wDAAwD;IACxD,MAAM,EAAE,MAAM,CAAC;CAChB;AAED;;;;;;;;;;;;GAYG;AACH,eAAO,MAAM,eAAe,EAAE,aAAa,~~EAuH1C~~,CAAC"}
1	+ {"version":3,"file":"critical_floors.d.ts","sourceRoot":"","sources":["../../src/auth/critical_floors.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AAEH,MAAM,WAAW,aAAa;IAC5B,4DAA4D;IAC5D,WAAW,EAAE,MAAM,CAAC;IACpB,wDAAwD;IACxD,MAAM,EAAE,MAAM,CAAC;CAChB;AAED;;;;;;;;;;;;GAYG;AACH,eAAO,MAAM,eAAe,EAAE,aAAa,EA4H1C,CAAC"}

package/dist/auth/critical_floors.js CHANGED Viewed

@@ -136,5 +136,9 @@ export const CRITICAL_FLOORS = [
         min_version: "0.1.67",
         reason: "Mobile real-device E2E + read-only audit mode + nested native builds — three gaps that left Flutter/native mobile and 'just-tell-me-the-problems' workflows underserved. (1) DEVICE-DRIVEN INTERACTIVE TESTING: 0.1.66 stopped `flutter test integration_test/` from hanging when no device was booted, but only via a 6-min timeout, and NOTHING ever pushed the agent to OPEN the Android emulator / iOS simulator the user actually tests on — so real interactive verification (typing in the app, exercising native channels) never happened. 0.1.67 adds a fast cross-platform device-readiness probe (adb devices / xcrun simctl list booted, each wrapped in the 0.1.66 process-tree-killable 15s timeout, degrading to 'no device' when the CLI is absent): the Flutter integration runner now probes FIRST and, when nothing is booted, SKIPS the command (no 6-min stall) and returns a HARD directive with copy-paste commands telling the agent to boot an Android emulator AND/OR iOS simulator (and to exercise BOTH when the project targets both — the user uses Android + Xcode simulators); codeloop_start_recording runs the same probe for android_emulator/ios_simulator targets and attaches the open-simulator directive instead of failing cryptically inside adb/simctl. (2) ANALYZING IN-APP AI ANSWERS: codeloop_interact gains a `get_text` action (browser: Playwright textContent of a selector or the page body; android_emulator: uiautomator-hierarchy text extraction so the agent can read a Flutter/native AI chatbot's answer off a booted emulator without OCR; iOS: screenshot+vision) plus an `expect_contains` assertion, and the AI-prompt depth tip now instructs the agent to READ BACK and verify each chatbot answer instead of just submitting prompts — so 'type into the chatbox and analyze the AI's answer' is a first-class, evidence-logged flow. 
(3) READ-ONLY AUDIT MODE: when the user says 'do NOT modify my code, just list the problems', the agent can now set mode='audit' on codeloop_verify/diagnose/gate_check (persisted per session in .codeloop/agent_mode.json, also settable via config.agent_mode) — CodeLoop still runs every verification and returns the full findings, but its responses STOP pushing the auto-fix loop (no 'EXECUTE REPAIR TASKS NOW', no 'MANDATORY AUTO-FIX LOOP', no continue_fixing chase) and instead instruct the agent to present an audit report and stop, finally letting CodeLoop OBEY a no-modification request. ALSO in 0.1.67: native iOS/Android builds now discover NESTED project dirs — runNativeBuild walks up to 2 levels for ios/*.xcodeproj|*.xcworkspace and android/(app/)build.gradle(.kts) and builds in that dir (RN/Capacitor/monorepo layouts were DETECTED as xcode/android but then skipped with 'No project found'), and codeloop_capture_screenshot accepts target_type='android_emulator'|'ios_simulator' to capture the booted emulator/simulator via adb/simctl instead of the host desktop. Cross-platform: web verify+evidence works on Windows+macOS, Android on both, iOS on macOS (gracefully skipped off-Mac), Cursor and Claude Code identical.",
     },
+    {
+        min_version: "0.1.72",
+        reason: "codeloop_verify ran SILENT for minutes and looked frozen — so users INTERRUPTED it mid-build, orphaning gradle/gen_snapshot + flutter assemble processes. After the 0.1.71 mid-run-stall fix, the WedCheese hang was finally a real, BOUNDED long-running step (not a leak): a scope:'full' verify on a Flutter project also builds the native android/ host, and `gradle assembleDebug` triggers a CPU-intensive multi-arch AOT compile (gen_snapshot) that legitimately takes several minutes. But codeloop_verify is a SINGLE blocking MCP call that buffers all output and returns ONCE at the end — no progress is streamed — so the tool-call card sat silent past the 3-minute mark and the user reasonably read it as a freeze, hit cancel, and left orphaned native-build processes behind (which then contended for the next run). 0.1.72 streams live progress via MCP notifications/progress (rendered on Cursor's tool-call card since 1.7.39): a 10s heartbeat plus a per-phase boundary marker emit the CURRENT phase (e.g. 'Building Android app (gradle + AOT/gen_snapshot)'), elapsed mm:ss, and a rough ETA / honest 'still running (longer than expected)' once past the estimate — so even a 7-minute native build visibly keeps ticking instead of looking hung. The progress value is strictly monotonic per the MCP spec and the bar's total is clamped above elapsed so it never sticks at 100% before verify returns; the heartbeat interval is unref()'d and cleared on return so it can never hold the process open or fire against a stale token, and it only emits when the client requested progress (passed a progressToken) — otherwise verify behaves exactly as before. sendNotification is fire-and-forget and fully swallowed, so a transport hiccup can never fail or slow the verify itself. 
CROSS-CLIENT SAFETY: progress is gated per client — Cursor (and other spec-compliant clients: VS Code / Windsurf / Cline / generic SDK clients) get it, but Claude Code is explicitly EXCLUDED because as of 2026 it neither renders notifications/progress NOR tolerates it — emitting progress tears down its stdio transport and respawns the MCP server mid-run (anthropics/claude-code #47765 / #53617), and Claude Code DOES send a progressToken, so a naive 'honor the token' implementation would have BROKEN CodeLoop on Claude Code. progressClientDecision suppresses emission for any client whose name contains 'claude' (overridable via CODELOOP_PROGRESS=on|off), and runVerify never emits a terminal progress>=total notification and clears its heartbeat BEFORE returning, so progress can never race the tool response (the other half of the #47765 unknown-token hazard). Works identically on Windows/macOS/Linux for every app type (Windows/macOS .NET desktop, Flutter mobile, native iOS/Android, web) since the phase markers mirror the per-platform runner gating. Anyone below 0.1.72 either sees a silent multi-minute verify on slow native builds (Cursor) and is liable to interrupt it, or — had progress shipped without the client gate — would have hit transport crashes on Claude Code.",
+    },
 ];
 //# sourceMappingURL=critical_floors.js.map

package/dist/auth/critical_floors.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"critical_floors.js","sourceRoot":"","sources":["../../src/auth/critical_floors.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AASH;;;;;;;;;;;;GAYG;AACH,MAAM,CAAC,MAAM,eAAe,GAAoB;IAC9C;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,ufAAuf;KAC1f;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EAAE,4hBAA4hB;KACriB;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EAAE,yvBAAyvB;KAClwB;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,kxBAAkxB;KACrxB;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,0/BAA0/B;KAC7/B;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,0iCAA0iC;KAC7iC;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,gqDAAgqD;KACnqD;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,uqDAAuqD;KAC1qD;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,w+EAAw+E;KAC3+E;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,88EAA88E;KACj9E;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,uiEAAuiE;KAC1iE;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,u/DAAu/D;KAC1/D;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,k3DAAk3D;KACr3D;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,oiDAAoiD;KACviD;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,g3CAAg3C;KACn3C;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,srFAAsrF;KACzrF;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,gwEAAgwE;KACnwE;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,84EAA84E;KACj5E;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,gwEAAgwE;KACnwE;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,s/DAAs/D;KACz/D;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,g8FAAg8F;KACn8F;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,09DAA09D;KAC79D;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,u4DAAu4D;KAC14D;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,u/FAAu/F;KAC1/F;CACF,CAAC"}
1	+ {"version":3,"file":"critical_floors.js","sourceRoot":"","sources":["../../src/auth/critical_floors.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AASH;;;;;;;;;;;;GAYG;AACH,MAAM,CAAC,MAAM,eAAe,GAAoB;IAC9C;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,ufAAuf;KAC1f;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EAAE,4hBAA4hB;KACriB;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EAAE,yvBAAyvB;KAClwB;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,kxBAAkxB;KACrxB;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,0/BAA0/B;KAC7/B;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,0iCAA0iC;KAC7iC;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,gqDAAgqD;KACnqD;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,uqDAAuqD;KAC1qD;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,w+EAAw+E;KAC3+E;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,88EAA88E;KACj9E;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,uiEAAuiE;KAC1iE;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,u/DAAu/D;KAC1/D;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,k3DAAk3D;KACr3D;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,oiDAAoiD;KACviD;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,g3CAAg3C;KACn3C;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,srFAAsrF;KACzrF;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,gwEAAgwE;KACnwE;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,84EAA84E;KACj5E;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,gwEAAgwE;KACnwE;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,s/DAAs/D;KACz/D;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,g8FAAg8F;KACn8F;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,09DAA09D;KAC79D;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,u4DAAu4D;KAC14D;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,u/FAAu/F;KAC1/F;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,w+FAAw+F;KAC3+F;CACF,CAAC"}

package/dist/index.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";~~AA6lBA~~,wBAAgB,eAAe,CAAC,GAAG,EAAE,MAAM,GAAG,IAAI,CAEjD"}
1	+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AAgmBA,wBAAgB,eAAe,CAAC,GAAG,EAAE,MAAM,GAAG,IAAI,CAEjD"}

package/dist/index.js CHANGED Viewed

@@ -608,20 +608,53 @@ Returns: structured report with pass/fail counts, artifact paths, and next-step
     tasks_completed: z.array(z.string()).optional().describe("0.1.52 C5 — free-text titles of the tasks the agent claims to have completed in this code change. Cross-checked against the change manifest produced by C1: every claim should map to >= 1 manifest entry and every manifest entry should map to >= 1 claim. Mismatches surface as warnings in the verify response and feed the change_coverage_evidence gate (C3)."),
     skip_tests: z.boolean().optional().describe("0.1.70 — Run every check EXCEPT the project's own test suite (flutter test / npm test / pytest / cargo test / playwright / maestro) and coverage. Use ONLY with the user's consent — when their suite is known-broken, hangs (a leaked Timer/StreamController/listener keeps the process alive after assertions pass), or is too slow to run every cycle. Analysis, build, backend, observability, screenshots, and design comparison still run. The required_tests_pass gate then BLOCKS ready_for_review until tests run (skip_tests:false) OR config.tests.waive_gate:true is set — so a skipped suite can never silently produce a 'Verified by CodeLoop' result. When a prior verify reports a hung/force-closed test runner ([CodeLoop ASK] note), ASK the user Y/N before setting this."),
     mode: z.string().optional().describe(AGENT_MODE_PARAM_DESC),
-}, async (params) => {
+}, async (params, extra) => {
     const cwd = resolveCwd(params);
     const explicitDir = params.project_dir || params.workspace_root;
     const cfg = explicitDir ? loadConfig(explicitDir) : config;
     const auditMode = resolveAgentMode({ cwd, paramMode: params.mode, configMode: cfg.agent_mode }) === "audit";
+    // 0.1.72 — Stream live progress so a multi-minute verify (native AOT
+    // build, slow test suite) doesn't look frozen in the chat. Cursor renders
+    // notifications/progress.message on the tool-call card. We only emit when
+    // (a) the client requested progress (it passed a progressToken in the
+    // request _meta) AND (b) the client is SAFE to stream to — Claude Code is
+    // NOT (it neither renders progress nor tolerates it: emitting tears down
+    // its stdio transport and respawns the server — anthropics/claude-code
+    // #47765 / #53617 — which would BREAK CodeLoop mid-run). progressClientDecision
+    // gates this (CODELOOP_PROGRESS=on|off overrides). When suppressed this is a
+    // no-op and verify behaves exactly as before; sendNotification is otherwise
+    // fire-and-forget and swallowed so a transport hiccup never fails/slows verify.
+    const progressToken = extra?._meta?.progressToken;
+    let clientName;
+    try {
+        clientName = server.server.getClientVersion?.()?.name;
+    }
+    catch {
+        clientName = undefined;
+    }
+    let lastProgress = 0;
     const result = await withAuth(async () => {
-        const { runVerify } = await import("./tools/verify.js");
+        const { runVerify, formatVerifyProgress, progressClientDecision } = await import("./tools/verify.js");
+        const progressSafe = progressClientDecision(clientName, process.env.CODELOOP_PROGRESS);
+        const onProgress = !progressSafe || progressToken === undefined
+            ? undefined
+            : (u) => {
+                const { progress, total, message } = formatVerifyProgress(u, lastProgress);
+                lastProgress = progress;
+                void extra
+                    .sendNotification({
+                    method: "notifications/progress",
+                    params: { progressToken, progress, total, message },
+                })
+                    .catch(() => { });
+            };
         const input = {
             scope: params.scope,
             platform: params.platform,
             tasks_completed: params.tasks_completed,
             skip_tests: params.skip_tests,
         };
-        const output = await runVerify(input, cfg, cwd);
+        const output = await runVerify(input, cfg, cwd, onProgress);
         await trackUsage(apiKey, "verification_run");
         return output;
     }, { tool: "codeloop_verify", cwd, input: params });