npm - codeloop-mcp-server - Versions diffs - 0.1.69 → 0.1.70 - Mend

codeloop-mcp-server 0.1.69 → 0.1.70

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33)

package/dist/auth/critical_floors.d.ts.map +1 -1
package/dist/auth/critical_floors.js +4 -0
package/dist/auth/critical_floors.js.map +1 -1
package/dist/index.js +2 -0
package/dist/index.js.map +1 -1
package/dist/runners/base.d.ts.map +1 -1
package/dist/runners/base.js +44 -17
package/dist/runners/base.js.map +1 -1
package/dist/runners/generic.d.ts.map +1 -1
package/dist/runners/generic.js +10 -1
package/dist/runners/generic.js.map +1 -1
package/dist/runners/maestro.d.ts.map +1 -1
package/dist/runners/maestro.js +4 -1
package/dist/runners/maestro.js.map +1 -1
package/dist/runners/playwright.d.ts.map +1 -1
package/dist/runners/playwright.js +7 -1
package/dist/runners/playwright.js.map +1 -1
package/dist/runners/python_tests.d.ts.map +1 -1
package/dist/runners/python_tests.js +6 -1
package/dist/runners/python_tests.js.map +1 -1
package/dist/runners/rust_tests.d.ts.map +1 -1
package/dist/runners/rust_tests.js +5 -1
package/dist/runners/rust_tests.js.map +1 -1
package/dist/runners/test_watchdog.d.ts +73 -0
package/dist/runners/test_watchdog.d.ts.map +1 -0
package/dist/runners/test_watchdog.js +73 -0
package/dist/runners/test_watchdog.js.map +1 -0
package/dist/tools/gate_check.js +18 -0
package/dist/tools/gate_check.js.map +1 -1
package/dist/tools/verify.d.ts.map +1 -1
package/dist/tools/verify.js +151 -38
package/dist/tools/verify.js.map +1 -1
package/package.json +1 -1

package/dist/auth/critical_floors.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"critical_floors.d.ts","sourceRoot":"","sources":["../../src/auth/critical_floors.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AAEH,MAAM,WAAW,aAAa;IAC5B,4DAA4D;IAC5D,WAAW,EAAE,MAAM,CAAC;IACpB,wDAAwD;IACxD,MAAM,EAAE,MAAM,CAAC;CAChB;AAED;;;;;;;;;;;;GAYG;AACH,eAAO,MAAM,eAAe,EAAE,aAAa,~~EA6G1C~~,CAAC"}
1	+ {"version":3,"file":"critical_floors.d.ts","sourceRoot":"","sources":["../../src/auth/critical_floors.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AAEH,MAAM,WAAW,aAAa;IAC5B,4DAA4D;IAC5D,WAAW,EAAE,MAAM,CAAC;IACpB,wDAAwD;IACxD,MAAM,EAAE,MAAM,CAAC;CAChB;AAED;;;;;;;;;;;;GAYG;AACH,eAAO,MAAM,eAAe,EAAE,aAAa,EAkH1C,CAAC"}

package/dist/auth/critical_floors.js CHANGED Viewed

@@ -116,6 +116,10 @@ export const CRITICAL_FLOORS = [
         min_version: "0.1.66",
         reason: "codeloop_verify could HANG FOREVER on Flutter projects — the three primary Flutter runners (flutter analyze, flutter test, flutter test integration_test/) all called runCommand WITHOUT a timeout, so any one of them could block the whole verify (and the MCP response, which only returns when the entire sequential pipeline finishes) indefinitely. The dominant trigger: `flutter test integration_test/` is launched with NO `-d` device flag and NO device probe, so when no emulator/simulator/device is booted the Flutter CLI waits forever for one — the user saw verify 'hang at here for 10 mins' on a fresh Flutter project with no device. `flutter analyze` could likewise stall on an implicit `pub get` over a slow/blocked network, and a brand-new machine could even block on Flutter's first-run analytics consent prompt. (The later deep-internal duplicates static_analysis `flutter analyze` and coverage `flutter test --coverage` already had 4-/5-min caps, but they never ran because the un-timed primary trio came first.) 0.1.66 fixes it: every primary Flutter runner now passes a wall-clock timeout (analyze 4 min, test 5 min, integration_test 6 min) — on expiry runCommand SIGKILLs and returns exit_code 124, which the runner surfaces as an explicit FAILED result with a clear note ('killed after N min … waiting on a cold emulator/device or a stalled pub get — boot a device or run flutter pub get, then re-verify') instead of a silent freeze — and all three now run with CI=true so Flutter never blocks on its first-run analytics/consent prompt. CRUCIALLY, 0.1.66 also fixes the runCommand watchdog itself: commands were spawned via `shell: true`, so killing the direct child only reaped the `/bin/sh -c …` wrapper, NOT the grandchild it forked (flutter is a bash script that spawns dart) — the grandchild kept the stdout pipe open so the `close` event never fired and the promise hung forever, defeating the timeout. 
The watchdog now makes the timed child a process-group leader (detached on POSIX) and SIGKILLs the ENTIRE group via process.kill(-pid) (taskkill /T /F on Windows), with a 1.5s safety net that force-settles the promise even if `close` is somehow withheld. Without this, every timeout added above would have been silently ineffective on real Flutter (and any shell-wrapped) commands.",
     },
+    {
+        min_version: "0.1.70",
+        reason: "codeloop_verify could HANG INDEFINITELY on a hanging/leaky test suite for EVERY non-Flutter stack — the 0.1.66–0.1.69 fixes only ever covered Flutter. Pre-0.1.70, the generic/node runner (`npm test` → jest/vitest/mocha), the python (pytest/unittest/django), the rust (`cargo test`), the playwright, and the Maestro runners ALL called runCommand with NO timeout at all, and the native build runner ran `xcodebuild`, `gradle assembleDebug` + `gradle test`, and `dotnet build` + `dotnet test` untimed too. So a Node/Jest suite that leaks an open handle (a DB pool, socket, server, or unref-less setInterval — Jest's own 'did not exit one second after the test run' warning), a pytest non-daemon thread, a stuck Gradle daemon, a codesign/keychain prompt, or a hung Maestro device wait would block the WHOLE verify (and the MCP response) FOREVER. 0.1.70 makes verify hang-proof on every OS (Windows/macOS/Linux), every agent (Cursor/Claude Code), and every app type (website, Node, Python, Rust, native iOS/Android, Windows/macOS .NET, Flutter): (1) every test runner now has a hard wall-clock cap (10 min tests, 15 min native builds, 1 min probes) so a true hang is always BOUNDED, not infinite; (2) a SILENCE-BASED completion-settle watchdog (generalised from the Flutter fix) arms on each runner's end-of-run marker (jest 'Ran all test suites'/'Tests:', vitest 'Test Files', mocha 'N passing', node:test TAP, pytest's '=== N passed in ===', cargo 'test result:', dotnet 'Test Run Successful./Failed.') and force-closes the process tree once output goes QUIET for the grace — recovering the real pass/fail in ~30s instead of burning the full timeout. It RESETS the grace on every subsequent output chunk, so monorepo runs (turbo/nx/lerna that print one summary PER PACKAGE) are never killed mid-stream; only true post-completion silence trips it. 
The settle force-close now also engages the POSIX process group (previously only timeouts did), so a leaked grandchild (npm→node→jest, flutter→dart) is actually reaped instead of orphaned. (3) A new skip_tests control (per-call `skip_tests:true` on codeloop_verify, or persistent config.tests.run=false) runs every check EXCEPT the project's own test suite + coverage — for suites that are known-broken, leak, or are too slow to run every cycle — and crucially now reaches the NATIVE test suites embedded in the build runner (`gradle test`, `dotnet test`) too, while the build itself still runs. Skipping is HONEST: the required_tests_pass gate BLOCKS ready_for_review (a skipped suite can never silently produce a 'Verified by CodeLoop' result) unless explicitly waived via config.tests.waive_gate. (4) verify now distinguishes a TRUE timeout (exit 124 — pushes the agent to ASK the user Y/N to skip or fix) from an AUTO-RECOVERED leak (completed but didn't exit — an informational note telling the developer to dispose the resource in tearDown(), no skip needed). Anyone below 0.1.70 still risks an indefinite verify freeze on a non-Flutter hanging test suite.",
+    },
     {
         min_version: "0.1.69",
         reason: "codeloop_verify STILL hung on real Flutter projects even after the 0.1.68 dedup (recurrence #2, confirmed from a live 0.1.68 WedCheese run) — two gaps remained. (A) DUPLICATE TEST SUITE: when a project has NO integration_test/ directory, the integration runner fell back to `flutter test test/`, which re-ran the EXACT same suite the primary flutter_test runner had just executed. On a normal project that only wasted time; on a project whose test/ suite hangs it burned a SECOND full timeout back-to-back with the first. The integration runner now SKIPS entirely when there is no integration_test/ directory (golden/widget tests in test/ are already covered by the flutter_test runner and need no device); it only runs for a real integration_test/ dir on a booted device. (B) LEAKED-ISOLATE HANG: the dominant real cause on WedCheese was a non-terminating TEST — the test body passes (assertions print) but the Dart isolate never exits because a Timer/Timer.periodic, an unclosed StreamController, or a Firebase/Firestore listener was never disposed, so `flutter test` hangs until the wall-clock timeout SIGKILLs it (exit 124) — wasting the FULL 5-6 min EVERY run. 0.1.69 adds a completion-settle watchdog to runCommand: when the streamed output matches a strong end-of-work marker (`flutter test` prints 'All tests passed!' / 'Some tests failed.' exactly once at the end) but the process then fails to exit, CodeLoop force-closes the process tree ~30s after the marker and resolves exit_code 0 with the REAL parsed pass/fail plus a note naming the leaked-resource cause and pointing the developer to dispose it in tearDown() — instead of blocking the MCP response for the entire timeout. A natural exit within the grace window is unaffected, so well-behaved suites are unchanged. The Flutter timeout note is also rewritten to name the non-terminating-test root cause first. 
NOTE: CodeLoop cannot FIX the leaked resource in the user's test — it now recovers fast and tells the developer exactly what to fix.",

package/dist/auth/critical_floors.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"critical_floors.js","sourceRoot":"","sources":["../../src/auth/critical_floors.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AASH;;;;;;;;;;;;GAYG;AACH,MAAM,CAAC,MAAM,eAAe,GAAoB;IAC9C;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,ufAAuf;KAC1f;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EAAE,4hBAA4hB;KACriB;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EAAE,yvBAAyvB;KAClwB;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,kxBAAkxB;KACrxB;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,0/BAA0/B;KAC7/B;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,0iCAA0iC;KAC7iC;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,gqDAAgqD;KACnqD;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,uqDAAuqD;KAC1qD;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,w+EAAw+E;KAC3+E;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,88EAA88E;KACj9E;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,uiEAAuiE;KAC1iE;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,u/DAAu/D;KAC1/D;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,k3DAAk3D;KACr3D;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,oiDAAoiD;KACviD;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,g3CAAg3C;KACn3C;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,srFAAsrF;KACzrF;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,gwEAAgwE;KACnwE;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,84EAA84E;KACj5E;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,gwEAAgwE;KACnwE;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,09DAA09D;KAC79D;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,u4DAAu4D;KAC14D;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,u/FAAu/F;KAC1/F;CACF,CAAC"}
1	+ {"version":3,"file":"critical_floors.js","sourceRoot":"","sources":["../../src/auth/critical_floors.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AASH;;;;;;;;;;;;GAYG;AACH,MAAM,CAAC,MAAM,eAAe,GAAoB;IAC9C;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,ufAAuf;KAC1f;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EAAE,4hBAA4hB;KACriB;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EAAE,yvBAAyvB;KAClwB;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,kxBAAkxB;KACrxB;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,0/BAA0/B;KAC7/B;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,0iCAA0iC;KAC7iC;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,gqDAAgqD;KACnqD;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,uqDAAuqD;KAC1qD;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,w+EAAw+E;KAC3+E;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,88EAA88E;KACj9E;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,uiEAAuiE;KAC1iE;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,u/DAAu/D;KAC1/D;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,k3DAAk3D;KACr3D;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,oiDAAoiD;KACviD;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,g3CAAg3C;KACn3C;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,srFAAsrF;KACzrF;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,gwEAAgwE;KACnwE;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,84EAA84E;KACj5E;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,gwEAAgwE;KACnwE;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,g8FAAg8F;KACn8F;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,09DAA09D;KAC79D;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,u4DAAu4D;KAC14D;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,u/FAAu/F;KAC1/F;CACF,CAAC"}

package/dist/index.js CHANGED Viewed

@@ -606,6 +606,7 @@ Returns: structured report with pass/fail counts, artifact paths, and next-step
     project_dir: z.string().optional().describe("Absolute path to the project root. Defaults to CODELOOP_PROJECT_DIR env var or auto-discovered project directory. MUST be an actual project folder — passing the user's home directory is rejected. If your IDE launches the MCP server from the wrong cwd (common on Windows where Cursor uses C:\\Users\\<name> as cwd), set CODELOOP_PROJECT_DIR or pass this param explicitly."),
     workspace_root: z.string().optional().describe("[Alias for project_dir] Same semantics; accepted because many agents reach for this conventional name. Pass either `project_dir` OR `workspace_root` — they're equivalent."),
     tasks_completed: z.array(z.string()).optional().describe("0.1.52 C5 — free-text titles of the tasks the agent claims to have completed in this code change. Cross-checked against the change manifest produced by C1: every claim should map to >= 1 manifest entry and every manifest entry should map to >= 1 claim. Mismatches surface as warnings in the verify response and feed the change_coverage_evidence gate (C3)."),
+    skip_tests: z.boolean().optional().describe("0.1.70 — Run every check EXCEPT the project's own test suite (flutter test / npm test / pytest / cargo test / playwright / maestro) and coverage. Use ONLY with the user's consent — when their suite is known-broken, hangs (a leaked Timer/StreamController/listener keeps the process alive after assertions pass), or is too slow to run every cycle. Analysis, build, backend, observability, screenshots, and design comparison still run. The required_tests_pass gate then BLOCKS ready_for_review until tests run (skip_tests:false) OR config.tests.waive_gate:true is set — so a skipped suite can never silently produce a 'Verified by CodeLoop' result. When a prior verify reports a hung/force-closed test runner ([CodeLoop ASK] note), ASK the user Y/N before setting this."),
     mode: z.string().optional().describe(AGENT_MODE_PARAM_DESC),
 }, async (params) => {
     const cwd = resolveCwd(params);
@@ -618,6 +619,7 @@ Returns: structured report with pass/fail counts, artifact paths, and next-step
             scope: params.scope,
             platform: params.platform,
             tasks_completed: params.tasks_completed,
+            skip_tests: params.skip_tests,
         };
         const output = await runVerify(input, cfg, cwd);
         await trackUsage(apiKey, "verification_run");