codeloop-mcp-server 0.1.69 → 0.1.71

This diff shows the changes between two publicly released versions of the package, as they appear in the supported public registries. It is provided for informational purposes only.
Files changed (41):
  1. package/dist/auth/critical_floors.d.ts.map +1 -1
  2. package/dist/auth/critical_floors.js +8 -0
  3. package/dist/auth/critical_floors.js.map +1 -1
  4. package/dist/index.js +2 -0
  5. package/dist/index.js.map +1 -1
  6. package/dist/runners/base.d.ts +3 -0
  7. package/dist/runners/base.d.ts.map +1 -1
  8. package/dist/runners/base.js +142 -22
  9. package/dist/runners/base.js.map +1 -1
  10. package/dist/runners/flutter.d.ts.map +1 -1
  11. package/dist/runners/flutter.js +10 -4
  12. package/dist/runners/flutter.js.map +1 -1
  13. package/dist/runners/flutter_cache.d.ts +4 -0
  14. package/dist/runners/flutter_cache.d.ts.map +1 -1
  15. package/dist/runners/flutter_cache.js +18 -1
  16. package/dist/runners/flutter_cache.js.map +1 -1
  17. package/dist/runners/generic.d.ts.map +1 -1
  18. package/dist/runners/generic.js +10 -1
  19. package/dist/runners/generic.js.map +1 -1
  20. package/dist/runners/maestro.d.ts.map +1 -1
  21. package/dist/runners/maestro.js +4 -1
  22. package/dist/runners/maestro.js.map +1 -1
  23. package/dist/runners/playwright.d.ts.map +1 -1
  24. package/dist/runners/playwright.js +7 -1
  25. package/dist/runners/playwright.js.map +1 -1
  26. package/dist/runners/python_tests.d.ts.map +1 -1
  27. package/dist/runners/python_tests.js +6 -1
  28. package/dist/runners/python_tests.js.map +1 -1
  29. package/dist/runners/rust_tests.d.ts.map +1 -1
  30. package/dist/runners/rust_tests.js +5 -1
  31. package/dist/runners/rust_tests.js.map +1 -1
  32. package/dist/runners/test_watchdog.d.ts +73 -0
  33. package/dist/runners/test_watchdog.d.ts.map +1 -0
  34. package/dist/runners/test_watchdog.js +73 -0
  35. package/dist/runners/test_watchdog.js.map +1 -0
  36. package/dist/tools/gate_check.js +18 -0
  37. package/dist/tools/gate_check.js.map +1 -1
  38. package/dist/tools/verify.d.ts.map +1 -1
  39. package/dist/tools/verify.js +165 -38
  40. package/dist/tools/verify.js.map +1 -1
  41. package/package.json +1 -1
@@ -1 +1 @@
1
- {"version":3,"file":"critical_floors.d.ts","sourceRoot":"","sources":["../../src/auth/critical_floors.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AAEH,MAAM,WAAW,aAAa;IAC5B,4DAA4D;IAC5D,WAAW,EAAE,MAAM,CAAC;IACpB,wDAAwD;IACxD,MAAM,EAAE,MAAM,CAAC;CAChB;AAED;;;;;;;;;;;;GAYG;AACH,eAAO,MAAM,eAAe,EAAE,aAAa,EA6G1C,CAAC"}
1
+ {"version":3,"file":"critical_floors.d.ts","sourceRoot":"","sources":["../../src/auth/critical_floors.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AAEH,MAAM,WAAW,aAAa;IAC5B,4DAA4D;IAC5D,WAAW,EAAE,MAAM,CAAC;IACpB,wDAAwD;IACxD,MAAM,EAAE,MAAM,CAAC;CAChB;AAED;;;;;;;;;;;;GAYG;AACH,eAAO,MAAM,eAAe,EAAE,aAAa,EAuH1C,CAAC"}
@@ -116,6 +116,14 @@ export const CRITICAL_FLOORS = [
116
116
  min_version: "0.1.66",
117
117
  reason: "codeloop_verify could HANG FOREVER on Flutter projects — the three primary Flutter runners (flutter analyze, flutter test, flutter test integration_test/) all called runCommand WITHOUT a timeout, so any one of them could block the whole verify (and the MCP response, which only returns when the entire sequential pipeline finishes) indefinitely. The dominant trigger: `flutter test integration_test/` is launched with NO `-d` device flag and NO device probe, so when no emulator/simulator/device is booted the Flutter CLI waits forever for one — the user saw verify 'hang at here for 10 mins' on a fresh Flutter project with no device. `flutter analyze` could likewise stall on an implicit `pub get` over a slow/blocked network, and a brand-new machine could even block on Flutter's first-run analytics consent prompt. (The later deep-internal duplicates static_analysis `flutter analyze` and coverage `flutter test --coverage` already had 4-/5-min caps, but they never ran because the un-timed primary trio came first.) 0.1.66 fixes it: every primary Flutter runner now passes a wall-clock timeout (analyze 4 min, test 5 min, integration_test 6 min) — on expiry runCommand SIGKILLs and returns exit_code 124, which the runner surfaces as an explicit FAILED result with a clear note ('killed after N min … waiting on a cold emulator/device or a stalled pub get — boot a device or run flutter pub get, then re-verify') instead of a silent freeze — and all three now run with CI=true so Flutter never blocks on its first-run analytics/consent prompt. CRUCIALLY, 0.1.66 also fixes the runCommand watchdog itself: commands were spawned via `shell: true`, so killing the direct child only reaped the `/bin/sh -c …` wrapper, NOT the grandchild it forked (flutter is a bash script that spawns dart) — the grandchild kept the stdout pipe open so the `close` event never fired and the promise hung forever, defeating the timeout. 
The watchdog now makes the timed child a process-group leader (detached on POSIX) and SIGKILLs the ENTIRE group via process.kill(-pid) (taskkill /T /F on Windows), with a 1.5s safety net that force-settles the promise even if `close` is somehow withheld. Without this, every timeout added above would have been silently ineffective on real Flutter (and any shell-wrapped) commands.",
118
118
  },
119
+ {
120
+ min_version: "0.1.71",
121
+ reason: "codeloop_verify could still HANG for several MINUTES on a Flutter (and any compact-reporter) test suite that hangs MID-RUN — the 0.1.69/0.1.70 completion-settle watchdog could NOT catch it. Real case (WedCheese, on 0.1.70): one test isolate passed its assertions but leaked a Timer/StreamController/Firebase listener, so the isolate never exited and the WHOLE `flutter test --coverage` run froze at `+24 -5: …` while the compact reporter's elapsed clock ticked 00:16 → 05:54. The suite NEVER printed its end-of-run marker (`All tests passed!`/`Some tests failed.`), so the completion-settle watchdog never armed; and the ticking clock is CONTINUOUS output, so the silence-based grace never tripped either — only the 6-min hard cap eventually freed it (a multi-minute freeze the agent/user reads as a hang, and usually interrupts first). 0.1.71 adds a PROGRESS-STALL watchdog to runCommand: it tracks the reporter's progress signature with the volatile clock stripped (Flutter compact `+P -F ~S: desc`); while that signature is FROZEN it counts down a grace and force-closes the process tree, but ANY change (a test passed/failed, a new test started) RESETS the grace so a legitimately slow single test only delays, never falsely kills. Once the end-of-run marker prints it disables itself and hands off to the silence-based settle watchdog, so legitimate end-of-run work with frozen counts (e.g. `--coverage` writing lcov) is never killed. Wired into `flutter test --coverage` and `flutter test integration_test/` with a 90s stall grace (well above between-file compile pauses and Flutter's 30s default per-test timeout, a fraction of the 6-min cap). A mid-run stall is force-closed in ~90s with PARTIAL results, the suite is marked failed (never a false pass), and verify pushes a `[CodeLoop ASK]` directive telling the agent to ask the user (Y/N) to dispose the leaked resource in tearDown() or re-run with skip_tests:true. 
Anyone below 0.1.71 still risks a multi-minute verify freeze on a Flutter suite that hangs before it finishes.",
122
+ },
123
+ {
124
+ min_version: "0.1.70",
125
+ reason: "codeloop_verify could HANG INDEFINITELY on a hanging/leaky test suite for EVERY non-Flutter stack — the 0.1.66–0.1.69 fixes only ever covered Flutter. Pre-0.1.70, the generic/node runner (`npm test` → jest/vitest/mocha), the python (pytest/unittest/django), the rust (`cargo test`), the playwright, and the Maestro runners ALL called runCommand with NO timeout at all, and the native build runner ran `xcodebuild`, `gradle assembleDebug` + `gradle test`, and `dotnet build` + `dotnet test` untimed too. So a Node/Jest suite that leaks an open handle (a DB pool, socket, server, or unref-less setInterval — Jest's own 'did not exit one second after the test run' warning), a pytest non-daemon thread, a stuck Gradle daemon, a codesign/keychain prompt, or a hung Maestro device wait would block the WHOLE verify (and the MCP response) FOREVER. 0.1.70 makes verify hang-proof on every OS (Windows/macOS/Linux), every agent (Cursor/Claude Code), and every app type (website, Node, Python, Rust, native iOS/Android, Windows/macOS .NET, Flutter): (1) every test runner now has a hard wall-clock cap (10 min tests, 15 min native builds, 1 min probes) so a true hang is always BOUNDED, not infinite; (2) a SILENCE-BASED completion-settle watchdog (generalised from the Flutter fix) arms on each runner's end-of-run marker (jest 'Ran all test suites'/'Tests:', vitest 'Test Files', mocha 'N passing', node:test TAP, pytest's '=== N passed in ===', cargo 'test result:', dotnet 'Test Run Successful./Failed.') and force-closes the process tree once output goes QUIET for the grace — recovering the real pass/fail in ~30s instead of burning the full timeout. It RESETS the grace on every subsequent output chunk, so monorepo runs (turbo/nx/lerna that print one summary PER PACKAGE) are never killed mid-stream; only true post-completion silence trips it. 
The settle force-close now also engages the POSIX process group (previously only timeouts did), so a leaked grandchild (npm→node→jest, flutter→dart) is actually reaped instead of orphaned. (3) A new skip_tests control (per-call `skip_tests:true` on codeloop_verify, or persistent config.tests.run=false) runs every check EXCEPT the project's own test suite + coverage — for suites that are known-broken, leak, or are too slow to run every cycle — and crucially now reaches the NATIVE test suites embedded in the build runner (`gradle test`, `dotnet test`) too, while the build itself still runs. Skipping is HONEST: the required_tests_pass gate BLOCKS ready_for_review (a skipped suite can never silently produce a 'Verified by CodeLoop' result) unless explicitly waived via config.tests.waive_gate. (4) verify now distinguishes a TRUE timeout (exit 124 — pushes the agent to ASK the user Y/N to skip or fix) from an AUTO-RECOVERED leak (completed but didn't exit — an informational note telling the developer to dispose the resource in tearDown(), no skip needed). Anyone below 0.1.70 still risks an indefinite verify freeze on a non-Flutter hanging test suite.",
126
+ },
119
127
  {
120
128
  min_version: "0.1.69",
121
129
  reason: "codeloop_verify STILL hung on real Flutter projects even after the 0.1.68 dedup (recurrence #2, confirmed from a live 0.1.68 WedCheese run) — two gaps remained. (A) DUPLICATE TEST SUITE: when a project has NO integration_test/ directory, the integration runner fell back to `flutter test test/`, which re-ran the EXACT same suite the primary flutter_test runner had just executed. On a normal project that only wasted time; on a project whose test/ suite hangs it burned a SECOND full timeout back-to-back with the first. The integration runner now SKIPS entirely when there is no integration_test/ directory (golden/widget tests in test/ are already covered by the flutter_test runner and need no device); it only runs for a real integration_test/ dir on a booted device. (B) LEAKED-ISOLATE HANG: the dominant real cause on WedCheese was a non-terminating TEST — the test body passes (assertions print) but the Dart isolate never exits because a Timer/Timer.periodic, an unclosed StreamController, or a Firebase/Firestore listener was never disposed, so `flutter test` hangs until the wall-clock timeout SIGKILLs it (exit 124) — wasting the FULL 5-6 min EVERY run. 0.1.69 adds a completion-settle watchdog to runCommand: when the streamed output matches a strong end-of-work marker (`flutter test` prints 'All tests passed!' / 'Some tests failed.' exactly once at the end) but the process then fails to exit, CodeLoop force-closes the process tree ~30s after the marker and resolves exit_code 0 with the REAL parsed pass/fail plus a note naming the leaked-resource cause and pointing the developer to dispose it in tearDown() — instead of blocking the MCP response for the entire timeout. A natural exit within the grace window is unaffected, so well-behaved suites are unchanged. The Flutter timeout note is also rewritten to name the non-terminating-test root cause first. 
NOTE: CodeLoop cannot FIX the leaked resource in the user's test — it now recovers fast and tells the developer exactly what to fix.",
@@ -1 +1 @@
1
- {"version":3,"file":"critical_floors.js","sourceRoot":"","sources":["../../src/auth/critical_floors.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AASH;;;;;;;;;;;;GAYG;AACH,MAAM,CAAC,MAAM,eAAe,GAAoB;IAC9C;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,ufAAuf;KAC1f;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EAAE,4hBAA4hB;KACriB;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EAAE,yvBAAyvB;KAClwB;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,kxBAAkxB;KACrxB;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,0/BAA0/B;KAC7/B;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,0iCAA0iC;KAC7iC;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,gqDAAgqD;KACnqD;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,uqDAAuqD;KAC1qD;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,w+EAAw+E;KAC3+E;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,88EAA88E;KACj9E;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,uiEAAuiE;KAC1iE;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,u/DAAu/D;KAC1/D;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,k3DAAk3D;KACr3D;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,oiDAAoiD;KACviD;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,g3CAAg3C;KACn3C;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,srFAAsrF;KACzrF;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,gwEAAgwE;KACnwE;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,84EAA84E;KACj5E;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,gwEAAgwE;KACnwE;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,09DAA09D;KAC79D;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,u4DAAu4D;KAC14D;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,u/FAAu/F;KAC1/F;CACF,CAAC"}
1
+ {"version":3,"file":"critical_floors.js","sourceRoot":"","sources":["../../src/auth/critical_floors.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AASH;;;;;;;;;;;;GAYG;AACH,MAAM,CAAC,MAAM,eAAe,GAAoB;IAC9C;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,ufAAuf;KAC1f;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EAAE,4hBAA4hB;KACriB;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EAAE,yvBAAyvB;KAClwB;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,kxBAAkxB;KACrxB;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,0/BAA0/B;KAC7/B;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,0iCAA0iC;KAC7iC;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,gqDAAgqD;KACnqD;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,uqDAAuqD;KAC1qD;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,w+EAAw+E;KAC3+E;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,88EAA88E;KACj9E;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,uiEAAuiE;KAC1iE;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,u/DAAu/D;KAC1/D;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,k3DAAk3D;KACr3D;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,oiDAAoiD;KACviD;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,g3CAAg3C;KACn3C;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,srFAAsrF;KACzrF;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,gwEAAgwE;KACnwE;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,84EAA84E;KACj5E;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,gwEAAgwE;KACnwE;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,s/DAAs/D;KACz/D;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,g8FAAg8F;KACn8F;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,09DAA09D;KAC79D;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,u4DAAu4D;KAC14D;IACD;QACE,WAAW,EAAE,QAAQ;QACrB,MAAM,EACJ,u/FAAu/F;KAC1/F;CACF,CAAC"}
package/dist/index.js CHANGED
@@ -606,6 +606,7 @@ Returns: structured report with pass/fail counts, artifact paths, and next-step
606
606
  project_dir: z.string().optional().describe("Absolute path to the project root. Defaults to CODELOOP_PROJECT_DIR env var or auto-discovered project directory. MUST be an actual project folder — passing the user's home directory is rejected. If your IDE launches the MCP server from the wrong cwd (common on Windows where Cursor uses C:\\Users\\<name> as cwd), set CODELOOP_PROJECT_DIR or pass this param explicitly."),
607
607
  workspace_root: z.string().optional().describe("[Alias for project_dir] Same semantics; accepted because many agents reach for this conventional name. Pass either `project_dir` OR `workspace_root` — they're equivalent."),
608
608
  tasks_completed: z.array(z.string()).optional().describe("0.1.52 C5 — free-text titles of the tasks the agent claims to have completed in this code change. Cross-checked against the change manifest produced by C1: every claim should map to >= 1 manifest entry and every manifest entry should map to >= 1 claim. Mismatches surface as warnings in the verify response and feed the change_coverage_evidence gate (C3)."),
609
+ skip_tests: z.boolean().optional().describe("0.1.70 — Run every check EXCEPT the project's own test suite (flutter test / npm test / pytest / cargo test / playwright / maestro) and coverage. Use ONLY with the user's consent — when their suite is known-broken, hangs (a leaked Timer/StreamController/listener keeps the process alive after assertions pass), or is too slow to run every cycle. Analysis, build, backend, observability, screenshots, and design comparison still run. The required_tests_pass gate then BLOCKS ready_for_review until tests run (skip_tests:false) OR config.tests.waive_gate:true is set — so a skipped suite can never silently produce a 'Verified by CodeLoop' result. When a prior verify reports a hung/force-closed test runner ([CodeLoop ASK] note), ASK the user Y/N before setting this."),
609
610
  mode: z.string().optional().describe(AGENT_MODE_PARAM_DESC),
610
611
  }, async (params) => {
611
612
  const cwd = resolveCwd(params);
@@ -618,6 +619,7 @@ Returns: structured report with pass/fail counts, artifact paths, and next-step
618
619
  scope: params.scope,
619
620
  platform: params.platform,
620
621
  tasks_completed: params.tasks_completed,
622
+ skip_tests: params.skip_tests,
621
623
  };
622
624
  const output = await runVerify(input, cfg, cwd);
623
625
  await trackUsage(apiKey, "verification_run");