npm - codeloop-mcp-server - Versions diffs - 0.1.38 → 0.1.40 - Mend

codeloop-mcp-server 0.1.38 → 0.1.40

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25) hide show

package/dist/auth/critical_floors.d.ts +45 -0
package/dist/auth/critical_floors.d.ts.map +1 -0
package/dist/auth/critical_floors.js +48 -0
package/dist/auth/critical_floors.js.map +1 -0
package/dist/auth/update_check.d.ts +65 -0
package/dist/auth/update_check.d.ts.map +1 -0
package/dist/auth/update_check.js +246 -0
package/dist/auth/update_check.js.map +1 -0
package/dist/index.js +125 -30
package/dist/index.js.map +1 -1
package/dist/tools/desktop_detection.d.ts +1 -1
package/dist/tools/desktop_detection.d.ts.map +1 -1
package/dist/tools/desktop_detection.js +18 -0
package/dist/tools/desktop_detection.js.map +1 -1
package/dist/tools/gate_check.d.ts.map +1 -1
package/dist/tools/gate_check.js +15 -3
package/dist/tools/gate_check.js.map +1 -1
package/dist/tools/node_ui_detection.d.ts +32 -0
package/dist/tools/node_ui_detection.d.ts.map +1 -0
package/dist/tools/node_ui_detection.js +91 -0
package/dist/tools/node_ui_detection.js.map +1 -0
package/dist/tools/verify.d.ts.map +1 -1
package/dist/tools/verify.js +46 -8
package/dist/tools/verify.js.map +1 -1
package/package.json +1 -1

package/dist/index.js CHANGED Viewed

@@ -20,6 +20,7 @@ import { loadConfig } from "./config.js";
 import { validateApiKey, isActivationRequired } from "./auth/api_key.js";
 import { identifyKeySource, buildRevokedKeyDiagnostic } from "./auth/key_source.js";
 import { warmCliCache } from "./auth/cli_cache_warmer.js";
+import { startUpdateCheck, getUpdateInfo, formatUpdateNotice, } from "./auth/update_check.js";
 import { trackUsage } from "./auth/usage_tracker.js";
 import { isLocalMode } from "./auth/local_mode.js";
 import { discoverProjectDir } from "./project-discovery.js";
@@ -93,6 +94,15 @@ const apiKey = process.env.CODELOOP_API_KEY || config.api_key;
 // detached, throttled to once / 12h, and a no-op in CI. See
 // `auth/cli_cache_warmer.ts` for the full rationale.
 warmCliCache();
+// Background update checker. Polls npm for codeloop-mcp-server@latest
+// once per hour and stores the result in module state. Tool responses
+// prepend a notice (via withInitHint → withUpdateNotice) when the
+// running version is behind. Solves the "user keeps Cursor/Claude
+// open for days while a critical fix ships" failure mode — without
+// this, the only way to pick up a new version is the user happening
+// to restart their IDE. See auth/update_check.ts for the full
+// rationale and opt-out env vars.
+startUpdateCheck();
 const server = new McpServer({
     name: "codeloop",
     version: "0.1.14",
@@ -224,12 +234,46 @@ function stubResponse(toolName) {
     };
 }
 const INIT_HINT = "[CodeLoop] This project has not been initialized. Call codeloop_init_project FIRST (before any other CodeLoop tool) to create .codeloop/config.json, agent rules, and the artifacts/ directory. This is required, not optional — proceed with the bootstrap immediately.";
+/**
+ * Prepend the cached update notice (if any) to a content array. A
+ * critical notice is emitted on every tool response; a non-critical
+ * notice is throttled to at most once per hour per server process so
+ * it doesn't spam an agent that's already aware. The throttle is
+ * in-memory, so a fresh server boot starts the clock over — that
+ * matches the user's actual experience ("I just opened my IDE,
+ * surface the notice once").
+ */
+const NON_CRITICAL_NOTICE_INTERVAL_MS = 60 * 60 * 1_000;
+let lastNonCriticalNoticeAt = 0;
+function withUpdateNotice(content) {
+    const info = getUpdateInfo();
+    if (!info || !info.is_outdated)
+        return content;
+    if (!info.is_critical) {
+        const now = Date.now();
+        if (now - lastNonCriticalNoticeAt < NON_CRITICAL_NOTICE_INTERVAL_MS)
+            return content;
+        lastNonCriticalNoticeAt = now;
+    }
+    const text = formatUpdateNotice(info);
+    if (!text)
+        return content;
+    return [
+        { type: "text", text },
+        ...content,
+    ];
+}
 function withInitHint(content, dir) {
+    // Order matters: as written, the init hint (when shown) comes FIRST,
+    // then the update notice (init-then-restart is a weird state but
+    // harmless), then the original content. Critical update notices are
+    // meant to override anything else the agent might do this turn.
+    const withUpdate = withUpdateNotice(content);
     if (isProjectInitialized(dir || projectDir))
-        return content;
+        return withUpdate;
     return [
         { type: "text", text: INIT_HINT },
-        ...content,
+        ...withUpdate,
     ];
 }
 const TOOL_BOOTSTRAP = "[CodeLoop] You MUST call codeloop_verify after every code change. If .codeloop/config.json is missing, call codeloop_init_project FIRST.\n\n";
@@ -280,11 +324,23 @@ Returns: structured report with pass/fail counts, artifact paths, and next-step
         if (verifyResult.run_id &&
             (verifyResult.fail_count ?? 0) === 0 &&
             isUIProject(cwd)) {
-            const { getArtifactsBaseDir, getRunDir } = await import("./evidence/artifacts.js");
+            const { getArtifactsBaseDir, getRunDir, listRuns } = await import("./evidence/artifacts.js");
             const baseDir = getArtifactsBaseDir(cwd);
-            const runDir = getRunDir(verifyResult.run_id, baseDir);
-            const hasUiEvidence = dirHasFile(join(runDir, "videos"), (f) => /\.(mp4|webm|mov)$/i.test(f)) ||
-                dirHasFile(join(runDir, "logs"), (f) => f === "interaction_log.jsonl");
+            // Look at the verify run AND recent sibling runs. Gate_check
+            // already accepts videos / interaction logs from any sibling
+            // run within ~1 hour (see findVideoFiles in gate_check.ts); if
+            // the postscript only inspected this run's dir it would
+            // false-alarm right after a perfectly valid separate recording
+            // session — and push the agent into a second recording it
+            // doesn't need. We mirror the gate's scope by scanning the
+            // last 10 sibling runs in addition to this one.
+            const siblingRunIds = listRuns(baseDir).slice(0, 10);
+            const candidateRuns = Array.from(new Set([verifyResult.run_id, ...siblingRunIds]));
+            const hasUiEvidence = candidateRuns.some((rid) => {
+                const rd = getRunDir(rid, baseDir);
+                return (dirHasFile(join(rd, "videos"), (f) => /\.(mp4|webm|mov)$/i.test(f)) ||
+                    dirHasFile(join(rd, "logs"), (f) => f === "interaction_log.jsonl"));
+            });
             if (!hasUiEvidence) {
                 postscript = [
                     "",
@@ -332,8 +388,25 @@ Returns: categorized issues with severity, evidence, root cause, and actionable
         await trackUsage(apiKey, "verification_run");
         return output;
     }, { tool: "codeloop_diagnose", cwd: (params.project_dir || params.workspace_root || projectDir), input: params });
+    // Auto-fix-loop directive. Diagnose is only useful when it leads
+    // to a fix + re-verify, not when it leads to a long deliberation
+    // over which repair to do first. The repair_tasks array in the
+    // result already enumerates the work; the agent must pick them up
+    // and execute, not stop to ask "which would you like me to start
+    // with?".
+    const fixDirective = [
+        "",
+        "",
+        "⚠️ EXECUTE REPAIR TASKS NOW — do not ask the user which one to start with ⚠️",
+        "The repair_tasks above are the agent's TODO list, in order. For EACH task:",
+        "  1. Modify ONLY the files listed in allowed_file_scope (do not refactor adjacent code).",
+        "  2. After all tasks for a single issue are applied, call codeloop_verify again.",
+        "  3. If new failures surface, call codeloop_diagnose again (max 8 attempts before escalating).",
+        "  4. When verify passes, continue the loop to codeloop_gate_check.",
+        "Do NOT report partial progress to the user between iterations. Do NOT pause to confirm the plan — the diagnose output IS the plan.",
+    ].join("\n");
     return {
-        content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
+        content: withInitHint([{ type: "text", text: JSON.stringify(result, null, 2) + fixDirective }]),
     };
 });
 server.tool("codeloop_gate_check", TOOL_BOOTSTRAP + `Evaluate whether a section or feature meets all quality gates for completion. Use this tool when:
@@ -502,6 +575,14 @@ Returns: deterministic diff results + screenshot images for visual analysis.`, {
             prompt += `\n\nApply this UX checklist:\n${result.uxChecklist}`;
         }
         prompt += `\n\nReport issues as JSON array: [{ "screen": string, "issue": string, "severity": "critical"|"high"|"medium"|"low", "confidence": number, "evidence": string, "fix_hint": string }]`;
+        // Auto-fix-loop directive for the visual review. Without this,
+        // agents historically reported "I see X, Y, Z visual issues — do
+        // you want me to fix them?" instead of just fixing and
+        // re-capturing. The directive collapses that branch into a single
+        // path: fix the critical/high ones immediately, re-run verify and
+        // capture, then loop until the visual gate is clean.
+        prompt +=
+            "\n\nAFTER your visual analysis, if any 'critical' or 'high' severity issues are reported, FIX them in the relevant component/view code immediately — do NOT ask the user 'would you like me to fix these?'. After applying fixes, call codeloop_verify and codeloop_capture_screenshot to re-capture the affected screens, then call codeloop_visual_review again. Continue the loop until no 'critical' or 'high' issues remain. Medium/low issues can be batched into a follow-up commit only if the gate (visual_regression_threshold + screenshot_evidence) is otherwise clean.";
         content.push({ type: "text", text: prompt });
         content.push(...imageBlocks);
     }
@@ -628,7 +709,7 @@ Returns: section states, dependencies, confidence scores, and next action instru
         return getSectionStatus({ master_spec_path: params.master_spec_path }, config);
     });
     return {
-        content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
+        content: withInitHint([{ type: "text", text: JSON.stringify(result, null, 2) }]),
     };
 });
 server.tool("codeloop_release_readiness", TOOL_BOOTSTRAP + `Generate a comprehensive release readiness report. Use this tool when:
@@ -646,7 +727,7 @@ Returns: overall readiness score, blockers, warnings, and full evidence summary.
         return output;
     });
     return {
-        content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
+        content: withInitHint([{ type: "text", text: JSON.stringify(result, null, 2) }]),
     };
 });
 server.tool("codeloop_recommend_tool", TOOL_BOOTSTRAP + `Recommend third-party tools and services based on the project stack and constraints. Use this tool when:
@@ -670,7 +751,7 @@ Returns: ranked recommendations with reasoning, integration complexity, and star
         return output;
     });
     return {
-        content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
+        content: withInitHint([{ type: "text", text: JSON.stringify(result, null, 2) }]),
     };
 });
 server.tool("codeloop_integration_check", TOOL_BOOTSTRAP + `Run cross-section integration verification on a multi-section project. Use this tool when:
@@ -689,7 +770,7 @@ Returns: integration test results, regression list, and section-level confidence
         }, config);
     });
     return {
-        content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
+        content: withInitHint([{ type: "text", text: JSON.stringify(result, null, 2) }]),
     };
 });
 server.tool("codeloop_update_baseline", TOOL_BOOTSTRAP + `Accept current screenshots as the new visual baseline for regression testing. Use this tool when:
@@ -705,7 +786,7 @@ Returns: list of updated baseline files with before/after paths.`, {
         return runUpdateBaseline({ run_id: params.run_id, screens: params.screens }, config);
     });
     return {
-        content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
+        content: withInitHint([{ type: "text", text: JSON.stringify(result, null, 2) }]),
     };
 });
 server.tool("codeloop_replan", TOOL_BOOTSTRAP + `Detect scope changes in the project spec and update section states accordingly. Use this tool when:
@@ -724,7 +805,7 @@ Returns: list of affected sections, new states, and recommended next actions.`,
         }, config);
     });
     return {
-        content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
+        content: withInitHint([{ type: "text", text: JSON.stringify(result, null, 2) }]),
     };
 });
 server.tool("codeloop_visual_attribution", TOOL_BOOTSTRAP + `Identify which commit, branch, and section introduced each visual diff. Use this tool when:
@@ -744,7 +825,7 @@ Returns: list of visual changes attributed to specific commits and sections.`, {
         });
     });
     return {
-        content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
+        content: withInitHint([{ type: "text", text: JSON.stringify(result, null, 2) }]),
     };
 });
 server.tool("codeloop_generate_spec", TOOL_BOOTSTRAP + `Generate a design specification from Figma design tokens. Use this tool when:
@@ -757,7 +838,7 @@ Returns: extracted tokens, generated file paths, and any errors from the Figma A
         return generateSpec(projectDir);
     });
     return {
-        content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
+        content: withInitHint([{ type: "text", text: JSON.stringify(result, null, 2) }]),
     };
 });
 server.tool("codeloop_list_env_presets", TOOL_BOOTSTRAP + `List available environment normalization presets. Use this tool when:
@@ -771,7 +852,7 @@ Returns: lists of named presets for viewports, networks, locales, simulators, se
         return listPresets();
     });
     return {
-        content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
+        content: withInitHint([{ type: "text", text: JSON.stringify(result, null, 2) }]),
     };
 });
 server.tool("codeloop_run_history", TOOL_BOOTSTRAP + `Query the run history for this project. Use this tool when:
@@ -800,7 +881,7 @@ Returns: list of runs with lineage fields (commit, branch, section, parent run),
         });
     });
     return {
-        content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
+        content: withInitHint([{ type: "text", text: JSON.stringify(result, null, 2) }]),
     };
 });
 server.tool("codeloop_get_prompt", TOOL_BOOTSTRAP + `Retrieve a context-aware prompt template for the current stage of multi-section app development. Use this tool when:
@@ -827,7 +908,7 @@ Returns: rendered prompt text with metadata about any missing required variables
         });
     });
     return {
-        content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
+        content: withInitHint([{ type: "text", text: JSON.stringify(result, null, 2) }]),
     };
 });
 server.tool("codeloop_list_prompts", TOOL_BOOTSTRAP + `List all available prompt template layers and their metadata. Use this tool when:
@@ -839,7 +920,7 @@ Returns: array of prompt layers with IDs, descriptions, and required variables.`
         return describeAllPrompts();
     });
     return {
-        content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
+        content: withInitHint([{ type: "text", text: JSON.stringify(result, null, 2) }]),
     };
 });
 server.tool("codeloop_interaction_replay", TOOL_BOOTSTRAP + `Analyze a recorded video of a user interaction flow to verify it completes as expected. Use this tool when:
@@ -1035,7 +1116,7 @@ Returns: list of discovered screens with routes, navigation triggers, confidence
         return discoverScreens((params.project_dir || params.workspace_root || projectDir), params.platform);
     }, { tool: "codeloop_discover_screens", cwd: (params.project_dir || params.workspace_root || projectDir), input: params });
     return {
-        content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
+        content: withInitHint([{ type: "text", text: JSON.stringify(result, null, 2) }]),
     };
 });
 server.tool("codeloop_discover_interactions", TOOL_BOOTSTRAP + `Scan the project source code to discover all INTERACTIVE ELEMENTS: input fields,
@@ -1070,7 +1151,7 @@ selects, datagrids, upload_areas, ai_features, forms }, ai_features_detected, sc
         return discoverInteractions((params.project_dir || params.workspace_root || projectDir), params.platform);
     }, { tool: "codeloop_discover_interactions", cwd: (params.project_dir || params.workspace_root || projectDir), input: params });
     return {
-        content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
+        content: withInitHint([{ type: "text", text: JSON.stringify(result, null, 2) }]),
     };
 });
 server.tool("codeloop_plan_user_journey", TOOL_BOOTSTRAP + `Build a per-entity USER-JOURNEY PLAN the agent must follow during recording so the
@@ -1134,7 +1215,7 @@ ai_substantive_prompts, upload_actions, datagrid_edits }, advice, discovered_int
         "Do NOT present 'Option A vs Option B' to the user. The plan IS option B and option B is the only option.",
     ].join("\n");
     return {
-        content: [{ type: "text", text: JSON.stringify(result, null, 2) + driveDirective }],
+        content: withInitHint([{ type: "text", text: JSON.stringify(result, null, 2) + driveDirective }]),
     };
 });
 server.tool("codeloop_record_interaction", TOOL_BOOTSTRAP + `Record a fixed-duration video of the app window (blocking). Use for simple captures where no
@@ -1172,7 +1253,7 @@ After recording, call codeloop_interaction_replay to extract frames and analyze
     }
     const result = authResult;
     return {
-        content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
+        content: withInitHint([{ type: "text", text: JSON.stringify(result, null, 2) }]),
     };
 });
 server.tool("codeloop_start_recording", TOOL_BOOTSTRAP + `Start recording the app window in the background. The app is brought to the front automatically
@@ -1242,7 +1323,7 @@ App logs (stdout, logcat, simctl log) are automatically captured alongside the v
     }
     const result = authResult;
     return {
-        content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
+        content: withInitHint([{ type: "text", text: JSON.stringify(result, null, 2) }]),
     };
 });
 server.tool("codeloop_stop_recording", TOOL_BOOTSTRAP + `Stop a background recording that was started with codeloop_start_recording.
@@ -1259,8 +1340,22 @@ The response includes log_path if app logs were captured during the recording se
         return { content: [{ type: "text", text: JSON.stringify(authResult, null, 2) }] };
     }
     const result = authResult;
+    // Auto-fix-loop directive. Stopping the recording is the
+    // half-way point of the visual-evidence chain, not the end.
+    // Without a directive, agents sometimes capture a video, stop,
+    // and then announce "recording saved" to the user — leaving the
+    // interaction_replay + gate_check half of the workflow undone.
+    const nextStepDirective = [
+        "",
+        "",
+        "⚠️ RECORDING SAVED — finish the evidence chain now, do not report to the user yet ⚠️",
+        "Two more tool calls are required before this UI evidence cycle is complete:",
+        "  1. codeloop_interaction_replay — extract frames + app logs from the just-saved video. This populates the data the replay/journey gates score against.",
+        "  2. codeloop_gate_check — confirm confidence ≥ 94%. If continue_fixing, fix the failing gate's next_step and re-record / re-capture.",
+        "Do NOT skip step 1 — without replay frames the interaction_replay_evidence gate fails even when the video exists. Do NOT pause to ask the user 'should I run replay now?' — yes, always.",
+    ].join("\n");
     return {
-        content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
+        content: withInitHint([{ type: "text", text: JSON.stringify(result, null, 2) + nextStepDirective }]),
     };
 });
 server.tool("codeloop_recommend_action", TOOL_BOOTSTRAP + `Context-aware recommendation router. Use this tool when:
@@ -1278,7 +1373,7 @@ Returns: inferred category and budget, ranked recommendations, and routing expla
         return output;
     });
     return {
-        content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
+        content: withInitHint([{ type: "text", text: JSON.stringify(result, null, 2) }]),
     };
 });
 server.tool("codeloop_generate_dev_report", TOOL_BOOTSTRAP + `MANDATORY: Generate a comprehensive development report after the development loop completes.
@@ -2502,7 +2597,7 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
         return { success, action, detail };
     }, { tool: "codeloop_interact", cwd: (params.project_dir || params.workspace_root || projectDir), input: params });
     return {
-        content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
+        content: withInitHint([{ type: "text", text: JSON.stringify(result, null, 2) }]),
     };
 });
 // ── codeloop_init_project ────────────────────────────────────────
@@ -2535,7 +2630,7 @@ project. After it completes, proceed directly with \`codeloop_verify\`.`, {
         return output;
     })();
     return {
-        content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
+        content: withInitHint([{ type: "text", text: JSON.stringify(result, null, 2) }]),
     };
 });
 server.tool("codeloop_flush_usage", TOOL_BOOTSTRAP + `Drain the persisted offline usage queue and POST events to the CodeLoop backend.
@@ -2552,7 +2647,7 @@ Returns: counts for attempted / succeeded / requeued events and the queue locati
     const { flushPersistedUsage } = await import("./auth/usage_tracker.js");
     const result = await flushPersistedUsage(cwd);
     return {
-        content: [
+        content: withInitHint([
             {
                 type: "text",
                 text: JSON.stringify({
@@ -2561,7 +2656,7 @@ Returns: counts for attempted / succeeded / requeued events and the queue locati
                     project_dir: cwd,
                 }, null, 2),
             },
-        ],
+        ]),
     };
 });
 if (isLocalMode()) {