codeloop-mcp-server 0.1.38 → 0.1.40

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -20,6 +20,7 @@ import { loadConfig } from "./config.js";
20
20
  import { validateApiKey, isActivationRequired } from "./auth/api_key.js";
21
21
  import { identifyKeySource, buildRevokedKeyDiagnostic } from "./auth/key_source.js";
22
22
  import { warmCliCache } from "./auth/cli_cache_warmer.js";
23
+ import { startUpdateCheck, getUpdateInfo, formatUpdateNotice, } from "./auth/update_check.js";
23
24
  import { trackUsage } from "./auth/usage_tracker.js";
24
25
  import { isLocalMode } from "./auth/local_mode.js";
25
26
  import { discoverProjectDir } from "./project-discovery.js";
@@ -93,6 +94,15 @@ const apiKey = process.env.CODELOOP_API_KEY || config.api_key;
93
94
  // detached, throttled to once / 12h, and a no-op in CI. See
94
95
  // `auth/cli_cache_warmer.ts` for the full rationale.
95
96
  warmCliCache();
97
+ // Background update checker. Polls npm for codeloop-mcp-server@latest
98
+ // once per hour and stores the result in module state. Tool responses
99
+ // prepend a notice (via withInitHint → withUpdateNotice) when the
100
+ // running version is behind. Solves the "user keeps Cursor/Claude
101
+ // open for days while a critical fix ships" failure mode — without
102
+ // this, the only way to pick up a new version is the user happening
103
+ // to restart their IDE. See auth/update_check.ts for the full
104
+ // rationale and opt-out env vars.
105
+ startUpdateCheck();
96
106
  const server = new McpServer({
97
107
  name: "codeloop",
98
108
  version: "0.1.14",
@@ -224,12 +234,46 @@ function stubResponse(toolName) {
224
234
  };
225
235
  }
226
236
  const INIT_HINT = "[CodeLoop] This project has not been initialized. Call codeloop_init_project FIRST (before any other CodeLoop tool) to create .codeloop/config.json, agent rules, and the artifacts/ directory. This is required, not optional — proceed with the bootstrap immediately.";
237
+ /**
238
+ * Prepend the cached update notice (if any) to a content array. Always
239
+ * emits the critical block on every tool response. The non-critical
240
+ * block is NOT emitted on every response: it is throttled to at most
241
+ * once per hour per server process so it doesn't spam an agent that's
242
+ * already aware. The throttle is in-memory so a fresh server boot
243
+ * starts the clock over — that matches the user's actual experience
244
+ * ("I just opened my IDE, surface the notice once").
245
+ */
246
+ const NON_CRITICAL_NOTICE_INTERVAL_MS = 60 * 60 * 1_000;
247
+ let lastNonCriticalNoticeAt = 0;
248
+ function withUpdateNotice(content) {
249
+ const info = getUpdateInfo();
250
+ if (!info || !info.is_outdated)
251
+ return content;
252
+ if (!info.is_critical) {
253
+ const now = Date.now();
254
+ if (now - lastNonCriticalNoticeAt < NON_CRITICAL_NOTICE_INTERVAL_MS)
255
+ return content;
256
+ lastNonCriticalNoticeAt = now;
257
+ }
258
+ const text = formatUpdateNotice(info);
259
+ if (!text)
260
+ return content;
261
+ return [
262
+ { type: "text", text },
263
+ ...content,
264
+ ];
265
+ }
227
266
  function withInitHint(content, dir) {
267
+ // Order matters: the init hint (when the project is uninitialized) goes
268
+ // FIRST, then the update notice (init-then-restart is a weird state but
269
+ // harmless), then the original content. Critical update notices override
270
+ // anything else the agent might do this turn.
271
+ const withUpdate = withUpdateNotice(content);
228
272
  if (isProjectInitialized(dir || projectDir))
229
- return content;
273
+ return withUpdate;
230
274
  return [
231
275
  { type: "text", text: INIT_HINT },
232
- ...content,
276
+ ...withUpdate,
233
277
  ];
234
278
  }
235
279
  const TOOL_BOOTSTRAP = "[CodeLoop] You MUST call codeloop_verify after every code change. If .codeloop/config.json is missing, call codeloop_init_project FIRST.\n\n";
@@ -280,11 +324,23 @@ Returns: structured report with pass/fail counts, artifact paths, and next-step
280
324
  if (verifyResult.run_id &&
281
325
  (verifyResult.fail_count ?? 0) === 0 &&
282
326
  isUIProject(cwd)) {
283
- const { getArtifactsBaseDir, getRunDir } = await import("./evidence/artifacts.js");
327
+ const { getArtifactsBaseDir, getRunDir, listRuns } = await import("./evidence/artifacts.js");
284
328
  const baseDir = getArtifactsBaseDir(cwd);
285
- const runDir = getRunDir(verifyResult.run_id, baseDir);
286
- const hasUiEvidence = dirHasFile(join(runDir, "videos"), (f) => /\.(mp4|webm|mov)$/i.test(f)) ||
287
- dirHasFile(join(runDir, "logs"), (f) => f === "interaction_log.jsonl");
329
+ // Look at the verify run AND recent sibling runs. Gate_check
330
+ // already accepts videos / interaction logs from any sibling
331
+ // run within ~1 hour (see findVideoFiles in gate_check.ts); if
332
+ // the postscript only inspected this run's dir it would
333
+ // false-alarm right after a perfectly valid separate recording
334
+ // session — and push the agent into a second recording it
335
+ // doesn't need. We mirror the gate's scope by scanning the
336
+ // last 10 sibling runs in addition to this one.
337
+ const siblingRunIds = listRuns(baseDir).slice(0, 10);
338
+ const candidateRuns = Array.from(new Set([verifyResult.run_id, ...siblingRunIds]));
339
+ const hasUiEvidence = candidateRuns.some((rid) => {
340
+ const rd = getRunDir(rid, baseDir);
341
+ return (dirHasFile(join(rd, "videos"), (f) => /\.(mp4|webm|mov)$/i.test(f)) ||
342
+ dirHasFile(join(rd, "logs"), (f) => f === "interaction_log.jsonl"));
343
+ });
288
344
  if (!hasUiEvidence) {
289
345
  postscript = [
290
346
  "",
@@ -332,8 +388,25 @@ Returns: categorized issues with severity, evidence, root cause, and actionable
332
388
  await trackUsage(apiKey, "verification_run");
333
389
  return output;
334
390
  }, { tool: "codeloop_diagnose", cwd: (params.project_dir || params.workspace_root || projectDir), input: params });
391
+ // Auto-fix-loop directive. Diagnose is only useful when it leads
392
+ // to a fix + re-verify, not when it leads to a long deliberation
393
+ // over which repair to do first. The repair_tasks array in the
394
+ // result already enumerates the work; the agent must pick them up
395
+ // and execute, not stop to ask "which would you like me to start
396
+ // with?".
397
+ const fixDirective = [
398
+ "",
399
+ "",
400
+ "⚠️ EXECUTE REPAIR TASKS NOW — do not ask the user which one to start with ⚠️",
401
+ "The repair_tasks above are the agent's TODO list, in order. For EACH task:",
402
+ " 1. Modify ONLY the files listed in allowed_file_scope (do not refactor adjacent code).",
403
+ " 2. After all tasks for a single issue are applied, call codeloop_verify again.",
404
+ " 3. If new failures surface, call codeloop_diagnose again (max 8 attempts before escalating).",
405
+ " 4. When verify passes, continue the loop to codeloop_gate_check.",
406
+ "Do NOT report partial progress to the user between iterations. Do NOT pause to confirm the plan — the diagnose output IS the plan.",
407
+ ].join("\n");
335
408
  return {
336
- content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
409
+ content: withInitHint([{ type: "text", text: JSON.stringify(result, null, 2) + fixDirective }]),
337
410
  };
338
411
  });
339
412
  server.tool("codeloop_gate_check", TOOL_BOOTSTRAP + `Evaluate whether a section or feature meets all quality gates for completion. Use this tool when:
@@ -502,6 +575,14 @@ Returns: deterministic diff results + screenshot images for visual analysis.`, {
502
575
  prompt += `\n\nApply this UX checklist:\n${result.uxChecklist}`;
503
576
  }
504
577
  prompt += `\n\nReport issues as JSON array: [{ "screen": string, "issue": string, "severity": "critical"|"high"|"medium"|"low", "confidence": number, "evidence": string, "fix_hint": string }]`;
578
+ // Auto-fix-loop directive for the visual review. Without this,
579
+ // agents historically reported "I see X, Y, Z visual issues — do
580
+ // you want me to fix them?" instead of just fixing and
581
+ // re-capturing. The directive collapses that branch into a single
582
+ // path: fix the critical/high ones immediately, re-run verify and
583
+ // capture, then loop until the visual gate is clean.
584
+ prompt +=
585
+ "\n\nAFTER your visual analysis, if any 'critical' or 'high' severity issues are reported, FIX them in the relevant component/view code immediately — do NOT ask the user 'would you like me to fix these?'. After applying fixes, call codeloop_verify and codeloop_capture_screenshot to re-capture the affected screens, then call codeloop_visual_review again. Continue the loop until no 'critical' or 'high' issues remain. Medium/low issues can be batched into a follow-up commit only if the gate (visual_regression_threshold + screenshot_evidence) is otherwise clean.";
505
586
  content.push({ type: "text", text: prompt });
506
587
  content.push(...imageBlocks);
507
588
  }
@@ -628,7 +709,7 @@ Returns: section states, dependencies, confidence scores, and next action instru
628
709
  return getSectionStatus({ master_spec_path: params.master_spec_path }, config);
629
710
  });
630
711
  return {
631
- content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
712
+ content: withInitHint([{ type: "text", text: JSON.stringify(result, null, 2) }]),
632
713
  };
633
714
  });
634
715
  server.tool("codeloop_release_readiness", TOOL_BOOTSTRAP + `Generate a comprehensive release readiness report. Use this tool when:
@@ -646,7 +727,7 @@ Returns: overall readiness score, blockers, warnings, and full evidence summary.
646
727
  return output;
647
728
  });
648
729
  return {
649
- content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
730
+ content: withInitHint([{ type: "text", text: JSON.stringify(result, null, 2) }]),
650
731
  };
651
732
  });
652
733
  server.tool("codeloop_recommend_tool", TOOL_BOOTSTRAP + `Recommend third-party tools and services based on the project stack and constraints. Use this tool when:
@@ -670,7 +751,7 @@ Returns: ranked recommendations with reasoning, integration complexity, and star
670
751
  return output;
671
752
  });
672
753
  return {
673
- content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
754
+ content: withInitHint([{ type: "text", text: JSON.stringify(result, null, 2) }]),
674
755
  };
675
756
  });
676
757
  server.tool("codeloop_integration_check", TOOL_BOOTSTRAP + `Run cross-section integration verification on a multi-section project. Use this tool when:
@@ -689,7 +770,7 @@ Returns: integration test results, regression list, and section-level confidence
689
770
  }, config);
690
771
  });
691
772
  return {
692
- content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
773
+ content: withInitHint([{ type: "text", text: JSON.stringify(result, null, 2) }]),
693
774
  };
694
775
  });
695
776
  server.tool("codeloop_update_baseline", TOOL_BOOTSTRAP + `Accept current screenshots as the new visual baseline for regression testing. Use this tool when:
@@ -705,7 +786,7 @@ Returns: list of updated baseline files with before/after paths.`, {
705
786
  return runUpdateBaseline({ run_id: params.run_id, screens: params.screens }, config);
706
787
  });
707
788
  return {
708
- content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
789
+ content: withInitHint([{ type: "text", text: JSON.stringify(result, null, 2) }]),
709
790
  };
710
791
  });
711
792
  server.tool("codeloop_replan", TOOL_BOOTSTRAP + `Detect scope changes in the project spec and update section states accordingly. Use this tool when:
@@ -724,7 +805,7 @@ Returns: list of affected sections, new states, and recommended next actions.`,
724
805
  }, config);
725
806
  });
726
807
  return {
727
- content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
808
+ content: withInitHint([{ type: "text", text: JSON.stringify(result, null, 2) }]),
728
809
  };
729
810
  });
730
811
  server.tool("codeloop_visual_attribution", TOOL_BOOTSTRAP + `Identify which commit, branch, and section introduced each visual diff. Use this tool when:
@@ -744,7 +825,7 @@ Returns: list of visual changes attributed to specific commits and sections.`, {
744
825
  });
745
826
  });
746
827
  return {
747
- content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
828
+ content: withInitHint([{ type: "text", text: JSON.stringify(result, null, 2) }]),
748
829
  };
749
830
  });
750
831
  server.tool("codeloop_generate_spec", TOOL_BOOTSTRAP + `Generate a design specification from Figma design tokens. Use this tool when:
@@ -757,7 +838,7 @@ Returns: extracted tokens, generated file paths, and any errors from the Figma A
757
838
  return generateSpec(projectDir);
758
839
  });
759
840
  return {
760
- content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
841
+ content: withInitHint([{ type: "text", text: JSON.stringify(result, null, 2) }]),
761
842
  };
762
843
  });
763
844
  server.tool("codeloop_list_env_presets", TOOL_BOOTSTRAP + `List available environment normalization presets. Use this tool when:
@@ -771,7 +852,7 @@ Returns: lists of named presets for viewports, networks, locales, simulators, se
771
852
  return listPresets();
772
853
  });
773
854
  return {
774
- content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
855
+ content: withInitHint([{ type: "text", text: JSON.stringify(result, null, 2) }]),
775
856
  };
776
857
  });
777
858
  server.tool("codeloop_run_history", TOOL_BOOTSTRAP + `Query the run history for this project. Use this tool when:
@@ -800,7 +881,7 @@ Returns: list of runs with lineage fields (commit, branch, section, parent run),
800
881
  });
801
882
  });
802
883
  return {
803
- content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
884
+ content: withInitHint([{ type: "text", text: JSON.stringify(result, null, 2) }]),
804
885
  };
805
886
  });
806
887
  server.tool("codeloop_get_prompt", TOOL_BOOTSTRAP + `Retrieve a context-aware prompt template for the current stage of multi-section app development. Use this tool when:
@@ -827,7 +908,7 @@ Returns: rendered prompt text with metadata about any missing required variables
827
908
  });
828
909
  });
829
910
  return {
830
- content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
911
+ content: withInitHint([{ type: "text", text: JSON.stringify(result, null, 2) }]),
831
912
  };
832
913
  });
833
914
  server.tool("codeloop_list_prompts", TOOL_BOOTSTRAP + `List all available prompt template layers and their metadata. Use this tool when:
@@ -839,7 +920,7 @@ Returns: array of prompt layers with IDs, descriptions, and required variables.`
839
920
  return describeAllPrompts();
840
921
  });
841
922
  return {
842
- content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
923
+ content: withInitHint([{ type: "text", text: JSON.stringify(result, null, 2) }]),
843
924
  };
844
925
  });
845
926
  server.tool("codeloop_interaction_replay", TOOL_BOOTSTRAP + `Analyze a recorded video of a user interaction flow to verify it completes as expected. Use this tool when:
@@ -1035,7 +1116,7 @@ Returns: list of discovered screens with routes, navigation triggers, confidence
1035
1116
  return discoverScreens((params.project_dir || params.workspace_root || projectDir), params.platform);
1036
1117
  }, { tool: "codeloop_discover_screens", cwd: (params.project_dir || params.workspace_root || projectDir), input: params });
1037
1118
  return {
1038
- content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
1119
+ content: withInitHint([{ type: "text", text: JSON.stringify(result, null, 2) }]),
1039
1120
  };
1040
1121
  });
1041
1122
  server.tool("codeloop_discover_interactions", TOOL_BOOTSTRAP + `Scan the project source code to discover all INTERACTIVE ELEMENTS: input fields,
@@ -1070,7 +1151,7 @@ selects, datagrids, upload_areas, ai_features, forms }, ai_features_detected, sc
1070
1151
  return discoverInteractions((params.project_dir || params.workspace_root || projectDir), params.platform);
1071
1152
  }, { tool: "codeloop_discover_interactions", cwd: (params.project_dir || params.workspace_root || projectDir), input: params });
1072
1153
  return {
1073
- content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
1154
+ content: withInitHint([{ type: "text", text: JSON.stringify(result, null, 2) }]),
1074
1155
  };
1075
1156
  });
1076
1157
  server.tool("codeloop_plan_user_journey", TOOL_BOOTSTRAP + `Build a per-entity USER-JOURNEY PLAN the agent must follow during recording so the
@@ -1134,7 +1215,7 @@ ai_substantive_prompts, upload_actions, datagrid_edits }, advice, discovered_int
1134
1215
  "Do NOT present 'Option A vs Option B' to the user. The plan IS option B and option B is the only option.",
1135
1216
  ].join("\n");
1136
1217
  return {
1137
- content: [{ type: "text", text: JSON.stringify(result, null, 2) + driveDirective }],
1218
+ content: withInitHint([{ type: "text", text: JSON.stringify(result, null, 2) + driveDirective }]),
1138
1219
  };
1139
1220
  });
1140
1221
  server.tool("codeloop_record_interaction", TOOL_BOOTSTRAP + `Record a fixed-duration video of the app window (blocking). Use for simple captures where no
@@ -1172,7 +1253,7 @@ After recording, call codeloop_interaction_replay to extract frames and analyze
1172
1253
  }
1173
1254
  const result = authResult;
1174
1255
  return {
1175
- content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
1256
+ content: withInitHint([{ type: "text", text: JSON.stringify(result, null, 2) }]),
1176
1257
  };
1177
1258
  });
1178
1259
  server.tool("codeloop_start_recording", TOOL_BOOTSTRAP + `Start recording the app window in the background. The app is brought to the front automatically
@@ -1242,7 +1323,7 @@ App logs (stdout, logcat, simctl log) are automatically captured alongside the v
1242
1323
  }
1243
1324
  const result = authResult;
1244
1325
  return {
1245
- content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
1326
+ content: withInitHint([{ type: "text", text: JSON.stringify(result, null, 2) }]),
1246
1327
  };
1247
1328
  });
1248
1329
  server.tool("codeloop_stop_recording", TOOL_BOOTSTRAP + `Stop a background recording that was started with codeloop_start_recording.
@@ -1259,8 +1340,22 @@ The response includes log_path if app logs were captured during the recording se
1259
1340
  return { content: [{ type: "text", text: JSON.stringify(authResult, null, 2) }] };
1260
1341
  }
1261
1342
  const result = authResult;
1343
+ // Auto-fix-loop directive. Stopping the recording is the
1344
+ // half-way point of the visual-evidence chain, not the end.
1345
+ // Without a directive, agents sometimes capture a video, stop,
1346
+ // and then announce "recording saved" to the user — leaving the
1347
+ // interaction_replay + gate_check half of the workflow undone.
1348
+ const nextStepDirective = [
1349
+ "",
1350
+ "",
1351
+ "⚠️ RECORDING SAVED — finish the evidence chain now, do not report to the user yet ⚠️",
1352
+ "Two more tool calls are required before this UI evidence cycle is complete:",
1353
+ " 1. codeloop_interaction_replay — extract frames + app logs from the just-saved video. This populates the data the replay/journey gates score against.",
1354
+ " 2. codeloop_gate_check — confirm confidence ≥ 94%. If continue_fixing, fix the failing gate's next_step and re-record / re-capture.",
1355
+ "Do NOT skip step 1 — without replay frames the interaction_replay_evidence gate fails even when the video exists. Do NOT pause to ask the user 'should I run replay now?' — yes, always.",
1356
+ ].join("\n");
1262
1357
  return {
1263
- content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
1358
+ content: withInitHint([{ type: "text", text: JSON.stringify(result, null, 2) + nextStepDirective }]),
1264
1359
  };
1265
1360
  });
1266
1361
  server.tool("codeloop_recommend_action", TOOL_BOOTSTRAP + `Context-aware recommendation router. Use this tool when:
@@ -1278,7 +1373,7 @@ Returns: inferred category and budget, ranked recommendations, and routing expla
1278
1373
  return output;
1279
1374
  });
1280
1375
  return {
1281
- content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
1376
+ content: withInitHint([{ type: "text", text: JSON.stringify(result, null, 2) }]),
1282
1377
  };
1283
1378
  });
1284
1379
  server.tool("codeloop_generate_dev_report", TOOL_BOOTSTRAP + `MANDATORY: Generate a comprehensive development report after the development loop completes.
@@ -2502,7 +2597,7 @@ Wait 1-2 seconds between interactions so video frames capture state changes.`, {
2502
2597
  return { success, action, detail };
2503
2598
  }, { tool: "codeloop_interact", cwd: (params.project_dir || params.workspace_root || projectDir), input: params });
2504
2599
  return {
2505
- content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
2600
+ content: withInitHint([{ type: "text", text: JSON.stringify(result, null, 2) }]),
2506
2601
  };
2507
2602
  });
2508
2603
  // ── codeloop_init_project ────────────────────────────────────────
@@ -2535,7 +2630,7 @@ project. After it completes, proceed directly with \`codeloop_verify\`.`, {
2535
2630
  return output;
2536
2631
  })();
2537
2632
  return {
2538
- content: [{ type: "text", text: JSON.stringify(result, null, 2) }],
2633
+ content: withInitHint([{ type: "text", text: JSON.stringify(result, null, 2) }]),
2539
2634
  };
2540
2635
  });
2541
2636
  server.tool("codeloop_flush_usage", TOOL_BOOTSTRAP + `Drain the persisted offline usage queue and POST events to the CodeLoop backend.
@@ -2552,7 +2647,7 @@ Returns: counts for attempted / succeeded / requeued events and the queue locati
2552
2647
  const { flushPersistedUsage } = await import("./auth/usage_tracker.js");
2553
2648
  const result = await flushPersistedUsage(cwd);
2554
2649
  return {
2555
- content: [
2650
+ content: withInitHint([
2556
2651
  {
2557
2652
  type: "text",
2558
2653
  text: JSON.stringify({
@@ -2561,7 +2656,7 @@ Returns: counts for attempted / succeeded / requeued events and the queue locati
2561
2656
  project_dir: cwd,
2562
2657
  }, null, 2),
2563
2658
  },
2564
- ],
2659
+ ]),
2565
2660
  };
2566
2661
  });
2567
2662
  if (isLocalMode()) {