claude-crap 0.4.7 → 0.4.8

@@ -72,9 +72,15 @@ export interface StartDashboardOptions {
 /**
  * Handle returned by {@link startDashboard}. Use `url` to build the
  * link the user clicks; call `close()` during shutdown.
+ *
+ * `adopted === true` means another claude-crap process already owned
+ * the dashboard port when we booted, and we are piggy-backing on its
+ * HTTP server. Adopted handles have a no-op `close()` because tearing
+ * down the Fastify instance would strand the other MCP servers.
  */
 export interface DashboardHandle {
   readonly url: string;
+  readonly adopted: boolean;
   close(): Promise<void>;
 }

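A minimal sketch of how a caller might consume the new `adopted` flag; the option names are taken from `startDashboard` below, but the shutdown wiring itself is illustrative, not part of the package:

  const handle = await startDashboard({ config, sarifStore, workspaceStatsProvider, logger });
  if (handle.adopted) {
    logger.info({ url: handle.url }, "reusing dashboard owned by another process");
  }
  process.on("SIGTERM", async () => {
    // No-op when adopted; real teardown when this process owns the server.
    await handle.close();
  });
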
@@ -87,6 +93,28 @@ export interface DashboardHandle {
  */
 export async function startDashboard(options: StartDashboardOptions): Promise<DashboardHandle> {
   const { config, sarifStore, workspaceStatsProvider, logger } = options;
+  const pidFilePath = resolvePidFilePath(config);
+
+  // Adopt-don't-steal: if a prior MCP server is already serving the
+  // dashboard on this port AND is healthy, piggy-back on it instead of
+  // killing it. This is what keeps N concurrent launchers from
+  // thrashing the port in an endless SIGTERM loop.
+  const adoption = await tryAdoptExisting(pidFilePath, config.dashboardPort, logger);
+  if (adoption) {
+    logger.info(
+      { url: adoption.url, ownerPid: adoption.pid, port: config.dashboardPort },
+      "adopted existing claude-crap dashboard",
+    );
+    return {
+      url: adoption.url,
+      adopted: true,
+      async close() {
+        // No-op: we never bound a socket of our own, so there is
+        // nothing to release. Removing the pidfile here would make the
+        // owner's `close()` race with our cleanup.
+      },
+    };
+  }
 
   // Resolve the public/ directory. After `npm run build` the compiled
   // server lives in `dist/dashboard/server.js`, but we keep the static
@@ -173,22 +201,41 @@ export async function startDashboard(options: StartDashboardOptions): Promise<Da
     return reply.sendFile("index.html");
   });
 
-  // Kill any stale dashboard from a previous session so we always
-  // bind to the configured port. This mirrors claude-mem's PID file
-  // pattern: write a PID file when alive, check + kill on next boot.
-  const pidFilePath = resolvePidFilePath(config);
-  await killStaleDashboard(pidFilePath, config.dashboardPort, logger);
-
-  await fastify.listen({ port: config.dashboardPort, host: "127.0.0.1" });
+  // The pidfile was either missing, stale, or pointed at a zombie —
+  // `tryAdoptExisting` has already cleaned it up. Try to bind. If we
+  // lose a race against another launcher that bound between our probe
+  // and our listen, fall back to adoption instead of failing.
+  try {
+    await fastify.listen({ port: config.dashboardPort, host: "127.0.0.1" });
+  } catch (err) {
+    const code = (err as NodeJS.ErrnoException).code;
+    if (code === "EADDRINUSE") {
+      await fastify.close().catch(() => { /* best effort */ });
+      const raceAdoption = await tryAdoptExisting(pidFilePath, config.dashboardPort, logger);
+      if (raceAdoption) {
+        logger.info(
+          { url: raceAdoption.url, ownerPid: raceAdoption.pid, port: config.dashboardPort },
+          "dashboard bind lost race, adopted concurrent owner",
+        );
+        return {
+          url: raceAdoption.url,
+          adopted: true,
+          async close() { /* no-op — see adopted branch above */ },
+        };
+      }
+    }
+    throw err;
+  }
 
   const url = `http://127.0.0.1:${config.dashboardPort}`;
   logger.info({ url, publicRoot }, "claude-crap dashboard listening");
 
-  // Write PID file so the next session can find and kill us.
+  // Write PID file so sibling MCP servers can find us and adopt.
   writePidFile(pidFilePath, config.dashboardPort);
 
   return {
     url,
+    adopted: false,
     async close() {
       removePidFile(pidFilePath);
       await fastify.close();
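
The try/catch above is what lets concurrent launchers converge on a single owner instead of dying on EADDRINUSE. A hedged sketch of the intended behavior (the Promise.all harness is illustrative only, not part of the package):

  // Three MCP servers booting at once: at most one bind succeeds; the others
  // adopt either via the pidfile fast-path or via the EADDRINUSE fallback.
  const handles = await Promise.all(
    Array.from({ length: 3 }, () =>
      startDashboard({ config, sarifStore, workspaceStatsProvider, logger }),
    ),
  );
  const owners = handles.filter((h) => !h.adopted);
  // Expected: owners.length === 1, and every handle reports the same url.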
@@ -310,71 +357,101 @@ function isPidAlive(pid: number): boolean {
 }
 
 /**
- * Read the PID file, kill any stale dashboard process, and free the
- * port so the current session can bind to it. This is the key
- * difference from the port-fallback approach: instead of drifting to
- * 5118, 5119, etc., we reclaim the configured port every time.
+ * Probe an existing dashboard and decide whether the current process
+ * can adopt it instead of binding its own Fastify server.
  *
- * @param pidFilePath Absolute path to `dashboard.pid`.
- * @param port The configured dashboard port.
- * @param logger Pino logger for diagnostics.
+ * Returns `{ url, pid }` only when all four conditions hold:
+ *   1. A pidfile exists and parses as JSON.
+ *   2. The recorded PID is still alive (signal-0 probe).
+ *   3. The pidfile's recorded port matches the configured port.
+ *   4. A GET on `/api/health` responds within ~500ms.
+ *
+ * Returns `null` in every other case, but with a side-effect that makes
+ * the call-site's next step obvious:
+ *   - Missing / corrupt / dead-PID / port-mismatch → pidfile is removed
+ *     so the caller can bind cleanly.
+ *   - Zombie (PID alive, port unresponsive) → stale owner is
+ *     SIGKILL'd and the pidfile is removed. This is the one case where
+ *     we still have to kill something, because the socket belongs to a
+ *     process that is not talking HTTP anymore.
  */
-async function killStaleDashboard(
+async function tryAdoptExisting(
   pidFilePath: string,
   port: number,
   logger: Logger,
-): Promise<void> {
-  if (!existsSync(pidFilePath)) return;
+): Promise<{ url: string; pid: number } | null> {
+  if (!existsSync(pidFilePath)) return null;
 
   let stale: DashboardPidFile;
   try {
     stale = JSON.parse(readFileSync(pidFilePath, "utf8"));
   } catch {
-    // Corrupted PID file remove it and move on.
+    logger.info({ pidFilePath }, "corrupt dashboard pidfile, removing");
     removePidFile(pidFilePath);
-    return;
+    return null;
   }
 
   if (!isPidAlive(stale.pid)) {
-    logger.info({ stalePid: stale.pid }, "stale dashboard PID file found (process dead), removing");
+    logger.info({ stalePid: stale.pid }, "stale dashboard pidfile (process dead), removing");
     removePidFile(pidFilePath);
-    return;
+    return null;
   }
 
-  // Process is alive — kill it so we can reclaim the port.
-  logger.info(
-    { stalePid: stale.pid, port: stale.port, startedAt: stale.startedAt },
-    "killing stale dashboard process from previous session",
-  );
+  if (stale.port !== port) {
+    // The recorded owner is on a different port than we want. Don't
+    // adopt it, don't kill it — just treat the pidfile as unrelated.
+    logger.info(
+      { stalePort: stale.port, wantedPort: port },
+      "dashboard pidfile points at different port, ignoring",
+    );
+    removePidFile(pidFilePath);
+    return null;
+  }
+
+  const healthy = await probeDashboardHealth(port);
+  if (healthy) {
+    return { url: `http://127.0.0.1:${port}`, pid: stale.pid };
+  }
 
+  // Zombie: PID is alive but not serving HTTP. Most likely the owner
+  // crashed mid-init or is stuck. Terminate it so we can take over.
+  logger.warn(
+    { stalePid: stale.pid, port },
+    "dashboard pidfile owner is unresponsive, terminating",
+  );
   try {
     process.kill(stale.pid, "SIGTERM");
   } catch {
-    // Permission denied or already gone — remove PID file either way.
-    removePidFile(pidFilePath);
-    return;
+    /* already gone */
   }
-
-  // Wait up to 3 seconds for the process to exit.
   for (let i = 0; i < 30; i++) {
     if (!isPidAlive(stale.pid)) break;
     await new Promise((r) => setTimeout(r, 100));
   }
-
-  // If still alive after 3s, escalate to SIGKILL.
   if (isPidAlive(stale.pid)) {
-    try {
-      process.kill(stale.pid, "SIGKILL");
-    } catch {
-      /* best effort */
-    }
+    try { process.kill(stale.pid, "SIGKILL"); } catch { /* best effort */ }
     await new Promise((r) => setTimeout(r, 200));
   }
-
   removePidFile(pidFilePath);
-
-  // Give the OS a moment to release the TCP port after the process dies.
+  // Let the OS release the TCP port before the caller tries to bind.
   await new Promise((r) => setTimeout(r, 300));
+  return null;
+}
+
+/**
+ * Low-latency health probe. Resolves `true` when the dashboard replies
+ * 2xx to `/api/health` within 500ms, `false` on any other outcome
+ * (timeout, connection refused, 5xx, etc.).
+ */
+async function probeDashboardHealth(port: number): Promise<boolean> {
+  try {
+    const res = await fetch(`http://127.0.0.1:${port}/api/health`, {
+      signal: AbortSignal.timeout(500),
+    });
+    return res.ok;
+  } catch {
+    return false;
+  }
 }
 
 // ── Complexity report builder ──────────────────────────────────────
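
The adoption logic above leans on two pieces that sit outside this hunk: the `DashboardPidFile` record and the signal-0 liveness check in `isPidAlive`. A rough sketch of both, assuming only the fields that `tryAdoptExisting` actually reads (`pid`, `port`, `startedAt`); the real declarations may differ:

  interface DashboardPidFile {
    pid: number;       // process that owns the Fastify server
    port: number;      // port it claims to be serving
    startedAt: string; // start timestamp, logged for diagnostics (exact type assumed)
  }

  function isPidAlive(pid: number): boolean {
    try {
      process.kill(pid, 0); // signal 0 checks existence without delivering a signal
      return true;
    } catch {
      return false; // ESRCH: gone; an EPERM from another user's process also lands here
    }
  }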