@bitsocial/bitsocial-cli 0.19.65 → 0.19.67

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,10 +6,10 @@ import tcpPortUsed from "tcp-port-used";
6
6
  import { getLanIpV4Address, PKCLogger, setupDebugLogger, loadKuboConfigFile, parseMultiAddrKuboRpcToUrl, parseMultiAddrIpfsGatewayToUrl } from "../../util.js";
7
7
  import { startDaemonServer } from "../../webui/daemon-server.js";
8
8
  import { printBanner } from "../ascii-banner.js";
9
- import { loadChallengesIntoPKC } from "../../challenge-packages/challenge-utils.js";
9
+ import { loadChallengesIntoPKC, formatChallengeNameVersion } from "../../challenge-packages/challenge-utils.js";
10
10
  import { migrateDataDirectory } from "../../common-utils/data-migration.js";
11
11
  import { createBsoResolvers, DEFAULT_PROVIDERS } from "../../common-utils/resolvers.js";
12
- import { pruneStaleStates, writeDaemonState, deleteDaemonState } from "../../common-utils/daemon-state.js";
12
+ import { pruneStaleStates, writeDaemonState, deleteDaemonState, DAEMON_SHUTDOWN_TIMEOUT_MS } from "../../common-utils/daemon-state.js";
13
13
  import { createDaemonFileLogger } from "../../common-utils/daemon-file-logger.js";
14
14
  import fs from "fs";
15
15
  import fsPromise from "fs/promises";
@@ -275,7 +275,10 @@ export default class Daemon extends Command {
275
275
  let pendingKuboStart;
276
276
  // Kubo Node may fail randomly, we need to set a listener so when it exits because of an error we restart it
277
277
  let kuboProcess;
278
- const keepKuboUp = async () => {
278
+ // Every kubo we've spawned that hasn't exited yet. Exit cleanup kills all of these,
279
+ // so a kubo that slipped out of kuboProcess tracking still dies with the daemon (issue #70)
280
+ const liveKuboPids = new Set();
281
+ const keepKuboUpOnce = async () => {
279
282
  if (mainProcessExited)
280
283
  return;
281
284
  const kuboApiPort = Number(kuboRpcEndpoint.port);
@@ -283,6 +286,17 @@ export default class Daemon extends Command {
283
286
  return; // already started, no need to intervene
284
287
  const connectHostname = toConnectableHostname(kuboRpcEndpoint.hostname);
285
288
  const isKuboApiPortTaken = await tcpPortUsed.check(kuboApiPort, connectHostname);
289
+ // Test hook: widens the window between the re-entrancy guard above and the pendingKuboStart
290
+ // assignment below, so tests can deterministically reproduce concurrent keepKuboUp entries
291
+ // (issue #70, see test/cli/daemon-kubo-restart-race.test.ts)
292
+ const portCheckDelayRaw = process.env["PKC_CLI_TEST_KEEPKUBOUP_PORTCHECK_DELAY_MS"];
293
+ const portCheckDelay = portCheckDelayRaw ? Number(portCheckDelayRaw) : 0;
294
+ if (Number.isFinite(portCheckDelay) && portCheckDelay > 0)
295
+ await new Promise((resolve) => setTimeout(resolve, portCheckDelay));
296
+ // Re-check after the awaits above: the daemon may have begun shutting down, or another
297
+ // kubo may have been adopted in the meantime — spawning now would race it (issue #70)
298
+ if (mainProcessExited || kuboProcess || pendingKuboStart)
299
+ return;
286
300
  if (isKuboApiPortTaken) {
287
301
  const connectableEndpoint = new URL(kuboRpcEndpoint.toString());
288
302
  connectableEndpoint.hostname = connectHostname;
@@ -306,8 +320,15 @@ export default class Daemon extends Command {
306
320
  }
307
321
  throw new Error(`Cannot start IPFS daemon because the IPFS API port ${kuboRpcEndpoint.hostname}:${kuboApiPort} (configured as ${kuboRpcEndpoint.toString()}) is already in use.`);
308
322
  }
323
+ let spawnedProcess;
309
324
  const startPromise = startKuboNode(kuboRpcEndpoint, ipfsGatewayEndpoint, mergedPkcOptions.dataPath, (process) => {
325
+ spawnedProcess = process;
310
326
  kuboProcess = process;
327
+ if (process.pid) {
328
+ const pid = process.pid;
329
+ liveKuboPids.add(pid);
330
+ process.once("exit", () => liveKuboPids.delete(pid));
331
+ }
311
332
  });
312
333
  pendingKuboStart = startPromise;
313
334
  let startedProcess;
@@ -315,12 +336,15 @@ export default class Daemon extends Command {
315
336
  startedProcess = await startPromise;
316
337
  }
317
338
  catch (error) {
318
- pendingKuboStart = undefined;
319
- if (!mainProcessExited)
339
+ // Only clear state this attempt owns — it may track another attempt's healthy kubo (issue #70)
340
+ if (pendingKuboStart === startPromise)
341
+ pendingKuboStart = undefined;
342
+ if (!mainProcessExited && spawnedProcess && kuboProcess === spawnedProcess)
320
343
  kuboProcess = undefined;
321
344
  throw error;
322
345
  }
323
- pendingKuboStart = undefined;
346
+ if (pendingKuboStart === startPromise)
347
+ pendingKuboStart = undefined;
324
348
  if (mainProcessExited) {
325
349
  if (startedProcess?.pid && !startedProcess.killed) {
326
350
  // Race condition: Kubo finished starting after mainProcessExited.
@@ -341,7 +365,8 @@ export default class Daemon extends Command {
341
365
  /* best effort */
342
366
  }
343
367
  }
344
- kuboProcess = undefined;
368
+ if (kuboProcess === startedProcess)
369
+ kuboProcess = undefined;
345
370
  return;
346
371
  }
347
372
  kuboProcess = startedProcess;
@@ -353,7 +378,8 @@ export default class Daemon extends Command {
353
378
  // Restart Kubo process because it failed
354
379
  if (!mainProcessExited) {
355
380
  log(`Kubo node with pid (${currentProcess?.pid}) exited. Will attempt to restart it`);
356
- kuboProcess = undefined;
381
+ if (kuboProcess === currentProcess)
382
+ kuboProcess = undefined;
357
383
  try {
358
384
  await keepKuboUp();
359
385
  }
@@ -367,6 +393,19 @@ export default class Daemon extends Command {
367
393
  };
368
394
  currentProcess.once("exit", onKuboExit);
369
395
  };
396
+ // Single-flight wrapper: keepKuboUp is invoked from independent places (the kubo exit
397
+ // handler and the watchdog interval). Concurrent callers must share one attempt —
398
+ // otherwise both can pass keepKuboUpOnce's re-entrancy guard during its awaits and
399
+ // spawn two kubo processes whose failure handling corrupts shared state (issue #70)
400
+ let keepKuboUpInFlight;
401
+ const keepKuboUp = () => {
402
+ if (!keepKuboUpInFlight) {
403
+ keepKuboUpInFlight = keepKuboUpOnce().finally(() => {
404
+ keepKuboUpInFlight = undefined;
405
+ });
406
+ }
407
+ return keepKuboUpInFlight;
408
+ };
370
409
  let startedOwnRpc = false;
371
410
  let daemonServer;
372
411
  const createOrConnectRpc = async () => {
@@ -384,7 +423,7 @@ export default class Daemon extends Command {
384
423
  // Load installed challenge packages before starting the RPC server
385
424
  const loadedChallenges = await loadChallengesIntoPKC(mergedPkcOptions.dataPath);
386
425
  if (loadedChallenges.length > 0)
387
- console.log(`Loaded challenge packages: ${loadedChallenges.join(", ")}`);
426
+ console.log(`Loaded challenge packages: ${loadedChallenges.map(formatChallengeNameVersion).join(", ")}`);
388
427
  daemonServer = await startDaemonServer(pkcRpcUrl, ipfsGatewayEndpoint, mergedPkcOptions, {
389
428
  allowPrivateKeyExport: flags.allowPrivateKeyExport
390
429
  });
@@ -429,14 +468,29 @@ export default class Daemon extends Command {
429
468
  }
430
469
  };
431
470
  const killKuboProcess = async () => {
432
- if (pendingKuboStart) {
433
- try {
434
- await pendingKuboStart;
435
- }
436
- catch {
437
- /* ignore */
438
- }
439
- }
471
+ // Test hook (issue #70): hold off the kubo teardown for a fixed delay so a test can
472
+ // deterministically reproduce the window where the daemon's RPC port is already free
473
+ // (daemonServer.destroy() runs in parallel) but kubo is still alive and bound. This is
474
+ // exactly the window `update install` must not restart into — see
475
+ // test/cli/update-install-restart-race.test.ts. The daemon process stays alive for the
476
+ // duration because the exit hook awaits killKuboProcess() before exiting.
477
+ const kuboShutdownDelayRaw = process.env["PKC_CLI_TEST_KUBO_SHUTDOWN_DELAY_MS"];
478
+ const kuboShutdownDelay = kuboShutdownDelayRaw ? Number(kuboShutdownDelayRaw) : 0;
479
+ if (Number.isFinite(kuboShutdownDelay) && kuboShutdownDelay > 0)
480
+ await new Promise((resolve) => setTimeout(resolve, kuboShutdownDelay));
481
+ // Wait (bounded) for any in-flight start attempt so we kill the kubo it may still
482
+ // spawn. Both promises settle on all failure paths (issue #70), but a spawned kubo
483
+ // that wedges before "Daemon is ready" without exiting keeps them pending — the
484
+ // bound ensures shutdown still reaches the SIGINT/SIGKILL flow below, which kills
485
+ // it via kuboProcess (set in onSpawn) or the liveKuboPids sweep (PR #71 review).
486
+ const inFlightStarts = [keepKuboUpInFlight, pendingKuboStart]
487
+ .filter((promise) => promise !== undefined)
488
+ .map((promise) => promise.catch(() => { }));
489
+ if (inFlightStarts.length > 0)
490
+ await Promise.race([
491
+ Promise.all(inFlightStarts),
492
+ new Promise((resolve) => setTimeout(resolve, 15_000).unref())
493
+ ]);
440
494
  if (kuboProcess?.pid && !kuboProcess.killed) {
441
495
  const pid = kuboProcess.pid;
442
496
  log("Attempting to kill kubo process with pid", pid);
@@ -466,6 +520,11 @@ export default class Daemon extends Command {
466
520
  kuboProcess = undefined;
467
521
  }
468
522
  }
523
+ // Defense in depth: SIGKILL any spawned kubo that slipped out of kuboProcess
524
+ // tracking (e.g. via a state race) so nothing outlives the daemon (issue #70)
525
+ for (const pid of liveKuboPids)
526
+ killKuboProcessGroup(pid, "SIGKILL");
527
+ liveKuboPids.clear();
469
528
  };
470
529
  asyncExitHook(async () => {
471
530
  if (keepKuboUpInterval)
@@ -489,16 +548,61 @@ export default class Daemon extends Command {
489
548
  log.error("Error shutting down daemon server", e);
490
549
  }
491
550
  await kuboKillPromise;
492
- }, { wait: 120000 } // could take two minutes to shut down
551
+ }, { wait: DAEMON_SHUTDOWN_TIMEOUT_MS } // could take two minutes to shut down
493
552
  );
494
553
  // Emergency cleanup: if the process force-exits (e.g. double Ctrl+C),
495
- // synchronously SIGKILL kubo's process group. This is a no-op if
496
- // killKuboProcess() already ran (it sets kuboProcess = undefined).
554
+ // synchronously SIGKILL every live kubo's process group. This is a no-op if
555
+ // killKuboProcess() already ran (it clears kuboProcess and liveKuboPids).
497
556
  process.on("exit", () => {
498
557
  if (kuboProcess?.pid) {
499
558
  killKuboProcessGroup(kuboProcess.pid, "SIGKILL");
500
559
  }
560
+ for (const pid of liveKuboPids)
561
+ killKuboProcessGroup(pid, "SIGKILL");
501
562
  });
563
+ // Persistent signal guard (issue #70): exit-hook registers its SIGINT/SIGTERM handlers
564
+ // with process.once, so its listener vanishes from the listener list the moment a
565
+ // signal is dispatched. signal-exit (loaded by @pkcprotocol/proper-lock-file and other
566
+ // dependencies) re-raises the signal when every remaining listener is its own — which
567
+ // would kill the process while the async exit hook above is still shutting kubo down.
568
+ // A persistent non-signal-exit listener keeps that heuristic from ever firing.
569
+ // A repeated signal force-quits immediately (impatient Ctrl+C): process.exit triggers
570
+ // the emergency "exit" handler above, which SIGKILLs every live kubo.
571
+ let terminationSignalCount = 0;
572
+ for (const signal of ["SIGINT", "SIGTERM"]) {
573
+ process.on(signal, () => {
574
+ terminationSignalCount++;
575
+ if (terminationSignalCount >= 2) {
576
+ log(`Received ${signal} again during shutdown, force-quitting`);
577
+ process.exit(signal === "SIGINT" ? 130 : 143);
578
+ }
579
+ });
580
+ }
581
+ // Test hook (issue #70): simulates a dependency registering a signal-exit handler
582
+ // AFTER the asyncExitHook above — what @pkcprotocol/proper-lock-file (and the
583
+ // signal-exit copies under ink/restore-cursor) do at module load. signal-exit
584
+ // re-raises the signal when every remaining listener belongs to the signal-exit
585
+ // family (`if (listeners.length === count) { ... process.kill(process.pid, s) }`),
586
+ // which kills the daemon while the async exit hook is still cleaning up kubo —
587
+ // exit-hook registers with process.once, so its listener is already gone by then.
588
+ if (process.env["PKC_CLI_TEST_SIMULATE_LATE_SIGNAL_EXIT"]) {
589
+ for (const signal of ["SIGINT", "SIGTERM"]) {
590
+ // Real signal-exit copies only count each other as "family" (via a shared
591
+ // global marker); any other listener makes them defer. Identify family by
592
+ // source: signal-exit's dispatcher carries the "an exit is coming" comment.
593
+ const isSignalExitFamily = (listener) => String(listener).includes("an exit is coming");
594
+ const reRaiser = () => {
595
+ const onlyFamilyLeft = process
596
+ .listeners(signal)
597
+ .every((listener) => listener === reRaiser || isSignalExitFamily(listener));
598
+ if (onlyFamilyLeft) {
599
+ process.removeListener(signal, reRaiser);
600
+ process.kill(process.pid, signal);
601
+ }
602
+ };
603
+ process.on(signal, reRaiser);
604
+ }
605
+ }
502
606
  // RPC port was already verified free above (fail-fast); only the kuboRpcClientsOptions branch skips local kubo.
503
607
  if (!pkcOptionsFromFlag?.kuboRpcClientsOptions)
504
608
  await keepKuboUp();
@@ -163,6 +163,14 @@ export default class Logs extends Command {
163
163
  // Follow mode: dump existing content (filtered + tailed) then watch for new data
164
164
  let currentLogFile = latestLogFile;
165
165
  const existingContent = await fsPromise.readFile(currentLogFile, "utf-8");
166
+ // Anchor the follow offset to exactly the bytes we just read, NOT a separate
167
+ // fsPromise.stat() taken afterwards. A later stat is racy: any append landing between
168
+ // this read and the stat is skipped (position jumps past it) yet was never in the dump
169
+ // above, so follow mode silently drops those lines. Under load that window widens — this
170
+ // was the cause of the intermittent CI failure where an appended line was never surfaced
171
+ // (issue #77). Byte length (not string length) because position indexes bytes in the file.
172
+ let position = Buffer.byteLength(existingContent, "utf-8");
173
+ let pendingBuffer = "";
166
174
  const entries = this._parseLogEntries(existingContent);
167
175
  const filtered = this._filterEntries(entries, since, until);
168
176
  const streamFiltered = streamFilter ? this._filterByStream(filtered, streamFilter) : filtered;
@@ -170,9 +178,6 @@ export default class Logs extends Command {
170
178
  const initialOutput = tailed.map((e) => e.lines.join("\n")).join("\n");
171
179
  if (initialOutput)
172
180
  process.stdout.write(initialOutput + "\n");
173
- const stat = await fsPromise.stat(currentLogFile);
174
- let position = stat.size;
175
- let pendingBuffer = "";
176
181
  // Watch for new data by reading directly from `position`. We intentionally do
177
182
  // NOT gate on fsPromise.stat().size — on Windows + NTFS, stat() returns a stale
178
183
  // size for a short window after another process appends, which causes the gate
@@ -10,5 +10,11 @@ export default class Install extends Command {
10
10
  };
11
11
  static examples: string[];
12
12
  run(): Promise<void>;
13
+ /**
14
+ * Poll until the given PID no longer exists (signal 0 throws ESRCH), or the timeout elapses.
15
+ * Returns true if the process exited, false on timeout. EPERM means the process is still alive
16
+ * but owned by another user, so we keep waiting.
17
+ */
18
+ private _waitForProcessExit;
13
19
  private _restartDaemons;
14
20
  }
@@ -4,7 +4,7 @@ import tcpPortUsed from "tcp-port-used";
4
4
  import { fetchLatestVersion, installGlobal } from "../../../update/npm-registry.js";
5
5
  import { fastInstallGlobal } from "../../../update/fast-update.js";
6
6
  import { compareVersions } from "../../../update/semver.js";
7
- import { getAliveDaemonStates } from "../../../common-utils/daemon-state.js";
7
+ import { getAliveDaemonStates, DAEMON_SHUTDOWN_TIMEOUT_MS } from "../../../common-utils/daemon-state.js";
8
8
  export default class Install extends Command {
9
9
  static description = "Install a specific version of bitsocial from npm";
10
10
  static args = {
@@ -54,15 +54,17 @@ export default class Install extends Command {
54
54
  throw e;
55
55
  }
56
56
  }
57
- // Wait for all daemon ports to be free
57
+ // Wait for each daemon process to fully exit — NOT just for its RPC port to free.
58
+ // The daemon releases its RPC port (daemonServer.destroy()) before it finishes killing
59
+ // its kubo child, so a port-only wait lets us restart while the old kubo still holds the
60
+ // IPFS API port; the new daemon then dies on startup with "IPFS API port already in use"
61
+ // (issue #70). The daemon's exit hook kills kubo before the process exits, so waiting for
62
+ // the PID to disappear guarantees the kubo port is free before we restart.
58
63
  for (const d of aliveDaemons) {
59
- const url = new URL(d.pkcRpcUrl);
60
- const port = Number(url.port);
61
- const host = url.hostname;
62
- this.log(`Waiting for port ${port} to be free...`);
63
- const freed = await tcpPortUsed.waitUntilFree(port, 500, 30000).then(() => true).catch(() => false);
64
- if (!freed) {
65
- this.error(`Daemon (PID ${d.pid}) did not shut down within 30 seconds on port ${port}.`, { exit: 1 });
64
+ this.log(`Waiting for daemon (PID ${d.pid}) to exit...`);
65
+ const exited = await this._waitForProcessExit(d.pid, DAEMON_SHUTDOWN_TIMEOUT_MS);
66
+ if (!exited) {
67
+ this.error(`Daemon (PID ${d.pid}) did not shut down within ${DAEMON_SHUTDOWN_TIMEOUT_MS / 1000} seconds.`, { exit: 1 });
66
68
  }
67
69
  }
68
70
  this.log("All daemons stopped.");
@@ -118,6 +120,33 @@ export default class Install extends Command {
118
120
  this.log("To see the daemon logs run `bitsocial logs --stdout`");
119
121
  }
120
122
  }
123
+ /**
124
+ * Poll until the given PID no longer exists (signal 0 throws ESRCH), or the timeout elapses.
125
+ * Returns true if the process exited, false on timeout. EPERM means the process is still alive
126
+ * but owned by another user, so we keep waiting.
127
+ */
128
+ async _waitForProcessExit(pid, timeoutMs) {
129
+ const deadline = Date.now() + timeoutMs;
130
+ while (Date.now() < deadline) {
131
+ try {
132
+ process.kill(pid, 0);
133
+ }
134
+ catch (e) {
135
+ if (e.code === "ESRCH")
136
+ return true; // no such process — it exited
137
+ }
138
+ await new Promise((resolve) => setTimeout(resolve, 250));
139
+ }
140
+ // Final check so a process that exits in the last interval isn't reported as a timeout
141
+ try {
142
+ process.kill(pid, 0);
143
+ }
144
+ catch (e) {
145
+ if (e.code === "ESRCH")
146
+ return true;
147
+ }
148
+ return false;
149
+ }
121
150
  async _restartDaemons(daemons) {
122
151
  this.log(`Restarting ${daemons.length} daemon(s)...`);
123
152
  for (const d of daemons) {
@@ -1,8 +1,17 @@
1
+ /**
2
+ * Maximum time a daemon is allowed to shut down its kubo + RPC server during its
3
+ * async exit hook. The `update install --restart-daemons` orchestrator must wait at
4
+ * least this long for a stopped daemon's PID to disappear before giving up — otherwise
5
+ * a slow-but-valid shutdown (within the daemon's own contract) aborts the update midway.
6
+ */
7
+ export declare const DAEMON_SHUTDOWN_TIMEOUT_MS = 120000;
1
8
  export interface DaemonState {
2
9
  pid: number;
3
10
  startedAt: string;
4
11
  argv: string[];
5
12
  pkcRpcUrl: string;
13
+ /** OS-reported process start time, used to detect PID reuse. Absent in legacy state files. */
14
+ procStartTime?: string;
6
15
  }
7
16
  /** Write a daemon state file atomically (write to .tmp then rename). */
8
17
  export declare function writeDaemonState(state: DaemonState): Promise<void>;
@@ -10,7 +19,7 @@ export declare function writeDaemonState(state: DaemonState): Promise<void>;
10
19
  export declare function readAllDaemonStates(): Promise<DaemonState[]>;
11
20
  /** Delete a specific daemon's state file. Ignores ENOENT. */
12
21
  export declare function deleteDaemonState(pid: number): Promise<void>;
13
- /** Delete state files for dead PIDs from disk. */
22
+ /** Delete state files for dead or reused PIDs from disk. */
14
23
  export declare function pruneStaleStates(): Promise<void>;
15
- /** Read all states, delete stale files (dead PIDs) from disk, return only alive ones. */
24
+ /** Read all states, delete stale files (dead or reused PIDs) from disk, return only alive ones. */
16
25
  export declare function getAliveDaemonStates(): Promise<DaemonState[]>;
@@ -1,12 +1,67 @@
1
1
  import defaults from "./defaults.js";
2
2
  import path from "path";
3
3
  import fs from "fs/promises";
4
+ import { execFile } from "child_process";
5
+ import { promisify } from "util";
6
+ const execFileAsync = promisify(execFile);
4
7
  const DAEMON_STATES_DIR = path.join(defaults.PKC_DATA_PATH, ".daemon_states");
8
+ /**
9
+ * Maximum time a daemon is allowed to shut down its kubo + RPC server during its
10
+ * async exit hook. The `update install --restart-daemons` orchestrator must wait at
11
+ * least this long for a stopped daemon's PID to disappear before giving up — otherwise
12
+ * a slow-but-valid shutdown (within the daemon's own contract) aborts the update midway.
13
+ */
14
+ export const DAEMON_SHUTDOWN_TIMEOUT_MS = 120000;
5
15
  function stateFilePath(pid) {
6
16
  return path.join(DAEMON_STATES_DIR, `${pid}-daemon.state`);
7
17
  }
18
/**
 * OS-reported start time of a process, used as an identity token: if a state file's PID
 * was reused by an unrelated process, its start time won't match the recorded one.
 *
 * Linux: starttime (field 22) of /proc/<pid>/stat, in clock ticks since boot.
 * Other unix: `ps -o lstart=` output. That output is a formatted local date, so `ps` is
 * run with TZ=UTC and LC_ALL=C to keep the token byte-stable when the caller's time zone
 * or locale differs from the one in effect when the token was recorded.
 * NOTE(review): a token recorded by a build that did not pin TZ/LC_ALL will not match on
 * the `ps` path unless that environment was already UTC/C — confirm this is acceptable.
 *
 * @param {number} pid - PID of the process to inspect.
 * @returns {Promise<string | undefined>} the start-time token, or undefined when it
 *   can't be determined. Never rejects.
 */
async function getProcessStartTime(pid) {
    try {
        const stat = await fs.readFile(`/proc/${pid}/stat`, "utf-8");
        // comm (field 2) may contain spaces/parens — real fields resume after the last ')'
        const fields = stat.slice(stat.lastIndexOf(")") + 2).split(" ");
        return fields[19]; // field 22 (starttime): fields[0] is field 3, so index 19
    }
    catch {
        // No readable /proc entry — ask ps instead
        try {
            const { stdout } = await execFileAsync("ps", ["-p", String(pid), "-o", "lstart="], {
                // Pin time zone and locale: lstart is rendered as a local, locale-formatted date
                env: { ...process.env, TZ: "UTC", LC_ALL: "C" }
            });
            return stdout.trim() || undefined;
        }
        catch {
            return undefined;
        }
    }
}
41
/**
 * Full command line of a process.
 *
 * Reads /proc/<pid>/cmdline first and joins its NUL-separated arguments with spaces. If
 * that file cannot be read, falls back to `ps -o args=`.
 *
 * @param {number} pid - PID of the process to inspect.
 * @returns {Promise<string | undefined>} the command line, or undefined when it can't be
 *   determined. An empty string from /proc is returned as-is. Never rejects.
 */
async function getProcessCommandLine(pid) {
    let procCmdline;
    try {
        procCmdline = await fs.readFile(`/proc/${pid}/cmdline`, "utf-8");
    }
    catch {
        // /proc is unavailable or has no entry for this PID — ask ps instead
        try {
            const psResult = await execFileAsync("ps", ["-p", String(pid), "-o", "args="]);
            const psArgs = psResult.stdout.trim();
            return psArgs === "" ? undefined : psArgs;
        }
        catch {
            return undefined;
        }
    }
    // An empty /proc cmdline is meaningful (kernel thread — not a daemon), so keep it
    return procCmdline.split("\0").join(" ").trim();
}
8
58
  /** Write a daemon state file atomically (write to .tmp then rename). */
9
59
  export async function writeDaemonState(state) {
60
+ if (state.procStartTime === undefined) {
61
+ const procStartTime = await getProcessStartTime(state.pid);
62
+ if (procStartTime !== undefined)
63
+ state = { ...state, procStartTime };
64
+ }
10
65
  await fs.mkdir(DAEMON_STATES_DIR, { recursive: true });
11
66
  const dest = stateFilePath(state.pid);
12
67
  const tmp = dest + ".tmp";
@@ -60,21 +115,37 @@ function isPidAlive(pid) {
60
115
  return false; // ESRCH — no such process
61
116
  }
62
117
  }
63
- /** Delete state files for dead PIDs from disk. */
64
- export async function pruneStaleStates() {
65
- const states = await readAllDaemonStates();
66
- for (const state of states) {
67
- if (!isPidAlive(state.pid)) {
68
- await deleteDaemonState(state.pid);
69
- }
118
/**
 * Decide whether the daemon that wrote `state` is still the process running under its PID.
 *
 * A bare liveness check is not enough: a stale state file's PID may have been reused by an
 * unrelated process (e.g. a state file written inside a Docker container whose PID maps to
 * a kernel thread on the host — issue #66).
 *
 * - PID not alive: false.
 * - State has `procStartTime`: the PID's current start time must equal it.
 * - Legacy state without `procStartTime`: the PID's command line must contain "bitsocial".
 * - Identity can't be determined in either case: fall back to liveness only (true).
 *
 * @param {{ pid: number, procStartTime?: string }} state - Parsed daemon state file.
 * @returns {Promise<boolean>} true if the state still describes a running daemon.
 */
async function isDaemonStateAlive(state) {
    const { pid, procStartTime: recordedStartTime } = state;
    if (!isPidAlive(pid))
        return false;
    if (recordedStartTime === undefined) {
        // Legacy state file without procStartTime — heuristic on the command line
        const commandLine = await getProcessCommandLine(pid);
        // Undeterminable identity falls back to liveness only
        return commandLine === undefined ? true : commandLine.includes("bitsocial");
    }
    const currentStartTime = await getProcessStartTime(pid);
    // Undeterminable identity falls back to liveness only; a mismatch means the PID was reused
    return currentStartTime === undefined ? true : currentStartTime === recordedStartTime;
}
139
+ /** Delete state files for dead or reused PIDs from disk. */
140
+ export async function pruneStaleStates() {
141
+ await getAliveDaemonStates();
71
142
  }
72
- /** Read all states, delete stale files (dead PIDs) from disk, return only alive ones. */
143
+ /** Read all states, delete stale files (dead or reused PIDs) from disk, return only alive ones. */
73
144
  export async function getAliveDaemonStates() {
74
145
  const states = await readAllDaemonStates();
75
146
  const alive = [];
76
147
  for (const state of states) {
77
- if (isPidAlive(state.pid)) {
148
+ if (await isDaemonStateAlive(state)) {
78
149
  alive.push(state);
79
150
  }
80
151
  else {
@@ -185,53 +185,53 @@ async function ensureIpfsPortsAreAvailable(log, configPath, apiUrl, gatewayUrl)
185
185
  }
186
186
  }
187
187
  export async function startKuboNode(apiUrl, gatewayUrl, dataPath, onSpawn) {
188
- return new Promise(async (resolve, reject) => {
189
- const log = PKCLogger("bitsocial-cli:ipfs:startKuboNode");
190
- const ipfsDataPath = process.env["IPFS_PATH"] || path.join(dataPath, ".bitsocial-cli.ipfs");
191
- await fs.promises.mkdir(ipfsDataPath, { recursive: true });
192
- const ipfsConfigPath = path.join(ipfsDataPath, "config");
193
- const kuboExePath = await getKuboExePath();
194
- const kuboVersion = await getKuboVersion();
195
- log(`Using Kubo version: ${kuboVersion}`);
196
- log(`IpfsDataPath (${ipfsDataPath}), kuboExePath (${kuboExePath})`, "kubo ipfs config file", path.join(ipfsDataPath, "config"));
197
- log("If you would like to change kubo config, please edit the config file at", path.join(ipfsDataPath, "config"));
198
- const env = { IPFS_PATH: ipfsDataPath, DEBUG_COLORS: "1" };
199
- let configJustInitialized = false;
200
- try {
201
- await _spawnAsync(log, kuboExePath, ["init"], { env, hideWindows: true });
202
- configJustInitialized = true;
203
- }
204
- catch (e) {
205
- const error = e;
206
- if (!error?.message?.includes("ipfs configuration file already exists!"))
207
- throw new Error("Failed to call ipfs init" + error);
208
- }
209
- if (configJustInitialized) {
210
- await _spawnAsync(log, kuboExePath, ["config", "profile", "apply", `server`], {
211
- env,
212
- hideWindows: true
213
- });
214
- log("Called 'ipfs config profile apply server' successfully");
215
- await mergeCliDefaultsIntoIpfsConfig(log, ipfsConfigPath, apiUrl, gatewayUrl);
216
- }
217
- else {
218
- log("IPFS config already exists; skipping config overrides to preserve user changes.");
219
- }
220
- try {
221
- await _spawnAsync(log, kuboExePath, ["repo", "migrate"], { env, hideWindows: true });
222
- log("Ensured IPFS repository is migrated to the latest supported version.");
223
- }
224
- catch (migrationError) {
225
- log.error("Failed to run IPFS repo migrations automatically", migrationError);
226
- throw migrationError;
227
- }
228
- try {
229
- await ensureIpfsPortsAreAvailable(log, ipfsConfigPath, apiUrl, gatewayUrl);
230
- }
231
- catch (error) {
232
- reject(error instanceof Error ? error : new Error(String(error)));
233
- return;
234
- }
188
+ // Preparation phase runs as plain awaits so any failure rejects the returned promise.
189
+ // It must NOT live inside the new Promise() executor below: an async executor swallows
190
+ // throws as unhandledRejections and the promise never settles, which wedges the daemon's
191
+ // pendingKuboStart tracking and hangs its shutdown (issue #70).
192
+ const log = PKCLogger("bitsocial-cli:ipfs:startKuboNode");
193
+ const ipfsDataPath = process.env["IPFS_PATH"] || path.join(dataPath, ".bitsocial-cli.ipfs");
194
+ await fs.promises.mkdir(ipfsDataPath, { recursive: true });
195
+ const ipfsConfigPath = path.join(ipfsDataPath, "config");
196
+ const kuboExePath = await getKuboExePath();
197
+ const kuboVersion = await getKuboVersion();
198
+ log(`Using Kubo version: ${kuboVersion}`);
199
+ log(`IpfsDataPath (${ipfsDataPath}), kuboExePath (${kuboExePath})`, "kubo ipfs config file", path.join(ipfsDataPath, "config"));
200
+ log("If you would like to change kubo config, please edit the config file at", path.join(ipfsDataPath, "config"));
201
+ const env = { IPFS_PATH: ipfsDataPath, DEBUG_COLORS: "1" };
202
+ let configJustInitialized = false;
203
+ try {
204
+ await _spawnAsync(log, kuboExePath, ["init"], { env, hideWindows: true });
205
+ configJustInitialized = true;
206
+ }
207
+ catch (e) {
208
+ const error = e;
209
+ if (!error?.message?.includes("ipfs configuration file already exists!"))
210
+ throw new Error("Failed to call ipfs init" + error);
211
+ }
212
+ if (configJustInitialized) {
213
+ await _spawnAsync(log, kuboExePath, ["config", "profile", "apply", `server`], {
214
+ env,
215
+ hideWindows: true
216
+ });
217
+ log("Called 'ipfs config profile apply server' successfully");
218
+ await mergeCliDefaultsIntoIpfsConfig(log, ipfsConfigPath, apiUrl, gatewayUrl);
219
+ }
220
+ else {
221
+ log("IPFS config already exists; skipping config overrides to preserve user changes.");
222
+ }
223
+ try {
224
+ await _spawnAsync(log, kuboExePath, ["repo", "migrate"], { env, hideWindows: true });
225
+ log("Ensured IPFS repository is migrated to the latest supported version.");
226
+ }
227
+ catch (migrationError) {
228
+ log.error("Failed to run IPFS repo migrations automatically", migrationError);
229
+ throw migrationError;
230
+ }
231
+ await ensureIpfsPortsAreAvailable(log, ipfsConfigPath, apiUrl, gatewayUrl);
232
+ // Spawn phase: the promise only wraps the event-driven wait for kubo's "Daemon is ready",
233
+ // so every settle path goes through resolve/reject.
234
+ return new Promise((resolve, reject) => {
235
235
  const daemonArgs = ["--enable-namesys-pubsub", "--migrate"];
236
236
  const kuboProcess = spawn(kuboExePath, ["daemon", ...daemonArgs], {
237
237
  env,
@@ -6,7 +6,7 @@ import fs from "fs/promises";
6
6
  import { PKCLogger } from "../util.js";
7
7
  import { randomBytes } from "crypto";
8
8
  import express from "express";
9
- import { loadChallengesIntoPKC } from "../challenge-packages/challenge-utils.js";
9
+ import { loadChallengesIntoPKC, formatChallengeNameVersion } from "../challenge-packages/challenge-utils.js";
10
10
  const rootHashRedirectScriptPattern = /<script\b[^>]*>(?:(?!<\/script>)[\s\S])*?window\.location\.replace\(["']\/#["']\s*\+\s*window\.location\.pathname\s*\+\s*window\.location\.search\);(?:(?!<\/script>)[\s\S])*?<\/script>/;
11
11
  async function _generateModifiedIndexHtmlWithRpcSettings(webuiPath, webuiName, ipfsGatewayPort) {
12
12
  const indexHtmlString = (await fs.readFile(path.join(webuiPath, "index_backup_no_rpc.html")))
@@ -124,7 +124,7 @@ export async function startDaemonServer(rpcUrl, ipfsGatewayUrl, pkcOptions, rpcS
124
124
  // Challenge reload endpoints
125
125
  const handleChallengeReload = async (_req, res) => {
126
126
  try {
127
- const loadedNames = await loadChallengesIntoPKC(pkcOptions.dataPath);
127
+ const loadedChallenges = await loadChallengesIntoPKC(pkcOptions.dataPath);
128
128
  // Notify all connected RPC clients about the updated challenges
129
129
  const onSettingsChange = rpcServer._onSettingsChange;
130
130
  if (onSettingsChange) {
@@ -139,7 +139,7 @@ export async function startDaemonServer(rpcUrl, ipfsGatewayUrl, pkcOptions, rpcS
139
139
  }
140
140
  }
141
141
  }
142
- res.json({ ok: true, challenges: loadedNames });
142
+ res.json({ ok: true, challenges: loadedChallenges.map(formatChallengeNameVersion) });
143
143
  }
144
144
  catch (err) {
145
145
  log.error("Failed to reload challenges", err);