@bitsocial/bitsocial-cli 0.19.65 → 0.19.67
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +208 -16
- package/dist/challenge-packages/challenge-utils.d.ts +2 -1
- package/dist/challenge-packages/challenge-utils.js +6 -3
- package/dist/cli/commands/challenge/install.d.ts +1 -0
- package/dist/cli/commands/challenge/install.js +27 -31
- package/dist/cli/commands/challenge/list.d.ts +1 -0
- package/dist/cli/commands/challenge/list.js +10 -9
- package/dist/cli/commands/challenge/remove.d.ts +1 -0
- package/dist/cli/commands/challenge/remove.js +13 -2
- package/dist/cli/commands/daemon.js +124 -20
- package/dist/cli/commands/logs.js +8 -3
- package/dist/cli/commands/update/install.d.ts +6 -0
- package/dist/cli/commands/update/install.js +38 -9
- package/dist/common-utils/daemon-state.d.ts +11 -2
- package/dist/common-utils/daemon-state.js +80 -9
- package/dist/ipfs/startIpfs.js +47 -47
- package/dist/webui/daemon-server.js +3 -3
- package/oclif.manifest.json +13 -4
- package/package.json +1 -1
|
@@ -6,10 +6,10 @@ import tcpPortUsed from "tcp-port-used";
|
|
|
6
6
|
import { getLanIpV4Address, PKCLogger, setupDebugLogger, loadKuboConfigFile, parseMultiAddrKuboRpcToUrl, parseMultiAddrIpfsGatewayToUrl } from "../../util.js";
|
|
7
7
|
import { startDaemonServer } from "../../webui/daemon-server.js";
|
|
8
8
|
import { printBanner } from "../ascii-banner.js";
|
|
9
|
-
import { loadChallengesIntoPKC } from "../../challenge-packages/challenge-utils.js";
|
|
9
|
+
import { loadChallengesIntoPKC, formatChallengeNameVersion } from "../../challenge-packages/challenge-utils.js";
|
|
10
10
|
import { migrateDataDirectory } from "../../common-utils/data-migration.js";
|
|
11
11
|
import { createBsoResolvers, DEFAULT_PROVIDERS } from "../../common-utils/resolvers.js";
|
|
12
|
-
import { pruneStaleStates, writeDaemonState, deleteDaemonState } from "../../common-utils/daemon-state.js";
|
|
12
|
+
import { pruneStaleStates, writeDaemonState, deleteDaemonState, DAEMON_SHUTDOWN_TIMEOUT_MS } from "../../common-utils/daemon-state.js";
|
|
13
13
|
import { createDaemonFileLogger } from "../../common-utils/daemon-file-logger.js";
|
|
14
14
|
import fs from "fs";
|
|
15
15
|
import fsPromise from "fs/promises";
|
|
@@ -275,7 +275,10 @@ export default class Daemon extends Command {
|
|
|
275
275
|
let pendingKuboStart;
|
|
276
276
|
// Kubo Node may fail randomly, we need to set a listener so when it exits because of an error we restart it
|
|
277
277
|
let kuboProcess;
|
|
278
|
-
|
|
278
|
+
// Every kubo we've spawned that hasn't exited yet. Exit cleanup kills all of these,
|
|
279
|
+
// so a kubo that slipped out of kuboProcess tracking still dies with the daemon (issue #70)
|
|
280
|
+
const liveKuboPids = new Set();
|
|
281
|
+
const keepKuboUpOnce = async () => {
|
|
279
282
|
if (mainProcessExited)
|
|
280
283
|
return;
|
|
281
284
|
const kuboApiPort = Number(kuboRpcEndpoint.port);
|
|
@@ -283,6 +286,17 @@ export default class Daemon extends Command {
|
|
|
283
286
|
return; // already started, no need to intervene
|
|
284
287
|
const connectHostname = toConnectableHostname(kuboRpcEndpoint.hostname);
|
|
285
288
|
const isKuboApiPortTaken = await tcpPortUsed.check(kuboApiPort, connectHostname);
|
|
289
|
+
// Test hook: widens the window between the re-entrancy guard above and the pendingKuboStart
|
|
290
|
+
// assignment below, so tests can deterministically reproduce concurrent keepKuboUp entries
|
|
291
|
+
// (issue #70, see test/cli/daemon-kubo-restart-race.test.ts)
|
|
292
|
+
const portCheckDelayRaw = process.env["PKC_CLI_TEST_KEEPKUBOUP_PORTCHECK_DELAY_MS"];
|
|
293
|
+
const portCheckDelay = portCheckDelayRaw ? Number(portCheckDelayRaw) : 0;
|
|
294
|
+
if (Number.isFinite(portCheckDelay) && portCheckDelay > 0)
|
|
295
|
+
await new Promise((resolve) => setTimeout(resolve, portCheckDelay));
|
|
296
|
+
// Re-check after the awaits above: the daemon may have begun shutting down, or another
|
|
297
|
+
// kubo may have been adopted in the meantime — spawning now would race it (issue #70)
|
|
298
|
+
if (mainProcessExited || kuboProcess || pendingKuboStart)
|
|
299
|
+
return;
|
|
286
300
|
if (isKuboApiPortTaken) {
|
|
287
301
|
const connectableEndpoint = new URL(kuboRpcEndpoint.toString());
|
|
288
302
|
connectableEndpoint.hostname = connectHostname;
|
|
@@ -306,8 +320,15 @@ export default class Daemon extends Command {
|
|
|
306
320
|
}
|
|
307
321
|
throw new Error(`Cannot start IPFS daemon because the IPFS API port ${kuboRpcEndpoint.hostname}:${kuboApiPort} (configured as ${kuboRpcEndpoint.toString()}) is already in use.`);
|
|
308
322
|
}
|
|
323
|
+
let spawnedProcess;
|
|
309
324
|
const startPromise = startKuboNode(kuboRpcEndpoint, ipfsGatewayEndpoint, mergedPkcOptions.dataPath, (process) => {
|
|
325
|
+
spawnedProcess = process;
|
|
310
326
|
kuboProcess = process;
|
|
327
|
+
if (process.pid) {
|
|
328
|
+
const pid = process.pid;
|
|
329
|
+
liveKuboPids.add(pid);
|
|
330
|
+
process.once("exit", () => liveKuboPids.delete(pid));
|
|
331
|
+
}
|
|
311
332
|
});
|
|
312
333
|
pendingKuboStart = startPromise;
|
|
313
334
|
let startedProcess;
|
|
@@ -315,12 +336,15 @@ export default class Daemon extends Command {
|
|
|
315
336
|
startedProcess = await startPromise;
|
|
316
337
|
}
|
|
317
338
|
catch (error) {
|
|
318
|
-
|
|
319
|
-
if (
|
|
339
|
+
// Only clear state this attempt owns — it may track another attempt's healthy kubo (issue #70)
|
|
340
|
+
if (pendingKuboStart === startPromise)
|
|
341
|
+
pendingKuboStart = undefined;
|
|
342
|
+
if (!mainProcessExited && spawnedProcess && kuboProcess === spawnedProcess)
|
|
320
343
|
kuboProcess = undefined;
|
|
321
344
|
throw error;
|
|
322
345
|
}
|
|
323
|
-
pendingKuboStart
|
|
346
|
+
if (pendingKuboStart === startPromise)
|
|
347
|
+
pendingKuboStart = undefined;
|
|
324
348
|
if (mainProcessExited) {
|
|
325
349
|
if (startedProcess?.pid && !startedProcess.killed) {
|
|
326
350
|
// Race condition: Kubo finished starting after mainProcessExited.
|
|
@@ -341,7 +365,8 @@ export default class Daemon extends Command {
|
|
|
341
365
|
/* best effort */
|
|
342
366
|
}
|
|
343
367
|
}
|
|
344
|
-
kuboProcess
|
|
368
|
+
if (kuboProcess === startedProcess)
|
|
369
|
+
kuboProcess = undefined;
|
|
345
370
|
return;
|
|
346
371
|
}
|
|
347
372
|
kuboProcess = startedProcess;
|
|
@@ -353,7 +378,8 @@ export default class Daemon extends Command {
|
|
|
353
378
|
// Restart Kubo process because it failed
|
|
354
379
|
if (!mainProcessExited) {
|
|
355
380
|
log(`Kubo node with pid (${currentProcess?.pid}) exited. Will attempt to restart it`);
|
|
356
|
-
kuboProcess
|
|
381
|
+
if (kuboProcess === currentProcess)
|
|
382
|
+
kuboProcess = undefined;
|
|
357
383
|
try {
|
|
358
384
|
await keepKuboUp();
|
|
359
385
|
}
|
|
@@ -367,6 +393,19 @@ export default class Daemon extends Command {
|
|
|
367
393
|
};
|
|
368
394
|
currentProcess.once("exit", onKuboExit);
|
|
369
395
|
};
|
|
396
|
+
// Single-flight guard around keepKuboUpOnce (issue #70). keepKuboUp is called from
// independent places (the kubo exit handler and the watchdog interval); while one
// attempt is still pending, every caller receives that same promise instead of
// starting a second attempt that could spawn a competing kubo process.
// Holds the pending attempt, or undefined when no attempt is running.
let keepKuboUpInFlight;
const keepKuboUp = () => {
    // An attempt is already running — share it with this caller
    if (keepKuboUpInFlight)
        return keepKuboUpInFlight;
    const attempt = keepKuboUpOnce().finally(() => {
        // Settled (either way): the next call is allowed to start a fresh attempt
        keepKuboUpInFlight = undefined;
    });
    keepKuboUpInFlight = attempt;
    return attempt;
};
|
|
370
409
|
let startedOwnRpc = false;
|
|
371
410
|
let daemonServer;
|
|
372
411
|
const createOrConnectRpc = async () => {
|
|
@@ -384,7 +423,7 @@ export default class Daemon extends Command {
|
|
|
384
423
|
// Load installed challenge packages before starting the RPC server
|
|
385
424
|
const loadedChallenges = await loadChallengesIntoPKC(mergedPkcOptions.dataPath);
|
|
386
425
|
if (loadedChallenges.length > 0)
|
|
387
|
-
console.log(`Loaded challenge packages: ${loadedChallenges.join(", ")}`);
|
|
426
|
+
console.log(`Loaded challenge packages: ${loadedChallenges.map(formatChallengeNameVersion).join(", ")}`);
|
|
388
427
|
daemonServer = await startDaemonServer(pkcRpcUrl, ipfsGatewayEndpoint, mergedPkcOptions, {
|
|
389
428
|
allowPrivateKeyExport: flags.allowPrivateKeyExport
|
|
390
429
|
});
|
|
@@ -429,14 +468,29 @@ export default class Daemon extends Command {
|
|
|
429
468
|
}
|
|
430
469
|
};
|
|
431
470
|
const killKuboProcess = async () => {
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
471
|
+
// Test hook (issue #70): hold off the kubo teardown for a fixed delay so a test can
|
|
472
|
+
// deterministically reproduce the window where the daemon's RPC port is already free
|
|
473
|
+
// (daemonServer.destroy() runs in parallel) but kubo is still alive and bound. This is
|
|
474
|
+
// exactly the window `update install` must not restart into — see
|
|
475
|
+
// test/cli/update-install-restart-race.test.ts. The daemon process stays alive for the
|
|
476
|
+
// duration because the exit hook awaits killKuboProcess() before exiting.
|
|
477
|
+
const kuboShutdownDelayRaw = process.env["PKC_CLI_TEST_KUBO_SHUTDOWN_DELAY_MS"];
|
|
478
|
+
const kuboShutdownDelay = kuboShutdownDelayRaw ? Number(kuboShutdownDelayRaw) : 0;
|
|
479
|
+
if (Number.isFinite(kuboShutdownDelay) && kuboShutdownDelay > 0)
|
|
480
|
+
await new Promise((resolve) => setTimeout(resolve, kuboShutdownDelay));
|
|
481
|
+
// Wait (bounded) for any in-flight start attempt so we kill the kubo it may still
|
|
482
|
+
// spawn. Both promises settle on all failure paths (issue #70), but a spawned kubo
|
|
483
|
+
// that wedges before "Daemon is ready" without exiting keeps them pending — the
|
|
484
|
+
// bound ensures shutdown still reaches the SIGINT/SIGKILL flow below, which kills
|
|
485
|
+
// it via kuboProcess (set in onSpawn) or the liveKuboPids sweep (PR #71 review).
|
|
486
|
+
const inFlightStarts = [keepKuboUpInFlight, pendingKuboStart]
|
|
487
|
+
.filter((promise) => promise !== undefined)
|
|
488
|
+
.map((promise) => promise.catch(() => { }));
|
|
489
|
+
if (inFlightStarts.length > 0)
|
|
490
|
+
await Promise.race([
|
|
491
|
+
Promise.all(inFlightStarts),
|
|
492
|
+
new Promise((resolve) => setTimeout(resolve, 15_000).unref())
|
|
493
|
+
]);
|
|
440
494
|
if (kuboProcess?.pid && !kuboProcess.killed) {
|
|
441
495
|
const pid = kuboProcess.pid;
|
|
442
496
|
log("Attempting to kill kubo process with pid", pid);
|
|
@@ -466,6 +520,11 @@ export default class Daemon extends Command {
|
|
|
466
520
|
kuboProcess = undefined;
|
|
467
521
|
}
|
|
468
522
|
}
|
|
523
|
+
// Defense in depth: SIGKILL any spawned kubo that slipped out of kuboProcess
|
|
524
|
+
// tracking (e.g. via a state race) so nothing outlives the daemon (issue #70)
|
|
525
|
+
for (const pid of liveKuboPids)
|
|
526
|
+
killKuboProcessGroup(pid, "SIGKILL");
|
|
527
|
+
liveKuboPids.clear();
|
|
469
528
|
};
|
|
470
529
|
asyncExitHook(async () => {
|
|
471
530
|
if (keepKuboUpInterval)
|
|
@@ -489,16 +548,61 @@ export default class Daemon extends Command {
|
|
|
489
548
|
log.error("Error shutting down daemon server", e);
|
|
490
549
|
}
|
|
491
550
|
await kuboKillPromise;
|
|
492
|
-
}, { wait:
|
|
551
|
+
}, { wait: DAEMON_SHUTDOWN_TIMEOUT_MS } // could take two minutes to shut down
|
|
493
552
|
);
|
|
494
553
|
// Emergency cleanup: if the process force-exits (e.g. double Ctrl+C),
|
|
495
|
-
// synchronously SIGKILL kubo's process group. This is a no-op if
|
|
496
|
-
// killKuboProcess() already ran (it
|
|
554
|
+
// synchronously SIGKILL every live kubo's process group. This is a no-op if
|
|
555
|
+
// killKuboProcess() already ran (it clears kuboProcess and liveKuboPids).
|
|
497
556
|
process.on("exit", () => {
|
|
498
557
|
if (kuboProcess?.pid) {
|
|
499
558
|
killKuboProcessGroup(kuboProcess.pid, "SIGKILL");
|
|
500
559
|
}
|
|
560
|
+
for (const pid of liveKuboPids)
|
|
561
|
+
killKuboProcessGroup(pid, "SIGKILL");
|
|
501
562
|
});
|
|
563
|
+
// Persistent signal guard (issue #70): exit-hook registers its SIGINT/SIGTERM handlers
|
|
564
|
+
// with process.once, so its listener vanishes from the listener list the moment a
|
|
565
|
+
// signal is dispatched. signal-exit (loaded by @pkcprotocol/proper-lock-file and other
|
|
566
|
+
// dependencies) re-raises the signal when every remaining listener is its own — which
|
|
567
|
+
// would kill the process while the async exit hook above is still shutting kubo down.
|
|
568
|
+
// A persistent non-signal-exit listener keeps that heuristic from ever firing.
|
|
569
|
+
// A repeated signal force-quits immediately (impatient Ctrl+C): process.exit triggers
|
|
570
|
+
// the emergency "exit" handler above, which SIGKILLs every live kubo.
|
|
571
|
+
let terminationSignalCount = 0;
|
|
572
|
+
for (const signal of ["SIGINT", "SIGTERM"]) {
|
|
573
|
+
process.on(signal, () => {
|
|
574
|
+
terminationSignalCount++;
|
|
575
|
+
if (terminationSignalCount >= 2) {
|
|
576
|
+
log(`Received ${signal} again during shutdown, force-quitting`);
|
|
577
|
+
process.exit(signal === "SIGINT" ? 130 : 143);
|
|
578
|
+
}
|
|
579
|
+
});
|
|
580
|
+
}
|
|
581
|
+
// Test hook (issue #70): simulates a dependency registering a signal-exit handler
|
|
582
|
+
// AFTER the asyncExitHook above — what @pkcprotocol/proper-lock-file (and the
|
|
583
|
+
// signal-exit copies under ink/restore-cursor) do at module load. signal-exit
|
|
584
|
+
// re-raises the signal when every remaining listener belongs to the signal-exit
|
|
585
|
+
// family (`if (listeners.length === count) { ... process.kill(process.pid, s) }`),
|
|
586
|
+
// which kills the daemon while the async exit hook is still cleaning up kubo —
|
|
587
|
+
// exit-hook registers with process.once, so its listener is already gone by then.
|
|
588
|
+
if (process.env["PKC_CLI_TEST_SIMULATE_LATE_SIGNAL_EXIT"]) {
|
|
589
|
+
for (const signal of ["SIGINT", "SIGTERM"]) {
|
|
590
|
+
// Real signal-exit copies only count each other as "family" (via a shared
|
|
591
|
+
// global marker); any other listener makes them defer. Identify family by
|
|
592
|
+
// source: signal-exit's dispatcher carries the "an exit is coming" comment.
|
|
593
|
+
const isSignalExitFamily = (listener) => String(listener).includes("an exit is coming");
|
|
594
|
+
const reRaiser = () => {
|
|
595
|
+
const onlyFamilyLeft = process
|
|
596
|
+
.listeners(signal)
|
|
597
|
+
.every((listener) => listener === reRaiser || isSignalExitFamily(listener));
|
|
598
|
+
if (onlyFamilyLeft) {
|
|
599
|
+
process.removeListener(signal, reRaiser);
|
|
600
|
+
process.kill(process.pid, signal);
|
|
601
|
+
}
|
|
602
|
+
};
|
|
603
|
+
process.on(signal, reRaiser);
|
|
604
|
+
}
|
|
605
|
+
}
|
|
502
606
|
// RPC port was already verified free above (fail-fast); only the kuboRpcClientsOptions branch skips local kubo.
|
|
503
607
|
if (!pkcOptionsFromFlag?.kuboRpcClientsOptions)
|
|
504
608
|
await keepKuboUp();
|
|
@@ -163,6 +163,14 @@ export default class Logs extends Command {
|
|
|
163
163
|
// Follow mode: dump existing content (filtered + tailed) then watch for new data
|
|
164
164
|
let currentLogFile = latestLogFile;
|
|
165
165
|
const existingContent = await fsPromise.readFile(currentLogFile, "utf-8");
|
|
166
|
+
// Anchor the follow offset to exactly the bytes we just read, NOT a separate
|
|
167
|
+
// fsPromise.stat() taken afterwards. A later stat is racy: any append landing between
|
|
168
|
+
// this read and the stat is skipped (position jumps past it) yet was never in the dump
|
|
169
|
+
// above, so follow mode silently drops those lines. Under load that window widens — this
|
|
170
|
+
// was the cause of the intermittent CI failure where an appended line was never surfaced
|
|
171
|
+
// (issue #77). Byte length (not string length) because position indexes bytes in the file.
|
|
172
|
+
let position = Buffer.byteLength(existingContent, "utf-8");
|
|
173
|
+
let pendingBuffer = "";
|
|
166
174
|
const entries = this._parseLogEntries(existingContent);
|
|
167
175
|
const filtered = this._filterEntries(entries, since, until);
|
|
168
176
|
const streamFiltered = streamFilter ? this._filterByStream(filtered, streamFilter) : filtered;
|
|
@@ -170,9 +178,6 @@ export default class Logs extends Command {
|
|
|
170
178
|
const initialOutput = tailed.map((e) => e.lines.join("\n")).join("\n");
|
|
171
179
|
if (initialOutput)
|
|
172
180
|
process.stdout.write(initialOutput + "\n");
|
|
173
|
-
const stat = await fsPromise.stat(currentLogFile);
|
|
174
|
-
let position = stat.size;
|
|
175
|
-
let pendingBuffer = "";
|
|
176
181
|
// Watch for new data by reading directly from `position`. We intentionally do
|
|
177
182
|
// NOT gate on fsPromise.stat().size — on Windows + NTFS, stat() returns a stale
|
|
178
183
|
// size for a short window after another process appends, which causes the gate
|
|
@@ -10,5 +10,11 @@ export default class Install extends Command {
|
|
|
10
10
|
};
|
|
11
11
|
static examples: string[];
|
|
12
12
|
run(): Promise<void>;
|
|
13
|
+
/**
|
|
14
|
+
* Poll until the given PID no longer exists (signal 0 throws ESRCH), or the timeout elapses.
|
|
15
|
+
* Returns true if the process exited, false on timeout. EPERM means the process is still alive
|
|
16
|
+
* but owned by another user, so we keep waiting.
|
|
17
|
+
*/
|
|
18
|
+
private _waitForProcessExit;
|
|
13
19
|
private _restartDaemons;
|
|
14
20
|
}
|
|
@@ -4,7 +4,7 @@ import tcpPortUsed from "tcp-port-used";
|
|
|
4
4
|
import { fetchLatestVersion, installGlobal } from "../../../update/npm-registry.js";
|
|
5
5
|
import { fastInstallGlobal } from "../../../update/fast-update.js";
|
|
6
6
|
import { compareVersions } from "../../../update/semver.js";
|
|
7
|
-
import { getAliveDaemonStates } from "../../../common-utils/daemon-state.js";
|
|
7
|
+
import { getAliveDaemonStates, DAEMON_SHUTDOWN_TIMEOUT_MS } from "../../../common-utils/daemon-state.js";
|
|
8
8
|
export default class Install extends Command {
|
|
9
9
|
static description = "Install a specific version of bitsocial from npm";
|
|
10
10
|
static args = {
|
|
@@ -54,15 +54,17 @@ export default class Install extends Command {
|
|
|
54
54
|
throw e;
|
|
55
55
|
}
|
|
56
56
|
}
|
|
57
|
-
// Wait for
|
|
57
|
+
// Wait for each daemon process to fully exit — NOT just for its RPC port to free.
|
|
58
|
+
// The daemon releases its RPC port (daemonServer.destroy()) before it finishes killing
|
|
59
|
+
// its kubo child, so a port-only wait lets us restart while the old kubo still holds the
|
|
60
|
+
// IPFS API port; the new daemon then dies on startup with "IPFS API port already in use"
|
|
61
|
+
// (issue #70). The daemon's exit hook kills kubo before the process exits, so waiting for
|
|
62
|
+
// the PID to disappear guarantees the kubo port is free before we restart.
|
|
58
63
|
for (const d of aliveDaemons) {
|
|
59
|
-
|
|
60
|
-
const
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
const freed = await tcpPortUsed.waitUntilFree(port, 500, 30000).then(() => true).catch(() => false);
|
|
64
|
-
if (!freed) {
|
|
65
|
-
this.error(`Daemon (PID ${d.pid}) did not shut down within 30 seconds on port ${port}.`, { exit: 1 });
|
|
64
|
+
this.log(`Waiting for daemon (PID ${d.pid}) to exit...`);
|
|
65
|
+
const exited = await this._waitForProcessExit(d.pid, DAEMON_SHUTDOWN_TIMEOUT_MS);
|
|
66
|
+
if (!exited) {
|
|
67
|
+
this.error(`Daemon (PID ${d.pid}) did not shut down within ${DAEMON_SHUTDOWN_TIMEOUT_MS / 1000} seconds.`, { exit: 1 });
|
|
66
68
|
}
|
|
67
69
|
}
|
|
68
70
|
this.log("All daemons stopped.");
|
|
@@ -118,6 +120,33 @@ export default class Install extends Command {
|
|
|
118
120
|
this.log("To see the daemon logs run `bitsocial logs --stdout`");
|
|
119
121
|
}
|
|
120
122
|
}
|
|
123
|
+
/**
|
|
124
|
+
* Poll until the given PID no longer exists (signal 0 throws ESRCH), or the timeout elapses.
|
|
125
|
+
* Returns true if the process exited, false on timeout. EPERM means the process is still alive
|
|
126
|
+
* but owned by another user, so we keep waiting.
|
|
127
|
+
*/
|
|
128
|
+
async _waitForProcessExit(pid, timeoutMs) {
|
|
129
|
+
const deadline = Date.now() + timeoutMs;
|
|
130
|
+
while (Date.now() < deadline) {
|
|
131
|
+
try {
|
|
132
|
+
process.kill(pid, 0);
|
|
133
|
+
}
|
|
134
|
+
catch (e) {
|
|
135
|
+
if (e.code === "ESRCH")
|
|
136
|
+
return true; // no such process — it exited
|
|
137
|
+
}
|
|
138
|
+
await new Promise((resolve) => setTimeout(resolve, 250));
|
|
139
|
+
}
|
|
140
|
+
// Final check so a process that exits in the last interval isn't reported as a timeout
|
|
141
|
+
try {
|
|
142
|
+
process.kill(pid, 0);
|
|
143
|
+
}
|
|
144
|
+
catch (e) {
|
|
145
|
+
if (e.code === "ESRCH")
|
|
146
|
+
return true;
|
|
147
|
+
}
|
|
148
|
+
return false;
|
|
149
|
+
}
|
|
121
150
|
async _restartDaemons(daemons) {
|
|
122
151
|
this.log(`Restarting ${daemons.length} daemon(s)...`);
|
|
123
152
|
for (const d of daemons) {
|
|
@@ -1,8 +1,17 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Maximum time a daemon is allowed to shut down its kubo + RPC server during its
|
|
3
|
+
* async exit hook. The `update install --restart-daemons` orchestrator must wait at
|
|
4
|
+
* least this long for a stopped daemon's PID to disappear before giving up — otherwise
|
|
5
|
+
* a slow-but-valid shutdown (within the daemon's own contract) aborts the update midway.
|
|
6
|
+
*/
|
|
7
|
+
export declare const DAEMON_SHUTDOWN_TIMEOUT_MS = 120000;
|
|
1
8
|
export interface DaemonState {
|
|
2
9
|
pid: number;
|
|
3
10
|
startedAt: string;
|
|
4
11
|
argv: string[];
|
|
5
12
|
pkcRpcUrl: string;
|
|
13
|
+
/** OS-reported process start time, used to detect PID reuse. Absent in legacy state files. */
|
|
14
|
+
procStartTime?: string;
|
|
6
15
|
}
|
|
7
16
|
/** Write a daemon state file atomically (write to .tmp then rename). */
|
|
8
17
|
export declare function writeDaemonState(state: DaemonState): Promise<void>;
|
|
@@ -10,7 +19,7 @@ export declare function writeDaemonState(state: DaemonState): Promise<void>;
|
|
|
10
19
|
export declare function readAllDaemonStates(): Promise<DaemonState[]>;
|
|
11
20
|
/** Delete a specific daemon's state file. Ignores ENOENT. */
|
|
12
21
|
export declare function deleteDaemonState(pid: number): Promise<void>;
|
|
13
|
-
/** Delete state files for dead PIDs from disk. */
|
|
22
|
+
/** Delete state files for dead or reused PIDs from disk. */
|
|
14
23
|
export declare function pruneStaleStates(): Promise<void>;
|
|
15
|
-
/** Read all states, delete stale files (dead PIDs) from disk, return only alive ones. */
|
|
24
|
+
/** Read all states, delete stale files (dead or reused PIDs) from disk, return only alive ones. */
|
|
16
25
|
export declare function getAliveDaemonStates(): Promise<DaemonState[]>;
|
|
@@ -1,12 +1,67 @@
|
|
|
1
1
|
import defaults from "./defaults.js";
|
|
2
2
|
import path from "path";
|
|
3
3
|
import fs from "fs/promises";
|
|
4
|
+
import { execFile } from "child_process";
|
|
5
|
+
import { promisify } from "util";
|
|
6
|
+
const execFileAsync = promisify(execFile);
|
|
4
7
|
const DAEMON_STATES_DIR = path.join(defaults.PKC_DATA_PATH, ".daemon_states");
|
|
8
|
+
/**
|
|
9
|
+
* Maximum time a daemon is allowed to shut down its kubo + RPC server during its
|
|
10
|
+
* async exit hook. The `update install --restart-daemons` orchestrator must wait at
|
|
11
|
+
* least this long for a stopped daemon's PID to disappear before giving up — otherwise
|
|
12
|
+
* a slow-but-valid shutdown (within the daemon's own contract) aborts the update midway.
|
|
13
|
+
*/
|
|
14
|
+
export const DAEMON_SHUTDOWN_TIMEOUT_MS = 120000;
|
|
5
15
|
function stateFilePath(pid) {
|
|
6
16
|
return path.join(DAEMON_STATES_DIR, `${pid}-daemon.state`);
|
|
7
17
|
}
|
|
18
|
+
/**
 * Look up the OS-reported start time of a process. Callers use it as an identity token:
 * when a state file's PID has been reused by an unrelated process, the start time no
 * longer matches the recorded one.
 *
 * Source 1: /proc/<pid>/stat — starttime (field 22).
 * Source 2 (when /proc can't be read): the output of `ps -o lstart=`.
 * NOTE(review): the `ps` text may vary with locale/timezone on some platforms — confirm
 * the token is stable between the process that records it and the one that compares it.
 *
 * @param {number} pid - PID to inspect.
 * @returns {Promise<string | undefined>} the start-time token, or undefined when it
 *   can't be determined.
 */
async function getProcessStartTime(pid) {
    let procStat;
    try {
        procStat = await fs.readFile(`/proc/${pid}/stat`, "utf-8");
    }
    catch {
        procStat = undefined; // /proc unavailable or unreadable — try ps below
    }
    if (procStat !== undefined) {
        // comm (field 2) may itself contain spaces/parens, so resume after the LAST ')'.
        // The remainder starts at field 3, which puts field 22 at index 19.
        const afterComm = procStat.slice(procStat.lastIndexOf(")") + 2);
        return afterComm.split(" ")[19];
    }
    try {
        const psResult = await execFileAsync("ps", ["-p", String(pid), "-o", "lstart="]);
        const startedAt = psResult.stdout.trim();
        return startedAt === "" ? undefined : startedAt;
    }
    catch {
        return undefined;
    }
}
|
|
41
|
+
/**
 * Look up the full command line of a process.
 *
 * Source 1: /proc/<pid>/cmdline, with the NUL separators turned into spaces. An empty
 * result is returned as "" on purpose — it is meaningful (kernel thread, not a daemon).
 * Source 2 (when /proc can't be read): the output of `ps -o args=`; empty output there
 * counts as unknown.
 *
 * @param {number} pid - PID to inspect.
 * @returns {Promise<string | undefined>} the command line, or undefined when it can't
 *   be determined.
 */
async function getProcessCommandLine(pid) {
    let rawCmdline;
    try {
        rawCmdline = await fs.readFile(`/proc/${pid}/cmdline`, "utf-8");
    }
    catch {
        rawCmdline = undefined; // /proc unavailable or unreadable — try ps below
    }
    if (rawCmdline !== undefined) {
        const argv = rawCmdline.split("\0");
        return argv.join(" ").trim();
    }
    try {
        const psResult = await execFileAsync("ps", ["-p", String(pid), "-o", "args="]);
        const args = psResult.stdout.trim();
        return args === "" ? undefined : args;
    }
    catch {
        return undefined;
    }
}
|
|
8
58
|
/** Write a daemon state file atomically (write to .tmp then rename). */
|
|
9
59
|
export async function writeDaemonState(state) {
|
|
60
|
+
if (state.procStartTime === undefined) {
|
|
61
|
+
const procStartTime = await getProcessStartTime(state.pid);
|
|
62
|
+
if (procStartTime !== undefined)
|
|
63
|
+
state = { ...state, procStartTime };
|
|
64
|
+
}
|
|
10
65
|
await fs.mkdir(DAEMON_STATES_DIR, { recursive: true });
|
|
11
66
|
const dest = stateFilePath(state.pid);
|
|
12
67
|
const tmp = dest + ".tmp";
|
|
@@ -60,21 +115,37 @@ function isPidAlive(pid) {
|
|
|
60
115
|
return false; // ESRCH — no such process
|
|
61
116
|
}
|
|
62
117
|
}
|
|
63
|
-
/**
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
118
|
+
/**
 * Decide whether the daemon that wrote `state` is still the process running under its PID.
 *
 * Liveness alone is not sufficient: the PID in a stale state file may since have been
 * reused by an unrelated process (e.g. a state file written inside a Docker container
 * whose PID maps to a kernel thread on the host — issue #66).
 *
 * @param {{ pid: number, procStartTime?: string }} state - Parsed daemon state file.
 * @returns {Promise<boolean>} false when the PID is dead or provably belongs to another
 *   process; true otherwise, including when identity can't be determined.
 */
async function isDaemonStateAlive(state) {
    const pid = state.pid;
    if (!isPidAlive(pid))
        return false;
    const recordedStartTime = state.procStartTime;
    if (recordedStartTime === undefined) {
        // Legacy state file without procStartTime — heuristic: the command line must
        // reference bitsocial. An unknown command line falls back to liveness only.
        const commandLine = await getProcessCommandLine(pid);
        return commandLine === undefined ? true : commandLine.includes("bitsocial");
    }
    // A differing start time means the PID was reused; an unknown one falls back to
    // liveness only.
    const observedStartTime = await getProcessStartTime(pid);
    return observedStartTime === undefined || observedStartTime === recordedStartTime;
}
|
|
139
|
+
/**
 * Delete state files for dead or reused PIDs from disk.
 *
 * @returns {Promise<void>} settles once getAliveDaemonStates() has finished.
 */
export async function pruneStaleStates() {
    // All the work is delegated to getAliveDaemonStates(); its result is not needed here
    void (await getAliveDaemonStates());
}
|
|
72
|
-
/** Read all states, delete stale files (dead PIDs) from disk, return only alive ones. */
|
|
143
|
+
/** Read all states, delete stale files (dead or reused PIDs) from disk, return only alive ones. */
|
|
73
144
|
export async function getAliveDaemonStates() {
|
|
74
145
|
const states = await readAllDaemonStates();
|
|
75
146
|
const alive = [];
|
|
76
147
|
for (const state of states) {
|
|
77
|
-
if (
|
|
148
|
+
if (await isDaemonStateAlive(state)) {
|
|
78
149
|
alive.push(state);
|
|
79
150
|
}
|
|
80
151
|
else {
|
package/dist/ipfs/startIpfs.js
CHANGED
|
@@ -185,53 +185,53 @@ async function ensureIpfsPortsAreAvailable(log, configPath, apiUrl, gatewayUrl)
|
|
|
185
185
|
}
|
|
186
186
|
}
|
|
187
187
|
export async function startKuboNode(apiUrl, gatewayUrl, dataPath, onSpawn) {
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
if (
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
}
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
188
|
+
// Preparation phase runs as plain awaits so any failure rejects the returned promise.
|
|
189
|
+
// It must NOT live inside the new Promise() executor below: an async executor swallows
|
|
190
|
+
// throws as unhandledRejections and the promise never settles, which wedges the daemon's
|
|
191
|
+
// pendingKuboStart tracking and hangs its shutdown (issue #70).
|
|
192
|
+
const log = PKCLogger("bitsocial-cli:ipfs:startKuboNode");
|
|
193
|
+
const ipfsDataPath = process.env["IPFS_PATH"] || path.join(dataPath, ".bitsocial-cli.ipfs");
|
|
194
|
+
await fs.promises.mkdir(ipfsDataPath, { recursive: true });
|
|
195
|
+
const ipfsConfigPath = path.join(ipfsDataPath, "config");
|
|
196
|
+
const kuboExePath = await getKuboExePath();
|
|
197
|
+
const kuboVersion = await getKuboVersion();
|
|
198
|
+
log(`Using Kubo version: ${kuboVersion}`);
|
|
199
|
+
log(`IpfsDataPath (${ipfsDataPath}), kuboExePath (${kuboExePath})`, "kubo ipfs config file", path.join(ipfsDataPath, "config"));
|
|
200
|
+
log("If you would like to change kubo config, please edit the config file at", path.join(ipfsDataPath, "config"));
|
|
201
|
+
const env = { IPFS_PATH: ipfsDataPath, DEBUG_COLORS: "1" };
|
|
202
|
+
let configJustInitialized = false;
|
|
203
|
+
try {
|
|
204
|
+
await _spawnAsync(log, kuboExePath, ["init"], { env, hideWindows: true });
|
|
205
|
+
configJustInitialized = true;
|
|
206
|
+
}
|
|
207
|
+
catch (e) {
|
|
208
|
+
const error = e;
|
|
209
|
+
if (!error?.message?.includes("ipfs configuration file already exists!"))
|
|
210
|
+
throw new Error("Failed to call ipfs init" + error);
|
|
211
|
+
}
|
|
212
|
+
if (configJustInitialized) {
|
|
213
|
+
await _spawnAsync(log, kuboExePath, ["config", "profile", "apply", `server`], {
|
|
214
|
+
env,
|
|
215
|
+
hideWindows: true
|
|
216
|
+
});
|
|
217
|
+
log("Called 'ipfs config profile apply server' successfully");
|
|
218
|
+
await mergeCliDefaultsIntoIpfsConfig(log, ipfsConfigPath, apiUrl, gatewayUrl);
|
|
219
|
+
}
|
|
220
|
+
else {
|
|
221
|
+
log("IPFS config already exists; skipping config overrides to preserve user changes.");
|
|
222
|
+
}
|
|
223
|
+
try {
|
|
224
|
+
await _spawnAsync(log, kuboExePath, ["repo", "migrate"], { env, hideWindows: true });
|
|
225
|
+
log("Ensured IPFS repository is migrated to the latest supported version.");
|
|
226
|
+
}
|
|
227
|
+
catch (migrationError) {
|
|
228
|
+
log.error("Failed to run IPFS repo migrations automatically", migrationError);
|
|
229
|
+
throw migrationError;
|
|
230
|
+
}
|
|
231
|
+
await ensureIpfsPortsAreAvailable(log, ipfsConfigPath, apiUrl, gatewayUrl);
|
|
232
|
+
// Spawn phase: the promise only wraps the event-driven wait for kubo's "Daemon is ready",
|
|
233
|
+
// so every settle path goes through resolve/reject.
|
|
234
|
+
return new Promise((resolve, reject) => {
|
|
235
235
|
const daemonArgs = ["--enable-namesys-pubsub", "--migrate"];
|
|
236
236
|
const kuboProcess = spawn(kuboExePath, ["daemon", ...daemonArgs], {
|
|
237
237
|
env,
|
|
@@ -6,7 +6,7 @@ import fs from "fs/promises";
|
|
|
6
6
|
import { PKCLogger } from "../util.js";
|
|
7
7
|
import { randomBytes } from "crypto";
|
|
8
8
|
import express from "express";
|
|
9
|
-
import { loadChallengesIntoPKC } from "../challenge-packages/challenge-utils.js";
|
|
9
|
+
import { loadChallengesIntoPKC, formatChallengeNameVersion } from "../challenge-packages/challenge-utils.js";
|
|
10
10
|
const rootHashRedirectScriptPattern = /<script\b[^>]*>(?:(?!<\/script>)[\s\S])*?window\.location\.replace\(["']\/#["']\s*\+\s*window\.location\.pathname\s*\+\s*window\.location\.search\);(?:(?!<\/script>)[\s\S])*?<\/script>/;
|
|
11
11
|
async function _generateModifiedIndexHtmlWithRpcSettings(webuiPath, webuiName, ipfsGatewayPort) {
|
|
12
12
|
const indexHtmlString = (await fs.readFile(path.join(webuiPath, "index_backup_no_rpc.html")))
|
|
@@ -124,7 +124,7 @@ export async function startDaemonServer(rpcUrl, ipfsGatewayUrl, pkcOptions, rpcS
|
|
|
124
124
|
// Challenge reload endpoints
|
|
125
125
|
const handleChallengeReload = async (_req, res) => {
|
|
126
126
|
try {
|
|
127
|
-
const
|
|
127
|
+
const loadedChallenges = await loadChallengesIntoPKC(pkcOptions.dataPath);
|
|
128
128
|
// Notify all connected RPC clients about the updated challenges
|
|
129
129
|
const onSettingsChange = rpcServer._onSettingsChange;
|
|
130
130
|
if (onSettingsChange) {
|
|
@@ -139,7 +139,7 @@ export async function startDaemonServer(rpcUrl, ipfsGatewayUrl, pkcOptions, rpcS
|
|
|
139
139
|
}
|
|
140
140
|
}
|
|
141
141
|
}
|
|
142
|
-
res.json({ ok: true, challenges:
|
|
142
|
+
res.json({ ok: true, challenges: loadedChallenges.map(formatChallengeNameVersion) });
|
|
143
143
|
}
|
|
144
144
|
catch (err) {
|
|
145
145
|
log.error("Failed to reload challenges", err);
|