@openparachute/hub 0.6.5-rc.7 → 0.6.5-rc.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@openparachute/hub",
3
- "version": "0.6.5-rc.7",
3
+ "version": "0.6.5-rc.8",
4
4
  "description": "parachute — the local hub for the Parachute ecosystem (discovery, ports, lifecycle, soon OAuth).",
5
5
  "license": "AGPL-3.0",
6
6
  "publishConfig": {
@@ -214,18 +214,23 @@ describe("DbHolder.probePath (#610 proactive detection)", () => {
214
214
  h.cleanup();
215
215
  });
216
216
 
217
- test("path GONE (ENOENT) → reopen attempted; reopen verify fails → exit(1)", () => {
218
- // Reopen returns a closed handle (the dir is still gone) → SELECT 1 throws →
219
- // exit. This is the genuine `rm -rf ~/.parachute` field shape.
220
- const dead = new Database(":memory:");
221
- dead.close();
217
+ test("path GONE (ENOENT) → exit(1) directly, NO reopen (#619 follow-up)", () => {
218
+ // The genuine `rm -rf ~/.parachute` field shape. We must NOT reopen here:
219
+ // reopen is openHubDb, which mkdir-recursive's the dir back + opens a fresh
220
+ // EMPTY db, so its SELECT-1 verify would PASS and the hub would "heal" into a
221
+ // half-recovered state (empty db, stale in-memory state, wiped well-known,
222
+ // un-respawned modules). A full wipe must exit so the platform manager does a
223
+ // clean restart that re-bootstraps everything. `onReopen` throws to PROVE the
224
+ // reopen path is never taken — if it were, this test would surface the throw.
222
225
  const h = makeHolder({
223
226
  initialInode: INODE_A,
224
227
  statInode: () => undefined, // ENOENT
225
- onReopen: () => dead,
228
+ onReopen: () => {
229
+ throw new Error("reopen must NOT be called on a gone verdict");
230
+ },
226
231
  });
227
232
  expect(h.holder.probePath()).toBe("gone");
228
- expect(h.stats().reopens).toBe(1);
233
+ expect(h.stats().reopens).toBe(0);
229
234
  expect(h.stats().exits).toBe(1);
230
235
  expect(h.stats().exitCode).toBe(1);
231
236
  h.cleanup();
@@ -383,25 +383,41 @@ export function createDbHolder(initial: Database, deps: DbHolderDeps): DbHolder
383
383
  const verdict = classifyPathLiveness({ expected: currentInode, current: pathInode });
384
384
  if (verdict === "ok" || verdict === "unknown") return verdict;
385
385
 
386
- // Genuine wipe signal: the on-disk DB the handle points at is gone
387
- // ("gone") or was replaced underneath us ("replaced"). Trigger the SAME
388
- // reopen-or-exit machinery. When the path is gone, reopen's SELECT-1
389
- // verify fails → exit → platform manager restarts with a fresh on-disk
390
- // handle (seconds, not "never"). When replaced, we adopt the fresh inode.
386
+ if (verdict === "gone") {
387
+ // The whole state dir was wiped under the running hub (`rm -rf
388
+ // ~/.parachute`). We must NOT reopen-in-place here: `reopen` is
389
+ // `openHubDb`, which `mkdirSync`'s the dir back + opens a fresh EMPTY db,
390
+ // so its SELECT-1 verify would PASS and we'd "heal" into a half-recovered
391
+ // hub — empty db, but stale in-memory state, wiped well-known files, and
392
+ // supervised modules whose own state dirs are gone yet never re-spawned
393
+ // (#619 follow-up). The correct recovery for a full wipe is a clean
394
+ // process exit so the platform manager (systemd / launchd / container)
395
+ // restarts `parachute serve`, which re-bootstraps everything (well-known,
396
+ // admin seed, supervisor re-spawn). This restores the #610 design intent
397
+ // ("we exit, letting the platform manager restart") that the shared
398
+ // reopen-or-exit path silently defeated via openHubDb's mkdir-recursive.
399
+ log(
400
+ `parachute hub: db path ${deps.dbPath} no longer exists (state dir wiped under a running hub, #610); exiting so the platform manager restarts the hub with a freshly bootstrapped state dir.`,
401
+ );
402
+ exit(1);
403
+ return verdict;
404
+ }
405
+
406
+ // "replaced": the db FILE was swapped underneath us (e.g. a restore copied
407
+ // a new file over the same path) while the rest of the state dir is intact.
408
+ // Adopting the fresh inode in-place via reopen-or-exit is correct here — a
409
+ // process restart would be heavier than needed.
391
410
  //
392
- // ONE-TICK /health ANOMALY (intentional): on a "replaced" verdict the
393
- // reopenOrExit below heals SYNCHRONOUSLY, but we still RETURN "replaced"
394
- // for this one call — so the /health request that drove this probe reports
395
- // `db:"error: path-replaced"` even though the handle is now healthy; the
396
- // very next request reads `ok`. We don't mask it (returning "ok" here would
397
- // hide that a heal just happened, which is exactly what monitoring wants to
398
- // see). It's safe because #591's adoption probe checks only HTTP 200
399
- // (`res.ok`), not the specific `db` string, so a single transient error
400
- // string can't cascade.
411
+ // ONE-TICK /health ANOMALY (intentional): the reopenOrExit below heals
412
+ // SYNCHRONOUSLY, but we still RETURN "replaced" for this one call — so the
413
+ // /health request that drove this probe reports `db:"error: path-replaced"`
414
+ // even though the handle is now healthy; the very next request reads `ok`.
415
+ // We don't mask it (returning "ok" here would hide that a heal just
416
+ // happened, which is exactly what monitoring wants to see). It's safe
417
+ // because #591's adoption probe checks only HTTP 200 (`res.ok`), not the
418
+ // specific `db` string, so a single transient error string can't cascade.
401
419
  reopenOrExit(
402
- verdict === "gone"
403
- ? `db path ${deps.dbPath} no longer exists (state dir wiped under a running hub, #610)`
404
- : `db path ${deps.dbPath} now resolves to a different inode (DB file replaced underneath the open handle, #610)`,
420
+ `db path ${deps.dbPath} now resolves to a different inode (DB file replaced underneath the open handle, #610)`,
405
421
  );
406
422
  return verdict;
407
423
  },
package/src/hub-server.ts CHANGED
@@ -1627,8 +1627,11 @@ export function hubFetch(
1627
1627
  // succeeding, so `probeDbLiveness` alone would report `db:"ok"` on a
1628
1628
  // database that's gone from disk (the /health lie the issue calls
1629
1629
  // out). `probeDbPath` stat()s the path + compares inodes; on a
1630
- // gone/replaced verdict it ALSO self-heals (reopen-or-exit) and we
1631
- // surface the fault so the #591 adoption probe + monitoring see it.
1630
+ // "replaced" verdict it self-heals in-place (reopen-or-exit, adopt
1631
+ // the new inode); on a "gone" verdict it exits the process directly
1632
+ // (#621 — a full wipe needs a clean platform-manager restart, not an
1633
+ // empty-db reopen). Either way we surface the fault so the #591
1634
+ // adoption probe + monitoring see it.
1632
1635
  const pathVerdict = deps?.probeDbPath?.();
1633
1636
  if (pathVerdict === "gone" || pathVerdict === "replaced") {
1634
1637
  // One-request anomaly on "replaced": probeDbPath already healed the