rollbridge 0.1.5 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,8 @@
1
1
  // @ts-check
2
2
 
3
3
  import assert from "node:assert/strict"
4
+ import {spawn} from "node:child_process"
5
+ import {once} from "node:events"
4
6
  import fs from "node:fs/promises"
5
7
  import net from "node:net"
6
8
  import os from "node:os"
@@ -10,15 +12,51 @@ import {fileURLToPath} from "node:url"
10
12
  import RollbridgeDaemon from "../src/daemon.js"
11
13
  import {normalizeConfig} from "../src/config.js"
12
14
  import {sendControlCommand} from "../src/control-client.js"
15
+ import {readState, writeState} from "../src/state-store.js"
13
16
  import {runCli} from "../src/cli.js"
14
17
 
15
18
  const currentDir = path.dirname(fileURLToPath(import.meta.url))
16
19
  const binPath = path.join(currentDir, "..", "bin", "rollbridge")
17
20
  const dependentAppPath = path.join(currentDir, "fixtures", "dependent-app.js")
18
21
  const dummyAppPath = path.join(currentDir, "fixtures", "dummy-app.js")
22
+ const memoryHogPath = path.join(currentDir, "fixtures", "memory-hog.js")
19
23
  const serviceAppPath = path.join(currentDir, "fixtures", "service-app.js")
20
24
  const singletonAppPath = path.join(currentDir, "fixtures", "singleton-app.js")
21
25
 
26
// Deploys v1, pins it in the draining state with an open WebSocket, deploys v2, and then checks
// that v1's nonBlockingDrain worker is already stopped while v1 itself is still draining.
test("a nonBlockingDrain worker stops immediately while its release is still draining", async () => {
  const fixture = await createFixture({nonBlockingDrainWorker: true})
  const daemon = await startDaemon(fixture.config)
  /** @type {WebSocket | undefined} */
  let socket

  try {
    await daemon.deploy({releaseId: "v1", releasePath: fixture.root, revision: "v1"})

    // Hold a WebSocket open so v1 still has a pending connection when v2 takes over.
    socket = await openWebSocket(daemon)
    await daemon.deploy({releaseId: "v2", releasePath: fixture.root, revision: "v2"})

    // The worker should be stopped without waiting for the held connection to go away.
    await waitFor(() => {
      const v1Release = daemon.status().releases.find(({releaseId}) => releaseId === "v1")
      const worker = v1Release?.processes.find(({id}) => id === "worker")

      return worker?.state === "stopped"
    })

    const v1 = statusRelease(daemon, "v1")
    /**
     * @param {string} processId - Process id within v1.
     * @returns {string | undefined} That process's reported state, if it is listed.
     */
    const stateOf = (processId) => v1.processes.find(({id}) => id === processId)?.state

    // v1 is still draining and its web process is still running; only the worker has stopped.
    assert.equal(v1.state, "draining")
    assert.equal(stateOf("web"), "running")
    assert.equal(stateOf("worker"), "stopped")
  } finally {
    socket?.close()
    await daemon.shutdown()
    await fs.rm(fixture.root, {force: true, recursive: true})
  }
})
59
+
22
60
  test("deploy switches new HTTP traffic while old WebSockets drain", async () => {
23
61
  const fixture = await createFixture()
24
62
  const daemon = await startDaemon(fixture.config)
@@ -205,6 +243,36 @@ test("service processes start before releases and restart with the latest deploy
205
243
  }
206
244
  })
207
245
 
246
// Checks that a companion configured with three replicas shows up as worker#0..worker#2, and that
// restartProcesses restarts one replica for a replica id and every replica for the base id.
test("a replicated companion starts one instance per replica, and restart targets one or all", async () => {
  const expectedIds = ["worker#0", "worker#1", "worker#2"]
  const fixture = await createFixture({companionReplicas: 3})
  const daemon = await startDaemon(fixture.config)

  try {
    await daemon.deploy({releaseId: "v1", releasePath: fixture.root, revision: "v1"})

    const release = daemon.status().releases.find(({state}) => state === "active")

    assert.ok(release)

    const workerIds = release.processes
      .map(({id}) => id)
      .filter((id) => id.startsWith("worker"))
      .sort()

    assert.deepEqual(workerIds, expectedIds)

    // Addressing a single replica restarts just that one.
    const one = await daemon.restartProcesses({processId: "worker#1"})

    assert.deepEqual(one.restarted, ["worker#1"])

    // Addressing the base id restarts all replicas; order is not asserted, so sort a copy.
    const all = await daemon.restartProcesses({processId: "worker"})
    const restartedIds = /** @type {string[]} */ (all.restarted)

    assert.deepEqual([...restartedIds].sort(), expectedIds)
  } finally {
    await daemon.shutdown()
    await fs.rm(fixture.root, {force: true, recursive: true})
  }
})
275
+
208
276
  test("restart bounces a single process by id", async () => {
209
277
  const fixture = await createFixture({includeService: true})
210
278
  const daemon = await startDaemon(fixture.config)
@@ -336,6 +404,383 @@ test("the restart control command bounces a process over the socket", async () =
336
404
  }
337
405
  })
338
406
 
407
// Checks that the "beacon" service reports lastStartReason "deploy" after a deploy and "manual"
// after restartProcesses, and that both reasons appear on "process started" events.
test("status and events distinguish deploy starts from manual restarts", async () => {
  const fixture = await createFixture({includeService: true})
  const daemon = await startDaemon(fixture.config)

  /**
   * @returns {unknown} The beacon service's current lastStartReason (asserts the service exists).
   */
  const beaconStartReason = () => {
    const beacon = daemon.status().services.find(({id}) => id === "beacon")

    assert.ok(beacon)

    return beacon.process.lastStartReason
  }

  try {
    await daemon.deploy({releaseId: "v1", releasePath: fixture.root, revision: "v1"})
    assert.equal(beaconStartReason(), "deploy")

    await daemon.restartProcesses({processId: "beacon"})
    assert.equal(beaconStartReason(), "manual")

    const response = await sendControlCommand({
      command: {command: "events"},
      path: fixture.config.control.path
    })
    const events = /** @type {import("../src/event-log.js").DaemonEvent[]} */ (response.events)
    const startEvents = events.filter(({message}) => message === "process started")
    const startReasons = startEvents.map(({data}) => data.reason)
    const seenReasons = JSON.stringify(startReasons)

    assert.ok(startReasons.includes("deploy"), seenReasons)
    assert.ok(startReasons.includes("manual"), seenReasons)
  } finally {
    await daemon.shutdown()
    await fs.rm(fixture.root, {force: true, recursive: true})
  }
})
439
+
440
// Checks that a deploy eventually writes the active release id to statePath and that the state
// can no longer be read back after daemon.shutdown().
test("persists daemon state to statePath and removes it on a clean shutdown", async () => {
  const fixture = await createFixture({persistState: true})
  const daemon = await startDaemon(fixture.config)
  const readPersisted = () => readState(fixture.statePath)
  let stateAfterShutdown

  try {
    await daemon.deploy({releaseId: "v1", releasePath: fixture.root, revision: "v1"})

    // deploy does not wait for the state write, so poll until the snapshot shows up.
    await waitFor(async () => {
      const persisted = /** @type {{activeReleaseId: string} | undefined} */ (await readPersisted())

      return persisted?.activeReleaseId === "v1"
    })

    await daemon.shutdown()
    stateAfterShutdown = await readPersisted()
  } finally {
    // Only shut down here if the try block failed before reaching its own shutdown call.
    if (!daemon.stopping) await daemon.shutdown()
    await fs.rm(fixture.root, {force: true, recursive: true})
  }

  assert.equal(stateAfterShutdown, undefined, "state file removed on clean shutdown")
})
464
+
465
// Shuts the daemon down right after a deploy, without waiting for the state write, and checks
// that no state can be read from statePath afterwards.
test("a clean shutdown clears the state file even when a persist write is in flight", async () => {
  const fixture = await createFixture({persistState: true})
  const daemon = await startDaemon(fixture.config)

  try {
    await daemon.deploy({releaseId: "v1", releasePath: fixture.root, revision: "v1"})

    // No waiting here on purpose: the deploy's state write may not have finished yet.
    await daemon.shutdown()

    const stateAfterShutdown = await readState(fixture.statePath)

    assert.equal(stateAfterShutdown, undefined, "state file must not be recreated by an in-flight write")
  } finally {
    if (!daemon.stopping) await daemon.shutdown()
    await fs.rm(fixture.root, {force: true, recursive: true})
  }
})
481
+
482
// Writes a state file naming a live pid and checks that reportOrphans() logs it as an orphan;
// then rewrites the state with a pid that is assumed dead and checks that nothing is logged.
test("reports orphaned managed processes from a previous daemon's state", async () => {
  const orphanMessage = "orphaned managed process detected"
  const dir = await fs.mkdtemp(path.join(os.tmpdir(), "rollbridge-test-"))
  // A live process standing in for a leftover managed child from a crashed daemon.
  const leftover = spawn(process.execPath, ["-e", "setInterval(() => {}, 1000)"], {stdio: "ignore"})

  // Everything after the spawn runs inside try so that a failure during setup (spawn error,
  // config normalization, daemon construction) still kills the child and removes the temp dir.
  try {
    await once(leftover, "spawn")

    const statePath = path.join(dir, "state.json")
    /** @type {{data: Record<string, import("../src/json.js").JsonValue>, message: string}[]} */
    const logs = []
    const config = normalizeConfig({
      application: "rollbridge-test",
      control: {path: path.join(dir, "rollbridge.sock")},
      processes: [{command: "true", id: "web", policy: "proxied", port: {from: 0, to: 0}}],
      proxy: {host: "127.0.0.1", port: 0},
      statePath
    })
    const daemon = new RollbridgeDaemon({config, logger: (message, data = {}) => { logs.push({data, message}) }})

    /**
     * @param {number | undefined} pid - Pid to record for the "worker" process of release v1.
     * @returns {Promise<void>} Resolves once the state file has been written.
     */
    const writeStateWithWorkerPid = (pid) => writeState(statePath, {
      activeReleaseId: "v1",
      releases: [{processes: [{id: "worker", pid}], releaseId: "v1"}],
      services: [],
      singletons: []
    })

    // A prior daemon left a worker with this (still-alive) pid.
    await writeStateWithWorkerPid(leftover.pid)
    await daemon.reportOrphans()

    assert.ok(
      logs.some(({data, message}) => message === orphanMessage && data.pid === leftover.pid),
      JSON.stringify(logs)
    )

    // A dead pid is not reported. The pid used here is assumed not to belong to any live process.
    logs.length = 0
    await writeStateWithWorkerPid(2147483646)
    await daemon.reportOrphans()

    assert.ok(!logs.some(({message}) => message === orphanMessage))
  } finally {
    leftover.kill("SIGKILL")
    await fs.rm(dir, {force: true, recursive: true})
  }
})
530
+
531
// Writes a state file naming a live pid, runs reportOrphans(), and checks that status().orphans
// lists it; then kills the process and checks that both status().orphans and daemon.orphans empty.
test("status surfaces still-alive orphaned processes from a previous daemon and drops them once gone", async () => {
  const dir = await fs.mkdtemp(path.join(os.tmpdir(), "rollbridge-test-"))
  const leftover = spawn(process.execPath, ["-e", "setInterval(() => {}, 1000)"], {stdio: "ignore"})

  // Everything after the spawn runs inside try so that a failure during setup (spawn error,
  // config normalization, daemon construction) still kills the child and removes the temp dir.
  try {
    await once(leftover, "spawn")

    const statePath = path.join(dir, "state.json")
    const config = normalizeConfig({
      application: "rollbridge-test",
      control: {path: path.join(dir, "rollbridge.sock")},
      processes: [{command: "true", id: "web", policy: "proxied", port: {from: 0, to: 0}}],
      proxy: {host: "127.0.0.1", port: 0},
      statePath
    })
    const daemon = new RollbridgeDaemon({config, logger: () => {}})

    // A prior daemon left a worker with this (still-alive) pid.
    await writeState(statePath, {
      activeReleaseId: "v1",
      releases: [{processes: [{id: "worker", pid: leftover.pid}], releaseId: "v1"}],
      services: [],
      singletons: []
    })

    await daemon.reportOrphans()

    // status reflects the still-running child even though the daemon cannot re-manage it.
    assert.deepEqual(daemon.status().orphans, [{id: "worker", pid: leftover.pid, releaseId: "v1"}])

    // Once the leftover is stopped, status re-checks liveness and drops it.
    leftover.kill("SIGKILL")
    await waitFor(() => daemon.status().orphans.length === 0)
    assert.deepEqual(daemon.status().orphans, [])

    // The dead entry is pruned from the underlying list, not merely filtered, so a recycled pid
    // can't resurrect a cleared orphan.
    assert.deepEqual(daemon.orphans, [])
  } finally {
    // Harmless if the child was already killed above.
    leftover.kill("SIGKILL")
    await fs.rm(dir, {force: true, recursive: true})
  }
})
574
+
575
// Deploys once and checks that the "events" control command returns "deploy starting" and
// "traffic switched" events, the latter carrying the release id and a UTC-style timestamp.
test("the daemon records a structured event history served by the events command", async () => {
  const fixture = await createFixture()
  const daemon = await startDaemon(fixture.config)

  try {
    await daemon.deploy({releaseId: "v1", releasePath: fixture.root, revision: "v1"})

    const {events: rawEvents} = await sendControlCommand({
      command: {command: "events"},
      path: fixture.config.control.path
    })
    const events = /** @type {import("../src/event-log.js").DaemonEvent[]} */ (rawEvents)
    const messages = events.map(({message}) => message)

    for (const expected of ["deploy starting", "traffic switched"]) {
      assert.ok(messages.includes(expected), JSON.stringify(messages))
    }

    const switched = events.find(({message}) => message === "traffic switched")

    assert.ok(switched)
    assert.equal(switched.data.releaseId, "v1")
    assert.match(switched.at, /^\d{4}-\d{2}-\d{2}T.*Z$/)
  } finally {
    await daemon.shutdown()
    await fs.rm(fixture.root, {force: true, recursive: true})
  }
})
602
+
603
// Sends an unknown control command, checks that a "command failed" event is recorded, and checks
// that an events request with limit 1 returns only the last event of the full list.
test("the events command honors --limit and records failed commands", async () => {
  const fixture = await createFixture()
  const daemon = await startDaemon(fixture.config)

  /**
   * @param {Record<string, import("../src/json.js").JsonValue>} [extra] - Extra fields merged into the events command.
   * @returns {Promise<import("../src/event-log.js").DaemonEvent[]>} Events returned by the daemon.
   */
  const fetchEvents = async (extra = {}) => {
    const response = await sendControlCommand({
      command: {command: "events", ...extra},
      path: fixture.config.control.path
    })

    return /** @type {import("../src/event-log.js").DaemonEvent[]} */ (response.events)
  }

  try {
    await daemon.deploy({releaseId: "v1", releasePath: fixture.root, revision: "v1"})

    // The unknown command must be rejected; the daemon is expected to log the failure.
    await assert.rejects(() => sendControlCommand({
      command: {command: "bogus"},
      path: fixture.config.control.path
    }))

    const all = await fetchEvents()

    assert.ok(all.some(({message}) => message === "command failed"))

    const limited = await fetchEvents({limit: 1})

    assert.equal(limited.length, 1)
    assert.deepEqual(limited[0], all.at(-1))
  } finally {
    await daemon.shutdown()
    await fs.rm(fixture.root, {force: true, recursive: true})
  }
})
635
+
636
// Linux-only: runs the "hog" fixture with a 64 MiB limit and checks that status reports a memory
// restart, then a positive rssBytes sample and at least one child entry with a positive rssBytes.
test("a process over its memory limit is restarted with reason memory", {skip: process.platform === "linux" ? false : "requires /proc (Linux)"}, async () => {
  const limitBytes = 64 * 1024 * 1024
  const fixture = await createFixture({memoryLimitBytes: limitBytes})
  const daemon = await startDaemon(fixture.config)
  const hogStatus = () => activeProcessStatus(daemon, "hog")
  /**
   * @param {unknown} value - Candidate measurement.
   * @returns {boolean} True when value is a number greater than zero.
   */
  const isPositiveNumber = (value) => typeof value === "number" && value > 0

  try {
    await daemon.deploy({releaseId: "v1", releasePath: fixture.root, revision: "v1"})

    // The fixture asks the hog to allocate four times the limit, so a restart is expected.
    await waitFor(() => (hogStatus()?.memoryRestarts ?? 0) >= 1, 10000)

    const hog = hogStatus()

    assert.ok(hog, "hog process should be present")
    assert.ok(hog.memoryRestarts >= 1, `expected a memory restart, got ${hog.memoryRestarts}`)
    assert.equal(hog.lastStartReason, "memory")
    assert.equal(typeof hog.lastMemoryRestartAt, "string")

    // rssBytes may not be populated right after the restart; poll until a sample is reported.
    await waitFor(() => isPositiveNumber(hogStatus()?.rssBytes), 5000)

    // Status should also list the process tree with a measurement.
    const monitored = hogStatus()

    assert.ok(monitored)
    assert.ok(monitored.children.length >= 1, "status should include the process tree")
    assert.ok(monitored.children.some((child) => isPositiveNumber(child.rssBytes)))
  } finally {
    await daemon.shutdown()
    await fs.rm(fixture.root, {force: true, recursive: true})
  }
})
672
+
673
// Deploys v1 then v2, calls rollback() with no target, and checks that v1 is active again both
// in the rollback result/status and in what the proxy serves at /release.
test("rollback re-activates the previous release and switches traffic back", async () => {
  const fixture = await createFixture()
  const daemon = await startDaemon(fixture.config)

  try {
    for (const releaseId of ["v1", "v2"]) {
      await daemon.deploy({releaseId, releasePath: fixture.root, revision: releaseId})
    }

    assert.equal(await fetchText(daemon, "/release"), "v2")

    const {activeReleaseId, previousReleaseId} = await daemon.rollback()

    assert.equal(activeReleaseId, "v1")
    assert.equal(previousReleaseId, "v2")
    assert.equal(daemon.status().activeReleaseId, "v1")
    assert.equal(await fetchText(daemon, "/release"), "v1")
  } finally {
    await daemon.shutdown()
    await fs.rm(fixture.root, {force: true, recursive: true})
  }
})
694
+
695
// Deploys v1, v2 and v3, rolls back to v1 by explicit release id, and checks that v1 is the
// active release and is what the proxy serves at /release.
test("rollback --release-id targets a specific retained release", async () => {
  const fixture = await createFixture()
  const daemon = await startDaemon(fixture.config)

  try {
    for (const releaseId of ["v1", "v2", "v3"]) {
      await daemon.deploy({releaseId, releasePath: fixture.root, revision: releaseId})
    }

    const {activeReleaseId} = await daemon.rollback({releaseId: "v1"})

    assert.equal(activeReleaseId, "v1")
    assert.equal(await fetchText(daemon, "/release"), "v1")
  } finally {
    await daemon.shutdown()
    await fs.rm(fixture.root, {force: true, recursive: true})
  }
})
713
+
714
// With only v1 deployed, checks that rollback() rejects with a matching message when there is no
// previous release, when the target is already active, and when the target is unknown.
test("rollback rejects no-previous, unknown, and already-active targets", async () => {
  const fixture = await createFixture()
  const daemon = await startDaemon(fixture.config)

  try {
    await daemon.deploy({releaseId: "v1", releasePath: fixture.root, revision: "v1"})

    /** @type {Array<[() => Promise<unknown>, RegExp]>} */
    const rejections = [
      [() => daemon.rollback(), /No previous release/],
      [() => daemon.rollback({releaseId: "v1"}), /already active/],
      [() => daemon.rollback({releaseId: "nope"}), /No retained release "nope"/]
    ]

    // Run the cases one at a time, in the listed order.
    for (const [attempt, expectedMessage] of rejections) {
      await assert.rejects(attempt, expectedMessage)
    }
  } finally {
    await daemon.shutdown()
    await fs.rm(fixture.root, {force: true, recursive: true})
  }
})
729
+
730
// Keeps v1 draining with an open WebSocket, rolls back to v1, and checks that the pid of the old
// draining web process no longer exists afterwards.
test("rollback to a still-draining release stops the old instance instead of orphaning it", async () => {
  const fixture = await createFixture()
  const daemon = await startDaemon(fixture.config)
  /** @type {WebSocket | undefined} */
  let socket

  try {
    await daemon.deploy({releaseId: "v1", releasePath: fixture.root, revision: "v1"})

    // Hold a WebSocket open so v1 still has a connection and stays draining once v2 is deployed.
    socket = await openWebSocket(daemon)
    await daemon.deploy({releaseId: "v2", releasePath: fixture.root, revision: "v2"})

    const draining = statusRelease(daemon, "v1")

    assert.equal(draining.state, "draining")

    const oldWeb = draining.processes.find(({id}) => id === "web")
    const oldWebPid = oldWeb?.pid

    assert.ok(oldWebPid, "the draining release should have a running web process")

    await daemon.rollback({releaseId: "v1"})

    assert.equal(daemon.status().activeReleaseId, "v1")
    // Signal 0 only probes for existence; ESRCH means the old process is gone.
    assert.throws(() => process.kill(/** @type {number} */ (oldWebPid), 0), /ESRCH/)
  } finally {
    socket?.close()
    await daemon.shutdown()
    await fs.rm(fixture.root, {force: true, recursive: true})
  }
})
762
+
763
// Deploys v1 then v2, sends "rollback" through the control socket, and checks that the response
// and the proxy's /release both report v1.
test("the rollback control command switches traffic over the socket", async () => {
  const fixture = await createFixture()
  const daemon = await startDaemon(fixture.config)

  try {
    for (const releaseId of ["v1", "v2"]) {
      await daemon.deploy({releaseId, releasePath: fixture.root, revision: releaseId})
    }

    const {activeReleaseId} = await sendControlCommand({
      command: {command: "rollback"},
      path: fixture.config.control.path
    })

    assert.equal(activeReleaseId, "v1")
    assert.equal(await fetchText(daemon, "/release"), "v1")
  } finally {
    await daemon.shutdown()
    await fs.rm(fixture.root, {force: true, recursive: true})
  }
})
783
+
339
784
  test("control socket accepts deploy and status commands", async () => {
340
785
  const fixture = await createFixture()
341
786
  const daemon = await startDaemon(fixture.config)
@@ -394,6 +839,34 @@ test("starting a second daemon on a live control socket reports the running daem
394
839
  }
395
840
  })
396
841
 
842
// Linux-only: starts a daemon whose control config names the current user (by name) as owner and
// the current gid as group, then checks the socket file's uid and gid.
test("the daemon applies control.owner and control.group to the bound socket", {skip: process.platform === "linux" ? false : "requires POSIX chown"}, async () => {
  const root = await fs.mkdtemp(path.join(os.tmpdir(), "rollbridge-test-"))
  const socketPath = path.join(root, "rollbridge.sock")
  const currentUser = os.userInfo()
  const gid = typeof process.getgid === "function" ? process.getgid() : 0
  const config = normalizeConfig({
    application: "rollbridge-test",
    // The owner is given as a user name and the group as a numeric id. Both belong to the
    // current user, so the daemon does not need root to apply them.
    control: {group: gid, owner: currentUser.username, path: socketPath},
    processes: [{command: "true", id: "web", policy: "proxied", port: {from: 0, to: 0}}],
    proxy: {host: "127.0.0.1", port: 0}
  })
  const daemon = new RollbridgeDaemon({config, logger: () => {}})

  try {
    await daemon.start()

    const socketStats = await fs.stat(socketPath)

    assert.equal(socketStats.uid, currentUser.uid)
    assert.equal(socketStats.gid, gid)
  } finally {
    await daemon.shutdown()
    await fs.rm(root, {force: true, recursive: true})
  }
})
869
+
397
870
  test("a control socket held by a non-Rollbridge process reports a generic conflict", async () => {
398
871
  const root = await fs.mkdtemp(path.join(os.tmpdir(), "rollbridge-test-"))
399
872
  const socketPath = path.join(root, "busy.sock")
@@ -499,13 +972,14 @@ test("deploy can ensure the daemon before sending the release command", async ()
499
972
  })
500
973
 
501
974
  /**
502
- * @param {{includeCompanion?: boolean, includeService?: boolean, includeSingleton?: boolean, proxyHost?: string, singletonCwd?: string, webCommand?: string, webDependsOnService?: boolean, webHealthTimeoutMs?: number}} [options] - Fixture options.
503
- * @returns {Promise<{config: import("../src/config.js").RollbridgeConfig, root: string, serviceLogPath: string, singletonLogPath: string}>} Fixture data.
975
+ * @param {{companionReplicas?: number, includeCompanion?: boolean, includeService?: boolean, includeSingleton?: boolean, memoryLimitBytes?: number, nonBlockingDrainWorker?: boolean, persistState?: boolean, proxyHost?: string, singletonCwd?: string, webCommand?: string, webDependsOnService?: boolean, webHealthTimeoutMs?: number}} [options] - Fixture options.
976
+ * @returns {Promise<{config: import("../src/config.js").RollbridgeConfig, root: string, serviceLogPath: string, singletonLogPath: string, statePath: string}>} Fixture data.
504
977
  */
505
978
  async function createFixture(options = {}) {
506
979
  const root = await fs.mkdtemp(path.join(os.tmpdir(), "rollbridge-test-"))
507
980
  const serviceLogPath = path.join(root, "service.log")
508
981
  const singletonLogPath = path.join(root, "singleton.log")
982
+ const statePath = path.join(root, "rollbridge.state.json")
509
983
  /** @type {Array<Record<string, import("../src/json.js").JsonValue>>} */
510
984
  const processes = []
511
985
 
@@ -530,6 +1004,36 @@ async function createFixture(options = {}) {
530
1004
  })
531
1005
  }
532
1006
 
1007
+ if (options.companionReplicas) {
1008
+ processes.push({
1009
+ command: `${JSON.stringify(process.execPath)} -e ${JSON.stringify("setInterval(() => {}, 1000)")}`,
1010
+ id: "worker",
1011
+ policy: "companion",
1012
+ replicas: options.companionReplicas
1013
+ })
1014
+ }
1015
+
1016
+ if (options.nonBlockingDrainWorker) {
1017
+ processes.push({
1018
+ command: `${JSON.stringify(process.execPath)} -e ${JSON.stringify("setInterval(() => {}, 1000)")}`,
1019
+ id: "worker",
1020
+ nonBlockingDrain: true,
1021
+ policy: "companion"
1022
+ })
1023
+ }
1024
+
1025
+ if (options.memoryLimitBytes) {
1026
+ processes.push({
1027
+ command: `${JSON.stringify(process.execPath)} ${JSON.stringify(memoryHogPath)}`,
1028
+ env: {
1029
+ ROLLBRIDGE_HOG_BYTES: String(options.memoryLimitBytes * 4)
1030
+ },
1031
+ id: "hog",
1032
+ memory: {checkIntervalMs: 100, limitBytes: options.memoryLimitBytes, warnBytes: 0},
1033
+ policy: "companion"
1034
+ })
1035
+ }
1036
+
533
1037
  processes.push({
534
1038
  command: options.webCommand || (options.webDependsOnService
535
1039
  ? `${JSON.stringify(process.execPath)} ${JSON.stringify(dependentAppPath)}`
@@ -569,10 +1073,11 @@ async function createFixture(options = {}) {
569
1073
  healthTimeoutMs: 3000,
570
1074
  host: options.proxyHost || "127.0.0.1",
571
1075
  port: 0
572
- }
1076
+ },
1077
+ ...(options.persistState ? {statePath} : {})
573
1078
  })
574
1079
 
575
- return {config, root, serviceLogPath, singletonLogPath}
1080
+ return {config, root, serviceLogPath, singletonLogPath, statePath}
576
1081
  }
577
1082
 
578
1083
  /**
@@ -697,10 +1202,11 @@ async function writeConfigFile(config, root) {
697
1202
 
698
1203
  /**
699
1204
  * @param {() => Promise<boolean> | boolean} callback - Probe callback.
1205
+ * @param {number} [timeoutMs] - How long to wait before giving up (default 3000).
700
1206
  * @returns {Promise<void>} Resolves when callback returns true.
701
1207
  */
702
- async function waitFor(callback) {
703
- const deadline = Date.now() + 3000
1208
+ async function waitFor(callback, timeoutMs = 3000) {
1209
+ const deadline = Date.now() + timeoutMs
704
1210
 
705
1211
  while (Date.now() < deadline) {
706
1212
  if (await callback()) return
@@ -709,3 +1215,14 @@ async function waitFor(callback) {
709
1215
 
710
1216
  throw new Error("Timed out waiting for condition")
711
1217
  }
1218
+
1219
/**
 * Looks up a process in the first release that daemon.status() reports with state "active".
 *
 * @param {RollbridgeDaemon} daemon - Daemon.
 * @param {string} processId - Process id within the active release.
 * @returns {import("../src/managed-process.js").ManagedProcessStatus | undefined} The process status, or undefined when there is no active release or no such process.
 */
function activeProcessStatus(daemon, processId) {
  const {releases} = daemon.status()
  const activeRelease = releases.find(({state}) => state === "active")

  return activeRelease?.processes.find(({id}) => id === processId)
}
@@ -0,0 +1,69 @@
1
+ // @ts-check
2
+
3
+ import assert from "node:assert/strict"
4
+ import fs from "node:fs/promises"
5
+ import os from "node:os"
6
+ import path from "node:path"
7
+ import test from "node:test"
8
+ import {clearState, readState, writeState} from "../src/state-store.js"
9
+
10
// Writes a snapshot with writeState and checks that readState returns its activeReleaseId.
test("writeState then readState round-trips a snapshot", async () => {
  const statePath = path.join(await fs.mkdtemp(path.join(os.tmpdir(), "rollbridge-state-")), "state.json")

  try {
    const snapshot = {activeReleaseId: "v1", releases: [{releaseId: "v1"}]}

    await writeState(statePath, snapshot)

    const restored = /** @type {{activeReleaseId: string}} */ (await readState(statePath))

    assert.equal(restored.activeReleaseId, "v1")
  } finally {
    // The temp directory is the parent of the state file.
    await fs.rm(path.dirname(statePath), {force: true, recursive: true})
  }
})
24
+
25
// Checks that readState yields undefined both when the file does not exist and when it holds
// text that is not valid JSON.
test("readState returns undefined for a missing or unparseable file", async () => {
  const statePath = path.join(await fs.mkdtemp(path.join(os.tmpdir(), "rollbridge-state-")), "state.json")

  try {
    // Nothing has been written yet.
    const whenMissing = await readState(statePath)

    assert.equal(whenMissing, undefined)

    await fs.writeFile(statePath, "{not json")

    const whenUnparseable = await readState(statePath)

    assert.equal(whenUnparseable, undefined)
  } finally {
    // The temp directory is the parent of the state file.
    await fs.rm(path.dirname(statePath), {force: true, recursive: true})
  }
})
39
+
40
// Starts three writeState calls at once and checks that the file afterwards holds exactly one of
// the written snapshots.
test("concurrent writes leave a complete, uncorrupted snapshot", async () => {
  const dir = await fs.mkdtemp(path.join(os.tmpdir(), "rollbridge-state-"))
  const statePath = path.join(dir, "state.json")
  const writtenValues = [1, 2, 3]

  try {
    await Promise.all(writtenValues.map((n) => writeState(statePath, {n})))

    const state = /** @type {{n: number} | undefined} */ (await readState(statePath))

    // The snapshot must be readable and must be one of the values actually written; checking
    // only that n is a number would also accept a value no writer produced.
    assert.ok(state, "a snapshot should be readable after concurrent writes")
    assert.ok(writtenValues.includes(state.n), `unexpected snapshot ${JSON.stringify(state)}`)
  } finally {
    await fs.rm(dir, {force: true, recursive: true})
  }
})
55
+
56
// Checks that clearState makes a written snapshot unreadable and that calling it again on the
// now-missing file does not reject.
test("clearState removes the file and ignores a missing one", async () => {
  const statePath = path.join(await fs.mkdtemp(path.join(os.tmpdir(), "rollbridge-state-")), "state.json")

  try {
    await writeState(statePath, {ok: true})
    await clearState(statePath)

    const afterClear = await readState(statePath)

    assert.equal(afterClear, undefined)

    // Second call targets a file that is already gone; a rejection here would fail the test.
    await clearState(statePath)
  } finally {
    // The temp directory is the parent of the state file.
    await fs.rm(path.dirname(statePath), {force: true, recursive: true})
  }
})