rollbridge 0.1.4 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,8 @@
1
1
  // @ts-check
2
2
 
3
3
  import assert from "node:assert/strict"
4
+ import {spawn} from "node:child_process"
5
+ import {once} from "node:events"
4
6
  import fs from "node:fs/promises"
5
7
  import net from "node:net"
6
8
  import os from "node:os"
@@ -10,15 +12,51 @@ import {fileURLToPath} from "node:url"
10
12
  import RollbridgeDaemon from "../src/daemon.js"
11
13
  import {normalizeConfig} from "../src/config.js"
12
14
  import {sendControlCommand} from "../src/control-client.js"
15
+ import {readState, writeState} from "../src/state-store.js"
13
16
  import {runCli} from "../src/cli.js"
14
17
 
15
18
  const currentDir = path.dirname(fileURLToPath(import.meta.url))
16
19
  const binPath = path.join(currentDir, "..", "bin", "rollbridge")
17
20
  const dependentAppPath = path.join(currentDir, "fixtures", "dependent-app.js")
18
21
  const dummyAppPath = path.join(currentDir, "fixtures", "dummy-app.js")
22
+ const memoryHogPath = path.join(currentDir, "fixtures", "memory-hog.js")
19
23
  const serviceAppPath = path.join(currentDir, "fixtures", "service-app.js")
20
24
  const singletonAppPath = path.join(currentDir, "fixtures", "singleton-app.js")
21
25
 
test("a nonBlockingDrain worker stops immediately while its release is still draining", async () => {
  const fixture = await createFixture({nonBlockingDrainWorker: true})
  const daemon = await startDaemon(fixture.config)
  /** @type {WebSocket | undefined} */
  let heldSocket

  // Reads one process's state out of a release status entry; undefined when either is missing.
  const processState = (/** @type {any} */ release, /** @type {string} */ processId) =>
    release?.processes.find((/** @type {any} */ candidate) => candidate.id === processId)?.state

  try {
    await daemon.deploy({releaseId: "v1", releasePath: fixture.root, revision: "v1"})

    // Holding a WebSocket open against v1 keeps its connection drain unfinished once v2 is live.
    heldSocket = await openWebSocket(daemon)
    await daemon.deploy({releaseId: "v2", releasePath: fixture.root, revision: "v2"})

    // Wait until the v1 worker reports "stopped"; the assertions below check v1 is still draining.
    await waitFor(() => {
      const v1Entry = daemon.status().releases.find((release) => release.releaseId === "v1")

      return processState(v1Entry, "worker") === "stopped"
    })

    const v1Snapshot = statusRelease(daemon, "v1")

    // One snapshot must show all three facts at once: the release is still draining, its
    // proxied process is still running, and the worker is already stopped.
    assert.equal(v1Snapshot.state, "draining")
    assert.equal(processState(v1Snapshot, "web"), "running")
    assert.equal(processState(v1Snapshot, "worker"), "stopped")
  } finally {
    if (heldSocket) heldSocket.close()
    await daemon.shutdown()
    await fs.rm(fixture.root, {force: true, recursive: true})
  }
})
59
+
22
60
  test("deploy switches new HTTP traffic while old WebSockets drain", async () => {
23
61
  const fixture = await createFixture()
24
62
  const daemon = await startDaemon(fixture.config)
@@ -141,6 +179,38 @@ test("singleton processes restart without overlap during deploy", async () => {
141
179
  }
142
180
  })
143
181
 
test("a failed singleton replacement surfaces the error after stopping the old singleton", async () => {
  // The singleton's working directory is per-release; only the v1 directory exists, so
  // the v2 replacement cannot spawn (ENOENT on cwd) and its start() rejects.
  const fixture = await createFixture({includeSingleton: true, singletonCwd: "{{releasePath}}/{{releaseId}}"})
  const daemon = await startDaemon(fixture.config)

  // Builds a waitFor predicate: true once the singleton log holds the given event for v1.
  const sawV1SingletonEvent = (/** @type {string} */ eventName) => async () =>
    (await processEvents(fixture.singletonLogPath)).some((event) => event.event === eventName && event.releaseId === "v1")

  try {
    // Created inside the try block so that a failure here still shuts the already-running
    // daemon down and removes the fixture directory.
    await fs.mkdir(path.join(fixture.root, "v1"))

    await daemon.deploy({releaseId: "v1", releasePath: fixture.root, revision: "v1"})
    await waitFor(sawV1SingletonEvent("start"))

    // The new release's singleton fails to start, so the deploy surfaces the error.
    await assert.rejects(() => daemon.deploy({releaseId: "v2", releasePath: fixture.root, revision: "v2"}))

    // The old singleton is stopped before the new one is started, so two copies never
    // overlap — even when the replacement then fails.
    await waitFor(sawV1SingletonEvent("stop"))

    const status = daemon.status()

    // Traffic switches before singletons are replaced, so the new release is already active,
    // but its singleton is left failed with no replacement running.
    assert.equal(status.activeReleaseId, "v2")
    assert.equal(status.singletons.length, 1)
    assert.equal(status.singletons[0].process.state, "failed")
  } finally {
    await daemon.shutdown()
    await fs.rm(fixture.root, {force: true, recursive: true})
  }
})
213
+
144
214
  test("service processes start before releases and restart with the latest deploy template", async () => {
145
215
  const fixture = await createFixture({includeService: true, webDependsOnService: true})
146
216
  const daemon = await startDaemon(fixture.config)
@@ -173,6 +243,544 @@ test("service processes start before releases and restart with the latest deploy
173
243
  }
174
244
  })
175
245
 
test("a replicated companion starts one instance per replica, and restart targets one or all", async () => {
  // With three replicas the daemon is expected to list one "<id>#<index>" process per replica.
  const expectedReplicaIds = ["worker#0", "worker#1", "worker#2"]
  const fixture = await createFixture({companionReplicas: 3})
  const daemon = await startDaemon(fixture.config)

  try {
    await daemon.deploy({releaseId: "v1", releasePath: fixture.root, revision: "v1"})

    const activeRelease = daemon.status().releases.find((candidate) => candidate.state === "active")

    assert.ok(activeRelease)

    /** @type {string[]} */
    const replicaIds = []

    for (const processStatus of activeRelease.processes) {
      if (processStatus.id.startsWith("worker")) replicaIds.push(processStatus.id)
    }

    replicaIds.sort()
    assert.deepEqual(replicaIds, expectedReplicaIds)

    // Addressing a full replica id bounces just that one instance.
    const single = await daemon.restartProcesses({processId: "worker#1"})

    assert.deepEqual(single.restarted, ["worker#1"])

    // Addressing the base id bounces all of its replicas.
    const everyReplica = await daemon.restartProcesses({processId: "worker"})
    const everyReplicaIds = /** @type {string[]} */ (everyReplica.restarted)

    assert.deepEqual([...everyReplicaIds].sort(), expectedReplicaIds)
  } finally {
    await daemon.shutdown()
    await fs.rm(fixture.root, {force: true, recursive: true})
  }
})
275
+
test("restart bounces a single process by id", async () => {
  const fixture = await createFixture({includeService: true})
  const daemon = await startDaemon(fixture.config)

  try {
    await daemon.deploy({releaseId: "v1", releasePath: fixture.root, revision: "v1"})

    // Capture the beacon's pid on either side of the restart; a changed pid proves a new process.
    const {beacon: pidBefore} = pidsById(daemon.status())
    const {restarted} = await daemon.restartProcesses({processId: "beacon"})

    assert.deepEqual(restarted, ["beacon"])

    const {beacon: pidAfter} = pidsById(daemon.status())

    assert.ok(pidBefore && pidAfter, "beacon should have a pid before and after")
    assert.notEqual(pidAfter, pidBefore)
  } finally {
    await daemon.shutdown()
    await fs.rm(fixture.root, {force: true, recursive: true})
  }
})
297
+
test("restart with no selector bounces every non-proxied process but not the proxied one", async () => {
  const fixture = await createFixture({includeCompanion: true, includeService: true, includeSingleton: true})
  const daemon = await startDaemon(fixture.config)

  try {
    await daemon.deploy({releaseId: "v1", releasePath: fixture.root, revision: "v1"})

    const pidsBefore = pidsById(daemon.status())
    const {restarted} = await daemon.restartProcesses()
    const restartedIds = [.../** @type {string[]} */ (restarted)].sort()

    assert.deepEqual(restartedIds, ["beacon", "jobs-main", "worker"])

    const pidsAfter = pidsById(daemon.status())

    // The proxied web process keeps its pid; every other managed process gets a new one.
    assert.equal(pidsAfter.web, pidsBefore.web, "proxied process should not be restarted")

    for (const bouncedId of ["beacon", "jobs-main", "worker"]) {
      assert.notEqual(pidsAfter[bouncedId], pidsBefore[bouncedId])
    }
  } finally {
    await daemon.shutdown()
    await fs.rm(fixture.root, {force: true, recursive: true})
  }
})
322
+
test("restart --policy targets only processes with that policy", async () => {
  const fixture = await createFixture({includeCompanion: true, includeService: true})
  const daemon = await startDaemon(fixture.config)

  try {
    await daemon.deploy({releaseId: "v1", releasePath: fixture.root, revision: "v1"})

    const pidsBefore = pidsById(daemon.status())
    const {restarted} = await daemon.restartProcesses({policy: "companion"})

    // Only the companion ("worker") is selected by the policy filter.
    assert.deepEqual(restarted, ["worker"])

    const pidsAfter = pidsById(daemon.status())

    assert.notEqual(pidsAfter.worker, pidsBefore.worker)
    assert.equal(pidsAfter.beacon, pidsBefore.beacon, "the service should be left running")
  } finally {
    await daemon.shutdown()
    await fs.rm(fixture.root, {force: true, recursive: true})
  }
})
344
+
test("restart refuses the proxied process and reports unknown ids", async () => {
  const fixture = await createFixture()
  const daemon = await startDaemon(fixture.config)
  // Both ways of selecting the proxied process must be refused with this message.
  const proxiedRefusal = /proxied process cannot be restarted/

  try {
    await daemon.deploy({releaseId: "v1", releasePath: fixture.root, revision: "v1"})

    await assert.rejects(() => daemon.restartProcesses({processId: "web"}), proxiedRefusal)
    await assert.rejects(() => daemon.restartProcesses({policy: "proxied"}), proxiedRefusal)
    // An id that matches no managed process is reported by name.
    await assert.rejects(() => daemon.restartProcesses({processId: "missing"}), /No managed process with id "missing"/)
  } finally {
    await daemon.shutdown()
    await fs.rm(fixture.root, {force: true, recursive: true})
  }
})
360
+
test("restart revives a stopped process instead of erroring", async () => {
  const fixture = await createFixture({includeCompanion: true})
  const daemon = await startDaemon(fixture.config)

  try {
    await daemon.deploy({releaseId: "v1", releasePath: fixture.root, revision: "v1"})

    // Stop the worker directly to stand in for one that exited on its own
    // (e.g. crashed and exhausted its restart budget).
    const stoppedWorker = daemon.activeRelease?.getProcess("worker")

    assert.ok(stoppedWorker, "worker process should exist")
    await stoppedWorker.stop()
    assert.equal(stoppedWorker.status().state, "stopped")

    const {restarted} = await daemon.restartProcesses({processId: "worker"})

    // The same process object is expected to be running again with a pid.
    assert.deepEqual(restarted, ["worker"])
    assert.equal(stoppedWorker.status().state, "running")
    assert.ok(stoppedWorker.status().pid)
  } finally {
    await daemon.shutdown()
    await fs.rm(fixture.root, {force: true, recursive: true})
  }
})
385
+
test("the restart control command bounces a process over the socket", async () => {
  const fixture = await createFixture({includeService: true})
  const daemon = await startDaemon(fixture.config)

  try {
    await daemon.deploy({releaseId: "v1", releasePath: fixture.root, revision: "v1"})

    const beaconPidBefore = pidsById(daemon.status()).beacon
    // Issue the restart through the control socket rather than the in-process API.
    const {restarted} = await sendControlCommand({
      command: {command: "restart", processId: "beacon"},
      path: fixture.config.control.path
    })

    assert.deepEqual(restarted, ["beacon"])

    const beaconPidAfter = pidsById(daemon.status()).beacon

    assert.notEqual(beaconPidAfter, beaconPidBefore)
  } finally {
    await daemon.shutdown()
    await fs.rm(fixture.root, {force: true, recursive: true})
  }
})
406
+
test("status and events distinguish deploy starts from manual restarts", async () => {
  const fixture = await createFixture({includeService: true})
  const daemon = await startDaemon(fixture.config)
  // Current status entry for the beacon service, or undefined when it is not listed.
  const beaconService = () => daemon.status().services.find((service) => service.id === "beacon")

  try {
    await daemon.deploy({releaseId: "v1", releasePath: fixture.root, revision: "v1"})

    const deployed = beaconService()

    assert.ok(deployed)
    assert.equal(deployed.process.lastStartReason, "deploy")

    await daemon.restartProcesses({processId: "beacon"})

    const restarted = beaconService()

    assert.ok(restarted)
    assert.equal(restarted.process.lastStartReason, "manual")

    // The event history must carry both start reasons as well.
    const response = await sendControlCommand({
      command: {command: "events"},
      path: fixture.config.control.path
    })
    const events = /** @type {import("../src/event-log.js").DaemonEvent[]} */ (response.events)
    const startReasons = events
      .filter(({message}) => message === "process started")
      .map(({data}) => data.reason)

    assert.ok(startReasons.includes("deploy"), JSON.stringify(startReasons))
    assert.ok(startReasons.includes("manual"), JSON.stringify(startReasons))
  } finally {
    await daemon.shutdown()
    await fs.rm(fixture.root, {force: true, recursive: true})
  }
})
439
+
test("persists daemon state to statePath and removes it on a clean shutdown", async () => {
  const fixture = await createFixture({persistState: true})
  const daemon = await startDaemon(fixture.config)
  let stateAfterShutdown

  // True once the persisted state names v1 as the active release.
  const v1Persisted = async () => {
    const persisted = /** @type {{activeReleaseId: string} | undefined} */ (await readState(fixture.statePath))

    return persisted?.activeReleaseId === "v1"
  }

  try {
    await daemon.deploy({releaseId: "v1", releasePath: fixture.root, revision: "v1"})

    // deploy does not block on the state write (it is fire-and-forget), so poll until it lands.
    await waitFor(v1Persisted)

    await daemon.shutdown()
    stateAfterShutdown = await readState(fixture.statePath)
  } finally {
    // Only shut down here when the try block did not get as far as doing it itself.
    if (!daemon.stopping) await daemon.shutdown()
    await fs.rm(fixture.root, {force: true, recursive: true})
  }

  assert.equal(stateAfterShutdown, undefined, "state file removed on clean shutdown")
})
464
+
test("a clean shutdown clears the state file even when a persist write is in flight", async () => {
  const fixture = await createFixture({persistState: true})
  const daemon = await startDaemon(fixture.config)

  try {
    await daemon.deploy({releaseId: "v1", releasePath: fixture.root, revision: "v1"})

    // No waiting between deploy and shutdown: the deploy's fire-and-forget persist may still be
    // in flight when the shutdown begins.
    await daemon.shutdown()

    const stateAfterShutdown = await readState(fixture.statePath)

    assert.equal(stateAfterShutdown, undefined, "state file must not be recreated by an in-flight write")
  } finally {
    // Only shut down here when the try block did not get as far as doing it itself.
    if (!daemon.stopping) await daemon.shutdown()
    await fs.rm(fixture.root, {force: true, recursive: true})
  }
})
481
+
test("reports orphaned managed processes from a previous daemon's state", async () => {
  const orphanMessage = "orphaned managed process detected"
  const dir = await fs.mkdtemp(path.join(os.tmpdir(), "rollbridge-test-"))
  const statePath = path.join(dir, "state.json")
  // A live process standing in for a leftover managed child from a crashed daemon.
  const leftover = spawn(process.execPath, ["-e", "setInterval(() => {}, 1000)"], {stdio: "ignore"})

  /**
   * Writes the state file a previous daemon would have left for a single v1 worker.
   * @param {number | undefined} pid - Pid recorded for the worker.
   * @returns {Promise<void>} Resolves once the state has been written.
   */
  const writePreviousState = async (pid) => {
    await writeState(statePath, {
      activeReleaseId: "v1",
      releases: [{processes: [{id: "worker", pid}], releaseId: "v1"}],
      services: [],
      singletons: []
    })
  }

  // Everything after the spawn runs inside the try block so that any setup failure (the child
  // failing to spawn, config normalization, daemon construction) still kills the child and
  // removes the temp directory.
  try {
    await once(leftover, "spawn")

    /** @type {{data: Record<string, import("../src/json.js").JsonValue>, message: string}[]} */
    const logs = []
    const config = normalizeConfig({
      application: "rollbridge-test",
      control: {path: path.join(dir, "rollbridge.sock")},
      processes: [{command: "true", id: "web", policy: "proxied", port: {from: 0, to: 0}}],
      proxy: {host: "127.0.0.1", port: 0},
      statePath
    })
    const daemon = new RollbridgeDaemon({config, logger: (message, data = {}) => { logs.push({data, message}) }})

    // A prior daemon left a worker with this (still-alive) pid.
    await writePreviousState(leftover.pid)
    await daemon.reportOrphans()

    assert.ok(logs.some((entry) => entry.message === orphanMessage && entry.data.pid === leftover.pid), JSON.stringify(logs))

    // A dead pid is not reported.
    logs.length = 0
    await writePreviousState(2147483646)
    await daemon.reportOrphans()

    assert.ok(!logs.some((entry) => entry.message === orphanMessage))
  } finally {
    leftover.kill("SIGKILL")
    await fs.rm(dir, {force: true, recursive: true})
  }
})
530
+
test("status surfaces still-alive orphaned processes from a previous daemon and drops them once gone", async () => {
  const dir = await fs.mkdtemp(path.join(os.tmpdir(), "rollbridge-test-"))
  const statePath = path.join(dir, "state.json")
  // A live process standing in for a leftover managed child from a previous daemon.
  const leftover = spawn(process.execPath, ["-e", "setInterval(() => {}, 1000)"], {stdio: "ignore"})

  // Everything after the spawn runs inside the try block so that any setup failure (the child
  // failing to spawn, config normalization, daemon construction) still kills the child and
  // removes the temp directory.
  try {
    await once(leftover, "spawn")

    const config = normalizeConfig({
      application: "rollbridge-test",
      control: {path: path.join(dir, "rollbridge.sock")},
      processes: [{command: "true", id: "web", policy: "proxied", port: {from: 0, to: 0}}],
      proxy: {host: "127.0.0.1", port: 0},
      statePath
    })
    const daemon = new RollbridgeDaemon({config, logger: () => {}})

    // A prior daemon left a worker with this (still-alive) pid.
    await writeState(statePath, {
      activeReleaseId: "v1",
      releases: [{processes: [{id: "worker", pid: leftover.pid}], releaseId: "v1"}],
      services: [],
      singletons: []
    })

    await daemon.reportOrphans()

    // status reflects the still-running child even though the daemon cannot re-manage it.
    assert.deepEqual(daemon.status().orphans, [{id: "worker", pid: leftover.pid, releaseId: "v1"}])

    // Once the leftover is stopped, status re-checks liveness and drops it.
    leftover.kill("SIGKILL")
    await waitFor(() => daemon.status().orphans.length === 0)
    assert.deepEqual(daemon.status().orphans, [])

    // The dead entry is pruned from the underlying list, not merely filtered, so a recycled pid
    // can't resurrect a cleared orphan.
    assert.deepEqual(daemon.orphans, [])
  } finally {
    // Harmless when the child was already killed above; required when an assertion failed first.
    leftover.kill("SIGKILL")
    await fs.rm(dir, {force: true, recursive: true})
  }
})
574
+
test("the daemon records a structured event history served by the events command", async () => {
  const fixture = await createFixture()
  const daemon = await startDaemon(fixture.config)

  try {
    await daemon.deploy({releaseId: "v1", releasePath: fixture.root, revision: "v1"})

    const {events: rawEvents} = await sendControlCommand({
      command: {command: "events"},
      path: fixture.config.control.path
    })
    const events = /** @type {import("../src/event-log.js").DaemonEvent[]} */ (rawEvents)
    const messages = events.map(({message}) => message)

    // A deploy is expected to leave at least these two entries in the history.
    for (const expectedMessage of ["deploy starting", "traffic switched"]) {
      assert.ok(messages.includes(expectedMessage), JSON.stringify(messages))
    }

    const switched = events.find(({message}) => message === "traffic switched")

    // Each entry carries structured data and a UTC ISO-style timestamp.
    assert.ok(switched)
    assert.equal(switched.data.releaseId, "v1")
    assert.match(switched.at, /^\d{4}-\d{2}-\d{2}T.*Z$/)
  } finally {
    await daemon.shutdown()
    await fs.rm(fixture.root, {force: true, recursive: true})
  }
})
602
+
test("the events command honors --limit and records failed commands", async () => {
  const fixture = await createFixture()
  const daemon = await startDaemon(fixture.config)

  /**
   * Sends the events control command, optionally with extra command fields.
   * @param {Record<string, import("../src/json.js").JsonValue>} [extra] - Extra fields merged into the command.
   * @returns {Promise<import("../src/event-log.js").DaemonEvent[]>} Events from the response.
   */
  const fetchEvents = async (extra = {}) => {
    const response = await sendControlCommand({
      command: {command: "events", ...extra},
      path: fixture.config.control.path
    })

    return /** @type {import("../src/event-log.js").DaemonEvent[]} */ (response.events)
  }

  try {
    await daemon.deploy({releaseId: "v1", releasePath: fixture.root, revision: "v1"})

    // The daemon rejects a command it does not know and logs a "command failed" event for it.
    await assert.rejects(() => sendControlCommand({
      command: {command: "bogus"},
      path: fixture.config.control.path
    }))

    const all = await fetchEvents()

    assert.ok(all.some(({message}) => message === "command failed"))

    const limited = await fetchEvents({limit: 1})
    const newest = all[all.length - 1]

    // A limit of one yields exactly the most recent entry of the full history.
    assert.equal(limited.length, 1)
    assert.deepEqual(limited[0], newest)
  } finally {
    await daemon.shutdown()
    await fs.rm(fixture.root, {force: true, recursive: true})
  }
})
635
+
test("a process over its memory limit is restarted with reason memory", {skip: process.platform !== "linux" && "requires /proc (Linux)"}, async () => {
  const limitBytes = 64 * 1024 * 1024
  const fixture = await createFixture({memoryLimitBytes: limitBytes})
  const daemon = await startDaemon(fixture.config)
  // Current status of the "hog" process in the active release, if any.
  const hogStatus = () => activeProcessStatus(daemon, "hog")

  try {
    await daemon.deploy({releaseId: "v1", releasePath: fixture.root, revision: "v1"})

    // The hog is configured to allocate about four times the limit, so a restart is expected.
    await waitFor(() => (hogStatus()?.memoryRestarts ?? 0) >= 1, 10000)

    const hog = hogStatus()

    assert.ok(hog, "hog process should be present")
    assert.ok(hog.memoryRestarts >= 1, `expected a memory restart, got ${hog.memoryRestarts}`)
    assert.equal(hog.lastStartReason, "memory")
    assert.equal(typeof hog.lastMemoryRestartAt, "string")

    // rssBytes only appears after the monitor has sampled the restarted process, so poll for it.
    await waitFor(() => {
      const sampledRss = hogStatus()?.rssBytes

      return typeof sampledRss === "number" && sampledRss > 0
    }, 5000)

    // The status is also expected to list the process tree with measured children.
    const monitored = hogStatus()

    assert.ok(monitored)
    assert.ok(monitored.children.length >= 1, "status should include the process tree")

    const hasMeasuredChild = monitored.children.some((child) => typeof child.rssBytes === "number" && child.rssBytes > 0)

    assert.ok(hasMeasuredChild)
  } finally {
    await daemon.shutdown()
    await fs.rm(fixture.root, {force: true, recursive: true})
  }
})
672
+
test("rollback re-activates the previous release and switches traffic back", async () => {
  const fixture = await createFixture()
  const daemon = await startDaemon(fixture.config)

  try {
    // Deploy two releases in order so that v1 is the previous release when rolling back.
    for (const releaseId of ["v1", "v2"]) {
      await daemon.deploy({releaseId, releasePath: fixture.root, revision: releaseId})
    }

    assert.equal(await fetchText(daemon, "/release"), "v2")

    const {activeReleaseId, previousReleaseId} = await daemon.rollback()

    // The result, the status, and the proxied traffic must all agree that v1 is live again.
    assert.equal(activeReleaseId, "v1")
    assert.equal(previousReleaseId, "v2")
    assert.equal(daemon.status().activeReleaseId, "v1")
    assert.equal(await fetchText(daemon, "/release"), "v1")
  } finally {
    await daemon.shutdown()
    await fs.rm(fixture.root, {force: true, recursive: true})
  }
})
694
+
test("rollback --release-id targets a specific retained release", async () => {
  const fixture = await createFixture()
  const daemon = await startDaemon(fixture.config)

  try {
    // Three deploys in order, so v1 is two releases behind the active one.
    for (const releaseId of ["v1", "v2", "v3"]) {
      await daemon.deploy({releaseId, releasePath: fixture.root, revision: releaseId})
    }

    const {activeReleaseId} = await daemon.rollback({releaseId: "v1"})

    assert.equal(activeReleaseId, "v1")
    assert.equal(await fetchText(daemon, "/release"), "v1")
  } finally {
    await daemon.shutdown()
    await fs.rm(fixture.root, {force: true, recursive: true})
  }
})
713
+
test("rollback rejects no-previous, unknown, and already-active targets", async () => {
  const fixture = await createFixture()
  const daemon = await startDaemon(fixture.config)

  try {
    await daemon.deploy({releaseId: "v1", releasePath: fixture.root, revision: "v1"})

    // With a single deployed release there is nothing to fall back to.
    await assert.rejects(() => daemon.rollback(), /No previous release/)
    // Targeting the release that is currently active is refused.
    await assert.rejects(() => daemon.rollback({releaseId: "v1"}), /already active/)
    // An id that was never deployed is reported by name.
    await assert.rejects(() => daemon.rollback({releaseId: "nope"}), /No retained release "nope"/)
  } finally {
    await daemon.shutdown()
    await fs.rm(fixture.root, {force: true, recursive: true})
  }
})
729
+
test("rollback to a still-draining release stops the old instance instead of orphaning it", async () => {
  const fixture = await createFixture()
  const daemon = await startDaemon(fixture.config)
  /** @type {WebSocket | undefined} */
  let heldSocket

  try {
    await daemon.deploy({releaseId: "v1", releasePath: fixture.root, revision: "v1"})

    // Holding a WebSocket open keeps v1's connection count above zero, so v1 is still draining
    // after v2 takes over.
    heldSocket = await openWebSocket(daemon)
    await daemon.deploy({releaseId: "v2", releasePath: fixture.root, revision: "v2"})

    const drainingV1 = statusRelease(daemon, "v1")

    assert.equal(drainingV1.state, "draining")

    const previousWebPid = drainingV1.processes.find(({id}) => id === "web")?.pid

    assert.ok(previousWebPid, "the draining release should have a running web process")

    await daemon.rollback({releaseId: "v1"})

    assert.equal(daemon.status().activeReleaseId, "v1")
    // Signal 0 only probes for existence; ESRCH means the old draining web process is gone,
    // i.e. it was stopped rather than left behind when its release id was reused.
    assert.throws(() => process.kill(/** @type {number} */ (previousWebPid), 0), /ESRCH/)
  } finally {
    if (heldSocket) heldSocket.close()
    await daemon.shutdown()
    await fs.rm(fixture.root, {force: true, recursive: true})
  }
})
762
+
test("the rollback control command switches traffic over the socket", async () => {
  const fixture = await createFixture()
  const daemon = await startDaemon(fixture.config)

  try {
    // Deploy two releases in order so that v1 is the previous release when rolling back.
    for (const releaseId of ["v1", "v2"]) {
      await daemon.deploy({releaseId, releasePath: fixture.root, revision: releaseId})
    }

    // Issue the rollback through the control socket rather than the in-process API.
    const {activeReleaseId} = await sendControlCommand({
      command: {command: "rollback"},
      path: fixture.config.control.path
    })

    assert.equal(activeReleaseId, "v1")
    assert.equal(await fetchText(daemon, "/release"), "v1")
  } finally {
    await daemon.shutdown()
    await fs.rm(fixture.root, {force: true, recursive: true})
  }
})
783
+
176
784
  test("control socket accepts deploy and status commands", async () => {
177
785
  const fixture = await createFixture()
178
786
  const daemon = await startDaemon(fixture.config)
@@ -231,6 +839,34 @@ test("starting a second daemon on a live control socket reports the running daem
231
839
  }
232
840
  })
233
841
 
test("the daemon applies control.owner and control.group to the bound socket", {skip: process.platform !== "linux" && "requires POSIX chown"}, async () => {
  const root = await fs.mkdtemp(path.join(os.tmpdir(), "rollbridge-test-"))
  const socketPath = path.join(root, "rollbridge.sock")
  const currentUser = os.userInfo()
  const gid = process.getgid?.() ?? 0
  // The owner is given by name and the group by numeric id. Both belong to the current user,
  // so a daemon running without root can still chown the socket to itself.
  const control = {group: gid, owner: currentUser.username, path: socketPath}
  const config = normalizeConfig({
    application: "rollbridge-test",
    control,
    processes: [{command: "true", id: "web", policy: "proxied", port: {from: 0, to: 0}}],
    proxy: {host: "127.0.0.1", port: 0}
  })
  const daemon = new RollbridgeDaemon({config, logger: () => {}})

  try {
    await daemon.start()

    const {gid: socketGid, uid: socketUid} = await fs.stat(socketPath)

    assert.equal(socketUid, currentUser.uid)
    assert.equal(socketGid, gid)
  } finally {
    await daemon.shutdown()
    await fs.rm(root, {force: true, recursive: true})
  }
})
869
+
234
870
  test("a control socket held by a non-Rollbridge process reports a generic conflict", async () => {
235
871
  const root = await fs.mkdtemp(path.join(os.tmpdir(), "rollbridge-test-"))
236
872
  const socketPath = path.join(root, "busy.sock")
@@ -336,13 +972,14 @@ test("deploy can ensure the daemon before sending the release command", async ()
336
972
  })
337
973
 
338
974
  /**
339
- * @param {{includeService?: boolean, includeSingleton?: boolean, proxyHost?: string, webCommand?: string, webDependsOnService?: boolean, webHealthTimeoutMs?: number}} [options] - Fixture options.
340
- * @returns {Promise<{config: import("../src/config.js").RollbridgeConfig, root: string, serviceLogPath: string, singletonLogPath: string}>} Fixture data.
975
+ * @param {{companionReplicas?: number, includeCompanion?: boolean, includeService?: boolean, includeSingleton?: boolean, memoryLimitBytes?: number, nonBlockingDrainWorker?: boolean, persistState?: boolean, proxyHost?: string, singletonCwd?: string, webCommand?: string, webDependsOnService?: boolean, webHealthTimeoutMs?: number}} [options] - Fixture options.
976
+ * @returns {Promise<{config: import("../src/config.js").RollbridgeConfig, root: string, serviceLogPath: string, singletonLogPath: string, statePath: string}>} Fixture data.
341
977
  */
342
978
  async function createFixture(options = {}) {
343
979
  const root = await fs.mkdtemp(path.join(os.tmpdir(), "rollbridge-test-"))
344
980
  const serviceLogPath = path.join(root, "service.log")
345
981
  const singletonLogPath = path.join(root, "singleton.log")
982
+ const statePath = path.join(root, "rollbridge.state.json")
346
983
  /** @type {Array<Record<string, import("../src/json.js").JsonValue>>} */
347
984
  const processes = []
348
985
 
@@ -359,6 +996,44 @@ async function createFixture(options = {}) {
359
996
  })
360
997
  }
361
998
 
999
+ if (options.includeCompanion) {
1000
+ processes.push({
1001
+ command: `${JSON.stringify(process.execPath)} -e ${JSON.stringify("setInterval(() => {}, 1000)")}`,
1002
+ id: "worker",
1003
+ policy: "companion"
1004
+ })
1005
+ }
1006
+
1007
+ if (options.companionReplicas) {
1008
+ processes.push({
1009
+ command: `${JSON.stringify(process.execPath)} -e ${JSON.stringify("setInterval(() => {}, 1000)")}`,
1010
+ id: "worker",
1011
+ policy: "companion",
1012
+ replicas: options.companionReplicas
1013
+ })
1014
+ }
1015
+
1016
+ if (options.nonBlockingDrainWorker) {
1017
+ processes.push({
1018
+ command: `${JSON.stringify(process.execPath)} -e ${JSON.stringify("setInterval(() => {}, 1000)")}`,
1019
+ id: "worker",
1020
+ nonBlockingDrain: true,
1021
+ policy: "companion"
1022
+ })
1023
+ }
1024
+
1025
+ if (options.memoryLimitBytes) {
1026
+ processes.push({
1027
+ command: `${JSON.stringify(process.execPath)} ${JSON.stringify(memoryHogPath)}`,
1028
+ env: {
1029
+ ROLLBRIDGE_HOG_BYTES: String(options.memoryLimitBytes * 4)
1030
+ },
1031
+ id: "hog",
1032
+ memory: {checkIntervalMs: 100, limitBytes: options.memoryLimitBytes, warnBytes: 0},
1033
+ policy: "companion"
1034
+ })
1035
+ }
1036
+
362
1037
  processes.push({
363
1038
  command: options.webCommand || (options.webDependsOnService
364
1039
  ? `${JSON.stringify(process.execPath)} ${JSON.stringify(dependentAppPath)}`
@@ -376,6 +1051,7 @@ async function createFixture(options = {}) {
376
1051
  if (options.includeSingleton) {
377
1052
  processes.push({
378
1053
  command: `${JSON.stringify(process.execPath)} ${JSON.stringify(singletonAppPath)}`,
1054
+ ...(options.singletonCwd ? {cwd: options.singletonCwd} : {}),
379
1055
  env: {
380
1056
  ROLLBRIDGE_SINGLETON_LOG: singletonLogPath
381
1057
  },
@@ -397,10 +1073,11 @@ async function createFixture(options = {}) {
397
1073
  healthTimeoutMs: 3000,
398
1074
  host: options.proxyHost || "127.0.0.1",
399
1075
  port: 0
400
- }
1076
+ },
1077
+ ...(options.persistState ? {statePath} : {})
401
1078
  })
402
1079
 
403
- return {config, root, serviceLogPath, singletonLogPath}
1080
+ return {config, root, serviceLogPath, singletonLogPath, statePath}
404
1081
  }
405
1082
 
406
1083
  /**
@@ -466,6 +1143,27 @@ function statusRelease(daemon, releaseId) {
466
1143
  return release
467
1144
  }
468
1145
 
1146
/**
 * Maps process id to pid across the active release, services, and singletons.
 *
 * Only releases whose state is "active" contribute processes. Services and
 * singletons are applied afterwards, so when an id appears more than once the
 * later source's pid is the one reported.
 * @param {import("../src/daemon.js").DaemonStatus} status - Daemon status payload.
 * @returns {Record<string, number | undefined>} Process id to current pid.
 */
function pidsById(status) {
  /** @type {Array<[string, number | undefined]>} */
  const entries = []

  for (const release of status.releases) {
    if (release.state !== "active") continue

    for (const processStatus of release.processes) entries.push([processStatus.id, processStatus.pid])
  }

  for (const service of status.services) entries.push([service.id, service.process.pid])
  for (const singleton of status.singletons) entries.push([singleton.id, singleton.process.pid])

  // Object.fromEntries defines own data properties, so an id such as
  // "__proto__" is kept as a key instead of hitting the prototype setter the
  // way a plain `pids[id] = pid` assignment would.
  return Object.fromEntries(entries)
}
1166
+
469
1167
  /**
470
1168
  * @param {string} logPath - Log path.
471
1169
  * @returns {Promise<Array<{event: string, pid: number, releaseId: string}>>} Events.
@@ -504,10 +1202,11 @@ async function writeConfigFile(config, root) {
504
1202
 
505
1203
  /**
506
1204
  * @param {() => Promise<boolean> | boolean} callback - Probe callback.
1205
+ * @param {number} [timeoutMs] - How long to wait before giving up (default 3000).
507
1206
  * @returns {Promise<void>} Resolves when callback returns true.
508
1207
  */
509
- async function waitFor(callback) {
510
- const deadline = Date.now() + 3000
1208
+ async function waitFor(callback, timeoutMs = 3000) {
1209
+ const deadline = Date.now() + timeoutMs
511
1210
 
512
1211
  while (Date.now() < deadline) {
513
1212
  if (await callback()) return
@@ -516,3 +1215,14 @@ async function waitFor(callback) {
516
1215
 
517
1216
  throw new Error("Timed out waiting for condition")
518
1217
  }
1218
+
1219
/**
 * Looks up one process of the release the daemon currently reports as active.
 *
 * Uses the first release in `daemon.status().releases` whose state is "active".
 * @param {RollbridgeDaemon} daemon - Daemon.
 * @param {string} processId - Process id within the active release.
 * @returns {import("../src/managed-process.js").ManagedProcessStatus | undefined} The process status, if present;
 *   undefined when no release is active or the active release has no process with that id.
 */
function activeProcessStatus(daemon, processId) {
  const activeRelease = daemon.status().releases.find((candidate) => candidate.state === "active")

  // Optional chaining short-circuits the whole lookup when no release is active.
  return activeRelease?.processes.find((processStatus) => processStatus.id === processId)
}