rollbridge 0.1.5 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +104 -4
- package/TODO.md +42 -40
- package/docs/cli.md +146 -6
- package/docs/config.md +139 -2
- package/docs/logging.md +77 -0
- package/docs/releasing.md +53 -0
- package/docs/tensorbuzz-runbook.md +129 -0
- package/docs/velocious.md +49 -11
- package/docs/workers.md +115 -0
- package/package.json +1 -1
- package/src/cli.js +290 -1
- package/src/config.js +169 -6
- package/src/daemon.js +216 -13
- package/src/doctor.js +177 -0
- package/src/event-log.js +47 -0
- package/src/managed-process.js +225 -16
- package/src/process-memory.js +110 -0
- package/src/recover.js +134 -0
- package/src/release-group.js +71 -21
- package/src/state-store.js +103 -0
- package/src/system-ids.js +71 -0
- package/src/template.js +32 -0
- package/test/completion.test.js +64 -0
- package/test/config-validation.test.js +227 -0
- package/test/doctor.test.js +205 -3
- package/test/event-log.test.js +46 -0
- package/test/fixtures/memory-hog.js +19 -0
- package/test/managed-process.test.js +290 -0
- package/test/process-memory.test.js +40 -0
- package/test/recover.test.js +162 -0
- package/test/release-group.test.js +22 -0
- package/test/rollbridge.test.js +523 -6
- package/test/state-store.test.js +69 -0
- package/test/system-ids.test.js +24 -0
package/test/rollbridge.test.js
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
// @ts-check
|
|
2
2
|
|
|
3
3
|
import assert from "node:assert/strict"
|
|
4
|
+
import {spawn} from "node:child_process"
|
|
5
|
+
import {once} from "node:events"
|
|
4
6
|
import fs from "node:fs/promises"
|
|
5
7
|
import net from "node:net"
|
|
6
8
|
import os from "node:os"
|
|
@@ -10,15 +12,51 @@ import {fileURLToPath} from "node:url"
|
|
|
10
12
|
import RollbridgeDaemon from "../src/daemon.js"
|
|
11
13
|
import {normalizeConfig} from "../src/config.js"
|
|
12
14
|
import {sendControlCommand} from "../src/control-client.js"
|
|
15
|
+
import {readState, writeState} from "../src/state-store.js"
|
|
13
16
|
import {runCli} from "../src/cli.js"
|
|
14
17
|
|
|
15
18
|
const currentDir = path.dirname(fileURLToPath(import.meta.url))
|
|
16
19
|
const binPath = path.join(currentDir, "..", "bin", "rollbridge")
|
|
17
20
|
const dependentAppPath = path.join(currentDir, "fixtures", "dependent-app.js")
|
|
18
21
|
const dummyAppPath = path.join(currentDir, "fixtures", "dummy-app.js")
|
|
22
|
+
const memoryHogPath = path.join(currentDir, "fixtures", "memory-hog.js")
|
|
19
23
|
const serviceAppPath = path.join(currentDir, "fixtures", "service-app.js")
|
|
20
24
|
const singletonAppPath = path.join(currentDir, "fixtures", "singleton-app.js")
|
|
21
25
|
|
|
26
|
+
test("a nonBlockingDrain worker stops immediately while its release is still draining", async () => {
|
|
27
|
+
const fixture = await createFixture({nonBlockingDrainWorker: true})
|
|
28
|
+
const daemon = await startDaemon(fixture.config)
|
|
29
|
+
/** @type {WebSocket | undefined} */
|
|
30
|
+
let socket
|
|
31
|
+
|
|
32
|
+
try {
|
|
33
|
+
await daemon.deploy({releaseId: "v1", releasePath: fixture.root, revision: "v1"})
|
|
34
|
+
|
|
35
|
+
// An open WebSocket keeps v1's connection drain pending after v2 takes over.
|
|
36
|
+
socket = await openWebSocket(daemon)
|
|
37
|
+
await daemon.deploy({releaseId: "v2", releasePath: fixture.root, revision: "v2"})
|
|
38
|
+
|
|
39
|
+
// The nonBlockingDrain worker is stopped right away, in parallel with the connection drain.
|
|
40
|
+
await waitFor(() => {
|
|
41
|
+
const draining = daemon.status().releases.find((release) => release.releaseId === "v1")
|
|
42
|
+
|
|
43
|
+
return draining?.processes.find((processStatus) => processStatus.id === "worker")?.state === "stopped"
|
|
44
|
+
})
|
|
45
|
+
|
|
46
|
+
const v1 = statusRelease(daemon, "v1")
|
|
47
|
+
|
|
48
|
+
// The release is still draining (the WebSocket is held) and its proxied process is still
|
|
49
|
+
// serving, but the worker has already drained.
|
|
50
|
+
assert.equal(v1.state, "draining")
|
|
51
|
+
assert.equal(v1.processes.find((processStatus) => processStatus.id === "web")?.state, "running")
|
|
52
|
+
assert.equal(v1.processes.find((processStatus) => processStatus.id === "worker")?.state, "stopped")
|
|
53
|
+
} finally {
|
|
54
|
+
if (socket) socket.close()
|
|
55
|
+
await daemon.shutdown()
|
|
56
|
+
await fs.rm(fixture.root, {force: true, recursive: true})
|
|
57
|
+
}
|
|
58
|
+
})
|
|
59
|
+
|
|
22
60
|
test("deploy switches new HTTP traffic while old WebSockets drain", async () => {
|
|
23
61
|
const fixture = await createFixture()
|
|
24
62
|
const daemon = await startDaemon(fixture.config)
|
|
@@ -205,6 +243,36 @@ test("service processes start before releases and restart with the latest deploy
|
|
|
205
243
|
}
|
|
206
244
|
})
|
|
207
245
|
|
|
246
|
+
test("a replicated companion starts one instance per replica, and restart targets one or all", async () => {
|
|
247
|
+
const fixture = await createFixture({companionReplicas: 3})
|
|
248
|
+
const daemon = await startDaemon(fixture.config)
|
|
249
|
+
|
|
250
|
+
try {
|
|
251
|
+
await daemon.deploy({releaseId: "v1", releasePath: fixture.root, revision: "v1"})
|
|
252
|
+
|
|
253
|
+
const release = daemon.status().releases.find((candidate) => candidate.state === "active")
|
|
254
|
+
|
|
255
|
+
assert.ok(release)
|
|
256
|
+
|
|
257
|
+
const workerIds = release.processes.filter((processStatus) => processStatus.id.startsWith("worker")).map((processStatus) => processStatus.id).sort()
|
|
258
|
+
|
|
259
|
+
assert.deepEqual(workerIds, ["worker#0", "worker#1", "worker#2"])
|
|
260
|
+
|
|
261
|
+
// A specific replica id restarts only that replica.
|
|
262
|
+
const one = await daemon.restartProcesses({processId: "worker#1"})
|
|
263
|
+
|
|
264
|
+
assert.deepEqual(one.restarted, ["worker#1"])
|
|
265
|
+
|
|
266
|
+
// The base id restarts every replica.
|
|
267
|
+
const all = /** @type {string[]} */ ((await daemon.restartProcesses({processId: "worker"})).restarted)
|
|
268
|
+
|
|
269
|
+
assert.deepEqual([...all].sort(), ["worker#0", "worker#1", "worker#2"])
|
|
270
|
+
} finally {
|
|
271
|
+
await daemon.shutdown()
|
|
272
|
+
await fs.rm(fixture.root, {force: true, recursive: true})
|
|
273
|
+
}
|
|
274
|
+
})
|
|
275
|
+
|
|
208
276
|
test("restart bounces a single process by id", async () => {
|
|
209
277
|
const fixture = await createFixture({includeService: true})
|
|
210
278
|
const daemon = await startDaemon(fixture.config)
|
|
@@ -336,6 +404,383 @@ test("the restart control command bounces a process over the socket", async () =
|
|
|
336
404
|
}
|
|
337
405
|
})
|
|
338
406
|
|
|
407
|
+
test("status and events distinguish deploy starts from manual restarts", async () => {
|
|
408
|
+
const fixture = await createFixture({includeService: true})
|
|
409
|
+
const daemon = await startDaemon(fixture.config)
|
|
410
|
+
|
|
411
|
+
try {
|
|
412
|
+
await daemon.deploy({releaseId: "v1", releasePath: fixture.root, revision: "v1"})
|
|
413
|
+
|
|
414
|
+
const afterDeploy = daemon.status().services.find((service) => service.id === "beacon")
|
|
415
|
+
|
|
416
|
+
assert.ok(afterDeploy)
|
|
417
|
+
assert.equal(afterDeploy.process.lastStartReason, "deploy")
|
|
418
|
+
|
|
419
|
+
await daemon.restartProcesses({processId: "beacon"})
|
|
420
|
+
|
|
421
|
+
const afterRestart = daemon.status().services.find((service) => service.id === "beacon")
|
|
422
|
+
|
|
423
|
+
assert.ok(afterRestart)
|
|
424
|
+
assert.equal(afterRestart.process.lastStartReason, "manual")
|
|
425
|
+
|
|
426
|
+
const events = /** @type {import("../src/event-log.js").DaemonEvent[]} */ ((await sendControlCommand({
|
|
427
|
+
command: {command: "events"},
|
|
428
|
+
path: fixture.config.control.path
|
|
429
|
+
})).events)
|
|
430
|
+
const startReasons = events.filter((event) => event.message === "process started").map((event) => event.data.reason)
|
|
431
|
+
|
|
432
|
+
assert.ok(startReasons.includes("deploy"), JSON.stringify(startReasons))
|
|
433
|
+
assert.ok(startReasons.includes("manual"), JSON.stringify(startReasons))
|
|
434
|
+
} finally {
|
|
435
|
+
await daemon.shutdown()
|
|
436
|
+
await fs.rm(fixture.root, {force: true, recursive: true})
|
|
437
|
+
}
|
|
438
|
+
})
|
|
439
|
+
|
|
440
|
+
test("persists daemon state to statePath and removes it on a clean shutdown", async () => {
|
|
441
|
+
const fixture = await createFixture({persistState: true})
|
|
442
|
+
const daemon = await startDaemon(fixture.config)
|
|
443
|
+
let stateAfterShutdown
|
|
444
|
+
|
|
445
|
+
try {
|
|
446
|
+
await daemon.deploy({releaseId: "v1", releasePath: fixture.root, revision: "v1"})
|
|
447
|
+
|
|
448
|
+
// The state write is fire-and-forget (deploy doesn't block on it), so wait for it to land.
|
|
449
|
+
await waitFor(async () => {
|
|
450
|
+
const persisted = /** @type {{activeReleaseId: string} | undefined} */ (await readState(fixture.statePath))
|
|
451
|
+
|
|
452
|
+
return persisted?.activeReleaseId === "v1"
|
|
453
|
+
})
|
|
454
|
+
|
|
455
|
+
await daemon.shutdown()
|
|
456
|
+
stateAfterShutdown = await readState(fixture.statePath)
|
|
457
|
+
} finally {
|
|
458
|
+
if (!daemon.stopping) await daemon.shutdown()
|
|
459
|
+
await fs.rm(fixture.root, {force: true, recursive: true})
|
|
460
|
+
}
|
|
461
|
+
|
|
462
|
+
assert.equal(stateAfterShutdown, undefined, "state file removed on clean shutdown")
|
|
463
|
+
})
|
|
464
|
+
|
|
465
|
+
test("a clean shutdown clears the state file even when a persist write is in flight", async () => {
|
|
466
|
+
const fixture = await createFixture({persistState: true})
|
|
467
|
+
const daemon = await startDaemon(fixture.config)
|
|
468
|
+
|
|
469
|
+
try {
|
|
470
|
+
await daemon.deploy({releaseId: "v1", releasePath: fixture.root, revision: "v1"})
|
|
471
|
+
|
|
472
|
+
// Shut down immediately — the deploy's fire-and-forget persist may still be in flight.
|
|
473
|
+
await daemon.shutdown()
|
|
474
|
+
|
|
475
|
+
assert.equal(await readState(fixture.statePath), undefined, "state file must not be recreated by an in-flight write")
|
|
476
|
+
} finally {
|
|
477
|
+
if (!daemon.stopping) await daemon.shutdown()
|
|
478
|
+
await fs.rm(fixture.root, {force: true, recursive: true})
|
|
479
|
+
}
|
|
480
|
+
})
|
|
481
|
+
|
|
482
|
+
test("reports orphaned managed processes from a previous daemon's state", async () => {
|
|
483
|
+
const dir = await fs.mkdtemp(path.join(os.tmpdir(), "rollbridge-test-"))
|
|
484
|
+
const statePath = path.join(dir, "state.json")
|
|
485
|
+
// A live process standing in for a leftover managed child from a crashed daemon.
|
|
486
|
+
const leftover = spawn(process.execPath, ["-e", "setInterval(() => {}, 1000)"], {stdio: "ignore"})
|
|
487
|
+
|
|
488
|
+
await once(leftover, "spawn")
|
|
489
|
+
|
|
490
|
+
/** @type {{data: Record<string, import("../src/json.js").JsonValue>, message: string}[]} */
|
|
491
|
+
const logs = []
|
|
492
|
+
const config = normalizeConfig({
|
|
493
|
+
application: "rollbridge-test",
|
|
494
|
+
control: {path: path.join(dir, "rollbridge.sock")},
|
|
495
|
+
processes: [{command: "true", id: "web", policy: "proxied", port: {from: 0, to: 0}}],
|
|
496
|
+
proxy: {host: "127.0.0.1", port: 0},
|
|
497
|
+
statePath
|
|
498
|
+
})
|
|
499
|
+
const daemon = new RollbridgeDaemon({config, logger: (message, data = {}) => { logs.push({data, message}) }})
|
|
500
|
+
|
|
501
|
+
try {
|
|
502
|
+
// A prior daemon left a worker with this (still-alive) pid.
|
|
503
|
+
await writeState(statePath, {
|
|
504
|
+
activeReleaseId: "v1",
|
|
505
|
+
releases: [{processes: [{id: "worker", pid: leftover.pid}], releaseId: "v1"}],
|
|
506
|
+
services: [],
|
|
507
|
+
singletons: []
|
|
508
|
+
})
|
|
509
|
+
|
|
510
|
+
await daemon.reportOrphans()
|
|
511
|
+
|
|
512
|
+
assert.ok(logs.some((entry) => entry.message === "orphaned managed process detected" && entry.data.pid === leftover.pid), JSON.stringify(logs))
|
|
513
|
+
|
|
514
|
+
// A dead pid is not reported.
|
|
515
|
+
logs.length = 0
|
|
516
|
+
await writeState(statePath, {
|
|
517
|
+
activeReleaseId: "v1",
|
|
518
|
+
releases: [{processes: [{id: "worker", pid: 2147483646}], releaseId: "v1"}],
|
|
519
|
+
services: [],
|
|
520
|
+
singletons: []
|
|
521
|
+
})
|
|
522
|
+
await daemon.reportOrphans()
|
|
523
|
+
|
|
524
|
+
assert.ok(!logs.some((entry) => entry.message === "orphaned managed process detected"))
|
|
525
|
+
} finally {
|
|
526
|
+
leftover.kill("SIGKILL")
|
|
527
|
+
await fs.rm(dir, {force: true, recursive: true})
|
|
528
|
+
}
|
|
529
|
+
})
|
|
530
|
+
|
|
531
|
+
test("status surfaces still-alive orphaned processes from a previous daemon and drops them once gone", async () => {
|
|
532
|
+
const dir = await fs.mkdtemp(path.join(os.tmpdir(), "rollbridge-test-"))
|
|
533
|
+
const statePath = path.join(dir, "state.json")
|
|
534
|
+
const leftover = spawn(process.execPath, ["-e", "setInterval(() => {}, 1000)"], {stdio: "ignore"})
|
|
535
|
+
|
|
536
|
+
await once(leftover, "spawn")
|
|
537
|
+
|
|
538
|
+
const config = normalizeConfig({
|
|
539
|
+
application: "rollbridge-test",
|
|
540
|
+
control: {path: path.join(dir, "rollbridge.sock")},
|
|
541
|
+
processes: [{command: "true", id: "web", policy: "proxied", port: {from: 0, to: 0}}],
|
|
542
|
+
proxy: {host: "127.0.0.1", port: 0},
|
|
543
|
+
statePath
|
|
544
|
+
})
|
|
545
|
+
const daemon = new RollbridgeDaemon({config, logger: () => {}})
|
|
546
|
+
|
|
547
|
+
try {
|
|
548
|
+
// A prior daemon left a worker with this (still-alive) pid.
|
|
549
|
+
await writeState(statePath, {
|
|
550
|
+
activeReleaseId: "v1",
|
|
551
|
+
releases: [{processes: [{id: "worker", pid: leftover.pid}], releaseId: "v1"}],
|
|
552
|
+
services: [],
|
|
553
|
+
singletons: []
|
|
554
|
+
})
|
|
555
|
+
|
|
556
|
+
await daemon.reportOrphans()
|
|
557
|
+
|
|
558
|
+
// status reflects the still-running child even though the daemon cannot re-manage it.
|
|
559
|
+
assert.deepEqual(daemon.status().orphans, [{id: "worker", pid: leftover.pid, releaseId: "v1"}])
|
|
560
|
+
|
|
561
|
+
// Once the leftover is stopped, status re-checks liveness and drops it.
|
|
562
|
+
leftover.kill("SIGKILL")
|
|
563
|
+
await waitFor(() => daemon.status().orphans.length === 0)
|
|
564
|
+
assert.deepEqual(daemon.status().orphans, [])
|
|
565
|
+
|
|
566
|
+
// The dead entry is pruned from the underlying list, not merely filtered, so a recycled pid
|
|
567
|
+
// can't resurrect a cleared orphan.
|
|
568
|
+
assert.deepEqual(daemon.orphans, [])
|
|
569
|
+
} finally {
|
|
570
|
+
leftover.kill("SIGKILL")
|
|
571
|
+
await fs.rm(dir, {force: true, recursive: true})
|
|
572
|
+
}
|
|
573
|
+
})
|
|
574
|
+
|
|
575
|
+
test("the daemon records a structured event history served by the events command", async () => {
|
|
576
|
+
const fixture = await createFixture()
|
|
577
|
+
const daemon = await startDaemon(fixture.config)
|
|
578
|
+
|
|
579
|
+
try {
|
|
580
|
+
await daemon.deploy({releaseId: "v1", releasePath: fixture.root, revision: "v1"})
|
|
581
|
+
|
|
582
|
+
const response = await sendControlCommand({
|
|
583
|
+
command: {command: "events"},
|
|
584
|
+
path: fixture.config.control.path
|
|
585
|
+
})
|
|
586
|
+
const events = /** @type {import("../src/event-log.js").DaemonEvent[]} */ (response.events)
|
|
587
|
+
const messages = events.map((event) => event.message)
|
|
588
|
+
|
|
589
|
+
assert.ok(messages.includes("deploy starting"), JSON.stringify(messages))
|
|
590
|
+
assert.ok(messages.includes("traffic switched"), JSON.stringify(messages))
|
|
591
|
+
|
|
592
|
+
const switched = events.find((event) => event.message === "traffic switched")
|
|
593
|
+
|
|
594
|
+
assert.ok(switched)
|
|
595
|
+
assert.equal(switched.data.releaseId, "v1")
|
|
596
|
+
assert.match(switched.at, /^\d{4}-\d{2}-\d{2}T.*Z$/)
|
|
597
|
+
} finally {
|
|
598
|
+
await daemon.shutdown()
|
|
599
|
+
await fs.rm(fixture.root, {force: true, recursive: true})
|
|
600
|
+
}
|
|
601
|
+
})
|
|
602
|
+
|
|
603
|
+
test("the events command honors --limit and records failed commands", async () => {
|
|
604
|
+
const fixture = await createFixture()
|
|
605
|
+
const daemon = await startDaemon(fixture.config)
|
|
606
|
+
|
|
607
|
+
try {
|
|
608
|
+
await daemon.deploy({releaseId: "v1", releasePath: fixture.root, revision: "v1"})
|
|
609
|
+
|
|
610
|
+
// An unknown command is rejected and recorded as a "command failed" event.
|
|
611
|
+
await assert.rejects(() => sendControlCommand({
|
|
612
|
+
command: {command: "bogus"},
|
|
613
|
+
path: fixture.config.control.path
|
|
614
|
+
}))
|
|
615
|
+
|
|
616
|
+
const all = /** @type {import("../src/event-log.js").DaemonEvent[]} */ ((await sendControlCommand({
|
|
617
|
+
command: {command: "events"},
|
|
618
|
+
path: fixture.config.control.path
|
|
619
|
+
})).events)
|
|
620
|
+
|
|
621
|
+
assert.ok(all.some((event) => event.message === "command failed"))
|
|
622
|
+
|
|
623
|
+
const limited = /** @type {import("../src/event-log.js").DaemonEvent[]} */ ((await sendControlCommand({
|
|
624
|
+
command: {command: "events", limit: 1},
|
|
625
|
+
path: fixture.config.control.path
|
|
626
|
+
})).events)
|
|
627
|
+
|
|
628
|
+
assert.equal(limited.length, 1)
|
|
629
|
+
assert.deepEqual(limited[0], all[all.length - 1])
|
|
630
|
+
} finally {
|
|
631
|
+
await daemon.shutdown()
|
|
632
|
+
await fs.rm(fixture.root, {force: true, recursive: true})
|
|
633
|
+
}
|
|
634
|
+
})
|
|
635
|
+
|
|
636
|
+
test("a process over its memory limit is restarted with reason memory", {skip: process.platform !== "linux" && "requires /proc (Linux)"}, async () => {
|
|
637
|
+
const limitBytes = 64 * 1024 * 1024
|
|
638
|
+
const fixture = await createFixture({memoryLimitBytes: limitBytes})
|
|
639
|
+
const daemon = await startDaemon(fixture.config)
|
|
640
|
+
|
|
641
|
+
try {
|
|
642
|
+
await daemon.deploy({releaseId: "v1", releasePath: fixture.root, revision: "v1"})
|
|
643
|
+
|
|
644
|
+
// The hog allocates ~4x the limit, so the monitor restarts it.
|
|
645
|
+
await waitFor(() => (activeProcessStatus(daemon, "hog")?.memoryRestarts ?? 0) >= 1, 10000)
|
|
646
|
+
|
|
647
|
+
const hog = activeProcessStatus(daemon, "hog")
|
|
648
|
+
|
|
649
|
+
assert.ok(hog, "hog process should be present")
|
|
650
|
+
assert.ok(hog.memoryRestarts >= 1, `expected a memory restart, got ${hog.memoryRestarts}`)
|
|
651
|
+
assert.equal(hog.lastStartReason, "memory")
|
|
652
|
+
assert.equal(typeof hog.lastMemoryRestartAt, "string")
|
|
653
|
+
|
|
654
|
+
// rssBytes is sampled on the monitor's interval; wait for a measurement of the running process.
|
|
655
|
+
await waitFor(() => {
|
|
656
|
+
const rssBytes = activeProcessStatus(daemon, "hog")?.rssBytes
|
|
657
|
+
|
|
658
|
+
return typeof rssBytes === "number" && rssBytes > 0
|
|
659
|
+
}, 5000)
|
|
660
|
+
|
|
661
|
+
// The same monitor sample reports the process tree.
|
|
662
|
+
const monitored = activeProcessStatus(daemon, "hog")
|
|
663
|
+
|
|
664
|
+
assert.ok(monitored)
|
|
665
|
+
assert.ok(monitored.children.length >= 1, "status should include the process tree")
|
|
666
|
+
assert.ok(monitored.children.some((child) => typeof child.rssBytes === "number" && child.rssBytes > 0))
|
|
667
|
+
} finally {
|
|
668
|
+
await daemon.shutdown()
|
|
669
|
+
await fs.rm(fixture.root, {force: true, recursive: true})
|
|
670
|
+
}
|
|
671
|
+
})
|
|
672
|
+
|
|
673
|
+
test("rollback re-activates the previous release and switches traffic back", async () => {
|
|
674
|
+
const fixture = await createFixture()
|
|
675
|
+
const daemon = await startDaemon(fixture.config)
|
|
676
|
+
|
|
677
|
+
try {
|
|
678
|
+
await daemon.deploy({releaseId: "v1", releasePath: fixture.root, revision: "v1"})
|
|
679
|
+
await daemon.deploy({releaseId: "v2", releasePath: fixture.root, revision: "v2"})
|
|
680
|
+
|
|
681
|
+
assert.equal(await fetchText(daemon, "/release"), "v2")
|
|
682
|
+
|
|
683
|
+
const result = await daemon.rollback()
|
|
684
|
+
|
|
685
|
+
assert.equal(result.activeReleaseId, "v1")
|
|
686
|
+
assert.equal(result.previousReleaseId, "v2")
|
|
687
|
+
assert.equal(daemon.status().activeReleaseId, "v1")
|
|
688
|
+
assert.equal(await fetchText(daemon, "/release"), "v1")
|
|
689
|
+
} finally {
|
|
690
|
+
await daemon.shutdown()
|
|
691
|
+
await fs.rm(fixture.root, {force: true, recursive: true})
|
|
692
|
+
}
|
|
693
|
+
})
|
|
694
|
+
|
|
695
|
+
test("rollback --release-id targets a specific retained release", async () => {
|
|
696
|
+
const fixture = await createFixture()
|
|
697
|
+
const daemon = await startDaemon(fixture.config)
|
|
698
|
+
|
|
699
|
+
try {
|
|
700
|
+
await daemon.deploy({releaseId: "v1", releasePath: fixture.root, revision: "v1"})
|
|
701
|
+
await daemon.deploy({releaseId: "v2", releasePath: fixture.root, revision: "v2"})
|
|
702
|
+
await daemon.deploy({releaseId: "v3", releasePath: fixture.root, revision: "v3"})
|
|
703
|
+
|
|
704
|
+
const result = await daemon.rollback({releaseId: "v1"})
|
|
705
|
+
|
|
706
|
+
assert.equal(result.activeReleaseId, "v1")
|
|
707
|
+
assert.equal(await fetchText(daemon, "/release"), "v1")
|
|
708
|
+
} finally {
|
|
709
|
+
await daemon.shutdown()
|
|
710
|
+
await fs.rm(fixture.root, {force: true, recursive: true})
|
|
711
|
+
}
|
|
712
|
+
})
|
|
713
|
+
|
|
714
|
+
test("rollback rejects no-previous, unknown, and already-active targets", async () => {
|
|
715
|
+
const fixture = await createFixture()
|
|
716
|
+
const daemon = await startDaemon(fixture.config)
|
|
717
|
+
|
|
718
|
+
try {
|
|
719
|
+
await daemon.deploy({releaseId: "v1", releasePath: fixture.root, revision: "v1"})
|
|
720
|
+
|
|
721
|
+
await assert.rejects(() => daemon.rollback(), /No previous release/)
|
|
722
|
+
await assert.rejects(() => daemon.rollback({releaseId: "v1"}), /already active/)
|
|
723
|
+
await assert.rejects(() => daemon.rollback({releaseId: "nope"}), /No retained release "nope"/)
|
|
724
|
+
} finally {
|
|
725
|
+
await daemon.shutdown()
|
|
726
|
+
await fs.rm(fixture.root, {force: true, recursive: true})
|
|
727
|
+
}
|
|
728
|
+
})
|
|
729
|
+
|
|
730
|
+
test("rollback to a still-draining release stops the old instance instead of orphaning it", async () => {
|
|
731
|
+
const fixture = await createFixture()
|
|
732
|
+
const daemon = await startDaemon(fixture.config)
|
|
733
|
+
/** @type {WebSocket | undefined} */
|
|
734
|
+
let socket
|
|
735
|
+
|
|
736
|
+
try {
|
|
737
|
+
await daemon.deploy({releaseId: "v1", releasePath: fixture.root, revision: "v1"})
|
|
738
|
+
|
|
739
|
+
// An open WebSocket keeps v1's connection count > 0, so it stays draining after v2.
|
|
740
|
+
socket = await openWebSocket(daemon)
|
|
741
|
+
await daemon.deploy({releaseId: "v2", releasePath: fixture.root, revision: "v2"})
|
|
742
|
+
|
|
743
|
+
const draining = statusRelease(daemon, "v1")
|
|
744
|
+
|
|
745
|
+
assert.equal(draining.state, "draining")
|
|
746
|
+
|
|
747
|
+
const oldWebPid = draining.processes.find((processStatus) => processStatus.id === "web")?.pid
|
|
748
|
+
|
|
749
|
+
assert.ok(oldWebPid, "the draining release should have a running web process")
|
|
750
|
+
|
|
751
|
+
await daemon.rollback({releaseId: "v1"})
|
|
752
|
+
|
|
753
|
+
assert.equal(daemon.status().activeReleaseId, "v1")
|
|
754
|
+
// The old draining instance was stopped before its id was reused, so its process is gone.
|
|
755
|
+
assert.throws(() => process.kill(/** @type {number} */ (oldWebPid), 0), /ESRCH/)
|
|
756
|
+
} finally {
|
|
757
|
+
if (socket) socket.close()
|
|
758
|
+
await daemon.shutdown()
|
|
759
|
+
await fs.rm(fixture.root, {force: true, recursive: true})
|
|
760
|
+
}
|
|
761
|
+
})
|
|
762
|
+
|
|
763
|
+
test("the rollback control command switches traffic over the socket", async () => {
|
|
764
|
+
const fixture = await createFixture()
|
|
765
|
+
const daemon = await startDaemon(fixture.config)
|
|
766
|
+
|
|
767
|
+
try {
|
|
768
|
+
await daemon.deploy({releaseId: "v1", releasePath: fixture.root, revision: "v1"})
|
|
769
|
+
await daemon.deploy({releaseId: "v2", releasePath: fixture.root, revision: "v2"})
|
|
770
|
+
|
|
771
|
+
const response = await sendControlCommand({
|
|
772
|
+
command: {command: "rollback"},
|
|
773
|
+
path: fixture.config.control.path
|
|
774
|
+
})
|
|
775
|
+
|
|
776
|
+
assert.equal(response.activeReleaseId, "v1")
|
|
777
|
+
assert.equal(await fetchText(daemon, "/release"), "v1")
|
|
778
|
+
} finally {
|
|
779
|
+
await daemon.shutdown()
|
|
780
|
+
await fs.rm(fixture.root, {force: true, recursive: true})
|
|
781
|
+
}
|
|
782
|
+
})
|
|
783
|
+
|
|
339
784
|
test("control socket accepts deploy and status commands", async () => {
|
|
340
785
|
const fixture = await createFixture()
|
|
341
786
|
const daemon = await startDaemon(fixture.config)
|
|
@@ -394,6 +839,34 @@ test("starting a second daemon on a live control socket reports the running daem
|
|
|
394
839
|
}
|
|
395
840
|
})
|
|
396
841
|
|
|
842
|
+
test("the daemon applies control.owner and control.group to the bound socket", {skip: process.platform !== "linux" && "requires POSIX chown"}, async () => {
|
|
843
|
+
const root = await fs.mkdtemp(path.join(os.tmpdir(), "rollbridge-test-"))
|
|
844
|
+
const socketPath = path.join(root, "rollbridge.sock")
|
|
845
|
+
const {uid, username} = os.userInfo()
|
|
846
|
+
const gid = process.getgid?.() ?? 0
|
|
847
|
+
const config = normalizeConfig({
|
|
848
|
+
application: "rollbridge-test",
|
|
849
|
+
// owner by name (resolved to the current uid); group by numeric id. Both are the current
|
|
850
|
+
// user's, so a non-root daemon can chown the socket to itself.
|
|
851
|
+
control: {group: gid, owner: username, path: socketPath},
|
|
852
|
+
processes: [{command: "true", id: "web", policy: "proxied", port: {from: 0, to: 0}}],
|
|
853
|
+
proxy: {host: "127.0.0.1", port: 0}
|
|
854
|
+
})
|
|
855
|
+
const daemon = new RollbridgeDaemon({config, logger: () => {}})
|
|
856
|
+
|
|
857
|
+
try {
|
|
858
|
+
await daemon.start()
|
|
859
|
+
|
|
860
|
+
const stats = await fs.stat(socketPath)
|
|
861
|
+
|
|
862
|
+
assert.equal(stats.uid, uid)
|
|
863
|
+
assert.equal(stats.gid, gid)
|
|
864
|
+
} finally {
|
|
865
|
+
await daemon.shutdown()
|
|
866
|
+
await fs.rm(root, {force: true, recursive: true})
|
|
867
|
+
}
|
|
868
|
+
})
|
|
869
|
+
|
|
397
870
|
test("a control socket held by a non-Rollbridge process reports a generic conflict", async () => {
|
|
398
871
|
const root = await fs.mkdtemp(path.join(os.tmpdir(), "rollbridge-test-"))
|
|
399
872
|
const socketPath = path.join(root, "busy.sock")
|
|
@@ -499,13 +972,14 @@ test("deploy can ensure the daemon before sending the release command", async ()
|
|
|
499
972
|
})
|
|
500
973
|
|
|
501
974
|
/**
|
|
502
|
-
* @param {{includeCompanion?: boolean, includeService?: boolean, includeSingleton?: boolean, proxyHost?: string, singletonCwd?: string, webCommand?: string, webDependsOnService?: boolean, webHealthTimeoutMs?: number}} [options] - Fixture options.
|
|
503
|
-
* @returns {Promise<{config: import("../src/config.js").RollbridgeConfig, root: string, serviceLogPath: string, singletonLogPath: string}>} Fixture data.
|
|
975
|
+
* @param {{companionReplicas?: number, includeCompanion?: boolean, includeService?: boolean, includeSingleton?: boolean, memoryLimitBytes?: number, nonBlockingDrainWorker?: boolean, persistState?: boolean, proxyHost?: string, singletonCwd?: string, webCommand?: string, webDependsOnService?: boolean, webHealthTimeoutMs?: number}} [options] - Fixture options.
|
|
976
|
+
* @returns {Promise<{config: import("../src/config.js").RollbridgeConfig, root: string, serviceLogPath: string, singletonLogPath: string, statePath: string}>} Fixture data.
|
|
504
977
|
*/
|
|
505
978
|
async function createFixture(options = {}) {
|
|
506
979
|
const root = await fs.mkdtemp(path.join(os.tmpdir(), "rollbridge-test-"))
|
|
507
980
|
const serviceLogPath = path.join(root, "service.log")
|
|
508
981
|
const singletonLogPath = path.join(root, "singleton.log")
|
|
982
|
+
const statePath = path.join(root, "rollbridge.state.json")
|
|
509
983
|
/** @type {Array<Record<string, import("../src/json.js").JsonValue>>} */
|
|
510
984
|
const processes = []
|
|
511
985
|
|
|
@@ -530,6 +1004,36 @@ async function createFixture(options = {}) {
|
|
|
530
1004
|
})
|
|
531
1005
|
}
|
|
532
1006
|
|
|
1007
|
+
if (options.companionReplicas) {
|
|
1008
|
+
processes.push({
|
|
1009
|
+
command: `${JSON.stringify(process.execPath)} -e ${JSON.stringify("setInterval(() => {}, 1000)")}`,
|
|
1010
|
+
id: "worker",
|
|
1011
|
+
policy: "companion",
|
|
1012
|
+
replicas: options.companionReplicas
|
|
1013
|
+
})
|
|
1014
|
+
}
|
|
1015
|
+
|
|
1016
|
+
if (options.nonBlockingDrainWorker) {
|
|
1017
|
+
processes.push({
|
|
1018
|
+
command: `${JSON.stringify(process.execPath)} -e ${JSON.stringify("setInterval(() => {}, 1000)")}`,
|
|
1019
|
+
id: "worker",
|
|
1020
|
+
nonBlockingDrain: true,
|
|
1021
|
+
policy: "companion"
|
|
1022
|
+
})
|
|
1023
|
+
}
|
|
1024
|
+
|
|
1025
|
+
if (options.memoryLimitBytes) {
|
|
1026
|
+
processes.push({
|
|
1027
|
+
command: `${JSON.stringify(process.execPath)} ${JSON.stringify(memoryHogPath)}`,
|
|
1028
|
+
env: {
|
|
1029
|
+
ROLLBRIDGE_HOG_BYTES: String(options.memoryLimitBytes * 4)
|
|
1030
|
+
},
|
|
1031
|
+
id: "hog",
|
|
1032
|
+
memory: {checkIntervalMs: 100, limitBytes: options.memoryLimitBytes, warnBytes: 0},
|
|
1033
|
+
policy: "companion"
|
|
1034
|
+
})
|
|
1035
|
+
}
|
|
1036
|
+
|
|
533
1037
|
processes.push({
|
|
534
1038
|
command: options.webCommand || (options.webDependsOnService
|
|
535
1039
|
? `${JSON.stringify(process.execPath)} ${JSON.stringify(dependentAppPath)}`
|
|
@@ -569,10 +1073,11 @@ async function createFixture(options = {}) {
|
|
|
569
1073
|
healthTimeoutMs: 3000,
|
|
570
1074
|
host: options.proxyHost || "127.0.0.1",
|
|
571
1075
|
port: 0
|
|
572
|
-
}
|
|
1076
|
+
},
|
|
1077
|
+
...(options.persistState ? {statePath} : {})
|
|
573
1078
|
})
|
|
574
1079
|
|
|
575
|
-
return {config, root, serviceLogPath, singletonLogPath}
|
|
1080
|
+
return {config, root, serviceLogPath, singletonLogPath, statePath}
|
|
576
1081
|
}
|
|
577
1082
|
|
|
578
1083
|
/**
|
|
@@ -697,10 +1202,11 @@ async function writeConfigFile(config, root) {
|
|
|
697
1202
|
|
|
698
1203
|
/**
|
|
699
1204
|
* @param {() => Promise<boolean> | boolean} callback - Probe callback.
|
|
1205
|
+
* @param {number} [timeoutMs] - How long to wait before giving up (default 3000).
|
|
700
1206
|
* @returns {Promise<void>} Resolves when callback returns true.
|
|
701
1207
|
*/
|
|
702
|
-
async function waitFor(callback) {
|
|
703
|
-
const deadline = Date.now() + 3000
|
|
1208
|
+
async function waitFor(callback, timeoutMs = 3000) {
|
|
1209
|
+
const deadline = Date.now() + timeoutMs
|
|
704
1210
|
|
|
705
1211
|
while (Date.now() < deadline) {
|
|
706
1212
|
if (await callback()) return
|
|
@@ -709,3 +1215,14 @@ async function waitFor(callback) {
|
|
|
709
1215
|
|
|
710
1216
|
throw new Error("Timed out waiting for condition")
|
|
711
1217
|
}
|
|
1218
|
+
|
|
1219
|
+
/**
|
|
1220
|
+
* @param {RollbridgeDaemon} daemon - Daemon.
|
|
1221
|
+
* @param {string} processId - Process id within the active release.
|
|
1222
|
+
* @returns {import("../src/managed-process.js").ManagedProcessStatus | undefined} The process status, if present.
|
|
1223
|
+
*/
|
|
1224
|
+
function activeProcessStatus(daemon, processId) {
|
|
1225
|
+
const release = daemon.status().releases.find((candidate) => candidate.state === "active")
|
|
1226
|
+
|
|
1227
|
+
return release ? release.processes.find((processStatus) => processStatus.id === processId) : undefined
|
|
1228
|
+
}
|
|
package/test/state-store.test.js
@@ -0,0 +1,69 @@
|
|
|
1
|
+
// @ts-check
|
|
2
|
+
|
|
3
|
+
import assert from "node:assert/strict"
|
|
4
|
+
import fs from "node:fs/promises"
|
|
5
|
+
import os from "node:os"
|
|
6
|
+
import path from "node:path"
|
|
7
|
+
import test from "node:test"
|
|
8
|
+
import {clearState, readState, writeState} from "../src/state-store.js"
|
|
9
|
+
|
|
10
|
+
test("writeState then readState round-trips a snapshot", async () => {
|
|
11
|
+
const dir = await fs.mkdtemp(path.join(os.tmpdir(), "rollbridge-state-"))
|
|
12
|
+
const statePath = path.join(dir, "state.json")
|
|
13
|
+
|
|
14
|
+
try {
|
|
15
|
+
await writeState(statePath, {activeReleaseId: "v1", releases: [{releaseId: "v1"}]})
|
|
16
|
+
|
|
17
|
+
const state = /** @type {{activeReleaseId: string}} */ (await readState(statePath))
|
|
18
|
+
|
|
19
|
+
assert.equal(state.activeReleaseId, "v1")
|
|
20
|
+
} finally {
|
|
21
|
+
await fs.rm(dir, {force: true, recursive: true})
|
|
22
|
+
}
|
|
23
|
+
})
|
|
24
|
+
|
|
25
|
+
test("readState returns undefined for a missing or unparseable file", async () => {
|
|
26
|
+
const dir = await fs.mkdtemp(path.join(os.tmpdir(), "rollbridge-state-"))
|
|
27
|
+
const statePath = path.join(dir, "state.json")
|
|
28
|
+
|
|
29
|
+
try {
|
|
30
|
+
assert.equal(await readState(statePath), undefined)
|
|
31
|
+
|
|
32
|
+
await fs.writeFile(statePath, "{not json")
|
|
33
|
+
|
|
34
|
+
assert.equal(await readState(statePath), undefined)
|
|
35
|
+
} finally {
|
|
36
|
+
await fs.rm(dir, {force: true, recursive: true})
|
|
37
|
+
}
|
|
38
|
+
})
|
|
39
|
+
|
|
40
|
+
test("concurrent writes leave a complete, uncorrupted snapshot", async () => {
|
|
41
|
+
const dir = await fs.mkdtemp(path.join(os.tmpdir(), "rollbridge-state-"))
|
|
42
|
+
const statePath = path.join(dir, "state.json")
|
|
43
|
+
|
|
44
|
+
try {
|
|
45
|
+
await Promise.all([writeState(statePath, {n: 1}), writeState(statePath, {n: 2}), writeState(statePath, {n: 3})])
|
|
46
|
+
|
|
47
|
+
const state = /** @type {{n: number}} */ (await readState(statePath))
|
|
48
|
+
|
|
49
|
+
// A complete snapshot from one of the writers — never a partial/corrupt file or a temp race.
|
|
50
|
+
assert.ok(state && typeof state.n === "number")
|
|
51
|
+
} finally {
|
|
52
|
+
await fs.rm(dir, {force: true, recursive: true})
|
|
53
|
+
}
|
|
54
|
+
})
|
|
55
|
+
|
|
56
|
+
test("clearState removes the file and ignores a missing one", async () => {
|
|
57
|
+
const dir = await fs.mkdtemp(path.join(os.tmpdir(), "rollbridge-state-"))
|
|
58
|
+
const statePath = path.join(dir, "state.json")
|
|
59
|
+
|
|
60
|
+
try {
|
|
61
|
+
await writeState(statePath, {ok: true})
|
|
62
|
+
await clearState(statePath)
|
|
63
|
+
|
|
64
|
+
assert.equal(await readState(statePath), undefined)
|
|
65
|
+
await clearState(statePath)
|
|
66
|
+
} finally {
|
|
67
|
+
await fs.rm(dir, {force: true, recursive: true})
|
|
68
|
+
}
|
|
69
|
+
})
|