rollbridge 0.1.4 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/daemon.js CHANGED
@@ -4,13 +4,19 @@ import fs from "node:fs/promises"
4
4
  import http from "node:http"
5
5
  import net from "node:net"
6
6
  import httpProxy from "http-proxy"
7
+ import EventLog from "./event-log.js"
7
8
  import ReleaseGroup from "./release-group.js"
9
+ import {clearState, isProcessAlive, liveProcesses, readState, writeState} from "./state-store.js"
10
+ import {resolveGroupId, resolveUserId} from "./system-ids.js"
11
+
12
+ const EVENT_HISTORY_LIMIT = 1000
13
+ const STATE_PERSIST_INTERVAL_MS = 5000
8
14
 
9
15
  /**
10
16
  * @typedef {import("./json.js").JsonValue} JsonValue
11
17
  * @typedef {{releaseId?: string, releasePath: string, revision?: string}} DeployArgs
12
18
  * @typedef {{id: string, process: import("./managed-process.js").ManagedProcessStatus}} ProcessStatus
13
- * @typedef {{activeReleaseId: string | null, application: string, control: import("./config.js").ControlConfig, proxy: {host: string, port: number | undefined, upstreamHost: string}, releases: import("./release-group.js").ReleaseStatus[], services: ProcessStatus[], singletons: ProcessStatus[]}} DaemonStatus
19
+ * @typedef {{activeReleaseId: string | null, application: string, control: import("./config.js").ControlConfig, orphans: {id: string, pid: number, releaseId: string | null}[], proxy: {host: string, port: number | undefined, upstreamHost: string}, releases: import("./release-group.js").ReleaseStatus[], services: ProcessStatus[], singletons: ProcessStatus[]}} DaemonStatus
14
20
  */
15
21
 
16
22
  export default class RollbridgeDaemon {
@@ -21,7 +27,18 @@ export default class RollbridgeDaemon {
21
27
  */
22
28
  constructor({config, logger}) {
23
29
  this.config = config
24
- this.logger = logger || ((message, data = {}) => console.log(JSON.stringify({at: new Date().toISOString(), data, message})))
30
+ this.eventLog = new EventLog(EVENT_HISTORY_LIMIT)
31
+
32
+ const baseLogger = logger || ((message, data = {}) => console.log(JSON.stringify({at: new Date().toISOString(), data, message})))
33
+
34
+ // Every operational milestone is logged through this.logger, so recording here
35
+ // gives a structured event history for free (deploys, switches, stops, crashes,
36
+ // restarts, and failed commands).
37
+ this.logger = /** @type {(message: string, data?: Record<string, JsonValue>) => void} */ ((message, data = {}) => {
38
+ this.eventLog.record(message, data)
39
+ baseLogger(message, data)
40
+ })
41
+
25
42
  this.releases = /** @type {Map<string, ReleaseGroup>} */ (new Map())
26
43
  this.services = /** @type {Map<string, import("./managed-process.js").default>} */ (new Map())
27
44
  this.servicePorts = /** @type {Record<string, number>} */ ({})
@@ -32,14 +49,22 @@ export default class RollbridgeDaemon {
32
49
  this.controlServer = /** @type {net.Server | undefined} */ (undefined)
33
50
  this.proxyPort = /** @type {number | undefined} */ (undefined)
34
51
  this.stopping = false
52
+ this.statePath = config.statePath
53
+ this.persistTimer = /** @type {ReturnType<typeof setInterval> | undefined} */ (undefined)
54
+ this.pendingWrite = /** @type {Promise<void> | undefined} */ (undefined)
55
+ // Still-alive managed processes left by a previous daemon (from statePath), captured at
56
+ // startup and surfaced in status(). The daemon cannot re-manage them, only report them.
57
+ this.orphans = /** @type {{id: string, pid: number, releaseId: string | null}[]} */ ([])
35
58
 
36
59
  this.proxy.on("error", (error, req, res) => this.onProxyError(error, req, res))
37
60
  }
38
61
 
39
62
  /** @returns {Promise<void>} Starts proxy and control listeners. */
40
63
  async start() {
64
+ await this.reportOrphans()
41
65
  await this.startProxy()
42
66
  await this.startControlServer()
67
+ this.startStatePersistence()
43
68
  }
44
69
 
45
70
  /** @returns {Promise<void>} Starts the stable local proxy. */
@@ -78,6 +103,30 @@ export default class RollbridgeDaemon {
78
103
  if (this.config.control.mode !== undefined) {
79
104
  await fs.chmod(this.config.control.path, this.config.control.mode)
80
105
  }
106
+
107
+ await this.applyControlSocketOwnership()
108
+ }
109
+
110
+ /**
111
+ * Applies control.owner/control.group to the bound socket via chown, resolving names to ids.
112
+ * @returns {Promise<void>} Resolves once ownership is applied (no-op when neither is set).
113
+ */
114
+ async applyControlSocketOwnership() {
115
+ const {group, owner, path: socketPath} = this.config.control
116
+
117
+ if (owner === undefined && group === undefined) return
118
+
119
+ // -1 leaves the uid/gid unchanged (POSIX chown semantics).
120
+ const uid = owner === undefined ? -1 : resolveUserId(owner)
121
+ const gid = group === undefined ? -1 : resolveGroupId(group)
122
+
123
+ try {
124
+ await fs.chown(socketPath, uid, gid)
125
+ } catch (error) {
126
+ const reason = error instanceof Error ? error.message : String(error)
127
+
128
+ throw new Error(`Could not set control socket owner/group on ${socketPath}: ${reason}. Run the daemon as a user allowed to chown it (for example root, or a member of the target group).`, {cause: error})
129
+ }
81
130
  }
82
131
 
83
132
  /** @returns {Promise<void>} Removes a stale Unix socket before binding, or fails clearly when a daemon is alive. */
@@ -195,6 +244,7 @@ export default class RollbridgeDaemon {
195
244
  this.executeControlLine(line)
196
245
  .then((response) => socket.write(`${JSON.stringify({status: "success", ...response})}\n`))
197
246
  .catch((error) => {
247
+ this.logger("command failed", {error: error instanceof Error ? error.message : String(error)})
198
248
  socket.write(`${JSON.stringify({
199
249
  error: error instanceof Error ? error.message : String(error),
200
250
  status: "error"
@@ -228,11 +278,26 @@ export default class RollbridgeDaemon {
228
278
  return this.status()
229
279
  }
230
280
 
281
+ if (commandName === "events") {
282
+ return {events: this.eventLog.recent(typeof data.limit === "number" ? data.limit : undefined)}
283
+ }
284
+
231
285
  if (commandName === "stop") {
232
286
  await this.stopRelease(stringOrUndefined(data.releaseId))
233
287
  return this.status()
234
288
  }
235
289
 
290
+ if (commandName === "restart") {
291
+ return await this.restartProcesses({
292
+ policy: stringOrUndefined(data.policy),
293
+ processId: stringOrUndefined(data.processId)
294
+ })
295
+ }
296
+
297
+ if (commandName === "rollback") {
298
+ return await this.rollback({releaseId: stringOrUndefined(data.releaseId)})
299
+ }
300
+
236
301
  if (commandName === "shutdown") {
237
302
  setImmediate(() => {
238
303
  this.shutdown().catch((error) => {
@@ -271,6 +336,7 @@ export default class RollbridgeDaemon {
271
336
  await this.ensureServices(release, startedServices)
272
337
  await release.start()
273
338
  } catch (error) {
339
+ this.logger("deploy failed", {error: error instanceof Error ? error.message : String(error), releaseId: newReleaseId})
274
340
  await this.stopStartedServices(startedServices)
275
341
  throw error
276
342
  }
@@ -289,12 +355,60 @@ export default class RollbridgeDaemon {
289
355
  void this.drainAndPrune(previousRelease)
290
356
  }
291
357
 
358
+ this.persistState()
359
+
292
360
  return {
293
361
  activeReleaseId: release.releaseId,
294
362
  previousReleaseId: previousRelease ? previousRelease.releaseId : null
295
363
  }
296
364
  }
297
365
 
366
+ /**
367
+ * Rolls back to a previously-active release by re-running the deploy flow on its
368
+ * retained metadata: it re-starts the target release, health-checks it, switches
369
+ * traffic, replaces singletons, and drains the current release — just like a deploy,
370
+ * so a failed rollback leaves the current release active.
371
+ * @param {{releaseId?: string}} [args] - Target release id; defaults to the most recently retired release.
372
+ * @returns {Promise<Record<string, JsonValue>>} The rollback result.
373
+ */
374
+ async rollback({releaseId} = {}) {
375
+ const target = releaseId ? this.releases.get(releaseId) : this.previousRelease()
376
+
377
+ if (!target) {
378
+ throw new Error(releaseId ? `No retained release "${releaseId}" to roll back to.` : "No previous release to roll back to.")
379
+ }
380
+
381
+ if (target === this.activeRelease) {
382
+ throw new Error(`Release "${target.releaseId}" is already active.`)
383
+ }
384
+
385
+ // The target may still be draining a prior deploy (live processes). Stop it before the
386
+ // deploy below re-uses its id in this.releases, otherwise the still-running instance
387
+ // would be dropped from status/pruning/shutdown and could be orphaned.
388
+ if (target.state !== "stopped" && target.state !== "failed") {
389
+ await target.stop()
390
+ }
391
+
392
+ this.logger("rollback starting", {releaseId: target.releaseId, releasePath: target.releasePath})
393
+
394
+ return await this.deploy({releaseId: target.releaseId, releasePath: target.releasePath, revision: target.revision})
395
+ }
396
+
397
+ /**
398
+ * @returns {ReleaseGroup | undefined} The most recently active release other than the current one, if any.
399
+ */
400
+ previousRelease() {
401
+ /** @type {ReleaseGroup | undefined} */
402
+ let previous
403
+
404
+ for (const release of this.releases.values()) {
405
+ if (release === this.activeRelease || !release.activatedAt) continue
406
+ if (!previous || Date.parse(release.activatedAt) >= Date.parse(/** @type {string} */ (previous.activatedAt))) previous = release
407
+ }
408
+
409
+ return previous
410
+ }
411
+
298
412
  /**
299
413
  * Starts missing daemon-wide services before release-owned processes need them.
300
414
  * @param {ReleaseGroup} release - Release providing templates and ports.
@@ -317,7 +431,7 @@ export default class RollbridgeDaemon {
317
431
  }
318
432
 
319
433
  try {
320
- await service.start()
434
+ await service.start("deploy")
321
435
  startedServices.push(processConfig.id)
322
436
  } catch (error) {
323
437
  this.services.delete(processConfig.id)
@@ -363,10 +477,14 @@ export default class RollbridgeDaemon {
363
477
  command: nextDefinition.command,
364
478
  cwd: nextDefinition.cwd,
365
479
  env: nextDefinition.env,
480
+ lifecycle: nextDefinition.lifecycle,
366
481
  logger: nextDefinition.logger,
482
+ memory: nextDefinition.memory,
367
483
  outputLines: nextDefinition.outputLines,
484
+ restart: nextDefinition.restart,
368
485
  restartDelayMs: nextDefinition.restartDelayMs,
369
486
  shouldRestart: nextDefinition.shouldRestart,
487
+ stopSignal: nextDefinition.stopSignal,
370
488
  stopTimeoutMs: nextDefinition.stopTimeoutMs
371
489
  })
372
490
  }
@@ -390,10 +508,90 @@ export default class RollbridgeDaemon {
390
508
  const singleton = release.buildProcess(processConfig)
391
509
 
392
510
  this.singletons.set(processConfig.id, singleton)
393
- await singleton.start()
511
+ await singleton.start("deploy")
394
512
  }
395
513
  }
396
514
 
515
+ /**
516
+ * Restarts non-proxied processes selected by id or policy, or all of them: running
517
+ * processes are bounced (stop then start) and crashed or stopped ones are revived,
518
+ * matching the conventional meaning of "restart".
519
+ *
520
+ * The proxied process is never restarted in place (that would drop traffic); use a
521
+ * deploy for a zero-downtime replacement.
522
+ * @param {{policy?: string, processId?: string}} selector - Restart selector; restarts all non-proxied processes when both are omitted.
523
+ * @returns {Promise<Record<string, JsonValue>>} The ids that were restarted.
524
+ */
525
+ async restartProcesses({policy, processId} = {}) {
526
+ if (policy === "proxied" || (processId !== undefined && this.isProxiedId(processId))) {
527
+ throw new Error('The proxied process cannot be restarted in place; use "rollbridge deploy" for a zero-downtime replacement.')
528
+ }
529
+
530
+ const targets = this.collectRestartTargets({policy, processId})
531
+
532
+ if (processId !== undefined && targets.length === 0) {
533
+ throw new Error(`No managed process with id "${processId}" to restart.`)
534
+ }
535
+
536
+ for (const target of targets) {
537
+ this.logger("process restart requested", {processId: target.id})
538
+ await target.process.stop()
539
+ await target.process.start("manual")
540
+ }
541
+
542
+ return {restarted: targets.map((target) => target.id)}
543
+ }
544
+
545
+ /**
546
+ * @param {{policy?: string, processId?: string}} selector - Restart selector.
547
+ * @returns {{id: string, process: import("./managed-process.js").default}[]} Running non-proxied processes matching the selector.
548
+ */
549
+ collectRestartTargets({policy, processId}) {
550
+ const targets = /** @type {{id: string, process: import("./managed-process.js").default}[]} */ ([])
551
+
552
+ for (const processConfig of this.config.processes) {
553
+ if (processConfig.policy === "proxied") continue
554
+ if (policy !== undefined && processConfig.policy !== policy) continue
555
+
556
+ for (const instance of this.runningInstances(processConfig)) {
557
+ // A processId selector matches the base config id (all replicas) or one replica's id.
558
+ if (processId !== undefined && processId !== processConfig.id && processId !== instance.id) continue
559
+
560
+ targets.push(instance)
561
+ }
562
+ }
563
+
564
+ return targets
565
+ }
566
+
567
+ /**
568
+ * @param {import("./config.js").ProcessConfig} processConfig - Process definition.
569
+ * @returns {{id: string, process: import("./managed-process.js").default}[]} Running instances (replicas) for this config.
570
+ */
571
+ runningInstances(processConfig) {
572
+ if (processConfig.policy === "service") {
573
+ const service = this.services.get(processConfig.id)
574
+
575
+ return service ? [{id: processConfig.id, process: service}] : []
576
+ }
577
+
578
+ if (processConfig.policy === "singleton") {
579
+ const singleton = this.singletons.get(processConfig.id)
580
+
581
+ return singleton ? [{id: processConfig.id, process: singleton}] : []
582
+ }
583
+
584
+ return this.activeRelease ? this.activeRelease.getProcesses(processConfig.id) : []
585
+ }
586
+
587
+ /**
588
+ * @param {string} id - Process id.
589
+ * @returns {boolean} True when the id belongs to the proxied process.
590
+ */
591
+ isProxiedId(id) {
592
+ return this.config.processes.some((processConfig) => processConfig.policy === "proxied" && processConfig.id === id)
593
+ }
594
+
397
595
  /**
398
596
  * @param {string | undefined} releaseId - Release id, or active release when omitted.
399
597
  * @returns {Promise<void>} Resolves when stopped.
@@ -405,7 +603,9 @@ export default class RollbridgeDaemon {
405
603
  if (release === this.activeRelease) this.activeRelease = undefined
406
604
 
407
605
  await release.stop()
606
+ this.logger("release stopped", {releaseId: release.releaseId})
408
607
  this.pruneStoppedReleases()
608
+ this.persistState()
409
609
  }
410
610
 
411
611
  /**
@@ -416,10 +616,12 @@ export default class RollbridgeDaemon {
416
616
  async drainAndPrune(release) {
417
617
  try {
418
618
  await release.drainAndStop(this.config.proxy.drainTimeoutMs)
619
+ this.logger("release drained", {releaseId: release.releaseId})
419
620
  } catch (error) {
420
621
  this.logger("release drain failed", {error: error instanceof Error ? error.message : String(error), releaseId: release.releaseId})
421
622
  } finally {
422
623
  this.pruneStoppedReleases()
624
+ this.persistState()
423
625
  }
424
626
  }
425
627
 
@@ -432,11 +634,75 @@ export default class RollbridgeDaemon {
432
634
  }
433
635
  }
434
636
 
637
+ /** @returns {void} Starts periodic state persistence when statePath is configured. */
638
+ startStatePersistence() {
639
+ if (!this.statePath) return
640
+
641
+ this.persistState()
642
+ this.persistTimer = setInterval(() => this.persistState(), STATE_PERSIST_INTERVAL_MS)
643
+ this.persistTimer.unref?.()
644
+ }
645
+
646
+ /**
647
+ * Persists a state snapshot (status plus recent events) to statePath, atomically and
648
+ * fire-and-forget. A failed write is logged but never blocks daemon operation.
649
+ * @returns {void}
650
+ */
651
+ persistState() {
652
+ if (!this.statePath || this.stopping) return
653
+
654
+ const statePath = this.statePath
655
+ // Drop the orphans view from the snapshot: it reflects a *previous* daemon's leftovers, not
656
+ // this daemon's own managed state, and is recomputed from the persisted processes on restart.
657
+ const {orphans: _orphans, ...status} = this.status()
658
+ const snapshot = {...status, events: this.eventLog.recent(), persistedAt: new Date().toISOString()}
659
+
660
+ // Serialize writes (and track the tail) so shutdown can wait for an in-flight write before
661
+ // clearing the file — otherwise a write started before shutdown could recreate it afterward.
662
+ this.pendingWrite = Promise.resolve(this.pendingWrite)
663
+ .catch(() => {})
664
+ .then(() => writeState(statePath, snapshot))
665
+ .catch((error) => {
666
+ this.logger("state persist failed", {error: error instanceof Error ? error.message : String(error)})
667
+ })
668
+ }
669
+
670
+ /**
671
+ * On startup, reads any state left by a previous daemon and reports managed processes whose
672
+ * pids are still alive — likely orphans from a daemon that did not shut down cleanly. This is
673
+ * advisory (Rollbridge cannot re-adopt detached children); the operator stops the leftovers.
674
+ * A recycled pid could be a false positive, so reports are a prompt to investigate.
675
+ * @returns {Promise<void>} Resolves once orphans are reported.
676
+ */
677
+ async reportOrphans() {
678
+ if (!this.statePath) return
679
+
680
+ const orphans = liveProcesses(await readState(this.statePath))
681
+
682
+ // Keep them for status() so `rollbridge status` reflects still-running children after a
683
+ // restart, not just the startup log below.
684
+ this.orphans = orphans
685
+
686
+ for (const orphan of orphans) {
687
+ this.logger("orphaned managed process detected", {pid: orphan.pid, processId: orphan.id, releaseId: orphan.releaseId})
688
+ }
689
+
690
+ if (orphans.length > 0) {
691
+ this.logger("orphaned processes from a previous daemon", {count: orphans.length, hint: "a previous daemon did not shut down cleanly; verify these pids and stop any leftovers"})
692
+ }
693
+ }
694
+
435
695
  /** @returns {Promise<void>} Stops proxy, control socket, and child processes. */
436
696
  async shutdown() {
437
697
  if (this.stopping) return
438
698
 
439
699
  this.stopping = true
700
+
701
+ if (this.persistTimer) {
702
+ clearInterval(this.persistTimer)
703
+ this.persistTimer = undefined
704
+ }
705
+
440
706
  this.proxy.close()
441
707
  await Promise.allSettled([...this.services.values()].map((processInstance) => processInstance.stop()))
442
708
  await Promise.allSettled([...this.singletons.values()].map((processInstance) => processInstance.stop()))
@@ -444,6 +710,14 @@ export default class RollbridgeDaemon {
444
710
  await this.closeServer(this.proxyServer)
445
711
  await this.closeServer(this.controlServer)
446
712
  await fs.rm(this.config.control.path, {force: true})
713
+
714
+ // A clean shutdown leaves no orphans, so remove the state file rather than leaving stale
715
+ // pids. Wait for any in-flight write first so it can't recreate the file afterward (no new
716
+ // writes start: stopping is set and the persist timer is cleared above).
717
+ if (this.statePath) {
718
+ if (this.pendingWrite) await this.pendingWrite
719
+ await clearState(this.statePath)
720
+ }
447
721
  }
448
722
 
449
723
  /**
@@ -463,10 +737,16 @@ export default class RollbridgeDaemon {
463
737
 
464
738
  /** @returns {DaemonStatus} Status payload. */
465
739
  status() {
740
+ // Re-check liveness and prune the dead permanently, so the list self-clears as the operator
741
+ // stops the leftovers (e.g. via `rollbridge recover`). Pruning (not just filtering) matters:
742
+ // a cleared orphan must not reappear if the OS later recycles its pid for an unrelated process.
743
+ this.orphans = this.orphans.filter((orphan) => isProcessAlive(orphan.pid))
744
+
466
745
  return {
467
746
  activeReleaseId: this.activeRelease ? this.activeRelease.releaseId : null,
468
747
  application: this.config.application,
469
748
  control: {...this.config.control},
749
+ orphans: [...this.orphans],
470
750
  proxy: {
471
751
  host: this.config.proxy.host,
472
752
  port: this.proxyPort ?? this.config.proxy.port,
package/src/doctor.js CHANGED
@@ -5,9 +5,12 @@ import fs from "node:fs/promises"
5
5
  import net from "node:net"
6
6
  import path from "node:path"
7
7
  import {inspectControlSocket} from "./daemon.js"
8
+ import {liveProcesses, readState} from "./state-store.js"
9
+ import {processTemplateContext, renderObject, renderTemplate} from "./template.js"
8
10
 
9
11
  /**
10
12
  * @typedef {{detail: string, name: string, ok: boolean}} DoctorCheck
13
+ * @typedef {{cwd: string, id: string, ok: true} | {error: string, id: string, ok: false}} ProcessRender
11
14
  */
12
15
 
13
16
  /**
@@ -25,9 +28,55 @@ export async function runEnvironmentChecks(config) {
25
28
  checks.push(await controlSocketDirectoryCheck(config))
26
29
  checks.push(await proxyPortCheck(config))
27
30
 
31
+ if (config.statePath !== undefined) {
32
+ // A live daemon persists its own (live) pids into the state file, so they are not orphans.
33
+ const daemonRunning = !("error" in socketInspection) && socketInspection.alive
34
+
35
+ checks.push(await statePathDirectoryCheck(config.statePath))
36
+ checks.push(await orphanCheck(config.statePath, daemonRunning))
37
+ }
38
+
28
39
  return checks
29
40
  }
30
41
 
/**
 * Probes the directory that would contain the state file for write and search (execute) access.
 * @param {string} statePath - Configured state file path.
 * @returns {Promise<DoctorCheck>} A passing check when the directory is accessible, a failing one otherwise.
 */
async function statePathDirectoryCheck(statePath) {
  const name = "state path directory"
  const directory = path.dirname(path.resolve(statePath))
  let writable = true

  try {
    await fs.access(directory, fsConstants.W_OK | fsConstants.X_OK)
  } catch {
    // Any access failure (missing directory, no permission, ...) is reported the same way.
    writable = false
  }

  if (writable) {
    return {detail: `${directory} is writable`, name, ok: true}
  }

  return {detail: `${directory} is missing or not writable; state cannot be persisted`, name, ok: false}
}
57
+
/**
 * Reports still-alive processes recorded in the state file, unless a daemon is currently
 * running (in which case the state file is not read at all).
 * @param {string} statePath - Configured state file path.
 * @param {boolean} daemonRunning - Whether a Rollbridge daemon is currently live on the control socket.
 * @returns {Promise<DoctorCheck>} Passing when a daemon is running or no recorded process is alive; failing with a summary otherwise.
 */
async function orphanCheck(statePath, daemonRunning) {
  const name = "orphaned processes"

  // Pids recorded by a running daemon belong to that daemon: managed, not orphaned.
  if (daemonRunning) {
    return {detail: "a daemon is running; its managed processes are not orphans", name, ok: true}
  }

  const orphans = liveProcesses(await readState(statePath))
  const count = orphans.length

  if (count === 0) {
    return {detail: "no leftover processes from a previous daemon", name, ok: true}
  }

  const noun = count === 1 ? "process" : "processes"
  const summary = orphans.map(({id, pid}) => `${id} (pid ${pid})`).join(", ")

  return {detail: `${count} possible orphaned ${noun} still running: ${summary} — verify and stop any leftovers`, name, ok: false}
}
79
+
31
80
  /**
32
81
  * @param {string} socketPath - Control socket path.
33
82
  * @returns {Promise<{alive: boolean, application?: string} | {error: string}>} Probe result, or the probe error.
@@ -112,3 +161,131 @@ async function canBindPort(host, port) {
112
161
  server.listen(port, host, () => server.close(() => resolve({ok: true})))
113
162
  })
114
163
  }
164
+
/**
 * Runs the checks that need a concrete release: the release directory exists, every
 * process's command/cwd/env templates render, and every rendered working directory exists.
 * The release id falls back to the revision and then to the release directory's base name;
 * the revision falls back to the release id. Template ports use each configured range's
 * `from` value as a stand-in.
 * @param {import("./config.js").RollbridgeConfig} config - Normalized config.
 * @param {{releaseId?: string, releasePath: string, revision?: string}} release - Release to render against.
 * @returns {Promise<DoctorCheck[]>} The release-path, template and working-directory checks, in that order.
 */
export async function runReleaseChecks(config, release) {
  const releasePath = path.resolve(release.releasePath)
  const releaseId = release.releaseId || release.revision || path.basename(releasePath)
  const shared = {
    application: config.application,
    ports: representativePorts(config),
    proxy: config.proxy,
    releaseId,
    releasePath,
    revision: release.revision || releaseId
  }
  const renders = config.processes.map((processConfig) => renderProcess(processConfig, shared))

  const releasePathResult = await releasePathCheck(releasePath)
  const templateResult = templateCheck(renders)
  const workingDirectoryResult = await workingDirectoryCheck(renders)

  return [releasePathResult, templateResult, workingDirectoryResult]
}
184
+
/**
 * Builds a process-id → port map from every process that declares a port range, taking the
 * range's `from` value. Processes without a port are omitted.
 * @param {import("./config.js").RollbridgeConfig} config - Normalized config.
 * @returns {Record<string, number>} One representative port per port-bearing process.
 */
function representativePorts(config) {
  return config.processes.reduce((ports, {id, port}) => {
    if (port) ports[id] = port.from

    return ports
  }, /** @type {Record<string, number>} */ ({}))
}
199
+
/**
 * Renders one process's cwd, command and env templates (in that order) against a context
 * built for replica index 0. Only the rendered cwd is kept; command and env are rendered
 * purely to surface template errors.
 * @param {import("./config.js").ProcessConfig} processConfig - Process to render.
 * @param {{application: string, ports: Record<string, number>, proxy: import("./config.js").ProxyConfig, releaseId: string, releasePath: string, revision: string}} shared - Shared render inputs.
 * @returns {ProcessRender} The working directory resolved against the release path, or the first template error.
 */
function renderProcess(processConfig, shared) {
  const {application, ports, proxy, releaseId, releasePath, revision} = shared
  const {id} = processConfig
  const context = processTemplateContext({
    application,
    ports,
    processId: id,
    proxy,
    releaseId,
    releasePath,
    replicaCount: processConfig.replicas,
    replicaIndex: 0,
    revision
  })

  try {
    // Without a configured cwd the process runs from the release directory itself.
    const renderedCwd = processConfig.cwd ? renderTemplate(processConfig.cwd, context) : releasePath

    renderTemplate(processConfig.command, context)
    renderObject(processConfig.env, context)

    return {cwd: path.resolve(releasePath, renderedCwd), id, ok: true}
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error)

    return {error: message, id, ok: false}
  }
}
230
+
/**
 * Turns the isDirectory() result for the release path into a doctor check.
 * @param {string} releasePath - Resolved release directory.
 * @returns {Promise<DoctorCheck>} Passing when the path is an existing directory, failing otherwise.
 */
async function releasePathCheck(releasePath) {
  const exists = Boolean(await isDirectory(releasePath))
  const detail = exists ? `${releasePath} exists` : `${releasePath} is missing or not a directory`

  return {detail, name: "release path", ok: exists}
}
242
+
/**
 * Summarises per-process render results: passes when none failed, otherwise lists every
 * failure as "id: error" joined with "; ".
 * @param {ProcessRender[]} renders - Per-process render results.
 * @returns {DoctorCheck} Whether every process's templates resolved against the release context.
 */
function templateCheck(renders) {
  const name = "process templates"
  /** @type {string[]} */
  const failures = []

  for (const render of renders) {
    if (!render.ok) failures.push(`${render.id}: ${render.error}`)
  }

  if (failures.length > 0) {
    return {detail: `unresolved templates — ${failures.join("; ")}`, name, ok: false}
  }

  return {detail: `all ${renders.length} process command/cwd/env templates resolve`, name, ok: true}
}
257
+ /**
258
+ * @param {ProcessRender[]} renders - Per-process render results.
259
+ * @returns {Promise<DoctorCheck>} Whether each rendered working directory exists.
260
+ */
261
+ async function workingDirectoryCheck(renders) {
262
+ /** @type {string[]} */
263
+ const missing = []
264
+ let checked = 0
265
+
266
+ for (const render of renders) {
267
+ if (!render.ok) continue
268
+
269
+ checked++
270
+
271
+ if (!(await isDirectory(render.cwd))) missing.push(`${render.id} (${render.cwd})`)
272
+ }
273
+
274
+ if (missing.length === 0) {
275
+ return {detail: `all ${checked} process working ${checked === 1 ? "directory exists" : "directories exist"}`, name: "process working directories", ok: true}
276
+ }
277
+
278
+ return {detail: `missing working ${missing.length === 1 ? "directory" : "directories"}: ${missing.join(", ")}`, name: "process working directories", ok: false}
279
+ }
280
+
/**
 * Stats a path and reports whether it is a directory; any stat failure counts as "no".
 * @param {string} target - Path to test.
 * @returns {Promise<boolean>} True when the path exists and is a directory.
 */
async function isDirectory(target) {
  const stats = await fs.stat(target).catch(() => null)

  return stats !== null && stats.isDirectory()
}
@@ -0,0 +1,47 @@
1
+ // @ts-check
2
+
3
+ /**
4
+ * @typedef {import("./json.js").JsonValue} JsonValue
5
+ * @typedef {{at: string, data: Record<string, JsonValue>, message: string}} DaemonEvent
6
+ */
7
+
/**
 * A bounded, in-memory history of structured daemon events. The newest events are kept;
 * the oldest are dropped once the limit is exceeded.
 */
export default class EventLog {
  /**
   * @param {number} limit - Maximum number of events to retain. Fractions are rounded down,
   *   negative values are treated as 0 (retain nothing), and Infinity disables the bound.
   * @throws {TypeError} When limit is not a number or is NaN. Such a limit would make the
   *   `length > limit` bound check always false and let the history grow without bound.
   */
  constructor(limit) {
    if (typeof limit !== "number" || Number.isNaN(limit)) {
      throw new TypeError(`EventLog limit must be a number, received ${String(limit)}`)
    }

    // Normalise once so record() always works with a whole, non-negative bound.
    this.limit = Math.max(0, Math.floor(limit))
    this.events = /** @type {DaemonEvent[]} */ ([])
  }

  /**
   * Appends an event stamped with the current time, then drops the oldest events beyond the limit.
   * The data object is stored by reference, not copied.
   * @param {string} message - Event type/message.
   * @param {Record<string, JsonValue>} data - Structured event payload.
   * @returns {void}
   */
  record(message, data) {
    this.events.push({at: new Date().toISOString(), data, message})

    const overflow = this.events.length - this.limit

    if (overflow > 0) {
      this.events.splice(0, overflow)
    }
  }

  /**
   * @param {number} [limit] - Maximum number of most-recent events to return. Fractions are
   *   rounded down; everything is returned when the limit is omitted, not a finite number,
   *   below 1, or at least the number of stored events.
   * @returns {DaemonEvent[]} The most recent events, oldest first, in a new array (the event objects themselves are shared).
   */
  recent(limit) {
    // Floor first: slice() truncates a fractional start index toward zero, which would
    // otherwise return one event more than the requested maximum.
    const count = typeof limit === "number" && Number.isFinite(limit) ? Math.floor(limit) : 0

    if (count < 1 || count >= this.events.length) {
      return [...this.events]
    }

    return this.events.slice(-count)
  }
}