rollbridge 0.1.6 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -174,6 +174,20 @@ stop them before restarting the daemon. A clean `shutdown` removes the file. See
174
174
  statePath: "/var/lib/rollbridge/ticket-server.state.json"
175
175
  ```
176
176
 
177
+ During the first migration from an old supervisor, set `legacyTakeover` and run
178
+ `rollbridge predeploy-cleanup --release-path <path>` before `rollbridge deploy`.
179
+ Rollbridge will only stop configured legacy processes when no reusable active
180
+ Rollbridge release is running.
181
+
182
+ ```js
183
+ legacyTakeover: {
184
+ screens: ["ticket-server"],
185
+ processes: [
186
+ {name: "legacy web", includes: ["/home/dev/ticket-server/", "velocious server", "--port 8082"]}
187
+ ]
188
+ }
189
+ ```
190
+
177
191
  A function export receives no arguments and lets you build the config at load
178
192
  time:
179
193
 
@@ -466,6 +480,13 @@ Shut down the daemon and managed processes:
466
480
  rollbridge shutdown --config rollbridge.js
467
481
  ```
468
482
 
483
+ Prepare a first Rollbridge deploy by recovering Rollbridge-managed orphans and
484
+ stopping configured legacy processes:
485
+
486
+ ```bash
487
+ rollbridge predeploy-cleanup --config rollbridge.js --release-path /srv/app/current
488
+ ```
489
+
469
490
  Enable shell completion (bash or zsh) for command names and option flags:
470
491
 
471
492
  ```bash
package/TODO.md CHANGED
@@ -25,7 +25,7 @@ This roadmap tracks planned Rollbridge features and documentation. Rollbridge sh
25
25
  - [x] Report memory stats and last memory-triggered restart in `status`.
26
26
  - [x] Restart memory-heavy workers gracefully when possible, with a forced stop timeout.
27
27
  - [x] Add tests with a fixture process that allocates memory above the configured limit.
28
- - [ ] Worker auto-restart and restart policy controls.
28
+ - [x] Worker auto-restart and restart policy controls.
29
29
  - [x] Add config for max restarts, restart window, exponential backoff, and disabled restart behavior (per-process `restart` policy).
30
30
  - [x] Distinguish crash restarts, deploy replacements, manual restarts, and memory restarts in status/events. (Per-process `lastStartReason` + a `reason` on the `process started` event; the `memory` reason is wired and fires once memory supervision restarts a process.)
31
31
  - [x] Add a `restart` CLI command for a single process, a policy group, or all non-proxied workers.
@@ -50,7 +50,7 @@ This roadmap tracks planned Rollbridge features and documentation. Rollbridge sh
50
50
  - [x] Add a `rollback` CLI command that health-checks the target before switching.
51
51
  - [x] Define how rollback interacts with singleton workers and draining releases. (Reuses the deploy flow: replaces singletons and drains the current release.)
52
52
  - [x] Document migration constraints for rollback.
53
- - [ ] Observability and diagnostics.
53
+ - [x] Observability and diagnostics.
54
54
  - [x] Add structured event history for deploys, switches, stops, crashes, memory restarts, and failed commands. (In-memory `EventLog` tapping the daemon logger; memory-restart events populate once memory supervision logs them.)
55
55
  - [x] Add restart counters and uptime to status (exit reasons already reported via `exitCode`/`exitSignal`/`state`).
56
56
  - [x] Add memory stats and child-process-tree details to status (with memory supervision). (`rssBytes`/`memoryRestarts`/`lastMemoryRestartAt` plus `children`: the sampled process tree with each member's pid, command, and RSS.)
@@ -58,7 +58,7 @@ This roadmap tracks planned Rollbridge features and documentation. Rollbridge sh
58
58
  - [x] Add an `events` CLI command (after structured event history lands).
59
59
  - [x] Add optional file logging with rotation guidance (`docs/logging.md`; daemon log file via `--daemon-log-path`, logrotate `copytruncate`).
60
60
  - [x] Add machine-readable JSON output for all CLI commands (data commands print JSON; `validate`/`doctor`/`logs` take `--json`).
61
- - [ ] Config validation and doctoring.
61
+ - [x] Config validation and doctoring.
62
62
  - [x] Add `validate` to parse config and report all config errors without starting the daemon.
63
63
  - [x] Add `doctor` to check config validity, control socket reachability, proxy port availability, and control-socket directory writability.
64
64
  - [x] Extend `doctor` with state-path checks: state-path directory writability and orphaned-process reporting from a prior state file.
package/docs/cli.md CHANGED
@@ -164,6 +164,26 @@ managed process (unknown, or a companion with no active release) is also an
164
164
  error. Restarting a `service` bounces a shared broker (for example Velocious
165
165
  Beacon), which briefly disrupts every process that depends on it.
166
166
 
167
+ ## `predeploy-cleanup`
168
+
169
+ ```
170
+ rollbridge predeploy-cleanup [--config <path>] [--release-path <path>]
171
+ ```
172
+
173
+ Prepares a host for the first Rollbridge deploy. If a Rollbridge daemon already
174
+ has an active release, the command exits without stopping anything. Otherwise it
175
+ recovers Rollbridge-managed orphans from `statePath` and stops the legacy
176
+ processes configured in [`legacyTakeover`](config.md#legacytakeover), then exits
177
+ before `rollbridge deploy` starts the new daemon/proxy.
178
+
179
+ When `--release-path` is provided, the command also shuts down the existing
180
+ daemon if the active release uses a different Rollbridge package version than
181
+ the pending release. With or without `--release-path`, it shuts the daemon down
182
+ when the active daemon's proxy host, port, or upstream host differs from the pending config.
183
+
184
+ Use it immediately before `rollbridge deploy --ensure-daemon` when migrating an
185
+ app from `screen`, `process_bot`, or another old supervisor to Rollbridge.
186
+
167
187
  ## `recover`
168
188
 
169
189
  ```
package/docs/config.md CHANGED
@@ -23,6 +23,7 @@ export default {
23
23
  | --- | --- | --- | --- |
24
24
  | `application` | string | basename of the config file's directory | Names the app; used in the default control-socket path and the `ROLLBRIDGE_APPLICATION` env var. |
25
25
  | `control` | object | — | Control-socket settings (see below). |
26
+ | `legacyTakeover` | object | unset | Optional matchers for `rollbridge predeploy-cleanup` to stop pre-Rollbridge supervisors during first handover (see below). |
26
27
  | `proxy` | object | **required** | Proxy listener and shared defaults (see below). |
27
28
  | `processes` | array | **required** | Managed processes (see below). Exactly one must be `proxied`. |
28
29
  | `releaseRetention` | object | — | How many stopped releases the daemon retains (see below). |
@@ -88,6 +89,38 @@ statePath: "/var/lib/rollbridge/ticket-server.state.json"
88
89
 
89
90
  Leave `statePath` unset to disable persistence (the default).
90
91
 
92
+ ## `legacyTakeover`
93
+
94
+ `legacyTakeover` lets deploy scripts run `rollbridge predeploy-cleanup` during
95
+ the first migration from an old supervisor. The command only uses these matchers
96
+ when no active Rollbridge release is running. If a Rollbridge daemon already has
97
+ an active release, it exits without stopping legacy processes.
98
+
99
+ | Field | Type | Default | Description |
100
+ | --- | --- | --- | --- |
101
+ | `legacyTakeover.screens` | array of strings | `[]` | GNU Screen session names to stop with `screen -S <name> -X quit`. |
102
+ | `legacyTakeover.processes` | array | `[]` | Process command-line matchers. Each entry must define `includes`, and may define `name`. |
103
+ | `legacyTakeover.forceStopTimeoutMs` | number | `proxy.forceStopTimeoutMs` | Grace period after `SIGTERM` before `SIGKILL` is sent to matched legacy processes. |
104
+
105
+ Each `legacyTakeover.processes[]` entry:
106
+
107
+ | Field | Type | Default | Description |
108
+ | --- | --- | --- | --- |
109
+ | `includes` | array of strings | **required** | Every string must appear in a process command line for it to be considered a legacy seed process. Descendants of seed processes are stopped too. |
110
+ | `name` | string | generated | Human-readable label for diagnostics. |
111
+
112
+ Example:
113
+
114
+ ```js
115
+ legacyTakeover: {
116
+ forceStopTimeoutMs: 10000,
117
+ screens: ["ticket-server"],
118
+ processes: [
119
+ {name: "legacy web", includes: ["/home/dev/ticket-server/", "velocious server", "--port 8082"]}
120
+ ]
121
+ }
122
+ ```
123
+
91
124
  ## `processes[]`
92
125
 
93
126
  | Field | Type | Default | Description |
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "rollbridge",
3
- "version": "0.1.6",
3
+ "version": "0.1.7",
4
4
  "description": "Zero-downtime process supervisor and local traffic switcher for deploy-managed apps.",
5
5
  "keywords": [
6
6
  "deploy",
package/src/cli.js CHANGED
@@ -8,6 +8,7 @@ import {Command} from "commander"
8
8
  import RollbridgeDaemon from "./daemon.js"
9
9
  import {loadConfig, parseConfigFile, resolveConfigPath, validateConfig} from "./config.js"
10
10
  import {runEnvironmentChecks, runReleaseChecks} from "./doctor.js"
11
+ import {predeployCleanup} from "./predeploy-cleanup.js"
11
12
  import {recoverOrphans} from "./recover.js"
12
13
  import {sendControlCommand} from "./control-client.js"
13
14
 
@@ -357,6 +358,19 @@ export async function runCli(argv) {
357
358
  console.log(formatEvents(events))
358
359
  })
359
360
 
361
+ program
362
+ .command("predeploy-cleanup")
363
+ .description("Prepare a host for deploy: recover Rollbridge orphans and stop configured legacy processes when no release is active.")
364
+ .option("-c, --config <path>", "Config file path (defaults to rollbridge.js)")
365
+ .option("--release-path <path>", "Pending release path; restarts the daemon if this release changes Rollbridge itself")
366
+ .action(async (options) => {
367
+ const configPath = await resolveConfigPath(options.config)
368
+ const config = await loadConfig(configPath)
369
+ const result = await predeployCleanup({config, releasePath: options.releasePath})
370
+
371
+ console.log(formatPredeployCleanupResult(result))
372
+ })
373
+
360
374
  program
361
375
  .command("recover")
362
376
  .description("Stop orphaned processes left by a crashed daemon (reads statePath; lists them unless --force).")
@@ -399,6 +413,29 @@ export async function runCli(argv) {
399
413
  await program.parseAsync(argv)
400
414
  }
401
415
 
/**
 * Renders the outcome of `predeploy-cleanup` as text for the terminal.
 * @param {import("./predeploy-cleanup.js").PredeployCleanupResult} result - Cleanup result.
 * @returns {string} Human-readable summary: one sentence when the daemon was left running, otherwise three newline-separated lines.
 */
function formatPredeployCleanupResult(result) {
  const {action, legacyProcesses, recoveredOrphans} = result

  // A daemon that was left alone means nothing was recovered or stopped, so there are no counts to print.
  if (action === "daemon-active") return "Rollbridge daemon already has an active release; no legacy cleanup needed."

  const pluralSuffix = (count) => (count === 1 ? "" : "es")
  const daemonLine = action === "daemon-stopped"
    ? "Stopped existing Rollbridge daemon before deploy."
    : "No active Rollbridge daemon found."

  return [
    daemonLine,
    `Recovered ${recoveredOrphans} Rollbridge orphaned process${pluralSuffix(recoveredOrphans)}.`,
    `Stopped ${legacyProcesses.length} legacy process${pluralSuffix(legacyProcesses.length)}.`
  ].join("\n")
}
438
+
402
439
  /**
403
440
  * @typedef {import("./recover.js").OrphanProcess} OrphanProcess
404
441
  */
package/src/config.js CHANGED
@@ -16,8 +16,10 @@ import {pathToFileURL} from "node:url"
16
16
  * @typedef {{cwd?: string, env: Record<string, string>, gracefulStopMs: number, health?: HealthConfig, id: string, lifecycle: LifecycleConfig, memory?: MemoryConfig, nonBlockingDrain: boolean, outputLines: number, policy: ProcessPolicy, port?: PortRange, replicas: number, restart: RestartConfig, restartDelayMs: number, stopSignal: string, command: string}} ProcessConfig
17
17
  * @typedef {{group?: number | string, mode?: number, owner?: number | string, path: string}} ControlConfig
18
18
  * @typedef {{drainTimeoutMs: number, forceStopTimeoutMs: number, healthPath: string, healthTimeoutMs: number, host: string, port: number, upstreamHost: string}} ProxyConfig
19
+ * @typedef {{includes: string[], name: string}} LegacyTakeoverProcessConfig
20
+ * @typedef {{forceStopTimeoutMs: number, processes: LegacyTakeoverProcessConfig[], screens: string[]}} LegacyTakeoverConfig
19
21
  * @typedef {{keep: number, maxAgeMs: number}} ReleaseRetentionConfig
20
- * @typedef {{application: string, control: ControlConfig, processes: ProcessConfig[], proxy: ProxyConfig, releaseRetention: ReleaseRetentionConfig, statePath?: string}} RollbridgeConfig
22
+ * @typedef {{application: string, control: ControlConfig, legacyTakeover?: LegacyTakeoverConfig, processes: ProcessConfig[], proxy: ProxyConfig, releaseRetention: ReleaseRetentionConfig, statePath?: string}} RollbridgeConfig
21
23
  * @typedef {{fix: string, message: string}} ConfigIssue
22
24
  */
23
25
 
@@ -134,12 +136,13 @@ export function validateConfig(rawConfig, configPath = process.cwd()) {
134
136
  path: normalizeString(controlSource.path, "control.path", issues, {default: `/tmp/rollbridge-${application}.sock`})
135
137
  }
136
138
  const processes = processesSource.map((processSource, index) => normalizeProcess(processSource, index, proxy, issues))
139
+ const legacyTakeover = normalizeLegacyTakeover(source.legacyTakeover, proxy, issues)
137
140
  const releaseRetention = normalizeReleaseRetention(objectAt(source.releaseRetention, "releaseRetention", issues, {}), issues)
138
141
  const statePath = source.statePath === undefined || source.statePath === null ? undefined : normalizeString(source.statePath, "statePath", issues)
139
142
 
140
143
  validateProcessSet(processes, issues)
141
144
 
142
- return {config: {application, control, processes, proxy, releaseRetention, statePath}, issues}
145
+ return {config: {application, control, legacyTakeover, processes, proxy, releaseRetention, statePath}, issues}
143
146
  }
144
147
 
145
148
  /**
@@ -340,6 +343,102 @@ function normalizeLifecycle(value, key, issues) {
340
343
  return lifecycle
341
344
  }
342
345
 
/**
 * Validates the optional `legacyTakeover` section and fills in its defaults.
 * @param {JsonValue} value - Raw legacy takeover config.
 * @param {ProxyConfig} proxy - Proxy config; its `forceStopTimeoutMs` is the default grace period.
 * @param {ConfigIssue[]} issues - Issue collector; every problem found is appended here.
 * @returns {LegacyTakeoverConfig | undefined} Normalized legacy takeover config, or undefined when omitted.
 */
function normalizeLegacyTakeover(value, proxy, issues) {
  // undefined and null both mean the section was not configured at all.
  if (value === undefined || value === null) return undefined

  const fallbackTimeoutMs = proxy.forceStopTimeoutMs

  if (!isPlainObject(value)) {
    issues.push({fix: "Set legacyTakeover to a mapping with screens and/or processes.", message: "legacyTakeover must be an object"})

    // Hand back an empty-but-well-formed section so callers never see a malformed shape.
    return {forceStopTimeoutMs: fallbackTimeoutMs, processes: [], screens: []}
  }

  const timeoutKey = "legacyTakeover.forceStopTimeoutMs"
  const rawTimeoutMs = normalizeNumber(value.forceStopTimeoutMs, timeoutKey, issues, {default: fallbackTimeoutMs})
  const screens = normalizeStringList(value.screens, "legacyTakeover.screens", issues)
  const processes = normalizeLegacyTakeoverProcesses(value.processes, issues)
  const hasMatcher = screens.length > 0 || processes.length > 0

  if (!hasMatcher) {
    issues.push({fix: "Set legacyTakeover.screens or legacyTakeover.processes so predeploy-cleanup knows what legacy processes it may stop.", message: "legacyTakeover must define at least one screen or process matcher"})
  }

  const forceStopTimeoutMs = nonNegativeOrDefault(rawTimeoutMs, timeoutKey, issues, fallbackTimeoutMs, false)

  return {forceStopTimeoutMs, processes, screens}
}
375
+
/**
 * Normalizes an optional list of non-empty strings, dropping and reporting every invalid entry.
 * @param {JsonValue} value - Raw list value.
 * @param {string} key - Config key, used in issue messages.
 * @param {ConfigIssue[]} issues - Issue collector.
 * @returns {string[]} The valid strings in their original order; empty when the value is missing or not an array.
 */
function normalizeStringList(value, key, issues) {
  if (value === undefined || value === null) return []

  if (!Array.isArray(value)) {
    issues.push({fix: `Set ${key} to a list of strings.`, message: `${key} must be an array`})

    return []
  }

  /** @type {string[]} */
  const accepted = []

  value.forEach((entry, index) => {
    const usable = typeof entry === "string" && entry.length > 0

    if (usable) {
      accepted.push(entry)
    } else {
      issues.push({fix: `Set ${key}[${index}] to a non-empty string.`, message: `${key}[${index}] must be a non-empty string`})
    }
  })

  return accepted
}
398
+
/**
 * Normalizes the optional `legacyTakeover.processes` list, one matcher at a time.
 * @param {JsonValue} value - Raw legacy process matchers.
 * @param {ConfigIssue[]} issues - Issue collector.
 * @returns {LegacyTakeoverProcessConfig[]} Normalized process matchers; empty when the value is missing or not an array.
 */
function normalizeLegacyTakeoverProcesses(value, issues) {
  if (value === undefined || value === null) return []

  if (Array.isArray(value)) {
    // Each entry normalizes to a zero-or-one element list, so flatMap drops rejected entries.
    return value.flatMap((entry, index) => normalizeLegacyTakeoverProcess(entry, index, issues))
  }

  issues.push({fix: "Set legacyTakeover.processes to a list of mappings with includes strings.", message: "legacyTakeover.processes must be an array"})

  return []
}
415
+
/**
 * Normalizes one `legacyTakeover.processes[]` entry.
 *
 * A matcher whose `includes` list ends up empty is reported and dropped. It must never reach
 * predeploy-cleanup, which tests a command line with `includes.every(...)`: `every` on an empty
 * list is true, so such a matcher would select every process on the host.
 * @param {JsonValue} value - Raw legacy process matcher.
 * @param {number} index - Matcher index, used in issue messages and in the generated default name.
 * @param {ConfigIssue[]} issues - Issue collector.
 * @returns {LegacyTakeoverProcessConfig[]} Normalized matcher, or an empty list when invalid.
 */
function normalizeLegacyTakeoverProcess(value, index, issues) {
  const key = `legacyTakeover.processes[${index}]`

  if (!isPlainObject(value)) {
    issues.push({fix: `Set ${key} to a mapping with includes strings.`, message: `${key} must be an object`})

    return []
  }

  const includes = normalizeStringList(value.includes, `${key}.includes`, issues)
  const usable = includes.length > 0

  if (!usable) {
    issues.push({fix: `Set ${key}.includes to one or more command-line substrings that identify the legacy process.`, message: `${key}.includes must contain at least one matcher`})
  }

  // Still validate the name of a rejected matcher so all problems are reported in one pass.
  const name = normalizeString(value.name, `${key}.name`, issues, {default: `legacy process ${index + 1}`})

  return usable ? [{includes, name}] : []
}
441
+
343
442
  /**
344
443
  * @param {JsonValue} value - Raw stop signal.
345
444
  * @param {string} key - Config key.
@@ -0,0 +1,340 @@
1
+ // @ts-check
2
+
3
+ import fs from "node:fs"
4
+ import path from "node:path"
5
+ import {spawnSync} from "node:child_process"
6
+ import {setTimeout as sleep} from "node:timers/promises"
7
+ import {inspectControlSocket} from "./daemon.js"
8
+ import {recoverOrphans} from "./recover.js"
9
+ import {sendControlCommand} from "./control-client.js"
10
+
11
+ /**
12
+ * @typedef {import("./json.js").JsonValue} JsonValue
13
+ * @typedef {{pid: number, parentPid: number, args: string}} ProcessRow
14
+ * @typedef {{action: string, legacyProcesses: ProcessRow[], recoveredOrphans: number}} PredeployCleanupResult
15
+ */
16
+
/**
 * Gets a host ready for a Rollbridge deploy. A live daemon that already serves an active release
 * and still matches the pending release is left untouched. Any other live daemon is shut down.
 * After that, orphans recorded in `statePath` are recovered and the processes configured under
 * `legacyTakeover` are stopped.
 * @param {object} args - Options.
 * @param {import("./config.js").RollbridgeConfig} args.config - Rollbridge config.
 * @param {(socketPath: string) => Promise<import("./daemon.js").ControlSocketInspection>} [args.inspectSocket] - Control socket probe.
 * @param {string} [args.releasePath] - Pending release path, used to restart the daemon when this release changes Rollbridge itself.
 * @param {(args: {command: Record<string, JsonValue>, path: string}) => Promise<Record<string, JsonValue>>} [args.sendCommand] - Control command sender.
 * @param {(command: string, args: string[]) => import("node:child_process").SpawnSyncReturns<Buffer>} [args.runCommand] - Command runner.
 * @param {(pid: number, signal: string) => void} [args.killProcess] - Signal sender.
 * @param {(args: {config: import("./config.js").RollbridgeConfig, force: boolean}) => Promise<import("./recover.js").RecoverResult>} [args.recover] - Orphan recovery function.
 * @returns {Promise<PredeployCleanupResult>} Cleanup result; `action` is "daemon-active", "daemon-stopped", or "no-daemon-cleaned".
 */
export async function predeployCleanup({
  config,
  inspectSocket = inspectControlSocket,
  killProcess = process.kill,
  releasePath,
  recover = recoverOrphans,
  runCommand = spawnSync,
  sendCommand = sendControlCommand
}) {
  const controlPath = config.control.path
  const inspection = await inspectSocket(controlPath)
  const daemonWasAlive = inspection.alive

  if (daemonWasAlive) {
    const status = await activeDaemonStatus({config, inspection, sendCommand})
    const reusable = Boolean(status.activeReleaseId) && daemonMatchesPendingRelease({config, releasePath, status})

    if (reusable) {
      return {action: "daemon-active", legacyProcesses: [], recoveredOrphans: 0}
    }

    // The daemon is idle or out of date for this release: stop it and wait until the socket is free.
    await sendCommand({command: {command: "shutdown"}, path: controlPath})
    await waitForControlSocketShutdown({config, inspectSocket})
  }

  const recoveredOrphans = await recoverConfiguredOrphans(config, recover)
  const legacyProcesses = await stopLegacyProcesses({config, killProcess, runCommand})
  const action = daemonWasAlive ? "daemon-stopped" : "no-daemon-cleaned"

  return {action, legacyProcesses, recoveredOrphans}
}
62
+
/**
 * Confirms that the live control socket belongs to this application's Rollbridge daemon, then
 * asks that daemon for its status.
 * @param {object} args - Options.
 * @param {import("./config.js").RollbridgeConfig} args.config - Rollbridge config.
 * @param {import("./daemon.js").ControlSocketInspection} args.inspection - Socket inspection.
 * @param {(args: {command: Record<string, JsonValue>, path: string}) => Promise<Record<string, JsonValue>>} args.sendCommand - Control command sender.
 * @returns {Promise<import("./daemon.js").DaemonStatus>} Daemon status.
 * @throws {Error} When the inspection reports no application or a different application.
 */
async function activeDaemonStatus({config, inspection, sendCommand}) {
  const socketPath = config.control.path
  const owner = inspection.application

  if (owner === undefined) {
    throw new Error(`A non-Rollbridge process is using ${socketPath}; refusing predeploy cleanup.`)
  }

  if (owner !== config.application) {
    throw new Error(`A Rollbridge daemon for "${owner}" is using ${socketPath}; expected "${config.application}".`)
  }

  const reply = await sendCommand({command: {command: "status"}, path: socketPath})

  // The reply is treated as a DaemonStatus without further validation.
  return /** @type {import("./daemon.js").DaemonStatus} */ (reply)
}
83
+
/**
 * Decides whether the running daemon can stay up for the pending deploy.
 * @param {object} args - Options.
 * @param {import("./config.js").RollbridgeConfig} args.config - Rollbridge config.
 * @param {string} [args.releasePath] - Pending release path; the package version is only compared when it is given.
 * @param {import("./daemon.js").DaemonStatus} args.status - Active daemon status.
 * @returns {boolean} True when the active daemon can be reused for this deploy.
 */
function daemonMatchesPendingRelease({config, releasePath, status}) {
  // A changed proxy listener settles the question, so the package check is skipped in that case.
  if (!proxyMatchesConfig(status.proxy, config.proxy)) return false

  const comparePackage = releasePath !== undefined

  return !(comparePackage && rollbridgePackageChanged({releasePath, status}))
}
97
+
/**
 * Compares the listener settings of the running proxy with the pending config.
 * @param {import("./daemon.js").DaemonStatus["proxy"]} currentProxy - Current proxy status.
 * @param {import("./config.js").ProxyConfig} expectedProxy - Pending release proxy config.
 * @returns {boolean} True when host, port, and upstream host are all strictly equal.
 */
function proxyMatchesConfig(currentProxy, expectedProxy) {
  // Only these three fields are compared; the other proxy settings are not looked at here.
  const comparedFields = ["host", "port", "upstreamHost"]

  return comparedFields.every((field) => currentProxy[field] === expectedProxy[field])
}
108
+
/**
 * Compares the Rollbridge version installed in the pending release with the one installed in the
 * daemon's active release.
 * @param {object} args - Options.
 * @param {string} args.releasePath - Pending release path.
 * @param {import("./daemon.js").DaemonStatus} args.status - Active daemon status.
 * @returns {boolean} True when the pending release uses a different Rollbridge package version.
 */
function rollbridgePackageChanged({releasePath, status}) {
  const {activeReleaseId, releases} = status
  const activeRelease = releases.find(({releaseId}) => releaseId === activeReleaseId)

  if (activeRelease === undefined) return false

  const pendingVersion = rollbridgePackageVersion(releasePath)
  const runningVersion = rollbridgePackageVersion(activeRelease.releasePath)

  // A version that cannot be determined on either side is not treated as a change.
  if (pendingVersion === undefined || runningVersion === undefined) return false

  return pendingVersion !== runningVersion
}
126
+
/**
 * Reads the version of the Rollbridge package installed under a release's `node_modules`.
 * @param {string} releasePath - Release path containing node_modules.
 * @returns {string | undefined} Installed Rollbridge package version, or undefined when the manifest is missing or has no string `version`.
 * @throws {Error} Any read or parse failure other than a missing file (ENOENT).
 */
function rollbridgePackageVersion(releasePath) {
  let manifest

  try {
    const manifestPath = path.join(releasePath, "node_modules", "rollbridge", "package.json")

    manifest = JSON.parse(fs.readFileSync(manifestPath, "utf8"))
  } catch (error) {
    const manifestMissing = error && typeof error === "object" && "code" in error && error.code === "ENOENT"

    // Only a missing file is an expected outcome; everything else is surfaced to the caller.
    if (!manifestMissing) throw error

    return undefined
  }

  const hasVersion = manifest && typeof manifest === "object" && "version" in manifest && typeof manifest.version === "string"

  return hasVersion ? manifest.version : undefined
}
146
+
/**
 * Polls the control socket until nothing answers on it any more.
 * @param {object} args - Options.
 * @param {import("./config.js").RollbridgeConfig} args.config - Rollbridge config.
 * @param {(socketPath: string) => Promise<import("./daemon.js").ControlSocketInspection>} args.inspectSocket - Control socket probe.
 * @param {number} [args.pollIntervalMs] - Pause between probes; defaults to 250 ms.
 * @param {number} [args.timeoutMs] - Longest time to keep waiting; defaults to 30000 ms.
 * @returns {Promise<void>} Resolves after the socket stops accepting Rollbridge commands.
 * @throws {Error} When the socket is still alive once the timeout has elapsed.
 */
async function waitForControlSocketShutdown({config, inspectSocket, pollIntervalMs = 250, timeoutMs = 30000}) {
  const deadline = Date.now() + timeoutMs

  while (true) {
    const inspection = await inspectSocket(config.control.path)
    if (!inspection.alive) return

    // The deadline is checked after probing, so the socket is always inspected at least once.
    if (Date.now() >= deadline) {
      throw new Error(`Timed out waiting for Rollbridge daemon at ${config.control.path} to shut down.`)
    }

    await sleep(pollIntervalMs)
  }
}
167
+
/**
 * Runs forced orphan recovery when state persistence is configured.
 * @param {import("./config.js").RollbridgeConfig} config - Rollbridge config.
 * @param {(args: {config: import("./config.js").RollbridgeConfig, force: boolean}) => Promise<import("./recover.js").RecoverResult>} recover - Orphan recovery function.
 * @returns {Promise<number>} Number of orphans found; 0 when `statePath` is unset.
 * @throws {Error} When recovery reports an error or leaves processes running.
 */
async function recoverConfiguredOrphans(config, recover) {
  // Without a state file there is no record of orphans, so recovery is not attempted.
  if (config.statePath === undefined) return 0

  const outcome = await recover({config, force: true})

  if ("error" in outcome) throw new Error(outcome.error)

  const stuckCount = outcome.remaining.length

  if (stuckCount > 0) {
    throw new Error(`Could not stop ${stuckCount} Rollbridge orphaned process${stuckCount === 1 ? "" : "es"}.`)
  }

  return outcome.orphans.length
}
188
+
/**
 * Stops everything configured under `legacyTakeover`: tells the listed GNU Screen sessions to
 * quit, signals the matched processes together with their descendants, and finally verifies that
 * none of them is still running.
 *
 * The process tree is captured before the Screen sessions are told to quit. Descendants are found
 * by following parent links from a `SCREEN -dmS <name>` row. Once that row is gone, a later scan
 * can no longer connect surviving children to the session, so they would be neither signaled nor
 * reported by the final check.
 * @param {object} args - Options.
 * @param {import("./config.js").RollbridgeConfig} args.config - Rollbridge config.
 * @param {(command: string, args: string[]) => import("node:child_process").SpawnSyncReturns<Buffer>} args.runCommand - Command runner.
 * @param {(pid: number, signal: string) => void} args.killProcess - Signal sender.
 * @returns {Promise<ProcessRow[]>} Stopped legacy processes.
 * @throws {Error} When a targeted or still-matching legacy process survives the stop sequence.
 */
async function stopLegacyProcesses({config, killProcess, runCommand}) {
  const takeoverConfig = config.legacyTakeover
  if (takeoverConfig === undefined) return []

  const targets = legacyProcesses(config)

  for (const screenName of takeoverConfig.screens) {
    // Best effort: the result is ignored on purpose, and the survivor check below decides the outcome.
    runCommand("screen", ["-S", screenName, "-X", "quit"])
  }

  const stoppedProcesses = await stopProcessTree({
    killProcess,
    processRows: targets,
    timeoutMs: takeoverConfig.forceStopTimeoutMs
  })

  const remainingProcesses = legacyProcesses(config)
  const remainingPids = new Set(remainingProcesses.map((row) => row.pid))
  const targetArgsByPid = new Map(targets.map((row) => [row.pid, row.args]))

  // A target still listed under the same pid and command line survived, even if it no longer matches a seed.
  for (const row of processRows()) {
    if (!remainingPids.has(row.pid) && targetArgsByPid.get(row.pid) === row.args) {
      remainingPids.add(row.pid)
      remainingProcesses.push(row)
    }
  }

  if (remainingProcesses.length > 0) {
    const details = remainingProcesses.map((row) => `${row.pid} ${row.args}`).join("\n")

    throw new Error(`Refusing Rollbridge deploy while legacy processes are still running:\n${details}`)
  }

  return stoppedProcesses
}
219
+
/**
 * Lists the configured legacy processes currently running, together with all of their descendants.
 * @param {import("./config.js").RollbridgeConfig} config - Rollbridge config.
 * @returns {ProcessRow[]} Legacy process rows and their descendants, in process-table order.
 */
function legacyProcesses(config) {
  const rows = processRows()
  const protectedPids = protectedProcessIds(rows)

  // Index children by parent once so the tree walk does not rescan the whole table.
  /** @type {Map<number, number[]>} */
  const childPidsByParent = new Map()

  for (const {parentPid, pid} of rows) {
    const children = childPidsByParent.get(parentPid)

    if (children === undefined) {
      childPidsByParent.set(parentPid, [pid])
    } else {
      children.push(pid)
    }
  }

  const legacyPids = new Set()
  const pending = rows.filter((row) => legacySeedProcess(row, config, protectedPids)).map((row) => row.pid)

  while (pending.length > 0) {
    const pid = /** @type {number} */ (pending.pop())

    if (legacyPids.has(pid)) continue

    legacyPids.add(pid)
    pending.push(...(childPidsByParent.get(pid) ?? []))
  }

  return rows.filter((row) => legacyPids.has(row.pid))
}
243
+
/**
 * Tells whether a process row is one of the configured legacy processes: either the GNU Screen
 * process of a configured session, or a command line containing every `includes` string of a
 * configured matcher.
 * @param {ProcessRow} row - Process row.
 * @param {import("./config.js").RollbridgeConfig} config - Rollbridge config.
 * @param {Set<number>} protectedPids - Current cleanup process and ancestors.
 * @returns {boolean} True when the row identifies a configured legacy process.
 */
function legacySeedProcess(row, config, protectedPids) {
  const takeoverConfig = config.legacyTakeover
  if (takeoverConfig === undefined || protectedPids.has(row.pid)) return false

  if (takeoverConfig.screens.some((screenName) => startsScreenSession(row.args, screenName))) {
    return true
  }

  return takeoverConfig.processes.some(({includes}) => (
    // `every` is true for an empty list, so an empty matcher is skipped or it would select every process.
    includes.length > 0 && includes.every((matcher) => row.args.includes(matcher))
  ))
}

/**
 * Checks for `SCREEN -dmS <name>` with the name matched as a whole word, so that a session called
 * `app` does not also select `app-staging`.
 * @param {string} args - Process command line.
 * @param {string} screenName - Configured Screen session name.
 * @returns {boolean} True when the command line starts exactly that session.
 */
function startsScreenSession(args, screenName) {
  const marker = `SCREEN -dmS ${screenName}`
  let start = args.indexOf(marker)

  while (start !== -1) {
    const following = args[start + marker.length]

    if (following === undefined || /\s/.test(following)) return true

    start = args.indexOf(marker, start + 1)
  }

  return false
}
262
+
/**
 * Collects the pids that must never be treated as legacy: this process and its chain of ancestors
 * as far as the given process table allows following it.
 * @param {ProcessRow[]} rows - Current process table rows.
 * @returns {Set<number>} Pids that belong to the running cleanup command.
 */
function protectedProcessIds(rows) {
  const parentOf = new Map(rows.map(({parentPid, pid}) => [pid, parentPid]))
  const protectedPids = new Set([process.pid])

  // Stops at pid 0, at an ancestor missing from the table, or when a pid repeats.
  for (let ancestor = process.ppid; ancestor > 0 && !protectedPids.has(ancestor); ancestor = parentOf.get(ancestor) || 0) {
    protectedPids.add(ancestor)
  }

  return protectedPids
}
279
+
/**
 * Takes a snapshot of the process table by running `ps -eo pid=,ppid=,args=`.
 *
 * The output buffer is raised above `spawnSync`'s default so a host with a large process table or
 * very long command lines does not make the snapshot fail.
 * @returns {ProcessRow[]} Current process table rows; lines that do not parse are skipped.
 * @throws {Error} When `ps` cannot be run or exits with a non-zero status.
 */
function processRows() {
  const maxOutputBytes = 64 * 1024 * 1024
  const result = spawnSync("ps", ["-eo", "pid=,ppid=,args="], {encoding: "utf8", maxBuffer: maxOutputBytes})

  if (result.error) throw result.error
  if (result.status !== 0) throw new Error(`Failed to inspect running processes: ${result.stderr}`)

  return result.stdout.split("\n").flatMap((line) => {
    // Columns are pid, parent pid, then the rest of the line as the command line.
    const match = line.match(/^\s*(\d+)\s+(\d+)\s+(.+)$/)
    if (!match) return []

    const pid = Number(match[1])
    const parentPid = Number(match[2])

    return [{args: match[3], parentPid, pid}]
  })
}
297
+
/**
 * Sends SIGTERM to every listed process, waits out the grace period, then sends SIGKILL.
 *
 * SIGKILL only goes to processes that accepted SIGTERM. A pid that was already gone at SIGTERM
 * time is never signaled again, because by then the pid may belong to an unrelated process.
 * @param {object} args - Options.
 * @param {(pid: number, signal: string) => void} args.killProcess - Signal sender.
 * @param {ProcessRow[]} args.processRows - Processes to stop.
 * @param {number} args.timeoutMs - Grace period before SIGKILL.
 * @returns {Promise<ProcessRow[]>} Processes that were signaled.
 */
async function stopProcessTree({killProcess, processRows, timeoutMs}) {
  const stoppedProcesses = processRows.filter((row) => sendSignal(row.pid, "SIGTERM", killProcess))

  // Nothing accepted SIGTERM, so there is nothing to wait for.
  if (stoppedProcesses.length === 0) return []

  await sleep(timeoutMs)

  for (const row of stoppedProcesses) {
    sendSignal(row.pid, "SIGKILL", killProcess)
  }

  return stoppedProcesses
}
323
+
/**
 * Delivers one signal and treats "no such process" as a normal outcome.
 * @param {number} pid - Process id.
 * @param {string} signal - Signal name.
 * @param {(pid: number, signal: string) => void} killProcess - Signal sender.
 * @returns {boolean} True when the signal was sent, false when the process was already gone.
 * @throws {unknown} Whatever `killProcess` throws, unless its `code` is "ESRCH".
 */
function sendSignal(pid, signal, killProcess) {
  try {
    killProcess(pid, signal)
  } catch (error) {
    const alreadyGone = error && typeof error === "object" && "code" in error && error.code === "ESRCH"

    // Any other failure is rethrown unchanged.
    if (!alreadyGone) throw error

    return false
  }

  return true
}
@@ -41,7 +41,7 @@ test("completion bash prints a sourceable script with commands and option flags"
41
41
 
42
42
  assert.notEqual(code, 1)
43
43
  assert.match(output, /complete -F _rollbridge rollbridge/)
44
- assert.match(output, /compgen -W "daemon deploy rollback ensure-daemon status stop restart shutdown validate doctor logs events recover completion"/)
44
+ assert.match(output, /compgen -W "daemon deploy rollback ensure-daemon status stop restart shutdown validate doctor logs events predeploy-cleanup recover completion"/)
45
45
  // A command's own options are completed after the command.
46
46
  assert.match(output, /deploy\)\n\s+opts="[^"]*--release-path[^"]*"/)
47
47
  assert.match(output, /restart\)\n\s+opts="[^"]*--policy[^"]*"/)
@@ -52,7 +52,7 @@ test("completion zsh prints a #compdef script with per-command options", async (
52
52
 
53
53
  assert.match(output, /^#compdef rollbridge/)
54
54
  assert.match(output, /compdef _rollbridge rollbridge/)
55
- assert.match(output, /commands=\(daemon deploy rollback ensure-daemon status stop restart shutdown validate doctor logs events recover completion\)/)
55
+ assert.match(output, /commands=\(daemon deploy rollback ensure-daemon status stop restart shutdown validate doctor logs events predeploy-cleanup recover completion\)/)
56
56
  assert.match(output, /events\) compadd -- [^\n]*--limit/)
57
57
  })
58
58
 
@@ -70,6 +70,47 @@ test("validateConfig defaults wildcard proxy upstreams to loopback", () => {
70
70
  assert.equal(config.proxy.upstreamHost, "127.0.0.1")
71
71
  })
72
72
 
73
test("validateConfig accepts legacy takeover screens and process matchers", () => {
  // Built fresh on every call so the expectation never shares references with the validated input.
  const buildLegacyTakeover = () => ({
    forceStopTimeoutMs: 250,
    processes: [
      {includes: ["/srv/demo/", "velocious server", "--port 4500"], name: "legacy web"}
    ],
    screens: ["demo-backend"]
  })

  const validation = validateConfig({
    application: "demo",
    control: {path: "/tmp/demo.sock"},
    legacyTakeover: buildLegacyTakeover(),
    processes: [
      {command: "run web", id: "web", policy: "proxied", port: {from: 18000, to: 18099}}
    ],
    proxy: {host: "127.0.0.1", port: 8182}
  })

  assert.deepEqual(validation.issues, [])
  assert.deepEqual(validation.config.legacyTakeover, buildLegacyTakeover())
})
99
+
100
test("validateConfig rejects empty legacy takeover config", () => {
  const expectedMessage = "legacyTakeover must define at least one screen or process matcher"

  const validation = validateConfig({
    application: "demo",
    control: {path: "/tmp/demo.sock"},
    legacyTakeover: {},
    processes: [
      {command: "run web", id: "web", policy: "proxied", port: {from: 18000, to: 18099}}
    ],
    proxy: {host: "127.0.0.1", port: 8182}
  })

  // The full issue list is used as the failure message to make a mismatch easy to diagnose.
  const wasReported = validation.issues.some(({message}) => message === expectedMessage)

  assert.ok(wasReported, JSON.stringify(validation.issues))
})
113
+
73
114
  test("validateConfig defaults outputLines and accepts a positive override", () => {
74
115
  const {config, issues} = validateConfig({
75
116
  application: "demo",
@@ -0,0 +1,131 @@
1
+ // @ts-check
2
+
3
+ import assert from "node:assert/strict"
4
+ import {spawn} from "node:child_process"
5
+ import {once} from "node:events"
6
+ import fs from "node:fs/promises"
7
+ import os from "node:os"
8
+ import path from "node:path"
9
+ import test from "node:test"
10
+ import {normalizeConfig} from "../src/config.js"
11
+ import {isProcessAlive} from "../src/state-store.js"
12
+ import {predeployCleanup} from "../src/predeploy-cleanup.js"
13
+
14
/**
 * Builds a normalized test config whose control socket lives in `dir` and
 * whose only legacy takeover matcher is the given marker string.
 *
 * @param {string} dir - Working directory.
 * @param {string} marker - Unique process marker.
 * @returns {import("../src/config.js").RollbridgeConfig} Test config.
 */
function buildConfig(dir, marker) {
  const controlPath = path.join(dir, "rollbridge.sock")
  const legacyTakeover = {
    forceStopTimeoutMs: 50,
    processes: [{includes: [marker], name: "legacy marker process"}]
  }

  return normalizeConfig({
    application: "predeploy-cleanup-test",
    control: {path: controlPath},
    legacyTakeover,
    processes: [{command: "true", id: "web", policy: "proxied", port: {from: 0, to: 0}}],
    proxy: {host: "127.0.0.1", port: 0}
  })
}
31
+
32
test("predeploy cleanup stops configured legacy process when no daemon is active", async () => {
  const tmpPrefix = path.join(os.tmpdir(), "rollbridge-predeploy-cleanup-")
  const dir = await fs.mkdtemp(tmpPrefix)
  const marker = `rollbridge-app-legacy-marker-${process.pid}-${Date.now()}`

  // The marker is passed as a trailing argument so it is part of the child's command line,
  // which is what the matcher configured by buildConfig looks for.
  const childArgs = ["-e", "setInterval(() => {}, 1000)", marker]
  const legacy = spawn(process.execPath, childArgs, {stdio: "ignore"})

  await once(legacy, "spawn")

  try {
    const {action, legacyProcesses, recoveredOrphans} = await predeployCleanup({config: buildConfig(dir, marker)})

    assert.equal(action, "no-daemon-cleaned")
    assert.equal(recoveredOrphans, 0)
    assert.equal(legacyProcesses.length, 1)
    assert.equal(legacyProcesses[0].pid, legacy.pid)
    assert.ok(legacy.pid === undefined || !isProcessAlive(legacy.pid))
  } finally {
    // Make sure the child never outlives the test, even when an assertion failed.
    legacy.kill("SIGKILL")
    await fs.rm(dir, {force: true, recursive: true})
  }
})
52
+
53
test("predeploy cleanup leaves legacy processes alone when daemon already has an active release", async () => {
  const dir = await fs.mkdtemp(path.join(os.tmpdir(), "rollbridge-predeploy-cleanup-"))

  // Stubbed socket probe: a live daemon for this application with release "v1" active.
  const inspectSocket = async () => ({
    activeReleaseId: "v1",
    alive: true,
    application: "predeploy-cleanup-test"
  })

  // Stubbed control command: every command is answered with a status payload whose proxy matches buildConfig.
  const sendCommand = async () => ({
    activeReleaseId: "v1",
    application: "predeploy-cleanup-test",
    control: {path: path.join(dir, "rollbridge.sock")},
    orphans: [],
    proxy: {host: "127.0.0.1", port: 0, upstreamHost: "127.0.0.1"},
    releases: [],
    services: [],
    singletons: [],
    status: "success"
  })

  try {
    const config = buildConfig(dir, "unused-marker")
    const result = await predeployCleanup({config, inspectSocket, sendCommand})

    assert.deepEqual(result, {
      action: "daemon-active",
      legacyProcesses: [],
      recoveredOrphans: 0
    })
  } finally {
    await fs.rm(dir, {force: true, recursive: true})
  }
})
86
+
87
test("predeploy cleanup stops an active daemon when the proxy config changed", async () => {
  const dir = await fs.mkdtemp(path.join(os.tmpdir(), "rollbridge-predeploy-cleanup-"))
  /** @type {Record<string, import("../src/json.js").JsonValue>[]} */
  const commands = []
  let inspectCount = 0

  // Stubbed socket probe: the daemon is reported alive on the first probe only.
  const inspectSocket = async () => {
    inspectCount += 1

    const firstProbe = inspectCount === 1

    return {
      activeReleaseId: firstProbe ? "v1" : undefined,
      alive: firstProbe,
      application: firstProbe ? "predeploy-cleanup-test" : undefined
    }
  }

  // Stubbed control command: records every command; "status" reports proxy port 9999,
  // which differs from the port 0 that buildConfig configures.
  const sendCommand = async ({command}) => {
    commands.push(command)

    if (command.command !== "status") return {status: "success"}

    return {
      activeReleaseId: "v1",
      application: "predeploy-cleanup-test",
      control: {path: path.join(dir, "rollbridge.sock")},
      orphans: [],
      proxy: {host: "127.0.0.1", port: 9999, upstreamHost: "127.0.0.1"},
      releases: [],
      services: [],
      singletons: [],
      status: "success"
    }
  }

  try {
    const config = buildConfig(dir, "unused-marker")
    const result = await predeployCleanup({config, inspectSocket, sendCommand})
    const commandNames = commands.map((entry) => entry.command)

    assert.equal(result.action, "daemon-stopped")
    assert.deepEqual(commandNames, ["status", "shutdown"])
  } finally {
    await fs.rm(dir, {force: true, recursive: true})
  }
})