rollbridge 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +172 -5
- package/TODO.md +16 -13
- package/docs/cli.md +151 -0
- package/docs/config.md +128 -0
- package/docs/deploy-recipes.md +102 -0
- package/docs/troubleshooting.md +102 -0
- package/package.json +20 -1
- package/src/cli.js +141 -2
- package/src/config.js +73 -6
- package/src/daemon.js +61 -6
- package/src/doctor.js +114 -0
- package/src/health.js +4 -0
- package/src/managed-process.js +9 -2
- package/src/release-group.js +33 -4
- package/test/config-validation.test.js +105 -0
- package/test/doctor.test.js +228 -0
- package/test/fixtures/crasher.js +2 -0
- package/test/health.test.js +63 -0
- package/test/logs.test.js +99 -0
- package/test/managed-process.test.js +60 -0
- package/test/package-metadata.test.js +29 -0
- package/test/release-retention.test.js +107 -0
- package/test/rollbridge.test.js +56 -5
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
# Deploy-tool recipes
|
|
2
|
+
|
|
3
|
+
Rollbridge is deploy-tool agnostic: it ships no plugins or tasks for any deploy
|
|
4
|
+
tool. Whatever you use — a shell script, CI, or Capistrano — drives Rollbridge
|
|
5
|
+
by **calling its CLI** (see [`cli.md`](cli.md)). The daemon is long-lived;
|
|
6
|
+
deploys just hand it a prepared release path.
|
|
7
|
+
|
|
8
|
+
The deploy contract is the same everywhere:
|
|
9
|
+
|
|
10
|
+
1. Prepare the release directory (checkout, install dependencies, build assets).
|
|
11
|
+
2. Run **backwards-compatible** migrations *before* switching traffic (the old
|
|
12
|
+
and new web releases overlap during the drain).
|
|
13
|
+
3. Run `rollbridge deploy` — it starts the new release, health-checks the
|
|
14
|
+
proxied process, switches traffic, then drains and stops the old release.
|
|
15
|
+
It exits non-zero (leaving the previous release active) if the new release
|
|
16
|
+
fails to start or health-check, so your script should stop on a failed
|
|
17
|
+
deploy.
|
|
18
|
+
|
|
19
|
+
Point `--config` at a stable, daemon-wide config file; release paths are passed
|
|
20
|
+
per deploy. `rollbridge deploy --ensure-daemon` starts the daemon first if it
|
|
21
|
+
isn't already running, so the recipes below work whether or not the daemon is
|
|
22
|
+
already managed by systemd.
|
|
23
|
+
|
|
24
|
+
## Shell script
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
#!/usr/bin/env bash
|
|
28
|
+
set -euo pipefail
|
|
29
|
+
|
|
30
|
+
app_dir=/srv/ticket-server
|
|
31
|
+
config=/etc/rollbridge/rollbridge.js
|
|
32
|
+
# Read the revision from the source repo (not the script's cwd, which may not be
|
|
33
|
+
# a checkout under cron/systemd/CI).
|
|
34
|
+
revision="$(git -C "$app_dir/repo" rev-parse HEAD)"
|
|
35
|
+
release_path="$app_dir/releases/$(date -u +%Y%m%d%H%M%S)-$revision"
|
|
36
|
+
|
|
37
|
+
# 1. Prepare the release.
|
|
38
|
+
git clone --depth 1 "$app_dir/repo" "$release_path"
|
|
39
|
+
(cd "$release_path" && npm ci && npm run build)
|
|
40
|
+
|
|
41
|
+
# 2. Run backwards-compatible migrations before switching traffic.
|
|
42
|
+
(cd "$release_path" && npx velocious db:migrate)
|
|
43
|
+
|
|
44
|
+
# 3. Switch traffic (and start the daemon if needed). A non-zero exit here means
|
|
45
|
+
# the new release failed health checks and the previous one is still active;
|
|
46
|
+
# `set -e` aborts the script so the bad release is not promoted.
|
|
47
|
+
rollbridge deploy \
|
|
48
|
+
--ensure-daemon \
|
|
49
|
+
--config "$config" \
|
|
50
|
+
--release-path "$release_path" \
|
|
51
|
+
--revision "$revision"
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
## CI
|
|
55
|
+
|
|
56
|
+
In CI, build/test the release, then run the same `rollbridge deploy` over SSH
|
|
57
|
+
on the target host (CI rarely runs the long-lived daemon itself):
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
# after the build/test job has produced a release at $RELEASE_PATH on the host
|
|
61
|
+
ssh deploy@app.example.com \
|
|
62
|
+
"rollbridge deploy --ensure-daemon \
|
|
63
|
+
--config /etc/rollbridge/rollbridge.js \
|
|
64
|
+
--release-path '$RELEASE_PATH' \
|
|
65
|
+
--revision '$GIT_SHA'"
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
`rollbridge deploy` exits non-zero on a failed health check, which fails the CI
|
|
69
|
+
step — no extra gating needed. Use `rollbridge validate --json` / `rollbridge
|
|
70
|
+
doctor --json` earlier in the pipeline if you want to fail fast before building.
|
|
71
|
+
|
|
72
|
+
## Capistrano
|
|
73
|
+
|
|
74
|
+
Rollbridge ships **no Capistrano plugin or tasks** — you only run its CLI as a
|
|
75
|
+
shell command from your own `deploy.rb`. Capistrano already uploads the release
|
|
76
|
+
to `release_path`, so the deploy step is a single `execute` of the CLI:
|
|
77
|
+
|
|
78
|
+
```ruby
|
|
79
|
+
# config/deploy.rb — just a shell command; no Rollbridge-specific Capistrano code.
|
|
80
|
+
after "deploy:publishing", "rollbridge:deploy"
|
|
81
|
+
|
|
82
|
+
namespace :rollbridge do
|
|
83
|
+
task :deploy do
|
|
84
|
+
on roles(:app) do
|
|
85
|
+
within release_path do
|
|
86
|
+
execute :npx, "velocious", "db:migrate"
|
|
87
|
+
end
|
|
88
|
+
execute "rollbridge", "deploy",
|
|
89
|
+
"--ensure-daemon",
|
|
90
|
+
"--config", "/etc/rollbridge/rollbridge.js",
|
|
91
|
+
"--release-path", release_path,
|
|
92
|
+
"--revision", fetch(:current_revision)
|
|
93
|
+
end
|
|
94
|
+
end
|
|
95
|
+
end
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
`execute` runs the command over SSH and raises if it exits non-zero, so a failed
|
|
99
|
+
Rollbridge health check fails the Capistrano deploy. Keep Capistrano's own
|
|
100
|
+
`linked_dirs`/`keep_releases` for on-disk release directories; Rollbridge only
|
|
101
|
+
manages the running processes and its own in-memory release records (see
|
|
102
|
+
`releaseRetention`).
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
# Troubleshooting
|
|
2
|
+
|
|
3
|
+
Start with these three commands — they diagnose most problems without guessing:
|
|
4
|
+
|
|
5
|
+
- `rollbridge validate` — config errors, with an example fix for each.
|
|
6
|
+
- `rollbridge doctor` — control socket reachability, socket-directory writability, and proxy-port availability before the daemon starts.
|
|
7
|
+
- `rollbridge status` / `rollbridge logs` — live release/process state, restart counts, exit codes, connection counts, and recent process output.
|
|
8
|
+
|
|
9
|
+
For scripting, `validate`, `doctor`, and `logs` accept a `--json` flag, and
|
|
10
|
+
`status` already prints JSON — so every command's output is easy to parse.
|
|
11
|
+
|
|
12
|
+
## Health-check failures
|
|
13
|
+
|
|
14
|
+
**Symptom.** `rollbridge deploy` exits non-zero with:
|
|
15
|
+
|
|
16
|
+
```
|
|
17
|
+
Health check failed for http://127.0.0.1:18182/ping: HTTP 503
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
(the reason is `HTTP <status>` or a connection error such as `ECONNREFUSED`). The
|
|
21
|
+
new release never went live; the previous release stays active.
|
|
22
|
+
|
|
23
|
+
**Diagnose.** The new release's `proxied` process didn't return a healthy
|
|
24
|
+
response in time. Check its output with `rollbridge logs --process <id>` and its
|
|
25
|
+
state/`exitCode` with `rollbridge status`. Common causes: the app doesn't listen
|
|
26
|
+
on the templated `{{port}}`, the `health.path` returns a non-2xx status, or the
|
|
27
|
+
app boots slower than `health.timeoutMs`.
|
|
28
|
+
|
|
29
|
+
**Fix.** Make the proxied command bind `{{port}}` and serve `health.path` with a
|
|
30
|
+
2xx status. For slow boots, raise `health.timeoutMs` or set `health.startDelayMs`
|
|
31
|
+
so probing begins after the app is up.
|
|
32
|
+
|
|
33
|
+
## Port conflicts / exhausted ranges
|
|
34
|
+
|
|
35
|
+
**Symptom.** A deploy fails with:
|
|
36
|
+
|
|
37
|
+
```
|
|
38
|
+
No available ports in range 18182-18299 (118 ports on 127.0.0.1): 0 reserved by this deploy, 118 already in use. Widen the port range, free a port, or check bind permissions.
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
**Diagnose.** The counts tell you which case it is:
|
|
42
|
+
|
|
43
|
+
- **reserved by this deploy** high → the range is too small for the processes that share it.
|
|
44
|
+
- **already in use** → another process (or an old release that has not finished draining) holds the ports.
|
|
45
|
+
- **could not be bound (e.g. EACCES)** → permission problem, e.g. a privileged (`<1024`) port.
|
|
46
|
+
|
|
47
|
+
`rollbridge doctor` reports whether the configured `proxy.port` is bindable.
|
|
48
|
+
|
|
49
|
+
**Fix.** Widen the process's `port` range, free the conflicting port (`ss -ltnp`
|
|
50
|
+
or `lsof -i :<port>` to find the holder), or avoid privileged ports / grant the
|
|
51
|
+
needed capability.
|
|
52
|
+
|
|
53
|
+
## Stale or busy control socket
|
|
54
|
+
|
|
55
|
+
**Symptom.** `rollbridge daemon` (or `ensure-daemon`) errors with one of:
|
|
56
|
+
|
|
57
|
+
```
|
|
58
|
+
A Rollbridge daemon for application "ticket-server" is already running on /tmp/rollbridge-ticket-server.sock (active release: v3). Run "rollbridge status" to inspect it or "rollbridge shutdown" to stop it, or set a different control.path.
|
|
59
|
+
The control socket /tmp/rollbridge-ticket-server.sock is already in use by another process. Stop that process or set a different control.path.
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
**Diagnose.** Run `rollbridge status` (does a daemon answer?) and `rollbridge
|
|
63
|
+
doctor` (control-socket check). A leftover socket *file* with no live daemon
|
|
64
|
+
behind it is removed automatically the next time the daemon starts — no action
|
|
65
|
+
needed.
|
|
66
|
+
|
|
67
|
+
**Fix.** If a Rollbridge daemon is already running, use it, or
|
|
68
|
+
`rollbridge shutdown` before starting another. If a non-Rollbridge process owns
|
|
69
|
+
the path, stop it or point `control.path` somewhere else.
|
|
70
|
+
|
|
71
|
+
## Crash loops
|
|
72
|
+
|
|
73
|
+
**Symptom.** `rollbridge status` shows a process with a climbing `restarts`
|
|
74
|
+
count and a `state` that flips between `running` and `failed`, with repeated
|
|
75
|
+
`process started` / `process exited` log lines.
|
|
76
|
+
|
|
77
|
+
**Diagnose.** `rollbridge logs --process <id>` shows the crash output;
|
|
78
|
+
`rollbridge status` shows `exitCode`, `exitSignal`, `restarts`, and `uptimeMs`
|
|
79
|
+
(a tiny `uptimeMs` that keeps resetting is a fast crash loop). Crashed
|
|
80
|
+
active-release and `service` processes auto-restart after `restartDelayMs`.
|
|
81
|
+
|
|
82
|
+
**Fix.** Correct the command, environment, or dependency that makes the process
|
|
83
|
+
exit; raise `restartDelayMs` to slow a tight loop. Note that a release which
|
|
84
|
+
fails its health check never receives traffic, so a crash-looping proxied
|
|
85
|
+
process in a *failed* deploy does not take the site down — the previous release
|
|
86
|
+
stays active.
|
|
87
|
+
|
|
88
|
+
## Stuck draining releases
|
|
89
|
+
|
|
90
|
+
**Symptom.** Long after a deploy, `rollbridge status` still shows an old release
|
|
91
|
+
in `state: "draining"` with non-zero `connections` (often `websocket`).
|
|
92
|
+
|
|
93
|
+
**Diagnose.** Long-lived connections (WebSockets, SSE, streaming responses) keep
|
|
94
|
+
the retired release alive until they close or `proxy.drainTimeoutMs` elapses.
|
|
95
|
+
`status` shows the release's `connections.http`/`connections.websocket` and
|
|
96
|
+
`drainStartedAt`.
|
|
97
|
+
|
|
98
|
+
**Fix.** Draining ends automatically when those connections close, or after
|
|
99
|
+
`proxy.drainTimeoutMs` (then the release is stopped regardless). Lower
|
|
100
|
+
`proxy.drainTimeoutMs` to force-stop sooner, or make clients reconnect (for
|
|
101
|
+
example, have the front end close idle WebSockets on deploy). Once stopped, the
|
|
102
|
+
release is pruned per `releaseRetention`.
|
package/package.json
CHANGED
|
@@ -1,7 +1,26 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "rollbridge",
|
|
3
|
-
"version": "0.1.2",
|
|
3
|
+
"version": "0.1.4",
|
|
4
4
|
"description": "Zero-downtime process supervisor and local traffic switcher for deploy-managed apps.",
|
|
5
|
+
"keywords": [
|
|
6
|
+
"deploy",
|
|
7
|
+
"zero-downtime",
|
|
8
|
+
"process-supervisor",
|
|
9
|
+
"reverse-proxy",
|
|
10
|
+
"websocket",
|
|
11
|
+
"rollbridge",
|
|
12
|
+
"velocious"
|
|
13
|
+
],
|
|
14
|
+
"homepage": "https://github.com/kaspernj/rollbridge#readme",
|
|
15
|
+
"bugs": {
|
|
16
|
+
"url": "https://github.com/kaspernj/rollbridge/issues"
|
|
17
|
+
},
|
|
18
|
+
"license": "MIT",
|
|
19
|
+
"author": "kaspernj <kasper@diestoeckels.de>",
|
|
20
|
+
"repository": {
|
|
21
|
+
"type": "git",
|
|
22
|
+
"url": "git+https://github.com/kaspernj/rollbridge.git"
|
|
23
|
+
},
|
|
5
24
|
"type": "module",
|
|
6
25
|
"bin": {
|
|
7
26
|
"rollbridge": "./bin/rollbridge"
|
package/src/cli.js
CHANGED
|
@@ -7,6 +7,7 @@ import {spawn} from "node:child_process"
|
|
|
7
7
|
import {Command} from "commander"
|
|
8
8
|
import RollbridgeDaemon from "./daemon.js"
|
|
9
9
|
import {loadConfig, parseConfigFile, resolveConfigPath, validateConfig} from "./config.js"
|
|
10
|
+
import {runEnvironmentChecks} from "./doctor.js"
|
|
10
11
|
import {sendControlCommand} from "./control-client.js"
|
|
11
12
|
|
|
12
13
|
const DEFAULT_DAEMON_START_TIMEOUT_MS = 10000
|
|
@@ -153,20 +154,33 @@ export async function runCli(argv) {
|
|
|
153
154
|
.command("validate")
|
|
154
155
|
.description("Parse the config and report all errors without starting the daemon.")
|
|
155
156
|
.option("-c, --config <path>", "Config file path (defaults to rollbridge.js)")
|
|
157
|
+
.option("--json", "Output machine-readable JSON")
|
|
156
158
|
.action(async (options) => {
|
|
157
159
|
let configPath
|
|
158
160
|
|
|
159
161
|
try {
|
|
160
162
|
configPath = await resolveConfigPath(options.config)
|
|
161
163
|
} catch (error) {
|
|
162
|
-
|
|
164
|
+
const message = error instanceof Error ? error.message : String(error)
|
|
165
|
+
|
|
166
|
+
if (options.json) console.log(JSON.stringify({config: null, issues: [{fix: "Pass --config or add a rollbridge.js.", message}], path: null, valid: false}, null, 2))
|
|
167
|
+
else console.error(message)
|
|
163
168
|
process.exitCode = 1
|
|
164
169
|
return
|
|
165
170
|
}
|
|
166
171
|
|
|
167
172
|
const {config, issues} = await validateConfigFile(configPath)
|
|
173
|
+
const valid = issues.length === 0
|
|
174
|
+
|
|
175
|
+
if (options.json) {
|
|
176
|
+
const summary = valid ? {application: config.application, processes: config.processes.length, proxy: {host: config.proxy.host, port: config.proxy.port}} : null
|
|
177
|
+
|
|
178
|
+
console.log(JSON.stringify({config: summary, issues, path: configPath, valid}, null, 2))
|
|
179
|
+
if (!valid) process.exitCode = 1
|
|
180
|
+
return
|
|
181
|
+
}
|
|
168
182
|
|
|
169
|
-
if (issues.length === 0) {
|
|
183
|
+
if (valid) {
|
|
170
184
|
const processCount = config.processes.length
|
|
171
185
|
|
|
172
186
|
console.log(`${configPath} is valid: ${processCount} ${processCount === 1 ? "process" : "processes"}, proxy on ${config.proxy.host}:${config.proxy.port}.`)
|
|
@@ -183,9 +197,134 @@ export async function runCli(argv) {
|
|
|
183
197
|
process.exitCode = 1
|
|
184
198
|
})
|
|
185
199
|
|
|
200
|
+
program
|
|
201
|
+
.command("doctor")
|
|
202
|
+
.description("Check the environment before starting the daemon: config, control socket, and proxy port.")
|
|
203
|
+
.option("-c, --config <path>", "Config file path (defaults to rollbridge.js)")
|
|
204
|
+
.option("--json", "Output machine-readable JSON")
|
|
205
|
+
.action(async (options) => {
|
|
206
|
+
let configPath
|
|
207
|
+
|
|
208
|
+
try {
|
|
209
|
+
configPath = await resolveConfigPath(options.config)
|
|
210
|
+
} catch (error) {
|
|
211
|
+
const message = error instanceof Error ? error.message : String(error)
|
|
212
|
+
|
|
213
|
+
if (options.json) console.log(JSON.stringify({checks: [{detail: message, name: "config", ok: false}], ok: false}, null, 2))
|
|
214
|
+
else console.error(message)
|
|
215
|
+
process.exitCode = 1
|
|
216
|
+
return
|
|
217
|
+
}
|
|
218
|
+
|
|
219
|
+
const {config, issues} = await validateConfigFile(configPath)
|
|
220
|
+
/** @type {import("./doctor.js").DoctorCheck[]} */
|
|
221
|
+
const checks = []
|
|
222
|
+
|
|
223
|
+
if (issues.length > 0) {
|
|
224
|
+
checks.push({detail: `${issues.length} ${issues.length === 1 ? "issue" : "issues"} — run "rollbridge validate" for details`, name: "config", ok: false})
|
|
225
|
+
} else {
|
|
226
|
+
checks.push({detail: `valid: ${config.processes.length} ${config.processes.length === 1 ? "process" : "processes"}, proxy on ${config.proxy.host}:${config.proxy.port}`, name: "config", ok: true})
|
|
227
|
+
checks.push(...await runEnvironmentChecks(config))
|
|
228
|
+
}
|
|
229
|
+
|
|
230
|
+
const failed = checks.filter((check) => !check.ok).length
|
|
231
|
+
|
|
232
|
+
if (options.json) {
|
|
233
|
+
console.log(JSON.stringify({checks, ok: failed === 0}, null, 2))
|
|
234
|
+
} else {
|
|
235
|
+
for (const check of checks) {
|
|
236
|
+
console.log(`${check.ok ? "✓" : "✗"} ${check.name}: ${check.detail}`)
|
|
237
|
+
}
|
|
238
|
+
|
|
239
|
+
if (failed === 0) console.log("\nAll checks passed.")
|
|
240
|
+
else console.error(`\n${failed} check${failed === 1 ? "" : "s"} failed.`)
|
|
241
|
+
}
|
|
242
|
+
|
|
243
|
+
if (failed > 0) process.exitCode = 1
|
|
244
|
+
})
|
|
245
|
+
|
|
246
|
+
program
|
|
247
|
+
.command("logs")
|
|
248
|
+
.description("Print recent stdout/stderr captured from managed processes.")
|
|
249
|
+
.option("-c, --config <path>", "Config file path (defaults to rollbridge.js)")
|
|
250
|
+
.option("--process <id>", "Only show logs for the process with this id")
|
|
251
|
+
.option("--json", "Output machine-readable JSON")
|
|
252
|
+
.action(async (options) => {
|
|
253
|
+
const configPath = await resolveConfigPath(options.config)
|
|
254
|
+
const config = await loadConfig(configPath)
|
|
255
|
+
const response = await sendControlCommand({
|
|
256
|
+
command: {command: "status"},
|
|
257
|
+
path: config.control.path
|
|
258
|
+
})
|
|
259
|
+
const sources = collectLogSources(/** @type {import("./daemon.js").DaemonStatus} */ (response))
|
|
260
|
+
|
|
261
|
+
if (options.json) {
|
|
262
|
+
const filtered = options.process === undefined ? sources : sources.filter((source) => source.id === options.process)
|
|
263
|
+
|
|
264
|
+
console.log(JSON.stringify(filtered, null, 2))
|
|
265
|
+
return
|
|
266
|
+
}
|
|
267
|
+
|
|
268
|
+
console.log(formatLogSources(sources, options.process))
|
|
269
|
+
})
|
|
270
|
+
|
|
186
271
|
await program.parseAsync(argv)
|
|
187
272
|
}
|
|
188
273
|
|
|
274
|
+
/**
|
|
275
|
+
* @typedef {{id: string, logs: import("./managed-process.js").ManagedProcessLog[], source: string}} LogSource
|
|
276
|
+
*/
|
|
277
|
+
|
|
278
|
+
/**
 * Builds a flat list of per-process log sources from a daemon status payload.
 * Release processes are listed first, labelled with their release id and state,
 * followed by services and then singletons.
 * @param {import("./daemon.js").DaemonStatus} status - Daemon status payload.
 * @returns {LogSource[]} One entry per managed process.
 */
function collectLogSources(status) {
  const fromReleases = status.releases.flatMap((release) => {
    const label = `release ${release.releaseId} (${release.state})`

    return release.processes.map((entry) => ({id: entry.id, logs: entry.logs, source: label}))
  })
  const fromServices = status.services.map(({process: managed}) => ({id: managed.id, logs: managed.logs, source: "service"}))
  const fromSingletons = status.singletons.map(({process: managed}) => ({id: managed.id, logs: managed.logs, source: "singleton"}))

  return [...fromReleases, ...fromServices, ...fromSingletons]
}
|
|
303
|
+
|
|
304
|
+
/**
 * Renders log sources as plain text: one "== id [source] ==" section per process,
 * each followed by its log lines (or a placeholder when it has none), with a blank
 * line between sections.
 * @param {LogSource[]} sources - Collected log sources.
 * @param {string | undefined} processFilter - When set, only the process with this id is rendered.
 * @returns {string} Human-readable log output, or a short notice when nothing matches.
 */
export function formatLogSources(sources, processFilter) {
  const hasFilter = processFilter !== undefined
  const selected = hasFilter ? sources.filter(({id}) => id === processFilter) : sources

  if (selected.length === 0) {
    if (hasFilter) return `No process found with id "${processFilter}".`

    return "No managed processes."
  }

  const sections = []

  for (const {id, logs, source} of selected) {
    const lines = [`== ${id} [${source}] ==`]

    if (logs.length === 0) {
      lines.push(" (no recent output)")
    } else {
      for (const log of logs) lines.push(` ${log.at} [${log.stream}] ${log.line}`)
    }

    sections.push(lines.join("\n"))
  }

  return sections.join("\n\n")
}
|
|
327
|
+
|
|
189
328
|
/**
|
|
190
329
|
* Reads, parses, and validates a config file, collecting read, parse, and validation issues.
|
|
191
330
|
* @param {string} configPath - Config file path.
|
package/src/config.js
CHANGED
|
@@ -7,12 +7,13 @@ import {pathToFileURL} from "node:url"
|
|
|
7
7
|
/**
|
|
8
8
|
* @typedef {import("./json.js").JsonValue} JsonValue
|
|
9
9
|
* @typedef {{from: number, to: number}} PortRange
|
|
10
|
-
* @typedef {{path: string, timeoutMs: number, intervalMs: number}} HealthConfig
|
|
10
|
+
* @typedef {{path: string, startDelayMs: number, timeoutMs: number, intervalMs: number}} HealthConfig
|
|
11
11
|
* @typedef {"proxied" | "companion" | "singleton" | "service"} ProcessPolicy
|
|
12
12
|
* @typedef {{cwd?: string, env: Record<string, string>, gracefulStopMs: number, health?: HealthConfig, id: string, outputLines: number, policy: ProcessPolicy, port?: PortRange, restartDelayMs: number, command: string}} ProcessConfig
|
|
13
13
|
* @typedef {{mode?: number, path: string}} ControlConfig
|
|
14
|
-
* @typedef {{drainTimeoutMs: number, forceStopTimeoutMs: number, healthPath: string, healthTimeoutMs: number, host: string, port: number}} ProxyConfig
|
|
15
|
-
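* @typedef {{application: string, control: ControlConfig, processes: ProcessConfig[], proxy: ProxyConfig}} RollbridgeConfig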
|
|
14
|
+
* @typedef {{drainTimeoutMs: number, forceStopTimeoutMs: number, healthPath: string, healthTimeoutMs: number, host: string, port: number, upstreamHost: string}} ProxyConfig
|
|
15
|
+
* @typedef {{keep: number, maxAgeMs: number}} ReleaseRetentionConfig
|
|
16
|
+
* @typedef {{application: string, control: ControlConfig, processes: ProcessConfig[], proxy: ProxyConfig, releaseRetention: ReleaseRetentionConfig}} RollbridgeConfig
|
|
16
17
|
* @typedef {{fix: string, message: string}} ConfigIssue
|
|
17
18
|
*/
|
|
18
19
|
|
|
@@ -127,10 +128,11 @@ export function validateConfig(rawConfig, configPath = process.cwd()) {
|
|
|
127
128
|
path: normalizeString(controlSource.path, "control.path", issues, {default: `/tmp/rollbridge-${application}.sock`})
|
|
128
129
|
}
|
|
129
130
|
const processes = processesSource.map((processSource, index) => normalizeProcess(processSource, index, proxy, issues))
|
|
131
|
+
const releaseRetention = normalizeReleaseRetention(objectAt(source.releaseRetention, "releaseRetention", issues, {}), issues)
|
|
130
132
|
|
|
131
133
|
validateProcessSet(processes, issues)
|
|
132
134
|
|
|
133
|
-
return {config: {application, control, processes, proxy}, issues}
|
|
135
|
+
return {config: {application, control, processes, proxy, releaseRetention}, issues}
|
|
134
136
|
}
|
|
135
137
|
|
|
136
138
|
/**
|
|
@@ -139,16 +141,29 @@ export function validateConfig(rawConfig, configPath = process.cwd()) {
|
|
|
139
141
|
* @returns {ProxyConfig} Normalized proxy config.
|
|
140
142
|
*/
|
|
141
143
|
function normalizeProxy(source, issues) {
|
|
144
|
+
const host = normalizeString(source.host, "proxy.host", issues, {default: "127.0.0.1"})
|
|
145
|
+
|
|
142
146
|
return {
|
|
143
147
|
drainTimeoutMs: normalizeNumber(source.drainTimeoutMs, "proxy.drainTimeoutMs", issues, {default: 60000}),
|
|
144
148
|
forceStopTimeoutMs: normalizeNumber(source.forceStopTimeoutMs, "proxy.forceStopTimeoutMs", issues, {default: 10000}),
|
|
145
149
|
healthPath: normalizeString(source.healthPath, "proxy.healthPath", issues, {default: "/ping"}),
|
|
146
150
|
healthTimeoutMs: normalizeNumber(source.healthTimeoutMs, "proxy.healthTimeoutMs", issues, {default: 30000}),
|
|
147
|
-
host: normalizeString(source.host, "proxy.host", issues, {default: "127.0.0.1"}),
|
|
148
|
-
port: normalizeNumber(source.port, "proxy.port", issues, {default: 8182})
|
|
151
|
+
host,
|
|
152
|
+
port: normalizeNumber(source.port, "proxy.port", issues, {default: 8182}),
|
|
153
|
+
upstreamHost: normalizeString(source.upstreamHost, "proxy.upstreamHost", issues, {default: defaultUpstreamHost(host)})
|
|
149
154
|
}
|
|
150
155
|
}
|
|
151
156
|
|
|
157
|
+
/**
 * Chooses the default upstream host for a given proxy bind host.
 * The wildcard bind addresses "0.0.0.0" and "::" map to the IPv4 loopback address;
 * any other host is returned unchanged.
 * @param {string} host - Public proxy bind host.
 * @returns {string} "127.0.0.1" for wildcard binds, otherwise the bind host itself.
 */
function defaultUpstreamHost(host) {
  const wildcardHosts = ["0.0.0.0", "::"]

  return wildcardHosts.includes(host) ? "127.0.0.1" : host
}
|
|
166
|
+
|
|
152
167
|
/**
|
|
153
168
|
* @param {JsonValue} value - Raw process config.
|
|
154
169
|
* @param {number} index - Process index.
|
|
@@ -197,6 +212,39 @@ function normalizeOutputLines(value, key, issues) {
|
|
|
197
212
|
return outputLines
|
|
198
213
|
}
|
|
199
214
|
|
|
215
|
+
/**
 * Normalizes the release retention policy. Each field is first type-normalized
 * (defaults: keep 10, maxAgeMs 0) and then range-checked: keep must be a
 * non-negative integer, maxAgeMs a non-negative number. Invalid values are
 * reported through the issue collector and replaced by the default.
 * @param {Record<string, JsonValue>} source - Raw release retention config.
 * @param {ConfigIssue[]} issues - Issue collector.
 * @returns {ReleaseRetentionConfig} Normalized release retention policy.
 */
function normalizeReleaseRetention(source, issues) {
  const keepKey = "releaseRetention.keep"
  const maxAgeKey = "releaseRetention.maxAgeMs"
  const rawKeep = normalizeNumber(source.keep, keepKey, issues, {default: 10})
  const rawMaxAgeMs = normalizeNumber(source.maxAgeMs, maxAgeKey, issues, {default: 0})
  const keep = nonNegativeOrDefault(rawKeep, keepKey, issues, 10, true)
  const maxAgeMs = nonNegativeOrDefault(rawMaxAgeMs, maxAgeKey, issues, 0, false)

  return {keep, maxAgeMs}
}
|
|
229
|
+
|
|
230
|
+
/**
 * Range-checks an already type-normalized number. A value that is negative, or
 * that is not an integer when one is required, is recorded as a config issue and
 * replaced by the fallback.
 * @param {number} value - Already type-normalized number.
 * @param {string} key - Config key, used in the issue text.
 * @param {ConfigIssue[]} issues - Issue collector.
 * @param {number} fallback - Value to use when invalid; also shown as the example in the fix hint.
 * @param {boolean} requireInteger - Whether the value must be an integer.
 * @returns {number} The value when it passes the checks, else the fallback.
 */
function nonNegativeOrDefault(value, key, issues, fallback, requireInteger) {
  const isNegative = value < 0
  const failsIntegerRule = requireInteger && !Number.isInteger(value)

  if (!isNegative && !failsIntegerRule) return value

  const kind = requireInteger ? "integer" : "number"

  issues.push({fix: `Set ${key} to a non-negative ${kind}, e.g. ${fallback}.`, message: `${key} must be a non-negative ${kind}`})

  return fallback
}
|
|
247
|
+
|
|
200
248
|
/**
|
|
201
249
|
* @param {JsonValue} value - Raw socket permission mode.
|
|
202
250
|
* @param {string} key - Config key.
|
|
@@ -291,10 +339,29 @@ function normalizeHealth(value, key, proxy, issues) {
|
|
|
291
339
|
return {
|
|
292
340
|
intervalMs: normalizeNumber(source.intervalMs, `${key}.intervalMs`, issues, {default: 250}),
|
|
293
341
|
path: normalizeString(source.path, `${key}.path`, issues, {default: proxy.healthPath}),
|
|
342
|
+
startDelayMs: normalizeStartDelayMs(source.startDelayMs, `${key}.startDelayMs`, issues),
|
|
294
343
|
timeoutMs: normalizeNumber(source.timeoutMs, `${key}.timeoutMs`, issues, {default: proxy.healthTimeoutMs})
|
|
295
344
|
}
|
|
296
345
|
}
|
|
297
346
|
|
|
347
|
+
/**
 * Normalizes the health-check startup delay. The raw value is type-normalized with
 * a default of 0; a negative result is reported as a config issue and replaced by 0.
 * @param {JsonValue} value - Raw startup delay.
 * @param {string} key - Config key, used in the issue text.
 * @param {ConfigIssue[]} issues - Issue collector.
 * @returns {number} Milliseconds to wait before the first health probe (default 0).
 */
function normalizeStartDelayMs(value, key, issues) {
  const delayMs = normalizeNumber(value, key, issues, {default: 0})
  const isNegative = delayMs < 0

  if (!isNegative) return delayMs

  issues.push({fix: `Set ${key} to a non-negative number of milliseconds, e.g. 0 or 2000.`, message: `${key} must be a non-negative number`})

  return 0
}
|
|
364
|
+
|
|
298
365
|
/**
|
|
299
366
|
* @param {JsonValue} value - Raw env config.
|
|
300
367
|
* @param {string} key - Config key.
|
package/src/daemon.js
CHANGED
|
@@ -10,7 +10,7 @@ import ReleaseGroup from "./release-group.js"
|
|
|
10
10
|
* @typedef {import("./json.js").JsonValue} JsonValue
|
|
11
11
|
* @typedef {{releaseId?: string, releasePath: string, revision?: string}} DeployArgs
|
|
12
12
|
* @typedef {{id: string, process: import("./managed-process.js").ManagedProcessStatus}} ProcessStatus
|
|
13
|
-
* @typedef {{activeReleaseId: string | null, application: string, control: import("./config.js").ControlConfig, proxy: {host: string, port: number | undefined}, releases: import("./release-group.js").ReleaseStatus[], services: ProcessStatus[], singletons: ProcessStatus[]}} DaemonStatus
|
|
13
|
+
* @typedef {{activeReleaseId: string | null, application: string, control: import("./config.js").ControlConfig, proxy: {host: string, port: number | undefined, upstreamHost: string}, releases: import("./release-group.js").ReleaseStatus[], services: ProcessStatus[], singletons: ProcessStatus[]}} DaemonStatus
|
|
14
14
|
*/
|
|
15
15
|
|
|
16
16
|
export default class RollbridgeDaemon {
|
|
@@ -286,9 +286,7 @@ export default class RollbridgeDaemon {
|
|
|
286
286
|
await this.replaceSingletons(release)
|
|
287
287
|
|
|
288
288
|
if (previousRelease) {
|
|
289
|
-
|
|
290
|
-
this.logger("release drain failed", {error: error instanceof Error ? error.message : String(error), releaseId: previousRelease.releaseId})
|
|
291
|
-
})
|
|
289
|
+
void this.drainAndPrune(previousRelease)
|
|
292
290
|
}
|
|
293
291
|
|
|
294
292
|
return {
|
|
@@ -407,6 +405,31 @@ export default class RollbridgeDaemon {
|
|
|
407
405
|
if (release === this.activeRelease) this.activeRelease = undefined
|
|
408
406
|
|
|
409
407
|
await release.stop()
|
|
408
|
+
this.pruneStoppedReleases()
|
|
409
|
+
}
|
|
410
|
+
|
|
411
|
+
/**
|
|
412
|
+
* Drains and stops a retired release in the background, then prunes stopped releases.
|
|
413
|
+
* @param {ReleaseGroup} release - Release to drain and stop.
|
|
414
|
+
* @returns {Promise<void>} Resolves once drained, stopped, and pruned.
|
|
415
|
+
*/
|
|
416
|
+
async drainAndPrune(release) {
|
|
417
|
+
try {
|
|
418
|
+
await release.drainAndStop(this.config.proxy.drainTimeoutMs)
|
|
419
|
+
} catch (error) {
|
|
420
|
+
this.logger("release drain failed", {error: error instanceof Error ? error.message : String(error), releaseId: release.releaseId})
|
|
421
|
+
} finally {
|
|
422
|
+
this.pruneStoppedReleases()
|
|
423
|
+
}
|
|
424
|
+
}
|
|
425
|
+
|
|
426
|
+
/** @returns {void} Removes stopped releases beyond the retention policy. */
|
|
427
|
+
pruneStoppedReleases() {
|
|
428
|
+
const statuses = [...this.releases.values()].map((release) => release.status())
|
|
429
|
+
|
|
430
|
+
for (const releaseId of releasesToPrune(statuses, this.config.releaseRetention, Date.now())) {
|
|
431
|
+
this.releases.delete(releaseId)
|
|
432
|
+
}
|
|
410
433
|
}
|
|
411
434
|
|
|
412
435
|
/** @returns {Promise<void>} Stops proxy, control socket, and child processes. */
|
|
@@ -446,7 +469,8 @@ export default class RollbridgeDaemon {
|
|
|
446
469
|
control: {...this.config.control},
|
|
447
470
|
proxy: {
|
|
448
471
|
host: this.config.proxy.host,
|
|
449
|
-
port: this.proxyPort ?? this.config.proxy.port
|
|
472
|
+
port: this.proxyPort ?? this.config.proxy.port,
|
|
473
|
+
upstreamHost: this.config.proxy.upstreamHost
|
|
450
474
|
},
|
|
451
475
|
releases: [...this.releases.values()].map((release) => release.status()),
|
|
452
476
|
services: [...this.services.entries()].map(([id, processInstance]) => ({
|
|
@@ -485,6 +509,37 @@ function requiredString(value, key) {
|
|
|
485
509
|
return value
|
|
486
510
|
}
|
|
487
511
|
|
|
512
|
+
/**
|
|
513
|
+
* @typedef {{releaseId: string, state: string, stoppedAt: string | undefined}} PrunableRelease
|
|
514
|
+
*/
|
|
515
|
+
|
|
516
|
+
/**
 * Selects stopped releases to prune by the retention policy, keeping the most recent.
 *
 * Only releases whose state is "stopped" are considered. They are ordered most
 * recently stopped first, and a release is pruned when it falls beyond the first
 * `policy.keep` entries, or when `policy.maxAgeMs` is positive and the release
 * stopped longer ago than that. A missing or unparseable `stoppedAt` is treated as
 * an unknown time: it sorts as oldest and is never pruned by age.
 * @param {PrunableRelease[]} releases - Status of all tracked releases, in deploy order (oldest first).
 * @param {import("./config.js").ReleaseRetentionConfig} policy - Retention policy.
 * @param {number} now - Current epoch milliseconds.
 * @returns {string[]} Release ids to remove, most recently stopped first.
 */
export function releasesToPrune(releases, policy, now) {
  const stopped = releases
    .filter((release) => release.state === "stopped")
    .map((release, index) => {
      const parsed = release.stoppedAt ? Date.parse(release.stoppedAt) : 0

      // Date.parse yields NaN for an unparseable string, which would make the comparator
      // below inconsistent; fall back to 0 ("unknown"), the same as a missing timestamp.
      return {deployOrder: index, releaseId: release.releaseId, stoppedAtMs: Number.isNaN(parsed) ? 0 : parsed}
    })
    // Most recent first; ties (same stoppedAt millisecond) prefer the later-deployed release.
    .sort((first, second) => second.stoppedAtMs - first.stoppedAtMs || second.deployOrder - first.deployOrder)

  return stopped
    .filter((release, index) => {
      const beyondKeep = index >= policy.keep
      // stoppedAtMs of 0 means the stop time is unknown, so age cannot be judged.
      const tooOld = policy.maxAgeMs > 0 && release.stoppedAtMs > 0 && now - release.stoppedAtMs > policy.maxAgeMs

      return beyondKeep || tooOld
    })
    .map((release) => release.releaseId)
}
|
|
542
|
+
|
|
488
543
|
/**
|
|
489
544
|
* @typedef {{alive: boolean, application?: string, activeReleaseId?: string | null}} ControlSocketInspection
|
|
490
545
|
*/
|
|
@@ -512,7 +567,7 @@ function controlSocketBusyMessage(socketPath, inspection) {
|
|
|
512
567
|
* @param {number} [timeoutMs] - How long to wait for a status response before treating the socket as busy.
|
|
513
568
|
* @returns {Promise<ControlSocketInspection>} Whether the socket is live and, when it is Rollbridge, its identity.
|
|
514
569
|
*/
|
|
515
|
-
async function inspectControlSocket(socketPath, timeoutMs = 1000) {
|
|
570
|
+
export async function inspectControlSocket(socketPath, timeoutMs = 1000) {
|
|
516
571
|
return await new Promise((resolve, reject) => {
|
|
517
572
|
const socket = net.createConnection(socketPath)
|
|
518
573
|
let buffer = ""
|