rollbridge 0.1.1 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +227 -5
- package/TODO.md +23 -19
- package/docs/cli.md +151 -0
- package/docs/config.md +128 -0
- package/docs/deploy-recipes.md +102 -0
- package/docs/troubleshooting.md +102 -0
- package/examples/rollbridge.service +48 -0
- package/package.json +20 -1
- package/src/cli.js +141 -2
- package/src/config.js +119 -9
- package/src/daemon.js +66 -6
- package/src/doctor.js +114 -0
- package/src/health.js +4 -0
- package/src/managed-process.js +17 -7
- package/src/release-group.js +35 -4
- package/test/config-validation.test.js +167 -0
- package/test/control-protocol.test.js +94 -0
- package/test/doctor.test.js +228 -0
- package/test/fixtures/crasher.js +2 -0
- package/test/health.test.js +63 -0
- package/test/logs.test.js +99 -0
- package/test/managed-process.test.js +106 -0
- package/test/package-metadata.test.js +29 -0
- package/test/proxy.test.js +128 -0
- package/test/release-group.test.js +58 -0
- package/test/release-retention.test.js +107 -0
- package/test/rollbridge.test.js +79 -5
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 kaspernj
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
CHANGED
|
@@ -37,6 +37,7 @@ export default {
|
|
|
37
37
|
proxy: {
|
|
38
38
|
host: "127.0.0.1",
|
|
39
39
|
port: 8182,
|
|
40
|
+
upstreamHost: "127.0.0.1",
|
|
40
41
|
healthPath: "/ping",
|
|
41
42
|
healthTimeoutMs: 30000,
|
|
42
43
|
drainTimeoutMs: 60000,
|
|
@@ -55,7 +56,8 @@ export default {
|
|
|
55
56
|
id: "background-jobs-worker",
|
|
56
57
|
policy: "companion",
|
|
57
58
|
cwd: "{{releasePath}}",
|
|
58
|
-
command: "npx velocious background-jobs-worker"
|
|
59
|
+
command: "npx velocious background-jobs-worker",
|
|
60
|
+
outputLines: 200
|
|
59
61
|
},
|
|
60
62
|
{
|
|
61
63
|
id: "background-jobs-main",
|
|
@@ -75,6 +77,31 @@ export default {
|
|
|
75
77
|
}
|
|
76
78
|
```
|
|
77
79
|
|
|
80
|
+
Each process retains its most recent stdout/stderr lines and reports them in
|
|
81
|
+
`status`. Set `outputLines` (a positive integer, default 50) per process to keep
|
|
82
|
+
more or fewer lines for chatty or quiet processes.
|
|
83
|
+
|
|
84
|
+
Set `control.mode` to an octal permission string (for example `"660"`) to
|
|
85
|
+
chmod the control socket after it binds. This restricts which users can send
|
|
86
|
+
control commands — useful when several deploy users share a group. When unset,
|
|
87
|
+
the socket keeps the default permissions from the daemon's umask.
|
|
88
|
+
|
|
89
|
+
Set the proxied process's `health.startDelayMs` (default `0`) to wait that long
|
|
90
|
+
after the process starts before the first health probe — like a readiness
|
|
91
|
+
probe's initial delay, useful for apps with a known boot time. The delay runs
|
|
92
|
+
before the `health.timeoutMs` window begins.
|
|
93
|
+
|
|
94
|
+
Set `releaseRetention` to bound how many stopped (drained) releases the daemon
|
|
95
|
+
keeps in memory and reports in `status`. `keep` (default `10`) retains the most
|
|
96
|
+
recent stopped releases; `maxAgeMs` (default `0`, disabled) also prunes stopped
|
|
97
|
+
releases older than that many milliseconds. The active and draining releases are
|
|
98
|
+
never pruned. This applies only to Rollbridge's own release records — your deploy tool still
|
|
99
|
+
owns cleaning up on-disk release directories.
|
|
100
|
+
|
|
101
|
+
```js
|
|
102
|
+
releaseRetention: {keep: 5, maxAgeMs: 86400000}
|
|
103
|
+
```
|
|
104
|
+
|
|
78
105
|
A function export receives no arguments and lets you build the config at load
|
|
79
106
|
time:
|
|
80
107
|
|
|
@@ -90,15 +117,127 @@ export default () => ({
|
|
|
90
117
|
})
|
|
91
118
|
```
|
|
92
119
|
|
|
120
|
+
### Template variables
|
|
121
|
+
|
|
122
|
+
A process's `command`, `cwd`, and `env` values support `{{...}}` placeholders
|
|
123
|
+
rendered when the process starts:
|
|
124
|
+
|
|
125
|
+
- `{{releasePath}}`, `{{releaseId}}`, `{{revision}}`, `{{application}}`, `{{processId}}`
|
|
126
|
+
- `{{port}}` — the port allocated to this process; `{{ports.<id>}}` — another process's allocated port
|
|
127
|
+
- `{{proxy.host}}`, `{{proxy.port}}`, `{{proxy.upstreamHost}}`
|
|
128
|
+
- `{{env.<NAME>}}` — a variable from the daemon's own environment, e.g. `{{env.HOME}}`
|
|
129
|
+
|
|
130
|
+
Referencing a placeholder with no value (including an unset `{{env.<NAME>}}`)
|
|
131
|
+
fails the process start with a clear error, so typos surface immediately.
|
|
132
|
+
|
|
93
133
|
Production-ready examples live in `examples/`, including
|
|
94
134
|
`examples/tensorbuzz.com.js` for the current TensorBuzz backend deployment.
|
|
95
135
|
|
|
136
|
+
See [`docs/config.md`](docs/config.md) for the full config reference — every
|
|
137
|
+
field, its default, validation rules, template variables, and the environment
|
|
138
|
+
variables Rollbridge injects.
|
|
139
|
+
|
|
96
140
|
## Process Policies
|
|
97
141
|
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
142
|
+
Every process declares a `policy` that controls its lifecycle. Pick one per
|
|
143
|
+
process:
|
|
144
|
+
|
|
145
|
+
| You need… | Use |
|
|
146
|
+
| --- | --- |
|
|
147
|
+
| The process that receives external HTTP/WebSocket traffic | `proxied` |
|
|
148
|
+
| A per-release helper tied to the release lifecycle | `companion` |
|
|
149
|
+
| Exactly one instance, never overlapping across deploys | `singleton` |
|
|
150
|
+
| A long-lived shared broker that survives deploys | `service` |
|
|
151
|
+
|
|
152
|
+
### `proxied`
|
|
153
|
+
|
|
154
|
+
The web/API process — exactly one per config. Rollbridge forwards HTTP and
|
|
155
|
+
WebSocket traffic to the active release's proxied process and tracks open
|
|
156
|
+
connections so they can be drained on the next deploy. It must define a `port`
|
|
157
|
+
range, is health-checked before traffic switches to a new release, and is
|
|
158
|
+
auto-restarted while its release is active.
|
|
159
|
+
|
|
160
|
+
```js
|
|
161
|
+
{
|
|
162
|
+
id: "web",
|
|
163
|
+
policy: "proxied",
|
|
164
|
+
cwd: "{{releasePath}}",
|
|
165
|
+
command: "npx velocious server --host 127.0.0.1 --port {{port}}",
|
|
166
|
+
port: {from: 18182, to: 18299},
|
|
167
|
+
health: {path: "/ping", timeoutMs: 30000}
|
|
168
|
+
}
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
### `companion`
|
|
172
|
+
|
|
173
|
+
A release-scoped helper (for example a background worker bound to one release).
|
|
174
|
+
It starts **before** the proxied process in the same release, so release-local
|
|
175
|
+
dependencies are ready before the health check, and it is auto-restarted while
|
|
176
|
+
its release is active. Each release gets its own companions; a release's
|
|
177
|
+
companions stop when that release is drained and retired after a newer release
|
|
178
|
+
takes over.
|
|
179
|
+
|
|
180
|
+
```js
|
|
181
|
+
{
|
|
182
|
+
id: "background-jobs-worker",
|
|
183
|
+
policy: "companion",
|
|
184
|
+
cwd: "{{releasePath}}",
|
|
185
|
+
command: "npx velocious background-jobs-worker",
|
|
186
|
+
gracefulStopMs: 60000
|
|
187
|
+
}
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
### `singleton`
|
|
191
|
+
|
|
192
|
+
A one-at-a-time helper for duplicate-unsafe schedulers or job dispatchers. After
|
|
193
|
+
a new release becomes active, Rollbridge stops the old singleton and then starts
|
|
194
|
+
the new one, so two copies never run at once. Use it when running the old and
|
|
195
|
+
new copies simultaneously during a deploy would be unsafe.
|
|
196
|
+
|
|
197
|
+
```js
|
|
198
|
+
{
|
|
199
|
+
id: "scheduler",
|
|
200
|
+
policy: "singleton",
|
|
201
|
+
cwd: "{{releasePath}}",
|
|
202
|
+
command: "npx velocious scheduler"
|
|
203
|
+
}
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
### `service`
|
|
207
|
+
|
|
208
|
+
A daemon-wide broker that should outlive individual releases — for example
|
|
209
|
+
Velocious Beacon or `background-jobs-main`. Rollbridge starts it once (before
|
|
210
|
+
release processes that depend on it), keeps it running across deploys, and gives
|
|
211
|
+
it a stable port that does not change between releases. After each successful
|
|
212
|
+
deploy its restart template is refreshed to the latest release, so if it crashes
|
|
213
|
+
it restarts from the newest good release. It keeps restarting until the daemon
|
|
214
|
+
shuts down.
|
|
215
|
+
|
|
216
|
+
```js
|
|
217
|
+
{
|
|
218
|
+
id: "background-jobs-main",
|
|
219
|
+
policy: "service",
|
|
220
|
+
cwd: "{{releasePath}}",
|
|
221
|
+
command: "npx velocious background-jobs-main",
|
|
222
|
+
port: 7331
|
|
223
|
+
}
|
|
224
|
+
```
|
|
225
|
+
|
|
226
|
+
### Deploy ordering
|
|
227
|
+
|
|
228
|
+
On `rollbridge deploy`, Rollbridge:
|
|
229
|
+
|
|
230
|
+
1. starts any `service` that is not already running;
|
|
231
|
+
2. starts the new release's `companion`s, then its `proxied` process, and
|
|
232
|
+
health-checks the proxied process;
|
|
233
|
+
3. switches new traffic to the new release;
|
|
234
|
+
4. refreshes each `service`'s restart template to the new release;
|
|
235
|
+
5. replaces `singleton`s (stops the old one, then starts the new one);
|
|
236
|
+
6. drains the previous release's connections, then stops its `proxied` and
|
|
237
|
+
`companion` processes.
|
|
238
|
+
|
|
239
|
+
If the new release fails to start or health-check, the previous release stays
|
|
240
|
+
active and any service started during this deploy is rolled back.
|
|
102
241
|
|
|
103
242
|
## Commands
|
|
104
243
|
|
|
@@ -107,6 +246,14 @@ Production-ready examples live in `examples/`, including
|
|
|
107
246
|
explicitly, but `rollbridge validate` (or any command) works with no flag when a
|
|
108
247
|
`rollbridge.js` is present.
|
|
109
248
|
|
|
249
|
+
For machine-readable output, `deploy`, `status`, `stop`, `shutdown`, and
|
|
250
|
+
`ensure-daemon` already print JSON, and `validate`, `doctor`, and `logs` accept
|
|
251
|
+
a `--json` flag that switches their output to JSON (with the same exit codes),
|
|
252
|
+
so deploy tooling can parse results.
|
|
253
|
+
|
|
254
|
+
See [`docs/cli.md`](docs/cli.md) for the full per-command reference (every
|
|
255
|
+
option, default, output shape, and exit code).
|
|
256
|
+
|
|
110
257
|
Validate a config without starting the daemon:
|
|
111
258
|
|
|
112
259
|
```bash
|
|
@@ -129,6 +276,30 @@ Found 2 configuration issues in rollbridge.js:
|
|
|
129
276
|
Fix: Give each process a unique id; "web" is used more than once.
|
|
130
277
|
```
|
|
131
278
|
|
|
279
|
+
Check the environment before starting the daemon:
|
|
280
|
+
|
|
281
|
+
```bash
|
|
282
|
+
rollbridge doctor --config rollbridge.js
|
|
283
|
+
```
|
|
284
|
+
|
|
285
|
+
`doctor` validates the config and then probes the runtime environment, exiting
|
|
286
|
+
non-zero if any check fails (so deploy tooling can gate on it):
|
|
287
|
+
|
|
288
|
+
```text
|
|
289
|
+
✓ config: valid: 4 processes, proxy on 127.0.0.1:8182
|
|
290
|
+
✓ control socket: no daemon running; /tmp/rollbridge-ticket-server.sock is free to bind
|
|
291
|
+
✓ control socket directory: /tmp is writable
|
|
292
|
+
✓ proxy port: 127.0.0.1:8182 is available
|
|
293
|
+
|
|
294
|
+
All checks passed.
|
|
295
|
+
```
|
|
296
|
+
|
|
297
|
+
A free control socket, a writable socket directory, and a bindable proxy port
|
|
298
|
+
pass. Because `rollbridge daemon` cannot bind a socket or port that is already
|
|
299
|
+
taken, doctor fails the relevant check when a Rollbridge daemon (or any other
|
|
300
|
+
process) is already listening on the control socket or holding the proxy port —
|
|
301
|
+
so a green `doctor` means a fresh daemon can actually start.
|
|
302
|
+
|
|
132
303
|
Start the daemon:
|
|
133
304
|
|
|
134
305
|
```bash
|
|
@@ -159,6 +330,19 @@ Inspect state:
|
|
|
159
330
|
rollbridge status --config rollbridge.js
|
|
160
331
|
```
|
|
161
332
|
|
|
333
|
+
`status` reports each managed process's `state`, `pid`, recent `logs`, last
|
|
334
|
+
`exitCode`/`exitSignal`, automatic-restart count
|
|
335
|
+
(`restarts`), last start time (`startedAt`), and current `uptimeMs` while
|
|
336
|
+
running.
|
|
337
|
+
|
|
338
|
+
Print the recent captured stdout/stderr per process (a one-shot snapshot of the
|
|
339
|
+
retained `outputLines`, not a live stream):
|
|
340
|
+
|
|
341
|
+
```bash
|
|
342
|
+
rollbridge logs --config rollbridge.js
|
|
343
|
+
rollbridge logs --config rollbridge.js --process web
|
|
344
|
+
```
|
|
345
|
+
|
|
162
346
|
Stop the active release:
|
|
163
347
|
|
|
164
348
|
```bash
|
|
@@ -187,10 +371,44 @@ location / {
|
|
|
187
371
|
}
|
|
188
372
|
```
|
|
189
373
|
|
|
374
|
+
## Running under systemd
|
|
375
|
+
|
|
376
|
+
Run the long-lived daemon as a systemd service so it starts on boot and is
|
|
377
|
+
restarted if it crashes. A ready-to-edit unit lives at
|
|
378
|
+
`examples/rollbridge.service`:
|
|
379
|
+
|
|
380
|
+
```bash
|
|
381
|
+
sudo cp examples/rollbridge.service /etc/systemd/system/rollbridge.service
|
|
382
|
+
# edit User/Group, WorkingDirectory, the ExecStart path, and --config
|
|
383
|
+
sudo systemctl daemon-reload
|
|
384
|
+
sudo systemctl enable --now rollbridge
|
|
385
|
+
sudo systemctl status rollbridge
|
|
386
|
+
```
|
|
387
|
+
|
|
388
|
+
The unit runs `rollbridge daemon --config <stable-config>` in the foreground,
|
|
389
|
+
so its output goes to the journal (`journalctl -u rollbridge`). Key directives:
|
|
390
|
+
|
|
391
|
+
- `KillMode=mixed` / `KillSignal=SIGTERM`: Rollbridge stops its own managed
|
|
392
|
+
child process groups on `SIGTERM`, so systemd signals only the daemon and
|
|
393
|
+
lets it shut down gracefully before escalating to `SIGKILL`.
|
|
394
|
+
- `TimeoutStopSec`: give the daemon time to stop its managed processes; size it
|
|
395
|
+
above the largest process `gracefulStopMs` (the daemon `SIGKILL`s stragglers
|
|
396
|
+
after that). Note that `systemctl stop`/reboot stops processes but does **not**
|
|
397
|
+
drain HTTP/WebSocket connections — connection draining happens only during
|
|
398
|
+
`rollbridge deploy` release transitions.
|
|
399
|
+
|
|
400
|
+
The daemon is long-lived and survives deploys. **Deploy with
|
|
401
|
+
`rollbridge deploy` (or `rollbridge deploy --ensure-daemon`), not
|
|
402
|
+
`systemctl restart`** — pointing `--config` at a stable, daemon-wide file while
|
|
403
|
+
release paths are passed per deploy. Use `command -v rollbridge` to find the
|
|
404
|
+
absolute CLI path for `ExecStart`.
|
|
405
|
+
|
|
190
406
|
## Deployment Notes
|
|
191
407
|
|
|
192
408
|
Run migrations before `rollbridge deploy`, and keep migrations backwards-compatible while old and new web releases overlap. For stable local brokers such as Velocious Beacon or `background-jobs-main`, use `service` when the process should survive deploys and restart from the latest successful release if it crashes.
|
|
193
409
|
|
|
410
|
+
See [`docs/deploy-recipes.md`](docs/deploy-recipes.md) for ready-to-use shell, CI, and Capistrano recipes that drive Rollbridge through its CLI, and [`docs/troubleshooting.md`](docs/troubleshooting.md) for diagnosing health-check failures, port conflicts, stale sockets, crash loops, and stuck draining releases.
|
|
411
|
+
|
|
194
412
|
## Releasing
|
|
195
413
|
|
|
196
414
|
Maintainers can publish a patch release from the latest default branch:
|
|
@@ -198,3 +416,7 @@ Maintainers can publish a patch release from the latest default branch:
|
|
|
198
416
|
```bash
|
|
199
417
|
npm run release:patch
|
|
200
418
|
```
|
|
419
|
+
|
|
420
|
+
## License
|
|
421
|
+
|
|
422
|
+
Rollbridge is released under the [MIT License](LICENSE).
|
package/TODO.md
CHANGED
|
@@ -52,45 +52,49 @@ This roadmap tracks planned Rollbridge features and documentation. Rollbridge sh
|
|
|
52
52
|
- [ ] Document migration constraints for rollback.
|
|
53
53
|
- [ ] Observability and diagnostics.
|
|
54
54
|
- [ ] Add structured event history for deploys, switches, stops, crashes, memory restarts, and failed commands.
|
|
55
|
-
- [
|
|
56
|
-
- [ ] Add
|
|
55
|
+
- [x] Add restart counters and uptime to status (exit reasons already reported via `exitCode`/`exitSignal`/`state`).
|
|
56
|
+
- [ ] Add memory stats and child-process-tree details to status (with memory supervision).
|
|
57
|
+
- [x] Add a `logs` CLI command (recent per-process output from status).
|
|
58
|
+
- [ ] Add an `events` CLI command (after structured event history lands).
|
|
57
59
|
- [ ] Add optional file logging with rotation guidance.
|
|
58
|
-
- [
|
|
60
|
+
- [x] Add machine-readable JSON output for all CLI commands (data commands print JSON; `validate`/`doctor`/`logs` take `--json`).
|
|
59
61
|
- [ ] Config validation and doctoring.
|
|
60
62
|
- [x] Add `validate` to parse config and report all config errors without starting the daemon.
|
|
61
|
-
- [
|
|
63
|
+
- [x] Add `doctor` to check config validity, control socket reachability, proxy port availability, and control-socket directory writability.
|
|
64
|
+
- [ ] Extend `doctor` with process-command, release-path, and log/state-path checks once those are resolvable (rendered templates, persisted state).
|
|
62
65
|
- [x] Validate duplicate process IDs, missing ports on proxied processes, invalid ranges, and the single-proxied-process policy rule.
|
|
63
66
|
- [ ] Validate unsupported lifecycle-hook combinations once worker lifecycle hooks land.
|
|
64
67
|
- [x] Include example fixes in validation output.
|
|
65
68
|
|
|
66
69
|
## Minor Features
|
|
67
70
|
|
|
68
|
-
- [
|
|
71
|
+
- [x] Add a control-socket permission option (`control.mode`) for shared deploy users.
|
|
72
|
+
- [ ] Add control-socket owner/group options for shared deploy users (needs name-to-id resolution).
|
|
69
73
|
- [x] Make stale control socket diagnostics clearer when another daemon is still alive.
|
|
70
|
-
- [
|
|
74
|
+
- [x] Add old-release cleanup policies by age, count, and stopped state (`releaseRetention`).
|
|
71
75
|
- [x] Add port allocation diagnostics when a range is exhausted.
|
|
72
|
-
- [
|
|
73
|
-
- [
|
|
74
|
-
- [
|
|
76
|
+
- [x] Add an optional startup delay (`health.startDelayMs`) before health checks begin.
|
|
77
|
+
- [x] Add process output retention config instead of a fixed recent-log count.
|
|
78
|
+
- [x] Add environment variable interpolation from the daemon environment.
|
|
75
79
|
- [x] Add `--config` default lookup resolving to `rollbridge.js` when no path is given.
|
|
76
80
|
- [ ] Add shell completion generation for common shells.
|
|
77
|
-
- [
|
|
78
|
-
- [
|
|
79
|
-
- [
|
|
81
|
+
- [x] Add npm package metadata such as repository, license, bugs, and homepage.
|
|
82
|
+
- [x] Add systemd service examples for the Rollbridge daemon.
|
|
83
|
+
- [x] Add tests for malformed control socket JSON and unknown control commands.
|
|
80
84
|
- [ ] Add tests for duplicate IDs and singleton replacement failure behavior.
|
|
81
|
-
- [
|
|
85
|
+
- [x] Add tests for proxy behavior when the active release exits unexpectedly.
|
|
82
86
|
|
|
83
87
|
## Documentation TODO
|
|
84
88
|
|
|
85
|
-
- [
|
|
86
|
-
- [
|
|
87
|
-
- [
|
|
89
|
+
- [x] Write a full config reference covering every field, default, and template variable (`docs/config.md`).
|
|
90
|
+
- [x] Write a CLI reference for `daemon`, `ensure-daemon`, `deploy`, `status`, `stop`, `shutdown`, and future commands (`docs/cli.md`).
|
|
91
|
+
- [x] Expand process policy docs with deployment examples for `proxied`, `companion`, `singleton`, and `service`.
|
|
88
92
|
- [ ] Document memory checks and auto-restart behavior after the feature lands.
|
|
89
93
|
- [ ] Document worker lifecycle hooks and safe background-job deployment patterns after the feature lands.
|
|
90
94
|
- [ ] Add a Velocious deployment guide with Beacon, background-jobs-main, background-jobs-worker, and web process examples.
|
|
91
95
|
- [ ] Add an Nginx guide with WebSocket headers, timeouts, and common failure modes.
|
|
92
|
-
- [
|
|
93
|
-
- [
|
|
96
|
+
- [x] Add deploy-tool recipes that call Rollbridge CLI commands directly (`docs/deploy-recipes.md`).
|
|
97
|
+
- [x] Add a Capistrano recipe showing shell commands only; do not add a Capistrano plugin or Rollbridge-specific Capistrano tasks (`docs/deploy-recipes.md`).
|
|
94
98
|
- [ ] Add a TensorBuzz-specific runbook for current production ports, external services, deploy ordering, and rollback constraints.
|
|
95
|
-
- [
|
|
99
|
+
- [x] Add troubleshooting docs for health-check failures, port conflicts, stale sockets, crash loops, and stuck draining releases (`docs/troubleshooting.md`).
|
|
96
100
|
- [ ] Add a release checklist for maintainers using `npm run release:patch`.
|
package/docs/cli.md
ADDED
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
# Rollbridge CLI reference
|
|
2
|
+
|
|
3
|
+
```
|
|
4
|
+
rollbridge <command> [options]
|
|
5
|
+
```
|
|
6
|
+
|
|
7
|
+
This reference covers every current command. See the README for config and
|
|
8
|
+
process-policy details.
|
|
9
|
+
|
|
10
|
+
## Global behavior
|
|
11
|
+
|
|
12
|
+
- **`-c, --config <path>`** is accepted by every command and is optional. When
|
|
13
|
+
omitted, Rollbridge loads `rollbridge.js` from the current directory (a
|
|
14
|
+
JavaScript module that `export default`s the config object or a function
|
|
15
|
+
returning it).
|
|
16
|
+
- Commands that talk to a running daemon — `deploy`, `status`, `stop`,
|
|
17
|
+
`shutdown`, and `logs` — connect to the control socket (`control.path`). They
|
|
18
|
+
fail with an error if no daemon is listening; start one first with
|
|
19
|
+
`rollbridge daemon` or `rollbridge deploy --ensure-daemon`.
|
|
20
|
+
- `validate`, `doctor`, and `logs` accept `--json` for machine-readable output.
|
|
21
|
+
`deploy`, `status`, `stop`, `shutdown`, and `ensure-daemon` always print JSON.
|
|
22
|
+
|
|
23
|
+
## `daemon`
|
|
24
|
+
|
|
25
|
+
```
|
|
26
|
+
rollbridge daemon [--config <path>]
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
Runs the supervisor in the foreground: binds the stable proxy port and the
|
|
30
|
+
control socket and stays running. On `SIGINT`/`SIGTERM` it stops its managed
|
|
31
|
+
processes, closes the servers, removes the control socket, and exits `0`.
|
|
32
|
+
Structured JSON log lines are written to stdout. Run it under a process manager
|
|
33
|
+
such as systemd (see `examples/rollbridge.service`).
|
|
34
|
+
|
|
35
|
+
## `ensure-daemon`
|
|
36
|
+
|
|
37
|
+
```
|
|
38
|
+
rollbridge ensure-daemon [--config <path>]
|
|
39
|
+
[--daemon-log-path <path>]
|
|
40
|
+
[--daemon-pid-path <path>]
|
|
41
|
+
[--daemon-start-timeout-ms <ms>]
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
Starts the daemon as a detached process **only if** the control socket is not
|
|
45
|
+
already accepting commands, waits until it responds, then prints the daemon
|
|
46
|
+
status JSON. Idempotent — safe to call before every deploy.
|
|
47
|
+
|
|
48
|
+
- `--daemon-log-path <path>` — file the detached daemon's stdout/stderr is
|
|
49
|
+
appended to. Default: `/tmp/rollbridge-<application>.log`.
|
|
50
|
+
- `--daemon-pid-path <path>` — file the detached daemon's PID is written to.
|
|
51
|
+
Default: `/tmp/rollbridge-<application>.pid`.
|
|
52
|
+
- `--daemon-start-timeout-ms <ms>` — how long to wait for the daemon to accept
|
|
53
|
+
control commands before failing. Default: `10000`.
|
|
54
|
+
|
|
55
|
+
## `deploy`
|
|
56
|
+
|
|
57
|
+
```
|
|
58
|
+
rollbridge deploy --release-path <path>
|
|
59
|
+
[--config <path>]
|
|
60
|
+
[--release-id <id>]
|
|
61
|
+
[--revision <sha>]
|
|
62
|
+
[--ensure-daemon]
|
|
63
|
+
[--daemon-log-path <path>]
|
|
64
|
+
[--daemon-pid-path <path>]
|
|
65
|
+
[--daemon-start-timeout-ms <ms>]
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
Starts the prepared release, health-checks the proxied process, switches new
|
|
69
|
+
traffic to it, then drains and stops the previous release. Prints
|
|
70
|
+
`{"status": "success", "activeReleaseId": "...", "previousReleaseId": "..."}`.
|
|
71
|
+
If the new release fails to start or health-check, the previous release stays
|
|
72
|
+
active and the command errors.
|
|
73
|
+
|
|
74
|
+
- `--release-path <path>` (**required**) — path to the prepared release
|
|
75
|
+
directory; available to process templates as `{{releasePath}}`.
|
|
76
|
+
- `--release-id <id>` — identifier for the release. Defaults to `--revision`,
|
|
77
|
+
or a timestamp when neither is given.
|
|
78
|
+
- `--revision <sha>` — VCS revision; available as `{{revision}}`.
|
|
79
|
+
- `--ensure-daemon` — start the daemon first if it isn't running (honors the
|
|
80
|
+
same `--daemon-*` options as `ensure-daemon`).
|
|
81
|
+
|
|
82
|
+
## `status`
|
|
83
|
+
|
|
84
|
+
```
|
|
85
|
+
rollbridge status [--config <path>]
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
Prints the daemon status JSON: the active release id, the proxy address, and —
|
|
89
|
+
per release, service, and singleton process — its `state`, `pid`, automatic
|
|
90
|
+
`restarts`, `startedAt`, `uptimeMs`, last `exitCode`/`exitSignal`, and recent
|
|
91
|
+
`logs`.
|
|
92
|
+
|
|
93
|
+
## `stop`
|
|
94
|
+
|
|
95
|
+
```
|
|
96
|
+
rollbridge stop [--config <path>] [--release-id <id>]
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
Stops the active release (or the release named by `--release-id`) and prints the
|
|
100
|
+
updated status JSON. With no active release, the proxy answers `503` until the
|
|
101
|
+
next deploy.
|
|
102
|
+
|
|
103
|
+
## `shutdown`
|
|
104
|
+
|
|
105
|
+
```
|
|
106
|
+
rollbridge shutdown [--config <path>]
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
Stops all managed processes (services, singletons, and releases), closes the
|
|
110
|
+
proxy and control socket, removes the socket file, and prints
|
|
111
|
+
`{"status": "success", "message": "shutdown"}`.
|
|
112
|
+
|
|
113
|
+
## `validate`
|
|
114
|
+
|
|
115
|
+
```
|
|
116
|
+
rollbridge validate [--config <path>] [--json]
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
Parses and validates the config without starting the daemon, reporting every
|
|
120
|
+
issue with an example fix. Exits `1` when issues are found. With `--json`, prints
|
|
121
|
+
`{"config": {...} | null, "issues": [{"message", "fix"}], "path", "valid"}`.
|
|
122
|
+
|
|
123
|
+
## `doctor`
|
|
124
|
+
|
|
125
|
+
```
|
|
126
|
+
rollbridge doctor [--config <path>] [--json]
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
Validates the config, then probes the environment: whether a daemon already
|
|
130
|
+
holds the control socket, whether the control socket's directory is writable,
|
|
131
|
+
and whether the proxy port can be bound. Exits `1` when any check fails (so a
|
|
132
|
+
green `doctor` means a fresh daemon can start). With `--json`, prints
|
|
133
|
+
`{"checks": [{"name", "ok", "detail"}], "ok"}`.
|
|
134
|
+
|
|
135
|
+
## `logs`
|
|
136
|
+
|
|
137
|
+
```
|
|
138
|
+
rollbridge logs [--config <path>] [--process <id>] [--json]
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
Prints the recent stdout/stderr retained per managed process — a one-shot
|
|
142
|
+
snapshot of each process's `outputLines`, not a live stream. `--process <id>`
|
|
143
|
+
limits output to one process. With `--json`, prints
|
|
144
|
+
`[{"id", "source", "logs": [{"at", "line", "stream"}]}]`.
|
|
145
|
+
|
|
146
|
+
## Exit codes
|
|
147
|
+
|
|
148
|
+
- `0` — success.
|
|
149
|
+
- `1` — `validate`/`doctor` found problems, or `--config` could not be resolved.
|
|
150
|
+
- non-zero (with an error message) — a daemon command could not reach the daemon,
|
|
151
|
+
or the daemon returned an error.
|
package/docs/config.md
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
# Config reference
|
|
2
|
+
|
|
3
|
+
A Rollbridge config is a JavaScript module that `export default`s a config
|
|
4
|
+
object (or a sync/async function returning one). When `--config` is omitted,
|
|
5
|
+
the CLI loads `rollbridge.js` from the working directory. Run
|
|
6
|
+
`rollbridge validate` to check a config without starting the daemon.
|
|
7
|
+
|
|
8
|
+
```js
|
|
9
|
+
// rollbridge.js
|
|
10
|
+
export default {
|
|
11
|
+
application: "ticket-server",
|
|
12
|
+
control: {path: "/tmp/rollbridge-ticket-server.sock"},
|
|
13
|
+
proxy: {host: "127.0.0.1", port: 8182},
|
|
14
|
+
processes: [
|
|
15
|
+
{id: "web", policy: "proxied", cwd: "{{releasePath}}", command: "npx velocious server --port {{port}}", port: {from: 18182, to: 18299}, health: {path: "/ping"}}
|
|
16
|
+
]
|
|
17
|
+
}
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
## Top-level fields
|
|
21
|
+
|
|
22
|
+
| Field | Type | Default | Description |
|
|
23
|
+
| --- | --- | --- | --- |
|
|
24
|
+
| `application` | string | basename of the config file's directory | Names the app; used in the default control-socket path and the `ROLLBRIDGE_APPLICATION` env var. |
|
|
25
|
+
| `control` | object | — | Control-socket settings (see below). |
|
|
26
|
+
| `proxy` | object | **required** | Proxy listener and shared defaults (see below). |
|
|
27
|
+
| `processes` | array | **required** | Managed processes (see below). Exactly one must be `proxied`. |
|
|
28
|
+
| `releaseRetention` | object | — | How many stopped releases the daemon retains (see below). |
|
|
29
|
+
|
|
30
|
+
## `control`
|
|
31
|
+
|
|
32
|
+
| Field | Type | Default | Description |
|
|
33
|
+
| --- | --- | --- | --- |
|
|
34
|
+
| `control.path` | string | `/tmp/rollbridge-<application>.sock` | Unix domain socket the CLI uses to talk to the daemon. |
|
|
35
|
+
| `control.mode` | octal string (e.g. `"660"`) or octal number (`0o660`) | unset | `chmod` applied to the socket after it binds, to share it with a deploy group. When unset, the daemon umask applies. |
|
|
36
|
+
|
|
37
|
+
## `proxy`
|
|
38
|
+
|
|
39
|
+
| Field | Type | Default | Description |
|
|
40
|
+
| --- | --- | --- | --- |
|
|
41
|
+
| `proxy.host` | string | `"127.0.0.1"` | Interface the stable proxy binds. |
|
|
42
|
+
| `proxy.port` | number | `8182` | Stable port Nginx (or another front end) points at. |
|
|
43
|
+
| `proxy.upstreamHost` | string | `proxy.host`, or `"127.0.0.1"` when `proxy.host` is `0.0.0.0`/`::` | Host Rollbridge uses for release health checks and proxy targets. |
|
|
44
|
+
| `proxy.healthPath` | string | `"/ping"` | Default health-check path for proxied processes. |
|
|
45
|
+
| `proxy.healthTimeoutMs` | number | `30000` | Default health-check timeout for proxied processes. |
|
|
46
|
+
| `proxy.drainTimeoutMs` | number | `60000` | How long to drain open connections from a retired release before stopping it. |
|
|
47
|
+
| `proxy.forceStopTimeoutMs` | number | `10000` | Default per-process graceful-stop timeout (`SIGTERM`, then `SIGKILL`). |
|
|
48
|
+
|
|
49
|
+
## `releaseRetention`
|
|
50
|
+
|
|
51
|
+
| Field | Type | Default | Description |
|
|
52
|
+
| --- | --- | --- | --- |
|
|
53
|
+
| `releaseRetention.keep` | non-negative integer | `10` | Number of most-recent **stopped** releases the daemon keeps in memory and reports in `status`. |
|
|
54
|
+
| `releaseRetention.maxAgeMs` | non-negative number | `0` (disabled) | Also prune stopped releases older than this many milliseconds. |
|
|
55
|
+
|
|
56
|
+
Active and draining releases are never pruned. This governs Rollbridge's own
|
|
57
|
+
release records; the deploy tool still owns on-disk release directories.
|
|
58
|
+
|
|
59
|
+
## `processes[]`
|
|
60
|
+
|
|
61
|
+
| Field | Type | Default | Description |
|
|
62
|
+
| --- | --- | --- | --- |
|
|
63
|
+
| `id` | string | **required** | Unique identifier. Appears in `status`, logs, and `ROLLBRIDGE_*` env vars. |
|
|
64
|
+
| `policy` | `"proxied"` \| `"companion"` \| `"singleton"` \| `"service"` | `"companion"` | Lifecycle policy (see [README → Process Policies](../README.md#process-policies)). Exactly one process must be `proxied`. |
|
|
65
|
+
| `command` | string | **required** | Shell command to run (templated). |
|
|
66
|
+
| `cwd` | string | the release path | Working directory (templated). |
|
|
67
|
+
| `env` | object of string → string | `{}` | Extra environment variables (values templated). Merged over the injected `ROLLBRIDGE_*` vars. |
|
|
68
|
+
| `port` | number or `{from, to}` | unset | Port (or range) allocated per release. **Required for the `proxied` process.** A plain number `n` means the fixed port `n` (`{from: n, to: n}`). |
|
|
69
|
+
| `health` | object or `false` | enabled with defaults | Health check for the `proxied` process; set `false` to disable (see below). |
|
|
70
|
+
| `gracefulStopMs` | number | `proxy.forceStopTimeoutMs` | `SIGTERM`→`SIGKILL` window for this process. |
|
|
71
|
+
| `restartDelayMs` | number | `1000` | Delay before restarting this process after a crash. |
|
|
72
|
+
| `outputLines` | positive integer | `50` | Recent stdout/stderr lines retained per process and reported by `status`/`logs`. |
|
|
73
|
+
|
|
74
|
+
### `processes[].health`
|
|
75
|
+
|
|
76
|
+
Only the `proxied` process is health-checked (before traffic switches to a new
|
|
77
|
+
release). Set `health: false` to disable it.
|
|
78
|
+
|
|
79
|
+
| Field | Type | Default | Description |
|
|
80
|
+
| --- | --- | --- | --- |
|
|
81
|
+
| `health.path` | string | `proxy.healthPath` | HTTP path probed on the process's port. |
|
|
82
|
+
| `health.timeoutMs` | number | `proxy.healthTimeoutMs` | Total time to wait for the first healthy response. |
|
|
83
|
+
| `health.intervalMs` | number | `250` | Delay between probes. |
|
|
84
|
+
| `health.startDelayMs` | non-negative number | `0` | Time to wait after the process starts before sending the first probe; this delay elapses before the `timeoutMs` window begins. |
|
|
85
|
+
|
|
86
|
+
## Template variables
|
|
87
|
+
|
|
88
|
+
`command`, `cwd`, and `env` values support `{{...}}` placeholders, rendered when
|
|
89
|
+
the process starts. Referencing a placeholder with no value fails the process
|
|
90
|
+
start with a clear error.
|
|
91
|
+
|
|
92
|
+
| Placeholder | Value |
|
|
93
|
+
| --- | --- |
|
|
94
|
+
| `{{application}}` | `application` |
|
|
95
|
+
| `{{releaseId}}` | The deploy's release id. |
|
|
96
|
+
| `{{releasePath}}` | The deploy's `--release-path`. |
|
|
97
|
+
| `{{revision}}` | The deploy's `--revision` (falls back to the release id). |
|
|
98
|
+
| `{{processId}}` | This process's `id`. |
|
|
99
|
+
| `{{port}}` | The port allocated to this process. |
|
|
100
|
+
| `{{ports.<id>}}` | The port allocated to another process. |
|
|
101
|
+
| `{{proxy.host}}`, `{{proxy.port}}`, `{{proxy.upstreamHost}}` | The configured proxy bind host/port and upstream host. |
|
|
102
|
+
| `{{env.<NAME>}}` | A variable from the daemon's own environment, e.g. `{{env.HOME}}`. |
|
|
103
|
+
|
|
104
|
+
## Injected environment variables
|
|
105
|
+
|
|
106
|
+
Rollbridge sets these in every managed process's environment (the process's own
|
|
107
|
+
`env` is merged on top and can override them):
|
|
108
|
+
|
|
109
|
+
| Variable | Value |
|
|
110
|
+
| --- | --- |
|
|
111
|
+
| `ROLLBRIDGE_APPLICATION` | `application` |
|
|
112
|
+
| `ROLLBRIDGE_PROCESS_ID` | This process's `id`. |
|
|
113
|
+
| `ROLLBRIDGE_RELEASE_ID` | The release id. |
|
|
114
|
+
| `ROLLBRIDGE_RELEASE_PATH` | The release path. |
|
|
115
|
+
| `ROLLBRIDGE_REVISION` | The revision (or release id). |
|
|
116
|
+
| `ROLLBRIDGE_PORT` | This process's allocated port (only when it has one). |
|
|
117
|
+
| `ROLLBRIDGE_<ID>_PORT` | Each process's allocated port, where `<ID>` is the process id uppercased with non-alphanumerics replaced by `_` (e.g. `background-jobs-main` → `ROLLBRIDGE_BACKGROUND_JOBS_MAIN_PORT`). |
|
|
118
|
+
|
|
119
|
+
## Validation rules
|
|
120
|
+
|
|
121
|
+
`rollbridge validate` checks the following rules and reports every violation at once, each with an example fix:
|
|
122
|
+
|
|
123
|
+
- `application` is optional and defaults to the basename of the config file's directory; `proxy` and `processes` must be present and well-typed.
|
|
124
|
+
- Exactly one process must be `proxied`, and the `proxied` process must define a `port`.
|
|
125
|
+
- Process `id`s must be unique.
|
|
126
|
+
- `port` must be a positive port number or an ascending `{from, to}` range.
|
|
127
|
+
- `control.mode` must be an octal mode between `0` and `0o777`.
|
|
128
|
+
- `outputLines` must be a positive integer and `releaseRetention.keep` a non-negative integer; `health.startDelayMs` and `releaseRetention.maxAgeMs` must be non-negative numbers.
|