@stacksjs/rpx 0.11.5 → 0.11.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/daemon.ts ADDED
@@ -0,0 +1,496 @@
1
+ /**
2
+ * The rpx daemon: a single long-running process that fronts :443 and :80, holds
3
+ * the shared Root CA + host cert, and routes traffic per the registry.
4
+ *
5
+ * Lifecycle:
6
+ * 1. acquireDaemonLock() — atomic create of `daemon.pid` (or take over a
7
+ * stale one whose writer is gone). Bails if a healthy daemon is already
8
+ * running.
9
+ * 2. Bootstrap TLS (reuses the Root CA persisted by https.ts).
10
+ * 3. Bun.serve :443 with the proxy fetch handler; HTTP→HTTPS redirect on :80.
11
+ * 4. Watch the registry, rebuild the routing table on every change. Periodic
12
+ * PID GC reaps entries from writers that died `kill -9`.
13
+ * 5. SIGINT/SIGTERM → drain in-flight, release lock, exit 0.
14
+ *
15
+ * Tests inject a `rpxDir`/`registryDir`/non-priv ports, so all the heavy I/O
16
+ * paths are reachable without touching `~/.stacks/rpx` or :443.
17
+ */
18
+ /* eslint-disable no-console */
19
+ import type { ProxyOptions, SSLConfig, TlsOption } from './types'
20
+ import type { ProxyRoute } from './proxy-handler'
21
+ import { spawn as nodeSpawn } from 'node:child_process'
22
+ import * as fsp from 'node:fs/promises'
23
+ import { homedir } from 'node:os'
24
+ import * as path from 'node:path'
25
+ import * as process from 'node:process'
26
+ import { log } from './logger'
27
+ import { checkExistingCertificates, generateCertificate } from './https'
28
+ import { createProxyFetchHandler } from './proxy-handler'
29
+ import { gcStaleEntries, getRegistryDir, isPidAlive, readAll, watchRegistry } from './registry'
30
+ import type { RegistryEntry } from './registry'
31
+ import { debugLog } from './utils'
32
+
/**
 * Settings accepted by `runDaemon`. Every field is optional; the fallbacks
 * noted below are the ones `runDaemon` applies when a field is left out.
 */
export interface DaemonOptions {
  /** Emit debug and progress output while the daemon runs. */
  verbose?: boolean
  /** Base directory used instead of `~/.stacks/rpx` (lets tests stay out of the real dir). */
  rpxDir?: string
  /** Registry directory; falls back to `<rpxDir>/registry.d`. */
  registryDir?: string
  /** Port for the HTTPS listener; falls back to 443. */
  httpsPort?: number
  /** Port for the HTTP→HTTPS redirect listener; falls back to 80, and 0 disables it. */
  httpPort?: number
  /** Address both listeners bind to; falls back to `0.0.0.0`. */
  hostname?: string
  /** TLS bootstrap settings; treated as `true` when omitted. */
  https?: TlsOption
  /** Milliseconds between PID-GC sweeps; falls back to 5000. */
  gcIntervalMs?: number
}
50
+
/**
 * What `runDaemon` hands back once the daemon is up.
 */
export interface DaemonHandle {
  /** Shut the daemon down: closes the listeners and removes the pid file. */
  stop: () => Promise<void>
  /** Settles once shutdown has completed. */
  done: Promise<void>
  /** Port reported by the HTTPS listener, or the requested port if none is reported. */
  httpsPort: number
  /** Port reported by the HTTP redirect listener, or the requested port. */
  httpPort: number
  /** Location of the pid file that acts as the single-instance lock. */
  pidPath: string
}
60
+
/** Fallback delay between PID-GC sweeps: five seconds, expressed in milliseconds. */
const DEFAULT_GC_INTERVAL_MS = 5 * 1000
62
+
/**
 * Default rpx state directory: `.stacks/rpx` inside the current user's home.
 */
export function getDaemonRpxDir(): string {
  const home = homedir()
  return path.join(home, '.stacks', 'rpx')
}
66
+
/**
 * Location of the daemon's pid file (`daemon.pid`) inside `rpxDir`.
 */
export function getDaemonPidPath(rpxDir: string = getDaemonRpxDir()): string {
  const pidFileName = 'daemon.pid'
  return path.join(rpxDir, pidFileName)
}
70
+
/**
 * Load the pid recorded in `daemon.pid`.
 *
 * @returns The positive integer parsed from the file, or `null` when the file
 * is missing or its content does not parse to a positive number.
 * @throws Any read error other than `ENOENT`.
 */
export async function readDaemonPid(rpxDir: string = getDaemonRpxDir()): Promise<number | null> {
  let contents: string
  try {
    contents = await fsp.readFile(getDaemonPidPath(rpxDir), 'utf8')
  }
  catch (err) {
    // A missing pid file simply means "no daemon recorded".
    if ((err as NodeJS.ErrnoException).code === 'ENOENT')
      return null
    throw err
  }

  const parsed = Number.parseInt(contents.trim(), 10)
  return Number.isFinite(parsed) && parsed > 0 ? parsed : null
}
88
+
/**
 * Report whether the pid recorded in `daemon.pid` belongs to a live process.
 * Resolves to `false` when no pid can be read from the file.
 */
export async function isDaemonRunning(rpxDir: string = getDaemonRpxDir()): Promise<boolean> {
  const recordedPid = await readDaemonPid(rpxDir)
  if (recordedPid === null)
    return false
  return isPidAlive(recordedPid)
}
96
+
/**
 * Acquire the daemon's single-instance lock by exclusively creating
 * `daemon.pid` and writing this process's pid into it.
 *
 * The file is opened with the `'wx'` flag, so the create fails with `EEXIST`
 * when the file is already there. In that case the recorded pid is inspected:
 * a live owner makes this function throw, anything else is treated as a stale
 * leftover that is removed before retrying the create.
 *
 * NOTE(review): the stale takeover (read → unlink → create) is not atomic as
 * a whole. Two starters that observe the same stale file can each unlink and
 * recreate it — confirm whether callers need stronger exclusion than this.
 *
 * @param rpxDir Directory holding the pid file; created if missing.
 * @returns The path of the pid file that was created.
 * @throws Error when a live daemon already owns the lock, or any filesystem
 * error other than the expected `EEXIST` (create) / `ENOENT` (stale removal).
 */
export async function acquireDaemonLock(rpxDir: string = getDaemonRpxDir()): Promise<string> {
  await fsp.mkdir(rpxDir, { recursive: true })
  const pidPath = getDaemonPidPath(rpxDir)

  while (true) {
    try {
      const fh = await fsp.open(pidPath, 'wx')
      try {
        await fh.write(`${process.pid}\n`)
      }
      finally {
        await fh.close()
      }
      return pidPath
    }
    catch (err) {
      if ((err as NodeJS.ErrnoException).code !== 'EEXIST')
        throw err
    }

    // File exists — figure out whether it's a real owner or a stale leftover.
    const existing = await readDaemonPid(rpxDir)
    if (existing !== null && isPidAlive(existing))
      throw new Error(`rpx daemon already running (pid=${existing})`)

    // Stale: remove and retry the exclusive create. Only "already gone" is
    // tolerated here. Any other failure (EPERM, EACCES, EROFS, ...) means the
    // file cannot be removed, and swallowing it would make this loop spin
    // forever on EEXIST.
    try {
      await fsp.unlink(pidPath)
    }
    catch (err) {
      if ((err as NodeJS.ErrnoException).code !== 'ENOENT')
        throw err
    }
  }
}
136
+
/**
 * Drop the single-instance lock by deleting `daemon.pid`. Best effort: a
 * failed delete (for example because the file is already gone) is ignored.
 */
export async function releaseDaemonLock(rpxDir: string = getDaemonRpxDir()): Promise<void> {
  const pidPath = getDaemonPidPath(rpxDir)
  try {
    await fsp.unlink(pidPath)
  }
  catch {
    // Intentionally ignored — releasing the lock must never fail the caller.
  }
}
140
+
/**
 * Translate a registry entry into the routing shape consumed by the proxy
 * fetch handler. The entry's `from` is normalized to the URL `host`
 * (`hostname[:port]`).
 *
 * `from` may be a bare `host:port` or a full `http://` / `https://` URL. Only
 * a real scheme prefix counts as "already a URL": a bare host whose name
 * merely begins with "http" (e.g. `httpbin.localhost:3000`) would otherwise be
 * parsed as `scheme:path` and produce an empty host.
 *
 * @param entry Registry entry to convert.
 * @returns The route, with `cleanUrls` / `changeOrigin` defaulting to `false`.
 * @throws TypeError (from `new URL`) when `from` cannot be parsed.
 */
function entryToRoute(entry: RegistryEntry): ProxyRoute {
  const hasHttpScheme = /^https?:\/\//i.test(entry.from)
  const fromUrl = new URL(hasHttpScheme ? entry.from : `http://${entry.from}`)
  return {
    sourceHost: fromUrl.host,
    cleanUrls: entry.cleanUrls ?? false,
    changeOrigin: entry.changeOrigin ?? false,
    pathRewrites: entry.pathRewrites,
  }
}
154
+
/**
 * Obtain the TLS material the daemon serves with.
 *
 * First asks `checkExistingCertificates` for something usable on disk. If
 * nothing comes back, `generateCertificate` is run and the disk is checked a
 * second time.
 *
 * @param opts Daemon options; only `https` and `verbose` are read here.
 * @returns The SSL config returned by `checkExistingCertificates`.
 * @throws Error when no config is available even after generating.
 */
async function bootstrapTls(opts: DaemonOptions): Promise<SSLConfig> {
  const proxyOpts: ProxyOptions = {
    https: opts.https ?? true,
    to: 'rpx.localhost',
    verbose: opts.verbose,
    regenerateUntrustedCerts: true,
  }

  const onDisk = await checkExistingCertificates(proxyOpts)
  if (onDisk)
    return onDisk

  debugLog('daemon', 'no usable cert on disk, generating one', opts.verbose)
  await generateCertificate(proxyOpts)

  const minted = await checkExistingCertificates(proxyOpts)
  if (!minted)
    throw new Error('failed to bootstrap TLS for rpx daemon')
  return minted
}
181
+
/**
 * Start the daemon. The returned promise resolves as soon as the daemon is
 * *ready* — the listeners are bound and the initial routing table is
 * populated. Use `handle.done` for the lifetime promise; it settles once
 * `stop()` has run (either called directly or triggered by SIGINT/SIGTERM).
 *
 * Startup order: take the pid-file lock, GC + load the registry, bootstrap
 * TLS, bind HTTPS (and the HTTP redirect unless `httpPort` is 0 or less),
 * then start the registry watcher and the periodic GC timer.
 *
 * If any step after the lock was taken fails, everything started so far is
 * torn down and the lock is released before the error is rethrown. A failed
 * start therefore never leaves a pid file pointing at a live process that is
 * not actually serving.
 *
 * @param opts Daemon options; see `DaemonOptions` for defaults.
 * @returns Handle exposing `stop`, `done`, the bound ports and the pid path.
 * @throws Whatever the failing startup step threw (lock already held, TLS
 * bootstrap failure, port bind failure, ...).
 */
export async function runDaemon(opts: DaemonOptions = {}): Promise<DaemonHandle> {
  const verbose = opts.verbose ?? false
  const rpxDir = opts.rpxDir ?? getDaemonRpxDir()
  const registryDir = opts.registryDir ?? path.join(rpxDir, 'registry.d')
  const httpsPort = opts.httpsPort ?? 443
  const httpPort = opts.httpPort ?? 80
  const hostname = opts.hostname ?? '0.0.0.0'
  const gcIntervalMs = opts.gcIntervalMs ?? DEFAULT_GC_INTERVAL_MS

  const pidPath = await acquireDaemonLock(rpxDir)

  // Teardown steps for whatever has been started so far, oldest first. Only
  // consumed when startup fails part-way through.
  const rollback: Array<() => void> = []

  try {
    // One routing view shared by the registry watcher and the fetch handler.
    let routingTable = new Map<string, ProxyRoute>()
    const getRoute = (host: string): ProxyRoute | undefined => routingTable.get(host)

    // Build a fresh table and swap it in, so readers never see a half-built map.
    const rebuild = (entries: RegistryEntry[]): void => {
      const next = new Map<string, ProxyRoute>()
      for (const e of entries)
        next.set(e.to, entryToRoute(e))
      routingTable = next
      debugLog('daemon', `routing table now covers ${next.size} host(s): ${Array.from(next.keys()).join(', ') || '<empty>'}`, verbose)
    }

    // Initial GC + load before binding so the very first request finds a route.
    // A failing GC is logged and ignored; a failing load aborts startup.
    await gcStaleEntries(registryDir, verbose).catch((err) => {
      debugLog('daemon', `initial gc failed: ${err}`, verbose)
    })
    rebuild(await readAll(registryDir, verbose))

    const sslConfig = await bootstrapTls(opts)

    const httpsServer = Bun.serve({
      port: httpsPort,
      hostname,
      tls: {
        key: sslConfig.key,
        cert: sslConfig.cert,
        ca: sslConfig.ca,
        requestCert: false,
        rejectUnauthorized: false,
      },
      fetch: createProxyFetchHandler(getRoute, verbose),
      error(err: Error) {
        debugLog('daemon', `https server error: ${err}`, verbose)
        return new Response(`Server Error: ${err.message}`, { status: 500 })
      },
    })
    rollback.push(() => {
      void Promise.resolve(httpsServer.stop(true)).catch(() => {})
    })

    let httpServer: ReturnType<typeof Bun.serve> | null = null
    if (httpPort > 0) {
      const redirectServer = Bun.serve({
        port: httpPort,
        hostname,
        fetch(req: Request) {
          // Redirect to the same host (port stripped), path and query over HTTPS.
          const u = new URL(req.url)
          const host = (req.headers.get('host') ?? u.hostname).split(':')[0]
          return new Response(null, {
            status: 301,
            headers: { Location: `https://${host}${u.pathname}${u.search}` },
          })
        },
      })
      httpServer = redirectServer
      rollback.push(() => {
        void Promise.resolve(redirectServer.stop(true)).catch(() => {})
      })
    }

    if (verbose) {
      log.success(`rpx daemon listening on https://${hostname}:${httpsPort}${httpServer ? ` (http→https on :${httpPort})` : ''}`)
      log.info(`pid file: ${pidPath}`)
      log.info(`registry: ${registryDir}`)
    }

    const watcher = watchRegistry(
      (entries) => { rebuild(entries) },
      { dir: registryDir, verbose },
    )
    rollback.push(() => { watcher.close() })

    const gcInterval = setInterval(() => {
      gcStaleEntries(registryDir, verbose)
        .then((removed) => {
          if (removed > 0)
            debugLog('daemon', `gc reaped ${removed} stale entries`, verbose)
        })
        .catch((err) => {
          debugLog('daemon', `periodic gc failed: ${err}`, verbose)
        })
    }, gcIntervalMs)
    rollback.push(() => { clearInterval(gcInterval) })
    // Don't keep the event loop alive just for GC.
    if (typeof gcInterval.unref === 'function')
      gcInterval.unref()

    let stopped = false
    let resolveDone!: () => void
    const done = new Promise<void>((r) => { resolveDone = r })

    const stop = async (): Promise<void> => {
      // Idempotent: later calls just wait for the first shutdown.
      if (stopped)
        return done
      stopped = true
      // Detach the signal handlers. `once` only removes the handler that
      // fired; leaving the other (or both, after a programmatic stop) in
      // place would make the host process swallow its next SIGINT/SIGTERM.
      process.removeListener('SIGINT', onSignal)
      process.removeListener('SIGTERM', onSignal)
      clearInterval(gcInterval)
      watcher.close()
      // `stop(false)` lets in-flight requests drain before closing the listener.
      httpsServer.stop(false)
      httpServer?.stop(false)
      await releaseDaemonLock(rpxDir)
      if (verbose)
        log.info('rpx daemon stopped')
      resolveDone()
      return done
    }

    const onSignal = (sig: NodeJS.Signals): void => {
      debugLog('daemon', `received ${sig}, shutting down`, verbose)
      stop().catch(() => {})
    }
    process.once('SIGINT', onSignal)
    process.once('SIGTERM', onSignal)

    return {
      stop,
      done,
      httpsPort: typeof httpsServer.port === 'number' ? httpsServer.port : httpsPort,
      httpPort: httpServer && typeof httpServer.port === 'number' ? httpServer.port : httpPort,
      pidPath,
    }
  }
  catch (err) {
    // Startup failed after the lock was taken: unwind newest-first, then give
    // the lock back so another start attempt is not blocked by this process.
    for (const undo of rollback.reverse()) {
      try {
        undo()
      }
      catch {
        // Keep unwinding; the original startup error is the one to report.
      }
    }
    await releaseDaemonLock(rpxDir)
    throw err
  }
}
314
+
/**
 * Settings accepted by `ensureDaemonRunning`. Every field is optional.
 */
export interface EnsureDaemonOptions {
  /** Base directory used instead of `~/.stacks/rpx`. */
  rpxDir?: string
  /**
   * Command line (program followed by its arguments) launched when no live
   * daemon is found. Falls back to `defaultDaemonSpawnCommand()`. Library
   * consumers (e.g. `./buddy dev`) are better served passing an explicit
   * command that resolves to the `rpx` binary on PATH.
   */
  spawnCommand?: string[]
  /** Directory the child is started in; falls back to `process.cwd()`. */
  spawnCwd?: string
  /** Additional environment variables layered over `process.env`. */
  spawnEnv?: Record<string, string>
  /** Upper bound, in ms, on waiting for a live pid to show up in the pid file; 5000 if unset. */
  startupTimeoutMs?: number
  /** Delay, in ms, between pid-file checks during that wait; 50 if unset. */
  pollIntervalMs?: number
  /** Emit debug output. */
  verbose?: boolean
}
334
+
/**
 * Outcome of `ensureDaemonRunning`.
 */
export interface EnsureDaemonResult {
  /** Pid read from the pid file and found alive. */
  pid: number
  /** `true` when this call launched a daemon, `false` when one was already up. */
  spawned: boolean
}
340
+
/**
 * Best-effort default for the spawn command used by lazy-spawn.
 *
 * - When the current executable is an interpreter (`bun`, `node`, or a
 *   `bun-*` variant) and a script path is present in `process.argv[1]`, the
 *   command re-runs that same interpreter + script with `daemon:start`.
 * - Otherwise (e.g. a `bun build --compile` binary) the executable invokes
 *   itself with `daemon:start`.
 *
 * The interpreter check ignores a trailing `.exe`, so `bun.exe` / `node.exe`
 * on Windows are recognised too instead of being mistaken for a compiled
 * binary (which would drop the script path from the command).
 *
 * Library consumers should not rely on this — pass `spawnCommand` explicitly.
 *
 * @returns Argv array: the program followed by its arguments.
 */
export function defaultDaemonSpawnCommand(): string[] {
  const exec = process.execPath
  const interpName = path.basename(exec).toLowerCase().replace(/\.exe$/, '')
  const isInterpreter = interpName === 'bun' || interpName === 'node' || interpName.startsWith('bun-')
  const script = process.argv[1]
  if (isInterpreter && script)
    return [exec, script, 'daemon:start']
  return [exec, 'daemon:start']
}
356
+
/**
 * Make sure a daemon is running, starting one as a detached child if needed.
 *
 * - If the pid file points at a live process, returns immediately with
 *   `spawned: false`.
 * - Otherwise removes a stale pid file (if one was readable), spawns the
 *   configured command with `detached: true` + `stdio: 'ignore'` + `unref()`
 *   so it can outlive the caller, and polls the pid file until a live pid
 *   shows up.
 *
 * Failures are surfaced as early as possible:
 * - a spawn `error` event (e.g. ENOENT for the binary) is rethrown on the
 *   next poll;
 * - a child that exits abnormally (non-zero code or killed by a signal)
 *   before any live pid appears fails the call right away rather than after
 *   the whole timeout. A clean exit (code 0) is not treated as failure, since
 *   a launcher may hand off to another process and exit.
 *
 * @param opts See `EnsureDaemonOptions`.
 * @returns The live pid and whether this call did the spawning.
 * @throws Error when `spawnCommand` is empty, the spawn fails, the child
 * exits abnormally during startup, or no live pid appears within
 * `startupTimeoutMs`.
 */
export async function ensureDaemonRunning(opts: EnsureDaemonOptions = {}): Promise<EnsureDaemonResult> {
  const rpxDir = opts.rpxDir ?? getDaemonRpxDir()
  const verbose = opts.verbose ?? false

  const existingPid = await readDaemonPid(rpxDir)
  if (existingPid !== null && isPidAlive(existingPid)) {
    debugLog('daemon', `ensureDaemonRunning: already running pid=${existingPid}`, verbose)
    return { pid: existingPid, spawned: false }
  }
  if (existingPid !== null) {
    debugLog('daemon', `ensureDaemonRunning: clearing stale pid=${existingPid}`, verbose)
    await releaseDaemonLock(rpxDir)
  }

  await fsp.mkdir(rpxDir, { recursive: true })

  const command = opts.spawnCommand ?? defaultDaemonSpawnCommand()
  if (command.length === 0)
    throw new Error('ensureDaemonRunning: spawnCommand is empty')

  debugLog('daemon', `spawning daemon: ${command.join(' ')}`, verbose)
  const child = nodeSpawn(command[0]!, command.slice(1), {
    detached: true,
    stdio: 'ignore',
    cwd: opts.spawnCwd ?? process.cwd(),
    env: opts.spawnEnv ? { ...process.env, ...opts.spawnEnv } : process.env,
  })

  // Surface spawn failures (ENOENT for the binary, etc.) and early abnormal
  // exits so the caller doesn't have to wait the full timeout to see them.
  let spawnError: Error | null = null
  let abnormalExit: string | null = null
  child.once('error', (err) => { spawnError = err })
  child.once('exit', (code, signal) => {
    if (code !== 0)
      abnormalExit = signal ? `signal ${signal}` : `exit code ${code}`
  })
  child.unref()

  const timeoutMs = opts.startupTimeoutMs ?? 5000
  const pollMs = opts.pollIntervalMs ?? 50
  const deadline = Date.now() + timeoutMs

  while (Date.now() < deadline) {
    if (spawnError)
      throw spawnError
    const pid = await readDaemonPid(rpxDir)
    if (pid !== null && isPidAlive(pid)) {
      debugLog('daemon', `daemon registered with pid=${pid}`, verbose)
      return { pid, spawned: true }
    }
    // Checked only after the pid file: if our child lost the lock to another
    // daemon that is alive, the call has already returned above.
    if (abnormalExit)
      throw new Error(`rpx daemon exited during startup (${abnormalExit}, rpxDir=${rpxDir})`)
    await new Promise(resolve => setTimeout(resolve, pollMs))
  }

  if (spawnError)
    throw spawnError
  throw new Error(`rpx daemon failed to start within ${timeoutMs}ms (rpxDir=${rpxDir})`)
}
421
+
/**
 * Settings accepted by `stopDaemon`. Every field is optional.
 */
export interface StopDaemonOptions {
  /** Base directory used instead of `~/.stacks/rpx`. */
  rpxDir?: string
  /** How long, in ms, to wait for the process to exit after SIGTERM; 5000 if unset. */
  timeoutMs?: number
  /** Delay, in ms, between liveness checks during that wait; 50 if unset. */
  pollIntervalMs?: number
  /** Whether to send SIGKILL once `timeoutMs` has elapsed; `true` if unset. */
  forceAfterTimeout?: boolean
  /** Emit debug output. */
  verbose?: boolean
}
432
+
/**
 * Outcome of `stopDaemon`.
 */
export interface StopDaemonResult {
  /** `true` when a live daemon was signalled and is gone (or was SIGKILLed); `false` when none was running. */
  stopped: boolean
  /** Pid read from the pid file, or `null` when none could be read. */
  pid: number | null
  /** `true` when SIGTERM was not enough and SIGKILL was sent. */
  forced: boolean
}
440
+
/**
 * Ask the daemon recorded in the pid file to shut down.
 *
 * Sends SIGTERM, then re-checks liveness every `pollIntervalMs` until the
 * process is gone or `timeoutMs` has elapsed. On timeout it either throws
 * (`forceAfterTimeout: false`) or sends SIGKILL. This function removes the
 * pid file itself only when it found the recorded pid already dead or had to
 * escalate to SIGKILL; after a clean SIGTERM exit it leaves the file alone.
 *
 * @param opts See `StopDaemonOptions`.
 * @returns Whether a daemon was stopped, its pid, and whether SIGKILL was used.
 * @throws Error on timeout when forcing is disabled, or any `process.kill`
 * failure other than `ESRCH`.
 */
export async function stopDaemon(opts: StopDaemonOptions = {}): Promise<StopDaemonResult> {
  const rpxDir = opts.rpxDir ?? getDaemonRpxDir()
  const verbose = opts.verbose ?? false
  const waitBudgetMs = opts.timeoutMs ?? 5000
  const pollEveryMs = opts.pollIntervalMs ?? 50
  const escalate = opts.forceAfterTimeout ?? true

  const pid = await readDaemonPid(rpxDir)

  // Nothing recorded at all: nothing to signal, nothing to clean up.
  if (pid === null)
    return { stopped: false, pid, forced: false }

  // A pid is recorded but its process is gone: just clear the stale file.
  if (!isPidAlive(pid)) {
    await releaseDaemonLock(rpxDir)
    return { stopped: false, pid, forced: false }
  }

  try {
    process.kill(pid, 'SIGTERM')
  }
  catch (err) {
    if ((err as NodeJS.ErrnoException).code !== 'ESRCH')
      throw err
    // The process vanished between the liveness check and the signal.
    await releaseDaemonLock(rpxDir)
    return { stopped: false, pid, forced: false }
  }

  // Wait for the process to exit on its own.
  const giveUpAt = Date.now() + waitBudgetMs
  for (let now = Date.now(); now < giveUpAt; now = Date.now()) {
    if (!isPidAlive(pid)) {
      debugLog('daemon', `daemon pid=${pid} stopped cleanly`, verbose)
      return { stopped: true, pid, forced: false }
    }
    await new Promise(resolve => setTimeout(resolve, pollEveryMs))
  }

  if (!escalate)
    throw new Error(`rpx daemon (pid=${pid}) did not exit within ${waitBudgetMs}ms`)

  debugLog('daemon', `daemon pid=${pid} did not exit, escalating to SIGKILL`, verbose)
  try {
    process.kill(pid, 'SIGKILL')
  }
  catch (err) {
    const alreadyGone = (err as NodeJS.ErrnoException).code === 'ESRCH'
    if (!alreadyGone)
      throw err
  }

  // SIGKILL bypasses the cleanup handler, so remove the pid file ourselves.
  await releaseDaemonLock(rpxDir)
  return { stopped: true, pid, forced: true }
}