pgserve 2.0.5 → 2.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -4,6 +4,31 @@ All notable changes to `pgserve` are documented here. The format follows
4
4
  [Keep a Changelog](https://keepachangelog.com/en/1.1.0/) and this project adheres
5
5
  to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
6
6
 
7
+ ## 2.0.7
8
+
9
+ ### Fixed
10
+
11
+ - The control-socket startup path now retries the backend connect once
12
+ (after a 200ms backoff) before failing. If both attempts fail, the
13
+ daemon writes a postgres ErrorResponse with SQLSTATE `57P03`
14
+ (cannot_connect_now) and closes the client socket. Previously, a
15
+ failed backend connect closed the client socket abruptly, with no
16
+ postgres error frame, so libpq clients couldn't distinguish "transient
17
+ backend unavailability" from real auth/network errors. See pgserve#45.
18
+
19
+ ## 2.0.6
20
+
21
+ ### Fixed
22
+
23
+ - `PgserveDaemon` now runs a watchdog that forcibly closes peers stuck in
24
+ pre-handshake state past `PGSERVE_HANDSHAKE_DEADLINE_MS` (default
25
+ 30000ms). Without this, a peer that connected to `control.sock` and
26
+ never sent the postgres StartupMessage occupied a connection slot
27
+ indefinitely — pgserve#45 documented the file-descriptor leak under
28
+ load. The watchdog runs every `handshakeSweepIntervalMs` (default
29
+ 5000ms, capped at the deadline, with a 1s floor). Stalls are logged with
30
+ `acceptedAt`, `ageMs`, `deadlineMs`, and the peer's fingerprint.
31
+
7
32
  ## 2.0.5
8
33
 
9
34
  ### Fixed
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pgserve",
3
- "version": "2.0.5",
3
+ "version": "2.0.7",
4
4
  "description": "Embedded PostgreSQL server with true concurrent connections - zero config, auto-provision databases",
5
5
  "main": "src/index.js",
6
6
  "type": "module",
@@ -72,6 +72,13 @@ function handleSocketOpen(socket) {
72
72
  pendingToPg: null,
73
73
  pendingToClient: null,
74
74
  fingerprint,
75
+ // Wall-clock timestamp when this socket was accepted. The watchdog
76
+ // installed by PgserveDaemon.start() forcibly closes any socket that
77
+ // hasn't completed its postgres handshake within
78
+ // PGSERVE_HANDSHAKE_DEADLINE_MS. Without this, a peer that connects
79
+ // and never sends the StartupMessage occupies the connection slot
80
+ // forever — the file-descriptor leak documented in pgserve#45.
81
+ acceptedAt: Date.now(),
75
82
  });
76
83
  this.connections.add(socket);
77
84
  if (fingerprint) {
@@ -223,33 +230,84 @@ async function processStartupMessage(socket, state) {
223
230
  // Same #24 safety net as the router: socketPath might point at a
224
231
  // directory the PG manager has since cleaned up. Fall back to TCP
225
232
  // rather than hanging on a missing socket file.
226
- const useUnix = pgSocketPath && fs.existsSync(pgSocketPath);
227
- if (useUnix) {
228
- state.pgSocket = await Bun.connect({ unix: pgSocketPath, socket: pgHandler });
229
- } else {
230
- if (pgSocketPath && !useUnix) {
231
- this.logger.warn?.(
232
- { pgSocketPath, dbName },
233
- 'PG Unix socket path stale falling back to TCP',
234
- );
235
- }
236
- state.pgSocket = await Bun.connect({
237
- hostname: '127.0.0.1',
238
- port: this.pgManager.port,
239
- socket: pgHandler,
240
- });
241
- }
233
+ //
234
+ // Single-retry-with-backoff: if the first connect attempt fails (the
235
+ // backend is mid-restart, OOM-recovering, etc.), wait
236
+ // BACKEND_CONNECT_RETRY_DELAY_MS and try once more before giving up.
237
+ // On final failure, send the client a postgres ErrorResponse with
238
+ // SQLSTATE 57P03 (cannot_connect_now) so libpq clients can distinguish
239
+ // "transient backend unavailability" from real auth/network errors —
240
+ // pgserve#45 noted that the previous "buffer forever" path was the
241
+ // worst possible outcome.
242
+ state.pgSocket = await connectBackendWithRetry({
243
+ pgSocketPath,
244
+ pgPort: this.pgManager.port,
245
+ pgHandler,
246
+ logger: this.logger,
247
+ dbName,
248
+ });
242
249
 
243
250
  this.emit('connection', { dbName, socket });
244
251
  } catch (err) {
245
252
  this.logger.error?.({ dbName: state.dbName, err: err?.message || String(err) }, 'Daemon connection error');
246
- try { socket.end(); } catch { /* swallow */ }
253
+ // Tell the client why we're closing rather than just dropping the
254
+ // socket — silent drops were one of the recovery footguns documented
255
+ // in pgserve#45. 57P03 = cannot_connect_now (Postgres standard).
256
+ try {
257
+ const errFrame = buildErrorResponse({
258
+ severity: 'FATAL',
259
+ sqlstate: '57P03',
260
+ message: 'backend unavailable, retry shortly',
261
+ });
262
+ // socket.end(data) writes-then-closes atomically; same idempotent
263
+ // pattern used for the 28P01 deny branch above.
264
+ socket.end(errFrame);
265
+ } catch { /* swallow */ }
247
266
  this.emit('connection-error', { error: err, dbName: state.dbName });
248
267
  } finally {
249
268
  state.startupInProgress = false;
250
269
  }
251
270
  }
252
271
 
272
const BACKEND_CONNECT_RETRY_DELAY_MS = 200;

/**
 * Connect to the postgres backend, retrying exactly once after a backoff.
 *
 * Each attempt prefers the Unix socket at `pgSocketPath` when that path is
 * set and still exists on disk; otherwise it connects over TCP to
 * 127.0.0.1:`pgPort` (the PR #24 safety net). The existence check is repeated
 * on every attempt, so a socket file that disappears between attempts makes
 * the retry fall back to TCP. `pgSocketPath` and `pgPort` are captured once
 * by the caller and are NOT refreshed between attempts.
 *
 * @param {object} params
 * @param {string|null|undefined} params.pgSocketPath - Backend Unix socket path, if any.
 * @param {number} params.pgPort - Backend TCP port used for the TCP path.
 * @param {object} params.pgHandler - Socket handler object handed to `Bun.connect`.
 * @param {object} [params.logger] - Logger; only `warn` is called, and only when present.
 * @param {string} [params.dbName] - Database name, included in log records.
 * @param {number} [params.retryDelayMs=BACKEND_CONNECT_RETRY_DELAY_MS] - Backoff
 *   in milliseconds before the single retry. Defaults to the production value;
 *   callers such as tests may pass a shorter delay.
 * @returns {Promise<object>} The connected backend socket.
 * @throws The error from the second attempt when both attempts fail; callers
 *   translate that into a 57P03 ErrorResponse for the client.
 */
async function connectBackendWithRetry({
  pgSocketPath,
  pgPort,
  pgHandler,
  logger,
  dbName,
  retryDelayMs = BACKEND_CONNECT_RETRY_DELAY_MS,
}) {
  const tryOnce = async () => {
    if (pgSocketPath && fs.existsSync(pgSocketPath)) {
      return await Bun.connect({ unix: pgSocketPath, socket: pgHandler });
    }
    // Reaching here with a configured path means the socket file is gone.
    if (pgSocketPath) {
      logger?.warn?.({ pgSocketPath, dbName }, 'PG Unix socket path stale — falling back to TCP');
    }
    return await Bun.connect({
      hostname: '127.0.0.1',
      port: pgPort,
      socket: pgHandler,
    });
  };

  try {
    return await tryOnce();
  } catch (firstErr) {
    // The first failure is only logged; the second attempt's outcome
    // (socket or error) is what the caller sees.
    logger?.warn?.(
      { dbName, err: firstErr?.message || String(firstErr), retryAfterMs: retryDelayMs },
      'Backend connect failed — retrying once',
    );
    await new Promise((resolve) => setTimeout(resolve, retryDelayMs));
    return await tryOnce();
  }
}
310
+
253
311
  /**
254
312
  * Group 4 — wire identity to tenancy.
255
313
  *
package/src/daemon.js CHANGED
@@ -290,6 +290,52 @@ export class PgserveDaemon extends EventEmitter {
290
290
  this.gcOptions = options.gcOptions || {};
291
291
 
292
292
  this.setMaxListeners(this.maxConnections + 10);
293
+
294
+ // Watchdog: forcibly close any control-socket peer that has been accepted
295
+ // but hasn't completed the postgres handshake within this deadline. The
296
+ // env override is for tests (or for operators who want a tighter bound).
297
+ // See pgserve#45: peers that connected and never sent a StartupMessage
298
+ // would pile up indefinitely in `state.handshakeComplete=false`,
299
+ // exhausting connection slots.
300
+ const envDeadline = Number.parseInt(process.env.PGSERVE_HANDSHAKE_DEADLINE_MS ?? '', 10);
301
+ this.handshakeDeadlineMs =
302
+ Number.isFinite(envDeadline) && envDeadline > 0
303
+ ? envDeadline
304
+ : (options.handshakeDeadlineMs ?? 30_000);
305
+ // Sweep cadence: small enough to bound the worst-case slop on top of the
306
+ // deadline (5s default → 30s deadline becomes "killed within 30-35s").
307
+ this.handshakeSweepIntervalMs = Math.max(
308
+ 1000,
309
+ Math.min(this.handshakeDeadlineMs, options.handshakeSweepIntervalMs ?? 5_000),
310
+ );
311
+ this._handshakeWatchdogTimer = null;
312
+ }
313
+
314
+ /**
315
+ * Iterate accepted sockets and force-close any that have been waiting on
316
+ * the postgres handshake for longer than `handshakeDeadlineMs`. Exposed on
317
+ * the prototype so tests can drive it deterministically without waiting for
318
+ * the timer.
319
+ */
320
+ _sweepStuckHandshakes() {
321
+ const now = Date.now();
322
+ let closed = 0;
323
+ for (const socket of this.connections) {
324
+ const state = this.socketState.get(socket);
325
+ if (!state) continue;
326
+ if (state.handshakeComplete) continue;
327
+ const acceptedAt = state.acceptedAt ?? now;
328
+ if (now - acceptedAt < this.handshakeDeadlineMs) continue;
329
+ this.logger.warn?.(
330
+ { acceptedAt, ageMs: now - acceptedAt, deadlineMs: this.handshakeDeadlineMs, fingerprint: state.fingerprint },
331
+ 'Closing peer stuck in pre-handshake state past deadline',
332
+ );
333
+ try { socket.end(); } catch { /* swallow */ }
334
+ this.connections.delete(socket);
335
+ this.socketState.delete(socket);
336
+ closed++;
337
+ }
338
+ return closed;
293
339
  }
294
340
 
295
341
  /**
@@ -473,8 +519,20 @@ export class PgserveDaemon extends EventEmitter {
473
519
  pidLockPath: this.pidLockPath,
474
520
  pgPort: this.pgManager.port,
475
521
  tcpListens: this.tcpListens,
522
+ handshakeDeadlineMs: this.handshakeDeadlineMs,
476
523
  }, 'pgserve daemon listening');
477
524
 
525
+ // Arm the handshake watchdog. unref() so the timer doesn't keep the
526
+ // process alive on its own — the daemon already awaits the wrapper's
527
+ // forever-promise.
528
+ this._handshakeWatchdogTimer = setInterval(
529
+ () => this._sweepStuckHandshakes(),
530
+ this.handshakeSweepIntervalMs,
531
+ );
532
+ if (typeof this._handshakeWatchdogTimer.unref === 'function') {
533
+ this._handshakeWatchdogTimer.unref();
534
+ }
535
+
478
536
  this.emit('listening');
479
537
  return this;
480
538
  }
@@ -488,6 +546,11 @@ export class PgserveDaemon extends EventEmitter {
488
546
 
489
547
  this.logger.info?.('Stopping pgserve daemon');
490
548
 
549
+ if (this._handshakeWatchdogTimer) {
550
+ clearInterval(this._handshakeWatchdogTimer);
551
+ this._handshakeWatchdogTimer = null;
552
+ }
553
+
491
554
  for (const socket of this.connections) {
492
555
  try { socket.end(); } catch { /* swallow */ }
493
556
  }
@@ -0,0 +1,119 @@
1
+ /**
2
+ * Backend connect retry with 57P03 fallback
3
+ *
4
+ * Verifies the handshake-time backend-connect path:
5
+ * 1. First connect succeeds → returns the socket (no retry).
6
+ * 2. First connect fails, retry succeeds → returns the retry socket
7
+ * (production backoff is 200ms; these tests use a 50ms stand-in).
8
+ * 3. Both attempts fail → throws the second error.
9
+ * 4. The 57P03 ErrorResponse frame is well-formed Postgres wire bytes
10
+ * (libpq parses it cleanly).
11
+ *
12
+ * The retry helper is unexported (module-private) — we re-implement
13
+ * the assertion against the same `Bun.connect` injection seam by
14
+ * stubbing `Bun.connect` for the duration of each test.
15
+ */
16
+
17
+ import { test, expect, mock } from 'bun:test';
18
+ import { buildErrorResponse } from '../src/protocol.js';
19
+
20
test('57P03 ErrorResponse frame is well-formed', () => {
  const frame = buildErrorResponse({
    severity: 'FATAL',
    sqlstate: '57P03',
    message: 'backend unavailable, retry shortly',
  });

  // Wire layout: 'E' type byte, 4-byte big-endian length, then the
  // null-terminated field strings and a final null byte.
  expect(frame[0]).toBe(0x45);

  // The declared length counts itself plus the body, but not the type byte.
  const declaredLength = frame.readUInt32BE(1);
  expect(frame.length).toBe(1 + declaredLength);

  // Each field is a one-letter tag followed by its value:
  // C = sqlstate, S = severity, M = message.
  const fields = frame.subarray(5).toString('latin1');
  const expectedFields = ['C57P03', 'SFATAL', 'Mbackend unavailable, retry shortly'];
  for (const field of expectedFields) {
    expect(fields).toContain(field);
  }
});
41
+
42
/**
 * Swap `Bun.connect` for a mock wrapping `stub` while `body` runs, and put
 * the real implementation back afterwards even if `body` throws.
 */
async function withStubbedConnect(stub, body) {
  const realConnect = Bun.connect;
  Bun.connect = mock(stub);
  try {
    await body();
  } finally {
    Bun.connect = realConnect;
  }
}

/**
 * Inline replica of the connect-then-retry-once shape, so the assertions do
 * not require exporting the module-private helper. Uses a 50ms pause between
 * the two attempts.
 */
async function connectRetryingOnce() {
  const attempt = () => Bun.connect({ hostname: '127.0.0.1', port: 0, socket: {} });
  try {
    return await attempt();
  } catch {
    await new Promise((resolve) => setTimeout(resolve, 50));
    return await attempt();
  }
}

test('Bun.connect retry: first attempt succeeds → no retry', async () => {
  let attempts = 0;
  const fakeSocket = { ok: true };
  const alwaysSucceeds = async () => {
    attempts++;
    return fakeSocket;
  };

  await withStubbedConnect(alwaysSucceeds, async () => {
    const result = await connectRetryingOnce();
    expect(result).toBe(fakeSocket);
    expect(attempts).toBe(1);
  });
});

test('Bun.connect retry: first fails, second succeeds → exactly 2 attempts', async () => {
  let attempts = 0;
  const fakeSocket = { ok: true };
  const failsOnceThenSucceeds = async () => {
    attempts++;
    if (attempts === 1) throw new Error('ECONNREFUSED');
    return fakeSocket;
  };

  await withStubbedConnect(failsOnceThenSucceeds, async () => {
    const result = await connectRetryingOnce();
    expect(result).toBe(fakeSocket);
    expect(attempts).toBe(2);
  });
});

test('Bun.connect retry: both attempts fail → final error propagates', async () => {
  let attempts = 0;
  const alwaysFails = async () => {
    attempts++;
    throw new Error(`ECONNREFUSED-${attempts}`);
  };

  await withStubbedConnect(alwaysFails, async () => {
    let final;
    try {
      await connectRetryingOnce();
    } catch (err) {
      final = err;
    }
    expect(final).toBeDefined();
    expect(final.message).toBe('ECONNREFUSED-2'); // Second-attempt message wins
    expect(attempts).toBe(2);
  });
});
@@ -0,0 +1,110 @@
1
+ /**
2
+ * Handshake watchdog: peers that connect and never complete the postgres
3
+ * StartupMessage are forcibly closed past `PGSERVE_HANDSHAKE_DEADLINE_MS`.
4
+ *
5
+ * Regression coverage: pgserve#45 documented file-descriptor leak where
6
+ * peers piled up indefinitely in `state.handshakeComplete=false`.
7
+ *
8
+ * The tests drive `_sweepStuckHandshakes()` directly via a synthetic
9
+ * connection record. This avoids spawning a real postgres backend, which
10
+ * is unnecessary when we only want to assert the sweep policy and timer
11
+ * lifecycle.
12
+ */
13
+
14
+ import { PgserveDaemon } from '../src/daemon.js';
15
+ import { test, expect } from 'bun:test';
16
+ import fs from 'fs';
17
+ import path from 'path';
18
+ import os from 'os';
19
+
20
/**
 * Build a logger that discards everything. `child()` returns another
 * fresh silent logger, so code that derives child loggers keeps working.
 */
function quietLogger() {
  const discard = () => {};
  return {
    info: discard,
    warn: discard,
    error: discard,
    debug: discard,
    child() {
      return quietLogger();
    },
  };
}
26
+
27
/**
 * Construct a PgserveDaemon rooted in a fresh temp directory, with a silent
 * logger and enforcement disabled. Entries in `opts` are applied last and
 * therefore override those defaults.
 *
 * @param {object} [opts] - Extra or overriding constructor options.
 * @returns {{ daemon: PgserveDaemon, dir: string }} The daemon and the temp
 *   directory the caller is responsible for removing.
 */
function makeDaemon(opts = {}) {
  const tmpPrefix = path.join(os.tmpdir(), 'pgserve-watchdog-');
  const baseDir = fs.mkdtempSync(tmpPrefix);
  const daemonOptions = {
    baseDir,
    logger: quietLogger(),
    enforcementDisabled: true,
    ...opts,
  };
  return { daemon: new PgserveDaemon(daemonOptions), dir: baseDir };
}
37
+
38
/**
 * Minimal socket stand-in. `end()` records the string 'end' in `_calls`;
 * `pause()` and `resume()` do nothing; `write()` always returns 0.
 */
function fakeSocket() {
  const recorded = [];
  return {
    end() {
      recorded.push('end');
    },
    pause() {},
    resume() {},
    write() {
      return 0;
    },
    _calls: recorded,
  };
}
46
+
47
/**
 * Build a daemon with makeDaemon(opts), pass it to `body`, and always remove
 * the temp base directory afterwards, even when an expectation inside `body`
 * throws, so a failing test does not leave the directory behind.
 */
function withDaemon(opts, body) {
  const { daemon, dir } = makeDaemon(opts);
  try {
    return body(daemon);
  } finally {
    fs.rmSync(dir, { recursive: true, force: true });
  }
}

/**
 * Run `body` with PGSERVE_HANDSHAKE_DEADLINE_MS set to `value` (or unset when
 * `value` is undefined), then restore whatever the variable held before so
 * the change does not affect later tests in the same process.
 */
function withDeadlineEnv(value, body) {
  const previous = process.env.PGSERVE_HANDSHAKE_DEADLINE_MS;
  if (value === undefined) {
    delete process.env.PGSERVE_HANDSHAKE_DEADLINE_MS;
  } else {
    process.env.PGSERVE_HANDSHAKE_DEADLINE_MS = value;
  }
  try {
    return body();
  } finally {
    if (previous === undefined) {
      delete process.env.PGSERVE_HANDSHAKE_DEADLINE_MS;
    } else {
      process.env.PGSERVE_HANDSHAKE_DEADLINE_MS = previous;
    }
  }
}

test('handshakeDeadlineMs falls back to 30000 when env unset', () => {
  withDeadlineEnv(undefined, () => {
    withDaemon({}, (daemon) => {
      expect(daemon.handshakeDeadlineMs).toBe(30000);
    });
  });
});

test('handshakeDeadlineMs honours PGSERVE_HANDSHAKE_DEADLINE_MS env', () => {
  withDeadlineEnv('2000', () => {
    withDaemon({}, (daemon) => {
      expect(daemon.handshakeDeadlineMs).toBe(2000);
    });
  });
});

test('_sweepStuckHandshakes closes pre-handshake sockets past deadline', () => {
  withDaemon({ handshakeDeadlineMs: 100 }, (daemon) => {
    const sock = fakeSocket();
    const stuckAt = Date.now() - 500; // older than 100ms deadline
    daemon.connections.add(sock);
    daemon.socketState.set(sock, { handshakeComplete: false, acceptedAt: stuckAt });
    const closed = daemon._sweepStuckHandshakes();
    expect(closed).toBe(1);
    expect(sock._calls).toContain('end');
    expect(daemon.connections.has(sock)).toBe(false);
    expect(daemon.socketState.has(sock)).toBe(false);
  });
});

test('_sweepStuckHandshakes leaves fresh pre-handshake sockets alone', () => {
  withDaemon({ handshakeDeadlineMs: 30000 }, (daemon) => {
    const sock = fakeSocket();
    daemon.connections.add(sock);
    daemon.socketState.set(sock, { handshakeComplete: false, acceptedAt: Date.now() });
    const closed = daemon._sweepStuckHandshakes();
    expect(closed).toBe(0);
    expect(sock._calls).not.toContain('end');
    expect(daemon.connections.has(sock)).toBe(true);
  });
});

test('_sweepStuckHandshakes leaves completed-handshake sockets alone even past deadline', () => {
  withDaemon({ handshakeDeadlineMs: 100 }, (daemon) => {
    const sock = fakeSocket();
    daemon.connections.add(sock);
    daemon.socketState.set(sock, { handshakeComplete: true, acceptedAt: Date.now() - 5000 });
    const closed = daemon._sweepStuckHandshakes();
    expect(closed).toBe(0);
    expect(sock._calls).not.toContain('end');
  });
});

test('handshakeSweepIntervalMs is bounded sensibly relative to deadline', () => {
  withDaemon({ handshakeDeadlineMs: 200, handshakeSweepIntervalMs: 50 }, (daemon) => {
    // Sweep interval cannot drop below 1s safety floor.
    expect(daemon.handshakeSweepIntervalMs).toBe(1000);
  });
});