pgserve 2.0.2 → 2.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +14 -0
- package/package.json +1 -1
- package/src/admin-client.js +66 -14
- package/src/cluster.js +1 -0
- package/src/control-db.js +30 -13
- package/src/daemon-control.js +6 -4
- package/src/daemon-tcp.js +2 -1
- package/src/daemon.js +5 -0
- package/src/postgres.js +46 -0
- package/src/router.js +1 -0
- package/tests/daemon-control.test.js +171 -0
- package/tests/stale-postmaster-pid.test.js +85 -0
package/CHANGELOG.md
CHANGED
|
@@ -4,6 +4,20 @@ All notable changes to `pgserve` are documented here. The format follows
|
|
|
4
4
|
[Keep a Changelog](https://keepachangelog.com/en/1.1.0/) and this project adheres
|
|
5
5
|
to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
6
6
|
|
|
7
|
+
## 2.0.4
|
|
8
|
+
|
|
9
|
+
### Fixed
|
|
10
|
+
|
|
11
|
+
- `_startPostgres()` now removes a stale `postmaster.pid` from the data
|
|
12
|
+
directory before spawning postgres. Previously, an unclean shutdown
|
|
13
|
+
(SIGKILL, machine reboot, OOM) left a `postmaster.pid` whose recorded
|
|
14
|
+
PID was no longer alive, and postgres refused to start with
|
|
15
|
+
`FATAL: lock file "postmaster.pid" already exists` on the next boot.
|
|
16
|
+
Operators had to `rm postmaster.pid` manually to recover. A live PID
|
|
17
|
+
is never touched, so a real concurrent postmaster still surfaces the
|
|
18
|
+
normal lock conflict. ([#46](https://github.com/namastexlabs/pgserve/pull/46),
|
|
19
|
+
fixes [#45](https://github.com/namastexlabs/pgserve/issues/45))
|
|
20
|
+
|
|
7
21
|
## 2.0.0 — Unreleased
|
|
8
22
|
|
|
9
23
|
> The release date will replace "Unreleased" when the v2.0.0 release workflow
|
package/package.json
CHANGED
package/src/admin-client.js
CHANGED
|
@@ -36,7 +36,9 @@ import path from 'path';
|
|
|
36
36
|
* @param {string} [args.user='postgres']
|
|
37
37
|
* @param {string} [args.password='postgres']
|
|
38
38
|
* @param {number} [args.max=2]
|
|
39
|
-
* @
|
|
39
|
+
* @param {number} [args.idleTimeout=300]
|
|
40
|
+
* @param {number} [args.queryTimeoutMs=0]
|
|
41
|
+
* @returns {Promise<{supportsQueryOptions: boolean, query: (text: string, params?: any[], opts?: {timeoutMs?: number}) => Promise<{rows: any[], rowCount: number}>, end: () => Promise<void>, sql: any}>}
|
|
40
42
|
*/
|
|
41
43
|
export async function createAdminClient({
|
|
42
44
|
socketDir: _socketDir = null,
|
|
@@ -46,25 +48,37 @@ export async function createAdminClient({
|
|
|
46
48
|
user = 'postgres',
|
|
47
49
|
password = 'postgres',
|
|
48
50
|
max = 2,
|
|
51
|
+
idleTimeout = 300,
|
|
52
|
+
queryTimeoutMs = 0,
|
|
49
53
|
} = {}) {
|
|
50
54
|
if (typeof port !== 'number') throw new Error('createAdminClient: port required');
|
|
51
|
-
const
|
|
55
|
+
const options = {
|
|
52
56
|
hostname: host,
|
|
53
57
|
port,
|
|
54
58
|
database,
|
|
55
59
|
username: user,
|
|
56
60
|
password,
|
|
57
61
|
max,
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
});
|
|
62
|
+
idleTimeout,
|
|
63
|
+
};
|
|
64
|
+
let sql = new SQL(options);
|
|
62
65
|
// Light probe so a misconfigured daemon fails loudly here rather than at
|
|
63
66
|
// first query.
|
|
64
67
|
await sql`SELECT 1`;
|
|
68
|
+
|
|
69
|
+
async function reopen() {
|
|
70
|
+
const closing = sql;
|
|
71
|
+
sql = new SQL(options);
|
|
72
|
+
void closing.close().catch(() => { /* swallow */ });
|
|
73
|
+
await sql`SELECT 1`;
|
|
74
|
+
}
|
|
75
|
+
|
|
65
76
|
return {
|
|
66
|
-
|
|
67
|
-
|
|
77
|
+
supportsQueryOptions: true,
|
|
78
|
+
get sql() {
|
|
79
|
+
return sql;
|
|
80
|
+
},
|
|
81
|
+
async query(text, params = [], opts = {}) {
|
|
68
82
|
// control-db.js is written for the pg npm module's contract, which
|
|
69
83
|
// requires JSON-stringified payloads bound to JSONB parameters.
|
|
70
84
|
// Bun.SQL goes the other way: it stringifies JS objects when they
|
|
@@ -73,12 +87,14 @@ export async function createAdminClient({
|
|
|
73
87
|
// it represents). Bridge the impedance mismatch here so the same
|
|
74
88
|
// call sites work against either driver.
|
|
75
89
|
const adapted = params.map(coerceJsonbParam);
|
|
76
|
-
const
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
90
|
+
const timeoutMs = opts.timeoutMs ?? queryTimeoutMs;
|
|
91
|
+
try {
|
|
92
|
+
return await runQueryWithTimeout(sql, text, adapted, timeoutMs);
|
|
93
|
+
} catch (err) {
|
|
94
|
+
if (!isRetriableAdminQueryError(err)) throw err;
|
|
95
|
+
await reopen();
|
|
96
|
+
return await runQueryWithTimeout(sql, text, adapted, timeoutMs);
|
|
97
|
+
}
|
|
82
98
|
},
|
|
83
99
|
async end() {
|
|
84
100
|
try { await sql.close(); } catch { /* swallow */ }
|
|
@@ -86,6 +102,42 @@ export async function createAdminClient({
|
|
|
86
102
|
};
|
|
87
103
|
}
|
|
88
104
|
|
|
105
|
+
async function runQueryWithTimeout(sql, text, params, queryTimeoutMs) {
|
|
106
|
+
const query = runQuery(sql, text, params);
|
|
107
|
+
return withTimeout(query, queryTimeoutMs);
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
async function runQuery(sql, text, params) {
|
|
111
|
+
const rows = await sql.unsafe(text, params);
|
|
112
|
+
// Bun returns an Array of plain objects with `count` set on it; turn
|
|
113
|
+
// JSONB columns back into JS values so control-db.js's parseTokens
|
|
114
|
+
// sees the array-of-objects shape it would receive from pg.
|
|
115
|
+
const out = Array.from(rows).map(decodeJsonColumns);
|
|
116
|
+
return { rows: out, rowCount: rows.count ?? rows.length ?? 0 };
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
function withTimeout(promise, timeoutMs) {
|
|
120
|
+
if (!Number.isFinite(timeoutMs) || timeoutMs <= 0) return promise;
|
|
121
|
+
let timer;
|
|
122
|
+
const timeout = new Promise((_, reject) => {
|
|
123
|
+
timer = setTimeout(() => {
|
|
124
|
+
const err = new Error(`admin query timed out after ${timeoutMs}ms`);
|
|
125
|
+
err.code = 'EADMINQUERYTIMEOUT';
|
|
126
|
+
reject(err);
|
|
127
|
+
}, timeoutMs);
|
|
128
|
+
timer.unref?.();
|
|
129
|
+
});
|
|
130
|
+
promise.catch(() => { /* handled by the race winner */ });
|
|
131
|
+
return Promise.race([promise, timeout]).finally(() => clearTimeout(timer));
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
function isRetriableAdminQueryError(err) {
|
|
135
|
+
const code = err?.code;
|
|
136
|
+
if (['EADMINQUERYTIMEOUT', 'ECONNRESET', 'EPIPE', 'ETIMEDOUT', 'ConnectionClosed'].includes(code)) return true;
|
|
137
|
+
const message = err?.message || String(err);
|
|
138
|
+
return /connection (?:closed|terminated|reset)|socket closed|timeout|CONNECTION_ENDED|CONNECTION_DESTROYED/i.test(message);
|
|
139
|
+
}
|
|
140
|
+
|
|
89
141
|
/**
|
|
90
142
|
* Strings shaped like a JSON array or object are unwrapped so Bun.SQL's
|
|
91
143
|
* automatic JSONB serialiser sees the JS value (not a quoted JSON string).
|
package/src/cluster.js
CHANGED
|
@@ -256,6 +256,7 @@ class ClusterRouter extends EventEmitter {
|
|
|
256
256
|
if (code === SSL_REQUEST_CODE || code === GSSAPI_REQUEST_CODE) {
|
|
257
257
|
socket.write(Buffer.from('N'));
|
|
258
258
|
state.buffer = buffer.length > messageLength ? buffer.subarray(messageLength) : null;
|
|
259
|
+
if (state.buffer) await this.processStartupMessage(socket, state);
|
|
259
260
|
return;
|
|
260
261
|
}
|
|
261
262
|
|
package/src/control-db.js
CHANGED
|
@@ -35,6 +35,13 @@ const REAPABLE_QUERY = `
|
|
|
35
35
|
ORDER BY last_connection_at ASC
|
|
36
36
|
`;
|
|
37
37
|
|
|
38
|
+
function query(client, text, params = [], opts = {}) {
|
|
39
|
+
if (client.supportsQueryOptions && opts && Object.keys(opts).length > 0) {
|
|
40
|
+
return client.query(text, params, opts);
|
|
41
|
+
}
|
|
42
|
+
return client.query(text, params);
|
|
43
|
+
}
|
|
44
|
+
|
|
38
45
|
/**
|
|
39
46
|
* Create the `pgserve_meta` table if it does not already exist.
|
|
40
47
|
* Safe to call repeatedly — used at daemon boot and in tests.
|
|
@@ -88,12 +95,13 @@ export async function recordDbCreated(client, {
|
|
|
88
95
|
packageRealpath = null,
|
|
89
96
|
livenessPid = null,
|
|
90
97
|
persist = false,
|
|
91
|
-
}) {
|
|
98
|
+
}, opts = {}) {
|
|
92
99
|
if (!databaseName) throw new Error('recordDbCreated: databaseName required');
|
|
93
100
|
if (!fingerprint) throw new Error('recordDbCreated: fingerprint required');
|
|
94
101
|
if (typeof peerUid !== 'number') throw new Error('recordDbCreated: peerUid must be number');
|
|
95
102
|
|
|
96
|
-
await
|
|
103
|
+
await query(
|
|
104
|
+
client,
|
|
97
105
|
`
|
|
98
106
|
INSERT INTO pgserve_meta
|
|
99
107
|
(database_name, fingerprint, peer_uid, package_realpath, liveness_pid, persist)
|
|
@@ -107,6 +115,7 @@ export async function recordDbCreated(client, {
|
|
|
107
115
|
last_connection_at = now()
|
|
108
116
|
`,
|
|
109
117
|
[databaseName, fingerprint, peerUid, packageRealpath, livenessPid, persist],
|
|
118
|
+
opts,
|
|
110
119
|
);
|
|
111
120
|
}
|
|
112
121
|
|
|
@@ -117,9 +126,10 @@ export async function recordDbCreated(client, {
|
|
|
117
126
|
* @param {{query: Function}} client
|
|
118
127
|
* @param {{databaseName: string, livenessPid?: number|null}} args
|
|
119
128
|
*/
|
|
120
|
-
export async function touchLastConnection(client, { databaseName, livenessPid = null }) {
|
|
129
|
+
export async function touchLastConnection(client, { databaseName, livenessPid = null }, opts = {}) {
|
|
121
130
|
if (!databaseName) throw new Error('touchLastConnection: databaseName required');
|
|
122
|
-
await
|
|
131
|
+
await query(
|
|
132
|
+
client,
|
|
123
133
|
`
|
|
124
134
|
UPDATE pgserve_meta
|
|
125
135
|
SET last_connection_at = now(),
|
|
@@ -127,6 +137,7 @@ export async function touchLastConnection(client, { databaseName, livenessPid =
|
|
|
127
137
|
WHERE database_name = $1
|
|
128
138
|
`,
|
|
129
139
|
[databaseName, livenessPid],
|
|
140
|
+
opts,
|
|
130
141
|
);
|
|
131
142
|
}
|
|
132
143
|
|
|
@@ -137,11 +148,13 @@ export async function touchLastConnection(client, { databaseName, livenessPid =
|
|
|
137
148
|
* @param {string} databaseName
|
|
138
149
|
* @param {boolean} value
|
|
139
150
|
*/
|
|
140
|
-
export async function markPersist(client, databaseName, value) {
|
|
151
|
+
export async function markPersist(client, databaseName, value, opts = {}) {
|
|
141
152
|
if (!databaseName) throw new Error('markPersist: databaseName required');
|
|
142
|
-
await
|
|
153
|
+
await query(
|
|
154
|
+
client,
|
|
143
155
|
`UPDATE pgserve_meta SET persist = $2 WHERE database_name = $1`,
|
|
144
156
|
[databaseName, !!value],
|
|
157
|
+
opts,
|
|
145
158
|
);
|
|
146
159
|
}
|
|
147
160
|
|
|
@@ -203,12 +216,14 @@ export async function deleteMetaRow(client, databaseName) {
|
|
|
203
216
|
* @param {string} fingerprint — 12 hex chars
|
|
204
217
|
* @returns {Promise<{databaseName: string, fingerprint: string, peerUid: number, allowedTokens: Array<{id: string, hash: string, issued_at: string}>} | null>}
|
|
205
218
|
*/
|
|
206
|
-
export async function findRowByFingerprint(client, fingerprint) {
|
|
219
|
+
export async function findRowByFingerprint(client, fingerprint, opts = {}) {
|
|
207
220
|
if (!fingerprint) throw new Error('findRowByFingerprint: fingerprint required');
|
|
208
|
-
const r = await
|
|
221
|
+
const r = await query(
|
|
222
|
+
client,
|
|
209
223
|
`SELECT database_name, fingerprint, peer_uid, allowed_tokens
|
|
210
224
|
FROM pgserve_meta WHERE fingerprint = $1 LIMIT 1`,
|
|
211
225
|
[fingerprint],
|
|
226
|
+
opts,
|
|
212
227
|
);
|
|
213
228
|
if (r.rows.length === 0) return null;
|
|
214
229
|
const row = r.rows[0];
|
|
@@ -239,12 +254,12 @@ function parseTokens(raw) {
|
|
|
239
254
|
* @returns {Promise<{databaseName: string}>}
|
|
240
255
|
* @throws if the fingerprint has no pgserve_meta row
|
|
241
256
|
*/
|
|
242
|
-
export async function addAllowedToken(client, { fingerprint, tokenId, tokenHash }) {
|
|
257
|
+
export async function addAllowedToken(client, { fingerprint, tokenId, tokenHash }, opts = {}) {
|
|
243
258
|
if (!fingerprint) throw new Error('addAllowedToken: fingerprint required');
|
|
244
259
|
if (!tokenId) throw new Error('addAllowedToken: tokenId required');
|
|
245
260
|
if (!tokenHash) throw new Error('addAllowedToken: tokenHash required');
|
|
246
261
|
|
|
247
|
-
const row = await findRowByFingerprint(client, fingerprint);
|
|
262
|
+
const row = await findRowByFingerprint(client, fingerprint, opts);
|
|
248
263
|
if (!row) {
|
|
249
264
|
const err = new Error(
|
|
250
265
|
`addAllowedToken: no pgserve_meta row for fingerprint ${fingerprint}; ` +
|
|
@@ -259,11 +274,13 @@ export async function addAllowedToken(client, { fingerprint, tokenId, tokenHash
|
|
|
259
274
|
hash: tokenHash,
|
|
260
275
|
issued_at: new Date().toISOString(),
|
|
261
276
|
};
|
|
262
|
-
await
|
|
277
|
+
await query(
|
|
278
|
+
client,
|
|
263
279
|
`UPDATE pgserve_meta
|
|
264
280
|
SET allowed_tokens = allowed_tokens || $2::jsonb
|
|
265
281
|
WHERE database_name = $1`,
|
|
266
282
|
[row.databaseName, JSON.stringify([entry])],
|
|
283
|
+
opts,
|
|
267
284
|
);
|
|
268
285
|
return { databaseName: row.databaseName };
|
|
269
286
|
}
|
|
@@ -302,10 +319,10 @@ export async function revokeAllowedToken(client, tokenId) {
|
|
|
302
319
|
* @param {{fingerprint: string, tokenHash: string}} args
|
|
303
320
|
* @returns {Promise<{tokenId: string, databaseName: string} | null>}
|
|
304
321
|
*/
|
|
305
|
-
export async function verifyToken(client, { fingerprint, tokenHash }) {
|
|
322
|
+
export async function verifyToken(client, { fingerprint, tokenHash }, opts = {}) {
|
|
306
323
|
if (!fingerprint) throw new Error('verifyToken: fingerprint required');
|
|
307
324
|
if (!tokenHash) throw new Error('verifyToken: tokenHash required');
|
|
308
|
-
const row = await findRowByFingerprint(client, fingerprint);
|
|
325
|
+
const row = await findRowByFingerprint(client, fingerprint, opts);
|
|
309
326
|
if (!row) return null;
|
|
310
327
|
const match = row.allowedTokens.find((t) => timingSafeEqual(t.hash, tokenHash));
|
|
311
328
|
if (!match) return null;
|
package/src/daemon-control.js
CHANGED
|
@@ -130,6 +130,7 @@ async function processStartupMessage(socket, state) {
|
|
|
130
130
|
if (code === SSL_REQUEST_CODE || code === GSSAPI_REQUEST_CODE) {
|
|
131
131
|
socket.write(Buffer.from('N'));
|
|
132
132
|
state.buffer = buffer.length > messageLength ? buffer.subarray(messageLength) : null;
|
|
133
|
+
if (state.buffer) await processStartupMessage.call(this, socket, state);
|
|
133
134
|
return;
|
|
134
135
|
}
|
|
135
136
|
|
|
@@ -268,10 +269,11 @@ async function resolveTenantDatabase(state, requestedDb) {
|
|
|
268
269
|
}
|
|
269
270
|
|
|
270
271
|
const { fingerprint, name, uid, pid, packageRealpath } = fp;
|
|
272
|
+
const lookupOpts = { timeoutMs: this.adminLookupTimeoutMs };
|
|
271
273
|
|
|
272
274
|
let row = null;
|
|
273
275
|
try {
|
|
274
|
-
row = await findRowByFingerprint(this._adminClient, fingerprint);
|
|
276
|
+
row = await findRowByFingerprint(this._adminClient, fingerprint, lookupOpts);
|
|
275
277
|
} catch (err) {
|
|
276
278
|
this.logger.warn?.(
|
|
277
279
|
{ err: err?.message || String(err), fingerprint },
|
|
@@ -296,7 +298,7 @@ async function resolveTenantDatabase(state, requestedDb) {
|
|
|
296
298
|
packageRealpath: packageRealpath || null,
|
|
297
299
|
livenessPid: typeof pid === 'number' && pid > 0 ? pid : null,
|
|
298
300
|
persist: persistRequested,
|
|
299
|
-
});
|
|
301
|
+
}, lookupOpts);
|
|
300
302
|
audit(AUDIT_EVENTS.DB_CREATED, {
|
|
301
303
|
database: newName,
|
|
302
304
|
fingerprint,
|
|
@@ -319,7 +321,7 @@ async function resolveTenantDatabase(state, requestedDb) {
|
|
|
319
321
|
await touchLastConnection(this._adminClient, {
|
|
320
322
|
databaseName: row.databaseName,
|
|
321
323
|
livenessPid: typeof pid === 'number' && pid > 0 ? pid : null,
|
|
322
|
-
});
|
|
324
|
+
}, lookupOpts);
|
|
323
325
|
} catch (err) {
|
|
324
326
|
this.logger.warn?.(
|
|
325
327
|
{ err: err?.message || String(err), database: row.databaseName },
|
|
@@ -330,7 +332,7 @@ async function resolveTenantDatabase(state, requestedDb) {
|
|
|
330
332
|
// flag between connections — the previous run might have started without
|
|
331
333
|
// persist:true and the operator just added it (or vice versa).
|
|
332
334
|
try {
|
|
333
|
-
await markPersist(this._adminClient, row.databaseName, persistRequested);
|
|
335
|
+
await markPersist(this._adminClient, row.databaseName, persistRequested, lookupOpts);
|
|
334
336
|
} catch (err) {
|
|
335
337
|
this.logger.warn?.(
|
|
336
338
|
{ err: err?.message || String(err), database: row.databaseName },
|
package/src/daemon-tcp.js
CHANGED
|
@@ -126,6 +126,7 @@ async function processTcpStartupMessage(socket, state) {
|
|
|
126
126
|
if (code === SSL_REQUEST_CODE || code === GSSAPI_REQUEST_CODE) {
|
|
127
127
|
socket.write(Buffer.from('N'));
|
|
128
128
|
state.buffer = buffer.length > messageLength ? buffer.subarray(messageLength) : null;
|
|
129
|
+
if (state.buffer) await processTcpStartupMessage.call(this, socket, state);
|
|
129
130
|
return;
|
|
130
131
|
}
|
|
131
132
|
if (code === CANCEL_REQUEST_CODE) {
|
|
@@ -153,7 +154,7 @@ async function processTcpStartupMessage(socket, state) {
|
|
|
153
154
|
validated = await verifyToken(this._adminClient, {
|
|
154
155
|
fingerprint: auth.fingerprint,
|
|
155
156
|
tokenHash,
|
|
156
|
-
});
|
|
157
|
+
}, { timeoutMs: this.adminLookupTimeoutMs });
|
|
157
158
|
}
|
|
158
159
|
} catch (err) {
|
|
159
160
|
this.logger.warn?.({ err: err.message }, 'verifyToken failed');
|
package/src/daemon.js
CHANGED
|
@@ -266,6 +266,9 @@ export class PgserveDaemon extends EventEmitter {
|
|
|
266
266
|
this._stopping = false;
|
|
267
267
|
// Lazy-initialised admin DB client (Group 6 token validation).
|
|
268
268
|
this._adminClient = null;
|
|
269
|
+
this.adminIdleTimeout = options.adminIdleTimeout ?? 300;
|
|
270
|
+
this.adminQueryTimeoutMs = options.adminQueryTimeoutMs ?? 0;
|
|
271
|
+
this.adminLookupTimeoutMs = options.adminLookupTimeoutMs ?? 5000;
|
|
269
272
|
// Group 5: GC sweep handle ({stop, sweep}). Installed once the admin
|
|
270
273
|
// client is up and torn down on stop().
|
|
271
274
|
this._gcHandle = null;
|
|
@@ -411,6 +414,8 @@ export class PgserveDaemon extends EventEmitter {
|
|
|
411
414
|
this._adminClient = await createAdminClient({
|
|
412
415
|
socketDir: this.pgManager.socketDir,
|
|
413
416
|
port: this.pgManager.port,
|
|
417
|
+
idleTimeout: this.adminIdleTimeout,
|
|
418
|
+
queryTimeoutMs: this.adminQueryTimeoutMs,
|
|
414
419
|
});
|
|
415
420
|
await ensureMetaSchema(this._adminClient);
|
|
416
421
|
writeAdminDiscovery({
|
package/src/postgres.js
CHANGED
|
@@ -709,11 +709,57 @@ export class PostgresManager {
|
|
|
709
709
|
}
|
|
710
710
|
}
|
|
711
711
|
|
|
712
|
+
/**
|
|
713
|
+
* Detect and remove a stale postmaster.pid that postgres would otherwise
|
|
714
|
+
* refuse to start against. Stale = the PID written into the file is not
|
|
715
|
+
* alive on this host. Called at the top of _startPostgres so that crash
|
|
716
|
+
* / SIGKILL / unclean reboot recovery is automatic.
|
|
717
|
+
*
|
|
718
|
+
* Real running backends are NEVER touched — if the PID is alive we leave
|
|
719
|
+
* the file alone and let postgres surface its normal "lock file already
|
|
720
|
+
* exists" error so the operator sees the conflict.
|
|
721
|
+
*/
|
|
722
|
+
async _ensureNoStalePostmasterLock() {
|
|
723
|
+
const pidFile = path.join(this.databaseDir, 'postmaster.pid');
|
|
724
|
+
let raw;
|
|
725
|
+
try {
|
|
726
|
+
raw = await fs.promises.readFile(pidFile, 'utf-8');
|
|
727
|
+
} catch (err) {
|
|
728
|
+
if (err.code === 'ENOENT') return;
|
|
729
|
+
throw err;
|
|
730
|
+
}
|
|
731
|
+
const firstLine = (raw.split('\n')[0] ?? '').trim();
|
|
732
|
+
const pid = Number.parseInt(firstLine, 10);
|
|
733
|
+
if (!Number.isInteger(pid) || pid <= 0) {
|
|
734
|
+
this.logger.warn(
|
|
735
|
+
{ pidFile, firstLine },
|
|
736
|
+
'postmaster.pid is unparseable; removing as stale'
|
|
737
|
+
);
|
|
738
|
+
await fs.promises.unlink(pidFile).catch(() => {});
|
|
739
|
+
return;
|
|
740
|
+
}
|
|
741
|
+
let alive = false;
|
|
742
|
+
try {
|
|
743
|
+
process.kill(pid, 0);
|
|
744
|
+
alive = true;
|
|
745
|
+
} catch (err) {
|
|
746
|
+
// EPERM = process exists but we can't signal it — still alive.
|
|
747
|
+
alive = err.code === 'EPERM';
|
|
748
|
+
}
|
|
749
|
+
if (alive) return;
|
|
750
|
+
this.logger.info(
|
|
751
|
+
{ pidFile, stalePid: pid },
|
|
752
|
+
'Removing stale postmaster.pid (PID not running) before postgres start'
|
|
753
|
+
);
|
|
754
|
+
await fs.promises.unlink(pidFile).catch(() => {});
|
|
755
|
+
}
|
|
756
|
+
|
|
712
757
|
/**
|
|
713
758
|
* Start the PostgreSQL server process
|
|
714
759
|
* Uses Bun.spawn() for ~40% faster process startup
|
|
715
760
|
*/
|
|
716
761
|
async _startPostgres() {
|
|
762
|
+
await this._ensureNoStalePostmasterLock();
|
|
717
763
|
return new Promise((resolve, reject) => {
|
|
718
764
|
// Build PostgreSQL arguments
|
|
719
765
|
const pgArgs = [
|
package/src/router.js
CHANGED
|
@@ -342,6 +342,7 @@ export class MultiTenantRouter extends EventEmitter {
|
|
|
342
342
|
socket.write(Buffer.from('N'));
|
|
343
343
|
// Remove this request from buffer, wait for real startup
|
|
344
344
|
state.buffer = buffer.length > messageLength ? buffer.subarray(messageLength) : null;
|
|
345
|
+
if (state.buffer) await this.processStartupMessage(socket, state);
|
|
345
346
|
return;
|
|
346
347
|
}
|
|
347
348
|
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
import { describe, expect, test } from 'bun:test';
|
|
2
|
+
import fs from 'fs';
|
|
3
|
+
import net from 'net';
|
|
4
|
+
import path from 'path';
|
|
5
|
+
|
|
6
|
+
import {
|
|
7
|
+
PgserveDaemon,
|
|
8
|
+
resolveControlSocketPath,
|
|
9
|
+
resolvePidLockPath,
|
|
10
|
+
} from '../src/daemon.js';
|
|
11
|
+
import { createLogger } from '../src/logger.js';
|
|
12
|
+
|
|
13
|
+
const SSL_REQUEST_CODE = 80877103;
|
|
14
|
+
const PROTOCOL_VERSION_3 = 196608;
|
|
15
|
+
|
|
16
|
+
function silentLogger() {
|
|
17
|
+
return createLogger({ level: process.env.PGSERVE_TEST_LOG || 'warn' });
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
function makeIsolated(tag) {
|
|
21
|
+
const dir = path.join('/tmp', `pgs-${tag}-${process.pid}-${Date.now()}`);
|
|
22
|
+
fs.mkdirSync(dir, { recursive: true, mode: 0o700 });
|
|
23
|
+
return dir;
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
function freeTcpPort() {
|
|
27
|
+
return new Promise((resolve, reject) => {
|
|
28
|
+
const srv = net.createServer();
|
|
29
|
+
srv.unref();
|
|
30
|
+
srv.on('error', reject);
|
|
31
|
+
srv.listen(0, '127.0.0.1', () => {
|
|
32
|
+
const { port } = srv.address();
|
|
33
|
+
srv.close(() => resolve(port));
|
|
34
|
+
});
|
|
35
|
+
});
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
function sslRequest() {
|
|
39
|
+
const buf = Buffer.alloc(8);
|
|
40
|
+
buf.writeUInt32BE(8, 0);
|
|
41
|
+
buf.writeUInt32BE(SSL_REQUEST_CODE, 4);
|
|
42
|
+
return buf;
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
function startupMessage({ user = 'postgres', database = 'postgres' } = {}) {
|
|
46
|
+
const params = Buffer.from(`user\0${user}\0database\0${database}\0client_encoding\0UTF8\0\0`);
|
|
47
|
+
const buf = Buffer.alloc(8 + params.length);
|
|
48
|
+
buf.writeUInt32BE(buf.length, 0);
|
|
49
|
+
buf.writeUInt32BE(PROTOCOL_VERSION_3, 4);
|
|
50
|
+
params.copy(buf, 8);
|
|
51
|
+
return buf;
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
function passwordMessage(password = 'postgres') {
|
|
55
|
+
const body = Buffer.from(`${password}\0`);
|
|
56
|
+
const buf = Buffer.alloc(1 + 4 + body.length);
|
|
57
|
+
buf.write('p', 0);
|
|
58
|
+
buf.writeUInt32BE(4 + body.length, 1);
|
|
59
|
+
body.copy(buf, 5);
|
|
60
|
+
return buf;
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
async function connectWithCoalescedStartup(socketPath) {
|
|
64
|
+
return new Promise((resolve, reject) => {
|
|
65
|
+
const socket = net.createConnection(socketPath);
|
|
66
|
+
let buffer = Buffer.alloc(0);
|
|
67
|
+
let sawSslReject = false;
|
|
68
|
+
let sawAuthOk = false;
|
|
69
|
+
|
|
70
|
+
const timer = setTimeout(() => {
|
|
71
|
+
socket.destroy();
|
|
72
|
+
reject(new Error('timed out waiting for ReadyForQuery after coalesced startup'));
|
|
73
|
+
}, 5000);
|
|
74
|
+
timer.unref();
|
|
75
|
+
|
|
76
|
+
const done = (err, result) => {
|
|
77
|
+
clearTimeout(timer);
|
|
78
|
+
socket.destroy();
|
|
79
|
+
if (err) reject(err);
|
|
80
|
+
else resolve(result);
|
|
81
|
+
};
|
|
82
|
+
|
|
83
|
+
const pump = () => {
|
|
84
|
+
if (!sawSslReject) {
|
|
85
|
+
if (buffer.length < 1) return;
|
|
86
|
+
if (buffer[0] !== 78) {
|
|
87
|
+
done(new Error(`expected SSL reject byte N, got ${buffer[0]}`));
|
|
88
|
+
return;
|
|
89
|
+
}
|
|
90
|
+
sawSslReject = true;
|
|
91
|
+
buffer = buffer.subarray(1);
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
while (buffer.length >= 5) {
|
|
95
|
+
const type = String.fromCharCode(buffer[0]);
|
|
96
|
+
const length = buffer.readUInt32BE(1);
|
|
97
|
+
if (buffer.length < 1 + length) return;
|
|
98
|
+
|
|
99
|
+
const payload = buffer.subarray(5, 1 + length);
|
|
100
|
+
buffer = buffer.subarray(1 + length);
|
|
101
|
+
|
|
102
|
+
if (type === 'R') {
|
|
103
|
+
const authCode = payload.readUInt32BE(0);
|
|
104
|
+
if (authCode === 3) socket.write(passwordMessage());
|
|
105
|
+
if (authCode === 0) sawAuthOk = true;
|
|
106
|
+
} else if (type === 'E') {
|
|
107
|
+
done(new Error(`postgres error response: ${payload.toString('utf8')}`));
|
|
108
|
+
return;
|
|
109
|
+
} else if (type === 'Z') {
|
|
110
|
+
done(null, { sawSslReject, sawAuthOk });
|
|
111
|
+
return;
|
|
112
|
+
}
|
|
113
|
+
}
|
|
114
|
+
};
|
|
115
|
+
|
|
116
|
+
socket.on('connect', () => {
|
|
117
|
+
socket.write(Buffer.concat([sslRequest(), startupMessage()]));
|
|
118
|
+
});
|
|
119
|
+
socket.on('data', (chunk) => {
|
|
120
|
+
buffer = Buffer.concat([buffer, chunk]);
|
|
121
|
+
pump();
|
|
122
|
+
});
|
|
123
|
+
socket.on('error', done);
|
|
124
|
+
});
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
describe('daemon Unix control protocol', () => {
|
|
128
|
+
test('processes startup already buffered behind SSLRequest', async () => {
|
|
129
|
+
const dir = makeIsolated('coalesced');
|
|
130
|
+
const daemon = new PgserveDaemon({
|
|
131
|
+
controlSocketDir: dir,
|
|
132
|
+
controlSocketPath: resolveControlSocketPath(dir),
|
|
133
|
+
pidLockPath: resolvePidLockPath(dir),
|
|
134
|
+
pgPort: await freeTcpPort(),
|
|
135
|
+
logger: silentLogger(),
|
|
136
|
+
});
|
|
137
|
+
|
|
138
|
+
await daemon.start();
|
|
139
|
+
try {
|
|
140
|
+
const result = await connectWithCoalescedStartup(resolveControlSocketPath(dir));
|
|
141
|
+
expect(result).toEqual({ sawSslReject: true, sawAuthOk: true });
|
|
142
|
+
} finally {
|
|
143
|
+
await daemon.stop();
|
|
144
|
+
fs.rmSync(dir, { recursive: true, force: true });
|
|
145
|
+
}
|
|
146
|
+
});
|
|
147
|
+
|
|
148
|
+
test('processes startup after the admin client idles out', async () => {
|
|
149
|
+
const dir = makeIsolated('admin-idle');
|
|
150
|
+
const daemon = new PgserveDaemon({
|
|
151
|
+
controlSocketDir: dir,
|
|
152
|
+
controlSocketPath: resolveControlSocketPath(dir),
|
|
153
|
+
pidLockPath: resolvePidLockPath(dir),
|
|
154
|
+
pgPort: await freeTcpPort(),
|
|
155
|
+
adminIdleTimeout: 1,
|
|
156
|
+
adminLookupTimeoutMs: 1000,
|
|
157
|
+
logger: silentLogger(),
|
|
158
|
+
});
|
|
159
|
+
|
|
160
|
+
await daemon.start();
|
|
161
|
+
try {
|
|
162
|
+
await connectWithCoalescedStartup(resolveControlSocketPath(dir));
|
|
163
|
+
await Bun.sleep(1500);
|
|
164
|
+
const result = await connectWithCoalescedStartup(resolveControlSocketPath(dir));
|
|
165
|
+
expect(result).toEqual({ sawSslReject: true, sawAuthOk: true });
|
|
166
|
+
} finally {
|
|
167
|
+
await daemon.stop();
|
|
168
|
+
fs.rmSync(dir, { recursive: true, force: true });
|
|
169
|
+
}
|
|
170
|
+
});
|
|
171
|
+
});
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Stale postmaster.pid cleanup
|
|
3
|
+
*
|
|
4
|
+
* Verifies that PostgresManager._ensureNoStalePostmasterLock removes
|
|
5
|
+
* a postmaster.pid file whose recorded PID is no longer alive, and
|
|
6
|
+
* leaves alone a postmaster.pid whose recorded PID is alive.
|
|
7
|
+
*
|
|
8
|
+
* Regression coverage: postgres refuses to start when postmaster.pid
|
|
9
|
+
* exists, even if the writer crashed. After unclean shutdowns this
|
|
10
|
+
* required manual `rm` to recover.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import { PostgresManager } from '../src/postgres.js';
|
|
14
|
+
import { test, expect } from 'bun:test';
|
|
15
|
+
import fs from 'fs';
|
|
16
|
+
import path from 'path';
|
|
17
|
+
import os from 'os';
|
|
18
|
+
|
|
19
|
+
function makeMgr(dataDir) {
|
|
20
|
+
const mgr = new PostgresManager({ dataDir });
|
|
21
|
+
mgr.databaseDir = dataDir;
|
|
22
|
+
mgr.logger = {
|
|
23
|
+
info: () => {},
|
|
24
|
+
warn: () => {},
|
|
25
|
+
error: () => {},
|
|
26
|
+
debug: () => {},
|
|
27
|
+
};
|
|
28
|
+
return mgr;
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
function makePidFile(dir, contents) {
|
|
32
|
+
fs.mkdirSync(dir, { recursive: true });
|
|
33
|
+
const pidFile = path.join(dir, 'postmaster.pid');
|
|
34
|
+
fs.writeFileSync(pidFile, contents, 'utf-8');
|
|
35
|
+
return pidFile;
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
test('removes postmaster.pid when recorded PID is dead', async () => {
|
|
39
|
+
const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'pgserve-stale-'));
|
|
40
|
+
try {
|
|
41
|
+
// PID 999999999 will not exist on any sane system
|
|
42
|
+
const pidFile = makePidFile(dir, '999999999\n/some/data\n123\n');
|
|
43
|
+
const mgr = makeMgr(dir);
|
|
44
|
+
await mgr._ensureNoStalePostmasterLock();
|
|
45
|
+
expect(fs.existsSync(pidFile)).toBe(false);
|
|
46
|
+
} finally {
|
|
47
|
+
fs.rmSync(dir, { recursive: true, force: true });
|
|
48
|
+
}
|
|
49
|
+
});
|
|
50
|
+
|
|
51
|
+
test('keeps postmaster.pid when recorded PID is the current (alive) process', async () => {
|
|
52
|
+
const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'pgserve-alive-'));
|
|
53
|
+
try {
|
|
54
|
+
const pidFile = makePidFile(dir, `${process.pid}\n/some/data\n123\n`);
|
|
55
|
+
const mgr = makeMgr(dir);
|
|
56
|
+
await mgr._ensureNoStalePostmasterLock();
|
|
57
|
+
expect(fs.existsSync(pidFile)).toBe(true);
|
|
58
|
+
} finally {
|
|
59
|
+
fs.rmSync(dir, { recursive: true, force: true });
|
|
60
|
+
}
|
|
61
|
+
});
|
|
62
|
+
|
|
63
|
+
test('removes postmaster.pid when first line is unparseable', async () => {
|
|
64
|
+
const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'pgserve-garbage-'));
|
|
65
|
+
try {
|
|
66
|
+
const pidFile = makePidFile(dir, 'garbage\nnot-a-pid\n');
|
|
67
|
+
const mgr = makeMgr(dir);
|
|
68
|
+
await mgr._ensureNoStalePostmasterLock();
|
|
69
|
+
expect(fs.existsSync(pidFile)).toBe(false);
|
|
70
|
+
} finally {
|
|
71
|
+
fs.rmSync(dir, { recursive: true, force: true });
|
|
72
|
+
}
|
|
73
|
+
});
|
|
74
|
+
|
|
75
|
+
test('no-ops when postmaster.pid does not exist', async () => {
|
|
76
|
+
const dir = fs.mkdtempSync(path.join(os.tmpdir(), 'pgserve-missing-'));
|
|
77
|
+
try {
|
|
78
|
+
const mgr = makeMgr(dir);
|
|
79
|
+
// Should resolve without throwing
|
|
80
|
+
await mgr._ensureNoStalePostmasterLock();
|
|
81
|
+
expect(fs.existsSync(path.join(dir, 'postmaster.pid'))).toBe(false);
|
|
82
|
+
} finally {
|
|
83
|
+
fs.rmSync(dir, { recursive: true, force: true });
|
|
84
|
+
}
|
|
85
|
+
});
|