pgserve 2.3.0 → 2.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/gc.js DELETED
@@ -1,351 +0,0 @@
1
- /**
2
- * pgserve GC — 3-layer lifecycle sweep (Group 5).
3
- *
4
- * Decides which user databases to reap based on:
5
- * 1. `persist=true` — exempt from GC, audited as `db_persist_honored`.
6
- * 2. Liveness — if `liveness_pid` points at a running process, slide
7
- * `last_connection_at` forward to "now" (the peer is alive, the row is
8
- * a heartbeat) and never reap.
9
- * 3. TTL — peer is gone AND `now - last_connection_at > ttlMs` (default
10
- * 24h) → `DROP DATABASE`, delete the meta row, audit reap event.
11
- *
12
- * Audit reap event is `db_reaped_liveness` when the row had a non-null
13
- * liveness_pid that is now dead, otherwise `db_reaped_ttl` (the row never
14
- * registered a liveness_pid — pure idle expiry).
15
- *
16
- * `installSweepTriggers(daemon, …)` wires the three call sites:
17
- * - boot: a single sweep right after the daemon is listening, with a
18
- * summary log line so operators see GC activity at startup.
19
- * - hourly `setInterval` (configurable via `intervalMs`).
20
- * - on-connect sampling: subscribe to the daemon's `'accept'` event and
21
- * fire `gcSweep` async at rate 1/N where `N = max(1, ceil(dbCount/10))`. The
22
- * listener never awaits the sweep, so accept latency is unaffected.
23
- */
24
-
25
- import { audit, AUDIT_EVENTS } from './audit.js';
26
- import { forEachReapable, deleteMetaRow, touchLastConnection } from './control-db.js';
27
-
28
// Cadence of the periodic sweep timer: one hour, in milliseconds.
const HOURLY_MS = 60 * 60 * 1000;
// Default idle TTL before a database becomes reapable: 24 hours, in milliseconds.
const TTL_MS_DEFAULT = 24 * HOURLY_MS;
30
-
31
/**
 * Default liveness probe built on `process.kill(pid, 0)`, which sends no
 * signal and only checks whether the target process can be signalled.
 *
 * A pid that is not a positive integer is reported as not alive without
 * probing. When the probe throws, only `EPERM` (the process exists but we may
 * not signal it) counts as alive; any other error code counts as gone.
 *
 * @param {number|null|undefined} pid
 * @returns {boolean} true when the process appears to be running
 */
function defaultIsProcessAlive(pid) {
  const isUsablePid = Number.isInteger(pid) && pid > 0;
  if (!isUsablePid) {
    return false;
  }

  let alive = true;
  try {
    process.kill(pid, 0);
  } catch (err) {
    alive = err.code === 'EPERM';
  }
  return alive;
}
47
-
48
- /**
49
- * @typedef {object} GcSweepOptions
50
- * @property {{query: Function}} adminClient — pgserve admin DB connection
51
- * @property {{adminPool: any, createdDatabases?: Set<string>}} [pgManager] —
52
- * optional; used to evict from the in-process createdDatabases cache after
53
- * a successful DROP. Tests can omit; gcSweep always falls back to the
54
- * adminClient's `query()` for the actual DROP.
55
- * @property {number|Date} [now]
56
- * @property {number} [ttlMs] — defaults to 24h
57
- * @property {boolean} [dryRun] — when true, never DROP / DELETE / audit reap
58
- * @property {(pid: number|null|undefined) => boolean} [isProcessAlive]
59
- * @property {{warn?: Function, info?: Function, error?: Function, debug?: Function}} [logger]
60
- */
61
-
62
- /**
63
- * @typedef {object} GcSweepResult
64
- * @property {number} examined
65
- * @property {number} reaped
66
- * @property {number} kept
67
- * @property {number} persistSkipped
68
- * @property {number} aliveSkipped
69
- * @property {string[]} reapedNames
70
- */
71
-
72
/**
 * Run one GC sweep over the rows yielded by `forEachReapable(adminClient)`
 * and return counters so callers can log a summary or assert in tests.
 *
 * Per row, in order:
 *   1. `row.persist` truthy — kept, counted as `persistSkipped`, audited as
 *      `DB_PERSIST_HONORED` (unless `dryRun`).
 *   2. `row.livenessPid` is a positive integer and `isProcessAlive` says it
 *      is running — kept, counted as `aliveSkipped`, and (unless `dryRun`)
 *      `touchLastConnection` is called; a failure there is logged and ignored.
 *   3. Age (`now - lastConnectionAt`) is at most `ttlMs` — kept.
 *   4. Otherwise reaped: `dropDatabaseSafely`, evict from
 *      `pgManager.createdDatabases`, `deleteMetaRow`, then audit. In `dryRun`
 *      the row is only counted and named in `reapedNames`.
 *
 * A failure while reaping one row is logged via `logger.error` and does not
 * abort the sweep; that row is simply not counted as reaped.
 *
 * @param {GcSweepOptions} opts
 * @returns {Promise<GcSweepResult>}
 * @throws {Error} when `adminClient` is missing or `now` is not a Date /
 *   finite number of milliseconds
 */
export async function gcSweep({
  adminClient,
  pgManager = null,
  now = new Date(),
  ttlMs = TTL_MS_DEFAULT,
  dryRun = false,
  isProcessAlive = defaultIsProcessAlive,
  logger,
} = {}) {
  if (!adminClient) throw new Error('gcSweep: adminClient required');

  const nowMs = now instanceof Date ? now.getTime() : Number(now);
  if (!Number.isFinite(nowMs)) throw new Error('gcSweep: now must be Date or numeric ms');

  const result = {
    examined: 0,
    reaped: 0,
    kept: 0,
    persistSkipped: 0,
    aliveSkipped: 0,
    reapedNames: [],
  };

  // Materialise the candidates first so the loop below never deletes rows
  // while the async iterator is still being consumed.
  const candidates = [];
  for await (const row of forEachReapable(adminClient)) {
    candidates.push(row);
  }

  for (const row of candidates) {
    result.examined += 1;

    // Defensive: the existing contract (per the original comment) is that
    // forEachReapable filters persist rows out. If one shows up anyway it is
    // honoured and audited rather than reaped.
    if (row.persist) {
      result.persistSkipped += 1;
      result.kept += 1;
      if (!dryRun) {
        audit(AUDIT_EVENTS.DB_PERSIST_HONORED, {
          database: row.databaseName,
          fingerprint: row.fingerprint,
        });
      }
      continue;
    }

    const livenessPid = row.livenessPid;
    const hadLivenessPid = Number.isInteger(livenessPid) && livenessPid > 0;
    const alive = hadLivenessPid && isProcessAlive(livenessPid);

    if (alive) {
      result.aliveSkipped += 1;
      result.kept += 1;
      if (!dryRun) {
        // Slide the window: a live owner means the row is effectively
        // current even if the stored last-connection value lags.
        try {
          await touchLastConnection(adminClient, {
            databaseName: row.databaseName,
            livenessPid,
          });
        } catch (err) {
          logger?.warn?.(
            { err: err?.message || String(err), database: row.databaseName },
            'gcSweep: touchLastConnection failed for live row (non-fatal)',
          );
        }
      }
      continue;
    }

    // A timestamp that cannot be interpreted at all yields a non-finite value
    // and is treated as infinitely old (the original behaviour). ISO-8601
    // strings are parsed instead of collapsing to NaN, so a row is not reaped
    // merely because its timestamp arrived as text.
    const lastMs = lastConnectionToEpochMs(row.lastConnectionAt);
    const ageMs = Number.isFinite(lastMs) ? nowMs - lastMs : Infinity;

    if (ageMs <= ttlMs) {
      result.kept += 1;
      continue;
    }

    if (dryRun) {
      result.reaped += 1;
      result.reapedNames.push(row.databaseName);
      continue;
    }

    try {
      await dropDatabaseSafely(adminClient, row.databaseName, logger);
      pgManager?.createdDatabases?.delete(row.databaseName);
      await deleteMetaRow(adminClient, row.databaseName);
      // A row that registered a liveness pid (now dead) is a liveness reap;
      // a row that never had one is a pure idle-TTL reap.
      const reapEvent = hadLivenessPid
        ? AUDIT_EVENTS.DB_REAPED_LIVENESS
        : AUDIT_EVENTS.DB_REAPED_TTL;
      audit(reapEvent, {
        database: row.databaseName,
        fingerprint: row.fingerprint,
        last_connection_at: row.lastConnectionAt instanceof Date
          ? row.lastConnectionAt.toISOString()
          : row.lastConnectionAt,
        liveness_pid: livenessPid ?? null,
        age_ms: Number.isFinite(ageMs) ? ageMs : null,
      });
      result.reaped += 1;
      result.reapedNames.push(row.databaseName);
    } catch (err) {
      logger?.error?.(
        { err: err?.message || String(err), database: row.databaseName },
        'gcSweep: failed to reap database',
      );
    }
  }

  return result;
}

/**
 * Convert a stored last-connection value to epoch milliseconds.
 *
 * Dates use `getTime()`; numbers and numeric strings go through `Number()`
 * exactly as before; any other string is handed to `Date.parse` so ISO-8601
 * timestamps resolve to a real instant. Returns NaN when nothing applies.
 *
 * @param {Date|number|string|null|undefined} value
 * @returns {number}
 */
function lastConnectionToEpochMs(value) {
  if (value instanceof Date) return value.getTime();
  const numeric = Number(value);
  if (Number.isFinite(numeric) || typeof value !== 'string') return numeric;
  return Date.parse(value);
}
197
-
198
/**
 * Drop one database through the admin connection.
 *
 * First asks Postgres to terminate every other backend attached to the
 * database so that DROP DATABASE is not refused with 55006 (object_in_use).
 * That step is best-effort: a failure is logged at debug level and the DROP
 * is attempted regardless. Errors from the DROP itself propagate.
 *
 * @param {{query: Function}} adminClient
 * @param {string} databaseName
 * @param {{debug?: Function}} [logger]
 * @returns {Promise<void>}
 */
async function dropDatabaseSafely(adminClient, databaseName, logger) {
  // Quote as an identifier, doubling any embedded double quotes.
  const quotedName = `"${databaseName.replace(/"/g, '""')}"`;

  const terminateBackendsSql = `SELECT pg_terminate_backend(pid)
       FROM pg_stat_activity
      WHERE datname = $1 AND pid <> pg_backend_pid()`;

  try {
    await adminClient.query(terminateBackendsSql, [databaseName]);
  } catch (err) {
    const details = { err: err?.message || String(err), database: databaseName };
    logger?.debug?.(details, 'gcSweep: pg_terminate_backend failed (non-fatal)');
  }

  const dropSql = `DROP DATABASE IF EXISTS ${quotedName}`;
  await adminClient.query(dropSql);
}
218
-
219
/**
 * Wire the three sweep call sites onto a running daemon:
 *   - boot: one detached sweep (after a count refresh) with a summary log;
 *   - timer: `setInterval` every `intervalMs` (unref'd, so it does not keep
 *     the process alive);
 *   - on-connect: a listener on the daemon's `'accept'` event that samples
 *     at rate 1/N, N = max(1, ceil(lastDbCount / 10)), and runs detached.
 *
 * Sweeps never overlap: a trigger that fires while one is in flight is a
 * no-op resolving to `null`. Sweep errors are logged, never thrown.
 *
 * `stop()` detaches the timer and listener and then waits for any in-flight
 * sweep to settle, so the caller can safely tear down the admin client once
 * the returned promise resolves.
 *
 * @param {object} daemon — PgserveDaemon instance
 * @param {object} [opts]
 * @param {{query: Function}} [opts.adminClient] — defaults to daemon._adminClient
 * @param {number} [opts.intervalMs] — hourly default; pass 0 to disable
 * @param {number} [opts.ttlMs]
 * @param {(pid: number) => boolean} [opts.isProcessAlive]
 * @param {() => Promise<number>|number} [opts.getDbCount] — defaults to a
 *   COUNT(*) query against pgserve_meta
 * @param {boolean} [opts.bootSweep=true]
 * @returns {{stop: () => Promise<void>, sweep: () => Promise<GcSweepResult|null>}}
 * @throws {Error} when neither `opts.adminClient` nor `daemon._adminClient` is set
 */
export function installSweepTriggers(daemon, opts = {}) {
  const adminClient = opts.adminClient || daemon._adminClient;
  if (!adminClient) {
    throw new Error('installSweepTriggers: daemon has no admin client');
  }
  const intervalMs = opts.intervalMs ?? HOURLY_MS;
  const ttlMs = opts.ttlMs ?? TTL_MS_DEFAULT;
  const logger = daemon.logger;
  const pgManager = daemon.pgManager;
  const isProcessAlive = opts.isProcessAlive || defaultIsProcessAlive;
  const getDbCount = opts.getDbCount || (async () => {
    try {
      const r = await adminClient.query('SELECT count(*)::int AS n FROM pgserve_meta');
      return r.rows?.[0]?.n ?? 0;
    } catch {
      // Best-effort: the count only tunes the sampling rate.
      return 0;
    }
  });

  let stopped = false;
  // Promise of the sweep currently running, or null. Doubles as the overlap
  // guard and as the thing stop() waits on.
  let inflightSweep = null;
  let lastDbCount = 0;

  // One guarded sweep; resolves to the result or null, never rejects.
  const sweepOnce = async () => {
    try {
      const res = await gcSweep({
        adminClient,
        pgManager,
        now: new Date(),
        ttlMs,
        isProcessAlive,
        logger,
      });
      lastDbCount = Math.max(0, lastDbCount - res.reaped);
      return res;
    } catch (err) {
      logger?.error?.(
        { err: err?.message || String(err) },
        'gcSweep failed',
      );
      return null;
    }
  };

  const runSweep = async () => {
    if (stopped || inflightSweep) return null;
    const current = sweepOnce();
    inflightSweep = current;
    try {
      return await current;
    } finally {
      inflightSweep = null;
    }
  };

  let timer = null;
  if (intervalMs > 0) {
    timer = setInterval(() => {
      void runSweep();
    }, intervalMs);
    if (typeof timer.unref === 'function') timer.unref();
  }

  // Refresh the cached row count, then sweep. getDbCount may be synchronous
  // and may throw; either way the failure is contained here instead of
  // escaping the setImmediate callback as an uncaught exception.
  const refreshCountAndSweep = async () => {
    try {
      lastDbCount = Number(await getDbCount()) || 0;
      await runSweep();
    } catch (err) {
      logger?.debug?.(
        { err: err?.message || String(err) },
        'pgserve GC: on-connect count refresh failed, sweep skipped',
      );
    }
  };

  const acceptListener = () => {
    // Sample 1/N where N = max(1, ceil(dbCount/10)). Always async and
    // detached so accept latency isn't blocked.
    const n = Math.max(1, Math.ceil(lastDbCount / 10));
    if (n !== 1 && Math.random() * n >= 1) return;
    setImmediate(() => {
      if (stopped) return;
      void refreshCountAndSweep();
    });
  };
  daemon.on?.('accept', acceptListener);

  const handle = {
    sweep: runSweep,
    async stop() {
      stopped = true;
      if (timer) {
        clearInterval(timer);
        timer = null;
      }
      daemon.off?.('accept', acceptListener);
      // Let a sweep that is already running finish before reporting stopped.
      if (inflightSweep) await inflightSweep;
    },
  };

  if (opts.bootSweep !== false) {
    // Boot sweep + count refresh + summary log. Detached so the caller is
    // not blocked while it runs.
    setImmediate(async () => {
      // stop() may have been called before this callback ran.
      if (stopped) return;
      try {
        lastDbCount = Number(await getDbCount()) || 0;
        const res = await runSweep();
        if (res) {
          logger?.info?.(
            {
              examined: res.examined,
              reaped: res.reaped,
              kept: res.kept,
              persist_skipped: res.persistSkipped,
              alive_skipped: res.aliveSkipped,
            },
            'pgserve GC: boot sweep complete',
          );
        }
      } catch (err) {
        logger?.warn?.(
          { err: err?.message || String(err) },
          'pgserve GC: boot sweep failed',
        );
      }
    });
  }

  return handle;
}