bunsane 0.4.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -254,8 +254,13 @@ export async function doSave(entity: Entity, trx: SQL, signal?: AbortSignal): Pr
254
254
  (comp as any).setPersisted(true);
255
255
  (comp as any).setDirty(false);
256
256
  } else if ((comp as any)._dirty) {
257
+ // Full columns so the batched upsert below can encode every row
258
+ // through the same sql(arr, cols) path as the INSERT batch.
257
259
  componentsToUpdate.push({
258
260
  id: comp.id,
261
+ entity_id: entity.id,
262
+ name: compName,
263
+ type_id: comp.getTypeID(),
259
264
  data: comp.serializableData()
260
265
  });
261
266
  (comp as any).setDirty(false);
@@ -267,10 +272,15 @@ export async function doSave(entity: Entity, trx: SQL, signal?: AbortSignal): Pr
267
272
  await run(saveTrx`INSERT INTO components ${sql(componentsToInsert, 'id', 'entity_id', 'name', 'type_id', 'data')}`);
268
273
  }
269
274
 
270
- // Perform updates. Validate all ids up front (synchronous, fails
271
- // fast), then fire the UPDATEs together via Promise.all so they
272
- // pipeline on the transaction connection instead of paying one
273
- // serial round-trip per dirty component.
275
+ // Perform updates as a SINGLE batched upsert. Dirty components already
276
+ // exist (persisted, live), so the ON CONFLICT path always fires and
277
+ // updates `data` for every row in one round-trip — replacing the
278
+ // previous N sequential UPDATEs (N wire round-trips inside the txn).
279
+ // Conflict target is the (id, type_id) PRIMARY KEY, which contains the
280
+ // partition key `type_id` — required for ON CONFLICT on the partitioned
281
+ // `components` table. Reuses the same sql(arr, cols) encoder as the
282
+ // INSERT batch, so jsonb encoding is identical across PostgreSQL and
283
+ // PGlite. `created_at` is preserved (DO UPDATE only touches `data`).
274
284
  if (componentsToUpdate.length > 0) {
275
285
  const traceEnabled = logger.isLevelEnabled?.('trace') === true;
276
286
  for (const comp of componentsToUpdate) {
@@ -285,11 +295,7 @@ export async function doSave(entity: Entity, trx: SQL, signal?: AbortSignal): Pr
285
295
  logger.trace({ componentId: comp.id, data: comp.data }, `[Entity.doSave] Updating component`);
286
296
  }
287
297
  }
288
- await Promise.all(
289
- componentsToUpdate.map(comp =>
290
- run(saveTrx`UPDATE components SET data = ${comp.data} WHERE id = ${comp.id}`)
291
- )
292
- );
298
+ await run(saveTrx`INSERT INTO components ${sql(componentsToUpdate, 'id', 'entity_id', 'name', 'type_id', 'data')} ON CONFLICT (id, type_id) DO UPDATE SET data = EXCLUDED.data`);
293
299
  }
294
300
  };
295
301
 
@@ -322,19 +328,17 @@ export async function doDelete(entity: Entity, force: boolean = false): Promise<
322
328
 
323
329
  try {
324
330
  await db.transaction(async (trx) => {
325
- // Independent tables, no FK constraints pipeline the
326
- // statements on the transaction connection instead of paying
327
- // serial round-trips while holding the connection.
331
+ // Independent tables, no FK constraints. Issued sequentially:
332
+ // multiple concurrent in-flight queries on one connection
333
+ // deadlock single-backend servers (PGlite test harness), and a
334
+ // single wire serializes them anyway — Promise.all gave no real
335
+ // pipelining here.
328
336
  if (force) {
329
- await Promise.all([
330
- run(trx`DELETE FROM components WHERE entity_id = ${entity.id}`),
331
- run(trx`DELETE FROM entities WHERE id = ${entity.id}`),
332
- ]);
337
+ await run(trx`DELETE FROM components WHERE entity_id = ${entity.id}`);
338
+ await run(trx`DELETE FROM entities WHERE id = ${entity.id}`);
333
339
  } else {
334
- await Promise.all([
335
- run(trx`UPDATE entities SET deleted_at = CURRENT_TIMESTAMP WHERE id = ${entity.id} AND deleted_at IS NULL`),
336
- run(trx`UPDATE components SET deleted_at = CURRENT_TIMESTAMP WHERE entity_id = ${entity.id} AND deleted_at IS NULL`),
337
- ]);
340
+ await run(trx`UPDATE entities SET deleted_at = CURRENT_TIMESTAMP WHERE id = ${entity.id} AND deleted_at IS NULL`);
341
+ await run(trx`UPDATE components SET deleted_at = CURRENT_TIMESTAMP WHERE entity_id = ${entity.id} AND deleted_at IS NULL`);
338
342
  }
339
343
  });
340
344
  clearTimeout(timeoutHandle);
package/core/health.ts CHANGED
@@ -1,4 +1,5 @@
1
1
  import db from "../database";
2
+ import { runWithSignal } from "../database/cancellable";
2
3
  import { CacheManager } from "./cache/CacheManager";
3
4
 
4
5
  export interface CheckResult {
@@ -13,6 +14,14 @@ export interface HealthResponse {
13
14
  checks: {
14
15
  database: CheckResult;
15
16
  cache: CheckResult;
17
+ /**
18
+ * Present only when the DB write probe is enabled (default on).
19
+ * Exercises the real `db.transaction()` write path so a wedged write
20
+ * pool — a stuck pooled client or exhausted pool that leaves reads
21
+ * (`SELECT 1`) healthy — fails the liveness check and the orchestrator
22
+ * restarts the container instead of it serving 504s indefinitely.
23
+ */
24
+ database_write?: CheckResult;
16
25
  };
17
26
  }
18
27
 
@@ -24,6 +33,71 @@ export interface HealthResult {
24
33
  export interface HealthDeps {
25
34
  pingDb: () => Promise<boolean>;
26
35
  pingCache: () => Promise<boolean>;
36
+ /**
37
+ * Write-path probe. Optional: when omitted (e.g. tests passing custom
38
+ * deps) the write check is skipped and behavior matches the read-only
39
+ * health check. `defaultDeps` supplies the real probe.
40
+ */
41
+ pingDbWrite?: () => Promise<boolean>;
42
+ }
43
+
/** Write-probe timeout (ms) used when DB_HEALTH_WRITE_TIMEOUT is unset or unusable. */
const DEFAULT_WRITE_PROBE_TIMEOUT_MS = 5000;

/** Largest delay a timer accepts; larger values are coerced to 1ms by the runtime. */
const MAX_TIMER_DELAY_MS = 2_147_483_647;

/**
 * Converts a raw DB_HEALTH_WRITE_TIMEOUT value into a usable timer delay.
 *
 * @param raw - The environment value, or `undefined` when the variable is unset.
 * @returns The parsed positive integer (capped at the timer maximum), or the
 *          5000ms default when `raw` is missing, empty, non-numeric, zero or
 *          negative.
 */
function parseWriteProbeTimeout(raw: string | undefined): number {
  const parsed = parseInt(raw ?? "", 10);
  // NaN, 0 and negatives would all make setTimeout fire (almost) immediately,
  // turning every probe into an instant "timeout" — fall back instead.
  if (!Number.isFinite(parsed) || parsed <= 0) {
    return DEFAULT_WRITE_PROBE_TIMEOUT_MS;
  }
  return Math.min(parsed, MAX_TIMER_DELAY_MS);
}

// Independent, short timeout for the write probe so a wedged write path is
// caught fast (and the container restarted) rather than blocking on the 30s
// request/save timeout. Configurable via DB_HEALTH_WRITE_TIMEOUT; invalid or
// non-positive values fall back to the default rather than being passed to
// the timer as-is.
const WRITE_PROBE_TIMEOUT_MS = parseWriteProbeTimeout(process.env.DB_HEALTH_WRITE_TIMEOUT);
48
+
/**
 * Reports whether the DB write probe has been switched off through the
 * HEALTH_DB_WRITE_PROBE environment variable.
 *
 * The variable is read on every call. The probe counts as disabled when the
 * value, ignoring surrounding whitespace and letter case, is `false` or `0`;
 * an unset variable or any other value leaves the probe enabled.
 *
 * @returns `true` when the write probe must be skipped.
 */
function writeProbeDisabled(): boolean {
  const raw = process.env.HEALTH_DB_WRITE_PROBE;
  if (raw === undefined) {
    return false;
  }
  const flag = raw.trim().toLowerCase();
  return flag === "false" || flag === "0";
}
52
+
/**
 * Write-path health probe: runs a real write inside `db.transaction()` and
 * races it against an independent timer.
 *
 * Inside the transaction it creates a temp table declared `ON COMMIT DROP`
 * and inserts one row, each statement issued through `runWithSignal` with the
 * probe's abort signal. The race covers the whole `db.transaction()` call, so
 * the probe also gives up when the transaction callback is never reached
 * (intended to catch a hang while acquiring a connection — NOTE(review):
 * acquisition behaviour of `db.transaction` is not visible here).
 *
 * @returns `true` once the transaction settles successfully before the
 *          timeout. The function never resolves `false`.
 * @throws The transaction's own error if it fails first, or a
 *         "DB write health probe timeout" error if the timer fires first.
 */
async function probeDbWrite(): Promise<boolean> {
  const timeoutMs = WRITE_PROBE_TIMEOUT_MS;
  const controller = new AbortController();
  let handle: ReturnType<typeof setTimeout> | undefined;
  let txn: Promise<unknown> | undefined;

  const timeoutPromise = new Promise<never>((_, reject) => {
    handle = setTimeout(() => {
      const err = new Error(`DB write health probe timeout after ${timeoutMs}ms`);
      // Abort first so any in-flight probe query is cancelled with the same
      // error that the caller is about to observe.
      controller.abort(err);
      reject(err);
    }, timeoutMs);
    // Do not let a pending probe timer keep the process alive. `unref` is not
    // present on every runtime's timer handle, hence the optional call.
    (handle as any).unref?.();
  });

  try {
    // Started inside the try block so that a synchronous throw from
    // `db.transaction` still reaches the cleanup below; otherwise the timer
    // would stay armed and later reject a promise nobody is listening to.
    txn = Promise.resolve(
      db.transaction(async (trx) => {
        await runWithSignal(
          trx`CREATE TEMP TABLE IF NOT EXISTS _bunsane_health_write (probed_at timestamptz NOT NULL) ON COMMIT DROP`,
          controller.signal,
        );
        await runWithSignal(
          trx`INSERT INTO _bunsane_health_write (probed_at) VALUES (now())`,
          controller.signal,
        );
      }),
    );

    await Promise.race([txn, timeoutPromise]);
    return true;
  } finally {
    if (handle !== undefined) clearTimeout(handle);
    // Signal abort on every exit path so a query still in flight after a
    // lost race is cancelled rather than left running.
    if (!controller.signal.aborted) controller.abort();
    // Swallow a late transaction settle after a lost race so it cannot
    // surface as an unhandled rejection.
    txn?.catch(() => { /* ignore post-timeout settle */ });
  }
}
28
102
 
29
103
  const defaultDeps: HealthDeps = {
@@ -32,6 +106,7 @@ const defaultDeps: HealthDeps = {
32
106
  return true;
33
107
  },
34
108
  pingCache: () => CacheManager.getInstance().ping(),
109
+ pingDbWrite: probeDbWrite,
35
110
  };
36
111
 
37
112
  async function checkDatabase(pingDb: () => Promise<boolean>): Promise<CheckResult> {
@@ -55,24 +130,30 @@ async function checkCache(pingCache: () => Promise<boolean>): Promise<CheckResul
55
130
  }
56
131
 
57
132
  export async function deepHealthCheck(deps: HealthDeps = defaultDeps): Promise<HealthResult> {
58
- const [database, cache] = await Promise.all([
133
+ const runWrite = !!deps.pingDbWrite && !writeProbeDisabled();
134
+
135
+ const [database, cache, databaseWrite] = await Promise.all([
59
136
  checkDatabase(deps.pingDb),
60
137
  checkCache(deps.pingCache),
138
+ runWrite ? checkDatabase(deps.pingDbWrite!) : Promise.resolve(undefined),
61
139
  ]);
62
140
 
63
141
  const dbUp = database.status === "up";
142
+ const writeUp = !databaseWrite || databaseWrite.status === "up";
64
143
  const cacheUp = cache.status === "up";
65
144
 
66
145
  let status: HealthResponse["status"];
67
146
  let httpStatus: number;
68
147
 
69
- if (dbUp && cacheUp) {
148
+ if (dbUp && writeUp && cacheUp) {
70
149
  status = "ok";
71
150
  httpStatus = 200;
72
- } else if (dbUp && !cacheUp) {
151
+ } else if (dbUp && writeUp && !cacheUp) {
73
152
  status = "degraded";
74
153
  httpStatus = 200;
75
154
  } else {
155
+ // DB read OR write down → unavailable. A wedged write path (reads fine,
156
+ // writes hang) lands here so liveness fails and the container restarts.
76
157
  status = "unavailable";
77
158
  httpStatus = 503;
78
159
  }
@@ -82,7 +163,11 @@ export async function deepHealthCheck(deps: HealthDeps = defaultDeps): Promise<H
82
163
  status,
83
164
  timestamp: new Date().toISOString(),
84
165
  uptime: process.uptime(),
85
- checks: { database, cache },
166
+ checks: {
167
+ database,
168
+ cache,
169
+ ...(databaseWrite ? { database_write: databaseWrite } : {}),
170
+ },
86
171
  },
87
172
  httpStatus,
88
173
  };
@@ -94,6 +179,7 @@ export async function readinessCheck(
94
179
  deps: HealthDeps = defaultDeps,
95
180
  ): Promise<HealthResult> {
96
181
  if (!isReady || isShuttingDown) {
182
+ const includeWrite = !!deps.pingDbWrite && !writeProbeDisabled();
97
183
  return {
98
184
  result: {
99
185
  status: "unavailable",
@@ -102,6 +188,9 @@ export async function readinessCheck(
102
188
  checks: {
103
189
  database: { status: "unknown", latency_ms: 0 },
104
190
  cache: { status: "unknown", latency_ms: 0 },
191
+ ...(includeWrite
192
+ ? { database_write: { status: "unknown", latency_ms: 0 } }
193
+ : {}),
105
194
  },
106
195
  },
107
196
  httpStatus: 503,