bunsane 0.5.6 → 0.5.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -70,6 +70,7 @@ export class SchedulerManager {
70
70
  enableLogging: this.config.enableLogging,
71
71
  lockTimeout: this.config.lockTimeout ?? 0,
72
72
  retryInterval: this.config.lockRetryInterval ?? 100,
73
+ backend: this.config.lockBackend,
73
74
  });
74
75
 
75
76
  initializeLifecycleIntegration(this);
@@ -1,29 +1,32 @@
1
1
  /**
2
- * Distributed Lock using PostgreSQL Advisory Locks
2
+ * DistributedLock — task-keyed mutual exclusion over a pluggable
3
+ * {@link LockBackend}.
3
4
  *
4
- * PostgreSQL advisory locks are session-level (bound to the connection that
5
- * acquired them). Bun's SQL pool hands out a different connection per query,
6
- * so naively calling `pg_try_advisory_lock` on the pooled client leaves the
7
- * lock stranded on whichever connection was used — `pg_advisory_unlock` on a
8
- * different connection silently returns `false` and the lock is held until
9
- * that connection eventually closes.
5
+ * This class is the stable, task-id-oriented facade the scheduler and
6
+ * {@link withLock} use. The actual locking mechanism is delegated to a backend:
10
7
  *
11
- * Fix: reserve a dedicated connection via `sql.reserve()` once per instance
12
- * and route every lock/unlock query through it. All locks owned by this
13
- * instance live in one PostgreSQL session, so unlock always hits the session
14
- * that acquired the lock. If the process crashes, PostgreSQL terminates the
15
- * session and every held lock is released automatically — no cleanup needed.
8
+ * - `advisory` — PostgreSQL session advisory locks (historical default;
9
+ * only safe with a session-pinned connection — see BUNSANE-1).
10
+ * - `in-process` — in-memory, single instance / test double.
11
+ * - `postgres` — pooler-safe lease table (Phase 2).
12
+ * - `redis` — `SET NX PX` lease (Phase 4).
16
13
  *
17
- * The reservation is lazy (acquired on first use) and released when either
18
- * `releaseAll()` is called or no locks remain outstanding, so idle instances
19
- * do not permanently consume a pool slot.
14
+ * Select via `config.backend`, the `BUNSANE_LOCK_BACKEND` env var, or leave it
15
+ * `'auto'`. The facade keeps a per-instance map of held handles so the public
16
+ * API (`isHeld`, `getHeldLockCount`, …) is unchanged from the advisory-only
17
+ * era.
20
18
  *
21
- * @see https://www.postgresql.org/docs/current/explicit-locking.html#ADVISORY-LOCKS
19
+ * @see core/scheduler/locks/LockBackend.ts
22
20
  */
23
21
 
24
- import type { ReservedSQL } from "bun";
25
- import db from "../../database";
26
22
  import { logger } from "../Logger";
23
+ import {
24
+ createLockBackend,
25
+ UnsafeAdvisoryPoolingError,
26
+ type LockBackend,
27
+ type LockBackendKind,
28
+ type LockHandle,
29
+ } from "./locks";
27
30
 
28
31
  const loggerInstance = logger.child({ scope: "DistributedLock" });
29
32
 
@@ -41,102 +44,88 @@ export interface DistributedLockConfig {
41
44
  lockTimeout: number;
42
45
  /** Retry interval when lockTimeout > 0 */
43
46
  retryInterval: number;
47
+ /** Which {@link LockBackend} to use. Omitted → env / `'auto'`. */
48
+ backend?: LockBackendKind;
49
+ /** Lease lifetime in ms for lease backends (ignored by advisory). */
50
+ leaseTtlMs: number;
44
51
  }
45
52
 
46
53
  export const DEFAULT_LOCK_CONFIG: DistributedLockConfig = {
47
54
  enabled: true,
48
- lockKeyPrefix: 0x42554E53, // "BUNS" in hex as a namespace prefix
55
+ lockKeyPrefix: 0x42554e53, // "BUNS" in hex as a namespace prefix
49
56
  enableLogging: false,
50
57
  lockTimeout: 0,
51
58
  retryInterval: 100,
59
+ leaseTtlMs: 30_000,
52
60
  };
53
61
 
62
+ interface HeldLease {
63
+ handle: LockHandle;
64
+ /** TTL used at acquire time; reused for heartbeat renewals. */
65
+ ttlMs: number;
66
+ }
67
+
54
68
  export class DistributedLock {
55
69
  private config: DistributedLockConfig;
56
- private heldLocks: Set<string> = new Set();
57
- private reservedConn: ReservedSQL | null = null;
58
- private reservePromise: Promise<ReservedSQL> | null = null;
70
+ private backend: LockBackend;
71
+ /** Held leases keyed by taskId (one logical lock per task per instance). */
72
+ private heldLocks: Map<string, HeldLease> = new Map();
73
+ /** Count of releases/renews that found the lease already lost (stranded). */
74
+ private lostLeases = 0;
59
75
 
60
76
  constructor(config: Partial<DistributedLockConfig> = {}) {
61
77
  this.config = { ...DEFAULT_LOCK_CONFIG, ...config };
78
+ this.backend = this.makeBackend();
79
+ }
80
+
81
+ private makeBackend(): LockBackend {
82
+ return createLockBackend({
83
+ kind: this.config.backend,
84
+ lockKeyPrefix: this.config.lockKeyPrefix,
85
+ enableLogging: this.config.enableLogging,
86
+ });
62
87
  }
63
88
 
89
+ /**
90
+ * Stable bigint id for a task, used only for the {@link LockResult.lockKey}
91
+ * field (logs/events). Matches the advisory backend's own hash so a
92
+ * reported key lines up with the underlying advisory id when that backend
93
+ * is active. NOTE (BUNSANE-4): 32-bit space → possible collisions; this is
94
+ * an observability id, not the source of exclusion.
95
+ */
64
96
  private generateLockKey(taskId: string): bigint {
65
97
  let hash = 0;
66
98
  for (let i = 0; i < taskId.length; i++) {
67
99
  const char = taskId.charCodeAt(i);
68
- hash = ((hash << 5) - hash) + char;
100
+ hash = (hash << 5) - hash + char;
69
101
  hash = hash & hash;
70
102
  }
71
103
  hash = Math.abs(hash);
72
-
73
104
  const prefix = BigInt(this.config.lockKeyPrefix);
74
105
  const hashBigInt = BigInt(hash >>> 0);
75
106
  return (prefix << 32n) | hashBigInt;
76
107
  }
77
108
 
78
- /**
79
- * Lazily reserve one dedicated connection that owns every advisory lock
80
- * this instance takes. Concurrent callers share the same reservation via
81
- * `reservePromise`.
82
- */
83
- private async ensureReserved(): Promise<ReservedSQL> {
84
- if (this.reservedConn) return this.reservedConn;
85
- if (!this.reservePromise) {
86
- // On reject (pool exhausted, shutdown mid-reserve), null the
87
- // promise so subsequent callers retry a fresh reserve instead of
88
- // receiving the same rejected promise forever (H-DB-2).
89
- this.reservePromise = db.reserve().then(
90
- (conn) => {
91
- this.reservedConn = conn;
92
- this.reservePromise = null;
93
- return conn;
94
- },
95
- (err) => {
96
- this.reservePromise = null;
97
- throw err;
98
- }
99
- );
100
- }
101
- return this.reservePromise;
102
- }
103
-
104
- /**
105
- * Release the pinned connection back to the pool. Only safe when no
106
- * advisory locks are currently held on this instance — otherwise the
107
- * session would be closed and locks forfeited.
108
- */
109
- private releaseReservation(): void {
110
- if (!this.reservedConn) return;
111
- try {
112
- this.reservedConn.release();
113
- } catch (error) {
114
- loggerInstance.warn(
115
- `Failed to release reserved connection: ${error instanceof Error ? error.message : String(error)}`
116
- );
117
- }
118
- this.reservedConn = null;
119
- }
120
-
121
109
  /**
122
110
  * Try to acquire a distributed lock for a task. Non-blocking when
123
111
  * `lockTimeout` is 0 (default); retries every `retryInterval` ms up to
124
112
  * `lockTimeout` otherwise.
125
113
  */
126
- async tryAcquire(taskId: string): Promise<LockResult> {
114
+ async tryAcquire(
115
+ taskId: string,
116
+ ttlMs: number = this.config.leaseTtlMs
117
+ ): Promise<LockResult> {
118
+ const lockKey = this.generateLockKey(taskId);
119
+
127
120
  if (!this.config.enabled) {
128
121
  return { acquired: true, lockKey: 0n, taskId };
129
122
  }
130
123
 
131
- const lockKey = this.generateLockKey(taskId);
132
-
133
124
  if (this.heldLocks.has(taskId)) {
134
- // Defense in depth: if this instance already holds the lock for
135
- // taskId, a second concurrent acquirer would mean overlapping
136
- // execution (retry firing while previous run is still in the
137
- // finally release step, for example). Return acquired:false so
138
- // the second caller skips, even if caller-side guards missed it.
139
- // (H-SCHED-4).
125
+ // Defense in depth: this instance already holds the lock. A second
126
+ // concurrent acquirer means overlapping execution (e.g. a retry
127
+ // firing before the prior run hit its release). Report contention
128
+ // even if caller-side guards missed it (H-SCHED-4).
140
129
  if (this.config.enableLogging) {
141
130
  loggerInstance.debug(
142
131
  `Lock for ${taskId} already held locally — reporting overlap (acquired:false)`
@@ -148,63 +137,58 @@ export class DistributedLock {
148
137
  const startTime = Date.now();
149
138
 
150
139
  try {
151
- const conn = await this.ensureReserved();
152
-
153
- let acquired = await this.attemptLock(conn, lockKey);
140
+ let handle = await this.backend.acquire(taskId, { ttlMs });
154
141
 
155
- if (!acquired && this.config.lockTimeout > 0) {
142
+ if (!handle && this.config.lockTimeout > 0) {
156
143
  while (
157
- !acquired &&
144
+ !handle &&
158
145
  Date.now() - startTime < this.config.lockTimeout
159
146
  ) {
160
147
  await this.sleep(this.config.retryInterval);
161
- acquired = await this.attemptLock(conn, lockKey);
148
+ handle = await this.backend.acquire(taskId, { ttlMs });
162
149
  }
163
150
  }
164
151
 
165
- if (acquired) {
166
- this.heldLocks.add(taskId);
152
+ if (handle) {
153
+ this.heldLocks.set(taskId, { handle, ttlMs });
167
154
  if (this.config.enableLogging) {
168
155
  loggerInstance.debug(
169
- `Acquired lock for task ${taskId} (key: ${lockKey})`
156
+ `Acquired lock for task ${taskId} (key: ${lockKey}, backend: ${this.backend.name})`
170
157
  );
171
158
  }
172
159
  return { acquired: true, lockKey, taskId };
173
160
  }
174
161
 
175
- // No locks taken on this attempt — if nothing else is held,
176
- // return the reserved connection to the pool.
177
- if (this.heldLocks.size === 0) {
178
- this.releaseReservation();
179
- }
180
-
181
162
  if (this.config.enableLogging) {
182
163
  loggerInstance.debug(
183
- `Failed to acquire lock for task ${taskId} (key: ${lockKey}) — another instance is executing`
164
+ `Failed to acquire lock for task ${taskId} (key: ${lockKey}) — another holder is executing`
184
165
  );
185
166
  }
186
167
  return { acquired: false, lockKey, taskId };
187
168
  } catch (error) {
169
+ // An unsafe-pooling config is NOT a transient lock failure — never
170
+ // degrade it to a silent {acquired:false}. Fail loud (BUNSANE-7).
171
+ if (error instanceof UnsafeAdvisoryPoolingError) {
172
+ throw error;
173
+ }
188
174
  loggerInstance.error(
189
175
  `Error acquiring lock for task ${taskId}: ${error instanceof Error ? error.message : String(error)}`
190
176
  );
191
- if (this.heldLocks.size === 0) {
192
- this.releaseReservation();
193
- }
194
177
  return { acquired: false, lockKey, taskId };
195
178
  }
196
179
  }
197
180
 
198
181
  /**
199
- * Release a single distributed lock. When the last lock is released the
200
- * reserved connection is returned to the pool.
182
+ * Release a single distributed lock. Returns `true` only if a genuinely
183
+ * held lock was released.
201
184
  */
202
185
  async release(taskId: string): Promise<boolean> {
203
186
  if (!this.config.enabled) {
204
187
  return true;
205
188
  }
206
189
 
207
- if (!this.heldLocks.has(taskId)) {
190
+ const held = this.heldLocks.get(taskId);
191
+ if (!held) {
208
192
  if (this.config.enableLogging) {
209
193
  loggerInstance.warn(
210
194
  `Lock for task ${taskId} was not held or already released`
@@ -213,63 +197,84 @@ export class DistributedLock {
213
197
  return false;
214
198
  }
215
199
 
216
- const lockKey = this.generateLockKey(taskId);
200
+ // Drop local ownership first so a failed backend release can't leave a
201
+ // phantom entry that blocks re-acquire forever.
202
+ this.heldLocks.delete(taskId);
217
203
 
218
- if (!this.reservedConn) {
219
- loggerInstance.warn(
220
- `No reserved connection available for ${taskId}; dropping from heldLocks`
204
+ try {
205
+ const released = await this.backend.release(held.handle);
206
+ if (!released) {
207
+ // A false release is the canonical stranded/lost-lease signal
208
+ // (BUNSANE-1/2). Surface it LOUDLY (ERROR) and count it — this
209
+ // is the single highest-signal symptom of a broken lock.
210
+ this.lostLeases++;
211
+ loggerInstance.error(
212
+ `Lock release for task ${taskId} returned false (backend: ${this.backend.name}) — lease was lost/stranded; the critical section may have run without exclusion`
213
+ );
214
+ } else if (this.config.enableLogging) {
215
+ loggerInstance.debug(`Released lock for task ${taskId}`);
216
+ }
217
+ return released;
218
+ } catch (error) {
219
+ loggerInstance.error(
220
+ `Error releasing lock for task ${taskId}: ${error instanceof Error ? error.message : String(error)}`
221
221
  );
222
- this.heldLocks.delete(taskId);
223
222
  return false;
224
223
  }
224
+ }
225
225
 
226
- try {
227
- const result = await this.reservedConn`
228
- SELECT pg_advisory_unlock(${lockKey}::bigint) as pg_advisory_unlock
229
- `;
230
- const released = result[0]?.pg_advisory_unlock ?? false;
226
+ /**
227
+ * Renew the lease for a held task (heartbeat). Returns `true` if the lease
228
+ * is still ours and was extended. A `false` means the lease lapsed and was
229
+ * stolen mid-execution — surfaced as ERROR + counted. Backends without a
230
+ * lease (advisory: session-bound, never lapses while held) report `true`.
231
+ */
232
+ async renew(taskId: string): Promise<boolean> {
233
+ if (!this.config.enabled) return true;
231
234
 
232
- this.heldLocks.delete(taskId);
235
+ const held = this.heldLocks.get(taskId);
236
+ if (!held) return false;
233
237
 
234
- if (released && this.config.enableLogging) {
235
- loggerInstance.debug(
236
- `Released lock for task ${taskId} (key: ${lockKey})`
237
- );
238
- } else if (!released) {
239
- loggerInstance.warn(
240
- `pg_advisory_unlock returned false for task ${taskId} (key: ${lockKey})`
241
- );
242
- }
238
+ // Session-bound backends have no renew → the lock cannot lapse while
239
+ // held, so treat as a successful no-op.
240
+ if (!this.backend.renew) return true;
243
241
 
244
- if (this.heldLocks.size === 0) {
245
- this.releaseReservation();
242
+ try {
243
+ const ok = await this.backend.renew(held.handle, held.ttlMs);
244
+ if (!ok) {
245
+ this.lostLeases++;
246
+ this.heldLocks.delete(taskId);
247
+ loggerInstance.error(
248
+ `Lease renewal failed for task ${taskId} (backend: ${this.backend.name}) — lease expired and was stolen; the critical section is NO LONGER protected`
249
+ );
246
250
  }
247
- return released;
251
+ return ok;
248
252
  } catch (error) {
249
253
  loggerInstance.error(
250
- `Error releasing lock for task ${taskId}: ${error instanceof Error ? error.message : String(error)}`
254
+ `Error renewing lease for task ${taskId}: ${error instanceof Error ? error.message : String(error)}`
251
255
  );
252
- this.heldLocks.delete(taskId);
253
- if (this.heldLocks.size === 0) {
254
- this.releaseReservation();
255
- }
256
256
  return false;
257
257
  }
258
258
  }
259
259
 
260
+ /** Total leases observed lost/stranded (failed renew or release). */
261
+ getLostLeaseCount(): number {
262
+ return this.lostLeases;
263
+ }
264
+
265
+ /** Configured lease lifetime in ms (drives heartbeat cadence in withLock). */
266
+ getLeaseTtlMs(): number {
267
+ return this.config.leaseTtlMs;
268
+ }
269
+
260
270
  /**
261
271
  * Release all held locks. Safe to call during shutdown.
262
272
  */
263
273
  async releaseAll(): Promise<void> {
264
- const tasks = Array.from(this.heldLocks);
274
+ const tasks = Array.from(this.heldLocks.keys());
265
275
  for (const taskId of tasks) {
266
276
  await this.release(taskId);
267
277
  }
268
- // release() returns the reservation once heldLocks empties, but if
269
- // nothing was held we still need to clean up any pending reservation.
270
- if (this.heldLocks.size === 0) {
271
- this.releaseReservation();
272
- }
273
278
  }
274
279
 
275
280
  isHeld(taskId: string): boolean {
@@ -281,21 +286,23 @@ export class DistributedLock {
281
286
  }
282
287
 
283
288
  updateConfig(config: Partial<DistributedLockConfig>): void {
289
+ const prevBackendKind = this.config.backend;
284
290
  this.config = { ...this.config, ...config };
291
+ // Recreate the backend only when its identity changes; otherwise keep
292
+ // the live one (it may own a reserved connection / held leases).
293
+ if (config.backend !== undefined && config.backend !== prevBackendKind) {
294
+ void this.backend.dispose?.();
295
+ this.backend = this.makeBackend();
296
+ }
285
297
  }
286
298
 
287
299
  getConfig(): DistributedLockConfig {
288
300
  return { ...this.config };
289
301
  }
290
302
 
291
- private async attemptLock(
292
- conn: ReservedSQL,
293
- lockKey: bigint
294
- ): Promise<boolean> {
295
- const result = await conn`
296
- SELECT pg_try_advisory_lock(${lockKey}::bigint) as pg_try_advisory_lock
297
- `;
298
- return result[0]?.pg_try_advisory_lock ?? false;
303
+ /** Backend name, for diagnostics / health output. */
304
+ getBackendName(): string {
305
+ return this.backend.name;
299
306
  }
300
307
 
301
308
  private sleep(ms: number): Promise<void> {
@@ -16,6 +16,19 @@ export {
16
16
 
17
17
  export {
18
18
  withLock,
19
+ LockUnavailableError,
19
20
  type WithLockOptions,
20
21
  type LockOutcome,
21
22
  } from './withLock';
23
+
24
+ export {
25
+ createLockBackend,
26
+ InProcessLockBackend,
27
+ AdvisoryLockBackend,
28
+ PostgresLeaseLockBackend,
29
+ UnsafeAdvisoryPoolingError,
30
+ type LockBackend,
31
+ type LockBackendKind,
32
+ type LockHandle,
33
+ type AcquireOptions,
34
+ } from './locks';
@@ -23,5 +23,6 @@ export function syncLockConfig(manager: SchedulerManager): void {
23
23
  enableLogging: manager.config.enableLogging,
24
24
  lockTimeout: manager.config.lockTimeout ?? 0,
25
25
  retryInterval: manager.config.lockRetryInterval ?? 100,
26
+ backend: manager.config.lockBackend,
26
27
  });
27
28
  }