pg-aequor 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,17 @@
1
+ ISC License
2
+
3
+ Copyright (c) 2026 dimaq12
4
+
5
+ Permission to use, copy, modify, and/or distribute this software for any
6
+ purpose with or without fee is hereby granted, provided that the above
7
+ copyright notice and this permission notice appear in all copies.
8
+
9
+ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10
+ WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11
+ MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12
+ ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13
+ WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14
+ ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15
+ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16
+
17
+
package/README.md ADDED
@@ -0,0 +1,81 @@
1
+ # pg-aequor
2
+
3
+ If you use standard `pg` in AWS Lambda, you are either a madman or you haven't yet seen your database die under a pile of zombie connections. This library is a wrapper that forces PostgreSQL and Serverless to coexist in peace.
4
+
5
+ ---
6
+
7
+ ## Why isn't this just another wrapper?
8
+
9
+ In standard environments, connections live long. In Lambda, they "freeze" in suspended instances. We solve this with three cooperating mechanisms:
10
+
11
+ 1. **Signed Leases**: Each connection signs itself in `application_name` (expiration + HMAC).
12
+ 2. **Distributed Reaper**: A background "Reaper" scans the database and kills connections whose lease has expired.
13
+ 3. **Advisory Locks**: Coordination is handled via Postgres Advisory Locks, so instances don't fight each other to clean up the mess.
14
+
15
+ ---
16
+
17
+ ## Technical Rules (Read this so it doesn't hurt)
18
+
19
+ * **Disposable Idle**: If a connection is idle longer than the lease TTL, it is considered a corpse. Another instance will kill it. This is a feature, not a bug.
20
+ * **Crash Safety**: We swallow socket errors in `pg.Client` handlers. No more `Runtime.ExitError` crashing your entire Lambda.
21
+ * **Single Connection Architecture**: The Reaper runs on the *active* connection using Advisory Locks. It adds minimal latency to the "leader" request but prevents connection storms (Reaper-DOS) during massive scale-ups.
22
+
23
+ ---
24
+
25
+ ## Configuration
26
+
27
+ ### Required Parameters (Lease/Reaper)
28
+
29
+ | Parameter | Type | Description |
30
+ | :--- | :--- | :--- |
31
+ | `secret` | `string` | **Critical.** Shared secret for HMAC signing. Do NOT use your DB password. Must be at least 16 bytes. |
32
+ | `leaseMode` | `string` | `'required'` (throws without secret) or `'optional'`. Default: `'required'`. |
33
+ | `leaseTtlMs` | `number` | Lease Time-To-Live in milliseconds. Default: `90000` (90s). |
34
+
35
+ ### Retry Strategy
36
+
37
+ We use **Decorrelated Jitter** and **SQLSTATE** filtering. Retries trigger only on transient errors (network, DB restart, connection limits).
38
+
39
+ ---
40
+
41
+ ## Observability (Hooks)
42
+
43
+ Do not put heavy logic in hooks. Use them for metrics.
44
+
45
+ ```javascript
46
+ const { ServerlessClient } = require('pg-aequor')
47
+
48
+ const client = new ServerlessClient({
49
+ host: process.env.DB_HOST,
50
+ user: process.env.DB_USER,
51
+ password: process.env.DB_PASSWORD,
52
+ database: process.env.DB_NAME,
53
+
54
+ // Coordination Secret (Distinct from DB password)
55
+ secret: process.env.COORD_SECRET,
56
+
57
+ hooks: {
58
+ onQueryRetry: ({ retries, err }) => {
59
+ console.warn(`Retry #${retries} due to ${err.code}`)
60
+ },
61
+ onClientDead: ({ source, meta }) => {
62
+ // Perfect for CloudWatch EMF or X-Ray
63
+ logToEMF('ClientDeath', 1, { sqlstate: meta?.sqlstate })
64
+ }
65
+ }
66
+ })
67
+
68
+ await client.connect()
69
+ const res = await client.query('SELECT NOW()')
70
+ await client.clean() // or await client.end()
71
+ ```
72
+
73
+ ---
74
+
75
+ ## Installation
76
+
77
+ ```bash
78
+ npm install pg-aequor
79
+ ```
80
+
81
+ > **Attention:** This library requires `pg` as a peer dependency. Tested on versions `^8.11.0`.
package/index.d.ts ADDED
@@ -0,0 +1,200 @@
1
+ import { Client, ClientConfig, QueryResult, QueryResultRow } from 'pg';
2
+
3
+ export interface ServerlessClientHooks {
4
+ /**
5
+ * Called when a new database connection is successfully established.
6
+ */
7
+ onConnect?: (payload: { gen: number }) => void;
8
+
9
+ /**
10
+ * Called when a connection attempt fails and is about to be retried.
11
+ */
12
+ onReconnect?: (payload: { gen: number; retries: number; delay: number; err: Error }) => void;
13
+
14
+ /**
15
+ * Called when a query fails with a retryable error and is about to be retried.
16
+ */
17
+ onQueryRetry?: (payload: { retries: number; delay: number; err: Error }) => void;
18
+
19
+ /**
20
+ * Called when a heartbeat (lease renewal) succeeds.
21
+ */
22
+ onHeartbeat?: (payload: { gen: number }) => void;
23
+
24
+ /**
25
+ * Called when a heartbeat fails (either transiently or permanently).
26
+ */
27
+ onHeartbeatFail?: (payload: { gen: number; err: Error }) => void;
28
+
29
+ /**
30
+ * Called when the underlying pg.Client emits an 'error' event or ends unexpectedly.
31
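+ * This is a critical signal that the connection is dead. Note: the hook is only invoked when an error object is present, so an 'end' event without an error does not trigger it.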
32
+ */
33
+ onClientDead?: (payload: { source: 'error' | 'end'; err?: Error; meta?: { sqlstate?: string; [key: string]: any } }) => void;
34
+
35
+ /**
36
+ * Called immediately before a user query is executed. Useful for tracing start time.
37
+ */
38
+ onQueryStart?: (payload: { args: any[]; startedAt: number }) => void;
39
+
40
+ /**
41
+ * Called immediately after a user query successfully completes.
42
+ */
43
+ onQueryEnd?: (payload: { args: any[]; res: QueryResult<any>; duration: number }) => void;
44
+
45
+ /**
46
+ * Called when a user query fails (before retry logic kicks in).
47
+ */
48
+ onQueryError?: (payload: { args: any[]; err: Error; duration: number }) => void;
49
+ }
50
+
51
+ export interface ServerlessClientConfig extends ClientConfig {
52
+ /**
53
+ * Shared secret for signing leases. Required if leaseMode is 'required'.
54
+ * Conceptually distinct from DB password. Must be at least 16 bytes.
55
+ */
56
+ secret?: string;
57
+
58
+ /**
59
+ * Logical name of the service using this client. Used for advisory lock namespace.
60
+ * Defaults to AWS_LAMBDA_FUNCTION_NAME or 'sls_pg'.
61
+ */
62
+ serviceName?: string;
63
+
64
+ /**
65
+ * Coordination mode.
66
+ * - 'required': throws if secret is missing (default).
67
+ * - 'optional': disables lease/reaper if secret is missing.
68
+ */
69
+ leaseMode?: 'required' | 'optional';
70
+
71
+ /**
72
+ * Enable/disable the background connection reaper. Default: true.
73
+ */
74
+ reaper?: boolean;
75
+
76
+ /**
77
+ * Probability (0.0 - 1.0) of running the reaper on connect.
78
+ * Alias for legacy 'connUtilization'. Default: 0.1.
79
+ */
80
+ reaperRunProbability?: number;
81
+
82
+ /**
83
+ * Minimum time (ms) between reaper runs on this container (random jitter is added on top). Default: 30000 (30s).
84
+ */
85
+ reaperCooldownMs?: number;
86
+
87
+ /**
88
+ * How to handle reaper internal errors.
89
+ * - 'swallow': log and ignore (default).
90
+ * - 'throw': throw exception to the caller.
91
+ */
92
+ reaperErrorMode?: 'swallow' | 'throw';
93
+
94
+ /**
95
+ * Minimum idle time (seconds) before a connection is considered a zombie candidate.
96
+ * Default: 180 (3m).
97
+ */
98
+ minConnectionIdleTimeSec?: number;
99
+
100
+ /**
101
+ * Maximum number of zombie connections to kill in one reaper pass. Default: 10.
102
+ */
103
+ maxIdleConnectionsToKill?: number;
104
+
105
+ /**
106
+ * Lease time-to-live in milliseconds. Default: 90000 (90s).
107
+ */
108
+ leaseTtlMs?: number;
109
+
110
+ /**
111
+ * Time remaining (ms) where we soft-check lease renewal. Default: 30000.
112
+ */
113
+ heartbeatSoftRemainingMs?: number;
114
+
115
+ /**
116
+ * Time remaining (ms) where we force-wait for lease renewal. Default: 5000.
117
+ */
118
+ heartbeatHardWaitRemainingMs?: number;
119
+
120
+ /**
121
+ * Time (ms) to wait for set_config heartbeat query before timing out. Default: 2000.
122
+ */
123
+ heartbeatTimeoutMs?: number;
124
+
125
+ /**
126
+ * Action on heartbeat failure.
127
+ * - 'reconnect': mark client dead and reconnect (safest for serverless).
128
+ * - 'swallow': log and ignore.
129
+ * - 'throw': throw error.
130
+ * Default: 'reconnect'.
131
+ */
132
+ heartbeatErrorMode?: 'reconnect' | 'swallow' | 'throw';
133
+
134
+ /**
135
+ * Max time (ms) to spend retrying a connect operation. Default: 15000.
136
+ */
137
+ maxConnectRetryTimeMs?: number;
138
+
139
+ /**
140
+ * Max time (ms) to spend retrying a query operation. Default: 15000.
141
+ */
142
+ maxQueryRetryTimeMs?: number;
143
+
144
+ /**
145
+ * Default query_timeout (ms) passed to pg if not specified in individual query.
146
+ */
147
+ defaultQueryTimeoutMs?: number;
148
+
149
+ /**
150
+ * Observability hooks.
151
+ */
152
+ hooks?: ServerlessClientHooks;
153
+
154
+ /**
155
+ * Debug logging (console.log). Default: false.
156
+ */
157
+ debug?: boolean;
158
+
159
+ /**
160
+ * Underlying pg driver instance (e.g. for X-Ray capture).
161
+ */
162
+ library?: any;
163
+
164
+ // Legacy aliases
165
+ connUtilization?: number;
166
+ applicationName?: string;
167
+ }
168
+
169
+ export class ServerlessClient {
170
+ constructor(config: ServerlessClientConfig);
171
+
172
+ /**
173
+ * Establishes a connection (if not already connected) and acquires a lease.
174
+ */
175
+ connect(): Promise<void>;
176
+
177
+ /**
178
+ * Executes a query with automatic retry and lease management.
179
+ */
180
+ query<R extends QueryResultRow = any, I extends any[] = any[]>(
181
+ queryTextOrConfig: string | import('pg').QueryConfig<I>,
182
+ values?: I
183
+ ): Promise<QueryResult<R>>;
184
+
185
+ /**
186
+ * Gracefully closes the connection.
187
+ */
188
+ clean(): Promise<void>;
189
+
190
+ /**
191
+ * Alias for clean().
192
+ */
193
+ end(): Promise<void>;
194
+
195
+ /**
196
+ * Returns the underlying pg.Client instance (if connected).
197
+ * Use with caution.
198
+ */
199
+ getClient(): Client | null;
200
+ }
package/index.js ADDED
@@ -0,0 +1,7 @@
1
+ const ServerlessClient = require('./lib/client')
2
+
3
+ // Canonical export: match serverless-postgres API.
4
+ module.exports = {
5
+ ServerlessClient,
6
+ }
7
+
package/lib/client.js ADDED
@@ -0,0 +1,413 @@
1
+ const RetryStrategy = require('./retry')
2
+ const LeaseManager = require('./lease')
3
+ const Reaper = require('./reaper')
4
+ const crypto = require('crypto')
5
+
6
+ class ServerlessClient {
7
+ constructor(config = {}) {
8
+ this._config = config
9
+ this._library = config.library || require('pg')
10
+ this._client = null
11
+ this._isDead = false // Flag to force recreation
12
+ this._generation = 0
13
+ this._connectPromise = null
14
+
15
+ // Retry Strategy
16
+ this._retryStrategy = {
17
+ retries: config.retries ?? 3,
18
+ minBackoff: config.minBackoff ?? 100, // ms
19
+ maxBackoff: config.maxBackoff ?? 2000 // ms
20
+ }
21
+
22
+ // Lease/Reaper mode:
23
+ // - required: secret must be provided (safe distributed coordination)
24
+ // - optional: if secret missing, disable lease/reaper/heartbeat but client still works
25
+ this._leaseMode = config.leaseMode || 'required' // 'required' | 'optional'
26
+
27
+ // Reaper config (can be disabled if lease is disabled)
28
+ this._reaperEnabled = config.reaper !== false
29
+ this._strategy = {
30
+ // Probability of running a reaper pass on connect (0..1). Alias for backwards compatibility.
31
+ reaperRunProbability: config.reaperRunProbability ?? config.connUtilization ?? 0.1,
32
+ // Default should be minutes, not seconds, otherwise you create your own outages.
33
+ minConnIdleTimeSec: config.minConnectionIdleTimeSec || 180, // Default 3m
34
+ maxIdleConnectionsToKill: config.maxIdleConnectionsToKill || 10,
35
+ reaperErrorMode: config.reaperErrorMode || 'swallow', // 'swallow' | 'throw'
36
+ }
37
+ this._reaperCooldownMs = config.reaperCooldownMs ?? 30000
38
+ // Jittered Cooldown Base: Add random offset to avoid synchronized reapers
39
+ this._reaperBaseInterval = this._reaperCooldownMs + Math.random() * (this._reaperCooldownMs / 3)
40
+ this._reaperCurrentInterval = this._reaperBaseInterval
41
+ this._reaperNextRunAt = 0
42
+
43
+ // Setup Lease Manager
44
+ const serviceName = config.serviceName || process.env.AWS_LAMBDA_FUNCTION_NAME || 'sls_pg'
45
+ // 48-bit random instance id => exactly 8 base64url chars (no padding). Good entropy, tight budget.
46
+ const instanceId = crypto.randomBytes(6).toString('base64url')
47
+ const secret = config.secret // explicit coordination secret (not db password)
48
+ this._baseApplicationName =
49
+ (typeof config.application_name === 'string' && config.application_name) ||
50
+ (typeof config.applicationName === 'string' && config.applicationName) ||
51
+ serviceName
52
+
53
+ if (!secret) {
54
+ if (this._leaseMode === 'required') {
55
+ throw new Error('Missing config.secret (required for lease/reaper). Set leaseMode=\"optional\" to disable lease/reaper/heartbeat.')
56
+ }
57
+ this._leaseManager = null
58
+ this._reaperEnabled = false
59
+ } else {
60
+ this._leaseManager = new LeaseManager(serviceName, instanceId, secret)
61
+ }
62
+
63
+ // Heartbeat state
64
+ this._leaseExp = 0
65
+ this._heartbeatPromise = null
66
+ this._leaseTtlMs = config.leaseTtlMs ?? 90000
67
+ this._heartbeatSoftRemainingMs = config.heartbeatSoftRemainingMs ?? 30000
68
+ this._heartbeatHardWaitRemainingMs = config.heartbeatHardWaitRemainingMs ?? 5000
69
+ this._heartbeatErrorMode = config.heartbeatErrorMode || 'reconnect' // 'swallow' | 'reconnect' | 'throw'
70
+ this._heartbeatTimeoutMs = config.heartbeatTimeoutMs ?? 2000
71
+ this._defaultQueryTimeoutMs = config.defaultQueryTimeoutMs ?? 0
72
+
73
+ // Logging
74
+ this._logger = config.debug ? console.log : () => {}
75
+ this._hooks = config.hooks || {}
76
+
77
+ // Backoff state (decorrelated jitter needs previous delay)
78
+ this._connectPrevDelay = 0
79
+ this._queryPrevDelay = 0
80
+ this._maxConnectRetryTimeMs = config.maxConnectRetryTimeMs ?? 15000
81
+ this._maxQueryRetryTimeMs = config.maxQueryRetryTimeMs ?? 15000
82
+ }
83
+
84
+ _safeHook(name, payload) {
85
+ const fn = this._hooks && this._hooks[name]
86
+ if (typeof fn !== 'function') return
87
+ try { fn(payload) } catch (_) { /* never throw from hooks */ }
88
+ }
89
+
90
+ async connect() {
91
+ if (this._client && !this._isDead) return
92
+ if (this._connectPromise) return this._connectPromise
93
+ const gen = ++this._generation
94
+ this._connectPromise = (async () => {
95
+ try {
96
+ await this._connectWithRetry(gen)
97
+ } finally {
98
+ this._connectPromise = null
99
+ }
100
+ })()
101
+ return this._connectPromise
102
+ }
103
+
104
+ async _connectWithRetry(gen) {
105
+ const startedAt = Date.now()
106
+ let retries = 0
107
+ while (true) {
108
+ try {
109
+ await this._connect(gen)
110
+ this._connectPrevDelay = 0
111
+ this._safeHook('onConnect', { gen })
112
+ return // Success
113
+ } catch (err) {
114
+ if (this._maxConnectRetryTimeMs > 0 && (Date.now() - startedAt) > this._maxConnectRetryTimeMs) {
115
+ throw err
116
+ }
117
+ if (!RetryStrategy.isRetryable(err) || retries >= this._retryStrategy.retries) {
118
+ throw err
119
+ }
120
+ retries++
121
+ const delay = RetryStrategy.getBackoff(
122
+ this._retryStrategy.minBackoff,
123
+ this._retryStrategy.maxBackoff,
124
+ this._connectPrevDelay
125
+ )
126
+ this._connectPrevDelay = delay
127
+ this._safeHook('onReconnect', { gen, retries, delay, err })
128
+ this._logger(`Connect Retry ${retries}/${this._retryStrategy.retries} after ${delay}ms: ${err.message}`)
129
+ await new Promise(res => setTimeout(res, delay))
130
+ }
131
+ }
132
+ }
133
+
134
+ async _connect(gen) {
135
+ // Internal cleanup before creating a new client should NOT invalidate this generation.
136
+ await this._disposeClient('reconnect', { bumpGeneration: false })
137
+
138
+ // Generate initial lease
139
+ let appName = String(this._baseApplicationName || 'app').slice(0, 63)
140
+ if (this._leaseManager) {
141
+ this._leaseExp = Date.now() + this._leaseTtlMs
142
+ appName = this._leaseManager.generateAppName(this._leaseExp)
143
+ } else {
144
+ this._leaseExp = 0
145
+ }
146
+
147
+ const clientConfig = this._buildPgClientConfig({ application_name: appName })
148
+
149
+ const client = new this._library.Client(clientConfig)
150
+
151
+ // Crash Safety: Swallow errors to prevent Runtime.ExitError
152
+ client.on('error', (err) => this._markDeadAndDispose(client, err, 'error'))
153
+ // If connection ends, the client is not reusable.
154
+ client.on('end', () => this._markDeadAndDispose(client, null, 'end'))
155
+
156
+ await client.connect()
157
+
158
+ // Generation guard: do not resurrect if a newer generation started while we were connecting.
159
+ if (this._generation !== gen) {
160
+ try { await client.end() } catch (_) {}
161
+ return
162
+ }
163
+
164
+ this._client = client
165
+ this._isDead = false
166
+
167
+ // Run Reaper if enabled (async, best effort)
168
+ if (this._reaperEnabled) {
169
+ this._reap().catch(err => this._logger('Reap failed:', err.message))
170
+ }
171
+ }
172
+
173
+ // Best-effort connection cleanup
174
+ async _reap() {
175
+ // 1. Check Lease Manager
176
+ if (!this._leaseManager) return
177
+
178
+ // 2. Jittered Cooldown + Backoff
179
+ const now = Date.now()
180
+ if (now < this._reaperNextRunAt) return
181
+
182
+ // 3. Use CURRENT client (Single Connection Architecture)
183
+ const client = this._client
184
+ if (!client) return
185
+
186
+ try {
187
+ const result = await Reaper.reap(client, this._config, this._leaseManager, this._strategy, this._logger)
188
+
189
+ if (!result.locked) {
190
+ // Lock busy (someone else is reaping) -> Exponential Backoff
191
+ this._reaperCurrentInterval = Math.min(this._reaperCurrentInterval * 1.5, 600000) // max 10m
192
+ } else {
193
+ // Success (or just acquired lock) -> Reset to Base
194
+ this._reaperCurrentInterval = this._reaperBaseInterval
195
+ }
196
+
197
+ // Schedule next run with jitter
198
+ const jitter = Math.random() * (this._reaperCurrentInterval / 2)
199
+ this._reaperNextRunAt = now + this._reaperCurrentInterval + jitter
200
+
201
+ if (result.killed > 0) {
202
+ this._logger(`Reaper: Killed ${result.killed} zombies`)
203
+ }
204
+ } catch (err) {
205
+ this._logger('Reap failed:', err.message)
206
+ }
207
+ }
208
+
209
+ async query(...args) {
210
+ const startedAt = Date.now()
211
+ this._safeHook('onQueryStart', { args, startedAt })
212
+ let retries = 0
213
+ while (true) {
214
+ try {
215
+ if (!this._client || this._isDead) {
216
+ await this.connect()
217
+ } else {
218
+ // Check heartbeat. If lease expired -> WAIT. If OK -> async update.
219
+ await this._heartbeatIfNeeded()
220
+ }
221
+
222
+ const res = await this._client.query(...args)
223
+ this._queryPrevDelay = 0
224
+ this._safeHook('onQueryEnd', { args, res, duration: Date.now() - startedAt })
225
+ return res
226
+
227
+ } catch (err) {
228
+ // If error is NOT retryable, throw immediately
229
+ if (!RetryStrategy.isRetryable(err) || retries >= this._retryStrategy.retries) {
230
+ this._safeHook('onQueryError', { args, err, duration: Date.now() - startedAt })
231
+ throw err
232
+ }
233
+ if (this._maxQueryRetryTimeMs > 0 && (Date.now() - startedAt) > this._maxQueryRetryTimeMs) {
234
+ this._safeHook('onQueryError', { args, err, duration: Date.now() - startedAt })
235
+ throw err
236
+ }
237
+
238
+ retries++
239
+ const delay = RetryStrategy.getBackoff(
240
+ this._retryStrategy.minBackoff,
241
+ this._retryStrategy.maxBackoff,
242
+ this._queryPrevDelay
243
+ )
244
+ this._queryPrevDelay = delay
245
+ this._safeHook('onQueryRetry', { retries, delay, err })
246
+ this._logger(`Query Retry ${retries}/${this._retryStrategy.retries} after ${delay}ms: ${err.message}`)
247
+
248
+ // Force reconnect on next loop
249
+ this._isDead = true
250
+ await this._disposeClient('query_error')
251
+
252
+ await new Promise(res => setTimeout(res, delay))
253
+ }
254
+ }
255
+ }
256
+
257
+ async _heartbeatIfNeeded() {
258
+ if (!this._leaseManager) return
259
+ const gen = this._generation
260
+ const client = this._client
261
+ const now = Date.now()
262
+ const remaining = this._leaseExp - now
263
+
264
+ // If the lease has more than heartbeatSoftRemainingMs (default 30s) remaining, we are safe. Do nothing.
265
+ if (remaining > this._heartbeatSoftRemainingMs) return
266
+
267
+ // If lease is expired or close to expiring (< 30s), we need update.
268
+ // Use promise deduplication to avoid thundering herd.
269
+ if (!this._heartbeatPromise) {
270
+ this._heartbeatPromise = this._performHeartbeat(gen, client).finally(() => {
271
+ this._heartbeatPromise = null
272
+ })
273
+ }
274
+
275
+ // If lease is ALREADY expired (or below the heartbeatHardWaitRemainingMs safety margin, default 5s), we MUST wait for update.
276
+ if (remaining < this._heartbeatHardWaitRemainingMs) {
277
+ await this._heartbeatPromise
278
+ } else {
279
+ // Otherwise, let it update in background (fire-and-forget)
280
+ // This is safe because we still have > 5s lease
281
+ }
282
+ }
283
+
284
+ async _performHeartbeat(gen, client) {
285
+ try {
286
+ if (!this._leaseManager) return
287
+ if (!client || client !== this._client) return
288
+ if (this._generation !== gen) return
289
+ const newExp = Date.now() + this._leaseTtlMs
290
+ const appName = this._leaseManager.generateAppName(newExp)
291
+ // Never interpolate appName into SQL. Use bind parameters.
292
+ const heartbeatQuery = this._client.query(`SELECT set_config('application_name', $1, false)`, [appName])
293
+ const timeout = new Promise((_, reject) => {
294
+ const e = new Error(`Heartbeat timed out after ${this._heartbeatTimeoutMs}ms`)
295
+ e.code = 'ETIMEDOUT'
296
+ setTimeout(() => reject(e), this._heartbeatTimeoutMs)
297
+ })
298
+ const res = await Promise.race([heartbeatQuery, timeout])
299
+ if (!res) throw new Error('Heartbeat failed: no result')
300
+ // Only update local lease if DB update succeeded.
301
+ if (this._generation === gen && client === this._client) {
302
+ this._leaseExp = newExp
303
+ this._safeHook('onHeartbeat', { gen })
304
+ }
305
+ } catch (err) {
306
+ this._logger('Heartbeat failed:', err.message)
307
+ this._safeHook('onHeartbeatFail', { gen, err })
308
+ // If we're in hard-wait territory and heartbeat fails, do NOT keep a client that
309
+ // is now invisible to other reapers (lease can expire). Default action: reconnect.
310
+ if (this._heartbeatErrorMode === 'throw') throw err
311
+ if (this._heartbeatErrorMode === 'reconnect') {
312
+ // In soft zone we already decided heartbeat matters. Don't limp along into expiry.
313
+ // If it's retryable, definitely reconnect. If it's non-retryable, reconnect won't help,
314
+ // but it's still safer than staying in an inconsistent lease state.
315
+ this._isDead = true
316
+ await this._disposeClient('heartbeat_failed')
317
+ }
318
+ }
319
+ }
320
+
321
+ _buildPgClientConfig(overrides = {}) {
322
+ const clientConfig = { ...this._config, ...overrides }
323
+ if (!clientConfig.query_timeout && this._defaultQueryTimeoutMs > 0) {
324
+ clientConfig.query_timeout = this._defaultQueryTimeoutMs
325
+ }
326
+ // Strip internal fields (keep pg config clean and future-proof)
327
+ const internalKeys = [
328
+ 'library',
329
+ 'reaper',
330
+ 'reaperRunProbability',
331
+ 'reaperErrorMode',
332
+ 'connUtilization', // legacy alias
333
+ 'minConnectionIdleTimeSec',
334
+ 'maxIdleConnectionsToKill',
335
+ 'retries',
336
+ 'minBackoff',
337
+ 'maxBackoff',
338
+ 'serviceName',
339
+ 'secret',
340
+ 'debug',
341
+ 'leaseTtlMs',
342
+ 'heartbeatSoftRemainingMs',
343
+ 'heartbeatHardWaitRemainingMs',
344
+ 'heartbeatErrorMode',
345
+ 'heartbeatTimeoutMs',
346
+ 'reaperCooldownMs',
347
+ 'leaseMode',
348
+ 'applicationName',
349
+ 'defaultQueryTimeoutMs',
350
+ 'hooks',
351
+ 'maxConnectRetryTimeMs',
352
+ 'maxQueryRetryTimeMs',
353
+ ]
354
+ for (const k of internalKeys) delete clientConfig[k]
355
+ return clientConfig
356
+ }
357
+
358
+ async _disposeClient(reason, { bumpGeneration = true } = {}) {
359
+ if (bumpGeneration) this._generation++
360
+ const old = this._client
361
+ this._client = null
362
+ if (!old) return
363
+ try {
364
+ await old.end()
365
+ } catch (_) {
366
+ // ignore
367
+ }
368
+ }
369
+
370
+ _markDeadAndDispose(client, err, source) {
371
+ // Never throw from event handlers (Lambda crash safety).
372
+ this._isDead = true
373
+ // Invalidate any in-flight connect/heartbeat on older generations.
374
+ this._generation++
375
+ // Atomically detach the client if it is the current one.
376
+ if (this._client === client) {
377
+ this._client = null
378
+ }
379
+ if (err) {
380
+ const meta = {
381
+ code: err.code,
382
+ sqlstate: err.sqlstate,
383
+ errno: err.errno,
384
+ syscall: err.syscall,
385
+ address: err.address,
386
+ port: err.port,
387
+ severity: err.severity,
388
+ routine: err.routine,
389
+ }
390
+ this._logger(`WARN: pg client ${source} (swallowed):`, err.message || err.code, meta)
391
+ this._safeHook('onClientDead', { source, err, meta })
392
+ }
393
+ // Best-effort close; do not await.
394
+ try {
395
+ client.end().catch(() => {})
396
+ } catch (_) {}
397
+ }
398
+
399
+ async clean() {
400
+ // Try to close gracefully
401
+ await this._disposeClient('clean')
402
+ }
403
+
404
+ async end() {
405
+ return this.clean()
406
+ }
407
+
408
+ getClient() {
409
+ return this._client
410
+ }
411
+ }
412
+
413
+ module.exports = ServerlessClient
package/lib/lease.js ADDED
@@ -0,0 +1,124 @@
1
+ const crypto = require('crypto')
2
+
3
+ /**
4
+ * Lease Manager
5
+ * Handles generation and verification of signed application_name strings.
6
+ * Format: "s=SERVICE;i=INSTANCE_ID;e=TIMESTAMP;g=HMAC"
7
+ * Short keys used to fit within Postgres 63-byte limit.
8
+ */
9
+ class LeaseManager {
10
+ static APP_NAME_MAX_LEN = 63
11
+ static SIG_LEN = 11 // 8 bytes -> base64url w/o padding => 11 chars
12
+
13
+ /**
14
+ * @param {string} serviceName - The logical name of the service
15
+ * @param {string} instanceId - Unique ID of this client instance
16
+ * @param {string} secret - Shared secret for HMAC signing; must be at least 16 bytes and should be distinct from the DB password
17
+ */
18
+ constructor(serviceName, instanceId, secret) {
19
+ // Keep instanceId compact and delimiter-safe.
20
+ this.instanceId = LeaseManager._sanitizeToken(instanceId || 'inst')
21
+ if (!secret) {
22
+ throw new Error('LeaseManager requires a non-empty secret')
23
+ }
24
+ if (Buffer.byteLength(String(secret), 'utf8') < 16) {
25
+ throw new Error('LeaseManager secret is too short; must be at least 16 bytes')
26
+ }
27
+ this.secret = secret
28
+ // Normalize serviceName so application_name ALWAYS fits into 63 bytes and is LIKE-safe.
29
+ this.serviceName = LeaseManager._normalizeServiceName(serviceName || 'sls_pg', this.instanceId)
30
+ }
31
+
32
+ /**
33
+ * Generates a signed application_name.
34
+ * @param {number} expirationTs - Unix timestamp (ms) when lease expires
35
+ * @returns {string} The formatted application_name
36
+ * @throws {Error} if generated name exceeds 63 bytes
37
+ */
38
+ generateAppName(expirationTs) {
39
+ // Format: s=...;i=...;e=...
40
+ const base = `s=${this.serviceName};i=${this.instanceId};e=${expirationTs}`
41
+ const sig = this._sign(base)
42
+ const result = `${base};g=${sig}`
43
+
44
+ // Hard guarantee: never exceed Postgres 63-byte truncation limit.
45
+ // If this fires, our normalization math is wrong.
46
+ if (result.length > LeaseManager.APP_NAME_MAX_LEN) {
47
+ throw new Error(`BUG: application_name too long (${result.length} > ${LeaseManager.APP_NAME_MAX_LEN}): ${result}`)
48
+ }
49
+
50
+ return result
51
+ }
52
+
53
+ /**
54
+ * Parses an application_name and verifies its signature and expiration.
55
+ * @param {string} appNameString
56
+ * @returns {Object|null} Parsed info if valid format & signature, else null
57
+ */
58
+ parseAndVerify(appNameString) {
59
+ if (!appNameString) return null
60
+
61
+ // Regex for: s=...;i=...;e=...;g=...
62
+ const match = appNameString.match(/^s=([^;]+);i=([^;]+);e=([^;]+);g=([^;]+)$/)
63
+ if (!match) return null
64
+
65
+ const [full, s, i, eStr, g] = match
66
+ const base = `s=${s};i=${i};e=${eStr}`
67
+ const expectedSig = this._sign(base)
68
+
69
+ // Timing-safe signature comparison
70
+ const bufG = Buffer.from(g, 'utf8')
71
+ const bufExpected = Buffer.from(expectedSig, 'utf8')
72
+ if (bufG.length !== bufExpected.length || !crypto.timingSafeEqual(bufG, bufExpected)) return null
73
+
74
+ const exp = parseInt(eStr, 10)
75
+ if (!Number.isFinite(exp)) return null
76
+
77
+ return {
78
+ svc: s,
79
+ inst: i,
80
+ exp,
81
+ isExpired: Date.now() > exp,
82
+ isValidSignature: true
83
+ }
84
+ }
85
+
86
+ _sign(text) {
87
+ // Compact signature: take first 8 bytes of HMAC and encode as base64url (11 chars, no padding)
88
+ const buf = crypto.createHmac('sha256', this.secret).update(text).digest()
89
+ return buf.subarray(0, 8).toString('base64url')
90
+ }
91
+
92
+ static _sanitizeToken(s) {
93
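+ // Replace anything outside [a-zA-Z0-9:_-] with '_'; this strips our ';' and '=' delimiters and the LIKE wildcard '%' (note: '_' itself is still a LIKE single-character wildcard).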
94
+ // Keep it deterministic and log-friendly.
95
+ return String(s).replace(/[^a-zA-Z0-9:_-]/g, '_')
96
+ }
97
+
98
+ static _normalizeServiceName(serviceName, instanceId) {
99
+ const original = String(serviceName || 'sls_pg')
100
+ const raw = LeaseManager._sanitizeToken(original)
101
+ const inst = LeaseManager._sanitizeToken(instanceId || 'inst')
102
+
103
+ // Total format:
104
+ // s=<svc>;i=<inst>;e=<13digits>;g=<sig>
105
+ // Fixed overhead excluding <svc>: "s="(2) + ";i="(3) + inst + ";e="(3) + 13 + ";g="(3) + SIG_LEN
106
+ // => 24 + instLen + SIG_LEN
107
+ const overhead = 24 + inst.length + LeaseManager.SIG_LEN
108
+ const maxSvcLen = Math.max(1, LeaseManager.APP_NAME_MAX_LEN - overhead)
109
+
110
+ // If sanitization changed the name, we must add a hash suffix to avoid accidental collisions
111
+ // (different originals mapping to the same sanitized token).
112
+ const needsHash = raw !== original
113
+ if (!needsHash && raw.length <= maxSvcLen) return raw
114
+
115
+ // Truncate with a short hash suffix to preserve uniqueness.
116
+ const hash = crypto.createHash('sha1').update(original).digest('hex').slice(0, 8)
117
+ if (maxSvcLen <= hash.length) return hash.slice(0, maxSvcLen)
118
+
119
+ const prefixLen = maxSvcLen - (hash.length + 1)
120
+ return `${raw.slice(0, prefixLen)}-${hash}`
121
+ }
122
+ }
123
+
124
+ module.exports = LeaseManager
package/lib/reaper.js ADDED
@@ -0,0 +1,117 @@
1
+ /**
2
+ * Connection Reaper
3
+ * Safely kills zombie connections using Advisory Locks and Signed Leases.
4
+ */
5
+ class Reaper {
6
+ // Namespace advisory locks to avoid collisions with other apps in same DB.
7
+ // 0x50474151 corresponds to "PGAQ" (pg-aequor) in ASCII.
8
+ static LOCK_NS = 0x50474151
9
+ /**
10
+ * Runs the reaping process.
11
+ * @param {Object} client - The connected pg.Client
12
+ * @param {Object} config - Config including database name
13
+ * @param {LeaseManager} leaseManager - For verifying leases
14
+ * @param {Object} strategy - { minConnIdleTimeSec, connUtilization }
15
+ * @param {Function} logger
16
+ */
17
+ static async reap(client, config, leaseManager, strategy, logger) {
18
+ const serviceName = leaseManager.serviceName
19
+ let locked = false
20
+
21
+ // 1. Acquire Advisory Lock (Non-blocking)
22
+ // Use Postgres native hashtext() to get a consistent 64-bit lock ID from the service string.
23
+ // This avoids JS-side 32-bit hash collisions.
24
+
25
+ try {
26
+ const lockRes = await client.query(
27
+ `SELECT pg_try_advisory_lock($1::int, hashtext($2)) as locked`,
28
+ [Reaper.LOCK_NS, serviceName]
29
+ )
30
+ if (lockRes.rows[0].locked !== true) {
31
+ logger(`Reaper[pid=${process.pid}]: Lock busy, skipping`)
32
+ return { locked: false, killed: 0 }
33
+ }
34
+ locked = true
35
+
36
+ // 2. Scan for zombies
37
+ const minIdle = strategy.minConnIdleTimeSec
38
+
39
+ // Fetch idle connections that look like our service
40
+ // Exclude self (pg_backend_pid())
41
+ // Optimization: Filter by application_name prefix in SQL to reduce result set size.
42
+ const query = `
43
+ SELECT pid, application_name, extract(epoch from (now() - state_change)) as idle_time
44
+ FROM pg_stat_activity
45
+ WHERE datname = current_database()
46
+ AND state = 'idle'
47
+ AND pid <> pg_backend_pid()
48
+ AND application_name LIKE $1 || '%'
49
+ `
50
+
51
+ // Correctness > optimization: do not prefilter using untrusted application_name.
52
+ const res = await client.query(query, [`s=${leaseManager.serviceName};`])
53
+ const candidates = []
54
+
55
+ for (const row of res.rows) {
56
+ if (row.idle_time < minIdle) continue
57
+
58
+ const lease = leaseManager.parseAndVerify(row.application_name)
59
+
60
+ if (!lease) {
61
+ // Invalid format or signature -> Unsafe to touch (could be neighbor with different secret)
62
+ continue
63
+ }
64
+
65
+ if (lease.isExpired) {
66
+ // Valid signature, but expired -> ZOMBIE
67
+ candidates.push({
68
+ pid: row.pid,
69
+ idle_time: Number(row.idle_time) || 0,
70
+ exp: lease.exp,
71
+ })
72
+ }
73
+ // else: Lease valid -> ACTIVE neighbor -> Do not kill
74
+ }
75
+
76
+ // 3. Terminate zombies
77
+ if (candidates.length > 0) {
78
+ // Deterministic: kill the "stale-est" first.
79
+ // Primary: oldest expiration (smallest exp) -> longest expired.
80
+ // Secondary: largest idle_time.
81
+ candidates.sort((a, b) => (a.exp - b.exp) || (b.idle_time - a.idle_time) || (a.pid - b.pid))
82
+
83
+ const limit = Math.max(1, Number(strategy.maxIdleConnectionsToKill) || 1)
84
+ const selected = candidates.slice(0, limit)
85
+ const pidsToKill = selected.map(x => x.pid)
86
+
87
+ // Log a compact reason line for debugging.
88
+ const meta = selected.map(x => `pid=${x.pid},idle=${Math.round(x.idle_time)}s,expDelta=${Math.round((Date.now() - x.exp) / 1000)}s`).join(' | ')
89
+ logger(`Reaper[pid=${process.pid}]: Killing ${pidsToKill.length} zombies: ${meta}`)
90
+ // Cast to int[] to be safe
91
+ await client.query(`SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE pid = ANY($1::int[])`, [pidsToKill])
92
+ return { locked: true, killed: pidsToKill.length }
93
+ }
94
+
95
+ return { locked: true, killed: 0 }
96
+
97
+ } catch (err) {
98
+ logger(`Reaper[pid=${process.pid}] failed:`, err && (err.stack || err.message || err))
99
+ if (strategy && strategy.reaperErrorMode === 'throw') throw err
100
+ return { locked: false, killed: 0, error: err }
101
+ } finally {
102
+ // 4. Release Lock
103
+ if (locked) {
104
+ try {
105
+ await client.query(
106
+ `SELECT pg_advisory_unlock($1::int, hashtext($2))`,
107
+ [Reaper.LOCK_NS, serviceName]
108
+ )
109
+ } catch (_) { /* ignore unlock error */ }
110
+ }
111
+ }
112
+ }
113
+ // Removed _hashString method as we use DB-side hashtext()
114
+ }
115
+
116
+ module.exports = Reaper
117
+
package/lib/retry.js ADDED
@@ -0,0 +1,59 @@
1
+ /**
2
+ * Retry Strategy & Error Classification
3
+ * Implements "Decorrelated Jitter" and safe error analysis.
4
+ */
5
+
6
+ class RetryStrategy {
7
+ /**
8
+ * Determines if an error is a "dead connection" error that warrants a retry/reconnect.
9
+ * @param {Error} err
10
+ * @returns {boolean}
11
+ */
12
+ static isRetryable(err) {
13
+ const code = err && err.code
14
+ const msg = (err && err.message) || ''
15
+ const sqlstate = (err && (err.code || err.sqlstate)) || null
16
+
17
+ // 1) Node.js socket / transport codes (not SQLSTATE)
18
+ if (code === 'ECONNRESET' || code === 'EPIPE' || code === 'ETIMEDOUT' || code === 'ECONNREFUSED') return true
19
+ if (code === 'ENETUNREACH' || code === 'EHOSTUNREACH' || code === 'EAI_AGAIN') return true
20
+ if (code === 'ECONNABORTED' || code === 'EADDRINUSE') return true
21
+
22
+ // 2) SQLSTATE-first (stable)
23
+ // Class 08 — connection exception
24
+ if (typeof sqlstate === 'string' && sqlstate.length === 5 && sqlstate.startsWith('08')) return true
25
+
26
+ // Admin / crash / cannot continue
27
+ if (sqlstate === '57P01' || sqlstate === '57P02' || sqlstate === '57P03') return true
28
+
29
+ // Too many connections (can be transient under spiky concurrency)
30
+ if (sqlstate === '53300') return true
31
+
32
+ // Optional: transient concurrency failures (only safe if queries are idempotent)
33
+ // Keep disabled for now to avoid duplicating non-idempotent writes.
34
+ // if (sqlstate === '40001' || sqlstate === '40P01') return true
35
+
36
+ // 3) LAST-RESORT message fallbacks (keep minimal; remove over time)
37
+ if (msg.includes('Connection terminated unexpectedly')) return true
38
+ if (msg.includes('sorry, too many clients already')) return true
39
+
40
+ return false
41
+ }
42
+
43
+ /**
44
+ * Calculates backoff delay using "Decorrelated Jitter".
45
+ * sleep = min(cap, random(base, sleep * 3))
46
+ * @param {number} baseMs - Minimum wait
47
+ * @param {number} capMs - Maximum wait
48
+ * @param {number} previousDelay - The delay used in the previous attempt (or 0)
49
+ * @returns {number} ms to sleep
50
+ */
51
+ static getBackoff(baseMs, capMs, previousDelay) {
52
+ const prev = previousDelay || baseMs
53
+ const randRange = (min, max) => Math.floor(Math.random() * (max - min + 1)) + min
54
+ return Math.min(capMs, randRange(baseMs, prev * 3))
55
+ }
56
+ }
57
+
58
+ module.exports = RetryStrategy
59
+
package/package.json ADDED
@@ -0,0 +1,46 @@
1
+ {
2
+ "name": "pg-aequor",
3
+ "version": "0.1.0",
4
+ "description": "Crash-safe, coordination-aware PostgreSQL client for Serverless environments",
5
+ "main": "./index.js",
6
+ "types": "index.d.ts",
7
+ "author": "dimaq12",
8
+ "license": "ISC",
9
+ "repository": {
10
+ "type": "git",
11
+ "url": "git+ssh://git@github.com/dimaq12/pg-aequor.git"
12
+ },
13
+ "bugs": {
14
+ "url": "https://github.com/dimaq12/pg-aequor/issues"
15
+ },
16
+ "homepage": "https://github.com/dimaq12/pg-aequor#readme",
17
+ "publishConfig": {
18
+ "registry": "https://registry.npmjs.org/"
19
+ },
20
+ "sideEffects": false,
21
+ "engines": {
22
+ "node": ">=18"
23
+ },
24
+ "exports": {
25
+ ".": {
26
+ "types": "./index.d.ts",
27
+ "require": "./index.js"
28
+ }
29
+ },
30
+ "files": [
31
+ "index.js",
32
+ "index.d.ts",
33
+ "lib/",
34
+ "README.md",
35
+ "LICENSE"
36
+ ],
37
+ "scripts": {
38
+ "test": "node --test"
39
+ },
40
+ "peerDependencies": {
41
+ "pg": "^8.11.0"
42
+ },
43
+ "devDependencies": {
44
+ "pg": "^8.11.0"
45
+ }
46
+ }