pg-aequor 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +17 -0
- package/README.md +81 -0
- package/index.d.ts +200 -0
- package/index.js +7 -0
- package/lib/client.js +413 -0
- package/lib/lease.js +124 -0
- package/lib/reaper.js +117 -0
- package/lib/retry.js +59 -0
- package/package.json +46 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
ISC License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 dimaq12
|
|
4
|
+
|
|
5
|
+
Permission to use, copy, modify, and/or distribute this software for any
|
|
6
|
+
purpose with or without fee is hereby granted, provided that the above
|
|
7
|
+
copyright notice and this permission notice appear in all copies.
|
|
8
|
+
|
|
9
|
+
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
|
10
|
+
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
|
11
|
+
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
|
12
|
+
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
|
13
|
+
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
|
14
|
+
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
|
15
|
+
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|
16
|
+
|
|
17
|
+
|
package/README.md
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
# pg-aequor
|
|
2
|
+
|
|
3
|
+
If you use standard `pg` in AWS Lambda, you are either a madman or you haven't yet seen your database die under a pile of zombie connections. This library is a wrapper that forces PostgreSQL and Serverless to coexist in peace.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## Why isn't this just another wrapper?
|
|
8
|
+
|
|
9
|
+
In standard environments, connections live long. In Lambda, they "freeze" in suspended instances. We solve this with three cooperating mechanisms:
|
|
10
|
+
|
|
11
|
+
1. **Signed Leases**: Each connection signs itself in `application_name` (expiration + HMAC).
|
|
12
|
+
2. **Distributed Reaper**: A background "Reaper" scans the database and kills connections whose lease has expired.
|
|
13
|
+
3. **Advisory Locks**: Coordination is handled via Postgres Advisory Locks, so instances don't fight each other to clean up the mess.
|
|
14
|
+
|
|
15
|
+
---
|
|
16
|
+
|
|
17
|
+
## Technical Rules (Read this so it doesn't hurt)
|
|
18
|
+
|
|
19
|
+
* **Disposable Idle**: If a connection is idle longer than the lease TTL, it is considered a corpse. Another instance will kill it. This is a feature, not a bug.
|
|
20
|
+
* **Crash Safety**: We swallow socket errors in `pg.Client` handlers. No more `Runtime.ExitError` crashing your entire Lambda.
|
|
21
|
+
* **Single Connection Architecture**: The Reaper runs on the *active* connection using Advisory Locks. It adds minimal latency to the "leader" request but prevents connection storms (Reaper-DOS) during massive scale-ups.
|
|
22
|
+
|
|
23
|
+
---
|
|
24
|
+
|
|
25
|
+
## Configuration
|
|
26
|
+
|
|
27
|
+
### Lease/Reaper Parameters (`secret` is required unless `leaseMode` is `'optional'`)
|
|
28
|
+
|
|
29
|
+
| Parameter | Type | Description |
|
|
30
|
+
| :--- | :--- | :--- |
|
|
31
|
+
| `secret` | `string` | **Critical.** Shared secret for HMAC signing. Do NOT use your DB password. Must be at least 16 bytes. |
|
|
32
|
+
| `leaseMode` | `string` | `'required'` (throws without secret) or `'optional'`. Default: `'required'`. |
|
|
33
|
+
| `leaseTtlMs` | `number` | Lease Time-To-Live in milliseconds. Default: `90000` (90s). |
|
|
34
|
+
|
|
35
|
+
### Retry Strategy
|
|
36
|
+
|
|
37
|
+
We use **Decorrelated Jitter** and **SQLSTATE** filtering. Retries trigger only on transient errors (network, DB restart, connection limits).
|
|
38
|
+
|
|
39
|
+
---
|
|
40
|
+
|
|
41
|
+
## Observability (Hooks)
|
|
42
|
+
|
|
43
|
+
Do not put heavy logic in hooks. Use them for metrics.
|
|
44
|
+
|
|
45
|
+
```javascript
|
|
46
|
+
const { ServerlessClient } = require('pg-aequor')
|
|
47
|
+
|
|
48
|
+
const client = new ServerlessClient({
|
|
49
|
+
host: process.env.DB_HOST,
|
|
50
|
+
user: process.env.DB_USER,
|
|
51
|
+
password: process.env.DB_PASSWORD,
|
|
52
|
+
database: process.env.DB_NAME,
|
|
53
|
+
|
|
54
|
+
// Coordination Secret (Distinct from DB password)
|
|
55
|
+
secret: process.env.COORD_SECRET,
|
|
56
|
+
|
|
57
|
+
hooks: {
|
|
58
|
+
onQueryRetry: ({ retries, err }) => {
|
|
59
|
+
console.warn(`Retry #${retries} due to ${err.code}`)
|
|
60
|
+
},
|
|
61
|
+
onClientDead: ({ source, meta }) => {
|
|
62
|
+
// Perfect for CloudWatch EMF or X-Ray
|
|
63
|
+
logToEMF('ClientDeath', 1, { sqlstate: meta?.sqlstate })
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
})
|
|
67
|
+
|
|
68
|
+
await client.connect()
|
|
69
|
+
const res = await client.query('SELECT NOW()')
|
|
70
|
+
await client.clean() // or await client.end()
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
---
|
|
74
|
+
|
|
75
|
+
## Installation
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
npm install pg-aequor
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
> **Attention:** This library requires `pg` as a peer dependency. Tested on versions `^8.11.0`.
|
package/index.d.ts
ADDED
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
import { Client, ClientConfig, QueryResult, QueryResultRow } from 'pg';
|
|
2
|
+
|
|
3
|
+
/**
 * Observability callbacks invoked by ServerlessClient.
 *
 * Hooks are called synchronously and their return value is ignored (a returned
 * promise is not awaited). Exceptions thrown by a hook are caught and discarded,
 * so a broken hook cannot fail a query. Keep them cheap: metrics and logging only.
 */
export interface ServerlessClientHooks {
  /**
   * Called when a new database connection is successfully established.
   * `gen` is the client's internal connection generation counter for that attempt.
   */
  onConnect?: (payload: { gen: number }) => void;

  /**
   * Called when a connection attempt fails and is about to be retried.
   * `retries` is the number of the upcoming retry (1-based), `delay` is the
   * backoff in milliseconds that will be waited first, `err` is the failure.
   */
  onReconnect?: (payload: { gen: number; retries: number; delay: number; err: Error }) => void;

  /**
   * Called when a query fails with a retryable error and is about to be retried.
   * `retries` is the number of the upcoming retry (1-based) and `delay` the
   * backoff in milliseconds. The retry runs on a freshly created connection.
   */
  onQueryRetry?: (payload: { retries: number; delay: number; err: Error }) => void;

  /**
   * Called when a heartbeat (lease renewal) succeeds.
   */
  onHeartbeat?: (payload: { gen: number }) => void;

  /**
   * Called when a heartbeat fails (either transiently or permanently),
   * including when it times out. Fired regardless of `heartbeatErrorMode`.
   */
  onHeartbeatFail?: (payload: { gen: number; err: Error }) => void;

  /**
   * Called when the underlying pg.Client emits an 'error' event or ends unexpectedly.
   * This is a critical signal that the connection is dead.
   * `meta` carries low-level fields copied from the error when present
   * (code, sqlstate, errno, syscall, address, port, severity, routine).
   */
  onClientDead?: (payload: { source: 'error' | 'end'; err?: Error; meta?: { sqlstate?: string; [key: string]: any } }) => void;

  /**
   * Called once per query() call, immediately before the first attempt
   * (it is not repeated for retries). Useful for tracing start time.
   * `args` are the arguments passed to query(); `startedAt` is a Date.now() timestamp (ms).
   */
  onQueryStart?: (payload: { args: any[]; startedAt: number }) => void;

  /**
   * Called immediately after a user query successfully completes.
   * `duration` is in milliseconds, measured from `startedAt`, so it includes
   * any time spent on retries and backoff.
   */
  onQueryEnd?: (payload: { args: any[]; res: QueryResult<any>; duration: number }) => void;

  /**
   * Called when query() gives up and is about to reject: the error is not
   * retryable, the retry count is exhausted, or the retry time budget has been
   * exceeded. Attempts that will be retried trigger onQueryRetry instead.
   * `duration` is in milliseconds, measured from the start of the query() call.
   */
  onQueryError?: (payload: { args: any[]; err: Error; duration: number }) => void;
}
|
|
50
|
+
|
|
51
|
+
/**
 * Configuration for ServerlessClient: every `pg` ClientConfig option plus the
 * lease / reaper / retry options below. The pg-aequor specific options are
 * stripped before the configuration is handed to `pg.Client`.
 */
export interface ServerlessClientConfig extends ClientConfig {
  /**
   * Shared secret for signing leases. Required if leaseMode is 'required'.
   * Conceptually distinct from DB password. Must be at least 16 bytes.
   */
  secret?: string;

  /**
   * Logical name of the service using this client. Used for advisory lock namespace.
   * Defaults to AWS_LAMBDA_FUNCTION_NAME or 'sls_pg'.
   */
  serviceName?: string;

  /**
   * Coordination mode.
   * - 'required': throws if secret is missing (default).
   * - 'optional': disables lease/reaper/heartbeat if secret is missing.
   */
  leaseMode?: 'required' | 'optional';

  /**
   * Enable/disable the background connection reaper. Default: true.
   * The reaper is always disabled when no secret is configured.
   */
  reaper?: boolean;

  /**
   * Probability (0.0 - 1.0) of running the reaper on connect.
   * Falls back to the legacy 'connUtilization' option when unset. Default: 0.1.
   */
  reaperRunProbability?: number;

  /**
   * Minimum time (ms) between reaper runs on this container. Default: 30000 (30s).
   * A random jitter is added on top of this value so containers do not reap in lockstep.
   */
  reaperCooldownMs?: number;

  /**
   * How to handle reaper internal errors.
   * - 'swallow': log and ignore (default).
   * - 'throw': throw exception to the caller.
   */
  reaperErrorMode?: 'swallow' | 'throw';

  /**
   * Minimum idle time (seconds) before a connection is considered a zombie candidate.
   * Default: 180 (3m). A value of 0 is treated as unset and falls back to the default.
   */
  minConnectionIdleTimeSec?: number;

  /**
   * Maximum number of zombie connections to kill in one reaper pass. Default: 10.
   * A value of 0 is treated as unset and falls back to the default.
   */
  maxIdleConnectionsToKill?: number;

  /**
   * Lease time-to-live in milliseconds. Default: 90000 (90s).
   */
  leaseTtlMs?: number;

  /**
   * Time remaining (ms) where we soft-check lease renewal. Default: 30000.
   * Below this threshold a renewal is started in the background.
   */
  heartbeatSoftRemainingMs?: number;

  /**
   * Time remaining (ms) where we force-wait for lease renewal. Default: 5000.
   * Below this threshold the query waits for the renewal to finish first.
   */
  heartbeatHardWaitRemainingMs?: number;

  /**
   * Time (ms) to wait for set_config heartbeat query before timing out. Default: 2000.
   */
  heartbeatTimeoutMs?: number;

  /**
   * Action on heartbeat failure.
   * - 'reconnect': mark client dead and reconnect (safest for serverless).
   * - 'swallow': log and ignore.
   * - 'throw': throw error.
   * Default: 'reconnect'.
   */
  heartbeatErrorMode?: 'reconnect' | 'swallow' | 'throw';

  /**
   * Maximum number of retries for a connect or query operation. Default: 3.
   */
  retries?: number;

  /**
   * Lower bound (ms) for the retry backoff delay. Default: 100.
   */
  minBackoff?: number;

  /**
   * Upper bound (ms) for the retry backoff delay. Default: 2000.
   */
  maxBackoff?: number;

  /**
   * Max time (ms) to spend retrying a connect operation. Default: 15000.
   * 0 disables the time limit (the retry count still applies).
   */
  maxConnectRetryTimeMs?: number;

  /**
   * Max time (ms) to spend retrying a query operation. Default: 15000.
   * 0 disables the time limit (the retry count still applies).
   */
  maxQueryRetryTimeMs?: number;

  /**
   * Default query_timeout (ms) passed to pg when the config does not set
   * `query_timeout` itself. Default: 0 (no default timeout is applied).
   */
  defaultQueryTimeoutMs?: number;

  /**
   * Observability hooks.
   */
  hooks?: ServerlessClientHooks;

  /**
   * Debug logging (console.log). Default: false.
   */
  debug?: boolean;

  /**
   * Underlying pg driver instance (e.g. for X-Ray capture).
   * Must expose a `Client` constructor. Defaults to require('pg').
   */
  library?: any;

  // Legacy aliases
  /** Legacy alias for `reaperRunProbability`. */
  connUtilization?: number;
  /** Legacy alias for pg's `application_name` (used as the base name when leases are disabled). */
  applicationName?: string;
}
|
|
168
|
+
|
|
169
|
+
/**
 * A single-connection PostgreSQL client intended for serverless runtimes.
 * It wraps one `pg.Client`, recreating it transparently when it dies.
 */
export class ServerlessClient {
  /**
   * Creates the client without opening a connection.
   * Throws if `leaseMode` is 'required' (the default) and `secret` is missing.
   */
  constructor(config: ServerlessClientConfig);

  /**
   * Establishes a connection (if not already connected) and acquires a lease.
   * Concurrent calls share one in-flight connection attempt. Retryable failures
   * are retried with backoff; the promise rejects with the last error once the
   * retry count or time budget is exhausted.
   */
  connect(): Promise<void>;

  /**
   * Executes a query with automatic retry and lease management.
   * Connects on demand, so calling connect() first is optional.
   *
   * A retry disposes the current connection and runs the query on a new one,
   * so session state (open transactions, SET values, temp tables) does not
   * survive a retry.
   */
  query<R extends QueryResultRow = any, I extends any[] = any[]>(
    queryTextOrConfig: string | import('pg').QueryConfig<I>,
    values?: I
  ): Promise<QueryResult<R>>;

  /**
   * Gracefully closes the connection. Errors while closing are ignored.
   * The instance stays usable: the next connect() or query() opens a new connection.
   */
  clean(): Promise<void>;

  /**
   * Alias for clean().
   */
  end(): Promise<void>;

  /**
   * Returns the underlying pg.Client instance (if connected), otherwise null.
   * Use with caution: the returned client is replaced on reconnect, so do not cache it.
   */
  getClient(): Client | null;
}
|
package/index.js
ADDED
package/lib/client.js
ADDED
|
@@ -0,0 +1,413 @@
|
|
|
1
|
+
const RetryStrategy = require('./retry')
|
|
2
|
+
const LeaseManager = require('./lease')
|
|
3
|
+
const Reaper = require('./reaper')
|
|
4
|
+
const crypto = require('crypto')
|
|
5
|
+
|
|
6
|
+
/**
 * Single-connection PostgreSQL client for serverless runtimes.
 *
 * Wraps one `pg.Client` and recreates it on demand. When a coordination secret
 * is configured, every connection carries a signed lease in `application_name`
 * which is renewed ("heartbeat") before it expires, and a best-effort reaper
 * pass is triggered after connecting.
 *
 * State model:
 *  - `_client`      the currently adopted pg client, or null
 *  - `_isDead`      true when the adopted client must not be reused
 *  - `_generation`  bumped whenever the current connection is invalidated, so
 *                   in-flight connect/heartbeat work for an older generation
 *                   can detect that it has been superseded
 */
class ServerlessClient {
  /**
   * @param {object} [config] pg client config plus pg-aequor options
   *   (secret, leaseMode, reaper*, heartbeat*, retries, minBackoff, maxBackoff, hooks, ...).
   * @throws {Error} when leaseMode is 'required' (default) and config.secret is missing.
   */
  constructor(config = {}) {
    this._config = config
    this._library = config.library || require('pg')
    this._client = null
    this._isDead = false // Flag to force recreation
    this._generation = 0
    this._connectPromise = null

    // Retry Strategy
    this._retryStrategy = {
      retries: config.retries ?? 3,
      minBackoff: config.minBackoff ?? 100, // ms
      maxBackoff: config.maxBackoff ?? 2000 // ms
    }

    // Lease/Reaper mode:
    // - required: secret must be provided (safe distributed coordination)
    // - optional: if secret missing, disable lease/reaper/heartbeat but client still works
    this._leaseMode = config.leaseMode || 'required' // 'required' | 'optional'

    // Reaper config (can be disabled if lease is disabled)
    this._reaperEnabled = config.reaper !== false
    this._strategy = {
      // Probability of running a reaper pass on connect (0..1). Alias for backwards compatibility.
      reaperRunProbability: config.reaperRunProbability ?? config.connUtilization ?? 0.1,
      // Default should be minutes, not seconds, otherwise you create your own outages.
      // NOTE: `||` means an explicit 0 falls back to the default for the next two options.
      minConnIdleTimeSec: config.minConnectionIdleTimeSec || 180, // Default 3m
      maxIdleConnectionsToKill: config.maxIdleConnectionsToKill || 10,
      reaperErrorMode: config.reaperErrorMode || 'swallow', // 'swallow' | 'throw'
    }
    this._reaperCooldownMs = config.reaperCooldownMs ?? 30000
    // Jittered Cooldown Base: Add random offset to avoid synchronized reapers
    this._reaperBaseInterval = this._reaperCooldownMs + Math.random() * (this._reaperCooldownMs / 3)
    this._reaperCurrentInterval = this._reaperBaseInterval
    this._reaperNextRunAt = 0

    // Setup Lease Manager
    const serviceName = config.serviceName || process.env.AWS_LAMBDA_FUNCTION_NAME || 'sls_pg'
    // 48-bit random instance id => exactly 8 base64url chars (no padding). Good entropy, tight budget.
    const instanceId = crypto.randomBytes(6).toString('base64url')
    const secret = config.secret // explicit coordination secret (not db password)
    // Plain application_name used when leases are disabled.
    this._baseApplicationName =
      (typeof config.application_name === 'string' && config.application_name) ||
      (typeof config.applicationName === 'string' && config.applicationName) ||
      serviceName

    if (!secret) {
      if (this._leaseMode === 'required') {
        throw new Error('Missing config.secret (required for lease/reaper). Set leaseMode=\"optional\" to disable lease/reaper/heartbeat.')
      }
      this._leaseManager = null
      this._reaperEnabled = false
    } else {
      this._leaseManager = new LeaseManager(serviceName, instanceId, secret)
    }

    // Heartbeat state
    this._leaseExp = 0 // epoch ms at which the current lease expires (0 = no lease)
    this._heartbeatPromise = null // in-flight renewal, shared by concurrent callers
    this._leaseTtlMs = config.leaseTtlMs ?? 90000
    this._heartbeatSoftRemainingMs = config.heartbeatSoftRemainingMs ?? 30000
    this._heartbeatHardWaitRemainingMs = config.heartbeatHardWaitRemainingMs ?? 5000
    this._heartbeatErrorMode = config.heartbeatErrorMode || 'reconnect' // 'swallow' | 'reconnect' | 'throw'
    this._heartbeatTimeoutMs = config.heartbeatTimeoutMs ?? 2000
    this._defaultQueryTimeoutMs = config.defaultQueryTimeoutMs ?? 0

    // Logging
    this._logger = config.debug ? console.log : () => {}
    this._hooks = config.hooks || {}

    // Backoff state (decorrelated jitter needs previous delay)
    this._connectPrevDelay = 0
    this._queryPrevDelay = 0
    this._maxConnectRetryTimeMs = config.maxConnectRetryTimeMs ?? 15000
    this._maxQueryRetryTimeMs = config.maxQueryRetryTimeMs ?? 15000
  }

  /**
   * Invokes the named observability hook, if one is configured.
   * Hook exceptions are swallowed so instrumentation can never break a query.
   */
  _safeHook(name, payload) {
    const fn = this._hooks && this._hooks[name]
    if (typeof fn !== 'function') return
    try { fn(payload) } catch (_) { /* never throw from hooks */ }
  }

  /**
   * Ensures there is a live connection. A no-op when one already exists;
   * concurrent callers share a single in-flight attempt.
   * @returns {Promise<void>} rejects with the last connect error once retries are exhausted.
   */
  async connect() {
    if (this._client && !this._isDead) return
    if (this._connectPromise) return this._connectPromise
    const gen = ++this._generation
    this._connectPromise = (async () => {
      try {
        await this._connectWithRetry(gen)
      } finally {
        this._connectPromise = null
      }
    })()
    return this._connectPromise
  }

  /**
   * Runs _connect() until it succeeds, the error is not retryable, the retry
   * count is exhausted, or the connect time budget is exceeded.
   */
  async _connectWithRetry(gen) {
    const startedAt = Date.now()
    let retries = 0
    while (true) {
      try {
        const adopted = await this._connect(gen)
        this._connectPrevDelay = 0
        // Only announce connections that were actually adopted; a connection that
        // was superseded while connecting has already been closed again.
        if (adopted) this._safeHook('onConnect', { gen })
        return // Success
      } catch (err) {
        if (this._maxConnectRetryTimeMs > 0 && (Date.now() - startedAt) > this._maxConnectRetryTimeMs) {
          throw err
        }
        if (!RetryStrategy.isRetryable(err) || retries >= this._retryStrategy.retries) {
          throw err
        }
        retries++
        const delay = RetryStrategy.getBackoff(
          this._retryStrategy.minBackoff,
          this._retryStrategy.maxBackoff,
          this._connectPrevDelay
        )
        this._connectPrevDelay = delay
        this._safeHook('onReconnect', { gen, retries, delay, err })
        this._logger(`Connect Retry ${retries}/${this._retryStrategy.retries} after ${delay}ms: ${err.message}`)
        await new Promise(res => setTimeout(res, delay))
      }
    }
  }

  /**
   * Performs one connection attempt for generation `gen`.
   * @returns {Promise<boolean>} true when the new client was adopted as the
   *   current one, false when a newer generation started meanwhile and the
   *   freshly opened connection was closed again.
   */
  async _connect(gen) {
    // Internal cleanup before creating a new client should NOT invalidate this generation.
    await this._disposeClient('reconnect', { bumpGeneration: false })

    // Generate initial lease
    let appName = String(this._baseApplicationName || 'app').slice(0, 63)
    if (this._leaseManager) {
      this._leaseExp = Date.now() + this._leaseTtlMs
      appName = this._leaseManager.generateAppName(this._leaseExp)
    } else {
      this._leaseExp = 0
    }

    const clientConfig = this._buildPgClientConfig({ application_name: appName })

    const client = new this._library.Client(clientConfig)

    // Crash Safety: Swallow errors to prevent Runtime.ExitError
    client.on('error', (err) => this._markDeadAndDispose(client, err, 'error'))
    // If connection ends, the client is not reusable.
    client.on('end', () => this._markDeadAndDispose(client, null, 'end'))

    await client.connect()

    // Generation guard: do not resurrect if a newer generation started while we were connecting.
    if (this._generation !== gen) {
      try { await client.end() } catch (_) {}
      return false
    }

    this._client = client
    this._isDead = false

    // Run Reaper if enabled (async, best effort)
    if (this._reaperEnabled) {
      this._reap().catch(err => this._logger('Reap failed:', err.message))
    }
    return true
  }

  /**
   * Best-effort cleanup of expired connections, run on the current client.
   * Skipped when leases are disabled, during the cooldown window, or when
   * there is no client. Never throws.
   */
  async _reap() {
    // 1. Check Lease Manager
    if (!this._leaseManager) return

    // 2. Jittered Cooldown + Backoff
    const now = Date.now()
    if (now < this._reaperNextRunAt) return

    // 3. Use CURRENT client (Single Connection Architecture)
    const client = this._client
    if (!client) return

    try {
      const result = await Reaper.reap(client, this._config, this._leaseManager, this._strategy, this._logger)

      if (!result.locked) {
        // Lock busy (someone else is reaping) -> Exponential Backoff
        this._reaperCurrentInterval = Math.min(this._reaperCurrentInterval * 1.5, 600000) // max 10m
      } else {
        // Success (or just acquired lock) -> Reset to Base
        this._reaperCurrentInterval = this._reaperBaseInterval
      }

      // Schedule next run with jitter
      const jitter = Math.random() * (this._reaperCurrentInterval / 2)
      this._reaperNextRunAt = now + this._reaperCurrentInterval + jitter

      if (result.killed > 0) {
        this._logger(`Reaper: Killed ${result.killed} zombies`)
      }
    } catch (err) {
      this._logger('Reap failed:', err.message)
    }
  }

  /**
   * Executes a query, connecting on demand and retrying retryable failures.
   * Each retry disposes the connection and runs on a new one, so session state
   * does not survive a retry.
   * @param {...*} args forwarded unchanged to pg's client.query().
   * @returns {Promise<object>} the pg query result.
   * @throws the last error once it is non-retryable, the retry count is
   *   exhausted, or the query retry time budget is exceeded.
   */
  async query(...args) {
    const startedAt = Date.now()
    this._safeHook('onQueryStart', { args, startedAt })
    let retries = 0
    while (true) {
      try {
        const client = await this._acquireClient()
        const res = await client.query(...args)
        this._queryPrevDelay = 0
        this._safeHook('onQueryEnd', { args, res, duration: Date.now() - startedAt })
        return res

      } catch (err) {
        // "No usable client" is raised by _acquireClient() and is always worth another attempt.
        const retryable = (err && err.code === 'EAEQUOR_NO_CLIENT') || RetryStrategy.isRetryable(err)
        // If error is NOT retryable, throw immediately
        if (!retryable || retries >= this._retryStrategy.retries) {
          this._safeHook('onQueryError', { args, err, duration: Date.now() - startedAt })
          throw err
        }
        if (this._maxQueryRetryTimeMs > 0 && (Date.now() - startedAt) > this._maxQueryRetryTimeMs) {
          this._safeHook('onQueryError', { args, err, duration: Date.now() - startedAt })
          throw err
        }

        retries++
        const delay = RetryStrategy.getBackoff(
          this._retryStrategy.minBackoff,
          this._retryStrategy.maxBackoff,
          this._queryPrevDelay
        )
        this._queryPrevDelay = delay
        this._safeHook('onQueryRetry', { retries, delay, err })
        this._logger(`Query Retry ${retries}/${this._retryStrategy.retries} after ${delay}ms: ${err.message}`)

        // Force reconnect on next loop
        this._isDead = true
        await this._disposeClient('query_error')

        await new Promise(res => setTimeout(res, delay))
      }
    }
  }

  /**
   * Returns a client that is safe to run a query on: connects when there is
   * none, otherwise renews the lease if needed and reconnects when the renewal
   * dropped the connection (heartbeatErrorMode 'reconnect').
   * @throws {Error} with code 'EAEQUOR_NO_CLIENT' when the connection was
   *   invalidated again before it could be used.
   */
  async _acquireClient() {
    if (!this._client || this._isDead) {
      await this.connect()
    } else {
      // Check heartbeat. If lease expired -> WAIT. If OK -> async update.
      await this._heartbeatIfNeeded()
      // A failed heartbeat may have disposed the client; do not dereference null.
      if (!this._client || this._isDead) await this.connect()
    }

    const client = this._client
    if (!client || this._isDead) {
      const e = new Error('No usable connection: the client was invalidated before the query could run')
      e.code = 'EAEQUOR_NO_CLIENT'
      throw e
    }
    return client
  }

  /**
   * Renews the lease when it is close to expiry.
   *  - more than heartbeatSoftRemainingMs left: nothing to do
   *  - less than heartbeatHardWaitRemainingMs left (or expired): wait for the renewal
   *  - in between: start the renewal in the background and return immediately
   */
  async _heartbeatIfNeeded() {
    if (!this._leaseManager) return
    const gen = this._generation
    const client = this._client
    const now = Date.now()
    const remaining = this._leaseExp - now

    // Plenty of lease left: we are safe. Do nothing.
    if (remaining > this._heartbeatSoftRemainingMs) return

    // Lease is expired or close to expiring, we need an update.
    // Use promise deduplication to avoid thundering herd.
    if (!this._heartbeatPromise) {
      this._heartbeatPromise = this._performHeartbeat(gen, client).finally(() => {
        this._heartbeatPromise = null
      })
    }
    const pending = this._heartbeatPromise

    // If lease is ALREADY expired (or inside the safety margin), we MUST wait for update.
    if (remaining < this._heartbeatHardWaitRemainingMs) {
      await pending
    } else {
      // Otherwise, let it update in background (fire-and-forget); the lease is
      // still valid. Attach a no-op handler so a rejection (heartbeatErrorMode
      // 'throw') does not surface as an unhandled promise rejection.
      pending.catch(() => {})
    }
  }

  /**
   * Writes a fresh signed lease into the session's application_name, bounded
   * by heartbeatTimeoutMs. On failure applies heartbeatErrorMode.
   * @param {number} gen generation the heartbeat was started for
   * @param {object} client pg client the heartbeat was started for
   */
  async _performHeartbeat(gen, client) {
    let timer = null
    try {
      if (!this._leaseManager) return
      if (!client || client !== this._client) return
      if (this._generation !== gen) return
      const newExp = Date.now() + this._leaseTtlMs
      const appName = this._leaseManager.generateAppName(newExp)
      // Never interpolate appName into SQL. Use bind parameters.
      const heartbeatQuery = client.query(`SELECT set_config('application_name', $1, false)`, [appName])
      const timeout = new Promise((_, reject) => {
        const e = new Error(`Heartbeat timed out after ${this._heartbeatTimeoutMs}ms`)
        e.code = 'ETIMEDOUT'
        timer = setTimeout(() => reject(e), this._heartbeatTimeoutMs)
      })
      const res = await Promise.race([heartbeatQuery, timeout])
      if (!res) throw new Error('Heartbeat failed: no result')
      // Only update local lease if DB update succeeded.
      if (this._generation === gen && client === this._client) {
        this._leaseExp = newExp
        this._safeHook('onHeartbeat', { gen })
      }
    } catch (err) {
      this._logger('Heartbeat failed:', err.message)
      this._safeHook('onHeartbeatFail', { gen, err })
      // If we're in hard-wait territory and heartbeat fails, do NOT keep a client that
      // is now invisible to other reapers (lease can expire). Default action: reconnect.
      if (this._heartbeatErrorMode === 'throw') throw err
      // Only drop the connection the heartbeat actually ran on; if it has been
      // replaced meanwhile, the failure says nothing about the new client.
      if (this._heartbeatErrorMode === 'reconnect' && client === this._client) {
        // In soft zone we already decided heartbeat matters. Don't limp along into expiry.
        // If it's retryable, definitely reconnect. If it's non-retryable, reconnect won't help,
        // but it's still safer than staying in an inconsistent lease state.
        this._isDead = true
        await this._disposeClient('heartbeat_failed')
      }
    } finally {
      // Do not leave the timeout timer pending once the race is decided.
      if (timer) clearTimeout(timer)
    }
  }

  /**
   * Builds the config object handed to pg.Client: user config plus overrides,
   * with the default query timeout applied and pg-aequor options removed.
   */
  _buildPgClientConfig(overrides = {}) {
    const clientConfig = { ...this._config, ...overrides }
    if (!clientConfig.query_timeout && this._defaultQueryTimeoutMs > 0) {
      clientConfig.query_timeout = this._defaultQueryTimeoutMs
    }
    // Strip internal fields (keep pg config clean and future-proof)
    const internalKeys = [
      'library',
      'reaper',
      'reaperRunProbability',
      'reaperErrorMode',
      'connUtilization', // legacy alias
      'minConnectionIdleTimeSec',
      'maxIdleConnectionsToKill',
      'retries',
      'minBackoff',
      'maxBackoff',
      'serviceName',
      'secret',
      'debug',
      'leaseTtlMs',
      'heartbeatSoftRemainingMs',
      'heartbeatHardWaitRemainingMs',
      'heartbeatErrorMode',
      'heartbeatTimeoutMs',
      'reaperCooldownMs',
      'leaseMode',
      'applicationName',
      'defaultQueryTimeoutMs',
      'hooks',
      'maxConnectRetryTimeMs',
      'maxQueryRetryTimeMs',
    ]
    for (const k of internalKeys) delete clientConfig[k]
    return clientConfig
  }

  /**
   * Detaches the current client and closes it, ignoring close errors.
   * @param {string} reason label for the caller's intent (currently informational only)
   * @param {{bumpGeneration?: boolean}} [options] bump the generation (default)
   *   so in-flight work for the old connection is invalidated.
   */
  async _disposeClient(reason, { bumpGeneration = true } = {}) {
    if (bumpGeneration) this._generation++
    const old = this._client
    this._client = null
    if (!old) return
    try {
      await old.end()
    } catch (_) {
      // ignore
    }
  }

  /**
   * Event handler for the pg client's 'error' and 'end' events.
   *
   * Only the currently adopted client may change this instance's state. Events
   * from clients that were already detached (including the 'end' emitted by our
   * own deliberate end() calls) must not mark a newer, healthy connection dead
   * or bump the generation underneath an in-flight connect.
   */
  _markDeadAndDispose(client, err, source) {
    // Never throw from event handlers (Lambda crash safety).
    const isCurrent = this._client === client
    if (isCurrent) {
      this._isDead = true
      // Invalidate any in-flight connect/heartbeat on older generations.
      this._generation++
      // Atomically detach the client.
      this._client = null
    }
    if (err) {
      const meta = {
        code: err.code,
        sqlstate: err.sqlstate,
        errno: err.errno,
        syscall: err.syscall,
        address: err.address,
        port: err.port,
        severity: err.severity,
        routine: err.routine,
      }
      this._logger(`WARN: pg client ${source} (swallowed):`, err.message || err.code, meta)
      this._safeHook('onClientDead', { source, err, meta })
    } else if (isCurrent) {
      // The live connection went away without an error event (unexpected end).
      this._logger(`WARN: pg client ${source} (connection closed unexpectedly)`)
      this._safeHook('onClientDead', { source })
    }
    // Best-effort close; do not await.
    try {
      client.end().catch(() => {})
    } catch (_) {}
  }

  /**
   * Closes the connection. The instance stays usable; the next connect() or
   * query() opens a new connection.
   */
  async clean() {
    // Try to close gracefully
    await this._disposeClient('clean')
  }

  /** Alias for clean(). */
  async end() {
    return this.clean()
  }

  /** Returns the currently adopted pg client, or null when not connected. */
  getClient() {
    return this._client
  }
}
|
|
412
|
+
|
|
413
|
+
module.exports = ServerlessClient
|
package/lib/lease.js
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
const crypto = require('crypto')
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Lease Manager
|
|
5
|
+
* Handles generation and verification of signed application_name strings.
|
|
6
|
+
* Format: "s=SERVICE;i=INSTANCE_ID;e=TIMESTAMP;g=HMAC"
|
|
7
|
+
* Short keys used to fit within Postgres 63-byte limit.
|
|
8
|
+
*/
|
|
9
|
+
class LeaseManager {
|
|
10
|
+
static APP_NAME_MAX_LEN = 63
|
|
11
|
+
static SIG_LEN = 11 // 8 bytes -> base64url w/o padding => 11 chars
|
|
12
|
+
|
|
13
|
+
/**
|
|
14
|
+
* @param {string} serviceName - The logical name of the service
|
|
15
|
+
* @param {string} instanceId - Unique ID of this client instance
|
|
16
|
+
* @param {string} secret - Shared secret for HMAC signing (a dedicated coordination secret of at least 16 bytes; do not reuse the DB password)
|
|
17
|
+
*/
|
|
18
|
+
constructor(serviceName, instanceId, secret) {
|
|
19
|
+
// Keep instanceId compact and delimiter-safe.
|
|
20
|
+
this.instanceId = LeaseManager._sanitizeToken(instanceId || 'inst')
|
|
21
|
+
if (!secret) {
|
|
22
|
+
throw new Error('LeaseManager requires a non-empty secret')
|
|
23
|
+
}
|
|
24
|
+
if (Buffer.byteLength(String(secret), 'utf8') < 16) {
|
|
25
|
+
throw new Error('LeaseManager secret is too short; must be at least 16 bytes')
|
|
26
|
+
}
|
|
27
|
+
this.secret = secret
|
|
28
|
+
// Normalize serviceName so application_name ALWAYS fits into 63 bytes and is LIKE-safe.
|
|
29
|
+
this.serviceName = LeaseManager._normalizeServiceName(serviceName || 'sls_pg', this.instanceId)
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
/**
|
|
33
|
+
* Generates a signed application_name.
|
|
34
|
+
* @param {number} expirationTs - Unix timestamp (ms) when lease expires
|
|
35
|
+
* @returns {string} The formatted application_name
|
|
36
|
+
* @throws {Error} if generated name exceeds 63 bytes
|
|
37
|
+
*/
|
|
38
|
+
generateAppName(expirationTs) {
|
|
39
|
+
// Format: s=...;i=...;e=...
|
|
40
|
+
const base = `s=${this.serviceName};i=${this.instanceId};e=${expirationTs}`
|
|
41
|
+
const sig = this._sign(base)
|
|
42
|
+
const result = `${base};g=${sig}`
|
|
43
|
+
|
|
44
|
+
// Hard guarantee: never exceed Postgres 63-byte truncation limit.
|
|
45
|
+
// If this fires, our normalization math is wrong.
|
|
46
|
+
if (result.length > LeaseManager.APP_NAME_MAX_LEN) {
|
|
47
|
+
throw new Error(`BUG: application_name too long (${result.length} > ${LeaseManager.APP_NAME_MAX_LEN}): ${result}`)
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
return result
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
/**
|
|
54
|
+
* Parses an application_name and verifies its signature and expiration.
|
|
55
|
+
* @param {string} appNameString
|
|
56
|
+
* @returns {Object|null} Parsed info if valid format & signature, else null
|
|
57
|
+
*/
|
|
58
|
+
parseAndVerify(appNameString) {
|
|
59
|
+
if (!appNameString) return null
|
|
60
|
+
|
|
61
|
+
// Regex for: s=...;i=...;e=...;g=...
|
|
62
|
+
const match = appNameString.match(/^s=([^;]+);i=([^;]+);e=([^;]+);g=([^;]+)$/)
|
|
63
|
+
if (!match) return null
|
|
64
|
+
|
|
65
|
+
const [full, s, i, eStr, g] = match
|
|
66
|
+
const base = `s=${s};i=${i};e=${eStr}`
|
|
67
|
+
const expectedSig = this._sign(base)
|
|
68
|
+
|
|
69
|
+
// Timing-safe signature comparison
|
|
70
|
+
const bufG = Buffer.from(g, 'utf8')
|
|
71
|
+
const bufExpected = Buffer.from(expectedSig, 'utf8')
|
|
72
|
+
if (bufG.length !== bufExpected.length || !crypto.timingSafeEqual(bufG, bufExpected)) return null
|
|
73
|
+
|
|
74
|
+
const exp = parseInt(eStr, 10)
|
|
75
|
+
if (!Number.isFinite(exp)) return null
|
|
76
|
+
|
|
77
|
+
return {
|
|
78
|
+
svc: s,
|
|
79
|
+
inst: i,
|
|
80
|
+
exp,
|
|
81
|
+
isExpired: Date.now() > exp,
|
|
82
|
+
isValidSignature: true
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
_sign(text) {
|
|
87
|
+
// Compact signature: take first 8 bytes of HMAC and encode as base64url (11 chars, no padding)
|
|
88
|
+
const buf = crypto.createHmac('sha256', this.secret).update(text).digest()
|
|
89
|
+
return buf.subarray(0, 8).toString('base64url')
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
static _sanitizeToken(s) {
|
|
93
|
+
// Remove delimiter characters used by our format and LIKE wildcards.
|
|
94
|
+
// Keep it deterministic and log-friendly.
|
|
95
|
+
return String(s).replace(/[^a-zA-Z0-9:_-]/g, '_')
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
static _normalizeServiceName(serviceName, instanceId) {
|
|
99
|
+
const original = String(serviceName || 'sls_pg')
|
|
100
|
+
const raw = LeaseManager._sanitizeToken(original)
|
|
101
|
+
const inst = LeaseManager._sanitizeToken(instanceId || 'inst')
|
|
102
|
+
|
|
103
|
+
// Total format:
|
|
104
|
+
// s=<svc>;i=<inst>;e=<13digits>;g=<sig>
|
|
105
|
+
// Fixed overhead excluding <svc>: "s="(2) + ";i="(3) + inst + ";e="(3) + 13 + ";g="(3) + SIG_LEN
|
|
106
|
+
// => 24 + instLen + SIG_LEN
|
|
107
|
+
const overhead = 24 + inst.length + LeaseManager.SIG_LEN
|
|
108
|
+
const maxSvcLen = Math.max(1, LeaseManager.APP_NAME_MAX_LEN - overhead)
|
|
109
|
+
|
|
110
|
+
// If sanitization changed the name, we must add a hash suffix to avoid accidental collisions
|
|
111
|
+
// (different originals mapping to the same sanitized token).
|
|
112
|
+
const needsHash = raw !== original
|
|
113
|
+
if (!needsHash && raw.length <= maxSvcLen) return raw
|
|
114
|
+
|
|
115
|
+
// Truncate with a short hash suffix to preserve uniqueness.
|
|
116
|
+
const hash = crypto.createHash('sha1').update(original).digest('hex').slice(0, 8)
|
|
117
|
+
if (maxSvcLen <= hash.length) return hash.slice(0, maxSvcLen)
|
|
118
|
+
|
|
119
|
+
const prefixLen = maxSvcLen - (hash.length + 1)
|
|
120
|
+
return `${raw.slice(0, prefixLen)}-${hash}`
|
|
121
|
+
}
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
module.exports = LeaseManager
|
package/lib/reaper.js
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Connection Reaper
|
|
3
|
+
* Safely kills zombie connections using Advisory Locks and Signed Leases.
|
|
4
|
+
*/
|
|
5
|
+
class Reaper {
|
|
6
|
+
// Namespace advisory locks to avoid collisions with other apps in same DB.
|
|
7
|
+
// 0x50474151 corresponds to "PGAQ" (pg-aequor) in ASCII.
|
|
8
|
+
static LOCK_NS = 0x50474151
|
|
9
|
+
/**
|
|
10
|
+
* Runs the reaping process.
|
|
11
|
+
* @param {Object} client - The connected pg.Client
|
|
12
|
+
* @param {Object} config - Config including database name
|
|
13
|
+
* @param {LeaseManager} leaseManager - For verifying leases
|
|
14
|
+
* @param {Object} strategy - { minConnIdleTimeSec, connUtilization }
|
|
15
|
+
* @param {Function} logger
|
|
16
|
+
*/
|
|
17
|
+
static async reap(client, config, leaseManager, strategy, logger) {
|
|
18
|
+
const serviceName = leaseManager.serviceName
|
|
19
|
+
let locked = false
|
|
20
|
+
|
|
21
|
+
// 1. Acquire Advisory Lock (Non-blocking)
|
|
22
|
+
// Use Postgres native hashtext() to get a consistent 64-bit lock ID from the service string.
|
|
23
|
+
// This avoids JS-side 32-bit hash collisions.
|
|
24
|
+
|
|
25
|
+
try {
|
|
26
|
+
const lockRes = await client.query(
|
|
27
|
+
`SELECT pg_try_advisory_lock($1::int, hashtext($2)) as locked`,
|
|
28
|
+
[Reaper.LOCK_NS, serviceName]
|
|
29
|
+
)
|
|
30
|
+
if (lockRes.rows[0].locked !== true) {
|
|
31
|
+
logger(`Reaper[pid=${process.pid}]: Lock busy, skipping`)
|
|
32
|
+
return { locked: false, killed: 0 }
|
|
33
|
+
}
|
|
34
|
+
locked = true
|
|
35
|
+
|
|
36
|
+
// 2. Scan for zombies
|
|
37
|
+
const minIdle = strategy.minConnIdleTimeSec
|
|
38
|
+
|
|
39
|
+
// Fetch idle connections that look like our service
|
|
40
|
+
// Exclude self (pg_backend_pid())
|
|
41
|
+
// Optimization: Filter by application_name prefix in SQL to reduce result set size.
|
|
42
|
+
const query = `
|
|
43
|
+
SELECT pid, application_name, extract(epoch from (now() - state_change)) as idle_time
|
|
44
|
+
FROM pg_stat_activity
|
|
45
|
+
WHERE datname = current_database()
|
|
46
|
+
AND state = 'idle'
|
|
47
|
+
AND pid <> pg_backend_pid()
|
|
48
|
+
AND application_name LIKE $1 || '%'
|
|
49
|
+
`
|
|
50
|
+
|
|
51
|
+
// Correctness > optimization: do not prefilter using untrusted application_name.
|
|
52
|
+
const res = await client.query(query, [`s=${leaseManager.serviceName};`])
|
|
53
|
+
const candidates = []
|
|
54
|
+
|
|
55
|
+
for (const row of res.rows) {
|
|
56
|
+
if (row.idle_time < minIdle) continue
|
|
57
|
+
|
|
58
|
+
const lease = leaseManager.parseAndVerify(row.application_name)
|
|
59
|
+
|
|
60
|
+
if (!lease) {
|
|
61
|
+
// Invalid format or signature -> Unsafe to touch (could be neighbor with different secret)
|
|
62
|
+
continue
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
if (lease.isExpired) {
|
|
66
|
+
// Valid signature, but expired -> ZOMBIE
|
|
67
|
+
candidates.push({
|
|
68
|
+
pid: row.pid,
|
|
69
|
+
idle_time: Number(row.idle_time) || 0,
|
|
70
|
+
exp: lease.exp,
|
|
71
|
+
})
|
|
72
|
+
}
|
|
73
|
+
// else: Lease valid -> ACTIVE neighbor -> Do not kill
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
// 3. Terminate zombies
|
|
77
|
+
if (candidates.length > 0) {
|
|
78
|
+
// Deterministic: kill the "stale-est" first.
|
|
79
|
+
// Primary: oldest expiration (smallest exp) -> longest expired.
|
|
80
|
+
// Secondary: largest idle_time.
|
|
81
|
+
candidates.sort((a, b) => (a.exp - b.exp) || (b.idle_time - a.idle_time) || (a.pid - b.pid))
|
|
82
|
+
|
|
83
|
+
const limit = Math.max(1, Number(strategy.maxIdleConnectionsToKill) || 1)
|
|
84
|
+
const selected = candidates.slice(0, limit)
|
|
85
|
+
const pidsToKill = selected.map(x => x.pid)
|
|
86
|
+
|
|
87
|
+
// Log a compact reason line for debugging.
|
|
88
|
+
const meta = selected.map(x => `pid=${x.pid},idle=${Math.round(x.idle_time)}s,expDelta=${Math.round((Date.now() - x.exp) / 1000)}s`).join(' | ')
|
|
89
|
+
logger(`Reaper[pid=${process.pid}]: Killing ${pidsToKill.length} zombies: ${meta}`)
|
|
90
|
+
// Cast to int[] to be safe
|
|
91
|
+
await client.query(`SELECT pg_terminate_backend(pid) FROM pg_stat_activity WHERE pid = ANY($1::int[])`, [pidsToKill])
|
|
92
|
+
return { locked: true, killed: pidsToKill.length }
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
return { locked: true, killed: 0 }
|
|
96
|
+
|
|
97
|
+
} catch (err) {
|
|
98
|
+
logger(`Reaper[pid=${process.pid}] failed:`, err && (err.stack || err.message || err))
|
|
99
|
+
if (strategy && strategy.reaperErrorMode === 'throw') throw err
|
|
100
|
+
return { locked: false, killed: 0, error: err }
|
|
101
|
+
} finally {
|
|
102
|
+
// 4. Release Lock
|
|
103
|
+
if (locked) {
|
|
104
|
+
try {
|
|
105
|
+
await client.query(
|
|
106
|
+
`SELECT pg_advisory_unlock($1::int, hashtext($2))`,
|
|
107
|
+
[Reaper.LOCK_NS, serviceName]
|
|
108
|
+
)
|
|
109
|
+
} catch (_) { /* ignore unlock error */ }
|
|
110
|
+
}
|
|
111
|
+
}
|
|
112
|
+
}
|
|
113
|
+
// Removed _hashString method as we use DB-side hashtext()
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
module.exports = Reaper
|
|
117
|
+
|
package/lib/retry.js
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Retry Strategy & Error Classification
|
|
3
|
+
* Implements "Decorrelated Jitter" and safe error analysis.
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
class RetryStrategy {
|
|
7
|
+
/**
|
|
8
|
+
* Determines if an error is a "dead connection" error that warrants a retry/reconnect.
|
|
9
|
+
* @param {Error} err
|
|
10
|
+
* @returns {boolean}
|
|
11
|
+
*/
|
|
12
|
+
static isRetryable(err) {
|
|
13
|
+
const code = err && err.code
|
|
14
|
+
const msg = (err && err.message) || ''
|
|
15
|
+
const sqlstate = (err && (err.code || err.sqlstate)) || null
|
|
16
|
+
|
|
17
|
+
// 1) Node.js socket / transport codes (not SQLSTATE)
|
|
18
|
+
if (code === 'ECONNRESET' || code === 'EPIPE' || code === 'ETIMEDOUT' || code === 'ECONNREFUSED') return true
|
|
19
|
+
if (code === 'ENETUNREACH' || code === 'EHOSTUNREACH' || code === 'EAI_AGAIN') return true
|
|
20
|
+
if (code === 'ECONNABORTED' || code === 'EADDRINUSE') return true
|
|
21
|
+
|
|
22
|
+
// 2) SQLSTATE-first (stable)
|
|
23
|
+
// Class 08 — connection exception
|
|
24
|
+
if (typeof sqlstate === 'string' && sqlstate.length === 5 && sqlstate.startsWith('08')) return true
|
|
25
|
+
|
|
26
|
+
// Admin / crash / cannot continue
|
|
27
|
+
if (sqlstate === '57P01' || sqlstate === '57P02' || sqlstate === '57P03') return true
|
|
28
|
+
|
|
29
|
+
// Too many connections (can be transient under spiky concurrency)
|
|
30
|
+
if (sqlstate === '53300') return true
|
|
31
|
+
|
|
32
|
+
// Optional: transient concurrency failures (only safe if queries are idempotent)
|
|
33
|
+
// Keep disabled for now to avoid duplicating non-idempotent writes.
|
|
34
|
+
// if (sqlstate === '40001' || sqlstate === '40P01') return true
|
|
35
|
+
|
|
36
|
+
// 3) LAST-RESORT message fallbacks (keep minimal; remove over time)
|
|
37
|
+
if (msg.includes('Connection terminated unexpectedly')) return true
|
|
38
|
+
if (msg.includes('sorry, too many clients already')) return true
|
|
39
|
+
|
|
40
|
+
return false
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
/**
|
|
44
|
+
* Calculates backoff delay using "Decorrelated Jitter".
|
|
45
|
+
* sleep = min(cap, random(base, sleep * 3))
|
|
46
|
+
* @param {number} baseMs - Minimum wait
|
|
47
|
+
* @param {number} capMs - Maximum wait
|
|
48
|
+
* @param {number} previousDelay - The delay used in the previous attempt (or 0)
|
|
49
|
+
* @returns {number} ms to sleep
|
|
50
|
+
*/
|
|
51
|
+
static getBackoff(baseMs, capMs, previousDelay) {
|
|
52
|
+
const prev = previousDelay || baseMs
|
|
53
|
+
const randRange = (min, max) => Math.floor(Math.random() * (max - min + 1)) + min
|
|
54
|
+
return Math.min(capMs, randRange(baseMs, prev * 3))
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
module.exports = RetryStrategy
|
|
59
|
+
|
package/package.json
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "pg-aequor",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Crash-safe, coordination-aware PostgreSQL client for Serverless environments",
|
|
5
|
+
"main": "./index.js",
|
|
6
|
+
"types": "index.d.ts",
|
|
7
|
+
"author": "dimaq12",
|
|
8
|
+
"license": "ISC",
|
|
9
|
+
"repository": {
|
|
10
|
+
"type": "git",
|
|
11
|
+
"url": "git+ssh://git@github.com/dimaq12/pg-aequor.git"
|
|
12
|
+
},
|
|
13
|
+
"bugs": {
|
|
14
|
+
"url": "https://github.com/dimaq12/pg-aequor/issues"
|
|
15
|
+
},
|
|
16
|
+
"homepage": "https://github.com/dimaq12/pg-aequor#readme",
|
|
17
|
+
"publishConfig": {
|
|
18
|
+
"registry": "https://registry.npmjs.org/"
|
|
19
|
+
},
|
|
20
|
+
"sideEffects": false,
|
|
21
|
+
"engines": {
|
|
22
|
+
"node": ">=18"
|
|
23
|
+
},
|
|
24
|
+
"exports": {
|
|
25
|
+
".": {
|
|
26
|
+
"types": "./index.d.ts",
|
|
27
|
+
"require": "./index.js"
|
|
28
|
+
}
|
|
29
|
+
},
|
|
30
|
+
"files": [
|
|
31
|
+
"index.js",
|
|
32
|
+
"index.d.ts",
|
|
33
|
+
"lib/",
|
|
34
|
+
"README.md",
|
|
35
|
+
"LICENSE"
|
|
36
|
+
],
|
|
37
|
+
"scripts": {
|
|
38
|
+
"test": "node --test"
|
|
39
|
+
},
|
|
40
|
+
"peerDependencies": {
|
|
41
|
+
"pg": "^8.11.0"
|
|
42
|
+
},
|
|
43
|
+
"devDependencies": {
|
|
44
|
+
"pg": "^8.11.0"
|
|
45
|
+
}
|
|
46
|
+
}
|