@powersync/service-module-postgres 0.16.9 → 0.16.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +20 -0
- package/dist/replication/ConnectionManagerFactory.js +8 -4
- package/dist/replication/ConnectionManagerFactory.js.map +1 -1
- package/dist/replication/PgManager.d.ts +5 -1
- package/dist/replication/PgManager.js +15 -5
- package/dist/replication/PgManager.js.map +1 -1
- package/dist/replication/PostgresErrorRateLimiter.js +6 -1
- package/dist/replication/PostgresErrorRateLimiter.js.map +1 -1
- package/dist/replication/WalStream.d.ts +2 -5
- package/dist/replication/WalStream.js +64 -95
- package/dist/replication/WalStream.js.map +1 -1
- package/dist/replication/WalStreamReplicationJob.d.ts +1 -2
- package/dist/replication/WalStreamReplicationJob.js +47 -70
- package/dist/replication/WalStreamReplicationJob.js.map +1 -1
- package/package.json +5 -5
- package/src/replication/ConnectionManagerFactory.ts +9 -4
- package/src/replication/PgManager.ts +19 -5
- package/src/replication/PostgresErrorRateLimiter.ts +5 -1
- package/src/replication/WalStream.ts +71 -116
- package/src/replication/WalStreamReplicationJob.ts +48 -68
- package/test/src/wal_stream.test.ts +15 -7
- package/test/src/wal_stream_utils.ts +23 -4
- package/tsconfig.tsbuildinfo +1 -1

package/dist/replication/WalStreamReplicationJob.js
CHANGED

@@ -1,21 +1,15 @@
-import { container, logger
+import { container, logger } from '@powersync/lib-services-framework';
 import { MissingReplicationSlotError, sendKeepAlive, WalStream } from './WalStream.js';
 import { replication } from '@powersync/service-core';
 import { getApplicationName } from '../utils/application-name.js';
 export class WalStreamReplicationJob extends replication.AbstractReplicationJob {
 connectionFactory;
-connectionManager;
+connectionManager = null;
 lastStream = null;
 constructor(options) {
 super(options);
 this.logger = logger.child({ prefix: `[${this.slotName}] ` });
 this.connectionFactory = options.connectionFactory;
-this.connectionManager = this.connectionFactory.create({
-// Pool connections are only used intermittently.
-idleTimeout: 30_000,
-maxSize: 2,
-applicationName: getApplicationName()
-});
 }
 /**
  * Postgres on RDS writes performs a WAL checkpoint every 5 minutes by default, which creates a new 64MB file.

@@ -30,11 +24,13 @@ export class WalStreamReplicationJob extends replication.AbstractReplicationJob
  * **This may be a bug in pgwire or how we're using it.
  */
 async keepAlive() {
-
-
-
-
-
+if (this.connectionManager) {
+try {
+await sendKeepAlive(this.connectionManager.pool);
+}
+catch (e) {
+this.logger.warn(`KeepAlive failed, unable to post to WAL`, e);
+}
 }
 }
 get slotName() {

@@ -42,33 +38,55 @@
 }
 async replicate() {
 try {
-await this.
+await this.replicateOnce();
 }
 catch (e) {
 // Fatal exception
-
-
-
+if (!this.isStopped) {
+// Ignore aborted errors
+this.logger.error(`Replication error`, e);
+if (e.cause != null) {
+// Example:
+// PgError.conn_ended: Unable to do postgres query on ended connection
+// at PgConnection.stream (file:///.../powersync/node_modules/.pnpm/github.com+kagis+pgwire@f1cb95f9a0f42a612bb5a6b67bb2eb793fc5fc87/node_modules/pgwire/mod.js:315:13)
+// at stream.next (<anonymous>)
+// at PgResult.fromStream (file:///.../powersync/node_modules/.pnpm/github.com+kagis+pgwire@f1cb95f9a0f42a612bb5a6b67bb2eb793fc5fc87/node_modules/pgwire/mod.js:1174:22)
+// at PgConnection.query (file:///.../powersync/node_modules/.pnpm/github.com+kagis+pgwire@f1cb95f9a0f42a612bb5a6b67bb2eb793fc5fc87/node_modules/pgwire/mod.js:311:21)
+// at WalStream.startInitialReplication (file:///.../powersync/powersync-service/lib/replication/WalStream.js:266:22)
+// ...
+// cause: TypeError: match is not iterable
+// at timestamptzToSqlite (file:///.../powersync/packages/jpgwire/dist/util.js:140:50)
+// at PgType.decode (file:///.../powersync/packages/jpgwire/dist/pgwire_types.js:25:24)
+// at PgConnection._recvDataRow (file:///.../powersync/packages/jpgwire/dist/util.js:88:22)
+// at PgConnection._recvMessages (file:///.../powersync/node_modules/.pnpm/github.com+kagis+pgwire@f1cb95f9a0f42a612bb5a6b67bb2eb793fc5fc87/node_modules/pgwire/mod.js:656:30)
+// at PgConnection._ioloopAttempt (file:///.../powersync/node_modules/.pnpm/github.com+kagis+pgwire@f1cb95f9a0f42a612bb5a6b67bb2eb793fc5fc87/node_modules/pgwire/mod.js:563:20)
+// at process.processTicksAndRejections (node:internal/process/task_queues:95:5)
+// at async PgConnection._ioloop (file:///.../powersync/node_modules/.pnpm/github.com+kagis+pgwire@f1cb95f9a0f42a612bb5a6b67bb2eb793fc5fc87/node_modules/pgwire/mod.js:517:14),
+// [Symbol(pg.ErrorCode)]: 'conn_ended',
+// [Symbol(pg.ErrorResponse)]: undefined
+// }
+// Without this additional log, the cause would not be visible in the logs.
+this.logger.error(`cause`, e.cause);
 }
-
-
+// Report the error if relevant, before retrying
+container.reporter.captureException(e, {
+metadata: {
+replication_slot: this.slotName
+}
+});
+// This sets the retry delay
+this.rateLimiter.reportError(e);
+}
 if (e instanceof MissingReplicationSlotError) {
 // This stops replication on this slot and restarts with a new slot
 await this.options.storage.factory.restartReplication(this.storage.group_id);
 }
+// No need to rethrow - the error is already logged, and retry behavior is the same on error
 }
 finally {
 this.abortController.abort();
 }
 }
-async replicateLoop() {
-while (!this.isStopped) {
-await this.replicateOnce();
-if (!this.isStopped) {
-await new Promise((resolve) => setTimeout(resolve, 5000));
-}
-}
-}
 async replicateOnce() {
 // New connections on every iteration (every error with retry),
 // otherwise we risk repeating errors related to the connection,

@@ -79,6 +97,7 @@ export class WalStreamReplicationJob extends replication.AbstractReplicationJob
 maxSize: 2,
 applicationName: getApplicationName()
 });
+this.connectionManager = connectionManager;
 try {
 await this.rateLimiter?.waitUntilAllowed({ signal: this.abortController.signal });
 if (this.isStopped) {

@@ -94,50 +113,8 @@ export class WalStreamReplicationJob extends replication.AbstractReplicationJob
 this.lastStream = stream;
 await stream.replicate();
 }
-catch (e) {
-if (this.isStopped && e instanceof ReplicationAbortedError) {
-// Ignore aborted errors
-return;
-}
-this.logger.error(`Replication error`, e);
-if (e.cause != null) {
-// Example:
-// PgError.conn_ended: Unable to do postgres query on ended connection
-// at PgConnection.stream (file:///.../powersync/node_modules/.pnpm/github.com+kagis+pgwire@f1cb95f9a0f42a612bb5a6b67bb2eb793fc5fc87/node_modules/pgwire/mod.js:315:13)
-// at stream.next (<anonymous>)
-// at PgResult.fromStream (file:///.../powersync/node_modules/.pnpm/github.com+kagis+pgwire@f1cb95f9a0f42a612bb5a6b67bb2eb793fc5fc87/node_modules/pgwire/mod.js:1174:22)
-// at PgConnection.query (file:///.../powersync/node_modules/.pnpm/github.com+kagis+pgwire@f1cb95f9a0f42a612bb5a6b67bb2eb793fc5fc87/node_modules/pgwire/mod.js:311:21)
-// at WalStream.startInitialReplication (file:///.../powersync/powersync-service/lib/replication/WalStream.js:266:22)
-// ...
-// cause: TypeError: match is not iterable
-// at timestamptzToSqlite (file:///.../powersync/packages/jpgwire/dist/util.js:140:50)
-// at PgType.decode (file:///.../powersync/packages/jpgwire/dist/pgwire_types.js:25:24)
-// at PgConnection._recvDataRow (file:///.../powersync/packages/jpgwire/dist/util.js:88:22)
-// at PgConnection._recvMessages (file:///.../powersync/node_modules/.pnpm/github.com+kagis+pgwire@f1cb95f9a0f42a612bb5a6b67bb2eb793fc5fc87/node_modules/pgwire/mod.js:656:30)
-// at PgConnection._ioloopAttempt (file:///.../powersync/node_modules/.pnpm/github.com+kagis+pgwire@f1cb95f9a0f42a612bb5a6b67bb2eb793fc5fc87/node_modules/pgwire/mod.js:563:20)
-// at process.processTicksAndRejections (node:internal/process/task_queues:95:5)
-// at async PgConnection._ioloop (file:///.../powersync/node_modules/.pnpm/github.com+kagis+pgwire@f1cb95f9a0f42a612bb5a6b67bb2eb793fc5fc87/node_modules/pgwire/mod.js:517:14),
-// [Symbol(pg.ErrorCode)]: 'conn_ended',
-// [Symbol(pg.ErrorResponse)]: undefined
-// }
-// Without this additional log, the cause would not be visible in the logs.
-this.logger.error(`cause`, e.cause);
-}
-if (e instanceof MissingReplicationSlotError) {
-throw e;
-}
-else {
-// Report the error if relevant, before retrying
-container.reporter.captureException(e, {
-metadata: {
-replication_slot: this.slotName
-}
-});
-// This sets the retry delay
-this.rateLimiter?.reportError(e);
-}
-}
 finally {
+this.connectionManager = null;
 await connectionManager.end();
 }
 }
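Taken together, these hunks move pool ownership out of the constructor: `replicateOnce()` creates a fresh connection manager per attempt, records it on `this.connectionManager`, and clears it in `finally`, so `keepAlive()` only touches a pool while an attempt is actually running. A minimal TypeScript sketch of that pattern follows; the `Pool` shape, `makePool` and the keep-alive query are stand-ins for illustration, not the package's real API.

```ts
// Sketch of the nullable active-resource pattern used above (hypothetical types).
interface Pool {
  query(sql: string): Promise<unknown>;
  end(): Promise<void>;
}

class ReplicationJobSketch {
  private connectionManager: Pool | null = null;

  // Best-effort: only acts while replicateOnce() holds an active pool.
  async keepAlive(): Promise<void> {
    if (!this.connectionManager) return;
    try {
      // Stand-in for sendKeepAlive(pool), which posts a small message to the WAL.
      await this.connectionManager.query('SELECT 1');
    } catch (e) {
      console.warn('KeepAlive failed', e);
    }
  }

  async replicateOnce(makePool: () => Pool): Promise<void> {
    const pool = makePool();
    this.connectionManager = pool; // visible to keepAlive() for this attempt only
    try {
      await pool.query('/* streaming replication work happens here */ SELECT 1');
    } finally {
      this.connectionManager = null; // clear before the pool is torn down
      await pool.end();
    }
  }
}
```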
package/dist/replication/WalStreamReplicationJob.js.map
CHANGED

@@ -1 +1 @@
(regenerated single-line source map for WalStreamReplicationJob.js; the old and new "mappings" payloads are not reproduced here)
package/package.json
CHANGED

@@ -5,7 +5,7 @@
 "publishConfig": {
 "access": "public"
 },
-"version": "0.16.
+"version": "0.16.11",
 "main": "dist/index.js",
 "license": "FSL-1.1-ALv2",
 "type": "module",

@@ -30,7 +30,7 @@
 "uuid": "^11.1.0",
 "@powersync/lib-service-postgres": "0.4.13",
 "@powersync/lib-services-framework": "0.7.9",
-"@powersync/service-core": "1.16.
+"@powersync/service-core": "1.16.2",
 "@powersync/service-jpgwire": "0.21.5",
 "@powersync/service-jsonbig": "0.17.12",
 "@powersync/service-sync-rules": "0.29.6",

@@ -38,10 +38,10 @@
 },
 "devDependencies": {
 "@types/semver": "^7.5.4",
-"@powersync/service-core-tests": "0.12.
-"@powersync/service-module-mongodb-storage": "0.12.
+"@powersync/service-core-tests": "0.12.11",
+"@powersync/service-module-mongodb-storage": "0.12.11",
 "@powersync/lib-service-postgres": "0.4.13",
-"@powersync/service-module-postgres-storage": "0.10.
+"@powersync/service-module-postgres-storage": "0.10.11"
 },
 "scripts": {
 "build": "tsc -b",
package/src/replication/ConnectionManagerFactory.ts
CHANGED

@@ -5,7 +5,7 @@ import { logger } from '@powersync/lib-services-framework';
 import { CustomTypeRegistry } from '../types/registry.js';

 export class ConnectionManagerFactory {
-private readonly connectionManagers
+private readonly connectionManagers = new Set<PgManager>();
 public readonly dbConnectionConfig: NormalizedPostgresConnectionConfig;

 constructor(

@@ -13,18 +13,23 @@ export class ConnectionManagerFactory {
 private readonly registry: CustomTypeRegistry
 ) {
 this.dbConnectionConfig = dbConnectionConfig;
-this.connectionManagers = [];
 }

 create(poolOptions: PgPoolOptions) {
 const manager = new PgManager(this.dbConnectionConfig, { ...poolOptions, registry: this.registry });
-this.connectionManagers.
+this.connectionManagers.add(manager);
+
+manager.registerListener({
+onEnded: () => {
+this.connectionManagers.delete(manager);
+}
+});
 return manager;
 }

 async shutdown() {
 logger.info('Shutting down Postgres connection Managers...');
-for (const manager of this.connectionManagers) {
+for (const manager of [...this.connectionManagers]) {
 await manager.end();
 }
 logger.info('Postgres connection Managers shutdown completed.');
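The factory now keeps a `Set` of live managers and lets each manager remove itself via an `onEnded` listener, which is also why `shutdown()` iterates over a copy of the set rather than the set itself. A self-contained sketch of that bookkeeping, using simplified stand-in types rather than the real `PgManager`/`BaseObserver`:

```ts
// Minimal sketch of the Set-plus-listener bookkeeping above (stand-in types).
type ManagerListener = { onEnded?: () => void };

class ManagerSketch {
  private listeners = new Set<ManagerListener>();
  registerListener(listener: ManagerListener) {
    this.listeners.add(listener);
  }
  async end() {
    // ... close the pool and connections here ...
    for (const listener of this.listeners) listener.onEnded?.();
  }
}

class FactorySketch {
  private managers = new Set<ManagerSketch>();

  create(): ManagerSketch {
    const manager = new ManagerSketch();
    this.managers.add(manager);
    // Self-removal: any end() call (not just shutdown()) drops the reference.
    manager.registerListener({ onEnded: () => this.managers.delete(manager) });
    return manager;
  }

  async shutdown() {
    // Copy first: onEnded mutates the Set while we iterate over it.
    for (const manager of [...this.managers]) {
      await manager.end();
    }
  }
}
```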
package/src/replication/PgManager.ts
CHANGED

@@ -5,6 +5,7 @@ import { getApplicationName } from '../utils/application-name.js';
 import { PostgresTypeResolver } from '../types/resolver.js';
 import { getServerVersion } from '../utils/postgres_version.js';
 import { CustomTypeRegistry } from '../types/registry.js';
+import { BaseObserver } from '@powersync/lib-services-framework';

 export interface PgManagerOptions extends pgwire.PgPoolOptions {
 registry: CustomTypeRegistry;

@@ -15,7 +16,11 @@ export interface PgManagerOptions extends pgwire.PgPoolOptions {
 */
 const SNAPSHOT_SOCKET_TIMEOUT = 30_000;

-export
+export interface PgManagerListener {
+onEnded(): void;
+}
+
+export class PgManager extends BaseObserver<PgManagerListener> {
 /**
 * Do not use this for any transactions.
 */

@@ -29,6 +34,7 @@ export class PgManager {
 public options: NormalizedPostgresConnectionConfig,
 public poolOptions: PgManagerOptions
 ) {
+super();
 // The pool is lazy - no connections are opened until a query is performed.
 this.pool = pgwire.connectPgWirePool(this.options, poolOptions);
 this.types = new PostgresTypeResolver(poolOptions.registry, this.pool);

@@ -83,8 +89,9 @@ export class PgManager {
 for (let result of await Promise.allSettled([
 this.pool.end(),
 ...this.connectionPromises.map(async (promise) => {
-
-
+// Wait for connection attempts to finish, but do not throw connection errors here
+const connection = await promise.catch((_) => {});
+return await connection?.end();
 })
 ])) {
 // Throw the first error, if any

@@ -92,14 +99,18 @@
 throw result.reason;
 }
 }
+this.iterateListeners((listener) => {
+listener.onEnded?.();
+});
 }

 async destroy() {
 this.pool.destroy();
 for (let result of await Promise.allSettled([
 ...this.connectionPromises.map(async (promise) => {
-
-
+// Wait for connection attempts to finish, but do not throw connection errors here
+const connection = await promise.catch((_) => {});
+return connection?.destroy();
 })
 ])) {
 // Throw the first error, if any

@@ -107,5 +118,8 @@
 throw result.reason;
 }
 }
+this.iterateListeners((listener) => {
+listener.onEnded?.();
+});
 }
 }
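In the `end()`/`destroy()` hunks, pending connection attempts are awaited but their own connection errors are swallowed, so a connection that never came up cannot turn teardown into a throw; only errors from the teardown calls themselves are re-raised. A standalone sketch of that `Promise.allSettled` pattern (the `PoolLike`/`ConnLike` shapes here are assumptions, not pgwire's actual types):

```ts
// Sketch: tear down a pool and any pending single connections without letting a
// failed connection attempt abort the teardown.
interface ConnLike {
  end(): Promise<void>;
}
interface PoolLike {
  end(): Promise<void>;
}

async function endAll(pool: PoolLike, pending: Promise<ConnLike>[]): Promise<void> {
  const results = await Promise.allSettled([
    pool.end(),
    ...pending.map(async (promise) => {
      // Wait for the attempt, but swallow its error; a connection that never
      // opened simply yields undefined and is skipped.
      const connection = await promise.catch(() => undefined);
      return connection?.end();
    })
  ]);
  // Surface the first genuine teardown error, if any.
  for (const result of results) {
    if (result.status === 'rejected') throw result.reason;
  }
}
```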
package/src/replication/PostgresErrorRateLimiter.ts
CHANGED

@@ -1,5 +1,6 @@
 import { setTimeout } from 'timers/promises';
 import { ErrorRateLimiter } from '@powersync/service-core';
+import { MissingReplicationSlotError } from './WalStream.js';

 export class PostgresErrorRateLimiter implements ErrorRateLimiter {
 nextAllowed: number = Date.now();

@@ -17,7 +18,10 @@ export class PostgresErrorRateLimiter implements ErrorRateLimiter {

 reportError(e: any): void {
 const message = (e.message as string) ?? '';
-if (
+if (e instanceof MissingReplicationSlotError) {
+// Short delay for a retrying (re-creating the slot)
+this.setDelay(2_000);
+} else if (message.includes('password authentication failed')) {
 // Wait 15 minutes, to avoid triggering Supabase's fail2ban
 this.setDelay(900_000);
 } else if (message.includes('ENOTFOUND')) {
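The new branch short-circuits the message matching: a `MissingReplicationSlotError` now backs off for only two seconds, since the recovery path (re-creating the slot) is cheap, while authentication failures keep their long delay. A trimmed illustration of that ordering follows; the stand-in error class and the 30-second fallback delay are assumptions made for the sketch, not values taken from the file.

```ts
// Trimmed sketch of how the delay is now chosen (not the full PostgresErrorRateLimiter).
class MissingReplicationSlotErrorSketch extends Error {}

class DelaySketch {
  nextAllowed: number = Date.now();

  reportError(e: any): void {
    const message: string = e?.message ?? '';
    if (e instanceof MissingReplicationSlotErrorSketch) {
      // Quick retry: the slot will simply be re-created under a new name.
      this.setDelay(2_000);
    } else if (message.includes('password authentication failed')) {
      // Long delay, e.g. to avoid triggering fail2ban on the server.
      this.setDelay(900_000);
    } else {
      // Assumed generic fallback for the sketch.
      this.setDelay(30_000);
    }
  }

  private setDelay(delay: number): void {
    this.nextAllowed = Date.now() + delay;
  }
}
```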
package/src/replication/WalStream.ts
CHANGED

@@ -4,7 +4,6 @@ import {
 DatabaseConnectionError,
 logger as defaultLogger,
 ErrorCode,
-errors,
 Logger,
 ReplicationAbortedError,
 ReplicationAssertionError

@@ -100,8 +99,10 @@ export const sendKeepAlive = async (db: pgwire.PgClient) => {
 };

 export class MissingReplicationSlotError extends Error {
-constructor(message: string) {
+constructor(message: string, cause?: any) {
 super(message);
+
+this.cause = cause;
 }
 }

@@ -304,135 +305,54 @@ export class WalStream {
 })
 )[0];

+// Previously we also used pg_catalog.pg_logical_slot_peek_binary_changes to confirm that we can query the slot.
+// However, there were some edge cases where the query times out, repeating the query, ultimately
+// causing high load on the source database and never recovering automatically.
+// We now instead jump straight to replication if the wal_status is not "lost", rather detecting those
+// errors during streaming replication, which is a little more robust.
+
+// We can have:
+// 1. needsInitialSync: true, lost slot -> MissingReplicationSlotError (starts new sync rules version).
+// Theoretically we could handle this the same as (2).
+// 2. needsInitialSync: true, no slot -> create new slot
+// 3. needsInitialSync: true, valid slot -> resume initial sync
+// 4. needsInitialSync: false, lost slot -> MissingReplicationSlotError (starts new sync rules version)
+// 5. needsInitialSync: false, no slot -> MissingReplicationSlotError (starts new sync rules version)
+// 6. needsInitialSync: false, valid slot -> resume streaming replication
+// The main advantage of MissingReplicationSlotError are:
+// 1. If there was a complete snapshot already (cases 4/5), users can still sync from that snapshot while
+// we do the reprocessing under a new slot name.
+// 2. If there was a partial snapshot (case 1), we can start with the new slot faster by not waiting for
+// the partial data to be cleared.
 if (slot != null) {
 // This checks that the slot is still valid
-
-
-
-
+
+// wal_status is present in postgres 13+
+// invalidation_reason is present in postgres 17+
+const lost = slot.wal_status == 'lost';
+if (lost) {
+// Case 1 / 4
+throw new MissingReplicationSlotError(
+`Replication slot ${slotName} is not valid anymore. invalidation_reason: ${slot.invalidation_reason ?? 'unknown'}`
+);
 }
-//
-// needsInitialSync: true, needsNewSlot: true -> initial sync from scratch
-// needsInitialSync: true, needsNewSlot: false -> resume initial sync
-// needsInitialSync: false, needsNewSlot: true -> handled above
-// needsInitialSync: false, needsNewSlot: false -> resume streaming replication
+// Case 3 / 6
 return {
 needsInitialSync: !snapshotDone,
-needsNewSlot:
+needsNewSlot: false
 };
 } else {
 if (snapshotDone) {
+// Case 5
 // This will create a new slot, while keeping the current sync rules active
 throw new MissingReplicationSlotError(`Replication slot ${slotName} is missing`);
 }
-//
+// Case 2
+// This will clear data (if any) and re-create the same slot
 return { needsInitialSync: true, needsNewSlot: true };
 }
 }

-/**
- * If a replication slot exists, check that it is healthy.
- */
-private async checkReplicationSlot(slot: {
-// postgres 13+
-wal_status?: string;
-// postgres 17+
-invalidation_reason?: string | null;
-}): Promise<{ needsNewSlot: boolean }> {
-// Start with a placeholder error, should be replaced if there is an actual issue.
-let last_error = new ReplicationAssertionError(`Slot health check failed to execute`);
-
-const slotName = this.slot_name;
-
-const lost = slot.wal_status == 'lost';
-if (lost) {
-this.logger.warn(
-`Replication slot ${slotName} is invalidated. invalidation_reason: ${slot.invalidation_reason ?? 'unknown'}`
-);
-return {
-needsNewSlot: true
-};
-}
-
-// Check that replication slot exists, trying for up to 2 minutes.
-const startAt = performance.now();
-while (performance.now() - startAt < 120_000) {
-this.touch();
-
-try {
-// We peek a large number of changes here, to make it more likely to pick up replication slot errors.
-// For example, "publication does not exist" only occurs here if the peek actually includes changes related
-// to the slot.
-this.logger.info(`Checking ${slotName}`);
-
-// The actual results can be quite large, so we don't actually return everything
-// due to memory and processing overhead that would create.
-const cursor = await this.connections.pool.stream({
-statement: `SELECT 1 FROM pg_catalog.pg_logical_slot_peek_binary_changes($1, NULL, 1000, 'proto_version', '1', 'publication_names', $2)`,
-params: [
-{ type: 'varchar', value: slotName },
-{ type: 'varchar', value: PUBLICATION_NAME }
-]
-});
-
-for await (let _chunk of cursor) {
-// No-op, just exhaust the cursor
-}
-
-// Success
-this.logger.info(`Slot ${slotName} appears healthy`);
-return { needsNewSlot: false };
-} catch (e) {
-last_error = e;
-this.logger.warn(`Replication slot error`, e);
-
-if (this.stopped) {
-throw e;
-}
-
-if (
-/incorrect prev-link/.test(e.message) ||
-/replication slot.*does not exist/.test(e.message) ||
-/publication.*does not exist/.test(e.message) ||
-// Postgres 18 - exceeded max_slot_wal_keep_size
-/can no longer access replication slot/.test(e.message) ||
-// Postgres 17 - exceeded max_slot_wal_keep_size
-/can no longer get changes from replication slot/.test(e.message)
-) {
-// Fatal error. In most cases since Postgres 13+, the `wal_status == 'lost'` check should pick this up, but this
-// works as a fallback.
-
-container.reporter.captureException(e, {
-level: errors.ErrorSeverity.WARNING,
-metadata: {
-replication_slot: slotName
-}
-});
-// Sample: record with incorrect prev-link 10000/10000 at 0/18AB778
-// Seen during development. Some internal error, fixed by re-creating slot.
-//
-// Sample: publication "powersync" does not exist
-// Happens when publication deleted or never created.
-// Slot must be re-created in this case.
-this.logger.info(`${slotName} is not valid anymore`);
-
-return { needsNewSlot: true };
-}
-// Try again after a pause
-await new Promise((resolve) => setTimeout(resolve, 1000));
-}
-}
-
-container.reporter.captureException(last_error, {
-level: errors.ErrorSeverity.ERROR,
-metadata: {
-replication_slot: slotName
-}
-});
-
-throw last_error;
-}
-
 async estimatedCountNumber(db: pgwire.PgConnection, table: storage.SourceTable): Promise<number> {
 const results = await db.query({
 statement: `SELECT reltuples::bigint AS estimate
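The `slot` object consumed above comes from a query against `pg_catalog.pg_replication_slots`; the comments in the hunk note that `wal_status` exists from Postgres 13 and `invalidation_reason` from Postgres 17. A hedged sketch of reading that status with a pgwire-style `query({ statement, params })` call follows; the `QueryClient` shape and the helper name are assumptions, not the package's real code.

```ts
// Pgwire-style client shape assumed by this sketch; query({ statement, params })
// matches the calls visible elsewhere in WalStream.ts.
interface QueryClient {
  query(q: {
    statement: string;
    params: { type: string; value: unknown }[];
  }): Promise<unknown>;
}

// Sketch: read the slot health fields the new check relies on.
// On Postgres < 17 the invalidation_reason column does not exist, so a real
// implementation would have to select it conditionally; this sketch assumes 17+.
async function readSlotStatus(db: QueryClient, slotName: string) {
  const result = await db.query({
    statement: `SELECT slot_name, wal_status, invalidation_reason
FROM pg_catalog.pg_replication_slots
WHERE slot_name = $1`,
    params: [{ type: 'varchar', value: slotName }]
  });
  // A wal_status of 'lost' is what makes the code above raise MissingReplicationSlotError.
  return result;
}
```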
@@ -915,6 +835,17 @@ WHERE oid = $1::regclass`,
 }

 async streamChanges(replicationConnection: pgwire.PgConnection) {
+try {
+await this.streamChangesInternal(replicationConnection);
+} catch (e) {
+if (isReplicationSlotInvalidError(e)) {
+throw new MissingReplicationSlotError(e.message, e);
+}
+throw e;
+}
+}
+
+private async streamChangesInternal(replicationConnection: pgwire.PgConnection) {
 // When changing any logic here, check /docs/wal-lsns.md.
 const { createEmptyCheckpoints } = await this.ensureStorageCompatibility();

@@ -1179,3 +1110,27 @@ WHERE oid = $1::regclass`,
 });
 }
 }
+
+function isReplicationSlotInvalidError(e: any) {
+// We could access the error code from pgwire using this:
+// e[Symbol.for('pg.ErrorCode')]
+// However, we typically get a generic code such as 42704 (undefined_object), which does not
+// help much. So we check the actual error message.
+const message = e.message ?? '';
+
+// Sample: record with incorrect prev-link 10000/10000 at 0/18AB778
+// Seen during development. Some internal error, fixed by re-creating slot.
+//
+// Sample: publication "powersync" does not exist
+// Happens when publication deleted or never created.
+// Slot must be re-created in this case.
+return (
+/incorrect prev-link/.test(message) ||
+/replication slot.*does not exist/.test(message) ||
+/publication.*does not exist/.test(message) ||
+// Postgres 18 - exceeded max_slot_wal_keep_size
+/can no longer access replication slot/.test(message) ||
+// Postgres 17 - exceeded max_slot_wal_keep_size
+/can no longer get changes from replication slot/.test(message)
+);
+}
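Read together with the `WalStreamReplicationJob` changes above, the intent is: a low-level streaming error whose message matches one of these patterns is re-thrown as `MissingReplicationSlotError`, the job logs it, and `restartReplication` re-creates the slot instead of retrying forever. A simplified, self-contained sketch of that flow (stand-in classes and an invented example slot name; the real implementations live in the files above):

```ts
// Stand-ins for the real classes, reduced to the behavior wired up in this release.
class MissingReplicationSlotErrorSketch extends Error {
  constructor(message: string, public cause?: unknown) {
    super(message);
  }
}

function isSlotInvalidSketch(e: any): boolean {
  const message: string = e?.message ?? '';
  // Two of the patterns from the function above, for illustration.
  return (
    /replication slot.*does not exist/.test(message) ||
    /can no longer get changes from replication slot/.test(message)
  );
}

// streamChanges-style wrapper: translate raw errors into the typed error.
async function streamChangesSketch(run: () => Promise<void>): Promise<void> {
  try {
    await run();
  } catch (e: any) {
    if (isSlotInvalidSketch(e)) {
      throw new MissingReplicationSlotErrorSketch(e.message, e);
    }
    throw e;
  }
}

// Job-level handling: a matched error leads to slot re-creation rather than a plain retry.
async function replicateOnceSketch(restartReplication: () => Promise<void>) {
  try {
    await streamChangesSketch(async () => {
      // Hypothetical raw error, as pgwire might surface it.
      throw new Error('ERROR: replication slot "powersync_1_example" does not exist');
    });
  } catch (e) {
    if (e instanceof MissingReplicationSlotErrorSketch) {
      await restartReplication(); // stand-in for storage.factory.restartReplication(...)
    }
  }
}
```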