@powersync/service-module-postgres 0.13.0 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +35 -0
- package/dist/api/PostgresRouteAPIAdapter.d.ts +1 -1
- package/dist/api/PostgresRouteAPIAdapter.js +5 -1
- package/dist/api/PostgresRouteAPIAdapter.js.map +1 -1
- package/dist/replication/SnapshotQuery.d.ts +78 -0
- package/dist/replication/SnapshotQuery.js +175 -0
- package/dist/replication/SnapshotQuery.js.map +1 -0
- package/dist/replication/WalStream.d.ts +37 -4
- package/dist/replication/WalStream.js +318 -91
- package/dist/replication/WalStream.js.map +1 -1
- package/dist/replication/WalStreamReplicationJob.d.ts +2 -0
- package/dist/replication/WalStreamReplicationJob.js +14 -3
- package/dist/replication/WalStreamReplicationJob.js.map +1 -1
- package/dist/replication/WalStreamReplicator.d.ts +1 -0
- package/dist/replication/WalStreamReplicator.js +22 -0
- package/dist/replication/WalStreamReplicator.js.map +1 -1
- package/dist/replication/replication-utils.d.ts +4 -0
- package/dist/replication/replication-utils.js +46 -2
- package/dist/replication/replication-utils.js.map +1 -1
- package/package.json +11 -10
- package/src/api/PostgresRouteAPIAdapter.ts +5 -1
- package/src/replication/SnapshotQuery.ts +209 -0
- package/src/replication/WalStream.ts +373 -98
- package/src/replication/WalStreamReplicationJob.ts +15 -3
- package/src/replication/WalStreamReplicator.ts +26 -0
- package/src/replication/replication-utils.ts +60 -2
- package/test/src/__snapshots__/schema_changes.test.ts.snap +2 -2
- package/test/src/checkpoints.test.ts +17 -7
- package/test/src/chunked_snapshots.test.ts +156 -0
- package/test/src/large_batch.test.ts +5 -154
- package/test/src/resuming_snapshots.test.ts +150 -0
- package/test/src/schema_changes.test.ts +5 -10
- package/test/src/slow_tests.test.ts +13 -30
- package/test/src/util.ts +12 -1
- package/test/src/validation.test.ts +0 -1
- package/test/src/wal_stream.test.ts +4 -9
- package/test/src/wal_stream_utils.ts +15 -7
- package/tsconfig.tsbuildinfo +1 -1
@@ -1,12 +1,13 @@
 import * as lib_postgres from '@powersync/lib-service-postgres';
-import { container, DatabaseConnectionError, ErrorCode, errors, logger
-import { getUuidReplicaIdentityBson, storage } from '@powersync/service-core';
+import { container, DatabaseConnectionError, ErrorCode, errors, logger as defaultLogger, ReplicationAssertionError, ReplicationAbortedError } from '@powersync/lib-services-framework';
+import { getUuidReplicaIdentityBson, RelationCache, storage } from '@powersync/service-core';
 import * as pgwire from '@powersync/service-jpgwire';
 import { toSyncRulesRow } from '@powersync/service-sync-rules';
 import * as pg_utils from '../utils/pgwire_utils.js';
 import { getPgOutputRelation, getRelId } from './PgRelation.js';
-import { checkSourceConfiguration, getReplicationIdentityColumns } from './replication-utils.js';
+import { checkSourceConfiguration, checkTableRls, getReplicationIdentityColumns } from './replication-utils.js';
 import { ReplicationMetric } from '@powersync/service-types';
+import { ChunkedSnapshotQuery, IdSnapshotQuery, SimpleSnapshotQuery } from './SnapshotQuery.js';
 export const ZERO_LSN = '00000000/00000000';
 export const PUBLICATION_NAME = 'powersync';
 export const POSTGRES_DEFAULT_SCHEMA = 'public';
@@ -39,20 +40,39 @@ export class WalStream {
     sync_rules;
     group_id;
     connection_id = 1;
+    logger;
     storage;
     metrics;
     slot_name;
     connections;
     abort_signal;
-
+    relationCache = new RelationCache((relation) => {
+        if (typeof relation == 'number') {
+            return relation;
+        }
+        return relation.objectId;
+    });
     startedStreaming = false;
+    snapshotChunkLength;
+    /**
+     * Time of the oldest uncommitted change, according to the source db.
+     * This is used to determine the replication lag.
+     */
+    oldestUncommittedChange = null;
+    /**
+     * Keep track of whether we have done a commit or keepalive yet.
+     * We can only compute replication lag if isStartingReplication == false, or oldestUncommittedChange is present.
+     */
+    isStartingReplication = true;
     constructor(options) {
+        this.logger = options.logger ?? defaultLogger;
         this.storage = options.storage;
         this.metrics = options.metrics;
         this.sync_rules = options.storage.getParsedSyncRules({ defaultSchema: POSTGRES_DEFAULT_SCHEMA });
         this.group_id = options.storage.group_id;
         this.slot_name = options.storage.slot_name;
         this.connections = options.connections;
+        this.snapshotChunkLength = options.snapshotChunkLength ?? 10_000;
         this.abort_signal = options.abort_signal;
         this.abort_signal.addEventListener('abort', () => {
             if (this.startedStreaming) {
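Note on the constructor changes above: WalStream now takes an injectable `logger` (falling back to the framework's default logger) and a `snapshotChunkLength` controlling how many rows each snapshot chunk handles (default 10_000), and it caches relations in a `RelationCache` keyed by the relation's numeric `objectId`. A minimal sketch of the new defaulting logic; the option type name below is assumed and not part of the package:

```ts
import { logger as defaultLogger } from '@powersync/lib-services-framework';

// Hypothetical option shape - only the two new fields read by the constructor above are shown.
interface WalStreamOptionsSketch {
  logger?: typeof defaultLogger;
  snapshotChunkLength?: number;
}

function resolveWalStreamOptions(options: WalStreamOptionsSketch) {
  return {
    // Per-stream logger supplied by the caller.
    logger: options.logger ?? defaultLogger,
    // Rows per snapshot chunk; larger chunks mean fewer FETCH round-trips.
    snapshotChunkLength: options.snapshotChunkLength ?? 10_000
  };
}
```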
@@ -62,7 +82,7 @@ export class WalStream {
                 const promise = sendKeepAlive(this.connections.pool);
                 promise.catch((e) => {
                     // Failures here are okay - this only speeds up stopping the process.
-                    logger.warn('Failed to ping connection', e);
+                    this.logger.warn('Failed to ping connection', e);
                 });
             }
             else {
@@ -130,9 +150,20 @@ export class WalStream {
                 ]
             });
             if (rs.rows.length == 0) {
-                logger.info(`Skipping ${tablePattern.schema}.${name} - not part of ${PUBLICATION_NAME} publication`);
+                this.logger.info(`Skipping ${tablePattern.schema}.${name} - not part of ${PUBLICATION_NAME} publication`);
                 continue;
             }
+            try {
+                const result = await checkTableRls(db, relid);
+                if (!result.canRead) {
+                    // We log the message, then continue anyway, since the check does not cover all cases.
+                    this.logger.warn(result.message);
+                }
+            }
+            catch (e) {
+                // It's possible that we just don't have permission to access pg_roles - log the error and continue.
+                this.logger.warn(`Could not check RLS access for ${tablePattern.schema}.${name}`, e);
+            }
             const cresult = await getReplicationIdentityColumns(db, relid);
             const table = await this.handleRelation(batch, {
                 name,
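The new `checkTableRls` call above (implemented in replication-utils, see the file list) warns when row-level security may hide rows from the replicating role; the result is only logged because the check is not exhaustive, and the lookup itself can fail without access to pg_roles. A rough illustration of the kind of catalog query such a check can run - this is not the package's implementation, and `mayBeFilteredByRls` is a made-up name:

```ts
// Minimal structural type matching the query interface used in this diff.
interface QueryRunner {
  query(q: { statement: string; params: { type: string; value: unknown }[] }): Promise<{ rows: any[][] }>;
}

// Illustrative only: true when RLS is enabled on the table and the current role
// is neither a superuser nor marked BYPASSRLS (table ownership is ignored here).
async function mayBeFilteredByRls(db: QueryRunner, relid: number): Promise<boolean> {
  const rs = await db.query({
    statement: `SELECT c.relrowsecurity AND NOT (r.rolsuper OR r.rolbypassrls)
FROM pg_class c
JOIN pg_roles r ON r.rolname = current_user
WHERE c.oid = $1::oid`,
    params: [{ type: 'int8', value: relid }]
  });
  return Boolean(rs.rows[0]?.[0]);
}
```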
@@ -152,7 +183,7 @@ export class WalStream {
         const snapshotDone = status.snapshot_done && status.checkpoint_lsn != null;
         if (snapshotDone) {
             // Snapshot is done, but we still need to check the replication slot status
-            logger.info(
+            this.logger.info(`Initial replication already done`);
         }
         // Check if replication slot exists
         const rs = await this.connections.pool.query({
@@ -208,7 +239,7 @@ export class WalStream {
                 // We peek a large number of changes here, to make it more likely to pick up replication slot errors.
                 // For example, "publication does not exist" only occurs here if the peek actually includes changes related
                 // to the slot.
-                logger.info(`Checking ${slotName}`);
+                this.logger.info(`Checking ${slotName}`);
                 // The actual results can be quite large, so we don't actually return everything
                 // due to memory and processing overhead that would create.
                 const cursor = await this.connections.pool.stream({
@@ -222,12 +253,12 @@ export class WalStream {
                     // No-op, just exhaust the cursor
                 }
                 // Success
-                logger.info(`Slot ${slotName} appears healthy`);
+                this.logger.info(`Slot ${slotName} appears healthy`);
                 return { needsNewSlot: false };
             }
             catch (e) {
                 last_error = e;
-                logger.warn(
+                this.logger.warn(`Replication slot error`, e);
                 if (this.stopped) {
                     throw e;
                 }
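The slot health check above deliberately peeks a large batch of pending changes so that slot-level errors (for example a dropped publication) actually surface before streaming starts; the statement itself is outside this hunk. One way to peek a pgoutput slot without consuming it is `pg_logical_slot_peek_binary_changes`, sketched below with illustrative limit and option values:

```ts
interface QueryRunner {
  query(q: { statement: string; params: { type: string; value: unknown }[] }): Promise<{ rows: any[][] }>;
}

// Illustrative peek; the module's real health-check query may differ.
async function peekSlot(db: QueryRunner, slotName: string, publication: string) {
  // Decoding errors such as `publication "..." does not exist` are thrown here
  // if the peeked changes reference tables in that publication.
  return await db.query({
    statement: `SELECT count(*) FROM pg_logical_slot_peek_binary_changes(
  $1, NULL, 1000,
  'proto_version', '1',
  'publication_names', $2)`,
    params: [
      { type: 'varchar', value: slotName },
      { type: 'varchar', value: publication }
    ]
  });
}
```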
@@ -249,7 +280,7 @@ export class WalStream {
                     // Sample: publication "powersync" does not exist
                     // Happens when publication deleted or never created.
                     // Slot must be re-created in this case.
-                    logger.info(`${slotName} is not valid anymore`);
+                    this.logger.info(`${slotName} is not valid anymore`);
                     return { needsNewSlot: true };
                 }
                 // Try again after a pause
@@ -258,7 +289,7 @@ export class WalStream {
         }
         throw new ReplicationAssertionError('Unreachable');
     }
-    async
+    async estimatedCountNumber(db, table) {
         const results = await db.query({
             statement: `SELECT reltuples::bigint AS estimate
 FROM pg_class
@@ -267,10 +298,10 @@ WHERE oid = $1::regclass`,
         });
         const row = results.rows[0];
         if ((row?.[0] ?? -1n) == -1n) {
-            return
+            return -1;
         }
         else {
-            return
+            return Number(row[0]);
         }
     }
     /**
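`estimatedCountNumber` above reads `pg_class.reltuples`, which is only the planner's estimate; on current PostgreSQL versions it is `-1` until the table has been vacuumed or analyzed, which is why the `-1` sentinel is passed through for the progress display. A small standalone sketch of the same query:

```ts
interface QueryRunner {
  query(q: { statement: string; params: { type: string; value: unknown }[] }): Promise<{ rows: any[][] }>;
}

// Returns -1 when PostgreSQL has no estimate yet (e.g. the table was never analyzed).
async function estimateRowCount(db: QueryRunner, qualifiedName: string): Promise<number> {
  const rs = await db.query({
    statement: `SELECT reltuples::bigint AS estimate FROM pg_class WHERE oid = $1::regclass`,
    params: [{ type: 'varchar', value: qualifiedName }]
  });
  return Number(rs.rows[0]?.[0] ?? -1n);
}
```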
@@ -290,7 +321,7 @@ WHERE oid = $1::regclass`,
             // In those cases, we have to start replication from scratch.
             // If there is an existing healthy slot, we can skip this and continue
             // initial replication where we left off.
-            await this.storage.clear();
+            await this.storage.clear({ signal: this.abort_signal });
             await db.query({
                 statement: 'SELECT pg_drop_replication_slot(slot_name) FROM pg_replication_slots WHERE slot_name = $1',
                 params: [{ type: 'varchar', value: slotName }]
@@ -298,27 +329,39 @@ WHERE oid = $1::regclass`,
             // We use the replication connection here, not a pool.
             // The replication slot must be created before we start snapshotting tables.
             await replicationConnection.query(`CREATE_REPLICATION_SLOT ${slotName} LOGICAL pgoutput`);
-            logger.info(`Created replication slot ${slotName}`);
+            this.logger.info(`Created replication slot ${slotName}`);
         }
         await this.initialReplication(db);
     }
     async initialReplication(db) {
         const sourceTables = this.sync_rules.getSourceTables();
-        await this.storage.startBatch({
+        await this.storage.startBatch({
+            logger: this.logger,
+            zeroLSN: ZERO_LSN,
+            defaultSchema: POSTGRES_DEFAULT_SCHEMA,
+            storeCurrentData: true,
+            skipExistingRows: true
+        }, async (batch) => {
+            let tablesWithStatus = [];
             for (let tablePattern of sourceTables) {
                 const tables = await this.getQualifiedTableNames(batch, db, tablePattern);
+                // Pre-get counts
                 for (let table of tables) {
                     if (table.snapshotComplete) {
-                        logger.info(
+                        this.logger.info(`Skipping ${table.qualifiedName} - snapshot already done`);
                         continue;
                     }
-                    await this.
-
-
-
-
+                    const count = await this.estimatedCountNumber(db, table);
+                    table = await batch.updateTableProgress(table, { totalEstimatedCount: count });
+                    this.relationCache.update(table);
+                    tablesWithStatus.push(table);
+                    this.logger.info(`To replicate: ${table.qualifiedName} ${table.formatSnapshotProgress()}`);
                 }
             }
+            for (let table of tablesWithStatus) {
+                await this.snapshotTableInTx(batch, db, table);
+                await touch();
+            }
             // Always commit the initial snapshot at zero.
             // This makes sure we don't skip any changes applied before starting this snapshot,
             // in the case of snapshot retries.
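The reworked `initialReplication` above pre-computes estimated counts and then snapshots each table via `snapshotTableInTx` (next hunk), so progress can be persisted with `updateTableProgress` and an interrupted snapshot can resume from the last replicated key instead of starting over. The chunking itself lives in the new SnapshotQuery.js; a simplified keyset-pagination sketch of the idea, not the actual ChunkedSnapshotQuery:

```ts
interface QueryRunner {
  query(q: { statement: string; params: { type: string; value: unknown }[] }): Promise<{ rows: any[][] }>;
}

/**
 * Simplified illustration of a resumable, chunked table snapshot.
 * Assumes a single text-compatible key column and pre-quoted identifiers.
 */
async function* chunkedSnapshot(
  db: QueryRunner,
  table: string,
  keyColumn: string,
  chunkLength: number,
  startAfter: string | null
) {
  let lastKey = startAfter;
  while (true) {
    const rs = await db.query({
      statement:
        lastKey == null
          ? `SELECT t.${keyColumn} AS snapshot_key, t.* FROM ${table} t ORDER BY t.${keyColumn} LIMIT ${chunkLength}`
          : `SELECT t.${keyColumn} AS snapshot_key, t.* FROM ${table} t WHERE t.${keyColumn} > $1 ORDER BY t.${keyColumn} LIMIT ${chunkLength}`,
      params: lastKey == null ? [] : [{ type: 'varchar', value: lastKey }]
    });
    if (rs.rows.length == 0) {
      break;
    }
    // Only the last key needs to be persisted to resume after an interruption.
    lastKey = String(rs.rows[rs.rows.length - 1][0]);
    yield { rows: rs.rows, lastKey };
  }
}
```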
@@ -340,53 +383,147 @@ WHERE oid = $1::regclass`,
             yield toSyncRulesRow(row);
         }
     }
-    async
-
-
-
-
-
+    async snapshotTableInTx(batch, db, table, limited) {
+        // Note: We use the default "Read Committed" isolation level here, not snapshot isolation.
+        // The data may change during the transaction, but that is compensated for in the streaming
+        // replication afterwards.
+        await db.query('BEGIN');
+        try {
+            let tableLsnNotBefore;
+            await this.snapshotTable(batch, db, table, limited);
+            // Get the current LSN.
+            // The data will only be consistent once incremental replication has passed that point.
+            // We have to get this LSN _after_ we have finished the table snapshot.
+            //
+            // There are basically two relevant LSNs here:
+            // A: The LSN before the snapshot starts. We don't explicitly record this on the PowerSync side,
+            //    but it is implicitly recorded in the replication slot.
+            // B: The LSN after the table snapshot is complete, which is what we get here.
+            // When we do the snapshot queries, the data that we get back for each chunk could match the state
+            // anywhere between A and B. To actually have a consistent state on our side, we need to:
+            // 1. Complete the snapshot.
+            // 2. Wait until logical replication has caught up with all the change between A and B.
+            // Calling `markSnapshotDone(LSN B)` covers that.
+            const rs = await db.query(`select pg_current_wal_lsn() as lsn`);
+            tableLsnNotBefore = rs.rows[0][0];
+            // Side note: A ROLLBACK would probably also be fine here, since we only read in this transaction.
+            await db.query('COMMIT');
+            const [resultTable] = await batch.markSnapshotDone([table], tableLsnNotBefore);
+            this.relationCache.update(resultTable);
+            return resultTable;
+        }
+        catch (e) {
+            await db.query('ROLLBACK');
+            throw e;
+        }
+    }
+    async snapshotTable(batch, db, table, limited) {
+        let totalEstimatedCount = table.snapshotStatus?.totalEstimatedCount;
+        let at = table.snapshotStatus?.replicatedCount ?? 0;
+        let lastCountTime = 0;
+        let q;
+        // We do streaming on two levels:
+        // 1. Coarse level: DELCARE CURSOR, FETCH 10000 at a time.
+        // 2. Fine level: Stream chunks from each fetch call.
+        if (limited) {
+            q = new IdSnapshotQuery(db, table, limited);
+        }
+        else if (ChunkedSnapshotQuery.supports(table)) {
+            // Single primary key - we can use the primary key for chunking
+            const orderByKey = table.replicaIdColumns[0];
+            q = new ChunkedSnapshotQuery(db, table, this.snapshotChunkLength, table.snapshotStatus?.lastKey ?? null);
+            if (table.snapshotStatus?.lastKey != null) {
+                this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} - resuming from ${orderByKey.name} > ${q.lastKey}`);
+            }
+            else {
+                this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} - resumable`);
+            }
+        }
+        else {
+            // Fallback case - query the entire table
+            this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} - not resumable`);
+            q = new SimpleSnapshotQuery(db, table, this.snapshotChunkLength);
+            at = 0;
+        }
+        await q.initialize();
         let columns = [];
-
-
-
-
-
-
-
+        let hasRemainingData = true;
+        while (hasRemainingData) {
+            // Fetch 10k at a time.
+            // The balance here is between latency overhead per FETCH call,
+            // and not spending too much time on each FETCH call.
+            // We aim for a couple of seconds on each FETCH call.
+            const cursor = q.nextChunk();
+            hasRemainingData = false;
+            // pgwire streams rows in chunks.
+            // These chunks can be quite small (as little as 16KB), so we don't flush chunks automatically.
+            // There are typically 100-200 rows per chunk.
+            for await (let chunk of cursor) {
+                if (chunk.tag == 'RowDescription') {
+                    // We get a RowDescription for each FETCH call, but they should
+                    // all be the same.
+                    let i = 0;
+                    columns = chunk.payload.map((c) => {
+                        return { i: i++, name: c.name };
+                    });
+                    continue;
+                }
+                const rows = chunk.rows.map((row) => {
+                    let q = {};
+                    for (let c of columns) {
+                        q[c.name] = row[c.i];
+                    }
+                    return q;
                 });
-
+                if (rows.length > 0) {
+                    hasRemainingData = true;
+                }
+                for (const record of WalStream.getQueryData(rows)) {
+                    // This auto-flushes when the batch reaches its size limit
+                    await batch.save({
+                        tag: storage.SaveOperationTag.INSERT,
+                        sourceTable: table,
+                        before: undefined,
+                        beforeReplicaId: undefined,
+                        after: record,
+                        afterReplicaId: getUuidReplicaIdentityBson(record, table.replicaIdColumns)
+                    });
+                }
+                at += rows.length;
+                this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(rows.length);
+                await touch();
             }
-
-
-
-
+            // Important: flush before marking progress
+            await batch.flush();
+            if (limited == null) {
+                let lastKey;
+                if (q instanceof ChunkedSnapshotQuery) {
+                    lastKey = q.getLastKeySerialized();
                 }
-
-
-
-
-
+                if (lastCountTime < performance.now() - 10 * 60 * 1000) {
+                    // Even though we're doing the snapshot inside a transaction, the transaction uses
+                    // the default "Read Committed" isolation level. This means we can get new data
+                    // within the transaction, so we re-estimate the count every 10 minutes when replicating
+                    // large tables.
+                    totalEstimatedCount = await this.estimatedCountNumber(db, table);
+                    lastCountTime = performance.now();
+                }
+                table = await batch.updateTableProgress(table, {
+                    lastKey: lastKey,
+                    replicatedCount: at,
+                    totalEstimatedCount: totalEstimatedCount
+                });
+                this.relationCache.update(table);
+                this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()}`);
             }
-
-
+            else {
+                this.logger.info(`Replicating ${table.qualifiedName} ${at}/${limited.length} for resnapshot`);
             }
-
-                        //
-
-                        tag: storage.SaveOperationTag.INSERT,
-                        sourceTable: table,
-                        before: undefined,
-                        beforeReplicaId: undefined,
-                        after: record,
-                        afterReplicaId: getUuidReplicaIdentityBson(record, table.replicaIdColumns)
-                    });
+            if (this.abort_signal.aborted) {
+                // We only abort after flushing
+                throw new ReplicationAbortedError(`Initial replication interrupted`);
             }
-                at += rows.length;
-                this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(rows.length);
-                await touch();
         }
-        await batch.flush();
     }
     async handleRelation(batch, descriptor, snapshot) {
         if (!descriptor.objectId && typeof descriptor.objectId != 'number') {
@@ -399,7 +536,7 @@ WHERE oid = $1::regclass`,
             entity_descriptor: descriptor,
             sync_rules: this.sync_rules
         });
-        this.
+        this.relationCache.update(result.table);
         // Drop conflicting tables. This includes for example renamed tables.
         await batch.drop(result.dropTables);
         // Snapshot if:
@@ -410,38 +547,53 @@ WHERE oid = $1::regclass`,
         if (shouldSnapshot) {
             // Truncate this table, in case a previous snapshot was interrupted.
             await batch.truncate([result.table]);
-            let lsn = ZERO_LSN;
             // Start the snapshot inside a transaction.
             // We use a dedicated connection for this.
             const db = await this.connections.snapshotConnection();
             try {
-                await db.
-
-
-
-
-
-                // We have to get this LSN _after_ we have started the snapshot query.
-                const rs = await db.query(`select pg_current_wal_lsn() as lsn`);
-                lsn = rs.rows[0][0];
-                await db.query('COMMIT');
-            }
-            catch (e) {
-                await db.query('ROLLBACK');
-                // TODO: Wrap with custom error type
-                throw e;
-            }
+                const table = await this.snapshotTableInTx(batch, db, result.table);
+                // After the table snapshot, we wait for replication to catch up.
+                // To make sure there is actually something to replicate, we send a keepalive
+                // message.
+                await sendKeepAlive(db);
+                return table;
             }
             finally {
                 await db.end();
             }
-            const [table] = await batch.markSnapshotDone([result.table], lsn);
-            return table;
         }
         return result.table;
     }
+    /**
+     * Process rows that have missing TOAST values.
+     *
+     * This can happen during edge cases in the chunked intial snapshot process.
+     *
+     * We handle this similar to an inline table snapshot, but limited to the specific
+     * set of rows.
+     */
+    async resnapshot(batch, rows) {
+        const byTable = new Map();
+        for (let row of rows) {
+            const relId = row.table.objectId; // always a number for postgres
+            if (!byTable.has(relId)) {
+                byTable.set(relId, []);
+            }
+            byTable.get(relId).push(row);
+        }
+        const db = await this.connections.snapshotConnection();
+        try {
+            for (let rows of byTable.values()) {
+                const table = rows[0].table;
+                await this.snapshotTableInTx(batch, db, table, rows.map((r) => r.key));
+            }
+        }
+        finally {
+            await db.end();
+        }
+    }
     getTable(relationId) {
-        const table = this.
+        const table = this.relationCache.get(relationId);
         if (table == null) {
             // We should always receive a replication message before the relation is used.
             // If we can't find it, it's a bug.
@@ -456,7 +608,7 @@ WHERE oid = $1::regclass`,
         if (msg.tag == 'insert' || msg.tag == 'update' || msg.tag == 'delete') {
             const table = this.getTable(getRelId(msg.relation));
             if (!table.syncAny) {
-                logger.debug(`Table ${table.qualifiedName} not used in sync rules - skipping`);
+                this.logger.debug(`Table ${table.qualifiedName} not used in sync rules - skipping`);
                 return null;
             }
             if (msg.tag == 'insert') {
@@ -556,7 +708,36 @@ WHERE oid = $1::regclass`,
         this.startedStreaming = true;
         // Auto-activate as soon as initial replication is done
         await this.storage.autoActivate();
-
+        let resnapshot = [];
+        const markRecordUnavailable = (record) => {
+            if (!IdSnapshotQuery.supports(record.sourceTable)) {
+                // If it's not supported, it's also safe to ignore
+                return;
+            }
+            let key = {};
+            for (let column of record.sourceTable.replicaIdColumns) {
+                const name = column.name;
+                const value = record.after[name];
+                if (value == null) {
+                    // We don't expect this to actually happen.
+                    // The key should always be present in the "after" record.
+                    return;
+                }
+                key[name] = value;
+            }
+            resnapshot.push({
+                table: record.sourceTable,
+                key: key
+            });
+        };
+        await this.storage.startBatch({
+            logger: this.logger,
+            zeroLSN: ZERO_LSN,
+            defaultSchema: POSTGRES_DEFAULT_SCHEMA,
+            storeCurrentData: true,
+            skipExistingRows: false,
+            markRecordUnavailable
+        }, async (batch) => {
             // We don't handle any plain keepalive messages while we have transactions.
             // While we have transactions, we use that to advance the position.
             // Replication never starts in the middle of a transaction, so this starts as false.
@@ -585,6 +766,9 @@ WHERE oid = $1::regclass`,
                 else if (msg.tag == 'begin') {
                     // This may span multiple transactions in the same chunk, or even across chunks.
                     skipKeepalive = true;
+                    if (this.oldestUncommittedChange == null) {
+                        this.oldestUncommittedChange = new Date(Number(msg.commitTime / 1000n));
+                    }
                 }
                 else if (msg.tag == 'commit') {
                     this.metrics.getCounter(ReplicationMetric.TRANSACTIONS_REPLICATED).add(1);
@@ -593,13 +777,30 @@ WHERE oid = $1::regclass`,
                     // This effectively lets us batch multiple transactions within the same chunk
                     // into a single flush, increasing throughput for many small transactions.
                     skipKeepalive = false;
-
+                    // flush() must be before the resnapshot check - that is
+                    // typically what reports the resnapshot records.
+                    await batch.flush({ oldestUncommittedChange: this.oldestUncommittedChange });
+                    // This _must_ be checked after the flush(), and before
+                    // commit() or ack(). We never persist the resnapshot list,
+                    // so we have to process it before marking our progress.
+                    if (resnapshot.length > 0) {
+                        await this.resnapshot(batch, resnapshot);
+                        resnapshot = [];
+                    }
+                    const didCommit = await batch.commit(msg.lsn, {
+                        createEmptyCheckpoints,
+                        oldestUncommittedChange: this.oldestUncommittedChange
+                    });
                     await this.ack(msg.lsn, replicationStream);
+                    if (didCommit) {
+                        this.oldestUncommittedChange = null;
+                        this.isStartingReplication = false;
+                    }
                 }
             }
             else {
                 if (count % 100 == 0) {
-                    logger.info(
+                    this.logger.info(`Replicating op ${count} ${msg.lsn}`);
                 }
                 /**
                  * If we can see the contents of logical messages, then we can check if a keepalive
@@ -611,7 +812,14 @@ WHERE oid = $1::regclass`,
                     keepAliveDetected = true;
                 }
                 count += 1;
-                await this.writeChange(batch, msg);
+                const flushResult = await this.writeChange(batch, msg);
+                if (flushResult != null && resnapshot.length > 0) {
+                    // If we have large transactions, we also need to flush the resnapshot list
+                    // periodically.
+                    // TODO: make sure this bit is actually triggered
+                    await this.resnapshot(batch, resnapshot);
+                    resnapshot = [];
+                }
             }
         }
         if (!skipKeepalive) {
@@ -623,7 +831,11 @@ WHERE oid = $1::regclass`,
             // Big caveat: This _must not_ be used to skip individual messages, since this LSN
             // may be in the middle of the next transaction.
             // It must only be used to associate checkpoints with LSNs.
-            await batch.keepalive(chunkLastLsn);
+            const didCommit = await batch.keepalive(chunkLastLsn);
+            if (didCommit) {
+                this.oldestUncommittedChange = null;
+            }
+            this.isStartingReplication = false;
         }
         // We receive chunks with empty messages often (about each second).
         // Acknowledging here progresses the slot past these and frees up resources.
@@ -649,7 +861,8 @@ WHERE oid = $1::regclass`,
         if (storageIdentifier.type != lib_postgres.POSTGRES_CONNECTION_TYPE) {
             return {
                 // Keep the same behaviour as before allowing Postgres storage.
-                createEmptyCheckpoints: true
+                createEmptyCheckpoints: true,
+                oldestUncommittedChange: null
            };
         }
         const parsedStorageIdentifier = lib_postgres.utils.decodePostgresSystemIdentifier(storageIdentifier.id);
@@ -665,7 +878,8 @@ WHERE oid = $1::regclass`,
             * Don't create empty checkpoints if the same Postgres database is used for the data source
             * and sync bucket storage. Creating empty checkpoints will cause WAL feedback loops.
             */
-            createEmptyCheckpoints: replicationIdentifier.database_name != parsedStorageIdentifier.database_name
+            createEmptyCheckpoints: replicationIdentifier.database_name != parsedStorageIdentifier.database_name,
+            oldestUncommittedChange: null
         };
     }
     /**
@@ -676,6 +890,19 @@ WHERE oid = $1::regclass`,
         const version = await this.connections.getServerVersion();
         return version ? version.compareMain('14.0.0') >= 0 : false;
     }
+    async getReplicationLagMillis() {
+        if (this.oldestUncommittedChange == null) {
+            if (this.isStartingReplication) {
+                // We don't have anything to compute replication lag with yet.
+                return undefined;
+            }
+            else {
+                // We don't have any uncommitted changes, so replication is up-to-date.
+                return 0;
+            }
+        }
+        return Date.now() - this.oldestUncommittedChange.getTime();
+    }
 }
 async function touch() {
// FIXME: The hosted Kubernetes probe does not actually check the timestamp on this.
|