@powersync/service-module-postgres 0.0.0-dev-20250507154604 → 0.0.0-dev-20250611110033
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +47 -8
- package/dist/api/PostgresRouteAPIAdapter.d.ts +1 -1
- package/dist/api/PostgresRouteAPIAdapter.js +5 -1
- package/dist/api/PostgresRouteAPIAdapter.js.map +1 -1
- package/dist/auth/SupabaseKeyCollector.d.ts +3 -10
- package/dist/auth/SupabaseKeyCollector.js +6 -4
- package/dist/auth/SupabaseKeyCollector.js.map +1 -1
- package/dist/replication/SnapshotQuery.d.ts +75 -0
- package/dist/replication/SnapshotQuery.js +172 -0
- package/dist/replication/SnapshotQuery.js.map +1 -0
- package/dist/replication/WalStream.d.ts +37 -4
- package/dist/replication/WalStream.js +284 -88
- package/dist/replication/WalStream.js.map +1 -1
- package/dist/replication/WalStreamReplicationJob.d.ts +2 -0
- package/dist/replication/WalStreamReplicationJob.js +10 -3
- package/dist/replication/WalStreamReplicationJob.js.map +1 -1
- package/dist/replication/WalStreamReplicator.d.ts +1 -0
- package/dist/replication/WalStreamReplicator.js +22 -0
- package/dist/replication/WalStreamReplicator.js.map +1 -1
- package/package.json +12 -12
- package/src/api/PostgresRouteAPIAdapter.ts +5 -1
- package/src/auth/SupabaseKeyCollector.ts +14 -5
- package/src/replication/SnapshotQuery.ts +206 -0
- package/src/replication/WalStream.ts +338 -95
- package/src/replication/WalStreamReplicationJob.ts +11 -3
- package/src/replication/WalStreamReplicator.ts +26 -0
- package/test/src/__snapshots__/schema_changes.test.ts.snap +2 -2
- package/test/src/checkpoints.test.ts +10 -3
- package/test/src/chunked_snapshots.test.ts +156 -0
- package/test/src/large_batch.test.ts +5 -154
- package/test/src/resuming_snapshots.test.ts +150 -0
- package/test/src/schema_changes.test.ts +5 -10
- package/test/src/slow_tests.test.ts +13 -30
- package/test/src/util.ts +12 -1
- package/test/src/validation.test.ts +0 -1
- package/test/src/wal_stream.test.ts +4 -9
- package/test/src/wal_stream_utils.ts +15 -7
- package/tsconfig.tsbuildinfo +1 -1
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
import * as lib_postgres from '@powersync/lib-service-postgres';
|
|
2
|
-
import { container, DatabaseConnectionError, ErrorCode, errors, logger
|
|
3
|
-
import { getUuidReplicaIdentityBson, storage } from '@powersync/service-core';
|
|
2
|
+
import { container, DatabaseConnectionError, ErrorCode, errors, logger as defaultLogger, ReplicationAssertionError, ReplicationAbortedError } from '@powersync/lib-services-framework';
|
|
3
|
+
import { getUuidReplicaIdentityBson, RelationCache, storage } from '@powersync/service-core';
|
|
4
4
|
import * as pgwire from '@powersync/service-jpgwire';
|
|
5
5
|
import { toSyncRulesRow } from '@powersync/service-sync-rules';
|
|
6
6
|
import * as pg_utils from '../utils/pgwire_utils.js';
|
|
7
7
|
import { getPgOutputRelation, getRelId } from './PgRelation.js';
|
|
8
8
|
import { checkSourceConfiguration, getReplicationIdentityColumns } from './replication-utils.js';
|
|
9
9
|
import { ReplicationMetric } from '@powersync/service-types';
|
|
10
|
+
import { ChunkedSnapshotQuery, IdSnapshotQuery, SimpleSnapshotQuery } from './SnapshotQuery.js';
|
|
10
11
|
export const ZERO_LSN = '00000000/00000000';
|
|
11
12
|
export const PUBLICATION_NAME = 'powersync';
|
|
12
13
|
export const POSTGRES_DEFAULT_SCHEMA = 'public';
|
|
@@ -39,20 +40,39 @@ export class WalStream {
|
|
|
39
40
|
sync_rules;
|
|
40
41
|
group_id;
|
|
41
42
|
connection_id = 1;
|
|
43
|
+
logger;
|
|
42
44
|
storage;
|
|
43
45
|
metrics;
|
|
44
46
|
slot_name;
|
|
45
47
|
connections;
|
|
46
48
|
abort_signal;
|
|
47
|
-
|
|
49
|
+
relationCache = new RelationCache((relation) => {
|
|
50
|
+
if (typeof relation == 'number') {
|
|
51
|
+
return relation;
|
|
52
|
+
}
|
|
53
|
+
return relation.objectId;
|
|
54
|
+
});
|
|
48
55
|
startedStreaming = false;
|
|
56
|
+
snapshotChunkLength;
|
|
57
|
+
/**
|
|
58
|
+
* Time of the oldest uncommitted change, according to the source db.
|
|
59
|
+
* This is used to determine the replication lag.
|
|
60
|
+
*/
|
|
61
|
+
oldestUncommittedChange = null;
|
|
62
|
+
/**
|
|
63
|
+
* Keep track of whether we have done a commit or keepalive yet.
|
|
64
|
+
* We can only compute replication lag if isStartingReplication == false, or oldestUncommittedChange is present.
|
|
65
|
+
*/
|
|
66
|
+
isStartingReplication = true;
|
|
49
67
|
constructor(options) {
|
|
68
|
+
this.logger = options.logger ?? defaultLogger;
|
|
50
69
|
this.storage = options.storage;
|
|
51
70
|
this.metrics = options.metrics;
|
|
52
71
|
this.sync_rules = options.storage.getParsedSyncRules({ defaultSchema: POSTGRES_DEFAULT_SCHEMA });
|
|
53
72
|
this.group_id = options.storage.group_id;
|
|
54
73
|
this.slot_name = options.storage.slot_name;
|
|
55
74
|
this.connections = options.connections;
|
|
75
|
+
this.snapshotChunkLength = options.snapshotChunkLength ?? 10_000;
|
|
56
76
|
this.abort_signal = options.abort_signal;
|
|
57
77
|
this.abort_signal.addEventListener('abort', () => {
|
|
58
78
|
if (this.startedStreaming) {
|
|
@@ -62,7 +82,7 @@ export class WalStream {
|
|
|
62
82
|
const promise = sendKeepAlive(this.connections.pool);
|
|
63
83
|
promise.catch((e) => {
|
|
64
84
|
// Failures here are okay - this only speeds up stopping the process.
|
|
65
|
-
logger.warn('Failed to ping connection', e);
|
|
85
|
+
this.logger.warn('Failed to ping connection', e);
|
|
66
86
|
});
|
|
67
87
|
}
|
|
68
88
|
else {
|
|
@@ -130,7 +150,7 @@ export class WalStream {
|
|
|
130
150
|
]
|
|
131
151
|
});
|
|
132
152
|
if (rs.rows.length == 0) {
|
|
133
|
-
logger.info(`Skipping ${tablePattern.schema}.${name} - not part of ${PUBLICATION_NAME} publication`);
|
|
153
|
+
this.logger.info(`Skipping ${tablePattern.schema}.${name} - not part of ${PUBLICATION_NAME} publication`);
|
|
134
154
|
continue;
|
|
135
155
|
}
|
|
136
156
|
const cresult = await getReplicationIdentityColumns(db, relid);
|
|
@@ -152,7 +172,7 @@ export class WalStream {
|
|
|
152
172
|
const snapshotDone = status.snapshot_done && status.checkpoint_lsn != null;
|
|
153
173
|
if (snapshotDone) {
|
|
154
174
|
// Snapshot is done, but we still need to check the replication slot status
|
|
155
|
-
logger.info(
|
|
175
|
+
this.logger.info(`Initial replication already done`);
|
|
156
176
|
}
|
|
157
177
|
// Check if replication slot exists
|
|
158
178
|
const rs = await this.connections.pool.query({
|
|
@@ -208,7 +228,7 @@ export class WalStream {
|
|
|
208
228
|
// We peek a large number of changes here, to make it more likely to pick up replication slot errors.
|
|
209
229
|
// For example, "publication does not exist" only occurs here if the peek actually includes changes related
|
|
210
230
|
// to the slot.
|
|
211
|
-
logger.info(`Checking ${slotName}`);
|
|
231
|
+
this.logger.info(`Checking ${slotName}`);
|
|
212
232
|
// The actual results can be quite large, so we don't actually return everything
|
|
213
233
|
// due to memory and processing overhead that would create.
|
|
214
234
|
const cursor = await this.connections.pool.stream({
|
|
@@ -222,12 +242,12 @@ export class WalStream {
|
|
|
222
242
|
// No-op, just exhaust the cursor
|
|
223
243
|
}
|
|
224
244
|
// Success
|
|
225
|
-
logger.info(`Slot ${slotName} appears healthy`);
|
|
245
|
+
this.logger.info(`Slot ${slotName} appears healthy`);
|
|
226
246
|
return { needsNewSlot: false };
|
|
227
247
|
}
|
|
228
248
|
catch (e) {
|
|
229
249
|
last_error = e;
|
|
230
|
-
logger.warn(
|
|
250
|
+
this.logger.warn(`Replication slot error`, e);
|
|
231
251
|
if (this.stopped) {
|
|
232
252
|
throw e;
|
|
233
253
|
}
|
|
@@ -249,7 +269,7 @@ export class WalStream {
|
|
|
249
269
|
// Sample: publication "powersync" does not exist
|
|
250
270
|
// Happens when publication deleted or never created.
|
|
251
271
|
// Slot must be re-created in this case.
|
|
252
|
-
logger.info(`${slotName} is not valid anymore`);
|
|
272
|
+
this.logger.info(`${slotName} is not valid anymore`);
|
|
253
273
|
return { needsNewSlot: true };
|
|
254
274
|
}
|
|
255
275
|
// Try again after a pause
|
|
@@ -258,7 +278,7 @@ export class WalStream {
|
|
|
258
278
|
}
|
|
259
279
|
throw new ReplicationAssertionError('Unreachable');
|
|
260
280
|
}
|
|
261
|
-
async
|
|
281
|
+
async estimatedCountNumber(db, table) {
|
|
262
282
|
const results = await db.query({
|
|
263
283
|
statement: `SELECT reltuples::bigint AS estimate
|
|
264
284
|
FROM pg_class
|
|
@@ -267,10 +287,10 @@ WHERE oid = $1::regclass`,
|
|
|
267
287
|
});
|
|
268
288
|
const row = results.rows[0];
|
|
269
289
|
if ((row?.[0] ?? -1n) == -1n) {
|
|
270
|
-
return
|
|
290
|
+
return -1;
|
|
271
291
|
}
|
|
272
292
|
else {
|
|
273
|
-
return
|
|
293
|
+
return Number(row[0]);
|
|
274
294
|
}
|
|
275
295
|
}
|
|
276
296
|
/**
|
|
@@ -298,27 +318,39 @@ WHERE oid = $1::regclass`,
|
|
|
298
318
|
// We use the replication connection here, not a pool.
|
|
299
319
|
// The replication slot must be created before we start snapshotting tables.
|
|
300
320
|
await replicationConnection.query(`CREATE_REPLICATION_SLOT ${slotName} LOGICAL pgoutput`);
|
|
301
|
-
logger.info(`Created replication slot ${slotName}`);
|
|
321
|
+
this.logger.info(`Created replication slot ${slotName}`);
|
|
302
322
|
}
|
|
303
323
|
await this.initialReplication(db);
|
|
304
324
|
}
|
|
305
325
|
async initialReplication(db) {
|
|
306
326
|
const sourceTables = this.sync_rules.getSourceTables();
|
|
307
|
-
await this.storage.startBatch({
|
|
327
|
+
await this.storage.startBatch({
|
|
328
|
+
logger: this.logger,
|
|
329
|
+
zeroLSN: ZERO_LSN,
|
|
330
|
+
defaultSchema: POSTGRES_DEFAULT_SCHEMA,
|
|
331
|
+
storeCurrentData: true,
|
|
332
|
+
skipExistingRows: true
|
|
333
|
+
}, async (batch) => {
|
|
334
|
+
let tablesWithStatus = [];
|
|
308
335
|
for (let tablePattern of sourceTables) {
|
|
309
336
|
const tables = await this.getQualifiedTableNames(batch, db, tablePattern);
|
|
337
|
+
// Pre-get counts
|
|
310
338
|
for (let table of tables) {
|
|
311
339
|
if (table.snapshotComplete) {
|
|
312
|
-
logger.info(
|
|
340
|
+
this.logger.info(`Skipping ${table.qualifiedName} - snapshot already done`);
|
|
313
341
|
continue;
|
|
314
342
|
}
|
|
315
|
-
await this.
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
343
|
+
const count = await this.estimatedCountNumber(db, table);
|
|
344
|
+
table = await batch.updateTableProgress(table, { totalEstimatedCount: count });
|
|
345
|
+
this.relationCache.update(table);
|
|
346
|
+
tablesWithStatus.push(table);
|
|
347
|
+
this.logger.info(`To replicate: ${table.qualifiedName} ${table.formatSnapshotProgress()}`);
|
|
320
348
|
}
|
|
321
349
|
}
|
|
350
|
+
for (let table of tablesWithStatus) {
|
|
351
|
+
await this.snapshotTableInTx(batch, db, table);
|
|
352
|
+
await touch();
|
|
353
|
+
}
|
|
322
354
|
// Always commit the initial snapshot at zero.
|
|
323
355
|
// This makes sure we don't skip any changes applied before starting this snapshot,
|
|
324
356
|
// in the case of snapshot retries.
|
|
@@ -340,53 +372,130 @@ WHERE oid = $1::regclass`,
|
|
|
340
372
|
yield toSyncRulesRow(row);
|
|
341
373
|
}
|
|
342
374
|
}
|
|
343
|
-
async
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
375
|
+
async snapshotTableInTx(batch, db, table, limited) {
|
|
376
|
+
await db.query('BEGIN');
|
|
377
|
+
try {
|
|
378
|
+
let tableLsnNotBefore;
|
|
379
|
+
await this.snapshotTable(batch, db, table, limited);
|
|
380
|
+
// Get the current LSN.
|
|
381
|
+
// The data will only be consistent once incremental replication
|
|
382
|
+
// has passed that point.
|
|
383
|
+
// We have to get this LSN _after_ we have started the snapshot query.
|
|
384
|
+
const rs = await db.query(`select pg_current_wal_lsn() as lsn`);
|
|
385
|
+
tableLsnNotBefore = rs.rows[0][0];
|
|
386
|
+
await db.query('COMMIT');
|
|
387
|
+
const [resultTable] = await batch.markSnapshotDone([table], tableLsnNotBefore);
|
|
388
|
+
this.relationCache.update(resultTable);
|
|
389
|
+
return resultTable;
|
|
390
|
+
}
|
|
391
|
+
catch (e) {
|
|
392
|
+
await db.query('ROLLBACK');
|
|
393
|
+
throw e;
|
|
394
|
+
}
|
|
395
|
+
}
|
|
396
|
+
async snapshotTable(batch, db, table, limited) {
|
|
397
|
+
let totalEstimatedCount = table.snapshotStatus?.totalEstimatedCount;
|
|
398
|
+
let at = table.snapshotStatus?.replicatedCount ?? 0;
|
|
399
|
+
let lastCountTime = 0;
|
|
400
|
+
let q;
|
|
401
|
+
// We do streaming on two levels:
|
|
402
|
+
// 1. Coarse level: DECLARE CURSOR, FETCH 10000 at a time.
|
|
403
|
+
// 2. Fine level: Stream chunks from each fetch call.
|
|
404
|
+
if (limited) {
|
|
405
|
+
q = new IdSnapshotQuery(db, table, limited);
|
|
406
|
+
}
|
|
407
|
+
else if (ChunkedSnapshotQuery.supports(table)) {
|
|
408
|
+
// Single primary key - we can use the primary key for chunking
|
|
409
|
+
const orderByKey = table.replicaIdColumns[0];
|
|
410
|
+
q = new ChunkedSnapshotQuery(db, table, this.snapshotChunkLength, table.snapshotStatus?.lastKey ?? null);
|
|
411
|
+
if (table.snapshotStatus?.lastKey != null) {
|
|
412
|
+
this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} - resuming from ${orderByKey.name} > ${q.lastKey}`);
|
|
413
|
+
}
|
|
414
|
+
else {
|
|
415
|
+
this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} - resumable`);
|
|
416
|
+
}
|
|
417
|
+
}
|
|
418
|
+
else {
|
|
419
|
+
// Fallback case - query the entire table
|
|
420
|
+
this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} - not resumable`);
|
|
421
|
+
q = new SimpleSnapshotQuery(db, table, this.snapshotChunkLength);
|
|
422
|
+
at = 0;
|
|
423
|
+
}
|
|
424
|
+
await q.initialize();
|
|
349
425
|
let columns = [];
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
426
|
+
let hasRemainingData = true;
|
|
427
|
+
while (hasRemainingData) {
|
|
428
|
+
// Fetch 10k at a time.
|
|
429
|
+
// The balance here is between latency overhead per FETCH call,
|
|
430
|
+
// and not spending too much time on each FETCH call.
|
|
431
|
+
// We aim for a couple of seconds on each FETCH call.
|
|
432
|
+
const cursor = q.nextChunk();
|
|
433
|
+
hasRemainingData = false;
|
|
434
|
+
// pgwire streams rows in chunks.
|
|
435
|
+
// These chunks can be quite small (as little as 16KB), so we don't flush chunks automatically.
|
|
436
|
+
// There are typically 100-200 rows per chunk.
|
|
437
|
+
for await (let chunk of cursor) {
|
|
438
|
+
if (chunk.tag == 'RowDescription') {
|
|
439
|
+
// We get a RowDescription for each FETCH call, but they should
|
|
440
|
+
// all be the same.
|
|
441
|
+
let i = 0;
|
|
442
|
+
columns = chunk.payload.map((c) => {
|
|
443
|
+
return { i: i++, name: c.name };
|
|
444
|
+
});
|
|
445
|
+
continue;
|
|
446
|
+
}
|
|
447
|
+
const rows = chunk.rows.map((row) => {
|
|
448
|
+
let q = {};
|
|
449
|
+
for (let c of columns) {
|
|
450
|
+
q[c.name] = row[c.i];
|
|
451
|
+
}
|
|
452
|
+
return q;
|
|
357
453
|
});
|
|
358
|
-
|
|
454
|
+
if (rows.length > 0) {
|
|
455
|
+
hasRemainingData = true;
|
|
456
|
+
}
|
|
457
|
+
for (const record of WalStream.getQueryData(rows)) {
|
|
458
|
+
// This auto-flushes when the batch reaches its size limit
|
|
459
|
+
await batch.save({
|
|
460
|
+
tag: storage.SaveOperationTag.INSERT,
|
|
461
|
+
sourceTable: table,
|
|
462
|
+
before: undefined,
|
|
463
|
+
beforeReplicaId: undefined,
|
|
464
|
+
after: record,
|
|
465
|
+
afterReplicaId: getUuidReplicaIdentityBson(record, table.replicaIdColumns)
|
|
466
|
+
});
|
|
467
|
+
}
|
|
468
|
+
at += rows.length;
|
|
469
|
+
this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(rows.length);
|
|
470
|
+
await touch();
|
|
359
471
|
}
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
472
|
+
// Important: flush before marking progress
|
|
473
|
+
await batch.flush();
|
|
474
|
+
if (limited == null) {
|
|
475
|
+
let lastKey;
|
|
476
|
+
if (q instanceof ChunkedSnapshotQuery) {
|
|
477
|
+
lastKey = q.getLastKeySerialized();
|
|
364
478
|
}
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
479
|
+
if (lastCountTime < performance.now() - 10 * 60 * 1000) {
|
|
480
|
+
totalEstimatedCount = await this.estimatedCountNumber(db, table);
|
|
481
|
+
lastCountTime = performance.now();
|
|
482
|
+
}
|
|
483
|
+
table = await batch.updateTableProgress(table, {
|
|
484
|
+
lastKey: lastKey,
|
|
485
|
+
replicatedCount: at,
|
|
486
|
+
totalEstimatedCount: totalEstimatedCount
|
|
487
|
+
});
|
|
488
|
+
this.relationCache.update(table);
|
|
489
|
+
this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()}`);
|
|
370
490
|
}
|
|
371
|
-
|
|
372
|
-
|
|
491
|
+
else {
|
|
492
|
+
this.logger.info(`Replicating ${table.qualifiedName} ${at}/${limited.length} for resnapshot`);
|
|
373
493
|
}
|
|
374
|
-
|
|
375
|
-
//
|
|
376
|
-
|
|
377
|
-
tag: storage.SaveOperationTag.INSERT,
|
|
378
|
-
sourceTable: table,
|
|
379
|
-
before: undefined,
|
|
380
|
-
beforeReplicaId: undefined,
|
|
381
|
-
after: record,
|
|
382
|
-
afterReplicaId: getUuidReplicaIdentityBson(record, table.replicaIdColumns)
|
|
383
|
-
});
|
|
494
|
+
if (this.abort_signal.aborted) {
|
|
495
|
+
// We only abort after flushing
|
|
496
|
+
throw new ReplicationAbortedError(`Initial replication interrupted`);
|
|
384
497
|
}
|
|
385
|
-
at += rows.length;
|
|
386
|
-
this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(rows.length);
|
|
387
|
-
await touch();
|
|
388
498
|
}
|
|
389
|
-
await batch.flush();
|
|
390
499
|
}
|
|
391
500
|
async handleRelation(batch, descriptor, snapshot) {
|
|
392
501
|
if (!descriptor.objectId && typeof descriptor.objectId != 'number') {
|
|
@@ -399,7 +508,7 @@ WHERE oid = $1::regclass`,
|
|
|
399
508
|
entity_descriptor: descriptor,
|
|
400
509
|
sync_rules: this.sync_rules
|
|
401
510
|
});
|
|
402
|
-
this.
|
|
511
|
+
this.relationCache.update(result.table);
|
|
403
512
|
// Drop conflicting tables. This includes for example renamed tables.
|
|
404
513
|
await batch.drop(result.dropTables);
|
|
405
514
|
// Snapshot if:
|
|
@@ -410,38 +519,53 @@ WHERE oid = $1::regclass`,
|
|
|
410
519
|
if (shouldSnapshot) {
|
|
411
520
|
// Truncate this table, in case a previous snapshot was interrupted.
|
|
412
521
|
await batch.truncate([result.table]);
|
|
413
|
-
let lsn = ZERO_LSN;
|
|
414
522
|
// Start the snapshot inside a transaction.
|
|
415
523
|
// We use a dedicated connection for this.
|
|
416
524
|
const db = await this.connections.snapshotConnection();
|
|
417
525
|
try {
|
|
418
|
-
await db.
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
// We have to get this LSN _after_ we have started the snapshot query.
|
|
425
|
-
const rs = await db.query(`select pg_current_wal_lsn() as lsn`);
|
|
426
|
-
lsn = rs.rows[0][0];
|
|
427
|
-
await db.query('COMMIT');
|
|
428
|
-
}
|
|
429
|
-
catch (e) {
|
|
430
|
-
await db.query('ROLLBACK');
|
|
431
|
-
// TODO: Wrap with custom error type
|
|
432
|
-
throw e;
|
|
433
|
-
}
|
|
526
|
+
const table = await this.snapshotTableInTx(batch, db, result.table);
|
|
527
|
+
// After the table snapshot, we wait for replication to catch up.
|
|
528
|
+
// To make sure there is actually something to replicate, we send a keepalive
|
|
529
|
+
// message.
|
|
530
|
+
await sendKeepAlive(db);
|
|
531
|
+
return table;
|
|
434
532
|
}
|
|
435
533
|
finally {
|
|
436
534
|
await db.end();
|
|
437
535
|
}
|
|
438
|
-
const [table] = await batch.markSnapshotDone([result.table], lsn);
|
|
439
|
-
return table;
|
|
440
536
|
}
|
|
441
537
|
return result.table;
|
|
442
538
|
}
|
|
539
|
+
/**
|
|
540
|
+
* Process rows that have missing TOAST values.
|
|
541
|
+
*
|
|
542
|
+
* This can happen during edge cases in the chunked initial snapshot process.
|
|
543
|
+
*
|
|
544
|
+
* We handle this similarly to an inline table snapshot, but limited to the specific
|
|
545
|
+
* set of rows.
|
|
546
|
+
*/
|
|
547
|
+
async resnapshot(batch, rows) {
|
|
548
|
+
const byTable = new Map();
|
|
549
|
+
for (let row of rows) {
|
|
550
|
+
const relId = row.table.objectId; // always a number for postgres
|
|
551
|
+
if (!byTable.has(relId)) {
|
|
552
|
+
byTable.set(relId, []);
|
|
553
|
+
}
|
|
554
|
+
byTable.get(relId).push(row);
|
|
555
|
+
}
|
|
556
|
+
const db = await this.connections.snapshotConnection();
|
|
557
|
+
try {
|
|
558
|
+
for (let rows of byTable.values()) {
|
|
559
|
+
const table = rows[0].table;
|
|
560
|
+
await this.snapshotTableInTx(batch, db, table, rows.map((r) => r.key));
|
|
561
|
+
}
|
|
562
|
+
}
|
|
563
|
+
finally {
|
|
564
|
+
await db.end();
|
|
565
|
+
}
|
|
566
|
+
}
|
|
443
567
|
getTable(relationId) {
|
|
444
|
-
const table = this.
|
|
568
|
+
const table = this.relationCache.get(relationId);
|
|
445
569
|
if (table == null) {
|
|
446
570
|
// We should always receive a replication message before the relation is used.
|
|
447
571
|
// If we can't find it, it's a bug.
|
|
@@ -456,7 +580,7 @@ WHERE oid = $1::regclass`,
|
|
|
456
580
|
if (msg.tag == 'insert' || msg.tag == 'update' || msg.tag == 'delete') {
|
|
457
581
|
const table = this.getTable(getRelId(msg.relation));
|
|
458
582
|
if (!table.syncAny) {
|
|
459
|
-
logger.debug(`Table ${table.qualifiedName} not used in sync rules - skipping`);
|
|
583
|
+
this.logger.debug(`Table ${table.qualifiedName} not used in sync rules - skipping`);
|
|
460
584
|
return null;
|
|
461
585
|
}
|
|
462
586
|
if (msg.tag == 'insert') {
|
|
@@ -556,7 +680,36 @@ WHERE oid = $1::regclass`,
|
|
|
556
680
|
this.startedStreaming = true;
|
|
557
681
|
// Auto-activate as soon as initial replication is done
|
|
558
682
|
await this.storage.autoActivate();
|
|
559
|
-
|
|
683
|
+
let resnapshot = [];
|
|
684
|
+
const markRecordUnavailable = (record) => {
|
|
685
|
+
if (!IdSnapshotQuery.supports(record.sourceTable)) {
|
|
686
|
+
// If it's not supported, it's also safe to ignore
|
|
687
|
+
return;
|
|
688
|
+
}
|
|
689
|
+
let key = {};
|
|
690
|
+
for (let column of record.sourceTable.replicaIdColumns) {
|
|
691
|
+
const name = column.name;
|
|
692
|
+
const value = record.after[name];
|
|
693
|
+
if (value == null) {
|
|
694
|
+
// We don't expect this to actually happen.
|
|
695
|
+
// The key should always be present in the "after" record.
|
|
696
|
+
return;
|
|
697
|
+
}
|
|
698
|
+
key[name] = value;
|
|
699
|
+
}
|
|
700
|
+
resnapshot.push({
|
|
701
|
+
table: record.sourceTable,
|
|
702
|
+
key: key
|
|
703
|
+
});
|
|
704
|
+
};
|
|
705
|
+
await this.storage.startBatch({
|
|
706
|
+
logger: this.logger,
|
|
707
|
+
zeroLSN: ZERO_LSN,
|
|
708
|
+
defaultSchema: POSTGRES_DEFAULT_SCHEMA,
|
|
709
|
+
storeCurrentData: true,
|
|
710
|
+
skipExistingRows: false,
|
|
711
|
+
markRecordUnavailable
|
|
712
|
+
}, async (batch) => {
|
|
560
713
|
// We don't handle any plain keepalive messages while we have transactions.
|
|
561
714
|
// While we have transactions, we use that to advance the position.
|
|
562
715
|
// Replication never starts in the middle of a transaction, so this starts as false.
|
|
@@ -585,6 +738,9 @@ WHERE oid = $1::regclass`,
|
|
|
585
738
|
else if (msg.tag == 'begin') {
|
|
586
739
|
// This may span multiple transactions in the same chunk, or even across chunks.
|
|
587
740
|
skipKeepalive = true;
|
|
741
|
+
if (this.oldestUncommittedChange == null) {
|
|
742
|
+
this.oldestUncommittedChange = new Date(Number(msg.commitTime / 1000n));
|
|
743
|
+
}
|
|
588
744
|
}
|
|
589
745
|
else if (msg.tag == 'commit') {
|
|
590
746
|
this.metrics.getCounter(ReplicationMetric.TRANSACTIONS_REPLICATED).add(1);
|
|
@@ -593,13 +749,30 @@ WHERE oid = $1::regclass`,
|
|
|
593
749
|
// This effectively lets us batch multiple transactions within the same chunk
|
|
594
750
|
// into a single flush, increasing throughput for many small transactions.
|
|
595
751
|
skipKeepalive = false;
|
|
596
|
-
|
|
752
|
+
// flush() must be before the resnapshot check - that is
|
|
753
|
+
// typically what reports the resnapshot records.
|
|
754
|
+
await batch.flush();
|
|
755
|
+
// This _must_ be checked after the flush(), and before
|
|
756
|
+
// commit() or ack(). We never persist the resnapshot list,
|
|
757
|
+
// so we have to process it before marking our progress.
|
|
758
|
+
if (resnapshot.length > 0) {
|
|
759
|
+
await this.resnapshot(batch, resnapshot);
|
|
760
|
+
resnapshot = [];
|
|
761
|
+
}
|
|
762
|
+
const didCommit = await batch.commit(msg.lsn, {
|
|
763
|
+
createEmptyCheckpoints,
|
|
764
|
+
oldestUncommittedChange: this.oldestUncommittedChange
|
|
765
|
+
});
|
|
597
766
|
await this.ack(msg.lsn, replicationStream);
|
|
767
|
+
if (didCommit) {
|
|
768
|
+
this.oldestUncommittedChange = null;
|
|
769
|
+
this.isStartingReplication = false;
|
|
770
|
+
}
|
|
598
771
|
}
|
|
599
772
|
}
|
|
600
773
|
else {
|
|
601
774
|
if (count % 100 == 0) {
|
|
602
|
-
logger.info(
|
|
775
|
+
this.logger.info(`Replicating op ${count} ${msg.lsn}`);
|
|
603
776
|
}
|
|
604
777
|
/**
|
|
605
778
|
* If we can see the contents of logical messages, then we can check if a keepalive
|
|
@@ -611,7 +784,14 @@ WHERE oid = $1::regclass`,
|
|
|
611
784
|
keepAliveDetected = true;
|
|
612
785
|
}
|
|
613
786
|
count += 1;
|
|
614
|
-
await this.writeChange(batch, msg);
|
|
787
|
+
const flushResult = await this.writeChange(batch, msg);
|
|
788
|
+
if (flushResult != null && resnapshot.length > 0) {
|
|
789
|
+
// If we have large transactions, we also need to flush the resnapshot list
|
|
790
|
+
// periodically.
|
|
791
|
+
// TODO: make sure this bit is actually triggered
|
|
792
|
+
await this.resnapshot(batch, resnapshot);
|
|
793
|
+
resnapshot = [];
|
|
794
|
+
}
|
|
615
795
|
}
|
|
616
796
|
}
|
|
617
797
|
if (!skipKeepalive) {
|
|
@@ -624,6 +804,7 @@ WHERE oid = $1::regclass`,
|
|
|
624
804
|
// may be in the middle of the next transaction.
|
|
625
805
|
// It must only be used to associate checkpoints with LSNs.
|
|
626
806
|
await batch.keepalive(chunkLastLsn);
|
|
807
|
+
this.isStartingReplication = false;
|
|
627
808
|
}
|
|
628
809
|
// We receive chunks with empty messages often (about each second).
|
|
629
810
|
// Acknowledging here progresses the slot past these and frees up resources.
|
|
@@ -649,7 +830,8 @@ WHERE oid = $1::regclass`,
|
|
|
649
830
|
if (storageIdentifier.type != lib_postgres.POSTGRES_CONNECTION_TYPE) {
|
|
650
831
|
return {
|
|
651
832
|
// Keep the same behaviour as before allowing Postgres storage.
|
|
652
|
-
createEmptyCheckpoints: true
|
|
833
|
+
createEmptyCheckpoints: true,
|
|
834
|
+
oldestUncommittedChange: null
|
|
653
835
|
};
|
|
654
836
|
}
|
|
655
837
|
const parsedStorageIdentifier = lib_postgres.utils.decodePostgresSystemIdentifier(storageIdentifier.id);
|
|
@@ -665,7 +847,8 @@ WHERE oid = $1::regclass`,
|
|
|
665
847
|
* Don't create empty checkpoints if the same Postgres database is used for the data source
|
|
666
848
|
* and sync bucket storage. Creating empty checkpoints will cause WAL feedback loops.
|
|
667
849
|
*/
|
|
668
|
-
createEmptyCheckpoints: replicationIdentifier.database_name != parsedStorageIdentifier.database_name
|
|
850
|
+
createEmptyCheckpoints: replicationIdentifier.database_name != parsedStorageIdentifier.database_name,
|
|
851
|
+
oldestUncommittedChange: null
|
|
669
852
|
};
|
|
670
853
|
}
|
|
671
854
|
/**
|
|
@@ -676,6 +859,19 @@ WHERE oid = $1::regclass`,
|
|
|
676
859
|
const version = await this.connections.getServerVersion();
|
|
677
860
|
return version ? version.compareMain('14.0.0') >= 0 : false;
|
|
678
861
|
}
|
|
862
|
+
async getReplicationLagMillis() {
|
|
863
|
+
if (this.oldestUncommittedChange == null) {
|
|
864
|
+
if (this.isStartingReplication) {
|
|
865
|
+
// We don't have anything to compute replication lag with yet.
|
|
866
|
+
return undefined;
|
|
867
|
+
}
|
|
868
|
+
else {
|
|
869
|
+
// We don't have any uncommitted changes, so replication is up-to-date.
|
|
870
|
+
return 0;
|
|
871
|
+
}
|
|
872
|
+
}
|
|
873
|
+
return Date.now() - this.oldestUncommittedChange.getTime();
|
|
874
|
+
}
|
|
679
875
|
}
|
|
680
876
|
async function touch() {
|
|
681
877
|
// FIXME: The hosted Kubernetes probe does not actually check the timestamp on this.
|