@powersync/service-module-postgres 0.0.0-dev-20250507154604 → 0.0.0-dev-20250611110033
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +47 -8
- package/dist/api/PostgresRouteAPIAdapter.d.ts +1 -1
- package/dist/api/PostgresRouteAPIAdapter.js +5 -1
- package/dist/api/PostgresRouteAPIAdapter.js.map +1 -1
- package/dist/auth/SupabaseKeyCollector.d.ts +3 -10
- package/dist/auth/SupabaseKeyCollector.js +6 -4
- package/dist/auth/SupabaseKeyCollector.js.map +1 -1
- package/dist/replication/SnapshotQuery.d.ts +75 -0
- package/dist/replication/SnapshotQuery.js +172 -0
- package/dist/replication/SnapshotQuery.js.map +1 -0
- package/dist/replication/WalStream.d.ts +37 -4
- package/dist/replication/WalStream.js +284 -88
- package/dist/replication/WalStream.js.map +1 -1
- package/dist/replication/WalStreamReplicationJob.d.ts +2 -0
- package/dist/replication/WalStreamReplicationJob.js +10 -3
- package/dist/replication/WalStreamReplicationJob.js.map +1 -1
- package/dist/replication/WalStreamReplicator.d.ts +1 -0
- package/dist/replication/WalStreamReplicator.js +22 -0
- package/dist/replication/WalStreamReplicator.js.map +1 -1
- package/package.json +12 -12
- package/src/api/PostgresRouteAPIAdapter.ts +5 -1
- package/src/auth/SupabaseKeyCollector.ts +14 -5
- package/src/replication/SnapshotQuery.ts +206 -0
- package/src/replication/WalStream.ts +338 -95
- package/src/replication/WalStreamReplicationJob.ts +11 -3
- package/src/replication/WalStreamReplicator.ts +26 -0
- package/test/src/__snapshots__/schema_changes.test.ts.snap +2 -2
- package/test/src/checkpoints.test.ts +10 -3
- package/test/src/chunked_snapshots.test.ts +156 -0
- package/test/src/large_batch.test.ts +5 -154
- package/test/src/resuming_snapshots.test.ts +150 -0
- package/test/src/schema_changes.test.ts +5 -10
- package/test/src/slow_tests.test.ts +13 -30
- package/test/src/util.ts +12 -1
- package/test/src/validation.test.ts +0 -1
- package/test/src/wal_stream.test.ts +4 -9
- package/test/src/wal_stream_utils.ts +15 -7
- package/tsconfig.tsbuildinfo +1 -1
|
@@ -4,11 +4,21 @@ import {
|
|
|
4
4
|
DatabaseConnectionError,
|
|
5
5
|
ErrorCode,
|
|
6
6
|
errors,
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
ReplicationAssertionError
|
|
7
|
+
Logger,
|
|
8
|
+
logger as defaultLogger,
|
|
9
|
+
ReplicationAssertionError,
|
|
10
|
+
ReplicationAbortedError
|
|
10
11
|
} from '@powersync/lib-services-framework';
|
|
11
|
-
import {
|
|
12
|
+
import {
|
|
13
|
+
BucketStorageBatch,
|
|
14
|
+
getUuidReplicaIdentityBson,
|
|
15
|
+
MetricsEngine,
|
|
16
|
+
RelationCache,
|
|
17
|
+
SaveUpdate,
|
|
18
|
+
SourceEntityDescriptor,
|
|
19
|
+
SourceTable,
|
|
20
|
+
storage
|
|
21
|
+
} from '@powersync/service-core';
|
|
12
22
|
import * as pgwire from '@powersync/service-jpgwire';
|
|
13
23
|
import { DatabaseInputRow, SqliteRow, SqlSyncRules, TablePattern, toSyncRulesRow } from '@powersync/service-sync-rules';
|
|
14
24
|
import * as pg_utils from '../utils/pgwire_utils.js';
|
|
@@ -17,12 +27,30 @@ import { PgManager } from './PgManager.js';
|
|
|
17
27
|
import { getPgOutputRelation, getRelId } from './PgRelation.js';
|
|
18
28
|
import { checkSourceConfiguration, getReplicationIdentityColumns } from './replication-utils.js';
|
|
19
29
|
import { ReplicationMetric } from '@powersync/service-types';
|
|
30
|
+
import {
|
|
31
|
+
ChunkedSnapshotQuery,
|
|
32
|
+
IdSnapshotQuery,
|
|
33
|
+
MissingRow,
|
|
34
|
+
PrimaryKeyValue,
|
|
35
|
+
SimpleSnapshotQuery,
|
|
36
|
+
SnapshotQuery
|
|
37
|
+
} from './SnapshotQuery.js';
|
|
20
38
|
|
|
21
39
|
export interface WalStreamOptions {
|
|
40
|
+
logger?: Logger;
|
|
22
41
|
connections: PgManager;
|
|
23
42
|
storage: storage.SyncRulesBucketStorage;
|
|
24
43
|
metrics: MetricsEngine;
|
|
25
44
|
abort_signal: AbortSignal;
|
|
45
|
+
|
|
46
|
+
/**
|
|
47
|
+
* Override snapshot chunk length (number of rows), for testing.
|
|
48
|
+
*
|
|
49
|
+
* Defaults to 10_000.
|
|
50
|
+
*
|
|
51
|
+
* Note that queries are streamed, so we don't actually keep that much data in memory.
|
|
52
|
+
*/
|
|
53
|
+
snapshotChunkLength?: number;
|
|
26
54
|
}
|
|
27
55
|
|
|
28
56
|
interface InitResult {
|
|
@@ -73,6 +101,8 @@ export class WalStream {
|
|
|
73
101
|
|
|
74
102
|
connection_id = 1;
|
|
75
103
|
|
|
104
|
+
private logger: Logger;
|
|
105
|
+
|
|
76
106
|
private readonly storage: storage.SyncRulesBucketStorage;
|
|
77
107
|
private readonly metrics: MetricsEngine;
|
|
78
108
|
private readonly slot_name: string;
|
|
@@ -81,17 +111,37 @@ export class WalStream {
|
|
|
81
111
|
|
|
82
112
|
private abort_signal: AbortSignal;
|
|
83
113
|
|
|
84
|
-
private
|
|
114
|
+
private relationCache = new RelationCache((relation: number | SourceTable) => {
|
|
115
|
+
if (typeof relation == 'number') {
|
|
116
|
+
return relation;
|
|
117
|
+
}
|
|
118
|
+
return relation.objectId!;
|
|
119
|
+
});
|
|
85
120
|
|
|
86
121
|
private startedStreaming = false;
|
|
87
122
|
|
|
123
|
+
private snapshotChunkLength: number;
|
|
124
|
+
|
|
125
|
+
/**
|
|
126
|
+
* Time of the oldest uncommitted change, according to the source db.
|
|
127
|
+
* This is used to determine the replication lag.
|
|
128
|
+
*/
|
|
129
|
+
private oldestUncommittedChange: Date | null = null;
|
|
130
|
+
/**
|
|
131
|
+
* Keep track of whether we have done a commit or keepalive yet.
|
|
132
|
+
* We can only compute replication lag if isStartingReplication == false, or oldestUncommittedChange is present.
|
|
133
|
+
*/
|
|
134
|
+
private isStartingReplication = true;
|
|
135
|
+
|
|
88
136
|
constructor(options: WalStreamOptions) {
|
|
137
|
+
this.logger = options.logger ?? defaultLogger;
|
|
89
138
|
this.storage = options.storage;
|
|
90
139
|
this.metrics = options.metrics;
|
|
91
140
|
this.sync_rules = options.storage.getParsedSyncRules({ defaultSchema: POSTGRES_DEFAULT_SCHEMA });
|
|
92
141
|
this.group_id = options.storage.group_id;
|
|
93
142
|
this.slot_name = options.storage.slot_name;
|
|
94
143
|
this.connections = options.connections;
|
|
144
|
+
this.snapshotChunkLength = options.snapshotChunkLength ?? 10_000;
|
|
95
145
|
|
|
96
146
|
this.abort_signal = options.abort_signal;
|
|
97
147
|
this.abort_signal.addEventListener(
|
|
@@ -104,7 +154,7 @@ export class WalStream {
|
|
|
104
154
|
const promise = sendKeepAlive(this.connections.pool);
|
|
105
155
|
promise.catch((e) => {
|
|
106
156
|
// Failures here are okay - this only speeds up stopping the process.
|
|
107
|
-
logger.warn('Failed to ping connection', e);
|
|
157
|
+
this.logger.warn('Failed to ping connection', e);
|
|
108
158
|
});
|
|
109
159
|
} else {
|
|
110
160
|
// If we haven't started streaming yet, it could be due to something like
|
|
@@ -183,7 +233,7 @@ export class WalStream {
|
|
|
183
233
|
]
|
|
184
234
|
});
|
|
185
235
|
if (rs.rows.length == 0) {
|
|
186
|
-
logger.info(`Skipping ${tablePattern.schema}.${name} - not part of ${PUBLICATION_NAME} publication`);
|
|
236
|
+
this.logger.info(`Skipping ${tablePattern.schema}.${name} - not part of ${PUBLICATION_NAME} publication`);
|
|
187
237
|
continue;
|
|
188
238
|
}
|
|
189
239
|
|
|
@@ -215,7 +265,7 @@ export class WalStream {
|
|
|
215
265
|
const snapshotDone = status.snapshot_done && status.checkpoint_lsn != null;
|
|
216
266
|
if (snapshotDone) {
|
|
217
267
|
// Snapshot is done, but we still need to check the replication slot status
|
|
218
|
-
logger.info(
|
|
268
|
+
this.logger.info(`Initial replication already done`);
|
|
219
269
|
}
|
|
220
270
|
|
|
221
271
|
// Check if replication slot exists
|
|
@@ -276,7 +326,7 @@ export class WalStream {
|
|
|
276
326
|
// We peek a large number of changes here, to make it more likely to pick up replication slot errors.
|
|
277
327
|
// For example, "publication does not exist" only occurs here if the peek actually includes changes related
|
|
278
328
|
// to the slot.
|
|
279
|
-
logger.info(`Checking ${slotName}`);
|
|
329
|
+
this.logger.info(`Checking ${slotName}`);
|
|
280
330
|
|
|
281
331
|
// The actual results can be quite large, so we don't actually return everything
|
|
282
332
|
// due to memory and processing overhead that would create.
|
|
@@ -293,11 +343,11 @@ export class WalStream {
|
|
|
293
343
|
}
|
|
294
344
|
|
|
295
345
|
// Success
|
|
296
|
-
logger.info(`Slot ${slotName} appears healthy`);
|
|
346
|
+
this.logger.info(`Slot ${slotName} appears healthy`);
|
|
297
347
|
return { needsNewSlot: false };
|
|
298
348
|
} catch (e) {
|
|
299
349
|
last_error = e;
|
|
300
|
-
logger.warn(
|
|
350
|
+
this.logger.warn(`Replication slot error`, e);
|
|
301
351
|
|
|
302
352
|
if (this.stopped) {
|
|
303
353
|
throw e;
|
|
@@ -324,7 +374,7 @@ export class WalStream {
|
|
|
324
374
|
// Sample: publication "powersync" does not exist
|
|
325
375
|
// Happens when publication deleted or never created.
|
|
326
376
|
// Slot must be re-created in this case.
|
|
327
|
-
logger.info(`${slotName} is not valid anymore`);
|
|
377
|
+
this.logger.info(`${slotName} is not valid anymore`);
|
|
328
378
|
|
|
329
379
|
return { needsNewSlot: true };
|
|
330
380
|
}
|
|
@@ -336,7 +386,7 @@ export class WalStream {
|
|
|
336
386
|
throw new ReplicationAssertionError('Unreachable');
|
|
337
387
|
}
|
|
338
388
|
|
|
339
|
-
async
|
|
389
|
+
async estimatedCountNumber(db: pgwire.PgConnection, table: storage.SourceTable): Promise<number> {
|
|
340
390
|
const results = await db.query({
|
|
341
391
|
statement: `SELECT reltuples::bigint AS estimate
|
|
342
392
|
FROM pg_class
|
|
@@ -345,9 +395,9 @@ WHERE oid = $1::regclass`,
|
|
|
345
395
|
});
|
|
346
396
|
const row = results.rows[0];
|
|
347
397
|
if ((row?.[0] ?? -1n) == -1n) {
|
|
348
|
-
return
|
|
398
|
+
return -1;
|
|
349
399
|
} else {
|
|
350
|
-
return
|
|
400
|
+
return Number(row[0]);
|
|
351
401
|
}
|
|
352
402
|
}
|
|
353
403
|
|
|
@@ -381,7 +431,7 @@ WHERE oid = $1::regclass`,
|
|
|
381
431
|
// The replication slot must be created before we start snapshotting tables.
|
|
382
432
|
await replicationConnection.query(`CREATE_REPLICATION_SLOT ${slotName} LOGICAL pgoutput`);
|
|
383
433
|
|
|
384
|
-
logger.info(`Created replication slot ${slotName}`);
|
|
434
|
+
this.logger.info(`Created replication slot ${slotName}`);
|
|
385
435
|
}
|
|
386
436
|
|
|
387
437
|
await this.initialReplication(db);
|
|
@@ -390,24 +440,37 @@ WHERE oid = $1::regclass`,
|
|
|
390
440
|
async initialReplication(db: pgwire.PgConnection) {
|
|
391
441
|
const sourceTables = this.sync_rules.getSourceTables();
|
|
392
442
|
await this.storage.startBatch(
|
|
393
|
-
{
|
|
443
|
+
{
|
|
444
|
+
logger: this.logger,
|
|
445
|
+
zeroLSN: ZERO_LSN,
|
|
446
|
+
defaultSchema: POSTGRES_DEFAULT_SCHEMA,
|
|
447
|
+
storeCurrentData: true,
|
|
448
|
+
skipExistingRows: true
|
|
449
|
+
},
|
|
394
450
|
async (batch) => {
|
|
451
|
+
let tablesWithStatus: SourceTable[] = [];
|
|
395
452
|
for (let tablePattern of sourceTables) {
|
|
396
453
|
const tables = await this.getQualifiedTableNames(batch, db, tablePattern);
|
|
454
|
+
// Pre-get counts
|
|
397
455
|
for (let table of tables) {
|
|
398
456
|
if (table.snapshotComplete) {
|
|
399
|
-
logger.info(
|
|
457
|
+
this.logger.info(`Skipping ${table.qualifiedName} - snapshot already done`);
|
|
400
458
|
continue;
|
|
401
459
|
}
|
|
402
|
-
await this.
|
|
460
|
+
const count = await this.estimatedCountNumber(db, table);
|
|
461
|
+
table = await batch.updateTableProgress(table, { totalEstimatedCount: count });
|
|
462
|
+
this.relationCache.update(table);
|
|
463
|
+
tablesWithStatus.push(table);
|
|
403
464
|
|
|
404
|
-
|
|
405
|
-
const tableLsnNotBefore = rs.rows[0][0];
|
|
406
|
-
await batch.markSnapshotDone([table], tableLsnNotBefore);
|
|
407
|
-
await touch();
|
|
465
|
+
this.logger.info(`To replicate: ${table.qualifiedName} ${table.formatSnapshotProgress()}`);
|
|
408
466
|
}
|
|
409
467
|
}
|
|
410
468
|
|
|
469
|
+
for (let table of tablesWithStatus) {
|
|
470
|
+
await this.snapshotTableInTx(batch, db, table);
|
|
471
|
+
await touch();
|
|
472
|
+
}
|
|
473
|
+
|
|
411
474
|
// Always commit the initial snapshot at zero.
|
|
412
475
|
// This makes sure we don't skip any changes applied before starting this snapshot,
|
|
413
476
|
// in the case of snapshot retries.
|
|
@@ -431,60 +494,147 @@ WHERE oid = $1::regclass`,
|
|
|
431
494
|
yield toSyncRulesRow(row);
|
|
432
495
|
}
|
|
433
496
|
}
|
|
497
|
+
private async snapshotTableInTx(
|
|
498
|
+
batch: storage.BucketStorageBatch,
|
|
499
|
+
db: pgwire.PgConnection,
|
|
500
|
+
table: storage.SourceTable,
|
|
501
|
+
limited?: PrimaryKeyValue[]
|
|
502
|
+
): Promise<storage.SourceTable> {
|
|
503
|
+
await db.query('BEGIN');
|
|
504
|
+
try {
|
|
505
|
+
let tableLsnNotBefore: string;
|
|
506
|
+
await this.snapshotTable(batch, db, table, limited);
|
|
507
|
+
|
|
508
|
+
// Get the current LSN.
|
|
509
|
+
// The data will only be consistent once incremental replication
|
|
510
|
+
// has passed that point.
|
|
511
|
+
// We have to get this LSN _after_ we have started the snapshot query.
|
|
512
|
+
const rs = await db.query(`select pg_current_wal_lsn() as lsn`);
|
|
513
|
+
tableLsnNotBefore = rs.rows[0][0];
|
|
514
|
+
await db.query('COMMIT');
|
|
515
|
+
const [resultTable] = await batch.markSnapshotDone([table], tableLsnNotBefore);
|
|
516
|
+
this.relationCache.update(resultTable);
|
|
517
|
+
return resultTable;
|
|
518
|
+
} catch (e) {
|
|
519
|
+
await db.query('ROLLBACK');
|
|
520
|
+
throw e;
|
|
521
|
+
}
|
|
522
|
+
}
|
|
523
|
+
|
|
524
|
+
private async snapshotTable(
|
|
525
|
+
batch: storage.BucketStorageBatch,
|
|
526
|
+
db: pgwire.PgConnection,
|
|
527
|
+
table: storage.SourceTable,
|
|
528
|
+
limited?: PrimaryKeyValue[]
|
|
529
|
+
) {
|
|
530
|
+
let totalEstimatedCount = table.snapshotStatus?.totalEstimatedCount;
|
|
531
|
+
let at = table.snapshotStatus?.replicatedCount ?? 0;
|
|
532
|
+
let lastCountTime = 0;
|
|
533
|
+
let q: SnapshotQuery;
|
|
534
|
+
// We do streaming on two levels:
|
|
535
|
+
// 1. Coarse level: DECLARE CURSOR, FETCH 10000 at a time.
|
|
536
|
+
// 2. Fine level: Stream chunks from each fetch call.
|
|
537
|
+
if (limited) {
|
|
538
|
+
q = new IdSnapshotQuery(db, table, limited);
|
|
539
|
+
} else if (ChunkedSnapshotQuery.supports(table)) {
|
|
540
|
+
// Single primary key - we can use the primary key for chunking
|
|
541
|
+
const orderByKey = table.replicaIdColumns[0];
|
|
542
|
+
q = new ChunkedSnapshotQuery(db, table, this.snapshotChunkLength, table.snapshotStatus?.lastKey ?? null);
|
|
543
|
+
if (table.snapshotStatus?.lastKey != null) {
|
|
544
|
+
this.logger.info(
|
|
545
|
+
`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} - resuming from ${orderByKey.name} > ${(q as ChunkedSnapshotQuery).lastKey}`
|
|
546
|
+
);
|
|
547
|
+
} else {
|
|
548
|
+
this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} - resumable`);
|
|
549
|
+
}
|
|
550
|
+
} else {
|
|
551
|
+
// Fallback case - query the entire table
|
|
552
|
+
this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} - not resumable`);
|
|
553
|
+
q = new SimpleSnapshotQuery(db, table, this.snapshotChunkLength);
|
|
554
|
+
at = 0;
|
|
555
|
+
}
|
|
556
|
+
await q.initialize();
|
|
434
557
|
|
|
435
|
-
private async snapshotTable(batch: storage.BucketStorageBatch, db: pgwire.PgConnection, table: storage.SourceTable) {
|
|
436
|
-
logger.info(`${this.slot_name} Replicating ${table.qualifiedName}`);
|
|
437
|
-
const estimatedCount = await this.estimatedCount(db, table);
|
|
438
|
-
let at = 0;
|
|
439
|
-
let lastLogIndex = 0;
|
|
440
|
-
const cursor = db.stream({ statement: `SELECT * FROM ${table.escapedIdentifier}` });
|
|
441
558
|
let columns: { i: number; name: string }[] = [];
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
559
|
+
let hasRemainingData = true;
|
|
560
|
+
while (hasRemainingData) {
|
|
561
|
+
// Fetch 10k at a time.
|
|
562
|
+
// The balance here is between latency overhead per FETCH call,
|
|
563
|
+
// and not spending too much time on each FETCH call.
|
|
564
|
+
// We aim for a couple of seconds on each FETCH call.
|
|
565
|
+
const cursor = q.nextChunk();
|
|
566
|
+
hasRemainingData = false;
|
|
567
|
+
// pgwire streams rows in chunks.
|
|
568
|
+
// These chunks can be quite small (as little as 16KB), so we don't flush chunks automatically.
|
|
569
|
+
// There are typically 100-200 rows per chunk.
|
|
570
|
+
for await (let chunk of cursor) {
|
|
571
|
+
if (chunk.tag == 'RowDescription') {
|
|
572
|
+
// We get a RowDescription for each FETCH call, but they should
|
|
573
|
+
// all be the same.
|
|
574
|
+
let i = 0;
|
|
575
|
+
columns = chunk.payload.map((c) => {
|
|
576
|
+
return { i: i++, name: c.name };
|
|
577
|
+
});
|
|
578
|
+
continue;
|
|
579
|
+
}
|
|
580
|
+
|
|
581
|
+
const rows = chunk.rows.map((row) => {
|
|
582
|
+
let q: DatabaseInputRow = {};
|
|
583
|
+
for (let c of columns) {
|
|
584
|
+
q[c.name] = row[c.i];
|
|
585
|
+
}
|
|
586
|
+
return q;
|
|
450
587
|
});
|
|
451
|
-
|
|
452
|
-
|
|
588
|
+
if (rows.length > 0) {
|
|
589
|
+
hasRemainingData = true;
|
|
590
|
+
}
|
|
453
591
|
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
592
|
+
for (const record of WalStream.getQueryData(rows)) {
|
|
593
|
+
// This auto-flushes when the batch reaches its size limit
|
|
594
|
+
await batch.save({
|
|
595
|
+
tag: storage.SaveOperationTag.INSERT,
|
|
596
|
+
sourceTable: table,
|
|
597
|
+
before: undefined,
|
|
598
|
+
beforeReplicaId: undefined,
|
|
599
|
+
after: record,
|
|
600
|
+
afterReplicaId: getUuidReplicaIdentityBson(record, table.replicaIdColumns)
|
|
601
|
+
});
|
|
458
602
|
}
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
}
|
|
465
|
-
if (this.abort_signal.aborted) {
|
|
466
|
-
throw new ReplicationAbortedError(`Aborted initial replication of ${this.slot_name}`);
|
|
603
|
+
|
|
604
|
+
at += rows.length;
|
|
605
|
+
this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(rows.length);
|
|
606
|
+
|
|
607
|
+
await touch();
|
|
467
608
|
}
|
|
468
609
|
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
610
|
+
// Important: flush before marking progress
|
|
611
|
+
await batch.flush();
|
|
612
|
+
if (limited == null) {
|
|
613
|
+
let lastKey: Uint8Array | undefined;
|
|
614
|
+
if (q instanceof ChunkedSnapshotQuery) {
|
|
615
|
+
lastKey = q.getLastKeySerialized();
|
|
616
|
+
}
|
|
617
|
+
if (lastCountTime < performance.now() - 10 * 60 * 1000) {
|
|
618
|
+
totalEstimatedCount = await this.estimatedCountNumber(db, table);
|
|
619
|
+
lastCountTime = performance.now();
|
|
620
|
+
}
|
|
621
|
+
table = await batch.updateTableProgress(table, {
|
|
622
|
+
lastKey: lastKey,
|
|
623
|
+
replicatedCount: at,
|
|
624
|
+
totalEstimatedCount: totalEstimatedCount
|
|
478
625
|
});
|
|
479
|
-
|
|
626
|
+
this.relationCache.update(table);
|
|
480
627
|
|
|
481
|
-
|
|
482
|
-
|
|
628
|
+
this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()}`);
|
|
629
|
+
} else {
|
|
630
|
+
this.logger.info(`Replicating ${table.qualifiedName} ${at}/${limited.length} for resnapshot`);
|
|
631
|
+
}
|
|
483
632
|
|
|
484
|
-
|
|
633
|
+
if (this.abort_signal.aborted) {
|
|
634
|
+
// We only abort after flushing
|
|
635
|
+
throw new ReplicationAbortedError(`Initial replication interrupted`);
|
|
636
|
+
}
|
|
485
637
|
}
|
|
486
|
-
|
|
487
|
-
await batch.flush();
|
|
488
638
|
}
|
|
489
639
|
|
|
490
640
|
async handleRelation(batch: storage.BucketStorageBatch, descriptor: SourceEntityDescriptor, snapshot: boolean) {
|
|
@@ -498,7 +648,7 @@ WHERE oid = $1::regclass`,
|
|
|
498
648
|
entity_descriptor: descriptor,
|
|
499
649
|
sync_rules: this.sync_rules
|
|
500
650
|
});
|
|
501
|
-
this.
|
|
651
|
+
this.relationCache.update(result.table);
|
|
502
652
|
|
|
503
653
|
// Drop conflicting tables. This includes for example renamed tables.
|
|
504
654
|
await batch.drop(result.dropTables);
|
|
@@ -513,40 +663,59 @@ WHERE oid = $1::regclass`,
|
|
|
513
663
|
// Truncate this table, in case a previous snapshot was interrupted.
|
|
514
664
|
await batch.truncate([result.table]);
|
|
515
665
|
|
|
516
|
-
let lsn: string = ZERO_LSN;
|
|
517
666
|
// Start the snapshot inside a transaction.
|
|
518
667
|
// We use a dedicated connection for this.
|
|
519
668
|
const db = await this.connections.snapshotConnection();
|
|
520
669
|
try {
|
|
521
|
-
await db.
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
// has passed that point.
|
|
528
|
-
// We have to get this LSN _after_ we have started the snapshot query.
|
|
529
|
-
const rs = await db.query(`select pg_current_wal_lsn() as lsn`);
|
|
530
|
-
lsn = rs.rows[0][0];
|
|
531
|
-
|
|
532
|
-
await db.query('COMMIT');
|
|
533
|
-
} catch (e) {
|
|
534
|
-
await db.query('ROLLBACK');
|
|
535
|
-
// TODO: Wrap with custom error type
|
|
536
|
-
throw e;
|
|
537
|
-
}
|
|
670
|
+
const table = await this.snapshotTableInTx(batch, db, result.table);
|
|
671
|
+
// After the table snapshot, we wait for replication to catch up.
|
|
672
|
+
// To make sure there is actually something to replicate, we send a keepalive
|
|
673
|
+
// message.
|
|
674
|
+
await sendKeepAlive(db);
|
|
675
|
+
return table;
|
|
538
676
|
} finally {
|
|
539
677
|
await db.end();
|
|
540
678
|
}
|
|
541
|
-
const [table] = await batch.markSnapshotDone([result.table], lsn);
|
|
542
|
-
return table;
|
|
543
679
|
}
|
|
544
680
|
|
|
545
681
|
return result.table;
|
|
546
682
|
}
|
|
547
683
|
|
|
684
|
+
/**
|
|
685
|
+
* Process rows that have missing TOAST values.
|
|
686
|
+
*
|
|
687
|
+
* This can happen during edge cases in the chunked initial snapshot process.
|
|
688
|
+
*
|
|
689
|
+
* We handle this similar to an inline table snapshot, but limited to the specific
|
|
690
|
+
* set of rows.
|
|
691
|
+
*/
|
|
692
|
+
private async resnapshot(batch: BucketStorageBatch, rows: MissingRow[]) {
|
|
693
|
+
const byTable = new Map<number, MissingRow[]>();
|
|
694
|
+
for (let row of rows) {
|
|
695
|
+
const relId = row.table.objectId as number; // always a number for postgres
|
|
696
|
+
if (!byTable.has(relId)) {
|
|
697
|
+
byTable.set(relId, []);
|
|
698
|
+
}
|
|
699
|
+
byTable.get(relId)!.push(row);
|
|
700
|
+
}
|
|
701
|
+
const db = await this.connections.snapshotConnection();
|
|
702
|
+
try {
|
|
703
|
+
for (let rows of byTable.values()) {
|
|
704
|
+
const table = rows[0].table;
|
|
705
|
+
await this.snapshotTableInTx(
|
|
706
|
+
batch,
|
|
707
|
+
db,
|
|
708
|
+
table,
|
|
709
|
+
rows.map((r) => r.key)
|
|
710
|
+
);
|
|
711
|
+
}
|
|
712
|
+
} finally {
|
|
713
|
+
await db.end();
|
|
714
|
+
}
|
|
715
|
+
}
|
|
716
|
+
|
|
548
717
|
private getTable(relationId: number): storage.SourceTable {
|
|
549
|
-
const table = this.
|
|
718
|
+
const table = this.relationCache.get(relationId);
|
|
550
719
|
if (table == null) {
|
|
551
720
|
// We should always receive a replication message before the relation is used.
|
|
552
721
|
// If we can't find it, it's a bug.
|
|
@@ -565,7 +734,7 @@ WHERE oid = $1::regclass`,
|
|
|
565
734
|
if (msg.tag == 'insert' || msg.tag == 'update' || msg.tag == 'delete') {
|
|
566
735
|
const table = this.getTable(getRelId(msg.relation));
|
|
567
736
|
if (!table.syncAny) {
|
|
568
|
-
logger.debug(`Table ${table.qualifiedName} not used in sync rules - skipping`);
|
|
737
|
+
this.logger.debug(`Table ${table.qualifiedName} not used in sync rules - skipping`);
|
|
569
738
|
return null;
|
|
570
739
|
}
|
|
571
740
|
|
|
@@ -673,8 +842,39 @@ WHERE oid = $1::regclass`,
|
|
|
673
842
|
// Auto-activate as soon as initial replication is done
|
|
674
843
|
await this.storage.autoActivate();
|
|
675
844
|
|
|
845
|
+
let resnapshot: { table: storage.SourceTable; key: PrimaryKeyValue }[] = [];
|
|
846
|
+
|
|
847
|
+
const markRecordUnavailable = (record: SaveUpdate) => {
|
|
848
|
+
if (!IdSnapshotQuery.supports(record.sourceTable)) {
|
|
849
|
+
// If it's not supported, it's also safe to ignore
|
|
850
|
+
return;
|
|
851
|
+
}
|
|
852
|
+
let key: PrimaryKeyValue = {};
|
|
853
|
+
for (let column of record.sourceTable.replicaIdColumns) {
|
|
854
|
+
const name = column.name;
|
|
855
|
+
const value = record.after[name];
|
|
856
|
+
if (value == null) {
|
|
857
|
+
// We don't expect this to actually happen.
|
|
858
|
+
// The key should always be present in the "after" record.
|
|
859
|
+
return;
|
|
860
|
+
}
|
|
861
|
+
key[name] = value;
|
|
862
|
+
}
|
|
863
|
+
resnapshot.push({
|
|
864
|
+
table: record.sourceTable,
|
|
865
|
+
key: key
|
|
866
|
+
});
|
|
867
|
+
};
|
|
868
|
+
|
|
676
869
|
await this.storage.startBatch(
|
|
677
|
-
{
|
|
870
|
+
{
|
|
871
|
+
logger: this.logger,
|
|
872
|
+
zeroLSN: ZERO_LSN,
|
|
873
|
+
defaultSchema: POSTGRES_DEFAULT_SCHEMA,
|
|
874
|
+
storeCurrentData: true,
|
|
875
|
+
skipExistingRows: false,
|
|
876
|
+
markRecordUnavailable
|
|
877
|
+
},
|
|
678
878
|
async (batch) => {
|
|
679
879
|
// We don't handle any plain keepalive messages while we have transactions.
|
|
680
880
|
// While we have transactions, we use that to advance the position.
|
|
@@ -708,6 +908,9 @@ WHERE oid = $1::regclass`,
|
|
|
708
908
|
} else if (msg.tag == 'begin') {
|
|
709
909
|
// This may span multiple transactions in the same chunk, or even across chunks.
|
|
710
910
|
skipKeepalive = true;
|
|
911
|
+
if (this.oldestUncommittedChange == null) {
|
|
912
|
+
this.oldestUncommittedChange = new Date(Number(msg.commitTime / 1000n));
|
|
913
|
+
}
|
|
711
914
|
} else if (msg.tag == 'commit') {
|
|
712
915
|
this.metrics.getCounter(ReplicationMetric.TRANSACTIONS_REPLICATED).add(1);
|
|
713
916
|
if (msg == lastCommit) {
|
|
@@ -715,12 +918,29 @@ WHERE oid = $1::regclass`,
|
|
|
715
918
|
// This effectively lets us batch multiple transactions within the same chunk
|
|
716
919
|
// into a single flush, increasing throughput for many small transactions.
|
|
717
920
|
skipKeepalive = false;
|
|
718
|
-
|
|
921
|
+
// flush() must be before the resnapshot check - that is
|
|
922
|
+
// typically what reports the resnapshot records.
|
|
923
|
+
await batch.flush();
|
|
924
|
+
// This _must_ be checked after the flush(), and before
|
|
925
|
+
// commit() or ack(). We never persist the resnapshot list,
|
|
926
|
+
// so we have to process it before marking our progress.
|
|
927
|
+
if (resnapshot.length > 0) {
|
|
928
|
+
await this.resnapshot(batch, resnapshot);
|
|
929
|
+
resnapshot = [];
|
|
930
|
+
}
|
|
931
|
+
const didCommit = await batch.commit(msg.lsn!, {
|
|
932
|
+
createEmptyCheckpoints,
|
|
933
|
+
oldestUncommittedChange: this.oldestUncommittedChange
|
|
934
|
+
});
|
|
719
935
|
await this.ack(msg.lsn!, replicationStream);
|
|
936
|
+
if (didCommit) {
|
|
937
|
+
this.oldestUncommittedChange = null;
|
|
938
|
+
this.isStartingReplication = false;
|
|
939
|
+
}
|
|
720
940
|
}
|
|
721
941
|
} else {
|
|
722
942
|
if (count % 100 == 0) {
|
|
723
|
-
logger.info(
|
|
943
|
+
this.logger.info(`Replicating op ${count} ${msg.lsn}`);
|
|
724
944
|
}
|
|
725
945
|
|
|
726
946
|
/**
|
|
@@ -734,7 +954,14 @@ WHERE oid = $1::regclass`,
|
|
|
734
954
|
}
|
|
735
955
|
|
|
736
956
|
count += 1;
|
|
737
|
-
await this.writeChange(batch, msg);
|
|
957
|
+
const flushResult = await this.writeChange(batch, msg);
|
|
958
|
+
if (flushResult != null && resnapshot.length > 0) {
|
|
959
|
+
// If we have large transactions, we also need to flush the resnapshot list
|
|
960
|
+
// periodically.
|
|
961
|
+
// TODO: make sure this bit is actually triggered
|
|
962
|
+
await this.resnapshot(batch, resnapshot);
|
|
963
|
+
resnapshot = [];
|
|
964
|
+
}
|
|
738
965
|
}
|
|
739
966
|
}
|
|
740
967
|
|
|
@@ -749,6 +976,7 @@ WHERE oid = $1::regclass`,
|
|
|
749
976
|
// may be in the middle of the next transaction.
|
|
750
977
|
// It must only be used to associate checkpoints with LSNs.
|
|
751
978
|
await batch.keepalive(chunkLastLsn);
|
|
979
|
+
this.isStartingReplication = false;
|
|
752
980
|
}
|
|
753
981
|
|
|
754
982
|
// We receive chunks with empty messages often (about each second).
|
|
@@ -781,7 +1009,8 @@ WHERE oid = $1::regclass`,
|
|
|
781
1009
|
if (storageIdentifier.type != lib_postgres.POSTGRES_CONNECTION_TYPE) {
|
|
782
1010
|
return {
|
|
783
1011
|
// Keep the same behaviour as before allowing Postgres storage.
|
|
784
|
-
createEmptyCheckpoints: true
|
|
1012
|
+
createEmptyCheckpoints: true,
|
|
1013
|
+
oldestUncommittedChange: null
|
|
785
1014
|
};
|
|
786
1015
|
}
|
|
787
1016
|
|
|
@@ -804,7 +1033,8 @@ WHERE oid = $1::regclass`,
|
|
|
804
1033
|
* Don't create empty checkpoints if the same Postgres database is used for the data source
|
|
805
1034
|
* and sync bucket storage. Creating empty checkpoints will cause WAL feedback loops.
|
|
806
1035
|
*/
|
|
807
|
-
createEmptyCheckpoints: replicationIdentifier.database_name != parsedStorageIdentifier.database_name
|
|
1036
|
+
createEmptyCheckpoints: replicationIdentifier.database_name != parsedStorageIdentifier.database_name,
|
|
1037
|
+
oldestUncommittedChange: null
|
|
808
1038
|
};
|
|
809
1039
|
}
|
|
810
1040
|
|
|
@@ -816,6 +1046,19 @@ WHERE oid = $1::regclass`,
|
|
|
816
1046
|
const version = await this.connections.getServerVersion();
|
|
817
1047
|
return version ? version.compareMain('14.0.0') >= 0 : false;
|
|
818
1048
|
}
|
|
1049
|
+
|
|
1050
|
+
async getReplicationLagMillis(): Promise<number | undefined> {
|
|
1051
|
+
if (this.oldestUncommittedChange == null) {
|
|
1052
|
+
if (this.isStartingReplication) {
|
|
1053
|
+
// We don't have anything to compute replication lag with yet.
|
|
1054
|
+
return undefined;
|
|
1055
|
+
} else {
|
|
1056
|
+
// We don't have any uncommitted changes, so replication is up-to-date.
|
|
1057
|
+
return 0;
|
|
1058
|
+
}
|
|
1059
|
+
}
|
|
1060
|
+
return Date.now() - this.oldestUncommittedChange.getTime();
|
|
1061
|
+
}
|
|
819
1062
|
}
|
|
820
1063
|
|
|
821
1064
|
async function touch() {
|