@powersync/service-module-postgres 0.13.1 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +25 -0
- package/dist/api/PostgresRouteAPIAdapter.d.ts +1 -1
- package/dist/api/PostgresRouteAPIAdapter.js +1 -1
- package/dist/api/PostgresRouteAPIAdapter.js.map +1 -1
- package/dist/replication/SnapshotQuery.d.ts +78 -0
- package/dist/replication/SnapshotQuery.js +175 -0
- package/dist/replication/SnapshotQuery.js.map +1 -0
- package/dist/replication/WalStream.d.ts +37 -4
- package/dist/replication/WalStream.js +318 -91
- package/dist/replication/WalStream.js.map +1 -1
- package/dist/replication/WalStreamReplicationJob.d.ts +2 -0
- package/dist/replication/WalStreamReplicationJob.js +14 -3
- package/dist/replication/WalStreamReplicationJob.js.map +1 -1
- package/dist/replication/WalStreamReplicator.d.ts +1 -0
- package/dist/replication/WalStreamReplicator.js +22 -0
- package/dist/replication/WalStreamReplicator.js.map +1 -1
- package/dist/replication/replication-utils.d.ts +4 -0
- package/dist/replication/replication-utils.js +46 -2
- package/dist/replication/replication-utils.js.map +1 -1
- package/package.json +10 -9
- package/src/api/PostgresRouteAPIAdapter.ts +1 -1
- package/src/replication/SnapshotQuery.ts +209 -0
- package/src/replication/WalStream.ts +373 -98
- package/src/replication/WalStreamReplicationJob.ts +15 -3
- package/src/replication/WalStreamReplicator.ts +26 -0
- package/src/replication/replication-utils.ts +60 -2
- package/test/src/__snapshots__/schema_changes.test.ts.snap +2 -2
- package/test/src/checkpoints.test.ts +7 -5
- package/test/src/chunked_snapshots.test.ts +156 -0
- package/test/src/large_batch.test.ts +5 -154
- package/test/src/resuming_snapshots.test.ts +150 -0
- package/test/src/schema_changes.test.ts +5 -10
- package/test/src/slow_tests.test.ts +13 -30
- package/test/src/util.ts +12 -1
- package/test/src/validation.test.ts +0 -1
- package/test/src/wal_stream.test.ts +4 -9
- package/test/src/wal_stream_utils.ts +15 -7
- package/tsconfig.tsbuildinfo +1 -1
package/src/replication/WalStream.ts
@@ -4,25 +4,53 @@ import {
   DatabaseConnectionError,
   ErrorCode,
   errors,
-
-
-  ReplicationAssertionError
+  Logger,
+  logger as defaultLogger,
+  ReplicationAssertionError,
+  ReplicationAbortedError
 } from '@powersync/lib-services-framework';
-import {
+import {
+  BucketStorageBatch,
+  getUuidReplicaIdentityBson,
+  MetricsEngine,
+  RelationCache,
+  SaveUpdate,
+  SourceEntityDescriptor,
+  SourceTable,
+  storage
+} from '@powersync/service-core';
 import * as pgwire from '@powersync/service-jpgwire';
 import { DatabaseInputRow, SqliteRow, SqlSyncRules, TablePattern, toSyncRulesRow } from '@powersync/service-sync-rules';
 import * as pg_utils from '../utils/pgwire_utils.js';
 
 import { PgManager } from './PgManager.js';
 import { getPgOutputRelation, getRelId } from './PgRelation.js';
-import { checkSourceConfiguration, getReplicationIdentityColumns } from './replication-utils.js';
+import { checkSourceConfiguration, checkTableRls, getReplicationIdentityColumns } from './replication-utils.js';
 import { ReplicationMetric } from '@powersync/service-types';
+import {
+  ChunkedSnapshotQuery,
+  IdSnapshotQuery,
+  MissingRow,
+  PrimaryKeyValue,
+  SimpleSnapshotQuery,
+  SnapshotQuery
+} from './SnapshotQuery.js';
 
 export interface WalStreamOptions {
+  logger?: Logger;
   connections: PgManager;
   storage: storage.SyncRulesBucketStorage;
   metrics: MetricsEngine;
   abort_signal: AbortSignal;
+
+  /**
+   * Override snapshot chunk length (number of rows), for testing.
+   *
+   * Defaults to 10_000.
+   *
+   * Note that queries are streamed, so we don't actually keep that much data in memory.
+   */
+  snapshotChunkLength?: number;
 }
 
 interface InitResult {
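
Note: `snapshotChunkLength` (and the optional `logger`) are the only additions to `WalStreamOptions`; the other fields are unchanged from 0.13.x. A hypothetical usage sketch, assuming `connections`, `storage`, and `metrics` already exist (for example in a test setup):

```ts
// Hypothetical sketch only - field names match WalStreamOptions above.
const controller = new AbortController();
const stream = new WalStream({
  connections, // PgManager for the source database
  storage, // storage.SyncRulesBucketStorage
  metrics, // MetricsEngine
  abort_signal: controller.signal,
  // A small chunk size makes tests exercise multiple snapshot chunks quickly;
  // production uses the default of 10_000 rows per chunk.
  snapshotChunkLength: 100
});
```
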
@@ -73,6 +101,8 @@ export class WalStream {
 
   connection_id = 1;
 
+  private logger: Logger;
+
   private readonly storage: storage.SyncRulesBucketStorage;
   private readonly metrics: MetricsEngine;
   private readonly slot_name: string;
@@ -81,17 +111,37 @@ export class WalStream {
 
   private abort_signal: AbortSignal;
 
-  private
+  private relationCache = new RelationCache((relation: number | SourceTable) => {
+    if (typeof relation == 'number') {
+      return relation;
+    }
+    return relation.objectId!;
+  });
 
   private startedStreaming = false;
 
+  private snapshotChunkLength: number;
+
+  /**
+   * Time of the oldest uncommitted change, according to the source db.
+   * This is used to determine the replication lag.
+   */
+  private oldestUncommittedChange: Date | null = null;
+  /**
+   * Keep track of whether we have done a commit or keepalive yet.
+   * We can only compute replication lag if isStartingReplication == false, or oldestUncommittedChange is present.
+   */
+  private isStartingReplication = true;
+
   constructor(options: WalStreamOptions) {
+    this.logger = options.logger ?? defaultLogger;
     this.storage = options.storage;
     this.metrics = options.metrics;
     this.sync_rules = options.storage.getParsedSyncRules({ defaultSchema: POSTGRES_DEFAULT_SCHEMA });
     this.group_id = options.storage.group_id;
     this.slot_name = options.storage.slot_name;
     this.connections = options.connections;
+    this.snapshotChunkLength = options.snapshotChunkLength ?? 10_000;
 
     this.abort_signal = options.abort_signal;
     this.abort_signal.addEventListener(
@@ -104,7 +154,7 @@ export class WalStream {
           const promise = sendKeepAlive(this.connections.pool);
           promise.catch((e) => {
             // Failures here are okay - this only speeds up stopping the process.
-            logger.warn('Failed to ping connection', e);
+            this.logger.warn('Failed to ping connection', e);
           });
         } else {
           // If we haven't started streaming yet, it could be due to something like
@@ -183,10 +233,21 @@ export class WalStream {
         ]
       });
       if (rs.rows.length == 0) {
-        logger.info(`Skipping ${tablePattern.schema}.${name} - not part of ${PUBLICATION_NAME} publication`);
+        this.logger.info(`Skipping ${tablePattern.schema}.${name} - not part of ${PUBLICATION_NAME} publication`);
         continue;
       }
 
+      try {
+        const result = await checkTableRls(db, relid);
+        if (!result.canRead) {
+          // We log the message, then continue anyway, since the check does not cover all cases.
+          this.logger.warn(result.message!);
+        }
+      } catch (e) {
+        // It's possible that we just don't have permission to access pg_roles - log the error and continue.
+        this.logger.warn(`Could not check RLS access for ${tablePattern.schema}.${name}`, e);
+      }
+
       const cresult = await getReplicationIdentityColumns(db, relid);
 
       const table = await this.handleRelation(
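
Note: `checkTableRls` itself is added to `replication-utils.ts` (see the file list above); its implementation is not part of this hunk. As a rough, hypothetical sketch of the kind of catalog check it could perform (the exact query in the package may differ):

```ts
// Hypothetical illustration only - not the actual implementation in replication-utils.ts.
// Row-level security can silently hide rows from the replicating role unless that role is a
// superuser or has BYPASSRLS, which also explains the pg_roles permission caveat logged above.
const checkTableRlsSketchSql = `
  SELECT c.relrowsecurity AS rls_enabled,
         (r.rolsuper OR r.rolbypassrls) AS can_bypass_rls
    FROM pg_class c,
         pg_roles r
   WHERE c.oid = $1::oid
     AND r.rolname = current_user`;
// If rls_enabled is true and can_bypass_rls is false, the snapshot may miss rows,
// which is why the code above logs a warning but still continues.
```
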
@@ -215,7 +276,7 @@ export class WalStream {
     const snapshotDone = status.snapshot_done && status.checkpoint_lsn != null;
     if (snapshotDone) {
       // Snapshot is done, but we still need to check the replication slot status
-      logger.info(
+      this.logger.info(`Initial replication already done`);
     }
 
     // Check if replication slot exists
@@ -276,7 +337,7 @@ export class WalStream {
       // We peek a large number of changes here, to make it more likely to pick up replication slot errors.
       // For example, "publication does not exist" only occurs here if the peek actually includes changes related
       // to the slot.
-      logger.info(`Checking ${slotName}`);
+      this.logger.info(`Checking ${slotName}`);
 
       // The actual results can be quite large, so we don't actually return everything
       // due to memory and processing overhead that would create.
@@ -293,11 +354,11 @@ export class WalStream {
         }
 
         // Success
-        logger.info(`Slot ${slotName} appears healthy`);
+        this.logger.info(`Slot ${slotName} appears healthy`);
         return { needsNewSlot: false };
       } catch (e) {
         last_error = e;
-        logger.warn(
+        this.logger.warn(`Replication slot error`, e);
 
         if (this.stopped) {
           throw e;
@@ -324,7 +385,7 @@ export class WalStream {
           // Sample: publication "powersync" does not exist
           // Happens when publication deleted or never created.
           // Slot must be re-created in this case.
-          logger.info(`${slotName} is not valid anymore`);
+          this.logger.info(`${slotName} is not valid anymore`);
 
           return { needsNewSlot: true };
         }
@@ -336,7 +397,7 @@ export class WalStream {
     throw new ReplicationAssertionError('Unreachable');
   }
 
-  async
+  async estimatedCountNumber(db: pgwire.PgConnection, table: storage.SourceTable): Promise<number> {
     const results = await db.query({
       statement: `SELECT reltuples::bigint AS estimate
 FROM pg_class
@@ -345,9 +406,9 @@ WHERE oid = $1::regclass`,
     });
     const row = results.rows[0];
     if ((row?.[0] ?? -1n) == -1n) {
-      return
+      return -1;
     } else {
-      return
+      return Number(row[0]);
     }
   }
 
@@ -370,7 +431,7 @@ WHERE oid = $1::regclass`,
       // In those cases, we have to start replication from scratch.
       // If there is an existing healthy slot, we can skip this and continue
       // initial replication where we left off.
-      await this.storage.clear();
+      await this.storage.clear({ signal: this.abort_signal });
 
       await db.query({
         statement: 'SELECT pg_drop_replication_slot(slot_name) FROM pg_replication_slots WHERE slot_name = $1',
@@ -381,7 +442,7 @@ WHERE oid = $1::regclass`,
       // The replication slot must be created before we start snapshotting tables.
       await replicationConnection.query(`CREATE_REPLICATION_SLOT ${slotName} LOGICAL pgoutput`);
 
-      logger.info(`Created replication slot ${slotName}`);
+      this.logger.info(`Created replication slot ${slotName}`);
     }
 
     await this.initialReplication(db);
@@ -390,24 +451,37 @@ WHERE oid = $1::regclass`,
   async initialReplication(db: pgwire.PgConnection) {
     const sourceTables = this.sync_rules.getSourceTables();
     await this.storage.startBatch(
-      {
+      {
+        logger: this.logger,
+        zeroLSN: ZERO_LSN,
+        defaultSchema: POSTGRES_DEFAULT_SCHEMA,
+        storeCurrentData: true,
+        skipExistingRows: true
+      },
       async (batch) => {
+        let tablesWithStatus: SourceTable[] = [];
         for (let tablePattern of sourceTables) {
           const tables = await this.getQualifiedTableNames(batch, db, tablePattern);
+          // Pre-get counts
           for (let table of tables) {
             if (table.snapshotComplete) {
-              logger.info(
+              this.logger.info(`Skipping ${table.qualifiedName} - snapshot already done`);
               continue;
             }
-            await this.
+            const count = await this.estimatedCountNumber(db, table);
+            table = await batch.updateTableProgress(table, { totalEstimatedCount: count });
+            this.relationCache.update(table);
+            tablesWithStatus.push(table);
 
-
-            const tableLsnNotBefore = rs.rows[0][0];
-            await batch.markSnapshotDone([table], tableLsnNotBefore);
-            await touch();
+            this.logger.info(`To replicate: ${table.qualifiedName} ${table.formatSnapshotProgress()}`);
           }
         }
 
+        for (let table of tablesWithStatus) {
+          await this.snapshotTableInTx(batch, db, table);
+          await touch();
+        }
+
         // Always commit the initial snapshot at zero.
         // This makes sure we don't skip any changes applied before starting this snapshot,
         // in the case of snapshot retries.
@@ -431,60 +505,164 @@ WHERE oid = $1::regclass`,
       yield toSyncRulesRow(row);
     }
   }
 
+  private async snapshotTableInTx(
+    batch: storage.BucketStorageBatch,
+    db: pgwire.PgConnection,
+    table: storage.SourceTable,
+    limited?: PrimaryKeyValue[]
+  ): Promise<storage.SourceTable> {
+    // Note: We use the default "Read Committed" isolation level here, not snapshot isolation.
+    // The data may change during the transaction, but that is compensated for in the streaming
+    // replication afterwards.
+    await db.query('BEGIN');
+    try {
+      let tableLsnNotBefore: string;
+      await this.snapshotTable(batch, db, table, limited);
+
+      // Get the current LSN.
+      // The data will only be consistent once incremental replication has passed that point.
+      // We have to get this LSN _after_ we have finished the table snapshot.
+      //
+      // There are basically two relevant LSNs here:
+      // A: The LSN before the snapshot starts. We don't explicitly record this on the PowerSync side,
+      //    but it is implicitly recorded in the replication slot.
+      // B: The LSN after the table snapshot is complete, which is what we get here.
+      // When we do the snapshot queries, the data that we get back for each chunk could match the state
+      // anywhere between A and B. To actually have a consistent state on our side, we need to:
+      // 1. Complete the snapshot.
+      // 2. Wait until logical replication has caught up with all the change between A and B.
+      // Calling `markSnapshotDone(LSN B)` covers that.
+      const rs = await db.query(`select pg_current_wal_lsn() as lsn`);
+      tableLsnNotBefore = rs.rows[0][0];
+      // Side note: A ROLLBACK would probably also be fine here, since we only read in this transaction.
+      await db.query('COMMIT');
+      const [resultTable] = await batch.markSnapshotDone([table], tableLsnNotBefore);
+      this.relationCache.update(resultTable);
+      return resultTable;
+    } catch (e) {
+      await db.query('ROLLBACK');
+      throw e;
+    }
+  }
+
+  private async snapshotTable(
+    batch: storage.BucketStorageBatch,
+    db: pgwire.PgConnection,
+    table: storage.SourceTable,
+    limited?: PrimaryKeyValue[]
+  ) {
+    let totalEstimatedCount = table.snapshotStatus?.totalEstimatedCount;
+    let at = table.snapshotStatus?.replicatedCount ?? 0;
+    let lastCountTime = 0;
+    let q: SnapshotQuery;
+    // We do streaming on two levels:
+    // 1. Coarse level: DELCARE CURSOR, FETCH 10000 at a time.
+    // 2. Fine level: Stream chunks from each fetch call.
+    if (limited) {
+      q = new IdSnapshotQuery(db, table, limited);
+    } else if (ChunkedSnapshotQuery.supports(table)) {
+      // Single primary key - we can use the primary key for chunking
+      const orderByKey = table.replicaIdColumns[0];
+      q = new ChunkedSnapshotQuery(db, table, this.snapshotChunkLength, table.snapshotStatus?.lastKey ?? null);
+      if (table.snapshotStatus?.lastKey != null) {
+        this.logger.info(
+          `Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} - resuming from ${orderByKey.name} > ${(q as ChunkedSnapshotQuery).lastKey}`
+        );
+      } else {
+        this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} - resumable`);
+      }
+    } else {
+      // Fallback case - query the entire table
+      this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} - not resumable`);
+      q = new SimpleSnapshotQuery(db, table, this.snapshotChunkLength);
+      at = 0;
+    }
+    await q.initialize();
 
-  private async snapshotTable(batch: storage.BucketStorageBatch, db: pgwire.PgConnection, table: storage.SourceTable) {
-    logger.info(`${this.slot_name} Replicating ${table.qualifiedName}`);
-    const estimatedCount = await this.estimatedCount(db, table);
-    let at = 0;
-    let lastLogIndex = 0;
-    const cursor = db.stream({ statement: `SELECT * FROM ${table.escapedIdentifier}` });
     let columns: { i: number; name: string }[] = [];
-
-
-
-
-
-
-
-
+    let hasRemainingData = true;
+    while (hasRemainingData) {
+      // Fetch 10k at a time.
+      // The balance here is between latency overhead per FETCH call,
+      // and not spending too much time on each FETCH call.
+      // We aim for a couple of seconds on each FETCH call.
+      const cursor = q.nextChunk();
+      hasRemainingData = false;
+      // pgwire streams rows in chunks.
+      // These chunks can be quite small (as little as 16KB), so we don't flush chunks automatically.
+      // There are typically 100-200 rows per chunk.
+      for await (let chunk of cursor) {
+        if (chunk.tag == 'RowDescription') {
+          // We get a RowDescription for each FETCH call, but they should
+          // all be the same.
+          let i = 0;
+          columns = chunk.payload.map((c) => {
+            return { i: i++, name: c.name };
+          });
+          continue;
+        }
+
+        const rows = chunk.rows.map((row) => {
+          let q: DatabaseInputRow = {};
+          for (let c of columns) {
+            q[c.name] = row[c.i];
+          }
+          return q;
         });
-
-
+        if (rows.length > 0) {
+          hasRemainingData = true;
+        }
 
-
-
-
-
+        for (const record of WalStream.getQueryData(rows)) {
+          // This auto-flushes when the batch reaches its size limit
+          await batch.save({
+            tag: storage.SaveOperationTag.INSERT,
+            sourceTable: table,
+            before: undefined,
+            beforeReplicaId: undefined,
+            after: record,
+            afterReplicaId: getUuidReplicaIdentityBson(record, table.replicaIdColumns)
+          });
         }
-
-
-
-
-
-        }
-        if (this.abort_signal.aborted) {
-          throw new ReplicationAbortedError(`Aborted initial replication of ${this.slot_name}`);
+
+        at += rows.length;
+        this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(rows.length);
+
+        await touch();
       }
 
-
-
-
-
-
-
-
-
-
+      // Important: flush before marking progress
+      await batch.flush();
+      if (limited == null) {
+        let lastKey: Uint8Array | undefined;
+        if (q instanceof ChunkedSnapshotQuery) {
+          lastKey = q.getLastKeySerialized();
+        }
+        if (lastCountTime < performance.now() - 10 * 60 * 1000) {
+          // Even though we're doing the snapshot inside a transaction, the transaction uses
+          // the default "Read Committed" isolation level. This means we can get new data
+          // within the transaction, so we re-estimate the count every 10 minutes when replicating
+          // large tables.
+          totalEstimatedCount = await this.estimatedCountNumber(db, table);
+          lastCountTime = performance.now();
+        }
+        table = await batch.updateTableProgress(table, {
+          lastKey: lastKey,
+          replicatedCount: at,
+          totalEstimatedCount: totalEstimatedCount
         });
-
+        this.relationCache.update(table);
 
-
-
+        this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()}`);
+      } else {
+        this.logger.info(`Replicating ${table.qualifiedName} ${at}/${limited.length} for resnapshot`);
+      }
 
-
+      if (this.abort_signal.aborted) {
+        // We only abort after flushing
+        throw new ReplicationAbortedError(`Initial replication interrupted`);
+      }
     }
-
-    await batch.flush();
   }
 
   async handleRelation(batch: storage.BucketStorageBatch, descriptor: SourceEntityDescriptor, snapshot: boolean) {
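
Note: the chunking itself is implemented by the new `SnapshotQuery.ts` (see `ChunkedSnapshotQuery`, `SimpleSnapshotQuery` and `IdSnapshotQuery` in the file list); only its use is visible in this hunk. As a rough mental model, for a table with a single primary-key column the resumable chunk queries take a keyset-pagination shape along these lines (identifiers are placeholders, not the generated SQL):

```ts
// Placeholder identifiers ("lists", "id") for illustration; ChunkedSnapshotQuery builds the
// real statements from table.escapedIdentifier and the table's single replica-identity column.
const chunkSize = 10_000;
const firstChunk = `SELECT * FROM "lists" ORDER BY "id" LIMIT ${chunkSize}`;
const nextChunk = `SELECT * FROM "lists" WHERE "id" > $1 ORDER BY "id" LIMIT ${chunkSize}`;
// After each chunk, the last key seen is persisted via batch.updateTableProgress({ lastKey, ... }),
// so an interrupted snapshot can resume from snapshotStatus.lastKey instead of restarting.
```
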
@@ -498,7 +676,7 @@ WHERE oid = $1::regclass`,
       entity_descriptor: descriptor,
       sync_rules: this.sync_rules
     });
-    this.
+    this.relationCache.update(result.table);
 
     // Drop conflicting tables. This includes for example renamed tables.
     await batch.drop(result.dropTables);
@@ -513,40 +691,59 @@ WHERE oid = $1::regclass`,
       // Truncate this table, in case a previous snapshot was interrupted.
       await batch.truncate([result.table]);
 
-      let lsn: string = ZERO_LSN;
       // Start the snapshot inside a transaction.
       // We use a dedicated connection for this.
       const db = await this.connections.snapshotConnection();
       try {
-        await db.
-
-
-
-
-
-        // has passed that point.
-        // We have to get this LSN _after_ we have started the snapshot query.
-        const rs = await db.query(`select pg_current_wal_lsn() as lsn`);
-        lsn = rs.rows[0][0];
-
-        await db.query('COMMIT');
-      } catch (e) {
-        await db.query('ROLLBACK');
-        // TODO: Wrap with custom error type
-        throw e;
-      }
+        const table = await this.snapshotTableInTx(batch, db, result.table);
+        // After the table snapshot, we wait for replication to catch up.
+        // To make sure there is actually something to replicate, we send a keepalive
+        // message.
+        await sendKeepAlive(db);
+        return table;
       } finally {
         await db.end();
       }
-      const [table] = await batch.markSnapshotDone([result.table], lsn);
-      return table;
     }
 
     return result.table;
   }
 
+  /**
+   * Process rows that have missing TOAST values.
+   *
+   * This can happen during edge cases in the chunked intial snapshot process.
+   *
+   * We handle this similar to an inline table snapshot, but limited to the specific
+   * set of rows.
+   */
+  private async resnapshot(batch: BucketStorageBatch, rows: MissingRow[]) {
+    const byTable = new Map<number, MissingRow[]>();
+    for (let row of rows) {
+      const relId = row.table.objectId as number; // always a number for postgres
+      if (!byTable.has(relId)) {
+        byTable.set(relId, []);
+      }
+      byTable.get(relId)!.push(row);
+    }
+    const db = await this.connections.snapshotConnection();
+    try {
+      for (let rows of byTable.values()) {
+        const table = rows[0].table;
+        await this.snapshotTableInTx(
+          batch,
+          db,
+          table,
+          rows.map((r) => r.key)
+        );
+      }
+    } finally {
+      await db.end();
+    }
+  }
+
   private getTable(relationId: number): storage.SourceTable {
-    const table = this.
+    const table = this.relationCache.get(relationId);
     if (table == null) {
       // We should always receive a replication message before the relation is used.
       // If we can't find it, it's a bug.
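
Note: `resnapshot` groups the collected primary keys by table and reuses `snapshotTableInTx` with the `limited` parameter, which routes the query through `IdSnapshotQuery`. The keys themselves are gathered by `markRecordUnavailable` further down in this diff, for updates that arrive over logical replication without their unchanged TOAST values. Conceptually the re-read only touches the affected rows; a hedged sketch of the shape, with placeholder identifiers:

```ts
// Placeholder sketch only; IdSnapshotQuery in SnapshotQuery.ts builds the real statement.
// Re-reads just the rows whose TOAST values were missing from the replication message,
// so the stored "current data" can be completed without re-snapshotting the whole table.
const resnapshotSketchSql = `SELECT * FROM "lists" WHERE "id" = ANY($1)`;
```
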
@@ -565,7 +762,7 @@ WHERE oid = $1::regclass`,
     if (msg.tag == 'insert' || msg.tag == 'update' || msg.tag == 'delete') {
       const table = this.getTable(getRelId(msg.relation));
       if (!table.syncAny) {
-        logger.debug(`Table ${table.qualifiedName} not used in sync rules - skipping`);
+        this.logger.debug(`Table ${table.qualifiedName} not used in sync rules - skipping`);
         return null;
       }
 
@@ -673,8 +870,39 @@ WHERE oid = $1::regclass`,
     // Auto-activate as soon as initial replication is done
     await this.storage.autoActivate();
 
+    let resnapshot: { table: storage.SourceTable; key: PrimaryKeyValue }[] = [];
+
+    const markRecordUnavailable = (record: SaveUpdate) => {
+      if (!IdSnapshotQuery.supports(record.sourceTable)) {
+        // If it's not supported, it's also safe to ignore
+        return;
+      }
+      let key: PrimaryKeyValue = {};
+      for (let column of record.sourceTable.replicaIdColumns) {
+        const name = column.name;
+        const value = record.after[name];
+        if (value == null) {
+          // We don't expect this to actually happen.
+          // The key should always be present in the "after" record.
+          return;
+        }
+        key[name] = value;
+      }
+      resnapshot.push({
+        table: record.sourceTable,
+        key: key
+      });
+    };
+
     await this.storage.startBatch(
-      {
+      {
+        logger: this.logger,
+        zeroLSN: ZERO_LSN,
+        defaultSchema: POSTGRES_DEFAULT_SCHEMA,
+        storeCurrentData: true,
+        skipExistingRows: false,
+        markRecordUnavailable
+      },
       async (batch) => {
         // We don't handle any plain keepalive messages while we have transactions.
         // While we have transactions, we use that to advance the position.
@@ -708,6 +936,9 @@ WHERE oid = $1::regclass`,
           } else if (msg.tag == 'begin') {
             // This may span multiple transactions in the same chunk, or even across chunks.
             skipKeepalive = true;
+            if (this.oldestUncommittedChange == null) {
+              this.oldestUncommittedChange = new Date(Number(msg.commitTime / 1000n));
+            }
           } else if (msg.tag == 'commit') {
             this.metrics.getCounter(ReplicationMetric.TRANSACTIONS_REPLICATED).add(1);
             if (msg == lastCommit) {
@@ -715,12 +946,29 @@ WHERE oid = $1::regclass`,
               // This effectively lets us batch multiple transactions within the same chunk
               // into a single flush, increasing throughput for many small transactions.
               skipKeepalive = false;
-
+              // flush() must be before the resnapshot check - that is
+              // typically what reports the resnapshot records.
+              await batch.flush({ oldestUncommittedChange: this.oldestUncommittedChange });
+              // This _must_ be checked after the flush(), and before
+              // commit() or ack(). We never persist the resnapshot list,
+              // so we have to process it before marking our progress.
+              if (resnapshot.length > 0) {
+                await this.resnapshot(batch, resnapshot);
+                resnapshot = [];
+              }
+              const didCommit = await batch.commit(msg.lsn!, {
+                createEmptyCheckpoints,
+                oldestUncommittedChange: this.oldestUncommittedChange
+              });
               await this.ack(msg.lsn!, replicationStream);
+              if (didCommit) {
+                this.oldestUncommittedChange = null;
+                this.isStartingReplication = false;
+              }
             }
           } else {
             if (count % 100 == 0) {
-              logger.info(
+              this.logger.info(`Replicating op ${count} ${msg.lsn}`);
             }
 
             /**
@@ -734,7 +982,14 @@ WHERE oid = $1::regclass`,
             }
 
             count += 1;
-            await this.writeChange(batch, msg);
+            const flushResult = await this.writeChange(batch, msg);
+            if (flushResult != null && resnapshot.length > 0) {
+              // If we have large transactions, we also need to flush the resnapshot list
+              // periodically.
+              // TODO: make sure this bit is actually triggered
+              await this.resnapshot(batch, resnapshot);
+              resnapshot = [];
+            }
           }
         }
 
@@ -748,7 +1003,12 @@ WHERE oid = $1::regclass`,
           // Big caveat: This _must not_ be used to skip individual messages, since this LSN
           // may be in the middle of the next transaction.
           // It must only be used to associate checkpoints with LSNs.
-          await batch.keepalive(chunkLastLsn);
+          const didCommit = await batch.keepalive(chunkLastLsn);
+          if (didCommit) {
+            this.oldestUncommittedChange = null;
+          }
+
+          this.isStartingReplication = false;
         }
 
         // We receive chunks with empty messages often (about each second).
@@ -781,7 +1041,8 @@ WHERE oid = $1::regclass`,
     if (storageIdentifier.type != lib_postgres.POSTGRES_CONNECTION_TYPE) {
       return {
         // Keep the same behaviour as before allowing Postgres storage.
-        createEmptyCheckpoints: true
+        createEmptyCheckpoints: true,
+        oldestUncommittedChange: null
       };
     }
 
@@ -804,7 +1065,8 @@ WHERE oid = $1::regclass`,
        * Don't create empty checkpoints if the same Postgres database is used for the data source
        * and sync bucket storage. Creating empty checkpoints will cause WAL feedback loops.
        */
-      createEmptyCheckpoints: replicationIdentifier.database_name != parsedStorageIdentifier.database_name
+      createEmptyCheckpoints: replicationIdentifier.database_name != parsedStorageIdentifier.database_name,
+      oldestUncommittedChange: null
     };
   }
 
@@ -816,6 +1078,19 @@ WHERE oid = $1::regclass`,
     const version = await this.connections.getServerVersion();
     return version ? version.compareMain('14.0.0') >= 0 : false;
   }
+
+  async getReplicationLagMillis(): Promise<number | undefined> {
+    if (this.oldestUncommittedChange == null) {
+      if (this.isStartingReplication) {
+        // We don't have anything to compute replication lag with yet.
+        return undefined;
+      } else {
+        // We don't have any uncommitted changes, so replication is up-to-date.
+        return 0;
+      }
+    }
+    return Date.now() - this.oldestUncommittedChange.getTime();
+  }
 }
 
 async function touch() {