@powersync/service-module-postgres 0.13.1 → 0.14.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/CHANGELOG.md +32 -0
  2. package/dist/api/PostgresRouteAPIAdapter.d.ts +1 -1
  3. package/dist/api/PostgresRouteAPIAdapter.js +1 -1
  4. package/dist/api/PostgresRouteAPIAdapter.js.map +1 -1
  5. package/dist/replication/SnapshotQuery.d.ts +78 -0
  6. package/dist/replication/SnapshotQuery.js +175 -0
  7. package/dist/replication/SnapshotQuery.js.map +1 -0
  8. package/dist/replication/WalStream.d.ts +37 -4
  9. package/dist/replication/WalStream.js +318 -91
  10. package/dist/replication/WalStream.js.map +1 -1
  11. package/dist/replication/WalStreamReplicationJob.d.ts +2 -0
  12. package/dist/replication/WalStreamReplicationJob.js +14 -3
  13. package/dist/replication/WalStreamReplicationJob.js.map +1 -1
  14. package/dist/replication/WalStreamReplicator.d.ts +1 -0
  15. package/dist/replication/WalStreamReplicator.js +22 -0
  16. package/dist/replication/WalStreamReplicator.js.map +1 -1
  17. package/dist/replication/replication-utils.d.ts +4 -0
  18. package/dist/replication/replication-utils.js +46 -2
  19. package/dist/replication/replication-utils.js.map +1 -1
  20. package/package.json +10 -9
  21. package/src/api/PostgresRouteAPIAdapter.ts +1 -1
  22. package/src/replication/SnapshotQuery.ts +209 -0
  23. package/src/replication/WalStream.ts +373 -98
  24. package/src/replication/WalStreamReplicationJob.ts +15 -3
  25. package/src/replication/WalStreamReplicator.ts +26 -0
  26. package/src/replication/replication-utils.ts +60 -2
  27. package/test/src/__snapshots__/schema_changes.test.ts.snap +2 -2
  28. package/test/src/checkpoints.test.ts +7 -5
  29. package/test/src/chunked_snapshots.test.ts +156 -0
  30. package/test/src/large_batch.test.ts +5 -154
  31. package/test/src/resuming_snapshots.test.ts +150 -0
  32. package/test/src/schema_changes.test.ts +5 -10
  33. package/test/src/slow_tests.test.ts +13 -30
  34. package/test/src/util.ts +12 -1
  35. package/test/src/validation.test.ts +0 -1
  36. package/test/src/wal_stream.test.ts +4 -9
  37. package/test/src/wal_stream_utils.ts +15 -7
  38. package/tsconfig.tsbuildinfo +1 -1
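
Judging from the new SnapshotQuery.ts, chunked_snapshots.test.ts and resuming_snapshots.test.ts files and the WalStream changes below, the headline change in this release is chunked, resumable initial table snapshots: tables with a single primary key are snapshotted in fixed-size chunks, and the last key processed is persisted so an interrupted snapshot can resume instead of restarting. A minimal sketch of that keyset-pagination pattern, assuming a generic `query` helper (illustrative only, not the package's actual SnapshotQuery implementation):

// Illustrative only - a minimal sketch of keyset-paginated, resumable snapshotting,
// not the package's actual SnapshotQuery implementation. The `query` helper, its
// signature, and all names here are assumptions for the example.
type Row = Record<string, unknown>;
type Query = (sql: string, params: unknown[]) => Promise<Row[]>;

async function* chunkedSnapshot(
  query: Query,
  table: string,
  pkColumn: string,
  chunkLength = 10_000,
  lastKey: unknown = null
): AsyncGenerator<{ rows: Row[]; lastKey: unknown }> {
  while (true) {
    // Keyset pagination: order by the single primary key and only fetch rows
    // strictly greater than the last key seen in the previous chunk.
    const rows =
      lastKey == null
        ? await query(`SELECT * FROM ${table} ORDER BY ${pkColumn} LIMIT $1`, [chunkLength])
        : await query(
            `SELECT * FROM ${table} WHERE ${pkColumn} > $2 ORDER BY ${pkColumn} LIMIT $1`,
            [chunkLength, lastKey]
          );
    if (rows.length == 0) {
      return;
    }
    lastKey = rows[rows.length - 1][pkColumn];
    // The caller persists lastKey together with replication progress
    // (updateTableProgress in the diff below), so an interrupted snapshot
    // can resume from this point instead of restarting the table.
    yield { rows, lastKey };
  }
}

In the actual diff, ChunkedSnapshotQuery.supports() gates this on single-primary-key tables, SimpleSnapshotQuery handles the non-resumable fallback, and IdSnapshotQuery re-snapshots specific rows with missing TOAST values.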
@@ -1,12 +1,13 @@
  import * as lib_postgres from '@powersync/lib-service-postgres';
- import { container, DatabaseConnectionError, ErrorCode, errors, logger, ReplicationAbortedError, ReplicationAssertionError } from '@powersync/lib-services-framework';
- import { getUuidReplicaIdentityBson, storage } from '@powersync/service-core';
+ import { container, DatabaseConnectionError, ErrorCode, errors, logger as defaultLogger, ReplicationAssertionError, ReplicationAbortedError } from '@powersync/lib-services-framework';
+ import { getUuidReplicaIdentityBson, RelationCache, storage } from '@powersync/service-core';
  import * as pgwire from '@powersync/service-jpgwire';
  import { toSyncRulesRow } from '@powersync/service-sync-rules';
  import * as pg_utils from '../utils/pgwire_utils.js';
  import { getPgOutputRelation, getRelId } from './PgRelation.js';
- import { checkSourceConfiguration, getReplicationIdentityColumns } from './replication-utils.js';
+ import { checkSourceConfiguration, checkTableRls, getReplicationIdentityColumns } from './replication-utils.js';
  import { ReplicationMetric } from '@powersync/service-types';
+ import { ChunkedSnapshotQuery, IdSnapshotQuery, SimpleSnapshotQuery } from './SnapshotQuery.js';
  export const ZERO_LSN = '00000000/00000000';
  export const PUBLICATION_NAME = 'powersync';
  export const POSTGRES_DEFAULT_SCHEMA = 'public';
@@ -39,20 +40,39 @@ export class WalStream {
  sync_rules;
  group_id;
  connection_id = 1;
+ logger;
  storage;
  metrics;
  slot_name;
  connections;
  abort_signal;
- relation_cache = new Map();
+ relationCache = new RelationCache((relation) => {
+ if (typeof relation == 'number') {
+ return relation;
+ }
+ return relation.objectId;
+ });
  startedStreaming = false;
+ snapshotChunkLength;
+ /**
+ * Time of the oldest uncommitted change, according to the source db.
+ * This is used to determine the replication lag.
+ */
+ oldestUncommittedChange = null;
+ /**
+ * Keep track of whether we have done a commit or keepalive yet.
+ * We can only compute replication lag if isStartingReplication == false, or oldestUncommittedChange is present.
+ */
+ isStartingReplication = true;
  constructor(options) {
+ this.logger = options.logger ?? defaultLogger;
  this.storage = options.storage;
  this.metrics = options.metrics;
  this.sync_rules = options.storage.getParsedSyncRules({ defaultSchema: POSTGRES_DEFAULT_SCHEMA });
  this.group_id = options.storage.group_id;
  this.slot_name = options.storage.slot_name;
  this.connections = options.connections;
+ this.snapshotChunkLength = options.snapshotChunkLength ?? 10_000;
  this.abort_signal = options.abort_signal;
  this.abort_signal.addEventListener('abort', () => {
  if (this.startedStreaming) {
@@ -62,7 +82,7 @@ export class WalStream {
  const promise = sendKeepAlive(this.connections.pool);
  promise.catch((e) => {
  // Failures here are okay - this only speeds up stopping the process.
- logger.warn('Failed to ping connection', e);
+ this.logger.warn('Failed to ping connection', e);
  });
  }
  else {
@@ -130,9 +150,20 @@ export class WalStream {
  ]
  });
  if (rs.rows.length == 0) {
- logger.info(`Skipping ${tablePattern.schema}.${name} - not part of ${PUBLICATION_NAME} publication`);
+ this.logger.info(`Skipping ${tablePattern.schema}.${name} - not part of ${PUBLICATION_NAME} publication`);
  continue;
  }
+ try {
+ const result = await checkTableRls(db, relid);
+ if (!result.canRead) {
+ // We log the message, then continue anyway, since the check does not cover all cases.
+ this.logger.warn(result.message);
+ }
+ }
+ catch (e) {
+ // It's possible that we just don't have permission to access pg_roles - log the error and continue.
+ this.logger.warn(`Could not check RLS access for ${tablePattern.schema}.${name}`, e);
+ }
  const cresult = await getReplicationIdentityColumns(db, relid);
  const table = await this.handleRelation(batch, {
  name,
@@ -152,7 +183,7 @@ export class WalStream {
  const snapshotDone = status.snapshot_done && status.checkpoint_lsn != null;
  if (snapshotDone) {
  // Snapshot is done, but we still need to check the replication slot status
- logger.info(`${slotName} Initial replication already done`);
+ this.logger.info(`Initial replication already done`);
  }
  // Check if replication slot exists
  const rs = await this.connections.pool.query({
@@ -208,7 +239,7 @@ export class WalStream {
  // We peek a large number of changes here, to make it more likely to pick up replication slot errors.
  // For example, "publication does not exist" only occurs here if the peek actually includes changes related
  // to the slot.
- logger.info(`Checking ${slotName}`);
+ this.logger.info(`Checking ${slotName}`);
  // The actual results can be quite large, so we don't actually return everything
  // due to memory and processing overhead that would create.
  const cursor = await this.connections.pool.stream({
@@ -222,12 +253,12 @@ export class WalStream {
  // No-op, just exhaust the cursor
  }
  // Success
- logger.info(`Slot ${slotName} appears healthy`);
+ this.logger.info(`Slot ${slotName} appears healthy`);
  return { needsNewSlot: false };
  }
  catch (e) {
  last_error = e;
- logger.warn(`${slotName} Replication slot error`, e);
+ this.logger.warn(`Replication slot error`, e);
  if (this.stopped) {
  throw e;
  }
@@ -249,7 +280,7 @@ export class WalStream {
  // Sample: publication "powersync" does not exist
  // Happens when publication deleted or never created.
  // Slot must be re-created in this case.
- logger.info(`${slotName} is not valid anymore`);
+ this.logger.info(`${slotName} is not valid anymore`);
  return { needsNewSlot: true };
  }
  // Try again after a pause
@@ -258,7 +289,7 @@ export class WalStream {
  }
  throw new ReplicationAssertionError('Unreachable');
  }
- async estimatedCount(db, table) {
+ async estimatedCountNumber(db, table) {
  const results = await db.query({
  statement: `SELECT reltuples::bigint AS estimate
  FROM pg_class
@@ -267,10 +298,10 @@ WHERE oid = $1::regclass`,
  });
  const row = results.rows[0];
  if ((row?.[0] ?? -1n) == -1n) {
- return '?';
+ return -1;
  }
  else {
- return `~${row[0]}`;
+ return Number(row[0]);
  }
  }
  /**
@@ -290,7 +321,7 @@ WHERE oid = $1::regclass`,
  // In those cases, we have to start replication from scratch.
  // If there is an existing healthy slot, we can skip this and continue
  // initial replication where we left off.
- await this.storage.clear();
+ await this.storage.clear({ signal: this.abort_signal });
  await db.query({
  statement: 'SELECT pg_drop_replication_slot(slot_name) FROM pg_replication_slots WHERE slot_name = $1',
  params: [{ type: 'varchar', value: slotName }]
@@ -298,27 +329,39 @@ WHERE oid = $1::regclass`,
  // We use the replication connection here, not a pool.
  // The replication slot must be created before we start snapshotting tables.
  await replicationConnection.query(`CREATE_REPLICATION_SLOT ${slotName} LOGICAL pgoutput`);
- logger.info(`Created replication slot ${slotName}`);
+ this.logger.info(`Created replication slot ${slotName}`);
  }
  await this.initialReplication(db);
  }
  async initialReplication(db) {
  const sourceTables = this.sync_rules.getSourceTables();
- await this.storage.startBatch({ zeroLSN: ZERO_LSN, defaultSchema: POSTGRES_DEFAULT_SCHEMA, storeCurrentData: true, skipExistingRows: true }, async (batch) => {
+ await this.storage.startBatch({
+ logger: this.logger,
+ zeroLSN: ZERO_LSN,
+ defaultSchema: POSTGRES_DEFAULT_SCHEMA,
+ storeCurrentData: true,
+ skipExistingRows: true
+ }, async (batch) => {
+ let tablesWithStatus = [];
  for (let tablePattern of sourceTables) {
  const tables = await this.getQualifiedTableNames(batch, db, tablePattern);
+ // Pre-get counts
  for (let table of tables) {
  if (table.snapshotComplete) {
- logger.info(`${this.slot_name} Skipping ${table.qualifiedName} - snapshot already done`);
+ this.logger.info(`Skipping ${table.qualifiedName} - snapshot already done`);
  continue;
  }
- await this.snapshotTable(batch, db, table);
- const rs = await db.query(`select pg_current_wal_lsn() as lsn`);
- const tableLsnNotBefore = rs.rows[0][0];
- await batch.markSnapshotDone([table], tableLsnNotBefore);
- await touch();
+ const count = await this.estimatedCountNumber(db, table);
+ table = await batch.updateTableProgress(table, { totalEstimatedCount: count });
+ this.relationCache.update(table);
+ tablesWithStatus.push(table);
+ this.logger.info(`To replicate: ${table.qualifiedName} ${table.formatSnapshotProgress()}`);
  }
  }
+ for (let table of tablesWithStatus) {
+ await this.snapshotTableInTx(batch, db, table);
+ await touch();
+ }
  // Always commit the initial snapshot at zero.
  // This makes sure we don't skip any changes applied before starting this snapshot,
  // in the case of snapshot retries.
@@ -340,53 +383,147 @@ WHERE oid = $1::regclass`,
  yield toSyncRulesRow(row);
  }
  }
- async snapshotTable(batch, db, table) {
- logger.info(`${this.slot_name} Replicating ${table.qualifiedName}`);
- const estimatedCount = await this.estimatedCount(db, table);
- let at = 0;
- let lastLogIndex = 0;
- const cursor = db.stream({ statement: `SELECT * FROM ${table.escapedIdentifier}` });
+ async snapshotTableInTx(batch, db, table, limited) {
+ // Note: We use the default "Read Committed" isolation level here, not snapshot isolation.
+ // The data may change during the transaction, but that is compensated for in the streaming
+ // replication afterwards.
+ await db.query('BEGIN');
+ try {
+ let tableLsnNotBefore;
+ await this.snapshotTable(batch, db, table, limited);
+ // Get the current LSN.
+ // The data will only be consistent once incremental replication has passed that point.
+ // We have to get this LSN _after_ we have finished the table snapshot.
+ //
+ // There are basically two relevant LSNs here:
+ // A: The LSN before the snapshot starts. We don't explicitly record this on the PowerSync side,
+ // but it is implicitly recorded in the replication slot.
+ // B: The LSN after the table snapshot is complete, which is what we get here.
+ // When we do the snapshot queries, the data that we get back for each chunk could match the state
+ // anywhere between A and B. To actually have a consistent state on our side, we need to:
+ // 1. Complete the snapshot.
+ // 2. Wait until logical replication has caught up with all the changes between A and B.
+ // Calling `markSnapshotDone(LSN B)` covers that.
+ const rs = await db.query(`select pg_current_wal_lsn() as lsn`);
+ tableLsnNotBefore = rs.rows[0][0];
+ // Side note: A ROLLBACK would probably also be fine here, since we only read in this transaction.
+ await db.query('COMMIT');
+ const [resultTable] = await batch.markSnapshotDone([table], tableLsnNotBefore);
+ this.relationCache.update(resultTable);
+ return resultTable;
+ }
+ catch (e) {
+ await db.query('ROLLBACK');
+ throw e;
+ }
+ }
+ async snapshotTable(batch, db, table, limited) {
+ let totalEstimatedCount = table.snapshotStatus?.totalEstimatedCount;
+ let at = table.snapshotStatus?.replicatedCount ?? 0;
+ let lastCountTime = 0;
+ let q;
+ // We do streaming on two levels:
+ // 1. Coarse level: DECLARE CURSOR, FETCH 10000 at a time.
+ // 2. Fine level: Stream chunks from each fetch call.
+ if (limited) {
+ q = new IdSnapshotQuery(db, table, limited);
+ }
+ else if (ChunkedSnapshotQuery.supports(table)) {
+ // Single primary key - we can use the primary key for chunking
+ const orderByKey = table.replicaIdColumns[0];
+ q = new ChunkedSnapshotQuery(db, table, this.snapshotChunkLength, table.snapshotStatus?.lastKey ?? null);
+ if (table.snapshotStatus?.lastKey != null) {
+ this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} - resuming from ${orderByKey.name} > ${q.lastKey}`);
+ }
+ else {
+ this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} - resumable`);
+ }
+ }
+ else {
+ // Fallback case - query the entire table
+ this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} - not resumable`);
+ q = new SimpleSnapshotQuery(db, table, this.snapshotChunkLength);
+ at = 0;
+ }
+ await q.initialize();
  let columns = [];
- // pgwire streams rows in chunks.
- // These chunks can be quite small (as little as 16KB), so we don't flush chunks automatically.
- for await (let chunk of cursor) {
- if (chunk.tag == 'RowDescription') {
- let i = 0;
- columns = chunk.payload.map((c) => {
- return { i: i++, name: c.name };
+ let hasRemainingData = true;
+ while (hasRemainingData) {
+ // Fetch 10k at a time.
+ // The balance here is between latency overhead per FETCH call,
+ // and not spending too much time on each FETCH call.
+ // We aim for a couple of seconds on each FETCH call.
+ const cursor = q.nextChunk();
+ hasRemainingData = false;
+ // pgwire streams rows in chunks.
+ // These chunks can be quite small (as little as 16KB), so we don't flush chunks automatically.
+ // There are typically 100-200 rows per chunk.
+ for await (let chunk of cursor) {
+ if (chunk.tag == 'RowDescription') {
+ // We get a RowDescription for each FETCH call, but they should
+ // all be the same.
+ let i = 0;
+ columns = chunk.payload.map((c) => {
+ return { i: i++, name: c.name };
+ });
+ continue;
+ }
+ const rows = chunk.rows.map((row) => {
+ let q = {};
+ for (let c of columns) {
+ q[c.name] = row[c.i];
+ }
+ return q;
  });
- continue;
+ if (rows.length > 0) {
+ hasRemainingData = true;
+ }
+ for (const record of WalStream.getQueryData(rows)) {
+ // This auto-flushes when the batch reaches its size limit
+ await batch.save({
+ tag: storage.SaveOperationTag.INSERT,
+ sourceTable: table,
+ before: undefined,
+ beforeReplicaId: undefined,
+ after: record,
+ afterReplicaId: getUuidReplicaIdentityBson(record, table.replicaIdColumns)
+ });
+ }
+ at += rows.length;
+ this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(rows.length);
+ await touch();
  }
- const rows = chunk.rows.map((row) => {
- let q = {};
- for (let c of columns) {
- q[c.name] = row[c.i];
+ // Important: flush before marking progress
+ await batch.flush();
+ if (limited == null) {
+ let lastKey;
+ if (q instanceof ChunkedSnapshotQuery) {
+ lastKey = q.getLastKeySerialized();
  }
- return q;
- });
- if (rows.length > 0 && at - lastLogIndex >= 5000) {
- logger.info(`${this.slot_name} Replicating ${table.qualifiedName} ${at}/${estimatedCount}`);
- lastLogIndex = at;
+ if (lastCountTime < performance.now() - 10 * 60 * 1000) {
+ // Even though we're doing the snapshot inside a transaction, the transaction uses
+ // the default "Read Committed" isolation level. This means we can get new data
+ // within the transaction, so we re-estimate the count every 10 minutes when replicating
+ // large tables.
+ totalEstimatedCount = await this.estimatedCountNumber(db, table);
+ lastCountTime = performance.now();
+ }
+ table = await batch.updateTableProgress(table, {
+ lastKey: lastKey,
+ replicatedCount: at,
+ totalEstimatedCount: totalEstimatedCount
+ });
+ this.relationCache.update(table);
+ this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()}`);
  }
- if (this.abort_signal.aborted) {
- throw new ReplicationAbortedError(`Aborted initial replication of ${this.slot_name}`);
+ else {
+ this.logger.info(`Replicating ${table.qualifiedName} ${at}/${limited.length} for resnapshot`);
  }
- for (const record of WalStream.getQueryData(rows)) {
- // This auto-flushes when the batch reaches its size limit
- await batch.save({
- tag: storage.SaveOperationTag.INSERT,
- sourceTable: table,
- before: undefined,
- beforeReplicaId: undefined,
- after: record,
- afterReplicaId: getUuidReplicaIdentityBson(record, table.replicaIdColumns)
- });
+ if (this.abort_signal.aborted) {
+ // We only abort after flushing
+ throw new ReplicationAbortedError(`Initial replication interrupted`);
  }
- at += rows.length;
- this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(rows.length);
- await touch();
  }
- await batch.flush();
  }
  async handleRelation(batch, descriptor, snapshot) {
  if (!descriptor.objectId && typeof descriptor.objectId != 'number') {
@@ -399,7 +536,7 @@ WHERE oid = $1::regclass`,
  entity_descriptor: descriptor,
  sync_rules: this.sync_rules
  });
- this.relation_cache.set(descriptor.objectId, result.table);
+ this.relationCache.update(result.table);
  // Drop conflicting tables. This includes for example renamed tables.
  await batch.drop(result.dropTables);
  // Snapshot if:
@@ -410,38 +547,53 @@ WHERE oid = $1::regclass`,
  if (shouldSnapshot) {
  // Truncate this table, in case a previous snapshot was interrupted.
  await batch.truncate([result.table]);
- let lsn = ZERO_LSN;
  // Start the snapshot inside a transaction.
  // We use a dedicated connection for this.
  const db = await this.connections.snapshotConnection();
  try {
- await db.query('BEGIN');
- try {
- await this.snapshotTable(batch, db, result.table);
- // Get the current LSN.
- // The data will only be consistent once incremental replication
- // has passed that point.
- // We have to get this LSN _after_ we have started the snapshot query.
- const rs = await db.query(`select pg_current_wal_lsn() as lsn`);
- lsn = rs.rows[0][0];
- await db.query('COMMIT');
- }
- catch (e) {
- await db.query('ROLLBACK');
- // TODO: Wrap with custom error type
- throw e;
- }
+ const table = await this.snapshotTableInTx(batch, db, result.table);
+ // After the table snapshot, we wait for replication to catch up.
+ // To make sure there is actually something to replicate, we send a keepalive
+ // message.
+ await sendKeepAlive(db);
+ return table;
  }
  finally {
  await db.end();
  }
- const [table] = await batch.markSnapshotDone([result.table], lsn);
- return table;
  }
  return result.table;
  }
+ /**
+ * Process rows that have missing TOAST values.
+ *
+ * This can happen during edge cases in the chunked initial snapshot process.
+ *
+ * We handle this similar to an inline table snapshot, but limited to the specific
+ * set of rows.
+ */
+ async resnapshot(batch, rows) {
+ const byTable = new Map();
+ for (let row of rows) {
+ const relId = row.table.objectId; // always a number for postgres
+ if (!byTable.has(relId)) {
+ byTable.set(relId, []);
+ }
+ byTable.get(relId).push(row);
+ }
+ const db = await this.connections.snapshotConnection();
+ try {
+ for (let rows of byTable.values()) {
+ const table = rows[0].table;
+ await this.snapshotTableInTx(batch, db, table, rows.map((r) => r.key));
+ }
+ }
+ finally {
+ await db.end();
+ }
+ }
  getTable(relationId) {
- const table = this.relation_cache.get(relationId);
+ const table = this.relationCache.get(relationId);
  if (table == null) {
  // We should always receive a replication message before the relation is used.
  // If we can't find it, it's a bug.
@@ -456,7 +608,7 @@ WHERE oid = $1::regclass`,
  if (msg.tag == 'insert' || msg.tag == 'update' || msg.tag == 'delete') {
  const table = this.getTable(getRelId(msg.relation));
  if (!table.syncAny) {
- logger.debug(`Table ${table.qualifiedName} not used in sync rules - skipping`);
+ this.logger.debug(`Table ${table.qualifiedName} not used in sync rules - skipping`);
  return null;
  }
  if (msg.tag == 'insert') {
@@ -556,7 +708,36 @@ WHERE oid = $1::regclass`,
  this.startedStreaming = true;
  // Auto-activate as soon as initial replication is done
  await this.storage.autoActivate();
- await this.storage.startBatch({ zeroLSN: ZERO_LSN, defaultSchema: POSTGRES_DEFAULT_SCHEMA, storeCurrentData: true, skipExistingRows: false }, async (batch) => {
+ let resnapshot = [];
+ const markRecordUnavailable = (record) => {
+ if (!IdSnapshotQuery.supports(record.sourceTable)) {
+ // If it's not supported, it's also safe to ignore
+ return;
+ }
+ let key = {};
+ for (let column of record.sourceTable.replicaIdColumns) {
+ const name = column.name;
+ const value = record.after[name];
+ if (value == null) {
+ // We don't expect this to actually happen.
+ // The key should always be present in the "after" record.
+ return;
+ }
+ key[name] = value;
+ }
+ resnapshot.push({
+ table: record.sourceTable,
+ key: key
+ });
+ };
+ await this.storage.startBatch({
+ logger: this.logger,
+ zeroLSN: ZERO_LSN,
+ defaultSchema: POSTGRES_DEFAULT_SCHEMA,
+ storeCurrentData: true,
+ skipExistingRows: false,
+ markRecordUnavailable
+ }, async (batch) => {
  // We don't handle any plain keepalive messages while we have transactions.
  // While we have transactions, we use that to advance the position.
  // Replication never starts in the middle of a transaction, so this starts as false.
@@ -585,6 +766,9 @@ WHERE oid = $1::regclass`,
  else if (msg.tag == 'begin') {
  // This may span multiple transactions in the same chunk, or even across chunks.
  skipKeepalive = true;
+ if (this.oldestUncommittedChange == null) {
+ this.oldestUncommittedChange = new Date(Number(msg.commitTime / 1000n));
+ }
  }
  else if (msg.tag == 'commit') {
  this.metrics.getCounter(ReplicationMetric.TRANSACTIONS_REPLICATED).add(1);
@@ -593,13 +777,30 @@ WHERE oid = $1::regclass`,
  // This effectively lets us batch multiple transactions within the same chunk
  // into a single flush, increasing throughput for many small transactions.
  skipKeepalive = false;
- await batch.commit(msg.lsn, { createEmptyCheckpoints });
+ // flush() must be before the resnapshot check - that is
+ // typically what reports the resnapshot records.
+ await batch.flush({ oldestUncommittedChange: this.oldestUncommittedChange });
+ // This _must_ be checked after the flush(), and before
+ // commit() or ack(). We never persist the resnapshot list,
+ // so we have to process it before marking our progress.
+ if (resnapshot.length > 0) {
+ await this.resnapshot(batch, resnapshot);
+ resnapshot = [];
+ }
+ const didCommit = await batch.commit(msg.lsn, {
+ createEmptyCheckpoints,
+ oldestUncommittedChange: this.oldestUncommittedChange
+ });
  await this.ack(msg.lsn, replicationStream);
+ if (didCommit) {
+ this.oldestUncommittedChange = null;
+ this.isStartingReplication = false;
+ }
  }
  }
  else {
  if (count % 100 == 0) {
- logger.info(`${this.slot_name} replicating op ${count} ${msg.lsn}`);
+ this.logger.info(`Replicating op ${count} ${msg.lsn}`);
  }
  /**
  * If we can see the contents of logical messages, then we can check if a keepalive
@@ -611,7 +812,14 @@ WHERE oid = $1::regclass`,
  keepAliveDetected = true;
  }
  count += 1;
- await this.writeChange(batch, msg);
+ const flushResult = await this.writeChange(batch, msg);
+ if (flushResult != null && resnapshot.length > 0) {
+ // If we have large transactions, we also need to flush the resnapshot list
+ // periodically.
+ // TODO: make sure this bit is actually triggered
+ await this.resnapshot(batch, resnapshot);
+ resnapshot = [];
+ }
  }
  }
  if (!skipKeepalive) {
@@ -623,7 +831,11 @@ WHERE oid = $1::regclass`,
  // Big caveat: This _must not_ be used to skip individual messages, since this LSN
  // may be in the middle of the next transaction.
  // It must only be used to associate checkpoints with LSNs.
- await batch.keepalive(chunkLastLsn);
+ const didCommit = await batch.keepalive(chunkLastLsn);
+ if (didCommit) {
+ this.oldestUncommittedChange = null;
+ }
+ this.isStartingReplication = false;
  }
  // We receive chunks with empty messages often (about each second).
  // Acknowledging here progresses the slot past these and frees up resources.
@@ -649,7 +861,8 @@ WHERE oid = $1::regclass`,
  if (storageIdentifier.type != lib_postgres.POSTGRES_CONNECTION_TYPE) {
  return {
  // Keep the same behaviour as before allowing Postgres storage.
- createEmptyCheckpoints: true
+ createEmptyCheckpoints: true,
+ oldestUncommittedChange: null
  };
  }
  const parsedStorageIdentifier = lib_postgres.utils.decodePostgresSystemIdentifier(storageIdentifier.id);
@@ -665,7 +878,8 @@ WHERE oid = $1::regclass`,
  * Don't create empty checkpoints if the same Postgres database is used for the data source
  * and sync bucket storage. Creating empty checkpoints will cause WAL feedback loops.
  */
- createEmptyCheckpoints: replicationIdentifier.database_name != parsedStorageIdentifier.database_name
+ createEmptyCheckpoints: replicationIdentifier.database_name != parsedStorageIdentifier.database_name,
+ oldestUncommittedChange: null
  };
  }
  /**
@@ -676,6 +890,19 @@ WHERE oid = $1::regclass`,
  const version = await this.connections.getServerVersion();
  return version ? version.compareMain('14.0.0') >= 0 : false;
  }
+ async getReplicationLagMillis() {
+ if (this.oldestUncommittedChange == null) {
+ if (this.isStartingReplication) {
+ // We don't have anything to compute replication lag with yet.
+ return undefined;
+ }
+ else {
+ // We don't have any uncommitted changes, so replication is up-to-date.
+ return 0;
+ }
+ }
+ return Date.now() - this.oldestUncommittedChange.getTime();
+ }
  }
  async function touch() {
  // FIXME: The hosted Kubernetes probe does not actually check the timestamp on this.