@powersync/service-module-postgres 0.0.0-dev-20250507154604 → 0.0.0-dev-20250611110033

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. package/CHANGELOG.md +47 -8
  2. package/dist/api/PostgresRouteAPIAdapter.d.ts +1 -1
  3. package/dist/api/PostgresRouteAPIAdapter.js +5 -1
  4. package/dist/api/PostgresRouteAPIAdapter.js.map +1 -1
  5. package/dist/auth/SupabaseKeyCollector.d.ts +3 -10
  6. package/dist/auth/SupabaseKeyCollector.js +6 -4
  7. package/dist/auth/SupabaseKeyCollector.js.map +1 -1
  8. package/dist/replication/SnapshotQuery.d.ts +75 -0
  9. package/dist/replication/SnapshotQuery.js +172 -0
  10. package/dist/replication/SnapshotQuery.js.map +1 -0
  11. package/dist/replication/WalStream.d.ts +37 -4
  12. package/dist/replication/WalStream.js +284 -88
  13. package/dist/replication/WalStream.js.map +1 -1
  14. package/dist/replication/WalStreamReplicationJob.d.ts +2 -0
  15. package/dist/replication/WalStreamReplicationJob.js +10 -3
  16. package/dist/replication/WalStreamReplicationJob.js.map +1 -1
  17. package/dist/replication/WalStreamReplicator.d.ts +1 -0
  18. package/dist/replication/WalStreamReplicator.js +22 -0
  19. package/dist/replication/WalStreamReplicator.js.map +1 -1
  20. package/package.json +12 -12
  21. package/src/api/PostgresRouteAPIAdapter.ts +5 -1
  22. package/src/auth/SupabaseKeyCollector.ts +14 -5
  23. package/src/replication/SnapshotQuery.ts +206 -0
  24. package/src/replication/WalStream.ts +338 -95
  25. package/src/replication/WalStreamReplicationJob.ts +11 -3
  26. package/src/replication/WalStreamReplicator.ts +26 -0
  27. package/test/src/__snapshots__/schema_changes.test.ts.snap +2 -2
  28. package/test/src/checkpoints.test.ts +10 -3
  29. package/test/src/chunked_snapshots.test.ts +156 -0
  30. package/test/src/large_batch.test.ts +5 -154
  31. package/test/src/resuming_snapshots.test.ts +150 -0
  32. package/test/src/schema_changes.test.ts +5 -10
  33. package/test/src/slow_tests.test.ts +13 -30
  34. package/test/src/util.ts +12 -1
  35. package/test/src/validation.test.ts +0 -1
  36. package/test/src/wal_stream.test.ts +4 -9
  37. package/test/src/wal_stream_utils.ts +15 -7
  38. package/tsconfig.tsbuildinfo +1 -1
@@ -4,11 +4,21 @@ import {
4
4
  DatabaseConnectionError,
5
5
  ErrorCode,
6
6
  errors,
7
- logger,
8
- ReplicationAbortedError,
9
- ReplicationAssertionError
7
+ Logger,
8
+ logger as defaultLogger,
9
+ ReplicationAssertionError,
10
+ ReplicationAbortedError
10
11
  } from '@powersync/lib-services-framework';
11
- import { getUuidReplicaIdentityBson, MetricsEngine, SourceEntityDescriptor, storage } from '@powersync/service-core';
12
+ import {
13
+ BucketStorageBatch,
14
+ getUuidReplicaIdentityBson,
15
+ MetricsEngine,
16
+ RelationCache,
17
+ SaveUpdate,
18
+ SourceEntityDescriptor,
19
+ SourceTable,
20
+ storage
21
+ } from '@powersync/service-core';
12
22
  import * as pgwire from '@powersync/service-jpgwire';
13
23
  import { DatabaseInputRow, SqliteRow, SqlSyncRules, TablePattern, toSyncRulesRow } from '@powersync/service-sync-rules';
14
24
  import * as pg_utils from '../utils/pgwire_utils.js';
@@ -17,12 +27,30 @@ import { PgManager } from './PgManager.js';
17
27
  import { getPgOutputRelation, getRelId } from './PgRelation.js';
18
28
  import { checkSourceConfiguration, getReplicationIdentityColumns } from './replication-utils.js';
19
29
  import { ReplicationMetric } from '@powersync/service-types';
30
+ import {
31
+ ChunkedSnapshotQuery,
32
+ IdSnapshotQuery,
33
+ MissingRow,
34
+ PrimaryKeyValue,
35
+ SimpleSnapshotQuery,
36
+ SnapshotQuery
37
+ } from './SnapshotQuery.js';
20
38
 
21
39
  export interface WalStreamOptions {
40
+ logger?: Logger;
22
41
  connections: PgManager;
23
42
  storage: storage.SyncRulesBucketStorage;
24
43
  metrics: MetricsEngine;
25
44
  abort_signal: AbortSignal;
45
+
46
+ /**
47
+ * Override snapshot chunk length (number of rows), for testing.
48
+ *
49
+ * Defaults to 10_000.
50
+ *
51
+ * Note that queries are streamed, so we don't actually keep that much data in memory.
52
+ */
53
+ snapshotChunkLength?: number;
26
54
  }
27
55
 
28
56
  interface InitResult {
@@ -73,6 +101,8 @@ export class WalStream {
73
101
 
74
102
  connection_id = 1;
75
103
 
104
+ private logger: Logger;
105
+
76
106
  private readonly storage: storage.SyncRulesBucketStorage;
77
107
  private readonly metrics: MetricsEngine;
78
108
  private readonly slot_name: string;
@@ -81,17 +111,37 @@ export class WalStream {
81
111
 
82
112
  private abort_signal: AbortSignal;
83
113
 
84
- private relation_cache = new Map<string | number, storage.SourceTable>();
114
+ private relationCache = new RelationCache((relation: number | SourceTable) => {
115
+ if (typeof relation == 'number') {
116
+ return relation;
117
+ }
118
+ return relation.objectId!;
119
+ });
85
120
 
86
121
  private startedStreaming = false;
87
122
 
123
+ private snapshotChunkLength: number;
124
+
125
+ /**
126
+ * Time of the oldest uncommitted change, according to the source db.
127
+ * This is used to determine the replication lag.
128
+ */
129
+ private oldestUncommittedChange: Date | null = null;
130
+ /**
131
+ * Keep track of whether we have done a commit or keepalive yet.
132
+ * We can only compute replication lag if isStartingReplication == false, or oldestUncommittedChange is present.
133
+ */
134
+ private isStartingReplication = true;
135
+
88
136
  constructor(options: WalStreamOptions) {
137
+ this.logger = options.logger ?? defaultLogger;
89
138
  this.storage = options.storage;
90
139
  this.metrics = options.metrics;
91
140
  this.sync_rules = options.storage.getParsedSyncRules({ defaultSchema: POSTGRES_DEFAULT_SCHEMA });
92
141
  this.group_id = options.storage.group_id;
93
142
  this.slot_name = options.storage.slot_name;
94
143
  this.connections = options.connections;
144
+ this.snapshotChunkLength = options.snapshotChunkLength ?? 10_000;
95
145
 
96
146
  this.abort_signal = options.abort_signal;
97
147
  this.abort_signal.addEventListener(
@@ -104,7 +154,7 @@ export class WalStream {
104
154
  const promise = sendKeepAlive(this.connections.pool);
105
155
  promise.catch((e) => {
106
156
  // Failures here are okay - this only speeds up stopping the process.
107
- logger.warn('Failed to ping connection', e);
157
+ this.logger.warn('Failed to ping connection', e);
108
158
  });
109
159
  } else {
110
160
  // If we haven't started streaming yet, it could be due to something like
@@ -183,7 +233,7 @@ export class WalStream {
183
233
  ]
184
234
  });
185
235
  if (rs.rows.length == 0) {
186
- logger.info(`Skipping ${tablePattern.schema}.${name} - not part of ${PUBLICATION_NAME} publication`);
236
+ this.logger.info(`Skipping ${tablePattern.schema}.${name} - not part of ${PUBLICATION_NAME} publication`);
187
237
  continue;
188
238
  }
189
239
 
@@ -215,7 +265,7 @@ export class WalStream {
215
265
  const snapshotDone = status.snapshot_done && status.checkpoint_lsn != null;
216
266
  if (snapshotDone) {
217
267
  // Snapshot is done, but we still need to check the replication slot status
218
- logger.info(`${slotName} Initial replication already done`);
268
+ this.logger.info(`Initial replication already done`);
219
269
  }
220
270
 
221
271
  // Check if replication slot exists
@@ -276,7 +326,7 @@ export class WalStream {
276
326
  // We peek a large number of changes here, to make it more likely to pick up replication slot errors.
277
327
  // For example, "publication does not exist" only occurs here if the peek actually includes changes related
278
328
  // to the slot.
279
- logger.info(`Checking ${slotName}`);
329
+ this.logger.info(`Checking ${slotName}`);
280
330
 
281
331
  // The actual results can be quite large, so we don't actually return everything
282
332
  // due to memory and processing overhead that would create.
@@ -293,11 +343,11 @@ export class WalStream {
293
343
  }
294
344
 
295
345
  // Success
296
- logger.info(`Slot ${slotName} appears healthy`);
346
+ this.logger.info(`Slot ${slotName} appears healthy`);
297
347
  return { needsNewSlot: false };
298
348
  } catch (e) {
299
349
  last_error = e;
300
- logger.warn(`${slotName} Replication slot error`, e);
350
+ this.logger.warn(`Replication slot error`, e);
301
351
 
302
352
  if (this.stopped) {
303
353
  throw e;
@@ -324,7 +374,7 @@ export class WalStream {
324
374
  // Sample: publication "powersync" does not exist
325
375
  // Happens when publication deleted or never created.
326
376
  // Slot must be re-created in this case.
327
- logger.info(`${slotName} is not valid anymore`);
377
+ this.logger.info(`${slotName} is not valid anymore`);
328
378
 
329
379
  return { needsNewSlot: true };
330
380
  }
@@ -336,7 +386,7 @@ export class WalStream {
336
386
  throw new ReplicationAssertionError('Unreachable');
337
387
  }
338
388
 
339
- async estimatedCount(db: pgwire.PgConnection, table: storage.SourceTable): Promise<string> {
389
+ async estimatedCountNumber(db: pgwire.PgConnection, table: storage.SourceTable): Promise<number> {
340
390
  const results = await db.query({
341
391
  statement: `SELECT reltuples::bigint AS estimate
342
392
  FROM pg_class
@@ -345,9 +395,9 @@ WHERE oid = $1::regclass`,
345
395
  });
346
396
  const row = results.rows[0];
347
397
  if ((row?.[0] ?? -1n) == -1n) {
348
- return '?';
398
+ return -1;
349
399
  } else {
350
- return `~${row[0]}`;
400
+ return Number(row[0]);
351
401
  }
352
402
  }
353
403
 
@@ -381,7 +431,7 @@ WHERE oid = $1::regclass`,
381
431
  // The replication slot must be created before we start snapshotting tables.
382
432
  await replicationConnection.query(`CREATE_REPLICATION_SLOT ${slotName} LOGICAL pgoutput`);
383
433
 
384
- logger.info(`Created replication slot ${slotName}`);
434
+ this.logger.info(`Created replication slot ${slotName}`);
385
435
  }
386
436
 
387
437
  await this.initialReplication(db);
@@ -390,24 +440,37 @@ WHERE oid = $1::regclass`,
390
440
  async initialReplication(db: pgwire.PgConnection) {
391
441
  const sourceTables = this.sync_rules.getSourceTables();
392
442
  await this.storage.startBatch(
393
- { zeroLSN: ZERO_LSN, defaultSchema: POSTGRES_DEFAULT_SCHEMA, storeCurrentData: true, skipExistingRows: true },
443
+ {
444
+ logger: this.logger,
445
+ zeroLSN: ZERO_LSN,
446
+ defaultSchema: POSTGRES_DEFAULT_SCHEMA,
447
+ storeCurrentData: true,
448
+ skipExistingRows: true
449
+ },
394
450
  async (batch) => {
451
+ let tablesWithStatus: SourceTable[] = [];
395
452
  for (let tablePattern of sourceTables) {
396
453
  const tables = await this.getQualifiedTableNames(batch, db, tablePattern);
454
+ // Pre-get counts
397
455
  for (let table of tables) {
398
456
  if (table.snapshotComplete) {
399
- logger.info(`${this.slot_name} Skipping ${table.qualifiedName} - snapshot already done`);
457
+ this.logger.info(`Skipping ${table.qualifiedName} - snapshot already done`);
400
458
  continue;
401
459
  }
402
- await this.snapshotTable(batch, db, table);
460
+ const count = await this.estimatedCountNumber(db, table);
461
+ table = await batch.updateTableProgress(table, { totalEstimatedCount: count });
462
+ this.relationCache.update(table);
463
+ tablesWithStatus.push(table);
403
464
 
404
- const rs = await db.query(`select pg_current_wal_lsn() as lsn`);
405
- const tableLsnNotBefore = rs.rows[0][0];
406
- await batch.markSnapshotDone([table], tableLsnNotBefore);
407
- await touch();
465
+ this.logger.info(`To replicate: ${table.qualifiedName} ${table.formatSnapshotProgress()}`);
408
466
  }
409
467
  }
410
468
 
469
+ for (let table of tablesWithStatus) {
470
+ await this.snapshotTableInTx(batch, db, table);
471
+ await touch();
472
+ }
473
+
411
474
  // Always commit the initial snapshot at zero.
412
475
  // This makes sure we don't skip any changes applied before starting this snapshot,
413
476
  // in the case of snapshot retries.
@@ -431,60 +494,147 @@ WHERE oid = $1::regclass`,
431
494
  yield toSyncRulesRow(row);
432
495
  }
433
496
  }
497
+ private async snapshotTableInTx(
498
+ batch: storage.BucketStorageBatch,
499
+ db: pgwire.PgConnection,
500
+ table: storage.SourceTable,
501
+ limited?: PrimaryKeyValue[]
502
+ ): Promise<storage.SourceTable> {
503
+ await db.query('BEGIN');
504
+ try {
505
+ let tableLsnNotBefore: string;
506
+ await this.snapshotTable(batch, db, table, limited);
507
+
508
+ // Get the current LSN.
509
+ // The data will only be consistent once incremental replication
510
+ // has passed that point.
511
+ // We have to get this LSN _after_ we have started the snapshot query.
512
+ const rs = await db.query(`select pg_current_wal_lsn() as lsn`);
513
+ tableLsnNotBefore = rs.rows[0][0];
514
+ await db.query('COMMIT');
515
+ const [resultTable] = await batch.markSnapshotDone([table], tableLsnNotBefore);
516
+ this.relationCache.update(resultTable);
517
+ return resultTable;
518
+ } catch (e) {
519
+ await db.query('ROLLBACK');
520
+ throw e;
521
+ }
522
+ }
523
+
524
+ private async snapshotTable(
525
+ batch: storage.BucketStorageBatch,
526
+ db: pgwire.PgConnection,
527
+ table: storage.SourceTable,
528
+ limited?: PrimaryKeyValue[]
529
+ ) {
530
+ let totalEstimatedCount = table.snapshotStatus?.totalEstimatedCount;
531
+ let at = table.snapshotStatus?.replicatedCount ?? 0;
532
+ let lastCountTime = 0;
533
+ let q: SnapshotQuery;
534
+ // We do streaming on two levels:
535
+ // 1. Coarse level: DECLARE CURSOR, FETCH 10000 at a time.
536
+ // 2. Fine level: Stream chunks from each fetch call.
537
+ if (limited) {
538
+ q = new IdSnapshotQuery(db, table, limited);
539
+ } else if (ChunkedSnapshotQuery.supports(table)) {
540
+ // Single primary key - we can use the primary key for chunking
541
+ const orderByKey = table.replicaIdColumns[0];
542
+ q = new ChunkedSnapshotQuery(db, table, this.snapshotChunkLength, table.snapshotStatus?.lastKey ?? null);
543
+ if (table.snapshotStatus?.lastKey != null) {
544
+ this.logger.info(
545
+ `Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} - resuming from ${orderByKey.name} > ${(q as ChunkedSnapshotQuery).lastKey}`
546
+ );
547
+ } else {
548
+ this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} - resumable`);
549
+ }
550
+ } else {
551
+ // Fallback case - query the entire table
552
+ this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} - not resumable`);
553
+ q = new SimpleSnapshotQuery(db, table, this.snapshotChunkLength);
554
+ at = 0;
555
+ }
556
+ await q.initialize();
434
557
 
435
- private async snapshotTable(batch: storage.BucketStorageBatch, db: pgwire.PgConnection, table: storage.SourceTable) {
436
- logger.info(`${this.slot_name} Replicating ${table.qualifiedName}`);
437
- const estimatedCount = await this.estimatedCount(db, table);
438
- let at = 0;
439
- let lastLogIndex = 0;
440
- const cursor = db.stream({ statement: `SELECT * FROM ${table.escapedIdentifier}` });
441
558
  let columns: { i: number; name: string }[] = [];
442
- // pgwire streams rows in chunks.
443
- // These chunks can be quite small (as little as 16KB), so we don't flush chunks automatically.
444
-
445
- for await (let chunk of cursor) {
446
- if (chunk.tag == 'RowDescription') {
447
- let i = 0;
448
- columns = chunk.payload.map((c) => {
449
- return { i: i++, name: c.name };
559
+ let hasRemainingData = true;
560
+ while (hasRemainingData) {
561
+ // Fetch 10k at a time.
562
+ // The balance here is between latency overhead per FETCH call,
563
+ // and not spending too much time on each FETCH call.
564
+ // We aim for a couple of seconds on each FETCH call.
565
+ const cursor = q.nextChunk();
566
+ hasRemainingData = false;
567
+ // pgwire streams rows in chunks.
568
+ // These chunks can be quite small (as little as 16KB), so we don't flush chunks automatically.
569
+ // There are typically 100-200 rows per chunk.
570
+ for await (let chunk of cursor) {
571
+ if (chunk.tag == 'RowDescription') {
572
+ // We get a RowDescription for each FETCH call, but they should
573
+ // all be the same.
574
+ let i = 0;
575
+ columns = chunk.payload.map((c) => {
576
+ return { i: i++, name: c.name };
577
+ });
578
+ continue;
579
+ }
580
+
581
+ const rows = chunk.rows.map((row) => {
582
+ let q: DatabaseInputRow = {};
583
+ for (let c of columns) {
584
+ q[c.name] = row[c.i];
585
+ }
586
+ return q;
450
587
  });
451
- continue;
452
- }
588
+ if (rows.length > 0) {
589
+ hasRemainingData = true;
590
+ }
453
591
 
454
- const rows = chunk.rows.map((row) => {
455
- let q: DatabaseInputRow = {};
456
- for (let c of columns) {
457
- q[c.name] = row[c.i];
592
+ for (const record of WalStream.getQueryData(rows)) {
593
+ // This auto-flushes when the batch reaches its size limit
594
+ await batch.save({
595
+ tag: storage.SaveOperationTag.INSERT,
596
+ sourceTable: table,
597
+ before: undefined,
598
+ beforeReplicaId: undefined,
599
+ after: record,
600
+ afterReplicaId: getUuidReplicaIdentityBson(record, table.replicaIdColumns)
601
+ });
458
602
  }
459
- return q;
460
- });
461
- if (rows.length > 0 && at - lastLogIndex >= 5000) {
462
- logger.info(`${this.slot_name} Replicating ${table.qualifiedName} ${at}/${estimatedCount}`);
463
- lastLogIndex = at;
464
- }
465
- if (this.abort_signal.aborted) {
466
- throw new ReplicationAbortedError(`Aborted initial replication of ${this.slot_name}`);
603
+
604
+ at += rows.length;
605
+ this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(rows.length);
606
+
607
+ await touch();
467
608
  }
468
609
 
469
- for (const record of WalStream.getQueryData(rows)) {
470
- // This auto-flushes when the batch reaches its size limit
471
- await batch.save({
472
- tag: storage.SaveOperationTag.INSERT,
473
- sourceTable: table,
474
- before: undefined,
475
- beforeReplicaId: undefined,
476
- after: record,
477
- afterReplicaId: getUuidReplicaIdentityBson(record, table.replicaIdColumns)
610
+ // Important: flush before marking progress
611
+ await batch.flush();
612
+ if (limited == null) {
613
+ let lastKey: Uint8Array | undefined;
614
+ if (q instanceof ChunkedSnapshotQuery) {
615
+ lastKey = q.getLastKeySerialized();
616
+ }
617
+ if (lastCountTime < performance.now() - 10 * 60 * 1000) {
618
+ totalEstimatedCount = await this.estimatedCountNumber(db, table);
619
+ lastCountTime = performance.now();
620
+ }
621
+ table = await batch.updateTableProgress(table, {
622
+ lastKey: lastKey,
623
+ replicatedCount: at,
624
+ totalEstimatedCount: totalEstimatedCount
478
625
  });
479
- }
626
+ this.relationCache.update(table);
480
627
 
481
- at += rows.length;
482
- this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(rows.length);
628
+ this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()}`);
629
+ } else {
630
+ this.logger.info(`Replicating ${table.qualifiedName} ${at}/${limited.length} for resnapshot`);
631
+ }
483
632
 
484
- await touch();
633
+ if (this.abort_signal.aborted) {
634
+ // We only abort after flushing
635
+ throw new ReplicationAbortedError(`Initial replication interrupted`);
636
+ }
485
637
  }
486
-
487
- await batch.flush();
488
638
  }
489
639
 
490
640
  async handleRelation(batch: storage.BucketStorageBatch, descriptor: SourceEntityDescriptor, snapshot: boolean) {
@@ -498,7 +648,7 @@ WHERE oid = $1::regclass`,
498
648
  entity_descriptor: descriptor,
499
649
  sync_rules: this.sync_rules
500
650
  });
501
- this.relation_cache.set(descriptor.objectId, result.table);
651
+ this.relationCache.update(result.table);
502
652
 
503
653
  // Drop conflicting tables. This includes for example renamed tables.
504
654
  await batch.drop(result.dropTables);
@@ -513,40 +663,59 @@ WHERE oid = $1::regclass`,
513
663
  // Truncate this table, in case a previous snapshot was interrupted.
514
664
  await batch.truncate([result.table]);
515
665
 
516
- let lsn: string = ZERO_LSN;
517
666
  // Start the snapshot inside a transaction.
518
667
  // We use a dedicated connection for this.
519
668
  const db = await this.connections.snapshotConnection();
520
669
  try {
521
- await db.query('BEGIN');
522
- try {
523
- await this.snapshotTable(batch, db, result.table);
524
-
525
- // Get the current LSN.
526
- // The data will only be consistent once incremental replication
527
- // has passed that point.
528
- // We have to get this LSN _after_ we have started the snapshot query.
529
- const rs = await db.query(`select pg_current_wal_lsn() as lsn`);
530
- lsn = rs.rows[0][0];
531
-
532
- await db.query('COMMIT');
533
- } catch (e) {
534
- await db.query('ROLLBACK');
535
- // TODO: Wrap with custom error type
536
- throw e;
537
- }
670
+ const table = await this.snapshotTableInTx(batch, db, result.table);
671
+ // After the table snapshot, we wait for replication to catch up.
672
+ // To make sure there is actually something to replicate, we send a keepalive
673
+ // message.
674
+ await sendKeepAlive(db);
675
+ return table;
538
676
  } finally {
539
677
  await db.end();
540
678
  }
541
- const [table] = await batch.markSnapshotDone([result.table], lsn);
542
- return table;
543
679
  }
544
680
 
545
681
  return result.table;
546
682
  }
547
683
 
684
+ /**
685
+ * Process rows that have missing TOAST values.
686
+ *
687
+ * This can happen during edge cases in the chunked initial snapshot process.
688
+ *
689
+ * We handle this similarly to an inline table snapshot, but limited to the specific
690
+ * set of rows.
691
+ */
692
+ private async resnapshot(batch: BucketStorageBatch, rows: MissingRow[]) {
693
+ const byTable = new Map<number, MissingRow[]>();
694
+ for (let row of rows) {
695
+ const relId = row.table.objectId as number; // always a number for postgres
696
+ if (!byTable.has(relId)) {
697
+ byTable.set(relId, []);
698
+ }
699
+ byTable.get(relId)!.push(row);
700
+ }
701
+ const db = await this.connections.snapshotConnection();
702
+ try {
703
+ for (let rows of byTable.values()) {
704
+ const table = rows[0].table;
705
+ await this.snapshotTableInTx(
706
+ batch,
707
+ db,
708
+ table,
709
+ rows.map((r) => r.key)
710
+ );
711
+ }
712
+ } finally {
713
+ await db.end();
714
+ }
715
+ }
716
+
548
717
  private getTable(relationId: number): storage.SourceTable {
549
- const table = this.relation_cache.get(relationId);
718
+ const table = this.relationCache.get(relationId);
550
719
  if (table == null) {
551
720
  // We should always receive a replication message before the relation is used.
552
721
  // If we can't find it, it's a bug.
@@ -565,7 +734,7 @@ WHERE oid = $1::regclass`,
565
734
  if (msg.tag == 'insert' || msg.tag == 'update' || msg.tag == 'delete') {
566
735
  const table = this.getTable(getRelId(msg.relation));
567
736
  if (!table.syncAny) {
568
- logger.debug(`Table ${table.qualifiedName} not used in sync rules - skipping`);
737
+ this.logger.debug(`Table ${table.qualifiedName} not used in sync rules - skipping`);
569
738
  return null;
570
739
  }
571
740
 
@@ -673,8 +842,39 @@ WHERE oid = $1::regclass`,
673
842
  // Auto-activate as soon as initial replication is done
674
843
  await this.storage.autoActivate();
675
844
 
845
+ let resnapshot: { table: storage.SourceTable; key: PrimaryKeyValue }[] = [];
846
+
847
+ const markRecordUnavailable = (record: SaveUpdate) => {
848
+ if (!IdSnapshotQuery.supports(record.sourceTable)) {
849
+ // If it's not supported, it's also safe to ignore
850
+ return;
851
+ }
852
+ let key: PrimaryKeyValue = {};
853
+ for (let column of record.sourceTable.replicaIdColumns) {
854
+ const name = column.name;
855
+ const value = record.after[name];
856
+ if (value == null) {
857
+ // We don't expect this to actually happen.
858
+ // The key should always be present in the "after" record.
859
+ return;
860
+ }
861
+ key[name] = value;
862
+ }
863
+ resnapshot.push({
864
+ table: record.sourceTable,
865
+ key: key
866
+ });
867
+ };
868
+
676
869
  await this.storage.startBatch(
677
- { zeroLSN: ZERO_LSN, defaultSchema: POSTGRES_DEFAULT_SCHEMA, storeCurrentData: true, skipExistingRows: false },
870
+ {
871
+ logger: this.logger,
872
+ zeroLSN: ZERO_LSN,
873
+ defaultSchema: POSTGRES_DEFAULT_SCHEMA,
874
+ storeCurrentData: true,
875
+ skipExistingRows: false,
876
+ markRecordUnavailable
877
+ },
678
878
  async (batch) => {
679
879
  // We don't handle any plain keepalive messages while we have transactions.
680
880
  // While we have transactions, we use that to advance the position.
@@ -708,6 +908,9 @@ WHERE oid = $1::regclass`,
708
908
  } else if (msg.tag == 'begin') {
709
909
  // This may span multiple transactions in the same chunk, or even across chunks.
710
910
  skipKeepalive = true;
911
+ if (this.oldestUncommittedChange == null) {
912
+ this.oldestUncommittedChange = new Date(Number(msg.commitTime / 1000n));
913
+ }
711
914
  } else if (msg.tag == 'commit') {
712
915
  this.metrics.getCounter(ReplicationMetric.TRANSACTIONS_REPLICATED).add(1);
713
916
  if (msg == lastCommit) {
@@ -715,12 +918,29 @@ WHERE oid = $1::regclass`,
715
918
  // This effectively lets us batch multiple transactions within the same chunk
716
919
  // into a single flush, increasing throughput for many small transactions.
717
920
  skipKeepalive = false;
718
- await batch.commit(msg.lsn!, { createEmptyCheckpoints });
921
+ // flush() must be before the resnapshot check - that is
922
+ // typically what reports the resnapshot records.
923
+ await batch.flush();
924
+ // This _must_ be checked after the flush(), and before
925
+ // commit() or ack(). We never persist the resnapshot list,
926
+ // so we have to process it before marking our progress.
927
+ if (resnapshot.length > 0) {
928
+ await this.resnapshot(batch, resnapshot);
929
+ resnapshot = [];
930
+ }
931
+ const didCommit = await batch.commit(msg.lsn!, {
932
+ createEmptyCheckpoints,
933
+ oldestUncommittedChange: this.oldestUncommittedChange
934
+ });
719
935
  await this.ack(msg.lsn!, replicationStream);
936
+ if (didCommit) {
937
+ this.oldestUncommittedChange = null;
938
+ this.isStartingReplication = false;
939
+ }
720
940
  }
721
941
  } else {
722
942
  if (count % 100 == 0) {
723
- logger.info(`${this.slot_name} replicating op ${count} ${msg.lsn}`);
943
+ this.logger.info(`Replicating op ${count} ${msg.lsn}`);
724
944
  }
725
945
 
726
946
  /**
@@ -734,7 +954,14 @@ WHERE oid = $1::regclass`,
734
954
  }
735
955
 
736
956
  count += 1;
737
- await this.writeChange(batch, msg);
957
+ const flushResult = await this.writeChange(batch, msg);
958
+ if (flushResult != null && resnapshot.length > 0) {
959
+ // If we have large transactions, we also need to flush the resnapshot list
960
+ // periodically.
961
+ // TODO: make sure this bit is actually triggered
962
+ await this.resnapshot(batch, resnapshot);
963
+ resnapshot = [];
964
+ }
738
965
  }
739
966
  }
740
967
 
@@ -749,6 +976,7 @@ WHERE oid = $1::regclass`,
749
976
  // may be in the middle of the next transaction.
750
977
  // It must only be used to associate checkpoints with LSNs.
751
978
  await batch.keepalive(chunkLastLsn);
979
+ this.isStartingReplication = false;
752
980
  }
753
981
 
754
982
  // We receive chunks with empty messages often (about each second).
@@ -781,7 +1009,8 @@ WHERE oid = $1::regclass`,
781
1009
  if (storageIdentifier.type != lib_postgres.POSTGRES_CONNECTION_TYPE) {
782
1010
  return {
783
1011
  // Keep the same behaviour as before allowing Postgres storage.
784
- createEmptyCheckpoints: true
1012
+ createEmptyCheckpoints: true,
1013
+ oldestUncommittedChange: null
785
1014
  };
786
1015
  }
787
1016
 
@@ -804,7 +1033,8 @@ WHERE oid = $1::regclass`,
804
1033
  * Don't create empty checkpoints if the same Postgres database is used for the data source
805
1034
  * and sync bucket storage. Creating empty checkpoints will cause WAL feedback loops.
806
1035
  */
807
- createEmptyCheckpoints: replicationIdentifier.database_name != parsedStorageIdentifier.database_name
1036
+ createEmptyCheckpoints: replicationIdentifier.database_name != parsedStorageIdentifier.database_name,
1037
+ oldestUncommittedChange: null
808
1038
  };
809
1039
  }
810
1040
 
@@ -816,6 +1046,19 @@ WHERE oid = $1::regclass`,
816
1046
  const version = await this.connections.getServerVersion();
817
1047
  return version ? version.compareMain('14.0.0') >= 0 : false;
818
1048
  }
1049
+
1050
+ async getReplicationLagMillis(): Promise<number | undefined> {
1051
+ if (this.oldestUncommittedChange == null) {
1052
+ if (this.isStartingReplication) {
1053
+ // We don't have anything to compute replication lag with yet.
1054
+ return undefined;
1055
+ } else {
1056
+ // We don't have any uncommitted changes, so replication is up-to-date.
1057
+ return 0;
1058
+ }
1059
+ }
1060
+ return Date.now() - this.oldestUncommittedChange.getTime();
1061
+ }
819
1062
  }
820
1063
 
821
1064
  async function touch() {