@powersync/service-module-postgres 0.5.1 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,8 @@
1
1
  import * as lib_postgres from '@powersync/lib-service-postgres';
2
2
  import {
3
3
  container,
4
+ DatabaseConnectionError,
5
+ ErrorCode,
4
6
  errors,
5
7
  logger,
6
8
  ReplicationAbortedError,
@@ -15,10 +17,6 @@ import { PgManager } from './PgManager.js';
15
17
  import { getPgOutputRelation, getRelId } from './PgRelation.js';
16
18
  import { checkSourceConfiguration, getReplicationIdentityColumns } from './replication-utils.js';
17
19
 
18
- export const ZERO_LSN = '00000000/00000000';
19
- export const PUBLICATION_NAME = 'powersync';
20
- export const POSTGRES_DEFAULT_SCHEMA = 'public';
21
-
22
20
  export interface WalStreamOptions {
23
21
  connections: PgManager;
24
22
  storage: storage.SyncRulesBucketStorage;
@@ -32,6 +30,35 @@ interface InitResult {
32
30
  needsNewSlot: boolean;
33
31
  }
34
32
 
33
+ export const ZERO_LSN = '00000000/00000000';
34
+ export const PUBLICATION_NAME = 'powersync';
35
+ export const POSTGRES_DEFAULT_SCHEMA = 'public';
36
+
37
+ export const KEEPALIVE_CONTENT = 'ping';
38
+ export const KEEPALIVE_BUFFER = Buffer.from(KEEPALIVE_CONTENT);
39
+ export const KEEPALIVE_STATEMENT: pgwire.Statement = {
40
+ statement: /* sql */ `
41
+ SELECT
42
+ *
43
+ FROM
44
+ pg_logical_emit_message(FALSE, 'powersync', $1)
45
+ `,
46
+ params: [{ type: 'varchar', value: KEEPALIVE_CONTENT }]
47
+ } as const;
48
+
49
+ export const isKeepAliveMessage = (msg: pgwire.PgoutputMessage) => {
50
+ return (
51
+ msg.tag == 'message' &&
52
+ msg.prefix == 'powersync' &&
53
+ msg.content &&
54
+ Buffer.from(msg.content).equals(KEEPALIVE_BUFFER)
55
+ );
56
+ };
57
+
58
+ export const sendKeepAlive = async (db: pgwire.PgClient) => {
59
+ await lib_postgres.retriedQuery(db, KEEPALIVE_STATEMENT);
60
+ };
61
+
35
62
  export class MissingReplicationSlotError extends Error {
36
63
  constructor(message: string) {
37
64
  super(message);
@@ -71,10 +98,7 @@ export class WalStream {
71
98
  // Ping to speed up cancellation of streaming replication
72
99
  // We're not using pg_snapshot here, since it could be in the middle of
73
100
  // an initial replication transaction.
74
- const promise = lib_postgres.retriedQuery(
75
- this.connections.pool,
76
- `SELECT * FROM pg_logical_emit_message(false, 'powersync', 'ping')`
77
- );
101
+ const promise = sendKeepAlive(this.connections.pool);
78
102
  promise.catch((e) => {
79
103
  // Failures here are okay - this only speeds up stopping the process.
80
104
  logger.warn('Failed to ping connection', e);
@@ -180,6 +204,7 @@ export class WalStream {
180
204
 
181
205
  async initSlot(): Promise<InitResult> {
182
206
  await checkSourceConfiguration(this.connections.pool, PUBLICATION_NAME);
207
+ await this.ensureStorageCompatibility();
183
208
 
184
209
  const slotName = this.slot_name;
185
210
 
@@ -382,6 +407,15 @@ WHERE oid = $1::regclass`,
382
407
  await batch.commit(ZERO_LSN);
383
408
  }
384
409
  );
410
+ /**
411
+ * Send a keepalive message after initial replication.
412
+ * In some edge cases we wait for a keepalive after the initial snapshot.
413
+ * If we don't explicitly check the contents of keepalive messages then a keepalive is detected
414
+ * rather quickly after initial replication - perhaps due to other WAL events.
415
+ * If we do explicitly check the contents of messages, we need an actual keepalive payload in order
416
+ * to advance the active sync rules LSN.
417
+ */
418
+ await sendKeepAlive(db);
385
419
  }
386
420
 
387
421
  static *getQueryData(results: Iterable<DatabaseInputRow>): Generator<SqliteRow> {
@@ -599,13 +633,33 @@ WHERE oid = $1::regclass`,
599
633
  async streamChanges(replicationConnection: pgwire.PgConnection) {
600
634
  // When changing any logic here, check /docs/wal-lsns.md.
601
635
 
636
+ const { createEmptyCheckpoints } = await this.ensureStorageCompatibility();
637
+
638
+ const replicationOptions: Record<string, string> = {
639
+ proto_version: '1',
640
+ publication_names: PUBLICATION_NAME
641
+ };
642
+
643
+ /**
644
+ * Viewing the contents of logical messages emitted with `pg_logical_emit_message`
645
+ * is only supported on Postgres >= 14.0.
646
+ * https://www.postgresql.org/docs/14/protocol-logical-replication.html
647
+ */
648
+ const exposesLogicalMessages = await this.checkLogicalMessageSupport();
649
+ if (exposesLogicalMessages) {
650
+ /**
651
+ * Only add this option if the Postgres server supports it.
652
+ * Adding the option to a server that doesn't support it will throw an exception when starting logical replication.
653
+ * Error: `unrecognized pgoutput option: messages`
654
+ */
655
+ replicationOptions['messages'] = 'true';
656
+ }
657
+
602
658
  const replicationStream = replicationConnection.logicalReplication({
603
659
  slot: this.slot_name,
604
- options: {
605
- proto_version: '1',
606
- publication_names: PUBLICATION_NAME
607
- }
660
+ options: replicationOptions
608
661
  });
662
+
609
663
  this.startedStreaming = true;
610
664
 
611
665
  // Auto-activate as soon as initial replication is done
@@ -628,6 +682,15 @@ WHERE oid = $1::regclass`,
628
682
  // chunkLastLsn may come from normal messages in the chunk,
629
683
  // or from a PrimaryKeepalive message.
630
684
  const { messages, lastLsn: chunkLastLsn } = chunk;
685
+
686
+ /**
687
+ * We can check if an explicit keepalive was sent if `exposesLogicalMessages == true`.
688
+ * If we can't check the logical messages, we should assume a keepalive if we
689
+ * receive an empty array of messages in a replication event.
690
+ */
691
+ const assumeKeepAlive = !exposesLogicalMessages;
692
+ let keepAliveDetected = false;
693
+
631
694
  for (const msg of messages) {
632
695
  if (msg.tag == 'relation') {
633
696
  await this.handleRelation(batch, getPgOutputRelation(msg), true);
@@ -636,27 +699,44 @@ WHERE oid = $1::regclass`,
636
699
  } else if (msg.tag == 'commit') {
637
700
  Metrics.getInstance().transactions_replicated_total.add(1);
638
701
  inTx = false;
639
- await batch.commit(msg.lsn!);
702
+ await batch.commit(msg.lsn!, { createEmptyCheckpoints });
640
703
  await this.ack(msg.lsn!, replicationStream);
641
704
  } else {
642
705
  if (count % 100 == 0) {
643
706
  logger.info(`${this.slot_name} replicating op ${count} ${msg.lsn}`);
644
707
  }
645
708
 
709
+ /**
710
+ * If we can see the contents of logical messages, then we can check if a keepalive
711
+ * message is present. We only perform a keepalive (below) if we explicitly detect a keepalive message.
712
+ * If we can't see the contents of logical messages, then we should assume a keepalive is required
713
+ * due to the default value of `assumeKeepAlive`.
714
+ */
715
+ if (exposesLogicalMessages && isKeepAliveMessage(msg)) {
716
+ keepAliveDetected = true;
717
+ }
718
+
646
719
  count += 1;
647
720
  await this.writeChange(batch, msg);
648
721
  }
649
722
  }
650
723
 
651
724
  if (!inTx) {
652
- // In a transaction, we ack and commit according to the transaction progress.
653
- // Outside transactions, we use the PrimaryKeepalive messages to advance progress.
654
- // Big caveat: This _must not_ be used to skip individual messages, since this LSN
655
- // may be in the middle of the next transaction.
656
- // It must only be used to associate checkpoints with LSNs.
657
- if (await batch.keepalive(chunkLastLsn)) {
658
- await this.ack(chunkLastLsn, replicationStream);
725
+ if (assumeKeepAlive || keepAliveDetected) {
726
+ // Reset the detection flag.
727
+ keepAliveDetected = false;
728
+
729
+ // In a transaction, we ack and commit according to the transaction progress.
730
+ // Outside transactions, we use the PrimaryKeepalive messages to advance progress.
731
+ // Big caveat: This _must not_ be used to skip individual messages, since this LSN
732
+ // may be in the middle of the next transaction.
733
+ // It must only be used to associate checkpoints with LSNs.
734
+ await batch.keepalive(chunkLastLsn);
659
735
  }
736
+
737
+ // We often receive chunks with no messages (about once per second).
738
+ // Acknowledging here progresses the slot past these and frees up resources.
739
+ await this.ack(chunkLastLsn, replicationStream);
660
740
  }
661
741
 
662
742
  Metrics.getInstance().chunks_replicated_total.add(1);
@@ -672,6 +752,53 @@ WHERE oid = $1::regclass`,
672
752
 
673
753
  replicationStream.ack(lsn);
674
754
  }
755
+
756
+ /**
757
+ * Ensures that the storage is compatible with the replication connection.
758
+ * @throws {DatabaseConnectionError} If the storage is not compatible with the replication connection.
759
+ */
760
+ protected async ensureStorageCompatibility(): Promise<storage.ResolvedBucketBatchCommitOptions> {
761
+ const supportsLogicalMessages = await this.checkLogicalMessageSupport();
762
+
763
+ const storageIdentifier = await this.storage.factory.getSystemIdentifier();
764
+ if (storageIdentifier.type != lib_postgres.POSTGRES_CONNECTION_TYPE) {
765
+ return {
766
+ // Keep the same behaviour as before Postgres storage was allowed.
767
+ createEmptyCheckpoints: true
768
+ };
769
+ }
770
+
771
+ const parsedStorageIdentifier = lib_postgres.utils.decodePostgresSystemIdentifier(storageIdentifier.id);
772
+ /**
773
+ * Check if the same server is being used for both the sync bucket storage and the logical replication.
774
+ */
775
+ const replicationIdentifier = await lib_postgres.utils.queryPostgresSystemIdentifier(this.connections.pool);
776
+
777
+ if (!supportsLogicalMessages && replicationIdentifier.server_id == parsedStorageIdentifier.server_id) {
778
+ throw new DatabaseConnectionError(
779
+ ErrorCode.PSYNC_S1144,
780
+ `Separate Postgres servers are required for the replication source and sync bucket storage when using Postgres versions below 14.0.`,
781
+ new Error('Postgres version is below 14')
782
+ );
783
+ }
784
+
785
+ return {
786
+ /**
787
+ * Don't create empty checkpoints if the same Postgres database is used for the data source
788
+ * and sync bucket storage. Creating empty checkpoints will cause WAL feedback loops.
789
+ */
790
+ createEmptyCheckpoints: replicationIdentifier.database_name != parsedStorageIdentifier.database_name
791
+ };
792
+ }
793
+
794
+ /**
795
+ * Check if the replication connection Postgres server supports
796
+ * viewing the contents of logical replication messages.
797
+ */
798
+ protected async checkLogicalMessageSupport() {
799
+ const version = await this.connections.getServerVersion();
800
+ return version ? version.compareMain('14.0.0') >= 0 : false;
801
+ }
675
802
  }
676
803
 
677
804
  async function touch() {
@@ -1,6 +1,6 @@
1
1
  import { container } from '@powersync/lib-services-framework';
2
2
  import { PgManager } from './PgManager.js';
3
- import { MissingReplicationSlotError, WalStream } from './WalStream.js';
3
+ import { MissingReplicationSlotError, sendKeepAlive, WalStream } from './WalStream.js';
4
4
 
5
5
  import { replication } from '@powersync/service-core';
6
6
  import { ConnectionManagerFactory } from './ConnectionManagerFactory.js';
@@ -37,7 +37,7 @@ export class WalStreamReplicationJob extends replication.AbstractReplicationJob
37
37
  */
38
38
  async keepAlive() {
39
39
  try {
40
- await this.connectionManager.pool.query(`SELECT * FROM pg_logical_emit_message(false, 'powersync', 'ping')`);
40
+ await sendKeepAlive(this.connectionManager.pool);
41
41
  } catch (e) {
42
42
  this.logger.warn(`KeepAlive failed, unable to post to WAL`, e);
43
43
  }
@@ -0,0 +1,35 @@
1
+ import * as postgres_storage from '@powersync/service-module-postgres-storage';
2
+ import { describe, expect, test } from 'vitest';
3
+ import { env } from './env.js';
4
+ import { WalStreamTestContext } from './wal_stream_utils.js';
5
+
6
+ describe.skipIf(!env.TEST_POSTGRES_STORAGE)('replication storage combination - postgres', function () {
7
+ test('should allow the same Postgres cluster to be used for data and storage', async () => {
8
+ // Use the same cluster for the storage as the data source
9
+ await using context = await WalStreamTestContext.open(
10
+ postgres_storage.PostgresTestStorageFactoryGenerator({
11
+ url: env.PG_TEST_URL
12
+ }),
13
+ { doNotClear: false }
14
+ );
15
+
16
+ await context.updateSyncRules(/* yaml */
17
+ ` bucket_definitions:
18
+ global:
19
+ data:
20
+ - SELECT * FROM "test_data" `);
21
+
22
+ const { pool, connectionManager } = context;
23
+
24
+ const sourceVersion = await connectionManager.getServerVersion();
25
+
26
+ await pool.query(`CREATE TABLE test_data(id text primary key, description text, other text)`);
27
+
28
+ if (sourceVersion!.compareMain('14.0.0') < 0) {
29
+ await expect(context.replicateSnapshot()).rejects.toThrow();
30
+ } else {
31
+ // Should resolve
32
+ await context.replicateSnapshot();
33
+ }
34
+ });
35
+ });