@powersync/service-module-postgres 0.19.2 → 0.19.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. package/dist/api/PostgresRouteAPIAdapter.d.ts +1 -1
  2. package/dist/api/PostgresRouteAPIAdapter.js +63 -72
  3. package/dist/api/PostgresRouteAPIAdapter.js.map +1 -1
  4. package/dist/module/PostgresModule.js.map +1 -1
  5. package/dist/replication/MissingReplicationSlotError.d.ts +41 -0
  6. package/dist/replication/MissingReplicationSlotError.js +33 -0
  7. package/dist/replication/MissingReplicationSlotError.js.map +1 -0
  8. package/dist/replication/PostgresErrorRateLimiter.js +1 -1
  9. package/dist/replication/PostgresErrorRateLimiter.js.map +1 -1
  10. package/dist/replication/SnapshotQuery.js +2 -2
  11. package/dist/replication/SnapshotQuery.js.map +1 -1
  12. package/dist/replication/WalStream.d.ts +37 -14
  13. package/dist/replication/WalStream.js +145 -41
  14. package/dist/replication/WalStream.js.map +1 -1
  15. package/dist/replication/WalStreamReplicationJob.d.ts +1 -1
  16. package/dist/replication/WalStreamReplicationJob.js +7 -4
  17. package/dist/replication/WalStreamReplicationJob.js.map +1 -1
  18. package/dist/replication/WalStreamReplicator.d.ts +0 -1
  19. package/dist/replication/WalStreamReplicator.js +0 -22
  20. package/dist/replication/WalStreamReplicator.js.map +1 -1
  21. package/dist/replication/replication-index.d.ts +3 -1
  22. package/dist/replication/replication-index.js +3 -1
  23. package/dist/replication/replication-index.js.map +1 -1
  24. package/dist/replication/replication-utils.d.ts +3 -11
  25. package/dist/replication/replication-utils.js +101 -164
  26. package/dist/replication/replication-utils.js.map +1 -1
  27. package/dist/replication/wal-budget-utils.d.ts +23 -0
  28. package/dist/replication/wal-budget-utils.js +57 -0
  29. package/dist/replication/wal-budget-utils.js.map +1 -0
  30. package/dist/types/registry.js +1 -1
  31. package/dist/types/registry.js.map +1 -1
  32. package/package.json +15 -11
  33. package/sql/check-source-configuration.plpgsql +13 -0
  34. package/sql/debug-tables-info-batched.plpgsql +230 -0
  35. package/CHANGELOG.md +0 -843
  36. package/src/api/PostgresRouteAPIAdapter.ts +0 -356
  37. package/src/index.ts +0 -1
  38. package/src/module/PostgresModule.ts +0 -122
  39. package/src/replication/ConnectionManagerFactory.ts +0 -33
  40. package/src/replication/PgManager.ts +0 -122
  41. package/src/replication/PgRelation.ts +0 -41
  42. package/src/replication/PostgresErrorRateLimiter.ts +0 -48
  43. package/src/replication/SnapshotQuery.ts +0 -213
  44. package/src/replication/WalStream.ts +0 -1157
  45. package/src/replication/WalStreamReplicationJob.ts +0 -138
  46. package/src/replication/WalStreamReplicator.ts +0 -79
  47. package/src/replication/replication-index.ts +0 -5
  48. package/src/replication/replication-utils.ts +0 -398
  49. package/src/types/registry.ts +0 -275
  50. package/src/types/resolver.ts +0 -227
  51. package/src/types/types.ts +0 -44
  52. package/src/utils/application-name.ts +0 -8
  53. package/src/utils/migration_lib.ts +0 -80
  54. package/src/utils/populate_test_data.ts +0 -37
  55. package/src/utils/populate_test_data_worker.ts +0 -53
  56. package/src/utils/postgres_version.ts +0 -8
  57. package/test/src/checkpoints.test.ts +0 -86
  58. package/test/src/chunked_snapshots.test.ts +0 -161
  59. package/test/src/env.ts +0 -11
  60. package/test/src/large_batch.test.ts +0 -241
  61. package/test/src/pg_test.test.ts +0 -729
  62. package/test/src/resuming_snapshots.test.ts +0 -160
  63. package/test/src/route_api_adapter.test.ts +0 -62
  64. package/test/src/schema_changes.test.ts +0 -655
  65. package/test/src/setup.ts +0 -12
  66. package/test/src/slow_tests.test.ts +0 -519
  67. package/test/src/storage_combination.test.ts +0 -35
  68. package/test/src/types/registry.test.ts +0 -149
  69. package/test/src/util.ts +0 -151
  70. package/test/src/validation.test.ts +0 -63
  71. package/test/src/wal_stream.test.ts +0 -607
  72. package/test/src/wal_stream_utils.ts +0 -284
  73. package/test/tsconfig.json +0 -27
  74. package/tsconfig.json +0 -34
  75. package/tsconfig.tsbuildinfo +0 -1
  76. package/vitest.config.ts +0 -3
@@ -1,1157 +0,0 @@
1
- import * as lib_postgres from '@powersync/lib-service-postgres';
2
- import {
3
- container,
4
- DatabaseConnectionError,
5
- logger as defaultLogger,
6
- ErrorCode,
7
- Logger,
8
- ReplicationAbortedError,
9
- ReplicationAssertionError
10
- } from '@powersync/lib-services-framework';
11
- import {
12
- BucketStorageBatch,
13
- getUuidReplicaIdentityBson,
14
- MetricsEngine,
15
- RelationCache,
16
- SaveUpdate,
17
- SourceEntityDescriptor,
18
- SourceTable,
19
- storage
20
- } from '@powersync/service-core';
21
- import * as pgwire from '@powersync/service-jpgwire';
22
- import {
23
- applyValueContext,
24
- CompatibilityContext,
25
- HydratedSyncRules,
26
- SqliteInputRow,
27
- SqliteInputValue,
28
- SqliteRow,
29
- TablePattern,
30
- ToastableSqliteRow,
31
- toSyncRulesValue
32
- } from '@powersync/service-sync-rules';
33
-
34
- import { ReplicationMetric } from '@powersync/service-types';
35
- import { PostgresTypeResolver } from '../types/resolver.js';
36
- import { PgManager } from './PgManager.js';
37
- import { getPgOutputRelation, getRelId, referencedColumnTypeIds } from './PgRelation.js';
38
- import { checkSourceConfiguration, checkTableRls, getReplicationIdentityColumns } from './replication-utils.js';
39
- import {
40
- ChunkedSnapshotQuery,
41
- IdSnapshotQuery,
42
- MissingRow,
43
- PrimaryKeyValue,
44
- SimpleSnapshotQuery,
45
- SnapshotQuery
46
- } from './SnapshotQuery.js';
47
-
48
- export interface WalStreamOptions {
49
- logger?: Logger;
50
- connections: PgManager;
51
- storage: storage.SyncRulesBucketStorage;
52
- metrics: MetricsEngine;
53
- abort_signal: AbortSignal;
54
-
55
- /**
56
- * Override snapshot chunk length (number of rows), for testing.
57
- *
58
- * Defaults to 10_000.
59
- *
60
- * Note that queries are streamed, so we don't actually keep that much data in memory.
61
- */
62
- snapshotChunkLength?: number;
63
- }
64
-
65
- interface InitResult {
66
- /** True if initial snapshot is not yet done. */
67
- needsInitialSync: boolean;
68
- /** True if snapshot must be started from scratch with a new slot. */
69
- needsNewSlot: boolean;
70
- }
71
-
72
- export const ZERO_LSN = '00000000/00000000';
73
- export const PUBLICATION_NAME = 'powersync';
74
- export const POSTGRES_DEFAULT_SCHEMA = 'public';
75
-
76
- export const KEEPALIVE_CONTENT = 'ping';
77
- export const KEEPALIVE_BUFFER = Buffer.from(KEEPALIVE_CONTENT);
78
- export const KEEPALIVE_STATEMENT: pgwire.Statement = {
79
- statement: /* sql */ `
80
- SELECT
81
- *
82
- FROM
83
- pg_logical_emit_message(FALSE, 'powersync', $1)
84
- `,
85
- params: [{ type: 'varchar', value: KEEPALIVE_CONTENT }]
86
- } as const;
87
-
88
- export const isKeepAliveMessage = (msg: pgwire.PgoutputMessage) => {
89
- return (
90
- msg.tag == 'message' &&
91
- msg.prefix == 'powersync' &&
92
- msg.content &&
93
- Buffer.from(msg.content).equals(KEEPALIVE_BUFFER)
94
- );
95
- };
96
-
97
- export const sendKeepAlive = async (db: pgwire.PgClient) => {
98
- await lib_postgres.retriedQuery(db, KEEPALIVE_STATEMENT);
99
- };
100
-
101
- export class MissingReplicationSlotError extends Error {
102
- constructor(message: string, cause?: any) {
103
- super(message);
104
-
105
- this.cause = cause;
106
- }
107
- }
108
-
109
- export class WalStream {
110
- sync_rules: HydratedSyncRules;
111
- group_id: number;
112
-
113
- connection_id = 1;
114
-
115
- private logger: Logger;
116
-
117
- private readonly storage: storage.SyncRulesBucketStorage;
118
- private readonly metrics: MetricsEngine;
119
- private readonly slot_name: string;
120
-
121
- private connections: PgManager;
122
-
123
- private abort_signal: AbortSignal;
124
-
125
- private relationCache = new RelationCache((relation: number | SourceTable) => {
126
- if (typeof relation == 'number') {
127
- return relation;
128
- }
129
- return relation.objectId!;
130
- });
131
-
132
- private startedStreaming = false;
133
-
134
- private snapshotChunkLength: number;
135
-
136
- /**
137
- * Time of the oldest uncommitted change, according to the source db.
138
- * This is used to determine the replication lag.
139
- */
140
- private oldestUncommittedChange: Date | null = null;
141
- /**
142
- * Keep track of whether we have done a commit or keepalive yet.
143
- * We can only compute replication lag if isStartingReplication == false, or oldestUncommittedChange is present.
144
- */
145
- private isStartingReplication = true;
146
-
147
- private initialSnapshotPromise: Promise<void> | null = null;
148
-
149
- constructor(options: WalStreamOptions) {
150
- this.logger = options.logger ?? defaultLogger;
151
- this.storage = options.storage;
152
- this.metrics = options.metrics;
153
- this.sync_rules = options.storage.getParsedSyncRules({ defaultSchema: POSTGRES_DEFAULT_SCHEMA });
154
- this.group_id = options.storage.group_id;
155
- this.slot_name = options.storage.slot_name;
156
- this.connections = options.connections;
157
- this.snapshotChunkLength = options.snapshotChunkLength ?? 10_000;
158
-
159
- this.abort_signal = options.abort_signal;
160
- this.abort_signal.addEventListener(
161
- 'abort',
162
- () => {
163
- if (this.startedStreaming) {
164
- // Ping to speed up cancellation of streaming replication
165
- // We're not using pg_snapshot here, since it could be in the middle of
166
- // an initial replication transaction.
167
- const promise = sendKeepAlive(this.connections.pool);
168
- promise.catch((e) => {
169
- // Failures here are okay - this only speeds up stopping the process.
170
- this.logger.warn('Failed to ping connection', e);
171
- });
172
- } else {
173
- // If we haven't started streaming yet, it could be due to something like
174
- // an invalid password. In that case, don't attempt to ping.
175
- }
176
- },
177
- { once: true }
178
- );
179
- }
180
-
181
- get stopped() {
182
- return this.abort_signal.aborted;
183
- }
184
-
185
- async getQualifiedTableNames(
186
- batch: storage.BucketStorageBatch,
187
- db: pgwire.PgConnection,
188
- tablePattern: TablePattern
189
- ): Promise<storage.SourceTable[]> {
190
- const schema = tablePattern.schema;
191
- if (tablePattern.connectionTag != this.connections.connectionTag) {
192
- return [];
193
- }
194
-
195
- let tableRows: any[];
196
- const prefix = tablePattern.isWildcard ? tablePattern.tablePrefix : undefined;
197
-
198
- {
199
- let query = `
200
- SELECT
201
- c.oid AS relid,
202
- c.relname AS table_name,
203
- (SELECT
204
- json_agg(DISTINCT a.atttypid)
205
- FROM pg_attribute a
206
- WHERE a.attnum > 0 AND NOT a.attisdropped AND a.attrelid = c.oid)
207
- AS column_types
208
- FROM pg_class c
209
- JOIN pg_namespace n ON n.oid = c.relnamespace
210
- WHERE n.nspname = $1
211
- AND c.relkind = 'r'`;
212
-
213
- if (tablePattern.isWildcard) {
214
- query += ' AND c.relname LIKE $2';
215
- } else {
216
- query += ' AND c.relname = $2';
217
- }
218
-
219
- const result = await db.query({
220
- statement: query,
221
- params: [
222
- { type: 'varchar', value: schema },
223
- { type: 'varchar', value: tablePattern.tablePattern }
224
- ]
225
- });
226
-
227
- tableRows = pgwire.pgwireRows(result);
228
- }
229
-
230
- let result: storage.SourceTable[] = [];
231
-
232
- for (let row of tableRows) {
233
- const name = row.table_name as string;
234
- if (typeof row.relid != 'bigint') {
235
- throw new ReplicationAssertionError(`Missing relid for ${name}`);
236
- }
237
- const relid = Number(row.relid as bigint);
238
-
239
- if (prefix && !name.startsWith(prefix)) {
240
- continue;
241
- }
242
-
243
- const rs = await db.query({
244
- statement: `SELECT 1 FROM pg_publication_tables WHERE pubname = $1 AND schemaname = $2 AND tablename = $3`,
245
- params: [
246
- { type: 'varchar', value: PUBLICATION_NAME },
247
- { type: 'varchar', value: tablePattern.schema },
248
- { type: 'varchar', value: name }
249
- ]
250
- });
251
- if (rs.rows.length == 0) {
252
- this.logger.info(`Skipping ${tablePattern.schema}.${name} - not part of ${PUBLICATION_NAME} publication`);
253
- continue;
254
- }
255
-
256
- try {
257
- const result = await checkTableRls(db, relid);
258
- if (!result.canRead) {
259
- // We log the message, then continue anyway, since the check does not cover all cases.
260
- this.logger.warn(result.message!);
261
- }
262
- } catch (e) {
263
- // It's possible that we just don't have permission to access pg_roles - log the error and continue.
264
- this.logger.warn(`Could not check RLS access for ${tablePattern.schema}.${name}`, e);
265
- }
266
-
267
- const cresult = await getReplicationIdentityColumns(db, relid);
268
-
269
- const columnTypes = (JSON.parse(row.column_types) as string[]).map((e) => Number(e));
270
- const table = await this.handleRelation({
271
- batch,
272
- descriptor: {
273
- name,
274
- schema,
275
- objectId: relid,
276
- replicaIdColumns: cresult.replicationColumns
277
- } as SourceEntityDescriptor,
278
- snapshot: false,
279
- referencedTypeIds: columnTypes
280
- });
281
-
282
- result.push(table);
283
- }
284
- return result;
285
- }
286
-
287
- async initSlot(): Promise<InitResult> {
288
- await checkSourceConfiguration(this.connections.pool, PUBLICATION_NAME);
289
- await this.ensureStorageCompatibility();
290
-
291
- const slotName = this.slot_name;
292
-
293
- const status = await this.storage.getStatus();
294
- const snapshotDone = status.snapshot_done && status.checkpoint_lsn != null;
295
- if (snapshotDone) {
296
- // Snapshot is done, but we still need to check the replication slot status
297
- this.logger.info(`Initial replication already done`);
298
- }
299
-
300
- // Check if replication slot exists
301
- const slot = pgwire.pgwireRows(
302
- await this.connections.pool.query({
303
- // We specifically want wal_status and invalidation_reason, but it's not available on older versions,
304
- // so we just query *.
305
- statement: 'SELECT * FROM pg_replication_slots WHERE slot_name = $1',
306
- params: [{ type: 'varchar', value: slotName }]
307
- })
308
- )[0];
309
-
310
- // Previously we also used pg_catalog.pg_logical_slot_peek_binary_changes to confirm that we can query the slot.
311
- // However, there were some edge cases where the query times out, repeating the query, ultimately
312
- // causing high load on the source database and never recovering automatically.
313
- // We now instead jump straight to replication if the wal_status is not "lost", rather detecting those
314
- // errors during streaming replication, which is a little more robust.
315
-
316
- // We can have:
317
- // 1. needsInitialSync: true, lost slot -> MissingReplicationSlotError (starts new sync rules version).
318
- // Theoretically we could handle this the same as (2).
319
- // 2. needsInitialSync: true, no slot -> create new slot
320
- // 3. needsInitialSync: true, valid slot -> resume initial sync
321
- // 4. needsInitialSync: false, lost slot -> MissingReplicationSlotError (starts new sync rules version)
322
- // 5. needsInitialSync: false, no slot -> MissingReplicationSlotError (starts new sync rules version)
323
- // 6. needsInitialSync: false, valid slot -> resume streaming replication
324
- // The main advantages of MissingReplicationSlotError are:
325
- // 1. If there was a complete snapshot already (cases 4/5), users can still sync from that snapshot while
326
- // we do the reprocessing under a new slot name.
327
- // 2. If there was a partial snapshot (case 1), we can start with the new slot faster by not waiting for
328
- // the partial data to be cleared.
329
- if (slot != null) {
330
- // This checks that the slot is still valid
331
-
332
- // wal_status is present in postgres 13+
333
- // invalidation_reason is present in postgres 17+
334
- const lost = slot.wal_status == 'lost';
335
- if (lost) {
336
- // Case 1 / 4
337
- throw new MissingReplicationSlotError(
338
- `Replication slot ${slotName} is not valid anymore. invalidation_reason: ${slot.invalidation_reason ?? 'unknown'}`
339
- );
340
- }
341
- // Case 3 / 6
342
- return {
343
- needsInitialSync: !snapshotDone,
344
- needsNewSlot: false
345
- };
346
- } else {
347
- if (snapshotDone) {
348
- // Case 5
349
- // This will create a new slot, while keeping the current sync rules active
350
- throw new MissingReplicationSlotError(`Replication slot ${slotName} is missing`);
351
- }
352
- // Case 2
353
- // This will clear data (if any) and re-create the same slot
354
- return { needsInitialSync: true, needsNewSlot: true };
355
- }
356
- }
357
-
358
- async estimatedCountNumber(db: pgwire.PgConnection, table: storage.SourceTable): Promise<number> {
359
- const results = await db.query({
360
- statement: `SELECT reltuples::bigint AS estimate
361
- FROM pg_class
362
- WHERE oid = $1::regclass`,
363
- params: [{ value: table.qualifiedName, type: 'varchar' }]
364
- });
365
- const row = results.rows[0];
366
- return Number(row?.decodeWithoutCustomTypes(0) ?? -1n);
367
- }
368
-
369
- /**
370
- * Start initial replication.
371
- *
372
- * If (partial) replication was done before on this slot, this clears the state
373
- * and starts again from scratch.
374
- */
375
- async startInitialReplication(replicationConnection: pgwire.PgConnection, status: InitResult) {
376
- // If anything here errors, the entire replication process is aborted,
377
- // and all connections are closed, including this one.
378
- const db = await this.connections.snapshotConnection();
379
-
380
- const slotName = this.slot_name;
381
-
382
- if (status.needsNewSlot) {
383
- // This happens when there is no existing replication slot, or if the
384
- // existing one is unhealthy.
385
- // In those cases, we have to start replication from scratch.
386
- // If there is an existing healthy slot, we can skip this and continue
387
- // initial replication where we left off.
388
- await this.storage.clear({ signal: this.abort_signal });
389
-
390
- await db.query({
391
- statement: 'SELECT pg_drop_replication_slot(slot_name) FROM pg_replication_slots WHERE slot_name = $1',
392
- params: [{ type: 'varchar', value: slotName }]
393
- });
394
-
395
- // We use the replication connection here, not a pool.
396
- // The replication slot must be created before we start snapshotting tables.
397
- await replicationConnection.query(`CREATE_REPLICATION_SLOT ${slotName} LOGICAL pgoutput`);
398
-
399
- this.logger.info(`Created replication slot ${slotName}`);
400
- }
401
-
402
- await this.initialReplication(db);
403
- }
404
-
405
- async initialReplication(db: pgwire.PgConnection) {
406
- const sourceTables = this.sync_rules.getSourceTables();
407
- const flushResults = await this.storage.startBatch(
408
- {
409
- logger: this.logger,
410
- zeroLSN: ZERO_LSN,
411
- defaultSchema: POSTGRES_DEFAULT_SCHEMA,
412
- storeCurrentData: true,
413
- skipExistingRows: true
414
- },
415
- async (batch) => {
416
- let tablesWithStatus: SourceTable[] = [];
417
- for (let tablePattern of sourceTables) {
418
- const tables = await this.getQualifiedTableNames(batch, db, tablePattern);
419
- // Pre-get counts
420
- for (let table of tables) {
421
- if (table.snapshotComplete) {
422
- this.logger.info(`Skipping ${table.qualifiedName} - snapshot already done`);
423
- continue;
424
- }
425
- const count = await this.estimatedCountNumber(db, table);
426
- table = await batch.updateTableProgress(table, { totalEstimatedCount: count });
427
- this.relationCache.update(table);
428
- tablesWithStatus.push(table);
429
-
430
- this.logger.info(`To replicate: ${table.qualifiedName} ${table.formatSnapshotProgress()}`);
431
- }
432
- }
433
-
434
- for (let table of tablesWithStatus) {
435
- await this.snapshotTableInTx(batch, db, table);
436
- this.touch();
437
- }
438
-
439
- // Always commit the initial snapshot at zero.
440
- // This makes sure we don't skip any changes applied before starting this snapshot,
441
- // in the case of snapshot retries.
442
- // We could alternatively commit at the replication slot LSN.
443
-
444
- // Get the current LSN for the snapshot.
445
- // We could also use the LSN from the last table snapshot.
446
- const rs = await db.query(`select pg_current_wal_lsn() as lsn`);
447
- const noCommitBefore = rs.rows[0].decodeWithoutCustomTypes(0);
448
-
449
- await batch.markAllSnapshotDone(noCommitBefore);
450
- await batch.commit(ZERO_LSN);
451
- }
452
- );
453
- /**
454
- * Send a keepalive message after initial replication.
455
- * In some edge cases we wait for a keepalive after the initial snapshot.
456
- * If we don't explicitly check the contents of keepalive messages then a keepalive is detected
457
- * rather quickly after initial replication - perhaps due to other WAL events.
458
- * If we do explicitly check the contents of messages, we need an actual keepalive payload in order
459
- * to advance the active sync rules LSN.
460
- */
461
- await sendKeepAlive(db);
462
-
463
- const lastOp = flushResults?.flushed_op;
464
- if (lastOp != null) {
465
- // Populate the cache _after_ initial replication, but _before_ we switch to this sync rules.
466
- await this.storage.populatePersistentChecksumCache({
467
- // No checkpoint yet, but we do have the opId.
468
- maxOpId: lastOp,
469
- signal: this.abort_signal
470
- });
471
- }
472
- }
473
-
474
- static decodeRow(row: pgwire.PgRow, types: PostgresTypeResolver): SqliteInputRow {
475
- let result: SqliteInputRow = {};
476
-
477
- row.raw.forEach((rawValue, i) => {
478
- const column = row.columns[i];
479
- let mappedValue: SqliteInputValue;
480
-
481
- if (typeof rawValue == 'string') {
482
- mappedValue = toSyncRulesValue(types.registry.decodeDatabaseValue(rawValue, column.typeOid), false, true);
483
- } else {
484
- // Binary format, expose as-is.
485
- mappedValue = rawValue;
486
- }
487
-
488
- result[column.name] = mappedValue;
489
- });
490
- return result;
491
- }
492
-
493
- private async snapshotTableInTx(
494
- batch: storage.BucketStorageBatch,
495
- db: pgwire.PgConnection,
496
- table: storage.SourceTable,
497
- limited?: PrimaryKeyValue[]
498
- ): Promise<storage.SourceTable> {
499
- // Note: We use the default "Read Committed" isolation level here, not snapshot isolation.
500
- // The data may change during the transaction, but that is compensated for in the streaming
501
- // replication afterwards.
502
- await db.query('BEGIN');
503
- try {
504
- await this.snapshotTable(batch, db, table, limited);
505
-
506
- // Get the current LSN.
507
- // The data will only be consistent once incremental replication has passed that point.
508
- // We have to get this LSN _after_ we have finished the table snapshot.
509
- //
510
- // There are basically two relevant LSNs here:
511
- // A: The LSN before the snapshot starts. We don't explicitly record this on the PowerSync side,
512
- // but it is implicitly recorded in the replication slot.
513
- // B: The LSN after the table snapshot is complete, which is what we get here.
514
- // When we do the snapshot queries, the data that we get back for each chunk could match the state
515
- // anywhere between A and B. To actually have a consistent state on our side, we need to:
516
- // 1. Complete the snapshot.
517
- // 2. Wait until logical replication has caught up with all the changes between A and B.
518
- // Calling `markSnapshotDone(LSN B)` covers that.
519
- const rs = await db.query(`select pg_current_wal_lsn() as lsn`);
520
- const tableLsnNotBefore = rs.rows[0].decodeWithoutCustomTypes(0);
521
- // Side note: A ROLLBACK would probably also be fine here, since we only read in this transaction.
522
- await db.query('COMMIT');
523
- const [resultTable] = await batch.markTableSnapshotDone([table], tableLsnNotBefore);
524
- this.relationCache.update(resultTable);
525
- return resultTable;
526
- } catch (e) {
527
- await db.query('ROLLBACK');
528
- throw e;
529
- }
530
- }
531
-
532
- private async snapshotTable(
533
- batch: storage.BucketStorageBatch,
534
- db: pgwire.PgConnection,
535
- table: storage.SourceTable,
536
- limited?: PrimaryKeyValue[]
537
- ) {
538
- let totalEstimatedCount = table.snapshotStatus?.totalEstimatedCount;
539
- let at = table.snapshotStatus?.replicatedCount ?? 0;
540
- let lastCountTime = 0;
541
- let q: SnapshotQuery;
542
- // We do streaming on two levels:
543
- // 1. Coarse level: DECLARE CURSOR, FETCH 10000 at a time.
544
- // 2. Fine level: Stream chunks from each fetch call.
545
- if (limited) {
546
- q = new IdSnapshotQuery(db, table, limited);
547
- } else if (ChunkedSnapshotQuery.supports(table)) {
548
- // Single primary key - we can use the primary key for chunking
549
- const orderByKey = table.replicaIdColumns[0];
550
- q = new ChunkedSnapshotQuery(db, table, this.snapshotChunkLength, table.snapshotStatus?.lastKey ?? null);
551
- if (table.snapshotStatus?.lastKey != null) {
552
- this.logger.info(
553
- `Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} - resuming from ${orderByKey.name} > ${(q as ChunkedSnapshotQuery).lastKey}`
554
- );
555
- } else {
556
- this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} - resumable`);
557
- }
558
- } else {
559
- // Fallback case - query the entire table
560
- this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} - not resumable`);
561
- q = new SimpleSnapshotQuery(db, table, this.snapshotChunkLength);
562
- at = 0;
563
- }
564
- await q.initialize();
565
-
566
- let hasRemainingData = true;
567
- while (hasRemainingData) {
568
- // Fetch 10k at a time.
569
- // The balance here is between latency overhead per FETCH call,
570
- // and not spending too much time on each FETCH call.
571
- // We aim for a couple of seconds on each FETCH call.
572
- const cursor = q.nextChunk();
573
- hasRemainingData = false;
574
- // pgwire streams rows in chunks.
575
- // These chunks can be quite small (as little as 16KB), so we don't flush chunks automatically.
576
- // There are typically 100-200 rows per chunk.
577
- for await (let chunk of cursor) {
578
- if (chunk.tag == 'RowDescription') {
579
- continue;
580
- }
581
-
582
- if (chunk.rows.length > 0) {
583
- hasRemainingData = true;
584
- }
585
-
586
- for (const rawRow of chunk.rows) {
587
- const record = this.sync_rules.applyRowContext<never>(WalStream.decodeRow(rawRow, this.connections.types));
588
-
589
- // This auto-flushes when the batch reaches its size limit
590
- await batch.save({
591
- tag: storage.SaveOperationTag.INSERT,
592
- sourceTable: table,
593
- before: undefined,
594
- beforeReplicaId: undefined,
595
- after: record,
596
- afterReplicaId: getUuidReplicaIdentityBson(record, table.replicaIdColumns)
597
- });
598
- }
599
-
600
- at += chunk.rows.length;
601
- this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(chunk.rows.length);
602
-
603
- this.touch();
604
- }
605
-
606
- // Important: flush before marking progress
607
- await batch.flush();
608
- if (limited == null) {
609
- let lastKey: Uint8Array | undefined;
610
- if (q instanceof ChunkedSnapshotQuery) {
611
- lastKey = q.getLastKeySerialized();
612
- }
613
- if (lastCountTime < performance.now() - 10 * 60 * 1000) {
614
- // Even though we're doing the snapshot inside a transaction, the transaction uses
615
- // the default "Read Committed" isolation level. This means we can get new data
616
- // within the transaction, so we re-estimate the count every 10 minutes when replicating
617
- // large tables.
618
- totalEstimatedCount = await this.estimatedCountNumber(db, table);
619
- lastCountTime = performance.now();
620
- }
621
- table = await batch.updateTableProgress(table, {
622
- lastKey: lastKey,
623
- replicatedCount: at,
624
- totalEstimatedCount: totalEstimatedCount
625
- });
626
- this.relationCache.update(table);
627
-
628
- this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()}`);
629
- } else {
630
- this.logger.info(`Replicating ${table.qualifiedName} ${at}/${limited.length} for resnapshot`);
631
- }
632
-
633
- if (this.abort_signal.aborted) {
634
- // We only abort after flushing
635
- throw new ReplicationAbortedError(`Initial replication interrupted`);
636
- }
637
- }
638
- }
639
-
640
- async handleRelation(options: {
641
- batch: storage.BucketStorageBatch;
642
- descriptor: SourceEntityDescriptor;
643
- snapshot: boolean;
644
- referencedTypeIds: number[];
645
- }) {
646
- const { batch, descriptor, snapshot, referencedTypeIds } = options;
647
-
648
- if (!descriptor.objectId && typeof descriptor.objectId != 'number') {
649
- throw new ReplicationAssertionError(`objectId expected, got ${typeof descriptor.objectId}`);
650
- }
651
- const result = await this.storage.resolveTable({
652
- group_id: this.group_id,
653
- connection_id: this.connection_id,
654
- connection_tag: this.connections.connectionTag,
655
- entity_descriptor: descriptor,
656
- sync_rules: this.sync_rules
657
- });
658
- this.relationCache.update(result.table);
659
-
660
- // Drop conflicting tables. This includes for example renamed tables.
661
- await batch.drop(result.dropTables);
662
-
663
- // Ensure we have a description for custom types referenced in the table.
664
- await this.connections.types.fetchTypes(referencedTypeIds);
665
-
666
- // Snapshot if:
667
- // 1. Snapshot is requested (false for initial snapshot, since that process handles it elsewhere)
668
- // 2. Snapshot is not already done, AND:
669
- // 3. The table is used in sync rules.
670
- const shouldSnapshot = snapshot && !result.table.snapshotComplete && result.table.syncAny;
671
-
672
- if (shouldSnapshot) {
673
- // Truncate this table, in case a previous snapshot was interrupted.
674
- await batch.truncate([result.table]);
675
-
676
- // Start the snapshot inside a transaction.
677
- // We use a dedicated connection for this.
678
- const db = await this.connections.snapshotConnection();
679
- try {
680
- const table = await this.snapshotTableInTx(batch, db, result.table);
681
- // After the table snapshot, we wait for replication to catch up.
682
- // To make sure there is actually something to replicate, we send a keepalive
683
- // message.
684
- await sendKeepAlive(db);
685
- return table;
686
- } finally {
687
- await db.end();
688
- }
689
- }
690
-
691
- return result.table;
692
- }
693
-
694
- /**
695
- * Process rows that have missing TOAST values.
696
- *
697
- * This can happen during edge cases in the chunked initial snapshot process.
698
- *
699
- * We handle this similar to an inline table snapshot, but limited to the specific
700
- * set of rows.
701
- */
702
- private async resnapshot(batch: BucketStorageBatch, rows: MissingRow[]) {
703
- const byTable = new Map<number, MissingRow[]>();
704
- for (let row of rows) {
705
- const relId = row.table.objectId as number; // always a number for postgres
706
- if (!byTable.has(relId)) {
707
- byTable.set(relId, []);
708
- }
709
- byTable.get(relId)!.push(row);
710
- }
711
- const db = await this.connections.snapshotConnection();
712
- try {
713
- for (let rows of byTable.values()) {
714
- const table = rows[0].table;
715
- await this.snapshotTableInTx(
716
- batch,
717
- db,
718
- table,
719
- rows.map((r) => r.key)
720
- );
721
- }
722
- // Even with resnapshot, we need to wait until we get a new consistent checkpoint
723
- // after the snapshot, so we need to send a keepalive message.
724
- await sendKeepAlive(db);
725
- } finally {
726
- await db.end();
727
- }
728
- }
729
-
730
/**
 * Look up a previously cached source table by its relation id.
 *
 * @throws {ReplicationAssertionError} if the relation is not in the cache
 */
private getTable(relationId: number): storage.SourceTable {
  const cached = this.relationCache.get(relationId);
  if (cached != null) {
    return cached;
  }
  // We should always receive a replication message before the relation is used.
  // If we can't find it, it's a bug.
  throw new ReplicationAssertionError(`Missing relation cache for ${relationId}`);
}
739
-
740
private syncRulesRecord(row: SqliteInputRow): SqliteRow;
private syncRulesRecord(row: SqliteInputRow | undefined): SqliteRow | undefined;

/**
 * Apply the sync rules' row context to an input row.
 *
 * A missing row (null/undefined) is passed through as `undefined`.
 */
private syncRulesRecord(row: SqliteInputRow | undefined): SqliteRow | undefined {
  return row == null ? undefined : this.sync_rules.applyRowContext<never>(row);
}
749
-
750
/**
 * Apply the sync rules' row context to a row that may have unavailable TOAST values.
 */
private toastableSyncRulesRecord(row: ToastableSqliteRow<SqliteInputValue>): ToastableSqliteRow {
  const converted: ToastableSqliteRow = this.sync_rules.applyRowContext(row);
  return converted;
}
753
-
754
/**
 * Persist a single decoded pgoutput message into the storage batch.
 *
 * - Messages without an LSN are ignored.
 * - `truncate` truncates every referenced table.
 * - `insert` / `update` / `delete` are saved as the matching save operation,
 *   unless the table is not used by the sync rules.
 * - Any other message tag is ignored.
 *
 * @returns the result of the storage call, or `null` when nothing was written
 */
async writeChange(
  batch: storage.BucketStorageBatch,
  msg: pgwire.PgoutputMessage
): Promise<storage.FlushedResult | null> {
  if (msg.lsn == null) {
    return null;
  }

  if (msg.tag == 'truncate') {
    const truncated: storage.SourceTable[] = [];
    for (const relation of msg.relations) {
      truncated.push(this.getTable(getRelId(relation)));
    }
    return await batch.truncate(truncated);
  }

  if (msg.tag != 'insert' && msg.tag != 'update' && msg.tag != 'delete') {
    // Not a row-level change - nothing to persist.
    return null;
  }

  const table = this.getTable(getRelId(msg.relation));
  if (!table.syncAny) {
    this.logger.debug(`Table ${table.qualifiedName} not used in sync rules - skipping`);
    return null;
  }

  // Every row-level change on a synced table counts as one replicated row.
  this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(1);

  if (msg.tag == 'insert') {
    const inserted = this.syncRulesRecord(this.connections.types.constructAfterRecord(msg));
    return await batch.save({
      tag: storage.SaveOperationTag.INSERT,
      sourceTable: table,
      before: undefined,
      beforeReplicaId: undefined,
      after: inserted,
      afterReplicaId: getUuidReplicaIdentityBson(inserted, table.replicaIdColumns)
    });
  }

  if (msg.tag == 'update') {
    // "before" may be null if the replica id columns are unchanged
    // It's fine to treat that the same as an insert.
    const previous = this.syncRulesRecord(this.connections.types.constructBeforeRecord(msg));
    const updated = this.toastableSyncRulesRecord(this.connections.types.constructAfterRecord(msg));
    return await batch.save({
      tag: storage.SaveOperationTag.UPDATE,
      sourceTable: table,
      before: previous,
      beforeReplicaId: previous ? getUuidReplicaIdentityBson(previous, table.replicaIdColumns) : undefined,
      after: updated,
      afterReplicaId: getUuidReplicaIdentityBson(updated, table.replicaIdColumns)
    });
  }

  if (msg.tag == 'delete') {
    const removed = this.syncRulesRecord(this.connections.types.constructBeforeRecord(msg)!);
    return await batch.save({
      tag: storage.SaveOperationTag.DELETE,
      sourceTable: table,
      before: removed,
      beforeReplicaId: getUuidReplicaIdentityBson(removed, table.replicaIdColumns),
      after: undefined,
      afterReplicaId: undefined
    });
  }

  return null;
}
816
-
817
/**
 * Run replication: first the initial replication setup on one replication
 * connection, then streaming of changes on a second, fresh connection.
 *
 * Any error is reported to storage and then re-thrown.
 */
async replicate() {
  try {
    // If anything errors here, the entire replication process is halted, and
    // all connections automatically closed, including this one.
    const runInitialReplication = async () => {
      const connection = await this.connections.replicationConnection();
      await this.initReplication(connection);
      await connection.end();
    };
    this.initialSnapshotPromise = runInitialReplication();
    await this.initialSnapshotPromise;

    // At this point, the above connection has often timed out, so we start a new one
    const streamingConnection = await this.connections.replicationConnection();
    await this.streamChanges(streamingConnection);
    await streamingConnection.end();
  } catch (e) {
    await this.storage.reportError(e);
    throw e;
  }
}
838
-
839
/**
 * Resolves once the initial snapshot started by replicate() has completed.
 *
 * For tests only.
 *
 * @throws {ReplicationAssertionError} if replicate() has not been called yet
 */
async waitForInitialSnapshot() {
  const pending = this.initialSnapshotPromise;
  if (pending == null) {
    throw new ReplicationAssertionError(`Initial snapshot not started yet`);
  }
  return pending;
}
850
-
851
/**
 * Initialize the replication slot and, when the slot reports that an initial
 * sync is needed, start the initial replication on the given connection.
 */
async initReplication(replicationConnection: pgwire.PgConnection) {
  const slotState = await this.initSlot();
  if (!slotState.needsInitialSync) {
    return;
  }
  await this.startInitialReplication(replicationConnection, slotState);
}
857
-
858
/**
 * Stream changes from the replication connection.
 *
 * Errors recognised by isReplicationSlotInvalidError() are re-thrown as a
 * MissingReplicationSlotError (wrapping the original error); all other errors
 * propagate unchanged.
 */
async streamChanges(replicationConnection: pgwire.PgConnection) {
  try {
    await this.streamChangesInternal(replicationConnection);
  } catch (e) {
    if (!isReplicationSlotInvalidError(e)) {
      throw e;
    }
    throw new MissingReplicationSlotError(e.message, e);
  }
}
868
-
869
/**
 * Consume the logical replication stream for `this.slot_name` and write the
 * decoded pgoutput messages into a storage batch.
 *
 * Per decoded chunk:
 *  - `relation` messages are passed to handleRelation(),
 *  - `begin` marks that a transaction is in progress (keepalives are skipped),
 *  - the last `commit` in the chunk triggers flush -> resnapshot -> commit -> ack,
 *  - every other message is passed to writeChange().
 * Outside of transactions, keepalives are used to advance the checkpoint LSN.
 *
 * The loop ends when the stream ends or when `this.abort_signal` is aborted.
 *
 * @param replicationConnection connection used to open the logical replication stream
 */
private async streamChangesInternal(replicationConnection: pgwire.PgConnection) {
  // When changing any logic here, check /docs/wal-lsns.md.
  const { createEmptyCheckpoints } = await this.ensureStorageCompatibility();

  const replicationOptions: Record<string, string> = {
    proto_version: '1',
    publication_names: PUBLICATION_NAME
  };

  /**
   * Viewing the contents of logical messages emitted with `pg_logical_emit_message`
   * is only supported on Postgres >= 14.0.
   * https://www.postgresql.org/docs/14/protocol-logical-replication.html
   */
  const exposesLogicalMessages = await this.checkLogicalMessageSupport();
  if (exposesLogicalMessages) {
    /**
     * Only add this option if the Postgres server supports it.
     * Adding the option to a server that doesn't support it will throw an exception when starting logical replication.
     * Error: `unrecognized pgoutput option: messages`
     */
    replicationOptions['messages'] = 'true';
  }

  const replicationStream = replicationConnection.logicalReplication({
    slot: this.slot_name,
    options: replicationOptions
  });

  this.startedStreaming = true;

  // Rows reported via markRecordUnavailable that still need to be re-fetched.
  // This list only lives in memory; it is drained by this.resnapshot() below.
  let resnapshot: { table: storage.SourceTable; key: PrimaryKeyValue }[] = [];

  // Callback handed to the storage batch: collects the replica-id key of a record
  // so that the row can be re-fetched later.
  const markRecordUnavailable = (record: SaveUpdate) => {
    if (!IdSnapshotQuery.supports(record.sourceTable)) {
      // If it's not supported, it's also safe to ignore
      return;
    }
    let key: PrimaryKeyValue = {};
    for (let column of record.sourceTable.replicaIdColumns) {
      const name = column.name;
      const value = record.after[name];
      if (value == null) {
        // We don't expect this to actually happen.
        // The key should always be present in the "after" record.
        return;
      }
      // We just need a consistent representation of the primary key, and don't care about fixed quirks.
      key[name] = applyValueContext(value, CompatibilityContext.FULL_BACKWARDS_COMPATIBILITY);
    }
    resnapshot.push({
      table: record.sourceTable,
      key: key
    });
  };

  await this.storage.startBatch(
    {
      logger: this.logger,
      zeroLSN: ZERO_LSN,
      defaultSchema: POSTGRES_DEFAULT_SCHEMA,
      storeCurrentData: true,
      skipExistingRows: false,
      markRecordUnavailable
    },
    async (batch) => {
      // We don't handle any plain keepalive messages while we have transactions.
      // While we have transactions, we use that to advance the position.
      // Replication never starts in the middle of a transaction, so this starts as false.
      let skipKeepalive = false;
      // Number of messages passed to writeChange() so far; only used for progress logging.
      let count = 0;

      for await (const chunk of replicationStream.pgoutputDecode()) {
        this.touch();

        if (this.abort_signal.aborted) {
          break;
        }

        // chunkLastLsn may come from normal messages in the chunk,
        // or from a PrimaryKeepalive message.
        const { messages, lastLsn: chunkLastLsn } = chunk;

        /**
         * We can check if an explicit keepalive was sent if `exposesLogicalMessages == true`.
         * If we can't check the logical messages, we should assume a keepalive if we
         * receive an empty array of messages in a replication event.
         */
        const assumeKeepAlive = !exposesLogicalMessages;
        let keepAliveDetected = false;
        // Identity of the final commit message in this chunk (undefined if there is none).
        const lastCommit = messages.findLast((msg) => msg.tag == 'commit');

        for (const msg of messages) {
          if (msg.tag == 'relation') {
            await this.handleRelation({
              batch,
              descriptor: getPgOutputRelation(msg),
              snapshot: true,
              referencedTypeIds: referencedColumnTypeIds(msg)
            });
          } else if (msg.tag == 'begin') {
            // This may span multiple transactions in the same chunk, or even across chunks.
            skipKeepalive = true;
            if (this.oldestUncommittedChange == null) {
              // NOTE(review): commitTime is divided by 1000 before building the Date -
              // presumably microseconds to milliseconds; confirm against pgwire.
              this.oldestUncommittedChange = new Date(Number(msg.commitTime / 1000n));
            }
          } else if (msg.tag == 'commit') {
            this.metrics.getCounter(ReplicationMetric.TRANSACTIONS_REPLICATED).add(1);
            if (msg == lastCommit) {
              // Only commit if this is the last commit in the chunk.
              // This effectively lets us batch multiple transactions within the same chunk
              // into a single flush, increasing throughput for many small transactions.
              skipKeepalive = false;
              // flush() must be before the resnapshot check - that is
              // typically what reports the resnapshot records.
              await batch.flush({ oldestUncommittedChange: this.oldestUncommittedChange });
              // This _must_ be checked after the flush(), and before
              // commit() or ack(). We never persist the resnapshot list,
              // so we have to process it before marking our progress.
              if (resnapshot.length > 0) {
                await this.resnapshot(batch, resnapshot);
                resnapshot = [];
              }
              const { checkpointBlocked } = await batch.commit(msg.lsn!, {
                createEmptyCheckpoints,
                oldestUncommittedChange: this.oldestUncommittedChange
              });
              await this.ack(msg.lsn!, replicationStream);
              if (!checkpointBlocked) {
                // The lag-tracking state is only reset when the commit was not blocked.
                this.oldestUncommittedChange = null;
                this.isStartingReplication = false;
              }
            }
          } else {
            // Log progress once every 100 processed messages.
            if (count % 100 == 0) {
              this.logger.info(`Replicating op ${count} ${msg.lsn}`);
            }

            /**
             * If we can see the contents of logical messages, then we can check if a keepalive
             * message is present. We only perform a keepalive (below) if we explicitly detect a keepalive message.
             * If we can't see the contents of logical messages, then we should assume a keepalive is required
             * due to the default value of `assumeKeepAlive`.
             */
            if (exposesLogicalMessages && isKeepAliveMessage(msg)) {
              keepAliveDetected = true;
            }

            count += 1;
            const flushResult = await this.writeChange(batch, msg);
            if (flushResult != null && resnapshot.length > 0) {
              // If we have large transactions, we also need to flush the resnapshot list
              // periodically.
              // TODO: make sure this bit is actually triggered
              await this.resnapshot(batch, resnapshot);
              resnapshot = [];
            }
          }
        }

        if (!skipKeepalive) {
          if (assumeKeepAlive || keepAliveDetected) {
            // Reset the detection flag.
            keepAliveDetected = false;

            // In a transaction, we ack and commit according to the transaction progress.
            // Outside transactions, we use the PrimaryKeepalive messages to advance progress.
            // Big caveat: This _must not_ be used to skip individual messages, since this LSN
            // may be in the middle of the next transaction.
            // It must only be used to associate checkpoints with LSNs.
            const { checkpointBlocked } = await batch.keepalive(chunkLastLsn);
            if (!checkpointBlocked) {
              this.oldestUncommittedChange = null;
            }

            // Unlike the commit path above, this flag is cleared even when the checkpoint is blocked.
            this.isStartingReplication = false;
          }

          // We receive chunks with empty messages often (about each second).
          // Acknowledging here progresses the slot past these and frees up resources.
          await this.ack(chunkLastLsn, replicationStream);
        }

        this.metrics.getCounter(ReplicationMetric.CHUNKS_REPLICATED).add(1);
      }
    }
  );
}
1057
-
1058
/**
 * Acknowledge an LSN on the replication stream.
 *
 * The zero LSN is never acknowledged.
 */
async ack(lsn: string, replicationStream: pgwire.ReplicationStream) {
  if (lsn != ZERO_LSN) {
    replicationStream.ack(lsn);
  }
}
1065
-
1066
/**
 * Verify that the bucket storage can be used together with the replication source,
 * and resolve the commit options to use while streaming.
 *
 * @returns the resolved commit options (`createEmptyCheckpoints`, `oldestUncommittedChange`)
 * @throws {DatabaseConnectionError} If the storage is not compatible with the replication connection.
 */
protected async ensureStorageCompatibility(): Promise<storage.ResolvedBucketBatchCommitOptions> {
  const supportsLogicalMessages = await this.checkLogicalMessageSupport();
  const storageIdentifier = await this.storage.factory.getSystemIdentifier();

  const usesPostgresStorage = storageIdentifier.type == lib_postgres.POSTGRES_CONNECTION_TYPE;
  if (!usesPostgresStorage) {
    // Keep the same behaviour as before allowing Postgres storage.
    return {
      createEmptyCheckpoints: true,
      oldestUncommittedChange: null
    };
  }

  const storageSystem = lib_postgres.utils.decodePostgresSystemIdentifier(storageIdentifier.id);
  // Identify the replication source, to check if the same server is being used
  // for both the sync bucket storage and the logical replication.
  const sourceSystem = await lib_postgres.utils.queryPostgresSystemIdentifier(this.connections.pool);

  const sameServer = sourceSystem.server_id == storageSystem.server_id;
  if (sameServer && !supportsLogicalMessages) {
    throw new DatabaseConnectionError(
      ErrorCode.PSYNC_S1144,
      `Separate Postgres servers are required for the replication source and sync bucket storage when using Postgres versions below 14.0.`,
      new Error('Postgres version is below 14')
    );
  }

  // Don't create empty checkpoints if the same Postgres database is used for the data source
  // and sync bucket storage. Creating empty checkpoints will cause WAL feedback loops.
  const sameDatabase = sourceSystem.database_name == storageSystem.database_name;
  return {
    createEmptyCheckpoints: !sameDatabase,
    oldestUncommittedChange: null
  };
}
1105
-
1106
/**
 * Whether the replication source exposes the contents of logical replication
 * messages, i.e. whether its server version is at least 14.0.0.
 *
 * Resolves to `false` when the server version is not available.
 */
protected async checkLogicalMessageSupport() {
  const serverVersion = await this.connections.getServerVersion();
  if (!serverVersion) {
    return false;
  }
  return serverVersion.compareMain('14.0.0') >= 0;
}
1114
-
1115
/**
 * Current replication lag in milliseconds.
 *
 * @returns the age of the oldest uncommitted change; `0` when there are no
 *   uncommitted changes; `undefined` while replication is still starting up
 *   and there is nothing to measure against yet
 */
async getReplicationLagMillis(): Promise<number | undefined> {
  const oldest = this.oldestUncommittedChange;
  if (oldest != null) {
    return Date.now() - oldest.getTime();
  }
  // No uncommitted changes: either we are up-to-date, or we have not
  // started replicating yet and cannot compute a lag.
  return this.isStartingReplication ? undefined : 0;
}
1127
-
1128
/**
 * Touch the container probes without awaiting the result.
 * A failure is logged rather than propagated.
 */
private touch() {
  const reportFailure = (error: unknown): void => {
    this.logger.error(`Error touching probe`, error);
  };
  container.probes.touch().catch(reportFailure);
}
1133
- }
1134
-
1135
/**
 * Decide whether an error raised while streaming means that the replication
 * slot is unusable.
 *
 * The decision is based on the error message only. A thrown `null` or
 * `undefined`, or a value without a `message`, is treated as "not a slot
 * error" instead of raising a TypeError, so the original failure is never masked.
 *
 * @param e the caught value; may be anything, including `null`/`undefined`
 * @returns `true` when the message matches one of the known slot/publication failures
 */
function isReplicationSlotInvalidError(e: any): boolean {
  // We could access the error code from pgwire using this:
  //   e[Symbol.for('pg.ErrorCode')]
  // However, we typically get a generic code such as 42704 (undefined_object), which does not
  // help much. So we check the actual error message.
  const message = e?.message ?? '';

  // Sample: record with incorrect prev-link 10000/10000 at 0/18AB778
  //   Seen during development. Some internal error, fixed by re-creating slot.
  //
  // Sample: publication "powersync" does not exist
  //   Happens when publication deleted or never created.
  //   Slot must be re-created in this case.
  return (
    /incorrect prev-link/.test(message) ||
    /replication slot.*does not exist/.test(message) ||
    /publication.*does not exist/.test(message) ||
    // Postgres 18 - exceeded max_slot_wal_keep_size
    /can no longer access replication slot/.test(message) ||
    // Postgres 17 - exceeded max_slot_wal_keep_size
    /can no longer get changes from replication slot/.test(message)
  );
}