@powersync/service-module-postgres-storage 0.0.0-dev-20250116115804

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (157)
  1. package/CHANGELOG.md +32 -0
  2. package/LICENSE +67 -0
  3. package/README.md +67 -0
  4. package/dist/.tsbuildinfo +1 -0
  5. package/dist/@types/index.d.ts +7 -0
  6. package/dist/@types/migrations/PostgresMigrationAgent.d.ts +12 -0
  7. package/dist/@types/migrations/PostgresMigrationStore.d.ts +14 -0
  8. package/dist/@types/migrations/migration-utils.d.ts +3 -0
  9. package/dist/@types/migrations/scripts/1684951997326-init.d.ts +3 -0
  10. package/dist/@types/module/PostgresStorageModule.d.ts +6 -0
  11. package/dist/@types/storage/PostgresBucketStorageFactory.d.ts +42 -0
  12. package/dist/@types/storage/PostgresCompactor.d.ts +40 -0
  13. package/dist/@types/storage/PostgresStorageProvider.d.ts +5 -0
  14. package/dist/@types/storage/PostgresSyncRulesStorage.d.ts +46 -0
  15. package/dist/@types/storage/PostgresTestStorageFactoryGenerator.d.ts +13 -0
  16. package/dist/@types/storage/batch/OperationBatch.d.ts +47 -0
  17. package/dist/@types/storage/batch/PostgresBucketBatch.d.ts +90 -0
  18. package/dist/@types/storage/batch/PostgresPersistedBatch.d.ts +64 -0
  19. package/dist/@types/storage/checkpoints/PostgresWriteCheckpointAPI.d.ts +20 -0
  20. package/dist/@types/storage/storage-index.d.ts +5 -0
  21. package/dist/@types/storage/sync-rules/PostgresPersistedSyncRulesContent.d.ts +17 -0
  22. package/dist/@types/types/codecs.d.ts +61 -0
  23. package/dist/@types/types/models/ActiveCheckpoint.d.ts +12 -0
  24. package/dist/@types/types/models/ActiveCheckpointNotification.d.ts +19 -0
  25. package/dist/@types/types/models/BucketData.d.ts +22 -0
  26. package/dist/@types/types/models/BucketParameters.d.ts +11 -0
  27. package/dist/@types/types/models/CurrentData.d.ts +22 -0
  28. package/dist/@types/types/models/Instance.d.ts +6 -0
  29. package/dist/@types/types/models/Migration.d.ts +12 -0
  30. package/dist/@types/types/models/SourceTable.d.ts +31 -0
  31. package/dist/@types/types/models/SyncRules.d.ts +47 -0
  32. package/dist/@types/types/models/WriteCheckpoint.d.ts +15 -0
  33. package/dist/@types/types/models/models-index.d.ts +10 -0
  34. package/dist/@types/types/types.d.ts +96 -0
  35. package/dist/@types/utils/bson.d.ts +6 -0
  36. package/dist/@types/utils/bucket-data.d.ts +18 -0
  37. package/dist/@types/utils/db.d.ts +8 -0
  38. package/dist/@types/utils/ts-codec.d.ts +5 -0
  39. package/dist/@types/utils/utils-index.d.ts +4 -0
  40. package/dist/index.js +8 -0
  41. package/dist/index.js.map +1 -0
  42. package/dist/migrations/PostgresMigrationAgent.js +36 -0
  43. package/dist/migrations/PostgresMigrationAgent.js.map +1 -0
  44. package/dist/migrations/PostgresMigrationStore.js +60 -0
  45. package/dist/migrations/PostgresMigrationStore.js.map +1 -0
  46. package/dist/migrations/migration-utils.js +13 -0
  47. package/dist/migrations/migration-utils.js.map +1 -0
  48. package/dist/migrations/scripts/1684951997326-init.js +196 -0
  49. package/dist/migrations/scripts/1684951997326-init.js.map +1 -0
  50. package/dist/module/PostgresStorageModule.js +23 -0
  51. package/dist/module/PostgresStorageModule.js.map +1 -0
  52. package/dist/storage/PostgresBucketStorageFactory.js +433 -0
  53. package/dist/storage/PostgresBucketStorageFactory.js.map +1 -0
  54. package/dist/storage/PostgresCompactor.js +298 -0
  55. package/dist/storage/PostgresCompactor.js.map +1 -0
  56. package/dist/storage/PostgresStorageProvider.js +35 -0
  57. package/dist/storage/PostgresStorageProvider.js.map +1 -0
  58. package/dist/storage/PostgresSyncRulesStorage.js +619 -0
  59. package/dist/storage/PostgresSyncRulesStorage.js.map +1 -0
  60. package/dist/storage/PostgresTestStorageFactoryGenerator.js +110 -0
  61. package/dist/storage/PostgresTestStorageFactoryGenerator.js.map +1 -0
  62. package/dist/storage/batch/OperationBatch.js +93 -0
  63. package/dist/storage/batch/OperationBatch.js.map +1 -0
  64. package/dist/storage/batch/PostgresBucketBatch.js +732 -0
  65. package/dist/storage/batch/PostgresBucketBatch.js.map +1 -0
  66. package/dist/storage/batch/PostgresPersistedBatch.js +367 -0
  67. package/dist/storage/batch/PostgresPersistedBatch.js.map +1 -0
  68. package/dist/storage/checkpoints/PostgresWriteCheckpointAPI.js +148 -0
  69. package/dist/storage/checkpoints/PostgresWriteCheckpointAPI.js.map +1 -0
  70. package/dist/storage/storage-index.js +6 -0
  71. package/dist/storage/storage-index.js.map +1 -0
  72. package/dist/storage/sync-rules/PostgresPersistedSyncRulesContent.js +58 -0
  73. package/dist/storage/sync-rules/PostgresPersistedSyncRulesContent.js.map +1 -0
  74. package/dist/types/codecs.js +97 -0
  75. package/dist/types/codecs.js.map +1 -0
  76. package/dist/types/models/ActiveCheckpoint.js +12 -0
  77. package/dist/types/models/ActiveCheckpoint.js.map +1 -0
  78. package/dist/types/models/ActiveCheckpointNotification.js +8 -0
  79. package/dist/types/models/ActiveCheckpointNotification.js.map +1 -0
  80. package/dist/types/models/BucketData.js +23 -0
  81. package/dist/types/models/BucketData.js.map +1 -0
  82. package/dist/types/models/BucketParameters.js +11 -0
  83. package/dist/types/models/BucketParameters.js.map +1 -0
  84. package/dist/types/models/CurrentData.js +16 -0
  85. package/dist/types/models/CurrentData.js.map +1 -0
  86. package/dist/types/models/Instance.js +5 -0
  87. package/dist/types/models/Instance.js.map +1 -0
  88. package/dist/types/models/Migration.js +12 -0
  89. package/dist/types/models/Migration.js.map +1 -0
  90. package/dist/types/models/SourceTable.js +24 -0
  91. package/dist/types/models/SourceTable.js.map +1 -0
  92. package/dist/types/models/SyncRules.js +47 -0
  93. package/dist/types/models/SyncRules.js.map +1 -0
  94. package/dist/types/models/WriteCheckpoint.js +13 -0
  95. package/dist/types/models/WriteCheckpoint.js.map +1 -0
  96. package/dist/types/models/models-index.js +11 -0
  97. package/dist/types/models/models-index.js.map +1 -0
  98. package/dist/types/types.js +46 -0
  99. package/dist/types/types.js.map +1 -0
  100. package/dist/utils/bson.js +16 -0
  101. package/dist/utils/bson.js.map +1 -0
  102. package/dist/utils/bucket-data.js +25 -0
  103. package/dist/utils/bucket-data.js.map +1 -0
  104. package/dist/utils/db.js +24 -0
  105. package/dist/utils/db.js.map +1 -0
  106. package/dist/utils/ts-codec.js +11 -0
  107. package/dist/utils/ts-codec.js.map +1 -0
  108. package/dist/utils/utils-index.js +5 -0
  109. package/dist/utils/utils-index.js.map +1 -0
  110. package/package.json +50 -0
  111. package/src/index.ts +10 -0
  112. package/src/migrations/PostgresMigrationAgent.ts +46 -0
  113. package/src/migrations/PostgresMigrationStore.ts +70 -0
  114. package/src/migrations/migration-utils.ts +14 -0
  115. package/src/migrations/scripts/1684951997326-init.ts +141 -0
  116. package/src/module/PostgresStorageModule.ts +30 -0
  117. package/src/storage/PostgresBucketStorageFactory.ts +496 -0
  118. package/src/storage/PostgresCompactor.ts +366 -0
  119. package/src/storage/PostgresStorageProvider.ts +42 -0
  120. package/src/storage/PostgresSyncRulesStorage.ts +666 -0
  121. package/src/storage/PostgresTestStorageFactoryGenerator.ts +61 -0
  122. package/src/storage/batch/OperationBatch.ts +101 -0
  123. package/src/storage/batch/PostgresBucketBatch.ts +885 -0
  124. package/src/storage/batch/PostgresPersistedBatch.ts +441 -0
  125. package/src/storage/checkpoints/PostgresWriteCheckpointAPI.ts +176 -0
  126. package/src/storage/storage-index.ts +5 -0
  127. package/src/storage/sync-rules/PostgresPersistedSyncRulesContent.ts +67 -0
  128. package/src/types/codecs.ts +136 -0
  129. package/src/types/models/ActiveCheckpoint.ts +15 -0
  130. package/src/types/models/ActiveCheckpointNotification.ts +14 -0
  131. package/src/types/models/BucketData.ts +26 -0
  132. package/src/types/models/BucketParameters.ts +14 -0
  133. package/src/types/models/CurrentData.ts +23 -0
  134. package/src/types/models/Instance.ts +8 -0
  135. package/src/types/models/Migration.ts +19 -0
  136. package/src/types/models/SourceTable.ts +32 -0
  137. package/src/types/models/SyncRules.ts +50 -0
  138. package/src/types/models/WriteCheckpoint.ts +20 -0
  139. package/src/types/models/models-index.ts +10 -0
  140. package/src/types/types.ts +73 -0
  141. package/src/utils/bson.ts +17 -0
  142. package/src/utils/bucket-data.ts +25 -0
  143. package/src/utils/db.ts +27 -0
  144. package/src/utils/ts-codec.ts +14 -0
  145. package/src/utils/utils-index.ts +4 -0
  146. package/test/src/__snapshots__/storage.test.ts.snap +9 -0
  147. package/test/src/__snapshots__/storage_sync.test.ts.snap +332 -0
  148. package/test/src/env.ts +6 -0
  149. package/test/src/migrations.test.ts +34 -0
  150. package/test/src/setup.ts +16 -0
  151. package/test/src/storage.test.ts +131 -0
  152. package/test/src/storage_compacting.test.ts +5 -0
  153. package/test/src/storage_sync.test.ts +12 -0
  154. package/test/src/util.ts +34 -0
  155. package/test/tsconfig.json +20 -0
  156. package/tsconfig.json +36 -0
  157. package/vitest.config.ts +13 -0
package/src/storage/batch/PostgresBucketBatch.ts
@@ -0,0 +1,885 @@
+ import * as lib_postgres from '@powersync/lib-service-postgres';
+ import { container, DisposableObserver, errors, logger } from '@powersync/lib-services-framework';
+ import { storage, utils } from '@powersync/service-core';
+ import * as sync_rules from '@powersync/service-sync-rules';
+ import * as timers from 'timers/promises';
+ import * as t from 'ts-codec';
+ import { CurrentBucket, CurrentData, CurrentDataDecoded } from '../../types/models/CurrentData.js';
+ import { models, RequiredOperationBatchLimits } from '../../types/types.js';
+ import { NOTIFICATION_CHANNEL, sql } from '../../utils/db.js';
+ import { pick } from '../../utils/ts-codec.js';
+ import { batchCreateCustomWriteCheckpoints } from '../checkpoints/PostgresWriteCheckpointAPI.js';
+ import { cacheKey, encodedCacheKey, OperationBatch, RecordOperation } from './OperationBatch.js';
+ import { PostgresPersistedBatch } from './PostgresPersistedBatch.js';
+
+ export interface PostgresBucketBatchOptions {
+ db: lib_postgres.DatabaseClient;
+ sync_rules: sync_rules.SqlSyncRules;
+ group_id: number;
+ slot_name: string;
+ last_checkpoint_lsn: string | null;
+ no_checkpoint_before_lsn: string;
+ store_current_data: boolean;
+ keep_alive_op?: bigint | null;
+ /**
+ * Set to true for initial replication.
+ */
+ skip_existing_rows: boolean;
+ batch_limits: RequiredOperationBatchLimits;
+ }
+
+ /**
+ * Intermediate type which helps with watching only the active sync rules
+ * via the Postgres NOTIFY protocol.
+ */
+ const StatefulCheckpoint = models.ActiveCheckpoint.and(t.object({ state: t.Enum(storage.SyncRuleState) }));
+ type StatefulCheckpointDecoded = t.Decoded<typeof StatefulCheckpoint>;
+
+ /**
+ * 15MB. Currently matches MongoDB.
+ * This could be increased in future.
+ */
+ const MAX_ROW_SIZE = 15 * 1024 * 1024;
+
+ export class PostgresBucketBatch
+ extends DisposableObserver<storage.BucketBatchStorageListener>
+ implements storage.BucketStorageBatch
+ {
+ public last_flushed_op: bigint | null = null;
+
+ protected db: lib_postgres.DatabaseClient;
+ protected group_id: number;
+ protected last_checkpoint_lsn: string | null;
+ protected no_checkpoint_before_lsn: string;
+
+ protected persisted_op: bigint | null;
+
+ protected write_checkpoint_batch: storage.CustomWriteCheckpointOptions[];
+ protected readonly sync_rules: sync_rules.SqlSyncRules;
+ protected batch: OperationBatch | null;
+ private lastWaitingLogThrottled = 0;
+
+ constructor(protected options: PostgresBucketBatchOptions) {
+ super();
+ this.db = options.db;
+ this.group_id = options.group_id;
+ this.last_checkpoint_lsn = options.last_checkpoint_lsn;
+ this.no_checkpoint_before_lsn = options.no_checkpoint_before_lsn;
+ this.write_checkpoint_batch = [];
+ this.sync_rules = options.sync_rules;
+ this.batch = null;
+ this.persisted_op = null;
+ if (options.keep_alive_op) {
+ this.persisted_op = options.keep_alive_op;
+ }
+ }
+
+ get lastCheckpointLsn() {
+ return this.last_checkpoint_lsn;
+ }
+
+ async save(record: storage.SaveOptions): Promise<storage.FlushedResult | null> {
+ // TODO maybe share with abstract class
+ const { after, afterReplicaId, before, beforeReplicaId, sourceTable, tag } = record;
+ for (const event of this.getTableEvents(sourceTable)) {
+ this.iterateListeners((cb) =>
+ cb.replicationEvent?.({
+ batch: this,
+ table: sourceTable,
+ data: {
+ op: tag,
+ after: after && utils.isCompleteRow(this.options.store_current_data, after) ? after : undefined,
+ before: before && utils.isCompleteRow(this.options.store_current_data, before) ? before : undefined
+ },
+ event
+ })
+ );
+ }
+ /**
+ * Return early if the table is only an event table
+ */
+ if (!sourceTable.syncData && !sourceTable.syncParameters) {
+ return null;
+ }
+
+ logger.debug(`Saving ${record.tag}:${record.before?.id}/${record.after?.id}`);
+
+ this.batch ??= new OperationBatch(this.options.batch_limits);
+ this.batch.push(new RecordOperation(record));
+
+ if (this.batch.shouldFlush()) {
+ const r = await this.flush();
+ // HACK: Give other streams a chance to also flush
+ await timers.setTimeout(5);
+ return r;
+ }
+ return null;
+ }
+
+ async truncate(sourceTables: storage.SourceTable[]): Promise<storage.FlushedResult | null> {
+ await this.flush();
+
+ let last_op: bigint | null = null;
+ for (let table of sourceTables) {
+ last_op = await this.truncateSingle(table);
+ }
+
+ if (last_op) {
+ this.persisted_op = last_op;
+ }
+
+ return {
+ flushed_op: String(last_op!)
+ };
+ }
+
+ protected async truncateSingle(sourceTable: storage.SourceTable) {
+ // To avoid too large transactions, we limit the amount of data we delete per transaction.
+ // Since we don't use the record data here, we don't have explicit size limits per batch.
+ const BATCH_LIMIT = 2000;
+ let lastBatchCount = BATCH_LIMIT;
+ let processedCount = 0;
+ const codec = pick(models.CurrentData, ['buckets', 'lookups', 'source_key']);
+
+ while (lastBatchCount == BATCH_LIMIT) {
+ lastBatchCount = 0;
+ await this.withReplicationTransaction(async (db) => {
+ const persistedBatch = new PostgresPersistedBatch({
+ group_id: this.group_id,
+ ...this.options.batch_limits
+ });
+
+ for await (const rows of db.streamRows<t.Encoded<typeof codec>>(sql`
+ SELECT
+ buckets,
+ lookups,
+ source_key
+ FROM
+ current_data
+ WHERE
+ group_id = ${{ type: 'int4', value: this.group_id }}
+ AND source_table = ${{ type: 'varchar', value: sourceTable.id }}
+ LIMIT
+ ${{ type: 'int4', value: BATCH_LIMIT }}
+ `)) {
+ lastBatchCount += rows.length;
+ processedCount += rows.length;
+
+ const decodedRows = rows.map((row) => codec.decode(row));
+ for (const value of decodedRows) {
+ persistedBatch.saveBucketData({
+ before_buckets: value.buckets,
+ evaluated: [],
+ table: sourceTable,
+ source_key: value.source_key
+ });
+ persistedBatch.saveParameterData({
+ existing_lookups: value.lookups,
+ evaluated: [],
+ table: sourceTable,
+ source_key: value.source_key
+ });
+ persistedBatch.deleteCurrentData({
+ // This is serialized since we got it from a DB query
+ serialized_source_key: value.source_key,
+ source_table_id: sourceTable.id
+ });
+ }
+ }
+ await persistedBatch.flush(db);
+ });
+ }
+ if (processedCount == 0) {
+ // The op sequence should not have progressed
+ return null;
+ }
+
+ const currentSequence = await this.db.sql`
+ SELECT
+ LAST_VALUE AS value
+ FROM
+ op_id_sequence;
+ `.first<{ value: bigint }>();
+ return currentSequence!.value;
+ }
+
+ async drop(sourceTables: storage.SourceTable[]): Promise<storage.FlushedResult | null> {
+ await this.truncate(sourceTables);
+ const result = await this.flush();
+
+ await this.db.transaction(async (db) => {
+ for (const table of sourceTables) {
+ await db.sql`
+ DELETE FROM source_tables
+ WHERE
+ id = ${{ type: 'varchar', value: table.id }}
+ `.execute();
+ }
+ });
+ return result;
+ }
+
+ async flush(): Promise<storage.FlushedResult | null> {
+ let result: storage.FlushedResult | null = null;
+ // One flush may be split over multiple transactions.
+ // Each flushInner() is one transaction.
+ while (this.batch != null) {
+ let r = await this.flushInner();
+ if (r) {
+ result = r;
+ }
+ }
+ await batchCreateCustomWriteCheckpoints(this.db, this.write_checkpoint_batch);
+ this.write_checkpoint_batch = [];
+ return result;
+ }
+
+ private async flushInner(): Promise<storage.FlushedResult | null> {
+ const batch = this.batch;
+ if (batch == null) {
+ return null;
+ }
+
+ let resumeBatch: OperationBatch | null = null;
+
+ const lastOp = await this.withReplicationTransaction(async (db) => {
+ resumeBatch = await this.replicateBatch(db, batch);
+
+ const sequence = await db.sql`
+ SELECT
+ LAST_VALUE AS value
+ FROM
+ op_id_sequence;
+ `.first<{ value: bigint }>();
+ return sequence!.value;
+ });
+
+ // null if done, set if we need another flush
+ this.batch = resumeBatch;
+
+ if (lastOp == null) {
+ throw new Error('Unexpected last_op == null');
+ }
+
+ this.persisted_op = lastOp;
+ this.last_flushed_op = lastOp;
+ return { flushed_op: String(lastOp) };
+ }
+
+ async commit(lsn: string): Promise<boolean> {
+ await this.flush();
+
+ if (this.last_checkpoint_lsn != null && lsn < this.last_checkpoint_lsn) {
+ // When re-applying transactions, don't create a new checkpoint until
+ // we are past the last transaction.
+ logger.info(`Re-applied transaction ${lsn} - skipping checkpoint`);
+ return false;
+ }
+
+ if (lsn < this.no_checkpoint_before_lsn) {
+ if (Date.now() - this.lastWaitingLogThrottled > 5_000) {
+ logger.info(
+ `Waiting until ${this.no_checkpoint_before_lsn} before creating checkpoint, currently at ${lsn}. Persisted op: ${this.persisted_op}`
+ );
+ this.lastWaitingLogThrottled = Date.now();
+ }
+
+ // Edge case: During initial replication, we have a no_checkpoint_before_lsn set,
+ // and don't actually commit the snapshot.
+ // The first commit can happen from an implicit keepalive message.
+ // That needs the persisted_op to get an accurate checkpoint, so
+ // we persist that in keepalive_op.
+
+ await this.db.sql`
+ UPDATE sync_rules
+ SET
+ keepalive_op = ${{ type: 'int8', value: this.persisted_op }}
+ WHERE
+ id = ${{ type: 'int4', value: this.group_id }}
+ `.execute();
+
+ return false;
+ }
+ const now = new Date().toISOString();
+ const update: Partial<models.SyncRules> = {
+ last_checkpoint_lsn: lsn,
+ last_checkpoint_ts: now,
+ last_keepalive_ts: now,
+ snapshot_done: true,
+ last_fatal_error: null,
+ keepalive_op: null
+ };
+
+ if (this.persisted_op != null) {
+ update.last_checkpoint = this.persisted_op.toString();
+ }
+
+ const doc = await this.db.sql`
+ UPDATE sync_rules
+ SET
+ keepalive_op = ${{ type: 'int8', value: update.keepalive_op }},
+ last_fatal_error = ${{ type: 'varchar', value: update.last_fatal_error }},
+ snapshot_done = ${{ type: 'bool', value: update.snapshot_done }},
+ last_keepalive_ts = ${{ type: 1184, value: update.last_keepalive_ts }},
+ last_checkpoint = COALESCE(
+ ${{ type: 'int8', value: update.last_checkpoint }},
+ last_checkpoint
+ ),
+ last_checkpoint_ts = ${{ type: 1184, value: update.last_checkpoint_ts }},
+ last_checkpoint_lsn = ${{ type: 'varchar', value: update.last_checkpoint_lsn }}
+ WHERE
+ id = ${{ type: 'int4', value: this.group_id }}
+ RETURNING
+ id,
+ state,
+ last_checkpoint,
+ last_checkpoint_lsn
+ `
+ .decoded(StatefulCheckpoint)
+ .first();
+
+ await notifySyncRulesUpdate(this.db, doc!);
+
+ this.persisted_op = null;
+ this.last_checkpoint_lsn = lsn;
+ return true;
+ }
+
+ async keepalive(lsn: string): Promise<boolean> {
+ if (this.last_checkpoint_lsn != null && lsn <= this.last_checkpoint_lsn) {
+ // No-op
+ return false;
+ }
+
+ if (lsn < this.no_checkpoint_before_lsn) {
+ return false;
+ }
+
+ if (this.persisted_op != null) {
+ // The commit may have been skipped due to "no_checkpoint_before_lsn".
+ // Apply it now if relevant
+ logger.info(`Commit due to keepalive at ${lsn} / ${this.persisted_op}`);
+ return await this.commit(lsn);
+ }
+
+ const updated = await this.db.sql`
+ UPDATE sync_rules
+ SET
+ snapshot_done = ${{ type: 'bool', value: true }},
+ last_checkpoint_lsn = ${{ type: 'varchar', value: lsn }},
+ last_fatal_error = ${{ type: 'varchar', value: null }},
+ last_keepalive_ts = ${{ type: 1184, value: new Date().toISOString() }}
+ WHERE
+ id = ${{ type: 'int4', value: this.group_id }}
+ RETURNING
+ id,
+ state,
+ last_checkpoint,
+ last_checkpoint_lsn
+ `
+ .decoded(StatefulCheckpoint)
+ .first();
+
+ await notifySyncRulesUpdate(this.db, updated!);
+
+ this.last_checkpoint_lsn = lsn;
+ return true;
+ }
+
+ async markSnapshotDone(
+ tables: storage.SourceTable[],
+ no_checkpoint_before_lsn: string
+ ): Promise<storage.SourceTable[]> {
+ const ids = tables.map((table) => table.id.toString());
+
+ await this.db.transaction(async (db) => {
+ await db.sql`
+ UPDATE source_tables
+ SET
+ snapshot_done = ${{ type: 'bool', value: true }}
+ WHERE
+ id IN (
+ SELECT
+ (value ->> 0)::TEXT
+ FROM
+ jsonb_array_elements(${{ type: 'jsonb', value: ids }}) AS value
+ );
+ `.execute();
+
+ if (no_checkpoint_before_lsn > this.no_checkpoint_before_lsn) {
+ this.no_checkpoint_before_lsn = no_checkpoint_before_lsn;
+
+ await db.sql`
+ UPDATE sync_rules
+ SET
+ no_checkpoint_before = ${{ type: 'varchar', value: no_checkpoint_before_lsn }},
+ last_keepalive_ts = ${{ type: 1184, value: new Date().toISOString() }}
+ WHERE
+ id = ${{ type: 'int4', value: this.group_id }}
+ `.execute();
+ }
+ });
+ return tables.map((table) => {
+ const copy = new storage.SourceTable(
+ table.id,
+ table.connectionTag,
+ table.objectId,
+ table.schema,
+ table.table,
+ table.replicaIdColumns,
+ table.snapshotComplete
+ );
+ copy.syncData = table.syncData;
+ copy.syncParameters = table.syncParameters;
+ return copy;
+ });
+ }
+
+ addCustomWriteCheckpoint(checkpoint: storage.BatchedCustomWriteCheckpointOptions): void {
+ this.write_checkpoint_batch.push({
+ ...checkpoint,
+ sync_rules_id: this.group_id
+ });
+ }
+
+ protected async replicateBatch(db: lib_postgres.WrappedConnection, batch: OperationBatch) {
+ let sizes: Map<string, number> | undefined = undefined;
+ if (this.options.store_current_data && !this.options.skip_existing_rows) {
+ // We skip this step if we don't store current_data, since the sizes will
+ // always be small in that case.
+
+ // With skipExistingRows, we don't load the full documents into memory,
+ // so we can also skip the size lookup step.
+
+ // Find sizes of current_data documents, to assist in intelligent batching without
+ // exceeding memory limits.
+ const sizeLookups = batch.batch.map((r) => {
+ return {
+ source_table: r.record.sourceTable.id.toString(),
+ /**
+ * Encode to hex in order to pass a jsonb
+ */
+ source_key: storage.serializeReplicaId(r.beforeId).toString('hex')
+ };
+ });
+
+ sizes = new Map<string, number>();
+
+ for await (const rows of db.streamRows<{
+ source_table: string;
+ source_key: storage.ReplicaId;
+ data_size: number;
+ }>(lib_postgres.sql`
+ WITH
+ filter_data AS (
+ SELECT
+ decode(FILTER ->> 'source_key', 'hex') AS source_key, -- Decoding from hex to bytea
+ (FILTER ->> 'source_table') AS source_table_id
+ FROM
+ jsonb_array_elements(${{ type: 'jsonb', value: sizeLookups }}::jsonb) AS FILTER
+ )
+ SELECT
+ pg_column_size(c.data) AS data_size,
+ c.source_table,
+ c.source_key
+ FROM
+ current_data c
+ JOIN filter_data f ON c.source_table = f.source_table_id
+ AND c.source_key = f.source_key
+ WHERE
+ c.group_id = ${{ type: 'int4', value: this.group_id }}
+ `)) {
+ for (const row of rows) {
+ const key = cacheKey(row.source_table, row.source_key);
+ sizes.set(key, row.data_size);
+ }
+ }
+ }
+
+ // If set, we need to start a new transaction with this batch.
+ let resumeBatch: OperationBatch | null = null;
+
+ // Now batch according to the sizes
+ // This is a single batch if storeCurrentData == false
+ for await (const b of batch.batched(sizes)) {
+ if (resumeBatch) {
+ // These operations need to be completed in a new transaction.
+ for (let op of b) {
+ resumeBatch.push(op);
+ }
+ continue;
+ }
+
+ const lookups = b.map((r) => {
+ return {
+ source_table: r.record.sourceTable.id,
+ source_key: storage.serializeReplicaId(r.beforeId).toString('hex')
+ };
+ });
+
+ const current_data_lookup = new Map<string, CurrentDataDecoded>();
+ for await (const currentDataRows of db.streamRows<CurrentData>({
+ statement: /* sql */ `
+ WITH
+ filter_data AS (
+ SELECT
+ decode(FILTER ->> 'source_key', 'hex') AS source_key, -- Decoding from hex to bytea
+ (FILTER ->> 'source_table') AS source_table_id
+ FROM
+ jsonb_array_elements($1::jsonb) AS FILTER
+ )
+ SELECT
+ --- With skipExistingRows, we only need to know whether or not the row exists.
+ ${this.options.skip_existing_rows ? `c.source_table, c.source_key` : 'c.*'}
+ FROM
+ current_data c
+ JOIN filter_data f ON c.source_table = f.source_table_id
+ AND c.source_key = f.source_key
+ WHERE
+ c.group_id = $2
+ `,
+ params: [
+ {
+ type: 'jsonb',
+ value: lookups
+ },
+ {
+ type: 'int8',
+ value: this.group_id
+ }
+ ]
+ })) {
+ for (const row of currentDataRows) {
+ const decoded = this.options.skip_existing_rows
+ ? pick(CurrentData, ['source_key', 'source_table']).decode(row)
+ : CurrentData.decode(row);
+ current_data_lookup.set(
+ encodedCacheKey(decoded.source_table, decoded.source_key),
+ decoded as CurrentDataDecoded
+ );
+ }
+ }
+
+ let persistedBatch: PostgresPersistedBatch | null = new PostgresPersistedBatch({
+ group_id: this.group_id,
+ ...this.options.batch_limits
+ });
+
+ for (const op of b) {
+ // These operations need to be completed in a new transaction
+ if (resumeBatch) {
+ resumeBatch.push(op);
+ continue;
+ }
+
+ const currentData = current_data_lookup.get(op.internalBeforeKey) ?? null;
+ if (currentData != null) {
+ // If it will be used again later, it will be set again using nextData below
+ current_data_lookup.delete(op.internalBeforeKey);
+ }
+ const nextData = await this.saveOperation(persistedBatch!, op, currentData);
+ if (nextData != null) {
+ // Update our current_data and size cache
+ current_data_lookup.set(op.internalAfterKey!, nextData);
+ sizes?.set(op.internalAfterKey!, nextData.data.byteLength);
+ }
+
+ if (persistedBatch!.shouldFlushTransaction()) {
+ await persistedBatch!.flush(db);
+ // The operations stored in this batch will be processed in the `resumeBatch`
+ persistedBatch = null;
+ // Return the remaining entries for the next resume transaction
+ resumeBatch = new OperationBatch(this.options.batch_limits);
+ }
+ }
+
+ if (persistedBatch) {
+ /**
+ * If we get here, the operations stayed under the maximum size, so flush now.
+ * `persistedBatch` is `null` when the remaining operations should be flushed in a new transaction.
+ */
+ await persistedBatch.flush(db);
+ }
+ }
+ return resumeBatch;
+ }
+
+ protected async saveOperation(
+ persistedBatch: PostgresPersistedBatch,
+ operation: RecordOperation,
+ currentData?: CurrentDataDecoded | null
+ ) {
+ const record = operation.record;
+ // We store bytea columns for source keys
+ const beforeId = operation.beforeId;
+ const afterId = operation.afterId;
+ let after = record.after;
+ const sourceTable = record.sourceTable;
+
+ let existingBuckets: CurrentBucket[] = [];
+ let newBuckets: CurrentBucket[] = [];
+ let existingLookups: Buffer[] = [];
+ let newLookups: Buffer[] = [];
+
+ if (this.options.skip_existing_rows) {
+ if (record.tag == storage.SaveOperationTag.INSERT) {
+ if (currentData != null) {
+ // Initial replication, and we already have the record.
+ // This may be a different version of the record, but streaming replication
+ // will take care of that.
+ // Skip the insert here.
+ return null;
+ }
+ } else {
+ throw new Error(`${record.tag} not supported with skipExistingRows: true`);
+ }
+ }
+
+ if (record.tag == storage.SaveOperationTag.UPDATE) {
+ const result = currentData;
+ if (result == null) {
+ // Not an error if we re-apply a transaction
+ existingBuckets = [];
+ existingLookups = [];
+ // Log to help with debugging if there was a consistency issue
+ if (this.options.store_current_data) {
+ logger.warn(
+ `Cannot find previous record for update on ${record.sourceTable.qualifiedName}: ${beforeId} / ${record.before?.id}`
+ );
+ }
+ } else {
+ existingBuckets = result.buckets;
+ existingLookups = result.lookups;
+ if (this.options.store_current_data) {
+ const data = storage.deserializeBson(result.data) as sync_rules.SqliteRow;
+ after = storage.mergeToast(after!, data);
+ }
+ }
+ } else if (record.tag == storage.SaveOperationTag.DELETE) {
+ const result = currentData;
+ if (result == null) {
+ // Not an error if we re-apply a transaction
+ existingBuckets = [];
+ existingLookups = [];
+ // Log to help with debugging if there was a consistency issue
+ if (this.options.store_current_data) {
+ logger.warn(
+ `Cannot find previous record for delete on ${record.sourceTable.qualifiedName}: ${beforeId} / ${record.before?.id}`
+ );
+ }
+ } else {
+ existingBuckets = result.buckets;
+ existingLookups = result.lookups;
+ }
+ }
+
+ let afterData: Buffer | undefined;
+ if (afterId != null && !this.options.store_current_data) {
+ afterData = storage.serializeBson({});
+ } else if (afterId != null) {
+ try {
+ afterData = storage.serializeBson(after);
+ if (afterData!.byteLength > MAX_ROW_SIZE) {
+ throw new Error(`Row too large: ${afterData?.byteLength}`);
+ }
+ } catch (e) {
+ // Replace with empty values, equivalent to TOAST values
+ after = Object.fromEntries(
+ Object.entries(after!).map(([key, value]) => {
+ return [key, undefined];
+ })
+ );
+ afterData = storage.serializeBson(after);
+
+ container.reporter.captureMessage(
+ `Data too big on ${record.sourceTable.qualifiedName}.${record.after?.id}: ${e.message}`,
+ {
+ level: errors.ErrorSeverity.WARNING,
+ metadata: {
+ replication_slot: this.options.slot_name,
+ table: record.sourceTable.qualifiedName
+ }
+ }
+ );
+ }
+ }
+
+ // 2. Save bucket data
+ if (beforeId != null && (afterId == null || !storage.replicaIdEquals(beforeId, afterId))) {
+ // Source ID updated
+ if (sourceTable.syncData) {
+ // Delete old record
+ persistedBatch.saveBucketData({
+ source_key: beforeId,
+ table: sourceTable,
+ before_buckets: existingBuckets,
+ evaluated: []
+ });
+ // Clear this, so we don't also try to REMOVE for the new id
+ existingBuckets = [];
+ }
+
+ if (sourceTable.syncParameters) {
+ // Delete old parameters
+ persistedBatch.saveParameterData({
+ source_key: beforeId,
+ table: sourceTable,
+ evaluated: [],
+ existing_lookups: existingLookups
+ });
+ existingLookups = [];
+ }
+ }
+
+ // If we re-apply a transaction, we can end up with a partial row.
+ //
+ // We may end up with toasted values, which means the record is not quite valid.
+ // However, it will be valid by the end of the transaction.
+ //
+ // In this case, we don't save the op, but we do save the current data.
+ if (afterId && after && utils.isCompleteRow(this.options.store_current_data, after)) {
+ // Insert or update
+ if (sourceTable.syncData) {
+ const { results: evaluated, errors: syncErrors } = this.sync_rules.evaluateRowWithErrors({
+ record: after,
+ sourceTable
+ });
+
+ for (const error of syncErrors) {
+ container.reporter.captureMessage(
+ `Failed to evaluate data query on ${record.sourceTable.qualifiedName}.${record.after?.id}: ${error.error}`,
+ {
+ level: errors.ErrorSeverity.WARNING,
+ metadata: {
+ replication_slot: this.options.slot_name,
+ table: record.sourceTable.qualifiedName
+ }
+ }
+ );
+ logger.error(
+ `Failed to evaluate data query on ${record.sourceTable.qualifiedName}.${record.after?.id}: ${error.error}`
+ );
+ }
+
+ // Save new one
+ persistedBatch.saveBucketData({
+ source_key: afterId,
+ evaluated,
+ table: sourceTable,
+ before_buckets: existingBuckets
+ });
+
+ newBuckets = evaluated.map((e) => {
+ return {
+ bucket: e.bucket,
+ table: e.table,
+ id: e.id
+ };
+ });
+ }
+
+ if (sourceTable.syncParameters) {
+ // Parameters
+ const { results: paramEvaluated, errors: paramErrors } = this.sync_rules.evaluateParameterRowWithErrors(
+ sourceTable,
+ after
+ );
+
+ for (let error of paramErrors) {
+ container.reporter.captureMessage(
+ `Failed to evaluate parameter query on ${record.sourceTable.qualifiedName}.${record.after?.id}: ${error.error}`,
+ {
+ level: errors.ErrorSeverity.WARNING,
+ metadata: {
+ replication_slot: this.options.slot_name,
+ table: record.sourceTable.qualifiedName
+ }
+ }
+ );
+ logger.error(
+ `Failed to evaluate parameter query on ${record.sourceTable.qualifiedName}.${after.id}: ${error.error}`
+ );
+ }
+
+ persistedBatch.saveParameterData({
+ source_key: afterId,
+ table: sourceTable,
+ evaluated: paramEvaluated,
+ existing_lookups: existingLookups
+ });
+
+ newLookups = paramEvaluated.map((p) => {
+ return storage.serializeLookupBuffer(p.lookup);
+ });
+ }
+ }
+
+ let result: CurrentDataDecoded | null = null;
+
+ // 5. TOAST: Update current data and bucket list.
+ if (afterId) {
+ // Insert or update
+ result = {
+ source_key: afterId,
+ group_id: this.group_id,
+ data: afterData!,
+ source_table: sourceTable.id,
+ buckets: newBuckets,
+ lookups: newLookups
+ };
+ persistedBatch.upsertCurrentData(result);
+ }
+
+ if (afterId == null || !storage.replicaIdEquals(beforeId, afterId)) {
+ // Either a delete (afterId == null), or replaced the old replication id
+ persistedBatch.deleteCurrentData({
+ source_table_id: record.sourceTable.id,
+ source_key: beforeId!
+ });
+ }
+
+ return result;
+ }
+
+ /**
+ * Gets relevant {@link SqlEventDescriptor}s for the given {@link SourceTable}
+ * TODO maybe share this with an abstract class
+ */
+ protected getTableEvents(table: storage.SourceTable): sync_rules.SqlEventDescriptor[] {
+ return this.sync_rules.event_descriptors.filter((evt) =>
+ [...evt.getSourceTables()].some((sourceTable) => sourceTable.matches(table))
+ );
+ }
+
+ protected async withReplicationTransaction<T>(
+ callback: (tx: lib_postgres.WrappedConnection) => Promise<T>
+ ): Promise<T> {
+ try {
+ return await this.db.transaction(async (db) => {
+ return await callback(db);
+ });
+ } finally {
+ await this.db.sql`
+ UPDATE sync_rules
+ SET
+ last_keepalive_ts = ${{ type: 1184, value: new Date().toISOString() }}
+ WHERE
+ id = ${{ type: 'int4', value: this.group_id }}
+ `.execute();
+ }
+ }
+ }
+
+ /**
+ * Uses Postgres' NOTIFY functionality to update different processes when the
+ * active checkpoint has been updated.
+ */
+ export const notifySyncRulesUpdate = async (db: lib_postgres.DatabaseClient, update: StatefulCheckpointDecoded) => {
+ if (update.state != storage.SyncRuleState.ACTIVE) {
+ return;
+ }
+
+ await db.query({
+ statement: `NOTIFY ${NOTIFICATION_CHANNEL}, '${models.ActiveCheckpointNotification.encode({ active_checkpoint: update })}'`
+ });
+ };
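
For orientation, the sketch below shows how the save / flush / commit lifecycle added in this file is typically driven by a replication process. It is an illustrative sketch only, not code from this package: the replicateTransaction helper and the row shape are assumptions, and in practice a batch instance is obtained from the package's storage layer rather than constructed directly.

// Illustrative sketch (assumption, not part of this package): driving a
// storage.BucketStorageBatch through the lifecycle implemented above.
import { storage } from '@powersync/service-core';

async function replicateTransaction(
  batch: storage.BucketStorageBatch,
  table: storage.SourceTable,
  rows: Array<Pick<storage.SaveOptions, 'tag' | 'after' | 'afterReplicaId'>>,
  commitLsn: string
): Promise<boolean> {
  for (const row of rows) {
    // save() buffers each operation; it flushes automatically once the
    // internal OperationBatch reports shouldFlush().
    await batch.save({ ...row, sourceTable: table } as storage.SaveOptions);
  }

  // Persist any remaining buffered operations and custom write checkpoints.
  await batch.flush();

  // Create a checkpoint at this LSN. commit() returns false while the LSN is
  // still behind no_checkpoint_before_lsn or an already-applied checkpoint;
  // a later commit() or keepalive() at a higher LSN creates the checkpoint.
  return await batch.commit(commitLsn);
}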