@powersync/service-module-postgres-storage 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (157)
  1. package/CHANGELOG.md +17 -0
  2. package/LICENSE +67 -0
  3. package/README.md +67 -0
  4. package/dist/.tsbuildinfo +1 -0
  5. package/dist/@types/index.d.ts +7 -0
  6. package/dist/@types/migrations/PostgresMigrationAgent.d.ts +12 -0
  7. package/dist/@types/migrations/PostgresMigrationStore.d.ts +14 -0
  8. package/dist/@types/migrations/migration-utils.d.ts +3 -0
  9. package/dist/@types/migrations/scripts/1684951997326-init.d.ts +3 -0
  10. package/dist/@types/module/PostgresStorageModule.d.ts +6 -0
  11. package/dist/@types/storage/PostgresBucketStorageFactory.d.ts +42 -0
  12. package/dist/@types/storage/PostgresCompactor.d.ts +40 -0
  13. package/dist/@types/storage/PostgresStorageProvider.d.ts +5 -0
  14. package/dist/@types/storage/PostgresSyncRulesStorage.d.ts +46 -0
  15. package/dist/@types/storage/PostgresTestStorageFactoryGenerator.d.ts +13 -0
  16. package/dist/@types/storage/batch/OperationBatch.d.ts +47 -0
  17. package/dist/@types/storage/batch/PostgresBucketBatch.d.ts +90 -0
  18. package/dist/@types/storage/batch/PostgresPersistedBatch.d.ts +64 -0
  19. package/dist/@types/storage/checkpoints/PostgresWriteCheckpointAPI.d.ts +20 -0
  20. package/dist/@types/storage/storage-index.d.ts +5 -0
  21. package/dist/@types/storage/sync-rules/PostgresPersistedSyncRulesContent.d.ts +17 -0
  22. package/dist/@types/types/codecs.d.ts +61 -0
  23. package/dist/@types/types/models/ActiveCheckpoint.d.ts +12 -0
  24. package/dist/@types/types/models/ActiveCheckpointNotification.d.ts +19 -0
  25. package/dist/@types/types/models/BucketData.d.ts +22 -0
  26. package/dist/@types/types/models/BucketParameters.d.ts +11 -0
  27. package/dist/@types/types/models/CurrentData.d.ts +22 -0
  28. package/dist/@types/types/models/Instance.d.ts +6 -0
  29. package/dist/@types/types/models/Migration.d.ts +12 -0
  30. package/dist/@types/types/models/SourceTable.d.ts +31 -0
  31. package/dist/@types/types/models/SyncRules.d.ts +47 -0
  32. package/dist/@types/types/models/WriteCheckpoint.d.ts +15 -0
  33. package/dist/@types/types/models/models-index.d.ts +10 -0
  34. package/dist/@types/types/types.d.ts +94 -0
  35. package/dist/@types/utils/bson.d.ts +6 -0
  36. package/dist/@types/utils/bucket-data.d.ts +18 -0
  37. package/dist/@types/utils/db.d.ts +8 -0
  38. package/dist/@types/utils/ts-codec.d.ts +5 -0
  39. package/dist/@types/utils/utils-index.d.ts +4 -0
  40. package/dist/index.js +8 -0
  41. package/dist/index.js.map +1 -0
  42. package/dist/migrations/PostgresMigrationAgent.js +36 -0
  43. package/dist/migrations/PostgresMigrationAgent.js.map +1 -0
  44. package/dist/migrations/PostgresMigrationStore.js +60 -0
  45. package/dist/migrations/PostgresMigrationStore.js.map +1 -0
  46. package/dist/migrations/migration-utils.js +13 -0
  47. package/dist/migrations/migration-utils.js.map +1 -0
  48. package/dist/migrations/scripts/1684951997326-init.js +196 -0
  49. package/dist/migrations/scripts/1684951997326-init.js.map +1 -0
  50. package/dist/module/PostgresStorageModule.js +23 -0
  51. package/dist/module/PostgresStorageModule.js.map +1 -0
  52. package/dist/storage/PostgresBucketStorageFactory.js +433 -0
  53. package/dist/storage/PostgresBucketStorageFactory.js.map +1 -0
  54. package/dist/storage/PostgresCompactor.js +298 -0
  55. package/dist/storage/PostgresCompactor.js.map +1 -0
  56. package/dist/storage/PostgresStorageProvider.js +35 -0
  57. package/dist/storage/PostgresStorageProvider.js.map +1 -0
  58. package/dist/storage/PostgresSyncRulesStorage.js +619 -0
  59. package/dist/storage/PostgresSyncRulesStorage.js.map +1 -0
  60. package/dist/storage/PostgresTestStorageFactoryGenerator.js +110 -0
  61. package/dist/storage/PostgresTestStorageFactoryGenerator.js.map +1 -0
  62. package/dist/storage/batch/OperationBatch.js +93 -0
  63. package/dist/storage/batch/OperationBatch.js.map +1 -0
  64. package/dist/storage/batch/PostgresBucketBatch.js +732 -0
  65. package/dist/storage/batch/PostgresBucketBatch.js.map +1 -0
  66. package/dist/storage/batch/PostgresPersistedBatch.js +367 -0
  67. package/dist/storage/batch/PostgresPersistedBatch.js.map +1 -0
  68. package/dist/storage/checkpoints/PostgresWriteCheckpointAPI.js +148 -0
  69. package/dist/storage/checkpoints/PostgresWriteCheckpointAPI.js.map +1 -0
  70. package/dist/storage/storage-index.js +6 -0
  71. package/dist/storage/storage-index.js.map +1 -0
  72. package/dist/storage/sync-rules/PostgresPersistedSyncRulesContent.js +58 -0
  73. package/dist/storage/sync-rules/PostgresPersistedSyncRulesContent.js.map +1 -0
  74. package/dist/types/codecs.js +97 -0
  75. package/dist/types/codecs.js.map +1 -0
  76. package/dist/types/models/ActiveCheckpoint.js +12 -0
  77. package/dist/types/models/ActiveCheckpoint.js.map +1 -0
  78. package/dist/types/models/ActiveCheckpointNotification.js +8 -0
  79. package/dist/types/models/ActiveCheckpointNotification.js.map +1 -0
  80. package/dist/types/models/BucketData.js +23 -0
  81. package/dist/types/models/BucketData.js.map +1 -0
  82. package/dist/types/models/BucketParameters.js +11 -0
  83. package/dist/types/models/BucketParameters.js.map +1 -0
  84. package/dist/types/models/CurrentData.js +16 -0
  85. package/dist/types/models/CurrentData.js.map +1 -0
  86. package/dist/types/models/Instance.js +5 -0
  87. package/dist/types/models/Instance.js.map +1 -0
  88. package/dist/types/models/Migration.js +12 -0
  89. package/dist/types/models/Migration.js.map +1 -0
  90. package/dist/types/models/SourceTable.js +24 -0
  91. package/dist/types/models/SourceTable.js.map +1 -0
  92. package/dist/types/models/SyncRules.js +47 -0
  93. package/dist/types/models/SyncRules.js.map +1 -0
  94. package/dist/types/models/WriteCheckpoint.js +13 -0
  95. package/dist/types/models/WriteCheckpoint.js.map +1 -0
  96. package/dist/types/models/models-index.js +11 -0
  97. package/dist/types/models/models-index.js.map +1 -0
  98. package/dist/types/types.js +46 -0
  99. package/dist/types/types.js.map +1 -0
  100. package/dist/utils/bson.js +16 -0
  101. package/dist/utils/bson.js.map +1 -0
  102. package/dist/utils/bucket-data.js +25 -0
  103. package/dist/utils/bucket-data.js.map +1 -0
  104. package/dist/utils/db.js +24 -0
  105. package/dist/utils/db.js.map +1 -0
  106. package/dist/utils/ts-codec.js +11 -0
  107. package/dist/utils/ts-codec.js.map +1 -0
  108. package/dist/utils/utils-index.js +5 -0
  109. package/dist/utils/utils-index.js.map +1 -0
  110. package/package.json +50 -0
  111. package/src/index.ts +10 -0
  112. package/src/migrations/PostgresMigrationAgent.ts +46 -0
  113. package/src/migrations/PostgresMigrationStore.ts +70 -0
  114. package/src/migrations/migration-utils.ts +14 -0
  115. package/src/migrations/scripts/1684951997326-init.ts +141 -0
  116. package/src/module/PostgresStorageModule.ts +30 -0
  117. package/src/storage/PostgresBucketStorageFactory.ts +496 -0
  118. package/src/storage/PostgresCompactor.ts +366 -0
  119. package/src/storage/PostgresStorageProvider.ts +42 -0
  120. package/src/storage/PostgresSyncRulesStorage.ts +666 -0
  121. package/src/storage/PostgresTestStorageFactoryGenerator.ts +61 -0
  122. package/src/storage/batch/OperationBatch.ts +101 -0
  123. package/src/storage/batch/PostgresBucketBatch.ts +885 -0
  124. package/src/storage/batch/PostgresPersistedBatch.ts +441 -0
  125. package/src/storage/checkpoints/PostgresWriteCheckpointAPI.ts +176 -0
  126. package/src/storage/storage-index.ts +5 -0
  127. package/src/storage/sync-rules/PostgresPersistedSyncRulesContent.ts +67 -0
  128. package/src/types/codecs.ts +136 -0
  129. package/src/types/models/ActiveCheckpoint.ts +15 -0
  130. package/src/types/models/ActiveCheckpointNotification.ts +14 -0
  131. package/src/types/models/BucketData.ts +26 -0
  132. package/src/types/models/BucketParameters.ts +14 -0
  133. package/src/types/models/CurrentData.ts +23 -0
  134. package/src/types/models/Instance.ts +8 -0
  135. package/src/types/models/Migration.ts +19 -0
  136. package/src/types/models/SourceTable.ts +32 -0
  137. package/src/types/models/SyncRules.ts +50 -0
  138. package/src/types/models/WriteCheckpoint.ts +20 -0
  139. package/src/types/models/models-index.ts +10 -0
  140. package/src/types/types.ts +73 -0
  141. package/src/utils/bson.ts +17 -0
  142. package/src/utils/bucket-data.ts +25 -0
  143. package/src/utils/db.ts +27 -0
  144. package/src/utils/ts-codec.ts +14 -0
  145. package/src/utils/utils-index.ts +4 -0
  146. package/test/src/__snapshots__/storage.test.ts.snap +9 -0
  147. package/test/src/__snapshots__/storage_sync.test.ts.snap +332 -0
  148. package/test/src/env.ts +6 -0
  149. package/test/src/migrations.test.ts +34 -0
  150. package/test/src/setup.ts +16 -0
  151. package/test/src/storage.test.ts +131 -0
  152. package/test/src/storage_compacting.test.ts +5 -0
  153. package/test/src/storage_sync.test.ts +12 -0
  154. package/test/src/util.ts +34 -0
  155. package/test/tsconfig.json +20 -0
  156. package/tsconfig.json +36 -0
  157. package/vitest.config.ts +13 -0
package/dist/storage/batch/PostgresBucketBatch.js
@@ -0,0 +1,732 @@
+ import * as lib_postgres from '@powersync/lib-service-postgres';
+ import { container, DisposableObserver, errors, logger } from '@powersync/lib-services-framework';
+ import { storage, utils } from '@powersync/service-core';
+ import * as timers from 'timers/promises';
+ import * as t from 'ts-codec';
+ import { CurrentData } from '../../types/models/CurrentData.js';
+ import { models } from '../../types/types.js';
+ import { NOTIFICATION_CHANNEL, sql } from '../../utils/db.js';
+ import { pick } from '../../utils/ts-codec.js';
+ import { batchCreateCustomWriteCheckpoints } from '../checkpoints/PostgresWriteCheckpointAPI.js';
+ import { cacheKey, encodedCacheKey, OperationBatch, RecordOperation } from './OperationBatch.js';
+ import { PostgresPersistedBatch } from './PostgresPersistedBatch.js';
+ /**
+  * Intermediate type which helps for only watching the active sync rules
+  * via the Postgres NOTIFY protocol.
+  */
+ const StatefulCheckpoint = models.ActiveCheckpoint.and(t.object({ state: t.Enum(storage.SyncRuleState) }));
+ /**
+  * 15MB. Currently matches MongoDB.
+  * This could be increased in future.
+  */
+ const MAX_ROW_SIZE = 15 * 1024 * 1024;
+ export class PostgresBucketBatch extends DisposableObserver {
+   options;
+   last_flushed_op = null;
+   db;
+   group_id;
+   last_checkpoint_lsn;
+   no_checkpoint_before_lsn;
+   persisted_op;
+   write_checkpoint_batch;
+   sync_rules;
+   batch;
+   lastWaitingLogThrottled = 0;
+   constructor(options) {
+     super();
+     this.options = options;
+     this.db = options.db;
+     this.group_id = options.group_id;
+     this.last_checkpoint_lsn = options.last_checkpoint_lsn;
+     this.no_checkpoint_before_lsn = options.no_checkpoint_before_lsn;
+     this.write_checkpoint_batch = [];
+     this.sync_rules = options.sync_rules;
+     this.batch = null;
+     this.persisted_op = null;
+     if (options.keep_alive_op) {
+       this.persisted_op = options.keep_alive_op;
+     }
+   }
+   get lastCheckpointLsn() {
+     return this.last_checkpoint_lsn;
+   }
+   async save(record) {
+     // TODO maybe share with abstract class
+     const { after, afterReplicaId, before, beforeReplicaId, sourceTable, tag } = record;
+     for (const event of this.getTableEvents(sourceTable)) {
+       this.iterateListeners((cb) => cb.replicationEvent?.({
+         batch: this,
+         table: sourceTable,
+         data: {
+           op: tag,
+           after: after && utils.isCompleteRow(this.options.store_current_data, after) ? after : undefined,
+           before: before && utils.isCompleteRow(this.options.store_current_data, before) ? before : undefined
+         },
+         event
+       }));
+     }
+     /**
+      * Return if the table is just an event table
+      */
+     if (!sourceTable.syncData && !sourceTable.syncParameters) {
+       return null;
+     }
+     logger.debug(`Saving ${record.tag}:${record.before?.id}/${record.after?.id}`);
+     this.batch ??= new OperationBatch(this.options.batch_limits);
+     this.batch.push(new RecordOperation(record));
+     if (this.batch.shouldFlush()) {
+       const r = await this.flush();
+       // HACK: Give other streams a chance to also flush
+       await timers.setTimeout(5);
+       return r;
+     }
+     return null;
+   }
+   async truncate(sourceTables) {
+     await this.flush();
+     let last_op = null;
+     for (let table of sourceTables) {
+       last_op = await this.truncateSingle(table);
+     }
+     if (last_op) {
+       this.persisted_op = last_op;
+     }
+     return {
+       flushed_op: String(last_op)
+     };
+   }
+   async truncateSingle(sourceTable) {
+     // To avoid too large transactions, we limit the amount of data we delete per transaction.
+     // Since we don't use the record data here, we don't have explicit size limits per batch.
+     const BATCH_LIMIT = 2000;
+     let lastBatchCount = BATCH_LIMIT;
+     let processedCount = 0;
+     const codec = pick(models.CurrentData, ['buckets', 'lookups', 'source_key']);
+     while (lastBatchCount == BATCH_LIMIT) {
+       lastBatchCount = 0;
+       await this.withReplicationTransaction(async (db) => {
+         const persistedBatch = new PostgresPersistedBatch({
+           group_id: this.group_id,
+           ...this.options.batch_limits
+         });
+         for await (const rows of db.streamRows(sql`
+           SELECT
+             buckets,
+             lookups,
+             source_key
+           FROM
+             current_data
+           WHERE
+             group_id = ${{ type: 'int4', value: this.group_id }}
+             AND source_table = ${{ type: 'varchar', value: sourceTable.id }}
+           LIMIT
+             ${{ type: 'int4', value: BATCH_LIMIT }}
+         `)) {
+           lastBatchCount += rows.length;
+           processedCount += rows.length;
+           const decodedRows = rows.map((row) => codec.decode(row));
+           for (const value of decodedRows) {
+             persistedBatch.saveBucketData({
+               before_buckets: value.buckets,
+               evaluated: [],
+               table: sourceTable,
+               source_key: value.source_key
+             });
+             persistedBatch.saveParameterData({
+               existing_lookups: value.lookups,
+               evaluated: [],
+               table: sourceTable,
+               source_key: value.source_key
+             });
+             persistedBatch.deleteCurrentData({
+               // This is serialized since we got it from a DB query
+               serialized_source_key: value.source_key,
+               source_table_id: sourceTable.id
+             });
+           }
+         }
+         await persistedBatch.flush(db);
+       });
+     }
+     if (processedCount == 0) {
+       // The op sequence should not have progressed
+       return null;
+     }
+     const currentSequence = await this.db.sql`
+       SELECT
+         LAST_VALUE AS value
+       FROM
+         op_id_sequence;
+     `.first();
+     return currentSequence.value;
+   }
+   async drop(sourceTables) {
+     await this.truncate(sourceTables);
+     const result = await this.flush();
+     await this.db.transaction(async (db) => {
+       for (const table of sourceTables) {
+         await db.sql`
+           DELETE FROM source_tables
+           WHERE
+             id = ${{ type: 'varchar', value: table.id }}
+         `.execute();
+       }
+     });
+     return result;
+   }
+   async flush() {
+     let result = null;
+     // One flush may be split over multiple transactions.
+     // Each flushInner() is one transaction.
+     while (this.batch != null) {
+       let r = await this.flushInner();
+       if (r) {
+         result = r;
+       }
+     }
+     await batchCreateCustomWriteCheckpoints(this.db, this.write_checkpoint_batch);
+     this.write_checkpoint_batch = [];
+     return result;
+   }
+   async flushInner() {
+     const batch = this.batch;
+     if (batch == null) {
+       return null;
+     }
+     let resumeBatch = null;
+     const lastOp = await this.withReplicationTransaction(async (db) => {
+       resumeBatch = await this.replicateBatch(db, batch);
+       const sequence = await db.sql`
+         SELECT
+           LAST_VALUE AS value
+         FROM
+           op_id_sequence;
+       `.first();
+       return sequence.value;
+     });
+     // null if done, set if we need another flush
+     this.batch = resumeBatch;
+     if (lastOp == null) {
+       throw new Error('Unexpected last_op == null');
+     }
+     this.persisted_op = lastOp;
+     this.last_flushed_op = lastOp;
+     return { flushed_op: String(lastOp) };
+   }
+   async commit(lsn) {
+     await this.flush();
+     if (this.last_checkpoint_lsn != null && lsn < this.last_checkpoint_lsn) {
+       // When re-applying transactions, don't create a new checkpoint until
+       // we are past the last transaction.
+       logger.info(`Re-applied transaction ${lsn} - skipping checkpoint`);
+       return false;
+     }
+     if (lsn < this.no_checkpoint_before_lsn) {
+       if (Date.now() - this.lastWaitingLogThrottled > 5_000) {
+         logger.info(`Waiting until ${this.no_checkpoint_before_lsn} before creating checkpoint, currently at ${lsn}. Persisted op: ${this.persisted_op}`);
+         this.lastWaitingLogThrottled = Date.now();
+       }
+       // Edge case: During initial replication, we have a no_checkpoint_before_lsn set,
+       // and don't actually commit the snapshot.
+       // The first commit can happen from an implicit keepalive message.
+       // That needs the persisted_op to get an accurate checkpoint, so
+       // we persist that in keepalive_op.
+       await this.db.sql`
+         UPDATE sync_rules
+         SET
+           keepalive_op = ${{ type: 'int8', value: this.persisted_op }}
+         WHERE
+           id = ${{ type: 'int4', value: this.group_id }}
+       `.execute();
+       return false;
+     }
+     const now = new Date().toISOString();
+     const update = {
+       last_checkpoint_lsn: lsn,
+       last_checkpoint_ts: now,
+       last_keepalive_ts: now,
+       snapshot_done: true,
+       last_fatal_error: null,
+       keepalive_op: null
+     };
+     if (this.persisted_op != null) {
+       update.last_checkpoint = this.persisted_op.toString();
+     }
+     const doc = await this.db.sql`
+       UPDATE sync_rules
+       SET
+         keepalive_op = ${{ type: 'int8', value: update.keepalive_op }},
+         last_fatal_error = ${{ type: 'varchar', value: update.last_fatal_error }},
+         snapshot_done = ${{ type: 'bool', value: update.snapshot_done }},
+         last_keepalive_ts = ${{ type: 1184, value: update.last_keepalive_ts }},
+         last_checkpoint = COALESCE(
+           ${{ type: 'int8', value: update.last_checkpoint }},
+           last_checkpoint
+         ),
+         last_checkpoint_ts = ${{ type: 1184, value: update.last_checkpoint_ts }},
+         last_checkpoint_lsn = ${{ type: 'varchar', value: update.last_checkpoint_lsn }}
+       WHERE
+         id = ${{ type: 'int4', value: this.group_id }}
+       RETURNING
+         id,
+         state,
+         last_checkpoint,
+         last_checkpoint_lsn
+     `
+       .decoded(StatefulCheckpoint)
+       .first();
+     await notifySyncRulesUpdate(this.db, doc);
+     this.persisted_op = null;
+     this.last_checkpoint_lsn = lsn;
+     return true;
+   }
+   async keepalive(lsn) {
+     if (this.last_checkpoint_lsn != null && lsn <= this.last_checkpoint_lsn) {
+       // No-op
+       return false;
+     }
+     if (lsn < this.no_checkpoint_before_lsn) {
+       return false;
+     }
+     if (this.persisted_op != null) {
+       // The commit may have been skipped due to "no_checkpoint_before_lsn".
+       // Apply it now if relevant
+       logger.info(`Commit due to keepalive at ${lsn} / ${this.persisted_op}`);
+       return await this.commit(lsn);
+     }
+     const updated = await this.db.sql`
+       UPDATE sync_rules
+       SET
+         snapshot_done = ${{ type: 'bool', value: true }},
+         last_checkpoint_lsn = ${{ type: 'varchar', value: lsn }},
+         last_fatal_error = ${{ type: 'varchar', value: null }},
+         last_keepalive_ts = ${{ type: 1184, value: new Date().toISOString() }}
+       WHERE
+         id = ${{ type: 'int4', value: this.group_id }}
+       RETURNING
+         id,
+         state,
+         last_checkpoint,
+         last_checkpoint_lsn
+     `
+       .decoded(StatefulCheckpoint)
+       .first();
+     await notifySyncRulesUpdate(this.db, updated);
+     this.last_checkpoint_lsn = lsn;
+     return true;
+   }
+   async markSnapshotDone(tables, no_checkpoint_before_lsn) {
+     const ids = tables.map((table) => table.id.toString());
+     await this.db.transaction(async (db) => {
+       await db.sql`
+         UPDATE source_tables
+         SET
+           snapshot_done = ${{ type: 'bool', value: true }}
+         WHERE
+           id IN (
+             SELECT
+               (value ->> 0)::TEXT
+             FROM
+               jsonb_array_elements(${{ type: 'jsonb', value: ids }}) AS value
+           );
+       `.execute();
+       if (no_checkpoint_before_lsn > this.no_checkpoint_before_lsn) {
+         this.no_checkpoint_before_lsn = no_checkpoint_before_lsn;
+         await db.sql`
+           UPDATE sync_rules
+           SET
+             no_checkpoint_before = ${{ type: 'varchar', value: no_checkpoint_before_lsn }},
+             last_keepalive_ts = ${{ type: 1184, value: new Date().toISOString() }}
+           WHERE
+             id = ${{ type: 'int4', value: this.group_id }}
+         `.execute();
+       }
+     });
+     return tables.map((table) => {
+       const copy = new storage.SourceTable(table.id, table.connectionTag, table.objectId, table.schema, table.table, table.replicaIdColumns, table.snapshotComplete);
+       copy.syncData = table.syncData;
+       copy.syncParameters = table.syncParameters;
+       return copy;
+     });
+   }
+   addCustomWriteCheckpoint(checkpoint) {
+     this.write_checkpoint_batch.push({
+       ...checkpoint,
+       sync_rules_id: this.group_id
+     });
+   }
+   async replicateBatch(db, batch) {
+     let sizes = undefined;
+     if (this.options.store_current_data && !this.options.skip_existing_rows) {
+       // We skip this step if we don't store current_data, since the sizes will
+       // always be small in that case.
+       // With skipExistingRows, we don't load the full documents into memory,
+       // so we can also skip the size lookup step.
+       // Find sizes of current_data documents, to assist in intelligent batching without
+       // exceeding memory limits.
+       const sizeLookups = batch.batch.map((r) => {
+         return {
+           source_table: r.record.sourceTable.id.toString(),
+           /**
+            * Encode to hex in order to pass a jsonb
+            */
+           source_key: storage.serializeReplicaId(r.beforeId).toString('hex')
+         };
+       });
+       sizes = new Map();
+       for await (const rows of db.streamRows(lib_postgres.sql`
+         WITH
+           filter_data AS (
+             SELECT
+               decode(FILTER ->> 'source_key', 'hex') AS source_key, -- Decoding from hex to bytea
+               (FILTER ->> 'source_table') AS source_table_id
+             FROM
+               jsonb_array_elements(${{ type: 'jsonb', value: sizeLookups }}::jsonb) AS FILTER
+           )
+         SELECT
+           pg_column_size(c.data) AS data_size,
+           c.source_table,
+           c.source_key
+         FROM
+           current_data c
+           JOIN filter_data f ON c.source_table = f.source_table_id
+           AND c.source_key = f.source_key
+         WHERE
+           c.group_id = ${{ type: 'int4', value: this.group_id }}
+       `)) {
+         for (const row of rows) {
+           const key = cacheKey(row.source_table, row.source_key);
+           sizes.set(key, row.data_size);
+         }
+       }
+     }
+     // If set, we need to start a new transaction with this batch.
+     let resumeBatch = null;
+     // Now batch according to the sizes
+     // This is a single batch if storeCurrentData == false
+     for await (const b of batch.batched(sizes)) {
+       if (resumeBatch) {
+         // These operations need to be completed in a new transaction.
+         for (let op of b) {
+           resumeBatch.push(op);
+         }
+         continue;
+       }
+       const lookups = b.map((r) => {
+         return {
+           source_table: r.record.sourceTable.id,
+           source_key: storage.serializeReplicaId(r.beforeId).toString('hex')
+         };
+       });
+       const current_data_lookup = new Map();
+       for await (const currentDataRows of db.streamRows({
+         statement: /* sql */ `
+           WITH
+             filter_data AS (
+               SELECT
+                 decode(FILTER ->> 'source_key', 'hex') AS source_key, -- Decoding from hex to bytea
+                 (FILTER ->> 'source_table') AS source_table_id
+               FROM
+                 jsonb_array_elements($1::jsonb) AS FILTER
+             )
+           SELECT
+             --- With skipExistingRows, we only need to know whether or not the row exists.
+             ${this.options.skip_existing_rows ? `c.source_table, c.source_key` : 'c.*'}
+           FROM
+             current_data c
+             JOIN filter_data f ON c.source_table = f.source_table_id
+             AND c.source_key = f.source_key
+           WHERE
+             c.group_id = $2
+         `,
+         params: [
+           {
+             type: 'jsonb',
+             value: lookups
+           },
+           {
+             type: 'int8',
+             value: this.group_id
+           }
+         ]
+       })) {
+         for (const row of currentDataRows) {
+           const decoded = this.options.skip_existing_rows
+             ? pick(CurrentData, ['source_key', 'source_table']).decode(row)
+             : CurrentData.decode(row);
+           current_data_lookup.set(encodedCacheKey(decoded.source_table, decoded.source_key), decoded);
+         }
+       }
+       let persistedBatch = new PostgresPersistedBatch({
+         group_id: this.group_id,
+         ...this.options.batch_limits
+       });
+       for (const op of b) {
+         // These operations need to be completed in a new transaction
+         if (resumeBatch) {
+           resumeBatch.push(op);
+           continue;
+         }
+         const currentData = current_data_lookup.get(op.internalBeforeKey) ?? null;
+         if (currentData != null) {
+           // If it will be used again later, it will be set again using nextData below
+           current_data_lookup.delete(op.internalBeforeKey);
+         }
+         const nextData = await this.saveOperation(persistedBatch, op, currentData);
+         if (nextData != null) {
+           // Update our current_data and size cache
+           current_data_lookup.set(op.internalAfterKey, nextData);
+           sizes?.set(op.internalAfterKey, nextData.data.byteLength);
+         }
+         if (persistedBatch.shouldFlushTransaction()) {
+           await persistedBatch.flush(db);
+           // The operations stored in this batch will be processed in the `resumeBatch`
+           persistedBatch = null;
+           // Return the remaining entries for the next resume transaction
+           resumeBatch = new OperationBatch(this.options.batch_limits);
+         }
+       }
+       if (persistedBatch) {
+         /**
+          * The operations were less than the max size if here. Flush now.
+          * `persistedBatch` will be `null` if the operations should be flushed in a new transaction.
+          */
+         await persistedBatch.flush(db);
+       }
+     }
+     return resumeBatch;
+   }
+   async saveOperation(persistedBatch, operation, currentData) {
+     const record = operation.record;
+     // We store bytea columns for source keys
+     const beforeId = operation.beforeId;
+     const afterId = operation.afterId;
+     let after = record.after;
+     const sourceTable = record.sourceTable;
+     let existingBuckets = [];
+     let newBuckets = [];
+     let existingLookups = [];
+     let newLookups = [];
+     if (this.options.skip_existing_rows) {
+       if (record.tag == storage.SaveOperationTag.INSERT) {
+         if (currentData != null) {
+           // Initial replication, and we already have the record.
+           // This may be a different version of the record, but streaming replication
+           // will take care of that.
+           // Skip the insert here.
+           return null;
+         }
+       }
+       else {
+         throw new Error(`${record.tag} not supported with skipExistingRows: true`);
+       }
+     }
+     if (record.tag == storage.SaveOperationTag.UPDATE) {
+       const result = currentData;
+       if (result == null) {
+         // Not an error if we re-apply a transaction
+         existingBuckets = [];
+         existingLookups = [];
+         // Log to help with debugging if there was a consistency issue
+         if (this.options.store_current_data) {
+           logger.warn(`Cannot find previous record for update on ${record.sourceTable.qualifiedName}: ${beforeId} / ${record.before?.id}`);
+         }
+       }
+       else {
+         existingBuckets = result.buckets;
+         existingLookups = result.lookups;
+         if (this.options.store_current_data) {
+           const data = storage.deserializeBson(result.data);
+           after = storage.mergeToast(after, data);
+         }
+       }
+     }
+     else if (record.tag == storage.SaveOperationTag.DELETE) {
+       const result = currentData;
+       if (result == null) {
+         // Not an error if we re-apply a transaction
+         existingBuckets = [];
+         existingLookups = [];
+         // Log to help with debugging if there was a consistency issue
+         if (this.options.store_current_data) {
+           logger.warn(`Cannot find previous record for delete on ${record.sourceTable.qualifiedName}: ${beforeId} / ${record.before?.id}`);
+         }
+       }
+       else {
+         existingBuckets = result.buckets;
+         existingLookups = result.lookups;
+       }
+     }
+     let afterData;
+     if (afterId != null && !this.options.store_current_data) {
+       afterData = storage.serializeBson({});
+     }
+     else if (afterId != null) {
+       try {
+         afterData = storage.serializeBson(after);
+         if (afterData.byteLength > MAX_ROW_SIZE) {
+           throw new Error(`Row too large: ${afterData?.byteLength}`);
+         }
+       }
+       catch (e) {
+         // Replace with empty values, equivalent to TOAST values
+         after = Object.fromEntries(Object.entries(after).map(([key, value]) => {
+           return [key, undefined];
+         }));
+         afterData = storage.serializeBson(after);
+         container.reporter.captureMessage(`Data too big on ${record.sourceTable.qualifiedName}.${record.after?.id}: ${e.message}`, {
+           level: errors.ErrorSeverity.WARNING,
+           metadata: {
+             replication_slot: this.options.slot_name,
+             table: record.sourceTable.qualifiedName
+           }
+         });
+       }
+     }
+     // 2. Save bucket data
+     if (beforeId != null && (afterId == null || !storage.replicaIdEquals(beforeId, afterId))) {
+       // Source ID updated
+       if (sourceTable.syncData) {
+         // Delete old record
+         persistedBatch.saveBucketData({
+           source_key: beforeId,
+           table: sourceTable,
+           before_buckets: existingBuckets,
+           evaluated: []
+         });
+         // Clear this, so we don't also try to REMOVE for the new id
+         existingBuckets = [];
+       }
+       if (sourceTable.syncParameters) {
+         // Delete old parameters
+         persistedBatch.saveParameterData({
+           source_key: beforeId,
+           table: sourceTable,
+           evaluated: [],
+           existing_lookups: existingLookups
+         });
+         existingLookups = [];
+       }
+     }
+     // If we re-apply a transaction, we can end up with a partial row.
+     //
+     // We may end up with toasted values, which means the record is not quite valid.
+     // However, it will be valid by the end of the transaction.
+     //
+     // In this case, we don't save the op, but we do save the current data.
+     if (afterId && after && utils.isCompleteRow(this.options.store_current_data, after)) {
+       // Insert or update
+       if (sourceTable.syncData) {
+         const { results: evaluated, errors: syncErrors } = this.sync_rules.evaluateRowWithErrors({
+           record: after,
+           sourceTable
+         });
+         for (const error of syncErrors) {
+           container.reporter.captureMessage(`Failed to evaluate data query on ${record.sourceTable.qualifiedName}.${record.after?.id}: ${error.error}`, {
+             level: errors.ErrorSeverity.WARNING,
+             metadata: {
+               replication_slot: this.options.slot_name,
+               table: record.sourceTable.qualifiedName
+             }
+           });
+           logger.error(`Failed to evaluate data query on ${record.sourceTable.qualifiedName}.${record.after?.id}: ${error.error}`);
+         }
+         // Save new one
+         persistedBatch.saveBucketData({
+           source_key: afterId,
+           evaluated,
+           table: sourceTable,
+           before_buckets: existingBuckets
+         });
+         newBuckets = evaluated.map((e) => {
+           return {
+             bucket: e.bucket,
+             table: e.table,
+             id: e.id
+           };
+         });
+       }
+       if (sourceTable.syncParameters) {
+         // Parameters
+         const { results: paramEvaluated, errors: paramErrors } = this.sync_rules.evaluateParameterRowWithErrors(sourceTable, after);
+         for (let error of paramErrors) {
+           container.reporter.captureMessage(`Failed to evaluate parameter query on ${record.sourceTable.qualifiedName}.${record.after?.id}: ${error.error}`, {
+             level: errors.ErrorSeverity.WARNING,
+             metadata: {
+               replication_slot: this.options.slot_name,
+               table: record.sourceTable.qualifiedName
+             }
+           });
+           logger.error(`Failed to evaluate parameter query on ${record.sourceTable.qualifiedName}.${after.id}: ${error.error}`);
+         }
+         persistedBatch.saveParameterData({
+           source_key: afterId,
+           table: sourceTable,
+           evaluated: paramEvaluated,
+           existing_lookups: existingLookups
+         });
+         newLookups = paramEvaluated.map((p) => {
+           return storage.serializeLookupBuffer(p.lookup);
+         });
+       }
+     }
+     let result = null;
+     // 5. TOAST: Update current data and bucket list.
+     if (afterId) {
+       // Insert or update
+       result = {
+         source_key: afterId,
+         group_id: this.group_id,
+         data: afterData,
+         source_table: sourceTable.id,
+         buckets: newBuckets,
+         lookups: newLookups
+       };
+       persistedBatch.upsertCurrentData(result);
+     }
+     if (afterId == null || !storage.replicaIdEquals(beforeId, afterId)) {
+       // Either a delete (afterId == null), or replaced the old replication id
+       persistedBatch.deleteCurrentData({
+         source_table_id: record.sourceTable.id,
+         source_key: beforeId
+       });
+     }
+     return result;
+   }
+   /**
+    * Gets relevant {@link SqlEventDescriptor}s for the given {@link SourceTable}
+    * TODO maybe share this with an abstract class
+    */
+   getTableEvents(table) {
+     return this.sync_rules.event_descriptors.filter((evt) => [...evt.getSourceTables()].some((sourceTable) => sourceTable.matches(table)));
+   }
+   async withReplicationTransaction(callback) {
+     try {
+       return await this.db.transaction(async (db) => {
+         return await callback(db);
+       });
+     }
+     finally {
+       await this.db.sql`
+         UPDATE sync_rules
+         SET
+           last_keepalive_ts = ${{ type: 1184, value: new Date().toISOString() }}
+         WHERE
+           id = ${{ type: 'int4', value: this.group_id }}
+       `.execute();
+     }
+   }
+ }
+ /**
+  * Uses Postgres' NOTIFY functionality to update different processes when the
+  * active checkpoint has been updated.
+  */
+ export const notifySyncRulesUpdate = async (db, update) => {
+   if (update.state != storage.SyncRuleState.ACTIVE) {
+     return;
+   }
+   await db.query({
+     statement: `NOTIFY ${NOTIFICATION_CHANNEL}, '${models.ActiveCheckpointNotification.encode({ active_checkpoint: update })}'`
+   });
+ };
+ //# sourceMappingURL=PostgresBucketBatch.js.map
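
For context on the `notifySyncRulesUpdate` function above: other processes can pick up the new active checkpoint by listening on the same channel instead of polling. The sketch below is not part of this package; it is a minimal, hypothetical consumer using the plain node-postgres (`pg`) client rather than the package's own `lib_postgres` wrapper. The channel name and payload shape are assumptions: the real channel constant is `NOTIFICATION_CHANNEL` from `utils/db.js` (its value is not shown in this diff), and the payload is assumed to be a JSON-serialized `ActiveCheckpointNotification`.

```ts
// Hypothetical listener sketch (not part of @powersync/service-module-postgres-storage).
import pg from 'pg';

// Assumption: placeholder for the package's NOTIFICATION_CHANNEL constant.
const ASSUMED_CHANNEL = 'powersync_active_checkpoint';

async function listenForActiveCheckpoints(connectionString: string) {
  const client = new pg.Client({ connectionString });
  await client.connect();

  client.on('notification', (msg) => {
    if (msg.channel !== ASSUMED_CHANNEL || msg.payload == null) {
      return;
    }
    // Assumption: payload is the JSON-serialized ActiveCheckpointNotification.
    const notification = JSON.parse(msg.payload);
    console.log('new active checkpoint', notification.active_checkpoint);
  });

  // LISTEN must be issued on the same connection that receives the notifications.
  await client.query(`LISTEN ${ASSUMED_CHANNEL}`);
}
```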