@powersync/service-module-mongodb 0.0.0-dev-20241128134723 → 0.0.0-dev-20241219110735

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137) hide show
  1. package/CHANGELOG.md +69 -4
  2. package/dist/db/db-index.d.ts +1 -0
  3. package/dist/db/db-index.js +2 -0
  4. package/dist/db/db-index.js.map +1 -0
  5. package/dist/db/mongo.d.ts +35 -0
  6. package/dist/db/mongo.js +73 -0
  7. package/dist/db/mongo.js.map +1 -0
  8. package/dist/index.d.ts +2 -0
  9. package/dist/index.js +2 -0
  10. package/dist/index.js.map +1 -1
  11. package/dist/locks/MonogLocks.d.ts +36 -0
  12. package/dist/locks/MonogLocks.js +83 -0
  13. package/dist/locks/MonogLocks.js.map +1 -0
  14. package/dist/migrations/MonogMigrationAgent.d.ts +12 -0
  15. package/dist/migrations/MonogMigrationAgent.js +25 -0
  16. package/dist/migrations/MonogMigrationAgent.js.map +1 -0
  17. package/dist/migrations/db/migrations/1684951997326-init.d.ts +3 -0
  18. package/dist/migrations/db/migrations/1684951997326-init.js +30 -0
  19. package/dist/migrations/db/migrations/1684951997326-init.js.map +1 -0
  20. package/dist/migrations/db/migrations/1688556755264-initial-sync-rules.d.ts +2 -0
  21. package/dist/migrations/db/migrations/1688556755264-initial-sync-rules.js +5 -0
  22. package/dist/migrations/db/migrations/1688556755264-initial-sync-rules.js.map +1 -0
  23. package/dist/migrations/db/migrations/1702295701188-sync-rule-state.d.ts +3 -0
  24. package/dist/migrations/db/migrations/1702295701188-sync-rule-state.js +54 -0
  25. package/dist/migrations/db/migrations/1702295701188-sync-rule-state.js.map +1 -0
  26. package/dist/migrations/db/migrations/1711543888062-write-checkpoint-index.d.ts +3 -0
  27. package/dist/migrations/db/migrations/1711543888062-write-checkpoint-index.js +26 -0
  28. package/dist/migrations/db/migrations/1711543888062-write-checkpoint-index.js.map +1 -0
  29. package/dist/migrations/db/migrations/1727099539247-custom-write-checkpoint-index.d.ts +3 -0
  30. package/dist/migrations/db/migrations/1727099539247-custom-write-checkpoint-index.js +28 -0
  31. package/dist/migrations/db/migrations/1727099539247-custom-write-checkpoint-index.js.map +1 -0
  32. package/dist/migrations/mongo-migration-store.d.ts +7 -0
  33. package/dist/migrations/mongo-migration-store.js +49 -0
  34. package/dist/migrations/mongo-migration-store.js.map +1 -0
  35. package/dist/module/MongoModule.js +15 -4
  36. package/dist/module/MongoModule.js.map +1 -1
  37. package/dist/replication/MongoManager.d.ts +1 -1
  38. package/dist/replication/MongoManager.js +3 -2
  39. package/dist/replication/MongoManager.js.map +1 -1
  40. package/dist/storage/MongoBucketStorage.d.ts +48 -0
  41. package/dist/storage/MongoBucketStorage.js +425 -0
  42. package/dist/storage/MongoBucketStorage.js.map +1 -0
  43. package/dist/storage/implementation/MongoBucketBatch.d.ts +72 -0
  44. package/dist/storage/implementation/MongoBucketBatch.js +681 -0
  45. package/dist/storage/implementation/MongoBucketBatch.js.map +1 -0
  46. package/dist/storage/implementation/MongoCompactor.d.ts +40 -0
  47. package/dist/storage/implementation/MongoCompactor.js +310 -0
  48. package/dist/storage/implementation/MongoCompactor.js.map +1 -0
  49. package/dist/storage/implementation/MongoIdSequence.d.ts +12 -0
  50. package/dist/storage/implementation/MongoIdSequence.js +21 -0
  51. package/dist/storage/implementation/MongoIdSequence.js.map +1 -0
  52. package/dist/storage/implementation/MongoPersistedSyncRules.d.ts +9 -0
  53. package/dist/storage/implementation/MongoPersistedSyncRules.js +9 -0
  54. package/dist/storage/implementation/MongoPersistedSyncRules.js.map +1 -0
  55. package/dist/storage/implementation/MongoPersistedSyncRulesContent.d.ts +20 -0
  56. package/dist/storage/implementation/MongoPersistedSyncRulesContent.js +26 -0
  57. package/dist/storage/implementation/MongoPersistedSyncRulesContent.js.map +1 -0
  58. package/dist/storage/implementation/MongoStorageProvider.d.ts +6 -0
  59. package/dist/storage/implementation/MongoStorageProvider.js +34 -0
  60. package/dist/storage/implementation/MongoStorageProvider.js.map +1 -0
  61. package/dist/storage/implementation/MongoSyncBucketStorage.d.ts +36 -0
  62. package/dist/storage/implementation/MongoSyncBucketStorage.js +529 -0
  63. package/dist/storage/implementation/MongoSyncBucketStorage.js.map +1 -0
  64. package/dist/storage/implementation/MongoSyncRulesLock.d.ts +16 -0
  65. package/dist/storage/implementation/MongoSyncRulesLock.js +65 -0
  66. package/dist/storage/implementation/MongoSyncRulesLock.js.map +1 -0
  67. package/dist/storage/implementation/MongoWriteCheckpointAPI.d.ts +20 -0
  68. package/dist/storage/implementation/MongoWriteCheckpointAPI.js +104 -0
  69. package/dist/storage/implementation/MongoWriteCheckpointAPI.js.map +1 -0
  70. package/dist/storage/implementation/OperationBatch.d.ts +34 -0
  71. package/dist/storage/implementation/OperationBatch.js +119 -0
  72. package/dist/storage/implementation/OperationBatch.js.map +1 -0
  73. package/dist/storage/implementation/PersistedBatch.d.ts +46 -0
  74. package/dist/storage/implementation/PersistedBatch.js +223 -0
  75. package/dist/storage/implementation/PersistedBatch.js.map +1 -0
  76. package/dist/storage/implementation/config.d.ts +19 -0
  77. package/dist/storage/implementation/config.js +26 -0
  78. package/dist/storage/implementation/config.js.map +1 -0
  79. package/dist/storage/implementation/db.d.ts +36 -0
  80. package/dist/storage/implementation/db.js +47 -0
  81. package/dist/storage/implementation/db.js.map +1 -0
  82. package/dist/storage/implementation/models.d.ts +139 -0
  83. package/dist/storage/implementation/models.js +2 -0
  84. package/dist/storage/implementation/models.js.map +1 -0
  85. package/dist/storage/implementation/util.d.ts +58 -0
  86. package/dist/storage/implementation/util.js +196 -0
  87. package/dist/storage/implementation/util.js.map +1 -0
  88. package/dist/storage/storage-index.d.ts +14 -0
  89. package/dist/storage/storage-index.js +15 -0
  90. package/dist/storage/storage-index.js.map +1 -0
  91. package/dist/types/types.d.ts +3 -0
  92. package/dist/types/types.js +4 -1
  93. package/dist/types/types.js.map +1 -1
  94. package/package.json +11 -8
  95. package/src/db/db-index.ts +1 -0
  96. package/src/db/mongo.ts +81 -0
  97. package/src/index.ts +4 -0
  98. package/src/locks/MonogLocks.ts +147 -0
  99. package/src/migrations/MonogMigrationAgent.ts +39 -0
  100. package/src/migrations/db/migrations/1684951997326-init.ts +39 -0
  101. package/src/migrations/db/migrations/1688556755264-initial-sync-rules.ts +5 -0
  102. package/src/migrations/db/migrations/1702295701188-sync-rule-state.ts +105 -0
  103. package/src/migrations/db/migrations/1711543888062-write-checkpoint-index.ts +38 -0
  104. package/src/migrations/db/migrations/1727099539247-custom-write-checkpoint-index.ts +40 -0
  105. package/src/migrations/mongo-migration-store.ts +62 -0
  106. package/src/module/MongoModule.ts +18 -4
  107. package/src/replication/MongoManager.ts +6 -2
  108. package/src/storage/MongoBucketStorage.ts +530 -0
  109. package/src/storage/implementation/MongoBucketBatch.ts +893 -0
  110. package/src/storage/implementation/MongoCompactor.ts +392 -0
  111. package/src/storage/implementation/MongoIdSequence.ts +24 -0
  112. package/src/storage/implementation/MongoPersistedSyncRules.ts +16 -0
  113. package/src/storage/implementation/MongoPersistedSyncRulesContent.ts +49 -0
  114. package/src/storage/implementation/MongoStorageProvider.ts +42 -0
  115. package/src/storage/implementation/MongoSyncBucketStorage.ts +612 -0
  116. package/src/storage/implementation/MongoSyncRulesLock.ts +88 -0
  117. package/src/storage/implementation/MongoWriteCheckpointAPI.ts +146 -0
  118. package/src/storage/implementation/OperationBatch.ts +130 -0
  119. package/src/storage/implementation/PersistedBatch.ts +283 -0
  120. package/src/storage/implementation/config.ts +40 -0
  121. package/src/storage/implementation/db.ts +88 -0
  122. package/src/storage/implementation/models.ts +160 -0
  123. package/src/storage/implementation/util.ts +209 -0
  124. package/src/storage/storage-index.ts +14 -0
  125. package/src/types/types.ts +8 -1
  126. package/test/src/__snapshots__/storage_sync.test.ts.snap +332 -0
  127. package/test/src/change_stream.test.ts +34 -33
  128. package/test/src/change_stream_utils.ts +6 -6
  129. package/test/src/env.ts +1 -0
  130. package/test/src/slow_tests.test.ts +4 -4
  131. package/test/src/storage.test.ts +7 -0
  132. package/test/src/storage_compacting.test.ts +6 -0
  133. package/test/src/storage_sync.test.ts +113 -0
  134. package/test/src/util.ts +20 -7
  135. package/test/tsconfig.json +4 -0
  136. package/tsconfig.tsbuildinfo +1 -1
  137. package/vitest.config.ts +1 -1
@@ -0,0 +1,893 @@
1
+ import { SqlEventDescriptor, SqliteRow, SqlSyncRules } from '@powersync/service-sync-rules';
2
+ import * as bson from 'bson';
3
+ import * as mongo from 'mongodb';
4
+
5
+ import { container, DisposableObserver, errors, logger } from '@powersync/lib-services-framework';
6
+ import { SaveOperationTag, storage, utils } from '@powersync/service-core';
7
+ import * as timers from 'node:timers/promises';
8
+ import { PowerSyncMongo } from './db.js';
9
+ import { CurrentBucket, CurrentDataDocument, SourceKey, SyncRuleDocument } from './models.js';
10
+ import { MongoIdSequence } from './MongoIdSequence.js';
11
+ import { batchCreateCustomWriteCheckpoints } from './MongoWriteCheckpointAPI.js';
12
+ import { cacheKey, OperationBatch, RecordOperation } from './OperationBatch.js';
13
+ import { PersistedBatch } from './PersistedBatch.js';
14
+ import { BSON_DESERIALIZE_OPTIONS, idPrefixFilter, replicaIdEquals, serializeLookup } from './util.js';
15
+
16
+ /**
17
+ * Maximum serialized size of a single row that is stored as-is: 15MB. saveOperation() replaces the values of larger rows with empty values.
18
+ */
19
+ const MAX_ROW_SIZE = 15 * 1024 * 1024;
20
+
21
+ // Currently, we can only have a single flush() at a time, since it locks the op_id sequence.
22
+ // While the MongoDB transaction retry mechanism handles this okay, using an in-process Mutex
23
+ // makes it fairer and has less overhead.
24
+ //
25
+ // In the future, we can investigate allowing multiple replication streams to operate independently.
26
+ const replicationMutex = new utils.Mutex();
27
+
28
+ /** Construction options for MongoBucketBatch. */ export interface MongoBucketBatchOptions {
29
+ db: PowerSyncMongo;
30
+ syncRules: SqlSyncRules;
31
+ groupId: number; // _id of the sync_rules document this batch updates
32
+ slotName: string; // included in log messages and error-report metadata
33
+ lastCheckpointLsn: string | null; // initial value of the batch's last checkpoint LSN
34
+ keepaliveOp: string | null; // when set, converted with BigInt() and used as the initial persisted op
35
+ noCheckpointBeforeLsn: string; // commit() and keepalive() create no checkpoint for LSNs below this
36
+ storeCurrentData: boolean; // when false, an empty document is stored instead of the row data
37
+ /**
38
+ * Set to true for initial replication. Only INSERT operations are accepted, and rows that already exist are skipped.
39
+ */
40
+ skipExistingRows: boolean;
41
+ }
42
+
43
+ export class MongoBucketBatch
44
+ extends DisposableObserver<storage.BucketBatchStorageListener>
45
+ implements storage.BucketStorageBatch
46
+ {
47
+ private readonly client: mongo.MongoClient;
48
+ public readonly db: PowerSyncMongo;
49
+ public readonly session: mongo.ClientSession;
50
+ private readonly sync_rules: SqlSyncRules;
51
+
52
+ private readonly group_id: number;
53
+
54
+ private readonly slot_name: string;
55
+ private readonly storeCurrentData: boolean;
56
+ private readonly skipExistingRows: boolean;
57
+
58
+ private batch: OperationBatch | null = null;
59
+ private write_checkpoint_batch: storage.CustomWriteCheckpointOptions[] = [];
60
+
61
+ /**
62
+ * Last LSN received associated with a checkpoint.
63
+ *
64
+ * This could be either:
65
+ * 1. A commit LSN.
66
+ * 2. A keepalive message LSN.
67
+ */
68
+ private last_checkpoint_lsn: string | null = null;
69
+
70
+ private no_checkpoint_before_lsn: string;
71
+
72
+ private persisted_op: bigint | null = null;
73
+
74
+ /**
75
+ * For tests only - not for persistence logic.
76
+ */
77
+ public last_flushed_op: bigint | null = null;
78
+
79
+ constructor(options: MongoBucketBatchOptions) {
80
+ super();
81
+ // Database handles. The session is started on the same client that backs `db`.
82
+ this.db = options.db;
83
+ this.client = options.db.client;
84
+ this.session = this.client.startSession();
85
+ // Replication identity and behaviour flags.
86
+ this.group_id = options.groupId;
87
+ this.slot_name = options.slotName;
88
+ this.sync_rules = options.syncRules;
89
+ this.storeCurrentData = options.storeCurrentData;
90
+ this.skipExistingRows = options.skipExistingRows;
91
+ // Checkpoint state. A stored keepalive op, if any, becomes the initial persisted op.
92
+ this.last_checkpoint_lsn = options.lastCheckpointLsn;
93
+ this.no_checkpoint_before_lsn = options.noCheckpointBeforeLsn;
94
+ this.persisted_op = options.keepaliveOp ? BigInt(options.keepaliveOp) : null;
95
+ this.batch = new OperationBatch();
96
+ }
97
+
98
+ addCustomWriteCheckpoint(checkpoint: storage.BatchedCustomWriteCheckpointOptions): void {
99
+ // Tag the checkpoint with this batch's sync rules id.
100
+ const entry: storage.CustomWriteCheckpointOptions = { ...checkpoint, sync_rules_id: this.group_id };
101
+ // Only queued here; flush() writes the queue out.
102
+ this.write_checkpoint_batch.push(entry);
103
+ }
104
+
105
+ /** LSN of the last checkpoint: the value passed at construction until commit() or keepalive() records a newer one; null if there is none. */ get lastCheckpointLsn() {
106
+ return this.last_checkpoint_lsn;
107
+ }
108
+
109
+ async flush(): Promise<storage.FlushedResult | null> {
110
+ // A single flush can span several transactions: every flushInner() call is one
111
+ // transaction, and this.batch stays set for as long as operations remain.
112
+ let latest: storage.FlushedResult | null = null;
113
+ while (this.batch != null) {
114
+ const flushed = await this.flushInner();
115
+ // Keep the most recent non-null result.
116
+ latest = flushed ?? latest;
117
+ }
118
+ // Write out the queued custom write checkpoints, then reset the queue.
119
+ await batchCreateCustomWriteCheckpoints(this.db, this.write_checkpoint_batch);
120
+ this.write_checkpoint_batch = [];
121
+ return latest;
122
+ }
123
+
124
+ /** Runs one replication transaction over the pending batch. Stores any unprocessed remainder back in this.batch, records the last op id of the sequence as persisted_op and last_flushed_op, and returns it as a string. Returns null when there is no pending batch. */ private async flushInner(): Promise<storage.FlushedResult | null> {
125
+ const batch = this.batch;
126
+ if (batch == null) {
127
+ return null;
128
+ }
129
+
130
+ let last_op: bigint | null = null;
131
+ let resumeBatch: OperationBatch | null = null;
132
+
133
+ await this.withReplicationTransaction(`Flushing ${batch.length} ops`, async (session, opSeq) => {
134
+ resumeBatch = await this.replicateBatch(session, batch, opSeq);
135
+
136
+ last_op = opSeq.last();
137
+ });
138
+
139
+ // null if done, set if we need another flush
140
+ this.batch = resumeBatch;
141
+
142
+ if (last_op == null) {
143
+ throw new Error('Unexpected last_op == null');
144
+ }
145
+
146
+ this.persisted_op = last_op;
147
+ this.last_flushed_op = last_op;
148
+ return { flushed_op: String(last_op) };
149
+ }
150
+
151
+ /** Applies the operations of `batch`, taking op ids from `op_seq`. Operations are processed in the sub-batches produced by batch.batched(sizes): the current_data documents of each sub-batch are loaded, every operation is passed to saveOperation(), and the resulting PersistedBatch is flushed. Once a PersistedBatch reports shouldFlushTransaction(), it is flushed and all remaining operations are moved to a new OperationBatch. Returns that batch of unprocessed operations, or null when everything was processed. */ private async replicateBatch(
152
+ session: mongo.ClientSession,
153
+ batch: OperationBatch,
154
+ op_seq: MongoIdSequence
155
+ ): Promise<OperationBatch | null> {
156
+ let sizes: Map<string, number> | undefined = undefined;
157
+ if (this.storeCurrentData && !this.skipExistingRows) {
158
+ // We skip this step if we don't store current_data, since the sizes will
159
+ // always be small in that case.
160
+
161
+ // With skipExistingRows, we don't load the full documents into memory,
162
+ // so we can also skip the size lookup step.
163
+
164
+ // Find sizes of current_data documents, to assist in intelligent batching without
165
+ // exceeding memory limits.
166
+ //
167
+ // A previous attempt tried to do batching by the results of the current_data query
168
+ // (automatically limited to 48MB(?) per batch by MongoDB). The issue is that it changes
169
+ // the order of processing, which then becomes really tricky to manage.
170
+ // This now takes 2+ queries, but doesn't have any issues with order of operations.
171
+ const sizeLookups: SourceKey[] = batch.batch.map((r) => {
172
+ return { g: this.group_id, t: r.record.sourceTable.id, k: r.beforeId };
173
+ });
174
+
175
+ sizes = new Map<string, number>();
176
+
177
+ const sizeCursor: mongo.AggregationCursor<{ _id: SourceKey; size: number }> = this.db.current_data.aggregate(
178
+ [
179
+ {
180
+ $match: {
181
+ _id: { $in: sizeLookups }
182
+ }
183
+ },
184
+ {
185
+ $project: {
186
+ _id: 1,
187
+ size: { $bsonSize: '$$ROOT' }
188
+ }
189
+ }
190
+ ],
191
+ { session }
192
+ );
193
+ for await (let doc of sizeCursor.stream()) {
194
+ const key = cacheKey(doc._id.t, doc._id.k);
195
+ sizes.set(key, doc.size);
196
+ }
197
+ }
198
+
199
+ // If set, we need to start a new transaction with this batch.
200
+ let resumeBatch: OperationBatch | null = null;
201
+ let transactionSize = 0;
202
+
203
+ // Now batch according to the sizes
204
+ // This is a single batch if storeCurrentData == false
205
+ for await (let b of batch.batched(sizes)) {
206
+ if (resumeBatch) {
207
+ for (let op of b) {
208
+ resumeBatch.push(op);
209
+ }
210
+ continue;
211
+ }
212
+ const lookups: SourceKey[] = b.map((r) => {
213
+ return { g: this.group_id, t: r.record.sourceTable.id, k: r.beforeId };
214
+ });
215
+ let current_data_lookup = new Map<string, CurrentDataDocument>();
216
+ // With skipExistingRows, we only need to know whether or not the row exists.
217
+ const projection = this.skipExistingRows ? { _id: 1 } : undefined;
218
+ const cursor = this.db.current_data.find(
219
+ {
220
+ _id: { $in: lookups }
221
+ },
222
+ { session, projection }
223
+ );
224
+ for await (let doc of cursor.stream()) {
225
+ current_data_lookup.set(cacheKey(doc._id.t, doc._id.k), doc);
226
+ }
227
+
228
+ let persistedBatch: PersistedBatch | null = new PersistedBatch(this.group_id, transactionSize);
229
+
230
+ for (let op of b) {
231
+ if (resumeBatch) {
232
+ resumeBatch.push(op);
233
+ continue;
234
+ }
235
+ const currentData = current_data_lookup.get(op.internalBeforeKey) ?? null;
236
+ if (currentData != null) {
237
+ // If it will be used again later, it will be set again using nextData below
238
+ current_data_lookup.delete(op.internalBeforeKey);
239
+ }
240
+ const nextData = this.saveOperation(persistedBatch!, op, currentData, op_seq);
241
+ if (nextData != null) {
242
+ // Update our current_data and size cache
243
+ current_data_lookup.set(op.internalAfterKey!, nextData);
244
+ sizes?.set(op.internalAfterKey!, nextData.data.length());
245
+ }
246
+
247
+ if (persistedBatch!.shouldFlushTransaction()) {
248
+ // Transaction is getting big.
249
+ // Flush, and resume in a new transaction.
250
+ await persistedBatch!.flush(this.db, this.session);
251
+ persistedBatch = null;
252
+ // Computing our current progress is a little tricky here, since
253
+ // we're stopping in the middle of a batch.
254
+ // We create a new batch, and push any remaining operations to it.
255
+ resumeBatch = new OperationBatch();
256
+ }
257
+ }
258
+
259
+ if (persistedBatch) {
260
+ transactionSize = persistedBatch.currentSize;
261
+ await persistedBatch.flush(this.db, this.session);
262
+ }
263
+ }
264
+
265
+ return resumeBatch;
266
+ }
267
+
268
+ /** Stages the writes for a single record operation on `batch`: bucket data, parameter data, and the upsert or delete of the row's current_data document. `current_data` is the stored document for the row's before-key, or null if none was found. Returns the new current_data document for the row, or null when there is no afterId or when an insert is skipped under skipExistingRows. Throws for non-INSERT operations when skipExistingRows is set. */ private saveOperation(
269
+ batch: PersistedBatch,
270
+ operation: RecordOperation,
271
+ current_data: CurrentDataDocument | null,
272
+ opSeq: MongoIdSequence
273
+ ) {
274
+ const record = operation.record;
275
+ const beforeId = operation.beforeId;
276
+ const afterId = operation.afterId;
277
+ let after = record.after;
278
+ const sourceTable = record.sourceTable;
279
+
280
+ let existing_buckets: CurrentBucket[] = [];
281
+ let new_buckets: CurrentBucket[] = [];
282
+ let existing_lookups: bson.Binary[] = [];
283
+ let new_lookups: bson.Binary[] = [];
284
+
285
+ const before_key: SourceKey = { g: this.group_id, t: record.sourceTable.id, k: beforeId };
286
+
287
+ if (this.skipExistingRows) {
288
+ if (record.tag == SaveOperationTag.INSERT) {
289
+ if (current_data != null) {
290
+ // Initial replication, and we already have the record.
291
+ // This may be a different version of the record, but streaming replication
292
+ // will take care of that.
293
+ // Skip the insert here.
294
+ return null;
295
+ }
296
+ } else {
297
+ throw new Error(`${record.tag} not supported with skipExistingRows: true`);
298
+ }
299
+ }
300
+
301
+ if (record.tag == SaveOperationTag.UPDATE) {
302
+ const result = current_data;
303
+ if (result == null) {
304
+ // Not an error if we re-apply a transaction
305
+ existing_buckets = [];
306
+ existing_lookups = [];
307
+ // Log to help with debugging if there was a consistency issue
308
+ if (this.storeCurrentData) {
309
+ logger.warn(
310
+ `Cannot find previous record for update on ${record.sourceTable.qualifiedName}: ${beforeId} / ${record.before?.id}`
311
+ );
312
+ }
313
+ } else {
314
+ existing_buckets = result.buckets;
315
+ existing_lookups = result.lookups;
316
+ if (this.storeCurrentData) {
317
+ const data = bson.deserialize((result.data as mongo.Binary).buffer, BSON_DESERIALIZE_OPTIONS) as SqliteRow;
318
+ after = storage.mergeToast(after!, data);
319
+ }
320
+ }
321
+ } else if (record.tag == SaveOperationTag.DELETE) {
322
+ const result = current_data;
323
+ if (result == null) {
324
+ // Not an error if we re-apply a transaction
325
+ existing_buckets = [];
326
+ existing_lookups = [];
327
+ // Log to help with debugging if there was a consistency issue
328
+ if (this.storeCurrentData) {
329
+ logger.warn(
330
+ `Cannot find previous record for delete on ${record.sourceTable.qualifiedName}: ${beforeId} / ${record.before?.id}`
331
+ );
332
+ }
333
+ } else {
334
+ existing_buckets = result.buckets;
335
+ existing_lookups = result.lookups;
336
+ }
337
+ }
338
+
339
+ let afterData: bson.Binary | undefined;
340
+ if (afterId != null && !this.storeCurrentData) {
341
+ afterData = new bson.Binary(bson.serialize({})); // row data is not stored: keep an empty document
342
+ } else if (afterId != null) {
343
+ try {
344
+ // This will fail immediately if the record is > 16MB.
345
+ afterData = new bson.Binary(bson.serialize(after!));
346
+ // We additionally make sure it's <= 15MB - we need some margin for metadata.
347
+ if (afterData.length() > MAX_ROW_SIZE) {
348
+ throw new Error(`Row too large: ${afterData.length()}`);
349
+ }
350
+ } catch (e) {
351
+ // Replace with empty values, equivalent to TOAST values
352
+ after = Object.fromEntries(
353
+ Object.entries(after!).map(([key, value]) => {
354
+ return [key, undefined];
355
+ })
356
+ );
357
+ afterData = new bson.Binary(bson.serialize(after!));
358
+
359
+ container.reporter.captureMessage(
360
+ `Data too big on ${record.sourceTable.qualifiedName}.${record.after?.id}: ${e.message}`,
361
+ {
362
+ level: errors.ErrorSeverity.WARNING,
363
+ metadata: {
364
+ replication_slot: this.slot_name,
365
+ table: record.sourceTable.qualifiedName
366
+ }
367
+ }
368
+ );
369
+ }
370
+ }
371
+
372
+ // 2. Save bucket data
373
+ if (beforeId != null && (afterId == null || !replicaIdEquals(beforeId, afterId))) {
374
+ // Source ID updated
375
+ if (sourceTable.syncData) {
376
+ // Delete old record
377
+ batch.saveBucketData({
378
+ op_seq: opSeq,
379
+ sourceKey: beforeId,
380
+ table: sourceTable,
381
+ before_buckets: existing_buckets,
382
+ evaluated: []
383
+ });
384
+ // Clear this, so we don't also try to REMOVE for the new id
385
+ existing_buckets = [];
386
+ }
387
+
388
+ if (sourceTable.syncParameters) {
389
+ // Delete old parameters
390
+ batch.saveParameterData({
391
+ op_seq: opSeq,
392
+ sourceKey: beforeId,
393
+ sourceTable,
394
+ evaluated: [],
395
+ existing_lookups
396
+ });
397
+ existing_lookups = [];
398
+ }
399
+ }
400
+
401
+ // If we re-apply a transaction, we can end up with a partial row.
402
+ //
403
+ // We may end up with toasted values, which means the record is not quite valid.
404
+ // However, it will be valid by the end of the transaction.
405
+ //
406
+ // In this case, we don't save the op, but we do save the current data.
407
+ if (afterId && after && utils.isCompleteRow(after)) {
408
+ // Insert or update
409
+ if (sourceTable.syncData) {
410
+ const { results: evaluated, errors: syncErrors } = this.sync_rules.evaluateRowWithErrors({
411
+ record: after,
412
+ sourceTable
413
+ });
414
+
415
+ for (let error of syncErrors) {
416
+ container.reporter.captureMessage(
417
+ `Failed to evaluate data query on ${record.sourceTable.qualifiedName}.${record.after?.id}: ${error.error}`,
418
+ {
419
+ level: errors.ErrorSeverity.WARNING,
420
+ metadata: {
421
+ replication_slot: this.slot_name,
422
+ table: record.sourceTable.qualifiedName
423
+ }
424
+ }
425
+ );
426
+ logger.error(
427
+ `Failed to evaluate data query on ${record.sourceTable.qualifiedName}.${record.after?.id}: ${error.error}`
428
+ );
429
+ }
430
+
431
+ // Save new one
432
+ batch.saveBucketData({
433
+ op_seq: opSeq,
434
+ sourceKey: afterId,
435
+ evaluated,
436
+ table: sourceTable,
437
+ before_buckets: existing_buckets
438
+ });
439
+ new_buckets = evaluated.map((e) => {
440
+ return {
441
+ bucket: e.bucket,
442
+ table: e.table,
443
+ id: e.id
444
+ };
445
+ });
446
+ }
447
+
448
+ if (sourceTable.syncParameters) {
449
+ // Parameters
450
+ const { results: paramEvaluated, errors: paramErrors } = this.sync_rules.evaluateParameterRowWithErrors(
451
+ sourceTable,
452
+ after
453
+ );
454
+
455
+ for (let error of paramErrors) {
456
+ container.reporter.captureMessage(
457
+ `Failed to evaluate parameter query on ${record.sourceTable.qualifiedName}.${record.after?.id}: ${error.error}`,
458
+ {
459
+ level: errors.ErrorSeverity.WARNING,
460
+ metadata: {
461
+ replication_slot: this.slot_name,
462
+ table: record.sourceTable.qualifiedName
463
+ }
464
+ }
465
+ );
466
+ logger.error(
467
+ `Failed to evaluate parameter query on ${record.sourceTable.qualifiedName}.${after.id}: ${error.error}`
468
+ );
469
+ }
470
+
471
+ batch.saveParameterData({
472
+ op_seq: opSeq,
473
+ sourceKey: afterId,
474
+ sourceTable,
475
+ evaluated: paramEvaluated,
476
+ existing_lookups
477
+ });
478
+ new_lookups = paramEvaluated.map((p) => {
479
+ return serializeLookup(p.lookup);
480
+ });
481
+ }
482
+ }
483
+
484
+ let result: CurrentDataDocument | null = null;
485
+
486
+ // 5. TOAST: Update current data and bucket list.
487
+ if (afterId) {
488
+ // Insert or update
489
+ const after_key: SourceKey = { g: this.group_id, t: sourceTable.id, k: afterId };
490
+ batch.upsertCurrentData(after_key, {
491
+ data: afterData,
492
+ buckets: new_buckets,
493
+ lookups: new_lookups
494
+ });
495
+ result = {
496
+ _id: after_key,
497
+ data: afterData!,
498
+ buckets: new_buckets,
499
+ lookups: new_lookups
500
+ };
501
+ }
502
+
503
+ if (afterId == null || !replicaIdEquals(beforeId, afterId)) {
504
+ // Either a delete (afterId == null), or replaced the old replication id
505
+ batch.deleteCurrentData(before_key);
506
+ }
507
+ return result;
508
+ }
509
+
510
+ /** Runs `cb` in a transaction on this batch's session while holding the in-process replication mutex. Errors from `cb` are rethrown after a short random delay. */ private async withTransaction(cb: () => Promise<void>) {
511
+ const attempt = async () => {
512
+ try {
513
+ await cb();
514
+ } catch (err: unknown) {
515
+ // Transient errors are likely write conflicts caused by a concurrent write stream replicating, so they are not logged.
516
+ const transient = err instanceof mongo.MongoError && err.hasErrorLabel('TransientTransactionError');
517
+ if (!transient) {
518
+ logger.warn('Transaction error', err as Error);
519
+ }
520
+ // Wait up to 50ms (random) before handing the error back to session.withTransaction().
521
+ await timers.setTimeout(Math.random() * 50);
522
+ throw err;
523
+ }
524
+ };
525
+ // Only one transaction at a time per process.
526
+ await replicationMutex.exclusiveLock(async () => {
527
+ await this.session.withTransaction(attempt, { maxCommitTimeMS: 10000 });
528
+ });
529
+ }
530
+
531
+ /** Runs `callback` inside withTransaction() with an op id sequence that starts from the op_id stored in op_id_sequence. After the callback, writes the sequence's last value back to op_id_sequence and sets last_keepalive_ts on the sync rules document. Each run of the transaction body counts as one try; it throws 'Max transaction tries exceeded' once there have been more than 20 tries and 90 seconds have passed. `description` is only used in the progress log message. */ private async withReplicationTransaction(
532
+ description: string,
533
+ callback: (session: mongo.ClientSession, opSeq: MongoIdSequence) => Promise<void>
534
+ ): Promise<void> {
535
+ let flushTry = 0;
536
+
537
+ const start = Date.now();
538
+ const lastTry = start + 90000;
539
+
540
+ const session = this.session;
541
+
542
+ await this.withTransaction(async () => {
543
+ flushTry += 1;
544
+ if (flushTry % 10 == 0) {
545
+ logger.info(`${this.slot_name} ${description} - try ${flushTry}`);
546
+ }
547
+ if (flushTry > 20 && Date.now() > lastTry) {
548
+ throw new Error('Max transaction tries exceeded');
549
+ }
550
+
551
+ const next_op_id_doc = await this.db.op_id_sequence.findOneAndUpdate(
552
+ {
553
+ _id: 'main'
554
+ },
555
+ {
556
+ $setOnInsert: { op_id: 0n },
557
+ $set: {
558
+ // Force update to ensure we get a mongo lock
559
+ ts: Date.now()
560
+ }
561
+ },
562
+ {
563
+ upsert: true,
564
+ returnDocument: 'after',
565
+ session
566
+ }
567
+ );
568
+ const opSeq = new MongoIdSequence(next_op_id_doc?.op_id ?? 0n);
569
+
570
+ await callback(session, opSeq);
571
+
572
+ await this.db.op_id_sequence.updateOne(
573
+ {
574
+ _id: 'main'
575
+ },
576
+ {
577
+ $set: {
578
+ op_id: opSeq.last()
579
+ }
580
+ },
581
+ {
582
+ session
583
+ }
584
+ );
585
+
586
+ await this.db.sync_rules.updateOne(
587
+ {
588
+ _id: this.group_id
589
+ },
590
+ {
591
+ $set: {
592
+ last_keepalive_ts: new Date()
593
+ }
594
+ },
595
+ { session }
596
+ );
597
+ });
598
+ }
599
+
600
+ /** Ends the MongoDB session opened in the constructor, then calls the base class's synchronous dispose. */ async [Symbol.asyncDispose]() {
601
+ await this.session.endSession();
602
+ super[Symbol.dispose]();
603
+ }
604
+
605
+ private lastWaitingLogThottled = 0;
606
+
607
+ /** Flushes pending operations and, if allowed, records `lsn` as a new checkpoint on the sync rules document. LSNs are compared as strings. Returns false without creating a checkpoint when `lsn` is older than the last checkpoint LSN (a re-applied transaction) or lower than no_checkpoint_before_lsn; in the latter case the persisted op is saved as keepalive_op. Returns true once the checkpoint has been written. */ async commit(lsn: string): Promise<boolean> {
608
+ await this.flush();
609
+
610
+ if (this.last_checkpoint_lsn != null && lsn < this.last_checkpoint_lsn) {
611
+ // When re-applying transactions, don't create a new checkpoint until
612
+ // we are past the last transaction.
613
+ logger.info(`Re-applied transaction ${lsn} - skipping checkpoint`);
614
+ return false;
615
+ }
616
+ if (lsn < this.no_checkpoint_before_lsn) {
617
+ if (Date.now() - this.lastWaitingLogThottled > 5_000) { // log this at most once every 5 seconds
618
+ logger.info(
619
+ `Waiting until ${this.no_checkpoint_before_lsn} before creating checkpoint, currently at ${lsn}. Persisted op: ${this.persisted_op}`
620
+ );
621
+ this.lastWaitingLogThottled = Date.now();
622
+ }
623
+
624
+ // Edge case: During initial replication, we have a no_checkpoint_before_lsn set,
625
+ // and don't actually commit the snapshot.
626
+ // The first commit can happen from an implicit keepalive message.
627
+ // That needs the persisted_op to get an accurate checkpoint, so
628
+ // we persist that in keepalive_op.
629
+
630
+ await this.db.sync_rules.updateOne(
631
+ {
632
+ _id: this.group_id
633
+ },
634
+ {
635
+ $set: {
636
+ keepalive_op: this.persisted_op == null ? null : String(this.persisted_op)
637
+ }
638
+ },
639
+ { session: this.session }
640
+ );
641
+
642
+ return false;
643
+ }
644
+
645
+ const now = new Date();
646
+ const update: Partial<SyncRuleDocument> = {
647
+ last_checkpoint_lsn: lsn,
648
+ last_checkpoint_ts: now,
649
+ last_keepalive_ts: now,
650
+ snapshot_done: true,
651
+ last_fatal_error: null,
652
+ keepalive_op: null
653
+ };
654
+
655
+ if (this.persisted_op != null) { // last_checkpoint is only updated when an op has been persisted since the previous checkpoint
656
+ update.last_checkpoint = this.persisted_op;
657
+ }
658
+
659
+ await this.db.sync_rules.updateOne(
660
+ {
661
+ _id: this.group_id
662
+ },
663
+ {
664
+ $set: update
665
+ },
666
+ { session: this.session }
667
+ );
668
+ this.persisted_op = null;
669
+ this.last_checkpoint_lsn = lsn;
670
+ return true;
671
+ }
672
+
673
+ /** Handles a keepalive at `lsn`. Returns false without writing anything when `lsn` is not beyond the last checkpoint LSN or is lower than no_checkpoint_before_lsn. If an op has been persisted but not yet checkpointed, delegates to commit(lsn). Otherwise it only advances last_checkpoint_lsn and last_keepalive_ts on the sync rules document and returns true. */ async keepalive(lsn: string): Promise<boolean> {
674
+ if (this.last_checkpoint_lsn != null && lsn <= this.last_checkpoint_lsn) {
675
+ // No-op
676
+ return false;
677
+ }
678
+
679
+ if (lsn < this.no_checkpoint_before_lsn) {
680
+ return false;
681
+ }
682
+
683
+ if (this.persisted_op != null) {
684
+ // The commit may have been skipped due to "no_checkpoint_before_lsn".
685
+ // Apply it now if relevant
686
+ logger.info(`Commit due to keepalive at ${lsn} / ${this.persisted_op}`);
687
+ return await this.commit(lsn);
688
+ }
689
+
690
+ await this.db.sync_rules.updateOne(
691
+ {
692
+ _id: this.group_id
693
+ },
694
+ {
695
+ $set: {
696
+ last_checkpoint_lsn: lsn,
697
+ snapshot_done: true,
698
+ last_fatal_error: null,
699
+ last_keepalive_ts: new Date()
700
+ }
701
+ },
702
+ { session: this.session }
703
+ );
704
+ this.last_checkpoint_lsn = lsn;
705
+
706
+ return true;
707
+ }
708
+
709
+ /** Notifies listeners of a replication event for each table event, then queues the record in the current batch unless the table syncs neither data nor parameters. Flushes when the batch reports shouldFlush(). Returns the flush result if a flush happened, otherwise null. */ async save(record: storage.SaveOptions): Promise<storage.FlushedResult | null> {
710
+ const { after, before, sourceTable, tag } = record;
711
+ for (const event of this.getTableEvents(sourceTable)) {
712
+ this.iterateListeners((cb) =>
713
+ cb.replicationEvent?.({
714
+ batch: this,
715
+ table: sourceTable,
716
+ data: {
717
+ op: tag,
718
+ after: after && utils.isCompleteRow(after) ? after : undefined,
719
+ before: before && utils.isCompleteRow(before) ? before : undefined
720
+ },
721
+ event
722
+ })
723
+ );
724
+ }
725
+
726
+ /**
727
+ * Return if the table is just an event table
728
+ */
729
+ if (!sourceTable.syncData && !sourceTable.syncParameters) {
730
+ return null;
731
+ }
732
+
733
+ logger.debug(`Saving ${record.tag}:${record.before?.id}/${record.after?.id}`);
734
+
735
+ this.batch ??= new OperationBatch();
736
+ this.batch.push(new RecordOperation(record));
737
+
738
+ if (this.batch.shouldFlush()) {
739
+ const r = await this.flush();
740
+ // HACK: Give other streams a chance to also flush
741
+ await timers.setTimeout(5);
742
+ return r;
743
+ }
744
+ return null;
745
+ }
746
+
747
/**
 * Drop is equivalent to TRUNCATE, plus removing our record of the table.
 *
 * @param sourceTables - Tables to truncate and then delete from `source_tables`.
 * @returns The result of the `flush()` performed after truncating.
 */
async drop(sourceTables: storage.SourceTable[]): Promise<storage.FlushedResult | null> {
  await this.truncate(sourceTables);
  const result = await this.flush();

  const session = this.session;
  await this.withTransaction(async () => {
    for (const table of sourceTables) {
      // The session has to be passed to each operation (as markSnapshotDone does);
      // without it the MongoDB driver executes the delete outside the transaction.
      await this.db.source_tables.deleteOne({ _id: table.id }, { session });
    }
  });
  return result;
}
761
+
762
/**
 * Truncates the given source tables one after another, after flushing any
 * pending operations.
 *
 * @param sourceTables - Tables to truncate. May be empty.
 * @returns The last op produced by the truncation as `flushed_op`, or `null`
 *   when no table was truncated (previously this produced the string "null").
 */
async truncate(sourceTables: storage.SourceTable[]): Promise<storage.FlushedResult | null> {
  await this.flush();

  let last_op: bigint | null = null;
  for (const table of sourceTables) {
    last_op = await this.truncateSingle(table);
  }

  if (last_op == null) {
    // Nothing was truncated, so there is no op to report or to remember.
    return null;
  }

  // Remember the op so that a later commit/keepalive can checkpoint it.
  this.persisted_op = last_op;

  return {
    flushed_op: String(last_op)
  };
}
778
+
779
/**
 * Truncates a single source table in batches, one replication transaction per batch.
 *
 * Each round reads up to `BATCH_LIMIT` `current_data` documents whose `_id`
 * matches this group and table, registers bucket and parameter updates with an
 * empty `evaluated` list for each of them, deletes the `current_data` entry,
 * and flushes. Rounds repeat for as long as a full batch was read.
 *
 * @param sourceTable - The table whose current data is removed.
 * @returns `opSeq.last()` of the final transaction.
 */
async truncateSingle(sourceTable: storage.SourceTable): Promise<bigint> {
  // To avoid too large transactions, we limit the amount of data we delete per transaction.
  // Since we don't use the record data here, we don't have explicit size limits per batch.
  const BATCH_LIMIT = 2000;

  let latestOp: bigint | null = null;
  let fetchedCount = BATCH_LIMIT;

  do {
    await this.withReplicationTransaction(`Truncate ${sourceTable.qualifiedName}`, async (session, opSeq) => {
      const tableFilter: mongo.Filter<CurrentDataDocument> = {
        _id: idPrefixFilter<SourceKey>({ g: this.group_id, t: sourceTable.id }, ['k'])
      };

      // Only the key and the existing bucket/lookup references are needed.
      const rows = await this.db.current_data
        .find(tableFilter, {
          projection: {
            _id: 1,
            buckets: 1,
            lookups: 1
          },
          limit: BATCH_LIMIT,
          session: session
        })
        .toArray();

      const removals = new PersistedBatch(this.group_id, 0);
      for (const row of rows) {
        removals.saveBucketData({
          op_seq: opSeq,
          before_buckets: row.buckets,
          evaluated: [],
          table: sourceTable,
          sourceKey: row._id.k
        });
        removals.saveParameterData({
          op_seq: opSeq,
          existing_lookups: row.lookups,
          evaluated: [],
          sourceTable: sourceTable,
          sourceKey: row._id.k
        });

        removals.deleteCurrentData(row._id);
      }
      await removals.flush(this.db, session);

      // A short batch means the table has been exhausted.
      fetchedCount = rows.length;
      latestOp = opSeq.last();
    });
  } while (fetchedCount === BATCH_LIMIT);

  return latestOp!;
}
832
+
833
/**
 * Marks the given source tables as having completed their snapshot and, when
 * `no_checkpoint_before_lsn` is greater than the value currently held by this
 * batch, stores it on the sync rules document.
 *
 * @param tables - Tables whose `snapshot_done` flag is set in `source_tables`.
 * @param no_checkpoint_before_lsn - LSN before which no checkpoint may be created.
 *   Compared with the current value as a string.
 * @returns Copies of `tables` that report `snapshotComplete` as `true`, matching
 *   what was just persisted. The input objects are not modified.
 */
async markSnapshotDone(tables: storage.SourceTable[], no_checkpoint_before_lsn: string) {
  const session = this.session;
  const ids = tables.map((table) => table.id);

  // Decided once, outside the transaction callback: if the callback were executed
  // more than once, comparing against an already-updated in-memory value would
  // skip the sync_rules update on the later run.
  const advanceLsn = no_checkpoint_before_lsn > this.no_checkpoint_before_lsn;

  await this.withTransaction(async () => {
    await this.db.source_tables.updateMany(
      { _id: { $in: ids } },
      {
        $set: {
          snapshot_done: true
        }
      },
      { session }
    );

    if (advanceLsn) {
      await this.db.sync_rules.updateOne(
        {
          _id: this.group_id
        },
        {
          $set: {
            no_checkpoint_before: no_checkpoint_before_lsn,
            last_keepalive_ts: new Date()
          }
        },
        { session }
      );
    }
  });

  // Only reflect the new value in memory once the transaction has completed.
  if (advanceLsn) {
    this.no_checkpoint_before_lsn = no_checkpoint_before_lsn;
  }

  return tables.map((table) => {
    const copy = new storage.SourceTable(
      table.id,
      table.connectionTag,
      table.objectId,
      table.schema,
      table.table,
      table.replicaIdColumns,
      // The snapshot was just marked as done, so do not carry over the stale flag.
      true
    );
    copy.syncData = table.syncData;
    copy.syncParameters = table.syncParameters;
    return copy;
  });
}
880
+
881
/**
 * Looks up the {@link SqlEventDescriptor}s that apply to the given {@link SourceTable}.
 *
 * @param table - The source table to check.
 * @returns Every descriptor in `sync_rules.event_descriptors` for which at least
 *   one entry of `getSourceTables()` matches `table`.
 */
protected getTableEvents(table: storage.SourceTable): SqlEventDescriptor[] {
  const appliesToTable = (descriptor: SqlEventDescriptor): boolean =>
    Array.from(descriptor.getSourceTables()).some((candidate) => candidate.matches(table));

  return this.sync_rules.event_descriptors.filter(appliesToTable);
}
889
+ }
890
+
891
/**
 * Builds the string key for a current bucket entry, in the form `<bucket>/<table>/<id>`.
 *
 * @param b - The bucket entry whose `bucket`, `table` and `id` are joined.
 * @returns The three values separated by `/`.
 */
export function currentBucketKey(b: CurrentBucket) {
  const { bucket, table, id } = b;
  return `${bucket}/${table}/${id}`;
}