@powersync/service-module-mongodb-storage 0.14.0 → 0.15.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. package/CHANGELOG.md +45 -0
  2. package/dist/storage/MongoBucketStorage.js +16 -3
  3. package/dist/storage/MongoBucketStorage.js.map +1 -1
  4. package/dist/storage/implementation/MongoBucketBatch.d.ts +13 -11
  5. package/dist/storage/implementation/MongoBucketBatch.js +208 -127
  6. package/dist/storage/implementation/MongoBucketBatch.js.map +1 -1
  7. package/dist/storage/implementation/MongoChecksums.d.ts +4 -4
  8. package/dist/storage/implementation/MongoChecksums.js +1 -0
  9. package/dist/storage/implementation/MongoChecksums.js.map +1 -1
  10. package/dist/storage/implementation/MongoCompactor.d.ts +8 -2
  11. package/dist/storage/implementation/MongoCompactor.js +50 -21
  12. package/dist/storage/implementation/MongoCompactor.js.map +1 -1
  13. package/dist/storage/implementation/MongoParameterCompactor.d.ts +2 -2
  14. package/dist/storage/implementation/MongoParameterCompactor.js +13 -1
  15. package/dist/storage/implementation/MongoParameterCompactor.js.map +1 -1
  16. package/dist/storage/implementation/MongoPersistedSyncRulesContent.js +2 -7
  17. package/dist/storage/implementation/MongoPersistedSyncRulesContent.js.map +1 -1
  18. package/dist/storage/implementation/MongoSyncBucketStorage.d.ts +9 -4
  19. package/dist/storage/implementation/MongoSyncBucketStorage.js +35 -33
  20. package/dist/storage/implementation/MongoSyncBucketStorage.js.map +1 -1
  21. package/dist/storage/implementation/MongoSyncRulesLock.d.ts +3 -3
  22. package/dist/storage/implementation/MongoSyncRulesLock.js.map +1 -1
  23. package/dist/storage/implementation/MongoWriteCheckpointAPI.d.ts +4 -4
  24. package/dist/storage/implementation/MongoWriteCheckpointAPI.js.map +1 -1
  25. package/dist/storage/implementation/OperationBatch.js +3 -2
  26. package/dist/storage/implementation/OperationBatch.js.map +1 -1
  27. package/dist/storage/implementation/PersistedBatch.d.ts +11 -4
  28. package/dist/storage/implementation/PersistedBatch.js +42 -11
  29. package/dist/storage/implementation/PersistedBatch.js.map +1 -1
  30. package/dist/storage/implementation/db.d.ts +35 -1
  31. package/dist/storage/implementation/db.js +99 -0
  32. package/dist/storage/implementation/db.js.map +1 -1
  33. package/dist/storage/implementation/models.d.ts +15 -3
  34. package/dist/storage/implementation/models.js +2 -1
  35. package/dist/storage/implementation/models.js.map +1 -1
  36. package/dist/utils/test-utils.d.ts +4 -1
  37. package/dist/utils/test-utils.js +15 -12
  38. package/dist/utils/test-utils.js.map +1 -1
  39. package/dist/utils/util.d.ts +2 -1
  40. package/dist/utils/util.js +15 -1
  41. package/dist/utils/util.js.map +1 -1
  42. package/package.json +6 -6
  43. package/src/storage/MongoBucketStorage.ts +29 -8
  44. package/src/storage/implementation/MongoBucketBatch.ts +263 -177
  45. package/src/storage/implementation/MongoChecksums.ts +5 -3
  46. package/src/storage/implementation/MongoCompactor.ts +53 -24
  47. package/src/storage/implementation/MongoParameterCompactor.ts +17 -4
  48. package/src/storage/implementation/MongoPersistedSyncRulesContent.ts +3 -11
  49. package/src/storage/implementation/MongoSyncBucketStorage.ts +33 -26
  50. package/src/storage/implementation/MongoSyncRulesLock.ts +3 -3
  51. package/src/storage/implementation/MongoWriteCheckpointAPI.ts +4 -4
  52. package/src/storage/implementation/OperationBatch.ts +3 -2
  53. package/src/storage/implementation/PersistedBatch.ts +42 -11
  54. package/src/storage/implementation/db.ts +129 -1
  55. package/src/storage/implementation/models.ts +18 -4
  56. package/src/utils/test-utils.ts +15 -12
  57. package/src/utils/util.ts +17 -2
  58. package/test/src/__snapshots__/storage.test.ts.snap +201 -0
  59. package/test/src/__snapshots__/storage_compacting.test.ts.snap +17 -0
  60. package/test/src/__snapshots__/storage_sync.test.ts.snap +1111 -16
  61. package/test/src/storage.test.ts +9 -7
  62. package/test/src/storage_compacting.test.ts +117 -45
  63. package/test/src/storage_sync.test.ts +53 -51
  64. package/test/src/util.ts +3 -3
  65. package/tsconfig.tsbuildinfo +1 -1
@@ -1,5 +1,5 @@
1
1
  import { mongo } from '@powersync/lib-service-mongodb';
2
- import { SqlEventDescriptor, SqliteRow, SqliteValue, HydratedSyncRules } from '@powersync/service-sync-rules';
2
+ import { HydratedSyncRules, SqlEventDescriptor, SqliteRow, SqliteValue } from '@powersync/service-sync-rules';
3
3
  import * as bson from 'bson';
4
4
 
5
5
  import {
@@ -14,16 +14,18 @@ import {
14
14
  } from '@powersync/lib-services-framework';
15
15
  import {
16
16
  BucketStorageMarkRecordUnavailable,
17
+ CheckpointResult,
17
18
  deserializeBson,
18
19
  InternalOpId,
19
20
  isCompleteRow,
20
21
  SaveOperationTag,
21
22
  storage,
23
+ SyncRuleState,
22
24
  utils
23
25
  } from '@powersync/service-core';
24
26
  import * as timers from 'node:timers/promises';
25
- import { idPrefixFilter } from '../../utils/util.js';
26
- import { PowerSyncMongo } from './db.js';
27
+ import { idPrefixFilter, mongoTableId } from '../../utils/util.js';
28
+ import { PowerSyncMongo, VersionedPowerSyncMongo } from './db.js';
27
29
  import { CurrentBucket, CurrentDataDocument, SourceKey, SyncRuleDocument } from './models.js';
28
30
  import { MongoIdSequence } from './MongoIdSequence.js';
29
31
  import { batchCreateCustomWriteCheckpoints } from './MongoWriteCheckpointAPI.js';
@@ -42,14 +44,15 @@ export const MAX_ROW_SIZE = 15 * 1024 * 1024;
42
44
  // In the future, we can investigate allowing multiple replication streams operating independently.
43
45
  const replicationMutex = new utils.Mutex();
44
46
 
47
+ export const EMPTY_DATA = new bson.Binary(bson.serialize({}));
48
+
45
49
  export interface MongoBucketBatchOptions {
46
- db: PowerSyncMongo;
50
+ db: VersionedPowerSyncMongo;
47
51
  syncRules: HydratedSyncRules;
48
52
  groupId: number;
49
53
  slotName: string;
50
54
  lastCheckpointLsn: string | null;
51
55
  keepaliveOp: InternalOpId | null;
52
- noCheckpointBeforeLsn: string;
53
56
  resumeFromLsn: string | null;
54
57
  storeCurrentData: boolean;
55
58
  /**
@@ -69,7 +72,7 @@ export class MongoBucketBatch
69
72
  private logger: Logger;
70
73
 
71
74
  private readonly client: mongo.MongoClient;
72
- public readonly db: PowerSyncMongo;
75
+ public readonly db: VersionedPowerSyncMongo;
73
76
  public readonly session: mongo.ClientSession;
74
77
  private readonly sync_rules: HydratedSyncRules;
75
78
 
@@ -93,8 +96,6 @@ export class MongoBucketBatch
93
96
  */
94
97
  private last_checkpoint_lsn: string | null = null;
95
98
 
96
- private no_checkpoint_before_lsn: string;
97
-
98
99
  private persisted_op: InternalOpId | null = null;
99
100
 
100
101
  /**
@@ -123,7 +124,6 @@ export class MongoBucketBatch
123
124
  this.db = options.db;
124
125
  this.group_id = options.groupId;
125
126
  this.last_checkpoint_lsn = options.lastCheckpointLsn;
126
- this.no_checkpoint_before_lsn = options.noCheckpointBeforeLsn;
127
127
  this.resumeFromLsn = options.resumeFromLsn;
128
128
  this.session = this.client.startSession();
129
129
  this.slot_name = options.slotName;
@@ -147,10 +147,6 @@ export class MongoBucketBatch
147
147
  return this.last_checkpoint_lsn;
148
148
  }
149
149
 
150
- get noCheckpointBeforeLsn() {
151
- return this.no_checkpoint_before_lsn;
152
- }
153
-
154
150
  async flush(options?: storage.BatchBucketFlushOptions): Promise<storage.FlushedResult | null> {
155
151
  let result: storage.FlushedResult | null = null;
156
152
  // One flush may be split over multiple transactions.
@@ -217,27 +213,28 @@ export class MongoBucketBatch
217
213
  // the order of processing, which then becomes really tricky to manage.
218
214
  // This now takes 2+ queries, but doesn't have any issues with order of operations.
219
215
  const sizeLookups: SourceKey[] = batch.batch.map((r) => {
220
- return { g: this.group_id, t: r.record.sourceTable.id, k: r.beforeId };
216
+ return { g: this.group_id, t: mongoTableId(r.record.sourceTable.id), k: r.beforeId };
221
217
  });
222
218
 
223
219
  sizes = new Map<string, number>();
224
220
 
225
- const sizeCursor: mongo.AggregationCursor<{ _id: SourceKey; size: number }> = this.db.current_data.aggregate(
226
- [
227
- {
228
- $match: {
229
- _id: { $in: sizeLookups }
230
- }
231
- },
232
- {
233
- $project: {
234
- _id: 1,
235
- size: { $bsonSize: '$$ROOT' }
221
+ const sizeCursor: mongo.AggregationCursor<{ _id: SourceKey; size: number }> =
222
+ this.db.common_current_data.aggregate(
223
+ [
224
+ {
225
+ $match: {
226
+ _id: { $in: sizeLookups }
227
+ }
228
+ },
229
+ {
230
+ $project: {
231
+ _id: 1,
232
+ size: { $bsonSize: '$$ROOT' }
233
+ }
236
234
  }
237
- }
238
- ],
239
- { session }
240
- );
235
+ ],
236
+ { session }
237
+ );
241
238
  for await (let doc of sizeCursor.stream()) {
242
239
  const key = cacheKey(doc._id.t, doc._id.k);
243
240
  sizes.set(key, doc.size);
@@ -260,12 +257,12 @@ export class MongoBucketBatch
260
257
  continue;
261
258
  }
262
259
  const lookups: SourceKey[] = b.map((r) => {
263
- return { g: this.group_id, t: r.record.sourceTable.id, k: r.beforeId };
260
+ return { g: this.group_id, t: mongoTableId(r.record.sourceTable.id), k: r.beforeId };
264
261
  });
265
262
  let current_data_lookup = new Map<string, CurrentDataDocument>();
266
263
  // With skipExistingRows, we only need to know whether or not the row exists.
267
264
  const projection = this.skipExistingRows ? { _id: 1 } : undefined;
268
- const cursor = this.db.current_data.find(
265
+ const cursor = this.db.common_current_data.find(
269
266
  {
270
267
  _id: { $in: lookups }
271
268
  },
@@ -275,7 +272,7 @@ export class MongoBucketBatch
275
272
  current_data_lookup.set(cacheKey(doc._id.t, doc._id.k), doc);
276
273
  }
277
274
 
278
- let persistedBatch: PersistedBatch | null = new PersistedBatch(this.group_id, transactionSize, {
275
+ let persistedBatch: PersistedBatch | null = new PersistedBatch(this.db, this.group_id, transactionSize, {
279
276
  logger: this.logger
280
277
  });
281
278
 
@@ -299,7 +296,7 @@ export class MongoBucketBatch
299
296
  if (persistedBatch!.shouldFlushTransaction()) {
300
297
  // Transaction is getting big.
301
298
  // Flush, and resume in a new transaction.
302
- const { flushedAny } = await persistedBatch!.flush(this.db, this.session, options);
299
+ const { flushedAny } = await persistedBatch!.flush(this.session, options);
303
300
  didFlush ||= flushedAny;
304
301
  persistedBatch = null;
305
302
  // Computing our current progress is a little tricky here, since
@@ -311,7 +308,7 @@ export class MongoBucketBatch
311
308
 
312
309
  if (persistedBatch) {
313
310
  transactionSize = persistedBatch.currentSize;
314
- const { flushedAny } = await persistedBatch.flush(this.db, this.session, options);
311
+ const { flushedAny } = await persistedBatch.flush(this.session, options);
315
312
  didFlush ||= flushedAny;
316
313
  }
317
314
  }
@@ -340,7 +337,7 @@ export class MongoBucketBatch
340
337
  let existing_lookups: bson.Binary[] = [];
341
338
  let new_lookups: bson.Binary[] = [];
342
339
 
343
- const before_key: SourceKey = { g: this.group_id, t: record.sourceTable.id, k: beforeId };
340
+ const before_key: SourceKey = { g: this.group_id, t: mongoTableId(record.sourceTable.id), k: beforeId };
344
341
 
345
342
  if (this.skipExistingRows) {
346
343
  if (record.tag == SaveOperationTag.INSERT) {
@@ -403,7 +400,7 @@ export class MongoBucketBatch
403
400
 
404
401
  let afterData: bson.Binary | undefined;
405
402
  if (afterId != null && !this.storeCurrentData) {
406
- afterData = new bson.Binary(bson.serialize({}));
403
+ afterData = EMPTY_DATA;
407
404
  } else if (afterId != null) {
408
405
  try {
409
406
  // This will fail immediately if the record is > 16MB.
@@ -551,7 +548,7 @@ export class MongoBucketBatch
551
548
  // 5. TOAST: Update current data and bucket list.
552
549
  if (afterId) {
553
550
  // Insert or update
554
- const after_key: SourceKey = { g: this.group_id, t: sourceTable.id, k: afterId };
551
+ const after_key: SourceKey = { g: this.group_id, t: mongoTableId(sourceTable.id), k: afterId };
555
552
  batch.upsertCurrentData(after_key, {
556
553
  data: afterData,
557
554
  buckets: new_buckets,
@@ -567,7 +564,10 @@ export class MongoBucketBatch
567
564
 
568
565
  if (afterId == null || !storage.replicaIdEquals(beforeId, afterId)) {
569
566
  // Either a delete (afterId == null), or replaced the old replication id
570
- batch.deleteCurrentData(before_key);
567
+ // Note that this is a soft delete.
568
+ // We don't specifically need a new or unique op_id here, but it must be greater than the
569
+ // last checkpoint, so we use next().
570
+ batch.softDeleteCurrentData(before_key, opSeq.next());
571
571
  }
572
572
  return result;
573
573
  }
@@ -664,75 +664,28 @@ export class MongoBucketBatch
664
664
  }
665
665
 
666
666
  async [Symbol.asyncDispose]() {
667
+ if (this.batch != null || this.write_checkpoint_batch.length > 0) {
668
+ // We don't error here, since:
669
+ // 1. In error states, this is expected (we can't distinguish between disposing after success or error).
670
+ // 2. SuppressedError is messy to deal with.
671
+ this.logger.warn('Disposing writer with unflushed changes');
672
+ }
667
673
  await this.session.endSession();
668
674
  super.clearListeners();
669
675
  }
670
676
 
677
+ async dispose() {
678
+ await this[Symbol.asyncDispose]();
679
+ }
680
+
671
681
  private lastWaitingLogThottled = 0;
672
682
 
673
- async commit(lsn: string, options?: storage.BucketBatchCommitOptions): Promise<boolean> {
683
+ async commit(lsn: string, options?: storage.BucketBatchCommitOptions): Promise<CheckpointResult> {
674
684
  const { createEmptyCheckpoints } = { ...storage.DEFAULT_BUCKET_BATCH_COMMIT_OPTIONS, ...options };
675
685
 
676
686
  await this.flush(options);
677
687
 
678
- if (this.last_checkpoint_lsn != null && lsn < this.last_checkpoint_lsn) {
679
- // When re-applying transactions, don't create a new checkpoint until
680
- // we are past the last transaction.
681
- this.logger.info(`Re-applied transaction ${lsn} - skipping checkpoint`);
682
- // Cannot create a checkpoint yet - return false
683
- return false;
684
- }
685
- if (lsn < this.no_checkpoint_before_lsn) {
686
- if (Date.now() - this.lastWaitingLogThottled > 5_000) {
687
- this.logger.info(
688
- `Waiting until ${this.no_checkpoint_before_lsn} before creating checkpoint, currently at ${lsn}. Persisted op: ${this.persisted_op}`
689
- );
690
- this.lastWaitingLogThottled = Date.now();
691
- }
692
-
693
- // Edge case: During initial replication, we have a no_checkpoint_before_lsn set,
694
- // and don't actually commit the snapshot.
695
- // The first commit can happen from an implicit keepalive message.
696
- // That needs the persisted_op to get an accurate checkpoint, so
697
- // we persist that in keepalive_op.
698
-
699
- await this.db.sync_rules.updateOne(
700
- {
701
- _id: this.group_id
702
- },
703
- {
704
- $set: {
705
- keepalive_op: this.persisted_op == null ? null : String(this.persisted_op)
706
- }
707
- },
708
- { session: this.session }
709
- );
710
- await this.db.notifyCheckpoint();
711
-
712
- // Cannot create a checkpoint yet - return false
713
- return false;
714
- }
715
-
716
- if (!createEmptyCheckpoints && this.persisted_op == null) {
717
- // Nothing to commit - also return true
718
- await this.autoActivate(lsn);
719
- return true;
720
- }
721
-
722
688
  const now = new Date();
723
- const update: Partial<SyncRuleDocument> = {
724
- last_checkpoint_lsn: lsn,
725
- last_checkpoint_ts: now,
726
- last_keepalive_ts: now,
727
- snapshot_done: true,
728
- last_fatal_error: null,
729
- last_fatal_error_ts: null,
730
- keepalive_op: null
731
- };
732
-
733
- if (this.persisted_op != null) {
734
- update.last_checkpoint = this.persisted_op;
735
- }
736
689
 
737
690
  // Mark relevant write checkpoints as "processed".
738
691
  // This makes it easier to identify write checkpoints that are "valid" in order.
@@ -751,21 +704,167 @@ export class MongoBucketBatch
751
704
  }
752
705
  );
753
706
 
754
- await this.db.sync_rules.updateOne(
755
- {
756
- _id: this.group_id
757
- },
707
+ const can_checkpoint = {
708
+ $and: [
709
+ { $eq: ['$snapshot_done', true] },
710
+ {
711
+ $or: [{ $eq: ['$last_checkpoint_lsn', null] }, { $lte: ['$last_checkpoint_lsn', { $literal: lsn }] }]
712
+ },
713
+ {
714
+ $or: [{ $eq: ['$no_checkpoint_before', null] }, { $lte: ['$no_checkpoint_before', { $literal: lsn }] }]
715
+ }
716
+ ]
717
+ };
718
+
719
+ const new_keepalive_op = {
720
+ $cond: [
721
+ can_checkpoint,
722
+ { $literal: null },
723
+ {
724
+ $toString: {
725
+ $max: [{ $toLong: '$keepalive_op' }, { $literal: this.persisted_op }, 0n]
726
+ }
727
+ }
728
+ ]
729
+ };
730
+
731
+ const new_last_checkpoint = {
732
+ $cond: [
733
+ can_checkpoint,
734
+ {
735
+ $max: ['$last_checkpoint', { $literal: this.persisted_op }, { $toLong: '$keepalive_op' }, 0n]
736
+ },
737
+ '$last_checkpoint'
738
+ ]
739
+ };
740
+
741
+ // For this query, we need to handle multiple cases, depending on the state:
742
+ // 1. Normal commit - advance last_checkpoint to this.persisted_op.
743
+ // 2. Commit delayed by no_checkpoint_before due to snapshot. In this case we only advance keepalive_op.
744
+ // 3. Commit with no new data - here we may set last_checkpoint = keepalive_op, if a delayed commit is relevant.
745
+ // We want to do as much as possible in a single atomic database operation, which makes this somewhat complex.
746
+ let preUpdateDocument = await this.db.sync_rules.findOneAndUpdate(
747
+ { _id: this.group_id },
748
+ [
749
+ {
750
+ $set: {
751
+ _can_checkpoint: can_checkpoint,
752
+ _not_empty: createEmptyCheckpoints
753
+ ? true
754
+ : {
755
+ $or: [
756
+ { $literal: createEmptyCheckpoints },
757
+ { $ne: ['$keepalive_op', new_keepalive_op] },
758
+ { $ne: ['$last_checkpoint', new_last_checkpoint] }
759
+ ]
760
+ }
761
+ }
762
+ },
763
+ {
764
+ $set: {
765
+ last_checkpoint_lsn: {
766
+ $cond: [{ $and: ['$_can_checkpoint', '$_not_empty'] }, { $literal: lsn }, '$last_checkpoint_lsn']
767
+ },
768
+ last_checkpoint_ts: {
769
+ $cond: [{ $and: ['$_can_checkpoint', '$_not_empty'] }, { $literal: now }, '$last_checkpoint_ts']
770
+ },
771
+ last_keepalive_ts: { $literal: now },
772
+ last_fatal_error: { $literal: null },
773
+ last_fatal_error_ts: { $literal: null },
774
+ keepalive_op: new_keepalive_op,
775
+ last_checkpoint: new_last_checkpoint,
776
+ // Unset snapshot_lsn on checkpoint
777
+ snapshot_lsn: {
778
+ $cond: [{ $and: ['$_can_checkpoint', '$_not_empty'] }, { $literal: null }, '$snapshot_lsn']
779
+ }
780
+ }
781
+ },
782
+ {
783
+ $unset: ['_can_checkpoint', '_not_empty']
784
+ }
785
+ ],
758
786
  {
759
- $set: update,
760
- $unset: { snapshot_lsn: 1 }
761
- },
762
- { session: this.session }
787
+ session: this.session,
788
+ // We return the before document, so that we can check the previous state to determine if a checkpoint was actually created or if we were blocked by snapshot/no_checkpoint_before.
789
+ returnDocument: 'before',
790
+ projection: {
791
+ snapshot_done: 1,
792
+ last_checkpoint_lsn: 1,
793
+ no_checkpoint_before: 1,
794
+ keepalive_op: 1,
795
+ last_checkpoint: 1
796
+ }
797
+ }
763
798
  );
764
- await this.autoActivate(lsn);
765
- await this.db.notifyCheckpoint();
766
- this.persisted_op = null;
767
- this.last_checkpoint_lsn = lsn;
768
- return true;
799
+
800
+ if (preUpdateDocument == null) {
801
+ throw new ReplicationAssertionError(
802
+ 'Failed to update checkpoint - no matching sync_rules document for _id: ' + this.group_id
803
+ );
804
+ }
805
+
806
+ // This re-implements the same logic as in the pipeline, to determine what was actually updated.
807
+ // Unfortunately we cannot return these from the pipeline directly, so we need to re-implement the logic.
808
+ const canCheckpoint =
809
+ preUpdateDocument.snapshot_done === true &&
810
+ (preUpdateDocument.last_checkpoint_lsn == null || preUpdateDocument.last_checkpoint_lsn <= lsn) &&
811
+ (preUpdateDocument.no_checkpoint_before == null || preUpdateDocument.no_checkpoint_before <= lsn);
812
+
813
+ const keepaliveOp = preUpdateDocument.keepalive_op == null ? null : BigInt(preUpdateDocument.keepalive_op);
814
+ const maxKeepalive = [keepaliveOp ?? 0n, this.persisted_op ?? 0n, 0n].reduce((a, b) => (a > b ? a : b));
815
+ const newKeepaliveOp = canCheckpoint ? null : maxKeepalive.toString();
816
+ const newLastCheckpoint = canCheckpoint
817
+ ? [preUpdateDocument.last_checkpoint ?? 0n, this.persisted_op ?? 0n, keepaliveOp ?? 0n, 0n].reduce((a, b) =>
818
+ a > b ? a : b
819
+ )
820
+ : preUpdateDocument.last_checkpoint;
821
+ const notEmpty =
822
+ createEmptyCheckpoints ||
823
+ preUpdateDocument.keepalive_op !== newKeepaliveOp ||
824
+ preUpdateDocument.last_checkpoint !== newLastCheckpoint;
825
+ const checkpointCreated = canCheckpoint && notEmpty;
826
+
827
+ const checkpointBlocked = !canCheckpoint;
828
+
829
+ if (checkpointBlocked) {
830
+ // Failed on snapshot_done, last_checkpoint_lsn, or no_checkpoint_before.
831
+ if (Date.now() - this.lastWaitingLogThottled > 5_000) {
832
+ this.logger.info(
833
+ `Waiting before creating checkpoint, currently at ${lsn} / ${preUpdateDocument.keepalive_op}. Current state: ${JSON.stringify(
834
+ {
835
+ snapshot_done: preUpdateDocument.snapshot_done,
836
+ last_checkpoint_lsn: preUpdateDocument.last_checkpoint_lsn,
837
+ no_checkpoint_before: preUpdateDocument.no_checkpoint_before
838
+ }
839
+ )}`
840
+ );
841
+ this.lastWaitingLogThottled = Date.now();
842
+ }
843
+ } else {
844
+ if (checkpointCreated) {
845
+ this.logger.debug(`Created checkpoint at ${lsn} / ${newLastCheckpoint}`);
846
+ }
847
+ await this.autoActivate(lsn);
848
+ await this.db.notifyCheckpoint();
849
+ this.persisted_op = null;
850
+ this.last_checkpoint_lsn = lsn;
851
+ if (this.db.storageConfig.softDeleteCurrentData && newLastCheckpoint != null) {
852
+ await this.cleanupCurrentData(newLastCheckpoint);
853
+ }
854
+ }
855
+ return { checkpointBlocked, checkpointCreated };
856
+ }
857
+
858
+ private async cleanupCurrentData(lastCheckpoint: bigint) {
859
+ const result = await this.db.v3_current_data.deleteMany({
860
+ '_id.g': this.group_id,
861
+ pending_delete: { $exists: true, $lte: lastCheckpoint }
862
+ });
863
+ if (result.deletedCount > 0) {
864
+ this.logger.info(
865
+ `Cleaned up ${result.deletedCount} pending delete current_data records for checkpoint ${lastCheckpoint}`
866
+ );
867
+ }
769
868
  }
770
869
 
771
870
  /**
@@ -785,7 +884,7 @@ export class MongoBucketBatch
785
884
  let activated = false;
786
885
  await session.withTransaction(async () => {
787
886
  const doc = await this.db.sync_rules.findOne({ _id: this.group_id }, { session });
788
- if (doc && doc.state == 'PROCESSING') {
887
+ if (doc && doc.state == SyncRuleState.PROCESSING && doc.snapshot_done && doc.last_checkpoint != null) {
789
888
  await this.db.sync_rules.updateOne(
790
889
  {
791
890
  _id: this.group_id
@@ -811,68 +910,19 @@ export class MongoBucketBatch
811
910
  { session }
812
911
  );
813
912
  activated = true;
913
+ } else if (doc?.state != SyncRuleState.PROCESSING) {
914
+ this.needsActivation = false;
814
915
  }
815
916
  });
816
917
  if (activated) {
817
918
  this.logger.info(`Activated new sync rules at ${lsn}`);
818
919
  await this.db.notifyCheckpoint();
920
+ this.needsActivation = false;
819
921
  }
820
- this.needsActivation = false;
821
922
  }
822
923
 
823
- async keepalive(lsn: string): Promise<boolean> {
824
- if (this.last_checkpoint_lsn != null && lsn < this.last_checkpoint_lsn) {
825
- // No-op
826
- return false;
827
- }
828
-
829
- if (lsn < this.no_checkpoint_before_lsn) {
830
- return false;
831
- }
832
-
833
- if (this.persisted_op != null) {
834
- // The commit may have been skipped due to "no_checkpoint_before_lsn".
835
- // Apply it now if relevant
836
- this.logger.info(`Commit due to keepalive at ${lsn} / ${this.persisted_op}`);
837
- return await this.commit(lsn);
838
- }
839
-
840
- await this.db.write_checkpoints.updateMany(
841
- {
842
- processed_at_lsn: null,
843
- 'lsns.1': { $lte: lsn }
844
- },
845
- {
846
- $set: {
847
- processed_at_lsn: lsn
848
- }
849
- },
850
- {
851
- session: this.session
852
- }
853
- );
854
-
855
- await this.db.sync_rules.updateOne(
856
- {
857
- _id: this.group_id
858
- },
859
- {
860
- $set: {
861
- last_checkpoint_lsn: lsn,
862
- snapshot_done: true,
863
- last_fatal_error: null,
864
- last_fatal_error_ts: null,
865
- last_keepalive_ts: new Date()
866
- },
867
- $unset: { snapshot_lsn: 1 }
868
- },
869
- { session: this.session }
870
- );
871
- await this.autoActivate(lsn);
872
- await this.db.notifyCheckpoint();
873
- this.last_checkpoint_lsn = lsn;
874
-
875
- return true;
924
+ async keepalive(lsn: string): Promise<CheckpointResult> {
925
+ return await this.commit(lsn, { createEmptyCheckpoints: true });
876
926
  }
877
927
 
878
928
  async setResumeLsn(lsn: string): Promise<void> {
@@ -938,7 +988,7 @@ export class MongoBucketBatch
938
988
 
939
989
  await this.withTransaction(async () => {
940
990
  for (let table of sourceTables) {
941
- await this.db.source_tables.deleteOne({ _id: table.id });
991
+ await this.db.source_tables.deleteOne({ _id: mongoTableId(table.id) });
942
992
  }
943
993
  });
944
994
  return result;
@@ -973,10 +1023,13 @@ export class MongoBucketBatch
973
1023
  while (lastBatchCount == BATCH_LIMIT) {
974
1024
  await this.withReplicationTransaction(`Truncate ${sourceTable.qualifiedName}`, async (session, opSeq) => {
975
1025
  const current_data_filter: mongo.Filter<CurrentDataDocument> = {
976
- _id: idPrefixFilter<SourceKey>({ g: this.group_id, t: sourceTable.id }, ['k'])
1026
+ _id: idPrefixFilter<SourceKey>({ g: this.group_id, t: mongoTableId(sourceTable.id) }, ['k']),
1027
+ // Skip soft-deleted data
1028
+ // Works for both v1 and v3 current_data schemas
1029
+ pending_delete: { $exists: false }
977
1030
  };
978
1031
 
979
- const cursor = this.db.current_data.find(current_data_filter, {
1032
+ const cursor = this.db.common_current_data.find(current_data_filter, {
980
1033
  projection: {
981
1034
  _id: 1,
982
1035
  buckets: 1,
@@ -986,7 +1039,7 @@ export class MongoBucketBatch
986
1039
  session: session
987
1040
  });
988
1041
  const batch = await cursor.toArray();
989
- const persistedBatch = new PersistedBatch(this.group_id, 0, { logger: this.logger });
1042
+ const persistedBatch = new PersistedBatch(this.db, this.group_id, 0, { logger: this.logger });
990
1043
 
991
1044
  for (let value of batch) {
992
1045
  persistedBatch.saveBucketData({
@@ -1004,9 +1057,10 @@ export class MongoBucketBatch
1004
1057
  sourceKey: value._id.k
1005
1058
  });
1006
1059
 
1007
- persistedBatch.deleteCurrentData(value._id);
1060
+ // Since this is not from streaming replication, we can do a hard delete
1061
+ persistedBatch.hardDeleteCurrentData(value._id);
1008
1062
  }
1009
- await persistedBatch.flush(this.db, session);
1063
+ await persistedBatch.flush(session);
1010
1064
  lastBatchCount = batch.length;
1011
1065
 
1012
1066
  last_op = opSeq.last();
@@ -1030,7 +1084,7 @@ export class MongoBucketBatch
1030
1084
 
1031
1085
  await this.withTransaction(async () => {
1032
1086
  await this.db.source_tables.updateOne(
1033
- { _id: table.id },
1087
+ { _id: mongoTableId(table.id) },
1034
1088
  {
1035
1089
  $set: {
1036
1090
  snapshot_status: {
@@ -1047,9 +1101,41 @@ export class MongoBucketBatch
1047
1101
  return copy;
1048
1102
  }
1049
1103
 
1050
- async markSnapshotDone(tables: storage.SourceTable[], no_checkpoint_before_lsn: string) {
1104
+ async markAllSnapshotDone(no_checkpoint_before_lsn: string) {
1105
+ await this.db.sync_rules.updateOne(
1106
+ {
1107
+ _id: this.group_id
1108
+ },
1109
+ {
1110
+ $set: {
1111
+ snapshot_done: true,
1112
+ last_keepalive_ts: new Date()
1113
+ },
1114
+ $max: {
1115
+ no_checkpoint_before: no_checkpoint_before_lsn
1116
+ }
1117
+ },
1118
+ { session: this.session }
1119
+ );
1120
+ }
1121
+
1122
+ async markTableSnapshotRequired(table: storage.SourceTable): Promise<void> {
1123
+ await this.db.sync_rules.updateOne(
1124
+ {
1125
+ _id: this.group_id
1126
+ },
1127
+ {
1128
+ $set: {
1129
+ snapshot_done: false
1130
+ }
1131
+ },
1132
+ { session: this.session }
1133
+ );
1134
+ }
1135
+
1136
+ async markTableSnapshotDone(tables: storage.SourceTable[], no_checkpoint_before_lsn?: string) {
1051
1137
  const session = this.session;
1052
- const ids = tables.map((table) => table.id);
1138
+ const ids = tables.map((table) => mongoTableId(table.id));
1053
1139
 
1054
1140
  await this.withTransaction(async () => {
1055
1141
  await this.db.source_tables.updateMany(
@@ -1065,17 +1151,17 @@ export class MongoBucketBatch
1065
1151
  { session }
1066
1152
  );
1067
1153
 
1068
- if (no_checkpoint_before_lsn > this.no_checkpoint_before_lsn) {
1069
- this.no_checkpoint_before_lsn = no_checkpoint_before_lsn;
1070
-
1154
+ if (no_checkpoint_before_lsn != null) {
1071
1155
  await this.db.sync_rules.updateOne(
1072
1156
  {
1073
1157
  _id: this.group_id
1074
1158
  },
1075
1159
  {
1076
1160
  $set: {
1077
- no_checkpoint_before: no_checkpoint_before_lsn,
1078
1161
  last_keepalive_ts: new Date()
1162
+ },
1163
+ $max: {
1164
+ no_checkpoint_before: no_checkpoint_before_lsn
1079
1165
  }
1080
1166
  },
1081
1167
  { session: this.session }