@powersync/service-module-mongodb-storage 0.13.2 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. package/CHANGELOG.md +51 -0
  2. package/dist/migrations/db/migrations/1770213298299-storage-version.d.ts +3 -0
  3. package/dist/migrations/db/migrations/1770213298299-storage-version.js +29 -0
  4. package/dist/migrations/db/migrations/1770213298299-storage-version.js.map +1 -0
  5. package/dist/storage/MongoBucketStorage.d.ts +7 -15
  6. package/dist/storage/MongoBucketStorage.js +28 -53
  7. package/dist/storage/MongoBucketStorage.js.map +1 -1
  8. package/dist/storage/implementation/MongoBucketBatch.d.ts +12 -11
  9. package/dist/storage/implementation/MongoBucketBatch.js +199 -127
  10. package/dist/storage/implementation/MongoBucketBatch.js.map +1 -1
  11. package/dist/storage/implementation/MongoChecksums.d.ts +8 -5
  12. package/dist/storage/implementation/MongoChecksums.js +8 -4
  13. package/dist/storage/implementation/MongoChecksums.js.map +1 -1
  14. package/dist/storage/implementation/MongoCompactor.d.ts +2 -2
  15. package/dist/storage/implementation/MongoCompactor.js +52 -26
  16. package/dist/storage/implementation/MongoCompactor.js.map +1 -1
  17. package/dist/storage/implementation/MongoParameterCompactor.d.ts +2 -2
  18. package/dist/storage/implementation/MongoParameterCompactor.js.map +1 -1
  19. package/dist/storage/implementation/MongoPersistedSyncRulesContent.d.ts +2 -12
  20. package/dist/storage/implementation/MongoPersistedSyncRulesContent.js +20 -25
  21. package/dist/storage/implementation/MongoPersistedSyncRulesContent.js.map +1 -1
  22. package/dist/storage/implementation/MongoSyncBucketStorage.d.ts +7 -4
  23. package/dist/storage/implementation/MongoSyncBucketStorage.js +11 -8
  24. package/dist/storage/implementation/MongoSyncBucketStorage.js.map +1 -1
  25. package/dist/storage/implementation/MongoSyncRulesLock.d.ts +3 -3
  26. package/dist/storage/implementation/MongoSyncRulesLock.js.map +1 -1
  27. package/dist/storage/implementation/MongoWriteCheckpointAPI.d.ts +4 -4
  28. package/dist/storage/implementation/MongoWriteCheckpointAPI.js.map +1 -1
  29. package/dist/storage/implementation/OperationBatch.js +3 -2
  30. package/dist/storage/implementation/OperationBatch.js.map +1 -1
  31. package/dist/storage/implementation/PersistedBatch.d.ts +11 -4
  32. package/dist/storage/implementation/PersistedBatch.js +42 -11
  33. package/dist/storage/implementation/PersistedBatch.js.map +1 -1
  34. package/dist/storage/implementation/db.d.ts +35 -1
  35. package/dist/storage/implementation/db.js +99 -0
  36. package/dist/storage/implementation/db.js.map +1 -1
  37. package/dist/storage/implementation/models.d.ts +25 -1
  38. package/dist/storage/implementation/models.js +10 -1
  39. package/dist/storage/implementation/models.js.map +1 -1
  40. package/dist/storage/storage-index.d.ts +0 -1
  41. package/dist/storage/storage-index.js +0 -1
  42. package/dist/storage/storage-index.js.map +1 -1
  43. package/dist/utils/test-utils.d.ts +7 -5
  44. package/dist/utils/test-utils.js +17 -14
  45. package/dist/utils/test-utils.js.map +1 -1
  46. package/dist/utils/util.d.ts +2 -1
  47. package/dist/utils/util.js +15 -1
  48. package/dist/utils/util.js.map +1 -1
  49. package/package.json +7 -7
  50. package/src/migrations/db/migrations/1770213298299-storage-version.ts +44 -0
  51. package/src/storage/MongoBucketStorage.ts +44 -61
  52. package/src/storage/implementation/MongoBucketBatch.ts +253 -177
  53. package/src/storage/implementation/MongoChecksums.ts +19 -9
  54. package/src/storage/implementation/MongoCompactor.ts +62 -31
  55. package/src/storage/implementation/MongoParameterCompactor.ts +3 -3
  56. package/src/storage/implementation/MongoPersistedSyncRulesContent.ts +20 -34
  57. package/src/storage/implementation/MongoSyncBucketStorage.ts +32 -17
  58. package/src/storage/implementation/MongoSyncRulesLock.ts +3 -3
  59. package/src/storage/implementation/MongoWriteCheckpointAPI.ts +4 -4
  60. package/src/storage/implementation/OperationBatch.ts +3 -2
  61. package/src/storage/implementation/PersistedBatch.ts +42 -11
  62. package/src/storage/implementation/db.ts +129 -1
  63. package/src/storage/implementation/models.ts +39 -1
  64. package/src/storage/storage-index.ts +0 -1
  65. package/src/utils/test-utils.ts +18 -16
  66. package/src/utils/util.ts +17 -2
  67. package/test/src/__snapshots__/storage.test.ts.snap +198 -22
  68. package/test/src/__snapshots__/storage_compacting.test.ts.snap +17 -0
  69. package/test/src/__snapshots__/storage_sync.test.ts.snap +2211 -21
  70. package/test/src/storage.test.ts +9 -7
  71. package/test/src/storage_compacting.test.ts +33 -24
  72. package/test/src/storage_sync.test.ts +31 -15
  73. package/test/src/util.ts +4 -1
  74. package/tsconfig.tsbuildinfo +1 -1
  75. package/dist/storage/implementation/MongoPersistedSyncRules.d.ts +0 -10
  76. package/dist/storage/implementation/MongoPersistedSyncRules.js +0 -17
  77. package/dist/storage/implementation/MongoPersistedSyncRules.js.map +0 -1
  78. package/src/storage/implementation/MongoPersistedSyncRules.ts +0 -20
@@ -1,5 +1,5 @@
1
1
  import { mongo } from '@powersync/lib-service-mongodb';
2
- import { SqlEventDescriptor, SqliteRow, SqliteValue, HydratedSyncRules } from '@powersync/service-sync-rules';
2
+ import { HydratedSyncRules, SqlEventDescriptor, SqliteRow, SqliteValue } from '@powersync/service-sync-rules';
3
3
  import * as bson from 'bson';
4
4
 
5
5
  import {
@@ -14,16 +14,18 @@ import {
14
14
  } from '@powersync/lib-services-framework';
15
15
  import {
16
16
  BucketStorageMarkRecordUnavailable,
17
+ CheckpointResult,
17
18
  deserializeBson,
18
19
  InternalOpId,
19
20
  isCompleteRow,
20
21
  SaveOperationTag,
21
22
  storage,
23
+ SyncRuleState,
22
24
  utils
23
25
  } from '@powersync/service-core';
24
26
  import * as timers from 'node:timers/promises';
25
- import { idPrefixFilter } from '../../utils/util.js';
26
- import { PowerSyncMongo } from './db.js';
27
+ import { idPrefixFilter, mongoTableId } from '../../utils/util.js';
28
+ import { PowerSyncMongo, VersionedPowerSyncMongo } from './db.js';
27
29
  import { CurrentBucket, CurrentDataDocument, SourceKey, SyncRuleDocument } from './models.js';
28
30
  import { MongoIdSequence } from './MongoIdSequence.js';
29
31
  import { batchCreateCustomWriteCheckpoints } from './MongoWriteCheckpointAPI.js';
@@ -42,14 +44,15 @@ export const MAX_ROW_SIZE = 15 * 1024 * 1024;
42
44
  // In the future, we can investigate allowing multiple replication streams operating independently.
43
45
  const replicationMutex = new utils.Mutex();
44
46
 
47
+ export const EMPTY_DATA = new bson.Binary(bson.serialize({}));
48
+
45
49
  export interface MongoBucketBatchOptions {
46
- db: PowerSyncMongo;
50
+ db: VersionedPowerSyncMongo;
47
51
  syncRules: HydratedSyncRules;
48
52
  groupId: number;
49
53
  slotName: string;
50
54
  lastCheckpointLsn: string | null;
51
55
  keepaliveOp: InternalOpId | null;
52
- noCheckpointBeforeLsn: string;
53
56
  resumeFromLsn: string | null;
54
57
  storeCurrentData: boolean;
55
58
  /**
@@ -69,7 +72,7 @@ export class MongoBucketBatch
69
72
  private logger: Logger;
70
73
 
71
74
  private readonly client: mongo.MongoClient;
72
- public readonly db: PowerSyncMongo;
75
+ public readonly db: VersionedPowerSyncMongo;
73
76
  public readonly session: mongo.ClientSession;
74
77
  private readonly sync_rules: HydratedSyncRules;
75
78
 
@@ -93,8 +96,6 @@ export class MongoBucketBatch
93
96
  */
94
97
  private last_checkpoint_lsn: string | null = null;
95
98
 
96
- private no_checkpoint_before_lsn: string;
97
-
98
99
  private persisted_op: InternalOpId | null = null;
99
100
 
100
101
  /**
@@ -123,7 +124,6 @@ export class MongoBucketBatch
123
124
  this.db = options.db;
124
125
  this.group_id = options.groupId;
125
126
  this.last_checkpoint_lsn = options.lastCheckpointLsn;
126
- this.no_checkpoint_before_lsn = options.noCheckpointBeforeLsn;
127
127
  this.resumeFromLsn = options.resumeFromLsn;
128
128
  this.session = this.client.startSession();
129
129
  this.slot_name = options.slotName;
@@ -147,10 +147,6 @@ export class MongoBucketBatch
147
147
  return this.last_checkpoint_lsn;
148
148
  }
149
149
 
150
- get noCheckpointBeforeLsn() {
151
- return this.no_checkpoint_before_lsn;
152
- }
153
-
154
150
  async flush(options?: storage.BatchBucketFlushOptions): Promise<storage.FlushedResult | null> {
155
151
  let result: storage.FlushedResult | null = null;
156
152
  // One flush may be split over multiple transactions.
@@ -217,27 +213,28 @@ export class MongoBucketBatch
217
213
  // the order of processing, which then becomes really tricky to manage.
218
214
  // This now takes 2+ queries, but doesn't have any issues with order of operations.
219
215
  const sizeLookups: SourceKey[] = batch.batch.map((r) => {
220
- return { g: this.group_id, t: r.record.sourceTable.id, k: r.beforeId };
216
+ return { g: this.group_id, t: mongoTableId(r.record.sourceTable.id), k: r.beforeId };
221
217
  });
222
218
 
223
219
  sizes = new Map<string, number>();
224
220
 
225
- const sizeCursor: mongo.AggregationCursor<{ _id: SourceKey; size: number }> = this.db.current_data.aggregate(
226
- [
227
- {
228
- $match: {
229
- _id: { $in: sizeLookups }
230
- }
231
- },
232
- {
233
- $project: {
234
- _id: 1,
235
- size: { $bsonSize: '$$ROOT' }
221
+ const sizeCursor: mongo.AggregationCursor<{ _id: SourceKey; size: number }> =
222
+ this.db.common_current_data.aggregate(
223
+ [
224
+ {
225
+ $match: {
226
+ _id: { $in: sizeLookups }
227
+ }
228
+ },
229
+ {
230
+ $project: {
231
+ _id: 1,
232
+ size: { $bsonSize: '$$ROOT' }
233
+ }
236
234
  }
237
- }
238
- ],
239
- { session }
240
- );
235
+ ],
236
+ { session }
237
+ );
241
238
  for await (let doc of sizeCursor.stream()) {
242
239
  const key = cacheKey(doc._id.t, doc._id.k);
243
240
  sizes.set(key, doc.size);
@@ -260,12 +257,12 @@ export class MongoBucketBatch
260
257
  continue;
261
258
  }
262
259
  const lookups: SourceKey[] = b.map((r) => {
263
- return { g: this.group_id, t: r.record.sourceTable.id, k: r.beforeId };
260
+ return { g: this.group_id, t: mongoTableId(r.record.sourceTable.id), k: r.beforeId };
264
261
  });
265
262
  let current_data_lookup = new Map<string, CurrentDataDocument>();
266
263
  // With skipExistingRows, we only need to know whether or not the row exists.
267
264
  const projection = this.skipExistingRows ? { _id: 1 } : undefined;
268
- const cursor = this.db.current_data.find(
265
+ const cursor = this.db.common_current_data.find(
269
266
  {
270
267
  _id: { $in: lookups }
271
268
  },
@@ -275,7 +272,7 @@ export class MongoBucketBatch
275
272
  current_data_lookup.set(cacheKey(doc._id.t, doc._id.k), doc);
276
273
  }
277
274
 
278
- let persistedBatch: PersistedBatch | null = new PersistedBatch(this.group_id, transactionSize, {
275
+ let persistedBatch: PersistedBatch | null = new PersistedBatch(this.db, this.group_id, transactionSize, {
279
276
  logger: this.logger
280
277
  });
281
278
 
@@ -299,7 +296,7 @@ export class MongoBucketBatch
299
296
  if (persistedBatch!.shouldFlushTransaction()) {
300
297
  // Transaction is getting big.
301
298
  // Flush, and resume in a new transaction.
302
- const { flushedAny } = await persistedBatch!.flush(this.db, this.session, options);
299
+ const { flushedAny } = await persistedBatch!.flush(this.session, options);
303
300
  didFlush ||= flushedAny;
304
301
  persistedBatch = null;
305
302
  // Computing our current progress is a little tricky here, since
@@ -311,7 +308,7 @@ export class MongoBucketBatch
311
308
 
312
309
  if (persistedBatch) {
313
310
  transactionSize = persistedBatch.currentSize;
314
- const { flushedAny } = await persistedBatch.flush(this.db, this.session, options);
311
+ const { flushedAny } = await persistedBatch.flush(this.session, options);
315
312
  didFlush ||= flushedAny;
316
313
  }
317
314
  }
@@ -340,7 +337,7 @@ export class MongoBucketBatch
340
337
  let existing_lookups: bson.Binary[] = [];
341
338
  let new_lookups: bson.Binary[] = [];
342
339
 
343
- const before_key: SourceKey = { g: this.group_id, t: record.sourceTable.id, k: beforeId };
340
+ const before_key: SourceKey = { g: this.group_id, t: mongoTableId(record.sourceTable.id), k: beforeId };
344
341
 
345
342
  if (this.skipExistingRows) {
346
343
  if (record.tag == SaveOperationTag.INSERT) {
@@ -403,7 +400,7 @@ export class MongoBucketBatch
403
400
 
404
401
  let afterData: bson.Binary | undefined;
405
402
  if (afterId != null && !this.storeCurrentData) {
406
- afterData = new bson.Binary(bson.serialize({}));
403
+ afterData = EMPTY_DATA;
407
404
  } else if (afterId != null) {
408
405
  try {
409
406
  // This will fail immediately if the record is > 16MB.
@@ -551,7 +548,7 @@ export class MongoBucketBatch
551
548
  // 5. TOAST: Update current data and bucket list.
552
549
  if (afterId) {
553
550
  // Insert or update
554
- const after_key: SourceKey = { g: this.group_id, t: sourceTable.id, k: afterId };
551
+ const after_key: SourceKey = { g: this.group_id, t: mongoTableId(sourceTable.id), k: afterId };
555
552
  batch.upsertCurrentData(after_key, {
556
553
  data: afterData,
557
554
  buckets: new_buckets,
@@ -567,7 +564,10 @@ export class MongoBucketBatch
567
564
 
568
565
  if (afterId == null || !storage.replicaIdEquals(beforeId, afterId)) {
569
566
  // Either a delete (afterId == null), or replaced the old replication id
570
- batch.deleteCurrentData(before_key);
567
+ // Note that this is a soft delete.
568
+ // We don't specifically need a new or unique op_id here, but it must be greater than the
569
+ // last checkpoint, so we use next().
570
+ batch.softDeleteCurrentData(before_key, opSeq.next());
571
571
  }
572
572
  return result;
573
573
  }
@@ -670,69 +670,12 @@ export class MongoBucketBatch
670
670
 
671
671
  private lastWaitingLogThottled = 0;
672
672
 
673
- async commit(lsn: string, options?: storage.BucketBatchCommitOptions): Promise<boolean> {
673
+ async commit(lsn: string, options?: storage.BucketBatchCommitOptions): Promise<CheckpointResult> {
674
674
  const { createEmptyCheckpoints } = { ...storage.DEFAULT_BUCKET_BATCH_COMMIT_OPTIONS, ...options };
675
675
 
676
676
  await this.flush(options);
677
677
 
678
- if (this.last_checkpoint_lsn != null && lsn < this.last_checkpoint_lsn) {
679
- // When re-applying transactions, don't create a new checkpoint until
680
- // we are past the last transaction.
681
- this.logger.info(`Re-applied transaction ${lsn} - skipping checkpoint`);
682
- // Cannot create a checkpoint yet - return false
683
- return false;
684
- }
685
- if (lsn < this.no_checkpoint_before_lsn) {
686
- if (Date.now() - this.lastWaitingLogThottled > 5_000) {
687
- this.logger.info(
688
- `Waiting until ${this.no_checkpoint_before_lsn} before creating checkpoint, currently at ${lsn}. Persisted op: ${this.persisted_op}`
689
- );
690
- this.lastWaitingLogThottled = Date.now();
691
- }
692
-
693
- // Edge case: During initial replication, we have a no_checkpoint_before_lsn set,
694
- // and don't actually commit the snapshot.
695
- // The first commit can happen from an implicit keepalive message.
696
- // That needs the persisted_op to get an accurate checkpoint, so
697
- // we persist that in keepalive_op.
698
-
699
- await this.db.sync_rules.updateOne(
700
- {
701
- _id: this.group_id
702
- },
703
- {
704
- $set: {
705
- keepalive_op: this.persisted_op == null ? null : String(this.persisted_op)
706
- }
707
- },
708
- { session: this.session }
709
- );
710
- await this.db.notifyCheckpoint();
711
-
712
- // Cannot create a checkpoint yet - return false
713
- return false;
714
- }
715
-
716
- if (!createEmptyCheckpoints && this.persisted_op == null) {
717
- // Nothing to commit - also return true
718
- await this.autoActivate(lsn);
719
- return true;
720
- }
721
-
722
678
  const now = new Date();
723
- const update: Partial<SyncRuleDocument> = {
724
- last_checkpoint_lsn: lsn,
725
- last_checkpoint_ts: now,
726
- last_keepalive_ts: now,
727
- snapshot_done: true,
728
- last_fatal_error: null,
729
- last_fatal_error_ts: null,
730
- keepalive_op: null
731
- };
732
-
733
- if (this.persisted_op != null) {
734
- update.last_checkpoint = this.persisted_op;
735
- }
736
679
 
737
680
  // Mark relevant write checkpoints as "processed".
738
681
  // This makes it easier to identify write checkpoints that are "valid" in order.
@@ -751,21 +694,167 @@ export class MongoBucketBatch
751
694
  }
752
695
  );
753
696
 
754
- await this.db.sync_rules.updateOne(
755
- {
756
- _id: this.group_id
757
- },
697
+ const can_checkpoint = {
698
+ $and: [
699
+ { $eq: ['$snapshot_done', true] },
700
+ {
701
+ $or: [{ $eq: ['$last_checkpoint_lsn', null] }, { $lte: ['$last_checkpoint_lsn', { $literal: lsn }] }]
702
+ },
703
+ {
704
+ $or: [{ $eq: ['$no_checkpoint_before', null] }, { $lte: ['$no_checkpoint_before', { $literal: lsn }] }]
705
+ }
706
+ ]
707
+ };
708
+
709
+ const new_keepalive_op = {
710
+ $cond: [
711
+ can_checkpoint,
712
+ { $literal: null },
713
+ {
714
+ $toString: {
715
+ $max: [{ $toLong: '$keepalive_op' }, { $literal: this.persisted_op }, 0n]
716
+ }
717
+ }
718
+ ]
719
+ };
720
+
721
+ const new_last_checkpoint = {
722
+ $cond: [
723
+ can_checkpoint,
724
+ {
725
+ $max: ['$last_checkpoint', { $literal: this.persisted_op }, { $toLong: '$keepalive_op' }, 0n]
726
+ },
727
+ '$last_checkpoint'
728
+ ]
729
+ };
730
+
731
+ // For this query, we need to handle multiple cases, depending on the state:
732
+ // 1. Normal commit - advance last_checkpoint to this.persisted_op.
733
+ // 2. Commit delayed by no_checkpoint_before due to snapshot. In this case we only advance keepalive_op.
734
+ // 3. Commit with no new data - here we may set last_checkpoint = keepalive_op, if a delayed commit is relevant.
735
+ // We want to do as much as possible in a single atomic database operation, which makes this somewhat complex.
736
+ let preUpdateDocument = await this.db.sync_rules.findOneAndUpdate(
737
+ { _id: this.group_id },
738
+ [
739
+ {
740
+ $set: {
741
+ _can_checkpoint: can_checkpoint,
742
+ _not_empty: createEmptyCheckpoints
743
+ ? true
744
+ : {
745
+ $or: [
746
+ { $literal: createEmptyCheckpoints },
747
+ { $ne: ['$keepalive_op', new_keepalive_op] },
748
+ { $ne: ['$last_checkpoint', new_last_checkpoint] }
749
+ ]
750
+ }
751
+ }
752
+ },
753
+ {
754
+ $set: {
755
+ last_checkpoint_lsn: {
756
+ $cond: [{ $and: ['$_can_checkpoint', '$_not_empty'] }, { $literal: lsn }, '$last_checkpoint_lsn']
757
+ },
758
+ last_checkpoint_ts: {
759
+ $cond: [{ $and: ['$_can_checkpoint', '$_not_empty'] }, { $literal: now }, '$last_checkpoint_ts']
760
+ },
761
+ last_keepalive_ts: { $literal: now },
762
+ last_fatal_error: { $literal: null },
763
+ last_fatal_error_ts: { $literal: null },
764
+ keepalive_op: new_keepalive_op,
765
+ last_checkpoint: new_last_checkpoint,
766
+ // Unset snapshot_lsn on checkpoint
767
+ snapshot_lsn: {
768
+ $cond: [{ $and: ['$_can_checkpoint', '$_not_empty'] }, { $literal: null }, '$snapshot_lsn']
769
+ }
770
+ }
771
+ },
772
+ {
773
+ $unset: ['_can_checkpoint', '_not_empty']
774
+ }
775
+ ],
758
776
  {
759
- $set: update,
760
- $unset: { snapshot_lsn: 1 }
761
- },
762
- { session: this.session }
777
+ session: this.session,
778
+ // We return the before document, so that we can check the previous state to determine if a checkpoint was actually created or if we were blocked by snapshot/no_checkpoint_before.
779
+ returnDocument: 'before',
780
+ projection: {
781
+ snapshot_done: 1,
782
+ last_checkpoint_lsn: 1,
783
+ no_checkpoint_before: 1,
784
+ keepalive_op: 1,
785
+ last_checkpoint: 1
786
+ }
787
+ }
763
788
  );
764
- await this.autoActivate(lsn);
765
- await this.db.notifyCheckpoint();
766
- this.persisted_op = null;
767
- this.last_checkpoint_lsn = lsn;
768
- return true;
789
+
790
+ if (preUpdateDocument == null) {
791
+ throw new ReplicationAssertionError(
792
+ 'Failed to update checkpoint - no matching sync_rules document for _id: ' + this.group_id
793
+ );
794
+ }
795
+
796
+ // This re-implements the same logic as in the pipeline, to determine what was actually updated.
797
+ // Unfortunately we cannot return these from the pipeline directly, so we need to re-implement the logic.
798
+ const canCheckpoint =
799
+ preUpdateDocument.snapshot_done === true &&
800
+ (preUpdateDocument.last_checkpoint_lsn == null || preUpdateDocument.last_checkpoint_lsn <= lsn) &&
801
+ (preUpdateDocument.no_checkpoint_before == null || preUpdateDocument.no_checkpoint_before <= lsn);
802
+
803
+ const keepaliveOp = preUpdateDocument.keepalive_op == null ? null : BigInt(preUpdateDocument.keepalive_op);
804
+ const maxKeepalive = [keepaliveOp ?? 0n, this.persisted_op ?? 0n, 0n].reduce((a, b) => (a > b ? a : b));
805
+ const newKeepaliveOp = canCheckpoint ? null : maxKeepalive.toString();
806
+ const newLastCheckpoint = canCheckpoint
807
+ ? [preUpdateDocument.last_checkpoint ?? 0n, this.persisted_op ?? 0n, keepaliveOp ?? 0n, 0n].reduce((a, b) =>
808
+ a > b ? a : b
809
+ )
810
+ : preUpdateDocument.last_checkpoint;
811
+ const notEmpty =
812
+ createEmptyCheckpoints ||
813
+ preUpdateDocument.keepalive_op !== newKeepaliveOp ||
814
+ preUpdateDocument.last_checkpoint !== newLastCheckpoint;
815
+ const checkpointCreated = canCheckpoint && notEmpty;
816
+
817
+ const checkpointBlocked = !canCheckpoint;
818
+
819
+ if (checkpointBlocked) {
820
+ // Blocked by snapshot_done, last_checkpoint_lsn or no_checkpoint_before.
821
+ if (Date.now() - this.lastWaitingLogThottled > 5_000) {
822
+ this.logger.info(
823
+ `Waiting before creating checkpoint, currently at ${lsn} / ${preUpdateDocument.keepalive_op}. Current state: ${JSON.stringify(
824
+ {
825
+ snapshot_done: preUpdateDocument.snapshot_done,
826
+ last_checkpoint_lsn: preUpdateDocument.last_checkpoint_lsn,
827
+ no_checkpoint_before: preUpdateDocument.no_checkpoint_before
828
+ }
829
+ )}`
830
+ );
831
+ this.lastWaitingLogThottled = Date.now();
832
+ }
833
+ } else {
834
+ if (checkpointCreated) {
835
+ this.logger.debug(`Created checkpoint at ${lsn} / ${newLastCheckpoint}`);
836
+ }
837
+ await this.autoActivate(lsn);
838
+ await this.db.notifyCheckpoint();
839
+ this.persisted_op = null;
840
+ this.last_checkpoint_lsn = lsn;
841
+ if (this.db.storageConfig.softDeleteCurrentData && newLastCheckpoint != null) {
842
+ await this.cleanupCurrentData(newLastCheckpoint);
843
+ }
844
+ }
845
+ return { checkpointBlocked, checkpointCreated };
846
+ }
847
+
848
+ private async cleanupCurrentData(lastCheckpoint: bigint) {
849
+ const result = await this.db.v3_current_data.deleteMany({
850
+ '_id.g': this.group_id,
851
+ pending_delete: { $exists: true, $lte: lastCheckpoint }
852
+ });
853
+ if (result.deletedCount > 0) {
854
+ this.logger.info(
855
+ `Cleaned up ${result.deletedCount} pending delete current_data records for checkpoint ${lastCheckpoint}`
856
+ );
857
+ }
769
858
  }
770
859
 
771
860
  /**
@@ -785,7 +874,7 @@ export class MongoBucketBatch
785
874
  let activated = false;
786
875
  await session.withTransaction(async () => {
787
876
  const doc = await this.db.sync_rules.findOne({ _id: this.group_id }, { session });
788
- if (doc && doc.state == 'PROCESSING') {
877
+ if (doc && doc.state == SyncRuleState.PROCESSING && doc.snapshot_done && doc.last_checkpoint != null) {
789
878
  await this.db.sync_rules.updateOne(
790
879
  {
791
880
  _id: this.group_id
@@ -811,68 +900,19 @@ export class MongoBucketBatch
811
900
  { session }
812
901
  );
813
902
  activated = true;
903
+ } else if (doc?.state != SyncRuleState.PROCESSING) {
904
+ this.needsActivation = false;
814
905
  }
815
906
  });
816
907
  if (activated) {
817
908
  this.logger.info(`Activated new sync rules at ${lsn}`);
818
909
  await this.db.notifyCheckpoint();
910
+ this.needsActivation = false;
819
911
  }
820
- this.needsActivation = false;
821
912
  }
822
913
 
823
- async keepalive(lsn: string): Promise<boolean> {
824
- if (this.last_checkpoint_lsn != null && lsn < this.last_checkpoint_lsn) {
825
- // No-op
826
- return false;
827
- }
828
-
829
- if (lsn < this.no_checkpoint_before_lsn) {
830
- return false;
831
- }
832
-
833
- if (this.persisted_op != null) {
834
- // The commit may have been skipped due to "no_checkpoint_before_lsn".
835
- // Apply it now if relevant
836
- this.logger.info(`Commit due to keepalive at ${lsn} / ${this.persisted_op}`);
837
- return await this.commit(lsn);
838
- }
839
-
840
- await this.db.write_checkpoints.updateMany(
841
- {
842
- processed_at_lsn: null,
843
- 'lsns.1': { $lte: lsn }
844
- },
845
- {
846
- $set: {
847
- processed_at_lsn: lsn
848
- }
849
- },
850
- {
851
- session: this.session
852
- }
853
- );
854
-
855
- await this.db.sync_rules.updateOne(
856
- {
857
- _id: this.group_id
858
- },
859
- {
860
- $set: {
861
- last_checkpoint_lsn: lsn,
862
- snapshot_done: true,
863
- last_fatal_error: null,
864
- last_fatal_error_ts: null,
865
- last_keepalive_ts: new Date()
866
- },
867
- $unset: { snapshot_lsn: 1 }
868
- },
869
- { session: this.session }
870
- );
871
- await this.autoActivate(lsn);
872
- await this.db.notifyCheckpoint();
873
- this.last_checkpoint_lsn = lsn;
874
-
875
- return true;
914
+ async keepalive(lsn: string): Promise<CheckpointResult> {
915
+ return await this.commit(lsn, { createEmptyCheckpoints: true });
876
916
  }
877
917
 
878
918
  async setResumeLsn(lsn: string): Promise<void> {
@@ -938,7 +978,7 @@ export class MongoBucketBatch
938
978
 
939
979
  await this.withTransaction(async () => {
940
980
  for (let table of sourceTables) {
941
- await this.db.source_tables.deleteOne({ _id: table.id });
981
+ await this.db.source_tables.deleteOne({ _id: mongoTableId(table.id) });
942
982
  }
943
983
  });
944
984
  return result;
@@ -973,10 +1013,13 @@ export class MongoBucketBatch
973
1013
  while (lastBatchCount == BATCH_LIMIT) {
974
1014
  await this.withReplicationTransaction(`Truncate ${sourceTable.qualifiedName}`, async (session, opSeq) => {
975
1015
  const current_data_filter: mongo.Filter<CurrentDataDocument> = {
976
- _id: idPrefixFilter<SourceKey>({ g: this.group_id, t: sourceTable.id }, ['k'])
1016
+ _id: idPrefixFilter<SourceKey>({ g: this.group_id, t: mongoTableId(sourceTable.id) }, ['k']),
1017
+ // Skip soft-deleted data
1018
+ // Works for both v1 and v3 current_data schemas
1019
+ pending_delete: { $exists: false }
977
1020
  };
978
1021
 
979
- const cursor = this.db.current_data.find(current_data_filter, {
1022
+ const cursor = this.db.common_current_data.find(current_data_filter, {
980
1023
  projection: {
981
1024
  _id: 1,
982
1025
  buckets: 1,
@@ -986,7 +1029,7 @@ export class MongoBucketBatch
986
1029
  session: session
987
1030
  });
988
1031
  const batch = await cursor.toArray();
989
- const persistedBatch = new PersistedBatch(this.group_id, 0, { logger: this.logger });
1032
+ const persistedBatch = new PersistedBatch(this.db, this.group_id, 0, { logger: this.logger });
990
1033
 
991
1034
  for (let value of batch) {
992
1035
  persistedBatch.saveBucketData({
@@ -1004,9 +1047,10 @@ export class MongoBucketBatch
1004
1047
  sourceKey: value._id.k
1005
1048
  });
1006
1049
 
1007
- persistedBatch.deleteCurrentData(value._id);
1050
+ // Since this is not from streaming replication, we can do a hard delete
1051
+ persistedBatch.hardDeleteCurrentData(value._id);
1008
1052
  }
1009
- await persistedBatch.flush(this.db, session);
1053
+ await persistedBatch.flush(session);
1010
1054
  lastBatchCount = batch.length;
1011
1055
 
1012
1056
  last_op = opSeq.last();
@@ -1030,7 +1074,7 @@ export class MongoBucketBatch
1030
1074
 
1031
1075
  await this.withTransaction(async () => {
1032
1076
  await this.db.source_tables.updateOne(
1033
- { _id: table.id },
1077
+ { _id: mongoTableId(table.id) },
1034
1078
  {
1035
1079
  $set: {
1036
1080
  snapshot_status: {
@@ -1047,9 +1091,41 @@ export class MongoBucketBatch
1047
1091
  return copy;
1048
1092
  }
1049
1093
 
1050
- async markSnapshotDone(tables: storage.SourceTable[], no_checkpoint_before_lsn: string) {
1094
+ async markAllSnapshotDone(no_checkpoint_before_lsn: string) {
1095
+ await this.db.sync_rules.updateOne(
1096
+ {
1097
+ _id: this.group_id
1098
+ },
1099
+ {
1100
+ $set: {
1101
+ snapshot_done: true,
1102
+ last_keepalive_ts: new Date()
1103
+ },
1104
+ $max: {
1105
+ no_checkpoint_before: no_checkpoint_before_lsn
1106
+ }
1107
+ },
1108
+ { session: this.session }
1109
+ );
1110
+ }
1111
+
1112
+ async markTableSnapshotRequired(table: storage.SourceTable): Promise<void> {
1113
+ await this.db.sync_rules.updateOne(
1114
+ {
1115
+ _id: this.group_id
1116
+ },
1117
+ {
1118
+ $set: {
1119
+ snapshot_done: false
1120
+ }
1121
+ },
1122
+ { session: this.session }
1123
+ );
1124
+ }
1125
+
1126
+ async markTableSnapshotDone(tables: storage.SourceTable[], no_checkpoint_before_lsn?: string) {
1051
1127
  const session = this.session;
1052
- const ids = tables.map((table) => table.id);
1128
+ const ids = tables.map((table) => mongoTableId(table.id));
1053
1129
 
1054
1130
  await this.withTransaction(async () => {
1055
1131
  await this.db.source_tables.updateMany(
@@ -1065,17 +1141,17 @@ export class MongoBucketBatch
1065
1141
  { session }
1066
1142
  );
1067
1143
 
1068
- if (no_checkpoint_before_lsn > this.no_checkpoint_before_lsn) {
1069
- this.no_checkpoint_before_lsn = no_checkpoint_before_lsn;
1070
-
1144
+ if (no_checkpoint_before_lsn != null) {
1071
1145
  await this.db.sync_rules.updateOne(
1072
1146
  {
1073
1147
  _id: this.group_id
1074
1148
  },
1075
1149
  {
1076
1150
  $set: {
1077
- no_checkpoint_before: no_checkpoint_before_lsn,
1078
1151
  last_keepalive_ts: new Date()
1152
+ },
1153
+ $max: {
1154
+ no_checkpoint_before: no_checkpoint_before_lsn
1079
1155
  }
1080
1156
  },
1081
1157
  { session: this.session }