@powersync/service-module-mongodb-storage 0.12.6 → 0.12.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29) hide show
  1. package/CHANGELOG.md +23 -0
  2. package/dist/migrations/db/migrations/1760433882550-bucket-state-index2.d.ts +3 -0
  3. package/dist/migrations/db/migrations/1760433882550-bucket-state-index2.js +25 -0
  4. package/dist/migrations/db/migrations/1760433882550-bucket-state-index2.js.map +1 -0
  5. package/dist/storage/implementation/MongoCompactor.d.ts +13 -3
  6. package/dist/storage/implementation/MongoCompactor.js +86 -90
  7. package/dist/storage/implementation/MongoCompactor.js.map +1 -1
  8. package/dist/storage/implementation/MongoSyncBucketStorage.d.ts +2 -2
  9. package/dist/storage/implementation/MongoSyncBucketStorage.js +61 -18
  10. package/dist/storage/implementation/MongoSyncBucketStorage.js.map +1 -1
  11. package/dist/storage/implementation/MongoTestStorageFactoryGenerator.js +1 -0
  12. package/dist/storage/implementation/MongoTestStorageFactoryGenerator.js.map +1 -1
  13. package/dist/storage/implementation/MongoWriteCheckpointAPI.js +6 -2
  14. package/dist/storage/implementation/MongoWriteCheckpointAPI.js.map +1 -1
  15. package/dist/storage/implementation/db.d.ts +4 -0
  16. package/dist/storage/implementation/db.js +10 -0
  17. package/dist/storage/implementation/db.js.map +1 -1
  18. package/dist/storage/implementation/util.d.ts +1 -1
  19. package/dist/storage/implementation/util.js +1 -1
  20. package/package.json +9 -9
  21. package/src/migrations/db/migrations/1760433882550-bucket-state-index2.ts +34 -0
  22. package/src/storage/implementation/MongoCompactor.ts +100 -96
  23. package/src/storage/implementation/MongoSyncBucketStorage.ts +63 -19
  24. package/src/storage/implementation/MongoTestStorageFactoryGenerator.ts +1 -0
  25. package/src/storage/implementation/MongoWriteCheckpointAPI.ts +6 -2
  26. package/src/storage/implementation/db.ts +13 -0
  27. package/src/storage/implementation/util.ts +1 -1
  28. package/test/src/storage_compacting.test.ts +17 -2
  29. package/tsconfig.tsbuildinfo +1 -1
@@ -16,6 +16,8 @@ import {
16
16
  InternalOpId,
17
17
  internalToExternalOpId,
18
18
  maxLsn,
19
+ PopulateChecksumCacheOptions,
20
+ PopulateChecksumCacheResults,
19
21
  ProtocolOpId,
20
22
  ReplicationCheckpoint,
21
23
  storage,
@@ -41,6 +43,17 @@ export interface MongoSyncBucketStorageOptions {
41
43
  checksumOptions?: MongoChecksumOptions;
42
44
  }
43
45
 
46
+ /**
47
+ * Only keep checkpoints around for a minute, before fetching a fresh one.
48
+ *
49
+ * The reason is that we keep a MongoDB snapshot reference (clusterTime) with the checkpoint,
50
+ * and they expire after 5 minutes by default. This is an issue if the checkpoint stream is idle,
51
+ * but new clients connect and use an outdated checkpoint snapshot for parameter queries.
52
+ *
53
+ * These will be filtered out for existing clients, so should not create significant overhead.
54
+ */
55
+ const CHECKPOINT_TIMEOUT_MS = 60_000;
56
+
44
57
  export class MongoSyncBucketStorage
45
58
  extends BaseObserver<storage.SyncRulesBucketStorageListener>
46
59
  implements storage.SyncRulesBucketStorage
@@ -392,7 +405,9 @@ export class MongoSyncBucketStorage
392
405
  limit: batchLimit,
393
406
  // Increase batch size above the default 101, so that we can fill an entire batch in
394
407
  // one go.
395
- batchSize: batchLimit,
408
+ // batchSize is 1 more than limit to auto-close the cursor.
409
+ // See https://github.com/mongodb/node-mongodb-native/pull/4580
410
+ batchSize: batchLimit + 1,
396
411
  // Raw mode returns an array of Buffer instead of parsed documents.
397
412
  // We use it so that:
398
413
  // 1. We can calculate the document size accurately without serializing again.
@@ -652,7 +667,7 @@ export class MongoSyncBucketStorage
652
667
  }
653
668
  }
654
669
 
655
- async populatePersistentChecksumCache(options: Required<Pick<CompactOptions, 'signal' | 'maxOpId'>>): Promise<void> {
670
+ async populatePersistentChecksumCache(options: PopulateChecksumCacheOptions): Promise<PopulateChecksumCacheResults> {
656
671
  logger.info(`Populating persistent checksum cache...`);
657
672
  const start = Date.now();
658
673
  // We do a minimal compact here.
@@ -663,9 +678,14 @@ export class MongoSyncBucketStorage
663
678
  memoryLimitMB: 0
664
679
  });
665
680
 
666
- await compactor.populateChecksums();
681
+ const result = await compactor.populateChecksums({
682
+ // There are cases with millions of small buckets, in which case it can take very long to
683
+ // populate the checksums, with minimal benefit. We skip the small buckets here.
684
+ minBucketChanges: options.minBucketChanges ?? 10
685
+ });
667
686
  const duration = Date.now() - start;
668
687
  logger.info(`Populated persistent checksum cache in ${(duration / 1000).toFixed(1)}s`);
688
+ return result;
669
689
  }
670
690
 
671
691
  /**
@@ -680,25 +700,45 @@ export class MongoSyncBucketStorage
680
700
 
681
701
  // We only watch changes to the active sync rules.
682
702
  // If it changes to inactive, we abort and restart with the new sync rules.
683
- let lastOp: storage.ReplicationCheckpoint | null = null;
703
+ try {
704
+ while (true) {
705
+ // If the stream is idle, we wait a max of a minute (CHECKPOINT_TIMEOUT_MS)
706
+ // before we get another checkpoint, to avoid stale checkpoint snapshots.
707
+ const timeout = timers
708
+ .setTimeout(CHECKPOINT_TIMEOUT_MS, { done: false }, { signal })
709
+ .catch(() => ({ done: true }));
710
+ try {
711
+ const result = await Promise.race([stream.next(), timeout]);
712
+ if (result.done) {
713
+ break;
714
+ }
715
+ } catch (e) {
716
+ if (e.name == 'AbortError') {
717
+ break;
718
+ }
719
+ throw e;
720
+ }
684
721
 
685
- for await (const _ of stream) {
686
- if (signal.aborted) {
687
- break;
688
- }
722
+ if (signal.aborted) {
723
+ // Would likely have been caught by the signal on the timeout or the upstream stream, but we check here anyway
724
+ break;
725
+ }
689
726
 
690
- const op = await this.getCheckpointInternal();
691
- if (op == null) {
692
- // Sync rules have changed - abort and restart.
693
- // We do a soft close of the stream here - no error
694
- break;
695
- }
727
+ const op = await this.getCheckpointInternal();
728
+ if (op == null) {
729
+ // Sync rules have changed - abort and restart.
730
+ // We do a soft close of the stream here - no error
731
+ break;
732
+ }
696
733
 
697
- // Check for LSN / checkpoint changes - ignore other metadata changes
698
- if (lastOp == null || op.lsn != lastOp.lsn || op.checkpoint != lastOp.checkpoint) {
699
- lastOp = op;
734
+ // Previously, we only yielded when the checkpoint or lsn changed.
735
+ // However, we always want to use the latest snapshotTime, so we skip that filtering here.
736
+ // That filtering could be added in the per-user streams if needed, but in general the capped collection
737
+ // should already only contain useful changes in most cases.
700
738
  yield op;
701
739
  }
740
+ } finally {
741
+ await stream.return(null);
702
742
  }
703
743
  }
704
744
 
@@ -874,7 +914,9 @@ export class MongoSyncBucketStorage
874
914
  '_id.b': 1
875
915
  },
876
916
  limit: limit + 1,
877
- batchSize: limit + 1,
917
+ // batchSize is 1 more than limit to auto-close the cursor.
918
+ // See https://github.com/mongodb/node-mongodb-native/pull/4580
919
+ batchSize: limit + 2,
878
920
  singleBatch: true
879
921
  }
880
922
  )
@@ -904,7 +946,9 @@ export class MongoSyncBucketStorage
904
946
  lookup: 1
905
947
  },
906
948
  limit: limit + 1,
907
- batchSize: limit + 1,
949
+ // batchSize is 1 more than limit to auto-close the cursor.
950
+ // See https://github.com/mongodb/node-mongodb-native/pull/4580
951
+ batchSize: limit + 2,
908
952
  singleBatch: true
909
953
  }
910
954
  )
@@ -25,6 +25,7 @@ export const MongoTestStorageFactoryGenerator = (factoryOptions: MongoTestStorag
25
25
  // Full migrations are not currently run for tests, so we manually create the important ones
26
26
  await db.createCheckpointEventsCollection();
27
27
  await db.createBucketStateIndex();
28
+ await db.createBucketStateIndex2();
28
29
 
29
30
  return new MongoBucketStorage(db, { slot_name_prefix: 'test_' }, factoryOptions.internalOptions);
30
31
  };
@@ -111,7 +111,9 @@ export class MongoWriteCheckpointAPI implements storage.WriteCheckpointAPI {
111
111
  },
112
112
  {
113
113
  limit: limit + 1,
114
- batchSize: limit + 1,
114
+ // batchSize is 1 more than limit to auto-close the cursor.
115
+ // See https://github.com/mongodb/node-mongodb-native/pull/4580
116
+ batchSize: limit + 2,
115
117
  singleBatch: true
116
118
  }
117
119
  )
@@ -140,7 +142,9 @@ export class MongoWriteCheckpointAPI implements storage.WriteCheckpointAPI {
140
142
  },
141
143
  {
142
144
  limit: limit + 1,
143
- batchSize: limit + 1,
145
+ // batchSize is 1 more than limit to auto-close the cursor.
146
+ // See https://github.com/mongodb/node-mongodb-native/pull/4580
147
+ batchSize: limit + 2,
144
148
  singleBatch: true
145
149
  }
146
150
  )
@@ -141,6 +141,19 @@ export class PowerSyncMongo {
141
141
  { name: 'bucket_updates', unique: true }
142
142
  );
143
143
  }
144
+ /**
145
+ * Only use in migrations and tests.
146
+ */
147
+ async createBucketStateIndex2() {
148
+ // TODO: Implement a better mechanism to use migrations in tests
149
+ await this.bucket_state.createIndex(
150
+ {
151
+ '_id.g': 1,
152
+ 'estimate_since_compact.count': -1
153
+ },
154
+ { name: 'dirty_count' }
155
+ );
156
+ }
144
157
  }
145
158
 
146
159
  export function createPowerSyncMongo(config: MongoStorageConfig, options?: lib_mongo.MongoConnectionOptions) {
@@ -41,7 +41,7 @@ export function generateSlotName(prefix: string, sync_rules_id: number) {
41
41
  * However, that makes `has_more` detection very difficult, since the cursor is always closed
42
42
  * after the first batch. Instead, we do a workaround to only fetch a single batch below.
43
43
  *
44
- * For this to be effective, set batchSize = limit in the find command.
44
+ * For this to be effective, set batchSize = limit + 1 in the find command.
45
45
  */
46
46
  export async function readSingleBatch<T>(cursor: mongo.AbstractCursor<T>): Promise<{ data: T[]; hasMore: boolean }> {
47
47
  try {
@@ -97,10 +97,25 @@ bucket_definitions:
97
97
  await populate(bucketStorage);
98
98
  const { checkpoint } = await bucketStorage.getCheckpoint();
99
99
 
100
- await bucketStorage.populatePersistentChecksumCache({
100
+ // Default minBucketChanges is too high for these small buckets - should be a no-op
101
+ const result0 = await bucketStorage.populatePersistentChecksumCache({
102
+ maxOpId: checkpoint
103
+ });
104
+ expect(result0.buckets).toEqual(0);
105
+
106
+ // This should cache the checksums for the two buckets
107
+ const result1 = await bucketStorage.populatePersistentChecksumCache({
108
+ maxOpId: checkpoint,
109
+ minBucketChanges: 1
110
+ });
111
+ expect(result1.buckets).toEqual(2);
112
+
113
+ // This should be a no-op, as the checksums are already cached
114
+ const result2 = await bucketStorage.populatePersistentChecksumCache({
101
115
  maxOpId: checkpoint,
102
- signal: new AbortController().signal
116
+ minBucketChanges: 1
103
117
  });
118
+ expect(result2.buckets).toEqual(0);
104
119
 
105
120
  const checksumAfter = await bucketStorage.getChecksums(checkpoint, ['by_user2["u1"]', 'by_user2["u2"]']);
106
121
  expect(checksumAfter.get('by_user2["u1"]')).toEqual({