@powersync/service-module-mongodb-storage 0.0.0-dev-20250828090417 → 0.0.0-dev-20250828134335

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,11 +2,14 @@ import * as lib_mongo from '@powersync/lib-service-mongodb';
 import { mongo } from '@powersync/lib-service-mongodb';
 import {
   BaseObserver,
+  DatabaseQueryError,
+  ErrorCode,
   logger,
   ReplicationAbortedError,
   ServiceAssertionError
 } from '@powersync/lib-services-framework';
 import {
+  addBucketChecksums,
   addPartialChecksums,
   BroadcastIterable,
   BucketChecksum,
@@ -17,8 +20,10 @@ import {
   GetCheckpointChangesOptions,
   InternalOpId,
   internalToExternalOpId,
+  isPartialChecksum,
   maxLsn,
   PartialChecksum,
+  PartialOrFullChecksum,
   ProtocolOpId,
   ReplicationCheckpoint,
   storage,
@@ -37,7 +42,14 @@ import { MongoBucketBatch } from './MongoBucketBatch.js';
 import { MongoCompactor } from './MongoCompactor.js';
 import { MongoParameterCompactor } from './MongoParameterCompactor.js';
 import { MongoWriteCheckpointAPI } from './MongoWriteCheckpointAPI.js';
-import { idPrefixFilter, mapOpEntry, readSingleBatch, setSessionSnapshotTime } from './util.js';
+import {
+  CHECKSUM_QUERY_GROUP_STAGE,
+  checksumFromAggregate,
+  idPrefixFilter,
+  mapOpEntry,
+  readSingleBatch,
+  setSessionSnapshotTime
+} from './util.js';
 
 export class MongoSyncBucketStorage
   extends BaseObserver<storage.SyncRulesBucketStorageListener>
@@ -538,8 +550,7 @@ export class MongoSyncBucketStorage
       }
     }
 
-    const filters: any[] = [];
-    for (let request of batch) {
+    const mappedRequests = batch.map((request) => {
       let start = request.start;
       if (start == null) {
         const preState = preStates.get(request.bucket);
@@ -547,13 +558,52 @@ export class MongoSyncBucketStorage
           start = preState.opId;
         }
       }
+      return {
+        ...request,
+        start
+      };
+    });
+
+    const queriedChecksums = await this.queryPartialChecksums(mappedRequests);
 
+    return new Map<string, storage.PartialOrFullChecksum>(
+      batch.map((request) => {
+        const bucket = request.bucket;
+        // Could be null if this is either (1) a partial request, or (2) no compacted checksum was available
+        const preState = preStates.get(bucket);
+        // Could be null if we got no data
+        const partialChecksum = queriedChecksums.get(bucket);
+        const merged = addPartialChecksums(bucket, preState?.checksum ?? null, partialChecksum ?? null);
+
+        return [bucket, merged];
+      })
+    );
+  }
+
+  async queryPartialChecksums(batch: storage.FetchPartialBucketChecksum[]): Promise<storage.PartialChecksumMap> {
+    try {
+      return await this.queryPartialChecksumsInternal(batch);
+    } catch (e) {
+      if (e.codeName == 'MaxTimeMSExpired') {
+        logger.warn(`Checksum query timed out; falling back to slower version`, e);
+        // Timeout - try the slower but more robust version
+        return await this.queryPartialChecksumsFallback(batch);
+      }
+      throw lib_mongo.mapQueryError(e, 'while reading checksums');
+    }
+  }
+
+  private async queryPartialChecksumsInternal(
+    batch: storage.FetchPartialBucketChecksum[]
+  ): Promise<storage.PartialChecksumMap> {
+    const filters: any[] = [];
+    for (let request of batch) {
       filters.push({
         _id: {
           $gt: {
             g: this.group_id,
             b: request.bucket,
-            o: start ?? new bson.MinKey()
+            o: request.start ?? new bson.MinKey()
           },
           $lte: {
             g: this.group_id,
@@ -572,66 +622,126 @@ export class MongoSyncBucketStorage
              $or: filters
            }
          },
-         {
-           $group: {
-             _id: '$_id.b',
-             // Historically, checksum may be stored as 'int' or 'double'.
-             // More recently, this should be a 'long'.
-             // $toLong ensures that we always sum it as a long, avoiding inaccuracies in the calculations.
-             checksum_total: { $sum: { $toLong: '$checksum' } },
-             count: { $sum: 1 },
-             has_clear_op: {
-               $max: {
-                 $cond: [{ $eq: ['$op', 'CLEAR'] }, 1, 0]
-               }
-             }
-           }
-         }
+         CHECKSUM_QUERY_GROUP_STAGE
        ],
-       { session: undefined, readConcern: 'snapshot', maxTimeMS: lib_mongo.db.MONGO_CHECKSUM_TIMEOUT_MS }
+       { session: undefined, readConcern: 'snapshot', maxTimeMS: lib_mongo.MONGO_CHECKSUM_TIMEOUT_MS }
      )
-      .toArray()
-      .catch((e) => {
-        throw lib_mongo.mapQueryError(e, 'while reading checksums');
-      });
+      // Don't map the error here - we want to keep timeout errors as-is
+      .toArray();
 
     const partialChecksums = new Map<string, storage.PartialOrFullChecksum>(
       aggregate.map((doc) => {
-        const partialChecksum = Number(BigInt(doc.checksum_total) & 0xffffffffn) & 0xffffffff;
         const bucket = doc._id;
-        return [
-          bucket,
-          doc.has_clear_op == 1
-            ? ({
-                // full checksum - replaces any previous one
-                bucket,
-                checksum: partialChecksum,
-                count: doc.count
-              } satisfies BucketChecksum)
-            : ({
-                // partial checksum - is added to a previous one
-                bucket,
-                partialCount: doc.count,
-                partialChecksum
-              } satisfies PartialChecksum)
-        ];
+        return [bucket, checksumFromAggregate(doc)];
       })
     );
 
     return new Map<string, storage.PartialOrFullChecksum>(
       batch.map((request) => {
         const bucket = request.bucket;
-        // Could be null if this is either (1) a partial request, or (2) no compacted checksum was available
-        const preState = preStates.get(bucket);
         // Could be null if we got no data
-        const partialChecksum = partialChecksums.get(bucket);
-        const merged = addPartialChecksums(bucket, preState?.checksum ?? null, partialChecksum ?? null);
+        let partialChecksum = partialChecksums.get(bucket);
+        if (partialChecksum == null) {
+          partialChecksum = {
+            bucket,
+            partialCount: 0,
+            partialChecksum: 0
+          };
+        }
+        if (request.start == null && isPartialChecksum(partialChecksum)) {
+          partialChecksum = {
+            bucket,
+            count: partialChecksum.partialCount,
+            checksum: partialChecksum.partialChecksum
+          };
+        }
 
-        return [bucket, merged];
+        return [bucket, partialChecksum];
       })
     );
   }
 
+  /**
+   * Checksums for large buckets can run over the query timeout.
+   * To avoid this, we query in batches.
+   * This version can handle larger amounts of data, but is slower, especially for many buckets.
+   */
+  async queryPartialChecksumsFallback(
+    batch: storage.FetchPartialBucketChecksum[]
+  ): Promise<storage.PartialChecksumMap> {
+    const partialChecksums = new Map<string, storage.PartialOrFullChecksum>();
+    for (let request of batch) {
+      const checksum = await this.slowChecksum(request);
+      partialChecksums.set(request.bucket, checksum);
+    }
+
+    return partialChecksums;
+  }
+
+  private async slowChecksum(request: storage.FetchPartialBucketChecksum): Promise<PartialOrFullChecksum> {
+    const batchLimit = 50_000;
+
+    let lowerBound = 0n;
+    const bucket = request.bucket;
+
+    let runningChecksum: PartialOrFullChecksum = {
+      bucket,
+      partialCount: 0,
+      partialChecksum: 0
+    };
+    if (request.start == null) {
+      runningChecksum = {
+        bucket,
+        count: 0,
+        checksum: 0
+      };
+    }
+
+    while (true) {
+      const filter = {
+        _id: {
+          $gt: {
+            g: this.group_id,
+            b: bucket,
+            o: lowerBound
+          },
+          $lte: {
+            g: this.group_id,
+            b: bucket,
+            o: request.end
+          }
+        }
+      };
+      const docs = await this.db.bucket_data
+        .aggregate(
+          [
+            {
+              $match: filter
+            },
+            // sort and limit _before_ grouping
+            { $sort: { _id: 1 } },
+            { $limit: batchLimit },
+            CHECKSUM_QUERY_GROUP_STAGE
+          ],
+          { session: undefined, readConcern: 'snapshot', maxTimeMS: lib_mongo.MONGO_CHECKSUM_TIMEOUT_MS }
+        )
+        .toArray();
+      const doc = docs[0];
+      if (doc == null) {
+        return runningChecksum;
+      }
+      const partial = checksumFromAggregate(doc);
+      runningChecksum = addPartialChecksums(bucket, runningChecksum, partial);
+      const isFinal = doc.count != batchLimit;
+      if (isFinal) {
+        break;
+      } else {
+        lowerBound = doc.last_op;
+      }
+    }
+    return runningChecksum;
+  }
+
   async terminate(options?: storage.TerminateOptions) {
     // Default is to clear the storage except when explicitly requested not to.
     if (!options || options?.clearStorage) {
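
This hunk splits checksum queries into a fast path and a fallback: the fast path runs one aggregation across all requested buckets under the lib_mongo.MONGO_CHECKSUM_TIMEOUT_MS budget, and on timeout the fallback recomputes each bucket in bounded batches, applying $sort and $limit before $group so each aggregation scans at most batchLimit documents, then resuming from the grouped last_op. A minimal standalone sketch of that retry shape (the helper name is hypothetical, not part of this package):

// Hypothetical sketch of the fast-path/fallback shape used above.
// A MongoDB aggregation that exceeds its maxTimeMS budget rejects with
// codeName 'MaxTimeMSExpired'; any other error is rethrown unchanged.
async function withTimeoutFallback<T>(fast: () => Promise<T>, slow: () => Promise<T>): Promise<T> {
  try {
    return await fast();
  } catch (e: any) {
    if (e.codeName === 'MaxTimeMSExpired') {
      // One expensive query timed out; retry as many small, bounded queries.
      return await slow();
    }
    throw e;
  }
}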
@@ -779,22 +889,25 @@ export class MongoSyncBucketStorage
       const checkpoint = await this.getCheckpointInternal();
       maxOpId = checkpoint?.checkpoint ?? undefined;
     }
-    await new MongoCompactor(this.db, this.group_id, { ...options, maxOpId }).compact();
+    await new MongoCompactor(this, this.db, { ...options, maxOpId }).compact();
+
     if (maxOpId != null && options?.compactParameterData) {
       await new MongoParameterCompactor(this.db, this.group_id, maxOpId, options).compact();
     }
   }
 
-  async populatePersistentChecksumCache(options: Pick<CompactOptions, 'signal' | 'maxOpId'>): Promise<void> {
+  async populatePersistentChecksumCache(options: Required<Pick<CompactOptions, 'signal' | 'maxOpId'>>): Promise<void> {
+    logger.info(`Populating persistent checksum cache...`);
     const start = Date.now();
-    // We do a minimal compact, primarily to populate the checksum cache
-    await this.compact({
+    // We do a minimal compact here.
+    // We can optimize this in the future.
+    const compactor = new MongoCompactor(this, this.db, {
       ...options,
-      // Skip parameter data
-      compactParameterData: false,
       // Don't track updates for MOVE compacting
       memoryLimitMB: 0
     });
+
+    await compactor.populateChecksums();
     const duration = Date.now() - start;
     logger.info(`Populated persistent checksum cache in ${(duration / 1000).toFixed(1)}s`);
   }
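
The remaining hunks are from the utility module that the storage file imports CHECKSUM_QUERY_GROUP_STAGE and checksumFromAggregate from. Throughout this diff, two checksum shapes are combined: a full BucketChecksum (produced when the queried range contains a CLEAR op, or when the request has no start) replaces any previous value, while a PartialChecksum is added onto one. A hedged sketch of that merge rule follows, with a hypothetical mergeChecksums function standing in for the package's addPartialChecksums, and assuming 32-bit wrap-around consistent with the masking in checksumFromAggregate below:

// Hypothetical merge function illustrating the rule implied by the comments
// in this diff; the actual implementation lives in addPartialChecksums.
type Full = { bucket: string; count: number; checksum: number };
type Partial32 = { bucket: string; partialCount: number; partialChecksum: number };

function mergeChecksums(previous: Full, next: Full | Partial32): Full {
  if ('checksum' in next) {
    // Full checksum (e.g. the range contained a CLEAR op): replaces any previous one.
    return next;
  }
  return {
    bucket: previous.bucket,
    count: previous.count + next.partialCount,
    // Sum and keep the low 32 bits (signed), matching checksumFromAggregate's masking.
    checksum: (previous.checksum + next.partialChecksum) | 0
  };
}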
@@ -3,7 +3,7 @@ import * as crypto from 'crypto';
 import * as uuid from 'uuid';
 
 import { mongo } from '@powersync/lib-service-mongodb';
-import { storage, utils } from '@powersync/service-core';
+import { BucketChecksum, PartialChecksum, PartialOrFullChecksum, storage, utils } from '@powersync/service-core';
 
 import { PowerSyncMongo } from './db.js';
 import { BucketDataDocument } from './models.js';
@@ -130,3 +130,44 @@ export function setSessionSnapshotTime(session: mongo.ClientSession, time: bson.
     throw new ServiceAssertionError(`Session snapshotTime is already set`);
   }
 }
+
+export const CHECKSUM_QUERY_GROUP_STAGE = {
+  $group: {
+    _id: '$_id.b',
+    // Historically, checksum may be stored as 'int' or 'double'.
+    // More recently, this should be a 'long'.
+    // $toLong ensures that we always sum it as a long, avoiding inaccuracies in the calculations.
+    checksum_total: { $sum: { $toLong: '$checksum' } },
+    count: { $sum: 1 },
+    has_clear_op: {
+      $max: {
+        $cond: [{ $eq: ['$op', 'CLEAR'] }, 1, 0]
+      }
+    },
+    last_op: { $max: '$_id.o' }
+  }
+};
+
+/**
+ * Convert output of CHECKSUM_QUERY_GROUP_STAGE into a checksum.
+ */
+export function checksumFromAggregate(doc: bson.Document): PartialOrFullChecksum {
+  const partialChecksum = Number(BigInt(doc.checksum_total) & 0xffffffffn) & 0xffffffff;
+  const bucket = doc._id;
+
+  if (doc.has_clear_op == 1) {
+    return {
+      // full checksum - replaces any previous one
+      bucket,
+      checksum: partialChecksum,
+      count: doc.count
+    } satisfies BucketChecksum;
+  } else {
+    return {
+      // partial checksum - is added to a previous one
+      bucket,
+      partialCount: doc.count,
+      partialChecksum
+    } satisfies PartialChecksum;
+  }
+}
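
A note on the arithmetic in checksumFromAggregate: checksum_total is summed as a 64-bit long ($toLong avoids floating-point inaccuracy once sums pass 2^53), then reduced to its low 32 bits. The BigInt mask extracts those bits exactly; the trailing & 0xffffffff then coerces through JavaScript's ToInt32, so values with the high bit set come out negative. A standalone illustration, not part of the package:

// Illustrative only: reducing a 64-bit checksum sum to a signed 32-bit value,
// mirroring the expression in checksumFromAggregate.
const total = 0x1_2345_6789n; // example sum produced by the $group stage
const low32 = Number(total & 0xffffffffn) & 0xffffffff;
console.log(low32); // 591751049 (0x23456789): the high 32 bits are discarded

const highBitSet = 0xffff_ffffn;
console.log(Number(highBitSet & 0xffffffffn) & 0xffffffff); // -1: bitwise ops coerce to int32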