@powersync/service-module-mongodb-storage 0.0.0-dev-20250828090417 → 0.0.0-dev-20250828134335
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +18 -9
- package/dist/storage/implementation/MongoCompactor.d.ts +9 -2
- package/dist/storage/implementation/MongoCompactor.js +83 -5
- package/dist/storage/implementation/MongoCompactor.js.map +1 -1
- package/dist/storage/implementation/MongoSyncBucketStorage.d.ts +10 -1
- package/dist/storage/implementation/MongoSyncBucketStorage.js +134 -52
- package/dist/storage/implementation/MongoSyncBucketStorage.js.map +1 -1
- package/dist/storage/implementation/util.d.ts +28 -1
- package/dist/storage/implementation/util.js +39 -0
- package/dist/storage/implementation/util.js.map +1 -1
- package/package.json +8 -8
- package/src/storage/implementation/MongoCompactor.ts +93 -3
- package/src/storage/implementation/MongoSyncBucketStorage.ts +165 -52
- package/src/storage/implementation/util.ts +42 -1
- package/tsconfig.tsbuildinfo +1 -1
@@ -2,11 +2,14 @@ import * as lib_mongo from '@powersync/lib-service-mongodb';
 import { mongo } from '@powersync/lib-service-mongodb';
 import {
   BaseObserver,
+  DatabaseQueryError,
+  ErrorCode,
   logger,
   ReplicationAbortedError,
   ServiceAssertionError
 } from '@powersync/lib-services-framework';
 import {
+  addBucketChecksums,
   addPartialChecksums,
   BroadcastIterable,
   BucketChecksum,
@@ -17,8 +20,10 @@ import {
   GetCheckpointChangesOptions,
   InternalOpId,
   internalToExternalOpId,
+  isPartialChecksum,
   maxLsn,
   PartialChecksum,
+  PartialOrFullChecksum,
   ProtocolOpId,
   ReplicationCheckpoint,
   storage,
@@ -37,7 +42,14 @@ import { MongoBucketBatch } from './MongoBucketBatch.js';
 import { MongoCompactor } from './MongoCompactor.js';
 import { MongoParameterCompactor } from './MongoParameterCompactor.js';
 import { MongoWriteCheckpointAPI } from './MongoWriteCheckpointAPI.js';
-import { idPrefixFilter, mapOpEntry, readSingleBatch, setSessionSnapshotTime } from './util.js';
+import {
+  CHECKSUM_QUERY_GROUP_STAGE,
+  checksumFromAggregate,
+  idPrefixFilter,
+  mapOpEntry,
+  readSingleBatch,
+  setSessionSnapshotTime
+} from './util.js';
 
 export class MongoSyncBucketStorage
   extends BaseObserver<storage.SyncRulesBucketStorageListener>
@@ -538,8 +550,7 @@ export class MongoSyncBucketStorage
       }
     }
 
-    const filters: any[] = [];
-    for (let request of batch) {
+    const mappedRequests = batch.map((request) => {
       let start = request.start;
       if (start == null) {
         const preState = preStates.get(request.bucket);
@@ -547,13 +558,52 @@ export class MongoSyncBucketStorage
           start = preState.opId;
         }
       }
+      return {
+        ...request,
+        start
+      };
+    });
+
+    const queriedChecksums = await this.queryPartialChecksums(mappedRequests);
 
+    return new Map<string, storage.PartialOrFullChecksum>(
+      batch.map((request) => {
+        const bucket = request.bucket;
+        // Could be null if this is either (1) a partial request, or (2) no compacted checksum was available
+        const preState = preStates.get(bucket);
+        // Could be null if we got no data
+        const partialChecksum = queriedChecksums.get(bucket);
+        const merged = addPartialChecksums(bucket, preState?.checksum ?? null, partialChecksum ?? null);
+
+        return [bucket, merged];
+      })
+    );
+  }
+
+  async queryPartialChecksums(batch: storage.FetchPartialBucketChecksum[]): Promise<storage.PartialChecksumMap> {
+    try {
+      return await this.queryPartialChecksumsInternal(batch);
+    } catch (e) {
+      if (e.codeName == 'MaxTimeMSExpired') {
+        logger.warn(`Checksum query timed out; falling back to slower version`, e);
+        // Timeout - try the slower but more robust version
+        return await this.queryPartialChecksumsFallback(batch);
+      }
+      throw lib_mongo.mapQueryError(e, 'while reading checksums');
+    }
+  }
+
+  private async queryPartialChecksumsInternal(
+    batch: storage.FetchPartialBucketChecksum[]
+  ): Promise<storage.PartialChecksumMap> {
+    const filters: any[] = [];
+    for (let request of batch) {
       filters.push({
         _id: {
           $gt: {
             g: this.group_id,
             b: request.bucket,
-            o: start ?? new bson.MinKey()
+            o: request.start ?? new bson.MinKey()
           },
           $lte: {
             g: this.group_id,
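The try/catch in `queryPartialChecksums` above is deliberately narrow: only a MongoDB `MaxTimeMSExpired` error triggers the slower batched fallback, while every other error is mapped and rethrown. A minimal standalone sketch of the same pattern, with illustrative names that are not part of this package:

    // Sketch: retry a fast query via a slower fallback, but only on maxTimeMS timeouts.
    async function withTimeoutFallback<T>(fast: () => Promise<T>, slow: () => Promise<T>): Promise<T> {
      try {
        return await fast();
      } catch (e: any) {
        // MongoDB reports an exceeded maxTimeMS with codeName 'MaxTimeMSExpired'.
        if (e?.codeName === 'MaxTimeMSExpired') {
          return await slow();
        }
        throw e; // anything else is a real error
      }
    }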
@@ -572,66 +622,126 @@ export class MongoSyncBucketStorage
              $or: filters
            }
          },
-          {
-            $group: {
-              _id: '$_id.b',
-              // Historically, checksum may be stored as 'int' or 'double'.
-              // More recently, this should be a 'long'.
-              // $toLong ensures that we always sum it as a long, avoiding inaccuracies in the calculations.
-              checksum_total: { $sum: { $toLong: '$checksum' } },
-              count: { $sum: 1 },
-              has_clear_op: {
-                $max: {
-                  $cond: [{ $eq: ['$op', 'CLEAR'] }, 1, 0]
-                }
-              }
-            }
-          }
+          CHECKSUM_QUERY_GROUP_STAGE
        ],
-        { session: undefined, readConcern: 'snapshot', maxTimeMS: lib_mongo.
+        { session: undefined, readConcern: 'snapshot', maxTimeMS: lib_mongo.MONGO_CHECKSUM_TIMEOUT_MS }
      )
-
-      .
-        throw lib_mongo.mapQueryError(e, 'while reading checksums');
-      });
+      // Don't map the error here - we want to keep timeout errors as-is
+      .toArray();
 
     const partialChecksums = new Map<string, storage.PartialOrFullChecksum>(
       aggregate.map((doc) => {
-        const partialChecksum = Number(BigInt(doc.checksum_total) & 0xffffffffn) & 0xffffffff;
         const bucket = doc._id;
-        return [
-          bucket,
-          doc.has_clear_op == 1
-            ? ({
-                // full checksum - replaces any previous one
-                bucket,
-                checksum: partialChecksum,
-                count: doc.count
-              } satisfies BucketChecksum)
-            : ({
-                // partial checksum - is added to a previous one
-                bucket,
-                partialCount: doc.count,
-                partialChecksum
-              } satisfies PartialChecksum)
-        ];
+        return [bucket, checksumFromAggregate(doc)];
       })
     );
 
     return new Map<string, storage.PartialOrFullChecksum>(
       batch.map((request) => {
         const bucket = request.bucket;
-        // Could be null if this is either (1) a partial request, or (2) no compacted checksum was available
-        const preState = preStates.get(bucket);
         // Could be null if we got no data
-        const partialChecksum = partialChecksums.get(bucket);
-        const merged = addPartialChecksums(bucket, preState?.checksum ?? null, partialChecksum ?? null);
+        let partialChecksum = partialChecksums.get(bucket);
+        if (partialChecksum == null) {
+          partialChecksum = {
+            bucket,
+            partialCount: 0,
+            partialChecksum: 0
+          };
+        }
+        if (request.start == null && isPartialChecksum(partialChecksum)) {
+          partialChecksum = {
+            bucket,
+            count: partialChecksum.partialCount,
+            checksum: partialChecksum.partialChecksum
+          };
+        }
 
-        return [bucket, merged];
+        return [bucket, partialChecksum];
       })
     );
   }
 
+  /**
+   * Checksums for large buckets can run over the query timeout.
+   * To avoid this, we query in batches.
+   * This version can handle larger amounts of data, but is slower, especially for many buckets.
+   */
+  async queryPartialChecksumsFallback(
+    batch: storage.FetchPartialBucketChecksum[]
+  ): Promise<storage.PartialChecksumMap> {
+    const partialChecksums = new Map<string, storage.PartialOrFullChecksum>();
+    for (let request of batch) {
+      const checksum = await this.slowChecksum(request);
+      partialChecksums.set(request.bucket, checksum);
+    }
+
+    return partialChecksums;
+  }
+
+  private async slowChecksum(request: storage.FetchPartialBucketChecksum): Promise<PartialOrFullChecksum> {
+    const batchLimit = 50_000;
+
+    let lowerBound = 0n;
+    const bucket = request.bucket;
+
+    let runningChecksum: PartialOrFullChecksum = {
+      bucket,
+      partialCount: 0,
+      partialChecksum: 0
+    };
+    if (request.start == null) {
+      runningChecksum = {
+        bucket,
+        count: 0,
+        checksum: 0
+      };
+    }
+
+    while (true) {
+      const filter = {
+        _id: {
+          $gt: {
+            g: this.group_id,
+            b: bucket,
+            o: lowerBound
+          },
+          $lte: {
+            g: this.group_id,
+            b: bucket,
+            o: request.end
+          }
+        }
+      };
+      const docs = await this.db.bucket_data
+        .aggregate(
+          [
+            {
+              $match: filter
+            },
+            // sort and limit _before_ grouping
+            { $sort: { _id: 1 } },
+            { $limit: batchLimit },
+            CHECKSUM_QUERY_GROUP_STAGE
+          ],
+          { session: undefined, readConcern: 'snapshot', maxTimeMS: lib_mongo.MONGO_CHECKSUM_TIMEOUT_MS }
+        )
+        .toArray();
+      const doc = docs[0];
+      if (doc == null) {
+        return runningChecksum;
+      }
+      const partial = checksumFromAggregate(doc);
+      runningChecksum = addPartialChecksums(bucket, runningChecksum, partial);
+      const isFinal = doc.count != batchLimit;
+      if (isFinal) {
+        break;
+      } else {
+        lowerBound = doc.last_op;
+      }
+    }
+    return runningChecksum;
+  }
+
   async terminate(options?: storage.TerminateOptions) {
     // Default is to clear the storage except when explicitly requested not to.
     if (!options || options?.clearStorage) {
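The fallback's key invariant is that bucket checksums are additive: each batch of at most `batchLimit` ops is reduced to one partial checksum, and `last_op` from the group stage becomes the next `$gt` bound, so batches tile the op range without overlap. A rough sketch of the merge arithmetic, assuming checksums are summed modulo 2^32 (which is what the 32-bit truncation in `checksumFromAggregate` suggests); the helper shape here is illustrative, not from the package:

    // Sketch: fold per-batch aggregation results into one running checksum.
    interface BatchResult {
      count: number;    // ops covered by this batch
      checksum: number; // low 32 bits of the checksum sum for this batch
      lastOp: bigint;   // upper bound consumed so far, used as the next lower bound
    }

    function foldBatches(batches: BatchResult[]): { count: number; checksum: number } {
      let count = 0;
      let checksum = 0;
      for (const b of batches) {
        count += b.count;
        checksum = (checksum + b.checksum) | 0; // keep within signed 32-bit range
      }
      return { count, checksum };
    }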
@@ -779,22 +889,25 @@ export class MongoSyncBucketStorage
       const checkpoint = await this.getCheckpointInternal();
       maxOpId = checkpoint?.checkpoint ?? undefined;
     }
-    await new MongoCompactor(this
+    await new MongoCompactor(this, this.db, { ...options, maxOpId }).compact();
+
     if (maxOpId != null && options?.compactParameterData) {
       await new MongoParameterCompactor(this.db, this.group_id, maxOpId, options).compact();
     }
   }
 
-  async populatePersistentChecksumCache(options: Pick<CompactOptions, 'signal' | 'maxOpId'
+  async populatePersistentChecksumCache(options: Required<Pick<CompactOptions, 'signal' | 'maxOpId'>>): Promise<void> {
+    logger.info(`Populating persistent checksum cache...`);
     const start = Date.now();
-    // We do a minimal compact
-
+    // We do a minimal compact here.
+    // We can optimize this in the future.
+    const compactor = new MongoCompactor(this, this.db, {
       ...options,
-      // Skip parameter data
-      compactParameterData: false,
       // Don't track updates for MOVE compacting
       memoryLimitMB: 0
     });
+
+    await compactor.populateChecksums();
     const duration = Date.now() - start;
     logger.info(`Populated persistent checksum cache in ${(duration / 1000).toFixed(1)}s`);
   }
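Since `populatePersistentChecksumCache` now takes `Required<Pick<CompactOptions, 'signal' | 'maxOpId'>>`, callers must supply both fields. A hedged usage sketch; the surrounding storage and checkpoint objects are assumed, not shown in this diff, and `signal` is assumed to be an AbortSignal:

    // Sketch: populate the persistent checksum cache up to a known checkpoint.
    const controller = new AbortController();
    await bucketStorage.populatePersistentChecksumCache({
      signal: controller.signal,       // assumed AbortSignal for cancellation
      maxOpId: checkpoint.checkpoint   // cache checksums up to this op id
    });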
@@ -3,7 +3,7 @@ import * as crypto from 'crypto';
 import * as uuid from 'uuid';
 
 import { mongo } from '@powersync/lib-service-mongodb';
-import { storage, utils } from '@powersync/service-core';
+import { BucketChecksum, PartialChecksum, PartialOrFullChecksum, storage, utils } from '@powersync/service-core';
 
 import { PowerSyncMongo } from './db.js';
 import { BucketDataDocument } from './models.js';
@@ -130,3 +130,44 @@ export function setSessionSnapshotTime(session: mongo.ClientSession, time: bson.
     throw new ServiceAssertionError(`Session snapshotTime is already set`);
   }
 }
+
+export const CHECKSUM_QUERY_GROUP_STAGE = {
+  $group: {
+    _id: '$_id.b',
+    // Historically, checksum may be stored as 'int' or 'double'.
+    // More recently, this should be a 'long'.
+    // $toLong ensures that we always sum it as a long, avoiding inaccuracies in the calculations.
+    checksum_total: { $sum: { $toLong: '$checksum' } },
+    count: { $sum: 1 },
+    has_clear_op: {
+      $max: {
+        $cond: [{ $eq: ['$op', 'CLEAR'] }, 1, 0]
+      }
+    },
+    last_op: { $max: '$_id.o' }
+  }
+};
+
+/**
+ * Convert output of CHECKSUM_QUERY_GROUP_STAGE into a checksum.
+ */
+export function checksumFromAggregate(doc: bson.Document): PartialOrFullChecksum {
+  const partialChecksum = Number(BigInt(doc.checksum_total) & 0xffffffffn) & 0xffffffff;
+  const bucket = doc._id;
+
+  if (doc.has_clear_op == 1) {
+    return {
+      // full checksum - replaces any previous one
+      bucket,
+      checksum: partialChecksum,
+      count: doc.count
+    } satisfies BucketChecksum;
+  } else {
+    return {
+      // partial checksum - is added to a previous one
+      bucket,
+      partialCount: doc.count,
+      partialChecksum
+    } satisfies PartialChecksum;
+  }
+}
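In `checksumFromAggregate`, `Number(BigInt(doc.checksum_total) & 0xffffffffn) & 0xffffffff` keeps the low 32 bits of the 64-bit sum and then, because JavaScript bitwise operators coerce to 32-bit signed integers, reinterprets them as a signed value. A small worked example; the input value is invented for illustration:

    // Sketch: folding a 64-bit checksum sum into a signed 32-bit checksum.
    const checksumTotal = 0x1f0000001n;                // example sum wider than 32 bits
    const low32 = Number(checksumTotal & 0xffffffffn); // 0xf0000001 = 4026531841 (unsigned)
    const signed = low32 & 0xffffffff;                 // ToInt32 coercion -> -268435455 (signed)
    console.log(low32, signed);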