@powersync/service-module-mongodb-storage 0.0.0-dev-20250828134335 → 0.0.0-dev-20250829094737
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +5 -3
- package/dist/storage/implementation/MongoChecksums.d.ts +34 -0
- package/dist/storage/implementation/MongoChecksums.js +274 -0
- package/dist/storage/implementation/MongoChecksums.js.map +1 -0
- package/dist/storage/implementation/MongoCompactor.js +26 -29
- package/dist/storage/implementation/MongoCompactor.js.map +1 -1
- package/dist/storage/implementation/MongoSyncBucketStorage.d.ts +2 -11
- package/dist/storage/implementation/MongoSyncBucketStorage.js +7 -207
- package/dist/storage/implementation/MongoSyncBucketStorage.js.map +1 -1
- package/dist/storage/implementation/models.d.ts +1 -1
- package/dist/storage/implementation/util.d.ts +1 -28
- package/dist/storage/implementation/util.js +0 -39
- package/dist/storage/implementation/util.js.map +1 -1
- package/package.json +4 -4
- package/src/storage/implementation/MongoChecksums.ts +320 -0
- package/src/storage/implementation/MongoCompactor.ts +56 -55
- package/src/storage/implementation/MongoSyncBucketStorage.ts +6 -255
- package/src/storage/implementation/models.ts +1 -1
- package/src/storage/implementation/util.ts +0 -41
- package/tsconfig.tsbuildinfo +1 -1
|
@@ -2,17 +2,12 @@ import * as lib_mongo from '@powersync/lib-service-mongodb';
|
|
|
2
2
|
import { mongo } from '@powersync/lib-service-mongodb';
|
|
3
3
|
import {
|
|
4
4
|
BaseObserver,
|
|
5
|
-
DatabaseQueryError,
|
|
6
|
-
ErrorCode,
|
|
7
5
|
logger,
|
|
8
6
|
ReplicationAbortedError,
|
|
9
7
|
ServiceAssertionError
|
|
10
8
|
} from '@powersync/lib-services-framework';
|
|
11
9
|
import {
|
|
12
|
-
addBucketChecksums,
|
|
13
|
-
addPartialChecksums,
|
|
14
10
|
BroadcastIterable,
|
|
15
|
-
BucketChecksum,
|
|
16
11
|
CHECKPOINT_INVALIDATE_ALL,
|
|
17
12
|
CheckpointChanges,
|
|
18
13
|
CompactOptions,
|
|
@@ -20,10 +15,7 @@ import {
|
|
|
20
15
|
GetCheckpointChangesOptions,
|
|
21
16
|
InternalOpId,
|
|
22
17
|
internalToExternalOpId,
|
|
23
|
-
isPartialChecksum,
|
|
24
18
|
maxLsn,
|
|
25
|
-
PartialChecksum,
|
|
26
|
-
PartialOrFullChecksum,
|
|
27
19
|
ProtocolOpId,
|
|
28
20
|
ReplicationCheckpoint,
|
|
29
21
|
storage,
|
|
@@ -39,28 +31,18 @@ import { MongoBucketStorage } from '../MongoBucketStorage.js';
|
|
|
39
31
|
import { PowerSyncMongo } from './db.js';
|
|
40
32
|
import { BucketDataDocument, BucketDataKey, BucketStateDocument, SourceKey, SourceTableDocument } from './models.js';
|
|
41
33
|
import { MongoBucketBatch } from './MongoBucketBatch.js';
|
|
34
|
+
import { MongoChecksums } from './MongoChecksums.js';
|
|
42
35
|
import { MongoCompactor } from './MongoCompactor.js';
|
|
43
36
|
import { MongoParameterCompactor } from './MongoParameterCompactor.js';
|
|
44
37
|
import { MongoWriteCheckpointAPI } from './MongoWriteCheckpointAPI.js';
|
|
45
|
-
import {
|
|
46
|
-
CHECKSUM_QUERY_GROUP_STAGE,
|
|
47
|
-
checksumFromAggregate,
|
|
48
|
-
idPrefixFilter,
|
|
49
|
-
mapOpEntry,
|
|
50
|
-
readSingleBatch,
|
|
51
|
-
setSessionSnapshotTime
|
|
52
|
-
} from './util.js';
|
|
38
|
+
import { idPrefixFilter, mapOpEntry, readSingleBatch, setSessionSnapshotTime } from './util.js';
|
|
53
39
|
|
|
54
40
|
export class MongoSyncBucketStorage
|
|
55
41
|
extends BaseObserver<storage.SyncRulesBucketStorageListener>
|
|
56
42
|
implements storage.SyncRulesBucketStorage
|
|
57
43
|
{
|
|
58
44
|
private readonly db: PowerSyncMongo;
|
|
59
|
-
|
|
60
|
-
fetchChecksums: (batch) => {
|
|
61
|
-
return this.getChecksumsInternal(batch);
|
|
62
|
-
}
|
|
63
|
-
});
|
|
45
|
+
readonly checksums: MongoChecksums;
|
|
64
46
|
|
|
65
47
|
private parsedSyncRulesCache: { parsed: SqlSyncRules; options: storage.ParseSyncRulesOptions } | undefined;
|
|
66
48
|
private writeCheckpointAPI: MongoWriteCheckpointAPI;
|
|
@@ -74,6 +56,7 @@ export class MongoSyncBucketStorage
|
|
|
74
56
|
) {
|
|
75
57
|
super();
|
|
76
58
|
this.db = factory.db;
|
|
59
|
+
this.checksums = new MongoChecksums(this.db, this.group_id);
|
|
77
60
|
this.writeCheckpointAPI = new MongoWriteCheckpointAPI({
|
|
78
61
|
db: this.db,
|
|
79
62
|
mode: writeCheckpointMode,
|
|
@@ -503,243 +486,11 @@ export class MongoSyncBucketStorage
|
|
|
503
486
|
}
|
|
504
487
|
|
|
505
488
|
async getChecksums(checkpoint: utils.InternalOpId, buckets: string[]): Promise<utils.ChecksumMap> {
|
|
506
|
-
return this.
|
|
489
|
+
return this.checksums.getChecksums(checkpoint, buckets);
|
|
507
490
|
}
|
|
508
491
|
|
|
509
492
|
clearChecksumCache() {
|
|
510
|
-
this.
|
|
511
|
-
}
|
|
512
|
-
|
|
513
|
-
private async getChecksumsInternal(batch: storage.FetchPartialBucketChecksum[]): Promise<storage.PartialChecksumMap> {
|
|
514
|
-
if (batch.length == 0) {
|
|
515
|
-
return new Map();
|
|
516
|
-
}
|
|
517
|
-
|
|
518
|
-
const preFilters: any[] = [];
|
|
519
|
-
for (let request of batch) {
|
|
520
|
-
if (request.start == null) {
|
|
521
|
-
preFilters.push({
|
|
522
|
-
_id: {
|
|
523
|
-
g: this.group_id,
|
|
524
|
-
b: request.bucket
|
|
525
|
-
},
|
|
526
|
-
'compacted_state.op_id': { $exists: true, $lte: request.end }
|
|
527
|
-
});
|
|
528
|
-
}
|
|
529
|
-
}
|
|
530
|
-
|
|
531
|
-
const preStates = new Map<string, { opId: InternalOpId; checksum: BucketChecksum }>();
|
|
532
|
-
|
|
533
|
-
if (preFilters.length > 0) {
|
|
534
|
-
// For un-cached bucket checksums, attempt to use the compacted state first.
|
|
535
|
-
const states = await this.db.bucket_state
|
|
536
|
-
.find({
|
|
537
|
-
$or: preFilters
|
|
538
|
-
})
|
|
539
|
-
.toArray();
|
|
540
|
-
for (let state of states) {
|
|
541
|
-
const compactedState = state.compacted_state!;
|
|
542
|
-
preStates.set(state._id.b, {
|
|
543
|
-
opId: compactedState.op_id,
|
|
544
|
-
checksum: {
|
|
545
|
-
bucket: state._id.b,
|
|
546
|
-
checksum: Number(compactedState.checksum),
|
|
547
|
-
count: compactedState.count
|
|
548
|
-
}
|
|
549
|
-
});
|
|
550
|
-
}
|
|
551
|
-
}
|
|
552
|
-
|
|
553
|
-
const mappedRequests = batch.map((request) => {
|
|
554
|
-
let start = request.start;
|
|
555
|
-
if (start == null) {
|
|
556
|
-
const preState = preStates.get(request.bucket);
|
|
557
|
-
if (preState != null) {
|
|
558
|
-
start = preState.opId;
|
|
559
|
-
}
|
|
560
|
-
}
|
|
561
|
-
return {
|
|
562
|
-
...request,
|
|
563
|
-
start
|
|
564
|
-
};
|
|
565
|
-
});
|
|
566
|
-
|
|
567
|
-
const queriedChecksums = await this.queryPartialChecksums(mappedRequests);
|
|
568
|
-
|
|
569
|
-
return new Map<string, storage.PartialOrFullChecksum>(
|
|
570
|
-
batch.map((request) => {
|
|
571
|
-
const bucket = request.bucket;
|
|
572
|
-
// Could be null if this is either (1) a partial request, or (2) no compacted checksum was available
|
|
573
|
-
const preState = preStates.get(bucket);
|
|
574
|
-
// Could be null if we got no data
|
|
575
|
-
const partialChecksum = queriedChecksums.get(bucket);
|
|
576
|
-
const merged = addPartialChecksums(bucket, preState?.checksum ?? null, partialChecksum ?? null);
|
|
577
|
-
|
|
578
|
-
return [bucket, merged];
|
|
579
|
-
})
|
|
580
|
-
);
|
|
581
|
-
}
|
|
582
|
-
|
|
583
|
-
async queryPartialChecksums(batch: storage.FetchPartialBucketChecksum[]): Promise<storage.PartialChecksumMap> {
|
|
584
|
-
try {
|
|
585
|
-
return await this.queryPartialChecksumsInternal(batch);
|
|
586
|
-
} catch (e) {
|
|
587
|
-
if (e.codeName == 'MaxTimeMSExpired') {
|
|
588
|
-
logger.warn(`Checksum query timed out; falling back to slower version`, e);
|
|
589
|
-
// Timeout - try the slower but more robust version
|
|
590
|
-
return await this.queryPartialChecksumsFallback(batch);
|
|
591
|
-
}
|
|
592
|
-
throw lib_mongo.mapQueryError(e, 'while reading checksums');
|
|
593
|
-
}
|
|
594
|
-
}
|
|
595
|
-
|
|
596
|
-
private async queryPartialChecksumsInternal(
|
|
597
|
-
batch: storage.FetchPartialBucketChecksum[]
|
|
598
|
-
): Promise<storage.PartialChecksumMap> {
|
|
599
|
-
const filters: any[] = [];
|
|
600
|
-
for (let request of batch) {
|
|
601
|
-
filters.push({
|
|
602
|
-
_id: {
|
|
603
|
-
$gt: {
|
|
604
|
-
g: this.group_id,
|
|
605
|
-
b: request.bucket,
|
|
606
|
-
o: request.start ?? new bson.MinKey()
|
|
607
|
-
},
|
|
608
|
-
$lte: {
|
|
609
|
-
g: this.group_id,
|
|
610
|
-
b: request.bucket,
|
|
611
|
-
o: request.end
|
|
612
|
-
}
|
|
613
|
-
}
|
|
614
|
-
});
|
|
615
|
-
}
|
|
616
|
-
|
|
617
|
-
const aggregate = await this.db.bucket_data
|
|
618
|
-
.aggregate(
|
|
619
|
-
[
|
|
620
|
-
{
|
|
621
|
-
$match: {
|
|
622
|
-
$or: filters
|
|
623
|
-
}
|
|
624
|
-
},
|
|
625
|
-
CHECKSUM_QUERY_GROUP_STAGE
|
|
626
|
-
],
|
|
627
|
-
{ session: undefined, readConcern: 'snapshot', maxTimeMS: lib_mongo.MONGO_CHECKSUM_TIMEOUT_MS }
|
|
628
|
-
)
|
|
629
|
-
// Don't map the error here - we want to keep timeout errors as-is
|
|
630
|
-
.toArray();
|
|
631
|
-
|
|
632
|
-
const partialChecksums = new Map<string, storage.PartialOrFullChecksum>(
|
|
633
|
-
aggregate.map((doc) => {
|
|
634
|
-
const bucket = doc._id;
|
|
635
|
-
return [bucket, checksumFromAggregate(doc)];
|
|
636
|
-
})
|
|
637
|
-
);
|
|
638
|
-
|
|
639
|
-
return new Map<string, storage.PartialOrFullChecksum>(
|
|
640
|
-
batch.map((request) => {
|
|
641
|
-
const bucket = request.bucket;
|
|
642
|
-
// Could be null if we got no data
|
|
643
|
-
let partialChecksum = partialChecksums.get(bucket);
|
|
644
|
-
if (partialChecksum == null) {
|
|
645
|
-
partialChecksum = {
|
|
646
|
-
bucket,
|
|
647
|
-
partialCount: 0,
|
|
648
|
-
partialChecksum: 0
|
|
649
|
-
};
|
|
650
|
-
}
|
|
651
|
-
if (request.start == null && isPartialChecksum(partialChecksum)) {
|
|
652
|
-
partialChecksum = {
|
|
653
|
-
bucket,
|
|
654
|
-
count: partialChecksum.partialCount,
|
|
655
|
-
checksum: partialChecksum.partialChecksum
|
|
656
|
-
};
|
|
657
|
-
}
|
|
658
|
-
|
|
659
|
-
return [bucket, partialChecksum];
|
|
660
|
-
})
|
|
661
|
-
);
|
|
662
|
-
}
|
|
663
|
-
|
|
664
|
-
/**
|
|
665
|
-
* Checksums for large buckets can run over the query timeout.
|
|
666
|
-
* To avoid this, we query in batches.
|
|
667
|
-
* This version can handle larger amounts of data, but is slower, especially for many buckets.
|
|
668
|
-
*/
|
|
669
|
-
async queryPartialChecksumsFallback(
|
|
670
|
-
batch: storage.FetchPartialBucketChecksum[]
|
|
671
|
-
): Promise<storage.PartialChecksumMap> {
|
|
672
|
-
const partialChecksums = new Map<string, storage.PartialOrFullChecksum>();
|
|
673
|
-
for (let request of batch) {
|
|
674
|
-
const checksum = await this.slowChecksum(request);
|
|
675
|
-
partialChecksums.set(request.bucket, checksum);
|
|
676
|
-
}
|
|
677
|
-
|
|
678
|
-
return partialChecksums;
|
|
679
|
-
}
|
|
680
|
-
|
|
681
|
-
private async slowChecksum(request: storage.FetchPartialBucketChecksum): Promise<PartialOrFullChecksum> {
|
|
682
|
-
const batchLimit = 50_000;
|
|
683
|
-
|
|
684
|
-
let lowerBound = 0n;
|
|
685
|
-
const bucket = request.bucket;
|
|
686
|
-
|
|
687
|
-
let runningChecksum: PartialOrFullChecksum = {
|
|
688
|
-
bucket,
|
|
689
|
-
partialCount: 0,
|
|
690
|
-
partialChecksum: 0
|
|
691
|
-
};
|
|
692
|
-
if (request.start == null) {
|
|
693
|
-
runningChecksum = {
|
|
694
|
-
bucket,
|
|
695
|
-
count: 0,
|
|
696
|
-
checksum: 0
|
|
697
|
-
};
|
|
698
|
-
}
|
|
699
|
-
|
|
700
|
-
while (true) {
|
|
701
|
-
const filter = {
|
|
702
|
-
_id: {
|
|
703
|
-
$gt: {
|
|
704
|
-
g: this.group_id,
|
|
705
|
-
b: bucket,
|
|
706
|
-
o: lowerBound
|
|
707
|
-
},
|
|
708
|
-
$lte: {
|
|
709
|
-
g: this.group_id,
|
|
710
|
-
b: bucket,
|
|
711
|
-
o: request.end
|
|
712
|
-
}
|
|
713
|
-
}
|
|
714
|
-
};
|
|
715
|
-
const docs = await this.db.bucket_data
|
|
716
|
-
.aggregate(
|
|
717
|
-
[
|
|
718
|
-
{
|
|
719
|
-
$match: filter
|
|
720
|
-
},
|
|
721
|
-
// sort and limit _before_ grouping
|
|
722
|
-
{ $sort: { _id: 1 } },
|
|
723
|
-
{ $limit: batchLimit },
|
|
724
|
-
CHECKSUM_QUERY_GROUP_STAGE
|
|
725
|
-
],
|
|
726
|
-
{ session: undefined, readConcern: 'snapshot', maxTimeMS: lib_mongo.MONGO_CHECKSUM_TIMEOUT_MS }
|
|
727
|
-
)
|
|
728
|
-
.toArray();
|
|
729
|
-
const doc = docs[0];
|
|
730
|
-
if (doc == null) {
|
|
731
|
-
return runningChecksum;
|
|
732
|
-
}
|
|
733
|
-
const partial = checksumFromAggregate(doc);
|
|
734
|
-
runningChecksum = addPartialChecksums(bucket, runningChecksum, partial);
|
|
735
|
-
const isFinal = doc.count != batchLimit;
|
|
736
|
-
if (isFinal) {
|
|
737
|
-
break;
|
|
738
|
-
} else {
|
|
739
|
-
lowerBound = doc.last_op;
|
|
740
|
-
}
|
|
741
|
-
}
|
|
742
|
-
return runningChecksum;
|
|
493
|
+
this.checksums.clearCache();
|
|
743
494
|
}
|
|
744
495
|
|
|
745
496
|
async terminate(options?: storage.TerminateOptions) {
|
|
@@ -130,44 +130,3 @@ export function setSessionSnapshotTime(session: mongo.ClientSession, time: bson.
|
|
|
130
130
|
throw new ServiceAssertionError(`Session snapshotTime is already set`);
|
|
131
131
|
}
|
|
132
132
|
}
|
|
133
|
-
|
|
134
|
-
export const CHECKSUM_QUERY_GROUP_STAGE = {
|
|
135
|
-
$group: {
|
|
136
|
-
_id: '$_id.b',
|
|
137
|
-
// Historically, checksum may be stored as 'int' or 'double'.
|
|
138
|
-
// More recently, this should be a 'long'.
|
|
139
|
-
// $toLong ensures that we always sum it as a long, avoiding inaccuracies in the calculations.
|
|
140
|
-
checksum_total: { $sum: { $toLong: '$checksum' } },
|
|
141
|
-
count: { $sum: 1 },
|
|
142
|
-
has_clear_op: {
|
|
143
|
-
$max: {
|
|
144
|
-
$cond: [{ $eq: ['$op', 'CLEAR'] }, 1, 0]
|
|
145
|
-
}
|
|
146
|
-
},
|
|
147
|
-
last_op: { $max: '$_id.o' }
|
|
148
|
-
}
|
|
149
|
-
};
|
|
150
|
-
|
|
151
|
-
/**
|
|
152
|
-
* Convert output of CHECKSUM_QUERY_GROUP_STAGE into a checksum.
|
|
153
|
-
*/
|
|
154
|
-
export function checksumFromAggregate(doc: bson.Document): PartialOrFullChecksum {
|
|
155
|
-
const partialChecksum = Number(BigInt(doc.checksum_total) & 0xffffffffn) & 0xffffffff;
|
|
156
|
-
const bucket = doc._id;
|
|
157
|
-
|
|
158
|
-
if (doc.has_clear_op == 1) {
|
|
159
|
-
return {
|
|
160
|
-
// full checksum - replaces any previous one
|
|
161
|
-
bucket,
|
|
162
|
-
checksum: partialChecksum,
|
|
163
|
-
count: doc.count
|
|
164
|
-
} satisfies BucketChecksum;
|
|
165
|
-
} else {
|
|
166
|
-
return {
|
|
167
|
-
// partial checksum - is added to a previous one
|
|
168
|
-
bucket,
|
|
169
|
-
partialCount: doc.count,
|
|
170
|
-
partialChecksum
|
|
171
|
-
} satisfies PartialChecksum;
|
|
172
|
-
}
|
|
173
|
-
}
|