@powersync/service-module-mongodb-storage 0.13.0 → 0.13.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +16 -0
- package/dist/storage/implementation/MongoCompactor.d.ts +7 -4
- package/dist/storage/implementation/MongoCompactor.js +71 -48
- package/dist/storage/implementation/MongoCompactor.js.map +1 -1
- package/dist/storage/implementation/MongoPersistedSyncRules.d.ts +3 -3
- package/dist/storage/implementation/MongoPersistedSyncRules.js +1 -1
- package/dist/storage/implementation/MongoPersistedSyncRules.js.map +1 -1
- package/dist/storage/implementation/MongoSyncBucketStorage.d.ts +1 -0
- package/dist/storage/implementation/MongoSyncBucketStorage.js +38 -39
- package/dist/storage/implementation/MongoSyncBucketStorage.js.map +1 -1
- package/package.json +6 -6
- package/src/storage/implementation/MongoCompactor.ts +80 -54
- package/src/storage/implementation/MongoPersistedSyncRules.ts +3 -3
- package/src/storage/implementation/MongoSyncBucketStorage.ts +44 -39
- package/test/src/storage_compacting.test.ts +1 -0
- package/tsconfig.tsbuildinfo +1 -1
|
@@ -62,6 +62,7 @@ const DEFAULT_CLEAR_BATCH_LIMIT = 5000;
|
|
|
62
62
|
const DEFAULT_MOVE_BATCH_LIMIT = 2000;
|
|
63
63
|
const DEFAULT_MOVE_BATCH_QUERY_LIMIT = 10_000;
|
|
64
64
|
const DEFAULT_MIN_BUCKET_CHANGES = 10;
|
|
65
|
+
const DEFAULT_MIN_CHANGE_RATIO = 0.1;
|
|
65
66
|
|
|
66
67
|
/** This default is primarily for tests. */
|
|
67
68
|
const DEFAULT_MEMORY_LIMIT_MB = 64;
|
|
@@ -75,6 +76,7 @@ export class MongoCompactor {
|
|
|
75
76
|
private moveBatchQueryLimit: number;
|
|
76
77
|
private clearBatchLimit: number;
|
|
77
78
|
private minBucketChanges: number;
|
|
79
|
+
private minChangeRatio: number;
|
|
78
80
|
private maxOpId: bigint;
|
|
79
81
|
private buckets: string[] | undefined;
|
|
80
82
|
private signal?: AbortSignal;
|
|
@@ -91,6 +93,7 @@ export class MongoCompactor {
|
|
|
91
93
|
this.moveBatchQueryLimit = options?.moveBatchQueryLimit ?? DEFAULT_MOVE_BATCH_QUERY_LIMIT;
|
|
92
94
|
this.clearBatchLimit = options?.clearBatchLimit ?? DEFAULT_CLEAR_BATCH_LIMIT;
|
|
93
95
|
this.minBucketChanges = options?.minBucketChanges ?? DEFAULT_MIN_BUCKET_CHANGES;
|
|
96
|
+
this.minChangeRatio = options?.minChangeRatio ?? DEFAULT_MIN_CHANGE_RATIO;
|
|
94
97
|
this.maxOpId = options?.maxOpId ?? 0n;
|
|
95
98
|
this.buckets = options?.compactBuckets;
|
|
96
99
|
this.signal = options?.signal;
|
|
@@ -115,27 +118,19 @@ export class MongoCompactor {
|
|
|
115
118
|
}
|
|
116
119
|
|
|
117
120
|
private async compactDirtyBuckets() {
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
let recentlyCompacted: string[] = [];
|
|
125
|
-
const buckets = await this.dirtyBucketBatch({
|
|
126
|
-
minBucketChanges: this.minBucketChanges,
|
|
127
|
-
exclude: recentlyCompacted
|
|
128
|
-
});
|
|
129
|
-
if (buckets.length == 0) {
|
|
130
|
-
// All done
|
|
121
|
+
for await (let buckets of this.dirtyBucketBatches({
|
|
122
|
+
minBucketChanges: this.minBucketChanges,
|
|
123
|
+
minChangeRatio: this.minChangeRatio
|
|
124
|
+
})) {
|
|
125
|
+
if (this.signal?.aborted) {
|
|
131
126
|
break;
|
|
132
127
|
}
|
|
128
|
+
if (buckets.length == 0) {
|
|
129
|
+
continue;
|
|
130
|
+
}
|
|
131
|
+
|
|
133
132
|
for (let { bucket } of buckets) {
|
|
134
133
|
await this.compactSingleBucket(bucket);
|
|
135
|
-
recentlyCompacted.push(bucket);
|
|
136
|
-
}
|
|
137
|
-
if (recentlyCompacted.length > TRACK_RECENTLY_COMPACTED_NUMBER) {
|
|
138
|
-
recentlyCompacted = recentlyCompacted.slice(-TRACK_RECENTLY_COMPACTED_NUMBER);
|
|
139
134
|
}
|
|
140
135
|
}
|
|
141
136
|
}
|
|
@@ -490,14 +485,19 @@ export class MongoCompactor {
|
|
|
490
485
|
*/
|
|
491
486
|
async populateChecksums(options: { minBucketChanges: number }): Promise<PopulateChecksumCacheResults> {
|
|
492
487
|
let count = 0;
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
488
|
+
for await (let buckets of this.dirtyBucketBatches({
|
|
489
|
+
minBucketChanges: options.minBucketChanges,
|
|
490
|
+
minChangeRatio: 0
|
|
491
|
+
})) {
|
|
492
|
+
if (this.signal?.aborted) {
|
|
497
493
|
break;
|
|
498
494
|
}
|
|
495
|
+
if (buckets.length == 0) {
|
|
496
|
+
continue;
|
|
497
|
+
}
|
|
498
|
+
|
|
499
499
|
const start = Date.now();
|
|
500
|
-
logger.info(`Calculating checksums for batch of ${buckets.length} buckets`);
|
|
500
|
+
logger.info(`Calculating checksums for batch of ${buckets.length} buckets, starting at ${buckets[0].bucket}`);
|
|
501
501
|
|
|
502
502
|
// Filter batch by estimated bucket size, to reduce possibility of timeouts
|
|
503
503
|
let checkBuckets: typeof buckets = [];
|
|
@@ -517,45 +517,71 @@ export class MongoCompactor {
|
|
|
517
517
|
}
|
|
518
518
|
|
|
519
519
|
/**
|
|
520
|
-
*
|
|
520
|
+
* Return batches of dirty buckets.
|
|
521
|
+
*
|
|
522
|
+
* Can be used to iterate through all buckets.
|
|
521
523
|
*
|
|
522
|
-
*
|
|
523
|
-
*
|
|
524
|
+
* minBucketChanges: minimum number of changes for a bucket to be included in the results.
|
|
525
|
+
* minChangeRatio: minimum ratio of changes to total ops for a bucket to be included in the results, number between 0 and 1.
|
|
524
526
|
*/
|
|
525
|
-
private async
|
|
527
|
+
private async *dirtyBucketBatches(options: {
|
|
526
528
|
minBucketChanges: number;
|
|
527
|
-
|
|
528
|
-
}):
|
|
529
|
+
minChangeRatio: number;
|
|
530
|
+
}): AsyncGenerator<{ bucket: string; estimatedCount: number }[]> {
|
|
531
|
+
// Previously, we used an index on {_id.g: 1, estimate_since_compact.count: 1} to query only buckets with changes.
|
|
532
|
+
// This works well if there are only a small number of buckets with changes.
|
|
533
|
+
// However, if buckets are continuously modified while we are compacting, we get the same buckets over and over again.
|
|
534
|
+
// This has caused the compact process to re-read the same collection around 5 times in total, which is very inefficient.
|
|
535
|
+
// To solve this, we now just iterate through all buckets, and filter out the ones with low changes.
|
|
536
|
+
|
|
529
537
|
if (options.minBucketChanges <= 0) {
|
|
530
538
|
throw new ReplicationAssertionError('minBucketChanges must be >= 1');
|
|
531
539
|
}
|
|
532
|
-
|
|
533
|
-
const
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
{
|
|
541
|
-
projection: {
|
|
542
|
-
_id: 1,
|
|
543
|
-
estimate_since_compact: 1,
|
|
544
|
-
compacted_state: 1
|
|
545
|
-
},
|
|
546
|
-
sort: {
|
|
547
|
-
'estimate_since_compact.count': -1
|
|
540
|
+
let lastId = { g: this.group_id, b: new mongo.MinKey() as any };
|
|
541
|
+
const maxId = { g: this.group_id, b: new mongo.MaxKey() as any };
|
|
542
|
+
while (true) {
|
|
543
|
+
const batch = await this.db.bucket_state
|
|
544
|
+
.find(
|
|
545
|
+
{
|
|
546
|
+
_id: { $gt: lastId, $lt: maxId },
|
|
547
|
+
'estimate_since_compact.count': { $gte: options.minBucketChanges }
|
|
548
548
|
},
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
549
|
+
{
|
|
550
|
+
projection: {
|
|
551
|
+
_id: 1,
|
|
552
|
+
estimate_since_compact: 1,
|
|
553
|
+
compacted_state: 1
|
|
554
|
+
},
|
|
555
|
+
sort: {
|
|
556
|
+
_id: 1
|
|
557
|
+
},
|
|
558
|
+
limit: 2000,
|
|
559
|
+
maxTimeMS: MONGO_OPERATION_TIMEOUT_MS
|
|
560
|
+
}
|
|
561
|
+
)
|
|
562
|
+
.toArray();
|
|
563
|
+
if (batch.length == 0) {
|
|
564
|
+
break;
|
|
565
|
+
}
|
|
566
|
+
lastId = batch[batch.length - 1]._id;
|
|
567
|
+
const mapped = batch.map((b) => {
|
|
568
|
+
const updatedCount = b.estimate_since_compact?.count ?? 0;
|
|
569
|
+
const totalCount = (b.compacted_state?.count ?? 0) + updatedCount;
|
|
570
|
+
const updatedBytes = b.estimate_since_compact?.bytes ?? 0;
|
|
571
|
+
const totalBytes = (b.compacted_state?.bytes ?? 0) + updatedBytes;
|
|
572
|
+
const dirtyChangeNumber = totalCount > 0 ? updatedCount / totalCount : 0;
|
|
573
|
+
const dirtyChangeBytes = totalBytes > 0 ? updatedBytes / totalBytes : 0;
|
|
574
|
+
return {
|
|
575
|
+
bucket: b._id.b,
|
|
576
|
+
estimatedCount: totalCount,
|
|
577
|
+
dirtyRatio: Math.max(dirtyChangeNumber, dirtyChangeBytes)
|
|
578
|
+
};
|
|
579
|
+
});
|
|
580
|
+
const filtered = mapped.filter(
|
|
581
|
+
(b) => b.estimatedCount >= options.minBucketChanges && b.dirtyRatio >= options.minChangeRatio
|
|
582
|
+
);
|
|
583
|
+
yield filtered;
|
|
584
|
+
}
|
|
559
585
|
}
|
|
560
586
|
|
|
561
587
|
private async updateChecksumsBatch(buckets: string[]) {
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { SyncConfigWithErrors, HydratedSyncRules, versionedHydrationState } from '@powersync/service-sync-rules';
|
|
2
2
|
|
|
3
3
|
import { storage } from '@powersync/service-core';
|
|
4
4
|
|
|
@@ -7,7 +7,7 @@ export class MongoPersistedSyncRules implements storage.PersistedSyncRules {
|
|
|
7
7
|
|
|
8
8
|
constructor(
|
|
9
9
|
public readonly id: number,
|
|
10
|
-
public readonly sync_rules:
|
|
10
|
+
public readonly sync_rules: SyncConfigWithErrors,
|
|
11
11
|
public readonly checkpoint_lsn: string | null,
|
|
12
12
|
slot_name: string | null
|
|
13
13
|
) {
|
|
@@ -15,6 +15,6 @@ export class MongoPersistedSyncRules implements storage.PersistedSyncRules {
|
|
|
15
15
|
}
|
|
16
16
|
|
|
17
17
|
hydratedSyncRules(): HydratedSyncRules {
|
|
18
|
-
return this.sync_rules.hydrate({ hydrationState: versionedHydrationState(this.id) });
|
|
18
|
+
return this.sync_rules.config.hydrate({ hydrationState: versionedHydrationState(this.id) });
|
|
19
19
|
}
|
|
20
20
|
}
|
|
@@ -15,6 +15,7 @@ import {
|
|
|
15
15
|
InternalOpId,
|
|
16
16
|
internalToExternalOpId,
|
|
17
17
|
maxLsn,
|
|
18
|
+
mergeAsyncIterables,
|
|
18
19
|
PopulateChecksumCacheOptions,
|
|
19
20
|
PopulateChecksumCacheResults,
|
|
20
21
|
ProtocolOpId,
|
|
@@ -694,53 +695,39 @@ export class MongoSyncBucketStorage
|
|
|
694
695
|
* Instance-wide watch on the latest available checkpoint (op_id + lsn).
|
|
695
696
|
*/
|
|
696
697
|
private async *watchActiveCheckpoint(signal: AbortSignal): AsyncIterable<ReplicationCheckpoint> {
|
|
697
|
-
const stream = this.checkpointChangesStream(signal);
|
|
698
|
-
|
|
699
698
|
if (signal.aborted) {
|
|
700
699
|
return;
|
|
701
700
|
}
|
|
702
701
|
|
|
702
|
+
// If the stream is idle, we wait a max of a minute (CHECKPOINT_TIMEOUT_MS) before we get another checkpoint,
|
|
703
|
+
// to avoid stale checkpoint snapshots. This is what checkpointTimeoutStream() is for.
|
|
704
|
+
// Essentially, even if there are no actual checkpoint changes, we want a new snapshotTime every minute or so,
|
|
705
|
+
// to ensure that any new clients connecting will get a valid snapshotTime.
|
|
706
|
+
const stream = mergeAsyncIterables(
|
|
707
|
+
[this.checkpointChangesStream(signal), this.checkpointTimeoutStream(signal)],
|
|
708
|
+
signal
|
|
709
|
+
);
|
|
710
|
+
|
|
703
711
|
// We only watch changes to the active sync rules.
|
|
704
712
|
// If it changes to inactive, we abort and restart with the new sync rules.
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
//
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
.setTimeout(CHECKPOINT_TIMEOUT_MS, { done: false }, { signal })
|
|
711
|
-
.catch(() => ({ done: true }));
|
|
712
|
-
try {
|
|
713
|
-
const result = await Promise.race([stream.next(), timeout]);
|
|
714
|
-
if (result.done) {
|
|
715
|
-
break;
|
|
716
|
-
}
|
|
717
|
-
} catch (e) {
|
|
718
|
-
if (e.name == 'AbortError') {
|
|
719
|
-
break;
|
|
720
|
-
}
|
|
721
|
-
throw e;
|
|
722
|
-
}
|
|
723
|
-
|
|
724
|
-
if (signal.aborted) {
|
|
725
|
-
// Would likely have been caught by the signal on the timeout or the upstream stream, but we check here anyway
|
|
726
|
-
break;
|
|
727
|
-
}
|
|
728
|
-
|
|
729
|
-
const op = await this.getCheckpointInternal();
|
|
730
|
-
if (op == null) {
|
|
731
|
-
// Sync rules have changed - abort and restart.
|
|
732
|
-
// We do a soft close of the stream here - no error
|
|
733
|
-
break;
|
|
734
|
-
}
|
|
713
|
+
for await (const _ of stream) {
|
|
714
|
+
if (signal.aborted) {
|
|
715
|
+
// Would likely have been caught by the signal on the timeout or the upstream stream, but we check here anyway
|
|
716
|
+
break;
|
|
717
|
+
}
|
|
735
718
|
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
//
|
|
739
|
-
//
|
|
740
|
-
|
|
719
|
+
const op = await this.getCheckpointInternal();
|
|
720
|
+
if (op == null) {
|
|
721
|
+
// Sync rules have changed - abort and restart.
|
|
722
|
+
// We do a soft close of the stream here - no error
|
|
723
|
+
break;
|
|
741
724
|
}
|
|
742
|
-
|
|
743
|
-
|
|
725
|
+
|
|
726
|
+
// Previously, we only yielded when the checkpoint or lsn changed.
|
|
727
|
+
// However, we always want to use the latest snapshotTime, so we skip that filtering here.
|
|
728
|
+
// That filtering could be added in the per-user streams if needed, but in general the capped collection
|
|
729
|
+
// should already only contain useful changes in most cases.
|
|
730
|
+
yield op;
|
|
744
731
|
}
|
|
745
732
|
}
|
|
746
733
|
|
|
@@ -900,6 +887,24 @@ export class MongoSyncBucketStorage
|
|
|
900
887
|
}
|
|
901
888
|
}
|
|
902
889
|
|
|
890
|
+
private async *checkpointTimeoutStream(signal: AbortSignal): AsyncGenerator<void> {
|
|
891
|
+
while (!signal.aborted) {
|
|
892
|
+
try {
|
|
893
|
+
await timers.setTimeout(CHECKPOINT_TIMEOUT_MS, undefined, { signal });
|
|
894
|
+
} catch (e) {
|
|
895
|
+
if (e.name == 'AbortError') {
|
|
896
|
+
// This is how we typically abort this stream, when all listeners are done
|
|
897
|
+
return;
|
|
898
|
+
}
|
|
899
|
+
throw e;
|
|
900
|
+
}
|
|
901
|
+
|
|
902
|
+
if (!signal.aborted) {
|
|
903
|
+
yield;
|
|
904
|
+
}
|
|
905
|
+
}
|
|
906
|
+
}
|
|
907
|
+
|
|
903
908
|
private async getDataBucketChanges(
|
|
904
909
|
options: GetCheckpointChangesOptions
|
|
905
910
|
): Promise<Pick<CheckpointChanges, 'updatedDataBuckets' | 'invalidateDataBuckets'>> {
|