@powersync/service-module-mongodb-storage 0.12.0 → 0.12.1

This diff compares the publicly released contents of the two package versions as they appear in their public registry. It is provided for informational purposes only.
@@ -7,9 +7,7 @@ import {
   ServiceAssertionError
 } from '@powersync/lib-services-framework';
 import {
-  addPartialChecksums,
   BroadcastIterable,
-  BucketChecksum,
   CHECKPOINT_INVALIDATE_ALL,
   CheckpointChanges,
   CompactOptions,
@@ -18,7 +16,6 @@ import {
   InternalOpId,
   internalToExternalOpId,
   maxLsn,
-  PartialChecksum,
   ProtocolOpId,
   ReplicationCheckpoint,
   storage,
@@ -34,6 +31,7 @@ import { MongoBucketStorage } from '../MongoBucketStorage.js';
 import { PowerSyncMongo } from './db.js';
 import { BucketDataDocument, BucketDataKey, BucketStateDocument, SourceKey, SourceTableDocument } from './models.js';
 import { MongoBucketBatch } from './MongoBucketBatch.js';
+import { MongoChecksums } from './MongoChecksums.js';
 import { MongoCompactor } from './MongoCompactor.js';
 import { MongoParameterCompactor } from './MongoParameterCompactor.js';
 import { MongoWriteCheckpointAPI } from './MongoWriteCheckpointAPI.js';
@@ -44,11 +42,7 @@ export class MongoSyncBucketStorage
   implements storage.SyncRulesBucketStorage
 {
   private readonly db: PowerSyncMongo;
-  private checksumCache = new storage.ChecksumCache({
-    fetchChecksums: (batch) => {
-      return this.getChecksumsInternal(batch);
-    }
-  });
+  readonly checksums: MongoChecksums;

   private parsedSyncRulesCache: { parsed: SqlSyncRules; options: storage.ParseSyncRulesOptions } | undefined;
   private writeCheckpointAPI: MongoWriteCheckpointAPI;
@@ -62,6 +56,7 @@ export class MongoSyncBucketStorage
   ) {
     super();
     this.db = factory.db;
+    this.checksums = new MongoChecksums(this.db, this.group_id);
     this.writeCheckpointAPI = new MongoWriteCheckpointAPI({
       db: this.db,
       mode: writeCheckpointMode,
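Taken together, the hunks above replace the inline `storage.ChecksumCache` field with a dedicated `MongoChecksums` helper. Based only on the call sites visible in this diff (`new MongoChecksums(this.db, this.group_id)`, `getChecksums`, `clearCache`), its surface presumably looks roughly like the sketch below; the method bodies, the `group_id` type, and the internal use of `storage.ChecksumCache` are assumptions, not the released implementation:

```ts
// Hypothetical sketch of the MongoChecksums surface implied by this diff.
import { storage, utils } from '@powersync/service-core';
import { PowerSyncMongo } from './db.js';

export class MongoChecksums {
  // Assumed: the same in-memory cache that MongoSyncBucketStorage held before.
  private cache = new storage.ChecksumCache({
    fetchChecksums: (batch) => this.fetchChecksums(batch)
  });

  constructor(
    private readonly db: PowerSyncMongo,
    private readonly group_id: number // type assumed
  ) {}

  // Called from MongoSyncBucketStorage.getChecksums().
  getChecksums(checkpoint: utils.InternalOpId, buckets: string[]): Promise<utils.ChecksumMap> {
    return this.cache.getChecksumMap(checkpoint, buckets);
  }

  // Called from MongoSyncBucketStorage.clearChecksumCache().
  clearCache() {
    this.cache.clear();
  }

  private async fetchChecksums(
    batch: storage.FetchPartialBucketChecksum[]
  ): Promise<storage.PartialChecksumMap> {
    // Presumably hosts the query/aggregation logic removed from
    // MongoSyncBucketStorage.getChecksumsInternal() in the hunk below.
    throw new Error('sketch only');
  }
}
```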
@@ -491,145 +486,11 @@ export class MongoSyncBucketStorage
   }

   async getChecksums(checkpoint: utils.InternalOpId, buckets: string[]): Promise<utils.ChecksumMap> {
-    return this.checksumCache.getChecksumMap(checkpoint, buckets);
+    return this.checksums.getChecksums(checkpoint, buckets);
   }

   clearChecksumCache() {
-    this.checksumCache.clear();
-  }
-
-  private async getChecksumsInternal(batch: storage.FetchPartialBucketChecksum[]): Promise<storage.PartialChecksumMap> {
-    if (batch.length == 0) {
-      return new Map();
-    }
-
-    const preFilters: any[] = [];
-    for (let request of batch) {
-      if (request.start == null) {
-        preFilters.push({
-          _id: {
-            g: this.group_id,
-            b: request.bucket
-          },
-          'compacted_state.op_id': { $exists: true, $lte: request.end }
-        });
-      }
-    }
-
-    const preStates = new Map<string, { opId: InternalOpId; checksum: BucketChecksum }>();
-
-    if (preFilters.length > 0) {
-      // For un-cached bucket checksums, attempt to use the compacted state first.
-      const states = await this.db.bucket_state
-        .find({
-          $or: preFilters
-        })
-        .toArray();
-      for (let state of states) {
-        const compactedState = state.compacted_state!;
-        preStates.set(state._id.b, {
-          opId: compactedState.op_id,
-          checksum: {
-            bucket: state._id.b,
-            checksum: Number(compactedState.checksum),
-            count: compactedState.count
-          }
-        });
-      }
-    }
-
-    const filters: any[] = [];
-    for (let request of batch) {
-      let start = request.start;
-      if (start == null) {
-        const preState = preStates.get(request.bucket);
-        if (preState != null) {
-          start = preState.opId;
-        }
-      }
-
-      filters.push({
-        _id: {
-          $gt: {
-            g: this.group_id,
-            b: request.bucket,
-            o: start ?? new bson.MinKey()
-          },
-          $lte: {
-            g: this.group_id,
-            b: request.bucket,
-            o: request.end
-          }
-        }
-      });
-    }
-
-    const aggregate = await this.db.bucket_data
-      .aggregate(
-        [
-          {
-            $match: {
-              $or: filters
-            }
-          },
-          {
-            $group: {
-              _id: '$_id.b',
-              // Historically, checksum may be stored as 'int' or 'double'.
-              // More recently, this should be a 'long'.
-              // $toLong ensures that we always sum it as a long, avoiding inaccuracies in the calculations.
-              checksum_total: { $sum: { $toLong: '$checksum' } },
-              count: { $sum: 1 },
-              has_clear_op: {
-                $max: {
-                  $cond: [{ $eq: ['$op', 'CLEAR'] }, 1, 0]
-                }
-              }
-            }
-          }
-        ],
-        { session: undefined, readConcern: 'snapshot', maxTimeMS: lib_mongo.db.MONGO_CHECKSUM_TIMEOUT_MS }
-      )
-      .toArray()
-      .catch((e) => {
-        throw lib_mongo.mapQueryError(e, 'while reading checksums');
-      });
-
-    const partialChecksums = new Map<string, storage.PartialOrFullChecksum>(
-      aggregate.map((doc) => {
-        const partialChecksum = Number(BigInt(doc.checksum_total) & 0xffffffffn) & 0xffffffff;
-        const bucket = doc._id;
-        return [
-          bucket,
-          doc.has_clear_op == 1
-            ? ({
-                // full checksum - replaces any previous one
-                bucket,
-                checksum: partialChecksum,
-                count: doc.count
-              } satisfies BucketChecksum)
-            : ({
-                // partial checksum - is added to a previous one
-                bucket,
-                partialCount: doc.count,
-                partialChecksum
-              } satisfies PartialChecksum)
-        ];
-      })
-    );
-
-    return new Map<string, storage.PartialOrFullChecksum>(
-      batch.map((request) => {
-        const bucket = request.bucket;
-        // Could be null if this is either (1) a partial request, or (2) no compacted checksum was available
-        const preState = preStates.get(bucket);
-        // Could be null if we got no data
-        const partialChecksum = partialChecksums.get(bucket);
-        const merged = addPartialChecksums(bucket, preState?.checksum ?? null, partialChecksum ?? null);
-
-        return [bucket, merged];
-      })
-    );
+    this.checksums.clearCache();
   }

   async terminate(options?: storage.TerminateOptions) {
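For reference, the removed pipeline sums per-op checksums as 64-bit longs (`$toLong`), then wraps the total to a signed 32-bit integer; a `CLEAR` op marks the result as a full checksum that replaces any compacted pre-state instead of adding to it. A minimal standalone illustration of the wrap step, using a hypothetical helper name that is not part of this package:

```ts
// Hypothetical illustration of the 32-bit truncation in the removed pipeline:
// checksums are summed as 64-bit integers, then wrapped to a signed 32-bit value.
function truncateChecksum(checksumTotal: bigint): number {
  // Same expression as the removed code:
  //   Number(BigInt(doc.checksum_total) & 0xffffffffn) & 0xffffffff
  // The final `& 0xffffffff` coerces the result to a signed 32-bit integer.
  return Number(checksumTotal & 0xffffffffn) & 0xffffffff;
}

// Sums that differ by a multiple of 2^32 wrap to the same checksum.
console.log(truncateChecksum(0x1_0000_0005n)); // 5
console.log(truncateChecksum(5n)); // 5
```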
@@ -779,22 +640,25 @@ export class MongoSyncBucketStorage
       const checkpoint = await this.getCheckpointInternal();
       maxOpId = checkpoint?.checkpoint ?? undefined;
     }
-    await new MongoCompactor(this.db, this.group_id, { ...options, maxOpId }).compact();
+    await new MongoCompactor(this, this.db, { ...options, maxOpId }).compact();
+
     if (maxOpId != null && options?.compactParameterData) {
       await new MongoParameterCompactor(this.db, this.group_id, maxOpId, options).compact();
     }
   }

-  async populatePersistentChecksumCache(options: Pick<CompactOptions, 'signal' | 'maxOpId'>): Promise<void> {
+  async populatePersistentChecksumCache(options: Required<Pick<CompactOptions, 'signal' | 'maxOpId'>>): Promise<void> {
+    logger.info(`Populating persistent checksum cache...`);
     const start = Date.now();
-    // We do a minimal compact, primarily to populate the checksum cache
-    await this.compact({
+    // We do a minimal compact here.
+    // We can optimize this in the future.
+    const compactor = new MongoCompactor(this, this.db, {
       ...options,
-      // Skip parameter data
-      compactParameterData: false,
       // Don't track updates for MOVE compacting
       memoryLimitMB: 0
     });
+
+    await compactor.populateChecksums();
     const duration = Date.now() - start;
     logger.info(`Populated persistent checksum cache in ${(duration / 1000).toFixed(1)}s`);
   }
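Note that `populatePersistentChecksumCache` now requires both `signal` and `maxOpId` (via `Required<Pick<...>>`) and drives `MongoCompactor.populateChecksums()` directly rather than running a full `compact()`. A hedged caller sketch, assuming `CompactOptions.signal` is an `AbortSignal` and that `bucketStorage` and `maxOpId` are in scope:

```ts
// Hypothetical call site; the AbortController wiring is illustrative only.
const controller = new AbortController();

await bucketStorage.populatePersistentChecksumCache({
  signal: controller.signal,
  // e.g. the checkpoint op id obtained from the current replication checkpoint
  maxOpId
});
```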
@@ -106,7 +106,7 @@ export interface BucketStateDocument {
     op_id: InternalOpId;
     count: number;
     checksum: bigint;
-    bytes: number;
+    bytes: number | null;
   };

   estimate_since_compact?: {
@@ -3,7 +3,7 @@ import * as crypto from 'crypto';
 import * as uuid from 'uuid';

 import { mongo } from '@powersync/lib-service-mongodb';
-import { storage, utils } from '@powersync/service-core';
+import { BucketChecksum, PartialChecksum, PartialOrFullChecksum, storage, utils } from '@powersync/service-core';

 import { PowerSyncMongo } from './db.js';
 import { BucketDataDocument } from './models.js';