@powersync/service-module-mongodb-storage 0.11.0 → 0.12.1

This diff shows the content of publicly available package versions as released to a supported public registry, and is provided for informational purposes only.
Files changed (30)
  1. package/CHANGELOG.md +44 -0
  2. package/LICENSE +3 -3
  3. package/dist/storage/implementation/MongoBucketBatch.d.ts +1 -1
  4. package/dist/storage/implementation/MongoBucketBatch.js +7 -4
  5. package/dist/storage/implementation/MongoBucketBatch.js.map +1 -1
  6. package/dist/storage/implementation/MongoChecksums.d.ts +34 -0
  7. package/dist/storage/implementation/MongoChecksums.js +274 -0
  8. package/dist/storage/implementation/MongoChecksums.js.map +1 -0
  9. package/dist/storage/implementation/MongoCompactor.d.ts +16 -2
  10. package/dist/storage/implementation/MongoCompactor.js +204 -51
  11. package/dist/storage/implementation/MongoCompactor.js.map +1 -1
  12. package/dist/storage/implementation/MongoSyncBucketStorage.d.ts +5 -3
  13. package/dist/storage/implementation/MongoSyncBucketStorage.js +29 -70
  14. package/dist/storage/implementation/MongoSyncBucketStorage.js.map +1 -1
  15. package/dist/storage/implementation/PersistedBatch.d.ts +1 -0
  16. package/dist/storage/implementation/PersistedBatch.js +12 -5
  17. package/dist/storage/implementation/PersistedBatch.js.map +1 -1
  18. package/dist/storage/implementation/models.d.ts +14 -0
  19. package/dist/storage/implementation/util.d.ts +1 -1
  20. package/dist/storage/implementation/util.js.map +1 -1
  21. package/package.json +9 -9
  22. package/src/storage/implementation/MongoBucketBatch.ts +8 -6
  23. package/src/storage/implementation/MongoChecksums.ts +320 -0
  24. package/src/storage/implementation/MongoCompactor.ts +239 -48
  25. package/src/storage/implementation/MongoSyncBucketStorage.ts +33 -78
  26. package/src/storage/implementation/PersistedBatch.ts +13 -5
  27. package/src/storage/implementation/models.ts +15 -0
  28. package/src/storage/implementation/util.ts +2 -2
  29. package/test/src/__snapshots__/storage_sync.test.ts.snap +319 -11
  30. package/tsconfig.tsbuildinfo +1 -1
package/src/storage/implementation/MongoCompactor.ts

@@ -1,10 +1,12 @@
- import { mongo } from '@powersync/lib-service-mongodb';
- import { logger, ReplicationAssertionError } from '@powersync/lib-services-framework';
- import { InternalOpId, storage, utils } from '@powersync/service-core';
+ import { mongo, MONGO_OPERATION_TIMEOUT_MS } from '@powersync/lib-service-mongodb';
+ import { logger, ReplicationAssertionError, ServiceAssertionError } from '@powersync/lib-services-framework';
+ import { addChecksums, InternalOpId, isPartialChecksum, storage, utils } from '@powersync/service-core';

  import { PowerSyncMongo } from './db.js';
- import { BucketDataDocument, BucketDataKey } from './models.js';
+ import { BucketDataDocument, BucketDataKey, BucketStateDocument } from './models.js';
+ import { MongoSyncBucketStorage } from './MongoSyncBucketStorage.js';
  import { cacheKey } from './OperationBatch.js';
+ import { readSingleBatch } from './util.js';

  interface CurrentBucketState {
    /** Bucket name */
@@ -27,6 +29,21 @@ interface CurrentBucketState {
     * Number of REMOVE/MOVE operations seen since lastNotPut.
     */
    opsSincePut: number;
+
+   /**
+    * Incrementally-updated checksum, up to maxOpId
+    */
+   checksum: number;
+
+   /**
+    * op count for the checksum
+    */
+   opCount: number;
+
+   /**
+    * Byte size of ops covered by the checksum.
+    */
+   opBytes: number;
  }

  /**
@@ -43,25 +60,30 @@ const DEFAULT_MEMORY_LIMIT_MB = 64;

  export class MongoCompactor {
    private updates: mongo.AnyBulkWriteOperation<BucketDataDocument>[] = [];
+   private bucketStateUpdates: mongo.AnyBulkWriteOperation<BucketStateDocument>[] = [];

    private idLimitBytes: number;
    private moveBatchLimit: number;
    private moveBatchQueryLimit: number;
    private clearBatchLimit: number;
-   private maxOpId: bigint | undefined;
+   private maxOpId: bigint;
    private buckets: string[] | undefined;
+   private signal?: AbortSignal;
+   private group_id: number;

    constructor(
+     private storage: MongoSyncBucketStorage,
      private db: PowerSyncMongo,
-     private group_id: number,
      options?: MongoCompactOptions
    ) {
+     this.group_id = storage.group_id;
      this.idLimitBytes = (options?.memoryLimitMB ?? DEFAULT_MEMORY_LIMIT_MB) * 1024 * 1024;
      this.moveBatchLimit = options?.moveBatchLimit ?? DEFAULT_MOVE_BATCH_LIMIT;
      this.moveBatchQueryLimit = options?.moveBatchQueryLimit ?? DEFAULT_MOVE_BATCH_QUERY_LIMIT;
      this.clearBatchLimit = options?.clearBatchLimit ?? DEFAULT_CLEAR_BATCH_LIMIT;
-     this.maxOpId = options?.maxOpId;
+     this.maxOpId = options?.maxOpId ?? 0n;
      this.buckets = options?.compactBuckets;
+     this.signal = options?.signal;
    }

    /**
@@ -117,31 +139,57 @@
        o: new mongo.MaxKey() as any
      };

-     while (true) {
+     const doneWithBucket = async () => {
+       if (currentState == null) {
+         return;
+       }
+       // Free memory before clearing bucket
+       currentState.seen.clear();
+       if (currentState.lastNotPut != null && currentState.opsSincePut >= 1) {
+         logger.info(
+           `Inserting CLEAR at ${this.group_id}:${currentState.bucket}:${currentState.lastNotPut} to remove ${currentState.opsSincePut} operations`
+         );
+         // Need flush() before clear()
+         await this.flush();
+         await this.clearBucket(currentState);
+       }
+
+       // Do this _after_ clearBucket so that we have accurate counts.
+       this.updateBucketChecksums(currentState);
+     };
+
+     while (!this.signal?.aborted) {
        // Query one batch at a time, to avoid cursor timeouts
-       const batch = await this.db.bucket_data
-         .find(
+       const cursor = this.db.bucket_data.aggregate<BucketDataDocument & { size: number | bigint }>(
+         [
            {
-             _id: {
-               $gte: lowerBound,
-               $lt: upperBound
+             $match: {
+               _id: {
+                 $gte: lowerBound,
+                 $lt: upperBound
+               }
              }
            },
+           { $sort: { _id: -1 } },
+           { $limit: this.moveBatchQueryLimit },
            {
-             projection: {
+             $project: {
                _id: 1,
                op: 1,
                table: 1,
                row_id: 1,
                source_table: 1,
-               source_key: 1
-             },
-             limit: this.moveBatchQueryLimit,
-             sort: { _id: -1 },
-             singleBatch: true
+               source_key: 1,
+               checksum: 1,
+               size: { $bsonSize: '$$ROOT' }
+             }
            }
-         )
-         .toArray();
+         ],
+         { batchSize: this.moveBatchQueryLimit }
+       );
+       // We don't limit to a single batch here, since that often causes MongoDB to scan through more than it returns.
+       // Instead, we load up to the limit.
+       const batch = await cursor.toArray();

        if (batch.length == 0) {
          // We've reached the end
@@ -153,34 +201,31 @@

        for (let doc of batch) {
          if (currentState == null || doc._id.b != currentState.bucket) {
-           if (currentState != null && currentState.lastNotPut != null && currentState.opsSincePut >= 1) {
-             // Important to flush before clearBucket()
-             await this.flush();
-             logger.info(
-               `Inserting CLEAR at ${this.group_id}:${currentState.bucket}:${currentState.lastNotPut} to remove ${currentState.opsSincePut} operations`
-             );
+           await doneWithBucket();

-             const bucket = currentState.bucket;
-             const clearOp = currentState.lastNotPut;
-             // Free memory before clearing bucket
-             currentState = null;
-             await this.clearBucket(bucket, clearOp);
-           }
            currentState = {
              bucket: doc._id.b,
              seen: new Map(),
              trackingSize: 0,
              lastNotPut: null,
-             opsSincePut: 0
+             opsSincePut: 0,
+
+             checksum: 0,
+             opCount: 0,
+             opBytes: 0
            };
          }

-         if (this.maxOpId != null && doc._id.o > this.maxOpId) {
+         if (doc._id.o > this.maxOpId) {
            continue;
          }

+         currentState.checksum = addChecksums(currentState.checksum, Number(doc.checksum));
+         currentState.opCount += 1;
+
          let isPersistentPut = doc.op == 'PUT';

+         currentState.opBytes += Number(doc.size);
          if (doc.op == 'REMOVE' || doc.op == 'PUT') {
            const key = `${doc.table}/${doc.row_id}/${cacheKey(doc.source_table!, doc.source_key!)}`;
            const targetOp = currentState.seen.get(key);
@@ -208,6 +253,8 @@
              }
            }
          });
+
+         currentState.opBytes += 200 - Number(doc.size); // TODO: better estimate for this
        } else {
          if (currentState.trackingSize >= idLimitBytes) {
            // Reached memory limit.
@@ -234,24 +281,65 @@
          currentState.opsSincePut += 1;
        }

-       if (this.updates.length >= this.moveBatchLimit) {
+       if (this.updates.length + this.bucketStateUpdates.length >= this.moveBatchLimit) {
          await this.flush();
        }
      }
+
+     if (currentState != null) {
+       logger.info(`Processed batch of length ${batch.length} current bucket: ${currentState.bucket}`);
+     }
    }

+   await doneWithBucket();
+
+   // Need another flush after updateBucketChecksums()
    await this.flush();
-   currentState?.seen.clear();
-   if (currentState?.lastNotPut != null && currentState?.opsSincePut > 1) {
-     logger.info(
-       `Inserting CLEAR at ${this.group_id}:${currentState.bucket}:${currentState.lastNotPut} to remove ${currentState.opsSincePut} operations`
+ }
+
+ /**
+  * Call when done with a bucket.
+  */
+ private updateBucketChecksums(state: CurrentBucketState) {
+   if (state.opCount < 0) {
+     throw new ServiceAssertionError(
+       `Invalid opCount: ${state.opCount} checksum ${state.checksum} opsSincePut: ${state.opsSincePut} maxOpId: ${this.maxOpId}`
      );
-     const bucket = currentState.bucket;
-     const clearOp = currentState.lastNotPut;
-     // Free memory before clearing bucket
-     currentState = null;
-     await this.clearBucket(bucket, clearOp);
    }
+   this.bucketStateUpdates.push({
+     updateOne: {
+       filter: {
+         _id: {
+           g: this.group_id,
+           b: state.bucket
+         }
+       },
+       update: {
+         $set: {
+           compacted_state: {
+             op_id: this.maxOpId,
+             count: state.opCount,
+             checksum: BigInt(state.checksum),
+             bytes: state.opBytes
+           },
+           estimate_since_compact: {
+             // Note: There could have been a whole bunch of new operations added to the bucket _while_ compacting,
+             // which we don't currently cater for.
+             // We could potentially query for that, but that could add overhead.
+             count: 0,
+             bytes: 0
+           }
+         },
+         $setOnInsert: {
+           // Only set this if we're creating the document.
+           // In all other cases, the replication process will have a set a more accurate id.
+           last_op: this.maxOpId
+         }
+       },
+       // We generally expect this to have been created before, but do handle cases of old unchanged buckets
+       upsert: true
+     }
+   });
  }

  private async flush() {
@@ -266,15 +354,26 @@
      });
      this.updates = [];
    }
+   if (this.bucketStateUpdates.length > 0) {
+     logger.info(`Updating ${this.bucketStateUpdates.length} bucket states`);
+     await this.db.bucket_state.bulkWrite(this.bucketStateUpdates, {
+       ordered: false
+     });
+     this.bucketStateUpdates = [];
+   }
  }

  /**
   * Perform a CLEAR compact for a bucket.
   *
+  *
   * @param bucket bucket name
   * @param op op_id of the last non-PUT operation, which will be converted to CLEAR.
   */
- private async clearBucket(bucket: string, op: InternalOpId) {
+ private async clearBucket(currentState: CurrentBucketState) {
+   const bucket = currentState.bucket;
+   const clearOp = currentState.lastNotPut!;
+
    const opFilter = {
      _id: {
        $gte: {
@@ -285,7 +384,7 @@
        $lte: {
          g: this.group_id,
          b: bucket,
-         o: op
+         o: clearOp
        }
      }
    };
@@ -293,7 +392,8 @@
    const session = this.db.client.startSession();
    try {
      let done = false;
-     while (!done) {
+     while (!done && !this.signal?.aborted) {
+       let opCountDiff = 0;
        // Do the CLEAR operation in batches, with each batch a separate transaction.
        // The state after each batch is fully consistent.
        // We need a transaction per batch to make sure checksums stay consistent.
@@ -364,15 +464,106 @@
            },
            { session }
          );
+
+         opCountDiff = -numberOfOpsToClear + 1;
        },
        {
          writeConcern: { w: 'majority' },
          readConcern: { level: 'snapshot' }
        }
      );
+     // Update _outside_ the transaction, since the transaction can be retried multiple times.
+     currentState.opCount += opCountDiff;
    }
  } finally {
    await session.endSession();
  }
  }
+
+ /**
+  * Subset of compact, only populating checksums where relevant.
+  */
+ async populateChecksums() {
+   let lastId: BucketStateDocument['_id'] | null = null;
+   while (!this.signal?.aborted) {
+     // By filtering buckets, we effectively make this "resumeable".
+     let filter: mongo.Filter<BucketStateDocument> = {
+       compacted_state: { $exists: false }
+     };
+     if (lastId) {
+       filter._id = { $gt: lastId };
+     }
+
+     const bucketsWithoutChecksums = await this.db.bucket_state
+       .find(filter, {
+         projection: {
+           _id: 1
+         },
+         sort: {
+           _id: 1
+         },
+         limit: 5_000,
+         maxTimeMS: MONGO_OPERATION_TIMEOUT_MS
+       })
+       .toArray();
+     if (bucketsWithoutChecksums.length == 0) {
+       // All done
+       break;
+     }
+
+     logger.info(`Calculating checksums for batch of ${bucketsWithoutChecksums.length} buckets`);
+
+     await this.updateChecksumsBatch(bucketsWithoutChecksums.map((b) => b._id.b));
+
+     lastId = bucketsWithoutChecksums[bucketsWithoutChecksums.length - 1]._id;
+   }
+ }
+
+ private async updateChecksumsBatch(buckets: string[]) {
+   const checksums = await this.storage.checksums.queryPartialChecksums(
+     buckets.map((bucket) => {
+       return {
+         bucket,
+         end: this.maxOpId
+       };
+     })
+   );
+
+   for (let bucketChecksum of checksums.values()) {
+     if (isPartialChecksum(bucketChecksum)) {
+       // Should never happen since we don't specify `start`
+       throw new ServiceAssertionError(`Full checksum expected, got ${JSON.stringify(bucketChecksum)}`);
+     }
+
+     this.bucketStateUpdates.push({
+       updateOne: {
+         filter: {
+           _id: {
+             g: this.group_id,
+             b: bucketChecksum.bucket
+           }
+         },
+         update: {
+           $set: {
+             compacted_state: {
+               op_id: this.maxOpId,
+               count: bucketChecksum.count,
+               checksum: BigInt(bucketChecksum.checksum),
+               bytes: null
+             }
+           },
+           $setOnInsert: {
+             // Only set this if we're creating the document.
+             // In all other cases, the replication process will have a set a more accurate id.
+             last_op: this.maxOpId
+           }
+         },
+         // We generally expect this to have been created before, but do handle cases of old unchanged buckets
+         upsert: true
+       }
+     });
+   }
+
+   await this.flush();
+ }
  }
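
Note: the compact loop above folds each operation's stored checksum into a running per-bucket value via addChecksums, which is what lets the checksum be accumulated incrementally across batches. A minimal sketch of that accumulation, assuming addChecksums is 32-bit wrap-around addition (consistent with the `& 0xffffffffn` masking in the aggregation removed from MongoSyncBucketStorage.ts further down); the helper defined here is illustrative, not the service-core source, which the diff imports from @powersync/service-core.

// Assumed semantics: checksums are summed with 32-bit wrap-around, so the
// result is order-independent and partial sums can be combined later.
function addChecksums(a: number, b: number): number {
  return (a + b) & 0xffffffff;
}

interface OpLike {
  checksum: number | bigint; // historically stored as int/double/long
}

// Fold a batch of operations into a running bucket checksum, mirroring
// `currentState.checksum = addChecksums(currentState.checksum, Number(doc.checksum))`
// in the compact loop above.
function accumulateChecksum(ops: OpLike[], initial = 0): number {
  let checksum = initial;
  for (const op of ops) {
    checksum = addChecksums(checksum, Number(op.checksum));
  }
  return checksum;
}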
package/src/storage/implementation/MongoSyncBucketStorage.ts

@@ -10,6 +10,7 @@ import {
    BroadcastIterable,
    CHECKPOINT_INVALIDATE_ALL,
    CheckpointChanges,
+   CompactOptions,
    deserializeParameterLookup,
    GetCheckpointChangesOptions,
    InternalOpId,
@@ -30,21 +31,18 @@ import { MongoBucketStorage } from '../MongoBucketStorage.js';
  import { PowerSyncMongo } from './db.js';
  import { BucketDataDocument, BucketDataKey, BucketStateDocument, SourceKey, SourceTableDocument } from './models.js';
  import { MongoBucketBatch } from './MongoBucketBatch.js';
+ import { MongoChecksums } from './MongoChecksums.js';
  import { MongoCompactor } from './MongoCompactor.js';
+ import { MongoParameterCompactor } from './MongoParameterCompactor.js';
  import { MongoWriteCheckpointAPI } from './MongoWriteCheckpointAPI.js';
  import { idPrefixFilter, mapOpEntry, readSingleBatch, setSessionSnapshotTime } from './util.js';
- import { MongoParameterCompactor } from './MongoParameterCompactor.js';

  export class MongoSyncBucketStorage
    extends BaseObserver<storage.SyncRulesBucketStorageListener>
    implements storage.SyncRulesBucketStorage
  {
    private readonly db: PowerSyncMongo;
-   private checksumCache = new storage.ChecksumCache({
-     fetchChecksums: (batch) => {
-       return this.getChecksumsInternal(batch);
-     }
-   });
+   readonly checksums: MongoChecksums;

    private parsedSyncRulesCache: { parsed: SqlSyncRules; options: storage.ParseSyncRulesOptions } | undefined;
    private writeCheckpointAPI: MongoWriteCheckpointAPI;
@@ -58,6 +56,7 @@ export class MongoSyncBucketStorage
    ) {
      super();
      this.db = factory.db;
+     this.checksums = new MongoChecksums(this.db, this.group_id);
      this.writeCheckpointAPI = new MongoWriteCheckpointAPI({
        db: this.db,
        mode: writeCheckpointMode,
@@ -487,76 +486,11 @@ export class MongoSyncBucketStorage
    }

    async getChecksums(checkpoint: utils.InternalOpId, buckets: string[]): Promise<utils.ChecksumMap> {
-     return this.checksumCache.getChecksumMap(checkpoint, buckets);
+     return this.checksums.getChecksums(checkpoint, buckets);
    }

-   private async getChecksumsInternal(batch: storage.FetchPartialBucketChecksum[]): Promise<storage.PartialChecksumMap> {
-     if (batch.length == 0) {
-       return new Map();
-     }
-
-     const filters: any[] = [];
-     for (let request of batch) {
-       filters.push({
-         _id: {
-           $gt: {
-             g: this.group_id,
-             b: request.bucket,
-             o: request.start ? BigInt(request.start) : new bson.MinKey()
-           },
-           $lte: {
-             g: this.group_id,
-             b: request.bucket,
-             o: BigInt(request.end)
-           }
-         }
-       });
-     }
-
-     const aggregate = await this.db.bucket_data
-       .aggregate(
-         [
-           {
-             $match: {
-               $or: filters
-             }
-           },
-           {
-             $group: {
-               _id: '$_id.b',
-               // Historically, checksum may be stored as 'int' or 'double'.
-               // More recently, this should be a 'long'.
-               // $toLong ensures that we always sum it as a long, avoiding inaccuracies in the calculations.
-               checksum_total: { $sum: { $toLong: '$checksum' } },
-               count: { $sum: 1 },
-               has_clear_op: {
-                 $max: {
-                   $cond: [{ $eq: ['$op', 'CLEAR'] }, 1, 0]
-                 }
-               }
-             }
-           }
-         ],
-         { session: undefined, readConcern: 'snapshot', maxTimeMS: lib_mongo.db.MONGO_OPERATION_TIMEOUT_MS }
-       )
-       .toArray()
-       .catch((e) => {
-         throw lib_mongo.mapQueryError(e, 'while reading checksums');
-       });
-
-     return new Map<string, storage.PartialChecksum>(
-       aggregate.map((doc) => {
-         return [
-           doc._id,
-           {
-             bucket: doc._id,
-             partialCount: doc.count,
-             partialChecksum: Number(BigInt(doc.checksum_total) & 0xffffffffn) & 0xffffffff,
-             isFullChecksum: doc.has_clear_op == 1
-           } satisfies storage.PartialChecksum
-         ];
-       })
-     );
+   clearChecksumCache() {
+     this.checksums.clearCache();
    }

    async terminate(options?: storage.TerminateOptions) {
@@ -701,13 +635,34 @@
    }

    async compact(options?: storage.CompactOptions) {
-     const checkpoint = await this.getCheckpointInternal();
-     await new MongoCompactor(this.db, this.group_id, options).compact();
-     if (checkpoint != null && options?.compactParameterData) {
-       await new MongoParameterCompactor(this.db, this.group_id, checkpoint.checkpoint, options).compact();
+     let maxOpId = options?.maxOpId;
+     if (maxOpId == null) {
+       const checkpoint = await this.getCheckpointInternal();
+       maxOpId = checkpoint?.checkpoint ?? undefined;
+     }
+     await new MongoCompactor(this, this.db, { ...options, maxOpId }).compact();
+
+     if (maxOpId != null && options?.compactParameterData) {
+       await new MongoParameterCompactor(this.db, this.group_id, maxOpId, options).compact();
      }
    }

+   async populatePersistentChecksumCache(options: Required<Pick<CompactOptions, 'signal' | 'maxOpId'>>): Promise<void> {
+     logger.info(`Populating persistent checksum cache...`);
+     const start = Date.now();
+     // We do a minimal compact here.
+     // We can optimize this in the future.
+     const compactor = new MongoCompactor(this, this.db, {
+       ...options,
+       // Don't track updates for MOVE compacting
+       memoryLimitMB: 0
+     });
+
+     await compactor.populateChecksums();
+     const duration = Date.now() - start;
+     logger.info(`Populated persistent checksum cache in ${(duration / 1000).toFixed(1)}s`);
+   }
+
    /**
     * Instance-wide watch on the latest available checkpoint (op_id + lsn).
     */
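
Note: the point of persisting compacted_state is that a later checksum query for a checkpoint only needs to aggregate the operations in (compacted_state.op_id, checkpoint] and combine that tail with the cached prefix, instead of scanning the whole bucket. A hedged sketch of that combination step under the same wrap-around assumption as above; the types and the combine helper are illustrative, since the real logic lives in MongoChecksums.ts, whose body this diff does not show.

interface CompactedState {
  op_id: bigint; // the cached checksum covers ops up to and including this op
  count: number;
  checksum: bigint;
}

interface TailChecksum {
  partialCount: number; // ops in (op_id, checkpoint]
  partialChecksum: number;
}

function addChecksums(a: number, b: number): number {
  return (a + b) & 0xffffffff; // assumed 32-bit wrap-around addition
}

// Full bucket checksum at `checkpoint` = cached prefix + freshly aggregated tail.
function combine(cached: CompactedState, tail: TailChecksum) {
  return {
    count: cached.count + tail.partialCount,
    checksum: addChecksums(Number(cached.checksum), tail.partialChecksum)
  };
}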
package/src/storage/implementation/PersistedBatch.ts

@@ -71,15 +71,17 @@ export class PersistedBatch {
      this.logger = options?.logger ?? defaultLogger;
    }

-   private incrementBucket(bucket: string, op_id: InternalOpId) {
+   private incrementBucket(bucket: string, op_id: InternalOpId, bytes: number) {
      let existingState = this.bucketStates.get(bucket);
      if (existingState) {
        existingState.lastOp = op_id;
        existingState.incrementCount += 1;
+       existingState.incrementBytes += bytes;
      } else {
        this.bucketStates.set(bucket, {
          lastOp: op_id,
-         incrementCount: 1
+         incrementCount: 1,
+         incrementBytes: bytes
        });
      }
    }
@@ -115,7 +117,8 @@
      }

      remaining_buckets.delete(key);
-     this.currentSize += recordData.length + 200;
+     const byteEstimate = recordData.length + 200;
+     this.currentSize += byteEstimate;

      const op_id = options.op_seq.next();
      this.debugLastOpId = op_id;
@@ -138,7 +141,7 @@
          }
        }
      });
-     this.incrementBucket(k.bucket, op_id);
+     this.incrementBucket(k.bucket, op_id, byteEstimate);
    }

    for (let bd of remaining_buckets.values()) {
@@ -166,7 +169,7 @@
        }
      });
      this.currentSize += 200;
-     this.incrementBucket(bd.bucket, op_id);
+     this.incrementBucket(bd.bucket, op_id, 200);
    }
  }

@@ -369,6 +372,10 @@
        update: {
          $set: {
            last_op: state.lastOp
+         },
+         $inc: {
+           'estimate_since_compact.count': state.incrementCount,
+           'estimate_since_compact.bytes': state.incrementBytes
          }
        },
        upsert: true
@@ -381,4 +388,5 @@
  interface BucketStateUpdate {
    lastOp: InternalOpId;
    incrementCount: number;
+   incrementBytes: number;
  }
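
Note: a sketch of the bucket_state upsert that PersistedBatch now issues on flush, written against the plain mongodb driver for illustration; the database and collection names are placeholders, since the real code goes through PowerSyncMongo and bulkWrite.

import { MongoClient } from 'mongodb';

async function writeBucketState(
  client: MongoClient,
  group_id: number,
  bucket: string,
  state: { lastOp: bigint; incrementCount: number; incrementBytes: number }
) {
  // 'powersync' / 'bucket_state' are placeholder names for this sketch.
  await client
    .db('powersync')
    .collection('bucket_state')
    .updateOne(
      { _id: { g: group_id, b: bucket } as any }, // compound _id, per BucketStateDocument
      {
        $set: { last_op: state.lastOp },
        // These counters accumulate across batches; the compactor resets them
        // to zero whenever it writes a fresh compacted_state for the bucket.
        $inc: {
          'estimate_since_compact.count': state.incrementCount,
          'estimate_since_compact.bytes': state.incrementBytes
        }
      },
      { upsert: true }
    );
}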
package/src/storage/implementation/models.ts

@@ -98,6 +98,21 @@ export interface BucketStateDocument {
      b: string;
    };
    last_op: bigint;
+   /**
+    * If set, this can be treated as "cache" of a checksum at a specific point.
+    * Can be updated periodically, for example by the compact job.
+    */
+   compacted_state?: {
+     op_id: InternalOpId;
+     count: number;
+     checksum: bigint;
+     bytes: number | null;
+   };
+
+   estimate_since_compact?: {
+     count: number;
+     bytes: number;
+   };
  }

  export interface IdSequenceDocument {
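
Note: one plausible consumer of these two fields (hypothetical, not shown in this diff) is a heuristic that decides whether a bucket has accumulated enough new operations since its last compact to be worth compacting again.

import { InternalOpId } from '@powersync/service-core';

interface BucketStateDocument {
  _id: { g: number; b: string };
  last_op: bigint;
  compacted_state?: { op_id: InternalOpId; count: number; checksum: bigint; bytes: number | null };
  estimate_since_compact?: { count: number; bytes: number };
}

// Hypothetical heuristic: recompact once the estimated ops added since the
// last compact exceed some fraction of the compacted op count.
function shouldRecompact(doc: BucketStateDocument, ratio = 0.5): boolean {
  if (doc.compacted_state == null) {
    return true; // never compacted: no cached checksum yet
  }
  const added = doc.estimate_since_compact?.count ?? 0;
  return added > Math.max(1, doc.compacted_state.count) * ratio;
}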
package/src/storage/implementation/util.ts

@@ -3,7 +3,7 @@ import * as crypto from 'crypto';
  import * as uuid from 'uuid';

  import { mongo } from '@powersync/lib-service-mongodb';
- import { storage, utils } from '@powersync/service-core';
+ import { BucketChecksum, PartialChecksum, PartialOrFullChecksum, storage, utils } from '@powersync/service-core';

  import { PowerSyncMongo } from './db.js';
  import { BucketDataDocument } from './models.js';
@@ -43,7 +43,7 @@ export function generateSlotName(prefix: string, sync_rules_id: number) {
   *
   * For this to be effective, set batchSize = limit in the find command.
   */
- export async function readSingleBatch<T>(cursor: mongo.FindCursor<T>): Promise<{ data: T[]; hasMore: boolean }> {
+ export async function readSingleBatch<T>(cursor: mongo.AbstractCursor<T>): Promise<{ data: T[]; hasMore: boolean }> {
    try {
      let data: T[];
      let hasMore = true;
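
Note: widening the parameter type from mongo.FindCursor to mongo.AbstractCursor lets the same helper consume aggregation cursors, such as the one MongoCompactor now creates. An illustrative usage sketch follows; the pipeline and typing are placeholders, not code from the package.

import { mongo } from '@powersync/lib-service-mongodb';
import { readSingleBatch } from './util.js';

async function firstBatch<T extends mongo.Document>(
  collection: mongo.Collection<T>,
  limit: number
): Promise<{ data: T[]; hasMore: boolean }> {
  // batchSize == limit, per the helper's doc comment, so the server can return
  // the entire result in the first batch without a getMore round-trip.
  const cursor = collection.aggregate<T>([{ $sort: { _id: 1 } }, { $limit: limit }], {
    batchSize: limit
  });
  return await readSingleBatch(cursor);
}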