@powersync/service-module-mongodb-storage 0.12.1 → 0.12.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. package/CHANGELOG.md +21 -0
  2. package/dist/migrations/db/migrations/1741697235857-bucket-state-index.js +1 -4
  3. package/dist/migrations/db/migrations/1741697235857-bucket-state-index.js.map +1 -1
  4. package/dist/storage/MongoBucketStorage.d.ts +3 -2
  5. package/dist/storage/MongoBucketStorage.js +4 -2
  6. package/dist/storage/MongoBucketStorage.js.map +1 -1
  7. package/dist/storage/implementation/MongoChecksums.d.ts +45 -13
  8. package/dist/storage/implementation/MongoChecksums.js +151 -136
  9. package/dist/storage/implementation/MongoChecksums.js.map +1 -1
  10. package/dist/storage/implementation/MongoCompactor.js +23 -21
  11. package/dist/storage/implementation/MongoCompactor.js.map +1 -1
  12. package/dist/storage/implementation/MongoSyncBucketStorage.d.ts +5 -2
  13. package/dist/storage/implementation/MongoSyncBucketStorage.js +3 -3
  14. package/dist/storage/implementation/MongoSyncBucketStorage.js.map +1 -1
  15. package/dist/storage/implementation/MongoTestStorageFactoryGenerator.d.ts +2 -0
  16. package/dist/storage/implementation/MongoTestStorageFactoryGenerator.js +4 -3
  17. package/dist/storage/implementation/MongoTestStorageFactoryGenerator.js.map +1 -1
  18. package/dist/storage/implementation/db.d.ts +4 -0
  19. package/dist/storage/implementation/db.js +10 -0
  20. package/dist/storage/implementation/db.js.map +1 -1
  21. package/dist/storage/implementation/models.d.ts +4 -0
  22. package/package.json +6 -6
  23. package/src/migrations/db/migrations/1741697235857-bucket-state-index.ts +1 -7
  24. package/src/storage/MongoBucketStorage.ts +4 -3
  25. package/src/storage/implementation/MongoChecksums.ts +174 -150
  26. package/src/storage/implementation/MongoCompactor.ts +23 -22
  27. package/src/storage/implementation/MongoSyncBucketStorage.ts +9 -4
  28. package/src/storage/implementation/MongoTestStorageFactoryGenerator.ts +7 -4
  29. package/src/storage/implementation/db.ts +14 -0
  30. package/src/storage/implementation/models.ts +4 -0
  31. package/test/src/__snapshots__/storage.test.ts.snap +17 -1
  32. package/test/src/storage.test.ts +86 -1
  33. package/test/src/storage_compacting.test.ts +120 -5
  34. package/tsconfig.tsbuildinfo +1 -1
package/src/storage/implementation/MongoChecksums.ts
@@ -1,3 +1,4 @@
+ import * as lib_mongo from '@powersync/lib-service-mongodb';
  import {
    addPartialChecksums,
    bson,
@@ -11,27 +12,51 @@ import {
    PartialChecksumMap,
    PartialOrFullChecksum
  } from '@powersync/service-core';
- import * as lib_mongo from '@powersync/lib-service-mongodb';
- import { logger } from '@powersync/lib-services-framework';
  import { PowerSyncMongo } from './db.js';

+ /**
+  * Checksum calculation options, primarily for tests.
+  */
+ export interface MongoChecksumOptions {
+   /**
+    * How many buckets to process in a batch when calculating checksums.
+    */
+   bucketBatchLimit?: number;
+
+   /**
+    * Limit on the number of documents to calculate a checksum on at a time.
+    */
+   operationBatchLimit?: number;
+ }
+
+ const DEFAULT_BUCKET_BATCH_LIMIT = 200;
+ const DEFAULT_OPERATION_BATCH_LIMIT = 50_000;
+
  /**
   * Checksum query implementation.
+  *
+  * General implementation flow is:
+  * 1. getChecksums() -> check cache for (partial) matches. If not found or partial match, query the remainder using computePartialChecksums().
+  * 2. computePartialChecksums() -> query bucket_state for partial matches. Query the remainder using computePartialChecksumsDirect().
+  * 3. computePartialChecksumsDirect() -> split into batches of 200 buckets at a time -> computePartialChecksumsInternal()
+  * 4. computePartialChecksumsInternal() -> aggregate over 50_000 operations in bucket_data at a time
   */
  export class MongoChecksums {
    private cache = new ChecksumCache({
      fetchChecksums: (batch) => {
-       return this.getChecksumsInternal(batch);
+       return this.computePartialChecksums(batch);
      }
    });

    constructor(
      private db: PowerSyncMongo,
-     private group_id: number
+     private group_id: number,
+     private options?: MongoChecksumOptions
    ) {}

    /**
-    * Calculate checksums, utilizing the cache.
+    * Calculate checksums, utilizing the cache for partial checksums, and querying the remainder from
+    * the database (bucket_state + bucket_data).
     */
    async getChecksums(checkpoint: InternalOpId, buckets: string[]): Promise<ChecksumMap> {
      return this.cache.getChecksumMap(checkpoint, buckets);
@@ -42,11 +67,15 @@ export class MongoChecksums {
    }

    /**
-    * Calculate (partial) checksums from bucket_state and the data collection.
+    * Calculate (partial) checksums from bucket_state (pre-aggregated) and bucket_data (individual operations).
+    *
+    * Results are not cached here. This method is only called by {@link ChecksumCache.getChecksumMap},
+    * which is responsible for caching its result.
     *
-    * Results are not cached.
+    * As long as data is compacted regularly, this should be fast. Large buckets without pre-compacted bucket_state
+    * can be slow.
     */
-   private async getChecksumsInternal(batch: FetchPartialBucketChecksum[]): Promise<PartialChecksumMap> {
+   private async computePartialChecksums(batch: FetchPartialBucketChecksum[]): Promise<PartialChecksumMap> {
      if (batch.length == 0) {
        return new Map();
      }
@@ -100,7 +129,7 @@ export class MongoChecksums {
      };
    });

-   const queriedChecksums = await this.queryPartialChecksums(mappedRequests);
+   const queriedChecksums = await this.computePartialChecksumsDirect(mappedRequests);

    return new Map<string, PartialOrFullChecksum>(
      batch.map((request) => {
@@ -117,61 +146,152 @@ export class MongoChecksums {
    }

    /**
-    * Calculate (partial) checksums from the data collection directly.
+    * Calculate (partial) checksums from the data collection directly, bypassing the cache and bucket_state.
+    *
+    * Can be used directly in cases where the cache should be bypassed, such as from a compact job.
+    *
+    * Internally, we do calculations in smaller batches of buckets as appropriate.
+    *
+    * For large buckets, this can be slow, but should not time out as the underlying queries are performed in
+    * smaller batches.
     */
-   async queryPartialChecksums(batch: FetchPartialBucketChecksum[]): Promise<PartialChecksumMap> {
-     try {
-       return await this.queryPartialChecksumsInternal(batch);
-     } catch (e) {
-       if (e.codeName == 'MaxTimeMSExpired') {
-         logger.warn(`Checksum query timed out; falling back to slower version`, e);
-         // Timeout - try the slower but more robust version
-         return await this.queryPartialChecksumsFallback(batch);
+   public async computePartialChecksumsDirect(batch: FetchPartialBucketChecksum[]): Promise<PartialChecksumMap> {
+     // Limit the number of buckets we query for at a time.
+     const bucketBatchLimit = this.options?.bucketBatchLimit ?? DEFAULT_BUCKET_BATCH_LIMIT;
+
+     if (batch.length < bucketBatchLimit) {
+       // Single batch - no need for splitting the batch and merging results
+       return await this.computePartialChecksumsInternal(batch);
+     }
+     // Split the batch and merge results
+     let results = new Map<string, PartialOrFullChecksum>();
+     for (let i = 0; i < batch.length; i += bucketBatchLimit) {
+       const bucketBatch = batch.slice(i, i + bucketBatchLimit);
+       const batchResults = await this.computePartialChecksumsInternal(bucketBatch);
+       for (let r of batchResults.values()) {
+         results.set(r.bucket, r);
        }
-       throw lib_mongo.mapQueryError(e, 'while reading checksums');
      }
+     return results;
    }

-   private async queryPartialChecksumsInternal(batch: FetchPartialBucketChecksum[]): Promise<PartialChecksumMap> {
-     const filters: any[] = [];
+   /**
+    * Query a batch of checksums.
+    *
+    * We limit the number of operations that the query aggregates in each sub-batch, to avoid potential query timeouts.
+    *
+    * `batch` must be limited to DEFAULT_BUCKET_BATCH_LIMIT buckets before calling this.
+    */
+   private async computePartialChecksumsInternal(batch: FetchPartialBucketChecksum[]): Promise<PartialChecksumMap> {
+     const batchLimit = this.options?.operationBatchLimit ?? DEFAULT_OPERATION_BATCH_LIMIT;
+
+     // Map requests by bucket. We adjust this as we get partial results.
+     let requests = new Map<string, FetchPartialBucketChecksum>();
      for (let request of batch) {
-       filters.push({
-         _id: {
-           $gt: {
-             g: this.group_id,
-             b: request.bucket,
-             o: request.start ?? new bson.MinKey()
-           },
-           $lte: {
-             g: this.group_id,
-             b: request.bucket,
-             o: request.end
-           }
-         }
-       });
+       requests.set(request.bucket, request);
     }

-    const aggregate = await this.db.bucket_data
-      .aggregate(
-        [
-          {
-            $match: {
-              $or: filters
+    const partialChecksums = new Map<string, PartialOrFullChecksum>();
+
+    while (requests.size > 0) {
+      const filters: any[] = [];
+      for (let request of requests.values()) {
+        filters.push({
+          _id: {
+            $gt: {
+              g: this.group_id,
+              b: request.bucket,
+              o: request.start ?? new bson.MinKey()
+            },
+            $lte: {
+              g: this.group_id,
+              b: request.bucket,
+              o: request.end
            }
-          },
-          CHECKSUM_QUERY_GROUP_STAGE
-        ],
-        { session: undefined, readConcern: 'snapshot', maxTimeMS: lib_mongo.MONGO_CHECKSUM_TIMEOUT_MS }
-      )
-      // Don't map the error here - we want to keep timeout errors as-is
-      .toArray();
+          }
+        });
+      }

-    const partialChecksums = new Map<string, PartialOrFullChecksum>(
-      aggregate.map((doc) => {
+      // Aggregate over a max of `batchLimit` operations at a time.
+      // Let's say we have 3 buckets (A, B, C), each with 10 operations, and our batch limit is 12.
+      // Then we'll do three batches:
+      // 1. Query: A[1-end], B[1-end], C[1-end]
+      //    Returns: A[1-10], B[1-2]
+      // 2. Query: B[3-end], C[1-end]
+      //    Returns: B[3-10], C[1-4]
+      // 3. Query: C[5-end]
+      //    Returns: C[5-10]
+      const aggregate = await this.db.bucket_data
+        .aggregate(
+          [
+            {
+              $match: {
+                $or: filters
+              }
+            },
+            // sort and limit _before_ grouping
+            { $sort: { _id: 1 } },
+            { $limit: batchLimit },
+            {
+              $group: {
+                _id: '$_id.b',
+                // Historically, checksum may be stored as 'int' or 'double'.
+                // More recently, this should be a 'long'.
+                // $toLong ensures that we always sum it as a long, avoiding inaccuracies in the calculations.
+                checksum_total: { $sum: { $toLong: '$checksum' } },
+                count: { $sum: 1 },
+                has_clear_op: {
+                  $max: {
+                    $cond: [{ $eq: ['$op', 'CLEAR'] }, 1, 0]
+                  }
+                },
+                last_op: { $max: '$_id.o' }
+              }
+            },
+            // Sort the aggregated results (100 max, so should be fast).
+            // This is important to identify which buckets we have partial data for.
+            { $sort: { _id: 1 } }
+          ],
+          { session: undefined, readConcern: 'snapshot', maxTimeMS: lib_mongo.MONGO_CHECKSUM_TIMEOUT_MS }
+        )
+        .toArray()
+        .catch((e) => {
+          throw lib_mongo.mapQueryError(e, 'while reading checksums');
+        });
+
+      let batchCount = 0;
+      let limitReached = false;
+      for (let doc of aggregate) {
        const bucket = doc._id;
-        return [bucket, checksumFromAggregate(doc)];
-      })
-    );
+        const checksum = checksumFromAggregate(doc);
+
+        const existing = partialChecksums.get(bucket);
+        if (existing != null) {
+          partialChecksums.set(bucket, addPartialChecksums(bucket, existing, checksum));
+        } else {
+          partialChecksums.set(bucket, checksum);
+        }
+
+        batchCount += doc.count;
+        if (batchCount == batchLimit) {
+          // Limit reached. Request more in the next batch.
+          // Note that this only affects the _last_ bucket in a batch.
+          limitReached = true;
+          const req = requests.get(bucket);
+          requests.set(bucket, {
+            bucket,
+            start: doc.last_op,
+            end: req!.end
+          });
+        } else {
+          // All done for this bucket
+          requests.delete(bucket);
+        }
+      }
+      if (!limitReached) {
+        break;
+      }
+    }

    return new Map<string, PartialOrFullChecksum>(
      batch.map((request) => {
@@ -197,106 +317,10 @@ export class MongoChecksums {
      })
    );
  }
-
-  /**
-   * Checksums for large buckets can run over the query timeout.
-   * To avoid this, we query in batches.
-   * This version can handle larger amounts of data, but is slower, especially for many buckets.
-   */
-  async queryPartialChecksumsFallback(batch: FetchPartialBucketChecksum[]): Promise<PartialChecksumMap> {
-    const partialChecksums = new Map<string, PartialOrFullChecksum>();
-    for (let request of batch) {
-      const checksum = await this.slowChecksum(request);
-      partialChecksums.set(request.bucket, checksum);
-    }
-
-    return partialChecksums;
-  }
-
-  private async slowChecksum(request: FetchPartialBucketChecksum): Promise<PartialOrFullChecksum> {
-    const batchLimit = 50_000;
-
-    let lowerBound = 0n;
-    const bucket = request.bucket;
-
-    let runningChecksum: PartialOrFullChecksum = {
-      bucket,
-      partialCount: 0,
-      partialChecksum: 0
-    };
-    if (request.start == null) {
-      runningChecksum = {
-        bucket,
-        count: 0,
-        checksum: 0
-      };
-    }
-
-    while (true) {
-      const filter = {
-        _id: {
-          $gt: {
-            g: this.group_id,
-            b: bucket,
-            o: lowerBound
-          },
-          $lte: {
-            g: this.group_id,
-            b: bucket,
-            o: request.end
-          }
-        }
-      };
-      const docs = await this.db.bucket_data
-        .aggregate(
-          [
-            {
-              $match: filter
-            },
-            // sort and limit _before_ grouping
-            { $sort: { _id: 1 } },
-            { $limit: batchLimit },
-            CHECKSUM_QUERY_GROUP_STAGE
-          ],
-          { session: undefined, readConcern: 'snapshot', maxTimeMS: lib_mongo.MONGO_CHECKSUM_TIMEOUT_MS }
-        )
-        .toArray();
-      const doc = docs[0];
-      if (doc == null) {
-        return runningChecksum;
-      }
-      const partial = checksumFromAggregate(doc);
-      runningChecksum = addPartialChecksums(bucket, runningChecksum, partial);
-      const isFinal = doc.count != batchLimit;
-      if (isFinal) {
-        break;
-      } else {
-        lowerBound = doc.last_op;
-      }
-    }
-    return runningChecksum;
-  }
  }

- const CHECKSUM_QUERY_GROUP_STAGE = {
-   $group: {
-     _id: '$_id.b',
-     // Historically, checksum may be stored as 'int' or 'double'.
-     // More recently, this should be a 'long'.
-     // $toLong ensures that we always sum it as a long, avoiding inaccuracies in the calculations.
-     checksum_total: { $sum: { $toLong: '$checksum' } },
-     count: { $sum: 1 },
-     has_clear_op: {
-       $max: {
-         $cond: [{ $eq: ['$op', 'CLEAR'] }, 1, 0]
-       }
-     },
-     last_op: { $max: '$_id.o' }
-   }
- };
-
  /**
-  * Convert output of CHECKSUM_QUERY_GROUP_STAGE into a checksum.
+  * Convert output of the $group stage into a checksum.
   */
  function checksumFromAggregate(doc: bson.Document): PartialOrFullChecksum {
    const partialChecksum = Number(BigInt(doc.checksum_total) & 0xffffffffn) & 0xffffffff;
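
The new batching above replaces the old timeout-and-fallback pair (queryPartialChecksumsFallback/slowChecksum) with a single resumable loop. As a standalone illustration (hypothetical code, not part of the package), the following sketch reproduces the A/B/C example from the new comment over in-memory data: consume at most `limit` operations per pass, then re-request only the bucket that was cut off, starting after its last seen op.

```ts
type Request = { bucket: string; start: number; end: number };

// Simulates computePartialChecksumsInternal()'s batching: each pass covers at
// most `limit` ops across buckets (in _id order); a bucket cut off mid-way is
// re-queried from its last seen op in the next pass.
function runBatches(ops: Map<string, number[]>, limit: number): string[] {
  const requests = new Map<string, Request>();
  for (const bucket of ops.keys()) {
    requests.set(bucket, { bucket, start: 0, end: Infinity });
  }
  const log: string[] = [];
  while (requests.size > 0) {
    let budget = limit;
    let limitReached = false;
    for (const req of [...requests.values()]) {
      if (budget == 0) {
        limitReached = true; // remaining buckets roll over to the next pass
        break;
      }
      // Mirrors $match on (start, end] plus $sort and $limit.
      const matched = ops.get(req.bucket)!.filter((o) => o > req.start && o <= req.end);
      const taken = matched.slice(0, budget);
      budget -= taken.length;
      if (taken.length > 0) {
        log.push(`${req.bucket}[${taken[0]}-${taken[taken.length - 1]}]`);
      }
      if (taken.length < matched.length) {
        // Cut off mid-bucket: resume from the last op we saw (doc.last_op).
        limitReached = true;
        requests.set(req.bucket, { ...req, start: taken[taken.length - 1] });
        break;
      }
      requests.delete(req.bucket); // all done for this bucket
    }
    if (!limitReached) break;
  }
  return log;
}

// Three buckets of 10 ops each with limit 12 yields, pass by pass:
// [ 'A[1-10]', 'B[1-2]', 'B[3-10]', 'C[1-4]', 'C[5-10]' ]
const tenOps = Array.from({ length: 10 }, (_, i) => i + 1);
console.log(runBatches(new Map([['A', tenOps], ['B', tenOps], ['C', tenOps]]), 12));
```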
package/src/storage/implementation/MongoCompactor.ts
@@ -6,7 +6,6 @@ import { PowerSyncMongo } from './db.js';
  import { BucketDataDocument, BucketDataKey, BucketStateDocument } from './models.js';
  import { MongoSyncBucketStorage } from './MongoSyncBucketStorage.js';
  import { cacheKey } from './OperationBatch.js';
- import { readSingleBatch } from './util.js';

  interface CurrentBucketState {
    /** Bucket name */
@@ -329,15 +328,11 @@ export class MongoCompactor {
              count: 0,
              bytes: 0
            }
-         },
-         $setOnInsert: {
-           // Only set this if we're creating the document.
-           // In all other cases, the replication process will have a set a more accurate id.
-           last_op: this.maxOpId
          }
        },
-       // We generally expect this to have been created before, but do handle cases of old unchanged buckets
-       upsert: true
+       // We generally expect this to have been created before.
+       // We don't create new ones here, to avoid issues with the unique index on bucket_updates.
+       upsert: false
      }
    });
  }
@@ -484,15 +479,25 @@
   * Subset of compact, only populating checksums where relevant.
   */
  async populateChecksums() {
-   let lastId: BucketStateDocument['_id'] | null = null;
+   // This is updated after each batch
+   let lowerBound: BucketStateDocument['_id'] = {
+     g: this.group_id,
+     b: new mongo.MinKey() as any
+   };
+   // This is static
+   const upperBound: BucketStateDocument['_id'] = {
+     g: this.group_id,
+     b: new mongo.MaxKey() as any
+   };
    while (!this.signal?.aborted) {
      // By filtering buckets, we effectively make this "resumeable".
-     let filter: mongo.Filter<BucketStateDocument> = {
+     const filter: mongo.Filter<BucketStateDocument> = {
+       _id: {
+         $gt: lowerBound,
+         $lt: upperBound
+       },
        compacted_state: { $exists: false }
      };
-     if (lastId) {
-       filter._id = { $gt: lastId };
-     }

      const bucketsWithoutChecksums = await this.db.bucket_state
        .find(filter, {
@@ -515,12 +520,12 @@

      await this.updateChecksumsBatch(bucketsWithoutChecksums.map((b) => b._id.b));

-     lastId = bucketsWithoutChecksums[bucketsWithoutChecksums.length - 1]._id;
+     lowerBound = bucketsWithoutChecksums[bucketsWithoutChecksums.length - 1]._id;
    }
  }

  private async updateChecksumsBatch(buckets: string[]) {
-   const checksums = await this.storage.checksums.queryPartialChecksums(
+   const checksums = await this.storage.checksums.computePartialChecksumsDirect(
      buckets.map((bucket) => {
        return {
          bucket,
@@ -551,15 +556,11 @@
            checksum: BigInt(bucketChecksum.checksum),
            bytes: null
          }
-       },
-       $setOnInsert: {
-         // Only set this if we're creating the document.
-         // In all other cases, the replication process will have a set a more accurate id.
-         last_op: this.maxOpId
        }
      },
-     // We generally expect this to have been created before, but do handle cases of old unchanged buckets
-     upsert: true
+     // We don't create new ones here - it gets tricky to get the last_op right with the unique index on:
+     // bucket_updates: {'id.g': 1, 'last_op': 1}
+     upsert: false
    }
  });
  }
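
populateChecksums() now uses explicit MinKey/MaxKey bounds on the compound _id instead of a nullable lastId, so the query shape stays constant while only the lower bound advances. A minimal standalone sketch of the same keyset-pagination pattern with the raw Node.js driver (database name and batch size are illustrative, not from the package):

```ts
import { MongoClient, MinKey, MaxKey } from 'mongodb';

// Page through bucket_state in _id order for one group, resuming after the
// last processed document - the "resumeable" pattern used by populateChecksums().
async function scanBucketsWithoutChecksums(client: MongoClient, groupId: number) {
  const bucketState = client.db('powersync_demo').collection('bucket_state');
  let lowerBound: any = { g: groupId, b: new MinKey() };
  const upperBound = { g: groupId, b: new MaxKey() };

  while (true) {
    const batch = await bucketState
      .find({
        _id: { $gt: lowerBound, $lt: upperBound },
        compacted_state: { $exists: false }
      })
      .sort({ _id: 1 })
      .limit(200)
      .project({ _id: 1 })
      .toArray();
    if (batch.length == 0) {
      break;
    }
    // ... recompute checksums for batch.map((b) => b._id.b) here ...
    lowerBound = batch[batch.length - 1]._id; // advance the keyset cursor
  }
}
```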
package/src/storage/implementation/MongoSyncBucketStorage.ts
@@ -31,12 +31,16 @@ import { MongoBucketStorage } from '../MongoBucketStorage.js';
  import { PowerSyncMongo } from './db.js';
  import { BucketDataDocument, BucketDataKey, BucketStateDocument, SourceKey, SourceTableDocument } from './models.js';
  import { MongoBucketBatch } from './MongoBucketBatch.js';
- import { MongoChecksums } from './MongoChecksums.js';
+ import { MongoChecksumOptions, MongoChecksums } from './MongoChecksums.js';
  import { MongoCompactor } from './MongoCompactor.js';
  import { MongoParameterCompactor } from './MongoParameterCompactor.js';
  import { MongoWriteCheckpointAPI } from './MongoWriteCheckpointAPI.js';
  import { idPrefixFilter, mapOpEntry, readSingleBatch, setSessionSnapshotTime } from './util.js';

+ export interface MongoSyncBucketStorageOptions {
+   checksumOptions?: MongoChecksumOptions;
+ }
+
  export class MongoSyncBucketStorage
    extends BaseObserver<storage.SyncRulesBucketStorageListener>
    implements storage.SyncRulesBucketStorage
@@ -52,14 +56,15 @@ export class MongoSyncBucketStorage
      public readonly group_id: number,
      private readonly sync_rules: storage.PersistedSyncRulesContent,
      public readonly slot_name: string,
-     writeCheckpointMode: storage.WriteCheckpointMode = storage.WriteCheckpointMode.MANAGED
+     writeCheckpointMode?: storage.WriteCheckpointMode,
+     options?: MongoSyncBucketStorageOptions
    ) {
      super();
      this.db = factory.db;
-     this.checksums = new MongoChecksums(this.db, this.group_id);
+     this.checksums = new MongoChecksums(this.db, this.group_id, options?.checksumOptions);
      this.writeCheckpointAPI = new MongoWriteCheckpointAPI({
        db: this.db,
-       mode: writeCheckpointMode,
+       mode: writeCheckpointMode ?? storage.WriteCheckpointMode.MANAGED,
        sync_rules_id: group_id
      });
    }
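
The widened constructor stays backwards compatible: writeCheckpointMode now defaults inside the body rather than in the signature, which makes room for the trailing options parameter. A hypothetical call site (the factory, sync rules, and import form are assumptions, not shown in this diff):

```ts
// Hypothetical wiring; factory and syncRules come from the surrounding setup.
declare const factory: MongoBucketStorage;
declare const syncRules: storage.PersistedSyncRulesContent;

const bucketStorage = new MongoSyncBucketStorage(
  factory,
  1, // group_id
  syncRules,
  'test_slot',
  undefined, // writeCheckpointMode -> defaults to WriteCheckpointMode.MANAGED
  { checksumOptions: { bucketBatchLimit: 2, operationBatchLimit: 100 } }
);
```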
package/src/storage/implementation/MongoTestStorageFactoryGenerator.ts
@@ -1,10 +1,12 @@
  import { TestStorageOptions } from '@powersync/service-core';
  import { MongoBucketStorage } from '../MongoBucketStorage.js';
  import { connectMongoForTests } from './util.js';
+ import { MongoSyncBucketStorageOptions } from './MongoSyncBucketStorage.js';

  export type MongoTestStorageOptions = {
    url: string;
    isCI: boolean;
+   internalOptions?: MongoSyncBucketStorageOptions;
  };

  export const MongoTestStorageFactoryGenerator = (factoryOptions: MongoTestStorageOptions) => {
@@ -16,13 +18,14 @@ export const MongoTestStorageFactoryGenerator = (factoryOptions: MongoTestStorag
      await db.db.createCollection('bucket_parameters');
    }

-   // Full migrations are not currently run for tests, so we manually create this
-   await db.createCheckpointEventsCollection();
-
    if (!options?.doNotClear) {
      await db.clear();
    }

-   return new MongoBucketStorage(db, { slot_name_prefix: 'test_' });
+   // Full migrations are not currently run for tests, so we manually create the important ones
+   await db.createCheckpointEventsCollection();
+   await db.createBucketStateIndex();
+
+   return new MongoBucketStorage(db, { slot_name_prefix: 'test_' }, factoryOptions.internalOptions);
  };
};
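
A sketch of how a test suite might use the new internalOptions hook, shrinking the limits so that the bucket- and operation-splitting code paths run against small datasets; this is consistent with the new "split buckets" and "split operations" snapshot names further down, though the URL and limit values here are illustrative:

```ts
import { MongoTestStorageFactoryGenerator } from './MongoTestStorageFactoryGenerator.js';

const smallBatchFactory = MongoTestStorageFactoryGenerator({
  url: 'mongodb://localhost:27017/powersync_test', // illustrative test URL
  isCI: false,
  internalOptions: {
    checksumOptions: {
      bucketBatchLimit: 2, // default 200: split checksum queries after 2 buckets
      operationBatchLimit: 100 // default 50_000: split aggregation after 100 ops
    }
  }
});
```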
package/src/storage/implementation/db.ts
@@ -127,6 +127,20 @@ export class PowerSyncMongo {
      max: 50 // max number of documents
    });
  }
+
+ /**
+  * Only use in migrations and tests.
+  */
+ async createBucketStateIndex() {
+   // TODO: Implement a better mechanism to use migrations in tests
+   await this.bucket_state.createIndex(
+     {
+       '_id.g': 1,
+       last_op: 1
+     },
+     { name: 'bucket_updates', unique: true }
+   );
+ }
  }

  export function createPowerSyncMongo(config: MongoStorageConfig, options?: lib_mongo.MongoConnectionOptions) {
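
This unique bucket_updates index is why the compactor hunks above switched to upsert: false: within a group, no two bucket_state documents may share a last_op, so an upsert that invents a last_op (such as maxOpId) can collide with a value replication has already written. A hypothetical demonstration with the raw driver (database and bucket names illustrative):

```ts
import { MongoClient } from 'mongodb';

async function demoUniqueIndex(client: MongoClient) {
  const bucketState = client.db('powersync_demo').collection('bucket_state');
  await bucketState.createIndex({ '_id.g': 1, last_op: 1 }, { name: 'bucket_updates', unique: true });

  await bucketState.insertOne({ _id: { g: 1, b: 'bucket_a' }, last_op: 100 });
  // Same group, same last_op, different bucket: rejected by the unique index.
  await bucketState
    .insertOne({ _id: { g: 1, b: 'bucket_b' }, last_op: 100 })
    .catch((e) => console.log(e.code)); // 11000: duplicate key
}
```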
package/src/storage/implementation/models.ts
@@ -97,6 +97,10 @@ export interface BucketStateDocument {
    g: number;
    b: string;
  };
+ /**
+  * Important: There is a unique index on {'_id.g': 1, last_op: 1}.
+  * That means the last_op must match an actual op in the bucket, and not the commit checkpoint.
+  */
  last_op: bigint;
  /**
   * If set, this can be treated as "cache" of a checksum at a specific point.
package/test/src/__snapshots__/storage.test.ts.snap
@@ -1,6 +1,22 @@
  // Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html

- exports[`Mongo Sync Bucket Storage > empty storage metrics 1`] = `
+ exports[`Mongo Sync Bucket Storage - Data > empty storage metrics 1`] = `
+ {
+   "operations_size_bytes": 0,
+   "parameters_size_bytes": 0,
+   "replication_size_bytes": 0,
+ }
+ `;
+
+ exports[`Mongo Sync Bucket Storage - split buckets > empty storage metrics 1`] = `
+ {
+   "operations_size_bytes": 0,
+   "parameters_size_bytes": 0,
+   "replication_size_bytes": 0,
+ }
+ `;
+
+ exports[`Mongo Sync Bucket Storage - split operations > empty storage metrics 1`] = `
  {
    "operations_size_bytes": 0,
    "parameters_size_bytes": 0,