@powersync/service-module-mongodb-storage 0.0.0-dev-20250827091123 → 0.0.0-dev-20250828134335

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. package/CHANGELOG.md +28 -13
  2. package/dist/index.d.ts +0 -1
  3. package/dist/index.js +0 -1
  4. package/dist/index.js.map +1 -1
  5. package/dist/storage/MongoBucketStorage.js +1 -1
  6. package/dist/storage/MongoBucketStorage.js.map +1 -1
  7. package/dist/storage/implementation/MongoBucketBatch.d.ts +1 -1
  8. package/dist/storage/implementation/MongoBucketBatch.js +7 -4
  9. package/dist/storage/implementation/MongoBucketBatch.js.map +1 -1
  10. package/dist/storage/implementation/MongoCompactor.d.ts +16 -2
  11. package/dist/storage/implementation/MongoCompactor.js +204 -48
  12. package/dist/storage/implementation/MongoCompactor.js.map +1 -1
  13. package/dist/storage/implementation/MongoStorageProvider.d.ts +1 -1
  14. package/dist/storage/implementation/MongoStorageProvider.js +3 -7
  15. package/dist/storage/implementation/MongoStorageProvider.js.map +1 -1
  16. package/dist/storage/implementation/MongoSyncBucketStorage.d.ts +12 -1
  17. package/dist/storage/implementation/MongoSyncBucketStorage.js +196 -37
  18. package/dist/storage/implementation/MongoSyncBucketStorage.js.map +1 -1
  19. package/dist/storage/implementation/MongoTestStorageFactoryGenerator.d.ts +7 -0
  20. package/dist/storage/implementation/MongoTestStorageFactoryGenerator.js +18 -0
  21. package/dist/storage/implementation/MongoTestStorageFactoryGenerator.js.map +1 -0
  22. package/dist/storage/implementation/PersistedBatch.d.ts +1 -0
  23. package/dist/storage/implementation/PersistedBatch.js +13 -6
  24. package/dist/storage/implementation/PersistedBatch.js.map +1 -1
  25. package/dist/storage/implementation/db.d.ts +1 -6
  26. package/dist/storage/implementation/db.js +0 -16
  27. package/dist/storage/implementation/db.js.map +1 -1
  28. package/dist/storage/implementation/models.d.ts +14 -3
  29. package/dist/{utils → storage/implementation}/util.d.ts +35 -3
  30. package/dist/{utils → storage/implementation}/util.js +54 -0
  31. package/dist/storage/implementation/util.js.map +1 -0
  32. package/dist/storage/storage-index.d.ts +2 -3
  33. package/dist/storage/storage-index.js +2 -3
  34. package/dist/storage/storage-index.js.map +1 -1
  35. package/package.json +8 -8
  36. package/src/index.ts +0 -1
  37. package/src/storage/MongoBucketStorage.ts +1 -1
  38. package/src/storage/implementation/MongoBucketBatch.ts +8 -6
  39. package/src/storage/implementation/MongoCompactor.ts +239 -49
  40. package/src/storage/implementation/MongoStorageProvider.ts +4 -9
  41. package/src/storage/implementation/MongoSyncBucketStorage.ts +242 -38
  42. package/src/storage/implementation/MongoTestStorageFactoryGenerator.ts +28 -0
  43. package/src/storage/implementation/PersistedBatch.ts +14 -6
  44. package/src/storage/implementation/db.ts +0 -18
  45. package/src/storage/implementation/models.ts +15 -3
  46. package/src/{utils → storage/implementation}/util.ts +61 -3
  47. package/src/storage/storage-index.ts +2 -3
  48. package/test/src/__snapshots__/storage_sync.test.ts.snap +110 -0
  49. package/test/src/util.ts +2 -6
  50. package/tsconfig.tsbuildinfo +1 -1
  51. package/dist/migrations/db/migrations/1752661449910-connection-reporting.d.ts +0 -3
  52. package/dist/migrations/db/migrations/1752661449910-connection-reporting.js +0 -36
  53. package/dist/migrations/db/migrations/1752661449910-connection-reporting.js.map +0 -1
  54. package/dist/storage/MongoReportStorage.d.ts +0 -18
  55. package/dist/storage/MongoReportStorage.js +0 -154
  56. package/dist/storage/MongoReportStorage.js.map +0 -1
  57. package/dist/utils/test-utils.d.ts +0 -11
  58. package/dist/utils/test-utils.js +0 -40
  59. package/dist/utils/test-utils.js.map +0 -1
  60. package/dist/utils/util.js.map +0 -1
  61. package/dist/utils/utils-index.d.ts +0 -2
  62. package/dist/utils/utils-index.js +0 -3
  63. package/dist/utils/utils-index.js.map +0 -1
  64. package/src/migrations/db/migrations/1752661449910-connection-reporting.ts +0 -58
  65. package/src/storage/MongoReportStorage.ts +0 -177
  66. package/src/utils/test-utils.ts +0 -55
  67. package/src/utils/utils-index.ts +0 -2
  68. package/test/src/__snapshots__/connection-report-storage.test.ts.snap +0 -215
  69. package/test/src/connection-report-storage.test.ts +0 -133
package/src/storage/implementation/MongoCompactor.ts
@@ -1,10 +1,12 @@
-import { mongo } from '@powersync/lib-service-mongodb';
-import { logger, ReplicationAssertionError } from '@powersync/lib-services-framework';
-import { InternalOpId, storage, utils } from '@powersync/service-core';
+import { mongo, MONGO_OPERATION_TIMEOUT_MS } from '@powersync/lib-service-mongodb';
+import { logger, ReplicationAssertionError, ServiceAssertionError } from '@powersync/lib-services-framework';
+import { addChecksums, InternalOpId, isPartialChecksum, storage, utils } from '@powersync/service-core';
 
 import { PowerSyncMongo } from './db.js';
-import { BucketDataDocument, BucketDataKey } from './models.js';
+import { BucketDataDocument, BucketDataKey, BucketStateDocument } from './models.js';
+import { MongoSyncBucketStorage } from './MongoSyncBucketStorage.js';
 import { cacheKey } from './OperationBatch.js';
+import { readSingleBatch } from './util.js';
 
 interface CurrentBucketState {
   /** Bucket name */
@@ -27,6 +29,21 @@ interface CurrentBucketState {
    * Number of REMOVE/MOVE operations seen since lastNotPut.
    */
   opsSincePut: number;
+
+  /**
+   * Incrementally-updated checksum, up to maxOpId
+   */
+  checksum: number;
+
+  /**
+   * op count for the checksum
+   */
+  opCount: number;
+
+  /**
+   * Byte size of ops covered by the checksum.
+   */
+  opBytes: number;
 }
 
 /**
@@ -43,25 +60,30 @@ const DEFAULT_MEMORY_LIMIT_MB = 64;
 
 export class MongoCompactor {
   private updates: mongo.AnyBulkWriteOperation<BucketDataDocument>[] = [];
+  private bucketStateUpdates: mongo.AnyBulkWriteOperation<BucketStateDocument>[] = [];
 
   private idLimitBytes: number;
   private moveBatchLimit: number;
   private moveBatchQueryLimit: number;
   private clearBatchLimit: number;
-  private maxOpId: bigint | undefined;
+  private maxOpId: bigint;
   private buckets: string[] | undefined;
+  private signal?: AbortSignal;
+  private group_id: number;
 
   constructor(
+    private storage: MongoSyncBucketStorage,
     private db: PowerSyncMongo,
-    private group_id: number,
     options?: MongoCompactOptions
   ) {
+    this.group_id = storage.group_id;
     this.idLimitBytes = (options?.memoryLimitMB ?? DEFAULT_MEMORY_LIMIT_MB) * 1024 * 1024;
     this.moveBatchLimit = options?.moveBatchLimit ?? DEFAULT_MOVE_BATCH_LIMIT;
     this.moveBatchQueryLimit = options?.moveBatchQueryLimit ?? DEFAULT_MOVE_BATCH_QUERY_LIMIT;
     this.clearBatchLimit = options?.clearBatchLimit ?? DEFAULT_CLEAR_BATCH_LIMIT;
-    this.maxOpId = options?.maxOpId;
+    this.maxOpId = options?.maxOpId ?? 0n;
     this.buckets = options?.compactBuckets;
+    this.signal = options?.signal;
   }
 
   /**
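Note: the constructor now takes the MongoSyncBucketStorage instance first and derives group_id from it, and long-running compaction can be cancelled through the new signal option. A hypothetical caller (the bucketStorage and db variables are assumed here, not part of this diff) would wire it up roughly like this:

    // Hypothetical wiring; `bucketStorage` is a MongoSyncBucketStorage and
    // `db` its PowerSyncMongo handle.
    const controller = new AbortController();
    const compactor = new MongoCompactor(bucketStorage, db, {
      maxOpId: 1000n,               // ops after this id are skipped; now defaults to 0n
      compactBuckets: ['global[]'], // optional subset of buckets
      signal: controller.signal     // new: cooperative cancellation
    });
    // The compaction loops check signal.aborted between batches, so aborting
    // never interrupts a write mid-flight.
    setTimeout(() => controller.abort(), 60_000);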
@@ -117,31 +139,33 @@ export class MongoCompactor {
       o: new mongo.MaxKey() as any
     };
 
-    while (true) {
+    while (!this.signal?.aborted) {
       // Query one batch at a time, to avoid cursor timeouts
-      const batch = await this.db.bucket_data
-        .find(
-          {
+      const cursor = this.db.bucket_data.aggregate<BucketDataDocument & { size: number | bigint }>([
+        {
+          $match: {
             _id: {
               $gte: lowerBound,
               $lt: upperBound
             }
-          },
-          {
-            projection: {
-              _id: 1,
-              op: 1,
-              table: 1,
-              row_id: 1,
-              source_table: 1,
-              source_key: 1
-            },
-            limit: this.moveBatchQueryLimit,
-            sort: { _id: -1 },
-            singleBatch: true
           }
-        )
-        .toArray();
+        },
+        { $sort: { _id: -1 } },
+        { $limit: this.moveBatchQueryLimit },
+        {
+          $project: {
+            _id: 1,
+            op: 1,
+            table: 1,
+            row_id: 1,
+            source_table: 1,
+            source_key: 1,
+            checksum: 1,
+            size: { $bsonSize: '$$ROOT' }
+          }
+        }
+      ]);
+      const { data: batch } = await readSingleBatch(cursor);
 
       if (batch.length == 0) {
         // We've reached the end
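Note: the previous find() with singleBatch: true is replaced by an aggregation so the server can report each document's size via $bsonSize, and readSingleBatch() (moved into storage/implementation/util in this release) preserves the one-batch-per-query behaviour that avoids cursor timeouts. A standalone sketch of the size projection, with assumed connection and naming details:

    import { MongoClient } from 'mongodb';

    // Assumed connection and database/collection names, for illustration only.
    const client = new MongoClient('mongodb://localhost:27017');
    const sizes = await client
      .db('powersync')
      .collection('bucket_data')
      .aggregate([
        { $sort: { _id: -1 } },
        { $limit: 10 },
        // $bsonSize (MongoDB 4.4+) returns the BSON size in bytes of the
        // expression it is given; '$$ROOT' is the whole matched document.
        { $project: { _id: 1, size: { $bsonSize: '$$ROOT' } } }
      ])
      .toArray();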
@@ -153,34 +177,47 @@
 
     for (let doc of batch) {
       if (currentState == null || doc._id.b != currentState.bucket) {
-        if (currentState != null && currentState.lastNotPut != null && currentState.opsSincePut >= 1) {
-          // Important to flush before clearBucket()
-          await this.flush();
-          logger.info(
-            `Inserting CLEAR at ${this.group_id}:${currentState.bucket}:${currentState.lastNotPut} to remove ${currentState.opsSincePut} operations`
-          );
+        if (currentState != null) {
+          if (currentState.lastNotPut != null && currentState.opsSincePut >= 1) {
+            // Important to flush before clearBucket()
+            // Does not have to happen before flushBucketChecksums()
+            await this.flush();
+            logger.info(
+              `Inserting CLEAR at ${this.group_id}:${currentState.bucket}:${currentState.lastNotPut} to remove ${currentState.opsSincePut} operations`
+            );
+
+            // Free memory before clearing bucket
+            currentState!.seen.clear();
+
+            await this.clearBucket(currentState);
+          }
 
-          const bucket = currentState.bucket;
-          const clearOp = currentState.lastNotPut;
-          // Free memory before clearing bucket
-          currentState = null;
-          await this.clearBucket(bucket, clearOp);
+          // Should happen after clearBucket() for accurate stats
+          this.updateBucketChecksums(currentState);
         }
         currentState = {
           bucket: doc._id.b,
           seen: new Map(),
           trackingSize: 0,
           lastNotPut: null,
-          opsSincePut: 0
+          opsSincePut: 0,
+
+          checksum: 0,
+          opCount: 0,
+          opBytes: 0
         };
       }
 
-      if (this.maxOpId != null && doc._id.o > this.maxOpId) {
+      if (doc._id.o > this.maxOpId) {
         continue;
       }
 
+      currentState.checksum = addChecksums(currentState.checksum, Number(doc.checksum));
+      currentState.opCount += 1;
+
       let isPersistentPut = doc.op == 'PUT';
 
+      currentState.opBytes += Number(doc.size);
       if (doc.op == 'REMOVE' || doc.op == 'PUT') {
        const key = `${doc.table}/${doc.row_id}/${cacheKey(doc.source_table!, doc.source_key!)}`;
        const targetOp = currentState.seen.get(key);
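Note: each op that survives the maxOpId cut-off folds its stored checksum into a running per-bucket total. A stand-in sketch of the idea, assuming checksums combine by 32-bit addition with wrap-around (the real addChecksums lives in @powersync/service-core and may differ in signedness):

    // Sketch only, not the actual service-core implementation.
    function addChecksumsSketch(a: number, b: number): number {
      // Wrap-around addition is associative and commutative, so the compactor
      // can accumulate op checksums while scanning the bucket in any order.
      return (a + b) >>> 0;
    }

Because the combination is order-independent, a stored compacted_state.checksum (valid up to op_id) can later be merged with a partial checksum of only the newer ops instead of rescanning the whole bucket.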
@@ -208,6 +245,8 @@
             }
           }
         });
+
+        currentState.opBytes += 200 - Number(doc.size); // TODO: better estimate for this
       } else {
         if (currentState.trackingSize >= idLimitBytes) {
           // Reached memory limit.
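Note: the byte accounting nets out as follows: every visited op first adds its full BSON size to opBytes, and an op converted to MOVE is then re-estimated at a flat 200 bytes, since its payload is discarded (the TODO above flags this as a rough estimate):

    // Net effect for an op that becomes a MOVE (sizes in bytes):
    opBytes += Number(doc.size);       // counted when the op is visited
    opBytes += 200 - Number(doc.size); // adjustment on conversion
    // => the MOVE contributes ~200 bytes to compacted_state.bytes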
@@ -234,24 +273,72 @@
           currentState.opsSincePut += 1;
         }
 
-        if (this.updates.length >= this.moveBatchLimit) {
+        if (this.updates.length + this.bucketStateUpdates.length >= this.moveBatchLimit) {
           await this.flush();
         }
       }
     }
 
-    await this.flush();
     currentState?.seen.clear();
     if (currentState?.lastNotPut != null && currentState?.opsSincePut > 1) {
       logger.info(
         `Inserting CLEAR at ${this.group_id}:${currentState.bucket}:${currentState.lastNotPut} to remove ${currentState.opsSincePut} operations`
       );
-      const bucket = currentState.bucket;
-      const clearOp = currentState.lastNotPut;
-      // Free memory before clearing bucket
-      currentState = null;
-      await this.clearBucket(bucket, clearOp);
+      // Need flush() before clear()
+      await this.flush();
+      await this.clearBucket(currentState);
+    }
+    if (currentState != null) {
+      // Do this _after_ clearBucket so that we have accurate counts.
+      this.updateBucketChecksums(currentState);
     }
+    // Need another flush after updateBucketChecksums()
+    await this.flush();
+  }
+
+  /**
+   * Call when done with a bucket.
+   */
+  private updateBucketChecksums(state: CurrentBucketState) {
+    if (state.opCount < 0) {
+      throw new ServiceAssertionError(
+        `Invalid opCount: ${state.opCount} checksum ${state.checksum} opsSincePut: ${state.opsSincePut} maxOpId: ${this.maxOpId}`
+      );
+    }
+    this.bucketStateUpdates.push({
+      updateOne: {
+        filter: {
+          _id: {
+            g: this.group_id,
+            b: state.bucket
+          }
+        },
+        update: {
+          $set: {
+            compacted_state: {
+              op_id: this.maxOpId,
+              count: state.opCount,
+              checksum: BigInt(state.checksum),
+              bytes: state.opBytes
+            },
+            estimate_since_compact: {
+              // Note: There could have been a whole bunch of new operations added to the bucket _while_ compacting,
+              // which we don't currently cater for.
+              // We could potentially query for that, but that could add overhead.
+              count: 0,
+              bytes: 0
+            }
+          },
+          $setOnInsert: {
+            // Only set this if we're creating the document.
+            // In all other cases, the replication process will have set a more accurate id.
+            last_op: this.maxOpId
+          }
+        },
+        // We generally expect this to have been created before, but do handle cases of old unchanged buckets
+        upsert: true
+      }
+    });
   }
 
   private async flush() {
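Note: updateBucketChecksums() upserts a per-bucket summary document. The resulting bucket_state shape, as implied by the update above (field names are taken from this diff; the authoritative TypeScript types live in models.ts and may differ slightly):

    interface BucketStateDocumentSketch {
      _id: { g: number; b: string }; // group id + bucket name
      last_op: bigint;               // only set on insert; replication maintains it
      compacted_state?: {
        op_id: bigint;   // the checksum is valid up to this op
        count: number;   // ops covered by the checksum
        checksum: bigint;
        bytes: number;   // approximate byte size of the covered ops
      };
      estimate_since_compact?: { count: number; bytes: number };
    }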
@@ -266,15 +353,26 @@
       });
       this.updates = [];
     }
+    if (this.bucketStateUpdates.length > 0) {
+      logger.info(`Updating ${this.bucketStateUpdates.length} bucket states`);
+      await this.db.bucket_state.bulkWrite(this.bucketStateUpdates, {
+        ordered: false
+      });
+      this.bucketStateUpdates = [];
+    }
   }
 
   /**
    * Perform a CLEAR compact for a bucket.
    *
+   *
    * @param bucket bucket name
    * @param op op_id of the last non-PUT operation, which will be converted to CLEAR.
    */
-  private async clearBucket(bucket: string, op: InternalOpId) {
+  private async clearBucket(currentState: CurrentBucketState) {
+    const bucket = currentState.bucket;
+    const clearOp = currentState.lastNotPut!;
+
     const opFilter = {
       _id: {
         $gte: {
@@ -285,7 +383,7 @@
         $lte: {
           g: this.group_id,
           b: bucket,
-          o: op
+          o: clearOp
         }
       }
     };
@@ -293,7 +391,8 @@
     const session = this.db.client.startSession();
     try {
       let done = false;
-      while (!done) {
+      while (!done && !this.signal?.aborted) {
+        let opCountDiff = 0;
         // Do the CLEAR operation in batches, with each batch a separate transaction.
         // The state after each batch is fully consistent.
         // We need a transaction per batch to make sure checksums stay consistent.
@@ -364,15 +463,106 @@
              },
              { session }
            );
+
+            opCountDiff = -numberOfOpsToClear + 1;
          },
          {
            writeConcern: { w: 'majority' },
            readConcern: { level: 'snapshot' }
          }
        );
+        // Update _outside_ the transaction, since the transaction can be retried multiple times.
+        currentState.opCount += opCountDiff;
       }
     } finally {
       await session.endSession();
     }
   }
+
+  /**
+   * Subset of compact, only populating checksums where relevant.
+   */
+  async populateChecksums() {
+    let lastId: BucketStateDocument['_id'] | null = null;
+    while (!this.signal?.aborted) {
+      // By filtering buckets, we effectively make this "resumeable".
+      let filter: mongo.Filter<BucketStateDocument> = {
+        compacted_state: { $exists: false }
+      };
+      if (lastId) {
+        filter._id = { $gt: lastId };
+      }
+
+      const bucketsWithoutChecksums = await this.db.bucket_state
+        .find(filter, {
+          projection: {
+            _id: 1
+          },
+          sort: {
+            _id: 1
+          },
+          limit: 5_000,
+          maxTimeMS: MONGO_OPERATION_TIMEOUT_MS
+        })
+        .toArray();
+      if (bucketsWithoutChecksums.length == 0) {
+        // All done
+        break;
+      }
+
+      logger.info(`Calculating checksums for batch of ${bucketsWithoutChecksums.length} buckets`);
+
+      await this.updateChecksumsBatch(bucketsWithoutChecksums.map((b) => b._id.b));
+
+      lastId = bucketsWithoutChecksums[bucketsWithoutChecksums.length - 1]._id;
+    }
+  }
+
+  private async updateChecksumsBatch(buckets: string[]) {
+    const checksums = await this.storage.queryPartialChecksums(
+      buckets.map((bucket) => {
+        return {
+          bucket,
+          end: this.maxOpId
+        };
+      })
+    );
+
+    for (let bucketChecksum of checksums.values()) {
+      if (isPartialChecksum(bucketChecksum)) {
+        // Should never happen since we don't specify `start`
+        throw new ServiceAssertionError(`Full checksum expected, got ${JSON.stringify(bucketChecksum)}`);
+      }
+
+      this.bucketStateUpdates.push({
+        updateOne: {
+          filter: {
+            _id: {
+              g: this.group_id,
+              b: bucketChecksum.bucket
+            }
+          },
+          update: {
+            $set: {
+              compacted_state: {
+                op_id: this.maxOpId,
+                count: bucketChecksum.count,
+                checksum: BigInt(bucketChecksum.checksum),
+                bytes: 0 // We don't calculate that here
+              }
+            },
+            $setOnInsert: {
+              // Only set this if we're creating the document.
+              // In all other cases, the replication process will have set a more accurate id.
+              last_op: this.maxOpId
+            }
+          },
+          // We generally expect this to have been created before, but do handle cases of old unchanged buckets
+          upsert: true
+        }
+      });
    }
+
+    await this.flush();
+  }
 }
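Note: populateChecksums() backfills compacted_state for buckets that have never been compacted, paging by _id so an interrupted run resumes with the remaining buckets. A hypothetical one-off invocation (bucketStorage, db and checkpointOpId are assumed names, not from this diff):

    const compactor = new MongoCompactor(bucketStorage, db, {
      maxOpId: checkpointOpId,                 // compute checksums up to here
      signal: AbortSignal.timeout(10 * 60_000) // Node 17.3+: stop after 10 minutes
    });
    // Only buckets without compacted_state are selected, 5 000 per batch, so
    // re-running after an abort naturally continues where it left off.
    await compactor.populateChecksums();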
package/src/storage/implementation/MongoStorageProvider.ts
@@ -4,9 +4,8 @@ import { POWERSYNC_VERSION, storage } from '@powersync/service-core';
 import { MongoStorageConfig } from '../../types/types.js';
 import { MongoBucketStorage } from '../MongoBucketStorage.js';
 import { PowerSyncMongo } from './db.js';
-import { MongoReportStorage } from '../MongoReportStorage.js';
 
-export class MongoStorageProvider implements storage.StorageProvider {
+export class MongoStorageProvider implements storage.BucketStorageProvider {
   get type() {
     return lib_mongo.MONGO_CONNECTION_TYPE;
   }
@@ -38,19 +37,15 @@ export class MongoStorageProvider implements storage.StorageProvider {
     await client.connect();
 
     const database = new PowerSyncMongo(client, { database: resolvedConfig.storage.database });
-    const syncStorageFactory = new MongoBucketStorage(database, {
+    const factory = new MongoBucketStorage(database, {
       // TODO currently need the entire resolved config due to this
       slot_name_prefix: resolvedConfig.slot_name_prefix
     });
-
-    // Storage factory for reports
-    const reportStorageFactory = new MongoReportStorage(database);
     return {
-      storage: syncStorageFactory,
-      reportStorage: reportStorageFactory,
+      storage: factory,
       shutDown: async () => {
         shuttingDown = true;
-        await syncStorageFactory[Symbol.asyncDispose]();
+        await factory[Symbol.asyncDispose]();
         await client.close();
       },
       tearDown: () => {