@powersync/service-module-mongodb-storage 0.10.4 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. package/CHANGELOG.md +65 -0
  2. package/LICENSE +3 -3
  3. package/dist/storage/implementation/MongoBucketBatch.d.ts +21 -2
  4. package/dist/storage/implementation/MongoBucketBatch.js +66 -7
  5. package/dist/storage/implementation/MongoBucketBatch.js.map +1 -1
  6. package/dist/storage/implementation/MongoCompactor.d.ts +7 -0
  7. package/dist/storage/implementation/MongoCompactor.js +122 -44
  8. package/dist/storage/implementation/MongoCompactor.js.map +1 -1
  9. package/dist/storage/implementation/MongoParameterCompactor.d.ts +17 -0
  10. package/dist/storage/implementation/MongoParameterCompactor.js +92 -0
  11. package/dist/storage/implementation/MongoParameterCompactor.js.map +1 -0
  12. package/dist/storage/implementation/MongoSyncBucketStorage.d.ts +14 -4
  13. package/dist/storage/implementation/MongoSyncBucketStorage.js +229 -115
  14. package/dist/storage/implementation/MongoSyncBucketStorage.js.map +1 -1
  15. package/dist/storage/implementation/PersistedBatch.d.ts +1 -0
  16. package/dist/storage/implementation/PersistedBatch.js +12 -5
  17. package/dist/storage/implementation/PersistedBatch.js.map +1 -1
  18. package/dist/storage/implementation/models.d.ts +20 -0
  19. package/dist/storage/implementation/util.d.ts +2 -1
  20. package/dist/storage/implementation/util.js +13 -0
  21. package/dist/storage/implementation/util.js.map +1 -1
  22. package/package.json +9 -9
  23. package/src/storage/implementation/MongoBucketBatch.ts +82 -8
  24. package/src/storage/implementation/MongoCompactor.ts +147 -47
  25. package/src/storage/implementation/MongoParameterCompactor.ts +105 -0
  26. package/src/storage/implementation/MongoSyncBucketStorage.ts +257 -157
  27. package/src/storage/implementation/PersistedBatch.ts +13 -5
  28. package/src/storage/implementation/models.ts +21 -0
  29. package/src/storage/implementation/util.ts +14 -1
  30. package/test/src/__snapshots__/storage_sync.test.ts.snap +319 -11
  31. package/test/src/storage_compacting.test.ts +2 -0
  32. package/tsconfig.tsbuildinfo +1 -1
package/src/storage/implementation/MongoCompactor.ts
@@ -1,10 +1,11 @@
 import { mongo } from '@powersync/lib-service-mongodb';
-import { logger, ReplicationAssertionError } from '@powersync/lib-services-framework';
-import { InternalOpId, storage, utils } from '@powersync/service-core';
+import { logger, ReplicationAssertionError, ServiceAssertionError } from '@powersync/lib-services-framework';
+import { addChecksums, InternalOpId, storage, utils } from '@powersync/service-core';
 
 import { PowerSyncMongo } from './db.js';
-import { BucketDataDocument, BucketDataKey } from './models.js';
+import { BucketDataDocument, BucketDataKey, BucketStateDocument } from './models.js';
 import { cacheKey } from './OperationBatch.js';
+import { readSingleBatch } from './util.js';
 
 interface CurrentBucketState {
   /** Bucket name */
@@ -27,6 +28,21 @@ interface CurrentBucketState
    * Number of REMOVE/MOVE operations seen since lastNotPut.
    */
   opsSincePut: number;
+
+  /**
+   * Incrementally-updated checksum, up to maxOpId
+   */
+  checksum: number;
+
+  /**
+   * op count for the checksum
+   */
+  opCount: number;
+
+  /**
+   * Byte size of ops covered by the checksum.
+   */
+  opBytes: number;
 }
 
 /**
@@ -43,13 +59,15 @@ const DEFAULT_MEMORY_LIMIT_MB = 64;
 
 export class MongoCompactor {
   private updates: mongo.AnyBulkWriteOperation<BucketDataDocument>[] = [];
+  private bucketStateUpdates: mongo.AnyBulkWriteOperation<BucketStateDocument>[] = [];
 
   private idLimitBytes: number;
   private moveBatchLimit: number;
   private moveBatchQueryLimit: number;
   private clearBatchLimit: number;
-  private maxOpId: bigint | undefined;
+  private maxOpId: bigint;
   private buckets: string[] | undefined;
+  private signal?: AbortSignal;
 
   constructor(
     private db: PowerSyncMongo,
@@ -60,8 +78,9 @@ export class MongoCompactor {
     this.moveBatchLimit = options?.moveBatchLimit ?? DEFAULT_MOVE_BATCH_LIMIT;
     this.moveBatchQueryLimit = options?.moveBatchQueryLimit ?? DEFAULT_MOVE_BATCH_QUERY_LIMIT;
     this.clearBatchLimit = options?.clearBatchLimit ?? DEFAULT_CLEAR_BATCH_LIMIT;
-    this.maxOpId = options?.maxOpId;
+    this.maxOpId = options?.maxOpId ?? 0n;
     this.buckets = options?.compactBuckets;
+    this.signal = options?.signal;
   }
 
   /**
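Note on the options added above: operations newer than maxOpId are now skipped (the option defaults to 0n instead of undefined), and the new signal option lets a caller cancel a long-running compact, since the main loop and clearBucket() both check this.signal?.aborted between batches. A hedged usage sketch; the constructor argument order and the compact() entry point are assumptions, and the actual call site in MongoSyncBucketStorage.ts is not part of this excerpt:

  // Hypothetical caller: cancel compaction on shutdown.
  const controller = new AbortController();
  const compactor = new MongoCompactor(db, group_id, {
    maxOpId: checkpoint, // ops after this id are ignored for compact/checksum purposes
    signal: controller.signal // new option in this release
  });
  const running = compactor.compact();
  // later, e.g. from a shutdown hook:
  controller.abort();
  await running;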
@@ -117,31 +136,33 @@
       o: new mongo.MaxKey() as any
     };
 
-    while (true) {
+    while (!this.signal?.aborted) {
       // Query one batch at a time, to avoid cursor timeouts
-      const batch = await this.db.bucket_data
-        .find(
-          {
+      const cursor = this.db.bucket_data.aggregate<BucketDataDocument & { size: number | bigint }>([
+        {
+          $match: {
             _id: {
               $gte: lowerBound,
               $lt: upperBound
             }
-          },
-          {
-            projection: {
-              _id: 1,
-              op: 1,
-              table: 1,
-              row_id: 1,
-              source_table: 1,
-              source_key: 1
-            },
-            limit: this.moveBatchQueryLimit,
-            sort: { _id: -1 },
-            singleBatch: true
           }
-        )
-        .toArray();
+        },
+        { $sort: { _id: -1 } },
+        { $limit: this.moveBatchQueryLimit },
+        {
+          $project: {
+            _id: 1,
+            op: 1,
+            table: 1,
+            row_id: 1,
+            source_table: 1,
+            source_key: 1,
+            checksum: 1,
+            size: { $bsonSize: '$$ROOT' }
+          }
+        }
+      ]);
+      const { data: batch } = await readSingleBatch(cursor);
 
       if (batch.length == 0) {
         // We've reached the end
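The batch query above now runs as an aggregation so that $bsonSize: '$$ROOT' can report the stored size of each operation (used for the opBytes statistic), and the checksum field is projected for the incremental bucket checksum. readSingleBatch comes from util.ts, which gains a helper in this release but is not shown in this excerpt; a hedged sketch of what a single-batch read can look like with the Node.js driver, where only the data field is relied on above:

  // Sketch only: read the first buffered batch from a cursor, then close it,
  // avoiding a long-lived server-side cursor that could time out between batches.
  async function readSingleBatchSketch<T>(cursor: mongo.AggregationCursor<T>): Promise<{ data: T[] }> {
    // hasNext() pulls a batch into the driver's local buffer without consuming it.
    const hasAny = await cursor.hasNext();
    const data = hasAny ? cursor.readBufferedDocuments() : [];
    await cursor.close();
    return { data };
  }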
@@ -153,34 +174,47 @@
 
       for (let doc of batch) {
         if (currentState == null || doc._id.b != currentState.bucket) {
-          if (currentState != null && currentState.lastNotPut != null && currentState.opsSincePut >= 1) {
-            // Important to flush before clearBucket()
-            await this.flush();
-            logger.info(
-              `Inserting CLEAR at ${this.group_id}:${currentState.bucket}:${currentState.lastNotPut} to remove ${currentState.opsSincePut} operations`
-            );
+          if (currentState != null) {
+            if (currentState.lastNotPut != null && currentState.opsSincePut >= 1) {
+              // Important to flush before clearBucket()
+              // Does not have to happen before flushBucketChecksums()
+              await this.flush();
+              logger.info(
+                `Inserting CLEAR at ${this.group_id}:${currentState.bucket}:${currentState.lastNotPut} to remove ${currentState.opsSincePut} operations`
+              );
+
+              // Free memory before clearing bucket
+              currentState!.seen.clear();
+
+              await this.clearBucket(currentState);
+            }
 
-            const bucket = currentState.bucket;
-            const clearOp = currentState.lastNotPut;
-            // Free memory before clearing bucket
-            currentState = null;
-            await this.clearBucket(bucket, clearOp);
+            // Should happen after clearBucket() for accurate stats
+            this.updateBucketChecksums(currentState);
           }
           currentState = {
             bucket: doc._id.b,
             seen: new Map(),
             trackingSize: 0,
             lastNotPut: null,
-            opsSincePut: 0
+            opsSincePut: 0,
+
+            checksum: 0,
+            opCount: 0,
+            opBytes: 0
           };
         }
 
-        if (this.maxOpId != null && doc._id.o > this.maxOpId) {
+        if (doc._id.o > this.maxOpId) {
           continue;
         }
 
+        currentState.checksum = addChecksums(currentState.checksum, Number(doc.checksum));
+        currentState.opCount += 1;
+
         let isPersistentPut = doc.op == 'PUT';
 
+        currentState.opBytes += Number(doc.size);
         if (doc.op == 'REMOVE' || doc.op == 'PUT') {
           const key = `${doc.table}/${doc.row_id}/${cacheKey(doc.source_table!, doc.source_key!)}`;
           const targetOp = currentState.seen.get(key);
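Each operation's stored checksum is folded into a running per-bucket checksum with addChecksums from @powersync/service-core, which is what makes it possible to compute the compacted bucket checksum in a single pass. A minimal sketch of the assumed semantics, additive combination with 32-bit wrap-around; the authoritative implementation lives in service-core and may normalize to a signed 32-bit value:

  // Sketch only: combine two operation checksums with unsigned 32-bit wrap-around.
  function addChecksumsSketch(a: number, b: number): number {
    return (a + b) >>> 0;
  }

  // Mirrors the loop above:
  // currentState.checksum = addChecksumsSketch(currentState.checksum, Number(doc.checksum));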
@@ -208,6 +242,8 @@
                 }
               }
             });
+
+            currentState.opBytes += 200 - Number(doc.size); // TODO: better estimate for this
           } else {
             if (currentState.trackingSize >= idLimitBytes) {
               // Reached memory limit.
@@ -234,24 +270,72 @@
           currentState.opsSincePut += 1;
         }
 
-        if (this.updates.length >= this.moveBatchLimit) {
+        if (this.updates.length + this.bucketStateUpdates.length >= this.moveBatchLimit) {
           await this.flush();
         }
       }
     }
 
-    await this.flush();
     currentState?.seen.clear();
     if (currentState?.lastNotPut != null && currentState?.opsSincePut > 1) {
       logger.info(
         `Inserting CLEAR at ${this.group_id}:${currentState.bucket}:${currentState.lastNotPut} to remove ${currentState.opsSincePut} operations`
       );
-      const bucket = currentState.bucket;
-      const clearOp = currentState.lastNotPut;
-      // Free memory before clearing bucket
-      currentState = null;
-      await this.clearBucket(bucket, clearOp);
+      // Need flush() before clear()
+      await this.flush();
+      await this.clearBucket(currentState);
     }
+    if (currentState != null) {
+      // Do this _after_ clearBucket so that we have accurate counts.
+      this.updateBucketChecksums(currentState);
+    }
+    // Need another flush after updateBucketChecksums()
+    await this.flush();
+  }
+
+  /**
+   * Call when done with a bucket.
+   */
+  private updateBucketChecksums(state: CurrentBucketState) {
+    if (state.opCount < 0) {
+      throw new ServiceAssertionError(
+        `Invalid opCount: ${state.opCount} checksum ${state.checksum} opsSincePut: ${state.opsSincePut} maxOpId: ${this.maxOpId}`
+      );
+    }
+    this.bucketStateUpdates.push({
+      updateOne: {
+        filter: {
+          _id: {
+            g: this.group_id,
+            b: state.bucket
+          }
+        },
+        update: {
+          $set: {
+            compacted_state: {
+              op_id: this.maxOpId,
+              count: state.opCount,
+              checksum: BigInt(state.checksum),
+              bytes: state.opBytes
+            },
+            estimate_since_compact: {
+              // Note: There could have been a whole bunch of new operations added to the bucket _while_ compacting,
+              // which we don't currently cater for.
+              // We could potentially query for that, but that could add overhead.
+              count: 0,
+              bytes: 0
+            }
+          },
+          $setOnInsert: {
+            // Only set this if we're creating the document.
+            // In all other cases, the replication process will have set a more accurate id.
+            last_op: this.maxOpId
+          }
+        },
+        // We generally expect this to have been created before, but do handle cases of old unchanged buckets
+        upsert: true
+      }
+    });
   }
 
   private async flush() {
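updateBucketChecksums() persists per-bucket statistics into the bucket_state collection. The shape of the new fields can be read off the $set / $setOnInsert payloads above; a hedged TypeScript sketch of that shape, inferred from this hunk rather than copied from models.ts (which gains 21 lines in this release but is not part of this excerpt):

  // Inferred sketch, not the authoritative model definition.
  interface CompactedStateSketch {
    op_id: bigint; // checkpoint (maxOpId) the compacted stats are valid for
    count: number; // operations at or below op_id after compaction
    checksum: bigint; // combined checksum of those operations
    bytes: number; // approximate BSON size of those operations
  }

  interface BucketStateSketch {
    _id: { g: number; b: string }; // sync rules group id + bucket name
    last_op: bigint; // only set on insert here; replication normally maintains it
    compacted_state?: CompactedStateSketch;
    estimate_since_compact?: { count: number; bytes: number }; // reset to zero after a compact
  }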
@@ -266,15 +350,26 @@
       });
       this.updates = [];
     }
+    if (this.bucketStateUpdates.length > 0) {
+      logger.info(`Updating ${this.bucketStateUpdates.length} bucket states`);
+      await this.db.bucket_state.bulkWrite(this.bucketStateUpdates, {
+        ordered: false
+      });
+      this.bucketStateUpdates = [];
+    }
   }
 
   /**
    * Perform a CLEAR compact for a bucket.
    *
+   *
    * @param bucket bucket name
    * @param op op_id of the last non-PUT operation, which will be converted to CLEAR.
    */
-  private async clearBucket(bucket: string, op: InternalOpId) {
+  private async clearBucket(currentState: CurrentBucketState) {
+    const bucket = currentState.bucket;
+    const clearOp = currentState.lastNotPut!;
+
     const opFilter = {
       _id: {
         $gte: {
@@ -285,7 +380,7 @@
         $lte: {
           g: this.group_id,
           b: bucket,
-          o: op
+          o: clearOp
         }
       }
     };
@@ -293,7 +388,8 @@
     const session = this.db.client.startSession();
     try {
       let done = false;
-      while (!done) {
+      while (!done && !this.signal?.aborted) {
+        let opCountDiff = 0;
         // Do the CLEAR operation in batches, with each batch a separate transaction.
         // The state after each batch is fully consistent.
         // We need a transaction per batch to make sure checksums stay consistent.
@@ -364,12 +460,16 @@
               },
               { session }
             );
+
+            opCountDiff = -numberOfOpsToClear + 1;
           },
           {
             writeConcern: { w: 'majority' },
            readConcern: { level: 'snapshot' }
           }
         );
+        // Update _outside_ the transaction, since the transaction can be retried multiple times.
+        currentState.opCount += opCountDiff;
       }
     } finally {
       await session.endSession();
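The opCountDiff bookkeeping above follows a general rule with withTransaction: the driver may retry the callback on transient errors, so in-memory state should only be mutated once the transaction has committed. A condensed sketch of the pattern, with the deletes inside the callback elided:

  // Sketch only: capture the delta inside the (possibly retried) callback,
  // apply it to in-memory state exactly once after commit.
  let opCountDiff = 0;
  await session.withTransaction(async () => {
    // ... delete cleared ops, upsert the CLEAR op (this block may run more than once) ...
    opCountDiff = -numberOfOpsToClear + 1; // plain assignment, so a retry simply overwrites it
  });
  currentState.opCount += opCountDiff; // applied once, after the transaction succeeds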
package/src/storage/implementation/MongoParameterCompactor.ts (new file)
@@ -0,0 +1,105 @@
+import { logger } from '@powersync/lib-services-framework';
+import { bson, CompactOptions, InternalOpId } from '@powersync/service-core';
+import { LRUCache } from 'lru-cache';
+import { PowerSyncMongo } from './db.js';
+import { mongo } from '@powersync/lib-service-mongodb';
+import { BucketParameterDocument } from './models.js';
+
+/**
+ * Compacts parameter lookup data (the bucket_parameters collection).
+ *
+ * This scans through the entire collection to find data to compact.
+ *
+ * For background, see the `/docs/parameters-lookups.md` file.
+ */
+export class MongoParameterCompactor {
+  constructor(
+    private db: PowerSyncMongo,
+    private group_id: number,
+    private checkpoint: InternalOpId,
+    private options: CompactOptions
+  ) {}
+
+  async compact() {
+    logger.info(`Compacting parameters for group ${this.group_id} up to checkpoint ${this.checkpoint}`);
+    // This is the currently-active checkpoint.
+    // We do not remove any data that may be used by this checkpoint.
+    // Snapshot queries ensure that if any clients are still using older checkpoints, they would
+    // not be affected by this compaction.
+    const checkpoint = this.checkpoint;
+
+    // Index on {'key.g': 1, lookup: 1, _id: 1}
+    // In theory, we could let MongoDB do more of the work here, by grouping by (key, lookup)
+    // in MongoDB already. However, that risks running into cases where MongoDB needs to process
+    // very large amounts of data before returning results, which could lead to timeouts.
+    const cursor = this.db.bucket_parameters.find(
+      {
+        'key.g': this.group_id
+      },
+      {
+        sort: { lookup: 1, _id: 1 },
+        batchSize: 10_000,
+        projection: { _id: 1, key: 1, lookup: 1, bucket_parameters: 1 }
+      }
+    );
+
+    // The index doesn't cover sorting by key, so we keep our own cache of the last seen key.
+    let lastByKey = new LRUCache<string, InternalOpId>({
+      max: this.options.compactParameterCacheLimit ?? 10_000
+    });
+    let removeIds: InternalOpId[] = [];
+    let removeDeleted: mongo.AnyBulkWriteOperation<BucketParameterDocument>[] = [];
+
+    const flush = async (force: boolean) => {
+      if (removeIds.length >= 1000 || (force && removeIds.length > 0)) {
+        const results = await this.db.bucket_parameters.deleteMany({ _id: { $in: removeIds } });
+        logger.info(`Removed ${results.deletedCount} (${removeIds.length}) superseded parameter entries`);
+        removeIds = [];
+      }
+
+      if (removeDeleted.length > 10 || (force && removeDeleted.length > 0)) {
+        const results = await this.db.bucket_parameters.bulkWrite(removeDeleted);
+        logger.info(`Removed ${results.deletedCount} (${removeDeleted.length}) deleted parameter entries`);
+        removeDeleted = [];
+      }
+    };
+
+    while (await cursor.hasNext()) {
+      const batch = cursor.readBufferedDocuments();
+      for (let doc of batch) {
+        if (doc._id >= checkpoint) {
+          continue;
+        }
+        const uniqueKey = (
+          bson.serialize({
+            k: doc.key,
+            l: doc.lookup
+          }) as Buffer
+        ).toString('base64');
+        const previous = lastByKey.get(uniqueKey);
+        if (previous != null && previous < doc._id) {
+          // We have a newer entry for the same key, so we can remove the old one.
+          removeIds.push(previous);
+        }
+        lastByKey.set(uniqueKey, doc._id);
+
+        if (doc.bucket_parameters?.length == 0) {
+          // This is a delete operation, so we can remove it completely.
+          // We cannot remove just this operation on its own: there may still be an earlier operation
+          // with the same key and lookup that we don't have in the cache due to cache size limits.
+          // So we explicitly remove all earlier operations as well.
+          removeDeleted.push({
+            deleteMany: {
+              filter: { 'key.g': doc.key.g, lookup: doc.lookup, _id: { $lte: doc._id }, key: doc.key }
+            }
+          });
+        }
+      }
+
+      await flush(false);
+    }
+
+    await flush(true);
+    logger.info('Parameter compaction completed');
+  }
+}
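A hedged usage sketch for the new compactor; the real integration point is MongoSyncBucketStorage.ts (also changed in this release, not shown here), and the variable names below are placeholders:

  // Sketch only: db is a PowerSyncMongo instance, groupId the sync rules group id,
  // and activeCheckpoint the currently-active checkpoint op id.
  const parameterCompactor = new MongoParameterCompactor(db, groupId, activeCheckpoint, {
    compactParameterCacheLimit: 20_000 // optional; defaults to 10_000 as shown above
  });
  await parameterCompactor.compact();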