@powersync/service-module-mongodb-storage 0.11.0 → 0.12.1
This diff shows the changes between these publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
- package/CHANGELOG.md +44 -0
- package/LICENSE +3 -3
- package/dist/storage/implementation/MongoBucketBatch.d.ts +1 -1
- package/dist/storage/implementation/MongoBucketBatch.js +7 -4
- package/dist/storage/implementation/MongoBucketBatch.js.map +1 -1
- package/dist/storage/implementation/MongoChecksums.d.ts +34 -0
- package/dist/storage/implementation/MongoChecksums.js +274 -0
- package/dist/storage/implementation/MongoChecksums.js.map +1 -0
- package/dist/storage/implementation/MongoCompactor.d.ts +16 -2
- package/dist/storage/implementation/MongoCompactor.js +204 -51
- package/dist/storage/implementation/MongoCompactor.js.map +1 -1
- package/dist/storage/implementation/MongoSyncBucketStorage.d.ts +5 -3
- package/dist/storage/implementation/MongoSyncBucketStorage.js +29 -70
- package/dist/storage/implementation/MongoSyncBucketStorage.js.map +1 -1
- package/dist/storage/implementation/PersistedBatch.d.ts +1 -0
- package/dist/storage/implementation/PersistedBatch.js +12 -5
- package/dist/storage/implementation/PersistedBatch.js.map +1 -1
- package/dist/storage/implementation/models.d.ts +14 -0
- package/dist/storage/implementation/util.d.ts +1 -1
- package/dist/storage/implementation/util.js.map +1 -1
- package/package.json +9 -9
- package/src/storage/implementation/MongoBucketBatch.ts +8 -6
- package/src/storage/implementation/MongoChecksums.ts +320 -0
- package/src/storage/implementation/MongoCompactor.ts +239 -48
- package/src/storage/implementation/MongoSyncBucketStorage.ts +33 -78
- package/src/storage/implementation/PersistedBatch.ts +13 -5
- package/src/storage/implementation/models.ts +15 -0
- package/src/storage/implementation/util.ts +2 -2
- package/test/src/__snapshots__/storage_sync.test.ts.snap +319 -11
- package/tsconfig.tsbuildinfo +1 -1

package/src/storage/implementation/MongoCompactor.ts:

@@ -1,10 +1,12 @@
-import { mongo } from '@powersync/lib-service-mongodb';
-import { logger, ReplicationAssertionError } from '@powersync/lib-services-framework';
-import { InternalOpId, storage, utils } from '@powersync/service-core';
+import { mongo, MONGO_OPERATION_TIMEOUT_MS } from '@powersync/lib-service-mongodb';
+import { logger, ReplicationAssertionError, ServiceAssertionError } from '@powersync/lib-services-framework';
+import { addChecksums, InternalOpId, isPartialChecksum, storage, utils } from '@powersync/service-core';
 
 import { PowerSyncMongo } from './db.js';
-import { BucketDataDocument, BucketDataKey } from './models.js';
+import { BucketDataDocument, BucketDataKey, BucketStateDocument } from './models.js';
+import { MongoSyncBucketStorage } from './MongoSyncBucketStorage.js';
 import { cacheKey } from './OperationBatch.js';
+import { readSingleBatch } from './util.js';
 
 interface CurrentBucketState {
   /** Bucket name */
@@ -27,6 +29,21 @@ interface CurrentBucketState {
    * Number of REMOVE/MOVE operations seen since lastNotPut.
    */
   opsSincePut: number;
+
+  /**
+   * Incrementally-updated checksum, up to maxOpId
+   */
+  checksum: number;
+
+  /**
+   * op count for the checksum
+   */
+  opCount: number;
+
+  /**
+   * Byte size of ops covered by the checksum.
+   */
+  opBytes: number;
 }
 
 /**
@@ -43,25 +60,30 @@ const DEFAULT_MEMORY_LIMIT_MB = 64;
 
 export class MongoCompactor {
   private updates: mongo.AnyBulkWriteOperation<BucketDataDocument>[] = [];
+  private bucketStateUpdates: mongo.AnyBulkWriteOperation<BucketStateDocument>[] = [];
 
   private idLimitBytes: number;
   private moveBatchLimit: number;
   private moveBatchQueryLimit: number;
   private clearBatchLimit: number;
-  private maxOpId: bigint
+  private maxOpId: bigint;
   private buckets: string[] | undefined;
+  private signal?: AbortSignal;
+  private group_id: number;
 
   constructor(
+    private storage: MongoSyncBucketStorage,
     private db: PowerSyncMongo,
-    private group_id: number,
     options?: MongoCompactOptions
   ) {
+    this.group_id = storage.group_id;
     this.idLimitBytes = (options?.memoryLimitMB ?? DEFAULT_MEMORY_LIMIT_MB) * 1024 * 1024;
     this.moveBatchLimit = options?.moveBatchLimit ?? DEFAULT_MOVE_BATCH_LIMIT;
    this.moveBatchQueryLimit = options?.moveBatchQueryLimit ?? DEFAULT_MOVE_BATCH_QUERY_LIMIT;
     this.clearBatchLimit = options?.clearBatchLimit ?? DEFAULT_CLEAR_BATCH_LIMIT;
-    this.maxOpId = options?.maxOpId;
+    this.maxOpId = options?.maxOpId ?? 0n;
     this.buckets = options?.compactBuckets;
+    this.signal = options?.signal;
   }
 
   /**
@@ -117,31 +139,57 @@ export class MongoCompactor {
       o: new mongo.MaxKey() as any
     };
 
-
+    const doneWithBucket = async () => {
+      if (currentState == null) {
+        return;
+      }
+      // Free memory before clearing bucket
+      currentState.seen.clear();
+      if (currentState.lastNotPut != null && currentState.opsSincePut >= 1) {
+        logger.info(
+          `Inserting CLEAR at ${this.group_id}:${currentState.bucket}:${currentState.lastNotPut} to remove ${currentState.opsSincePut} operations`
+        );
+        // Need flush() before clear()
+        await this.flush();
+        await this.clearBucket(currentState);
+      }
+
+      // Do this _after_ clearBucket so that we have accurate counts.
+      this.updateBucketChecksums(currentState);
+    };
+
+    while (!this.signal?.aborted) {
       // Query one batch at a time, to avoid cursor timeouts
-      const
-
+      const cursor = this.db.bucket_data.aggregate<BucketDataDocument & { size: number | bigint }>(
+        [
           {
-
-
-
+            $match: {
+              _id: {
+                $gte: lowerBound,
+                $lt: upperBound
+              }
            }
          },
+          { $sort: { _id: -1 } },
+          { $limit: this.moveBatchQueryLimit },
          {
-
+            $project: {
              _id: 1,
              op: 1,
              table: 1,
              row_id: 1,
              source_table: 1,
-              source_key: 1
-
-
-
-          singleBatch: true
+              source_key: 1,
+              checksum: 1,
+              size: { $bsonSize: '$$ROOT' }
+            }
          }
-
-        .
+        ],
+        { batchSize: this.moveBatchQueryLimit }
+      );
+      // We don't limit to a single batch here, since that often causes MongoDB to scan through more than it returns.
+      // Instead, we load up to the limit.
+      const batch = await cursor.toArray();
 
       if (batch.length == 0) {
         // We've reached the end
@@ -153,34 +201,31 @@ export class MongoCompactor {
 
       for (let doc of batch) {
         if (currentState == null || doc._id.b != currentState.bucket) {
-
-          // Important to flush before clearBucket()
-          await this.flush();
-          logger.info(
-            `Inserting CLEAR at ${this.group_id}:${currentState.bucket}:${currentState.lastNotPut} to remove ${currentState.opsSincePut} operations`
-          );
+          await doneWithBucket();
 
-            const bucket = currentState.bucket;
-            const clearOp = currentState.lastNotPut;
-            // Free memory before clearing bucket
-            currentState = null;
-            await this.clearBucket(bucket, clearOp);
-          }
           currentState = {
             bucket: doc._id.b,
             seen: new Map(),
             trackingSize: 0,
             lastNotPut: null,
-            opsSincePut: 0
+            opsSincePut: 0,
+
+            checksum: 0,
+            opCount: 0,
+            opBytes: 0
           };
         }
 
-        if (
+        if (doc._id.o > this.maxOpId) {
          continue;
        }
 
+        currentState.checksum = addChecksums(currentState.checksum, Number(doc.checksum));
+        currentState.opCount += 1;
+
        let isPersistentPut = doc.op == 'PUT';
 
+        currentState.opBytes += Number(doc.size);
        if (doc.op == 'REMOVE' || doc.op == 'PUT') {
          const key = `${doc.table}/${doc.row_id}/${cacheKey(doc.source_table!, doc.source_key!)}`;
          const targetOp = currentState.seen.get(key);
@@ -208,6 +253,8 @@ export class MongoCompactor {
            }
          }
        });
+
+        currentState.opBytes += 200 - Number(doc.size); // TODO: better estimate for this
       } else {
        if (currentState.trackingSize >= idLimitBytes) {
          // Reached memory limit.
@@ -234,24 +281,65 @@
          currentState.opsSincePut += 1;
        }
 
-        if (this.updates.length >= this.moveBatchLimit) {
+        if (this.updates.length + this.bucketStateUpdates.length >= this.moveBatchLimit) {
          await this.flush();
        }
      }
+
+      if (currentState != null) {
+        logger.info(`Processed batch of length ${batch.length} current bucket: ${currentState.bucket}`);
+      }
    }
 
+    await doneWithBucket();
+
+    // Need another flush after updateBucketChecksums()
     await this.flush();
-
-
-
-
+  }
+
+  /**
+   * Call when done with a bucket.
+   */
+  private updateBucketChecksums(state: CurrentBucketState) {
+    if (state.opCount < 0) {
+      throw new ServiceAssertionError(
+        `Invalid opCount: ${state.opCount} checksum ${state.checksum} opsSincePut: ${state.opsSincePut} maxOpId: ${this.maxOpId}`
       );
-      const bucket = currentState.bucket;
-      const clearOp = currentState.lastNotPut;
-      // Free memory before clearing bucket
-      currentState = null;
-      await this.clearBucket(bucket, clearOp);
     }
+    this.bucketStateUpdates.push({
+      updateOne: {
+        filter: {
+          _id: {
+            g: this.group_id,
+            b: state.bucket
+          }
+        },
+        update: {
+          $set: {
+            compacted_state: {
+              op_id: this.maxOpId,
+              count: state.opCount,
+              checksum: BigInt(state.checksum),
+              bytes: state.opBytes
+            },
+            estimate_since_compact: {
+              // Note: There could have been a whole bunch of new operations added to the bucket _while_ compacting,
+              // which we don't currently cater for.
+              // We could potentially query for that, but that could add overhead.
+              count: 0,
+              bytes: 0
+            }
+          },
+          $setOnInsert: {
+            // Only set this if we're creating the document.
+            // In all other cases, the replication process will have a set a more accurate id.
+            last_op: this.maxOpId
+          }
+        },
+        // We generally expect this to have been created before, but do handle cases of old unchanged buckets
+        upsert: true
+      }
+    });
   }
 
   private async flush() {
@@ -266,15 +354,26 @@ export class MongoCompactor {
       });
       this.updates = [];
     }
+    if (this.bucketStateUpdates.length > 0) {
+      logger.info(`Updating ${this.bucketStateUpdates.length} bucket states`);
+      await this.db.bucket_state.bulkWrite(this.bucketStateUpdates, {
+        ordered: false
+      });
+      this.bucketStateUpdates = [];
+    }
   }
 
   /**
   * Perform a CLEAR compact for a bucket.
   *
+   *
   * @param bucket bucket name
   * @param op op_id of the last non-PUT operation, which will be converted to CLEAR.
   */
-  private async clearBucket(
+  private async clearBucket(currentState: CurrentBucketState) {
+    const bucket = currentState.bucket;
+    const clearOp = currentState.lastNotPut!;
+
     const opFilter = {
       _id: {
         $gte: {
@@ -285,7 +384,7 @@ export class MongoCompactor {
         $lte: {
           g: this.group_id,
           b: bucket,
-          o:
+          o: clearOp
         }
       }
     };
@@ -293,7 +392,8 @@ export class MongoCompactor {
     const session = this.db.client.startSession();
     try {
       let done = false;
-      while (!done) {
+      while (!done && !this.signal?.aborted) {
+        let opCountDiff = 0;
         // Do the CLEAR operation in batches, with each batch a separate transaction.
         // The state after each batch is fully consistent.
         // We need a transaction per batch to make sure checksums stay consistent.
@@ -364,15 +464,106 @@ export class MongoCompactor {
            },
            { session }
          );
+
+          opCountDiff = -numberOfOpsToClear + 1;
        },
        {
          writeConcern: { w: 'majority' },
          readConcern: { level: 'snapshot' }
        }
      );
+        // Update _outside_ the transaction, since the transaction can be retried multiple times.
+        currentState.opCount += opCountDiff;
      }
    } finally {
      await session.endSession();
    }
  }
+
+  /**
+   * Subset of compact, only populating checksums where relevant.
+   */
+  async populateChecksums() {
+    let lastId: BucketStateDocument['_id'] | null = null;
+    while (!this.signal?.aborted) {
+      // By filtering buckets, we effectively make this "resumeable".
+      let filter: mongo.Filter<BucketStateDocument> = {
+        compacted_state: { $exists: false }
+      };
+      if (lastId) {
+        filter._id = { $gt: lastId };
+      }
+
+      const bucketsWithoutChecksums = await this.db.bucket_state
+        .find(filter, {
+          projection: {
+            _id: 1
+          },
+          sort: {
+            _id: 1
+          },
+          limit: 5_000,
+          maxTimeMS: MONGO_OPERATION_TIMEOUT_MS
+        })
+        .toArray();
+      if (bucketsWithoutChecksums.length == 0) {
+        // All done
+        break;
+      }
+
+      logger.info(`Calculating checksums for batch of ${bucketsWithoutChecksums.length} buckets`);
+
+      await this.updateChecksumsBatch(bucketsWithoutChecksums.map((b) => b._id.b));
+
+      lastId = bucketsWithoutChecksums[bucketsWithoutChecksums.length - 1]._id;
+    }
+  }
+
+  private async updateChecksumsBatch(buckets: string[]) {
+    const checksums = await this.storage.checksums.queryPartialChecksums(
+      buckets.map((bucket) => {
+        return {
+          bucket,
+          end: this.maxOpId
+        };
+      })
+    );
+
+    for (let bucketChecksum of checksums.values()) {
+      if (isPartialChecksum(bucketChecksum)) {
+        // Should never happen since we don't specify `start`
+        throw new ServiceAssertionError(`Full checksum expected, got ${JSON.stringify(bucketChecksum)}`);
+      }
+
+      this.bucketStateUpdates.push({
+        updateOne: {
+          filter: {
+            _id: {
+              g: this.group_id,
+              b: bucketChecksum.bucket
+            }
+          },
+          update: {
+            $set: {
+              compacted_state: {
+                op_id: this.maxOpId,
+                count: bucketChecksum.count,
+                checksum: BigInt(bucketChecksum.checksum),
+                bytes: null
+              }
+            },
+            $setOnInsert: {
+              // Only set this if we're creating the document.
+              // In all other cases, the replication process will have a set a more accurate id.
+              last_op: this.maxOpId
+            }
+          },
+          // We generally expect this to have been created before, but do handle cases of old unchanged buckets
+          upsert: true
+        }
+      });
+    }
+
+    await this.flush();
+  }
 }
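
The compactor now folds each operation's stored `checksum` into a per-bucket running total with `addChecksums` while it scans, instead of leaving all checksum work to a query at read time. As a rough illustration of the arithmetic, the sketch below models an additive checksum that wraps to a signed 32-bit value, consistent with how the removed aggregation in MongoSyncBucketStorage summed `$toLong: '$checksum'` and masked the total with `0xffffffff`; the real `addChecksums` is exported by `@powersync/service-core` and is not shown in this diff.

```ts
// Sketch only: a stand-in for addChecksums from @powersync/service-core.
// Models an additive checksum that wraps to signed 32-bit, matching the
// `& 0xffffffff` handling in the aggregation removed by this release.
function addChecksumsSketch(a: number, b: number): number {
  return (a + b) | 0; // 32-bit wrapping addition
}

// Accumulate a bucket checksum the way the compact loop now does:
// start from 0 and fold in each operation's checksum up to maxOpId.
let bucketChecksum = 0;
for (const opChecksum of [123456789, -987654321, 42]) {
  bucketChecksum = addChecksumsSketch(bucketChecksum, opChecksum);
}
console.log(bucketChecksum);
```

Because the accumulated `checksum`, `opCount` and `opBytes` are written to `bucket_state.compacted_state` when a bucket is done, later checksum queries can, in principle, cover only operations newer than the compacted `op_id`.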

package/src/storage/implementation/MongoSyncBucketStorage.ts:

@@ -10,6 +10,7 @@ import {
   BroadcastIterable,
   CHECKPOINT_INVALIDATE_ALL,
   CheckpointChanges,
+  CompactOptions,
   deserializeParameterLookup,
   GetCheckpointChangesOptions,
   InternalOpId,
@@ -30,21 +31,18 @@ import { MongoBucketStorage } from '../MongoBucketStorage.js';
 import { PowerSyncMongo } from './db.js';
 import { BucketDataDocument, BucketDataKey, BucketStateDocument, SourceKey, SourceTableDocument } from './models.js';
 import { MongoBucketBatch } from './MongoBucketBatch.js';
+import { MongoChecksums } from './MongoChecksums.js';
 import { MongoCompactor } from './MongoCompactor.js';
+import { MongoParameterCompactor } from './MongoParameterCompactor.js';
 import { MongoWriteCheckpointAPI } from './MongoWriteCheckpointAPI.js';
 import { idPrefixFilter, mapOpEntry, readSingleBatch, setSessionSnapshotTime } from './util.js';
-import { MongoParameterCompactor } from './MongoParameterCompactor.js';
 
 export class MongoSyncBucketStorage
   extends BaseObserver<storage.SyncRulesBucketStorageListener>
   implements storage.SyncRulesBucketStorage
 {
   private readonly db: PowerSyncMongo;
-
-    fetchChecksums: (batch) => {
-      return this.getChecksumsInternal(batch);
-    }
-  });
+  readonly checksums: MongoChecksums;
 
   private parsedSyncRulesCache: { parsed: SqlSyncRules; options: storage.ParseSyncRulesOptions } | undefined;
   private writeCheckpointAPI: MongoWriteCheckpointAPI;
@@ -58,6 +56,7 @@ export class MongoSyncBucketStorage
   ) {
     super();
     this.db = factory.db;
+    this.checksums = new MongoChecksums(this.db, this.group_id);
     this.writeCheckpointAPI = new MongoWriteCheckpointAPI({
       db: this.db,
       mode: writeCheckpointMode,
@@ -487,76 +486,11 @@
   }
 
   async getChecksums(checkpoint: utils.InternalOpId, buckets: string[]): Promise<utils.ChecksumMap> {
-    return this.
+    return this.checksums.getChecksums(checkpoint, buckets);
   }
 
-
-
-      return new Map();
-    }
-
-    const filters: any[] = [];
-    for (let request of batch) {
-      filters.push({
-        _id: {
-          $gt: {
-            g: this.group_id,
-            b: request.bucket,
-            o: request.start ? BigInt(request.start) : new bson.MinKey()
-          },
-          $lte: {
-            g: this.group_id,
-            b: request.bucket,
-            o: BigInt(request.end)
-          }
-        }
-      });
-    }
-
-    const aggregate = await this.db.bucket_data
-      .aggregate(
-        [
-          {
-            $match: {
-              $or: filters
-            }
-          },
-          {
-            $group: {
-              _id: '$_id.b',
-              // Historically, checksum may be stored as 'int' or 'double'.
-              // More recently, this should be a 'long'.
-              // $toLong ensures that we always sum it as a long, avoiding inaccuracies in the calculations.
-              checksum_total: { $sum: { $toLong: '$checksum' } },
-              count: { $sum: 1 },
-              has_clear_op: {
-                $max: {
-                  $cond: [{ $eq: ['$op', 'CLEAR'] }, 1, 0]
-                }
-              }
-            }
-          }
-        ],
-        { session: undefined, readConcern: 'snapshot', maxTimeMS: lib_mongo.db.MONGO_OPERATION_TIMEOUT_MS }
-      )
-      .toArray()
-      .catch((e) => {
-        throw lib_mongo.mapQueryError(e, 'while reading checksums');
-      });
-
-    return new Map<string, storage.PartialChecksum>(
-      aggregate.map((doc) => {
-        return [
-          doc._id,
-          {
-            bucket: doc._id,
-            partialCount: doc.count,
-            partialChecksum: Number(BigInt(doc.checksum_total) & 0xffffffffn) & 0xffffffff,
-            isFullChecksum: doc.has_clear_op == 1
-          } satisfies storage.PartialChecksum
-        ];
-      })
-    );
+  clearChecksumCache() {
+    this.checksums.clearCache();
   }
 
   async terminate(options?: storage.TerminateOptions) {
@@ -701,13 +635,34 @@
   }
 
   async compact(options?: storage.CompactOptions) {
-
-
-
-
+    let maxOpId = options?.maxOpId;
+    if (maxOpId == null) {
+      const checkpoint = await this.getCheckpointInternal();
+      maxOpId = checkpoint?.checkpoint ?? undefined;
+    }
+    await new MongoCompactor(this, this.db, { ...options, maxOpId }).compact();
+
+    if (maxOpId != null && options?.compactParameterData) {
+      await new MongoParameterCompactor(this.db, this.group_id, maxOpId, options).compact();
     }
   }
 
+  async populatePersistentChecksumCache(options: Required<Pick<CompactOptions, 'signal' | 'maxOpId'>>): Promise<void> {
+    logger.info(`Populating persistent checksum cache...`);
+    const start = Date.now();
+    // We do a minimal compact here.
+    // We can optimize this in the future.
+    const compactor = new MongoCompactor(this, this.db, {
+      ...options,
+      // Don't track updates for MOVE compacting
+      memoryLimitMB: 0
+    });
+
+    await compactor.populateChecksums();
+    const duration = Date.now() - start;
+    logger.info(`Populated persistent checksum cache in ${(duration / 1000).toFixed(1)}s`);
+  }
+
   /**
    * Instance-wide watch on the latest available checkpoint (op_id + lsn).
    */
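
Checksum computation that previously lived inline in this class now goes through the new `MongoChecksums` helper, and the storage class gains `populatePersistentChecksumCache()`, which runs a minimal compact that only backfills `bucket_state.compacted_state`. A hypothetical caller might look like the sketch below; how the `MongoSyncBucketStorage` instance and the `maxOpId` checkpoint are obtained is deployment-specific and not part of this diff, and the timeout is an arbitrary example value.

```ts
import { MongoSyncBucketStorage } from './MongoSyncBucketStorage.js';

// Hypothetical helper: backfill the persistent checksum cache up to a known
// checkpoint, aborting the backfill if it runs too long.
async function warmChecksumCache(storage: MongoSyncBucketStorage, maxOpId: bigint) {
  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), 10 * 60 * 1000); // example limit
  try {
    await storage.populatePersistentChecksumCache({
      maxOpId,
      signal: controller.signal
    });
  } finally {
    clearTimeout(timeout);
  }
}
```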

package/src/storage/implementation/PersistedBatch.ts:

@@ -71,15 +71,17 @@ export class PersistedBatch {
     this.logger = options?.logger ?? defaultLogger;
   }
 
-  private incrementBucket(bucket: string, op_id: InternalOpId) {
+  private incrementBucket(bucket: string, op_id: InternalOpId, bytes: number) {
     let existingState = this.bucketStates.get(bucket);
     if (existingState) {
       existingState.lastOp = op_id;
       existingState.incrementCount += 1;
+      existingState.incrementBytes += bytes;
     } else {
       this.bucketStates.set(bucket, {
         lastOp: op_id,
-        incrementCount: 1
+        incrementCount: 1,
+        incrementBytes: bytes
       });
     }
   }
@@ -115,7 +117,8 @@ export class PersistedBatch {
       }
 
       remaining_buckets.delete(key);
-
+      const byteEstimate = recordData.length + 200;
+      this.currentSize += byteEstimate;
 
       const op_id = options.op_seq.next();
       this.debugLastOpId = op_id;
@@ -138,7 +141,7 @@
           }
         }
       });
-      this.incrementBucket(k.bucket, op_id);
+      this.incrementBucket(k.bucket, op_id, byteEstimate);
     }
 
     for (let bd of remaining_buckets.values()) {
@@ -166,7 +169,7 @@
        }
      });
      this.currentSize += 200;
-      this.incrementBucket(bd.bucket, op_id);
+      this.incrementBucket(bd.bucket, op_id, 200);
     }
   }
 
@@ -369,6 +372,10 @@ export class PersistedBatch {
       update: {
         $set: {
           last_op: state.lastOp
+        },
+        $inc: {
+          'estimate_since_compact.count': state.incrementCount,
+          'estimate_since_compact.bytes': state.incrementBytes
         }
       },
       upsert: true
@@ -381,4 +388,5 @@
 interface BucketStateUpdate {
   lastOp: InternalOpId;
   incrementCount: number;
+  incrementBytes: number;
 }
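
`PersistedBatch` now carries a rough byte estimate per bucket alongside the op count, and applies both to `bucket_state` with a `$inc` on `estimate_since_compact`. The sketch below restates the shape of that bulk-write entry as a standalone function, using the same field names that appear in the diff; the surrounding bulkWrite plumbing, logger, and exact driver typings are omitted.

```ts
// Sketch of the per-bucket update written by PersistedBatch after this change.
interface BucketStateUpdate {
  lastOp: bigint;
  incrementCount: number;
  incrementBytes: number;
}

function bucketStateUpdateOp(group_id: number, bucket: string, state: BucketStateUpdate) {
  return {
    updateOne: {
      filter: { _id: { g: group_id, b: bucket } },
      update: {
        $set: { last_op: state.lastOp },
        $inc: {
          // Rough running totals since the last compact; the compactor resets
          // these to 0 when it writes a fresh compacted_state.
          'estimate_since_compact.count': state.incrementCount,
          'estimate_since_compact.bytes': state.incrementBytes
        }
      },
      upsert: true
    }
  };
}
```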

package/src/storage/implementation/models.ts:

@@ -98,6 +98,21 @@ export interface BucketStateDocument {
     b: string;
   };
   last_op: bigint;
+  /**
+   * If set, this can be treated as "cache" of a checksum at a specific point.
+   * Can be updated periodically, for example by the compact job.
+   */
+  compacted_state?: {
+    op_id: InternalOpId;
+    count: number;
+    checksum: bigint;
+    bytes: number | null;
+  };
+
+  estimate_since_compact?: {
+    count: number;
+    bytes: number;
+  };
 }
 
 export interface IdSequenceDocument {
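
Together, the new optional fields turn `bucket_state` into a persistent checksum cache: `compacted_state` stores an exact count and checksum as of `op_id`, while `estimate_since_compact` accumulates rough deltas from `PersistedBatch`. The new `MongoChecksums` class that consumes this cache is not included in this excerpt, so the following is only one plausible way a cached state could be extended to a newer checkpoint; `addChecksums32` is a stand-in combiner rather than the package's own helper.

```ts
// Illustrative only: extend a cached compacted_state to a newer checkpoint by
// folding in checksums of operations with op_id in (cached.op_id, checkpoint].
interface CompactedState {
  op_id: bigint;
  count: number;
  checksum: bigint;
  bytes: number | null;
}

function addChecksums32(a: number, b: number): number {
  return (a + b) | 0; // additive checksum, wrapping at 32 bits (assumed semantics)
}

function checksumAtCheckpoint(cached: CompactedState, newerOpChecksums: number[]) {
  let checksum = Number(BigInt.asIntN(32, cached.checksum));
  for (const c of newerOpChecksums) {
    checksum = addChecksums32(checksum, c);
  }
  return { count: cached.count + newerOpChecksums.length, checksum };
}
```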

package/src/storage/implementation/util.ts:

@@ -3,7 +3,7 @@ import * as crypto from 'crypto';
 import * as uuid from 'uuid';
 
 import { mongo } from '@powersync/lib-service-mongodb';
-import { storage, utils } from '@powersync/service-core';
+import { BucketChecksum, PartialChecksum, PartialOrFullChecksum, storage, utils } from '@powersync/service-core';
 
 import { PowerSyncMongo } from './db.js';
 import { BucketDataDocument } from './models.js';
@@ -43,7 +43,7 @@ export function generateSlotName(prefix: string, sync_rules_id: number) {
  *
  * For this to be effective, set batchSize = limit in the find command.
  */
-export async function readSingleBatch<T>(cursor: mongo.
+export async function readSingleBatch<T>(cursor: mongo.AbstractCursor<T>): Promise<{ data: T[]; hasMore: boolean }> {
   try {
     let data: T[];
     let hasMore = true;