@powersync/service-module-mongodb-storage 0.11.0 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +33 -0
- package/LICENSE +3 -3
- package/dist/storage/implementation/MongoBucketBatch.d.ts +1 -1
- package/dist/storage/implementation/MongoBucketBatch.js +7 -4
- package/dist/storage/implementation/MongoBucketBatch.js.map +1 -1
- package/dist/storage/implementation/MongoCompactor.d.ts +7 -0
- package/dist/storage/implementation/MongoCompactor.js +122 -44
- package/dist/storage/implementation/MongoCompactor.js.map +1 -1
- package/dist/storage/implementation/MongoSyncBucketStorage.d.ts +3 -1
- package/dist/storage/implementation/MongoSyncBucketStorage.js +94 -17
- package/dist/storage/implementation/MongoSyncBucketStorage.js.map +1 -1
- package/dist/storage/implementation/PersistedBatch.d.ts +1 -0
- package/dist/storage/implementation/PersistedBatch.js +12 -5
- package/dist/storage/implementation/PersistedBatch.js.map +1 -1
- package/dist/storage/implementation/models.d.ts +14 -0
- package/dist/storage/implementation/util.d.ts +1 -1
- package/dist/storage/implementation/util.js.map +1 -1
- package/package.json +9 -9
- package/src/storage/implementation/MongoBucketBatch.ts +8 -6
- package/src/storage/implementation/MongoCompactor.ts +147 -47
- package/src/storage/implementation/MongoSyncBucketStorage.ts +107 -16
- package/src/storage/implementation/PersistedBatch.ts +13 -5
- package/src/storage/implementation/models.ts +15 -0
- package/src/storage/implementation/util.ts +1 -1
- package/test/src/__snapshots__/storage_sync.test.ts.snap +319 -11
- package/tsconfig.tsbuildinfo +1 -1

package/src/storage/implementation/MongoCompactor.ts

@@ -1,10 +1,11 @@
 import { mongo } from '@powersync/lib-service-mongodb';
-import { logger, ReplicationAssertionError } from '@powersync/lib-services-framework';
-import { InternalOpId, storage, utils } from '@powersync/service-core';
+import { logger, ReplicationAssertionError, ServiceAssertionError } from '@powersync/lib-services-framework';
+import { addChecksums, InternalOpId, storage, utils } from '@powersync/service-core';
 
 import { PowerSyncMongo } from './db.js';
-import { BucketDataDocument, BucketDataKey } from './models.js';
+import { BucketDataDocument, BucketDataKey, BucketStateDocument } from './models.js';
 import { cacheKey } from './OperationBatch.js';
+import { readSingleBatch } from './util.js';
 
 interface CurrentBucketState {
   /** Bucket name */
@@ -27,6 +28,21 @@ interface CurrentBucketState {
    * Number of REMOVE/MOVE operations seen since lastNotPut.
    */
   opsSincePut: number;
+
+  /**
+   * Incrementally-updated checksum, up to maxOpId
+   */
+  checksum: number;
+
+  /**
+   * op count for the checksum
+   */
+  opCount: number;
+
+  /**
+   * Byte size of ops covered by the checksum.
+   */
+  opBytes: number;
 }
 
 /**
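
The three new fields let the compactor keep a running checksum, operation count, and byte estimate per bucket while it scans. As a rough sketch of how such a running checksum accumulates (the real helper is `addChecksums` from `@powersync/service-core`; treating checksums as 32-bit values combined by wrapping addition is an assumption based on the masking used later in this diff):

```ts
// Hypothetical sketch only, not the package's implementation of addChecksums.
// Combines two checksums as unsigned 32-bit values with wrap-around.
function addChecksums32(a: number, b: number): number {
  // Mirrors the `& 0xffffffffn` masking seen in getChecksumsInternal() below.
  return Number((BigInt(a) + BigInt(b)) & 0xffffffffn);
}

// Accumulating over a batch of operations, as CurrentBucketState.checksum does:
let checksum = 0;
let opCount = 0;
for (const opChecksum of [0x1234, 0xfffffff0, 42]) {
  checksum = addChecksums32(checksum, opChecksum);
  opCount += 1;
}
```
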
@@ -43,13 +59,15 @@ const DEFAULT_MEMORY_LIMIT_MB = 64;
 
 export class MongoCompactor {
   private updates: mongo.AnyBulkWriteOperation<BucketDataDocument>[] = [];
+  private bucketStateUpdates: mongo.AnyBulkWriteOperation<BucketStateDocument>[] = [];
 
   private idLimitBytes: number;
   private moveBatchLimit: number;
   private moveBatchQueryLimit: number;
   private clearBatchLimit: number;
-  private maxOpId: bigint
+  private maxOpId: bigint;
   private buckets: string[] | undefined;
+  private signal?: AbortSignal;
 
   constructor(
     private db: PowerSyncMongo,
@@ -60,8 +78,9 @@ export class MongoCompactor {
     this.moveBatchLimit = options?.moveBatchLimit ?? DEFAULT_MOVE_BATCH_LIMIT;
     this.moveBatchQueryLimit = options?.moveBatchQueryLimit ?? DEFAULT_MOVE_BATCH_QUERY_LIMIT;
     this.clearBatchLimit = options?.clearBatchLimit ?? DEFAULT_CLEAR_BATCH_LIMIT;
-    this.maxOpId = options?.maxOpId;
+    this.maxOpId = options?.maxOpId ?? 0n;
     this.buckets = options?.compactBuckets;
+    this.signal = options?.signal;
   }
 
   /**
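
With `maxOpId` now defaulting to `0n` and an optional `AbortSignal`, a long-running compact can be cancelled. A hedged sketch of the call shape from the storage side; the storage object is typed structurally so the example stays self-contained, and only the option names come from these hunks:

```ts
// Illustrative only: the real entry point is MongoSyncBucketStorage.compact(),
// shown later in this diff; this structural type just documents the options used here.
type CompactCapable = {
  compact(options?: { maxOpId?: bigint; signal?: AbortSignal; compactBuckets?: string[] }): Promise<void>;
};

async function compactWithTimeout(storage: CompactCapable, bucket: string) {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), 5 * 60 * 1000);
  try {
    // maxOpId is omitted; the storage layer falls back to the current checkpoint.
    await storage.compact({ signal: controller.signal, compactBuckets: [bucket] });
  } finally {
    clearTimeout(timer);
  }
}
```
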
@@ -117,31 +136,33 @@ export class MongoCompactor {
       o: new mongo.MaxKey() as any
     };
 
-    while (
+    while (!this.signal?.aborted) {
       // Query one batch at a time, to avoid cursor timeouts
-      const
-
-        {
+      const cursor = this.db.bucket_data.aggregate<BucketDataDocument & { size: number | bigint }>([
+        {
+          $match: {
             _id: {
               $gte: lowerBound,
               $lt: upperBound
             }
-          },
-          {
-            projection: {
-              _id: 1,
-              op: 1,
-              table: 1,
-              row_id: 1,
-              source_table: 1,
-              source_key: 1
-            },
-            limit: this.moveBatchQueryLimit,
-            sort: { _id: -1 },
-            singleBatch: true
           }
-
-
+        },
+        { $sort: { _id: -1 } },
+        { $limit: this.moveBatchQueryLimit },
+        {
+          $project: {
+            _id: 1,
+            op: 1,
+            table: 1,
+            row_id: 1,
+            source_table: 1,
+            source_key: 1,
+            checksum: 1,
+            size: { $bsonSize: '$$ROOT' }
+          }
+        }
+      ]);
+      const { data: batch } = await readSingleBatch(cursor);
 
       if (batch.length == 0) {
         // We've reached the end
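
The batch query moves from a `find()` with query options to an aggregation pipeline so that each document's serialized size can be computed server-side with `$bsonSize`. A standalone sketch of the same pattern against the MongoDB Node.js driver; the connection details, database and collection names are assumptions for the example, not the package's API:

```ts
// Standalone illustration of the aggregation pattern above.
import { MongoClient } from 'mongodb';

async function latestDocSizes(uri: string, limit: number) {
  const client = new MongoClient(uri);
  try {
    return await client
      .db('powersync_example')
      .collection('bucket_data')
      .aggregate([
        { $sort: { _id: -1 } },
        { $limit: limit },
        // $bsonSize (MongoDB 4.4+) reports the serialized size of each document,
        // so byte totals can be tracked without a second round-trip.
        { $project: { _id: 1, size: { $bsonSize: '$$ROOT' } } }
      ])
      .toArray();
  } finally {
    await client.close();
  }
}
```
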
@@ -153,34 +174,47 @@ export class MongoCompactor {
 
       for (let doc of batch) {
         if (currentState == null || doc._id.b != currentState.bucket) {
-          if (currentState != null
-
-
-
-
-
+          if (currentState != null) {
+            if (currentState.lastNotPut != null && currentState.opsSincePut >= 1) {
+              // Important to flush before clearBucket()
+              // Does not have to happen before flushBucketChecksums()
+              await this.flush();
+              logger.info(
+                `Inserting CLEAR at ${this.group_id}:${currentState.bucket}:${currentState.lastNotPut} to remove ${currentState.opsSincePut} operations`
+              );
+
+              // Free memory before clearing bucket
+              currentState!.seen.clear();
+
+              await this.clearBucket(currentState);
+            }
 
-
-
-            // Free memory before clearing bucket
-            currentState = null;
-            await this.clearBucket(bucket, clearOp);
+            // Should happen after clearBucket() for accurate stats
+            this.updateBucketChecksums(currentState);
           }
           currentState = {
             bucket: doc._id.b,
             seen: new Map(),
             trackingSize: 0,
             lastNotPut: null,
-            opsSincePut: 0
+            opsSincePut: 0,
+
+            checksum: 0,
+            opCount: 0,
+            opBytes: 0
           };
         }
 
-        if (
+        if (doc._id.o > this.maxOpId) {
           continue;
         }
 
+        currentState.checksum = addChecksums(currentState.checksum, Number(doc.checksum));
+        currentState.opCount += 1;
+
         let isPersistentPut = doc.op == 'PUT';
 
+        currentState.opBytes += Number(doc.size);
         if (doc.op == 'REMOVE' || doc.op == 'PUT') {
           const key = `${doc.table}/${doc.row_id}/${cacheKey(doc.source_table!, doc.source_key!)}`;
           const targetOp = currentState.seen.get(key);
@@ -208,6 +242,8 @@ export class MongoCompactor {
             }
           }
         });
+
+        currentState.opBytes += 200 - Number(doc.size); // TODO: better estimate for this
       } else {
         if (currentState.trackingSize >= idLimitBytes) {
           // Reached memory limit.
@@ -234,24 +270,72 @@ export class MongoCompactor {
           currentState.opsSincePut += 1;
         }
 
-        if (this.updates.length >= this.moveBatchLimit) {
+        if (this.updates.length + this.bucketStateUpdates.length >= this.moveBatchLimit) {
          await this.flush();
        }
       }
     }
 
-    await this.flush();
     currentState?.seen.clear();
     if (currentState?.lastNotPut != null && currentState?.opsSincePut > 1) {
       logger.info(
         `Inserting CLEAR at ${this.group_id}:${currentState.bucket}:${currentState.lastNotPut} to remove ${currentState.opsSincePut} operations`
       );
-
-
-
-      currentState = null;
-      await this.clearBucket(bucket, clearOp);
+      // Need flush() before clear()
+      await this.flush();
+      await this.clearBucket(currentState);
     }
+    if (currentState != null) {
+      // Do this _after_ clearBucket so that we have accurate counts.
+      this.updateBucketChecksums(currentState);
+    }
+    // Need another flush after updateBucketChecksums()
+    await this.flush();
+  }
+
+  /**
+   * Call when done with a bucket.
+   */
+  private updateBucketChecksums(state: CurrentBucketState) {
+    if (state.opCount < 0) {
+      throw new ServiceAssertionError(
+        `Invalid opCount: ${state.opCount} checksum ${state.checksum} opsSincePut: ${state.opsSincePut} maxOpId: ${this.maxOpId}`
+      );
+    }
+    this.bucketStateUpdates.push({
+      updateOne: {
+        filter: {
+          _id: {
+            g: this.group_id,
+            b: state.bucket
+          }
+        },
+        update: {
+          $set: {
+            compacted_state: {
+              op_id: this.maxOpId,
+              count: state.opCount,
+              checksum: BigInt(state.checksum),
+              bytes: state.opBytes
+            },
+            estimate_since_compact: {
+              // Note: There could have been a whole bunch of new operations added to the bucket _while_ compacting,
+              // which we don't currently cater for.
+              // We could potentially query for that, but that could add overhead.
+              count: 0,
+              bytes: 0
+            }
+          },
+          $setOnInsert: {
+            // Only set this if we're creating the document.
+            // In all other cases, the replication process will have a set a more accurate id.
+            last_op: this.maxOpId
+          }
+        },
+        // We generally expect this to have been created before, but do handle cases of old unchanged buckets
+        upsert: true
+      }
+    });
   }
 
   private async flush() {
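
The upsert produced by `updateBucketChecksums()` above writes into `bucket_state`, using the `compacted_state` and `estimate_since_compact` fields added to `BucketStateDocument` in `models.ts` further down this diff. An illustrative example of the stored document (all values are made up):

```ts
// Illustrative document shape only; field names follow the BucketStateDocument
// additions in models.ts, values are invented for the example.
const exampleBucketState = {
  _id: { g: 1, b: 'by_user["user1"]' },
  last_op: 1024n,
  compacted_state: {
    op_id: 1024n, // the maxOpId the checksum was computed up to
    count: 311, // operations covered by the cached checksum
    checksum: 4022250974n,
    bytes: 52000 // opBytes estimate accumulated during the scan
  },
  // Written as zero at compact time; PersistedBatch later $inc's it on new writes.
  estimate_since_compact: { count: 0, bytes: 0 }
};
```
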
@@ -266,15 +350,26 @@ export class MongoCompactor {
       });
       this.updates = [];
     }
+    if (this.bucketStateUpdates.length > 0) {
+      logger.info(`Updating ${this.bucketStateUpdates.length} bucket states`);
+      await this.db.bucket_state.bulkWrite(this.bucketStateUpdates, {
+        ordered: false
+      });
+      this.bucketStateUpdates = [];
+    }
   }
 
   /**
    * Perform a CLEAR compact for a bucket.
    *
+   *
    * @param bucket bucket name
    * @param op op_id of the last non-PUT operation, which will be converted to CLEAR.
    */
-  private async clearBucket(
+  private async clearBucket(currentState: CurrentBucketState) {
+    const bucket = currentState.bucket;
+    const clearOp = currentState.lastNotPut!;
+
     const opFilter = {
       _id: {
         $gte: {
@@ -285,7 +380,7 @@ export class MongoCompactor {
         $lte: {
           g: this.group_id,
           b: bucket,
-          o:
+          o: clearOp
         }
       }
     };
@@ -293,7 +388,8 @@ export class MongoCompactor {
     const session = this.db.client.startSession();
     try {
       let done = false;
-      while (!done) {
+      while (!done && !this.signal?.aborted) {
+        let opCountDiff = 0;
         // Do the CLEAR operation in batches, with each batch a separate transaction.
         // The state after each batch is fully consistent.
         // We need a transaction per batch to make sure checksums stay consistent.
@@ -364,12 +460,16 @@ export class MongoCompactor {
             },
             { session }
           );
+
+          opCountDiff = -numberOfOpsToClear + 1;
         },
         {
           writeConcern: { w: 'majority' },
           readConcern: { level: 'snapshot' }
         }
       );
+        // Update _outside_ the transaction, since the transaction can be retried multiple times.
+        currentState.opCount += opCountDiff;
       }
     } finally {
       await session.endSession();
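
The `opCountDiff` staging above exists because the MongoDB driver may retry the `withTransaction` callback; applying the adjustment to in-memory state only after the transaction returns keeps the count correct across retries. A self-contained sketch of the same pattern (the collection work is elided and all names are illustrative):

```ts
// Generic illustration of the retry-safe pattern used in clearBucket() above.
import { MongoClient } from 'mongodb';

async function clearOneBatch(client: MongoClient, applyToMemory: (diff: number) => void) {
  const session = client.startSession();
  try {
    let opCountDiff = 0;
    await session.withTransaction(
      async () => {
        // The callback may run more than once, so only stage the value here.
        const numberOfOpsToClear = 42; // stand-in for the ops deleted in this batch
        opCountDiff = -numberOfOpsToClear + 1; // N ops removed, one CLEAR op added
      },
      { writeConcern: { w: 'majority' }, readConcern: { level: 'snapshot' } }
    );
    // Applied exactly once, outside the (possibly retried) callback.
    applyToMemory(opCountDiff);
  } finally {
    await session.endSession();
  }
}
```
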

package/src/storage/implementation/MongoSyncBucketStorage.ts

@@ -7,14 +7,18 @@ import {
   ServiceAssertionError
 } from '@powersync/lib-services-framework';
 import {
+  addPartialChecksums,
   BroadcastIterable,
+  BucketChecksum,
   CHECKPOINT_INVALIDATE_ALL,
   CheckpointChanges,
+  CompactOptions,
   deserializeParameterLookup,
   GetCheckpointChangesOptions,
   InternalOpId,
   internalToExternalOpId,
   maxLsn,
+  PartialChecksum,
   ProtocolOpId,
   ReplicationCheckpoint,
   storage,
@@ -31,9 +35,9 @@ import { PowerSyncMongo } from './db.js';
 import { BucketDataDocument, BucketDataKey, BucketStateDocument, SourceKey, SourceTableDocument } from './models.js';
 import { MongoBucketBatch } from './MongoBucketBatch.js';
 import { MongoCompactor } from './MongoCompactor.js';
+import { MongoParameterCompactor } from './MongoParameterCompactor.js';
 import { MongoWriteCheckpointAPI } from './MongoWriteCheckpointAPI.js';
 import { idPrefixFilter, mapOpEntry, readSingleBatch, setSessionSnapshotTime } from './util.js';
-import { MongoParameterCompactor } from './MongoParameterCompactor.js';
 
 export class MongoSyncBucketStorage
   extends BaseObserver<storage.SyncRulesBucketStorageListener>
@@ -490,24 +494,71 @@ export class MongoSyncBucketStorage
     return this.checksumCache.getChecksumMap(checkpoint, buckets);
   }
 
+  clearChecksumCache() {
+    this.checksumCache.clear();
+  }
+
   private async getChecksumsInternal(batch: storage.FetchPartialBucketChecksum[]): Promise<storage.PartialChecksumMap> {
     if (batch.length == 0) {
       return new Map();
     }
 
+    const preFilters: any[] = [];
+    for (let request of batch) {
+      if (request.start == null) {
+        preFilters.push({
+          _id: {
+            g: this.group_id,
+            b: request.bucket
+          },
+          'compacted_state.op_id': { $exists: true, $lte: request.end }
+        });
+      }
+    }
+
+    const preStates = new Map<string, { opId: InternalOpId; checksum: BucketChecksum }>();
+
+    if (preFilters.length > 0) {
+      // For un-cached bucket checksums, attempt to use the compacted state first.
+      const states = await this.db.bucket_state
+        .find({
+          $or: preFilters
+        })
+        .toArray();
+      for (let state of states) {
+        const compactedState = state.compacted_state!;
+        preStates.set(state._id.b, {
+          opId: compactedState.op_id,
+          checksum: {
+            bucket: state._id.b,
+            checksum: Number(compactedState.checksum),
+            count: compactedState.count
+          }
+        });
+      }
+    }
+
     const filters: any[] = [];
     for (let request of batch) {
+      let start = request.start;
+      if (start == null) {
+        const preState = preStates.get(request.bucket);
+        if (preState != null) {
+          start = preState.opId;
+        }
+      }
+
       filters.push({
         _id: {
           $gt: {
             g: this.group_id,
             b: request.bucket,
-            o:
+            o: start ?? new bson.MinKey()
           },
           $lte: {
             g: this.group_id,
             b: request.bucket,
-            o:
+            o: request.end
           }
         }
       });
@@ -537,26 +588,48 @@ export class MongoSyncBucketStorage
           }
         }
       ],
-      { session: undefined, readConcern: 'snapshot', maxTimeMS: lib_mongo.db.
+      { session: undefined, readConcern: 'snapshot', maxTimeMS: lib_mongo.db.MONGO_CHECKSUM_TIMEOUT_MS }
     )
       .toArray()
       .catch((e) => {
         throw lib_mongo.mapQueryError(e, 'while reading checksums');
       });
 
-
+    const partialChecksums = new Map<string, storage.PartialOrFullChecksum>(
       aggregate.map((doc) => {
+        const partialChecksum = Number(BigInt(doc.checksum_total) & 0xffffffffn) & 0xffffffff;
+        const bucket = doc._id;
         return [
-
-
-
-
-
-
-
+          bucket,
+          doc.has_clear_op == 1
+            ? ({
+                // full checksum - replaces any previous one
+                bucket,
+                checksum: partialChecksum,
+                count: doc.count
+              } satisfies BucketChecksum)
+            : ({
+                // partial checksum - is added to a previous one
+                bucket,
+                partialCount: doc.count,
+                partialChecksum
+              } satisfies PartialChecksum)
         ];
       })
     );
+
+    return new Map<string, storage.PartialOrFullChecksum>(
+      batch.map((request) => {
+        const bucket = request.bucket;
+        // Could be null if this is either (1) a partial request, or (2) no compacted checksum was available
+        const preState = preStates.get(bucket);
+        // Could be null if we got no data
+        const partialChecksum = partialChecksums.get(bucket);
+        const merged = addPartialChecksums(bucket, preState?.checksum ?? null, partialChecksum ?? null);
+
+        return [bucket, merged];
+      })
+    );
   }
 
   async terminate(options?: storage.TerminateOptions) {
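
For uncached buckets, `getChecksumsInternal()` now seeds the query from `compacted_state` and only aggregates operations after that op_id, then merges the cached value with the tail via `addPartialChecksums`. A hedged sketch of the merge semantics, assuming 32-bit wrapping addition of checksums and summed counts (the real helper lives in `@powersync/service-core` and may differ in detail):

```ts
// Hypothetical merge of a cached full checksum with a partial tail; illustrative
// only, simplified from the storage.PartialOrFullChecksum types.
interface FullChecksum { bucket: string; checksum: number; count: number }
interface TailChecksum { bucket: string; partialChecksum: number; partialCount: number }

function mergeChecksums(pre: FullChecksum | null, tail: TailChecksum | null): FullChecksum | null {
  if (pre == null) {
    // Without a cached compacted_state, the aggregation itself must produce a full checksum.
    return null;
  }
  if (tail == null) {
    // No operations after the compacted_state: the cache is already complete.
    return pre;
  }
  return {
    bucket: pre.bucket,
    checksum: Number((BigInt(pre.checksum) + BigInt(tail.partialChecksum)) & 0xffffffffn),
    count: pre.count + tail.partialCount
  };
}
```
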
@@ -701,13 +774,31 @@ export class MongoSyncBucketStorage
   }
 
   async compact(options?: storage.CompactOptions) {
-
-
-
-
+    let maxOpId = options?.maxOpId;
+    if (maxOpId == null) {
+      const checkpoint = await this.getCheckpointInternal();
+      maxOpId = checkpoint?.checkpoint ?? undefined;
+    }
+    await new MongoCompactor(this.db, this.group_id, { ...options, maxOpId }).compact();
+    if (maxOpId != null && options?.compactParameterData) {
+      await new MongoParameterCompactor(this.db, this.group_id, maxOpId, options).compact();
     }
   }
 
+  async populatePersistentChecksumCache(options: Pick<CompactOptions, 'signal' | 'maxOpId'>): Promise<void> {
+    const start = Date.now();
+    // We do a minimal compact, primarily to populate the checksum cache
+    await this.compact({
+      ...options,
+      // Skip parameter data
+      compactParameterData: false,
+      // Don't track updates for MOVE compacting
+      memoryLimitMB: 0
+    });
+    const duration = Date.now() - start;
+    logger.info(`Populated persistent checksum cache in ${(duration / 1000).toFixed(1)}s`);
+  }
+
   /**
    * Instance-wide watch on the latest available checkpoint (op_id + lsn).
    */
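
`populatePersistentChecksumCache()` is a new public entry point that runs a minimal compact purely to (re)fill the `compacted_state` cache. A hypothetical usage sketch; the storage value is typed structurally so the example stays self-contained, and in practice it would be the `MongoSyncBucketStorage` instance for the active sync rules:

```ts
// Hypothetical usage; only the method name and option names come from this diff.
type ChecksumCacheCapable = {
  populatePersistentChecksumCache(options: { signal?: AbortSignal; maxOpId?: bigint }): Promise<void>;
};

async function warmChecksumCache(storage: ChecksumCacheCapable, signal?: AbortSignal) {
  // Runs a compact pass with compactParameterData: false and memoryLimitMB: 0,
  // so it records bucket checksums without doing MOVE/parameter compaction.
  await storage.populatePersistentChecksumCache({ signal });
}
```
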

package/src/storage/implementation/PersistedBatch.ts

@@ -71,15 +71,17 @@ export class PersistedBatch {
     this.logger = options?.logger ?? defaultLogger;
   }
 
-  private incrementBucket(bucket: string, op_id: InternalOpId) {
+  private incrementBucket(bucket: string, op_id: InternalOpId, bytes: number) {
     let existingState = this.bucketStates.get(bucket);
     if (existingState) {
       existingState.lastOp = op_id;
       existingState.incrementCount += 1;
+      existingState.incrementBytes += bytes;
     } else {
       this.bucketStates.set(bucket, {
         lastOp: op_id,
-        incrementCount: 1
+        incrementCount: 1,
+        incrementBytes: bytes
       });
     }
   }
@@ -115,7 +117,8 @@ export class PersistedBatch {
       }
 
       remaining_buckets.delete(key);
-
+      const byteEstimate = recordData.length + 200;
+      this.currentSize += byteEstimate;
 
       const op_id = options.op_seq.next();
       this.debugLastOpId = op_id;
@@ -138,7 +141,7 @@ export class PersistedBatch {
           }
         }
       });
-      this.incrementBucket(k.bucket, op_id);
+      this.incrementBucket(k.bucket, op_id, byteEstimate);
     }
 
     for (let bd of remaining_buckets.values()) {
@@ -166,7 +169,7 @@ export class PersistedBatch {
         }
       });
       this.currentSize += 200;
-      this.incrementBucket(bd.bucket, op_id);
+      this.incrementBucket(bd.bucket, op_id, 200);
     }
   }
 
@@ -369,6 +372,10 @@ export class PersistedBatch {
         update: {
           $set: {
             last_op: state.lastOp
+          },
+          $inc: {
+            'estimate_since_compact.count': state.incrementCount,
+            'estimate_since_compact.bytes': state.incrementBytes
           }
         },
         upsert: true
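
With the `$inc` above, each flushed batch now bumps `estimate_since_compact` alongside `last_op`, so the stored estimate keeps growing until the next compact resets it. An illustrative shape of the resulting `bucket_state` bulk operation (the filter shape follows the compactor hunks earlier; all values are invented):

```ts
// Illustrative only: the updateOne written per bucket during PersistedBatch.flush().
const bucketStateOp = {
  updateOne: {
    filter: { _id: { g: 1, b: 'global[]' } },
    update: {
      $set: { last_op: 2048n },
      $inc: {
        'estimate_since_compact.count': 17, // ops written for this bucket in the batch
        'estimate_since_compact.bytes': 5400 // byteEstimate sum for those ops
      }
    },
    upsert: true
  }
};
```
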
@@ -381,4 +388,5 @@ export class PersistedBatch {
 interface BucketStateUpdate {
   lastOp: InternalOpId;
   incrementCount: number;
+  incrementBytes: number;
 }

package/src/storage/implementation/models.ts

@@ -98,6 +98,21 @@ export interface BucketStateDocument {
     b: string;
   };
   last_op: bigint;
+  /**
+   * If set, this can be treated as "cache" of a checksum at a specific point.
+   * Can be updated periodically, for example by the compact job.
+   */
+  compacted_state?: {
+    op_id: InternalOpId;
+    count: number;
+    checksum: bigint;
+    bytes: number;
+  };
+
+  estimate_since_compact?: {
+    count: number;
+    bytes: number;
+  };
 }
 
 export interface IdSequenceDocument {

package/src/storage/implementation/util.ts

@@ -43,7 +43,7 @@ export function generateSlotName(prefix: string, sync_rules_id: number) {
  *
  * For this to be effective, set batchSize = limit in the find command.
  */
-export async function readSingleBatch<T>(cursor: mongo.
+export async function readSingleBatch<T>(cursor: mongo.AbstractCursor<T>): Promise<{ data: T[]; hasMore: boolean }> {
   try {
     let data: T[];
     let hasMore = true;