@powersync/service-module-mongodb-storage 0.12.0 → 0.12.1
- package/CHANGELOG.md +11 -0
- package/dist/storage/implementation/MongoChecksums.d.ts +34 -0
- package/dist/storage/implementation/MongoChecksums.js +274 -0
- package/dist/storage/implementation/MongoChecksums.js.map +1 -0
- package/dist/storage/implementation/MongoCompactor.d.ts +9 -2
- package/dist/storage/implementation/MongoCompactor.js +107 -32
- package/dist/storage/implementation/MongoCompactor.js.map +1 -1
- package/dist/storage/implementation/MongoSyncBucketStorage.d.ts +3 -3
- package/dist/storage/implementation/MongoSyncBucketStorage.js +12 -130
- package/dist/storage/implementation/MongoSyncBucketStorage.js.map +1 -1
- package/dist/storage/implementation/models.d.ts +1 -1
- package/dist/storage/implementation/util.js.map +1 -1
- package/package.json +4 -4
- package/src/storage/implementation/MongoChecksums.ts +320 -0
- package/src/storage/implementation/MongoCompactor.ts +147 -56
- package/src/storage/implementation/MongoSyncBucketStorage.ts +14 -150
- package/src/storage/implementation/models.ts +1 -1
- package/src/storage/implementation/util.ts +1 -1
- package/tsconfig.tsbuildinfo +1 -1
package/src/storage/implementation/MongoChecksums.ts (new file)
@@ -0,0 +1,320 @@
+import {
+  addPartialChecksums,
+  bson,
+  BucketChecksum,
+  ChecksumCache,
+  ChecksumMap,
+  FetchPartialBucketChecksum,
+  InternalOpId,
+  isPartialChecksum,
+  PartialChecksum,
+  PartialChecksumMap,
+  PartialOrFullChecksum
+} from '@powersync/service-core';
+import * as lib_mongo from '@powersync/lib-service-mongodb';
+import { logger } from '@powersync/lib-services-framework';
+import { PowerSyncMongo } from './db.js';
+
+/**
+ * Checksum query implementation.
+ */
+export class MongoChecksums {
+  private cache = new ChecksumCache({
+    fetchChecksums: (batch) => {
+      return this.getChecksumsInternal(batch);
+    }
+  });
+
+  constructor(
+    private db: PowerSyncMongo,
+    private group_id: number
+  ) {}
+
+  /**
+   * Calculate checksums, utilizing the cache.
+   */
+  async getChecksums(checkpoint: InternalOpId, buckets: string[]): Promise<ChecksumMap> {
+    return this.cache.getChecksumMap(checkpoint, buckets);
+  }
+
+  clearCache() {
+    this.cache.clear();
+  }
+
+  /**
+   * Calculate (partial) checksums from bucket_state and the data collection.
+   *
+   * Results are not cached.
+   */
+  private async getChecksumsInternal(batch: FetchPartialBucketChecksum[]): Promise<PartialChecksumMap> {
+    if (batch.length == 0) {
+      return new Map();
+    }
+
+    const preFilters: any[] = [];
+    for (let request of batch) {
+      if (request.start == null) {
+        preFilters.push({
+          _id: {
+            g: this.group_id,
+            b: request.bucket
+          },
+          'compacted_state.op_id': { $exists: true, $lte: request.end }
+        });
+      }
+    }
+
+    const preStates = new Map<string, { opId: InternalOpId; checksum: BucketChecksum }>();
+
+    if (preFilters.length > 0) {
+      // For un-cached bucket checksums, attempt to use the compacted state first.
+      const states = await this.db.bucket_state
+        .find({
+          $or: preFilters
+        })
+        .toArray();
+      for (let state of states) {
+        const compactedState = state.compacted_state!;
+        preStates.set(state._id.b, {
+          opId: compactedState.op_id,
+          checksum: {
+            bucket: state._id.b,
+            checksum: Number(compactedState.checksum),
+            count: compactedState.count
+          }
+        });
+      }
+    }
+
+    const mappedRequests = batch.map((request) => {
+      let start = request.start;
+      if (start == null) {
+        const preState = preStates.get(request.bucket);
+        if (preState != null) {
+          start = preState.opId;
+        }
+      }
+      return {
+        ...request,
+        start
+      };
+    });
+
+    const queriedChecksums = await this.queryPartialChecksums(mappedRequests);
+
+    return new Map<string, PartialOrFullChecksum>(
+      batch.map((request) => {
+        const bucket = request.bucket;
+        // Could be null if this is either (1) a partial request, or (2) no compacted checksum was available
+        const preState = preStates.get(bucket);
+        // Could be null if we got no data
+        const partialChecksum = queriedChecksums.get(bucket);
+        const merged = addPartialChecksums(bucket, preState?.checksum ?? null, partialChecksum ?? null);
+
+        return [bucket, merged];
+      })
+    );
+  }
+
+  /**
+   * Calculate (partial) checksums from the data collection directly.
+   */
+  async queryPartialChecksums(batch: FetchPartialBucketChecksum[]): Promise<PartialChecksumMap> {
+    try {
+      return await this.queryPartialChecksumsInternal(batch);
+    } catch (e) {
+      if (e.codeName == 'MaxTimeMSExpired') {
+        logger.warn(`Checksum query timed out; falling back to slower version`, e);
+        // Timeout - try the slower but more robust version
+        return await this.queryPartialChecksumsFallback(batch);
+      }
+      throw lib_mongo.mapQueryError(e, 'while reading checksums');
+    }
+  }
+
+  private async queryPartialChecksumsInternal(batch: FetchPartialBucketChecksum[]): Promise<PartialChecksumMap> {
+    const filters: any[] = [];
+    for (let request of batch) {
+      filters.push({
+        _id: {
+          $gt: {
+            g: this.group_id,
+            b: request.bucket,
+            o: request.start ?? new bson.MinKey()
+          },
+          $lte: {
+            g: this.group_id,
+            b: request.bucket,
+            o: request.end
+          }
+        }
+      });
+    }
+
+    const aggregate = await this.db.bucket_data
+      .aggregate(
+        [
+          {
+            $match: {
+              $or: filters
+            }
+          },
+          CHECKSUM_QUERY_GROUP_STAGE
+        ],
+        { session: undefined, readConcern: 'snapshot', maxTimeMS: lib_mongo.MONGO_CHECKSUM_TIMEOUT_MS }
+      )
+      // Don't map the error here - we want to keep timeout errors as-is
+      .toArray();
+
+    const partialChecksums = new Map<string, PartialOrFullChecksum>(
+      aggregate.map((doc) => {
+        const bucket = doc._id;
+        return [bucket, checksumFromAggregate(doc)];
+      })
+    );
+
+    return new Map<string, PartialOrFullChecksum>(
+      batch.map((request) => {
+        const bucket = request.bucket;
+        // Could be null if we got no data
+        let partialChecksum = partialChecksums.get(bucket);
+        if (partialChecksum == null) {
+          partialChecksum = {
+            bucket,
+            partialCount: 0,
+            partialChecksum: 0
+          };
+        }
+        if (request.start == null && isPartialChecksum(partialChecksum)) {
+          partialChecksum = {
+            bucket,
+            count: partialChecksum.partialCount,
+            checksum: partialChecksum.partialChecksum
+          };
+        }
+
+        return [bucket, partialChecksum];
+      })
+    );
+  }
+
+  /**
+   * Checksums for large buckets can run over the query timeout.
+   * To avoid this, we query in batches.
+   * This version can handle larger amounts of data, but is slower, especially for many buckets.
+   */
+  async queryPartialChecksumsFallback(batch: FetchPartialBucketChecksum[]): Promise<PartialChecksumMap> {
+    const partialChecksums = new Map<string, PartialOrFullChecksum>();
+    for (let request of batch) {
+      const checksum = await this.slowChecksum(request);
+      partialChecksums.set(request.bucket, checksum);
+    }
+
+    return partialChecksums;
+  }
+
+  private async slowChecksum(request: FetchPartialBucketChecksum): Promise<PartialOrFullChecksum> {
+    const batchLimit = 50_000;
+
+    let lowerBound = 0n;
+    const bucket = request.bucket;
+
+    let runningChecksum: PartialOrFullChecksum = {
+      bucket,
+      partialCount: 0,
+      partialChecksum: 0
+    };
+    if (request.start == null) {
+      runningChecksum = {
+        bucket,
+        count: 0,
+        checksum: 0
+      };
+    }
+
+    while (true) {
+      const filter = {
+        _id: {
+          $gt: {
+            g: this.group_id,
+            b: bucket,
+            o: lowerBound
+          },
+          $lte: {
+            g: this.group_id,
+            b: bucket,
+            o: request.end
+          }
+        }
+      };
+      const docs = await this.db.bucket_data
+        .aggregate(
+          [
+            {
+              $match: filter
+            },
+            // sort and limit _before_ grouping
+            { $sort: { _id: 1 } },
+            { $limit: batchLimit },
+            CHECKSUM_QUERY_GROUP_STAGE
+          ],
+          { session: undefined, readConcern: 'snapshot', maxTimeMS: lib_mongo.MONGO_CHECKSUM_TIMEOUT_MS }
+        )
+        .toArray();
+      const doc = docs[0];
+      if (doc == null) {
+        return runningChecksum;
+      }
+      const partial = checksumFromAggregate(doc);
+      runningChecksum = addPartialChecksums(bucket, runningChecksum, partial);
+      const isFinal = doc.count != batchLimit;
+      if (isFinal) {
+        break;
+      } else {
+        lowerBound = doc.last_op;
+      }
+    }
+    return runningChecksum;
+  }
+}
+
+const CHECKSUM_QUERY_GROUP_STAGE = {
+  $group: {
+    _id: '$_id.b',
+    // Historically, checksum may be stored as 'int' or 'double'.
+    // More recently, this should be a 'long'.
+    // $toLong ensures that we always sum it as a long, avoiding inaccuracies in the calculations.
+    checksum_total: { $sum: { $toLong: '$checksum' } },
+    count: { $sum: 1 },
+    has_clear_op: {
+      $max: {
+        $cond: [{ $eq: ['$op', 'CLEAR'] }, 1, 0]
+      }
+    },
+    last_op: { $max: '$_id.o' }
+  }
+};
+
+/**
+ * Convert output of CHECKSUM_QUERY_GROUP_STAGE into a checksum.
+ */
+function checksumFromAggregate(doc: bson.Document): PartialOrFullChecksum {
+  const partialChecksum = Number(BigInt(doc.checksum_total) & 0xffffffffn) & 0xffffffff;
+  const bucket = doc._id;
+
+  if (doc.has_clear_op == 1) {
+    return {
+      // full checksum - replaces any previous one
+      bucket,
+      checksum: partialChecksum,
+      count: doc.count
+    } satisfies BucketChecksum;
+  } else {
+    return {
+      // partial checksum - is added to a previous one
+      bucket,
+      partialCount: doc.count,
+      partialChecksum
+    } satisfies PartialChecksum;
+  }
+}
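The new module leans on bucket checksums being additive: a stored `compacted_state` checksum can be extended with a partial checksum computed only over operations after that state, instead of rescanning the whole bucket. A minimal sketch of that merge, assuming the additive 32-bit checksum semantics PowerSync uses for buckets; the real merge is `addPartialChecksums` from `@powersync/service-core`, and the names below are illustrative only:

```ts
// Illustrative types; the real ones live in @powersync/service-core.
interface FullChecksum {
  bucket: string;
  count: number;
  checksum: number; // additive 32-bit checksum over all ops in the bucket
}

interface PartialBucketChecksum {
  bucket: string;
  partialCount: number;
  partialChecksum: number; // covers only ops after the compacted state
}

// Hypothetical merge, assuming checksums add modulo 2^32.
function mergeChecksums(base: FullChecksum, partial: PartialBucketChecksum): FullChecksum {
  return {
    bucket: base.bucket,
    count: base.count + partial.partialCount,
    // `| 0` truncates to signed 32 bits, matching the additive checksum scheme.
    checksum: (base.checksum + partial.partialChecksum) | 0
  };
}

// Example: compacted_state covers ops up to op_id 1000; the query only scans ops 1001+.
const merged = mergeChecksums(
  { bucket: 'global[]', count: 1000, checksum: 123456789 },
  { bucket: 'global[]', partialCount: 10, partialChecksum: -987654 }
);
// merged = { bucket: 'global[]', count: 1010, checksum: 122469135 }
```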
package/src/storage/implementation/MongoCompactor.ts
@@ -1,9 +1,10 @@
-import { mongo } from '@powersync/lib-service-mongodb';
+import { mongo, MONGO_OPERATION_TIMEOUT_MS } from '@powersync/lib-service-mongodb';
 import { logger, ReplicationAssertionError, ServiceAssertionError } from '@powersync/lib-services-framework';
-import { addChecksums, InternalOpId, storage, utils } from '@powersync/service-core';
+import { addChecksums, InternalOpId, isPartialChecksum, storage, utils } from '@powersync/service-core';

 import { PowerSyncMongo } from './db.js';
 import { BucketDataDocument, BucketDataKey, BucketStateDocument } from './models.js';
+import { MongoSyncBucketStorage } from './MongoSyncBucketStorage.js';
 import { cacheKey } from './OperationBatch.js';
 import { readSingleBatch } from './util.js';

@@ -68,12 +69,14 @@ export class MongoCompactor {
   private maxOpId: bigint;
   private buckets: string[] | undefined;
   private signal?: AbortSignal;
+  private group_id: number;

   constructor(
+    private storage: MongoSyncBucketStorage,
     private db: PowerSyncMongo,
-    private group_id: number,
     options?: MongoCompactOptions
   ) {
+    this.group_id = storage.group_id;
     this.idLimitBytes = (options?.memoryLimitMB ?? DEFAULT_MEMORY_LIMIT_MB) * 1024 * 1024;
     this.moveBatchLimit = options?.moveBatchLimit ?? DEFAULT_MOVE_BATCH_LIMIT;
     this.moveBatchQueryLimit = options?.moveBatchQueryLimit ?? DEFAULT_MOVE_BATCH_QUERY_LIMIT;
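The compactor now takes the MongoSyncBucketStorage instance instead of a raw group id, so it can derive `group_id` from the storage and reuse `storage.checksums` for checksum queries. A hypothetical call site sketching the new signature (this diff does not show the real caller):

```ts
import { PowerSyncMongo } from './db.js';
import { MongoCompactor } from './MongoCompactor.js';
import { MongoSyncBucketStorage } from './MongoSyncBucketStorage.js';

// Hypothetical wiring: group_id now comes from the storage instance.
async function populateMissingChecksums(storage: MongoSyncBucketStorage, db: PowerSyncMongo) {
  const compactor = new MongoCompactor(storage, db, { memoryLimitMB: 64 });
  // New in 0.12.1: populate compacted_state checksums without a full compact.
  await compactor.populateChecksums();
}
```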
@@ -136,33 +139,57 @@ export class MongoCompactor {
       o: new mongo.MaxKey() as any
     };

+    const doneWithBucket = async () => {
+      if (currentState == null) {
+        return;
+      }
+      // Free memory before clearing bucket
+      currentState.seen.clear();
+      if (currentState.lastNotPut != null && currentState.opsSincePut >= 1) {
+        logger.info(
+          `Inserting CLEAR at ${this.group_id}:${currentState.bucket}:${currentState.lastNotPut} to remove ${currentState.opsSincePut} operations`
+        );
+        // Need flush() before clear()
+        await this.flush();
+        await this.clearBucket(currentState);
+      }
+
+      // Do this _after_ clearBucket so that we have accurate counts.
+      this.updateBucketChecksums(currentState);
+    };
+
     while (!this.signal?.aborted) {
       // Query one batch at a time, to avoid cursor timeouts
-      const cursor = this.db.bucket_data.aggregate<BucketDataDocument & { size: number | bigint }>(
-
-
-
-
-
+      const cursor = this.db.bucket_data.aggregate<BucketDataDocument & { size: number | bigint }>(
+        [
+          {
+            $match: {
+              _id: {
+                $gte: lowerBound,
+                $lt: upperBound
+              }
+            }
+          },
+          { $sort: { _id: -1 } },
+          { $limit: this.moveBatchQueryLimit },
+          {
+            $project: {
+              _id: 1,
+              op: 1,
+              table: 1,
+              row_id: 1,
+              source_table: 1,
+              source_key: 1,
+              checksum: 1,
+              size: { $bsonSize: '$$ROOT' }
             }
           }
-
-          {
-
-
-
-
-              op: 1,
-              table: 1,
-              row_id: 1,
-              source_table: 1,
-              source_key: 1,
-              checksum: 1,
-              size: { $bsonSize: '$$ROOT' }
-            }
-          }
-      ]);
-      const { data: batch } = await readSingleBatch(cursor);
+        ],
+        { batchSize: this.moveBatchQueryLimit }
+      );
+      // We don't limit to a single batch here, since that often causes MongoDB to scan through more than it returns.
+      // Instead, we load up to the limit.
+      const batch = await cursor.toArray();

       if (batch.length == 0) {
         // We've reached the end
@@ -174,24 +201,8 @@ export class MongoCompactor {

       for (let doc of batch) {
         if (currentState == null || doc._id.b != currentState.bucket) {
-
-          if (currentState.lastNotPut != null && currentState.opsSincePut >= 1) {
-            // Important to flush before clearBucket()
-            // Does not have to happen before flushBucketChecksums()
-            await this.flush();
-            logger.info(
-              `Inserting CLEAR at ${this.group_id}:${currentState.bucket}:${currentState.lastNotPut} to remove ${currentState.opsSincePut} operations`
-            );
-
-            // Free memory before clearing bucket
-            currentState!.seen.clear();
-
-            await this.clearBucket(currentState);
-          }
+          await doneWithBucket();

-          // Should happen after clearBucket() for accurate stats
-          this.updateBucketChecksums(currentState);
-        }
           currentState = {
             bucket: doc._id.b,
             seen: new Map(),
@@ -274,21 +285,14 @@ export class MongoCompactor {
           await this.flush();
         }
       }
-    }

-
-
-
-        `Inserting CLEAR at ${this.group_id}:${currentState.bucket}:${currentState.lastNotPut} to remove ${currentState.opsSincePut} operations`
-      );
-      // Need flush() before clear()
-      await this.flush();
-      await this.clearBucket(currentState);
-    }
-    if (currentState != null) {
-      // Do this _after_ clearBucket so that we have accurate counts.
-      this.updateBucketChecksums(currentState);
+      if (currentState != null) {
+        logger.info(`Processed batch of length ${batch.length} current bucket: ${currentState.bucket}`);
+      }
     }
+
+    await doneWithBucket();
+
     // Need another flush after updateBucketChecksums()
     await this.flush();
   }
@@ -475,4 +479,91 @@ export class MongoCompactor {
       await session.endSession();
     }
   }
+
+  /**
+   * Subset of compact, only populating checksums where relevant.
+   */
+  async populateChecksums() {
+    let lastId: BucketStateDocument['_id'] | null = null;
+    while (!this.signal?.aborted) {
+      // By filtering buckets, we effectively make this "resumeable".
+      let filter: mongo.Filter<BucketStateDocument> = {
+        compacted_state: { $exists: false }
+      };
+      if (lastId) {
+        filter._id = { $gt: lastId };
+      }
+
+      const bucketsWithoutChecksums = await this.db.bucket_state
+        .find(filter, {
+          projection: {
+            _id: 1
+          },
+          sort: {
+            _id: 1
+          },
+          limit: 5_000,
+          maxTimeMS: MONGO_OPERATION_TIMEOUT_MS
+        })
+        .toArray();
+      if (bucketsWithoutChecksums.length == 0) {
+        // All done
+        break;
+      }
+
+      logger.info(`Calculating checksums for batch of ${bucketsWithoutChecksums.length} buckets`);
+
+      await this.updateChecksumsBatch(bucketsWithoutChecksums.map((b) => b._id.b));
+
+      lastId = bucketsWithoutChecksums[bucketsWithoutChecksums.length - 1]._id;
+    }
+  }
+
+  private async updateChecksumsBatch(buckets: string[]) {
+    const checksums = await this.storage.checksums.queryPartialChecksums(
+      buckets.map((bucket) => {
+        return {
+          bucket,
+          end: this.maxOpId
+        };
+      })
+    );
+
+    for (let bucketChecksum of checksums.values()) {
+      if (isPartialChecksum(bucketChecksum)) {
+        // Should never happen since we don't specify `start`
+        throw new ServiceAssertionError(`Full checksum expected, got ${JSON.stringify(bucketChecksum)}`);
+      }
+
+      this.bucketStateUpdates.push({
+        updateOne: {
+          filter: {
+            _id: {
+              g: this.group_id,
+              b: bucketChecksum.bucket
+            }
+          },
+          update: {
+            $set: {
+              compacted_state: {
+                op_id: this.maxOpId,
+                count: bucketChecksum.count,
+                checksum: BigInt(bucketChecksum.checksum),
+                bytes: null
+              }
+            },
+            $setOnInsert: {
+              // Only set this if we're creating the document.
+              // In all other cases, the replication process will have a set a more accurate id.
+              last_op: this.maxOpId
+            }
+          },
+          // We generally expect this to have been created before, but do handle cases of old unchanged buckets
+          upsert: true
+        }
+      });
+    }
+
+    await this.flush();
+  }
 }
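The new updateChecksumsBatch relies on standard MongoDB upsert semantics: `$set` always applies, while `$setOnInsert` applies only when the upsert has to create the bucket_state document. A standalone sketch of that pattern with the Node.js driver; the collection and field names mirror the diff, but this is an illustration, not the service's actual write path, which goes through bucketStateUpdates and a bulk write:

```ts
import { MongoClient } from 'mongodb';

// Illustration of the $set + $setOnInsert upsert used for compacted_state.
async function upsertCompactedState(client: MongoClient) {
  const bucketState = client.db('powersync_demo').collection('bucket_state');

  await bucketState.updateOne(
    { _id: { g: 1, b: 'global[]' } as any },
    {
      // Applied on both update and insert: the freshly computed checksum state.
      $set: {
        compacted_state: { op_id: 1000, count: 42, checksum: 987654321, bytes: null }
      },
      // Applied only when this upsert creates the document; an existing
      // document keeps the last_op maintained by replication.
      $setOnInsert: { last_op: 1000 }
    },
    { upsert: true }
  );
}
```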