@powersync/service-module-mongodb-storage 0.0.0-dev-20260203155513 → 0.0.0-dev-20260223082111
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +55 -10
- package/dist/migrations/db/migrations/1770213298299-storage-version.js +29 -0
- package/dist/migrations/db/migrations/1770213298299-storage-version.js.map +1 -0
- package/dist/storage/MongoBucketStorage.d.ts +7 -15
- package/dist/storage/MongoBucketStorage.js +12 -51
- package/dist/storage/MongoBucketStorage.js.map +1 -1
- package/dist/storage/MongoReportStorage.d.ts +1 -11
- package/dist/storage/MongoReportStorage.js +1 -321
- package/dist/storage/MongoReportStorage.js.map +1 -1
- package/dist/storage/implementation/MongoChecksums.d.ts +5 -2
- package/dist/storage/implementation/MongoChecksums.js +7 -4
- package/dist/storage/implementation/MongoChecksums.js.map +1 -1
- package/dist/storage/implementation/MongoCompactor.d.ts +16 -1
- package/dist/storage/implementation/MongoCompactor.js +80 -23
- package/dist/storage/implementation/MongoCompactor.js.map +1 -1
- package/dist/storage/implementation/MongoPersistedSyncRulesContent.d.ts +2 -12
- package/dist/storage/implementation/MongoPersistedSyncRulesContent.js +23 -24
- package/dist/storage/implementation/MongoPersistedSyncRulesContent.js.map +1 -1
- package/dist/storage/implementation/MongoSyncBucketStorage.d.ts +5 -2
- package/dist/storage/implementation/MongoSyncBucketStorage.js +42 -40
- package/dist/storage/implementation/MongoSyncBucketStorage.js.map +1 -1
- package/dist/storage/implementation/db.d.ts +0 -10
- package/dist/storage/implementation/db.js +0 -30
- package/dist/storage/implementation/db.js.map +1 -1
- package/dist/storage/implementation/models.d.ts +11 -0
- package/dist/storage/implementation/models.js +9 -1
- package/dist/storage/implementation/models.js.map +1 -1
- package/dist/storage/storage-index.d.ts +0 -1
- package/dist/storage/storage-index.js +0 -1
- package/dist/storage/storage-index.js.map +1 -1
- package/dist/utils/test-utils.d.ts +3 -4
- package/dist/utils/test-utils.js +2 -2
- package/dist/utils/test-utils.js.map +1 -1
- package/dist/utils/util.d.ts +0 -7
- package/dist/utils/util.js +3 -27
- package/dist/utils/util.js.map +1 -1
- package/package.json +7 -7
- package/src/migrations/db/migrations/1770213298299-storage-version.ts +44 -0
- package/src/storage/MongoBucketStorage.ts +20 -59
- package/src/storage/MongoReportStorage.ts +4 -369
- package/src/storage/implementation/MongoChecksums.ts +14 -6
- package/src/storage/implementation/MongoCompactor.ts +94 -25
- package/src/storage/implementation/MongoPersistedSyncRulesContent.ts +25 -32
- package/src/storage/implementation/MongoSyncBucketStorage.ts +60 -44
- package/src/storage/implementation/db.ts +0 -32
- package/src/storage/implementation/models.ts +23 -0
- package/src/storage/storage-index.ts +0 -1
- package/src/utils/test-utils.ts +3 -4
- package/src/utils/util.ts +3 -36
- package/test/src/__snapshots__/storage_sync.test.ts.snap +1116 -21
- package/test/src/compression.test.ts +17 -0
- package/test/src/connection-report-storage.test.ts +6 -2
- package/test/src/storage_compacting.test.ts +29 -22
- package/test/src/storage_sync.test.ts +27 -14
- package/test/src/util.ts +3 -0
- package/test/tsconfig.json +3 -7
- package/tsconfig.tsbuildinfo +1 -1
- package/dist/migrations/db/migrations/1770037239303-sync-reporting.js +0 -44
- package/dist/migrations/db/migrations/1770037239303-sync-reporting.js.map +0 -1
- package/dist/storage/implementation/MongoPersistedSyncRules.d.ts +0 -10
- package/dist/storage/implementation/MongoPersistedSyncRules.js +0 -17
- package/dist/storage/implementation/MongoPersistedSyncRules.js.map +0 -1
- package/src/migrations/db/migrations/1770037239303-sync-reporting.ts +0 -74
- package/src/storage/implementation/MongoPersistedSyncRules.ts +0 -20
- package/test/src/__snapshots__/storage.test.ts.snap +0 -25
- /package/dist/migrations/db/migrations/{1770037239303-sync-reporting.d.ts → 1770213298299-storage-version.d.ts} +0 -0
|
@@ -2,381 +2,14 @@ import { storage } from '@powersync/service-core';
|
|
|
2
2
|
import { event_types } from '@powersync/service-types';
|
|
3
3
|
import { PowerSyncMongo } from './implementation/db.js';
|
|
4
4
|
import { logger } from '@powersync/lib-services-framework';
|
|
5
|
-
import { createPaginatedConnectionQuery
|
|
6
|
-
import { mongo } from '@powersync/lib-service-mongodb';
|
|
7
|
-
import * as bson from 'bson';
|
|
5
|
+
import { createPaginatedConnectionQuery } from '../utils/util.js';
|
|
8
6
|
|
|
9
7
|
export class MongoReportStorage implements storage.ReportStorage {
|
|
10
8
|
public readonly db: PowerSyncMongo;
|
|
11
|
-
private readonly client: mongo.MongoClient;
|
|
12
|
-
private readonly session: mongo.ClientSession;
|
|
13
9
|
|
|
14
10
|
constructor(db: PowerSyncMongo) {
|
|
15
11
|
this.db = db;
|
|
16
|
-
this.client = db.client;
|
|
17
|
-
this.session = this.client.startSession();
|
|
18
12
|
}
|
|
19
|
-
|
|
20
|
-
private async reportSyncCheckpoint(data: event_types.SyncAnalyticsEventData) {
|
|
21
|
-
const { user_id, client_id, data: checkData, request_streams } = data;
|
|
22
|
-
const { id, type, last_op_id, buckets, date, streams } = checkData;
|
|
23
|
-
await this.session.withTransaction(
|
|
24
|
-
async () => {
|
|
25
|
-
await this.db.sync_report_events.insertOne(
|
|
26
|
-
{
|
|
27
|
-
_id: id,
|
|
28
|
-
user_id,
|
|
29
|
-
client_id,
|
|
30
|
-
type,
|
|
31
|
-
last_op_id,
|
|
32
|
-
date,
|
|
33
|
-
streams,
|
|
34
|
-
request_streams
|
|
35
|
-
},
|
|
36
|
-
{ session: this.session }
|
|
37
|
-
);
|
|
38
|
-
await this.db.bucket_report_events.insertMany(
|
|
39
|
-
buckets.map((bucket: any) => {
|
|
40
|
-
return {
|
|
41
|
-
state: `synced`,
|
|
42
|
-
checkpoint_id: id,
|
|
43
|
-
client_id,
|
|
44
|
-
user_id,
|
|
45
|
-
name: bucket.bucket,
|
|
46
|
-
operations: bucket.count,
|
|
47
|
-
date: date,
|
|
48
|
-
subscriptions: bucket.subscriptions,
|
|
49
|
-
priority: bucket.priority
|
|
50
|
-
};
|
|
51
|
-
})
|
|
52
|
-
);
|
|
53
|
-
},
|
|
54
|
-
{ session: this.session }
|
|
55
|
-
);
|
|
56
|
-
}
|
|
57
|
-
|
|
58
|
-
private async reportSyncDiffCheckpoint(data: event_types.SyncAnalyticsEventData) {
|
|
59
|
-
const { user_id, client_id, data: checkData, request_streams } = data;
|
|
60
|
-
const { id, type, last_op_id, updated_buckets, removed_buckets, date, streams } = checkData;
|
|
61
|
-
await this.session.withTransaction(async () => {
|
|
62
|
-
await this.db.sync_report_events.insertOne(
|
|
63
|
-
{
|
|
64
|
-
_id: id,
|
|
65
|
-
user_id,
|
|
66
|
-
client_id,
|
|
67
|
-
type,
|
|
68
|
-
last_op_id,
|
|
69
|
-
date,
|
|
70
|
-
streams,
|
|
71
|
-
request_streams
|
|
72
|
-
},
|
|
73
|
-
{ session: this.session }
|
|
74
|
-
);
|
|
75
|
-
if (updated_buckets.length > 0) {
|
|
76
|
-
await this.db.bucket_report_events.insertMany(
|
|
77
|
-
updated_buckets.map((bucket: any) => {
|
|
78
|
-
return {
|
|
79
|
-
state: `updated`,
|
|
80
|
-
checkpoint_id: id,
|
|
81
|
-
client_id,
|
|
82
|
-
user_id,
|
|
83
|
-
name: bucket.bucket,
|
|
84
|
-
operations: bucket.count,
|
|
85
|
-
date: date,
|
|
86
|
-
subscriptions: bucket.subscriptions,
|
|
87
|
-
priority: bucket.priority
|
|
88
|
-
};
|
|
89
|
-
}),
|
|
90
|
-
{ session: this.session }
|
|
91
|
-
);
|
|
92
|
-
}
|
|
93
|
-
if (removed_buckets.length > 0) {
|
|
94
|
-
await this.db.bucket_report_events.insertMany(
|
|
95
|
-
removed_buckets.map((bucket: string) => {
|
|
96
|
-
return {
|
|
97
|
-
state: `removed`,
|
|
98
|
-
checkpoint_id: id,
|
|
99
|
-
client_id,
|
|
100
|
-
user_id,
|
|
101
|
-
name: bucket,
|
|
102
|
-
operations: null,
|
|
103
|
-
subscriptions: null,
|
|
104
|
-
date: date,
|
|
105
|
-
priority: null
|
|
106
|
-
};
|
|
107
|
-
}),
|
|
108
|
-
{ session: this.session }
|
|
109
|
-
);
|
|
110
|
-
}
|
|
111
|
-
});
|
|
112
|
-
}
|
|
113
|
-
|
|
114
|
-
private syncBucketFacetPipeline() {
|
|
115
|
-
return {
|
|
116
|
-
$facet: {
|
|
117
|
-
buckets_removed: [
|
|
118
|
-
{ $match: { state: 'removed' } },
|
|
119
|
-
{ $group: { _id: '$checkpoint_id', total: { $sum: 1 } } },
|
|
120
|
-
{
|
|
121
|
-
$project: {
|
|
122
|
-
_id: 0,
|
|
123
|
-
checkpoint_id: '$_id',
|
|
124
|
-
total: 1
|
|
125
|
-
}
|
|
126
|
-
},
|
|
127
|
-
{ $sort: { total: -1 } }
|
|
128
|
-
],
|
|
129
|
-
buckets_updated: [
|
|
130
|
-
{ $match: { state: 'updated' } },
|
|
131
|
-
{ $group: { _id: '$checkpoint_id', total: { $sum: 1 } } },
|
|
132
|
-
{
|
|
133
|
-
$project: {
|
|
134
|
-
_id: 0,
|
|
135
|
-
checkpoint_id: '$_id',
|
|
136
|
-
total: 1
|
|
137
|
-
}
|
|
138
|
-
},
|
|
139
|
-
{ $sort: { total: -1 } }
|
|
140
|
-
],
|
|
141
|
-
buckets_synced: [
|
|
142
|
-
{ $match: { state: 'synced' } },
|
|
143
|
-
{ $group: { _id: '$checkpoint_id', total: { $sum: 1 } } },
|
|
144
|
-
{
|
|
145
|
-
$project: {
|
|
146
|
-
_id: 0,
|
|
147
|
-
checkpoint_id: '$_id',
|
|
148
|
-
total: 1
|
|
149
|
-
}
|
|
150
|
-
},
|
|
151
|
-
{ $sort: { total: -1 } }
|
|
152
|
-
]
|
|
153
|
-
}
|
|
154
|
-
};
|
|
155
|
-
}
|
|
156
|
-
|
|
157
|
-
private syncBucketProjectPipeline() {
|
|
158
|
-
return {
|
|
159
|
-
$project: {
|
|
160
|
-
buckets_removed: 1,
|
|
161
|
-
buckets_updated: 1,
|
|
162
|
-
buckets_synced: 1
|
|
163
|
-
}
|
|
164
|
-
};
|
|
165
|
-
}
|
|
166
|
-
|
|
167
|
-
async getSyncCheckpoint(data: event_types.SyncCheckpointRequest): Promise<event_types.PaginatedResponse<any>> {
|
|
168
|
-
const { cursor, date_range } = data;
|
|
169
|
-
const limit = data?.limit || 100;
|
|
170
|
-
const typeFilter = data.type != null ? { type: data.type } : undefined;
|
|
171
|
-
const date = { date: { $lte: date_range.end, $gte: date_range.start } };
|
|
172
|
-
const user_id = data.user_id != null ? { user_id: data.user_id } : undefined;
|
|
173
|
-
const client_id = data.client_id != null ? { client_id: data.client_id } : undefined;
|
|
174
|
-
return (await createPaginatedSyncCheckpointQuery(
|
|
175
|
-
{
|
|
176
|
-
...client_id,
|
|
177
|
-
...user_id,
|
|
178
|
-
...date,
|
|
179
|
-
...typeFilter
|
|
180
|
-
},
|
|
181
|
-
this.db.sync_report_events,
|
|
182
|
-
limit,
|
|
183
|
-
cursor
|
|
184
|
-
)) as event_types.PaginatedResponse<any>;
|
|
185
|
-
}
|
|
186
|
-
|
|
187
|
-
async getLastSyncReport(data: event_types.LastSyncRequest): Promise<any> {
|
|
188
|
-
const { client_id, user_id } = data;
|
|
189
|
-
const checkpoint = await this.db.sync_report_events.findOne(
|
|
190
|
-
{
|
|
191
|
-
user_id,
|
|
192
|
-
client_id
|
|
193
|
-
},
|
|
194
|
-
{ session: this.session, sort: { date: -1 } }
|
|
195
|
-
);
|
|
196
|
-
|
|
197
|
-
if (!checkpoint) {
|
|
198
|
-
throw new Error('No checkpoint found for the given user_id and client_id');
|
|
199
|
-
}
|
|
200
|
-
|
|
201
|
-
const sync_streams: string[] = checkpoint.streams.map((stream: any) => stream.name);
|
|
202
|
-
const subscriptions: string[] =
|
|
203
|
-
checkpoint.request_streams?.subscriptions.length > 0
|
|
204
|
-
? checkpoint.request_streams.subscriptions.map((stream: any) => stream.name)
|
|
205
|
-
: [];
|
|
206
|
-
|
|
207
|
-
const buckets = await this.db.bucket_report_events
|
|
208
|
-
.aggregate([
|
|
209
|
-
{
|
|
210
|
-
$facet: {
|
|
211
|
-
updated_buckets: [
|
|
212
|
-
{
|
|
213
|
-
$match: {
|
|
214
|
-
checkpoint_id: checkpoint._id,
|
|
215
|
-
state: 'updated'
|
|
216
|
-
}
|
|
217
|
-
},
|
|
218
|
-
{
|
|
219
|
-
$sort: {
|
|
220
|
-
operations: -1
|
|
221
|
-
}
|
|
222
|
-
},
|
|
223
|
-
{
|
|
224
|
-
$limit: 10
|
|
225
|
-
},
|
|
226
|
-
{
|
|
227
|
-
$project: {
|
|
228
|
-
_id: 0,
|
|
229
|
-
name: '$name',
|
|
230
|
-
operations: '$operations',
|
|
231
|
-
subscriptions: '$subscriptions',
|
|
232
|
-
priority: '$priority'
|
|
233
|
-
}
|
|
234
|
-
}
|
|
235
|
-
],
|
|
236
|
-
removed_buckets: [
|
|
237
|
-
{
|
|
238
|
-
$match: {
|
|
239
|
-
checkpoint_id: checkpoint._id,
|
|
240
|
-
state: 'removed'
|
|
241
|
-
}
|
|
242
|
-
},
|
|
243
|
-
{
|
|
244
|
-
$group: {
|
|
245
|
-
_id: null,
|
|
246
|
-
count: { $sum: 1 }
|
|
247
|
-
}
|
|
248
|
-
}
|
|
249
|
-
],
|
|
250
|
-
synced_buckets: [
|
|
251
|
-
{
|
|
252
|
-
$match: {
|
|
253
|
-
checkpoint_id: checkpoint._id,
|
|
254
|
-
state: 'synced'
|
|
255
|
-
}
|
|
256
|
-
},
|
|
257
|
-
{
|
|
258
|
-
$sort: {
|
|
259
|
-
operations: -1
|
|
260
|
-
}
|
|
261
|
-
},
|
|
262
|
-
{
|
|
263
|
-
$limit: 10
|
|
264
|
-
},
|
|
265
|
-
{
|
|
266
|
-
$project: {
|
|
267
|
-
_id: 0,
|
|
268
|
-
name: '$name',
|
|
269
|
-
operations: '$operations',
|
|
270
|
-
subscriptions: '$subscriptions',
|
|
271
|
-
priority: '$priority'
|
|
272
|
-
}
|
|
273
|
-
}
|
|
274
|
-
]
|
|
275
|
-
}
|
|
276
|
-
}
|
|
277
|
-
])
|
|
278
|
-
.toArray();
|
|
279
|
-
|
|
280
|
-
if (buckets.length === 0) {
|
|
281
|
-
throw new Error('No bucket report events found for the given checkpoint');
|
|
282
|
-
}
|
|
283
|
-
|
|
284
|
-
const syncedBuckets =
|
|
285
|
-
buckets[0].synced_buckets.length > 0
|
|
286
|
-
? buckets[0].synced_buckets.map((bucket: any) => {
|
|
287
|
-
const subs = {
|
|
288
|
-
default: [],
|
|
289
|
-
sub: []
|
|
290
|
-
};
|
|
291
|
-
for (const subscription of bucket?.subscriptions ?? []) {
|
|
292
|
-
if ('default' in subscription) {
|
|
293
|
-
// @ts-expect-error
|
|
294
|
-
subs.default.push(sync_streams[subscription.default]);
|
|
295
|
-
}
|
|
296
|
-
|
|
297
|
-
if ('sub' in subscription) {
|
|
298
|
-
// @ts-expect-error
|
|
299
|
-
subs.sub.push(subscriptions[subscription.sub]);
|
|
300
|
-
}
|
|
301
|
-
}
|
|
302
|
-
return {
|
|
303
|
-
name: bucket.name,
|
|
304
|
-
operations: bucket.operations,
|
|
305
|
-
priority: bucket.priority,
|
|
306
|
-
subscriptions: subs
|
|
307
|
-
};
|
|
308
|
-
})
|
|
309
|
-
: [];
|
|
310
|
-
|
|
311
|
-
const updatedBuckets =
|
|
312
|
-
buckets[0].updated_buckets.length > 0
|
|
313
|
-
? buckets[0].updated_buckets.map((bucket: any) => {
|
|
314
|
-
const subs = {
|
|
315
|
-
default: [],
|
|
316
|
-
sub: []
|
|
317
|
-
};
|
|
318
|
-
for (const subscription of bucket?.subscriptions ?? []) {
|
|
319
|
-
if ('default' in subscription) {
|
|
320
|
-
// @ts-expect-error
|
|
321
|
-
subs.default.push(sync_streams[subscription.default]);
|
|
322
|
-
}
|
|
323
|
-
|
|
324
|
-
if ('sub' in subscription) {
|
|
325
|
-
// @ts-expect-error
|
|
326
|
-
subs.sub.push(subscriptions[subscription.sub]);
|
|
327
|
-
}
|
|
328
|
-
}
|
|
329
|
-
return {
|
|
330
|
-
name: bucket.name,
|
|
331
|
-
operations: bucket.operations,
|
|
332
|
-
priority: bucket.priority,
|
|
333
|
-
subscriptions: subs
|
|
334
|
-
};
|
|
335
|
-
})
|
|
336
|
-
: [];
|
|
337
|
-
|
|
338
|
-
return {
|
|
339
|
-
checkpoint_id: checkpoint._id,
|
|
340
|
-
last_op_id: checkpoint.last_op_id,
|
|
341
|
-
date: checkpoint.date,
|
|
342
|
-
streams: sync_streams,
|
|
343
|
-
subscriptions,
|
|
344
|
-
updated_buckets: updatedBuckets,
|
|
345
|
-
removed_buckets: buckets[0].removed_buckets.length > 0 ? buckets[0].removed_buckets[0].count : 0,
|
|
346
|
-
synced_buckets: syncedBuckets
|
|
347
|
-
};
|
|
348
|
-
}
|
|
349
|
-
|
|
350
|
-
async getSyncBucketStats(data: event_types.SyncBucketStatsRequest): Promise<any> {
|
|
351
|
-
const { date_range } = data;
|
|
352
|
-
const user_id = data.user_id != null ? { user_id: data.user_id } : undefined;
|
|
353
|
-
const client_id = data.client_id != null ? { client_id: data.client_id } : undefined;
|
|
354
|
-
const checkpoint_id =
|
|
355
|
-
data.checkpoint_id != null ? { checkpoint_id: new bson.ObjectId(data.checkpoint_id) } : undefined;
|
|
356
|
-
const result = await this.db.bucket_report_events
|
|
357
|
-
.aggregate([
|
|
358
|
-
{
|
|
359
|
-
$match: {
|
|
360
|
-
...client_id,
|
|
361
|
-
...user_id,
|
|
362
|
-
...checkpoint_id,
|
|
363
|
-
date: { $lte: date_range.end, $gte: date_range.start }
|
|
364
|
-
}
|
|
365
|
-
},
|
|
366
|
-
this.syncBucketFacetPipeline(),
|
|
367
|
-
this.syncBucketProjectPipeline()
|
|
368
|
-
])
|
|
369
|
-
.toArray();
|
|
370
|
-
return result[0];
|
|
371
|
-
}
|
|
372
|
-
|
|
373
|
-
async reportSyncAnalyticsEvent(data: event_types.SyncAnalyticsEventData): Promise<void> {
|
|
374
|
-
if (data.data.type === event_types.SyncEventCheckpointType.FULL) {
|
|
375
|
-
return this.reportSyncCheckpoint(data);
|
|
376
|
-
}
|
|
377
|
-
return this.reportSyncDiffCheckpoint(data);
|
|
378
|
-
}
|
|
379
|
-
|
|
380
13
|
async deleteOldConnectionData(data: event_types.DeleteOldConnectionData): Promise<void> {
|
|
381
14
|
const { date } = data;
|
|
382
15
|
const result = await this.db.connection_report_events.deleteMany({
|
|
@@ -393,7 +26,9 @@ export class MongoReportStorage implements storage.ReportStorage {
|
|
|
393
26
|
}
|
|
394
27
|
}
|
|
395
28
|
|
|
396
|
-
async getClientConnectionReports(
|
|
29
|
+
async getClientConnectionReports(
|
|
30
|
+
data: event_types.ClientConnectionReportRequest
|
|
31
|
+
): Promise<event_types.ClientConnectionReportResponse> {
|
|
397
32
|
const { start, end } = data;
|
|
398
33
|
const result = await this.db.connection_report_events
|
|
399
34
|
.aggregate<event_types.ClientConnectionReportResponse>([
|
|
@@ -13,6 +13,7 @@ import {
|
|
|
13
13
|
PartialOrFullChecksum
|
|
14
14
|
} from '@powersync/service-core';
|
|
15
15
|
import { PowerSyncMongo } from './db.js';
|
|
16
|
+
import { StorageConfig } from './models.js';
|
|
16
17
|
|
|
17
18
|
/**
|
|
18
19
|
* Checksum calculation options, primarily for tests.
|
|
@@ -27,6 +28,8 @@ export interface MongoChecksumOptions {
|
|
|
27
28
|
* Limit on the number of documents to calculate a checksum on at a time.
|
|
28
29
|
*/
|
|
29
30
|
operationBatchLimit?: number;
|
|
31
|
+
|
|
32
|
+
storageConfig: StorageConfig;
|
|
30
33
|
}
|
|
31
34
|
|
|
32
35
|
const DEFAULT_BUCKET_BATCH_LIMIT = 200;
|
|
@@ -43,12 +46,15 @@ const DEFAULT_OPERATION_BATCH_LIMIT = 50_000;
|
|
|
43
46
|
*/
|
|
44
47
|
export class MongoChecksums {
|
|
45
48
|
private _cache: ChecksumCache | undefined;
|
|
49
|
+
private readonly storageConfig: StorageConfig;
|
|
46
50
|
|
|
47
51
|
constructor(
|
|
48
52
|
private db: PowerSyncMongo,
|
|
49
53
|
private group_id: number,
|
|
50
|
-
private options
|
|
51
|
-
) {
|
|
54
|
+
private options: MongoChecksumOptions
|
|
55
|
+
) {
|
|
56
|
+
this.storageConfig = options.storageConfig;
|
|
57
|
+
}
|
|
52
58
|
|
|
53
59
|
/**
|
|
54
60
|
* Lazy-instantiated cache.
|
|
@@ -222,6 +228,11 @@ export class MongoChecksums {
|
|
|
222
228
|
});
|
|
223
229
|
}
|
|
224
230
|
|
|
231
|
+
// Historically, checksum may be stored as 'int' or 'double'.
|
|
232
|
+
// More recently, this should be a 'long'.
|
|
233
|
+
// $toLong ensures that we always sum it as a long, avoiding inaccuracies in the calculations.
|
|
234
|
+
const checksumLong = this.storageConfig.longChecksums ? '$checksum' : { $toLong: '$checksum' };
|
|
235
|
+
|
|
225
236
|
// Aggregate over a max of `batchLimit` operations at a time.
|
|
226
237
|
// Let's say we have 3 buckets (A, B, C), each with 10 operations, and our batch limit is 12.
|
|
227
238
|
// Then we'll do three batches:
|
|
@@ -245,10 +256,7 @@ export class MongoChecksums {
|
|
|
245
256
|
{
|
|
246
257
|
$group: {
|
|
247
258
|
_id: '$_id.b',
|
|
248
|
-
|
|
249
|
-
// More recently, this should be a 'long'.
|
|
250
|
-
// $toLong ensures that we always sum it as a long, avoiding inaccuracies in the calculations.
|
|
251
|
-
checksum_total: { $sum: { $toLong: '$checksum' } },
|
|
259
|
+
checksum_total: { $sum: checksumLong },
|
|
252
260
|
count: { $sum: 1 },
|
|
253
261
|
has_clear_op: {
|
|
254
262
|
$max: {
|
|
@@ -62,6 +62,7 @@ const DEFAULT_CLEAR_BATCH_LIMIT = 5000;
|
|
|
62
62
|
const DEFAULT_MOVE_BATCH_LIMIT = 2000;
|
|
63
63
|
const DEFAULT_MOVE_BATCH_QUERY_LIMIT = 10_000;
|
|
64
64
|
const DEFAULT_MIN_BUCKET_CHANGES = 10;
|
|
65
|
+
const DEFAULT_MIN_CHANGE_RATIO = 0.1;
|
|
65
66
|
|
|
66
67
|
/** This default is primarily for tests. */
|
|
67
68
|
const DEFAULT_MEMORY_LIMIT_MB = 64;
|
|
@@ -75,6 +76,7 @@ export class MongoCompactor {
|
|
|
75
76
|
private moveBatchQueryLimit: number;
|
|
76
77
|
private clearBatchLimit: number;
|
|
77
78
|
private minBucketChanges: number;
|
|
79
|
+
private minChangeRatio: number;
|
|
78
80
|
private maxOpId: bigint;
|
|
79
81
|
private buckets: string[] | undefined;
|
|
80
82
|
private signal?: AbortSignal;
|
|
@@ -91,6 +93,7 @@ export class MongoCompactor {
|
|
|
91
93
|
this.moveBatchQueryLimit = options?.moveBatchQueryLimit ?? DEFAULT_MOVE_BATCH_QUERY_LIMIT;
|
|
92
94
|
this.clearBatchLimit = options?.clearBatchLimit ?? DEFAULT_CLEAR_BATCH_LIMIT;
|
|
93
95
|
this.minBucketChanges = options?.minBucketChanges ?? DEFAULT_MIN_BUCKET_CHANGES;
|
|
96
|
+
this.minChangeRatio = options?.minChangeRatio ?? DEFAULT_MIN_CHANGE_RATIO;
|
|
94
97
|
this.maxOpId = options?.maxOpId ?? 0n;
|
|
95
98
|
this.buckets = options?.compactBuckets;
|
|
96
99
|
this.signal = options?.signal;
|
|
@@ -115,27 +118,19 @@ export class MongoCompactor {
|
|
|
115
118
|
}
|
|
116
119
|
|
|
117
120
|
private async compactDirtyBuckets() {
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
let recentlyCompacted: string[] = [];
|
|
125
|
-
const buckets = await this.dirtyBucketBatch({
|
|
126
|
-
minBucketChanges: this.minBucketChanges,
|
|
127
|
-
exclude: recentlyCompacted
|
|
128
|
-
});
|
|
129
|
-
if (buckets.length == 0) {
|
|
130
|
-
// All done
|
|
121
|
+
for await (let buckets of this.dirtyBucketBatches({
|
|
122
|
+
minBucketChanges: this.minBucketChanges,
|
|
123
|
+
minChangeRatio: this.minChangeRatio
|
|
124
|
+
})) {
|
|
125
|
+
if (this.signal?.aborted) {
|
|
131
126
|
break;
|
|
132
127
|
}
|
|
128
|
+
if (buckets.length == 0) {
|
|
129
|
+
continue;
|
|
130
|
+
}
|
|
131
|
+
|
|
133
132
|
for (let { bucket } of buckets) {
|
|
134
133
|
await this.compactSingleBucket(bucket);
|
|
135
|
-
recentlyCompacted.push(bucket);
|
|
136
|
-
}
|
|
137
|
-
if (recentlyCompacted.length > TRACK_RECENTLY_COMPACTED_NUMBER) {
|
|
138
|
-
recentlyCompacted = recentlyCompacted.slice(-TRACK_RECENTLY_COMPACTED_NUMBER);
|
|
139
134
|
}
|
|
140
135
|
}
|
|
141
136
|
}
|
|
@@ -491,13 +486,13 @@ export class MongoCompactor {
|
|
|
491
486
|
async populateChecksums(options: { minBucketChanges: number }): Promise<PopulateChecksumCacheResults> {
|
|
492
487
|
let count = 0;
|
|
493
488
|
while (!this.signal?.aborted) {
|
|
494
|
-
const buckets = await this.
|
|
495
|
-
if (buckets.length == 0) {
|
|
489
|
+
const buckets = await this.dirtyBucketBatchForChecksums(options);
|
|
490
|
+
if (buckets.length == 0 || this.signal?.aborted) {
|
|
496
491
|
// All done
|
|
497
492
|
break;
|
|
498
493
|
}
|
|
494
|
+
|
|
499
495
|
const start = Date.now();
|
|
500
|
-
logger.info(`Calculating checksums for batch of ${buckets.length} buckets`);
|
|
501
496
|
|
|
502
497
|
// Filter batch by estimated bucket size, to reduce possibility of timeouts
|
|
503
498
|
let checkBuckets: typeof buckets = [];
|
|
@@ -509,22 +504,97 @@ export class MongoCompactor {
|
|
|
509
504
|
break;
|
|
510
505
|
}
|
|
511
506
|
}
|
|
507
|
+
logger.info(
|
|
508
|
+
`Calculating checksums for batch of ${buckets.length} buckets, estimated count of ${totalCountEstimate}`
|
|
509
|
+
);
|
|
512
510
|
await this.updateChecksumsBatch(checkBuckets.map((b) => b.bucket));
|
|
513
511
|
logger.info(`Updated checksums for batch of ${checkBuckets.length} buckets in ${Date.now() - start}ms`);
|
|
514
|
-
count +=
|
|
512
|
+
count += checkBuckets.length;
|
|
515
513
|
}
|
|
516
514
|
return { buckets: count };
|
|
517
515
|
}
|
|
518
516
|
|
|
517
|
+
/**
|
|
518
|
+
* Return batches of dirty buckets.
|
|
519
|
+
*
|
|
520
|
+
* Can be used to iterate through all buckets.
|
|
521
|
+
*
|
|
522
|
+
* minBucketChanges: minimum number of changes for a bucket to be included in the results.
|
|
523
|
+
* minChangeRatio: minimum ratio of changes to total ops for a bucket to be included in the results, number between 0 and 1.
|
|
524
|
+
*/
|
|
525
|
+
private async *dirtyBucketBatches(options: {
|
|
526
|
+
minBucketChanges: number;
|
|
527
|
+
minChangeRatio: number;
|
|
528
|
+
}): AsyncGenerator<{ bucket: string; estimatedCount: number }[]> {
|
|
529
|
+
// Previously, we used an index on {_id.g: 1, estimate_since_compact.count: 1} to only fetch buckets with changes.
|
|
530
|
+
// This works well if there are only a small number of buckets with changes.
|
|
531
|
+
// However, if buckets are continuously modified while we are compacting, we get the same buckets over and over again.
|
|
532
|
+
// This has caused the compact process to re-read the same collection around 5 times in total, which is very inefficient.
|
|
533
|
+
// To solve this, we now just iterate through all buckets, and filter out the ones with low changes.
|
|
534
|
+
|
|
535
|
+
if (options.minBucketChanges <= 0) {
|
|
536
|
+
throw new ReplicationAssertionError('minBucketChanges must be >= 1');
|
|
537
|
+
}
|
|
538
|
+
let lastId = { g: this.group_id, b: new mongo.MinKey() as any };
|
|
539
|
+
const maxId = { g: this.group_id, b: new mongo.MaxKey() as any };
|
|
540
|
+
while (true) {
|
|
541
|
+
const batch = await this.db.bucket_state
|
|
542
|
+
.find(
|
|
543
|
+
{
|
|
544
|
+
_id: { $gt: lastId, $lt: maxId },
|
|
545
|
+
'estimate_since_compact.count': { $gte: options.minBucketChanges }
|
|
546
|
+
},
|
|
547
|
+
{
|
|
548
|
+
projection: {
|
|
549
|
+
_id: 1,
|
|
550
|
+
estimate_since_compact: 1,
|
|
551
|
+
compacted_state: 1
|
|
552
|
+
},
|
|
553
|
+
sort: {
|
|
554
|
+
_id: 1
|
|
555
|
+
},
|
|
556
|
+
limit: 2000,
|
|
557
|
+
maxTimeMS: MONGO_OPERATION_TIMEOUT_MS
|
|
558
|
+
}
|
|
559
|
+
)
|
|
560
|
+
.toArray();
|
|
561
|
+
if (batch.length == 0) {
|
|
562
|
+
break;
|
|
563
|
+
}
|
|
564
|
+
lastId = batch[batch.length - 1]._id;
|
|
565
|
+
const mapped = batch.map((b) => {
|
|
566
|
+
const updatedCount = b.estimate_since_compact?.count ?? 0;
|
|
567
|
+
const totalCount = (b.compacted_state?.count ?? 0) + updatedCount;
|
|
568
|
+
const updatedBytes = b.estimate_since_compact?.bytes ?? 0;
|
|
569
|
+
const totalBytes = (b.compacted_state?.bytes ?? 0) + updatedBytes;
|
|
570
|
+
const dirtyChangeNumber = totalCount > 0 ? updatedCount / totalCount : 0;
|
|
571
|
+
const dirtyChangeBytes = totalBytes > 0 ? updatedBytes / totalBytes : 0;
|
|
572
|
+
return {
|
|
573
|
+
bucket: b._id.b,
|
|
574
|
+
estimatedCount: totalCount,
|
|
575
|
+
dirtyRatio: Math.max(dirtyChangeNumber, dirtyChangeBytes)
|
|
576
|
+
};
|
|
577
|
+
});
|
|
578
|
+
const filtered = mapped.filter(
|
|
579
|
+
(b) => b.estimatedCount >= options.minBucketChanges && b.dirtyRatio >= options.minChangeRatio
|
|
580
|
+
);
|
|
581
|
+
yield filtered;
|
|
582
|
+
}
|
|
583
|
+
}
|
|
584
|
+
|
|
519
585
|
/**
|
|
520
586
|
* Returns a batch of dirty buckets - buckets with most changes first.
|
|
521
587
|
*
|
|
522
588
|
* This cannot be used to iterate on its own - the client is expected to process these buckets and
|
|
523
589
|
* set estimate_since_compact.count: 0 when done, before fetching the next batch.
|
|
590
|
+
*
|
|
591
|
+
* Unlike dirtyBucketBatches, used for compacting, this is specifically designed to be resumable after a restart,
|
|
592
|
+
* since it is used as the last step for initial replication.
|
|
593
|
+
*
|
|
594
|
+
* We currently don't get new data while doing populateChecksums, so we don't need to worry about buckets changing while processing.
|
|
524
595
|
*/
|
|
525
|
-
private async
|
|
596
|
+
private async dirtyBucketBatchForChecksums(options: {
|
|
526
597
|
minBucketChanges: number;
|
|
527
|
-
exclude?: string[];
|
|
528
598
|
}): Promise<{ bucket: string; estimatedCount: number }[]> {
|
|
529
599
|
if (options.minBucketChanges <= 0) {
|
|
530
600
|
throw new ReplicationAssertionError('minBucketChanges must be >= 1');
|
|
@@ -534,8 +604,7 @@ export class MongoCompactor {
|
|
|
534
604
|
.find(
|
|
535
605
|
{
|
|
536
606
|
'_id.g': this.group_id,
|
|
537
|
-
'estimate_since_compact.count': { $gte: options.minBucketChanges }
|
|
538
|
-
'_id.b': { $nin: options.exclude ?? [] }
|
|
607
|
+
'estimate_since_compact.count': { $gte: options.minBucketChanges }
|
|
539
608
|
},
|
|
540
609
|
{
|
|
541
610
|
projection: {
|