@powersync/service-module-mongodb-storage 0.0.0-dev-20250901073220 → 0.0.0-dev-20250903064005
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +14 -5
- package/dist/migrations/db/migrations/1741697235857-bucket-state-index.js +1 -4
- package/dist/migrations/db/migrations/1741697235857-bucket-state-index.js.map +1 -1
- package/dist/storage/MongoBucketStorage.d.ts +3 -2
- package/dist/storage/MongoBucketStorage.js +4 -2
- package/dist/storage/MongoBucketStorage.js.map +1 -1
- package/dist/storage/MongoReportStorage.d.ts +0 -1
- package/dist/storage/MongoReportStorage.js +0 -2
- package/dist/storage/MongoReportStorage.js.map +1 -1
- package/dist/storage/implementation/MongoChecksums.d.ts +45 -13
- package/dist/storage/implementation/MongoChecksums.js +148 -135
- package/dist/storage/implementation/MongoChecksums.js.map +1 -1
- package/dist/storage/implementation/MongoCompactor.js +23 -21
- package/dist/storage/implementation/MongoCompactor.js.map +1 -1
- package/dist/storage/implementation/MongoSyncBucketStorage.d.ts +5 -2
- package/dist/storage/implementation/MongoSyncBucketStorage.js +3 -3
- package/dist/storage/implementation/MongoSyncBucketStorage.js.map +1 -1
- package/dist/storage/implementation/db.d.ts +4 -0
- package/dist/storage/implementation/db.js +10 -1
- package/dist/storage/implementation/db.js.map +1 -1
- package/dist/storage/implementation/models.d.ts +4 -0
- package/dist/utils/test-utils.d.ts +2 -0
- package/dist/utils/test-utils.js +1 -1
- package/dist/utils/test-utils.js.map +1 -1
- package/package.json +6 -6
- package/src/migrations/db/migrations/1741697235857-bucket-state-index.ts +1 -7
- package/src/storage/MongoBucketStorage.ts +4 -3
- package/src/storage/MongoReportStorage.ts +0 -3
- package/src/storage/implementation/MongoChecksums.ts +172 -150
- package/src/storage/implementation/MongoCompactor.ts +23 -21
- package/src/storage/implementation/MongoSyncBucketStorage.ts +9 -4
- package/src/storage/implementation/db.ts +14 -1
- package/src/storage/implementation/models.ts +4 -0
- package/src/utils/test-utils.ts +3 -1
- package/test/src/__snapshots__/storage.test.ts.snap +17 -1
- package/test/src/connection-report-storage.test.ts +17 -17
- package/test/src/storage.test.ts +38 -1
- package/test/src/storage_compacting.test.ts +120 -5
- package/tsconfig.tsbuildinfo +1 -1
package/src/storage/implementation/MongoChecksums.ts
CHANGED
@@ -1,3 +1,4 @@
+import * as lib_mongo from '@powersync/lib-service-mongodb';
 import {
   addPartialChecksums,
   bson,
@@ -11,27 +12,51 @@ import {
   PartialChecksumMap,
   PartialOrFullChecksum
 } from '@powersync/service-core';
-import * as lib_mongo from '@powersync/lib-service-mongodb';
-import { logger } from '@powersync/lib-services-framework';
 import { PowerSyncMongo } from './db.js';

+/**
+ * Checksum calculation options, primarily for tests.
+ */
+export interface MongoChecksumOptions {
+  /**
+   * How many buckets to process in a batch when calculating checksums.
+   */
+  bucketBatchLimit?: number;
+
+  /**
+   * Limit on the number of documents to calculate a checksum on at a time.
+   */
+  operationBatchLimit?: number;
+}
+
+const DEFAULT_BUCKET_BATCH_LIMIT = 200;
+const DEFAULT_OPERATION_BATCH_LIMIT = 50_000;
+
 /**
  * Checksum query implementation.
+ *
+ * General implementation flow is:
+ * 1. getChecksums() -> check cache for (partial) matches. If not found or partial match, query the remainder using computePartialChecksums().
+ * 2. computePartialChecksums() -> query bucket_state for partial matches. Query the remainder using computePartialChecksumsDirect().
+ * 3. computePartialChecksumsDirect() -> split into batches of 200 buckets at a time -> computePartialChecksumsInternal()
+ * 4. computePartialChecksumsInternal() -> aggregate over 50_000 operations in bucket_data at a time
  */
 export class MongoChecksums {
   private cache = new ChecksumCache({
     fetchChecksums: (batch) => {
-      return this.
+      return this.computePartialChecksums(batch);
     }
   });

   constructor(
     private db: PowerSyncMongo,
-    private group_id: number
+    private group_id: number,
+    private options?: MongoChecksumOptions
   ) {}

   /**
-   * Calculate checksums, utilizing the cache
+   * Calculate checksums, utilizing the cache for partial checkums, and querying the remainder from
+   * the database (bucket_state + bucket_data).
    */
   async getChecksums(checkpoint: InternalOpId, buckets: string[]): Promise<ChecksumMap> {
     return this.cache.getChecksumMap(checkpoint, buckets);
@@ -42,11 +67,15 @@
   }

   /**
-   * Calculate (partial) checksums from bucket_state and
+   * Calculate (partial) checksums from bucket_state (pre-aggregated) and bucket_data (individual operations).
+   *
+   * Results are not cached here. This method is only called by {@link ChecksumCache.getChecksumMap},
+   * which is responsible for caching its result.
    *
-   *
+   * As long as data is compacted regularly, this should be fast. Large buckets without pre-compacted bucket_state
+   * can be slow.
    */
-  private async
+  private async computePartialChecksums(batch: FetchPartialBucketChecksum[]): Promise<PartialChecksumMap> {
     if (batch.length == 0) {
       return new Map();
     }
@@ -100,7 +129,7 @@
       };
     });

-    const queriedChecksums = await this.
+    const queriedChecksums = await this.computePartialChecksumsDirect(mappedRequests);

     return new Map<string, PartialOrFullChecksum>(
       batch.map((request) => {
@@ -117,61 +146,150 @@
   }

   /**
-   * Calculate (partial) checksums from the data collection directly.
+   * Calculate (partial) checksums from the data collection directly, bypassing the cache and bucket_state.
+   *
+   * Can be used directly in cases where the cache should be bypassed, such as from a compact job.
+   *
+   * Internally, we do calculations in smaller batches of buckets as appropriate.
+   *
+   * For large buckets, this can be slow, but should not time out as the underlying queries are performed in
+   * smaller batches.
    */
-  async
-
-
-
-
-
-
-
+  public async computePartialChecksumsDirect(batch: FetchPartialBucketChecksum[]): Promise<PartialChecksumMap> {
+    // Limit the number of buckets we query for at a time.
+    const bucketBatchLimit = this.options?.bucketBatchLimit ?? DEFAULT_BUCKET_BATCH_LIMIT;
+
+    if (batch.length < bucketBatchLimit) {
+      // Single batch - no need for splitting the batch and merging results
+      return await this.computePartialChecksumsInternal(batch);
+    }
+    // Split the batch and merge results
+    let results = new Map<string, PartialOrFullChecksum>();
+    for (let i = 0; i < batch.length; i += bucketBatchLimit) {
+      const bucketBatch = batch.slice(i, i + bucketBatchLimit);
+      const batchResults = await this.computePartialChecksumsInternal(bucketBatch);
+      for (let r of batchResults.values()) {
+        results.set(r.bucket, r);
       }
-      throw lib_mongo.mapQueryError(e, 'while reading checksums');
     }
+    return results;
   }

-
-
+  /**
+   * Query a batch of checksums.
+   *
+   * We limit the number of operations that the query aggregates in each sub-batch, to avoid potential query timeouts.
+   *
+   * `batch` must be limited to DEFAULT_BUCKET_BATCH_LIMIT buckets before calling this.
+   */
+  private async computePartialChecksumsInternal(batch: FetchPartialBucketChecksum[]): Promise<PartialChecksumMap> {
+    const batchLimit = this.options?.operationBatchLimit ?? DEFAULT_OPERATION_BATCH_LIMIT;
+
+    // Map requests by bucket. We adjust this as we get partial results.
+    let requests = new Map<string, FetchPartialBucketChecksum>();
     for (let request of batch) {
-
-        _id: {
-          $gt: {
-            g: this.group_id,
-            b: request.bucket,
-            o: request.start ?? new bson.MinKey()
-          },
-          $lte: {
-            g: this.group_id,
-            b: request.bucket,
-            o: request.end
-          }
-        }
-      });
+      requests.set(request.bucket, request);
     }

-    const
-
-
-
-
-
+    const partialChecksums = new Map<string, PartialOrFullChecksum>();
+
+    while (requests.size > 0) {
+      const filters: any[] = [];
+      for (let request of requests.values()) {
+        filters.push({
+          _id: {
+            $gt: {
+              g: this.group_id,
+              b: request.bucket,
+              o: request.start ?? new bson.MinKey()
+            },
+            $lte: {
+              g: this.group_id,
+              b: request.bucket,
+              o: request.end
             }
-          }
-
-
-        { session: undefined, readConcern: 'snapshot', maxTimeMS: lib_mongo.MONGO_CHECKSUM_TIMEOUT_MS }
-      )
-      // Don't map the error here - we want to keep timeout errors as-is
-      .toArray();
+          }
+        });
+      }

-
-
+      // Aggregate over a max of `batchLimit` operations at a time.
+      // Let's say we have 3 buckets (A, B, C), each with 10 operations, and our batch limit is 12.
+      // Then we'll do three batches:
+      // 1. Query: A[1-end], B[1-end], C[1-end]
+      //    Returns: A[1-10], B[1-2]
+      // 2. Query: B[3-end], C[1-end]
+      //    Returns: B[3-10], C[1-4]
+      // 3. Query: C[5-end]
+      //    Returns: C[5-10]
+      const aggregate = await this.db.bucket_data
+        .aggregate(
+          [
+            {
+              $match: {
+                $or: filters
+              }
+            },
+            // sort and limit _before_ grouping
+            { $sort: { _id: 1 } },
+            { $limit: batchLimit },
+            {
+              $group: {
+                _id: '$_id.b',
+                // Historically, checksum may be stored as 'int' or 'double'.
+                // More recently, this should be a 'long'.
+                // $toLong ensures that we always sum it as a long, avoiding inaccuracies in the calculations.
+                checksum_total: { $sum: { $toLong: '$checksum' } },
+                count: { $sum: 1 },
+                has_clear_op: {
+                  $max: {
+                    $cond: [{ $eq: ['$op', 'CLEAR'] }, 1, 0]
+                  }
+                },
+                last_op: { $max: '$_id.o' }
+              }
+            }
+          ],
+          { session: undefined, readConcern: 'snapshot', maxTimeMS: lib_mongo.MONGO_CHECKSUM_TIMEOUT_MS }
+        )
+        .toArray()
+        .catch((e) => {
+          throw lib_mongo.mapQueryError(e, 'while reading checksums');
+        });
+
+      let batchCount = 0;
+      let limitReached = false;
+      for (let doc of aggregate) {
         const bucket = doc._id;
-
-
-
+        const checksum = checksumFromAggregate(doc);
+
+        const existing = partialChecksums.get(bucket);
+        if (existing != null) {
+          partialChecksums.set(bucket, addPartialChecksums(bucket, existing, checksum));
+        } else {
+          partialChecksums.set(bucket, checksum);
+        }
+
+        batchCount += doc.count;
+        if (batchCount == batchLimit) {
+          // Limit reached. Request more in the next batch.
+          // Note that this only affects the _last_ bucket in a batch.
+          limitReached = true;
+          const req = requests.get(bucket);
+          requests.set(bucket, {
+            bucket,
+            start: doc.last_op,
+            end: req!.end
+          });
+        } else {
+          // All done for this bucket
+          requests.delete(bucket);
+        }
+        batchCount++;
+      }
+      if (!limitReached) {
+        break;
+      }
+    }

     return new Map<string, PartialOrFullChecksum>(
       batch.map((request) => {
@@ -197,106 +315,10 @@ export class MongoChecksums {
       })
     );
   }
-
-  /**
-   * Checksums for large buckets can run over the query timeout.
-   * To avoid this, we query in batches.
-   * This version can handle larger amounts of data, but is slower, especially for many buckets.
-   */
-  async queryPartialChecksumsFallback(batch: FetchPartialBucketChecksum[]): Promise<PartialChecksumMap> {
-    const partialChecksums = new Map<string, PartialOrFullChecksum>();
-    for (let request of batch) {
-      const checksum = await this.slowChecksum(request);
-      partialChecksums.set(request.bucket, checksum);
-    }
-
-    return partialChecksums;
-  }
-
-  private async slowChecksum(request: FetchPartialBucketChecksum): Promise<PartialOrFullChecksum> {
-    const batchLimit = 50_000;
-
-    let lowerBound = 0n;
-    const bucket = request.bucket;
-
-    let runningChecksum: PartialOrFullChecksum = {
-      bucket,
-      partialCount: 0,
-      partialChecksum: 0
-    };
-    if (request.start == null) {
-      runningChecksum = {
-        bucket,
-        count: 0,
-        checksum: 0
-      };
-    }
-
-    while (true) {
-      const filter = {
-        _id: {
-          $gt: {
-            g: this.group_id,
-            b: bucket,
-            o: lowerBound
-          },
-          $lte: {
-            g: this.group_id,
-            b: bucket,
-            o: request.end
-          }
-        }
-      };
-      const docs = await this.db.bucket_data
-        .aggregate(
-          [
-            {
-              $match: filter
-            },
-            // sort and limit _before_ grouping
-            { $sort: { _id: 1 } },
-            { $limit: batchLimit },
-            CHECKSUM_QUERY_GROUP_STAGE
-          ],
-          { session: undefined, readConcern: 'snapshot', maxTimeMS: lib_mongo.MONGO_CHECKSUM_TIMEOUT_MS }
-        )
-        .toArray();
-      const doc = docs[0];
-      if (doc == null) {
-        return runningChecksum;
-      }
-      const partial = checksumFromAggregate(doc);
-      runningChecksum = addPartialChecksums(bucket, runningChecksum, partial);
-      const isFinal = doc.count != batchLimit;
-      if (isFinal) {
-        break;
-      } else {
-        lowerBound = doc.last_op;
-      }
-    }
-    return runningChecksum;
-  }
 }

-const CHECKSUM_QUERY_GROUP_STAGE = {
-  $group: {
-    _id: '$_id.b',
-    // Historically, checksum may be stored as 'int' or 'double'.
-    // More recently, this should be a 'long'.
-    // $toLong ensures that we always sum it as a long, avoiding inaccuracies in the calculations.
-    checksum_total: { $sum: { $toLong: '$checksum' } },
-    count: { $sum: 1 },
-    has_clear_op: {
-      $max: {
-        $cond: [{ $eq: ['$op', 'CLEAR'] }, 1, 0]
-      }
-    },
-    last_op: { $max: '$_id.o' }
-  }
-};
-
 /**
- * Convert output of
+ * Convert output of the $group stage into a checksum.
  */
 function checksumFromAggregate(doc: bson.Document): PartialOrFullChecksum {
   const partialChecksum = Number(BigInt(doc.checksum_total) & 0xffffffffn) & 0xffffffff;
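Editor's note: the new computePartialChecksumsDirect() above splits large checksum requests into fixed-size bucket batches and merges the per-batch maps. A minimal standalone sketch of that splitting pattern follows; the function and type names here are hypothetical and simplified, not part of the package's exported API.

async function computeInBucketBatches<Req, Res extends { bucket: string }>(
  requests: Req[],
  batchLimit: number,
  compute: (batch: Req[]) => Promise<Map<string, Res>>
): Promise<Map<string, Res>> {
  // Small request sets need no splitting or merging.
  if (requests.length < batchLimit) {
    return await compute(requests);
  }
  // Otherwise process fixed-size slices and merge the keyed results.
  const results = new Map<string, Res>();
  for (let i = 0; i < requests.length; i += batchLimit) {
    const slice = requests.slice(i, i + batchLimit);
    for (const partial of (await compute(slice)).values()) {
      results.set(partial.bucket, partial);
    }
  }
  return results;
}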
package/src/storage/implementation/MongoCompactor.ts
CHANGED
@@ -328,15 +328,11 @@ export class MongoCompactor {
             count: 0,
             bytes: 0
           }
-        },
-        $setOnInsert: {
-          // Only set this if we're creating the document.
-          // In all other cases, the replication process will have a set a more accurate id.
-          last_op: this.maxOpId
         }
       },
-      // We generally expect this to have been created before
-
+      // We generally expect this to have been created before.
+      // We don't create new ones here, to avoid issues with the unique index on bucket_updates.
+      upsert: false
     }
   });
 }
@@ -483,15 +479,25 @@
    * Subset of compact, only populating checksums where relevant.
    */
   async populateChecksums() {
-
+    // This is updated after each batch
+    let lowerBound: BucketStateDocument['_id'] = {
+      g: this.group_id,
+      b: new mongo.MinKey() as any
+    };
+    // This is static
+    const upperBound: BucketStateDocument['_id'] = {
+      g: this.group_id,
+      b: new mongo.MaxKey() as any
+    };
     while (!this.signal?.aborted) {
       // By filtering buckets, we effectively make this "resumeable".
-
+      const filter: mongo.Filter<BucketStateDocument> = {
+        _id: {
+          $gt: lowerBound,
+          $lt: upperBound
+        },
         compacted_state: { $exists: false }
       };
-      if (lastId) {
-        filter._id = { $gt: lastId };
-      }

       const bucketsWithoutChecksums = await this.db.bucket_state
         .find(filter, {
@@ -514,12 +520,12 @@

       await this.updateChecksumsBatch(bucketsWithoutChecksums.map((b) => b._id.b));

-
+      lowerBound = bucketsWithoutChecksums[bucketsWithoutChecksums.length - 1]._id;
     }
   }

   private async updateChecksumsBatch(buckets: string[]) {
-    const checksums = await this.storage.checksums.
+    const checksums = await this.storage.checksums.computePartialChecksumsDirect(
       buckets.map((bucket) => {
         return {
           bucket,
@@ -550,15 +556,11 @@
             checksum: BigInt(bucketChecksum.checksum),
             bytes: null
           }
-        },
-        $setOnInsert: {
-          // Only set this if we're creating the document.
-          // In all other cases, the replication process will have a set a more accurate id.
-          last_op: this.maxOpId
         }
       },
-      // We
-
+      // We don't create new ones here - it gets tricky to get the last_op right with the unique index on:
+      // bucket_updates: {'id.g': 1, 'last_op': 1}
+      upsert: false
     }
   });
 }
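Editor's note: populateChecksums() above now resumes by advancing an _id lower bound instead of tracking a separate lastId. A minimal sketch of that resumable, _id-bounded scan, written against the stock mongodb driver with simplified, hypothetical names (collection shape, batch size, and helper names are assumptions, not the package's code):

import { Collection, MaxKey, MinKey } from 'mongodb';

interface BucketStateLike {
  _id: { g: number; b: unknown };
}

async function scanGroupBuckets(
  collection: Collection<BucketStateLike>,
  groupId: number,
  handleBatch: (ids: BucketStateLike['_id'][]) => Promise<void>
): Promise<void> {
  // The lower bound is advanced after every batch, which makes the scan resumable.
  let lowerBound: BucketStateLike['_id'] = { g: groupId, b: new MinKey() };
  // The upper bound is static: stay inside this group's portion of the _id range.
  const upperBound: BucketStateLike['_id'] = { g: groupId, b: new MaxKey() };

  while (true) {
    const batch = await collection
      .find({ _id: { $gt: lowerBound, $lt: upperBound } })
      .sort({ _id: 1 })
      .limit(1000)
      .toArray();
    if (batch.length == 0) {
      break;
    }
    await handleBatch(batch.map((doc) => doc._id));
    // Resume directly after the last processed document.
    lowerBound = batch[batch.length - 1]._id;
  }
}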
package/src/storage/implementation/MongoSyncBucketStorage.ts
CHANGED
@@ -31,13 +31,17 @@ import { MongoBucketStorage } from '../MongoBucketStorage.js';
 import { PowerSyncMongo } from './db.js';
 import { BucketDataDocument, BucketDataKey, BucketStateDocument, SourceKey, SourceTableDocument } from './models.js';
 import { MongoBucketBatch } from './MongoBucketBatch.js';
-import { MongoChecksums } from './MongoChecksums.js';
+import { MongoChecksumOptions, MongoChecksums } from './MongoChecksums.js';
 import { MongoCompactor } from './MongoCompactor.js';
 import { MongoParameterCompactor } from './MongoParameterCompactor.js';
 import { MongoWriteCheckpointAPI } from './MongoWriteCheckpointAPI.js';
 import { idPrefixFilter, mapOpEntry, readSingleBatch, setSessionSnapshotTime } from '../../utils/util.js';


+export interface MongoSyncBucketStorageOptions {
+  checksumOptions?: MongoChecksumOptions;
+}
+
 export class MongoSyncBucketStorage
   extends BaseObserver<storage.SyncRulesBucketStorageListener>
   implements storage.SyncRulesBucketStorage
@@ -53,14 +57,15 @@ export class MongoSyncBucketStorage
     public readonly group_id: number,
     private readonly sync_rules: storage.PersistedSyncRulesContent,
     public readonly slot_name: string,
-    writeCheckpointMode
+    writeCheckpointMode?: storage.WriteCheckpointMode,
+    options?: MongoSyncBucketStorageOptions
   ) {
     super();
     this.db = factory.db;
-    this.checksums = new MongoChecksums(this.db, this.group_id);
+    this.checksums = new MongoChecksums(this.db, this.group_id, options?.checksumOptions);
     this.writeCheckpointAPI = new MongoWriteCheckpointAPI({
       db: this.db,
-      mode: writeCheckpointMode,
+      mode: writeCheckpointMode ?? storage.WriteCheckpointMode.MANAGED,
       sync_rules_id: group_id
     });
   }
package/src/storage/implementation/db.ts
CHANGED
@@ -81,7 +81,6 @@ export class PowerSyncMongo {
     await this.locks.deleteMany({});
     await this.bucket_state.deleteMany({});
     await this.custom_write_checkpoints.deleteMany({});
-    await this.connection_report_events.deleteMany({});
   }

   /**
@@ -145,6 +144,20 @@
     }
     await this.db.createCollection('connection_report_events');
   }
+
+  /**
+   * Only use in migrations and tests.
+   */
+  async createBucketStateIndex() {
+    // TODO: Implement a better mechanism to use migrations in tests
+    await this.bucket_state.createIndex(
+      {
+        '_id.g': 1,
+        last_op: 1
+      },
+      { name: 'bucket_updates', unique: true }
+    );
+  }
 }

 export function createPowerSyncMongo(config: MongoStorageConfig, options?: lib_mongo.MongoConnectionOptions) {
package/src/storage/implementation/models.ts
CHANGED
@@ -98,6 +98,10 @@ export interface BucketStateDocument
     g: number;
     b: string;
   };
+  /**
+   * Important: There is an unique index on {'_id.g': 1, last_op: 1}.
+   * That means the last_op must match an actual op in the bucket, and not the commit checkpoint.
+   */
   last_op: bigint;
   /**
    * If set, this can be treated as "cache" of a checksum at a specific point.
package/src/utils/test-utils.ts
CHANGED
@@ -3,10 +3,12 @@ import { PowerSyncMongo } from '../storage/implementation/db.js';
 import { TestStorageOptions } from '@powersync/service-core';
 import { MongoReportStorage } from '../storage/MongoReportStorage.js';
 import { MongoBucketStorage } from '../storage/MongoBucketStorage.js';
+import { MongoSyncBucketStorageOptions } from '../storage/implementation/MongoSyncBucketStorage.js';

 export type MongoTestStorageOptions = {
   url: string;
   isCI: boolean;
+  internalOptions?: MongoSyncBucketStorageOptions;
 };

 export function mongoTestStorageFactoryGenerator(factoryOptions: MongoTestStorageOptions) {
@@ -25,7 +27,7 @@ export function mongoTestStorageFactoryGenerator(factoryOptions: MongoTestStorag
       await db.clear();
     }

-    return new MongoBucketStorage(db, { slot_name_prefix: 'test_' });
+    return new MongoBucketStorage(db, { slot_name_prefix: 'test_' }, factoryOptions.internalOptions);
   };
 }

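Editor's note: the new internalOptions field above is how tests can thread MongoChecksumOptions (bucketBatchLimit, operationBatchLimit) through to MongoChecksums. A hypothetical call site is sketched below; the option names come from the diffs above, while the import path and connection URL are placeholders, not values from the package.

import { mongoTestStorageFactoryGenerator } from '../../src/utils/test-utils.js'; // path is an assumption

const factory = mongoTestStorageFactoryGenerator({
  url: 'mongodb://localhost:27017/powersync_test', // placeholder URL
  isCI: false,
  internalOptions: {
    checksumOptions: {
      // Small limits force the bucket- and operation-batch splitting paths during tests.
      bucketBatchLimit: 2,
      operationBatchLimit: 100
    }
  }
});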
package/test/src/__snapshots__/storage.test.ts.snap
CHANGED
@@ -1,6 +1,22 @@
 // Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html

-exports[`Mongo Sync Bucket Storage > empty storage metrics 1`] = `
+exports[`Mongo Sync Bucket Storage - Data > empty storage metrics 1`] = `
+{
+  "operations_size_bytes": 0,
+  "parameters_size_bytes": 0,
+  "replication_size_bytes": 0,
+}
+`;
+
+exports[`Mongo Sync Bucket Storage - split buckets > empty storage metrics 1`] = `
+{
+  "operations_size_bytes": 0,
+  "parameters_size_bytes": 0,
+  "replication_size_bytes": 0,
+}
+`;
+
+exports[`Mongo Sync Bucket Storage - split operations > empty storage metrics 1`] = `
 {
   "operations_size_bytes": 0,
   "parameters_size_bytes": 0,
package/test/src/connection-report-storage.test.ts
CHANGED
@@ -9,9 +9,9 @@ const dates = register.REPORT_TEST_DATES;
 const factory = await INITIALIZED_MONGO_REPORT_STORAGE_FACTORY();

 function removeVolatileFields(
-
+  connections: event_types.ClientConnection[]
 ): Partial<event_types.ClientConnection & { _id: string }>[] {
-  return
+  return connections.map((sdk: Partial<event_types.ClientConnection & { _id: string }>) => {
     const { _id, disconnected_at, connected_at, jwt_exp, ...rest } = sdk;
     return {
       ...rest
@@ -57,9 +57,9 @@ describe('Connection reporting storage', async () => {
       user_agent: userData.user_week.user_agent
     });

-    const
-    expect(
-    const cleaned = removeVolatileFields(
+    const connection = await factory.db.connection_report_events.find({ user_id: userData.user_week.user_id }).toArray();
+    expect(connection).toHaveLength(2);
+    const cleaned = removeVolatileFields(connection);
     expect(cleaned).toMatchSnapshot();
   });

@@ -81,14 +81,14 @@ describe('Connection reporting storage', async () => {
       user_agent: userData.user_one.user_agent
     });

-    const
+    const connection = await factory.db.connection_report_events
       .find({ user_id: userData.user_one.user_id, client_id: userData.user_one.client_id })
       .toArray();
-    expect(
-    expect(new Date(
-    expect(new Date(
-    expect(
-    const cleaned = removeVolatileFields(
+    expect(connection).toHaveLength(1);
+    expect(new Date(connection[0].connected_at)).toEqual(newConnectAt);
+    expect(new Date(connection[0].jwt_exp!)).toEqual(jwtExp);
+    expect(connection[0].disconnected_at).toBeUndefined();
+    const cleaned = removeVolatileFields(connection);
     expect(cleaned).toMatchSnapshot();
   });

@@ -111,10 +111,10 @@ describe('Connection reporting storage', async () => {
      connected_at: userData.user_three.connected_at
     });

-    const
-    expect(
-    expect(new Date(
-    const cleaned = removeVolatileFields(
+    const connection = await factory.db.connection_report_events.find({ user_id: userData.user_three.user_id }).toArray();
+    expect(connection).toHaveLength(1);
+    expect(new Date(connection[0].disconnected_at!)).toEqual(disconnectAt);
+    const cleaned = removeVolatileFields(connection);
     expect(cleaned).toMatchSnapshot();
   });

@@ -124,10 +124,10 @@ describe('Connection reporting storage', async () => {
     await factory.deleteOldConnectionData({
       date: dates.weekAgo
     });
-    const
+    const connection = await factory.getClientConnectionReports({
       start: dates.monthAgo,
       end: dates.now
     });
-    expect(
+    expect(connection).toMatchSnapshot();
   });
 });