@powersync/service-module-mongodb-storage 0.12.1 → 0.12.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +9 -0
- package/dist/migrations/db/migrations/1741697235857-bucket-state-index.js +1 -4
- package/dist/migrations/db/migrations/1741697235857-bucket-state-index.js.map +1 -1
- package/dist/storage/MongoBucketStorage.d.ts +3 -2
- package/dist/storage/MongoBucketStorage.js +4 -2
- package/dist/storage/MongoBucketStorage.js.map +1 -1
- package/dist/storage/implementation/MongoChecksums.d.ts +45 -13
- package/dist/storage/implementation/MongoChecksums.js +148 -135
- package/dist/storage/implementation/MongoChecksums.js.map +1 -1
- package/dist/storage/implementation/MongoCompactor.js +23 -21
- package/dist/storage/implementation/MongoCompactor.js.map +1 -1
- package/dist/storage/implementation/MongoSyncBucketStorage.d.ts +5 -2
- package/dist/storage/implementation/MongoSyncBucketStorage.js +3 -3
- package/dist/storage/implementation/MongoSyncBucketStorage.js.map +1 -1
- package/dist/storage/implementation/MongoTestStorageFactoryGenerator.d.ts +2 -0
- package/dist/storage/implementation/MongoTestStorageFactoryGenerator.js +4 -3
- package/dist/storage/implementation/MongoTestStorageFactoryGenerator.js.map +1 -1
- package/dist/storage/implementation/db.d.ts +4 -0
- package/dist/storage/implementation/db.js +10 -0
- package/dist/storage/implementation/db.js.map +1 -1
- package/dist/storage/implementation/models.d.ts +4 -0
- package/package.json +3 -3
- package/src/migrations/db/migrations/1741697235857-bucket-state-index.ts +1 -7
- package/src/storage/MongoBucketStorage.ts +4 -3
- package/src/storage/implementation/MongoChecksums.ts +172 -150
- package/src/storage/implementation/MongoCompactor.ts +23 -22
- package/src/storage/implementation/MongoSyncBucketStorage.ts +9 -4
- package/src/storage/implementation/MongoTestStorageFactoryGenerator.ts +7 -4
- package/src/storage/implementation/db.ts +14 -0
- package/src/storage/implementation/models.ts +4 -0
- package/test/src/__snapshots__/storage.test.ts.snap +17 -1
- package/test/src/storage.test.ts +38 -1
- package/test/src/storage_compacting.test.ts +120 -5
- package/tsconfig.tsbuildinfo +1 -1

package/src/storage/implementation/MongoChecksums.ts CHANGED
@@ -1,3 +1,4 @@
+import * as lib_mongo from '@powersync/lib-service-mongodb';
 import {
   addPartialChecksums,
   bson,
@@ -11,27 +12,51 @@ import {
   PartialChecksumMap,
   PartialOrFullChecksum
 } from '@powersync/service-core';
-import * as lib_mongo from '@powersync/lib-service-mongodb';
-import { logger } from '@powersync/lib-services-framework';
 import { PowerSyncMongo } from './db.js';
 
+/**
+ * Checksum calculation options, primarily for tests.
+ */
+export interface MongoChecksumOptions {
+  /**
+   * How many buckets to process in a batch when calculating checksums.
+   */
+  bucketBatchLimit?: number;
+
+  /**
+   * Limit on the number of documents to calculate a checksum on at a time.
+   */
+  operationBatchLimit?: number;
+}
+
+const DEFAULT_BUCKET_BATCH_LIMIT = 200;
+const DEFAULT_OPERATION_BATCH_LIMIT = 50_000;
+
 /**
  * Checksum query implementation.
+ *
+ * General implementation flow is:
+ * 1. getChecksums() -> check cache for (partial) matches. If not found or partial match, query the remainder using computePartialChecksums().
+ * 2. computePartialChecksums() -> query bucket_state for partial matches. Query the remainder using computePartialChecksumsDirect().
+ * 3. computePartialChecksumsDirect() -> split into batches of 200 buckets at a time -> computePartialChecksumsInternal()
+ * 4. computePartialChecksumsInternal() -> aggregate over 50_000 operations in bucket_data at a time
  */
 export class MongoChecksums {
   private cache = new ChecksumCache({
     fetchChecksums: (batch) => {
-      return this.
+      return this.computePartialChecksums(batch);
     }
   });
 
   constructor(
     private db: PowerSyncMongo,
-    private group_id: number
+    private group_id: number,
+    private options?: MongoChecksumOptions
   ) {}
 
   /**
-   * Calculate checksums, utilizing the cache
+   * Calculate checksums, utilizing the cache for partial checkums, and querying the remainder from
+   * the database (bucket_state + bucket_data).
    */
   async getChecksums(checkpoint: InternalOpId, buckets: string[]): Promise<ChecksumMap> {
     return this.cache.getChecksumMap(checkpoint, buckets);
@@ -42,11 +67,15 @@ export class MongoChecksums {
   }
 
   /**
-   * Calculate (partial) checksums from bucket_state and
+   * Calculate (partial) checksums from bucket_state (pre-aggregated) and bucket_data (individual operations).
+   *
+   * Results are not cached here. This method is only called by {@link ChecksumCache.getChecksumMap},
+   * which is responsible for caching its result.
    *
-   *
+   * As long as data is compacted regularly, this should be fast. Large buckets without pre-compacted bucket_state
+   * can be slow.
   */
-  private async
+  private async computePartialChecksums(batch: FetchPartialBucketChecksum[]): Promise<PartialChecksumMap> {
     if (batch.length == 0) {
       return new Map();
     }
@@ -100,7 +129,7 @@ export class MongoChecksums {
       };
     });
 
-    const queriedChecksums = await this.
+    const queriedChecksums = await this.computePartialChecksumsDirect(mappedRequests);
 
     return new Map<string, PartialOrFullChecksum>(
       batch.map((request) => {
@@ -117,61 +146,150 @@ export class MongoChecksums {
   }
 
   /**
-   * Calculate (partial) checksums from the data collection directly.
+   * Calculate (partial) checksums from the data collection directly, bypassing the cache and bucket_state.
+   *
+   * Can be used directly in cases where the cache should be bypassed, such as from a compact job.
+   *
+   * Internally, we do calculations in smaller batches of buckets as appropriate.
+   *
+   * For large buckets, this can be slow, but should not time out as the underlying queries are performed in
+   * smaller batches.
   */
-  async
-
-
-
-
-
-
-
+  public async computePartialChecksumsDirect(batch: FetchPartialBucketChecksum[]): Promise<PartialChecksumMap> {
+    // Limit the number of buckets we query for at a time.
+    const bucketBatchLimit = this.options?.bucketBatchLimit ?? DEFAULT_BUCKET_BATCH_LIMIT;
+
+    if (batch.length < bucketBatchLimit) {
+      // Single batch - no need for splitting the batch and merging results
+      return await this.computePartialChecksumsInternal(batch);
+    }
+    // Split the batch and merge results
+    let results = new Map<string, PartialOrFullChecksum>();
+    for (let i = 0; i < batch.length; i += bucketBatchLimit) {
+      const bucketBatch = batch.slice(i, i + bucketBatchLimit);
+      const batchResults = await this.computePartialChecksumsInternal(bucketBatch);
+      for (let r of batchResults.values()) {
+        results.set(r.bucket, r);
      }
-      throw lib_mongo.mapQueryError(e, 'while reading checksums');
    }
+    return results;
  }
 
-
-
+  /**
+   * Query a batch of checksums.
+   *
+   * We limit the number of operations that the query aggregates in each sub-batch, to avoid potential query timeouts.
+   *
+   * `batch` must be limited to DEFAULT_BUCKET_BATCH_LIMIT buckets before calling this.
+   */
+  private async computePartialChecksumsInternal(batch: FetchPartialBucketChecksum[]): Promise<PartialChecksumMap> {
+    const batchLimit = this.options?.operationBatchLimit ?? DEFAULT_OPERATION_BATCH_LIMIT;
+
+    // Map requests by bucket. We adjust this as we get partial results.
+    let requests = new Map<string, FetchPartialBucketChecksum>();
     for (let request of batch) {
-
-        _id: {
-          $gt: {
-            g: this.group_id,
-            b: request.bucket,
-            o: request.start ?? new bson.MinKey()
-          },
-          $lte: {
-            g: this.group_id,
-            b: request.bucket,
-            o: request.end
-          }
-        }
-      });
+      requests.set(request.bucket, request);
    }
 
-    const
-
-
-
-
-
+    const partialChecksums = new Map<string, PartialOrFullChecksum>();
+
+    while (requests.size > 0) {
+      const filters: any[] = [];
+      for (let request of requests.values()) {
+        filters.push({
+          _id: {
+            $gt: {
+              g: this.group_id,
+              b: request.bucket,
+              o: request.start ?? new bson.MinKey()
+            },
+            $lte: {
+              g: this.group_id,
+              b: request.bucket,
+              o: request.end
            }
-        }
-
-
-          { session: undefined, readConcern: 'snapshot', maxTimeMS: lib_mongo.MONGO_CHECKSUM_TIMEOUT_MS }
-        )
-        // Don't map the error here - we want to keep timeout errors as-is
-        .toArray();
+          }
+        });
+      }
 
-
-
+      // Aggregate over a max of `batchLimit` operations at a time.
+      // Let's say we have 3 buckets (A, B, C), each with 10 operations, and our batch limit is 12.
+      // Then we'll do three batches:
+      // 1. Query: A[1-end], B[1-end], C[1-end]
+      //    Returns: A[1-10], B[1-2]
+      // 2. Query: B[3-end], C[1-end]
+      //    Returns: B[3-10], C[1-4]
+      // 3. Query: C[5-end]
+      //    Returns: C[5-10]
+      const aggregate = await this.db.bucket_data
+        .aggregate(
+          [
+            {
+              $match: {
+                $or: filters
+              }
+            },
+            // sort and limit _before_ grouping
+            { $sort: { _id: 1 } },
+            { $limit: batchLimit },
+            {
+              $group: {
+                _id: '$_id.b',
+                // Historically, checksum may be stored as 'int' or 'double'.
+                // More recently, this should be a 'long'.
+                // $toLong ensures that we always sum it as a long, avoiding inaccuracies in the calculations.
+                checksum_total: { $sum: { $toLong: '$checksum' } },
+                count: { $sum: 1 },
+                has_clear_op: {
+                  $max: {
+                    $cond: [{ $eq: ['$op', 'CLEAR'] }, 1, 0]
+                  }
+                },
+                last_op: { $max: '$_id.o' }
+              }
+            }
+          ],
+          { session: undefined, readConcern: 'snapshot', maxTimeMS: lib_mongo.MONGO_CHECKSUM_TIMEOUT_MS }
+        )
+        .toArray()
+        .catch((e) => {
+          throw lib_mongo.mapQueryError(e, 'while reading checksums');
+        });
+
+      let batchCount = 0;
+      let limitReached = false;
+      for (let doc of aggregate) {
         const bucket = doc._id;
-
-
-
+        const checksum = checksumFromAggregate(doc);
+
+        const existing = partialChecksums.get(bucket);
+        if (existing != null) {
+          partialChecksums.set(bucket, addPartialChecksums(bucket, existing, checksum));
+        } else {
+          partialChecksums.set(bucket, checksum);
+        }
+
+        batchCount += doc.count;
+        if (batchCount == batchLimit) {
+          // Limit reached. Request more in the next batch.
+          // Note that this only affects the _last_ bucket in a batch.
+          limitReached = true;
+          const req = requests.get(bucket);
+          requests.set(bucket, {
+            bucket,
+            start: doc.last_op,
+            end: req!.end
+          });
+        } else {
+          // All done for this bucket
+          requests.delete(bucket);
+        }
+        batchCount++;
+      }
+      if (!limitReached) {
+        break;
+      }
+    }
 
     return new Map<string, PartialOrFullChecksum>(
       batch.map((request) => {
@@ -197,106 +315,10 @@ export class MongoChecksums {
       })
     );
   }
-
-  /**
-   * Checksums for large buckets can run over the query timeout.
-   * To avoid this, we query in batches.
-   * This version can handle larger amounts of data, but is slower, especially for many buckets.
-   */
-  async queryPartialChecksumsFallback(batch: FetchPartialBucketChecksum[]): Promise<PartialChecksumMap> {
-    const partialChecksums = new Map<string, PartialOrFullChecksum>();
-    for (let request of batch) {
-      const checksum = await this.slowChecksum(request);
-      partialChecksums.set(request.bucket, checksum);
-    }
-
-    return partialChecksums;
-  }
-
-  private async slowChecksum(request: FetchPartialBucketChecksum): Promise<PartialOrFullChecksum> {
-    const batchLimit = 50_000;
-
-    let lowerBound = 0n;
-    const bucket = request.bucket;
-
-    let runningChecksum: PartialOrFullChecksum = {
-      bucket,
-      partialCount: 0,
-      partialChecksum: 0
-    };
-    if (request.start == null) {
-      runningChecksum = {
-        bucket,
-        count: 0,
-        checksum: 0
-      };
-    }
-
-    while (true) {
-      const filter = {
-        _id: {
-          $gt: {
-            g: this.group_id,
-            b: bucket,
-            o: lowerBound
-          },
-          $lte: {
-            g: this.group_id,
-            b: bucket,
-            o: request.end
-          }
-        }
-      };
-      const docs = await this.db.bucket_data
-        .aggregate(
-          [
-            {
-              $match: filter
-            },
-            // sort and limit _before_ grouping
-            { $sort: { _id: 1 } },
-            { $limit: batchLimit },
-            CHECKSUM_QUERY_GROUP_STAGE
-          ],
-          { session: undefined, readConcern: 'snapshot', maxTimeMS: lib_mongo.MONGO_CHECKSUM_TIMEOUT_MS }
-        )
-        .toArray();
-      const doc = docs[0];
-      if (doc == null) {
-        return runningChecksum;
-      }
-      const partial = checksumFromAggregate(doc);
-      runningChecksum = addPartialChecksums(bucket, runningChecksum, partial);
-      const isFinal = doc.count != batchLimit;
-      if (isFinal) {
-        break;
-      } else {
-        lowerBound = doc.last_op;
-      }
-    }
-    return runningChecksum;
-  }
 }
 
-const CHECKSUM_QUERY_GROUP_STAGE = {
-  $group: {
-    _id: '$_id.b',
-    // Historically, checksum may be stored as 'int' or 'double'.
-    // More recently, this should be a 'long'.
-    // $toLong ensures that we always sum it as a long, avoiding inaccuracies in the calculations.
-    checksum_total: { $sum: { $toLong: '$checksum' } },
-    count: { $sum: 1 },
-    has_clear_op: {
-      $max: {
-        $cond: [{ $eq: ['$op', 'CLEAR'] }, 1, 0]
-      }
-    },
-    last_op: { $max: '$_id.o' }
-  }
-};
-
 /**
- * Convert output of
+ * Convert output of the $group stage into a checksum.
 */
 function checksumFromAggregate(doc: bson.Document): PartialOrFullChecksum {
   const partialChecksum = Number(BigInt(doc.checksum_total) & 0xffffffffn) & 0xffffffff;
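The comments added to `computePartialChecksumsInternal` describe how each aggregation is capped at `operationBatchLimit` operations and how the last, possibly truncated, bucket is resumed from its `last_op`. The sketch below illustrates that loop in isolation; it is a simplified, in-memory stand-in for the `bucket_data` aggregation, and the names (`Op`, `PartialResult`, `aggregateBatch`, `checksumAll`) are illustrative only, not part of the package's API. The checksum arithmetic is reduced to a 32-bit wrapping sum.

```ts
// Standalone sketch of the operation-capped batching. Names and shapes are illustrative.
interface Op {
  bucket: string;
  opId: number;
  checksum: number;
}

interface PartialResult {
  bucket: string;
  count: number;
  checksum: number;
  lastOp: number;
}

// One "aggregation" pass: take up to `limit` ops (ordered by bucket, then opId)
// from the buckets still being tracked, starting after each bucket's cursor,
// and group them per bucket - analogous to the $sort / $limit / $group pipeline.
function aggregateBatch(ops: Op[], cursors: Map<string, number>, limit: number): PartialResult[] {
  const selected = ops
    .filter((op) => cursors.has(op.bucket) && op.opId > cursors.get(op.bucket)!)
    .sort((a, b) => (a.bucket < b.bucket ? -1 : a.bucket > b.bucket ? 1 : a.opId - b.opId))
    .slice(0, limit);

  const grouped = new Map<string, PartialResult>();
  for (const op of selected) {
    const entry = grouped.get(op.bucket) ?? { bucket: op.bucket, count: 0, checksum: 0, lastOp: 0 };
    entry.count += 1;
    entry.checksum = (entry.checksum + op.checksum) | 0; // simplified 32-bit sum
    entry.lastOp = op.opId;
    grouped.set(op.bucket, entry);
  }
  return [...grouped.values()];
}

// Repeat until no bucket was cut off by the limit, resuming the truncated
// bucket from its lastOp - mirroring the A/B/C walkthrough in the comments.
function checksumAll(ops: Op[], buckets: string[], limit: number): Map<string, PartialResult> {
  const cursors = new Map<string, number>(buckets.map((b) => [b, 0] as const));
  const totals = new Map<string, PartialResult>();
  while (cursors.size > 0) {
    const batch = aggregateBatch(ops, cursors, limit);
    let batchCount = 0;
    let limitReached = false;
    for (const partial of batch) {
      const existing = totals.get(partial.bucket);
      totals.set(
        partial.bucket,
        existing
          ? { ...partial, count: existing.count + partial.count, checksum: (existing.checksum + partial.checksum) | 0 }
          : partial
      );
      batchCount += partial.count;
      if (batchCount == limit) {
        // This bucket may have been cut off - resume it after its last op.
        limitReached = true;
        cursors.set(partial.bucket, partial.lastOp);
      } else {
        // All operations for this bucket have been seen.
        cursors.delete(partial.bucket);
      }
    }
    if (!limitReached) {
      break;
    }
  }
  return totals;
}

// Example: 3 buckets of 10 ops each with a limit of 12 -> resolved in three passes.
const ops: Op[] = ['A', 'B', 'C'].flatMap((bucket) =>
  Array.from({ length: 10 }, (_, i) => ({ bucket, opId: i + 1, checksum: i + 1 }))
);
console.log(checksumAll(ops, ['A', 'B', 'C'], 12));
```

Running the example at the bottom reproduces the three-pass A[1-10]/B[1-2], B[3-10]/C[1-4], C[5-10] walkthrough from the comments.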
package/src/storage/implementation/MongoCompactor.ts CHANGED
@@ -6,7 +6,6 @@ import { PowerSyncMongo } from './db.js';
 import { BucketDataDocument, BucketDataKey, BucketStateDocument } from './models.js';
 import { MongoSyncBucketStorage } from './MongoSyncBucketStorage.js';
 import { cacheKey } from './OperationBatch.js';
-import { readSingleBatch } from './util.js';
 
 interface CurrentBucketState {
   /** Bucket name */
@@ -329,15 +328,11 @@ export class MongoCompactor {
               count: 0,
               bytes: 0
             }
-          },
-          $setOnInsert: {
-            // Only set this if we're creating the document.
-            // In all other cases, the replication process will have a set a more accurate id.
-            last_op: this.maxOpId
           }
         },
-        // We generally expect this to have been created before
-
+        // We generally expect this to have been created before.
+        // We don't create new ones here, to avoid issues with the unique index on bucket_updates.
+        upsert: false
       }
     });
   }
@@ -484,15 +479,25 @@ export class MongoCompactor {
   * Subset of compact, only populating checksums where relevant.
   */
  async populateChecksums() {
-
+    // This is updated after each batch
+    let lowerBound: BucketStateDocument['_id'] = {
+      g: this.group_id,
+      b: new mongo.MinKey() as any
+    };
+    // This is static
+    const upperBound: BucketStateDocument['_id'] = {
+      g: this.group_id,
+      b: new mongo.MaxKey() as any
+    };
    while (!this.signal?.aborted) {
      // By filtering buckets, we effectively make this "resumeable".
-
+      const filter: mongo.Filter<BucketStateDocument> = {
+        _id: {
+          $gt: lowerBound,
+          $lt: upperBound
+        },
        compacted_state: { $exists: false }
      };
-      if (lastId) {
-        filter._id = { $gt: lastId };
-      }
 
      const bucketsWithoutChecksums = await this.db.bucket_state
        .find(filter, {
@@ -515,12 +520,12 @@ export class MongoCompactor {
 
      await this.updateChecksumsBatch(bucketsWithoutChecksums.map((b) => b._id.b));
 
-
+      lowerBound = bucketsWithoutChecksums[bucketsWithoutChecksums.length - 1]._id;
    }
  }
 
  private async updateChecksumsBatch(buckets: string[]) {
-    const checksums = await this.storage.checksums.
+    const checksums = await this.storage.checksums.computePartialChecksumsDirect(
      buckets.map((bucket) => {
        return {
          bucket,
@@ -551,15 +556,11 @@ export class MongoCompactor {
              checksum: BigInt(bucketChecksum.checksum),
              bytes: null
            }
-          },
-          $setOnInsert: {
-            // Only set this if we're creating the document.
-            // In all other cases, the replication process will have a set a more accurate id.
-            last_op: this.maxOpId
          }
        },
-        // We
-
+        // We don't create new ones here - it gets tricky to get the last_op right with the unique index on:
+        // bucket_updates: {'id.g': 1, 'last_op': 1}
+        upsert: false
      }
    });
  }
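`populateChecksums` above scans `bucket_state` in `_id` order between a `MinKey`/`MaxKey` bound for the group and advances the lower bound after each batch, which is what makes the job resumable. Below is a minimal sketch of that scan pattern using the MongoDB Node.js driver directly; the database name, the batch size of 200, and the function and interface names are assumptions for illustration, not the module's actual wiring.

```ts
import { MaxKey, MinKey, MongoClient } from 'mongodb';

// Trimmed to the fields the scan relies on; not the package's full document type.
interface BucketStateDoc {
  _id: { g: number; b: any };
  compacted_state?: unknown;
}

export async function scanBucketsWithoutChecksums(client: MongoClient, groupId: number) {
  // Database name is assumed for this sketch.
  const bucketState = client.db('powersync').collection<BucketStateDoc>('bucket_state');

  // Resumable lower bound - advanced after each batch.
  let lowerBound: BucketStateDoc['_id'] = { g: groupId, b: new MinKey() };
  // Static upper bound - everything in this sync rules group.
  const upperBound: BucketStateDoc['_id'] = { g: groupId, b: new MaxKey() };

  while (true) {
    const batch = await bucketState
      .find(
        {
          _id: { $gt: lowerBound, $lt: upperBound },
          compacted_state: { $exists: false }
        },
        { projection: { _id: 1 }, sort: { _id: 1 }, limit: 200 }
      )
      .toArray();
    if (batch.length == 0) {
      break;
    }

    const buckets = batch.map((doc) => doc._id.b);
    // ... compute and persist checksums for `buckets` here ...
    console.log('processed', buckets.length, 'buckets');

    // Resume after the last _id seen, so an interrupted run can pick up here.
    lowerBound = batch[batch.length - 1]._id;
  }
}
```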
package/src/storage/implementation/MongoSyncBucketStorage.ts CHANGED
@@ -31,12 +31,16 @@ import { MongoBucketStorage } from '../MongoBucketStorage.js';
 import { PowerSyncMongo } from './db.js';
 import { BucketDataDocument, BucketDataKey, BucketStateDocument, SourceKey, SourceTableDocument } from './models.js';
 import { MongoBucketBatch } from './MongoBucketBatch.js';
-import { MongoChecksums } from './MongoChecksums.js';
+import { MongoChecksumOptions, MongoChecksums } from './MongoChecksums.js';
 import { MongoCompactor } from './MongoCompactor.js';
 import { MongoParameterCompactor } from './MongoParameterCompactor.js';
 import { MongoWriteCheckpointAPI } from './MongoWriteCheckpointAPI.js';
 import { idPrefixFilter, mapOpEntry, readSingleBatch, setSessionSnapshotTime } from './util.js';
 
+export interface MongoSyncBucketStorageOptions {
+  checksumOptions?: MongoChecksumOptions;
+}
+
 export class MongoSyncBucketStorage
   extends BaseObserver<storage.SyncRulesBucketStorageListener>
   implements storage.SyncRulesBucketStorage
@@ -52,14 +56,15 @@ export class MongoSyncBucketStorage
     public readonly group_id: number,
     private readonly sync_rules: storage.PersistedSyncRulesContent,
     public readonly slot_name: string,
-    writeCheckpointMode
+    writeCheckpointMode?: storage.WriteCheckpointMode,
+    options?: MongoSyncBucketStorageOptions
   ) {
     super();
     this.db = factory.db;
-    this.checksums = new MongoChecksums(this.db, this.group_id);
+    this.checksums = new MongoChecksums(this.db, this.group_id, options?.checksumOptions);
     this.writeCheckpointAPI = new MongoWriteCheckpointAPI({
       db: this.db,
-      mode: writeCheckpointMode,
+      mode: writeCheckpointMode ?? storage.WriteCheckpointMode.MANAGED,
       sync_rules_id: group_id
     });
   }
package/src/storage/implementation/MongoTestStorageFactoryGenerator.ts CHANGED
@@ -1,10 +1,12 @@
 import { TestStorageOptions } from '@powersync/service-core';
 import { MongoBucketStorage } from '../MongoBucketStorage.js';
 import { connectMongoForTests } from './util.js';
+import { MongoSyncBucketStorageOptions } from './MongoSyncBucketStorage.js';
 
 export type MongoTestStorageOptions = {
   url: string;
   isCI: boolean;
+  internalOptions?: MongoSyncBucketStorageOptions;
 };
 
 export const MongoTestStorageFactoryGenerator = (factoryOptions: MongoTestStorageOptions) => {
@@ -16,13 +18,14 @@ export const MongoTestStorageFactoryGenerator = (factoryOptions: MongoTestStorag
       await db.db.createCollection('bucket_parameters');
     }
 
-    // Full migrations are not currently run for tests, so we manually create this
-    await db.createCheckpointEventsCollection();
-
     if (!options?.doNotClear) {
       await db.clear();
     }
 
-
+    // Full migrations are not currently run for tests, so we manually create the important ones
+    await db.createCheckpointEventsCollection();
+    await db.createBucketStateIndex();
+
+    return new MongoBucketStorage(db, { slot_name_prefix: 'test_' }, factoryOptions.internalOptions);
   };
 };
package/src/storage/implementation/db.ts CHANGED
@@ -127,6 +127,20 @@ export class PowerSyncMongo {
       max: 50 // max number of documents
     });
   }
+
+  /**
+   * Only use in migrations and tests.
+   */
+  async createBucketStateIndex() {
+    // TODO: Implement a better mechanism to use migrations in tests
+    await this.bucket_state.createIndex(
+      {
+        '_id.g': 1,
+        last_op: 1
+      },
+      { name: 'bucket_updates', unique: true }
+    );
+  }
 }
 
 export function createPowerSyncMongo(config: MongoStorageConfig, options?: lib_mongo.MongoConnectionOptions) {
package/src/storage/implementation/models.ts CHANGED
@@ -97,6 +97,10 @@ export interface BucketStateDocument {
     g: number;
     b: string;
   };
+  /**
+   * Important: There is an unique index on {'_id.g': 1, last_op: 1}.
+   * That means the last_op must match an actual op in the bucket, and not the commit checkpoint.
+   */
   last_op: bigint;
   /**
   * If set, this can be treated as "cache" of a checksum at a specific point.
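The comment above documents the unique `bucket_updates` index on `{'_id.g': 1, last_op: 1}` (created by `createBucketStateIndex` in db.ts), which is also the reason the compactor now updates existing `bucket_state` documents with `upsert: false` rather than inserting new ones with a synthetic `last_op`. A small sketch of the constraint, assuming a hypothetical database name and trimming the document shape to the indexed fields:

```ts
import { Long, MongoClient } from 'mongodb';

// Trimmed to the fields covered by the index; not the package's full document type.
interface BucketStateDoc {
  _id: { g: number; b: string };
  last_op: Long;
}

export async function demonstrateUniqueLastOp(client: MongoClient) {
  // Database name is assumed for this sketch.
  const bucketState = client.db('powersync').collection<BucketStateDoc>('bucket_state');

  await bucketState.insertOne({ _id: { g: 1, b: 'bucket_a' }, last_op: Long.fromNumber(100) });
  try {
    // Same group, same last_op, different bucket: rejected by the unique
    // 'bucket_updates' index, even though the _id itself is unique.
    await bucketState.insertOne({ _id: { g: 1, b: 'bucket_b' }, last_op: Long.fromNumber(100) });
  } catch (e: any) {
    console.log(e.code); // 11000 - duplicate key error
  }
}
```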
package/test/src/__snapshots__/storage.test.ts.snap CHANGED
@@ -1,6 +1,22 @@
 // Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html
 
-exports[`Mongo Sync Bucket Storage > empty storage metrics 1`] = `
+exports[`Mongo Sync Bucket Storage - Data > empty storage metrics 1`] = `
+{
+  "operations_size_bytes": 0,
+  "parameters_size_bytes": 0,
+  "replication_size_bytes": 0,
+}
+`;
+
+exports[`Mongo Sync Bucket Storage - split buckets > empty storage metrics 1`] = `
+{
+  "operations_size_bytes": 0,
+  "parameters_size_bytes": 0,
+  "replication_size_bytes": 0,
+}
+`;
+
+exports[`Mongo Sync Bucket Storage - split operations > empty storage metrics 1`] = `
 {
   "operations_size_bytes": 0,
   "parameters_size_bytes": 0,
package/test/src/storage.test.ts CHANGED
@@ -1,7 +1,44 @@
 import { register } from '@powersync/service-core-tests';
 import { describe } from 'vitest';
 import { INITIALIZED_MONGO_STORAGE_FACTORY } from './util.js';
+import { env } from './env.js';
+import { MongoTestStorageFactoryGenerator } from '@module/storage/implementation/MongoTestStorageFactoryGenerator.js';
 
-describe('Mongo Sync Bucket Storage', () =>
+describe('Mongo Sync Bucket Storage - Parameters', () =>
+  register.registerDataStorageParameterTests(INITIALIZED_MONGO_STORAGE_FACTORY));
+
+describe('Mongo Sync Bucket Storage - Data', () =>
+  register.registerDataStorageDataTests(INITIALIZED_MONGO_STORAGE_FACTORY));
+
+describe('Mongo Sync Bucket Storage - Checkpoints', () =>
+  register.registerDataStorageCheckpointTests(INITIALIZED_MONGO_STORAGE_FACTORY));
 
 describe('Sync Bucket Validation', register.registerBucketValidationTests);
+
+describe('Mongo Sync Bucket Storage - split operations', () =>
+  register.registerDataStorageDataTests(
+    MongoTestStorageFactoryGenerator({
+      url: env.MONGO_TEST_URL,
+      isCI: env.CI,
+      internalOptions: {
+        checksumOptions: {
+          bucketBatchLimit: 100,
+          operationBatchLimit: 1
+        }
+      }
+    })
+  ));
+
+describe('Mongo Sync Bucket Storage - split buckets', () =>
+  register.registerDataStorageDataTests(
+    MongoTestStorageFactoryGenerator({
+      url: env.MONGO_TEST_URL,
+      isCI: env.CI,
+      internalOptions: {
+        checksumOptions: {
+          bucketBatchLimit: 1,
+          operationBatchLimit: 100
+        }
+      }
+    })
+  ));