@powersync/service-core 0.0.0-dev-20240718134716 → 0.0.0-dev-20240725112650
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +11 -6
- package/dist/entry/cli-entry.js +2 -1
- package/dist/entry/cli-entry.js.map +1 -1
- package/dist/entry/commands/compact-action.d.ts +2 -0
- package/dist/entry/commands/compact-action.js +48 -0
- package/dist/entry/commands/compact-action.js.map +1 -0
- package/dist/entry/entry-index.d.ts +1 -0
- package/dist/entry/entry-index.js +1 -0
- package/dist/entry/entry-index.js.map +1 -1
- package/dist/metrics/Metrics.d.ts +4 -3
- package/dist/metrics/Metrics.js +51 -0
- package/dist/metrics/Metrics.js.map +1 -1
- package/dist/replication/WalStream.js +6 -8
- package/dist/replication/WalStream.js.map +1 -1
- package/dist/routes/configure-fastify.d.ts +883 -0
- package/dist/routes/configure-fastify.js +58 -0
- package/dist/routes/configure-fastify.js.map +1 -0
- package/dist/routes/configure-rsocket.d.ts +13 -0
- package/dist/routes/configure-rsocket.js +46 -0
- package/dist/routes/configure-rsocket.js.map +1 -0
- package/dist/routes/endpoints/socket-route.js +6 -14
- package/dist/routes/endpoints/socket-route.js.map +1 -1
- package/dist/routes/endpoints/sync-stream.js +4 -5
- package/dist/routes/endpoints/sync-stream.js.map +1 -1
- package/dist/routes/route-register.d.ts +1 -1
- package/dist/routes/route-register.js +1 -1
- package/dist/routes/route-register.js.map +1 -1
- package/dist/routes/router-socket.d.ts +4 -4
- package/dist/routes/router-socket.js.map +1 -1
- package/dist/routes/router.d.ts +1 -0
- package/dist/routes/router.js.map +1 -1
- package/dist/routes/routes-index.d.ts +2 -0
- package/dist/routes/routes-index.js +2 -0
- package/dist/routes/routes-index.js.map +1 -1
- package/dist/storage/BucketStorage.d.ts +31 -1
- package/dist/storage/BucketStorage.js.map +1 -1
- package/dist/storage/mongo/MongoCompactor.d.ts +40 -0
- package/dist/storage/mongo/MongoCompactor.js +292 -0
- package/dist/storage/mongo/MongoCompactor.js.map +1 -0
- package/dist/storage/mongo/MongoSyncBucketStorage.d.ts +3 -2
- package/dist/storage/mongo/MongoSyncBucketStorage.js +19 -13
- package/dist/storage/mongo/MongoSyncBucketStorage.js.map +1 -1
- package/dist/storage/mongo/models.d.ts +5 -4
- package/dist/storage/mongo/models.js.map +1 -1
- package/dist/storage/mongo/util.d.ts +3 -0
- package/dist/storage/mongo/util.js +22 -0
- package/dist/storage/mongo/util.js.map +1 -1
- package/dist/sync/RequestTracker.js +2 -3
- package/dist/sync/RequestTracker.js.map +1 -1
- package/dist/sync/sync-index.d.ts +1 -0
- package/dist/sync/sync-index.js +1 -0
- package/dist/sync/sync-index.js.map +1 -1
- package/dist/sync/sync.js +20 -7
- package/dist/sync/sync.js.map +1 -1
- package/dist/sync/util.js.map +1 -1
- package/dist/util/config/collectors/config-collector.d.ts +12 -0
- package/dist/util/config/collectors/config-collector.js +43 -0
- package/dist/util/config/collectors/config-collector.js.map +1 -1
- package/dist/util/config/compound-config-collector.d.ts +3 -29
- package/dist/util/config/compound-config-collector.js +22 -69
- package/dist/util/config/compound-config-collector.js.map +1 -1
- package/package.json +6 -4
- package/src/entry/cli-entry.ts +2 -1
- package/src/entry/commands/compact-action.ts +54 -0
- package/src/entry/entry-index.ts +1 -0
- package/src/metrics/Metrics.ts +67 -2
- package/src/replication/WalStream.ts +6 -10
- package/src/routes/configure-fastify.ts +102 -0
- package/src/routes/configure-rsocket.ts +59 -0
- package/src/routes/endpoints/socket-route.ts +6 -15
- package/src/routes/endpoints/sync-stream.ts +4 -5
- package/src/routes/route-register.ts +2 -2
- package/src/routes/router-socket.ts +5 -5
- package/src/routes/router.ts +2 -0
- package/src/routes/routes-index.ts +2 -0
- package/src/storage/BucketStorage.ts +36 -1
- package/src/storage/mongo/MongoCompactor.ts +371 -0
- package/src/storage/mongo/MongoSyncBucketStorage.ts +25 -14
- package/src/storage/mongo/models.ts +5 -4
- package/src/storage/mongo/util.ts +25 -0
- package/src/sync/RequestTracker.ts +3 -3
- package/src/sync/sync-index.ts +1 -0
- package/src/sync/sync.ts +21 -7
- package/src/sync/util.ts +1 -0
- package/src/util/config/collectors/config-collector.ts +48 -0
- package/src/util/config/compound-config-collector.ts +23 -87
- package/test/src/__snapshots__/sync.test.ts.snap +85 -0
- package/test/src/bucket_validation.test.ts +142 -0
- package/test/src/bucket_validation.ts +116 -0
- package/test/src/compacting.test.ts +207 -0
- package/test/src/data_storage.test.ts +19 -60
- package/test/src/slow_tests.test.ts +144 -102
- package/test/src/sync.test.ts +169 -29
- package/test/src/util.ts +71 -13
- package/test/src/wal_stream.test.ts +21 -16
- package/test/src/wal_stream_utils.ts +13 -4
- package/tsconfig.tsbuildinfo +1 -1
package/src/storage/mongo/MongoCompactor.ts
@@ -0,0 +1,371 @@
+import { logger } from '@powersync/lib-services-framework';
+import { AnyBulkWriteOperation, MaxKey, MinKey } from 'mongodb';
+import { addChecksums } from '../../util/utils.js';
+import { PowerSyncMongo } from './db.js';
+import { BucketDataDocument, BucketDataKey } from './models.js';
+import { CompactOptions } from '../BucketStorage.js';
+
+interface CurrentBucketState {
+  /** Bucket name */
+  bucket: string;
+  /**
+   * Rows seen in the bucket, with the last op_id of each.
+   */
+  seen: Map<string, bigint>;
+  /**
+   * Estimated memory usage of the seen Map.
+   */
+  trackingSize: number;
+
+  /**
+   * Last (lowest) seen op_id that is not a PUT.
+   */
+  lastNotPut: bigint | null;
+
+  /**
+   * Number of REMOVE/MOVE operations seen since lastNotPut.
+   */
+  opsSincePut: number;
+}
+
+/**
+ * Additional options, primarily for testing.
+ */
+export interface MongoCompactOptions extends CompactOptions {
+  /** Minimum of 2 */
+  clearBatchLimit?: number;
+  /** Minimum of 1 */
+  moveBatchLimit?: number;
+  /** Minimum of 1 */
+  moveBatchQueryLimit?: number;
+}
+
+const DEFAULT_CLEAR_BATCH_LIMIT = 5000;
+const DEFAULT_MOVE_BATCH_LIMIT = 2000;
+const DEFAULT_MOVE_BATCH_QUERY_LIMIT = 10_000;
+
+/** This default is primarily for tests. */
+const DEFAULT_MEMORY_LIMIT_MB = 64;
+
+export class MongoCompactor {
+  private updates: AnyBulkWriteOperation<BucketDataDocument>[] = [];
+
+  private idLimitBytes: number;
+  private moveBatchLimit: number;
+  private moveBatchQueryLimit: number;
+  private clearBatchLimit: number;
+  private maxOpId: bigint | undefined;
+  private buckets: string[] | undefined;
+
+  constructor(private db: PowerSyncMongo, private group_id: number, options?: MongoCompactOptions) {
+    this.idLimitBytes = (options?.memoryLimitMB ?? DEFAULT_MEMORY_LIMIT_MB) * 1024 * 1024;
+    this.moveBatchLimit = options?.moveBatchLimit ?? DEFAULT_MOVE_BATCH_LIMIT;
+    this.moveBatchQueryLimit = options?.moveBatchQueryLimit ?? DEFAULT_MOVE_BATCH_QUERY_LIMIT;
+    this.clearBatchLimit = options?.clearBatchLimit ?? DEFAULT_CLEAR_BATCH_LIMIT;
+    this.maxOpId = options?.maxOpId;
+    this.buckets = options?.compactBuckets;
+  }
+
+  /**
+   * Compact buckets by converting operations into MOVE and/or CLEAR operations.
+   *
+   * See /docs/compacting-operations.md for details.
+   */
+  async compact() {
+    if (this.buckets) {
+      for (let bucket of this.buckets) {
+        // We can make this more efficient later on by iterating
+        // through the buckets in a single query.
+        // That makes batching more tricky, so we leave for later.
+        await this.compactInternal(bucket);
+      }
+    } else {
+      await this.compactInternal(undefined);
+    }
+  }
+
+  async compactInternal(bucket: string | undefined) {
+    const idLimitBytes = this.idLimitBytes;
+
+    let currentState: CurrentBucketState | null = null;
+
+    // Constant lower bound
+    const lowerBound: BucketDataKey = {
+      g: this.group_id,
+      b: bucket ?? (new MinKey() as any),
+      o: new MinKey() as any
+    };
+
+    // Upper bound is adjusted for each batch
+    let upperBound: BucketDataKey = {
+      g: this.group_id,
+      b: bucket ?? (new MaxKey() as any),
+      o: new MaxKey() as any
+    };
+
+    while (true) {
+      // Query one batch at a time, to avoid cursor timeouts
+      const batch = await this.db.bucket_data
+        .find(
+          {
+            _id: {
+              $gte: lowerBound,
+              $lt: upperBound
+            }
+          },
+          {
+            projection: {
+              _id: 1,
+              op: 1,
+              table: 1,
+              row_id: 1,
+              source_table: 1,
+              source_key: 1
+            },
+            limit: this.moveBatchQueryLimit,
+            sort: { _id: -1 },
+            singleBatch: true
+          }
+        )
+        .toArray();
+
+      if (batch.length == 0) {
+        // We've reached the end
+        break;
+      }
+
+      // Set upperBound for the next batch
+      upperBound = batch[batch.length - 1]._id;
+
+      for (let doc of batch) {
+        if (currentState == null || doc._id.b != currentState.bucket) {
+          if (currentState != null && currentState.lastNotPut != null && currentState.opsSincePut >= 1) {
+            // Important to flush before clearBucket()
+            await this.flush();
+            logger.info(
+              `Inserting CLEAR at ${this.group_id}:${currentState.bucket}:${currentState.lastNotPut} to remove ${currentState.opsSincePut} operations`
+            );
+
+            const bucket = currentState.bucket;
+            const clearOp = currentState.lastNotPut;
+            // Free memory before clearing bucket
+            currentState = null;
+            await this.clearBucket(bucket, clearOp);
+          }
+          currentState = {
+            bucket: doc._id.b,
+            seen: new Map(),
+            trackingSize: 0,
+            lastNotPut: null,
+            opsSincePut: 0
+          };
+        }
+
+        if (this.maxOpId != null && doc._id.o > this.maxOpId) {
+          continue;
+        }
+
+        let isPersistentPut = doc.op == 'PUT';
+
+        if (doc.op == 'REMOVE' || doc.op == 'PUT') {
+          const key = `${doc.table}/${doc.row_id}/${doc.source_table}/${doc.source_key?.toHexString()}`;
+          const targetOp = currentState.seen.get(key);
+          if (targetOp) {
+            // Will convert to MOVE, so don't count as PUT
+            isPersistentPut = false;
+
+            this.updates.push({
+              updateOne: {
+                filter: {
+                  _id: doc._id
+                },
+                update: {
+                  $set: {
+                    op: 'MOVE',
+                    target_op: targetOp
+                  },
+                  $unset: {
+                    source_table: 1,
+                    source_key: 1,
+                    table: 1,
+                    row_id: 1,
+                    data: 1
+                  }
+                }
+              }
+            });
+          } else {
+            if (currentState.trackingSize >= idLimitBytes) {
+              // Reached memory limit.
+              // Keep the highest seen values in this case.
+            } else {
+              // flatstr reduces the memory usage by flattening the string
+              currentState.seen.set(flatstr(key), doc._id.o);
+              // length + 16 for the string
+              // 24 for the bigint
+              // 50 for map overhead
+              // 50 for additional overhead
+              currentState.trackingSize += key.length + 140;
+            }
+          }
+        }
+
+        if (isPersistentPut) {
+          currentState.lastNotPut = null;
+          currentState.opsSincePut = 0;
+        } else if (doc.op != 'CLEAR') {
+          if (currentState.lastNotPut == null) {
+            currentState.lastNotPut = doc._id.o;
+          }
+          currentState.opsSincePut += 1;
+        }
+
+        if (this.updates.length >= this.moveBatchLimit) {
+          await this.flush();
+        }
+      }
+    }
+
+    await this.flush();
+    currentState?.seen.clear();
+    if (currentState?.lastNotPut != null && currentState?.opsSincePut > 1) {
+      logger.info(
+        `Inserting CLEAR at ${this.group_id}:${currentState.bucket}:${currentState.lastNotPut} to remove ${currentState.opsSincePut} operations`
+      );
+      const bucket = currentState.bucket;
+      const clearOp = currentState.lastNotPut;
+      // Free memory before clearing bucket
+      currentState = null;
+      await this.clearBucket(bucket, clearOp);
+    }
+  }
+
+  private async flush() {
+    if (this.updates.length > 0) {
+      logger.info(`Compacting ${this.updates.length} ops`);
+      await this.db.bucket_data.bulkWrite(this.updates, {
+        // Order is not important.
+        // Since checksums are not affected, these operations can happen in any order,
+        // and it's fine if the operations are partially applied.
+        // Each individual operation is atomic.
+        ordered: false
+      });
+      this.updates = [];
+    }
+  }
+
+  /**
+   * Perform a CLEAR compact for a bucket.
+   *
+   * @param bucket bucket name
+   * @param op op_id of the last non-PUT operation, which will be converted to CLEAR.
+   */
+  private async clearBucket(bucket: string, op: bigint) {
+    const opFilter = {
+      _id: {
+        $gte: {
+          g: this.group_id,
+          b: bucket,
+          o: new MinKey() as any
+        },
+        $lte: {
+          g: this.group_id,
+          b: bucket,
+          o: op
+        }
+      }
+    };
+
+    const session = this.db.client.startSession();
+    try {
+      let done = false;
+      while (!done) {
+        // Do the CLEAR operation in batches, with each batch a separate transaction.
+        // The state after each batch is fully consistent.
+        // We need a transaction per batch to make sure checksums stay consistent.
+        await session.withTransaction(
+          async () => {
+            const query = this.db.bucket_data.find(opFilter, {
+              session,
+              sort: { _id: 1 },
+              projection: {
+                _id: 1,
+                op: 1,
+                checksum: 1,
+                target_op: 1
+              },
+              limit: this.clearBatchLimit
+            });
+            let checksum = 0;
+            let lastOpId: BucketDataKey | null = null;
+            let targetOp: bigint | null = null;
+            let gotAnOp = false;
+            for await (let op of query.stream()) {
+              if (op.op == 'MOVE' || op.op == 'REMOVE' || op.op == 'CLEAR') {
+                checksum = addChecksums(checksum, op.checksum);
+                lastOpId = op._id;
+                if (op.op != 'CLEAR') {
+                  gotAnOp = true;
+                }
+                if (op.target_op != null) {
+                  if (targetOp == null || op.target_op > targetOp) {
+                    targetOp = op.target_op;
+                  }
+                }
+              } else {
+                throw new Error(`Unexpected ${op.op} operation at ${op._id.g}:${op._id.b}:${op._id.o}`);
+              }
+            }
+            if (!gotAnOp) {
+              done = true;
+              return;
+            }
+
+            logger.info(`Flushing CLEAR at ${lastOpId?.o}`);
+            await this.db.bucket_data.deleteMany(
+              {
+                _id: {
+                  $gte: {
+                    g: this.group_id,
+                    b: bucket,
+                    o: new MinKey() as any
+                  },
+                  $lte: lastOpId!
+                }
+              },
+              { session }
+            );
+
+            await this.db.bucket_data.insertOne(
+              {
+                _id: lastOpId!,
+                op: 'CLEAR',
+                checksum: checksum,
+                data: null,
+                target_op: targetOp
+              },
+              { session }
+            );
+          },
+          {
+            writeConcern: { w: 'majority' },
+            readConcern: { level: 'snapshot' }
+          }
+        );
+      }
+    } finally {
+      await session.endSession();
+    }
+  }
+}
+
+/**
+ * Flattens string to reduce memory usage (around 320 bytes -> 120 bytes),
+ * at the cost of some upfront CPU usage.
+ *
+ * From: https://github.com/davidmarkclements/flatstr/issues/8
+ */
+function flatstr(s: string) {
+  s.match(/\n/g);
+  return s;
+}
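For orientation, a minimal sketch of how the compactor above could be driven on its own, assuming a PowerSyncMongo handle and a sync-rules group id are already available. The option values and bucket name are placeholders, not defaults mandated by the package; in the service itself the storage layer constructs this via MongoSyncBucketStorage.compact(), as the next file shows.

import { PowerSyncMongo } from './db.js';
import { MongoCompactor } from './MongoCompactor.js';

// Hypothetical wiring: in the service these values come from configuration and the active sync rules.
declare const db: PowerSyncMongo;
const groupId = 1;

async function compactExample() {
  const compactor = new MongoCompactor(db, groupId, {
    memoryLimitMB: 64, // cap on the in-memory `seen` map before older row keys stop being tracked
    moveBatchLimit: 2000, // flush queued MOVE updates with bulkWrite once this many accumulate
    compactBuckets: ['global[]'] // optional: restrict compaction to specific buckets (placeholder name)
  });
  await compactor.compact();
}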
package/src/storage/mongo/MongoSyncBucketStorage.ts
CHANGED
@@ -8,21 +8,24 @@ import * as util from '../../util/util-index.js';
 import {
   BucketDataBatchOptions,
   BucketStorageBatch,
+  CompactOptions,
   DEFAULT_DOCUMENT_BATCH_LIMIT,
   DEFAULT_DOCUMENT_CHUNK_LIMIT_BYTES,
   FlushedResult,
   ResolveTableOptions,
   ResolveTableResult,
+  SyncBucketDataBatch,
   SyncRulesBucketStorage,
   SyncRuleStatus
 } from '../BucketStorage.js';
+import { ChecksumCache, FetchPartialBucketChecksum } from '../ChecksumCache.js';
 import { MongoBucketStorage } from '../MongoBucketStorage.js';
 import { SourceTable } from '../SourceTable.js';
 import { PowerSyncMongo } from './db.js';
 import { BucketDataDocument, BucketDataKey, SourceKey, SyncRuleState } from './models.js';
 import { MongoBucketBatch } from './MongoBucketBatch.js';
-import {
-import {
+import { MongoCompactor } from './MongoCompactor.js';
+import { BSON_DESERIALIZE_OPTIONS, idPrefixFilter, mapOpEntry, readSingleBatch, serializeLookup } from './util.js';
 
 export class MongoSyncBucketStorage implements SyncRulesBucketStorage {
   private readonly db: PowerSyncMongo;
@@ -201,7 +204,7 @@ export class MongoSyncBucketStorage implements SyncRulesBucketStorage {
     checkpoint: util.OpId,
     dataBuckets: Map<string, string>,
     options?: BucketDataBatchOptions
-  ): AsyncIterable<
+  ): AsyncIterable<SyncBucketDataBatch> {
     if (dataBuckets.size == 0) {
       return;
     }
@@ -267,6 +270,7 @@ export class MongoSyncBucketStorage implements SyncRulesBucketStorage {
 
     let batchSize = 0;
     let currentBatch: util.SyncBucketData | null = null;
+    let targetOp: bigint | null = null;
 
     // Ordered by _id, meaning buckets are grouped together
     for (let rawData of data) {
@@ -284,7 +288,8 @@ export class MongoSyncBucketStorage implements SyncRulesBucketStorage {
         start = currentBatch.after;
         currentBatch = null;
         batchSize = 0;
-        yield yieldBatch;
+        yield { batch: yieldBatch, targetOp: targetOp };
+        targetOp = null;
       }
 
       start ??= dataBuckets.get(bucket);
@@ -298,17 +303,18 @@ export class MongoSyncBucketStorage implements SyncRulesBucketStorage {
           data: [],
           next_after: start
         };
+        targetOp = null;
+      }
+
+      const entry = mapOpEntry(row);
+
+      if (row.target_op != null) {
+        // MOVE, CLEAR
+        if (targetOp == null || row.target_op > targetOp) {
+          targetOp = row.target_op;
+        }
       }
 
-      const entry: util.OplogEntry = {
-        op_id: util.timestampToOpId(row._id.o),
-        op: row.op,
-        object_type: row.table,
-        object_id: row.row_id,
-        checksum: Number(row.checksum),
-        subkey: `${row.source_table}/${row.source_key.toHexString()}`,
-        data: row.data
-      };
       currentBatch.data.push(entry);
       currentBatch.next_after = entry.op_id;
 
@@ -318,7 +324,8 @@ export class MongoSyncBucketStorage implements SyncRulesBucketStorage {
     if (currentBatch != null) {
       const yieldBatch = currentBatch;
       currentBatch = null;
-      yield yieldBatch;
+      yield { batch: yieldBatch, targetOp: targetOp };
+      targetOp = null;
     }
   }
 
@@ -530,4 +537,8 @@ export class MongoSyncBucketStorage implements SyncRulesBucketStorage {
       }
     );
   }
+
+  async compact(options?: CompactOptions) {
+    return new MongoCompactor(this.db, this.group_id, options).compact();
+  }
 }
package/src/storage/mongo/models.ts
CHANGED
@@ -42,12 +42,13 @@ export interface BucketParameterDocument {
 export interface BucketDataDocument {
   _id: BucketDataKey;
   op: OpType;
-  source_table
-  source_key
-  table
-  row_id
+  source_table?: bson.ObjectId;
+  source_key?: bson.UUID;
+  table?: string;
+  row_id?: string;
   checksum: number;
   data: string | null;
+  target_op?: bigint | null;
 }
 
 export type OpType = 'PUT' | 'REMOVE' | 'MOVE' | 'CLEAR';
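The widened BucketDataDocument lines up with what MongoCompactor writes: a superseded PUT keeps its _id and checksum but loses its row identification and payload, gaining target_op instead. A hedged illustration of the two shapes, assuming the { g, b, o } key layout used by the compactor; all values are invented for the example.

import * as bson from 'bson';
import { BucketDataDocument } from './models.js';

// Before compaction: a regular PUT entry.
const putRow: BucketDataDocument = {
  _id: { g: 1, b: 'global[]', o: 100n },
  op: 'PUT',
  source_table: new bson.ObjectId(),
  source_key: new bson.UUID(),
  table: 'lists',
  row_id: 'list-1',
  checksum: 123456,
  data: '{"name":"groceries"}'
};

// After compaction: the same _id rewritten as a MOVE. In MongoDB the row fields and data are
// $unset; data is kept as null here only so the literal still satisfies the interface.
const movedRow: BucketDataDocument = {
  _id: { g: 1, b: 'global[]', o: 100n },
  op: 'MOVE',
  checksum: 123456,
  data: null,
  target_op: 200n
};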
package/src/storage/mongo/util.ts
CHANGED
@@ -2,6 +2,9 @@ import { SqliteJsonValue } from '@powersync/service-sync-rules';
 import * as bson from 'bson';
 import * as mongo from 'mongodb';
 import * as crypto from 'crypto';
+import { BucketDataDocument } from './models.js';
+import { timestampToOpId } from '../../util/utils.js';
+import { OplogEntry } from '../../util/protocol-types.js';
 
 /**
  * Lookup serialization must be number-agnostic. I.e. normalize numbers, instead of preserving numbers.
@@ -86,3 +89,25 @@ export const BSON_DESERIALIZE_OPTIONS: bson.DeserializeOptions = {
   // use bigint instead of Long
   useBigInt64: true
 };
+
+export function mapOpEntry(row: BucketDataDocument): OplogEntry {
+  if (row.op == 'PUT' || row.op == 'REMOVE') {
+    return {
+      op_id: timestampToOpId(row._id.o),
+      op: row.op,
+      object_type: row.table,
+      object_id: row.row_id,
+      checksum: Number(row.checksum),
+      subkey: `${row.source_table}/${row.source_key!.toHexString()}`,
+      data: row.data
+    };
+  } else {
+    // MOVE, CLEAR
+
+    return {
+      op_id: timestampToOpId(row._id.o),
+      op: row.op,
+      checksum: Number(row.checksum)
+    };
+  }
+}
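To make the two branches concrete, a small hedged sketch of what mapOpEntry produces for a compacted MOVE row; values are illustrative, and the op_id rendering assumes timestampToOpId simply stringifies the bigint op id.

import { mapOpEntry } from './util.js';

// MOVE/CLEAR rows only carry op_id, op and checksum into the oplog:
const moveEntry = mapOpEntry({
  _id: { g: 1, b: 'global[]', o: 100n },
  op: 'MOVE',
  checksum: 123456,
  data: null
});
// => roughly { op_id: '100', op: 'MOVE', checksum: 123456 }
// PUT/REMOVE rows additionally expose the row identity and payload via object_type, object_id, subkey and data.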
package/src/sync/RequestTracker.ts
CHANGED
@@ -1,4 +1,3 @@
-import { container } from '@powersync/lib-services-framework';
 import { Metrics } from '../metrics/Metrics.js';
 
 /**
@@ -10,12 +9,13 @@ export class RequestTracker {
 
   addOperationsSynced(operations: number) {
     this.operationsSynced += operations;
-
+
+    Metrics.getInstance().operations_synced_total.add(operations);
   }
 
   addDataSynced(bytes: number) {
     this.dataSyncedBytes += bytes;
 
-
+    Metrics.getInstance().data_synced_bytes.add(bytes);
  }
 }
package/src/sync/sync-index.ts
CHANGED
package/src/sync/sync.ts
CHANGED
@@ -8,6 +8,7 @@ import * as storage from '../storage/storage-index.js';
 import * as util from '../util/util-index.js';
 
 import { logger } from '@powersync/lib-services-framework';
+import { Metrics } from '../metrics/Metrics.js';
 import { mergeAsyncIterables } from './merge.js';
 import { TokenStreamOptions, tokenStream } from './util.js';
 import { RequestTracker } from './RequestTracker.js';
@@ -257,6 +258,9 @@ interface BucketDataBatchResult {
 async function* bucketDataBatch(request: BucketDataRequest): AsyncGenerator<BucketDataBatchResult, void> {
   const { storage, checkpoint, bucketsToFetch, dataBuckets, raw_data, binary_data, tracker, signal } = request;
 
+  const checkpointOp = BigInt(checkpoint);
+  let checkpointInvalidated = false;
+
   const [_, release] = await syncSemaphore.acquire();
   try {
     // Optimization: Only fetch buckets for which the checksums have changed since the last checkpoint
@@ -266,13 +270,16 @@ async function* bucketDataBatch(request: BucketDataRequest): AsyncGenerator<Buck
 
     let has_more = false;
 
-    for await (let r of data) {
+    for await (let { batch: r, targetOp } of data) {
       if (signal.aborted) {
         return;
       }
       if (r.has_more) {
         has_more = true;
       }
+      if (targetOp != null && targetOp > checkpointOp) {
+        checkpointInvalidated = true;
+      }
       if (r.data.length == 0) {
         continue;
       }
@@ -308,12 +315,19 @@ async function* bucketDataBatch(request: BucketDataRequest): AsyncGenerator<Buck
     }
 
     if (!has_more) {
-
-
-
-
-
-
+      if (checkpointInvalidated) {
+        // Checkpoint invalidated by a CLEAR or MOVE op.
+        // Don't send the checkpoint_complete line in this case.
+        // More data should be available immediately for a new checkpoint.
+        yield { data: null, done: true };
+      } else {
+        const line: util.StreamingSyncCheckpointComplete = {
+          checkpoint_complete: {
+            last_op_id: checkpoint
+          }
+        };
+        yield { data: line, done: true };
+      }
     }
   } finally {
     release();
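The guard added above reduces to one comparison per yielded batch: if any MOVE or CLEAR produced by compaction points past the checkpoint currently being streamed, that checkpoint can no longer be completed consistently. A hedged distillation of the rule; the helper name is local to this sketch, not an export of the package.

// True when a batch's highest target_op points beyond the checkpoint being streamed,
// meaning the data needed for a consistent checkpoint only exists in a later one.
function invalidatesCheckpoint(checkpoint: string, targetOp: bigint | null): boolean {
  return targetOp != null && targetOp > BigInt(checkpoint);
}

invalidatesCheckpoint('100', 120n); // true  -> skip checkpoint_complete; wait for the next checkpoint
invalidatesCheckpoint('100', 90n);  // false -> safe to send checkpoint_complete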
package/src/sync/util.ts
CHANGED
package/src/util/config/collectors/config-collector.ts
CHANGED
@@ -1,6 +1,8 @@
+import * as t from 'ts-codec';
 import * as yaml from 'yaml';
 
 import { configFile } from '@powersync/service-types';
+import { schema } from '@powersync/lib-services-framework';
 
 import { RunnerConfig } from '../types.js';
 
@@ -21,6 +23,13 @@ export enum ConfigFileFormat {
  */
 const YAML_ENV_PREFIX = 'PS_';
 
+// ts-codec itself doesn't give great validation errors, so we use json schema for that
+const configSchemaValidator = schema
+  .parseJSONSchema(
+    t.generateJSONSchema(configFile.powerSyncConfig, { allowAdditional: true, parsers: [configFile.portParser] })
+  )
+  .validator();
+
 export abstract class ConfigCollector {
   abstract get name(): string;
 
@@ -30,6 +39,45 @@ export abstract class ConfigCollector {
   */
  abstract collectSerialized(runnerConfig: RunnerConfig): Promise<configFile.SerializedPowerSyncConfig | null>;
 
+  /**
+   * Collects the PowerSyncConfig settings.
+   * Validates and decodes the config.
+   * @returns null if this collector cannot provide a config
+   */
+  async collect(runner_config: RunnerConfig): Promise<configFile.PowerSyncConfig | null> {
+    const serialized = await this.collectSerialized(runner_config);
+    if (!serialized) {
+      return null;
+    }
+
+    /**
+     * After this point a serialized config has been found. Any failures to decode or validate
+     * will result in a hard stop.
+     */
+    const decoded = this.decode(serialized);
+    this.validate(decoded);
+    return decoded;
+  }
+
+  /**
+   * Validates input config
+   * ts-codec itself doesn't give great validation errors, so we use json schema for that
+   */
+  validate(config: configFile.PowerSyncConfig) {
+    const valid = configSchemaValidator.validate(config);
+    if (!valid.valid) {
+      throw new Error(`Failed to validate PowerSync config: ${valid.errors.join(', ')}`);
+    }
+  }
+
+  decode(encoded: configFile.SerializedPowerSyncConfig): configFile.PowerSyncConfig {
+    try {
+      return configFile.powerSyncConfig.decode(encoded);
+    } catch (ex) {
+      throw new Error(`Failed to decode PowerSync config: ${ex}`);
+    }
+  }
+
   protected parseContent(content: string, contentType?: ConfigFileFormat) {
     switch (contentType) {
       case ConfigFileFormat.YAML: