@powersync/service-core 0.1.3 → 0.2.1
This diff shows the content of publicly available package versions as published to a supported public registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.
- package/CHANGELOG.md +14 -0
- package/dist/routes/socket-route.js +1 -1
- package/dist/routes/socket-route.js.map +1 -1
- package/dist/routes/sync-stream.js +1 -1
- package/dist/routes/sync-stream.js.map +1 -1
- package/dist/storage/BucketStorage.d.ts +6 -1
- package/dist/storage/BucketStorage.js.map +1 -1
- package/dist/storage/ChecksumCache.d.ts +50 -0
- package/dist/storage/ChecksumCache.js +234 -0
- package/dist/storage/ChecksumCache.js.map +1 -0
- package/dist/storage/mongo/MongoSyncBucketStorage.d.ts +3 -1
- package/dist/storage/mongo/MongoSyncBucketStorage.js +26 -14
- package/dist/storage/mongo/MongoSyncBucketStorage.js.map +1 -1
- package/dist/sync/sync.js +32 -21
- package/dist/sync/sync.js.map +1 -1
- package/dist/util/protocol-types.d.ts +4 -0
- package/dist/util/protocol-types.js +5 -1
- package/dist/util/protocol-types.js.map +1 -1
- package/dist/util/utils.d.ts +6 -3
- package/dist/util/utils.js +32 -15
- package/dist/util/utils.js.map +1 -1
- package/package.json +6 -5
- package/src/routes/socket-route.ts +5 -1
- package/src/routes/sync-stream.ts +4 -1
- package/src/storage/BucketStorage.ts +6 -1
- package/src/storage/ChecksumCache.ts +294 -0
- package/src/storage/mongo/MongoSyncBucketStorage.ts +31 -15
- package/src/sync/sync.ts +44 -37
- package/src/util/protocol-types.ts +6 -1
- package/src/util/utils.ts +36 -16
- package/test/src/__snapshots__/sync.test.ts.snap +14 -2
- package/test/src/checksum_cache.test.ts +436 -0
- package/test/src/data_storage.test.ts +3 -3
- package/test/src/large_batch.test.ts +4 -4
- package/test/src/sync_rules.test.ts +11 -9
- package/tsconfig.tsbuildinfo +1 -1

package/src/storage/ChecksumCache.ts
ADDED

@@ -0,0 +1,294 @@
+import { BucketChecksum, OpId } from '@/util/protocol-types.js';
+import { ChecksumMap, addBucketChecksums } from '@/util/utils.js';
+import { LRUCache } from 'lru-cache/min';
+import { OrderedSet } from '@js-sdsl/ordered-set';
+
+interface ChecksumFetchContext {
+  fetch(bucket: string): Promise<BucketChecksum>;
+  checkpoint: bigint;
+}
+
+export interface FetchPartialBucketChecksum {
+  bucket: string;
+  start?: OpId;
+  end: OpId;
+}
+
+export type FetchChecksums = (batch: FetchPartialBucketChecksum[]) => Promise<ChecksumMap>;
+
+export interface ChecksumCacheOptions {
+  /**
+   * Upstream checksum implementation.
+   *
+   * This fetches a batch of either entire bucket checksums, or a partial range.
+   */
+  fetchChecksums: FetchChecksums;
+
+  /**
+   * Maximum number of cached checksums.
+   */
+  maxSize?: number;
+}
+
+// Approximately 5MB of memory, if we assume 50 bytes per entry
+const DEFAULT_MAX_SIZE = 100_000;
+
+/**
+ * Implement an LRU cache for checksum requests. Each (bucket, checkpoint) request is cached separately,
+ * while the lookups occur in batches.
+ *
+ * For each bucket, we keep a separate OrderedSet of cached checkpoints.
+ * This allows us to incrementally update checksums by using the last cached checksum for the same bucket.
+ *
+ * We use the LRUCache fetchMethod to deduplicate in-progress requests.
+ */
+export class ChecksumCache {
+  /**
+   * The primary checksum cache, with key of `${checkpoint}/${bucket}`.
+   */
+  private cache: LRUCache<string, BucketChecksum, ChecksumFetchContext>;
+  /**
+   * For each bucket, an ordered set of cached checkpoints.
+   */
+  private bucketCheckpoints = new Map<string, OrderedSet<bigint>>();
+
+  private fetchChecksums: FetchChecksums;
+
+  constructor(options: ChecksumCacheOptions) {
+    this.fetchChecksums = options.fetchChecksums;
+
+    this.cache = new LRUCache<string, BucketChecksum, ChecksumFetchContext>({
+      max: options.maxSize ?? DEFAULT_MAX_SIZE,
+      fetchMethod: async (cacheKey, _staleValue, options) => {
+        // Called when this checksum hasn't been cached yet.
+        // Pass the call back to the request, which implements batch fetching.
+        const { bucket } = parseCacheKey(cacheKey);
+        const result = await options.context.fetch(bucket);
+
+        // Add to the set of cached checkpoints for the bucket.
+        let checkpointSet = this.bucketCheckpoints.get(bucket);
+        if (checkpointSet == null) {
+          checkpointSet = new OrderedSet();
+          this.bucketCheckpoints.set(bucket, checkpointSet);
+        }
+        checkpointSet.insert(options.context.checkpoint);
+        return result;
+      },
+
+      dispose: (value, key) => {
+        // Remove from the set of cached checkpoints for the bucket
+        const { checkpointString } = parseCacheKey(key);
+        const checkpoint = BigInt(checkpointString);
+        const checkpointSet = this.bucketCheckpoints.get(value.bucket);
+        if (checkpointSet == null) {
+          return;
+        }
+        checkpointSet.eraseElementByKey(checkpoint);
+        if (checkpointSet.length == 0) {
+          this.bucketCheckpoints.delete(value.bucket);
+        }
+      },
+
+      noDisposeOnSet: true,
+
+      // When we have more fetches than the cache size, complete the fetches instead
+      // of failing with Error('evicted').
+      ignoreFetchAbort: true
+    });
+  }
+
+  async getChecksums(checkpoint: OpId, buckets: string[]): Promise<BucketChecksum[]> {
+    const checksums = await this.getChecksumMap(checkpoint, buckets);
+    // Return results in the same order as the request
+    return buckets.map((bucket) => checksums.get(bucket)!);
+  }
+
+  /**
+   * Get bucket checksums for a checkpoint.
+   *
+   * Any checksums not found upstream are returned as zero checksums.
+   *
+   * @returns a Map with exactly one entry for each bucket requested
+   */
+  async getChecksumMap(checkpoint: OpId, buckets: string[]): Promise<ChecksumMap> {
+    // Buckets that don't have a cached checksum for this checkpoint yet
+    let toFetch = new Set<string>();
+
+    // Newly fetched results
+    let fetchResults = new Map<string, BucketChecksum>();
+
+    // Promise for the batch of new fetch requests
+    let resolveFetch!: () => void;
+    let rejectFetch!: (err: any) => void;
+    let fetchPromise = new Promise<void>((resolve, reject) => {
+      resolveFetch = resolve;
+      rejectFetch = reject;
+    });
+
+    // Accumulated results - both from cached checksums, and fetched checksums
+    let finalResults = new Map<string, BucketChecksum>();
+
+    const context: ChecksumFetchContext = {
+      async fetch(bucket) {
+        await fetchPromise;
+        if (!toFetch.has(bucket)) {
+          // Should never happen
+          throw new Error(`Expected to fetch ${bucket}`);
+        }
+        const checksum = fetchResults.get(bucket);
+        if (checksum == null) {
+          // Should never happen
+          throw new Error(`Failed to fetch checksum for bucket ${bucket}`);
+        }
+        return checksum;
+      },
+      checkpoint: BigInt(checkpoint)
+    };
+
+    // One promise to await to ensure all fetch requests completed.
+    let settledPromise: Promise<PromiseSettledResult<void>[]> | null = null;
+
+    try {
+      // Individual cache fetch promises
+      let cacheFetchPromises: Promise<void>[] = [];
+
+      for (let bucket of buckets) {
+        const cacheKey = makeCacheKey(checkpoint, bucket);
+        let status: LRUCache.Status<BucketChecksum> = {};
+        const p = this.cache.fetch(cacheKey, { context: context, status: status }).then((checksums) => {
+          if (checksums == null) {
+            // Should never happen
+            throw new Error(`Failed to get checksums for ${cacheKey}`);
+          }
+          finalResults.set(bucket, checksums);
+        });
+        cacheFetchPromises.push(p);
+        if (status.fetch == 'hit' || status.fetch == 'inflight') {
+          // The checksum is either cached already (hit), or another request is busy
+          // fetching (inflight).
+          // In either case, we don't need to fetch a new checksum.
+        } else {
+          // We need a new request for this checksum.
+          toFetch.add(bucket);
+        }
+      }
+      // We do this directly after creating the promises, otherwise
+      // we could end up with weird uncaught rejection errors.
+      settledPromise = Promise.allSettled(cacheFetchPromises);
+
+      if (toFetch.size == 0) {
+        // Nothing to fetch, but resolve in any case
+        resolveFetch();
+      } else {
+        let bucketRequests: FetchPartialBucketChecksum[] = [];
+        // Partial checksum (previously cached) to add to the partial fetch
+        let add = new Map<string, BucketChecksum>();
+
+        for (let bucket of toFetch) {
+          let bucketRequest: FetchPartialBucketChecksum | null = null;
+          const checkpointSet = this.bucketCheckpoints.get(bucket);
+          if (checkpointSet != null) {
+            // Find smaller checkpoints, sorted in descending order
+            let iter = checkpointSet.reverseUpperBound(context.checkpoint);
+            const begin = checkpointSet.begin();
+            while (iter.isAccessible()) {
+              const cp = iter.pointer;
+              const cacheKey = makeCacheKey(cp, bucket);
+              // peek to avoid refreshing the key
+              const cached = this.cache.peek(cacheKey);
+              // As long as dispose() works correctly, the checkpoint set should
+              // match up with the cache, and `cached` should also have a value here.
+              // However, we handle cases where it's not present either way.
+              // Test by disabling the `dispose()` callback.
+              if (cached != null) {
+                // Partial checksum found - make a partial checksum request
+                bucketRequest = {
+                  bucket,
+                  start: cp.toString(),
+                  end: checkpoint
+                };
+                add.set(bucket, cached);
+                break;
+              }
+
+              if (iter.equals(begin)) {
+                // Cannot iterate further
+                break;
+              }
+              // Iterate backwards
+              iter = iter.pre();
+            }
+          }
+
+          if (bucketRequest == null) {
+            // No partial checksum found - make a new full checksum request
+            bucketRequest = {
+              bucket,
+              end: checkpoint
+            };
+            add.set(bucket, {
+              bucket,
+              checksum: 0,
+              count: 0
+            });
+          }
+          bucketRequests.push(bucketRequest);
+        }
+
+        // Fetch partial checksums from upstream
+        const results = await this.fetchChecksums(bucketRequests);
+
+        for (let bucket of toFetch) {
+          const result = results.get(bucket);
+          const toAdd = add.get(bucket);
+          if (toAdd == null) {
+            // Should never happen
+            throw new Error(`toAdd null for ${bucket}`);
+          }
+          // Compute the full checksum from the two partials.
+          // No results returned are treated the same as a zero result.
+          const added = addBucketChecksums(toAdd, result ?? null);
+          fetchResults.set(bucket, added);
+        }
+
+        // fetchResults is fully populated, so we resolve the Promise
+        resolveFetch();
+      }
+    } catch (e) {
+      // Failure when fetching checksums - reject the Promise.
+      // This will reject all individual cache fetch requests, and each will be retried
+      // on the next request.
+      rejectFetch(e);
+
+      // Wait for the above rejection to propagate, otherwise we end up with "uncaught" errors.
+      // This promise never throws.
+      await settledPromise;
+
+      throw e;
+    }
+
+    // Wait for all cache fetch requests to complete
+    const settledResults = (await settledPromise) ?? [];
+    // Check if any of them failed
+    for (let result of settledResults) {
+      if (result.status == 'rejected') {
+        throw result.reason;
+      }
+    }
+
+    if (finalResults.size != buckets.length) {
+      // Should not happen
+      throw new Error(`Bucket results mismatch: ${finalResults.size} != ${buckets.length}`);
+    }
+    return finalResults;
+  }
+}
+
+function makeCacheKey(checkpoint: bigint | string, bucket: string) {
+  return `${checkpoint}/${bucket}`;
+}
+
+function parseCacheKey(key: string) {
+  const index = key.indexOf('/');
+  return { checkpointString: key.substring(0, index), bucket: key.substring(index + 1) };
+}

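As a rough sketch of how this new cache is driven (the upstream implementation below is a placeholder; only the ChecksumCache API itself comes from the file above):

    import { ChecksumCache, FetchPartialBucketChecksum } from './ChecksumCache.js';

    const cache = new ChecksumCache({
      // Placeholder upstream implementation: a real one computes checksums from storage.
      // Entries with `start` set are partial (incremental) requests.
      fetchChecksums: async (batch: FetchPartialBucketChecksum[]) => {
        return new Map(batch.map((r) => [r.bucket, { bucket: r.bucket, checksum: 0, count: 0 }]));
      },
      maxSize: 10_000
    });

    // Inside an async function, per checkpoint: cached (bucket, checkpoint) pairs are
    // reused, and only missing ranges are passed to fetchChecksums.
    const checksums = await cache.getChecksumMap('100', ['mybucket[]']);
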
package/src/storage/mongo/MongoSyncBucketStorage.ts
CHANGED

@@ -22,9 +22,15 @@ import { PowerSyncMongo } from './db.js';
 import { BucketDataDocument, BucketDataKey, SourceKey, SyncRuleState } from './models.js';
 import { MongoBucketBatch } from './MongoBucketBatch.js';
 import { BSON_DESERIALIZE_OPTIONS, idPrefixFilter, readSingleBatch, serializeLookup } from './util.js';
+import { ChecksumCache, FetchPartialBucketChecksum } from '../ChecksumCache.js';

 export class MongoSyncBucketStorage implements SyncRulesBucketStorage {
   private readonly db: PowerSyncMongo;
+  private checksumCache = new ChecksumCache({
+    fetchChecksums: (batch) => {
+      return this.getChecksumsInternal(batch);
+    }
+  });

   constructor(
     public readonly factory: MongoBucketStorage,
@@ -316,23 +322,28 @@ export class MongoSyncBucketStorage implements SyncRulesBucketStorage {
     }
   }

-  async getChecksums(checkpoint: util.OpId, buckets: string[]): Promise<util.
-
-
+  async getChecksums(checkpoint: util.OpId, buckets: string[]): Promise<util.ChecksumMap> {
+    return this.checksumCache.getChecksumMap(checkpoint, buckets);
+  }
+
+  private async getChecksumsInternal(batch: FetchPartialBucketChecksum[]): Promise<util.ChecksumMap> {
+    if (batch.length == 0) {
+      return new Map();
     }
+
     const filters: any[] = [];
-    for (let
+    for (let request of batch) {
       filters.push({
         _id: {
           $gt: {
             g: this.group_id,
-            b:
-            o: new bson.MinKey()
+            b: request.bucket,
+            o: request.start ? BigInt(request.start) : new bson.MinKey()
           },
           $lte: {
             g: this.group_id,
-            b:
-            o: BigInt(
+            b: request.bucket,
+            o: BigInt(request.end)
           }
         }
       });
@@ -354,13 +365,18 @@ export class MongoSyncBucketStorage implements SyncRulesBucketStorage {
       )
       .toArray();

-    return
-
-
-
-
-
-
+    return new Map<string, util.BucketChecksum>(
+      aggregate.map((doc) => {
+        return [
+          doc._id,
+          {
+            bucket: doc._id,
+            count: doc.count,
+            checksum: Number(BigInt(doc.checksum_total) & 0xffffffffn) & 0xffffffff
+          } satisfies util.BucketChecksum
+        ];
+      })
+    );
   }

   async terminate() {

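The batches handed to getChecksumsInternal() by the cache look roughly like this (bucket names and op ids are made up); an entry with `start` only aggregates operations after that op id, which is what makes the cached checksums incremental:

    const batch: FetchPartialBucketChecksum[] = [
      { bucket: 'mybucket[]', start: '100', end: '250' }, // partial: ops (100, 250]
      { bucket: 'global[]', end: '250' }                  // full recount up to op 250
    ];
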
package/src/sync/sync.ts
CHANGED

@@ -78,8 +78,8 @@ async function* streamResponseInner(
   // This starts with the state from the client. May contain buckets that the user do not have access to (anymore).
   let dataBuckets = new Map<string, string>();

-  let
-  let
+  let lastChecksums: util.ChecksumMap | null = null;
+  let lastWriteCheckpoint: bigint | null = null;

   const { raw_data, binary_data } = params;

@@ -113,39 +113,42 @@ async function* streamResponseInner(
      throw new Error(`Too many buckets: ${allBuckets.length}`);
    }

-    let checksums: util.BucketChecksum[] | undefined = undefined;
-
    let dataBucketsNew = new Map<string, string>();
    for (let bucket of allBuckets) {
      dataBucketsNew.set(bucket, dataBuckets.get(bucket) ?? '0');
    }
    dataBuckets = dataBucketsNew;

-
+    const bucketList = [...dataBuckets.keys()];
+    const checksumMap = await storage.getChecksums(checkpoint, bucketList);
+    // Subset of buckets for which there may be new data in this batch.
+    let bucketsToFetch: string[];

-    if (
-    const diff = util.checksumsDiff(
+    if (lastChecksums) {
+      const diff = util.checksumsDiff(lastChecksums, checksumMap);

      if (
-
-        diff.
-        diff.
+        lastWriteCheckpoint == writeCheckpoint &&
+        diff.removedBuckets.length == 0 &&
+        diff.updatedBuckets.length == 0
      ) {
        // No changes - don't send anything to the client
        continue;
      }
+      bucketsToFetch = diff.updatedBuckets.map((c) => c.bucket);

      let message = `Updated checkpoint: ${checkpoint} | write: ${writeCheckpoint} | `;
      message += `buckets: ${allBuckets.length} | `;
-      message += `updated: ${limitedBuckets(diff.
-      message += `removed: ${limitedBuckets(diff.
+      message += `updated: ${limitedBuckets(diff.updatedBuckets, 20)} | `;
+      message += `removed: ${limitedBuckets(diff.removedBuckets, 20)} | `;
      micro.logger.info(message);

      const checksum_line: util.StreamingSyncCheckpointDiff = {
        checkpoint_diff: {
          last_op_id: checkpoint,
          write_checkpoint: writeCheckpoint ? String(writeCheckpoint) : undefined,
-
+          removed_buckets: diff.removedBuckets,
+          updated_buckets: diff.updatedBuckets
        }
      };

@@ -154,35 +157,41 @@ async function* streamResponseInner(
      let message = `New checkpoint: ${checkpoint} | write: ${writeCheckpoint} | `;
      message += `buckets: ${allBuckets.length} ${limitedBuckets(allBuckets, 20)}`;
      micro.logger.info(message);
+      bucketsToFetch = allBuckets;
      const checksum_line: util.StreamingSyncCheckpoint = {
        checkpoint: {
          last_op_id: checkpoint,
          write_checkpoint: writeCheckpoint ? String(writeCheckpoint) : undefined,
-          buckets:
+          buckets: [...checksumMap.values()]
        }
      };
      yield checksum_line;
    }
+    lastChecksums = checksumMap;
+    lastWriteCheckpoint = writeCheckpoint;

-
-
-
-    yield* bucketDataInBatches(storage, checkpoint, dataBuckets, raw_data, binary_data, signal);
+    // This incrementally updates dataBuckets with each individual bucket position.
+    // At the end of this, we can be sure that all buckets have data up to the checkpoint.
+    yield* bucketDataInBatches({ storage, checkpoint, bucketsToFetch, dataBuckets, raw_data, binary_data, signal });

    await new Promise((resolve) => setTimeout(resolve, 10));
  }
}

-
-  storage: storage.SyncRulesBucketStorage
-  checkpoint: string
-
-
-
-
+interface BucketDataRequest {
+  storage: storage.SyncRulesBucketStorage;
+  checkpoint: string;
+  bucketsToFetch: string[];
+  /** Bucket data position, modified by the request. */
+  dataBuckets: Map<string, string>;
+  raw_data: boolean | undefined;
+  binary_data: boolean | undefined;
+  signal: AbortSignal;
+}
+
+async function* bucketDataInBatches(request: BucketDataRequest) {
  let isDone = false;
-  while (!signal.aborted && !isDone) {
+  while (!request.signal.aborted && !isDone) {
    // The code below is functionally the same as this for-await loop below.
    // However, the for-await loop appears to have a memory leak, so we avoid it.
    // for await (const { done, data } of bucketDataBatch(storage, checkpoint, dataBuckets, raw_data, signal)) {
@@ -192,7 +201,7 @@ async function* bucketDataInBatches(
    // }
    // break;
    // }
-    const iter = bucketDataBatch(
+    const iter = bucketDataBatch(request);
    try {
      while (true) {
        const { value, done: iterDone } = await iter.next();
@@ -215,17 +224,15 @@ async function* bucketDataInBatches(
/**
 * Extracted as a separate internal function just to avoid memory leaks.
 */
-async function* bucketDataBatch(
-
-
-  dataBuckets: Map<string, string>,
-  raw_data: boolean | undefined,
-  binary_data: boolean | undefined,
-  signal: AbortSignal
-) {
+async function* bucketDataBatch(request: BucketDataRequest) {
+  const { storage, checkpoint, bucketsToFetch, dataBuckets, raw_data, binary_data, signal } = request;
+
  const [_, release] = await syncSemaphore.acquire();
  try {
-
+    // Optimization: Only fetch buckets for which the checksums have changed since the last checkpoint
+    // For the first batch, this will be all buckets.
+    const filteredBuckets = new Map(bucketsToFetch.map((bucket) => [bucket, dataBuckets.get(bucket)!]));
+    const data = storage.getBucketDataBatch(checkpoint, filteredBuckets);

    let has_more = false;

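For reference, a checkpoint_diff line built from such a diff has roughly this shape (a util.StreamingSyncCheckpointDiff; the values here are invented for illustration):

    const line = {
      checkpoint_diff: {
        last_op_id: '250',
        write_checkpoint: '42',
        updated_buckets: [{ bucket: 'mybucket[]', checksum: 123456789, count: 10 }],
        removed_buckets: ['removedbucket[]']
      }
    };
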
package/src/util/protocol-types.ts
CHANGED

@@ -89,7 +89,12 @@ export const StreamingSyncRequest = t.object({
   /**
    * Data is received in a serialized BSON Buffer
    */
-  binary_data: t.boolean.optional()
+  binary_data: t.boolean.optional(),
+
+  /**
+   * Client parameters to be passed to the sync rules.
+   */
+  parameters: t.record(t.any).optional()
 });

 export type StreamingSyncRequest = t.Decoded<typeof StreamingSyncRequest>;

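A client request using the new field could look like this (the parameter keys are arbitrary examples; how they are interpreted is up to the configured sync rules, and other StreamingSyncRequest fields are omitted):

    const request = {
      raw_data: true,
      binary_data: false,
      parameters: { region: 'eu', role: 'admin' }
    };
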
package/src/util/utils.ts
CHANGED

@@ -7,6 +7,8 @@ import * as storage from '@/storage/storage-index.js';
 import { BucketChecksum, OpId } from './protocol-types.js';
 import { retriedQuery } from './pgwire_utils.js';

+export type ChecksumMap = Map<string, BucketChecksum>;
+
 export function hashData(type: string, id: string, data: string): number {
   const hash = crypto.createHash('sha256');
   hash.update(`put.${type}.${id}.${data}`);
@@ -30,32 +32,50 @@ export function timestampToOpId(ts: bigint): OpId {
   return ts.toString(10);
 }

-export function checksumsDiff(previous:
-
+export function checksumsDiff(previous: ChecksumMap, current: ChecksumMap) {
+  // All changed ones
+  const updatedBuckets = new Map<string, BucketChecksum>();

-  const
-
-
-
-
-
-
+  const toRemove = new Set<string>(previous.keys());
+
+  for (let checksum of current.values()) {
+    const p = previous.get(checksum.bucket);
+    if (p == null) {
+      // Added
+      updatedBuckets.set(checksum.bucket, checksum);
    } else {
-
-      if (
-
+      toRemove.delete(checksum.bucket);
+      if (checksum.checksum != p.checksum || checksum.count != p.count) {
+        // Updated
+        updatedBuckets.set(checksum.bucket, checksum);
+      } else {
+        // No change
      }
-      previousBuckets.delete(checksum.bucket);
    }
  }

-  const removed_buckets: string[] = [...previousBuckets.keys()];
  return {
-
-
+    updatedBuckets: [...updatedBuckets.values()],
+    removedBuckets: [...toRemove]
  };
}

+export function addChecksums(a: number, b: number) {
+  return (a + b) & 0xffffffff;
+}
+
+export function addBucketChecksums(a: BucketChecksum, b: BucketChecksum | null): BucketChecksum {
+  if (b == null) {
+    return a;
+  } else {
+    return {
+      bucket: a.bucket,
+      count: a.count + b.count,
+      checksum: addChecksums(a.checksum, b.checksum)
+    };
+  }
+}
+
 export async function getClientCheckpoint(
   db: pgwire.PgClient,
   bucketStorage: storage.BucketStorageFactory,

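These helpers are what let the checksum cache combine a cached partial result with a newly fetched range. A small sketch with hypothetical values, assuming the checksum for bucket 'mybucket[]' at checkpoint 100 is already cached:

    import { addBucketChecksums } from '@/util/utils.js';

    const cachedAt100 = { bucket: 'mybucket[]', checksum: 0x12345678, count: 40 };
    // Fetched upstream: checksum over ops (100, 250] only.
    const ops100to250 = { bucket: 'mybucket[]', checksum: 0x0eadbeef, count: 7 };

    // Checksum at checkpoint 250: counts add up, checksums add modulo 2^32.
    const at250 = addBucketChecksums(cachedAt100, ops100to250);
    // => { bucket: 'mybucket[]', count: 47, checksum: addChecksums(0x12345678, 0x0eadbeef) }
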
package/test/src/__snapshots__/sync.test.ts.snap
CHANGED

@@ -12,7 +12,13 @@ exports[`sync - mongodb > expiring token 1`] = `
 [
   {
     "checkpoint": {
-      "buckets": [
+      "buckets": [
+        {
+          "bucket": "mybucket[]",
+          "checksum": 0,
+          "count": 0,
+        },
+      ],
       "last_op_id": "0",
       "write_checkpoint": undefined,
     },
@@ -135,7 +141,13 @@ exports[`sync - mongodb > sync updates to global data 1`] = `
 [
   {
     "checkpoint": {
-      "buckets": [
+      "buckets": [
+        {
+          "bucket": "mybucket[]",
+          "checksum": 0,
+          "count": 0,
+        },
+      ],
       "last_op_id": "0",
       "write_checkpoint": undefined,
     },