@powersync/service-core 0.1.3 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. package/CHANGELOG.md +14 -0
  2. package/dist/routes/socket-route.js +1 -1
  3. package/dist/routes/socket-route.js.map +1 -1
  4. package/dist/routes/sync-stream.js +1 -1
  5. package/dist/routes/sync-stream.js.map +1 -1
  6. package/dist/storage/BucketStorage.d.ts +6 -1
  7. package/dist/storage/BucketStorage.js.map +1 -1
  8. package/dist/storage/ChecksumCache.d.ts +50 -0
  9. package/dist/storage/ChecksumCache.js +234 -0
  10. package/dist/storage/ChecksumCache.js.map +1 -0
  11. package/dist/storage/mongo/MongoSyncBucketStorage.d.ts +3 -1
  12. package/dist/storage/mongo/MongoSyncBucketStorage.js +26 -14
  13. package/dist/storage/mongo/MongoSyncBucketStorage.js.map +1 -1
  14. package/dist/sync/sync.js +32 -21
  15. package/dist/sync/sync.js.map +1 -1
  16. package/dist/util/protocol-types.d.ts +4 -0
  17. package/dist/util/protocol-types.js +5 -1
  18. package/dist/util/protocol-types.js.map +1 -1
  19. package/dist/util/utils.d.ts +6 -3
  20. package/dist/util/utils.js +32 -15
  21. package/dist/util/utils.js.map +1 -1
  22. package/package.json +6 -5
  23. package/src/routes/socket-route.ts +5 -1
  24. package/src/routes/sync-stream.ts +4 -1
  25. package/src/storage/BucketStorage.ts +6 -1
  26. package/src/storage/ChecksumCache.ts +294 -0
  27. package/src/storage/mongo/MongoSyncBucketStorage.ts +31 -15
  28. package/src/sync/sync.ts +44 -37
  29. package/src/util/protocol-types.ts +6 -1
  30. package/src/util/utils.ts +36 -16
  31. package/test/src/__snapshots__/sync.test.ts.snap +14 -2
  32. package/test/src/checksum_cache.test.ts +436 -0
  33. package/test/src/data_storage.test.ts +3 -3
  34. package/test/src/large_batch.test.ts +4 -4
  35. package/test/src/sync_rules.test.ts +11 -9
  36. package/tsconfig.tsbuildinfo +1 -1
package/src/storage/ChecksumCache.ts ADDED
@@ -0,0 +1,294 @@
+ import { BucketChecksum, OpId } from '@/util/protocol-types.js';
+ import { ChecksumMap, addBucketChecksums } from '@/util/utils.js';
+ import { LRUCache } from 'lru-cache/min';
+ import { OrderedSet } from '@js-sdsl/ordered-set';
+
+ interface ChecksumFetchContext {
+   fetch(bucket: string): Promise<BucketChecksum>;
+   checkpoint: bigint;
+ }
+
+ export interface FetchPartialBucketChecksum {
+   bucket: string;
+   start?: OpId;
+   end: OpId;
+ }
+
+ export type FetchChecksums = (batch: FetchPartialBucketChecksum[]) => Promise<ChecksumMap>;
+
+ export interface ChecksumCacheOptions {
+   /**
+    * Upstream checksum implementation.
+    *
+    * This fetches a batch of either entire bucket checksums, or a partial range.
+    */
+   fetchChecksums: FetchChecksums;
+
+   /**
+    * Maximum number of cached checksums.
+    */
+   maxSize?: number;
+ }
+
+ // Approximately 5MB of memory, if we assume 50 bytes per entry
+ const DEFAULT_MAX_SIZE = 100_000;
+
+ /**
+  * Implements an LRU cache for checksum requests. Each (bucket, checkpoint) request is cached separately,
+  * while the lookups occur in batches.
+  *
+  * For each bucket, we keep a separate OrderedSet of cached checkpoints.
+  * This allows us to incrementally update checksums by using the last cached checksum for the same bucket.
+  *
+  * We use the LRUCache fetchMethod to deduplicate in-progress requests.
+  */
+ export class ChecksumCache {
+   /**
+    * The primary checksum cache, with key of `${checkpoint}/${bucket}`.
+    */
+   private cache: LRUCache<string, BucketChecksum, ChecksumFetchContext>;
+   /**
+    * For each bucket, an ordered set of cached checkpoints.
+    */
+   private bucketCheckpoints = new Map<string, OrderedSet<bigint>>();
+
+   private fetchChecksums: FetchChecksums;
+
+   constructor(options: ChecksumCacheOptions) {
+     this.fetchChecksums = options.fetchChecksums;
+
+     this.cache = new LRUCache<string, BucketChecksum, ChecksumFetchContext>({
+       max: options.maxSize ?? DEFAULT_MAX_SIZE,
+       fetchMethod: async (cacheKey, _staleValue, options) => {
+         // Called when this checksum hasn't been cached yet.
+         // Pass the call back to the request, which implements batch fetching.
+         const { bucket } = parseCacheKey(cacheKey);
+         const result = await options.context.fetch(bucket);
+
+         // Add to the set of cached checkpoints for the bucket.
+         let checkpointSet = this.bucketCheckpoints.get(bucket);
+         if (checkpointSet == null) {
+           checkpointSet = new OrderedSet();
+           this.bucketCheckpoints.set(bucket, checkpointSet);
+         }
+         checkpointSet.insert(options.context.checkpoint);
+         return result;
+       },
+
+       dispose: (value, key) => {
+         // Remove from the set of cached checkpoints for the bucket
+         const { checkpointString } = parseCacheKey(key);
+         const checkpoint = BigInt(checkpointString);
+         const checkpointSet = this.bucketCheckpoints.get(value.bucket);
+         if (checkpointSet == null) {
+           return;
+         }
+         checkpointSet.eraseElementByKey(checkpoint);
+         if (checkpointSet.length == 0) {
+           this.bucketCheckpoints.delete(value.bucket);
+         }
+       },
+
+       noDisposeOnSet: true,
+
+       // When we have more fetches than the cache size, complete the fetches instead
+       // of failing with Error('evicted').
+       ignoreFetchAbort: true
+     });
+   }
+
+   async getChecksums(checkpoint: OpId, buckets: string[]): Promise<BucketChecksum[]> {
+     const checksums = await this.getChecksumMap(checkpoint, buckets);
+     // Return results in the same order as the request
+     return buckets.map((bucket) => checksums.get(bucket)!);
+   }
+
+   /**
+    * Get bucket checksums for a checkpoint.
+    *
+    * Any checksums not found upstream are returned as zero checksums.
+    *
+    * @returns a Map with exactly one entry for each bucket requested
+    */
+   async getChecksumMap(checkpoint: OpId, buckets: string[]): Promise<ChecksumMap> {
+     // Buckets that don't have a cached checksum for this checkpoint yet
+     let toFetch = new Set<string>();
+
+     // Newly fetched results
+     let fetchResults = new Map<string, BucketChecksum>();
+
+     // Promise for the batch of new fetch requests
+     let resolveFetch!: () => void;
+     let rejectFetch!: (err: any) => void;
+     let fetchPromise = new Promise<void>((resolve, reject) => {
+       resolveFetch = resolve;
+       rejectFetch = reject;
+     });
+
+     // Accumulated results - both from cached checksums, and fetched checksums
+     let finalResults = new Map<string, BucketChecksum>();
+
+     const context: ChecksumFetchContext = {
+       async fetch(bucket) {
+         await fetchPromise;
+         if (!toFetch.has(bucket)) {
+           // Should never happen
+           throw new Error(`Expected to fetch ${bucket}`);
+         }
+         const checksum = fetchResults.get(bucket);
+         if (checksum == null) {
+           // Should never happen
+           throw new Error(`Failed to fetch checksum for bucket ${bucket}`);
+         }
+         return checksum;
+       },
+       checkpoint: BigInt(checkpoint)
+     };
+
+     // One promise to await to ensure all fetch requests completed.
+     let settledPromise: Promise<PromiseSettledResult<void>[]> | null = null;
+
+     try {
+       // Individual cache fetch promises
+       let cacheFetchPromises: Promise<void>[] = [];
+
+       for (let bucket of buckets) {
+         const cacheKey = makeCacheKey(checkpoint, bucket);
+         let status: LRUCache.Status<BucketChecksum> = {};
+         const p = this.cache.fetch(cacheKey, { context: context, status: status }).then((checksums) => {
+           if (checksums == null) {
+             // Should never happen
+             throw new Error(`Failed to get checksums for ${cacheKey}`);
+           }
+           finalResults.set(bucket, checksums);
+         });
+         cacheFetchPromises.push(p);
+         if (status.fetch == 'hit' || status.fetch == 'inflight') {
+           // The checksum is either cached already (hit), or another request is busy
+           // fetching it (inflight).
+           // In either case, we don't need to fetch a new checksum.
+         } else {
+           // We need a new request for this checksum.
+           toFetch.add(bucket);
+         }
+       }
+       // We do this directly after creating the promises, otherwise
+       // we could end up with weird uncaught rejection errors.
+       settledPromise = Promise.allSettled(cacheFetchPromises);
+
+       if (toFetch.size == 0) {
+         // Nothing to fetch, but resolve anyway in case something is waiting on the promise.
+         resolveFetch();
+       } else {
+         let bucketRequests: FetchPartialBucketChecksum[] = [];
+         // Partial checksum (previously cached) to add to the partial fetch
+         let add = new Map<string, BucketChecksum>();
+
+         for (let bucket of toFetch) {
+           let bucketRequest: FetchPartialBucketChecksum | null = null;
+           const checkpointSet = this.bucketCheckpoints.get(bucket);
+           if (checkpointSet != null) {
+             // Find smaller checkpoints, sorted in descending order
+             let iter = checkpointSet.reverseUpperBound(context.checkpoint);
+             const begin = checkpointSet.begin();
+             while (iter.isAccessible()) {
+               const cp = iter.pointer;
+               const cacheKey = makeCacheKey(cp, bucket);
+               // peek to avoid refreshing the key
+               const cached = this.cache.peek(cacheKey);
+               // As long as dispose() works correctly, the checkpoint set should
+               // match up with the cache, and `cached` should also have a value here.
+               // However, we handle cases where it's not present either way.
+               // Test by disabling the `dispose()` callback.
+               if (cached != null) {
+                 // Partial checksum found - make a partial checksum request
+                 bucketRequest = {
+                   bucket,
+                   start: cp.toString(),
+                   end: checkpoint
+                 };
+                 add.set(bucket, cached);
+                 break;
+               }
+
+               if (iter.equals(begin)) {
+                 // Cannot iterate further
+                 break;
+               }
+               // Iterate backwards
+               iter = iter.pre();
+             }
+           }
+
+           if (bucketRequest == null) {
+             // No partial checksum found - make a new full checksum request
+             bucketRequest = {
+               bucket,
+               end: checkpoint
+             };
+             add.set(bucket, {
+               bucket,
+               checksum: 0,
+               count: 0
+             });
+           }
+           bucketRequests.push(bucketRequest);
+         }
+
+         // Fetch partial checksums from upstream
+         const results = await this.fetchChecksums(bucketRequests);
+
+         for (let bucket of toFetch) {
+           const result = results.get(bucket);
+           const toAdd = add.get(bucket);
+           if (toAdd == null) {
+             // Should never happen
+             throw new Error(`toAdd null for ${bucket}`);
+           }
+           // Compute the full checksum from the two partials.
+           // No results returned are treated the same as a zero result.
+           const added = addBucketChecksums(toAdd, result ?? null);
+           fetchResults.set(bucket, added);
+         }
+
+         // fetchResults is fully populated, so we resolve the Promise
+         resolveFetch();
+       }
+     } catch (e) {
+       // Failure when fetching checksums - reject the Promise.
+       // This will reject all individual cache fetch requests, and each will be retried
+       // on the next request.
+       rejectFetch(e);
+
+       // Wait for the above rejection to propagate, otherwise we end up with "uncaught" errors.
+       // This promise never throws.
+       await settledPromise;
+
+       throw e;
+     }
+
+     // Wait for all cache fetch requests to complete
+     const settledResults = (await settledPromise) ?? [];
+     // Check if any of them failed
+     for (let result of settledResults) {
+       if (result.status == 'rejected') {
+         throw result.reason;
+       }
+     }
+
+     if (finalResults.size != buckets.length) {
+       // Should not happen
+       throw new Error(`Bucket results mismatch: ${finalResults.size} != ${buckets.length}`);
+     }
+     return finalResults;
+   }
+ }
+
+ function makeCacheKey(checkpoint: bigint | string, bucket: string) {
+   return `${checkpoint}/${bucket}`;
+ }
+
+ function parseCacheKey(key: string) {
+   const index = key.indexOf('/');
+   return { checkpointString: key.substring(0, index), bucket: key.substring(index + 1) };
+ }
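For context, here is a minimal sketch of how the new cache could be wired up. The upstream query function and the bucket names below are illustrative placeholders, not part of the package:

```ts
import { ChecksumCache, FetchPartialBucketChecksum } from '@/storage/ChecksumCache.js';
import { BucketChecksum } from '@/util/protocol-types.js';

// Placeholder upstream query; a real implementation would aggregate
// checksums over the (start, end] op range in storage.
async function queryStore(request: FetchPartialBucketChecksum): Promise<BucketChecksum> {
  return { bucket: request.bucket, checksum: 0, count: 0 };
}

const cache = new ChecksumCache({
  fetchChecksums: async (batch) => {
    const results = new Map<string, BucketChecksum>();
    for (const request of batch) {
      // request.start is set when a previously cached checksum can be
      // extended, so only the newer op range needs to be scanned upstream.
      results.set(request.bucket, await queryStore(request));
    }
    return results;
  }
});

// The first call fetches from upstream; a repeat call for the same
// (bucket, checkpoint) pairs is served from the LRU cache.
const checksums = await cache.getChecksums('100', ['mybucket[]']);
```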
package/src/storage/mongo/MongoSyncBucketStorage.ts CHANGED
@@ -22,9 +22,15 @@ import { PowerSyncMongo } from './db.js';
  import { BucketDataDocument, BucketDataKey, SourceKey, SyncRuleState } from './models.js';
  import { MongoBucketBatch } from './MongoBucketBatch.js';
  import { BSON_DESERIALIZE_OPTIONS, idPrefixFilter, readSingleBatch, serializeLookup } from './util.js';
+ import { ChecksumCache, FetchPartialBucketChecksum } from '../ChecksumCache.js';

  export class MongoSyncBucketStorage implements SyncRulesBucketStorage {
    private readonly db: PowerSyncMongo;
+   private checksumCache = new ChecksumCache({
+     fetchChecksums: (batch) => {
+       return this.getChecksumsInternal(batch);
+     }
+   });

    constructor(
      public readonly factory: MongoBucketStorage,
@@ -316,23 +322,28 @@ export class MongoSyncBucketStorage implements SyncRulesBucketStorage {
      }
    }

-   async getChecksums(checkpoint: util.OpId, buckets: string[]): Promise<util.BucketChecksum[]> {
-     if (buckets.length == 0) {
-       return [];
+   async getChecksums(checkpoint: util.OpId, buckets: string[]): Promise<util.ChecksumMap> {
+     return this.checksumCache.getChecksumMap(checkpoint, buckets);
+   }
+
+   private async getChecksumsInternal(batch: FetchPartialBucketChecksum[]): Promise<util.ChecksumMap> {
+     if (batch.length == 0) {
+       return new Map();
      }
+
      const filters: any[] = [];
-     for (let name of buckets) {
+     for (let request of batch) {
        filters.push({
          _id: {
            $gt: {
              g: this.group_id,
-             b: name,
-             o: new bson.MinKey()
+             b: request.bucket,
+             o: request.start ? BigInt(request.start) : new bson.MinKey()
            },
            $lte: {
              g: this.group_id,
-             b: name,
-             o: BigInt(checkpoint)
+             b: request.bucket,
+             o: BigInt(request.end)
            }
          }
        });
@@ -354,13 +365,18 @@ export class MongoSyncBucketStorage implements SyncRulesBucketStorage {
        )
        .toArray();

-     return aggregate.map((doc) => {
-       return {
-         bucket: doc._id,
-         count: doc.count,
-         checksum: Number(BigInt(doc.checksum_total) & 0xffffffffn) & 4294967295
-       };
-     });
+     return new Map<string, util.BucketChecksum>(
+       aggregate.map((doc) => {
+         return [
+           doc._id,
+           {
+             bucket: doc._id,
+             count: doc.count,
+             checksum: Number(BigInt(doc.checksum_total) & 0xffffffffn) & 0xffffffff
+           } satisfies util.BucketChecksum
+         ];
+       })
+     );
    }

    async terminate() {
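Note that `4294967295` and `0xffffffff` are the same mask, so the checksum change above is purely cosmetic. A small illustration of the folding, with a made-up aggregate value:

```ts
// checksum_total is aggregated as a BigInt and can exceed 32 bits.
const total = 0x1_89ab_cdefn;               // hypothetical aggregate, > 2^32
const low32 = Number(total & 0xffffffffn);  // 2309737967: keep only the low 32 bits
const checksum = low32 & 0xffffffff;        // ToInt32: -1985229329 (wraps to signed 32-bit)
```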
package/src/sync/sync.ts CHANGED
@@ -78,8 +78,8 @@ async function* streamResponseInner(
    // This starts with the state from the client. May contain buckets that the user does not have access to (anymore).
    let dataBuckets = new Map<string, string>();

-   let last_checksums: util.BucketChecksum[] | null = null;
-   let last_write_checkpoint: bigint | null = null;
+   let lastChecksums: util.ChecksumMap | null = null;
+   let lastWriteCheckpoint: bigint | null = null;

    const { raw_data, binary_data } = params;

@@ -113,39 +113,42 @@ async function* streamResponseInner(
      throw new Error(`Too many buckets: ${allBuckets.length}`);
    }

-   let checksums: util.BucketChecksum[] | undefined = undefined;
-
    let dataBucketsNew = new Map<string, string>();
    for (let bucket of allBuckets) {
      dataBucketsNew.set(bucket, dataBuckets.get(bucket) ?? '0');
    }
    dataBuckets = dataBucketsNew;

-   checksums = await storage.getChecksums(checkpoint, [...dataBuckets.keys()]);
+   const bucketList = [...dataBuckets.keys()];
+   const checksumMap = await storage.getChecksums(checkpoint, bucketList);
+   // Subset of buckets for which there may be new data in this batch.
+   let bucketsToFetch: string[];

-   if (last_checksums) {
-     const diff = util.checksumsDiff(last_checksums, checksums);
+   if (lastChecksums) {
+     const diff = util.checksumsDiff(lastChecksums, checksumMap);

      if (
-       last_write_checkpoint == writeCheckpoint &&
-       diff.removed_buckets.length == 0 &&
-       diff.updated_buckets.length == 0
+       lastWriteCheckpoint == writeCheckpoint &&
+       diff.removedBuckets.length == 0 &&
+       diff.updatedBuckets.length == 0
      ) {
        // No changes - don't send anything to the client
        continue;
      }
+     bucketsToFetch = diff.updatedBuckets.map((c) => c.bucket);

      let message = `Updated checkpoint: ${checkpoint} | write: ${writeCheckpoint} | `;
      message += `buckets: ${allBuckets.length} | `;
-     message += `updated: ${limitedBuckets(diff.updated_buckets, 20)} | `;
-     message += `removed: ${limitedBuckets(diff.removed_buckets, 20)} | `;
+     message += `updated: ${limitedBuckets(diff.updatedBuckets, 20)} | `;
+     message += `removed: ${limitedBuckets(diff.removedBuckets, 20)} | `;
      micro.logger.info(message);

      const checksum_line: util.StreamingSyncCheckpointDiff = {
        checkpoint_diff: {
          last_op_id: checkpoint,
          write_checkpoint: writeCheckpoint ? String(writeCheckpoint) : undefined,
-         ...diff
+         removed_buckets: diff.removedBuckets,
+         updated_buckets: diff.updatedBuckets
        }
      };

@@ -154,35 +157,41 @@ async function* streamResponseInner(
      let message = `New checkpoint: ${checkpoint} | write: ${writeCheckpoint} | `;
      message += `buckets: ${allBuckets.length} ${limitedBuckets(allBuckets, 20)}`;
      micro.logger.info(message);
+     bucketsToFetch = allBuckets;
      const checksum_line: util.StreamingSyncCheckpoint = {
        checkpoint: {
          last_op_id: checkpoint,
          write_checkpoint: writeCheckpoint ? String(writeCheckpoint) : undefined,
-         buckets: checksums
+         buckets: [...checksumMap.values()]
        }
      };
      yield checksum_line;
    }
+   lastChecksums = checksumMap;
+   lastWriteCheckpoint = writeCheckpoint;

-   last_checksums = checksums;
-   last_write_checkpoint = writeCheckpoint;
-
-   yield* bucketDataInBatches(storage, checkpoint, dataBuckets, raw_data, binary_data, signal);
+   // This incrementally updates dataBuckets with each individual bucket position.
+   // At the end of this, we can be sure that all buckets have data up to the checkpoint.
+   yield* bucketDataInBatches({ storage, checkpoint, bucketsToFetch, dataBuckets, raw_data, binary_data, signal });

    await new Promise((resolve) => setTimeout(resolve, 10));
  }
}

- async function* bucketDataInBatches(
-   storage: storage.SyncRulesBucketStorage,
-   checkpoint: string,
-   dataBuckets: Map<string, string>,
-   raw_data: boolean | undefined,
-   binary_data: boolean | undefined,
-   signal: AbortSignal
- ) {
+ interface BucketDataRequest {
+   storage: storage.SyncRulesBucketStorage;
+   checkpoint: string;
+   bucketsToFetch: string[];
+   /** Bucket data position, modified by the request. */
+   dataBuckets: Map<string, string>;
+   raw_data: boolean | undefined;
+   binary_data: boolean | undefined;
+   signal: AbortSignal;
+ }
+
+ async function* bucketDataInBatches(request: BucketDataRequest) {
    let isDone = false;
-   while (!signal.aborted && !isDone) {
+   while (!request.signal.aborted && !isDone) {
      // The code below is functionally the same as the commented-out for-await loop below.
      // However, the for-await loop appears to have a memory leak, so we avoid it.
      // for await (const { done, data } of bucketDataBatch(storage, checkpoint, dataBuckets, raw_data, signal)) {
@@ -192,7 +201,7 @@ async function* bucketDataInBatches(
      //   }
      //   break;
      // }
-     const iter = bucketDataBatch(storage, checkpoint, dataBuckets, raw_data, binary_data, signal);
+     const iter = bucketDataBatch(request);
      try {
        while (true) {
          const { value, done: iterDone } = await iter.next();
@@ -215,17 +224,15 @@
  /**
   * Extracted as a separate internal function just to avoid memory leaks.
   */
- async function* bucketDataBatch(
-   storage: storage.SyncRulesBucketStorage,
-   checkpoint: string,
-   dataBuckets: Map<string, string>,
-   raw_data: boolean | undefined,
-   binary_data: boolean | undefined,
-   signal: AbortSignal
- ) {
+ async function* bucketDataBatch(request: BucketDataRequest) {
+   const { storage, checkpoint, bucketsToFetch, dataBuckets, raw_data, binary_data, signal } = request;
+
    const [_, release] = await syncSemaphore.acquire();
    try {
-     const data = storage.getBucketDataBatch(checkpoint, dataBuckets);
+     // Optimization: Only fetch buckets for which the checksums have changed since the last checkpoint.
+     // For the first batch, this will be all buckets.
+     const filteredBuckets = new Map(bucketsToFetch.map((bucket) => [bucket, dataBuckets.get(bucket)!]));
+     const data = storage.getBucketDataBatch(checkpoint, filteredBuckets);

      let has_more = false;

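The manual iteration above replaces a `for await` loop that appeared to leak memory. Condensed to its essentials, the pattern looks like this (a sketch of the technique, not the package's code):

```ts
// Drive an async generator by hand instead of `for await`, and always
// call iter.return so the generator's finally blocks run on early exit.
async function drain<T>(gen: AsyncGenerator<T>, handle: (value: T) => boolean) {
  const iter = gen[Symbol.asyncIterator]();
  try {
    while (true) {
      const { value, done } = await iter.next();
      if (done) break;
      if (!handle(value)) break; // early exit still triggers cleanup below
    }
  } finally {
    await iter.return?.(undefined);
  }
}
```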
package/src/util/protocol-types.ts CHANGED
@@ -89,7 +89,12 @@ export const StreamingSyncRequest = t.object({
    /**
     * Data is received in a serialized BSON Buffer
     */
-   binary_data: t.boolean.optional()
+   binary_data: t.boolean.optional(),
+
+   /**
+    * Client parameters to be passed to the sync rules.
+    */
+   parameters: t.record(t.any).optional()
  });

  export type StreamingSyncRequest = t.Decoded<typeof StreamingSyncRequest>;
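A hypothetical request body using the new field; the parameter names below are made up, and other schema fields are omitted:

```ts
// Illustrative streaming sync request body. `region` and `is_admin` are
// example keys, not fields defined by the protocol.
const requestBody = {
  binary_data: true,
  parameters: {
    region: 'eu',
    is_admin: false
  }
};
```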
package/src/util/utils.ts CHANGED
@@ -7,6 +7,8 @@ import * as storage from '@/storage/storage-index.js';
  import { BucketChecksum, OpId } from './protocol-types.js';
  import { retriedQuery } from './pgwire_utils.js';

+ export type ChecksumMap = Map<string, BucketChecksum>;
+
  export function hashData(type: string, id: string, data: string): number {
    const hash = crypto.createHash('sha256');
    hash.update(`put.${type}.${id}.${data}`);
@@ -30,32 +32,50 @@ export function timestampToOpId(ts: bigint): OpId {
    return ts.toString(10);
  }

- export function checksumsDiff(previous: BucketChecksum[], current: BucketChecksum[]) {
-   const updated_buckets: BucketChecksum[] = [];
+ export function checksumsDiff(previous: ChecksumMap, current: ChecksumMap) {
+   // All changed ones
+   const updatedBuckets = new Map<string, BucketChecksum>();

-   const previousBuckets = new Map<string, BucketChecksum>();
-   for (let checksum of previous) {
-     previousBuckets.set(checksum.bucket, checksum);
-   }
-   for (let checksum of current) {
-     if (!previousBuckets.has(checksum.bucket)) {
-       updated_buckets.push(checksum);
+   const toRemove = new Set<string>(previous.keys());
+
+   for (let checksum of current.values()) {
+     const p = previous.get(checksum.bucket);
+     if (p == null) {
+       // Added
+       updatedBuckets.set(checksum.bucket, checksum);
      } else {
-       const p = previousBuckets.get(checksum.bucket);
-       if (p?.checksum != checksum.checksum || p?.count != checksum.count) {
-         updated_buckets.push(checksum);
+       toRemove.delete(checksum.bucket);
+       if (checksum.checksum != p.checksum || checksum.count != p.count) {
+         // Updated
+         updatedBuckets.set(checksum.bucket, checksum);
+       } else {
+         // No change
        }
-       previousBuckets.delete(checksum.bucket);
      }
    }

-   const removed_buckets: string[] = [...previousBuckets.keys()];
    return {
-     updated_buckets,
-     removed_buckets
+     updatedBuckets: [...updatedBuckets.values()],
+     removedBuckets: [...toRemove]
    };
  }

+ export function addChecksums(a: number, b: number) {
+   return (a + b) & 0xffffffff;
+ }
+
+ export function addBucketChecksums(a: BucketChecksum, b: BucketChecksum | null): BucketChecksum {
+   if (b == null) {
+     return a;
+   } else {
+     return {
+       bucket: a.bucket,
+       count: a.count + b.count,
+       checksum: addChecksums(a.checksum, b.checksum)
+     };
+   }
+ }
+
  export async function getClientCheckpoint(
    db: pgwire.PgClient,
    bucketStorage: storage.BucketStorageFactory,
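A short illustration of the new helpers and the reshaped diff result, using made-up checksums:

```ts
import { addChecksums, checksumsDiff } from '@/util/utils.js';

// Checksum addition wraps at 32 bits (via ToInt32), so composing a cached
// partial checksum with a newly fetched range stays in the protocol's range.
addChecksums(0x7fffffff, 1); // -2147483648, not 2147483648

// Diffing two checksum maps: 'a' changed, 'b' disappeared.
const previous = new Map([
  ['a', { bucket: 'a', checksum: 1, count: 1 }],
  ['b', { bucket: 'b', checksum: 2, count: 1 }]
]);
const current = new Map([['a', { bucket: 'a', checksum: 5, count: 2 }]]);
const diff = checksumsDiff(previous, current);
// diff.updatedBuckets -> [{ bucket: 'a', checksum: 5, count: 2 }]
// diff.removedBuckets -> ['b']
```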
package/test/src/__snapshots__/sync.test.ts.snap CHANGED
@@ -12,7 +12,13 @@ exports[`sync - mongodb > expiring token 1`] = `
  [
    {
      "checkpoint": {
-       "buckets": [],
+       "buckets": [
+         {
+           "bucket": "mybucket[]",
+           "checksum": 0,
+           "count": 0,
+         },
+       ],
        "last_op_id": "0",
        "write_checkpoint": undefined,
      },
@@ -135,7 +141,13 @@ exports[`sync - mongodb > sync updates to global data 1`] = `
  [
    {
      "checkpoint": {
-       "buckets": [],
+       "buckets": [
+         {
+           "bucket": "mybucket[]",
+           "checksum": 0,
+           "count": 0,
+         },
+       ],
        "last_op_id": "0",
        "write_checkpoint": undefined,
      },