@powersync/service-core 0.2.0 → 0.2.1

This diff compares the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/src/sync/sync.ts CHANGED
@@ -78,8 +78,8 @@ async function* streamResponseInner(
  // This starts with the state from the client. May contain buckets that the user do not have access to (anymore).
  let dataBuckets = new Map<string, string>();

- let last_checksums: util.BucketChecksum[] | null = null;
- let last_write_checkpoint: bigint | null = null;
+ let lastChecksums: util.ChecksumMap | null = null;
+ let lastWriteCheckpoint: bigint | null = null;

  const { raw_data, binary_data } = params;

@@ -113,39 +113,42 @@ async function* streamResponseInner(
  throw new Error(`Too many buckets: ${allBuckets.length}`);
  }

- let checksums: util.BucketChecksum[] | undefined = undefined;
-
  let dataBucketsNew = new Map<string, string>();
  for (let bucket of allBuckets) {
  dataBucketsNew.set(bucket, dataBuckets.get(bucket) ?? '0');
  }
  dataBuckets = dataBucketsNew;

- checksums = await storage.getChecksums(checkpoint, [...dataBuckets.keys()]);
+ const bucketList = [...dataBuckets.keys()];
+ const checksumMap = await storage.getChecksums(checkpoint, bucketList);
+ // Subset of buckets for which there may be new data in this batch.
+ let bucketsToFetch: string[];

- if (last_checksums) {
- const diff = util.checksumsDiff(last_checksums, checksums);
+ if (lastChecksums) {
+ const diff = util.checksumsDiff(lastChecksums, checksumMap);

  if (
- last_write_checkpoint == writeCheckpoint &&
- diff.removed_buckets.length == 0 &&
- diff.updated_buckets.length == 0
+ lastWriteCheckpoint == writeCheckpoint &&
+ diff.removedBuckets.length == 0 &&
+ diff.updatedBuckets.length == 0
  ) {
  // No changes - don't send anything to the client
  continue;
  }
+ bucketsToFetch = diff.updatedBuckets.map((c) => c.bucket);

  let message = `Updated checkpoint: ${checkpoint} | write: ${writeCheckpoint} | `;
  message += `buckets: ${allBuckets.length} | `;
- message += `updated: ${limitedBuckets(diff.updated_buckets, 20)} | `;
- message += `removed: ${limitedBuckets(diff.removed_buckets, 20)} | `;
+ message += `updated: ${limitedBuckets(diff.updatedBuckets, 20)} | `;
+ message += `removed: ${limitedBuckets(diff.removedBuckets, 20)} | `;
  micro.logger.info(message);

  const checksum_line: util.StreamingSyncCheckpointDiff = {
  checkpoint_diff: {
  last_op_id: checkpoint,
  write_checkpoint: writeCheckpoint ? String(writeCheckpoint) : undefined,
- ...diff
+ removed_buckets: diff.removedBuckets,
+ updated_buckets: diff.updatedBuckets
  }
  };

@@ -154,35 +157,41 @@ async function* streamResponseInner(
  let message = `New checkpoint: ${checkpoint} | write: ${writeCheckpoint} | `;
  message += `buckets: ${allBuckets.length} ${limitedBuckets(allBuckets, 20)}`;
  micro.logger.info(message);
+ bucketsToFetch = allBuckets;
  const checksum_line: util.StreamingSyncCheckpoint = {
  checkpoint: {
  last_op_id: checkpoint,
  write_checkpoint: writeCheckpoint ? String(writeCheckpoint) : undefined,
- buckets: checksums
+ buckets: [...checksumMap.values()]
  }
  };
  yield checksum_line;
  }
+ lastChecksums = checksumMap;
+ lastWriteCheckpoint = writeCheckpoint;

- last_checksums = checksums;
- last_write_checkpoint = writeCheckpoint;
-
- yield* bucketDataInBatches(storage, checkpoint, dataBuckets, raw_data, binary_data, signal);
+ // This incrementally updates dataBuckets with each individual bucket position.
+ // At the end of this, we can be sure that all buckets have data up to the checkpoint.
+ yield* bucketDataInBatches({ storage, checkpoint, bucketsToFetch, dataBuckets, raw_data, binary_data, signal });

  await new Promise((resolve) => setTimeout(resolve, 10));
  }
  }

- async function* bucketDataInBatches(
- storage: storage.SyncRulesBucketStorage,
- checkpoint: string,
- dataBuckets: Map<string, string>,
- raw_data: boolean | undefined,
- binary_data: boolean | undefined,
- signal: AbortSignal
- ) {
+ interface BucketDataRequest {
+ storage: storage.SyncRulesBucketStorage;
+ checkpoint: string;
+ bucketsToFetch: string[];
+ /** Bucket data position, modified by the request. */
+ dataBuckets: Map<string, string>;
+ raw_data: boolean | undefined;
+ binary_data: boolean | undefined;
+ signal: AbortSignal;
+ }
+
+ async function* bucketDataInBatches(request: BucketDataRequest) {
  let isDone = false;
- while (!signal.aborted && !isDone) {
+ while (!request.signal.aborted && !isDone) {
  // The code below is functionally the same as this for-await loop below.
  // However, the for-await loop appears to have a memory leak, so we avoid it.
  // for await (const { done, data } of bucketDataBatch(storage, checkpoint, dataBuckets, raw_data, signal)) {
@@ -192,7 +201,7 @@ async function* bucketDataInBatches(
  // }
  // break;
  // }
- const iter = bucketDataBatch(storage, checkpoint, dataBuckets, raw_data, binary_data, signal);
+ const iter = bucketDataBatch(request);
  try {
  while (true) {
  const { value, done: iterDone } = await iter.next();
@@ -215,17 +224,15 @@ async function* bucketDataInBatches(
  /**
  * Extracted as a separate internal function just to avoid memory leaks.
  */
- async function* bucketDataBatch(
- storage: storage.SyncRulesBucketStorage,
- checkpoint: string,
- dataBuckets: Map<string, string>,
- raw_data: boolean | undefined,
- binary_data: boolean | undefined,
- signal: AbortSignal
- ) {
+ async function* bucketDataBatch(request: BucketDataRequest) {
+ const { storage, checkpoint, bucketsToFetch, dataBuckets, raw_data, binary_data, signal } = request;
+
  const [_, release] = await syncSemaphore.acquire();
  try {
- const data = storage.getBucketDataBatch(checkpoint, dataBuckets);
+ // Optimization: Only fetch buckets for which the checksums have changed since the last checkpoint
+ // For the first batch, this will be all buckets.
+ const filteredBuckets = new Map(bucketsToFetch.map((bucket) => [bucket, dataBuckets.get(bucket)!]));
+ const data = storage.getBucketDataBatch(checkpoint, filteredBuckets);

  let has_more = false;

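Editor's note: taken together, the sync.ts changes make the per-checkpoint data fetch incremental. The first checkpoint on a stream fetches every bucket; later checkpoints diff the new checksum map against the previous one and fetch data only for buckets whose checksums changed. A minimal self-contained sketch of that selection logic follows; the streaming and storage machinery is elided, and pickBucketsToFetch is a hypothetical reduction, not a function in the package.

interface BucketChecksum {
  bucket: string;
  checksum: number;
  count: number;
}
type ChecksumMap = Map<string, BucketChecksum>;

// Mirrors the shape of util.checksumsDiff in this release: buckets that were
// added or changed end up in updatedBuckets; buckets that disappeared from the
// current checkpoint end up in removedBuckets.
function checksumsDiff(previous: ChecksumMap, current: ChecksumMap) {
  const updatedBuckets: BucketChecksum[] = [];
  const removed = new Set(previous.keys());
  for (const c of current.values()) {
    removed.delete(c.bucket);
    const p = previous.get(c.bucket);
    if (p == null || p.checksum != c.checksum || p.count != c.count) {
      updatedBuckets.push(c);
    }
  }
  return { updatedBuckets, removedBuckets: [...removed] };
}

// Hypothetical reduction of the streaming loop: which buckets need a data fetch?
function pickBucketsToFetch(lastChecksums: ChecksumMap | null, current: ChecksumMap): string[] {
  if (lastChecksums == null) {
    // First checkpoint on this stream: fetch everything.
    return [...current.keys()];
  }
  // Only buckets with a changed checksum or count can contain new data.
  return checksumsDiff(lastChecksums, current).updatedBuckets.map((c) => c.bucket);
}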
package/src/util/utils.ts CHANGED
@@ -7,6 +7,8 @@ import * as storage from '@/storage/storage-index.js';
  import { BucketChecksum, OpId } from './protocol-types.js';
  import { retriedQuery } from './pgwire_utils.js';

+ export type ChecksumMap = Map<string, BucketChecksum>;
+
  export function hashData(type: string, id: string, data: string): number {
  const hash = crypto.createHash('sha256');
  hash.update(`put.${type}.${id}.${data}`);
@@ -30,32 +32,50 @@ export function timestampToOpId(ts: bigint): OpId {
  return ts.toString(10);
  }

- export function checksumsDiff(previous: BucketChecksum[], current: BucketChecksum[]) {
- const updated_buckets: BucketChecksum[] = [];
+ export function checksumsDiff(previous: ChecksumMap, current: ChecksumMap) {
+ // All changed ones
+ const updatedBuckets = new Map<string, BucketChecksum>();

- const previousBuckets = new Map<string, BucketChecksum>();
- for (let checksum of previous) {
- previousBuckets.set(checksum.bucket, checksum);
- }
- for (let checksum of current) {
- if (!previousBuckets.has(checksum.bucket)) {
- updated_buckets.push(checksum);
+ const toRemove = new Set<string>(previous.keys());
+
+ for (let checksum of current.values()) {
+ const p = previous.get(checksum.bucket);
+ if (p == null) {
+ // Added
+ updatedBuckets.set(checksum.bucket, checksum);
  } else {
- const p = previousBuckets.get(checksum.bucket);
- if (p?.checksum != checksum.checksum || p?.count != checksum.count) {
- updated_buckets.push(checksum);
+ toRemove.delete(checksum.bucket);
+ if (checksum.checksum != p.checksum || checksum.count != p.count) {
+ // Updated
+ updatedBuckets.set(checksum.bucket, checksum);
+ } else {
+ // No change
  }
- previousBuckets.delete(checksum.bucket);
  }
  }

- const removed_buckets: string[] = [...previousBuckets.keys()];
  return {
- updated_buckets,
- removed_buckets
+ updatedBuckets: [...updatedBuckets.values()],
+ removedBuckets: [...toRemove]
  };
  }

+ export function addChecksums(a: number, b: number) {
+ return (a + b) & 0xffffffff;
+ }
+
+ export function addBucketChecksums(a: BucketChecksum, b: BucketChecksum | null): BucketChecksum {
+ if (b == null) {
+ return a;
+ } else {
+ return {
+ bucket: a.bucket,
+ count: a.count + b.count,
+ checksum: addChecksums(a.checksum, b.checksum)
+ };
+ }
+ }
+
  export async function getClientCheckpoint(
  db: pgwire.PgClient,
  bucketStorage: storage.BucketStorageFactory,
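Editor's note: the new addChecksums helper is what makes partial checksum lookups possible. Bucket checksums are additive with 32-bit wraparound (JavaScript bitwise operators coerce to signed 32-bit integers), so the checksum of an operation range can be derived by adding, or with a negated argument subtracting, the checksums of adjacent ranges. A small worked example using values from the new test file further below; the (0, n] range notation is only illustrative:

function addChecksums(a: number, b: number) {
  // Bitwise ops in JavaScript coerce to 32-bit signed integers, so the sum
  // wraps into the int32 range instead of growing without bound.
  return (a + b) & 0xffffffff;
}

const upTo123 = 1104081737;   // checksum over ops (0, 123] for bucket 'test'
const upTo1234 = -1593864957; // checksum over ops (0, 1234] for the same bucket

// Subtracting the prefix yields the checksum of the partial range (123, 1234]:
const partial = addChecksums(upTo1234, -upTo123); // 1597020602 (count 1111 = 1234 - 123)

// Adding the prefix back recovers the full checksum:
console.log(addChecksums(upTo123, partial) === upTo1234); // true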
@@ -12,7 +12,13 @@ exports[`sync - mongodb > expiring token 1`] = `
  [
  {
  "checkpoint": {
- "buckets": [],
+ "buckets": [
+ {
+ "bucket": "mybucket[]",
+ "checksum": 0,
+ "count": 0,
+ },
+ ],
  "last_op_id": "0",
  "write_checkpoint": undefined,
  },
@@ -135,7 +141,13 @@ exports[`sync - mongodb > sync updates to global data 1`] = `
  [
  {
  "checkpoint": {
- "buckets": [],
+ "buckets": [
+ {
+ "bucket": "mybucket[]",
+ "checksum": 0,
+ "count": 0,
+ },
+ ],
  "last_op_id": "0",
  "write_checkpoint": undefined,
  },
@@ -0,0 +1,436 @@
+ import { describe, expect, it } from 'vitest';
+ import { BucketChecksum, OpId } from '@/util/protocol-types.js';
+ import * as crypto from 'node:crypto';
+ import { addBucketChecksums } from '@/util/util-index.js';
+ import { ChecksumCache, FetchChecksums, FetchPartialBucketChecksum } from '@/storage/ChecksumCache.js';
+
+ /**
+ * Create a deterministic BucketChecksum based on the bucket name and checkpoint for testing purposes.
+ */
+ function testHash(bucket: string, checkpoint: OpId) {
+ const key = `${checkpoint}/${bucket}`;
+ const hash = crypto.createHash('sha256').update(key).digest().readInt32LE(0);
+ return hash;
+ }
+
+ function testPartialHash(request: FetchPartialBucketChecksum): BucketChecksum {
+ if (request.start) {
+ const a = testHash(request.bucket, request.start);
+ const b = testHash(request.bucket, request.end);
+ return addBucketChecksums(
+ {
+ bucket: request.bucket,
+ checksum: b,
+ count: Number(request.end)
+ },
+ {
+ // Subtract a
+ bucket: request.bucket,
+ checksum: -a,
+ count: -Number(request.start)
+ }
+ );
+ } else {
+ return {
+ bucket: request.bucket,
+ checksum: testHash(request.bucket, request.end),
+ count: Number(request.end)
+ };
+ }
+ }
+
+ const TEST_123 = {
+ bucket: 'test',
+ count: 123,
+ checksum: 1104081737
+ };
+
+ const TEST_1234 = {
+ bucket: 'test',
+ count: 1234,
+ checksum: -1593864957
+ };
+
+ const TEST2_123 = {
+ bucket: 'test2',
+ count: 123,
+ checksum: 1741377449
+ };
+
+ const TEST3_123 = {
+ bucket: 'test3',
+ count: 123,
+ checksum: -2085080402
+ };
+
+ function fetchTestChecksums(batch: FetchPartialBucketChecksum[]) {
+ return new Map(
+ batch.map((v) => {
+ return [v.bucket, testPartialHash(v)];
+ })
+ );
+ }
+
+ describe('checksum cache', function () {
+ const factory = (fetch: FetchChecksums) => {
+ return new ChecksumCache({ fetchChecksums: fetch });
+ };
+
+ it('should handle a sequential lookups (a)', async function () {
+ let lookups: FetchPartialBucketChecksum[][] = [];
+ const cache = factory(async (batch) => {
+ lookups.push(batch);
+ return fetchTestChecksums(batch);
+ });
+
+ expect(await cache.getChecksums('123', ['test'])).toEqual([TEST_123]);
+
+ expect(await cache.getChecksums('1234', ['test'])).toEqual([TEST_1234]);
+
+ expect(await cache.getChecksums('123', ['test2'])).toEqual([TEST2_123]);
+
+ expect(lookups).toEqual([
+ [{ bucket: 'test', end: '123' }],
+ // This should use the previous lookup
+ [{ bucket: 'test', start: '123', end: '1234' }],
+ [{ bucket: 'test2', end: '123' }]
+ ]);
+ });
+
+ it('should handle a sequential lookups (b)', async function () {
+ // Reverse order of the above
+ let lookups: FetchPartialBucketChecksum[][] = [];
+ const cache = factory(async (batch) => {
+ lookups.push(batch);
+ return fetchTestChecksums(batch);
+ });
+
+ expect(await cache.getChecksums('123', ['test2'])).toEqual([TEST2_123]);
+
+ expect(await cache.getChecksums('1234', ['test'])).toEqual([TEST_1234]);
+
+ expect(await cache.getChecksums('123', ['test'])).toEqual([TEST_123]);
+
+ expect(lookups).toEqual([
+ // With this order, there is no option for a partial lookup
+ [{ bucket: 'test2', end: '123' }],
+ [{ bucket: 'test', end: '1234' }],
+ [{ bucket: 'test', end: '123' }]
+ ]);
+ });
+
+ it('should handle a concurrent lookups (a)', async function () {
+ let lookups: FetchPartialBucketChecksum[][] = [];
+ const cache = factory(async (batch) => {
+ lookups.push(batch);
+ return fetchTestChecksums(batch);
+ });
+
+ const p1 = cache.getChecksums('123', ['test']);
+ const p2 = cache.getChecksums('1234', ['test']);
+ const p3 = cache.getChecksums('123', ['test2']);
+
+ expect(await p1).toEqual([TEST_123]);
+ expect(await p2).toEqual([TEST_1234]);
+ expect(await p3).toEqual([TEST2_123]);
+
+ // Concurrent requests, so we can't do a partial lookup for 123 -> 1234
+ expect(lookups).toEqual([
+ [{ bucket: 'test', end: '123' }],
+ [{ bucket: 'test', end: '1234' }],
+ [{ bucket: 'test2', end: '123' }]
+ ]);
+ });
+
+ it('should handle a concurrent lookups (b)', async function () {
+ let lookups: FetchPartialBucketChecksum[][] = [];
+ const cache = factory(async (batch) => {
+ lookups.push(batch);
+ return fetchTestChecksums(batch);
+ });
+
+ const p1 = cache.getChecksums('123', ['test']);
+ const p2 = cache.getChecksums('123', ['test']);
+
+ expect(await p1).toEqual([TEST_123]);
+
+ expect(await p2).toEqual([TEST_123]);
+
+ // The lookup should be deduplicated, even though it's in progress
+ expect(lookups).toEqual([[{ bucket: 'test', end: '123' }]]);
+ });
+
+ it('should handle serial + concurrent lookups', async function () {
+ let lookups: FetchPartialBucketChecksum[][] = [];
+ const cache = factory(async (batch) => {
+ lookups.push(batch);
+ return fetchTestChecksums(batch);
+ });
+
+ expect(await cache.getChecksums('123', ['test'])).toEqual([TEST_123]);
+
+ const p2 = cache.getChecksums('1234', ['test']);
+ const p3 = cache.getChecksums('1234', ['test']);
+
+ expect(await p2).toEqual([TEST_1234]);
+ expect(await p3).toEqual([TEST_1234]);
+
+ expect(lookups).toEqual([
+ [{ bucket: 'test', end: '123' }],
+ // This lookup is deduplicated
+ [{ bucket: 'test', start: '123', end: '1234' }]
+ ]);
+ });
+
+ it('should handle multiple buckets', async function () {
+ let lookups: FetchPartialBucketChecksum[][] = [];
+ const cache = factory(async (batch) => {
+ lookups.push(batch);
+ return fetchTestChecksums(batch);
+ });
+
+ expect(await cache.getChecksums('123', ['test', 'test2'])).toEqual([TEST_123, TEST2_123]);
+
+ expect(lookups).toEqual([
+ [
+ // Both lookups in the same request
+ { bucket: 'test', end: '123' },
+ { bucket: 'test2', end: '123' }
+ ]
+ ]);
+ });
+
+ it('should handle multiple buckets with partial caching (a)', async function () {
+ let lookups: FetchPartialBucketChecksum[][] = [];
+ const cache = factory(async (batch) => {
+ lookups.push(batch);
+ return fetchTestChecksums(batch);
+ });
+
+ expect(await cache.getChecksums('123', ['test'])).toEqual([TEST_123]);
+ expect(await cache.getChecksums('123', ['test', 'test2'])).toEqual([TEST_123, TEST2_123]);
+
+ expect(lookups).toEqual([
+ // Request 1
+ [{ bucket: 'test', end: '123' }],
+ // Request 2
+ [{ bucket: 'test2', end: '123' }]
+ ]);
+ });
+
+ it('should handle multiple buckets with partial caching (b)', async function () {
+ let lookups: FetchPartialBucketChecksum[][] = [];
+ const cache = factory(async (batch) => {
+ lookups.push(batch);
+ return fetchTestChecksums(batch);
+ });
+
+ const a = cache.getChecksums('123', ['test', 'test2']);
+ const b = cache.getChecksums('123', ['test2', 'test3']);
+
+ expect(await a).toEqual([TEST_123, TEST2_123]);
+ expect(await b).toEqual([TEST2_123, TEST3_123]);
+
+ expect(lookups).toEqual([
+ // Request A
+ [
+ { bucket: 'test', end: '123' },
+ { bucket: 'test2', end: '123' }
+ ],
+ // Request B (re-uses the checksum for test2 from request a)
+ [{ bucket: 'test3', end: '123' }]
+ ]);
+ });
+
+ it('should handle out-of-order requests', async function () {
+ let lookups: FetchPartialBucketChecksum[][] = [];
+ const cache = factory(async (batch) => {
+ lookups.push(batch);
+ return fetchTestChecksums(batch);
+ });
+
+ expect(await cache.getChecksums('123', ['test'])).toEqual([TEST_123]);
+
+ expect(await cache.getChecksums('125', ['test'])).toEqual([
+ {
+ bucket: 'test',
+ checksum: -1865121912,
+ count: 125
+ }
+ ]);
+
+ expect(await cache.getChecksums('124', ['test'])).toEqual([
+ {
+ bucket: 'test',
+ checksum: 1887460431,
+ count: 124
+ }
+ ]);
+ expect(lookups).toEqual([
+ [{ bucket: 'test', end: '123' }],
+ [{ bucket: 'test', start: '123', end: '125' }],
+ [{ bucket: 'test', start: '123', end: '124' }]
+ ]);
+ });
+
+ it('should handle errors', async function () {
+ let lookups: FetchPartialBucketChecksum[][] = [];
+ const TEST_ERROR = new Error('Simulated error');
+ const cache = factory(async (batch) => {
+ lookups.push(batch);
+ if (lookups.length == 1) {
+ throw new Error('Simulated error');
+ }
+ return fetchTestChecksums(batch);
+ });
+
+ const a = cache.getChecksums('123', ['test', 'test2']);
+ const b = cache.getChecksums('123', ['test2', 'test3']);
+
+ await expect(a).rejects.toEqual(TEST_ERROR);
+ await expect(b).rejects.toEqual(TEST_ERROR);
+
+ const a2 = cache.getChecksums('123', ['test', 'test2']);
+ const b2 = cache.getChecksums('123', ['test2', 'test3']);
+
+ expect(await a2).toEqual([TEST_123, TEST2_123]);
+ expect(await b2).toEqual([TEST2_123, TEST3_123]);
+
+ expect(lookups).toEqual([
+ // Request A (fails)
+ [
+ { bucket: 'test', end: '123' },
+ { bucket: 'test2', end: '123' }
+ ],
+ // Request B (re-uses the checksum for test2 from request a)
+ // Even thought the full request fails, this batch succeeds
+ [{ bucket: 'test3', end: '123' }],
+ // Retry request A
+ [
+ { bucket: 'test', end: '123' },
+ { bucket: 'test2', end: '123' }
+ ]
+ ]);
+ });
+
+ it('should handle missing checksums (a)', async function () {
+ let lookups: FetchPartialBucketChecksum[][] = [];
+ const cache = factory(async (batch) => {
+ lookups.push(batch);
+ return fetchTestChecksums(batch.filter((b) => b.bucket != 'test'));
+ });
+
+ expect(await cache.getChecksums('123', ['test'])).toEqual([{ bucket: 'test', checksum: 0, count: 0 }]);
+ expect(await cache.getChecksums('123', ['test', 'test2'])).toEqual([
+ { bucket: 'test', checksum: 0, count: 0 },
+ TEST2_123
+ ]);
+ });
+
+ it('should handle missing checksums (b)', async function () {
+ let lookups: FetchPartialBucketChecksum[][] = [];
+ const cache = factory(async (batch) => {
+ lookups.push(batch);
+ return fetchTestChecksums(batch.filter((b) => b.bucket != 'test' || b.end != '123'));
+ });
+
+ expect(await cache.getChecksums('123', ['test'])).toEqual([{ bucket: 'test', checksum: 0, count: 0 }]);
+ expect(await cache.getChecksums('1234', ['test'])).toEqual([
+ {
+ bucket: 'test',
+ checksum: 1597020602,
+ count: 1111
+ }
+ ]);
+
+ expect(lookups).toEqual([[{ bucket: 'test', end: '123' }], [{ bucket: 'test', start: '123', end: '1234' }]]);
+ });
+
+ it('should use maxSize', async function () {
+ let lookups: FetchPartialBucketChecksum[][] = [];
+ const cache = new ChecksumCache({
+ fetchChecksums: async (batch) => {
+ lookups.push(batch);
+ return fetchTestChecksums(batch);
+ },
+ maxSize: 2
+ });
+
+ expect(await cache.getChecksums('123', ['test'])).toEqual([TEST_123]);
+ expect(await cache.getChecksums('124', ['test'])).toEqual([
+ {
+ bucket: 'test',
+ checksum: 1887460431,
+ count: 124
+ }
+ ]);
+
+ expect(await cache.getChecksums('125', ['test'])).toEqual([
+ {
+ bucket: 'test',
+ checksum: -1865121912,
+ count: 125
+ }
+ ]);
+ expect(await cache.getChecksums('126', ['test'])).toEqual([
+ {
+ bucket: 'test',
+ checksum: -1720007310,
+ count: 126
+ }
+ ]);
+ expect(await cache.getChecksums('124', ['test'])).toEqual([
+ {
+ bucket: 'test',
+ checksum: 1887460431,
+ count: 124
+ }
+ ]);
+ expect(await cache.getChecksums('123', ['test'])).toEqual([TEST_123]);
+
+ expect(lookups).toEqual([
+ [{ bucket: 'test', end: '123' }],
+ [{ bucket: 'test', start: '123', end: '124' }],
+ [{ bucket: 'test', start: '124', end: '125' }],
+ [{ bucket: 'test', start: '125', end: '126' }],
+ [{ bucket: 'test', end: '124' }],
+ [{ bucket: 'test', end: '123' }]
+ ]);
+ });
+
+ it('should handle concurrent requests greater than cache size', async function () {
+ // This will not be cached efficiently, but we test that we don't get errors at least.
+ let lookups: FetchPartialBucketChecksum[][] = [];
+ const cache = new ChecksumCache({
+ fetchChecksums: async (batch) => {
+ lookups.push(batch);
+ return fetchTestChecksums(batch);
+ },
+ maxSize: 2
+ });
+
+ const p3 = cache.getChecksums('123', ['test3']);
+ const p4 = cache.getChecksums('123', ['test4']);
+ const p1 = cache.getChecksums('123', ['test']);
+ const p2 = cache.getChecksums('123', ['test2']);
+
+ expect(await p1).toEqual([TEST_123]);
+ expect(await p2).toEqual([TEST2_123]);
+ expect(await p3).toEqual([TEST3_123]);
+ expect(await p4).toEqual([
+ {
+ bucket: 'test4',
+ checksum: 1004797863,
+ count: 123
+ }
+ ]);
+
+ // The lookup should be deduplicated, even though it's in progress
+ expect(lookups).toEqual([
+ [{ bucket: 'test3', end: '123' }],
+ [{ bucket: 'test4', end: '123' }],
+ [{ bucket: 'test', end: '123' }],
+ [{ bucket: 'test2', end: '123' }]
+ ]);
+ });
+ });
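Editor's note: the tests above pin down the ChecksumCache contract introduced in this release: results are cached per bucket and checkpoint, concurrent lookups for the same key are deduplicated even while in flight, and a cached checksum at an earlier checkpoint lets the cache fetch only the partial (start, end] range and combine it via addBucketChecksums. A usage sketch based solely on the API the tests exercise; the fetchChecksums body is a placeholder, not the package's storage implementation:

import { ChecksumCache, FetchPartialBucketChecksum } from '@/storage/ChecksumCache.js';

const cache = new ChecksumCache({
  // Called with the minimal set of (possibly partial) ranges not yet cached.
  fetchChecksums: async (batch: FetchPartialBucketChecksum[]) => {
    return new Map(
      batch.map((req) => [
        req.bucket,
        // Placeholder: a real implementation would aggregate the checksum and
        // count of operations in (req.start ?? '0', req.end] from bucket storage.
        { bucket: req.bucket, checksum: 0, count: 0 }
      ])
    );
  },
  maxSize: 1000 // optional bound on cached entries, as in the maxSize test
});

// The first call fetches { bucket: 'test', end: '123' }; a later call at
// checkpoint '1234' only fetches { bucket: 'test', start: '123', end: '1234' }
// and composes it with the cached value.
const checksums = await cache.getChecksums('123', ['test']);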