@powersync/service-core 0.0.0-dev-20240718134716 → 0.0.0-dev-20240725112650

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97)
  1. package/CHANGELOG.md +11 -6
  2. package/dist/entry/cli-entry.js +2 -1
  3. package/dist/entry/cli-entry.js.map +1 -1
  4. package/dist/entry/commands/compact-action.d.ts +2 -0
  5. package/dist/entry/commands/compact-action.js +48 -0
  6. package/dist/entry/commands/compact-action.js.map +1 -0
  7. package/dist/entry/entry-index.d.ts +1 -0
  8. package/dist/entry/entry-index.js +1 -0
  9. package/dist/entry/entry-index.js.map +1 -1
  10. package/dist/metrics/Metrics.d.ts +4 -3
  11. package/dist/metrics/Metrics.js +51 -0
  12. package/dist/metrics/Metrics.js.map +1 -1
  13. package/dist/replication/WalStream.js +6 -8
  14. package/dist/replication/WalStream.js.map +1 -1
  15. package/dist/routes/configure-fastify.d.ts +883 -0
  16. package/dist/routes/configure-fastify.js +58 -0
  17. package/dist/routes/configure-fastify.js.map +1 -0
  18. package/dist/routes/configure-rsocket.d.ts +13 -0
  19. package/dist/routes/configure-rsocket.js +46 -0
  20. package/dist/routes/configure-rsocket.js.map +1 -0
  21. package/dist/routes/endpoints/socket-route.js +6 -14
  22. package/dist/routes/endpoints/socket-route.js.map +1 -1
  23. package/dist/routes/endpoints/sync-stream.js +4 -5
  24. package/dist/routes/endpoints/sync-stream.js.map +1 -1
  25. package/dist/routes/route-register.d.ts +1 -1
  26. package/dist/routes/route-register.js +1 -1
  27. package/dist/routes/route-register.js.map +1 -1
  28. package/dist/routes/router-socket.d.ts +4 -4
  29. package/dist/routes/router-socket.js.map +1 -1
  30. package/dist/routes/router.d.ts +1 -0
  31. package/dist/routes/router.js.map +1 -1
  32. package/dist/routes/routes-index.d.ts +2 -0
  33. package/dist/routes/routes-index.js +2 -0
  34. package/dist/routes/routes-index.js.map +1 -1
  35. package/dist/storage/BucketStorage.d.ts +31 -1
  36. package/dist/storage/BucketStorage.js.map +1 -1
  37. package/dist/storage/mongo/MongoCompactor.d.ts +40 -0
  38. package/dist/storage/mongo/MongoCompactor.js +292 -0
  39. package/dist/storage/mongo/MongoCompactor.js.map +1 -0
  40. package/dist/storage/mongo/MongoSyncBucketStorage.d.ts +3 -2
  41. package/dist/storage/mongo/MongoSyncBucketStorage.js +19 -13
  42. package/dist/storage/mongo/MongoSyncBucketStorage.js.map +1 -1
  43. package/dist/storage/mongo/models.d.ts +5 -4
  44. package/dist/storage/mongo/models.js.map +1 -1
  45. package/dist/storage/mongo/util.d.ts +3 -0
  46. package/dist/storage/mongo/util.js +22 -0
  47. package/dist/storage/mongo/util.js.map +1 -1
  48. package/dist/sync/RequestTracker.js +2 -3
  49. package/dist/sync/RequestTracker.js.map +1 -1
  50. package/dist/sync/sync-index.d.ts +1 -0
  51. package/dist/sync/sync-index.js +1 -0
  52. package/dist/sync/sync-index.js.map +1 -1
  53. package/dist/sync/sync.js +20 -7
  54. package/dist/sync/sync.js.map +1 -1
  55. package/dist/sync/util.js.map +1 -1
  56. package/dist/util/config/collectors/config-collector.d.ts +12 -0
  57. package/dist/util/config/collectors/config-collector.js +43 -0
  58. package/dist/util/config/collectors/config-collector.js.map +1 -1
  59. package/dist/util/config/compound-config-collector.d.ts +3 -29
  60. package/dist/util/config/compound-config-collector.js +22 -69
  61. package/dist/util/config/compound-config-collector.js.map +1 -1
  62. package/package.json +6 -4
  63. package/src/entry/cli-entry.ts +2 -1
  64. package/src/entry/commands/compact-action.ts +54 -0
  65. package/src/entry/entry-index.ts +1 -0
  66. package/src/metrics/Metrics.ts +67 -2
  67. package/src/replication/WalStream.ts +6 -10
  68. package/src/routes/configure-fastify.ts +102 -0
  69. package/src/routes/configure-rsocket.ts +59 -0
  70. package/src/routes/endpoints/socket-route.ts +6 -15
  71. package/src/routes/endpoints/sync-stream.ts +4 -5
  72. package/src/routes/route-register.ts +2 -2
  73. package/src/routes/router-socket.ts +5 -5
  74. package/src/routes/router.ts +2 -0
  75. package/src/routes/routes-index.ts +2 -0
  76. package/src/storage/BucketStorage.ts +36 -1
  77. package/src/storage/mongo/MongoCompactor.ts +371 -0
  78. package/src/storage/mongo/MongoSyncBucketStorage.ts +25 -14
  79. package/src/storage/mongo/models.ts +5 -4
  80. package/src/storage/mongo/util.ts +25 -0
  81. package/src/sync/RequestTracker.ts +3 -3
  82. package/src/sync/sync-index.ts +1 -0
  83. package/src/sync/sync.ts +21 -7
  84. package/src/sync/util.ts +1 -0
  85. package/src/util/config/collectors/config-collector.ts +48 -0
  86. package/src/util/config/compound-config-collector.ts +23 -87
  87. package/test/src/__snapshots__/sync.test.ts.snap +85 -0
  88. package/test/src/bucket_validation.test.ts +142 -0
  89. package/test/src/bucket_validation.ts +116 -0
  90. package/test/src/compacting.test.ts +207 -0
  91. package/test/src/data_storage.test.ts +19 -60
  92. package/test/src/slow_tests.test.ts +144 -102
  93. package/test/src/sync.test.ts +169 -29
  94. package/test/src/util.ts +71 -13
  95. package/test/src/wal_stream.test.ts +21 -16
  96. package/test/src/wal_stream_utils.ts +13 -4
  97. package/tsconfig.tsbuildinfo +1 -1
package/src/storage/mongo/MongoCompactor.ts ADDED
@@ -0,0 +1,371 @@
+ import { logger } from '@powersync/lib-services-framework';
+ import { AnyBulkWriteOperation, MaxKey, MinKey } from 'mongodb';
+ import { addChecksums } from '../../util/utils.js';
+ import { PowerSyncMongo } from './db.js';
+ import { BucketDataDocument, BucketDataKey } from './models.js';
+ import { CompactOptions } from '../BucketStorage.js';
+
+ interface CurrentBucketState {
+   /** Bucket name */
+   bucket: string;
+   /**
+    * Rows seen in the bucket, with the last op_id of each.
+    */
+   seen: Map<string, bigint>;
+   /**
+    * Estimated memory usage of the seen Map.
+    */
+   trackingSize: number;
+
+   /**
+    * Last (lowest) seen op_id that is not a PUT.
+    */
+   lastNotPut: bigint | null;
+
+   /**
+    * Number of REMOVE/MOVE operations seen since lastNotPut.
+    */
+   opsSincePut: number;
+ }
+
+ /**
+  * Additional options, primarily for testing.
+  */
+ export interface MongoCompactOptions extends CompactOptions {
+   /** Minimum of 2 */
+   clearBatchLimit?: number;
+   /** Minimum of 1 */
+   moveBatchLimit?: number;
+   /** Minimum of 1 */
+   moveBatchQueryLimit?: number;
+ }
+
+ const DEFAULT_CLEAR_BATCH_LIMIT = 5000;
+ const DEFAULT_MOVE_BATCH_LIMIT = 2000;
+ const DEFAULT_MOVE_BATCH_QUERY_LIMIT = 10_000;
+
+ /** This default is primarily for tests. */
+ const DEFAULT_MEMORY_LIMIT_MB = 64;
+
+ export class MongoCompactor {
+   private updates: AnyBulkWriteOperation<BucketDataDocument>[] = [];
+
+   private idLimitBytes: number;
+   private moveBatchLimit: number;
+   private moveBatchQueryLimit: number;
+   private clearBatchLimit: number;
+   private maxOpId: bigint | undefined;
+   private buckets: string[] | undefined;
+
+   constructor(private db: PowerSyncMongo, private group_id: number, options?: MongoCompactOptions) {
+     this.idLimitBytes = (options?.memoryLimitMB ?? DEFAULT_MEMORY_LIMIT_MB) * 1024 * 1024;
+     this.moveBatchLimit = options?.moveBatchLimit ?? DEFAULT_MOVE_BATCH_LIMIT;
+     this.moveBatchQueryLimit = options?.moveBatchQueryLimit ?? DEFAULT_MOVE_BATCH_QUERY_LIMIT;
+     this.clearBatchLimit = options?.clearBatchLimit ?? DEFAULT_CLEAR_BATCH_LIMIT;
+     this.maxOpId = options?.maxOpId;
+     this.buckets = options?.compactBuckets;
+   }
+
+   /**
+    * Compact buckets by converting operations into MOVE and/or CLEAR operations.
+    *
+    * See /docs/compacting-operations.md for details.
+    */
+   async compact() {
+     if (this.buckets) {
+       for (let bucket of this.buckets) {
+         // We can make this more efficient later on by iterating
+         // through the buckets in a single query.
+         // That makes batching more tricky, so we leave for later.
+         await this.compactInternal(bucket);
+       }
+     } else {
+       await this.compactInternal(undefined);
+     }
+   }
+
+   async compactInternal(bucket: string | undefined) {
+     const idLimitBytes = this.idLimitBytes;
+
+     let currentState: CurrentBucketState | null = null;
+
+     // Constant lower bound
+     const lowerBound: BucketDataKey = {
+       g: this.group_id,
+       b: bucket ?? (new MinKey() as any),
+       o: new MinKey() as any
+     };
+
+     // Upper bound is adjusted for each batch
+     let upperBound: BucketDataKey = {
+       g: this.group_id,
+       b: bucket ?? (new MaxKey() as any),
+       o: new MaxKey() as any
+     };
+
+     while (true) {
+       // Query one batch at a time, to avoid cursor timeouts
+       const batch = await this.db.bucket_data
+         .find(
+           {
+             _id: {
+               $gte: lowerBound,
+               $lt: upperBound
+             }
+           },
+           {
+             projection: {
+               _id: 1,
+               op: 1,
+               table: 1,
+               row_id: 1,
+               source_table: 1,
+               source_key: 1
+             },
+             limit: this.moveBatchQueryLimit,
+             sort: { _id: -1 },
+             singleBatch: true
+           }
+         )
+         .toArray();
+
+       if (batch.length == 0) {
+         // We've reached the end
+         break;
+       }
+
+       // Set upperBound for the next batch
+       upperBound = batch[batch.length - 1]._id;
+
+       for (let doc of batch) {
+         if (currentState == null || doc._id.b != currentState.bucket) {
+           if (currentState != null && currentState.lastNotPut != null && currentState.opsSincePut >= 1) {
+             // Important to flush before clearBucket()
+             await this.flush();
+             logger.info(
+               `Inserting CLEAR at ${this.group_id}:${currentState.bucket}:${currentState.lastNotPut} to remove ${currentState.opsSincePut} operations`
+             );
+
+             const bucket = currentState.bucket;
+             const clearOp = currentState.lastNotPut;
+             // Free memory before clearing bucket
+             currentState = null;
+             await this.clearBucket(bucket, clearOp);
+           }
+           currentState = {
+             bucket: doc._id.b,
+             seen: new Map(),
+             trackingSize: 0,
+             lastNotPut: null,
+             opsSincePut: 0
+           };
+         }
+
+         if (this.maxOpId != null && doc._id.o > this.maxOpId) {
+           continue;
+         }
+
+         let isPersistentPut = doc.op == 'PUT';
+
+         if (doc.op == 'REMOVE' || doc.op == 'PUT') {
+           const key = `${doc.table}/${doc.row_id}/${doc.source_table}/${doc.source_key?.toHexString()}`;
+           const targetOp = currentState.seen.get(key);
+           if (targetOp) {
+             // Will convert to MOVE, so don't count as PUT
+             isPersistentPut = false;
+
+             this.updates.push({
+               updateOne: {
+                 filter: {
+                   _id: doc._id
+                 },
+                 update: {
+                   $set: {
+                     op: 'MOVE',
+                     target_op: targetOp
+                   },
+                   $unset: {
+                     source_table: 1,
+                     source_key: 1,
+                     table: 1,
+                     row_id: 1,
+                     data: 1
+                   }
+                 }
+               }
+             });
+           } else {
+             if (currentState.trackingSize >= idLimitBytes) {
+               // Reached memory limit.
+               // Keep the highest seen values in this case.
+             } else {
+               // flatstr reduces the memory usage by flattening the string
+               currentState.seen.set(flatstr(key), doc._id.o);
+               // length + 16 for the string
+               // 24 for the bigint
+               // 50 for map overhead
+               // 50 for additional overhead
+               currentState.trackingSize += key.length + 140;
+             }
+           }
+         }
+
+         if (isPersistentPut) {
+           currentState.lastNotPut = null;
+           currentState.opsSincePut = 0;
+         } else if (doc.op != 'CLEAR') {
+           if (currentState.lastNotPut == null) {
+             currentState.lastNotPut = doc._id.o;
+           }
+           currentState.opsSincePut += 1;
+         }
+
+         if (this.updates.length >= this.moveBatchLimit) {
+           await this.flush();
+         }
+       }
+     }
+
+     await this.flush();
+     currentState?.seen.clear();
+     if (currentState?.lastNotPut != null && currentState?.opsSincePut > 1) {
+       logger.info(
+         `Inserting CLEAR at ${this.group_id}:${currentState.bucket}:${currentState.lastNotPut} to remove ${currentState.opsSincePut} operations`
+       );
+       const bucket = currentState.bucket;
+       const clearOp = currentState.lastNotPut;
+       // Free memory before clearing bucket
+       currentState = null;
+       await this.clearBucket(bucket, clearOp);
+     }
+   }
+
+   private async flush() {
+     if (this.updates.length > 0) {
+       logger.info(`Compacting ${this.updates.length} ops`);
+       await this.db.bucket_data.bulkWrite(this.updates, {
+         // Order is not important.
+         // Since checksums are not affected, these operations can happen in any order,
+         // and it's fine if the operations are partially applied.
+         // Each individual operation is atomic.
+         ordered: false
+       });
+       this.updates = [];
+     }
+   }
+
+   /**
+    * Perform a CLEAR compact for a bucket.
+    *
+    * @param bucket bucket name
+    * @param op op_id of the last non-PUT operation, which will be converted to CLEAR.
+    */
+   private async clearBucket(bucket: string, op: bigint) {
+     const opFilter = {
+       _id: {
+         $gte: {
+           g: this.group_id,
+           b: bucket,
+           o: new MinKey() as any
+         },
+         $lte: {
+           g: this.group_id,
+           b: bucket,
+           o: op
+         }
+       }
+     };
+
+     const session = this.db.client.startSession();
+     try {
+       let done = false;
+       while (!done) {
+         // Do the CLEAR operation in batches, with each batch a separate transaction.
+         // The state after each batch is fully consistent.
+         // We need a transaction per batch to make sure checksums stay consistent.
+         await session.withTransaction(
+           async () => {
+             const query = this.db.bucket_data.find(opFilter, {
+               session,
+               sort: { _id: 1 },
+               projection: {
+                 _id: 1,
+                 op: 1,
+                 checksum: 1,
+                 target_op: 1
+               },
+               limit: this.clearBatchLimit
+             });
+             let checksum = 0;
+             let lastOpId: BucketDataKey | null = null;
+             let targetOp: bigint | null = null;
+             let gotAnOp = false;
+             for await (let op of query.stream()) {
+               if (op.op == 'MOVE' || op.op == 'REMOVE' || op.op == 'CLEAR') {
+                 checksum = addChecksums(checksum, op.checksum);
+                 lastOpId = op._id;
+                 if (op.op != 'CLEAR') {
+                   gotAnOp = true;
+                 }
+                 if (op.target_op != null) {
+                   if (targetOp == null || op.target_op > targetOp) {
+                     targetOp = op.target_op;
+                   }
+                 }
+               } else {
+                 throw new Error(`Unexpected ${op.op} operation at ${op._id.g}:${op._id.b}:${op._id.o}`);
+               }
+             }
+             if (!gotAnOp) {
+               done = true;
+               return;
+             }
+
+             logger.info(`Flushing CLEAR at ${lastOpId?.o}`);
+             await this.db.bucket_data.deleteMany(
+               {
+                 _id: {
+                   $gte: {
+                     g: this.group_id,
+                     b: bucket,
+                     o: new MinKey() as any
+                   },
+                   $lte: lastOpId!
+                 }
+               },
+               { session }
+             );
+
+             await this.db.bucket_data.insertOne(
+               {
+                 _id: lastOpId!,
+                 op: 'CLEAR',
+                 checksum: checksum,
+                 data: null,
+                 target_op: targetOp
+               },
+               { session }
+             );
+           },
+           {
+             writeConcern: { w: 'majority' },
+             readConcern: { level: 'snapshot' }
+           }
+         );
+       }
+     } finally {
+       await session.endSession();
+     }
+   }
+ }
+
+ /**
+  * Flattens string to reduce memory usage (around 320 bytes -> 120 bytes),
+  * at the cost of some upfront CPU usage.
+  *
+  * From: https://github.com/davidmarkclements/flatstr/issues/8
+  */
+ function flatstr(s: string) {
+   s.match(/\n/g);
+   return s;
+ }
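
Before the storage changes below, it helps to pin down the rule the new compactor implements. compactInternal() scans each bucket newest-to-oldest: a PUT or REMOVE whose row has a later operation is rewritten as a MOVE, and the oldest run of consecutive non-PUT operations collapses into a single CLEAR. A minimal in-memory sketch of that rule, for intuition only (the Op type and compactOps function are illustrative, not part of the package; the real pass runs in batches against MongoDB and preserves checksums):

    type Op = { opId: bigint; op: 'PUT' | 'REMOVE' | 'MOVE' | 'CLEAR'; rowKey?: string };

    // Simplified model of compacting one bucket; ops ordered oldest -> newest.
    function compactOps(ops: Op[]): Op[] {
      const seen = new Set<string>();
      // Newest-to-oldest scan, mirroring the sort: { _id: -1 } query above.
      for (let i = ops.length - 1; i >= 0; i--) {
        const op = ops[i];
        if ((op.op == 'PUT' || op.op == 'REMOVE') && op.rowKey != null) {
          if (seen.has(op.rowKey)) {
            // A later operation supersedes this one: keep only a MOVE placeholder.
            op.op = 'MOVE';
            delete op.rowKey;
          } else {
            seen.add(op.rowKey);
          }
        }
      }
      // The oldest prefix of non-PUT ops collapses into one CLEAR at the
      // prefix's last op_id (checksum accumulation omitted in this model).
      let clearUpTo = -1;
      while (clearUpTo + 1 < ops.length && ops[clearUpTo + 1].op != 'PUT') {
        clearUpTo++;
      }
      if (clearUpTo >= 0) {
        return [{ opId: ops[clearUpTo].opId, op: 'CLEAR' }, ...ops.slice(clearUpTo + 1)];
      }
      return ops;
    }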
package/src/storage/mongo/MongoSyncBucketStorage.ts CHANGED
@@ -8,21 +8,24 @@ import * as util from '../../util/util-index.js';
  import {
    BucketDataBatchOptions,
    BucketStorageBatch,
+   CompactOptions,
    DEFAULT_DOCUMENT_BATCH_LIMIT,
    DEFAULT_DOCUMENT_CHUNK_LIMIT_BYTES,
    FlushedResult,
    ResolveTableOptions,
    ResolveTableResult,
+   SyncBucketDataBatch,
    SyncRulesBucketStorage,
    SyncRuleStatus
  } from '../BucketStorage.js';
+ import { ChecksumCache, FetchPartialBucketChecksum } from '../ChecksumCache.js';
  import { MongoBucketStorage } from '../MongoBucketStorage.js';
  import { SourceTable } from '../SourceTable.js';
  import { PowerSyncMongo } from './db.js';
  import { BucketDataDocument, BucketDataKey, SourceKey, SyncRuleState } from './models.js';
  import { MongoBucketBatch } from './MongoBucketBatch.js';
- import { BSON_DESERIALIZE_OPTIONS, idPrefixFilter, readSingleBatch, serializeLookup } from './util.js';
- import { ChecksumCache, FetchPartialBucketChecksum } from '../ChecksumCache.js';
+ import { MongoCompactor } from './MongoCompactor.js';
+ import { BSON_DESERIALIZE_OPTIONS, idPrefixFilter, mapOpEntry, readSingleBatch, serializeLookup } from './util.js';

  export class MongoSyncBucketStorage implements SyncRulesBucketStorage {
    private readonly db: PowerSyncMongo;
@@ -201,7 +204,7 @@ export class MongoSyncBucketStorage implements SyncRulesBucketStorage {
    checkpoint: util.OpId,
    dataBuckets: Map<string, string>,
    options?: BucketDataBatchOptions
- ): AsyncIterable<util.SyncBucketData> {
+ ): AsyncIterable<SyncBucketDataBatch> {
    if (dataBuckets.size == 0) {
      return;
    }
@@ -267,6 +270,7 @@ export class MongoSyncBucketStorage implements SyncRulesBucketStorage {

    let batchSize = 0;
    let currentBatch: util.SyncBucketData | null = null;
+   let targetOp: bigint | null = null;

    // Ordered by _id, meaning buckets are grouped together
    for (let rawData of data) {
@@ -284,7 +288,8 @@ export class MongoSyncBucketStorage implements SyncRulesBucketStorage {
        start = currentBatch.after;
        currentBatch = null;
        batchSize = 0;
-       yield yieldBatch;
+       yield { batch: yieldBatch, targetOp: targetOp };
+       targetOp = null;
      }

      start ??= dataBuckets.get(bucket);
@@ -298,17 +303,18 @@ export class MongoSyncBucketStorage implements SyncRulesBucketStorage {
          data: [],
          next_after: start
        };
+       targetOp = null;
+     }
+
+     const entry = mapOpEntry(row);
+
+     if (row.target_op != null) {
+       // MOVE, CLEAR
+       if (targetOp == null || row.target_op > targetOp) {
+         targetOp = row.target_op;
+       }
      }

-     const entry: util.OplogEntry = {
-       op_id: util.timestampToOpId(row._id.o),
-       op: row.op,
-       object_type: row.table,
-       object_id: row.row_id,
-       checksum: Number(row.checksum),
-       subkey: `${row.source_table}/${row.source_key.toHexString()}`,
-       data: row.data
-     };
      currentBatch.data.push(entry);
      currentBatch.next_after = entry.op_id;
@@ -318,7 +324,8 @@ export class MongoSyncBucketStorage implements SyncRulesBucketStorage {
    if (currentBatch != null) {
      const yieldBatch = currentBatch;
      currentBatch = null;
-     yield yieldBatch;
+     yield { batch: yieldBatch, targetOp: targetOp };
+     targetOp = null;
    }
  }

@@ -530,4 +537,8 @@ export class MongoSyncBucketStorage implements SyncRulesBucketStorage {
      }
    );
  }
+
+   async compact(options?: CompactOptions) {
+     return new MongoCompactor(this.db, this.group_id, options).compact();
+   }
  }
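
With the compact() method above, compaction becomes part of the storage API; the new compact-action CLI entry in this release presumably calls into the same path. A minimal usage sketch, assuming an already-resolved MongoSyncBucketStorage instance (the bucket name is hypothetical; the fields shown are the CompactOptions values MongoCompactor reads):

    // memoryLimitMB bounds the in-memory `seen` map; compactBuckets restricts
    // the scope; maxOpId (also supported) caps the op range, mainly for tests.
    await bucketStorage.compact({
      memoryLimitMB: 64,
      compactBuckets: ['global[]'] // hypothetical bucket name
    });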
package/src/storage/mongo/models.ts CHANGED
@@ -42,12 +42,13 @@ export interface BucketParameterDocument {
  export interface BucketDataDocument {
    _id: BucketDataKey;
    op: OpType;
-   source_table: bson.ObjectId;
-   source_key: bson.UUID;
-   table: string;
-   row_id: string;
+   source_table?: bson.ObjectId;
+   source_key?: bson.UUID;
+   table?: string;
+   row_id?: string;
    checksum: number;
    data: string | null;
+   target_op?: bigint | null;
  }

  export type OpType = 'PUT' | 'REMOVE' | 'MOVE' | 'CLEAR';
package/src/storage/mongo/util.ts CHANGED
@@ -2,6 +2,9 @@ import { SqliteJsonValue } from '@powersync/service-sync-rules';
  import * as bson from 'bson';
  import * as mongo from 'mongodb';
  import * as crypto from 'crypto';
+ import { BucketDataDocument } from './models.js';
+ import { timestampToOpId } from '../../util/utils.js';
+ import { OplogEntry } from '../../util/protocol-types.js';

  /**
   * Lookup serialization must be number-agnostic. I.e. normalize numbers, instead of preserving numbers.
@@ -86,3 +89,25 @@ export const BSON_DESERIALIZE_OPTIONS: bson.DeserializeOptions = {
    // use bigint instead of Long
    useBigInt64: true
  };
+
+ export function mapOpEntry(row: BucketDataDocument): OplogEntry {
+   if (row.op == 'PUT' || row.op == 'REMOVE') {
+     return {
+       op_id: timestampToOpId(row._id.o),
+       op: row.op,
+       object_type: row.table,
+       object_id: row.row_id,
+       checksum: Number(row.checksum),
+       subkey: `${row.source_table}/${row.source_key!.toHexString()}`,
+       data: row.data
+     };
+   } else {
+     // MOVE, CLEAR
+
+     return {
+       op_id: timestampToOpId(row._id.o),
+       op: row.op,
+       checksum: Number(row.checksum)
+     };
+   }
+ }
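
For reference, these are the two OplogEntry shapes mapOpEntry produces, written as hypothetical literals (all values illustrative):

    // PUT/REMOVE entries keep row identity and data:
    const putEntry = {
      op_id: '1000',
      op: 'PUT' as const,
      object_type: 'lists',
      object_id: 'l1',
      checksum: 93784,
      subkey: '6544e3899293153fa7b38331/ec27c691-b47a-4d92-927f-9b4b1a4c1e92',
      data: '{"name":"Groceries"}'
    };
    // MOVE/CLEAR entries carry only op_id and checksum:
    const moveEntry = { op_id: '1001', op: 'MOVE' as const, checksum: 11091 };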
package/src/sync/RequestTracker.ts CHANGED
@@ -1,4 +1,3 @@
- import { container } from '@powersync/lib-services-framework';
  import { Metrics } from '../metrics/Metrics.js';

  /**
@@ -10,12 +9,13 @@ export class RequestTracker {

  addOperationsSynced(operations: number) {
    this.operationsSynced += operations;
-   container.getImplementation(Metrics).operations_synced_total.add(operations);
+
+   Metrics.getInstance().operations_synced_total.add(operations);
  }

  addDataSynced(bytes: number) {
    this.dataSyncedBytes += bytes;

-   container.getImplementation(Metrics).data_synced_bytes.add(bytes);
+   Metrics.getInstance().data_synced_bytes.add(bytes);
  }
}
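
This swaps the DI-container lookup for a static accessor. The updated Metrics class itself is not shown in this section (Metrics.ts changes by +67/-2 per the file list), so the following is only a sketch of the accessor shape the call sites imply, with assumed internals:

    // Assumed shape; the real Metrics class in this release may differ.
    export class Metrics {
      private static instance: Metrics | undefined;

      static getInstance(): Metrics {
        if (Metrics.instance == null) {
          throw new Error('Metrics have not been initialized');
        }
        return Metrics.instance;
      }
    }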
package/src/sync/sync-index.ts CHANGED
@@ -1,6 +1,7 @@
  export * from './BroadcastIterable.js';
  export * from './LastValueSink.js';
  export * from './merge.js';
+ export * from './RequestTracker.js';
  export * from './safeRace.js';
  export * from './sync.js';
  export * from './util.js';
package/src/sync/sync.ts CHANGED
@@ -8,6 +8,7 @@ import * as storage from '../storage/storage-index.js';
  import * as util from '../util/util-index.js';

  import { logger } from '@powersync/lib-services-framework';
+ import { Metrics } from '../metrics/Metrics.js';
  import { mergeAsyncIterables } from './merge.js';
  import { TokenStreamOptions, tokenStream } from './util.js';
  import { RequestTracker } from './RequestTracker.js';
@@ -257,6 +258,9 @@ interface BucketDataBatchResult {
  async function* bucketDataBatch(request: BucketDataRequest): AsyncGenerator<BucketDataBatchResult, void> {
    const { storage, checkpoint, bucketsToFetch, dataBuckets, raw_data, binary_data, tracker, signal } = request;

+   const checkpointOp = BigInt(checkpoint);
+   let checkpointInvalidated = false;
+
    const [_, release] = await syncSemaphore.acquire();
    try {
      // Optimization: Only fetch buckets for which the checksums have changed since the last checkpoint
@@ -266,13 +270,16 @@ async function* bucketDataBatch(request: BucketDataRequest): AsyncGenerator<Buck

      let has_more = false;

-     for await (let r of data) {
+     for await (let { batch: r, targetOp } of data) {
        if (signal.aborted) {
          return;
        }
        if (r.has_more) {
          has_more = true;
        }
+       if (targetOp != null && targetOp > checkpointOp) {
+         checkpointInvalidated = true;
+       }
        if (r.data.length == 0) {
          continue;
        }
@@ -308,12 +315,19 @@ async function* bucketDataBatch(request: BucketDataRequest): AsyncGenerator<Buck
      }

      if (!has_more) {
-       const line: util.StreamingSyncCheckpointComplete = {
-         checkpoint_complete: {
-           last_op_id: checkpoint
-         }
-       };
-       yield { data: line, done: true };
+       if (checkpointInvalidated) {
+         // Checkpoint invalidated by a CLEAR or MOVE op.
+         // Don't send the checkpoint_complete line in this case.
+         // More data should be available immediately for a new checkpoint.
+         yield { data: null, done: true };
+       } else {
+         const line: util.StreamingSyncCheckpointComplete = {
+           checkpoint_complete: {
+             last_op_id: checkpoint
+           }
+         };
+         yield { data: line, done: true };
+       }
      }
    } finally {
      release();
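
The invalidation rule above is compact enough to state as a pure function; a minimal model (function name hypothetical):

    // A MOVE/CLEAR whose target_op lies beyond the requested checkpoint means
    // compaction rewrote history past that checkpoint, so checkpoint_complete
    // must be withheld; the client instead waits for the next checkpoint.
    function checkpointStillValid(checkpoint: bigint, targetOps: Iterable<bigint | null>): boolean {
      for (const targetOp of targetOps) {
        if (targetOp != null && targetOp > checkpoint) {
          return false;
        }
      }
      return true;
    }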
package/src/sync/util.ts CHANGED
@@ -1,6 +1,7 @@
  import * as timers from 'timers/promises';

  import * as util from '../util/util-index.js';
+ import { Metrics } from '../metrics/Metrics.js';
  import { RequestTracker } from './RequestTracker.js';

  export type TokenStreamOptions = {
package/src/util/config/collectors/config-collector.ts CHANGED
@@ -1,6 +1,8 @@
+ import * as t from 'ts-codec';
  import * as yaml from 'yaml';

  import { configFile } from '@powersync/service-types';
+ import { schema } from '@powersync/lib-services-framework';

  import { RunnerConfig } from '../types.js';

@@ -21,6 +23,13 @@ export enum ConfigFileFormat {
   */
  const YAML_ENV_PREFIX = 'PS_';

+ // ts-codec itself doesn't give great validation errors, so we use json schema for that
+ const configSchemaValidator = schema
+   .parseJSONSchema(
+     t.generateJSONSchema(configFile.powerSyncConfig, { allowAdditional: true, parsers: [configFile.portParser] })
+   )
+   .validator();
+
  export abstract class ConfigCollector {
    abstract get name(): string;

@@ -30,6 +39,45 @@ export abstract class ConfigCollector {
   */
  abstract collectSerialized(runnerConfig: RunnerConfig): Promise<configFile.SerializedPowerSyncConfig | null>;

+ /**
+  * Collects the PowerSyncConfig settings.
+  * Validates and decodes the config.
+  * @returns null if this collector cannot provide a config
+  */
+ async collect(runner_config: RunnerConfig): Promise<configFile.PowerSyncConfig | null> {
+   const serialized = await this.collectSerialized(runner_config);
+   if (!serialized) {
+     return null;
+   }
+
+   /**
+    * After this point a serialized config has been found. Any failures to decode or validate
+    * will result in a hard stop.
+    */
+   const decoded = this.decode(serialized);
+   this.validate(decoded);
+   return decoded;
+ }
+
+ /**
+  * Validates input config
+  * ts-codec itself doesn't give great validation errors, so we use json schema for that
+  */
+ validate(config: configFile.PowerSyncConfig) {
+   const valid = configSchemaValidator.validate(config);
+   if (!valid.valid) {
+     throw new Error(`Failed to validate PowerSync config: ${valid.errors.join(', ')}`);
+   }
+ }
+
+ decode(encoded: configFile.SerializedPowerSyncConfig): configFile.PowerSyncConfig {
+   try {
+     return configFile.powerSyncConfig.decode(encoded);
+   } catch (ex) {
+     throw new Error(`Failed to decode PowerSync config: ${ex}`);
+   }
+ }
+
  protected parseContent(content: string, contentType?: ConfigFileFormat) {
    switch (contentType) {
      case ConfigFileFormat.YAML:
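
The section above defines the template: collectSerialized() is the only source-specific step each collector must provide, while collect() handles decoding and validation uniformly. A hypothetical minimal subclass, to show how the pieces fit together (the class name, env variable, and the assumption that parseContent's result can be returned as the serialized config are all illustrative):

    // Hypothetical collector reading a base64-encoded YAML config from the env.
    class Base64EnvConfigCollector extends ConfigCollector {
      get name() {
        return 'Base64 environment';
      }

      async collectSerialized(runnerConfig: RunnerConfig): Promise<configFile.SerializedPowerSyncConfig | null> {
        const encoded = process.env.POWERSYNC_CONFIG_B64; // hypothetical variable
        if (!encoded) {
          // Returning null lets the compound collector try the next source.
          return null;
        }
        const content = Buffer.from(encoded, 'base64').toString('utf8');
        return this.parseContent(content, ConfigFileFormat.YAML);
      }
    }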