@powersync/service-module-mongodb 0.10.4 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -215,9 +215,14 @@ export class ChangeStream {
215
215
 
216
216
  async estimatedCountNumber(table: storage.SourceTable): Promise<number> {
217
217
  const db = this.client.db(table.schema);
218
- return await db.collection(table.table).estimatedDocumentCount();
218
+ return await db.collection(table.name).estimatedDocumentCount();
219
219
  }
220
220
 
221
+ /**
222
+ * This gets an LSN before starting a snapshot, which we can resume streaming from after the snapshot.
223
+ *
224
+ * This LSN can survive initial replication restarts.
225
+ */
221
226
  private async getSnapshotLsn(): Promise<string> {
222
227
  const hello = await this.defaultDb.command({ hello: 1 });
223
228
  // Basic sanity check
@@ -292,6 +297,9 @@ export class ChangeStream {
292
297
  );
293
298
  }
294
299
 
300
+ /**
301
+ * Given a snapshot LSN, validate that we can read from it, by opening a change stream.
302
+ */
295
303
  private async validateSnapshotLsn(lsn: string) {
296
304
  await using streamManager = this.openChangeStream({ lsn: lsn, maxAwaitTimeMs: 0 });
297
305
  const { stream } = streamManager;
@@ -321,7 +329,7 @@ export class ChangeStream {
321
329
  if (snapshotLsn == null) {
322
330
  // First replication attempt - get a snapshot and store the timestamp
323
331
  snapshotLsn = await this.getSnapshotLsn();
324
- await batch.setSnapshotLsn(snapshotLsn);
332
+ await batch.setResumeLsn(snapshotLsn);
325
333
  this.logger.info(`Marking snapshot at ${snapshotLsn}`);
326
334
  } else {
327
335
  this.logger.info(`Resuming snapshot at ${snapshotLsn}`);
@@ -359,11 +367,20 @@ export class ChangeStream {
359
367
  await this.snapshotTable(batch, table);
360
368
  await batch.markSnapshotDone([table], MongoLSN.ZERO.comparable);
361
369
 
362
- await touch();
370
+ this.touch();
363
371
  }
364
372
 
365
- this.logger.info(`Snapshot commit at ${snapshotLsn}`);
373
+ // The checkpoint here is a marker - we need to replicate up to at least this
374
+ // point before the data can be considered consistent.
375
+ // We could do this for each individual table, but may as well just do it once for the entire snapshot.
376
+ const checkpoint = await createCheckpoint(this.client, this.defaultDb, STANDALONE_CHECKPOINT_ID);
377
+ await batch.markSnapshotDone([], checkpoint);
378
+
379
+ // This will not create a consistent checkpoint yet, but will persist the op.
380
+ // Actual checkpoint will be created when streaming replication has caught up.
366
381
  await batch.commit(snapshotLsn);
382
+
383
+ this.logger.info(`Snapshot done. Need to replicate from ${snapshotLsn} to ${checkpoint} to be consistent`);
367
384
  }
368
385
  );
369
386
  }
@@ -432,7 +449,7 @@ export class ChangeStream {
432
449
  const totalEstimatedCount = await this.estimatedCountNumber(table);
433
450
  let at = table.snapshotStatus?.replicatedCount ?? 0;
434
451
  const db = this.client.db(table.schema);
435
- const collection = db.collection(table.table);
452
+ const collection = db.collection(table.name);
436
453
  await using query = new ChunkedSnapshotQuery({
437
454
  collection,
438
455
  key: table.snapshotStatus?.lastKey,
@@ -492,7 +509,7 @@ export class ChangeStream {
492
509
  this.logger.info(
493
510
  `Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} in ${duration.toFixed(0)}ms`
494
511
  );
495
- await touch();
512
+ this.touch();
496
513
  }
497
514
  // In case the loop was interrupted, make sure we await the last promise.
498
515
  await nextChunkPromise;
@@ -656,7 +673,6 @@ export class ChangeStream {
656
673
  try {
657
674
  // If anything errors here, the entire replication process is halted, and
658
675
  // all connections automatically closed, including this one.
659
-
660
676
  await this.initReplication();
661
677
  await this.streamChanges();
662
678
  } catch (e) {
@@ -757,19 +773,20 @@ export class ChangeStream {
757
773
  }
758
774
 
759
775
  async streamChangesInternal() {
760
- // Auto-activate as soon as initial replication is done
761
- await this.storage.autoActivate();
762
-
763
776
  await this.storage.startBatch(
764
777
  {
765
778
  logger: this.logger,
766
779
  zeroLSN: MongoLSN.ZERO.comparable,
767
780
  defaultSchema: this.defaultDb.databaseName,
781
+ // We get a complete postimage for every change, so we don't need to store the current data.
768
782
  storeCurrentData: false
769
783
  },
770
784
  async (batch) => {
771
- const { lastCheckpointLsn } = batch;
772
- const lastLsn = MongoLSN.fromSerialized(lastCheckpointLsn!);
785
+ const { resumeFromLsn } = batch;
786
+ if (resumeFromLsn == null) {
787
+ throw new ReplicationAssertionError(`No LSN found to resume from`);
788
+ }
789
+ const lastLsn = MongoLSN.fromSerialized(resumeFromLsn);
773
790
  const startAfter = lastLsn?.timestamp;
774
791
 
775
792
  // It is normal for this to be a minute or two old when there is a low volume
@@ -778,7 +795,7 @@ export class ChangeStream {
778
795
 
779
796
  this.logger.info(`Resume streaming at ${startAfter?.inspect()} / ${lastLsn} | Token age: ${tokenAgeSeconds}s`);
780
797
 
781
- await using streamManager = this.openChangeStream({ lsn: lastCheckpointLsn });
798
+ await using streamManager = this.openChangeStream({ lsn: resumeFromLsn });
782
799
  const { stream, filters } = streamManager;
783
800
  if (this.abort_signal.aborted) {
784
801
  await stream.close();
@@ -797,6 +814,7 @@ export class ChangeStream {
797
814
  let splitDocument: mongo.ChangeStreamDocument | null = null;
798
815
 
799
816
  let flexDbNameWorkaroundLogged = false;
817
+ let changesSinceLastCheckpoint = 0;
800
818
 
801
819
  let lastEmptyResume = performance.now();
802
820
 
@@ -831,7 +849,7 @@ export class ChangeStream {
831
849
  if (waitForCheckpointLsn == null && performance.now() - lastEmptyResume > 60_000) {
832
850
  const { comparable: lsn, timestamp } = MongoLSN.fromResumeToken(stream.resumeToken);
833
851
  await batch.keepalive(lsn);
834
- await touch();
852
+ this.touch();
835
853
  lastEmptyResume = performance.now();
836
854
  // Log the token update. This helps as a general "replication is still active" message in the logs.
837
855
  // This token would typically be around 10s behind.
@@ -843,7 +861,7 @@ export class ChangeStream {
843
861
  continue;
844
862
  }
845
863
 
846
- await touch();
864
+ this.touch();
847
865
 
848
866
  if (startAfter != null && originalChangeDocument.clusterTime?.lte(startAfter)) {
849
867
  continue;
@@ -966,6 +984,7 @@ export class ChangeStream {
966
984
  if (didCommit) {
967
985
  this.oldestUncommittedChange = null;
968
986
  this.isStartingReplication = false;
987
+ changesSinceLastCheckpoint = 0;
969
988
  }
970
989
  } else if (
971
990
  changeDocument.operationType == 'insert' ||
@@ -988,7 +1007,21 @@ export class ChangeStream {
988
1007
  if (this.oldestUncommittedChange == null && changeDocument.clusterTime != null) {
989
1008
  this.oldestUncommittedChange = timestampToDate(changeDocument.clusterTime);
990
1009
  }
991
- await this.writeChange(batch, table, changeDocument);
1010
+ const flushResult = await this.writeChange(batch, table, changeDocument);
1011
+ changesSinceLastCheckpoint += 1;
1012
+ if (flushResult != null && changesSinceLastCheckpoint >= 20_000) {
1013
+ // When we are catching up replication after an initial snapshot, there may be a very long delay
1014
+ // before we do a commit(). In that case, we need to periodically persist the resume LSN, so
1015
+ // we don't restart from scratch if we restart replication.
1016
+ // The same could apply if we need to catch up on replication after some downtime.
1017
+ const { comparable: lsn } = new MongoLSN({
1018
+ timestamp: changeDocument.clusterTime!,
1019
+ resume_token: changeDocument._id
1020
+ });
1021
+ this.logger.info(`Updating resume LSN to ${lsn} after ${changesSinceLastCheckpoint} changes`);
1022
+ await batch.setResumeLsn(lsn);
1023
+ changesSinceLastCheckpoint = 0;
1024
+ }
992
1025
  }
993
1026
  } else if (changeDocument.operationType == 'drop') {
994
1027
  const rel = getMongoRelation(changeDocument.ns);
@@ -1036,13 +1069,18 @@ export class ChangeStream {
1036
1069
  }
1037
1070
  return Date.now() - this.oldestUncommittedChange.getTime();
1038
1071
  }
1039
- }
1040
1072
 
1041
- async function touch() {
1042
- // FIXME: The hosted Kubernetes probe does not actually check the timestamp on this.
1043
- // FIXME: We need a timeout of around 5+ minutes in Kubernetes if we do start checking the timestamp,
1044
- // or reduce PING_INTERVAL here.
1045
- return container.probes.touch();
1073
+ private lastTouchedAt = performance.now();
1074
+
1075
+ private touch() {
1076
+ if (performance.now() - this.lastTouchedAt > 1_000) {
1077
+ this.lastTouchedAt = performance.now();
1078
+ // Update the probes, but don't wait for it
1079
+ container.probes.touch().catch((e) => {
1080
+ this.logger.error(`Failed to touch the container probe: ${e.message}`, e);
1081
+ });
1082
+ }
1083
+ }
1046
1084
  }
1047
1085
 
1048
1086
  function mapChangeStreamError(e: any) {
@@ -13,7 +13,7 @@ export function getMongoRelation(source: mongo.ChangeStreamNameSpace): storage.S
13
13
  schema: source.db,
14
14
  // Not relevant for MongoDB - we use db + coll name as the identifier
15
15
  objectId: undefined,
16
- replicationColumns: [{ name: '_id' }]
16
+ replicaIdColumns: [{ name: '_id' }]
17
17
  } satisfies storage.SourceEntityDescriptor;
18
18
  }
19
19
 
@@ -22,7 +22,7 @@ export function getMongoRelation(source: mongo.ChangeStreamNameSpace): storage.S
22
22
  */
23
23
  export function getCacheIdentifier(source: storage.SourceEntityDescriptor | storage.SourceTable): string {
24
24
  if (source instanceof storage.SourceTable) {
25
- return `${source.schema}.${source.table}`;
25
+ return `${source.schema}.${source.name}`;
26
26
  }
27
27
  return `${source.schema}.${source.name}`;
28
28
  }
@@ -23,7 +23,9 @@ describe('change stream', () => {
23
23
 
24
24
  function defineChangeStreamTests(factory: storage.TestStorageFactory) {
25
25
  test('replicating basic values', async () => {
26
- await using context = await ChangeStreamTestContext.open(factory);
26
+ await using context = await ChangeStreamTestContext.open(factory, {
27
+ mongoOptions: { postImages: PostImagesOption.READ_ONLY }
28
+ });
27
29
  const { db } = context;
28
30
  await context.updateSyncRules(`
29
31
  bucket_definitions:
@@ -32,7 +34,7 @@ bucket_definitions:
32
34
  - SELECT _id as id, description, num FROM "test_data"`);
33
35
 
34
36
  await db.createCollection('test_data', {
35
- changeStreamPreAndPostImages: { enabled: false }
37
+ changeStreamPreAndPostImages: { enabled: true }
36
38
  });
37
39
  const collection = db.collection('test_data');
38
40
 
@@ -42,11 +44,8 @@ bucket_definitions:
42
44
 
43
45
  const result = await collection.insertOne({ description: 'test1', num: 1152921504606846976n });
44
46
  const test_id = result.insertedId;
45
- await setTimeout(30);
46
47
  await collection.updateOne({ _id: test_id }, { $set: { description: 'test2' } });
47
- await setTimeout(30);
48
48
  await collection.replaceOne({ _id: test_id }, { description: 'test3' });
49
- await setTimeout(30);
50
49
  await collection.deleteOne({ _id: test_id });
51
50
 
52
51
  const data = await context.getBucketData('global[]');
@@ -354,6 +353,9 @@ bucket_definitions:
354
353
  const test_id = result.insertedId.toHexString();
355
354
 
356
355
  await context.replicateSnapshot();
356
+ // Note: snapshot is only consistent some time into the streaming request.
357
+ // At the point that we get the first acknowledged checkpoint, as is required
358
+ // for getBucketData(), the data should be consistent.
357
359
  context.startStreaming();
358
360
 
359
361
  const data = await context.getBucketData('global[]');
@@ -512,10 +514,13 @@ bucket_definitions:
512
514
  await collection.insertOne({ description: 'test1', num: 1152921504606846976n });
513
515
 
514
516
  await context.replicateSnapshot();
517
+ await context.markSnapshotConsistent();
515
518
 
516
519
  // Simulate an error
517
520
  await context.storage!.reportError(new Error('simulated error'));
518
- expect((await context.factory.getActiveSyncRulesContent())?.last_fatal_error).toEqual('simulated error');
521
+ const syncRules = await context.factory.getActiveSyncRulesContent();
522
+ expect(syncRules).toBeTruthy();
523
+ expect(syncRules?.last_fatal_error).toEqual('simulated error');
519
524
 
520
525
  // startStreaming() should automatically clear the error.
521
526
  context.startStreaming();
@@ -17,7 +17,7 @@ import { MongoManager } from '@module/replication/MongoManager.js';
17
17
  import { createCheckpoint, STANDALONE_CHECKPOINT_ID } from '@module/replication/MongoRelation.js';
18
18
  import { NormalizedMongoConnectionConfig } from '@module/types/types.js';
19
19
 
20
- import { TEST_CONNECTION_OPTIONS, clearTestDb } from './util.js';
20
+ import { clearTestDb, TEST_CONNECTION_OPTIONS } from './util.js';
21
21
 
22
22
  export class ChangeStreamTestContext {
23
23
  private _walStream?: ChangeStream;
@@ -119,7 +119,20 @@ export class ChangeStreamTestContext {
119
119
 
120
120
  async replicateSnapshot() {
121
121
  await this.walStream.initReplication();
122
- await this.storage!.autoActivate();
122
+ }
123
+
124
+ /**
125
+ * A snapshot is not consistent until streaming replication has caught up.
126
+ * We simulate that for tests.
127
+ * Do not use if there are any writes performed while doing the snapshot, as that
128
+ * would result in inconsistent data.
129
+ */
130
+ async markSnapshotConsistent() {
131
+ const checkpoint = await createCheckpoint(this.client, this.db, STANDALONE_CHECKPOINT_ID);
132
+
133
+ await this.storage!.startBatch(test_utils.BATCH_OPTIONS, async (batch) => {
134
+ await batch.keepalive(checkpoint);
135
+ });
123
136
  }
124
137
 
125
138
  startStreaming() {
@@ -195,12 +208,11 @@ export async function getClientCheckpoint(
195
208
  while (Date.now() - start < timeout) {
196
209
  const storage = await storageFactory.getActiveStorage();
197
210
  const cp = await storage?.getCheckpoint();
198
- if (cp == null) {
199
- throw new Error('No sync rules available');
200
- }
201
- lastCp = cp;
202
- if (cp.lsn && cp.lsn >= lsn) {
203
- return cp.checkpoint;
211
+ if (cp != null) {
212
+ lastCp = cp;
213
+ if (cp.lsn && cp.lsn >= lsn) {
214
+ return cp.checkpoint;
215
+ }
204
216
  }
205
217
  await new Promise((resolve) => setTimeout(resolve, 30));
206
218
  }