@powersync/service-module-mongodb 0.9.1 → 0.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/CHANGELOG.md +28 -0
  2. package/dist/api/MongoRouteAPIAdapter.d.ts +1 -1
  3. package/dist/api/MongoRouteAPIAdapter.js +1 -1
  4. package/dist/api/MongoRouteAPIAdapter.js.map +1 -1
  5. package/dist/replication/ChangeStream.d.ts +26 -11
  6. package/dist/replication/ChangeStream.js +556 -300
  7. package/dist/replication/ChangeStream.js.map +1 -1
  8. package/dist/replication/ChangeStreamReplicationJob.d.ts +2 -0
  9. package/dist/replication/ChangeStreamReplicationJob.js +13 -5
  10. package/dist/replication/ChangeStreamReplicationJob.js.map +1 -1
  11. package/dist/replication/ChangeStreamReplicator.d.ts +1 -0
  12. package/dist/replication/ChangeStreamReplicator.js +21 -0
  13. package/dist/replication/ChangeStreamReplicator.js.map +1 -1
  14. package/dist/replication/MongoRelation.d.ts +1 -1
  15. package/dist/replication/MongoRelation.js +4 -0
  16. package/dist/replication/MongoRelation.js.map +1 -1
  17. package/dist/replication/MongoSnapshotQuery.d.ts +26 -0
  18. package/dist/replication/MongoSnapshotQuery.js +56 -0
  19. package/dist/replication/MongoSnapshotQuery.js.map +1 -0
  20. package/dist/replication/replication-utils.d.ts +2 -0
  21. package/dist/replication/replication-utils.js +3 -0
  22. package/dist/replication/replication-utils.js.map +1 -1
  23. package/package.json +8 -8
  24. package/src/api/MongoRouteAPIAdapter.ts +1 -1
  25. package/src/replication/ChangeStream.ts +324 -124
  26. package/src/replication/ChangeStreamReplicationJob.ts +14 -6
  27. package/src/replication/ChangeStreamReplicator.ts +23 -0
  28. package/src/replication/MongoRelation.ts +4 -1
  29. package/src/replication/MongoSnapshotQuery.ts +59 -0
  30. package/src/replication/replication-utils.ts +5 -0
  31. package/test/src/change_stream.test.ts +18 -13
  32. package/test/src/change_stream_utils.ts +45 -20
  33. package/test/src/chunked_snapshot.test.ts +153 -0
  34. package/test/src/resume.test.ts +7 -94
  35. package/test/src/resume_token.test.ts +78 -2
  36. package/test/src/resuming_snapshots.test.ts +138 -0
  37. package/test/src/slow_tests.test.ts +4 -18
  38. package/test/src/util.ts +12 -1
  39. package/tsconfig.tsbuildinfo +1 -1
package/src/replication/ChangeStream.ts

@@ -2,13 +2,21 @@ import { isMongoNetworkTimeoutError, isMongoServerError, mongo } from '@powersyn
 import {
   container,
   DatabaseConnectionError,
+  logger as defaultLogger,
   ErrorCode,
-  logger,
+  Logger,
   ReplicationAbortedError,
   ReplicationAssertionError,
   ServiceError
 } from '@powersync/lib-services-framework';
-import { MetricsEngine, SaveOperationTag, SourceEntityDescriptor, SourceTable, storage } from '@powersync/service-core';
+import {
+  MetricsEngine,
+  RelationCache,
+  SaveOperationTag,
+  SourceEntityDescriptor,
+  SourceTable,
+  storage
+} from '@powersync/service-core';
 import { DatabaseInputRow, SqliteRow, SqlSyncRules, TablePattern } from '@powersync/service-sync-rules';
 import { ReplicationMetric } from '@powersync/service-types';
 import { MongoLSN } from '../common/MongoLSN.js';
@@ -22,7 +30,8 @@ import {
   getMongoRelation,
   STANDALONE_CHECKPOINT_ID
 } from './MongoRelation.js';
-import { CHECKPOINTS_COLLECTION } from './replication-utils.js';
+import { ChunkedSnapshotQuery } from './MongoSnapshotQuery.js';
+import { CHECKPOINTS_COLLECTION, timestampToDate } from './replication-utils.js';

 export interface ChangeStreamOptions {
   connections: MongoManager;
@@ -36,10 +45,18 @@ export interface ChangeStreamOptions {
    * in closing the stream. To cover that case, reduce the timeout for tests.
    */
   maxAwaitTimeMS?: number;
+
+  /**
+   * Override snapshotChunkLength for testing.
+   */
+  snapshotChunkLength?: number;
+
+  logger?: Logger;
 }

 interface InitResult {
   needsInitialSync: boolean;
+  snapshotLsn: string | null;
 }

 /**
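
Both additions to ChangeStreamOptions are test-oriented: a small snapshotChunkLength exercises the chunked, resumable snapshot path on tiny fixtures, and an injectable logger replaces the old hard-coded log prefix. A minimal sketch of how a test might construct the stream with them; the connections, storage, metrics and testLogger values are stand-ins for whatever the test harness provides:

    // Sketch only: harness-provided values, not part of this package.
    const changeStream = new ChangeStream({
      connections,
      storage,
      metrics,
      maxAwaitTimeMS: 50,     // short stream polls so tests can stop quickly
      snapshotChunkLength: 2, // force multiple chunks even for small collections
      logger: testLogger      // any framework Logger; defaults to the global logger
    });
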
@@ -73,16 +90,32 @@ export class ChangeStream {

   private abort_signal: AbortSignal;

-  private relation_cache = new Map<string | number, storage.SourceTable>();
+  private relationCache = new RelationCache(getCacheIdentifier);
+
+  /**
+   * Time of the oldest uncommitted change, according to the source db.
+   * This is used to determine the replication lag.
+   */
+  private oldestUncommittedChange: Date | null = null;
+  /**
+   * Keep track of whether we have done a commit or keepalive yet.
+   * We can only compute replication lag if isStartingReplication == false, or oldestUncommittedChange is present.
+   */
+  private isStartingReplication = true;

   private checkpointStreamId = new mongo.ObjectId();

+  private logger: Logger;
+
+  private snapshotChunkLength: number;
+
   constructor(options: ChangeStreamOptions) {
     this.storage = options.storage;
     this.metrics = options.metrics;
     this.group_id = options.storage.group_id;
     this.connections = options.connections;
     this.maxAwaitTimeMS = options.maxAwaitTimeMS ?? 10_000;
+    this.snapshotChunkLength = options.snapshotChunkLength ?? 6_000;
     this.client = this.connections.client;
     this.defaultDb = this.connections.db;
     this.sync_rules = options.storage.getParsedSyncRules({
@@ -97,6 +130,8 @@ export class ChangeStream {
       },
       { once: true }
     );
+
+    this.logger = options.logger ?? defaultLogger;
   }

   get stopped() {
@@ -111,10 +146,6 @@
     return this.connections.options.postImages == PostImagesOption.AUTO_CONFIGURE;
   }

-  private get logPrefix() {
-    return `[powersync_${this.group_id}]`;
-  }
-
   /**
    * This resolves a pattern, persists the related metadata, and returns
    * the resulting SourceTables.
@@ -150,7 +181,7 @@
       .toArray();

     if (!tablePattern.isWildcard && collections.length == 0) {
-      logger.warn(`${this.logPrefix} Collection ${schema}.${tablePattern.name} not found`);
+      this.logger.warn(`Collection ${schema}.${tablePattern.name} not found`);
     }

     for (let collection of collections) {
@@ -170,37 +201,26 @@
   async initSlot(): Promise<InitResult> {
     const status = await this.storage.getStatus();
     if (status.snapshot_done && status.checkpoint_lsn) {
-      logger.info(`${this.logPrefix} Initial replication already done`);
-      return { needsInitialSync: false };
+      this.logger.info(`Initial replication already done`);
+      return { needsInitialSync: false, snapshotLsn: null };
     }

-    return { needsInitialSync: true };
+    return { needsInitialSync: true, snapshotLsn: status.snapshot_lsn };
   }

   async estimatedCount(table: storage.SourceTable): Promise<string> {
-    const db = this.client.db(table.schema);
-    const count = await db.collection(table.table).estimatedDocumentCount();
+    const count = await this.estimatedCountNumber(table);
     return `~${count}`;
   }

-  /**
-   * Start initial replication.
-   *
-   * If (partial) replication was done before on this slot, this clears the state
-   * and starts again from scratch.
-   */
-  async startInitialReplication() {
-    await this.storage.clear();
-    await this.initialReplication();
+  async estimatedCountNumber(table: storage.SourceTable): Promise<number> {
+    const db = this.client.db(table.schema);
+    return await db.collection(table.table).estimatedDocumentCount();
   }

-  async initialReplication() {
-    const sourceTables = this.sync_rules.getSourceTables();
-    await this.client.connect();
-
-    // We need to get the snapshot time before taking the initial snapshot.
+  private async getSnapshotLsn(): Promise<string> {
     const hello = await this.defaultDb.command({ hello: 1 });
-    const snapshotTime = hello.lastWrite?.majorityOpTime?.ts as mongo.Timestamp;
+    // Basic sanity check
     if (hello.msg == 'isdbgrid') {
       throw new ServiceError(
         ErrorCode.PSYNC_S1341,
@@ -211,33 +231,139 @@
         ErrorCode.PSYNC_S1342,
         'Standalone MongoDB instances are not supported - use a replicaset.'
       );
-    } else if (snapshotTime == null) {
-      // Not known where this would happen apart from the above cases
-      throw new ReplicationAssertionError('MongoDB lastWrite timestamp not found.');
     }

+    // Open a change stream just to get a resume token for later use.
+    // We could use clusterTime from the hello command, but that won't tell us if the
+    // snapshot isn't valid anymore.
+    // If we just use the first resumeToken from the stream, we get two potential issues:
+    // 1. The resumeToken may just be a wrapped clusterTime, which does not detect changes
+    //    in source db or other stream issues.
+    // 2. The first actual change we get may have the same clusterTime, causing us to incorrectly
+    //    skip that event.
+    // Instead, we create a new checkpoint document, and wait until we get that document back in the stream.
+    // To avoid potential race conditions with the checkpoint creation, we create a new checkpoint document
+    // periodically until the timeout is reached.
+
+    const LSN_TIMEOUT_SECONDS = 60;
+    const LSN_CREATE_INTERVAL_SECONDS = 1;
+
+    await using streamManager = this.openChangeStream({ lsn: null, maxAwaitTimeMs: 0 });
+    const { stream } = streamManager;
+    const startTime = performance.now();
+    let lastCheckpointCreated = -10_000;
+    let eventsSeen = 0;
+
+    while (performance.now() - startTime < LSN_TIMEOUT_SECONDS * 1000) {
+      if (performance.now() - lastCheckpointCreated >= LSN_CREATE_INTERVAL_SECONDS * 1000) {
+        await createCheckpoint(this.client, this.defaultDb, this.checkpointStreamId);
+        lastCheckpointCreated = performance.now();
+      }
+
+      // tryNext() doesn't block, while next() / hasNext() does block until there is data on the stream
+      const changeDocument = await stream.tryNext().catch((e) => {
+        throw mapChangeStreamError(e);
+      });
+      if (changeDocument == null) {
+        continue;
+      }
+
+      const ns = 'ns' in changeDocument && 'coll' in changeDocument.ns ? changeDocument.ns : undefined;
+
+      if (ns?.coll == CHECKPOINTS_COLLECTION && 'documentKey' in changeDocument) {
+        const checkpointId = changeDocument.documentKey._id as string | mongo.ObjectId;
+        if (!this.checkpointStreamId.equals(checkpointId)) {
+          continue;
+        }
+        const { comparable: lsn } = new MongoLSN({
+          timestamp: changeDocument.clusterTime!,
+          resume_token: changeDocument._id
+        });
+        return lsn;
+      }
+
+      eventsSeen += 1;
+    }
+
+    // Could happen if there is a very large replication lag?
+    throw new ServiceError(
+      ErrorCode.PSYNC_S1301,
+      `Timeout while waiting for checkpoint document after ${LSN_TIMEOUT_SECONDS}s. Streamed events = ${eventsSeen}`
+    );
+  }
+
+  private async validateSnapshotLsn(lsn: string) {
+    await using streamManager = this.openChangeStream({ lsn: lsn, maxAwaitTimeMs: 0 });
+    const { stream } = streamManager;
+    try {
+      // tryNext() doesn't block, while next() / hasNext() does block until there is data on the stream
+      await stream.tryNext();
+    } catch (e) {
+      // Note: A timeout here is not handled as a ChangeStreamInvalidatedError, even though
+      // we possibly cannot recover from it.
+      throw mapChangeStreamError(e);
+    }
+  }
+
+  async initialReplication(snapshotLsn: string | null) {
+    const sourceTables = this.sync_rules.getSourceTables();
+    await this.client.connect();
+
     await this.storage.startBatch(
-      { zeroLSN: MongoLSN.ZERO.comparable, defaultSchema: this.defaultDb.databaseName, storeCurrentData: false },
+      {
+        logger: this.logger,
+        zeroLSN: MongoLSN.ZERO.comparable,
+        defaultSchema: this.defaultDb.databaseName,
+        storeCurrentData: false,
+        skipExistingRows: true
+      },
       async (batch) => {
+        if (snapshotLsn == null) {
+          // First replication attempt - get a snapshot and store the timestamp
+          snapshotLsn = await this.getSnapshotLsn();
+          await batch.setSnapshotLsn(snapshotLsn);
+          this.logger.info(`Marking snapshot at ${snapshotLsn}`);
+        } else {
+          this.logger.info(`Resuming snapshot at ${snapshotLsn}`);
+          // Check that the snapshot is still valid.
+          await this.validateSnapshotLsn(snapshotLsn);
+        }
+
         // Start by resolving all tables.
         // This checks postImage configuration, and that should fail as
-        // earlier as possible.
+        // early as possible.
         let allSourceTables: SourceTable[] = [];
         for (let tablePattern of sourceTables) {
           const tables = await this.resolveQualifiedTableNames(batch, tablePattern);
           allSourceTables.push(...tables);
         }

+        let tablesWithStatus: SourceTable[] = [];
         for (let table of allSourceTables) {
+          if (table.snapshotComplete) {
+            this.logger.info(`Skipping ${table.qualifiedName} - snapshot already done`);
+            continue;
+          }
+          let count = await this.estimatedCountNumber(table);
+          const updated = await batch.updateTableProgress(table, {
+            totalEstimatedCount: count
+          });
+          tablesWithStatus.push(updated);
+          this.relationCache.update(updated);
+          this.logger.info(
+            `To replicate: ${table.qualifiedName}: ${updated.snapshotStatus?.replicatedCount}/~${updated.snapshotStatus?.totalEstimatedCount}`
+          );
+        }
+
+        for (let table of tablesWithStatus) {
           await this.snapshotTable(batch, table);
           await batch.markSnapshotDone([table], MongoLSN.ZERO.comparable);

           await touch();
         }

-        const { comparable: lsn } = new MongoLSN({ timestamp: snapshotTime });
-        logger.info(`${this.logPrefix} Snapshot commit at ${snapshotTime.inspect()} / ${lsn}`);
-        await batch.commit(lsn);
+        this.logger.info(`Snapshot commit at ${snapshotLsn}`);
+        await batch.commit(snapshotLsn);
       }
     );
   }
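
The long comment in getSnapshotLsn() above describes a self-checking handshake: rather than trusting the stream's first resume token, the service writes its own checkpoint document and only accepts the token from the event that echoes that write back. A standalone sketch of the same pattern against the plain Node.js driver; the _sync_markers collection name and the helper are illustrative, not this module's actual code, which uses createCheckpoint() and the CHECKPOINTS_COLLECTION:

    import { MongoClient, ObjectId } from 'mongodb';

    // Write a marker document, then wait for the change stream to deliver that
    // exact write back; the event's _id field is then a known-good resume token.
    async function resumeTokenViaMarker(client: MongoClient, dbName: string) {
      const db = client.db(dbName);
      const markerId = new ObjectId();
      const stream = db.watch([], { maxAwaitTimeMS: 0 });
      const deadline = Date.now() + 60_000;
      try {
        await db.collection('_sync_markers').replaceOne(
          { _id: markerId },
          { createdAt: new Date() },
          { upsert: true }
        );
        while (Date.now() < deadline) {
          const event = await stream.tryNext(); // non-blocking poll
          if (event == null) continue;
          if ('documentKey' in event && markerId.equals(event.documentKey._id as ObjectId)) {
            return { resumeToken: event._id, clusterTime: event.clusterTime };
          }
        }
        throw new Error('Timed out waiting for the marker event');
      } finally {
        await stream.close();
      }
    }

Writing the marker repeatedly, as the real loop does once per LSN_CREATE_INTERVAL_SECONDS, guards against a marker written before the stream was actually established.
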
@@ -303,27 +429,38 @@
   }

   private async snapshotTable(batch: storage.BucketStorageBatch, table: storage.SourceTable) {
-    logger.info(`${this.logPrefix} Replicating ${table.qualifiedName}`);
-    const estimatedCount = await this.estimatedCount(table);
-    let at = 0;
+    const totalEstimatedCount = await this.estimatedCountNumber(table);
+    let at = table.snapshotStatus?.replicatedCount ?? 0;
     const db = this.client.db(table.schema);
     const collection = db.collection(table.table);
-    const cursor = collection.find({}, { batchSize: 6_000, readConcern: 'majority' });
+    await using query = new ChunkedSnapshotQuery({
+      collection,
+      key: table.snapshotStatus?.lastKey,
+      batchSize: this.snapshotChunkLength
+    });
+    if (query.lastKey != null) {
+      this.logger.info(
+        `Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} - resuming at _id > ${query.lastKey}`
+      );
+    } else {
+      this.logger.info(`Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()}`);
+    }

     let lastBatch = performance.now();
-    // hasNext() is the call that triggers fetching of the next batch,
-    // then we read it with readBufferedDocuments(). This gives us semi-explicit
-    // control over the fetching of each batch, and avoids a separate promise per document
-    let hasNextPromise = cursor.hasNext();
-    while (await hasNextPromise) {
-      const docBatch = cursor.readBufferedDocuments();
+    let nextChunkPromise = query.nextChunk();
+    while (true) {
+      const { docs: docBatch, lastKey } = await nextChunkPromise;
+      if (docBatch.length == 0) {
+        break;
+      }
+
+      if (this.abort_signal.aborted) {
+        throw new ReplicationAbortedError(`Aborted initial replication`);
+      }
+
       // Pre-fetch next batch, so that we can read and write concurrently
-      hasNextPromise = cursor.hasNext();
+      nextChunkPromise = query.nextChunk();
       for (let document of docBatch) {
-        if (this.abort_signal.aborted) {
-          throw new ReplicationAbortedError(`Aborted initial replication`);
-        }
-
         const record = constructAfterRecord(document);

         // This auto-flushes when the batch reaches its size limit
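
ChunkedSnapshotQuery, added in MongoSnapshotQuery.ts in this release, replaces the single long-lived cursor with a series of bounded range scans over _id, which is what makes the snapshot resumable. The shipped class is not shown in this hunk; the core query shape is roughly the following illustrative sketch, not the actual implementation:

    import { Collection, Document } from 'mongodb';

    // One chunk of an _id-ordered snapshot: everything after lastKey, capped at batchSize.
    async function nextChunk(
      collection: Collection<Document>,
      lastKey: any, // _id of the last document in the previous chunk, or null
      batchSize: number
    ): Promise<{ docs: Document[]; lastKey: any }> {
      const filter = lastKey == null ? {} : { _id: { $gt: lastKey } };
      const docs = await collection
        .find(filter, { readConcern: 'majority' })
        .sort({ _id: 1 }) // served by the built-in _id index
        .limit(batchSize)
        .toArray();
      return { docs, lastKey: docs.length > 0 ? docs[docs.length - 1]._id : lastKey };
    }

Because each chunk is an independent query, the last persisted _id is all the state needed after a restart: replication continues from _id > lastKey instead of rescanning the collection.
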
@@ -337,20 +474,27 @@
         });
       }

+      // Important: flush before marking progress
+      await batch.flush();
       at += docBatch.length;
       this.metrics.getCounter(ReplicationMetric.ROWS_REPLICATED).add(docBatch.length);
+
+      table = await batch.updateTableProgress(table, {
+        lastKey,
+        replicatedCount: at,
+        totalEstimatedCount: totalEstimatedCount
+      });
+      this.relationCache.update(table);
+
       const duration = performance.now() - lastBatch;
       lastBatch = performance.now();
-      logger.info(
-        `${this.logPrefix} Replicating ${table.qualifiedName} ${at}/${estimatedCount} in ${duration.toFixed(0)}ms`
+      this.logger.info(
+        `Replicating ${table.qualifiedName} ${table.formatSnapshotProgress()} in ${duration.toFixed(0)}ms`
       );
       await touch();
     }
     // In case the loop was interrupted, make sure we await the last promise.
-    await hasNextPromise;
-
-    await batch.flush();
-    logger.info(`${this.logPrefix} Replicated ${at} documents for ${table.qualifiedName}`);
+    await nextChunkPromise;
   }

   private async getRelation(
@@ -358,8 +502,7 @@
     descriptor: SourceEntityDescriptor,
     options: { snapshot: boolean }
   ): Promise<SourceTable> {
-    const cacheId = getCacheIdentifier(descriptor);
-    const existing = this.relation_cache.get(cacheId);
+    const existing = this.relationCache.get(descriptor);
     if (existing != null) {
       return existing;
     }
@@ -399,7 +542,7 @@
         collMod: collectionInfo.name,
         changeStreamPreAndPostImages: { enabled: true }
       });
-      logger.info(`${this.logPrefix} Enabled postImages on ${db}.${collectionInfo.name}`);
+      this.logger.info(`Enabled postImages on ${db}.${collectionInfo.name}`);
     } else if (!enabled) {
       throw new ServiceError(ErrorCode.PSYNC_S1343, `postImages not enabled on ${db}.${collectionInfo.name}`);
     }
@@ -425,12 +568,12 @@
       entity_descriptor: descriptor,
       sync_rules: this.sync_rules
     });
-    this.relation_cache.set(getCacheIdentifier(descriptor), result.table);
+    this.relationCache.update(result.table);

     // Drop conflicting collections.
     // This is generally not expected for MongoDB source dbs, so we log an error.
     if (result.dropTables.length > 0) {
-      logger.error(
+      this.logger.error(
        `Conflicting collections found for ${JSON.stringify(descriptor)}. Dropping: ${result.dropTables.map((t) => t.id).join(', ')}`
       );
       await batch.drop(result.dropTables);
@@ -442,7 +585,7 @@
     // 3. The table is used in sync rules.
     const shouldSnapshot = snapshot && !result.table.snapshotComplete && result.table.syncAny;
     if (shouldSnapshot) {
-      logger.info(`${this.logPrefix} New collection: ${descriptor.schema}.${descriptor.name}`);
+      this.logger.info(`New collection: ${descriptor.schema}.${descriptor.name}`);
       // Truncate this table, in case a previous snapshot was interrupted.
       await batch.truncate([result.table]);

@@ -462,7 +605,7 @@
     change: mongo.ChangeStreamDocument
   ): Promise<storage.FlushedResult | null> {
     if (!table.syncAny) {
-      logger.debug(`${this.logPrefix} Collection ${table.qualifiedName} not used in sync rules - skipping`);
+      this.logger.debug(`Collection ${table.qualifiedName} not used in sync rules - skipping`);
       return null;
     }

@@ -525,7 +668,11 @@
     const result = await this.initSlot();
     await this.setupCheckpointsCollection();
     if (result.needsInitialSync) {
-      await this.startInitialReplication();
+      if (result.snapshotLsn == null) {
+        // Snapshot LSN is not present, so we need to start replication from scratch.
+        await this.storage.clear({ signal: this.abort_signal });
+      }
+      await this.initialReplication(result.snapshotLsn);
     }
   }

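Combined with the snapshotLsn now returned by initSlot(), start() makes a three-way decision instead of the old unconditional clear-and-restart; paraphrased from this diff, not new API surface:

    const { needsInitialSync, snapshotLsn } = await this.initSlot();
    if (needsInitialSync) {
      if (snapshotLsn == null) {
        // No resumable progress: discard any partial state and snapshot from scratch.
        await this.storage.clear({ signal: this.abort_signal });
      }
      // With a snapshotLsn, initialReplication() validates that the LSN is still
      // resumable, skips tables already marked complete, and continues the rest.
      await this.initialReplication(snapshotLsn);
    }
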
@@ -544,73 +691,99 @@
     }
   }

+  private openChangeStream(options: { lsn: string | null; maxAwaitTimeMs?: number }) {
+    const lastLsn = options.lsn ? MongoLSN.fromSerialized(options.lsn) : null;
+    const startAfter = lastLsn?.timestamp;
+    const resumeAfter = lastLsn?.resumeToken;
+
+    const filters = this.getSourceNamespaceFilters();
+
+    const pipeline: mongo.Document[] = [
+      {
+        $match: filters.$match
+      },
+      { $changeStreamSplitLargeEvent: {} }
+    ];
+
+    let fullDocument: 'required' | 'updateLookup';
+
+    if (this.usePostImages) {
+      // 'read_only' or 'auto_configure'
+      // Configuration happens during snapshot, or when we see new
+      // collections.
+      fullDocument = 'required';
+    } else {
+      fullDocument = 'updateLookup';
+    }
+
+    const streamOptions: mongo.ChangeStreamOptions = {
+      showExpandedEvents: true,
+      maxAwaitTimeMS: options.maxAwaitTimeMs ?? this.maxAwaitTimeMS,
+      fullDocument: fullDocument
+    };
+
+    /**
+     * Only one of these options can be supplied at a time.
+     */
+    if (resumeAfter) {
+      streamOptions.resumeAfter = resumeAfter;
+    } else {
+      // Legacy: We don't persist lsns without resumeTokens anymore, but we do still handle the
+      // case if we have an old one.
+      streamOptions.startAtOperationTime = startAfter;
+    }
+
+    let stream: mongo.ChangeStream<mongo.Document>;
+    if (filters.multipleDatabases) {
+      // Requires readAnyDatabase@admin on Atlas
+      stream = this.client.watch(pipeline, streamOptions);
+    } else {
+      // Same general result, but requires less permissions than the above
+      stream = this.defaultDb.watch(pipeline, streamOptions);
+    }
+
+    this.abort_signal.addEventListener('abort', () => {
+      stream.close();
+    });
+
+    return {
+      stream,
+      filters,
+      [Symbol.asyncDispose]: async () => {
+        return stream.close();
+      }
+    };
+  }
+
   async streamChangesInternal() {
     // Auto-activate as soon as initial replication is done
     await this.storage.autoActivate();

     await this.storage.startBatch(
-      { zeroLSN: MongoLSN.ZERO.comparable, defaultSchema: this.defaultDb.databaseName, storeCurrentData: false },
+      {
+        logger: this.logger,
+        zeroLSN: MongoLSN.ZERO.comparable,
+        defaultSchema: this.defaultDb.databaseName,
+        storeCurrentData: false
+      },
       async (batch) => {
         const { lastCheckpointLsn } = batch;
-        const lastLsn = lastCheckpointLsn ? MongoLSN.fromSerialized(lastCheckpointLsn) : null;
+        const lastLsn = MongoLSN.fromSerialized(lastCheckpointLsn!);
         const startAfter = lastLsn?.timestamp;
-        const resumeAfter = lastLsn?.resumeToken;
-
-        logger.info(`${this.logPrefix} Resume streaming at ${startAfter?.inspect()} / ${lastLsn}`);
-
-        const filters = this.getSourceNamespaceFilters();
-
-        const pipeline: mongo.Document[] = [
-          {
-            $match: filters.$match
-          },
-          { $changeStreamSplitLargeEvent: {} }
-        ];
-
-        let fullDocument: 'required' | 'updateLookup';
-
-        if (this.usePostImages) {
-          // 'read_only' or 'auto_configure'
-          // Configuration happens during snapshot, or when we see new
-          // collections.
-          fullDocument = 'required';
-        } else {
-          fullDocument = 'updateLookup';
-        }

-        const streamOptions: mongo.ChangeStreamOptions = {
-          showExpandedEvents: true,
-          maxAwaitTimeMS: this.maxAwaitTimeMS,
-          fullDocument: fullDocument
-        };
-
-        /**
-         * Only one of these options can be supplied at a time.
-         */
-        if (resumeAfter) {
-          streamOptions.resumeAfter = resumeAfter;
-        } else {
-          streamOptions.startAtOperationTime = startAfter;
-        }
+        // It is normal for this to be a minute or two old when there is a low volume
+        // of ChangeStream events.
+        const tokenAgeSeconds = Math.round((Date.now() - timestampToDate(startAfter).getTime()) / 1000);

-        let stream: mongo.ChangeStream<mongo.Document>;
-        if (filters.multipleDatabases) {
-          // Requires readAnyDatabase@admin on Atlas
-          stream = this.client.watch(pipeline, streamOptions);
-        } else {
-          // Same general result, but requires less permissions than the above
-          stream = this.defaultDb.watch(pipeline, streamOptions);
-        }
+        this.logger.info(`Resume streaming at ${startAfter?.inspect()} / ${lastLsn} | Token age: ${tokenAgeSeconds}s`);

+        await using streamManager = this.openChangeStream({ lsn: lastCheckpointLsn });
+        const { stream, filters } = streamManager;
         if (this.abort_signal.aborted) {
-          stream.close();
+          await stream.close();
           return;
         }

-        this.abort_signal.addEventListener('abort', () => {
-          stream.close();
-        });
-
         // Always start with a checkpoint.
         // This helps us to clear errors when restarting, even if there is
         // no data to replicate.
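
Extracting openChangeStream() lets the snapshot code open short-lived streams (maxAwaitTimeMs: 0) with the same pipeline and resume handling as the long-running one. The resume options themselves are a driver-level constraint; a sketch of how they interact in the Node.js driver, using an illustrative helper that is not part of this package:

    import { ChangeStreamOptions, ResumeToken, Timestamp } from 'mongodb';

    // resumeAfter and startAtOperationTime are mutually exclusive: a persisted
    // resume token pins an exact oplog position, while an operation-time start
    // is timestamp-based and only kept here as a legacy fallback.
    function resumeOptions(resumeToken?: ResumeToken, opTime?: Timestamp): ChangeStreamOptions {
      const options: ChangeStreamOptions = {
        showExpandedEvents: true,
        fullDocument: 'updateLookup'
      };
      if (resumeToken != null) {
        options.resumeAfter = resumeToken;
      } else if (opTime != null) {
        options.startAtOperationTime = opTime;
      }
      return options;
    }

The Symbol.asyncDispose entry on the object returned by openChangeStream() is what allows the `await using streamManager = ...` call sites to close the stream automatically when the scope exits.
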
@@ -655,10 +828,16 @@
         // We add an additional check for waitForCheckpointLsn == null, to make sure we're not
         // doing a keepalive in the middle of a transaction.
         if (waitForCheckpointLsn == null && performance.now() - lastEmptyResume > 60_000) {
-          const { comparable: lsn } = MongoLSN.fromResumeToken(stream.resumeToken);
+          const { comparable: lsn, timestamp } = MongoLSN.fromResumeToken(stream.resumeToken);
           await batch.keepalive(lsn);
           await touch();
           lastEmptyResume = performance.now();
+          // Log the token update. This helps as a general "replication is still active" message in the logs.
+          // This token would typically be around 10s behind.
+          this.logger.info(
+            `Idle change stream. Persisted resumeToken for ${timestampToDate(timestamp).toISOString()}`
+          );
+          this.isStartingReplication = false;
         }
         continue;
       }
@@ -711,8 +890,8 @@

           if (!flexDbNameWorkaroundLogged) {
             flexDbNameWorkaroundLogged = true;
-            logger.warn(
-              `${this.logPrefix} Incorrect DB name in change stream: ${changeDocument.ns.db}. Changed to ${this.defaultDb.databaseName}.`
+            this.logger.warn(
+              `Incorrect DB name in change stream: ${changeDocument.ns.db}. Changed to ${this.defaultDb.databaseName}.`
             );
           }
         }
@@ -771,7 +950,12 @@
           if (waitForCheckpointLsn != null && lsn >= waitForCheckpointLsn) {
             waitForCheckpointLsn = null;
           }
-          await batch.commit(lsn);
+          const didCommit = await batch.commit(lsn, { oldestUncommittedChange: this.oldestUncommittedChange });
+
+          if (didCommit) {
+            this.oldestUncommittedChange = null;
+            this.isStartingReplication = false;
+          }
         } else if (
           changeDocument.operationType == 'insert' ||
           changeDocument.operationType == 'update' ||
@@ -790,6 +974,9 @@
             snapshot: true
           });
           if (table.syncAny) {
+            if (this.oldestUncommittedChange == null && changeDocument.clusterTime != null) {
+              this.oldestUncommittedChange = timestampToDate(changeDocument.clusterTime);
+            }
             await this.writeChange(batch, table, changeDocument);
           }
         } else if (changeDocument.operationType == 'drop') {
@@ -800,7 +987,7 @@
           });
           if (table.syncAny) {
             await batch.drop([table]);
-            this.relation_cache.delete(getCacheIdentifier(rel));
+            this.relationCache.delete(table);
           }
         } else if (changeDocument.operationType == 'rename') {
           const relFrom = getMongoRelation(changeDocument.ns);
@@ -811,7 +998,7 @@
           });
           if (tableFrom.syncAny) {
             await batch.drop([tableFrom]);
-            this.relation_cache.delete(getCacheIdentifier(relFrom));
+            this.relationCache.delete(relFrom);
           }
           // Here we do need to snapshot the new table
           const collection = await this.getCollectionInfo(relTo.schema, relTo.name);
@@ -825,6 +1012,19 @@
       }
     );
   }
+
+  async getReplicationLagMillis(): Promise<number | undefined> {
+    if (this.oldestUncommittedChange == null) {
+      if (this.isStartingReplication) {
+        // We don't have anything to compute replication lag with yet.
+        return undefined;
+      } else {
+        // We don't have any uncommitted changes, so replication is up-to-date.
+        return 0;
+      }
+    }
+    return Date.now() - this.oldestUncommittedChange.getTime();
+  }
 }

 async function touch() {
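
The new getReplicationLagMillis() gives the lag metric three distinct states rather than a single number: unknown while replication is still starting, zero when the stream is idle with nothing uncommitted, and otherwise the age of the oldest change sitting in the current uncommitted batch. A worked example of that last case:

    // Illustrative values: oldest uncommitted change observed at T,
    // queried five seconds later -> 5000 ms of reported lag.
    const oldestUncommittedChange = new Date('2024-01-01T00:00:00.000Z');
    const now = new Date('2024-01-01T00:00:05.000Z').getTime();
    console.log(now - oldestUncommittedChange.getTime()); // 5000

Because the timestamp comes from the change document's clusterTime on the source database, the reported lag measures end-to-end delay from the source write, not just processing time inside the service.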